Linux MPI+HIP混编
2021/8/26 7:07:16
本文主要是介绍Linux MPI+HIP混编,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
Linux MPI+HIP混编
源文件:
pi_hip.cpp
#include<stdio.h> #include<stdlib.h> #include <hip/hip_runtime.h> #define NBIN 10000000 // Number of bins #define NUM_BLOCK 13 // Number of thread blocks #define NUM_THREAD 192 // Number of threads per block __global__ void cal_pi(float *sum,int nbin,float step,float offset,int nthreads,int nblocks) { int i; float x; int idx = blockIdx.x*blockDim.x+threadIdx.x; // Sequential thread index across blocks for (i=idx; i< nbin; i+=nthreads*nblocks) { // Interleaved bin assignment to threads x = offset+(i+0.5)*step; sum[idx] += 4.0/(1.0+x*x); } } void computePI(int nproc,int myid, float *sumHost,float step) { int nbin; float offset; float *sumDev; // Pointers to device arrays dim3 dimGrid(NUM_BLOCK,1,1); // Grid dimensions (only use 1D) dim3 dimBlock(NUM_THREAD,1,1); // Block dimensions (only use 1D) nbin = NBIN/nproc; // Number of bins per MPI process offset = myid*step*nbin; // Quadrature-point offset size_t size = NUM_BLOCK*NUM_THREAD*sizeof(float); //Array memory size hipMalloc((void **) &sumDev,size); // Allocate array on device hipMemset(sumDev,0,size); // Reset array in device to 0 // // Calculate on device (call CUDA kernel) hipLaunchKernelGGL(cal_pi,dimGrid,dimBlock,0,0,sumDev,nbin,step,offset,NUM_THREAD,NUM_BLOCK); // // Retrieve result from device and store it in host array hipMemcpy(sumHost,sumDev,size,hipMemcpyDeviceToHost); hipFree(sumDev); }
源文件:
main.cpp
// main.cpp -- MPI driver: every rank computes a partial value of pi on its GPU
// and the partial values are summed across ranks.
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define NBIN 10000000   // Number of bins
#define NUM_BLOCK 13    // Number of thread blocks
#define NUM_THREAD 192  // Number of threads per block

// Implemented in pi_hip.cpp: fills sumHost with NUM_BLOCK*NUM_THREAD
// per-thread partial sums computed on the device.
void computePI(int nproc, int myid, float *sumHost, float step);

int main(int argc, char **argv)
{
    int myid, nproc, tid;
    float pi = 0.0, pig = 0.0, step;
    float *sumHost;  // Host array receiving the per-thread sums

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);   // My MPI rank
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);  // Number of MPI processes

    size_t size = NUM_BLOCK * NUM_THREAD * sizeof(float);  // Array memory size
    sumHost = (float *)malloc(size);                       // Allocate array on host
    if (sumHost == NULL) {
        // Without a buffer computePI() would copy device data to a null
        // pointer; bring the whole job down instead.
        fprintf(stderr, "myid = %d: cannot allocate %zu bytes on host\n", myid, size);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    // Step size with redefined number of bins: NBIN/nproc truncates, so the
    // bin count actually used is (NBIN/nproc)*nproc.
    step = 1.0 / (float)((NBIN / nproc) * nproc);

    computePI(nproc, myid, sumHost, step);

    // Reduction over GPU threads
    for (tid = 0; tid < NUM_THREAD * NUM_BLOCK; tid++) {
        pi += sumHost[tid];
    }
    printf("step = %11.7f\n", step);
    pi *= step;
    free(sumHost);
    printf("myid = %d: partial pi = %11.7f\n", myid, pi);

    // Reduction over MPI processes
    MPI_Allreduce(&pi, &pig, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
    if (myid == 0) {
        printf("PI = %11.7f\n", pig);
    }

    MPI_Finalize();
    return 0;
}
Makefile文件:
方法1:直接将pi_hip.cpp编译成.o文件
# Method 1: compile pi_hip.cpp to an object file and link it directly.
MPILIB=-L/opt/hpc/software/openmpi-3.1.2/lib -lmpi

.PHONY: all clean

# Clean, build both objects, link with hipcc, then run on 8 ranks.
all:
	$(MAKE) clean
	hipcc -c -O3 -std=c++11 -D__HIP_PLATFORM_HCC__ -o pi_hip.o pi_hip.cpp
	mpicc -w -g -o main.o -c main.cpp
	# Libraries go after the objects that reference them.
	hipcc -g -o out-pi pi_hip.o main.o $(MPILIB) -lm -lstdc++
	mpirun -mca pml ucx -np 8 ./out-pi

clean:
	rm -fr *.o *.err *.out out-pi
编译: 分别用 hipcc 和mpicc 编译 *_hip.cpp 和 *.cpp 文件 (不分前后)
链接: 用hipcc链接,注意加上MPI的库(即Makefile中的 $(MPILIB))
运行: 直接用mpirun运行,注意加上一些参数(如 -mca pml ucx)
方法2:直接将pi_hip.cpp编译成动态库(.so文件)
# Method 2: build pi_hip.cpp into a shared library (libpi_hip.so) and link against it.
# HIPLIB is not referenced by any rule below; it is kept as defined.
HIPLIB=-L/opt/rocm/hip/lib -lhip_hcc
MPILIB=-L/opt/hpc/software/mpi/hpcx/v2.4.0/ompi/lib -lmpi

.PHONY: all clean

# Clean, build the shared library and main.o, link with hipcc, then run on 8 ranks.
all:
	$(MAKE) clean
	hipcc -c -fpic -O3 -std=c++11 -D__HIP_PLATFORM_HCC__ -o pi_hip.o pi_hip.cpp
	hipcc -shared pi_hip.o -o libpi_hip.so
	mpicc -w -g -o main.o -c main.cpp
	# Libraries go after the objects that reference them; the $$ORIGIN rpath
	# lets out-pi find libpi_hip.so next to itself at run time.
	hipcc -g -o out-pi main.o -L./ -lpi_hip $(MPILIB) -lm -lstdc++ -Wl,-rpath,'$$ORIGIN'
	mpirun -mca pml ucx -np 8 ./out-pi

clean:
	rm -fr *.o *.err *.out out-pi *.so
编译:
用hipcc将_hip.cpp编译成.o文件,再进一步编译成.so文件
用mpicc将.cpp文件编译成.o文件
链接:用hipcc链接,注意加上MPI的库(即Makefile中的 $(MPILIB))和.so文件
运行: 直接用mpirun运行,注意加上一些参数(如 -mca pml ucx)
这篇关于Linux MPI+HIP混编的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2024-12-18git仓库有更新,jenkins 自动触发拉代码怎么配置的?-icode9专业技术文章分享
- 2024-12-18Jenkins webhook 方式怎么配置指定的分支?-icode9专业技术文章分享
- 2024-12-13Linux C++项目实战入门教程
- 2024-12-13Linux C++编程项目实战入门教程
- 2024-12-11Linux部署Scrapy教程:新手入门指南
- 2024-12-11怎么将在本地创建的 Maven 仓库迁移到 Linux 服务器上?-icode9专业技术文章分享
- 2024-12-10Linux常用命令
- 2024-12-06谁看谁服! Linux 创始人对于进程和线程的理解是…
- 2024-12-04操作系统教程:新手入门及初级技巧详解
- 2024-12-04操作系统入门:新手必学指南