basic
This commit is contained in:
@ -39,7 +39,10 @@ bool compare(float *a, float *b, const int N) {
|
|||||||
|
|
||||||
// Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, N).
//
// Launch layout: 1D grid of 1D blocks (only the .x components are used).
// Any grid size is valid: each thread starts at its global index and
// advances by the total number of launched threads until it passes N, so
// the kernel stays correct even when fewer than N threads are launched.
// No shared memory is used.
//
// Parameters:
//   a, b - input arrays; each must hold at least N floats and be
//          dereferenceable from device code.
//   c    - output array; must hold at least N floats and be dereferenceable
//          from device code.
//   N    - number of elements to process; N <= 0 performs no work.
__global__ void vectorAddGPU(float *a, float *b, float *c, const int N) {
    // Index math is done in signed 64-bit. The built-in index variables are
    // 32-bit unsigned, so for N close to INT_MAX the value
    // blockIdx.x * blockDim.x + threadIdx.x in the tail block exceeds INT_MAX
    // and would turn negative when narrowed to int, slipping past the bounds
    // check and indexing out of range.
    const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
    const long long first =
        static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (long long i = first; i < N; i += stride) {
        c[i] = a[i] + b[i];
    }
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
@ -58,17 +61,39 @@ int main() {
|
|||||||
|
|
||||||
// ************** START GPU MEMORY ALLOCATION **************
|
// ************** START GPU MEMORY ALLOCATION **************
|
||||||
// Implement your code here
|
// Implement your code here
|
||||||
|
float *d_a, *d_b, *d_c;
|
||||||
|
cudaMalloc(&d_a, MAXN * sizeof(float));
|
||||||
|
cudaMalloc(&d_b, MAXN * sizeof(float));
|
||||||
|
cudaMalloc(&d_c, MAXN * sizeof(float));
|
||||||
|
|
||||||
|
cudaMemcpy(d_a, a, MAXN * sizeof(float), cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(d_b, b, MAXN * sizeof(float), cudaMemcpyHostToDevice);
|
||||||
// ************** START GPU COMPUTATION **************
|
// ************** START GPU COMPUTATION **************
|
||||||
start = std::chrono::high_resolution_clock::now();
|
start = std::chrono::high_resolution_clock::now();
|
||||||
// Implement your code here
|
int threadsPerBlock = 512;
|
||||||
|
int blocksPerGrid = (MAXN + threadsPerBlock - 1) / threadsPerBlock;
|
||||||
|
vectorAddGPU<<<blocksPerGrid, threadsPerBlock>>>(d_a, d_b, d_c, MAXN);
|
||||||
|
cudaDeviceSynchronize();
|
||||||
end = std::chrono::high_resolution_clock::now();
|
end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
float *result = new float[MAXN];
|
float *result = new float[MAXN];
|
||||||
// Copy the result from GPU to CPU
|
// Copy the result from GPU to CPU
|
||||||
|
cudaMemcpy(result, d_c, MAXN * sizeof(float), cudaMemcpyDeviceToHost);
|
||||||
if (compare(c, result, MAXN)) {
|
if (compare(c, result, MAXN)) {
|
||||||
std::chrono::duration<double> new_elapsed = end - start;
|
std::chrono::duration<double> new_elapsed = end - start;
|
||||||
printf("GPU time: %.3fs\n", new_elapsed.count());
|
printf("GPU time: %.3fs\n", new_elapsed.count());
|
||||||
printf("Speedup: %.2fx\n", elapsed.count() / new_elapsed.count());
|
printf("Speedup: %.2fx\n", elapsed.count() / new_elapsed.count());
|
||||||
}
|
}
|
||||||
|
// Free GPU memory
|
||||||
|
cudaFree(d_a);
|
||||||
|
cudaFree(d_b);
|
||||||
|
cudaFree(d_c);
|
||||||
|
|
||||||
|
// Free CPU memory
|
||||||
|
delete[] a;
|
||||||
|
delete[] b;
|
||||||
|
delete[] c;
|
||||||
|
delete[] result;
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
Reference in New Issue
Block a user