-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathhumanlogic.cuh
112 lines (71 loc) · 2.82 KB
/
humanlogic.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#ifndef ANG_H
#define ANG_H
// This header includes populate.cu and human.cu and launches their kernels.
#pragma once
#include <math.h>
#include "util/error_utils.cuh"
#include "data.cuh"
#include "populate.cu"
#include "human.cu"
using namespace std;
// Runs one solver round on the device board:
//   1. launch populate, wait for it, copy the board back to the host and
//      print it through debug_values;
//   2. launch human, wait for it, then copy the points counter and the
//      updated board back to the host.
// Both kernels are launched with the same <<<blocksPerGrid, threadsPerBlock>>>
// configuration. Every CUDA call is routed through ERROR_CHECK.
static void runHumanLogicRound(Square * h_unsolved, Square * d_unsolved,
                               size_t memsize, int n,
                               int blocksPerGrid, int threadsPerBlock,
                               int * d_points, int * h_points) {
    populate<<<blocksPerGrid, threadsPerBlock>>>(d_unsolved);
    ERROR_CHECK( cudaPeekAtLastError() );    // launch-configuration errors
    ERROR_CHECK( cudaDeviceSynchronize() );  // errors raised while the kernel ran

    ERROR_CHECK( cudaMemcpy(h_unsolved, d_unsolved, memsize,
                            cudaMemcpyDeviceToHost) );
    debug_values(h_unsolved);

    human<<<blocksPerGrid, threadsPerBlock>>>(d_unsolved, n, d_points);
    ERROR_CHECK( cudaPeekAtLastError() );
    ERROR_CHECK( cudaDeviceSynchronize() );

    // Fetch the counter the human kernel was given, then the updated board.
    ERROR_CHECK( cudaMemcpy(h_points, d_points, sizeof(int),
                            cudaMemcpyDeviceToHost) );
    // printf("Amount of work done this round is %d.\n", *h_points);
    ERROR_CHECK( cudaMemcpy(h_unsolved, d_unsolved, memsize,
                            cudaMemcpyDeviceToHost) );
}

// Host driver: uploads the n*n board in h_unsolved to d_unsolved, runs two
// populate/human rounds on the device, and leaves the latest board state in
// h_unsolved.
//
// Parameters:
//   h_unsolved - host buffer of n*n Square entries; read as input and
//                overwritten with the device board after each kernel phase.
//   d_unsolved - device buffer of at least n*n Square entries.
//   d_solved   - currently unused by this function.
//   n          - board side length; nothing is done when n <= 0.
//
// NOTE(review): a single block of n*n threads is launched, so the launch is
// expected to fail (and be reported through ERROR_CHECK) once n*n exceeds the
// device's per-block thread limit - confirm the supported range of n.
void HumanLogic (Square * h_unsolved, Square * d_unsolved,
                 Square * d_solved, int n) {
    (void) d_solved;  // kept for interface compatibility; not used here

    // n == 0 would make threadsPerBlock zero and divide by zero below.
    if (n <= 0) {
        return;
    }

    // size_t avoids narrowing the byte count that sizeof already yields.
    const size_t memsize = sizeof(Square) * (size_t) n * (size_t) n;
    ERROR_CHECK( cudaMemcpy(d_unsolved, h_unsolved, memsize,
                            cudaMemcpyHostToDevice) );

    // One thread per cell, all in one block: the ceil-div evaluates to 1.
    const int totalCells      = n * n;
    const int threadsPerBlock = totalCells;
    const int blocksPerGrid   = (totalCells + threadsPerBlock - 1) / threadsPerBlock;

    // Device-side counter handed to the human kernel. It is zeroed once so
    // the kernel never starts from indeterminate memory.
    // NOTE(review): if human is meant to report per-round work, the counter
    // may need resetting before every round - confirm against human.cu.
    int * d_points = NULL;
    ERROR_CHECK( cudaMalloc((void**) &d_points, sizeof(int)) );
    ERROR_CHECK( cudaMemset(d_points, 0, sizeof(int)) );

    // Host copy of the counter; a stack variable needs no malloc/free.
    int h_points = 0;

    // Round 1
    runHumanLogicRound(h_unsolved, d_unsolved, memsize, n,
                       blocksPerGrid, threadsPerBlock, d_points, &h_points);
    // const char * finished = "/********** Angela's (C) **********/";
    //output(finished, "-alg", n, false, h_unsolved);

    // Round 2
    runHumanLogicRound(h_unsolved, d_unsolved, memsize, n,
                       blocksPerGrid, threadsPerBlock, d_points, &h_points);
    // const char * finished = "/********** Angela's (C) **********/";
    //output(finished, "-bee", n, false, h_unsolved);

    // Release the counter allocated above (it was leaked before).
    ERROR_CHECK( cudaFree(d_points) );
}
#endif