#include #include #include #include #include #include #include #include #include #include #include std::random_device rd; std::mt19937 rng(rd()); class KMeans { private: std::vector< std::pair > points; std::vector< std::pair > centroids; std::vector clusters; int cluster_tot; /* Total number of clusters */ int points_tot; const int max_iter = 10 * 1000; /* Max number of iteration */ const double threshold = 1e-3; /* Stopping criterion */ cl_device_id device_id; cl_context context; cl_command_queue commands; cl_program program_reassign, program_recompute; cl_kernel kernel_reassign, kernel_recompute; double compute_score(); void reassign(); void reassign_GPU(); void recompute(); void recompute_GPU(); public: KMeans(int bound, int cluster_tot, int points_tot); int numPoints() { return points_tot; } int getCluster(int i) { return clusters[i]; } std::vector getClusters() { return clusters; } std::pair getPoint(int i) { return points[i]; } void run(); void setup_cl(); }; KMeans::KMeans(int bound, int cluster_tot_, int points_tot_) { std::uniform_int_distribution uni(0, bound); cluster_tot = cluster_tot_; points_tot = points_tot_; points = std::vector< std::pair >(points_tot); clusters = std::vector(points_tot); centroids = std::vector< std::pair >(cluster_tot); for (int i = 0; i < points_tot; i++) { points[i] = std::make_pair(uni(rng), uni(rng)); clusters[i] = i % cluster_tot; } for (int k = 0; k < cluster_tot; k++) { centroids[k] = std::make_pair(uni(rng), uni(rng)); } } std::string read_kernel(std::string path) { std::ifstream s(path); if (s.is_open()) { return std::string((std::istreambuf_iterator(s)), std::istreambuf_iterator()); } throw std::invalid_argument("Failed to open file " + path); } void KMeans::setup_cl() { std::string reassign_kernel_code = read_kernel("reassign.cl"); std::string recompute_kernel_code = read_kernel("recompute.cl"); const char* reassign_k = reassign_kernel_code.c_str(); const char* recompute_k = recompute_kernel_code.c_str(); int err; int gpu = 1; err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); if (err != CL_SUCCESS) throw std::invalid_argument("Error: Failed to create a device group!"); context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); if (!context) throw std::invalid_argument("Error: Failed to create a compute context!"); commands = clCreateCommandQueue(context, device_id, 0, &err); if (!commands) throw std::invalid_argument("Error: Failed to create a command commands!"); program_reassign = clCreateProgramWithSource(context, 1, (const char **) & reassign_k, NULL, &err); if (!program_reassign) throw std::invalid_argument("Error: Failed to create program Reassign!"); program_recompute = clCreateProgramWithSource(context, 1, (const char **) & recompute_k, NULL, &err); if (!program_recompute) throw std::invalid_argument("Error: Failed to create program Recompute!"); err = clBuildProgram(program_reassign, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_t len; char buffer[2048]; clGetProgramBuildInfo(program_reassign, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); throw std::invalid_argument("Failed to build kernel reassign:\n" + std::string(buffer)); } err = clBuildProgram(program_recompute, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_t len; char buffer[2048]; clGetProgramBuildInfo(program_recompute, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); throw std::invalid_argument("Failed to build kernel recompute:\n" + std::string(buffer)); } kernel_reassign = clCreateKernel(program_reassign, "reassign", &err); if (!kernel_reassign || err != CL_SUCCESS) throw std::invalid_argument("Error: Failed to create compute kernel reassign!"); kernel_recompute = clCreateKernel(program_recompute, "recompute", &err); if (!kernel_recompute || err != CL_SUCCESS) throw std::invalid_argument("Error: Failed to create compute kernel!"); } double euclid(std::pair p1, std::pair p2) { return sqrt(pow(p1.first - p2.first, 2.0) + pow(p1.second - p2.second, 2.0)); } double KMeans::compute_score() { double score = 0.0; for (int i = 0; i < points_tot; i++) { //std::cout << "LOL" << std::endl; std::pair coord = points[i]; //std::cout << "LOL2 " << clusters[i] << std::endl; std::pair cent = centroids[clusters[i]]; double d = euclid(coord, cent); score += d; } return score; } void KMeans::run() { double current_score = 0.0; int iter = 0; while (true) { //std::cout << "ITER " << iter << std::endl; double sc = compute_score(); std::cout << "New score " << sc << std::endl; if (std::abs(sc - current_score) < threshold || iter >= max_iter) { std::cout << iter << " iterations" << std::endl; break; } current_score = sc; reassign(); //reassign_GPU(); //recompute(); recompute_GPU(); iter++; } } /* Reassign each point to its nearest centroid */ void KMeans::reassign() { for (int i = 0; i < points_tot; i++) { double dmin = 1e25; int kmin = 0; //std::cout << "LAULE " << i << " / " << points_tot << std::endl; for (int k = 0; k < cluster_tot; k++) { double d = euclid(points[i], centroids[k]); if (d < dmin) { kmin = k; dmin = d; } } clusters[i] = kmin; } } void KMeans::reassign_GPU() { int err; /* Define buffers */ cl_mem out_clusters = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * points_tot, NULL, NULL); cl_mem in_points = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * points_tot * 2, NULL, NULL); cl_mem in_centroids = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * cluster_tot * 2, NULL, NULL); /* Fill buffers */ int* c_points = (int*) malloc(sizeof(int) * points_tot * 2); for (int i = 0; i < points_tot; i++) { c_points[2 * i] = points[i].first; c_points[2 * i + 1] = points[i].second; } if (clEnqueueWriteBuffer(commands, in_points, CL_TRUE, 0, sizeof(int) * points_tot * 2, c_points, 0, NULL, NULL) != CL_SUCCESS) { throw std::invalid_argument("Failed to write to c_points array"); }; int* c_centroids = (int*) malloc(sizeof(int) * cluster_tot * 2); for (int i = 0; i < cluster_tot; i++) { c_centroids[2 * i] = centroids[i].first; c_centroids[2 * i + 1] = centroids[i].second; } if (clEnqueueWriteBuffer(commands, in_centroids, CL_TRUE, 0, sizeof(int) * cluster_tot * 2, c_centroids, 0, NULL, NULL) != CL_SUCCESS) { throw std::invalid_argument("Failed to write to c_centroids array"); }; err = 0; err = clSetKernelArg(kernel_reassign, 0, sizeof(cl_mem), &in_points); err |= clSetKernelArg(kernel_reassign, 1, sizeof(cl_mem), &in_centroids); err |= clSetKernelArg(kernel_reassign, 2, sizeof(cl_mem), &out_clusters); err |= clSetKernelArg(kernel_reassign, 3, sizeof(int), &points_tot); err |= clSetKernelArg(kernel_reassign, 4, sizeof(int), &cluster_tot); if (err != CL_SUCCESS) { throw std::invalid_argument("Failed to set arguments"); } size_t local, global; err = clGetKernelWorkGroupInfo(kernel_reassign, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); if (err != CL_SUCCESS) { throw std::invalid_argument("Failed to set arguments 1"); } global = local; while (points_tot < global) { global *= 2; } err += clEnqueueNDRangeKernel(commands, kernel_reassign, 1, NULL, &global, &local, 0, NULL, NULL); clFinish(commands); if (err != CL_SUCCESS) std::cout << err << std::endl; if (err != CL_SUCCESS) { throw std::invalid_argument("Failed to set arguments 2"); } err += clEnqueueReadBuffer( commands, out_clusters, CL_TRUE, 0, sizeof(int) * points_tot, clusters.data(), 0, NULL, NULL ); if (err != CL_SUCCESS) { throw std::invalid_argument("Failed to set arguments 3"); } } void KMeans::recompute() { std::vector clusters_size = std::vector(cluster_tot, 0); std::vector clusters_x = std::vector(cluster_tot, 0); std::vector clusters_y = std::vector(cluster_tot, 0); for (int i = 0; i < points_tot; i++) { clusters_size[clusters[i]]++; clusters_x[clusters[i]] += points[i].first; clusters_y[clusters[i]] += points[i].second; } for (int k = 0; k < cluster_tot; k++) { if (!clusters_size[k]) { continue; } centroids[k] = std::make_pair( (int) (clusters_x[k] / clusters_size[k]), (int) (clusters_y[k] / clusters_size[k]) ); } } void KMeans::recompute_GPU() { int err; /* Define buffers */ cl_mem in_clusters = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * points_tot, NULL, NULL); cl_mem in_points = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * points_tot * 2, NULL, NULL); cl_mem out_clusters_size = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * points_tot, NULL, NULL); cl_mem out_clusters_x = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * points_tot, NULL, NULL); cl_mem out_clusters_y = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * points_tot * 2, NULL, NULL); /* Fill buffers */ int* c_points = (int*) malloc(sizeof(int) * points_tot * 2); for (int i = 0; i < points_tot; i++) { c_points[2 * i] = points[i].first; c_points[2 * i + 1] = points[i].second; } if (clEnqueueWriteBuffer(commands, in_points, CL_TRUE, 0, sizeof(int) * points_tot * 2, c_points, 0, NULL, NULL) != CL_SUCCESS) { throw std::invalid_argument("Failed to write to c_points array"); }; if (clEnqueueWriteBuffer(commands, in_points, CL_TRUE, 0, sizeof(int) * points_tot, clusters.data(), 0, NULL, NULL) != CL_SUCCESS) { throw std::invalid_argument("Failed to write to c_clusters array"); }; int* c_clusters_size = (int*) malloc(sizeof(int) * points_tot); int* c_clusters_x = (int*) malloc(sizeof(int) * points_tot); int* c_clusters_y = (int*) malloc(sizeof(int) * points_tot); err = 0; err = clSetKernelArg(kernel_recompute, 0, sizeof(cl_mem), &in_points); err |= clSetKernelArg(kernel_recompute, 1, sizeof(cl_mem), &in_clusters); err |= clSetKernelArg(kernel_recompute, 2, sizeof(cl_mem), &out_clusters_size); err |= clSetKernelArg(kernel_recompute, 3, sizeof(cl_mem), &out_clusters_x); err |= clSetKernelArg(kernel_recompute, 4, sizeof(cl_mem), &out_clusters_y); err |= clSetKernelArg(kernel_recompute, 5, sizeof(int), &points_tot); if (err != CL_SUCCESS) { std::cout << err << std::endl; throw std::invalid_argument("Failed to set arguments"); } size_t local, global; err = clGetKernelWorkGroupInfo(kernel_recompute, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); if (err != CL_SUCCESS) { throw std::invalid_argument("Failed to set arguments 1"); } global = local; while (points_tot < global) { global *= 2; } err += clEnqueueNDRangeKernel(commands, kernel_recompute, 1, NULL, &global, &local, 0, NULL, NULL); clFinish(commands); if (err != CL_SUCCESS) std::cout << err << std::endl; if (err != CL_SUCCESS) { throw std::invalid_argument("Failed to set arguments 2"); } err += clEnqueueReadBuffer( commands, out_clusters_size, CL_TRUE, 0, sizeof(int) * points_tot, c_clusters_size, 0, NULL, NULL ); err += clEnqueueReadBuffer( commands, out_clusters_x, CL_TRUE, 0, sizeof(int) * points_tot, c_clusters_x, 0, NULL, NULL ); err += clEnqueueReadBuffer( commands, out_clusters_y, CL_TRUE, 0, sizeof(int) * points_tot, c_clusters_y, 0, NULL, NULL ); if (err != CL_SUCCESS) { throw std::invalid_argument("Failed to set arguments 3"); } for (int k = 0; k < cluster_tot; k++) { if (!c_clusters_size[k]) { continue; } centroids[k] = std::make_pair( (int) (c_clusters_x[k] / c_clusters_size[k]), (int) (c_clusters_y[k] / c_clusters_size[k]) ); } } int main(int argc, char **argv) { const int WINDOW_SIZE = 1500; /*if (SDL_Init(SDL_INIT_EVERYTHING) != 0) { std::cerr << "SDL_Init error: " << SDL_GetError() << std::endl; return 1; } SDL_Window *window; SDL_Renderer *renderer; SDL_Event event; SDL_CreateWindowAndRenderer(WINDOW_SIZE, WINDOW_SIZE, 0, &window, &renderer); while (false) { SDL_SetRenderDrawColor(renderer, 10, 10, 10, 0); SDL_RenderClear(renderer); for (int i = 0; i < kmeans.numPoints(); i++) { int c = kmeans.getCluster(i); std::pair coord = kmeans.getPoint(i); SDL_SetRenderDrawColor(renderer, c, 255-c, c, 255); SDL_RenderDrawPoint(renderer, coord.first, coord.second); } SDL_RenderPresent(renderer); if (SDL_PollEvent(&event) && event.type == SDL_MOUSEBUTTONDOWN) break; }*/ KMeans kmeans = KMeans(WINDOW_SIZE, 3, 1000); kmeans.setup_cl(); std::cout << "BEGIN" << std::endl; std::clock_t start = std::clock(); double duration; kmeans.run(); duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; std::cout << " # DURATION: " << duration << "s" << std::endl; std::cout << "END" << std::endl; //SDL_DestroyRenderer(renderer); //SDL_DestroyWindow(window); // SDL_Quit(); return 0; }