Working Magnitude


SUBMITTED BY: Guest

DATE: Aug. 3, 2014, 4:44 p.m.

FORMAT: C++

SIZE: 8.8 kB

HITS: 24795

  1. /*
  2. KERNEL KERNEL KERNEL KERNEL KERNEL
  3. */
  4. //#pragma OPENCL EXTENSION cl_khr_fp64 : enable
  // Sampler shared by every image read in this file: integer pixel
  // coordinates (not normalized), nearest-texel filtering, and coordinates
  // outside the image clamped to the nearest edge texel.
  __constant sampler_t sampler =
      CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_FALSE;
  #ifdef cl_khr_fp64
  #pragma OPENCL EXTENSION cl_khr_fp64 : enable
  #endif
  /*
   * grad -- per-pixel gradient magnitude of a colour image.
   *
   * One work-item handles one pixel (global id 0 = column, global id 1 = row).
   *
   * input_image : image sampled with read_imagef through the file-scope
   *               `sampler`; only the x/y/z components of each texel are used,
   *               the w component is ignored.
   * temp        : output buffer, one double per pixel, row-major with a row
   *               stride equal to the image width.
   *
   * Gradients are central differences with the [1, 0, -1] kernel, i.e.
   * value(p - 1) - value(p + 1), along x and along y. Neighbours outside the
   * image resolve according to the sampler's addressing mode.
   *
   * Channel selection follows the rule quoted from Dalal and Triggs:
   *   "For colour images, we calculate separate gradients for each colour
   *    channel, and take the one with the largest norm as the pixel's
   *    gradient vector."
   * The channel is therefore chosen once per pixel from dx^2 + dy^2 of each
   * channel, so the x and y components always come from the same channel and
   * strong negative derivatives are not discarded.
   *
   * The pragma above is needed because `double` is used: OpenCL 1.0/1.1
   * require cl_khr_fp64 to be enabled explicitly.
   */
  __kernel void grad(
      __read_only image2d_t input_image,
      __global double * temp
  ) {
      const int2 pos = (int2)((int)get_global_id(0), (int)get_global_id(1));
      const int width = get_image_width(input_image);
      const int height = get_image_height(input_image);

      // Work-items outside the image (e.g. a padded global size) have no
      // output slot, so they must not write.
      if (pos.x >= width || pos.y >= height) {
          return;
      }

      // Horizontal gradient, [1, 0, -1]: left neighbour minus right neighbour.
      const float4 dx = read_imagef(input_image, sampler, (int2)(pos.x - 1, pos.y))
                      - read_imagef(input_image, sampler, (int2)(pos.x + 1, pos.y));

      // Vertical gradient, transposed [1, 0, -1]: upper minus lower neighbour.
      const float4 dy = read_imagef(input_image, sampler, (int2)(pos.x, pos.y - 1))
                      - read_imagef(input_image, sampler, (int2)(pos.x, pos.y + 1));

      // Squared gradient norm of every channel; sqrt is monotonic, so the
      // largest squared norm identifies the channel with the largest norm.
      const float4 norm_sq = dx * dx + dy * dy;
      const float largest_norm_sq = fmax(norm_sq.x, fmax(norm_sq.y, norm_sq.z));

      temp[(size_t)pos.y * (size_t)width + (size_t)pos.x] = sqrt((double)largest_norm_sq);
  }
  40. /*
  41. MAIN MAIN MAIN MAIN MAIN
  42. */
  43. //
  44. // main.cpp
  45. // Lena (ImageMagick + OpenCL Utilities)
  46. //
  47. // Created by Mateusz Paluchowski on 13.07.2014.
  48. // Copyright (c) 2014 Mateusz Paluchowski. All rights reserved.
  49. //
  #include <cstddef>
  #include <cstdint>
  #include <cstdlib>
  #include <exception>
  #include <iostream>
  #include <stdexcept>
  #include <string>
  #include <vector>

  #include "OpenCLUtilities/openCLUtilities.hpp"
  #include "ImageMagick-6/Magick++.h"
  53. using namespace Magick;
  54. using namespace cl;
  55. int main(int argc, char **argv) {
  56. InitializeMagick(*argv);
  57. Magick::Image image1;
  58. image1.read("/Users/Mateusz/Desktop/Lenna.png");
  59. long image1_size = 4 * image1.rows() * image1.columns();
  60. uint8_t *image1_pixels = new uint8_t[image1_size];
  61. image1.write(0, 0, image1.columns(), image1.rows(), "RGBA", CharPixel, image1_pixels);
  62. // TESTE TESTE TESTE TESTE
  63. //
  64. float *angle = new float[image1_size/4];
  65. double *magnitude = new double[image1_size/4];
  66. double *temp = new double[image1_size/4];
  67. Context context = createCLContextFromArguments(argc, argv);
  68. Program program = buildProgramFromSource(context, "/Users/Mateusz/Desktop/grad.cl");
  69. std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
  70. CommandQueue queue = CommandQueue(context, devices[1], CL_QUEUE_PROFILING_ENABLE);
  71. cl::Device dev0 = devices[1];
  72. std::string name;
  73. dev0.getInfo(CL_DEVICE_NAME, &name);
  74. std::cout << "Used device: " << name << std::endl;
  75. Image2D clImage1 = Image2D(context,
  76. CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
  77. ImageFormat(CL_RGBA, CL_UNSIGNED_INT8),
  78. image1.columns(), image1.rows(), 0, image1_pixels);
  79. // Create a buffer for the result
  80. Buffer clResultMagnitude = Buffer(context, CL_MEM_READ_WRITE, sizeof(double)*image1.rows()*image1.columns());
  81. Buffer clResultAngle = Buffer(context, CL_MEM_READ_WRITE, sizeof(float)*image1.rows()*image1.columns());
  82. Buffer clTemp = Buffer(context, CL_MEM_READ_WRITE, sizeof(double)*image1.rows()*image1.columns());
  83. /*
  84. Kernel gradients = Kernel(program, "gradients");
  85. gradients.setArg(0, clImage1);
  86. gradients.setArg(1, clResultMagnitude);
  87. gradients.setArg(2, clResultAngle);
  88. gradients.setArg(3, clTemp);
  89. */
  90. Kernel gradients = Kernel(program, "grad");
  91. gradients.setArg(0, clImage1);
  92. gradients.setArg(1, clTemp);
  93. Event kernel_event, read_event;
  94. queue.enqueueNDRangeKernel(gradients, NullRange,
  95. NDRange(image1.columns(), image1.rows() ),
  96. NullRange, NULL, &kernel_event);
  97. // Program program1 = buildProgramFromSource(context, "/Users/Mateusz/Desktop/histograms.cl");
  98. // Kernel histograms = Kernel(program, "histograms");
  99. // histograms.setArg(0, clResultMagnitude);
  100. // histograms.setArg(1, clResultAngle);
  101. //Synchronize?
  102. kernel_event.wait();
  103. // Transfer image back to host
  104. // queue.enqueueReadBuffer(clResultMagnitude, CL_TRUE, 0, sizeof(double)*image1.columns()*image1.rows(), magnitude);
  105. // queue.enqueueReadBuffer(clResultAngle, CL_TRUE, 0, sizeof(float)*image1.columns()*image1.rows(), angle);
  106. queue.enqueueReadBuffer(clTemp, CL_TRUE, 0, sizeof(double)*image1.columns()*image1.rows(), temp);
  107. // DEBUGGING OUTPUT
  108. std::cout<<std::endl;
  109. /*
  110. for (int i=0; i<image1_size/4; i++) {
  111. if(temp[i]>0)
  112. std::cout << "TEMP: " << temp[i] << std::endl;
  113. }
  114. */
  115. // std::cout << "Angle: " << angle[150] << std::endl;
  116. std::cout<<std::endl;
  117. /* Profiling */
  118. cl_ulong start = kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
  119. cl_ulong end = kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
  120. double time = 1.e-6 * (end-start);
  121. std::cout << "Time for kernel to execute " << time << " ms" << std::endl;
  122. /* Theoretical and effective memory bandwidth */
  123. //theor = 80.3GB/s
  124. cl_ulong mem_size;
  125. cl_uint bus_size;
  126. cl_uint clock_rate;
  127. cl_uchar memory_bandwidth;
  128. int memory_bus_width = 128;
  129. int effective_memory_clock_rate = 5016;
  130. dev0.getInfo(CL_DEVICE_ADDRESS_BITS, &bus_size);
  131. dev0.getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &mem_size);
  132. dev0.getInfo(CL_DEVICE_MAX_CLOCK_FREQUENCY, &clock_rate);
  133. memory_bandwidth = (effective_memory_clock_rate * 10e6 * memory_bus_width*2/8)/(10e9);
  134. std::cout << "Bus size in bits: " << bus_size << std::endl;
  135. std::cout << "Global memory size in bytes: " << mem_size << std::endl;
  136. std::cout << "Max clock rate: " << clock_rate << std::endl;
  137. std::cout << "Theoretical memory bandwidth: " << (float)memory_bandwidth << " GB/s" << std::endl;
  138. long No_of_bits_read = sizeof(uint8_t)*4 * image1.rows() * image1.columns();
  139. long No_of_bits_written = sizeof(float)*image1.rows()*image1.columns() *2;
  140. float effective_bandwidth = ((No_of_bits_read + No_of_bits_written)/10e9)/(time*10e-6);
  141. std::cout << "Effective memory bandwidth: " << effective_bandwidth << " GB/s" << std::endl;
  142. /* Result image saving */
  143. double temp_max=0;
  144. for (int i=0; i<image1_size/4; i++) {
  145. if (temp[i]>temp_max) {
  146. temp_max=temp[i];
  147. }
  148. }
  149. for (int i=0; i<image1_size/4; i++) {
  150. temp [i] = 255* temp[i]/temp_max;
  151. }
  152. uint8_t *image1_test = new uint8_t[image1_size*3/4];
  153. for (int i =0; i<image1_size*3/4; i++) {
  154. image1_test[i]=0;
  155. }
  156. for (int i =0; i<image1_size/4; i++) {
  157. image1_test[i*3]=temp[i];
  158. image1_test[i*3+1]=temp[i];
  159. image1_test[i*3+2]=temp[i];
  160. }
  161. std::cout << angle[0] << " vs. " << temp[0] << " vs. " << image1_pixels[0] << std::endl;
  162. // image1.read(image1.columns(), image1.rows(), "G", FloatPixel, angle);
  163. // image1.type( GrayscaleType );
  164. // image1.write("/Users/Mateusz/Desktop/ang.png");
  165. // image1.read(image1.columns(), image1.rows(), "G", FloatPixel, temp);
  166. // image1.type( GrayscaleType );
  167. // image1.write("/Users/Mateusz/Desktop/mag.png");
  168. image1.read(image1.columns(), image1.rows(), "RGB", CharPixel, image1_test);
  169. image1.type( GrayscaleType );
  170. image1.write("/Users/Mateusz/Desktop/R.png");
  171. }

comments powered by Disqus