/*
KERNEL KERNEL KERNEL KERNEL KERNEL
*/
//#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// Program-scope sampler used for every image read in this file:
// unnormalized (integer pixel) coordinates, out-of-range coordinates are
// clamped to the image edge, and nearest-texel filtering (no interpolation).
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
/*
 * grad: per-pixel gradient magnitude of a colour image.
 *
 * Launch layout: 2D NDRange, one work-item per pixel. The output is indexed
 * as x + y * get_global_size(0), so `temp` must hold at least
 * get_global_size(0) * get_global_size(1) doubles.
 *
 * input_image  image sampled with read_imagef through the file-scope
 *              `sampler` (clamp-to-edge, so border pixels reuse the edge
 *              texel). Its channel data type must be one read_imagef
 *              supports (e.g. a normalized or float type).
 * temp         output buffer; receives sqrt(gx^2 + gy^2) for the colour
 *              channel with the largest gradient norm.
 *
 * The kernel uses `double`, so the device must support double precision.
 */
__kernel void grad(
__read_only image2d_t input_image,
__global double * temp
) {
    const int2 pos = (int2)((int)get_global_id(0), (int)get_global_id(1));

    // Horizontal gradient with the [1, 0, -1] kernel. The centre tap has
    // weight 0, so only the left and right neighbours are fetched.
    const float4 dx = read_imagef(input_image, sampler, (int2)(pos.x - 1, pos.y))
                    - read_imagef(input_image, sampler, (int2)(pos.x + 1, pos.y));

    // Vertical gradient with the transposed [1, 0, -1] kernel.
    const float4 dy = read_imagef(input_image, sampler, (int2)(pos.x, pos.y - 1))
                    - read_imagef(input_image, sampler, (int2)(pos.x, pos.y + 1));

    /*
     * Dalal and Triggs suggested:
     * "For colour images, we calculate separate gradients for
     * each colour channel, and take the one with the largest norm
     * as the pixel's gradient vector."
     *
     * The channel is therefore chosen by its squared gradient norm
     * (dx^2 + dy^2), and BOTH components are taken from that same channel.
     * Comparing the signed dx and dy values independently would mix
     * channels and favour small positive gradients over large negative ones.
     * The fourth (alpha) component is ignored.
     */
    const float4 norm2 = dx * dx + dy * dy;

    float gx = dx.x;
    float gy = dy.x;
    float best = norm2.x;
    if (norm2.y > best) {
        gx = dx.y;
        gy = dy.y;
        best = norm2.y;
    }
    if (norm2.z > best) {
        gx = dx.z;
        gy = dy.z;
    }

    // Accumulate the final norm in double precision, as the output is double.
    const double xx = (double)gx * gx;
    const double yy = (double)gy * gy;
    temp[pos.x + pos.y * get_global_size(0)] = sqrt(xx + yy);
}
/*
MAIN MAIN MAIN MAIN MAIN
*/
//
// main.cpp
// Lena (ImageMagick + OpenCL Utilities)
//
// Created by Mateusz Paluchowski on 13.07.2014.
// Copyright (c) 2014 Mateusz Paluchowski. All rights reserved.
//
#include <iostream>
#include "OpenCLUtilities/openCLUtilities.hpp"
#include "ImageMagick-6/Magick++.h"
using namespace Magick;
using namespace cl;
int main(int argc, char **argv) {
InitializeMagick(*argv);
Magick::Image image1;
image1.read("/Users/Mateusz/Desktop/Lenna.png");
long image1_size = 4 * image1.rows() * image1.columns();
uint8_t *image1_pixels = new uint8_t[image1_size];
image1.write(0, 0, image1.columns(), image1.rows(), "RGBA", CharPixel, image1_pixels);
// TESTE TESTE TESTE TESTE
//
float *angle = new float[image1_size/4];
double *magnitude = new double[image1_size/4];
double *temp = new double[image1_size/4];
Context context = createCLContextFromArguments(argc, argv);
Program program = buildProgramFromSource(context, "/Users/Mateusz/Desktop/grad.cl");
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
CommandQueue queue = CommandQueue(context, devices[1], CL_QUEUE_PROFILING_ENABLE);
cl::Device dev0 = devices[1];
std::string name;
dev0.getInfo(CL_DEVICE_NAME, &name);
std::cout << "Used device: " << name << std::endl;
Image2D clImage1 = Image2D(context,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ImageFormat(CL_RGBA, CL_UNSIGNED_INT8),
image1.columns(), image1.rows(), 0, image1_pixels);
// Create a buffer for the result
Buffer clResultMagnitude = Buffer(context, CL_MEM_READ_WRITE, sizeof(double)*image1.rows()*image1.columns());
Buffer clResultAngle = Buffer(context, CL_MEM_READ_WRITE, sizeof(float)*image1.rows()*image1.columns());
Buffer clTemp = Buffer(context, CL_MEM_READ_WRITE, sizeof(double)*image1.rows()*image1.columns());
/*
Kernel gradients = Kernel(program, "gradients");
gradients.setArg(0, clImage1);
gradients.setArg(1, clResultMagnitude);
gradients.setArg(2, clResultAngle);
gradients.setArg(3, clTemp);
*/
Kernel gradients = Kernel(program, "grad");
gradients.setArg(0, clImage1);
gradients.setArg(1, clTemp);
Event kernel_event, read_event;
queue.enqueueNDRangeKernel(gradients, NullRange,
NDRange(image1.columns(), image1.rows() ),
NullRange, NULL, &kernel_event);
// Program program1 = buildProgramFromSource(context, "/Users/Mateusz/Desktop/histograms.cl");
// Kernel histograms = Kernel(program, "histograms");
// histograms.setArg(0, clResultMagnitude);
// histograms.setArg(1, clResultAngle);
//Synchronize?
kernel_event.wait();
// Transfer image back to host
// queue.enqueueReadBuffer(clResultMagnitude, CL_TRUE, 0, sizeof(double)*image1.columns()*image1.rows(), magnitude);
// queue.enqueueReadBuffer(clResultAngle, CL_TRUE, 0, sizeof(float)*image1.columns()*image1.rows(), angle);
queue.enqueueReadBuffer(clTemp, CL_TRUE, 0, sizeof(double)*image1.columns()*image1.rows(), temp);
// DEBUGGING OUTPUT
std::cout<<std::endl;
/*
for (int i=0; i<image1_size/4; i++) {
if(temp[i]>0)
std::cout << "TEMP: " << temp[i] << std::endl;
}
*/
// std::cout << "Angle: " << angle[150] << std::endl;
std::cout<<std::endl;
/* Profiling */
cl_ulong start = kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
cl_ulong end = kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
double time = 1.e-6 * (end-start);
std::cout << "Time for kernel to execute " << time << " ms" << std::endl;
/* Theoretical and effective memory bandwidth */
//theor = 80.3GB/s
cl_ulong mem_size;
cl_uint bus_size;
cl_uint clock_rate;
cl_uchar memory_bandwidth;
int memory_bus_width = 128;
int effective_memory_clock_rate = 5016;
dev0.getInfo(CL_DEVICE_ADDRESS_BITS, &bus_size);
dev0.getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &mem_size);
dev0.getInfo(CL_DEVICE_MAX_CLOCK_FREQUENCY, &clock_rate);
memory_bandwidth = (effective_memory_clock_rate * 10e6 * memory_bus_width*2/8)/(10e9);
std::cout << "Bus size in bits: " << bus_size << std::endl;
std::cout << "Global memory size in bytes: " << mem_size << std::endl;
std::cout << "Max clock rate: " << clock_rate << std::endl;
std::cout << "Theoretical memory bandwidth: " << (float)memory_bandwidth << " GB/s" << std::endl;
long No_of_bits_read = sizeof(uint8_t)*4 * image1.rows() * image1.columns();
long No_of_bits_written = sizeof(float)*image1.rows()*image1.columns() *2;
float effective_bandwidth = ((No_of_bits_read + No_of_bits_written)/10e9)/(time*10e-6);
std::cout << "Effective memory bandwidth: " << effective_bandwidth << " GB/s" << std::endl;
/* Result image saving */
double temp_max=0;
for (int i=0; i<image1_size/4; i++) {
if (temp[i]>temp_max) {
temp_max=temp[i];
}
}
for (int i=0; i<image1_size/4; i++) {
temp [i] = 255* temp[i]/temp_max;
}
uint8_t *image1_test = new uint8_t[image1_size*3/4];
for (int i =0; i<image1_size*3/4; i++) {
image1_test[i]=0;
}
for (int i =0; i<image1_size/4; i++) {
image1_test[i*3]=temp[i];
image1_test[i*3+1]=temp[i];
image1_test[i*3+2]=temp[i];
}
std::cout << angle[0] << " vs. " << temp[0] << " vs. " << image1_pixels[0] << std::endl;
// image1.read(image1.columns(), image1.rows(), "G", FloatPixel, angle);
// image1.type( GrayscaleType );
// image1.write("/Users/Mateusz/Desktop/ang.png");
// image1.read(image1.columns(), image1.rows(), "G", FloatPixel, temp);
// image1.type( GrayscaleType );
// image1.write("/Users/Mateusz/Desktop/mag.png");
image1.read(image1.columns(), image1.rows(), "RGB", CharPixel, image1_test);
image1.type( GrayscaleType );
image1.write("/Users/Mateusz/Desktop/R.png");
}