Working Magnitude

SUBMITTED BY: Guest
DATE: Aug. 3, 2014, 4:44 p.m.
FORMAT: C++
SIZE: 8.8 kB
Raw Download
HITS: 24795
Report
/*
                KERNEL KERNEL KERNEL KERNEL KERNEL
*/
 
 
//#pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
// Sampler used for every image read in this file:
//   - nearest-neighbour lookup (no interpolation between texels),
//   - out-of-range coordinates are clamped to the closest edge texel,
//   - coordinates are plain integer pixel indices (not normalized to [0,1]).
__constant sampler_t sampler = CLK_FILTER_NEAREST
                             | CLK_ADDRESS_CLAMP_TO_EDGE
                             | CLK_NORMALIZED_COORDS_FALSE;
 
// The output buffer is double precision. OpenCL 1.0/1.1 compilers only accept
// `double` once the fp64 extension is enabled; on newer compilers that expose
// the macro the pragma is harmless.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif

/*
 * grad -- per-pixel gradient magnitude of a colour image.
 *
 * One work-item handles one pixel; the global work size is expected to be
 * (image width, image height).
 *
 * input_image : image sampled with read_imagef. Per the OpenCL specification
 *               read_imagef is only defined for normalized / float channel
 *               data types, so the host has to create the image accordingly.
 * temp        : output, one double per work-item, row-major with a row
 *               stride of get_global_size(0).
 *
 * Only the x/y/z (R, G, B) components take part; the w component is ignored.
 */
__kernel void grad(
                   __read_only image2d_t input_image,
                   __global double * temp
                    ) {

    const int2 pos = (int2)((int)get_global_id(0), (int)get_global_id(1));

    // Gradient in x using the [1, 0, -1] kernel: f(x-1) - f(x+1).
    // The centre tap has weight 0, so it is not sampled at all.
    const float4 dx = read_imagef(input_image, sampler, (int2)(pos.x - 1, pos.y))
                    - read_imagef(input_image, sampler, (int2)(pos.x + 1, pos.y));

    // Gradient in y using the transposed [1, 0, -1] kernel: f(y-1) - f(y+1).
    const float4 dy = read_imagef(input_image, sampler, (int2)(pos.x, pos.y - 1))
                    - read_imagef(input_image, sampler, (int2)(pos.x, pos.y + 1));

    /*
        Dalal and Triggs suggested:
        "For colour images, we calculate separate gradients for
         each colour channel, and take the one with the largest norm
         as the pixel's gradient vector."

        The selection therefore has to look at the squared norm dx^2 + dy^2
        of each channel (not at the signed derivative values), and both
        components must be taken from the same winning channel.
    */
    const float norm2_r = dx.x * dx.x + dy.x * dy.x;
    const float norm2_g = dx.y * dx.y + dy.y * dy.y;
    const float norm2_b = dx.z * dx.z + dy.z * dy.z;

    // Default to blue, then let green / red override it when their norm is larger.
    float x = dx.z;
    float y = dy.z;
    if (norm2_r >= norm2_g && norm2_r >= norm2_b) {
        x = dx.x;
        y = dy.x;
    } else if (norm2_g >= norm2_b) {
        x = dx.y;
        y = dy.y;
    }

    // Final magnitude is accumulated in double precision, as before.
    const double xx = (double)x * x;
    const double yy = (double)y * y;

    temp[pos.x + pos.y * get_global_size(0)] = sqrt(xx + yy);

}
 
 
/*
                MAIN MAIN MAIN MAIN MAIN
*/
 
//
//  main.cpp
//  Lena (ImageMagick + OpenCL Utilities)
//
//  Created by Mateusz Paluchowski on 13.07.2014.
//  Copyright (c) 2014 Mateusz Paluchowski. All rights reserved.
//
 
#include <iostream>
 
#include "OpenCLUtilities/openCLUtilities.hpp"
#include "ImageMagick-6/Magick++.h"
 
using namespace Magick;
using namespace cl;
 
int main(int argc, char **argv) {
    InitializeMagick(*argv);
   
    Magick::Image image1;
    image1.read("/Users/Mateusz/Desktop/Lenna.png");
 
    long image1_size = 4 * image1.rows() * image1.columns();
    uint8_t *image1_pixels = new uint8_t[image1_size];
    image1.write(0, 0, image1.columns(), image1.rows(), "RGBA", CharPixel, image1_pixels);
   
   
   
    // TESTE TESTE TESTE TESTE
   
   
   
    //
    float *angle = new float[image1_size/4];
    double *magnitude = new double[image1_size/4];
    double *temp = new double[image1_size/4];
   
   
 
    Context context = createCLContextFromArguments(argc, argv);
    Program program = buildProgramFromSource(context, "/Users/Mateusz/Desktop/grad.cl");
    std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
    CommandQueue queue = CommandQueue(context, devices[1], CL_QUEUE_PROFILING_ENABLE);
   
    cl::Device dev0 = devices[1];
    std::string name;
    dev0.getInfo(CL_DEVICE_NAME, &name);
    std::cout << "Used device: " << name << std::endl;
   
    Image2D clImage1 = Image2D(context,
                               CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                               ImageFormat(CL_RGBA, CL_UNSIGNED_INT8),
                               image1.columns(), image1.rows(), 0, image1_pixels);
 
   
    // Create a buffer for the result
    Buffer clResultMagnitude = Buffer(context, CL_MEM_READ_WRITE, sizeof(double)*image1.rows()*image1.columns());
    Buffer clResultAngle = Buffer(context, CL_MEM_READ_WRITE, sizeof(float)*image1.rows()*image1.columns());
    Buffer clTemp = Buffer(context, CL_MEM_READ_WRITE, sizeof(double)*image1.rows()*image1.columns());
 
 
    /*
    Kernel gradients = Kernel(program, "gradients");
        gradients.setArg(0, clImage1);
        gradients.setArg(1, clResultMagnitude);
        gradients.setArg(2, clResultAngle);
        gradients.setArg(3, clTemp);
    */
    Kernel gradients = Kernel(program, "grad");
    gradients.setArg(0, clImage1);
    gradients.setArg(1, clTemp);
 
 
   
    Event kernel_event, read_event;
    queue.enqueueNDRangeKernel(gradients, NullRange,
                               NDRange(image1.columns(), image1.rows() ),
                               NullRange, NULL, &kernel_event);
   
//    Program program1 = buildProgramFromSource(context, "/Users/Mateusz/Desktop/histograms.cl");
//    Kernel histograms = Kernel(program, "histograms");
//    histograms.setArg(0, clResultMagnitude);
//    histograms.setArg(1, clResultAngle);
   
   
    //Synchronize?
    kernel_event.wait();
   
    // Transfer image back to host
//    queue.enqueueReadBuffer(clResultMagnitude, CL_TRUE, 0, sizeof(double)*image1.columns()*image1.rows(), magnitude);
//    queue.enqueueReadBuffer(clResultAngle, CL_TRUE, 0, sizeof(float)*image1.columns()*image1.rows(), angle);
    queue.enqueueReadBuffer(clTemp, CL_TRUE, 0, sizeof(double)*image1.columns()*image1.rows(), temp);
 
   
    //          DEBUGGING OUTPUT
    std::cout<<std::endl;
   /*
    for (int i=0; i<image1_size/4; i++) {
        if(temp[i]>0)
                std::cout << "TEMP: " << temp[i] << std::endl;
    }
  */
//    std::cout << "Angle: " << angle[150] << std::endl;
    std::cout<<std::endl;
 
   
    /*      Profiling       */
    cl_ulong start = kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
    cl_ulong end = kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
    double time = 1.e-6 * (end-start);
    std::cout << "Time for kernel to execute " << time << " ms" << std::endl;
   
    /*  Theoretical and effective memory bandwidth    */
    //theor = 80.3GB/s
    cl_ulong mem_size;
    cl_uint bus_size;
    cl_uint clock_rate;
    cl_uchar memory_bandwidth;
    int memory_bus_width = 128;
    int effective_memory_clock_rate = 5016;
    dev0.getInfo(CL_DEVICE_ADDRESS_BITS, &bus_size);
    dev0.getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &mem_size);
    dev0.getInfo(CL_DEVICE_MAX_CLOCK_FREQUENCY, &clock_rate);
    memory_bandwidth = (effective_memory_clock_rate * 10e6 * memory_bus_width*2/8)/(10e9);
    std::cout << "Bus size in bits: " << bus_size  << std::endl;
    std::cout << "Global memory size in bytes: " <<  mem_size << std::endl;
    std::cout << "Max clock rate: " <<  clock_rate << std::endl;
    std::cout << "Theoretical memory bandwidth: " <<  (float)memory_bandwidth << " GB/s" << std::endl;
   
    long No_of_bits_read = sizeof(uint8_t)*4 * image1.rows() * image1.columns();
    long No_of_bits_written = sizeof(float)*image1.rows()*image1.columns() *2;
    float effective_bandwidth = ((No_of_bits_read + No_of_bits_written)/10e9)/(time*10e-6);
    std::cout << "Effective memory bandwidth: " <<  effective_bandwidth << " GB/s" << std::endl;
 
   
    /*      Result image saving       */
   
    double temp_max=0;
    for (int i=0; i<image1_size/4; i++) {
        if (temp[i]>temp_max) {
            temp_max=temp[i];
        }
    }
   
    for (int i=0; i<image1_size/4; i++) {
    temp [i] = 255* temp[i]/temp_max;
    }
   
    uint8_t *image1_test = new uint8_t[image1_size*3/4];
    for (int i =0; i<image1_size*3/4; i++) {
        image1_test[i]=0;
    }
    for (int i =0; i<image1_size/4; i++) {
        image1_test[i*3]=temp[i];
        image1_test[i*3+1]=temp[i];
        image1_test[i*3+2]=temp[i];
       
    }
   
    std::cout << angle[0] << " vs. " << temp[0] << " vs. " << image1_pixels[0] << std::endl;
   
//    image1.read(image1.columns(), image1.rows(), "G", FloatPixel, angle);
//    image1.type( GrayscaleType );
//    image1.write("/Users/Mateusz/Desktop/ang.png");
   
 
//    image1.read(image1.columns(), image1.rows(), "G", FloatPixel, temp);
//    image1.type( GrayscaleType );
//    image1.write("/Users/Mateusz/Desktop/mag.png");
   
   
    image1.read(image1.columns(), image1.rows(), "RGB", CharPixel, image1_test);
    image1.type( GrayscaleType );
    image1.write("/Users/Mateusz/Desktop/R.png");
}