// Opentk/Source/Examples/OpenCL/VectorAdd.cs

// 116 lines
// 4.7 KiB
// C#

using System;
using System.Collections.Generic;
using System.Text;
using OpenTK.Compute.CL10;
namespace Examples
{
using cl_context = IntPtr;
using cl_device_id = IntPtr;
using cl_command_queue = IntPtr;
using cl_program = IntPtr;
using cl_kernel = IntPtr;
using cl_mem = IntPtr;
[Example("Vector Addition", ExampleCategory.OpenCL, "1.0")]
class FFT
{
    // NOTE(review): the class is called FFT although the example performs a
    // vector addition; the name is kept so existing references keep working.

    /// <summary>
    /// Adds two randomly initialised host vectors on the first device of the
    /// default OpenCL context and reads the sum back into host memory.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// An OpenCL call reported a non-zero error code, or the context exposes no devices.
    /// </exception>
    public static void Main()
    {
        const int cnBlockSize = 512;
        const int cnBlocks = 3;
        IntPtr cnDimension = new IntPtr(cnBlocks * cnBlockSize);
        string sProgramSource = @"
__kernel void
vectorAdd(__global const float * a,
__global const float * b,
__global float * c)
{
// Vector element index
int nIndex = get_global_id(0);
c[nIndex] = a[nIndex] + b[nIndex];
}
";
        ErrorCode error;

        // create OpenCL device & context
        cl_context hContext;
        unsafe { hContext = CL.CreateContextFromType((ContextProperties*)null, DeviceTypeFlags.DeviceTypeDefault, IntPtr.Zero, IntPtr.Zero, &error); }
        CheckError(error, "CreateContextFromType");

        // Handles start out as zero so the finally block only releases what was created.
        cl_command_queue hCmdQueue = IntPtr.Zero;
        cl_program hProgram = IntPtr.Zero;
        cl_kernel hKernel = IntPtr.Zero;
        cl_mem hDeviceMemA = IntPtr.Zero;
        cl_mem hDeviceMemB = IntPtr.Zero;
        cl_mem hDeviceMemC = IntPtr.Zero;

        try
        {
            // query all devices available to the context
            IntPtr nContextDescriptorSize;
            CL.GetContextInfo(hContext, ContextInfo.ContextDevices, IntPtr.Zero, IntPtr.Zero, out nContextDescriptorSize);

            // The reported size is a byte count, so divide by the handle size
            // to obtain the number of device ids.
            int nDevices = nContextDescriptorSize.ToInt32() / IntPtr.Size;
            if (nDevices <= 0)
            {
                throw new InvalidOperationException("The OpenCL context does not expose any devices.");
            }

            cl_device_id[] aDevices = new cl_device_id[nDevices];
            unsafe
            {
                fixed (cl_device_id* ptr = aDevices)
                {
                    IntPtr ret;
                    CL.GetContextInfo(hContext, ContextInfo.ContextDevices, nContextDescriptorSize, new IntPtr(ptr), out ret);
                }
            }

            // create a command queue for first device the context reported
            hCmdQueue = CL.CreateCommandQueue(hContext, aDevices[0], (CommandQueueFlags)0, out error);
            CheckError(error, "CreateCommandQueue");

            // create & compile program
            unsafe { hProgram = CL.CreateProgramWithSource(hContext, 1, new string[] { sProgramSource }, null, &error); }
            CheckError(error, "CreateProgramWithSource");
            // The build result is not inspected here; a failed build is expected
            // to surface through the CreateKernel error code below.
            CL.BuildProgram(hProgram, 0, (IntPtr[])null, null, IntPtr.Zero, IntPtr.Zero);

            // create kernel
            hKernel = CL.CreateKernel(hProgram, "vectorAdd", out error);
            CheckError(error, "CreateKernel");

            // allocate host vectors
            float[] A = new float[cnDimension.ToInt32()];
            float[] B = new float[cnDimension.ToInt32()];
            float[] C = new float[cnDimension.ToInt32()];

            // initialize host memory with random operands
            Random rng = new Random();
            for (int i = 0; i < A.Length; i++)
            {
                A[i] = (float)rng.NextDouble();
                B[i] = (float)rng.NextDouble();
            }

            unsafe
            {
                // Size in bytes of one vector, shared by all three buffers.
                IntPtr cbVector = new IntPtr(cnDimension.ToInt32() * sizeof(float));

                fixed (float* pA = A)
                fixed (float* pB = B)
                fixed (float* pC = C)
                {
                    // allocate device memory; A and B are copied from the host at creation
                    hDeviceMemA = CL.CreateBuffer(hContext,
                        MemFlags.MemReadOnly | MemFlags.MemCopyHostPtr,
                        cbVector,
                        new IntPtr(pA),
                        out error);
                    CheckError(error, "CreateBuffer(A)");

                    hDeviceMemB = CL.CreateBuffer(hContext,
                        MemFlags.MemReadOnly | MemFlags.MemCopyHostPtr,
                        cbVector,
                        new IntPtr(pB),
                        out error);
                    CheckError(error, "CreateBuffer(B)");

                    hDeviceMemC = CL.CreateBuffer(hContext,
                        MemFlags.MemWriteOnly,
                        cbVector,
                        IntPtr.Zero,
                        out error);
                    CheckError(error, "CreateBuffer(C)");

                    // setup parameter values: each argument is the address of a buffer handle
                    CL.SetKernelArg(hKernel, 0, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemA));
                    CL.SetKernelArg(hKernel, 1, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemB));
                    CL.SetKernelArg(hKernel, 2, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemC));

                    // execute kernel: one work-item per vector element
                    CL.EnqueueNDRangeKernel(hCmdQueue, hKernel, 1, null, &cnDimension, null, 0, null, null);

                    // copy results from device back to host (blocking read on the command queue)
                    CL.EnqueueReadBuffer(hCmdQueue, hDeviceMemC, true, IntPtr.Zero,
                        cbVector,
                        new IntPtr(pC), 0, null, (IntPtr[])null);
                }
            }
        }
        finally
        {
            // Release everything that was created, in reverse order of creation.
            if (hDeviceMemC != IntPtr.Zero) { CL.ReleaseMemObject(hDeviceMemC); }
            if (hDeviceMemB != IntPtr.Zero) { CL.ReleaseMemObject(hDeviceMemB); }
            if (hDeviceMemA != IntPtr.Zero) { CL.ReleaseMemObject(hDeviceMemA); }
            if (hKernel != IntPtr.Zero) { CL.ReleaseKernel(hKernel); }
            if (hProgram != IntPtr.Zero) { CL.ReleaseProgram(hProgram); }
            if (hCmdQueue != IntPtr.Zero) { CL.ReleaseCommandQueue(hCmdQueue); }
            if (hContext != IntPtr.Zero) { CL.ReleaseContext(hContext); }
        }
    }

    /// <summary>
    /// Throws when an OpenCL call reported a failure.
    /// </summary>
    /// <param name="error">Error code written by the OpenCL call.</param>
    /// <param name="operation">Name of the call, used in the exception message.</param>
    /// <exception cref="InvalidOperationException"><paramref name="error"/> is non-zero.</exception>
    private static void CheckError(ErrorCode error, string operation)
    {
        // Zero is CL_SUCCESS in the OpenCL specification.
        if (error != 0)
        {
            throw new InvalidOperationException(
                String.Format("OpenCL call {0} failed with error {1}.", operation, error));
        }
    }
}
}