self.geometry_matrix_device = cl.Buffer(self.cl_context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=geometry_matrix)
if copy_column_major:
    geometry_matrix_col_maj = geometry_matrix.flatten(order='F')
    self.geometry_matrix_col_maj_device = cl.Buffer(self.cl_context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=geometry_matrix_col_maj)
else:
    self.geometry_matrix_col_maj_device = None
if laplacian_matrix is not None:
    laplacian_matrix = laplacian_matrix.flatten(order='F').astype(np.float32)
    self.laplacian_matrix_device = cl.Buffer(self.cl_context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=laplacian_matrix)
else:
    self.laplacian_matrix_device = None
self.cell_ray_densities_device = cl.Buffer(self.cl_context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=cell_ray_densities)
self.ray_lengths_device = cl.Buffer(self.cl_context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=ray_lengths)
grad_penalty = np.zeros(self.n_sources, dtype=np.float32)
self.grad_penalty_device = cl.Buffer(self.cl_context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=grad_penalty)
self.solution_device = cl.Buffer(self.cl_context, mf.READ_WRITE, cell_ray_densities.nbytes)
self.detectors_device = cl.Buffer(self.cl_context, mf.READ_ONLY, ray_lengths.nbytes)
self.y_hat_device = cl.Buffer(self.cl_context, mf.READ_WRITE, ray_lengths.nbytes)
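# The snippet above mixes the two common allocation patterns: COPY_HOST_PTR
# uploads an existing NumPy array at creation, while a size-only allocation
# reserves uninitialised device memory for kernel output. A self-contained
# sketch of both (hypothetical names, not part of the original class):
import numpy as np
import pyopencl as cl

def make_buffers(ctx):
    mf = cl.mem_flags
    host_data = np.arange(16, dtype=np.float32)
    # device copy of an existing host array (data is uploaded at creation)
    data_dev = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=host_data)
    # uninitialised device memory of the same size, to be written by a kernel
    result_dev = cl.Buffer(ctx, mf.READ_WRITE, host_data.nbytes)
    return data_dev, result_dev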
# calculating global and local work sizes
nrem = self.n_sources % block_size
gws_sources_x = self.n_sources + bool(nrem) * (block_size - nrem)
mrem = self.m_detectors % block_size
gws_detectors_x = self.m_detectors + bool(mrem) * (block_size - mrem)
mrem_rm = self.m_detectors % block_size_row_maj
gws_detectors_row_maj_x = self.m_detectors + bool(mrem_rm) * (block_size_row_maj - mrem_rm)
if use_atomic:
    gws_sources_row_maj_y = self.n_sources // steps_per_thread_row_maj + bool(self.n_sources % steps_per_thread_row_maj)
    gws_sources_y = self.n_sources // steps_per_thread + bool(self.n_sources % steps_per_thread)
    gws_detectors_y = self.m_detectors // steps_per_thread + bool(self.m_detectors % steps_per_thread)
else:
    gws_sources_row_maj_y = gws_sources_y = gws_detectors_y = 1
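# The lines above round each global work size up to the next multiple of the
# work-group size. An equivalent helper (a sketch, not part of the original
# class):
def round_up_to_multiple(size, group_size):
    """Smallest multiple of group_size that is >= size."""
    rem = size % group_size
    return size if rem == 0 else size + (group_size - rem)

# e.g. gws_sources_x = round_up_to_multiple(n_sources, block_size)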
if (max_elements % (cta_size * 4)) == 0:
    num_blocks = max_elements // (cta_size * 4)
else:
    num_blocks = max_elements // (cta_size * 4) + 1
self.d_temp_keys = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.dtype_size * max_elements)
self.d_temp_values = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.dtype_size * max_elements)
self.d_counters = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.dtype_size * self.WARP_SIZE * num_blocks)
self.d_counters_sum = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.dtype_size * self.WARP_SIZE * num_blocks)
self.d_block_offsets = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.dtype_size * self.WARP_SIZE * num_blocks)
numscan = max_elements // 2 // cta_size * 16
if numscan >= self.MIN_LARGE_ARRAY_SIZE:
    # MAX_WORKGROUP_INCLUSIVE_SCAN_SIZE = 1024
    self.scan_buffer = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.dtype_size * numscan // 1024)
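# num_blocks above is a ceiling division; an equivalent one-line form
# (a sketch, behaviour unchanged):
def ceil_div(numerator, denominator):
    return (numerator + denominator - 1) // denominator

# num_blocks = ceil_div(max_elements, cta_size * 4)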
self.vel = vel
#Setup vertex buffer objects and share them with OpenCL as GLBuffers
self.pos_vbo.bind()
#For some VBO wrappers there is no single buffer attribute but an array of buffers
#https://github.com/enjalot/adventures_in_opencl/commit/61bfd373478767249fe8a3aa77e7e36b22d453c4
try:
    self.pos_cl = cl.GLBuffer(self.ctx, mf.READ_WRITE, int(self.pos_vbo.buffer))
    self.col_cl = cl.GLBuffer(self.ctx, mf.READ_WRITE, int(self.col_vbo.buffer))
except AttributeError:
    self.pos_cl = cl.GLBuffer(self.ctx, mf.READ_WRITE, int(self.pos_vbo.buffers[0]))
    self.col_cl = cl.GLBuffer(self.ctx, mf.READ_WRITE, int(self.col_vbo.buffers[0]))
self.col_vbo.bind()
#pure OpenCL arrays
self.vel_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=vel)
self.pos_gen_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.pos)
self.vel_gen_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.vel)
self.queue.finish()
# set up the list of GL objects to share with opencl
self.gl_objects = [self.pos_cl, self.col_cl]
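# Typical per-frame use of the shared buffers above (a sketch, assuming
# pyopencl was built with GL interop; the kernel name `update` and the
# argument order are illustrative, not from the original code):
#
#     cl.enqueue_acquire_gl_objects(self.queue, self.gl_objects)
#     self.program.update(self.queue, (self.num_particles,), None,
#                         self.pos_cl, self.col_cl, self.vel_cl,
#                         self.pos_gen_cl, self.vel_gen_cl, np.float32(dt))
#     cl.enqueue_release_gl_objects(self.queue, self.gl_objects)
#     self.queue.finish()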
def prepare_training( self, context ):
"""
Create additional buffers to store learning rate for each weight.
@param layer
Input layer.
"""
super( RPROP, self ).prepare_training( context )
self.n_buf = pyopencl.Buffer(
context.opencl.context, pyopencl.mem_flags.READ_WRITE | pyopencl.mem_flags.COPY_HOST_PTR,
hostbuf = numpy.array( [ self.n ] * context._weights_buf_size, numpy.float32 )
)
self.prev_gradient_buf = pyopencl.Buffer(
context.opencl.context, pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.COPY_HOST_PTR,
hostbuf = numpy.zeros( [ context._weights_buf_size ], numpy.float32 )
)
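# The two buffers above hold one float32 per weight (current learning rates
# and previous gradients). A minimal sketch of reading one back for
# inspection, assuming `queue` is the command queue of context.opencl:
#
#     rates = numpy.empty( context._weights_buf_size, numpy.float32 )
#     pyopencl.enqueue_copy( queue, rates, self.n_buf )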
cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
hostbuf=self.data)
self.clbufvit = cl.Buffer(self.ctx,
        cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
        hostbuf=self.datavit)
self.clbufdensity = cl.Buffer(self.ctx,
        cl.mem_flags.READ_WRITE,
        size=D*D*4)
self.clbufdensityint = cl.Buffer(self.ctx,
        cl.mem_flags.READ_WRITE,
        size=D*D*4)
self.clbufdensityvit = cl.Buffer(self.ctx,
        cl.mem_flags.READ_WRITE,
        size=D*D*8)
self.clbufdensityc = cl.Buffer(self.ctx,
        cl.mem_flags.READ_WRITE,
        size=D*D*8)
self.clbuffft = cl.Buffer(self.ctx,
        cl.mem_flags.READ_WRITE,
        size=D*D*8)
self.clbufifft = cl.Buffer(self.ctx,
        cl.mem_flags.READ_WRITE,
        size=D*D*8)
self.clbufpotential = cl.Buffer(self.ctx,
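# The hard-coded sizes above are D*D elements times the element size in bytes
# (4 for float32/int32, 8 for complex64/float64). A less error-prone spelling
# using the dtype's itemsize (a sketch, not from the original code):
import numpy as np
import pyopencl as cl

def alloc_grid(ctx, D, dtype):
    """Allocate an uninitialised D x D device buffer for the given dtype."""
    nbytes = D * D * np.dtype(dtype).itemsize
    return cl.Buffer(ctx, cl.mem_flags.READ_WRITE, size=nbytes)

# e.g. alloc_grid(ctx, D, np.complex64) replaces size=D*D*8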
import numpy
import pyopencl as cl

local_size = 256
thread_strides = 32
macroblock_count = 33
dtype = numpy.float32
total_size = local_size*thread_strides*macroblock_count
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
a = numpy.random.randn(total_size).astype(dtype)
b = numpy.random.randn(total_size).astype(dtype)
mf = cl.mem_flags
a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
c_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)
from mako.template import Template
tpl = Template("""
__kernel void add(
__global ${ type_name } *tgt,
__global const ${ type_name } *op1,
__global const ${ type_name } *op2)
{
int idx = get_local_id(0)
+ ${ local_size } * ${ thread_strides }
* get_group_id(0);
% for i in range(thread_strides):
<% offset = i*local_size %>
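# The Mako template above (truncated here) is rendered to an OpenCL C string
# and compiled like any other kernel source. A minimal sketch with a simpler,
# non-strided variant; the template body and kernel name below are
# illustrative, not the original template:
simple_tpl = Template("""
__kernel void add(__global ${type_name} *tgt,
                  __global const ${type_name} *op1,
                  __global const ${type_name} *op2)
{
    int idx = get_global_id(0);
    tgt[idx] = op1[idx] + op2[idx];
}
""")
src = simple_tpl.render(type_name="float")
prg = cl.Program(ctx, src).build()
prg.add(queue, (total_size,), (local_size,), c_buf, a_buf, b_buf)
result = numpy.empty_like(a)
cl.enqueue_copy(queue, result, c_buf)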
'wf_size': peak_width*nb_channel, 'nb_cluster': nb_cluster}
prg = pyopencl.Program(self.ctx, kernel)
opencl_prg = prg.build(options='-cl-mad-enable')
self.kern_waveform_distance = getattr(opencl_prg, 'waveform_distance')
# create CL buffers
wf_shape = centers.shape[1:]
one_waveform = np.zeros(wf_shape, dtype='float32')
self.one_waveform_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=one_waveform)
self.catalogue_center_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=centers)
self.waveform_distance = np.zeros((nb_cluster), dtype='float32')
self.waveform_distance_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=self.waveform_distance)
self.sparse_mask_level1_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=self.sparse_mask_level1.astype('u1'))
rms_waveform_channel = np.zeros(nb_channel, dtype='float32')
self.rms_waveform_channel_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=rms_waveform_channel)
self.cl_global_size = (centers.shape[0], centers.shape[2])
self.cl_local_size = (centers.shape[0], 1)  # faster on a GPU because of memory access patterns
# force the peak detector to the global method with the numpy engine
p = dict(self.catalogue['peak_detector_params'])
p.pop('engine')
p.pop('method')
self.peakdetector_method = 'global'
self.peakdetector_engine = 'numpy'
PeakDetector_class = get_peak_detector_class(self.peakdetector_method, self.peakdetector_engine)
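# A sketch of the usual call sequence for the buffers above: upload one
# waveform, run the distance kernel, and read the per-cluster distances back.
# The kernel's argument list is defined in the `kernel` source (not shown),
# so the arguments below are placeholders only:
#
#     pyopencl.enqueue_copy(self.queue, self.one_waveform_cl, one_waveform)
#     self.kern_waveform_distance(self.queue, self.cl_global_size,
#                                 self.cl_local_size, ...)
#     pyopencl.enqueue_copy(self.queue, self.waveform_distance,
#                           self.waveform_distance_cl)
#     nearest_cluster = np.argmin(self.waveform_distance)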
def __init__(self, max_elements, cta_size, dtype):
    plat = cl.get_platforms()[0]
    device = plat.get_devices()[0]
    self.ctx = cl.Context(devices=[device])
    self.queue = cl.CommandQueue(self.ctx, device)
    self.loadProgram()
    self.uintsz = dtype.itemsize
    self.d_tempKeys = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.uintsz * max_elements)
    self.d_tempValues = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.uintsz * max_elements)
        arg.bufsize = nonbuf.nbytes
        arg.devdata = cl.LocalMemory(arg.bufsize)
    elif arg.is_pointer:
        # If arg is a pointer to global memory, then we
        # allocate host memory and populate with values:
        arg.hostdata = nparray(veclength).astype(dtype)
        # Determine flags to pass to OpenCL buffer creation:
        arg.flags = cl.mem_flags.COPY_HOST_PTR
        if arg.is_const:
            arg.flags |= cl.mem_flags.READ_ONLY
        else:
            arg.flags |= cl.mem_flags.READ_WRITE
        # Allocate device memory:
        arg.devdata = cl.Buffer(
            driver.context, arg.flags, hostbuf=arg.hostdata)
        # Record transfer overhead. If it's a const buffer,
        # we're not reading back to host.
        if arg.is_const:
            transfer += arg.hostdata.nbytes
        else:
            transfer += 2 * arg.hostdata.nbytes
    else:
        # If arg is not a pointer, then it's a scalar value:
        arg.devdata = dtype(size)
except Exception as e:
    raise E_BAD_ARGS(e)
return KernelPayload(driver.context, args, (size,), transfer)
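# When the payload is launched, pointer arguments are passed as cl.Buffer or
# cl.LocalMemory objects and scalars as sized NumPy values; PyOpenCL cannot
# infer the OpenCL size of a plain Python int, which is why scalars are
# wrapped as dtype(size) above. Illustrative call (hypothetical kernel `k`):
#
#     k(queue, (size,), None, arg.devdata, numpy.int32(size))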