Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Exercises ListOfListsBuilder with an input that is a slice (nonzero offset)
# of a larger device array.
# NOTE(review): `ctx_factory`, `cl` and `np` are not defined in this fragment;
# presumably they come from the enclosing test function/module -- confirm.
context = ctx_factory()
queue = cl.CommandQueue(context)
from pyopencl.algorithm import ListOfListsBuilder
from pyopencl.tools import VectorArg
# A single output list named "mylist" (int32); the generate() callback below
# appends input_list[i] once for each index i.
builder = ListOfListsBuilder(context, [("mylist", np.int32)], """//CL//
void generate(LIST_ARG_DECL USER_ARG_DECL index_type i)
{
APPEND_mylist(input_list[i]);
}
""", arg_decls=[
VectorArg(float, "input_list", with_offset=True)])
n = 10000
# Ten extra leading elements stay zero; only the tail [10:] is set to 1.
input_list = cl.array.zeros(queue, (n + 10,), float)
input_list[10:] = 1
# Only the tail view is handed to the builder, so a correct offset handling
# must yield nothing but ones.
result, evt = builder(queue, n, input_list[10:])
inf = result["mylist"]
# One appended element per index, each equal to 1.
assert inf.count == n
assert (inf.lists.get() == 1).all()
def to_device(self, cpuarray):
    """Upload *cpuarray* to the device and keep track of the result.

    The device array is created with ``cl_array.to_device`` on
    ``self.queue``, appended to ``self._cl_arrays`` and then returned.
    """
    device_array = cl_array.to_device(self.queue, cpuarray)
    self._cl_arrays.append(device_array)
    return device_array
# Validates and converts the two external arguments of the `remove_nans`
# entry point.
# NOTE(review): only the argument-handling prologue is visible; the snippet
# ends inside the second error message, so the rest of the body is unknown.
def remove_nans(self, nan_value_4102_ext, images_mem_4131_ext):
# Argument #0: coerce the scalar to np.int16 by way of ct.c_int16
# (`ct` is presumably the ctypes module -- confirm).
try:
nan_value_4102 = np.int16(ct.c_int16(nan_value_4102_ext))
except (TypeError, AssertionError) as e:
# Re-raise as TypeError naming the expected type ("i16") and the received
# Python type and value.
raise TypeError("Argument #0 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i16",
type(nan_value_4102_ext),
nan_value_4102_ext))
# Argument #1: must be exactly np.ndarray or cl.array.Array with dtype int16.
try:
assert ((type(images_mem_4131_ext) in [np.ndarray,
cl.array.Array]) and (images_mem_4131_ext.dtype == np.int16)), "Parameter has unexpected type"
# The first three shape entries are captured as int32 sizes.
sizze_4099 = np.int32(images_mem_4131_ext.shape[0])
sizze_4100 = np.int32(images_mem_4131_ext.shape[1])
sizze_4101 = np.int32(images_mem_4131_ext.shape[2])
# A cl.array.Array contributes its existing `.data` buffer; any other accepted
# input gets a new allocation of `nbytes` bytes via opencl_alloc.
if (type(images_mem_4131_ext) == cl.array.Array):
images_mem_4131 = images_mem_4131_ext.data
else:
images_mem_4131 = opencl_alloc(self,
np.int64(images_mem_4131_ext.nbytes),
"images_mem_4131")
# The copy is skipped for zero-byte inputs.
# NOTE(review): `synchronous` is not defined in the visible lines --
# presumably a module-level flag; confirm.
if (np.int64(images_mem_4131_ext.nbytes) != 0):
cl.enqueue_copy(self.queue, images_mem_4131,
normaliseArray(images_mem_4131_ext),
is_blocking=synchronous)
except (TypeError, AssertionError) as e:
raise TypeError("Argument #1 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[][][]i16",
type(images_mem_4131_ext),
def _allocate_memory(self):
    """Create (or adopt) the arrays this object works on.

    Sets ``d_filter_f``, ``is_cpu``, ``d_sino_padded``, ``d_sino_f``,
    ``tmp_sino_device`` and ``tmp_sino_host`` on ``self``.
    """
    self.d_filter_f = parray.zeros(
        self.queue, (self.sino_f_shape[-1],), np.complex64
    )
    self.is_cpu = self.device.type == "CPU"

    if self.fft_backend != "opencl":
        # The numpy backend pre-allocates nothing, so create host arrays here.
        self.d_sino_padded = np.zeros(self.sino_padded_shape, "f")
        self.d_sino_f = np.zeros(self.sino_f_shape, np.complex64)
    else:
        # With the opencl backend the FFT object already owns these buffers.
        self.d_sino_padded = self.fft.data_in
        self.d_sino_f = self.fft.data_out

    # Scratch arrays (one on the device, one on the host) required for
    # rectangular memcpy in certain cases.
    self.tmp_sino_device = parray.zeros(self.queue, self.sino_shape, "f")
    self.tmp_sino_host = np.zeros(self.sino_shape, "f")
# Checks the array arguments and packs image/coordinate layout information
# into binary form.
# NOTE(review): `kern` and `queue` are unused and the packed values are never
# consumed in the visible lines, so the function presumably continues beyond
# this snippet (judging by the name, with a kernel launch) -- confirm.
def _sample_opencl_array(kern, image, xs, ys, output, queue=None):
# Every array argument must be a cl.array.Array instance.
assert isinstance(image, cl.array.Array)
assert isinstance(xs, cl.array.Array)
assert isinstance(ys, cl.array.Array)
assert isinstance(output, cl.array.Array)
assert image.dtype == np.float32
assert xs.shape == ys.shape
# NOTE(review): the next assertion compares output.shape with itself and can
# never fail; it was presumably meant to compare against xs.shape -- confirm
# before changing.
assert output.shape == output.shape
assert output.dtype == np.float32
# Byte strides are converted to element strides, zero-padded to four entries
# and packed as four C ints.
image_strides = struct.pack('iiii',
*(tuple(s//image.dtype.itemsize for s in image.strides) + (0,0,0,0))[:4])
image_offset = np.int32(image.offset)
# The shape is padded with ones (not zeros) to four entries before packing.
image_shape = struct.pack('iiii', *(tuple(image.shape) + (1,1,1,1))[:4])
# Same stride conversion for the xs array.
xs_strides = struct.pack('iiii',
*(tuple(s//xs.dtype.itemsize for s in xs.strides) + (0,0,0,0))[:4])
def cl_test_sobel(im):
    """Apply ``Sobel`` to *im* on an OpenCL device and fetch the outputs.

    A fresh context and command queue are created, *im* is uploaded, and
    three output buffers shaped like the input are allocated.

    Returns a 3-tuple of host arrays in the order
    ``(mag_buf, imgx_buf, imgy_buf)`` as named by the ``Sobel`` call.
    """
    context = cl.create_some_context()
    command_queue = cl.CommandQueue(context)
    operator = Sobel(context, command_queue)

    source = cl_array.to_device(command_queue, im)
    # Allocation order is magnitude, x, y.
    mag_out, x_out, y_out = (cl_array.empty_like(source) for _ in range(3))

    operator(source, x_out, y_out, mag_out)
    return tuple(buf.get() for buf in (mag_out, x_out, y_out))
# writes: force_split_box, have_upper_level_split_box
# NOTE(review): fragment from inside a loop whose header lies outside this
# view (see the `break` below); the original indentation is not available.
evt = level_restrict_kernel(
upper_level,
root_extent,
box_has_children,
force_split_box,
have_upper_level_split_box,
*(box_child_ids + box_centers),
slice=upper_level_slice,
wait_for=wait_for)
# Subsequent launches are chained on this kernel's event.
wait_for = [evt]
# Debug bookkeeping: after finishing force_split_box, record the sum of its
# entries within upper_level_slice.
if debug:
force_split_box.finish()
boxes_split.append(int(cl.array.sum(
force_split_box[upper_level_slice]).get()))
# Leave the loop once the have_upper_level_split_box flag reads back as 0.
if int(have_upper_level_split_box.get()) == 0:
break
did_upper_level_split = True
# Debug summary: overall total first, then one line per recorded entry except
# the last, labelled with levels counting down from `level - 2`.
if debug:
total_boxes_split = sum(boxes_split)
logger.debug("level restriction: {total_boxes_split} boxes split"
.format(total_boxes_split=total_boxes_split))
from itertools import count
for level_, nboxes_split in zip(
count(level - 2, step=-1), boxes_split[:-1]):
logger.debug("level {level}: {nboxes_split} boxes split"
.format(level=level_, nboxes_split=nboxes_split))
"""
Allocates required temporary arrays matching those passed via keyword.
:returns: A :class:`dict` of named arrays, suitable for passing via
dictionary expansion.
.. versionadded:: 2020.2
"""
# NOTE(review): body of a method whose `def` line lies outside this view;
# `self`, `kwargs` and `gen_tmp_name` are defined there or elsewhere.
# Builds one zero-filled array per name in self.dof_names, stored under
# gen_tmp_name(name).
tmp_arrays = {}
for name in self.dof_names:
# Every name in self.dof_names is looked up in kwargs (presumably the
# method's keyword arguments -- confirm).
f = kwargs[name]
tmp_name = gen_tmp_name(name)
import pyopencl.array as cla
# The temporary has the same kind as its template: cla.Array gets
# cla.zeros_like, np.ndarray gets np.zeros_like.
if isinstance(f, cla.Array):
tmp_arrays[tmp_name] = cla.zeros_like(f)
elif isinstance(f, np.ndarray):
tmp_arrays[tmp_name] = np.zeros_like(f)
else:
# Any other type is rejected.
raise ValueError("Could not generate tmp array for %s of type %s"
% (f, type(f)))
return tmp_arrays
def __call__(self, input_ary, output_ary=None, allocator=None, queue=None):
    """Run the scan on *input_ary* and return the array holding the result.

    :arg input_ary: the array to scan. Its shape must have exactly one
        entry and it must be contiguous (``flags.forc``).
    :arg output_ary: where the result goes. ``None`` (the default) means
        in place, i.e. *input_ary* itself; the string ``"new"`` allocates
        a fresh array via ``cl.array.empty_like(input_ary)``.
    :arg allocator: passed on for allocations; falls back to
        ``input_ary.allocator``.
    :arg queue: command queue to use; falls back to ``input_ary.queue``
        and then to the queue of the resolved output array.
    :returns: the output array (identical to *input_ary* for in-place use).
    :raises ValueError: if input and output shapes differ.
    :raises RuntimeError: if *input_ary* is not contiguous.
    """
    allocator = allocator or input_ary.allocator

    # Resolve the output array first. Looking up ``output_ary.queue`` while
    # it is still ``None`` or the string "new" raises AttributeError when
    # neither *queue* nor ``input_ary.queue`` is set.
    if output_ary is None:
        output_ary = input_ary
    elif isinstance(output_ary, (str, six.text_type)) and output_ary == "new":
        output_ary = cl.array.empty_like(input_ary, allocator=allocator)

    queue = queue or input_ary.queue or output_ary.queue

    if input_ary.shape != output_ary.shape:
        raise ValueError("input and output must have the same shape")

    if not input_ary.flags.forc:
        raise RuntimeError("ScanKernel cannot "
                "deal with non-contiguous arrays")

    n, = input_ary.shape

    # Nothing to scan: skip the kernel launch entirely.
    if not n:
        return output_ary

    GenericScanKernel.__call__(self,
            input_ary, output_ary, allocator=allocator, queue=queue)

    # Return the result on this path too, so an array allocated through
    # output_ary="new" is not lost and both paths agree.
    return output_ary
def _fit_single_preprocess(self, data, dates, nan_value):
    """Prepare one data set on the device before fitting.

    Steps, in order:

    1. host side: map *dates* to int32 indices and set ``self.n`` and
       ``self.lam``;
    2. upload *data* and the mapped indices with ``pycl_array.to_device``;
    3. run ``remove_nans`` followed by ``reshapeTransp`` on
       ``self.futobj``.

    When ``self.verbose > 0`` the elapsed time of each step is added to
    the matching ``self._timers`` entry and printed.

    Returns the tuple ``(y_cl, mapped_indices_cl)``.
    """

    def _record(timer_key, template, elapsed):
        # Timings are accumulated and shown in verbose mode only.
        if self.verbose > 0:
            self._timers[timer_key] += elapsed
            print(template.format(elapsed))

    # Host-side initialisation.
    tic = time.time()
    mapped_indices = self._map_indices(dates).astype(numpy.int32)
    num_obs = data.shape[0]
    self.n = self._compute_end_history_index(dates)
    self.lam = self._compute_lam(num_obs, self.n)
    _record('initialization',
            "--- runtime for data initialization:\t\t{}",
            time.time() - tic)

    # (1) copy data from host to device
    tic = time.time()
    data_cl = pycl_array.to_device(self.queue, data)
    mapped_indices_cl = pycl_array.to_device(self.queue, mapped_indices)
    _record('transfer_host_gpu',
            "--- runtime for data transfer (host->device):\t{}",
            time.time() - tic)

    # Device-side preprocessing.
    tic = time.time()
    data_cl = self.futobj.remove_nans(nan_value, data_cl)
    y_cl = self.futobj.reshapeTransp(data_cl)
    _record('preprocessing',
            "--- runtime for data preprocessing:\t\t{}",
            time.time() - tic)

    return y_cl, mapped_indices_cl