Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@cuda.jit
def gaussker_2d1_cuda(x, y, c, hx, hy, nf1, nf2, nspread, tau, real_ftau, imag_ftau ):
"""This kernel function for gauss grid 1d typ1, and it will be executed by a thread."""
i = cuda.grid(1)
if i > x.shape[0]:
return
#do the 1d griding here
xi = x[i] % (2 * np.pi) #x, shift the source point xj so that it lies in [0,2*pi]
yi = y[i] % (2 * np.pi) #y, shift the source point yj so that it lies in [0,2*pi]
mx = 1 + int(xi // hx) #index for the closest grid point
my = 1 + int(yi // hy) #index for the closest grid point
for mmx in range(-nspread, nspread): #mm index for all the spreading points
for mmy in range(-nspread,nspread):
#griding with g(x,y) = exp(-(x^2 + y^2) / 4*tau)
#ftau[(mx + mmx) % nf1, (my + mmy) % nf2] +=
tmp = c[i] * exp(-0.25 * (\
(xi - hx * (mx + mmx)) ** 2 + \
@cuda.jit
def gaussker_3d1_fast_cuda(x, y, z, c, hx, hy, hz, nf1, nf2, nf3, nspread, tau, E3, real_ftau, imag_ftau):
    """CUDA kernel for 3D type-1 Gaussian gridding; each thread handles one source point.

    Parameters (as used in the visible part of the kernel):
        x, y, z: 1D arrays of source coordinates; each is wrapped modulo 2*pi.
        c: 1D array of source strengths; its length bounds the thread index.
        hx, hy, hz: grid spacing along each axis.
        nf1, nf2, nf3, nspread, tau, E3, real_ftau, imag_ftau: not used in the
            visible part of the kernel - confirm their roles against the full
            source.
    """
    i = cuda.grid(1)
    # Valid indices are 0 .. c.shape[0] - 1. The thread whose index equals the
    # length must exit too, otherwise it reads x[i], y[i], z[i] out of bounds.
    if i >= c.shape[0]:
        return
    # read x, y, z values
    xi = x[i] % (2 * np.pi)  # x, wrap the source point into [0, 2*pi)
    yi = y[i] % (2 * np.pi)  # y, wrap the source point into [0, 2*pi)
    zi = z[i] % (2 * np.pi)  # z, wrap the source point into [0, 2*pi)
    mx = 1 + int(xi // hx)  # grid index just above the source point along x
    my = 1 + int(yi // hy)  # grid index just above the source point along y
    mz = 1 + int(zi // hz)  # grid index just above the source point along z
    # From here on xi, yi, zi hold the offsets from that grid point.
    xi = (xi - hx * mx)
    yi = (yi - hy * my)
    zi = (zi - hz * mz)
    # NOTE(review): the remainder of the kernel is not visible in this excerpt.
def main():
# Benchmark driver (excerpt): builds a CUDA-targeted ufunc from
# poly.discriminant, generates input arrays and starts a timed section.
# NOTE(review): the excerpt ends at the `with` header on the last line; the
# body of that block is not visible here.
# Wrap poly.discriminant with vectorize for a float32 and a float64 signature.
cu_discriminant = vectorize(['f4(f4, f4, f4)', 'f8(f8, f8, f8)'],
target='cuda')(poly.discriminant)
# NOTE(review): 1e+8 // 2 evaluates to the float 50000000.0, not an int -
# confirm that poly.generate_input accepts a float size.
N = 1e+8 // 2
print('Data size', N)
A, B, C = poly.generate_input(N, dtype=np.float32)
# Uninitialized array with the same shape and dtype as A.
D = np.empty(A.shape, dtype=A.dtype)
stream = cuda.stream()
print('== One')
# Timestamp taken just before entering the stream block.
ts = time()
with stream.auto_synchronize():
@jit
def increment1(value):
    """Return ``value`` plus one."""
    incremented = value + 1
    return incremented
# Parameters
# Nz and Nr are the two dimensions of every field array created below;
# Nz, Nr, m and rmax are all passed to SpectralTransformer further down.
Nz = 2048
Nr = 256
rmax = 50.e-6
m = 0
# Initialize the random test fields: complex (Nz, Nr) arrays whose real and
# imaginary parts are drawn independently with np.random.rand, then copied
# to the device.
interp_field_r = np.random.rand(Nz, Nr) + 1.j*np.random.rand(Nz, Nr)
interp_field_t = np.random.rand(Nz, Nr) + 1.j*np.random.rand(Nz, Nr)
d_interp_field_r = cuda.to_device( interp_field_r )
d_interp_field_t = cuda.to_device( interp_field_t )
# Initialize the fields in spectral space: uninitialized arrays with the same
# shape and dtype as the test fields, plus their device copies.
spect_field_p = np.empty_like( interp_field_r )
spect_field_m = np.empty_like( interp_field_t )
d_spect_field_p = cuda.to_device( spect_field_p )
d_spect_field_m = cuda.to_device( spect_field_m )
# Initialize the fields that hold the result of the back-and-forth
# transformation, again as uninitialized arrays plus their device copies.
back_field_r = np.empty_like( interp_field_r )
back_field_t = np.empty_like( interp_field_t )
d_back_field_r = cuda.to_device( back_field_r )
d_back_field_t = cuda.to_device( back_field_t )
# ----------------
# Scalar transform
# ----------------
print( '\n ### Scalar transform \n' )
# Create the transformer used for the CPU run of the transform
trans_cpu = SpectralTransformer( Nz, Nr, m, rmax )
# Do a loop so as to get the fastest time
# and remove compilation time
# NOTE(review): the loop itself is not visible in this excerpt; tmin is
# presumably the running minimum of the measured times - confirm.
tmin = 1.
def resdec(f):
    """Return ``jit(f)`` when ``numbaexists`` is truthy, otherwise ``f`` unchanged."""
    return jit(f) if numbaexists else f
return resdec
@numba.jit(UniTuple(f8[:, :], 3)(f8[:, :], f8[:], f8[:]), nopython=True, nogil=True)
def positions_vector(position, orientation, radius_ts):
    """Center and shoulder positions.

    Args:
        position: array of shape (n, 2) with the center positions.
        orientation: array of shape (n,) with the orientation angles; the
            offset direction for each row is ``(sin(angle), -cos(angle))``.
        radius_ts: array of shape (n,) with the per-row offset length.

    Returns:
        Tuple ``(position, position_ls, position_rs)`` where ``position_ls``
        and ``position_rs`` are the center shifted by minus/plus the offset.
    """
    x = np.cos(orientation)
    y = np.sin(orientation)
    # Scale each component by the per-row radius *before* stacking. Multiplying
    # the stacked (n, 2) array by the (n,) radius array would broadcast the
    # radius along the last axis, which fails unless n is 1 or 2 and scales
    # the wrong axis when n == 2.
    offset = np.stack((y * radius_ts, -x * radius_ts), axis=1)
    position_ls = position - offset
    position_rs = position + offset
    return position, position_ls, position_rs
@jit(u4(u4[:, :]), nopython=True, cache=True)
def true_num_ops(exponent_matrix):
    """Count the MUL and POW operations implied by an exponent matrix.

    Additions are not counted. Every entry greater than zero costs one MUL
    (this accounts for the coefficients as well), and every entry of two or
    more costs one additional POW.

    Args:
        exponent_matrix: 2D array indexed as ``[monomial, dimension]``.

    Returns:
        The total number of operations.
    """
    n_monomials, n_dims = exponent_matrix.shape
    total = 0
    for row in range(n_monomials):
        for col in range(n_dims):
            power = exponent_matrix[row, col]
            if power >= 2:
                # one MUL for the factor plus one POW for the exponent
                total += 2
            elif power > 0:
                # a plain factor needs a single MUL
                total += 1
    return total
@jit(float64(float64, float64), nopython=True)
def gb(X, Y):
    """Return ``Godunov.NumFlux(Burg, X, Y)`` for the two given values."""
    flux = Godunov.NumFlux(Burg, X, Y)
    return flux
@numba.jit(nopython=True,nogil=True)
def i_will_win(state, last_move, player):
""" Return true if I will win next step if the opponent don't have 4-in-a-row.
Winning Conditions:
1. 5 in a row.
2. 4 in a row with both end open. (free 4)
3. 4 in a row with one missing stone x 2 (hard 4 x 2)
"""
r, c = last_move
# try all 4 directions, the other 4 is equivalent
directions = [(1,1), (1,0), (0,1), (1,-1)]
n_hard_4 = 0 # number of hard 4s found
for dr, dc in directions:
line_length = 1 # last_move
# try to extend in the positive direction (max 4 times)
ext_r = r
ext_c = c