if mod_counters_[mod_avail_index] == 0:
    # (opener and the bias-gradient line restored from the matching elif branch
    #  below; the fragment broke off above this point)
    grads = tmp_grad_bias.data.numpy().astype(np.float64)
    # send either the pickled compressed gradient or the raw buffer
    if compress_grad == 'compress':
        _compressed_grad = g_compress(grads)
        req_isend = communicator.isend(_compressed_grad, dest=0, tag=88+channel_index)
    else:
        req_isend = communicator.Isend([grads, MPI.DOUBLE], dest=0, tag=88+channel_index)
    req_send_check.append(req_isend)
    channel_index -= 1
    mod_counters_[mod_avail_index] += 1
elif mod_counters_[mod_avail_index] == 1:
    grads = tmp_grad_weight.data.numpy().astype(np.float64)
    if compress_grad == 'compress':
        _compressed_grad = g_compress(grads)
        req_isend = communicator.isend(_compressed_grad, dest=0, tag=88+channel_index)
    else:
        req_isend = communicator.Isend([grads, MPI.DOUBLE], dest=0, tag=88+channel_index)
    req_send_check.append(req_isend)
    channel_index -= 1
    mod_counters_[mod_avail_index] += 1
    # update counters
    mod_avail_index -= 1
else:
    continue  # this block runs inside the layer-wise backward loop of the original

if mod_counters_[0] == 1:
    req_send_check[-1].wait()
    grads = tmp_grad_weight.data.numpy().astype(np.float64)
    if compress_grad == 'compress':
        _compressed_grad = g_compress(grads)
        req_isend = communicator.isend(_compressed_grad, dest=0, tag=88+channel_index)
    else:
        # (completed from the identical branches above; the fragment broke off
        #  at this else)
        req_isend = communicator.Isend([grads, MPI.DOUBLE], dest=0, tag=88+channel_index)
    req_send_check.append(req_isend)
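For context, the parameter server has to post matching receives with the same 88+channel_index tag scheme. A minimal receive-side sketch, assuming a g_decompress helper that inverts g_compress and a list of known gradient shapes (neither appears in the fragment above):

import numpy as np
from mpi4py import MPI

def receive_gradients(communicator, source, num_channels, grad_shapes, compress_grad):
    # Hypothetical receive loop on rank 0, mirroring the worker's sends above.
    grads = []
    for channel_index in range(num_channels):
        if compress_grad == 'compress':
            # lowercase recv matches the worker's pickling isend
            payload = communicator.recv(source=source, tag=88+channel_index)
            grads.append(g_decompress(payload))  # g_decompress assumed to invert g_compress
        else:
            buf = np.empty(grad_shapes[channel_index], dtype=np.float64)
            communicator.Recv([buf, MPI.DOUBLE], source=source, tag=88+channel_index)
            grads.append(buf)
    return grads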
def compute_cost_and_grad(theta, instances, total_internal_node_num,
                          word_vectors, embsize, lambda_reg):
    '''(Function name and signature are assumed from the body below; the
    fragment begins mid-docstring.)
        instances: training instances
        total_internal_node_num: total number of internal nodes
        embsize: word embedding vector size
        lambda_reg: the weight of regularizer
    Returns:
        total_cost: the value of the objective function at theta
        total_grad: the gradients of the objective function at theta
    '''
    if rank == 0:
        # send working signal
        send_working_signal()
        # send theta
        comm.Bcast([theta, MPI.DOUBLE], root=0)
        # init recursive autoencoder
        rae = RecursiveAutoencoder.build(theta, embsize)
        # compute local reconstruction error and gradients
        rec_error, gradient_vec = process_local_batch(rae, word_vectors, instances)
        # compute total reconstruction error
        total_rec_error = comm.reduce(rec_error, op=MPI.SUM, root=0)
        # compute total cost
        reg = rae.get_weights_square()
        total_cost = total_rec_error / total_internal_node_num + lambda_reg/2 * reg
        # compute gradients
        total_grad = zeros_like(gradient_vec)
        # (next step assumed: sum the per-rank gradients onto the root,
        #  mirroring the scalar reduce above)
        comm.Reduce([gradient_vec, MPI.DOUBLE], [total_grad, MPI.DOUBLE], op=MPI.SUM, root=0)
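Every rank must take part in the Bcast/reduce collectives above, so the workers mirror the root's sequence with their own local batch. A sketch of that counterpart, reusing the fragment's names and assuming the workers know the size of theta:

import numpy as np
from mpi4py import MPI

def worker_step(comm, word_vectors, instances, embsize, theta_size):
    # theta_size is assumed known on the workers; the other names come from
    # the fragment above.
    theta = np.zeros(theta_size)
    comm.Bcast([theta, MPI.DOUBLE], root=0)             # receive current parameters
    rae = RecursiveAutoencoder.build(theta, embsize)
    rec_error, gradient_vec = process_local_batch(rae, word_vectors, instances)
    comm.reduce(rec_error, op=MPI.SUM, root=0)          # scalar sum lands on root
    comm.Reduce([gradient_vec, MPI.DOUBLE], None, op=MPI.SUM, root=0)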
for i in range(1, self.dim+1):
    self.SigPts[:,i] = self.Mx + self.P[:,i-1]
for i in range(self.dim+1, 2*self.dim+1):
    self.SigPts[:,i] = self.Mx - self.P[:,i-self.dim-1]
# TIME UPDATE
# PARALLELIZED DYNAMICS
rank = self.comm.Get_rank()
counts = balancedpartition(2*self.dim + 1, self.nb_procs)
disp = displacements(counts)
# one column per local sigma point; self.dim rows (the fragment read
# 2*self.dim + 1 here, which does not match the row-wise scatter below)
recvdata = np.zeros([self.dim, counts[rank]], dtype=np.float64)
for ii in range(self.dim):  # SINCE SCATTER SENDS 1D ARRAY, PERFORMS LINEWISE SCATTER
    self.comm.Scatterv([self.SigPts[ii], counts, disp, MPI.DOUBLE], recvdata[ii], root=0)
measdata = np.zeros([self.obs, counts[rank]])
for i in range(0, counts[rank]):  # USUAL POINTWISE PROPAGATION
    recvdata[:,i] = self.dynamics(recvdata[:,i], self.Time, self.dt)
    measdata[:,i] = self.observe(recvdata[:,i])
sigptstmp = np.zeros([self.dim, counts[rank]], dtype=np.float64)
premeastmp = np.zeros([self.obs, counts[rank]], dtype=np.float64)
for ii in range(self.dim):
    self.comm.Gatherv([recvdata[ii], counts[rank]], [self.SigPts[ii], counts, disp, MPI.DOUBLE], root=0)
for ii in range(self.obs):
    self.comm.Gatherv([measdata[ii], counts[rank]], [self.PreMeas[ii], counts, disp, MPI.DOUBLE], root=0)
self.comm.Barrier()
if rank == 0:
    pass  # (root-only mean/covariance update not shown in this fragment)
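balancedpartition and displacements are external helpers; for the Scatterv/Gatherv calls to line up they must produce per-rank counts and their running offsets. A plausible minimal implementation (assumed, not the original):

import numpy as np

def balancedpartition(n, nb_procs):
    """Split n items over nb_procs ranks as evenly as possible."""
    counts = np.full(nb_procs, n // nb_procs, dtype=int)
    counts[: n % nb_procs] += 1   # hand the remainder to the first ranks
    return counts

def displacements(counts):
    """Starting offset of each rank's block, as Scatterv/Gatherv expect."""
    disp = np.zeros(len(counts), dtype=int)
    disp[1:] = np.cumsum(counts)[:-1]
    return disp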
if parallel_capable:   # (gating condition assumed: the else branch below
                       #  supplies string placeholders when MPI is unavailable)
    from mpi4py import MPI
    yt.utilities.logger.uncolorize_logging()
    # Even though the uncolorize function already resets the format string,
    # we reset it again so that it includes the processor.
    f = logging.Formatter("P%03i %s" % (MPI.COMM_WORLD.rank,
                                        yt.utilities.logger.ufstring))
    if len(yt.utilities.logger.rootLogger.handlers) > 0:
        yt.utilities.logger.rootLogger.handlers[0].setFormatter(f)
    if ytcfg.getboolean("yt", "parallel_traceback"):
        sys.excepthook = traceback_writer_hook("_%03i" % MPI.COMM_WORLD.rank)
    if ytcfg.getint("yt", "LogLevel") < 20:
        yt.utilities.logger.ytLogger.warning(
            "Log Level is set low -- this could affect parallel performance!")
    dtype_names = dict(
        float32 = MPI.FLOAT,
        float64 = MPI.DOUBLE,
        int32   = MPI.INT,
        int64   = MPI.LONG
    )
    op_names = dict(
        sum = MPI.SUM,
        min = MPI.MIN,
        max = MPI.MAX
    )
else:
    dtype_names = dict(
        float32 = "MPI.FLOAT",
        float64 = "MPI.DOUBLE",
        int32   = "MPI.INT",
        int64   = "MPI.LONG"
    )
    op_names = dict(
        sum = "MPI.SUM",
        min = "MPI.MIN",
        max = "MPI.MAX"
    )
self.config = config
self.args = args

filename = args.struct_file[0]
self.struct_filename = filename
self.npoints, self.natoms = coord_reader.get_nframes_natoms(filename)

if coord_reader.supports_parallel_reading(filename):
    # read coordinates in parallel
    self.idxs_thread, self.npoints_per_thread, self.offsets_per_thread = p_index.get_idxs_thread(comm, self.npoints)
    coords_thread = coord_reader.get_coordinates(filename, idxs=self.idxs_thread)
    coords_ravel = coords_thread.ravel()
    ravel_lengths, ravel_offsets = p_index.get_ravel_offsets(self.npoints_per_thread, self.natoms)
    coordstemp = np.zeros(self.npoints*3*self.natoms, dtype='float')
    start = MPI.Wtime()
    comm.Allgatherv(coords_ravel, (coordstemp, ravel_lengths, ravel_offsets, MPI.DOUBLE))
    self.coords = coordstemp.reshape((self.npoints, 3, self.natoms))
else:
    # serial reading
    if rank == 0:
        self.coords = coord_reader.get_coordinates(filename)
    else:
        self.coords = np.zeros((self.npoints, 3, self.natoms), dtype=np.double)
    comm.Bcast(self.coords, root=0)
# load configurations
#format_struct_file = os.path.splitext(args.struct_file[0])[1]
#if format_struct_file == '.gro': # use lsdmap reader
# struct_file = reader.open(args.struct_file)
# self.npoints = struct_file.nlines
# idxs_thread = p_index.get_idxs_thread_v(comm, self.npoints)
# coords_thread = struct_file.readlines(idxs_thread)
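p_index.get_ravel_offsets is external; for the Allgatherv above to reassemble the frames, it must return the number of doubles each rank sends (3*natoms per frame) and the running offsets. A sketch under that assumption:

import numpy as np

def get_ravel_offsets(npoints_per_thread, natoms):
    # Each frame contributes 3*natoms doubles to the flat buffer.
    lengths = np.asarray(npoints_per_thread) * 3 * natoms
    offsets = np.zeros_like(lengths)
    offsets[1:] = np.cumsum(lengths)[:-1]   # start of each rank's block
    return lengths, offsets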
from mpi4py import MPI   # needed for MPI.COMM_WORLD and MPI.DOUBLE below
from numpy import array
from output import out
from time import time
from processSlice_pyrex_py2 import processSlice

if __name__ == '__main__':
    n = 1000000000
    delta = 1.0 / n
    startTime = time()
    comm = MPI.COMM_WORLD
    myId = comm.Get_rank()
    sliceSize = n // comm.Get_size()  # integer division (the original targeted Python 2, where / truncates)
    localSum = array([processSlice(myId, sliceSize, delta)])
    sum = array([0.0])
    comm.Reduce((localSum, MPI.DOUBLE), (sum, MPI.DOUBLE))
    if myId == 0:
        pi = 4.0 * delta * sum[0]
        elapseTime = time() - startTime
        out(__file__, pi, n, elapseTime)
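processSlice comes from a compiled Pyrex module. Given pi = 4.0 * delta * sum at the end, it must return the sum of 1/(1+x^2) over the rank's slice; a pure-Python equivalent, assuming a blocked index range per rank:

def process_slice_py(rank, slice_size, delta):
    # Midpoint rule over this rank's block of indices; the caller multiplies
    # the reduced sum by 4*delta to obtain pi.
    local = 0.0
    for i in range(rank * slice_size, (rank + 1) * slice_size):
        x = (i + 0.5) * delta
        local += 1.0 / (1.0 + x * x)
    return local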
class Communication:
    # (Base-class header assumed: the fragment starts mid-docstring inside an
    #  abstract method, and the subclass below inherits from Communication.)
    def chunk(self, shape, split):
        """
        Returns
        -------
        slices : tuple of slices
            the chunk slices with respect to the given shape
        """
        pass
class MPICommunication(Communication):
    # static mapping of torch types to the respective MPI type handle
    __mpi_type_mappings = {
        torch.uint8: MPI.UNSIGNED_CHAR,
        torch.int8: MPI.SIGNED_CHAR,
        torch.int16: MPI.SHORT,   # MPI.SHORT is the C short handle; MPI.SHORT_INT is a MINLOC/MAXLOC pair type
        torch.int32: MPI.INT,
        torch.int64: MPI.LONG,
        torch.float32: MPI.FLOAT,
        torch.float64: MPI.DOUBLE
    }

    def __init__(self, handle=MPI.COMM_WORLD):
        self.handle = handle
        self.rank = handle.Get_rank()
        self.size = handle.Get_size()

    def is_distributed(self):
        """
        Determines whether the communicator is distributed, i.e. handles more than one node.

        Returns
        -------
        distribution_flag : bool
            flag indicating whether the communicator contains distributed resources
        """
        return self.size > 1
# (np, mp: number of subdomains in x and y; nv, mv: local block size;
#  N = mv*nv values per rank; names follow the fragment's conventions)
m = grid.m
di, dj = n, m
di, dj = nv+2, mv+2          # block spacing includes a one-cell halo on each side
nv0 = di*np
mv0 = dj*mp
sizes = ones(np*mp)*N        # every rank contributes the same number of values
offsets = arange(np*mp).reshape((mp, np)).ravel()*N
buff_loc = x.ravel().copy()
buff_loc[grid.msk.ravel() == 0] = NaN   # blank out masked cells
buff_glo = zeros(N*np*mp)
comm.Allgatherv(buff_loc, [buff_glo, sizes, offsets, MPI.DOUBLE])
I, J = meshgrid(arange(nv), arange(mv))
xglo = zeros((mv0, nv0))
for j in range(mp):
    for i in range(np):
        k = i+j*np           # rank owning block (i, j)
        xglo[J+j*dj, I+i*di] = buff_glo[k*N:(k+1)*N].reshape((mv, nv))
return xglo
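The reassembly indexing (k = i + j*np, block k placed at offset (j*dj, i*di)) can be checked without MPI. A standalone numpy demo with a 2x2 layout and no halo (so di = nv and dj = mv):

import numpy as np

np_, mp_ = 2, 2          # ranks in x and y (renamed to avoid clashing with numpy)
nv, mv = 3, 2            # local block size
N = mv * nv
blocks = [np.full((mv, nv), k, dtype=float) for k in range(np_ * mp_)]
buff_glo = np.concatenate([b.ravel() for b in blocks])   # stands in for Allgatherv output
I, J = np.meshgrid(np.arange(nv), np.arange(mv))
xglo = np.zeros((mv * mp_, nv * np_))
for j in range(mp_):
    for i in range(np_):
        k = i + j * np_
        xglo[J + j * mv, I + i * nv] = buff_glo[k * N:(k + 1) * N].reshape((mv, nv))
print(xglo)   # each (mv, nv) tile holds its owning rank's id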
def outputRow(self, time):
    """ Outputs a single row of currently calculated gauge data to self.file"""
    assert self.isGaugeOwner

    if self.isPointGauge or self.isLineGauge:
        self.localQuantitiesBuf = np.concatenate([gaugesVec.getArray() for gaugesVec in
                                                  self.pointGaugeVecs]).astype(np.double)
        logEvent("Sending local array of type %s and shape %s to root on comm %s" % (
            str(self.localQuantitiesBuf.dtype), str(self.localQuantitiesBuf.shape), str(self.gaugeComm)), 9)
        if self.gaugeComm.rank == 0:
            logEvent("Receiving global array of type %s and shape %s on comm %s" % (
                str(self.localQuantitiesBuf.dtype), str(self.globalQuantitiesBuf.shape), str(self.gaugeComm)), 9)
        self.gaugeComm.Gatherv(sendbuf=[self.localQuantitiesBuf, MPI.DOUBLE],
                               recvbuf=[self.globalQuantitiesBuf, (self.globalQuantitiesCounts, None),
                                        MPI.DOUBLE], root=0)
        self.gaugeComm.Barrier()

    if self.isLineIntegralGauge:
        lineIntegralGaugeBuf = self.lineIntegralGaugesVec.getArray()
        globalLineIntegralGaugeBuf = lineIntegralGaugeBuf.copy()
        self.gaugeComm.Reduce(lineIntegralGaugeBuf, globalLineIntegralGaugeBuf, op=MPI.SUM)
    else:
        globalLineIntegralGaugeBuf = []

    if self.gaugeComm.rank == 0:
        self.file.write("%25.15e" % time)
        if self.isPointGauge or self.isLineGauge:
            for id in self.globalQuantitiesMap:
                self.file.write(", %43.18e" % (self.globalQuantitiesBuf[id],))
        if self.isLineIntegralGauge:
            for lineIntegralGauge in globalLineIntegralGaugeBuf:
                self.file.write(", %80.18e" % (lineIntegralGauge))
# (Setup assumed; the snippet begins mid-script. PrintInMPIRank is an external
#  helper, sketched at the end of this example.)
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
rank = comm.rank

comm.Barrier()
# Prepare a vector to be broadcasted.
Bn = 3
An = Bn * comm.size
if rank == 0:
    A = np.arange(An, dtype=np.float64)  # rank 0 has proper data
else:
    A = np.empty(An, dtype=np.float64)   # all others just an empty array
PrintInMPIRank(comm, "Init", A)
# ----------------------------------------------------------
# Broadcast A from rank 0 to everybody, means root=0.
# Default: comm.Bcast( [A, MPI.DOUBLE], root=0 )
comm.Bcast( [A, MPI.DOUBLE] )
PrintInMPIRank(comm, "Broadcast from rank0", A)
# ----------------------------------------------------------
B = np.empty(Bn, dtype=np.float64)
# Scatter data into B arrays.
comm.Scatter( [A, MPI.DOUBLE], [B, MPI.DOUBLE] )
PrintInMPIRank(comm, "Scatter from rank0", B)
# Everybody is multiplying by 2.
B *= 2
# ----------------------------------------------------------
# Gather B data into A rank0.
# Default root=0.
comm.Gather( [B, MPI.DOUBLE], [A, MPI.DOUBLE], root=0 )
PrintInMPIRank(comm, "Gather", A)
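PrintInMPIRank is not shown in the fragment; a hypothetical stand-in that prints rank by rank, to make the demo runnable end to end:

def PrintInMPIRank(comm, label, data):
    # Hypothetical helper matching the calls above: take turns printing so the
    # output of different ranks is not interleaved.
    for r in range(comm.size):
        if comm.rank == r:
            print("rank %d %-25s %s" % (comm.rank, label, data))
        comm.Barrier()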