# Fragment of a White heteroskedasticity test. condition_index, spmultiply and
# sphstack are helpers from the host library; the wrapper signature and the
# unpacking of n, k and X from `reg` are assumptions, since the original
# function header was not shown.
import numpy as np
import scipy.sparse as SP


def white_test(reg):  # assumed wrapper
    n, k, X = int(reg.n), int(reg.k), reg.x  # assumed attributes of `reg`
    # Check for constant, if none add one, see Greene 2003, pg. 222
    # if constant == False:
    #     X = np.hstack((np.ones((n, 1)), X))
    # Check for multicollinearity in the X matrix
    ci = condition_index(reg)
    if ci > 30:
        white_result = "Not computed due to multicollinearity."
        return white_result
    # Compute cross-products and squares of the regression variables
    if type(X).__name__ == 'ndarray':
        A = np.zeros((n, (k * (k + 1)) // 2))
    elif type(X).__name__ in ('csc_matrix', 'csr_matrix'):
        # this is probably inefficient
        A = SP.lil_matrix((n, (k * (k + 1)) // 2))
    else:
        raise Exception("unknown X type, %s" % type(X).__name__)
    counter = 0
    for i in range(k):
        for j in range(i, k):
            v = spmultiply(X[:, i], X[:, j], False)
            A[:, counter] = v
            counter += 1
    # Append the original variables
    A = sphstack(X, A)  # note: this also converts a LIL to CSR
    n, k = A.shape
    # Check to identify any duplicate or constant columns in A
    omitcolumn = []
    for i in range(k):
        current = A[:, i]
        # assumed continuation (the original loop body was truncated here):
        # record any later column that exactly duplicates this one
        for j in range(i + 1, k):
            if abs(current - A[:, j]).sum() == 0:
                omitcolumn.append(j)
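
# What the cross-product matrix A feeds into: White's test regresses squared
# OLS residuals on levels, squares and cross-products of the regressors and
# uses n*R^2 as an LM statistic. A minimal, library-free sketch of that step
# (all names below are illustrative, not from the host library):
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
n_obs = 200
Xd = np.column_stack([np.ones(n_obs), rng.normal(size=(n_obs, 2))])
y = Xd @ np.array([1.0, 2.0, -1.0]) + rng.normal(size=n_obs)
beta = np.linalg.lstsq(Xd, y, rcond=None)[0]
e2 = (y - Xd @ beta) ** 2
# levels, squares and cross-products, mirroring the i <= j loop above
A_demo = np.column_stack([Xd[:, i] * Xd[:, j]
                          for i in range(Xd.shape[1])
                          for j in range(i, Xd.shape[1])])
gamma = np.linalg.lstsq(A_demo, e2, rcond=None)[0]
r2 = 1.0 - ((e2 - A_demo @ gamma) ** 2).sum() / ((e2 - e2.mean()) ** 2).sum()
lm = n_obs * r2
dof = np.linalg.matrix_rank(A_demo) - 1
print("White LM:", lm, "p-value:", stats.chi2.sf(lm, dof))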
# BFS test helper: walk networkx's BFS edges and group nodes/edges into
# per-level frontiers, using edge IDs from the DGL graph `g` defined below.
# The setup lines before the if/else were lost in extraction and are
# reconstructed here from the variables the body uses.
import random

import dgl
import networkx as nx


def _bfs_nx(g_nx, src):
    edges = nx.bfs_edges(g_nx, src)
    layers_nx = [set([src])]
    edges_nx = []
    frontier = set()
    edge_frontier = set()
    for u, v in edges:
        if u in layers_nx[-1]:
            frontier.add(v)
            edge_frontier.add(g.edge_id(u, v))
        else:
            layers_nx.append(frontier)
            edges_nx.append(edge_frontier)
            frontier = set([v])
            edge_frontier = set([g.edge_id(u, v)])
    # avoids empty successors
    if len(frontier) > 0 and len(edge_frontier) > 0:
        layers_nx.append(frontier)
        edges_nx.append(edge_frontier)
    return layers_nx, edges_nx
# Test driver (uses the older DGLGraph construction API). The lines defining
# n and toset were lost in extraction; both are reconstructed here as labeled
# assumptions.
import numpy as np
import scipy.sparse as sp

n = 100                                    # assumed graph size
toset = lambda t: set(t.numpy().tolist())  # assumed: node-ID tensor -> set

g = dgl.DGLGraph()
a = sp.random(n, n, 3 / n, data_rvs=lambda n: np.ones(n))
g.from_scipy_sparse_matrix(a)
g_nx = g.to_networkx()
src = random.choice(range(n))
layers_nx, _ = _bfs_nx(g_nx, src)
layers_dgl = dgl.bfs_nodes_generator(g, src)
assert len(layers_dgl) == len(layers_nx)
assert all(toset(x) == y for x, y in zip(layers_dgl, layers_nx))

g_nx = nx.random_tree(n, seed=42)
g = dgl.DGLGraph()
g.from_networkx(g_nx)
src = 0
_, edges_nx = _bfs_nx(g_nx, src)
edges_dgl = dgl.bfs_edges_generator(g, src)
assert len(edges_dgl) == len(edges_nx)
assert all(toset(x) == y for x, y in zip(edges_dgl, edges_nx))
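
# Hedged aside: recent networkx (>= 2.8) can produce the same node layering
# directly via bfs_layers, which makes a handy cross-check for _bfs_nx:
import networkx as nx

g_demo = nx.path_graph(5)  # 0-1-2-3-4
print([set(layer) for layer in nx.bfs_layers(g_demo, 0)])
# -> [{0}, {1}, {2}, {3}, {4}]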
# Sentence featurization fragment. The dispatch header was lost in extraction;
# the wrapper signature, the 'tfidf' branch header, desc_dim and the
# dense/sparse split below are reconstructed as labeled assumptions
# (`model`, `utils` and the document index `midx` come from the host code).
import numpy as np
import scipy.sparse as sps
import scipy.sparse.linalg


def sentence_features(sentence_list, model, desc, midx, is_qa=False):  # assumed signature
    if desc == 'tfidf':  # assumed branch header
        desc_dim = len(model.vocab)  # assumed: one feature per vocabulary word
        if is_qa:
            features = np.zeros((len(sentence_list), desc_dim), dtype='float32')
        else:
            features = sps.dok_matrix((len(sentence_list), desc_dim), dtype='float32')
        for s, sentence in enumerate(sentence_list):
            # NOTE: use both alphanumeric and stemming normalization
            sentence = utils.normalize_stemming(utils.normalize_alphanumeric(sentence.lower())).split(' ')
            # for each word in the normalized sentence
            for word in sentence:
                if word not in model.vocab:
                    continue
                widx = model.vocab.index(word)
                features[s, widx] = model.tfidf[widx][midx]
            if is_qa:  # if not sparse, use numpy.linalg.norm
                features[s] /= (np.linalg.norm(features[s]) + 1e-6)
            else:  # if sparse, use scipy.sparse.linalg.norm
                features[s] /= (sps.linalg.norm(features[s]) + 1e-6)
    elif desc == 'word2vec':
        desc_dim = model.get_vector(model.vocab[-1]).shape[0]
        features = np.zeros((len(sentence_list), desc_dim), dtype='float32')
        for s, sentence in enumerate(sentence_list):
            # NOTE: use only alphanumeric normalization, no stemming
            sentence = utils.normalize_alphanumeric(sentence.lower()).split(' ')
            # for each word in the normalized sentence
            for word in sentence:
                if word not in model.vocab:
                    continue
                features[s] += model.get_vector(word)
            features[s] /= (np.linalg.norm(features[s]) + 1e-6)
    return features
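
# The per-row division above is plain L2 normalization with an epsilon guard.
# A standalone sketch of the same idea on a whole sparse matrix at once
# (names here are illustrative):
import numpy as np
import scipy.sparse as sps
from scipy.sparse.linalg import norm as sparse_norm

M = sps.random(4, 6, density=0.4, format='csr', random_state=0)
row_norms = sparse_norm(M, axis=1) + 1e-6  # epsilon guards empty rows
M_unit = sps.diags(1.0 / row_norms) @ M
print(np.round(sparse_norm(M_unit, axis=1), 6))  # ~1 for every non-empty row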
# Parse an MCL-style tab-separated edge list (node, node, weight per line)
# from file handle f into a symmetric sparse matrix plus label mappings.
from scipy import sparse


def mcl_xyz(f):
    l2n = {}  # label -> index
    dmx = 0   # next free index
    # first pass: assign an integer index to every node label
    for i in f:
        x, y = i.split('\t', 3)[:2]
        if x not in l2n:
            l2n[x] = dmx
            dmx += 1
        if y not in l2n:
            l2n[y] = dmx
            dmx += 1
    f.seek(0)
    dmx += 1
    G_d = sparse.lil_matrix((dmx, dmx), dtype='float32')
    # second pass: fill the symmetric weight matrix; assumes each undirected
    # edge appears with x <= y (or in both orders)
    for i in f:
        x, y, z = i.split('\t', 4)[:3]
        if x > y:
            continue
        X, Y = map(l2n.get, [x, y])
        Z = float(z)
        G_d[X, Y] = Z
        G_d[Y, X] = Z
    # print(G_d.data)
    # invert the mapping (this empties l2n)
    n2l = {}
    while l2n:
        key, val = l2n.popitem()
        n2l[val] = key
    return G_d, n2l  # assumed return; the original tail was not shown
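
# Hedged usage sketch for mcl_xyz (relying on the assumed return above),
# using an in-memory "node<TAB>node<TAB>weight" edge list:
import io

edge_list = "a\tb\t0.5\na\tc\t1.5\nb\tc\t2.0\n"
G_d, n2l = mcl_xyz(io.StringIO(edge_list))
print(G_d.shape, sorted(n2l.items()))  # (4, 4) [(0, 'a'), (1, 'b'), (2, 'c')]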
import math

import numpy
import scipy.sparse


def unitVec(vec):
    """
    Scale a vector to unit length. The only exception is the zero vector, which
    is returned back unchanged.

    If the input is sparse (list of 2-tuples), output will also be sparse. Otherwise,
    output will be a numpy array.
    """
    if scipy.sparse.issparse(vec):  # convert scipy.sparse to standard numpy array
        vec = vec.toarray().flatten()
    try:
        first = next(iter(vec))  # is there at least one element?
    except StopIteration:
        return vec
    if isinstance(first, tuple):  # sparse format?
        vecLen = 1.0 * math.sqrt(sum(val * val for _, val in vec))
        assert vecLen > 0.0, "sparse documents must not contain any explicit zero entries"
        if vecLen != 1.0:
            return [(termId, val / vecLen) for termId, val in vec]
        else:
            return list(vec)
    else:  # dense format
        vec = numpy.asarray(vec, dtype=float)
        # the closing lines were truncated; assumed completion: divide by the
        # L2 norm, leaving zero vectors unchanged
        vecLen = numpy.linalg.norm(vec)
        if vecLen > 0.0:
            return vec / vecLen
        else:
            return vec
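
# Hedged usage sketch: unitVec accepts both dense vectors and the sparse
# (id, value) 2-tuple format handled above.
print(unitVec(numpy.array([3.0, 4.0])))  # -> [0.6 0.8]
print(unitVec([(0, 3.0), (2, 4.0)]))     # -> [(0, 0.6), (2, 0.8)]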
# Voronoi/Delaunay tessellation fragment. `sp` is scipy (whose top level
# re-exported numpy functions like vstack/size in older releases) and `tools`
# is the host library's point-generation module; the if-header guarding the
# raise was lost in extraction and is reconstructed as a labeled assumption.
import scipy as sp
import scipy.sparse
import scipy.spatial as sptl

if points is None:  # assumed guard
    if num_points is None:
        raise Exception('Must specify either "points" or "num_points"')
    points = tools.generate_base_points(num_points=num_points,
                                        domain_size=domain_size)
# Perform tessellation
vor = sptl.Voronoi(points=points)
# Combine points
pts_vor = vor.vertices
pts_all = sp.vstack((points, pts_vor))
Npts = sp.size(points, 0)
Nvor = sp.size(pts_vor, 0)
Nall = Nvor + Npts
# Create adjacency matrix in lil format for quick matrix construction
am = sp.sparse.lil_matrix((Nall, Nall))
for ridge in vor.ridge_dict.keys():
    # Make Delaunay-to-Delaunay connections
    [am.rows[i].extend([ridge[0], ridge[1]]) for i in ridge]
    row = vor.ridge_dict[ridge]
    if -1 not in row:
        # Index Voronoi vertex numbers by Npts
        row = [i + Npts for i in row]
        # Make Voronoi-to-Delaunay connections
        [am.rows[i].extend(row) for i in ridge]
        # Make Voronoi-to-Voronoi connections
        row.append(row[0])
        [am.rows[row[i]].append(row[i + 1]) for i in range(len(row) - 1)]
        # Ensure connections are made symmetrically
        [am.rows[row[i + 1]].append(row[i]) for i in range(len(row) - 1)]
# Finalize adjacency matrix by assigning data values to each location
am.data = am.rows  # Values don't matter, only shape, so use 'rows'
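
# A small standalone look at the structure the loop above consumes:
# scipy's Voronoi maps each pair of input points (a "ridge") to the Voronoi
# vertices separating them, with -1 marking a vertex at infinity.
import numpy as np
from scipy.spatial import Voronoi

pts = np.random.default_rng(1).random((8, 2))
vor_demo = Voronoi(pts)
for pair, verts in list(vor_demo.ridge_dict.items())[:3]:
    print("points", pair, "-> Voronoi vertices", verts)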
# Largest-eigenvalue estimation for a graph Laplacian (a method of a graph
# class with attributes L, N, _lmax, _lmax_method and _get_upper_bound; the
# def line below is an assumption, since it was not shown).
from scipy import sparse
import scipy.sparse.linalg


def estimate_lmax(self, method='lanczos'):  # assumed signature
    if method == self._lmax_method:
        return
    self._lmax_method = method
    if method == 'lanczos':
        try:
            # We need to cast the matrix L to a supported type.
            # TODO: not good for memory. Cast earlier?
            lmax = sparse.linalg.eigsh(self.L.asfptype(), k=1, tol=5e-3,
                                       ncv=min(self.N, 10),
                                       return_eigenvectors=False)
            lmax = lmax[0]
            assert lmax <= self._get_upper_bound() + 1e-12
            lmax *= 1.01  # Increase by 1% to be robust to errors.
            self._lmax = lmax
        except sparse.linalg.ArpackNoConvergence:
            raise ValueError('The Lanczos method did not converge. '
                             'Try to use bounds.')
    elif method == 'bounds':
        self._lmax = self._get_upper_bound()
    else:
        raise ValueError('Unknown method {}'.format(method))
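
# Standalone sketch of the Lanczos estimate used above: for the Laplacian of
# a path graph, eigsh with k=1 recovers lmax, which stays below the generic
# upper bound of twice the maximum degree (2 * 2 = 4 here).
import numpy as np
from scipy import sparse
import scipy.sparse.linalg

N_demo = 50
deg = np.array([1.0] + [2.0] * (N_demo - 2) + [1.0])
L_demo = (sparse.diags(deg)
          - sparse.diags(np.ones(N_demo - 1), 1)
          - sparse.diags(np.ones(N_demo - 1), -1)).tocsr()
lmax_demo = sparse.linalg.eigsh(L_demo, k=1, return_eigenvectors=False)[0]
print(lmax_demo)  # just below 4 for a long path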
import numpy as np
import scipy.sparse as sprs
import scipy.sparse.linalg  # registers sprs.linalg


def to_dense(D):
    """Return D as a dense ndarray (pass ndarrays through unchanged)."""
    if sprs.issparse(D):
        return D.toarray()
    elif isinstance(D, np.ndarray):
        return D


def solve(M, r, sym_pos=False):
    """Least-squares solve via lsqr; sym_pos is accepted but unused."""
    return sprs.linalg.lsqr(M, r)[0]
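
# Hedged usage note: because lsqr works in the least-squares sense, solve()
# also handles rectangular systems that a direct solver would reject.
M_demo = sprs.csr_matrix(np.array([[2.0, 0.0], [0.0, 3.0], [1.0, 1.0]]))
r_demo = np.array([2.0, 3.0, 2.0])
print(solve(M_demo, r_demo))  # ~[1. 1.]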
# Feature-assembly fragment: the condition owning this else-branch was lost
# in extraction, so the branch body is shown as a hedged standalone function
# (`label` is produced elsewhere in the host code). The 'index' column holds
# the raw text that CountVectorizer turns into n-gram counts.
import gc

import numpy as np
from scipy import sparse
from sklearn.feature_extraction.text import CountVectorizer


def assemble_features(training, predict, label):  # assumed signature
    non_cv_col = training.columns.tolist()
    non_cv_col.remove('index')
    train_cv = training['index']
    predict_cv = predict['index']
    training = training[non_cv_col]
    predict = predict[non_cv_col]
    training = training.astype(np.float16)
    predict = predict.astype(np.float16)
    cv = CountVectorizer(ngram_range=(1, 1), max_df=0.8, min_df=5)
    train_cv = cv.fit_transform(train_cv)
    predict_cv = cv.transform(predict_cv)
    training = sparse.hstack((training, train_cv))
    predict = sparse.hstack((predict, predict_cv))
    del train_cv
    del predict_cv
    gc.collect()
    training = training.tocsr()
    predict = predict.tocsr()
    print("training, predict shape:", training.shape, predict.shape)
    return training, label, predict
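
# Minimal sketch of the pattern above: dense numeric columns hstacked with a
# CountVectorizer block into one CSR matrix (all names illustrative):
import numpy as np
from scipy import sparse
from sklearn.feature_extraction.text import CountVectorizer

numeric = np.array([[0.5, 1.0], [0.1, 2.0], [0.9, 0.0]], dtype=np.float32)
texts = ["sparse matrix demo", "matrix demo", "sparse demo demo"]
cv_demo = CountVectorizer(ngram_range=(1, 1))
combined = sparse.hstack((numeric, cv_demo.fit_transform(texts))).tocsr()
print(combined.shape)  # (3, 2 + vocabulary size)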