How to use the sklearn.decomposition.PCA class in sklearn

To help you get started, we’ve selected a few sklearn.decomposition.PCA examples, based on popular ways it is used in public projects.
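Before diving into the project excerpts, here is a minimal, self-contained sketch of the basic pattern they all share: construct a PCA estimator, fit it to a data matrix, and project the data onto the leading components. The random data and shapes below are illustrative only.

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(100, 10)   # 100 samples, 10 features

pca = PCA(n_components=3)                    # keep the first three principal components
X_reduced = pca.fit_transform(X)             # fit the model, then project the data

print(X_reduced.shape)                       # (100, 3)
print(pca.explained_variance_ratio_)         # fraction of variance captured per component
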


github intel / Resilient-ML-Research-Platform / atdml / tasks / ml / pca_skLearn.py
X_tr = pca.transform(X)
        print "INFO: X_tr.shape=",X_tr.shape
        
        if len(X_tr[0])<3:
            print "ERROR: Dataset has dimension less than 3"
            
        elif n_components <3:
            print "WARNING: set n_components to 3 for 3D graph"
            n_components=3
        
        X_reduced = X_tr[:,0:n_components]
        k=n_components
        print "INFO: sklearn_PCA_transform: n_components =",n_components, ", threshold=",threshold
        print "RESULT: PCA ratio_vec=",ratio_vec
    elif k >0: # by n_components  ===============
        pca = PCA(n_components=k)    
        pca.fit(X)
        X_reduced = pca.transform(X)
        print "INFO: PCA n_components =",k
    
    if X_reduced is not None:
        print "INFO: sklearn_PCA_transform: X_reduced.shape=",X_reduced.shape

    return (X_reduced, k, pca)
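The excerpt above chooses the output dimensionality either from an explained-variance threshold (via its `ratio_vec` and `threshold` variables) or from a fixed `k`. A hedged, self-contained sketch of the same idea, with illustrative data and variable names:

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(200, 20)
threshold = 0.9

pca = PCA().fit(X)                                    # keep all components first
ratio_vec = np.cumsum(pca.explained_variance_ratio_)  # cumulative explained-variance ratio
k = int(np.searchsorted(ratio_vec, threshold)) + 1    # smallest k reaching the threshold
X_reduced = pca.transform(X)[:, :k]

# Equivalent shortcut: a float n_components asks sklearn to pick k from the threshold.
pca = PCA(n_components=threshold, svd_solver="full")
X_reduced = pca.fit_transform(X)
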
github bioFAM / MOFA / mofapy / core / init_nodes.py
scale_covariates: 
        """

        # Initialise mean of the Q distribution
        if qmean is not None:
            if isinstance(qmean,str):
                if qmean == "random": # Random initialisation of latent variables
                    qmean = stats.norm.rvs(loc=0, scale=1, size=(self.N,self.K))

                elif qmean == "orthogonal": # Latent variables are initialised randomly but ensuring orthogonality
                    pca = sklearn.decomposition.PCA(n_components=self.K, copy=True, whiten=True)
                    pca.fit(stats.norm.rvs(loc=0, scale=1, size=(self.N,9999)).T)
                    qmean = pca.components_.T

                elif qmean == "pca": # Latent variables are initialised from PCA in the concatenated matrix
                    pca = sklearn.decomposition.PCA(n_components=self.K, copy=True, whiten=True)
                    pca.fit(s.concatenate(self.data,axis=1).T)
                    qmean = pca.components_.T

            elif isinstance(qmean,s.ndarray):
                assert qmean.shape == (self.N,self.K)

            elif isinstance(qmean,(int,float)):
                qmean = s.ones((self.N,self.K)) * qmean

            else:
                print("Wrong initialisation for Z")
                exit()

        # Add covariates
        if covariates is not None:
            assert scale_covariates is not None, "If you use covariates also define data_opts['scale_covariates']"
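In the MOFA excerpt, the "pca" and "orthogonal" options build an initial latent matrix from whitened principal components; because the PCA is fitted on the transposed data, `pca.components_.T` ends up with one row per sample. A hedged sketch of that initialisation step, with made-up shapes (`N` samples, `D` features, `K` factors):

import numpy as np
import sklearn.decomposition

N, D, K = 100, 500, 5
data = np.random.RandomState(0).rand(N, D)

pca = sklearn.decomposition.PCA(n_components=K, copy=True, whiten=True)
pca.fit(data.T)               # the samples of `data` become the "features" of data.T
qmean = pca.components_.T     # (N, K) initial latent variables
print(qmean.shape)            # (100, 5)
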
github algorithmica-repository / datascience / 2019-jan / 8.feature reduction / lpca1.py
X, y = make_classification(n_samples = 100,
                                       n_features = 3,
                                       n_informative = 2,
                                       n_redundant = 1,
                                       n_classes = 2,
                                       weights = [.4, .6])

X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    test_size=0.1, 
                                                    random_state=1)
corr = np.corrcoef(X_train, rowvar=False)
sns.heatmap(corr)
plot_data_3d(X_train, y_train) 

lpca = decomposition.PCA(2)
pca_data = lpca.fit_transform(X_train)
print(lpca.explained_variance_ratio_)
plot_data_2d(pca_data, y_train, ['PC1', 'PC2'])
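The same pattern as a self-contained sketch, with the project's `plot_data_2d` / `plot_data_3d` helpers (not shown above) replaced by plain matplotlib calls:

import matplotlib.pyplot as plt
from sklearn import decomposition
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=100, n_features=3, n_informative=2,
                           n_redundant=1, n_classes=2, weights=[.4, .6])

lpca = decomposition.PCA(2)                 # project onto the two leading components
pca_data = lpca.fit_transform(X)
print(lpca.explained_variance_ratio_)

plt.scatter(pca_data[:, 0], pca_data[:, 1], c=y)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.show()
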
github calico / basenji / bin / basenji_scent.py
#######################################################
  # model parameters and placeholders
  #######################################################
  # read parameters
  job = basenji.dna_io.read_job_params(options.params_file)

  job['num_targets'] = targets.shape[1]

  # construct model
  print('Constructing model')
  sys.stdout.flush()

  if job.get('model', 'autoencoder') == 'pca':
    # construct
    model = PCA(n_components=job['latent_dim'])

    # train
    model.fit(targets[tv_line:])

    # validate
    latent_valid = model.transform(targets[:tv_line])
    recon_valid = model.inverse_transform(latent_valid)
    valid_var = targets[:tv_line].var()
    recon_var = (targets[:tv_line] - recon_valid).var()
    r2 = 1.0 - np.divide(recon_var, valid_var)
    print('Valid R2: %7.5f' % r2.mean())

    # save
    joblib.dump(model, model_out_file)

    if options.reconstruct_out_pre:
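Here PCA stands in for an autoencoder: `transform` is the encoder, `inverse_transform` the decoder, and held-out rows are scored with an R²-style reconstruction statistic. A hedged, self-contained version with illustrative shapes in place of the job parameters:

import numpy as np
from sklearn.decomposition import PCA

targets = np.random.RandomState(0).rand(1000, 50)
tv_line = 200                                    # first rows held out for validation

model = PCA(n_components=8)
model.fit(targets[tv_line:])                     # train on the remaining rows

latent_valid = model.transform(targets[:tv_line])
recon_valid = model.inverse_transform(latent_valid)

valid_var = targets[:tv_line].var()
recon_var = (targets[:tv_line] - recon_valid).var()
print('Valid R2: %7.5f' % (1.0 - recon_var / valid_var))
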
github pelednoam / mmvt / src / utils / utils.py
def pca(x, comps_num=1):
    import sklearn.decomposition as deco

    remove_cols = np.where(np.all(x == np.mean(x, 0), 0))[0]
    x = np.delete(x, remove_cols, 1)
    x = (x - np.mean(x, 0)) / np.std(x, 0)
    pca = deco.PCA(comps_num)
    x = x.T
    x_r = pca.fit(x).transform(x)
    return x_r
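The helper above drops constant columns and z-scores the rest before fitting PCA. A hedged sketch of the same preprocessing using scikit-learn transformers instead of the manual numpy steps (the original also transposes `x` before fitting, which is specific to that project and omitted here):

import numpy as np
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

x = np.random.RandomState(0).rand(50, 10)
x[:, 3] = 1.0                                    # a constant column PCA should not see

pipe = make_pipeline(VarianceThreshold(), StandardScaler(), PCA(n_components=1))
x_r = pipe.fit_transform(x)
print(x_r.shape)                                 # (50, 1)
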
github catalyst-team / catalyst / catalyst / contrib / scripts / create_index_model.py
def main(args, _=None):
    print("[==       Loading features       ==]")
    features = None
    for in_npy in args.in_npy.split(","):
        features_ = np.load(in_npy, mmap_mode="r")
        if features is None:
            features = features_
        else:
            features = np.concatenate((features, features_), axis=0)

    if args.n_hidden is not None:
        pipeline = Pipeline(
            [
                ("scale", StandardScaler()),
                ("pca", PCA(n_components=args.n_hidden, random_state=42)),
                ("normalize", Normalizer()),
            ]
        )

        print("[==     Transforming features    ==]")
        features = pipeline.fit_transform(features)
        np.save(args.out_npy, features)

        print(
            "[ Explained variance ratio: {ratio:.4} ]".format(
                ratio=pipeline.named_steps["pca"].explained_variance_ratio_.
                sum()
            )
        )

        print("[==        Saving pipeline       ==]")
github sirius-ai / MobileFaceNet_TF / verification.py
diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), 1)

    global max_threshold
    global min_threshold

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # print('train_set', train_set)
        # print('test_set', test_set)
        if pca > 0:
            print('doing pca on', fold_idx)
            embed1_train = embeddings1[train_set]
            embed2_train = embeddings2[train_set]
            _embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
            # print(_embed_train.shape)
            pca_model = PCA(n_components=pca)
            pca_model.fit(_embed_train)
            embed1 = pca_model.transform(embeddings1)
            embed2 = pca_model.transform(embeddings2)
            embed1 = sklearn.preprocessing.normalize(embed1)
            embed2 = sklearn.preprocessing.normalize(embed2)
            # print(embed1.shape, embed2.shape)
            diff = np.subtract(embed1, embed2)
            dist = np.sum(np.square(diff), 1)

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        # print('best_threshold_index', best_threshold_index, acc_train[best_threshold_index])
        for threshold_idx, threshold in enumerate(thresholds):
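The key PCA detail in this excerpt is that the model is fitted only on the training fold's embeddings and then applied to both full embedding sets before L2-normalisation. A hedged sketch of that step in isolation, with illustrative shapes and fold indices:

import numpy as np
import sklearn.preprocessing
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
embeddings1 = rng.rand(300, 128)
embeddings2 = rng.rand(300, 128)
train_set = np.arange(200)                        # indices of the training fold

_embed_train = np.concatenate((embeddings1[train_set], embeddings2[train_set]), axis=0)
pca_model = PCA(n_components=64)
pca_model.fit(_embed_train)                       # fit on training-fold embeddings only

embed1 = sklearn.preprocessing.normalize(pca_model.transform(embeddings1))
embed2 = sklearn.preprocessing.normalize(pca_model.transform(embeddings2))
dist = np.sum(np.square(embed1 - embed2), 1)      # squared distance per pair
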
github cryoem / eman2 / programs / e2spt_pcasplit.py
if options.clean:
		#### do pca twice to remove outliers	
		pca=PCA(options.nbasis)
		pout=pca.fit_transform(imgsnp)
		dst=np.linalg.norm(pout-np.mean(pout, 0), axis=1)
		outlr=dst>np.mean(dst)+np.std(dst)*2
		
		np.savetxt("{}/pca_rmoutlier.txt".format(options.outpath), 
			np.hstack([ptclids, pout]))
		print("Removing {} outliers...".format(np.sum(outlr)))
		
		imgsnp=imgsnp[outlr==False]
		ptclids=ptclids[outlr==False]
	
	
	pca=PCA(options.nbasis)
	pout=pca.fit_transform(imgsnp)
	np.savetxt("{}/pca_ptcls.txt".format(options.outpath), 
		np.hstack([ptclids, pout]))

	basisfile = "{}/pca_basis.hdf".format(options.outpath)
	#threed.process("math.meanshrink",{"n":options.shrink}).write_image(basisfile, 0)
	l=len(data[0])
	for i,c in enumerate(pca.components_):
		eg=c[:l]+c[l:]*1j
		egmap=eg.reshape((sz,sz,sz))
		o=np.real(np.fft.ifftn(np.fft.ifftshift(egmap)))
		m=from_numpy(o.copy())
		m.write_image(basisfile,i)

	print("Classifying particles...")
	kmeans = KMeans(n_clusters=options.nclass).fit(pout)
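The excerpt runs PCA twice: a first pass to flag points far from the mean projection as outliers, and a second pass on the cleaned set whose projections feed KMeans. A hedged sketch of that two-pass idea with made-up data and parameters:

import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
imgsnp = rng.rand(400, 256)
nbasis, nclass = 10, 3

pout = PCA(nbasis).fit_transform(imgsnp)                 # first pass
dst = np.linalg.norm(pout - np.mean(pout, 0), axis=1)
keep = dst <= np.mean(dst) + 2 * np.std(dst)             # drop points beyond 2 sigma
imgsnp = imgsnp[keep]

pca = PCA(nbasis)                                        # second pass on the cleaned set
pout = pca.fit_transform(imgsnp)
labels = KMeans(n_clusters=nclass).fit(pout).labels_
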
github danoneata / fisher_vectors / preprocess / pca.py
def compute_pca(data, n_components):
    """ Computes PCA on a subset of nr_samples of descriptors. """
    pca = PCA(n_components=n_components)
    pca.fit(data)
    return pca
github pavlin-policar / openTSNE / openTSNE / initialization.py
n_components: int
        The dimension of the embedding space.

    random_state: Union[int, RandomState]
        If the value is an int, random_state is the seed used by the random
        number generator. If the value is a RandomState instance, then it will
        be used as the random number generator. If the value is None, the random
        number generator is the RandomState instance used by `np.random`.

    Returns
    -------
    initialization: np.ndarray

    """
    pca_ = PCA(
        n_components=n_components, svd_solver=svd_solver, random_state=random_state
    )
    embedding = pca_.fit_transform(X)

    # The PCA embedding may have high variance, which leads to poor convergence
    normalization = np.std(embedding[:, 0]) * 100
    embedding /= normalization

    return np.ascontiguousarray(embedding)
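A hedged, self-contained sketch of this initialisation: project to the embedding dimension with PCA, then rescale so the first coordinate has a small standard deviation (the `svd_solver` argument from the original is left at its default here):

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(1000, 50)
n_components = 2

pca_ = PCA(n_components=n_components, random_state=42)
embedding = pca_.fit_transform(X)
embedding /= np.std(embedding[:, 0]) * 100       # shrink the spread of the initial layout
initialization = np.ascontiguousarray(embedding)
print(initialization.shape)                      # (1000, 2)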