How to use the xenonpy.descriptor.base.BaseFeaturizer class in xenonpy

To help you get started, we’ve selected a few xenonpy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github yoshida-lab / XenonPy / tests / descriptor / test_base_desc.py View on Github external
def test_base_feature_props():

    class _FakeFeaturier(BaseFeaturizer):
        """Stub featurizer: echoes its first positional input under the label ``labels``."""

        def __init__(self):
            super().__init__()

        @property
        def feature_labels(self):
            return ['labels']

        def featurize(self, *x, **kwargs):
            first = x[0]
            return first

    bf = _FakeFeaturier()
    with pytest.raises(ValueError, match='`on_errors`'):
        bf.on_errors = 'illegal'

    with pytest.raises(ValueError, match='`return_type`'):
github yoshida-lab / XenonPy / tests / descriptor / test_base_desc.py View on Github external
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
    warnings.filterwarnings("ignore", message="numpy.ndarray size changed")

    class _FakeFeaturier1(BaseFeaturizer):
        """Stub featurizer: echoes its first positional input under the label ``label1``."""

        def __init__(self, n_jobs=1):
            super().__init__(n_jobs=n_jobs)

        @property
        def feature_labels(self):
            return ['label1']

        def featurize(self, *x, **kwargs):
            first = x[0]
            return first

    class _FakeFeaturier2(BaseFeaturizer):
        """Stub featurizer: echoes its first positional input under the label ``label2``."""

        def __init__(self, n_jobs=1):
            super().__init__(n_jobs=n_jobs)

        @property
        def feature_labels(self):
            return ['label2']

        def featurize(self, *x, **kwargs):
            first = x[0]
            return first

    class _FakeFeaturier3(BaseFeaturizer):

        def __init__(self, n_jobs=1):
            super().__init__(n_jobs=n_jobs)
github yoshida-lab / XenonPy / tests / descriptor / test_base_desc.py View on Github external
def test_base_feature_3(data):
    """Check that ``fit_transform`` raises ``ValueError`` when ``featurize`` always raises it.

    The featurizer is built with its default ``on_errors='raise'``. The ``data``
    fixture is requested but not used in the visible body.
    """

    class _ErrorFeaturier(BaseFeaturizer):

        def __init__(self, n_jobs=1, on_errors='raise'):
            super().__init__(n_jobs=n_jobs, on_errors=on_errors)

        @property
        def feature_labels(self):
            return ['labels']

        def featurize(self, *x):
            # Every sample fails, unconditionally.
            raise ValueError

    failing = _ErrorFeaturier()
    assert isinstance(failing, BaseFeaturizer)

    samples = [1, 2, 3, 4]
    with pytest.raises(ValueError):
        failing.fit_transform(samples)
github yoshida-lab / XenonPy / xenonpy / contrib / extend_descriptors / descriptor / mordred_descriptor.py View on Github external
#  Copyright (c) 2019. stewu5. All rights reserved.
#  Use of this source code is governed by a BSD-style
#  license that can be found in the LICENSE file.

import pandas as pd
from mordred import Calculator, descriptors
from rdkit import Chem
from xenonpy.descriptor.base import BaseFeaturizer


class Mordred2DDescriptor(BaseFeaturizer):

    def __init__(self, *, on_errors='raise', return_type='any'):
        """
        Initialise the descriptor with the base class's ``n_jobs`` fixed to 0.

        Parameters
        ----------
        on_errors: string
            Forwarded unchanged to ``BaseFeaturizer.__init__``.
        return_type: string
            Forwarded unchanged to ``BaseFeaturizer.__init__``.
        """
        # n_jobs is pinned to 0 to skip the automatic wrapper in the XenonPy BaseFeaturizer class
        base_options = dict(n_jobs=0, on_errors=on_errors, return_type=return_type)
        super().__init__(**base_options)

        self.output = None
        self.__authors__ = ['Stephen Wu', 'TsumiNa']

    def featurize(self, x):
        # check if type(x) = list
        if isinstance(x, pd.Series):
            x = x.tolist()
        if not isinstance(x, list):
            x = [x]
        # check input format, assume SMILES if not RDKit-MOL
        if not isinstance(x[0], Chem.rdchem.Mol):
            x_mol = []
github yoshida-lab / XenonPy / xenonpy / descriptor / structure.py View on Github external
#  Copyright (c) 2019. yoshida-lab. All rights reserved.
#  Use of this source code is governed by a BSD-style
#  license that can be found in the LICENSE file.

import re

import numpy as np
import pymatgen as pm
from pymatgen.analysis.local_env import VoronoiNN

from xenonpy.descriptor.base import BaseDescriptor, BaseFeaturizer

__all__ = ['RadialDistributionFunction', 'OrbitalFieldMatrix', 'Structures']


class RadialDistributionFunction(BaseFeaturizer):
    """
    Calculate pair distribution descriptor for machine learning.

    """

    @property
    def feature_labels(self):
        """Return the string form of every entry of ``self._interval`` except the first."""
        return list(map(str, self._interval[1:]))

    def __init__(self, n_bins=201, r_max=20.0, *, n_jobs=-1, on_errors='raise', return_type='any'):
        """
        
        Parameters
        ----------
        n_bins: int
            Number of radial grid points.
github yoshida-lab / XenonPy / xenonpy / descriptor / fingerprint.py View on Github external
if x is None:
                raise ValueError('can not convert Mol from SMILES %s' % x_)
        if self.input_type == 'any':
            if not isinstance(x, Chem.rdchem.Mol):
                x_ = x
                x = Chem.MolFromSmiles(x)
                if x is None:
                    raise ValueError('can not convert Mol from SMILES %s' % x_)
        return list(rdMol.GetMorganFingerprintAsBitVect(x, self.radius, nBits=self.n_bits))

    @property
    def feature_labels(self):
        """Return one label per bit: ``ecfp3:0`` up to ``ecfp3:<n_bits - 1>``."""
        prefix = 'ecfp3:'
        return [prefix + str(bit) for bit in range(self.n_bits)]


class DescriptorFeature(BaseFeaturizer):

    def __init__(self, n_jobs=-1, *, input_type='mol', on_errors='raise', return_type='any'):
        """
        All descriptors in RDKit (length = 200) [may include NaN]
            see https://www.rdkit.org/docs/GettingStartedInPython.html#list-of-available-descriptors for the full list

        Parameters
        ----------
        n_jobs: int
            The number of jobs to run in parallel for both fit and predict.
            Can be -1 or the number of CPUs. Set -1 to use all CPU cores (default).
        input_type: string
            Set the specific type of transform input.
            Set to ``mol`` (default) to use ``rdkit.Chem.rdchem.Mol`` objects as input.
            When set to ``smiles``, the ``transform`` method accepts a list of SMILES strings as input.
            Set to ``any`` to use both.
github yoshida-lab / XenonPy / xenonpy / descriptor / fingerprint.py View on Github external
if x is None:
                raise ValueError('can not convert Mol from SMILES %s' % x_)
        if self.input_type == 'any':
            if not isinstance(x, Chem.rdchem.Mol):
                x_ = x
                x = Chem.MolFromSmiles(x)
                if x is None:
                    raise ValueError('can not convert Mol from SMILES %s' % x_)
        return list(MAC.GenMACCSKeys(x))

    @property
    def feature_labels(self):
        """Return the 167 labels ``maccs:0`` up to ``maccs:166``."""
        key_ids = map(str, range(167))
        return ['maccs:' + key for key in key_ids]


class FCFP(BaseFeaturizer):

    def __init__(self, n_jobs=-1, *, radius=3, n_bits=2048, input_type='mol', on_errors='raise', return_type='any'):
        """
        Morgan (Circular) fingerprints + feature-based (FCFP)
        The algorithm used is described in the paper Rogers, D. & Hahn, M. Extended-Connectivity Fingerprints.
        JCIM 50:742-54 (2010)

        Parameters
        ----------
        n_jobs: int
            The number of jobs to run in parallel for both fit and predict.
            Can be -1 or the number of CPUs. Set -1 to use all CPU cores (default).
        radius: int
            The radius parameter in the Morgan fingerprints, which is roughly half of the diameter parameter in FCFP,
            i.e., radius=2 is roughly equivalent to FCFP4.
        n_bits: int
github yoshida-lab / XenonPy / xenonpy / descriptor / fingerprint.py View on Github external
if x is None:
                raise ValueError('can not convert Mol from SMILES %s' % x_)
        if self.input_type == 'any':
            if not isinstance(x, Chem.rdchem.Mol):
                x_ = x
                x = Chem.MolFromSmiles(x)
                if x is None:
                    raise ValueError('can not convert Mol from SMILES %s' % x_)
        return list(rdMol.GetHashedTopologicalTorsionFingerprintAsBitVect(x, nBits=self.n_bits))

    @property
    def feature_labels(self):
        """Return one label per bit: ``ttfp:0`` up to ``ttfp:<n_bits - 1>``."""
        labels = []
        for position in range(self.n_bits):
            labels.append('ttfp:' + str(position))
        return labels


class MACCS(BaseFeaturizer):

    def __init__(self, n_jobs=-1, *, input_type='mol', on_errors='raise', return_type='any'):
        """
        The MACCS keys for a molecule. The result is a 167-bit vector. There are 166 public keys,
        but to maintain consistency with other software packages they are numbered from 1.

        Parameters
        ----------
        n_jobs: int
            The number of jobs to run in parallel for both fit and predict.
            Can be -1 or the number of CPUs. Set -1 to use all CPU cores (default).
        input_type: string
            Set the specific type of transform input.
            Set to ``mol`` (default) to use ``rdkit.Chem.rdchem.Mol`` objects as input.
            When set to ``smiles``, the ``transform`` method accepts a list of SMILES strings as input.
            Set to ``any`` to use both.
github yoshida-lab / XenonPy / xenonpy / descriptor / fingerprint.py View on Github external
raise ValueError('can not convert Mol from SMILES %s' % x_)
        if self.input_type == 'any':
            if not isinstance(x, Chem.rdchem.Mol):
                x_ = x
                x = Chem.MolFromSmiles(x)
                if x is None:
                    raise ValueError('can not convert Mol from SMILES %s' % x_)

        return list(Chem.RDKFingerprint(x, fpSize=self.fp_size))

    @property
    def feature_labels(self):
        """Return one label per position: ``rdkit:0`` up to ``rdkit:<fp_size - 1>``."""
        indices = range(self.fp_size)
        return ["rdkit:" + str(index) for index in indices]


class AtomPairFP(BaseFeaturizer):

    def __init__(self, n_jobs=-1, *, n_bits=2048, input_type='mol', on_errors='raise', return_type='any'):
        """
        Atom Pair fingerprints.
        Returns the atom-pair fingerprint for a molecule. The algorithm used is described here:
        R.E. Carhart, D.H. Smith, R. Venkataraghavan;
        "Atom Pairs as Molecular Features in Structure-Activity Studies: Definition and Applications"
        JCICS 25, 64-73 (1985).
        This is currently just in binary bits with fixed length after folding.

        Parameters
        ----------
        n_jobs: int
            The number of jobs to run in parallel for both fit and predict.
            Can be -1 or the number of CPUs. Set -1 to use all CPU cores (default).
        n_bits: int
github yoshida-lab / XenonPy / xenonpy / descriptor / fingerprint.py View on Github external
if x is None:
                raise ValueError('can not convert Mol from SMILES %s' % x_)
        if self.input_type == 'any':
            if not isinstance(x, Chem.rdchem.Mol):
                x_ = x
                x = Chem.MolFromSmiles(x)
                if x is None:
                    raise ValueError('can not convert Mol from SMILES %s' % x_)
        return list(rdMol.GetMorganFingerprintAsBitVect(x, self.radius, nBits=self.n_bits, useFeatures=True))

    @property
    def feature_labels(self):
        """Return one label per bit: ``fcfp3:0`` up to ``fcfp3:<n_bits - 1>``."""
        return list(map(lambda bit: 'fcfp3:' + str(bit), range(self.n_bits)))


class ECFP(BaseFeaturizer):

    def __init__(self, n_jobs=-1, *, radius=3, n_bits=2048, input_type='mol', on_errors='raise', return_type='any'):
        """
        Morgan (Circular) fingerprints (ECFP)
        The algorithm used is described in the paper Rogers, D. & Hahn, M. Extended-Connectivity Fingerprints.
        JCIM 50:742-54 (2010)

        Parameters
        ----------
        n_jobs: int
            The number of jobs to run in parallel for both fit and predict.
            Can be -1 or the number of CPUs. Set -1 to use all CPU cores (default).
        radius: int
            The radius parameter in the Morgan fingerprints, which is roughly half of the diameter parameter in ECFP,
            i.e., radius=2 is roughly equivalent to ECFP4.
        n_bits: int