Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def all(self):
    """Construct every composition featurizer listed for this set.

    Returns:
        list: Newly instantiated featurizer objects, in the order they are
            listed below.
    """
    fs = [
        cf.AtomicOrbitals(),
        cf.ElementProperty.from_preset("matminer"),
        cf.ElementProperty.from_preset("magpie"),
        cf.ElementProperty.from_preset("matscholar_el"),
        cf.ElementProperty.from_preset("deml"),
        cf.Meredig(),
        cf.ElementFraction(),
        cf.Stoichiometry(),
        cf.TMetalFraction(),
        cf.BandCenter(),
        cf.ValenceOrbital(),
        cf.YangSolidSolution(),
        cf.CationProperty.from_preset(preset_name="deml"),
        cf.OxidationStates.from_preset(preset_name="deml"),
        cf.ElectronAffinity(),
        cf.ElectronegativityDiff(),
        cf.IonProperty(fast=True),
        cf.Miedema(),
        cf.AtomicPackingEfficiency(),  # slower than the rest
        cf.CohesiveEnergy(),  # requires mpid present
    ]
    # The list was previously built and then discarded, so callers always
    # received None. Hand it back so the featurizers can actually be used.
    # NOTE(review): if the full file post-processes `fs` (e.g. filtering out
    # excluded featurizers), apply that here instead of returning it raw.
    return fs
def X_train(self):
    """Return the object stored in ``self._X_train``.

    NOTE(review): the neighbouring ``y_train`` accessor is decorated with
    ``@property`` while this one is a plain method in the visible source --
    confirm whether callers use call syntax or attribute syntax here.
    """
    stored = self._X_train
    return stored
@property
def y_train(self):
    """Read-only view of the object stored in ``self._y_train``."""
    stored = self._y_train
    return stored
if __name__ == '__main__':
    # Demo: featurize the glass-formation data by composition and run the
    # AutoSklearnML classification workflow on it.
    from automatminer.data.load import load_glass_formation
    from pymatgen.core import Composition
    from matminer.featurizers.composition import ElementProperty

    df = load_glass_formation()
    # Composition is callable on each formula entry directly, so no wrapper
    # lambda is needed.
    df['composition'] = df["formula"].apply(Composition)

    elemprop = ElementProperty.from_preset("matminer")
    df = elemprop.featurize_dataframe(df, col_id="composition")
    feature_cols = elemprop.feature_labels()
    target = "gfa"

    features = df[feature_cols]
    labels = df[target]
    # NOTE(review): output_folder ends in "tmp" and tmp_folder ends in "out";
    # the two paths look swapped -- confirm before relying on their contents.
    automl = AutoSklearnML(
        X=features,
        y=labels,
        dataset_name="ternary glass formation",
        time_left_for_this_task=60,
        per_run_time_limit=30,
        output_folder="/tmp/matbench_automl/tmp",
        tmp_folder="/tmp/matbench_automl/out",
    )
    automl.classification()
# Widen pandas' console output so large feature tables print without
# truncation. The former 'display.height' option is intentionally not set:
# it was removed from pandas (superseded by 'display.max_rows'), and setting
# it raises OptionError on current versions, aborting the script.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# Try predict ehull from initial structure
# Draw a random sample of n rows from the csv loaded by load_mp.
n = 500
print("Reading csv for {} compounds...".format(n))
df = load_mp('mp_all.csv').sample(n=n)

print("Constructing {} structures from dictionaries...".format(n))
# Every entry of both structure columns is passed through
# Structure.from_dict, replacing the column in place.
for structure_col in ('structure', 'initial structure'):
    df[structure_col] = [Structure.from_dict(d) for d in df[structure_col]]
# Compositions are taken from the 'structure' column, not the initial one.
df['composition'] = [struct.composition for struct in df['structure']]
# Pick featurizers: instantiate each candidate once.
ep = ElementProperty.from_preset("matminer")
bb = BagofBonds()
bf = BondFractions(approx_bonds=True)
sh = StructuralHeterogeneity()
co = ChemicalOrdering()
de = DensityFeatures()

# Group them by the dataframe column they consume. `bb` is constructed above
# but is not placed in either group here.
composition_featurizers = [ep]
structure_featurizers = [bf, sh, co, de]

# Featurizing: run each composition featurizer over the 'composition' column
# and accumulate the labels of the features it generates in `fls`.
fls = []
for cf in composition_featurizers:
    featurizer_name = type(cf).__name__
    print("Featurizing {}...".format(featurizer_name))
    # The return value is discarded; presumably df is updated in place --
    # TODO confirm against the installed matminer version.
    cf.fit_featurize_dataframe(df, 'composition', ignore_errors=True)
    fls.extend(cf.feature_labels())