"""
- elasticity_G_VRH
- elasticity_log10(G_VRH)
From matminer's dataset library.
"""
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
import pandas as pd
import numpy as np
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
mpdr = MPDataRetrieval()
df = mpdr.get_dataframe(
    criteria={
        "e_above_hull": {"$lt": 0.150},
        "formation_energy_per_atom": {"$lt": 0.150},
        "elasticity": {"$exists": 1, "$ne": None},
    },
    # "elements": },
    properties=[
        "material_id",
        "structure",
        "elasticity.K_VRH",
        "elasticity.G_VRH",
        "elasticity.G_Voigt",
        "elasticity.K_Voigt",
        "elasticity.G_Reuss",
    ],
)
from pymatgen import MPRester
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
import pandas as pd
import numpy as np
from tqdm import tqdm
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
chunksize = 1000
mpdr = MPDataRetrieval()
mpr = MPRester()
def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i : i + n]
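# Illustrative sanity check (not from the original script): chunks() yields
# consecutive n-sized slices, presumably so the material_id list retrieved
# below can be sent to MPRester in batches of `chunksize`.
assert list(chunks(list(range(5)), 2)) == [[0, 1], [2, 3], [4]]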
df = mpdr.get_dataframe(
    criteria={
        "e_above_hull": {"$lt": 0.150},
        "formation_energy_per_atom": {"$lt": 0.150},
        "band_gap": {"$exists": 1, "$ne": None},
    },
    properties=["material_id", "warnings"],
    index_mpid=False,
)
"""
Regenerating from the newest Materials Project calculations.
"""
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
from pymatgen import Element
import pandas as pd
import numpy as np
# pd.set_option('display.height', 1000)
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
mpdr = MPDataRetrieval()
# df = load_dataset("dielectric_constant")
df = mpdr.get_dataframe(
    criteria={"has": "diel"},
    properties=[
        "material_id",
        "diel.n",
        "formation_energy_per_atom",
        "e_above_hull",
        "structure",
    ],
    index_mpid=False,
)
df = df[(df["e_above_hull"] < 0.150) & (df["formation_energy_per_atom"] < 0.150)]
import numpy as np
import pandas as pd
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
from matminer.data_retrieval.retrieve_MongoDB import MongoDataRetrieval
# from matminer.descriptors.composition_features import get_pymatgen_descriptor
from pymongo import MongoClient
from references import Evaluator
pd.set_option('display.width', 2000)
pd.set_option('display.max_colwidth', 500)
pd.set_option('max_columns', None)
pd.set_option('display.max_rows', None)
api_key = 'AJsTCV3n1IOkBi97'
mp_retriever = MPDataRetrieval(api_key)
def pretty_formula(i, data):
    """Concatenate the A-site, B-site and anion strings of row i into a formula."""
    A = data.A[i]
    B = data.B[i]
    anion = data.anion[i]
    return A + B + anion
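# Example call (hypothetical row; the real frame comes from the MongoDB data
# used in score_n_store below):
_demo = pd.DataFrame({'A': ['Ba'], 'B': ['Ti'], 'anion': ['O3']})
assert pretty_formula(0, _demo) == 'BaTiO3'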
def score_n_store():
    # Set up the fit evaluator and a MongoDataRetrieval over the local
    # `unc.data_raw` collection.
    fit_eval = Evaluator()
    client = MongoClient('localhost', 27017)
    unc = client.unc.data_raw
    mdb_retriever = MongoDataRetrieval(unc)
from pymatgen import MPRester
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
import pandas as pd
import numpy as np
from tqdm import tqdm
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
chunksize = 1000
mpdr = MPDataRetrieval()
mpr = MPRester()
def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i : i + n]
df = mpdr.get_dataframe(
    criteria={"formation_energy_per_atom": {"$lt": 2.5}},
    properties=["material_id", "warnings"],
    index_mpid=False,
)
print(df["warnings"].astype(str).value_counts())
"""
From matminer's dataset library.
"""
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
import pandas as pd
# pd.set_option('display.height', 1000)
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
mpdr = MPDataRetrieval()
df = load_dataset("phonon_dielectric_mp")
print(df)
mpids = df["mpid"].tolist()
dfe = mpdr.get_dataframe(
    criteria={"material_id": {"$in": mpids}},
    properties=["e_above_hull", "formation_energy_per_atom", "material_id"],
    index_mpid=False,
)
dfe = dfe.rename(columns={"material_id": "mpid"})
df = pd.merge(df, dfe, how="inner")
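# Likely follow-up (an assumption, mirroring the dielectric script above):
# keep only reasonably stable entries before building the final dataset.
df = df[(df["e_above_hull"] < 0.150) & (df["formation_energy_per_atom"] < 0.150)]
print(df)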
    Args:
        max_nsites (int): The maximum number of sites to include in the query.
        initial_structures (bool): If True, also include the structures before
            relaxation.
        properties ([str]): List of properties supported by MPDataRetrieval.
        write_to_csv (bool): Whether to write the resulting dataframe to a CSV file.
        limit (int): Maximum length of the returned data; no limit if None.

    Returns (pandas.DataFrame):
        The retrieved/generated data.
    """
    properties = properties or [
        'pretty_formula', 'e_above_hull', 'band_gap', 'total_magnetization',
        'elasticity.elastic_anisotropy', 'elasticity.K_VRH', 'elasticity.G_VRH',
        'structure', 'energy', 'energy_per_atom', 'formation_energy_per_atom']
    mpdr = MPDataRetrieval()
    mpdf = None
    for nsites in list(range(1, 101)) + [{'$gt': 100}]:
        if nsites == max_nsites:
            break
        print("Processing nsites = {}".format(nsites))
        df = mpdr.get_dataframe(criteria={'nsites': nsites},
                                properties=properties,
                                index_mpid=True)
        if initial_structures:
            # Fetching the initial structures in a separate query (only when
            # requested) keeps each response below the API data limit.
            isdf = mpdr.get_dataframe(criteria={'nsites': nsites},
                                      properties=['initial_structure'],
                                      index_mpid=True)
            df = df.join(isdf, how='inner')
        if mpdf is None:
            mpdf = df
"""
From matminer's dataset library.
"""
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
import pandas as pd
# pd.set_option('display.height', 1000)
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
mpdr = MPDataRetrieval()
df = load_dataset("castelli_perovskites")
df = df[["structure", "e_form"]]
df = df.reset_index(drop=True)
print(df)
df.to_pickle("castelli.pickle.gz")