Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def read_phosphosite(fname=phosphosite_file):
df = pandas.read_csv(fname, index_col=None, sep='\t', encoding='utf8')
statements = []
for _, row in df.iterrows():
sub_upid = row['SUB_ID']
if not pandas.isnull(sub_upid):
if sub_upid.find('-') != -1:
sub_upid = sub_upid.split('-')[0]
sub_hgnc_symbol = uniprot_client.get_gene_name(sub_upid)
sub_hgnc = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
else:
sub_hgnc_symbol = row['SUB_GENE']
sub_hgnc_id = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
sub_upid = hgnc_client.get_uniprot_id(sub_hgnc_id)
if sub_upid is None:
continue
sub = Agent(sub_hgnc_symbol,
db_refs={'UP': sub_upid,'HGNC': sub_hgnc})
residue = row['Actual_site'][0]
if len(row['Actual_site']) > 1:
position = row['Actual_site'][1:]
else:
position = None
ps = row['phosphosite']
kin_upid = row['KIN_ID']
if not pandas.isnull(kin_upid):
if kin_upid.find('-') != -1:
kin_upid = kin_upid.split('-')[0]
if not uniprot_client.is_human(kin_upid):
# TODO: support more types of URNs
if urn_type == 'agi-cas':
# Identifier is CAS, convert to CHEBI
chebi_id = get_chebi_id_from_cas(urn_id)
if chebi_id:
db_refs['CHEBI'] = chebi_id
db_name = get_chebi_name_from_id(chebi_id)
elif urn_type == 'agi-llid':
# This is an Entrez ID, convert to HGNC
hgnc_id = get_hgnc_from_entrez(urn_id)
if hgnc_id is not None:
db_refs['HGNC'] = hgnc_id
# Convert the HGNC ID to a Uniprot ID
uniprot_id = get_uniprot_id(hgnc_id)
if uniprot_id is not None:
db_refs['UP'] = uniprot_id
# Try to lookup HGNC name; if it's available, set it to the
# agent name
db_name = get_hgnc_name(hgnc_id)
elif urn_type in ['agi-meshdis', 'agi-ncimorgan', 'agi-ncimtissue',
'agi-ncimcelltype']:
if urn_id.startswith('C') and urn_id[1:].isdigit():
# Identifier is probably UMLS
db_refs['UMLS'] = urn_id
else:
# Identifier is MESH
urn_mesh_name = unquote(urn_id)
mesh_id, mesh_name = mesh_client.get_mesh_id_name(urn_mesh_name)
if mesh_id:
def get_genes_to_refseq_ids(problems):
    """Group the RefSeq IDs listed in the peptide file by gene symbol.

    The first column of every row is parsed as
    ``<gene symbol>.<RefSeq ID>:<site info>``. The first time a gene symbol
    is encountered it is validated through the HGNC client; only symbols
    that pass validation get an entry in the returned mapping.

    Parameters
    ----------
    problems :
        Collection exposing ``add`` (presumably a set - confirm against the
        caller). A ``(refseq_id, reason)`` tuple is added for every row
        whose gene symbol fails validation.

    Returns
    -------
    dict
        Maps each validated gene symbol to the set of RefSeq IDs seen
        for it.
    """
    refseqs_by_gene = {}
    rows = read_unicode_csv(peptide_file, delimiter='\t', skiprows=1)
    for row in rows:
        gene_sym, remainder = row[0].split('.', maxsplit=1)
        # The two-way unpack deliberately raises if the remainder does not
        # contain exactly one ':'; the site part itself is not used here.
        refseq_id, _ = remainder.split(':')

        # Symbols that already passed validation only accumulate more IDs.
        if gene_sym in refseqs_by_gene:
            refseqs_by_gene[gene_sym].add(refseq_id)
            continue

        # A symbol that fails below never enters the mapping, so each of
        # its rows is re-checked and reported with its own RefSeq ID.
        hgnc_id = hgnc_client.get_hgnc_id(gene_sym)
        if not hgnc_id:
            problems.add((refseq_id, 'invalid gene symbol'))
            continue

        main_up_id = hgnc_client.get_uniprot_id(hgnc_id)
        # NOTE(review): a ', ' in the value presumably means several
        # UniProt IDs were returned for one gene - confirm.
        if not main_up_id or ', ' in main_up_id:
            problems.add((refseq_id, 'could not get Uniprot ID from HGNC'))
            continue

        refseqs_by_gene[gene_sym] = {refseq_id}
    return refseqs_by_gene
def agent_from_gene_name(name):
    """Return an Agent named after a gene symbol, grounded via HGNC.

    The symbol is looked up through the HGNC client to obtain an HGNC ID,
    which in turn is used to look up a UniProt ID. Both lookup results are
    stored in the agent's ``db_refs`` under ``'HGNC'`` and ``'UP'`` exactly
    as returned by the client; no filtering of missing values is done here.
    """
    gene_agent = Agent(name)
    hgnc_ref = hgnc_client.get_hgnc_id(name)
    grounding = {
        'HGNC': hgnc_ref,
        'UP': hgnc_client.get_uniprot_id(hgnc_ref),
    }
    # Replace (rather than update) whatever db_refs the constructor set.
    gene_agent.db_refs = grounding
    return gene_agent
def get_target_agent(target):
    """Build an Agent for a target gene symbol with HGNC/UniProt grounding.

    The HGNC ID is looked up from the symbol and the UniProt ID from that
    HGNC ID; both results are passed to the Agent as ``db_refs`` under the
    keys ``'HGNC'`` and ``'UP'``, unchanged from what the client returned.
    """
    hgnc_ref = hgnc_client.get_hgnc_id(target)
    grounding = {
        'HGNC': hgnc_ref,
        'UP': hgnc_client.get_uniprot_id(hgnc_ref),
    }
    return Agent(target, db_refs=grounding)
def get_db_refs(egid):
    """Resolve an Entrez gene ID to an HGNC name and grounding dict.

    The lookup proceeds Entrez ID -> HGNC ID -> HGNC name -> UniProt ID.
    If any step yields a falsy result, the failure is logged at INFO level
    and ``(None, {})`` is returned.

    Returns
    -------
    tuple
        ``(hgnc_name, {'HGNC': hgnc_id, 'UP': up_id})`` when every lookup
        succeeds, otherwise ``(None, {})``.
    """
    hgnc_ref = hgnc_client.get_hgnc_from_entrez(egid)
    if not hgnc_ref:
        message = "No HGNC ID for Entrez ID: %s" % egid
        logger.info(message)
        return None, {}

    symbol = hgnc_client.get_hgnc_name(hgnc_ref)
    if not symbol:
        message = "No HGNC name for HGNC ID: %s" % hgnc_ref
        logger.info(message)
        return None, {}

    uniprot_ref = hgnc_client.get_uniprot_id(hgnc_ref)
    if not uniprot_ref:
        message = ("No Uniprot ID for EGID / HGNC ID / Symbol "
                   "%s / %s / %s" % (egid, hgnc_ref, symbol))
        logger.info(message)
        return None, {}

    grounding = {'HGNC': hgnc_ref, 'UP': uniprot_ref}
    return symbol, grounding
def _get_agent(concept, entity):
name = term_from_uri(concept)
namespace = namespace_from_uri(entity)
db_refs = {}
if namespace == 'HGNC':
agent_name = name
hgnc_id = hgnc_client.get_hgnc_id(name)
if hgnc_id is not None:
db_refs['HGNC'] = str(hgnc_id)
up_id = hgnc_client.get_uniprot_id(hgnc_id)
if up_id:
db_refs['UP'] = up_id
else:
logger.warning('HGNC entity %s with HGNC ID %s has no '
'corresponding Uniprot ID.' %
(name, hgnc_id))
else:
logger.warning("Couldn't get HGNC ID for HGNC symbol %s" %
name)
elif namespace in ('MGI', 'RGD'):
agent_name = name
db_refs[namespace] = name
elif namespace in ('PFH', 'SFAM'):
indra_name = bel_to_indra.get(name)
db_refs[namespace] = name
if indra_name is None: