import numpy as np
from ampligraph.evaluation import create_mappings, to_idx


def test_to_idx():
    X = np.array([['a', 'x', 'b'], ['c', 'y', 'd']])
    X_idx_expected = [[0, 0, 1], [2, 1, 3]]
    rel_to_idx, ent_to_idx = create_mappings(X)
    X_idx = to_idx(X, ent_to_idx=ent_to_idx, rel_to_idx=rel_to_idx)
    np.testing.assert_array_equal(X_idx, X_idx_expected)
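# For intuition, a minimal sketch of what create_mappings and to_idx compute
# (assuming AmpliGraph's convention: subjects and objects share one entity
# index space, predicates get their own). The *_sketch names are illustrative,
# not the library's implementation.
def create_mappings_sketch(X):
    ents = sorted(set(X[:, 0]).union(X[:, 2]))
    rels = sorted(set(X[:, 1]))
    return ({r: i for i, r in enumerate(rels)},
            {e: i for i, e in enumerate(ents)})


def to_idx_sketch(X, ent_to_idx, rel_to_idx):
    # Replace each label with its integer index, column by column.
    return np.column_stack([[ent_to_idx[s] for s in X[:, 0]],
                            [rel_to_idx[p] for p in X[:, 1]],
                            [ent_to_idx[o] for o in X[:, 2]]])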
Parameters
----------
remap : bool
    Remap the data even if it is already mapped. Useful when the mapping
    dictionaries have been updated.
"""
if self.using_existing_db:
    # data persisted in an existing DB is assumed to be already mapped
    return
from ..evaluation import to_idx
if len(self.rel_to_idx) == 0 or len(self.ent_to_idx) == 0:
self.generate_mappings()
for key in self.dataset:
    if isinstance(self.dataset[key], np.ndarray):
        if (not self.mapped_status[key]) or remap:
self.dataset[key] = to_idx(self.dataset[key],
ent_to_idx=self.ent_to_idx,
rel_to_idx=self.rel_to_idx)
self.mapped_status[key] = True
if not self.persistance_status[key]:
self._insert_triples(self.dataset[key], key)
self.persistance_status[key] = True
conn = sqlite3.connect(self.dbname)
cur = conn.cursor()
# to maintain integrity of data
cur.execute('UPDATE integrity_check SET validity=1 WHERE validity=0')
conn.commit()
cur.execute('''CREATE TRIGGER IF NOT EXISTS triples_table_ins_integrity_check_trigger
               AFTER INSERT ON triples_table
               BEGIN
                   UPDATE integrity_check SET validity=0 WHERE validity=1;
               END;
            ''')
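# Standalone sketch of the trigger-based integrity-check pattern above, runnable
# against an in-memory DB. Table layouts here are simplified placeholders, not
# the adapter's actual schema.
import sqlite3

demo = sqlite3.connect(':memory:')
c = demo.cursor()
c.execute('CREATE TABLE triples_table (subject INT, predicate INT, object INT)')
c.execute('CREATE TABLE integrity_check (validity INT)')
c.execute('INSERT INTO integrity_check VALUES (1)')  # mark data as valid
c.execute('''CREATE TRIGGER triples_table_ins_integrity_check_trigger
             AFTER INSERT ON triples_table
             BEGIN
                 UPDATE integrity_check SET validity=0 WHERE validity=1;
             END;''')
c.execute('INSERT INTO triples_table VALUES (0, 0, 1)')
# Any insert flips validity back to 0, flagging derived state as stale.
print(c.execute('SELECT validity FROM integrity_check').fetchone())  # (0,)
demo.close()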
"""
if not self.is_fitted:
msg = 'Model has not been fitted.'
logger.error(msg)
raise RuntimeError(msg)
tf.reset_default_graph()
self._load_model_from_trained_params()
if not isinstance(X, np.ndarray):
    X = np.array(X)
if not self.dealing_with_large_graphs:
if not from_idx:
X = to_idx(X, ent_to_idx=self.ent_to_idx, rel_to_idx=self.rel_to_idx)
x_tf = tf.Variable(X, dtype=tf.int32, trainable=False)
e_s, e_p, e_o = self._lookup_embeddings(x_tf)
scores = self._fn(e_s, e_p, e_o)
with tf.Session(config=self.tf_config) as sess:
sess.run(tf.global_variables_initializer())
return sess.run(scores)
else:
dataset_handle = NumpyDatasetAdapter()
dataset_handle.use_mappings(self.rel_to_idx, self.ent_to_idx)
dataset_handle.set_data(X, "test", mapped_status=from_idx)
self.eval_dataset_handle = dataset_handle
# build tf graph for predictions
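# Hedged usage sketch of the predict path above, on a toy graph. ComplEx and
# the hyperparameter values here are illustrative choices, not a recommendation.
import numpy as np
from ampligraph.latent_features import ComplEx

X_toy = np.array([['a', 'x', 'b'],
                  ['c', 'x', 'd'],
                  ['a', 'y', 'd']])
toy_model = ComplEx(k=10, eta=1, epochs=5, batches_count=1)
toy_model.fit(X_toy)
# from_idx=False (the default): labels are converted internally via to_idx.
print(toy_model.predict(np.array([['a', 'x', 'd']]), from_idx=False))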
def map_data(self, remap=False):
    """Map the dataset splits to indices using the ent_to_idx and rel_to_idx mappings.

    Parameters
    ----------
    remap : bool
        Remap the data even if it is already mapped. Useful when the mapping
        dictionaries have been updated.
    """
from ..evaluation import to_idx
if len(self.rel_to_idx) == 0 or len(self.ent_to_idx) == 0:
self.generate_mappings()
for key in self.dataset:
    if (not self.mapped_status[key]) or remap:
        self.dataset[key] = to_idx(self.dataset[key],
                                   ent_to_idx=self.ent_to_idx,
                                   rel_to_idx=self.rel_to_idx)
        self.mapped_status[key] = True
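# Hedged usage sketch of the adapter mapping flow above; the toy triples are
# placeholders. generate_mappings() is invoked lazily by map_data() itself.
import numpy as np
from ampligraph.datasets import NumpyDatasetAdapter

adapter = NumpyDatasetAdapter()
adapter.set_data(np.array([['a', 'x', 'b'], ['c', 'y', 'd']]), 'train')
adapter.map_data()  # builds mappings if absent, converts labels to indices
# map_data(remap=True) would force re-conversion, which is only meaningful
# right after the mapping dictionaries themselves have been regenerated.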
logger.debug('Using batch entities for generation of corruptions for early stopping')
self.eval_config['corrupt_side'] = self.early_stopping_params.get('corrupt_side', DEFAULT_CORRUPT_SIDE_EVAL)
self.early_stopping_best_value = None
self.early_stopping_stop_counter = 0
try:
# If the filter has already been set in the dataset adapter then just pass x_filter = True
x_filter = self.early_stopping_params['x_filter']
if isinstance(x_filter, np.ndarray):
if x_filter.ndim <= 1 or (np.shape(x_filter)[1]) != 3:
msg = 'Invalid size for input x_filter. Expected (n,3): got {}'.format(np.shape(x_filter))
logger.error(msg)
raise ValueError(msg)
# set the filter triples in the data handler
x_filter = to_idx(x_filter, ent_to_idx=self.ent_to_idx, rel_to_idx=self.rel_to_idx)
self.eval_dataset_handle.set_filter(x_filter, mapped_status=True)
# set the flag to perform filtering
self.set_filter_for_eval()
except KeyError:
    logger.debug('x_filter not found in early_stopping_params.')
# initialize evaluation graph in validation mode i.e. to use validation set
self._initialize_eval_graph("valid")
Probability of each triple to be true according to the Platt scaling calibration.
"""
if not self.is_calibrated:
msg = "Model has not been calibrated. Please call `model.calibrate(...)` before predicting probabilities."
logger.error(msg)
raise RuntimeError(msg)
tf.reset_default_graph()
self._load_model_from_trained_params()
w = tf.Variable(self.calibration_parameters[0], dtype=tf.float32, trainable=False)
b = tf.Variable(self.calibration_parameters[1], dtype=tf.float32, trainable=False)
x_idx = to_idx(X, ent_to_idx=self.ent_to_idx, rel_to_idx=self.rel_to_idx)
x_tf = tf.Variable(x_idx, dtype=tf.int32, trainable=False)
e_s, e_p, e_o = self._lookup_embeddings(x_tf)
scores = self._fn(e_s, e_p, e_o)
# Platt scaling: affine-transform the raw scores, then squash into [0, 1]
logits = -(w * scores + b)
probas = tf.sigmoid(logits)
with tf.Session(config=self.tf_config) as sess:
sess.run(tf.global_variables_initializer())
return sess.run(probas)
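# Plain-numpy sketch of the Platt transform above; the w and b values are
# made up for illustration (in practice they come from model.calibrate).
import numpy as np

def platt_proba_sketch(scores, w, b):
    logits = -(w * np.asarray(scores) + b)
    return 1.0 / (1.0 + np.exp(-logits))  # sigmoid

print(platt_proba_sketch([2.5, -1.0], w=-1.2, b=0.3))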
def _initialize_early_stopping(self):
"""Initializes and creates evaluation graph for early stopping.
"""
try:
self.x_valid = self.early_stopping_params['x_valid']
if isinstance(self.x_valid, np.ndarray):
if self.x_valid.ndim <= 1 or (np.shape(self.x_valid)[1]) != 3:
msg = 'Invalid size for input x_valid. Expected (n,3): got {}'.format(np.shape(self.x_valid))
logger.error(msg)
raise ValueError(msg)
# store the validation data in the data handler
self.x_valid = to_idx(self.x_valid, ent_to_idx=self.ent_to_idx, rel_to_idx=self.rel_to_idx)
self.train_dataset_handle.set_data(self.x_valid, "valid", mapped_status=True)
self.eval_dataset_handle = self.train_dataset_handle
elif isinstance(self.x_valid, AmpligraphDatasetAdapter):
# this assumes that the validation data has already been set in the adapter
self.eval_dataset_handle = self.x_valid
else:
msg = 'Invalid type for input x_valid. Expected ndarray or AmpligraphDatasetAdapter, ' \
      'got {}'.format(type(self.x_valid))
logger.error(msg)
raise ValueError(msg)
except KeyError:
msg = 'x_valid must be passed for early stopping.'
logger.error(msg)
raise KeyError(msg)
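# Hedged sketch of how early_stopping_params is typically wired into fit();
# X_train / X_valid are placeholders and the values are illustrative.
early_stopping_params = {
    'x_valid': X_valid,        # ndarray (n, 3) or an AmpligraphDatasetAdapter
    'criteria': 'mrr',         # metric monitored between checks
    'x_filter': X_train,       # optional: triples used to filter corruptions
    'burn_in': 100,            # epochs to wait before monitoring starts
    'check_interval': 10,      # evaluate every 10 epochs after burn-in
    'stop_interval': 5,        # stop after 5 consecutive non-improving checks
}
model.fit(X_train, early_stopping=True, early_stopping_params=early_stopping_params)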
X_neg : ndarray (shape [n, 3])
    Numpy array of negative triples.

Returns
-------
scores_pos : tf.Tensor
    Tensor with positive scores.
scores_neg : tf.Tensor
    Tensor with negative scores.
"""
x_neg = to_idx(X_neg, ent_to_idx=self.ent_to_idx, rel_to_idx=self.rel_to_idx)
x_neg_tf = tf.Variable(x_neg, dtype=tf.int32, trainable=False)
x_pos = to_idx(X_pos, ent_to_idx=self.ent_to_idx, rel_to_idx=self.rel_to_idx)
x_pos_tf = tf.Variable(x_pos, dtype=tf.int32, trainable=False)
e_s, e_p, e_o = self._lookup_embeddings(x_neg_tf)
scores_neg = self._fn(e_s, e_p, e_o)
e_s, e_p, e_o = self._lookup_embeddings(x_pos_tf)
scores_pos = self._fn(e_s, e_p, e_o)
return scores_pos, scores_neg
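# These positive/negative scores feed the Platt-scaling fit. A hedged
# end-to-end sketch; X_train, X_pos, X_neg and X_test are placeholder
# (n, 3) label arrays, not data from this codebase.
model.fit(X_train)
model.calibrate(X_pos, X_neg=X_neg, positive_base_rate=0.5)
probas = model.predict_proba(X_test)  # calibrated probabilities in [0, 1]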