fluid.io.load_vars(
    exe, ckpt.latest_model_dir, main_program, predicate=if_exist)
# Compatible with older versions without best_score in checkpoint_pb2
try:
    best_score = ckpt.best_score
except AttributeError:
    best_score = -999
logger.info("PaddleHub model checkpoint loaded. current_epoch={}, "
            "global_step={}, best_score={:.5f}".format(
                ckpt.current_epoch, ckpt.global_step, best_score))
return True, ckpt.current_epoch, ckpt.global_step, best_score
logger.info("PaddleHub model checkpoint not found, start from scratch...")
return False, current_epoch, global_step, best_score
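
# A minimal resume sketch, assuming a load_checkpoint(checkpoint_dir, exe, main_program)
# helper that returns the (is_loaded, current_epoch, global_step, best_score) tuple
# built above; the function name and defaults here are illustrative, not PaddleHub API.
def resume_or_start(checkpoint_dir, exe, main_program):
    is_loaded, current_epoch, global_step, best_score = load_checkpoint(
        checkpoint_dir, exe, main_program)
    if not is_loaded:
        # nothing to restore: begin from epoch 1, step 0, with a sentinel best score
        current_epoch, global_step, best_score = 1, 0, -999
    return current_epoch, global_step, best_score
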
    all_predictions[example.qas_id] = nbest_json[0]["text"]
else:
    # predict "" iff the null score - the score of best non-null > threshold
    score_diff = score_null
    if best_non_null_entry:
        score_diff -= best_non_null_entry.start_logit + best_non_null_entry.end_logit
    scores_diff_json[example.qas_id] = score_diff
    if score_diff > null_score_diff_threshold:
        all_predictions[example.qas_id] = ""
    else:
        all_predictions[example.qas_id] = best_non_null_entry.text
all_nbest_json[example.qas_id] = nbest_json
"""Write final predictions to the json file and log-odds of null if needed."""
with open(output_prediction_file, "w") as writer:
    logger.info("Writing predictions to: %s" % (output_prediction_file))
    writer.write(
        json.dumps(all_predictions, indent=4, ensure_ascii=is_english) +
        "\n")
with open(output_nbest_file, "w") as writer:
    logger.info("Writing nbest to: %s" % (output_nbest_file))
    writer.write(
        json.dumps(all_nbest_json, indent=4, ensure_ascii=is_english) +
        "\n")
if version_2_with_negative:
    logger.info("Writing null_log_odds to: %s" % (output_null_log_odds_file))
    with open(output_null_log_odds_file, "w") as writer:
        writer.write(
            json.dumps(scores_diff_json, indent=4, ensure_ascii=is_english)
            + "\n")
def __init__(self):
    self.dataset_dir = os.path.join(DATA_HOME, "nlpcc-dbqa")
    if not os.path.exists(self.dataset_dir):
        ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
            url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
    else:
        logger.info("Dataset {} already cached.".format(self.dataset_dir))

    self._load_train_examples()
    self._load_test_examples()
    self._load_dev_examples()
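
# A rough, stdlib-only stand-in for the download-or-reuse pattern above: skip the
# download when the extracted dataset directory already exists. ensure_dataset and
# its parameters are hypothetical; PaddleHub's default_downloader does the real work.
import os
import tarfile
import urllib.request

def ensure_dataset(url, save_path, dirname):
    dataset_dir = os.path.join(save_path, dirname)
    if not os.path.exists(dataset_dir):
        archive = os.path.join(save_path, os.path.basename(url))
        urllib.request.urlretrieve(url, archive)
        with tarfile.open(archive) as tar:
            tar.extractall(save_path)  # assumes the archive unpacks into dirname
    return dataset_dir
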
if strategy is None:
    self._strategy = DefaultStrategy()
else:
    self._strategy = strategy

if enable_memory_optim:
    logger.warning(
        "The memory optimization feature has been dropped! PaddleHub no longer optimizes the memory of the program."
    )
self._enable_memory_optim = False

if checkpoint_dir is None:
    now = int(time.time())
    time_str = time.strftime("%Y%m%d%H%M%S", time.localtime(now))
    self._checkpoint_dir = "ckpt_" + time_str
else:
    self._checkpoint_dir = checkpoint_dir
logger.info("Checkpoint dir: {}".format(self._checkpoint_dir))
        from_module_attr_to_pyobj(module_attr.list.data[str(index)]))
elif module_attr.type == module_desc_pb2.SET:
    result = set()
    for index in range(len(module_attr.set.data)):
        result.add(
            from_module_attr_to_pyobj(module_attr.set.data[str(index)]))
elif module_attr.type == module_desc_pb2.MAP:
    result = {}
    for key, value in module_attr.map.data.items():
        key = get_pykey(key, module_attr.map.key_type[key])
        result[key] = from_module_attr_to_pyobj(value)
elif module_attr.type == module_desc_pb2.NONE:
    result = None
elif module_attr.type == module_desc_pb2.OBJECT:
    result = None
    logger.warning("can't convert module attr to python object")
else:
    result = None
    logger.warning("unknown type of module attr")
return result
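
# A simplified, self-contained analogue of the dispatch above: a tagged value is
# converted back to a native Python object recursively. The Attr record and its
# tag strings are hypothetical; only the recursion mirrors from_module_attr_to_pyobj.
from collections import namedtuple

Attr = namedtuple("Attr", ["type", "data"])

def attr_to_pyobj(attr):
    if attr.type == "LIST":
        return [attr_to_pyobj(v) for v in attr.data]
    if attr.type == "SET":
        return {attr_to_pyobj(v) for v in attr.data}
    if attr.type == "MAP":
        return {k: attr_to_pyobj(v) for k, v in attr.data.items()}
    if attr.type == "NONE":
        return None
    return attr.data  # scalar fallback (INT, FLOAT, STRING, ...)

print(attr_to_pyobj(Attr("MAP", {"k": Attr("LIST", [Attr("NONE", None), Attr("INT", 3)])})))
# -> {'k': [None, 3]}
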
all_predictions = collections.OrderedDict()
all_nbest_json = collections.OrderedDict()
scores_diff_json = collections.OrderedDict()

for (example_index, example) in enumerate(all_examples):
    features = example_index_to_features[example_index]

    prelim_predictions = []
    # keep track of the minimum score of null start+end of position 0
    score_null = 1000000  # large and positive
    min_null_feature_index = 0  # the paragraph slice with min null score
    null_start_logit = 0  # the start logit at the slice with min null score
    null_end_logit = 0  # the end logit at the slice with min null score
    for (feature_index, feature) in enumerate(features):
        if feature.unique_id not in unique_id_to_result:
            logger.info(
                "When using pyreader, the last batch may be incomplete, so feature %s in the last batch is discarded"
                % feature.unique_id)
            continue
        result = unique_id_to_result[feature.unique_id]
        start_indexes = _get_best_indexes(result.start_logits, n_best_size)
        end_indexes = _get_best_indexes(result.end_logits, n_best_size)
        # if we could have irrelevant answers, get the min score of irrelevant
        if version_2_with_negative:
            feature_null_score = result.start_logits[0] + result.end_logits[0]
            if feature_null_score < score_null:
                score_null = feature_null_score
                min_null_feature_index = feature_index
                null_start_logit = result.start_logits[0]
                null_end_logit = result.end_logits[0]
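
# _get_best_indexes is referenced above but not shown; a common sketch of the
# BERT-style helper (the project's actual implementation may differ slightly):
def _get_best_indexes(logits, n_best_size):
    # sort token positions by logit, descending, and keep the top n_best_size indexes
    index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True)
    return [index for index, _ in index_and_score[:n_best_size]]
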
var_info = copy.deepcopy(get_variable_info(output_var))
output_var = output_program.global_block().create_var(**var_info)
output_program.global_block().append_op(
    type="assign",
    inputs={'X': input_var},
    outputs={'Out': output_var})

block_map = {0: 0}
if need_log:
    logger.info("Connect program's input tensor")
for index, block in enumerate(next_program.blocks):
    if block.idx == 0:
        _copy_vars_and_ops_in_blocks(block, output_program.global_block())
    else:
        block_map[index] = len(output_program.blocks)
        logger.info(
            "block_%d in next_program merged into block_%d in pre_program" %
            (index, block_map[index]))
        new_block = output_program._create_block(
            parent_idx=block_map[block.parent_idx])
        _copy_vars_and_ops_in_blocks(block, new_block)
if need_log:
    logger.info("Connect program's input tensor done")
return output_program
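
# A toy model of the block_map bookkeeping above, with hypothetical Block objects:
# non-root blocks copied from next_program keep their nesting by remapping
# parent indices through block_map before being appended to the merged program.
class Block:
    def __init__(self, idx, parent_idx):
        self.idx = idx
        self.parent_idx = parent_idx

def merge_blocks(pre_blocks, next_blocks):
    merged = list(pre_blocks)
    block_map = {0: 0}
    for index, block in enumerate(next_blocks):
        if index == 0:
            continue  # block 0 of next_program is folded into the existing global block
        block_map[index] = len(merged)
        merged.append(Block(len(merged), block_map[block.parent_idx]))
    return merged, block_map
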
def print_arguments(args):
    logger.debug("----------- Configuration Arguments -----------")
    for arg, value in sorted(six.iteritems(vars(args))):
        logger.debug("%s: %s" % (arg, value))
    logger.debug("------------------------------------------------")