# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_open_args(self):
    """Smoke-test that PathManager.open accepts the full io.open keyword set
    while strict kwargs checking is enabled, and returns a closable handle."""
    PathManager.set_strict_kwargs_checking(True)
    # Every keyword argument supported by io.open, with its default-ish value.
    open_kwargs = {
        "mode": "r",
        "buffering": 1,
        "encoding": "UTF-8",
        "errors": "ignore",
        "newline": None,
        "closefd": True,
        "opener": None,
    }
    handle = PathManager.open(self._tmpfile, **open_kwargs)  # type: ignore
    handle.close()
def validate_corpus_exists(
    corpus: pytorch_translate_data.ParallelCorpusConfig, split: str, is_npz: bool = True
):
    """
    Makes sure that the files in the `corpus` are valid files. `split` is used
    for logging.

    Args:
        corpus: parallel corpus config holding `source.data_file` and
            `target.data_file` paths.
        split: dataset split name, used only in the error message.
        is_npz: if True, the paths are plain files checked via PathManager;
            otherwise they are indexed datasets checked via IndexedDataset.

    Raises:
        ValueError: if the source or the target data file does not exist.
    """
    # The original duplicated the same check/raise pair four times; pick the
    # existence predicate once and loop. Source is checked before target,
    # preserving which file the error reports when both are missing.
    exists = PathManager.exists if is_npz else IndexedDataset.exists
    for data_file in (corpus.source.data_file, corpus.target.data_file):
        if not exists(data_file):
            raise ValueError(f"{data_file} for {split} not found!")
def load(self, path, num_examples_limit: Optional[int] = None):
    """Load an .npz archive from `path` into `self.buffer` / `self.offsets`.

    For very large "buffer" arrays the data is spilled to a disk-backed
    np.memmap instead of being held in RAM.

    Args:
        path: path readable by PathManager, containing an npz archive with
            "buffer" and "offsets" arrays.
        num_examples_limit: unused in this block; kept for interface
            compatibility with callers — TODO confirm whether it is consumed
            elsewhere.
    """
    with PathManager.open(path, "rb") as f:
        npz = np.load(f)
        # For big input data, we don't want the cpu to OOM.
        # Therefore, we are loading the huge buffer array into disc
        # and reading it from disc instead of memory.
        if npz["buffer"].nbytes > ARRAY_SIZE_LIMIT_FOR_MEMORY:
            # BUG FIX (CWE-377): the original used
            # `tempfile.NamedTemporaryFile().name` — the unreferenced object
            # is garbage-collected immediately, which deletes the file, so
            # another process could claim the name before np.memmap recreated
            # it. delete=False keeps the securely-created file on disk so the
            # path remains exclusively ours; memmap then reopens it by name.
            tmp = tempfile.NamedTemporaryFile(delete=False)
            tmp.close()  # np.memmap opens the file itself, by path
            self.buffer = np.memmap(
                tmp.name,
                dtype="float32",
                mode="w+",  # "w+" truncates and resizes the existing file
                shape=npz["buffer"].shape,
            )
            self.buffer[:] = npz["buffer"][:]
        else:
            self.buffer = npz["buffer"]
        self.offsets = npz["offsets"]
# NOTE(review): this chunk is the interior of an evaluator method — the
# enclosing `def` is not visible here, so the code is left token-identical;
# only nesting is restored and comments are added.
# Saving generated box proposals to file.
# Predicted box_proposals are in XYXY_ABS mode.
bbox_mode = BoxMode.XYXY_ABS.value
# Collect per-image ids, proposal boxes, and their objectness scores.
ids, boxes, objectness_logits = [], [], []
for prediction in self._predictions:
    ids.append(prediction["image_id"])
    boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
    objectness_logits.append(prediction["proposals"].objectness_logits.numpy())
proposal_data = {
    "boxes": boxes,
    "objectness_logits": objectness_logits,
    "ids": ids,
    "bbox_mode": bbox_mode,
}
# Persist the raw proposals for offline use/inspection.
with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
    pickle.dump(proposal_data, f)
# Without ground-truth annotations only the dump above is possible.
if not self._do_evaluation:
    self._logger.info("Annotations are not available for evaluation.")
    return
self._logger.info("Evaluating bbox proposals ...")
res = {}
# Suffixes feed into metric keys below, e.g. "AR@100", "ARs@1000".
areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
for limit in [100, 1000]:
    for area, suffix in areas.items():
        stats = _evaluate_box_proposals(
            self._predictions, self._coco_api, area=area, limit=limit
        )
        key = "AR{}@{:d}".format(suffix, limit)
        # stats["ar"] is a 0-dim tensor/array; report as a percentage.
        res[key] = float(stats["ar"].item() * 100)
        # NOTE(review): this chunk begins mid-expression — the tail of a list
        # comprehension whose opening call and enclosing `def` are outside this
        # view. Code is left token-identical; nesting is reconstructed and
        # comments added only.
        obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
    )
    # Crowd regions (iscrowd == 1) are excluded from instance targets.
    for obj in dataset_dict.pop("annotations")
    if obj.get("iscrowd", 0) == 0
]
instances = utils.annotations_to_instances(
    annos, image_shape, mask_format=self.mask_format
)
# Create a tight bounding box from masks, useful when image is cropped
if self.crop_gen and instances.has("gt_masks"):
    instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
# Drop instances that became empty (e.g. fully cropped out).
dataset_dict["instances"] = utils.filter_empty_instances(instances)
# USER: Remove if you don't do semantic/panoptic segmentation.
if "sem_seg_file_name" in dataset_dict:
    # Load the semantic-segmentation ground truth as uint8 label ids,
    # apply the same geometric transforms as the image, then store as a
    # long tensor under "sem_seg".
    with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
        sem_seg_gt = Image.open(f)
        sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
    sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
    sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
    dataset_dict["sem_seg"] = sem_seg_gt
return dataset_dict