# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_validate_warnings_table_limit():
    """Validating with table_limit=1 yields exactly one table-limit warning."""
    descriptor = 'data/datapackages/invalid/datapackage.json'
    result = validate(descriptor, preset='datapackage', table_limit=1)
    warnings = result['warnings']
    assert len(warnings) == 1
    assert 'table(s) limit' in warnings[0]
def test_validate_table_invalid_row_limit(log):
    """With row_limit=2, only header errors and the first two rows are reported."""
    result = validate('data/invalid.csv', row_limit=2, infer_schema=True)
    expected = [
        (1, None, 3, 'blank-header'),
        (1, None, 4, 'duplicate-header'),
        (1, 2, 3, 'missing-value'),
        (1, 2, 4, 'missing-value'),
    ]
    assert log(result) == expected
def test_inspector_warnings_table_and_error_limit():
    """Table and error limits each contribute their own warning, in order."""
    descriptor = 'data/datapackages/invalid/datapackage.json'
    report = Inspector(table_limit=1, error_limit=1).inspect(
        descriptor, preset='datapackage')
    warnings = report['warnings']
    assert len(warnings) == 2
    assert 'table(s) limit' in warnings[0]
    assert 'error(s) limit' in warnings[1]
def test_pipeline_report_limit_in_range(self):
    """A report_limit of 1 caps the generated report at one result."""
    source = os.path.join(self.data_dir, 'report_limit_structure.csv')
    pipeline = Pipeline(source, processors=('structure',),
                        report_limit=1, options={})
    result, report = pipeline.run()
    results = report.generate()['results']
    self.assertEqual(len(results), 1)
def test_messytables_source_two(self):
    """A remote UTF-16LE-encoded CSV (messytables horror suite) is loadable."""
    url = 'https://raw.githubusercontent.com/okfn/messytables/master/horror/utf-16le_encoded.csv'
    pipeline = Pipeline(url)
    result, report = pipeline.run()
    self.assertTrue(pipeline.data)
def test_pipeline_row_limit_out_range(self):
    """Row limits beyond ROW_LIMIT_MAX are clamped to the maximum, including on processors."""
    source = os.path.join(self.data_dir, 'valid.csv')
    maximum = Pipeline.ROW_LIMIT_MAX
    pipeline = Pipeline(source, row_limit=maximum + 1)
    self.assertEqual(pipeline.row_limit, maximum)
    self.assertEqual(pipeline.pipeline[0].row_limit, maximum)
def test_pipeline_report_stream_none(self):
    """Passing report_stream=None is accepted and a valid file still passes."""
    source = os.path.join(self.data_dir, 'valid.csv')
    pipeline = Pipeline(source, processors=('schema',),
                        report_stream=None, options={})
    result, report = pipeline.run()
    self.assertTrue(result)
def test_from_url(self):
    """A pipeline constructed from a URL source exposes its data after running."""
    pipeline = Pipeline(self.data_url)
    result, report = pipeline.run()
    self.assertTrue(pipeline.data)
def test_pipeline_ignore_duplicate_columns_false(self):
    """By default, duplicate columns cause structure validation to fail."""
    source = os.path.join(self.data_dir, 'duplicate_columns.csv')
    pipeline = Pipeline(source, processors=('structure',))
    result, report = pipeline.run()
    self.assertFalse(result)
def test_pipeline_error_report_when_invalid_excel_error(self):
    """A corrupt Excel file fails and produces a single invalid_excel_error result."""
    source = os.path.join(self.data_dir, 'hmt', 'invalid_excel.xlsx')
    pipeline = Pipeline(source, fail_fast=True, format='excel')
    result, report = pipeline.run()
    results = report.generate()['results']
    self.assertFalse(result)
    self.assertEqual(len(results), 1)
    self.assertEqual(results[0]['result_id'], 'invalid_excel_error')