from tabulator import Stream


def test_stream_encoding_explicit_latin1():
    with Stream('data/special/latin1.csv', encoding='latin1') as stream:
        assert stream.encoding == 'iso8859-1'
        assert stream.read() == [['id', 'name'], ['1', 'english'], ['2', '©']]
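
# Hedged sketch, not from the original suite: when no encoding is given,
# Stream detects one and exposes it on the same attribute asserted above.
def example_stream_encoding_detected():
    with Stream('data/special/latin1.csv') as stream:
        print(stream.encoding)  # detected encoding, e.g. 'iso8859-1'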

def test_stream_skip_blank_at_the_end_issue_bco_dmo_33():
    source = 'data/special/skip-blank-at-the-end.csv'
    with Stream(source, headers=1, skip_rows=['#']) as stream:
        assert stream.headers == ['test1', 'test2']
        assert stream.read() == [['1', '2'], []]
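
# Hedged sketch, not from the original suite: skip_rows works the same way
# on inline text sources; the CSV content below is illustrative, not a
# repository fixture.
def example_stream_skip_comment_rows():
    source = 'id,name\n1,english\n# note\n2,german'
    with Stream(source, scheme='text', format='csv', headers=1, skip_rows=['#']) as stream:
        print(stream.read())  # the '# note' line is dropped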

def test_stream_xlsx_merged_cells():
    source = 'data/special/merged-cells.xlsx'
    with Stream(source) as stream:
        assert stream.read() == [['data', ''], ['', ''], ['', '']]
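
# Hedged sketch, not from the original suite: Stream also accepts
# fill_merged_cells=True, which copies a merged cell's value into every
# cell the merge spans instead of leaving blanks; the fixture path is
# assumed to match the test above.
def example_stream_xlsx_fill_merged_cells():
    with Stream('data/special/merged-cells.xlsx', fill_merged_cells=True) as stream:
        print(stream.read())  # spanned cells repeat the merged value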

def test_stream_compression_error_gz():
    source = 'id,filename\n1,dump.tar.gz'
    stream = Stream(source, scheme='text', format='csv')
    stream.open()

def test_stream_local_csv_zip_multiple_open():
    # That's how `tableschema.iter()` acts
    stream = Stream('data/table.csv.zip')
    stream.open()
    assert stream.headers is None
    assert stream.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
    stream.close()
    stream.open()
    assert stream.headers is None
    assert stream.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
    stream.close()

def test_stream_csv_dialect_should_not_persist_if_sniffing_fails_issue_goodtables_228():
    source1 = 'a;b;c\n#comment'
    source2 = 'a,b,c\n#comment'
    with Stream(source1, scheme='text', format='csv', headers=1, delimiter=';') as stream:
        assert stream.headers == ['a', 'b', 'c']
    with Stream(source2, scheme='text', format='csv', headers=1) as stream:
        assert stream.headers == ['a', 'b', 'c']
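
# Hedged sketch, not from the original suite: other CSV dialect options can
# be passed alongside delimiter; the quotechar value here is illustrative.
def example_stream_csv_dialect_options():
    source = "a;'b;x';c"
    with Stream(source, scheme='text', format='csv', delimiter=';', quotechar="'") as stream:
        print(stream.read())  # [['a', 'b;x', 'c']]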

def test_stream_xls_with_boolean():
    with Stream('data/special/table-with-booleans.xls') as stream:
        assert stream.headers is None
        assert stream.read() == [['id', 'boolean'], [1, True], [2, False]]

def test_stream_xls_sheet_by_name():
    source = 'data/special/sheet2.xls'
    with Stream(source, sheet='Sheet2') as stream:
        assert stream.fragment == 'Sheet2'
        assert stream.read() == [['id', 'name'], [1, 'english'], [2, '中国人']]
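
# Hedged sketch, not from the original suite: the sheet option also takes a
# 1-based position instead of a name; the index below assumes Sheet2 is the
# second sheet in the fixture workbook.
def example_stream_xls_sheet_by_index():
    with Stream('data/special/sheet2.xls', sheet=2) as stream:
        print(stream.fragment)  # name of the sheet actually opened
        print(stream.read())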

# Fragment of an ingestion routine; drop_bad_rows, get_encoding,
# force_strings, fill_missing_fields, info, format_to_json, get_headers,
# check_fields_match and log_sample_table come from the surrounding project.
parameters['post_parse'].append(drop_bad_rows)
parameters.update(encoding=get_encoding(parameters, resource))
if extension in ('.xls', '.xlsx'):
    parameters['post_parse'].append(force_strings)
if extension == '.json':
    fill_missing_fields(path)
    parameters['post_parse'].append(force_strings)
info('Ingesting file = %s', path)
info('Ingestion parameters = %s', format_to_json(parameters))
parameters.update(headers=get_headers(parameters, path))
with Stream(path, **parameters) as stream:
    check_fields_match(resource, stream)
    log_sample_table(stream)
    yield stream.iter(keyed=True)
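
# Hedged sketch of what the fragment above hands to its caller: with
# keyed=True, stream.iter() yields one dict per row, mapping header names
# to values (reusing the data/table.csv fixture from the examples below).
def example_stream_iter_keyed():
    with Stream('data/table.csv', headers=1) as stream:
        for row in stream.iter(keyed=True):
            print(row)  # e.g. {'id': '1', 'name': 'english'}
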
import io
import sys
from tabulator import Stream

print('Parse csv format:')
source = 'data/table.csv'
with Stream(source, headers='row1') as stream:
    print(stream.headers)
    for row in stream:
        print(row)

print('\nParse linear tsv format:')
source = 'data/table.tsv'
with Stream(source, headers='row1') as stream:
    print(stream.headers)
    for row in stream:
        print(row)

print('\nParse json with dicts:')
source = 'file://data/table-dicts.json'
with Stream(source) as stream:
    print(stream.headers)
    for row in stream:
        print(row)

print('\nParse json with lists:')
source = 'file://data/table-lists.json'
with Stream(source, headers='row1') as stream:
    print(stream.headers)
    for row in stream:
        print(row)