Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_detect(self):
    # Adapted from CPython
    #
    # Runs dialect detection on two fixture samples and compares the
    # delimiter, quote character and escape character of each result
    # against fixed expected values.
    sniffer = Detector()

    def check(sample, delimiter, quotechar, escapechar):
        # Detect once per sample, then verify all three attributes.
        found = sniffer.detect(sample)
        self.assertEqual(found.delimiter, delimiter)
        self.assertEqual(found.quotechar, quotechar)
        self.assertEqual(found.escapechar, escapechar)

    check(self.sample1, ",", "", "")
    check(self.sample2, ":", "'", "")
def test_has_header_regex_special_delimiter(self):
    # Header detection on the sample8 fixture, first on its own and then
    # with the header2 fixture prepended.
    # NOTE(review): going by the test name, sample8 presumably uses a
    # delimiter that is special in regular expressions -- confirm against
    # the fixture definition.
    sniffer = Detector()

    bare_result = sniffer.has_header(self.sample8)
    self.assertEqual(bare_result, False)

    prefixed_result = sniffer.has_header(self.header2 + self.sample8)
    self.assertEqual(prefixed_result, True)
def test_delimiters(self):
    # Adapted from CPython
    #
    # Checks the delimiter reported by Detector.detect for a series of
    # fixture samples, with and without an explicit ``delimiters`` argument.
    sniffer = Detector()

    # No ``delimiters`` argument: the reported delimiter must be a
    # substring of the sample itself.
    found = sniffer.detect(self.sample3)
    self.assertIn(found.delimiter, self.sample3)

    # Same sample, now passing an explicit ``delimiters`` string.
    for candidates, expected in (("?,", "?"), ("/,", "/")):
        found = sniffer.detect(self.sample3, delimiters=candidates)
        self.assertEqual(found.delimiter, expected)

    # Fixtures with one fixed expected delimiter each; getattr keeps the
    # fixture lookups in the same order as the detect calls.
    for fixture, expected in (
        ("sample4", ";"),
        ("sample5", "\t"),
        ("sample6", "|"),
    ):
        found = sniffer.detect(getattr(self, fixture))
        self.assertEqual(found.delimiter, expected)

    # sample7 is checked for its quote character as well.
    found = sniffer.detect(self.sample7)
    self.assertEqual(found.delimiter, "|")
    self.assertEqual(found.quotechar, "'")

    # NOTE(review): the result for sample8 is not asserted on in the
    # visible code -- the assertions may have been cut off; confirm
    # against the full test file.
    found = sniffer.detect(self.sample8)
def test_has_header(self):
    # Header detection on the sample1 fixture: expected False for the
    # sample alone and True once the header1 fixture is prepended.
    sniffer = Detector()

    bare_result = sniffer.has_header(self.sample1)
    self.assertEqual(bare_result, False)

    prefixed_result = sniffer.has_header(self.header1 + self.sample1)
    self.assertEqual(prefixed_result, True)
-------
rows: generator
Returns file as a generator over rows as dictionaries.
Raises
------
NoDetectionResult
When the dialect detection fails.
"""
if encoding is None:
encoding = get_encoding(filename)
with open(filename, "r", newline="", encoding=encoding) as fid:
if dialect is None:
data = fid.read(num_chars) if num_chars else fid.read()
dialect = Detector().detect(data, verbose=verbose)
fid.seek(0)
r = DictReader(fid, dialect=dialect)
for row in r:
yield row
Additional keyword arguments for the ``pandas.read_csv`` function. You
can specify the file encoding here if needed, and it will be used
during dialect detection.
"""
if not (os.path.exists(filename) and os.path.isfile(filename)):
raise ValueError("Filename must be a regular file")
pd = import_optional_dependency("pandas")
# Use provided encoding or detect it, and record it for pandas
enc = kwargs.get("encoding") or get_encoding(filename)
kwargs["encoding"] = enc
with open(filename, "r", newline="", encoding=enc) as fid:
data = fid.read(num_chars) if num_chars else fid.read()
dialect = Detector().detect(data)
csv_dialect = dialect.to_csv_dialect()
# This is used to catch pandas' warnings when a dialect is supplied.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message="^Conflicting values for .*",
category=pd.errors.ParserWarning,
)
df = pd.read_csv(filename, *args, dialect=csv_dialect, **kwargs)
return df
method : str
Dialect detection method to use. Either 'normal' for normal form
detection, 'consistency' for the consistency measure, or 'auto' for
first normal and then consistency.
Returns
-------
dialect : SimpleDialect
The detected dialect as a :class:`SimpleDialect`, or None if detection
failed.
"""
enc = encoding or get_encoding(filename)
with open(filename, "r", newline="", encoding=enc) as fp:
data = fp.read(num_chars) if num_chars else fp.read()
dialect = Detector().detect(data, verbose=verbose, method=method)
return dialect
-------
rows: generator
Returns file as a generator over rows.
Raises
------
NoDetectionResult
When the dialect detection fails.
"""
if encoding is None:
encoding = get_encoding(filename)
with open(filename, "r", newline="", encoding=encoding) as fid:
if dialect is None:
data = fid.read(num_chars) if num_chars else fid.read()
dialect = Detector().detect(data, verbose=verbose)
if dialect is None:
raise NoDetectionResult()
fid.seek(0)
r = reader(fid, dialect)
yield from r