Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_stream_csv(self):
# Feeds a series of table/dialect pairs to self._stream_test, each wrapped in
# its own subTest named after the case.
# NOTE(review): this excerpt has lost its indentation and ends on the header
# of the "double" subTest; the body of that last case is not visible here.
# Case "simple": semicolon delimiter, no quote character, no escape character.
table = [["A", "B", "C"], [1, 2, 3], [4, 5, 6]]
dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="")
with self.subTest(name="simple"):
self._stream_test(table, dialect)
# Case "escaped": the first header cell contains the delimiter and the
# dialect declares a backslash escape character.
table = [["A,0", "B", "C"], [1, 2, 3], [4, 5, 6]]
dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="\\")
with self.subTest(name="escaped"):
self._stream_test(table, dialect)
# Case "quoted": same table, but the dialect declares a double-quote
# quote character instead of an escape character.
table = [["A,0", "B", "C"], [1, 2, 3], [4, 5, 6]]
dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")
with self.subTest(name="quoted"):
self._stream_test(table, dialect)
# Case "double": the first header cell itself contains the quote character.
table = [['a"A,0"b', "B", "C"], [1, 2, 3], [4, 5, 6]]
dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")
with self.subTest(name="double"):
def test_code_4(self):
# Writes a table to a file via self._build_file with ISO-8859-1 encoding,
# runs the application's "code" command on that file through CommandTester,
# and prepares the expected generated-code text.
# NOTE(review): this excerpt has lost its indentation and stops inside the
# expected-output f-string; the rest of the string and any assertion on the
# command output are not visible here.
# The first header cell is non-ASCII and the second contains the delimiter.
table = [["Å", "B,D", "C"], [1, 2, 3], [4, 5, 6]]
dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="\\")
encoding = "ISO-8859-1"
tmpfname = self._build_file(table, dialect, encoding=encoding)
application = build_application()
command = application.find("code")
tester = CommandTester(command)
tester.execute(tmpfname)
# Everything from the next line on is f-string content, not Python code.
exp = f"""\
# Code generated with CleverCSV version {__version__}
import clevercsv
with open("{tmpfname}", "r", newline="", encoding="{encoding}") as fp:
reader = clevercsv.reader(fp, delimiter=",", quotechar="", escapechar="\\\\")
def test_abstraction_10(self):
    """Abstract a row whose quoted cell holds an escape char before a quote."""
    dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="|")
    abstraction = detect_pattern.make_abstraction('A,"B,C|"D"', dialect)
    self.assertEqual("CDC", abstraction)
def test_abstraction_1(self):
    """Abstract a plain three-cell row with no quote or escape character."""
    dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="")
    abstraction = detect_pattern.make_abstraction("A,B,C", dialect)
    self.assertEqual("CDCDC", abstraction)
def test_type_score_3(self):
    """Check type_score on the theta_3 example from the paper."""
    # theta_3 from paper
    rows = [
        ["7,5", " Mon, Jan 12", "6,40"],
        ["100", " Fri, Mar 21", "8,23"],
        ["8,2", " Thu, Sep 17", "2,71"],
        ["538,0", "", "7,26"],
        ["N/A", " Wed, Oct 4", "6,93"],
    ]
    # Cells are joined with ";" and the rows with a bare carriage return.
    lines = [";".join(row) for row in rows]
    data = "\r".join(lines)
    dialect = SimpleDialect(delimiter=";", quotechar='"', escapechar="")
    score = type_score(data, dialect)
    self.assertAlmostEqual(11 / 15, score)
def test_form_5(self):
    """Check is_form_5 on samples it should accept and samples it should reject."""
    dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")
    accepted = (
        '"A,B"\n"1,2"\n"3,4"',
        '"A,B"\n"1,"\n"2,3"',
    )
    rejected = (
        "A,B\n1,2\n3,4",
        "A,B\n1,\n2,3",
        '"A,""B"""\n"1,"\n"2,3"',
    )
    for sample in accepted:
        self.assertTrue(is_form_5(sample, dialect))
    for sample in rejected:
        self.assertFalse(is_form_5(sample, dialect))
def test_pattern_score_2(self):
    """Check pattern_score on the theta_2 example from the paper."""
    # theta_2 from paper
    data = (
        "7,5; Mon, Jan 12;6,40\n"
        "100; Fri, Mar 21;8,23\n"
        "8,2; Thu, Sep 17;2,71\n"
        "538,0;;7,26\n"
        '"NA"; Wed, Oct 4;6,93'
    )
    dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="")
    score = detect_pattern.pattern_score(data, dialect)
    self.assertAlmostEqual(10 / 3, score)
def test_detect_base(self):
# Feeds a series of table/dialect pairs to self._detect_test_wrap, each
# wrapped in its own subTest named after the case.
# NOTE(review): this excerpt has lost its indentation and ends on the header
# of the "double" subTest; the body of that last case is not visible here.
# Case "simple": semicolon delimiter, no quote character, no escape character.
table = [["A", "B", "C"], [1, 2, 3], [4, 5, 6]]
dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="")
with self.subTest(name="simple"):
self._detect_test_wrap(table, dialect)
# Case "escaped": the first header cell contains the delimiter and the
# dialect declares a backslash escape character.
table = [["A,0", "B", "C"], [1, 2, 3], [4, 5, 6]]
dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="\\")
with self.subTest(name="escaped"):
self._detect_test_wrap(table, dialect)
# Case "quoted": same table, but the dialect declares a double-quote
# quote character instead of an escape character.
table = [["A,0", "B", "C"], [1, 2, 3], [4, 5, 6]]
dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")
with self.subTest(name="quoted"):
self._detect_test_wrap(table, dialect)
# Case "double": the first header cell itself contains the quote character.
table = [['a"A,0"b', "B", "C"], [1, 2, 3], [4, 5, 6]]
dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")
with self.subTest(name="double"):
# NOTE(review): this is the tail of a function whose header is outside this
# excerpt; data, encoding, delims, quotechars and escapechars are defined in
# the part that is not visible, and the original indentation has been lost.
# escapechars are those that precede a delimiter or quotechar
for u, v in pairwise(data):
# u is only a candidate if is_potential_escapechar accepts it
if not is_potential_escapechar(u, encoding):
continue
# record u under every (delimiter, quotechar) pair for which the character
# that follows it is that delimiter or that quotechar
for delim, quotechar in itertools.product(delims, quotechars):
if v == delim or v == quotechar:
escapechars[(delim, quotechar)].add(u)
# remove dialects where the delimiter is always masked by quotes.
dialects = []
for delim in delims:
for quotechar in quotechars:
for escapechar in escapechars[(delim, quotechar)]:
# skip combinations that masked_by_quotechar flags
if masked_by_quotechar(data, quotechar, escapechar, delim):
continue
d = SimpleDialect(delim, quotechar, escapechar)
dialects.append(d)
# every combination that was not skipped above
return dialects
def _make_simple_dialect(self, dialect, **fmtparams):
    """Turn a dialect specification into a validated SimpleDialect.

    ``dialect`` may be the name of a registered csv dialect, a
    ``csv.Dialect`` instance, or a ``SimpleDialect``. Any other type raises
    ``ValueError``. Keyword arguments named ``delimiter``, ``quotechar``,
    ``escapechar`` or ``strict`` are set as attributes on the result; all
    other keyword arguments are ignored. The result is validated with its
    ``validate()`` method before being returned.
    """
    if isinstance(dialect, str):
        registered = csv.get_dialect(dialect)
        simple = SimpleDialect.from_csv_dialect(registered)
    elif isinstance(dialect, csv.Dialect):
        simple = SimpleDialect.from_csv_dialect(dialect)
    elif isinstance(dialect, SimpleDialect):
        # NOTE: the caller's object is used as-is, so the overrides below
        # are applied to it in place.
        simple = dialect
    else:
        raise ValueError("Unknown dialect type: %r" % dialect)

    overridable = ("delimiter", "quotechar", "escapechar", "strict")
    for name in fmtparams:
        if name in overridable:
            setattr(simple, name, fmtparams[name])

    simple.validate()
    return simple