Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_extract_nested_arch_with_corrupted_compressed_should_extract_inner_archives_only_once(self):
    """
    Recursively extract the nested .tgz fixture, then run check_no_error on
    the collected events and compare the files on disk with the expected
    listing.
    """
    archive_loc = self.get_test_loc(
        'extract/nested_not_compressed/nested_with_not_compressed_gz_file.tgz',
        copy=True,
    )
    # Collect every event into a list before running any check.
    events = list(extract.extract(archive_loc, recurse=True))
    check_no_error(events)
    check_files(archive_loc, [
        'nested_with_not_compressed_gz_file.tgz',
        'nested_with_not_compressed_gz_file.tgz-extract/top/file',
        'nested_with_not_compressed_gz_file.tgz-extract/top/notcompressed.gz'
    ])
def test_extract_tar_with_broken_links(self):
    """
    Recursively extract the broken_link fixture directory, compare the
    resulting files with the expected listing, and assert that the
    extraction yielded exactly two events, each with an empty warnings list.
    """
    fixture_dir = self.get_test_loc('extract/broken_link', copy=True)
    events = list(extract.extract(fixture_dir, recurse=True))
    check_files(fixture_dir, (
        'broken-link.tar.bz2',
        'broken-link.tar.bz2-extract/openssl/test/Makefile',
    ))
    # One (empty) warnings list per event: two events, no warning on either.
    assert [[], []] == [event.warnings for event in events]
def test_extract_directory_with_office_docs(self):
test_dir = self.get_test_loc('extract/office_docs', copy=True)
result = list(extract.extract(test_dir, kinds=(extractcode.docs,), recurse=True))
expected = [
'abc.docx',
'abc.docx-extract/[Content_Types].xml',
'abc.docx-extract/docProps/app.xml',
'abc.docx-extract/docProps/core.xml',
'abc.docx-extract/_rels/.rels',
'abc.docx-extract/word/fontTable.xml',
'abc.docx-extract/word/document.xml',
'abc.docx-extract/word/settings.xml',
'abc.docx-extract/word/numbering.xml',
'abc.docx-extract/word/activeX/activeX1.xml',
'abc.docx-extract/word/activeX/activeX2.xml',
'abc.docx-extract/word/activeX/activeX3.xml',
'abc.docx-extract/word/activeX/_rels/activeX1.xml.rels',
'abc.docx-extract/word/activeX/_rels/activeX2.xml.rels',
'abc.docx-extract/word/activeX/_rels/activeX3.xml.rels',
tarinfo.name = 'somefilename-%i.txt' % i
tarinfo.uid = 123
tarinfo.gid = 456
tarinfo.uname = 'johndoe'
tarinfo.gname = 'fake'
tarinfo.type = tarfile.REGTYPE
tarinfo.mode = 0 # this is the readonly part
tarinfo.mtime = time.mktime(datetime.datetime.now().timetuple())
file = io.StringIO()
file.write(TEXT)
file.seek(0)
tarinfo.size = len(TEXT)
tar.addfile(tarinfo, file)
tar.close()
"""
result = list(extract.extract(test_file, recurse=False))
check_no_error(result)
expected = (
'somefilename-0.txt',
'somefilename-1.txt',
)
test_dir = extractcode.get_extraction_path(test_file)
check_files(test_dir, expected)
def test_extract_archive_non_nested(self):
    """
    Extract the non-nested tar.gz fixture twice, first with recurse=False and
    then with recurse=True, and check after each pass that the extraction
    directory holds the same expected files.
    """
    archive_loc = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
    expected_files = (
        'a/b/a.txt',
        'a/b/b.txt',
        'a/c/c.txt',
    )
    for recursive in (False, True):
        # The events iterable is handed to check_no_error as-is, not
        # collected into a list first.
        events = extract.extract(archive_loc, recurse=recursive)
        check_no_error(events)
        check_files(extractcode.get_extraction_path(archive_loc), expected_files)
def test_extract_tar_gz_with_spaces_in_name(self):
    """
    Recursively extract the space-tgz fixture directory, whose archive has
    spaces in its file name, then run check_no_error on the events and
    compare the files on disk with the expected listing.
    """
    fixture_dir = self.get_test_loc('extract/space-tgz', copy=True)
    events = list(extract.extract(fixture_dir, recurse=True))
    check_no_error(events)
    check_files(fixture_dir, (
        'with spaces in name.tar.gz',
        'with spaces in name.tar.gz-extract/a/b/a.txt',
        'with spaces in name.tar.gz-extract/a/b/b.txt',
        'with spaces in name.tar.gz-extract/a/c/c.txt',
    ))
def test_extract_zip_with_spaces_in_name(self):
    """
    Recursively extract the space-zip fixture directory, whose archive has
    spaces in its file name, then run check_no_error on the events and
    compare the files on disk with the expected listing.
    """
    fixture_dir = self.get_test_loc('extract/space-zip', copy=True)
    events = list(extract.extract(fixture_dir, recurse=True))
    check_no_error(events)
    check_files(fixture_dir, (
        'with spaces in name.zip',
        'with spaces in name.zip-extract/empty_dirs_and_small_files/small_files/small_file.txt'
    ))
'nested_tars.tar.gz-extract/b/a/a.txt',
'nested_tars.tar.gz-extract/b/b/.svn/all-wcprops',
'nested_tars.tar.gz-extract/b/b/.svn/entries',
'nested_tars.tar.gz-extract/b/b/.svn/format',
'nested_tars.tar.gz-extract/b/b/.svn/text-base/a.txt.svn-base',
'nested_tars.tar.gz-extract/b/b/a.txt',
'nested_tars.tar.gz-extract/b/c/.svn/all-wcprops',
'nested_tars.tar.gz-extract/b/c/.svn/entries',
'nested_tars.tar.gz-extract/b/c/.svn/format',
'nested_tars.tar.gz-extract/b/c/.svn/prop-base/a.tar.gz.svn-base',
'nested_tars.tar.gz-extract/b/c/.svn/text-base/a.tar.gz.svn-base',
'nested_tars.tar.gz-extract/b/c/.svn/text-base/a.txt.svn-base',
'nested_tars.tar.gz-extract/b/c/a.tar.gz',
'nested_tars.tar.gz-extract/b/c/a.txt'
]
result1 = list(extract.extract(test_dir, recurse=False))
check_no_error(result1)
check_files(test_dir, expected)
# The setup is a tad complex because we want a directory that is relative
# to the base dir where we run tests from, i.e. the scancode-toolkit/ dir.
# To use relative paths, we use our tmp dir at the root of the code tree.
from os.path import dirname, join, abspath
scancode_root = dirname(dirname(dirname(__file__)))
scancode_tmp = join(scancode_root, 'tmp')
fileutils.create_dir(scancode_tmp)
scancode_root_abs = abspath(scancode_root)
import tempfile
test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
test_file = self.get_test_loc('extract/relative_path/basic.zip')
import shutil
shutil.copy(test_file, test_src_dir)
test_src_file = join(test_src_dir, 'basic.zip')
test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX
result = list(extract.extract(test_src_file))
expected = ['c/a/a.txt', 'c/b/a.txt', 'c/c/a.txt']
check_files(test_tgt_dir, expected)
for r in result:
assert [] == r.warnings
assert [] == r.errors
def extract_archives(location, recurse=True):
    """
    Extract the archive(s) and compressed files found at `location` and yield
    each ExtractEvent produced along the way.

    When `recurse` is True, archives nested inside other archives are
    extracted recursively as well.

    Extracted content is placed in a directory named with the "-extract"
    suffix, created in the same directory as the archive.

    Note: the return value is a lazy iterable of events, NOT a sequence.
    """
    # Imports are kept at function scope, as in the original.
    from extractcode.extract import extract
    from extractcode import default_kinds

    events = extract(location, kinds=default_kinds, recurse=recurse)
    for event in events:
        yield event