Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# git_uri = urljoin('https://github.com/cltk/', corpus_name + '.git')
# self._download_corpus(corpus_type, corpus_name, path)
type_dir_rel = os.path.join(CLTK_DATA_DIR, self.language, corpus_type)
type_dir = os.path.expanduser(type_dir_rel)
repo_name = uri.split('/')[-1] # eg, 'latin_corpus_newton_example.git'
repo_name = repo_name.rstrip('.git')
target_dir = os.path.join(type_dir, repo_name)
target_file = os.path.join(type_dir, repo_name, 'README.md')
# check if corpus already present
# if not, clone
if not os.path.isfile(target_file):
if not os.path.isdir(type_dir):
os.makedirs(type_dir)
try:
msg = "Cloning '{}' from '{}'".format(corpus_name, uri)
logger.info(msg)
Repo.clone_from(uri, target_dir, branch=branch, depth=1,
progress=ProgressPrinter())
except CorpusImportError as corpus_imp_err:
msg = "Git clone of '{}' failed: '{}'".format(uri, corpus_imp_err)
logger.error(msg)
# if corpus is present, pull latest
else:
try:
repo = Repo(target_dir)
assert not repo.bare # or: assert repo.exists()
git_origin = repo.remotes.origin
msg = "Pulling latest '{}' from '{}'.".format(corpus_name, uri)
logger.info(msg)
git_origin.pull()
except CorpusImportError as corpus_imp_err:
msg = "Git pull of '{}' failed: '{}'".format(uri, corpus_imp_err)
logger.info(msg)
if corpus_name in ('phi5', 'phi7', 'tlg'):
if corpus_name == 'phi5':
# normalize path for checking dir
if local_path.endswith('/'):
local_path = local_path[:-1]
# check for right corpus dir
if os.path.split(local_path)[1] != 'PHI5':
logger.info("Directory must be named 'PHI5'.")
if corpus_name == 'phi7':
# normalize local_path for checking dir
if local_path.endswith('/'):
local_path = local_path[:-1]
# check for right corpus dir
if os.path.split(local_path)[1] != 'PHI7':
logger.info("Directory must be named 'PHI7'.")
if corpus_name == 'tlg':
# normalize path for checking dir
if local_path.endswith('/'):
local_path = local_path[:-1]
# check for right corpus dir
if os.path.split(local_path)[1] != 'TLG_E':
logger.info("Directory must be named 'TLG_E'.")
# move the dir-checking commands into a function
data_dir = os.path.expanduser(CLTK_DATA_DIR)
originals_dir = os.path.join(data_dir, 'originals')
# check for `originals` dir; if not present mkdir
if not os.path.isdir(originals_dir):
os.makedirs(originals_dir)
msg = "Wrote directory at '{}'.".format(originals_dir)
logger.info(msg)
tlg_originals_dir = os.path.join(data_dir,
orig_dir = os.path.expanduser(orig_dir_rel)
works_dir = os.path.expanduser(works_dir_rel)
if not os.path.exists(works_dir):
os.makedirs(works_dir)
files = os.listdir(orig_dir)
texts = [x for x in files if x.endswith('.TXT') and x.startswith(file_prefix)]
for file in texts:
orig_file_path = os.path.join(orig_dir, file)
new_file_path = os.path.join(works_dir, file)
try:
self.convert(orig_file_path, new_file_path, divide_works=True, latin=latin)
logger.info('Writing files at %s to %s.', orig_file_path, works_dir)
except Exception as err:
logger.error('Failed to convert files: %s.', err)
def _what_os(self):
    """Get operating system.

    Maps the module-level ``platform`` string to a short name and logs the
    result at INFO level.

    :rtype: str
    :returns: ``'linux'`` for ``'linux'``/``'linux2'``, ``'mac'`` for
        ``'darwin'``, ``'windows'`` for ``'win32'``. Any other value of
        ``platform`` is returned unchanged.
    """
    # NOTE(review): ``platform`` is compared against sys.platform-style
    # values, so it is presumably ``from sys import platform`` -- confirm.
    known_platforms = {
        'linux': 'linux',
        'linux2': 'linux',
        'darwin': 'mac',
        'win32': 'windows',
    }
    # Fall back to the raw identifier: previously an unlisted platform left
    # ``_platform`` unbound and the log line below raised UnboundLocalError.
    _platform = known_platforms.get(platform, platform)
    logger.info("Detected '{}' operating system.".format(_platform))
    return _platform
def _check_install(self):
"""Check if tlgu installed, if not install it."""
try:
subprocess.check_output(['which', 'tlgu'])
except Exception as exc:
logger.info('TLGU not installed: %s', exc)
logger.info('Installing TLGU.')
if not subprocess.check_output(['which', 'gcc']):
logger.error('GCC seems not to be installed.')
else:
tlgu_path_rel = get_cltk_data_dir() + '/greek/software/greek_software_tlgu'
tlgu_path = os.path.expanduser(tlgu_path_rel)
if not self.testing:
print('Do you want to install TLGU?')
print('To continue, press Return. To exit, Control-C.')
input()
else:
print('Automated or test build, skipping keyboard input confirmation for installation of TLGU.')
try:
command = 'cd {0} && make install'.format(tlgu_path)
print('Going to run command:', command)
p_out = subprocess.call(command, shell=True)
def make(self):
    """Build program.

    Runs ``make`` inside ``~/cltk_data/multilingual/software/lapos`` (with
    ``~`` expanded), discarding the build's standard output. The outcome is
    reported both with ``print`` and through ``logger``. If ``make`` cannot
    be started at all (for example the directory does not exist), that is
    reported as an unsuccessful build.

    :rtype: None
    """
    # NOTE(review): the original comment here read "for linux install Clan"
    # (presumably Clang) -- confirm the actual Linux prerequisite.
    fp = os.path.expanduser('~/cltk_data/multilingual/software/lapos')
    try:
        # Use ``cwd`` and an argument list instead of ``cd <path> && make``
        # through a shell, so a path containing spaces or shell
        # metacharacters cannot break or alter the command.
        p_out = subprocess.call(['make'], cwd=fp, stdout=subprocess.DEVNULL)
    except OSError:
        # The shell form yielded a non-zero status when the directory or
        # ``make`` was missing; keep that "failed build" outcome here.
        p_out = None
    if p_out == 0:
        print('Lapos built successfully.')
        logger.info('Lapos build successfully.')
    else:
        print('Lapos did not build successfully.')
        logger.error('Lapos did not build successfully.')
def write_concordance_from_string(text: str, name: str) -> None:
    """Build a concordance from ``text`` and write it to the user data dir.

    A reworking of write_concordance_from_file(). Refactor these.

    The output file is ``concordance_<name>.txt`` inside the ``user_data``
    directory under ``get_cltk_data_dir()``; the directory is created if it
    does not exist. Every line returned by ``build_concordance`` is written
    followed by a newline. A failure to write the file is logged, not raised.

    :param text: Text passed to ``build_concordance``.
    :param name: Label inserted into the output file name.
    """
    list_of_lists = build_concordance(text)  # type: List[List[str]]
    user_data_rel = get_cltk_data_dir() + '/user_data'  # type: str
    user_data = os.path.expanduser(user_data_rel)  # type: str
    # ``exist_ok`` avoids the race between checking for the directory and
    # creating it.
    os.makedirs(user_data, exist_ok=True)
    file_path = os.path.join(user_data, 'concordance_' + name + '.txt')  # type: str
    # Collect the lines and join once instead of growing a string with ``+=``.
    lines = [line + '\n' for word_list in list_of_lists for line in word_list]
    concordance_output = ''.join(lines)  # type: str
    try:
        # Explicit UTF-8: with the platform default encoding, non-ASCII text
        # can raise UnicodeEncodeError, which the handler below does not catch.
        with open(file_path, 'w', encoding='utf-8') as open_file:
            open_file.write(concordance_output)
            logger.info("Wrote concordance to '%s'.", file_path)
    except IOError as io_error:
        logger.error("Failed to write concordance to '%s'. Error: %s", file_path, io_error)
logger.info('Tagger {} could not tag {}.'.format(self.tagger, head_word))
return head_word, tag, head_word
elif tag == 'U--------':
return (head_word, tag.lower(), head_word)
else:
entries = self._retrieve_morpheus_entry(head_word)
if entries is None:
return head_word, tag.lower(), head_word
matched_entry = [entry for entry in entries if entry[0] == tag.lower()]
if len(matched_entry) == 0:
logger.info('No matching Morpheus entry found for {}.'.format(head_word))
return head_word, tag.lower(), entries[0][2]
elif len(matched_entry) == 1:
return head_word, tag.lower(), matched_entry[0][2].lower()
else:
logger.info('Multiple matching entries found for {}.'.format(head_word))
return head_word, tag.lower(), matched_entry[1][2].lower()
exist. Source: http://stackoverflow.com/a/1994840
TODO: Move this to file_operations.py module.
:type src_rel: str
:param src_rel: Directory to be copied.
:type dst_rel: str
:param dst_rel: Directory to be created with contents of ``src_rel``.
"""
src = os.path.expanduser(src_rel)
dst = os.path.expanduser(dst_rel)
try:
shutil.copytree(src, dst)
logger.info('Files copied from %s to %s', src, dst)
except OSError as exc:
if exc.errno == errno.ENOTDIR:
shutil.copy(src, dst)
logger.info('Files copied from %s to %s', src, dst)
else:
raise