How to use OpenCC - 10 common examples

To help you get started, we’ve selected a few OpenCC examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github baohaojun / system-config / gcode / playground / test-opencc.py View on Github external
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import opencc
import sys
import os

# The OpenCC configuration is derived from the name this script is invoked
# under; the canonical file name 'test-opencc.py' falls back to 's2j.json'.
progname = os.path.basename(sys.argv[0])
config_name = 's2j.json' if progname == 'test-opencc.py' else progname + '.json'
cc = opencc.OpenCC(config_name)

# Convert the first command-line argument, or a sample character when the
# script is run without any arguments.
source_text = "亚" if len(sys.argv) == 1 else sys.argv[1]
print(cc.convert(source_text))
github baohaojun / system-config / gcode / playground / test-opencc.py View on Github external
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import opencc
import sys
import os

# The OpenCC configuration is derived from the name this script is invoked
# under; the canonical file name 'test-opencc.py' falls back to 's2j.json'.
progname = os.path.basename(sys.argv[0])
config_name = 's2j.json' if progname == 'test-opencc.py' else progname + '.json'
cc = opencc.OpenCC(config_name)

# Convert the first command-line argument, or a sample character when the
# script is run without any arguments.
source_text = "亚" if len(sys.argv) == 1 else sys.argv[1]
print(cc.convert(source_text))
github lepture / opencc-python / tests / test_ctypes.py View on Github external
def test_convert():
    """Check that ``convert`` turns '乾坤一擲' into '乾坤一掷'."""
    source_phrase = '乾坤一擲'
    converted_phrase = '乾坤一掷'
    # Only the last character differs between the input and the expected output.
    assert convert(source_phrase) == converted_phrase
github baohaojun / system-config / gcode / scim-cs / ime-py / bhj_ime.py View on Github external
def init():
    """Build the module-level IME state and announce that start-up finished.

    Fills in the module globals in the same order as before: the
    ``ime_reverse()`` object, the two mode flags (true when the marker files
    ``.sdim-single`` / ``.sdim-s2t`` exist under ``$HOME``), the optional
    OpenCC converters, then the ``ime_trans()``, ``ime_quail()`` and
    ``ime_history()`` objects. Finally prints ``ime init complete`` and
    flushes stdout.

    OpenCC is treated as optional: if importing it or constructing either
    converter fails, the error is ignored and whichever of ``_g_opencc_s2t``
    / ``_g_opencc_s2j`` had not been assigned yet is left untouched.

    Raises:
        KeyError: if the ``HOME`` environment variable is not set.
    """
    global _g_ime_reverse, _g_ime_single_mode, _g_ime_s2t_mode
    global _g_opencc_s2t, _g_opencc_s2j
    global _g_ime_trans, _g_ime_quail, _g_ime_history

    _g_ime_reverse = ime_reverse()

    # Both mode switches are plain marker files in the user's home directory.
    home_dir = os.environ["HOME"]
    _g_ime_single_mode = os.path.exists(os.path.join(home_dir, ".sdim-single"))
    _g_ime_s2t_mode = os.path.exists(os.path.join(home_dir, ".sdim-s2t"))

    try:
        import opencc
        _g_opencc_s2t = opencc.OpenCC("s2t.json")
        _g_opencc_s2j = opencc.OpenCC("s2j.json")
    except Exception:
        # Best effort: a missing or broken OpenCC installation must not stop
        # the IME from starting. Catching Exception instead of using a bare
        # ``except`` lets KeyboardInterrupt and SystemExit propagate.
        pass

    _g_ime_trans = ime_trans()
    _g_ime_quail = ime_quail()
    _g_ime_history = ime_history()

    print('ime init complete')
    # Flush immediately so the completion message is not held in a buffer.
    sys.stdout.flush()
if __name__ == '__main__':
github jjgod / opf-cc / opf-cc.py View on Github external
os.path.basename(output_file_path))
        # KindleGen introduced redundant data, use kindlestrip to remove that.
        data_file = file(original_output_path, 'rb').read()
        strippedFile = kindlestrip.SectionStripper(data_file)
        outf = file(output_file_path, 'wb')
        outf.write(strippedFile.getResult())
        outf.close()

    print "Removing temporary directory %s" % input_path
    shutil.rmtree(input_path)

if len(sys.argv) < 2:
    print "usage: %s "
    sys.exit(1)

with opencc.OpenCC(config="t2s.json") as converter:
    (input_file_path, extracted_path, output_file_path) = find_paths(converter)
    opf_path = find_opf_path(extracted_path)

    if opf_path:
        files = find_files_to_convert(extracted_path, opf_path)
        if len(files):
            convert_files_in_place(converter, files)
        repack_files(extracted_path, output_file_path, opf_path)
    else:
        print "%s is not in Open Packaging Format, abort." % extracted_path
        sys.exit(1)
github yichen0831 / opencc-python / opencc / opencc.py View on Github external
"""
        if self.matched == True:
            if self.left is not None:
                self.left.convert_tree(test_dict)
            if self.right is not None:
                self.right.convert_tree(test_dict)
        else:
            test_len = min (self.string_len, test_dict[0])
            while test_len != 0:
                # Loop through trying successively smaller substrings in the dictionary
                for i in range(0, self.string_len - test_len + 1):
                    if self.string[i:i+test_len] in test_dict[1]:
                        # Match found.
                        if i > 0:
                            # Put everything to the left of the match into the left sub-tree and further process it
                            self.left = StringTree(self.string[:i])
                            self.left.convert_tree(test_dict)
                        if (i+test_len) < self.string_len:
                            # Put everything to the right of the match into the right sub-tree and further process it
                            self.right = StringTree(self.string[i+test_len:])
                            self.right.convert_tree(test_dict)
                        # Save the dictionary value in this tree
                        value = test_dict[1][self.string[i:i+test_len]]
                        if len(value.split(' ')) > 1:
                            # multiple mapping, use the first one for now
                            value = value.split(' ')[0]
                        self.string = value
                        self.string_len = len(self.string)
                        self.matched = True
                        return
                test_len -= 1
github yichen0831 / opencc-python / opencc / opencc.py View on Github external
if self.right is not None:
                self.right.convert_tree(test_dict)
        else:
            test_len = min (self.string_len, test_dict[0])
            while test_len != 0:
                # Loop through trying successively smaller substrings in the dictionary
                for i in range(0, self.string_len - test_len + 1):
                    if self.string[i:i+test_len] in test_dict[1]:
                        # Match found.
                        if i > 0:
                            # Put everything to the left of the match into the left sub-tree and further process it
                            self.left = StringTree(self.string[:i])
                            self.left.convert_tree(test_dict)
                        if (i+test_len) < self.string_len:
                            # Put everything to the right of the match into the right sub-tree and further process it
                            self.right = StringTree(self.string[i+test_len:])
                            self.right.convert_tree(test_dict)
                        # Save the dictionary value in this tree
                        value = test_dict[1][self.string[i:i+test_len]]
                        if len(value.split(' ')) > 1:
                            # multiple mapping, use the first one for now
                            value = value.split(' ')[0]
                        self.string = value
                        self.string_len = len(self.string)
                        self.matched = True
                        return
                test_len -= 1
github amowu / alfred-chinese-converter / workflow / chineseconverter.py View on Github external
Given a Chinese language string, return a list of alfred items for each of the results
  '''
  index = 0
  results = []

  config_list = [
    ('t2s.json', u'繁體到簡體', 'SimplifiedChinese.png'),
    ('s2t.json', u'簡體到繁體', 'TraditionalChinese.png'),
    ('s2tw.json', u'簡體到臺灣正體', 'TW_taiwan.png'),
    ('tw2s.json', u'臺灣正體到簡體', 'CN_china.png'),
    ('s2hk.json', u'簡體到香港繁體', 'HK_hongKong.png'),
    ('hk2s.json', u'香港繁體(香港小學學習字詞表標準)到簡體', 'CN_china.png'),
    ('s2twp.json', u'簡體到繁體(臺灣正體標準)並轉換爲臺灣常用詞彙', 'TW_taiwan.png'),
  ]
  for config_file, description, icon in config_list:
    converter = opencc.OpenCC(config=config_file, opencc_path='/usr/local/bin/opencc')
    item_value = converter.convert(query_str)
    results.append(alfred.Item(
      title=item_value,
      subtitle=description,
      attributes={
        'uid': alfred.uid(index),
        'arg': item_value,
      },
      icon=icon,
    ))
    index += 1

  return results
github victoresque / ML2017FALL / final / src / NoMLModel / preprocessing.py View on Github external
# -*- coding: utf-8 -*-
import json
import jieba
import pickle
import numpy as np
from tqdm import tqdm
from gensim.models.word2vec import Word2Vec
from opencc import OpenCC
# Point jieba at the dictionary file 'dict/dict.txt.big' (a relative path).
jieba.set_dictionary('dict/dict.txt.big')

# Load the saved Word2Vec model; toW2V looks words up in w2v.wv.
w2v = Word2Vec.load('word2vec/zh.bin')
# Two OpenCC converters built from the 's2twp' and 'tw2sp' configurations.
# toW2V uses them to retry a lookup with the converted form of a word when the
# word itself is not in the model's vocabulary.
s2t, t2s = OpenCC('s2twp'), OpenCC('tw2sp')
def toW2V(s):
    offset = 0
    offsets = []
    for i, w in enumerate(s):
        ws = t2s.convert(w)
        wt = s2t.convert(w)
        if w in w2v.wv:
            s[i] = w2v.wv[w]
        elif ws in w2v.wv:
            s[i] = w2v.wv[ws]
        elif wt in w2v.wv:
            s[i] = w2v.wv[wt]
        else:
            s[i] = np.zeros((300, ))
        offsets.append(offset)
        offset += len(w)
github yichen0831 / opencc-python / opencc / __main__.py View on Github external
help='Read original text from .')
    parser.add_argument('-o', '--output', metavar='',
                        help='Write converted text to .')
    parser.add_argument('-c', '--config', metavar='',
                        help='Configuration file')
    parser.add_argument('--in-enc', metavar='', default='UTF-8',
                        help='Encoding for input')
    parser.add_argument('--out-enc', metavar='', default='UTF-8',
                        help='Encoding for output')
    args = parser.parse_args()

    if args.config is None:
        print("Please specify a configuration file.", file=sys.stderr)
        return 1

    cc = OpenCC(args.config)

    with io.open(args.input if args.input else 0, encoding=args.in_enc) as f:
        input_str = f.read()
    output_str = cc.convert(input_str)
    with io.open(args.output if args.output else 1, 'w',
              encoding=args.out_enc) as f:
        f.write(output_str)

    return 0

OpenCC

Conversion between Traditional and Simplified Chinese

Apache-2.0
Latest version published 5 months ago

Package Health Score

85 / 100
Full package analysis