Loading pykospacing/__init__.py +1 −3 Original line number Diff line number Diff line from pykospacing.kospacing import * __version__ = '0.01' __version__ = '0.1' pykospacing/embedding_maker.py +9 −15 Original line number Diff line number Diff line __all__ = ['load_embedding', 'load_vocab', 'encoding_and_padding'] import os from keras.preprocessing import sequence import json import numpy as np from keras.preprocessing import sequence __all__ = ['load_embedding', 'load_vocab', 'encoding_and_padding'] def load_embedding(embeddings_file): return(np.load(embeddings_file)) def load_vocab(vocab_path): with open(vocab_path, 'r') as f: data = json.loads(f.read()) Loading @@ -25,7 +23,6 @@ def encoding_and_padding(word2idx_dic, sequences, **params): 1. making item to idx 2. padding :word2idx_dic :sequences: list of lists where each element is a sequence :maxlen: int, maximum length Loading @@ -38,6 +35,3 @@ def encoding_and_padding(word2idx_dic, sequences, **params): seq_idx = [[word2idx_dic.get(a, word2idx_dic['__ETC__']) for a in i] for i in sequences] params['value'] = word2idx_dic['__PAD__'] return(sequence.pad_sequences(seq_idx, **params)) pykospacing/kospacing.py +22 −29 Original line number Diff line number Diff line # -*- coding: utf-8 -*- __all__ = ['spacing',] import json, os, re, sys os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' import numpy as np from keras.models import load_model from pykospacing.embedding_maker import load_vocab, encoding_and_padding import pkg_resources, warnings import numpy as np import os import pkg_resources import re import warnings model_path = pkg_resources.resource_filename('pykospacing', os.path.join("resources", "models", "kospacing")) dic_path = pkg_resources.resource_filename('pykospacing', os.path.join("resources", "dicts", "c2v.dic")) model = None __all__ = ['spacing', ] os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' model_path = pkg_resources.resource_filename('pykospacing', os.path.join('resources', 'models', 'kospacing')) dic_path = pkg_resources.resource_filename('pykospacing', os.path.join('resources', 'dicts', 'c2v.dic')) model = load_model(model_path) model._make_predict_function() w2idx, _ = load_vocab(dic_path) class pred_spacing: class pred_spacing: def __init__(self, model, w2idx): self.model = model self.w2idx = w2idx self.pattern = re.compile(r'\s+') def get_spaced_sent(self, raw_sent): #print('sent : {}'.format(raw_sent), file=sys.stdout) raw_sent_ = "«" + raw_sent + "»" raw_sent_ = raw_sent_.replace(' ', '^') sents_in = [raw_sent_, ] Loading @@ -52,14 +48,11 @@ class pred_spacing: return subs pred_spacing = pred_spacing(model, w2idx) def spacing(sent): if len(sent) > 198: warnings.warn("One sentence can not contain more than 198 characters. : {}".format(sent)) warnings.warn('One sentence can not contain more than 198 characters. : {}'.format(sent)) spaced_sent = pred_spacing.get_spaced_sent(sent) return(spaced_sent.strip()) setup.py +5 −16 Original line number Diff line number Diff line from setuptools import setup, find_packages from setuptools import setup from pykospacing import __version__ setup(name='pykospacing', version='0.1', version=__version__, url='https://github.com/haven-jeon/PyKoSpacing', license='GPL-3', author='Heewon Jeon', author_email='madjakarta@gmail.com', description='Python package for automatic Korean word spacing.', packages=['pykospacing', ], long_description=open('README.md', encoding='utf-8').read(), zip_safe=False, include_package_data=True, install_requires=[ Loading @@ -36,5 +27,3 @@ setup(name='pykospacing', ], }, ) Loading
pykospacing/__init__.py +1 −3 Original line number Diff line number Diff line from pykospacing.kospacing import * __version__ = '0.01' __version__ = '0.1'
pykospacing/embedding_maker.py +9 −15 Original line number Diff line number Diff line __all__ = ['load_embedding', 'load_vocab', 'encoding_and_padding'] import os from keras.preprocessing import sequence import json import numpy as np from keras.preprocessing import sequence __all__ = ['load_embedding', 'load_vocab', 'encoding_and_padding'] def load_embedding(embeddings_file): return(np.load(embeddings_file)) def load_vocab(vocab_path): with open(vocab_path, 'r') as f: data = json.loads(f.read()) Loading @@ -25,7 +23,6 @@ def encoding_and_padding(word2idx_dic, sequences, **params): 1. making item to idx 2. padding :word2idx_dic :sequences: list of lists where each element is a sequence :maxlen: int, maximum length Loading @@ -38,6 +35,3 @@ def encoding_and_padding(word2idx_dic, sequences, **params): seq_idx = [[word2idx_dic.get(a, word2idx_dic['__ETC__']) for a in i] for i in sequences] params['value'] = word2idx_dic['__PAD__'] return(sequence.pad_sequences(seq_idx, **params))
pykospacing/kospacing.py +22 −29 Original line number Diff line number Diff line # -*- coding: utf-8 -*- __all__ = ['spacing',] import json, os, re, sys os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' import numpy as np from keras.models import load_model from pykospacing.embedding_maker import load_vocab, encoding_and_padding import pkg_resources, warnings import numpy as np import os import pkg_resources import re import warnings model_path = pkg_resources.resource_filename('pykospacing', os.path.join("resources", "models", "kospacing")) dic_path = pkg_resources.resource_filename('pykospacing', os.path.join("resources", "dicts", "c2v.dic")) model = None __all__ = ['spacing', ] os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' model_path = pkg_resources.resource_filename('pykospacing', os.path.join('resources', 'models', 'kospacing')) dic_path = pkg_resources.resource_filename('pykospacing', os.path.join('resources', 'dicts', 'c2v.dic')) model = load_model(model_path) model._make_predict_function() w2idx, _ = load_vocab(dic_path) class pred_spacing: class pred_spacing: def __init__(self, model, w2idx): self.model = model self.w2idx = w2idx self.pattern = re.compile(r'\s+') def get_spaced_sent(self, raw_sent): #print('sent : {}'.format(raw_sent), file=sys.stdout) raw_sent_ = "«" + raw_sent + "»" raw_sent_ = raw_sent_.replace(' ', '^') sents_in = [raw_sent_, ] Loading @@ -52,14 +48,11 @@ class pred_spacing: return subs pred_spacing = pred_spacing(model, w2idx) def spacing(sent): if len(sent) > 198: warnings.warn("One sentence can not contain more than 198 characters. : {}".format(sent)) warnings.warn('One sentence can not contain more than 198 characters. : {}'.format(sent)) spaced_sent = pred_spacing.get_spaced_sent(sent) return(spaced_sent.strip())
setup.py +5 −16 Original line number Diff line number Diff line from setuptools import setup, find_packages from setuptools import setup from pykospacing import __version__ setup(name='pykospacing', version='0.1', version=__version__, url='https://github.com/haven-jeon/PyKoSpacing', license='GPL-3', author='Heewon Jeon', author_email='madjakarta@gmail.com', description='Python package for automatic Korean word spacing.', packages=['pykospacing', ], long_description=open('README.md', encoding='utf-8').read(), zip_safe=False, include_package_data=True, install_requires=[ Loading @@ -36,5 +27,3 @@ setup(name='pykospacing', ], }, )