Skip to content

Commit a183292

Browse files
committed
Add alias() function
1 parent 0010fda commit a183292

File tree

4 files changed

+55
-7
lines changed

4 files changed

+55
-7
lines changed

README.md

+26-1
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,28 @@ from kss import split_sentences
111111
output = split_sentences("YOUR_INPUT_STRING", **kwargs)
112112
```
113113

114+
### 6. Alias of module names
115+
Because there are so many modules in Kss, users may have difficulty remembering the name of each module.
116+
Kss provides aliases for some modules to make it easier to use them.
117+
```python
118+
from kss import Kss
119+
120+
module_1 = Kss("split_morphemes")
121+
module_2 = Kss("tokenize")
122+
# For example, the 'split_morphemes' module can also be loaded by using its alias 'tokenize'.
123+
```
124+
125+
You can check the aliases of all modules by using the `alias()` function, which returns a dictionary mapping each alias to its module name.
126+
```python
127+
from kss import Kss
128+
129+
Kss.alias()
130+
```
131+
132+
```python
133+
{'aug': 'augment', 'augmentation': 'augment', 'collocation': 'collocate', 'hangulization': 'hangulize', 'hangulisation': 'hangulize', 'hangulise': 'hangulize', 'hanja': 'hanja2hangul', 'hangul2jamo': 'h2j', 'hangul2hcj': 'h2hcj', 'jamo2hangul': 'j2h', 'jamo2hcj': 'j2hcj', 'hcj2hangul': 'hcj2h', 'hcj2jamo': 'hcj2j', 'josa': 'select_josa', 'keyword': 'extract_keywords', 'keywords': 'extract_keywords', 'morpheme': 'split_morphemes', 'morphemes': 'split_morphemes', 'annonymization': 'anonymize', 'news_cleaning': 'clean_news', 'news': 'clean_news', 'completed_form': 'is_completed_form', 'completed': 'is_completed_form', 'filter': 'filter_out', 'reduce_repeats': 'reduce_char_repeats', 'reduce_char': 'reduce_char_repeats', 'reduce_chars': 'reduce_char_repeats', 'reduce_emoticon': 'reduce_emoticon_repeats', 'reduce_emoticons': 'reduce_emoticon_repeats', 'reduce_emo': 'reduce_emoticon_repeats', 'remove_invisible': 'remove_invisible_chars', 'invisible_chars': 'remove_invisible_chars', 'invisible': 'remove_invisible_chars', 'normalization': 'normalize', 'normalisation': 'normalize', 'normalise': 'normalize', 'preprocessing': 'preprocess', 'prep': 'preprocess', 'romanization': 'romanize', 'romanisation': 'romanize', 'romanise': 'romanize', 'safety': 'is_unsafe', 'check_safety': 'is_unsafe', 'sentence': 'split_sentences', 'sentences': 'split_sentences', 'sent_split': 'split_sentences', 'sent_splits': 'split_sentences', 'sents_split': 'split_sentences', 'split_sent': 'split_sentences', 'split_sents': 'split_sentences', 'spacing': 'correct_spacing', 'space': 'correct_spacing', 'spaces': 'correct_spacing', 'summarization': 'summarize_sentences', 'summarize': 'summarize_sentences', 'summ': 'summarize_sentences', 'morph': 'split_morphemes', 'morphs': 'split_morphemes', 'tokenize': 'split_morphemes', 'tokenization': 'split_morphemes', 'split_morph': 'split_morphemes', 'split_morphs': 'split_morphemes', 'morph_split': 'split_morphemes', 'morph_splits': 'split_morphemes', 
'morphs_split': 'split_morphemes'}
134+
```
135+
114136
## Supported Modules
115137
Kss supports the following modules; simple usage examples for each module are given in the following sections.
116138

@@ -186,7 +208,7 @@ Args:
186208
- text (`Union[str, List[str], Tuple[str]]`): single text or list of texts
187209
- descriptive (`bool`): return the descriptive pronunciation, where 'descriptive' means the pronunciation actually used in real life
188210
- group_vowels (`bool`): If True, the vowels of the identical sound are normalized. (e.g. ㅒ -> ㅖ)
189-
- to_syllable: If True, hangul letters or jamo are assembled to form syllables.
211+
- to_syllable (`bool`): If True, hangul letters or jamo are assembled to form syllables.
190212
- convert_english_to_hangul_phonemes (`bool`): If True, convert English to Hangul phonemes
191213
- convert_numbers_to_hangul_phonemes (`bool`): If True, convert numbers to Hangul phonemes
192214
- num_workers (`Union[int, str]`): the number of multiprocessing workers
@@ -691,6 +713,9 @@ Args:
691713
- noun_only (`bool`): whether to extract only nouns or not
692714
- num_workers (`Union[int, str]`): the number of multiprocessing workers
693715

716+
Returns:
717+
- `Union[List[str], List[Tuple[str, float]]]`: list of keywords or list of tuples of keywords and scores
718+
694719
Examples:
695720
```python
696721
>>> from kss import Kss

kss/__init__.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,12 @@
100100
"filter": "filter_out",
101101
"reduce_repeats": "reduce_char_repeats",
102102
"reduce_char": "reduce_char_repeats",
103+
"reduce_chars": "reduce_char_repeats",
103104
"reduce_emoticon": "reduce_emoticon_repeats",
105+
"reduce_emoticons": "reduce_emoticon_repeats",
106+
"reduce_emo": "reduce_emoticon_repeats",
104107
"remove_invisible": "remove_invisible_chars",
108+
"invisible_chars": "remove_invisible_chars",
105109
"invisible": "remove_invisible_chars",
106110
"normalization": "normalize",
107111
"normalisation": "normalize",
@@ -117,12 +121,24 @@
117121
"sentences": "split_sentences",
118122
"sent_split": "split_sentences",
119123
"sent_splits": "split_sentences",
124+
"sents_split": "split_sentences",
125+
"split_sent": "split_sentences",
126+
"split_sents": "split_sentences",
120127
"spacing": "correct_spacing",
121128
"space": "correct_spacing",
122129
"spaces": "correct_spacing",
123130
"summarization": "summarize_sentences",
124131
"summarize": "summarize_sentences",
125132
"summ": "summarize_sentences",
133+
"morph": "split_morphemes",
134+
"morphs": "split_morphemes",
135+
"tokenize": "split_morphemes",
136+
"tokenization": "split_morphemes",
137+
"split_morph": "split_morphemes",
138+
"split_morphs": "split_morphemes",
139+
"morph_split": "split_morphemes",
140+
"morph_splits": "split_morphemes",
141+
"morphs_split": "split_morphemes",
126142
}
127143

128144

@@ -140,6 +156,10 @@ def help(self):
140156
def available():
141157
return list(supported_modules.keys())
142158

159+
@staticmethod
160+
def alias():
161+
return alias
162+
143163
def _check_module(self, module: str, supported_modules, alias):
144164
from kss._utils.sanity_checks import _check_type
145165

@@ -181,4 +201,4 @@ def _find_closest_module(module, min_distance=0.5):
181201

182202

183203
__ALL__ = list(supported_modules.keys()) + ["Kss"]
184-
__version__ = "6.0.1"
204+
__version__ = "6.0.2"

kss/_modules/g2p/g2p.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def g2p(
5454
text (Union[str, List[str], Tuple[str]]): single text or list of texts
5555
descriptive (bool): return descriptive pronunciation, the 'descriptive' means a real-life pronunciation
5656
group_vowels (bool): If True, the vowels of the identical sound are normalized. (e.g. ㅒ -> ㅖ)
57-
to_syllable: If True, hangul letters or jamo are assembled to form syllables.
57+
to_syllable (bool): If True, hangul letters or jamo are assembled to form syllables.
5858
convert_english_to_hangul_phonemes (bool): If True, convert English to Hangul phonemes
5959
convert_numbers_to_hangul_phonemes (bool): If True, convert numbers to Hangul phonemes
6060
num_workers (Union[int, str]): the number of multiprocessing workers

kss/_modules/keywords/extract_keywords.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# This code was copied from KR-WordRank [https://github.com/lovit/KR-WordRank]
22
# And modified by Hyunwoong Ko [https://github.com/hyunwoongko]
3-
from typing import List, Union
3+
from typing import List, Union, Tuple
44

55
from kss._modules.keywords.utils import KRWordRank
66
from kss._utils.sanity_checks import _check_text, _check_type, _check_backend_mecab_pecab_only
@@ -15,7 +15,7 @@ def extract_keywords(
1515
backend: str = "auto",
1616
noun_only: bool = True,
1717
num_workers: Union[int, str] = "auto",
18-
):
18+
) -> Union[List[str], List[Tuple[str, float]]]:
1919
"""
2020
This extracts keywords from the given text.
2121
This uses TextRank algorithm to extract keywords.
@@ -30,6 +30,9 @@ def extract_keywords(
3030
noun_only (bool): whether to extract only nouns or not
3131
num_workers (Union[int, str]): the number of multiprocessing workers
3232
33+
Returns:
34+
Union[List[str], List[Tuple[str, float]]]: list of keywords or list of tuples of keywords and scores
35+
3336
Examples:
3437
>>> from kss import Kss
3538
>>> extract_keywords = Kss("extract_keywords")
@@ -50,8 +53,8 @@ def extract_keywords(
5053
>>> print(output)
5154
['너무', '정말', '마지막', '영화', '음악']
5255
53-
References:
54-
This was copied from [KR-WordRank](https://github.com/lovit/KR-WordRank) and modified by Kss
56+
References:
57+
This was copied from [KR-WordRank](https://github.com/lovit/KR-WordRank) and modified by Kss
5558
"""
5659

5760
text, finish = _check_text(text)

0 commit comments

Comments
 (0)