alirezatheh
diff --git a/‎.github/workflows/tests.yaml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/tests.yaml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎CHANGELOG.md‎
Lines changed: 9 additions & 1 deletion b/‎CHANGELOG.md‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/unsupervised/graph_based/multipartite_rank.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/unsupervised/graph_based/multipartite_rank.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/unsupervised/graph_based/position_rank.py‎
Lines changed: 9 additions & 5 deletions b/‎examples/unsupervised/graph_based/position_rank.py‎
Lines changed: 9 additions & 5 deletions
diff --git a/‎examples/unsupervised/graph_based/single_rank.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/unsupervised/graph_based/single_rank.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/unsupervised/graph_based/text_rank.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/unsupervised/graph_based/text_rank.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/unsupervised/graph_based/topic_rank.py‎
Lines changed: 3 additions & 3 deletions b/‎examples/unsupervised/graph_based/topic_rank.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎perke/base/extractor.py‎
Lines changed: 18 additions & 11 deletions b/‎perke/base/extractor.py‎
Lines changed: 18 additions & 11 deletions
@@ -10,8 +10,8 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest]
-        python-version: ['3.8', '3.9', '3.10']
+        os: [ubuntu-latest, macos-latest, windows-latest]
+        python-version: ['3.8', '3.9', '3.10', '3.11']
 
     steps:
       - name: Checkout repository
@@ -35,7 +35,7 @@ jobs:
       - name: Download resources
         run: |
           python3 -m pip install .
-          python3 -m perke download ${{ secrets.GITHUB_TOKEN }}
+          python3 -m perke download
 
       - name: Run tests
         run: pytest
@@ -18,7 +18,7 @@ repos:
           - --profile=black
           - --line-length=79
   - repo: https://github.com/psf/black
-    rev: 23.1.0
+    rev: 23.3.0
     hooks:
       - id: black
         args:
 
@@ -7,6 +7,13 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
 ## [Unreleased]
+### Added
+- Added support for Python `3.11`
+- Brought Windows tests back
+
+### Changed
+- Adapted Perke to all changes in the
+  [Hazm](https://github.com/roshan-research/hazm) `0.9` release
 
 ## [0.4.1] - 2023-03-15
 ### Fixed
@@ -32,7 +39,8 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Changed CI from Travis CI to GitHub workflows
 
 ### Removed
-- Removed Windows tests since hazm runs on WSL and WSL tests is same as Linux
+- Removed Windows tests since [Hazm](https://github.com/roshan-research/hazm)
+  runs on WSL and WSL tests are the same as Linux
 
 ### Fixed
 - Removed type hints from docstrings
 
@@ -3,7 +3,7 @@
 [![pre-commit.ci](https://results.pre-commit.ci/badge/github/AlirezaTheH/perke/main.svg)](https://results.pre-commit.ci/latest/github/alirezatheh/perke/main)
 [![PyPI Version](https://img.shields.io/pypi/v/perke)](https://pypi.python.org/pypi/perke)
 [![Python Versions](https://img.shields.io/pypi/pyversions/perke)](https://pypi.org/project/perke)
-[![Documentation Status](https://readthedocs.org/projects/perke/badge/?version=stable)](https://perke.readthedocs.io/en/latest/?badge=stable)
+[![Documentation Status](https://readthedocs.org/projects/perke/badge/?version=stable)](https://perke.readthedocs.io/en/stable/?badge=stable)
 
 Perke is a Python keyphrase extraction package for Persian language. It
 provides an end-to-end keyphrase extraction pipeline in which each component
 
@@ -3,7 +3,7 @@
 from perke.unsupervised.graph_based import MultipartiteRank
 
 # Define the set of valid part of speech tags to occur in the model.
-valid_pos_tags = {'N', 'Ne', 'AJ', 'AJe'}
+valid_pos_tags = {'NOUN', 'ADJ'}
 
 # 1. Create a MultipartiteRank extractor.
 extractor = MultipartiteRank(valid_pos_tags=valid_pos_tags)
 
@@ -3,23 +3,27 @@
 from perke.unsupervised.graph_based import PositionRank
 
 # Define the set of valid part of speech tags to occur in the model.
-valid_pos_tags = {'N', 'Ne', 'AJ', 'AJe'}
+valid_pos_tags = {'NOUN', 'NOUN,EZ', 'ADJ', 'ADJ,EZ'}
 
 # Define the grammar for selecting the keyphrase candidates
 grammar = r"""
     NP:
-        <P>{<N>}<V>
+        {<NOUN>}<VERB>
     NP:
-        {<DETe?|Ne?|NUMe?|AJe|PRO|CL|RESe?><DETe?|Ne?|NUMe?|AJe?|PRO|CL|RESe?>*}
-        <N>}{<.*e?>
+        {<DET(,EZ)?|NOUN(,EZ)?|NUM(,EZ)?|ADJ(,EZ)|PRON><DET(,EZ)|NOUN(,EZ)|NUM(,EZ)|ADJ(,EZ)|PRON>*}
+        <NOUN>}{<.*(,EZ)?>
 """
 
 # 1. Create a PositionRank extractor.
 extractor = PositionRank(valid_pos_tags=valid_pos_tags)
 
 # 2. Load the text.
 input_filepath = Path(__file__).parent.parent.parent / 'input.txt'
-extractor.load_text(input=input_filepath, word_normalization_method=None)
+extractor.load_text(
+    input=input_filepath,
+    word_normalization_method=None,
+    universal_pos_tags=False,
+)
 
 # 3. Select the noun phrases up to 3 words as keyphrase candidates.
 extractor.select_candidates(grammar=grammar, maximum_word_number=3)
 
@@ -3,7 +3,7 @@
 from perke.unsupervised.graph_based import SingleRank
 
 # Define the set of valid part of speech tags to occur in the model.
-valid_pos_tags = {'N', 'Ne', 'AJ', 'AJe'}
+valid_pos_tags = {'NOUN', 'ADJ'}
 
 # 1. Create a SingleRank extractor.
 extractor = SingleRank(valid_pos_tags=valid_pos_tags)
 
@@ -3,7 +3,7 @@
 from perke.unsupervised.graph_based import TextRank
 
 # Define the set of valid part of speech tags to occur in the model.
-valid_pos_tags = {'N', 'Ne', 'AJ', 'AJe'}
+valid_pos_tags = {'NOUN', 'ADJ'}
 
 # 1. Create a TextRank extractor.
 extractor = TextRank(valid_pos_tags=valid_pos_tags)
 
@@ -1,15 +1,15 @@
-from os.path import dirname, join
+from pathlib import Path
 
 from perke.unsupervised.graph_based import TopicRank
 
 # Define the set of valid part of speech tags to occur in the model.
-valid_pos_tags = {'N', 'Ne', 'AJ', 'AJe'}
+valid_pos_tags = {'NOUN', 'ADJ'}
 
 # 1. Create a TopicRank extractor.
 extractor = TopicRank(valid_pos_tags=valid_pos_tags)
 
 # 2. Load the text.
-input_filepath = join(dirname(dirname(dirname(__file__))), 'input.txt')
+input_filepath = Path(__file__).parent.parent.parent / 'input.txt'
 extractor.load_text(input=input_filepath, word_normalization_method='stemming')
 
 # 3. Select the longest sequences of nouns and adjectives, that do
 
@@ -45,7 +45,7 @@ def __init__(self, valid_pos_tags: Optional[Set[str]] = None) -> None:
         ----------
         valid_pos_tags:
             Set of valid part of speech tags, defaults to nouns and
-            adjectives. I.e. `{'N', 'Ne', 'AJ', 'AJe'}`.
+            adjectives. I.e. `{'NOUN', 'ADJ'}`.
         """
         self.word_normalization_method: Optional[str] = None
         self.sentences: List[Sentence] = []
@@ -54,13 +54,14 @@ def __init__(self, valid_pos_tags: Optional[Set[str]] = None) -> None:
             punctuation_marks
         )
         if valid_pos_tags is None:
-            valid_pos_tags = {'N', 'Ne', 'AJ', 'AJe'}
+            valid_pos_tags = {'NOUN', 'ADJ'}
         self.valid_pos_tags: Set[str] = valid_pos_tags
 
     def load_text(
         self,
         input: Union[str, Path],
         word_normalization_method: WordNormalizationMethod = 'stemming',
+        universal_pos_tags: bool = True,
     ) -> None:
         """
         Loads the text of a document or string.
@@ -74,9 +75,15 @@ def load_text(
             Word normalization method, defaults to `'stemming'`. See
             `perke.base.types.WordNormalizationMethod` for available
             methods.
+
+        universal_pos_tags:
+            Whether to use universal part of speech tags or not,
+            defaults to `True`.
         """
         # Initialize reader
-        reader = RawTextReader(input, word_normalization_method)
+        reader = RawTextReader(
+            input, word_normalization_method, universal_pos_tags
+        )
 
         # Load sentences
         self.sentences = reader.read()
@@ -225,7 +232,7 @@ def _add_candidate_occurrence(
             The offset of the occurrence
 
         normalized_words:
-            List of normalized of words of the occurrence
+            List of normalized words of the occurrence
         """
         # Build the canonical form of the candidate
         canonical_form = ' '.join(normalized_words)
@@ -306,7 +313,7 @@ def _select_candidates_with_longest_sequences(
                     first = sequence_offsets[0]
                     last = sequence_offsets[-1]
 
-                    # Add the ngram as a new candidate occurrence
+                    # Add the n-gram as a new candidate occurrence
                     self._add_candidate_occurrence(
                         words=sentence.words[first : last + 1],
                         offset=offset_shift + first,
@@ -336,20 +343,20 @@ def _select_candidates_with_grammar(
             defaults to::
                 r\"""
                 NP:
-                    <P>{<N>}<V>
+                    {<NOUN>}<VERB>
                 NP:
-                    {<DETe?|Ne?|NUMe?|AJe|PRO|CL|RESe?><DETe?|Ne?|NUMe?|AJe?|PRO|CL|RESe?>*}
-                    <N>}{<.*e?>'
+                    {<DET(,EZ)?|NOUN(,EZ)?|NUM(,EZ)?|ADJ(,EZ)|PRON><DET(,EZ)|NOUN(,EZ)|NUM(,EZ)|ADJ(,EZ)|PRON>*}
+                    <NOUN>}{<.*(,EZ)?>
                 \"""
         """
         # Initialize default grammar if none provided
         if grammar is None:
             grammar = r"""
                 NP:
-                    <P>{<N>}<V>
+                    {<NOUN>}<VERB>
                 NP:
-                    {<DETe?|Ne?|NUMe?|AJe|PRO|CL|RESe?><DETe?|Ne?|NUMe?|AJe?|PRO|CL|RESe?>*}
-                    <N>}{<.*e?>
+                    {<DET(,EZ)?|NOUN(,EZ)?|NUM(,EZ)?|ADJ(,EZ)|PRON><DET(,EZ)|NOUN(,EZ)|NUM(,EZ)|ADJ(,EZ)|PRON>*}
+                    <NOUN>}{<.*(,EZ)?>
             """
 
         # Initialize parser