Skip to content

Commit 54ada4b

Browse files
author
Gaurav Sheni
authored
Add primitives from nlp-primitives add-on (#7)
* add nlp * release notes * fix unit tests * fix unit tests * fix unit tests * fix unit tests * fix unit tests * remove tensorflow * fix data * fix data * fix data * fix data * fix data
1 parent 6f5f75a commit 54ada4b

File tree

202 files changed

+5353977
-25
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

202 files changed

+5353977
-25
lines changed

.github/workflows/release_notes_updated.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,4 +30,4 @@ jobs:
3030
repository: ${{ github.event.pull_request.head.repo.full_name }}
3131
- if: ${{ steps.branch.outputs.is_dev == 'true' }}
3232
name: Check if release notes were updated
33-
run: cat docs/release_notes.rst | grep ":pr:\`${{ github.event.number }}\`"
33+
run: cat release_notes.rst | grep ":pr:\`${{ github.event.number }}\`"

.github/workflows/unit_tests_with_latest_deps.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ jobs:
2626
python-version: ${{ matrix.python_version }}
2727
- name: Build premium primitives package (whl, tar.gz)
2828
run: |
29+
make clean
2930
make package
3031
- name: Install premium primitives via sdist
3132
run: |

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,6 @@ ENV/
127127

128128
# pickle files
129129
*.p
130-
*.pickle
131130

132131
.pytest_cache
133132

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
exclude: |
22
(?x)
3+
^premium_primitives/data/nltk_data/|
34
.html$|.csv$|.svg$|.md$|.txt$|.json$|.xml$|.pickle$|^.github/|
45
(LICENSE.*|README.*)
56
repos:

Makefile

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,10 @@ clean:
66
find . -name '.coverage.*' -delete
77
find . -name __pycache__ -delete
88
find . -type d -name ".pytest_cache" -exec rm -r {} +
9+
rm -rf ./build
10+
rm -rf ./dist
11+
rm -rf ./premium_primitives.egg-info
12+
rm -rf ./unpacked_sdist
913

1014
.PHONY: lint
1115
lint:

premium_primitives/__init__.py

Lines changed: 33 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,39 +1,50 @@
11
import inspect
22

3+
import nltk.data
4+
from importlib.util import find_spec
5+
36
import featuretools
47
import pkg_resources
58
from featuretools.primitives import AggregationPrimitive, TransformPrimitive
69

7-
from premium_primitives.country_code_to_continent import ( # noqa: F401
10+
from premium_primitives.diversity_score import DiversityScore
11+
from premium_primitives.lsa import LSA
12+
from premium_primitives.mean_characters_per_sentence import MeanCharactersPerSentence
13+
from premium_primitives.number_of_sentences import NumberOfSentences
14+
from premium_primitives.part_of_speech_count import PartOfSpeechCount
15+
from premium_primitives.polarity_score import PolarityScore
16+
from premium_primitives.stopword_count import StopwordCount
17+
18+
from premium_primitives.country_code_to_continent import (
819
CountryCodeToContinent,
920
)
10-
from premium_primitives.country_code_to_income import CountryCodeToIncome # noqa: F401
11-
from premium_primitives.country_code_to_population import ( # noqa: F401
21+
from premium_primitives.country_code_to_income import CountryCodeToIncome
22+
from premium_primitives.country_code_to_population import (
1223
CountryCodeToPopulation,
1324
)
14-
from premium_primitives.latlong_to_city import LatLongToCity # noqa: F401
15-
from premium_primitives.latlong_to_countrycode import LatLongToCountryCode # noqa: F401
16-
from premium_primitives.latlong_to_county import LatLongToCounty # noqa: F401
17-
from premium_primitives.latlong_to_state import LatLongToState # noqa: F401
18-
from premium_primitives.phone_number_to_area import PhoneNumberToArea # noqa: F401
19-
from premium_primitives.phone_number_to_country import ( # noqa: F401
25+
from premium_primitives.latlong_to_city import LatLongToCity
26+
from premium_primitives.latlong_to_countrycode import LatLongToCountryCode
27+
from premium_primitives.latlong_to_county import LatLongToCounty
28+
from premium_primitives.latlong_to_state import LatLongToState
29+
from premium_primitives.phone_number_to_area import PhoneNumberToArea
30+
from premium_primitives.phone_number_to_country import (
2031
PhoneNumberToCountry,
2132
)
22-
from premium_primitives.postalcode_to_latlong import PostalCodeToLatLong # noqa: F401
23-
from premium_primitives.postalcode_to_per_capita_income import ( # noqa: F401
33+
from premium_primitives.postalcode_to_latlong import PostalCodeToLatLong
34+
from premium_primitives.postalcode_to_per_capita_income import (
2435
PostalCodeToPerCapitaIncome,
2536
)
26-
from premium_primitives.postalcode_to_state import PostalCodeToState # noqa: F401
27-
from premium_primitives.sub_region_code_to_median_household_income import ( # noqa: F401
37+
from premium_primitives.postalcode_to_state import PostalCodeToState
38+
from premium_primitives.sub_region_code_to_median_household_income import (
2839
SubRegionCodeToMedianHouseholdIncome,
2940
)
30-
from premium_primitives.sub_region_code_to_per_capita_income import ( # noqa: F401
41+
from premium_primitives.sub_region_code_to_per_capita_income import (
3142
SubRegionCodeToPerCapitaIncome,
3243
)
33-
from premium_primitives.sub_region_code_to_region import ( # noqa: F401
44+
from premium_primitives.sub_region_code_to_region import (
3445
SubRegionCodeToRegion,
3546
)
36-
from premium_primitives.version import __version__ # noqa: F401
47+
from premium_primitives.version import __version__
3748

3849
PREMIUM_PRIMITIVES = [
3950
obj
@@ -46,6 +57,12 @@
4657
)
4758
]
4859

60+
nltk_data_path = pkg_resources.resource_filename(
61+
"premium_primitives",
62+
"data/nltk_data/",
63+
)
64+
nltk.data.path.insert(0, nltk_data_path)
65+
4966

5067
# set data primitives BEFORE we import them
5168
premium_primitives_data_folder = pkg_resources.resource_filename(

0 commit comments

Comments
 (0)