Skip to content

Commit 2d87e9d

Browse files
authored
Merge pull request #7 from andrusha/type-hints
Type hints and comments
2 parents 635dd04 + d353e00 commit 2d87e9d

File tree

6 files changed

+65
-4
lines changed

6 files changed

+65
-4
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "pandas-maxminddb"
3-
version = "0.2.0"
3+
version = "0.2.1"
44
authors = ["Andrew Korzhuev <[email protected]>"]
55
edition = "2021"
66

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "maturin"
44

55
[project]
66
name = "pandas-maxminddb"
7-
version = "0.2.0"
7+
version = "0.2.1"
88
description = "Fast geolocation library for Pandas Dataframes, built on Numpy C-FFI"
99
requires-python = ">=3.8"
1010
license = {text = "MIT"}

python/pandas_maxminddb/__init__.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,20 @@
44

55
from .pandas_maxminddb import Reader, ReaderMem, ReaderMmap, mmdb_geolocate
66

7-
__all__ = ["open_database", "GeoAccessor", "ReaderMem", "ReaderMmap"]
7+
__all__ = ["open_database", "GeoAccessor", "Reader", "ReaderMem", "ReaderMmap"]
88

99

1010
@contextmanager
1111
def open_database(mmdb_path: str, mmap=False) -> Reader:
12+
"""
13+
If you want to manage the lifetime of the object yourself,
14+
then instantiate ReaderMem / ReaderMmap yourself
15+
16+
:param mmdb_path: path to the maxmind db file
17+
:param mmap: whether to use memory mapping; useful for big files and few lookups
18+
:return: corresponding context-managed Reader, which can be used with a `with` statement
19+
"""
20+
1221
if mmap:
1322
yield ReaderMmap(mmdb_path)
1423
else:
@@ -17,13 +26,22 @@ def open_database(mmdb_path: str, mmap=False) -> Reader:
1726

1827
@pd.api.extensions.register_dataframe_accessor("geo")
1928
class GeoAccessor:
29+
"""
30+
Defines a DataFrame extension, accessible as `some_df.geo.geolocate`
31+
"""
32+
2033
def __init__(self, pandas_obj: pd.DataFrame):
2134
self._obj = pandas_obj
2235

2336
def geolocate(
2437
self, ip_column_name: str, reader: Reader, geo_columns: list = None, parallel=False, parallel_chunk_size=1024
2538
) -> pd.DataFrame:
2639
"""
40+
:param ip_column_name: name of the dataframe column containing IPs; malformed IPs are ignored
41+
:param reader: one of the reader classes
42+
:param geo_columns: list of columns to lookup
43+
:param parallel: whether lookups should be done in parallel (uses all the available cores)
44+
:param parallel_chunk_size: size of the chunks into which the IP list is split for parallel processing
2745
:return: appends geolocation information based on the given IP address column
2846
"""
2947
if geo_columns is None:
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from typing import Dict, List
2+
3+
import numpy
4+
5+
class Reader:
6+
"""
7+
Abstract superclass of all the readers; cannot be instantiated,
8+
should be used as a type hint
9+
"""
10+
11+
class ReaderMem(Reader):
12+
"""
13+
Loads the MMDB into memory; required when parallel processing is used
14+
"""
15+
16+
def __init__(self, mmdb_path: str) -> None:
17+
"""
18+
:param mmdb_path: path to maxmind db file
19+
"""
20+
21+
class ReaderMmap(Reader):
22+
"""
23+
Uses a memory map to read the db, so only the records you're accessing are read from disk.
24+
Useful when memory is limited and few lookups are made
25+
"""
26+
27+
def __init__(self, mmdb_path: str) -> None:
28+
"""
29+
:param mmdb_path: path to maxmind db file
30+
"""
31+
32+
def mmdb_geolocate(
33+
ips: numpy.ndarray, reader: Reader, columns: List[str], parallel: bool, parallel_chunk_size: int
34+
) -> Dict[str, numpy.ndarray]:
35+
"""
36+
37+
:param ips: ndarray of ip strings
38+
:param reader: one of the reader subclasses
39+
:param columns: list of columns to fetch
40+
:param parallel: if processing should be done in parallel
41+
:param parallel_chunk_size: chunk size for ips to be split for parallel processing
42+
:return: dict mapping each requested column to an ndarray of lookup results
43+
"""

python/pandas_maxminddb/py.typed

Whitespace-only changes.

0 commit comments

Comments
 (0)