Skip to content

Commit

Permalink
Use importlib API (#186)
Browse files (browse the repository at this point in the history)
* Don't repeat yourself

* Use the new-fangled importlib.resources

* Use backport until 3.9 is dead
  • Loading branch information
simoncozens authored Nov 1, 2024
1 parent 1037e30 commit a495359
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 45 deletions.
60 changes: 25 additions & 35 deletions Lib/gflanguages/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,56 +25,46 @@

from gflanguages import languages_public_pb2
from google.protobuf import text_format
from pkg_resources import resource_filename
from importlib_resources import files

try:
from ._version import version as __version__ # type: ignore
except ImportError:
__version__ = "0.0.0+unknown"

DATA_DIR = resource_filename("gflanguages", "data")

def _load_thing(thing_type, proto_class, base_dir=None):
things = {}

def LoadLanguages(base_dir=DATA_DIR):
if base_dir is None:
base_dir = DATA_DIR
def read_a_thing(contents):
proto = proto_class()
thing = text_format.Parse(contents, proto)
assert thing.id not in things, f"Duplicate {thing_type} id: {thing.id}"
things[thing.id] = thing

languages_dir = os.path.join(base_dir, "languages")
langs = {}
for textproto_file in glob.iglob(os.path.join(languages_dir, "*.textproto")):
with open(textproto_file, "r", encoding="utf-8") as f:
language = text_format.Parse(f.read(), languages_public_pb2.LanguageProto())
assert language.id not in langs, f"Duplicate language id: {language.id}"
langs[language.id] = language
return langs
if base_dir is not None:
thing_dir = os.path.join(base_dir, thing_type)
for textproto_file in glob.iglob(os.path.join(thing_dir, "*.textproto")):
with open(textproto_file, "r", encoding="utf-8") as f:
read_a_thing(f.read())
else:
for textproto_file in files("gflanguages.data").joinpath(thing_type).iterdir():
if not textproto_file.name.endswith(".textproto"):
continue
read_a_thing(textproto_file.read_text(encoding="utf-8"))
return things


def LoadScripts(base_dir=DATA_DIR):
if base_dir is None:
base_dir = DATA_DIR
def LoadLanguages(base_dir=None):
return _load_thing("languages", languages_public_pb2.LanguageProto, base_dir)

scripts_dir = os.path.join(base_dir, "scripts")
scripts = {}
for textproto_file in glob.iglob(os.path.join(scripts_dir, "*.textproto")):
with open(textproto_file, "r", encoding="utf-8") as f:
script = text_format.Parse(f.read(), languages_public_pb2.ScriptProto())
assert script.id not in scripts, f"Duplicate script id: {script.id}"
scripts[script.id] = script
return scripts

def LoadScripts(base_dir=None):
return _load_thing("scripts", languages_public_pb2.ScriptProto, base_dir)

def LoadRegions(base_dir=DATA_DIR):
if base_dir is None:
base_dir = DATA_DIR

regions_dir = os.path.join(base_dir, "regions")
regions = {}
for textproto_file in glob.iglob(os.path.join(regions_dir, "*.textproto")):
with open(textproto_file, "r", encoding="utf-8") as f:
region = text_format.Parse(f.read(), languages_public_pb2.RegionProto())
assert region.id not in regions, f"Duplicate region id: {region.id}"
regions[region.id] = region
return regions
def LoadRegions(base_dir=None):
return _load_thing("regions", languages_public_pb2.RegionProto, base_dir)


def parse(exemplars: str):
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ authors = [
]

dependencies = [
"protobuf>=3.7.0, <4"
"protobuf>=3.7.0, <4",
"importlib_resources", # Needed for 3.9 and below
]

[project.optional-dependencies]
Expand Down
20 changes: 11 additions & 9 deletions tests/test_parsable.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
from gflanguages import DATA_DIR
from importlib_resources import files
import glob
import os
import pytest
from gflanguages import languages_public_pb2
from google.protobuf import text_format


languages_dir = os.path.join(DATA_DIR, "languages")
languages_dir = files("gflanguages.data").joinpath("languages")
textproto_files = [
os.path.basename(x) for x in glob.iglob(os.path.join(languages_dir, "*.textproto"))
file.name for file in languages_dir.iterdir() if file.name.endswith(".textproto")
]


@pytest.mark.parametrize("lang_code", textproto_files)
def test_parsable(lang_code):
with open(os.path.join(languages_dir, lang_code), "r", encoding="utf-8") as f:
msg = text_format.Parse(f.read(), languages_public_pb2.LanguageProto())
assert msg.id
assert msg.language
assert msg.script
assert msg.population is not None
f = languages_dir.joinpath(lang_code)
msg = text_format.Parse(
f.read_text(encoding="utf-8"), languages_public_pb2.LanguageProto()
)
assert msg.id
assert msg.language
assert msg.script
assert msg.population is not None

0 comments on commit a495359

Please sign in to comment.