Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@

target
.venv
.env
__pycache__
.idea
dist
.DS_Store
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.12.3
32 changes: 24 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,33 @@ The files readlex_converter.json and readlex_converter_phrases.json have been de

The file readlex.dict (and addendum.dict) is adapted for use with Dave Coffin's scripts, available at [Dechifro.org](https://www.dechifro.org/shavian/).

## latin2shaw.py
## src/readlex/latin2shaw.py

The file latin2shaw.py is the script I use for the ReadLex Converter. It uses spaCy for part of speech tagging.
The file latin2shaw.py contains the code I use for the ReadLex Converter. It uses spaCy for part of speech tagging.

To use latin2shaw.py you'll need to install the following packages with it:
- spaCy and an English language model (I use en_core_web_sm), following the instructions [here](https://spacy.io/usage)
- unidecode
- smartypants
- bs4 (BeautifulSoup)
To use it as a command line tool:

You will also need to make sure that latin2shaw.py points to the locations where you have saved readlex_converter.json and readlex_converter_phrases.json.
```bash
pip install readlex

# the script can read from stdin and print to stdout
echo "hello world" | latin2shaw

# or use files
latin2shaw --in_file in.txt --out_file out.txt
```

Once installed, it can also be used from Python:

```python
from readlex import latin2shaw

print(latin2shaw("hello world"))
```

### For contributors

Once you have cloned the repo and [installed rye](https://rye.astral.sh/), you can install/sync dependencies with `rye sync`, and run the script with `rye run latin2shaw`.

## Further information

Expand Down
389 changes: 0 additions & 389 deletions latin2shaw.py

This file was deleted.

43 changes: 43 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
[project]
name = "readlex"
version = "0.1.0"
description = "Auto-transliteration of English language text from latin to Shaw script using the Read Lexicon"
authors = [
{ name = "Shavian-info", email = "contact@shavian.info" },
{ name = "Ingrid", email = "git@ingrids.email" }
]
dependencies = [
"spacy>=3.7.4",
"unidecode>=1.3.8",
"smartypants>=2.0.1",
"bs4>=0.0.2",
"en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl",
"typed-argument-parser>=1.10.0",
]
readme = "README.md"
requires-python = ">= 3.8"
exclude = [
"readlex.json",
"kingsleyreadlexicon.tsv",
"readlex.dict",
"addendum.dict"
]

[project.scripts]
latin2shaw = "readlex.latin2shaw:main"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.rye]
managed = true
dev-dependencies = [
"pytest>=8.2.1",
]

[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.build.targets.wheel]
packages = ["src/readlex"]
136 changes: 136 additions & 0 deletions requirements-dev.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# generated by rye
# use `rye lock` or `rye sync` to update this lockfile
#
# last locked with the following flags:
# pre: false
# features: []
# all-features: false
# with-sources: false
# generate-hashes: false

-e file:.
annotated-types==0.7.0
# via pydantic
beautifulsoup4==4.12.3
# via bs4
blis==0.7.11
# via thinc
bs4==0.0.2
# via readlex
catalogue==2.0.10
# via spacy
# via srsly
# via thinc
certifi==2024.2.2
# via requests
charset-normalizer==3.3.2
# via requests
click==8.1.7
# via typer
cloudpathlib==0.16.0
# via weasel
confection==0.1.5
# via thinc
# via weasel
cymem==2.0.8
# via preshed
# via spacy
# via thinc
docstring-parser==0.16
# via typed-argument-parser
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
# via readlex
idna==3.7
# via requests
iniconfig==2.0.0
# via pytest
jinja2==3.1.4
# via spacy
langcodes==3.4.0
# via spacy
language-data==1.2.0
# via langcodes
marisa-trie==1.1.1
# via language-data
markupsafe==2.1.5
# via jinja2
murmurhash==1.0.10
# via preshed
# via spacy
# via thinc
mypy-extensions==1.0.0
# via typing-inspect
numpy==1.26.4
# via blis
# via spacy
# via thinc
packaging==24.0
# via pytest
# via spacy
# via thinc
# via weasel
pluggy==1.5.0
# via pytest
preshed==3.0.9
# via spacy
# via thinc
pydantic==2.7.2
# via confection
# via spacy
# via thinc
# via weasel
pydantic-core==2.18.3
# via pydantic
pytest==8.2.1
requests==2.32.3
# via spacy
# via weasel
setuptools==70.0.0
# via marisa-trie
# via spacy
# via thinc
smart-open==6.4.0
# via spacy
# via weasel
smartypants==2.0.1
# via readlex
soupsieve==2.5
# via beautifulsoup4
spacy==3.7.4
# via en-core-web-sm
# via readlex
spacy-legacy==3.0.12
# via spacy
spacy-loggers==1.0.5
# via spacy
srsly==2.4.8
# via confection
# via spacy
# via thinc
# via weasel
thinc==8.2.3
# via spacy
tqdm==4.66.4
# via spacy
typed-argument-parser==1.10.0
# via readlex
typer==0.9.4
# via spacy
# via weasel
typing-extensions==4.12.0
# via pydantic
# via pydantic-core
# via typer
# via typing-inspect
typing-inspect==0.9.0
# via typed-argument-parser
unidecode==1.3.8
# via readlex
urllib3==2.2.1
# via requests
wasabi==1.1.3
# via spacy
# via thinc
# via weasel
weasel==0.3.4
# via spacy
130 changes: 130 additions & 0 deletions requirements.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# generated by rye
# use `rye lock` or `rye sync` to update this lockfile
#
# last locked with the following flags:
# pre: false
# features: []
# all-features: false
# with-sources: false
# generate-hashes: false

-e file:.
annotated-types==0.7.0
# via pydantic
beautifulsoup4==4.12.3
# via bs4
blis==0.7.11
# via thinc
bs4==0.0.2
# via readlex
catalogue==2.0.10
# via spacy
# via srsly
# via thinc
certifi==2024.2.2
# via requests
charset-normalizer==3.3.2
# via requests
click==8.1.7
# via typer
cloudpathlib==0.16.0
# via weasel
confection==0.1.5
# via thinc
# via weasel
cymem==2.0.8
# via preshed
# via spacy
# via thinc
docstring-parser==0.16
# via typed-argument-parser
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
# via readlex
idna==3.7
# via requests
jinja2==3.1.4
# via spacy
langcodes==3.4.0
# via spacy
language-data==1.2.0
# via langcodes
marisa-trie==1.1.1
# via language-data
markupsafe==2.1.5
# via jinja2
murmurhash==1.0.10
# via preshed
# via spacy
# via thinc
mypy-extensions==1.0.0
# via typing-inspect
numpy==1.26.4
# via blis
# via spacy
# via thinc
packaging==24.0
# via spacy
# via thinc
# via weasel
preshed==3.0.9
# via spacy
# via thinc
pydantic==2.7.2
# via confection
# via spacy
# via thinc
# via weasel
pydantic-core==2.18.3
# via pydantic
requests==2.32.3
# via spacy
# via weasel
setuptools==70.0.0
# via marisa-trie
# via spacy
# via thinc
smart-open==6.4.0
# via spacy
# via weasel
smartypants==2.0.1
# via readlex
soupsieve==2.5
# via beautifulsoup4
spacy==3.7.4
# via en-core-web-sm
# via readlex
spacy-legacy==3.0.12
# via spacy
spacy-loggers==1.0.5
# via spacy
srsly==2.4.8
# via confection
# via spacy
# via thinc
# via weasel
thinc==8.2.3
# via spacy
tqdm==4.66.4
# via spacy
typed-argument-parser==1.10.0
# via readlex
typer==0.9.4
# via spacy
# via weasel
typing-extensions==4.12.0
# via pydantic
# via pydantic-core
# via typer
# via typing-inspect
typing-inspect==0.9.0
# via typed-argument-parser
unidecode==1.3.8
# via readlex
urllib3==2.2.1
# via requests
wasabi==1.1.3
# via spacy
# via thinc
# via weasel
weasel==0.3.4
# via spacy
5 changes: 5 additions & 0 deletions src/readlex/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from readlex.latin2shaw import latin2shaw

__all__ = [
"latin2shaw",
]
Loading