Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,24 @@ pip install -e .
```

If you run into cryptic errors about GCC on macOS while installing the requirements, try this instead:

```bash
CFLAGS='-stdlib=libc++' pip install -r requirements.in
```

## Data
Alternatively, you can use `uv`:

```bash
git clone https://github.com/allenai/S2APLER.git
cd S2APLER
uv venv
source .venv/bin/activate # On Windows: .venv\Scripts\activate
uv sync
uv pip install -e .
```

## Data

To obtain the paper clustering dataset, run the following command after the package is installed (from inside the `S2APLER` directory):
```[Expected download size is: 3.1 GiB]```

Expand Down
107 changes: 107 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Core package metadata (PEP 621 `[project]` table).
[project]
name = "s2apler"
version = "0.2.6"
description = "S2APLER: Semantic Scholar (S2) Agglomeration of Papers with Low Error Rate"
readme = "README.md"
# Lowest supported interpreter; the tool sections further down target 3.8 as well.
requires-python = ">=3.8"
license = { text = "Apache-2.0" }
authors = [{ name = "Allen Institute for Artificial Intelligence" }]
keywords = [
    "clustering",
    "papers",
    "semantic-scholar",
    "machine-learning",
]
# Trove classifiers shown on the package index.
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]

# Runtime requirements as PEP 508 strings. Most are exact pins (`==`);
# a few only set a lower bound, and numpy is additionally capped below 2.
dependencies = [
    "numpy>=1.23.4,<2",
    "scikit-learn>=1.1.3",
    "text-unidecode==1.3",
    "requests==2.28.1",
    "hyperopt==0.2.7",
    "pandas==1.5.1",
    "lightgbm>=3.3.5",
    "fastcluster==1.2.6",
    "shap==0.41.0",
    "matplotlib>=3.6.2",
    "seaborn==0.12.1",
    "tqdm==4.64.1",
    "jellyfish==0.9.0",
    "pylatexenc==2.10",
    "awscli>=1.27.12",
]

# Extras installable with `pip install -e ".[dev]"`: test, lint, format
# and type-check tooling, all pinned to exact versions.
[project.optional-dependencies]
dev = [
    "pytest==7.2.0",
    "pytest-cov==4.0.0",
    "flake8==5.0.4",
    "black==22.10",
    "click==8.0.4",
    "mypy==0.991",
    "pydantic==1.10.2",
]

# Project links published with the package metadata; all point at the GitHub repository.
[project.urls]
Homepage = "https://github.com/allenai/S2APLER"
Repository = "https://github.com/allenai/S2APLER"
Issues = "https://github.com/allenai/S2APLER/issues"

# PEP 517 build configuration: the package is built with the hatchling backend.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Tell hatchling explicitly which package directory goes into the wheel.
[tool.hatch.build.targets.wheel]
packages = ["s2apler"]

# Development tools for uv users; `uv sync` installs the `dev` group by default.
# Declared as a standard PEP 735 dependency group because uv treats the older
# `[tool.uv] dev-dependencies` field as legacy in favor of this table.
# Keep this list in sync with `[project.optional-dependencies].dev`, which
# serves pip users via `pip install -e ".[dev]"`.
[dependency-groups]
dev = [
    "pytest==7.2.0",
    "pytest-cov==4.0.0",
    "flake8==5.0.4",
    "black==22.10",
    "click==8.0.4",
    "mypy==0.991",
    "pydantic==1.10.2",
]

# Black formatter settings.
[tool.black]
line-length = 120
# py38 corresponds to the minimum version declared in `requires-python`.
target-version = ["py38"]
# Literal string so the regex needs no escaping; matches `.py` and `.pyi` files.
include = '\.pyi?$'

# pytest discovery rules and default command-line options.
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = "test_*.py"
python_classes = "Test*"
python_functions = "test_*"
# The --cov options need the pytest-cov plugin (listed in the dev dependencies).
addopts = "-v --cov=s2apler --cov-report=term-missing"

# mypy type-checking settings.
[tool.mypy]
# Same version as the lower bound in `requires-python`.
python_version = "3.8"
warn_return_any = true
warn_unused_configs = true
# Unannotated function definitions are allowed.
disallow_untyped_defs = false
# Imports that cannot be resolved are not reported as errors.
ignore_missing_imports = true

# coverage.py measurement: measure the s2apler package, leaving test files out.
[tool.coverage.run]
source = ["s2apler"]
omit = ["*/tests/*", "*/test_*.py"]

# coverage.py reporting: each entry is a regular expression, and source
# lines matching any of them are excluded from the coverage report.
[tool.coverage.report]
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "raise AssertionError",
    "raise NotImplementedError",
    # The `.` wildcards stand in for either quote style around __main__.
    "if __name__ == .__main__.:",
    "if TYPE_CHECKING:",
]
Loading