# pyproject.toml for the Marin workspace root (fork of marin-community/marin).
# Build backend used to package the workspace root.
[build-system]
build-backend = "hatchling.build"
requires = ["hatchling"]
# Metadata for the workspace-root distribution.
[project]
name = "marin-root"
version = "0.1.0"
description = "Marin workspace root and experiments"
requires-python = ">=3.11"
license = { file = "LICENSE" }
dependencies = [
    # Workspace packages (declared as workspace sources under [tool.uv.sources]).
    "marin-iris",
    "marin-fray",
    "marin-haliax",
    "marin-levanter",
    "marin",
    "marin-rigging",
    "marin-zephyr",
    # Direct top-level dependencies. Keep this list short.
    "watchdog",
    "dupekit >= 0.1.0",
]
# uv settings for the workspace.
[tool.uv]
fork-strategy = "fewest"
# Release-asset pages for the pinned dupekit and kitoken builds.
find-links = [
    "https://github.com/marin-community/marin/releases/expanded_assets/dupekit-0.1.0-40ac799",
    "https://github.com/marin-community/kitoken/releases/expanded_assets/kitoken-0.10.2-a3012f4",
]
# Version requirements that override what dependencies declare.
override-dependencies = [
    "omegaconf>=2.4.0.dev4",
    "antlr4-python3-runtime==4.11",
    "python-multipart>=0.0.22",
    "wheel>=0.46.2",
    "datasets>=3.1.0,<4.0.0",
    # Override vizier's pin for JAX 0.8 compatibility.
    "equinox>=0.11.10",
]
# Directories under lib/ that are members of the uv workspace.
[tool.uv.workspace]
members = [
    "lib/iris",
    "lib/fray",
    "lib/haliax",
    "lib/levanter",
    "lib/marin",
    "lib/rigging",
    "lib/zephyr",
]
# Package sources: the seven Marin packages below resolve to workspace members.
[tool.uv.sources]
marin-iris = { workspace = true }
marin-fray = { workspace = true }
marin-haliax = { workspace = true }
marin-levanter = { workspace = true }
marin = { workspace = true }
marin-rigging = { workspace = true }
marin-zephyr = { workspace = true }
# NOTE: harbor is not a workspace member. It is fetched from the
# marin-community/harbor git repository, pinned to an exact commit.
harbor = { git = "https://github.com/marin-community/harbor.git", rev = "354692d9c0eab497b05f266aa0dff30e2a238d2e" }
# NOTE(review): the marker pair below presumably delimits a region managed by
# tooling for Rust dev sources; keep both lines exactly as written. Confirm.
# ### BEGIN RUST-DEV SOURCES ###
# ### END RUST-DEV SOURCES ###
# Black formatter settings.
[tool.black]
line-length = 121
# Matches the oldest interpreter permitted by `requires-python = ">=3.11"`
# in [project]; the previous value ("py310") targeted an unsupported version.
target-version = ["py311"]
preview = true
# Note :: Grow more strict over time!
# Regular expression of additional paths to skip (currently the scripts/ tree).
extend-exclude = """
(
scripts/
)
"""
# Ruff settings.
[tool.ruff]
line-length = 121
# Kept in step with `requires-python = ">=3.11"` in [project]; the previous
# value ("py310") made version-dependent rules target an unsupported interpreter.
target-version = "py311"
# Note :: Grow more strict over time!
extend-exclude = ["scripts/"]
# Rule families enabled for linting, and individual codes switched off.
[tool.ruff.lint]
select = [
    "A",
    "B",
    "E",
    "F",
    "I",
    "NPY",
    "RUF",
    "UP",
    "W",
]
ignore = [
    "F722",
    "B008",
    "UP015",
    "A005",
    "I001",
    "E741",
]

# Codes waived for files named `__init__.py`.
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401"]
# mypy settings.
[tool.mypy]
# Check against the oldest interpreter permitted by `requires-python = ">=3.11"`
# in [project]; the previous value ("3.10") is below the supported range.
python_version = "3.11"
# Note: Grow more strict over time!
ignore_missing_imports = true
exclude = ["marin/", "scripts/"]
# Configuration for the Pyrefly type checker.
[tool.pyrefly]
# Worktrees are often checked out beneath hidden parent directories (for
# example `.codex/worktrees/*`). Turning off pyrefly's built-in exclude
# heuristics keeps a hidden ancestor from excluding the whole project when
# pyrefly is run from such a path.
disable-project-excludes-heuristics = true
project-includes = [
    "lib/marin/src/**/*.py",
    "lib/levanter/src/**/*.py",
    "lib/haliax/src/**/*.py",
    "lib/fray/src/**/*.py",
    "lib/iris/src/**/*.py",
    "lib/rigging/src/**/*.py",
    "lib/zephyr/src/**/*.py",
]
# Tell Pyrefly explicitly where the editable packages live, so imports resolve
# against the library sources rather than the top-level `src` directory, which
# holds only a handful of legacy modules.
search-path = [
    "lib/marin/src",
    "lib/levanter/src",
    "lib/haliax/src",
    "lib/rigging/src",
    "lib/zephyr/src",
    "lib/fray/src",
    "lib/iris/src",
]
disable-search-path-heuristics = true
# Do not query the Python interpreter for site-packages: that would auto-detect
# workspace members as site packages and exclude them.
skip-interpreter-query = true
use-ignore-files = false
# Non-production code is left out of type checking.
project-excludes = [
    "experiments/**",
    "scripts/**",
    "tests/**",
    # Example code doesn't need strict typing.
    "examples/**",
    # Crawl scripts have library typing issues with smart_open.
    "lib/**/crawl/**",
    # Generated protobuf files.
    "lib/iris/src/iris/rpc/*_pb2*",
    # Generated Connect RPC files.
    "lib/iris/src/iris/rpc/*_connect.py",
    # Key excludes that pyrefly normally adds heuristically.
    "**/node_modules",
    "**/__pycache__",
    "**/venv/**",
]
# Error codes switched off because they are mostly noise from missing type
# stubs. Re-enable them gradually as the codebase improves.
[tool.pyrefly.errors]
# 824 occurrences: missing imports from third-party libraries without stubs.
missing-import = false
# 47 occurrences: unexpected keyword arguments from dynamic configs
# (transformers, dataclasses).
unexpected-keyword = false
# 315 occurrences: missing attributes, often from untyped libraries such as
# ray and jax; mostly false positives caused by incomplete type stubs.
missing-attribute = false

# Further codes with more than 10 occurrences that are hard to fix systematically.
# 19 occurrences: deprecated APIs; fixing needs code changes across multiple files.
deprecated = false
# 17 occurrences: unknown names, often from dynamic code or missing stubs.
unknown-name = false
# 17 occurrences: not-iterable, from complex type inference with JAX/numpy.
not-iterable = false
# 16 occurrences: no matching overload, from complex overload resolution.
no-matching-overload = false
# 12 occurrences: bad index, from complex dict/list type inference.
bad-index = false

# Library/stub-related noise, disabled to focus on real issues.
# 638 occurrences: bad argument types, mostly third-party stub issues.
# Common culprits: smart_open, Flax API, JAX/numpy arrays.
bad-argument-type = false
# 30 occurrences: bad context managers (tqdm and similar libraries).
bad-context-manager = false
# 99 occurrences: missing/bad argument counts from Flax API
# positional/keyword confusion.
missing-argument = false
bad-argument-count = false
# 64 occurrences: unbound names. Pyrefly's control flow analysis doesn't
# understand patterns where program logic guarantees a variable is initialized.
unbound-name = false

# Left enabled to catch real type errors:
# bad-override = true (126 occurrences)
# bad-assignment = true (94 occurrences)
# bad-return = true (84 occurrences)
# unsupported-operation = true (68 occurrences)
# not-callable = true (27 occurrences)
# Hatch metadata: allow direct references in dependency specifications.
[tool.hatch.metadata]
allow-direct-references = true

# Both build targets package only the `experiments` directory.
[tool.hatch.build.targets.wheel]
packages = ["experiments"]

[tool.hatch.build.targets.sdist]
packages = ["experiments"]
# pytest defaults for the workspace.
[tool.pytest.ini_options]
testpaths = ["tests", "experiments"]
# TPU, slow, and integration tests are deselected by default. Integration
# tests need external infrastructure (Iris cluster, GCS, gated HF repos, etc.)
# and run from dedicated CI workflows.
addopts = "--session-timeout=480 -m 'not tpu_ci and not slow and not integration'"
timeout = 60
filterwarnings = ["ignore::DeprecationWarning"]
log_format = "%(asctime)s %(levelname)s %(message)s"
log_date_format = "%Y-%m-%d %H:%M:%S"
markers = [
    "slow: mark tests as slow for CI - use -m 'not slow'",
    "tpu_ci: mark tests that require a TPU",
    "integration: mark tests as integration tests that require cluster infrastructure",
]