Skip to content

Commit f97e464

Browse files
committed
release
0 parents  commit f97e464

168 files changed

Lines changed: 17465 additions & 0 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/mypy-check.yml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
name: MyPy Check
2+
3+
on:
4+
pull_request:
5+
jobs:
6+
mypy-check:
7+
runs-on: ubuntu-latest
8+
strategy:
9+
fail-fast: false
10+
matrix:
11+
python-version: ["3.12"]
12+
env:
13+
PY_COLORS: "1"
14+
steps:
15+
- uses: actions/checkout@v4
16+
- uses: actions/setup-python@v5
17+
with:
18+
python-version: ${{ matrix.python-version }}
19+
cache: "pip"
20+
- name: Set up authentication for private repo
21+
run: |
22+
git config --global url."https://${{ secrets.POLYMATHIC_REPO_ACCESS }}@github.com/".insteadOf "https://github.com/"
23+
- name: Install package
24+
run: |
25+
python -m pip install --upgrade pip
26+
pip install --extra-index-url https://download.pytorch.org/whl/cpu ".[test]"
27+
pip install mypy
28+
- name: Run MyPy
29+
id: mypy
30+
run: |
31+
mypy walrus tests --install-types --non-interactive &> mypy_output.txt || true
32+
- name: Read MyPy Output
33+
id: read-mypy
34+
run: |
35+
output=$(cat mypy_output.txt | grep -e "error:" -e "note:" -e "warning:" -e "Found")
36+
echo "output<<EOF" >> $GITHUB_OUTPUT
37+
echo "$output" >> $GITHUB_OUTPUT
38+
echo "EOF" >> $GITHUB_OUTPUT
39+
continue-on-error: true
40+
- name: Comment PR
41+
uses: thollander/actions-comment-pull-request@v2
42+
with:
43+
message: |
44+
## [Automatically-Generated MyPy Results]
45+
46+
<details>
47+
<summary>Click to expand/collapse MyPy results</summary>
48+
49+
```
50+
${{ steps.read-mypy.outputs.output }}
51+
```
52+
53+
</details>
54+
55+
_Generated by the file [`mypy-check.yml`](https://github.com/${{ github.repository }}/blob/${{ github.sha }}/.github/workflows/mypy-check.yml)._
56+
comment_tag: mypy-results
57+
GITHUB_TOKEN: ${{ secrets.POLYMATHIC_COMMENT_BOT }}

.github/workflows/tests.yaml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
name: Tests
2+
3+
on:
4+
pull_request:
5+
push:
6+
branches:
7+
- main
8+
jobs:
9+
build:
10+
runs-on: ubuntu-latest
11+
strategy:
12+
fail-fast: false
13+
matrix:
14+
python-version: ["3.10", "3.12"]
15+
env:
16+
PY_COLORS: "1"
17+
steps:
18+
- uses: actions/checkout@v4
19+
- uses: actions/setup-python@v5
20+
with:
21+
python-version: ${{ matrix.python-version }}
22+
cache: "pip"
23+
- name: Set up authentication for private repo
24+
run: |
25+
git config --global url."https://${{ secrets.POLYMATHIC_REPO_ACCESS }}@github.com/".insteadOf "https://github.com/"
26+
- name: Install package
27+
run: |
28+
python -m pip install --upgrade pip
29+
pip install --extra-index-url https://download.pytorch.org/whl/cpu ".[test]"
30+
- name: Run linter
31+
run: |
32+
ruff check walrus tests
33+
ruff check --select I walrus tests
34+
- name: Run tests
35+
run: python -m pytest tests

.gitignore

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Ignore files generated by the build process
2+
build/
3+
dist/
4+
*.egg-info/
5+
6+
# Ignore system and IDE files
7+
.DS_Store
8+
Thumbs.db
9+
.idea/
10+
11+
#Ignoring the data
12+
datasets/active_matter/data/
13+
14+
datasets/euler_quadrants/data/
15+
datasets/euler_quadrants/data_storage_non_benchmarked/
16+
17+
datasets/pattern_formation/data/
18+
19+
20+
datasets/helmholtz_staircase/data/
21+
datasets/viscoelastic_instability/data/
22+
23+
2D/neutron_star_disks/
24+
2D/planetswe/data/
25+
26+
datasets/rayleigh_benard/data/
27+
28+
datasets/shear_flow/data/
29+
datasets/supernova_explosion_128/data/
30+
datasets/supernova_explosion_64/data/
31+
datasets/turbulence_gravity_cooling/data/
32+
datasets/rayleigh_taylor_instability/data/
33+
datasets/turbulent_radiative_layer_3D/data/
34+
datasets/split_turbulent_radiative_layer_3D/
35+
datasets/turbulent_radiative_layer_2D/data/
36+
datasets/acoustic_scattering_discontinuous_2d/data/
37+
datasets/acoustic_scattering_inclusions_2d/data/
38+
datasets/acoustic_scattering_maze_2d/data/
39+
datasets/planetswe/data/
40+
datasets/post_neutron_star_merger/data/
41+
datasets/acoustic_scattering_discontinuous_2d/gif/
42+
datasets/acoustic_scattering_inclusions_2d/gif/
43+
datasets/acoustic_scattering_maze_2d/gif/
44+
2D/
45+
3D/
46+
3D/rayleigh_taylor_instability/data/
47+
the_well/benchmark/scripts_to_launch/
48+
the_well/benchmark/write_bash_script.ipynb
49+
the_well/benchmark/checkpoints/
50+
datasets/convective_envelope_rsg/data/
51+
datasets/MHD_64/data/
52+
datasets/MHD_256/data/
53+
datasets/convective_envelope_rsg/sim.mp4
54+
testing_before_adding/
55+
viz/
56+
venv_benchmark_well/
57+
# Ignore logs and temporary files
58+
*.log
59+
*.tmp
60+
*.pt
61+
*.gif
62+
63+
# Ignore compiled binaries and libraries
64+
*.exe
65+
*.dll
66+
*.so
67+
68+
# Ignore package manager directories
69+
node_modules/
70+
vendor/
71+
72+
# Ignore environment-specific files
73+
.env
74+
.env.local
75+
.env.*.local
76+
77+
# Ignore sensitive or private information
78+
secrets.txt
79+
credentials.json
80+
81+
# Ignore backup files
82+
*.bak
83+
*.swp
84+
85+
# Ignore generated files
86+
*.min.js
87+
*.min.css
88+
__pycache__
89+
90+
# Ignore run generated output
91+
outputs/
92+
wandb/

.pre-commit-config.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
repos:
2+
- repo: https://github.com/pre-commit/pre-commit-hooks
3+
rev: v5.0.0
4+
hooks:
5+
- id: check-merge-conflict
6+
- id: end-of-file-fixer
7+
- id: mixed-line-ending
8+
args: [--fix=lf]
9+
- id: trailing-whitespace
10+
- repo: https://github.com/astral-sh/ruff-pre-commit
11+
# Ruff version.
12+
rev: "v0.8.3"
13+
hooks:
14+
# Run the linter.
15+
- id: ruff
16+
args: [--fix]
17+
# Sort imports
18+
- id: ruff
19+
args: [--select, I, --fix]
20+
- repo: https://github.com/pre-commit/mirrors-mypy
21+
rev: "v1.13.0" # Use the sha / tag you want to point at
22+
hooks:
23+
- id: mypy
24+
args: [--install-types, --non-interactive]

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2024 Polymathic AI
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
<!-- ![Walrus logo](assets/walrus_circle.png) -->
2+
<!-- <img src="assets/walrus_logo.png" width="360"> -->
3+
4+
# Walrus: A Cross-domain Foundation Model for Continuum Dynamics
5+
<div align="center">
6+
7+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8+
[![PyTorch](https://img.shields.io/badge/PyTorch-≥2.4.0-ee4c2c.svg)](https://pytorch.org/)
9+
[![arXiv](https://img.shields.io/badge/arXiv-2511.15684-b31b1b.svg)](https://arxiv.org/abs/2511.15684)
10+
[![Model on HF](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-yellow)](https://huggingface.co/polymathic-ai/walrus)
11+
12+
[Getting Started](#getting-started) • [Tutorials](#tutorials) • [Model Overview](#model-overview)
13+
14+
</div>
15+
16+
---
17+
18+
## Overview
19+
<div align="center">
20+
<img src="assets/ArchitectureWIP.png" alt="Walrus schematic" width="600">
21+
</div>
22+
23+
This repo is built for training and evaluating Walrus, a multi-domain foundation model for continuum dynamics trained primarily on fluid-like behaviors.
24+
Walrus was trained on 19 different physical scenarios spanning 63 physical variables in both 2D and 3D. Walrus utilizes new tools for adaptive computation and improved stability
25+
in order to achieve accurate long-term rollouts while co-adapting sampling and distribution to improve training throughput despite handling varying dimensions, resolutions, and
26+
aspect ratios.
27+
28+
## Getting Started
29+
30+
### Installation
31+
32+
Clone the repository and install locally. Requirements are documented in the pyproject.toml file.
33+
34+
Most of the data used in experiments is from [the Well](https://github.com/PolymathicAI/the_well),
35+
so it may be easier to get started if you have the Well at least partially available.
36+
37+
```bash
38+
git clone git@github.com:PolymathicAI/walrus.git
39+
cd walrus
40+
pip install .
41+
```
42+
By default, this repository does not include all dependencies of non-Walrus models. To install optional dependencies,
43+
instead run:
44+
45+
```bash
46+
pip install .[external_modes]
47+
```
48+
## Running Experiments
49+
50+
This project is orchestrated using [Hydra](https://github.com/facebookresearch/hydra) to give users modular access over
51+
various model components, datasets, and runtime options. All training was done in slurm environments. Example invocations
52+
for training, validating, and finetuning Walrus models can be found in [walrus/run_scripts](walrus/run_scripts).
53+
54+
Most of these use relative paths and slurm. For example, one would launch the training script with:
55+
```bash
56+
cd /path/to/thisfolder/walrus/walrus
57+
sbatch run_scripts/pretrain_example_distributed_walrus.sh
58+
```
59+
Local invocations are also available for smaller training and validation runs.
60+
61+
## Tutorials
62+
63+
To help get you started, we include a set of demo notebooks. These are:
64+
65+
- [Transforming data into Well Format](demo_notebooks/walrus_example_0_ConvertingDataIntoWellFormat.ipynb) - Since our repository largely uses structures from the Well, we include a guide
66+
to transforming data so it can be used easily within this repository.
67+
- [Running Walrus](demo_notebooks/walrus_example_1_RunningWalrus.ipynb) - This notebook walks through the basics of how to use Walrus both with Well data and with external data.
68+
69+
If you already have a local copy of the Well, you can skip straight to example 1. However, if you do not, it's recommended to follow example 0 to get some data to use. We also include several example run scripts for training and validation, both using slurm and torchrun for distribution and just running single-GPU code.
70+
71+
## Model Overview
72+
73+
### Architecture
74+
75+
Walrus uses an encoder-processor-decoder structure. Encoder/decoder are hMLPs/transposed hMLPs using stride modulation to dynamically adjust the
76+
internal resolution. The processor consists of blocks containing factorized space and time attention.
77+
78+
### Patch Jittering
79+
80+
Walrus suppresses the growth of long-run instabilities through the use of *patch jittering*. Patch jittering involves randomly translating the reference frame (with padding for boundaries)
81+
before each step. While the paper goes into more theoretical detail on why this works, the core idea is that the specific downsampling pattern leads to predictable accumulation
82+
of error and that randomizing this process can help alleviate this pathology.
83+
84+
### Adaptive Compute
85+
86+
To handle varying compute budgets and problem complexities, we also employ [stride modulation](https://arxiv.org/pdf/2507.09264) to allow users to adjust
87+
their downstream resolution. During pretraining, this was used to keep internal resolution fairly consistent (32/33 per dim in 2D, 16/17 in 3D). In this approach, the downsampling
88+
layers of the encoder/decoder will dynamically adjust their stride based on a target internal resolution.
89+
90+
### Efficient Training
91+
92+
Heterogeneous data can easily lead to training bottlenecks as many distribution primitives require regular syncing. Our approach
93+
adjusts downsampling to ensure consistent token counts, but also adjusts context length and batch size to minimize gaps. This isn't
94+
enough to completely eliminate discrepancies, so we must also adjust our sampling strategy to be aware of where these dead cycles can emerge.
95+
We implement sampling such that when training with HSDP, all nodes within a sharding group are forced to sample the same data source. This
96+
balances batch diversity and efficiency.
97+
98+
## License
99+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
100+
101+
## Contact
102+
103+
- **Issues**: [GitHub Issues](https://github.com/PolymathicAI/walrus/issues)
104+
105+
For other queries, please reach out to the corresponding author: mmccabe@flatironinstitute.org.
106+
107+
## Acknowledgements
108+
109+
Walrus is built by [Polymathic AI](https://polymathic-ai.org/) as part of our mission of advancing the frontier of AI for scientific application. Polymathic AI gratefully acknowledges funding from the Simons Foundation and Schmidt Sciences, LLC. This work was performed with compute from the Scientific Computing Core, a
110+
division of the Flatiron Institute, a division of the Simons Foundation and from the National AI Research Resource Pilot, including support from NVIDIA
111+
and NVIDIA’s DGX Cloud product which includes the NVIDIA AI Enterprise Software Platform.
112+
113+
## Citing Walrus
114+
115+
Please use the following citation for Walrus:
116+
117+
```
118+
@misc{mccabe2025walruscrossdomainfoundationmodel,
119+
title={Walrus: A Cross-Domain Foundation Model for Continuum Dynamics},
120+
author={Michael McCabe and Payel Mukhopadhyay and Tanya Marwah and Bruno Regaldo-Saint Blancard and Francois Rozet and Cristiana Diaconu and Lucas Meyer and Kaze W. K. Wong and Hadi Sotoudeh and Alberto Bietti and Irina Espejo and Rio Fear and Siavash Golkar and Tom Hehir and Keiya Hirashima and Geraud Krawezik and Francois Lanusse and Rudy Morel and Ruben Ohana and Liam Parker and Mariel Pettee and Jeff Shen and Kyunghyun Cho and Miles Cranmer and Shirley Ho},
121+
year={2025},
122+
eprint={2511.15684},
123+
archivePrefix={arXiv},
124+
primaryClass={cs.LG},
125+
url={https://arxiv.org/abs/2511.15684},
126+
}
127+
```
128+

assets/ArchitectureWIP.png

458 KB
Loading

assets/the_well_logo.png

22.7 KB
Loading

assets/walrus_logo.png

1.25 MB
Loading

0 commit comments

Comments
 (0)