diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 738c7290a..f8235bec9 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -47,11 +47,11 @@ jobs: test-x86_64: name: Test on Ubuntu x86_64 runs-on: self-hosted - timeout-minutes: 120 + timeout-minutes: 180 container: image: ubuntu:22.04 options: - --shm-size=32g + --shm-size=48g -v /home/gha/cache-setup:/github/home/output:rw defaults: run: @@ -86,6 +86,8 @@ jobs: ./build_setup.sh - name: Test sha_hasher + env: + ZISK_TEMPLATE_BRANCH: pre-develop-0.16.0 run: | cd "$GITHUB_WORKSPACE/tools/test-env" ./test_sha_hasher.sh @@ -98,7 +100,7 @@ jobs: - name: Verify Constraints Ethereum block env: DISABLE_PROVE: "1" - BLOCK_INPUTS: "18885301_210_24_rsp.bin" + BLOCK_INPUTS: "mainnet_24626900_221_16_zec_reth.bin" run: | cd "$GITHUB_WORKSPACE/tools/test-env" ./test_eth_block.sh @@ -106,7 +108,7 @@ jobs: - name: Prove Ethereum block env: DISABLE_ROM_SETUP: "1" - BLOCK_INPUTS_DISTRIBUTED: "21429020_5_0_rsp.bin" + BLOCK_INPUTS_DISTRIBUTED: "mainnet_24628607_66_7_zec_reth.bin" run: | cd "$GITHUB_WORKSPACE/tools/test-env" ./test_eth_block.sh @@ -114,7 +116,7 @@ jobs: test-macos: name: Test on macOS runs-on: macos-14 - timeout-minutes: 15 + timeout-minutes: 30 defaults: run: shell: bash @@ -140,6 +142,8 @@ jobs: ./build_zisk.sh - name: Test sha_hasher + env: + ZISK_TEMPLATE_BRANCH: pre-develop-0.16.0 run: | cd "$GITHUB_WORKSPACE/tools/test-env" ./test_sha_hasher.sh diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ba5216afd..dc9ccc284 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -117,7 +117,6 @@ jobs: cp ./target/${TARGET}/release/riscv2zisk zisk-dist/bin/ cp ./target/${TARGET}/release/zisk-coordinator zisk-dist/bin/ cp ./target/${TARGET}/release/zisk-worker zisk-dist/bin/ - cp ./target/${TARGET}/release/libzisk_witness.${LIB_EXT} zisk-dist/bin/ cp ./ziskup/ziskup zisk-dist/bin/ cp ./target/${TARGET}/release/libziskclib.a zisk-dist/bin/ diff --git a/.gitignore b/.gitignore index 1a3447bf3..14573f554 100644 --- a/.gitignore +++ b/.gitignore @@ -8,17 +8,23 @@ *.pilout *.fixed /tmp +**/tmp *.log /emulator-asm/build* +/emulator-asm/src/dma/*.o +/emulator-asm/src/dma/test/*.o +/emulator-asm/src/dma/test/test_dma +/emulator-asm/src/dma/test/build /emulator-asm/src/emu.asm /cache /lib-c/c/build /lib-c/c/lib /lib-float/c/build /logs -precompiles/keccakf/src/keccakf_fixed.bin -precompiles/sha256f/src/sha256f_fixed.bin state-machines/frequent-ops/src/frequent_ops_fixed.bin state-machines/arith/src/arith_frops_fixed.bin state-machines/binary/src/binary_basic_frops_fixed.bin state-machines/binary/src/binary_extension_frops_fixed.bin +reth-inputs/ +/hints +/benchmark \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index c1f559e01..40f42d394 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -15,8 +15,6 @@ //"--bin", //"proofman-cli", "verify-constraints", - "--witness-lib", - "../zisk/target/debug/libzisk_witness.so", "--elf", "../zisk-testvectors/pessimistic-proof/program/pessimistic-proof-program-keccak.elf", "-i", diff --git a/.vscode/settings.json b/.vscode/settings.json index 9709e08be..56aa3b430 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,7 +3,7 @@ "[rust]": { "editor.defaultFormatter": "rust-lang.rust-analyzer", "editor.formatOnSave": true, - "editor.hover.enabled": true + "editor.hover.enabled": "on" }, "editor.rulers": [ 100 diff --git a/Cargo.lock b/Cargo.lock index 74a646a89..85adaec5e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,15 +2,6 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "addr2line" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" -dependencies = [ - "gimli", -] - [[package]] name = "adler2" version = "2.0.1" @@ -24,7 +15,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", - "getrandom 0.3.4", "once_cell", "version_check", "zerocopy", @@ -40,12 +30,12 @@ dependencies = [ ] [[package]] -name = "aligned-vec" -version = "0.6.4" +name = "alloca" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b" +checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" dependencies = [ - "equator", + "cc", ] [[package]] @@ -121,9 +111,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "ark-bls12-381" @@ -196,7 +186,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62945a2f7e6de02a31fe400aa489f0e0f5b2502e69f95f853adb82a96c7a6b60" dependencies = [ "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -209,7 +199,7 @@ dependencies = [ "num-traits", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -238,6 +228,17 @@ dependencies = [ "ark-std", ] +[[package]] +name = "ark-secp256r1" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cf8be5820de567729bfa73a410ddd07cec8ad102d9a4bf61fd6b2e60db264e8" +dependencies = [ + "ark-ec", + "ark-ff", + "ark-std", +] + [[package]] name = "ark-serialize" version = "0.5.0" @@ -259,7 +260,7 @@ checksum = "213888f660fddcca0d257e88e54ac05bca01885f258ccdf695bafd77031bb69d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -292,22 +293,60 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "asm-runner" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", - "clap", "libc", "mem-common", "mem-planner-cpp", "named-sem", + "proofman-common", "rayon", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", - "ureq", "zisk-common", "zisk-core", ] +[[package]] +name = "asn1-rs" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56624a96882bb8c26d61312ae18cb45868e5a9992ea73c58e45c3101e56a1e60" +dependencies = [ + "asn1-rs-derive", + "asn1-rs-impl", + "displaydoc", + "nom", + "num-traits", + "rusticata-macros", + "thiserror 2.0.18", + "time", +] + +[[package]] +name = "asn1-rs-derive" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3109e49b1e4909e9db6515a30c633684d68cdeaa252f215214cb4fa1a5bfee2c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "asn1-rs-impl" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -327,7 +366,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -338,7 +377,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -353,11 +392,33 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "aws-lc-rs" +version = "1.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94bffc006df10ac2a68c83692d734a465f8ee6c5b384d8545a636f81d858f4bf" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4321e568ed89bb5a7d291a7f37997c2c0df89809d7b6d12062c81ddb54aa782e" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + [[package]] name = "axum" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b098575ebe77cb6d14fc7f32749631a6e44edbef6b796f89b020e99ba20d425" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ "axum-core", "bytes", @@ -380,9 +441,9 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", @@ -397,25 +458,10 @@ dependencies = [ ] [[package]] -name = "backtrace" -version = "0.3.76" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-link 0.2.1", -] - -[[package]] -name = "base16ct" -version = "0.2.0" +name = "base64" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "base64" @@ -423,30 +469,13 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" -[[package]] -name = "base64ct" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e050f626429857a27ddccb31e0aca21356bfa709c04041aefddac081a8f068a" - [[package]] name = "bincode" -version = "2.0.1" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" dependencies = [ - "bincode_derive", "serde", - "unty", -] - -[[package]] -name = "bincode_derive" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" -dependencies = [ - "virtue", ] [[package]] @@ -455,7 +484,7 @@ version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ - "bitflags 2.10.0", + "bitflags", "cexpr", "clang-sys", "itertools 0.12.1", @@ -466,35 +495,30 @@ dependencies = [ "regex", "rustc-hash 1.1.0", "shlex", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.10.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" dependencies = [ "serde_core", ] [[package]] name = "blake3" -version = "1.8.2" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", + "cpufeatures", ] [[package]] @@ -526,7 +550,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -541,21 +565,21 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "bytemuck" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "camino" @@ -578,42 +602,37 @@ dependencies = [ [[package]] name = "cargo-zisk" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", "asm-runner", - "bytemuck", "clap", "colored", "dirs", "executor", - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "futures", "indicatif", - "libloading", "mpi", "proofman", "proofman-common", "proofman-util", - "proofman-verifier", "rand 0.9.2", "reqwest", "rom-setup", "serde", "serde_json", - "server", - "sysinfo 0.37.2", + "sysinfo 0.38.4", "target-lexicon", "tokio", "tracing", - "vergen", + "vergen-git2", "yansi", "zisk-build", "zisk-common", "zisk-core", "zisk-pil", "zisk-sdk", - "zstd", ] [[package]] @@ -627,7 +646,7 @@ dependencies = [ "semver", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -638,9 +657,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.50" +version = "1.2.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f50d563227a1c37cc0a263f64eca3334388c01c5e4c4861a9def205c614383c" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" dependencies = [ "find-msvc-tools", "jobserver", @@ -648,6 +667,12 @@ dependencies = [ "shlex", ] +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + [[package]] name = "cexpr" version = "0.6.0" @@ -671,9 +696,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", "js-sys", @@ -712,7 +737,7 @@ dependencies = [ [[package]] name = "circuit" -version = "0.15.0" +version = "0.16.0" [[package]] name = "clang-sys" @@ -727,9 +752,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.53" +version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" dependencies = [ "clap_builder", "clap_derive", @@ -737,9 +762,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.53" +version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" dependencies = [ "anstream", "anstyle", @@ -749,21 +774,30 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.49" +version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] name = "clap_lex" -version = "0.7.6" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + +[[package]] +name = "cmake" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +dependencies = [ + "cc", +] [[package]] name = "colorchoice" @@ -773,11 +807,21 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "colored" -version = "3.0.0" +version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fde0e0ec90c9dfb3b4b1a0891a7dcd0e2bffde2f7efed5fe7c9bb00e5bfb915e" +checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", +] + +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", ] [[package]] @@ -795,7 +839,7 @@ dependencies = [ "serde-untagged", "serde_core", "serde_json", - "toml 0.9.10+spec-1.1.0", + "toml 0.9.12+spec-1.1.0", "winnow", "yaml-rust2", ] @@ -814,10 +858,10 @@ dependencies = [ ] [[package]] -name = "const-oid" -version = "0.9.6" +name = "const-default" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +checksum = "0b396d1f76d455557e1218ec8066ae14bba60b4b36ecd55577ba979f5db7ecaa" [[package]] name = "const-random" @@ -834,16 +878,16 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "once_cell", "tiny-keccak", ] [[package]] name = "constant_time_eq" -version = "0.3.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" [[package]] name = "conv" @@ -863,6 +907,16 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -871,9 +925,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpp_demangle" -version = "0.4.5" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2bb79cb74d735044c972aae58ed0aaa9a837e85b01106a54c39e42e97f62253" +checksum = "0667304c32ea56cb4cd6d2d7c0cfe9a2f8041229db8c033af7f8d69492429def" dependencies = [ "cfg-if", ] @@ -906,7 +960,7 @@ dependencies = [ "cast", "ciborium", "clap", - "criterion-plot", + "criterion-plot 0.5.0", "is-terminal", "itertools 0.10.5", "num-traits", @@ -922,6 +976,31 @@ dependencies = [ "walkdir", ] +[[package]] +name = "criterion" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" +dependencies = [ + "alloca", + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot 0.8.2", + "itertools 0.13.0", + "num-traits", + "oorandom", + "page_size", + "plotters", + "rayon", + "regex", + "serde", + "serde_json", + "tinytemplate", + "walkdir", +] + [[package]] name = "criterion-plot" version = "0.5.0" @@ -932,6 +1011,22 @@ dependencies = [ "itertools 0.10.5", ] +[[package]] +name = "criterion-plot" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" +dependencies = [ + "cast", + "itertools 0.13.0", +] + +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + [[package]] name = "crossbeam" version = "0.8.4" @@ -994,23 +1089,11 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" -[[package]] -name = "crypto-bigint" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" -dependencies = [ - "generic-array", - "rand_core 0.6.4", - "subtle", - "zeroize", -] - [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", @@ -1037,15 +1120,24 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctor" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" +dependencies = [ + "quote", + "syn 2.0.117", +] + [[package]] name = "curves" -version = "0.15.0" -source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.15.0#78497c5a05ea316df2188f98c1df66bffb80192f" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0#d61d40bee66b3c7f02bd4ab8661f9f29ad6730a4" dependencies = [ - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "num-bigint", "num-traits", - "rand 0.9.2", ] [[package]] @@ -1054,6 +1146,41 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.117", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.117", +] + [[package]] name = "dashmap" version = "6.1.0" @@ -1070,12 +1197,18 @@ dependencies = [ [[package]] name = "data-bus" -version = "0.15.0" +version = "0.16.0" dependencies = [ "zisk-common", "zisk-core", ] +[[package]] +name = "data-encoding" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" + [[package]] name = "debugid" version = "0.8.0" @@ -1086,24 +1219,59 @@ dependencies = [ ] [[package]] -name = "der" -version = "0.7.10" +name = "der-parser" +version = "10.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +checksum = "07da5016415d5a3c4dd39b11ed26f915f52fc4e0dc197d87908bc916e51bc1a6" dependencies = [ - "const-oid", - "zeroize", + "asn1-rs", + "displaydoc", + "nom", + "num-bigint", + "num-traits", + "rusticata-macros", ] [[package]] name = "deranged" -version = "0.5.5" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" dependencies = [ "powerfmt", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.117", +] + [[package]] name = "digest" version = "0.10.7" @@ -1111,9 +1279,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", - "const-oid", "crypto-common", - "subtle", ] [[package]] @@ -1145,7 +1311,18 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", +] + +[[package]] +name = "dlmalloc" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6738d2e996274e499bc7b0d693c858b7720b9cd2543a0643a3087e6cb0a4fa16" +dependencies = [ + "cfg-if", + "libc", + "windows-sys 0.61.2", ] [[package]] @@ -1158,18 +1335,10 @@ dependencies = [ ] [[package]] -name = "ecdsa" -version = "0.16.9" +name = "dunce" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" -dependencies = [ - "der", - "digest", - "elliptic-curve", - "rfc6979", - "signature", - "spki", -] +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" [[package]] name = "educe" @@ -1180,7 +1349,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -1196,22 +1365,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b" [[package]] -name = "elliptic-curve" -version = "0.13.8" +name = "embedded-alloc" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" +checksum = "8f2de9133f68db0d4627ad69db767726c99ff8585272716708227008d3f1bddd" dependencies = [ - "base16ct", - "crypto-bigint", - "digest", - "ff", - "generic-array", - "group", - "pkcs8", - "rand_core 0.6.4", - "sec1", - "subtle", - "zeroize", + "const-default", + "critical-section", + "linked_list_allocator", + "rlsf", ] [[package]] @@ -1246,7 +1408,7 @@ checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -1259,59 +1421,16 @@ dependencies = [ ] [[package]] -name = "env_filter" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" -dependencies = [ - "log", - "regex", -] - -[[package]] -name = "env_logger" -version = "0.11.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" -dependencies = [ - "anstream", - "anstyle", - "env_filter", - "jiff", - "log", -] - -[[package]] -name = "equator" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc" -dependencies = [ - "equator-macro", -] - -[[package]] -name = "equator-macro" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.111", -] - -[[package]] -name = "equivalent" -version = "1.0.2" +name = "equivalent" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "erased-serde" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e8918065695684b2b0702da20382d5ae6065cf3327bc2d6436bd49a71ce9f3" +checksum = "d2add8a07dd6a8d93ff627029c51de145e12686fbc36ecb298ac22e74cf02dec" dependencies = [ "serde", "serde_core", @@ -1330,26 +1449,32 @@ dependencies = [ [[package]] name = "executor" -version = "0.15.0" +version = "0.16.0" dependencies = [ + "anyhow", "asm-runner", "crossbeam", "data-bus", - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "itertools 0.14.0", "mem-common", "mem-planner-cpp", + "named-sem", "pil-std-lib", "precomp-arith-eq", "precomp-arith-eq-384", "precomp-big-int", + "precomp-blake2", + "precomp-dma", "precomp-keccakf", + "precomp-poseidon2", "precomp-sha256f", + "precompiles-common", + "precompiles-hints", "proofman", "proofman-common", "proofman-util", "rayon", - "rom-setup", "sm-arith", "sm-binary", "sm-frequent-ops", @@ -1364,6 +1489,18 @@ dependencies = [ "ziskemu", ] +[[package]] +name = "fastbloom" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7f34442dbe69c60fe8eaf58a8cafff81a1f278816d8ab4db255b3bef4ac3c4" +dependencies = [ + "getrandom 0.3.4", + "libm", + "rand 0.9.2", + "siphasher", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -1371,20 +1508,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] -name = "ff" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" +name = "fields" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0#d61d40bee66b3c7f02bd4ab8661f9f29ad6730a4" dependencies = [ - "rand_core 0.6.4", - "subtle", + "cfg-if", + "num-bigint", + "paste", + "serde", ] [[package]] name = "fields" -version = "0.15.0" -source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.15.0#78497c5a05ea316df2188f98c1df66bffb80192f" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0#620e7c89f39ee2b608bd0f5d43765dc89728ff67" dependencies = [ + "cfg-if", "num-bigint", "paste", "serde", @@ -1392,21 +1531,9 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" - -[[package]] -name = "findshlibs" -version = "0.10.2" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64" -dependencies = [ - "cc", - "lazy_static", - "libc", - "winapi", -] +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fixedbitset" @@ -1416,9 +1543,9 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flate2" -version = "1.1.5" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", @@ -1445,11 +1572,17 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -1462,9 +1595,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -1472,15 +1605,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -1489,38 +1622,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -1530,26 +1663,24 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] [[package]] name = "generic-array" -version = "0.14.9" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", - "zeroize", ] [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "js-sys", @@ -1567,24 +1698,31 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", "wasm-bindgen", ] [[package]] -name = "gimli" -version = "0.32.3" +name = "getrandom" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] [[package]] name = "git2" -version = "0.19.0" +version = "0.20.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b903b73e45dc0c6c596f2d37eccece7c1c8bb6e4407b001096387c63d0d93724" +checksum = "7b88256088d75a56f8ecfa070513a775dd9107f6530ef14919dac831af9cfe2b" dependencies = [ - "bitflags 2.10.0", + "bitflags", "libc", "libgit2-sys", "log", @@ -1597,22 +1735,11 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" -[[package]] -name = "group" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" -dependencies = [ - "ff", - "rand_core 0.6.4", - "subtle", -] - [[package]] name = "h2" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" dependencies = [ "atomic-waker", "bytes", @@ -1681,15 +1808,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] - [[package]] name = "home" version = "0.5.12" @@ -1805,14 +1923,13 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.19" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", - "futures-core", "futures-util", "http", "http-body", @@ -1829,9 +1946,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1932,6 +2049,18 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.1.0" @@ -1955,9 +2084,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -1978,35 +2107,17 @@ dependencies = [ "web-time", ] -[[package]] -name = "inferno" -version = "0.11.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" -dependencies = [ - "ahash", - "indexmap", - "is-terminal", - "itoa", - "log", - "num-format", - "once_cell", - "quick-xml", - "rgb", - "str_stack", -] - [[package]] name = "ipnet" -version = "2.11.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" dependencies = [ "memchr", "serde", @@ -2067,33 +2178,31 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] -name = "jiff" -version = "0.2.16" +name = "jni" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" +checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" dependencies = [ - "jiff-static", + "cesu8", + "cfg-if", + "combine", + "jni-sys", "log", - "portable-atomic", - "portable-atomic-util", - "serde_core", + "thiserror 1.0.69", + "walkdir", + "windows-sys 0.45.0", ] [[package]] -name = "jiff-static" -version = "0.2.16" +name = "jni-sys" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.111", -] +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" [[package]] name = "jobserver" @@ -2107,20 +2216,14 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" dependencies = [ "once_cell", "wasm-bindgen", ] -[[package]] -name = "json" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "078e285eafdfb6c4b434e0d31e8cfcb5115b651496faca5749b88fafd4f23bfd" - [[package]] name = "json5" version = "0.4.1" @@ -2132,20 +2235,6 @@ dependencies = [ "serde", ] -[[package]] -name = "k256" -version = "0.13.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b" -dependencies = [ - "cfg-if", - "ecdsa", - "elliptic-curve", - "once_cell", - "sha2", - "signature", -] - [[package]] name = "lazy_static" version = "1.5.0" @@ -2158,25 +2247,31 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "lib-c" -version = "0.15.0" +version = "0.16.0" [[package]] name = "lib-float" -version = "0.15.0" +version = "0.16.0" [[package]] name = "libc" -version = "0.2.178" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libffi" -version = "5.0.0" +version = "5.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0444124f3ffd67e1b0b0c661a7f81a278a135eb54aaad4078e79fbc8be50c8a5" +checksum = "0498fe5655f857803e156523e644dcdcdc3b3c7edda42ea2afdae2e09b2db87b" dependencies = [ "libc", "libffi-sys", @@ -2184,18 +2279,18 @@ dependencies = [ [[package]] name = "libffi-sys" -version = "4.0.0" +version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d722da8817ea580d0669da6babe2262d7b86a1af1103da24102b8bb9c101ce7" +checksum = "71d4f1d4ce15091955144350b75db16a96d4a63728500122706fb4d29a26afbb" dependencies = [ "cc", ] [[package]] name = "libgit2-sys" -version = "0.17.0+1.8.1" +version = "0.18.3+1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10472326a8a6477c3c20a64547b0059e4b0d086869eee31e6d7da728a8eb7224" +checksum = "c9b3acc4b91781bb0b3386669d325163746af5f6e4f73e6d2d630e09a35f3487" dependencies = [ "cc", "libc", @@ -2213,21 +2308,26 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + [[package]] name = "libredox" -version = "0.1.11" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df15f6eac291ed1cf25865b1ee60399f57e7c227e7f51bdbd4c5270396a9ed50" +checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" dependencies = [ - "bitflags 2.10.0", "libc", ] [[package]] name = "libz-sys" -version = "1.1.23" +version = "1.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" +checksum = "d52f4c29e2a68ac30c9087e1b772dc9f44a2b66ed44edf2266cf2be9b03dafc1" dependencies = [ "cc", "libc", @@ -2235,11 +2335,17 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linked_list_allocator" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afa463f5405ee81cdb9cc2baf37e08ec7e4c8209442b5d72c04cfb2cd6e6286" + [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" @@ -2285,21 +2391,18 @@ checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" [[package]] name = "mem-common" -version = "0.15.0" +version = "0.16.0" dependencies = [ "clap", - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "num-bigint", "num-traits", - "pil-std-lib", - "proofman", "proofman-common", "proofman-macros", "proofman-util", "rayon", "static_assertions", "tracing", - "witness", "zisk-common", "zisk-core", "zisk-pil", @@ -2307,7 +2410,7 @@ dependencies = [ [[package]] name = "mem-planner-cpp" -version = "0.15.0" +version = "0.16.0" dependencies = [ "mem-common", "proofman-common", @@ -2319,15 +2422,15 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" dependencies = [ "libc", ] @@ -2377,7 +2480,7 @@ dependencies = [ "mpi-sys", "once_cell", "smallvec", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -2393,11 +2496,12 @@ dependencies = [ [[package]] name = "msvc-demangler" -version = "0.10.1" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4c25a3bb7d880e8eceab4822f3141ad0700d20f025991c1f03bd3d00219a5fc" +checksum = "fbeff6bd154a309b2ada5639b2661ca6ae4599b34e8487dc276d2cd637da2d76" dependencies = [ - "bitflags 2.10.0", + "bitflags", + "itoa", ] [[package]] @@ -2413,19 +2517,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0875efe1a57a20d0cee7034499aa9d764b3c7525563fa3c3f16a2ccf01ddfa04" dependencies = [ "libc", - "thiserror 2.0.17", - "windows", -] - -[[package]] -name = "nix" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" -dependencies = [ - "bitflags 1.3.2", - "cfg-if", - "libc", + "thiserror 2.0.18", + "windows 0.61.3", ] [[package]] @@ -2440,9 +2533,9 @@ dependencies = [ [[package]] name = "ntapi" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" dependencies = [ "winapi", ] @@ -2468,9 +2561,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" [[package]] name = "num-format" @@ -2531,7 +2624,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" dependencies = [ - "bitflags 2.10.0", + "bitflags", ] [[package]] @@ -2555,11 +2648,20 @@ dependencies = [ "ruzstd", ] +[[package]] +name = "oid-registry" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12f40cff3dde1b6087cc5d5f5d4d65712f34016a03ed60e9c08dcc392736b5b7" +dependencies = [ + "asn1-rs", +] + [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "once_cell_polyfill" @@ -2573,6 +2675,12 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + [[package]] name = "option-ext" version = "0.2.0" @@ -2590,12 +2698,13 @@ dependencies = [ ] [[package]] -name = "papergrid" -version = "0.4.0" +name = "page_size" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608b6444acf7f5ea39e8bd06dd6037e34a4b5ddfb29ae840edad49ea798e9e79" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" dependencies = [ - "unicode-width 0.1.14", + "libc", + "winapi", ] [[package]] @@ -2639,6 +2748,16 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64 0.22.1", + "serde_core", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -2647,9 +2766,9 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "pest" -version = "2.8.4" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbcfd20a6d4eeba40179f05735784ad32bdaef05ce8e8af05f180d45bb3e7e22" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" dependencies = [ "memchr", "ucd-trie", @@ -2657,9 +2776,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.8.4" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51f72981ade67b1ca6adc26ec221be9f463f2b5839c7508998daa17c23d94d7f" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" dependencies = [ "pest", "pest_generator", @@ -2667,22 +2786,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.8.4" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee9efd8cdb50d719a80088b76f81aec7c41ed6d522ee750178f83883d271625" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] name = "pest_meta" -version = "2.8.4" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf1d70880e76bdc13ba52eafa6239ce793d85c8e43896507e43dd8984ff05b82" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" dependencies = [ "pest", "sha2", @@ -2690,56 +2809,60 @@ dependencies = [ [[package]] name = "petgraph" -version = "0.7.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", + "hashbrown 0.15.5", "indexmap", ] [[package]] name = "pil-std-lib" -version = "0.15.0" -source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.15.0#78497c5a05ea316df2188f98c1df66bffb80192f" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0#d61d40bee66b3c7f02bd4ab8661f9f29ad6730a4" dependencies = [ "colored", - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "num-bigint", "num-traits", "proofman-common", "proofman-hints", "proofman-util", "rayon", + "rustc-hash 2.1.1", + "serde", + "serde_json", "tracing", "witness", ] [[package]] name = "pin-project" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] name = "pin-project-lite" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "pin-utils" @@ -2747,16 +2870,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "pkcs8" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" -dependencies = [ - "der", - "spki", -] - [[package]] name = "pkg-config" version = "0.3.32" @@ -2793,18 +2906,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" - -[[package]] -name = "portable-atomic-util" -version = "0.2.4" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" -dependencies = [ - "portable-atomic", -] +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "potential_utf" @@ -2821,29 +2925,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" -[[package]] -name = "pprof" -version = "0.14.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afad4d4df7b31280028245f152d5a575083e2abb822d05736f5e47653e77689f" -dependencies = [ - "aligned-vec", - "backtrace", - "cfg-if", - "criterion", - "findshlibs", - "inferno", - "libc", - "log", - "nix", - "once_cell", - "smallvec", - "spin", - "symbolic-demangle", - "tempfile", - "thiserror 1.0.69", -] - [[package]] name = "ppv-lite86" version = "0.2.21" @@ -2855,25 +2936,23 @@ dependencies = [ [[package]] name = "precomp-arith-eq" -version = "0.15.0" +version = "0.16.0" dependencies = [ "ark-bn254", "ark-ff", "ark-secp256k1", + "ark-secp256r1", "ark-std", - "fields", - "k256", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "lazy_static", "lib-c", "mem-common", - "nom", "num-bigint", "num-traits", "path-clean", "pil-std-lib", "precompiles-common", "precompiles-helpers", - "proofman", "proofman-common", "proofman-macros", "proofman-util", @@ -2893,19 +2972,17 @@ dependencies = [ [[package]] name = "precomp-arith-eq-384" -version = "0.15.0" +version = "0.16.0" dependencies = [ "ark-bls12-381", "ark-bn254", "ark-ff", "ark-secp256k1", "ark-std", - "fields", - "k256", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "lazy_static", "lib-c", "mem-common", - "nom", "num-bigint", "num-traits", "path-clean", @@ -2913,7 +2990,6 @@ dependencies = [ "precomp-arith-eq", "precompiles-common", "precompiles-helpers", - "proofman", "proofman-common", "proofman-macros", "proofman-util", @@ -2932,15 +3008,14 @@ dependencies = [ [[package]] name = "precomp-big-int" -version = "0.15.0" +version = "0.16.0" dependencies = [ - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "generic-array", "lib-c", "mem-common", "pil-std-lib", "precompiles-common", - "proofman", "proofman-common", "proofman-macros", "proofman-util", @@ -2953,21 +3028,18 @@ dependencies = [ ] [[package]] -name = "precomp-keccakf" -version = "0.15.0" +name = "precomp-blake2" +version = "0.16.0" dependencies = [ - "circuit", - "fields", - "path-clean", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", + "mem-common", "pil-std-lib", "precompiles-common", - "precompiles-helpers", - "proofman", "proofman-common", "proofman-macros", "proofman-util", "rayon", - "tiny-keccak", + "sm-mem", "tracing", "zisk-common", "zisk-core", @@ -2975,18 +3047,61 @@ dependencies = [ ] [[package]] -name = "precomp-sha256f" -version = "0.15.0" +name = "precomp-dma" +version = "0.16.0" dependencies = [ - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", + "generic-array", + "lib-c", "mem-common", "pil-std-lib", "precompiles-common", + "precompiles-helpers", "proofman", "proofman-common", "proofman-macros", "proofman-util", "rayon", + "sm-mem", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "precomp-keccakf" +version = "0.16.0" +dependencies = [ + "circuit", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", + "path-clean", + "pil-std-lib", + "precompiles-common", + "precompiles-helpers", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "tiny-keccak", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "precomp-poseidon2" +version = "0.16.0" +dependencies = [ + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", + "mem-common", + "pil-std-lib", + "precompiles-common", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", "sha2", "sm-mem", "tracing", @@ -2995,23 +3110,45 @@ dependencies = [ "zisk-pil", ] +[[package]] +name = "precomp-sha256f" +version = "0.16.0" +dependencies = [ + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", + "mem-common", + "pil-std-lib", + "precompiles-common", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "sm-mem", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + [[package]] name = "precompiles-common" -version = "0.15.0" +version = "0.16.0" dependencies = [ - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", + "mem-common", + "sm-mem", "zisk-common", "zisk-core", ] [[package]] name = "precompiles-helpers" -version = "0.15.0" +version = "0.16.0" dependencies = [ "ark-bls12-381", "ark-bn254", "ark-ff", "ark-secp256k1", + "ark-secp256r1", "ark-std", "cfg-if", "circuit", @@ -3020,6 +3157,23 @@ dependencies = [ "num-traits", ] +[[package]] +name = "precompiles-hints" +version = "0.16.0" +dependencies = [ + "anyhow", + "borsh", + "criterion 0.8.2", + "lib-c", + "precompiles-helpers", + "rayon", + "rustls", + "tracing", + "zisk-common", + "zisk-distributed-common", + "ziskos-hints", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -3027,32 +3181,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] name = "proc-macro-crate" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" dependencies = [ - "toml_edit 0.23.10+spec-1.0.0", + "toml_edit 0.25.4+spec-1.1.0", ] [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "proofman" -version = "0.15.0" -source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.15.0#78497c5a05ea316df2188f98c1df66bffb80192f" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0#d61d40bee66b3c7f02bd4ab8661f9f29ad6730a4" dependencies = [ + "bincode", "blake3", "borsh", "bytemuck", @@ -3061,7 +3216,7 @@ dependencies = [ "crossbeam-channel", "csv", "curves", - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "libloading", "mpi", "num-bigint", @@ -3073,7 +3228,6 @@ dependencies = [ "proofman-starks-lib-c", "proofman-util", "proofman-verifier", - "rand 0.9.2", "rayon", "serde", "serde_json", @@ -3085,17 +3239,18 @@ dependencies = [ [[package]] name = "proofman-common" -version = "0.15.0" -source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.15.0#78497c5a05ea316df2188f98c1df66bffb80192f" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0#d61d40bee66b3c7f02bd4ab8661f9f29ad6730a4" dependencies = [ + "bincode", "borsh", + "bytemuck", "colored", "crossbeam-channel", "crossbeam-queue", "csv", "env", - "fields", - "indexmap", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "lazy_static", "libloading", "mpi", @@ -3107,8 +3262,7 @@ dependencies = [ "serde", "serde_json", "sysinfo 0.35.2", - "tabled", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", "tracing-subscriber", "yansi", @@ -3116,10 +3270,11 @@ dependencies = [ [[package]] name = "proofman-hints" -version = "0.15.0" -source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.15.0#78497c5a05ea316df2188f98c1df66bffb80192f" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0#d61d40bee66b3c7f02bd4ab8661f9f29ad6730a4" dependencies = [ - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", + "itoa", "proofman-common", "proofman-starks-lib-c", "proofman-util", @@ -3128,19 +3283,18 @@ dependencies = [ [[package]] name = "proofman-macros" -version = "0.15.0" -source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.15.0#78497c5a05ea316df2188f98c1df66bffb80192f" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0#d61d40bee66b3c7f02bd4ab8661f9f29ad6730a4" dependencies = [ "proc-macro2", "quote", - "rayon", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] name = "proofman-starks-lib-c" -version = "0.15.0" -source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.15.0#78497c5a05ea316df2188f98c1df66bffb80192f" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0#d61d40bee66b3c7f02bd4ab8661f9f29ad6730a4" dependencies = [ "crossbeam-channel", "tracing", @@ -3148,31 +3302,33 @@ dependencies = [ [[package]] name = "proofman-util" -version = "0.15.0" -source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.15.0#78497c5a05ea316df2188f98c1df66bffb80192f" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0#d61d40bee66b3c7f02bd4ab8661f9f29ad6730a4" dependencies = [ + "bincode", + "bytemuck", "colored", - "fields", + "serde", "sysinfo 0.35.2", - "tracing", ] [[package]] name = "proofman-verifier" -version = "0.15.0" -source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.15.0#78497c5a05ea316df2188f98c1df66bffb80192f" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0#d61d40bee66b3c7f02bd4ab8661f9f29ad6730a4" dependencies = [ "bytemuck", - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", + "proofman-util", "rayon", "tracing", ] [[package]] name = "prost" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -3180,15 +3336,14 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", "itertools 0.14.0", "log", "multimap", - "once_cell", "petgraph", "prettyplease", "prost", @@ -3196,61 +3351,52 @@ dependencies = [ "pulldown-cmark", "pulldown-cmark-to-cmark", "regex", - "syn 2.0.111", + "syn 2.0.117", "tempfile", ] [[package]] name = "prost-derive" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] name = "prost-types" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ "prost", ] [[package]] name = "pulldown-cmark" -version = "0.13.0" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" +checksum = "83c41efbf8f90ac44de7f3a868f0867851d261b56291732d0cbf7cceaaeb55a6" dependencies = [ - "bitflags 2.10.0", + "bitflags", "memchr", "unicase", ] [[package]] name = "pulldown-cmark-to-cmark" -version = "21.1.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8246feae3db61428fd0bb94285c690b460e4517d83152377543ca802357785f1" +checksum = "50793def1b900256624a709439404384204a5dc3a6ec580281bfaac35e882e90" dependencies = [ "pulldown-cmark", ] -[[package]] -name = "quick-xml" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" -dependencies = [ - "memchr", -] - [[package]] name = "quinn" version = "0.11.9" @@ -3265,7 +3411,7 @@ dependencies = [ "rustc-hash 2.1.1", "rustls", "socket2", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -3273,11 +3419,12 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.13" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ "bytes", + "fastbloom", "getrandom 0.3.4", "lru-slab", "rand 0.9.2", @@ -3285,8 +3432,9 @@ dependencies = [ "rustc-hash 2.1.1", "rustls", "rustls-pki-types", + "rustls-platform-verifier", "slab", - "thiserror 2.0.17", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -3308,9 +3456,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.42" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -3321,6 +3469,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" version = "0.8.5" @@ -3339,7 +3493,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -3359,7 +3513,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -3368,14 +3522,14 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] @@ -3400,13 +3554,27 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "rcgen" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10b99e0098aa4082912d4c649628623db6aba77335e4f4569ff5083a6448b32e" +dependencies = [ + "pem", + "ring", + "rustls-pki-types", + "time", + "x509-parser", + "yasna", +] + [[package]] name = "redox_syscall" version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags", ] [[package]] @@ -3415,16 +3583,16 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "libredox", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -3434,9 +3602,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -3445,17 +3613,17 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "reqwest" -version = "0.12.26" +version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b4c14b2d9afca6a60277086b0cc6a6ae0b568f6f7916c943a8cdc79f8be240f" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-core", "futures-util", @@ -3490,25 +3658,6 @@ dependencies = [ "webpki-roots", ] -[[package]] -name = "rfc6979" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" -dependencies = [ - "hmac", - "subtle", -] - -[[package]] -name = "rgb" -version = "0.8.52" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6a884d2998352bb4daf0183589aec883f16a6da1f4dde84d8e2e9a5409a1ce" -dependencies = [ - "bytemuck", -] - [[package]] name = "ring" version = "0.17.14" @@ -3517,7 +3666,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted", "windows-sys 0.52.0", @@ -3525,22 +3674,33 @@ dependencies = [ [[package]] name = "riscv" -version = "0.15.0" +version = "0.16.0" + +[[package]] +name = "rlsf" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1646a59a9734b8b7a0ac51689388a60fe1625d4b956348e9de07591a1478457a" dependencies = [ - "elf", + "cfg-if", + "const-default", + "libc", + "rustversion", + "svgbobdoc", ] [[package]] name = "rom-setup" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", "blake3", "colored", - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "proofman-common", "sm-rom", "tracing", + "zisk-common", "zisk-core", "zisk-pil", ] @@ -3551,7 +3711,7 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd490c5b18261893f14449cbd28cb9c0b637aebf161cd77900bfdedaff21ec32" dependencies = [ - "bitflags 2.10.0", + "bitflags", "once_cell", "serde", "serde_derive", @@ -3571,9 +3731,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" [[package]] name = "rustc-hash" @@ -3600,13 +3760,22 @@ dependencies = [ "toolchain_find", ] +[[package]] +name = "rusticata-macros" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632" +dependencies = [ + "nom", +] + [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.10.0", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -3615,10 +3784,11 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.35" +version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ + "aws-lc-rs", "log", "once_cell", "ring", @@ -3628,22 +3798,62 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pki-types" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "web-time", "zeroize", ] +[[package]] +name = "rustls-platform-verifier" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +dependencies = [ + "core-foundation", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls", + "rustls-native-certs", + "rustls-platform-verifier-android", + "rustls-webpki", + "security-framework", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + [[package]] name = "rustls-webpki" -version = "0.103.8" +version = "0.103.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -3666,9 +3876,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.20" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "same-file" @@ -3679,6 +3889,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -3686,17 +3905,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "sec1" -version = "0.7.3" +name = "security-framework" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ - "base16ct", - "der", - "generic-array", - "pkcs8", - "subtle", - "zeroize", + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", ] [[package]] @@ -3757,21 +3985,21 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "indexmap", "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -3804,32 +4032,6 @@ dependencies = [ "serde", ] -[[package]] -name = "server" -version = "0.15.0" -dependencies = [ - "anyhow", - "asm-runner", - "bytemuck", - "clap", - "colored", - "executor", - "fields", - "libloading", - "mpi", - "named-sem", - "proofman", - "proofman-common", - "serde", - "serde_json", - "tracing", - "uuid", - "witness", - "zisk-common", - "zisk-witness", - "zstd", -] - [[package]] name = "sha2" version = "0.10.9" @@ -3874,10 +4076,11 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.7" +version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ + "errno", "libc", ] @@ -3893,36 +4096,31 @@ dependencies = [ "tokio", ] -[[package]] -name = "signature" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" -dependencies = [ - "digest", - "rand_core 0.6.4", -] - [[package]] name = "simd-adler32" version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "sm-arith" -version = "0.15.0" +version = "0.16.0" dependencies = [ - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "num-bigint", "pil-std-lib", - "proofman", "proofman-common", "proofman-macros", "proofman-util", @@ -3938,12 +4136,11 @@ dependencies = [ [[package]] name = "sm-binary" -version = "0.15.0" +version = "0.16.0" dependencies = [ - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "num-bigint", "pil-std-lib", - "proofman", "proofman-common", "proofman-macros", "proofman-util", @@ -3958,39 +4155,31 @@ dependencies = [ [[package]] name = "sm-frequent-ops" -version = "0.15.0" +version = "0.16.0" dependencies = [ "clap", - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "num-bigint", - "pil-std-lib", - "proofman", "proofman-common", - "proofman-macros", "proofman-util", "rayon", "static_assertions", "tracing", - "zisk-common", "zisk-core", - "zisk-pil", ] [[package]] name = "sm-main" -version = "0.15.0" +version = "0.16.0" dependencies = [ - "asm-runner", - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "mem-common", "num-bigint", "pil-std-lib", - "proofman", "proofman-common", "proofman-macros", "proofman-util", "rayon", - "sm-mem", "tracing", "zisk-common", "zisk-core", @@ -4000,15 +4189,13 @@ dependencies = [ [[package]] name = "sm-mem" -version = "0.15.0" +version = "0.16.0" dependencies = [ - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "mem-common", - "mem-planner-cpp", "num-bigint", "num-traits", "pil-std-lib", - "proofman", "proofman-common", "proofman-macros", "proofman-util", @@ -4022,12 +4209,11 @@ dependencies = [ [[package]] name = "sm-rom" -version = "0.15.0" +version = "0.16.0" dependencies = [ "asm-runner", - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "itertools 0.14.0", - "proofman", "proofman-common", "proofman-macros", "proofman-util", @@ -4046,31 +4232,12 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" -version = "0.6.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", -] - -[[package]] -name = "spin" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" -dependencies = [ - "lock_api", -] - -[[package]] -name = "spki" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" -dependencies = [ - "base64ct", - "der", + "windows-sys 0.61.2", ] [[package]] @@ -4085,12 +4252,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "str_stack" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" - [[package]] name = "strsim" version = "0.11.1" @@ -4104,22 +4265,35 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] -name = "symbolic-common" -version = "12.17.0" +name = "svgbobdoc" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d8046c5674ab857104bc4559d505f4809b8060d57806e45d49737c97afeb60" +checksum = "f2c04b93fc15d79b39c63218f15e3fdffaa4c227830686e3b7c5f41244eb3e50" dependencies = [ - "debugid", - "memmap2", - "stable_deref_trait", - "uuid", -] - -[[package]] + "base64 0.13.1", + "proc-macro2", + "quote", + "syn 1.0.109", + "unicode-width 0.1.14", +] + +[[package]] +name = "symbolic-common" +version = "12.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "751a2823d606b5d0a7616499e4130a516ebd01a44f39811be2b9600936509c23" +dependencies = [ + "debugid", + "memmap2", + "stable_deref_trait", + "uuid", +] + +[[package]] name = "symbolic-demangle" -version = "12.17.0" +version = "12.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1accb6e5c4b0f682de907623912e616b44be1c9e725775155546669dbff720ec" +checksum = "79b237cfbe320601dd24b4ac817a5b68bb28f5508e33f08d42be0682cadc8ac9" dependencies = [ "cc", "cpp_demangle", @@ -4141,9 +4315,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.111" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -4167,7 +4341,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -4181,58 +4355,46 @@ dependencies = [ "ntapi", "objc2-core-foundation", "objc2-io-kit", - "windows", + "windows 0.61.3", ] [[package]] name = "sysinfo" -version = "0.37.2" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16607d5caffd1c07ce073528f9ed972d88db15dd44023fa57142963be3feb11f" +checksum = "92ab6a2f8bfe508deb3c6406578252e491d299cbbf3bc0529ecc3313aee4a52f" dependencies = [ "libc", "memchr", "ntapi", "objc2-core-foundation", "objc2-io-kit", - "windows", -] - -[[package]] -name = "tabled" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2407502760ccfd538f2fb1f843dd87b6daf1a17848d57bc5a25617e408ef4c7a" -dependencies = [ - "papergrid", - "tabled_derive", + "windows 0.62.2", ] [[package]] -name = "tabled_derive" -version = "0.3.0" +name = "talc" +version = "4.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "278ea3921cee8c5a69e0542998a089f7a14fa43c9c4e4f9951295da89bd0c943" +checksum = "a3ae828aa394de34c7de08f522d1b86bd1c182c668d27da69caadda00590f26d" dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", + "lock_api", ] [[package]] name = "target-lexicon" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1dd07eb858a2067e2f3c7155d54e929265c264e6f37efe3ee7a8d1b5a1dd0ba" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" [[package]] name = "tempfile" -version = "3.23.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys 0.61.2", @@ -4249,11 +4411,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.17", + "thiserror-impl 2.0.18", ] [[package]] @@ -4264,18 +4426,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -4289,9 +4451,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.44" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ "deranged", "itoa", @@ -4299,22 +4461,22 @@ dependencies = [ "num-conv", "num_threads", "powerfmt", - "serde", + "serde_core", "time-core", "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" [[package]] name = "time-macros" -version = "0.2.24" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" dependencies = [ "num-conv", "time-core", @@ -4366,9 +4528,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.48.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ "bytes", "libc", @@ -4383,13 +4545,13 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -4404,9 +4566,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" dependencies = [ "futures-core", "pin-project-lite", @@ -4415,9 +4577,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.17" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -4440,9 +4602,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.9.10+spec-1.1.0" +version = "0.9.12+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0825052159284a1a8b4d6c0c86cbc801f2da5afd2b225fa548c72f2e74002f48" +checksum = "cf92845e79fc2e2def6a5d828f0801e29a2f8acc037becc5ab08595c7d5e9863" dependencies = [ "indexmap", "serde_core", @@ -4471,6 +4633,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "toml_datetime" +version = "1.0.0+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" +dependencies = [ + "serde_core", +] + [[package]] name = "toml_edit" version = "0.22.27" @@ -4487,21 +4658,21 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.23.10+spec-1.0.0" +version = "0.25.4+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +checksum = "7193cbd0ce53dc966037f54351dbbcf0d5a642c7f0038c382ef9e677ce8c13f2" dependencies = [ "indexmap", - "toml_datetime 0.7.5+spec-1.1.0", + "toml_datetime 1.0.0+spec-1.1.0", "toml_parser", "winnow", ] [[package]] name = "toml_parser" -version = "1.0.6+spec-1.1.0" +version = "1.0.9+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" dependencies = [ "winnow", ] @@ -4520,13 +4691,13 @@ checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607" [[package]] name = "tonic" -version = "0.14.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" +checksum = "fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec" dependencies = [ "async-trait", "axum", - "base64", + "base64 0.22.1", "bytes", "h2", "http", @@ -4549,21 +4720,21 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.14.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c40aaccc9f9eccf2cd82ebc111adc13030d23e887244bc9cfa5d1d636049de3" +checksum = "1882ac3bf5ef12877d7ed57aad87e75154c11931c2ba7e6cde5e22d63522c734" dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] name = "tonic-prost" -version = "0.14.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" +checksum = "a55376a0bbaa4975a3f10d009ad763d8f4108f067c7c2e74f3001fb49778d309" dependencies = [ "bytes", "prost", @@ -4572,16 +4743,16 @@ dependencies = [ [[package]] name = "tonic-prost-build" -version = "0.14.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4a16cba4043dc3ff43fcb3f96b4c5c154c64cbd18ca8dce2ab2c6a451d058a2" +checksum = "f3144df636917574672e93d0f56d7edec49f90305749c668df5101751bb8f95a" dependencies = [ "prettyplease", "proc-macro2", "prost-build", "prost-types", "quote", - "syn 2.0.111", + "syn 2.0.117", "tempfile", "tonic-build", ] @@ -4601,9 +4772,9 @@ dependencies = [ [[package]] name = "tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", @@ -4624,7 +4795,7 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags 2.10.0", + "bitflags", "bytes", "futures-util", "http", @@ -4654,6 +4825,7 @@ version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -4666,7 +4838,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "786d480bce6247ab75f005b14ae1624ad978d3029d9113f0a22fa1ac773faeaf" dependencies = [ "crossbeam-channel", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", "tracing-subscriber", ] @@ -4679,7 +4851,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -4766,15 +4938,15 @@ checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" [[package]] name = "unicase" -version = "2.8.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" @@ -4795,51 +4967,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] -name = "untrusted" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" - -[[package]] -name = "unty" -version = "0.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" - -[[package]] -name = "ureq" -version = "3.1.4" +name = "unicode-xid" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d39cb1dbab692d82a977c0392ffac19e188bd9186a9f32806f0aaa859d75585a" -dependencies = [ - "base64", - "flate2", - "log", - "percent-encoding", - "rustls", - "rustls-pki-types", - "ureq-proto", - "utf-8", - "webpki-roots", -] +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" [[package]] -name = "ureq-proto" -version = "0.5.3" +name = "untrusted" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f" -dependencies = [ - "base64", - "http", - "httparse", - "log", -] +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", @@ -4847,12 +4990,6 @@ dependencies = [ "serde", ] -[[package]] -name = "utf-8" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -4867,11 +5004,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.19.0" +version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ - "getrandom 0.3.4", + "getrandom 0.4.2", "js-sys", "serde_core", "wasm-bindgen", @@ -4891,28 +5028,48 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "vergen" -version = "8.3.2" +version = "9.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +checksum = "b849a1f6d8639e8de261e81ee0fc881e3e3620db1af9f2e0da015d4382ceaf75" dependencies = [ "anyhow", - "cfg-if", + "derive_builder", + "rustversion", + "time", + "vergen-lib", +] + +[[package]] +name = "vergen-git2" +version = "9.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d51ab55ddf1188c8d679f349775362b0fa9e90bd7a4ac69838b2a087623f0d57" +dependencies = [ + "anyhow", + "derive_builder", "git2", "rustversion", "time", + "vergen", + "vergen-lib", ] [[package]] -name = "version_check" -version = "0.9.5" +name = "vergen-lib" +version = "9.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +checksum = "b34a29ba7e9c59e62f229ae1932fb1b8fb8a6fdcc99215a641913f5f5a59a569" +dependencies = [ + "anyhow", + "derive_builder", + "rustversion", +] [[package]] -name = "virtue" -version = "0.0.18" +name = "version_check" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "walkdir" @@ -4941,18 +5098,27 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ "cfg-if", "once_cell", @@ -4963,11 +5129,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.56" +version = "0.4.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" dependencies = [ "cfg-if", + "futures-util", "js-sys", "once_cell", "wasm-bindgen", @@ -4976,9 +5143,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4986,26 +5153,48 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + [[package]] name = "wasm-streams" version = "0.4.2" @@ -5019,11 +5208,23 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "web-sys" -version = "0.3.83" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" dependencies = [ "js-sys", "wasm-bindgen", @@ -5039,11 +5240,20 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-root-certs" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webpki-roots" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" dependencies = [ "rustls-pki-types", ] @@ -5085,11 +5295,23 @@ version = "0.61.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" dependencies = [ - "windows-collections", + "windows-collections 0.2.0", "windows-core 0.61.2", - "windows-future", + "windows-future 0.2.1", "windows-link 0.1.3", - "windows-numerics", + "windows-numerics 0.2.0", +] + +[[package]] +name = "windows" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" +dependencies = [ + "windows-collections 0.3.2", + "windows-core 0.62.2", + "windows-future 0.3.2", + "windows-numerics 0.3.1", ] [[package]] @@ -5101,6 +5323,15 @@ dependencies = [ "windows-core 0.61.2", ] +[[package]] +name = "windows-collections" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" +dependencies = [ + "windows-core 0.62.2", +] + [[package]] name = "windows-core" version = "0.61.2" @@ -5135,7 +5366,18 @@ checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ "windows-core 0.61.2", "windows-link 0.1.3", - "windows-threading", + "windows-threading 0.1.0", +] + +[[package]] +name = "windows-future" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" +dependencies = [ + "windows-core 0.62.2", + "windows-link 0.2.1", + "windows-threading 0.2.1", ] [[package]] @@ -5146,7 +5388,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -5157,7 +5399,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -5182,6 +5424,16 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-numerics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" +dependencies = [ + "windows-core 0.62.2", + "windows-link 0.2.1", +] + [[package]] name = "windows-result" version = "0.3.4" @@ -5218,6 +5470,15 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -5254,6 +5515,21 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -5296,6 +5572,21 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-threading" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +dependencies = [ + "windows-link 0.2.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -5308,6 +5599,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -5320,6 +5617,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -5344,6 +5647,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -5356,6 +5665,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -5368,6 +5683,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -5380,6 +5701,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -5394,26 +5721,108 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.14" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" dependencies = [ "memchr", ] [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "witness" -version = "0.15.0" -source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.15.0#78497c5a05ea316df2188f98c1df66bffb80192f" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0#d61d40bee66b3c7f02bd4ab8661f9f29ad6730a4" dependencies = [ "colored", - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "libloading", "proofman-common", "proofman-util", @@ -5427,6 +5836,24 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "x509-parser" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d43b0f71ce057da06bc0851b23ee24f3f86190b07203dd8f567d0b706a185202" +dependencies = [ + "asn1-rs", + "data-encoding", + "der-parser", + "lazy_static", + "nom", + "oid-registry", + "ring", + "rusticata-macros", + "thiserror 2.0.18", + "time", +] + [[package]] name = "yaml-rust2" version = "0.10.4" @@ -5444,6 +5871,15 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "yasna" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" +dependencies = [ + "time", +] + [[package]] name = "yoke" version = "0.8.1" @@ -5463,28 +5899,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -5504,7 +5940,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", "synstructure", ] @@ -5519,13 +5955,13 @@ dependencies = [ [[package]] name = "zeroize_derive" -version = "1.4.2" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" +checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] @@ -5558,60 +5994,68 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.117", ] [[package]] name = "zisk-build" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", "cargo_metadata", "clap", - "vergen", + "rom-setup", + "tracing", + "vergen-git2", ] [[package]] name = "zisk-common" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", - "bytemuck", - "fields", + "bincode", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "libc", "mpi", "proofman", "proofman-common", + "proofman-util", + "quinn", + "rcgen", + "rustls", "serde", "serde_json", + "thiserror 2.0.18", + "tokio", "tracing", "tracing-subscriber", - "witness", "zisk-core", - "zisk-pil", - "zstd", ] [[package]] name = "zisk-core" -version = "0.15.0" +version = "0.16.0" dependencies = [ "elf", - "fields", - "indexmap", - "json", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "lib-c", + "lib-float", + "paste", "precompiles-helpers", "rayon", "riscv", "serde", - "serde_json", "sha2", "tiny-keccak", - "zisk-pil", - "ziskos", + "zisk-definitions", + "ziskos-hints", ] +[[package]] +name = "zisk-definitions" +version = "0.16.0" + [[package]] name = "zisk-distributed-common" version = "0.1.0" @@ -5621,18 +6065,20 @@ dependencies = [ "chrono", "proofman", "proofman-common", + "proofman-util", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", "tracing-appender", "tracing-subscriber", "uuid", + "zisk-common", ] [[package]] name = "zisk-distributed-coordinator" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", "async-stream", @@ -5646,13 +6092,14 @@ dependencies = [ "futures-util", "humantime", "proofman", + "proofman-util", "prost-types", "reqwest", "serde", "serde_json", "signal-hook", "signal-hook-tokio", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tonic", "tracing", @@ -5661,11 +6108,12 @@ dependencies = [ "zisk-common", "zisk-distributed-common", "zisk-distributed-grpc-api", + "zisk-sdk", ] [[package]] name = "zisk-distributed-grpc-api" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", "chrono", @@ -5683,28 +6131,29 @@ dependencies = [ [[package]] name = "zisk-distributed-worker" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", "asm-runner", "borsh", "cargo-zisk", + "chrono", "clap", "colored", "config", - "fields", - "libloading", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", + "precompiles-hints", "proofman", "proofman-common", "rom-setup", "serde", "tokio", "tokio-stream", - "toml 0.9.10+spec-1.1.0", + "toml 0.9.12+spec-1.1.0", "tonic", "tracing", "uuid", - "vergen", + "vergen-git2", "witness", "zisk-common", "zisk-distributed-common", @@ -5716,10 +6165,9 @@ dependencies = [ [[package]] name = "zisk-pil" -version = "0.15.0" +version = "0.16.0" dependencies = [ - "fields", - "proofman", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "proofman-common", "proofman-macros", "rayon", @@ -5729,61 +6177,40 @@ dependencies = [ [[package]] name = "zisk-sdk" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", "asm-runner", - "bytemuck", + "bincode", "colored", - "fields", - "libloading", + "executor", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", + "precompiles-hints", "proofman", "proofman-common", "proofman-util", + "proofman-verifier", "rom-setup", + "serde", + "sha2", "tracing", + "zisk-build", "zisk-common", + "zisk-core", "zisk-distributed-common", - "zstd", + "ziskemu", ] [[package]] -name = "zisk-witness" -version = "0.15.0" +name = "zisk-verifier" +version = "0.16.0" dependencies = [ - "data-bus", - "env_logger", - "executor", - "fields", - "mem-common", - "pil-std-lib", - "precomp-arith-eq", - "precomp-arith-eq-384", - "precomp-big-int", - "precomp-keccakf", - "precomp-sha256f", - "proofman", - "proofman-common", - "proofman-macros", - "proofman-util", - "rayon", - "sm-arith", - "sm-binary", - "sm-frequent-ops", - "sm-main", - "sm-mem", - "sm-rom", - "tracing", - "witness", - "zisk-common", - "zisk-core", - "zisk-pil", - "ziskemu", + "proofman-verifier", ] [[package]] name = "ziskclib" -version = "0.15.0" +version = "0.16.0" dependencies = [ "sha2", "tiny-keccak", @@ -5791,27 +6218,26 @@ dependencies = [ [[package]] name = "ziskemu" -version = "0.15.0" +version = "0.16.0" dependencies = [ "clap", - "criterion", + "criterion 0.5.1", "data-bus", - "fields", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", "mem-common", "memmap2", "num-format", "object", - "pprof", "proofman-common", "rayon", + "regex", "riscv", "sm-arith", "sm-binary", - "sm-mem", "symbolic-common", "symbolic-demangle", - "sysinfo 0.37.2", - "vergen", + "sysinfo 0.38.4", + "vergen-git2", "zisk-common", "zisk-core", "zisk-pil", @@ -5819,46 +6245,62 @@ dependencies = [ [[package]] name = "ziskos" -version = "0.15.0" +version = "0.16.0" dependencies = [ + "anyhow", "bincode", + "bytes", "cfg-if", - "getrandom 0.2.16", + "critical-section", + "ctor", + "dlmalloc", + "embedded-alloc", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0)", + "getrandom 0.2.17", "lazy_static", "lib-c", "num-bigint", "num-integer", "num-traits", + "once_cell", + "paste", + "precompiles-helpers", "rand 0.8.5", "serde", - "static_assertions", + "sha2", + "talc", "tiny-keccak", + "tokio", + "zisk-common", + "zisk-definitions", + "zisk-verifier", ] [[package]] -name = "zstd" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "7.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +name = "ziskos-hints" +version = "0.16.0" dependencies = [ - "zstd-sys", + "anyhow", + "bincode", + "cfg-if", + "fields 0.16.0 (git+https://github.com/0xPolygonHermez/pil2-proofman.git?tag=v0.16.0)", + "getrandom 0.2.17", + "lazy_static", + "lib-c", + "num-bigint", + "num-integer", + "num-traits", + "paste", + "precompiles-helpers", + "rand 0.8.5", + "serde", + "sha2", + "tiny-keccak", + "zisk-verifier", ] [[package]] -name = "zstd-sys" -version = "2.0.16+zstd.1.5.7" +name = "zmij" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" -dependencies = [ - "cc", - "pkg-config", -] +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index 2693fa5f4..74300c57a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace.package] -version = "0.15.0" +version = "0.16.0" edition = "2021" license = "Apache-2.0 or MIT" keywords = ["zisk", "zkvm", "zero-knowledge"] @@ -7,8 +7,8 @@ repository = "https://github.com/0xPolygonHermez/zisk" categories = ["cryptography"] [workspace.metadata] -gha_pil2_proofman_js_branch = "tags/v0.15.0" -gha_pil2_compiler_branch = "tags/v0.8.0" +gha_pil2_proofman_js_branch = "tags/v0.16.0" +gha_pil2_compiler_branch = "tags/v0.9.0" [workspace] members = [ @@ -26,27 +26,30 @@ members = [ "state-machines/mem", "state-machines/mem-cpp", "state-machines/rom", - "witness-computation", "ziskos/entrypoint", + "ziskos-hints", "precompiles/arith_eq", "precompiles/arith_eq_384", "precompiles/common", + "precompiles/hints", "precompiles/keccakf", "precompiles/sha256f", + "precompiles/blake2", "precompiles/big_int", + "precompiles/dma", "lib-c", "lib-float", "emulator-asm/asm-runner", "ziskclib", "common", "tools/circuit", - "server", "distributed/crates/coordinator", "distributed/crates/grpc-api", "distributed/crates/common", "distributed/crates/worker", "sdk", "ziskbuild", + "definitions", ] resolver = "2" @@ -70,6 +73,7 @@ ziskemu = { path = "emulator" } asm-runner = { path = "emulator-asm/asm-runner" } executor = { path = "executor" } lib-c = { path = "lib-c" } +lib-float = { path = "lib-float" } zisk-pil = { path = "pil" } precomp-arith-eq = { path = "precompiles/arith_eq" } precomp-arith-eq-384 = { path = "precompiles/arith_eq_384" } @@ -77,10 +81,12 @@ precompiles-common = { path = "precompiles/common" } precompiles-helpers = { path = "precompiles/helpers" } precomp-keccakf = { path = "precompiles/keccakf" } precomp-sha256f = { path = "precompiles/sha256f" } +precomp-blake2 = { path = "precompiles/blake2" } +precomp-poseidon2 = { path = "precompiles/poseidon2" } precomp-big-int = { path = "precompiles/big_int" } +precomp-dma = { path = "precompiles/dma" } riscv = { path = "riscv" } rom-setup = { path = "rom-setup" } -server = { path = "server" } sm-arith = { path = "state-machines/arith" } sm-binary = { path = "state-machines/binary" } sm-main = { path = "state-machines/main" } @@ -89,12 +95,15 @@ sm-frequent-ops = { path = "state-machines/frequent-ops" } mem-common = { path = "state-machines/mem-common" } mem-planner-cpp = { path = "state-machines/mem-cpp" } sm-rom = { path = "state-machines/rom" } -zisk-witness = { path = "witness-computation" } ziskclib = { path = "ziskclib" } ziskos = { path = "ziskos/entrypoint" } +ziskos-hints = { path = "ziskos-hints" } circuit = { path = "tools/circuit" } zisk-sdk = { path = "sdk" } zisk-build = { path = "ziskbuild" } +precompiles-hints = { path = "precompiles/hints" } +zisk-verifier = { path = "verifier" } +zisk-definitions = { path = "definitions" } # Distributed crates zisk-distributed-common = { path = "distributed/crates/common" } @@ -103,14 +112,14 @@ zisk-distributed-grpc-api = { path = "distributed/crates/grpc-api" } zisk-distributed-prover = { path = "distributed/crates/worker" } # Proofman -proofman = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.15.0" } -proofman-common = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.15.0" } -proofman-macros = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.15.0" } -proofman-verifier = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.15.0" } -proofman-util = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.15.0" } -pil-std-lib = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.15.0" } -witness = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.15.0" } -fields = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.15.0" } +proofman = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.16.0" } +proofman-common = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.16.0" } +proofman-macros = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.16.0" } +proofman-verifier = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.16.0" } +proofman-util = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.16.0" } +pil-std-lib = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.16.0" } +witness = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.16.0" } +fields = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", tag = "v0.16.0" } # Proofman Local development # proofman = { path = "../pil2-proofman/proofman" } # proofman-common = { path = "../pil2-proofman/common" } @@ -131,13 +140,13 @@ colored = "3" ark-ff = "0.5.0" ark-std = "0.5.0" ark-secp256k1 = "0.5" +ark-secp256r1 = "0.5" ark-bn254 = "0.5.0" ark-bls12-381 = "0.5.0" -sysinfo = "0.37" +sysinfo = "0.38" serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0", features = ["preserve_order"] } anyhow = "1.0" -libloading = "0.8" named-sem = "0.2.2" tracing = "0.1" tracing-subscriber = { version = "0.3", features = [ @@ -152,7 +161,7 @@ static_assertions = "1" clap = { version = "4", features = ["derive", "env"] } futures = { version = "0.3" } thiserror = { version = "2" } -bytes = "1.0" +bincode = "1.3.3" tokio = { version = "1", features = ["full"] } tokio-stream = "0.1" @@ -160,6 +169,7 @@ futures-util = "0.3" uuid = { version = "1.0", features = ["serde", "v4"] } chrono = { version = "0.4", features = ["serde"] } sha2 = { version = "0.10.9", features = ["compress"] } +paste = "1.0" # gRPC dependencies tonic = "0.14" @@ -170,12 +180,13 @@ tonic-prost-build = "0.14" prost = "0.14" prost-types = "0.14" config = "0.15" -toml = "0.9" +toml = "0.9.8" borsh = { version = "1.5", features = ["derive"] } reqwest = { version = "0.12", features = [ "stream", "json", "rustls-tls", ], default-features = false } -bytemuck = "1.23" -zstd = "0.13" +vergen-git2 = { version = "9", default-features = false, features = [ + "build", +] } diff --git a/audits/Zisk_Binary_and_Main_Review.pdf b/audits/Zisk_Binary_and_Main_Review.pdf new file mode 100644 index 000000000..f9296b5a6 Binary files /dev/null and b/audits/Zisk_Binary_and_Main_Review.pdf differ diff --git a/book/SUMMARY.md b/book/SUMMARY.md index 1badc60bf..3955c382c 100644 --- a/book/SUMMARY.md +++ b/book/SUMMARY.md @@ -8,6 +8,8 @@ - [Quickstart](./getting_started/quickstart.md) - [Writing Programs](./getting_started/writing_programs.md) - [Precompiles](./getting_started/precompiles.md) +- [Distributed Execution](./getting_started/distributed_execution.md) +- [Hints Stream](./getting_started/hints_stream.md) # Developer Guide - [Ziskof](./developer/ziskof.md) diff --git a/book/getting_started/distributed_execution.md b/book/getting_started/distributed_execution.md new file mode 100644 index 000000000..0c0a6cf99 --- /dev/null +++ b/book/getting_started/distributed_execution.md @@ -0,0 +1,562 @@ +# Distributed Proving + +Generating a ZisK proof can be computationally intensive, especially for large programs. The distributed proving system lets you split the workload across multiple machines, reducing proof generation time by parallelizing the work. + +This chapter covers how to set up and run a distributed proving cluster, from launching a coordinator to connecting workers and submitting proof requests. + +## How It Works + +A distributed proving cluster consists of two roles: + +- A **Coordinator** that receives proof requests and orchestrates the work. +- One or more **Workers** that execute the actual proof computation. + +When you submit a proof request, the process unfolds in three phases: + +1. **Partial Contributions** — The coordinator assigns segments of the work to available workers based on their compute capacity. Each worker computes its partial challenges independently. +2. **Prove** — Workers compute the global challenge and generate their respective partial proofs. +3. **Aggregation** — The first worker to finish is selected as the aggregator. It collects all partial proofs and produces the final proof. + +The coordinator returns the final proof to the client once aggregation completes. + +Workers report their compute capacity when they register. The coordinator selects workers sequentially from the available pool until the requested capacity is met. While assigned to a job, a worker is marked as busy and won't receive new tasks. + +## Getting Started + +### Building + +From the project root, build both binaries: + +```bash +cargo build --release --bin zisk-coordinator --bin zisk-worker +``` + +### Running Locally + +**1. Start the coordinator:** + +```bash +cargo run --release --bin zisk-coordinator +``` + +**2. Start a worker** (in a separate terminal): + +```bash +cargo run --release --bin zisk-worker -- --elf --inputs-folder +``` + +**3. Submit a proof request** (in a separate terminal): + +```bash +cargo run --release --bin zisk-coordinator prove --inputs-uri --compute-capacity 10 +``` + +The `--compute-capacity` flag specifies how many compute units the proof requires. The coordinator assigns workers until this capacity is covered. + +### Docker Deployment + +For multi-machine setups, Docker simplifies deployment: + +```bash +# Build the image (CPU-only) +docker build -t zisk-distributed:latest -f distributed/Dockerfile . + +# For GPU support +docker build --build-arg GPU=true -t zisk-distributed:gpu -f distributed/Dockerfile . + +# Create a network for container DNS resolution +docker network create zisk-net || true +``` + +**Start the coordinator:** + +```bash +LOGS_DIR="" +docker run -d --rm --name zisk-coordinator \ + --network zisk-net \ + -v "$LOGS_DIR:/var/log/distributed" \ + -e RUST_LOG=info \ + zisk-distributed:latest \ + zisk-coordinator --config /app/config/coordinator/dev.toml +``` + +**Start a worker:** + +```bash +LOGS_DIR="" +PROVING_KEY_DIR="" +ELF_DIR="" +INPUTS_DIR="" +docker run -d --rm --name zisk-worker-1 \ + --network zisk-net --shm-size=20g \ + -v "$LOGS_DIR:/var/log/distributed" \ + -v "$HOME/.zisk/cache:/app/.zisk/cache:ro" \ + -v "$PROVING_KEY_DIR:/app/proving-keys:ro" \ + -v "$ELF_DIR:/app/elf:ro" \ + -v "$INPUTS_DIR:/app/inputs:ro" \ + -e RUST_LOG=info \ + zisk-distributed:latest zisk-worker --coordinator-url http://zisk-coordinator:50051 \ + --elf /app/elf/zec.elf --proving-key /app/proving-keys --inputs-folder /app/inputs +``` + +**Submit a proof:** + +```bash +docker exec -it zisk-coordinator \ + zisk-coordinator prove --inputs-uri --compute-capacity 10 +``` + +> **Note:** Use the filename only when submitting proofs, not the full path. Workers resolve files relative to their `--inputs-folder`. + +**Container paths reference:** + +| Path | Purpose | +|------|---------| +| `/app/config/{coordinator,worker}/` | Configuration files | +| `/app/bin/` | Binaries | +| `/app/.zisk/cache/` | Cache (mount from host `$HOME/.zisk/cache`) | +| `/var/log/distributed/` | Log files | + +## Coordinator + +The coordinator is responsible for managing the distributed proof generation process. It receives proof requests from clients and assigns work to available workers. + +To start a coordinator instance with default settings: + +```bash +cargo run --release --bin zisk-coordinator +``` + +### Coordinator Configuration + +The coordinator can be configured using either a **TOML configuration file** or **command-line arguments**. +If no configuration file is explicitly provided, the system falls back to the `ZISK_COORDINATOR_CONFIG_PATH` environment variable to locate one. If neither the CLI argument nor environment variable is set, built-in defaults are used. + +**Example:** + +```bash +# You can specify the configuration file path using a command line argument: +cargo run --release --bin zisk-coordinator -- --config /path/to/my-config.toml + +# You can specify the configuration file path using an environment variable: +export ZISK_COORDINATOR_CONFIG_PATH="/path/to/my-config.toml" +cargo run --release --bin zisk-coordinator +``` + +The table below lists the available configuration options for the Coordinator: + +| TOML Key | CLI Argument | Environment Variable| Type | Default | Description | +|-----------------------|--------------|---------------------|------|---------|-------------| +| `service.name` | - | - | String | ZisK Distributed Coordinator | Service name | +| `service.environment` | - | - | String | development | Service environment (development, staging, production) | +| `server.host` | - | - | String | 0.0.0.0 | Server host | +| `server.port` | `--port` | - | Number | 50051 | Server port | +| `server.proofs_dir` | `--proofs-dir` | - | String | proofs | Directory to save generated proofs (conflicts with `--no-save-proofs`) | +| - | `--no-save-proofs` | - | Boolean | false | Disable saving proofs (conflicts with `--proofs-dir`) | +| - | `-c`, `--compressed-proofs` | - | Boolean | false | Generate compressed proofs | +| `server.shutdown_timeout_seconds` | - | - | Number | 30 | Graceful shutdown timeout in seconds | +| `logging.level` | - | RUST_LOG | String | debug | Logging level (error, warn, info, debug, trace) | +| `logging.format` | - | - | String | pretty | Logging format (pretty, json, compact) | +| `logging.file_path` | - | - | String | - | *Optional*. Log file path (enables file logging) | +| `coordinator.max_workers_per_job` | - | - | Number | 10 | Maximum workers per proof job | +| `coordinator.max_total_workers` | - | - | Number | 1000 | Maximum total registered workers | +| `coordinator.phase1_timeout_seconds` | - | - | Number | 300 | Phase 1 timeout in seconds | +| `coordinator.phase2_timeout_seconds` | - | - | Number | 600 | Phase 2 timeout in seconds | +| `coordinator.webhook_url` | `--webhook-url` | - | String | - | *Optional*. Webhook URL to notify on job completion | + + +#### Configuration Files examples + +Example development configuration file: + +```toml +[service] +name = "ZisK Distributed Coordinator" +environment = "development" + +[logging] +level = "debug" +format = "pretty" +``` + +Example production configuration file: + +```toml +[service] +name = "ZisK Distributed Coordinator" +environment = "production" + +[server] +host = "0.0.0.0" +port = 50051 +proofs_dir = "proofs" + +[logging] +level = "info" +format = "json" +file_path = "/var/log/distributed/coordinator.log" + +[coordinator] +max_workers_per_job = 20 # Maximum workers per proof job +max_total_workers = 5000 # Maximum total registered workers +phase1_timeout_seconds = 600 # 10 minutes for phase 1 +phase2_timeout_seconds = 1200 # 20 minutes for phase 2 +webhook_url = "http://webhook.example.com/notify?job_id={$job_id}" +``` + +### Webhook URL + +The Coordinator can notify an external service when a job finishes by sending a request to a configured webhook URL. +The placeholder {$job_id} can be included in the URL and will be replaced with the finished job’s ID. +If no placeholder is provided, the Coordinator automatically appends /{job_id} to the end of the URL. + +All webhook notifications are sent as JSON POST requests with the following structure: + +```json +{ + "job_id": "job_12345", + "success": true, + "duration_ms": 45000, + "proof": , + "timestamp": "2025-10-03T14:30:00Z", + "error": null +} +``` + +##### Fields Description + +| Field | Type | Description | +|-------|------|-------------| +| `job_id` | `string` | Unique identifier for the proof generation job | +| `success` | `boolean` | `true` if proof generation completed successfully, `false` if it failed | +| `duration_ms` | `number` | Total execution time in milliseconds from job start to completion | +| `proof` | `array` \| `null` | Final proof data as array of integers (only present on success) | +| `timestamp` | `string` | ISO 8601 timestamp when the notification was sent | +| `error` | `object` \| `null` | Error details (only present on failure) | + +##### Error Object Structure + +When `success` is `false`, the `error` field contains: + +```json +{ + "code": "WORKER_FAILURE", + "message": "Worker node-003 failed during proof generation: Out of memory" +} +``` + +**Successful Proof Generation Example:** + +```json +{ + "job_id": "job_abc123", + "success": true, + "duration_ms": 32500, + "proof": [1234567890, 9876543210, 1357924680, ...], + "timestamp": "2025-10-03T14:30:25Z", + "error": null +} +``` + +**Failed Job Example:** + +```json +{ + "job_id": "job_def456", + "success": false, + "duration_ms": 15000, + "proof": null, + "timestamp": "2025-10-03T14:31:10Z", + "error": { + "code": "WORKER_ERROR", + "message": "Memory exhaustion during proof generation" + } +} +``` + +#### Webhook Implementation Guidelines + +*HTTP Requirements:* + +- **Method**: POST +- **Content-Type**: `application/json` +- **Timeout**: 10 seconds (configurable) +- **Retry**: Currently no automatic retries (implement idempotency) + +*Recommended Response:* + +Your webhook endpoint should respond with: + +- **Success**: HTTP 200-299 status code +- **Body**: Any valid response (ignored by coordinator) + +```http +HTTP/1.1 200 OK +Content-Type: application/json + +{"received": true, "job_id": "job_abc123"} +``` + +If your webhook endpoint is unavailable or returns an error: + +- The coordinator logs the failure but continues operation +- No automatic retries are performed +- Consider implementing your own retry mechanism or message queue + +### Command Line Arguments + +```bash +# Show help +cargo run --release --bin zisk-coordinator -- --help + +# Run coordinator with custom port +cargo run --release --bin zisk-coordinator -- --port 50051 + +# Run with specific configuration +cargo run --release --bin zisk-coordinator -- --config production.toml + +# Run with webhook URL +cargo run --release --bin zisk-coordinator -- --webhook-url http://webhook.example.com/notify --port 50051 +``` + +## Worker + +The worker is responsible for executing proof generation tasks assigned by the coordinator. It registers with the coordinator, reports its compute capacity, and waits for tasks to be assigned. + +To start a worker instance with default settings: + +```bash +cargo run --release --bin zisk-worker -- --elf --inputs-folder +``` + +### Worker Configuration + +The worker can be configured using either a **TOML configuration file** or **command-line arguments**. +If no configuration file is explicitly provided, the system falls back to the `ZISK_WORKER_CONFIG_PATH` environment variable to locate one. If neither the CLI argument nor environment variable is set, built-in defaults are used. + +**Example:** + +```bash +# You can specify the configuration file path using a command line argument: +cargo run --release --bin zisk-worker -- --config /path/to/my-config.toml + +# You can specify the configuration file path using an environment variable: +export ZISK_WORKER_CONFIG_PATH="/path/to/my-config.toml" +cargo run --release --bin zisk-worker +``` + +### Input Files Handling + +Workers need to know where to find input files for proof generation. The `--inputs-folder` parameter specifies the base directory where input files are stored: + +- **Default**: Current working directory (`.`) if not specified +- **Usage**: When the coordinator sends a prove command with an input filename, the worker combines `--inputs-folder` + `filename` to locate the file +- **Benefits**: Allows input files to be organized in a dedicated directory, separate from the worker executable + +**Example:** +```bash +# Worker with inputs in specific folder +cargo run --release --bin zisk-worker -- --elf program.elf --inputs-folder /data/inputs/ + +# Coordinator requests proof for "input.bin" -> Worker looks for "/data/inputs/input.bin" +cargo run --release --bin zisk-coordinator -- prove --inputs-uri input.bin --compute-capacity 10 +``` + +The table below lists the available configuration options for the Worker: + +| TOML Key | CLI Argument | Environment Variable| Type | Default | Description | +|-----------------------|--------------|---------------------|------|---------|-------------| +| `worker.worker_id` | `--worker-id` | - | String | Auto-generated UUID | Unique worker identifier | +| `worker.compute_capacity.compute_units` | `--compute-capacity` | - | Number | 10 | Worker compute capacity (in compute units) | +| `worker.environment` | - | - | String | development | Service environment (development, staging, production) | +| `worker.inputs_folder` | `--inputs-folder` | - | String | . | Path to folder containing input files | +| `coordinator.url` | `--coordinator-url` | - | String | http://127.0.0.1:50051 | Coordinator server URL | +| `connection.reconnect_interval_seconds` | - | - | Number | 5 | Reconnection interval in seconds | +| `connection.heartbeat_timeout_seconds` | - | - | Number | 30 | Heartbeat timeout in seconds | +| `logging.level` | - | RUST_LOG | String | debug | Logging level (error, warn, info, debug, trace) | +| `logging.format` | - | - | String | pretty | Logging format (pretty, json, compact) | +| `logging.file_path` | - | - | String | - | *Optional*. Log file path (enables file logging) | +| - | `--proving-key` | - | String | ~/.zisk/provingKey | Path to setup folder | +| - | `--elf` | - | String | - | Path to ELF file | +| - | `--asm` | - | String | ~/.zisk/cache | Path to ASM file (mutually exclusive with `--emulator`) | +| - | `--emulator` | - | Boolean | false | Use prebuilt emulator (mutually exclusive with `--asm`) | +| - | `--asm-port` | - | Number | 23115 | Base port for Assembly microservices | +| - | `--shared-tables` | - | Boolean | false | Whether to share tables when worker is running in a cluster | +| - | `-v`, `-vv`, `-vvv`, ... | - | Number | 0 | Verbosity level (0=error, 1=warn, 2=info, 3=debug, 4=trace) | +| - | `-d`, `--debug` | - | String | - | Enable debug mode with optional component filter | +| - | `--verify-constraints` | - | Boolean | false | Whether to verify constraints | +| - | `--unlock-mapped-memory` | - | Boolean | false | Unlock memory map for the ROM file (mutually exclusive with `--emulator`) | +| - | `--hints` | - | Boolean | false | Enable precompile hints processing | +| - | `-m`, `--minimal-memory` | - | Boolean | false | Use minimal memory mode | +| - | `-r`, `--rma` | - | Boolean | false | Enable RMA mode | +| - | `-z`, `--preallocate` | - | Boolean | false | GPU preallocation flag | +| - | `-t`, `--max-streams` | - | Number | - | Maximum number of GPU streams | +| - | `-n`, `--number-threads-witness` | - | Number | - | Number of threads for witness computation | +| - | `-x`, `--max-witness-stored` | - | Number | - | Maximum number of witnesses to store in memory | + +#### Configuration Files examples + +Example development configuration file: + +```toml +[worker] +compute_capacity.compute_units = 10 +environment = "development" + +[logging] +level = "debug" +format = "pretty" +``` + +Example production configuration file: + +```toml +[worker] +worker_id = "my-worker-001" +compute_capacity.compute_units = 10 +environment = "production" +inputs_folder = "/app/inputs" + +[coordinator] +url = "http://127.0.0.1:50051" + +[connection] +reconnect_interval_seconds = 5 +heartbeat_timeout_seconds = 30 + +[logging] +level = "info" +format = "pretty" +file_path = "/var/log/distributed/worker-001.log" +``` + +## Launching a Proof + +To launch a proof generation request, use the `prove` subcommand of the `zisk-coordinator` binary. This sends an RPC request to a running coordinator instance. + +```bash +cargo run --release --bin zisk-coordinator -- prove --inputs-uri --compute-capacity 10 +``` + +The `--compute-capacity` flag indicates the total compute units required to generate a proof. The coordinator will assign one or more workers to meet this capacity, distributing the workload if multiple workers are needed. Requests exceeding the combined capacity of available workers will not be processed and an error will be returned. + +### Prove Subcommand Arguments + +| CLI Argument | Short | Type | Default | Description | +|---|---|---|---|---| +| `--inputs-uri` | - | String | - | Path to the input file for proof generation | +| `--compute-capacity` | `-c` | Number | *required* | Total compute units required for the proof | +| `--coordinator-url` | - | String | http://127.0.0.1:50051 | URL of the coordinator to send the request to | +| `--data-id` | - | String | Auto (from filename or UUID) | Custom identifier for the proof job | +| `--hints-uri` | - | String | - | Path/URI to the precompile hints source | +| `--stream-hints` | - | Boolean | false | Stream hints from the coordinator to workers via gRPC (see [Hints Stream](hints_stream.md)) | +| `--direct-inputs` | `-x` | Boolean | false | Send input data inline via gRPC instead of as a file path | +| `--minimal-compute-capacity` | `-m` | Number | Same as `--compute-capacity` | Minimum acceptable compute capacity (allows partial worker allocation) | +| `--simulated-node` | - | Number | - | Simulated node ID (for testing) | + +### Input and Hints Modes + +The `prove` subcommand supports two modes for delivering inputs and hints to workers: + +**Input modes** (controlled by `--inputs-uri` and `--direct-inputs`): +- **Path mode** (default): The coordinator sends the input file path to workers. Workers must have access to the file at the specified path. +- **Data mode** (`--direct-inputs`): The coordinator reads the input file and sends its contents inline via gRPC. Workers do not need local access to the file. + +**Hints modes** (controlled by `--hints-uri` and `--stream-hints`): +- **Path mode** (default): The coordinator sends the hints URI to workers. Each worker loads hints from the specified path independently. +- **Streaming mode** (`--stream-hints`): The coordinator reads hints from the URI and broadcasts them to all workers in real-time via gRPC. See the [Hints Stream documentation](hints_stream.md) for details. + +**Examples:** +```bash +# Basic proof with file path inputs +zisk-coordinator prove --inputs-uri /data/inputs/my_input.bin --compute-capacity 10 + +# Send input data directly (workers don't need local file access) +zisk-coordinator prove --inputs-uri /data/inputs/my_input.bin -x --compute-capacity 10 + +# With precompile hints in path mode (workers load hints locally) +zisk-coordinator prove --inputs-uri input.bin --hints-uri /data/hints/hints.bin --compute-capacity 10 + +# With precompile hints in streaming mode (coordinator broadcasts to workers) +zisk-coordinator prove --inputs-uri input.bin --hints-uri unix:///tmp/hints.sock --stream-hints --compute-capacity 10 +``` + +## Administrative Operations + +### Health Checks and Monitoring + +The coordinator exposes administrative endpoints for monitoring: + +```bash +# Basic health check +grpcurl -plaintext 127.0.0.1:50051 zisk.distributed.api.v1.ZiskDistributedApi/HealthCheck + +# System status +grpcurl -plaintext 127.0.0.1:50051 zisk.distributed.api.v1.ZiskDistributedApi/SystemStatus + +# List active jobs +grpcurl -plaintext -d '{"active_only": true}' \ + 127.0.0.1:50051 zisk.distributed.api.v1.ZiskDistributedApi/JobsList + +# List connected workers +grpcurl -plaintext -d '{"available_only": true}' \ + 127.0.0.1:50051 zisk.distributed.api.v1.ZiskDistributedApi/WorkersList +``` + +## Troubleshooting + +### Common Issues + +**Worker can't connect to coordinator:** +- Verify coordinator is running and accessible on the specified port +- Check firewall settings if coordinator and worker are on different machines +- Ensure correct URL format: `http://host:port` (not `https://` for default setup) + +**Configuration not loading:** +- Verify TOML syntax with a TOML validator +- Check file permissions on configuration files +- Use CLI overrides to test specific values + +**Worker not receiving tasks:** +- Check worker registration in coordinator logs +- Verify compute capacity is appropriate for available tasks +- Ensure worker ID is unique if running multiple workers +- Confirm coordinator has active jobs to distribute + +**Input file not found errors:** +- Verify the input file exists in the worker's `--inputs-folder` directory +- Check file permissions - worker needs read access to input files +- Ensure you're using the filename only (not full path) when launching proofs +- Confirm `--inputs-folder` path is correct and accessible + +**Port conflicts:** +- Use `--port` flag or update configuration file to change ports +- Check for other services using the same ports + +### Debug Mode + +Enable detailed logging for troubleshooting by modifying configuration files or using CLI arguments: + +```bash +# Coordinator with debug logging (via config file) +cargo run --release --bin zisk-coordinator -- --config debug-coordinator.toml + +# Worker with debug logging (via config file) +cargo run --release --bin zisk-worker -- --config debug-worker.toml +``` + +Where `debug-coordinator.toml` or `debug-worker.toml` contains: +```toml +[logging] +level = "debug" +format = "pretty" +``` + +### Log Files + +When file logging is enabled, logs are written into specified paths in the configuration files. Ensure the application has write permissions to these paths. + +```toml +[logging] +file_path = "/var/log/distributed/coordinator.log" +``` diff --git a/book/getting_started/hints_stream.md b/book/getting_started/hints_stream.md new file mode 100644 index 000000000..1fb720167 --- /dev/null +++ b/book/getting_started/hints_stream.md @@ -0,0 +1,430 @@ +# Hints Stream + +The hints stream accelerates proof generation by offloading expensive operations outside the zkVM execution, then feeding the results back as verifiable data through a high-performance, parallel pipeline. Hints are preprocessed results that allow operations to be handled externally while remaining fully verifiable inside the VM. The system supports two categories of hints: + +1. **Precompile hints**: Cryptographic operations (SHA-256, Keccak-256, elliptic curve operations, pairings, etc.) that are computationally expensive inside a zkVM. +2. **Input hints**: Data that needs to be passed to the zkVM as input during execution. + +The system is designed around three core principles: + +1. **Pre-computing results outside the VM**: The guest program emits hint requests describing the operation and its inputs. +2. **Streaming results back**: A dedicated pipeline processes these requests in parallel, maintaining order, and feeds results to the prover via shared memory. +3. **Verifying inside the VM**: The zkVM circuits verify that the precomputed results are correct, avoiding the cost of computing them inside the zkVM. + +```mermaid +flowchart LR + A["Guest program
Emits hints request"] --> B["ZiskStream"] + B --> C["HintsProcessor
Parallel engine"] + C --> D["StreamSink
ASM emulator/file output"] +``` + +--- + +## Table of Contents + +1. [Hint Format and Protocol](#1-hint-format-and-protocol) +2. [Hints in CLI Execution](#2-hints-in-cli-execution) +3. [Hints in Distributed Execution](#3-hints-in-distributed-execution) +4. [Custom Hint Handlers](#4-custom-hint-handlers) +5. [Generating Hints in Guest Programs](#5-generating-hints-in-guest-programs) + +--- + +## 1. Hint Format and Protocol + +### 1.1. Hint Request Format + +Hints are transmitted as a stream of `u64` values. Each hint request consists of a **header** (1 `u64`) followed by **data** (N `u64` values). + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Header (u64) │ +├·····························································┤ +│ Hint Code (32 bits) Length (32 bits). │ +├─────────────────────────────────────────────────────────────┤ +│ Data[0] (u64) │ +├─────────────────────────────────────────────────────────────┤ +│ Data[1] (u64) │ +├─────────────────────────────────────────────────────────────┤ +│ ... │ +├─────────────────────────────────────────────────────────────┤ +│ Data[N-1] (u64) │ +└─────────────────────────────────────────────────────────────┘ +where N = ceil(Length / 8) +``` +- **Hint Code** (upper 32 bits): Control code or Data Hint Type +- **Length** (lower 32 bits): Payload data size in **bytes**. The last `u64` may contain padding bytes. + +### 1.2. Control Hint Types: + +The following control codes are defined: +- `0x00` (START): Start a new hint stream. Resets processor state and sequence counters. Must be the first hint in the first batch. +- `0x01` (END): End the current hint stream. The processor will wait for all pending hints to be processed before returning. Must be the last hint in its batch; only a `CTRL_START` may follow in a subsequent batch. +- `0x02` (CANCEL): **[Reserved for future use]** Cancel current stream and stop processing further hints. +- `0x03` (ERROR): **[Reserved for future use]** Indicate an error has occurred; stop processing further hints. + +Control codes are for control only and do not have any associated data (Length should be zero). + +### 1.3. Data Hint Types + +For data hints, the hint code (32 bits) is structured as follows: +- **Bit 31 (MSB)**: Pass-through flag. When set, the data bypasses computation and is forwarded directly to the sink. +- **Bits 0-30**: The hint type identifier (control, built-in, or custom code). + (e.g., `HINT_SHA256`, `HINT_BN254_G1_ADD`, `HINT_SECP256K1_RECOVER`, etc.) + +**Example**: A SHA-256 hint (`0x0100`) with a 32-byte input: +``` +Header: 0x00000100_00000020 +Data[0]: first_8_input_bytes_as_u64 +Data[1]: next_8_input_bytes_as_u64 +Data[2]: next_8_input_bytes_as_u64 +Data[3]: last_8_input_bytes_as_u64 +``` + +The same hint with the **pass-through flag** set (bit 31), forwarding pre-computed data directly to the sink without invoking the SHA-256 handler: +``` +Header: 0x80000100_00000020 +``` + +#### 1.3.1 Stream Batching + +The hints protocol supports chunking for individual hints that exceed the transport’s message size limit (currently 128 KB). Each message in the stream contains either a single complete hint or one chunk of a larger hint — hints are never combined in the same message. + +When a hint exceeds the size limit, it must be split into multiple sequential chunks, each sent as a separate message. Each chunk includes a header specifying the total length of the complete hint, allowing the receiver to reassemble all chunks before processing. For example, a hint with a 300 KB payload would be split into three messages: +```Message 1: Header (code + total length), Data[0..N] (first 128 KB chunk) +Message 2: Header (code + total length), Data[0..N] (second 128 KB chunk) +Message 3: Header (code + total length), Data[0..M] (final 44 KB chunk) +``` +The receiver buffers incoming chunks and reassembles them based on the total length specified in the header before invoking the hint handler. This allows the system to handle arbitrarily large hints while respecting transport limitations. + +#### 1.3.2 Pass-Through Hints + +When bit 31 of the hint code is set (e.g., `0x8000_0000 | actual_code`), the hint is marked as **pass-through**: + +- The data payload is forwarded directly to the sink without invoking any handler. +- No worker thread is spawned; the data is queued immediately in the reorder buffer. +- This is useful for pre-computed results that don't need processing. + +### 1.4. Hint Code Types + +| Category | Code Range | Description | +|--------------|---------------------|-------------------------------------| +| **Control** | `0x0000`-`0x000F` | Stream lifecycle management | +| **Built-in** | `0x0100`-`0x0800` | Cryptographic precompile operations | +| **Input** | `0xF0000` | Input data hints | +| **Custom** | User-defined | Application-specific handlers | + +> **Note:** Custom hint codes can technically use any value not occupied by control or built-in codes. By convention, codes `0xA000`-`0xFFFF` are recommended for custom use to avoid future conflicts as new built-in types are added. The processor does not enforce a range restriction — any unrecognized code is treated as custom. + +#### 1.4.1. Control Codes + +Control codes manage the stream lifecycle and do not carry computational data: + +| Code | Name | Description | +|------|------|-------------| +| `0x0000` | `CTRL_START` | Resets processor state. Must be the first hint in the first batch. | +| `0x0001` | `CTRL_END` | Signals end of stream. Blocks until all pending hints complete. Must be the last hint. | +| `0x0002` | `CTRL_CANCEL` | **[Reserved for future use]** Cancels the current stream. Sets error flag and stops processing. | +| `0x0003` | `CTRL_ERROR` | **[Reserved for future use]** External error signal. Sets error flag and stops processing. | + +#### 1.4.2. Built-in Hint Types + +| Code | Name | Description | +|------|------|-------------| +| `0x0100` | `Sha256` | SHA-256 hash computation | +| `0x0200` | `Bn254G1Add` | BN254 G1 point addition | +| `0x0201` | `Bn254G1Mul` | BN254 G1 scalar multiplication | +| `0x0205` | `Bn254PairingCheck` | BN254 pairing check | +| `0x0300` | `Secp256k1EcdsaAddressRecover` | Secp256k1 ECDSA address recovery | +| `0x0301` | `Secp256k1EcdsaVerifyAddressRecover` | Secp256k1 ECDSA verify + address recovery | +| `0x0380` | `Secp256r1EcdsaVerify` | Secp256r1 (P-256) ECDSA verification | +| `0x0400` | `Bls12_381G1Add` | BLS12-381 G1 point addition | +| `0x0401` | `Bls12_381G1Msm` | BLS12-381 G1 multi-scalar multiplication | +| `0x0405` | `Bls12_381G2Add` | BLS12-381 G2 point addition | +| `0x0406` | `Bls12_381G2Msm` | BLS12-381 G2 multi-scalar multiplication | +| `0x040A` | `Bls12_381PairingCheck` | BLS12-381 pairing check | +| `0x0410` | `Bls12_381FpToG1` | BLS12-381 map field element to G1 | +| `0x0411` | `Bls12_381Fp2ToG2` | BLS12-381 map field element to G2 | +| `0x0500` | `ModExp` | Modular exponentiation | +| `0x0600` | `VerifyKzgProof` | KZG polynomial commitment proof verification | +| `0x0700` | `Keccak256` | Keccak-256 hash computation | +| `0x0800` | `Blake2bCompress` | Blake2b compression function | + +#### 1.4.3. Input Hint Type + +Input hints allow passing data to the zkVM during execution. Unlike precompile hints that are processed by worker threads, input hints are forwarded directly to a separate inputs sink. + +| Code | Name | Description | +|------|------|-------------| +| `0xF0000` | `Input` | Input data for the zkVM | + +The input hint payload format is: +- **First 8 bytes**: Length of the input data (as `u64` little-endian) +- **Remaining bytes**: The actual input data, padded to 8-byte alignment + +Input hints are not processed by the parallel worker pool; instead, they are immediately submitted to the inputs sink for consumption by the zkVM. + +#### 1.4.4. Custom Hint Types + +Custom hint types allow users to define their own hint handlers for application-specific logic. Users can register custom handlers via the `HintsProcessor` builder API, providing a mapping from hint code to a processing function (see [Custom Hint Handlers](#4-custom-hint-handlers)). By convention, codes in the range `0xA000`-`0xEFFFF` are recommended for custom use to avoid conflicts with current and future built-in types. If a data hint is received with an unregistered code, the processor returns an error and stops processing immediately. + +### 1.5. Stream Protocol + +A valid hint stream follows this protocol: + +``` +CTRL_START ← Reset state, begin stream + [Hint_1] [Hint_2] ... [Hint_N] ← Data hints (precompile, input, or custom) +CTRL_END ← Wait for completion, end stream +``` + +## 2. Hints in CLI Execution + +There are four CLI commands (`execute`, `prove`, `verify-constraints`, `stats`) that support hints stream system by providing a URI via the `--hints` option. The URI determines the input stream source for hints, which can be a file, Unix socket, QUIC stream, or other custom transport. +The supported schemes are: +``` +--hints file://path → File stream reader +--hints unix://path → Unix socket stream reader +--hints quic://host:port → Quic stream reader +--hints (plain path) → File stream reader +``` + +> **Note:** Only ASM mode supports hints. The emulator mode does not use the hints pipeline. + +## 3. Hints in Distributed Execution + +In the distributed proving system, hints are received by the `coordinator` and broadcasted to all **workers** via gRPC. The coordinator runs a relay that validates incoming hint messages, assigns sequence numbers for ordering, and dispatches them to `workers` asynchronously. `Workers` buffer incoming messages and reorder them by sequence number before processing. The processed hints are then submitted to the sink in the correct order. +There is another mode where workers can load hints from a local path/URI instead of streaming from the coordinator, which is useful for debugging. + +### 3.1. Architecture + +```mermaid +flowchart TD + A["Guest program
Emits hints request"] --> B + + subgraph H["Coordinator"] + B["ZiskStream"] + B --> C["Hints Relay
Validates
Broadcast to all workers (async)
"] + end + + C --> E["Worker 1
Stream incoming hints + Reorder"] + C --> F["Worker 2
Stream incoming hints + Reorder"] + C --> G["Worker N
Stream incoming hints + Reorder"] + + E --> E1["HintsProcessor
Parallel engine"] + E1 --> E2["StreamSink
ASM emulator/file output"] + + F --> F1["HintsProcessor
Parallel engine"] + F1 --> F2["StreamSink
ASM emulator/file output"] + + G --> G1["HintsProcessor
Parallel engine"] + G1 --> G2["StreamSink
ASM emulator/file output"] + + style H fill:transparent,stroke-dasharray: 5 5 +``` + +When the `coordinator` receives a hint request from the guest program, it parses the incoming `u64` stream, validates control codes, assigns sequence numbers for ordering, and broadcasts the data to all workers. + +Three message types are sent over gRPC to workers: + +| StreamMessageKind | When | Payload | +|---|---|---| +| `Start` | On `CTRL_START` | None | +| `Data` | For each data batch | Sequence number + raw bytes | +| `End` | On `CTRL_END` | None | + +Each worker receives the stream of hints, buffers them if they arrive out of order, and sends them to the `HintsProcessor` for parallel processing. The `HintsProcessor` ensures that results are submitted to the sink in the original order. + +### 3.2. Hints Mode Configuration + +When starting a worker, if the `--hints` option is provided, the worker prepares to receive hints from the coordinator. +When launching a proof generation job where hints will be provided, the workers must be started to receive and process hints. +A hints stream system can be configured in two ways: +* **Streaming mode**: Workers receive hints from the coordinator via gRPC. This is the default and recommended mode for production, as it allows real-time processing of hints as they are generated. +* **Path mode**: Workers load hints from a local path/URI. This is useful for debugging or when hints are pre-generated and stored in a file. In this mode, the coordinator does not send hints to workers; instead, each worker reads the hints directly from the specified path. + +#### 3.2.1 Coordinator Hints Streaming Mode + +To start the coordinator in streaming mode, provide the `--hints-uri` option with a URI that the `coordinator` will connect to, and set `--stream-hints` to enable broadcasting to workers. The URI determines the input stream source for hints. +The supported schemes are: +``` +--hints-uri file://path → File stream reader +--hints-uri unix://path → Unix socket stream reader +--hints-uri quic://host:port → Quic stream reader +--hints-uri (plain path) → File stream reader +``` + +Example to launch a prove command in streaming mode: +``` +zisk-coordinator prove --hints-uri unix:///tmp/hints.sock --stream-hints ... +``` + +#### 3.2.2 Worker Hints non-Streaming Mode + +To start a worker in non-streaming mode, provide the `--hints-uri` option with a URI that points to the local workers path where hints are stored, without the `--stream-hints` option. In this mode the worker(s) will load hints from the specified URI instead of receiving them from the coordinator. This mode is useful for debugging or when hints are pre-generated and stored in a file. + +## 4. Custom Hint Handlers + +Register custom handlers via the builder pattern: + +```rust +let processor = HintsProcessor::builder(my_sink) + .custom_hint(0xA000, |data: &[u64]| -> Result> { + // Custom processing logic + Ok(vec![data[0] * 2]) + }) + .custom_hint(0xA001, |data| { + // Another custom handler + Ok(transform(data)) + }) + .build()?; +``` + +**Requirements:** +- Handler function must be `Fn(&[u64]) -> Result> + Send + Sync + 'static`. +- Custom hint codes should not conflict with built-in codes (`0x0000`-`0x0700`). By convention, use codes in the range `0xA000`-`0xFFFF`. + +## 5. Generating Hints in Guest Programs + +To generate hints from the guest program you need to follow these steps and requirements: + +1. **Emit hint requests**: Patch your code or dependent crates to call the external FFI Hints helper functions that generate the hints input data required later by the `HintsProcessor`. See [FFI Hints Helper Functions](#55-ffi-hints-helper-functions) for the list of available built-in FFI Hints helper functions, or [Custom Hints Generation](#56-custom-hints-generation) to learn how to generate custom hints from the guest program. +2. **Add the `ziskos` crate** to your guest `Cargo.toml`. +3. **Initialize and finalize the hint stream**: Call the hints init and close functions immediately before and after the section of code that executes precompile logic. +4. **Enable hints at compile time**: Compile your guest program with `RUSTFLAGS='--cfg zisk_hints'` for the native target to activate hint code generation and FFI helper functions in the `ziskos` crate. +5. **Ensure deterministic execution**: Verify that both the native execution that generates hints and the guest compiled for the `zkvm/zisk` target execute deterministically and produce/consume hints in the exact same order. See [Deterministic Execution Requirement](#54-deterministic-execution-requirement). + +To illustrate these steps, consider the `zec-reth` guest program, which executes and verifies Ethereum Mainnet blocks using the ZisK zkVM: + +https://github.com/0xPolygonHermez/zisk-eth-client/tree/main-reth/bin/guest + +### 5.1 Emit Hint Requests + +`zec-reth` relies on `reth` crates, which expose a `Crypto` trait that allows a guest program to override precompile implementations. This enables zkVM-optimized implementations while also emitting hints so the computation can be performed outside the zkVM. + +For example, the BN254 elliptic curve addition (`bn254_g1_add`) implementation for the `Crypto` trait can be found here: + +https://github.com/0xPolygonHermez/zisk-eth-client/blob/86b71b39d35efb9894696cab115a1177f3e47dbf/crates/guest-reth/src/crypto/impls.rs#L87 + +In that file, two target-specific implementations are provided: one for `zkvm/zisk` and one for native (non-zkVM) targets. When compiling with `--cfg zisk_hints` for the native target, the zkVM-specific implementation emits a hint request using the FFI helper: + +```rust +#[cfg(zisk_hints)] +unsafe { + pub fn hint_bn254_g1_add(p1: *const u8, p2: *const u8); +} +``` + +This call generates the hint input data using the exact input values that will later be used by the ZisK zkVM when executing the `zkvm/zisk` target code. This hint input data is consumed later by the `HintsProcessor`, allowing the `bn254_g1_add` computation to be performed outside the zkVM while remaining fully verifiable inside the circuit. + +After the hint generation, execution continues in the native target code to compute the `bn254_g1_add` result. + +From the guest program, we generate hints containing the input data for the corresponding `zisklib` functions (in this example, the `bn254_g1_add_c` function). These `zisklib` functions may internally invoke one or more precompiles to produce the final result. + +When the hints are processed by the `HintsProcessor`, it executes the same `zisklib` function using the implementation code for the zkvm/zisk target. This produces the exact precompile results expected when executing the guest ELF inside the zkVM. + +As a result, for each `zisklib` function invocation, the `HintsProcessor` may generate one or more precompile hint results corresponding to the precompile inputs originally emitted by the guest. + +### 5.2 Initialize/Finalize Hint Stream + +To start hints generation from your guest program you must call one of the following functions from the `ziskos::hints` crate: + +```rust +pub fn init_hints_file(hints_file_path: PathBuf, ready: Option>) -> Result<()> +``` + +This function stores the generated hints in the file specified by the `hints_file_path` parameter. + +```rust +pub fn init_hints_socket(socket_path: PathBuf, debug_file: Option, ready: Option>) -> Result<()> +``` + +This function sends the hints through the Unix socket specified by the `socket_path` parameter. + +The optional `ready` parameter can be used for synchronization with the host when the guest program is executed in a separate thread to generate hints in parallel. It signals `ready` when the hints generation is ready to start writing hints through the Unix socket. + +The optional `debug_file` parameter can be used to store, in the specified file, a copy of the hints sent through the socket. This file can later be used for debugging purposes. + +To close hints generation you must call: + +```rust +pub fn close_hints() -> Result<()> +``` + +You should call these functions only when the guest is compiled for the native target used for hints generation. This can be achieved by placing the code under the following configuration flag: + +```rust +#[cfg(zisk_hints)] +{ + // Initialization/Finalize Hints generation code + ... +} +``` + +You can review how hints generation is initialized and finalized in the `zec-reth` guest here: + +https://github.com/0xPolygonHermez/zisk-eth-client/blob/main-reth/bin/guest/src/main.rs + +### 5.3 Enable Hints at Compile Time + +Once the guest program is set up to generate hints for the native target, it must be compiled with the `zisk_hints` configuration flag enabled: + +```bash +RUSTFLAGS='--cfg zisk_hints' cargo build --release +``` + +After compiling, executing the guest program will generate the hints binary file at the specified location (if `init_hints_file` was used) or start writing hints to the specified Unix socket (if `init_hints_socket` was used). + +If a hints file was generated, it can be consumed using the `--hints` flag in the `cargo-zisk` commands that support hints (as explained in [Hints in CLI Execution](#2-hints-in-cli-execution)). + +If you want to display metrics in the console about the number of hints generated during native guest execution, you can additionally compile the guest with the `--cfg zisk_hints_metrics` flag. + +To enable hint support when executing the guest inside the zkVM (ELF guest), you must pass the `--hints` flag when generating the assembly ROM using the `cargo-zisk rom-setup` command. + +**NOTE:** Hint processing is not supported when executing the guest ELF file in emulation mode. + +### 5.4 Deterministic Execution Requirement + +An important requirement of the hints generation flow is that the native execution that generates the hints must be fully deterministic and always produce hints in the exact same order. + +Furthermore, the order of hints generated during native execution must match the order in which the guest program compiled for the `zkvm/zisk` target expects to receive them. Since the zkVM execution is also deterministic, any divergence in hint ordering between native execution and zkVM execution will result in incorrect behavior. + +To guarantee deterministic hint generation, the code paths that directly or indirectly generate hints must avoid: + +- The use of threads or parallel execution. +- Data structures such as `HashMap` (or any structure based on randomized hash seeds) when iterated in loops that directly or indirectly call precompile/hint functions. + +Using threads or iterating over non-deterministically ordered data structures may cause the hint generation order to vary between runs, breaking the required alignment between native and zkVM executions. + +### 5.5 FFI Hints Helper Functions + +| Code | Function | +| ---- | -------- | +| `0x0100` | `fn hint_sha256(f_ptr: *const u8, f_len: usize);` | +| `0x0200` | `fn hint_bn254_g1_add(p1: *const u8, p2: *const u8);`| +| `0x0201` | `fn hint_bn254_g1_mul(point: *const u8, scalar: *const u8);` | +| `0x0205` | `fn hint_bn254_pairing_check(pairs: *const u8, num_pairs: usize);` | +| `0x0300` | `fn hint_secp256k1_ecdsa_address_recover(sig: *const u8, recid: *const u8, msg: *const u8);` | +| `0x0301` | `fn hint_secp256k1_ecdsa_verify_and_address_recover(sig: *const u8, msg: *const u8, pk: *const u8);` | +| `0x0380` | `fn hint_secp256r1_ecdsa_verify(msg: *const u8, sig: *const u8, pk: *const u8);` | +| `0x0400` | `fn hint_bls12_381_g1_add(a: *const u8, b: *const u8);` | +| `0x0401` | `fn hint_bls12_381_g1_msm(pairs: *const u8, num_pairs: usize);` | +| `0x0405` | `fn hint_bls12_381_g2_add(a: *const u8, b: *const u8);` | +| `0x0406` | `fn hint_bls12_381_g2_msm(pairs: *const u8, num_pairs: usize);` | +| `0x040A` | `fn hint_bls12_381_pairing_check(pairs: *const u8, num_pairs: usize);` | +| `0x0410` | `fn hint_bls12_381_fp_to_g1(fp: *const u8);` | +| `0x0411` | `fn hint_bls12_381_fp2_to_g2(fp2: *const u8);` | +| `0x0500` | `fn hint_modexp_bytes(base_ptr: *const u8, base_len: usize, exp_ptr: *const u8, exp_len: usize, modulus_ptr: *const u8, modulus_len: usize);` | +| `0x0600` | `fn hint_verify_kzg_proof(z: *const u8, y: *const u8, commitment: *const u8, proof: *const u8);` | +| `0x0700` | `fn hint_keccak256(input_ptr: *const u8, input_len: usize);` | +| `0x0800` | `fn hint_blake2b_compress(...);` | +| `0xF0000` | `fn hint_input_data(input_data_ptr: *const u8, input_data_len: usize);` | + +### 5.6 Custom Hints Generation +To extend the built-in hints, you can generate custom hints for new operations. The first step is to register the new hint in the `HintsProcessor`, as explained in section [Custom Hint Handlers](#4-custom-hint-handlers). Once the hint is registered, you can generate hints for it from the guest program using the following FFI function: + +```rust +fn hint_custom(hint_id: u32, data_ptr: *const u8, data_len: usize, is_result: u8); +``` + +and following the same guidelines described for the built-in FFI hint helper functions. diff --git a/book/getting_started/installation.md b/book/getting_started/installation.md index 5c93bb43f..c389fb127 100644 --- a/book/getting_started/installation.md +++ b/book/getting_started/installation.md @@ -85,6 +85,11 @@ To update ZisK to the latest version, simply run: You can use the flags `--provingkey`, `--verifykey` or `--nokey` to specify the installation setup and skip the selection prompt. +To install the PLONK proving key (provingKeySnark), run: + ```bash + ziskup setup_snark + ``` + ### Option 2: Building from Source @@ -123,8 +128,7 @@ You can use the flags `--provingkey`, `--verifykey` or `--nokey` to specify the 3. Copy the tools to `~/.zisk/bin` directory: ```bash mkdir -p $HOME/.zisk/bin - LIB_EXT=$([[ "$(uname)" == "Darwin" ]] && echo "dylib" || echo "so") - cp target/release/cargo-zisk target/release/ziskemu target/release/riscv2zisk target/release/zisk-coordinator target/release/zisk-worker target/release/libzisk_witness.$LIB_EXT target/release/libziskclib.a $HOME/.zisk/bin + cp target/release/cargo-zisk target/release/ziskemu target/release/riscv2zisk target/release/zisk-coordinator target/release/zisk-worker target/release/libziskclib.a $HOME/.zisk/bin ``` 4. Copy required files for assembly rom setup: @@ -199,27 +203,31 @@ Please note that the process can be long, taking approximately 45-60 minutes dep 5. Compile ZisK PIL: ```bash - node ../pil2-compiler/src/pil.js pil/zisk.pil -I pil,../pil2-proofman/pil2-components/lib/std/pil,state-machines,precompiles -o pil/zisk.pilout -u tmp/fixed -O fixed-to-file + node --max-old-space-size=16384 ../pil2-compiler/src/pil.js pil/zisk.pil -I pil,../pil2-proofman/pil2-components/lib/std/pil,state-machines,precompiles -o pil/zisk.pilout -u tmp/fixed -O fixed-to-file ``` This command will create the `pil/zisk.pilout` file 6. Generate setup data: (this step may take 30-45 minutes): ```bash - node ../pil2-proofman-js/src/main_setup.js --stack-size=8192 -a ./pil/zisk.pilout -b build -t ../pil2-proofman/pil2-components/lib/std/pil -u tmp/fixed -r -s ./state-machines/starkstructs.json + node --max-old-space-size=16384 --stack-size=8192 ../pil2-proofman-js/src/main_setup.js -a ./pil/zisk.pilout -b build -t ../pil2-proofman/pil2-components/lib/std/pil -u tmp/fixed -r -s ./state-machines/starkstructs.json ``` This command generates the `build/provingKey` directory. -7. Copy (or move) the `build/provingKey` directory to `$HOME/.zisk` directory: + Additionally, to generate the snark wrapper: ```bash - cp -R build/provingKey $HOME/.zisk + node ../pil2-proofman-js/src/main_setup_snark.js -b build -t ../pil2-proofman/pil2-components/lib/std/pil -f -w ../powersOfTau28_hez_final_27.ptau -p ./state-machines/publics.json -n plonk ``` -8. Generate constant tree files: + It is stored under the `build/provingKeySnark` directory. + + +7. Copy (or move) the `build/provingKey` directory to `$HOME/.zisk` directory: + ```bash - cargo-zisk check-setup -a + cp -R build/provingKey $HOME/.zisk ``` ## Uninstall Zisk diff --git a/book/getting_started/precompiles.md b/book/getting_started/precompiles.md index 0dc7444a8..3aa3d2bf6 100644 --- a/book/getting_started/precompiles.md +++ b/book/getting_started/precompiles.md @@ -2,7 +2,7 @@ Precompiles are built-in system functions within ZisK’s operating system that accelerate computationally expensive and frequently used operations such as the Keccak-f permutation and Secp256k1 addition and doubling. -These precompiles improve proving efficiency by offloading intensive computations from ZisK programs to dedicated, pre-integrated sub-processors. ZisK manages precompiles as system calls using the RISC-V `ecall` instruction. +These precompiles improve proving efficiency by offloading intensive computations from ZisK programs to dedicated, pre-integrated sub-processors. ## How Precompiles Work @@ -15,12 +15,17 @@ You can see [here](https://github.com/0xPolygonHermez/zisk-patch-tiny-keccak/tre ### Available Precompiles in ZisK Below is a summary of the precompiles currently available in ZisK: -- [syscall_arith256_mod](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/arith256_mod.rs): Modular multiplication followed by addition over 256-bit non-negative integers. +- [syscall_add256](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/add256.rs): Addition over 256-bit non-negative integers. - [syscall_arith256](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/arith256.rs): Multiplication followed by addition over 256-bit non-negative integers. -- [syscall_keccak_f](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/keccakf.rs): Keccak-f[1600] permutation function from the [Keccak](https://keccak.team/files/Keccak-reference-3.0.pdf) cryptographic sponge construction. +- [syscall_arith256_mod](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/arith256_mod.rs): Modular multiplication followed by addition over 256-bit non-negative integers. +- [syscall_arith384_mod](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/arith384_mod.rs): Modular multiplication followed by addition over 256-bit non-negative integers. +- [syscall_keccak_f](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/keccakf.rs): Keccak-f 1600 permutation function from the [Keccak](https://keccak.team/files/Keccak-reference-3.0.pdf) cryptographic sponge construction. - [syscall_sha256_f](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/sha256f.rs): Extend and compress function of the [SHA-256](https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf) cryptographic hash algorithm. +- [syscall_syscall_poseidon2](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/poseidon2.rs): Compression function of the [Poseidon2](https://eprint.iacr.org/2023/323.pdf) cryptographic hash algorithm. - [syscall_secp256k1_add](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/secp256k1_add.rs): Elliptic curve point addition over the [Secp256k1](https://en.bitcoin.it/wiki/Secp256k1) curve. - [syscall_secp256k1_dbl](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/secp256k1_dbl.rs): Elliptic curve point doubling over the [Secp256k1](https://en.bitcoin.it/wiki/Secp256k1) curve. +- [syscall_secp256r1_add](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/secp256r1_add.rs): Elliptic curve point addition over the [Secp256r1](https://csrc.nist.gov/pubs/sp/800/186/final) curve. +- [syscall_secp256r1_dbl](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/secp256r1_dbl.rs): Elliptic curve point doubling over the [Secp256r1](https://csrc.nist.gov/pubs/sp/800/186/final) curve. - [syscall_bn254_curve_add](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/bn254_curve_add.rs): Elliptic curve point addition over the [Bn254](https://hackmd.io/kcEJAWISQ56eE6YpBnurgw) curve. - [syscall_bn254_curve_dbl](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/bn254_curve_dbl.rs): Elliptic curve point doubling over the [Bn254](https://hackmd.io/kcEJAWISQ56eE6YpBnurgw) curve. - [syscall_bn254_complex_add](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/bn254_complex_add.rs): Complex addition within the quadratic extension built over the base field of the [Bn254](https://hackmd.io/kcEJAWISQ56eE6YpBnurgw) curve. @@ -31,5 +36,4 @@ Below is a summary of the precompiles currently available in ZisK: - [syscall_bls12_381_curve_dbl](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/bls12_381_curve_dbl.rs): Elliptic curve point doubling over the BLS12-381 curve. - [syscall_bls12_381_complex_add](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/bls12_381_complex_add.rs): Complex addition within the quadratic extension built over the base field of the BLS12-381 curve. - [syscall_bls12_381_complex_sub](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/bls12_381_complex_add.rs): Complex subtraction within the quadratic extension built over the base field of the BLS12-381 curve. -- [syscall_bls12_381_complex_mul](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/bls12_381_complex_add.rs): Complex multiplication within the quadratic extension built over the base field of the BLS12-381 curve. -- [syscall_add256](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/add256.rs): 256 bits addition with one carry in bit and carry out bit. \ No newline at end of file +- [syscall_bls12_381_complex_mul](https://github.com/0xPolygonHermez/zisk/tree/main/ziskos/entrypoint/src/syscalls/bls12_381_complex_add.rs): Complex multiplication within the quadratic extension built over the base field of the BLS12-381 curve. \ No newline at end of file diff --git a/book/getting_started/proof.md b/book/getting_started/proof.md deleted file mode 100644 index e566c9795..000000000 --- a/book/getting_started/proof.md +++ /dev/null @@ -1,76 +0,0 @@ -## Steps to verify constraints or generate proof - -compile pils: -``` -node ../pil2-compiler/src/pil.js pil/fork_0/pil/zisk.pil -I lib/std/pil -o pil/fork_0/pil/zisk.pilout -``` - -generate "structs" for different airs: -`(cd ../pil2-proofman; cargo run --bin proofman-cli pil-helpers --pilout ../zisk/pil/fork_0/pil/zisk.pilout --path ../zisk/pil/fork_0/src/ -o)` - -prepare "fast tools" (only first time): -`(cd ../zkevm-prover && git switch develop_rust_lib && git submodule init && git submodule update && make -j bctree && make starks_lib -j)` - -setup for pil, this step is necessary **only when pil change**: -`node ../pil2-proofman-js/src/main_setup.js -a pil/fork_0/pil/zisk.pilout -b build -t ../zkevm-prover/build/bctree` - -this step should be done once and is optional. Edit file pil2-proofman/provers/starks-lib-c/Cargo.toml to remove "no_lib_link" from line 12: -`nano ../pil2-proofman/provers/starks-lib-c/Cargo.toml` - -compile witness computation library (libzisk_witness.so). If you haven't nightly mode as default, must add +nightly when do build. -`cargo build --release` - -In the following steps to verify constraints or generate prove, select one of these inputs: -- input.bin: large number of sha -- input_one_segment.bin: only one sha -- input_two_segments.bin: 512 shas - -To **verify constraints** use: -`(cd ../pil2-proofman; cargo run --release --bin proofman-cli verify-constraints --witness-lib ../zisk/target/release/libzisk_witness.so --rom ../zisk/emulator/benches/data/my.elf -i ../zisk/emulator/benches/data/input.bin --proving-key ../zisk/build/provingKey)` - -To **generate proof** use: -`(cd ../pil2-proofman; cargo run --release --bin proofman-cli verify-constraints --witness-lib ../zisk/target/release/libzisk_witness.so --rom ../zisk/emulator/benches/data/my.elf -i ../zisk/emulator/benches/data/input.bin --proving-key ../zisk/build/provingKey)` - -## Steps to compile a verifiable rust program - -### Setup -Install qemu: -`sudo apt-get install qemu-system` -Add tokens to access repos: -``` -export GITHUB_ACCESS_TOKEN=.... -export ZISK_TOKEN=.... -``` -### Create new hello_world project -Create project with toolchain: -```bash -cargo-zisk sdk new hello_world -cd hello_world -``` - -Compile and execute in **riscv mode**: -`cargo-zisk run --release` - -Compile and execute in **zisk mode**: -`cargo-zisk run --release --sim` - -Execute with ziskemu: -`ziskemu -i build/input.bin -x -e target/riscv64ima-zisk-zkvm-elf/release/fibonacci` - -### Update toolchain -``` -ziskup -``` -If ziskup fails, could update ziskemu manually. - -### Update ziskemu manually -```bash -cd zisk -git pull -cargo install --path emulator -cp ~/.cargo/bin/ziskemu ~/.zisk/bin/ -``` - -```bash -ziskemu -i build/input.bin -x -e target/riscv64ima-zisk-zkvm-elf/debug/fibonacci -``` diff --git a/book/getting_started/quickstart.md b/book/getting_started/quickstart.md index ad5addd9a..62dafac18 100644 --- a/book/getting_started/quickstart.md +++ b/book/getting_started/quickstart.md @@ -44,19 +44,46 @@ This will create a project with the following structure: ├── build.rs ├── Cargo.toml ├── .gitignore -└── src - └── main.rs +├── guest +| ├── src +| | └── main.rs +| └── Cargo.toml +└── host + ├── src + | └── main.rs + ├── bin + | ├── compressed.rs + | ├── execute.rs + | ├── prove.rs + | ├── plonk.rs + | ├── verify-constraints.rs + | └── ziskemu.rs + ├── build.rs + └── Cargo.toml ``` The example program takes a number `n` as input and computes the SHA-256 hash `n` times. -The `build.rs` file generates an `input.bin` file containing the value of `n` (e.g., 20). This file is used in `main.rs` as input to calculate the hash. +## Build + +The next step is to build the program to generate an ELF file (RISC-V), which will be used later to generate the proof. Execute: -You can run the program on your native architecture with the following command: ```bash -cargo run +cargo build --release ``` -The output will be: + +This command builds the program using the `zkvm` target. The resulting `sha_hasher` ELF file (without extension) is generated in the `./target/elf/riscv64ima-zisk-zkvm-elf/release` directory. + +## Execute + +Before generating a proof, you can test the program using the ZisK emulator to ensure its correctness: + +```bash +cargo run --release --bin ziskemu +``` + +The emulator will execute the program and display the public outputs: + ``` public 0: 0x98211882 public 1: 0xbd13089b @@ -68,73 +95,53 @@ public 6: 0x1f142cac public 7: 0x233f1280 ``` -## Build - -The next step is to build the program using the `cargo-zisk` command to generate an ELF file (RISC-V), which will be used later to generate the proof. Execute: +These outputs should match the native execution, confirming the program works correctly. -```bash -cargo-zisk build --release -``` +## Verify Constraints -This command builds the program using the `zkvm` target. The resulting `sha_hasher` ELF file (without extension) is generated in the `./target/riscv64ima-zisk-zkvm-elf/release` directory. - -## Execute - -Before generating a proof, you can test the program using the ZisK emulator to ensure its correctness. Specify the ELF file (using the `-e` or `--elf flag`) and the input file `input.bin` (using the `-i` or `--inputs` flag): +Once you've confirmed the program executes correctly, you can verify the constraints without generating a full proof. This is useful for debugging and ensuring correctness: ```bash -ziskemu -e target/riscv64ima-zisk-zkvm-elf/release/sha_hasher -i build/input.bin +cargo run --release --bin verify-constraints ``` -The output will be: -``` -98211882 -bd13089b -6ccf1fca -81f7f0e4 -abf6352a -0c39c9b1 -1f142cac -233f1280 -``` +This command will: +1. Execute the program using the ZisK emulator +2. Generate the execution trace +3. Verify all arithmetic and logical constraints +4. Check that all state machine transitions are valid -Alternatively, you can build and run the program with: +If successful, you'll see: -```bash -cargo-zisk run --release -i build/input.bin ``` - -## Prove - -Before generating a proof, you need to generate the program setup files. Execute: - -```bash -cargo-zisk rom-setup -e target/riscv64ima-zisk-zkvm-elf/release/sha_hasher +✓ All constraints for Instance #0 of Main were verified +✓ All constraints for Instance #0 of Rom were verified +... +✓ All global constraints were successfully verified ``` -Once the program setup is complete, you can generate and verify a proof using the `cargo-zisk prove` command by providing the ELF file (with the `-e` or `--elf` flag) and the input file (with the `-i` or `--input` flag). +## Prove -To generate and verify a proof for the previously built ELF and input files, execute: +To generate a cryptographic proof of execution, run: ```bash -cargo-zisk prove -e target/riscv64ima-zisk-zkvm-elf/release/sha_hasher -i build/input.bin -o proof -a -y +cargo run --release --bin prove ``` -This command generates the proof in the `./proof` directory. If everything goes well, you will see a message similar to: - -``` -... -[INFO ] ProofMan: ✓ Vadcop Final proof was verified -[INFO ] stop <<< GENERATING_VADCOP_PROOF 91706ms -[INFO ] ProofMan: Proofs generated successfully -``` +This will: +1. Execute the program and generate the execution trace +2. Compute witness values for all state machines +3. Generate the polynomial commitments +4. Create the zk-STARK proof -**Note**: You can use concurrent proof generation and GPU support to reduce proving time. For more details, refer to the [Writing Programs](./writing_programs.md) guide. +The proof will be saved in the `./proof` directory. This process may take several minutes depending on the program complexity. -## Verify Proof +## Compressed Proof (Optional) -To verify a generated proof, use the following command: +After generating the proof, you can optionally create a compressed version to reduce the proof size: ```bash -cargo-zisk verify -p ./proof/vadcop_final_proof.bin +cargo run --release --bin compressed ``` + +This generates an additional compressed proof on top of the existing one using recursive composition. The compressed proof is significantly smaller while maintaining the same security guarantees. diff --git a/book/getting_started/quickstart_dev.md b/book/getting_started/quickstart_dev.md deleted file mode 100644 index 245fd3a25..000000000 --- a/book/getting_started/quickstart_dev.md +++ /dev/null @@ -1,205 +0,0 @@ -# Quickstart - -In this guide, we will walk you through the steps to create a simple Zisk project. - -## Requirements - -Before you begin, ensure that you have [Rust](https://www.rust-lang.org/tools/install) installed on your system. - -Optional recommendations: - -- Use the [rust-analyzer](https://marketplace.visualstudio.com/items?itemName=rust-lang.rust-analyzer) extension for VS Code to enhance your Rust development experience. -- Use the [PIL2 Highlight syntax code](https://marketplace.visualstudio.com/items?itemName=rust-lang.rust-analyzer) for VS Code to highlight PIL2 code when writing it. - -## Clone Repositories - -Run the following commands to clone the necessary repositories: - -```bash -git clone -b develop https://github.com/0xPolygonHermez/pil2-compiler.git -git clone -b develop https://github.com/0xPolygonHermez/zisk.git -git clone -b develop https://github.com/0xPolygonHermez/pil2-proofman.git -git clone -b develop https://github.com/0xPolygonHermez/pil2-proofman-js -``` - -## Compile a Verifiable Rust Program - -### Setup -Install qemu: -`sudo apt-get install qemu-system` - -### Create New Hello World Project -Create a new project using the Zisk toolchain: - -```bash -cargo-zisk sdk new hello_world -cd hello_world -``` - -Edit file `build.rs` file to modify the `OUTPUT_DIR` variable to `build`: - -```rust=3 -use std::path::Path; - -// Define constants for the directory and file names -const OUTPUT_DIR: &str = "build"; -const FILE_NAME: &str = "input.bin"; -``` - -### Compile and Run - -- RISC-V mode: -```bash -cargo-zisk run --release -``` - -- Zisk mode: -```bash -cargo-zisk run --release --sim -``` - -- Ziskemu execution: -```bash -ziskemu -i build/input.bin -x -e target/riscv64ima-zisk-zkvm-elf/release/hello_world -``` - -### Updating the Toolchain -To update the Zisk toolchain: - -```bash -ziskup -``` - -If `ziskup` fails, you can manually update `ziskemu`. - -### Manual Ziskemu Update -```bash -cd zisk -git pull -cargo install --path emulator -cp ~/.cargo/bin/ziskemu ~/.zisk/bin/ -``` - -Run the emulator with: - -```bash -ziskemu -i build/input.bin -x -e target/riscv64ima-zisk-zkvm-elf/debug/hello_world -``` - -### Easy Input Update for 64-bit Values -To put `0x0100`, reverse hex sequence: -```bash -echo -en "\x00\x01\x00\x00\x00\x00\x00\x00" > input_two_segments.bin -``` -To input `0x0234`: -```bash -echo -en "\x34\x02\x00\x00\x00\x00\x00\x00" > input_two_segments.bin -``` - -## Prepare Your Setup - -All following commands should be executed in the `zisk` folder. - -### Compile Zisk PIL - -!!!!!! Download pil2-proofman to be able to compile the std -node --max-old-space-size=131072 --stack-size=1500 ../pil2-proofman-js/src/main_setup.js -a pil/zisk_pre_040.pilout -b build/build_pre_040 -t ../pil2-proofman/pil2-stark/build/bctree -i ./build/keccakf_fixed.bin -com es genera el fixed.bin??? - -cargo run --release --bin keccakf_fixed_gen -```bash -node --max-old-space-size=65536 ../pil2-compiler/src/pil.js pil/zisk.pil -I pil,../pil2-proofman/pil2-components/lib/std/pil,state-machines,precompiles -o pil/zisk.pilout -``` - -### Compile PILs with `std_mock` (for testing without `std`): -```bash -node ../pil2-compiler/src/pil.js pil/zisk.pil -I pil,../pil2-components/lib/std_mock/pil,state-machines -o pil/zisk.pilout -``` - -### Compile the PIl2 Stark C++ Library (run only once): -```bash -(cd ../pil2-proofman/pil2-stark && git submodule init && git submodule update && make clean && make -j starks_lib && make -j bctree) -``` - -### Generate PIL-Helpers Rust Code -Run this whenever the `.pilout` file changes: - -```bash -(cd ../pil2-proofman; cargo run --bin proofman-cli pil-helpers --pilout ../zisk/pil/zisk.pilout --path ../zisk/pil/src/ -o) -``` - -### Generate Setup Data -Run this whenever the `.pilout` file changes: - -```bash[] -node --max-old-space-size=131072 --stack-size=1500 ../pil2-proofman-js/src/main_setup.js -a pil/zisk.pilout -b build -t ../pil2-proofman/pil2-stark/build/bctree -``` - -### Compile Witness Computation library (`libzisk_witness.so`) -```bash -cargo build --release -``` - -> If you get a library not found error, set the path manually: -> ```bash -> export RUSTFLAGS="-L native={path to your pil2-stark folder}/pil2-stark/lib" -> ``` - -## Generate & Verify Proofs - -Sample inputs are located in `zisk/emulator/benches/data`: -- `input_one_segment.bin`: single SHA -- `input_two_segments.bin`: 512 SHA -- `input.bin`: large number of SHA - -### Verify Constraints Only -```bash -// Using input_one_segment.bin -(cargo build --release && cd ../pil2-proofman; cargo run --release --bin proofman-cli verify-constraints --witness-lib ../zisk/target/release/libzisk_witness.so --rom ../zisk/emulator/benches/data/my.elf -i ../zisk/emulator/benches/data/input_one_segment.bin --proving-key ../zisk/build/provingKey) - -// Using input_two_segments.bin -(cargo build --release && cd ../pil2-proofman; cargo run --release --bin proofman-cli verify-constraints --witness-lib ../zisk/target/release/libzisk_witness.so --rom ../zisk/emulator/benches/data/my.elf -i ../zisk/emulator/benches/data/input_two_segments.bin --proving-key ../zisk/build/provingKey)` - -// Using input.bin -(cargo build --release && cd ../pil2-proofman; cargo run --release --bin proofman-cli verify-constraints --witness-lib ../zisk/target/release/libzisk_witness.so --rom ../zisk/emulator/benches/data/my.elf -i ../zisk/emulator/benches/data/input.bin --proving-key ../zisk/build/provingKey)` -``` - -### Generate a Proof - -To generate the aggregated proofs, add `-a` - -```bash -// Using input_one_segment.bin -(cargo build --release && cd ../pil2-proofman; cargo run --release --bin proofman-cli prove --witness-lib ../zisk/target/release/libzisk_witness.so --rom ../zisk/emulator/benches/data/my.elf -i ../zisk/emulator/benches/data/input_one_segment.bin --proving-key ../zisk/build/provingKey --output-dir ../zisk/proofs -a -v) - -// Using input_two_segments.bin -(cargo build --release && cd ../pil2-proofman; cargo run --release --bin proofman-cli prove --witness-lib ../zisk/target/release/libzisk_witness.so --rom ../zisk/emulator/benches/data/my.elf -i ../zisk/emulator/benches/data/input_two_segments.bin --proving-key ../zisk/build/provingKey --output-dir ../zisk/proofs -a -v) - -// Using input.bin -(cargo build --release && cd ../pil2-proofman; cargo run --release --bin proofman-cli prove --witness-lib ../zisk/target/release/libzisk_witness.so --rom ../zisk/emulator/benches/data/my.elf -i ../zisk/emulator/benches/data/input.bin --proving-key ../zisk/build/provingKey --output-dir ../zisk/proofs -a -v) -``` - -### Distributed prove - -Zisk can run proves using multiple processes in the same server or in multiple servers. To use zisk in distributed mode you need to have installed a mpi library. To use the distributed mode the compilation command is: - -```bash -cargo-zisk build --release --features distributed -``` - -Then the execution command will be: - -```bash -mpirun --bind-to none -np -x OMP_NUM_THREADS= target/release/cargo-zisk prove -e target/riscv64ima-zisk-zkvm-elf/release/sha_hasher -i build/input.bin -w $HOME/.zisk/bin/libzisk_witness.so -k $HOME/.zisk/provingKey -o proof -a -y -``` -### Verify the Proof -```bash -node ../pil2-proofman-js/src/main_verify -k ./build/provingKey -p ./proofs -``` - -### Verify the aggregated Proof -If the aggregation proofs are being generated, can be verified with the following command: - -```bash -node ../pil2-proofman-js/src/main_verify -k ./build/provingKey/ -p ./proofs -t vadcop_final -``` diff --git a/book/getting_started/writing_programs.md b/book/getting_started/writing_programs.md index 904a86f1f..0d53bcbd6 100644 --- a/book/getting_started/writing_programs.md +++ b/book/getting_started/writing_programs.md @@ -44,14 +44,7 @@ use ziskos::{read_input_slice, set_output}; use byteorder::ByteOrder; fn main() { - // Read the input data as a byte array from ziskos - let input = read_input_slice(); - - // Convert the input data to a u64 integer - let n: u64 = match input.as_ref().try_into() { - Ok(input_bytes) => u64::from_le_bytes(input_bytes), - Err(e) => panic!("Invalid input, error: {}", e), - }; + let n: u32 = ziskos::io::read(); let mut hash = [0u8; 32]; @@ -63,21 +56,16 @@ fn main() { hash = Into::<[u8; 32]>::into(*digest); } - // Split 'hash' value into chunks of 32 bits and write them to ziskos output - for i in 0..8 { - let val = byteorder::BigEndian::read_u32(&mut hash[i * 4..i * 4 + 4]); - set_output(i, val); - } + ziskos::io::commit(&output); } ``` `Cargo.toml`: ```toml [package] -name = "sha_hasher" +name = "guest" version = "0.1.0" edition = "2021" -default-run = "sha_hasher" [dependencies] byteorder = "1.5.0" @@ -86,27 +74,31 @@ ziskos = { git = "https://github.com/0xPolygonHermez/zisk.git" } ``` ### Input/Output Data -To provide input data for ZisK, you need to write that data in a binary file (e.g., `input.bin`). -If your program requires complex input data, consider using a serialization mechanism (like [`bincode`](https://crates.io/crates/bincode) crate) to store it in `input.bin` file. +To read input data in your ZisK program, use the `ziskos::io::read()` function, which deserializes data from the input: + +```rust +// Read a u32 value from input +let n: u32 = ziskos::io::read(); +``` -In your program, use the `ziskos::read_input_slice()` function to retrieve the input data from the `input.bin` file: +You can also read custom types that implement the `Deserialize` trait: ```rust -// Read the input data as a byte array from ziskos -let input = read_input_slice(); +// Read a custom struct from input +let my_data: MyStruct = ziskos::io::read(); ``` -To write public output data, use the `ziskos::set_output()` function. Since the function accepts `u32` values, split the output data into 32-bit chunks if necessary and increase the `id` parameter of the function in each call: +To write public output data, use the `ziskos::io::commit()` function, which serializes and commits the output: ```rust -// Split 'hash' value into chunks of 32 bits and write them to ziskos output -for i in 0..8 { - let val = byteorder::BigEndian::read_u32(&mut hash[i * 4..i * 4 + 4]); - set_output(i, val); -} +// Commit the hash as public output +let hash: [u8; 32] = compute_hash(); +ziskos::io::commit(&hash); ``` +The output can be any type that implements the `Serialize` trait. The data will be serialized and made available as public outputs that can be verified by anyone checking the proof. + ## Build Before compiling your program for ZisK, you can test it on the native architecture just like any regular Rust program using the `cargo` command. @@ -117,7 +109,7 @@ Once your program is ready to run on ZisK, compile it into an ELF file (RISC-V a cargo-zisk build ``` -This command compiles the program using the `zisk` target. The resulting `sha_hasher` ELF file (without extension) is generated in the `./target/riscv64ima-zisk-zkvm-elf/debug` directory. +This command compiles the program using the `zisk` target. The resulting `guest` ELF file (without extension) is generated in the `./target/riscv64ima-zisk-zkvm-elf/debug` directory. For production, compile the ELF file with the `--release` flag, similar to how you compile Rust projects: @@ -125,7 +117,7 @@ For production, compile the ELF file with the `--release` flag, similar to how y cargo-zisk build --release ``` -In this case, the `sha_hasher` ELF file will be generated in the `./target/riscv64ima-zisk-zkvm-elf/release` directory. +In this case, the `guest` ELF file will be generated in the `./target/elf/riscv64ima-zisk-zkvm-elf/release` directory. ## Execute @@ -133,13 +125,7 @@ You can test your compiled program using the ZisK emulator (`ziskemu`) before ge ```bash cargo-zisk build --release -ziskemu -e target/riscv64ima-zisk-zkvm-elf/release/sha_hasher -i build/input.bin -``` - -Alternatively, you can build and execute the program in the ZisK emulator with a single command: - -```bash -cargo-zisk run --release -i build/input.bin +ziskemu -e target/elf/riscv64ima-zisk-zkvm-elf/release/guest -i host/tmp/input.bin ``` If the program requires a large number of ZisK steps, you might encounter the following error: @@ -150,7 +136,7 @@ Error: Error executing Run command To resolve this, you can increase the number of execution steps using the `-n` (`--max-steps`) flag. For example: ```bash -ziskemu -e target/riscv64ima-zisk-zkvm-elf/release/sha_hasher -i build/input.bin -n 10000000000 +ziskemu -e target/elf/riscv64ima-zisk-zkvm-elf/release/guest -i host/tmp/input.bin -n 10000000000 ``` ## Metrics and Statistics @@ -158,36 +144,23 @@ ziskemu -e target/riscv64ima-zisk-zkvm-elf/release/sha_hasher -i build/input.bin ### Performance Metrics You can get performance metrics related to the program execution in ZisK using the `-m` (`--log-metrics`) flag in the `cargo-zisk run` command or in `ziskemu` tool: -```bash -cargo-zisk run --release -i build/input.bin -m -``` - -Or ```bash -ziskemu -e target/riscv64ima-zisk-zkvm-elf/release/sha_hasher -i build/input.bin -m +ziskemu -e target/elf/riscv64ima-zisk-zkvm-elf/release/guest -i host/tmp/input.bin -m ``` The output will include details such as execution time, throughput, and clock cycles per step: ``` process_rom() steps=85309 duration=0.0009 tp=89.8565 Msteps/s freq=3051.0000 33.9542 clocks/step -98211882 -bd13089b -6ccf1fca ... ``` ### Execution Statistics -You can get statistics related to the program execution in Zisk using the `-x` (`--stats`) flag in the `cargo-zisk run` command or in `ziskemu` tool: - -```bash -cargo-zisk run --release -i build/input.bin -x -``` +You can get statistics related to the program execution in Zisk using the `-X` (`--stats`) flag in `ziskemu` tool: -Or ```bash -ziskemu -e target/riscv64ima-zisk-zkvm-elf/release/sha_hasher -i build/input.bin -x +ziskemu -e target/elf/riscv64ima-zisk-zkvm-elf/release/guest -i host/tmp/input.bin -X ``` The output will include details such as cost definitions, total cost, register reads/writes, opcode statistics, etc: @@ -218,10 +191,6 @@ Opcodes: xor: 1.06 sec (77 steps/op) (13774 ops) signextend_b: 0.03 sec (109 steps/op) (320 ops) signextend_w: 0.03 sec (109 steps/op) (320 ops) - -98211882 -bd13089b -6ccf1fca ... ``` @@ -232,7 +201,7 @@ bd13089b Before generating a proof (or verifying the constraints), you need to generate the program setup files. This must be done the first time after building the program ELF file, or any time it changes: ```bash -cargo-zisk rom-setup -e target/riscv64ima-zisk-zkvm-elf/release/sha_hasher -k $HOME/.zisk/provingKey +cargo-zisk rom-setup -e target/elf/riscv64ima-zisk-zkvm-elf/release/guest -k $HOME/.zisk/provingKey ``` In this command: @@ -251,14 +220,12 @@ cargo-zisk clean Before generating a proof (which can take some time), you can verify that all constraints are satisfied: ```bash -LIB_EXT=$([[ "$(uname)" == "Darwin" ]] && echo "dylib" || echo "so") -cargo-zisk verify-constraints -e target/riscv64ima-zisk-zkvm-elf/release/sha_hasher -i build/input.bin -w $HOME/.zisk/bin/libzisk_witness.$LIB_EXT -k $HOME/.zisk/provingKey +cargo-zisk verify-constraints -e target/elf/riscv64ima-zisk-zkvm-elf/release/guest -i host/tmp/input.bin -k $HOME/.zisk/provingKey ``` In this command: * `-e` (`--elf`) specifies the ELF file location. * `-i` (`--input`) specifies the input file location. -* `-w` (`--witness`) specifies the location of the witness library. This is optional and defaults to `$HOME/.zisk/bin/libzisk_witness.$LIB_EXT`. * `-k` (`--proving-key`) specifies the directory containing the proving key. This is optional and defaults to `$HOME/.zisk/provingKey`. If everything is correct, you will see an output similar to: @@ -274,14 +241,12 @@ If everything is correct, you will see an output similar to: To generate a proof, run the following command: ```bash -LIB_EXT=$([[ "$(uname)" == "Darwin" ]] && echo "dylib" || echo "so") -cargo-zisk prove -e target/riscv64ima-zisk-zkvm-elf/release/sha_hasher -i build/input.bin -w $HOME/.zisk/bin/libzisk_witness.$LIB_EXT -k $HOME/.zisk/provingKey -o proof -a -y +cargo-zisk prove -e target/elf/riscv64ima-zisk-zkvm-elf/release/guest -i host/tmp/input.bin -k $HOME/.zisk/provingKey -o proof -a -y ``` In this command: * `-e` (`--elf`) specifies the ELF file location. * `-i` (`--input`) specifies the input file location. -* `-w` (`--witness`) specifies the location of the witness library. This is optional and defaults to `$HOME/.zisk/bin/libzisk_witness.$LIB_EXT`. * `-k` (`--proving-key`) specifies the directory containing the proving key. This is optional and defaults to `$HOME/.zisk/provingKey`. * `-o` (`--output`) determines the output directory (in this example `proof`). * `-a` (`--aggregation`) indicates that a final aggregated proof (containing all generated sub-proofs) should be produced. @@ -333,11 +298,6 @@ Follow these steps to enable GPU support: cargo build --release --features gpu ``` -4. Regenerate constant tree files: - ```bash - cargo-zisk check-setup -a - ``` - You can combine GPU-based execution with concurrent proof generation using multiple processes, as described in the **Concurrent Proof Generation** section. > **Note:** GPU memory is typically more limited than CPU memory. When combining GPU execution with concurrent proof generation, ensure that each process has sufficient memory available on the GPU to avoid out-of-memory errors. @@ -347,7 +307,7 @@ You can combine GPU-based execution with concurrent proof generation using multi To verify a generated proof, use the following command: ```bash -cargo-zisk verify -p ./proof/vadcop_final_proof.bin -s $HOME/.zisk/provingKey/zisk/vadcop_final/vadcop_final.starkinfo.json -e $HOME/.zisk/provingKey/zisk/vadcop_final/vadcop_final.verifier.bin -k $HOME/.zisk/provingKey/zisk/vadcop_final/vadcop_final.verkey.json +cargo-zisk verify -p ./proof/vadcop_final_proof.bin -k $HOME/.zisk/provingKey ``` In this command: diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 9ae695612..1350fe385 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -12,11 +12,7 @@ name = "cargo-zisk" path = "src/bin/cargo-zisk.rs" [build-dependencies] -vergen = { version = "8", default-features = false, features = [ - "build", - "git", - "git2", -] } +vergen-git2.workspace = true [dependencies] zisk-common = { workspace = true } @@ -26,24 +22,19 @@ rom-setup = { workspace = true } zisk-core = { workspace = true } zisk-pil = { workspace = true } asm-runner = { workspace = true } -server = { workspace = true } colored = { workspace = true } fields = { workspace = true } proofman = { workspace = true } proofman-common = { workspace = true } -proofman-verifier = { workspace = true } proofman-util = { workspace = true } sysinfo = { workspace = true } tracing = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } anyhow = { workspace = true } -libloading = { workspace = true } zisk-sdk = { workspace = true } -zstd = { workspace = true } -bytemuck = { workspace = true } clap = { workspace = true } dirs = "6" rand = "0.9" @@ -60,9 +51,11 @@ mpi = { workspace = true } [features] default = [] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] -gpu = ["proofman-common/gpu", "proofman/gpu", "packed"] -packed = ["proofman-common/packed"] +disable_distributed = ["zisk-sdk/disable_distributed"] +diagnostic = ["zisk-sdk/diagnostic"] +custom_rust_llvm = [] +gpu = ["zisk-sdk/gpu"] +packed = ["zisk-sdk/packed"] stats = [] verify = ["fields/verify"] diff --git a/cli/build.rs b/cli/build.rs index 0a689d13e..a9af6f870 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -1,5 +1,15 @@ fn main() { - vergen::EmitBuilder::builder().build_timestamp().git_sha(true).emit().unwrap(); + let mut builder = vergen_git2::Emitter::default(); + builder + .add_instructions( + &vergen_git2::BuildBuilder::default().build_timestamp(true).build().unwrap(), + ) + .unwrap(); + builder + .add_instructions(&vergen_git2::Git2Builder::default().sha(true).build().unwrap()) + .unwrap(); + builder.emit().unwrap(); + let disable_distributed = std::env::vars().any(|(k, _)| k == "CARGO_FEATURE_DISABLE_DISTRIBUTED"); let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); diff --git a/cli/src/bin/cargo-zisk.rs b/cli/src/bin/cargo-zisk.rs index a242970c4..3d4893d5a 100644 --- a/cli/src/bin/cargo-zisk.rs +++ b/cli/src/bin/cargo-zisk.rs @@ -1,7 +1,7 @@ use anyhow::{anyhow, Context, Result}; use cargo_zisk::commands::{ - ZiskBuild, ZiskCheckSetup, ZiskClean, ZiskExecute, ZiskProve, ZiskProveClient, ZiskRomSetup, - ZiskRun, ZiskSdk, ZiskServer, ZiskStats, ZiskVerify, ZiskVerifyConstraints, + ZiskBuild, ZiskCheckSetup, ZiskClean, ZiskConvertInput, ZiskExecute, ZiskProve, ZiskProveSnark, + ZiskRomSetup, ZiskRun, ZiskSdk, ZiskStats, ZiskVerify, ZiskVerifyConstraints, ZiskVerifySnark, }; use clap::Parser; use zisk_build::ZISK_VERSION_MESSAGE; @@ -17,17 +17,18 @@ use zisk_build::ZISK_VERSION_MESSAGE; )] pub enum Cargo { Build(ZiskBuild), + ConvertInput(ZiskConvertInput), CheckSetup(ZiskCheckSetup), Clean(ZiskClean), Execute(ZiskExecute), - ProveClient(ZiskProveClient), Prove(ZiskProve), + ProveSnark(ZiskProveSnark), RomSetup(ZiskRomSetup), Run(ZiskRun), Sdk(ZiskSdk), - Server(ZiskServer), Stats(ZiskStats), Verify(ZiskVerify), + VerifySnark(ZiskVerifySnark), VerifyConstraints(ZiskVerifyConstraints), } @@ -39,18 +40,21 @@ fn main() -> Result<()> { Cargo::Build(cmd) => { cmd.run().context("Error executing Build command")?; } + Cargo::ConvertInput(cmd) => { + cmd.run().context("Error executing ConvertInput command")?; + } Cargo::CheckSetup(cmd) => { cmd.run().context("Error executing CheckSetup command")?; } Cargo::Clean(cmd) => { cmd.run().context("Error executing Clean command")?; } - Cargo::ProveClient(cmd) => { - cmd.run().context("Error executing ProveClient command")?; - } Cargo::Prove(mut cmd) => { cmd.run().context("Error executing Prove command")?; } + Cargo::ProveSnark(cmd) => { + cmd.run().context("Error executing ProveSnark command")?; + } Cargo::RomSetup(cmd) => { cmd.run().context("Error executing RomSetup command")?; } @@ -58,7 +62,7 @@ fn main() -> Result<()> { cmd.run().context("Error executing Run command")?; } Cargo::Stats(mut cmd) => { - cmd.run().context("Error executing SDK command")?; + cmd.run().context("Error executing Stats command")?; } Cargo::Execute(mut cmd) => { cmd.run().context("Error executing Execute command")?; @@ -66,12 +70,12 @@ fn main() -> Result<()> { Cargo::Sdk(cmd) => { cmd.command.run().context("Error executing SDK command")?; } - Cargo::Server(mut cmd) => { - cmd.run().context("Error executing Server command")?; - } Cargo::Verify(cmd) => { cmd.run().map_err(|e| anyhow!("Error executing Verify command: {}", e))?; } + Cargo::VerifySnark(cmd) => { + cmd.run().context("Error executing VerifySnark command")?; + } Cargo::VerifyConstraints(mut cmd) => { cmd.run().context("Error executing VerifyConstraints command")?; } diff --git a/cli/src/commands/build.rs b/cli/src/commands/build.rs index 7a71c002c..ef0add8ec 100644 --- a/cli/src/commands/build.rs +++ b/cli/src/commands/build.rs @@ -17,13 +17,33 @@ pub struct ZiskBuild { #[clap(long)] no_default_features: bool, + + #[arg(short, long)] + name: Option, + + #[clap(short = 'z', long)] + zisk_path: Option, + + #[clap(long)] + hints: bool, } impl ZiskBuild { pub fn run(&self) -> Result<()> { // Construct the cargo run command + let toolchain_name = if let Some(name) = self.name.as_deref() { + println!("using toolchain_name: {name}"); + name + } else { + "zisk" + }; let mut command = Command::new("cargo"); - command.args(["+zisk", "build"]); + command.args([&format!("+{toolchain_name}"), "build"]); + + // Set RUSTFLAGS for target-cpu=zisk, preserving existing flags + let flags = std::env::var("RUSTFLAGS").unwrap_or_default(); + command.env("RUSTFLAGS", flags.trim()); + // Add the feature selection flags if let Some(features) = &self.features { command.arg("--features").arg(features); diff --git a/cli/src/commands/check_setup.rs b/cli/src/commands/check_setup.rs index 5e63cf9fc..12f27d134 100644 --- a/cli/src/commands/check_setup.rs +++ b/cli/src/commands/check_setup.rs @@ -1,5 +1,5 @@ // extern crate env_logger; -use crate::commands::get_proving_key; +use crate::commands::{get_proving_key, get_proving_key_snark}; use anyhow::Result; use clap::Parser; use colored::Colorize; @@ -7,8 +7,8 @@ use std::path::PathBuf; use fields::Goldilocks; -use proofman::ProofMan; -use proofman_common::initialize_logger; +use proofman::{check_setup_snark, ProofMan}; +use zisk_sdk::setup_logger; #[derive(Parser)] #[command(version, about, long_about = None)] @@ -18,11 +18,15 @@ pub struct ZiskCheckSetup { #[clap(short = 'k', long)] pub proving_key: Option, + /// Setup folder path + #[clap(short = 'w', long)] + pub proving_key_snark: Option, + #[clap(short = 'a', long, default_value_t = false)] pub aggregation: bool, - #[clap(short = 'f', long, default_value_t = false)] - pub final_snark: bool, + #[clap(short = 's', long, default_value_t = false)] + pub snark: bool, /// Verbosity (-v, -vv) #[arg(short, long, action = clap::ArgAction::Count, help = "Increase verbosity level")] @@ -34,16 +38,23 @@ impl ZiskCheckSetup { println!("{} CheckSetup", format!("{: >12}", "Command").bright_green().bold()); println!(); - initialize_logger(self.verbose.into(), None); + setup_logger(self.verbose.into()); ProofMan::::check_setup( get_proving_key(self.proving_key.as_ref()), self.aggregation, - self.final_snark, self.verbose.into(), ) .map_err(|e| anyhow::anyhow!("Error checking setup: {}", e))?; + if self.snark { + check_setup_snark::( + &get_proving_key_snark(self.proving_key_snark.as_ref()), + self.verbose.into(), + ) + .map_err(|e| anyhow::anyhow!("Error checking setup snark: {}", e))? + } + Ok(()) } } diff --git a/cli/src/commands/clean.rs b/cli/src/commands/clean.rs index 4595131eb..4530edaf3 100644 --- a/cli/src/commands/clean.rs +++ b/cli/src/commands/clean.rs @@ -4,9 +4,13 @@ use clap::Parser; use colored::Colorize; use anyhow::{Context, Result}; -use proofman_common::initialize_logger; +use proofman_common::VerboseMode; +use zisk_sdk::setup_logger; -use crate::{commands::get_home_zisk_path, ux::print_banner}; +use crate::{ + commands::get_home_zisk_path, + ux::{print_banner, print_banner_command}, +}; /// Deletes the default zisk setup folder #[derive(Parser, Debug)] @@ -15,14 +19,10 @@ pub struct ZiskClean; impl ZiskClean { pub fn run(&self) -> Result<()> { - initialize_logger(proofman_common::VerboseMode::Info, None); + setup_logger(VerboseMode::Info); print_banner(); - tracing::info!( - "{}", - format!("{} Clean", format!("{: >12}", "Command").bright_green().bold()) - ); - tracing::info!(""); + print_banner_command("Clean"); let home_zisk_path = get_home_zisk_path(); let cache_zisk_path = home_zisk_path.join("cache"); diff --git a/cli/src/commands/common.rs b/cli/src/commands/common.rs index 39bc75490..3bd207e86 100644 --- a/cli/src/commands/common.rs +++ b/cli/src/commands/common.rs @@ -6,29 +6,21 @@ pub fn get_home_dir() -> String { env::var("HOME").expect("get_home_dir() failed to get HOME environment variable") } -/// Gets the default witness computation library file location in the home installation directory. -pub fn get_default_witness_computation_lib() -> PathBuf { - let extension = if cfg!(target_os = "macos") { "dylib" } else { "so" }; - let witness_computation_lib = - format!("{}/.zisk/bin/libzisk_witness.{}", get_home_dir(), extension); - PathBuf::from(witness_computation_lib) -} - /// Gets the default proving key file location in the home installation directory. pub fn get_default_proving_key() -> PathBuf { let proving_key = format!("{}/.zisk/provingKey", get_home_dir()); PathBuf::from(proving_key) } -/// Gets the default zisk folder location in the home installation directory. -pub fn get_home_zisk_path() -> PathBuf { - let zisk_path = format!("{}/.zisk", get_home_dir()); - PathBuf::from(zisk_path) +/// Gets the default proving key file location in the home installation directory. +pub fn get_default_proving_key_snark() -> PathBuf { + let proving_key_snark = format!("{}/.zisk/provingKeySnark", get_home_dir()); + PathBuf::from(proving_key_snark) } /// Gets the default zisk folder location in the home installation directory. -pub fn get_default_zisk_path() -> PathBuf { - let zisk_path = format!("{}/.zisk/zisk", get_home_dir()); +pub fn get_home_zisk_path() -> PathBuf { + let zisk_path = format!("{}/.zisk", get_home_dir()); PathBuf::from(zisk_path) } @@ -64,29 +56,14 @@ pub fn cli_fail_if_macos() -> anyhow::Result<()> { } } -/// If the feature "gpu" is enabled, returns an error indicating that the command is not supported. -pub fn cli_fail_if_gpu_mode() -> anyhow::Result<()> { - if cfg!(feature = "gpu") { - Err(anyhow::anyhow!("Command is not supported on GPU mode")) - } else { - Ok(()) - } -} - -/// Gets the witness computation library file location. -/// Uses the default one if not specified by user. -pub fn get_witness_computation_lib(witness_lib: Option<&PathBuf>) -> PathBuf { - witness_lib.cloned().unwrap_or_else(get_default_witness_computation_lib) -} - /// Gets the proving key file location. /// Uses the default one if not specified by user. pub fn get_proving_key(proving_key: Option<&PathBuf>) -> PathBuf { proving_key.cloned().unwrap_or_else(get_default_proving_key) } -/// Gets the zisk folder. +/// Gets the proving key snark file location. /// Uses the default one if not specified by user. -pub fn get_zisk_path(zisk_path: Option<&PathBuf>) -> PathBuf { - zisk_path.cloned().unwrap_or_else(get_default_zisk_path) +pub fn get_proving_key_snark(proving_key_snark: Option<&PathBuf>) -> PathBuf { + proving_key_snark.cloned().unwrap_or_else(get_default_proving_key_snark) } diff --git a/cli/src/commands/convert_input.rs b/cli/src/commands/convert_input.rs new file mode 100644 index 000000000..9cdc398dd --- /dev/null +++ b/cli/src/commands/convert_input.rs @@ -0,0 +1,187 @@ +use anyhow::{bail, Result}; +use clap::Parser; +use std::fs; +use std::path::{Path, PathBuf}; + +use crate::ux::print_banner; +use crate::ux::print_banner_field; +use zisk_sdk::{setup_logger, ZiskStdin}; + +#[derive(Parser)] +#[command(version, about, long_about = None)] +#[command(propagate_version = true)] +pub struct ZiskConvertInput { + /// Input file to convert + #[clap(short = 'i', long)] + pub input_file: Option, + + /// Output file path + #[clap(short = 'o', long)] + pub output_file: Option, + + /// Input directory containing files to convert + #[clap(short = 'd', long)] + pub input_dir: Option, + + /// Output directory for converted files + #[clap(short = 't', long)] + pub output_dir: Option, + + /// Process subdirectories recursively + #[clap(short = 'r', long)] + pub recursive: bool, + + #[arg(short, long, action = clap::ArgAction::Count, help = "Increase verbosity level")] + pub verbose: u8, +} + +impl ZiskConvertInput { + pub fn run(&self) -> Result<()> { + setup_logger(self.verbose.into()); + + print_banner(); + print_banner_field("Command", "Convert Input"); + + // Validate arguments + let use_files = self.input_file.is_some() || self.output_file.is_some(); + let use_dirs = self.input_dir.is_some() || self.output_dir.is_some(); + + if use_files && use_dirs { + bail!("Cannot use both file and directory modes. Use either -i/-o or --input-dir/--output-dir"); + } + + if use_files { + // File mode - both input and output files must be provided + let input_file = self.input_file.as_ref().ok_or_else(|| { + anyhow::anyhow!("Input file (-i) is required when using file mode") + })?; + let output_file = self.output_file.as_ref().ok_or_else(|| { + anyhow::anyhow!("Output file (-o) is required when using file mode") + })?; + + print_banner_field("Input File", input_file.display()); + print_banner_field("Output File", output_file.display()); + + self.convert_file(input_file, output_file)?; + } else if use_dirs { + // Directory mode - both input and output dirs must be provided + let input_dir = self.input_dir.as_ref().ok_or_else(|| { + anyhow::anyhow!( + "Input directory (--input-dir) is required when using directory mode" + ) + })?; + let output_dir = self.output_dir.as_ref().ok_or_else(|| { + anyhow::anyhow!( + "Output directory (--output-dir) is required when using directory mode" + ) + })?; + + print_banner_field("Input Directory", input_dir.display()); + print_banner_field("Output Directory", output_dir.display()); + print_banner_field("Recursive", if self.recursive { "Yes" } else { "No" }); + + self.convert_directory(input_dir, output_dir)?; + } else { + bail!( + "Either file mode (-i/-o) or directory mode (--input-dir/--output-dir) is required" + ); + } + + println!("\n✓ Input conversion completed successfully!"); + + Ok(()) + } + + fn convert_file(&self, input_path: &PathBuf, output_path: &Path) -> Result<()> { + println!("Converting: {} -> {}", input_path.display(), output_path.display()); + + let input = std::fs::read(input_path)?; + let zisk_stdin = ZiskStdin::new(); + zisk_stdin.write_slice(&input); + zisk_stdin.save(output_path)?; + + Ok(()) + } + + fn convert_directory(&self, input_dir: &PathBuf, output_dir: &PathBuf) -> Result<()> { + if !input_dir.is_dir() { + bail!("Input directory does not exist or is not a directory: {}", input_dir.display()); + } + + fs::create_dir_all(output_dir)?; + + let mut files_converted = 0; + + if self.recursive { + self.convert_directory_recursive( + input_dir, + output_dir, + input_dir, + &mut files_converted, + )?; + } else { + self.convert_directory_flat(input_dir, output_dir, &mut files_converted)?; + } + + println!("\n✓ Converted {} file(s)", files_converted); + + Ok(()) + } + + fn convert_directory_flat( + &self, + input_dir: &PathBuf, + output_dir: &Path, + files_converted: &mut usize, + ) -> Result<()> { + for entry in fs::read_dir(input_dir)? { + let entry = entry?; + let path = entry.path(); + + if path.is_file() { + let file_name = path.file_name().ok_or_else(|| { + anyhow::anyhow!("Failed to get filename for: {}", path.display()) + })?; + let output_path = output_dir.join(file_name); + + self.convert_file(&path, &output_path)?; + *files_converted += 1; + } + } + + Ok(()) + } + + fn convert_directory_recursive( + &self, + current_dir: &PathBuf, + output_base: &PathBuf, + input_base: &PathBuf, + files_converted: &mut usize, + ) -> Result<()> { + for entry in fs::read_dir(current_dir)? { + let entry = entry?; + let path = entry.path(); + + if path.is_file() { + // Compute relative path from input base + let relative_path = path + .strip_prefix(input_base) + .map_err(|_| anyhow::anyhow!("Failed to compute relative path"))?; + let output_path = output_base.join(relative_path); + + // Create parent directory if needed + if let Some(parent) = output_path.parent() { + fs::create_dir_all(parent)?; + } + + self.convert_file(&path, &output_path)?; + *files_converted += 1; + } else if path.is_dir() { + self.convert_directory_recursive(&path, output_base, input_base, files_converted)?; + } + } + + Ok(()) + } +} diff --git a/cli/src/commands/execute.rs b/cli/src/commands/execute.rs index c95f9d19d..990d41268 100644 --- a/cli/src/commands/execute.rs +++ b/cli/src/commands/execute.rs @@ -1,12 +1,14 @@ use anyhow::Result; use clap::Parser; +use colored::Colorize; use std::path::PathBuf; -use tracing::info; +use tracing::{info, warn}; use zisk_build::ZISK_VERSION_MESSAGE; +use zisk_common::ElfBinaryFromFile; use zisk_sdk::{ProverClient, ZiskExecuteResult}; -use crate::{commands::cli_fail_if_gpu_mode, ux::print_banner}; -use zisk_common::io::ZiskStdin; +use crate::ux::{print_banner, print_banner_command, print_banner_field, print_execution_summary}; +use zisk_common::io::{StreamSource, ZiskStdin}; #[derive(Parser)] #[command(author, about, long_about = None, version = ZISK_VERSION_MESSAGE)] @@ -18,10 +20,6 @@ use zisk_common::io::ZiskStdin; .required(false) ))] pub struct ZiskExecute { - /// Witness computation dynamic library path - #[clap(short = 'w', long)] - pub witness_lib: Option, - /// ROM file path /// This is the path to the ROM file that the witness computation dynamic library will use /// to generate the witness. @@ -38,8 +36,16 @@ pub struct ZiskExecute { pub emulator: bool, /// Input path - #[clap(short = 'i', long)] - pub input: Option, + #[clap(short = 'i', long, alias = "input", conflicts_with = "hints")] + pub inputs: Option, + + /// Precompiles Hints path + #[clap(short = 'H', long, conflicts_with = "inputs")] + pub hints: Option, + + /// Force ROM setup + #[clap(short = 'n', long, default_value_t = false)] + pub no_auto_setup: bool, /// Setup folder path #[clap(short = 'k', long)] @@ -61,6 +67,11 @@ pub struct ZiskExecute { #[clap(short = 'u', long, conflicts_with = "emulator")] pub unlock_mapped_memory: bool, + /// Redirect ASM emulator output to file + /// This option is mutually exclusive with `--emulator` + #[clap(long, conflicts_with = "emulator", default_value_t = false)] + pub asm_out_file: bool, + /// Verbosity (-v, -vv) #[arg(short = 'v', long, action = clap::ArgAction::Count, help = "Increase verbosity level")] pub verbose: u8, // Using u8 to hold the number of `-v` @@ -71,65 +82,98 @@ pub struct ZiskExecute { impl ZiskExecute { pub fn run(&mut self) -> Result<()> { - cli_fail_if_gpu_mode()?; + // Check if the deprecated alias was used + if std::env::args().any(|arg| arg == "--input") { + eprintln!("{}", "Warning: --input is deprecated, use --inputs instead".yellow().bold()); + } print_banner(); - let stdin = self.create_stdin()?; + print_banner_command("Execute"); - let emulator = if cfg!(target_os = "macos") { true } else { self.emulator }; - let result = if emulator { self.run_emu(stdin)? } else { self.run_asm(stdin)? }; + print_banner_field("Elf", self.elf.display()); - info!( - "Execution completed in {:.2?}, executed steps: {}", - result.duration, result.execution.executed_steps - ); + let inputs_str = self.inputs.clone().unwrap_or_else(|| "None".dimmed().to_string()); + print_banner_field("Input", inputs_str); - Ok(()) - } + if let Some(hints) = &self.hints { + print_banner_field("Prec. Hints", hints); + } - fn create_stdin(&mut self) -> Result { - let stdin = if let Some(input) = &self.input { - if !input.exists() { - return Err(anyhow::anyhow!("Input file not found at {:?}", input.display())); + let stdin = ZiskStdin::from_uri(self.inputs.as_ref())?; + + let hints_stream = match self.hints.as_ref() { + Some(uri) => { + let stream = StreamSource::from_uri(uri)?; + if matches!(stream, StreamSource::Quic(_)) { + anyhow::bail!("QUIC hints source is not supported in CLI mode."); + } + Some(stream) } - ZiskStdin::from_file(input)? + None => None, + }; + + let emulator = if cfg!(target_os = "macos") { + if !self.emulator { + warn!("Emulator mode is forced on macOS due to lack of ASM support."); + } + true } else { - ZiskStdin::null() + self.emulator }; - Ok(stdin) + + let result = + if emulator { self.run_emu(stdin)? } else { self.run_asm(stdin, hints_stream)? }; + + info!("{}", "--- EXECUTE SUMMARY ------------------------".bright_green().bold()); + print_execution_summary( + &result.executor_summary.executor_time, + result.total_duration, + result.executor_summary.steps, + ); + + Ok(()) } pub fn run_emu(&mut self, stdin: ZiskStdin) -> Result { let prover = ProverClient::builder() .emu() .witness() - .witness_lib_path_opt(self.witness_lib.clone()) .proving_key_path_opt(self.proving_key.clone()) - .elf_path(self.elf.clone()) .verbose(self.verbose) .shared_tables(self.shared_tables) .print_command_info() .build()?; - prover.execute(stdin) + let elf = ElfBinaryFromFile::new(&self.elf, false)?; + let (pk, _) = prover.setup(&elf)?; + prover.execute(&pk, stdin) } - pub fn run_asm(&mut self, stdin: ZiskStdin) -> Result { + pub fn run_asm( + &mut self, + stdin: ZiskStdin, + hints_stream: Option, + ) -> Result { let prover = ProverClient::builder() .asm() - .verify_constraints() - .witness_lib_path_opt(self.witness_lib.clone()) + .witness() .proving_key_path_opt(self.proving_key.clone()) - .elf_path(self.elf.clone()) .verbose(self.verbose) .shared_tables(self.shared_tables) .asm_path_opt(self.asm.clone()) + .no_auto_setup(self.no_auto_setup) .base_port_opt(self.port) .unlock_mapped_memory(self.unlock_mapped_memory) + .asm_out_file(self.asm_out_file) .print_command_info() .build()?; - prover.execute(stdin) + let elf = ElfBinaryFromFile::new(&self.elf, hints_stream.is_some())?; + let (pk, _) = prover.setup(&elf)?; + if let Some(hints_stream) = hints_stream { + pk.register_hints_stream(hints_stream)?; + } + prover.execute(&pk, stdin) } } diff --git a/cli/src/commands/mod.rs b/cli/src/commands/mod.rs index b1d7fac1b..a06594528 100644 --- a/cli/src/commands/mod.rs +++ b/cli/src/commands/mod.rs @@ -2,28 +2,30 @@ mod build; mod check_setup; mod clean; mod common; +mod convert_input; mod execute; mod prove; -mod prove_client; +mod prove_snark; mod rom_setup; mod run; mod sdk; -mod server; mod stats; mod verify_constraints; +mod verify_snark; mod verify_stark; pub use build::*; pub use check_setup::*; pub use clean::*; pub use common::*; +pub use convert_input::*; pub use execute::*; pub use prove::*; -pub use prove_client::*; +pub use prove_snark::*; pub use rom_setup::*; pub use run::*; pub use sdk::*; -pub use server::*; pub use stats::*; pub use verify_constraints::*; +pub use verify_snark::*; pub use verify_stark::*; diff --git a/cli/src/commands/prove.rs b/cli/src/commands/prove.rs index 5b3833832..26ee0778d 100644 --- a/cli/src/commands/prove.rs +++ b/cli/src/commands/prove.rs @@ -1,14 +1,14 @@ -use crate::ux::print_banner; +use crate::ux::{print_banner, print_banner_command, print_banner_field, print_execution_summary}; use anyhow::Result; use colored::Colorize; use proofman_common::ParamsGPU; use std::path::PathBuf; +use tracing::{info, warn}; use zisk_build::ZISK_VERSION_MESSAGE; -use zisk_common::io::ZiskStdin; -#[cfg(feature = "stats")] -use zisk_common::ExecutorStatsEvent; -use zisk_sdk::{ProverClient, ZiskProveResult}; +use zisk_common::io::{StreamSource, ZiskStdin}; +use zisk_common::ElfBinaryFromFile; +use zisk_sdk::{ProofOpts, ProverClient, ZiskProof, ZiskProveResult}; // Structure representing the 'prove' subcommand of cargo. #[derive(clap::Args)] @@ -21,10 +21,6 @@ use zisk_sdk::{ProverClient, ZiskProveResult}; .required(false) ))] pub struct ZiskProve { - /// Witness computation dynamic library path - #[clap(short = 'w', long)] - pub witness_lib: Option, - /// ELF file path /// This is the path to the ROM file that the witness computation dynamic library will use /// to generate the witness. @@ -41,13 +37,21 @@ pub struct ZiskProve { pub emulator: bool, /// Input path - #[clap(short = 'i', long)] - pub input: Option, + #[clap(short = 'i', long, alias = "input", conflicts_with = "hints")] + pub inputs: Option, + + /// Precompiles Hints path + #[clap(short = 'H', long, conflicts_with = "inputs")] + pub hints: Option, /// Setup folder path #[clap(short = 'k', long)] pub proving_key: Option, + /// Setup folder path for SNARK + #[clap(short = 'w', long)] + pub proving_key_snark: Option, + /// Output dir path #[clap(short = 'o', long, default_value = "tmp")] pub output_dir: PathBuf, @@ -55,8 +59,8 @@ pub struct ZiskProve { #[clap(short = 'a', long, default_value_t = false)] pub aggregation: bool, - #[clap(short = 'f', long, default_value_t = false)] - pub final_snark: bool, + #[clap(short = 'c', long, default_value_t = false)] + pub compressed: bool, #[clap(short = 'y', long, default_value_t = false)] pub verify_proofs: bool, @@ -80,6 +84,11 @@ pub struct ZiskProve { #[clap(short = 'u', long, conflicts_with = "emulator")] pub unlock_mapped_memory: bool, + /// Redirect ASM emulator output to file + /// This option is mutually exclusive with `--emulator` + #[clap(long, conflicts_with = "emulator", default_value_t = false)] + pub asm_out_file: bool, + /// Verbosity (-v, -vv) #[arg(short ='v', long, action = clap::ArgAction::Count, help = "Increase verbosity level")] pub verbose: u8, // Using u8 to hold the number of `-v` @@ -87,7 +96,7 @@ pub struct ZiskProve { #[clap(short = 't', long)] pub max_streams: Option, - #[clap(short = 'n', long)] + #[clap(short = 'h', long)] pub number_threads_witness: Option, #[clap(short = 'x', long)] @@ -104,125 +113,201 @@ pub struct ZiskProve { #[clap(short = 'r', long, default_value_t = false)] pub rma: bool, + + #[clap(short = 'n', long, default_value_t = false)] + pub no_auto_setup: bool, + + #[clap(long, default_value_t = false)] + pub snark: bool, } impl ZiskProve { pub fn run(&mut self) -> Result<()> { + // Check if the deprecated alias was used + if std::env::args().any(|arg| arg == "--input") { + eprintln!("{}", "Warning: --input is deprecated, use --inputs instead".yellow().bold()); + } + print_banner(); - let mut gpu_params = ParamsGPU::new(self.preallocate); + print_banner_command("Prove"); + + print_banner_field("Elf", self.elf.display()); - if self.max_streams.is_some() { - gpu_params.with_max_number_streams(self.max_streams.unwrap()); + let mut gpu_params = None; + if self.preallocate + || self.max_streams.is_some() + || self.number_threads_witness.is_some() + || self.max_witness_stored.is_some() + { + let mut gpu_params_new = ParamsGPU::new(self.preallocate); + if let Some(max_witness_stored) = self.max_witness_stored { + gpu_params_new.with_max_witness_stored(max_witness_stored); + } + gpu_params = Some(gpu_params_new); } - if self.number_threads_witness.is_some() { - gpu_params.with_number_threads_pools_witness(self.number_threads_witness.unwrap()); + + let inputs_str = self.inputs.clone().unwrap_or_else(|| "None".dimmed().to_string()); + print_banner_field("Input", inputs_str); + + if let Some(hints) = &self.hints { + print_banner_field("Prec. Hints", hints); } - if self.max_witness_stored.is_some() { - gpu_params.with_max_witness_stored(self.max_witness_stored.unwrap()); + + if self.snark && self.compressed { + anyhow::bail!("Compressed proofs are not supported for SNARK generation."); } - let stdin = self.create_stdin()?; + let stdin = ZiskStdin::from_uri(self.inputs.as_ref())?; - let emulator = if cfg!(target_os = "macos") { true } else { self.emulator }; + let hints_stream = match self.hints.as_ref() { + Some(uri) => { + let stream = StreamSource::from_uri(uri)?; + if matches!(stream, StreamSource::Quic(_)) { + anyhow::bail!("QUIC hints source is not supported in CLI mode."); + } + Some(stream) + } + None => None, + }; + + let emulator = if cfg!(target_os = "macos") { + if !self.emulator { + warn!("Emulator mode is forced on macOS due to lack of ASM support."); + } + true + } else { + self.emulator + }; let (result, world_rank) = if emulator { self.run_emu(stdin, gpu_params)? } else { - self.run_asm(stdin, gpu_params)? + self.run_asm(stdin, hints_stream, gpu_params)? }; if world_rank == 0 { - let elapsed = result.duration.as_secs_f64(); - tracing::info!(""); - tracing::info!( - "{}", - "--- PROVE SUMMARY ------------------------".bright_green().bold() - ); - if let Some(proof_id) = result.proof.id { - tracing::info!(" Proof ID: {}", proof_id); + info!("{}", "--- PROVE SUMMARY ------------------------".bright_green().bold()); + + if let Some(proof_id) = &result.get_proof_id() { + let output_dir = match result.get_proof() { + ZiskProof::VadcopFinal(_) | ZiskProof::VadcopFinalCompressed(_) => { + self.output_dir.join("vadcop_final_proof.bin") + } + ZiskProof::Plonk(_) | ZiskProof::Fflonk(_) => { + self.output_dir.join("final_snark_proof.bin") + } + _ => { + return Err(anyhow::anyhow!("Unsupported proof type for saving proof file")) + } + }; + result.save_proof_with_publics(output_dir)?; + info!("Proof ID: {}", proof_id); + info!("Proof Time: {:.3} seconds", result.duration.as_secs_f64()); } - tracing::info!(" ► Statistics"); - tracing::info!( - " time: {} seconds, steps: {}", - elapsed, - result.execution.executed_steps + print_execution_summary( + &result.executor_summary.executor_time, + result.duration, + result.executor_summary.steps, ); } Ok(()) } - fn create_stdin(&mut self) -> Result { - let stdin = if let Some(input) = &self.input { - if !input.exists() { - return Err(anyhow::anyhow!("Input file not found at {:?}", input.display())); - } - ZiskStdin::from_file(input)? - } else { - ZiskStdin::null() - }; - Ok(stdin) - } - pub fn run_emu( &mut self, stdin: ZiskStdin, - gpu_params: ParamsGPU, + gpu_params: Option, ) -> Result<(ZiskProveResult, i32)> { let prover = ProverClient::builder() - .emu() - .prove() .aggregation(self.aggregation) - .rma(self.rma) - .witness_lib_path_opt(self.witness_lib.clone()) .proving_key_path_opt(self.proving_key.clone()) - .elf_path(self.elf.clone()) + .proving_key_snark_path_opt(self.proving_key_snark.clone()) .verbose(self.verbose) .shared_tables(self.shared_tables) - .save_proofs(self.save_proofs) - .output_dir(self.output_dir.clone()) - .verify_proofs(self.verify_proofs) - .minimal_memory(self.minimal_memory) + .with_snark(self.snark) .gpu(gpu_params) .print_command_info() .build()?; - let result = prover.prove(stdin)?; + let elf = ElfBinaryFromFile::new(&self.elf, false)?; + let (pk, _) = prover.setup(&elf)?; + + let proof_options = ProofOpts { + aggregation: self.aggregation, + rma: self.rma, + minimal_memory: self.minimal_memory, + verify_proofs: self.verify_proofs, + save_proofs: self.save_proofs, + output_dir_path: Some(self.output_dir.clone()), + }; + let world_rank = prover.world_rank(); + let mut prover = prover.prove(&pk, stdin).with_proof_options(proof_options); + if self.snark { + prover = prover.plonk(); + } + if self.compressed { + prover = prover.compressed(); + } + let result = prover.run()?; + Ok((result, world_rank)) } pub fn run_asm( &mut self, stdin: ZiskStdin, - gpu_params: ParamsGPU, + hints_stream: Option, + gpu_params: Option, ) -> Result<(ZiskProveResult, i32)> { let prover = ProverClient::builder() - .asm() - .prove() .aggregation(self.aggregation) - .rma(self.rma) - .witness_lib_path_opt(self.witness_lib.clone()) + .asm() .proving_key_path_opt(self.proving_key.clone()) - .elf_path(self.elf.clone()) + .proving_key_snark_path_opt(self.proving_key_snark.clone()) .verbose(self.verbose) + .with_snark(self.snark) .shared_tables(self.shared_tables) .asm_path_opt(self.asm.clone()) .base_port_opt(self.port) + .no_auto_setup(self.no_auto_setup) .unlock_mapped_memory(self.unlock_mapped_memory) - .save_proofs(self.save_proofs) - .output_dir(self.output_dir.clone()) - .verify_proofs(self.verify_proofs) - .minimal_memory(self.minimal_memory) + .asm_out_file(self.asm_out_file) .gpu(gpu_params) .print_command_info() .build()?; - let result = prover.prove(stdin)?; + let elf = ElfBinaryFromFile::new(&self.elf, hints_stream.is_some())?; + let (pk, _) = prover.setup(&elf)?; + + let proof_options = ProofOpts { + aggregation: self.aggregation, + rma: self.rma, + minimal_memory: self.minimal_memory, + verify_proofs: self.verify_proofs, + save_proofs: self.save_proofs, + output_dir_path: Some(self.output_dir.clone()), + }; + + if let Some(hints_stream) = hints_stream { + pk.register_hints_stream(hints_stream)?; + } + let world_rank = prover.world_rank(); + let mut prover = prover.prove(&pk, stdin).with_proof_options(proof_options); + if self.snark { + prover = prover.plonk(); + } + if self.compressed { + prover = prover.compressed(); + } + + let result = prover.run()?; + Ok((result, world_rank)) } } diff --git a/cli/src/commands/prove_client.rs b/cli/src/commands/prove_client.rs deleted file mode 100644 index a6cb478f2..000000000 --- a/cli/src/commands/prove_client.rs +++ /dev/null @@ -1,179 +0,0 @@ -use anyhow::Result; -use clap::{Parser, Subcommand}; -use server::{ - ZiskProveRequest, ZiskRequest, ZiskResponse, ZiskShutdownRequest, ZiskStatusRequest, - ZiskVerifyConstraintsRequest, -}; -use std::{ - io::{BufRead, BufReader, Write}, - net::TcpStream, - path::PathBuf, -}; - -use crate::commands::DEFAULT_PORT; - -use colored::Colorize; - -#[derive(Parser)] -#[command(name = "Zisk Prover Client", version, about = "Send commands to the prover server")] -pub struct ZiskProveClient { - #[command(subcommand)] - pub command: ClientCommand, -} - -#[derive(Subcommand, Debug)] -#[command(rename_all = "snake_case")] -pub enum ClientCommand { - /// Get server status - Status { - /// Port of the server (by default DEFAULT_PORT) - #[clap(long)] - port: Option, - }, - - /// Shut down the server - Shutdown { - /// Port of the server (by default DEFAULT_PORT) - #[clap(long)] - port: Option, - }, - - Prove { - /// Path to the input file - #[arg(short, long)] - input: PathBuf, - - /// Use aggregation - #[clap(short = 'a', long, default_value_t = false)] - aggregation: bool, - - #[clap(short = 'r', long, default_value_t = false)] - rma: bool, - - /// Use final snark - #[clap(short = 'f', long, default_value_t = false)] - final_snark: bool, - - /// Verify proofs - #[clap(short = 'y', long, default_value_t = false)] - verify_proofs: bool, - - /// Output folder for the proof - #[clap(short = 'o', long, default_value = "tmp")] - output_dir: PathBuf, - - #[clap(short = 'p')] - prefix: String, - - /// Use minimal memory - #[clap(long, default_value_t = false)] - minimal_memory: bool, - - /// Port of the server (by default DEFAULT_PORT) - #[clap(long)] - port: Option, - - /// Verbosity (-v, -vv) - #[arg(short ='v', long, action = clap::ArgAction::Count, help = "Increase verbosity level")] - verbose: u8, // Using u8 to hold the number of `-v` - }, - /// Verify constraints from input file - VerifyConstraints { - /// Path to the input file - #[arg(short, long)] - input: PathBuf, - - /// Port of the server (by default DEFAULT_PORT) - #[clap(long)] - port: Option, - - /// Verbosity (-v, -vv) - #[arg(short ='v', long, action = clap::ArgAction::Count, help = "Increase verbosity level")] - verbose: u8, // Using u8 to hold the number of `-v` - }, -} - -impl ZiskProveClient { - pub fn run(&self) -> Result<()> { - let request = match &self.command { - ClientCommand::Status { port: _ } => { - ZiskRequest::Status { payload: ZiskStatusRequest {} } - } - ClientCommand::Shutdown { port: _ } => { - ZiskRequest::Shutdown { payload: ZiskShutdownRequest {} } - } - ClientCommand::Prove { - input, - aggregation, - rma, - final_snark, - verify_proofs, - minimal_memory, - output_dir, - prefix, - verbose: _, - port: _, - } => ZiskRequest::Prove { - payload: ZiskProveRequest { - input: input.clone(), - aggregation: *aggregation, - rma: *rma, - final_snark: *final_snark, - verify_proofs: *verify_proofs, - minimal_memory: *minimal_memory, - folder: output_dir.clone(), - prefix: prefix.clone(), - }, - }, - ClientCommand::VerifyConstraints { input, verbose: _, port: _ } => { - ZiskRequest::VerifyConstraints { - payload: ZiskVerifyConstraintsRequest { input: input.clone() }, - } - } - }; - - // Determine the port to use for this client instance. - // - If no port is specified, default to DEFAULT_PORT. - // - If a port is specified, use it as the base port. - // In both cases, the local MPI rank is added to the port to avoid conflicts - // when running multiple processes on the same machine. - let port = match self.command { - ClientCommand::Prove { port, .. } - | ClientCommand::VerifyConstraints { port, .. } - | ClientCommand::Status { port } - | ClientCommand::Shutdown { port } => port.unwrap_or(DEFAULT_PORT), - }; - - // TODO: FIX! - // port += mpi_context.node_rank as u16; - - let address = format!("localhost:{port}"); - - // Open connection - let mut stream = TcpStream::connect(&address) - .map_err(|e| anyhow::anyhow!("Failed to connect to server: {}", e))?; - - // Serialize and send request - let mut request_json = serde_json::to_string(&request)?; - request_json.push('\n'); - stream.write_all(request_json.as_bytes())?; - - // Read and parse response - let mut reader = BufReader::new(stream); - let mut response_line = String::new(); - reader.read_line(&mut response_line)?; - - if let Err(e) = serde_json::from_str::(&response_line) { - return Err(anyhow::anyhow!( - "Failed to parse server response: {}\nRaw: {}", - e, - response_line - )); - } - - println!(); - println!("{} {}", format!("{: >12}", "Response").bright_green().bold(), response_line); - - Ok(()) - } -} diff --git a/cli/src/commands/prove_snark.rs b/cli/src/commands/prove_snark.rs new file mode 100644 index 000000000..ee66c462a --- /dev/null +++ b/cli/src/commands/prove_snark.rs @@ -0,0 +1,76 @@ +// extern crate env_logger; +use anyhow::Result; +use clap::Parser; +use colored::Colorize; +use fields::Goldilocks; +use std::path::PathBuf; + +use crate::ux::{print_banner, print_banner_command, print_banner_field}; +use proofman::SnarkWrapper; +use zisk_sdk::ZiskProofWithPublicValues; + +#[derive(Parser)] +#[command(version, about, long_about = None)] +#[command(propagate_version = true)] +pub struct ZiskProveSnark { + #[clap(short = 'p', long)] + pub proof: String, + + /// ELF file path + /// This is the path to the ROM file that the witness computation dynamic library will use + /// to generate the witness. + #[clap(short = 'e', long)] + pub elf: PathBuf, + + /// Setup folder path + #[clap(short = 'k', long)] + pub proving_key_snark: PathBuf, + + /// Output dir path + #[clap(short = 'o', long, default_value = "tmp")] + pub output_dir: PathBuf, + + /// Verbosity (-v, -vv) + #[arg(short, long, action = clap::ArgAction::Count, help = "Increase verbosity level")] + pub verbose: u8, // Using u8 to hold the number of `-v` +} + +impl ZiskProveSnark { + pub fn run(&self) -> Result<()> { + print_banner(); + + print_banner_command("Prove SNARK"); + + print_banner_field("Elf", self.elf.display()); + + let zisk_proof = ZiskProofWithPublicValues::load(&self.proof).map_err(|e| { + anyhow::anyhow!( + "Failed to load ZiskProofWithPublicValues from file {}: {}", + self.proof, + e + ) + })?; + + let snark_wrapper: SnarkWrapper = + SnarkWrapper::new(&self.proving_key_snark, self.verbose.into())?; + + let proof = zisk_proof.get_vadcop_final_proof()?; + + let snark_proof = + snark_wrapper.generate_final_snark_proof(&proof, Some(self.output_dir.clone()))?; + snark_proof.save(self.output_dir.join("final_snark_proof.bin")).map_err(|e| { + anyhow::anyhow!( + "Failed to save final SNARK proof to output dir {}: {}", + self.output_dir.join("final_snark_proof.bin").display(), + e + ) + })?; + println!( + "{} Final SNARK proof generated. Proof: {:?}, Publics: {:?}", + "Info:".bright_blue().bold(), + snark_proof.proof_bytes, + snark_proof.public_bytes + ); + Ok(()) + } +} diff --git a/cli/src/commands/rom_setup.rs b/cli/src/commands/rom_setup.rs index 66caf6f05..7a35631d5 100644 --- a/cli/src/commands/rom_setup.rs +++ b/cli/src/commands/rom_setup.rs @@ -2,13 +2,16 @@ use anyhow::Result; use clap::Parser; use std::path::PathBuf; +use crate::ux::print_banner_field; +use crate::{commands::get_proving_key, ux::print_banner}; use colored::Colorize; -use proofman_common::initialize_logger; - -use crate::{ - commands::{get_proving_key, get_zisk_path}, - ux::print_banner, -}; +use fields::Goldilocks; +use proofman_common::{MpiCtx, ParamsGPU, ProofCtx, ProofType, SetupCtx, SetupsVadcop}; +use rom_setup::gen_assembly; +use rom_setup::rom_merkle_setup; +use std::sync::Arc; +use zisk_common::ElfBinaryFromFile; +use zisk_sdk::setup_logger; #[derive(Parser)] #[command(version, about, long_about = None)] @@ -22,39 +25,64 @@ pub struct ZiskRomSetup { #[clap(short = 'k', long)] pub proving_key: Option, - /// Setup folder path - #[clap(short = 'z', long)] - pub zisk_path: Option, - /// Output dir path #[clap(short = 'o', long)] pub output_dir: Option, - #[clap(short = 'v', long, default_value_t = false)] - pub verbose: bool, + /// Enable precompile hints in assembly generation + #[clap(short = 'n', long, default_value_t = false)] + pub hints: bool, + + #[arg(short, long, action = clap::ArgAction::Count, help = "Increase verbosity level")] + pub verbose: u8, } impl ZiskRomSetup { pub fn run(&self) -> Result<()> { - initialize_logger(proofman_common::VerboseMode::Info, None); - - tracing::info!( - "{}", - format!("{} Rom Setup", format!("{: >12}", "Command").bright_green().bold()) - ); - tracing::info!(""); + setup_logger(self.verbose.into()); print_banner(); + print_banner_field("Command", "Rom Setup"); + print_banner_field("Elf", self.elf.display()); + if self.hints { + print_banner_field("Hints", "Enabled".yellow()); + } + let proving_key = get_proving_key(self.proving_key.as_ref()); - let zisk_path = get_zisk_path(self.zisk_path.as_ref()); - - rom_setup::rom_full_setup( - &self.elf, - &proving_key, - &zisk_path, - &self.output_dir, - self.verbose, - ) + + print_banner_field("Proving Key", proving_key.display()); + + println!(); + + let mpi_ctx = Arc::new(MpiCtx::new()); + let mut pctx = ProofCtx::create_ctx(proving_key, false, self.verbose.into(), mpi_ctx)?; + + let mut params_gpu = ParamsGPU::new(false); + params_gpu.with_max_number_streams(1); + + let sctx = Arc::new(SetupCtx::::new( + &pctx.global_info, + &ProofType::Basic, + false, + ¶ms_gpu, + &[], + )); + let setups_vadcop = + Arc::new(SetupsVadcop::new(&pctx.global_info, false, false, ¶ms_gpu, &[])); + pctx.set_device_buffers(&sctx, &setups_vadcop, false, ¶ms_gpu)?; + let pctx = Arc::new(pctx); + + tracing::info!("Computing setup for ROM {}", self.elf.display()); + + tracing::info!("Computing merkle root"); + let elf = ElfBinaryFromFile::new(&self.elf, self.hints)?; + rom_merkle_setup::(&pctx, &elf, &self.output_dir)?; + + gen_assembly(&self.elf, &self.output_dir, self.hints, self.verbose > 0)?; + + println!(); + tracing::info!("{}", "ROM setup successfully completed".bright_green().bold()); + Ok(()) } } diff --git a/cli/src/commands/run.rs b/cli/src/commands/run.rs index 8ad7f9709..f6595db0c 100644 --- a/cli/src/commands/run.rs +++ b/cli/src/commands/run.rs @@ -38,6 +38,9 @@ pub struct ZiskRun { #[clap(long, short = 'm')] metrics: bool, + #[clap(short = 'f', long)] + riscof: bool, + #[clap(last = true)] args: Vec, } @@ -72,12 +75,15 @@ impl ZiskRun { if self.metrics { extra_command += " -m "; } - if self.input.is_some() { - let path = Path::new(self.input.as_ref().unwrap()); + if let Some(input) = &self.input { + let path = Path::new(input); if !path.exists() { return Err(anyhow!("Input file does not exist at path: {}", path.display())); } - input_command = format!("-i {}", self.input.as_ref().unwrap()); + input_command = format!("-i {}", input); + } + if self.riscof { + extra_command += " -f "; } runner_command = format!("ziskemu {input_command} {extra_command} -e"); } else { @@ -118,8 +124,8 @@ impl ZiskRun { qemu-system-riscv64 \ -cpu rv64 \ -machine virt \ - -device loader,file=./{},addr=0x90000000 \ - -device loader,file=./{},addr=0x90000008 \ + -device loader,file=./{},addr=0x40000000 \ + -device loader,file=./{},addr=0x40000008 \ -m 1G \ -s \ {} \ diff --git a/cli/src/commands/server.rs b/cli/src/commands/server.rs deleted file mode 100644 index e68d18e92..000000000 --- a/cli/src/commands/server.rs +++ /dev/null @@ -1,280 +0,0 @@ -use anyhow::Result; -use clap::Parser; -use colored::Colorize; -use proofman_common::{json_to_debug_instances_map, DebugInfo, ParamsGPU}; -use rom_setup::{ - gen_elf_hash, get_elf_bin_file_path, get_elf_data_hash, get_rom_blowup_factor_and_arity, - DEFAULT_CACHE_PATH, -}; -use server::ZiskServerParams; -use server::ZiskService; -use std::collections::HashMap; -use std::fs; -use std::{path::PathBuf, process}; -use zisk_common::init_tracing; - -use crate::commands::{get_proving_key, get_witness_computation_lib}; -use crate::ux::print_banner; -use zisk_build::ZISK_VERSION_MESSAGE; - -pub const DEFAULT_PORT: u16 = 7878; -const LOG_PATH: &str = "zisk_prover_server.log"; - -// Structure representing the 'prove' subcommand of cargo. -#[derive(Parser, Debug)] -#[command(name = "Prover Server", version, about = "A TCP-based prover control server", long_about = None, version = ZISK_VERSION_MESSAGE)] -#[command(propagate_version = true)] -#[command(group( - clap::ArgGroup::new("input_mode") - .args(["asm", "emulator"]) - .multiple(false) - .required(false) -))] -pub struct ZiskServer { - /// Optional port number (default 7878) - #[arg(short, long, default_value_t = DEFAULT_PORT)] - port: u16, - - /// Witness computation dynamic library path - #[clap(short = 'w', long)] - pub witness_lib: Option, - - /// ELF file path - /// This is the path to the ROM file that the witness computation dynamic library will use - /// to generate the witness. - #[clap(short = 'e', long)] - pub elf: PathBuf, - - /// ASM file path - /// Optional, mutually exclusive with `--emulator` - #[clap(short = 's', long)] - pub asm: Option, - - /// Use prebuilt emulator (mutually exclusive with `--asm`) - #[clap(short = 'l', long, action = clap::ArgAction::SetTrue)] - pub emulator: bool, - - /// Setup folder path - #[clap(short = 'k', long)] - pub proving_key: Option, - - /// Base port for Assembly microservices (default: 23115). - /// A single execution will use 3 consecutive ports, from this port to port + 2. - /// If you are running multiple instances of ZisK using mpi on the same machine, - /// it will use from this base port to base port + 2 * number_of_instances. - /// For example, if you run 2 mpi instances of ZisK, it will use ports from 23115 to 23117 - /// for the first instance, and from 23118 to 23120 for the second instance. - #[clap(long, conflicts_with = "emulator")] - pub asm_port: Option, - - /// Map unlocked flag - /// This is used to unlock the memory map for the ROM file. - /// If you are running ZisK on a machine with limited memory, you may want to enable this option. - /// This option is mutually exclusive with `--emulator`. - #[clap(short = 'u', long, conflicts_with = "emulator")] - pub unlock_mapped_memory: bool, - - /// Verbosity (-v, -vv) - #[arg(short ='v', long, action = clap::ArgAction::Count, help = "Increase verbosity level")] - pub verbose: u8, // Using u8 to hold the number of `-v` - - #[clap(short = 'd', long)] - pub debug: Option>, - - #[clap(short = 'c', long, default_value_t = false)] - pub verify_constraints: bool, - - #[clap(short = 'a', long, default_value_t = false)] - pub aggregation: bool, - - #[clap(short = 'f', long, default_value_t = false)] - pub final_snark: bool, - - #[clap(short = 'r', long, default_value_t = false)] - pub rma: bool, - - /// GPU PARAMS - #[clap(short = 'z', long, default_value_t = false)] - pub preallocate: bool, - - #[clap(short = 't', long)] - pub max_streams: Option, - - #[clap(short = 'n', long)] - pub number_threads_witness: Option, - - #[clap(short = 'x', long)] - pub max_witness_stored: Option, - - #[clap(short = 'j', long, default_value_t = false)] - pub shared_tables: bool, -} - -impl ZiskServer { - pub fn run(&mut self) -> Result<()> { - init_tracing(LOG_PATH); - - print_banner(); - - if !self.elf.exists() { - eprintln!("Error: ELF file '{}' not found.", self.elf.display()); - process::exit(1); - } - - let proving_key = get_proving_key(self.proving_key.as_ref()); - - let debug_info = match &self.debug { - None => DebugInfo::default(), - Some(None) => DebugInfo::new_debug(), - Some(Some(debug_value)) => { - json_to_debug_instances_map(proving_key.clone(), debug_value.clone())? - } - }; - - let default_cache_path = - std::env::var("HOME").ok().map(PathBuf::from).unwrap().join(DEFAULT_CACHE_PATH); - - if !default_cache_path.exists() { - if let Err(e) = fs::create_dir_all(default_cache_path.clone()) { - if e.kind() != std::io::ErrorKind::AlreadyExists { - // prevent collision in distributed mode - panic!("Failed to create the cache directory: {e:?}"); - } - } - } - - let emulator = if cfg!(target_os = "macos") { true } else { self.emulator }; - - let mut asm_rom = None; - if emulator { - self.asm = None; - } else if self.asm.is_none() { - let stem = self.elf.file_stem().unwrap().to_str().unwrap(); - let hash = get_elf_data_hash(&self.elf) - .map_err(|e| anyhow::anyhow!("Error computing ELF hash: {}", e))?; - let new_filename = format!("{stem}-{hash}-mt.bin"); - let asm_rom_filename = format!("{stem}-{hash}-rh.bin"); - asm_rom = Some(default_cache_path.join(asm_rom_filename)); - self.asm = Some(default_cache_path.join(new_filename)); - } - - if let Some(asm_path) = &self.asm { - if !asm_path.exists() { - return Err(anyhow::anyhow!("ASM file not found at {:?}", asm_path.display())); - } - } - - if let Some(asm_rom) = &asm_rom { - if !asm_rom.exists() { - return Err(anyhow::anyhow!("ASM file not found at {:?}", asm_rom.display())); - } - } - - let (blowup_factor, merkle_tree_arity) = get_rom_blowup_factor_and_arity(&proving_key); - - let rom_bin_path = get_elf_bin_file_path( - &self.elf.to_path_buf(), - &default_cache_path, - blowup_factor, - merkle_tree_arity, - )?; - - if !rom_bin_path.exists() { - let _ = gen_elf_hash( - &self.elf.clone(), - rom_bin_path.as_path(), - blowup_factor, - merkle_tree_arity, - false, - ) - .map_err(|e| anyhow::anyhow!("Error generating elf hash: {}", e)); - } - - self.print_command_info(); - let mut custom_commits_map: HashMap = HashMap::new(); - custom_commits_map.insert("rom".to_string(), rom_bin_path); - - let mut gpu_params = ParamsGPU::new(self.preallocate); - - if self.max_streams.is_some() { - gpu_params.with_max_number_streams(self.max_streams.unwrap()); - } - if self.number_threads_witness.is_some() { - gpu_params.with_number_threads_pools_witness(self.number_threads_witness.unwrap()); - } - if self.max_witness_stored.is_some() { - gpu_params.with_max_witness_stored(self.max_witness_stored.unwrap()); - } - - let server_params = ZiskServerParams::new( - self.port, - self.elf.clone(), - get_witness_computation_lib(self.witness_lib.as_ref()), - self.asm.clone(), - asm_rom, - self.asm_port, - custom_commits_map, - emulator, - proving_key, - self.verbose, - debug_info, - self.verify_constraints, - self.aggregation, - self.final_snark, - gpu_params, - self.unlock_mapped_memory, - self.shared_tables, - ); - - if let Err(e) = ZiskService::new(&server_params)?.run() { - eprintln!("Error starting server: {e}"); - process::exit(1); - } - - Ok(()) - } - - fn print_command_info(&self) { - println!("{} Prove Server", format!("{: >12}", "Command").bright_green().bold()); - println!( - "{} TCP server listening on 127.0.0.1:{}", - format!("{: >12}", "Socket").bright_green().bold(), - self.port - ); - println!("{} {}", format!("{: >12}", "Logfile").bright_green().bold(), LOG_PATH); - println!( - "{: >12} {}", - "Witness Lib".bright_green().bold(), - get_witness_computation_lib(self.witness_lib.as_ref()).display() - ); - - println!("{: >12} {}", "Elf".bright_green().bold(), self.elf.display()); - - if self.asm.is_some() { - let asm_path = self.asm.as_ref().unwrap().display(); - println!("{: >12} {}", "ASM runner".bright_green().bold(), asm_path); - } else { - println!( - "{: >12} {}", - "Emulator".bright_green().bold(), - "Running in emulator mode".bright_yellow() - ); - } - - println!( - "{: >12} {}", - "Proving key".bright_green().bold(), - get_proving_key(self.proving_key.as_ref()).display() - ); - - let std_mode = match &self.debug { - None => "Standard mode", - Some(None) => "Debug mode (fast)", - Some(Some(json_file)) => &format!("Debug mode (from config file: {})", json_file), - }; - println!("{: >12} {}", "STD".bright_green().bold(), std_mode); - // println!("{}", format!("{: >12} {}", "Distributed".bright_green().bold(), "ON (nodes: 4, threads: 32)")); - - println!(); - } -} diff --git a/cli/src/commands/stats.rs b/cli/src/commands/stats.rs index d18f25e07..756e028b1 100644 --- a/cli/src/commands/stats.rs +++ b/cli/src/commands/stats.rs @@ -1,14 +1,17 @@ use anyhow::Result; use clap::Parser; +use colored::Colorize; use serde::{Deserialize, Serialize}; use std::{collections::HashMap, fs, path::PathBuf, time::Instant}; +use tracing::warn; use zisk_build::ZISK_VERSION_MESSAGE; -use zisk_common::io::ZiskStdin; -use zisk_common::{ExecutorStats, Stats}; +use zisk_common::io::{StreamSource, ZiskStdin}; +use zisk_common::ElfBinaryFromFile; +use zisk_common::{ExecutorStatsHandle, Stats}; use zisk_pil::*; use zisk_sdk::ProverClient; -use crate::ux::print_banner; +use crate::ux::{print_banner, print_banner_command, print_banner_field}; #[derive(Parser)] #[command(author, about, long_about = None, version = ZISK_VERSION_MESSAGE)] @@ -20,10 +23,6 @@ use crate::ux::print_banner; .required(false) ))] pub struct ZiskStats { - /// Witness computation dynamic library path - #[clap(short = 'w', long)] - pub witness_lib: Option, - /// ROM file path /// This is the path to the ROM file that the witness computation dynamic library will use /// to generate the witness. @@ -40,8 +39,12 @@ pub struct ZiskStats { pub emulator: bool, /// Input path - #[clap(short = 'i', long)] - pub input: Option, + #[clap(short = 'i', long, alias = "input", conflicts_with = "hints")] + pub inputs: Option, + + /// Precompiles Hints path + #[clap(short = 'H', long, conflicts_with = "inputs")] + pub hints: Option, /// Setup folder path #[clap(short = 'k', long)] @@ -63,11 +66,16 @@ pub struct ZiskStats { #[clap(short = 'u', long, conflicts_with = "emulator")] pub unlock_mapped_memory: bool, + /// Redirect ASM emulator output to file + /// This option is mutually exclusive with `--emulator` + #[clap(long, conflicts_with = "emulator", default_value_t = false)] + pub asm_out_file: bool, + /// Verbosity (-v, -vv) #[arg(short = 'v', long, action = clap::ArgAction::Count, help = "Increase verbosity level")] pub verbose: u8, // Using u8 to hold the number of `-v` - #[clap(short = 'n', long)] + #[clap(short = 'h', long)] pub number_threads_witness: Option, #[clap(short = 'x', long)] @@ -85,17 +93,55 @@ pub struct ZiskStats { #[clap(short = 'j', long, default_value_t = false)] pub shared_tables: bool, + + #[clap(short = 'n', long, default_value_t = false)] + pub no_auto_setup: bool, } impl ZiskStats { pub fn run(&mut self) -> Result<()> { + // Check if the deprecated alias was used + if std::env::args().any(|arg| arg == "--input") { + eprintln!("{}", "Warning: --input is deprecated, use --inputs instead".yellow().bold()); + } + print_banner(); - let stdin = self.create_stdin()?; + print_banner_command("Stats"); + + print_banner_field("Elf", self.elf.display()); + + let inputs_str = self.inputs.clone().unwrap_or_else(|| "None".dimmed().to_string()); + print_banner_field("Input", inputs_str); + + if let Some(hints) = &self.hints { + print_banner_field("Prec. Hints", hints); + } + + let stdin = ZiskStdin::from_uri(self.inputs.as_ref())?; + + let hints_stream = match self.hints.as_ref() { + Some(uri) => { + let stream = StreamSource::from_uri(uri)?; + if matches!(stream, StreamSource::Quic(_)) { + anyhow::bail!("QUIC hints source is not supported in CLI mode."); + } + Some(stream) + } + None => None, + }; + + let emulator = if cfg!(target_os = "macos") { + if !self.emulator { + warn!("Emulator mode is forced on macOS due to lack of ASM support."); + } + true + } else { + self.emulator + }; - let emulator = if cfg!(target_os = "macos") { true } else { self.emulator }; let (world_rank, n_processes, stats) = - if emulator { self.run_emu(stdin)? } else { self.run_asm(stdin)? }; + if emulator { self.run_emu(stdin)? } else { self.run_asm(stdin, hints_stream)? }; if world_rank % 2 == 1 { std::thread::sleep(std::time::Duration::from_millis(2000)); @@ -108,57 +154,62 @@ impl ZiskStats { ); if let Some(stats) = &stats { - Self::print_stats(&stats.witness_stats); + Self::print_stats(&stats.get_inner().lock().unwrap().witness_stats); stats.print_stats(); } Ok(()) } - fn create_stdin(&mut self) -> Result { - let stdin = if let Some(input) = &self.input { - if !input.exists() { - return Err(anyhow::anyhow!("Input file not found at {:?}", input.display())); - } - ZiskStdin::from_file(input)? - } else { - ZiskStdin::null() - }; - Ok(stdin) - } - - pub fn run_emu(&mut self, stdin: ZiskStdin) -> Result<(i32, i32, Option)> { + pub fn run_emu(&mut self, stdin: ZiskStdin) -> Result<(i32, i32, Option)> { let prover = ProverClient::builder() .emu() .witness() - .witness_lib_path_opt(self.witness_lib.clone()) .proving_key_path_opt(self.proving_key.clone()) - .elf_path(self.elf.clone()) .verbose(self.verbose) .shared_tables(self.shared_tables) .print_command_info() .build()?; - prover.stats(stdin, self.debug.clone(), self.mpi_node.map(|n| n as u32)) + let elf = ElfBinaryFromFile::new(&self.elf, false)?; + let (pk, _) = prover.setup(&elf)?; + + prover.stats( + &pk, + stdin, + self.debug.clone(), + self.minimal_memory, + self.mpi_node.map(|n| n as u32), + ) } - pub fn run_asm(&mut self, stdin: ZiskStdin) -> Result<(i32, i32, Option)> { + pub fn run_asm( + &mut self, + stdin: ZiskStdin, + hints_stream: Option, + ) -> Result<(i32, i32, Option)> { let prover = ProverClient::builder() .asm() .witness() - .witness_lib_path_opt(self.witness_lib.clone()) .proving_key_path_opt(self.proving_key.clone()) - .elf_path(self.elf.clone()) .verbose(self.verbose) .shared_tables(self.shared_tables) .asm_path_opt(self.asm.clone()) + .no_auto_setup(self.no_auto_setup) .base_port_opt(self.port) .unlock_mapped_memory(self.unlock_mapped_memory) + .asm_out_file(self.asm_out_file) .print_command_info() .build()?; + let elf = ElfBinaryFromFile::new(&self.elf, hints_stream.is_some())?; + let (pk, _) = prover.setup(&elf)?; + + if let Some(hints_stream) = hints_stream { + pk.register_hints_stream(hints_stream)?; + } let mpi_node = self.mpi_node.map(|n| n as u32); - prover.stats(stdin, self.debug.clone(), mpi_node) + prover.stats(&pk, stdin, self.debug.clone(), self.minimal_memory, mpi_node) } /// Prints stats individually and grouped, with aligned columns. diff --git a/cli/src/commands/verify_constraints.rs b/cli/src/commands/verify_constraints.rs index 4fd80591d..1fc5beb4e 100644 --- a/cli/src/commands/verify_constraints.rs +++ b/cli/src/commands/verify_constraints.rs @@ -1,13 +1,13 @@ -use crate::{commands::cli_fail_if_gpu_mode, ux::print_banner}; +use crate::ux::{print_banner, print_banner_command, print_banner_field, print_execution_summary}; use anyhow::Result; use clap::Parser; use colored::Colorize; use std::path::PathBuf; +use tracing::{info, warn}; use zisk_build::ZISK_VERSION_MESSAGE; -use zisk_common::io::ZiskStdin; -#[cfg(feature = "stats")] -use zisk_common::ExecutorStatsEvent; +use zisk_common::io::{StreamSource, ZiskStdin}; +use zisk_common::ElfBinaryFromFile; use zisk_sdk::{ProverClient, ZiskVerifyConstraintsResult}; #[derive(Parser)] @@ -20,10 +20,6 @@ use zisk_sdk::{ProverClient, ZiskVerifyConstraintsResult}; .required(false) ))] pub struct ZiskVerifyConstraints { - /// Witness computation dynamic library path - #[clap(short = 'w', long)] - pub witness_lib: Option, - /// ROM file path /// This is the path to the ROM file that the witness computation dynamic library will use /// to generate the witness. @@ -40,8 +36,12 @@ pub struct ZiskVerifyConstraints { pub emulator: bool, /// Input path - #[clap(short = 'i', long)] - pub input: Option, + #[clap(short = 'i', long, alias = "input", conflicts_with = "hints")] + pub inputs: Option, + + /// Precompiles Hints path + #[clap(short = 'H', long, conflicts_with = "inputs")] + pub hints: Option, /// Setup folder path #[clap(short = 'k', long)] @@ -63,6 +63,14 @@ pub struct ZiskVerifyConstraints { #[clap(short = 'u', long, conflicts_with = "emulator")] pub unlock_mapped_memory: bool, + /// Redirect ASM emulator output to file + /// This option is mutually exclusive with `--emulator` + #[clap(long, conflicts_with = "emulator", default_value_t = false)] + pub asm_out_file: bool, + + #[clap(short = 'n', long, default_value_t = false)] + pub no_auto_setup: bool, + /// Verbosity (-v, -vv) #[arg(short = 'v', long, action = clap::ArgAction::Count, help = "Increase verbosity level")] pub verbose: u8, // Using u8 to hold the number of `-v` @@ -76,72 +84,114 @@ pub struct ZiskVerifyConstraints { impl ZiskVerifyConstraints { pub fn run(&mut self) -> Result<()> { - cli_fail_if_gpu_mode()?; + // panic::set_hook(Box::new(|panic_info| { + // eprintln!("\x1B[31mPANIC DETECTED"); + // eprintln!("{} at {:?}", panic_info, panic_info.location()); + + // // Backtrace + // let bt = std::backtrace::Backtrace::force_capture(); + // eprintln!("Backtrace:\n{}", bt); + + // std::process::exit(101); + // })); + + // Check if the deprecated alias was used + if std::env::args().any(|arg| arg == "--input") { + eprintln!("{}", "Warning: --input is deprecated, use --inputs instead".yellow().bold()); + } print_banner(); - let stdin = self.create_stdin()?; + print_banner_command("Verify Constraints"); - let emulator = if cfg!(target_os = "macos") { true } else { self.emulator }; - let result = if emulator { self.run_emu(stdin)? } else { self.run_asm(stdin)? }; + print_banner_field("Elf", self.elf.display()); + + let inputs_str = self.inputs.clone().unwrap_or_else(|| "None".dimmed().to_string()); + print_banner_field("Input", inputs_str); + + if let Some(hints) = &self.hints { + print_banner_field("Prec. Hints", hints); + } + + let stdin = ZiskStdin::from_uri(self.inputs.as_ref())?; + + let hints_stream = match self.hints.as_ref() { + Some(uri) => { + let stream = StreamSource::from_uri(uri)?; + if matches!(stream, StreamSource::Quic(_)) { + anyhow::bail!("QUIC hints source is not supported in CLI mode."); + } + Some(stream) + } + None => None, + }; - tracing::info!(""); - tracing::info!( + let emulator = if cfg!(target_os = "macos") { + if !self.emulator { + warn!("Emulator mode is forced on macOS due to lack of ASM support."); + } + true + } else { + self.emulator + }; + + let result = + if emulator { self.run_emu(stdin)? } else { self.run_asm(stdin, hints_stream)? }; + + info!( "{}", "--- VERIFY CONSTRAINTS SUMMARY ------------------------".bright_green().bold() ); - tracing::info!(" ► Statistics"); - tracing::info!( - " time: {:.2} seconds, steps: {}", - result.duration.as_secs_f32(), - result.execution.executed_steps + print_execution_summary( + &result.executor_summary.executor_time, + result.duration, + result.executor_summary.steps, ); Ok(()) } - fn create_stdin(&mut self) -> Result { - let stdin = if let Some(input) = &self.input { - if !input.exists() { - return Err(anyhow::anyhow!("Input file not found at {:?}", input.display())); - } - ZiskStdin::from_file(input)? - } else { - ZiskStdin::null() - }; - Ok(stdin) - } - pub fn run_emu(&mut self, stdin: ZiskStdin) -> Result { let prover = ProverClient::builder() .emu() .verify_constraints() - .witness_lib_path_opt(self.witness_lib.clone()) .proving_key_path_opt(self.proving_key.clone()) - .elf_path(self.elf.clone()) .verbose(self.verbose) .shared_tables(self.shared_tables) .print_command_info() .build()?; - prover.verify_constraints_debug(stdin, self.debug.clone()) + let elf = ElfBinaryFromFile::new(&self.elf, false)?; + let (pk, _) = prover.setup(&elf)?; + + prover.verify_constraints_debug(&pk, stdin, self.debug.clone()) } - pub fn run_asm(&mut self, stdin: ZiskStdin) -> Result { + pub fn run_asm( + &mut self, + stdin: ZiskStdin, + hints_stream: Option, + ) -> Result { let prover = ProverClient::builder() .asm() .verify_constraints() - .witness_lib_path_opt(self.witness_lib.clone()) .proving_key_path_opt(self.proving_key.clone()) - .elf_path(self.elf.clone()) .verbose(self.verbose) .shared_tables(self.shared_tables) .asm_path_opt(self.asm.clone()) + .no_auto_setup(self.no_auto_setup) .base_port_opt(self.port) .unlock_mapped_memory(self.unlock_mapped_memory) + .asm_out_file(self.asm_out_file) .print_command_info() .build()?; - prover.verify_constraints_debug(stdin, self.debug.clone()) + let elf = ElfBinaryFromFile::new(&self.elf, hints_stream.is_some())?; + let (pk, _) = prover.setup(&elf)?; + + if let Some(hints_stream) = hints_stream { + pk.register_hints_stream(hints_stream)?; + } + prover.verify_constraints_debug(&pk, stdin, self.debug.clone()) } } diff --git a/cli/src/commands/verify_snark.rs b/cli/src/commands/verify_snark.rs new file mode 100644 index 000000000..170486bc6 --- /dev/null +++ b/cli/src/commands/verify_snark.rs @@ -0,0 +1,39 @@ +// extern crate env_logger; +use anyhow::Result; +use clap::Parser; +use colored::Colorize; +use proofman::{verify_snark_proof, SnarkProof}; +use std::path::PathBuf; +use zisk_sdk::setup_logger; + +#[derive(Parser)] +#[command(version, about, long_about = None)] +#[command(propagate_version = true)] +pub struct ZiskVerifySnark { + #[clap(short = 'p', long)] + pub proof: String, + + #[clap(short = 'k', long)] + pub verkey: PathBuf, + + /// Verbosity (-v, -vv) + #[arg(short, long, action = clap::ArgAction::Count, help = "Increase verbosity level")] + pub verbose: u8, // Using u8 to hold the number of `-v` +} + +impl ZiskVerifySnark { + pub fn run(&self) -> Result<()> { + println!("{} ZiskVerifySnark", format!("{: >12}", "Command").bright_green().bold()); + println!(); + + setup_logger(self.verbose.into()); + + let proof = SnarkProof::load(&self.proof).map_err(|e| { + anyhow::anyhow!("Failed to load SnarkProof from file {}: {}", self.proof, e) + })?; + + verify_snark_proof(&proof, &self.verkey).map_err(|e| { + anyhow::anyhow!("SNARK proof verification failed for proof {}: {}", self.proof, e) + }) + } +} diff --git a/cli/src/commands/verify_stark.rs b/cli/src/commands/verify_stark.rs index 0a595a752..485b2b485 100644 --- a/cli/src/commands/verify_stark.rs +++ b/cli/src/commands/verify_stark.rs @@ -1,13 +1,11 @@ -use anyhow::{anyhow, Ok, Result}; +use anyhow::Result; use clap::Parser; use colored::Colorize; -use proofman_common::initialize_logger; -use proofman_verifier::verify; -use std::fs; - +use std::path::PathBuf; use zisk_build::ZISK_VERSION_MESSAGE; - -use super::get_default_verkey; +use zisk_sdk::{ + get_proving_key, setup_logger, verify_zisk_proof_with_proving_key, ZiskProofWithPublicValues, +}; #[derive(Parser)] #[command(author, about, long_about = None, version = ZISK_VERSION_MESSAGE)] @@ -21,12 +19,12 @@ pub struct ZiskVerify { pub verbose: u8, // Using u8 to hold the number of `-v` #[clap(short = 'k', long)] - pub vk: Option, + pub proving_key: Option, } impl ZiskVerify { pub fn run(&self) -> Result<()> { - initialize_logger(self.verbose.into(), None); + setup_logger(self.verbose.into()); tracing::info!( "{}", @@ -36,15 +34,20 @@ impl ZiskVerify { let start = std::time::Instant::now(); - let proof = fs::read(&self.proof)?; + let proof = ZiskProofWithPublicValues::load(&self.proof).map_err(|e| { + anyhow::anyhow!("Error loading VADCoP final proof from {}: {}", &self.proof, e) + })?; - let vk = &self.get_verkey(); - - let valid = verify(&proof, vk); + let result = verify_zisk_proof_with_proving_key( + proof.get_proof(), + proof.get_publics(), + proof.get_program_vk(), + get_proving_key(self.proving_key.as_ref()), + ); let elapsed = start.elapsed(); - if !valid { + if result.is_err() { tracing::info!("{}", "\u{2717} Stark proof was not verified".bright_red().bold()); } else { tracing::info!("{}", "\u{2713} Stark proof was verified".bright_green().bold()); @@ -54,18 +57,6 @@ impl ZiskVerify { tracing::info!(" time: {} milliseconds", elapsed.as_millis()); tracing::info!("{}", "----------------------------".bright_green().bold()); - if !valid { - Err(anyhow!("Stark proof was not verified")) - } else { - Ok(()) - } - } - - /// Gets the verification key - /// Uses the default one if not specified by user. - pub fn get_verkey(&self) -> Vec { - let vk_file = - if self.vk.is_none() { get_default_verkey() } else { self.vk.clone().unwrap() }; - fs::read(&vk_file).unwrap() + result } } diff --git a/cli/src/toolchain/build_toolchain.rs b/cli/src/toolchain/build_toolchain.rs index 35920ff51..8fa1eb63d 100644 --- a/cli/src/toolchain/build_toolchain.rs +++ b/cli/src/toolchain/build_toolchain.rs @@ -6,7 +6,19 @@ use zisk_build::RUSTUP_TOOLCHAIN_NAME; #[derive(Parser)] #[command(name = "build-toolchain", about = "Build the cargo-zisk toolchain.")] -pub struct BuildToolchainCmd {} +pub struct BuildToolchainCmd { + /// Name for the toolchain in rustup + #[arg(short, long)] + name: Option, + + /// Git branch to checkout (default: zisk) + #[arg(short, long)] + branch: Option, + + /// Git tag to checkout (takes precedence over branch) + #[arg(short, long)] + tag: Option, +} impl BuildToolchainCmd { pub fn run(&self) -> Result<()> { @@ -26,7 +38,15 @@ impl BuildToolchainCmd { } println!("No ZISK_BUILD_DIR detected, cloning rust."); - let repo_url = "https://{}@github.com/0xPolygonHermez/rust"; + let repo_url = "https://github.com/0xPolygonHermez/rust"; + + // Determine the ref to checkout: tag takes precedence over branch + let git_ref = self + .tag + .as_ref() + .or(self.branch.as_ref()) + .map(|s| s.as_str()) + .unwrap_or("zisk"); Command::new("git") .args([ @@ -34,12 +54,50 @@ impl BuildToolchainCmd { repo_url, "--depth=1", "--single-branch", - "--branch=zisk", + &format!("--branch={}", git_ref), "zisk-rust", ]) .current_dir(&temp_dir) .run()?; Command::new("git").args(["reset", "--hard"]).current_dir(&dir).run()?; + + #[cfg(feature = "custom_rust_llvm")] + // Initialize submodules EXCEPT llvm-project + // llvm-project will be initialized by the bootstrap system which will also + // apply patches from src/llvm-patches/ automatically + let submodules_to_init = [ + "library/backtrace", + "library/stdarch", + "src/doc/book", + "src/doc/edition-guide", + "src/doc/embedded-book", + "src/doc/nomicon", + "src/doc/reference", + "src/doc/rust-by-example", + "src/gcc", + "src/tools/cargo", + "src/tools/enzyme", + "src/tools/rustc-perf", + ]; + + #[cfg(feature = "custom_rust_llvm")] + for submodule in &submodules_to_init { + println!("Initializing submodule: {}", submodule); + Command::new("git") + .args([ + "submodule", + "update", + "--init", + "--recursive", + "--progress", + "--", + submodule, + ]) + .current_dir(&dir) + .run() + .ok(); // Ignore errors for optional submodules + } + #[cfg(not(feature = "custom_rust_llvm"))] Command::new("git") .args(["submodule", "update", "--init", "--recursive", "--progress"]) .current_dir(&dir) @@ -65,6 +123,10 @@ impl BuildToolchainCmd { || format!("while creating file {temp_dir:?}/riscv64ima-zisk-zkvm-elf.json"), )?; + // Note: LLVM patches are applied automatically by the bootstrap system + // (see src/bootstrap/src/core/build_steps/llvm.rs::apply_llvm_patches) + // The patches are located in src/llvm-patches/*.patch + // Build the toolchain. Command::new("python3") .env("RUST_TARGET_PATH", &temp_dir) @@ -83,8 +145,9 @@ impl BuildToolchainCmd { .run() .with_context(|| "while building the Rust toolchain")?; + let rustup_toolchain_name = self.name.as_deref().unwrap_or(RUSTUP_TOOLCHAIN_NAME); // Remove the existing toolchain from rustup, if it exists. - match Command::new("rustup").args(["toolchain", "remove", RUSTUP_TOOLCHAIN_NAME]).run() { + match Command::new("rustup").args(["toolchain", "remove", rustup_toolchain_name]).run() { Ok(_) => println!("Successfully removed existing toolchain."), Err(_) => println!("No existing toolchain to remove."), } @@ -107,7 +170,7 @@ impl BuildToolchainCmd { // Link the toolchain to rustup. Command::new("rustup") - .args(["toolchain", "link", RUSTUP_TOOLCHAIN_NAME]) + .args(["toolchain", "link", rustup_toolchain_name]) .arg(&toolchain_dir) .run() .with_context(|| "while linking the toolchain to rustup")?; diff --git a/cli/src/toolchain/install_toolchain.rs b/cli/src/toolchain/install_toolchain.rs index ef511c779..f43e627d5 100644 --- a/cli/src/toolchain/install_toolchain.rs +++ b/cli/src/toolchain/install_toolchain.rs @@ -19,7 +19,11 @@ use crate::{get_target, get_toolchain_download_url, is_supported_target}; #[derive(Parser)] #[command(name = "install-toolchain", about = "Install the cargo-zisk toolchain.")] pub struct InstallToolchainCmd { + #[arg(short, long)] version: Option, + + #[arg(short, long)] + name: Option, } impl InstallToolchainCmd { @@ -99,10 +103,12 @@ impl InstallToolchainCmd { } } + let rustup_toolchain_name = self.name.as_deref().unwrap_or(RUSTUP_TOOLCHAIN_NAME); + // Remove the existing toolchain from rustup, if it exists. let mut child = Command::new("rustup") .current_dir(&root_dir) - .args(["toolchain", "remove", RUSTUP_TOOLCHAIN_NAME]) + .args(["toolchain", "remove", rustup_toolchain_name]) .stdout(std::process::Stdio::piped()) .spawn()?; let res = child.wait(); @@ -139,7 +145,7 @@ impl InstallToolchainCmd { .args([ "toolchain", "link", - RUSTUP_TOOLCHAIN_NAME, + rustup_toolchain_name, &new_toolchain_dir.to_string_lossy(), ]) .status()?; diff --git a/cli/src/toolchain/new.rs b/cli/src/toolchain/new.rs index e5df8b33f..d89038e5a 100644 --- a/cli/src/toolchain/new.rs +++ b/cli/src/toolchain/new.rs @@ -19,14 +19,20 @@ impl NewCmd { } // Clone the repository. - let output = Command::new("git") - .arg("clone") + let mut cmd = Command::new("git"); + cmd.arg("clone") .arg(repo_url) .arg(root.as_os_str()) .arg("--recurse-submodules") - .arg("--depth=1") - .output() - .expect("failed to execute command"); + .arg("--depth=1"); + + // Check if ZISK_TEMPLATE_BRANCH environment variable is set, and if so, use it as the branch to clone. + if let Ok(branch) = std::env::var("ZISK_TEMPLATE_BRANCH") { + cmd.arg("--branch").arg(&branch); + } + + let output = cmd.output().expect("failed to execute command"); + if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); return Err(anyhow::anyhow!("failed to clone repository: {}", stderr)); diff --git a/cli/src/ux.rs b/cli/src/ux.rs index 543ab3c8d..3e5fde8d6 100644 --- a/cli/src/ux.rs +++ b/cli/src/ux.rs @@ -1,5 +1,7 @@ use colored::Colorize; use sysinfo::System; +use tracing::info; +use zisk_common::ZiskExecutorTime; pub fn print_banner() { println!(); @@ -49,3 +51,32 @@ pub fn print_banner() { // available_mem // ); } + +pub fn print_banner_command(command: impl std::fmt::Display) { + print_banner_field("Command", command); +} + +pub fn print_banner_field(label: &str, value: impl std::fmt::Display) { + println!("{} {}", format!("{: >12}", label).bright_green().bold(), value); +} + +pub fn print_execution_summary( + executor_time: &ZiskExecutorTime, + total_duration: std::time::Duration, + steps: u64, +) { + info!("Execution completed in {:.2?}, steps: {}", total_duration, steps); + info!( + "Execution summary: {} {:.2?} + {} {:.2?} + {} {:.2?} + {} {:.2?}", + "Proofman".dimmed(), + total_duration - executor_time.total_duration, + "Execution".dimmed(), + executor_time.execution_duration, + "Count&Plan".dimmed(), + executor_time.count_and_plan_duration, + "Count&Plan MO".dimmed(), + executor_time.count_and_plan_mo_duration, + ); + + /*●⎿✔◼✽*/ +} diff --git a/common/Cargo.toml b/common/Cargo.toml index e2dd40cf3..7cd718f7f 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -11,30 +11,35 @@ build = "build.rs" [dependencies] zisk-core = { workspace = true } -zisk-pil = { workspace = true } -witness = { workspace = true } proofman-common = { workspace = true } +proofman-util = { workspace = true } proofman = { workspace = true } fields = { workspace = true } -tracing = { workspace = true} +tracing = { workspace = true } tracing-subscriber = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } anyhow = { workspace = true } -bytemuck = { workspace = true } -zstd = { workspace = true } +thiserror = { workspace = true } +libc = "0.2" + +# QUIC networking +tokio = { workspace = true } +quinn = "0.11" +rustls = { version = "0.23", features = ["ring"] } +rcgen = "0.14" + +bincode = { workspace = true } # Distributed mode (mpi) is only supported on Linux x86_64 [target.'cfg(all(target_os = "linux", target_arch = "x86_64"))'.dependencies] mpi = { workspace = true } -libc = "0.2" - [features] default = [] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] +disable_distributed = [] stats = [] [lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(distributed)'] } \ No newline at end of file +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(distributed)'] } diff --git a/common/src/bus/bus_device.rs b/common/src/bus/bus_device.rs index 4184286c2..26eef3841 100644 --- a/common/src/bus/bus_device.rs +++ b/common/src/bus/bus_device.rs @@ -1,7 +1,4 @@ -use std::{any::Any, collections::VecDeque}; - -use super::BusId; -use crate::MemCollectorInfo; +use std::any::Any; /// Represents a subscriber in the `DataBus` system. /// @@ -11,57 +8,6 @@ use crate::MemCollectorInfo; /// # Associated Type /// * `D` - The type of data handled by the `BusDevice`. pub trait BusDevice: Any + Send + Sync { - /// Processes incoming data sent to the device. - /// - /// # Arguments - /// * `bus_id` - The ID of the bus that sent the data. - /// * `data` - A reference to the data payload being processed. - /// * `pending` – A queue of pending bus operations used to send derived inputs. - /// - /// # Returns - /// A boolean indicating whether the program should continue execution or terminate. - /// Returns `true` to continue execution, `false` to stop. - fn process_data( - &mut self, - bus_id: &BusId, - data: &[D], - pending: &mut VecDeque<(BusId, Vec)>, - mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool; - - /// Returns the bus IDs associated with this instance. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec; - /// Converts the device to a generic `Any` type. fn as_any(self: Box) -> Box; - - /// Performs any necessary cleanup or finalization when the metrics instance is closed. - fn on_close(&mut self) {} -} - -impl BusDevice for Box> { - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - pending: &mut VecDeque<(BusId, Vec)>, - mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { - (**self).process_data(bus_id, data, pending, mem_collector_info) - } - - fn bus_id(&self) -> Vec { - (**self).bus_id() - } - - fn as_any(self: Box) -> Box { - (*self).as_any() - } - - fn on_close(&mut self) { - (**self).on_close() - } } diff --git a/common/src/bus/bus_device_metrics.rs b/common/src/bus/bus_device_metrics.rs index e1a2e44cc..88d452e2d 100644 --- a/common/src/bus/bus_device_metrics.rs +++ b/common/src/bus/bus_device_metrics.rs @@ -2,16 +2,14 @@ //! of `BusDevice` and `Metrics`, providing a unified interface for monitoring and managing //! bus operations with associated metrics. -use std::{any::Any, collections::VecDeque}; +use super::BusDevice; -use super::{BusDevice, BusId}; - -use crate::MemCollectorInfo; use crate::Metrics; -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub enum BusDeviceMode { Counter, + CounterAsm, InputGenerator, } @@ -22,30 +20,6 @@ pub enum BusDeviceMode { /// maintaining compatibility with `Metrics` functionality. pub trait BusDeviceMetrics: BusDevice + Metrics + std::any::Any {} -impl BusDevice for Box { - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - pending: &mut VecDeque<(BusId, Vec)>, - mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { - (**self).process_data(bus_id, data, pending, mem_collector_info) - } - - fn bus_id(&self) -> Vec { - (**self).bus_id() - } - - fn as_any(self: Box) -> Box { - (*self).as_any() - } - - fn on_close(&mut self) { - (**self).on_close() - } -} - /// Blanket implementation of `BusDeviceMetrics` for any type implementing `BusDevice`, /// `Metrics`, and `std::any::Any`. impl + Metrics + std::any::Any> BusDeviceMetrics for T {} diff --git a/common/src/bus/data_bus_mem.rs b/common/src/bus/data_bus_mem.rs index b42fa663e..13064a936 100644 --- a/common/src/bus/data_bus_mem.rs +++ b/common/src/bus/data_bus_mem.rs @@ -50,17 +50,3 @@ impl MemBusData { [data[MEM_VALUE_0], data[MEM_VALUE_1]] } } - -pub struct MemCollectorInfo { - pub from_addr: u32, - pub to_addr: u32, -} - -impl MemCollectorInfo { - pub fn skip_addr(&self, addr: u32) -> bool { - if addr > self.to_addr || addr < self.from_addr { - return true; - } - false - } -} diff --git a/common/src/bus/data_bus_operation.rs b/common/src/bus/data_bus_operation.rs index 595b9b390..ca20acff5 100644 --- a/common/src/bus/data_bus_operation.rs +++ b/common/src/bus/data_bus_operation.rs @@ -12,6 +12,7 @@ pub const OPERATION_BUS_ID: BusId = BusId(0); /// The size of the operation data payload. pub const OPERATION_BUS_DATA_SIZE: usize = 4; // op,op_type,a,b +pub const OPERATION_PRECOMPILED_BUS_DATA_SIZE: usize = 5; // op,op_type,a,b, step // worst case: // arith_256: 3 x 256 + 2 addr = 3 * 4 + 2 = 14 @@ -34,46 +35,68 @@ const POINT_384_BITS_SIZE: usize = 2 * DATA_384_BITS_SIZE; const COMPLEX_OVER_384_BITS_SIZE: usize = 2 * DATA_384_BITS_SIZE; // use OPERATION_BUS_DATA_SIZE because a = step, b = addr -pub const OPERATION_BUS_KECCAKF_DATA_SIZE: usize = OPERATION_BUS_DATA_SIZE + 25; + +pub const OPERATION_BUS_KECCAKF_DATA_SIZE: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + 25; +pub const OPERATION_BUS_POSEIDON2_DATA_SIZE: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + 16; pub const OPERATION_BUS_SHA256F_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 3 * DATA_256_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 3 * DATA_256_BITS_SIZE; pub const OPERATION_BUS_ARITH_256_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 5 * INDIRECTION_SIZE + 3 * DATA_256_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 5 * INDIRECTION_SIZE + 3 * DATA_256_BITS_SIZE; pub const OPERATION_BUS_ARITH_256_MOD_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 5 * INDIRECTION_SIZE + 4 * DATA_256_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 5 * INDIRECTION_SIZE + 4 * DATA_256_BITS_SIZE; pub const OPERATION_BUS_SECP256K1_ADD_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * POINT_256_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * POINT_256_BITS_SIZE; pub const OPERATION_BUS_SECP256K1_DBL_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + POINT_256_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + POINT_256_BITS_SIZE; pub const OPERATION_BUS_BN254_CURVE_ADD_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * POINT_256_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * POINT_256_BITS_SIZE; pub const OPERATION_BUS_BN254_CURVE_DBL_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + POINT_256_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + POINT_256_BITS_SIZE; pub const OPERATION_BUS_BN254_COMPLEX_ADD_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * COMPLEX_OVER_256_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * COMPLEX_OVER_256_BITS_SIZE; pub const OPERATION_BUS_BN254_COMPLEX_SUB_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * COMPLEX_OVER_256_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * COMPLEX_OVER_256_BITS_SIZE; pub const OPERATION_BUS_BN254_COMPLEX_MUL_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * COMPLEX_OVER_256_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * COMPLEX_OVER_256_BITS_SIZE; pub const OPERATION_BUS_ARITH_384_MOD_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 5 * INDIRECTION_SIZE + 4 * DATA_384_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 5 * INDIRECTION_SIZE + 4 * DATA_384_BITS_SIZE; pub const OPERATION_BUS_BLS12_381_CURVE_ADD_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * POINT_384_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * POINT_384_BITS_SIZE; pub const OPERATION_BUS_BLS12_381_CURVE_DBL_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + POINT_384_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + POINT_384_BITS_SIZE; pub const OPERATION_BUS_BLS12_381_COMPLEX_ADD_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * COMPLEX_OVER_384_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * COMPLEX_OVER_384_BITS_SIZE; pub const OPERATION_BUS_BLS12_381_COMPLEX_SUB_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * COMPLEX_OVER_384_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * COMPLEX_OVER_384_BITS_SIZE; pub const OPERATION_BUS_BLS12_381_COMPLEX_MUL_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * COMPLEX_OVER_384_BITS_SIZE; + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * COMPLEX_OVER_384_BITS_SIZE; +pub const OPERATION_BUS_SECP256R1_ADD_DATA_SIZE: usize = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 2 * POINT_256_BITS_SIZE; +pub const OPERATION_BUS_SECP256R1_DBL_DATA_SIZE: usize = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + POINT_256_BITS_SIZE; +pub const OPERATION_BUS_BLAKE2_DATA_SIZE: usize = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2 * INDIRECTION_SIZE + 33 * DATA_64_BITS_SIZE; // bus_data_size + 4 params (&a, &b, cin, &c, a, b) -pub const OPERATION_BUS_ADD_256_DATA_SIZE: usize = - OPERATION_BUS_DATA_SIZE + 4 * PARAMS_SIZE + 2 * DATA_256_BITS_SIZE + SINGLE_RESULT_SIZE; +pub const OPERATION_BUS_ADD_256_DATA_SIZE: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + + 4 * PARAMS_SIZE + + 2 * DATA_256_BITS_SIZE + + SINGLE_RESULT_SIZE; + +pub const DMA_ENCODED: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE; +pub const DMA_MEMCMP_COUNT_BUS: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + 1; +// 5 bus_precompiled_data + encoded +pub const OPERATION_BUS_DMA_MEMCPY_DATA_SIZE: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + 1; +pub const OPERATION_BUS_DMA_XMEMCPY_DATA_SIZE: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + 1; +// 5 bus_precompiled_data + encoded + count_eq +pub const OPERATION_BUS_DMA_MEMCMP_DATA_SIZE: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2; +pub const OPERATION_BUS_DMA_XMEMCMP_DATA_SIZE: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2; +// 5 bus_precompiled_data + encoded +pub const OPERATION_BUS_DMA_INPUTCPY_DATA_SIZE: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + 1; +// 5 bus_precompiled_data + encoded (fill_byte encoded) +pub const OPERATION_BUS_DMA_XMEMSET_DATA_SIZE: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + 1; -// 4 bus_data + 5 addr + 4 x 384 = 4 + 5 + 4 * 6 = 33 -pub const MAX_OPERATION_DATA_SIZE: usize = OPERATION_BUS_ARITH_384_MOD_DATA_SIZE; +pub const MAX_OPERATION_DATA_SIZE: usize = OPERATION_BUS_BLAKE2_DATA_SIZE; /// Index of the operation value in the operation data payload. pub const OP: usize = 0; @@ -87,12 +110,16 @@ pub const A: usize = 2; /// Index of the `b` value in the operation data payload. pub const B: usize = 3; +/// Index of the `STEP` value in the operation data payload (only for precompiled operations). +pub const STEP: usize = 4; + /// Type alias for operation data payload. pub type OperationData = [D; OPERATION_BUS_DATA_SIZE]; /// Type alias for precompiles operation data payload. pub type OperationKeccakData = [D; OPERATION_BUS_KECCAKF_DATA_SIZE]; pub type OperationSha256Data = [D; OPERATION_BUS_SHA256F_DATA_SIZE]; +pub type OperationPoseidon2Data = [D; OPERATION_BUS_POSEIDON2_DATA_SIZE]; pub type OperationArith256Data = [D; OPERATION_BUS_ARITH_256_DATA_SIZE]; pub type OperationArith256ModData = [D; OPERATION_BUS_ARITH_256_MOD_DATA_SIZE]; pub type OperationSecp256k1AddData = [D; OPERATION_BUS_SECP256K1_ADD_DATA_SIZE]; @@ -109,11 +136,21 @@ pub type OperationBls12_381ComplexAddData = [D; OPERATION_BUS_BLS12_381_COMPL pub type OperationBls12_381ComplexSubData = [D; OPERATION_BUS_BLS12_381_COMPLEX_SUB_DATA_SIZE]; pub type OperationBls12_381ComplexMulData = [D; OPERATION_BUS_BLS12_381_COMPLEX_MUL_DATA_SIZE]; pub type OperationAdd256Data = [D; OPERATION_BUS_ADD_256_DATA_SIZE]; +pub type OperationDmaMemCpyData = [D; OPERATION_BUS_DMA_MEMCPY_DATA_SIZE]; +pub type OperationDmaMemCmpData = [D; OPERATION_BUS_DMA_MEMCMP_DATA_SIZE]; +pub type OperationDmaInputCpyData = [D; OPERATION_BUS_DMA_INPUTCPY_DATA_SIZE]; +pub type OperationDmaXMemCpyData = [D; OPERATION_BUS_DMA_XMEMCPY_DATA_SIZE]; +pub type OperationDmaXMemCmpData = [D; OPERATION_BUS_DMA_XMEMCMP_DATA_SIZE]; +pub type OperationDmaXMemSetData = [D; OPERATION_BUS_DMA_XMEMSET_DATA_SIZE]; +pub type OperationSecp256r1AddData = [D; OPERATION_BUS_SECP256R1_ADD_DATA_SIZE]; +pub type OperationSecp256r1DblData = [D; OPERATION_BUS_SECP256R1_DBL_DATA_SIZE]; +pub type OperationBlake2Data = [D; OPERATION_BUS_BLAKE2_DATA_SIZE]; pub enum ExtOperationData { OperationData(OperationData), OperationKeccakData(OperationKeccakData), OperationSha256Data(OperationSha256Data), + OperationPoseidon2Data(OperationPoseidon2Data), OperationArith256Data(OperationArith256Data), OperationArith256ModData(OperationArith256ModData), OperationSecp256k1AddData(OperationSecp256k1AddData), @@ -130,27 +167,17 @@ pub enum ExtOperationData { OperationBls12_381ComplexSubData(OperationBls12_381ComplexSubData), OperationBls12_381ComplexMulData(OperationBls12_381ComplexMulData), OperationAdd256Data(OperationAdd256Data), + OperationDmaMemCpyData(OperationDmaMemCpyData), + OperationDmaMemCmpData(OperationDmaMemCmpData), + OperationDmaInputCpyData(OperationDmaInputCpyData), + OperationDmaXMemSetData(OperationDmaXMemSetData), + OperationDmaXMemCpyData(OperationDmaXMemCpyData), + OperationDmaXMemCmpData(OperationDmaXMemCmpData), + OperationSecp256r1AddData(OperationSecp256r1AddData), + OperationSecp256r1DblData(OperationSecp256r1DblData), + OperationBlake2Data(OperationBlake2Data), } -const KECCAK_OP: u8 = ZiskOp::Keccak.code(); -const SHA256_OP: u8 = ZiskOp::Sha256.code(); -const ARITH256_OP: u8 = ZiskOp::Arith256.code(); -const ARITH256_MOD_OP: u8 = ZiskOp::Arith256Mod.code(); -const SECP256K1_ADD_OP: u8 = ZiskOp::Secp256k1Add.code(); -const SECP256K1_DBL_OP: u8 = ZiskOp::Secp256k1Dbl.code(); -const BN254_CURVE_ADD_OP: u8 = ZiskOp::Bn254CurveAdd.code(); -const BN254_CURVE_DBL_OP: u8 = ZiskOp::Bn254CurveDbl.code(); -const BN254_COMPLEX_ADD_OP: u8 = ZiskOp::Bn254ComplexAdd.code(); -const BN254_COMPLEX_SUB_OP: u8 = ZiskOp::Bn254ComplexSub.code(); -const BN254_COMPLEX_MUL_OP: u8 = ZiskOp::Bn254ComplexMul.code(); -const ARITH384_MOD_OP: u8 = ZiskOp::Arith384Mod.code(); -const BLS12_381_CURVE_ADD_OP: u8 = ZiskOp::Bls12_381CurveAdd.code(); -const BLS12_381_CURVE_DBL_OP: u8 = ZiskOp::Bls12_381CurveDbl.code(); -const BLS12_381_COMPLEX_ADD_OP: u8 = ZiskOp::Bls12_381ComplexAdd.code(); -const BLS12_381_COMPLEX_SUB_OP: u8 = ZiskOp::Bls12_381ComplexSub.code(); -const BLS12_381_COMPLEX_MUL_OP: u8 = ZiskOp::Bls12_381ComplexMul.code(); -const ADD256_OP: u8 = ZiskOp::Add256.code(); - // impl> TryFrom<&[D]> for ExtOperationData { impl> TryFrom<&[D]> for ExtOperationData { type Error = &'static str; @@ -161,96 +188,146 @@ impl> TryFrom<&[D]> for ExtOperationData { } let op = data[OP].into(); match op as u8 { - KECCAK_OP => { + ZiskOp::KECCAK => { let array: OperationKeccakData = data.try_into().map_err(|_| "Invalid OperationKeccakData size")?; Ok(ExtOperationData::OperationKeccakData(array)) } - SHA256_OP => { + ZiskOp::SHA256 => { let array: OperationSha256Data = data.try_into().map_err(|_| "Invalid OperationSha256Data size")?; Ok(ExtOperationData::OperationSha256Data(array)) } - ARITH256_OP => { + ZiskOp::POSEIDON2 => { + let array: OperationPoseidon2Data = + data.try_into().map_err(|_| "Invalid OperationPoseidon2Data size")?; + Ok(ExtOperationData::OperationPoseidon2Data(array)) + } + ZiskOp::BLAKE2 => { + let array: OperationBlake2Data = + data.try_into().map_err(|_| "Invalid OperationBlake2Data size")?; + Ok(ExtOperationData::OperationBlake2Data(array)) + } + ZiskOp::ARITH256 => { let array: OperationArith256Data = data.try_into().map_err(|_| "Invalid OperationArith256Data size")?; Ok(ExtOperationData::OperationArith256Data(array)) } - ARITH256_MOD_OP => { + ZiskOp::ARITH256_MOD => { let array: OperationArith256ModData = data.try_into().map_err(|_| "Invalid OperationArith256ModData size")?; Ok(ExtOperationData::OperationArith256ModData(array)) } - SECP256K1_ADD_OP => { + ZiskOp::SECP256K1_ADD => { let array: OperationSecp256k1AddData = data.try_into().map_err(|_| "Invalid OperationSecp256k1AddData size")?; Ok(ExtOperationData::OperationSecp256k1AddData(array)) } - SECP256K1_DBL_OP => { + ZiskOp::SECP256K1_DBL => { let array: OperationSecp256k1DblData = data.try_into().map_err(|_| "Invalid OperationSecp256k1DblData size")?; Ok(ExtOperationData::OperationSecp256k1DblData(array)) } - BN254_CURVE_ADD_OP => { + ZiskOp::BN254_CURVE_ADD => { let array: OperationBn254CurveAddData = data.try_into().map_err(|_| "Invalid OperationBn254CurveAddData size")?; Ok(ExtOperationData::OperationBn254CurveAddData(array)) } - BN254_CURVE_DBL_OP => { + ZiskOp::BN254_CURVE_DBL => { let array: OperationBn254CurveDblData = data.try_into().map_err(|_| "Invalid OperationBn254CurveDblData size")?; Ok(ExtOperationData::OperationBn254CurveDblData(array)) } - BN254_COMPLEX_ADD_OP => { + ZiskOp::BN254_COMPLEX_ADD => { let array: OperationBn254ComplexAddData = data.try_into().map_err(|_| "Invalid OperationBn254ComplexAddData size")?; Ok(ExtOperationData::OperationBn254ComplexAddData(array)) } - BN254_COMPLEX_SUB_OP => { + ZiskOp::BN254_COMPLEX_SUB => { let array: OperationBn254ComplexSubData = data.try_into().map_err(|_| "Invalid OperationBn254ComplexSubData size")?; Ok(ExtOperationData::OperationBn254ComplexSubData(array)) } - BN254_COMPLEX_MUL_OP => { + ZiskOp::BN254_COMPLEX_MUL => { let array: OperationBn254ComplexMulData = data.try_into().map_err(|_| "Invalid OperationBn254ComplexMulData size")?; Ok(ExtOperationData::OperationBn254ComplexMulData(array)) } - ARITH384_MOD_OP => { + ZiskOp::ARITH384_MOD => { let array: OperationArith384ModData = data.try_into().map_err(|_| "Invalid OperationArith384ModData size")?; Ok(ExtOperationData::OperationArith384ModData(array)) } - BLS12_381_CURVE_ADD_OP => { + ZiskOp::BLS12_381_CURVE_ADD => { let array: OperationBls12_381CurveAddData = data.try_into().map_err(|_| "Invalid OperationBls12_381CurveAddData size")?; Ok(ExtOperationData::OperationBls12_381CurveAddData(array)) } - BLS12_381_CURVE_DBL_OP => { + ZiskOp::BLS12_381_CURVE_DBL => { let array: OperationBls12_381CurveDblData = data.try_into().map_err(|_| "Invalid OperationBls12_381CurveDblData size")?; Ok(ExtOperationData::OperationBls12_381CurveDblData(array)) } - BLS12_381_COMPLEX_ADD_OP => { + ZiskOp::BLS12_381_COMPLEX_ADD => { let array: OperationBls12_381ComplexAddData = data.try_into().map_err(|_| "Invalid OperationBls12_381ComplexAddData size")?; Ok(ExtOperationData::OperationBls12_381ComplexAddData(array)) } - BLS12_381_COMPLEX_SUB_OP => { + ZiskOp::BLS12_381_COMPLEX_SUB => { let array: OperationBls12_381ComplexSubData = data.try_into().map_err(|_| "Invalid OperationBls12_381ComplexSubData size")?; Ok(ExtOperationData::OperationBls12_381ComplexSubData(array)) } - BLS12_381_COMPLEX_MUL_OP => { + ZiskOp::BLS12_381_COMPLEX_MUL => { let array: OperationBls12_381ComplexMulData = data.try_into().map_err(|_| "Invalid OperationBls12_381ComplexMulData size")?; Ok(ExtOperationData::OperationBls12_381ComplexMulData(array)) } - ADD256_OP => { + ZiskOp::ADD256 => { let array: OperationAdd256Data = data.try_into().map_err(|_| "Invalid OperationAdd256Data size")?; Ok(ExtOperationData::OperationAdd256Data(array)) } + ZiskOp::DMA_MEMCPY => { + let array: OperationDmaMemCpyData = + data.try_into().map_err(|_| "Invalid OperationDmaMemCpyData size")?; + Ok(ExtOperationData::OperationDmaMemCpyData(array)) + } + ZiskOp::DMA_MEMCMP => { + let array: OperationDmaMemCmpData = + data.try_into().map_err(|_| "Invalid OperationDmaMemCmpData size")?; + Ok(ExtOperationData::OperationDmaMemCmpData(array)) + } + ZiskOp::DMA_INPUTCPY => { + let array: OperationDmaInputCpyData = + data.try_into().map_err(|_| "Invalid OperationDmaInputCpyData size")?; + Ok(ExtOperationData::OperationDmaInputCpyData(array)) + } + ZiskOp::DMA_XMEMSET => { + let array: OperationDmaXMemSetData = + data.try_into().map_err(|_| "Invalid OperationDmaXMemSetData size")?; + Ok(ExtOperationData::OperationDmaXMemSetData(array)) + } + ZiskOp::DMA_XMEMCPY => { + let array: OperationDmaXMemCpyData = + data.try_into().map_err(|_| "Invalid OperationDmaXMemCpyData size")?; + Ok(ExtOperationData::OperationDmaXMemCpyData(array)) + } + ZiskOp::DMA_XMEMCMP => { + let array: OperationDmaXMemCmpData = + data.try_into().map_err(|_| "Invalid OperationDmaXMemCmpData size")?; + Ok(ExtOperationData::OperationDmaXMemCmpData(array)) + } + ZiskOp::SECP256R1_ADD => { + let array: OperationSecp256r1AddData = + data.try_into().map_err(|_| "Invalid OperationSecp256r1AddData size")?; + Ok(ExtOperationData::OperationSecp256r1AddData(array)) + } + ZiskOp::SECP256R1_DBL => { + let array: OperationSecp256r1DblData = + data.try_into().map_err(|_| "Invalid OperationSecp256r1DblData size")?; + Ok(ExtOperationData::OperationSecp256r1DblData(array)) + } _ => { let array: OperationData = data.try_into().map_err(|_| "Invalid OperationData size")?; @@ -284,9 +361,9 @@ impl OperationBusData { op_type: PayloadType, a: u64, b: u64, - pending: &mut VecDeque<(BusId, Vec)>, + pending: &mut VecDeque<(BusId, Vec, Vec)>, ) { - pending.push_back((OPERATION_BUS_ID, vec![op as u64, op_type, a, b])); + pending.push_back((OPERATION_BUS_ID, vec![op as u64, op_type, a, b], Vec::new())); } /// Creates operation data from a `ZiskInst` instruction and its context. @@ -303,166 +380,307 @@ impl OperationBusData { let b = if inst.m32 { ctx.b & 0xffff_ffff } else { ctx.b }; let op = inst.op as u64; let op_type = inst.op_type as u64; + let step = ctx.step; match inst.op_type { ZiskOperationType::Keccak => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationKeccakData(data) } ZiskOperationType::Sha256 => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationSha256Data(data) } + ZiskOperationType::Poseidon2 => { + let mut data = + unsafe { uninit_array::().assume_init() }; + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); + ExtOperationData::OperationPoseidon2Data(data) + } + + ZiskOperationType::Blake2 => { + let mut data = + unsafe { uninit_array::().assume_init() }; + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); + ExtOperationData::OperationBlake2Data(data) + } + ZiskOperationType::ArithEq => match inst.op { - ARITH256_OP => { + ZiskOp::ARITH256 => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationArith256Data(data) } - ARITH256_MOD_OP => { + ZiskOp::ARITH256_MOD => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationArith256ModData(data) } - SECP256K1_ADD_OP => { + ZiskOp::SECP256K1_ADD => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationSecp256k1AddData(data) } - SECP256K1_DBL_OP => { + ZiskOp::SECP256K1_DBL => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationSecp256k1DblData(data) } - BN254_CURVE_ADD_OP => { + ZiskOp::BN254_CURVE_ADD => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationBn254CurveAddData(data) } - BN254_CURVE_DBL_OP => { + ZiskOp::BN254_CURVE_DBL => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationBn254CurveDblData(data) } - BN254_COMPLEX_ADD_OP => { + ZiskOp::BN254_COMPLEX_ADD => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationBn254ComplexAddData(data) } - BN254_COMPLEX_SUB_OP => { + ZiskOp::BN254_COMPLEX_SUB => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationBn254ComplexSubData(data) } - BN254_COMPLEX_MUL_OP => { + ZiskOp::BN254_COMPLEX_MUL => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationBn254ComplexMulData(data) } + ZiskOp::SECP256R1_ADD => { + let mut data = unsafe { + uninit_array::().assume_init() + }; + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); + ExtOperationData::OperationSecp256r1AddData(data) + } + ZiskOp::SECP256R1_DBL => { + let mut data = unsafe { + uninit_array::().assume_init() + }; + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); + ExtOperationData::OperationSecp256r1DblData(data) + } _ => ExtOperationData::OperationData([op, op_type, a, b]), }, ZiskOperationType::ArithEq384 => match inst.op { - ARITH384_MOD_OP => { + ZiskOp::ARITH384_MOD => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationArith384ModData(data) } - BLS12_381_CURVE_ADD_OP => { + ZiskOp::BLS12_381_CURVE_ADD => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationBls12_381CurveAddData(data) } - BLS12_381_CURVE_DBL_OP => { + ZiskOp::BLS12_381_CURVE_DBL => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationBls12_381CurveDblData(data) } - BLS12_381_COMPLEX_ADD_OP => { + ZiskOp::BLS12_381_COMPLEX_ADD => { let mut data = unsafe { uninit_array::() .assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationBls12_381ComplexAddData(data) } - BLS12_381_COMPLEX_SUB_OP => { + ZiskOp::BLS12_381_COMPLEX_SUB => { let mut data = unsafe { uninit_array::() .assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationBls12_381ComplexSubData(data) } - BLS12_381_COMPLEX_MUL_OP => { + ZiskOp::BLS12_381_COMPLEX_MUL => { let mut data = unsafe { uninit_array::() .assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationBls12_381ComplexMulData(data) } _ => ExtOperationData::OperationData([op, op_type, a, b]), }, ZiskOperationType::BigInt => match inst.op { - ADD256_OP => { + ZiskOp::ADD256 => { let mut data = unsafe { uninit_array::().assume_init() }; - data[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - data[OPERATION_BUS_DATA_SIZE..].copy_from_slice(&ctx.precompiled.input_data); + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); ExtOperationData::OperationAdd256Data(data) } _ => ExtOperationData::OperationData([op, op_type, a, b]), }, + ZiskOperationType::Dma => match inst.op { + ZiskOp::DMA_MEMCPY => { + let mut data = unsafe { + uninit_array::().assume_init() + }; + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); + ExtOperationData::OperationDmaMemCpyData(data) + } + ZiskOp::DMA_MEMCMP => { + let mut data = unsafe { + uninit_array::().assume_init() + }; + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); + ExtOperationData::OperationDmaMemCmpData(data) + } + ZiskOp::DMA_INPUTCPY => { + let mut data = unsafe { + uninit_array::().assume_init() + }; + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); + ExtOperationData::OperationDmaInputCpyData(data) + } + ZiskOp::DMA_XMEMSET => { + let mut data = unsafe { + uninit_array::().assume_init() + }; + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); + ExtOperationData::OperationDmaXMemSetData(data) + } + ZiskOp::DMA_XMEMCPY => { + let mut data = unsafe { + uninit_array::().assume_init() + }; + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); + ExtOperationData::OperationDmaXMemCpyData(data) + } + ZiskOp::DMA_XMEMCMP => { + let mut data = unsafe { + uninit_array::().assume_init() + }; + data[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE..] + .copy_from_slice(&ctx.precompiled.input_data); + ExtOperationData::OperationDmaXMemCmpData(data) + } + _ => ExtOperationData::OperationData([op, op_type, a, b]), + }, + _ => ExtOperationData::OperationData([op, op_type, a, b]), } } @@ -477,85 +695,142 @@ impl OperationBusData { let b = if inst.m32 { ctx.b & 0xffff_ffff } else { ctx.b }; let op = inst.op as u64; let op_type = inst.op_type as u64; + let step = ctx.step; match inst.op_type { ZiskOperationType::Keccak => { debug_assert_eq!(ctx.precompiled.input_data.len(), 25); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..OPERATION_BUS_KECCAKF_DATA_SIZE] + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..OPERATION_BUS_KECCAKF_DATA_SIZE] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..OPERATION_BUS_KECCAKF_DATA_SIZE] } ZiskOperationType::Sha256 => { debug_assert_eq!(ctx.precompiled.input_data.len(), 14); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..OPERATION_BUS_SHA256F_DATA_SIZE] + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..OPERATION_BUS_SHA256F_DATA_SIZE] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..OPERATION_BUS_SHA256F_DATA_SIZE] } + ZiskOperationType::Poseidon2 => { + debug_assert_eq!(ctx.precompiled.input_data.len(), 16); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..OPERATION_BUS_POSEIDON2_DATA_SIZE] + .copy_from_slice(&ctx.precompiled.input_data); + &buffer[..OPERATION_BUS_POSEIDON2_DATA_SIZE] + } + + ZiskOperationType::Blake2 => { + debug_assert_eq!(ctx.precompiled.input_data.len(), 35); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..OPERATION_BUS_BLAKE2_DATA_SIZE] + .copy_from_slice(&ctx.precompiled.input_data); + &buffer[..OPERATION_BUS_BLAKE2_DATA_SIZE] + } + ZiskOperationType::ArithEq => match inst.op { - ARITH256_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::ARITH256 => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - ARITH256_MOD_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::ARITH256_MOD => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - SECP256K1_ADD_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::SECP256K1_ADD => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - SECP256K1_DBL_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::SECP256K1_DBL => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - BN254_CURVE_ADD_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::BN254_CURVE_ADD => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - BN254_CURVE_DBL_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::BN254_CURVE_DBL => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - BN254_COMPLEX_ADD_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::BN254_COMPLEX_ADD => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - BN254_COMPLEX_SUB_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::BN254_COMPLEX_SUB => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - BN254_COMPLEX_MUL_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::BN254_COMPLEX_MUL => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] + .copy_from_slice(&ctx.precompiled.input_data); + &buffer[..len] + } + ZiskOp::SECP256R1_ADD => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] + .copy_from_slice(&ctx.precompiled.input_data); + &buffer[..len] + } + ZiskOp::SECP256R1_DBL => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } @@ -566,45 +841,57 @@ impl OperationBusData { }, ZiskOperationType::ArithEq384 => match inst.op { - ARITH384_MOD_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::ARITH384_MOD => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - BLS12_381_CURVE_ADD_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::BLS12_381_CURVE_ADD => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - BLS12_381_CURVE_DBL_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::BLS12_381_CURVE_DBL => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - BLS12_381_COMPLEX_ADD_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::BLS12_381_COMPLEX_ADD => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - BLS12_381_COMPLEX_SUB_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::BLS12_381_COMPLEX_SUB => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } - BLS12_381_COMPLEX_MUL_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); - buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + ZiskOp::BLS12_381_COMPLEX_MUL => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } @@ -613,11 +900,35 @@ impl OperationBusData { &buffer[..OPERATION_BUS_DATA_SIZE] } }, + ZiskOperationType::BigInt => match inst.op { - ADD256_OP => { - let len = OPERATION_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + ZiskOp::ADD256 => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] + .copy_from_slice(&ctx.precompiled.input_data); + &buffer[..len] + } + _ => { buffer[0..OPERATION_BUS_DATA_SIZE].copy_from_slice(&[op, op_type, a, b]); - buffer[OPERATION_BUS_DATA_SIZE..len] + &buffer[..OPERATION_BUS_DATA_SIZE] + } + }, + + ZiskOperationType::Dma => match inst.op { + ZiskOp::DMA_MEMCPY + | ZiskOp::DMA_MEMCMP + | ZiskOp::DMA_INPUTCPY + | ZiskOp::DMA_XMEMSET + | ZiskOp::DMA_XMEMCPY + | ZiskOp::DMA_XMEMCMP => { + let len = + OPERATION_PRECOMPILED_BUS_DATA_SIZE + ctx.precompiled.input_data.len(); + buffer[0..OPERATION_PRECOMPILED_BUS_DATA_SIZE] + .copy_from_slice(&[op, op_type, a, b, step]); + buffer[OPERATION_PRECOMPILED_BUS_DATA_SIZE..len] .copy_from_slice(&ctx.precompiled.input_data); &buffer[..len] } @@ -647,6 +958,7 @@ impl OperationBusData { ExtOperationData::OperationData(d) => d[OP] as u8, ExtOperationData::OperationKeccakData(d) => d[OP] as u8, ExtOperationData::OperationSha256Data(d) => d[OP] as u8, + ExtOperationData::OperationPoseidon2Data(d) => d[OP] as u8, ExtOperationData::OperationArith256Data(d) => d[OP] as u8, ExtOperationData::OperationArith256ModData(d) => d[OP] as u8, ExtOperationData::OperationSecp256k1AddData(d) => d[OP] as u8, @@ -663,6 +975,15 @@ impl OperationBusData { ExtOperationData::OperationBls12_381ComplexSubData(d) => d[OP] as u8, ExtOperationData::OperationBls12_381ComplexMulData(d) => d[OP] as u8, ExtOperationData::OperationAdd256Data(d) => d[OP] as u8, + ExtOperationData::OperationDmaMemCpyData(d) => d[OP] as u8, + ExtOperationData::OperationDmaMemCmpData(d) => d[OP] as u8, + ExtOperationData::OperationDmaInputCpyData(d) => d[OP] as u8, + ExtOperationData::OperationDmaXMemSetData(d) => d[OP] as u8, + ExtOperationData::OperationDmaXMemCpyData(d) => d[OP] as u8, + ExtOperationData::OperationDmaXMemCmpData(d) => d[OP] as u8, + ExtOperationData::OperationSecp256r1AddData(d) => d[OP] as u8, + ExtOperationData::OperationSecp256r1DblData(d) => d[OP] as u8, + ExtOperationData::OperationBlake2Data(d) => d[OP] as u8, } } @@ -679,6 +1000,7 @@ impl OperationBusData { ExtOperationData::OperationData(d) => d[OP_TYPE], ExtOperationData::OperationKeccakData(d) => d[OP_TYPE], ExtOperationData::OperationSha256Data(d) => d[OP_TYPE], + ExtOperationData::OperationPoseidon2Data(d) => d[OP_TYPE], ExtOperationData::OperationArith256Data(d) => d[OP_TYPE], ExtOperationData::OperationArith256ModData(d) => d[OP_TYPE], ExtOperationData::OperationSecp256k1AddData(d) => d[OP_TYPE], @@ -695,6 +1017,15 @@ impl OperationBusData { ExtOperationData::OperationBls12_381ComplexSubData(d) => d[OP_TYPE], ExtOperationData::OperationBls12_381ComplexMulData(d) => d[OP_TYPE], ExtOperationData::OperationAdd256Data(d) => d[OP_TYPE], + ExtOperationData::OperationDmaMemCpyData(d) => d[OP_TYPE], + ExtOperationData::OperationDmaMemCmpData(d) => d[OP_TYPE], + ExtOperationData::OperationDmaInputCpyData(d) => d[OP_TYPE], + ExtOperationData::OperationDmaXMemSetData(d) => d[OP_TYPE], + ExtOperationData::OperationDmaXMemCpyData(d) => d[OP_TYPE], + ExtOperationData::OperationDmaXMemCmpData(d) => d[OP_TYPE], + ExtOperationData::OperationSecp256r1AddData(d) => d[OP_TYPE], + ExtOperationData::OperationSecp256r1DblData(d) => d[OP_TYPE], + ExtOperationData::OperationBlake2Data(d) => d[OP_TYPE], } } @@ -711,6 +1042,7 @@ impl OperationBusData { ExtOperationData::OperationData(d) => d[A], ExtOperationData::OperationKeccakData(d) => d[A], ExtOperationData::OperationSha256Data(d) => d[A], + ExtOperationData::OperationPoseidon2Data(d) => d[A], ExtOperationData::OperationArith256Data(d) => d[A], ExtOperationData::OperationArith256ModData(d) => d[A], ExtOperationData::OperationSecp256k1AddData(d) => d[A], @@ -727,6 +1059,15 @@ impl OperationBusData { ExtOperationData::OperationBls12_381ComplexSubData(d) => d[A], ExtOperationData::OperationBls12_381ComplexMulData(d) => d[A], ExtOperationData::OperationAdd256Data(d) => d[A], + ExtOperationData::OperationDmaMemCpyData(d) => d[A], + ExtOperationData::OperationDmaMemCmpData(d) => d[A], + ExtOperationData::OperationDmaInputCpyData(d) => d[A], + ExtOperationData::OperationDmaXMemSetData(d) => d[A], + ExtOperationData::OperationDmaXMemCpyData(d) => d[A], + ExtOperationData::OperationDmaXMemCmpData(d) => d[A], + ExtOperationData::OperationSecp256r1AddData(d) => d[A], + ExtOperationData::OperationSecp256r1DblData(d) => d[A], + ExtOperationData::OperationBlake2Data(d) => d[A], } } @@ -743,6 +1084,7 @@ impl OperationBusData { ExtOperationData::OperationData(d) => d[B], ExtOperationData::OperationKeccakData(d) => d[B], ExtOperationData::OperationSha256Data(d) => d[B], + ExtOperationData::OperationPoseidon2Data(d) => d[B], ExtOperationData::OperationArith256Data(d) => d[B], ExtOperationData::OperationArith256ModData(d) => d[B], ExtOperationData::OperationSecp256k1AddData(d) => d[B], @@ -759,6 +1101,15 @@ impl OperationBusData { ExtOperationData::OperationBls12_381ComplexSubData(d) => d[B], ExtOperationData::OperationBls12_381ComplexMulData(d) => d[B], ExtOperationData::OperationAdd256Data(d) => d[B], + ExtOperationData::OperationDmaMemCpyData(d) => d[B], + ExtOperationData::OperationDmaMemCmpData(d) => d[B], + ExtOperationData::OperationDmaInputCpyData(d) => d[B], + ExtOperationData::OperationDmaXMemSetData(d) => d[B], + ExtOperationData::OperationDmaXMemCpyData(d) => d[B], + ExtOperationData::OperationDmaXMemCmpData(d) => d[B], + ExtOperationData::OperationSecp256r1AddData(d) => d[B], + ExtOperationData::OperationSecp256r1DblData(d) => d[B], + ExtOperationData::OperationBlake2Data(d) => d[B], } } } diff --git a/common/src/component/component_builder.rs b/common/src/component/component_builder.rs index 83c6da267..7ad5fc3bd 100644 --- a/common/src/component/component_builder.rs +++ b/common/src/component/component_builder.rs @@ -4,7 +4,7 @@ //! This trait provides methods to create counters, planners, input collectors, and optional //! input generators, enabling flexible and modular integration of components. -use crate::{BusDevice, BusDeviceMetrics, Instance, InstanceCtx, PayloadType, Plan, Planner}; +use crate::{Instance, InstanceCtx, Plan, Planner}; use fields::PrimeField64; use proofman_common::ProofCtx; @@ -15,12 +15,6 @@ use proofman_common::ProofCtx; /// * `F` - A type that implements the `PrimeField64` trait, representing the field over which /// operations are performed. pub trait ComponentBuilder: Send + Sync { - /// Builds and returns a bus device counter for monitoring metrics. - /// - /// # Returns - /// A boxed implementation of `BusDeviceMetrics`, capable of tracking bus data. - fn build_counter(&self) -> Option>; - /// Builds a planner for planning execution instances. /// /// # Returns @@ -40,16 +34,4 @@ pub trait ComponentBuilder: Send + Sync { /// # Arguments /// * `ictx` - The instance context used to create the instance. fn build_instance(&self, ictx: InstanceCtx) -> Box>; - - /// Optionally creates an input generator for producing inputs to be sent back to the bus. - /// - /// # Returns - /// An `Option` containing a boxed implementation of `BusDevice`, or `None` if the component - /// does not support input generation. - /// - /// # Default Implementation - /// Returns `None` by default, indicating no input generator is provided. - fn build_inputs_generator(&self) -> Option>> { - None - } } diff --git a/common/src/component/component_counter.rs b/common/src/component/component_counter.rs index 133494f1a..5bfbede94 100644 --- a/common/src/component/component_counter.rs +++ b/common/src/component/component_counter.rs @@ -6,7 +6,7 @@ use std::{ any::Any, fmt::Debug, ops::{Add, AddAssign}, - sync::{atomic::AtomicU32, Arc}, + sync::{atomic::AtomicU64, Arc}, }; use zisk_core::{ROM_ADDR, ROM_ENTRY}; @@ -94,10 +94,10 @@ impl AddAssign<&Counter> for Counter { #[derive(Debug)] pub struct CounterStats { /// Shared biod instruction counter for monitoring ROM operations. - pub bios_inst_count: Arc>, + pub bios_inst_count: Arc>, /// Shared program instruction counter for monitoring ROM operations. - pub prog_inst_count: Arc>, + pub prog_inst_count: Arc>, /// The PC of the last executed instruction. pub end_pc: u64, @@ -107,7 +107,7 @@ pub struct CounterStats { } impl CounterStats { - pub fn new(entry_inst_count: Arc>, inst_count: Arc>) -> Self { + pub fn new(entry_inst_count: Arc>, inst_count: Arc>) -> Self { CounterStats { bios_inst_count: entry_inst_count, prog_inst_count: inst_count, @@ -124,7 +124,7 @@ impl CounterStats { /// * `num` - The number of instructions executed at the given PC. /// * `end` - A flag indicating if this is the final instruction in the execution. #[inline(always)] - pub fn update(&mut self, pc: u64, step: u64, num: u32, end: bool) { + pub fn update(&mut self, pc: u64, step: u64, num: u64, end: bool) { if pc < ROM_ADDR { let addr = ((pc - ROM_ENTRY) as usize) >> 2; self.bios_inst_count[addr].fetch_add(num, std::sync::atomic::Ordering::Relaxed); diff --git a/common/src/component/component_instance.rs b/common/src/component/component_instance.rs index 0d00bf3a9..6e02686ca 100644 --- a/common/src/component/component_instance.rs +++ b/common/src/component/component_instance.rs @@ -2,7 +2,7 @@ //! in the context of proof systems. It includes traits and macros for defining instances //! and integrating them with state machines and proofs. -use crate::{BusDevice, CheckPoint, ChunkId, PayloadType}; +use crate::{BusDevice, CheckPoint, ChunkId, PayloadType, StatsType}; use fields::PrimeField64; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; use std::any::Any; @@ -80,6 +80,10 @@ pub trait Instance: Any + Send + Sync { /// A reference to self as `&dyn Any`. fn as_any(&self) -> &dyn Any; + fn stats_type(&self) -> StatsType { + StatsType::Other + } + fn reset(&self) {} } @@ -129,6 +133,10 @@ macro_rules! table_instance { pub fn new(table_sm: Arc<$TableSM>, ictx: InstanceCtx, bus_id: BusId) -> Self { Self { table_sm, ictx, bus_id } } + + pub fn process_data(&mut self, _bus_id: &BusId, _data: &[u64]) -> bool { + true + } } impl Instance for $InstanceName { @@ -180,19 +188,6 @@ macro_rules! table_instance { } impl BusDevice for $InstanceName { - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { - true - } - fn bus_id(&self) -> Vec { - vec![self.bus_id] - } - /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self @@ -247,6 +242,10 @@ macro_rules! table_instance_array { pub fn new(table_sm: Arc<$TableSM>, ictx: InstanceCtx, bus_id: BusId) -> Self { Self { table_sm, ictx, bus_id } } + + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64]) -> bool { + true + } } impl Instance for $InstanceName { @@ -307,20 +306,6 @@ macro_rules! table_instance_array { } impl BusDevice for $InstanceName { - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { - true - } - - fn bus_id(&self) -> Vec { - vec![self.bus_id] - } - /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/common/src/component/component_planner.rs b/common/src/component/component_planner.rs index 03a14879c..40212d55d 100644 --- a/common/src/component/component_planner.rs +++ b/common/src/component/component_planner.rs @@ -53,6 +53,33 @@ impl CollectSkipper { true } + /// Determines how many rows of the current instruction should be skipped. This method is useful + /// when an instruction spans multiple rows. + /// + /// # Returns + /// number of rows to skip if the instruction should be skipped, `0` otherwise. + #[inline(always)] + pub fn rows_to_skip(&mut self, rows: u64) -> u64 { + if !self.skipping { + return 0; + } + + if self.skip == 0 || self.skipped >= self.skip { + self.skipping = false; + return 0; + } + + if (self.skipped + rows) >= self.skip { + let result = self.skip - self.skipped; + self.skipped = self.skip; + self.skipping = false; + return result; + } + + self.skipped += rows; + rows + } + #[inline(always)] pub fn should_skip_query(&mut self, apply: bool) -> bool { if !self.skipping { @@ -112,8 +139,8 @@ impl CollectCounter { initial_skipped: 0, collect_count, collected: 0, - initial_skipping: initial_skip > 0, - final_skip_phase: false, + initial_skipping: collect_count > 0 && initial_skip > 0, + final_skip_phase: collect_count == 0, } } @@ -146,6 +173,64 @@ impl CollectCounter { true } + /// Determines whether the current instruction should be skipped. + /// + /// Behavior: + /// 1. Skip first `initial_skip` elements + /// 2. Don't skip next `collect_count` elements + /// 3. Skip all remaining elements + /// + /// Arguments: + /// * `rows` - Number of rows in the current instruction + /// + /// # Returns + /// `Some((skip, count))` where: + /// - `skip` is the number of rows to skip + /// - `count` is the number of rows to collect + /// `None` if all rows should be skipped. + #[inline(always)] + pub fn should_process(&mut self, rows: u32) -> Option<(u32, u32)> { + // Phase 1: Initial skipping + let mut skip = 0; + let mut rows = rows; + if self.initial_skipping { + if self.initial_skip == 0 { + self.initial_skipping = false; + } else if (self.initial_skipped + rows) >= self.initial_skip { + skip = self.initial_skip - self.initial_skipped; + rows -= skip; + self.initial_skipped = self.initial_skip; + self.initial_skipping = false; + // skip only a part of rows, at this point need + // to calculate count of rows not skipped + if rows == 0 { + return None; + } + } else { + self.initial_skipped += rows; + // skip all rows + return None; + } + } + if self.final_skip_phase { + // Phase 3: Skip all remaining elements + None + } else if (self.collected + rows) >= self.collect_count { + // Phase 2: Collecting (not skipping) + let rows_to_collect = self.collect_count - self.collected; + self.final_skip_phase = true; + self.collected = self.collect_count; + if rows_to_collect == 0 { + None + } else { + Some((skip, rows_to_collect)) + } + } else { + self.collected += rows; + Some((skip, rows)) + } + } + /// Reset to initial state with new parameters pub fn reset(&mut self, initial_skip: u32, collect_count: u32) { self.initial_skip = initial_skip; diff --git a/common/src/emu_minimal_trace.rs b/common/src/emu_minimal_trace.rs index 19b3c87bc..9d15de021 100644 --- a/common/src/emu_minimal_trace.rs +++ b/common/src/emu_minimal_trace.rs @@ -1,5 +1,6 @@ //! Emulator trace +use std::borrow::Cow; use std::fmt::{Debug, Formatter}; use zisk_core::REGS_IN_MAIN_TOTAL_NUMBER; @@ -37,8 +38,8 @@ pub struct EmuTrace { pub last_c: u64, /// Number of steps executed pub steps: u64, - /// Memory reads - pub mem_reads: Vec, + /// Memory reads (Cow allows zero-copy from shared memory) + pub mem_reads: Cow<'static, [u64]>, /// If the `end` flag is true, the program executed completely. /// This does not mean that the program ended successfully; it could have found an error condition diff --git a/common/src/executor_stats.rs b/common/src/executor_stats.rs index 0cb477e5a..3a28ead0c 100644 --- a/common/src/executor_stats.rs +++ b/common/src/executor_stats.rs @@ -11,6 +11,206 @@ use zisk_pil::*; use crate::Stats; +/// Trait for types that can be converted to a stats ID. +/// Implemented for `u64` (raw ID), `StatsScope`, and references `&T` where `T: IntoStatsId`. +pub trait IntoStatsId { + fn as_stats_id(&self) -> u64; +} + +impl IntoStatsId for u64 { + #[inline] + fn as_stats_id(&self) -> u64 { + *self + } +} + +impl IntoStatsId for StatsScope { + #[inline] + fn as_stats_id(&self) -> u64 { + self.id() + } +} + +impl IntoStatsId for &T { + #[inline] + fn as_stats_id(&self) -> u64 { + (*self).as_stats_id() + } +} + +/// Creates a new stats scope (StatsScope) and emits a Begin event. +/// When `stats` feature is disabled, creates a zero-sized StatsScope. +/// +/// # Usage +/// ```ignore +/// stats_begin!(self.stats, 0, parent_scope, "PARENT_OP", 0); +/// stats_begin!(self.stats, &parent_scope, child_scope, "CHILD_OP", 0); +/// // ... work ... +/// stats_end!(self.stats, &child_scope); +/// stats_end!(self.stats, &parent_scope); +/// ``` +#[cfg(feature = "stats")] +#[macro_export] +macro_rules! stats_begin { + ($stats:expr, $parent:expr, $scope:ident, $name:expr, $index:expr) => { + let $scope = $crate::StatsScope::new( + $crate::IntoStatsId::as_stats_id(&$parent), + $stats.next_id(), + $name, + $index, + ); + $stats.add_stat( + $scope.parent_id(), + $scope.id(), + $name, + $index, + $crate::ExecutorStatsEvent::Begin, + ); + }; +} + +#[cfg(not(feature = "stats"))] +#[macro_export] +macro_rules! stats_begin { + ($stats:expr, $parent:expr, $scope:ident, $name:expr, $index:expr) => { + let $scope = $crate::StatsScope; + }; +} + +/// Ends a stats scope with an End event. +/// Uses name and index from the scope (passed to stats_begin). +/// When `stats` feature is disabled, this generates no code. +/// +/// # Usage +/// ```ignore +/// stats_begin!(self.stats, &parent_scope, child_scope, "CHILD_OP", 0); +/// // ... work ... +/// stats_end!(self.stats, &child_scope); +/// ``` +#[cfg(feature = "stats")] +#[macro_export] +macro_rules! stats_end { + ($stats:expr, $scope:expr) => { + $stats.add_stat( + $scope.parent_id(), + $scope.id(), + $scope.name(), + $scope.index(), + $crate::ExecutorStatsEvent::End, + ); + }; +} + +#[cfg(not(feature = "stats"))] +#[macro_export] +macro_rules! stats_end { + ($stats:expr, $scope:expr) => {}; +} + +/// Records a stats mark event (single point in time, not a scope). +/// When `stats` feature is disabled, this generates no code. +/// +/// # Usage +/// ```ignore +/// stats_mark!(self.stats, &parent_scope, "CHECKPOINT_NAME", index); +/// ``` +#[cfg(feature = "stats")] +#[macro_export] +macro_rules! stats_mark { + ($stats:expr, $parent:expr, $name:expr, $index:expr) => { + let __mark_id = $stats.next_id(); + $stats.add_stat( + $crate::IntoStatsId::as_stats_id(&$parent), + __mark_id, + $name, + $index, + $crate::ExecutorStatsEvent::Mark, + ); + }; +} + +#[cfg(not(feature = "stats"))] +#[macro_export] +macro_rules! stats_mark { + ($stats:expr, $parent:expr, $name:expr, $index:expr) => {}; +} + +/// Stats scope that holds scope information (parent_id, id, name, index). +/// Created by `stats_begin!` macro, ended by `stats_end!` macro. +/// When `stats` feature is disabled, this is a zero-sized type. +/// +/// # Usage +/// ```ignore +/// stats_begin!(self.stats, 0, parent_scope, "PARENT", 0); +/// stats_begin!(self.stats, &parent_scope, child_scope, "CHILD", 0); +/// // ... work ... +/// stats_end!(self.stats, &child_scope); +/// stats_end!(self.stats, &parent_scope); +/// ``` +#[cfg(feature = "stats")] +pub struct StatsScope { + parent_id: u64, + id: u64, + name: &'static str, + index: usize, +} + +#[cfg(feature = "stats")] +impl StatsScope { + /// Creates a new stats scope. Does NOT emit Begin - use `stats_begin!` macro instead. + #[inline] + pub fn new(parent_id: u64, id: u64, name: &'static str, index: usize) -> Self { + Self { parent_id, id, name, index } + } + + #[inline] + pub fn parent_id(&self) -> u64 { + self.parent_id + } + + #[inline] + pub fn id(&self) -> u64 { + self.id + } + + #[inline] + pub fn name(&self) -> &'static str { + self.name + } + + #[inline] + pub fn index(&self) -> usize { + self.index + } +} + +/// Zero-sized type when stats feature is disabled. All methods are no-ops. +#[cfg(not(feature = "stats"))] +pub struct StatsScope; + +#[cfg(not(feature = "stats"))] +impl StatsScope { + #[inline] + pub fn parent_id(&self) -> u64 { + 0 + } + + #[inline] + pub fn id(&self) -> u64 { + 0 + } + + #[inline] + pub fn name(&self) -> &'static str { + "" + } + + #[inline] + pub fn index(&self) -> usize { + 0 + } +} + #[derive(Debug, Clone)] pub enum ExecutorStatsEvent { Begin, @@ -89,25 +289,22 @@ impl ExecutorStats { val if val == MEM_AIR_IDS[0] => "MEM".to_string(), val if val == ROM_DATA_AIR_IDS[0] => "ROM_DATA".to_string(), val if val == INPUT_DATA_AIR_IDS[0] => "INPUT_DATA".to_string(), + val if val == DMA_PRE_POST_AIR_IDS[0] => "DMA_PRE_POST".to_string(), val if val == MEM_ALIGN_AIR_IDS[0] => "MEM_ALIGN".to_string(), val if val == MEM_ALIGN_BYTE_AIR_IDS[0] => "MEM_ALIGN_BYTE".to_string(), val if val == MEM_ALIGN_READ_BYTE_AIR_IDS[0] => "MEM_ALIGN_READ_BYTE".to_string(), val if val == MEM_ALIGN_WRITE_BYTE_AIR_IDS[0] => "MEM_ALIGN_WRITE_BYTE".to_string(), - // val if val == MEM_ALIGN_ROM_AIR_IDS[0] => "MEM_ALIGN_ROM".to_string(), val if val == ARITH_AIR_IDS[0] => "ARITH".to_string(), - // val if val == ARITH_TABLE_AIR_IDS[0] => "ARITH_TABLE".to_string(), - // val if val == ARITH_RANGE_TABLE_AIR_IDS[0] => "ARITH_RANGE_TABLE".to_string(), val if val == ARITH_EQ_AIR_IDS[0] => "ARITH_EQ".to_string(), - // val if val == ARITH_EQ_LT_TABLE_AIR_IDS[0] => "ARITH_EQ_LT_TABLE".to_string(), + val if val == ARITH_EQ_384_AIR_IDS[0] => "ARITH_EQ_384".to_string(), val if val == BINARY_AIR_IDS[0] => "BINARY".to_string(), val if val == BINARY_ADD_AIR_IDS[0] => "BINARY_ADD".to_string(), - // val if val == BINARY_TABLE_AIR_IDS[0] => "BINARY_TABLE".to_string(), val if val == BINARY_EXTENSION_AIR_IDS[0] => "BINARY_EXTENSION".to_string(), - // val if val == BINARY_EXTENSION_TABLE_AIR_IDS[0] => "BINARY_EXTENSION_TABLE".to_string(), + val if val == ADD_256_AIR_IDS[0] => "ADD_256".to_string(), val if val == KECCAKF_AIR_IDS[0] => "KECCAKF".to_string(), - // val if val == KECCAKF_TABLE_AIR_IDS[0] => "KECCAKF_TABLE".to_string(), val if val == SHA_256_F_AIR_IDS[0] => "SHA_256_F".to_string(), - // val if val == SPECIFIED_RANGES_AIR_IDS[0] => "SPECIFIED_RANGES".to_string(), + val if val == POSEIDON_2_AIR_IDS[0] => "POSEIDON_2".to_string(), + val if val == SPECIFIED_RANGES_AIR_IDS[0] => "SPECIFIED_RANGES".to_string(), _ => format!("Unknown air_id: {air_id}"), } } @@ -237,8 +434,8 @@ impl ExecutorStatsHandle { self.inner.lock().unwrap().print_stats(); } - pub fn get_inner(&self) -> ExecutorStats { - self.inner.lock().unwrap().clone() + pub fn get_inner(&self) -> Arc> { + self.inner.clone() } pub fn insert_witness_stats(&self, airgroup_id: usize, stats: Stats) { diff --git a/common/src/hints.rs b/common/src/hints.rs new file mode 100644 index 000000000..1455ce225 --- /dev/null +++ b/common/src/hints.rs @@ -0,0 +1,564 @@ +//! Hints for ZisK Precompiles stream processing +//! +//! This module provides functionality for parsing precompile hints +//! that are received as a stream of `u64` values. Hints are used to provide preprocessed +//! data to precompile operations in the ZisK zkVM. +//! +//! # Hint Format +//! +//! Each hint consists of: +//! - A **header** (`u64`): Contains the hint type (upper 32 bits) and data length (lower 32 bits) +//! - **Data** (`[u64; length]`): The hint payload, where `length` is specified in the header +//! +//! ```text +//! ┌─────────────────────────────────────────────────────────────┐ +//! │ Header (u64) │ +//! ├·····························································┤ +//! │ Hint Code (32 bits) Length (32 bits). │ +//! ├─────────────────────────────────────────────────────────────┤ +//! │ Data[0] (u64) │ +//! ├─────────────────────────────────────────────────────────────┤ +//! │ Data[1] (u64) │ +//! ├─────────────────────────────────────────────────────────────┤ +//! │ ... │ +//! ├─────────────────────────────────────────────────────────────┤ +//! │ Data[N-1] (u64) │ +//! └─────────────────────────────────────────────────────────────┘ +//! +//! where N = ceil(Length / 8) +//! +//! - Hint Code — Control code or Data Hint Type +//! - Length — Data length in bytes +//! +//! ## Hint Type Layout +//! +//! ### Control codes +//! +//! The following control codes are defined: +//! - `0x00` (START): Reset processor state and global sequence. +//! - `0x01` (END): Wait until completion of all pending hints. +//! - `0x02` (CANCEL): Cancel current stream and stop processing further hints. +//! - `0x03` (ERROR): Indicate an error has occurred; stop processing further hints. +//! +//! Control codes are for control only and do not have any associated data (Length should be zero). +//! +//! ### Data Hint Types: +//! +//! For data hints, the hint code (32 bits) is structured as follows: +//! - **Bit 31 (MSB)**: Indicates if the data is pass-through (1) or requires computation (0) +//! - **Bits 0-30**: Encode the built-in hint code as defined in the constants +//! (e.g., `HINT_SHA256`, `HINT_BN254_G1_ADD`, `HINT_SECP256K1_RECOVER`, etc.) +//! ``` + +use std::fmt::Display; + +use anyhow::Result; + +// === CONTROL CODES === +pub const CTRL_START: u32 = 0x0000; +pub const CTRL_END: u32 = 0x0001; +pub const CTRL_CANCEL: u32 = 0x0002; +pub const CTRL_ERROR: u32 = 0x0003; + +// === INPUT HINT CODES === +pub const HINT_INPUT: u32 = 0xF0000; + +// === BUILT-IN HINT CODES === +// SHA256 hint codes +pub const HINT_SHA256: u32 = 0x0100; + +// BN254 hint codes +pub const HINT_BN254_G1_ADD: u32 = 0x0200; +pub const HINT_BN254_G1_MUL: u32 = 0x0201; +pub const HINT_BN254_PAIRING_CHECK: u32 = 0x0205; + +// Secp256k1 hint codes +pub const HINT_SECP256K1_ECDSA_ADDRESS_RECOVER: u32 = 0x0300; +pub const HINT_SECP256K1_ECDSA_VERIFY_ADDRESS_RECOVER: u32 = 0x0301; + +// Secp256r1 hint codes +pub const HINT_SECP256R1_ECDSA_VERIFY: u32 = 0x0380; + +// BLS12-381 hint codes +pub const HINT_BLS12_381_G1_ADD: u32 = 0x0400; +pub const HINT_BLS12_381_G1_MSM: u32 = 0x0401; +pub const HINT_BLS12_381_G2_ADD: u32 = 0x0405; +pub const HINT_BLS12_381_G2_MSM: u32 = 0x0406; +pub const HINT_BLS12_381_PAIRING_CHECK: u32 = 0x040A; +pub const HINT_BLS12_381_FP_TO_G1: u32 = 0x0410; +pub const HINT_BLS12_381_FP2_TO_G2: u32 = 0x0411; + +// Modular exponentiation hint codes +pub const HINT_MODEXP: u32 = 0x0500; + +// KZG hint codes +pub const HINT_VERIFY_KZG_PROOF: u32 = 0x0600; + +// Keccak256 hint codes +pub const HINT_KECCAK256: u32 = 0x0700; + +// Blake2b hint codes +pub const HINT_BLAKE2B_COMPRESS: u32 = 0x0800; + +/// Control code variants for stream control. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(u32)] +pub enum CtrlHint { + /// Reset processor state and global sequence. + Start = CTRL_START, + /// Wait until completion of all pending hints. + End = CTRL_END, + /// Cancel current stream and stop processing. + Cancel = CTRL_CANCEL, + /// Signal error and stop processing. + Error = CTRL_ERROR, +} + +impl Display for CtrlHint { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let name = match self { + CtrlHint::Start => "CTRL_START", + CtrlHint::End => "CTRL_END", + CtrlHint::Cancel => "CTRL_CANCEL", + CtrlHint::Error => "CTRL_ERROR", + }; + write!(f, "{} ({:#x})", name, *self as u32) + } +} + +impl TryFrom for CtrlHint { + type Error = anyhow::Error; + + fn try_from(value: u32) -> Result { + match value { + CTRL_START => Ok(Self::Start), + CTRL_END => Ok(Self::End), + CTRL_CANCEL => Ok(Self::Cancel), + CTRL_ERROR => Ok(Self::Error), + _ => Err(anyhow::anyhow!("Invalid control code: {:#x}", value)), + } + } +} + +/// Built-in hint type variants. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(u32)] +pub enum BuiltInHint { + // INPUT hint types. + /// Input data hint. + Input = HINT_INPUT, + + // SHA256 hint types. + /// Compute SHA-256 hash + Sha256 = HINT_SHA256, + + // BN254 hint types + /// BN254 elliptic curve addition. + Bn254G1Add = HINT_BN254_G1_ADD, + /// BN254 elliptic curve scalar multiplication. + Bn254G1Mul = HINT_BN254_G1_MUL, + /// BN254 pairing check. + Bn254PairingCheck = HINT_BN254_PAIRING_CHECK, + + // Secp256k1 hint types. + /// secp256k1 ECDSA address recovery. + Secp256k1EcdsaAddressRecover = HINT_SECP256K1_ECDSA_ADDRESS_RECOVER, + /// secp256k1 ECDSA signature verification and address recovery. + Secp256k1EcdsaVerifyAddressRecover = HINT_SECP256K1_ECDSA_VERIFY_ADDRESS_RECOVER, + + // Secp256r1 hint types. + /// secp256r1 (P-256) signature verification. + Secp256r1EcdsaVerify = HINT_SECP256R1_ECDSA_VERIFY, + + // BLS12-381 hint types. + /// BLS12-381 G1 addition (returns 96-byte unpadded G1 point) + Bls12_381G1Add = HINT_BLS12_381_G1_ADD, + /// BLS12-381 G1 multi-scalar multiplication (returns 96-byte unpadded G1 point) + Bls12_381G1Msm = HINT_BLS12_381_G1_MSM, + /// BLS12-381 G2 addition (returns 192-byte unpadded G2 point) + Bls12_381G2Add = HINT_BLS12_381_G2_ADD, + /// BLS12-381 G2 multi-scalar multiplication (returns 192-byte unpadded G2 point) + Bls12_381G2Msm = HINT_BLS12_381_G2_MSM, + /// BLS12-381 pairing check. + Bls12_381PairingCheck = HINT_BLS12_381_PAIRING_CHECK, + /// BLS12-381 map field element to G1. + Bls12_381FpToG1 = HINT_BLS12_381_FP_TO_G1, + /// BLS12-381 map field element to G2. + Bls12_381Fp2ToG2 = HINT_BLS12_381_FP2_TO_G2, + + // Modular exponentiation hint types. + /// Modular exponentiation. + ModExp = HINT_MODEXP, + + // KZG hint types. + /// Verify KZG proof. + VerifyKzgProof = HINT_VERIFY_KZG_PROOF, + + // Keccak256 hint types. + /// Compute Keccak-256 hash. + Keccak256 = HINT_KECCAK256, + + // Blake2b hint types. + /// Blake2b compression function. + Blake2bCompress = HINT_BLAKE2B_COMPRESS, +} + +impl Display for BuiltInHint { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let name = match self { + // INPUT hint types + BuiltInHint::Input => "INPUT", + // SHA256 hint types + BuiltInHint::Sha256 => "SHA256", + // BN254 Hints + BuiltInHint::Bn254G1Add => "BN254_G1_ADD", + BuiltInHint::Bn254G1Mul => "BN254_G1_MUL", + BuiltInHint::Bn254PairingCheck => "BN254_PAIRING_CHECK", + // Secp256k1 Hints + BuiltInHint::Secp256k1EcdsaAddressRecover => "SECP256K1_ECDSA_ADDRESS_RECOVER", + BuiltInHint::Secp256k1EcdsaVerifyAddressRecover => { + "SECP256K1_ECDSA_VERIFY_ADDRESS_RECOVER" + } + // Secp256r1 Hints + BuiltInHint::Secp256r1EcdsaVerify => "SECP256R1_ECDSA_VERIFY", + // BLS12-381 Hints + BuiltInHint::Bls12_381G1Add => "BLS12_381_G1_ADD", + BuiltInHint::Bls12_381G1Msm => "BLS12_381_G1_MSM", + BuiltInHint::Bls12_381G2Add => "BLS12_381_G2_ADD", + BuiltInHint::Bls12_381G2Msm => "BLS12_381_G2_MSM", + BuiltInHint::Bls12_381PairingCheck => "BLS12_381_PAIRING_CHECK", + BuiltInHint::Bls12_381FpToG1 => "BLS12_381_FP_TO_G1", + BuiltInHint::Bls12_381Fp2ToG2 => "BLS12_381_FP2_TO_G2", + // Modular Exponentiation Hint + BuiltInHint::ModExp => "MODEXP", + // KZG Hint + BuiltInHint::VerifyKzgProof => "VERIFY_KZG_PROOF", + // Keccak256 Hint + BuiltInHint::Keccak256 => "KECCAK256", + // Blake2b Hint + BuiltInHint::Blake2bCompress => "BLAKE2B_COMPRESS", + }; + + write!(f, "{} ({:#x})", name, *self as u32) + } +} + +impl TryFrom for BuiltInHint { + type Error = anyhow::Error; + + fn try_from(value: u32) -> Result { + match value { + // INPUT hint types + HINT_INPUT => Ok(Self::Input), + // SHA256 hint types + HINT_SHA256 => Ok(Self::Sha256), + // BN254 Hints + HINT_BN254_G1_ADD => Ok(Self::Bn254G1Add), + HINT_BN254_G1_MUL => Ok(Self::Bn254G1Mul), + HINT_BN254_PAIRING_CHECK => Ok(Self::Bn254PairingCheck), + // Secp256k1 Hints + HINT_SECP256K1_ECDSA_ADDRESS_RECOVER => Ok(Self::Secp256k1EcdsaAddressRecover), + HINT_SECP256K1_ECDSA_VERIFY_ADDRESS_RECOVER => { + Ok(Self::Secp256k1EcdsaVerifyAddressRecover) + } + // Secp256r1 Hints + HINT_SECP256R1_ECDSA_VERIFY => Ok(Self::Secp256r1EcdsaVerify), + // BLS12-381 Hints + HINT_BLS12_381_G1_ADD => Ok(Self::Bls12_381G1Add), + HINT_BLS12_381_G1_MSM => Ok(Self::Bls12_381G1Msm), + HINT_BLS12_381_G2_ADD => Ok(Self::Bls12_381G2Add), + HINT_BLS12_381_G2_MSM => Ok(Self::Bls12_381G2Msm), + HINT_BLS12_381_PAIRING_CHECK => Ok(Self::Bls12_381PairingCheck), + HINT_BLS12_381_FP_TO_G1 => Ok(Self::Bls12_381FpToG1), + HINT_BLS12_381_FP2_TO_G2 => Ok(Self::Bls12_381Fp2ToG2), + // Modular Exponentiation Hint + HINT_MODEXP => Ok(Self::ModExp), + // KZG Hint + HINT_VERIFY_KZG_PROOF => Ok(Self::VerifyKzgProof), + // Keccak256 Hint + HINT_KECCAK256 => Ok(Self::Keccak256), + // Blake2b Hint + HINT_BLAKE2B_COMPRESS => Ok(Self::Blake2bCompress), + _ => Err(anyhow::anyhow!("Invalid built-in hint code: {:#x}", value)), + } + } +} + +/// Hint code representing either a control code or built-in hint type. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(u32)] +pub enum HintCode { + /// Control code for stream management. + Ctrl(CtrlHint), + /// Built-in hint type. + BuiltIn(BuiltInHint), + /// Custom hint type + Custom(u32), +} + +impl Display for HintCode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + HintCode::Ctrl(ctrl) => write!(f, "{}", ctrl), + HintCode::BuiltIn(builtin) => write!(f, "{}", builtin), + HintCode::Custom(code) => write!(f, "CUSTOM_HINT_{:#x}", code), + } + } +} + +impl TryFrom for HintCode { + type Error = anyhow::Error; + + fn try_from(value: u32) -> Result { + // Try CtrlCode first + if let Ok(ctrl) = CtrlHint::try_from(value) { + return Ok(HintCode::Ctrl(ctrl)); + } + // Try BuiltInHint next + if let Ok(builtin) = BuiltInHint::try_from(value) { + return Ok(HintCode::BuiltIn(builtin)); + } + // Unknown codes return error - custom codes handled separately + Err(anyhow::anyhow!("Unknown hint code: {:#x}", value)) + } +} + +impl HintCode { + /// Convert HintCode to its u32 discriminant value. + #[inline] + pub const fn to_u32(self) -> u32 { + match self { + // Control Codes + HintCode::Ctrl(CtrlHint::Start) => CTRL_START, + HintCode::Ctrl(CtrlHint::End) => CTRL_END, + HintCode::Ctrl(CtrlHint::Cancel) => CTRL_CANCEL, + HintCode::Ctrl(CtrlHint::Error) => CTRL_ERROR, + + // Built-In Hint Codes + // INPUT hint types + HintCode::BuiltIn(BuiltInHint::Input) => HINT_INPUT, + // SHA256 Hints + HintCode::BuiltIn(BuiltInHint::Sha256) => HINT_SHA256, + // BN254 Hints + HintCode::BuiltIn(BuiltInHint::Bn254G1Add) => HINT_BN254_G1_ADD, + HintCode::BuiltIn(BuiltInHint::Bn254G1Mul) => HINT_BN254_G1_MUL, + HintCode::BuiltIn(BuiltInHint::Bn254PairingCheck) => HINT_BN254_PAIRING_CHECK, + // Secp256k1 Hints + HintCode::BuiltIn(BuiltInHint::Secp256k1EcdsaAddressRecover) => { + HINT_SECP256K1_ECDSA_ADDRESS_RECOVER + } + HintCode::BuiltIn(BuiltInHint::Secp256k1EcdsaVerifyAddressRecover) => { + HINT_SECP256K1_ECDSA_VERIFY_ADDRESS_RECOVER + } + // Secp256r1 Hints + HintCode::BuiltIn(BuiltInHint::Secp256r1EcdsaVerify) => HINT_SECP256R1_ECDSA_VERIFY, + // BLS12-381 Hints + HintCode::BuiltIn(BuiltInHint::Bls12_381G1Add) => HINT_BLS12_381_G1_ADD, + HintCode::BuiltIn(BuiltInHint::Bls12_381G1Msm) => HINT_BLS12_381_G1_MSM, + HintCode::BuiltIn(BuiltInHint::Bls12_381G2Add) => HINT_BLS12_381_G2_ADD, + HintCode::BuiltIn(BuiltInHint::Bls12_381G2Msm) => HINT_BLS12_381_G2_MSM, + HintCode::BuiltIn(BuiltInHint::Bls12_381PairingCheck) => HINT_BLS12_381_PAIRING_CHECK, + HintCode::BuiltIn(BuiltInHint::Bls12_381FpToG1) => HINT_BLS12_381_FP_TO_G1, + HintCode::BuiltIn(BuiltInHint::Bls12_381Fp2ToG2) => HINT_BLS12_381_FP2_TO_G2, + // Modular Exponentiation Hint + HintCode::BuiltIn(BuiltInHint::ModExp) => HINT_MODEXP, + // KZG Hint + HintCode::BuiltIn(BuiltInHint::VerifyKzgProof) => HINT_VERIFY_KZG_PROOF, + // Keccak256 Hint + HintCode::BuiltIn(BuiltInHint::Keccak256) => HINT_KECCAK256, + + // Blake2b Hint + HintCode::BuiltIn(BuiltInHint::Blake2bCompress) => HINT_BLAKE2B_COMPRESS, + + // Custom Hints + HintCode::Custom(code) => code, + } + } +} + +/// Represents a partially received hint when the slice doesn't contain all data. +/// +/// This is returned when the hint header has been parsed but there isn't enough +/// data in the slice to complete the hint. +#[derive(Debug, Clone)] +pub struct PartialPrecompileHint { + /// The type of hint, determining how the data should be processed. + pub hint_code: HintCode, + /// Whether this hint contains pass-through data (true) or requires computation (false). + pub is_passthrough: bool, + /// The partial hint payload data received so far. + pub data: Vec, + /// Total data length in bytes expected for this hint. + pub expected_len_bytes: usize, + /// Number of u64s still needed to complete the hint. + pub remaining_u64s: usize, +} + +/// Result of parsing a hint from a u64 slice. +#[derive(Debug)] +pub enum PrecompileHintParseResult { + /// A complete hint was successfully parsed. + Complete(PrecompileHint), + /// A partial hint was received; more data is needed. + Partial(PartialPrecompileHint), +} + +/// Represents a single precompile hint parsed from a `u64` slice. +/// +/// A hint consists of a type identifier and associated data. The hint type +/// determines how the data should be processed by the [`PrecompileHintsProcessor`]. +pub struct PrecompileHint { + /// The type of hint, determining how the data should be processed. + pub hint_code: HintCode, + /// Whether this hint contains pass-through data (true) or requires computation (false). + /// Determined by bit 31 (MSB) of the hint code. + pub is_passthrough: bool, + /// The hint payload data. + pub data: Vec, + /// Data length in bytes + pub data_len_bytes: usize, +} + +impl std::fmt::Debug for PrecompileHint { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let data_display = if self.data.len() <= 10 { + format!("{:x?}", self.data) + } else { + format!("{:x?}... ({} more)", &self.data[..10], self.data.len() - 10) + }; + f.debug_struct("PrecompileHint") + .field("hint_type", &self.hint_code) + .field("is_passthrough", &self.is_passthrough) + .field("data_len_bytes", &self.data_len_bytes) + .field("data", &data_display) + .finish() + } +} + +impl PrecompileHint { + /// Parses a [`PrecompileHint`] from a slice of `u64` values at the given index, + /// optionally continuing from a previously received partial hint. + /// + /// # Arguments + /// + /// * `slice` - The source slice containing concatenated hints + /// * `idx` - The index where the hint header starts + /// * `allow_custom` - If true, unknown codes create Custom variant; if false, return error + /// * `partial` - Optional partial hint from a previous call to continue accumulating + /// + /// # Returns + /// + /// * `Ok((PrecompileHintParseResult, usize))` - The parse result and number of u64s consumed + /// * `PrecompileHintParseResult::Complete` - Successfully parsed a complete hint + /// * `PrecompileHintParseResult::Partial` - Parsed header but slice doesn't contain all data + /// * `Err` - If the slice is empty, index is out of bounds or hint code is invalid + #[inline(always)] + pub fn from_u64_slice( + slice: &[u64], + idx: usize, + allow_custom: bool, + partial: Option, + ) -> Result<(PrecompileHintParseResult, usize)> { + // If we have a partial hint, continue accumulating from it + if let Some(partial_hint) = partial { + let available = slice.len() - idx; + + if available >= partial_hint.remaining_u64s { + // We have enough data to complete the hint + let consumed = partial_hint.remaining_u64s; + let mut data = partial_hint.data; + data.extend_from_slice(&slice[idx..idx + consumed]); + + return Ok(( + PrecompileHintParseResult::Complete(PrecompileHint { + hint_code: partial_hint.hint_code, + is_passthrough: partial_hint.is_passthrough, + data, + data_len_bytes: partial_hint.expected_len_bytes, + }), + consumed, + )); + } else { + // Still not enough data, accumulate what we have + let mut data = partial_hint.data; + data.extend_from_slice(&slice[idx..]); + let remaining_u64s = partial_hint.remaining_u64s - available; + + return Ok(( + PrecompileHintParseResult::Partial(PartialPrecompileHint { + hint_code: partial_hint.hint_code, + is_passthrough: partial_hint.is_passthrough, + data, + expected_len_bytes: partial_hint.expected_len_bytes, + remaining_u64s, + }), + available, + )); + } + } + + // No partial hint, parse from scratch + if slice.len() <= idx { + return Err(anyhow::anyhow!("Slice too short or index out of bounds")); + } + + let header = slice[idx]; + + // Extract length from lower 32 bits + let length = header & 0xFFFFFFFF; + let length_bytes = length as usize; + + // Calculate how many u64s are needed to hold length + let num_u64s = length.div_ceil(8) as usize; + + // Extract hint code from upper 32 bits + let hint_code_32 = (header >> 32) as u32; + // Extract pass-through flag from bit 31 (MSB) - shift is faster than mask + let is_passthrough = hint_code_32 >> 31 != 0; + // Extract the actual hint code from bits 0-30 - mask is optimal + let hint_code_value = hint_code_32 & 0x7FFFFFFF; + + let hint_code = if allow_custom { + HintCode::try_from(hint_code_value).unwrap_or(HintCode::Custom(hint_code_value)) + } else { + HintCode::try_from(hint_code_value)? + }; + + let available_u64s = slice.len() - idx - 1; + + // Check if we have enough data for the complete hint + if available_u64s < num_u64s { + // Return partial hint with whatever data we have + let data = slice[idx + 1..].to_vec(); + let remaining_u64s = num_u64s - available_u64s; + // Consumed: 1 header + all available data + let consumed = 1 + available_u64s; + + return Ok(( + PrecompileHintParseResult::Partial(PartialPrecompileHint { + hint_code, + is_passthrough, + data, + expected_len_bytes: length_bytes, + remaining_u64s, + }), + consumed, + )); + } + + // Create a new Vec with the hint data. + let data = slice[idx + 1..idx + 1 + num_u64s].to_vec(); + // Consumed: 1 header + num_u64s data + let consumed = 1 + num_u64s; + + Ok(( + PrecompileHintParseResult::Complete(PrecompileHint { + hint_code, + is_passthrough, + data, + data_len_bytes: length_bytes, + }), + consumed, + )) + } +} diff --git a/common/src/io/file_stdin.rs b/common/src/io/file_stdin.rs deleted file mode 100644 index ff51be586..000000000 --- a/common/src/io/file_stdin.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! A file-based implementation of ZiskStdin. -//! This module provides functionality to read input data from a file. - -use std::fs::{self, File}; -use std::io::{BufReader, Read}; -use std::path::{Path, PathBuf}; - -use crate::io::ZiskIO; - -/// A file-based implementation of ZiskStdin that reads from a file. -pub struct ZiskFileStdin { - /// The path to the input file. - path: PathBuf, - - /// Buffered reader for the file. - reader: BufReader, -} - -impl ZiskFileStdin { - /// Create a new FileStdin from a file path. - pub fn new>(path: P) -> std::io::Result { - let path_buf = path.as_ref().to_path_buf(); - let file = File::open(&path_buf)?; - Ok(ZiskFileStdin { path: path_buf, reader: BufReader::new(file) }) - } -} - -impl ZiskIO for ZiskFileStdin { - fn read(&mut self) -> Vec { - fs::read(&self.path).expect("Could not read inputs file") - } - - fn read_slice(&mut self, slice: &mut [u8]) { - self.reader.read_exact(slice).expect("Failed to read slice"); - } - - fn read_into(&mut self, buffer: &mut [u8]) { - self.reader.read_exact(buffer).expect("Failed to read into buffer"); - } - - fn write_serialized(&mut self, _data: &[u8]) { - // This is a read-only stdin implementation - // Writing is not supported for file-based stdin - panic!("Write operations are not supported for FileStdin"); - } - - fn write_bytes(&mut self, _data: &[u8]) { - // This is a read-only stdin implementation - // Writing is not supported for file-based stdin - panic!("Write operations are not supported for FileStdin"); - } -} diff --git a/common/src/io/memory_stdin.rs b/common/src/io/memory_stdin.rs deleted file mode 100644 index d3ad9ce52..000000000 --- a/common/src/io/memory_stdin.rs +++ /dev/null @@ -1,50 +0,0 @@ -use std::io::{Cursor, Read}; - -use crate::io::ZiskIO; - -/// A memory-based implementation of ZiskStdin that reads from in-memory data. -pub struct ZiskMemoryStdin { - data: Vec, - cursor: Cursor>, -} - -impl ZiskMemoryStdin { - /// Create a new ZiskMemoryStdin from a vector of bytes. - pub fn new(data: Vec) -> Self { - let cursor = Cursor::new(data.clone()); - ZiskMemoryStdin { data, cursor } - } - - /// Create a new ZiskMemoryStdin from a string (UTF-8 encoded). - pub fn from_string(data: String) -> Self { - Self::new(data.into_bytes()) - } - - /// Create a new ZiskMemoryStdin from a slice of bytes. - pub fn from_slice(data: &[u8]) -> Self { - Self::new(data.to_vec()) - } -} - -impl ZiskIO for ZiskMemoryStdin { - fn read(&mut self) -> Vec { - // Return all the data - self.data.clone() - } - - fn read_slice(&mut self, slice: &mut [u8]) { - self.cursor.read_exact(slice).expect("Failed to read slice from memory"); - } - - fn read_into(&mut self, buffer: &mut [u8]) { - self.cursor.read_exact(buffer).expect("Failed to read into buffer from memory"); - } - - fn write_serialized(&mut self, _data: &[u8]) { - panic!("Write operations are not supported for ZiskMemoryStdin"); - } - - fn write_bytes(&mut self, _data: &[u8]) { - panic!("Write operations are not supported for ZiskMemoryStdin"); - } -} diff --git a/common/src/io/mod.rs b/common/src/io/mod.rs index 06d95dd81..62e380e24 100644 --- a/common/src/io/mod.rs +++ b/common/src/io/mod.rs @@ -1,9 +1,5 @@ -mod file_stdin; -mod memory_stdin; -mod null_stdin; -mod zisk_stdin; +mod stdin; +mod stream; -pub use file_stdin::*; -pub use memory_stdin::*; -pub use null_stdin::*; -pub use zisk_stdin::*; +pub use stdin::*; +pub use stream::*; diff --git a/common/src/io/null_stdin.rs b/common/src/io/null_stdin.rs deleted file mode 100644 index 4348ebbc3..000000000 --- a/common/src/io/null_stdin.rs +++ /dev/null @@ -1,19 +0,0 @@ -use tracing::warn; - -use crate::io::ZiskIO; - -pub struct ZiskNullStdin; - -impl ZiskIO for ZiskNullStdin { - fn read(&mut self) -> Vec { - Vec::new() - } - fn read_slice(&mut self, _slice: &mut [u8]) {} - fn read_into(&mut self, _buffer: &mut [u8]) {} - fn write_serialized(&mut self, _data: &[u8]) { - warn!("NullStdin does not support writing"); - } - fn write_bytes(&mut self, _data: &[u8]) { - warn!("NullStdin does not support writing"); - } -} diff --git a/common/src/io/stdin/file.rs b/common/src/io/stdin/file.rs new file mode 100644 index 000000000..a2c6a139f --- /dev/null +++ b/common/src/io/stdin/file.rs @@ -0,0 +1,111 @@ +//! A file-based implementation of ZiskStdin. +//! This module provides functionality to read input data from a file. + +use anyhow::Result; +use serde::{de::DeserializeOwned, Serialize}; +use std::fs::{self, File}; +use std::io::{BufReader, Read}; +use std::path::{Path, PathBuf}; +use std::sync::Mutex; + +use crate::io::ZiskIO; + +/// A file-based implementation of ZiskStdin that reads from a file. +pub struct ZiskFileStdin { + /// The path to the input file. + path: PathBuf, + + /// Buffered reader for the file. + reader: Mutex>, +} + +impl ZiskFileStdin { + /// Create a new FileStdin from a file path. + pub fn new>(path: P) -> std::io::Result { + let path_buf = path.as_ref().to_path_buf(); + if !path_buf.exists() { + return Err(std::io::Error::new( + std::io::ErrorKind::NotFound, + format!("Input file not found at {:?}", path_buf.display()), + )); + } + + let file = File::open(&path_buf)?; + Ok(ZiskFileStdin { path: path_buf, reader: Mutex::new(BufReader::new(file)) }) + } + + fn read_raw_data(&self) -> std::io::Result> { + let mut reader = self.reader.lock().unwrap(); + + let mut len_bytes = [0u8; 8]; + reader.read_exact(&mut len_bytes)?; + let len = usize::from_le_bytes(len_bytes); + + let mut data = vec![0u8; len]; + reader.read_exact(&mut data)?; + + let total_len = 8 + len; + let padding = (8 - (total_len % 8)) % 8; + if padding > 0 { + let mut padding_bytes = vec![0u8; padding]; + reader.read_exact(&mut padding_bytes)?; + } + + Ok(data) + } +} + +impl ZiskIO for ZiskFileStdin { + fn read_raw_bytes(&self) -> Vec { + fs::read(&self.path).expect("Could not read inputs file") + } + + fn read_bytes(&self) -> Vec { + self.read_raw_data().expect("Failed to read into buffer") + } + + fn read_slice(&self, slice: &mut [u8]) { + let data = self.read_raw_data().expect("Failed to read slice"); + assert_eq!( + slice.len(), + data.len(), + "Slice length mismatch: expected {}, got {}", + data.len(), + slice.len() + ); + slice.copy_from_slice(&data); + } + + fn read(&self) -> Result { + let data = self + .read_raw_data() + .map_err(|e| anyhow::anyhow!("Failed to read data from file: {}", e))?; + + bincode::deserialize(&data) + .map_err(|e| anyhow::anyhow!("Failed to deserialize from file: {}", e)) + } + + fn write(&self, _data: &T) { + // This is a read-only stdin implementation + // Writing is not supported for file-based stdin + panic!("Write operations are not supported for FileStdin"); + } + + fn write_slice(&self, _data: &[u8]) { + // This is a read-only stdin implementation + // Writing is not supported for file-based stdin + panic!("Write operations are not supported for FileStdin"); + } + + fn write_proof(&self, _proof: &[u8]) { + // This is a read-only stdin implementation + // Writing is not supported for file-based stdin + panic!("Write operations are not supported for FileStdin"); + } + + fn save(&self, _path: &Path) -> Result<()> { + // This is a read-only stdin implementation + // Saving is not supported for file-based stdin + panic!("Save operations are not supported for FileStdin"); + } +} diff --git a/common/src/io/stdin/memory.rs b/common/src/io/stdin/memory.rs new file mode 100644 index 000000000..8a88b3764 --- /dev/null +++ b/common/src/io/stdin/memory.rs @@ -0,0 +1,143 @@ +use anyhow::Result; +use serde::{de::DeserializeOwned, Serialize}; +use std::io::{Cursor, Read}; +use std::path::Path; +use std::sync::Mutex; + +use crate::io::ZiskIO; + +/// A memory-based implementation of ZiskStdin that reads from in-memory data. +pub struct ZiskMemoryStdin { + data: Mutex>, + cursor: Mutex>>, +} + +impl ZiskMemoryStdin { + /// Create a new ZiskMemoryStdin from a vector of bytes. + pub fn new(data: Vec) -> Self { + let cursor = Mutex::new(Cursor::new(data.clone())); + ZiskMemoryStdin { data: Mutex::new(data), cursor } + } + + /// Create a new ZiskMemoryStdin from a string (UTF-8 encoded). + pub fn from_string(data: String) -> Self { + Self::new(data.into_bytes()) + } + + /// Create a new ZiskMemoryStdin from a slice of bytes. + pub fn from_slice(data: &[u8]) -> Self { + Self::new(data.to_vec()) + } +} + +impl ZiskMemoryStdin { + fn read_raw_data(&self) -> std::io::Result> { + let mut cursor = self.cursor.lock().unwrap(); + + let mut len_bytes = [0u8; 8]; + cursor.read_exact(&mut len_bytes)?; + let len = usize::from_le_bytes(len_bytes); + + let mut data = vec![0u8; len]; + cursor.read_exact(&mut data)?; + + let total_len = 8 + len; + let padding = (8 - (total_len % 8)) % 8; + if padding > 0 { + let mut padding_bytes = vec![0u8; padding]; + cursor.read_exact(&mut padding_bytes)?; + } + + Ok(data) + } +} + +impl ZiskIO for ZiskMemoryStdin { + fn read_raw_bytes(&self) -> Vec { + self.data.lock().unwrap().clone() + } + + fn read_bytes(&self) -> Vec { + self.read_raw_data().expect("Failed to read into buffer from memory") + } + + fn read_slice(&self, slice: &mut [u8]) { + let data = self.read_raw_data().expect("Failed to read slice from memory"); + assert_eq!( + slice.len(), + data.len(), + "Slice length mismatch: expected {}, got {}", + data.len(), + slice.len() + ); + slice.copy_from_slice(&data); + } + + fn read(&self) -> Result { + let data = self + .read_raw_data() + .map_err(|e| anyhow::anyhow!("Failed to read data from memory: {}", e))?; + + bincode::deserialize(&data) + .map_err(|e| anyhow::anyhow!("Failed to deserialize from memory: {}", e)) + } + + fn write(&self, data: &T) { + let mut tmp = Vec::new(); + bincode::serialize_into(&mut tmp, data).expect("Failed to serialize data into memory"); + + // Calculate padding for 8-byte alignment + let data_len = tmp.len(); + let total_len = 8 + data_len; // header + data + let padding = (8 - (total_len % 8)) % 8; + + // Write 8-byte length header (includes padding) + let len_bytes = data_len.to_le_bytes(); + + self.data.lock().unwrap().extend_from_slice(&len_bytes); + self.data.lock().unwrap().extend_from_slice(&tmp); + + // Add padding + if padding > 0 { + self.data.lock().unwrap().extend_from_slice(&vec![0u8; padding]); + } + + let mut cursor = self.cursor.lock().unwrap(); + cursor.get_mut().extend_from_slice(&len_bytes); + cursor.get_mut().extend_from_slice(&tmp); + if padding > 0 { + cursor.get_mut().extend_from_slice(&vec![0u8; padding]); + } + } + + fn write_slice(&self, data: &[u8]) { + let data_len = data.len(); + let total_len = 8 + data_len; + let padding = (8 - (total_len % 8)) % 8; + + let len_bytes = data_len.to_le_bytes(); + + self.data.lock().unwrap().extend_from_slice(&len_bytes); + self.data.lock().unwrap().extend_from_slice(data); + + if padding > 0 { + self.data.lock().unwrap().extend_from_slice(&vec![0u8; padding]); + } + + let mut cursor = self.cursor.lock().unwrap(); + cursor.get_mut().extend_from_slice(&len_bytes); + cursor.get_mut().extend_from_slice(data); + if padding > 0 { + cursor.get_mut().extend_from_slice(&vec![0u8; padding]); + } + } + + fn write_proof(&self, proof: &[u8]) { + self.write_slice(proof); + } + + fn save(&self, path: &Path) -> Result<()> { + std::fs::write(path, self.data.lock().unwrap().as_slice())?; + Ok(()) + } +} diff --git a/common/src/io/stdin/mod.rs b/common/src/io/stdin/mod.rs new file mode 100644 index 000000000..aa38bbece --- /dev/null +++ b/common/src/io/stdin/mod.rs @@ -0,0 +1,9 @@ +mod file; +mod memory; +mod null; +mod zisk_stdin; + +pub use file::*; +pub use memory::*; +pub use null::*; +pub use zisk_stdin::*; diff --git a/common/src/io/stdin/null.rs b/common/src/io/stdin/null.rs new file mode 100644 index 000000000..b039eb374 --- /dev/null +++ b/common/src/io/stdin/null.rs @@ -0,0 +1,37 @@ +use tracing::warn; + +use crate::io::ZiskIO; +use anyhow::Result; +use serde::{de::DeserializeOwned, Serialize}; +use std::path::Path; + +pub struct ZiskNullStdin; + +impl ZiskIO for ZiskNullStdin { + fn read_raw_bytes(&self) -> Vec { + Vec::new() + } + + fn read_bytes(&self) -> Vec { + Vec::new() + } + + fn read_slice(&self, _slice: &mut [u8]) {} + + fn read(&self) -> Result { + Err(anyhow::anyhow!("NullStdin does not support reading")) + } + fn write(&self, _data: &T) { + warn!("NullStdin does not support writing"); + } + fn write_slice(&self, _data: &[u8]) { + warn!("NullStdin does not support writing"); + } + fn write_proof(&self, _proof: &[u8]) { + warn!("NullStdin does not support writing"); + } + fn save(&self, _path: &Path) -> Result<()> { + warn!("NullStdin does not support saving"); + Ok(()) + } +} diff --git a/common/src/io/stdin/zisk_stdin.rs b/common/src/io/stdin/zisk_stdin.rs new file mode 100644 index 000000000..8cb40e03f --- /dev/null +++ b/common/src/io/stdin/zisk_stdin.rs @@ -0,0 +1,237 @@ +use crate::io::{ZiskFileStdin, ZiskMemoryStdin, ZiskNullStdin}; +use anyhow::Result; +use serde::{de::DeserializeOwned, Serialize}; +use std::path::Path; +use std::sync::Arc; + +pub trait ZiskIO: Send + Sync { + fn read_raw_bytes(&self) -> Vec; + + /// Read a value from the buffer. + fn read_bytes(&self) -> Vec; + + /// Read a slice of bytes from the buffer. + fn read_slice(&self, slice: &mut [u8]); + + /// Read and deserialize a value from the buffer. + fn read(&self) -> Result; + + /// Write a serialized value to the buffer. + fn write(&self, data: &T); + + /// Write a slice of bytes to the buffer. + fn write_slice(&self, data: &[u8]); + + /// Write proof + fn write_proof(&self, proof: &[u8]); + + fn save(&self, path: &Path) -> Result<()>; +} + +pub enum ZiskIOVariant { + File(ZiskFileStdin), + Null(ZiskNullStdin), + Memory(ZiskMemoryStdin), +} + +impl ZiskIO for ZiskIOVariant { + fn read_raw_bytes(&self) -> Vec { + match self { + ZiskIOVariant::File(file_stdin) => file_stdin.read_raw_bytes(), + ZiskIOVariant::Null(null_stdin) => null_stdin.read_raw_bytes(), + ZiskIOVariant::Memory(memory_stdin) => memory_stdin.read_raw_bytes(), + } + } + + fn read_bytes(&self) -> Vec { + match self { + ZiskIOVariant::File(file_stdin) => file_stdin.read_bytes(), + ZiskIOVariant::Null(null_stdin) => null_stdin.read_bytes(), + ZiskIOVariant::Memory(memory_stdin) => memory_stdin.read_bytes(), + } + } + + fn read_slice(&self, slice: &mut [u8]) { + match self { + ZiskIOVariant::File(file_stdin) => file_stdin.read_slice(slice), + ZiskIOVariant::Null(null_stdin) => null_stdin.read_slice(slice), + ZiskIOVariant::Memory(memory_stdin) => memory_stdin.read_slice(slice), + } + } + + fn read(&self) -> Result { + match self { + ZiskIOVariant::File(file_stdin) => file_stdin.read(), + ZiskIOVariant::Null(null_stdin) => null_stdin.read(), + ZiskIOVariant::Memory(memory_stdin) => memory_stdin.read(), + } + } + + fn write(&self, data: &T) { + match self { + ZiskIOVariant::File(file_stdin) => file_stdin.write(data), + ZiskIOVariant::Null(null_stdin) => null_stdin.write(data), + ZiskIOVariant::Memory(memory_stdin) => memory_stdin.write(data), + } + } + + fn write_slice(&self, data: &[u8]) { + match self { + ZiskIOVariant::File(file_stdin) => file_stdin.write_slice(data), + ZiskIOVariant::Null(null_stdin) => null_stdin.write_slice(data), + ZiskIOVariant::Memory(memory_stdin) => memory_stdin.write_slice(data), + } + } + + fn write_proof(&self, proof: &[u8]) { + match self { + ZiskIOVariant::File(file_stdin) => file_stdin.write_proof(proof), + ZiskIOVariant::Null(null_stdin) => null_stdin.write_proof(proof), + ZiskIOVariant::Memory(memory_stdin) => memory_stdin.write_proof(proof), + } + } + + fn save(&self, path: &Path) -> Result<()> { + match self { + ZiskIOVariant::File(file_stdin) => file_stdin.save(path), + ZiskIOVariant::Null(null_stdin) => null_stdin.save(path), + ZiskIOVariant::Memory(memory_stdin) => memory_stdin.save(path), + } + } +} + +#[derive(Clone)] +pub struct ZiskStdin { + io: Arc, +} + +impl ZiskIO for ZiskStdin { + fn read_raw_bytes(&self) -> Vec { + self.io.read_raw_bytes() + } + + fn read_bytes(&self) -> Vec { + self.io.read_bytes() + } + + fn read_slice(&self, slice: &mut [u8]) { + self.io.read_slice(slice) + } + + fn read(&self) -> Result { + self.io.read() + } + + fn write(&self, data: &T) { + self.io.write(data) + } + + fn write_slice(&self, data: &[u8]) { + self.io.write_slice(data) + } + + fn write_proof(&self, proof: &[u8]) { + self.io.write_proof(proof) + } + + fn save(&self, path: &Path) -> Result<()> { + self.io.save(path) + } +} + +impl Default for ZiskStdin { + fn default() -> Self { + Self::new() + } +} + +impl ZiskStdin { + /// Create new memory-based stdin + pub fn new() -> Self { + Self { io: Arc::new(ZiskIOVariant::Memory(ZiskMemoryStdin::new(Vec::new()))) } + } + + /// Create a null stdin (no input) + pub fn null() -> Self { + Self { io: Arc::new(ZiskIOVariant::Null(ZiskNullStdin)) } + } + + /// Create a file-based stdin + pub fn from_file>(path: P) -> Result { + Ok(Self { io: Arc::new(ZiskIOVariant::File(ZiskFileStdin::new(path)?)) }) + } + + pub fn from_vec(data: Vec) -> Self { + Self { io: Arc::new(ZiskIOVariant::Memory(ZiskMemoryStdin::new(data))) } + } + + /// Create a ZiskStdin from a URI string + /// - None -> null stream + /// - "scheme://path" -> parsed based on scheme + /// - No scheme -> treated as file path + pub fn from_uri>(stdin_uri: Option) -> Result { + if stdin_uri.is_none() { + return Ok(ZiskStdin::null()); + } + + let uri = stdin_uri.unwrap().into(); + + // Check if URI contains "://" separator + if let Some(pos) = uri.find("://") { + let (scheme, path) = uri.split_at(pos); + let path = &path[3..]; // Skip "://" + + match scheme { + "file" => ZiskStdin::from_file(path), + // Unknown scheme - could error or fallback + _ => Err(anyhow::anyhow!("Unknown stream source scheme: {}", scheme)), + } + } else { + // No "://" found - fallback as a file path + ZiskStdin::from_file(uri.as_str()) + } + } + + // Inherent methods that delegate to ZiskIO trait + // This allows using these methods without importing the trait + + /// Read raw bytes + pub fn read_raw_bytes(&self) -> Vec { + ZiskIO::read_raw_bytes(self) + } + + /// Read a value from the buffer. + pub fn read_bytes(&self) -> Vec { + ZiskIO::read_bytes(self) + } + + /// Read a slice of bytes from the buffer. + pub fn read_slice(&self, slice: &mut [u8]) { + ZiskIO::read_slice(self, slice) + } + + /// Read and deserialize a value from the buffer. + pub fn read(&self) -> Result { + ZiskIO::read(self) + } + + /// Write a serialized value to the buffer. + pub fn write(&self, data: &T) { + ZiskIO::write(self, data) + } + + /// Write a slice of bytes to the buffer. + pub fn write_slice(&self, data: &[u8]) { + ZiskIO::write_slice(self, data) + } + + /// Write proof + pub fn write_proof(&self, proof: &[u8]) { + ZiskIO::write_proof(self, proof) + } + + /// Save to file + pub fn save(&self, path: &Path) -> Result<()> { + ZiskIO::save(self, path) + } +} diff --git a/common/src/io/stream/file.rs b/common/src/io/stream/file.rs new file mode 100644 index 000000000..9dc224830 --- /dev/null +++ b/common/src/io/stream/file.rs @@ -0,0 +1,142 @@ +//! A file-based implementation of FileStreamReader and FileStreamWriter. +//! This module provides functionality to read and write data from/to files. + +use std::fs::File; +use std::io::{BufReader, BufWriter, Read, Write}; +use std::path::{Path, PathBuf}; + +use super::{StreamRead, StreamWrite}; + +use anyhow::Result; + +/// A file-based implementation of ZiskStdin that reads from a file. +pub struct FileStreamReader { + /// The path to the input file. + path: PathBuf, + + /// Buffered reader for the file. + reader: Option>, + + /// Track if the file has been read already. + has_read: bool, +} + +impl FileStreamReader { + /// Create a new FileStreamReader from a file path. + pub fn new>(path: P) -> std::io::Result { + Ok(FileStreamReader { path: path.as_ref().to_path_buf(), reader: None, has_read: false }) + } +} + +impl StreamRead for FileStreamReader { + /// Open/initialize the stream for reading + fn open(&mut self) -> Result<()> { + if self.is_active() { + return Ok(()); + } + + let file = File::open(&self.path)?; + self.reader = Some(BufReader::new(file)); + self.has_read = false; + Ok(()) + } + + /// Reads the next item from the stream. + /// + /// This method does **not** stream incrementally. Instead, it repeatedly toggles + /// between returning the full file contents and returning `None`, producing the + /// following repeating sequence: `Some(Vec), None, Some(Vec), None, ...` + fn next(&mut self) -> Result>> { + if self.has_read { + self.has_read = false; + return Ok(None); + } + + self.has_read = true; + + // Open the file if it's not already open + self.open()?; + + let reader = self.reader.as_mut().ok_or_else(|| { + anyhow::anyhow!("FileStreamReader: Reader is not initialized after opening the file") + })?; + + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer)?; + + Ok(Some(buffer)) + } + + /// Close the stream + fn close(&mut self) -> Result<()> { + self.reader = None; + Ok(()) + } + + /// Check if the stream is currently active + fn is_active(&self) -> bool { + self.reader.is_some() + } +} + +/// A file-based implementation of StreamWrite that writes to a file. +pub struct FileStreamWriter { + /// The path to the output file. + path: PathBuf, + + /// Buffered writer for the file. + writer: Option>, +} + +impl FileStreamWriter { + /// Create a new FileStreamWriter from a file path. + pub fn new>(path: P) -> std::io::Result { + Ok(FileStreamWriter { path: path.as_ref().to_path_buf(), writer: None }) + } +} + +impl StreamWrite for FileStreamWriter { + /// Open/initialize the stream for writing + fn open(&mut self) -> Result<()> { + if self.is_active() { + return Ok(()); + } + + let file = File::create(&self.path)?; + self.writer = Some(BufWriter::new(file)); + Ok(()) + } + + /// Write data to the stream, returns the number of bytes written + fn write(&mut self, item: &[u8]) -> Result { + // Open the file if it's not already open + self.open()?; + + let writer = self.writer.as_mut().ok_or_else(|| { + anyhow::anyhow!("FileStreamWriter: Writer is not initialized after opening the file") + })?; + + writer.write_all(item)?; + Ok(item.len()) + } + + /// Flush any buffered data + fn flush(&mut self) -> Result<()> { + if let Some(writer) = self.writer.as_mut() { + writer.flush()?; + } + Ok(()) + } + + /// Close the stream + fn close(&mut self) -> Result<()> { + self.flush()?; + self.writer = None; + Ok(()) + } + + /// Check if the stream is currently active + fn is_active(&self) -> bool { + self.writer.is_some() + } +} diff --git a/common/src/io/stream/memory.rs b/common/src/io/stream/memory.rs new file mode 100644 index 000000000..0de94baad --- /dev/null +++ b/common/src/io/stream/memory.rs @@ -0,0 +1,52 @@ +use std::io::{Cursor, Read}; + +use crate::io::stream::StreamRead; + +/// A memory-based implementation of StreamSource that reads from in-memory data. +pub struct MemoryStreamReader { + data: Vec, + cursor: Cursor>, +} + +impl MemoryStreamReader { + /// Create a new MemoryStreamReader from a vector of bytes. + pub fn new(data: Vec) -> Self { + let cursor = Cursor::new(data.clone()); + MemoryStreamReader { data, cursor } + } + + /// Create a new MemoryStreamReader from a string (UTF-8 encoded). + pub fn from_string(data: String) -> Self { + Self::new(data.into_bytes()) + } + + /// Create a new MemoryStreamReader from a slice of bytes. + pub fn from_slice(data: &[u8]) -> Self { + Self::new(data.to_vec()) + } +} + +impl StreamRead for MemoryStreamReader { + fn open(&mut self) -> anyhow::Result<()> { + self.cursor.set_position(0); + Ok(()) + } + + fn next(&mut self) -> anyhow::Result>> { + let mut buffer = Vec::new(); + let bytes_read = self.cursor.read_to_end(&mut buffer)?; + if bytes_read == 0 { + Ok(None) + } else { + Ok(Some(buffer)) + } + } + + fn close(&mut self) -> anyhow::Result<()> { + Ok(()) + } + + fn is_active(&self) -> bool { + self.cursor.position() < self.data.len() as u64 + } +} diff --git a/common/src/io/stream/mod.rs b/common/src/io/stream/mod.rs new file mode 100644 index 000000000..ffdc645c3 --- /dev/null +++ b/common/src/io/stream/mod.rs @@ -0,0 +1,19 @@ +mod file; +mod memory; +mod quic; +mod stream_reader; +mod stream_writer; +mod zisk_stream; + +#[cfg(unix)] +mod unix_socket; + +pub use file::{FileStreamReader, FileStreamWriter}; +pub use memory::MemoryStreamReader; +pub use quic::{QuicStreamReader, QuicStreamWriter}; +pub use stream_reader::*; +pub use stream_writer::*; +pub use zisk_stream::*; + +#[cfg(unix)] +pub use unix_socket::{UnixSocketStreamReader, UnixSocketStreamWriter}; diff --git a/common/src/io/stream/quic.rs b/common/src/io/stream/quic.rs new file mode 100644 index 000000000..84aba2ca5 --- /dev/null +++ b/common/src/io/stream/quic.rs @@ -0,0 +1,635 @@ +//! A QUIC-based implementation of StreamReader and StreamWriter. +//! This module provides functionality to read and write data over QUIC connections +//! for both local and network communication. + +use std::net::SocketAddr; +use std::sync::Arc; + +use anyhow::{Context, Result}; +use quinn::{Connection, Endpoint, ServerConfig}; +use tokio::runtime::{Handle, Runtime}; + +use super::{StreamRead, StreamWrite}; + +/// Helper to run async code, either using current runtime or creating one +fn run_async(f: F) -> Result +where + F: std::future::Future> + Send + 'static, + T: Send + 'static, +{ + // Try to use current runtime handle if we're already in a tokio context + match Handle::try_current() { + Ok(handle) => { + // We're in a tokio runtime, use block_in_place to allow blocking + tokio::task::block_in_place(move || handle.block_on(f)) + } + Err(_) => { + // Not in a runtime, create a temporary one + let rt = Runtime::new().context("Failed to create tokio runtime")?; + rt.block_on(f) + } + } +} + +/// Ensure crypto provider is initialized (idempotent) +fn ensure_crypto_provider() { + use std::sync::Once; + static INIT: Once = Once::new(); + INIT.call_once(|| { + let _ = rustls::crypto::ring::default_provider().install_default(); + }); +} + +/// A QUIC implementation of StreamRead that receives data over QUIC streams. +pub struct QuicStreamReader { + /// The QUIC connection + connection: Option, + + /// Client endpoint + endpoint: Option, + + /// Server address to connect to + server_addr: SocketAddr, +} + +impl QuicStreamReader { + /// Create a new QuicStreamReader that connects to the specified server address. + /// + /// This creates a client endpoint that connects to the server to read data. + pub fn new(server_addr: SocketAddr) -> Result { + // Ensure crypto provider is initialized + ensure_crypto_provider(); + + // We don't need to store a runtime anymore since we'll use run_async helper + Ok(QuicStreamReader { connection: None, endpoint: None, server_addr }) + } +} + +impl StreamRead for QuicStreamReader { + /// Open/initialize the stream for reading + /// + /// Establishes a QUIC connection to the server. + fn open(&mut self) -> Result<()> { + if self.is_active() { + return Ok(()); + } + + let server_addr = self.server_addr; + let (endpoint, connection) = run_async(async move { + let mut endpoint = Endpoint::client("0.0.0.0:0".parse().unwrap())?; + + // Configure to accept self-signed certificates (for development) + let rustls_config = rustls::ClientConfig::builder() + .dangerous() + .with_custom_certificate_verifier(Arc::new(SkipServerVerification)) + .with_no_client_auth(); + + let mut client_config = quinn::ClientConfig::new(Arc::new( + quinn::crypto::rustls::QuicClientConfig::try_from(rustls_config) + .map_err(|e| anyhow::anyhow!("Failed to create QUIC client config: {}", e))?, + )); + + // Configure transport for better performance + let mut transport_config = quinn::TransportConfig::default(); + transport_config.max_concurrent_uni_streams(1024u32.into()); + client_config.transport_config(Arc::new(transport_config)); + + endpoint.set_default_client_config(client_config); + + let connection = endpoint + .connect(server_addr, "localhost")? + .await + .context("Failed to connect to server")?; + + Ok::<_, anyhow::Error>((endpoint, connection)) + })?; + + self.endpoint = Some(endpoint); + self.connection = Some(connection); + + Ok(()) + } + + /// Reads the next message from a QUIC unidirectional stream. + /// + /// Each call to next() accepts a new unidirectional stream and reads + /// all data from it, providing natural message boundaries. + fn next(&mut self) -> Result>> { + self.open()?; + + let connection = self + .connection + .as_ref() + .ok_or_else(|| anyhow::anyhow!("QuicStreamReader: Connection not established"))? + .clone(); + + run_async(async move { + // Accept next unidirectional stream + let mut recv = match connection.accept_uni().await { + Ok(stream) => stream, + Err(quinn::ConnectionError::ApplicationClosed(_)) => { + return Ok(None); + } + Err(quinn::ConnectionError::ConnectionClosed(_)) => { + return Ok(None); + } + Err(quinn::ConnectionError::TimedOut) => { + return Ok(None); + } + Err(e) => return Err(anyhow::anyhow!("Failed to accept stream: {}", e)), + }; + + // Read all data from the stream (10MB max) + let data = + recv.read_to_end(10 * 1024 * 1024).await.context("Failed to read from stream")?; + + Ok(Some(data)) + }) + } + + /// Close the stream + fn close(&mut self) -> Result<()> { + if let Some(connection) = self.connection.take() { + connection.close(0u32.into(), b"closing"); + } + if let Some(endpoint) = self.endpoint.take() { + let _ = run_async(async move { + endpoint.wait_idle().await; + Ok::<_, anyhow::Error>(()) + }); + } + Ok(()) + } + + /// Check if the stream is currently active + fn is_active(&self) -> bool { + self.connection.is_some() + } +} + +/// A QUIC implementation of StreamWrite that sends data over QUIC streams. +pub struct QuicStreamWriter { + /// The QUIC connection + connection: Option, + + /// Tokio runtime for async operations + runtime: Arc, + + /// Server endpoint + endpoint: Option, + + /// Server address to bind to + bind_addr: SocketAddr, +} + +impl QuicStreamWriter { + /// Create a new QuicStreamWriter that listens on the specified address. + /// + /// This creates a server endpoint that waits for incoming reader connections. + pub fn new(bind_addr: SocketAddr) -> Result { + // Ensure crypto provider is initialized + ensure_crypto_provider(); + + let runtime = Arc::new( + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .context("Failed to create tokio runtime")?, + ); + + Ok(QuicStreamWriter { connection: None, runtime, endpoint: None, bind_addr }) + } + + /// Configure server with self-signed certificate + fn configure_server() -> Result { + let cert = rcgen::generate_simple_self_signed(vec!["localhost".into()]) + .context("Failed to generate certificate")?; + + let key = rustls::pki_types::PrivateKeyDer::Pkcs8(cert.signing_key.serialize_der().into()); + let cert_der = rustls::pki_types::CertificateDer::from(cert.cert); + + let mut server_config = ServerConfig::with_single_cert(vec![cert_der], key) + .context("Failed to create server config")?; + + // Configure transport for better performance + let mut transport_config = quinn::TransportConfig::default(); + transport_config.max_concurrent_uni_streams(1024u32.into()); + server_config.transport_config(Arc::new(transport_config)); + + Ok(server_config) + } +} + +impl StreamWrite for QuicStreamWriter { + /// Open/initialize the stream for writing + /// + /// Starts listening for incoming reader connections. + fn open(&mut self) -> Result<()> { + if self.is_active() { + return Ok(()); + } + + // Clean up old resources if they exist + if let Some(endpoint) = self.endpoint.take() { + self.runtime.block_on(async { + endpoint.wait_idle().await; + }); + } + + let server_config = Self::configure_server()?; + + let (endpoint, connection) = self.runtime.block_on(async { + let endpoint = Endpoint::server(server_config, self.bind_addr) + .context("Failed to create server endpoint")?; + + // Wait for incoming connection + let incoming = endpoint.accept().await.context("Failed to accept connection")?; + + let connection = incoming.await.context("Failed to establish connection")?; + + Ok::<_, anyhow::Error>((endpoint, connection)) + })?; + + self.endpoint = Some(endpoint); + self.connection = Some(connection); + + Ok(()) + } + + /// Write data to the stream, returns the number of bytes written. + /// + /// Each call to write() opens a new unidirectional stream, writes the data, + /// and closes the stream, providing natural message boundaries. + fn write(&mut self, item: &[u8]) -> Result { + self.open()?; + + let connection = self + .connection + .as_ref() + .ok_or_else(|| anyhow::anyhow!("QuicStreamWriter: Connection not established"))?; + + let len = item.len(); + let data = item.to_vec(); + + self.runtime.block_on(async { + // Open a new unidirectional stream for this message + let mut send = connection.open_uni().await.context("Failed to open stream")?; + + // Write all data + send.write_all(&data).await.context("Failed to write to stream")?; + + // Finish the stream (signals end of message) + send.finish().context("Failed to finish stream")?; + + Ok(len) + }) + } + + /// Flush any buffered data + /// + /// QUIC handles flushing automatically, so this is a no-op. + fn flush(&mut self) -> Result<()> { + Ok(()) + } + + /// Close the stream + fn close(&mut self) -> Result<()> { + if let Some(connection) = self.connection.take() { + connection.close(0u32.into(), b"closing"); + } + if let Some(endpoint) = self.endpoint.take() { + self.runtime.block_on(async { + endpoint.wait_idle().await; + }); + } + Ok(()) + } + + /// Check if the stream is currently active + fn is_active(&self) -> bool { + self.connection.is_some() + } +} + +/// Certificate verifier that accepts any certificate (for development only!) +/// +/// ⚠️ WARNING: This is INSECURE and should NEVER be used in production. +/// It accepts all certificates without validation, making you vulnerable to MITM attacks. +/// For production use, implement proper certificate validation. +#[derive(Debug)] +struct SkipServerVerification; + +impl rustls::client::danger::ServerCertVerifier for SkipServerVerification { + fn verify_server_cert( + &self, + _end_entity: &rustls::pki_types::CertificateDer<'_>, + _intermediates: &[rustls::pki_types::CertificateDer<'_>], + _server_name: &rustls::pki_types::ServerName<'_>, + _ocsp_response: &[u8], + _now: rustls::pki_types::UnixTime, + ) -> Result { + Ok(rustls::client::danger::ServerCertVerified::assertion()) + } + + fn verify_tls12_signature( + &self, + _message: &[u8], + _cert: &rustls::pki_types::CertificateDer<'_>, + _dss: &rustls::DigitallySignedStruct, + ) -> Result { + Ok(rustls::client::danger::HandshakeSignatureValid::assertion()) + } + + fn verify_tls13_signature( + &self, + _message: &[u8], + _cert: &rustls::pki_types::CertificateDer<'_>, + _dss: &rustls::DigitallySignedStruct, + ) -> Result { + Ok(rustls::client::danger::HandshakeSignatureValid::assertion()) + } + + fn supported_verify_schemes(&self) -> Vec { + vec![ + rustls::SignatureScheme::RSA_PKCS1_SHA256, + rustls::SignatureScheme::ECDSA_NISTP256_SHA256, + rustls::SignatureScheme::ED25519, + ] + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::mpsc; + use std::thread; + use std::time::Duration; + + // Initialize crypto provider once for all tests + fn init_crypto() { + use std::sync::Once; + static INIT: Once = Once::new(); + INIT.call_once(|| { + let _ = rustls::crypto::ring::default_provider().install_default(); + }); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_single_message() { + init_crypto(); + let server_addr: SocketAddr = "127.0.0.1:15001".parse().unwrap(); + + // Channel to signal when writer has written data + let (tx, rx) = mpsc::channel(); + + // Spawn writer (server) thread + let writer_thread = thread::spawn(move || { + let mut writer = QuicStreamWriter::new(server_addr).unwrap(); + writer.write(b"Hello, QUIC!").unwrap(); + tx.send(()).unwrap(); // Signal that data is written + + // Wait for reader to finish before closing + thread::sleep(Duration::from_millis(500)); + writer.close().unwrap(); + }); + + // Give writer time to start listening + thread::sleep(Duration::from_millis(100)); + + // Reader connects (this triggers the writer's connection accept) + let mut reader = QuicStreamReader::new(server_addr).unwrap(); + reader.open().unwrap(); // Explicitly connect + + // Wait for writer to have written data + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + + let message = reader.next().unwrap().unwrap(); + assert_eq!(message, b"Hello, QUIC!"); + reader.close().unwrap(); + + writer_thread.join().unwrap(); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_multiple_messages() { + init_crypto(); + let server_addr: SocketAddr = "127.0.0.1:15002".parse().unwrap(); + + // Channel to signal when writer has written data + let (tx, rx) = mpsc::channel(); + + // Spawn writer (server) thread + let writer_thread = thread::spawn(move || { + let mut writer = QuicStreamWriter::new(server_addr).unwrap(); + writer.write(b"First").unwrap(); + writer.write(b"Second message").unwrap(); + writer.write(b"Third message with more data!").unwrap(); + tx.send(()).unwrap(); // Signal that data is written + + thread::sleep(Duration::from_millis(200)); + writer.close().unwrap(); + }); + + thread::sleep(Duration::from_millis(100)); + + // Reader connects + let mut reader = QuicStreamReader::new(server_addr).unwrap(); + reader.open().unwrap(); // Explicitly connect + + // Wait for writer to have written data + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + + let msg1 = reader.next().unwrap().unwrap(); + assert_eq!(msg1, b"First"); + let msg2 = reader.next().unwrap().unwrap(); + assert_eq!(msg2, b"Second message"); + let msg3 = reader.next().unwrap().unwrap(); + assert_eq!(msg3, b"Third message with more data!"); + reader.close().unwrap(); + + writer_thread.join().unwrap(); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_message_boundaries() { + init_crypto(); + let server_addr: SocketAddr = "127.0.0.1:15003".parse().unwrap(); + + // Channel to signal when writer has written data + let (tx, rx) = mpsc::channel(); + + // Spawn writer (server) thread + let writer_thread = thread::spawn(move || { + let mut writer = QuicStreamWriter::new(server_addr).unwrap(); + writer.write(b"ABC").unwrap(); + writer.write(b"DEF").unwrap(); + tx.send(()).unwrap(); // Signal that data is written + + thread::sleep(Duration::from_millis(200)); + writer.close().unwrap(); + }); + + thread::sleep(Duration::from_millis(100)); + + // Reader connects + let mut reader = QuicStreamReader::new(server_addr).unwrap(); + reader.open().unwrap(); // Explicitly connect + + // Wait for writer to have written data + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + + let msg1 = reader.next().unwrap().unwrap(); + assert_eq!(msg1, b"ABC"); + let msg2 = reader.next().unwrap().unwrap(); + assert_eq!(msg2, b"DEF"); + // Should NOT be concatenated like "ABCDEF" + reader.close().unwrap(); + + writer_thread.join().unwrap(); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_large_message() { + init_crypto(); + let server_addr: SocketAddr = "127.0.0.1:15004".parse().unwrap(); + + // Create a large message (1MB - QUIC can handle this) + let large_data: Vec = (0..1024 * 1024).map(|i| (i % 256) as u8).collect(); + let large_data_clone = large_data.clone(); + + // Channel to signal when writer has written data + let (tx, rx) = mpsc::channel(); + + // Spawn writer (server) thread + let writer_thread = thread::spawn(move || { + let mut writer = QuicStreamWriter::new(server_addr).unwrap(); + writer.write(&large_data).unwrap(); + tx.send(()).unwrap(); // Signal that data is written + + thread::sleep(Duration::from_millis(200)); + writer.close().unwrap(); + }); + + thread::sleep(Duration::from_millis(100)); + + // Reader connects + let mut reader = QuicStreamReader::new(server_addr).unwrap(); + reader.open().unwrap(); // Explicitly connect + + // Wait for writer to have written data + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + + let message = reader.next().unwrap().unwrap(); + assert_eq!(message, large_data_clone); + reader.close().unwrap(); + + writer_thread.join().unwrap(); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_connection_close() { + init_crypto(); + let server_addr: SocketAddr = "127.0.0.1:15005".parse().unwrap(); + + // Channel to signal when writer has written data + let (tx, rx) = mpsc::channel(); + + // Spawn writer (server) thread + let writer_thread = thread::spawn(move || { + let mut writer = QuicStreamWriter::new(server_addr).unwrap(); + writer.write(b"Message").unwrap(); + tx.send(()).unwrap(); // Signal that data is written + + thread::sleep(Duration::from_millis(200)); + writer.close().unwrap(); + }); + + thread::sleep(Duration::from_millis(100)); + + // Reader connects + let mut reader = QuicStreamReader::new(server_addr).unwrap(); + reader.open().unwrap(); // Explicitly connect + + // Wait for writer to have written data + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + + let msg1 = reader.next().unwrap().unwrap(); + assert_eq!(msg1, b"Message"); + reader.close().unwrap(); + + writer_thread.join().unwrap(); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_multiple_concurrent_messages() { + init_crypto(); + let server_addr: SocketAddr = "127.0.0.1:15006".parse().unwrap(); + + // Channel to signal when writer has written data + let (tx, rx) = mpsc::channel(); + + // Spawn writer (server) thread + let writer_thread = thread::spawn(move || { + let mut writer = QuicStreamWriter::new(server_addr).unwrap(); + for i in 0..10 { + writer.write(format!("Message {}", i).as_bytes()).unwrap(); + } + tx.send(()).unwrap(); // Signal that data is written + + thread::sleep(Duration::from_millis(200)); + writer.close().unwrap(); + }); + + thread::sleep(Duration::from_millis(100)); + + // Reader connects + let mut reader = QuicStreamReader::new(server_addr).unwrap(); + reader.open().unwrap(); // Explicitly connect + + // Wait for writer to have written data + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + + for i in 0..10 { + let msg = reader.next().unwrap().unwrap(); + assert_eq!(msg, format!("Message {}", i).as_bytes()); + } + reader.close().unwrap(); + + writer_thread.join().unwrap(); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_writer_closes_early() { + init_crypto(); + let server_addr: SocketAddr = "127.0.0.1:15007".parse().unwrap(); + + // Channel to signal when writer has written data + let (tx, rx) = mpsc::channel(); + + // Spawn writer (server) thread that closes after writing one message + let writer_thread = thread::spawn(move || { + let mut writer = QuicStreamWriter::new(server_addr).unwrap(); + writer.write(b"First").unwrap(); + tx.send(()).unwrap(); // Signal that data is written + + // Writer closes after a short delay + thread::sleep(Duration::from_millis(100)); + writer.close().unwrap(); + }); + + thread::sleep(Duration::from_millis(100)); + + // Reader connects + let mut reader = QuicStreamReader::new(server_addr).unwrap(); + reader.open().unwrap(); // Explicitly connect + + // Wait for writer to have written data + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + + let msg1 = reader.next().unwrap().unwrap(); + assert_eq!(msg1, b"First"); + + reader.close().unwrap(); + writer_thread.join().unwrap(); + } +} diff --git a/common/src/io/stream/stream_reader.rs b/common/src/io/stream/stream_reader.rs new file mode 100644 index 000000000..dcb2ce028 --- /dev/null +++ b/common/src/io/stream/stream_reader.rs @@ -0,0 +1,124 @@ +use crate::io::{MemoryStreamReader, QuicStreamReader, UnixSocketStreamReader}; + +use super::FileStreamReader; + +use anyhow::Result; + +/// Core trait for stream reading operations +pub trait StreamRead: Send + 'static { + /// Open/initialize the stream for reading + fn open(&mut self) -> Result<()>; + + /// Read the next item from the stream + /// Returns None when the stream is finished + fn next(&mut self) -> Result>>; + + /// Close the stream + fn close(&mut self) -> Result<()>; + + /// Check if the stream is currently active + fn is_active(&self) -> bool; +} + +pub enum StreamSource { + File(FileStreamReader), + UnixSocket(UnixSocketStreamReader), + Quic(QuicStreamReader), + Memory(MemoryStreamReader), +} + +impl StreamSource { + /// Create a file-based stdin + pub fn from_file>(path: P) -> Result { + Ok(StreamSource::File(FileStreamReader::new(path)?)) + } + + /// Create a memory-based stdin + pub fn from_vec(data: Vec) -> Self { + StreamSource::Memory(MemoryStreamReader::new(data)) + } + + /// Create a Unix socket-based stdin + pub fn from_unix_socket>(path: P) -> Result { + Ok(StreamSource::UnixSocket(UnixSocketStreamReader::new(path.as_ref())?)) + } + + /// Create a QUIC-based stdin + pub fn from_quic(addr: std::net::SocketAddr) -> Result { + Ok(StreamSource::Quic(QuicStreamReader::new(addr)?)) + } + + /// Create a StreamSource from a URI string + /// + /// # URI Formats + /// - `None` → null stream (no input) + /// - `"scheme://resource"` → parsed based on scheme + /// - No scheme → treated as a file path + /// + /// # Supported Schemes + /// - `file://path/to/file` → File-based stream + /// - `unix://path/to/socket` → Unix domain socket stream + /// - `quic://host:port` → QUIC network stream (e.g., `quic://127.0.0.1:8080`) + pub fn from_uri>(hints_uri: S) -> Result { + let uri_str = hints_uri.into(); + + // Check if URI contains "://" separator + if let Some(pos) = uri_str.find("://") { + let (scheme, location) = uri_str.split_at(pos); + let path = &location[3..]; // Skip "://" + + match scheme { + "file" => Self::from_file(path), + "unix" => Self::from_unix_socket(path), + "quic" => Self::from_quic(path.parse()?), + // Unknown scheme - could error or fallback + _ => Err(anyhow::anyhow!("Unknown stream source scheme: {}", scheme)), + } + } else { + // No "://" found - fallback as a file path + StreamSource::from_file(uri_str.as_str()) + } + } +} + +impl StreamRead for StreamSource { + /// Open/initialize the stream for reading + fn open(&mut self) -> Result<()> { + match self { + StreamSource::File(file_stream) => file_stream.open(), + StreamSource::UnixSocket(unix_stream) => unix_stream.open(), + StreamSource::Quic(quic_stream) => quic_stream.open(), + StreamSource::Memory(memory_stream) => memory_stream.open(), + } + } + + /// Read the next item from the stream + fn next(&mut self) -> Result>> { + match self { + StreamSource::File(file_stream) => file_stream.next(), + StreamSource::UnixSocket(unix_stream) => unix_stream.next(), + StreamSource::Quic(quic_stream) => quic_stream.next(), + StreamSource::Memory(memory_stream) => memory_stream.next(), + } + } + + /// Close the stream + fn close(&mut self) -> Result<()> { + match self { + StreamSource::File(file_stream) => file_stream.close(), + StreamSource::UnixSocket(unix_stream) => unix_stream.close(), + StreamSource::Quic(quic_stream) => quic_stream.close(), + StreamSource::Memory(memory_stream) => memory_stream.close(), + } + } + + /// Check if the stream is currently active + fn is_active(&self) -> bool { + match self { + StreamSource::File(file_stream) => file_stream.is_active(), + StreamSource::UnixSocket(unix_stream) => unix_stream.is_active(), + StreamSource::Quic(quic_stream) => quic_stream.is_active(), + StreamSource::Memory(memory_stream) => memory_stream.is_active(), + } + } +} diff --git a/common/src/io/stream/stream_writer.rs b/common/src/io/stream/stream_writer.rs new file mode 100644 index 000000000..44e4998fa --- /dev/null +++ b/common/src/io/stream/stream_writer.rs @@ -0,0 +1,19 @@ +use anyhow::Result; + +/// Core trait for stream writing operations +pub trait StreamWrite: Send + 'static { + /// Open/initialize the stream for writing + fn open(&mut self) -> Result<()>; + + /// Write data to the stream, returns the number of bytes written + fn write(&mut self, item: &[u8]) -> Result; + + /// Flush any buffered data + fn flush(&mut self) -> Result<()>; + + /// Close the stream + fn close(&mut self) -> Result<()>; + + /// Check if the stream is currently active + fn is_active(&self) -> bool; +} diff --git a/common/src/io/stream/unix_socket.rs b/common/src/io/stream/unix_socket.rs new file mode 100644 index 000000000..a93f25076 --- /dev/null +++ b/common/src/io/stream/unix_socket.rs @@ -0,0 +1,967 @@ +//! A Unix domain socket implementation of StreamReader and StreamWriter. +//! This module provides functionality to read and write data through Unix sockets +//! using SOCK_SEQPACKET for message-oriented communication with built-in boundaries. + +use std::io::Write; +use std::os::unix::io::FromRawFd; +use std::os::unix::net::UnixStream; +use std::path::{Path, PathBuf}; +use std::sync::mpsc::{self, Receiver}; +use std::thread::{self, JoinHandle}; + +use anyhow::{Context, Result}; + +use super::{StreamRead, StreamWrite}; + +/// Errors specific to Unix socket operations +#[derive(Debug, thiserror::Error)] +pub enum UnixSocketError { + #[error("No client connected yet")] + NoClientConnected, + + #[error("Socket not connected")] + NotConnected, + + #[error("Failed to write to socket: {0}")] + WriteFailed(#[from] std::io::Error), +} + +/// A Unix domain socket implementation of StreamRead using SOCK_SEQPACKET. +pub struct UnixSocketStreamReader { + /// The path to the Unix socket to connect to. + path: PathBuf, + + /// The connected socket for reading + socket: Option, +} + +impl UnixSocketStreamReader { + /// Create a new UnixSocketStreamReader that connects to the specified socket path. + /// + /// This creates a client socket that connects to the writer to read data. + pub fn new>(path: P) -> Result { + Ok(UnixSocketStreamReader { path: path.as_ref().to_path_buf(), socket: None }) + } + + /// Connect to the Unix socket with SOCK_SEQPACKET type + #[cfg(unix)] + fn connect_socket(&mut self) -> Result<()> { + use std::ffi::CString; + use std::os::unix::ffi::OsStrExt; + + // Create socket with SOCK_SEQPACKET + #[cfg(target_os = "linux")] + let sock_fd = + unsafe { libc::socket(libc::AF_UNIX, libc::SOCK_SEQPACKET | libc::SOCK_CLOEXEC, 0) }; + + #[cfg(not(target_os = "linux"))] + let sock_fd = unsafe { libc::socket(libc::AF_UNIX, libc::SOCK_SEQPACKET, 0) }; + + if sock_fd < 0 { + return Err(anyhow::anyhow!( + "Failed to create socket: {}", + std::io::Error::last_os_error() + )); + } + + // Set CLOEXEC flag on non-Linux systems + #[cfg(not(target_os = "linux"))] + { + let flags = unsafe { libc::fcntl(sock_fd, libc::F_GETFD) }; + if flags >= 0 { + unsafe { libc::fcntl(sock_fd, libc::F_SETFD, flags | libc::FD_CLOEXEC) }; + } + } + + // Connect to the socket path + let c_path = + CString::new(self.path.as_os_str().as_bytes()).context("Invalid socket path")?; + + let mut addr: libc::sockaddr_un = unsafe { std::mem::zeroed() }; + addr.sun_family = libc::AF_UNIX as _; + + let path_bytes = c_path.as_bytes_with_nul(); + if path_bytes.len() > addr.sun_path.len() { + unsafe { libc::close(sock_fd) }; + return Err(anyhow::anyhow!("Socket path too long")); + } + + unsafe { + std::ptr::copy_nonoverlapping( + path_bytes.as_ptr() as *const i8, + addr.sun_path.as_mut_ptr(), + path_bytes.len(), + ); + } + + let addr_len = std::mem::size_of_val(&addr.sun_family) + path_bytes.len(); + + // Retry connect on EINTR + loop { + let result = unsafe { + libc::connect( + sock_fd, + &addr as *const libc::sockaddr_un as *const libc::sockaddr, + addr_len as u32, + ) + }; + + if result < 0 { + let err = std::io::Error::last_os_error(); + if err.kind() == std::io::ErrorKind::Interrupted { + continue; // Retry on EINTR + } + unsafe { libc::close(sock_fd) }; + return Err(anyhow::anyhow!("Failed to connect to socket: {}", err)); + } + + break; + } + + // Convert to UnixStream + let socket = unsafe { UnixStream::from_raw_fd(sock_fd) }; + self.socket = Some(socket); + + Ok(()) + } +} + +impl StreamRead for UnixSocketStreamReader { + /// Open/initialize the stream for reading + /// + /// Connects to the Unix socket server. + fn open(&mut self) -> Result<()> { + if self.is_active() { + return Ok(()); + } + + self.connect_socket()?; + Ok(()) + } + + /// Reads the next message from the Unix socket. + /// + /// With SOCK_SEQPACKET, each recv() reads exactly one complete message, + /// providing natural message boundaries. + fn next(&mut self) -> Result>> { + self.open()?; + + let socket = self + .socket + .as_mut() + .ok_or_else(|| anyhow::anyhow!("UnixSocketStreamReader: Socket not connected"))?; + + // Buffer for receiving messages (128KB max for SOCK_SEQPACKET) + let mut buffer = vec![0u8; 128 * 1024]; + + // Use raw recv to detect MSG_TRUNC + use std::os::unix::io::AsRawFd; + let fd = socket.as_raw_fd(); + + loop { + let n = unsafe { + libc::recv( + fd, + buffer.as_mut_ptr() as *mut libc::c_void, + buffer.len(), + libc::MSG_TRUNC, + ) + }; + + if n < 0 { + let err = std::io::Error::last_os_error(); + if err.kind() == std::io::ErrorKind::Interrupted { + continue; // Retry on EINTR + } + if err.kind() == std::io::ErrorKind::ConnectionReset { + return Ok(None); + } + return Err(anyhow::anyhow!("Failed to read from socket: {}", err)); + } + + if n == 0 { + // Connection closed + return Ok(None); + } + + let n = n as usize; + + // Check if message was truncated + if n > buffer.len() { + return Err(anyhow::anyhow!( + "Message truncated: received {} bytes, buffer size {} bytes", + n, + buffer.len() + )); + } + + buffer.truncate(n); + return Ok(Some(buffer)); + } + } + + /// Close the stream + fn close(&mut self) -> Result<()> { + self.socket = None; + Ok(()) + } + + /// Check if the stream is currently active + fn is_active(&self) -> bool { + self.socket.is_some() + } +} + +impl Drop for UnixSocketStreamReader { + fn drop(&mut self) { + let _ = self.close(); + } +} + +/// A Unix domain socket implementation of StreamWrite using SOCK_SEQPACKET. +pub struct UnixSocketStreamWriter { + /// The path to the Unix socket. + path: PathBuf, + + /// The listening socket file descriptor (server mode) + listener_fd: Option, + + /// The connected socket for writing + socket: Option, + + /// Receiver for the accepted socket from background thread + socket_receiver: Option>, + + /// Handle to the accept thread + accept_thread: Option>, +} + +impl UnixSocketStreamWriter { + /// Create a new UnixSocketStreamWriter that listens on the specified socket path. + /// + /// This creates a server socket that waits for incoming reader connections. + pub fn new>(path: P) -> Result { + Ok(UnixSocketStreamWriter { + path: path.as_ref().to_path_buf(), + listener_fd: None, + socket: None, + socket_receiver: None, + accept_thread: None, + }) + } + + /// Create the Unix socket with SOCK_SEQPACKET type + #[cfg(unix)] + fn create_listener(&mut self) -> Result<()> { + use std::ffi::CString; + use std::os::unix::ffi::OsStrExt; + + // Remove socket file if it exists and is stale + if self.path.exists() { + // Try to detect if socket is stale by attempting connection + let is_stale = std::os::unix::net::UnixStream::connect(&self.path).is_err(); + + if is_stale { + std::fs::remove_file(&self.path).context("Failed to remove stale socket file")?; + } else { + return Err(anyhow::anyhow!( + "Socket path {} is already in use", + self.path.display() + )); + } + } + + // Create socket with SOCK_SEQPACKET for message boundaries + #[cfg(target_os = "linux")] + let sock_fd = + unsafe { libc::socket(libc::AF_UNIX, libc::SOCK_SEQPACKET | libc::SOCK_CLOEXEC, 0) }; + + #[cfg(not(target_os = "linux"))] + let sock_fd = unsafe { libc::socket(libc::AF_UNIX, libc::SOCK_SEQPACKET, 0) }; + + if sock_fd < 0 { + return Err(anyhow::anyhow!( + "Failed to create socket: {}", + std::io::Error::last_os_error() + )); + } + + // Set CLOEXEC flag on non-Linux systems + #[cfg(not(target_os = "linux"))] + { + let flags = unsafe { libc::fcntl(sock_fd, libc::F_GETFD) }; + if flags >= 0 { + unsafe { libc::fcntl(sock_fd, libc::F_SETFD, flags | libc::FD_CLOEXEC) }; + } + } + + // Bind to the socket path + let c_path = + CString::new(self.path.as_os_str().as_bytes()).context("Invalid socket path")?; + + let mut addr: libc::sockaddr_un = unsafe { std::mem::zeroed() }; + addr.sun_family = libc::AF_UNIX as _; + + let path_bytes = c_path.as_bytes_with_nul(); + if path_bytes.len() > addr.sun_path.len() { + unsafe { libc::close(sock_fd) }; + return Err(anyhow::anyhow!("Socket path too long")); + } + + unsafe { + std::ptr::copy_nonoverlapping( + path_bytes.as_ptr() as *const i8, + addr.sun_path.as_mut_ptr(), + path_bytes.len(), + ); + } + + let addr_len = std::mem::size_of_val(&addr.sun_family) + path_bytes.len(); + + let bind_result = unsafe { + libc::bind( + sock_fd, + &addr as *const libc::sockaddr_un as *const libc::sockaddr, + addr_len as u32, + ) + }; + + if bind_result < 0 { + let err = std::io::Error::last_os_error(); + unsafe { libc::close(sock_fd) }; + return Err(anyhow::anyhow!("Failed to bind socket: {}", err)); + } + + // Listen for connections + let listen_result = unsafe { libc::listen(sock_fd, 1) }; + + if listen_result < 0 { + let err = std::io::Error::last_os_error(); + unsafe { libc::close(sock_fd) }; + return Err(anyhow::anyhow!("Failed to listen on socket: {}", err)); + } + + self.listener_fd = Some(sock_fd); + Ok(()) + } + + /// Check if a client is currently connected. + /// + /// Returns `true` if a client is connected and ready to receive data. + pub fn is_client_connected(&mut self) -> bool { + // Already have a connection + if self.socket.is_some() { + return true; + } + + // Try to receive socket from accept thread (non-blocking) + if let Some(rx) = &self.socket_receiver { + if let Ok(stream) = rx.try_recv() { + self.socket = Some(stream); + return true; + } + } + + false + } +} + +impl StreamWrite for UnixSocketStreamWriter { + /// Open/initialize the stream for writing + /// + /// Creates a listening socket and spawns a background thread to accept connections. + /// This is non-blocking - the actual client connection happens lazily on first write. + fn open(&mut self) -> Result<()> { + // If we already have a connected socket, we're done + if self.socket.is_some() { + return Ok(()); + } + + // Create listener if not exists + if self.listener_fd.is_none() { + self.create_listener()?; + } + + // Spawn accept thread if not already running + if self.accept_thread.is_none() { + let listener_fd = self.listener_fd.unwrap(); + let (tx, rx) = mpsc::channel(); + self.socket_receiver = Some(rx); + + let handle = thread::spawn(move || { + // Retry accept on EINTR + let conn_fd = loop { + let fd = unsafe { + libc::accept(listener_fd, std::ptr::null_mut(), std::ptr::null_mut()) + }; + + if fd < 0 { + let err = std::io::Error::last_os_error(); + if err.kind() == std::io::ErrorKind::Interrupted { + continue; // Retry on EINTR + } + eprintln!("Accept failed: {}", err); + return; + } + + break fd; + }; + + // Convert to UnixStream + let stream = unsafe { UnixStream::from_raw_fd(conn_fd) }; + + // Send socket through channel + let _ = tx.send(stream); + }); + + self.accept_thread = Some(handle); + } + + Ok(()) + } + + /// Write data to the stream, returns the number of bytes written. + /// + /// With SOCK_SEQPACKET, each write() sends exactly one complete message, + /// providing natural message boundaries. + /// + /// Returns `NoClientConnected` error if no client has connected yet. + /// The caller can retry the write until a client connects. + fn write(&mut self, item: &[u8]) -> Result { + self.open()?; + + // Receive socket from channel if we don't have it yet + if self.socket.is_none() { + if let Some(rx) = &self.socket_receiver { + // Non-blocking check for socket from accept thread + match rx.try_recv() { + Ok(stream) => { + self.socket = Some(stream); + } + Err(mpsc::TryRecvError::Empty) => { + // Accept thread is running but client hasn't connected yet + return Err(UnixSocketError::NoClientConnected.into()); + } + Err(mpsc::TryRecvError::Disconnected) => { + // Accept thread died unexpectedly + return Err(anyhow::anyhow!("Accept thread terminated unexpectedly")); + } + } + } + } + + let socket = self.socket.as_mut().ok_or(UnixSocketError::NotConnected)?; + + socket.write_all(item).map_err(UnixSocketError::WriteFailed)?; + Ok(item.len()) + } + + /// Flush any buffered data + fn flush(&mut self) -> Result<()> { + if let Some(socket) = self.socket.as_mut() { + socket.flush()?; + } + Ok(()) + } + + /// Close the stream + fn close(&mut self) -> Result<()> { + self.flush()?; + + // Clear the socket + self.socket = None; + + if let Some(fd) = self.listener_fd.take() { + unsafe { libc::close(fd) }; + } + + // Clean up socket file + if self.path.exists() { + let _ = std::fs::remove_file(&self.path); + } + + Ok(()) + } + + /// Check if the stream is currently active + fn is_active(&self) -> bool { + self.socket.is_some() + } +} + +impl Drop for UnixSocketStreamWriter { + fn drop(&mut self) { + let _ = self.close(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::atomic::{AtomicBool, Ordering}; + use std::sync::{Arc, Mutex}; + use std::thread; + use std::time::Duration; + + /// Serialize all unix socket tests to prevent fd reuse races. + /// + /// When tests run in parallel and one panics, its Drop closes the listener fd + /// while the accept thread may still be blocked on it. Due to Linux fd reuse, + /// this can cause other tests' accept() calls to operate on the wrong fd (EINVAL). + static TEST_MUTEX: Mutex<()> = Mutex::new(()); + + /// Generate a unique socket path per test. + fn unique_socket_path(test_name: &str) -> String { + format!("/tmp/test_unix_socket_{}_pid{}.sock", test_name, std::process::id(),) + } + + /// Helper: writer retries write until a client connects, panicking on unexpected errors. + fn write_with_retry(writer: &mut UnixSocketStreamWriter, data: &[u8]) { + loop { + match writer.write(data) { + Ok(_) => break, + Err(e) => { + if e.downcast_ref::() + .is_some_and(|ue| matches!(ue, UnixSocketError::NoClientConnected)) + { + thread::sleep(Duration::from_millis(5)); + continue; + } + panic!("Unexpected write error: {}", e); + } + } + } + } + + /// Synchronization state shared between the writer thread and the main (reader) thread. + struct WriterSync { + /// Signaled by the writer after open() completes (bound + listening + accept spawned). + ready: AtomicBool, + /// Signaled by the reader when it has finished reading. The writer waits for this + /// before closing, to prevent the socket from being torn down while the reader + /// still has buffered messages to read. + reader_done: AtomicBool, + } + + /// Helper: spawn writer in a thread with proper synchronization. + /// + /// Returns (join_handle, sync_state). The caller must: + /// 1. Wait for `sync.ready` before connecting the reader. + /// 2. Set `sync.reader_done` after the reader has finished reading. + fn spawn_writer_thread( + socket_path: &str, + write_fn: impl FnOnce(&mut UnixSocketStreamWriter) + Send + 'static, + ) -> (JoinHandle<()>, Arc) { + let sp = socket_path.to_string(); + let sync = Arc::new(WriterSync { + ready: AtomicBool::new(false), + reader_done: AtomicBool::new(false), + }); + let sync_clone = sync.clone(); + + let handle = thread::spawn(move || { + let mut writer = UnixSocketStreamWriter::new(&sp).unwrap(); + writer.open().unwrap(); + sync_clone.ready.store(true, Ordering::Release); + write_fn(&mut writer); + // Wait for reader to finish before closing, to avoid ECONNRESET + let start = std::time::Instant::now(); + while !sync_clone.reader_done.load(Ordering::Acquire) { + if start.elapsed() > Duration::from_secs(5) { + panic!("Timed out waiting for reader to finish"); + } + thread::sleep(Duration::from_millis(1)); + } + writer.close().unwrap(); + }); + + (handle, sync) + } + + /// Wait until the writer signals it has finished open() (bound + listening + accept spawned). + fn wait_for_writer(sync: &WriterSync) { + let start = std::time::Instant::now(); + while !sync.ready.load(Ordering::Acquire) { + if start.elapsed() > Duration::from_secs(5) { + panic!("Timed out waiting for writer to become ready"); + } + thread::sleep(Duration::from_millis(1)); + } + } + + #[test] + fn test_single_message() { + let _lock = TEST_MUTEX.lock().unwrap(); + let socket_path = unique_socket_path("single"); + let _ = std::fs::remove_file(&socket_path); + + let (writer_thread, sync) = spawn_writer_thread(&socket_path, |writer| { + write_with_retry(writer, b"Hello, World!"); + }); + + wait_for_writer(&sync); + + let mut reader = UnixSocketStreamReader::new(&socket_path).unwrap(); + let message = reader.next().unwrap().unwrap(); + assert_eq!(message, b"Hello, World!"); + reader.close().unwrap(); + + sync.reader_done.store(true, Ordering::Release); + writer_thread.join().unwrap(); + } + + #[test] + fn test_multiple_messages() { + let _lock = TEST_MUTEX.lock().unwrap(); + let socket_path = unique_socket_path("multi"); + let _ = std::fs::remove_file(&socket_path); + + let (writer_thread, sync) = spawn_writer_thread(&socket_path, |writer| { + write_with_retry(writer, b"First"); + writer.write(b"Second message").unwrap(); + writer.write(b"Third message with more data!").unwrap(); + }); + + wait_for_writer(&sync); + + let mut reader = UnixSocketStreamReader::new(&socket_path).unwrap(); + let msg1 = reader.next().unwrap().unwrap(); + assert_eq!(msg1, b"First"); + let msg2 = reader.next().unwrap().unwrap(); + assert_eq!(msg2, b"Second message"); + let msg3 = reader.next().unwrap().unwrap(); + assert_eq!(msg3, b"Third message with more data!"); + reader.close().unwrap(); + + sync.reader_done.store(true, Ordering::Release); + writer_thread.join().unwrap(); + } + + #[test] + fn test_message_boundaries() { + let _lock = TEST_MUTEX.lock().unwrap(); + let socket_path = unique_socket_path("boundaries"); + let _ = std::fs::remove_file(&socket_path); + + let (writer_thread, sync) = spawn_writer_thread(&socket_path, |writer| { + write_with_retry(writer, b"ABC"); + writer.write(b"DEF").unwrap(); + }); + + wait_for_writer(&sync); + + // Reader should receive each message as discrete unit + let mut reader = UnixSocketStreamReader::new(&socket_path).unwrap(); + let msg1 = reader.next().unwrap().unwrap(); + assert_eq!(msg1, b"ABC"); + let msg2 = reader.next().unwrap().unwrap(); + assert_eq!(msg2, b"DEF"); + // Should NOT be concatenated like "ABCDEF" + reader.close().unwrap(); + + sync.reader_done.store(true, Ordering::Release); + writer_thread.join().unwrap(); + } + + #[test] + fn test_large_message() { + let _lock = TEST_MUTEX.lock().unwrap(); + let socket_path = unique_socket_path("large"); + let _ = std::fs::remove_file(&socket_path); + + // Create a large message (64KB - within SOCK_SEQPACKET limits) + let large_data: Vec = (0..64 * 1024).map(|i| (i % 256) as u8).collect(); + let large_data_clone = large_data.clone(); + + let (writer_thread, sync) = spawn_writer_thread(&socket_path, move |writer| { + write_with_retry(writer, &large_data); + }); + + wait_for_writer(&sync); + + let mut reader = UnixSocketStreamReader::new(&socket_path).unwrap(); + let message = reader.next().unwrap().unwrap(); + assert_eq!(message, large_data_clone); + reader.close().unwrap(); + + sync.reader_done.store(true, Ordering::Release); + writer_thread.join().unwrap(); + } + + #[test] + fn test_connection_close() { + let _lock = TEST_MUTEX.lock().unwrap(); + let socket_path = unique_socket_path("close"); + let _ = std::fs::remove_file(&socket_path); + + let sp = socket_path.clone(); + + let writer_ready = Arc::new(AtomicBool::new(false)); + let writer_ready_clone = writer_ready.clone(); + let reader_connected = Arc::new(AtomicBool::new(false)); + let reader_connected_clone = reader_connected.clone(); + + // This test intentionally lets the writer close to verify the reader sees EOF, + // so we don't use spawn_writer_thread (which defers close). + let writer_thread = thread::spawn(move || { + let mut writer = UnixSocketStreamWriter::new(&sp).unwrap(); + writer.open().unwrap(); + writer_ready_clone.store(true, Ordering::Release); + + // Wait for the reader to connect before writing + closing + let start = std::time::Instant::now(); + while !reader_connected_clone.load(Ordering::Acquire) { + if start.elapsed() > Duration::from_secs(5) { + panic!("Timed out waiting for reader to connect"); + } + thread::sleep(Duration::from_millis(1)); + } + + write_with_retry(&mut writer, b"Message"); + writer.close().unwrap(); + }); + + // Wait for writer to be listening + let start = std::time::Instant::now(); + while !writer_ready.load(Ordering::Acquire) { + if start.elapsed() > Duration::from_secs(5) { + panic!("Timed out waiting for writer"); + } + thread::sleep(Duration::from_millis(1)); + } + + // Connect reader and signal writer + let mut reader = UnixSocketStreamReader::new(&socket_path).unwrap(); + reader.open().unwrap(); + reader_connected.store(true, Ordering::Release); + + let msg1 = reader.next().unwrap().unwrap(); + assert_eq!(msg1, b"Message"); + + // After writer closes, next should return None + let msg2 = reader.next().unwrap(); + assert_eq!(msg2, None); + reader.close().unwrap(); + + writer_thread.join().unwrap(); + } + + #[test] + fn test_stress_many_messages() { + let _lock = TEST_MUTEX.lock().unwrap(); + let socket_path = unique_socket_path("stress"); + let _ = std::fs::remove_file(&socket_path); + + const NUM_MESSAGES: usize = 1000; + + let (writer_thread, sync) = spawn_writer_thread(&socket_path, |writer| { + write_with_retry(writer, b"START"); + + for i in 0..NUM_MESSAGES { + let msg = format!("Message {}", i); + writer.write(msg.as_bytes()).unwrap(); + } + + writer.write(b"END").unwrap(); + }); + + wait_for_writer(&sync); + + let mut reader = UnixSocketStreamReader::new(&socket_path).unwrap(); + + let start = reader.next().unwrap().unwrap(); + assert_eq!(start, b"START"); + + for i in 0..NUM_MESSAGES { + let expected = format!("Message {}", i); + let msg = reader.next().unwrap().unwrap(); + assert_eq!(msg, expected.as_bytes(), "Message {} mismatch", i); + } + + let end = reader.next().unwrap().unwrap(); + assert_eq!(end, b"END"); + + reader.close().unwrap(); + sync.reader_done.store(true, Ordering::Release); + writer_thread.join().unwrap(); + } + + #[test] + fn test_non_blocking_open() { + let _lock = TEST_MUTEX.lock().unwrap(); + let socket_path = unique_socket_path("nonblocking"); + let _ = std::fs::remove_file(&socket_path); + + let mut writer = UnixSocketStreamWriter::new(&socket_path).unwrap(); + + let start = std::time::Instant::now(); + writer.open().unwrap(); + let elapsed = start.elapsed(); + + // open() should return almost immediately (definitely under 100ms) + assert!( + elapsed.as_millis() < 100, + "open() took too long: {:?} - should be non-blocking", + elapsed + ); + + // But we shouldn't have a client connected yet + assert!(!writer.is_client_connected()); + + // Connect a dummy reader to unblock the accept thread before closing, + // preventing fd reuse races with the detached accept thread. + let mut dummy = UnixSocketStreamReader::new(&socket_path).unwrap(); + dummy.open().unwrap(); + + writer.close().unwrap(); + // Allow accept thread to fully terminate after fd is closed + thread::sleep(Duration::from_millis(10)); + } + + #[test] + fn test_is_client_connected() { + let _lock = TEST_MUTEX.lock().unwrap(); + let socket_path = unique_socket_path("is_connected"); + let _ = std::fs::remove_file(&socket_path); + + let sp = socket_path.clone(); + + let sync = Arc::new(WriterSync { + ready: AtomicBool::new(false), + reader_done: AtomicBool::new(false), + }); + let sync_clone = sync.clone(); + + let writer_thread = thread::spawn(move || { + let mut writer = UnixSocketStreamWriter::new(&sp).unwrap(); + writer.open().unwrap(); + sync_clone.ready.store(true, Ordering::Release); + + // Initially, no client should be connected + assert!(!writer.is_client_connected()); + + // Wait for client to connect + let mut connected = false; + for _ in 0..200 { + if writer.is_client_connected() { + connected = true; + break; + } + thread::sleep(Duration::from_millis(10)); + } + + assert!(connected, "Client should have connected"); + + // After connection, should remain true + assert!(writer.is_client_connected()); + + // Write should now succeed immediately + writer.write(b"Connected!").unwrap(); + + // Wait for reader to finish before closing + let start = std::time::Instant::now(); + while !sync_clone.reader_done.load(Ordering::Acquire) { + if start.elapsed() > Duration::from_secs(5) { + panic!("Timed out waiting for reader to finish"); + } + thread::sleep(Duration::from_millis(1)); + } + writer.close().unwrap(); + }); + + wait_for_writer(&sync); + + let mut reader = UnixSocketStreamReader::new(&socket_path).unwrap(); + let message = reader.next().unwrap().unwrap(); + assert_eq!(message, b"Connected!"); + reader.close().unwrap(); + + sync.reader_done.store(true, Ordering::Release); + writer_thread.join().unwrap(); + } + + #[test] + fn test_wait_for_client_with_is_connected() { + let _lock = TEST_MUTEX.lock().unwrap(); + let socket_path = unique_socket_path("wait_client"); + let _ = std::fs::remove_file(&socket_path); + + let sp = socket_path.clone(); + + let sync = Arc::new(WriterSync { + ready: AtomicBool::new(false), + reader_done: AtomicBool::new(false), + }); + let sync_clone = sync.clone(); + + let writer_thread = thread::spawn(move || { + let mut writer = UnixSocketStreamWriter::new(&sp).unwrap(); + writer.open().unwrap(); + sync_clone.ready.store(true, Ordering::Release); + + // Use is_client_connected() to wait for client + while !writer.is_client_connected() { + thread::sleep(Duration::from_millis(10)); + } + + // Now write will succeed without retries + writer.write(b"Message 1").unwrap(); + writer.write(b"Message 2").unwrap(); + writer.write(b"Message 3").unwrap(); + + // Wait for reader to finish before closing + let start = std::time::Instant::now(); + while !sync_clone.reader_done.load(Ordering::Acquire) { + if start.elapsed() > Duration::from_secs(5) { + panic!("Timed out waiting for reader to finish"); + } + thread::sleep(Duration::from_millis(1)); + } + writer.close().unwrap(); + }); + + wait_for_writer(&sync); + + let mut reader = UnixSocketStreamReader::new(&socket_path).unwrap(); + let msg1 = reader.next().unwrap().unwrap(); + assert_eq!(msg1, b"Message 1"); + let msg2 = reader.next().unwrap().unwrap(); + assert_eq!(msg2, b"Message 2"); + let msg3 = reader.next().unwrap().unwrap(); + assert_eq!(msg3, b"Message 3"); + reader.close().unwrap(); + + sync.reader_done.store(true, Ordering::Release); + writer_thread.join().unwrap(); + } + + #[test] + fn test_no_client_connected_error() { + let _lock = TEST_MUTEX.lock().unwrap(); + let socket_path = unique_socket_path("no_client"); + let _ = std::fs::remove_file(&socket_path); + + let mut writer = UnixSocketStreamWriter::new(&socket_path).unwrap(); + writer.open().unwrap(); + + // Try to write without any client connected + let result = writer.write(b"Data"); + + // Should get NoClientConnected error + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.downcast_ref::().is_some(), + "Expected UnixSocketError::NoClientConnected" + ); + + // Connect a dummy reader to unblock the accept thread before closing, + // preventing fd reuse races with the detached accept thread. + let mut dummy = UnixSocketStreamReader::new(&socket_path).unwrap(); + dummy.open().unwrap(); + + writer.close().unwrap(); + // Allow accept thread to fully terminate after fd is closed + thread::sleep(Duration::from_millis(10)); + } +} diff --git a/common/src/io/stream/zisk_stream.rs b/common/src/io/stream/zisk_stream.rs new file mode 100644 index 000000000..9312dfd53 --- /dev/null +++ b/common/src/io/stream/zisk_stream.rs @@ -0,0 +1,192 @@ +//! ZiskStream is responsible for reading precompile hints from a stream source and sent to a hints processor. + +use anyhow::Result; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::mpsc::{Receiver, Sender}; +use std::sync::Arc; +use std::thread::{self, JoinHandle}; + +use crate::io::{StreamRead, StreamSource}; + +pub trait StreamProcessor: Send + Sync + 'static { + /// Process a batch of hint data. + /// + /// # Returns + /// `true` if CTRL_END was encountered (signals end of stream), `false` otherwise. + fn process_hints(&self, data: &[u64], first_batch: bool) -> anyhow::Result; + + fn reset(&self) {} +} + +/// Trait for submitting processed hints to a sink. +/// +/// # Arguments +/// * `processed` - A slice of processed hints as u64 values. +/// +/// # Returns +/// * `Ok(())` - If hints were successfully submitted +/// * `Err` - If submission fails +pub trait StreamSink: Send + Sync + 'static { + fn submit(&self, processed: &[u64]) -> anyhow::Result<()>; + + fn reset(&self) {} +} + +enum ThreadCommand { + Process, + Shutdown, +} + +/// ZiskStream struct manages the processing of precompile hints and writing them to shared memory. +pub struct ZiskStream { + /// The hints processor used to process hints before writing. + hints_processor: Arc

, + + /// Channel sender to communicate with the background thread. + tx: Option>, + + /// Join handle for the background thread. + thread_handle: Option>, + + initialized: AtomicBool, +} + +impl ZiskStream

{ + /// Create a new ZiskStream with the given processor. + /// + /// # Arguments + /// * `hints_processor` - The processor used to process hints. + /// + /// # Returns + /// A new `ZiskStream` instance without a running thread. + pub fn new(hints_processor: P) -> Self { + Self { + hints_processor: Arc::new(hints_processor), + tx: None, + thread_handle: None, + initialized: AtomicBool::new(false), + } + } + + /// Stop the current background thread if running. + fn stop_thread(&mut self) { + if let Some(tx) = self.tx.take() { + let _ = tx.send(ThreadCommand::Shutdown); + } + if let Some(handle) = self.thread_handle.take() { + let _ = handle.join(); + } + } + + /// Set a new StreamSource for the pipeline and spawn a background thread to process hints. + /// + /// This will stop any existing background thread and start a new one with the new stream. + /// + /// # Arguments + /// * `stream` - The new StreamSource source for reading hints. + pub fn set_hints_stream_src(&mut self, mut stream: StreamSource) -> Result<()> { + if !stream.is_active() { + // Stop the existing thread if running + self.stop_thread(); + stream.open()?; + } + + // Create a new channel for communication with the thread + let (tx, rx) = std::sync::mpsc::channel(); + self.tx = Some(tx); + + // Clone Arc references for the thread + let hints_processor = Arc::clone(&self.hints_processor); + + // Spawn the background thread + let thread_handle = thread::spawn(move || { + Self::background_thread(stream, hints_processor, rx); + }); + + self.thread_handle = Some(thread_handle); + + self.initialized.store(true, Ordering::SeqCst); + + Ok(()) + } + + /// Background thread function that processes hints when requested. + fn background_thread( + mut stream: StreamSource, + hints_processor: Arc

, + rx: Receiver, + ) { + while let Ok(ThreadCommand::Process) = rx.recv() { + if let Err(e) = Self::process_stream(&mut stream, &*hints_processor) { + panic!("Error processing hints in background thread: {:?}", e); + } + } + // Loop exits when Shutdown is received or channel is closed + } + + /// Process all hints from the stream. + /// + /// Processes hints in batches until CTRL_END is encountered or the stream ends. + fn process_stream(stream: &mut StreamSource, hints_processor: &P) -> Result<()> { + let mut first_batch = true; + + while let Some(hints) = stream.next()? { + let hints = crate::reinterpret_vec(hints)?; + let has_ctrl_end = hints_processor.process_hints(&hints, first_batch)?; + + first_batch = false; + + // Break if CTRL_END was encountered + if has_ctrl_end { + break; + } + } + + Ok(()) + } + + pub fn reset(&mut self) { + self.hints_processor.reset(); + self.initialized.store(false, Ordering::SeqCst); + } + + /// Trigger the background thread to process hints asynchronously. + /// + /// This method: + /// 1. Sends a command to the background thread to process hints + /// 2. Returns immediately without waiting for processing to complete + /// + /// # Returns + /// * `Ok(())` - If the command was successfully sent + /// * `Err` - If there's no active thread or the channel is closed + pub fn start_stream(&mut self) -> Result<()> { + if !self.initialized.load(Ordering::SeqCst) { + return Err(anyhow::anyhow!( + "Hints stream is not initialized. Call set_hints_stream_src first." + )); + } + + if let Some(tx) = &self.tx { + tx.send(ThreadCommand::Process).map_err(|e| { + anyhow::anyhow!("Failed to send process command to background thread: {}", e) + })?; + Ok(()) + } else { + Err(anyhow::anyhow!("No background thread running. Call set_hints_stream first.")) + } + } + + pub fn is_initialized(&self) -> bool { + self.initialized.load(Ordering::SeqCst) + } + + pub fn get_processor(&self) -> Arc

{ + Arc::clone(&self.hints_processor) + } +} + +impl Drop for ZiskStream

{ + fn drop(&mut self) { + self.stop_thread(); + } +} diff --git a/common/src/io/zisk_stdin.rs b/common/src/io/zisk_stdin.rs deleted file mode 100644 index 8f5b14006..000000000 --- a/common/src/io/zisk_stdin.rs +++ /dev/null @@ -1,111 +0,0 @@ -use crate::io::{ZiskFileStdin, ZiskMemoryStdin, ZiskNullStdin}; -use std::path::Path; - -use anyhow::Result; - -pub trait ZiskIO: Send + Sync { - /// Read a value from the buffer. - fn read(&mut self) -> Vec; - - /// Read a slice of bytes from the buffer. - fn read_slice(&mut self, slice: &mut [u8]); - - /// Read bytes into the provided buffer. - fn read_into(&mut self, buffer: &mut [u8]); - - /// Write a serialized value to the buffer. - fn write_serialized(&mut self, data: &[u8]); - - /// Write a slice of bytes to the buffer. - fn write_bytes(&mut self, data: &[u8]); -} - -pub enum ZiskIOVariant { - File(ZiskFileStdin), - Null(ZiskNullStdin), - Memory(ZiskMemoryStdin), -} - -impl ZiskIO for ZiskIOVariant { - fn read(&mut self) -> Vec { - match self { - ZiskIOVariant::File(file_stdin) => file_stdin.read(), - ZiskIOVariant::Null(null_stdin) => null_stdin.read(), - ZiskIOVariant::Memory(memory_stdin) => memory_stdin.read(), - } - } - - fn read_slice(&mut self, slice: &mut [u8]) { - match self { - ZiskIOVariant::File(file_stdin) => file_stdin.read_slice(slice), - ZiskIOVariant::Null(null_stdin) => null_stdin.read_slice(slice), - ZiskIOVariant::Memory(memory_stdin) => memory_stdin.read_slice(slice), - } - } - - fn read_into(&mut self, buffer: &mut [u8]) { - match self { - ZiskIOVariant::File(file_stdin) => file_stdin.read_into(buffer), - ZiskIOVariant::Null(null_stdin) => null_stdin.read_into(buffer), - ZiskIOVariant::Memory(memory_stdin) => memory_stdin.read_into(buffer), - } - } - - fn write_serialized(&mut self, data: &[u8]) { - match self { - ZiskIOVariant::File(file_stdin) => file_stdin.write_serialized(data), - ZiskIOVariant::Null(null_stdin) => null_stdin.write_serialized(data), - ZiskIOVariant::Memory(memory_stdin) => memory_stdin.write_serialized(data), - } - } - - fn write_bytes(&mut self, data: &[u8]) { - match self { - ZiskIOVariant::File(file_stdin) => file_stdin.write_bytes(data), - ZiskIOVariant::Null(null_stdin) => null_stdin.write_bytes(data), - ZiskIOVariant::Memory(memory_stdin) => memory_stdin.write_bytes(data), - } - } -} - -pub struct ZiskStdin { - io: ZiskIOVariant, -} - -impl ZiskIO for ZiskStdin { - fn read(&mut self) -> Vec { - self.io.read() - } - - fn read_slice(&mut self, slice: &mut [u8]) { - self.io.read_slice(slice) - } - - fn read_into(&mut self, buffer: &mut [u8]) { - self.io.read_into(buffer) - } - - fn write_serialized(&mut self, data: &[u8]) { - self.io.write_serialized(data) - } - - fn write_bytes(&mut self, data: &[u8]) { - self.io.write_bytes(data) - } -} - -impl ZiskStdin { - /// Create a null stdin (no input) - pub fn null() -> Self { - Self { io: ZiskIOVariant::Null(ZiskNullStdin) } - } - - /// Create a file-based stdin - pub fn from_file>(path: P) -> Result { - Ok(Self { io: ZiskIOVariant::File(ZiskFileStdin::new(path)?) }) - } - - pub fn from_vec(data: Vec) -> Self { - Self { io: ZiskIOVariant::Memory(ZiskMemoryStdin::new(data)) } - } -} diff --git a/common/src/lib.rs b/common/src/lib.rs index e2f28a94e..4e002f298 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -2,29 +2,27 @@ mod bus; mod component; mod emu_minimal_trace; mod executor_stats; +mod hints; mod instance_context; pub mod io; mod mpi_context; mod planner_helpers; -mod proof; mod proof_log; mod regular_counters; mod regular_planner; mod types; mod utils; -mod zisk_lib_init; pub use bus::*; pub use component::*; pub use emu_minimal_trace::*; pub use executor_stats::*; +pub use hints::*; pub use instance_context::*; pub use mpi_context::*; pub use planner_helpers::*; -pub use proof::*; pub use proof_log::*; pub use regular_counters::*; pub use regular_planner::*; pub use types::*; pub use utils::*; -pub use zisk_lib_init::*; diff --git a/common/src/planner_helpers.rs b/common/src/planner_helpers.rs index ce089a25b..4ad65e5e8 100644 --- a/common/src/planner_helpers.rs +++ b/common/src/planner_helpers.rs @@ -143,13 +143,13 @@ pub fn plan( pub fn plan_with_frops( counts: &[InstFropsCount], size: u64, -) -> Vec<(CheckPoint, HashMap)> { +) -> Vec<(CheckPoint, HashMap)> { if counts.is_empty() || size == 0 { return vec![]; } let mut checkpoints = Vec::new(); - let mut current_scope: HashMap = HashMap::new(); + let mut current_scope: HashMap = HashMap::new(); let mut remaining_size = size; // Remaining size for the current scope. for (current_chunk, count) in counts.iter().enumerate() { @@ -168,12 +168,7 @@ pub fn plan_with_frops( let force_execute_to_end = has_frops && inst_count == 0; current_scope.insert( ChunkId(current_chunk), - ( - checkpoint_size, - count.frops_count, - force_execute_to_end, - CollectSkipper::new(cumulative_offset), - ), + (checkpoint_size, force_execute_to_end, CollectSkipper::new(cumulative_offset)), ); cumulative_offset += checkpoint_size; @@ -313,7 +308,7 @@ mod tests_frops { let size = 10; let expected = vec![( CheckPoint::Multiple(vec![ChunkId(0)]), - [(ChunkId(0), (10, frops, frops > 0, CollectSkipper::new(0)))] + [(ChunkId(0), (10, frops > 0, CollectSkipper::new(0)))] .into_iter() .collect::>(), )]; @@ -338,19 +333,19 @@ mod tests_frops { let expected = vec![ ( CheckPoint::Multiple(vec![ChunkId(0)]), - [(ChunkId(0), (10, frops, false, CollectSkipper::new(0)))] + [(ChunkId(0), (10, false, CollectSkipper::new(0)))] .into_iter() .collect::>(), ), ( CheckPoint::Multiple(vec![ChunkId(0)]), - [(ChunkId(0), (10, frops, false, CollectSkipper::new(10)))] + [(ChunkId(0), (10, false, CollectSkipper::new(10)))] .into_iter() .collect::>(), ), ( CheckPoint::Multiple(vec![ChunkId(0)]), - [(ChunkId(0), (5, frops, frops > 0, CollectSkipper::new(20)))] + [(ChunkId(0), (5, frops > 0, CollectSkipper::new(20)))] .into_iter() .collect::>(), ), @@ -379,15 +374,15 @@ mod tests_frops { let mut expected = vec![ ( CheckPoint::Multiple(vec![ChunkId(0)]), - [(ChunkId(0), (10, frops[0], false, CollectSkipper::new(0)))] + [(ChunkId(0), (10, false, CollectSkipper::new(0)))] .into_iter() .collect::>(), ), ( CheckPoint::Multiple(vec![ChunkId(0), ChunkId(1)]), [ - (ChunkId(0), (5, frops[0], frops[0] > 0, CollectSkipper::new(10))), - (ChunkId(1), (5, frops[1], frops[1] > 0, CollectSkipper::new(0))), + (ChunkId(0), (5, frops[0] > 0, CollectSkipper::new(10))), + (ChunkId(1), (5, frops[1] > 0, CollectSkipper::new(0))), ] .into_iter() .collect::>(), diff --git a/common/src/proof.rs b/common/src/proof.rs deleted file mode 100644 index e66a39808..000000000 --- a/common/src/proof.rs +++ /dev/null @@ -1,102 +0,0 @@ -use anyhow::Result; -use std::io::{Cursor, Write}; -use std::{fs, path::PathBuf}; -use tracing::info; -use zstd::Encoder; - -/// Saves a proof data to disk. -/// -/// Creates a unique filename to avoid overwriting existing proof files by appending -/// a counter suffix (_2, _3, etc.) if the initial filename already exists. -/// -/// # Arguments -/// -/// * `id` - A unique identifier for the proof -/// * `proof_folder` - The folder where proofs will be saved -/// * `proof_data` - The proof data as a vector of u64 values -/// * `with_zip` - Whether to also save a compressed version of the proof -/// -/// # Returns -/// -/// Returns `Ok(())` on success, or a `CoordinatorError` on failure -pub fn save_proof( - id: &str, - proof_folder: PathBuf, - proof_data: &[u64], - with_zip: bool, -) -> Result<()> { - // Ensure the proofs directory exists - fs::create_dir_all(&proof_folder)?; - - // Generate unique filename to avoid overwriting existing files - let mut raw_path = proof_folder.join(format!("proof_{}.fri", id)); - let mut zip_path = raw_path.with_extension("fri.compressed"); - let mut counter = 2; - - while fs::exists(&raw_path)? || (with_zip && fs::exists(&zip_path)?) { - raw_path = proof_folder.join(format!("proof_{}_{}.fri", id, counter)); - zip_path = raw_path.with_extension("fri.compressed"); - counter += 1; - } - - // Convert Vec to bytes safely - let proof_bytes = bytemuck::cast_slice::(proof_data); - - // Write raw proof file - fs::write(&raw_path, proof_bytes)?; - - // Calculate compression statistics - let raw_size = proof_bytes.len(); - - info!("[PostJob] Saving proof:"); - info!("[PostJob] Raw: {} ({} bytes)", raw_path.display(), raw_size); - - if with_zip { - // Compress proof data and write to file - let zip_size = save_zip_proof(proof_bytes, &zip_path, 1)?; - - let ratio = zip_size as f64 / raw_size as f64; - - info!( - "[PostJob] Compressed: {} ({} bytes, ratio: {:.2}x)", - zip_path.display(), - zip_size, - ratio - ); - } - - Ok(()) -} - -/// Compresses data using zstd and writes it to a file. -/// -/// # Arguments -/// -/// * `data` - The raw data to compress -/// * `zip_path` - Path where the compressed file will be written -/// * `compression_level` - Compression level (1 = fastest, 22 = best compression) -/// -/// # Returns -/// -/// Returns the compressed size in bytes -pub fn save_zip_proof( - data: &[u8], - zip_path: &std::path::Path, - compression_level: i32, -) -> Result { - // Compress data in memory using zstd - let mut compressed_buffer = Cursor::new(Vec::new()); - - let mut encoder = Encoder::new(&mut compressed_buffer, compression_level)?; - encoder.write_all(data)?; - encoder.finish()?; - - // Extract compressed data and get size - let compressed_data = compressed_buffer.into_inner(); - let compressed_size = compressed_data.len(); - - // Write compressed data to file - fs::write(zip_path, &compressed_data)?; - - Ok(compressed_size) -} diff --git a/common/src/regular_counters.rs b/common/src/regular_counters.rs index 8e43e3a48..6561ee33c 100644 --- a/common/src/regular_counters.rs +++ b/common/src/regular_counters.rs @@ -2,9 +2,8 @@ //! sent over the data bus. It is designed to be reusable across multiple state machines //! and collects metrics for specified `ZiskOperationType` instructions. -use crate::MemCollectorInfo; use crate::{BusDevice, BusId, Counter, ExtOperationData, Metrics, OperationBusData}; -use std::{collections::VecDeque, ops::Add}; +use std::ops::Add; use zisk_core::ZiskOperationType; /// The `RegularCounters` struct represents a generic counter that monitors and measures @@ -50,6 +49,24 @@ impl RegularCounters { } None } + + /// Processes data received on the bus, updating counters. + /// + /// # Arguments + /// * `bus_id` - The ID of the bus sending the data. + /// * `data` - The data received from the bus. + /// + /// # Returns + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64]) -> bool { + debug_assert!(*bus_id == self.bus_id); + + self.measure(data); + + true + } } impl Metrics for RegularCounters { @@ -111,39 +128,6 @@ impl Add for RegularCounters { } impl BusDevice for RegularCounters { - /// Processes data received on the bus, updating counters. - /// - /// # Arguments - /// * `bus_id` - The ID of the bus sending the data. - /// * `data` - The data received from the bus. - /// * `pending` – A queue of pending bus operations used to send derived inputs. - /// - /// # Returns - /// A boolean indicating whether the program should continue execution or terminate. - /// Returns `true` to continue execution, `false` to stop. - #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { - debug_assert!(*bus_id == self.bus_id); - - self.measure(data); - - true - } - - /// Returns the bus IDs associated with this counter. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![self.bus_id] - } - /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/common/src/types.rs b/common/src/types.rs index 4bc9c8291..741bcc7a0 100644 --- a/common/src/types.rs +++ b/common/src/types.rs @@ -1,4 +1,9 @@ +use anyhow::Result; use std::fmt; +use std::fs; +use std::path::Path; +use std::time::Duration; +use std::time::Instant; /// Type representing a chunk identifier. #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -63,3 +68,275 @@ impl fmt::Display for SegmentId { write!(f, "{}", self.0) } } + +pub enum StatsType { + Main, + Memory, + Opcodes, + Precompiled, + Tables, + Other, +} + +#[derive(Debug, Default, Clone)] +pub struct StatsCostPerType { + pub main_cost: u64, + pub opcode_cost: u64, + pub memory_cost: u64, + pub precompile_cost: u64, + pub tables_cost: u64, + pub other_cost: u64, +} + +impl StatsCostPerType { + pub fn total_cost(&self) -> u64 { + self.main_cost + + self.opcode_cost + + self.memory_cost + + self.precompile_cost + + self.tables_cost + + self.other_cost + } + + pub fn add_cost(&mut self, stats_type: StatsType, cost: u64) { + match stats_type { + StatsType::Main => self.main_cost += cost, + StatsType::Opcodes => self.opcode_cost += cost, + StatsType::Memory => self.memory_cost += cost, + StatsType::Precompiled => self.precompile_cost += cost, + StatsType::Tables => self.tables_cost += cost, + StatsType::Other => self.other_cost += cost, + } + } +} + +impl fmt::Display for StatsCostPerType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let total = self.total_cost(); + if total == 0 { + return write!(f, "total=0"); + } + + let mut parts = Vec::new(); + + let pct = (self.main_cost as f64 / total as f64) * 100.0; + parts.push(format!("main={} ({:.1}%)", self.main_cost, pct)); + + let pct = (self.opcode_cost as f64 / total as f64) * 100.0; + parts.push(format!("opcode={} ({:.1}%)", self.opcode_cost, pct)); + + let pct = (self.memory_cost as f64 / total as f64) * 100.0; + parts.push(format!("memory={} ({:.1}%)", self.memory_cost, pct)); + + let pct = (self.precompile_cost as f64 / total as f64) * 100.0; + parts.push(format!("precompile={} ({:.1}%)", self.precompile_cost, pct)); + + let pct = (self.tables_cost as f64 / total as f64) * 100.0; + parts.push(format!("tables={} ({:.1}%)", self.tables_cost, pct)); + + if self.other_cost > 0 { + let pct = (self.other_cost as f64 / total as f64) * 100.0; + parts.push(format!("other={} ({:.1}%)", self.other_cost, pct)); + } + + write!(f, "total={} [{}]", total, parts.join(", ")) + } +} + +#[derive(Debug, Default, Clone)] +pub struct ZiskExecutorTime { + /// Total executor duration of the entire execution process. + pub total_duration: Duration, + /// Duration of the execution phase. + pub execution_duration: Duration, + /// Duration of the counting and planning phase for main state machines. + pub count_and_plan_duration: Duration, + /// Duration of the counting and planning phase for memory operations from ASM runner. + pub count_and_plan_mo_duration: Duration, + /// Execution duration of the ASM runner. + pub asm_execution_duration: Option, +} + +#[derive(Debug, Default, Clone)] +pub struct ZiskExecutorSummary { + pub steps: u64, + pub executor_time: ZiskExecutorTime, + pub cost_per_type: StatsCostPerType, +} + +impl ZiskExecutorSummary { + pub fn new( + executed_steps: u64, + execution_time: ZiskExecutorTime, + cost_per_type: StatsCostPerType, + ) -> Self { + Self { steps: executed_steps, executor_time: execution_time, cost_per_type } + } +} + +#[derive(Debug, Clone)] +pub struct Stats { + pub airgroup_id: usize, + pub air_id: usize, + /// Collect start time + pub collect_start_time: Instant, + /// Collect duration in microseconds + pub collect_duration: u64, + /// Witness start time + pub witness_start_time: Instant, + /// Witness duration in microseconds + pub witness_duration: u128, + /// Number of chunks + pub num_chunks: usize, +} + +impl Stats { + /// Creates stats for an instance with no collection phase. + /// + /// Used for main instances and ROM instances with ASM emulator that skip collection. + /// Sets `collect_duration` to 0 and `num_chunks` to 0. + pub fn new_no_collection(airgroup_id: usize, air_id: usize) -> Self { + Self { + airgroup_id, + air_id, + collect_start_time: Instant::now(), + collect_duration: 0, + witness_start_time: Instant::now(), + witness_duration: 0, + num_chunks: 0, + } + } + + /// Creates stats for an instance with a pending collection phase. + /// + /// Used when collection is about to start. The `collect_duration` will be + /// updated later via `set_collect_duration` when collection completes. + pub fn new_pending_collection(airgroup_id: usize, air_id: usize, num_chunks: usize) -> Self { + Self { + airgroup_id, + air_id, + collect_start_time: Instant::now(), + collect_duration: 0, + witness_start_time: Instant::now(), + witness_duration: 0, + num_chunks, + } + } + + /// Creates stats for an instance with completed collection. + /// + /// Used when collection has finished and we know the actual timing. + pub fn new_with_collection( + airgroup_id: usize, + air_id: usize, + num_chunks: usize, + collect_start_time: Instant, + collect_duration: u64, + ) -> Self { + Self { + airgroup_id, + air_id, + collect_start_time, + collect_duration, + witness_start_time: Instant::now(), + witness_duration: 0, + num_chunks, + } + } + + /// Creates stats for a main instance (no collection, witness already computed). + /// + /// Used when witness computation has finished and we know the timing. + /// Main instances don't have a collection phase. + pub fn new_main_completed( + airgroup_id: usize, + air_id: usize, + witness_start_time: Instant, + ) -> Self { + Self { + airgroup_id, + air_id, + collect_start_time: Instant::now(), + collect_duration: 0, + witness_start_time, + witness_duration: witness_start_time.elapsed().as_millis(), + num_chunks: 0, + } + } +} + +pub trait ElfBinaryLike { + fn elf(&self) -> &[u8]; + fn name(&self) -> &str; + fn with_hints(&self) -> bool; + fn path(&self) -> Option; +} + +pub struct ElfBinaryFromFile { + pub elf: Vec, + pub name: String, + pub with_hints: bool, + pub path: Option, +} + +impl ElfBinaryFromFile { + pub fn new(elf: &Path, with_hints: bool) -> Result { + let elf_bin = fs::read(elf) + .map_err(|e| anyhow::anyhow!("Error reading ELF file {}: {}", elf.display(), e))?; + Ok(Self { + elf: elf_bin, + name: elf.file_stem().unwrap().to_str().unwrap().to_string(), + with_hints, + path: Some(elf.to_str().unwrap().to_string()), + }) + } +} + +impl ElfBinaryLike for ElfBinaryFromFile { + fn elf(&self) -> &[u8] { + &self.elf + } + fn name(&self) -> &str { + &self.name + } + fn with_hints(&self) -> bool { + self.with_hints + } + fn path(&self) -> Option { + self.path.clone() + } +} + +pub struct ElfBinary { + pub elf: &'static [u8], + pub name: &'static str, + pub with_hints: bool, + pub path: Option<&'static str>, +} + +impl ElfBinaryLike for ElfBinary { + fn elf(&self) -> &[u8] { + self.elf + } + fn name(&self) -> &str { + self.name + } + fn with_hints(&self) -> bool { + self.with_hints + } + fn path(&self) -> Option { + self.path.map(|s| s.to_string()) + } +} + +#[derive(Default, Debug, Clone)] +pub struct AsmExecutionInfo { + pub time: f32, + pub mhz: f32, +} + +impl fmt::Display for AsmExecutionInfo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:.3}s ({:.0} MHz)", self.time, self.mhz) + } +} diff --git a/common/src/utils.rs b/common/src/utils.rs index 5163d8c03..25a2d4d7c 100644 --- a/common/src/utils.rs +++ b/common/src/utils.rs @@ -1,6 +1,4 @@ -use std::fs::OpenOptions; use std::mem::MaybeUninit; -use tracing_subscriber::{filter::FilterFn, fmt, prelude::*, EnvFilter}; pub fn create_atomic_vec

(size: usize) -> Vec
{ let mut vec: Vec> = Vec::with_capacity(size); @@ -54,24 +52,58 @@ macro_rules! error_file { }; } -pub fn init_tracing(log_path: &str) { - let file = OpenOptions::new() - .append(true) - .create(true) - .open(log_path) - .expect("Failed to open log file"); - - let file_layer = fmt::layer() - .with_writer(file) - .with_ansi(false) // no color in file - .with_target(false) - .with_filter(FilterFn::new(|meta| meta.target() == "screen_and_file")); - - let stdout_layer = fmt::layer().with_writer(std::io::stdout).with_ansi(true).with_target(false); - - tracing_subscriber::registry() - .with(EnvFilter::from_default_env().add_directive("info".parse().unwrap())) - .with(stdout_layer) - .with(file_layer) - .init(); +/// Reinterprets a `Vec` as a `Vec` by transmuting the underlying memory. +/// +/// This function converts between vector types by reinterpreting the raw memory, +/// adjusting length and capacity based on the size ratio between types. +/// It performs internal unsafe operations but validates all safety requirements +/// before the conversion. +/// +/// # Arguments +/// * `v` - The source vector to reinterpret. +/// +/// # Returns +/// * `Ok(Vec)` - A new vector that owns the same memory as the input vector +/// * `Err` - If validation fails (size incompatibility or alignment issues) +/// +/// # Type Parameters +/// * `T` - Source element type +/// * `U` - Destination element type +pub fn reinterpret_vec(mut v: Vec) -> anyhow::Result> { + let size_t = std::mem::size_of::(); + let size_u = std::mem::size_of::(); + + // Total bytes in Vec + let total_bytes = v.len() * size_t; + + // Compute remainder to see if we need padding + let rem = total_bytes % size_u; + + // If remainder exists, pad with zeroed T elements + if rem != 0 { + // Number of extra bytes needed + let pad_bytes = size_u - rem; + + // Number of T elements to pad (round up) + let pad_t = pad_bytes.div_ceil(size_t); + + v.extend(std::iter::repeat(T::default()).take(pad_t)); + } + + // Check that the pointer is properly aligned for U + if v.as_ptr() as usize % std::mem::align_of::() != 0 { + return Err(anyhow::anyhow!( + "Vec<{}> is not properly aligned for Vec<{}> (requires {}-byte alignment)", + std::any::type_name::(), + std::any::type_name::(), + std::mem::align_of::() + )); + } + + let len = (v.len() * size_t) / size_u; + let cap = (v.capacity() * size_t) / size_u; + let ptr = v.as_ptr() as *mut U; + + std::mem::forget(v); + Ok(unsafe { Vec::from_raw_parts(ptr, len, cap) }) } diff --git a/common/src/zisk_lib_init.rs b/common/src/zisk_lib_init.rs deleted file mode 100644 index 7a7465765..000000000 --- a/common/src/zisk_lib_init.rs +++ /dev/null @@ -1,50 +0,0 @@ -use std::{path::PathBuf, time::Instant}; - -use fields::PrimeField64; -use proofman_common::VerboseMode; -use witness::WitnessLibrary; - -use crate::{io::ZiskStdin, ExecutorStats}; - -#[derive(Debug, Default, Clone)] -pub struct ZiskExecutionResult { - pub executed_steps: u64, -} - -#[derive(Debug, Clone)] -pub struct Stats { - pub airgroup_id: usize, - pub air_id: usize, - /// Collect start time - pub collect_start_time: Instant, - /// Collect duration in microseconds - pub collect_duration: u64, - /// Witness start time - pub witness_start_time: Instant, - /// Witness duration in microseconds - pub witness_duration: u128, - /// Number of chunks - pub num_chunks: usize, -} - -/// Extension trait that provides execution result access without Any boxing -pub trait ZiskWitnessLibrary { - fn set_stdin(&self, stdin: ZiskStdin); - fn execution_result(&self) -> Option<(ZiskExecutionResult, ExecutorStats)>; -} - -// SUpertrait for ZiskWitnessLibrary and WitnessLibrary -pub trait ZiskLib: - WitnessLibrary + ZiskWitnessLibrary + Send + Sync -{ -} - -pub type ZiskLibInitFn = fn( - VerboseMode, - PathBuf, // Rom path - Option, // Asm path - Option, // Asm ROM path - Option, // Base port for the ASM microservices - bool, // Unlock_mapped_memory - bool, // Shared_tables -) -> Result>, Box>; diff --git a/core/Cargo.toml b/core/Cargo.toml index cf3b890c2..39e82efe2 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -18,22 +18,24 @@ path = "src/bin/riscv2zisk.rs" [dependencies] precompiles-helpers = { workspace = true } lib-c = { workspace = true } -ziskos = { workspace = true } +ziskos-hints = { workspace = true } riscv = { workspace = true } -zisk-pil = { workspace = true } - rayon = { workspace = true } serde = { workspace = true } -serde_json = { workspace = true } fields = { workspace = true } sha2 = { workspace = true } +paste = { workspace = true } +zisk-definitions = { workspace = true } -indexmap = { version = "2.2.6", features = ["serde"] } -json = "0.12.4" elf = "0.7.4" tiny-keccak = { version = "2.0.2", features = ["keccak"] } +[build-dependencies] +lib-float = { workspace = true } + [features] default = [] +debug_dma = [] +log_dma_ops = [] # sp = [] diff --git a/core/build.rs b/core/build.rs new file mode 100644 index 000000000..d52d03228 --- /dev/null +++ b/core/build.rs @@ -0,0 +1,12 @@ +use std::path::Path; + +fn main() { + // Ensure lib-float is built before we try to include ziskfloat.elf + // The build-dependency on lib-float will trigger its build.rs first. + + // Tell cargo to rerun this build script if the float library changes + let float_lib_path = + Path::new(env!("CARGO_MANIFEST_DIR")).join("../lib-float/c/lib/ziskfloat.elf"); + + println!("cargo:rerun-if-changed={}", float_lib_path.display()); +} diff --git a/core/src/bin/riscv2zisk.rs b/core/src/bin/riscv2zisk.rs index 25c70cac1..a4472d012 100644 --- a/core/src/bin/riscv2zisk.rs +++ b/core/src/bin/riscv2zisk.rs @@ -5,34 +5,32 @@ use std::{env, process}; use zisk_core::Riscv2zisk; /// Performs a transpilation of a RISC-V ELF file to a Zisk ROM file. -/// The binary accepts 2 arguments: the path of the input RISC-V ELF file, and the path of the -/// output Zisk rom file. +/// The binary accepts 3 arguments (4 including the executable name): +/// - the path of the input RISC-V ELF file +/// - the path of the output Zisk rom file +/// - the generation method +/// /// After parsing the arguments, the main function calls Riscv2zisk::runfile to perform the actual /// work. fn main() { - println!("riscv2zisk converts an ELF RISCV file into a ZISK ASM file"); - // Get program arguments let args: Vec = env::args().collect(); // Check program arguments length - if args.len() < 3 || args.len() > 4 { - eprintln!("Error parsing arguments: invalid number of arguments={}. Usage: riscv2zisk [] ", args.len()); + if args.len() != 4 { + eprintln!("Error parsing arguments: invalid number of arguments={}", args.len()); for (i, arg) in args.iter().enumerate() { eprintln!("Argument {i}: {arg}"); } + eprintln!("Usage: riscv2zisk "); process::exit(1); } - // Get the 2 input parameters: ELF (RISCV) file name (input data) and ZisK file name (output - // data) + // Get the 3 arguments: the input ELF file, the output ASM file and the generation method let elf_file = args[1].clone(); - println!("ELF (RISCV) file: {elf_file}"); - let (asm_file, gen_arg) = if args.len() == 4 { - (Some(args[2].clone()), args[3].clone()) - } else { - (None, args[2].clone()) - }; + let asm_file = args[2].clone(); + let gen_arg = args[3].clone(); + println!("riscv2zisk converts a RISCV ELF file ({elf_file}) into a ZISK ASM file ({asm_file}), using generation method {gen_arg}."); let generation_method = match gen_arg.as_str() { "--gen=0" => zisk_core::AsmGenerationMethod::AsmFast, @@ -47,16 +45,22 @@ fn main() { "--gen=9" => zisk_core::AsmGenerationMethod::AsmMemReads, "--gen=10" => zisk_core::AsmGenerationMethod::AsmChunkPlayerMemReadsCollectMain, _ => { - eprintln!("Invalid generation method. Use --gen=0 (fast), =1 (minimal trace), =2 (rom histogram), =3 (main trace), =4 (chunks), =5 (bus op), =6 (zip) or =7 (mem op)."); + eprintln!("Invalid generation method. Use --gen=0 (fast), =1 (minimal trace), =2 (rom histogram), =3 (main trace), =4 (chunks), =5 (bus op), =6 (zip), =7 (mem op), =8 (min trace chunk player), =9 (mem reads), =10 (mem reads chunk player)."); process::exit(1); } }; + // Read ELF file bytes + let elf = std::fs::read(elf_file).unwrap_or_else(|e| { + eprintln!("Error reading ELF file: {e}"); + process::exit(1); + }); + // Create an instance of the program converter - let rv2zk = Riscv2zisk::new(elf_file); + let rv2zk = Riscv2zisk::new(&elf); // Convert program - if let Err(e) = rv2zk.runfile(asm_file.unwrap(), generation_method, true, true) { + if let Err(e) = rv2zk.runfile(asm_file, generation_method, true, true, false) { println!("Application error: {e}"); process::exit(1); } diff --git a/core/src/elf2rom.rs b/core/src/elf2rom.rs index 50518e0c4..d5d911fb5 100644 --- a/core/src/elf2rom.rs +++ b/core/src/elf2rom.rs @@ -3,7 +3,8 @@ use crate::{ add_end_and_lib, elf_extraction::{ - collect_elf_payload, collect_elf_payload_from_bytes, merge_adjacent_ro_sections, ElfPayload, + collect_elf_payload_from_bytes, get_symbol_addresses_from_bytes, + merge_adjacent_ro_sections, ElfPayload, }, riscv2zisk_context::{add_entry_exit_jmp, add_zisk_code, add_zisk_init_data}, AsmGenerationMethod, RoData, ZiskInst, ZiskRom, ZiskRom2Asm, ROM_ADDR, ROM_ADDR_MAX, ROM_ENTRY, @@ -12,13 +13,15 @@ use rayon::prelude::*; use std::{error::Error, path::Path}; /// Executes the ROM transpilation process: from ELF to Zisk -pub fn elf2rom(elf_file: &Path) -> Result> { +pub fn elf2rom(elf: &[u8]) -> Result> { // Load the embedded float library const FLOAT_LIB_DATA: &[u8] = include_bytes!("../../lib-float/c/lib/ziskfloat.elf"); // Extract all relevant sections from the ELF file let payloads: Vec = - vec![collect_elf_payload_from_bytes(FLOAT_LIB_DATA)?, collect_elf_payload(elf_file)?]; + vec![collect_elf_payload_from_bytes(FLOAT_LIB_DATA)?, collect_elf_payload_from_bytes(elf)?]; + // Get DMA function addresses: (memcpy, memcmp, memset, memmove) + let dma_addrs = get_dma_symbol_addresses(elf); // Create an empty ZiskRom instance let mut rom: ZiskRom = ZiskRom { next_init_inst_addr: ROM_ENTRY, ..Default::default() }; @@ -29,7 +32,7 @@ pub fn elf2rom(elf_file: &Path) -> Result> { for (i, payload) in payloads.into_iter().enumerate() { // 1. Add executable code sections for section in &payload.exec { - add_zisk_code(&mut rom, section.addr, §ion.data); + add_zisk_code(&mut rom, section.addr, §ion.data, dma_addrs); } // 2. Add read-write data sections (will be copied to RAM) @@ -65,6 +68,21 @@ pub fn elf2rom(elf_file: &Path) -> Result> { Ok(rom) } +/// Get DMA function addresses from ELF data +/// Returns (memcpy, memcmp, memset, memmove), with 0 for missing symbols +fn get_dma_symbol_addresses(elf_data: &[u8]) -> (u64, u64, u64, u64) { + let symbols = ["memcpy", "memcmp", "memset", "memmove"]; + match get_symbol_addresses_from_bytes(elf_data, &symbols) { + Ok(addrs) => ( + addrs.get("memcpy").copied().unwrap_or(0), + addrs.get("memcmp").copied().unwrap_or(0), + addrs.get("memset").copied().unwrap_or(0), + addrs.get("memmove").copied().unwrap_or(0), + ), + Err(_) => (0, 0, 0, 0), + } +} + /// Optimizes instruction lookup by organizing instructions into direct-access arrays. /// /// ## Problem it solves: @@ -216,14 +234,15 @@ fn optimize_instruction_lookup(rom: &mut ZiskRom) -> Result<(), Box> /// Executes the ELF file data transpilation process into a Zisk ROM, and saves the result into a /// file. The file format can be JSON, PIL-based or binary. pub fn elf2romfile( - elf_file: &Path, + elf: &[u8], asm_file: &Path, generation_method: AsmGenerationMethod, log_output: bool, comments: bool, + hints: bool, ) -> Result<(), Box> { - let rom = elf2rom(elf_file)?; - ZiskRom2Asm::save_to_asm_file(&rom, asm_file, generation_method, log_output, comments); + let rom = elf2rom(elf)?; + ZiskRom2Asm::save_to_asm_file(&rom, asm_file, generation_method, log_output, comments, hints); Ok(()) } diff --git a/core/src/elf_extraction.rs b/core/src/elf_extraction.rs index 5a9db724c..d8ba3edb4 100644 --- a/core/src/elf_extraction.rs +++ b/core/src/elf_extraction.rs @@ -5,12 +5,13 @@ use elf::{ endian::AnyEndian, ElfBytes, }; -use std::{error::Error, fs, path::Path}; +use std::{collections::HashMap, error::Error, fs, path::Path}; use crate::{is_elf_file, RAM_ADDR, RAM_SIZE}; const RAM_START_ADDR: u64 = RAM_ADDR; const RAM_END_ADDR: u64 = RAM_ADDR + RAM_SIZE; +const MAX_ELF_SECTION_SIZE: usize = 1024 * 1024 * 1024; // 1 GiB, arbitrary limit to prevent OOM from malformed ELFs /// Raw bytes of `data` that will live at `addr` once the ROM has booted. #[derive(Debug, Clone)] @@ -32,15 +33,6 @@ pub struct ElfPayload { pub ro: Vec, } -/// Extracts the relevant sections from the ELF file for `ZiskRom` -pub fn collect_elf_payload(elf_path: &Path) -> Result> { - // Read the ELF file - let file_data = - fs::read(elf_path).map_err(|_| format!("Error reading ELF file={}", elf_path.display()))?; - - collect_elf_payload_from_bytes(&file_data) -} - /// Extracts the relevant sections from ELF file bytes for `ZiskRom` pub fn collect_elf_payload_from_bytes(file_data: &[u8]) -> Result> { // Validate it's an ELF file @@ -90,6 +82,12 @@ pub fn collect_elf_payload_from_bytes(file_data: &[u8]) -> Result MAX_ELF_SECTION_SIZE { + return Err(format!( + "ELF section at 0x{:08x} has size {} which exceeds the maximum allowed size of {} bytes.", + sh.sh_addr, size, MAX_ELF_SECTION_SIZE + ).into()); + } // Align size to 4 bytes let aligned_size = (size + 3) & !3; vec![0u8; aligned_size] @@ -167,6 +165,37 @@ pub fn merge_adjacent_ro_sections(sections: &[DataSection]) -> Vec merged } +/// Get addresses for a list of symbols from an ELF file +pub fn get_symbol_addresses( + elf_path: &Path, + symbol_names: &[&str], +) -> Result, Box> { + let file_data = fs::read(elf_path)?; + get_symbol_addresses_from_bytes(&file_data, symbol_names) +} + +/// Get addresses for a list of symbols from ELF bytes +pub fn get_symbol_addresses_from_bytes( + file_data: &[u8], + symbol_names: &[&str], +) -> Result, Box> { + let elf = ElfBytes::::minimal_parse(file_data)?; + let mut result = HashMap::new(); + let names_set: std::collections::HashSet<&str> = symbol_names.iter().copied().collect(); + + if let Some((symtab, strtab)) = elf.symbol_table()? { + for sym in symtab { + if let Ok(name) = strtab.get(sym.st_name as usize) { + if names_set.contains(name) { + result.insert(name.to_string(), sym.st_value); + } + } + } + } + + Ok(result) +} + #[cfg(test)] mod tests { use super::*; diff --git a/core/src/helpers.rs b/core/src/helpers.rs index cea9672af..a6e57c9e8 100644 --- a/core/src/helpers.rs +++ b/core/src/helpers.rs @@ -1,41 +1,19 @@ use sha2::compress256; + #[allow(deprecated)] use sha2::digest::generic_array::{typenum::U64, GenericArray}; -pub fn sha256f(state: &mut [u64; 4], input: &[u64; 8]) { - // Convert both the state and the input to appropriate types - let mut state_u32: [u32; 8] = convert_u64_to_u32(state).try_into().unwrap(); - let block = convert_u64_to_generic_array_bytes(input); - compress256(&mut state_u32, &[block]); - - // Convert the state back to u64 and write it to the memory address - *state = convert_u32_to_u64(&state_u32); -} - -pub fn convert_u64_to_u32(input: &[u64]) -> Vec { - let mut out = Vec::with_capacity(input.len() * 2); - for &word in input { - out.push((word >> 32) as u32); - out.push((word & 0xFFFFFFFF) as u32); - } - out -} +use precompiles_helpers::blake2b_round; #[allow(deprecated)] -pub fn convert_u64_to_generic_array_bytes(input: &[u64; 8]) -> GenericArray { - let mut out = [0u8; 64]; - for (i, word) in input.iter().enumerate() { - for j in 0..8 { - out[i * 8 + j] = (word >> (56 - j * 8)) as u8; - } - } - GenericArray::::clone_from_slice(&out) +pub fn sha256f(state: &mut [u64; 4], input: &[u64; 8]) { + let state_u32: &mut [u32; 8] = unsafe { &mut *(state.as_mut_ptr() as *mut [u32; 8]) }; + let input_u8: &[GenericArray; 1] = + unsafe { &*(input.as_ptr() as *const [GenericArray; 1]) }; + compress256(state_u32, input_u8); } -pub fn convert_u32_to_u64(words: &[u32; 8]) -> [u64; 4] { - let mut out = [0u64; 4]; - for i in 0..4 { - out[i] = ((words[2 * i] as u64) << 32) | (words[2 * i + 1] as u64); - } - out +#[allow(deprecated)] +pub fn blake2br(index: u64, state: &mut [u64; 16], input: &[u64; 16]) { + blake2b_round(state, input, index as u32); } diff --git a/core/src/inst_context.rs b/core/src/inst_context.rs index 9b82dba6a..cb8fbf622 100644 --- a/core/src/inst_context.rs +++ b/core/src/inst_context.rs @@ -8,7 +8,7 @@ use crate::{ Mem, FCALL_PARAMS_MAX_SIZE, FCALL_RESULT_MAX_SIZE, REGS_IN_MAIN_TOTAL_NUMBER, ROM_ENTRY, }; -/// Zisk precompiled +/// Zisk precompiled emulation mode #[derive(Debug, Default, PartialEq, Eq)] pub enum EmulationMode { #[default] @@ -42,27 +42,27 @@ pub struct PrecompiledInstContext { #[derive(Debug)] pub struct FcallInstContext { /// Fcall parameters data - /// Maximum size is FCALL_PARAMS_MAX_SIZE u64's + /// Maximum size is FCALL_PARAMS_MAX_SIZE u64s pub parameters: [u64; FCALL_PARAMS_MAX_SIZE], - /// Indicates how many parameters u64's contain valid data + /// Indicates how many parameter u64s contain valid data pub parameters_size: u64, /// Fcall result data - /// Maximum size is FCALL_RESULT_MAX_SIZE u64's + /// Maximum size is FCALL_RESULT_MAX_SIZE u64s pub result: [u64; FCALL_RESULT_MAX_SIZE], - /// Indicates how many result u64's contain valid data + /// Indicates how many result u64s contain valid data pub result_size: u64, - /// Indicates how many result u64's have been read using fcall_get() + /// Indicates how many result u64s have been read using fcall_get() pub result_got: u64, } impl Default for FcallInstContext { /// Default fcall instruction context constructor fn default() -> Self { - FcallInstContext { + Self { parameters: [0; FCALL_PARAMS_MAX_SIZE], parameters_size: 0, result: [0; FCALL_RESULT_MAX_SIZE], @@ -71,7 +71,6 @@ impl Default for FcallInstContext { } } } - #[derive(Debug)] /// ZisK instruction context data container, storing the state of the execution pub struct InstContext { @@ -117,6 +116,18 @@ pub struct InstContext { /// Fcall data pub fcall: FcallInstContext, + + /// DataExt 64 bytes size. With this information it is possible to specify which variable part of the minimal trace + /// is associated with the current instruction. Used by DMA precompile. + pub data_ext_len: usize, + + /// Precompiles uses jmp_offset1 as extended param (static value known in transpilation time) + pub extended_arg: i64, + + pub stats_hint: u64, + + /// Input data length, stored in the context to be used by the FCALL_INPUT_READY_ID fcall + pub input_len: u64, } /// RisK instruction context implementation @@ -138,6 +149,10 @@ impl InstContext { emulation_mode: EmulationMode::default(), precompiled: PrecompiledInstContext::default(), fcall: FcallInstContext::default(), + data_ext_len: 0, + extended_arg: 0, + stats_hint: 0, + input_len: 0, } } diff --git a/core/src/mem.rs b/core/src/mem.rs index 0d4f7d711..cba4f24ff 100644 --- a/core/src/mem.rs +++ b/core/src/mem.rs @@ -14,6 +14,10 @@ //! `|` //! `|---------------` //! ` ...` +//! `|--------------- INPUT_ADDR (0x40000000)` +//! `|` +//! `| Contains program input data.` +//! `|` //! `|--------------- ROM_ADDR: first program instruction (0x80000000)` //! `|` //! `| Contains program instructions.` @@ -27,10 +31,6 @@ //! `|` //! `| Initial value of the float library stack pointer.` //! `|` -//! `|--------------- INPUT_ADDR (0x90000000)` -//! `|` -//! `| Contains program input data.` -//! `|` //! `|--------------- SYS_ADDR (= RAM_ADDR = REG_FIRST) (0xa0000000)` //! `|` //! `| Contains system address.` @@ -102,9 +102,9 @@ use crate::{M16, M3, M32, M8, REG_FIRST, REG_LAST}; use core::fmt; /// Fist input data memory address -pub const INPUT_ADDR: u64 = 0x90000000; +pub const INPUT_ADDR: u64 = 0x4000_0000; /// Maximum size of the input data -pub const MAX_INPUT_SIZE: u64 = 0x08000000; // 128M, +pub const MAX_INPUT_SIZE: u64 = 0x4000_0000; // 128M, /// Free input data memory address = first input address pub const FREE_INPUT_ADDR: u64 = INPUT_ADDR; /// First global RW memory address @@ -141,6 +141,8 @@ pub const FLOAT_LIB_SP: u64 = 0xc0000000 - 16; // 0xbffffff0 pub const ARCH_ID_ZISK: u64 = 0xFFFEEEE; /// UART memory address; single bytes written here will be copied to the standard output pub const UART_ADDR: u64 = SYS_ADDR + 0x200; +/// Extra parameters of repcompiles are stored in fixed memory area (256 bytes => 32 parameters) +pub const EXTRA_PARAMS_ADDR: u64 = SYS_ADDR + 0x0F00; /// Float registers first address pub const FREG_FIRST: u64 = SYS_ADDR + 0x1000; /// CSR memory address; contains control and status registers @@ -795,5 +797,210 @@ impl Mem { } } + #[inline(always)] + pub fn get_writeable_section(&mut self, addr: u64, count: u64) -> &mut MemSection { + if let Ok(section) = self.read_sections.binary_search_by(|section| { + if addr < section.start { + std::cmp::Ordering::Greater + } else if addr > (section.end - count) { + std::cmp::Ordering::Less + } else { + std::cmp::Ordering::Equal + } + }) { + panic!( + "Mem::get_write_section() invalid addr={addr}={addr:x},count={count} write section start={:x} end={:x} is read only section", + self.read_sections[section].start, self.read_sections[section].end); + }; + + // If not found in read sections, try write section + let section = &mut self.write_section; + + // Check that the address and count fall into this section address range + if (addr < section.start) || ((addr + count) > section.end) { + panic!( + "Mem::get_section() invalid addr={addr}={addr:x},count={count} write section start={:x} end={:x}", + section.start, section.end + ); + } + section + } + + #[inline(always)] + pub fn get_readable_section(&self, addr: u64, count: u64) -> &MemSection { + let section = if let Ok(section) = self.read_sections.binary_search_by(|section| { + if addr < section.start { + std::cmp::Ordering::Greater + } else if addr > (section.end - count) { + std::cmp::Ordering::Less + } else { + std::cmp::Ordering::Equal + } + }) { + &self.read_sections[section] + } else { + &self.write_section + }; + if (addr < section.start) || ((addr + count) > section.end) { + panic!( + "Mem::get_read_section() invalid addr={addr}={addr:x},count={count} read section start={:x} end={:x}", + section.start, section.end + ); + } + section + } + + #[inline(always)] + pub fn memcpy(&mut self, dst: u64, src: u64, count: u64) { + // Early return if source and destination are the same or count is zero + if dst == src || count == 0 { + return; + } + + let dst_end = dst + count; + let src_end = src + count; + let count_usize = count as usize; + + // Check if there is an overlap between source and destination + let overlaps = (dst < src_end) && (src < dst_end); + + if overlaps { + // Overlapping case: use temporary buffer to avoid data corruption + let temp_buffer: Vec = { + let src_section = self.get_readable_section(src, count); + let src_offset: usize = (src - src_section.start) as usize; + src_section.buffer[src_offset..src_offset + count_usize].to_vec() + }; + + let dst_section = self.get_writeable_section(dst, count); + let dst_offset: usize = (dst - dst_section.start) as usize; + dst_section.buffer[dst_offset..dst_offset + count_usize].copy_from_slice(&temp_buffer); + } else { + // Non-overlapping case: direct copy + // First, get a copy of the source data + let data_to_copy: Vec = { + let src_section = self.get_readable_section(src, count); + let src_offset: usize = (src - src_section.start) as usize; + src_section.buffer[src_offset..src_offset + count_usize].to_vec() + }; + + // Then, write to destination + let dst_section = self.get_writeable_section(dst, count); + let dst_offset: usize = (dst - dst_section.start) as usize; + dst_section.buffer[dst_offset..dst_offset + count_usize].copy_from_slice(&data_to_copy); + } + } + + pub fn memcpy_from_data(&mut self, dst: u64, count: u64, data: &[u64], data_offset: usize) { + // Early return if source and destination are the same or count is zero + if count == 0 { + return; + } + + let data_bytes: &[u8] = + unsafe { core::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * 8) }; + + // Then, write to destination + let dst_section = self.get_writeable_section(dst, count); + let dst_offset: usize = (dst - dst_section.start) as usize; + + let count = count as usize; + let bytes = &data_bytes[data_offset..data_offset + count]; + dst_section.buffer[dst_offset..dst_offset + count].copy_from_slice(bytes); + } + + pub fn memset(&mut self, dst: u64, count: u64, data: u8) { + // Early return if source and destination are the same or count is zero + if count == 0 { + return; + } + + // Then, write to destination + let dst_section = self.get_writeable_section(dst, count); + let dst_offset: usize = (dst - dst_section.start) as usize; + + let count = count as usize; + dst_section.buffer[dst_offset..dst_offset + count].fill(data); + } + + /// Reads `count` bytes from memory starting at `addr` and appends them as u64 values to `data`. + /// The data is read in 64-bit aligned chunks and pushed to the vector. + pub fn push_from_mem(&mut self, data: &mut Vec, addr: u64, count: u64) { + if count == 0 { + return; + } + + let section = self.get_readable_section(addr, count); + let addr64 = addr >> 3; + let to_addr64 = (addr + count - 1) >> 3; + let count64 = (to_addr64 - addr64 + 1) as usize; + let addr_offset: usize = (addr - section.start) as usize & !0x07; + let addr_offset64: usize = addr_offset >> 3; + + let mem64: &[u64] = unsafe { + core::slice::from_raw_parts( + section.buffer.as_ptr() as *const u64, + section.buffer.len() / 8, + ) + }; + data.extend_from_slice(&mem64[addr_offset64..addr_offset64 + count64]); + } + + pub fn memcmp(&self, a: u64, b: u64, count: u64) -> (u64, usize) { + if count == 0 { + return (0, 0); + } + + let count_usize = count as usize; + + // Get sections for both addresses + let a_section = self.get_readable_section(a, count); + let b_section = self.get_readable_section(b, count); + + let a_offset: usize = (a - a_section.start) as usize; + let b_offset: usize = (b - b_section.start) as usize; + + // Compare byte by byte + for i in 0..count_usize { + let byte_a = a_section.buffer[a_offset + i]; + let byte_b = b_section.buffer[b_offset + i]; + + if byte_a != byte_b { + // Sign extend the difference to 64 bits + let diff = (byte_a as i64) - (byte_b as i64); + // return effective count, needs the last byte to compare. + // println!("BYTE_DIFF[{i:>4}] = {diff} BYTE_A[0x{a:08X} + {i:>4}](0x{byte_a:02X}) ? BYTE_B[0x{b:08X} + {i:>4}](0x{byte_b:02X}) S:{step}"); + // if i > 0 { + // println!("PREV BYTE_A[0x{a:08X} + {:>4}](0x{:02X}) ? BYTE_B[0x{b:08X} + {:>4}](0x{:02X}) S:{step}", + // i - 1, a_section.buffer[a_offset + i - 1], i - 1, b_section.buffer[b_offset + i - 1]); + // } + // println!("POST BYTE_A[0x{a:08X} + {:>4}](0x{:02X}) ? BYTE_B[0x{b:08X} + {:>4}](0x{:02X}) S:{step}", + // i + 1, a_section.buffer[a_offset + i + 1], i + 1, b_section.buffer[b_offset + i + 1]); + return (diff as u64, i + 1); + } + } + // All bytes are equal + (0, count_usize) + } + + pub fn memdump(&self, addr: u64, count: u64) -> String { + if count == 0 { + return String::new(); + } + + let count_usize = count as usize; + + // Get section for the address range + let section = self.get_readable_section(addr, count); + let offset: usize = (addr - section.start) as usize; + + // Convert bytes to hex string + section.buffer[offset..offset + count_usize] + .iter() + .map(|byte| format!("{:02x}", byte)) + .collect::>() + .join("") + } + //pub fn get_non_aligned_data_from_required(address: u64, width: u8,) } diff --git a/core/src/riscv2zisk.rs b/core/src/riscv2zisk.rs index 28cdefd78..57cd06227 100644 --- a/core/src/riscv2zisk.rs +++ b/core/src/riscv2zisk.rs @@ -62,15 +62,15 @@ pub enum AsmGenerationMethod { AsmChunkPlayerMemReadsCollectMain, } /// RISCV-to-ZisK struct containing the input ELF RISCV file name and the output ZISK ASM file name -pub struct Riscv2zisk { - /// ELF RISC-V file name (input) - pub elf_file: PathBuf, +pub struct Riscv2zisk<'a> { + /// ELF RISC-V file bytes (input) + pub elf: &'a [u8], } -impl Riscv2zisk { - /// Creates a new Riscv2zisk struct with the provided input and output file names - pub fn new>(elf_file: P) -> Riscv2zisk { - Riscv2zisk { elf_file: elf_file.into() } +impl<'a> Riscv2zisk<'a> { + /// Creates a new Riscv2zisk struct with the provided ELF bytes + pub fn new(elf: &'a [u8]) -> Riscv2zisk<'a> { + Riscv2zisk { elf } } /// Executes the file conversion process by calling elf2romfile() @@ -80,13 +80,14 @@ impl Riscv2zisk { generation_method: AsmGenerationMethod, log_output: bool, comments: bool, + hints: bool, ) -> Result<(), Box> { - elf2romfile(&self.elf_file, &asm_file.into(), generation_method, log_output, comments) + elf2romfile(self.elf, &asm_file.into(), generation_method, log_output, comments, hints) .map_err(|e| format!("Error converting elf to assembly: {e}").into()) } /// Executes the file conversion process by calling elf2rom() pub fn run(&self) -> Result> { - elf2rom(&self.elf_file) + elf2rom(self.elf) } } diff --git a/core/src/riscv2zisk_context.rs b/core/src/riscv2zisk_context.rs index 56bc4cb68..429b8e913 100644 --- a/core/src/riscv2zisk_context.rs +++ b/core/src/riscv2zisk_context.rs @@ -3,18 +3,22 @@ //! attribute. use riscv::{riscv_interpreter, RiscvInstruction}; +use zisk_definitions::{ + SYSCALL_DMA_INPUTCPY_ID, SYSCALL_DMA_MEMCMP_ID, SYSCALL_DMA_MEMCPY_ID, SYSCALL_DMA_MEMSET_ID, +}; use crate::{ convert_vector, ZiskInstBuilder, ZiskRom, ARCH_ID_CSR_ADDR, ARCH_ID_ZISK, CSR_ADDR, - FLOAT_LIB_ROM_ADDR, FLOAT_LIB_SP, FREG_F0, FREG_INST, FREG_RA, FREG_X0, INPUT_ADDR, MTVEC, - OUTPUT_ADDR, REG_X0, ROM_ENTRY, ROM_EXIT, + EXTRA_PARAMS_ADDR, FLOAT_LIB_ROM_ADDR, FLOAT_LIB_SP, FREG_F0, FREG_INST, FREG_RA, FREG_X0, + INPUT_ADDR, MTVEC, OUTPUT_ADDR, REG_X0, ROM_ENTRY, ROM_EXIT, }; use std::collections::HashMap; -// The CSR precompiled addresses are defined in the `ZiskOS` `ziskos/entrypoint/src` files -// because legacy versions of Rust do not support constant parameters in `asm!` macros. -const CSR_PRECOMPILED: [&str; 18] = [ +// The CSR precompiled addresses are defined in the `definitions/src/syscall.rs` file +// because legacy versions of Rust do not support constant parameters in `asm!` macros. +// Important: The order should be the same as in such file. +const CSR_PRECOMPILED: [&str; 26] = [ "keccak", "arith256", "arith256_mod", @@ -32,10 +36,20 @@ const CSR_PRECOMPILED: [&str; 18] = [ "bls12_381_complex_add", "bls12_381_complex_sub", "bls12_381_complex_mul", - "add256", + "add256", // Note: Constant CSR_PRECOMPILED_ADD256 needs to be updated if this is moved + "poseidon2", + "dma_memcpy", + "dma_memcmp", + "dma_inputcpy", + "dma_memset", + "secp256r1_add", + "secp256r1_dbl", + "blake2", ]; const CSR_PRECOMPILED_ADDR_START: u32 = 0x800; const CSR_PRECOMPILED_ADDR_END: u32 = CSR_PRECOMPILED_ADDR_START + CSR_PRECOMPILED.len() as u32; +const CSR_DMA_PRECOMPILED_ADDR_START: u32 = 0x813; +const CSR_DMA_PRECOMPILED_ADDR_END: u32 = 0x816; const CSR_PRECOMPILED_ADD256: u32 = CSR_PRECOMPILED_ADDR_START + 17; const CSR_FCALL_ADDR_START: u32 = 0x8C0; const CSR_FCALL_ADDR_END: u32 = 0x8DF; @@ -55,13 +69,28 @@ const FLOAT_HANDLER_RETURN_ADDR: u64 = FLOAT_HANDLER_ADDR + 4 * 34; // 31 regs + pub struct Riscv2ZiskContext<'a> { /// Map of program address to ZisK instructions pub insts: &'a mut HashMap, + // to store csr-port used on CSR instrucction for next instruction + pub input_precompile: Option, + pub output_precompile: Option, + // to store register used on CSR instrucction for next instruction as arg1 + // precompile (arg1, previous_arg1, arg2 || immediate) + pub input_precompile_reg: Option, + pub output_precompile_reg: Option, } impl Riscv2ZiskContext<'_> { /// Converts an input RISCV instruction into a ZisK instruction and stores it into the internal /// map. C instrucions are already expanded into their equivalent RISCV instructions, so we /// only have to map them to their corresponding IMA 32-bits equivalent instructions. - pub fn convert(&mut self, riscv_instruction: &RiscvInstruction) { + /// + /// # Parameters + /// * `riscv_instruction` - The current instruction to convert + /// * `next_instructions` - Slice of the remaining instructions after the current one + pub fn convert( + &mut self, + riscv_instruction: &RiscvInstruction, + next_instructions: &[RiscvInstruction], + ) { // ZisK supports the IMAC RISC-V instruction set match riscv_instruction.inst.as_str() { // I: Base Integer Instruction Set @@ -69,9 +98,32 @@ impl Riscv2ZiskContext<'_> { // I.1. Integer Computational (Register-Register) "add" => { - if riscv_instruction.rs1 == 0 { - // rd = rs1(0) + rs2 = rs2 - self.copyb(riscv_instruction, 4, 2); + if riscv_instruction.rd == 0 + && self.input_precompile == Some(SYSCALL_DMA_MEMCPY_ID as u32) + { + self.create_precompiles_op( + riscv_instruction, + "dma_memcpy", + riscv_instruction.rs1, + self.input_precompile_reg.unwrap(), + 4, + ); + } else if self.input_precompile == Some(SYSCALL_DMA_MEMCMP_ID as u32) { + self.create_precompiles_op( + riscv_instruction, + "dma_memcmp", + riscv_instruction.rs1, + self.input_precompile_reg.unwrap(), + 4, + ); + } else if riscv_instruction.rs1 == 0 { + if !next_instructions.is_empty() { + // rd = rs1(0) + rs2 = rs2 followed by ret + self.copyb(riscv_instruction, 4, 2); + } else { + // rd = rs1(0) + rs2 = rs2 + self.copyb(riscv_instruction, 4, 2); + } } else if riscv_instruction.rs2 == 0 { // rd = rs1 + rs2(0) = rs1 self.copyb(riscv_instruction, 4, 1); @@ -180,10 +232,10 @@ impl Riscv2ZiskContext<'_> { "ecall" => self.ecall(riscv_instruction), "ebreak" => self.nop(riscv_instruction, 4), "csrrw" => self.csrrw(riscv_instruction), - "csrrs" => self.csrrs(riscv_instruction), + "csrrs" => self.csrrs(riscv_instruction, next_instructions), "csrrc" => self.csrrc(riscv_instruction), "csrrwi" => self.csrrwi(riscv_instruction), - "csrrsi" => self.csrrsi(riscv_instruction), + "csrrsi" => self.csrrsi(riscv_instruction, next_instructions), "csrrci" => self.csrrci(riscv_instruction), // M: Integer Multiplication and Division @@ -589,6 +641,96 @@ impl Riscv2ZiskContext<'_> { self.insts.insert(i.rom_address, zib); } + /// Creates a Zisk precompiles operation that implements a RISC-V register operation, + /// loads both input parameters a and b from their respective registers, and stores the + /// result c into a register. + /// NOTE: How extended static param not it's used set it to zero (jmp_offset1) + pub fn create_precompiles_op( + &mut self, + i: &RiscvInstruction, + op: &str, + rs1: u32, + rs2: u32, + inst_size: u64, + ) { + // inst_size == 8 used for special cases where take arguments of precompiled of + // next instruction but no need to read again + assert!(inst_size == 2 || inst_size == 4 || inst_size == 8); + let mut zib = ZiskInstBuilder::new_from_riscv(i.rom_address, i.inst.clone()); + zib.src_a("reg", rs1 as u64, false); + zib.src_b("reg", rs2 as u64, false); + zib.op(op).unwrap(); + zib.store("reg", i.rd as i64, false, false); + zib.j(0, inst_size as i64); + zib.verbose(&format!( + "{} r{}, r{}, r{} => {op} r{}, r{rs1}, r{rs2}", + i.inst, i.rd, i.rs1, i.rs2, i.rd + )); + zib.build(); + self.insts.insert(i.rom_address, zib); + } + + /// Creates a Zisk operation that implements a RISC-V precompiles operation, i.e. an operation that + /// loads both input parameters a and b from their respective registers, + /// and stores the result c into a register + #[allow(clippy::too_many_arguments)] + pub fn create_extended_precompiles_op( + &mut self, + i: &RiscvInstruction, + op: &str, + rs1: u32, + rs2: u64, + rd: u32, + extended_arg: i64, + is_rs2_an_imm: bool, + inst_size: u64, + ) { + // inst_size == 8 used for special cases where take arguments of precompiled of + // next instruction but no need to read again + assert!(inst_size == 2 || inst_size == 4 || inst_size == 8); + let mut zib = ZiskInstBuilder::new_from_riscv(i.rom_address, i.inst.clone()); + zib.src_a("reg", rs1 as u64, false); + if is_rs2_an_imm { + zib.src_b("imm", rs2, false); + } else { + zib.src_b("reg", rs2, false); + } + zib.op(op).unwrap(); + zib.store("reg", rd as i64, false, false); + zib.j(extended_arg, inst_size as i64); + zib.verbose(&format!( + "{} r{}, r{}, r{} (precompiled {op} r{rd},r{rs1},r{rs2},{extended_arg} + jmp +{inst_size})", + i.inst, + i.rd, + i.rs1, + i.rs2, + )); + zib.build(); + self.insts.insert(i.rom_address, zib); + } + + /// Creates a Zisk operation that implements a RISC-V precompiles set extra param this + /// operation store in fixed address the value. + pub fn create_set_precompiles_param_op( + &mut self, + i: &RiscvInstruction, + rs1: u32, + inst_size: u64, + ) { + assert!(inst_size == 2 || inst_size == 4); + let mut zib = ZiskInstBuilder::new_from_riscv(i.rom_address, i.inst.clone()); + zib.src_a("imm", 0, false); + zib.src_b("reg", rs1 as u64, false); + zib.op("copyb").unwrap(); + zib.store("mem", EXTRA_PARAMS_ADDR as i64, false, false); + zib.j(0, inst_size as i64); + zib.verbose(&format!("sd r{}, (0x{}) (param 0x{:03X})", rs1, EXTRA_PARAMS_ADDR, i.csr)); + zib.build(); + self.output_precompile = Some(i.csr); + self.output_precompile_reg = Some(i.rs1); + self.insts.insert(i.rom_address, zib); + } + // beq rs1, rs2, label // eq([%rs1], [rs2]), j(label) @@ -767,7 +909,7 @@ impl Riscv2ZiskContext<'_> { zib.src_a("imm", 0, false); zib.src_b("imm", 0, false); zib.op("flag").unwrap(); - zib.store_ra("reg", i.rd as i64, false); + zib.store_pc("reg", i.rd as i64, false); zib.j(4, i.imm as i64); zib.verbose(&format!("auipc r{}, 0x{:x}", i.rd, i.imm)); zib.build(); @@ -883,13 +1025,42 @@ impl Riscv2ZiskContext<'_> { pub fn jalr(&mut self, i: &RiscvInstruction, inst_size: u64) { assert!(inst_size == 4 || inst_size == 2); let mut rom_address = i.rom_address; + + // Thanks to https://github.com/codygunton for reporting the issue with JALR alignment! + + // JALR target address mask per RISC-V ISA spec Section 2.5. + // Must clear only bit 0 (0xfffffffffffffffe) for 2-byte alignment. + // + // BUG: Using 0xfffffffffffffffc (4-byte alignment) breaks zksync-os at _start. + // The startup code (zksync-airbender/riscv_common/src/asm/start64.s) is: + // _start: + // la ra, _abs_start # auipc + addi (8 bytes) + // jr ra # c.jr ra (2 bytes, compressed) + // _abs_start: # offset 10 = 0x8000000a + // + // The assembler uses compressed `c.jr` (2 bytes), placing _abs_start at + // 0x8000000a - valid for C extension but not 4-byte aligned. We could change the start + // file but we leave as-is to document the issue. + // + // With mask 0xfc: 0x8000000a & 0xfc = 0x80000008 (jumps back to `jr ra`!) + // With mask 0xfe: 0x8000000a & 0xfe = 0x8000000a (correct target) + // + // The wrong mask causes an infinite self-loop at the first instruction, + // terminating after 16k steps instead of 1.6B. + // + // Note that this change fixes the misalign2-jalr-01.S test, which is part of the privilege + // architecture test suite but which seeems to test requirements of other parts of the + // spec. + + const JALR_MASK: u64 = 0xfffffffffffffffe; + if (i.imm % 4) == 0 { let mut zib = ZiskInstBuilder::new_from_riscv(rom_address, i.inst.clone()); - zib.src_a("imm", 0xfffffffffffffffc, false); + zib.src_a("imm", JALR_MASK, false); zib.src_b("reg", i.rs1 as u64, false); zib.op("and").unwrap(); zib.set_pc(); - zib.store_ra("reg", i.rd as i64, false); + zib.store_pc("reg", i.rd as i64, false); zib.j(i.imm as i64, inst_size as i64); zib.verbose(&format!("jalr r{}, r{}, 0x{:x}", i.rd, i.rs1, i.imm)); zib.build(); @@ -908,11 +1079,11 @@ impl Riscv2ZiskContext<'_> { } { let mut zib = ZiskInstBuilder::new(rom_address); - zib.src_a("imm", 0xfffffffffffffffc, false); + zib.src_a("imm", JALR_MASK, false); zib.src_b("lastc", 0, false); zib.op("and").unwrap(); zib.set_pc(); - zib.store_ra("reg", i.rd as i64, false); + zib.store_pc("reg", i.rd as i64, false); zib.j(0, inst_size as i64 - 1); zib.verbose(&format!("jalr r{}, r{}, 0x{:x} ; 2/2", i.rd, i.rs1, i.imm)); zib.build(); @@ -930,7 +1101,7 @@ impl Riscv2ZiskContext<'_> { zib.src_a("imm", 0, false); zib.src_b("imm", 0, false); zib.op("flag").unwrap(); - zib.store_ra("reg", i.rd as i64, false); + zib.store_pc("reg", i.rd as i64, false); zib.j(i.imm as i64, inst_size as i64); zib.verbose(&format!("jal r{}, 0x{:x}", i.rd, i.imm)); zib.build(); @@ -943,7 +1114,7 @@ impl Riscv2ZiskContext<'_> { zib.src_a("imm", 0, false); zib.src_b("mem", MTVEC, false); zib.op("copyb").unwrap(); - zib.store_ra("reg", 1, false); + zib.store_pc("reg", 1, false); zib.set_pc(); zib.j(0, 4); zib.verbose("ecall"); @@ -1096,7 +1267,7 @@ impl Riscv2ZiskContext<'_> { /// in integer register rs1 is treated as a bit mask that specifies bit positions to be set in /// the CSR. Any bit that is high in rs1 will cause the corresponding bit to be set in the CSR, /// if that CSR bit is writable. - pub fn csrrs(&mut self, i: &RiscvInstruction) { + pub fn csrrs(&mut self, i: &RiscvInstruction, next_instructions: &[RiscvInstruction]) { let mut rom_address = i.rom_address; if i.rd == i.rs1 { if i.rd == 0 { @@ -1147,15 +1318,38 @@ impl Riscv2ZiskContext<'_> { self.insts.insert(rom_address, zib); } } + } else if i.rd == 0 + && (CSR_DMA_PRECOMPILED_ADDR_START..=CSR_DMA_PRECOMPILED_ADDR_END).contains(&i.csr) + { + assert!(!next_instructions.is_empty()); + // Special "extended" precompiles that could be use jmp_offset1 as extended static parameter that + // was sent to bus when is a precompiles + match i.csr as u16 { + SYSCALL_DMA_MEMCPY_ID | SYSCALL_DMA_MEMCMP_ID => { + self.transpile_dma_memcpy_memcmp_pattern(i, next_instructions); + } + SYSCALL_DMA_INPUTCPY_ID => { + self.transpile_dma_inputcpy_pattern(i, next_instructions); + } + SYSCALL_DMA_MEMSET_ID => { + self.transpile_dma_memset_pattern(i, next_instructions); + } + _ => { + panic!("Invalid CSR 0x{:03X}", i.csr); + } + } } else if i.rd == 0 { let mut zib = ZiskInstBuilder::new_from_riscv(rom_address, i.inst.clone()); zib.src_b("reg", i.rs1 as u64, false); - zib.j(4, 4); + if (CSR_PRECOMPILED_ADDR_START..=CSR_PRECOMPILED_ADDR_END).contains(&i.csr) { - zib.src_a("step", 0, false); let precompiled = CSR_PRECOMPILED[(i.csr - CSR_PRECOMPILED_ADDR_START) as usize]; + zib.src_a("imm", 0, false); zib.op(precompiled).unwrap(); zib.verbose(precompiled); + // NOTE: if precompiles don't use extended static parameter (jmp_offset1), must be set to 0 + // to match with that precompiles proves + zib.j(0, 4); } else if (CSR_FCALL_PARAM_ADDR_START..=CSR_FCALL_PARAM_ADDR_END).contains(&i.csr) { let words = CSR_FCALL_PARAM_OFFSET_TO_WORDS[(i.csr - CSR_FCALL_PARAM_ADDR_START) as usize]; @@ -1165,11 +1359,13 @@ impl Riscv2ZiskContext<'_> { "csrrs 0x{0:X}, rs1={1} => copyb[fcall_param(r{1},{2})]", i.csr, i.rs1, words )); + zib.j(4, 4); } else { zib.src_a("mem", CSR_ADDR + (i.csr * 8) as u64, false); zib.op("or").unwrap(); zib.store("mem", CSR_ADDR as i64 + (i.csr * 8) as i64, false, false); zib.verbose(&format!("{} r{}, 0x{:x}, r{} # rs!=rd=0", i.inst, i.rd, i.csr, i.rs1)); + zib.j(4, 4); } zib.build(); self.insts.insert(rom_address, zib); @@ -1194,12 +1390,12 @@ impl Riscv2ZiskContext<'_> { self.insts.insert(rom_address, zib); } else if i.csr == CSR_PRECOMPILED_ADD256 { let mut zib = ZiskInstBuilder::new_from_riscv(rom_address, i.inst.clone()); - zib.src_a("step", 0, false); + zib.src_a("imm", 0, false); zib.src_b("reg", i.rs1 as u64, false); zib.op("add256").unwrap(); zib.verbose("add256"); zib.store("reg", i.rd as i64, false, false); - zib.j(4, 4); + zib.j(0, 4); zib.build(); self.insts.insert(rom_address, zib); } else { @@ -1476,10 +1672,12 @@ impl Riscv2ZiskContext<'_> { } } */ - pub fn csrrsi(&mut self, i: &RiscvInstruction) { + pub fn csrrsi(&mut self, i: &RiscvInstruction, next_instructions: &[RiscvInstruction]) { let mut rom_address = i.rom_address; if i.rd == 0 { - if i.imme == 0 { + if i.csr == SYSCALL_DMA_MEMSET_ID as u32 { + self.transpile_dma_memset_pattern(i, next_instructions); + } else if i.imme == 0 { let mut zib = ZiskInstBuilder::new_from_riscv(rom_address, i.inst.clone()); zib.src_a("imm", 0, false); zib.src_b("imm", 0, false); @@ -1682,11 +1880,176 @@ impl Riscv2ZiskContext<'_> { self.insts.insert(rom_address, zib); } } + + fn transpile_dma_memset_pattern( + &mut self, + i: &RiscvInstruction, + next_instructions: &[RiscvInstruction], + ) { + if i.imme == 2 { + if next_instructions.len() > 1 + && next_instructions[0].inst == "addi" + && next_instructions[1].inst == "addi" + { + // xmemset transpilation pattern: + // + // csrsi 0x816, 2 ===> xmemset [x0|a0], a0, size, byte ──┐ + // addi x0, reg(dst), size addi x0, reg(dst), size (no-executed) │ jmp+12 + // addi x0, reg(dst), value addi x0, reg(dst), value (no-executed) │ + // .......... .......... <─────────────────────────┘ + + let rs1 = next_instructions[0].rs1; // dst + let rs2 = next_instructions[0].imm; // count + let rd = next_instructions[0].rd; + let fill_byte = next_instructions[1].imm; // fill_byte + assert!((0..=0xFF).contains(&fill_byte)); + self.create_extended_precompiles_op( + i, + "dma_xmemset", + rs1, + rs2 as u64, + rd, + fill_byte as i64, + true, + 12, + ); + } else { + let next_0 = next_instructions.first().map(|inst| inst.inst.as_str()).unwrap_or(""); + let next_1 = next_instructions.get(1).map(|inst| inst.inst.as_str()).unwrap_or(""); + panic!( + "Invalid use of CSR (0x{:03X}) at address 0x{:08x}, must be used as xmemset with two \ + consecutive addi (next[0]:{} next[1]:{})", + i.csr, i.rom_address, next_0, next_1); + } + } else if i.imme == 0 { + if !next_instructions.is_empty() && next_instructions[0].inst == "addi" { + // xmemset transpilation pattern: + // + // csrs 0x816, reg(dst) ===> xmemset [x0|a0], a0, reg(count), byte ─┐ + // addi x0, reg(cout), byte addi x0, reg(dst), byte (no-executed) │ jmp+8 + // .......... .......... <─────────────────────────────┘ + + let rs1 = i.rs1; // dst + let rs2 = next_instructions[0].rs1; // count + let rd = next_instructions[0].rd; + let fill_byte = next_instructions[0].imm; // byte (fill_byte) + assert!((0..=0xFF).contains(&fill_byte)); + self.create_extended_precompiles_op( + i, + "dma_xmemset", + rs1, + rs2 as u64, + rd, + fill_byte as i64, + false, + 8, + ); + } else { + let next_0 = next_instructions.first().map(|inst| inst.inst.as_str()).unwrap_or(""); + panic!( + "Invalid use of CSR (0x{:03X}) at address 0x{:08x}, must be used as xmemset with a \ + consecutive addi (next[0]:{})", + i.csr, i.rom_address, next_0 + ); + } + } + } + + fn transpile_dma_memcpy_memcmp_pattern( + &mut self, + i: &RiscvInstruction, + next_instructions: &[RiscvInstruction], + ) { + if i.imme == 0 && !next_instructions.is_empty() { + if next_instructions[0].inst == "add" { + // memcpy/memcmp transpilation pattern: + // + // csrs 0x81x, reg(src) ===> sd reg(count), [EXTRA_PARAM] + // addi rd, reg(dst), reg(count) memcxx rd, reg(dst), reg(src) + // .......... .......... + + self.create_set_precompiles_param_op(i, next_instructions[0].rs2, 4); + return; + } + if next_instructions[0].inst == "addi" { + // memcpy/memcmp transpilation pattern: + // + // csrs 0x81x, reg(src) ===> memcxx rd, reg(dst), reg(src), count ─┐ + // addi rd, reg(dst), count addi rd, reg(dst), count │ jmp+8 + // .......... .......... <────────────────────────┘ + let rs1 = i.rs1; + let rs2 = next_instructions[0].rs1; + let rd = next_instructions[0].rd; + let count = next_instructions[0].imm as i64; // count + let op = if i.csr == SYSCALL_DMA_MEMCPY_ID as u32 { + "dma_xmemcpy" + } else { + "dma_xmemcmp" + }; + self.create_extended_precompiles_op(i, op, rs1, rs2 as u64, rd, count, false, 8); + return; + } + } + let next_0 = next_instructions.first().map(|inst| inst.inst.as_str()).unwrap_or(""); + panic!( + "Invalid use of CSR (0x{:03X}) at address 0x{:08x}, must be used as memcpy/memcmp with a \ + consecutive addi (next[0]:{})", + i.csr, i.rom_address, next_0 + ); + } + fn transpile_dma_inputcpy_pattern( + &mut self, + i: &RiscvInstruction, + next_instructions: &[RiscvInstruction], + ) { + if i.imme == 0 && !next_instructions.is_empty() { + if next_instructions[0].inst == "add" { + // inputcpy transpilation pattern: + // + // csrs 0x815, reg(count) ===> inputcpy rd, reg(dst), reg(count) ─┐ + // add rd, reg(dst), reg(count) addi rd, reg(dst), reg(count) │ jmp+8 + // .......... .......... <─────────────────────┘ + let rs1 = next_instructions[0].rs1; + let rs2 = next_instructions[0].rs2; + let rd = next_instructions[0].rd; + self.create_extended_precompiles_op( + i, + "dma_inputcpy", + rs1, + rs2 as u64, + rd, + 0, + false, + 8, + ); + return; + } + if next_instructions[0].inst == "addi" { + // inputcpy transpilation pattern: + // + // csrs 0x815, reg(dst) ===> inputcpy rd, reg(dst), count ────┐ + // addi rd, reg(dst), count addi rd, reg(dst), count │ jmp+8 + // .......... .......... <───────────────────┘ + let rs1 = next_instructions[0].rs1; + let imm2 = next_instructions[0].imm as u64; + let rd = next_instructions[0].rd; + self.create_extended_precompiles_op(i, "dma_inputcpy", rs1, imm2, rd, 0, true, 8); + return; + } + } + let next_0 = next_instructions.first().map(|inst| inst.inst.as_str()).unwrap_or(""); + panic!( + "Invalid use of CSR (0x{:03X}) at address 0x{:08x}, must be used as inputcpy with a \ + consecutive addi (next[0]:{})", + i.csr, i.rom_address, next_0 + ); + } } // impl Riscv2ZiskContext /// Converts a buffer with RISC-V data into a vector of Zisk instructions, using the /// Riscv2ZiskContext to perform the instruction transpilation -pub fn add_zisk_code(rom: &mut ZiskRom, addr: u64, data: &[u8]) { +/// dma_addrs: (memcpy, memcmp, memset, memmove) addresses, 0 if not present +pub fn add_zisk_code(rom: &mut ZiskRom, addr: u64, data: &[u8], _dma_addrs: (u64, u64, u64, u64)) { //print!("add_zisk_code() addr={}\n", addr); // Convert input data to a u32 vector @@ -1696,15 +2059,46 @@ pub fn add_zisk_code(rom: &mut ZiskRom, addr: u64, data: &[u8]) { let riscv_instructions = riscv_interpreter(addr, &code_vector); // Create a context to convert RISCV instructions to ZisK instructions, using rom.insts - let mut ctx = Riscv2ZiskContext { insts: &mut rom.insts }; + let mut ctx = Riscv2ZiskContext { + insts: &mut rom.insts, + input_precompile: None, + output_precompile: None, + input_precompile_reg: None, + output_precompile_reg: None, + }; // For all RISCV instructions - for riscv_instruction in riscv_instructions { + for (i, riscv_instruction) in riscv_instructions.iter().enumerate() { //print!("add_zisk_code() converting RISCV instruction={}\n", // riscv_instruction.to_string()); + // if riscv_instructions[i].rom_address >= 0x80267b28 + // && riscv_instructions[i].rom_address <= 0x80267b30 + // { + // if let Some(zisk_memcmp_index) = zisk_memcmp_index { + // // Get slice of remaining instructions after current one + // let index_offset = (riscv_instructions[i].rom_address - 0x80267b28) as usize >> 2; + // let next_instructions = + // &riscv_instructions[(zisk_memcmp_index + index_offset + 1)..]; + + // let mut instruction = riscv_instructions[zisk_memcmp_index + index_offset].clone(); + // instruction.rom_address = riscv_instructions[i].rom_address; + + // // Convert RICV instruction to ZisK instruction and store it in rom.insts + // ctx.convert(&instruction, next_instructions); + // continue; + // //print!(" to: {}", ctx.insts.iter().last().) + // } + // } + + // Get slice of remaining instructions after current one + let next_instructions = &riscv_instructions[(i + 1)..]; // Convert RICV instruction to ZisK instruction and store it in rom.insts - ctx.convert(&riscv_instruction); + ctx.input_precompile = ctx.output_precompile; + ctx.output_precompile = None; + ctx.input_precompile_reg = ctx.output_precompile_reg; + ctx.output_precompile_reg = None; + ctx.convert(riscv_instruction, next_instructions); //print!(" to: {}", ctx.insts.iter().last().) } } @@ -1901,7 +2295,7 @@ pub fn add_entry_exit_jmp(rom: &mut ZiskRom, addr: u64) { zib.src_b("imm", addr, false); zib.op("copyb").unwrap(); zib.set_pc(); - zib.store_ra("reg", 1, false); + zib.store_pc("reg", 1, false); zib.j(0, 4); zib.verbose(&format!("CALL to entry: 0x{addr:08x}")); zib.build(); diff --git a/core/src/zisk_definitions.rs b/core/src/zisk_definitions.rs index 02bf67b3a..2c232d29b 100644 --- a/core/src/zisk_definitions.rs +++ b/core/src/zisk_definitions.rs @@ -1,7 +1,7 @@ //! This module contains constant definitions used by other modules and crates. -pub const DEFAULT_MAX_STEPS: u64 = 0xffffffff; -pub const DEFAULT_MAX_STEPS_STR: &str = "4294967295"; // 2^32 - 1 +pub const DEFAULT_MAX_STEPS: u64 = 0xF_FFFF_FFFF; +pub const DEFAULT_MAX_STEPS_STR: &str = "68719476735"; // 2^36 - 1 pub const CHUNK_SIZE_BITS: usize = 18; pub const CHUNK_SIZE: u64 = 1 << CHUNK_SIZE_BITS; diff --git a/core/src/zisk_inst.rs b/core/src/zisk_inst.rs index d88c9e4cf..3fc6c54e2 100644 --- a/core/src/zisk_inst.rs +++ b/core/src/zisk_inst.rs @@ -78,10 +78,13 @@ pub enum ZiskOperationType { BinaryE, Keccak, Sha256, + Poseidon2, + Blake2, PubOut, ArithEq, ArithEq384, BigInt, // Note: Add new core operations here + Dma, // Note: To add extra params to precompiles calls // ZisK Free Input Operations FcallParam, Fcall, @@ -95,13 +98,15 @@ pub const BINARY_OP_TYPE_ID: u32 = ZiskOperationType::Binary as u32; pub const BINARY_E_OP_TYPE_ID: u32 = ZiskOperationType::BinaryE as u32; pub const KECCAK_OP_TYPE_ID: u32 = ZiskOperationType::Keccak as u32; pub const SHA256_OP_TYPE_ID: u32 = ZiskOperationType::Sha256 as u32; +pub const POSEIDON2_OP_TYPE_ID: u32 = ZiskOperationType::Poseidon2 as u32; pub const PUB_OUT_OP_TYPE_ID: u32 = ZiskOperationType::PubOut as u32; pub const ARITH_EQ_OP_TYPE_ID: u32 = ZiskOperationType::ArithEq as u32; pub const ARITH_EQ_384_OP_TYPE_ID: u32 = ZiskOperationType::ArithEq384 as u32; pub const BIG_INT_OP_TYPE_ID: u32 = ZiskOperationType::BigInt as u32; pub const FCALL_PARAM_OP_TYPE_ID: u32 = ZiskOperationType::FcallParam as u32; pub const FCALL_OP_TYPE_ID: u32 = ZiskOperationType::Fcall as u32; -pub const FCALL_GET_OP_TYPE_ID: u32 = ZiskOperationType::FcallGet as u32; +pub const DMA_OP_TYPE_ID: u32 = ZiskOperationType::Dma as u32; +pub const BLAKE2_OP_TYPE_ID: u32 = ZiskOperationType::Blake2 as u32; /// ZisK instruction definition /// @@ -113,11 +118,12 @@ pub const FCALL_GET_OP_TYPE_ID: u32 = ZiskOperationType::FcallGet as u32; #[derive(Debug, Clone)] pub struct ZiskInst { pub paddr: u64, - pub store_ra: bool, + pub store_pc: bool, pub store_use_sp: bool, pub store: u64, pub store_offset: i64, pub set_pc: bool, + pub is_precompiled: bool, // #[cfg(feature = "sp")] // pub set_sp: bool, pub ind_width: u64, @@ -150,11 +156,12 @@ impl Default for ZiskInst { fn default() -> Self { Self { paddr: 0, - store_ra: false, + store_pc: false, store_use_sp: false, store: 0, store_offset: 0, set_pc: false, + is_precompiled: false, // #[cfg(feature = "sp")] // set_sp: false, ind_width: 0, @@ -220,8 +227,8 @@ impl ZiskInst { if self.store_offset != 0 { s += &format!(" store_offset=0x{:x}", self.store_offset as u64); } - if self.store_ra { - s += &format!(" store_ra={}", self.store_ra); + if self.store_pc { + s += &format!(" store_pc={}", self.store_pc); } if self.store_use_sp { s += &format!(" store_use_sp={}", self.store_use_sp); @@ -229,6 +236,9 @@ impl ZiskInst { if self.set_pc { s += &format!(" set_pc={}", self.set_pc); } + if self.is_precompiled { + s += &format!(" op_with_step={}", self.is_precompiled); + } if self.jmp_offset1 != 0 { s += &format!(" jmp_offset1={}", self.jmp_offset1); } @@ -261,11 +271,11 @@ impl ZiskInst { let flags: u64 = 1 | (((self.a_src == SRC_IMM) as u64) << 1) | (((self.a_src == SRC_MEM) as u64) << 2) - | (((self.a_src == SRC_STEP) as u64) << 3) + | ((self.is_precompiled as u64) << 3) | (((self.b_src == SRC_IMM) as u64) << 4) | (((self.b_src == SRC_MEM) as u64) << 5) | ((self.is_external_op as u64) << 6) - | ((self.store_ra as u64) << 7) + | ((self.store_pc as u64) << 7) | (((self.store == STORE_MEM) as u64) << 8) | (((self.store == STORE_IND) as u64) << 9) | ((self.set_pc as u64) << 10) diff --git a/core/src/zisk_inst_builder.rs b/core/src/zisk_inst_builder.rs index 2d6edbc3d..30a525109 100644 --- a/core/src/zisk_inst_builder.rs +++ b/core/src/zisk_inst_builder.rs @@ -5,7 +5,7 @@ use crate::{ zisk_ops::{InvalidNameError, OpType, ZiskOp}, ZiskInst, REGS_IN_MAIN_FROM, REGS_IN_MAIN_TO, REG_FIRST, SRC_C, SRC_IMM, SRC_IND, SRC_MEM, - SRC_REG, SRC_STEP, STORE_IND, STORE_MEM, STORE_NONE, STORE_REG, + SRC_REG, STORE_IND, STORE_MEM, STORE_NONE, STORE_REG, }; // #[cfg(feature = "sp")] @@ -44,7 +44,7 @@ impl ZiskInstBuilder { "lastc" => SRC_C, // #[cfg(feature = "sp")] // "sp" => SRC_SP, - "step" => SRC_STEP, + // "step" => SRC_STEP, _ => panic!("ZiskInstBuilder::a_src() called with invalid src={src}"), } } @@ -158,7 +158,7 @@ impl ZiskInstBuilder { } /// Sets the c store instruction attributes - pub fn store(&mut self, dst_input: &str, offset_input: i64, use_sp: bool, store_ra: bool) { + pub fn store(&mut self, dst_input: &str, offset_input: i64, use_sp: bool, store_pc: bool) { let mut dst = dst_input; let mut offset = offset_input; if dst == "reg" { @@ -170,7 +170,7 @@ impl ZiskInstBuilder { } } - self.i.store_ra = store_ra; + self.i.store_pc = store_pc; self.i.store = self.c_store(dst); if self.i.store == STORE_REG || self.i.store == STORE_MEM || self.i.store == STORE_IND { @@ -183,7 +183,7 @@ impl ZiskInstBuilder { } /// Set the store as a store ra - pub fn store_ra(&mut self, dst: &str, offset: i64, use_sp: bool) { + pub fn store_pc(&mut self, dst: &str, offset: i64, use_sp: bool) { self.store(dst, offset, use_sp, true); } @@ -207,6 +207,8 @@ impl ZiskInstBuilder { self.i.func = op.get_call_function(); self.i.op_type = op.op_type().into(); self.i.input_size = op.input_size(); + // assume that input_size > 0 implies a precompiled, and precompiled uses step on operations + self.i.is_precompiled = op.input_size() > 0; Ok(()) } diff --git a/core/src/zisk_ops.rs b/core/src/zisk_ops.rs index e11089778..8c093ec36 100644 --- a/core/src/zisk_ops.rs +++ b/core/src/zisk_ops.rs @@ -9,8 +9,15 @@ #![allow(unused)] -use ziskos::zisklib::fcall_proxy; +use precompiles_helpers::DmaInfo; +use ziskos_hints::zisklib::fcall_proxy; +use crate::{ + blake2br, sha256f, EmulationMode, InstContext, Mem, ZiskOperationType, ZiskRequiredOperation, + EXTRA_PARAMS_ADDR, INPUT_ADDR, M64, MAX_INPUT_SIZE, REG_A0, SYS_ADDR, +}; +use fields::{poseidon2_hash, Goldilocks, Poseidon16, PrimeField64}; +use paste::paste; use std::{ collections::HashMap, fmt::{Debug, Display}, @@ -18,11 +25,7 @@ use std::{ str::FromStr, }; use tiny_keccak::keccakf; - -use crate::{ - sha256f, EmulationMode, InstContext, Mem, ZiskOperationType, ZiskRequiredOperation, M64, - REG_A0, SYS_ADDR, -}; +use ziskos_hints::zisklib::FCALL_INPUT_READY_ID; use lib_c::{inverse_fn_ec_c, inverse_fp_ec_c, sqrt_fp_ec_parity_c, Fcall, FcallContext}; @@ -46,11 +49,14 @@ pub enum OpType { BinaryE, Keccak, Sha256, + Poseidon2, PubOut, ArithEq, Fcall, ArithEq384, BigInt, + Dma, + Blake2, } impl From for ZiskOperationType { @@ -62,11 +68,14 @@ impl From for ZiskOperationType { OpType::BinaryE => ZiskOperationType::BinaryE, OpType::Keccak => ZiskOperationType::Keccak, OpType::Sha256 => ZiskOperationType::Sha256, + OpType::Poseidon2 => ZiskOperationType::Poseidon2, OpType::PubOut => ZiskOperationType::PubOut, OpType::ArithEq => ZiskOperationType::ArithEq, OpType::Fcall => ZiskOperationType::Fcall, OpType::ArithEq384 => ZiskOperationType::ArithEq384, OpType::BigInt => ZiskOperationType::BigInt, + OpType::Dma => ZiskOperationType::Dma, + OpType::Blake2 => ZiskOperationType::Blake2, } } } @@ -82,11 +91,14 @@ impl Display for OpType { Self::BinaryE => write!(f, "BinaryE"), Self::Keccak => write!(f, "Keccak"), Self::Sha256 => write!(f, "Sha256"), + Self::Poseidon2 => write!(f, "Poseidon2"), Self::PubOut => write!(f, "PubOut"), Self::ArithEq => write!(f, "Arith256"), Self::Fcall => write!(f, "Fcall"), Self::ArithEq384 => write!(f, "Arith384"), Self::BigInt => write!(f, "BigInt"), + Self::Dma => write!(f, "Dma"), + Self::Blake2 => write!(f, "Blake2"), } } } @@ -104,10 +116,13 @@ impl FromStr for OpType { "be" => Ok(Self::BinaryE), "k" => Ok(Self::Keccak), "s" => Ok(Self::Sha256), + "p" => Ok(Self::Poseidon2), "aeq" => Ok(Self::ArithEq), "fcall" => Ok(Self::Fcall), "aeq384" => Ok(Self::ArithEq384), "bint" => Ok(Self::BigInt), + "dma" => Ok(Self::Dma), + "bl" => Ok(Self::Blake2), _ => Err(InvalidOpTypeError), } } @@ -143,6 +158,7 @@ impl Display for InvalidCodeError { pub trait OpStats { fn mem_align_read(&mut self, addr: u64, count: usize); fn mem_align_write(&mut self, addr: u64, count: usize); + fn add_extras(&mut self, extras: &[(u8, usize)]); } /// Stats gathering function that does nothing (used as default) @@ -151,6 +167,21 @@ pub fn ops_none(_ctx: &InstContext, _stats: &mut dyn OpStats) { // No-op implementation } +#[inline(always)] +pub fn opc_virtual(ctx: &mut InstContext) { + unimplemented!("opc_virtual: virtual operation") +} + +#[inline(always)] +pub fn op_virtual(a: u64, b: u64) -> (u64, bool) { + unimplemented!("op_virtual: virtual operation") +} + +#[inline(always)] +pub fn ops_virtual(_ctx: &InstContext, _stats: &mut dyn OpStats) { + unimplemented!("ops_virtual: virtual operation") +} + /// Internal macro used to define all ops in the [`ZiskOp`] enum macro_rules! define_ops { ( $( ($name:ident, $str_name:expr, $type:ident, $steps:expr, $code:expr, $input_size:expr, $output_size:expr, $call_fn:ident, $call_ab_fn:ident, $call_stats_fn:ident ) ),* $(,)? ) => { @@ -167,6 +198,11 @@ macro_rules! define_ops { } impl ZiskOp { + $( + paste! { + pub const [<$str_name:upper>]: u8 = $code; + } + )* /// Returns the (string) name of the operation pub const fn name(&self) -> &'static str { match self { @@ -309,19 +345,34 @@ macro_rules! define_ops { }; } +const DMA_64_ALIGNED_OPS_BY_ROW: usize = 4; + // Cost definitions: Area x Op const INTERNAL_COST: u64 = 0; -const BINARY_COST: u64 = 75; -const BINARY_ADD_COST: u64 = 26; -const BINARY_E_COST: u64 = 54; +const BINARY_COST: u64 = 60; +const BINARY_ADD_COST: u64 = 25; +const BINARY_E_COST: u64 = 53; const ARITHA32_COST: u64 = 95; const ARITHAM32_COST: u64 = 95; -const KECCAK_COST: u64 = (((93846 * 86) - 1) / 63) + 1; +const KECCAK_COST: u64 = 25 * 3022; const SHA256_COST: u64 = 72 * 121; -const ARITH_EQ_COST: u64 = 85 * 16; +const POSEIDON2_COST: u64 = 14 * 75; +const ARITH_EQ_COST: u64 = 89 * 16; const FCALL_COST: u64 = INTERNAL_COST; const ARITH_EQ_384_COST: u64 = 79 * 24; const ADD256_COST: u64 = 104; +const DMA_COST: u64 = 42; +const BLAKE2_COST: u64 = 24 * 205; + +const DMA_64_ALIGNED_COST: u64 = 40; +const DMA_UNALIGNED_COST: u64 = 42; +const DMA_PRE_POST_COST: u64 = 88; + +// const OP_DMA_64_ALIGNED: u8 = 0xda; +// const OP_DMA_UNALIGNED: u8 = 0xdb; +// const OP_DMA_PRE: u8 = 0xdc; +// const OP_DMA_POST: u8 = 0xdd; +// const OP_DMA_CMP_BYTE: u8 = 0xde; /// Table of Zisk opcode definitions: enum, name, type, cost, code and implementation functions /// This table is the backbone of the Zisk processor, it determines what functionality is supported, @@ -384,6 +435,20 @@ define_ops! { (RemuW, "remu_w", ArithA32, ARITHA32_COST, 0xbd, 0, 0, opc_remu_w, op_remu_w, ops_none), (DivW, "div_w", ArithA32, ARITHA32_COST, 0xbe, 0, 0, opc_div_w, op_div_w, ops_none), (RemW, "rem_w", ArithA32, ARITHA32_COST, 0xbf, 0, 0, opc_rem_w, op_rem_w, ops_none), + // opcpdes 0xc0-0xcf are available + (DmaMemCpy, "dma_memcpy", Dma, DMA_COST, 0xd0, 8, 0, opc_dma_memcpy, op_dma_memcpy, ops_dma_memcpy), + (DmaMemCmp, "dma_memcmp", Dma, DMA_COST, 0xd1, 16, 0, opc_dma_memcmp, op_dma_memcmp, ops_dma_memcmp), + (DmaInputCpy, "dma_inputcpy", Dma, DMA_COST, 0xd2, 8, 0, opc_dma_inputcpy, op_dma_inputcpy, ops_dma_inputcpy), + (DmaXMemCpy, "dma_xmemcpy", Dma, DMA_COST, 0xd6, 8, 0, opc_dma_xmemcpy, op_dma_xmemcpy, ops_dma_xmemcpy), + (DmaXMemCmp, "dma_xmemcmp", Dma, DMA_COST, 0xd7, 16, 0, opc_dma_xmemcmp, op_dma_xmemcmp, ops_dma_xmemcmp), + (DmaXMemSet, "dma_xmemset", Dma, DMA_COST, 0xd9, 8, 0, opc_dma_xmemset, op_dma_xmemset, ops_dma_xmemset), + // opcodes 0xd2-0xd9 future reserved for dma operations (memset, memcpy256, memcmp256) + (Dma64Aligned, "_dma_64_aligned", Dma, DMA_64_ALIGNED_COST, 0xda, 8, 0, opc_virtual, op_virtual, ops_virtual), + (DmaUnaligned, "_dma_unaligned", Dma, DMA_UNALIGNED_COST, 0xdb, 8, 0, opc_virtual, op_virtual, ops_virtual), + (DmaPre, "_dma_pre", Dma, DMA_PRE_POST_COST, 0xdc, 8, 0, opc_virtual, op_virtual, ops_virtual), + (DmaPost, "_dma_post", Dma, DMA_PRE_POST_COST, 0xdd, 8, 0, opc_virtual, op_virtual, ops_virtual), + // opcodes 0xda-0xdf reserved for dma extra operations (costs) + // opcodes 0xe0 is available (Arith384Mod, "arith384_mod", ArithEq384, ARITH_EQ_384_COST, 0xe2, 232, 48, opc_arith384_mod, op_arith384_mod, ops_arith384_mod), (Bls12_381CurveAdd, "bls12_381_curve_add", ArithEq384, ARITH_EQ_384_COST, 0xe3, 208, 96, opc_bls12_381_curve_add, op_bls12_381_curve_add, ops_bls12_381_curve_add), (Bls12_381CurveDbl, "bls12_381_curve_dbl", ArithEq384, ARITH_EQ_384_COST, 0xe4, 96, 96, opc_bls12_381_curve_dbl, op_bls12_381_curve_dbl, ops_bls12_381_curve_dbl), @@ -395,11 +460,15 @@ define_ops! { (Arith256, "arith256", ArithEq, ARITH_EQ_COST, 0xf2, 136, 64, opc_arith256, op_arith256, ops_arith256), (Arith256Mod, "arith256_mod", ArithEq, ARITH_EQ_COST, 0xf3, 168, 32, opc_arith256_mod, op_arith256_mod, ops_arith256_mod), (Secp256k1Add, "secp256k1_add", ArithEq, ARITH_EQ_COST, 0xf4, 144, 64, opc_secp256k1_add, op_secp256k1_add, ops_secp256k1_add), - (Secp256k1Dbl, "secp256k1_dbl", ArithEq, ARITH_EQ_COST, 0xf5, 64, 64, opc_secp256k1_dbl, op_secp256k1_add, ops_secp256k1_dbl), + (Secp256k1Dbl, "secp256k1_dbl", ArithEq, ARITH_EQ_COST, 0xf5, 64, 64, opc_secp256k1_dbl, op_secp256k1_dbl, ops_secp256k1_dbl), + (Secp256r1Add, "secp256r1_add", ArithEq, ARITH_EQ_COST, 0xe8, 144, 64, opc_secp256r1_add, op_secp256r1_add, ops_secp256r1_add), + (Secp256r1Dbl, "secp256r1_dbl", ArithEq, ARITH_EQ_COST, 0xe9, 64, 64, opc_secp256r1_dbl, op_secp256r1_dbl, ops_secp256r1_dbl), + (Blake2, "blake2", Blake2, BLAKE2_COST, 0xea, 280 , 128, opc_blake2, op_blake2, ops_blake2), (FcallParam, "fcall_param", Fcall, FCALL_COST, 0xf6, 0, 0, opc_fcall_param, op_fcall_param, ops_none), (Fcall, "fcall", Fcall, FCALL_COST, 0xf7, 0, 0, opc_fcall, op_fcall, ops_none), (FcallGet, "fcall_get", Fcall, FCALL_COST, 0xf8, 0, 0, opc_fcall_get, op_fcall_get, ops_none), (Sha256, "sha256", Sha256, SHA256_COST, 0xf9, 112, 112, opc_sha256, op_sha256, ops_sha256), + (Poseidon2, "poseidon2", Poseidon2, POSEIDON2_COST, 0xe1, 128, 128, opc_poseidon2, op_poseidon2, ops_poseidon2), (Bn254CurveAdd, "bn254_curve_add", ArithEq, ARITH_EQ_COST, 0xfa, 144, 64, opc_bn254_curve_add, op_bn254_curve_add, ops_bn254_curve_add), (Bn254CurveDbl, "bn254_curve_dbl", ArithEq, ARITH_EQ_COST, 0xfb, 64, 64, opc_bn254_curve_dbl, op_bn254_curve_dbl, ops_bn254_curve_dbl), (Bn254ComplexAdd, "bn254_complex_add", ArithEq, ARITH_EQ_COST, 0xfc, 144, 64, opc_bn254_complex_add, op_bn254_complex_add, ops_bn254_complex_add), @@ -1259,7 +1328,7 @@ pub fn opc_sha256(ctx: &mut InstContext) { const WORDS: usize = 2 + 2 * 4 + 4; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 2, 2, 4, 4, &mut data, "sha256"); + precompiled_load_data(ctx, 2, 2, 4, 4, None, &mut data, "sha256"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // Get the state and input slices @@ -1292,6 +1361,142 @@ pub fn ops_sha256(ctx: &InstContext, stats: &mut dyn OpStats) { precompiled_stats_data(ctx, stats, &[4, 8], &[], 1); } +/// Performs a Poseidon2 hash over a 16 elements stored in memory at the address +/// specified by register A0, and stores the output state in the same memory address +#[inline(always)] +pub fn opc_poseidon2(ctx: &mut InstContext) { + // Get address from b (a = step) + let address = ctx.b; + if address & 0x7 != 0 { + panic!("opc_poseidon2() found address not aligned to 8 bytes"); + } + + // Allocate room for 16 u64 = 128 bytes = 1024 bits + const WORDS: usize = 16; + let mut data = [0u64; WORDS]; + + // Get input data from memory or from the precompiled context + match ctx.emulation_mode { + EmulationMode::Mem => { + // Read data from the memory address + for (i, d) in data.iter_mut().enumerate() { + *d = ctx.mem.read(address + (8 * i as u64), 8); + } + + // Call poseidon2 + let data_gl = data.map(Goldilocks::new); + let res_gl = poseidon2_hash::(&data_gl); + for (i, d) in data.iter_mut().enumerate() { + *d = res_gl[i].as_canonical_u64(); + } + + // Write data to the memory address + for (i, d) in data.iter().enumerate() { + ctx.mem.write(address + (8 * i as u64), *d, 8); + } + } + EmulationMode::GenerateMemReads => { + // Read data from the memory address + for (i, d) in data.iter_mut().enumerate() { + *d = ctx.mem.read(address + (8 * i as u64), 8); + } + + // Copy data to the precompiled context + ctx.precompiled.input_data.clear(); + for (i, d) in data.iter_mut().enumerate() { + ctx.precompiled.input_data.push(*d); + } + + // Call poseidon2 + let data_gl = data.map(Goldilocks::new); + let res_gl = poseidon2_hash::(&data_gl); + for (i, d) in data.iter_mut().enumerate() { + *d = res_gl[i].as_canonical_u64(); + } + + // Write data to the memory address + for (i, d) in data.iter().enumerate() { + ctx.mem.write(address + (8 * i as u64), *d, 8); + } + + // Write data to the precompiled context + ctx.precompiled.output_data.clear(); + for (i, d) in data.iter_mut().enumerate() { + ctx.precompiled.output_data.push(*d); + } + } + EmulationMode::ConsumeMemReads => { + // Check input data has the expected length + if ctx.precompiled.input_data.len() != WORDS { + panic!( + "opc_poseidon2() found ctx.precompiled.input_data.len={} != {}", + ctx.precompiled.input_data.len(), + WORDS + ); + } + } + } + + ctx.c = 0; + ctx.flag = false; +} + +/// Unimplemented. Poseidon2 can only be called from the system call context via InstContext. +/// This is provided just for completeness. +#[inline(always)] +pub fn op_poseidon2(_a: u64, _b: u64) -> (u64, bool) { + unimplemented!("op_poseidon2() is not implemented"); +} + +#[inline(always)] +pub fn ops_poseidon2(ctx: &InstContext, stats: &mut dyn OpStats) { + precompiled_stats_direct_data(ctx, stats, 16, 16); +} + +#[inline(always)] +pub fn opc_blake2(ctx: &mut InstContext) { + const WORDS: usize = 3 + 2 * 16; // index,addr_state,addr_input,state[16],input[16] + let mut data = [0u64; WORDS]; + + precompiled_load_data(ctx, 3, 2, 16, 0, Some(0), &mut data, "blake2"); + + if ctx.emulation_mode != EmulationMode::ConsumeMemReads { + // Get the state and input slices + // 0 - index + // 1 - addr_state + // 2 - addr_input + let index = data[0]; + let (params, rest) = data.split_at_mut(3); + let (state_slice, input_slice) = rest.split_at_mut(16); + let state: &mut [u64; 16] = state_slice.try_into().unwrap(); + let input: &[u64; 16] = input_slice[..16].try_into().unwrap(); + + // Compute the blake2br output with the fastest implementation available + blake2br(index, state, input); + + let state_addr = params[1]; + for (i, d) in state.iter().enumerate() { + ctx.mem.write(state_addr + (8 * i as u64), *d, 8); + } + } + + ctx.c = 0; + ctx.flag = false; +} + +/// Unimplemented. Blake2 can only be called from the system call context via InstContext. +/// This is provided just for completeness. +#[inline(always)] +pub fn op_blake2(_a: u64, _b: u64) -> (u64, bool) { + unimplemented!("op_blake2() is not implemented"); +} + +#[inline(always)] +pub fn ops_blake2(ctx: &InstContext, stats: &mut dyn OpStats) { + precompiled_stats_data(ctx, stats, &[4, 8], &[], 1); +} + +#[allow(clippy::too_many_arguments)] #[inline(always)] pub fn precompiled_load_data( ctx: &mut InstContext, @@ -1299,6 +1504,7 @@ pub fn precompiled_load_data( load_indirections: usize, load_chunks: usize, load_rem: usize, + direct_load_param_idx: Option, data: &mut [u64], title: &str, ) { @@ -1309,11 +1515,13 @@ pub fn precompiled_load_data( load_chunks, load_rem, 0, + direct_load_param_idx, data, title, ); } +#[allow(clippy::too_many_arguments)] #[inline(always)] pub fn precompiled_load_data_with_result( ctx: &mut InstContext, @@ -1321,6 +1529,7 @@ pub fn precompiled_load_data_with_result( load_indirections: usize, load_chunks: usize, load_rem: usize, + direct_load_param_idx: Option, data: &mut [u64], title: &str, ) { @@ -1331,6 +1540,7 @@ pub fn precompiled_load_data_with_result( load_chunks, load_rem, 1, + direct_load_param_idx, data, title, ); @@ -1345,20 +1555,26 @@ fn internal_precompiled_load_data( load_chunks: usize, load_rem: usize, result: usize, + direct_load_param_idx: Option, // Index of the load parameters that isn't an indirection data: &mut [u64], title: &str, ) { let address = ctx.b; if address & 0x7 != 0 { - panic!("precompiled_check_address() found address not aligned to 8 bytes"); + panic!( + "[{title}] precompiled_check_address() found address 0x{address:08X} not aligned \ + to 8 bytes at PC:0x{:08X} STEP:{}", + ctx.pc, ctx.step + ); } if let EmulationMode::ConsumeMemReads = ctx.emulation_mode { // Check input data has the expected length let expected_len = params_count + load_indirections * load_chunks + load_rem + result; if ctx.precompiled.input_data.len() != expected_len { panic!( - "[{title}] ctx.precompiled.input_data.len={} != {expected_len} [{params_count}+{load_indirections}*{load_chunks}+{load_rem}+{result}]", - ctx.precompiled.input_data.len(), + "[{title}] ctx.precompiled.input_data.len={} != {expected_len} \ + [{params_count}+{load_indirections}*{load_chunks}+{load_rem}+{result}] at PC:0x{:08X} STEP:{}", + ctx.precompiled.input_data.len(), ctx.pc, ctx.step, ); } // Read data from the precompiled context @@ -1370,20 +1586,36 @@ fn internal_precompiled_load_data( return; } - // Write the indirections to data + // Write the indirections/direct_params to data for (i, data) in data.iter_mut().enumerate().take(params_count) { let indirection = ctx.mem.read(address + (8 * i as u64), 8); - if address & 0x7 != 0 { - panic!("precompiled_check_address() found address[{i}] not aligned to 8 bytes"); + if indirection & 0x7 != 0 && Some(i) != direct_load_param_idx { + panic!( + "[{title}] precompiled_check_address() found address_{i} [0x{address:08X}]=0x{indirection:08X} \ + not aligned to 8 bytes at PC:0x{:08X} STEP:{}", + ctx.pc, ctx.step + ); } *data = indirection; } + // Write the data let mut data_offset = params_count; for i in 0..load_indirections { + let param_idx = if let Some(direct_idx) = direct_load_param_idx { + if i >= direct_idx { + i + 1 + } else { + i + } + } else { + i + }; + let data_offset = i * load_chunks + data_offset; // if there aren't indirections, take directly from the address - let param_address = if params_count == 0 { address + data_offset as u64 } else { data[i] }; + let param_address = + if params_count == 0 { address + data_offset as u64 } else { data[param_idx] }; for j in 0..load_chunks { let addr = param_address + (8 * j as u64); data[data_offset + j] = ctx.mem.read(addr, 8); @@ -1461,7 +1693,7 @@ pub fn opc_add256(ctx: &mut InstContext) { const WORDS: usize = 4 + 1 + 2 * 4; let mut data = [0u64; WORDS]; - precompiled_load_data_with_result(ctx, 4, 2, 4, 0, &mut data, "add256"); + precompiled_load_data_with_result(ctx, 4, 2, 4, 0, Some(2), &mut data, "add256"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 3 indirections @@ -1487,11 +1719,11 @@ pub fn opc_add256(ctx: &mut InstContext) { ctx.precompiled.input_data[4 + 2 * 4] = cout; } ctx.c = cout; - ctx.flag = cout != 0; + ctx.flag = false; } else { assert!(data[4 + 2 * 4] <= 1, "opc_add256: cout > 1"); ctx.c = data[4 + 2 * 4]; - ctx.flag = data[4 + 2 * 4] != 0; + ctx.flag = false; } } @@ -1512,7 +1744,7 @@ pub fn opc_arith256(ctx: &mut InstContext) { const WORDS: usize = 5 + 3 * 4; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 5, 3, 4, 0, &mut data, "arith256"); + precompiled_load_data(ctx, 5, 3, 4, 0, None, &mut data, "arith256"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 5 indirections @@ -1559,7 +1791,7 @@ pub fn opc_arith256_mod(ctx: &mut InstContext) { const WORDS: usize = 5 + 4 * 4; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 5, 4, 4, 0, &mut data, "arith256_mod"); + precompiled_load_data(ctx, 5, 4, 4, 0, None, &mut data, "arith256_mod"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 5 indirections @@ -1605,7 +1837,7 @@ pub fn opc_secp256k1_add(ctx: &mut InstContext) { const WORDS: usize = 2 + 2 * 8; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 2, 2, 8, 0, &mut data, "secp256k1_add"); + precompiled_load_data(ctx, 2, 2, 8, 0, None, &mut data, "secp256k1_add"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 2 indirections @@ -1644,7 +1876,7 @@ pub fn opc_secp256k1_dbl(ctx: &mut InstContext) { const WORDS: usize = 8; // one input of 8 64-bit words let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 0, 1, 8, 0, &mut data, "secp256k1_dbl"); + precompiled_load_data(ctx, 0, 1, 8, 0, None, &mut data, "secp256k1_dbl"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { let p1: &[u64; 8] = &data; @@ -1673,12 +1905,85 @@ pub fn ops_secp256k1_dbl(ctx: &InstContext, stats: &mut dyn OpStats) { precompiled_stats_direct_data(ctx, stats, 8, 8); } +#[inline(always)] +pub fn opc_secp256r1_add(ctx: &mut InstContext) { + const WORDS: usize = 2 + 2 * 8; + let mut data = [0u64; WORDS]; + + precompiled_load_data(ctx, 2, 2, 8, 0, None, &mut data, "secp256r1_add"); + + if ctx.emulation_mode != EmulationMode::ConsumeMemReads { + // ignore 2 indirections + let (_, rest) = data.split_at(2); + let (p1, p2) = rest.split_at(8); + + let p1: &[u64; 8] = p1.try_into().expect("opc_secp256r1_add: p1.len != 8"); + let p2: &[u64; 8] = p2.try_into().expect("opc_secp256r1_add: p2.len != 8"); + let mut p3 = [0u64; 8]; + + precompiles_helpers::secp256r1_add(p1, p2, &mut p3); + + // [0:p1,p2] + for (i, d) in p3.iter().enumerate() { + ctx.mem.write(data[0] + (8 * i as u64), *d, 8); + } + } + ctx.c = 0; + ctx.flag = false; +} + +/// Unimplemented. Secp256r1Add can only be called from the system call context via InstContext. +/// This is provided just for completeness. +#[inline(always)] +pub fn op_secp256r1_add(_a: u64, _b: u64) -> (u64, bool) { + unimplemented!("op_secp256r1_add() is not implemented"); +} + +#[inline(always)] +pub fn ops_secp256r1_add(ctx: &InstContext, stats: &mut dyn OpStats) { + precompiled_stats_data(ctx, stats, &[8, 8], &[], 1); +} + +#[inline(always)] +pub fn opc_secp256r1_dbl(ctx: &mut InstContext) { + const WORDS: usize = 8; // one input of 8 64-bit words + let mut data = [0u64; WORDS]; + + precompiled_load_data(ctx, 0, 1, 8, 0, None, &mut data, "secp256r1_dbl"); + + if ctx.emulation_mode != EmulationMode::ConsumeMemReads { + let p1: &[u64; 8] = &data; + let mut p3 = [0u64; 8]; + + precompiles_helpers::secp256r1_dbl(p1, &mut p3); + + for (i, d) in p3.iter().enumerate() { + ctx.mem.write(ctx.b + (8 * i as u64), *d, 8); + } + } + + ctx.c = 0; + ctx.flag = false; +} + +/// Unimplemented. Secp256r1Dbl can only be called from the system call context via InstContext. +/// This is provided just for completeness. +#[inline(always)] +pub fn op_secp256r1_dbl(_a: u64, _b: u64) -> (u64, bool) { + unimplemented!("op_secp256r1_dbl() is not implemented"); +} + +#[inline(always)] +pub fn ops_secp256r1_dbl(ctx: &InstContext, stats: &mut dyn OpStats) { + precompiled_stats_direct_data(ctx, stats, 8, 8); +} + #[inline(always)] pub fn opc_bn254_curve_add(ctx: &mut InstContext) { const WORDS: usize = 2 + 2 * 8; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 2, 2, 8, 0, &mut data, "bn254_curve_add"); + precompiled_load_data(ctx, 2, 2, 8, 0, None, &mut data, "bn254_curve_add"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 2 indirections @@ -1718,7 +2023,7 @@ pub fn opc_bn254_curve_dbl(ctx: &mut InstContext) { const WORDS: usize = 8; // one input of 8 64-bit words let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 0, 1, 8, 0, &mut data, "bn254_curve_dbl"); + precompiled_load_data(ctx, 0, 1, 8, 0, None, &mut data, "bn254_curve_dbl"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { let p1: &[u64; 8] = &data; @@ -1752,7 +2057,7 @@ pub fn opc_bn254_complex_add(ctx: &mut InstContext) { const WORDS: usize = 2 + 2 * 8; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 2, 2, 8, 0, &mut data, "bn254_complex_add"); + precompiled_load_data(ctx, 2, 2, 8, 0, None, &mut data, "bn254_complex_add"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 2 indirections @@ -1792,7 +2097,7 @@ pub fn opc_bn254_complex_sub(ctx: &mut InstContext) { const WORDS: usize = 2 + 2 * 8; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 2, 2, 8, 0, &mut data, "bn254_complex_sub"); + precompiled_load_data(ctx, 2, 2, 8, 0, None, &mut data, "bn254_complex_sub"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 2 indirections @@ -1832,7 +2137,7 @@ pub fn opc_bn254_complex_mul(ctx: &mut InstContext) { const WORDS: usize = 2 + 2 * 8; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 2, 2, 8, 0, &mut data, "bn254_complex_mul"); + precompiled_load_data(ctx, 2, 2, 8, 0, None, &mut data, "bn254_complex_mul"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 2 indirections @@ -1872,7 +2177,7 @@ pub fn opc_arith384_mod(ctx: &mut InstContext) { const WORDS: usize = 5 + 4 * 6; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 5, 4, 6, 0, &mut data, "arith384_mod"); + precompiled_load_data(ctx, 5, 4, 6, 0, None, &mut data, "arith384_mod"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 5 indirections @@ -1918,7 +2223,7 @@ pub fn opc_bls12_381_curve_add(ctx: &mut InstContext) { const WORDS: usize = 2 + 2 * 12; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 2, 2, 12, 0, &mut data, "bls12_381_curve_add"); + precompiled_load_data(ctx, 2, 2, 12, 0, None, &mut data, "bls12_381_curve_add"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 2 indirections @@ -1958,7 +2263,7 @@ pub fn opc_bls12_381_curve_dbl(ctx: &mut InstContext) { const WORDS: usize = 12; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 0, 1, 12, 0, &mut data, "bls12_381_curve_dbl"); + precompiled_load_data(ctx, 0, 1, 12, 0, None, &mut data, "bls12_381_curve_dbl"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { let p1: &[u64; 12] = &data; @@ -1992,7 +2297,7 @@ pub fn opc_bls12_381_complex_add(ctx: &mut InstContext) { const WORDS: usize = 2 + 2 * 12; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 2, 2, 12, 0, &mut data, "bls12_381_complex_add"); + precompiled_load_data(ctx, 2, 2, 12, 0, None, &mut data, "bls12_381_complex_add"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 2 indirections @@ -2032,7 +2337,7 @@ pub fn opc_bls12_381_complex_sub(ctx: &mut InstContext) { const WORDS: usize = 2 + 2 * 12; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 2, 2, 12, 0, &mut data, "bls12_381_complex_sub"); + precompiled_load_data(ctx, 2, 2, 12, 0, None, &mut data, "bls12_381_complex_sub"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 2 indirections @@ -2072,7 +2377,7 @@ pub fn opc_bls12_381_complex_mul(ctx: &mut InstContext) { const WORDS: usize = 2 + 2 * 12; let mut data = [0u64; WORDS]; - precompiled_load_data(ctx, 2, 2, 12, 0, &mut data, "bls12_381_complex_mul"); + precompiled_load_data(ctx, 2, 2, 12, 0, None, &mut data, "bls12_381_complex_mul"); if ctx.emulation_mode != EmulationMode::ConsumeMemReads { // ignore 2 indirections @@ -2195,7 +2500,35 @@ pub fn opc_fcall(ctx: &mut InstContext) { // Get function id from a let function_id = ctx.a; - let iresult = fcall_proxy(function_id, &ctx.fcall.parameters, &mut ctx.fcall.result); + let iresult = if function_id == FCALL_INPUT_READY_ID as u64 { + let required_address = ctx.fcall.parameters[0]; + if required_address < INPUT_ADDR { + panic!( + "opc_fcall() FCALL_INPUT_READY_ID called with required_address {:#x} < {:#x}", + required_address, INPUT_ADDR + ); + } + if required_address >= INPUT_ADDR + MAX_INPUT_SIZE - 1 { + panic!( + "opc_fcall() FCALL_INPUT_READY_ID called with required_address {:#x} > {:#x}", + required_address, + INPUT_ADDR + MAX_INPUT_SIZE - 1 + ); + } + + let required_bytes = required_address - INPUT_ADDR - 8 + 1; // + 1 because required_address is the address of the last required byte + if required_bytes > ctx.input_len { + panic!( + "opc_fcall() FCALL_INPUT_READY_ID called with required_address {:#x} requiring {} bytes, but only {} bytes available", + required_address, + required_bytes, + ctx.input_len + ); + } + 0 + } else { + fcall_proxy(function_id, &ctx.fcall.parameters, &mut ctx.fcall.result) + }; if iresult < 0 { panic!("opc_fcall() failed calling Fcall() function_id={function_id} iresult={iresult}"); @@ -2266,3 +2599,750 @@ pub fn opc_halt(ctx: &mut InstContext) { ctx.c = 0; ctx.flag = false; } + +pub fn opc_dma_memcpy(ctx: &mut InstContext) { + opc_dma_memcpys(ctx, false) +} +pub fn opc_dma_xmemcpy(ctx: &mut InstContext) { + opc_dma_memcpys(ctx, true) +} +fn opc_dma_memcpys(ctx: &mut InstContext, extended: bool) { + let dst = ctx.a; + let src = ctx.b; + + match ctx.emulation_mode { + EmulationMode::Mem => { + let count = + if extended { ctx.extended_arg as u64 } else { ctx.mem.read(EXTRA_PARAMS_ADDR, 8) }; + ctx.mem.memcpy(dst, src, count); + } + EmulationMode::GenerateMemReads => { + // In generate mode we need to populate precompiled.input_data with + // information needed + let count = + if extended { ctx.extended_arg as u64 } else { ctx.mem.read(EXTRA_PARAMS_ADDR, 8) }; + ctx.precompiled.input_data.clear(); + + #[cfg(feature = "log_dma_ops")] + println!("opc_dma_memcpy 0x{dst:08X} 0x{src:08X} {count} GMR STEP:{}", ctx.step); + + let encoded = DmaInfo::encode_memcpy(dst, src, count as usize); + ctx.precompiled.input_data.push(encoded); + + if count > 0 { + // read first dst unaligned part for dma-pre + let mut data_len = 0; + let dst64 = dst & !0x07; + // if dst64 != dst { + if DmaInfo::get_pre_count(encoded) > 0 { + let pre_data = ctx.mem.read(dst64, 8); + data_len += 1; + ctx.precompiled.input_data.push(pre_data); + } + + // read last dst unaligned part for dma-post + let to_dst = dst + count - 1; + // if to_dst & 0x07 != 0x07 { + if DmaInfo::get_post_count(encoded) > 0 { + let post_data = ctx.mem.read(to_dst & !0x07, 8); + data_len += 1; + // println!("ADDING_POST_DATA 0x{:08X} 0x{post_data:016X}", to_dst & !0x07); + ctx.precompiled.input_data.push(post_data); + } + + // read all source 64-words + let src64 = src & !0x07; + let to_src64 = (src + count - 1) & !0x07; + + let src64_count = (to_src64 - src64 + 8) >> 3; + ctx.mem.push_from_mem(&mut ctx.precompiled.input_data, src64, src64_count * 8); + data_len += src64_count; + #[cfg(feature = "debug_dma")] + println!( + "PRECOMPILED.MEMCPY.INPUT_DATA: [{}] data_len:{data_len}", + ctx.precompiled + .input_data + .iter() + .map(|x| format!("0x{x:016X}")) + .collect::>() + .join(",") + ); + assert_eq!(data_len as usize, DmaInfo::get_data_size(encoded)); + + ctx.mem.memcpy(dst, src, count); + } + ctx.precompiled.output_data.clear(); + + ctx.precompiled.step = ctx.step; + } + EmulationMode::ConsumeMemReads => { + let encoded = ctx.precompiled.input_data[0]; + let count = DmaInfo::get_count(encoded); + #[cfg(feature = "debug_dma")] + println!( + "opc_dma_memcpy 0x{dst:08X} 0x{src:08X} {count} CMR STEP:{} DATA_EXT_LEN:{}", + ctx.step, + DmaInfo::get_data_size(encoded) + ); + ctx.data_ext_len = DmaInfo::get_data_size(encoded); + } + } + ctx.c = dst; + ctx.flag = false; +} + +/// Unimplemented. Arith256 can only be called from the system call context via InstContext. +/// This is provided just for completeness. +#[inline(always)] +pub fn op_dma_memcpy(_a: u64, _b: u64) -> (u64, bool) { + unimplemented!("op_dma_memcpy() is not implemented"); +} +#[inline(always)] +pub fn op_dma_xmemcpy(_a: u64, _b: u64) -> (u64, bool) { + unimplemented!("op_dma_xmemcpy() is not implemented"); +} + +#[inline(always)] +pub fn ops_dma_memcpy(ctx: &InstContext, stats: &mut dyn OpStats) { + ops_dma_memcpys(ctx, stats, false) +} +#[inline(always)] +pub fn ops_dma_xmemcpy(ctx: &InstContext, stats: &mut dyn OpStats) { + ops_dma_memcpys(ctx, stats, true) +} +#[inline(always)] +fn ops_dma_memcpys(ctx: &InstContext, stats: &mut dyn OpStats, extended: bool) { + let addr_a = ctx.a; + let addr_b = ctx.b; + let count = if extended { ctx.extended_arg as u64 } else { ctx.mem.read(EXTRA_PARAMS_ADDR, 8) }; + // pre, post, dma_align, dma_unalign + if count == 0 { + return; + } + + let offset_a = addr_a & 0x07; + let offset_b = addr_b & 0x07; + let addr64_a = addr_a - offset_a; + let addr64_b = addr_b - offset_b; + let pre_count = std::cmp::min((8 - offset_a) & 0x07, count); + + if pre_count > 0 { + stats.mem_align_read(addr64_a, 1); + stats.mem_align_read(addr64_b, 1 + ((offset_b + pre_count) > 8) as usize); + stats.mem_align_write(addr64_a, 1); + } + + let post_count = (count - pre_count) & 0x07; + let remain_b = (16 - offset_a - pre_count) & 0x07; + let addr64_a_end = (addr_a + count - 1) & !0x07; + let addr64_b_end = (addr_b + count - 1) & !0x07; + if post_count > 0 { + let extra_b = (remain_b < post_count) as u64; + stats.mem_align_read(addr64_a_end, 1); + stats.mem_align_read(addr64_b_end - extra_b * 8, 1 + extra_b as usize); + stats.mem_align_write(addr64_a_end, 1); + } + + let loop_count = ((count - pre_count - post_count) >> 32) as usize; + if loop_count == 0 { + // with count < 8, there aren't 64-bits loops. + stats.add_extras(&[ + (ZiskOp::_DMA_PRE, (pre_count > 0) as usize), + (ZiskOp::_DMA_POST, (post_count > 0) as usize), + ]); + } else { + // calculate the resources used by 64-bits loop. + // count used are number of bytes read to demostrate memcmp(), usually count_eq + 1, + // but if all bytes are equal count = count_eq, no need extra reads + let first_loop_dst64 = (addr_a + pre_count) >> 3; + let first_loop_src64 = (addr_b + pre_count) >> 3; + + // same alignment + if addr_a & 0x07 == addr_b & 0x07 { + stats.mem_align_read(first_loop_src64, loop_count); + stats.mem_align_write(first_loop_dst64, loop_count); + // add information about other machines to demostrate operation + let units = loop_count.div_ceil(DMA_64_ALIGNED_OPS_BY_ROW); + stats.add_extras(&[ + (ZiskOp::_DMA_PRE, (pre_count > 0) as usize), + (ZiskOp::_DMA_POST, (post_count > 0) as usize), + (ZiskOp::_DMA_64_ALIGNED, loop_count), + ]); + } else { + stats.mem_align_read(first_loop_src64, loop_count + 1); + stats.mem_align_write(first_loop_dst64, loop_count); + // add information about other machines to demostrate operation + stats.add_extras(&[ + (ZiskOp::_DMA_PRE, (pre_count > 0) as usize), + (ZiskOp::_DMA_POST, (post_count > 0) as usize), + (ZiskOp::_DMA_UNALIGNED, loop_count + 1), + ]); + } + } +} + +#[inline(always)] +pub fn opc_dma_memcmp(ctx: &mut InstContext) { + opc_dma_memcmps(ctx, false) +} +#[inline(always)] +pub fn opc_dma_xmemcmp(ctx: &mut InstContext) { + opc_dma_memcmps(ctx, true) +} + +fn opc_dma_memcmps(ctx: &mut InstContext, extended: bool) { + let dst = ctx.a; + let src = ctx.b; + let step = ctx.step; + + match ctx.emulation_mode { + EmulationMode::Mem => { + let count = + if extended { ctx.extended_arg as u64 } else { ctx.mem.read(EXTRA_PARAMS_ADDR, 8) }; + let (result, effective_count) = ctx.mem.memcmp(dst, src, count); + ctx.stats_hint = effective_count as u64; + ctx.c = result; + } + EmulationMode::GenerateMemReads => { + // In generate mode we need to populate precompiled.input_data with + // information needed + let count = + if extended { ctx.extended_arg as u64 } else { ctx.mem.read(EXTRA_PARAMS_ADDR, 8) }; + ctx.precompiled.input_data.clear(); + + #[cfg(feature = "log_dma_ops")] + println!("opc_dma_memcmp 0x{dst:08X} 0x{src:08X} {count} GMR STEP:{step}"); + let (result, effective_count) = ctx.mem.memcmp(dst, src, count); + + let encoded = DmaInfo::encode_memcmp(dst, src, effective_count, result); + ctx.precompiled.input_data.push(encoded); + ctx.precompiled.input_data.push(count); + + if count > 0 { + // read first dst unaligned part for dma-pre + let mut data_len = 0; + let dst64 = dst & !0x07; + // if dst64 != dst { + if DmaInfo::get_pre_count(encoded) > 0 { + let pre_data = ctx.mem.read(dst64, 8); + data_len += 1; + ctx.precompiled.input_data.push(pre_data); + } + + let effective_count = effective_count as u64; + // read last dst unaligned part for dma-post + let to_dst = dst + effective_count - 1; + // if to_dst & 0x07 != 0x07 { + if DmaInfo::get_post_count(encoded) > 0 { + let post_data = ctx.mem.read(to_dst & !0x07, 8); + data_len += 1; + // println!("ADDING_POST_DATA 0x{:08X} 0x{post_data:016X}", to_dst & !0x07); + ctx.precompiled.input_data.push(post_data); + } + + // read all source 64-words + let src64 = src & !0x07; + let to_src64 = (src + effective_count - 1) & !0x07; + + let src64_count = (to_src64 - src64 + 8) >> 3; + ctx.mem.push_from_mem(&mut ctx.precompiled.input_data, src64, src64_count * 8); + data_len += src64_count; + #[cfg(feature = "debug_dma")] + println!( + "PRECOMPILED.MEMCMP.INPUT_DATA: [{}] data_len:{data_len}", + ctx.precompiled + .input_data + .iter() + .map(|x| format!("0x{x:016X}")) + .collect::>() + .join(",") + ); + assert_eq!(data_len as usize, DmaInfo::get_data_size(encoded)); + } + + ctx.precompiled.output_data.clear(); + ctx.precompiled.step = step; + ctx.c = result; + } + EmulationMode::ConsumeMemReads => { + let encoded = ctx.precompiled.input_data[0]; + let count = DmaInfo::get_count(encoded); + ctx.data_ext_len = DmaInfo::get_data_size(encoded); + ctx.c = DmaInfo::get_memcmp_res_as_u64(encoded); + #[cfg(feature = "debug_dma")] + println!( + "opc_dma_memcmp 0x{dst:08X} 0x{src:08X} {count} CMR 0x{:016X} STEP:{} DATA_EXT_LEN:{}", + ctx.c, + ctx.step, + ctx.data_ext_len + ); + } + } + ctx.flag = false; +} + +/// Unimplemented. DmaMemCmp and DmaXºMemCmp can only be called from the system call context +/// via InstContext. This is provided just for completeness. +#[inline(always)] +pub fn op_dma_memcmp(_a: u64, _b: u64) -> (u64, bool) { + unimplemented!("op_dma_memcmp() is not implemented"); +} + +#[inline(always)] +pub fn op_dma_xmemcmp(_a: u64, _b: u64) -> (u64, bool) { + unimplemented!("op_dma_xmemcmp() is not implemented"); +} + +#[inline(always)] +pub fn ops_dma_memcmp(ctx: &InstContext, stats: &mut dyn OpStats) { + ops_dma_memcmps(ctx, stats, false) +} +#[inline(always)] +pub fn ops_dma_xmemcmp(ctx: &InstContext, stats: &mut dyn OpStats) { + ops_dma_memcmps(ctx, stats, true) +} + +#[inline(always)] +fn ops_dma_memcmps(ctx: &InstContext, stats: &mut dyn OpStats, extended: bool) { + let addr_a = ctx.a; + let addr_b = ctx.b; + // let _bus_count = if extended { ctx.extended_arg as u64 } else { ctx.mem.read(EXTRA_PARAMS_ADDR, 8) }; + let count = ctx.stats_hint; + + // pre, post, dma_align, dma_unalign + if count == 0 { + return; + } + + let (res, count_eq) = ctx.mem.memcmp(addr_a, addr_b, count); + let count = if count_eq as u64 == count { count } else { count_eq as u64 + 1 }; + let offset_a = addr_a & 0x07; + let offset_b = addr_b & 0x07; + let addr64_a = addr_a - offset_a; + let addr64_b = addr_b - offset_b; + let pre_count = std::cmp::min((8 - offset_a) & 0x07, count); + + if pre_count > 0 { + stats.mem_align_read(addr64_a, 1); + stats.mem_align_read(addr64_b, 1 + ((offset_b + pre_count) > 8) as usize); + stats.mem_align_read(addr64_a, 1); + } + + let post_count = (count - pre_count) & 0x07; + let remain_b = (16 - offset_a - pre_count) & 0x07; + let addr64_a_end = (addr_a + count - 1) & !0x07; + let addr64_b_end = (addr_b + count - 1) & !0x07; + if post_count > 0 { + let extra_b = (remain_b < post_count) as u64; + stats.mem_align_read(addr64_a_end, 1); + stats.mem_align_read(addr64_b_end - extra_b * 8, 1 + extra_b as usize); + stats.mem_align_read(addr64_a_end, 1); + } + + let loop_count = ((count - pre_count - post_count) >> 32) as usize; + if loop_count == 0 { + // with count < 8, there aren't 64-bits loops. + stats.add_extras(&[ + (ZiskOp::_DMA_PRE, (pre_count > 0) as usize), + (ZiskOp::_DMA_POST, (post_count > 0) as usize), + ]); + } else { + // calculate the resources used by 64-bits loop. + // count used are number of bytes read to demostrate memcmp(), usually count_eq + 1, + // but if all bytes are equal count = count_eq, no need extra reads + let first_loop_dst64 = (addr_a + pre_count) >> 3; + let first_loop_src64 = (addr_b + pre_count) >> 3; + + // same alignment + if addr_a & 0x07 == addr_b & 0x07 { + stats.mem_align_read(first_loop_src64, loop_count); + stats.mem_align_read(first_loop_dst64, loop_count); + // add information about other machines to demostrate operation + let units = loop_count.div_ceil(DMA_64_ALIGNED_OPS_BY_ROW); + stats.add_extras(&[ + (ZiskOp::_DMA_PRE, (pre_count > 0) as usize), + (ZiskOp::_DMA_POST, (post_count > 0) as usize), + (ZiskOp::_DMA_64_ALIGNED, loop_count), + ]); + } else { + stats.mem_align_read(first_loop_src64, loop_count + 1); + stats.mem_align_read(first_loop_dst64, loop_count); + // add information about other machines to demostrate operation + stats.add_extras(&[ + (ZiskOp::_DMA_PRE, (pre_count > 0) as usize), + (ZiskOp::_DMA_POST, (post_count > 0) as usize), + (ZiskOp::_DMA_UNALIGNED, loop_count + 1), + ]); + } + } +} + +fn read_from_input(ctx: &mut InstContext, dst: u64, count: u64) { + // Check for consistency + if count % 8 != 0 { + panic!("opc_dma_inputcpy() called without invalid count {count}"); + } + let count64 = count >> 3; + if ctx.fcall.result_size == 0 { + panic!("opc_dma_inputcpy() called with ctx.fcall.result_size==0"); + } + if ctx.fcall.result_size as usize > FCALL_RESULT_MAX_SIZE { + panic!( + "opc_dma_inputcpy() called with ctx.fcall.result_size=={}>32", + ctx.fcall.result_size + ); + } + if (ctx.fcall.result_got - 1 + count64) > ctx.fcall.result_size { + panic!( + "opc_dma_inputcpy() called with ctx.fcall.result_got({}) + {count64} >= ctx.fcall.result_size {}", + ctx.fcall.result_got, ctx.fcall.result_size + ); + } + ctx.mem.memcpy_from_data( + dst, + count, + &ctx.fcall.result, + (ctx.fcall.result_got - 1) as usize * 8, + ); + ctx.fcall.result_got += count64; + if ctx.fcall.result_got > ctx.fcall.result_size { + ctx.mem.free_input = 0; + } else { + ctx.mem.free_input = ctx.fcall.result[ctx.fcall.result_got as usize - 1]; + } +} + +fn read_and_get_from_input(ctx: &mut InstContext, dst: u64, count: u64) -> Vec { + // Check for consistency + if count % 8 != 0 { + panic!("opc_dma_inputcpy() called at 0x{:08x} without invalid count {count}", ctx.pc); + } + let count64 = count >> 3; + if ctx.fcall.result_size == 0 { + panic!("opc_dma_inputcpy() called at 0x{:08x} with ctx.fcall.result_size==0", ctx.pc); + } + if ctx.fcall.result_size as usize > FCALL_RESULT_MAX_SIZE { + panic!( + "opc_dma_inputcpy() called at 0x{:08x} with ctx.fcall.result_size=={}>32", + ctx.pc, ctx.fcall.result_size + ); + } + if (ctx.fcall.result_got - 1 + count64) > ctx.fcall.result_size { + panic!( + "opc_dma_inputcpy() called at 0x{:08x} with ctx.fcall.result_got({}) + {count64} >= ctx.fcall.result_size {}", + ctx.pc, ctx.fcall.result_got, ctx.fcall.result_size + ); + } + + ctx.mem.memcpy_from_data( + dst, + count, + &ctx.fcall.result, + (ctx.fcall.result_got - 1) as usize * 8, + ); + + let offset = (dst & 0x07) as usize; + let start_index = (ctx.fcall.result_got - 1) as usize; + let mut qwords_added = 0; + let mut input_data = Vec::new(); + + if offset == 0 { + // Fast path: aligned, direct copy + for i in 0..count64 as usize { + input_data.push(ctx.fcall.result[start_index + i]); + qwords_added += 1; + } + } else { + // Slow path: unaligned, need to shift and merge words + // When unaligned, we need count64 + 1 output words + let shift_bits = (offset * 8) as u32; + let shift_bits_comp = 64 - shift_bits; + + // First word: padding zeros in lower bytes, first data bytes in upper bytes + let first_word = ctx.fcall.result[start_index] << shift_bits; + input_data.push(first_word); + qwords_added += 1; + + // Middle words: merge parts of consecutive data words + for i in 0..(count64 as usize - 1) { + let low_part = ctx.fcall.result[start_index + i] >> shift_bits_comp; + let high_part = ctx.fcall.result[start_index + i + 1] << shift_bits; + input_data.push(low_part | high_part); + qwords_added += 1; + } + + // Last word: remaining bytes from last data word + if count64 > 0 { + let last_word = ctx.fcall.result[start_index + count64 as usize - 1] >> shift_bits_comp; + input_data.push(last_word); + qwords_added += 1; + } + } + + ctx.fcall.result_got += count64; + if ctx.fcall.result_got > ctx.fcall.result_size { + ctx.mem.free_input = 0; + } else { + ctx.mem.free_input = ctx.fcall.result[ctx.fcall.result_got as usize - 1]; + } + + input_data +} + +#[inline(always)] +fn opc_dma_inputcpy(ctx: &mut InstContext) { + let dst: u64 = ctx.a; + let count = ctx.b; + + match ctx.emulation_mode { + EmulationMode::Mem => { + read_from_input(ctx, dst, count); + } + EmulationMode::GenerateMemReads => { + // In generate mode we need to populate precompiled.input_data with + // information needed + ctx.precompiled.input_data.clear(); + + #[cfg(feature = "log_dma_ops")] + println!("opc_dma_inputcpy 0x{dst:08X} {count} GMR STEP:{}", ctx.step); + + let encoded = DmaInfo::encode_inputcpy(dst, count as usize); + ctx.precompiled.input_data.push(encoded); + + if count > 0 { + // read first dst unaligned part for dma-pre + let mut data_len = 0; + let dst64 = dst & !0x07; + // if dst64 != dst { + if DmaInfo::get_pre_count(encoded) > 0 { + let pre_data = ctx.mem.read(dst64, 8); + data_len += 1; + ctx.precompiled.input_data.push(pre_data); + } + + // read last dst unaligned part for dma-post + let to_dst = dst + count - 1; + // if to_dst & 0x07 != 0x07 { + if DmaInfo::get_post_count(encoded) > 0 { + let post_data = ctx.mem.read(to_dst & !0x07, 8); + data_len += 1; + ctx.precompiled.input_data.push(post_data); + } + #[cfg(feature = "debug_dma")] + println!( + "PRECOMPILED.INPUTCPY.INPUT_DATA: [{}] data_len:{data_len}", + ctx.precompiled + .input_data + .iter() + .map(|x| format!("0x{x:016X}")) + .collect::>() + .join(",") + ); + + let input_data = read_and_get_from_input(ctx, dst, count); + data_len += input_data.len(); + + assert_eq!(data_len, DmaInfo::get_data_size(encoded)); + + ctx.precompiled.input_data.extend(input_data); + } + ctx.precompiled.output_data.clear(); + ctx.precompiled.step = ctx.step; + } + EmulationMode::ConsumeMemReads => { + let encoded = ctx.precompiled.input_data[0]; + let count = DmaInfo::get_count(encoded); + #[cfg(feature = "debug_dma")] + println!( + "opc_dma_inputcpy 0x{dst:08X} {count} CMR STEP:{} DATA_EXT_LEN:{}", + ctx.step, + DmaInfo::get_data_size(encoded) + ); + ctx.data_ext_len = DmaInfo::get_data_size(encoded); + } + } + ctx.c = dst; + ctx.flag = false; +} + +#[inline(always)] +pub fn op_dma_inputcpy(_a: u64, _b: u64) -> (u64, bool) { + unimplemented!("op_dma_inputcpy() is not implemented"); +} + +#[inline(always)] +pub fn ops_dma_inputcpy(ctx: &InstContext, stats: &mut dyn OpStats) { + let addr_a = ctx.a; + let count = ctx.b; + + // pre, post, dma_align, dma_unalign + if count == 0 { + return; + } + + let offset_a = addr_a & 0x07; + let addr64_a = addr_a - offset_a; + let pre_count = (8 - offset_a) & 0x07; + + if pre_count > 0 { + stats.mem_align_read(addr64_a, 1); + stats.mem_align_write(addr64_a, 1); + } + + let post_count = (count - pre_count) & 0x07; + let addr64_a_end = (addr_a + count - 1) & !0x07; + if post_count > 0 { + stats.mem_align_read(addr64_a_end, 1); + stats.mem_align_write(addr64_a_end, 1); + } + + let loop_count = ((count - pre_count - post_count) >> 32) as usize; + if loop_count == 0 { + // with count < 8, there aren't 64-bits loops. + stats.add_extras(&[ + (ZiskOp::_DMA_PRE, (pre_count > 0) as usize), + (ZiskOp::_DMA_POST, (post_count > 0) as usize), + ]); + } else { + // calculate the resources used by 64-bits loop. + // count used are number of bytes read to demostrate memcmp(), usually count_eq + 1, + // but if all bytes are equal count = count_eq, no need extra reads + let first_loop_dst64 = (addr_a + pre_count) >> 3; + + stats.mem_align_write(first_loop_dst64, loop_count); + stats.add_extras(&[ + (ZiskOp::_DMA_PRE, (pre_count > 0) as usize), + (ZiskOp::_DMA_POST, (post_count > 0) as usize), + (ZiskOp::_DMA_64_ALIGNED, loop_count), + ]); + } +} + +#[inline(always)] +pub fn opc_dma_xmemset(ctx: &mut InstContext) { + let dst = ctx.a; + let count = ctx.b; + let fill_byte = ctx.extended_arg as u8; + + match ctx.emulation_mode { + EmulationMode::Mem => { + ctx.mem.memset(dst, count, fill_byte); + } + EmulationMode::GenerateMemReads => { + // In generate mode we need to populate precompiled.input_data with + // information needed + ctx.precompiled.input_data.clear(); + + #[cfg(feature = "log_dma_ops")] + println!( + "opc_dma_memset 0x{dst:08X} 0x{fill_byte:02X} {count} GMR STEP:{} PC:0x{:08x}", + ctx.step, ctx.pc + ); + + let encoded = DmaInfo::encode_memset(dst, count as usize, fill_byte); + ctx.precompiled.input_data.push(encoded); + + if count > 0 { + // read first dst unaligned part for dma-pre + let mut data_len = 0; + let dst64 = dst & !0x07; + // if dst64 != dst { + if DmaInfo::get_pre_count(encoded) > 0 { + let pre_data = ctx.mem.read(dst64, 8); + data_len += 1; + ctx.precompiled.input_data.push(pre_data); + } + + // read last dst unaligned part for dma-post + let to_dst = dst + count - 1; + // if to_dst & 0x07 != 0x07 { + if DmaInfo::get_post_count(encoded) > 0 { + let post_data = ctx.mem.read(to_dst & !0x07, 8); + data_len += 1; + ctx.precompiled.input_data.push(post_data); + } + #[cfg(feature = "log_dma_ops")] + println!( + "PRECOMPILED.MEMSET.INPUT_DATA: [{}] data_len:{data_len}", + ctx.precompiled + .input_data + .iter() + .map(|x| format!("0x{x:016X}")) + .collect::>() + .join(",") + ); + assert_eq!(data_len as usize, DmaInfo::get_pre_writes(encoded)); + ctx.mem.memset(dst, count, fill_byte); + } + ctx.precompiled.output_data.clear(); + ctx.precompiled.step = ctx.step; + } + EmulationMode::ConsumeMemReads => { + let encoded = ctx.precompiled.input_data[0]; + let count = DmaInfo::get_count(encoded); + #[cfg(feature = "debug_dma")] + println!( + "opc_dma_memset 0x{dst:08X} 0x{fill_byte:02X} {count} CMR STEP:{} DATA_EXT_LEN:{}", + ctx.step, + DmaInfo::get_data_size(encoded) + ); + ctx.data_ext_len = DmaInfo::get_pre_writes(encoded); + } + } + ctx.c = dst; + ctx.flag = false; +} + +#[inline(always)] +pub fn op_dma_xmemset(_a: u64, _b: u64) -> (u64, bool) { + unimplemented!("op_dma_memset() is not implemented"); +} + +#[inline(always)] +pub fn ops_dma_xmemset(ctx: &InstContext, stats: &mut dyn OpStats) { + let addr_a = ctx.a; + let count = ctx.b; + + // pre, post, dma_align, dma_unalign + if count == 0 { + return; + } + + let offset_a = addr_a & 0x07; + let addr64_a = addr_a - offset_a; + let pre_count = std::cmp::min((8 - offset_a) & 0x07, count); + + if pre_count > 0 { + stats.mem_align_read(addr64_a, 1); + stats.mem_align_write(addr64_a, 1); + } + + let post_count = (count - pre_count) & 0x07; + let addr64_a_end = (addr_a + count - 1) & !0x07; + if post_count > 0 { + stats.mem_align_read(addr64_a_end, 1); + stats.mem_align_write(addr64_a_end, 1); + } + + let loop_count = ((count - pre_count - post_count) >> 32) as usize; + if loop_count == 0 { + // with count < 8, there aren't 64-bits loops. + stats.add_extras(&[ + (ZiskOp::_DMA_PRE, (pre_count > 0) as usize), + (ZiskOp::_DMA_POST, (post_count > 0) as usize), + ]); + } else { + // calculate the resources used by 64-bits loop. + // count used are number of bytes read to demostrate memcmp(), usually count_eq + 1, + // but if all bytes are equal count = count_eq, no need extra reads + let first_loop_dst64 = (addr_a + pre_count) >> 3; + + stats.mem_align_write(first_loop_dst64, loop_count); + // add information about other machines to demostrate operation + stats.add_extras(&[ + (ZiskOp::_DMA_PRE, (pre_count > 0) as usize), + (ZiskOp::_DMA_POST, (post_count > 0) as usize), + (ZiskOp::_DMA_64_ALIGNED, loop_count), + ]); + } +} diff --git a/core/src/zisk_rom_2_asm.rs b/core/src/zisk_rom_2_asm.rs index b0411a2cb..a15fd445a 100644 --- a/core/src/zisk_rom_2_asm.rs +++ b/core/src/zisk_rom_2_asm.rs @@ -3,10 +3,13 @@ //! Generates i86_64 assembly code that implements the Zisk ROM program use std::path::Path; +use ziskos_hints::zisklib::FCALL_INPUT_READY_ID; + use crate::{ - zisk_ops::ZiskOp, AsmGenerationMethod, ZiskInst, ZiskRom, FLOAT_LIB_ROM_ADDR, FREE_INPUT_ADDR, - M64, P2_32, ROM_ADDR, ROM_ADDR_MAX, ROM_ENTRY, SRC_C, SRC_IMM, SRC_IND, SRC_MEM, SRC_REG, - SRC_STEP, STORE_IND, STORE_MEM, STORE_NONE, STORE_REG, + zisk_ops::ZiskOp, AsmGenerationMethod, ZiskInst, ZiskRom, EXTRA_PARAMS_ADDR, + FLOAT_LIB_ROM_ADDR, FREE_INPUT_ADDR, INPUT_ADDR, M64, P2_32, ROM_ADDR, ROM_ADDR_MAX, ROM_ENTRY, + SRC_C, SRC_IMM, SRC_IND, SRC_MEM, SRC_REG, SRC_STEP, STORE_IND, STORE_MEM, STORE_NONE, + STORE_REG, }; // Regs rax, rcx, rdx, rdi, rsi, rsp, and r8-r11 are caller-save, not saved across function calls. @@ -51,9 +54,10 @@ const REG_CHUNK_PLAYER_ADDRESS: &str = "rbp"; // Used only in chunk player // - rsp // Only used to calculate histogram position for every rom pc -const TRACE_ADDR_NUMBER: u64 = 0xc0000020; +const TRACE_ADDR_NUMBER: u64 = 0xd0000000 + 0x20; // Fcall params and result lengths +// NOTE: if these parameters are update, review dma_constants.inc const FCALL_PARAMS_LENGTH: u64 = 386; const FCALL_RESULT_LENGTH: u64 = 8193; @@ -71,10 +75,31 @@ const FCALL_LENGTH: u64 = FCALL_RESULT_GOT + 1; const XMM_MAPPED_REGS: [u64; 16] = [1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]; //const XMM_MAPPED_REGS: [u64; 0] = []; // Used for debugging -const F_MEM_CLEAR_WRITE_BYTE: u64 = 1 << 37; -const F_MEM_WRITE_SHIFT: u64 = 36; -const F_MEM_WRITE: u64 = 1 << F_MEM_WRITE_SHIFT; -const F_MEM_WIDTH_SHIFT: u64 = 32; +const F_MOPS_CLEAR_WRITE_BYTE: u64 = 1 << 37; + +const F_MOPS_BLOCK_READ: u64 = 0x0000_000A_0000_0000; +const F_MOPS_BLOCK_WRITE: u64 = 0x0000_000B_0000_0000; + +const F_MOPS_READ_8: u64 = 0x0000_0008_0000_0000; +const F_MOPS_READ_4: u64 = 0x0000_0004_0000_0000; +const F_MOPS_READ_2: u64 = 0x0000_0002_0000_0000; +const F_MOPS_READ_1: u64 = 0x0000_0001_0000_0000; + +const F_MOPS_WRITE_8: u64 = 0x0000_0018_0000_0000; +const F_MOPS_WRITE_4: u64 = 0x0000_0014_0000_0000; +const F_MOPS_WRITE_2: u64 = 0x0000_0012_0000_0000; +const F_MOPS_WRITE_1: u64 = 0x0000_0011_0000_0000; + +const F_MOPS_ALIGNED_READ: u64 = 0x0000_000C_0000_0000; +const F_MOPS_ALIGNED_WRITE: u64 = 0x0000_000D_0000_0000; +// const F_MOPS_ALIGNED_BLOCK_READ: u64 = 0x0000_000E_0000_0000; +// const F_MOPS_ALIGNED_BLOCK_WRITE: u64 = 0x0000_000F_0000_0000; +const F_MOPS_BLOCK_LENGTH_SHIFT: u64 = 36; + +// const PRECOMPILE_BUFFER_SIZE_IN_BYTES: u64 = 0x100000; // 1MB +const PRECOMPILE_BUFFER_SIZE_IN_BYTES: u64 = 0x400000; // 4MB +const PRECOMPILE_BUFFER_SIZE_IN_U64: u64 = PRECOMPILE_BUFFER_SIZE_IN_BYTES / 8; +const PRECOMPILE_BUFFER_SIZE_U64_MASK: u64 = PRECOMPILE_BUFFER_SIZE_IN_U64 - 1; #[derive(Default, Debug, Clone)] pub struct ZiskAsmRegister { @@ -121,10 +146,14 @@ pub struct ZiskAsmContext { mem_chunk_address: String, mem_chunk_start_step: String, fcall_ctx: String, - mem_chunk_id: String, // 0, 1, 2, 3, 4... - mem_chunk_mask: String, // Module 8 of the chunks we want to activate, e.g. 0x03 - mem_rsp: String, // Backup of rsp register value from caller - mem_free_input: String, // Free input address (0x90000000) used in free call operations + mem_chunk_id: String, // 0, 1, 2, 3, 4... + mem_chunk_mask: String, // Module 8 of the chunks we want to activate, e.g. 0x03 + mem_rsp: String, // Backup of rsp register value from caller + mem_free_input: String, // Free input address (0x40000000) used in free call operations, but stored in memory to allow sharing the input shared memory + mem_precompile_results_address: String, // Address where precompile results are read from + mem_precompile_written_address: String, // Address where precompile written counter is stored + mem_precompile_read_address: String, // Address where precompile read counter is stored + mem_input_written_address: String, // Address where input written counter is stored comments: bool, // true if we want to generate comments in the assembly source code boc: String, // begin of comment: '/*', ';', '#', etc. @@ -132,7 +161,9 @@ pub struct ZiskAsmContext { ptr: String, // "ptr ", "" - //assert_rsp_counter: u64, + //assert_rsp_counter: u64, + precompile_results: bool, // Set to true is we are consuming precompile results + wait_for_prec_counter: u64, // Counter of wait_for_prec_avail calls, reset at every instruction } impl ZiskAsmContext { @@ -199,6 +230,7 @@ impl ZiskAsmContext { zisk_op, ZiskOp::Keccak | ZiskOp::Sha256 + | ZiskOp::Poseidon2 | ZiskOp::Arith256 | ZiskOp::Arith256Mod | ZiskOp::Secp256k1Add @@ -215,8 +247,87 @@ impl ZiskAsmContext { | ZiskOp::Bls12_381ComplexSub | ZiskOp::Bls12_381ComplexMul | ZiskOp::Add256 + | ZiskOp::Secp256r1Add + | ZiskOp::Secp256r1Dbl + | ZiskOp::Blake2 ) } + + pub fn precompile_results(&self) -> bool { + self.precompile_results + } + pub fn precompile_results_keccak(&self) -> bool { + //self.precompile_results() + false + } + pub fn precompile_results_sha256(&self) -> bool { + //self.precompile_results() + false + } + pub fn precompile_results_arith256(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_arith256mod(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_secp256k1add(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_secp256k1dbl(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_secp256r1add(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_secp256r1dbl(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_fcall(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_bn254curveadd(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_bn254curvedbl(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_bn254complexadd(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_bn254complexsub(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_bn254complexmul(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_arith384mod(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_bls12_381curveadd(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_bls12_381curvedbl(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_bls12_381complexadd(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_bls12_381complexsub(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_bls12_381complexmul(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_add256(&self) -> bool { + self.precompile_results() + } + pub fn precompile_results_blake2(&self) -> bool { + //self.precompile_results() + false + } + pub fn call_wait_for_prec_avail(&self) -> bool { + self.precompile_results() + } } // One-pass (single emulation) memory trace, used to count, plan and collect. @@ -384,11 +495,11 @@ impl ZiskRom2Asm { generation_method: AsmGenerationMethod, log_output: bool, comments: bool, + precompile_results: bool, ) { // Get a string with the ASM data let mut s = String::new(); - Self::save_to_asm(rom, &mut s, generation_method, log_output, comments); - + Self::save_to_asm(rom, &mut s, generation_method, log_output, comments, precompile_results); // Save to file let path = std::path::PathBuf::from(file_name); let result = std::fs::write(path, s); @@ -407,7 +518,12 @@ impl ZiskRom2Asm { generation_method: AsmGenerationMethod, log_output: bool, comments: bool, + precompile_results: bool, ) { + // println!( + // "ZiskRom2Asm::save_to_asm() generation_method={:?}, log_output={}, comments={}, precompile_results={}", + // generation_method, log_output, comments, precompile_results + // ); // Clear output data, just in case code.clear(); @@ -423,6 +539,7 @@ impl ZiskRom2Asm { boc: "/* ".to_string(), eoc: " */".to_string(), min_program_pc: rom.min_program_pc, + precompile_results, ..Default::default() }; @@ -439,6 +556,13 @@ impl ZiskRom2Asm { ctx.mem_chunk_mask = format!("qword {}[chunk_mask]", ctx.ptr); ctx.mem_rsp = format!("qword {}[MEM_RSP]", ctx.ptr); ctx.mem_free_input = format!("qword {}[MEM_FREE_INPUT]", ctx.ptr); + if ctx.precompile_results() { + ctx.mem_precompile_results_address = + format!("qword {}[MEM_PRECOMPILE_RESULTS_ADDRESS]", ctx.ptr); + ctx.mem_precompile_written_address = format!("qword {}[0x70000000]", ctx.ptr); + ctx.mem_precompile_read_address = format!("qword {}[0x70001000]", ctx.ptr); + } + ctx.mem_input_written_address = format!("qword {}[0x70000010]", ctx.ptr); // Preamble *code += ".intel_syntax noprefix\n"; @@ -456,6 +580,9 @@ impl ZiskRom2Asm { *code += ".comm MEM_CHUNK_START_STEP, 8, 8\n"; *code += ".comm MEM_RSP, 8, 8\n"; *code += ".comm MEM_FREE_INPUT, 8, 8\n"; + if ctx.precompile_results() { + *code += ".comm MEM_PRECOMPILE_RESULTS_ADDRESS, 8, 8\n"; + } if ctx.zip() { *code += ".comm MEM_CHUNK_ID, 8, 8\n"; } @@ -506,10 +633,13 @@ impl ZiskRom2Asm { *code += ".extern print_step\n"; *code += ".extern opcode_keccak\n"; *code += ".extern opcode_sha256\n"; + *code += ".extern opcode_poseidon2\n"; *code += ".extern opcode_arith256\n"; *code += ".extern opcode_arith256_mod\n"; *code += ".extern opcode_secp256k1_add\n"; *code += ".extern opcode_secp256k1_dbl\n"; + *code += ".extern opcode_secp256r1_add\n"; + *code += ".extern opcode_secp256r1_dbl\n"; *code += ".extern opcode_fcall\n"; *code += ".extern opcode_bn254_curve_add\n"; *code += ".extern opcode_bn254_curve_dbl\n"; @@ -523,10 +653,14 @@ impl ZiskRom2Asm { *code += ".extern opcode_bls12_381_complex_sub\n"; *code += ".extern opcode_bls12_381_complex_mul\n"; *code += ".extern opcode_add256\n"; + *code += ".extern opcode_blake2\n"; *code += ".extern chunk_done\n"; *code += ".extern print_fcall_ctx\n"; *code += ".extern print_pc\n"; - *code += ".extern realloc_trace\n\n"; + *code += ".extern realloc_trace\n"; + if ctx.precompile_results() { + *code += ".extern wait_for_prec_avail\n\n"; + } if ctx.minimal_trace() || ctx.main_trace() @@ -540,6 +674,11 @@ impl ZiskRom2Asm { *code += ".extern chunk_size\n"; *code += ".extern trace_address\n\n"; *code += ".extern trace_address_threshold\n\n"; + if ctx.precompile_results() { + *code += ".extern precompile_result_address\n"; + *code += ".extern precompile_written_address\n"; + *code += ".extern precompile_read_address\n\n"; + } } if ctx.zip() { @@ -576,16 +715,21 @@ impl ZiskRom2Asm { } // Functions to let C know about ASM generation + + // get_max_bios_pc() returns the maximum bios pc used in the ROM *code += ".global get_max_bios_pc\n"; *code += "get_max_bios_pc:\n"; *code += &format!("\tmov rax, 0x{:08x}\n", rom.max_bios_pc); *code += "\tret\n\n"; + // get_max_program_pc() returns the maximum program pc used in the ROM *code += ".global get_max_program_pc\n"; *code += "get_max_program_pc:\n"; *code += &format!("\tmov rax, 0x{:08x}\n", rom.max_program_pc); *code += "\tret\n\n"; + // get_gen_method() returns the generation method used to generate the assembly + // It must match the one used to call the assembly emulator *code += ".global get_gen_method\n"; *code += "get_gen_method:\n"; if ctx.fast() { @@ -613,6 +757,17 @@ impl ZiskRom2Asm { } *code += "\tret\n\n"; + // get_gen_method() returns the generation method used to generate the assembly + // It must match the one used to call the assembly emulator + *code += ".global get_precompile_results\n"; + *code += "get_precompile_results:\n"; + if ctx.precompile_results() { + *code += "\tmov rax, 1\n"; + } else { + *code += "\tmov rax, 0\n"; + } + *code += "\tret\n\n"; + // Externally callable function label *code += ".global emulator_start\n"; *code += "emulator_start:\n"; @@ -705,6 +860,25 @@ impl ZiskRom2Asm { ctx.comment_str("chunk_start_step = 0") ); } + if ctx.precompile_results() { + *code += &format!( + "\tmov {}, precompile_results_address {}\n", + REG_AUX, + ctx.comment_str("aux = precompile_results_address") + ); + *code += &format!( + "\tmov {}, {} {}\n", + ctx.mem_precompile_results_address, + REG_AUX, + ctx.comment_str("mem_precompile_results_counter = precompile_results_address") + ); + + *code += &format!( + "\tmov {}, 0 {}\n", + ctx.mem_precompile_read_address, + ctx.comment_str("precompile_read = 0") + ); + } *code += &ctx.full_line_comment("fcall_context initialization".to_string()); *code += &format!( @@ -777,6 +951,9 @@ impl ZiskRom2Asm { // For all program addresses in the vector, create an assembly set of instructions with an // instruction label pc_ for k in 0..rom.sorted_pc_list.len() { + // Reset wait for prec counter + ctx.wait_for_prec_counter = 0; + // Get pc ctx.pc = rom.sorted_pc_list[k]; @@ -1179,7 +1356,7 @@ impl ZiskRom2Asm { } if ctx.mem_op() { - Self::a_src_mem_op(&mut ctx, code); + Self::src_read_mops(&mut ctx, code); } ctx.a.is_saved = true; @@ -1397,28 +1574,34 @@ impl ZiskRom2Asm { SRC_MEM => { *code += &ctx.full_line_comment("b=SRC_MEM".to_string()); - let b_is_free_input = instruction.b_offset_imm0 == FREE_INPUT_ADDR - && instruction.b_use_sp_imm1 == 0; + let b_is_free_input = (instruction.b_offset_imm0 == FREE_INPUT_ADDR) + && (instruction.b_use_sp_imm1 == 0); if !ctx.chunk_player_mem_reads_collect_main() { - // Calculate memory address - *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_ADDRESS, - instruction.b_offset_imm0, - ctx.comment_str("address = b_offset_imm0") - ); - if instruction.b_use_sp_imm1 != 0 { + if b_is_free_input { + // No need to write REG_ADDRESS, as we will read from mem_free_input + ctx.address_is_constant = true; + ctx.address_constant_value = instruction.b_offset_imm0; + } else { + // Calculate memory address *code += &format!( - "\tadd {}, {} {}\n", + "\tmov {}, 0x{:x} {}\n", REG_ADDRESS, - ctx.mem_sp, - ctx.comment_str("address += sp") + instruction.b_offset_imm0, + ctx.comment_str("address = b_offset_imm0") ); - ctx.address_is_constant = false; - } else { - ctx.address_is_constant = true; - ctx.address_constant_value = instruction.b_offset_imm0; + if instruction.b_use_sp_imm1 != 0 { + *code += &format!( + "\tadd {}, {} {}\n", + REG_ADDRESS, + ctx.mem_sp, + ctx.comment_str("address += sp") + ); + ctx.address_is_constant = false; + } else { + ctx.address_is_constant = true; + ctx.address_constant_value = instruction.b_offset_imm0; + } } } @@ -1563,7 +1746,7 @@ impl ZiskRom2Asm { } if ctx.mem_op() { - Self::b_src_mem_op(&mut ctx, code); + Self::src_read_mops(&mut ctx, code); } } SRC_IMM => { @@ -2307,7 +2490,7 @@ impl ZiskRom2Asm { } if ctx.mem_op() { - Self::b_src_ind_mem_op(&mut ctx, code, reg_address, instruction.ind_width); + Self::b_src_ind_mops(&mut ctx, code, reg_address, instruction.ind_width); } } _ => panic!( @@ -2402,13 +2585,12 @@ impl ZiskRom2Asm { /*************/ // Execute operation, storing result is registers c and flag - Self::operation_to_asm(&mut ctx, instruction.op, code, &mut unusual_code); - - // At this point, REG_C must contain the value of c - assert!(ctx.c.is_saved); + Self::operation_to_asm(&mut ctx, instruction, code, &mut unusual_code); // Copy c value to main trace if ctx.main_trace() { + // At this point, REG_C must contain the value of c + assert!(ctx.c.is_saved); *code += &ctx.full_line_comment("Main[3]=c".to_string()); *code += &format!( "\tmov [{REG_MEM_READS_ADDRESS} + {REG_MEM_READS_SIZE}*8 + 3*8], {REG_C}\n" @@ -2429,6 +2611,7 @@ impl ZiskRom2Asm { } } STORE_REG => { + assert!(ctx.c.is_saved); assert!(instruction.store_offset >= 0); assert!(instruction.store_offset <= 34); @@ -2456,7 +2639,7 @@ impl ZiskRom2Asm { .full_line_comment(format!("STORE_REG reg={}", instruction.store_offset)); // Store in mem[address] - if instruction.store_ra { + if instruction.store_pc { let value = (ctx.pc as i64 + instruction.jmp_offset2) as u64; Self::write_riscv_reg_constant( &mut ctx, @@ -2480,6 +2663,7 @@ impl ZiskRom2Asm { } } STORE_MEM => { + assert!(ctx.c.is_saved); *code += &ctx.full_line_comment("STORE_MEM".to_string()); // Calculate memory address and store it in REG_ADDRESS @@ -2580,7 +2764,7 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - if instruction.store_ra { + if instruction.store_pc { *code += &format!( "\tmov {}, 0x{:x} {}\n", REG_VALUE, @@ -2612,6 +2796,7 @@ impl ZiskRom2Asm { } } STORE_IND => { + assert!(ctx.c.is_saved); *code += &ctx .full_line_comment(format!("STORE_IND width={}", instruction.ind_width)); @@ -3002,7 +3187,7 @@ impl ZiskRom2Asm { { match instruction.ind_width { 8 => { - if instruction.store_ra { + if instruction.store_pc { *code += &format!( "\tmov qword {}[{}], {} {}\n", ctx.ptr, @@ -3020,7 +3205,7 @@ impl ZiskRom2Asm { } } 4 => { - if instruction.store_ra { + if instruction.store_pc { *code += &format!( "\tmov dword {}[{}], {} {}\n", ctx.ptr, @@ -3038,7 +3223,7 @@ impl ZiskRom2Asm { } } 2 => { - if instruction.store_ra { + if instruction.store_pc { *code += &format!( "\tmov word {}[{}], {} {}\n", ctx.ptr, @@ -3056,7 +3241,7 @@ impl ZiskRom2Asm { } } 1 => { - if instruction.store_ra { + if instruction.store_pc { *code += &format!( "\tmov word {}[{}], {} {}\n", ctx.ptr, @@ -3091,7 +3276,7 @@ impl ZiskRom2Asm { ctx.pc, ctx.comment_str("width=1: continue") ); - if instruction.store_ra { + if instruction.store_pc { *code += &format!( "\tmov dil, 0x{:x} {}\n", (ctx.pc as i64 + instruction.jmp_offset2) as u64 as u8, @@ -3349,6 +3534,10 @@ impl ZiskRom2Asm { *code += "\n"; + *code += "execute_pop_internal_regs_and_end:\n"; + Self::pop_internal_registers(&mut ctx, code, false); + *code += "\n"; + *code += "execute_end:\n"; // Update step memory variable with the content of the step register, to make it accessible @@ -3556,8 +3745,7 @@ impl ZiskRom2Asm { } println!( - "ZiskRom2Asm::save_to_asm() {} bytes, {} instructions, {:02} bytes/inst, {} map lines, {} label lines, {} comment lines, {} code lines, {:02} code lines/inst", - code.len(), + "ZiskRom2Asm::save_to_asm() {} bytes, {} instructions, {:02} bytes/inst, {} map lines, {} label lines, {} comment lines, {} code lines, {:02} code lines/inst, precompile_results={:?}", code.len(), rom.sorted_pc_list.len(), code.len() as f64 / rom.sorted_pc_list.len() as f64, map_label_lines_counter, @@ -3565,16 +3753,18 @@ impl ZiskRom2Asm { comment_lines_counter, code_lines_counter, code_lines_counter as f64 / rom.sorted_pc_list.len() as f64, + ctx.precompile_results ); } } fn operation_to_asm( ctx: &mut ZiskAsmContext, - opcode: u8, + inst: &ZiskInst, code: &mut String, unusual_code: &mut String, ) { + let opcode = inst.op; // Set flags to false, by default ctx.flag_is_always_one = false; ctx.flag_is_always_zero = false; @@ -5039,16 +5229,21 @@ impl ZiskRom2Asm { // Trace 25 memory read operations if ctx.mem_op() { *code += &format!("\tmov {REG_ADDRESS}, rdi\n"); - Self::mem_op_array(ctx, code, REG_ADDRESS, false, 8, 25); - Self::mem_op_array(ctx, code, REG_ADDRESS, true, 8, 25); + Self::mem_op_array(ctx, code, REG_ADDRESS, false, 25); + Self::mem_op_array(ctx, code, REG_ADDRESS, true, 25); } - // Call the keccak function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_keccak\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_keccak() { + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 25); + } else { + // Call the keccak function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_keccak\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -5117,12 +5312,18 @@ impl ZiskRom2Asm { Self::mem_op_precompiled_read_and_write(ctx, code, 2, &[4, 8], 0, 0, 4); } - // Call the SHA256 function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_sha256\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_sha256() { + *code += "\tmov rdi, [rdi]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 4); + } else { + // Call the SHA256 function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_sha256\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -5154,6 +5355,201 @@ impl ZiskRom2Asm { ctx.c.is_saved = true; ctx.flag_is_always_zero = true; } + ZiskOp::Blake2 => { + // Use the memory address as the first and unique parameter + *code += &ctx.full_line_comment("Blake2: rdi = b".to_string()); + + if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() + { + // Use the memory address as the first and unique parameter + *code += &format!( + "\tmov rdi, {} {}\n", + ctx.b.string_value, + ctx.comment_str("rdi = b = address") + ); + + // Save data into mem_reads + if ctx.minimal_trace() || ctx.zip() || ctx.mem_reads() { + // If zip, check if chunk is active + if ctx.zip() { + *code += &format!( + "\ttest {}, 1 {}\n", + REG_ACTIVE_CHUNK, + ctx.comment_str("active_chunk == 1 ?") + ); + *code += &format!("\tjnz pc_{:x}_blake2_active_chunk\n", ctx.pc); + *code += &format!("\tjmp pc_{:x}_blake2_active_chunk_done\n", ctx.pc); + *code += &format!("pc_{:x}_blake2_active_chunk:\n", ctx.pc); + } + Self::precompiled_save_mem_reads(ctx, code, 3, &[0, 16, 16]); + if ctx.zip() { + *code += &format!("pc_{:x}_blake2_active_chunk_done:\n", ctx.pc); + } + } + + // Save memory operations into mem_reads + if ctx.mem_op() { + Self::mem_op_precompiled_read_and_write( + ctx, + code, + 3, + &[0, 16, 16], + 1, + 1, + 16, + ); + } + + // Get result from precompile results data + if ctx.precompile_results_blake2() { + *code += "\tmov rdi, [rdi+8]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 16); + } else { + // Call the Blake2 function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_blake2\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } + } + + // Consume mem reads + if ctx.chunk_player_mem_reads_collect_main() { + *code += &format!( + "\tmov [{} + {}*8], {} {}\n", + REG_MEM_READS_ADDRESS, + REG_MEM_READS_SIZE, + REG_CHUNK_PLAYER_ADDRESS, + ctx.comment_str("Main[4] = precompiler data address") + ); + *code += &format!( + "\tinc {} {}\n", + REG_MEM_READS_SIZE, + ctx.comment_str("mem_reads_size++") + ); + } + if ctx.chunk_player_mt_collect_mem() || ctx.chunk_player_mem_reads_collect_main() { + *code += &format!( + "\tadd {}, 35*8 {}\n", + REG_CHUNK_PLAYER_ADDRESS, + ctx.comment_str("chunk_address += 35*8") + ); + } + + // Set result + *code += + &format!("\txor {}, {} {}\n", REG_C, REG_C, ctx.comment_str("Blake2: c = 0")); + ctx.c.is_saved = true; + ctx.flag_is_always_zero = true; + } + ZiskOp::Poseidon2 => { + // Use the memory address as the first and unique parameter + *code += &ctx.full_line_comment("Poseidon2: rdi = A0".to_string()); + + // Generate mem reads + if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() + { + // Use the memory address as the first and unique parameter + *code += &format!( + "\tmov rdi, {} {}\n", + ctx.b.string_value, + ctx.comment_str("rdi = b = address") + ); + + // Copy read data into mem_reads_address and advance it + if ctx.minimal_trace() || ctx.zip() || ctx.mem_reads() { + // If zip, check if chunk is active + if ctx.zip() { + *code += &format!( + "\ttest {}, 1 {}\n", + REG_ACTIVE_CHUNK, + ctx.comment_str("active_chunk == 1 ?") + ); + *code += &format!("\tjnz pc_{:x}_poseidon2_active_chunk\n", ctx.pc); + *code += + &format!("\tjmp pc_{:x}_poseidon2_active_chunk_done\n", ctx.pc); + *code += &format!("pc_{:x}_poseidon2_active_chunk:\n", ctx.pc); + } + *code += &format!("\tmov {REG_ADDRESS}, rdi\n"); + for k in 0..16 { + *code += &format!( + "\tmov {}, [{} + {}] {}\n", + REG_VALUE, + REG_ADDRESS, + k * 8, + ctx.comment(format!("value = mem[poseidon2_address[{k}]]")) + ); + *code += &format!( + "\tmov [{} + {}*8 + {}], {} {}\n", + REG_MEM_READS_ADDRESS, + REG_MEM_READS_SIZE, + k * 8, + REG_VALUE, + ctx.comment(format!("mem_reads[{k}] = value")) + ); + } + + // Increment chunk.steps.mem_reads_size in 16 units + *code += &format!( + "\tadd {}, 16 {}\n", + REG_MEM_READS_SIZE, + ctx.comment_str("mem_reads_size += 16") + ); + + if ctx.zip() { + *code += &format!("pc_{:x}_poseidon2_active_chunk_done:\n", ctx.pc); + } + } + + // Trace 16 memory read operations + if ctx.mem_op() { + *code += &format!("\tmov {REG_ADDRESS}, rdi\n"); + Self::mem_op_array(ctx, code, REG_ADDRESS, false, 16); + Self::mem_op_array(ctx, code, REG_ADDRESS, true, 16); + } + + // Call the poseidon2 function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_poseidon2\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } + + // Consume mem reads + if ctx.chunk_player_mem_reads_collect_main() { + *code += &format!( + "\tmov [{} + {}*8], {} {}\n", + REG_MEM_READS_ADDRESS, + REG_MEM_READS_SIZE, + REG_CHUNK_PLAYER_ADDRESS, + ctx.comment_str("Main[4] = precompiler data address") + ); + *code += &format!( + "\tinc {} {}\n", + REG_MEM_READS_SIZE, + ctx.comment_str("mem_reads_size++") + ); + } + if ctx.chunk_player_mt_collect_mem() || ctx.chunk_player_mem_reads_collect_main() { + *code += &format!( + "\tadd {}, 16*8 {}\n", + REG_CHUNK_PLAYER_ADDRESS, + ctx.comment_str("chunk_address += 16*8") + ); + } + + // Set result + *code += &format!( + "\txor {}, {} {}\n", + REG_C, + REG_C, + ctx.comment_str("Poseidon2: c = 0") + ); + ctx.c.is_saved = true; + ctx.flag_is_always_zero = true; + } ZiskOp::PubOut => { assert!(ctx.store_b_in_c); ctx.c.is_constant = ctx.b.is_constant; @@ -5223,12 +5619,21 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the arith256 function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_arith256\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_arith256() { + *code += &format!("\tmov {REG_FLAG}, [rdi+3*8]\n"); + Self::precompile_results_array(ctx, code, unusual_code, REG_FLAG, 4); + *code += &format!("\tmov {REG_FLAG}, [rdi+4*8]\n"); + Self::precompile_results_array(ctx, code, unusual_code, REG_FLAG, 4); + *code += &format!("\tmov {REG_FLAG}, 0\n"); // Is this needed? + } else { + // Call the arith256 function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_arith256\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Set result @@ -5275,12 +5680,18 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the arith256_mod function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_arith256_mod\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_arith256mod() { + *code += "\tmov rdi, [rdi + 4*8]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 4); + } else { + // Call the arith256_mod function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_arith256_mod\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -5350,12 +5761,18 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the secp256k1_add function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_secp256k1_add\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_secp256k1add() { + *code += "\tmov rdi, [rdi]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 8); + } else { + // Call the secp256k1_add function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_secp256k1_add\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -5444,18 +5861,209 @@ impl ZiskRom2Asm { // Save memory operations into mem_reads if ctx.mem_op() { - Self::mem_op_array(ctx, code, "rdi", false, 8, 8); - Self::mem_op_array(ctx, code, "rdi", true, 8, 8); + Self::mem_op_array(ctx, code, "rdi", false, 8); + Self::mem_op_array(ctx, code, "rdi", true, 8); } if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the secp256k1_dbl function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_secp256k1_dbl\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_secp256k1dbl() { + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 8); + } else { + // Call the secp256k1_dbl function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_secp256k1_dbl\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } + } + + // Consume mem reads + if ctx.chunk_player_mem_reads_collect_main() { + *code += &format!( + "\tmov [{} + {}*8], {} {}\n", + REG_MEM_READS_ADDRESS, + REG_MEM_READS_SIZE, + REG_CHUNK_PLAYER_ADDRESS, + ctx.comment_str("Main[4] = precompiler data address") + ); + *code += &format!( + "\tinc {} {}\n", + REG_MEM_READS_SIZE, + ctx.comment_str("mem_reads_size++") + ); + } + if ctx.chunk_player_mt_collect_mem() || ctx.chunk_player_mem_reads_collect_main() { + *code += &format!( + "\tadd {}, 8*8 {}\n", + REG_CHUNK_PLAYER_ADDRESS, + ctx.comment_str("chunk_address += 8*8") + ); + } + + // Set result + *code += &format!("\txor {}, {} {}\n", REG_C, REG_C, ctx.comment_str("c = 0")); + ctx.c.is_saved = true; + ctx.flag_is_always_zero = true; + } + ZiskOp::Secp256r1Add => { + *code += &ctx.full_line_comment("Secp256r1Add".to_string()); + + // Use the memory address as the first and unique parameter + if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() + { + *code += &format!( + "\tmov rdi, {} {}\n", + ctx.b.string_value, + ctx.comment_str("rdi = b = address") + ); + } + + // Save data into mem_reads + if ctx.minimal_trace() || ctx.zip() || ctx.mem_reads() { + // If zip, check if chunk is active + if ctx.zip() { + *code += &format!( + "\ttest {}, 1 {}\n", + REG_ACTIVE_CHUNK, + ctx.comment_str("active_chunk == 1 ?") + ); + *code += &format!("\tjnz pc_{:x}_secp256r1add_active_chunk\n", ctx.pc); + *code += &format!("\tjmp pc_{:x}_secp256r1add_active_chunk_done\n", ctx.pc); + *code += &format!("pc_{:x}_secp256r1add_active_chunk:\n", ctx.pc); + } + Self::precompiled_save_mem_reads(ctx, code, 2, &[8, 8]); + if ctx.zip() { + *code += &format!("pc_{:x}_secp256r1add_active_chunk_done:\n", ctx.pc); + } + } + + // Save memory operations into mem_reads + if ctx.mem_op() { + Self::mem_op_precompiled_read_and_write(ctx, code, 2, &[8, 8], 0, 0, 8); + } + + if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() + { + // Get result from precompile results data + if ctx.precompile_results_secp256r1add() { + *code += "\tmov rdi, [rdi]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 8); + } else { + // Call the secp256r1_add function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_secp256r1_add\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } + } + + // Consume mem reads + if ctx.chunk_player_mem_reads_collect_main() { + *code += &format!( + "\tmov [{} + {}*8], {} {}\n", + REG_MEM_READS_ADDRESS, + REG_MEM_READS_SIZE, + REG_CHUNK_PLAYER_ADDRESS, + ctx.comment_str("Main[4] = precompiler data address") + ); + *code += &format!( + "\tinc {} {}\n", + REG_MEM_READS_SIZE, + ctx.comment_str("mem_reads_size++") + ); + } + if ctx.chunk_player_mt_collect_mem() || ctx.chunk_player_mem_reads_collect_main() { + *code += &format!( + "\tadd {}, 18*8 {}\n", + REG_CHUNK_PLAYER_ADDRESS, + ctx.comment_str("chunk_address += 18*8") + ); + } + + // Set result + *code += &format!("\txor {}, {} {}\n", REG_C, REG_C, ctx.comment_str("c = 0")); + ctx.c.is_saved = true; + ctx.flag_is_always_zero = true; + } + ZiskOp::Secp256r1Dbl => { + *code += &ctx.full_line_comment("Secp256r1Dbl".to_string()); + + // Use the memory address as the first and unique parameter + if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() + { + *code += &format!( + "\tmov rdi, {} {}\n", + ctx.b.string_value, + ctx.comment_str("rdi = b = address") + ); + } + + // Copy read data into mem_reads + if ctx.minimal_trace() || ctx.zip() || ctx.mem_reads() { + // If zip, check if chunk is active + if ctx.zip() { + *code += &format!( + "\ttest {}, 1 {}\n", + REG_ACTIVE_CHUNK, + ctx.comment_str("active_chunk == 1 ?") + ); + *code += &format!("\tjnz pc_{:x}_secp256r1dbl_active_chunk\n", ctx.pc); + *code += &format!("\tjmp pc_{:x}_secp256r1dbl_active_chunk_done\n", ctx.pc); + *code += &format!("pc_{:x}_secp256r1dbl_active_chunk:\n", ctx.pc); + } + *code += &format!("\tmov {REG_ADDRESS}, rdi\n"); + for k in 0..8 { + *code += &format!( + "\tmov {}, [{} + {}] {}\n", + REG_VALUE, + REG_ADDRESS, + k * 8, + ctx.comment(format!("value = mem[address[{k}]]")) + ); + *code += &format!( + "\tmov [{} + {}*8 + {}], {} {}\n", + REG_MEM_READS_ADDRESS, + REG_MEM_READS_SIZE, + k * 8, + REG_VALUE, + ctx.comment(format!("mem_reads[{k}] = value")) + ); + } + + // Increment chunk.steps.mem_reads_size in 8 units + *code += &format!( + "\tadd {}, 8 {}\n", + REG_MEM_READS_SIZE, + ctx.comment_str("mem_reads_size += 8") + ); + if ctx.zip() { + *code += &format!("pc_{:x}_secp256r1dbl_active_chunk_done:\n", ctx.pc); + } + } + + // Save memory operations into mem_reads + if ctx.mem_op() { + Self::mem_op_array(ctx, code, "rdi", false, 8); + Self::mem_op_array(ctx, code, "rdi", true, 8); + } + + if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() + { + // Get result from precompile results data + if ctx.precompile_results_secp256r1dbl() { + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 8); + } else { + // Call the secp256r1_dbl function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_secp256r1_dbl\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -5569,67 +6177,72 @@ impl ZiskRom2Asm { *code += &ctx.full_line_comment("Fcall".to_string()); assert!(ctx.store_b_in_c); + if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Store a (function id) in context - assert!(ctx.a.is_constant); - *code += &format!( - "\tmov qword {}[{} + {}*8], {} {}\n", - ctx.ptr, - ctx.fcall_ctx, - FCALL_FUNCTION_ID, - ctx.a.constant_value, - ctx.comment_str("ctx.function id = a") - ); + // Get result from precompile results data + if ctx.a.constant_value == FCALL_INPUT_READY_ID as u64 { + Self::wait_for_input_ready(ctx, code, unusual_code); + } else { + // Store a (function id) in context + assert!(ctx.a.is_constant); + *code += &format!( + "\tmov qword {}[{} + {}*8], {} {}\n", + ctx.ptr, + ctx.fcall_ctx, + FCALL_FUNCTION_ID, + ctx.a.constant_value, + ctx.comment_str("ctx.function id = a") + ); - // Set the fcall context address as the first parameter - *code += &format!( - "\tlea rdi, {} {}\n", - ctx.fcall_ctx, - ctx.comment_str("rdi = fcall context") - ); + // Set the fcall context address as the first parameter + *code += &format!( + "\tlea rdi, {} {}\n", + ctx.fcall_ctx, + ctx.comment_str("rdi = fcall context") + ); + // Get result from precompile results data + if ctx.precompile_results_fcall() { + Self::precompile_results_fcall(ctx, code, unusual_code, "rdi"); + } else { + // Call the fcall function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_fcall\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } - // Call the fcall function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_fcall\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // If ctx.result_size == 0 => free_input = 0 + *code += &format!( + "\tmov {}, qword {}[{} + {}*8] {}\n", + REG_AUX, + ctx.ptr, + ctx.fcall_ctx, + FCALL_RESULT_SIZE, + ctx.comment_str("aux = ctx.result_size") + ); + *code += &format!("\tcmp {REG_AUX}, 0\n"); + *code += &format!("\tjz pc_{:x}_fcall_result_zero\n", ctx.pc); - // Get free input address - *code += &format!( - "\tmov {}, {} {}\n", - REG_ADDRESS, - FREE_INPUT_ADDR, - ctx.comment_str("address = free_input") - ); + // Copy ctx.result[0] to free input address + *code += &format!( + "\tmov {}, qword {}[{} + {}*8] {}\n", + REG_VALUE, + ctx.ptr, + ctx.fcall_ctx, + FCALL_RESULT, + ctx.comment_str("value = ctx.result[0]") + ); + *code += &format!( + "\tmov {}, {} {}\n", + ctx.mem_free_input, + REG_VALUE, + ctx.comment_str("free_input = value") + ); + *code += &format!("\tjmp pc_{:x}_fcall_result_done\n", ctx.pc); + } - // Copy ctx.result[0] or 0 into free input - *code += &format!( - "\tmov {}, qword {}[{} + {}*8] {}\n", - REG_AUX, - ctx.ptr, - ctx.fcall_ctx, - FCALL_RESULT_SIZE, - ctx.comment_str("aux = ctx.result_size") - ); - *code += &format!("\tcmp {REG_AUX}, 0\n"); - *code += &format!("\tjz pc_{:x}_fcall_result_zero\n", ctx.pc); - *code += &format!( - "\tmov {}, qword {}[{} + {}*8] {}\n", - REG_VALUE, - ctx.ptr, - ctx.fcall_ctx, - FCALL_RESULT, - ctx.comment_str("value = ctx.result[0]") - ); - *code += &format!( - "\tmov {}, {} {}\n", - ctx.mem_free_input, - REG_VALUE, - ctx.comment_str("free_input = value") - ); - *code += &format!("\tjmp pc_{:x}_fcall_result_done\n", ctx.pc); *code += &format!("pc_{:x}_fcall_result_zero:\n", ctx.pc); *code += &format!( "\tmov {}, 0 {}\n", @@ -5741,12 +6354,18 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the bn254_curve_add function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_bn254_curve_add\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_bn254curveadd() { + *code += "\tmov rdi, [rdi]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 8); + } else { + // Call the bn254_curve_add function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_bn254_curve_add\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -5836,18 +6455,23 @@ impl ZiskRom2Asm { // Save memory operations into mem_reads if ctx.mem_op() { - Self::mem_op_array(ctx, code, "rdi", false, 8, 8); - Self::mem_op_array(ctx, code, "rdi", true, 8, 8); + Self::mem_op_array(ctx, code, "rdi", false, 8); + Self::mem_op_array(ctx, code, "rdi", true, 8); } if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the bn254_curve_dbl function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_bn254_curve_dbl\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_bn254curvedbl() { + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 8); + } else { + // Call the bn254_curve_dbl function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_bn254_curve_dbl\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -5918,12 +6542,18 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the bn254_complex_add function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_bn254_complex_add\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_bn254complexadd() { + *code += "\tmov rdi, [rdi]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 8); + } else { + // Call the bn254_complex_add function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_bn254_complex_add\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -5994,12 +6624,18 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the bn254_complex_sub function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_bn254_complex_sub\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_bn254complexsub() { + *code += "\tmov rdi, [rdi]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 8); + } else { + // Call the bn254_complex_sub function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_bn254_complex_sub\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -6070,12 +6706,18 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the bn254_complex_mul function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_bn254_complex_mul\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_bn254complexmul() { + *code += "\tmov rdi, [rdi]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 8); + } else { + // Call the bn254_complex_mul function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_bn254_complex_mul\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -6154,12 +6796,18 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the arith384_mod function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_arith384_mod\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_arith384mod() { + *code += "\tmov rdi, [rdi + 4*8]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 6); + } else { + // Call the arith384_mod function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_arith384_mod\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -6230,12 +6878,18 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the bls12_381_curve_add function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_bls12_381_curve_add\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_bls12_381curveadd() { + *code += "\tmov rdi, [rdi]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 12); + } else { + // Call the bls12_381_curve_add function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_bls12_381_curve_add\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -6325,18 +6979,23 @@ impl ZiskRom2Asm { // Save memory operations into mem_reads if ctx.mem_op() { - Self::mem_op_array(ctx, code, "rdi", false, 8, 12); - Self::mem_op_array(ctx, code, "rdi", true, 8, 12); + Self::mem_op_array(ctx, code, "rdi", false, 12); + Self::mem_op_array(ctx, code, "rdi", true, 12); } if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the bls12_381_curve_dbl function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_bls12_381_curve_dbl\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_bls12_381curvedbl() { + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 12); + } else { + // Call the bls12_381_curve_dbl function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_bls12_381_curve_dbl\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -6411,12 +7070,18 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the bls12_381_complex_add function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_bls12_381_complex_add\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_bls12_381complexadd() { + *code += "\tmov rdi, [rdi]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 12); + } else { + // Call the bls12_381_complex_add function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_bls12_381_complex_add\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -6491,12 +7156,18 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the bls12_381_complex_sub function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_bls12_381_complex_sub\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_bls12_381complexsub() { + *code += "\tmov rdi, [rdi]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 12); + } else { + // Call the bls12_381_complex_sub function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_bls12_381_complex_sub\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -6571,12 +7242,18 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the bls12_381_complex_mul function - Self::push_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_bls12_381_complex_mul\n"; - Self::pop_internal_registers(ctx, code, false); - //Self::assert_rsp_is_aligned(ctx, code); + // Get result from precompile results data + if ctx.precompile_results_bls12_381complexmul() { + *code += "\tmov rdi, [rdi]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 12); + } else { + // Call the bls12_381_complex_mul function + Self::push_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_bls12_381_complex_mul\n"; + Self::pop_internal_registers(ctx, code, false); + //Self::assert_rsp_is_aligned(ctx, code); + } } // Consume mem reads @@ -6669,14 +7346,19 @@ impl ZiskRom2Asm { if !ctx.chunk_player_mt_collect_mem() && !ctx.chunk_player_mem_reads_collect_main() { - // Call the add256 function - Self::push_internal_registers_except_c_and_flag(ctx, code, false); - // Self::assert_rsp_is_aligned(ctx, code); - *code += "\tcall _opcode_add256\n"; - *code += &format!("\tmov {}, rax {}\n", REG_C, ctx.comment_str("c = rax")); - *code += - &format!("\tmov {}, rax {}\n", REG_FLAG, ctx.comment_str("flag = rax")); - Self::pop_internal_registers_except_c_and_flag(ctx, code, false); + // Get result from precompile results data + if ctx.precompile_results_add256() { + *code += "\tmov rdi, [rdi+3*8]\n"; + Self::precompile_results_array(ctx, code, unusual_code, "rdi", 4); + Self::precompile_results_register(ctx, code, unusual_code, REG_C); + } else { + // Call the add256 function + Self::push_internal_registers_except_c_and_flag(ctx, code, false); + // Self::assert_rsp_is_aligned(ctx, code); + *code += "\tcall _opcode_add256\n"; + *code += &format!("\tmov {}, rax {}\n", REG_C, ctx.comment_str("c = rax")); + Self::pop_internal_registers_except_c_and_flag(ctx, code, false); + } // this precompiles store the result in minimal trace if ctx.minimal_trace() || ctx.zip() || ctx.mem_reads() { @@ -6690,7 +7372,235 @@ impl ZiskRom2Asm { // Set result ctx.c.is_saved = true; - ctx.flag_is_always_zero = false; + ctx.flag_is_always_zero = true; + } + ZiskOp::DmaMemCpy | ZiskOp::DmaXMemCpy => { + // Use the memory address as the first and unique parameter + *code += &ctx.full_line_comment("DmaMemCpy".to_string()); + + *code += &format!( + "\tmov rdi, {} {}\n", + ctx.a.string_value, + ctx.comment_str("rdi = a = destination") + ); + *code += &format!( + "\tmov rsi, {} {}\n", + ctx.b.string_value, + ctx.comment_str("rsi = b = source") + ); + if inst.op == ZiskOp::DMA_MEMCPY { + *code += &format!( + "\tmov rdx, 0x{:08x} {}\n", + EXTRA_PARAMS_ADDR, + ctx.comment_str("rdx = @EXTERN_PARAM") + ); + *code += + &format!("\tmov rdx, [rdx] {}\n", ctx.comment_str("rdx = [EXTERN_PARAM]")); + } else { + // DMA_XMEMCPY (take count from extended static param) + *code += &format!( + "\tmov rdx, {} {}\n", + inst.jmp_offset1, + ctx.comment(format!("rdx = {}", inst.jmp_offset1)) + ); + } + + assert_eq!(REG_MEM_READS_ADDRESS, "r12"); + assert_eq!(REG_MEM_READS_SIZE, "r13"); + + match ctx.mode { + AsmGenerationMethod::AsmMinimalTraces => { + // the number of mem_reads of trace used by memcpy could be + // large, need to control the count of each operation, and + // if it's necessary call to increase minimal trace + *code += "\tcall direct_dma_memcpy_mtrace_with_count_check\n"; + } + AsmGenerationMethod::AsmRomHistogram => { + // ROM hasn't a variable trace, only multiplicities + *code += "\tcall dma_memcpy_fast\n"; + } + AsmGenerationMethod::AsmMemOp => { + // the maximum number of mops of memcpy is limited because in case of range + // of address only send one address as block. The maximum number of mops + // generated by memcpy is 6, means that no need check size. + // 2 pre-reads + 2 src-reads + 1 src-loop + 1 write block = 6 mops + *code += "\tcall direct_dma_memcpy_mops\n"; + } + _ => unimplemented!("dma_memcpy not implemented for method {:?}", ctx.mode), + } + + // Set result + *code += &format!("\tmov {}, rax {}\n", REG_C, ctx.comment_str("c = rax")); + ctx.c.is_saved = true; + ctx.flag_is_always_zero = true; + } + ZiskOp::DmaMemCmp | ZiskOp::DmaXMemCmp => { + // Use the memory address as the first and unique parameter + *code += &ctx.full_line_comment("DmaMemCmp".to_string()); + + *code += &format!( + "\tmov rdi, {} {}\n", + ctx.a.string_value, + ctx.comment_str("rdi = a = destination") + ); + *code += &format!( + "\tmov rsi, {} {}\n", + ctx.b.string_value, + ctx.comment_str("rsi = b = source") + ); + if inst.op == ZiskOp::DMA_MEMCMP { + *code += &format!( + "\tmov rdx, 0x{:08x} {}\n", + EXTRA_PARAMS_ADDR, + ctx.comment_str("rdx = @EXTERN_PARAM") + ); + *code += + &format!("\tmov rdx, [rdx] {}\n", ctx.comment_str("rdx = [EXTERN_PARAM]")); + } else { + // DMA_XMEMCMP (take count from extended static param) + *code += &format!( + "\tmov rdx, {} {}\n", + inst.jmp_offset1, + ctx.comment(format!("rdx = {}", inst.jmp_offset1)) + ); + } + + assert_eq!(REG_MEM_READS_ADDRESS, "r12"); + assert_eq!(REG_MEM_READS_SIZE, "r13"); + + match ctx.mode { + AsmGenerationMethod::AsmMinimalTraces => { + // the number of mem_reads of trace used by memcpy could be + // large, need to control the count of each operation, and + // if it's necessary call to increase minimal trace + *code += "\tcall direct_dma_memcmp_mtrace\n"; + } + AsmGenerationMethod::AsmRomHistogram => { + // ROM hasn't a variable trace, only multiplicities + *code += "\tcall fast_memcmp\n"; + } + AsmGenerationMethod::AsmMemOp => { + // the maximum number of mops of memcpy is limited because in case of range + // of address only send one address as block. The maximum number of mops + // generated by memcmp is 6, means that no need check size. + // 2 pre-reads + 2 src-reads + 1 src-loop + 1 read block = 6 mops + *code += "\tcall direct_dma_memcmp_mops\n"; + } + _ => unimplemented!("dma_memcmp not implemented for method {:?}", ctx.mode), + } + + // Set result + *code += &format!( + "\tmov {}, rax {}\n", + REG_C, + ctx.comment_str("c = rax (result memcmp)") + ); + ctx.c.is_saved = true; + ctx.flag_is_always_zero = true; + } + ZiskOp::DmaInputCpy => { + // Use the memory address as the first and unique parameter + *code += &ctx.full_line_comment("DmaInputCpy".to_string()); + + *code += &format!( + "\tmov rdi, {} {}\n", + ctx.a.string_value, + ctx.comment_str("rdi = a = destination") + ); + *code += &format!( + "\tmov rdx, {} {}\n", + ctx.b.string_value, + ctx.comment_str("rdx = b = count") + ); + + assert_eq!(REG_MEM_READS_ADDRESS, "r12"); + assert_eq!(REG_MEM_READS_SIZE, "r13"); + + match ctx.mode { + AsmGenerationMethod::AsmMinimalTraces => { + // the number of mem_reads of trace used by memcpy could be + // large, need to control the count of each operation, and + // if it's necessary call to increase minimal trace + *code += "\tcall direct_dma_inputcpy_mtrace_with_count_check\n"; + } + AsmGenerationMethod::AsmRomHistogram => { + // ROM hasn't a variable trace, only multiplicities + *code += "\tcall fast_inputcpy\n"; + } + AsmGenerationMethod::AsmMemOp => { + // the maximum number of mops of memcpy is limited because in case of range + // of address only send one address as block. The maximum number of mops + // generated by memcpy is 6, means that no need check size. + // 2 pre-reads + 2 src-reads + 1 src-loop + 1 write block = 6 mops + *code += "\tcall direct_dma_inputcpy_mops\n"; + } + _ => unimplemented!("dma_inputcpy not implemented for method {:?}", ctx.mode), + } + + // Set result + *code += &format!("\tmov {}, rax {}\n", REG_C, ctx.comment_str("c = rax")); + ctx.c.is_saved = true; + ctx.flag_is_always_zero = true; + } + ZiskOp::DmaXMemSet => { + // Use the memory address as the first and unique parameter + *code += &ctx.full_line_comment("DmaXMemSet".to_string()); + + *code += &format!( + "\tmov rdi, {} {}\n", + ctx.a.string_value, + ctx.comment_str("rdi = a = destination") + ); + *code += &format!( + "\tmov rdx, {} {}\n", + ctx.b.string_value, + ctx.comment_str("rdx = b = source") + ); + *code += &format!( + "\tmov rsi, {} {}\n", + inst.jmp_offset1, + ctx.comment(format!("rsi = {}", inst.jmp_offset1)) + ); + assert_eq!(REG_MEM_READS_ADDRESS, "r12"); + assert_eq!(REG_MEM_READS_SIZE, "r13"); + + match ctx.mode { + AsmGenerationMethod::AsmMinimalTraces => { + // the number of mem_reads of trace used by memcpy could be + // large, need to control the count of each operation, and + // if it's necessary call to increase minimal trace + *code += "\tcall direct_dma_xmemset_mtrace\n"; + } + AsmGenerationMethod::AsmRomHistogram => { + // ROM hasn't a variable trace, only multiplicities + *code += "\tcall fast_memset\n"; + } + AsmGenerationMethod::AsmMemOp => { + // the maximum number of mops of memcpy is limited because in case of range + // of address only send one address as block. The maximum number of mops + // generated by memcpy is 6, means that no need check size. + // 2 pre-reads + 2 src-reads + 1 src-loop + 1 write block = 6 mops + *code += "\tcall direct_dma_xmemset_mops\n"; + } + _ => unimplemented!("dma_memset not implemented for method {:?}", ctx.mode), + } + + // Set result + *code += &format!("\tmov {}, rax {}\n", REG_C, ctx.comment_str("c = rax")); + ctx.c.is_saved = true; + ctx.flag_is_always_zero = true; + } + ZiskOp::Dma64Aligned => { + unimplemented!("Internal opcode Dma64Aligned"); + } + ZiskOp::DmaUnaligned => { + unimplemented!("Internal opcode DmaUnaligned"); + } + ZiskOp::DmaPre => { + unimplemented!("Internal opcode DmaPre"); + } + ZiskOp::DmaPost => { + unimplemented!("Internal opcode DmaPost"); } } } @@ -7193,37 +8103,32 @@ impl ZiskRom2Asm { /* MEMORY OPERATIONS */ /*********************/ - fn a_src_mem_op(ctx: &mut ZiskAsmContext, code: &mut String) { + fn src_read_mops(ctx: &mut ZiskAsmContext, code: &mut String) { // Calculate the trace value on top of the address - const WIDTH: u64 = 8; if ctx.address_is_constant { *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_ADDRESS, - (WIDTH << F_MEM_WIDTH_SHIFT) | ctx.address_constant_value, + "\tmov {REG_ADDRESS}, 0x{:x} {}\n", + if ctx.address_constant_value & 0x07 == 0 { + F_MOPS_ALIGNED_READ + } else { + F_MOPS_READ_8 + } + ctx.address_constant_value, ctx.comment_str("aux = constant mem op") ); } else { *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_AUX, - WIDTH << F_MEM_WIDTH_SHIFT, + "\tmov {REG_AUX}, 0x{F_MOPS_READ_8:x} {}\n", ctx.comment_str("aux = mem op mask") ); *code += &format!( - "\tor {}, {} {}\n", - REG_ADDRESS, - REG_AUX, + "\tor {REG_ADDRESS}, {REG_AUX} {}\n", ctx.comment_str("address |= mem op mask") ); } // Copy read data into mem_reads_address and increment it *code += &format!( - "\tmov [{} + {}*8], {} {}\n", - REG_MEM_READS_ADDRESS, - REG_MEM_READS_SIZE, - REG_ADDRESS, + "\tmov [{REG_MEM_READS_ADDRESS} + {REG_MEM_READS_SIZE}*8], {REG_ADDRESS} {}\n", ctx.comment_str("mem_reads[@+size*8] = mem op") ); @@ -7231,126 +8136,83 @@ impl ZiskRom2Asm { *code += &format!("\tinc {} {}\n", REG_MEM_READS_SIZE, ctx.comment_str("mem_reads_size++")); } - fn b_src_mem_op(ctx: &mut ZiskAsmContext, code: &mut String) { - // Calculate the trace value on top of the address - const WIDTH: u64 = 8; + fn b_src_ind_mops(ctx: &mut ZiskAsmContext, code: &mut String, reg_address: &str, width: u64) { if ctx.address_is_constant { + let mops = if width == 8 && ctx.address_constant_value & 0x07 == 0 { + F_MOPS_ALIGNED_READ + ctx.address_constant_value + } else { + ctx.address_constant_value + + match width { + 1 => F_MOPS_READ_1, + 2 => F_MOPS_READ_2, + 4 => F_MOPS_READ_4, + 8 => F_MOPS_READ_8, + _ => panic!("Invalid width"), + } + }; *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_ADDRESS, - (WIDTH << F_MEM_WIDTH_SHIFT) | ctx.address_constant_value, + "\tmov {reg_address}, 0x{mops:x} {}\n", ctx.comment_str("aux = constant mem op") ); } else { // Calculate the trace value on top of the address - *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_AUX, - WIDTH << F_MEM_WIDTH_SHIFT, - ctx.comment_str("aux = mem op mask") - ); - *code += &format!( - "\tor {}, {} {}\n", - REG_ADDRESS, - REG_AUX, - ctx.comment_str("address |= mem op mask") - ); - } - - // Copy read data into mem_reads_address and increment it - *code += &format!( - "\tmov [{} + {}*8], {} {}\n", - REG_MEM_READS_ADDRESS, - REG_MEM_READS_SIZE, - REG_ADDRESS, - ctx.comment_str("mem_reads[@+size*8] = mem op") - ); - - // Increment chunk.steps.mem_reads_size - *code += &format!("\tinc {} {}\n", REG_MEM_READS_SIZE, ctx.comment_str("mem_reads_size++")); - } + let mops = match width { + 1 => F_MOPS_READ_1, + 2 => F_MOPS_READ_2, + 4 => F_MOPS_READ_4, + 8 => F_MOPS_READ_8, + _ => panic!("Invalid width"), + }; - fn b_src_ind_mem_op( - ctx: &mut ZiskAsmContext, - code: &mut String, - reg_address: &str, - width: u64, - ) { - if ctx.address_is_constant { - *code += &format!( - "\tmov {}, 0x{:x} {}\n", - reg_address, - (width << F_MEM_WIDTH_SHIFT) + ctx.address_constant_value, - ctx.comment_str("aux = constant mem op") - ); - } else { - // Calculate the trace value on top of the address - *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_AUX, - width << F_MEM_WIDTH_SHIFT, - ctx.comment_str("aux = mem op mask") - ); + *code += + &format!("\tmov {REG_AUX}, 0x{mops:x} {}\n", ctx.comment_str("aux = mem op mask")); *code += &format!( - "\tor {}, {} {}\n", - reg_address, - REG_AUX, + "\tor {reg_address}, {REG_AUX} {}\n", ctx.comment_str("address |= mem op mask") ); } // Copy read data into mem_reads_address and increment it *code += &format!( - "\tmov [{} + {}*8], {} {}\n", - REG_MEM_READS_ADDRESS, - REG_MEM_READS_SIZE, - reg_address, + "\tmov [{REG_MEM_READS_ADDRESS} + {REG_MEM_READS_SIZE}*8], {reg_address} {}\n", ctx.comment_str("mem_reads[@+size*8] = mem op") ); // Increment chunk.steps.mem_reads_size - *code += &format!("\tinc {} {}\n", REG_MEM_READS_SIZE, ctx.comment_str("mem_reads_size++")); + *code += &format!("\tinc {REG_MEM_READS_SIZE} {}\n", ctx.comment_str("mem_reads_size++")); } fn c_store_mem_mem_op(ctx: &mut ZiskAsmContext, code: &mut String) { // Calculate the trace value on top of the address - const WRITE: u64 = 1; - const WIDTH: u64 = 8; if ctx.address_is_constant { *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_ADDRESS, - (WRITE << F_MEM_WRITE_SHIFT) - + (WIDTH << F_MEM_WIDTH_SHIFT) - + ctx.address_constant_value, + "\tmov {REG_ADDRESS}, 0x{:x} {}\n", + if ctx.address_constant_value & 0x07 == 0 { + F_MOPS_ALIGNED_WRITE + } else { + F_MOPS_WRITE_8 + } + ctx.address_constant_value, ctx.comment_str("aux = constant mem op") ); } else { *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_AUX, - (WRITE << F_MEM_WRITE_SHIFT) + (WIDTH << F_MEM_WIDTH_SHIFT), + "\tmov {REG_AUX}, 0x{F_MOPS_WRITE_8:x} {}\n", ctx.comment_str("aux = mem op mask") ); *code += &format!( - "\tor {}, {} {}\n", - REG_ADDRESS, - REG_AUX, + "\tor {REG_ADDRESS}, {REG_AUX} {}\n", ctx.comment_str("address |= mem op mask") ); } // Copy read data into mem_reads_address and increment it *code += &format!( - "\tmov [{} + {}*8], {} {}\n", - REG_MEM_READS_ADDRESS, - REG_MEM_READS_SIZE, - REG_ADDRESS, + "\tmov [{REG_MEM_READS_ADDRESS} + {REG_MEM_READS_SIZE}*8], {REG_ADDRESS} {}\n", ctx.comment_str("mem_reads[@+size*8] = mem op") ); // Increment chunk.steps.mem_reads_size - *code += &format!("\tinc {} {}\n", REG_MEM_READS_SIZE, ctx.comment_str("mem_reads_size++")); + *code += &format!("\tinc {REG_MEM_READS_SIZE} {}\n", ctx.comment_str("mem_reads_size++")); } fn c_store_ind_mem_op(ctx: &mut ZiskAsmContext, code: &mut String, width: u64) { @@ -7358,55 +8220,59 @@ impl ZiskRom2Asm { // With this information, the mem_planner can use a specific state machine for // this kind of byte writes if width == 1 { - *code += &format!("\tmov {}, {} {}\n", REG_VALUE, REG_C, ctx.comment_str("value = c")); + *code += &format!("\tmov {REG_VALUE}, {REG_C} {}\n", ctx.comment_str("value = c")); } // Calculate the fixed trace value adding write (bit 36) and width (bits 32-35) on top of // the address if ctx.address_is_constant { + let mops = if width == 8 && ctx.address_constant_value & 0x07 == 0 { + F_MOPS_ALIGNED_WRITE + ctx.address_constant_value + } else { + ctx.address_constant_value + + match width { + 1 => F_MOPS_WRITE_1, + 2 => F_MOPS_WRITE_2, + 4 => F_MOPS_WRITE_4, + 8 => F_MOPS_WRITE_8, + _ => panic!("Invalid width"), + } + }; *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_ADDRESS, - F_MEM_WRITE | (width << F_MEM_WIDTH_SHIFT) | ctx.address_constant_value, + "\tmov {REG_ADDRESS}, 0x{mops:x} {}\n", ctx.comment_str("aux = constant mem op") ); } else { + let mops = match width { + 1 => F_MOPS_WRITE_1, + 2 => F_MOPS_WRITE_2, + 4 => F_MOPS_WRITE_4, + 8 => F_MOPS_WRITE_8, + _ => panic!("Invalid width"), + }; + *code += + &format!("\tmov {REG_AUX}, 0x{mops:x} {}\n", ctx.comment_str("aux = mem op mask")); *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_AUX, - F_MEM_WRITE | (width << F_MEM_WIDTH_SHIFT), - ctx.comment_str("aux = mem op mask") - ); - *code += &format!( - "\tor {}, {} {}\n", - REG_ADDRESS, - REG_AUX, + "\tor {REG_ADDRESS}, {REG_AUX} {}\n", ctx.comment_str("address |= mem op mask") ); } // Dynamic trace value: if rest of bytes were zero, set flag on bit F_MEM_CLEAR_WRITE_BYTE if width == 1 { - *code += &format!( - "\tshr {}, 8 {}\n", - REG_VALUE, - ctx.comment_str("value & 0xFFFFFF00 == 0 ?") - ); + *code += + &format!("\tshr {REG_VALUE}, 8 {}\n", ctx.comment_str("value & 0xFFFFFF00 == 0 ?")); *code += &format!( "\tjnz pc_{}_rest_of_bytes_not_zero {}\n", ctx.pc, ctx.comment_str("aux & 0xFFFFFF00 != 0 ?") ); *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_AUX, - F_MEM_CLEAR_WRITE_BYTE, + "\tmov {REG_AUX}, 0x{F_MOPS_CLEAR_WRITE_BYTE:x} {}\n", ctx.comment_str("aux = F_MEM_CLEAR_WRITE_BYTE") ); *code += &format!( - "\tor {}, {} {}\n", - REG_ADDRESS, - REG_AUX, + "\tor {REG_ADDRESS}, {REG_AUX} {}\n", ctx.comment_str("address |= F_MEM_CLEAR_WRITE_BYTE") ); *code += &format!("\npc_{}_rest_of_bytes_not_zero:\n", ctx.pc); @@ -7414,76 +8280,49 @@ impl ZiskRom2Asm { // Copy read data into mem_reads_address and increment it *code += &format!( - "\tmov [{} + {}*8], {} {}\n", - REG_MEM_READS_ADDRESS, - REG_MEM_READS_SIZE, - REG_ADDRESS, + "\tmov [{REG_MEM_READS_ADDRESS} + {REG_MEM_READS_SIZE}*8], {REG_ADDRESS} {}\n", ctx.comment_str("mem_reads[@+size*8] = mem op") ); // Increment chunk.steps.mem_reads_size - *code += &format!("\tinc {} {}\n", REG_MEM_READS_SIZE, ctx.comment_str("mem_reads_size++")); + *code += &format!("\tinc {REG_MEM_READS_SIZE} {}\n", ctx.comment_str("mem_reads_size++")); } fn mem_op_array( ctx: &mut ZiskAsmContext, code: &mut String, reg_address: &str, - _write: bool, - width: u64, + write: bool, length: u64, ) { - let write: u64 = if _write { 1 } else { 0 }; - let mem_op_mask: u64 = (write << F_MEM_WRITE_SHIFT) | (width << F_MEM_WIDTH_SHIFT); + let mops_mask: u64 = if length > 1 { + // compress operation in one single block + (if write { F_MOPS_BLOCK_WRITE } else { F_MOPS_BLOCK_READ }) + | (length << F_MOPS_BLOCK_LENGTH_SHIFT) + } else if write { + F_MOPS_WRITE_8 + } else { + F_MOPS_READ_8 + }; - // Get a copy of the address register + // Load mask the mask *code += &format!( - "\tmov {}, {} {}\n", - REG_VALUE, - reg_address, - ctx.comment_str("value = address") + "\tmov {REG_VALUE}, 0x{mops_mask:x} {}\n", + ctx.comment_str("value = mem op mask") ); - // Calculate the mask - *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_AUX, - mem_op_mask, - ctx.comment_str("aux = mem op mask + offset") - ); + // Get a copy of the address register + *code += + &format!("\tadd {REG_VALUE}, {reg_address} {}\n", ctx.comment_str("value = address")); - // Add the mask to the address *code += &format!( - "\tadd {}, {} {}\n", - REG_VALUE, - REG_AUX, - ctx.comment_str("value |= mem op mask") + "\tmov [{REG_MEM_READS_ADDRESS} + {REG_MEM_READS_SIZE}*8], {REG_VALUE} {}\n", + ctx.comment_str("mem_reads[@+size*8] = mem op") ); - // Iterate for all memory operations - for i in 0..length { - // Copy read data into mem_reads_address and increment it - *code += &format!( - "\tmov [{} + {}*8 + {}*8], {} {}\n", - REG_MEM_READS_ADDRESS, - REG_MEM_READS_SIZE, - i, - REG_VALUE, - ctx.comment_str("mem_reads[@+size*8] = mem op") - ); - - if i != (length - 1) { - // Get a copy of the address register - *code += &format!("\tadd {}, 8 {}\n", REG_VALUE, ctx.comment_str("value += 8")); - } - } // Increment chunk.steps.mem_reads_size - *code += &format!( - "\tadd {}, {} {}\n", - REG_MEM_READS_SIZE, - length, - ctx.comment_str("mem_reads_size += length") - ); + *code += + &format!("\tinc {REG_MEM_READS_SIZE} {}\n", ctx.comment_str("mem_reads_size += 1")); } fn internal_mem_op_precompiled_read( @@ -7493,105 +8332,81 @@ impl ZiskRom2Asm { load_sizes: &[usize], update_index: bool, ) -> u64 { - // Calculate the mask - let mem_op_mask: u64 = 8u64 << 32; - // This index will be incremented as we insert data into mem_reads let mut mem_reads_index: u64 = 0; // We get a copy of the precompiled data address - *code += &format!("\tmov {}, rdi {}\n", REG_ADDRESS, ctx.comment_str("address = rdi")); - - for i in 0..params_count { - // Store next aligned address value in mem_reads, and advance it - *code += &format!( - "\tmov {}, [{} + {}*8] {}\n", - REG_VALUE, - REG_ADDRESS, - i, - ctx.comment(format!("value = mem[address+{i}]")) - ); + *code += &format!("\tmov {REG_ADDRESS}, rdi {}\n", ctx.comment_str("address = rdi")); - // Load the mask + offset + if params_count > 0 { *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_AUX, - mem_op_mask + 8 * i, - ctx.comment_str("aux = mem op mask + offset") + "\tmov {REG_AUX}, 0x{:x} {}\n", + F_MOPS_BLOCK_READ | (params_count << F_MOPS_BLOCK_LENGTH_SHIFT), + ctx.comment_str(&format!("aux = MOPS_BLOCK_READ({})", params_count)) ); // Add the address - *code += &format!( - "\tadd {}, {} {}\n", - REG_AUX, - REG_ADDRESS, - ctx.comment_str("aux += address") - ); + *code += + &format!("\tadd {REG_AUX}, {REG_ADDRESS} {}\n", ctx.comment_str("aux += address")); // Store it in the trace *code += &format!( - "\tmov [{} + {}*8 + {}*8], {} {}\n", - REG_MEM_READS_ADDRESS, - REG_MEM_READS_SIZE, - mem_reads_index, - REG_AUX, + "\tmov [{REG_MEM_READS_ADDRESS} + {REG_MEM_READS_SIZE}*8 + {mem_reads_index}*8], {REG_AUX} {}\n", ctx.comment_str("mem_reads[@+size*8+ind*8] = mem_op") ); mem_reads_index += 1; } + let mut previous_size = 0; + for (i, size) in load_sizes.iter().enumerate() { // Store next aligned address value in mem_reads, and advance it *code += &format!( - "\tmov {}, [{} + {}*8] {}\n", - REG_VALUE, - REG_ADDRESS, - i, + "\tmov {REG_VALUE}, [{REG_ADDRESS} + {i}*8] {}\n", ctx.comment(format!("value = mem[address+{i}]")) ); - // Store the first load_count iterations - // load_size elements in mem_reads - - // For each chunk of the indirection, store it in mem_reads - for l in 0..*size { - // Load the mask + offset + // if previous_size = size, means that REG_AUX has the correct value + // and not need to generate again + if previous_size != *size { *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_AUX, - mem_op_mask + 8 * (l as u64), - ctx.comment_str("aux = mem op mask + offset") + "\tmov {REG_AUX}, 0x{:x} {}\n", + F_MOPS_BLOCK_READ | ((*size as u64) << F_MOPS_BLOCK_LENGTH_SHIFT), + ctx.comment(format!("aux = MOPS_BLOCK_READ({})", size)) ); + previous_size = *size; + } + + // Store a block with all consecutive mem_reads + + // Add the mask over the reg_value to reuse mops_mask (reg_aux) if width is the + // same of last previous parameter + + *code += + &format!("\tadd {REG_VALUE}, {REG_AUX} {}\n", ctx.comment_str("value += aux ")); - // Add the address + // Store it in the trace + *code += &format!( + "\tmov [{REG_MEM_READS_ADDRESS} + {REG_MEM_READS_SIZE}*8 + {mem_reads_index}*8], {REG_VALUE} {}\n", + ctx.comment_str("mem_reads[@+size*8+ind*8] = mops") + ); + + mem_reads_index += 1; + } + if update_index && mem_reads_index > 0 { + // Increment chunk.steps.mem_reads_size + if mem_reads_index == 1 { *code += &format!( - "\tadd {}, {} {}\n", - REG_AUX, - REG_VALUE, - ctx.comment_str("aux += address") + "\tinc {REG_MEM_READS_SIZE}, {}\n", + ctx.comment_str("mem_reads_size+=1") ); - - // Store it in the trace + } else { *code += &format!( - "\tmov [{} + {}*8 + {}*8], {} {}\n", - REG_MEM_READS_ADDRESS, - REG_MEM_READS_SIZE, - mem_reads_index, - REG_AUX, - ctx.comment_str("mem_reads[@+size*8+ind*8] = mem_op") + "\tadd {REG_MEM_READS_SIZE}, {mem_reads_index} {}\n", + ctx.comment(format!("mem_reads_size+={mem_reads_index}")) ); - mem_reads_index += 1; } } - if update_index { - // Increment chunk.steps.mem_reads_size - *code += &format!( - "\tadd {}, {} {}\n", - REG_MEM_READS_SIZE, - mem_reads_index, - ctx.comment(format!("mem_reads_size+={mem_reads_index}")) - ); - } mem_reads_index } @@ -7618,80 +8433,430 @@ impl ZiskRom2Asm { load_size: u64, initial_mem_reads_index: u64, ) { - // Calculate the mask - let mem_op_mask: u64 = F_MEM_WRITE + (8u64 << F_MEM_WIDTH_SHIFT); - // This index will be incremented as we insert data into mem_reads let mut mem_reads_index: u64 = initial_mem_reads_index; if initial_mem_reads_index == 0 { // We get a copy of the precompiled data address - *code += &format!("\tmov {}, rdi {}\n", REG_ADDRESS, ctx.comment_str("address = rdi")); + *code += &format!("\tmov {REG_ADDRESS}, rdi {}\n", ctx.comment_str("address = rdi")); + } + if begin <= end { + // Load the mask + offset + *code += &format!( + "\tmov {REG_AUX}, 0x{:x} {}\n", + F_MOPS_BLOCK_WRITE | (load_size << F_MOPS_BLOCK_LENGTH_SHIFT), + ctx.comment(format!("aux = BLOCK_WRITE({})", load_size)) + ); } + // For every parameter for i in begin..=end { // Store next aligned address value in mem_reads, and advance it *code += &format!( - "\tmov {}, [{} + {}*8] {}\n", - REG_VALUE, - REG_ADDRESS, - i, + "\tmov {REG_VALUE}, [{REG_ADDRESS} + {i}*8] {}\n", ctx.comment(format!("value = mem[address+{i}]")) ); - // For each of the indirection parameter, store it in mem_reads - for l in 0..load_size { - // Load the mask + offset - *code += &format!( - "\tmov {}, 0x{:x} {}\n", - REG_AUX, - mem_op_mask + 8 * l, - ctx.comment_str("aux = mem op mask + offset") + // Add the address + *code += + &format!("\tadd {REG_VALUE}, {REG_AUX} {}\n", ctx.comment_str("value += address")); + + // Store it in the trace + *code += &format!( + "\tmov [{REG_MEM_READS_ADDRESS} + {REG_MEM_READS_SIZE}*8 + {mem_reads_index}*8], {REG_VALUE} {}\n", + ctx.comment_str("mem_reads[@+size*8+ind*8] = value (mops)") ); + mem_reads_index += 1; + } - // Add the address + // Increment chunk.steps.mem_reads_size + if mem_reads_index > 0 { + if mem_reads_index == 1 { *code += &format!( - "\tadd {}, {} {}\n", - REG_AUX, - REG_VALUE, - ctx.comment_str("aux += address") + "\tinc {REG_MEM_READS_SIZE} {}\n", + ctx.comment_str("mem_reads_size+=1") ); - - // Store it in the trace + } else { *code += &format!( - "\tmov [{} + {}*8 + {}*8], {} {}\n", - REG_MEM_READS_ADDRESS, - REG_MEM_READS_SIZE, - mem_reads_index, - REG_AUX, - ctx.comment_str("mem_reads[@+size*8+ind*8] = mem_op") + "\tadd {REG_MEM_READS_SIZE}, {mem_reads_index} {}\n", + ctx.comment(format!("mem_reads_size+={mem_reads_index}")) ); - mem_reads_index += 1; } } - - // Increment chunk.steps.mem_reads_size - *code += &format!( - "\tadd {}, {} {}\n", - REG_MEM_READS_SIZE, - mem_reads_index, - ctx.comment(format!("mem_reads_size+={mem_reads_index}")) - ); } fn mem_op_precompiled_restore_c_and_flags(ctx: &mut ZiskAsmContext, code: &mut String) { // We get a copy of the precompiled data address - *code += &format!("\tmov {}, rdi {}\n", REG_ADDRESS, ctx.comment_str("address = rdi")); + *code += &format!("\tmov {REG_ADDRESS}, rdi {}\n", ctx.comment_str("address = rdi")); // read last mem_read into c *code += &format!( - "\tmov {}, [{} + {}*8 - 8] {}\n", - REG_C, - REG_MEM_READS_ADDRESS, - REG_MEM_READS_SIZE, + "\tmov {REG_C}, [{REG_MEM_READS_ADDRESS} + {REG_MEM_READS_SIZE}*8 - 8] {}\n", ctx.comment_str("c = mem_reads[@+size*8+ind*8]") ); - *code += &format!("\tmov {}, {} {}\n", REG_FLAG, REG_C, ctx.comment_str("flag = c")); + *code += &format!("\tmov {REG_FLAG}, {REG_C} {}\n", ctx.comment_str("flag = c")); + } + + /**********************/ + /* PRECOMPILE RESULTS */ + /**********************/ + + // Copies size u64 elements from precompile_results_address to the address in reg_address, + // and increments precompile_read by size*8 + fn precompile_results_array( + ctx: &mut ZiskAsmContext, + code: &mut String, + unusual_code: &mut String, + reg_address: &str, + size: u64, + ) { + // Wait for available precompile results data + if ctx.call_wait_for_prec_avail() { + Self::wait_for_prec_avail(ctx, code, unusual_code); + } + + // Load precompile address and read index + *code += &format!( + "\tmov {}, {} {}\n", + REG_ADDRESS, + ctx.mem_precompile_results_address, + ctx.comment_str("address = precompile_results_address") + ); + *code += &format!( + "\tmov {}, {} {}\n", + REG_AUX, + ctx.mem_precompile_read_address, + ctx.comment_str("aux = precompile_read") + ); + + // Loop for all u64-sized elements to read + for k in 0..size { + // Take the read index module against the precompile buffer size in u64's + *code += &format!( + "\tand {}, 0x{:x} {}\n", + REG_AUX, + PRECOMPILE_BUFFER_SIZE_U64_MASK, + ctx.comment_str("aux &= buffer mask") + ); + + // Copy the ui64 element from precompile_results to the destination address + *code += &format!( + "\tmov {}, [{} + {}*8] {}\n", + REG_VALUE, + REG_ADDRESS, + REG_AUX, + ctx.comment(format!("value = precompile_results[{}]", k)) + ); + *code += &format!( + "\tmov [{} + {}*8], {} {}\n", + reg_address, + k, + REG_VALUE, + ctx.comment(format!("addr[{}] = value", k)) + ); + + // Increase the register containing the read index + if k != size - 1 { + *code += &format!("\tinc {} {}\n", REG_AUX, ctx.comment_str("aux++")); + } + } + + // Add the read size to precompile_read + *code += &format!( + "\tadd {}, {} {}\n", + ctx.mem_precompile_read_address, + size, + ctx.comment(format!("read += {}", size)) + ); + } + + // Copies 1 u64 element from precompile_results_address to the register reg, + // and increments precompile_results_address by 8 + fn precompile_results_register( + ctx: &mut ZiskAsmContext, + code: &mut String, + unusual_code: &mut String, + reg: &str, + ) { + // Wait for available precompile results data + if ctx.call_wait_for_prec_avail() { + Self::wait_for_prec_avail(ctx, code, unusual_code); + } + + // Load precompile address and read index + *code += &format!( + "\tmov {}, {} {}\n", + REG_ADDRESS, + ctx.mem_precompile_results_address, + ctx.comment_str("address = precompile_results_address") + ); + *code += &format!( + "\tmov {}, {} {}\n", + REG_AUX, + ctx.mem_precompile_read_address, + ctx.comment_str("aux = precompile_read") + ); + + // Take the read index module against the precompile buffer size in u64's + *code += &format!( + "\tand {}, 0x{:x} {}\n", + REG_AUX, + PRECOMPILE_BUFFER_SIZE_U64_MASK, + ctx.comment_str("aux &= buffer mask") + ); + *code += &format!( + "\tmov {}, [{} + {}*8] {}\n", + REG_VALUE, + REG_ADDRESS, + REG_AUX, + ctx.comment_str("value = precompile_results[0]") + ); + + // Copy the ui64 element from precompile_results to the destination address + *code += &format!( + "\tmov {}, {} {}\n", + reg, + REG_VALUE, + ctx.comment_str("reg = precompile_results[0]") + ); + + // Increase precompile_read + *code += &format!( + "\tinc {} {}\n", + ctx.mem_precompile_read_address, + ctx.comment_str("precompile_read++") + ); + } + + // Copies the fcall result size and result data to the fcall structure + // address in reg_address, + fn precompile_results_fcall( + ctx: &mut ZiskAsmContext, + code: &mut String, + unusual_code: &mut String, + reg_address: &str, + ) { + // Wait for available precompile results data + if ctx.call_wait_for_prec_avail() { + Self::wait_for_prec_avail(ctx, code, unusual_code); + } + + // Load precompile address into REG_AUX + *code += &format!( + "\tmov {}, {} {}\n", + REG_AUX, + ctx.mem_precompile_results_address, + ctx.comment_str("aux = precompile_results_address") + ); + + // Load the destination fcall address into REG_ADDRESS + *code += &format!("\tmov {REG_ADDRESS}, {reg_address}\n"); + + // Calculate the address of the first precompile u64 value = address + read % buffer_size + *code += &format!( + "\tmov {}, {} {}\n", + REG_ADDRESS, + ctx.mem_precompile_read_address, + ctx.comment_str("address = precompile_read") + ); + *code += &format!( + "\tand {}, 0x{:x} {}\n", + REG_ADDRESS, + PRECOMPILE_BUFFER_SIZE_U64_MASK, + ctx.comment_str("address %= buffer size") + ); + + // Copy the result size (first u64 value) and store it in register B + *code += &format!( + "\tmov {}, [{} + {}*8] {}\n", + REG_B, + REG_AUX, + REG_ADDRESS, + ctx.comment_str("b = precompile_results[0]") + ); + *code += &format!( + "\tmov [{} + {}*8], {} {}\n", + reg_address, + FCALL_RESULT_SIZE, + REG_B, + ctx.comment_str("fcall[result_size] = b") + ); + *code += &format!( + "\tinc {} {}\n", + ctx.mem_precompile_read_address, + ctx.comment_str("precompile_read++") + ); + + // Copy data consuming REG_B u64's starting at REG_A=0, increasing REG_A until REG_A == REG_B + + // Initialize REG_A to 0 + *code += &format!("\txor {}, {} {}\n", REG_FLAG, REG_FLAG, ctx.comment_str("flag = 0")); + + // Loop start + *code += &format!("pc_{:x}_fcall_copy_params_loop_start:\n", ctx.pc); + + // End loop when REG_A == REG_B + *code += &format!("\tcmp {}, {} {}\n", REG_FLAG, REG_B, ctx.comment_str("flag =? b")); + *code += &format!("\tje pc_{:x}_fcall_copy_params_loop_end\n", ctx.pc); + + // Calculate the address of the next precompile u64 value = address + read % buffer_size + *code += &format!( + "\tmov {}, {} {}\n", + REG_ADDRESS, + ctx.mem_precompile_read_address, + ctx.comment_str("address = precompile_read") + ); + *code += &format!( + "\tadd {}, {} {}\n", + REG_ADDRESS, + REG_FLAG, + ctx.comment_str("address += flag") + ); + *code += &format!( + "\tand {}, 0x{:x} {}\n", + REG_ADDRESS, + PRECOMPILE_BUFFER_SIZE_U64_MASK, + ctx.comment_str("address %= buffer size") + ); + + // Copy value from precompile_results to fcall[result_data + REG_A] + *code += &format!( + "\tmov {}, [{} + {}*8] {}\n", + REG_VALUE, + REG_AUX, + REG_ADDRESS, + ctx.comment_str("value = precompile_results[a]") + ); + *code += &format!( + "\tmov [{} + {}*8 + {}*8], {} {}\n", + reg_address, + REG_FLAG, + FCALL_RESULT, + REG_VALUE, + ctx.comment_str("addr[] = value") + ); + + // Increment REG_FLAG + *code += &format!("\tinc {} {}\n", REG_FLAG, ctx.comment_str("flag++")); + + // Jump to loop start + *code += &format!("\tjmp pc_{:x}_fcall_copy_params_loop_start\n", ctx.pc); + + // Loop end + *code += &format!("pc_{:x}_fcall_copy_params_loop_end:\n", ctx.pc); + + // Update precompile_read += result_size + *code += &format!( + "\tadd {}, {} {}\n", + ctx.mem_precompile_read_address, + REG_FLAG, + ctx.comment_str("precompile_read += fcall_result_size") + ); + } + + fn wait_for_prec_avail(ctx: &mut ZiskAsmContext, code: &mut String, unusual_code: &mut String) { + *code += &ctx.full_line_comment("Wait for precompile results available".to_string()); + + // if precompile_written == precompile_read then call wait_for_prec_avail + *code += &format!( + "\tmov {}, {} {}\n", + REG_AUX, + ctx.mem_precompile_read_address, + ctx.comment_str("aux = precompile_read") + ); + *code += &format!( + "\tcmp {}, {} {}\n", + REG_AUX, + ctx.mem_precompile_written_address, + ctx.comment_str("read ?= written") + ); + *code += &format!( + "\tjz pc_{:x}_{}_wait_for_prec_avail {}\n", + ctx.pc, + ctx.wait_for_prec_counter, + ctx.comment_str("if there is data, done") + ); + *code += + &format!("pc_{:x}_{}_wait_for_prec_avail_done:\n", ctx.pc, ctx.wait_for_prec_counter); + + // Call wait_for_prec_avail() + *unusual_code += + &format!("pc_{:x}_{}_wait_for_prec_avail:\n", ctx.pc, ctx.wait_for_prec_counter); + Self::push_internal_registers(ctx, unusual_code, false); + *unusual_code += "\tcall _wait_for_prec_avail\n"; + *unusual_code += "\tcmp rax, 0\n"; + *unusual_code += "\tjne execute_pop_internal_regs_and_end\n"; + Self::pop_internal_registers(ctx, unusual_code, false); + *unusual_code += &format!( + "\tjmp pc_{:x}_{}_wait_for_prec_avail_done\n", + ctx.pc, ctx.wait_for_prec_counter + ); + + // TODO: + // else if *precompile_written_address - *precompile_read_address < threshold -> call post_prec_read + + //*code += &format!("pc_{:x}_wait_for_prec_avail_end:\n", ctx.pc,); + + // Increment wait_for_prec_counter + ctx.wait_for_prec_counter += 1; + } + + // Waits for the requested reg_address input address (and previous) to be available, waiting + // if necessary address in reg_address, + fn wait_for_input_ready( + ctx: &mut ZiskAsmContext, + code: &mut String, + unusual_code: &mut String, + //reg_address: &str, // fcall structure address + ) { + *code += &ctx.full_line_comment("Wait for input data available".to_string()); + + // Calculate number of bytes until the requested address from the current read address + // required_bytes = (required_address - INPUT_ADDR - 8 + 1 + 7) & ~0x7; + // + 1 because required_address is the address of the last required byte + // + 7 & ~0x7 because if we require any byte of the last u64, we need to wait for the whole u64 to be available + assert!(ctx.a.is_constant); + *code += &format!( + "\tmov rdi, qword {}[{} + {}*8] {}\n", + ctx.ptr, + ctx.fcall_ctx, + FCALL_PARAMS, + ctx.comment_str("rdi = params[0] = required_address") + ); + *code += &format!( + "\tmov {}, 0x{:x} {}\n", + REG_AUX, + INPUT_ADDR, + ctx.comment_str("aux = INPUT_ADDRESS") + ); + *code += &format!( + "\tsub rdi, {} {}\n", + REG_AUX, + ctx.comment_str("rdi = required_address - INPUT_ADDRESS") + ); + *code += &format!("\tand rdi, ~0x7 {}\n", ctx.comment_str("rdi &= 0x7")); + + // if input_written == input_ready then call wait_for_input_avail + *code += &format!( + "\tcmp rdi, {} {}\n", + ctx.mem_input_written_address, + ctx.comment_str("required <= written") + ); + *code += &format!( + "\tja pc_{:x}_wait_for_input_avail {}\n", + ctx.pc, + ctx.comment_str("if there is data, done") + ); + *code += &format!("pc_{:x}_wait_for_input_avail_done:\n", ctx.pc); + + // Call wait_for_input_avail() + *unusual_code += &format!("pc_{:x}_wait_for_input_avail:\n", ctx.pc); + Self::push_internal_registers(ctx, unusual_code, false); + *unusual_code += "\tcall _wait_for_input_avail\n"; + *unusual_code += "\tcmp rax, 0\n"; + *unusual_code += "\tjne execute_pop_internal_regs_and_end\n"; + Self::pop_internal_registers(ctx, unusual_code, false); + *unusual_code += &format!("\tjmp pc_{:x}_wait_for_input_avail_done\n", ctx.pc); } /*******************/ @@ -8213,25 +9378,25 @@ impl ZiskRom2Asm { *code += "\tpush rax\n"; *code += "\tpush rcx\n"; *code += "\tpush rdx\n"; - //*code += "\tpush rdi\n"; + *code += "\tpush rdi\n"; // *code += "\tpush rsi\n"; // *code += "\tpush rsp\n"; *code += "\tpush r8\n"; *code += "\tpush r9\n"; *code += "\tpush r10\n"; *code += "\tpush r11\n"; - Self::push_xmm_regs(ctx, code, !extra_8); + Self::push_xmm_regs(ctx, code, extra_8); } fn pop_internal_registers(ctx: &mut ZiskAsmContext, code: &mut String, extra_8: bool) { - Self::pop_xmm_regs(ctx, code, !extra_8); + Self::pop_xmm_regs(ctx, code, extra_8); *code += "\tpop r11\n"; *code += "\tpop r10\n"; *code += "\tpop r9\n"; *code += "\tpop r8\n"; // *code += "\tpop rsp\n"; // *code += "\tpop rsi\n"; - //*code += "\tpop rdi\n"; + *code += "\tpop rdi\n"; *code += "\tpop rdx\n"; *code += "\tpop rcx\n"; *code += "\tpop rax\n"; @@ -8716,10 +9881,7 @@ impl ZiskRom2Asm { /////////////// // Build the mask for this case - const WIDTH: u64 = 8; - const WRITE: u64 = 0; - let addr_step_mask: u64 = - (WIDTH << F_MEM_WIDTH_SHIFT) + (WRITE << F_MEM_WRITE_SHIFT) + (micro_step << 38); + let addr_step_mask: u64 = F_MOPS_READ_8 + (micro_step << 38); // Add mask to address *code += &format!( @@ -8898,10 +10060,15 @@ impl ZiskRom2Asm { /////////////// // Build the mask for this case - const WRITE: u64 = 1; const MICRO_STEP: u64 = 3; - let addr_step_mask: u64 = - (width << F_MEM_WIDTH_SHIFT) + (WRITE << F_MEM_WRITE_SHIFT) + (MICRO_STEP << 40); + let addr_step_mask: u64 = (MICRO_STEP << 40) + + match width { + 1 => F_MOPS_WRITE_1, + 2 => F_MOPS_WRITE_2, + 4 => F_MOPS_WRITE_4, + 8 => F_MOPS_WRITE_8, + _ => panic!("Invalid width {width}"), + }; // Add mask to address *code += &format!( @@ -9011,7 +10178,7 @@ impl ZiskRom2Asm { const WIDTH: u64 = 8; const MICRO_STEP: u64 = 2; let addr_step_mask: u64 = - (WIDTH << F_MEM_WIDTH_SHIFT) + (write << F_MEM_WRITE_SHIFT) + (MICRO_STEP << 40); + if write == 0 { F_MOPS_READ_8 } else { F_MOPS_WRITE_8 } + (MICRO_STEP << 40); // For every element for i in 0..buffer_size { diff --git a/data-bus/src/data_bus.rs b/data-bus/src/data_bus.rs index 8783c1731..71c44c8d1 100644 --- a/data-bus/src/data_bus.rs +++ b/data-bus/src/data_bus.rs @@ -3,9 +3,7 @@ //! omnipresent devices that process all data sent to the bus. This module provides mechanisms to //! send data, route it to the appropriate subscribers, and manage device connections. -use std::collections::VecDeque; - -use zisk_common::{BusDevice, BusId}; +use zisk_common::BusId; pub trait DataBusTrait { /// Writes data to the bus and processes it through the registered devices. @@ -18,178 +16,9 @@ pub trait DataBusTrait { /// # Returns /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. - fn write_to_bus(&mut self, bus_id: BusId, payload: &[D]) -> bool; + fn write_to_bus(&mut self, bus_id: BusId, data: &[D], data_ext: &[D]) -> bool; fn on_close(&mut self); fn into_devices(self, execute_on_close: bool) -> Vec<(Option, Option)>; } - -/// A bus system facilitating communication between multiple publishers and subscribers. -/// -/// The `DataBus` allows devices to register for specific bus IDs or act as global (omni) devices. -/// It routes payloads to registered devices and handles data transfers efficiently. -/// -/// # Type Parameters -/// * `D` - The type of data payloads handled by the bus. -/// * `BD` - The type of devices (subscribers) connected to the bus, implementing the `BusDevice` -/// trait. -pub struct DataBus> { - /// List of devices connected to the bus. - devices: Vec<(Option, BD)>, - - /// Mapping from `BusId` to indices of devices listening to that ID. - devices_bus_id_map: Vec>, - - /// Queue of pending data transfers to be processed. - pending_transfers: VecDeque<(BusId, Vec)>, - - /// Indices of devices that are connected to the bus but without a specific instance. - none_devices: Vec, - - /// The number of active devices currently connected to the bus. - active_devices: usize, -} - -impl> Default for DataBus { - /// Creates a new `DataBus` with default settings. - fn default() -> Self { - Self::new() - } -} - -impl> DataBus { - /// Creates a new `DataBus` instance. - pub fn new() -> Self { - Self { - devices: Vec::new(), - devices_bus_id_map: vec![vec![], vec![], vec![]], - pending_transfers: VecDeque::new(), - none_devices: vec![], - active_devices: 0, - } - } - - /// Connects a device to the bus with specific `BusId` subscriptions. - /// - /// # Arguments - /// * `instance_idx` - An optional index for the device instance. - /// * `bus_device` - The device to be added to the bus. - pub fn connect_device(&mut self, instance_idx: Option, bus_device: Option) { - if let Some(bus_device) = bus_device { - let bus_ids = bus_device.bus_id(); - - self.devices.push((instance_idx, bus_device)); - let device_idx = self.devices.len() - 1; - - for bus_id in bus_ids { - self.devices_bus_id_map[*bus_id].push(device_idx); - } - self.active_devices += 1; - } else { - self.none_devices.push(self.devices.len()); - } - } - - /// Routes data to the devices subscribed to a specific bus ID or global devices. - /// - /// # Arguments - /// * `bus_id` - The ID of the bus to route the data to. - /// * `payload` - A reference to the data payload being routed. - /// - /// # Returns - /// A boolean indicating whether the program should continue execution or terminate. - /// Returns `true` to continue execution, `false` to stop. - #[inline(always)] - fn route_data(&mut self, bus_id: BusId, payload: &[D]) { - let devices_idx = &mut self.devices_bus_id_map[*bus_id]; - let mut i = 0; - - while i < devices_idx.len() { - let device_idx = devices_idx[i]; - // When a device returns false, it indicates that it has finished its work and should be disabled. - if !self.devices[device_idx].1.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ) { - // Remove the device from the bus and update the mapping. - devices_idx.swap_remove(i); - self.active_devices -= 1; - } else { - i += 1; - } - } - } - - /// Outputs the current state of the bus for debugging purposes. - pub fn debug_state(&self) { - println!("Devices: {:?}", self.devices.len()); - println!("Devices by bus ID: {:?}", self.devices_bus_id_map); - println!("Pending Transfers: {:?}", self.pending_transfers.len()); - } -} - -impl> DataBusTrait for DataBus { - /// Writes data to the bus and processes it through the registered devices. - /// - /// # Arguments - /// * `bus_id` - The ID of the bus receiving the data. - /// * `payload` - The data payload to be sent. - /// * `pending` – A queue of pending bus operations used to send derived inputs. - /// - /// # Returns - /// A boolean indicating whether the program should continue execution or terminate. - /// Returns `true` to continue execution, `false` to stop. - #[inline(always)] - fn write_to_bus(&mut self, bus_id: BusId, payload: &[D]) -> bool { - self.route_data(bus_id, payload); - - while let Some((bus_id, payload)) = self.pending_transfers.pop_front() { - self.route_data(bus_id, &payload); - } - - self.active_devices > 0 - } - - /// Called when the bus is closed, allowing devices to perform any necessary cleanup. - fn on_close(&mut self) { - for device in &mut self.devices { - device.1.on_close(); - } - } - - /// Converts the bus into a vector of devices, optionally executing their close operations. - /// - /// # Arguments - /// * `execute_on_close` - If true, calls the `on_close` method on each device. - /// - //// # Returns - /// A vector of tuples containing the device instance index and the device itself. - fn into_devices(self, execute_on_close: bool) -> Vec<(Option, Option)> { - let total_len = self.devices.len() + self.none_devices.len(); - let mut result = Vec::with_capacity(total_len); - - let mut dev_iter = self.devices.into_iter(); - let mut none_iter = self.none_devices.iter().copied().peekable(); - - for idx in 0..total_len { - if Some(&idx) == none_iter.peek() { - result.push((None, None)); - none_iter.next(); - } else { - let mut device = - dev_iter.next().expect("Mismatch between device and none-device count"); - - if execute_on_close { - device.1.on_close(); - } - - result.push((device.0, Some(device.1))); - } - } - - result - } -} diff --git a/data-bus/src/data_bus_file.rs b/data-bus/src/data_bus_file.rs deleted file mode 100644 index bbc270496..000000000 --- a/data-bus/src/data_bus_file.rs +++ /dev/null @@ -1,139 +0,0 @@ -//! A module for reading and writing DataBus information to a file. -//! -//! The `DataBusFileReader` struct provides a utility for reading DataBus information from a plain -//! text file. The `DataBusFileWriter` struct provides a utility for writing DataBus information to -//! a file. - -use std::{ - fs::File, - io::{self, Read, Write}, - str::FromStr, -}; - -use zisk_common::BusId; - -pub struct DataBusFileReader; - -impl DataBusFileReader { - /// Reads data from a plain text file and returns a vector of `(BusId, Payload)` tuples. - /// - /// # File Format - /// Each line in the file should be formatted as: - /// ```text - /// ... - /// ``` - /// - ``: A 16-bit unsigned integer representing the bus ID. - /// - ``: A list of payload values convertible to the type `D`. - /// - /// # Arguments - /// * `file_path` - The path to the plain text file. - /// - /// # Returns - /// * `Result)>, io::Error>`: A vector of `(BusId, Payload)` tuples or an error - /// if the file cannot be read or the data format is invalid. - /// - /// # Errors - /// - Returns an error if the file cannot be opened or read. - /// - Returns an error if any line is malformed (missing `BusId` or invalid payload values). - pub fn read_from_file(file_path: &str) -> Result)>, io::Error> - where - D::Err: std::fmt::Display, - { - let mut file = File::open(file_path)?; - let mut content = String::new(); - file.read_to_string(&mut content)?; - - // Estimate the number of lines for pre-allocation - let estimated_lines = content.lines().count(); - let mut data = Vec::with_capacity(estimated_lines); - - for (line_number, line) in content.lines().enumerate() { - let mut parts = line.split_whitespace(); - - // Parse the BusId (first token) - let bus_id = parts - .next() - .ok_or_else(|| { - io::Error::new( - io::ErrorKind::InvalidData, - format!("Missing BusId on line {}", line_number + 1), - ) - })? - .parse::() - .map_err(|err| { - io::Error::new( - io::ErrorKind::InvalidData, - format!("Invalid BusId on line {}: {}", line_number + 1, err), - ) - })?; - - // Pre-allocate payload size if possible - let mut payload = Vec::with_capacity(parts.clone().count()); - - for token in parts { - let value = token.parse::().map_err(|err| { - io::Error::new( - io::ErrorKind::InvalidData, - format!("Invalid payload on line {}: {}", line_number + 1, err), - ) - })?; - payload.push(value); - } - - // Push the parsed data into the pre-allocated vector - data.push((BusId(bus_id), payload)); - } - - Ok(data) - } -} - -/// A utility struct for writing DataBus information to a file. -pub struct DataBusFileWriter { - file: Option, -} - -impl DataBusFileWriter { - /// Creates a new `DataBusFileWriter` and opens the specified file for writing. - /// - /// # Arguments - /// * `file_path` - The path to the file where data will be written. - /// - /// # Returns - /// A new instance of `DataBusFileWriter`. - pub fn new(file_path: &str) -> Result { - let file = File::create(file_path)?; - Ok(Self { file: Some(file) }) - } - - /// Writes a single `(BusId, Payload)` line to the file. - /// - /// # Arguments - /// * `bus_id` - The BusId to write. - /// * `payload` - A vector of payload items to write. - pub fn write(&mut self, bus_id: u16, payload: &[D]) -> Result<(), io::Error> { - if let Some(file) = self.file.as_mut() { - let payload_str: String = - payload.iter().map(|item| item.to_string()).collect::>().join(" "); - writeln!(file, "{bus_id} {payload_str}")?; - Ok(()) - } else { - Err(io::Error::other("Attempted to write to a closed file.")) - } - } - - /// Closes the file, ensuring all data is flushed to disk. - pub fn close(&mut self) -> Result<(), io::Error> { - if let Some(mut file) = self.file.take() { - file.flush()?; // Ensure all buffered data is written - } - Ok(()) - } -} - -impl Drop for DataBusFileWriter { - /// Ensures the file is closed when the `DataBusFileWriter` is dropped. - fn drop(&mut self) { - let _ = self.close(); // Silently ignore any errors during drop - } -} diff --git a/data-bus/src/data_bus_player.rs b/data-bus/src/data_bus_player.rs deleted file mode 100644 index 25ee2834a..000000000 --- a/data-bus/src/data_bus_player.rs +++ /dev/null @@ -1,42 +0,0 @@ -//! A player for replaying data on the `DataBus`. - -use std::{io, str::FromStr}; - -use zisk_common::{BusDevice, BusId}; - -use crate::{DataBus, DataBusFileReader, DataBusTrait}; - -pub struct DataBusPlayer; - -impl DataBusPlayer { - /// Plays data on the `DataBus` from a provided data vector. - /// - /// # Arguments - /// * `data_bus` - The `DataBus` to which the data is sent. - /// * `data` - A vector of `(BusId, Payload)` tuples. - pub fn play>(data_bus: &mut DataBus, data: Vec<(BusId, Vec)>) { - for (bus_id, payload) in data { - as DataBusTrait>::write_to_bus(data_bus, bus_id, &payload); - } - } - - /// Plays data on the `DataBus` from a file using `DataBusFileReader`. - /// - /// # Arguments - /// * `file_path` - The path to the file containing the data. - /// * `data_bus` - The `DataBus` to which the data is sent. - /// - /// # Returns - /// * `Result<(), io::Error>` indicating success or failure during file reading and playing. - pub fn play_from_file>( - data_bus: &mut DataBus, - file_path: &str, - ) -> Result<(), io::Error> - where - D::Err: std::fmt::Display, - { - let data = DataBusFileReader::read_from_file::(file_path)?; - Self::play(data_bus, data); - Ok(()) - } -} diff --git a/data-bus/src/lib.rs b/data-bus/src/lib.rs index 4ff1159d1..f4f8ec56f 100644 --- a/data-bus/src/lib.rs +++ b/data-bus/src/lib.rs @@ -1,7 +1,3 @@ mod data_bus; -mod data_bus_file; -mod data_bus_player; pub use data_bus::*; -pub use data_bus_file::*; -pub use data_bus_player::*; diff --git a/debug.json b/debug.json index fa544d1cb..5f935055d 100644 --- a/debug.json +++ b/debug.json @@ -3,5 +3,6 @@ "print_to_file": true, "n_vals": 1000000, "fast_mode": false - } + }, + "store_row_info": true } diff --git a/definitions/Cargo.toml b/definitions/Cargo.toml new file mode 100644 index 000000000..0397e9dbc --- /dev/null +++ b/definitions/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "zisk-definitions" +version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +keywords = { workspace = true } +repository = { workspace = true } +categories = { workspace = true } + +[lib] +name = "zisk_definitions" +path = "src/lib.rs" + +[dependencies] + +# QUIC networking + +[features] +default = [] +disable_distributed = [] +stats = [] diff --git a/definitions/src/lib.rs b/definitions/src/lib.rs new file mode 100644 index 000000000..5ca0b3436 --- /dev/null +++ b/definitions/src/lib.rs @@ -0,0 +1,2 @@ +mod syscall; +pub use syscall::*; diff --git a/ziskos/entrypoint/src/syscalls/syscall.rs b/definitions/src/syscall.rs similarity index 66% rename from ziskos/entrypoint/src/syscalls/syscall.rs rename to definitions/src/syscall.rs index da2835318..a1c74a724 100644 --- a/ziskos/entrypoint/src/syscalls/syscall.rs +++ b/definitions/src/syscall.rs @@ -1,5 +1,7 @@ // Syscall 0x800 - 0x84F (80 syscalls) +// Important: Syscalls should be contiguous and in the same order as in riscv2zisk_context.rs + pub const SYSCALL_KECCAKF_ID: u16 = 0x800; pub const SYSCALL_ARITH256_ID: u16 = 0x801; pub const SYSCALL_ARITH256_MOD_ID: u16 = 0x802; @@ -18,3 +20,11 @@ pub const SYSCALL_BLS12_381_COMPLEX_ADD_ID: u16 = 0x80E; pub const SYSCALL_BLS12_381_COMPLEX_SUB_ID: u16 = 0x80F; pub const SYSCALL_BLS12_381_COMPLEX_MUL_ID: u16 = 0x810; pub const SYSCALL_ADD256_ID: u16 = 0x811; +pub const SYSCALL_POSEIDON2_ID: u16 = 0x812; +pub const SYSCALL_DMA_MEMCPY_ID: u16 = 0x813; +pub const SYSCALL_DMA_MEMCMP_ID: u16 = 0x814; +pub const SYSCALL_DMA_INPUTCPY_ID: u16 = 0x815; +pub const SYSCALL_DMA_MEMSET_ID: u16 = 0x816; +pub const SYSCALL_SECP256R1_ADD_ID: u16 = 0x817; +pub const SYSCALL_SECP256R1_DBL_ID: u16 = 0x818; +pub const SYSCALL_BLAKE2B_ROUND_ID: u16 = 0x819; diff --git a/distributed/Dockerfile b/distributed/Dockerfile index f2bc3b3e9..05deb53d3 100644 --- a/distributed/Dockerfile +++ b/distributed/Dockerfile @@ -75,8 +75,6 @@ RUN mkdir -p bin config/coordinator config/worker /app/proofs /var/log/distribut # Copy binaries from builder stage COPY --from=builder --chown=zisk:zisk /app/target/release/zisk-coordinator ./bin/ COPY --from=builder --chown=zisk:zisk /app/target/release/zisk-worker ./bin/ -# Copy the witness library -COPY --from=builder --chown=zisk:zisk /app/target/release/libzisk_witness.so ./bin/ # Copy configuration files with proper ownership COPY --chown=zisk:zisk distributed/crates/coordinator/config/ ./config/coordinator/ diff --git a/distributed/README.md b/distributed/README.md index 3b715a708..399380669 100644 --- a/distributed/README.md +++ b/distributed/README.md @@ -41,7 +41,7 @@ cargo run --release --bin zisk-coordinator cargo run --release --bin zisk-worker -- --elf --inputs-folder # Generate a proof (in another terminal) -cargo run --release --bin zisk-coordinator prove --input --compute-capacity 10 +cargo run --release --bin zisk-coordinator prove --inputs-uri --compute-capacity 10 ``` ### Docker Deployment @@ -92,7 +92,7 @@ docker logs -f zisk-worker-1 # Generate a proof (use filename only, not full path) docker exec -it zisk-coordinator \ - zisk-coordinator prove --input --compute-capacity 10 + zisk-coordinator prove --inputs-uri --compute-capacity 10 # Stop containers docker stop zisk-coordinator zisk-worker-1 @@ -142,7 +142,9 @@ The table below lists the available configuration options for the Coordinator: | `service.environment` | - | - | String | development | Service environment (development, staging, production) | | `server.host` | - | - | String | 0.0.0.0 | Server host | | `server.port` | `--port` | - | Number | 50051 | Server port | -| `server.proofs_dir` | `--proofs-dir` | - | String | proofs | Directory to save generated proofs | +| `server.proofs_dir` | `--proofs-dir` | - | String | proofs | Directory to save generated proofs (conflicts with `--no-save-proofs`) | +| - | `--no-save-proofs` | - | Boolean | false | Disable saving proofs (conflicts with `--proofs-dir`) | +| - | `-c`, `--compressed-proofs` | - | Boolean | false | Generate compressed proofs | | `server.shutdown_timeout_seconds` | - | - | Number | 30 | Graceful shutdown timeout in seconds | | `logging.level` | - | RUST_LOG | String | debug | Logging level (error, warn, info, debug, trace) | | `logging.format` | - | - | String | pretty | Logging format (pretty, json, compact) | @@ -234,7 +236,7 @@ When `success` is `false`, the `error` field contains: } ``` -**Successful Proof Generation Example::** +**Successful Proof Generation Example:** ```json { @@ -348,7 +350,7 @@ Workers need to know where to find input files for proof generation. The `--inpu cargo run --release --bin zisk-worker -- --elf program.elf --inputs-folder /data/inputs/ # Coordinator requests proof for "input.bin" -> Worker looks for "/data/inputs/input.bin" -cargo run --release --bin zisk-coordinator -- prove --input input.bin --compute-capacity 10 +cargo run --release --bin zisk-coordinator -- prove --inputs-uri input.bin --compute-capacity 10 ``` The table below lists the available configuration options for the Worker: @@ -365,7 +367,6 @@ The table below lists the available configuration options for the Worker: | `logging.level` | - | RUST_LOG | String | debug | Logging level (error, warn, info, debug, trace) | | `logging.format` | - | - | String | pretty | Logging format (pretty, json, compact) | | `logging.file_path` | - | - | String | - | *Optional*. Log file path (enables file logging) | -| - | `--witness-lib` | - | String | ~/.zisk/bin/libzisk_witness.so | Path to witness computation dynamic library | | - | `--proving-key` | - | String | ~/.zisk/provingKey | Path to setup folder | | - | `--elf` | - | String | - | Path to ELF file | | - | `--asm` | - | String | ~/.zisk/cache | Path to ASM file (mutually exclusive with `--emulator`) | @@ -376,9 +377,11 @@ The table below lists the available configuration options for the Worker: | - | `-d`, `--debug` | - | String | - | Enable debug mode with optional component filter | | - | `--verify-constraints` | - | Boolean | false | Whether to verify constraints | | - | `--unlock-mapped-memory` | - | Boolean | false | Unlock memory map for the ROM file (mutually exclusive with `--emulator`) | -| - | `-f`, `--final-snark` | - | Boolean | false | Whether to generate the final SNARK | +| - | `--hints` | - | Boolean | false | Enable precompile hints processing | +| - | `-m`, `--minimal-memory` | - | Boolean | false | Use minimal memory mode | +| - | `-r`, `--rma` | - | Boolean | false | Enable RMA mode | | - | `-z`, `--preallocate` | - | Boolean | false | GPU preallocation flag | -| - | `-t`, `--max-streams` | | - | Number | - | Maximum number of GPU streams | +| - | `-t`, `--max-streams` | - | Number | - | Maximum number of GPU streams | | - | `-n`, `--number-threads-witness` | - | Number | - | Number of threads for witness computation | | - | `-x`, `--max-witness-stored` | - | Number | - | Maximum number of witnesses to store in memory | @@ -418,16 +421,57 @@ format = "pretty" file_path = "/var/log/distributed/worker-001.log" ``` -## Launching a proof +## Launching a Proof -To launch a proof generation request, use the `prove` command of the `zisk-coordinator` binary, specifying the input filename and desired compute capacity. +To launch a proof generation request, use the `prove` subcommand of the `zisk-coordinator` binary. This sends an RPC request to a running coordinator instance. ```bash -cargo run --release --bin zisk-coordinator -- prove --input --compute-capacity 10 +cargo run --release --bin zisk-coordinator -- prove --inputs-uri --compute-capacity 10 ``` The `--compute-capacity` flag indicates the total compute units required to generate a proof. The coordinator will assign one or more workers to meet this capacity, distributing the workload if multiple workers are needed. Requests exceeding the combined capacity of available workers will not be processed and an error will be returned. +### Prove Subcommand Arguments + +| CLI Argument | Short | Type | Default | Description | +|---|---|---|---|---| +| `--inputs-uri` | - | String | - | Path to the input file for proof generation | +| `--compute-capacity` | `-c` | Number | *required* | Total compute units required for the proof | +| `--coordinator-url` | - | String | http://127.0.0.1:50051 | URL of the coordinator to send the request to | +| `--data-id` | - | String | Auto (from filename or UUID) | Custom identifier for the proof job | +| `--hints-uri` | - | String | - | Path/URI to the precompile hints source | +| `--stream-hints` | - | Boolean | false | Stream hints from the coordinator to workers via gRPC (see [Hints Stream](../book/getting_started/hints_stream.md)) | +| `--direct-inputs` | `-x` | Boolean | false | Send input data inline via gRPC instead of as a file path | +| `--minimal-compute-capacity` | `-m` | Number | Same as `--compute-capacity` | Minimum acceptable compute capacity (allows partial worker allocation) | +| `--simulated-node` | - | Number | - | Simulated node ID (for testing) | + +### Input and Hints Modes + +The `prove` subcommand supports two modes for delivering inputs and hints to workers: + +**Input modes** (controlled by `--inputs-uri` and `--direct-inputs`): +- **Path mode** (default): The coordinator sends the input file path to workers. Workers must have access to the file at the specified path. +- **Data mode** (`--direct-inputs`): The coordinator reads the input file and sends its contents inline via gRPC. Workers do not need local access to the file. + +**Hints modes** (controlled by `--hints-uri` and `--stream-hints`): +- **Path mode** (default): The coordinator sends the hints URI to workers. Each worker loads hints from the specified path independently. +- **Streaming mode** (`--stream-hints`): The coordinator reads hints from the URI and broadcasts them to all workers in real-time via gRPC. See the [Hints Stream documentation](../book/getting_started/hints_stream.md) for details. + +**Examples:** +```bash +# Basic proof with file path inputs +zisk-coordinator prove --inputs-uri /data/inputs/my_input.bin --compute-capacity 10 + +# Send input data directly (workers don't need local file access) +zisk-coordinator prove --inputs-uri /data/inputs/my_input.bin -x --compute-capacity 10 + +# With precompile hints in path mode (workers load hints locally) +zisk-coordinator prove --inputs-uri input.bin --hints-uri /data/hints/hints.bin --compute-capacity 10 + +# With precompile hints in streaming mode (coordinator broadcasts to workers) +zisk-coordinator prove --inputs-uri input.bin --hints-uri unix:///tmp/hints.sock --stream-hints --compute-capacity 10 +``` + ## Administrative Operations ### Health Checks and Monitoring diff --git a/distributed/crates/common/Cargo.toml b/distributed/crates/common/Cargo.toml index cb6ca3f3f..295ace4ac 100644 --- a/distributed/crates/common/Cargo.toml +++ b/distributed/crates/common/Cargo.toml @@ -6,6 +6,8 @@ edition = "2021" [dependencies] proofman = { workspace = true } proofman-common = { workspace = true } +proofman-util = { workspace = true } +zisk-common = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } tracing-appender = { workspace = true } diff --git a/distributed/crates/common/src/dto.rs b/distributed/crates/common/src/dto.rs index 786c4f832..011944d16 100644 --- a/distributed/crates/common/src/dto.rs +++ b/distributed/crates/common/src/dto.rs @@ -4,8 +4,6 @@ //! These DTOs serve as the canonical data structures for business logic, separate from external //! representations like gRPC protobuf types or serialization formats. -use std::{fmt::Display, path::PathBuf}; - use crate::{ComputeCapacity, DataId, JobId, JobPhase, JobState, WorkerId, WorkerState}; use borsh::{BorshDeserialize, BorshSerialize}; use chrono::{DateTime, Utc}; @@ -66,27 +64,31 @@ pub struct SystemStatusDto { } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -#[repr(i32)] -pub enum InputModeDto { - InputModeNone = 0, // No input provided - InputModePath(PathBuf) = 1, // Input will be provided as a path - InputModeData(PathBuf) = 2, // Input data will be sent directly -} - -impl Display for InputModeDto { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - InputModeDto::InputModeNone => write!(f, "None"), - InputModeDto::InputModePath(path) => write!(f, "Path({})", path.display()), - InputModeDto::InputModeData(path) => write!(f, "Data({})", path.display()), - } - } +pub enum InputsModeDto { + // No inputs are provided + InputsNone, + /// Inputs are provided as a complete payload referenced by a URI. + InputsPath(String), + /// Inputs are provided directly as data. + InputsData(String), +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum HintsModeDto { + /// No hints are provided. + HintsNone, + /// Hints are provided as a complete payload referenced by a URI. + HintsPath(String), + /// Hints will be streamed from the given URI endpoint. + HintsStream(String), } pub struct LaunchProofRequestDto { pub data_id: DataId, pub compute_capacity: u32, - pub input_mode: InputModeDto, + pub minimal_compute_capacity: u32, + pub inputs_mode: InputsModeDto, + pub hints_mode: HintsModeDto, pub simulated_node: Option, } @@ -114,6 +116,30 @@ pub enum CoordinatorMessageDto { WorkerRegisterResponse(WorkerRegisterResponseDto), ExecuteTaskRequest(ExecuteTaskRequestDto), JobCancelled(JobCancelledDto), + StreamData(StreamDataDto), +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum StreamMessageKind { + /// Marks the beginning of a stream. No payload is expected. + Start, + /// Contains a chunk of stream data. + Data, + /// Marks the end of a stream. No payload is expected. + End, +} + +#[derive(Debug, Clone)] +pub struct StreamDataDto { + pub job_id: JobId, + pub stream_type: StreamMessageKind, + pub stream_payload: Option, +} + +#[derive(Debug, Clone)] +pub struct StreamPayloadDto { + pub sequence_number: u32, + pub payload: Vec, } pub struct HeartbeatDto { @@ -152,6 +178,7 @@ pub enum ExecuteTaskRequestTypeDto { pub struct ContributionParamsDto { pub data_id: DataId, pub input_source: InputSourceDto, + pub hints_source: HintsSourceDto, pub rank_id: u32, pub total_workers: u32, pub worker_allocation: Vec, @@ -165,10 +192,48 @@ pub enum InputSourceDto { InputNull, } +#[derive(Debug, Clone, BorshSerialize, BorshDeserialize)] +pub enum HintsSourceDto { + HintsPath(String), + HintsStream(String), + HintsNull, +} + pub struct ProveParamsDto { pub challenges: Vec, } +#[derive(Clone)] +pub struct WitnessInfoDto { + /// Witness computation time in milliseconds + pub witness_time: f32, + pub publics: Vec, + pub proof_values: Vec, + pub summary_info: String, +} + +#[derive(Clone)] +pub struct ZiskExecutorTimeDto { + /// Total duration in milliseconds + pub total_duration: f32, + /// Execution duration in milliseconds + pub execution_duration: f32, + /// Count and plan duration in milliseconds + pub count_and_plan_duration: f32, + /// Count and plan memory operations duration in milliseconds + pub count_and_plan_mo_duration: f32, + /// ASM execution info (time in milliseconds) + pub asm_execution_duration: Option, + /// Time when task was received by worker (milliseconds since UNIX epoch, f64 for precision) + pub task_received_time: f64, +} + +#[derive(Clone)] +pub struct AsmExecutionInfoDto { + pub time: f32, + pub mhz: f32, +} + #[derive(Clone)] pub struct ChallengesDto { pub worker_index: u32, @@ -180,15 +245,7 @@ pub struct AggParamsDto { pub agg_proofs: Vec, pub last_proof: bool, pub final_proof: bool, - pub verify_constraints: bool, - pub aggregation: bool, - pub rma: bool, - pub final_snark: bool, - pub verify_proofs: bool, - pub save_proofs: bool, - pub test_mode: bool, - pub output_dir_path: String, - pub minimal_memory: bool, + pub compressed: bool, } pub struct ProofDto { @@ -210,8 +267,14 @@ pub struct ExecuteTaskResponseDto { pub result_data: ExecuteTaskResponseResultDataDto, } +pub struct ContributionsResultDataDto { + pub challenges: Vec, + pub witness_info: WitnessInfoDto, + pub zisk_executor_time: ZiskExecutorTimeDto, +} + pub enum ExecuteTaskResponseResultDataDto { - Challenges(Vec), + Challenges(ContributionsResultDataDto), Proofs(Vec), FinalProof(FinalProofDto), } diff --git a/distributed/crates/common/src/tracing.rs b/distributed/crates/common/src/tracing.rs index 7c1b1db1f..a4aebaf69 100644 --- a/distributed/crates/common/src/tracing.rs +++ b/distributed/crates/common/src/tracing.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use proofman_common::set_global_rank; +use proofman_common::{set_global_rank, RankInfo}; use serde::{Deserialize, Serialize}; use std::fmt::{self, Display}; use tracing_subscriber::{ @@ -68,7 +68,7 @@ impl fmt::Display for LogFormat { /// Returns `Ok(None)` if only console logging is configured. pub fn init( logging_config: Option<&LoggingConfig>, - rank: Option, + rank: Option<&RankInfo>, ) -> Result> { // Prioritize logging_config values over environment variables let log_level = @@ -97,7 +97,9 @@ pub fn init( } if let Some(r) = rank { - set_global_rank(r); + if r.n_processes > 1 { + set_global_rank(r.world_rank); + } } // Apply console logging with optional file logging diff --git a/distributed/crates/common/src/types.rs b/distributed/crates/common/src/types.rs index fba1bfd56..883f26a85 100644 --- a/distributed/crates/common/src/types.rs +++ b/distributed/crates/common/src/types.rs @@ -6,17 +6,18 @@ use borsh::{BorshDeserialize, BorshSerialize}; use chrono::{DateTime, Utc}; -use proofman::ContributionsInfo; +use proofman::{ContributionsInfo, ProvePhaseInputs, WitnessInfo}; +use proofman_common::ProofOptions; use serde::{Deserialize, Serialize}; use std::{ collections::HashMap, fmt::{self, Debug, Display}, ops::Range, - path::PathBuf, }; use tracing::error; +use zisk_common::ZiskExecutorTime; -use crate::{InputModeDto, InputSourceDto}; +use crate::{HintsModeDto, HintsSourceDto, InputSourceDto, InputsModeDto}; /// Job ID wrapper for type safety #[derive( @@ -240,27 +241,34 @@ impl Debug for JobStats { pub struct Job { pub job_id: JobId, pub start_times: HashMap>, + pub task_received_time: Option>, pub duration_ms: Option, pub state: JobState, pub data_id: DataId, - pub input_mode: InputModeDto, + pub inputs_mode: InputsModeDto, + pub hints_mode: HintsModeDto, pub compute_capacity: ComputeCapacity, + pub minimal_compute_capacity: ComputeCapacity, pub workers: Vec, pub agg_worker_id: Option, pub partitions: Vec>, pub results: HashMap>, pub stats: HashMap, pub challenges: Option>, + pub witness_info: Option, pub execution_mode: JobExecutionMode, pub final_proof: Option>, pub executed_steps: Option, } impl Job { + #[allow(clippy::too_many_arguments)] pub fn new( data_id: DataId, - input_mode: InputModeDto, + inputs_mode: InputsModeDto, + hints_mode: HintsModeDto, compute_capacity: ComputeCapacity, + minimal_compute_capacity: ComputeCapacity, selected_workers: Vec, partitions: Vec>, execution_mode: JobExecutionMode, @@ -271,14 +279,18 @@ impl Job { duration_ms: None, state: JobState::Created, data_id, - input_mode, + inputs_mode, + hints_mode, compute_capacity, + minimal_compute_capacity, workers: selected_workers, agg_worker_id: None, partitions, results: HashMap::new(), stats: HashMap::new(), + task_received_time: None, challenges: None, + witness_info: None, execution_mode, final_proof: None, executed_steps: None, @@ -378,9 +390,17 @@ pub struct AggProofData { pub values: Vec, } +#[derive(Debug, Clone)] +pub struct ContributionsResult { + pub challenges: Vec, + pub witness_info: WitnessInfo, + pub zisk_executor_time: ZiskExecutorTime, + pub task_received_time: Option>, +} + #[derive(Debug, Clone)] pub enum JobResultData { - Challenges(Vec), + Challenges(ContributionsResult), AggProofs(Vec), } @@ -395,6 +415,7 @@ pub struct JobResult { pub struct DataCtx { pub data_id: DataId, pub input_source: InputSourceDto, + pub hints_source: HintsSourceDto, } #[repr(u8)] @@ -403,6 +424,8 @@ pub enum JobPhase { Contributions, Prove, Aggregate, + ContributionsInputsStream, + ContributionsHintsStream, } impl TryFrom for JobPhase { @@ -413,6 +436,8 @@ impl TryFrom for JobPhase { 0 => Ok(JobPhase::Contributions), 1 => Ok(JobPhase::Prove), 2 => Ok(JobPhase::Aggregate), + 3 => Ok(JobPhase::ContributionsInputsStream), + 4 => Ok(JobPhase::ContributionsHintsStream), _ => Err(anyhow::anyhow!("Invalid JobPhase byte: {}", value)), } } @@ -424,6 +449,8 @@ impl fmt::Display for JobPhase { JobPhase::Contributions => write!(f, "Contributions"), JobPhase::Prove => write!(f, "Prove"), JobPhase::Aggregate => write!(f, "Aggregate"), + JobPhase::ContributionsInputsStream => write!(f, "ContributionsInputsStream"), + JobPhase::ContributionsHintsStream => write!(f, "ContributionsHintsStream"), } } } @@ -438,13 +465,35 @@ pub struct AggregationParams { pub agg_proofs: Vec, pub last_proof: bool, pub final_proof: bool, - pub verify_constraints: bool, - pub aggregation: bool, - pub rma: bool, - pub final_snark: bool, - pub verify_proofs: bool, - pub save_proofs: bool, - pub test_mode: bool, - pub output_dir_path: PathBuf, - pub minimal_memory: bool, + pub compressed: bool, +} + +#[derive(Debug, Clone, BorshSerialize, BorshDeserialize)] +pub struct PartitionInfo { + pub total_compute_units: usize, + pub allocation: Vec, + pub worker_idx: usize, +} + +/// Message structures for MPI broadcast to ensure type safety +#[derive(borsh::BorshSerialize, borsh::BorshDeserialize)] +pub struct ContributionsMessage { + pub job_id: JobId, + pub phase_inputs: ProvePhaseInputs, + pub options: ProofOptions, + pub input_source: InputSourceDto, + pub hints_source: HintsSourceDto, + pub partition_info: PartitionInfo, +} + +#[derive(borsh::BorshSerialize, borsh::BorshDeserialize)] +pub struct ProveMessage { + pub job_id: JobId, + pub phase_inputs: ProvePhaseInputs, + pub options: ProofOptions, +} + +#[derive(borsh::BorshSerialize, borsh::BorshDeserialize)] +pub struct StreamMessage { + pub data: Vec, } diff --git a/distributed/crates/coordinator/Cargo.toml b/distributed/crates/coordinator/Cargo.toml index ee7a02990..6584f790d 100644 --- a/distributed/crates/coordinator/Cargo.toml +++ b/distributed/crates/coordinator/Cargo.toml @@ -14,9 +14,11 @@ path = "src/cli/main.rs" zisk-distributed-grpc-api = { workspace = true } zisk-distributed-common = { workspace = true } zisk-common = { workspace = true } +zisk-sdk = { workspace = true } cargo-zisk = { workspace = true } proofman = { workspace = true } +proofman-util = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/distributed/crates/coordinator/src/cli/handler_coordinator.rs b/distributed/crates/coordinator/src/cli/handler_coordinator.rs index 9f32c6895..7d47f0572 100644 --- a/distributed/crates/coordinator/src/cli/handler_coordinator.rs +++ b/distributed/crates/coordinator/src/cli/handler_coordinator.rs @@ -14,13 +14,21 @@ pub async fn handle( port: Option, proofs_dir: Option, no_save_proofs: bool, + compressed_proofs: bool, webhook_url: Option, ) -> Result<()> { // Config file is now optional - if not provided, defaults will be used let config_file = config_file.or_else(|| std::env::var("ZISK_COORDINATOR_CONFIG_PATH").ok()); // Load configuration - let config = Config::load(config_file, port, proofs_dir, no_save_proofs, webhook_url)?; + let config = Config::load( + config_file, + port, + proofs_dir, + no_save_proofs, + compressed_proofs, + webhook_url, + )?; // Initialize tracing - keep guard alive for application lifetime let _log_guard = zisk_distributed_common::tracing::init(Some(&config.logging), None)?; diff --git a/distributed/crates/coordinator/src/cli/handler_prove.rs b/distributed/crates/coordinator/src/cli/handler_prove.rs index f8ff8cccb..ec8750903 100644 --- a/distributed/crates/coordinator/src/cli/handler_prove.rs +++ b/distributed/crates/coordinator/src/cli/handler_prove.rs @@ -5,16 +5,20 @@ use tonic::transport::Channel; use tracing::{error, info}; use zisk_distributed_coordinator::Config; use zisk_distributed_grpc_api::{ - zisk_distributed_api_client::ZiskDistributedApiClient, InputMode, LaunchProofRequest, + zisk_distributed_api_client::ZiskDistributedApiClient, HintsMode, InputMode, LaunchProofRequest, }; /// Handle the prove subcommand - makes RPC request to coordinator +#[allow(clippy::too_many_arguments)] pub async fn handle( coordinator_url: Option, data_id: Option, - input_path: Option, + inputs_uri: Option, + hints_uri: Option, direct_inputs: bool, + stream_hints: bool, compute_capacity: u32, + minimal_compute_capacity: Option, simulated_node: Option, ) -> Result<()> { // Initialize tracing - keep guard alive for application lifetime @@ -28,34 +32,45 @@ pub async fn handle( let channel = Channel::from_shared(coordinator_url)?.connect().await?; let mut client = ZiskDistributedApiClient::new(channel); - let (input_mode, input_path) = if let Some(ref path) = input_path { - if path.as_os_str().is_empty() { - return Err(anyhow::anyhow!("Input path cannot be empty")); - } - - let input_path = Some(path.to_string_lossy().to_string()); - - let input_mode = if direct_inputs { InputMode::Data } else { InputMode::Path }; + let inputs_mode = match inputs_uri { + None => InputMode::None, + Some(_) if direct_inputs => InputMode::Data, + Some(_) => InputMode::Path, + }; - (input_mode, input_path) - } else { - (InputMode::None, None) + let hints_mode = match hints_uri { + None => HintsMode::None, + Some(_) if stream_hints => HintsMode::Stream, + Some(_) => HintsMode::Path, }; // ID will be id if present, else input file name or random UUID let data_id = if let Some(id) = data_id { id - } else if let Some(ref path) = input_path { + } else if let Some(ref path) = inputs_uri { PathBuf::from(path).file_stem().unwrap().to_string_lossy().to_string() } else { uuid::Uuid::new_v4().to_string() }; + // Check compute capacity + let minimal_compute_capacity = minimal_compute_capacity.unwrap_or(compute_capacity); + if minimal_compute_capacity > compute_capacity { + return Err(anyhow::anyhow!( + "Minimal compute capacity ({}) cannot be greater than compute capacity ({})", + minimal_compute_capacity, + compute_capacity + )); + } + let launch_proof_request = LaunchProofRequest { data_id, compute_capacity, - input_mode: input_mode.into(), - input_path, + minimal_compute_capacity, + inputs_mode: inputs_mode.into(), + inputs_uri, + hints_mode: hints_mode.into(), + hints_uri, simulated_node, }; diff --git a/distributed/crates/coordinator/src/cli/main.rs b/distributed/crates/coordinator/src/cli/main.rs index 1d9b4df9f..db7107dc2 100644 --- a/distributed/crates/coordinator/src/cli/main.rs +++ b/distributed/crates/coordinator/src/cli/main.rs @@ -22,7 +22,7 @@ struct ZiskCoordinatorArgs { port: Option, /// Directory where to save generated proofs - #[arg(long, help = "Directory to save generated proofs", conflicts_with = "no_save_proof")] + #[arg(long, help = "Directory to save generated proofs", conflicts_with = "no_save_proofs")] proofs_dir: Option, /// Disable saving proofs @@ -34,6 +34,9 @@ struct ZiskCoordinatorArgs { )] no_save_proofs: bool, + #[arg(short = 'c', long, help = "Generate compressed proofs", default_value_t = false)] + compressed_proofs: bool, + /// Webhook URL to notify when a job finishes. /// /// The placeholder `{$job_id}` can be used in the URL and will be @@ -66,17 +69,36 @@ enum ZiskCoordinatorCommands { data_id: Option, /// Path to the input file - #[arg(long, help = "Path to the input file for proof generation")] - input: Option, + #[arg( + long, + help = "Path to the input file for proof generation", + conflicts_with = "hints_uri" + )] + inputs_uri: Option, + + /// Precompiles Hints path + #[arg( + long, + help = "Path to the precompiles hints file for proof generation", + conflicts_with = "inputs_uri" + )] + hints_uri: Option, /// Whether to send the input data directly - #[clap(short = 'x', long, default_value_t = false)] + #[clap(short = 'x', long, default_value_t = false, conflicts_with = "hints_uri")] direct_inputs: bool, + /// Whether to send the input data directly + #[clap(long, default_value_t = false, conflicts_with = "inputs_uri")] + stream_hints: bool, + /// Compute capacity needed to generate the proof #[arg(long, short, help = "Compute capacity needed to generate the proof")] compute_capacity: u32, + #[arg(long, short, help = "Minimal compute capacity needed to generate the proof")] + minimal_compute_capacity: Option, + #[arg(long, help = "Simulated node ID")] simulated_node: Option, }, @@ -91,18 +113,24 @@ async fn main() -> Result<()> { Some(ZiskCoordinatorCommands::Prove { coordinator_url, data_id, - input, + inputs_uri, + hints_uri, direct_inputs, + stream_hints, compute_capacity, + minimal_compute_capacity, simulated_node, }) => { // Run the "prove" subcommand handler_prove::handle( coordinator_url, data_id, - input, + inputs_uri, + hints_uri, direct_inputs, + stream_hints, compute_capacity, + minimal_compute_capacity, simulated_node, ) .await @@ -114,6 +142,7 @@ async fn main() -> Result<()> { args.port, args.proofs_dir, args.no_save_proofs, + args.compressed_proofs, args.webhook_url, ) .await diff --git a/distributed/crates/coordinator/src/config.rs b/distributed/crates/coordinator/src/config.rs index 7c18e7439..d374248ea 100644 --- a/distributed/crates/coordinator/src/config.rs +++ b/distributed/crates/coordinator/src/config.rs @@ -37,6 +37,7 @@ pub struct CoordinatorConfig { pub phase1_timeout_seconds: u64, pub phase2_timeout_seconds: u64, pub webhook_url: Option, + pub compressed_proofs: bool, } impl Config { @@ -50,6 +51,7 @@ impl Config { port: Option, proofs_dir: Option, no_save_proofs: bool, + compressed_proofs: bool, webhook_url: Option, ) -> Result { // Create proofs directory if it doesn't exist @@ -75,7 +77,8 @@ impl Config { .set_default("coordinator.max_workers_per_job", 10)? .set_default("coordinator.max_total_workers", 1000)? .set_default("coordinator.phase1_timeout_seconds", 300)? - .set_default("coordinator.phase2_timeout_seconds", 600)?; + .set_default("coordinator.phase2_timeout_seconds", 600)? + .set_default("coordinator.compressed_proofs", compressed_proofs)?; if let Some(path) = config_file { builder = builder.add_source(config::File::with_name(&path)); diff --git a/distributed/crates/coordinator/src/coordinator.rs b/distributed/crates/coordinator/src/coordinator.rs index c0b1d0683..af3b89455 100644 --- a/distributed/crates/coordinator/src/coordinator.rs +++ b/distributed/crates/coordinator/src/coordinator.rs @@ -33,15 +33,16 @@ use crate::{ config::Config, coordinator_errors::{CoordinatorError, CoordinatorResult}, - hooks, WorkersPool, + hooks, PrecompileHintsRelay, WorkersPool, }; use chrono::{DateTime, Utc}; use colored::Colorize; use dashmap::DashMap; -use proofman::ContributionsInfo; +use proofman::{ContributionsInfo, WitnessInfo}; use std::{ collections::HashMap, + fs, sync::{ atomic::{AtomicU64, Ordering}, Arc, @@ -50,16 +51,22 @@ use std::{ }; use tokio::sync::RwLock; use tracing::{error, info, warn}; +use zisk_common::io::{StreamSource, ZiskStream}; +use zisk_common::AsmExecutionInfo; +use zisk_common::ZiskExecutorTime; use zisk_distributed_common::{ AggParamsDto, AggProofData, ChallengesDto, ComputeCapacity, ContributionParamsDto, - CoordinatorMessageDto, DataId, ExecuteTaskRequestDto, ExecuteTaskRequestTypeDto, - ExecuteTaskResponseDto, ExecuteTaskResponseResultDataDto, HeartbeatAckDto, InputModeDto, - InputSourceDto, Job, JobExecutionMode, JobId, JobPhase, JobResult, JobResultData, JobState, - JobStatusDto, JobsListDto, LaunchProofRequestDto, LaunchProofResponseDto, MetricsDto, ProofDto, - ProveParamsDto, StatusInfoDto, SystemStatusDto, WorkerErrorDto, WorkerId, + ContributionsResult, CoordinatorMessageDto, DataId, ExecuteTaskRequestDto, + ExecuteTaskRequestTypeDto, ExecuteTaskResponseDto, ExecuteTaskResponseResultDataDto, + HeartbeatAckDto, HintsModeDto, HintsSourceDto, InputSourceDto, InputsModeDto, Job, + JobExecutionMode, JobId, JobPhase, JobResult, JobResultData, JobState, JobStatusDto, + JobsListDto, LaunchProofRequestDto, LaunchProofResponseDto, MetricsDto, ProofDto, + ProveParamsDto, StatusInfoDto, StreamMessageKind, SystemStatusDto, WorkerErrorDto, WorkerId, WorkerReconnectRequestDto, WorkerRegisterRequestDto, WorkerState, WorkersListDto, }; +use zisk_sdk::ZiskProofWithPublicValues; + /// Trait for sending messages to workers through various communication channels. /// /// This trait abstracts the message delivery mechanism, allowing different implementations @@ -105,7 +112,7 @@ pub struct Coordinator { start_time_utc: DateTime, /// Manages the pool of connected workers and their communication channels. - workers_pool: WorkersPool, + workers_pool: Arc, /// Concurrent storage for active jobs with fine-grained locking. jobs: DashMap>>, @@ -129,7 +136,7 @@ impl Coordinator { Self { config, start_time_utc, - workers_pool: WorkersPool::new(), + workers_pool: Arc::new(WorkersPool::new()), jobs: DashMap::new(), registrations: AtomicU64::new(0), reconnections: AtomicU64::new(0), @@ -284,6 +291,13 @@ impl Coordinator { )); } + if request.minimal_compute_capacity > request.compute_capacity { + error!("Invalid requested minimal compute capacity"); + return Err(CoordinatorError::InvalidArgument( + "Minimal compute capacity must not exceed compute capacity".to_string(), + )); + } + // Check if we have enough capacity to compute the proof is already checked // in create_job > partition_and_allocate_by_capacity @@ -328,21 +342,25 @@ impl Coordinator { self.pre_launch_proof(&request)?; let required_compute_capacity = ComputeCapacity::from(request.compute_capacity); + let minimal_compute_capacity = ComputeCapacity::from(request.minimal_compute_capacity); // Create and configure a new job let mut job = self .create_job( request.data_id.clone(), required_compute_capacity, - request.input_mode, + minimal_compute_capacity, + request.inputs_mode, + request.hints_mode, request.simulated_node, ) .await?; info!( - "[Job] Started {} successfully Inputs: {} Capacity: {} Workers: {}", + "[Job] Started {} successfully Inputs: {:?} Hints: {:?} Capacity: {} Workers: {}", job.job_id, - job.input_mode, + job.inputs_mode, + job.hints_mode, job.compute_capacity, job.workers.len(), ); @@ -402,13 +420,13 @@ impl Coordinator { // Clone job.final_proof and error if does not exist let final_proof = if job.state == JobState::Completed { - job.final_proof.clone().ok_or_else(|| { + Some(job.final_proof.clone().ok_or_else(|| { CoordinatorError::Internal( "Final proof is missing during post-launch processing".to_string(), ) - })? + })?) } else { - Vec::new() + None }; // Check if webhook URL is configured and spawn it in a separate task @@ -423,11 +441,19 @@ impl Coordinator { // Save proof to disk if state == JobState::Completed && !self.config.server.no_save_proofs { let folder = self.config.server.proofs_dir.clone(); - zisk_common::save_proof(job_id.as_str(), folder, &final_proof, false).map_err(|e| { - error!("Failed to save proof for job {}: {}", job_id, e); - job.cleanup(); - CoordinatorError::Internal(e.to_string()) + + let zisk_proof = ZiskProofWithPublicValues::new_from_vadcop_proof( + &final_proof.unwrap(), + self.config.coordinator.compressed_proofs, + ) + .map_err(|e| CoordinatorError::Internal(format!("Failed to create proof: {}", e)))?; + fs::create_dir_all(&folder).map_err(|e| { + CoordinatorError::Internal(format!("Failed to create proofs directory: {}", e)) })?; + let raw_path = folder.join(format!("proof_{}.fri", job_id)); + zisk_proof + .save(raw_path) + .map_err(|e| CoordinatorError::Internal(format!("Failed to save proof: {}", e)))?; } // Clean up process data for the job @@ -527,7 +553,9 @@ impl Coordinator { &self, data_id: DataId, required_compute_capacity: ComputeCapacity, - input_mode: InputModeDto, + minimal_compute_capacity: ComputeCapacity, + inputs_mode: InputsModeDto, + hints_mode: HintsModeDto, simulated_node: Option, ) -> CoordinatorResult { let execution_mode = if let Some(node) = simulated_node { @@ -538,7 +566,11 @@ impl Coordinator { let (selected_workers, mut partitions) = self .workers_pool - .partition_and_allocate_by_capacity(required_compute_capacity, execution_mode) + .partition_and_allocate_by_capacity( + required_compute_capacity, + minimal_compute_capacity, + execution_mode, + ) .await?; if let Some(simulated_node) = simulated_node { @@ -547,8 +579,10 @@ impl Coordinator { Ok(Job::new( data_id, - input_mode, + inputs_mode, + hints_mode, required_compute_capacity, + minimal_compute_capacity, selected_workers, partitions, execution_mode, @@ -604,12 +638,12 @@ impl Coordinator { job: &Job, active_workers: &[WorkerId], ) -> CoordinatorResult<()> { - let input_source = match job.input_mode { - InputModeDto::InputModePath(ref path) => { - InputSourceDto::InputPath(path.display().to_string()) + let input_source = match job.inputs_mode { + InputsModeDto::InputsPath(ref inputs_path) => { + InputSourceDto::InputPath(inputs_path.clone()) } - InputModeDto::InputModeData(ref path) => { - let inputs = tokio::fs::read(path).await.map_err(|e| { + InputsModeDto::InputsData(ref inputs_uri) => { + let inputs = tokio::fs::read(inputs_uri).await.map_err(|e| { CoordinatorError::Internal(format!( "Failed to read input data for job {}: {}", job.job_id, e @@ -617,19 +651,28 @@ impl Coordinator { })?; InputSourceDto::InputData(inputs) } - InputModeDto::InputModeNone => InputSourceDto::InputNull, + InputsModeDto::InputsNone => InputSourceDto::InputNull, + }; + + let hints_source = match &job.hints_mode { + HintsModeDto::HintsPath(ref hints_uri) => HintsSourceDto::HintsPath(hints_uri.clone()), + HintsModeDto::HintsStream(hints_uri) => { + // Hints will be streamed separately + HintsSourceDto::HintsStream(hints_uri.clone()) + } + HintsModeDto::HintsNone => HintsSourceDto::HintsNull, }; // Use Arc to avoid expensive clones let active_workers = active_workers.to_vec(); let total_workers = active_workers.len() as u32; - use futures::stream::{self, StreamExt}; - + let cloned_active_workers = active_workers.clone(); let tasks = active_workers.into_iter().enumerate().map(|(rank_id, worker_id)| { let job_id = job.job_id.clone(); let data_id = job.data_id.clone(); let input_source = input_source.clone(); + let hints_source = hints_source.clone(); let worker_allocation = job.partitions[rank_id].clone(); let job_compute_capacity = job.compute_capacity; let workers_pool = &self.workers_pool; @@ -641,6 +684,7 @@ impl Coordinator { params: ExecuteTaskRequestTypeDto::ContributionParams(ContributionParamsDto { data_id, input_source, + hints_source, rank_id: rank_id as u32, total_workers, worker_allocation, @@ -661,7 +705,10 @@ impl Coordinator { } }); - let results: Vec<_> = stream::iter(tasks).buffer_unordered(10).collect().await; + // Process tasks with a concurrency limit + use futures::stream::StreamExt; + + let results: Vec<_> = futures::stream::iter(tasks).buffer_unordered(16).collect().await; // Check for any errors for (worker_id, send_result, state_result) in results { @@ -680,6 +727,83 @@ impl Coordinator { })?; } + if matches!(hints_source, HintsSourceDto::HintsStream(_)) { + self.initialize_stream(job, cloned_active_workers)?; + } + + Ok(()) + } + + fn initialize_stream( + &self, + job: &Job, + cloned_active_workers: Vec, + ) -> Result<(), CoordinatorError> { + let hints_uri = match &job.hints_mode { + HintsModeDto::HintsStream(uri) => uri, + _ => unreachable!(), + }; + let job_id_clone = job.job_id.clone(); + let workers_clone = Arc::new(cloned_active_workers.clone()); + let workers_pool = Arc::clone(&self.workers_pool); + + // Async dispatcher - no blocking, pure async flow for maximum performance + let dispatcher = + move |sequence_number: u32, stream_type: StreamMessageKind, payload: Vec| { + use futures::future::join_all; + use zisk_distributed_common::{StreamDataDto, StreamPayloadDto}; + + let job_id = job_id_clone.clone(); + let workers = Arc::clone(&workers_clone); + let pool = Arc::clone(&workers_pool); + + Box::pin(async move { + let sends = workers.iter().map(|worker_id| { + let job_id = job_id.clone(); + let worker_id = worker_id.clone(); + let payload = payload.clone(); + let pool = Arc::clone(&pool); + let stream_type = stream_type.clone(); + + async move { + let msg = CoordinatorMessageDto::StreamData(StreamDataDto { + job_id: job_id.clone(), + stream_type, + stream_payload: Some(StreamPayloadDto { sequence_number, payload }), + }); + + if let Err(e) = pool.send_message(&worker_id, msg).await { + error!( + "Failed to send hints to worker {} for job {}: {}", + worker_id, job_id, e + ); + } + } + }); + + join_all(sends).await; + }) + }; + let hints_relay = PrecompileHintsRelay::new(dispatcher); + let mut stream = ZiskStream::new(hints_relay); + let stream_reader = StreamSource::from_uri(hints_uri).map_err(|e| { + CoordinatorError::Internal(format!( + "Failed to create hints stream reader for job {}: {}", + job.job_id, e + )) + })?; + stream.set_hints_stream_src(stream_reader).map_err(|e| { + CoordinatorError::Internal(format!( + "Failed to set hints stream for job {}: {}", + job.job_id, e + )) + })?; + stream.start_stream().map_err(|e| { + CoordinatorError::Internal(format!( + "Failed to start hints stream for job {}: {}", + job.job_id, e + )) + })?; Ok(()) } @@ -1053,6 +1177,9 @@ impl Coordinator { return Ok(()); } + // Print execution summary from Phase 1 completion + self.print_execution_summary(&job); + // Validate and extract challenges in a single operation to minimize lock time let challenges = self.validate_and_extract_challenges(&job).await?; @@ -1121,23 +1248,59 @@ impl Coordinator { result_data: ExecuteTaskResponseResultDataDto, ) -> CoordinatorResult { match result_data { - ExecuteTaskResponseResultDataDto::Challenges(challenges) => { - if challenges.is_empty() { + ExecuteTaskResponseResultDataDto::Challenges(ch_list) => { + if ch_list.challenges.is_empty() { return Err(CoordinatorError::InvalidRequest( "Received empty Challenges result data".to_string(), )); } - let contributions: Vec = challenges + let contributions: Vec = ch_list + .challenges .into_iter() .map(|challenge| ContributionsInfo { worker_index: challenge.worker_index, airgroup_id: challenge.airgroup_id as usize, challenge: challenge.challenge, + aggregated: false, }) .collect(); - Ok(JobResultData::Challenges(contributions)) + let witness_info = WitnessInfo { + summary_info: ch_list.witness_info.summary_info, + publics: ch_list.witness_info.publics, + proof_values: ch_list.witness_info.proof_values, + witness_time: ch_list.witness_info.witness_time, + }; + + let zisk_executor_time = ZiskExecutorTime { + total_duration: Duration::from_secs_f32( + ch_list.zisk_executor_time.total_duration / 1000.0, + ), + execution_duration: Duration::from_secs_f32( + ch_list.zisk_executor_time.execution_duration / 1000.0, + ), + count_and_plan_duration: Duration::from_secs_f32( + ch_list.zisk_executor_time.count_and_plan_duration / 1000.0, + ), + count_and_plan_mo_duration: Duration::from_secs_f32( + ch_list.zisk_executor_time.count_and_plan_mo_duration / 1000.0, + ), + asm_execution_duration: ch_list.zisk_executor_time.asm_execution_duration.map( + |asm_info| AsmExecutionInfo { time: asm_info.time, mhz: asm_info.mhz }, + ), + }; + + Ok(JobResultData::Challenges(ContributionsResult { + witness_info, + challenges: contributions, + zisk_executor_time, + task_received_time: chrono::DateTime::::from_timestamp( + (ch_list.zisk_executor_time.task_received_time / 1000.0) as i64, + ((ch_list.zisk_executor_time.task_received_time % 1000.0) * 1_000_000.0) + as u32, + ), + })) } _ => Err(CoordinatorError::InvalidRequest( "Expected Challenges result data for Phase1".to_string(), @@ -1145,6 +1308,25 @@ impl Coordinator { } } + /// Prints execution summary information from Phase 1 completion. + /// + /// Extracts and displays execution information from the first completed worker's + /// contribution results, including timing, summary info, and key metrics. + /// + /// # Parameters + /// + /// * `job` - Reference to the job containing Phase 1 results + fn print_execution_summary(&self, job: &Job) { + // Find the first completed contribution result to extract WitnessInfo summary + if let Some(contributions_results) = job.results.get(&JobPhase::Contributions) { + if let Some((_worker_id, job_result)) = contributions_results.iter().next() { + if let JobResultData::Challenges(contributions_result) = &job_result.data { + info!("Execution Summary: {}", contributions_result.witness_info.summary_info); + } + } + } + } + /// Checks if all workers have completed Phase 1 contributions. /// /// # Parameters @@ -1155,21 +1337,65 @@ impl Coordinator { job.results.get(&JobPhase::Contributions).map(|r| r.len()).unwrap_or(0); let end_time = Utc::now(); - let duration = end_time.signed_duration_since( - job.start_times.get(&JobPhase::Contributions).unwrap_or_else(|| { - error!("Missing start time for Phase1 in job {}", job.job_id); - &end_time - }), - ); + let phase_start_time = job.start_times.get(&JobPhase::Contributions).unwrap_or_else(|| { + error!("Missing start time for Phase1 in job {}", job.job_id); + &end_time + }); + let duration = end_time.signed_duration_since(phase_start_time); let duration_ms = Duration::from_millis(duration.num_milliseconds() as u64); + // Get execution info from the worker's result + let worker_result = + job.results.get(&JobPhase::Contributions).and_then(|results| results.get(worker_id)); + + let (asm_info_str, witness_time_str, delay_time_str) = if let Some(job_result) = + worker_result + { + match &job_result.data { + JobResultData::Challenges(contributions_result) => { + // Calculate delay: time from coordinator sending job to worker receiving task + let delay_duration = contributions_result + .task_received_time + .map(|task_received| task_received.signed_duration_since(*phase_start_time)) + .unwrap_or_else(chrono::Duration::zero); + let delay_ms = delay_duration.num_milliseconds().max(0) as f32; + let delay_str = format!(", Delay: {:.3}s", delay_ms / 1000.0); + + let asm_str = contributions_result + .zisk_executor_time + .asm_execution_duration + .as_ref() + .map(|asm_info| { + format!( + ", Asm Execution: {:.3}s at {} MHz", + asm_info.time, asm_info.mhz + ) + }) + .unwrap_or_default(); + + let witness_str = format!( + ", Witness: {:.3}s", + contributions_result.witness_info.witness_time / 1000.0 + ); + + (asm_str, witness_str, delay_str) + } + _ => (String::new(), String::new(), String::new()), + } + } else { + (String::new(), String::new(), String::new()) + }; + info!( - "[Phase1] {} finished phase 1 for {} ({}/{} workers done, {:.3}s)", + "[Phase1] {} finished phase 1 for {} ({}/{} workers done, Phase: {:.3}s{}{}{})", worker_id, job.job_id, phase1_results_len, job.workers.len(), - duration_ms.as_secs_f32() + duration_ms.as_secs_f32(), + delay_time_str, + witness_time_str, + asm_info_str, ); // Ensure we have results from all assigned workers before proceeding. @@ -1232,7 +1458,7 @@ impl Coordinator { // Simulation mode: replicate single worker's challenges across all expected workers // This maintains algorithm correctness while using minimal computational resources let first_challenges = match phase1_results.values().next().unwrap().data { - JobResultData::Challenges(ref values) => values, + JobResultData::Challenges(ref values) => &values.challenges, _ => unreachable!("Expected Challenges data in Phase1 results"), }; @@ -1241,13 +1467,47 @@ impl Coordinator { } else { // Standard mode: aggregate challenges from all participating workers // Each worker contributes their portion of the overall challenge space - let challenges: Vec> = phase1_results - .values() - .map(|results| match &results.data { - JobResultData::Challenges(values) => values.clone(), - _ => unreachable!("Expected Challenges data in Phase1 results"), - }) - .collect(); + let (challenges, witness_info): (Vec>, Vec) = + phase1_results + .values() + .map(|results| match &results.data { + JobResultData::Challenges(values) => { + (values.challenges.clone(), values.witness_info.clone()) + } + _ => unreachable!("Expected Challenges data in Phase1 results"), + }) + .unzip(); + + let first = witness_info.first().ok_or_else(|| { + CoordinatorError::Internal(format!("No witness info found in job {}", job.job_id)) + })?; + + let mut mismatched_workers = Vec::new(); + + for (worker_idx, info) in witness_info.iter().enumerate() { + if info.publics != first.publics || info.proof_values != first.proof_values { + mismatched_workers.push((worker_idx, info)); + } + } + + if !mismatched_workers.is_empty() { + // Format detailed mismatch report + let mismatch_report: Vec = mismatched_workers + .iter() + .map(|(idx, info)| { + format!( + "Worker {} differs: publics={:?}, proof_values={:?}", + idx, info.publics, info.proof_values + ) + }) + .collect(); + + return Err(CoordinatorError::Internal(format!( + "WitnessInfo mismatch in job {}:\n{}", + job.job_id, + mismatch_report.join("\n") + ))); + } // Flatten all worker contributions into unified challenge vector // Maintains worker indexing and airgroup assignments for proper coordination @@ -1371,6 +1631,10 @@ impl Coordinator { let all_done = self.check_phase2_completion(&job, &worker_id).await?; + if all_done { + job.start_times.insert(JobPhase::Aggregate, Utc::now()); + } + let proofs = self.collect_worker_proofs(&job, &agg_worker_id, &worker_id)?; drop(job); // Release jobs lock early @@ -1677,15 +1941,7 @@ impl Coordinator { agg_proofs: proofs, last_proof: all_done, final_proof: all_done, - verify_constraints: true, - aggregation: true, - rma: true, - final_snark: false, - verify_proofs: true, - save_proofs: false, - test_mode: false, - output_dir_path: "".to_string(), - minimal_memory: false, + compressed: self.config.coordinator.compressed_proofs, }), }; @@ -1796,22 +2052,199 @@ impl Coordinator { "Steps: N/A".to_string().red().bold() }; info!( - "{} {} ({:.3}s+{:.3}s+{:.3}s) {} Inputs: {}, Capacity: {} ", + "{} {} ({:.3}s+{:.3}s+{:.3}s) {} Inputs: {:?}, Capacity: {} ", header, duration_str, phase1_duration.as_seconds_f32(), phase2_duration.as_seconds_f32(), phase3_duration.as_seconds_f32(), steps_str, - job.input_mode, + job.inputs_mode, job.compute_capacity, ); + let workers = job.workers.clone(); + + if workers.len() > 1 { + for phase in [JobPhase::Contributions, JobPhase::Prove] { + if let Some(results) = job.results.get(&phase) { + if let Some(start_time) = job.start_times.get(&phase) { + let mut durations_ms: Vec<(WorkerId, i64)> = results + .iter() + .map(|(worker_id, result)| { + let duration = result.end_time.signed_duration_since(start_time); + (worker_id.clone(), duration.num_milliseconds()) + }) + .collect(); + + if durations_ms.len() > 1 { + durations_ms.sort_by_key(|(_, duration)| *duration); + + let (best_worker, best_duration) = &durations_ms[0]; + let (worst_worker, worst_duration) = durations_ms.last().unwrap(); + + let avg_duration = durations_ms.iter().map(|(_, d)| d).sum::() + as f64 + / durations_ms.len() as f64; + + let diff_percentage = if *best_duration > 0 { + ((*worst_duration - *best_duration) as f64 / *best_duration as f64) + * 100.0 + } else { + 0.0 + }; + + info!( + "[Job] {:?} Performance - Avg: {:.3}s, Best: {} ({:.3}s), Worst: {} ({:.3}s), Diff: {:.1}%", + phase, + avg_duration / 1000.0, + best_worker, + *best_duration as f64 / 1000.0, + worst_worker, + *worst_duration as f64 / 1000.0, + diff_percentage + ); + } + + // For Phase 1, also show delay, witness, and ASM execution statistics + if phase == JobPhase::Contributions && durations_ms.len() > 1 { + // Extract delay times (coordinator send to worker start) + let mut delays_ms: Vec<(WorkerId, i64)> = results + .iter() + .filter_map(|(worker_id, result)| { + if let JobResultData::Challenges(contrib) = &result.data { + contrib.task_received_time.map(|task_received| { + let delay = + task_received.signed_duration_since(start_time); + (worker_id.clone(), delay.num_milliseconds().max(0)) + }) + } else { + None + } + }) + .collect(); + + if !delays_ms.is_empty() { + delays_ms.sort_by_key(|(_, delay)| *delay); + let (best_delay_worker, best_delay) = &delays_ms[0]; + let (worst_delay_worker, worst_delay) = delays_ms.last().unwrap(); + let avg_delay = delays_ms.iter().map(|(_, d)| d).sum::() + as f64 + / delays_ms.len() as f64; + + let delay_diff_percentage = if *best_delay > 0 { + ((*worst_delay - *best_delay) as f64 / *best_delay as f64) + * 100.0 + } else { + 0.0 + }; + + info!( + "[Job] Contributions Delay - Avg: {:.3}s, Best: {} ({:.3}s), Worst: {} ({:.3}s), Diff: {:.1}%", + avg_delay / 1000.0, + best_delay_worker, + *best_delay as f64 / 1000.0, + worst_delay_worker, + *worst_delay as f64 / 1000.0, + delay_diff_percentage + ); + } + + // Extract witness times + let mut witness_times: Vec<(WorkerId, f32)> = results + .iter() + .filter_map(|(worker_id, result)| { + if let JobResultData::Challenges(contrib) = &result.data { + Some((worker_id.clone(), contrib.witness_info.witness_time)) + } else { + None + } + }) + .collect(); + + if !witness_times.is_empty() { + witness_times.sort_by(|(_, a), (_, b)| { + a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal) + }); + let (best_witness_worker, best_witness) = &witness_times[0]; + let (worst_witness_worker, worst_witness) = + witness_times.last().unwrap(); + let avg_witness = + witness_times.iter().map(|(_, t)| *t as f64).sum::() + / witness_times.len() as f64; + + let witness_diff_percentage = if *best_witness > 0.0 { + ((*worst_witness - *best_witness) as f64 / *best_witness as f64) + * 100.0 + } else { + 0.0 + }; + + info!( + "[Job] Contributions Witness - Avg: {:.3}s, Best: {} ({:.3}s), Worst: {} ({:.3}s), Diff: {:.1}%", + avg_witness / 1000.0, + best_witness_worker, + *best_witness as f64 / 1000.0, + worst_witness_worker, + *worst_witness as f64 / 1000.0, + witness_diff_percentage + ); + } + + // Extract ASM execution times + let mut asm_times: Vec<(WorkerId, f32, f32)> = results + .iter() + .filter_map(|(worker_id, result)| { + if let JobResultData::Challenges(contrib) = &result.data { + contrib + .zisk_executor_time + .asm_execution_duration + .as_ref() + .map(|asm| (worker_id.clone(), asm.time, asm.mhz)) + } else { + None + } + }) + .collect(); + + if !asm_times.is_empty() { + asm_times.sort_by(|(_, a, _), (_, b, _)| { + a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal) + }); + let (best_asm_worker, best_asm, best_mhz) = &asm_times[0]; + let (worst_asm_worker, worst_asm, worst_mhz) = + asm_times.last().unwrap(); + let avg_asm = + asm_times.iter().map(|(_, t, _)| *t as f64).sum::() + / asm_times.len() as f64; + + let asm_diff_percentage = if *best_asm > 0.0 { + ((*worst_asm - *best_asm) as f64 / *best_asm as f64) * 100.0 + } else { + 0.0 + }; + + info!( + "[Job] Contributions ASM - Avg: {:.3}s, Best: {} ({:.3}s @ {:.1}MHz), Worst: {} ({:.3}s @ {:.1}MHz), Diff: {:.1}%", + avg_asm, + best_asm_worker, + *best_asm, + *best_mhz, + worst_asm_worker, + *worst_asm, + *worst_mhz, + asm_diff_percentage + ); + } + } + } + } + } + } + // Print summary of the job let job_phases = vec![JobPhase::Contributions, JobPhase::Prove, JobPhase::Aggregate]; - let workers = job.workers.clone(); - info!("[Job] Summary for {}", job_id); for phase in job_phases { if let Some(result) = job.results.get(&phase) { diff --git a/distributed/crates/coordinator/src/coordinator_errors.rs b/distributed/crates/coordinator/src/coordinator_errors.rs index 7325ce9ce..617420eac 100644 --- a/distributed/crates/coordinator/src/coordinator_errors.rs +++ b/distributed/crates/coordinator/src/coordinator_errors.rs @@ -17,7 +17,7 @@ pub enum CoordinatorError { InsufficientCapacity, // Internal errors - logged but not exposed to clients - #[error("Internal service error")] + #[error("Internal service error: {0}")] Internal(String), #[error("Worker error: {0}")] diff --git a/distributed/crates/coordinator/src/hints_relay.rs b/distributed/crates/coordinator/src/hints_relay.rs new file mode 100644 index 000000000..7c63e51af --- /dev/null +++ b/distributed/crates/coordinator/src/hints_relay.rs @@ -0,0 +1,158 @@ +//! Precompile Hints Relay + +use anyhow::Result; +use std::future::Future; +use std::pin::Pin; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::{Arc, Mutex}; +use zisk_common::{ + io::StreamProcessor, CtrlHint, HintCode, PartialPrecompileHint, PrecompileHint, + PrecompileHintParseResult, +}; +use zisk_distributed_common::StreamMessageKind; + +type AsyncDispatcher = Arc< + dyn Fn(u32, StreamMessageKind, Vec) -> Pin + Send>> + + Send + + Sync, +>; + +pub struct PrecompileHintsRelay { + sequence_number: Arc, + dispatcher: AsyncDispatcher, + runtime_handle: tokio::runtime::Handle, + + /// Buffer for incomplete hint data between batches + pending_partial: Mutex>, +} + +impl PrecompileHintsRelay { + pub fn new(dispatcher: F) -> Self + where + F: Fn(u32, StreamMessageKind, Vec) -> Fut + Send + Sync + 'static, + Fut: Future + Send + 'static, + { + let dispatcher = Arc::new( + move |seq: u32, + stream_type: StreamMessageKind, + payload: Vec| + -> Pin + Send>> { + Box::pin(dispatcher(seq, stream_type, payload)) + }, + ); + + Self { + sequence_number: Arc::new(AtomicU32::new(0)), + dispatcher, + runtime_handle: tokio::runtime::Handle::current(), + pending_partial: Mutex::new(None), + } + } + + pub fn process_hints(&self, hints: &[u64], first_batch: bool) -> Result { + let mut has_ctrl_start = false; + let mut has_ctrl_end = false; + + // Take any pending partial hint from previous batch + let mut pending_partial = self.pending_partial.lock().unwrap().take(); + + // Parse hints and dispatch to pool + let mut idx = 0; + while idx < hints.len() { + let (parsed_hint, consumed) = + PrecompileHint::from_u64_slice(hints, idx, true, pending_partial.take())?; + + let hint = match parsed_hint { + PrecompileHintParseResult::Complete(hint) => hint, + PrecompileHintParseResult::Partial(partial) => { + // Store partial for next batch and exit loop + *self.pending_partial.lock().unwrap() = Some(partial); + break; + } + }; + let length = consumed; + + // Validate hint type is in valid range before accessing stats array + + // CTRL_START must be the first message of the first batch + if hint.hint_code == HintCode::Ctrl(CtrlHint::Start) { + if !first_batch { + return Err(anyhow::anyhow!( + "CTRL_START can only be sent as the first message in the stream" + )); + } + if idx != 0 { + return Err(anyhow::anyhow!( + "CTRL_START must be the first hint in the batch, but found at index {}", + idx + )); + } + has_ctrl_start = true; + } + + if has_ctrl_end { + return Err(anyhow::anyhow!( + "Received hint after CTRL_END: type {} at index {}", + hint.hint_code, + idx + )); + } + has_ctrl_end = hint.hint_code == HintCode::Ctrl(CtrlHint::End); + + idx += length; + } + + if has_ctrl_start { + self.send_hints_start(); + } + + // Call async dispatcher - blocks on async work for zero overhead + self.send_hints_data(hints); + + if has_ctrl_end { + self.send_hints_end(); + } + + Ok(has_ctrl_end) + } + + fn send_hints_start(&self) { + let seq_num = self.sequence_number.fetch_add(1, Ordering::SeqCst); + + self.runtime_handle.block_on((self.dispatcher)(seq_num, StreamMessageKind::Start, vec![])); + } + + fn send_hints_data(&self, hints: &[u64]) { + let seq_num = self.sequence_number.fetch_add(1, Ordering::SeqCst); + + // Safe conversion: &[u64] → Vec for wire protocol + let payload = unsafe { + std::slice::from_raw_parts(hints.as_ptr() as *const u8, std::mem::size_of_val(hints)) + } + .to_vec(); + + self.runtime_handle.block_on((self.dispatcher)(seq_num, StreamMessageKind::Data, payload)); + } + + fn send_hints_end(&self) { + let seq_num = self.sequence_number.fetch_add(1, Ordering::SeqCst); + + self.runtime_handle.block_on((self.dispatcher)(seq_num, StreamMessageKind::End, vec![])); + } + + /// Reset internal state for clean execution + fn reset_state(&self) { + self.sequence_number.store(0, Ordering::SeqCst); + *self.pending_partial.lock().unwrap() = None; + } +} + +impl StreamProcessor for PrecompileHintsRelay { + fn process_hints(&self, data: &[u64], first_batch: bool) -> Result { + self.process_hints(data, first_batch) + } + + fn reset(&self) { + self.reset_state(); + } +} diff --git a/distributed/crates/coordinator/src/lib.rs b/distributed/crates/coordinator/src/lib.rs index 66b9fdd71..98b4f9e92 100644 --- a/distributed/crates/coordinator/src/lib.rs +++ b/distributed/crates/coordinator/src/lib.rs @@ -2,6 +2,7 @@ mod config; mod coordinator; mod coordinator_errors; mod coordinator_grpc; +mod hints_relay; mod hooks; mod shutdown; mod workers_pool; @@ -9,5 +10,6 @@ mod workers_pool; pub use config::*; use coordinator::*; pub use coordinator_grpc::*; +pub use hints_relay::*; pub use shutdown::*; use workers_pool::*; diff --git a/distributed/crates/coordinator/src/workers_pool.rs b/distributed/crates/coordinator/src/workers_pool.rs index 293d931a8..67827999f 100644 --- a/distributed/crates/coordinator/src/workers_pool.rs +++ b/distributed/crates/coordinator/src/workers_pool.rs @@ -427,6 +427,7 @@ impl WorkersPool { pub async fn partition_and_allocate_by_capacity( &self, required_compute_capacity: ComputeCapacity, + minimal_compute_capacity: ComputeCapacity, execution_mode: JobExecutionMode, ) -> CoordinatorResult<(Vec, Vec>)> { // Simulation mode requires exactly one worker @@ -445,6 +446,12 @@ impl WorkersPool { )); } + if minimal_compute_capacity.compute_units > required_compute_capacity.compute_units { + return Err(CoordinatorError::InvalidArgument( + "Minimal compute capacity cannot exceed required capacity".to_string(), + )); + } + let workers = self.workers.write().await; // For simulation mode, replicate single worker multiple times @@ -470,7 +477,7 @@ impl WorkersPool { available_workers.iter().map(|(_, p)| p.compute_capacity.compute_units).sum(); // Check if we have enough total capacity - if required_compute_capacity.compute_units > available_capacity { + if minimal_compute_capacity.compute_units > available_capacity { return Err(CoordinatorError::InsufficientCapacity); } @@ -496,11 +503,10 @@ impl WorkersPool { // Step 2: Distribute work units using round-robin allocation let num_workers = selected_workers.len(); - let total_units = required_compute_capacity.compute_units; let mut worker_allocations = vec![Vec::new(); num_workers]; // Round-robin assignment of compute units - for unit in 0..total_units { + for unit in 0..total_capacity { let worker_idx = (unit as usize) % num_workers; // Check if this worker still has capacity diff --git a/distributed/crates/grpc-api/proto/zisk_distributed_api.proto b/distributed/crates/grpc-api/proto/zisk_distributed_api.proto index c843c149e..3d1652a57 100644 --- a/distributed/crates/grpc-api/proto/zisk_distributed_api.proto +++ b/distributed/crates/grpc-api/proto/zisk_distributed_api.proto @@ -55,17 +55,27 @@ message WorkersListRequest { message LaunchProofRequest { string data_id = 1; uint32 compute_capacity = 2; - InputMode input_mode = 3; - optional string input_path = 4; - optional uint32 simulated_node = 5; // If set, indicates this is a simulated worker + uint32 minimal_compute_capacity = 3; + InputMode inputs_mode = 4; + optional string inputs_uri = 5; + HintsMode hints_mode = 6; + optional string hints_uri = 7; + optional uint32 simulated_node = 8; // If set, indicates this is a simulated worker } enum InputMode { INPUT_MODE_NONE = 0; // No input provided - INPUT_MODE_PATH = 1; // Input will be provided as a path + INPUT_MODE_PATH = 1; // Input will be provided as a PATH INPUT_MODE_DATA = 2; // Input data will be sent directly } +enum HintsMode { + HINTS_MODE_NONE = 0; // No hints provided + HINTS_MODE_PATH = 1; // Hints will be provided as a PATH + HINTS_MODE_STREAM = 2; // Hints will be sent as a stream +} + + // ============================================================================ // Admin Commands Response Messages // ============================================================================ @@ -166,6 +176,7 @@ message CoordinatorMessage { WorkerRegisterResponse register_response = 3; ExecuteTaskRequest execute_task = 4; JobCancelled job_cancelled = 5; + StreamData stream_data = 6; } } @@ -234,10 +245,12 @@ message ContributionParams { string input_path = 2; bytes input_data = 3; } - uint32 rank_id = 4; - uint32 total_workers = 5; - repeated uint32 worker_allocation = 6; - uint32 job_compute_units = 7; + optional string hints_path = 4; + bool hints_stream = 5; // Indicates whether hints will be streamed + uint32 rank_id = 6; + uint32 total_workers = 7; + repeated uint32 worker_allocation = 8; + uint32 job_compute_units = 9; } message ProveParams { @@ -248,15 +261,26 @@ message AggParams { ProofList agg_proofs = 1; bool last_proof = 2; bool final_proof = 3; - bool verify_constraints = 4; - bool aggregation = 5; - bool rma = 6; - bool final_snark = 7; - bool verify_proofs = 8; - bool save_proofs = 9; - bool test_mode = 10; - string output_dir_path = 11; - bool minimal_memory = 12; + bool compressed = 4; +} + +// Stream type enumeration +enum StreamType { + STREAM_TYPE_START = 0; + STREAM_TYPE_DATA = 1; + STREAM_TYPE_END = 2; +} + +// Streaming messages for data transfer +message StreamData { + string job_id = 1; + StreamType stream_type = 2; + optional StreamPayload payload = 3; +} + +message StreamPayload { + uint32 sequence_number = 3; + bytes payload = 4; } message ExecuteTaskResponse { @@ -272,8 +296,31 @@ message ExecuteTaskResponse { } } +message WitnessExecInfo { + float witness_time = 1; + repeated uint64 publics = 2; + repeated uint64 proof_values = 3; + string summary_info = 4; +} + +message AsmExecuteInfo { + float time = 1; + float mhz = 2; +} + +message ZiskExecuteTime { + float total_duration = 1; + float execution_duration = 2; + float count_and_plan_duration = 3; + float count_and_plan_mo_duration = 4; + optional AsmExecuteInfo asm_execution_duration = 5; + double task_received_time = 6; // Time when task was received by worker (in milliseconds since UNIX epoch) +} + message ChallengesList { repeated Challenges challenges = 1; + WitnessExecInfo witness_info = 2; + ZiskExecuteTime zisk_execution_time = 3; } message Challenges { diff --git a/distributed/crates/grpc-api/src/conversions.rs b/distributed/crates/grpc-api/src/conversions.rs index eaf863357..df2ac368c 100644 --- a/distributed/crates/grpc-api/src/conversions.rs +++ b/distributed/crates/grpc-api/src/conversions.rs @@ -8,18 +8,17 @@ //! The gRPC protobuf compiler generates Rust types that don't always match our internal domain //! model. All conversions implement the `From` and/or `Into` traits for idiomatic Rust usage. -use std::path::PathBuf; - use crate::{ contribution_params::InputSource, coordinator_message::Payload, execute_task_request, execute_task_response, job_status_response, jobs_list_response, launch_proof_response, system_status_response, workers_list_response, AggParams, Challenges, ComputeCapacity as GrpcComputeCapacity, ContributionParams, CoordinatorMessage, - ExecuteTaskRequest, ExecuteTaskResponse, Heartbeat, HeartbeatAck, InputMode, JobCancelled, - JobStatus, JobStatusResponse, JobsList, JobsListResponse, LaunchProofRequest, + ExecuteTaskRequest, ExecuteTaskResponse, Heartbeat, HeartbeatAck, HintsMode, InputMode, + JobCancelled, JobStatus, JobStatusResponse, JobsList, JobsListResponse, LaunchProofRequest, LaunchProofResponse, Metrics, Proof, ProofList, ProveParams, Shutdown, StatusInfoResponse, - SystemStatus, SystemStatusResponse, TaskType, WorkerError, WorkerInfo, WorkerReconnectRequest, - WorkerRegisterRequest, WorkerRegisterResponse, WorkersList, WorkersListResponse, + StreamData, StreamPayload, StreamType, SystemStatus, SystemStatusResponse, TaskType, + WorkerError, WorkerInfo, WorkerReconnectRequest, WorkerRegisterRequest, WorkerRegisterResponse, + WorkersList, WorkersListResponse, }; use zisk_distributed_common::*; @@ -155,21 +154,26 @@ impl From for SystemStatusResponse { impl From for LaunchProofRequest { fn from(dto: LaunchProofRequestDto) -> Self { - let (input_mode, input_path) = match dto.input_mode { - InputModeDto::InputModeNone => (InputMode::None, None), - InputModeDto::InputModePath(path) => { - (InputMode::Path, Some(path.display().to_string())) - } - InputModeDto::InputModeData(path) => { - (InputMode::Data, Some(path.display().to_string())) - } + let (inputs_mode, inputs_uri) = match dto.inputs_mode { + InputsModeDto::InputsNone => (InputMode::None, None), + InputsModeDto::InputsPath(inputs_path) => (InputMode::Path, Some(inputs_path)), + InputsModeDto::InputsData(inputs_uri) => (InputMode::Data, Some(inputs_uri)), + }; + + let (hints_mode, hints_uri) = match dto.hints_mode { + HintsModeDto::HintsNone => (HintsMode::None, None), + HintsModeDto::HintsPath(hints_path) => (HintsMode::Path, Some(hints_path)), + HintsModeDto::HintsStream(hints_uri) => (HintsMode::Stream, Some(hints_uri)), }; LaunchProofRequest { data_id: dto.data_id.into(), compute_capacity: dto.compute_capacity, - input_mode: input_mode.into(), - input_path, + minimal_compute_capacity: dto.minimal_compute_capacity, + inputs_mode: inputs_mode.into(), + inputs_uri, + hints_mode: hints_mode.into(), + hints_uri, simulated_node: dto.simulated_node, } } @@ -184,25 +188,35 @@ impl TryFrom for LaunchProofRequestDto { Ok(LaunchProofRequestDto { data_id: req.data_id.into(), compute_capacity: req.compute_capacity, - input_mode: match InputMode::try_from(req.input_mode).unwrap_or(InputMode::None) { - InputMode::None => InputModeDto::InputModeNone, + minimal_compute_capacity: req.minimal_compute_capacity, + inputs_mode: match InputMode::try_from(req.inputs_mode).unwrap_or(InputMode::None) { + InputMode::None => InputsModeDto::InputsNone, InputMode::Path => { - // Use the input_path field when available - if let Some(path) = req.input_path { - InputModeDto::InputModePath(PathBuf::from(path)) - } else { - return Err(anyhow::anyhow!( - "Input mode is Path but input_path is missing" - )); - } + let inputs_uri = req.inputs_uri.ok_or_else(|| { + anyhow::anyhow!("Input mode is Uri but inputs_uri is missing") + })?; + InputsModeDto::InputsPath(inputs_uri) } InputMode::Data => { - // Use the input_path field when available - if let Some(path) = req.input_path { - InputModeDto::InputModeData(PathBuf::from(path)) - } else { - InputModeDto::InputModeNone // Fallback if path is missing - } + let inputs_uri = req.inputs_uri.ok_or_else(|| { + anyhow::anyhow!("Input mode is Data but inputs_uri is missing") + })?; + InputsModeDto::InputsData(inputs_uri) + } + }, + hints_mode: match HintsMode::try_from(req.hints_mode).unwrap_or(HintsMode::None) { + HintsMode::None => HintsModeDto::HintsNone, + HintsMode::Path => { + let hints_uri = req.hints_uri.ok_or_else(|| { + anyhow::anyhow!("Hints mode is Uri but hints_uri is missing") + })?; + HintsModeDto::HintsPath(hints_uri) + } + HintsMode::Stream => { + let hints_uri = req.hints_uri.ok_or_else(|| { + anyhow::anyhow!("Hints mode is Stream but hints_uri is missing") + })?; + HintsModeDto::HintsStream(hints_uri) } }, simulated_node: req.simulated_node, @@ -260,6 +274,9 @@ impl From for CoordinatorMessage { CoordinatorMessageDto::JobCancelled(cancel) => { CoordinatorMessage { payload: Some(Payload::JobCancelled(cancel.into())) } } + CoordinatorMessageDto::StreamData(data) => { + CoordinatorMessage { payload: Some(Payload::StreamData(data.into())) } + } } } } @@ -328,14 +345,22 @@ impl From for ExecuteTaskRequest { impl From for ContributionParams { fn from(dto: ContributionParamsDto) -> Self { let input_source = match dto.input_source { - InputSourceDto::InputPath(path) => Some(InputSource::InputPath(path)), + InputSourceDto::InputPath(inputs_path) => Some(InputSource::InputPath(inputs_path)), InputSourceDto::InputData(data) => Some(InputSource::InputData(data)), InputSourceDto::InputNull => None, }; + let (hints_path, hints_stream) = match dto.hints_source { + HintsSourceDto::HintsPath(hints_path) => (Some(hints_path), false), + HintsSourceDto::HintsStream(hints_path) => (Some(hints_path), true), + HintsSourceDto::HintsNull => (None, false), + }; + ContributionParams { data_id: dto.data_id.as_string(), input_source, + hints_path, + hints_stream, rank_id: dto.rank_id, total_workers: dto.total_workers, worker_allocation: dto.worker_allocation, @@ -370,15 +395,7 @@ impl From for AggParams { agg_proofs: Some(ProofList { proofs: agg_proofs }), last_proof: dto.last_proof, final_proof: dto.final_proof, - verify_constraints: dto.verify_constraints, - aggregation: dto.aggregation, - rma: dto.rma, - final_snark: dto.final_snark, - verify_proofs: dto.verify_proofs, - save_proofs: dto.save_proofs, - test_mode: dto.test_mode, - output_dir_path: dto.output_dir_path, - minimal_memory: dto.minimal_memory, + compressed: dto.compressed, } } } @@ -402,7 +419,30 @@ impl From for ExecuteTaskResponseDto { challenge: c.challenge, }) .collect(); - Some(ExecuteTaskResponseResultDataDto::Challenges(challenges)) + let witness_info = challenges_list.witness_info.unwrap(); + let witness_info = WitnessInfoDto { + witness_time: witness_info.witness_time, + publics: witness_info.publics, + proof_values: witness_info.proof_values, + summary_info: witness_info.summary_info, + }; + let exec_time = challenges_list.zisk_execution_time.unwrap(); + let zisk_executor_time = ZiskExecutorTimeDto { + task_received_time: exec_time.task_received_time, + total_duration: exec_time.total_duration, + execution_duration: exec_time.execution_duration, + count_and_plan_duration: exec_time.count_and_plan_duration, + count_and_plan_mo_duration: exec_time.count_and_plan_mo_duration, + asm_execution_duration: exec_time.asm_execution_duration.map(|asm_info| { + AsmExecutionInfoDto { time: asm_info.time, mhz: asm_info.mhz } + }), + }; + + Some(ExecuteTaskResponseResultDataDto::Challenges(ContributionsResultDataDto { + witness_info, + challenges, + zisk_executor_time, + })) } Some(execute_task_response::ResultData::Proofs(proof_list)) => { let proofs: Vec = proof_list @@ -445,6 +485,60 @@ impl From for HeartbeatAckDto { } } +impl From for StreamType { + fn from(dto: StreamMessageKind) -> StreamType { + match dto { + StreamMessageKind::Start => StreamType::Start, + StreamMessageKind::Data => StreamType::Data, + StreamMessageKind::End => StreamType::End, + } + } +} + +impl From for StreamMessageKind { + fn from(stream_type: StreamType) -> StreamMessageKind { + match stream_type { + StreamType::Start => StreamMessageKind::Start, + StreamType::Data => StreamMessageKind::Data, + StreamType::End => StreamMessageKind::End, + } + } +} + +impl From for StreamData { + fn from(dto: StreamDataDto) -> Self { + StreamData { + job_id: dto.job_id.as_string(), + stream_type: StreamType::from(dto.stream_type) as i32, + payload: dto.stream_payload.map(Into::into), + } + } +} + +impl From for StreamDataDto { + fn from(data: StreamData) -> Self { + StreamDataDto { + job_id: JobId::from(data.job_id), + stream_type: StreamType::try_from(data.stream_type) + .map(StreamMessageKind::from) + .unwrap_or(StreamMessageKind::Data), + stream_payload: data.payload.map(Into::into), + } + } +} + +impl From for StreamPayload { + fn from(dto: StreamPayloadDto) -> Self { + StreamPayload { sequence_number: dto.sequence_number, payload: dto.payload } + } +} + +impl From for StreamPayloadDto { + fn from(payload: StreamPayload) -> Self { + StreamPayloadDto { sequence_number: payload.sequence_number, payload: payload.payload } + } +} + impl From for WorkerErrorDto { fn from(error: WorkerError) -> Self { WorkerErrorDto { diff --git a/distributed/crates/grpc-api/src/lib.rs b/distributed/crates/grpc-api/src/lib.rs index 36a8ad0fb..8d0c39e03 100644 --- a/distributed/crates/grpc-api/src/lib.rs +++ b/distributed/crates/grpc-api/src/lib.rs @@ -7,4 +7,4 @@ pub mod conversions; pub use distributed_api_proto::zisk_distributed_api_server; pub use distributed_api_proto::*; -pub const MAX_MESSAGE_SIZE: usize = 32 * 1024 * 1024; // 32 MB +pub const MAX_MESSAGE_SIZE: usize = 128 * 1024 * 1024; // 128 MB diff --git a/distributed/crates/worker/Cargo.toml b/distributed/crates/worker/Cargo.toml index 58487cd26..7afcf4274 100644 --- a/distributed/crates/worker/Cargo.toml +++ b/distributed/crates/worker/Cargo.toml @@ -11,11 +11,7 @@ name = "zisk-worker" path = "src/cli/main.rs" [build-dependencies] -vergen = { version = "8", default-features = false, features = [ - "build", - "git", - "git2", -] } +vergen-git2.workspace = true [dependencies] zisk-distributed-common = { workspace = true } @@ -32,6 +28,9 @@ witness = { workspace = true } rom-setup = { workspace = true } zisk-pil = { workspace = true } zisk-sdk = { workspace = true } +chrono = { version = "0.4", features = ["serde"] } + +precompiles-hints = { workspace = true } tonic = { workspace = true } tokio = { workspace = true } @@ -39,7 +38,6 @@ tokio-stream = { workspace = true } tracing = { workspace = true } anyhow = { workspace = true } serde = { workspace = true } -libloading = { workspace = true } borsh = { workspace = true } clap = { workspace = true } colored = { workspace = true } diff --git a/distributed/crates/worker/src/cli/main.rs b/distributed/crates/worker/src/cli/main.rs index c092b0c8f..631588938 100644 --- a/distributed/crates/worker/src/cli/main.rs +++ b/distributed/crates/worker/src/cli/main.rs @@ -1,8 +1,5 @@ use anyhow::Result; -use cargo_zisk::{ - commands::{get_proving_key, get_witness_computation_lib}, - ux::print_banner, -}; +use cargo_zisk::{commands::get_proving_key, ux::print_banner}; use clap::Parser; use colored::Colorize; use std::path::PathBuf; @@ -48,10 +45,6 @@ struct Cli { )] config: Option, - /// Witness computation dynamic library path - #[clap(short = 'w', long)] - pub witness_lib: Option, - /// ELF file path /// This is the path to the ROM file that the witness computation dynamic library will use /// to generate the witness. @@ -87,6 +80,11 @@ struct Cli { #[clap(long, conflicts_with = "emulator")] pub unlock_mapped_memory: bool, + /// Redirect ASM emulator output to file + /// This option is mutually exclusive with `--emulator` + #[clap(long, conflicts_with = "emulator", default_value_t = false)] + pub asm_out_file: bool, + /// Verbosity (-v, -vv) #[arg(short ='v', long, action = clap::ArgAction::Count, help = "Increase verbosity level")] pub verbose: u8, // Using u8 to hold the number of `-v` @@ -98,10 +96,6 @@ struct Cli { #[clap(long, default_value_t = false)] pub verify_constraints: bool, - /// Whether to generate the final SNARK - #[clap(short = 'f', long, default_value_t = false)] - pub final_snark: bool, - /// GPU parameters #[clap(short = 'z', long, default_value_t = false)] pub preallocate: bool, @@ -121,6 +115,9 @@ struct Cli { #[clap(short = 'r', long, default_value_t = false)] pub rma: bool, + + #[clap(long, default_value_t = false)] + pub hints: bool, } #[tokio::main] @@ -140,17 +137,17 @@ async fn main() -> Result<()> { let prover_config_dto = ProverServiceConfigDto { elf: cli.elf.clone(), - witness_lib: cli.witness_lib.clone(), asm: cli.asm.clone(), emulator: cli.emulator, + hints: cli.hints, proving_key: cli.proving_key.clone(), asm_port: cli.asm_port, unlock_mapped_memory: cli.unlock_mapped_memory, + asm_out_file: cli.asm_out_file, verbose: cli.verbose, debug: cli.debug.clone(), verify_constraints: cli.verify_constraints, aggregation: true, // we always aggregate - final_snark: cli.final_snark, preallocate: cli.preallocate, max_streams: cli.max_streams, number_threads_witness: cli.number_threads_witness, @@ -203,18 +200,13 @@ fn print_command_info( .map(|p| format!("(log file: {})", p).bright_black().to_string()) .unwrap_or_default() ); - println!( - "{: >12} {}", - "Witness Lib".bright_green().bold(), - get_witness_computation_lib(Some(&prover_config.witness_lib)).display() - ); println!("{: >12} {}", "Elf".bright_green().bold(), prover_config.elf.display()); - if prover_config.asm.is_some() { + if let Some(asm) = &prover_config.asm { if let Some(asm_port) = prover_config.asm_port.as_ref() { println!("{: >12} {}", "Asm port".bright_green().bold(), asm_port); } - let asm_path = prover_config.asm.as_ref().unwrap().display(); + let asm_path = asm.display(); println!("{: >12} {}", "ASM runner".bright_green().bold(), asm_path); } else { println!( @@ -225,7 +217,7 @@ fn print_command_info( } println!( "{: >12} {}", - "Proving key".bright_green().bold(), + "Proving Key".bright_green().bold(), get_proving_key(Some(&prover_config.proving_key)).display() ); diff --git a/distributed/crates/worker/src/config.rs b/distributed/crates/worker/src/config.rs index 5dceb7256..169138147 100644 --- a/distributed/crates/worker/src/config.rs +++ b/distributed/crates/worker/src/config.rs @@ -145,17 +145,17 @@ impl WorkerServiceConfig { #[derive(Debug, Clone)] pub struct ProverServiceConfigDto { pub elf: PathBuf, - pub witness_lib: Option, pub asm: Option, + pub hints: bool, pub emulator: bool, pub proving_key: Option, pub asm_port: Option, pub unlock_mapped_memory: bool, + pub asm_out_file: bool, pub verbose: u8, pub debug: Option>, pub verify_constraints: bool, pub aggregation: bool, - pub final_snark: bool, pub preallocate: bool, pub max_streams: Option, pub number_threads_witness: Option, @@ -169,17 +169,17 @@ impl Default for ProverServiceConfigDto { fn default() -> Self { Self { elf: PathBuf::new(), - witness_lib: None, asm: None, + hints: false, emulator: false, proving_key: None, asm_port: None, unlock_mapped_memory: false, + asm_out_file: false, verbose: 0, debug: None, verify_constraints: false, aggregation: false, - final_snark: false, preallocate: false, max_streams: None, number_threads_witness: None, diff --git a/distributed/crates/worker/src/lib.rs b/distributed/crates/worker/src/lib.rs index 08bf6303d..5522ef780 100644 --- a/distributed/crates/worker/src/lib.rs +++ b/distributed/crates/worker/src/lib.rs @@ -5,6 +5,7 @@ //! communication, and job handling capabilities. pub mod config; +mod stream_ordering; pub mod worker; pub mod worker_node; diff --git a/distributed/crates/worker/src/stream_ordering.rs b/distributed/crates/worker/src/stream_ordering.rs new file mode 100644 index 000000000..81e8dadcb --- /dev/null +++ b/distributed/crates/worker/src/stream_ordering.rs @@ -0,0 +1,126 @@ +use anyhow::Result; +use std::cmp::Reverse; +use std::collections::BinaryHeap; +use std::sync::mpsc; +use std::sync::Arc; +use tracing::{error, info}; +use zisk_common::io::StreamProcessor; +use zisk_common::reinterpret_vec; +use zisk_distributed_common::{JobId, StreamDataDto, StreamMessageKind}; + +/// Per-job actor that reorders out-of-order stream chunks and feeds them +/// to `HintsProcessor::process_hints` in strict sequence order. +pub struct StreamOrderingActor { + sender: Option>, + thread_handle: Option>, +} + +impl StreamOrderingActor { + /// Spawns the ordering thread and returns the actor handle. + pub fn new(processor: Arc

, job_id: JobId) -> Self { + let (tx, rx) = mpsc::channel::(); + + let handle = std::thread::spawn(move || Self::run(rx, processor, job_id)); + + Self { sender: Some(tx), thread_handle: Some(handle) } + } + + /// Enqueues a stream message for ordered delivery to `process_hints`. + /// + /// This call is non-blocking and safe to invoke from an async context. + pub fn send(&self, msg: StreamDataDto) -> Result<()> { + self.sender + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Stream ordering actor already shut down"))? + .send(msg) + .map_err(|_| anyhow::anyhow!("Stream ordering actor channel closed unexpectedly")) + } + + // Error propagation: when run_inner returns Err, rx is dropped, closing the channel. + // The next actor.send() in the gRPC loop then returns Err. + fn run( + rx: mpsc::Receiver, + processor: Arc

, + job_id: JobId, + ) { + if let Err(e) = Self::run_inner(rx, &*processor, &job_id) { + error!("Stream ordering actor failed for job {}: {}", job_id, e); + } + } + + fn run_inner( + rx: mpsc::Receiver, + processor: &P, + job_id: &JobId, + ) -> Result<()> { + // Min-heap ordered by sequence number (Reverse makes BinaryHeap a min-heap) + let mut heap: BinaryHeap)>> = BinaryHeap::new(); + let mut next_seq: u32 = 1; + let mut is_first = true; + + loop { + match rx.recv() { + Ok(msg) => match msg.stream_type { + StreamMessageKind::End => { + if !heap.is_empty() { + return Err(anyhow::anyhow!( + "Stream End received for job {} but {} buffered chunk(s) remain \ + (next expected seq: {}). Sequence gap detected.", + job_id, + heap.len(), + next_seq + )); + } + info!("Stream ordering actor: received End for job {}", job_id); + return Ok(()); + } + StreamMessageKind::Data => { + let payload_dto = msg.stream_payload.ok_or_else(|| { + anyhow::anyhow!("Data message missing payload for job {}", job_id) + })?; + + heap.push(Reverse((payload_dto.sequence_number, payload_dto.payload))); + + // Drain all consecutive in-order sequences from the heap, + // accumulating their bytes into a single buffer so that + // process_hints is called exactly once per recv() iteration. + if !matches!(heap.peek(), Some(Reverse((s, _))) if *s == next_seq) { + continue; + } + let mut combined: Vec = Vec::new(); + while matches!(heap.peek(), Some(Reverse((s, _))) if *s == next_seq) { + let Reverse((_, data)) = heap.pop().unwrap(); + combined.extend_from_slice(&data); + next_seq += 1; + } + + let hints = reinterpret_vec(combined)?; + let first = std::mem::replace(&mut is_first, false); + processor.process_hints(&hints, first)?; + } + StreamMessageKind::Start => { + return Err(anyhow::anyhow!( + "Unexpected Start message received mid-stream for job {}", + job_id + )); + } + }, + Err(_) => { + // Channel closed — sender was dropped (job cancelled or complete) + info!("Stream ordering actor: channel closed for job {}", job_id); + return Ok(()); + } + } + } + } +} + +impl Drop for StreamOrderingActor { + fn drop(&mut self) { + // Drop the sender first so the thread's recv() returns Err and exits + self.sender.take(); + + // Drop the ordering thread, it will terminate promptly once the channel is closed. + self.thread_handle.take(); + } +} diff --git a/distributed/crates/worker/src/worker.rs b/distributed/crates/worker/src/worker.rs index 822a01c93..c13fab4c8 100644 --- a/distributed/crates/worker/src/worker.rs +++ b/distributed/crates/worker/src/worker.rs @@ -1,25 +1,27 @@ use anyhow::Result; -use cargo_zisk::commands::{get_proving_key, get_witness_computation_lib}; -use proofman::{AggProofs, ContributionsInfo}; -use rom_setup::{ - gen_elf_hash, get_elf_bin_file_path, get_elf_data_hash, get_rom_blowup_factor_and_arity, - DEFAULT_CACHE_PATH, -}; +use cargo_zisk::commands::get_proving_key; +use proofman::{AggProofs, AggProofsRegister, ContributionsInfo}; +use rom_setup::{get_elf_data_hash, DEFAULT_CACHE_PATH}; use std::fs; use std::sync::Arc; use tokio::sync::{mpsc, Mutex}; use tokio::task::JoinHandle; -use zisk_common::io::ZiskStdin; +use zisk_common::io::{StreamSource, ZiskStdin}; +use zisk_common::ElfBinaryFromFile; +use zisk_common::ZiskExecutorTime; use zisk_distributed_common::{AggregationParams, DataCtx, InputSourceDto, JobPhase, WorkerState}; -use zisk_distributed_common::{ComputeCapacity, JobId, WorkerId}; -use zisk_sdk::{Asm, Emu, ProverClient, ZiskBackend, ZiskProver}; +use zisk_distributed_common::{ComputeCapacity, JobId, PartitionInfo, WorkerId}; +use zisk_distributed_common::{ContributionsMessage, ProveMessage, StreamMessage}; +use zisk_distributed_common::{HintsSourceDto, StreamDataDto, StreamMessageKind}; +use zisk_sdk::{Asm, Emu, ProverClient, ZiskBackend, ZiskProgramPK, ZiskProver}; + +use crate::stream_ordering::StreamOrderingActor; -use proofman::ProofInfo; use proofman::ProvePhaseInputs; +use proofman::WitnessInfo; use proofman_common::ParamsGPU; use proofman_common::ProofOptions; use proofman_common::{json_to_debug_instances_map, DebugInfo}; -use std::collections::HashMap; use std::path::PathBuf; use tracing::{error, info}; @@ -31,7 +33,8 @@ pub enum ComputationResult { Challenge { job_id: JobId, success: bool, - result: Result>, + result: Result<(WitnessInfo, ZiskExecutorTime, Vec)>, + task_received_time: Option>, }, Proofs { job_id: JobId, @@ -50,18 +53,12 @@ pub struct ProverConfig { /// Path to the ELF file pub elf: PathBuf, - /// Path to the witness computation dynamic library - pub witness_lib: PathBuf, - /// Path to the ASM file (optional) pub asm: Option, /// Path to the ASM ROM file (optional) pub asm_rom: Option, - /// Map of custom commits - pub custom_commits_map: HashMap, - /// Flag indicating whether to use the prebuilt emulator pub emulator: bool, @@ -83,17 +80,17 @@ pub struct ProverConfig { /// Flag to unlock mapped memory pub unlock_mapped_memory: bool, + /// Flag to redirect ASM emulator output to file + pub asm_out_file: bool, + /// Flag to verify constraints pub verify_constraints: bool, /// Flag to enable aggregation pub aggregation: bool, - /// Flag to enable final SNARK - pub final_snark: bool, - /// Preallocate resources - pub gpu_params: ParamsGPU, + pub gpu_params: Option, /// Whether to use shared tables in the witness library pub shared_tables: bool, @@ -103,6 +100,9 @@ pub struct ProverConfig { /// Whether to use minimal memory mode pub minimal_memory: bool, + + /// Whether to include precompile hints in the assembly generation + pub hints: bool, } impl ProverConfig { @@ -159,9 +159,16 @@ impl ProverConfig { prover_service_config.elf.display() ) })?; + let elf = + ElfBinaryFromFile::new(&prover_service_config.elf, prover_service_config.hints)?; - let hash = get_elf_data_hash(&prover_service_config.elf) + let hash = get_elf_data_hash(&elf) .map_err(|e| anyhow::anyhow!("Error computing ELF hash: {}", e))?; + let stem = if prover_service_config.hints { + format!("{stem}-hints") + } else { + stem.to_string() + }; let new_filename = format!("{stem}-{hash}-mt.bin"); let asm_rom_filename = format!("{stem}-{hash}-rh.bin"); asm_rom = Some(default_cache_path.join(asm_rom_filename)); @@ -178,57 +185,43 @@ impl ProverConfig { return Err(anyhow::anyhow!("ASM file not found at {:?}", asm_rom.display())); } } - let (blowup_factor, merkle_tree_arity) = get_rom_blowup_factor_and_arity(&proving_key); - let rom_bin_path = get_elf_bin_file_path( - &prover_service_config.elf.to_path_buf(), - &default_cache_path, - blowup_factor, - merkle_tree_arity, - )?; - if !rom_bin_path.exists() { - let _ = gen_elf_hash( - &prover_service_config.elf.clone(), - rom_bin_path.as_path(), - blowup_factor, - merkle_tree_arity, - false, - ) - .map_err(|e| anyhow::anyhow!("Error generating elf hash: {}", e)); - } - let mut custom_commits_map: HashMap = HashMap::new(); - custom_commits_map.insert("rom".to_string(), rom_bin_path); - let mut gpu_params = ParamsGPU::new(prover_service_config.preallocate); - if prover_service_config.max_streams.is_some() { - gpu_params.with_max_number_streams(prover_service_config.max_streams.unwrap()); - } - if prover_service_config.number_threads_witness.is_some() { - gpu_params.with_number_threads_pools_witness( - prover_service_config.number_threads_witness.unwrap(), - ); - } - if prover_service_config.max_witness_stored.is_some() { - gpu_params.with_max_witness_stored(prover_service_config.max_witness_stored.unwrap()); + let mut gpu_params = None; + if prover_service_config.preallocate + || prover_service_config.max_streams.is_some() + || prover_service_config.number_threads_witness.is_some() + || prover_service_config.max_witness_stored.is_some() + { + let mut gpu_params_new = ParamsGPU::new(prover_service_config.preallocate); + if let Some(max_streams) = prover_service_config.max_streams { + gpu_params_new.with_max_number_streams(max_streams); + } + if let Some(number_threads_witness) = prover_service_config.number_threads_witness { + gpu_params_new.with_number_threads_pools_witness(number_threads_witness); + } + if let Some(max_witness_stored) = prover_service_config.max_witness_stored { + gpu_params_new.with_max_witness_stored(max_witness_stored); + } + gpu_params = Some(gpu_params_new); } Ok(ProverConfig { elf: prover_service_config.elf.clone(), - witness_lib: get_witness_computation_lib(prover_service_config.witness_lib.as_ref()), asm: prover_service_config.asm.clone(), asm_rom, - custom_commits_map, emulator, proving_key, verbose: prover_service_config.verbose, debug_info, asm_port: prover_service_config.asm_port, unlock_mapped_memory: prover_service_config.unlock_mapped_memory, + asm_out_file: prover_service_config.asm_out_file, verify_constraints: prover_service_config.verify_constraints, aggregation: prover_service_config.aggregation, - final_snark: prover_service_config.final_snark, gpu_params, shared_tables: prover_service_config.shared_tables, rma: prover_service_config.rma, minimal_memory: prover_service_config.minimal_memory, + hints: prover_service_config.hints, }) } } @@ -244,6 +237,7 @@ pub struct JobContext { pub total_compute_units: u32, // Total compute units for the whole job pub phase: JobPhase, pub executed_steps: u64, + pub task_received_time: Option>, } pub struct Worker { @@ -255,6 +249,9 @@ pub struct Worker { prover: Arc>, prover_config: ProverConfig, + + stream_actor: Option, + pk: Arc, } impl Worker { @@ -268,17 +265,16 @@ impl Worker { .emu() .prove() .aggregation(true) - .rma(true) - .witness_lib_path(prover_config.witness_lib.clone()) .proving_key_path(prover_config.proving_key.clone()) - .elf_path(prover_config.elf.clone()) .verbose(prover_config.verbose) .shared_tables(prover_config.shared_tables) .gpu(prover_config.gpu_params.clone()) - .print_command_info() .build()?, ); + let elf = ElfBinaryFromFile::new(&prover_config.elf, prover_config.hints)?; + let (pk, _) = prover.setup(&elf)?; + Ok(Worker:: { _worker_id: worker_id, _compute_capacity: compute_capacity, @@ -287,6 +283,8 @@ impl Worker { current_computation: None, prover, prover_config, + pk: Arc::new(pk), + stream_actor: None, }) } @@ -300,20 +298,21 @@ impl Worker { .asm() .prove() .aggregation(true) - .rma(true) - .witness_lib_path(prover_config.witness_lib.clone()) .proving_key_path(prover_config.proving_key.clone()) - .elf_path(prover_config.elf.clone()) .verbose(prover_config.verbose) .shared_tables(prover_config.shared_tables) .asm_path_opt(prover_config.asm.clone()) .base_port_opt(prover_config.asm_port) .unlock_mapped_memory(prover_config.unlock_mapped_memory) + .asm_out_file(prover_config.asm_out_file) .gpu(prover_config.gpu_params.clone()) - .print_command_info() + .is_distributed(true) .build()?, ); + let elf = ElfBinaryFromFile::new(&prover_config.elf, prover_config.hints)?; + let (pk, _) = prover.setup(&elf)?; + Ok(Worker:: { _worker_id: worker_id, _compute_capacity: compute_capacity, @@ -322,6 +321,8 @@ impl Worker { current_computation: None, prover, prover_config, + pk: Arc::new(pk), + stream_actor: None, }) } @@ -369,8 +370,18 @@ impl Worker { if let Some(handle) = self.current_computation.take() { handle.abort(); } + + // Drop the actor on a blocking thread: closes the channel, which signals the ordering + // thread to exit, without blocking the Tokio runtime worker thread. + if let Some(stream_actor) = self.stream_actor.take() { + tokio::task::spawn_blocking(move || { + drop(stream_actor); + }); + } } + #[allow(clippy::type_complexity)] + #[allow(clippy::too_many_arguments)] pub fn new_job( &mut self, job_id: JobId, @@ -379,6 +390,7 @@ impl Worker { total_workers: u32, allocation: Vec, total_compute_units: u32, + task_received_time: Option>, ) -> Arc> { let current_job = Arc::new(Mutex::new(JobContext { job_id: job_id.clone(), @@ -389,6 +401,7 @@ impl Worker { total_compute_units, phase: JobPhase::Contributions, executed_steps: 0, + task_received_time, })); self.current_job = Some(current_job.clone()); @@ -401,35 +414,37 @@ impl Worker { &self, job: Arc>, tx: mpsc::UnboundedSender, - ) -> JoinHandle<()> { - self.partial_contribution_mpi_broadcast(&job).await; - self.partial_contribution(job, tx).await + ) -> Result> { + self.partial_contribution_mpi_broadcast(&job).await?; + Ok(self.partial_contribution(job, tx)) } - pub async fn partial_contribution_mpi_broadcast(&self, job: &Mutex) { - let job = job.lock().await; - let job_id = job.job_id.clone(); + pub async fn partial_contribution_mpi_broadcast(&self, job: &Mutex) -> Result<()> { + let mut serialized = { + let job = job.lock().await; - let proof_info = ProofInfo::new( - None, - job.total_compute_units as usize, - job.allocation.clone(), - job.rank_id as usize, - ); - let phase_inputs = proofman::ProvePhaseInputs::Contributions(proof_info); + let phase_inputs = ProvePhaseInputs::Contributions(); - let options = self.get_proof_options_partial_contribution(); + let options = self.get_proof_options(false); - let mut serialized = borsh::to_vec(&( - JobPhase::Contributions, - job_id, - phase_inputs, - options, - job.data_ctx.input_source.clone(), - )) - .unwrap(); + let message = ContributionsMessage { + job_id: job.job_id.clone(), + phase_inputs, + options, + input_source: job.data_ctx.input_source.clone(), + hints_source: job.data_ctx.hints_source.clone(), + partition_info: PartitionInfo { + total_compute_units: job.total_compute_units as usize, + allocation: job.allocation.clone(), + worker_idx: job.rank_id as usize, + }, + }; + + borsh::to_vec(&(JobPhase::Contributions, message)).unwrap() + }; - self.prover.mpi_broadcast(&mut serialized); + self.prover.mpi_broadcast(&mut serialized)?; + Ok(()) } pub async fn handle_prove( @@ -437,78 +452,92 @@ impl Worker { job: Arc>, challenges: Vec, tx: mpsc::UnboundedSender, - ) -> JoinHandle<()> { - self.prove_mpi_broadcast(&job, challenges.clone()).await; - self.prove(job, challenges, tx).await + ) -> Result> { + self.prove_mpi_broadcast(&job, challenges.clone()).await?; + Ok(self.prove(job, challenges, tx)) } pub async fn prove_mpi_broadcast( &self, job: &Mutex, challenges: Vec, - ) { - let job = job.lock().await; - let job_id = job.job_id.clone(); + ) -> Result<()> { + let mut serialized = { + let job = job.lock().await; + + let phase_inputs = proofman::ProvePhaseInputs::Internal(challenges); - let phase_inputs = proofman::ProvePhaseInputs::Internal(challenges); + let options = self.get_proof_options(false); - let options = self.get_proof_options_prove(); + let message = ProveMessage { job_id: job.job_id.clone(), phase_inputs, options }; - let mut serialized = - borsh::to_vec(&(JobPhase::Prove, job_id, phase_inputs, options)).unwrap(); + borsh::to_vec(&(JobPhase::Prove, message)).unwrap() + }; - self.prover.mpi_broadcast(&mut serialized); + self.prover.mpi_broadcast(&mut serialized)?; + Ok(()) } - pub async fn handle_aggregate( + pub fn handle_aggregate( &self, job: Arc>, agg_params: AggregationParams, tx: mpsc::UnboundedSender, ) -> JoinHandle<()> { - self.aggregate(job, agg_params, tx).await + self.aggregate(job, agg_params, tx) } - pub async fn partial_contribution( + pub fn partial_contribution( &self, job: Arc>, tx: mpsc::UnboundedSender, ) -> JoinHandle<()> { let prover = self.prover.clone(); + let pk = self.pk.clone(); + let options = self.get_proof_options(false); - let options = self.get_proof_options_partial_contribution(); - - tokio::spawn(async move { - let mut job = job.lock().await; - let job_id = job.job_id.clone(); + tokio::task::spawn_blocking(move || { + let guard = job.blocking_lock(); + let job_id = guard.job_id.clone(); info!("Computing Contribution for {job_id}"); - let proof_info = ProofInfo::new( - None, - job.total_compute_units as usize, - job.allocation.clone(), - job.rank_id as usize, - ); - let phase_inputs = proofman::ProvePhaseInputs::Contributions(proof_info); - + let phase_inputs = proofman::ProvePhaseInputs::Contributions(); + let inputs_source = guard.data_ctx.input_source.clone(); + let hints_source = guard.data_ctx.hints_source.clone(); + let partition_info = PartitionInfo { + total_compute_units: guard.total_compute_units as usize, + allocation: guard.allocation.clone(), + worker_idx: guard.rank_id as usize, + }; + drop(guard); let result = Self::execute_contribution_task( job_id.clone(), - prover.as_ref(), + &prover, phase_inputs, - job.data_ctx.input_source.clone(), + inputs_source, + hints_source, + partition_info, + &pk, options, - ) - .await; + ); + + let mut guard = job.blocking_lock(); + guard.executed_steps = prover.executed_steps(); + let task_received_time = guard.task_received_time; + drop(guard); - job.executed_steps = prover.executed_steps(); + let (witness_info, zisk_execution_time) = prover + .get_execution_info() + .unwrap_or_else(|_| (WitnessInfo::default(), ZiskExecutorTime::default())); match result { Ok(data) => { let _ = tx.send(ComputationResult::Challenge { job_id, success: true, - result: Ok(data), + result: Ok((witness_info, zisk_execution_time, data)), + task_received_time, }); } Err(error) => { @@ -517,36 +546,59 @@ impl Worker { job_id, success: false, result: Err(error), + task_received_time, }); } } }) } - pub async fn execute_contribution_task( + #[allow(clippy::too_many_arguments)] + pub fn execute_contribution_task( job_id: JobId, prover: &ZiskProver, phase_inputs: ProvePhaseInputs, input_source: InputSourceDto, + hints_source: HintsSourceDto, + partition_info: PartitionInfo, + pk: &ZiskProgramPK, options: ProofOptions, ) -> Result> { let phase = proofman::ProvePhase::Contributions; - match input_source { - InputSourceDto::InputPath(input_path) => { - let stdin = ZiskStdin::from_file(input_path)?; - prover.set_stdin(stdin); + let stdin = match input_source { + InputSourceDto::InputPath(inputs_uri) => ZiskStdin::from_file(inputs_uri)?, + InputSourceDto::InputData(input_data) => ZiskStdin::from_vec(input_data), + InputSourceDto::InputNull => ZiskStdin::null(), + }; + + match hints_source { + HintsSourceDto::HintsPath(hints_uri) => { + let hints_stream = StreamSource::from_uri(hints_uri)?; + pk.register_hints_stream(hints_stream)?; } - InputSourceDto::InputData(input_data) => { - let stdin = ZiskStdin::from_vec(input_data); - prover.set_stdin(stdin); + HintsSourceDto::HintsStream(_hints_uri) => { + // For HintsStream, the worker will receive hint data via StreamData gRPC messages + // routed through the stream ordering actor into the hints processor. + // No need to set hints_stream on prover for this case } - InputSourceDto::InputNull => { - let stdin = ZiskStdin::null(); - prover.set_stdin(stdin); + HintsSourceDto::HintsNull => { + // No hints to set } } + prover.set_stdin(stdin)?; + + prover.register_program(pk)?; + + if matches!(phase_inputs, ProvePhaseInputs::Contributions()) { + prover.set_partition( + partition_info.total_compute_units, + partition_info.allocation.clone(), + partition_info.worker_idx, + )?; + } + let challenge = match prover.prove_phase(phase_inputs, options, phase) { Ok(proofman::ProvePhaseResult::Contributions(challenge)) => { info!("Contribution computation successful for {job_id}"); @@ -567,27 +619,74 @@ impl Worker { Ok(challenge) } - pub async fn prove( + /// Routes an incoming `StreamData` message to the per-job ordering actor. + /// + /// - `Start`: initialises the `HintsProcessor` (if needed), resets it, and spawns the actor. + /// - `Data` / `End`: enqueues the message into the actor's channel — O(1), non-blocking. + /// + /// The actor thread owns the reorder buffer and calls `process_hints` in sequence order. + pub async fn route_stream_data( + &mut self, + stream_data: StreamDataDto, + is_first_partition: bool, + ) -> Result<()> { + match &stream_data.stream_type { + StreamMessageKind::Start => { + let job_id = stream_data.job_id.clone(); + + self.pk.reset(); + + let processor = self.pk.get_hints_processor().ok_or_else(|| { + anyhow::anyhow!("HintsProcessor not found for job {}", job_id) + })?; + + if let Some(r) = self.pk.asm_resources.as_ref() { + r.set_active_services(is_first_partition)?; + } + + // Replace any existing actor (handles reconnect / job restart) + self.stream_actor = Some(StreamOrderingActor::new(processor, job_id)); + } + StreamMessageKind::Data | StreamMessageKind::End => match &self.stream_actor { + Some(actor) => actor.send(stream_data)?, + None => { + return Err(anyhow::anyhow!( + "Received stream {:?} without a prior Start for job {}", + stream_data.stream_type, + stream_data.job_id + )); + } + }, + } + Ok(()) + } + + pub fn set_partition( + &self, + total_compute_units: usize, + allocation: Vec, + worker_idx: usize, + ) -> Result<()> { + self.prover.set_partition(total_compute_units, allocation, worker_idx) + } + + pub fn prove( &self, job: Arc>, challenges: Vec, tx: mpsc::UnboundedSender, ) -> JoinHandle<()> { let prover = self.prover.clone(); + let options = self.get_proof_options(false); - let options = self.get_proof_options_prove(); - - tokio::spawn(async move { - let job = job.lock().await; - let job_id = job.job_id.clone(); + tokio::task::spawn_blocking(move || { + let job_id = job.blocking_lock().job_id.clone(); info!("Computing Prove for {job_id}"); let phase_inputs = proofman::ProvePhaseInputs::Internal(challenges); + let result = Self::execute_prove_task(job_id.clone(), &prover, phase_inputs, options); - let result = - Self::execute_prove_task(job_id.clone(), prover.as_ref(), phase_inputs, options) - .await; match result { Ok(data) => { let _ = tx.send(ComputationResult::Proofs { @@ -608,7 +707,7 @@ impl Worker { }) } - pub async fn execute_prove_task( + pub fn execute_prove_task( job_id: JobId, prover: &ZiskProver, phase_inputs: ProvePhaseInputs, @@ -637,17 +736,44 @@ impl Worker { Ok(proof) } - pub async fn aggregate( + pub fn aggregate( &self, job: Arc>, agg_params: AggregationParams, tx: mpsc::UnboundedSender, ) -> JoinHandle<()> { let prover = self.prover.clone(); + let options = self.get_proof_options(agg_params.compressed); + + let agg_proofs_register: Vec = agg_params + .agg_proofs + .iter() + .map(|v| AggProofsRegister { + airgroup_id: v.airgroup_id, + worker_indexes: vec![v.worker_idx as usize], + }) + .collect(); + + if let Err(error) = prover.register_aggregated_proofs(agg_proofs_register) { + let job_guard = job.blocking_lock(); + let job_id = job_guard.job_id.clone(); + let executed_steps = job_guard.executed_steps; + + let _ = tx.send(ComputationResult::AggProof { + job_id, + success: false, + result: Err(error), + executed_steps, + }); + + return tokio::spawn(async {}); + } - tokio::spawn(async move { - let job = job.lock().await; - let job_id = job.job_id.clone(); + tokio::task::spawn_blocking(move || { + let (job_id, executed_steps) = { + let guard = job.blocking_lock(); + (guard.job_id.clone(), guard.executed_steps) + }; info!("Starting aggregation step for {job_id}"); @@ -661,8 +787,6 @@ impl Worker { }) .collect(); - let options = Self::get_proof_options_aggregation(&agg_params); - let result = prover.aggregate_proofs( agg_proofs, agg_params.last_proof, @@ -679,7 +803,7 @@ impl Worker { job_id, success: true, result: Ok(Some(proof)), - executed_steps: job.executed_steps, + executed_steps, }); } Err(error) => { @@ -688,52 +812,24 @@ impl Worker { job_id, success: false, result: Err(error), - executed_steps: job.executed_steps, + executed_steps, }); } } }) } - fn get_proof_options_partial_contribution(&self) -> ProofOptions { - ProofOptions { - verify_constraints: false, - aggregation: false, - final_snark: false, - verify_proofs: true, - save_proofs: true, - test_mode: false, - output_dir_path: PathBuf::from("."), - rma: self.prover_config.rma, - minimal_memory: self.prover_config.minimal_memory, - } - } - - fn get_proof_options_prove(&self) -> ProofOptions { + fn get_proof_options(&self, compressed: bool) -> ProofOptions { ProofOptions { - verify_constraints: false, - aggregation: true, - final_snark: false, + verify_constraints: self.prover_config.verify_constraints, + aggregation: self.prover_config.aggregation, verify_proofs: false, save_proofs: false, test_mode: false, - output_dir_path: PathBuf::default(), + output_dir_path: None, rma: self.prover_config.rma, minimal_memory: self.prover_config.minimal_memory, - } - } - - fn get_proof_options_aggregation(agg_params: &AggregationParams) -> ProofOptions { - ProofOptions { - verify_constraints: agg_params.verify_constraints, - aggregation: agg_params.aggregation, - rma: agg_params.rma, - final_snark: agg_params.final_snark, - verify_proofs: agg_params.verify_proofs, - save_proofs: agg_params.save_proofs, - test_mode: agg_params.test_mode, - output_dir_path: agg_params.output_dir_path.clone(), - minimal_memory: agg_params.minimal_memory, + compressed, } } @@ -744,49 +840,89 @@ impl Worker { pub async fn handle_mpi_broadcast_request(&self) -> Result<()> { let mut bytes: Vec = Vec::new(); - self.prover.mpi_broadcast(&mut bytes); + self.prover.mpi_broadcast(&mut bytes)?; // extract byte 0 to decide the option - let phase = borsh::from_slice(&bytes[0..1]).unwrap(); - - match phase { - JobPhase::Contributions => { - let (job_id, phase_inputs, options, input_source_dto): ( - JobId, - ProvePhaseInputs, - ProofOptions, - InputSourceDto, - ) = borsh::from_slice(&bytes[1..]).unwrap(); - - let result = Self::execute_contribution_task( - job_id, - self.prover.as_ref(), - phase_inputs, - input_source_dto, - options, - ) - .await; - if let Err(e) = result { - error!("Error during Contributions MPI broadcast execution: {}. Waiting for new job...", e); + let phase: JobPhase = borsh::from_slice(&bytes[0..1]).unwrap(); + + let prover = self.prover.clone(); + let pk = self.pk.clone(); + let options = self.get_proof_options(false); + + if phase == JobPhase::ContributionsHintsStream { + if let Some(r) = pk.asm_resources.as_ref() { + let message: StreamMessage = borsh::from_slice(&bytes[1..]).unwrap(); + if let Err(e) = r.submit_hint_direct(&message.data) { + tracing::error!("Failed to submit hints: {}", e); } + } else { + tracing::error!("Hints sink is not configured for ContributionsHintsStream"); } - JobPhase::Prove => { - let (job_id, phase_inputs, options): (JobId, ProvePhaseInputs, ProofOptions) = - borsh::from_slice(&bytes[1..]).unwrap(); - - let result = - Self::execute_prove_task(job_id, self.prover.as_ref(), phase_inputs, options) - .await; - if let Err(e) = result { - error!( - "Error during Prove MPI broadcast execution: {}. Waiting for new job...", - e - ); + } else if phase == JobPhase::ContributionsInputsStream { + if let Some(inputs_shmem_writer) = + pk.asm_resources.as_ref().map(|r| r.inputs_shmem_writer.clone()) + { + let message: StreamMessage = borsh::from_slice(&bytes[1..]).unwrap(); + let reinterpreted_data = unsafe { + std::slice::from_raw_parts( + message.data.as_ptr() as *const u8, + message.data.len() * std::mem::size_of::(), + ) + }; + if let Err(e) = inputs_shmem_writer.append_input(reinterpreted_data) { + tracing::error!("Failed to submit inputs: {}", e); } + } else { + tracing::error!("Inputs sink is not configured for ContributionsInputsStream"); } - JobPhase::Aggregate => { - unreachable!("Aggregate phase is not supported in MPI broadcast"); - } + } else { + tokio::task::spawn_blocking(move || match phase { + JobPhase::Contributions => { + let message: ContributionsMessage = borsh::from_slice(&bytes[1..]).unwrap(); + + let result = Self::execute_contribution_task( + message.job_id, + &prover, + message.phase_inputs, + message.input_source, + message.hints_source, + message.partition_info, + &pk, + message.options, + ); + if let Err(e) = result { + tracing::error!( + "Error during Contributions MPI broadcast execution: {}. Waiting for new job...", + e + ); + } + } + JobPhase::Prove => { + let message: ProveMessage = borsh::from_slice(&bytes[1..]).unwrap(); + + let result = Self::execute_prove_task( + message.job_id, + &prover, + message.phase_inputs, + options, + ); + if let Err(e) = result { + error!( + "Error during Prove MPI broadcast execution: {}. Waiting for new job...", + e + ); + } + } + + JobPhase::Aggregate => { + unreachable!("Aggregate phase is not supported in MPI broadcast"); + } + JobPhase::ContributionsHintsStream | JobPhase::ContributionsInputsStream => { + unreachable!( + "Stream phases should be handled separately and not reach this point" + ); + } + }); } Ok(()) } diff --git a/distributed/crates/worker/src/worker_node.rs b/distributed/crates/worker/src/worker_node.rs index f656ed2ab..d1ce46282 100644 --- a/distributed/crates/worker/src/worker_node.rs +++ b/distributed/crates/worker/src/worker_node.rs @@ -1,6 +1,6 @@ use crate::{worker::ComputationResult, ProverConfig, Worker}; use anyhow::{anyhow, Result}; -use proofman::{AggProofs, ContributionsInfo}; +use proofman::{AggProofs, ContributionsInfo, WitnessInfo}; use std::path::Path; use std::{path::PathBuf, time::Duration}; use tokio::sync::mpsc; @@ -8,8 +8,10 @@ use tokio_stream::StreamExt; use tonic::transport::Channel; use tonic::Request; use tracing::{error, info}; +use zisk_common::ZiskExecutorTime; use zisk_distributed_common::{ - AggProofData, AggregationParams, DataCtx, InputSourceDto, WorkerState, + AggProofData, AggregationParams, DataCtx, HintsSourceDto, InputSourceDto, StreamDataDto, + WorkerState, }; use zisk_distributed_common::{DataId, JobId}; use zisk_distributed_grpc_api::contribution_params::InputSource; @@ -237,8 +239,15 @@ impl WorkerNodeGrpc { message_sender: &mpsc::UnboundedSender, ) -> Result<()> { match result { - ComputationResult::Challenge { job_id, success, result } => { - self.send_partial_contribution(job_id, success, result, message_sender).await + ComputationResult::Challenge { job_id, success, result, task_received_time } => { + self.send_partial_contribution( + job_id, + success, + result, + message_sender, + task_received_time, + ) + .await } ComputationResult::Proofs { job_id, success, result } => { self.send_proof(job_id, success, result, message_sender).await @@ -253,8 +262,9 @@ impl WorkerNodeGrpc { &mut self, job_id: JobId, success: bool, - result: Result>, + result: Result<(WitnessInfo, ZiskExecutorTime, Vec)>, message_sender: &mpsc::UnboundedSender, + task_received_time: Option>, ) -> Result<()> { if let Some(handle) = self.worker.take_current_computation() { handle.await?; @@ -275,11 +285,12 @@ impl WorkerNodeGrpc { "Inconsistent state: operation reported success but returned Err result" )); } - (vec![], e.to_string()) + ((WitnessInfo::default(), ZiskExecutorTime::default(), vec![]), e.to_string()) } }; let challenges: Vec = result_data + .2 .into_iter() .map(|cont| Challenges { worker_index: cont.worker_index, @@ -288,13 +299,36 @@ impl WorkerNodeGrpc { }) .collect(); + let witness_info = WitnessExecInfo { + witness_time: result_data.0.witness_time, + publics: result_data.0.publics, + proof_values: result_data.0.proof_values, + summary_info: result_data.0.summary_info, + }; + + let zisk_execution_time = ZiskExecuteTime { + total_duration: result_data.1.total_duration.as_millis() as f32, + execution_duration: result_data.1.execution_duration.as_millis() as f32, + count_and_plan_duration: result_data.1.count_and_plan_duration.as_millis() as f32, + count_and_plan_mo_duration: result_data.1.count_and_plan_mo_duration.as_millis() as f32, + asm_execution_duration: result_data + .1 + .asm_execution_duration + .map(|asm_info| AsmExecuteInfo { time: asm_info.time, mhz: asm_info.mhz }), + task_received_time: task_received_time.unwrap().timestamp_millis() as f64, + }; + let message = WorkerMessage { payload: Some(worker_message::Payload::ExecuteTaskResponse(ExecuteTaskResponse { worker_id: self.worker_config.worker.worker_id.as_string(), job_id: job_id.as_string(), task_type: TaskType::PartialContribution as i32, success, - result_data: Some(ResultData::Challenges(ChallengesList { challenges })), + result_data: Some(ResultData::Challenges(ChallengesList { + challenges, + witness_info: Some(witness_info), + zisk_execution_time: Some(zisk_execution_time), + })), error_message, })), }; @@ -470,6 +504,9 @@ impl WorkerNodeGrpc { } } } + coordinator_message::Payload::StreamData(stream_data) => { + self.handle_stream_data(stream_data).await?; + } coordinator_message::Payload::JobCancelled(cancelled) => { info!("Job {} cancelled: {}", cancelled.job_id, cancelled.reason); @@ -504,6 +541,7 @@ impl WorkerNodeGrpc { request: ExecuteTaskRequest, computation_tx: &mpsc::UnboundedSender, ) -> Result<()> { + let task_received_time = chrono::Utc::now(); info!("Starting Partial Contribution for {}", request.job_id); // Cancel any existing computation @@ -516,68 +554,118 @@ impl WorkerNodeGrpc { let job_id = JobId::from(request.job_id); let input_source = match params.input_source { - Some(InputSource::InputPath(ref path)) => { - let input_path = self.worker_config.worker.inputs_folder.join(PathBuf::from(path)); - - // Validate that input_path is a subdirectory of inputs_folder - Self::validate_subdir(&self.worker_config.worker.inputs_folder, &input_path)?; + Some(InputSource::InputPath(ref inputs_uris)) => { + // Validate and get the full path + let inputs_uri = Self::validate_subdir( + &self.worker_config.worker.inputs_folder, + &PathBuf::from(&inputs_uris), + ) + .await?; - InputSourceDto::InputPath(input_path.display().to_string()) + InputSourceDto::InputPath(inputs_uri.to_string_lossy().to_string()) } Some(InputSource::InputData(data)) => InputSourceDto::InputData(data), - None => { - return Err(anyhow!("Input source missing in ContributionParams")); + None => InputSourceDto::InputNull, + }; + + let hints_source = if let Some(hints_path) = ¶ms.hints_path { + if params.hints_stream { + // Hints will be streamed - use placeholder, will be updated when stream completes + HintsSourceDto::HintsStream(hints_path.clone()) + } else { + // Validate and get the full path + let hints_uri = Self::validate_subdir( + &self.worker_config.worker.inputs_folder, + &PathBuf::from(hints_path), + ) + .await?; + + HintsSourceDto::HintsPath(hints_uri.to_string_lossy().to_string()) } + } else { + HintsSourceDto::HintsNull }; - let data_ctx = DataCtx { data_id: DataId::from(params.data_id), input_source }; + let data_ctx = + DataCtx { data_id: DataId::from(params.data_id), input_source, hints_source }; let job = self.worker.new_job( - job_id, + job_id.clone(), data_ctx, params.rank_id, params.total_workers, params.worker_allocation, params.job_compute_units, + Some(task_received_time), ); // Start computation in background task self.worker.set_current_computation( - self.worker.handle_partial_contribution(job.clone(), computation_tx.clone()).await, + self.worker.handle_partial_contribution(job.clone(), computation_tx.clone()).await?, ); Ok(()) } - fn validate_subdir(base: &Path, candidate: &Path) -> Result<()> { - let base = base.canonicalize().map_err(|e| anyhow!("Inputs folder error: {e}"))?; - - // Timeout 60 seconds + /// Validates that a subpath is within the base directory and waits for it to exist. + /// + /// This function joins the base directory with the provided subpath, waits for the + /// resulting file/directory to appear (up to 60 seconds), and validates that the + /// resolved path is within the base directory to prevent path traversal attacks. + /// + /// # Security Considerations + /// - Joins base and subpath before validation + /// - Canonicalizes paths to resolve symlinks and relative components (e.g., `..`) + /// - Validates that the resolved path is within the base directory + /// - Note: There's a small TOCTOU window between file existence check and canonicalization + /// where a file could theoretically be replaced with a malicious symlink + /// + /// # Arguments + /// * `base_dir` - The base directory that must contain the subpath + /// * `subpath` - The relative path within base_dir (can include subdirectories) + /// + /// # Returns + /// * `Ok(PathBuf)` - The validated, canonicalized full path + /// * `Err` - If the path doesn't appear within timeout or is outside base directory + async fn validate_subdir(base_dir: &Path, subpath: &Path) -> Result { + let base_canonical = + base_dir.canonicalize().map_err(|e| anyhow!("Inputs folder error: {e}"))?; + + // Join base with subpath to get full path + let full_path = base_dir.join(subpath); + + // Wait for file to appear (timeout: 60 seconds) let timeout = Duration::from_secs(60); let start = std::time::Instant::now(); + let poll_interval = Duration::from_millis(500); // Poll every 500ms - while !candidate.exists() { + while !full_path.exists() { if start.elapsed() > timeout { return Err(anyhow!( - "Input path {:?} did not appear within {:?}", - candidate, + "Input path {:?} (subpath: {:?}) did not appear within {:?}", + full_path, + subpath, timeout )); } - std::thread::sleep(Duration::from_millis(10)); + tokio::time::sleep(poll_interval).await; } - info!("Found input file {:?} (elapsed: {:?})", candidate, start.elapsed()); + info!("Found input path {:?} (elapsed: {:?})", full_path, start.elapsed()); - let candidate = candidate.canonicalize().map_err(|e| anyhow!("Input path error: {e}"))?; + // Canonicalize immediately after existence check to minimize TOCTOU window + let path_canonical = + full_path.canonicalize().map_err(|e| anyhow!("Input path error: {e}"))?; - if candidate.starts_with(&base) { - Ok(()) + // Validate that the canonical path is within the base directory + if path_canonical.starts_with(&base_canonical) { + Ok(path_canonical) } else { Err(anyhow!( - "Input path {:?} must be a subdirectory of inputs folder {:?}", - candidate, - base + "Input path {:?} (resolved to {:?}) is outside base directory {:?}", + subpath, + path_canonical, + base_canonical )) } } @@ -617,11 +705,12 @@ impl WorkerNodeGrpc { worker_index: ch.worker_index, airgroup_id: ch.airgroup_id as usize, challenge: ch.challenge, + aggregated: false, }) .collect(); self.worker.set_current_computation( - self.worker.handle_prove(job, cont, computation_tx.clone()).await, + self.worker.handle_prove(job, cont, computation_tx.clone()).await?, ); Ok(()) @@ -666,21 +755,38 @@ impl WorkerNodeGrpc { agg_proofs, last_proof: agg_params.last_proof, final_proof: agg_params.final_proof, - verify_constraints: agg_params.verify_constraints, - aggregation: agg_params.aggregation, - rma: agg_params.rma, - final_snark: agg_params.final_snark, - verify_proofs: agg_params.verify_proofs, - save_proofs: agg_params.save_proofs, - test_mode: agg_params.test_mode, - output_dir_path: PathBuf::from(agg_params.output_dir_path), - minimal_memory: agg_params.minimal_memory, + compressed: agg_params.compressed, }; - - self.worker.set_current_computation( - self.worker.handle_aggregate(job, agg_params, computation_tx.clone()).await, - ); + self.worker.set_current_computation(self.worker.handle_aggregate( + job, + agg_params, + computation_tx.clone(), + )); Ok(()) } + + async fn handle_stream_data(&mut self, stream_data: StreamData) -> Result<()> { + if self.worker.current_job().is_none() { + return Err(anyhow!("Stream data received without current job context")); + } + + let job = self.worker.current_job().unwrap(); + let (current_job_id, is_first_partition) = { + let job_guard = job.lock().await; + (job_guard.job_id.clone(), job_guard.allocation.contains(&0)) + }; + + let stream_data_dto: StreamDataDto = stream_data.into(); + + if current_job_id != stream_data_dto.job_id { + return Err(anyhow!( + "Job ID mismatch in StreamData: expected {}, got {}", + current_job_id.as_string(), + stream_data_dto.job_id + )); + } + + self.worker.route_stream_data(stream_data_dto, is_first_partition).await + } } diff --git a/elf-regressions/compressed/c_add.s b/elf-regressions/compressed/c_add.s new file mode 100644 index 000000000..776376026 --- /dev/null +++ b/elf-regressions/compressed/c_add.s @@ -0,0 +1,44 @@ +# Test c.add instruction - compressed add register to register + +.section .text.init +.global _start + +_start: + # Test basic addition + li x8, 10 + li x9, 20 + c.add x8, x9 # x8 = 10 + 20 = 30 + + # Verify result + li t0, 30 + bne x8, t0, error + + # Test with zero + li x10, 42 + li x11, 0 + c.add x10, x11 # x10 = 42 + 0 = 42 + + # Verify result + li t0, 42 + bne x10, t0, error + + # Test negative addition + li x12, -5 + li x13, 3 + c.add x12, x13 # x12 = -5 + 3 = -2 + + # Verify result + li t0, -2 + bne x12, t0, error + + # Success + li a0, 0 + li a7, 93 + ecall + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_addi.s b/elf-regressions/compressed/c_addi.s new file mode 100644 index 000000000..bf8a64d96 --- /dev/null +++ b/elf-regressions/compressed/c_addi.s @@ -0,0 +1,43 @@ +# Test c.addi instruction - compressed add immediate +# Tests adding immediate values to registers + +.section .text.init +.global _start + +_start: + # Initialize base values + li x1, 100 + li x2, 0 + li x3, -50 + + # Test positive immediate additions + c.addi x1, 1 + c.addi x1, 31 + c.addi x1, 15 + + # Test negative immediate additions + c.addi x1, -1 + c.addi x1, -32 + c.addi x1, -16 + + # Test zero addition + c.addi x2, 0 + + # Test overflow scenarios + li x4, 0x7fffffff + c.addi x4, 1 # Should overflow to negative + + li x5, 0x80000000 + c.addi x5, -1 # Should underflow to positive + + # Test different registers + c.addi t0, 10 + c.addi t1, -5 + c.addi a0, 7 + c.addi a1, -3 + + # Exit + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_addi16sp.s b/elf-regressions/compressed/c_addi16sp.s new file mode 100644 index 000000000..7f3e52179 --- /dev/null +++ b/elf-regressions/compressed/c_addi16sp.s @@ -0,0 +1,37 @@ +# Test c.addi16sp instruction - compressed add immediate to SP scaled by 16 +# Tests stack pointer manipulation with 16-byte alignment + +.section .text.init +.global _start + +_start: + # Save original stack pointer + mv t0, sp + + # Test positive adjustments (allocate stack space) + c.addi16sp sp, -16 # Allocate 16 bytes + c.addi16sp sp, -32 # Allocate 32 bytes + c.addi16sp sp, -64 # Allocate 64 bytes + c.addi16sp sp, -512 # Maximum negative adjustment + + # Test positive adjustments (deallocate stack space) + c.addi16sp sp, 16 # Deallocate 16 bytes + c.addi16sp sp, 32 # Deallocate 32 bytes + c.addi16sp sp, 64 # Deallocate 64 bytes + c.addi16sp sp, 496 # Maximum positive adjustment + + # Test edge cases + c.addi16sp sp, -496 # Near maximum negative + c.addi16sp sp, 496 # Maximum positive + + # Test zero adjustment (should be no-op) + mv t1, sp + + # Restore stack pointer + mv sp, t0 + + # Exit + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_addi4spn.s b/elf-regressions/compressed/c_addi4spn.s new file mode 100644 index 000000000..bc0153c44 --- /dev/null +++ b/elf-regressions/compressed/c_addi4spn.s @@ -0,0 +1,39 @@ +# Test c.addi4spn instruction - compressed add immediate scaled by 4 to SP for narrow registers +# Tests SP-relative addressing for compressed register set (x8-x15) + +.section .text.init +.global _start + +_start: + # Test with different immediate values and compressed registers + c.addi4spn x8, sp, 4 # Add 4 to SP, store in x8 + c.addi4spn x9, sp, 8 # Add 8 to SP, store in x9 + c.addi4spn x10, sp, 12 # Add 12 to SP, store in x10 + c.addi4spn x11, sp, 16 # Add 16 to SP, store in x11 + + # Test larger offsets + c.addi4spn x12, sp, 64 # Add 64 to SP + c.addi4spn x13, sp, 128 # Add 128 to SP + c.addi4spn x14, sp, 256 # Add 256 to SP + c.addi4spn x15, sp, 512 # Add 512 to SP + + # Test maximum offset + c.addi4spn x8, sp, 1020 # Maximum offset (1020 = 255 * 4) + + # Test various multiples of 4 + c.addi4spn x9, sp, 20 + c.addi4spn x10, sp, 36 + c.addi4spn x11, sp, 100 + c.addi4spn x12, sp, 200 + c.addi4spn x13, sp, 300 + c.addi4spn x14, sp, 400 + c.addi4spn x15, sp, 500 + + sub t0, x9, x8 # Should be (20-1020) = -1000 + sub t1, x10, x9 # Should be (36-20) = 16 + + # Exit + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_andi.s b/elf-regressions/compressed/c_andi.s new file mode 100644 index 000000000..d6464ee53 --- /dev/null +++ b/elf-regressions/compressed/c_andi.s @@ -0,0 +1,48 @@ +# Test c.andi instruction - compressed AND immediate + +.section .text.init +.global _start + +_start: + # Test basic AND with positive immediate + li x8, 0xff + c.andi x8, 0x0f # x8 = 0xff & 0x0f = 0x0f + + # Verify result + li t0, 0x0f + bne x8, t0, error + + # Test AND with zero (clears all bits) + li x9, 0x12345678 + c.andi x9, 0 # x9 = 0x12345678 & 0 = 0 + + # Verify result + bne x9, x0, error + + # Test AND with -1 (preserves all bits in range) + li x10, 0x12345678 + c.andi x10, -1 # x10 = 0x12345678 & 0xffffffff = 0x12345678 + + # Verify result (only low bits matter for c.andi) + li t0, 0x12345678 + bne x10, t0, error + + # Test boundary immediate values + li x11, 0xffffffff + c.andi x11, 31 # x11 = 0xffffffff & 31 = 31 + + # Verify result + li t0, 31 + bne x11, t0, error + + # Success + li a0, 0 + li a7, 93 + ecall + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_beqz_bnez.s b/elf-regressions/compressed/c_beqz_bnez.s new file mode 100644 index 000000000..b9bdba5d3 --- /dev/null +++ b/elf-regressions/compressed/c_beqz_bnez.s @@ -0,0 +1,37 @@ +# Test c.beqz and c.bnez instructions - compressed branch if equal/not equal to zero + +.section .text.init +.global _start + +_start: + # Test c.beqz with zero - should branch + li x8, 0 + c.beqz x8, test1_pass + j error + +test1_pass: + # Test c.beqz with non-zero - should not branch + li x9, 1 + c.beqz x9, error + + # Test c.bnez with non-zero - should branch + li x10, 42 + c.bnez x10, test2_pass + j error + +test2_pass: + # Test c.bnez with zero - should not branch + li x11, 0 + c.bnez x11, error + + # All tests passed + li a0, 0 + li a7, 93 + ecall + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_j.s b/elf-regressions/compressed/c_j.s new file mode 100644 index 000000000..3bd0d8dec --- /dev/null +++ b/elf-regressions/compressed/c_j.s @@ -0,0 +1,37 @@ +# Test c.j instruction - compressed unconditional jump + +.section .text.init +.global _start + +_start: + # Test forward jump + c.j forward_target + + # Should not reach here + li a0, 1 + li a7, 93 + ecall + +forward_target: + # Test backward jump + c.j test_backward + +after_backward: + # Test jump over instructions + c.j skip_section + + # These should be skipped + li x1, 0xbad + li x2, 0xbad + +skip_section: + # Success - all jumps worked + li a0, 0 + li a7, 93 + ecall + +test_backward: + # Jump back to continue test + c.j after_backward + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_jr_jalr.s b/elf-regressions/compressed/c_jr_jalr.s new file mode 100644 index 000000000..720f2815c --- /dev/null +++ b/elf-regressions/compressed/c_jr_jalr.s @@ -0,0 +1,39 @@ +# Test c.jr and c.jalr instructions - compressed jump register + +.section .text.init +.global _start + +_start: + # Test c.jr (jump register) + la x1, jr_target + c.jr x1 # Jump to address in x1 + + # Should not reach here + li a0, 1 + li a7, 93 + ecall + +jr_target: + # Test c.jalr (jump and link register) + la x2, function + c.jalr x2 # Call function, return address in x1 + + # Should return here + # Test that we returned by calling another function + la x3, function2 + c.jalr x3 + + # Success + li a0, 0 + li a7, 93 + ecall + +function: + # Simple function that returns + c.jr x1 # Return using saved address + +function2: + # Another simple function + c.jr x1 # Return + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_li.s b/elf-regressions/compressed/c_li.s new file mode 100644 index 000000000..e1b6073e5 --- /dev/null +++ b/elf-regressions/compressed/c_li.s @@ -0,0 +1,31 @@ +# Test c.li instruction - compressed load immediate +# Tests various immediate values including edge cases + +.section .text.init +.global _start + +_start: + # Test basic positive values + c.li x1, 0 + c.li x2, 1 + c.li x3, 31 + + # Test negative values (sign-extended) + c.li x4, -1 + c.li x5, -32 + + # Test boundary values + c.li x6, 15 + c.li x7, -16 + + # Test with different registers + c.li t0, 10 + c.li t1, 20 + c.li a0, 5 + c.li a1, -5 + + # Exit + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_lui.s b/elf-regressions/compressed/c_lui.s new file mode 100644 index 000000000..286017776 --- /dev/null +++ b/elf-regressions/compressed/c_lui.s @@ -0,0 +1,31 @@ +# Test c.lui instruction - compressed load upper immediate +# Tests upper immediate loading with various values + +.section .text.init +.global _start + +_start: + # Test basic values (c.lui uses 6-bit signed immediate, not 20-bit) + c.lui x1, 1 + c.lui x3, 0x1f # Maximum positive (31) + c.lui x4, 1 + + # Test various positive values (this toolchain only supports positive c.lui) + c.lui x5, 1 # Small positive + c.lui x6, 2 # Small positive + + # Test boundary conditions (positive values only) + c.lui x7, 30 # Near maximum + c.lui x8, 31 # Most positive (31) + + # Test with different registers (excluding x0, x2/sp) + c.lui t0, 16 # Valid positive immediate + c.lui t1, 20 # Valid positive immediate + c.lui a0, 8 # Valid positive immediate + c.lui a1, 12 # Valid positive immediate + + # Exit + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_lw_sw.s b/elf-regressions/compressed/c_lw_sw.s new file mode 100644 index 000000000..0487c6807 --- /dev/null +++ b/elf-regressions/compressed/c_lw_sw.s @@ -0,0 +1,57 @@ +# Test c.lw and c.sw instructions - compressed load/store word + +.section .data +test_data: + .word 0x12345678 + .word 0x9abcdef0 + .space 32 + +.section .text.init +.global _start + +_start: + # Load base address (must use compressed register) + la x8, test_data + + # Test basic c.sw and c.lw + li x9, 0x11223344 + c.sw x9, 8(x8) # Store at offset 8 + c.lw x10, 8(x8) # Load back from offset 8 + + # Verify result + bne x9, x10, error + + # Test different offsets + li x11, 0x55667788 + c.sw x11, 12(x8) # Store at offset 12 + c.lw x12, 12(x8) # Load back + + # Verify result + bne x11, x12, error + + # Test maximum offset (124 = 31 * 4) + li x13, 0xaabbccdd + c.sw x13, 124(x8) # Store at max offset + c.lw x14, 124(x8) # Load back + + # Verify result + bne x13, x14, error + + # Test loading existing data + c.lw x15, 0(x8) # Load first word (0x12345678) + + # Verify result + li t0, 0x12345678 + bne x15, t0, error + + # Success + li a0, 0 + li a7, 93 + ecall + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_lwsp_swsp.s b/elf-regressions/compressed/c_lwsp_swsp.s new file mode 100644 index 000000000..fcdddc5e8 --- /dev/null +++ b/elf-regressions/compressed/c_lwsp_swsp.s @@ -0,0 +1,96 @@ +# Test compressed stack operations - using c.addi16sp and regular load/store +# Tests SP-relative operations with compressed instructions where available + +.section .text.init +.global _start + +_start: + # Save original stack pointer + mv t0, sp + + # Allocate stack space using compressed instruction + c.addi16sp sp, -256 # Use compressed SP adjustment + + # Initialize some test data on stack + li t1, 0x11111111 + li t2, 0x22222222 + li t3, 0x33333333 + li t4, 0x44444444 + li t5, 0x55555555 + li t6, 0x66666666 + + # Store test data using regular stack operations + sw t1, 0(sp) # Store at SP + 0 + sw t2, 4(sp) # Store at SP + 4 + sw t3, 8(sp) # Store at SP + 8 + sw t4, 12(sp) # Store at SP + 12 + sw t5, 16(sp) # Store at SP + 16 + sw t6, 20(sp) # Store at SP + 20 + + # Test larger offsets + sw t1, 64(sp) # Store at SP + 64 + sw t2, 128(sp) # Store at SP + 128 + sw t3, 192(sp) # Store at SP + 192 + sw t4, 252(sp) # Store at SP + 252 + + # Clear registers + li x1, 0 + li x2, 0 + li x3, 0 + li x4, 0 + li x5, 0 + li x6, 0 + + # Load back using regular loads + lw x1, 0(sp) # Load from SP + 0 + lw x2, 4(sp) # Load from SP + 4 + lw x3, 8(sp) # Load from SP + 8 + lw x4, 12(sp) # Load from SP + 12 + lw x5, 16(sp) # Load from SP + 16 + lw x6, 20(sp) # Load from SP + 20 + + # Verify values match original + bne x1, t1, error + bne x2, t2, error + bne x3, t3, error + bne x4, t4, error + bne x5, t5, error + bne x6, t6, error + + # Test larger offsets + lw x7, 64(sp) # Load from SP + 64 + lw x8, 128(sp) # Load from SP + 128 + lw x9, 192(sp) # Load from SP + 192 + lw x10, 252(sp) # Load from SP + 252 + + # Verify larger offset values + bne x7, t1, error + bne x8, t2, error + bne x9, t3, error + bne x10, t4, error + + # Test stack frame simulation + sw ra, 248(sp) # Save return address + sw s0, 244(sp) # Save frame pointer + lw s0, 244(sp) # Restore frame pointer + lw ra, 248(sp) # Restore return address + +success: + # Restore stack pointer + mv sp, t0 + + # Exit with success + li a0, 0 + li a7, 93 + ecall + +error: + # Restore stack pointer + mv sp, t0 + + # Exit with error + li a0, 1 + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_mv.s b/elf-regressions/compressed/c_mv.s new file mode 100644 index 000000000..0cfdee28c --- /dev/null +++ b/elf-regressions/compressed/c_mv.s @@ -0,0 +1,47 @@ +# Test c.mv instruction - compressed move register to register + +.section .text.init +.global _start + +_start: + # Test basic move + li x1, 0x12345678 + c.mv x2, x1 # x2 = x1 + + # Verify result + bne x1, x2, error + + # Test move zero + li x3, 0 + c.mv x4, x3 # x4 = 0 + + # Verify result + bne x3, x4, error + + # Test move negative + li x5, -1 + c.mv x6, x5 # x6 = -1 + + # Verify result + bne x5, x6, error + + # Test chain of moves + li x7, 0xdeadbeef + c.mv x8, x7 # x8 = x7 + c.mv x9, x8 # x9 = x8 = x7 + + # Verify chain + bne x7, x8, error + bne x8, x9, error + + # Success + li a0, 0 + li a7, 93 + ecall + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_or_and_xor_sub.s b/elf-regressions/compressed/c_or_and_xor_sub.s new file mode 100644 index 000000000..223019000 --- /dev/null +++ b/elf-regressions/compressed/c_or_and_xor_sub.s @@ -0,0 +1,53 @@ +# Test c.or, c.and, c.xor, c.sub instructions - compressed ALU operations + +.section .text.init +.global _start + +_start: + # Test c.or + li x8, 0xf0f0f0f0 + li x9, 0x0f0f0f0f + c.or x8, x9 # x8 = 0xf0f0f0f0 | 0x0f0f0f0f = 0xffffffff + + # Verify result + li t0, 0xffffffff + bne x8, t0, error + + # Test c.and + li x10, 0x12345678 + li x11, 0xff00ff00 + c.and x10, x11 # x10 = 0x12345678 & 0xff00ff00 = 0x12005600 + + # Verify result + li t0, 0x12005600 + bne x10, t0, error + + # Test c.xor + li x12, 0xaaaaaaaa + li x13, 0x55555555 + c.xor x12, x13 # x12 = 0xaaaaaaaa ^ 0x55555555 = 0xffffffff + + # Verify result + li t0, 0xffffffff + bne x12, t0, error + + # Test c.sub + li x14, 100 + li x15, 30 + c.sub x14, x15 # x14 = 100 - 30 = 70 + + # Verify result + li t0, 70 + bne x14, t0, error + + # Success + li a0, 0 + li a7, 93 + ecall + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_slli.s b/elf-regressions/compressed/c_slli.s new file mode 100644 index 000000000..f67fc33ee --- /dev/null +++ b/elf-regressions/compressed/c_slli.s @@ -0,0 +1,49 @@ +# Test c.slli instruction - compressed shift left logical immediate + +.section .text.init +.global _start + +_start: + # Test basic left shift + li x1, 1 + c.slli x1, 1 # x1 = 1 << 1 = 2 + + # Verify result + li t0, 2 + bne x1, t0, error + + # Test larger shift + li x2, 1 + c.slli x2, 8 # x2 = 1 << 8 = 256 + + # Verify result + li t0, 256 + bne x2, t0, error + + # Test shift with data + li x3, 0x12345678 + c.slli x3, 4 # x3 = 0x12345678 << 4 = 0x23456780 + + # Verify result + li t0, 0x23456780 + bne x3, t0, error + + # Test maximum shift + li x4, 1 + c.slli x4, 31 # x4 = 1 << 31 = 0x80000000 + + # Verify result + li t0, 0x80000000 + bne x4, t0, error + + # Success + li a0, 0 + li a7, 93 + ecall + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/c_srli_srai.s b/elf-regressions/compressed/c_srli_srai.s new file mode 100644 index 000000000..eb5f25f7e --- /dev/null +++ b/elf-regressions/compressed/c_srli_srai.s @@ -0,0 +1,49 @@ +# Test c.srli and c.srai instructions - compressed shift right logical and arithmetic + +.section .text.init +.global _start + +_start: + # Test c.srli (logical right shift) + li x8, 0x80000000 + c.srli x8, 1 # x8 = 0x80000000 >> 1 = 0x40000000 (logical) + + # Verify result + li t0, 0x40000000 + bne x8, t0, error + + # Test c.srli with data + li x9, 0x12345678 + c.srli x9, 4 # x9 = 0x12345678 >> 4 = 0x01234567 + + # Verify result + li t0, 0x01234567 + bne x9, t0, error + + # Test c.srai (arithmetic right shift) with positive number + li x10, 0x12345678 + c.srai x10, 4 # x10 = 0x12345678 >> 4 = 0x01234567 (same as logical) + + # Verify result + li t0, 0x01234567 + bne x10, t0, error + + # Test c.srai with negative number (sign extension) + li x11, 0x80000000 + c.srai x11, 1 # x11 = 0x80000000 >> 1 = 0xc0000000 (sign extended) + + # Verify result + li t0, 0xc0000000 + bne x11, t0, error + + # Success + li a0, 0 + li a7, 93 + ecall + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/edge_cases_alignment.s b/elf-regressions/compressed/edge_cases_alignment.s new file mode 100644 index 000000000..7c8bfd163 --- /dev/null +++ b/elf-regressions/compressed/edge_cases_alignment.s @@ -0,0 +1,198 @@ +# Edge cases for compressed instruction alignment and encoding +# Tests instruction alignment, mixed 16/32-bit instructions, and boundary conditions + +.section .text.init +.global _start + +_start: + # Test 16-bit instruction alignment + # Compressed instructions must be 16-bit aligned + + # Mix compressed and uncompressed instructions + c.li x1, 10 # 16-bit instruction + addi x2, x1, 20 # 32-bit instruction + c.add x1, x2 # 16-bit instruction + li x3, 0x12345678 # 32-bit instruction + c.mv x4, x3 # 16-bit instruction + + # Test instruction sequence that crosses boundaries + .align 2 +boundary_test: + c.li x5, 1 # At 4-byte boundary + c.li x6, 2 # At 4-byte boundary + 2 + add x7, x5, x6 # 32-bit instruction at odd 16-bit boundary + c.mv x8, x7 # Back to compressed + + # Test maximum compressed immediate values at boundaries + c.li x9, 31 # Maximum 6-bit signed positive + c.li x10, -32 # Maximum 6-bit signed negative + addi x11, x9, 1 # Force to 32-bit to get 32 + addi x12, x10, -1 # Force to 32-bit to get -33 + + # Compare compressed vs uncompressed versions + bne x9, x11, different_immediate_ranges # Should be different (31 vs 32) + c.j error # Should not reach here + +different_immediate_ranges: + # Test shift amount boundaries + li x13, 1 + c.slli x13, 31 # Maximum shift amount for compressed + li x14, 1 + slli x14, x14, 31 # Same shift with 32-bit instruction + bne x13, x14, error # Should be the same + + # Test compressed vs uncompressed load/store + la x15, test_data + li x8, 0xabcdef01 # Use compressed register + + # Compressed store and load (both registers must be x8-x15) + c.sw x8, 0(x15) # Compressed store (both x8,x15 are compressed) + c.lw x9, 0(x15) # Compressed load (both x9,x15 are compressed) + + # Uncompressed store and load with same data + sw x8, 4(x15) # Uncompressed store (larger offset range) + lw x18, 4(x15) # Uncompressed load + + bne x9, x18, error # Should load same data + + # Test offset encoding differences + # Compressed: offset scaled by 4, max 124 (31*4) + # Uncompressed: offset not scaled, max 2047 + + c.sw x8, 124(x15) # Maximum compressed offset (x8 compressed) + sw x8, 128(x15) # Uncompressed beyond compressed range + + c.lw x10, 124(x15) # Load with compressed instruction (x10 compressed) + lw x20, 128(x15) # Load with uncompressed instruction + + bne x10, x20, error # Should be same data + + # Test stack pointer operations with alignment + mv t6, sp + + # Ensure SP is aligned before compressed operations + andi t0, sp, 15 # Check if SP is 16-byte aligned + bnez t0, align_sp # If not, align it + c.j sp_aligned + +align_sp: + addi sp, sp, -16 # Align to 16-byte boundary + +sp_aligned: + # Test compressed stack operations + c.addi16sp sp, -32 # Must be multiple of 16 + c.addi16sp sp, 32 # Restore + + # Test that odd adjustments fail (should use uncompressed) + # c.addi16sp sp, -17 # This would be invalid encoding + addi sp, sp, -17 # Use uncompressed for odd values + addi sp, sp, 17 # Restore with uncompressed + + mv sp, t6 # Restore original SP + + # Test instruction fetch alignment + # Jump to odd 16-bit boundary + c.j odd_boundary_test + + .align 2 + nop # Ensure we start at 4-byte boundary + +odd_boundary_test: + # These instructions start at 4-byte + 2 boundary + li x21, 42 # Use regular li (c.li limited to -32 to 31) + li x22, 84 # Use regular li (c.li limited to -32 to 31) + add x23, x21, x22 # 32-bit instruction at 4-byte boundary + + # Test branch targets and alignment + c.j even_target # Jump to even boundary + + .align 2 + nop # Padding to create even target + +even_target: + li x24, 100 # At 4-byte boundary (c.li limited to -32 to 31) + + # Test that unaligned accesses work properly + la x11, unaligned_data # Use compressed register + + # Load from properly aligned address + c.lw x12, 0(x11) # Aligned load (both x12,x11 compressed) + + # Test memory alignment requirements + la x27, test_data + + # Ensure test_data is word-aligned for compressed operations + andi t0, x27, 3 # Check word alignment + bnez t0, error # Should be word-aligned + + # Test compressed instruction encoding boundaries + # Some instructions have special encodings for certain values + + # c.li x0, 0 is reserved (HINT) + # c.addi x0, 0 is NOP + # c.mv x0, x0 is reserved + + # Test legal NOP variants + nop # Use standard nop instead + addi x0, x0, 0 # 32-bit NOP + + # Test register encoding boundaries + # Compressed registers use 3-bit encoding (x8-x15) + # Full registers use 5-bit encoding (x0-x31) + + # Test that high registers require uncompressed instructions + li x30, 0x30303030 # High register number + li x31, 0x31313131 # Highest register + + # These operations require uncompressed instructions + add x30, x30, x31 # Cannot use compressed form + mv x29, x30 # c.mv can handle any register + + # Test instruction length detection + # Ensure emulator correctly identifies 16-bit vs 32-bit instructions + + # Create specific pattern to test decoder + c.li x28, 15 # 16-bit: should decode as c.li + ori x28, x28, 16 # 32-bit: should decode as ori (not compressed) + + # Result should be 15 | 16 = 31 + li t0, 31 + bne x28, t0, error + + # Test boundary between compressed and uncompressed encodings + # Some instruction patterns overlap between 16-bit and 32-bit encodings + + # Verify all our test values + li t0, 10 + bne x1, t0, error + li t0, 30 + bne x2, t0, error + li t0, 40 + bne x4, t0, error # x4 = x1 + x2 + + # Success + li a0, 0 + li a7, 93 + ecall + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b + +.section .data +.align 4 +test_data: + .word 0x11111111 + .word 0x22222222 + .word 0x33333333 + .word 0x44444444 + .space 64 + +# Intentionally unaligned data for testing +.align 1 +unaligned_data: + .byte 0x01 + .word 0x12345678 # This will be at odd byte boundary \ No newline at end of file diff --git a/elf-regressions/compressed/edge_cases_branches.s b/elf-regressions/compressed/edge_cases_branches.s new file mode 100644 index 000000000..3bd20cdac --- /dev/null +++ b/elf-regressions/compressed/edge_cases_branches.s @@ -0,0 +1,253 @@ +# Edge cases for compressed branch instructions +# Tests branch distances, target alignment, and conditional edge cases + +.section .text.init +.global _start + +_start: + # Test compressed branch distance limits + # c.beqz and c.bnez have 9-bit signed offset (scaled by 2) + # Range: -256 to +254 bytes + + # Test forward branch near maximum distance + li x8, 0 + c.beqz x8, far_forward_target # Should branch + + # Fill space to test distance (but not too much to exceed limits) + .rept 50 + nop + .endr + + # Should not reach here + li x1, 0xbad + c.j error + +far_forward_target: + li x1, 0x100 # Mark successful far forward branch + + # Test backward branch setup + li x9, 42 + c.j setup_backward_test + +backward_target: + li x2, 0x200 # Mark successful backward branch + c.j continue_test + +setup_backward_test: + # Test backward branch + c.bnez x9, backward_target # Should branch back + + # Should not reach here + li x2, 0xbad + +continue_test: + # Test branch target alignment + # Branch targets should be 2-byte aligned for compressed instructions + + li x10, 1 + c.j aligned_target # Jump to aligned target + + .align 2 + nop # Ensure alignment + +aligned_target: + c.bnez x10, next_aligned # Branch to another aligned target + li x3, 0xbad + + .align 1 # 2-byte alignment +next_aligned: + li x3, 0x300 + + # Test edge case: branch to current instruction (infinite loop prevention) + li x11, 0 + c.bnez x11, after_self_branch # Should not branch (x11 is 0) + li x4, 0x400 # Should execute this + c.j after_self_branch + +after_self_branch: + # Test branch with zero flag edge cases + + # Test with exactly zero + li x12, 0x00000000 + c.beqz x12, zero_branch_taken + li x5, 0xbad + c.j error + +zero_branch_taken: + li x5, 0x500 + + # Test with non-zero patterns + li x13, 0x80000000 # MSB set (negative in signed interpretation) + c.bnez x13, negative_nonzero + li x6, 0xbad + c.j error + +negative_nonzero: + li x6, 0x600 + + li x14, 0x00000001 # LSB set (smallest positive) + c.bnez x14, positive_nonzero + li x7, 0xbad + c.j error + +positive_nonzero: + li x7, 0x700 + + # Test with alternating bit patterns + li x15, 0xaaaaaaaa + c.bnez x15, alternating_nonzero + li x8, 0xbad + c.j error + +alternating_nonzero: + li x8, 0x800 + + # Test branch prediction scenarios + # Loops that are likely to be taken vs not taken + + # Countdown loop (usually taken, except last iteration) + li x8, 3 # Loop counter (compressed register) + li x9, 0 # Accumulator (compressed register) + +countdown_loop: + c.beqz x8, countdown_end # Exit condition (c.beqz only on x8-x15) + c.add x9, x8 # Add counter to accumulator (compressed regs) + c.addi x8, -1 # Decrement (c.addi only on x8-x15) + c.j countdown_loop # Continue loop + +countdown_end: + # x9 should be 3+2+1 = 6 + li t0, 6 + bne x9, t0, error + + # Test nested conditional branches (use compressed registers) + li x10, 1 # Use compressed register + li x11, 0 # Use compressed register + li x12, 2 # Use compressed register + + c.bnez x10, outer_condition # Should branch (x10 is compressed) + li x9, 0xbad + c.j error + +outer_condition: + c.beqz x11, inner_condition # Should branch (x11 is compressed) + li x10, 0xbad + c.j error + +inner_condition: + c.bnez x12, nested_end # Should branch (x12 is compressed) + li x11, 0xbad + c.j error + +nested_end: + li x9, 0x900 + li x10, 0xa00 + li x11, 0xb00 + + # Test branch over different instruction types + li x13, 5 # Use compressed register + c.bnez x13, skip_mixed_instructions + + # Mixed instructions to skip + addi x12, x0, 1 # Valid instruction (0xbad is invalid hex) + li x13, 1 # Use regular li (c.li limited to -32 to 31) + lw x14, 0(x0) # This might fault, but should be skipped + c.add x15, x14 + +skip_mixed_instructions: + li x12, 0xc00 # Mark successful skip + + # Test branch to function-like targets + li x14, 10 # Use compressed register + c.bnez x14, function_like_target + li x13, 0xbad + c.j error + +return_point: + li x13, 0xd00 + c.j final_verification + +function_like_target: + # Simulate function that "returns" + li x14, 0xe00 + c.j return_point # "Return" to caller + +final_verification: + # Test rapid successive branches (use compressed registers) + li x8, 1 # Use compressed register + li x9, 0 # Use compressed register + li x10, 1 # Use compressed register + + c.bnez x8, rapid1 # Should branch (x8 is compressed) + c.j error +rapid1: + c.beqz x9, rapid2 # Should branch (x9 is compressed) + c.j error +rapid2: + c.bnez x10, rapid3 # Should branch (x10 is compressed) + c.j error +rapid3: + li x15, 0xf00 + + # Test branch distance calculations + # Verify that we can branch to nearby targets + li x11, 1 # Use compressed register + c.bnez x11, near_target_1 + c.j error + +near_target_1: + c.bnez x11, near_target_2 + c.j error + +near_target_2: + c.bnez x11, near_target_3 + c.j error + +near_target_3: + li x16, 0x1600 + + # Verify all results + li t0, 0x100 + bne x1, t0, error + li t0, 0x200 + bne x2, t0, error + li t0, 0x300 + bne x3, t0, error + li t0, 0x400 + bne x4, t0, error + li t0, 0x500 + bne x5, t0, error + li t0, 0x600 + bne x6, t0, error + li t0, 0x700 + bne x7, t0, error + li t0, 0x800 + bne x8, t0, error + li t0, 0x900 + bne x9, t0, error + li t0, 0xa00 + bne x10, t0, error + li t0, 0xb00 + bne x11, t0, error + li t0, 0xc00 + bne x12, t0, error + li t0, 0xd00 + bne x13, t0, error + li t0, 0xe00 + bne x14, t0, error + li t0, 0xf00 + bne x15, t0, error + li t0, 0x1600 + bne x16, t0, error + + # Success + li a0, 0 + li a7, 93 + ecall + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/edge_cases_immediates.s b/elf-regressions/compressed/edge_cases_immediates.s new file mode 100644 index 000000000..69bd0bb3f --- /dev/null +++ b/elf-regressions/compressed/edge_cases_immediates.s @@ -0,0 +1,210 @@ +# Edge cases for compressed instruction immediates +# Tests boundary values, overflow conditions, and special immediate encodings + +.section .text.init +.global _start + +_start: + # c.li edge cases - 6-bit signed immediate (-32 to 31) + c.li x1, 31 # Maximum positive immediate + c.li x2, -32 # Maximum negative immediate + c.li x3, 0 # Zero immediate + c.li x4, 1 # Minimum positive + c.li x5, -1 # Maximum magnitude negative + + # Verify sign extension + li t0, 31 + bne x1, t0, error + li t0, -32 + bne x2, t0, error + li t0, -1 + bne x5, t0, error + + # c.addi edge cases - 6-bit signed immediate + li x6, 100 + c.addi x6, 31 # Add maximum positive + li t0, 131 + bne x6, t0, error + + li x7, 100 + c.addi x7, -32 # Add maximum negative + li t0, 68 + bne x7, t0, error + + # Test zero addition (should be no-op) + li x8, 0x12345678 + c.addi x8, 0 + li t0, 0x12345678 + bne x8, t0, error + + # Test overflow with c.addi + li x9, 0x7fffffff # Max positive 32-bit + c.addi x9, 1 # Should overflow to negative + li t0, 0x80000000 + bne x9, t0, error + + li x10, 0x80000000 # Min negative 32-bit + c.addi x10, -1 # Should underflow to positive + li t0, 0x7fffffff + bne x10, t0, error + + # c.lui edge cases - 6-bit immediate for upper 20 bits + # Note: c.lui encodes different than c.li/c.addi + c.lui x11, 1 # Minimum non-zero value + li t0, 0x1000 # Should be 1 << 12 + bne x11, t0, error + + c.lui x12, 31 # Maximum positive in 6-bit field + li t0, 0x1f000 # Should be 31 << 12 + bne x12, t0, error + + # c.lui with different values + c.lui x13, 16 # Another positive value + # Test completed + + # c.addi16sp edge cases - 10-bit signed immediate scaled by 16 + mv t5, sp # Save original SP + + # Test maximum positive adjustment + c.addi16sp sp, 496 # 31 * 16 = 496 (max positive) + + # Test maximum negative adjustment + c.addi16sp sp, -512 # -32 * 16 = -512 (max negative) + + # Test small adjustments + c.addi16sp sp, 16 # Minimum positive adjustment + c.addi16sp sp, -16 # Minimum negative adjustment + + mv sp, t5 # Restore SP + + # c.addi4spn edge cases - 10-bit unsigned immediate scaled by 4 + # Maximum offset is 1020 (255 * 4) + c.addi4spn x14, sp, 1020 # Maximum offset + c.addi4spn x15, sp, 4 # Minimum offset + + # Verify the calculations + sub t0, x14, sp + li t1, 1020 + bne t0, t1, error + + sub t0, x15, sp + li t1, 4 + bne t0, t1, error + + # c.lwsp/c.swsp edge cases - 8-bit unsigned immediate scaled by 4 + mv t5, sp + addi sp, sp, -256 # Allocate space + + # Test maximum offset using regular instructions + li x16, 0xdeadbeef + sw x16, 252(sp) # Maximum offset + lw x17, 252(sp) # Load back + bne x16, x17, error + + # Test minimum offset + li x18, 0xcafebabe + sw x18, 0(sp) # Minimum offset + lw x19, 0(sp) # Load back + bne x18, x19, error + + mv sp, t5 # Restore SP + + # c.lw/c.sw edge cases - 7-bit unsigned immediate scaled by 4 + la x12, test_data # Use compressed register for base + + # Test maximum offset for compressed load/store (both registers must be x8-x15) + li x8, 0x11223344 # Use compressed register + c.sw x8, 124(x12) # Maximum offset (31 * 4 = 124), both x8,x12 compressed + c.lw x9, 124(x12) # Load back, both x9,x12 compressed + bne x8, x9, error + + # Test minimum offset + li x10, 0x55667788 # Use compressed register + c.sw x10, 0(x12) # Minimum offset, both x10,x12 compressed + c.lw x11, 0(x12) # Load back, both x11,x12 compressed + bne x10, x11, error + + # c.andi edge cases - 6-bit signed immediate (compressed registers only) + li x12, 0xffffffff # Use compressed register + c.andi x12, 31 # AND with max positive immediate + li t0, 31 + bne x12, t0, error + + li x13, 0xffffffff # Use compressed register + c.andi x13, -32 # AND with max negative immediate + li t0, 0xffffffe0 # Should be 0xffffffff & 0xffffffe0 + bne x13, t0, error + + li x14, 0x12345678 # Use compressed register + c.andi x14, 0 # AND with zero (should clear all) + bne x14, x0, error + + li x15, 0x12345678 # Use compressed register + c.andi x15, -1 # AND with -1 (should preserve all) + li t0, 0x12345678 + bne x15, t0, error + + # Shift immediate edge cases (use appropriate registers) + li x8, 1 # Use compressed register for c.srli/c.srai + c.slli x8, 31 # Maximum shift left (c.slli works on any register) + li t0, 0x80000000 + bne x8, t0, error + + li x9, 0x80000000 # Use compressed register + c.srli x9, 31 # Maximum logical shift right (c.srli only on x8-x15) + li t0, 1 + bne x9, t0, error + + li x10, 0x80000000 # Use compressed register + c.srai x10, 31 # Maximum arithmetic shift right (c.srai only on x8-x15) + li t0, -1 # Should sign extend to all 1s + bne x10, t0, error + + # Test shift by 1 (minimum non-zero shift) + li x8, 0x12345678 + c.slli x8, 1 + li t0, 0x2468acf0 + bne x8, t0, error + + li x9, 0x12345678 + c.srli x9, 1 + li t0, 0x091a2b3c + bne x9, t0, error + + li x10, 0x92345678 # Negative number + c.srai x10, 1 + li t0, 0xc91a2b3c # Sign extended + bne x10, t0, error + + # Test boundary between positive and negative immediates + c.li x11, 15 # Positive + c.li x12, 16 # Still positive (but check encoding) + c.li x13, -16 # Negative + c.li x14, -15 # Less negative + + # Verify correct values + li t0, 15 + bne x11, t0, error + li t0, 16 + bne x12, t0, error + li t0, -16 + bne x13, t0, error + li t0, -15 + bne x14, t0, error + + # Success + li a0, 0 + li a7, 93 + ecall + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b + +.section .data +.align 4 +test_data: + .space 256 # Space for testing various offsets \ No newline at end of file diff --git a/elf-regressions/compressed/edge_cases_registers.s b/elf-regressions/compressed/edge_cases_registers.s new file mode 100644 index 000000000..61a977722 --- /dev/null +++ b/elf-regressions/compressed/edge_cases_registers.s @@ -0,0 +1,194 @@ +# Edge cases for compressed instruction register usage +# Tests register constraints, aliasing, and special register behaviors + +.section .text.init +.global _start + +_start: + # Test compressed register set (x8-x15) vs full register set + + # Initialize all compressed registers + li x8, 0x08080808 + li x9, 0x09090909 + li x10, 0x10101010 + li x11, 0x11111111 + li x12, 0x12121212 + li x13, 0x13131313 + li x14, 0x14141414 + li x15, 0x15151515 + + # Test operations only available on compressed registers + c.and x8, x9 # Both operands must be x8-x15 + c.or x10, x11 + c.xor x12, x13 + c.sub x14, x15 + + # Verify results + li t0, 0x08080808 + li t1, 0x09090909 + and t2, t0, t1 + bne x8, t2, error + + # Test c.andi only works on compressed registers + li x8, 0xffffffff + c.andi x8, 15 # x8 must be in x8-x15 range + li t0, 15 + bne x8, t0, error + + # Test shift operations on compressed registers + li x9, 0x12345678 + c.srli x9, 4 # x9 must be x8-x15 + li t0, 0x01234567 + bne x9, t0, error + + li x10, 0x80000000 + c.srai x10, 1 # x10 must be x8-x15 + li t0, 0xc0000000 + bne x10, t0, error + + # Test c.lw/c.sw compressed register constraints + la x11, test_data # Base register must be x8-x15 + li x12, 0xaabbccdd # Data register must be x8-x15 + + c.sw x12, 0(x11) # Both registers x8-x15 + c.lw x13, 0(x11) # Both registers x8-x15 + bne x12, x13, error + + # Test register x0 special cases + # c.li x0, imm is reserved (should not be used) + # c.addi x0, imm is a hint (NOP-like) + # c.mv x0, rs is reserved + + # Test that x0 always reads as zero even with operations that try to modify it + mv x1, x0 # x1 should be 0 + bne x1, x0, error + + # Test stack pointer (x2/sp) special handling + mv t5, sp # Save original SP + + # c.addi16sp only works with SP + c.addi16sp sp, -16 # Must use SP as target + c.addi16sp sp, 16 # Restore + + # c.lwsp/c.swsp use SP as base + li x1, 0x87654321 + sw x1, 0(sp) # Uses SP as base + lw x2, 0(sp) # Uses SP as base + bne x1, x2, error + + mv sp, t5 # Restore SP + + # Test c.addi4spn with compressed register targets + c.addi4spn x8, sp, 4 # Target must be x8-x15 + c.addi4spn x15, sp, 8 # Test boundary register + + sub t0, x8, sp + li t1, 4 + bne t0, t1, error + + sub t0, x15, sp + li t1, 8 + bne t0, t1, error + + # Test return address register (x1/ra) in c.jalr + la x3, test_function + c.jalr x3 # Should save return address in x1 + + # Verify we returned (x4 should be set by function) + li t0, 0xf00c # Valid hex constant + bne x4, t0, error + + # Test c.jr with various registers + la x5, jump_target1 + c.jr x5 # Can use any register + + li x6, 0xbad # Should not execute + +jump_target1: + la x31, jump_target2 + c.jr x31 # Test with high register number + + li x7, 0xbad + +jump_target2: + # Test register aliasing and name conflicts + # Verify that compressed registers are same as regular registers + li s0, 0x12345678 # s0 is x8 + mv t0, x8 + bne s0, t0, error + + li s1, 0x87654321 # s1 is x9 + mv t0, x9 + bne s1, t0, error + + # Test operations between compressed and non-compressed registers + li x16, 0x11111111 # Non-compressed register + c.mv x8, x16 # Move from non-compressed to compressed allowed + bne x8, x16, error + + c.mv x17, x8 # Move from compressed to non-compressed allowed + bne x17, x8, error + + # Test c.add with register constraints + li x1, 100 # Any register for c.add + li x8, 200 # Compressed register + c.add x1, x8 # First operand any reg, second any reg + li t0, 300 + bne x1, t0, error + + # Test boundary registers + li x7, 0x07070707 # Just before compressed range + li x8, 0x08080808 # First compressed register + li x15, 0x15151515 # Last compressed register + li x16, 0x16161616 # Just after compressed range + + # These should work (c.mv accepts any registers) + c.mv x7, x8 # Move to non-compressed from compressed + c.mv x16, x15 # Move to non-compressed from compressed + c.mv x8, x7 # Move to compressed from non-compressed + c.mv x15, x16 # Move to compressed from non-compressed + + # Verify moves worked + li t0, 0x08080808 + bne x7, t0, error + li t0, 0x15151515 + bne x16, t0, error + li t0, 0x07070707 + bne x8, t0, error + li t0, 0x16161616 + bne x15, t0, error + + # Test special register encodings + # x0 should always be zero regardless of operations + add x0, x1, x2 # Try to modify x0 (should be ignored) + bne x0, zero, error # x0 should still be zero + + # Test that SP operations preserve stack integrity + mv t6, sp + li t0, 0x1000 + sub t1, sp, t0 # Calculate new SP value + + c.addi16sp sp, -64 # Modify SP with compressed instruction + c.addi16sp sp, 64 # Restore SP + bne sp, t6, error # Should be back to original + + # Success + li a0, 0 + li a7, 93 + ecall + +test_function: + li x4, 0xf00c # Mark function called + c.jr x1 # Return using return address + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b + +.section .data +.align 4 +test_data: + .space 64 \ No newline at end of file diff --git a/elf-regressions/compressed/mixed_arithmetic.s b/elf-regressions/compressed/mixed_arithmetic.s new file mode 100644 index 000000000..e701f9b95 --- /dev/null +++ b/elf-regressions/compressed/mixed_arithmetic.s @@ -0,0 +1,135 @@ +# Mixed compressed arithmetic operations test +# Combines various compressed arithmetic and logical instructions + +.section .text.init +.global _start + +_start: + # Test arithmetic expression: ((a + b) - c) * d + li x8, 10 # a + li x9, 5 # b + li x10, 3 # c + li x11, 2 # d + + c.add x8, x9 # x8 = a + b = 15 + c.sub x8, x10 # x8 = (a + b) - c = 12 + # Note: No c.mul, so we'll simulate with shifts/adds + c.slli x8, 1 # x8 = x8 * 2 = 24 + + # Test bit manipulation chain: (x | y) & (~z) + li x12, 0xf0f0f0f0 + li x13, 0x0f0f0f0f + li x14, 0x00ff00ff + + c.or x12, x13 # x12 = 0xf0f0f0f0 | 0x0f0f0f0f = 0xffffffff + c.xor x14, x14 # x14 = x14 ^ x14 = 0 (NOT simulation) + c.addi x14, -1 # x14 = 0 - 1 = 0xffffffff (all 1s) + c.xor x14, x15 # Assume x15 has the value we want to invert + + # Test stack operations simulation + c.addi16sp sp, -32 # Allocate stack space + + # Store some values (use regular instructions since c.swsp/c.lwsp not supported) + li x15, 0x12345678 + sw x15, 0(sp) # Store to stack + sw x8, 4(sp) # Store our calculated result + + # Modify and reload + lw x16, 0(sp) # Load back + addi x16, x16, 1 # Increment + sw x16, 8(sp) # Store modified value + + # Test register move chain with arithmetic + li x17, 100 + c.mv x8, x17 # x8 = 100 (use compressed register) + c.addi x8, 31 # x8 = 131 (c.addi limited to -32 to 31) + c.mv x9, x8 # x9 = 131 + c.sub x9, x8 # x9 = 131 - 131 = 0 (c.sub only on compressed regs) + + # Test shift patterns (shift ops only work on compressed registers for c.srli/c.srai) + li x10, 0x11111111 # Use compressed register x10 + c.slli x10, 1 # x10 = 0x22222222 + c.srli x10, 2 # x10 = 0x08888888 (c.srli only on x8-x15) + c.srai x10, 1 # x10 = 0x04444444 (c.srai only on x8-x15) + + # Test immediate operations chain (use compressed registers) + li x11, 0 + c.li x11, 15 # Load immediate + c.addi x11, 5 # x11 = 20 (c.addi only on compressed regs) + c.andi x11, 0x1f # x11 = 20 & 31 = 20 (c.andi only on x8-x15) + c.srli x11, 2 # x11 = 20 >> 2 = 5 (c.srli only on x8-x15) + + # Test upper immediate with calculations (fix c.lui immediate range) + c.lui x12, 16 # x12 = 16 << 12 (c.lui uses 6-bit signed immediate) + c.srli x12, 4 # x12 shifted right (c.srli only on x8-x15) + c.slli x12, 2 # x12 shifted left + + # Test compressed register operations mixing (all on x8-x15) + li x8, 0xaaaaaaaa + li x9, 0x55555555 + li x10, 0xff00ff00 + li x11, 0x00ff00ff + + c.and x8, x9 # x8 = 0 (no common bits) + c.or x10, x11 # x10 = 0xffffffff (all bits) + c.xor x8, x10 # x8 = 0 ^ 0xffffffff = 0xffffffff + c.sub x10, x8 # x10 = 0xffffffff - 0xffffffff = 0 + + # Test address calculation pattern + la x13, test_data # Use compressed register + c.addi x13, 8 # Point to offset 8 (c.addi only on x8-x15) + c.lw x14, 0(x13) # Load from calculated address (c.lw only on x8-x15) + + # Test loop counter pattern (use compressed registers) + li x15, 5 # Counter (compressed register) + li x8, 0 # Sum (compressed register) + +loop: + c.beqz x15, loop_end # c.beqz only on x8-x15 + c.add x8, x15 # Add counter to sum (c.add with compressed regs) + c.addi x15, -1 # Decrement (c.addi only on x8-x15) + c.j loop + +loop_end: + # x8 should be 5+4+3+2+1 = 15 + + # Test function call pattern + la x9, test_function # Use compressed register + c.jalr x9 # Call function + + # Should return here with x10 modified + + # Restore stack + c.addi16sp sp, 32 + + # Test final calculations + c.add x8, x10 # Combine results (compressed registers) + c.andi x8, 0x1f # Mask result (c.andi only on x8-x15) + + # Verification + li t0, 15 + bne x8, t0, error # Check if sum is correct (before masking) + + # Success + li a0, 0 + li a7, 93 + ecall + +test_function: + # Simple function that modifies x10 + c.li x10, 10 # Use compressed register + c.jr x1 # Return + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b + +.section .data +test_data: + .word 0x11111111 + .word 0x22222222 + .word 0x33333333 + .word 0x44444444 diff --git a/elf-regressions/compressed/mixed_control_flow.s b/elf-regressions/compressed/mixed_control_flow.s new file mode 100644 index 000000000..a2303dce1 --- /dev/null +++ b/elf-regressions/compressed/mixed_control_flow.s @@ -0,0 +1,260 @@ +# Mixed compressed control flow operations test +# Tests combinations of branches, jumps, and function calls + +.section .text.init +.global _start + +_start: + # Test nested conditional branches + li x8, 10 + li x9, 0 + li x10, 5 + + c.bnez x8, branch1 # Should branch (x8 = 10) + li x1, 0xbad + c.j error + +branch1: + c.beqz x9, branch2 # Should branch (x9 = 0) + li x2, 0xbad + c.j error + +branch2: + c.bnez x10, branch3 # Should branch (x10 = 5) + li x3, 0xbad + c.j error + +branch3: + li x1, 0x100 + li x2, 0x200 + li x3, 0x300 + + # Test function call with conditional returns + li x8, 1 # Test condition + la x11, conditional_func + c.jalr x11 + + li x4, 0x400 # Mark return point + + # Test loop with compressed branches + li x12, 5 # Loop counter + li x13, 0 # Accumulator + +loop1: + c.beqz x12, loop1_end + c.add x13, x12 # Add counter to accumulator + c.addi x12, -1 # Decrement counter + c.j loop1 + +loop1_end: + # x13 should be 15 (5+4+3+2+1) + + # Test nested loops + li x14, 3 # Outer counter + li x15, 0 # Result accumulator + +outer_loop: + c.beqz x14, outer_end + + # Inner loop (use compressed registers) + li x8, 2 # Inner counter (use compressed register) + +inner_loop: + c.beqz x8, inner_end # c.beqz only works on x8-x15 + c.add x15, x14 # Add outer counter to result + c.addi x8, -1 # Decrement inner counter (c.addi only on x8-x15) + c.j inner_loop + +inner_end: + c.addi x14, -1 # Decrement outer counter + c.j outer_loop + +outer_end: + # x15 should be 3*2 + 2*2 + 1*2 = 12 + + # Test switch-like construct using jumps + li x9, 2 # Switch value (use compressed register) + + # Switch implementation + c.beqz x9, case0 + li t0, 1 + beq x9, t0, case1 + li t0, 2 + beq x9, t0, case2 + c.j default_case + +case0: + li x18, 0xc0 + c.j switch_end + +case1: + li x18, 0xc1 + c.j switch_end + +case2: + li x18, 0xc2 + c.j switch_end + +default_case: + li x18, 0xcf + +switch_end: + # x18 should be 0xc2 + + # Test recursive-like pattern with return address management + li x19, 3 # Recursion depth + la x20, recursive_func + c.jalr x20 + + li x5, 0x500 # Mark after recursion + + # Test exception-like control flow + la x21, protected_code + c.jalr x21 # "Call" protected code + + # Exception handler simulation + li x6, 0x600 + c.j continue_after_exception + +exception_handler: + li x6, 0x6ee # Mark exception handled + c.jr x1 # "Return" from exception + +protected_code: + # Simulate some code that might "throw" + li x14, 1 # Use compressed register + c.bnez x14, simulate_exception + + # Normal path + li x23, 0x700 + c.jr x1 + +simulate_exception: + # Jump to exception handler + la x24, exception_handler + c.jr x24 + +continue_after_exception: + # Test computed jump simulation + li x13, 1 # Jump table index (use compressed register) + + # Simple jump table using branches + c.beqz x13, jump_target0 + li t0, 1 + beq x13, t0, jump_target1 + li t0, 2 + beq x13, t0, jump_target2 + c.j jump_target_default + +jump_target0: + li x26, 0x100 # Valid hex constant (was 0xj0) + c.j jump_table_end + +jump_target1: + li x26, 0x101 # Valid hex constant (was 0xj1) + c.j jump_table_end + +jump_target2: + li x26, 0x102 # Valid hex constant (was 0xj2) + c.j jump_table_end + +jump_target_default: + li x26, 0x10f # Valid hex constant (was 0xjf) + +jump_table_end: + # x26 should be 0x101 + + # Test early return pattern + li x27, 0 + la x28, early_return_func + c.jalr x28 + + li x7, 0x700 + + # Verification + c.j verify_results + +conditional_func: + # Function with conditional execution + c.bnez x8, cond_true + li x29, 0xfa15e # Valid hex constant (was 0xfa1se) + c.jr x1 + +cond_true: + li x29, 0x701e # Valid hex constant (was 0x7r0e) + c.jr x1 + +recursive_func: + # Simulate recursive function (use compressed register) + c.beqz x10, recursive_base # x19 -> x10 (compressed register) + + # Recursive case + c.addi x10, -1 # Decrement depth (use compressed register) + c.add x11, x10 # Add current depth to result (compressed registers) + c.jr x1 # Return (simplified recursion) + +recursive_base: + # Base case + li x11, 0x800 # Base result + c.jr x1 + +early_return_func: + # Function with early return + c.bnez x12, early_ret # Use compressed register x12 + + # Normal execution + li x31, 0x900 + c.jr x1 + +early_ret: + # Early return path + li x31, 0x901 + c.jr x1 + +verify_results: + # Verify test results + li t0, 0x100 + bne x1, t0, error + + li t0, 0x200 + bne x2, t0, error + + li t0, 0x300 + bne x3, t0, error + + li t0, 0x400 + bne x4, t0, error + + li t0, 0x500 + bne x5, t0, error + + li t0, 0x6ee + bne x6, t0, error + + li t0, 0x700 + bne x7, t0, error + + # Check computed values + li t0, 15 + bne x13, t0, error # Loop sum + + li t0, 12 + bne x15, t0, error # Nested loop result + + li t0, 0xc2 + bne x18, t0, error # Switch result + + li t0, 0x101 + bne x26, t0, error # Jump table result + + # Success + li a0, 0 + li a7, 93 + ecall + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b \ No newline at end of file diff --git a/elf-regressions/compressed/mixed_stack_ops.s b/elf-regressions/compressed/mixed_stack_ops.s new file mode 100644 index 000000000..a2cf3b729 --- /dev/null +++ b/elf-regressions/compressed/mixed_stack_ops.s @@ -0,0 +1,172 @@ +# Mixed compressed stack and memory operations test +# Tests combinations of stack pointer operations, loads, stores, and memory access + +.section .text.init +.global _start + +_start: + # Save original stack pointer + mv t6, sp + + # Test nested stack frame allocation + c.addi16sp sp, -64 # Allocate frame 1 + sw ra, 60(sp) # Save return address + sw s0, 56(sp) # Save frame pointer + + # Create local variables on stack + li x8, 100 # c.li only supports -32 to 31, use regular li + li x9, 200 # c.li only supports -32 to 31, use regular li + sw x8, 0(sp) # Local var 1 + sw x9, 4(sp) # Local var 2 + + # Test compressed register addressing with stack + c.addi4spn x10, sp, 8 # x10 = sp + 8 + c.addi4spn x11, sp, 12 # x11 = sp + 12 + + # Store data using compressed register addressing + li x12, 0x1234 + li x13, 0x5678 + c.sw x12, 0(x10) # Store at sp+8 + c.sw x13, 0(x11) # Store at sp+12 + + # Test mixed load/store with calculations + lw x14, 0(sp) # Load local var 1 (100) + lw x15, 4(sp) # Load local var 2 (200) + c.add x14, x15 # x14 = 100 + 200 = 300 (compressed regs) + sw x14, 8(sp) # Store result + + # Test compressed memory operations with offsets + c.lw x8, 0(x10) # Load from sp+8 (should be 0x1234) + c.lw x9, 0(x11) # Load from sp+12 (should be 0x5678) + c.add x8, x9 # Combine loaded values (compressed regs) + sw x8, 12(sp) # Store combined result + + # Test stack-relative addressing patterns + la x16, stack_data + c.addi4spn x10, sp, 16 # Point to sp+16 + + # Copy data from global to stack + lw x8, 0(x16) + lw x9, 4(x16) + lw x11, 8(x16) + lw x12, 12(x16) + + c.sw x8, 0(x10) # Copy to stack + c.sw x9, 4(x10) + c.sw x11, 8(x10) + c.sw x12, 12(x10) + + # Test array-like access using compressed addressing + c.addi4spn x13, sp, 16 # Base address + + # Access array elements + c.lw x14, 0(x13) # array[0] + c.lw x15, 4(x13) # array[1] + c.add x14, x15 # Sum first two elements + + c.lw x15, 8(x13) # array[2] + c.add x14, x15 # Add third element + + c.lw x15, 12(x13) # array[3] + c.add x14, x15 # Add fourth element + + # Store array sum + sw x14, 16(sp) + + # Test function call with stack operations + c.addi16sp sp, -32 # Allocate more space for function call + + # Pass parameters via stack + li x8, 42 # c.li supports this but use li for consistency + li x9, 58 # Out of c.li range, use regular li + sw x8, 0(sp) # arg1 + sw x9, 4(sp) # arg2 + + # Call function + la x17, stack_function + c.jalr x17 + + # Function returns result in x10 + sw x10, 8(sp) # Store function result + + # Clean up function call stack + c.addi16sp sp, 32 + + # Test stack array manipulation + c.addi4spn x11, sp, 20 # Point to array area + + # Initialize array with sequence + c.li x8, 1 + c.sw x8, 0(x11) + c.li x8, 2 + c.sw x8, 4(x11) + c.li x8, 3 + c.sw x8, 8(x11) + c.li x8, 4 + c.sw x8, 12(x11) + + # Process array in reverse + c.lw x12, 12(x11) # Load element 3 (value 4) + c.lw x13, 8(x11) # Load element 2 (value 3) + c.lw x14, 4(x11) # Load element 1 (value 2) + c.lw x15, 0(x11) # Load element 0 (value 1) + + # Calculate: 4*1000 + 3*100 + 2*10 + 1*1 = 4321 + c.slli x12, 2 # x12 = 4 << 2 = 16 + li t0, 1000 + mul x12, x12, t0 # x12 = 4 * 1000 = 4000 (but we'll use simpler calc) + + # Simplified calculation for compressed ops only + c.slli x12, 10 # Approximate *1000 with shifts + c.slli x13, 6 # Approximate *100 + c.slli x14, 3 # Approximate *10 + # x15 *= 1 (no change) + + c.add x12, x13 + c.add x12, x14 + c.add x12, x15 + + # Store final result + sw x12, 24(sp) + + # Test stack unwinding + lw s0, 56(sp) # Restore frame pointer + lw ra, 60(sp) # Restore return address + c.addi16sp sp, 64 # Deallocate frame + + # Restore original stack pointer + mv sp, t6 + + # Verification (simple check that operations completed) + # Skip potentially faulting negative offset instruction + + # Success + li a0, 0 + li a7, 93 + ecall + +stack_function: + # Load arguments from stack + lw x8, 0(sp) # arg1 = 42 + lw x9, 4(sp) # arg2 = 58 + + # Calculate result + c.add x10, x8 # x10 = 42 + c.add x10, x9 # x10 = 42 + 58 = 100 + + # Return + c.jr x1 + +error: + li a0, 1 + li a7, 93 + ecall + +1: j 1b + +.section .data +stack_data: + .word 0x10101010 + .word 0x20202020 + .word 0x30303030 + .word 0x40404040 \ No newline at end of file diff --git a/elf-regressions/compressed/test_unimp.s b/elf-regressions/compressed/test_unimp.s new file mode 100644 index 000000000..5329f2947 --- /dev/null +++ b/elf-regressions/compressed/test_unimp.s @@ -0,0 +1,12 @@ +# Test case to reproduce 16 bit zeroes + +.section .text.init +.global _start + +_start: + unimp # Assembler generates 0x0000 (in RVC mode) + + li a7, 93 + ecall + +1: j 1b # Infinite loop \ No newline at end of file diff --git a/elf-regressions/custom_entry/custom.ld b/elf-regressions/custom_entry/custom.ld index 051952727..c11a0245d 100644 --- a/elf-regressions/custom_entry/custom.ld +++ b/elf-regressions/custom_entry/custom.ld @@ -23,6 +23,7 @@ SECTIONS *(.text) } > ROM + . = ALIGN(8); .rodata : { *(.rodata) } > ROM diff --git a/elf-regressions/fips_markers/fips.ld b/elf-regressions/fips_markers/fips.ld index 78084ffd6..bb4d4dae9 100644 --- a/elf-regressions/fips_markers/fips.ld +++ b/elf-regressions/fips_markers/fips.ld @@ -12,12 +12,16 @@ SECTIONS *(.fipsstart) } + . = ALIGN(8); + /* Main text section with the actual code */ .text : { *(.text.start) /* Entry point code */ *(.text) /* Other text */ } + . = ALIGN(8); + /* Create a separate section for FIPS end marker */ .fipsend : { PROVIDE(_fipsend = .); diff --git a/elf-regressions/jalr_custom/jalr_custom.ld b/elf-regressions/jalr_custom/jalr_custom.ld index 4f3cd2fcb..ae8348749 100644 --- a/elf-regressions/jalr_custom/jalr_custom.ld +++ b/elf-regressions/jalr_custom/jalr_custom.ld @@ -24,6 +24,7 @@ SECTIONS *(.text) } > ROM + . = ALIGN(8); .rodata : { *(.rodata) } > ROM diff --git a/elf-regressions/jalr_gap/jalr_gap.ld b/elf-regressions/jalr_gap/jalr_gap.ld index 5c1a689c9..9a3e71a08 100644 --- a/elf-regressions/jalr_gap/jalr_gap.ld +++ b/elf-regressions/jalr_gap/jalr_gap.ld @@ -10,14 +10,17 @@ SECTIONS *(.text) } + . = ALIGN(8); .rodata : { *(.rodata) } + . = ALIGN(8); .data : { *(.data) } + . = ALIGN(8); .bss : { *(.bss) } diff --git a/emulator-asm/Makefile b/emulator-asm/Makefile index 2d006d7ae..73fcfd429 100644 --- a/emulator-asm/Makefile +++ b/emulator-asm/Makefile @@ -1,3 +1,5 @@ +TRACE_TARGET=NONE + # Debug build flags ifeq ($(dbg),1) # CFLAGS = -O0 -g -D DEBUG -no-pie -ggdb -fno-inline @@ -8,9 +10,33 @@ else ASMFLAGS = --noexecstack endif +ifeq ($(TRACE_TARGET),MT) + CFLAGS += -DTRACE_TARGET_MT + ASMFLAGS += --defsym TRACE_TARGET=1 +else ifeq ($(TRACE_TARGET),MO) + CFLAGS += -DTRACE_TARGET_MO + ASMFLAGS += --defsym TRACE_TARGET=7 +else ifeq ($(TRACE_TARGET),RH) + CFLAGS += -DTRACE_TARGET_RH + ASMFLAGS += --defsym TRACE_TARGET=2 +endif + # Default EMU_PATH and OUT_PATH EMU_PATH ?= src/emu.asm OUT_PATH ?= build/ziskemuasm +# build/dma/memcpy_fast.o +DMA_OBJS = build/dma/fast_dma_encode.o \ + build/dma/direct_memcpy_mops.o build/dma/direct_memcpy_mtrace.o \ + build/dma/direct_inputcpy_mops.o build/dma/direct_inputcpy_mtrace.o \ + build/dma/direct_memcmp_mops.o build/dma/direct_memcmp_mtrace.o \ + build/dma/direct_memset_mops.o build/dma/direct_memset_mtrace.o \ + build/dma/fast_memcmp.o \ + build/dma/fast_memcpy64.o \ + build/dma/fast_memcpy.o \ + build/dma/fast_memset.o \ + build/dma/fast_inputcpy.o \ + build/dma/memcpy_fast.o \ + build/dma/check_dynamic_mtrace.o # Ensure the output directory exists OUT_DIR := $(dir $(OUT_PATH)) @@ -22,10 +48,24 @@ build/emu.o: $(EMU_PATH) mkdir -p build as $(ASMFLAGS) -o build/emu.o $< +build/dma/%.o: src/dma/%.asm src/dma/dma_constants.inc + mkdir -p build/dma + as $(ASMFLAGS) -I./src/dma -o $@ $< + +build/dma.o: $(DMA_OBJS) + ld -r $(DMA_OBJS) -o $@ + +build/zisk.ld: + mkdir -p build + ld --verbose 2>/dev/null | \ + awk '/^=+$$/{found++; next} found==1{print}' | \ + sed '/\/DISCARD\//i\ .zisk_region 0x40000000 (NOLOAD) : { . = . + 0x890000000; }' \ + > build/zisk.ld + # Compile the final executable -$(OUT_PATH): build/emu.o src/main.c src/emu.c src/chfast/keccak.c +$(OUT_PATH): build/zisk.ld build/emu.o src/main.c src/globals.c src/configuration.c src/trace_logs.c src/server.c src/client.c src/c_provided.c src/trace.c src/emu.c src/chfast/keccak.c src/bcon/bcon_sha256.c build/dma.o mkdir -p $(OUT_DIR) - gcc $(CFLAGS) src/main.c src/emu.c src/chfast/keccak.c src/bcon/bcon_sha256.c -L../lib-c/c/lib -L../../bin -L../target/release -L../target/debug -lc build/emu.o -lziskc -lziskclib -lgmp -lstdc++ -lgmpxx -o $@ + gcc $(CFLAGS) -Wl,-T,build/zisk.ld src/main.c src/globals.c src/configuration.c src/trace_logs.c src/server.c src/client.c src/c_provided.c src/trace.c src/emu.c src/chfast/keccak.c src/bcon/bcon_sha256.c -L../lib-c/c/lib -L../../bin -L../target/release -L../target/debug -lc build/emu.o -lc build/dma.o -lziskc -lziskclib -lgmp -lstdc++ -lgmpxx -o $@ clean: rm -rf build diff --git a/emulator-asm/asm-runner/Cargo.toml b/emulator-asm/asm-runner/Cargo.toml index f1c494afd..75c24fa20 100644 --- a/emulator-asm/asm-runner/Cargo.toml +++ b/emulator-asm/asm-runner/Cargo.toml @@ -16,14 +16,13 @@ zisk-common = { workspace = true } zisk-core = { workspace = true } mem-planner-cpp = { workspace = true } mem-common = { workspace = true } +proofman-common = { workspace = true } tracing = { workspace = true} rayon = { workspace = true} anyhow = { workspace = true} -clap = { workspace = true } libc = "0.2" -ureq = "3" thiserror = { workspace = true } [target.'cfg(all(target_os = "linux", target_arch = "x86_64"))'.dependencies] diff --git a/emulator-asm/asm-runner/src/asm_mo_runner.rs b/emulator-asm/asm-runner/src/asm_mo_runner.rs index 93eab3f06..686c52529 100644 --- a/emulator-asm/asm-runner/src/asm_mo_runner.rs +++ b/emulator-asm/asm-runner/src/asm_mo_runner.rs @@ -1,47 +1,49 @@ #[cfg(all(target_os = "linux", target_arch = "x86_64"))] use named_sem::NamedSemaphore; -use zisk_common::ExecutorStatsHandle; -use zisk_common::Plan; +use zisk_common::{stats_begin, stats_end, stats_mark, ExecutorStatsHandle, Plan}; use std::ffi::c_void; use std::sync::atomic::{fence, Ordering}; -use std::time::Duration; use tracing::error; -use crate::{AsmMOChunk, AsmMOHeader, AsmRunError, AsmService, AsmServices, AsmSharedMemory}; +use crate::SEM_CHUNK_DONE_WAIT_DURATION; +use crate::TRACE_DELTA_SIZE; +use crate::TRACE_INITIAL_SIZE; +use crate::TRACE_MAX_SIZE; +use crate::{ + sem_chunk_done_name, shmem_output_name, AsmMOChunk, AsmMOHeader, AsmMultiSharedMemory, + AsmRunError, AsmService, AsmServices, +}; use mem_planner_cpp::MemPlanner; use anyhow::{Context, Result}; -#[cfg(feature = "stats")] -use zisk_common::ExecutorStatsEvent; - #[cfg(feature = "save_mem_plans")] use mem_common::save_plans; -pub struct PreloadedMO { - pub output_shmem: AsmSharedMemory, +pub struct MOShMemReader { + pub output_shmem: AsmMultiSharedMemory, mem_planner: Option, handle_mo: Option>, } -impl PreloadedMO { +impl MOShMemReader { pub fn new( local_rank: i32, base_port: Option, unlock_mapped_memory: bool, ) -> Result { - let port = if let Some(base_port) = base_port { - AsmServices::port_for(&AsmService::MO, base_port, local_rank) - } else { - AsmServices::default_port(&AsmService::MO, local_rank) - }; + let port = AsmServices::port_base_for(base_port, local_rank); - let output_name = - AsmSharedMemory::::shmem_output_name(port, AsmService::MO, local_rank); + let output_name = shmem_output_name(port, AsmService::MO, local_rank, None); - let output_shared_memory = - AsmSharedMemory::::open_and_map(&output_name, unlock_mapped_memory)?; + let output_shared_memory = AsmMultiSharedMemory::::open_and_map( + &output_name, + TRACE_INITIAL_SIZE, + TRACE_DELTA_SIZE, + TRACE_MAX_SIZE, + unlock_mapped_memory, + )?; Ok(Self { output_shmem: output_shared_memory, @@ -51,7 +53,7 @@ impl PreloadedMO { } } -impl Drop for PreloadedMO { +impl Drop for MOShMemReader { fn drop(&mut self) { if let Some(handle_mo) = self.handle_mo.take() { match handle_mo.join() { @@ -79,7 +81,7 @@ impl AsmRunnerMO { #[allow(clippy::too_many_arguments)] pub fn run( - preloaded: &mut PreloadedMO, + preloaded: &mut MOShMemReader, max_steps: u64, chunk_size: u64, world_rank: i32, @@ -87,37 +89,27 @@ impl AsmRunnerMO { base_port: Option, _stats: ExecutorStatsHandle, ) -> Result { - #[cfg(feature = "stats")] - let parent_stats_id = _stats.next_id(); - #[cfg(feature = "stats")] - _stats.add_stat(0, parent_stats_id, "ASM_MO_RUNNER", 0, ExecutorStatsEvent::Begin); - - let port = if let Some(base_port) = base_port { - AsmServices::port_for(&AsmService::MO, base_port, local_rank) - } else { - AsmServices::default_port(&AsmService::MO, local_rank) - }; + stats_begin!(_stats, 0, _runner_scope, "ASM_MO_RUNNER", 0); + + let port = AsmServices::port_base_for(base_port, local_rank); - let sem_chunk_done_name = - AsmSharedMemory::::shmem_chunk_done_name(port, AsmService::MO, local_rank); + let sem_chunk_done_name = sem_chunk_done_name(port, AsmService::MO, local_rank); let mut sem_chunk_done = NamedSemaphore::create(sem_chunk_done_name.clone(), 0) .map_err(|e| AsmRunError::SemaphoreError(sem_chunk_done_name.clone(), e))?; - let __stats = _stats.clone(); + // Capture parent id for thread + let _parent_id = _runner_scope.id(); + let _thread_stats = _stats.clone(); let handle = std::thread::spawn(move || { - #[cfg(feature = "stats")] - let stats_id = __stats.next_id(); - #[cfg(feature = "stats")] - __stats.add_stat(parent_stats_id, stats_id, "ASM_MO", 0, ExecutorStatsEvent::Begin); + stats_begin!(_thread_stats, _parent_id, _mo_scope, "ASM_MO", 0); let asm_services = AsmServices::new(world_rank, local_rank, base_port); + #[allow(clippy::let_and_return)] let result = asm_services.send_memory_ops_request(max_steps, chunk_size); - // Add to executor stats - #[cfg(feature = "stats")] - __stats.add_stat(parent_stats_id, stats_id, "ASM_MO", 0, ExecutorStatsEvent::End); + stats_end!(_thread_stats, &_mo_scope); result }); @@ -133,60 +125,56 @@ impl AsmRunnerMO { // Initialize C++ memory operations trace mem_planner.execute(); - #[cfg(feature = "stats")] - let stats_id = _stats.next_id(); - #[cfg(feature = "stats")] - _stats.add_stat( - parent_stats_id, - stats_id, - "MO_PROCESS_CHUNKS", - 0, - ExecutorStatsEvent::Begin, - ); - - // Threshold (in bytes) used to detect when the shared memory region size has changed. - // Computed to optimize the common case where minor size fluctuations are ignored. - // It is based on the worst-case scenario of memory usage. - let threshold_bytes = (chunk_size as usize * 200) + (44 * 8) + 32; + stats_begin!(_stats, &_runner_scope, _process_scope, "MO_PROCESS_CHUNKS", 0); + + // Threshold (in bytes) used to detect when we need to check for new shared memory files. + // Must match MAX_CHUNK_TRACE_SIZE from main.c to ensure we check before the producer + // reallocates. Constants from main.c: + // MAX_MTRACE_REGS_ACCESS_SIZE = (2 + 2 + 3) * 8 = 56 + // MAX_BYTES_DIRECT_MTRACE = 256 + // MAX_BYTES_MTRACE_STEP = 256 + 56 = 312 + // MAX_TRACE_CHUNK_INFO = (44 * 8) + 32 = 384 + // MAX_CHUNK_TRACE_SIZE = (chunk_size * MAX_BYTES_MTRACE_STEP) + MAX_TRACE_CHUNK_INFO + const MAX_MTRACE_REGS_ACCESS_SIZE: usize = (2 + 2 + 3) * 8; + const MAX_BYTES_DIRECT_MTRACE: usize = 256; + const MAX_BYTES_MTRACE_STEP: usize = MAX_BYTES_DIRECT_MTRACE + MAX_MTRACE_REGS_ACCESS_SIZE; + const MAX_TRACE_CHUNK_INFO: usize = (44 * 8) + 32; + + let threshold_bytes = (chunk_size as usize * MAX_BYTES_MTRACE_STEP) + MAX_TRACE_CHUNK_INFO; let mut threshold = unsafe { - preloaded.output_shmem.mapped_ptr().add(threshold_bytes) as *const AsmMOChunk + preloaded + .output_shmem + .mapped_ptr() + .add(preloaded.output_shmem.total_mapped_size() - threshold_bytes) + as *const AsmMOChunk }; let exit_code = loop { - match sem_chunk_done.timed_wait(Duration::from_secs(10)) { + match sem_chunk_done.timed_wait(SEM_CHUNK_DONE_WAIT_DURATION) { Ok(()) => { // Synchronize with memory changes from the C++ side fence(Ordering::Acquire); - // Check if we need to remap the shared memory + // Check if we need to map additional shared memory files. if data_ptr >= threshold - && preloaded - .output_shmem - .check_size_changed(&mut data_ptr) - .context("Failed to check and remap shared memory for MO trace")? + && preloaded.output_shmem.check_size_changed().context( + "Failed to check and map new shared memory files for MO trace", + )? { - threshold = unsafe { - preloaded.output_shmem.mapped_ptr().add(threshold_bytes) - as *const AsmMOChunk - }; + // Update threshold based on new total mapped size + threshold = + unsafe { + preloaded.output_shmem.mapped_ptr().add( + preloaded.output_shmem.total_mapped_size() - threshold_bytes, + ) as *const AsmMOChunk + }; } let chunk = unsafe { std::ptr::read(data_ptr) }; data_ptr = unsafe { data_ptr.add(1) }; - // Add to executor stats - #[cfg(feature = "stats")] - { - let stats_id = _stats.next_id(); - _stats.add_stat( - parent_stats_id, - stats_id, - "MO_CHUNK_DONE", - 0, - ExecutorStatsEvent::Mark, - ); - } + stats_mark!(_stats, &_runner_scope, "MO_CHUNK_DONE", 0); mem_planner.add_chunk(chunk.mem_ops_size, data_ptr as *const c_void); @@ -232,46 +220,22 @@ impl AsmRunnerMO { let mut mem_align_plans = mem_planner.wait_mem_align_plans(); mem_planner.wait(); - // Add to executor stats - #[cfg(feature = "stats")] - _stats.add_stat(parent_stats_id, stats_id, "MO_PROCESS_CHUNKS", 0, ExecutorStatsEvent::End); - - #[cfg(feature = "stats")] - let stats_id = _stats.next_id(); - #[cfg(feature = "stats")] - _stats.add_stat( - parent_stats_id, - stats_id, - "MO_COLLECT_PLANS", - 0, - ExecutorStatsEvent::Begin, - ); + stats_end!(_stats, &_process_scope); + stats_begin!(_stats, &_runner_scope, _collect_scope, "MO_COLLECT_PLANS", 0); let plans = mem_planner.collect_plans(&mut mem_align_plans); #[cfg(feature = "save_mem_plans")] save_plans(&plans, "mem_plans_cpp.txt"); - // Add to executor stats - #[cfg(feature = "stats")] - _stats.add_stat(parent_stats_id, stats_id, "MO_COLLECT_PLANS", 0, ExecutorStatsEvent::End); - - // #[cfg(feature = "stats")] - // { - // let mem_stats = mem_planner.get_mem_stats(); - // for i in mem_stats { - // _stats.add_stat(i); - // } - // } + stats_end!(_stats, &_collect_scope); preloaded.handle_mo = Some(std::thread::spawn(move || { drop(mem_planner); MemPlanner::new() })); - #[cfg(feature = "stats")] - _stats.add_stat(0, parent_stats_id, "ASM_MO_RUNNER", 0, ExecutorStatsEvent::End); - + stats_end!(_stats, &_runner_scope); Ok(AsmRunnerMO::new(plans)) } } diff --git a/emulator-asm/asm-runner/src/asm_mt.rs b/emulator-asm/asm-runner/src/asm_mt.rs index 51154ebb8..2ddeb9525 100644 --- a/emulator-asm/asm-runner/src/asm_mt.rs +++ b/emulator-asm/asm-runner/src/asm_mt.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::fmt::Debug; use zisk_common::EmuTrace; use zisk_common::EmuTraceStart; @@ -45,11 +46,16 @@ impl AsmMTChunk { let chunk = unsafe { std::ptr::read(*mapped_ptr) }; *mapped_ptr = unsafe { mapped_ptr.add(1) }; - // Convert mem_reads into a Vec without copying - let mem_reads_ptr = *mapped_ptr as *mut u64; + // Zero-copy: borrow mem_reads directly from shared memory + // SAFETY: Caller must ensure shared memory outlives EmuTrace usage + let mem_reads_ptr = *mapped_ptr as *const u64; let mem_reads_len = chunk.mem_reads_size as usize; - let mem_reads = - unsafe { std::slice::from_raw_parts(mem_reads_ptr, mem_reads_len).to_vec() }; + let mem_reads: Cow<'static, [u64]> = Cow::Borrowed(unsafe { + std::mem::transmute::<&[u64], &[u64]>(std::slice::from_raw_parts( + mem_reads_ptr, + mem_reads_len, + )) + }); // Advance the pointer after reading memory reads *mapped_ptr = unsafe { (*mapped_ptr as *mut u64).add(mem_reads_len) as *const AsmMTChunk }; @@ -76,32 +82,12 @@ impl AsmMTChunk { #[repr(C)] #[derive(Debug)] -pub struct AsmInputC { - pub chunk_size: u64, - pub max_steps: u64, - pub initial_trace_size: u64, - pub input_data_size: u64, -} - -impl AsmInputC { - pub fn to_bytes(&self) -> Vec { - let mut bytes = Vec::with_capacity(32); - bytes.extend_from_slice(&self.chunk_size.to_le_bytes()); - bytes.extend_from_slice(&self.max_steps.to_le_bytes()); - bytes.extend_from_slice(&self.initial_trace_size.to_le_bytes()); - bytes.extend_from_slice(&self.input_data_size.to_le_bytes()); - bytes - } -} - -#[repr(C)] -#[derive(Debug)] -pub struct AsmInputC2 { +pub struct AsmInputHeader { pub zero: u64, // Not used pub input_data_size: u64, } -impl AsmInputC2 { +impl AsmInputHeader { pub fn to_bytes(&self) -> Vec { let mut bytes = Vec::with_capacity(32); bytes.extend_from_slice(&0u64.to_le_bytes()); diff --git a/emulator-asm/asm-runner/src/asm_mt_runner.rs b/emulator-asm/asm-runner/src/asm_mt_runner.rs index d3fbcbe8b..9244f2eef 100644 --- a/emulator-asm/asm-runner/src/asm_mt_runner.rs +++ b/emulator-asm/asm-runner/src/asm_mt_runner.rs @@ -1,57 +1,46 @@ use named_sem::NamedSemaphore; +use zisk_common::{stats_begin, stats_end, stats_mark, AsmExecutionInfo}; #[cfg(all(target_os = "linux", target_arch = "x86_64"))] use zisk_common::{ChunkId, EmuTrace, ExecutorStatsHandle}; #[cfg(all(target_os = "linux", target_arch = "x86_64"))] use std::sync::atomic::{fence, Ordering}; use std::sync::Arc; -use std::time::{Duration, Instant}; +use std::time::Instant; use tracing::{error, info}; -use crate::{AsmMTChunk, AsmMTHeader, AsmRunError, AsmService, AsmServices, AsmSharedMemory}; +use crate::{ + sem_chunk_done_name, shmem_output_name, AsmMTChunk, AsmMTHeader, AsmMultiSharedMemory, + AsmRunError, AsmService, AsmServices, SEM_CHUNK_DONE_WAIT_DURATION, TRACE_DELTA_SIZE, + TRACE_INITIAL_SIZE, TRACE_MAX_SIZE, +}; use anyhow::{Context, Result}; -#[cfg(feature = "stats")] -use zisk_common::ExecutorStatsEvent; - -pub trait Task: Send + Sync + 'static { - type Output: Send + 'static; - fn execute(self) -> Self::Output; -} - -pub type TaskFactory<'a, T> = Box) -> T + Send + Sync + 'a>; - -pub enum MinimalTraces { - None, - EmuTrace(Vec), - AsmEmuTrace(AsmRunnerMT), -} - -pub struct PreloadedMT { - pub output_shmem: AsmSharedMemory, +pub struct MTShMemReader { + pub output_shmem: AsmMultiSharedMemory, } -impl PreloadedMT { +impl MTShMemReader { pub fn new( local_rank: i32, base_port: Option, unlock_mapped_memory: bool, ) -> Result { - let port = if let Some(base_port) = base_port { - AsmServices::port_for(&AsmService::MT, base_port, local_rank) - } else { - AsmServices::default_port(&AsmService::MT, local_rank) - }; + let port = AsmServices::port_base_for(base_port, local_rank); - let output_name = - AsmSharedMemory::::shmem_output_name(port, AsmService::MT, local_rank); + let output_name = shmem_output_name(port, AsmService::MT, local_rank, None); - let output_shared_memory = - AsmSharedMemory::::open_and_map(&output_name, unlock_mapped_memory)?; + let output_shmem = AsmMultiSharedMemory::::open_and_map( + &output_name, + TRACE_INITIAL_SIZE, + TRACE_DELTA_SIZE, + TRACE_MAX_SIZE, + unlock_mapped_memory, + )?; - Ok(Self { output_shmem: output_shared_memory }) + Ok(Self { output_shmem }) } } @@ -66,51 +55,38 @@ impl AsmRunnerMT { } #[allow(clippy::too_many_arguments)] - pub fn run_and_count( - preloaded: &mut PreloadedMT, + pub fn run_and_count)>( + preloaded: &mut MTShMemReader, max_steps: u64, chunk_size: u64, - task_factory: TaskFactory, + mut on_chunk: F, world_rank: i32, local_rank: i32, base_port: Option, _stats: ExecutorStatsHandle, - ) -> Result<(AsmRunnerMT, Vec)> { - let __stats = _stats.clone(); - - #[cfg(feature = "stats")] - let parent_stats_id = __stats.next_id(); - #[cfg(feature = "stats")] - _stats.add_stat(0, parent_stats_id, "ASM_MT_RUNNER", 0, ExecutorStatsEvent::Begin); + ) -> Result<(Vec>, AsmExecutionInfo)> { + stats_begin!(_stats, 0, _runner_scope, "ASM_MT_RUNNER", 0); - let port = if let Some(base_port) = base_port { - AsmServices::port_for(&AsmService::MT, base_port, local_rank) - } else { - AsmServices::default_port(&AsmService::MT, local_rank) - }; + let port = AsmServices::port_base_for(base_port, local_rank); - let sem_chunk_done_name = - AsmSharedMemory::::shmem_chunk_done_name(port, AsmService::MT, local_rank); + let sem_chunk_done_name = sem_chunk_done_name(port, AsmService::MT, local_rank); let mut sem_chunk_done = NamedSemaphore::create(sem_chunk_done_name.clone(), 0) .map_err(|e| AsmRunError::SemaphoreError(sem_chunk_done_name.clone(), e))?; - let start_time = Instant::now(); - + // Capture parent id for thread + let _parent_id = _runner_scope.id(); + let _thread_stats = _stats.clone(); let handle = std::thread::spawn(move || { let asm_services = AsmServices::new(world_rank, local_rank, base_port); - #[cfg(feature = "stats")] - let stats_id = __stats.next_id(); - #[cfg(feature = "stats")] - __stats.add_stat(parent_stats_id, stats_id, "ASM_MT", 0, ExecutorStatsEvent::Begin); - + stats_begin!(_thread_stats, _parent_id, _mt_scope, "ASM_MT", 0); + let start = Instant::now(); let result = asm_services.send_minimal_trace_request(max_steps, chunk_size); - #[cfg(feature = "stats")] - __stats.add_stat(parent_stats_id, stats_id, "ASM_MT", 0, ExecutorStatsEvent::End); + stats_end!(_thread_stats, &_mt_scope); - result + (result, start.elapsed()) }); let mut chunk_id = ChunkId(0); @@ -118,58 +94,70 @@ impl AsmRunnerMT { // Get the pointer to the data in the shared memory. let mut data_ptr = preloaded.output_shmem.data_ptr() as *const AsmMTChunk; - let mut emu_traces = Vec::new(); - let mut handles = Vec::new(); + // Calculate threshold for detecting when to map additional shared memory files. + // CRITICAL: These constants must match main.c to ensure we check for new files BEFORE + // the C++ producer needs to allocate beyond current mapped region. Mismatch will cause + // the producer to map new files while we still hold Cow::Borrowed references to old + // mappings, creating dangling pointers. + // + // Constants from main.c: + // MAX_MTRACE_REGS_ACCESS_SIZE = (2 + 2 + 3) * 8 // Register access overhead per step + // MAX_BYTES_DIRECT_MTRACE = 256 // Direct memory trace data per step + // MAX_BYTES_MTRACE_STEP = 256 + 56 = 312 // Total per-step overhead + // MAX_TRACE_CHUNK_INFO = (44 * 8) + 32 // Chunk metadata size + const MAX_MTRACE_REGS_ACCESS_SIZE: usize = (2 + 2 + 3) * 8; // 56 bytes + const MAX_BYTES_DIRECT_MTRACE: usize = 256; + const MAX_BYTES_MTRACE_STEP: usize = MAX_BYTES_DIRECT_MTRACE + MAX_MTRACE_REGS_ACCESS_SIZE; + const MAX_TRACE_CHUNK_INFO: usize = (44 * 8) + 32; // 384 bytes let __stats = _stats.clone(); - - // Threshold (in bytes) used to detect when the shared memory region size has changed. - // Computed to optimize the common case where minor size fluctuations are ignored. - // It is based on the worst-case scenario of memory usage. - let threshold_bytes = (chunk_size as usize * 200) + (44 * 8) + 32; + let threshold_bytes = (chunk_size as usize * MAX_BYTES_MTRACE_STEP) + MAX_TRACE_CHUNK_INFO; let mut threshold = unsafe { - preloaded.output_shmem.mapped_ptr().add(threshold_bytes) as *const AsmMTChunk + preloaded + .output_shmem + .mapped_ptr() + .add(preloaded.output_shmem.total_mapped_size() - threshold_bytes) + as *const AsmMTChunk }; + // Pre-allocate reasonable initial capacity to avoid early reallocations + let mut emu_traces: Vec> = Vec::with_capacity(1024); + let exit_code = loop { - match sem_chunk_done.timed_wait(Duration::from_secs(10)) { + match sem_chunk_done.timed_wait(SEM_CHUNK_DONE_WAIT_DURATION) { Ok(()) => { - #[cfg(feature = "stats")] - { - let stats_id = __stats.next_id(); - __stats.add_stat( - parent_stats_id, - stats_id, - "MT_CHUNK_DONE", - 0, - ExecutorStatsEvent::Mark, - ); - } + stats_mark!(_stats, &_runner_scope, "MT_CHUNK_DONE", 0); // Synchronize with memory changes from the C++ side fence(Ordering::Acquire); // Check if we need to remap the shared memory + // preloaded + // .output_shmem + // .check_size_changed(&mut data_ptr) + // .context("Failed to check and remap shared memory for MT trace")?; + + // Check if we need to map additional shared memory files. if data_ptr >= threshold - && preloaded - .output_shmem - .check_size_changed(&mut data_ptr) - .context("Failed to check and remap shared memory for MO trace")? + && preloaded.output_shmem.check_size_changed().context( + "Failed to check and map new shared memory files for MT trace", + )? { - threshold = unsafe { - preloaded.output_shmem.mapped_ptr().add(threshold_bytes) - as *const AsmMTChunk - }; + // Update threshold based on new total mapped size + threshold = + unsafe { + preloaded.output_shmem.mapped_ptr().add( + preloaded.output_shmem.total_mapped_size() - threshold_bytes, + ) as *const AsmMTChunk + }; } let emu_trace = Arc::new(AsmMTChunk::to_emu_trace(&mut data_ptr)); let should_exit = emu_trace.end; - let task = task_factory(chunk_id, emu_trace.clone()); + on_chunk(chunk_id.0, emu_trace.clone()); emu_traces.push(emu_trace); - handles.push(std::thread::spawn(move || task.execute())); - if should_exit { break 0; } @@ -197,35 +185,21 @@ impl AsmRunnerMT { .context("Child process returned error"); } - // Collect results - let mut tasks = Vec::new(); - for handle in handles { - tasks.push(handle.join().expect("Task panicked")); - } + // Wait for the assembly emulator to complete writing the trace + let (handle, elapsed) = handle.join().map_err(|_| AsmRunError::JoinPanic)?; let total_steps = emu_traces.iter().map(|x| x.steps).sum::(); - let mhz = (total_steps as f64 / start_time.elapsed().as_secs_f64()) / 1_000_000.0; - info!("··· Assembly execution speed: {:.2} MHz", mhz); + let mhz = (total_steps as f64 / elapsed.as_secs_f64()) / 1_000_000.0; + let asm_execution_info = AsmExecutionInfo { time: elapsed.as_secs_f32(), mhz: mhz as f32 }; + info!("··· Assembly execution speed: {}MHz ({:2?})", mhz.round(), elapsed); - // Wait for the assembly emulator to complete writing the trace - let response = handle - .join() - .map_err(|_| AsmRunError::JoinPanic)? - .map_err(AsmRunError::ServiceError)?; + let response = handle.map_err(AsmRunError::ServiceError)?; assert_eq!(response.result, 0); assert!(response.trace_len > 0); assert!(response.trace_len <= response.allocated_len); - // Unwrap the Arc pointers - let emu_traces: Vec = emu_traces - .into_iter() - .map(|arc| Arc::try_unwrap(arc).map_err(|_| AsmRunError::ArcUnwrap)) - .collect::>()?; - - #[cfg(feature = "stats")] - _stats.add_stat(0, parent_stats_id, "ASM_MT_RUNNER", 0, ExecutorStatsEvent::End); - - Ok((AsmRunnerMT::new(emu_traces), tasks)) + stats_end!(_stats, &_runner_scope); + Ok((emu_traces, asm_execution_info)) } } diff --git a/emulator-asm/asm-runner/src/asm_mt_runner_stub.rs b/emulator-asm/asm-runner/src/asm_mt_runner_stub.rs index 762315198..9c887e830 100644 --- a/emulator-asm/asm-runner/src/asm_mt_runner_stub.rs +++ b/emulator-asm/asm-runner/src/asm_mt_runner_stub.rs @@ -1,23 +1,10 @@ -use zisk_common::{ChunkId, EmuTrace, ExecutorStatsHandle}; +use zisk_common::{EmuTrace, ExecutorStatsHandle}; use std::ffi::c_void; use std::fmt::Debug; use std::sync::Arc; use anyhow::Result; -pub trait Task: Send + Sync + 'static { - type Output: Send + 'static; - fn execute(self) -> Self::Output; -} - -pub type TaskFactory<'a, T> = Box) -> T + Send + Sync + 'a>; - -#[derive(Debug)] -pub enum MinimalTraces { - None, - EmuTrace(Vec), - AsmEmuTrace(AsmRunnerMT), -} pub struct PreloadedMT {} @@ -38,16 +25,16 @@ impl AsmRunnerMT { } #[allow(clippy::too_many_arguments)] - pub fn run_and_count( + pub fn run_and_count)>( _: &mut PreloadedMT, _: u64, _: u64, - _: TaskFactory, + _: F, _: i32, _: i32, _: Option, _: ExecutorStatsHandle, - ) -> Result<(AsmRunnerMT, Vec)> { + ) -> Result>> { Err(anyhow::anyhow!("AsmRunnerMT::run_and_count() is not supported on this platform. Only Linux x86_64 is supported.")) } } diff --git a/emulator-asm/asm-runner/src/asm_rh_runner.rs b/emulator-asm/asm-runner/src/asm_rh_runner.rs index c17b6218a..acb47bd95 100644 --- a/emulator-asm/asm-runner/src/asm_rh_runner.rs +++ b/emulator-asm/asm-runner/src/asm_rh_runner.rs @@ -1,30 +1,27 @@ +use crate::{ + sem_chunk_done_name, shmem_output_name, AsmRHData, AsmRHHeader, AsmRunError, AsmService, + AsmServices, AsmSharedMemory, SEM_CHUNK_DONE_WAIT_DURATION, +}; +use named_sem::NamedSemaphore; +use std::sync::atomic::{fence, Ordering}; use tracing::error; -use zisk_common::ExecutorStatsHandle; +use zisk_common::{stats_begin, stats_end, ExecutorStatsHandle}; -use crate::{AsmRHData, AsmRHHeader, AsmRunError, AsmService, AsmServices, AsmSharedMemory}; use anyhow::{Context, Result}; -use named_sem::NamedSemaphore; -use std::sync::atomic::{fence, Ordering}; -use std::time::Duration; -pub struct PreloadedRH { +pub struct RHShMemReader { pub output_shmem: AsmSharedMemory, } -impl PreloadedRH { +impl RHShMemReader { pub fn new( local_rank: i32, base_port: Option, unlock_mapped_memory: bool, ) -> Result { - let port = if let Some(base_port) = base_port { - AsmServices::port_for(&AsmService::RH, base_port, local_rank) - } else { - AsmServices::default_port(&AsmService::RH, local_rank) - }; + let port = AsmServices::port_base_for(base_port, local_rank); - let output_name = - AsmSharedMemory::::shmem_output_name(port, AsmService::RH, local_rank); + let output_name = shmem_output_name(port, AsmService::RH, local_rank, Some(0)); let output_shared_memory = AsmSharedMemory::::open_and_map(&output_name, unlock_mapped_memory)?; @@ -33,9 +30,6 @@ impl PreloadedRH { } } -#[cfg(feature = "stats")] -use zisk_common::ExecutorStatsEvent; - // This struct is used to run the assembly code in a separate process and generate the ROM histogram. pub struct AsmRunnerRH { pub asm_rowh_output: AsmRHData, @@ -54,7 +48,7 @@ impl AsmRunnerRH { } pub fn run( - asm_shared_memory: &mut Option, + asm_shared_memory: &mut Option, max_steps: u64, world_rank: i32, local_rank: i32, @@ -62,21 +56,11 @@ impl AsmRunnerRH { unlock_mapped_memory: bool, _stats: ExecutorStatsHandle, ) -> Result { - let __stats = _stats.clone(); - - #[cfg(feature = "stats")] - let parent_stats_id = __stats.next_id(); - #[cfg(feature = "stats")] - _stats.add_stat(0, parent_stats_id, "ASM_RH_RUNNER", 0, ExecutorStatsEvent::Begin); + stats_begin!(_stats, 0, _runner_scope, "ASM_RH_RUNNER", 0); - let port = if let Some(base_port) = base_port { - AsmServices::port_for(&AsmService::RH, base_port, local_rank) - } else { - AsmServices::default_port(&AsmService::RH, local_rank) - }; + let port = AsmServices::port_base_for(base_port, local_rank); - let sem_chunk_done_name = - AsmSharedMemory::::shmem_chunk_done_name(port, AsmService::RH, local_rank); + let sem_chunk_done_name = sem_chunk_done_name(port, AsmService::RH, local_rank); let mut sem_chunk_done = NamedSemaphore::create(sem_chunk_done_name.clone(), 0) .map_err(|e| AsmRunError::SemaphoreError(sem_chunk_done_name.clone(), e))?; @@ -85,7 +69,7 @@ impl AsmRunnerRH { asm_services.send_rom_histogram_request(max_steps)?; loop { - match sem_chunk_done.timed_wait(Duration::from_secs(10)) { + match sem_chunk_done.timed_wait(SEM_CHUNK_DONE_WAIT_DURATION) { Ok(()) => { // Synchronize with memory changes from the C++ side fence(Ordering::Acquire); @@ -107,16 +91,13 @@ impl AsmRunnerRH { if asm_shared_memory.is_none() { *asm_shared_memory = - Some(PreloadedRH::new(local_rank, base_port, unlock_mapped_memory)?); + Some(RHShMemReader::new(local_rank, base_port, unlock_mapped_memory)?); } let asm_rowh_output = AsmRHData::from_shared_memory(&asm_shared_memory.as_ref().unwrap().output_shmem); - // Add to executor stats - #[cfg(feature = "stats")] - _stats.add_stat(0, parent_stats_id, "ASM_RH_RUNNER", 0, ExecutorStatsEvent::End); - + stats_end!(_stats, &_runner_scope); Ok(AsmRunnerRH::new(asm_rowh_output)) } } diff --git a/emulator-asm/asm-runner/src/asm_runner.rs b/emulator-asm/asm-runner/src/asm_runner.rs index 71d704036..66ea0846d 100644 --- a/emulator-asm/asm-runner/src/asm_runner.rs +++ b/emulator-asm/asm-runner/src/asm_runner.rs @@ -40,6 +40,9 @@ pub struct AsmRunnerOptions { pub local_rank: i32, pub base_port: Option, pub unlock_mapped_memory: bool, + pub asm_out_file: bool, + pub share_input_shmem: bool, + pub open_input_shmem: bool, } impl Default for AsmRunnerOptions { @@ -61,6 +64,9 @@ impl AsmRunnerOptions { local_rank: 0, base_port: None, unlock_mapped_memory: false, + asm_out_file: false, + share_input_shmem: false, + open_input_shmem: false, } } @@ -114,13 +120,33 @@ impl AsmRunnerOptions { self } + pub fn with_asm_out_file(mut self, value: bool) -> Self { + self.asm_out_file = value; + self + } + + pub fn with_share_input_shmem(mut self, value: bool) -> Self { + self.share_input_shmem = value; + self + } + + pub fn with_open_input_shmem(mut self, value: bool) -> Self { + self.open_input_shmem = value; + self + } + /// Applies the configuration flags to a command-line `Command`. /// /// # Arguments /// * `command` - A mutable reference to the `Command` to be modified. - pub fn apply_to_command(&self, command: &mut Command, asm_service: &AsmService) { - let port = if self.base_port.is_some() { - AsmServices::port_for(asm_service, self.base_port.unwrap(), self.local_rank) + pub fn apply_to_command( + &self, + command: &mut Command, + asm_service: &AsmService, + shm_prefix: &str, + ) { + let port = if let Some(base_port) = self.base_port { + AsmServices::port_for(asm_service, base_port, self.local_rank) } else { AsmServices::default_port(asm_service, self.local_rank) }; @@ -132,7 +158,11 @@ impl AsmRunnerOptions { command.arg("-u"); } - command.arg("--shm_prefix").arg(AsmServices::shmem_prefix(port, self.local_rank)); + if self.asm_out_file { + command.arg("--redirect-output-to-file"); + } + + command.arg("--shm_prefix").arg(shm_prefix); match asm_service { AsmService::MT => { @@ -154,6 +184,14 @@ impl AsmRunnerOptions { command.arg("-m"); } + if self.share_input_shmem { + command.arg("--share_input_shm"); + } + + if self.open_input_shmem { + command.arg("--open_input_shm"); + } + if self.verbose { command.arg("-v"); command.stdout(std::process::Stdio::inherit()); diff --git a/emulator-asm/asm-runner/src/asm_services/services.rs b/emulator-asm/asm-runner/src/asm_services/services.rs index 150596d2e..4c2dfbead 100644 --- a/emulator-asm/asm-runner/src/asm_services/services.rs +++ b/emulator-asm/asm-runner/src/asm_services/services.rs @@ -11,9 +11,9 @@ use std::{ net::TcpStream, path::Path, process::Command, - thread::sleep, time::{Duration, Instant}, }; +use tracing::debug; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum AsmService { @@ -45,6 +45,7 @@ impl fmt::Display for AsmService { const ASM_SERVICE_BASE_PORT: u16 = 23115; +#[derive(Debug, Clone)] pub struct AsmServices { world_rank: i32, local_rank: i32, @@ -69,9 +70,8 @@ impl AsmServices { pub fn start_asm_services( &self, ziskemuasm_path: &Path, - options: AsmRunnerOptions, + mut options: AsmRunnerOptions, ) -> Result<()> { - // ! TODO Remove this when we have a proper way to find the path let path_str = ziskemuasm_path.to_string_lossy(); let trimmed_path = &path_str[..path_str.len().saturating_sub(7)]; @@ -91,27 +91,38 @@ impl AsmServices { } } - for service in &Self::SERVICES { - tracing::debug!( - ">>> [{}] Starting ASM service: {} on port {}", - self.world_rank, - service, - Self::port_for(service, self.base_port, self.local_rank) - ); - self.start_asm_service(service, trimmed_path, &options); + let shm_prefix = Self::shmem_prefix( + Self::port_for(&AsmService::MO, self.base_port, self.local_rank), + self.local_rank, + ); + + let mut pending_wait = Vec::new(); + + options.share_input_shmem = true; + + for (i, service) in Self::SERVICES.iter().enumerate() { + let port = Self::port_for(service, self.base_port, self.local_rank); + let wr = self.world_rank; + + debug!(">>> [{}] Starting ASM service: {} on port {}", wr, service, port); + + options.open_input_shmem = i != 0; + + self.start_asm_service(service, trimmed_path, &options, &shm_prefix); + + if i == 0 { + // For the first service, wait until it is ready before starting the others, + // since it may initialize the shared memory used by the rest. + Self::wait_for_service_ready(self.world_rank, service, port)?; + } else { + // For the other services, we can start them in parallel and wait for them after + pending_wait.push((service, port)); + } } - for service in &Self::SERVICES { - Self::wait_for_service_ready( - service, - Self::port_for(service, self.base_port, self.local_rank), - ); - tracing::debug!( - ">>> [{}] ASM service {} is ready on port {}", - self.world_rank, - service, - Self::port_for(service, self.base_port, self.local_rank) - ); + // Wait for the remaining services to be ready + for (service, port) in pending_wait { + Self::wait_for_service_ready(self.world_rank, service, port)?; } // Ping status for all services @@ -139,22 +150,47 @@ impl AsmServices { Ok(()) } - fn wait_for_service_ready(service: &AsmService, port: u16) { - let addr = format!("127.0.0.1:{port}"); - let timeout = Duration::from_secs(60); - let retry_delay = Duration::from_millis(100); - let start = Instant::now(); + fn wait_for_service_ready(world_rank: i32, service: &AsmService, port: u16) -> Result<()> { + const TIMEOUT: Duration = Duration::from_secs(60); + const CONNECT_TIMEOUT: Duration = Duration::from_millis(100); + const LOG_INTERVAL: Duration = Duration::from_secs(5); - while start.elapsed() < timeout { - match TcpStream::connect(&addr) { + let addr = std::net::SocketAddr::from(([127, 0, 0, 1], port)); + + let start = Instant::now(); + let mut last_log = start; + while start.elapsed() < TIMEOUT { + match TcpStream::connect_timeout(&addr, CONNECT_TIMEOUT) { Ok(_) => { - return; + debug!( + ">>> [{}] ASM service {} is ready on port {}", + world_rank, service, port + ); + return Ok(()); + } + Err(_) => { + if last_log.elapsed() >= LOG_INTERVAL { + debug!( + ">>> [{}] Waiting for ASM service {} on port {} ({:.0}s elapsed), retrying...", + world_rank, + service, + port, + start.elapsed().as_secs_f32() + ); + last_log = Instant::now(); + } } - Err(_) => sleep(retry_delay), } } - panic!("Timeout: service `{service}` not ready on {addr}"); + tracing::error!( + ">>> [{}] Timeout waiting for ASM service {} to be ready on port {} after {:?}", + world_rank, + service, + port, + start.elapsed() + ); + Err(anyhow::anyhow!("Timeout: service `{service}` not ready on {addr}")) } fn start_asm_service( @@ -162,16 +198,26 @@ impl AsmServices { asm_service: &AsmService, trimmed_path: &str, options: &AsmRunnerOptions, + shm_prefix: &str, ) { // Prepare command let command_path = trimmed_path.to_string() + &format!("-{asm_service}.bin"); - let mut command = Command::new("nice"); - command.arg("-n"); - command.arg("-5"); - command.arg(command_path); + let mut command = Command::new(command_path); + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + { + use std::os::unix::process::CommandExt; + + unsafe { + command.pre_exec(|| { + // Ignore failure silently + libc::setpriority(libc::PRIO_PROCESS, 0, -5); + Ok(()) + }); + } + } - options.apply_to_command(&mut command, asm_service); + options.apply_to_command(&mut command, asm_service, shm_prefix); if let Err(e) = command.spawn() { tracing::error!("Child process failed: {:?}", e); @@ -201,6 +247,17 @@ impl AsmServices { base_port + service_offset as u16 + rank_offset } + pub fn port_base_for(base_port: Option, local_rank: i32) -> u16 { + let rank_offset = local_rank as u16 * Self::SERVICES.len() as u16; + + base_port.unwrap_or(ASM_SERVICE_BASE_PORT) + rank_offset + } + + pub fn port_base_offset(base_port: Option, n_processes: i32, n_setups: u64) -> u16 { + let setups_offset = n_setups as u16 * (n_processes as u16 * Self::SERVICES.len() as u16); + base_port.unwrap_or(ASM_SERVICE_BASE_PORT) + setups_offset + } + pub fn send_status_request(&self, service: &AsmService) -> Result { self.send_request(service, &PingRequest {}) } @@ -254,11 +311,39 @@ impl AsmServices { .context("Failed to set read timeout")?; // Send request payload - stream.write_all(&out_buffer).context("Failed to write request payload")?; + if let Err(e) = stream.write_all(&out_buffer) { + return Err(anyhow::anyhow!( + "Failed to write request payload to service {} on {}: {}", + service, + addr, + e + )); + } + + let total_timeout = Duration::from_secs(120); + let start = Instant::now(); // Read exactly 40 bytes let mut in_buffer = [0u8; 40]; - stream.read_exact(&mut in_buffer).context("Failed to read full response payload")?; + loop { + if start.elapsed() >= total_timeout { + return Err(anyhow::anyhow!("Total timeout exceeded")); + } + + match stream.read_exact(&mut in_buffer) { + Ok(_) => break, + Err(e) + if e.kind() == std::io::ErrorKind::TimedOut + || e.kind() == std::io::ErrorKind::WouldBlock => + { + tracing::debug!("Read timeout after {:?}, retrying...", start.elapsed()); + continue; + } + Err(e) => { + return Err(e.into()); + } + } + } // Decode bytes into ResponseData let mut response = ResponseData::default(); @@ -271,7 +356,7 @@ impl AsmServices { #[cfg(all(target_os = "linux", target_arch = "x86_64"))] pub fn send_shutdown_and_wait(&self, service: &AsmService) -> Result<()> { - let port = AsmServices::port_for(service, self.base_port, self.local_rank); + let port = AsmServices::port_base_for(Some(self.base_port), self.local_rank); let sem_name = format!( "/{}_{}_shutdown_done", @@ -292,7 +377,7 @@ impl AsmServices { // Wait for the shutdown signal (up to 30s) loop { - match sem.timed_wait(Duration::from_secs(30)) { + match sem.timed_wait(Duration::from_secs(60)) { Ok(_) => break, Err(named_sem::Error::WaitFailed(e)) if e.kind() == std::io::ErrorKind::Interrupted => diff --git a/emulator-asm/asm-runner/src/control_shmem.rs b/emulator-asm/asm-runner/src/control_shmem.rs new file mode 100644 index 000000000..7082a3b16 --- /dev/null +++ b/emulator-asm/asm-runner/src/control_shmem.rs @@ -0,0 +1,69 @@ +use crate::{shmem_control_writer_name, AsmServices, SharedMemoryWriter}; + +use anyhow::Result; + +pub struct ControlShmem { + writer: SharedMemoryWriter, +} + +#[derive(Copy, Clone)] +pub enum ControlShmemOffsets { + PrecompilesSize = 0, + ShutdownFlag = 8, + InputsSize = 16, +} + +impl ControlShmem { + pub const CONTROL_WRITER_SIZE: u64 = 0x1000; // 4KB + + pub fn new( + base_port: Option, + local_rank: i32, + unlock_mapped_memory: bool, + ) -> Result { + let port = AsmServices::port_base_for(base_port, local_rank); + Ok(Self { + writer: SharedMemoryWriter::new( + &shmem_control_writer_name(port, local_rank), + Self::CONTROL_WRITER_SIZE as usize, + unlock_mapped_memory, + )?, + }) + } + + pub fn read_u64_at(&self, offset: ControlShmemOffsets) -> u64 { + self.writer.read_u64_at(offset as usize) + } + + pub fn write_u64_at(&self, offset: ControlShmemOffsets, size: u64) { + self.writer.write_u64_at(offset as usize, size); + } + + pub fn reset(&self) { + self.writer.write_u64_at(ControlShmemOffsets::PrecompilesSize as usize, 0); + self.writer.write_u64_at(ControlShmemOffsets::ShutdownFlag as usize, 0); + self.writer.write_u64_at(ControlShmemOffsets::InputsSize as usize, 0); + } + + pub fn set_prec_hints_size(&self, size: u64) { + self.writer.write_u64_at(ControlShmemOffsets::PrecompilesSize as usize, size); + } + + pub fn prec_hints_size(&self) -> u64 { + self.writer.read_u64_at(ControlShmemOffsets::PrecompilesSize as usize) + } + + pub fn set_shutdown_flag(&self) { + self.writer.write_u64_at(ControlShmemOffsets::ShutdownFlag as usize, 1); + } + + pub fn set_inputs_size(&self, size: u64) { + self.writer.write_u64_at(ControlShmemOffsets::InputsSize as usize, size); + } + + pub fn inc_inputs_size(&self, size: usize) { + let current_size = self.writer.read_u64_at(ControlShmemOffsets::InputsSize as usize); + self.writer + .write_u64_at(ControlShmemOffsets::InputsSize as usize, current_size + size as u64); + } +} diff --git a/emulator-asm/asm-runner/src/hints_file.rs b/emulator-asm/asm-runner/src/hints_file.rs new file mode 100644 index 000000000..a00bfe17b --- /dev/null +++ b/emulator-asm/asm-runner/src/hints_file.rs @@ -0,0 +1,66 @@ +//! HintsFile is responsible for writing precompile processed hints to a file. +//! +//! It implements the StreamSink trait to receive processed hints and write them to a file. + +use anyhow::Result; +use std::fs::File; +use std::io::Write; +use std::sync::Mutex; +use zisk_common::io::StreamSink; + +/// HintsFile struct manages the writing of processed precompile hints to a file. +pub struct HintsFile { + file: Mutex, +} + +unsafe impl Send for HintsFile {} +unsafe impl Sync for HintsFile {} + +impl HintsFile { + /// Create a new HintsFile with the given filename. + /// + /// # Arguments + /// * `filename` - Path to the file where hints will be written. + /// + /// # Returns + /// A new `HintsFile` instance. + pub fn new(filename: String) -> Result { + let file = File::create(&filename)?; + Ok(Self { file: Mutex::new(file) }) + } +} + +impl StreamSink for HintsFile { + /// Writes processed precompile hints to the file. + /// + /// # Arguments + /// * `processed` - A vector of processed precompile hints as u64 values. + /// + /// # Returns + /// * `Ok(())` - If hints were successfully written to the file + /// * `Err` - If writing to the file fails + #[inline] + fn submit(&self, processed: &[u64]) -> anyhow::Result<()> { + let mut file = self.file.lock().unwrap(); + + // Write each u64 as 8 bytes (little-endian) + for value in processed { + file.write_all(&value.to_le_bytes())?; + } + + // Flush to ensure data is written immediately + file.flush()?; + + Ok(()) + } +} + +impl Drop for HintsFile { + fn drop(&mut self) { + // File is automatically closed when dropped + // We can ensure final flush here + if let Ok(mut file) = self.file.lock() { + let _ = file.flush(); + } + } +} diff --git a/emulator-asm/asm-runner/src/hints_shmem.rs b/emulator-asm/asm-runner/src/hints_shmem.rs new file mode 100644 index 000000000..88b4305ae --- /dev/null +++ b/emulator-asm/asm-runner/src/hints_shmem.rs @@ -0,0 +1,315 @@ +//! HintsShmem is responsible for writing precompile processed hints to shared memory. +//! +//! It implements the HintsSink trait to receive processed hints and write them to shared memory +//! using SharedMemoryWriter instances. + +use crate::{ + sem_available_name, sem_read_name, shmem_control_reader_name, shmem_precompile_name, + AsmService, AsmServices, ControlShmem, SharedMemoryReader, SharedMemoryWriter, +}; +use anyhow::Result; +use named_sem::NamedSemaphore; +use std::{ + cell::RefCell, + sync::{ + atomic::{fence, AtomicUsize, Ordering}, + Arc, + }, +}; +use tracing::debug; +use zisk_common::io::StreamSink; + +/// Names for separate resources (per-service) +struct SeparateResourceNames { + control_reader: String, + sem_available_name: String, + sem_read_name: String, +} + +impl SeparateResourceNames { + fn new(service: &AsmService, port: u16, local_rank: i32) -> Self { + Self { + control_reader: shmem_control_reader_name(port, *service, local_rank), + sem_available_name: sem_available_name(port, *service, local_rank), + sem_read_name: sem_read_name(port, *service, local_rank), + } + } +} + +/// Separate resources, one per asm service +struct SeparateResources { + /// Control shared memory reader (consumer's read position) + control_reader: SharedMemoryReader, + /// Semaphore to signal data availability to this consumer + sem_available: NamedSemaphore, + /// Semaphore to wait for this consumer's data consumption + sem_read: NamedSemaphore, +} + +/// Unified resources shared across all asm services +struct UnifiedResources { + /// Control shared memory writer (single write_pos) + control_writer: Arc, + /// Data shared memory writer (single data buffer) + data_writer: SharedMemoryWriter, +} + +/// HintsShmem struct manages the writing of processed precompile hints to shared memory. +pub struct HintsShmem { + /// Number of active ASM services to notify on submit + active_count: AtomicUsize, + /// Unified resources (single data buffer and control writer) + unified: RefCell, + /// Separate resources (control_reader and semaphores for each service) + separate: RefCell>, +} + +unsafe impl Send for HintsShmem {} +unsafe impl Sync for HintsShmem {} + +impl HintsShmem { + const CONTROL_PRECOMPILE_SIZE: u64 = 0x1000; // 4KB + const MAX_PRECOMPILE_SIZE: u64 = 0x400000; // 4MB + const BUFFER_CAPACITY_U64: u64 = Self::MAX_PRECOMPILE_SIZE >> 3; // Capacity in u64 elements + + /// Create a new HintsShmem with the given shared memory names and unlock option. + /// + /// # Arguments + /// * `base_port` - Optional base port for generating shared memory names. + /// * `local_rank` - Local rank for generating shared memory names. + /// * `unlock_mapped_memory` - Whether to unlock mapped memory after writing. + /// + /// # Returns + /// A new `HintsShmem` instance with uninitialized writers. + pub fn new( + base_port: Option, + local_rank: i32, + unlock_mapped_memory: bool, + control_writer: Arc, + active_services: &[AsmService], + ) -> Result { + // Use the first service's port for shared resources naming + let first_service = &AsmServices::SERVICES[0]; + let shared_port = if let Some(base_port) = base_port { + AsmServices::port_for(first_service, base_port, local_rank) + } else { + AsmServices::default_port(first_service, local_rank) + }; + + // Create unified resources (single data buffer and control writer) + let unified = Self::create_unified_resources( + shared_port, + local_rank, + unlock_mapped_memory, + control_writer, + )?; + unified.control_writer.reset(); + + // Create separate resources + let separate_names: Vec = AsmServices::SERVICES + .iter() + .map(|service| { + let port = AsmServices::port_base_for(base_port, local_rank); + + SeparateResourceNames::new(service, port, local_rank) + }) + .collect(); + + let separate = Self::create_separate_resources(separate_names)?; + + Ok(Self { + unified: RefCell::new(unified), + separate: RefCell::new(separate), + active_count: AtomicUsize::new(active_services.len()), + }) + } + + /// Update the number of active ASM services notified on each submit. + /// + /// This is a deployment-time configuration — call once per job partition, + /// not on every stream reset. `services.len()` must not exceed `AsmServices::SERVICES.len()`. + pub fn set_active_services(&self, services: &[AsmService]) -> Result<()> { + if services.len() > AsmServices::SERVICES.len() { + return Err(anyhow::anyhow!( + "active_services count {} exceeds allocated separate resources {}", + services.len(), + AsmServices::SERVICES.len() + )); + } + self.active_count.store(services.len(), Ordering::SeqCst); + Ok(()) + } + + /// Create the unified resources (single data buffer and control writer). + fn create_unified_resources( + port: u16, + local_rank: i32, + unlock_mapped_memory: bool, + control_writer: Arc, + ) -> Result { + debug!("Initializing unified resources for precompile hints"); + let data_name = shmem_precompile_name(port, local_rank); + + Ok(UnifiedResources { + control_writer, + data_writer: SharedMemoryWriter::new( + &data_name, + Self::MAX_PRECOMPILE_SIZE as usize, + unlock_mapped_memory, + )?, + }) + } + + /// Create separate resources (control_reader and semaphores for each service). + fn create_separate_resources( + separate_names: Vec, + ) -> Result> { + debug!("Initializing separate resources for precompile hints"); + separate_names + .iter() + .map(|names: &SeparateResourceNames| -> Result { + Ok(SeparateResources { + control_reader: SharedMemoryReader::new( + &names.control_reader, + Self::CONTROL_PRECOMPILE_SIZE as usize, + )?, + sem_available: NamedSemaphore::create(&names.sem_available_name, 0).map_err( + |e| { + anyhow::anyhow!( + "Failed to create semaphore '{}': {}", + names.sem_available_name, + e + ) + }, + )?, + sem_read: NamedSemaphore::create(&names.sem_read_name, 0).map_err(|e| { + anyhow::anyhow!( + "Failed to create semaphore '{}': {}", + names.sem_read_name, + e + ) + })?, + }) + }) + .collect() + } +} + +impl StreamSink for HintsShmem { + /// Writes processed precompile hints to the shared memory. + /// + /// Data is written ONCE to the shared buffer, then all consumers are notified. + /// Flow control waits for the slowest consumer. + /// + /// # Arguments + /// * `processed` - A vector of processed precompile hints as u64 values. + /// + /// # Returns + /// * `Ok(())` - If hints were successfully written to shared memory + /// * `Err` - If writing to shared memory fails + #[inline] + fn submit(&self, processed: &[u64]) -> anyhow::Result<()> { + let data_size = processed.len() as u64; + + // Early return for empty data + if data_size == 0 { + return Ok(()); + } + + // Validate data size fits in buffer + if data_size > Self::BUFFER_CAPACITY_U64 { + return Err(anyhow::anyhow!( + "Processed data size ({} u64 elements) exceeds buffer capacity ({} u64 elements)", + data_size, + Self::BUFFER_CAPACITY_U64 + )); + } + + let mut unified = self.unified.borrow_mut(); + let mut separate = self.separate.borrow_mut(); + + let active = self.active_count.load(Ordering::SeqCst); + let separate = &mut separate[0..active]; + + // Read current write position once + let write_pos = unified.control_writer.prec_hints_size(); + + // Flow control: wait until all consumers have advanced enough + // We need to wait for the slowest consumer (minimum read position) + loop { + // Ensure we observe the latest read positions + fence(Ordering::Acquire); + + // Find the slowest consumer (minimum read position) and its index + let (slowest_idx, min_read_pos) = separate + .iter() + .enumerate() + .map(|(i, res)| (i, res.control_reader.read_u64_at(0))) + .min_by_key(|(_, pos)| *pos) + .unwrap(); + + // Calculate occupied space based on slowest consumer (saturating to avoid underflow) + debug_assert!( + write_pos >= min_read_pos, + "Write position ({}) is behind minimum read position ({})", + write_pos, + min_read_pos + ); + let occupied_space = write_pos - min_read_pos; + debug_assert!( + occupied_space <= Self::BUFFER_CAPACITY_U64, + "Occupied space ({}) exceeds buffer capacity ({})", + occupied_space, + Self::BUFFER_CAPACITY_U64 + ); + let available_space = Self::BUFFER_CAPACITY_U64 - occupied_space; + + // Flow control based on buffer occupancy + if available_space >= data_size { + break; + } + + // Not enough space - wait for the SLOWEST consumer to signal progress + // Retry on interrupt (EINTR) + if separate[slowest_idx].sem_read.wait().is_err() { + continue; + } + } + + // Write data ONCE to the unified shared memory buffer + unified.data_writer.write_ring_buffer(processed)?; + + fence(Ordering::Release); + + // Update write position ONCE in control memory + unified.control_writer.set_prec_hints_size(write_pos + data_size); + + fence(Ordering::Release); + + // Notify ALL consumers that new data is available + for res in separate.iter_mut() { + res.sem_available.post()?; + } + + Ok(()) + } + + fn reset(&self) { + // Reset control writer and data writer to initial state for next stream + let mut unified = self.unified.borrow_mut(); + unified.control_writer.reset(); + unified.data_writer.reset(); + + // Drain stale semaphore signals from previous execution + let mut separate = self.separate.borrow_mut(); + for res in separate.iter_mut() { + while res.sem_available.try_wait().is_ok() {} + while res.sem_read.try_wait().is_ok() {} + + assert!( + res.control_reader.read_u64_at(0) == 0, + "Control reader position should be reset to 0" + ); + } + } +} diff --git a/emulator-asm/asm-runner/src/hints_shmem_stub.rs b/emulator-asm/asm-runner/src/hints_shmem_stub.rs new file mode 100644 index 000000000..0927b7735 --- /dev/null +++ b/emulator-asm/asm-runner/src/hints_shmem_stub.rs @@ -0,0 +1,35 @@ +use crate::{AsmService, ControlShmem}; +use anyhow::Result; +use std::sync::Arc; +use zisk_common::io::StreamSink; + +/// HintsShmem struct manages the writing of processed precompile hints to shared memory. +pub struct HintsShmem; + +impl HintsShmem { + pub fn new( + _base_port: Option, + _local_rank: i32, + _unlock_mapped_memory: bool, + _control_writer: Arc, + _active_services: &[AsmService], + ) -> Result { + unreachable!( + "HintsShmem::new() is not supported on this platform. Only Linux x86_64 is supported." + ); + } + + pub fn set_active_services(&self, _active_services: &[AsmService]) -> Result<()> { + unreachable!( + "HintsShmem::set_active_services() is not supported on this platform. Only Linux x86_64 is supported." + ); + } +} + +impl StreamSink for HintsShmem { + fn submit(&self, _processed: &[u64]) -> anyhow::Result<()> { + unreachable!( + "HintsShmem::submit() is not supported on this platform. Only Linux x86_64 is supported." + ); + } +} diff --git a/emulator-asm/asm-runner/src/inputs_shmem.rs b/emulator-asm/asm-runner/src/inputs_shmem.rs new file mode 100644 index 000000000..8a83274be --- /dev/null +++ b/emulator-asm/asm-runner/src/inputs_shmem.rs @@ -0,0 +1,100 @@ +use std::sync::{Arc, Mutex}; + +use named_sem::NamedSemaphore; +use zisk_common::{io::StreamSink, reinterpret_vec}; +use zisk_core::MAX_INPUT_SIZE; + +use crate::{ + sem_input_avail_name, shmem_input_name, AsmServices, ControlShmem, SharedMemoryWriter, +}; + +use anyhow::Result; + +pub struct InputsShmemWriter { + writer: Mutex, + control_writer: Arc, + sem_avails: Mutex>, +} + +unsafe impl Send for InputsShmemWriter {} +unsafe impl Sync for InputsShmemWriter {} + +impl InputsShmemWriter { + pub fn new( + base_port: Option, + local_rank: i32, + unlock_mapped_memory: bool, + control_writer: Arc, + ) -> Result { + let port = AsmServices::port_base_for(base_port, local_rank); + + let mut writer = SharedMemoryWriter::new( + &shmem_input_name(port, local_rank), + MAX_INPUT_SIZE as usize, + unlock_mapped_memory, + )?; + + writer.reset(); + writer.append_input(&0u64.to_le_bytes())?; + + // Create one semaphore per ASM service + let sem_avails: Vec = AsmServices::SERVICES + .iter() + .map(|service| { + let name = sem_input_avail_name(port, *service, local_rank); + NamedSemaphore::create(&name, 0) + .map_err(|e| anyhow::anyhow!("Failed to create semaphore '{}': {}", name, e)) + }) + .collect::>>()?; + + Ok(Self { writer: Mutex::new(writer), control_writer, sem_avails: Mutex::new(sem_avails) }) + } + + pub fn write_input(&self, inputs: &[u8]) -> Result<()> { + self.writer.lock().unwrap().write_at(8, inputs)?; + self.control_writer.inc_inputs_size(inputs.len()); + self.notify_all_services()?; + + Ok(()) + } + + pub fn append_input(&self, inputs: &[u8]) -> Result<()> { + self.writer.lock().unwrap().append_input(inputs)?; + self.control_writer.inc_inputs_size(inputs.len()); + self.notify_all_services()?; + + Ok(()) + } + + /// Notify all ASM services that new input data is available + fn notify_all_services(&self) -> Result<()> { + for sem in self.sem_avails.lock().unwrap().iter_mut() { + sem.post()?; + } + Ok(()) + } + + pub fn reset(&self) { + let mut writer = self.writer.lock().unwrap(); + writer.reset(); + writer + .append_input(&0u64.to_le_bytes()) + .expect("Failed to write initial header after reset"); + + self.control_writer.reset(); + // Drain all the semaphore signals from all services + for sem in self.sem_avails.lock().unwrap().iter_mut() { + while sem.try_wait().is_ok() {} + } + } +} + +impl StreamSink for InputsShmemWriter { + fn submit(&self, hints: &[u64]) -> anyhow::Result<()> { + self.append_input(&reinterpret_vec(hints.to_vec())?) + } + + fn reset(&self) { + self.reset(); + } +} diff --git a/emulator-asm/asm-runner/src/inputs_shmem_stub.rs b/emulator-asm/asm-runner/src/inputs_shmem_stub.rs new file mode 100644 index 000000000..095d732d0 --- /dev/null +++ b/emulator-asm/asm-runner/src/inputs_shmem_stub.rs @@ -0,0 +1,54 @@ +use std::sync::Arc; + +use zisk_common::io::StreamSink; + +use crate::ControlShmem; + +use anyhow::Result; + +pub struct InputsShmemWriter; + +impl InputsShmemWriter { + pub fn new( + _base_port: Option, + _local_rank: i32, + _unlock_mapped_memory: bool, + _control_writer: Arc, + ) -> Result { + unreachable!( + "InputsShmemWriter::new() is not supported on this platform. Only Linux x86_64 is supported." + ); + } + + pub fn write_input(&self, _inputs: &[u8]) -> Result<()> { + unreachable!( + "InputsShmemWriter::write_input() is not supported on this platform. Only Linux x86_64 is supported." + ); + } + + pub fn append_input(&self, _inputs: &[u8]) -> Result<()> { + unreachable!( + "InputsShmemWriter::append_input() is not supported on this platform. Only Linux x86_64 is supported." + ); + } + + pub fn reset(&self) { + unreachable!( + "InputsShmemWriter::reset() is not supported on this platform. Only Linux x86_64 is supported." + ); + } +} + +impl StreamSink for InputsShmemWriter { + fn submit(&self, _hints: &[u64]) -> anyhow::Result<()> { + unreachable!( + "InputsShmemWriter::submit() is not supported on this platform. Only Linux x86_64 is supported." + ); + } + + fn reset(&self) { + unreachable!( + "InputsShmemWriter::reset() is not supported on this platform. Only Linux x86_64 is supported." + ); + } +} diff --git a/emulator-asm/asm-runner/src/lib.rs b/emulator-asm/asm-runner/src/lib.rs index 98bcb9e77..d8f000124 100644 --- a/emulator-asm/asm-runner/src/lib.rs +++ b/emulator-asm/asm-runner/src/lib.rs @@ -17,6 +17,19 @@ mod asm_rh_runner; mod asm_rh_runner_stub; mod asm_runner; mod asm_services; +mod control_shmem; +mod hints_file; +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +mod hints_shmem; +#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))] +mod hints_shmem_stub; +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +mod inputs_shmem; +#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))] +mod inputs_shmem_stub; +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +mod multi_shmem; +mod shmem_reader; mod shmem_utils; mod shmem_writer; @@ -37,5 +50,119 @@ pub use asm_rh_runner::*; pub use asm_rh_runner_stub::*; pub use asm_runner::*; pub use asm_services::*; +pub use control_shmem::*; +pub use hints_file::*; +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +pub use hints_shmem::*; +#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))] +pub use hints_shmem_stub::*; +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +pub use inputs_shmem::*; +#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))] +pub use inputs_shmem_stub::*; +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +pub use multi_shmem::*; +pub use shmem_reader::*; pub use shmem_utils::*; pub use shmem_writer::*; + +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +pub(crate) const TRACE_INITIAL_SIZE: usize = 0x180000000; // 6GB +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +pub(crate) const TRACE_DELTA_SIZE: usize = 0x080000000; // 2GB +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +pub(crate) const TRACE_MAX_SIZE: usize = 0x1000000000; // 64GB + +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +const SEM_CHUNK_DONE_WAIT_DURATION: std::time::Duration = std::time::Duration::from_secs(10); + +fn build_name( + prefix: &str, + port: u16, + asm_service: AsmService, + local_rank: i32, + suffix: &str, +) -> String { + format!( + "{}{}_{}_{}", + prefix, + AsmServices::shmem_prefix(port, local_rank), + asm_service.as_str(), + suffix + ) +} + +fn build_name2(prefix: &str, port: u16, local_rank: i32, suffix: &str) -> String { + format!("{}{}_{}", prefix, AsmServices::shmem_prefix(port, local_rank), suffix) +} + +fn build_shmem_name(port: u16, asm_service: AsmService, local_rank: i32, suffix: &str) -> String { + build_name("", port, asm_service, local_rank, suffix) +} + +fn build_shmem_name2(port: u16, local_rank: i32, suffix: &str) -> String { + build_name2("", port, local_rank, suffix) +} + +fn build_sem_name(port: u16, asm_service: AsmService, local_rank: i32, suffix: &str) -> String { + build_name("/", port, asm_service, local_rank, suffix) +} + +pub fn shmem_input_name(port: u16, local_rank: i32) -> String { + build_shmem_name2(port, local_rank, "input") +} + +pub fn shmem_input_avail_name(port: u16, local_rank: i32) -> String { + build_shmem_name2(port, local_rank, "input_avail") +} + +/// Semaphore name for input availability (per service) +pub fn sem_input_avail_name(port: u16, asm_service: AsmService, local_rank: i32) -> String { + build_sem_name(port, asm_service, local_rank, "input_avail") +} + +/// Shared memory name for precompile hints data +pub fn shmem_precompile_name(port: u16, local_rank: i32) -> String { + build_shmem_name2(port, local_rank, "precompile") +} + +/// Shared memory name for precompile hints data +pub fn sem_available_name(port: u16, asm_service: AsmService, local_rank: i32) -> String { + build_sem_name(port, asm_service, local_rank, "prec_avail") +} + +/// Shared memory name for precompile hints data +pub fn sem_read_name(port: u16, asm_service: AsmService, local_rank: i32) -> String { + build_sem_name(port, asm_service, local_rank, "prec_read") +} + +/// Shared memory name for precompile hints data control +pub fn shmem_control_writer_name(port: u16, local_rank: i32) -> String { + build_shmem_name2(port, local_rank, "control_input") +} + +pub fn shmem_control_reader_name(port: u16, asm_service: AsmService, local_rank: i32) -> String { + build_shmem_name(port, asm_service, local_rank, "control_output") +} + +pub fn shmem_output_name( + port: u16, + asm_service: AsmService, + local_rank: i32, + suffix: Option, +) -> String { + if let Some(suffix) = suffix { + format!( + "{}_{}_output_{}", + AsmServices::shmem_prefix(port, local_rank), + asm_service.as_str(), + suffix + ) + } else { + build_shmem_name(port, asm_service, local_rank, "output") + } +} + +pub fn sem_chunk_done_name(port: u16, asm_service: AsmService, local_rank: i32) -> String { + build_sem_name(port, asm_service, local_rank, "chunk_done") +} diff --git a/emulator-asm/asm-runner/src/multi_shmem.rs b/emulator-asm/asm-runner/src/multi_shmem.rs new file mode 100644 index 000000000..7553fe727 --- /dev/null +++ b/emulator-asm/asm-runner/src/multi_shmem.rs @@ -0,0 +1,329 @@ +use crate::AsmShmemHeader; +use libc::{ + c_uint, close, mmap, munmap, shm_open, shm_unlink, MAP_FAILED, MAP_SHARED, PROT_READ, S_IRUSR, + S_IWUSR, +}; +use std::{ + ffi::CString, + io, + os::raw::c_void, + ptr, + sync::atomic::{fence, Ordering}, +}; +use tracing::debug; + +use anyhow::anyhow; +use anyhow::Result; + +/// Represents a single mapped shared memory file within the multi-file structure. +struct MappedFile { + fd: i32, + #[allow(dead_code)] // May be useful for debugging/validation + size: usize, +} + +/// A shared memory manager that supports multiple contiguous shared memory files. +/// +/// This struct reserves a large virtual address range upfront and maps multiple +/// shared memory files (`_0`, `_1`, etc.) into contiguous portions of that range. +/// +/// File layout: +/// - `{base_name}_0`: Initial file with size `initial_size`, contains the header +/// - `{base_name}_1`, `_2`, ...: Incremental files with size `incremental_size` +pub struct AsmMultiSharedMemory { + base_name: String, + reserved_ptr: *mut c_void, + reserved_size: usize, + initial_size: usize, + incremental_size: usize, + mapped_files: Vec, + total_mapped_size: usize, + unlock_mapped_memory: bool, + _phantom: std::marker::PhantomData, +} + +unsafe impl Send for AsmMultiSharedMemory {} +unsafe impl Sync for AsmMultiSharedMemory {} + +impl Drop for AsmMultiSharedMemory { + fn drop(&mut self) { + // Close all file descriptors + for mapped_file in &self.mapped_files { + unsafe { close(mapped_file.fd) }; + } + + // Unmap the entire reserved region (this handles all the MAP_FIXED mappings too) + if !self.reserved_ptr.is_null() && self.reserved_size > 0 { + unsafe { + if munmap(self.reserved_ptr, self.reserved_size) != 0 { + tracing::error!( + "munmap failed for multi-shmem '{}': {:?}", + self.base_name, + io::Error::last_os_error() + ); + } + } + } + } +} + +impl AsmMultiSharedMemory { + /// Opens and maps the initial shared memory file, reserving address space for growth. + /// + /// # Arguments + /// * `base_name` - Base name for shared memory files (files will be `{base_name}_0`, `_1`, etc.) + /// * `initial_size` - Size of the first file (`_0`) + /// * `incremental_size` - Size of subsequent files (`_1`, `_2`, ...) + /// * `max_size` - Total virtual address space to reserve + /// * `unlock_mapped_memory` - If true, don't use MAP_LOCKED + pub fn open_and_map( + base_name: &str, + initial_size: usize, + incremental_size: usize, + max_size: usize, + unlock_mapped_memory: bool, + ) -> Result { + if base_name.is_empty() { + return Err(anyhow!("Shared memory base name cannot be empty")); + } + + if max_size < initial_size { + return Err(anyhow!( + "max_size ({}) must be >= initial_size ({})", + max_size, + initial_size + )); + } + + if incremental_size == 0 { + return Err(anyhow!("incremental_size must be > 0")); + } + + // Reserve the entire address range with an anonymous mapping + // MAP_NORESERVE prevents reserving swap space for the entire range + let reserved_ptr = unsafe { + mmap( + ptr::null_mut(), + max_size, + libc::PROT_NONE, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_NORESERVE, + -1, + 0, + ) + }; + + if reserved_ptr == MAP_FAILED { + let err = io::Error::last_os_error(); + return Err(anyhow!( + "Failed to reserve {} bytes of address space for '{}': {}", + max_size, + base_name, + err + )); + } + + debug!("Reserved {} bytes at {:p} for multi-shmem '{}'", max_size, reserved_ptr, base_name); + + let mut this = Self { + base_name: base_name.to_string(), + reserved_ptr, + reserved_size: max_size, + initial_size, + incremental_size, + mapped_files: Vec::with_capacity(8), + total_mapped_size: 0, + unlock_mapped_memory, + _phantom: std::marker::PhantomData, + }; + + // Map the initial file (_0) + if let Err(e) = this.map_file(0) { + unsafe { munmap(reserved_ptr, max_size) }; + return Err(e); + } + + this.total_mapped_size = initial_size; + + Ok(this) + } + + /// Checks if the producer has allocated more space and maps any new files. + /// + /// This reads `allocated_size` from the header (always in file `_0`) and maps + /// any new files that have been created by the producer. + /// + /// This does NOT move existing mappings, so pointers and slices to already-mapped data remain valid. + pub fn check_size_changed(&mut self) -> Result { + let allocated_size = self.map_header().allocated_size() as usize; + + if allocated_size <= self.total_mapped_size { + return Ok(false); + } + + // Calculate how many files should exist + let files_needed = if allocated_size <= self.initial_size { + 1 + } else { + 1 + (allocated_size - self.initial_size).div_ceil(self.incremental_size) + }; + + let current_files = self.mapped_files.len(); + + if files_needed <= current_files { + // Size increased but within current file - just update total + self.total_mapped_size = allocated_size; + return Ok(true); + } + + debug!( + "Multi-shmem '{}': allocated_size={}, need {} files, have {}", + self.base_name, allocated_size, files_needed, current_files + ); + + // Map all new files + for file_idx in current_files..files_needed { + self.map_file(file_idx)?; + } + + self.total_mapped_size = allocated_size; + + fence(Ordering::Acquire); + + Ok(true) + } + + /// Maps a specific file index into the reserved address space. + fn map_file(&mut self, file_idx: usize) -> Result<()> { + let file_name = format!("{}_{}", self.base_name, file_idx); + + unsafe { + let c_name = CString::new(file_name.clone()) + .map_err(|_| anyhow!("Shared memory name contains null byte"))?; + + let fd = + shm_open(c_name.as_ptr(), libc::O_RDONLY, S_IRUSR as c_uint | S_IWUSR as c_uint); + if fd == -1 { + let err = io::Error::last_os_error(); + return Err(anyhow!("shm_open('{}') failed: {}", file_name, err)); + } + + // Unlink to ensure cleanup + if shm_unlink(c_name.as_ptr()) != 0 { + let err = io::Error::last_os_error(); + close(fd); + return Err(anyhow!("shm_unlink('{}') failed: {}", file_name, err)); + } + + // For _0, validate that the header has a non-zero allocated size + if file_idx == 0 { + let temp_map = mmap(ptr::null_mut(), size_of::(), PROT_READ, MAP_SHARED, fd, 0); + if temp_map == MAP_FAILED { + let err = io::Error::last_os_error(); + close(fd); + return Err(anyhow!("mmap failed for header of '{}': {}", file_name, err)); + } + + let header = (temp_map as *const H).read(); + let allocated_size = header.allocated_size(); + munmap(temp_map, size_of::()); + + if allocated_size == 0 { + close(fd); + return Err(anyhow!("Shared memory '{}' has zero allocated size", file_name)); + } + } + + // Calculate the offset where this file should be mapped + let offset = if file_idx == 0 { + 0 + } else { + self.initial_size + (file_idx - 1) * self.incremental_size + }; + + let file_size = if file_idx == 0 { self.initial_size } else { self.incremental_size }; + + let target_addr = self.reserved_ptr.add(offset); + + let mut flags = MAP_SHARED | libc::MAP_FIXED; + if !self.unlock_mapped_memory { + flags |= libc::MAP_LOCKED; + } + + let mapped_ptr = mmap(target_addr, file_size, PROT_READ, flags, fd, 0); + if mapped_ptr == MAP_FAILED { + let err = io::Error::last_os_error(); + close(fd); + return Err(anyhow!( + "mmap(MAP_FIXED) failed for '{}': {} ({} bytes at {:p})", + file_name, + err, + file_size, + target_addr + )); + } + + debug!( + "Mapped '{}' ({} bytes) at {:p} (offset {})", + file_name, file_size, mapped_ptr, offset + ); + + self.mapped_files.push(MappedFile { fd, size: file_size }); + } + + Ok(()) + } + + /// Reads the header from the shared memory (always from file `_0`). + pub fn map_header(&self) -> H { + if self.mapped_files.is_empty() { + panic!("Multi-shmem '{}' has no mapped files, cannot read header", self.base_name); + } + + unsafe { (self.reserved_ptr as *const H).read() } + } + + /// Returns the base pointer of the mapped region. + pub fn mapped_ptr(&self) -> *mut c_void { + self.reserved_ptr + } + + /// Returns a pointer to the data area (after the header). + pub fn data_ptr(&self) -> *mut c_void { + unsafe { self.reserved_ptr.add(size_of::()) } + } + + /// Returns the total currently mapped size. + pub fn total_mapped_size(&self) -> usize { + self.total_mapped_size + } + + /// Returns the number of currently mapped files. + pub fn num_mapped_files(&self) -> usize { + self.mapped_files.len() + } + + /// Releases incremental shared memory files for a new execution. + /// + /// This closes file descriptors for incremental files (`_1`, `_2`, ...) while + /// keeping `_0` mapped. The reserved address space is preserved. + /// + /// Call this before starting a new execution when reusing the same instance + /// in a distributed context where `_0` remains valid across executions. + pub fn release_incremental(&mut self) { + let files_to_close = self.mapped_files.len().saturating_sub(1); + + // Close file descriptors for incremental files (_1, _2, ...), keep _0 + while self.mapped_files.len() > 1 { + let mapped_file = self.mapped_files.pop().unwrap(); + unsafe { close(mapped_file.fd) }; + } + + // Reset state to initial + self.total_mapped_size = self.initial_size; + + debug!( + "Reset multi-shmem '{}': kept _0, closed {} incremental files, total_mapped_size={}", + self.base_name, files_to_close, self.total_mapped_size + ); + } +} diff --git a/emulator-asm/asm-runner/src/shmem_reader.rs b/emulator-asm/asm-runner/src/shmem_reader.rs new file mode 100644 index 000000000..4ab114f38 --- /dev/null +++ b/emulator-asm/asm-runner/src/shmem_reader.rs @@ -0,0 +1,155 @@ +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +use libc::{mmap, shm_open, MAP_FAILED, MAP_SHARED}; +use std::io::{self, Result}; +use std::ptr; +use std::sync::atomic::{compiler_fence, Ordering}; + +use libc::{c_void, close, munmap, PROT_READ, S_IRUSR}; + +pub struct SharedMemoryReader { + ptr: *const u8, + size: usize, + fd: i32, + name: String, +} + +unsafe impl Send for SharedMemoryReader {} +unsafe impl Sync for SharedMemoryReader {} + +impl SharedMemoryReader { + pub fn new(name: &str, size: usize) -> Result { + // Open existing shared memory (read-only) + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + let fd = Self::open_shmem(name, libc::O_RDONLY, S_IRUSR); + + #[cfg(not(all(target_os = "linux", target_arch = "x86_64")))] + let fd = Self::open_shmem(name, libc::O_RDONLY, S_IRUSR as u32); + + // Map the memory region for read-only + let ptr = Self::map(fd, size, PROT_READ, false, name); + let ptr_u8 = ptr as *const u8; + + Ok(Self { ptr: ptr_u8, size, fd, name: name.to_string() }) + } + + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + fn open_shmem(name: &str, flags: i32, mode: u32) -> i32 { + let c_name = std::ffi::CString::new(name).expect("CString::new failed"); + let fd = unsafe { shm_open(c_name.as_ptr(), flags, mode) }; + if fd == -1 { + let errno_value = unsafe { *libc::__errno_location() }; + let err = io::Error::from_raw_os_error(errno_value); + let err2 = io::Error::last_os_error(); + panic!("shm_open('{name}') failed: libc::errno:{err} #### last_os_error:{err2}"); + } + fd + } + + #[cfg(not(all(target_os = "linux", target_arch = "x86_64")))] + fn open_shmem(_name: &str, _flags: i32, _mode: u32) -> i32 { + 0 + } + + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + fn map(fd: i32, size: usize, prot: i32, unlock_mapped_memory: bool, desc: &str) -> *mut c_void { + let mut flags = MAP_SHARED; + if !unlock_mapped_memory { + flags |= libc::MAP_LOCKED; + } + let mapped = unsafe { mmap(ptr::null_mut(), size, prot, flags, fd, 0) }; + if mapped == MAP_FAILED { + let err = io::Error::last_os_error(); + panic!("mmap failed for '{desc}': {err:?} ({size} bytes)"); + } + mapped + } + + #[cfg(not(all(target_os = "linux", target_arch = "x86_64")))] + fn map(_: i32, _: usize, _: i32, _: bool, _: &str) -> *mut c_void { + ptr::null_mut() + } + + unsafe fn unmap(&mut self) { + if munmap(self.ptr as *mut _, self.size) != 0 { + tracing::error!("munmap failed: {:?}", io::Error::last_os_error()); + } else { + self.ptr = ptr::null(); + self.size = 0; + tracing::trace!("Unmapped shared memory '{}'", self.name); + } + } + + /// Reads a u64 from shared memory at a specific offset (in bytes) + /// + /// # Arguments + /// * `offset` - Byte offset from the start of shared memory (must be 8-byte aligned) + /// + /// # Safety + /// This method assumes that: + /// - The shared memory contains at least `offset + 8` bytes of valid data + /// - The offset should be aligned to 8 bytes + /// + /// # Returns + /// * The u64 value read from the specified offset (in native endianness) + #[inline] + pub fn read_u64_at(&self, offset: usize) -> u64 { + debug_assert_eq!(offset % 8, 0, "Offset must be 8-byte aligned"); + + unsafe { (self.ptr.add(offset) as *const u64).read() } + } + + /// Reads a slice of data from shared memory at a specific offset + /// + /// # Type Parameters + /// * `T` - The element type to read + /// + /// # Arguments + /// * `offset` - Byte offset from the start of shared memory + /// * `len` - Number of elements of type T to read + /// + /// # Returns + /// * `Ok(Vec)` - A vector containing the read data + /// * `Err` - If the read would exceed shared memory bounds + pub fn read_slice(&self, offset: usize, len: usize) -> Result> { + let byte_size = len * std::mem::size_of::(); + + if offset + byte_size > self.size { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!( + "Read of {} bytes at offset {} exceeds shared memory capacity ({}) for '{}'", + byte_size, offset, self.size, self.name + ), + )); + } + + compiler_fence(Ordering::Acquire); + + let mut result = Vec::with_capacity(len); + unsafe { + ptr::copy_nonoverlapping(self.ptr.add(offset) as *const T, result.as_mut_ptr(), len); + result.set_len(len); + } + + Ok(result) + } + + /// Returns the size of the shared memory region in bytes + pub fn size(&self) -> usize { + self.size + } + + /// Returns the name of the shared memory region + pub fn name(&self) -> &str { + &self.name + } +} + +impl Drop for SharedMemoryReader { + fn drop(&mut self) { + unsafe { + self.unmap(); + close(self.fd); + } + } +} diff --git a/emulator-asm/asm-runner/src/shmem_utils.rs b/emulator-asm/asm-runner/src/shmem_utils.rs index 84acfefbb..6f160bf83 100644 --- a/emulator-asm/asm-runner/src/shmem_utils.rs +++ b/emulator-asm/asm-runner/src/shmem_utils.rs @@ -2,6 +2,7 @@ use libc::{ c_uint, close, mmap, munmap, shm_open, shm_unlink, MAP_FAILED, MAP_SHARED, PROT_READ, S_IRUSR, S_IWUSR, }; +use proofman_common::format_bytes; use std::{ ffi::CString, fmt::Debug, @@ -10,13 +11,9 @@ use std::{ ptr, sync::atomic::{fence, Ordering}, }; -use tracing::debug; -use zisk_common::io::{ZiskIO, ZiskStdin}; +use tracing::info; -use anyhow::anyhow; -use anyhow::Result; - -use crate::{AsmInputC2, AsmService, AsmServices, SharedMemoryWriter}; +use anyhow::{anyhow, Result}; pub enum AsmSharedMemoryMode { ReadOnly, @@ -216,7 +213,12 @@ impl AsmSharedMemory { return Ok(false); } - debug!("Remapping shared memory {} to new size: {}", self.shmem_name, read_mapped_size); + info!( + "Remapping shared memory {}: {} => {}", + self.shmem_name, + format_bytes(self.mapped_size as f64), + format_bytes(read_mapped_size as f64) + ); let offset = (*current_read_ptr as usize).wrapping_sub(self.mapped_ptr as usize); @@ -271,26 +273,10 @@ impl AsmSharedMemory { // Skip the header size to get the data pointer unsafe { self.mapped_ptr.add(size_of::()) } } - - pub fn shmem_input_name(port: u16, asm_service: AsmService, local_rank: i32) -> String { - format!("{}_{}_input", AsmServices::shmem_prefix(port, local_rank), asm_service.as_str()) - } - - pub fn shmem_output_name(port: u16, asm_service: AsmService, local_rank: i32) -> String { - format!("{}_{}_output", AsmServices::shmem_prefix(port, local_rank), asm_service.as_str()) - } - - pub fn shmem_chunk_done_name(port: u16, asm_service: AsmService, local_rank: i32) -> String { - format!( - "/{}_{}_chunk_done", - AsmServices::shmem_prefix(port, local_rank), - asm_service.as_str() - ) - } } pub fn open_shmem(name: &str, flags: i32, mode: u32) -> Result { - let c_name = CString::new(name).expect("CString::new failed"); + let c_name = CString::new(name)?; let fd = unsafe { shm_open(c_name.as_ptr(), flags, mode) }; if fd == -1 { @@ -344,18 +330,3 @@ pub unsafe fn unmap(ptr: *mut c_void, size: usize) { tracing::error!("munmap failed: {:?}", io::Error::last_os_error()); } } - -pub fn write_input(stdin: &mut ZiskStdin, shmem_input_writer: &SharedMemoryWriter) { - let inputs = stdin.read(); - let asm_input = AsmInputC2 { zero: 0, input_data_size: inputs.len() as u64 }; - let shmem_input_size = (inputs.len() + size_of::() + 7) & !7; - - let mut full_input = Vec::with_capacity(shmem_input_size); - full_input.extend_from_slice(&asm_input.to_bytes()); - full_input.extend_from_slice(&inputs); - while full_input.len() < shmem_input_size { - full_input.push(0); - } - - shmem_input_writer.write_input(&full_input).expect("Failed to write input to shared memory"); -} diff --git a/emulator-asm/asm-runner/src/shmem_writer.rs b/emulator-asm/asm-runner/src/shmem_writer.rs index d4353bd13..bedc6145e 100644 --- a/emulator-asm/asm-runner/src/shmem_writer.rs +++ b/emulator-asm/asm-runner/src/shmem_writer.rs @@ -7,6 +7,7 @@ use libc::{c_void, close, munmap, PROT_READ, PROT_WRITE, S_IRUSR, S_IWUSR}; pub struct SharedMemoryWriter { ptr: *mut u8, + current_ptr: *mut u8, size: usize, fd: i32, name: String, @@ -26,8 +27,9 @@ impl SharedMemoryWriter { // Map the memory region for read/write let ptr = Self::map(fd, size, PROT_READ | PROT_WRITE, unlock_mapped_memory, name); + let ptr_u8 = ptr as *mut u8; - Ok(Self { ptr: ptr as *mut u8, size, fd, name: name.to_string() }) + Ok(Self { ptr: ptr_u8, current_ptr: ptr_u8, size, fd, name: name.to_string() }) } #[cfg(all(target_os = "linux", target_arch = "x86_64"))] @@ -77,31 +79,182 @@ impl SharedMemoryWriter { } } - /// Writes data to the shared memory, always from the start - pub fn write_input(&self, data: &[u8]) -> Result<()> { - if data.len() > self.size { + /// Writes data to the shared memory, starting at the specified offset + /// + /// # Type Parameters + /// * `T` - The element type of the slice (e.g., u8, u64) + /// + /// # Arguments + /// * `offset` - Byte offset from the start of shared memory where data should be written + /// * `data` - A slice of data to write to shared memory + /// + /// # Returns + /// * `Ok(())` - If data was successfully written + /// * `Err` - If data size exceeds shared memory capacity or msync fails + pub fn write_at(&self, offset: usize, data: &[T]) -> Result<()> { + let byte_size = std::mem::size_of_val(data); + + if byte_size > self.size { return Err(io::Error::new( io::ErrorKind::InvalidInput, format!( - "Data size ({}) exceeds shared memory capacity ({}) for '{}'", - data.len(), - self.size, - self.name + "Data size ({} bytes) exceeds shared memory capacity ({}) for '{}'", + byte_size, self.size, self.name ), )); } unsafe { - ptr::copy_nonoverlapping(data.as_ptr(), self.ptr, data.len()); + ptr::copy_nonoverlapping(data.as_ptr() as *const u8, self.ptr.add(offset), byte_size); // Force changes to be flushed to the shared memory #[cfg(all(target_os = "linux", target_arch = "x86_64"))] if msync(self.ptr as *mut _, self.size, MS_SYNC /*| MS_INVALIDATE*/) != 0 { - panic!("msync failed: {}", std::io::Error::last_os_error()); + return Err(io::Error::last_os_error()); } } Ok(()) } + + /// Writes data to the shared memory, always from the start + /// + /// # Type Parameters + /// * `T` - The element type of the slice (e.g., u8, u64) + /// + /// # Arguments + /// * `data` - A slice of data to write to shared memory + /// + /// # Returns + /// * `Ok(())` - If data was successfully written + /// * `Err` - If data size exceeds shared memory capacity or msync fails + pub fn append_input(&mut self, data: &[T]) -> Result<()> { + let byte_size = std::mem::size_of_val(data); + + if byte_size > self.size { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!( + "Data size ({} bytes) exceeds shared memory capacity ({}) for '{}'", + byte_size, self.size, self.name + ), + )); + } + + unsafe { + ptr::copy_nonoverlapping(data.as_ptr() as *const u8, self.current_ptr, byte_size); + // Force changes to be flushed to the shared memory + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + if msync(self.ptr as *mut _, self.size, MS_SYNC) != 0 { + return Err(io::Error::last_os_error()); + } + + self.current_ptr = self.current_ptr.add(byte_size); + } + + Ok(()) + } + + /// Writes data to the shared memory as a ring buffer, handling wraparound automatically + /// + /// Uses internal pointer tracking with automatic wraparound. + /// + /// # Type Parameters + /// * `T` - The element type of the slice (e.g., u8, u64) + /// + /// # Arguments + /// * `data` - A slice of data to write to shared memory + #[inline] + pub fn write_ring_buffer(&mut self, data: &[T]) -> Result<()> { + let byte_size = std::mem::size_of_val(data); + + let data_ptr = data.as_ptr() as *const u8; + + unsafe { + let current_offset = self.current_ptr.offset_from(self.ptr) as usize; + + // Check if data wraps around the buffer + if current_offset + byte_size > self.size { + // Split write: first part to end of buffer, second part from start + let first_part_size = self.size - current_offset; + let second_part_size = byte_size - first_part_size; + + // Write first part to end of buffer + ptr::copy_nonoverlapping(data_ptr, self.current_ptr, first_part_size); + + // Write second part to start of buffer + ptr::copy_nonoverlapping(data_ptr.add(first_part_size), self.ptr, second_part_size); + + // Update current_ptr to point after the second part + self.current_ptr = self.ptr.add(second_part_size); + } else { + // Write contiguously + ptr::copy_nonoverlapping(data_ptr, self.current_ptr, byte_size); + + // Update current_ptr, wrapping if at end + self.current_ptr = self.current_ptr.add(byte_size); + let new_offset = self.current_ptr.offset_from(self.ptr) as usize; + if new_offset == self.size { + self.current_ptr = self.ptr; + } + } + + // Force changes to be flushed to the shared memory + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + if msync(self.ptr as *mut _, self.size, MS_SYNC) != 0 { + return Err(io::Error::last_os_error()); + } + } + + Ok(()) + } + + /// Reads a u64 from shared memory at a specific offset (in bytes) + /// + /// # Arguments + /// * `offset` - Byte offset from the start of shared memory (must be 8-byte aligned) + /// + /// # Safety + /// This method assumes that: + /// - The shared memory contains at least `offset + 8` bytes of valid data + /// - The offset should be aligned to 8 bytes + /// + /// # Returns + /// * The u64 value read from the specified offset (in native endianness) + #[inline] + pub fn read_u64_at(&self, offset: usize) -> u64 { + debug_assert_eq!(offset % 8, 0, "Offset must be 8-byte aligned"); + + unsafe { (self.ptr.add(offset) as *const u64).read() } + } + + /// Writes a u64 to shared memory at a specific offset (in bytes) + /// + /// # Arguments + /// * `offset` - Byte offset from the start of shared memory (must be 8-byte aligned) + /// * `value` - The u64 value to write + /// + /// # Safety + /// This method assumes that: + /// - The shared memory contains at least `offset + 8` bytes of valid data + /// - The offset is 8-byte aligned for optimal performance + #[inline] + pub fn write_u64_at(&self, offset: usize, value: u64) { + debug_assert_eq!(offset % 8, 0, "Offset must be 8-byte aligned"); + + unsafe { + (self.ptr.add(offset) as *mut u64).write(value); + + // Force changes to be flushed to the shared memory + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + if msync(self.ptr as *mut _, self.size, MS_SYNC) != 0 { + panic!("msync failed in write_u64_at: {:?}", io::Error::last_os_error()); + } + } + } + + pub fn reset(&mut self) { + self.current_ptr = self.ptr; + } } impl Drop for SharedMemoryWriter { diff --git a/emulator-asm/src/asm_provided.hpp b/emulator-asm/src/asm_provided.hpp new file mode 100644 index 000000000..9a6b42e5b --- /dev/null +++ b/emulator-asm/src/asm_provided.hpp @@ -0,0 +1,31 @@ +#ifndef EMULATOR_ASM_ASM_PROVIDED_HPP +#define EMULATOR_ASM_ASM_PROVIDED_HPP + +#include + +/**************************/ +/* Assembly-provided code */ +/**************************/ + +// This is the emulator assembly code start function, which will execute the code in the ROM until +// it ends, and generate the trace in the output trace memory. +// It is called from C to start the execution of the assembly code. +void emulator_start(void); + +// These functions are implemented in assembly and provide access to configuration parameters used +// to generate the assembly code, and that in some cases must match the C main program configuration +uint64_t get_max_bios_pc(void); +uint64_t get_max_program_pc(void); +uint64_t get_gen_method(void); // Must match the C main program provided argument +uint64_t get_precompile_results(void); + +// These variables are updated by the assembly code to provide information about the execution +// status and trace generation, accessed by C to generate the response to the client +extern uint64_t MEM_STEP; // Current step, i.e. number of executed instructions, updated by assembly at every step or at the end of every chunk, depending on the generation method +extern uint64_t MEM_END; // Indicates the end of execution +extern uint64_t MEM_ERROR; // Indicates an error during execution +extern uint64_t MEM_TRACE_ADDRESS; // Address of the trace memory +extern uint64_t MEM_CHUNK_ADDRESS; // Address of the current chunk +extern uint64_t MEM_CHUNK_START_STEP; // Step at which the current chunk started + +#endif // EMULATOR_ASM_ASM_PROVIDED_HPP \ No newline at end of file diff --git a/emulator-asm/src/c_provided.c b/emulator-asm/src/c_provided.c new file mode 100644 index 000000000..9611679de --- /dev/null +++ b/emulator-asm/src/c_provided.c @@ -0,0 +1,488 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "c_provided.hpp" +#include "globals.hpp" +#include "trace.hpp" +#include "emu.hpp" + +/**************/ +/* TRACE SIZE */ +/**************/ + +void set_trace_size (uint64_t new_trace_size) +{ + // Update trace global variables + // printf("%s trace resize (trace_resize_request: %ld): %ld MB => %ld MB\n", log_name, trace_resize_request, trace_size >> 20, new_trace_size >> 20); + + // trace_resize_request = 0; + + trace_size = new_trace_size; + trace_address_threshold = TRACE_ADDR + trace_size - MAX_CHUNK_TRACE_SIZE; + pOutputTrace[2] = trace_size; +} + +/**************/ +/* PRINT REGS */ +/**************/ + +//#define PRINT_REGS +#ifdef PRINT_REGS +extern uint64_t reg_0; +extern uint64_t reg_1; +extern uint64_t reg_2; +extern uint64_t reg_3; +extern uint64_t reg_4; +extern uint64_t reg_5; +extern uint64_t reg_6; +extern uint64_t reg_7; +extern uint64_t reg_8; +extern uint64_t reg_9; +extern uint64_t reg_10; +extern uint64_t reg_11; +extern uint64_t reg_12; +extern uint64_t reg_13; +extern uint64_t reg_14; +extern uint64_t reg_15; +extern uint64_t reg_16; +extern uint64_t reg_17; +extern uint64_t reg_18; +extern uint64_t reg_19; +extern uint64_t reg_20; +extern uint64_t reg_21; +extern uint64_t reg_22; +extern uint64_t reg_23; +extern uint64_t reg_24; +extern uint64_t reg_25; +extern uint64_t reg_26; +extern uint64_t reg_27; +extern uint64_t reg_28; +extern uint64_t reg_29; +extern uint64_t reg_30; +extern uint64_t reg_31; +extern uint64_t reg_32; +extern uint64_t reg_33; +extern uint64_t reg_34; +#endif + +// Used for debugging purposes +extern int _print_regs() +{ +#ifdef PRINT_REGS + printf("print_regs()\n"); + printf("\treg[ 0]=%lu=0x%lx=@%p\n", reg_0, reg_0, ®_0); + printf("\treg[ 1]=%lu=0x%lx=@%p\n", reg_1, reg_1, ®_1); + printf("\treg[ 2]=%lu=0x%lx=@%p\n", reg_2, reg_2, ®_2); + printf("\treg[ 3]=%lu=0x%lx=@%p\n", reg_3, reg_3, ®_3); + printf("\treg[ 4]=%lu=0x%lx=@%p\n", reg_4, reg_4, ®_4); + printf("\treg[ 5]=%lu=0x%lx=@%p\n", reg_5, reg_5, ®_5); + printf("\treg[ 6]=%lu=0x%lx=@%p\n", reg_6, reg_6, ®_6); + printf("\treg[ 7]=%lu=0x%lx=@%p\n", reg_7, reg_7, ®_7); + printf("\treg[ 8]=%lu=0x%lx=@%p\n", reg_8, reg_8, ®_8); + printf("\treg[ 9]=%lu=0x%lx=@%p\n", reg_9, reg_9, ®_9); + printf("\treg[10]=%lu=0x%lx=@%p\n", reg_10, reg_10, ®_10); + printf("\treg[11]=%lu=0x%lx=@%p\n", reg_11, reg_11, ®_11); + printf("\treg[12]=%lu=0x%lx=@%p\n", reg_12, reg_12, ®_12); + printf("\treg[13]=%lu=0x%lx=@%p\n", reg_13, reg_13, ®_13); + printf("\treg[14]=%lu=0x%lx=@%p\n", reg_14, reg_14, ®_14); + printf("\treg[15]=%lu=0x%lx=@%p\n", reg_15, reg_15, ®_15); + printf("\treg[16]=%lu=0x%lx=@%p\n", reg_16, reg_16, ®_16); + printf("\treg[17]=%lu=0x%lx=@%p\n", reg_17, reg_17, ®_17); + printf("\treg[18]=%lu=0x%lx=@%p\n", reg_18, reg_18, ®_18); + printf("\treg[19]=%lu=0x%lx=@%p\n", reg_19, reg_19, ®_19); + printf("\treg[20]=%lu=0x%lx=@%p\n", reg_20, reg_20, ®_20); + printf("\treg[21]=%lu=0x%lx=@%p\n", reg_21, reg_21, ®_21); + printf("\treg[22]=%lu=0x%lx=@%p\n", reg_22, reg_22, ®_22); + printf("\treg[23]=%lu=0x%lx=@%p\n", reg_23, reg_23, ®_23); + printf("\treg[24]=%lu=0x%lx=@%p\n", reg_24, reg_24, ®_24); + printf("\treg[25]=%lu=0x%lx=@%p\n", reg_25, reg_25, ®_25); + printf("\treg[26]=%lu=0x%lx=@%p\n", reg_26, reg_26, ®_26); + printf("\treg[27]=%lu=0x%lx=@%p\n", reg_27, reg_27, ®_27); + printf("\treg[28]=%lu=0x%lx=@%p\n", reg_28, reg_28, ®_28); + printf("\treg[29]=%lu=0x%lx=@%p\n", reg_29, reg_29, ®_29); + printf("\treg[30]=%lu=0x%lx=@%p\n", reg_30, reg_30, ®_30); + printf("\treg[31]=%lu=0x%lx=@%p\n", reg_31, reg_31, ®_31); + printf("\treg[32]=%lu=0x%lx=@%p\n", reg_32, reg_32, ®_32); + printf("\treg[33]=%lu=0x%lx=@%p\n", reg_33, reg_33, ®_33); + printf("\treg[34]=%lu=0x%lx=@%p\n", reg_34, reg_34, ®_34); + printf("\n"); +#endif + return 0; +} + +/************/ +/* PRINT PC */ +/************/ + +//#define PRINT_PC_DURATION +#ifdef PRINT_PC_DURATION +struct timeval print_pc_tv; +#endif + +// Used for debugging purposes +extern int _print_pc (uint64_t pc, uint64_t c) +{ +#ifdef PRINT_PC_DURATION + print_pc_counter++; + { + struct timeval tv; + gettimeofday(&tv, NULL); + uint64_t duration = TimeDiff(print_pc_tv, tv); + if (duration > 900) + { + uint64_t chunk = print_pc_counter / chunk_size; + printf("print_pc() pc=%lx counter=%lu sec=%lu usec=%lu duration=%lu chunk=%lu\n", pc, print_pc_counter, tv.tv_sec, tv.tv_usec, duration, chunk); + fflush(stdout); + } + print_pc_tv = tv; + } +#endif + + printf("s=%lu pc=%lx c=%lx", print_pc_counter, pc, c); + +//#define PRINT_PC_REGS +#ifdef PRINT_PC_REGS + /* Used for debugging */ + printf(" r0=%lx", reg_0); + printf(" r1=%lx", reg_1); + printf(" r2=%lx", reg_2); + printf(" r3=%lx", reg_3); + printf(" r4=%lx", reg_4); + printf(" r5=%lx", reg_5); + printf(" r6=%lx", reg_6); + printf(" r7=%lx", reg_7); + printf(" r8=%lx", reg_8); + printf(" r9=%lx", reg_9); + printf(" r10=%lx", reg_10); + printf(" r11=%lx", reg_11); + printf(" r12=%lx", reg_12); + printf(" r13=%lx", reg_13); + printf(" r14=%lx", reg_14); + printf(" r15=%lx", reg_15); + printf(" r16=%lx", reg_16); + printf(" r17=%lx", reg_17); + printf(" r18=%lx", reg_18); + printf(" r19=%lx", reg_19); + printf(" r20=%lx", reg_20); + printf(" r21=%lx", reg_21); + printf(" r22=%lx", reg_22); + printf(" r23=%lx", reg_23); + printf(" r24=%lx", reg_24); + printf(" r25=%lx", reg_25); + printf(" r26=%lx", reg_26); + printf(" r27=%lx", reg_27); + printf(" r28=%lx", reg_28); + printf(" r29=%lx", reg_29); + printf(" r30=%lx", reg_30); + printf(" r31=%lx", reg_31); +#endif + + printf("\n"); + fflush(stdout); + print_pc_counter++; + return 0; +} + +/**************/ +/* CHUNK DONE */ +/**************/ + +//#define CHUNK_DONE_DURATION +#ifdef CHUNK_DONE_DURATION +uint64_t chunk_done_counter = 0; +struct timeval chunk_done_tv; +#endif + +//#define CHUNK_DONE_SYNC_DURATION +#ifdef CHUNK_DONE_SYNC_DURATION +struct timeval sync_start, sync_stop; +uint64_t sync_duration = 0; +#endif + +// Called by the assembly to notify that a chunk is done and its trace is ready to be consumed +extern void _chunk_done() +{ +#ifdef CHUNK_DONE_DURATION + chunk_done_counter++; + if ((chunk_done_counter & 0xFF) == 0) + { + struct timeval tv; + gettimeofday(&tv, NULL); + uint64_t duration = TimeDiff(chunk_done_tv, tv); + if (duration > 5000) + { + printf("chunk_done() counter=%lu sec=%lu usec=%lu duration=%lu\n", chunk_done_counter, tv.tv_sec, tv.tv_usec, duration); + fflush(stdout); + } + chunk_done_tv = tv; + } +#endif + +#ifdef CHUNK_DONE_SYNC_DURATION + gettimeofday(&sync_start, NULL); +#endif + + __sync_synchronize(); + +#ifdef CHUNK_DONE_SYNC_DURATION + gettimeofday(&sync_stop, NULL); + sync_duration += TimeDiff(sync_start, sync_stop); + printf("chunk_done() sync_duration=%lu\n", sync_duration); +#endif + + // Notify the caller that a new chunk is done and its trace is ready to be consumed + assert(call_chunk_done); + int result = sem_post(sem_chunk_done); + if (result == -1) + { + printf("ERROR: Failed calling sem_post(%s) errno=%d=%s\n", sem_chunk_done_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } +} + +/*****************/ +/* REALLOC TRACE */ +/*****************/ + +// Called by the assembly to reallocate the trace when needed, e.g. for the next chunk, +// to increase the trace size by another chunk size +extern void _realloc_trace (void) +{ + // Increase realloc counter + realloc_counter++; + + // Map next chunk of the trace shared memory + trace_map_next_chunk(); + + // Update trace global variables + set_trace_size(trace_total_mapped_size); + +#ifdef DEBUG + if (verbose) printf("realloc_trace() realloc counter=%lu trace_address=0x%lx trace_size=%lu=%lx max_address=0x%lx trace_address_threshold=0x%lx chunk_size=%lu\n", realloc_counter, trace_address, trace_size, trace_size, trace_address + trace_size, trace_address_threshold, chunk_size); +#endif +} + +/*********************************/ +/* WAIT FOR PRECOMPILE AVAILABLE */ +/*********************************/ + +// Called by the assembly when prec_written == prec_read, to wait for new precompile results to be available +int _wait_for_prec_avail (void) +{ + // Increment wait counter + wait_prec_avail_counter++; + + //printf("wait_for_prec_avail() counter=%lu\n", wait_prec_avail_counter); + + // Sync control output shared memory so that the writer can see the precompile reads we have + // done, and thus update the precompile_written_address if needed + if (msync((void *)shmem_control_output_address, CONTROL_OUTPUT_SIZE, MS_SYNC) != 0) { + printf("ERROR: msync failed for shmem_control_output_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Tell the writer that we have read some precompile results + sem_post(sem_prec_read); + + // Make sure the precompile available semaphore is reset before checking the condition, + // since the caller may have posted it (even several times) before we called sem_wait() + while (sem_trywait(sem_prec_avail) == 0) {/*printf("Purging sem_prec_avail\n");*/}; + + // Sync control input shared memory so that we can see the latest precompile_written_address value + if (msync((void *)shmem_control_input_address, CONTROL_INPUT_SIZE, MS_SYNC) != 0) { + printf("ERROR: msync failed for shmem_control_input_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Check if there are already precompile results available + if (*precompile_written_address > *precompile_read_address) + { + // Sync precompile shared memory + if (msync((void *)shmem_precompile_address, MAX_PRECOMPILE_SIZE, MS_SYNC) != 0) { + printf("ERROR: msync failed for shmem_precompile_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + return 0; + } + + // Wait again, but blocking this time + while (true) + { + struct timespec ts; + int result = clock_gettime(CLOCK_REALTIME, &ts); + if (result == -1) + { + printf("ERROR: wait_for_prec_avail() failed calling clock_gettime() errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + ts.tv_sec += 5; // 5 seconds timeout + + //printf("_wait_for_prec_avail() calling sem_wait precompile_written_address=%lu precompile_read_address=%lu\n", *precompile_written_address, *precompile_read_address); + if (wait_flag) *waiting_for_precompile_address = wait_prec_avail_counter << 1; // Leave a mark in shmem that we are waiting; for debugging purposes + result = sem_timedwait(sem_prec_avail, &ts); + if (wait_flag) *waiting_for_precompile_address = (wait_prec_avail_counter << 1) + 1; // Clear the mark in shmem that we are waiting; for debugging purposes + //printf("_wait_for_prec_avail() called sem_wait precompile_written_address=%lu precompile_read_address=%lu\n", *precompile_written_address, *precompile_read_address); + if ((result == -1) && (errno != ETIMEDOUT)) + { + printf("ERROR: wait_for_prec_avail() failed calling sem_wait(%s) errno=%d=%s\n", sem_prec_avail_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Sync control input shared memory so that we can see the latest precompile_written_address value + if (msync((void *)shmem_control_input_address, CONTROL_INPUT_SIZE, MS_SYNC) != 0) { + printf("ERROR: msync failed for shmem_control_input_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + if (*precompile_exit_address != 0) + { + printf("ERROR: wait_for_prec_avail() found precompile_exit_address=%lu\n", *precompile_exit_address); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (*precompile_written_address > *precompile_read_address) + { + // Sync precompile shared memory + if (msync((void *)shmem_precompile_address, MAX_PRECOMPILE_SIZE, MS_SYNC) != 0) { + printf("ERROR: msync failed for shmem_precompile_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + return 0; + } + } + + printf("ERROR: wait_for_prec_avail() unreachable code\n"); + fflush(stdout); + fflush(stderr); + exit(-1); +} + +/****************************/ +/* WAIT FOR INPUT AVAILABLE */ +/****************************/ + +// Called by the assembly when input_written == input_read, to wait for new input to be available +int _wait_for_input_avail (uint64_t required_input_bytes) +{ + // Increment wait counter + wait_input_avail_counter++; + + //printf("wait_for_input_avail() required_input_bytes=%lu counter=%lu\n", required_input_bytes, wait_input_avail_counter); + + // Make sure the input available semaphore is reset before checking the condition, + // since the caller may have posted it (even several times) before we called sem_wait() + while (sem_trywait(sem_input_avail) == 0) {/*printf("Purging sem_input_avail\n");*/}; + + // Sync control input shared memory so that we can see the latest input_written_address value + if (msync((void *)shmem_control_input_address, CONTROL_INPUT_SIZE, MS_SYNC) != 0) { + printf("ERROR: msync failed for shmem_control_input_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Check if there is already input data available + if (*input_written_address > required_input_bytes) + { + // Sync input shared memory + if (msync((void *)INPUT_ADDR, MAX_INPUT_SIZE, MS_SYNC) != 0) + { + printf("ERROR: msync failed for shmem_input_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + return 0; + } + + // Wait again, but blocking this time + while (true) + { + struct timespec ts; + int result = clock_gettime(CLOCK_REALTIME, &ts); + if (result == -1) + { + printf("ERROR: wait_for_input_avail() failed calling clock_gettime() errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + ts.tv_sec += 5; // 5 seconds timeout + + //printf("_wait_for_input_avail() calling sem_wait input_written_address=%lu required_input_bytes=%lu\n", *input_written_address, required_input_bytes); + if (wait_flag) *waiting_for_input_address = wait_input_avail_counter << 1; // Leave a mark in shmem that we are waiting; for debugging purposes + result = sem_timedwait(sem_input_avail, &ts); + if (wait_flag) *waiting_for_input_address = (wait_input_avail_counter << 1) + 1; // Clear the mark in shmem that we are waiting; for debugging purposes + //printf("_wait_for_input_avail() called sem_wait input_written_address=%lu required_input_bytes=%lu\n", *input_written_address, required_input_bytes); + if ((result == -1) && (errno != ETIMEDOUT)) + { + printf("ERROR: wait_for_input_avail() failed calling sem_wait(%s) errno=%d=%s\n", sem_input_avail_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Sync control input shared memory so that we can see the latest input_written_address value + if (msync((void *)shmem_control_input_address, CONTROL_INPUT_SIZE, MS_SYNC) != 0) { + printf("ERROR: msync failed for shmem_control_input_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + if (*precompile_exit_address != 0) + { + printf("ERROR: wait_for_input_avail() found precompile_exit_address=%lu\n", *precompile_exit_address); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (*input_written_address >= required_input_bytes) + { + // Sync input shared memory + if (msync((void *)INPUT_ADDR, MAX_INPUT_SIZE, MS_SYNC) != 0) { + printf("ERROR: msync failed for shmem_input_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + return 0; + } + } + + printf("ERROR: wait_for_input_avail() unreachable code\n"); + fflush(stdout); + fflush(stderr); + exit(-1); +} \ No newline at end of file diff --git a/emulator-asm/src/c_provided.hpp b/emulator-asm/src/c_provided.hpp new file mode 100644 index 000000000..4d56a7429 --- /dev/null +++ b/emulator-asm/src/c_provided.hpp @@ -0,0 +1,13 @@ +#ifndef EMULATOR_ASM_C_PROVIDED_HPP +#define EMULATOR_ASM_C_PROVIDED_HPP + +#include + +extern int _print_regs(); +extern int _print_pc (uint64_t pc, uint64_t c); +extern void _chunk_done(); +extern void _realloc_trace (void); +extern int _wait_for_prec_avail (void); +extern int _wait_for_input_avail (uint64_t required_input_bytes); + +#endif // EMULATOR_ASM_C_PROVIDED_HPP \ No newline at end of file diff --git a/emulator-asm/src/client.c b/emulator-asm/src/client.c new file mode 100644 index 000000000..065fe8bb8 --- /dev/null +++ b/emulator-asm/src/client.c @@ -0,0 +1,1507 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "constants.hpp" +#include "client.hpp" +#include "globals.hpp" +#include "emu.hpp" + +void * shmem_input_address = NULL; + +/**********/ +/* CLIENT */ +/**********/ + +void client_setup (void) +{ + assert(!server); + assert(client); + + int result; + + /***********************/ + /* INPUT MINIMAL TRACE */ + /***********************/ + + // Input MT trace + if ((gen_method == ChunkPlayerMTCollectMem) || (gen_method == ChunkPlayerMemReadsCollectMain)) + { + // Create the output shared memory + shmem_mt_fd = shm_open(shmem_mt_name, O_RDONLY, 0666); + if (shmem_mt_fd < 0) + { + printf("ERROR: Failed calling trace shm_open(%s) errno=%d=%s\n", shmem_mt_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Map it to the trace address +#ifdef DEBUG + gettimeofday(&start_time, NULL); +#endif + void * pTrace = mmap((void *)TRACE_ADDR, chunk_player_mt_size, PROT_READ, MAP_SHARED | MAP_FIXED | map_locked_flag, shmem_mt_fd, 0); +#ifdef DEBUG + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); +#endif + if (pTrace == MAP_FAILED) + { + printf("ERROR: Failed calling mmap(MT) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if ((uint64_t)pTrace != TRACE_ADDR) + { + printf("ERROR: Called mmap(MT) but returned address = %p != 0x%lx\n", pTrace, TRACE_ADDR); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) printf("mmap(MT) returned %p in %lu us\n", pTrace, duration); + } + + /**********************/ + /* PRECOMPILE_RESULTS */ + /**********************/ + + if (precompile_results_enabled) + { + /**************/ + /* PRECOMPILE */ + /**************/ + + // Create the precompile results shared memory + shmem_precompile_fd = shm_open(shmem_precompile_name, O_RDWR, 0666); + if (shmem_precompile_fd < 0) + { + printf("ERROR: Failed calling precompile shm_open(%s) errno=%d=%s\n", shmem_precompile_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Map precompile address space + if (verbose) gettimeofday(&start_time, NULL); + void * pPrecompile = mmap(NULL, MAX_PRECOMPILE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | map_locked_flag, shmem_precompile_fd, 0); + if (verbose) + { + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + } + if (pPrecompile == MAP_FAILED) + { + printf("ERROR: Failed calling mmap(precompile) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + shmem_precompile_address = pPrecompile; + precompile_results_address = (uint64_t *)pPrecompile; + + if (verbose) printf("mmap(precompile) mapped %lu B and returned address %p in %lu us\n", MAX_PRECOMPILE_SIZE, precompile_results_address, duration); + + /*************************/ + /* PRECOMPILE SEMAPHORES */ + /*************************/ + + // Create the semaphore for precompile results available signal + assert(strlen(sem_prec_avail_name) > 0); + + sem_prec_avail = sem_open(sem_prec_avail_name, O_CREAT, 0666, 0); + if (sem_prec_avail == SEM_FAILED) + { + printf("ERROR: Failed calling sem_open(%s) errno=%d=%s\n", sem_prec_avail_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) printf("sem_open(%s) succeeded\n", sem_prec_avail_name); + + // Create the semaphore for precompile results read signal + assert(strlen(sem_prec_read_name) > 0); + + sem_prec_read = sem_open(sem_prec_read_name, O_CREAT, 0666, 0); + if (sem_prec_read == SEM_FAILED) + { + printf("ERROR: Failed calling sem_open(%s) errno=%d=%s\n", sem_prec_read_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) printf("sem_open(%s) succeeded\n", sem_prec_read_name); + } + + /*****************/ + /* CONTROL INPUT */ + /*****************/ + + // Create the control input shared memory + shmem_control_input_fd = shm_open(shmem_control_input_name, O_RDWR, 0666); + if (shmem_control_input_fd < 0) + { + printf("ERROR: Failed calling control shm_open(%s) errno=%d=%s\n", shmem_control_input_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Map control input address space + if (verbose) gettimeofday(&start_time, NULL); + void * pControl = mmap((void *)CONTROL_INPUT_ADDR, CONTROL_INPUT_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | map_locked_flag, shmem_control_input_fd, 0); + if (verbose) + { + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + } + if (pControl == MAP_FAILED) + { + printf("ERROR: Failed calling mmap(control_input) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (pControl != (void *)CONTROL_INPUT_ADDR) + { + printf("ERROR: Called mmap(control_input) but returned address = %p != 0x%08lx\n", pControl, CONTROL_INPUT_ADDR); + fflush(stdout); + fflush(stderr); + exit(-1); + } + shmem_control_input_address = (uint64_t *)pControl; + precompile_written_address = &shmem_control_input_address[0]; + precompile_exit_address = &shmem_control_input_address[1]; + input_written_address = &shmem_control_input_address[2]; + if (verbose) printf("mmap(control_input) mapped %lu B and returned address %p in %lu us\n", CONTROL_INPUT_SIZE, shmem_control_input_address, duration); + + /*****************/ + /* CONTROL OUTPUT */ + /*****************/ + + // Create the control input shared memory + shmem_control_output_fd = shm_open(shmem_control_output_name, O_RDWR, 0666); + if (shmem_control_output_fd < 0) + { + printf("ERROR: Failed calling control shm_open(%s) errno=%d=%s\n", shmem_control_output_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Map control input address space + if (verbose) gettimeofday(&start_time, NULL); + pControl = mmap((void *)CONTROL_OUTPUT_ADDR, CONTROL_OUTPUT_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | map_locked_flag, shmem_control_output_fd, 0); + if (verbose) + { + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + } + if (pControl == MAP_FAILED) + { + printf("ERROR: Failed calling mmap(control_output) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (pControl != (void *)CONTROL_OUTPUT_ADDR) + { + printf("ERROR: Called mmap(control_output) but returned address = %p != 0x%08lx\n", pControl, CONTROL_OUTPUT_ADDR); + fflush(stdout); + fflush(stderr); + exit(-1); + } + shmem_control_output_address = (uint64_t *)pControl; + precompile_read_address = &shmem_control_output_address[0]; + if (verbose) printf("mmap(control_output) mapped %lu B and returned address %p in %lu us\n", CONTROL_OUTPUT_SIZE, shmem_control_output_address, duration); +} + +typedef enum { + PrecompileReadMode_NoPrefix, + PrecompileReadMode_Prefixed +} PrecompileReadMode; + +PrecompileReadMode precompile_read_mode = PrecompileReadMode_NoPrefix; +//PrecompileReadMode precompile_read_mode = PrecompileReadMode_Prefixed; + +typedef enum { + PrecompileWriteMode_Full, + PrecompileWriteMode_OnePrecAtATime +} PrecompileWriteMode; + +PrecompileWriteMode precompile_write_mode = PrecompileWriteMode_Full; +//PrecompileWriteMode precompile_write_mode = PrecompileWriteMode_OnePrecAtATime; + +//#define PRECOMPILE_FIXED_SIZE 25 // Keccak-f state size in u64s +#define PRECOMPILE_FIXED_SIZE 4 // SHA-256 state size in u64s + +void client_write_precompile_results (void) +{ + int result; + +#ifdef DEBUG + gettimeofday(&start_time, NULL); +#endif + + // Open input file + FILE * precompile_fp = fopen(precompile_file_name, "r"); + if (precompile_fp == NULL) + { + printf("ERROR: Failed calling fopen(%s) errno=%d=%s; does it exist?\n", precompile_file_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Get input file size + if (fseek(precompile_fp, 0, SEEK_END) == -1) + { + printf("ERROR: Failed calling fseek(%s) errno=%d=%s\n", precompile_file_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + long precompile_data_size = ftell(precompile_fp); + if (precompile_data_size == -1) + { + printf("ERROR: Failed calling ftell(%s) errno=%d=%s\n", precompile_file_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if ((precompile_data_size & 0x7) != 0) + { + printf("ERROR: Precompile results file (%s) size (%lu) is not a multiple of 8 B\n", precompile_file_name, precompile_data_size); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Go back to the first byte + if (fseek(precompile_fp, 0, SEEK_SET) == -1) + { + printf("ERROR: Failed calling fseek(%s, 0) errno=%d=%s\n", precompile_file_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + assert(precompile_read_mode == PrecompileReadMode_NoPrefix || precompile_read_mode == PrecompileReadMode_Prefixed); + assert(precompile_write_mode == PrecompileWriteMode_Full || precompile_write_mode == PrecompileWriteMode_OnePrecAtATime); + + /*************/ + /* NO PREFIX */ + /*************/ + + if (precompile_read_mode == PrecompileReadMode_NoPrefix) + { + if (precompile_write_mode == PrecompileWriteMode_Full) + { + // Check the precompile data size is inside the proper range + if (precompile_data_size > MAX_PRECOMPILE_SIZE) + { + printf("ERROR: Size of precompile results file (%s) is too long (%lu)\n", precompile_file_name, precompile_data_size); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Copy input data into input memory + size_t precompile_read = fread(precompile_results_address, 1, precompile_data_size, precompile_fp); + if (precompile_read != precompile_data_size) + { + printf("ERROR: Input read (%lu) != expected read size (%lu)\n", precompile_read, precompile_data_size); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Initialize precompile written address + *precompile_written_address = precompile_data_size >> 3; // in u64s + + //printf("Posting sem_prec_avail() precompile_written=%lu precompile_read=%lu\n", *precompile_written_address, *precompile_read_address); + sem_post(sem_prec_avail); + } + else if (precompile_write_mode == PrecompileWriteMode_OnePrecAtATime) + { + // Check the precompile data size is inside the proper range + if (precompile_data_size % (PRECOMPILE_FIXED_SIZE * 8) != 0) + { + printf("ERROR: Size of precompile results file (%s) is not a multiple %u * 8 B\n", precompile_file_name, PRECOMPILE_FIXED_SIZE); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Initialize precompile written address to zero + *precompile_written_address = 0; // in u64s + + // Copy in chunks of PRECOMPILE_FIXED_SIZE*8 bytes (Keccak-f state size) + uint64_t precompile_read_so_far = 0; + uint64_t data[PRECOMPILE_FIXED_SIZE]; + while (precompile_read_so_far < (uint64_t)precompile_data_size) + { + // Wait for server to read precompile results + //printf("Waiting for sem_prec_read()\n"); + result = sem_wait(sem_prec_read); + if (result == -1) + { + printf("ERROR: Failed calling sem_wait(sem_prec_read) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Number of bytes to read from file and write to shared memory in every loop + uint64_t bytes_to_read = sizeof(data); + + // Copy input data into input memory + size_t precompile_read = fread(data, 1, bytes_to_read, precompile_fp); + if (precompile_read != bytes_to_read) + { + printf("ERROR: Input read (%lu) != expected read size (%lu)\n", precompile_read, bytes_to_read); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Copy data to shared memory + for (int i=0; i> 3) % (MAX_PRECOMPILE_SIZE >> 3)], &data[i], 8); + precompile_read_so_far += 8; + } + + // Notify server that precompile results are available + *precompile_written_address = precompile_read_so_far >> 3; // in u64s + + //printf("Posting sem_prec_avail() precompile_written=%lu precompile_read=%lu\n", *precompile_written_address, *precompile_read_address); + sem_post(sem_prec_avail); + } + } + } + + /************/ + /* PREFIXED */ + /************/ + + else if (precompile_read_mode == PrecompileReadMode_Prefixed) + { +#define CTRL_START 0x00 +#define CTRL_END 0x01 +#define CTRL_CANCEL 0x02 +#define CTRL_ERROR 0x03 +#define HINTS_TYPE_RESULT 0x04 +#define HINTS_TYPE_ECRECOVER 0x05 +#define NUM_HINT_TYPES 0x06 + + uint64_t precompile_read_so_far = 0; + uint64_t precompile_written_so_far = 0; + + while (precompile_read_so_far < (uint64_t)precompile_data_size) + { + uint64_t data; + uint64_t bytes_to_read = sizeof(data); + + // Copy input data into input memory + size_t precompile_read = fread(&data, 1, bytes_to_read, precompile_fp); + if (precompile_read != bytes_to_read) + { + printf("ERROR: Input read (%lu) != expected read size (%lu)\n", precompile_read, bytes_to_read); + fflush(stdout); + fflush(stderr); + exit(-1); + } + precompile_read_so_far += bytes_to_read; + switch (data >> 32) + { + case CTRL_START: + //printf("Precompile CTRL_START\n"); + assert(precompile_read_so_far == 8); + break; + case CTRL_END: + //printf("Precompile CTRL_END\n"); + assert(precompile_read_so_far == precompile_data_size); + break; + // case CTRL_CANCEL: + // printf("Precompile CTRL_CANCEL\n"); + // break; + // case CTRL_ERROR: + // printf("Precompile CTRL_ERROR\n"); + // break; + case HINTS_TYPE_RESULT: + { + //printf("Precompile HINTS_TYPE_RESULT\n"); + if (precompile_write_mode == PrecompileWriteMode_OnePrecAtATime) + { + // Wait for server to read precompile results + //printf("Waiting for sem_prec_read()\n"); + result = sem_wait(sem_prec_read); + if (result == -1) + { + printf("ERROR: Failed calling sem_wait(sem_prec_read) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + } + + uint64_t result_length = data & 0xFFFFFFFF; + if (result_length > (precompile_data_size - precompile_read_so_far)) + { + printf("ERROR: Precompile HINTS_TYPE_RESULT length=%lu exceeds remaining file size %lu\n", result_length, precompile_data_size - precompile_read_so_far); + fflush(stdout); + fflush(stderr); + exit(-1); + } + //printf("Precompile HINTS_TYPE_RESULT result_length=%lu\n", result_length); + for (uint64_t i=0; i> 3) % (MAX_PRECOMPILE_SIZE >> 3)], &value, 8); + precompile_read_so_far += 8; + precompile_written_so_far += 8; + //printf(" Precompile result[%lu] = 0x%016lx\n", i, value); + } + + if (precompile_write_mode == PrecompileWriteMode_OnePrecAtATime) + { + // Notify server that precompile results are available + *precompile_written_address = precompile_written_so_far >> 3; // in u64s + + //printf("Posting sem_prec_avail() precompile_written=%lu precompile_read=%lu\n", *precompile_written_address, *precompile_read_address); + sem_post(sem_prec_avail); + } + } + break; + // case HINTS_TYPE_ECRECOVER: + // { + // // Not implemented + // printf("Precompile HINTS_TYPE_ECRECOVER not implemented\n"); + // } + // break; + default: + printf("ERROR: Unknown precompile prefix type %lu\n", data >> 32); + fflush(stdout); + fflush(stderr); + exit(-1); + } + } + + if (precompile_write_mode == PrecompileWriteMode_Full) + { + // Notify server that precompile results are available + *precompile_written_address = precompile_written_so_far >> 3; // in u64s + + //printf("Posting sem_prec_avail() precompile_written=%lu precompile_read=%lu\n", *precompile_written_address, *precompile_read_address); + sem_post(sem_prec_avail); + } + + } + + // Close the file pointer + fclose(precompile_fp); + +#ifdef DEBUG + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + printf("client (precompile): done in %lu us\n", duration); +#endif +} + +void client_run (void) +{ + printf("client_run(): Starting client...\n"); + assert(client); + assert(!server); + + int result; + + /************************/ + /* Read input file data */ + /************************/ + if ((gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain)) + { + +#ifdef DEBUG + gettimeofday(&start_time, NULL); +#endif + + // Open input file + FILE * input_fp = fopen(input_file, "r"); + if (input_fp == NULL) + { + printf("ERROR: Failed calling fopen(%s) errno=%d=%s; does it exist?\n", input_file, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Get input file size + if (fseek(input_fp, 0, SEEK_END) == -1) + { + printf("ERROR: Failed calling fseek(%s) errno=%d=%s\n", input_file, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + long input_data_size = ftell(input_fp); + if (input_data_size == -1) + { + printf("ERROR: Failed calling ftell(%s) errno=%d=%s\n", input_file, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Go back to the first byte + if (fseek(input_fp, 0, SEEK_SET) == -1) + { + printf("ERROR: Failed calling fseek(%s, 0) errno=%d=%s\n", input_file, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Check the input data size is inside the proper range + if (input_data_size > (MAX_INPUT_SIZE - 16)) + { + printf("ERROR: Size of input file (%s) is too long (%lu)\n", input_file, input_data_size); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Open input shared memory + shmem_input_fd = shm_open(shmem_input_name, O_RDWR, 0666); + if (shmem_input_fd < 0) + { + printf("ERROR: Failed calling input shm_open(%s) errno=%d=%s\n", shmem_input_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Map the shared memory object into the process address space + shmem_input_address = mmap(NULL, MAX_INPUT_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shmem_input_fd, 0); + if (shmem_input_address == MAP_FAILED) + { + printf("ERROR: Failed calling mmap(%s) errno=%d=%s\n", shmem_input_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Write the free input value as 0 in the first 64 bits + *(uint64_t *)shmem_input_address = (uint64_t)0; // free input + + // Copy input data into input memory + size_t input_read = fread(shmem_input_address + 8, 1, input_data_size, input_fp); + if (input_read != input_data_size) + { + printf("ERROR: Input read (%lu) != input file size (%lu)\n", input_read, input_data_size); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Close the file pointer + fclose(input_fp); + + // Unmap input + result = munmap(shmem_input_address, MAX_INPUT_SIZE); + if (result == -1) + { + printf("ERROR: Failed calling munmap(input) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Set written counter + *input_written_address = input_data_size; // in bytes + +#ifdef DEBUG + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + printf("client (input): done in %lu us\n", duration); +#endif + + } + + /*****************************/ + /* Read precompile file data */ + /*****************************/ + if (precompile_results_enabled) + { + // Reset written counter + *precompile_written_address = 0; + + //client_write_precompile_results(); + } + + /*************************/ + /* Connect to the server */ + /*************************/ + + // Create socket to connect to server + int socket_fd; + socket_fd = socket(AF_INET, SOCK_STREAM, 0); + if (socket_fd < 0) + { + printf("ERROR: socket() failed socket_fd=%d errno=%d=%s\n", socket_fd, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Configure server address + struct sockaddr_in server_addr; + server_addr.sin_family = AF_INET; + server_addr.sin_port = htons(port); + + result = inet_pton(AF_INET, SERVER_IP, &server_addr.sin_addr); + if (result <= 0) + { + printf("ERROR: inet_pton() failed. Invalid address/Address not supported result=%d errno=%d=%s\n", result, errno, strerror(errno)); + exit(-1); + } + + // Connect to server + result = connect(socket_fd, (struct sockaddr *)&server_addr, sizeof(server_addr)); + if (result < 0) + { + printf("ERROR: connect() failed result=%d errno=%d=%s\n", result, errno, strerror(errno)); + exit(-1); + } + if (verbose) printf("connect()'d to port=%u\n", port); + + // Request and response + uint64_t request[5]; + uint64_t response[5]; + + /********/ + /* Ping */ + /********/ + + gettimeofday(&start_time, NULL); + + // Prepare message to send + request[0] = TYPE_PING; + request[1] = 0; + request[2] = 0; + request[3] = 0; + request[4] = 0; + + // Send data to server + result = send(socket_fd, request, sizeof(request), 0); + if (result < 0) + { + printf("ERROR: send() failed result=%d errno=%d=%s\n", result, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Read server response + ssize_t bytes_received = recv(socket_fd, response, sizeof(response), MSG_WAITALL); + if (bytes_received < 0) + { + printf("ERROR: recv() failed result=%d errno=%d=%s\n", result, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (bytes_received != sizeof(response)) + { + printf("ERROR: recv() returned bytes_received=%ld errno=%d=%s\n", bytes_received, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (response[0] != TYPE_PONG) + { + printf("ERROR: recv() returned unexpected type=%lu\n", response[0]); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (response[1] != gen_method) + { + printf("ERROR: recv() returned unexpected gen_method=%lu\n", response[1]); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + printf("client (PING): done in %lu us\n", duration); + + /*****************/ + /* Minimal trace */ + /*****************/ + for (uint64_t i=0; i +#include +#include +#include +#include +#include "configuration.hpp" +#include "globals.hpp" +#include "asm_provided.hpp" + +/*******************************/ +/* ARGUMENTS AND CONFIGURATION */ +/*******************************/ + +// To be overwritten by arguments, if provided; otherwise, default values per generation method are used +uint16_t arguments_port = 0; + +// Print usage information: valid arguments +void print_usage (void) +{ + printf("Usage: ziskemuasm\n"); + printf("\t-s(server)\n"); + printf("\t-c(client)\n"); + printf("\t-i \n"); + printf("\t-p \n"); + printf("\t--gen=0|--generate_fast\n"); + printf("\t--gen=1|--generate_minimal_trace\n"); + printf("\t--gen=2|--generate_rom_histogram\n"); + printf("\t--gen=3|--generate_main_trace\n"); + printf("\t--gen=4|--generate_chunks\n"); + printf("\t--gen=6|--generate_zip\n"); + printf("\t--gen=9|--generate_mem_reads\n"); + printf("\t--gen=10|--generate_chunk_player_mem_reads\n"); + printf("\t--chunk \n"); + printf("\t--shutdown\n"); + printf("\t--mt \n"); + printf("\t-o output on\n"); + printf("\t--output_riscof output riscof on\n"); + printf("\t--silent silent on\n"); + printf("\t--shm_prefix (default: ZISK)\n"); + printf("\t-m metrics on\n"); + printf("\t-t trace on\n"); + printf("\t-tt trace_trace on\n"); + printf("\t-f(save to file)\n"); + printf("\t-a chunk_address\n"); + printf("\t-v verbose on\n"); + printf("\t-u unlock physical memory in mmap\n"); + printf("\t--share_input_shm share input shared memories\n"); + printf("\t--open_input_shm open existing input shared memories\n"); +#ifdef ASM_PRECOMPILE_CACHE + printf("\t--precompile-cache-store store precompile results in cache file\n"); + printf("\t--precompile-cache-load load precompile results from cache file\n"); +#endif + if (precompile_results_enabled) + { + printf("\t-r \n"); + } + printf("\t--redirect-output-to-file redirect output to file\n"); + printf("\t-h/--help print this\n"); +} + +// Parse main function arguments and configure global variables accordingly +void parse_arguments(int argc, char *argv[]) +{ + strcpy(shm_prefix, "ZISK"); + uint64_t number_of_selected_generation_methods = 0; + if (argc > 1) + { + for (int i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-s") == 0) + { + server = true; + continue; + } + if (strcmp(argv[i], "-c") == 0) + { + client = true; + continue; + } + if ( (strcmp(argv[i], "--gen=0") == 0) || (strcmp(argv[i], "--generate_fast") == 0)) + { + gen_method = Fast; + number_of_selected_generation_methods++; + continue; + } + if ( (strcmp(argv[i], "--gen=1") == 0) || (strcmp(argv[i], "--generate_minimal_trace") == 0)) + { + gen_method = MinimalTrace; + number_of_selected_generation_methods++; + continue; + } + if ( (strcmp(argv[i], "--gen=2") == 0) || (strcmp(argv[i], "--generate_rom_histogram") == 0)) + { + gen_method = RomHistogram; + number_of_selected_generation_methods++; + continue; + } + if ( (strcmp(argv[i], "--gen=3") == 0) || (strcmp(argv[i], "--generate_main_trace") == 0)) + { + gen_method = MainTrace; + number_of_selected_generation_methods++; + continue; + } + if ( (strcmp(argv[i], "--gen=4") == 0) || (strcmp(argv[i], "--generate_chunks") == 0)) + { + gen_method = ChunksOnly; + number_of_selected_generation_methods++; + continue; + } + if ( (strcmp(argv[i], "--gen=6") == 0) || (strcmp(argv[i], "--generate_zip") == 0)) + { + gen_method = Zip; + number_of_selected_generation_methods++; + continue; + } + if ( (strcmp(argv[i], "--gen=7") == 0) || (strcmp(argv[i], "--generate_mem_op") == 0)) + { + gen_method = MemOp; + number_of_selected_generation_methods++; + continue; + } + if ( (strcmp(argv[i], "--gen=8") == 0) || (strcmp(argv[i], "--generate_chunk_player_mt_collect_mem") == 0)) + { + gen_method = ChunkPlayerMTCollectMem; + number_of_selected_generation_methods++; + continue; + } + if ( (strcmp(argv[i], "--gen=9") == 0) || (strcmp(argv[i], "--generate_mem_reads") == 0)) + { + gen_method = MemReads; + number_of_selected_generation_methods++; + continue; + } + if ( (strcmp(argv[i], "--gen=10") == 0) || (strcmp(argv[i], "--generate_chunk_player_mem_reads") == 0)) + { + gen_method = ChunkPlayerMemReadsCollectMain; + number_of_selected_generation_methods++; + continue; + } + if (strcmp(argv[i], "-o") == 0) + { + output = true; + continue; + } + if (strcmp(argv[i], "--output_riscof") == 0) + { + output_riscof = true; + continue; + } + if (strcmp(argv[i], "--silent") == 0) + { + silent = true; + continue; + } + if (strcmp(argv[i], "-m") == 0) + { + metrics = true; + continue; + } + if (strcmp(argv[i], "-t") == 0) + { + trace = true; + continue; + } + if (strcmp(argv[i], "-tt") == 0) + { + trace = true; + trace_trace = true; + continue; + } + if (strcmp(argv[i], "-v") == 0) + { + verbose = true; + //emu_verbose = true; + continue; + } + if (strcmp(argv[i], "-u") == 0) + { + map_locked_flag = 0; + continue; + } + if (strcmp(argv[i], "-h") == 0) + { + print_usage(); + exit(0); + } + if (strcmp(argv[i], "--help") == 0) + { + print_usage(); + exit(0); + } + if (strcmp(argv[i], "-i") == 0) + { + i++; + if (i >= argc) + { + printf("ERROR: Detected argument -i in the last position; please provide input file after it\n"); + print_usage(); + exit(-1); + } + if (strlen(argv[i]) > 4095) + { + printf("ERROR: Detected argument -i but next argument is too long\n"); + print_usage(); + exit(-1); + } + strcpy(input_file, argv[i]); + continue; + } + if (strcmp(argv[i], "--shm_prefix") == 0) + { + i++; + if (i >= argc) + { + printf("ERROR: Detected argument --shm_prefix in the last position; please provide shared mem prefix after it\n"); + print_usage(); + exit(-1); + } + if (strlen(argv[i]) >= MAX_SHM_PREFIX_LENGTH) + { + printf("ERROR: Detected argument --shm_prefix but next argument is too long\n"); + print_usage(); + exit(-1); + } + strcpy(shm_prefix, argv[i]); + continue; + } + if (strcmp(argv[i], "--chunk") == 0) + { + i++; + if (i >= argc) + { + printf("ERROR: Detected argument --chunk in the last position; please provide chunk number after it\n"); + print_usage(); + exit(-1); + } + errno = 0; + char *endptr; + chunk_mask = strtoul(argv[i], &endptr, 10); + + // Check for errors + if (errno == ERANGE) { + printf("ERROR: Chunk number is too large\n"); + print_usage(); + exit(-1); + } else if (endptr == argv[i]) { + printf("ERROR: No digits found while parsing chunk number\n"); + print_usage(); + exit(-1); + } else if (*endptr != '\0') { + printf("ERROR: Extra characters after chunk number: %s\n", endptr); + print_usage(); + exit(-1); + } else if (chunk_mask > MAX_CHUNK_MASK) { + printf("ERROR: Invalid chunk number: %lu\n", chunk_mask); + print_usage(); + exit(-1); + } else { + printf("Got chunk_mask= %lu\n", chunk_mask); + } + continue; + } + if (strcmp(argv[i], "--shutdown") == 0) + { + do_shutdown = true; + continue; + } + if (strcmp(argv[i], "--mt") == 0) + { + i++; + if (i >= argc) + { + printf("ERROR: Detected argument --mt in the last position; please provide number of MT requests after it\n"); + print_usage(); + exit(-1); + } + errno = 0; + char *endptr; + number_of_mt_requests = strtoul(argv[i], &endptr, 10); + + // Check for errors + if (errno == ERANGE) { + printf("ERROR: Number of MT requests is too large\n"); + print_usage(); + exit(-1); + } else if (endptr == argv[i]) { + printf("ERROR: No digits found while parsing number of MT requests\n"); + print_usage(); + exit(-1); + } else if (*endptr != '\0') { + printf("ERROR: Extra characters after number of MT requests: %s\n", endptr); + print_usage(); + exit(-1); + } else if (number_of_mt_requests > 1000000) { + printf("ERROR: Invalid number of MT requests: %lu\n", number_of_mt_requests); + print_usage(); + exit(-1); + } else { + printf("Got number of MT requests= %lu\n", number_of_mt_requests); + } + continue; + } + if (strcmp(argv[i], "-p") == 0) + { + i++; + if (i >= argc) + { + printf("ERROR: Detected argument -p in the last position; please provide port number after it\n"); + print_usage(); + exit(-1); + } + errno = 0; + char *endptr; + uint64_t arguments_port_u64 = strtoul(argv[i], &endptr, 10); + if (arguments_port_u64 > 0xFFFF) + { + printf("ERROR: Port number is too large, must be at most 65535\n"); + print_usage(); + exit(-1); + } + arguments_port = arguments_port_u64 & 0xFFFF; // Keep only lower 16 bits, since port numbers are 16 bits + + // Check for errors + if (errno == ERANGE) { + printf("ERROR: Port number is too large\n"); + print_usage(); + exit(-1); + } else if (endptr == argv[i]) { + printf("ERROR: No digits found while parsing port number\n"); + print_usage(); + exit(-1); + } else if (*endptr != '\0') { + printf("ERROR: Extra characters after port number: %s\n", endptr); + print_usage(); + exit(-1); + } else { + printf("Got port number= %u\n", arguments_port); + } + continue; + } + if (strcmp(argv[i], "-f") == 0) + { + save_to_file = true; + continue; + } + if (strcmp(argv[i], "-a") == 0) + { + i++; + if (i >= argc) + { + printf("ERROR: Detected argument -a in the last position; please provide chunk address after it\n"); + print_usage(); + exit(-1); + } + errno = 0; + char *endptr; + char * argument = argv[i]; + if ((argument[0] == '0') && (argument[1] == 'x')) argument += 2; + chunk_player_address = strtoul(argument, &endptr, 16); + + // Check for errors + if (errno == ERANGE) { + printf("ERROR: Chunk address is too large\n"); + print_usage(); + exit(-1); + } else if (endptr == argument) { + printf("ERROR: No digits found while parsing chunk address\n"); + print_usage(); + exit(-1); + } else if (*endptr != '\0') { + printf("ERROR: Extra characters after chunk address: %s\n", endptr); + print_usage(); + exit(-1); + } else { + printf("Got chunk address= %p\n", (void *)chunk_player_address); + } + continue; + } + if (strcmp(argv[i], "--share_input_shm") == 0) + { + share_input_shm = true; + continue; + } + if (strcmp(argv[i], "--open_input_shm") == 0) + { + open_input_shm = true; + continue; + } + if (strcmp(argv[i], "--redirect-output-to-file") == 0) + { + redirect_output_to_file = true; + continue; + } +#ifdef ASM_PRECOMPILE_CACHE + if (strcmp(argv[i], "--precompile-cache-store") == 0) + { + precompile_cache_enabled = true; + precompile_cache_store_init(); + continue; + } + if (strcmp(argv[i], "--precompile-cache-load") == 0) + { + precompile_cache_enabled = true; + precompile_cache_load_init(); + continue; + } + +#endif + if (precompile_results_enabled && (strcmp(argv[i], "-r") == 0)) + { + i++; + if (i >= argc) + { + printf("ERROR: Detected argument -r in the last position; please provide precompile results file after it\n"); + print_usage(); + exit(-1); + } + if (strlen(argv[i]) > 4095) + { + printf("ERROR: Detected argument -r but next argument is too long\n"); + print_usage(); + exit(-1); + } + strcpy(precompile_file_name, argv[i]); + continue; + } + printf("ERROR: parse_arguments() Unrecognized argument: %s\n", argv[i]); + print_usage(); + fflush(stdout); + fflush(stderr); + exit(-1); + } + } +#ifdef ASM_PRECOMPILE_CACHE + if (precompile_cache_enabled == false) + { + printf("ERROR: parse_arguments() when in precompile cache mode, you need to use an argument: either --precompile-cache-store or --precompile-cache-load\n"); + print_usage(); + fflush(stdout); + fflush(stderr); + exit(-1); + } +#endif + + // Check that only one generation method was selected as an argument + if (number_of_selected_generation_methods != 1) + { + printf("ERROR! parse_arguments() Invalid arguments: select 1 generation method, and only one\n"); + print_usage(); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Check that the generation method selected by the process launcher is the same as the one + // for which the assembly code was generated + uint64_t asm_gen_method = get_gen_method(); + if (asm_gen_method != gen_method) + { + printf("ERROR! parse_arguments() Inconsistency: C generation method is %u but ASM generation method is %lu\n", + gen_method, + asm_gen_method); + print_usage(); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Check server/client + if (server && client) + { + printf("ERROR! parse_arguments() Inconsistency: both server and client at the same time is not possible\n"); + print_usage(); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (!server && !client) + { + printf("ERROR! parse_arguments() Inconsistency: select server or client\n"); + print_usage(); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + if (precompile_results_enabled && client && (strlen(precompile_file_name) == 0)) + { + printf("ERROR! parse_arguments() when in precompile results mode, you need to provide a precompile results file using -r \n"); + print_usage(); + fflush(stdout); + fflush(stderr); + exit(-1); + } +} + +// Configure global variables based on generation method and other arguments +void configure (void) +{ + // Select configuration based on generation method + switch (gen_method) + { + case Fast: + { + strcpy(shmem_control_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_control_input_name, "_control_input"); + else + strcat(shmem_control_input_name, "_FT_control_input"); + strcpy(shmem_control_output_name, shm_prefix); + strcat(shmem_control_output_name, "_FT_control_output"); + strcpy(shmem_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_input_name, "_input"); + else + strcat(shmem_input_name, "_FT_input"); + if (precompile_results_enabled) + { + strcpy(shmem_precompile_name, shm_prefix); + if (share_input_shm) + strcat(shmem_precompile_name, "_precompile"); + else + strcat(shmem_precompile_name, "_FT_precompile"); + strcpy(sem_prec_avail_name, shm_prefix); + strcat(sem_prec_avail_name, "_FT_prec_avail"); + strcpy(sem_prec_read_name, shm_prefix); + strcat(sem_prec_read_name, "_FT_prec_read"); + } + else + { + strcpy(shmem_precompile_name, ""); + strcpy(sem_prec_avail_name, ""); + strcpy(sem_prec_read_name, ""); + } + strcpy(shmem_output_name, ""); + strcpy(sem_chunk_done_name, ""); + strcpy(sem_shutdown_done_name, shm_prefix); + strcat(sem_shutdown_done_name, "_FT_shutdown_done"); + strcpy(sem_input_avail_name, shm_prefix); + strcat(sem_input_avail_name, "_FT_input_avail"); + strcpy(shmem_mt_name, ""); + strcpy(file_lock_name, "/tmp/"); + strcat(file_lock_name, shm_prefix); + strcat(file_lock_name, ".lock"); + strcpy(log_name, shm_prefix); + strcat(log_name, "_FT"); + port = 23120; + break; + } + case MinimalTrace: + { + strcpy(shmem_control_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_control_input_name, "_control_input"); + else + strcat(shmem_control_input_name, "_MT_control_input"); + strcpy(shmem_control_output_name, shm_prefix); + strcat(shmem_control_output_name, "_MT_control_output"); + strcpy(shmem_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_input_name, "_input"); + else + strcat(shmem_input_name, "_MT_input"); + if (precompile_results_enabled) + { + strcpy(shmem_precompile_name, shm_prefix); + if (share_input_shm) + strcat(shmem_precompile_name, "_precompile"); + else + strcat(shmem_precompile_name, "_MT_precompile"); + strcpy(sem_prec_avail_name, shm_prefix); + strcat(sem_prec_avail_name, "_MT_prec_avail"); + strcpy(sem_prec_read_name, shm_prefix); + strcat(sem_prec_read_name, "_MT_prec_read"); + } + else + { + strcpy(shmem_precompile_name, ""); + strcpy(sem_prec_avail_name, ""); + strcpy(sem_prec_read_name, ""); + } + strcpy(shmem_output_name, shm_prefix); + strcat(shmem_output_name, "_MT_output"); + strcpy(sem_chunk_done_name, shm_prefix); + strcat(sem_chunk_done_name, "_MT_chunk_done"); + strcpy(sem_shutdown_done_name, shm_prefix); + strcat(sem_shutdown_done_name, "_MT_shutdown_done"); + strcpy(sem_input_avail_name, shm_prefix); + strcat(sem_input_avail_name, "_MT_input_avail"); + strcpy(shmem_mt_name, ""); + strcpy(file_lock_name, "/tmp/"); + strcat(file_lock_name, shm_prefix); + strcat(file_lock_name, ".lock"); + strcpy(log_name, shm_prefix); + strcat(log_name, "_MT"); + call_chunk_done = true; + port = 23115; + break; + } + case RomHistogram: + { + strcpy(shmem_control_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_control_input_name, "_control_input"); + else + strcat(shmem_control_input_name, "_RH_control_input"); + strcpy(shmem_control_output_name, shm_prefix); + strcat(shmem_control_output_name, "_RH_control_output"); + strcpy(shmem_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_input_name, "_input"); + else + strcat(shmem_input_name, "_RH_input"); + if (precompile_results_enabled) + { + strcpy(shmem_precompile_name, shm_prefix); + if (share_input_shm) + strcat(shmem_precompile_name, "_precompile"); + else + strcat(shmem_precompile_name, "_RH_precompile"); + strcpy(sem_prec_avail_name, shm_prefix); + strcat(sem_prec_avail_name, "_RH_prec_avail"); + strcpy(sem_prec_read_name, shm_prefix); + strcat(sem_prec_read_name, "_RH_prec_read"); + } + else + { + strcpy(shmem_precompile_name, ""); + strcpy(sem_prec_avail_name, ""); + strcpy(sem_prec_read_name, ""); + } + strcpy(shmem_output_name, shm_prefix); + strcat(shmem_output_name, "_RH_output"); + strcpy(sem_chunk_done_name, shm_prefix); + strcat(sem_chunk_done_name, "_RH_chunk_done"); + strcpy(sem_shutdown_done_name, shm_prefix); + strcat(sem_shutdown_done_name, "_RH_shutdown_done"); + strcpy(sem_input_avail_name, shm_prefix); + strcat(sem_input_avail_name, "_RH_input_avail"); + strcpy(shmem_mt_name, ""); + strcpy(file_lock_name, "/tmp/"); + strcat(file_lock_name, shm_prefix); + strcat(file_lock_name, ".lock"); + strcpy(log_name, shm_prefix); + strcat(log_name, "_RH"); + call_chunk_done = true; + port = 23116; + break; + } + case MainTrace: + { + strcpy(shmem_control_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_control_input_name, "_control_input"); + else + strcat(shmem_control_input_name, "_MA_control_input"); + strcpy(shmem_control_output_name, shm_prefix); + strcat(shmem_control_output_name, "_MA_control_output"); + strcpy(shmem_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_input_name, "_input"); + else + strcat(shmem_input_name, "_MA_input"); + if (precompile_results_enabled) + { + strcpy(shmem_precompile_name, shm_prefix); + if (share_input_shm) + strcat(shmem_precompile_name, "_precompile"); + else + strcat(shmem_precompile_name, "_MA_precompile"); + strcpy(sem_prec_avail_name, shm_prefix); + strcat(sem_prec_avail_name, "_MA_prec_avail"); + strcpy(sem_prec_read_name, shm_prefix); + strcat(sem_prec_read_name, "_MA_prec_read"); + } + else + { + strcpy(shmem_precompile_name, ""); + strcpy(sem_prec_avail_name, ""); + strcpy(sem_prec_read_name, ""); + } + strcpy(shmem_output_name, shm_prefix); + strcat(shmem_output_name, "_MA_output"); + strcpy(sem_chunk_done_name, shm_prefix); + strcat(sem_chunk_done_name, "_MA_chunk_done"); + strcpy(sem_shutdown_done_name, shm_prefix); + strcat(sem_shutdown_done_name, "_MA_shutdown_done"); + strcpy(sem_input_avail_name, shm_prefix); + strcat(sem_input_avail_name, "_MA_input_avail"); + strcpy(shmem_mt_name, ""); + strcpy(file_lock_name, "/tmp/"); + strcat(file_lock_name, shm_prefix); + strcat(file_lock_name, ".lock"); + strcpy(log_name, shm_prefix); + strcat(log_name, "_MA"); + call_chunk_done = true; + port = 23118; + break; + } + case ChunksOnly: + { + strcpy(shmem_control_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_control_input_name, "_control_input"); + else + strcat(shmem_control_input_name, "_CH_control_input"); + strcpy(shmem_control_output_name, shm_prefix); + strcat(shmem_control_output_name, "_CH_control_output"); + strcpy(shmem_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_input_name, "_input"); + else + strcat(shmem_input_name, "_CH_input"); + strcpy(shmem_precompile_name, ""); + strcpy(sem_prec_avail_name, ""); + strcpy(sem_prec_read_name, ""); + strcpy(sem_input_avail_name, ""); + strcpy(shmem_output_name, shm_prefix); + strcat(shmem_output_name, "_CH_output"); + strcpy(sem_chunk_done_name, shm_prefix); + strcat(sem_chunk_done_name, "_CH_chunk_done"); + strcpy(sem_shutdown_done_name, shm_prefix); + strcat(sem_shutdown_done_name, "_CH_shutdown_done"); + strcpy(shmem_mt_name, ""); + strcpy(file_lock_name, "/tmp/"); + strcat(file_lock_name, shm_prefix); + strcat(file_lock_name, ".lock"); + strcpy(log_name, shm_prefix); + strcat(log_name, "_CH"); + call_chunk_done = true; + port = 23115; + break; + } + // case BusOp: + // { + // strcpy(shmem_input_name, "ZISKBO_input"); + // strcpy(shmem_output_name, "ZISKBO_output"); + // strcpy(sem_chunk_done_name, "ZISKBO_chunk_done"); + // chunk_done = true; + // port = 23115; + // break; + // } + case Zip: + { + strcpy(shmem_control_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_control_input_name, "_control_input"); + else + strcat(shmem_control_input_name, "_ZP_control_input"); + strcpy(shmem_control_output_name, shm_prefix); + strcat(shmem_control_output_name, "_ZP_control_output"); + strcpy(shmem_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_input_name, "_input"); + else + strcat(shmem_input_name, "_ZP_input"); + if (precompile_results_enabled) + { + strcpy(shmem_precompile_name, shm_prefix); + if (share_input_shm) + strcat(shmem_precompile_name, "_precompile"); + else + strcat(shmem_precompile_name, "_ZP_precompile"); + strcpy(sem_prec_avail_name, shm_prefix); + strcat(sem_prec_avail_name, "_ZP_prec_avail"); + strcpy(sem_prec_read_name, shm_prefix); + strcat(sem_prec_read_name, "_ZP_prec_read"); + } + else + { + strcpy(shmem_precompile_name, ""); + strcpy(sem_prec_avail_name, ""); + strcpy(sem_prec_read_name, ""); + } + strcpy(shmem_output_name, shm_prefix); + strcat(shmem_output_name, "_ZP_output"); + strcpy(sem_chunk_done_name, shm_prefix); + strcat(sem_chunk_done_name, "_ZP_chunk_done"); + strcpy(sem_shutdown_done_name, shm_prefix); + strcat(sem_shutdown_done_name, "_ZP_shutdown_done"); + strcpy(sem_input_avail_name, shm_prefix); + strcat(sem_input_avail_name, "_ZP_input_avail"); + strcpy(shmem_mt_name, ""); + strcpy(file_lock_name, "/tmp/"); + strcat(file_lock_name, shm_prefix); + strcat(file_lock_name, ".lock"); + strcpy(log_name, shm_prefix); + strcat(log_name, "_ZP"); + call_chunk_done = true; + port = 23115; + break; + } + case MemOp: + { + strcpy(shmem_control_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_control_input_name, "_control_input"); + else + strcat(shmem_control_input_name, "_MO_control_input"); + strcpy(shmem_control_output_name, shm_prefix); + strcat(shmem_control_output_name, "_MO_control_output"); + strcpy(shmem_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_input_name, "_input"); + else + strcat(shmem_input_name, "_MO_input"); + if (precompile_results_enabled) + { + strcpy(shmem_precompile_name, shm_prefix); + if (share_input_shm) + strcat(shmem_precompile_name, "_precompile"); + else + strcat(shmem_precompile_name, "_MO_precompile"); + strcpy(sem_prec_avail_name, shm_prefix); + strcat(sem_prec_avail_name, "_MO_prec_avail"); + strcpy(sem_prec_read_name, shm_prefix); + strcat(sem_prec_read_name, "_MO_prec_read"); + } + else + { + strcpy(shmem_precompile_name, ""); + strcpy(sem_prec_avail_name, ""); + strcpy(sem_prec_read_name, ""); + } + strcpy(shmem_output_name, shm_prefix); + strcat(shmem_output_name, "_MO_output"); + strcpy(sem_chunk_done_name, shm_prefix); + strcat(sem_chunk_done_name, "_MO_chunk_done"); + strcpy(sem_shutdown_done_name, shm_prefix); + strcat(sem_shutdown_done_name, "_MO_shutdown_done"); + strcpy(sem_input_avail_name, shm_prefix); + strcat(sem_input_avail_name, "_MO_input_avail"); + strcpy(shmem_mt_name, ""); + strcpy(file_lock_name, "/tmp/"); + strcat(file_lock_name, shm_prefix); + strcat(file_lock_name, ".lock"); + strcpy(log_name, shm_prefix); + strcat(log_name, "_MO"); + call_chunk_done = true; + port = 23117; + break; + } + case ChunkPlayerMTCollectMem: + { + strcpy(shmem_control_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_control_input_name, "_control_input"); + else + strcat(shmem_control_input_name, "_CM_control_input"); + strcpy(shmem_control_output_name, shm_prefix); + strcat(shmem_control_output_name, "_CM_control_output"); + strcpy(shmem_input_name, ""); + strcpy(shmem_precompile_name, ""); + strcpy(sem_prec_avail_name, ""); + strcpy(sem_prec_read_name, ""); + strcpy(sem_input_avail_name, ""); + strcpy(shmem_output_name, shm_prefix); + strcat(shmem_output_name, "_CM_output"); + strcpy(sem_chunk_done_name, ""); + strcpy(sem_shutdown_done_name, ""); + strcpy(shmem_mt_name, shm_prefix); + strcat(shmem_mt_name, "_MT_output"); + strcpy(file_lock_name, "/tmp/"); + strcat(file_lock_name, shm_prefix); + strcat(file_lock_name, ".lock"); + strcpy(log_name, shm_prefix); + strcat(log_name, "_CM"); + call_chunk_done = false; + port = 23119; + break; + } + case MemReads: + { + strcpy(shmem_control_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_control_input_name, "_control_input"); + else + strcat(shmem_control_input_name, "_MT_control_input"); + strcpy(shmem_control_output_name, shm_prefix); + strcat(shmem_control_output_name, "_MT_control_output"); + strcpy(shmem_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_input_name, "_input"); + else + strcat(shmem_input_name, "_MT_input"); + if (precompile_results_enabled) + { + strcpy(shmem_precompile_name, shm_prefix); + if (share_input_shm) + strcat(shmem_precompile_name, "_precompile"); + else + strcat(shmem_precompile_name, "_MT_precompile"); + strcpy(sem_prec_avail_name, shm_prefix); + strcat(sem_prec_avail_name, "_MT_prec_avail"); + strcpy(sem_prec_read_name, shm_prefix); + strcat(sem_prec_read_name, "_MT_prec_read"); + } + else + { + strcpy(shmem_precompile_name, ""); + strcpy(sem_prec_avail_name, ""); + strcpy(sem_prec_read_name, ""); + } + strcpy(shmem_output_name, shm_prefix); + strcat(shmem_output_name, "_MT_output"); + strcpy(sem_chunk_done_name, shm_prefix); + strcat(sem_chunk_done_name, "_MT_chunk_done"); + strcpy(sem_shutdown_done_name, shm_prefix); + strcat(sem_shutdown_done_name, "_MT_shutdown_done"); + strcpy(sem_input_avail_name, shm_prefix); + strcat(sem_input_avail_name, "_MT_input_avail"); + strcpy(shmem_mt_name, ""); + strcpy(file_lock_name, "/tmp/"); + strcat(file_lock_name, shm_prefix); + strcat(file_lock_name, ".lock"); + strcpy(log_name, shm_prefix); + strcat(log_name, "_MT"); + call_chunk_done = true; + port = 23115; + break; + } + case ChunkPlayerMemReadsCollectMain: + { + strcpy(shmem_control_input_name, shm_prefix); + if (share_input_shm) + strcat(shmem_control_input_name, "_control_input"); + else + strcat(shmem_control_input_name, "_CA_control_input"); + strcpy(shmem_control_output_name, shm_prefix); + strcat(shmem_control_output_name, "_CA_control_output"); + strcpy(shmem_input_name, ""); + strcpy(shmem_precompile_name, ""); + strcpy(sem_prec_avail_name, ""); + strcpy(sem_prec_read_name, ""); + strcpy(sem_input_avail_name, ""); + strcpy(shmem_output_name, shm_prefix); + strcat(shmem_output_name, "_CA_output"); + strcpy(sem_chunk_done_name, ""); + strcpy(sem_shutdown_done_name, ""); + strcpy(shmem_mt_name, shm_prefix); + strcat(shmem_mt_name, "_MT_output"); + strcpy(file_lock_name, "/tmp/"); + strcat(file_lock_name, shm_prefix); + strcat(file_lock_name, ".lock"); + strcpy(log_name, shm_prefix); + strcat(log_name, "_CA"); + call_chunk_done = false; + port = 23120; + break; + } + default: + { + printf("ERROR: configure() Invalid gen_method = %u\n", gen_method); + fflush(stdout); + fflush(stderr); + exit(-1); + } + } + + if (precompile_results_enabled && (gen_method == ChunkPlayerMTCollectMem || gen_method == ChunkPlayerMemReadsCollectMain)) + { + printf("ERROR: configure() precompile results enabled is not compatible with generation method %u\n", gen_method); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + if (arguments_port != 0) + { + port = arguments_port; + } + + if (verbose) + { + printf("ziskemuasm configuration:\n"); + printf("\tgen_method=%u\n", gen_method); + printf("\tshm_prefix=%s\n", shm_prefix); + printf("\tfile_lock_name=%s\n", file_lock_name); + printf("\tlog_name=%s\n", log_name); + printf("\tport=%u\n", port); + printf("\tcall_chunk_done=%u\n", call_chunk_done); + printf("\tchunk_size=%lu\n", chunk_size); + printf("\tshmem_control_input=%s\n", shmem_control_input_name); + printf("\tshmem_control_output=%s\n", shmem_control_output_name); + printf("\tshmem_input=%s\n", shmem_input_name); + printf("\tshmem_precompile=%s\n", shmem_precompile_name); + printf("\tshmem_output=%s\n", shmem_output_name); + printf("\tshmem_mt=%s\n", shmem_mt_name); + printf("\tsem_chunk_done=%s\n", sem_chunk_done_name); + printf("\tsem_shutdown_done=%s\n", sem_shutdown_done_name); + printf("\tsem_prec_avail=%s\n", sem_prec_avail_name); + printf("\tsem_prec_read=%s\n", sem_prec_read_name); + printf("\tsem_input_avail=%s\n", sem_input_avail_name); + printf("\tmap_locked_flag=%d\n", map_locked_flag); + printf("\toutput=%u\n", output); + printf("\tprecompile_results_enabled=%u\n", precompile_results_enabled); + printf("\toutput_riscof=%u\n", output_riscof); + } +} \ No newline at end of file diff --git a/emulator-asm/src/configuration.hpp b/emulator-asm/src/configuration.hpp new file mode 100644 index 000000000..8c9cc2c40 --- /dev/null +++ b/emulator-asm/src/configuration.hpp @@ -0,0 +1,7 @@ +#ifndef EMULATOR_ASM_CONFIGURATION_HPP +#define EMULATOR_ASM_CONFIGURATION_HPP + +void parse_arguments(int argc, char *argv[]); +void configure (void); + +#endif // EMULATOR_ASM_CONFIGURATION_HPP \ No newline at end of file diff --git a/emulator-asm/src/constants.hpp b/emulator-asm/src/constants.hpp new file mode 100644 index 000000000..71f3bacb2 --- /dev/null +++ b/emulator-asm/src/constants.hpp @@ -0,0 +1,119 @@ +#ifndef EMULATOR_ASM_CONSTANTS_HPP +#define EMULATOR_ASM_CONSTANTS_HPP + +#include + +/***************/ +/* Definitions */ +/***************/ + +// Address map +// There definitions must match the ZisK rust code ones at core/src/mem.rs used to generate the +// assembly code, and that are used by the assembly code to access memory and generate the trace +#define ROM_ADDR (uint64_t)0x80000000 +#define ROM_SIZE (uint64_t)0x08000000 // 128MB +#define INPUT_ADDR (uint64_t)0x40000000 +#define MAX_INPUT_SIZE (uint64_t)0x40000000 // 1024MB + +#define RAM_ADDR (uint64_t)0xA0000000 +#define RAM_SIZE (uint64_t)0x20000000 // 512MB +#define SYS_ADDR RAM_ADDR +#define SYS_SIZE (uint64_t)0x10000 +#define OUTPUT_ADDR (SYS_ADDR + SYS_SIZE) + +#ifdef TRACE_TARGET_MO + #define TRACE_INITIAL_SIZE (uint64_t)0x180000000 /* 6GB */ + #define TRACE_DELTA_SIZE (uint64_t)0x080000000 /* 2GB */ +#elif defined(TRACE_TARGET_RH) + #define TRACE_INITIAL_SIZE (uint64_t)0x004000000 /* 64MB */ + #define TRACE_DELTA_SIZE (uint64_t)0x004000000 /* 64MB */ +#else + #define TRACE_INITIAL_SIZE (uint64_t)0x180000000 /* 6GB */ + #define TRACE_DELTA_SIZE (uint64_t)0x080000000 /* 2GB */ +#endif + +#define TRACE_ADDR (uint64_t)0xd0000000 +#define TRACE_MAX_SIZE (uint64_t)0x800000000 // 32GB +#define TRACE_NUMBER_OF_CHUNKS (((TRACE_MAX_SIZE - TRACE_INITIAL_SIZE) / TRACE_DELTA_SIZE) + 1) +#define TRACE_SIZE_GRANULARITY (1014*1014) // ROM histogram trace size is round up to a multiple of this granularity + +// Control input and output shared memory configuration. +// Control input is used to tell the assembly code how many precompile result u64 fields have been +// written by the client. Control output is used to tell the client how many precompile result u64 +// fields have been read by the assembly code, so the client can know when it can write new +// precompile results. Assembly code waits when the number of read fields is not lower than the +// number of written fields, and client waits when the number of written fields would exceed the +// number of read fields plus the available precompile shared memory size, which is a circular buffer +#define CONTROL_INPUT_ADDR (uint64_t)0x70000000 +#define CONTROL_INPUT_SIZE (uint64_t)0x1000 // 4kB +#define CONTROL_OUTPUT_ADDR (uint64_t)0x70001000 +#define CONTROL_OUTPUT_SIZE (uint64_t)0x1000 // 4kB +#define CONTROL_RETRY_DELAY_US 1000 // 1ms +#define CONTROL_NUMBER_OF_RETRIES 1000 // 1s max total + +// Maximum number of steps to execute, used by the client to limit the execution steps of the +// assembly code. This limit is set by the ZisK PIL constraints. +#define MAX_STEPS (1ULL << 36) + +// Assembly service request/response types +// Only the methods supported by the configured generation method will be implemented by the server, +// e.g. gen_method=1 => PING, MT and SHUTDOWN; the rest will fail with an error response. +#define TYPE_PING 1 // Ping +#define TYPE_PONG 2 +#define TYPE_MT_REQUEST 3 // Minimal trace +#define TYPE_MT_RESPONSE 4 +#define TYPE_RH_REQUEST 5 // ROM histogram +#define TYPE_RH_RESPONSE 6 +#define TYPE_MO_REQUEST 7 // Memory opcode +#define TYPE_MO_RESPONSE 8 +#define TYPE_MA_REQUEST 9 // Main packed trace +#define TYPE_MA_RESPONSE 10 +#define TYPE_CM_REQUEST 11 // Collect memory trace +#define TYPE_CM_RESPONSE 12 +#define TYPE_FA_REQUEST 13 // Fast mode, do not generate any trace +#define TYPE_FA_RESPONSE 14 +#define TYPE_MR_REQUEST 15 // Mem reads +#define TYPE_MR_RESPONSE 16 +#define TYPE_CA_REQUEST 17 // Collect main trace +#define TYPE_CA_RESPONSE 18 +#define TYPE_SD_REQUEST 1000000 // Shutdown +#define TYPE_SD_RESPONSE 1000001 + +// Server IP address, used by the client to connect to the server +#define SERVER_IP "127.0.0.1" // Change to your server IP; otherwise use localhost IP address + +// Chunk size used in generation methods that generate a trace chunk at every N steps, e.g. gen_method=1 or gen_method=7. +// It must be a power of two, and it is used to calculate the trace address threshold at which the next chunk must be mapped, +// to avoid reaching the end of the currently mapped trace memory. +#define CHUNK_SIZE (1ULL << 18) + +// Maximum trace chunk size, used to determine when the trace address is close to the end of the +// currently mapped trace memory and the next chunk must be mapped. It is calculated based on the +// maximum number of bytes that can be generated in a chunk +// Worst case: every chunk instruction is a keccak operation, with an input data of 200 bytes +// (let's use 256 bytes to be safe), and the trace includes the access to 2 source registers, 2 +// destination registers and 3 memory addresses (e.g. for a keccak operation with 3 memory operands), +// which are the maximum number of registers and memory addresses that can be accessed by a chunk +// instruction, according to the ZisK assembly code generation configuration. + +#define MAX_MTRACE_REGS_ACCESS_SIZE ((2 + 2 + 3) * 8) +#define MAX_TRACE_CHUNK_INFO ((44*8) + 32) +#define MAX_BYTES_DIRECT_MTRACE 256 +#define MAX_BYTES_MTRACE_STEP (MAX_BYTES_DIRECT_MTRACE + MAX_MTRACE_REGS_ACCESS_SIZE) +#define MAX_CHUNK_TRACE_SIZE ((CHUNK_SIZE * MAX_BYTES_MTRACE_STEP) + MAX_TRACE_CHUNK_INFO) + +// Maximum precompile results share memory size +// It is a circular buffer +#define MAX_PRECOMPILE_SIZE (uint64_t)0x400000 // 4MB + +// Maximum chunk mask for zip generation method, which indicates which chunks are included in the trace, +// and must be between 0 and 7 (inclusive), as it is used to generate a mask of 8 bits where each +// bit indicates if the corresponding chunk is included in the trace or not. +#define MAX_CHUNK_MASK 7 + +// Maximum length of the shared memory prefix, e.g. "ZISK_12345" +// This prefix is used to generate the names of the shared memories and semaphores used for +// communication and synchronization between the server and the client, +#define MAX_SHM_PREFIX_LENGTH 64 + +#endif // EMULATOR_ASM_CONSTANTS_HPP \ No newline at end of file diff --git a/emulator-asm/src/dma/check_dynamic_mtrace.asm b/emulator-asm/src/dma/check_dynamic_mtrace.asm new file mode 100644 index 000000000..4f12f7d1d --- /dev/null +++ b/emulator-asm/src/dma/check_dynamic_mtrace.asm @@ -0,0 +1,195 @@ +.intel_syntax noprefix +.code64 + +# +# check_dynamic_mtrace - Check if mtrace buffer needs dynamic expansion +# +# PURPOSE: +# This function determines whether the memory trace (mtrace) buffer needs to be +# dynamically expanded. It should be called whenever the data to be written to +# the trace exceeds the MAX_BYTES_DIRECT_MTRACE threshold. +# +# HOW IT WORKS: +# The function uses a two-level check to minimize overhead: +# +# 1. FAST PATH (most common): Check if current mtrace position plus required +# bytes is below the threshold. If yes, return immediately - no expansion needed. +# +# 2. SLOW PATH: If we're past the threshold, check if this is expected based on +# how many steps have been consumed in the current chunk. We use a worst-case +# assumption where each step consumes MAX_BYTES_MTRACE_STEP bytes. This value +# is slightly larger than MAX_BYTES_DIRECT_MTRACE because it includes other +# mtrace costs like register read operations. +# +# By using worst-case assumptions, we avoid checking on every mtrace write. +# If actual usage is within expected bounds for consumed steps, no expansion +# is needed. Only when actual usage exceeds expected usage do we trigger realloc. +# +# REGISTER USAGE: +# Uses: R_MT_ADDR, R_MT_INDEX, R_STEP, R_COUNT, R_AUX, R_AUX2, R_DST, R_SRC +# Preserves: All XMM registers (saved/restored only when calling _realloc_trace) +# Preserves: r8, r10, r11, R_SRC, R_DST, R_COUNT (when calling _realloc_trace) +# +# PARAMETERS (using DMA register convention): +# R_MT_ADDR = Base address of mtrace buffer +# R_MT_INDEX = Current index into mtrace buffer (in qwords) +# R_STEP = Steps remaining to end of chunk +# R_COUNT = Bytes required by current request (with margin) +# +# RETURN VALUE: +# None (returns via ret, mtrace may have been expanded) +# +# PERFORMANCE: +# - Fast path (no expansion needed): ~8-9 cycles +# - Slow path (within expected bounds): ~15-17 cycles +# - Realloc path: ~200-600 cycles (includes XMM save/restore + _realloc_trace call) +# +# SIDE EFFECTS: +# - May trigger _realloc_trace which expands the mtrace buffer +# - Updates trace_address_threshold after realloc + +.global check_dynamic_mtrace + +.extern fast_dma_encode +.extern trace_address_threshold +# .extern trace_resize_request + +.ifdef DEBUG +.section .data +.align 8 + dma_check_case: .quad 0 + dma_check_step: .quad 0 + dma_check_aux: .quad 0 + dma_check_threshold: .quad 0 +.endif + +.include "dma_constants.inc" + +.section .text + +# REGISTER INPUTS: +# R_MT_ADDR = base address of mtrace buffer +# R_MT_INDEX = current index into mtrace (qwords) +# R_STEP = steps remaining until end of chunk +# R_COUNT = bytes needed by current request + +check_dynamic_mtrace: + + # FAST PATH: Check if we're below the threshold + # + # trace_address_threshold = TRACE_ADDR + trace_size - MAX_CHUNK_TRACE_SIZE + # This gives us the "safe" limit before considering reallocation. + # Calculate: current_addr + required_bytes + margin, compare with threshold. + +.ifdef DEBUG + mov qword ptr [dma_check_case], 1 +.endif + + lea R_AUX, [R_MT_ADDR + 8 * R_MT_INDEX] # 1 cycle - current mtrace address + lea R_AUX, [R_AUX + R_COUNT + MAX_DMA_MT_MARGIN] # 1 cycle - add required bytes + safety margin + sub R_AUX, [trace_address_threshold] # ~4 cycles - bytes over threshold (negative = OK) + jnc .L_calculate_current_margin # 2 cycles (predicted) - if negative, space available + ret # FAST PATH EXIT: ~8-9 cycles total + + # SLOW PATH: Check if usage is within expected bounds for consumed steps + # + # Instead of checking every time, we use worst-case assumptions: + # - Each step may consume up to MAX_BYTES_MTRACE_STEP bytes + # - MAX_BYTES_MTRACE_STEP > MAX_BYTES_DIRECT_MTRACE (includes register reads, etc.) + # - If actual usage <= (steps_consumed * MAX_BYTES_MTRACE_STEP), no realloc needed + # + # R_STEP contains steps REMAINING to end of chunk + # steps_consumed = CHUNK_SIZE - R_STEP + +.L_calculate_current_margin: + +.ifdef DEBUG + mov qword ptr [dma_check_case], 2 + mov [dma_check_step], R_STEP + mov [dma_check_aux], R_AUX +.endif + + # Calculate expected worst-case mtrace usage for consumed steps + mov R_AUX2, CHUNK_SIZE # 1 cycle - total steps per chunk + sub R_AUX2, R_STEP # 1 cycle - steps_consumed = total - remaining + imul R_AUX2, MAX_BYTES_MTRACE_STEP # ~3 cycles - expected_bytes = steps * worst_case_per_step + cmp R_AUX2, R_AUX # 1 cycle - compare expected vs actual overflow + jb .L_call_realloc # 2 cycles (predicted) - if expected < actual, need realloc + ret # SLOW PATH EXIT: ~15-17 cycles total + +.L_call_realloc: + + # REALLOC PATH: Actual usage exceeds expected bounds, must expand mtrace + # + # Save all volatile registers since _realloc_trace follows System V ABI + # and may use any of them. We're being called from non-ABI-compliant code, + # so we must preserve everything our caller expects. +.ifdef DEBUG + mov qword ptr [dma_check_case], 3 + mov R_AUX, [trace_address_threshold] + mov [dma_check_threshold], R_AUX +.endif + + # Save general purpose registers used by DMA operations + push R_COUNT # 1 cycle - save count + push r8 # 1 cycle + push r10 # 1 cycle + push r11 # 1 cycle + push R_SRC # 1 cycle - save source address + push R_DST # 1 cycle - save destination address + + # Allocate stack for XMM registers (16 registers x 16 bytes + 8 for alignment) + # Note: We're inside a call, so stack is unaligned to 16 bytes + sub rsp, 16*16 + 8 # 1 cycle - allocate 264 bytes + + # Save all XMM registers (may be used by caller for optimized operations) + movaps [rsp + 0*16], xmm0 # 1 cycle - aligned 128-bit stores + movaps [rsp + 1*16], xmm1 # 1 cycle + movaps [rsp + 2*16], xmm2 # 1 cycle + movaps [rsp + 3*16], xmm3 # 1 cycle + movaps [rsp + 4*16], xmm4 # 1 cycle + movaps [rsp + 5*16], xmm5 # 1 cycle + movaps [rsp + 6*16], xmm6 # 1 cycle + movaps [rsp + 7*16], xmm7 # 1 cycle + movaps [rsp + 8*16], xmm8 # 1 cycle + movaps [rsp + 9*16], xmm9 # 1 cycle + movaps [rsp + 10*16], xmm10 # 1 cycle + movaps [rsp + 11*16], xmm11 # 1 cycle + movaps [rsp + 12*16], xmm12 # 1 cycle + movaps [rsp + 13*16], xmm13 # 1 cycle + movaps [rsp + 14*16], xmm14 # 1 cycle + movaps [rsp + 15*16], xmm15 # 1 cycle + + call _realloc_trace # ~5 cycles call + ~100-500 cycles function body + + # Restore all XMM registers + movaps xmm0, [rsp + 0*16] # 1 cycle - aligned 128-bit loads + movaps xmm1, [rsp + 1*16] # 1 cycle + movaps xmm2, [rsp + 2*16] # 1 cycle + movaps xmm3, [rsp + 3*16] # 1 cycle + movaps xmm4, [rsp + 4*16] # 1 cycle + movaps xmm5, [rsp + 5*16] # 1 cycle + movaps xmm6, [rsp + 6*16] # 1 cycle + movaps xmm7, [rsp + 7*16] # 1 cycle + movaps xmm8, [rsp + 8*16] # 1 cycle + movaps xmm9, [rsp + 9*16] # 1 cycle + movaps xmm10, [rsp + 10*16] # 1 cycle + movaps xmm11, [rsp + 11*16] # 1 cycle + movaps xmm12, [rsp + 12*16] # 1 cycle + movaps xmm13, [rsp + 13*16] # 1 cycle + movaps xmm14, [rsp + 14*16] # 1 cycle + movaps xmm15, [rsp + 15*16] # 1 cycle + + add rsp, 16*16 +8 # 1 cycle - deallocate stack space + + # Restore general purpose registers + pop R_DST # 1 cycle - restore destination address + pop R_SRC # 1 cycle - restore source address + pop r11 # 1 cycle + pop r10 # 1 cycle + pop r8 # 1 cycle + pop R_COUNT # 1 cycle - restore count +.L_memcpy_mtrace_continue: + ret + +.section .note.GNU-stack,"",%progbits diff --git a/emulator-asm/src/dma/direct_inputcpy_mops.asm b/emulator-asm/src/dma/direct_inputcpy_mops.asm new file mode 100644 index 000000000..1503c2f00 --- /dev/null +++ b/emulator-asm/src/dma/direct_inputcpy_mops.asm @@ -0,0 +1,240 @@ +.intel_syntax noprefix +.code64 + +################################################################################ +# inputcpy_mops - Optimized inputcpy with memory ops tracing +# +# This function performs two main tasks: +# 1. Records all addresses of memory operations (read and write addresses) +# 2. Performs the actual inputcpy operation filling with free-inputs +# +# REGISTER USAGE: +# Uses general-purpose registers: rax, rbx, rcx, rdx, rdi, rsi, r8, r9, r12, r13 +# Does NOT use XMM registers (caller doesn't need to save them) +# Preserves callee-saved registers (rbx, r12, r13 saved/restored in wrapper) +# +# PARAMETERS (NON System V AMD64 ABI): +# rdi = dst (u64) - Destination address to fill +# rsi = value (u8 in low byte) - Byte value to set (0-255) +# rdx = count (usize) - Number of bytes to set +# r12 = mops_base_addr (u64*) - Pointer to memory ops trace buffer base +# r13 = mops_index (usize) - Current index in mops buffer (input/output) +# +################################################################################ + +.global direct_dma_inputcpy_mops +.extern fast_memcpy +.extern fcall_ctx + +.include "dma_constants.inc" + +.section .text + +# Direct entry point for assembly callers (no ABI overhead) +# More efficient when caller manages register preservation + +# arguments: +# rdi: destination adress +# rdx: count (bytes) +# r12 + r13: mops trace + +direct_dma_inputcpy_mops: + + # Modified registers (caller must handle): + # r9 = scratch for mops address calculation + # rcx = mops index (incremented, output) + + # test count = 0 + test rdx, rdx + jz .L_inputcpy_mops_done + + # test dst aligned + test rdi, 0x7 + jnz .L_inputcpy_mops_rdi_unaligned + + # test count multiple of 8 + test rdx, 0x07 + jnz .L_inputcpy_mops_count_remain + + # FAST BRANCH + # dst is aligned, count is a multiple of 8 and greater than zero + # => no pre-reads, only one MOPS write block + + # FAST BRANCH - MOPS (MOPS_ALIGNED_BLOCK_WRITE) + + mov rax, rdx + shr rax, 3 + shl rax, MOPS_BLOCK_WORDS_RS # 1 cycle - shift to block words field position + mov r9, MOPS_ALIGNED_BLOCK_WRITE # 1 cycle - rcx = block write flags + add r9, rax + add r9, rdi # rdi aligned + + mov [r12 + r13 * 8], r9 # ~4 cycles - write mops entry (block write) + inc r13 # 1 cycle - advance mops index + + jmp fast_inputcpy + # fast_inputcpy "execute" the return + + +.L_inputcpy_mops_count_remain: + # BRANCH 1 + # dst is aligned, but count is NOT a multiple of 8, + # => one pre-read (post) before one MOPS write block + # NOTE: if count < 8 no problem, because you need to do read and write. + + # BRANCH 1 - MOPS (MOPS_ALIGNED_READ (POST) + MOPS_ALIGNED_BLOCK_WRITE) + + # BRANCH 1 - common MOPS part + + lea r9, [rdx + 7] + shr r9, 3 + + # BRANCH 1 - specific MOPS pre-read part + + lea rcx, [rdi + r9 * 8 - 8] + mov rax, MOPS_ALIGNED_READ + add rcx, rax + mov [r12 + r13 * 8], rcx + + # BRANCH 1 - specific MOPS block write + # set rcx = qwords to write + + shl r9, MOPS_BLOCK_WORDS_RS # 1 cycle - shift to block words field position + mov rax, MOPS_ALIGNED_BLOCK_WRITE # 1 cycle - rcx = block write flags + add rax, r9 + add rax, rdi # rdi is aligned in this path + + mov [r12 + r13 * 8 + 8], rax # ~4 cycles - write mops entry (block write) + add r13, 2 + + jmp fast_inputcpy + # fast_inputcpy "execute" the return + +.L_inputcpy_mops_rdi_unaligned: + # BRANCH 2 - worse + # dst is NOT aligned + # => BRANCH 2.1 one pre-read (pre) + no post + # => BRANCH 2.2 one pre-read (pre) + second post pre-read + + # [EC] only PRE but [rdi + rdx] & 0x07 !== 0 + mov rcx, rdi + and rcx, 0x07 + lea rcx, [rcx + rdx + 7] # optimization to be used in this branch + test rcx, 0xFFFFFFFFFFFFFFF0 # (rcx + rdx) > 8 => (rcx + rdx + 7) > 15 => + # (rcx + rdx + 7) & 0xF..F0 != 0 + jnz .L_pre_branch_2_2 + jmp .L_branch_2_1 + +.L_pre_branch_2_2: + lea rax, [rcx - 7] + test rax, 0x7 + jnz .L_branch_2_2 + +.L_branch_2_1: + + # BRANCH 2.1 - MOPS (MOPS_ALIGNED_READ (PRE) + MOPS_ALIGNED_BLOCK_WRITE) + + # BRANCH 2.1 - specific MOPS block write + # NOTE: at least one qword because count > 0 + # rcx = (rdi & 0x7) + rdx + 7 ==> (rcx >> 3) qwords + mov rax, rdi + and rax, ALIGN_MASK + shr rcx, 3 + shl rcx, MOPS_BLOCK_WORDS_RS # 1 cycle - shift to block words field position + mov r9, MOPS_ALIGNED_BLOCK_WRITE # 1 cycle - rcx = block write flags + add rcx, r9 + add rcx, rax + + mov [r12 + r13 * 8 + 8], rcx # ~4 cycles - write mops entry (block write) + + # BRANCH 2.1 - specific MOPS pre-read part PRE + # rax = rdi & ALIGN_MASK + + mov rcx, MOPS_ALIGNED_READ + add rcx, rax + mov [r12 + r13 * 8], rcx + add r13, 2 + + jmp fast_inputcpy + # fast_inputcpy "execute" the return + +.L_branch_2_2: + + # BRANCH 2.2 - MOPS (2xMOPS_ALIGNED_READ (PRE/POST) + MOPS_ALIGNED_BLOCK_WRITE) + # BRANCH 2.2 - specific MOPS pre-read part PRE + # rcx = (rdi & 0x7) + rdx + 7 ==> (rcx >> 3) qwords + + shr rcx, 3 + shl rcx, MOPS_BLOCK_WORDS_RS # 1 cycle - shift to block words field position + mov rax, MOPS_ALIGNED_BLOCK_WRITE # 1 cycle - rcx = block write flags + add rax, rcx # rax = MOPS_ALIGNED_BLOCK_WRITE | (count_q << MOPS_BLOCK_WORDS_RS) + mov rcx, rdi + and rcx, ALIGN_MASK + add rax, rcx # rax |= rdi & ALIGN MASK + + mov [r12 + r13 * 8 + 16], rax # ~4 cycles - write mops entry (block write) + + # rcx = rdi & ALIGN_MASK + # BRANCH 2.2 - PRE write + + mov rax, MOPS_ALIGNED_READ + + add rcx, rax # rcx = rdi & ALIGN_MASK + mov [r12 + r13 * 8], rcx + + # BRANCH 2.2 - POST write + + lea r9, [rdi + rdx] + and r9, ALIGN_MASK + add r9, rax + mov [r12 + r13 * 8 + 8], r9 + + add r13, 3 + + # rsi = input + mops + # incr mops + + jmp fast_inputcpy + + # fast_inputcpy "execute" the return + +.L_inputcpy_mops_done: + ret + + + +# Performance estimate (Modern x86-64, Intel Skylake/AMD Zen+, L1 cache hits): +# +# MEMSET OPERATION WITH MOPS TRACING: +# - fast_dma_encode call: ~15-20 cycles (function call + table lookup) +# - Pre-read mops entry: ~8-10 cycles (if pre_count > 0: calc + and + store + inc) +# - Post-read mops entry: ~10-12 cycles (if post_count > 0: lea + and + add + store + inc) +# - Block write mops entry: ~12-15 cycles (extract + shift + combine + store + inc) +# - Byte value expansion: ~5-6 cycles (movzx + mov + imul) +# - Qword fill (rep stosq): ~0.5-1.0 cycles per qword (ERMSB optimization) +# - Remaining bytes (rep stosb): ~1.0-2.0 cycles per byte (0-7 bytes) +# - Function overhead: ~3-5 cycles (branches, return) +# +# TOTAL (typical case, 64 bytes, aligned, no pre/post): +# ~15 (encode) + ~15 (block mops) + ~6 (expand) + 8*0.75 (fill) + ~4 (overhead) +# = ~46 cycles (~1.39 GB/s @ 3 GHz) +# +# TOTAL (misaligned case, 64 bytes with pre/post): +# ~15 (encode) + ~10 (pre) + ~12 (post) + ~15 (block) + ~6 (expand) + 7*0.75 + 4*1.5 (fill) + ~4 +# = ~73 cycles (~0.88 GB/s @ 3 GHz) +# +# TOTAL (large fill, 4096 bytes, aligned): +# ~15 (encode) + ~15 (mops) + ~6 (expand) + 512*0.5 (fill) + ~4 (overhead) +# = ~296 cycles (~13.8 GB/s @ 3 GHz, approaching L1D bandwidth) +# +# NOTES: +# - Assumes L1D cache hits for all memory accesses (~4 cycle latency, ~64 GB/s bandwidth) +# - rep stosq/stosb uses Enhanced REP MOVSB/STOSB (ERMSB) on modern CPUs (post-2013) +# - ERMSB enables microcode to use wide stores (16-64 bytes per iteration internally) +# - For fills >256 bytes, performance approaches memory bandwidth limits +# - Actual cycles vary ±20-30% by microarchitecture (Skylake/Zen/Alder Lake) +# - Mops overhead: ~30-50 cycles base + minimal per-byte impact +# - No overlap handling needed for inputcpy (writes only, no read-modify-write hazards) + +# Mark stack as non-executable (required by modern linkers) +.section .note.GNU-stack,"",%progbits diff --git a/emulator-asm/src/dma/direct_inputcpy_mtrace.asm b/emulator-asm/src/dma/direct_inputcpy_mtrace.asm new file mode 100644 index 000000000..9d0691e59 --- /dev/null +++ b/emulator-asm/src/dma/direct_inputcpy_mtrace.asm @@ -0,0 +1,194 @@ +.intel_syntax noprefix +.code64 + +# +# inputcpy_mtrace - Optimized version with memory tracing and actual copy +# +# This function performs three main tasks: +# 1. Encodes inputcpy metadata (offsets, counts, flags) using fast_dma_encode +# 2. Records memory trace (pre-values and src data for verification/rollback) +# 3. Performs the actual memory copy from src to dst (with overlap handling) +# +# REGISTER USAGE: +# Uses general-purpose registers: rax, rbx, rcx, rdx, rdi, rsi, r9, r12, r13 +# Does NOT use XMM registers (caller doesn't need to save them) +# Preserves callee-saved registers (rbx, r12, r13 saved/restored in wrapper) +# +# PARAMETERS (NON System V AMD64 ABI): +# rdi = dst (u64) - Destination address +# rsi = reserved - Input source +# rdx = count (usize) - Number of bytes to copy +# [r12 + r13*8] = trace_ptr (u64*) - Pointer to memory trace buffer (input/output) +# +# RETURN VALUE: +# RAX = Number of 64-bit words written to trace buffer +# +# MEMORY TRACE FORMAT (written to trace buffer sequentially): +# [0] = Encoded metadata (64-bit value with offsets, counts, flags) +# [1] = Pre-write value at aligned(dst) IF pre_count > 0 +# [1 or 2] = Post-write value at aligned(dst+count) IF post_count > 0 +# [...] = All aligned qwords from aligned(src) to aligned(src+count) +# +# The trace buffer captures: +# - Original destination values (for undo/verification) +# - Source data (for verification) +# - Metadata needed to reconstruct the operation +# +# MEMORY COPY BEHAVIOR: +# - Handles overlapping src/dst correctly (like memmove) +# - For non-overlapping: optimized copy using pre_count/loop_count/post_count +# - For overlapping: backward byte-by-byte copy to avoid corruption +# +# PERFORMANCE: +# - Encoding: ~15-20 cycles (function call to fast_dma_encode, table lookup) +# - Trace writes: ~4 cycles per qword write +# - Src data copy to trace: ~1.5-2 cycles per qword (rep movsq) +# - Final inputcpy (non-overlap): ~1.5-2 cycles per qword (rep movsq aligned) +# - Final inputcpy (overlap): ~100-150 cycles overhead + ~4-5 cycles per byte (std/rep movsb/cld) +# +# SIDE EFFECTS: +# - Modifies memory at dst (count bytes) +# - Modifies trace buffer (variable size depending on pre/post counts) +# - Preserves direction flag (cld called after any std) + +.global direct_dma_inputcpy_mtrace +.global direct_dma_inputcpy_mtrace_with_count_check + +.extern trace_address_threshold +.extern fcall_ctx +.extern fast_memcpy +.extern fast_memcpy64 + +.include "dma_constants.inc" +.include "fast_dma_encode_macro.inc" + +.section .text + +.set R_MT_INDEX, r13 +.set R_MT_ADDR, r12 +.set R_STEP, r14 +.set R_AUX, r9 +.set R_AUX2, rcx # NOTE: used by rep +.set R_SRC, rsi # NOTE: used by rep +.set R_DST, rdi # NOTE: used by rep +.set R_COUNT, rdx +.set R_ENCODE, rax + + +# DIRECT CALL +# RDI = DST +# RSI = RESERVED(INPUT) +# RDX = COUNT +# RCX = TRACE + +direct_dma_inputcpy_mtrace_with_count_check: + + # Call fast_dma_encode to calculate encoding + # Parameters already in correct registers: R_DST=dst, R_SRC=src, R_COUNT=count + # Result will be returned in R_ENCODE (encoded value) + + cmp R_COUNT, MAX_DMA_BYTES_DIRECT_MTRACE # 1 cycle - check if count exceeds direct threshold + ja .L_inputcpy_check_dynamic_trace # 2 cycles (not taken usually) - large count, check trace space + jmp direct_dma_inputcpy_mtrace + +.L_inputcpy_check_dynamic_trace: + call check_dynamic_mtrace + +direct_dma_inputcpy_mtrace: + + # Call fast_dma_encode to calculate encoding + # Parameters already in correct registers: R_DST=dst, R_SRC=src, R_COUNT=count + # Result will be returned in R_ENCODE (encoded value) + + FAST_DMA_ENCODE_NO_SRC + + mov [R_MT_ADDR + R_MT_INDEX * 8], R_ENCODE # ~4 cycles - write encoded result to mem trace + inc R_MT_INDEX # 1 cycle - advance R_MT_INDEX (mem trace index) + +.L_pre_dst_to_mtrace: + # If pre_count > 0, write aligned dst value to trace + test R_ENCODE, DMA_PRE_COUNT_MASK # 1 cycle - check if pre_count > 0 + jz .L_post_dst_to_mtrace # 2 cycles (predicted taken) + + # Branch with pre_count > 0: save original dst value before it's overwritten + mov R_AUX, R_DST # 1 cycle - get original dst + and R_AUX, ALIGN_MASK # 1 cycle - align to 8-byte boundary + mov R_AUX, [R_AUX] # ~4 cycles - read qword from aligned dst + mov [R_MT_ADDR + R_MT_INDEX * 8], R_AUX # ~4 cycles - write dst pre-value to trace + inc R_MT_INDEX # 1 cycle - advance trace index + +.L_post_dst_to_mtrace: + + # If post_count > 0, write aligned (dst+count) value to trace + test R_ENCODE, DMA_POST_COUNT_MASK # 1 cycle - check if post_count > 0 + jz .L_input_to_mtrace # 2 cycles (predicted taken) - skip to input copy + + lea R_AUX, [R_DST + R_COUNT - 1] # 1 cycle - R_AUX = dst + count - 1 (last dst byte) + and R_AUX, ALIGN_MASK # 1 cycle - align to 8-byte boundary + mov R_AUX, [R_AUX] # ~4 cycles - read qword at (dst+count) aligned + mov [R_MT_ADDR + R_MT_INDEX * 8], R_AUX # ~4 cycles - write dst post-value to trace + inc R_MT_INDEX # 1 cycle - advance trace index + +.L_input_to_mtrace: + # Copy input data to trace buffer, always aligned. + # Total qwords = (count + 7) + + lea R_AUX2, [R_COUNT + 7] # 1 cycle - R_AUX2 = count + 7 + shr R_AUX2, 3 # 1 cycle - R_AUX2 = round_up(count/8) + + mov R_AUX, qword ptr [fcall_ctx + FCALL_RESULT_GOT * 8] + lea R_SRC, [fcall_ctx + R_AUX * 8 + FCALL_RESULT * 8 - 8] + + push R_DST # ~3 cycles - save dst pointer + lea R_DST, [R_MT_ADDR + R_MT_INDEX * 8] # 1 cycle - R_DST = trace buffer destination + add R_MT_INDEX, R_AUX2 # 1 cycle - advance trace index by qwords copied + + push R_COUNT + mov R_COUNT, R_AUX2 + call fast_memcpy64 + # rep movsq # ~1.5-2 cycles per qword (hardware optimized) + + pop R_COUNT + pop R_DST # ~3 cycles - restore dst pointer + + mov R_AUX2, R_COUNT + jmp fast_inputcpy +.L_done: + ret # ~5 cycles + +# Performance estimate (Modern x86-64, L1 cache hits): +# +# NON-OVERLAPPING FORWARD COPY PATH: +# - fast_dma_encode call: ~15-20 cycles (function call + table lookup) +# - Write encoding to trace: ~4 cycles +# - Pre-value trace (conditional): ~12 cycles (if pre_count > 0) +# - Post-value trace (conditional): ~12 cycles (if post_count > 0) +# - Source data to trace: ~1.5-2 cycles per qword (rep movsq) +# - Pre-bytes copy: ~3-5 cycles per byte (if pre_count > 0, max 7 bytes) +# - Aligned qwords copy: ~1.5-2 cycles per qword (rep movsq, main data) +# - Post-bytes copy: ~3-5 cycles per byte (if post_count > 0, max 7 bytes) +# - Function overhead: ~10 cycles (push/pop, branches, return) +# +# TOTAL (best case, aligned, no pre/post): +# ~30 cycles base + ~2 cycles per qword (trace + copy) +# +# TOTAL (typical case, some alignment): +# ~50 cycles base + ~2 cycles per qword + ~4 cycles per pre/post byte +# +# OVERLAPPING BACKWARD COPY PATH: +# - Same trace overhead: ~30-50 cycles +# - std instruction: ~20-50 cycles (serializing, causes pipeline flush) +# - Backward byte copy: ~3-5 cycles per byte (rep movsb backward) +# - cld instruction: ~20-50 cycles (serializing, causes pipeline flush) +# +# TOTAL (overlap, worst case): +# ~100-150 cycles base + ~4-5 cycles per byte +# +# NOTES: +# - Assumes L1 cache hits for all memory accesses +# - rep movsq/movsb performance varies by microarchitecture +# - Actual cycles may vary ±20% depending on CPU model and memory alignment +# - Fast path (aligned, no overlap) is ~2-3x faster than overlap path + +# Mark stack as non-executable (required by modern linkers) +.section .note.GNU-stack,"",%progbits diff --git a/emulator-asm/src/dma/direct_memcmp_mops.asm b/emulator-asm/src/dma/direct_memcmp_mops.asm new file mode 100644 index 000000000..2b1bc5e19 --- /dev/null +++ b/emulator-asm/src/dma/direct_memcmp_mops.asm @@ -0,0 +1,280 @@ +.intel_syntax noprefix +.code64 + +################################################################################ +# memcmp_mops - Optimized version with memory ops tracing and actual copy. This +# is an variant of memcmp operation, xmemcmp operation, that it +# doesn't read the count from zisk memory. +# +# This function performs two main tasks: +# 1. Records all addresses of memory operations (read and write addresses) +# 2. Performs the actual memory copy from src to dst (with overlap handling) +# +# REGISTER USAGE: +# Uses general-purpose registers: rax, rbx, rcx, rdx, rdi, rsi, r9, r11, r12, r13 +# Does NOT use XMM registers (caller doesn't need to save them) +# Preserves callee-saved registers (rbx, r12, r13 saved/restored in wrapper) +# +# PARAMETERS (NON System V AMD64 ABI): +# rdi -> rbx = dst (u64) - Destination address +# rsi -> rax = src (u64) - Source address +# rdx -> count (usize) - Number of bytes to copy +# [r12 + r13*8] = trace_ptr (u64*) - Pointer to memory trace buffer (input/output) +# +# MEMORY COPY BEHAVIOR: +# - Handles overlapping src/dst correctly (like memmove) +# - For non-overlapping: optimized copy using pre_count/loop_count/post_count +# - For overlapping: backward byte-by-byte copy to avoid corruption +################################################################################ + +.global direct_dma_memcmp_mops +.global direct_dma_xmemcmp_mops +.extern fast_dma_encode +.extern fast_memcpy +.extern fast_memcpy64 + +.include "dma_constants.inc" +.include "fast_dma_encode_macro.inc" + +.section .text + +# call directly from assembly without standard ABI call +# more eficient + +direct_dma_memcmp_mops: + + # updated registers: + # r9 = no save (value_reg) + # rcx = no save (available from asm) + # rdi = no save (available from asm) + # rsi = no save (available from asm) + # r13 = with new mops index (output) + # rax = encoded + + mov r9, (MOPS_ALIGNED_READ + EXTRA_PARAMETER_ADDR) + mov [r12 + r13 * 8], r9 + inc r13 + +direct_dma_xmemcmp_mops: + + # Call fast_dma_encode to calculate encoding + # Parameters already in correct registers: rdi=dst, rsi=src, rdx=count + # Result will be returned in rax (encoded value) + + test rdx, rdx + jz .L_dma_memcmp_mops_count_zero + + call fast_memcmp + + # in case of original count > 0, the effective count must be > 0 because at least + # need check one byte to see if are equals or not + + # at end the memcpy must return rax with correct value, but these information + # could be extract from encoded, this is the stragegy to avoid manage two values + # encoded and result + + mov r9, rax + and r9, 0x1FF + jz .L_fast_dma_memcmp_encode_eq + + # Non-equal case: use table with NEQ offset + FAST_DMA_ENCODE_MEMCMP FAST_ENCODE_TABLE_WO_NEQ_SIZE + + shl r9, DMA_CMP_RES_RS # 1 cycle - shift cmp_result to position (bits 21-28) + or rax, r9 # 1 cycle - combine with encoding + jmp .L_pre_dst_to_mops # 1 cycle + +.L_fast_dma_memcmp_encode_eq: + # Equal case: use base table (offset 0) + FAST_DMA_ENCODE_MEMCMP 0 + +.L_pre_dst_to_mops: + # If pre_count > 0, write aligned dst value to trace + test rax, DMA_PRE_COUNT_MASK # 1 cycle - check if pre_count > 0 + jz .L_post_dst_to_mops # 2 cycles (predicted taken) + +.L_pre_is_active: + # Branch with pre_count > 0: save original dst value before it's overwritten + mov r9, MOPS_ALIGNED_READ # r9 = flags aligned read + add r9, rdi # 1 cycle - get original dst + and r9, ALIGN_MASK # 1 cycle - align to 8-byte boundary + mov [r12 + r13 * 8], r9 # ~4 cycles - write dst pre-address to trace + + test rax, DMA_DOUBLE_SRC_PRE_MASK + jnz .L_pre_double_src_to_mops + +.L_pre_single_src_to_mops: + + mov r9, MOPS_ALIGNED_READ # r9 = flags aligned read + add r9, rsi # 1 cycle - get original src + and r9, ALIGN_MASK # 1 cycle - align to 8-byte boundary + mov [r12 + r13 * 8 + 8], r9 # ~4 cycles - write src address + jmp .L_pre_src_inc_mops_index + +.L_pre_double_src_to_mops: + + mov r9, MOPS_ALIGNED_READ_2W # r9 = flags double read + add r9, rsi # 1 cycle - get original src + and r9, ALIGN_MASK # 1 cycle - align to 8-byte boundary + mov [r12 + r13 * 8 + 8], r9 # ~4 cycles - write src address + +.L_pre_src_inc_mops_index: + add r13, 2 # add 2 (pre-write, block single/dual src) + +.L_post_dst_to_mops: + + # If post_count > 0, write aligned (dst+count) value to trace + test rax, DMA_POST_COUNT_MASK # 1 cycle - check if post_count > 0 + jz .L_src_to_mops # 2 cycles (predicted taken) - skip to src copy + +.L_post_is_active: + # preparing post pre-write read + mov rcx, MOPS_ALIGNED_READ # rcx = flags aligned read + lea r9, [rdi + rdx - 1] # 1 cycle - r9 = dst + count - 1 (last dst byte) + and r9, ALIGN_MASK # 1 cycle - align to 8-byte boundary + add r9, rcx # 1 cycle - r9 mops with dst aligned address + mov [r12 + r13 * 8], r9 # ~4 cycles - write dst post-value to trace + + # preparing post src read, calculating base src address + mov r9, rax + shr r9, DMA_PRE_AND_LOOP_BYTES_RS + add r9, rsi + and r9, ALIGN_MASK + + # check if single read or double read + test rax, DMA_DOUBLE_SRC_POST_MASK + jnz .L_post_double_src_to_mops + +.L_post_single_src_to_mops: + # not double read, load flags and store in mops trace + mov rcx, MOPS_ALIGNED_READ # r9 = flags aligned read + add r9, rcx # 1 cycle - get original src + mov [r12 + r13 * 8 + 8], r9 # ~4 cycles - write src address + jmp .L_post_src_inc_mops_index + +.L_post_double_src_to_mops: + # its double read, load flags for double read, and store in mops trace + mov rcx, MOPS_ALIGNED_READ_2W # r9 = flags aligned read + add r9, rcx # 1 cycle - get original src + mov [r12 + r13 * 8 + 8], r9 # ~4 cycles - write src address + +.L_post_src_inc_mops_index: + # adding two "slots", because we store pre-write read and source-read + add r13, 2 # add 2 (pre-write, block single/dual src) + +.L_src_to_mops: + # extract loop count from encoded + mov rcx, rax # 1 cycle - rcx = encoded + shr rcx, DMA_LOOP_COUNT_RS # 1 cycle - rcx = loop (32 bits) + + # check edge case loop_count = 0 + jz .L_prepare_result + shl rcx, MOPS_BLOCK_WORDS_RS # 1 cycle - rcx = loop | 0 (4 bits) | (32 bits) + + # in case of unaligned loop, add and extra read because each qword verification + # of dst need part of current read and part of next read. + + test rax, DMA_UNALIGNED_DST_SRC_MASK + jnz .L_src_extra_for_unaligned_loop + + # add flags of aligned block read + mov r9, MOPS_ALIGNED_BLOCK_READ # 1 cycle - r9 = read block + jmp .L_src_block_before_address + +.L_src_extra_for_unaligned_loop: + # add special flags with that add one to current count loop count in r9 + mov r9, MOPS_ALIGNED_BLOCK_READ + MOPS_BLOCK_ONE_WORD + +.L_src_block_before_address: + # at this point in r9 we have flags, and lenght but we need to add the + # base src address. For do it, first we need to know if we need to pass + # the first src because it was used only for pre part. + add r9, rcx + test rax, DMA_SRC64_INC_BY_PRE_MASK + jnz .L_src_incr_by_pre + add r9, rsi # 1 cycle - rcx = first block src address + jmp .L_src_to_mops_ready + +.L_src_incr_by_pre: + # in this patch the first src address is used exclusively by pre part, for this + # reason we add rsi + 8 to r9 + lea r9, [rsi + r9 + 8] + +.L_src_to_mops_ready: + # before store all, we need to align them + and r9, ALIGN_MASK + mov [r12 + r13 * 8], r9 # ~4 cycles - write first block src read address + + # rcx = loop_count + mov r9, MOPS_ALIGNED_BLOCK_READ + add rcx, r9 + test rax, DMA_PRE_COUNT_MASK + jz .L_dst_loop_has_not_offset + add rcx, 8 + +.L_dst_loop_has_not_offset: + add rcx, rdi + and rcx, ALIGN_MASK + mov [r12 + r13 * 8 + 8], rcx # ~4 cycles - write first block src read address + + add r13, 2 + +.L_prepare_result: + # how we are in comparation, we don't write pre/post parts because when pre-read + # this parts and with this is enough. In case of loop is different because we don't + # pre-read for this reason we need to read to verify that are equals. The loop part + # only could verify that all are equal. + + # extract result from encoded + shr rax, DMA_CMP_RES_RS + test rax, 0x100 + jnz .L_memcmp_mops_negative_res + and rax, 0xFF + jmp .L_memcmp_mops_res_ready + +.L_memcmp_mops_negative_res: + or rax, 0xFFFFFFFFFFFFFF00 + +.L_memcmp_mops_res_ready: + ret + +.L_dma_memcmp_mops_count_zero: + xor rax, rax + ret + +# Performance estimate (Modern x86-64, L1 cache hits): +# +# NON-OVERLAPPING FORWARD COPY PATH: +# - fast_dma_encode call: ~15-20 cycles (function call + table lookup) +# - Write mops entries: ~4-6 cycles per entry +# - Pre-read mops (conditional): ~12 cycles (if pre_count > 0) +# - Post-read mops (conditional): ~12 cycles (if post_count > 0) +# - Block src read mops: ~8-12 cycles (address calculation + write) +# - Pre-bytes copy: ~3-5 cycles per byte (if pre_count > 0, max 7 bytes) +# - Aligned qwords copy: ~1.5-2 cycles per qword (rep movsq, main data) +# - Post-bytes copy: ~3-5 cycles per byte (if post_count > 0, max 7 bytes) +# - Function overhead: ~10 cycles (push/pop, branches, return) +# +# TOTAL (best case, aligned, no pre/post): +# ~30 cycles base + ~2 cycles per qword (trace + copy) +# +# TOTAL (typical case, some alignment): +# ~50 cycles base + ~2 cycles per qword + ~4 cycles per pre/post byte +# +# OVERLAPPING BACKWARD COPY PATH: +# - Same mops overhead: ~30-50 cycles +# - std instruction: ~20-50 cycles (serializing, causes pipeline flush) +# - Backward byte copy: ~3-5 cycles per byte (rep movsb backward) +# - cld instruction: ~20-50 cycles (serializing, causes pipeline flush) +# +# TOTAL (overlap, worst case): +# ~100-150 cycles base + ~4-5 cycles per byte +# +# NOTES: +# - Assumes L1 cache hits for all memory accesses +# - rep movsq/movsb performance varies by microarchitecture +# - Actual cycles may vary ±20% depending on CPU model and memory alignment +# - Fast path (aligned, no overlap) is ~2-3x faster than overlap path + +# Mark stack as non-executable (required by modern linkers) +.section .note.GNU-stack,"",%progbits diff --git a/emulator-asm/src/dma/direct_memcmp_mtrace.asm b/emulator-asm/src/dma/direct_memcmp_mtrace.asm new file mode 100644 index 000000000..894771e41 --- /dev/null +++ b/emulator-asm/src/dma/direct_memcmp_mtrace.asm @@ -0,0 +1,238 @@ +.intel_syntax noprefix +.code64 + +# +# memcmp_mtrace - Memory comparison with mtrace recording +# +# This function performs two main tasks: +# 1. Encodes memcmp metadata (offsets, counts, comparison result) using FAST_DMA_ENCODE +# 2. Records memory trace entries for later verification/replay +# +# NOTE: The actual comparison is performed by fast_memcmp (called via tail jump). +# This function only handles the mtrace recording part. +# +# REGISTER USAGE: +# Uses general-purpose registers: rax, rbx, rcx, rdx, rdi, rsi, r9, r12, r13 +# Does NOT use XMM registers (caller doesn't need to save them) +# Preserves callee-saved registers (rbx, r12, r13 saved/restored in wrapper) +# +# PARAMETERS (NON System V AMD64 ABI): +# rdi = ptr1 (u64) - First memory region pointer +# rsi = ptr2 (u64) - Second memory region pointer +# rdx = count (usize) - Number of bytes to compare +# [r12 + r13*8] = mtrace_ptr (u64*) - Pointer to mtrace buffer (input/output) +# +# RETURN VALUE: +# rax = comparison result (0 if equal, byte difference if not) +# +# MTRACE FORMAT (written to mtrace buffer sequentially): +# [0] = Encoded metadata (offsets, counts, cmp_result in bits 21-28) +# [1] = Pre-read value at aligned(ptr1) IF pre_count > 0 +# [1 or 2] = Post-read value at aligned(ptr1+count) IF post_count > 0 +# [...] = All aligned qwords from aligned(ptr2) to aligned(ptr2+count) +# +# The mtrace buffer captures: +# - Source data from ptr2 (for verification) +# - Comparison result encoded in metadata +# - Pre/post values for boundary alignment handling +# +# COMPARISON BEHAVIOR: +# - Does NOT modify memory (read-only operation) +# - Records all data needed to verify/replay the comparison +# - Tail-calls fast_memcmp to perform actual byte comparison +# +# PERFORMANCE: +# - FAST_DMA_ENCODE macro: ~15-20 cycles (logic + table lookup) +# - Trace writes: ~4 cycles per qword write +# - Src data copy to trace: ~1-2 cycles per qword (rep movsq) +# - Function overhead: ~10-15 cycles (branches, setup) +# +# SIDE EFFECTS: +# - Does NOT modify ptr1 or ptr2 memory (read-only comparison) +# - Modifies mtrace buffer (variable size depending on pre/post counts) +# - Advances r13 (mtrace index) + +.global direct_dma_memcmp_mtrace +.global dma_memcmp_mtrace +.global _dma_memcmp_mtrace_test +// .global direct_dma_memcmp_mtrace_with_count_check +.extern fast_memcmp + +# .extern trace_resize_request + +.include "dma_constants.inc" +.include "fast_dma_encode_macro.inc" +.section .text + +direct_dma_memcmp_mtrace: + + # First step calculate the effective count (length), because must be the length + # really used, the other length it's only used when it's send to bus + + # First step was store the original size to mtrace + 1, because the position 0 + # will used by encoded. The function fast_memcmp modifies rdx with the effective + # count + + test rdx, rdx + jz .L_dma_memcmp_mtrace_count_zero + + # store on mtrace the bus count, original, no effective + mov [r12 + r13 * 8 + 8], rdx + + call fast_memcmp + # in case of original count > 0, the effective count must be > 0 because at least + # need check one byte to see if are equals or not + + # at end the memcpy must return rax with correct value, but these information + # could be extract from encoded, this is the stragegy to avoid manage two values + # encoded and result + + mov r9, rax + and r9, 0x1FF + jz .L_fast_dma_memcmp_encode_eq + + # Non-equal case: use table with NEQ offset + FAST_DMA_ENCODE_MEMCMP FAST_ENCODE_TABLE_WO_NEQ_SIZE + + shl r9, DMA_CMP_RES_RS # 1 cycle - shift cmp_result to position (bits 21-28) + or rax, r9 # 1 cycle - combine with encoding + jmp .L_dma_memcmp_encode_done # 1 cycle + +.L_fast_dma_memcmp_encode_eq: + # Equal case: use base table (offset 0) + FAST_DMA_ENCODE_MEMCMP 0 + +.L_dma_memcmp_encode_done: + # store before a potential realloc + + mov [r12 + r13 * 8], rax + add r13, 2 + + # Check if count exceeds direct mtrace threshold + # Parameters: rdi=ptr1, rsi=ptr2, rdx=count + + cmp rdx, MAX_DMA_BYTES_DIRECT_MTRACE # 1 cycle - check threshold + ja .L_memcmp_check_dynamic_trace # 1 cycle (not taken usually) + jmp .L_memcmp_mtrace_encoded_stored # 1 cycle - fall through to main function + +.L_memcmp_check_dynamic_trace: + call check_dynamic_mtrace # expand mtrace buffer if needed + +.L_memcmp_mtrace_encoded_stored: + +.L_pre_ptr1_to_mtrace: + # If pre_count > 0, record aligned ptr1 value + test rax, DMA_PRE_COUNT_MASK # 1 cycle - check if pre_count > 0 + jz .L_post_ptr1_to_mtrace # 1 cycle (predicted taken) + + # Pre-read: save qword at aligned(ptr1) for boundary handling + mov r9, rdi # 1 cycle - r9 = ptr1 + and r9, ALIGN_MASK # 1 cycle - align to 8-byte boundary + mov r9, [r9] # ~4 cycles - read qword from aligned ptr1 + mov [r12 + r13 * 8], r9 # ~4 cycles - write to mtrace + inc r13 # 1 cycle - advance mtrace index + +.L_post_ptr1_to_mtrace: + + # If post_count > 0, record aligned (ptr1+count) value + test rax, DMA_POST_COUNT_MASK # 1 cycle - check if post_count > 0 + jz .L_ptr2_to_mtrace # 1 cycle (predicted taken) + + lea r9, [rdi + rdx - 1] # 1 cycle - r9 = ptr1 + count - 1 (last byte) + and r9, ALIGN_MASK # 1 cycle - align to 8-byte boundary + mov r9, [r9] # ~4 cycles - read qword at aligned(ptr1+count) + mov [r12 + r13 * 8], r9 # ~4 cycles - write to mtrace + inc r13 # 1 cycle - advance mtrace index + +.L_ptr2_to_mtrace: + # Copy ptr2 (source) data to mtrace buffer for verification + # Total qwords = loop_count + extra_src_reads + + mov rcx, rax # 1 cycle - rcx = encoding + shr rcx, DMA_LOOP_COUNT_RS # 1 cycle - rcx = loop_count (bits 35+) + + mov r9, rax # 1 cycle - r9 = encoding + shr r9, DMA_EXTRA_SRC_READS_RS # 1 cycle - shift to extra_src_reads + and r9, 0x03 # 1 cycle - r9 = extra_src_reads (0-3) + add rcx, r9 # 1 cycle - rcx = total qwords to copy + + # Setup for rep movsq: copy aligned ptr2 data to mtrace + mov r9, rsi # 1 cycle - preserve original ptr2 + and rsi, ALIGN_MASK # 1 cycle - rsi = ptr2 aligned to 8 bytes + + push rdi # 1 cycle - save ptr1 + lea rdi, [r12 + r13 * 8] # 1 cycle - rdi = mtrace destination + add r13, rcx # 1 cycle - advance mtrace index + + rep movsq # ~1-2 cycles per qword (ERMSB optimized) + + pop rdi # 1 cycle - restore ptr1 + mov rsi, r9 # 1 cycle - restore original ptr2 + +.L_mtrace_done: + + shr rax, DMA_CMP_RES_RS + test rax, 0x100 + jnz .L_memcmp_mtrace_negative_res + and rax, 0xFF + jmp .L_memcmp_mtrace_res_ready + +.L_memcmp_mtrace_negative_res: + or rax, 0xFFFFFFFFFFFFFF00 + +.L_memcmp_mtrace_res_ready: + ret # ~5 cycles + +.L_dma_memcmp_mtrace_count_zero: + # this path used if bus count is 0 + # bus_count = 0 ==> effective_count = 0 + # bus_count > 0 ==> effective_count > 0 (at least need to check first byte) + + FAST_DMA_ENCODE_COUNT_ZERO + + mov [r12 + r13 * 8], rax # ~4 cycles - write encoding to mtrace + + # rdx contains 0, it's more fast use rdx rather immediate 0. + + mov [r12 + r13 * 8 + 8], rdx # ~4 cycles - write encoding to mtrace + + add r13, 2 # 1 cycle - advance mtrace index + xor rax, rax + ret + +# Performance estimate (Modern x86-64, L1 cache hits): +# +# NON-OVERLAPPING FORWARD COPY PATH: +# - fast_dma_encode call: ~15-20 cycles (function call + table lookup) +# - Write encoding to trace: ~4 cycles +# - Pre-value trace (conditional): ~12 cycles (if pre_count > 0) +# - Post-value trace (conditional): ~12 cycles (if post_count > 0) +# - Source data to trace: ~1.5-2 cycles per qword (rep movsq) +# - Pre-bytes copy: ~3-5 cycles per byte (if pre_count > 0, max 7 bytes) +# - Aligned qwords copy: ~1.5-2 cycles per qword (rep movsq, main data) +# - Post-bytes copy: ~3-5 cycles per byte (if post_count > 0, max 7 bytes) +# - Function overhead: ~10 cycles (push/pop, branches, return) +# +# TOTAL (best case, aligned, no pre/post): +# ~30 cycles base + ~2 cycles per qword (trace + copy) +# +# TOTAL (typical case, some alignment): +# ~50 cycles base + ~2 cycles per qword + ~4 cycles per pre/post byte +# +# OVERLAPPING BACKWARD COPY PATH: +# - Same trace overhead: ~30-50 cycles +# - std instruction: ~20-50 cycles (serializing, causes pipeline flush) +# - Backward byte copy: ~3-5 cycles per byte (rep movsb backward) +# - cld instruction: ~20-50 cycles (serializing, causes pipeline flush) +# +# TOTAL (overlap, worst case): +# ~100-150 cycles base + ~4-5 cycles per byte +# +# NOTES: +# - Assumes L1 cache hits for all memory accesses +# - rep movsq/movsb performance varies by microarchitecture +# - Actual cycles may vary ±20% depending on CPU model and memory alignment +# - Fast path (aligned, no overlap) is ~2-3x faster than overlap path + +# Mark stack as non-executable (required by modern linkers) +.section .note.GNU-stack,"",%progbits diff --git a/emulator-asm/src/dma/direct_memcpy_mops.asm b/emulator-asm/src/dma/direct_memcpy_mops.asm new file mode 100644 index 000000000..ec10ee936 --- /dev/null +++ b/emulator-asm/src/dma/direct_memcpy_mops.asm @@ -0,0 +1,370 @@ +.intel_syntax noprefix +.code64 + +################################################################################ +# direct_memcpy_mops / direct_xmemcpy_mops - Memory copy with mops tracing +# +# These functions perform memory copy operations while recording all memory +# operation addresses (mops) for verification. Two variants exist: +# +# - memcpy_mops: Records an EXTRA_PARAMETER_ADDR read (count comes from memory) +# - xmemcpy_mops: Extended variant where count is passed directly (no extra read) +# +# MAIN TASKS: +# 1. Encode memcpy metadata (offsets, counts, alignment flags) +# 2. Record all memory operation addresses (reads and writes) to mops buffer +# 3. Perform the actual memory copy from src to dst (with overlap handling) +# +# REGISTER USAGE: +# Uses: rax, rcx, rdx, rdi, rsi, r9, r12, r13 +# Does NOT use XMM registers (caller doesn't need to save them) +# Modifies: r13 (mops index output) +# +# PARAMETERS (non-standard ABI): +# rdi = dst (u64) - Destination address +# rsi = src (u64) - Source address +# rdx = count (usize) - Number of bytes to copy +# r12 = mops buffer base address - Base pointer to memory ops buffer +# r13 = mops buffer index - Current index (updated on return) +# +# RETURN: +# r13 = Updated mops index (number of entries written) +# +# MEMORY COPY BEHAVIOR: +# - Handles overlapping src/dst correctly (like memmove) +# - Non-overlapping: optimized 3-phase copy (pre/loop/post alignment) +# - Overlapping: backward byte-by-byte copy to avoid corruption +################################################################################ + +.global direct_dma_memcpy_mops +.global direct_dma_xmemcpy_mops +.extern check_dynamic_mtrace + +.include "dma_constants.inc" +.include "fast_dma_encode_macro.inc" + +.section .text + +################################################################################ +# direct_dma_xmemcpy_mops - Fast memory copy with mops (extended variant) +# +# Direct entry point for generated code (non-standard ABI). The extended +# variant receives count in rdx, so no extra memory read is recorded. +# +# PARAMETERS (non-standard ABI): +# rdi = destination address +# rsi = source address +# rdx = byte count +# r12 = mops buffer base address +# r13 = mops buffer index (input/output) +# +# RETURN: +# r13 = updated mops index +################################################################################ + +direct_dma_xmemcpy_mops: + + # Modified registers (no save needed - caller expects these to change): + # r9 = scratch register + # rcx = scratch register + # rdi = advanced during copy + # rsi = advanced during copy + # r13 = updated mops index (output) + # rax = encoded metadata + + # Encode memcpy parameters: rdi=dst, rsi=src, rdx=count + FAST_DMA_ENCODE # ~15-20 cycles - table lookup encoding + + # Skip the EXTENDED_PARAM read entry (not needed for xmemcpy) + jmp direct_dma_xmemcpy_common_entry_point + +################################################################################ +# direct_dma_memcpy_mops - Fast memory copy with mops (standard variant) +# +# Direct entry point for generated code (non-standard ABI). Records an extra +# memory read from EXTENDED_PARAM address because the memcpy opcode reads +# count from that location. +# +# PARAMETERS (non-standard ABI): +# rdi = destination address +# rsi = source address +# rdx = byte count +# r12 = mops buffer base address +# r13 = mops buffer index (input/output) +# +# RETURN: +# r13 = updated mops index +################################################################################ + +direct_dma_memcpy_mops: + + # Modified registers (no save needed - caller expects these to change): + # r9 = scratch register + # rcx = scratch register + # rdi = advanced during copy + # rsi = advanced during copy + # r13 = updated mops index (output) + # rax = encoded metadata + + # Encode memcpy parameters: rdi=dst, rsi=src, rdx=count + FAST_DMA_ENCODE # ~15-20 cycles - table lookup encoding + + # Record EXTENDED_PARAM read (memcpy opcode reads count from this address) + mov r9, (MOPS_ALIGNED_READ + EXTRA_PARAMETER_ADDR) # 1 cycle + mov [r12 + r13 * 8], r9 # ~4 cycles + inc r13 # 1 cycle + +direct_dma_xmemcpy_common_entry_point: + + # Early exit if count is zero + test rdx, rdx # 1 cycle - check count + jz .L_done # 2 cycles (predicted) - nothing to copy + + # ========== PHASE 1: Record PRE-alignment memory operations ========== + +.L_pre_dst_to_mops: + # Check if pre_count > 0 (unaligned prefix bytes to copy) + test rax, DMA_PRE_COUNT_MASK # 1 cycle - check pre_count bits + jz .L_post_dst_to_mops # 2 cycles (predicted) - skip if aligned + +.L_pre_is_active: + # Pre-alignment read: record dst read (original value before overwrite) + mov r9, MOPS_ALIGNED_READ # 1 cycle - read operation flag + add r9, rdi # 1 cycle - add dst address + and r9, ALIGN_MASK # 1 cycle - align to 8-byte boundary + mov [r12 + r13 * 8], r9 # ~4 cycles - write mops entry + + # Check if source spans two qwords (unaligned causing double read) + test rax, DMA_DOUBLE_SRC_PRE_MASK # 1 cycle + jnz .L_pre_double_src_to_mops # 2 cycles (predicted) + +.L_pre_single_src_to_mops: + # Source fits in single qword + mov r9, MOPS_ALIGNED_READ # 1 cycle - single read flag + add r9, rsi # 1 cycle - add src address + and r9, ALIGN_MASK # 1 cycle - align to 8-byte boundary + mov [r12 + r13 * 8 + 8], r9 # ~4 cycles - write mops entry + jmp .L_pre_src_inc_mops_index # 2 cycles + +.L_pre_double_src_to_mops: + # Source spans two qwords (needs double read) + mov r9, MOPS_ALIGNED_READ_2W # 1 cycle - double read flag + add r9, rsi # 1 cycle - add src address + and r9, ALIGN_MASK # 1 cycle - align to 8-byte boundary + mov [r12 + r13 * 8 + 8], r9 # ~4 cycles - write mops entry + +.L_pre_src_inc_mops_index: + add r13, 2 # 1 cycle - advance index (dst + src entries) + + # ========== PHASE 2: Record POST-alignment memory operations ========== + +.L_post_dst_to_mops: + # Check if post_count > 0 (unaligned suffix bytes to copy) + test rax, DMA_POST_COUNT_MASK # 1 cycle - check post_count bits + jz .L_src_to_mops # 2 cycles (predicted) - skip if no suffix + +.L_post_is_active: + # Post-alignment read: record dst read at end of copy region + mov rcx, MOPS_ALIGNED_READ # 1 cycle - read operation flag + lea r9, [rdi + rdx - 1] # 1 cycle - r9 = last dst byte address + and r9, ALIGN_MASK # 1 cycle - align to 8-byte boundary + add r9, rcx # 1 cycle - add mops flags + mov [r12 + r13 * 8], r9 # ~4 cycles - write mops entry + + # Calculate source address for post-alignment bytes + mov r9, rax # 1 cycle + shr r9, DMA_PRE_AND_LOOP_BYTES_RS # 1 cycle - extract pre+loop byte offset + add r9, rsi # 1 cycle - add to source + and r9, ALIGN_MASK # 1 cycle - align to 8-byte boundary + + # Check if source spans two qwords + test rax, DMA_DOUBLE_SRC_POST_MASK # 1 cycle + jnz .L_post_double_src_to_mops # 2 cycles (predicted) + +.L_post_single_src_to_mops: + # Source fits in single qword + mov rcx, MOPS_ALIGNED_READ # 1 cycle - single read flag + add r9, rcx # 1 cycle - add mops flags + mov [r12 + r13 * 8 + 8], r9 # ~4 cycles - write mops entry + jmp .L_post_src_inc_mops_index # 2 cycles + +.L_post_double_src_to_mops: + # Source spans two qwords (needs double read) + mov rcx, MOPS_ALIGNED_READ_2W # 1 cycle - double read flag + add r9, rcx # 1 cycle - add mops flags + mov [r12 + r13 * 8 + 8], r9 # ~4 cycles - write mops entry + +.L_post_src_inc_mops_index: + add r13, 2 # 1 cycle - advance index (dst + src entries) + + # ========== PHASE 3: Record LOOP (aligned bulk) memory operations ========== + +.L_src_to_mops: + # Extract loop_count (number of aligned qwords to copy) + mov rcx, rax # 1 cycle - rcx = encoded + shr rcx, DMA_LOOP_COUNT_RS # 1 cycle - rcx = loop_count + jz .L_save_dst_with_loop_count_zero # 2 cycles - no aligned bulk + shl rcx, MOPS_BLOCK_WORDS_RS # 1 cycle - format for mops block entry + + # Check if source is unaligned (needs extra word per iteration) + test rax, DMA_UNALIGNED_DST_SRC_MASK # 1 cycle + jnz .L_src_extra_for_unaligned_loop # 2 cycles (predicted) + + mov r9, MOPS_ALIGNED_BLOCK_READ # 1 cycle - aligned block read flag + jmp .L_src_block_before_address # 2 cycles + +.L_src_extra_for_unaligned_loop: + # Unaligned source requires one extra word per block + mov r9, MOPS_ALIGNED_BLOCK_READ + MOPS_BLOCK_ONE_WORD # 1 cycle + +.L_src_block_before_address: + add r9, rcx # 1 cycle - add block word count + test rax, DMA_SRC64_INC_BY_PRE_MASK # 1 cycle - check pre-alignment offset + jnz .L_src_incr_by_pre # 2 cycles (predicted) + add r9, rsi # 1 cycle - use base src address + jmp .L_src_to_mops_ready # 2 cycles + +.L_src_incr_by_pre: + # Source starts one qword after base (due to pre-alignment) + lea r9, [rsi + r9 + 8] # 1 cycle + +.L_src_to_mops_ready: + and r9, ALIGN_MASK # 1 cycle - align address + mov [r12 + r13 * 8], r9 # ~4 cycles - write mops entry + inc r13 # 1 cycle + +.L_save_dst_addr_reusing_rcx: + # Record destination write block + # Strategy: treat all writes as one block (cannot write same address twice per step) + + mov r9, rax # 1 cycle - r9 = encoded + and r9, DMA_PRE_WRITES_MASK # 1 cycle - extract pre_writes count + shl r9, PRE_WRITES_TO_MOPS_BLOCK # 1 cycle - format for mops block + add r9, rcx # 1 cycle - add loop block count + add r9, rdi # 1 cycle - add dst base address + + mov rcx, MOPS_ALIGNED_BLOCK_WRITE # 1 cycle - write block flag + add r9, rcx # 1 cycle - add mops flags + and r9, ALIGN_MASK # 1 cycle - align address + + mov [r12 + r13 * 8], r9 # ~4 cycles - write mops entry + inc r13 # 1 cycle + jmp .L_mops_done # 2 cycles + +.L_save_dst_with_loop_count_zero: + # No loop iterations - pre/post writes may be consecutive (single block) + + mov r9, rax # 1 cycle - r9 = encoded + and r9, DMA_PRE_WRITES_MASK # 1 cycle - extract pre_writes count + shl r9, PRE_WRITES_TO_MOPS_BLOCK # 1 cycle - format for mops block + add r9, rdi # 1 cycle - add dst base address + + mov rcx, MOPS_ALIGNED_BLOCK_WRITE # 1 cycle - write block flag + add r9, rcx # 1 cycle - add mops flags + and r9, ALIGN_MASK # 1 cycle - align address + + mov [r12 + r13 * 8], r9 # ~4 cycles - write mops entry + inc r13 # 1 cycle + + # ========== PHASE 4: Perform actual memory copy ========== + +.L_mops_done: + + # Check for memory overlap to decide copy direction + # Overlap exists if: src < dst < src+count (forward copy would corrupt) + cmp rdi, rsi # 1 cycle - compare dst with src + jb .L_copy_forward # 2 cycles (predicted) - dst < src, safe + lea r9, [rsi + rdx] # 1 cycle - r9 = src + count + cmp rdi, r9 # 1 cycle - compare dst with (src+count) + jae .L_copy_forward # 2 cycles (predicted) - dst >= src+count, safe + + # Overlap detected (src < dst < src+count), must copy backward + # Setup pointers to end of regions for backward copy + + mov rax, rdi + lea rsi, [rsi + rdx - 1] # 1 cycle - rsi = last src byte + lea rdi, [rdi + rdx - 1] # 1 cycle - rdi = last dst byte + mov rcx, rdx # 1 cycle - rcx = byte count + + std # ~20-50 cycles - set direction flag (backward) + rep movsb # ~3-5 cycles/byte (backward, slower) + cld # ~20-50 cycles - clear direction flag + + ret # ~3 cycles + +.L_copy_forward: + # No overlap - perform optimized forward copy + // cmp rdx, 16 # 1 cycle - check if count >= 16 + // jae .L_copy_forward_pre # 2 cycles (predicted) - use 3-phase copy + + mov rax, rdi + # Small copy (count < 16): direct byte copy + mov rcx, rdx # 1 cycle - rcx = count + rep movsb # ~3-5 cycles/byte + + ret # ~3 cycles +/* +.L_copy_forward_pre: + # 3-phase copy: pre-alignment bytes, aligned qwords, post-alignment bytes + + # Phase A: Pre-alignment bytes (0-7 bytes to reach 8-byte alignment) + test rax, DMA_PRE_COUNT_MASK # 1 cycle - check if pre_count > 0 + jz .L_copy_forward_loop # 2 cycles (predicted) + + mov rcx, rax # 1 cycle + and rcx, DMA_PRE_COUNT_MASK # 1 cycle - rcx = pre_count + rep movsb # ~3-5 cycles/byte - rsi/rdi now aligned + +.L_copy_forward_loop: + # Phase B: Aligned qwords (bulk data transfer) + mov rcx, rax # 1 cycle + shr rcx, DMA_LOOP_COUNT_RS # 1 cycle - rcx = loop_count + rep movsq # ~1.5-2 cycles/qword (optimized) + +.L_check_forward_post: + # Phase C: Post-alignment bytes (0-7 remaining bytes) + test rax, DMA_POST_COUNT_MASK # 1 cycle - check if post_count > 0 + jz .L_done # 2 cycles (predicted) + + mov rcx, rax # 1 cycle + shr rcx, DMA_POST_COUNT_RS # 1 cycle - extract post_count + and rcx, 0x0F # 1 cycle - mask to 3 bits + rep movsb # ~3-5 cycles/byte +*/ +.L_done: + mov rax, rdi + ret # ~3 cycles + +################################################################################ +# PERFORMANCE ESTIMATES (Modern x86-64, L1 cache hits) +# +# NON-OVERLAPPING FORWARD COPY: +# - FAST_DMA_ENCODE macro: ~15-20 cycles (table lookup) +# - EXTENDED_PARAM entry: ~6 cycles (memcpy variant only) +# - Pre-alignment mops: ~12 cycles (if pre_count > 0) +# - Post-alignment mops: ~12 cycles (if post_count > 0) +# - Block src/dst mops: ~10-15 cycles (address calc + writes) +# - Pre-bytes copy: ~3-5 cycles/byte (max 7 bytes) +# - Aligned qwords copy: ~1.5-2 cycles/qword (rep movsq) +# - Post-bytes copy: ~3-5 cycles/byte (max 7 bytes) +# +# Best case (aligned, no pre/post): ~35 cycles + ~2 cycles/qword +# Typical case (some alignment): ~55 cycles + ~2 cycles/qword +# +# OVERLAPPING BACKWARD COPY: +# - Same mops overhead: ~35-55 cycles +# - std instruction: ~20-50 cycles (pipeline flush) +# - Backward byte copy: ~3-5 cycles/byte (rep movsb) +# - cld instruction: ~20-50 cycles (pipeline flush) +# +# Worst case: ~100-150 cycles + ~4-5 cycles/byte +# +# NOTES: +# - Assumes L1 cache hits for all memory accesses +# - rep movsq/movsb performance varies by microarchitecture +# - Actual cycles may vary ±20% depending on CPU model +# - Forward aligned path is ~2-3x faster than backward path +################################################################################ + +# Mark stack as non-executable (required by modern linkers) +.section .note.GNU-stack,"",%progbits diff --git a/emulator-asm/src/dma/direct_memcpy_mtrace.asm b/emulator-asm/src/dma/direct_memcpy_mtrace.asm new file mode 100644 index 000000000..4e4fd97dd --- /dev/null +++ b/emulator-asm/src/dma/direct_memcpy_mtrace.asm @@ -0,0 +1,252 @@ +.intel_syntax noprefix +.code64 + +# +# memcpy_mtrace - Optimized version with memory tracing and actual copy +# +# This function performs three main tasks: +# 1. Encodes memcpy metadata (offsets, counts, flags) using fast_dma_encode +# 2. Records memory trace (pre-values and src data for verification/rollback) +# 3. Performs the actual memory copy from src to dst (with overlap handling) +# +# REGISTER USAGE: +# Uses general-purpose registers: rax, rbx, rcx, rdx, rdi, rsi, r9, r12, r13 +# Does NOT use XMM registers (caller doesn't need to save them) +# Preserves callee-saved registers (rbx, r12, r13 saved/restored in wrapper) +# +# PARAMETERS (NON System V AMD64 ABI): +# rdi = dst (u64) - Destination address +# rsi = src (u64) - Source address +# rdx = count (usize) - Number of bytes to copy +# [r12 + r13*8] = trace_ptr (u64*) - Pointer to memory trace buffer (input/output) +# +# RETURN VALUE: +# RAX = Number of 64-bit words written to trace buffer +# +# MEMORY TRACE FORMAT (written to trace buffer sequentially): +# [0] = Encoded metadata (64-bit value with offsets, counts, flags) +# [1] = Pre-write value at aligned(dst) IF pre_count > 0 +# [1 or 2] = Post-write value at aligned(dst+count) IF post_count > 0 +# [...] = All aligned qwords from aligned(src) to aligned(src+count) +# +# The trace buffer captures: +# - Original destination values (for undo/verification) +# - Source data (for verification) +# - Metadata needed to reconstruct the operation +# +# MEMORY COPY BEHAVIOR: +# - Handles overlapping src/dst correctly (like memmove) +# - For non-overlapping: optimized copy using pre_count/loop_count/post_count +# - For overlapping: backward byte-by-byte copy to avoid corruption +# +# PERFORMANCE: +# - Encoding: ~15-20 cycles (function call to fast_dma_encode, table lookup) +# - Trace writes: ~4 cycles per qword write +# - Src data copy to trace: ~1.5-2 cycles per qword (rep movsq) +# - Final memcpy (non-overlap): ~1.5-2 cycles per qword (rep movsq aligned) +# - Final memcpy (overlap): ~100-150 cycles overhead + ~4-5 cycles per byte (std/rep movsb/cld) +# +# SIDE EFFECTS: +# - Modifies memory at dst (count bytes) +# - Modifies trace buffer (variable size depending on pre/post counts) +# - Preserves direction flag (cld called after any std) + +.global direct_dma_memcpy_mtrace +.global direct_dma_memcpy_mtrace_with_count_check + +.extern fast_dma_encode +.extern trace_address_threshold +# .extern trace_resize_request + +.include "dma_constants.inc" +.include "fast_dma_encode_macro.inc" + +.section .text + +direct_dma_memcpy_mtrace_with_count_check: + + # Call fast_dma_encode to calculate encoding + # Parameters already in correct registers: R_DST=dst, R_SRC=src, R_COUNT=count + # Result will be returned in R_ENCODE (encoded value) + + cmp R_COUNT, MAX_DMA_BYTES_DIRECT_MTRACE # 1 cycle - check if count exceeds direct threshold + ja .L_memcpy_check_dynamic_trace # 2 cycles (not taken usually) - large count, check trace space + jmp direct_dma_memcpy_mtrace + +.L_memcpy_check_dynamic_trace: + call check_dynamic_mtrace + +direct_dma_memcpy_mtrace: + + # Call fast_dma_encode to calculate encoding + # Parameters already in correct registers: R_DST=dst, R_SRC=src, R_COUNT=count + # Result will be returned in R_ENCODE (encoded value) + + FAST_DMA_ENCODE # ~15-18 cycles - table lookup encoding + + mov [R_MT_ADDR + R_MT_INDEX * 8], R_ENCODE # ~4 cycles - write encoded result to mem trace + inc R_MT_INDEX # 1 cycle - advance R_MT_INDEX (mem trace index) + +.L_pre_dst_to_mtrace: + # If pre_count > 0, write aligned dst value to trace + test R_ENCODE, DMA_PRE_COUNT_MASK # 1 cycle - check if pre_count > 0 + jz .L_post_dst_to_mtrace # 2 cycles (predicted taken) + + # Branch with pre_count > 0: save original dst value before it's overwritten + mov R_AUX, R_DST # 1 cycle - get original dst + and R_AUX, ALIGN_MASK # 1 cycle - align to 8-byte boundary + mov R_AUX, [R_AUX] # ~4 cycles - read qword from aligned dst + mov [R_MT_ADDR + R_MT_INDEX * 8], R_AUX # ~4 cycles - write dst pre-value to trace + inc R_MT_INDEX # 1 cycle - advance trace index + +.L_post_dst_to_mtrace: + + # If post_count > 0, write aligned (dst+count) value to trace + test R_ENCODE, DMA_POST_COUNT_MASK # 1 cycle - check if post_count > 0 + jz .L_src_to_mtrace # 2 cycles (predicted taken) - skip to src copy + + lea R_AUX, [R_DST + R_COUNT - 1] # 1 cycle - R_AUX = dst + count - 1 (last dst byte) + and R_AUX, ALIGN_MASK # 1 cycle - align to 8-byte boundary + mov R_AUX, [R_AUX] # ~4 cycles - read qword at (dst+count) aligned + mov [R_MT_ADDR + R_MT_INDEX * 8], R_AUX # ~4 cycles - write dst post-value to trace + inc R_MT_INDEX # 1 cycle - advance trace index + +.L_src_to_mtrace: + # Copy source data to trace buffer + # Total qwords = loop_count (bits 0-31) + extra_src_reads (bits 48-50) + + mov R_AUX2, R_ENCODE # 1 cycle - R_AUX2 = encoded + shr R_AUX2, DMA_LOOP_COUNT_RS # 1 cycle - R_AUX2 = loop_count (bits 32-63) + + mov R_AUX, R_ENCODE # 1 cycle - R_AUX = encoded + shr R_AUX, DMA_EXTRA_SRC_READS_RS # 1 cycle - shift extra_src_reads to position + and R_AUX, 0x03 # 1 cycle - R_AUX = extra_src_reads (bits 48-50) + add R_AUX2, R_AUX # 1 cycle - R_AUX2 = total qwords to copy + + # Setup for rep movsq: copy from aligned src to trace buffer + mov R_AUX, R_SRC # 1 cycle - preserve original src pointer + and R_SRC, ALIGN_MASK # 1 cycle - R_SRC = src aligned to 8 bytes + + push R_DST # ~3 cycles - save dst pointer + lea R_DST, [R_MT_ADDR + R_MT_INDEX * 8] # 1 cycle - R_DST = trace buffer destination + add R_MT_INDEX, R_AUX2 # 1 cycle - advance trace index by qwords copied + + rep movsq # ~1.5-2 cycles per qword (hardware optimized) + + pop R_DST # ~3 cycles - restore dst pointer + mov R_SRC, R_AUX # 1 cycle - restore original src pointer + +.L_mtrace_done: + # Check for memory overlap to decide copy direction + # NOTE: R_DST and R_SRC now contain their ORIGINAL values (restored above) + # Overlap exists if: src < dst < src+count (forward overlap) + cmp R_DST, R_SRC # 1 cycle - compare dst with src + jb .L_copy_forward # 2 cycles (predicted) - dst < src, no overlap + lea R_AUX, [R_SRC + R_COUNT] # 1 cycle - R_AUX = src + count + cmp R_DST, R_AUX # 1 cycle - compare dst with (src+count) + jae .L_copy_forward # 2 cycles (predicted) - dst >= src+count, no overlap + + # Overlap detected (src < dst < src+count), must copy backward + # Setup: R_SRC = src+count-1, R_DST = dst+count-1, R_AUX2 = count + # Uses ORIGINAL R_SRC and R_DST values (restored from R_AUX and stack) + + mov rax, R_DST + lea R_SRC, [R_SRC + R_COUNT - 1] # 1 cycle - R_SRC = src + count - 1 (from original) + lea R_DST, [R_DST + R_COUNT - 1] # 1 cycle - R_DST = dst + count - 1 (from original) + mov R_AUX2, R_COUNT # 1 cycle - R_AUX2 = count + + std # ~20-50 cycles - set DF (serializing, pipeline flush) + rep movsb # ~3-5 cycles per byte (backward copy, slower than forward) + cld # ~20-50 cycles - clear DF (serializing, pipeline flush) + + ret # ~5 cycles + +.L_copy_forward: + # No overlap detected, perform optimized forward copy + // cmp R_COUNT, 16 # 1 cycle - check if count >= 16 (worth alignment) + // jae .L_copy_forward_pre # 2 cycles (predicted) - use 3-phase aligned copy + + # Small copy (count < 16): copy all bytes directly + mov rax, R_DST + mov R_AUX2, R_COUNT # 1 cycle - R_AUX2 = count + rep movsb # ~3-5 cycles per byte (unaligned small copy) + + ret # ~5 cycles + +# .L_copy_forward_pre: +# # Copy in 3 phases: pre-alignment bytes, aligned qwords, post-alignment bytes + +# # If pre_count > 0, copy unaligned prefix bytes + +# test R_ENCODE, DMA_PRE_COUNT_MASK # 1 cycle - check if pre_count > 0 +# jz .L_copy_forward_loop # 2 cycles (predicted) + +# # Extract and copy pre_count bytes (1-7 bytes to reach alignment) + +# mov R_AUX2, R_ENCODE # 1 cycle +# and R_AUX2, DMA_PRE_COUNT_MASK # 1 cycle - R_AUX2 = pre_count (bits 0-3) + +# rep movsb # ~3-5 cycles per byte +# # R_SRC, R_DST now 8-byte aligned + +# .L_copy_forward_loop: +# # Copy aligned qwords (main bulk of data) +# mov R_AUX2, R_ENCODE # 1 cycle +# shr R_AUX2, DMA_LOOP_COUNT_RS # 1 cycle - R_AUX2 = loop_count (bits 32-63) +# rep movsq # ~1.5-2 cycles per qword (aligned, optimized) +# # R_SRC, R_DST advanced by loop_count * 8 + +# .L_check_forward_post: + +# # If post_count > 0, copy remaining unaligned suffix bytes +# test R_ENCODE, DMA_POST_COUNT_MASK # 1 cycle - check if post_count > 0 +# jz .L_done # 2 cycles (predicted) + +# # Extract and copy post_count bytes (1-7 bytes after aligned data) +# mov R_AUX2, R_ENCODE # 1 cycle +# shr R_AUX2, DMA_POST_COUNT_RS # 1 cycle - shift post_count to position +# and R_AUX2, 0x0F # 1 cycle - R_AUX2 = post_count (bits 43-45) + +# rep movsb # ~3-5 cycles per byte +# # R_SRC, R_DST now point past end of data + +.L_done: + mov rax, rdi + ret # ~5 cycles + +# Performance estimate (Modern x86-64, L1 cache hits): +# +# NON-OVERLAPPING FORWARD COPY PATH: +# - fast_dma_encode call: ~15-20 cycles (function call + table lookup) +# - Write encoding to trace: ~4 cycles +# - Pre-value trace (conditional): ~12 cycles (if pre_count > 0) +# - Post-value trace (conditional): ~12 cycles (if post_count > 0) +# - Source data to trace: ~1.5-2 cycles per qword (rep movsq) +# - Pre-bytes copy: ~3-5 cycles per byte (if pre_count > 0, max 7 bytes) +# - Aligned qwords copy: ~1.5-2 cycles per qword (rep movsq, main data) +# - Post-bytes copy: ~3-5 cycles per byte (if post_count > 0, max 7 bytes) +# - Function overhead: ~10 cycles (push/pop, branches, return) +# +# TOTAL (best case, aligned, no pre/post): +# ~30 cycles base + ~2 cycles per qword (trace + copy) +# +# TOTAL (typical case, some alignment): +# ~50 cycles base + ~2 cycles per qword + ~4 cycles per pre/post byte +# +# OVERLAPPING BACKWARD COPY PATH: +# - Same trace overhead: ~30-50 cycles +# - std instruction: ~20-50 cycles (serializing, causes pipeline flush) +# - Backward byte copy: ~3-5 cycles per byte (rep movsb backward) +# - cld instruction: ~20-50 cycles (serializing, causes pipeline flush) +# +# TOTAL (overlap, worst case): +# ~100-150 cycles base + ~4-5 cycles per byte +# +# NOTES: +# - Assumes L1 cache hits for all memory accesses +# - rep movsq/movsb performance varies by microarchitecture +# - Actual cycles may vary ±20% depending on CPU model and memory alignment +# - Fast path (aligned, no overlap) is ~2-3x faster than overlap path + +# Mark stack as non-executable (required by modern linkers) +.section .note.GNU-stack,"",%progbits diff --git a/emulator-asm/src/dma/direct_memset_mops.asm b/emulator-asm/src/dma/direct_memset_mops.asm new file mode 100644 index 000000000..9ceff6e36 --- /dev/null +++ b/emulator-asm/src/dma/direct_memset_mops.asm @@ -0,0 +1,256 @@ +.intel_syntax noprefix +.code64 + +################################################################################ +# direct_dma_xmemset_mops - Memory set with mops (memory operation) tracing +# +# This function fills a memory region with a byte value while recording all +# memory operation addresses to the mops buffer for verification. +# +# MAIN TASKS: +# 1. Record memory operation addresses (pre-reads for partial qwords, writes) +# 2. Perform the actual memset operation (via fast_memset) +# +# REGISTER USAGE: +# Uses: rax, rcx, rdx, rdi, rsi, r9, r12, r13 +# Does NOT use XMM registers (caller doesn't need to save them) +# Modifies: r13 (mops index output) +# +# PARAMETERS (non-standard ABI): +# rdi = dst (u64) - Destination address to fill +# rsi = value (u8 in low byte) - Byte value to set (0-255) +# rdx = count (usize) - Number of bytes to set +# r12 = mops buffer base address - Base pointer to mops buffer +# r13 = mops buffer index - Current index (updated on return) +# +# RETURN: +# r13 = Updated mops index +# +# BRANCHES: +# FAST: dst aligned + count multiple of 8 → only write block entry +# BRANCH 1: dst aligned + count NOT multiple of 8 → 1 pre-read (post) + write +# BRANCH 2.1: dst unaligned + fits single range → 1 pre-read (pre) + write +# BRANCH 2.2: dst unaligned + spans qwords → 2 pre-reads (pre/post) + write +################################################################################ + +.global direct_dma_xmemset_mops +.extern fast_memset + +.include "dma_constants.inc" + +.section .text + +################################################################################ +# direct_dma_xmemset_mops - Direct entry point (non-standard ABI) +# +# Called directly from generated assembly code without ABI overhead. +# More efficient when caller manages register preservation. +# +# PARAMETERS: +# rdi = destination address +# rsi = byte value (0-255) +# rdx = byte count +# r12 = mops buffer base +# r13 = mops buffer index (input/output) +################################################################################ + +direct_dma_xmemset_mops: + + # Modified registers (caller must handle): + # r9 = scratch for mops address calculation + # rcx = scratch for calculations + # r13 = mops index (updated on return) + + # Early exit if count = 0 + test rdx, rdx + jz .L_xmemset_mops_done + + # Check if dst is 8-byte aligned + test rdi, 0x7 + jnz .L_xmemset_mops_rdi_unaligned + + # Check if count is multiple of 8 + test rdx, 0x07 + jnz .L_memset_mops_count_remain + + # ========== FAST PATH ========== + # dst is aligned AND count is multiple of 8 + # => No pre-reads needed, only one write block entry + + mov rax, rdx + shr rax, 3 # 1 cycle - rax = count / 8 (qwords) + shl rax, MOPS_BLOCK_WORDS_RS # 1 cycle - format for mops block + mov r9, MOPS_ALIGNED_BLOCK_WRITE # 1 cycle - write block flag + add r9, rax # 1 cycle - add qword count + add r9, rdi # 1 cycle - add dst (already aligned) + + mov [r12 + r13 * 8], r9 # ~4 cycles - write mops entry + inc r13 # 1 cycle - advance mops index + + jmp fast_memset # tail call to fast_memset + + # fast_memset "execute" the return, memset set rax = rdi + + # ========== BRANCH 1 ========== + # dst aligned, count NOT multiple of 8 + # => 1 pre-read (post qword) + 1 write block + +.L_memset_mops_count_remain: + + # Calculate qwords needed: ceil(count / 8) = (count + 7) / 8 + lea r9, [rdx + 7] + shr r9, 3 # 1 cycle - r9 = qwords to write + + # BRANCH 1 - POST pre-read: read last qword (partial overwrite) + lea rcx, [rdi + r9 * 8 - 8] # 1 cycle - address of last qword + mov rax, MOPS_ALIGNED_READ # 1 cycle - read flag + add rcx, rax # 1 cycle - combine + mov [r12 + r13 * 8], rcx # ~4 cycles - write pre-read entry + + # BRANCH 1 - Write block entry + shl r9, MOPS_BLOCK_WORDS_RS # 1 cycle - format qwords for mops + mov rax, MOPS_ALIGNED_BLOCK_WRITE # 1 cycle - write block flag + add rax, r9 # 1 cycle - add qword count + add rax, rdi # 1 cycle - add dst (aligned) + + mov [r12 + r13 * 8 + 8], rax # ~4 cycles - write block entry + add r13, 2 + # 1 cycle - advance index by 2 + jmp fast_memset # tail call to fast_memset + + # fast_memset "execute" the return, memset set rax = rdi + + # ========== BRANCH 2 ========== + # dst NOT aligned + # Must determine if we need 1 or 2 pre-reads + +.L_xmemset_mops_rdi_unaligned: + + # Calculate total span: (rdi & 0x7) + count + # If span <= 8: only PRE read needed (BRANCH 2.1) + # If span > 8 and end is aligned: only PRE read needed (BRANCH 2.1) + # If span > 8 and end is unaligned: PRE + POST reads needed (BRANCH 2.2) + + mov rcx, rdi + and rcx, 0x07 # 1 cycle - offset within qword + lea rcx, [rcx + rdx + 7] # 1 cycle - rcx = offset + count + 7 + test rcx, 0xFFFFFFFFFFFFFFF0 # 1 cycle - check if (offset + count + 7) > 15 + # => (offset + count) > 8 => spans qwords + jnz .L_pre_branch_2_2 # 2 cycles (predicted) + jmp .L_branch_2_1 # single qword span + + # Check if end is unaligned (needs POST read) +.L_pre_branch_2_2: + lea rax, [rcx - 7] # 1 cycle - rax = offset + count + test rax, 0x7 # 1 cycle - check if end is aligned + jnz .L_branch_2_2 # 2 cycles - end unaligned, need POST + + # ========== BRANCH 2.1 ========== + # dst unaligned, but end IS aligned (or fits in one qword) + # => 1 pre-read (PRE) + 1 write block + +.L_branch_2_1: + + # Calculate aligned base and qword count + # rcx = (rdi & 0x7) + rdx + 7 → (rcx >> 3) = qwords needed + mov rax, rdi + and rax, ALIGN_MASK # 1 cycle - rax = aligned dst + shr rcx, 3 # 1 cycle - rcx = qwords + shl rcx, MOPS_BLOCK_WORDS_RS # 1 cycle - format for mops + mov r9, MOPS_ALIGNED_BLOCK_WRITE # 1 cycle - write block flag + add rcx, r9 # 1 cycle - add flag + add rcx, rax # 1 cycle - add aligned address + + mov [r12 + r13 * 8 + 8], rcx # ~4 cycles - write block entry + + # PRE read entry (first qword contains unaligned start) + mov rcx, MOPS_ALIGNED_READ # 1 cycle - read flag + add rcx, rax # 1 cycle - add aligned address + mov [r12 + r13 * 8], rcx # ~4 cycles - write pre-read entry + add r13, 2 # 1 cycle - advance index by 2 + + jmp fast_memset # tail call to fast_memset + + # fast_memset "execute" the return, memset set rax = rdi + + # ========== BRANCH 2.2 ========== + # dst unaligned AND end unaligned (spans multiple partial qwords) + # => 2 pre-reads (PRE + POST) + 1 write block + +.L_branch_2_2: + + # rcx = (rdi & 0x7) + rdx + 7 → (rcx >> 3) = qwords needed + shr rcx, 3 # 1 cycle - rcx = qwords + shl rcx, MOPS_BLOCK_WORDS_RS # 1 cycle - format for mops + mov rax, MOPS_ALIGNED_BLOCK_WRITE # 1 cycle - write block flag + add rax, rcx # 1 cycle - add qword count + mov rcx, rdi + and rcx, ALIGN_MASK # 1 cycle - rcx = aligned dst + add rax, rcx # 1 cycle - add aligned address + + mov [r12 + r13 * 8 + 16], rax # ~4 cycles - write block entry (3rd slot) + + # PRE read entry (first partial qword) + mov rax, MOPS_ALIGNED_READ # 1 cycle - read flag + add rcx, rax # 1 cycle - rcx = aligned dst + read flag + mov [r12 + r13 * 8], rcx # ~4 cycles - write PRE read entry + + # POST read entry (last partial qword) + lea r9, [rdi + rdx] # 1 cycle - r9 = dst + count + and r9, ALIGN_MASK # 1 cycle - align to qword + add r9, rax # 1 cycle - add read flag + mov [r12 + r13 * 8 + 8], r9 # ~4 cycles - write POST read entry + + add r13, 3 # 1 cycle - advance index by 3 + + jmp fast_memset # tail call to fast_memset + + # fast_memset "execute" the return, memset set rax = rdi + +.L_xmemset_mops_done: + + mov rax, rdi + ret # ~3 cycles + + +################################################################################ +# PERFORMANCE ESTIMATES (Modern x86-64, L1 cache hits) +# +# FAST PATH (aligned dst, count multiple of 8): +# - Mops entry: ~8-10 cycles +# - fast_memset overhead: ~5-10 cycles +# - Qword fill (rep stosq): ~0.5-1.0 cycles/qword (ERMSB) +# Total: ~15-20 cycles + ~0.75 cycles/qword +# +# BRANCH 1 (aligned dst, count NOT multiple of 8): +# - Pre-read + block entries: ~15-18 cycles +# - fast_memset + fill: ~5-10 cycles + ~0.75 cycles/qword +# Total: ~25-30 cycles + ~0.75 cycles/qword +# +# BRANCH 2.1 (unaligned dst, end aligned or single qword): +# - PRE read + block entries: ~18-22 cycles +# - fast_memset + fill: ~5-10 cycles + ~0.75 cycles/qword +# Total: ~28-35 cycles + ~0.75 cycles/qword +# +# BRANCH 2.2 (unaligned dst AND end): +# - PRE + POST + block entries: ~25-30 cycles +# - fast_memset + fill: ~5-10 cycles + ~0.75 cycles/qword +# Total: ~35-45 cycles + ~0.75 cycles/qword +# +# EXAMPLE (64-byte aligned fill): +# ~20 cycles mops + ~10 cycles setup + 8 qwords * 0.75 = ~36 cycles +# Throughput: ~1.8 GB/s @ 3 GHz +# +# EXAMPLE (4096-byte aligned fill): +# ~20 cycles mops + ~10 cycles setup + 512 qwords * 0.5 = ~286 cycles +# Throughput: ~14.3 GB/s @ 3 GHz (approaching L1D bandwidth) +# +# NOTES: +# - Assumes L1D cache hits (~4 cycle latency) +# - rep stosq uses ERMSB optimization on modern CPUs (post-2013) +# - For fills >256 bytes, approaches memory bandwidth limits +# - Actual cycles vary ±20% by microarchitecture +################################################################################ + +# Mark stack as non-executable (required by modern linkers) +.section .note.GNU-stack,"",%progbits diff --git a/emulator-asm/src/dma/direct_memset_mtrace.asm b/emulator-asm/src/dma/direct_memset_mtrace.asm new file mode 100644 index 000000000..d5ba1cacc --- /dev/null +++ b/emulator-asm/src/dma/direct_memset_mtrace.asm @@ -0,0 +1,268 @@ +.intel_syntax noprefix +.code64 + +################################################################################ +# direct_dma_xmemset_mtrace - Memory set with mtrace (memory trace) recording +# +# This function fills a memory region with a byte value while recording +# the operation encoding and pre-values to the mtrace buffer for verification. +# +# MAIN TASKS: +# 1. Encode memset metadata (dst_offset, count, fill_byte, alignment info) +# 2. Record pre-values of partial qwords (before overwriting) +# 3. Perform the actual memset operation (via fast_memset) +# +# MTRACE SIZE: +# xmemset uses at most 3 qwords: encode + pre + post +# Therefore, no realloc check is needed (always fits within threshold) +# +# REGISTER USAGE: +# Uses: rax, rcx, rdx, rdi, rsi, r9, r12, r13 +# Does NOT use XMM registers (caller doesn't need to save them) +# Modifies: r13 (mtrace index output) +# +# PARAMETERS (non-standard ABI): +# rdi = dst (u64) - Destination address to fill +# rsi = value (u8 in low byte) - Byte value to set (0-255) +# rdx = count (usize) - Number of bytes to set +# r12 = mtrace buffer base address - Base pointer to mtrace buffer +# r13 = mtrace buffer index - Current index (updated on return) +# +# RETURN: +# r13 = Updated mtrace index +# +# BRANCHES: +# FAST: dst aligned + count multiple of 8 → encode only (no pre-reads) +# BRANCH 1: dst aligned + count NOT multiple of 8 → encode + 1 post pre-read +# BRANCH 2: dst unaligned → encode + 0-2 pre-reads depending on alignment +################################################################################ + +.global direct_dma_xmemset_mtrace +.extern fast_memset + +.include "dma_constants.inc" +.include "fast_dma_encode_macro.inc" + +.section .text + +################################################################################ +# direct_dma_xmemset_mtrace - Direct entry point (non-standard ABI) +# +# Called directly from generated assembly code without ABI overhead. +# More efficient when caller manages register preservation. +# +# PARAMETERS: +# rdi = destination address +# rsi = byte value (0-255) +# rdx = byte count +# r12 = mtrace buffer base +# r13 = mtrace buffer index (input/output) +################################################################################ + +direct_dma_xmemset_mtrace: + + # Modified registers (caller must handle): + # r9 = scratch for calculations + # rcx = scratch for address/value storage + # r13 = mtrace index (updated on return) + + # Early exit path if count = 0 + test rdx, rdx + jz .L_xmemset_mtrace_count_zero + + # Check if dst is 8-byte aligned + test rdi, 0x7 + jnz .L_xmemset_mtrace_rdi_unaligned + + # Check if count is multiple of 8 + test rdx, 0x07 + jnz .L_memset_mtrace_count_remain + + # ========== FAST PATH ========== + # dst is aligned AND count is multiple of 8 + # => No partial qwords, no pre-reads needed, only encoding + + # Encode loop_bytes (count is already multiple of 8) + mov r9, rdx + shl r9, DMA_PRE_AND_LOOP_BYTES_RS # 1 cycle - shift to loop_bytes position + + # Encode fill byte + movzx eax, sil # 1 cycle - zero-extend byte value + shl rax, DMA_FILL_BYTE_RS # 1 cycle - shift to fill_byte position + add rax, r9 # 1 cycle - combine + + # Store encoded value to mtrace + mov [r12 + r13 * 8], rax # ~4 cycles - write encoding + inc r13 # 1 cycle - advance mtrace index + + jmp fast_memset # tail call to fast_memset + + # ========== BRANCH 1 ========== + # dst aligned, count NOT multiple of 8 + # => Need 1 post pre-read (last partial qword) + +.L_memset_mtrace_count_remain: + # dst_offset = 0 + + movzx r9, sil # 1 cycle - zero-extend byte value + shl r9, DMA_FILL_BYTE_RS # 1 cycle - shift to fill_byte position + + FAST_DMA_ENCODE_NO_SRC + + # Encode post_count (remaining bytes after aligned portion) + add rax, r9 + + # Store encoding to mtrace + mov [r12 + r13 * 8], rax # ~4 cycles + + # Calculate qword count for post pre-read address + + shr rax, DMA_LOOP_COUNT_RS + mov rcx, [rdi + rax * 8] # 1 cycle - rcx = post qword address + mov [r12 + r13 * 8 + 8], rcx # ~4 cycles - store post pre-read address + add r13, 2 # 1 cycle - advance index by 2 + + jmp fast_memset # tail call to fast_memset + + # ========== BRANCH 2 ========== + # dst NOT aligned - uses full FAST_DMA_ENCODE macro + # Depending on alignment, may need 0, 1, or 2 pre-reads + +.L_xmemset_mtrace_rdi_unaligned: + + # Use macro for complex encoding (handles all alignment cases) + FAST_DMA_ENCODE_NO_SRC # ~15-20 cycles + + # Add fill byte to encoding + movzx r9, sil # 1 cycle - zero-extend byte value + shl r9, DMA_FILL_BYTE_RS # 1 cycle - shift to position + or rax, r9 # 1 cycle - combine with encoding + + # Store encoding to mtrace + mov [r12 + r13 * 8], rax # ~4 cycles + + # Check if PRE pre-read needed (unaligned start) + test rax, DMA_PRE_COUNT_MASK # 1 cycle + jz .L_xmemset_mtrace_rdi_unaligned_no_pre # 2 cycles (predicted) + + # PRE pre-read: save original value of first partial qword + mov r9, rdi + and r9, ALIGN_MASK # 1 cycle - r9 = aligned dst + mov rcx, [r9] # ~4 cycles - read current value + mov [r12 + r13 * 8 + 8], rcx # ~4 cycles - store pre-value + + # Check if POST pre-read also needed (unaligned end) + test rax, DMA_POST_COUNT_MASK # 1 cycle + jz .L_xmemset_mtrace_rdi_unaligned_pre_no_post # 2 cycles + + # POST pre-read: save original value of last partial qword + # r9 still contains (dst & ALIGN_MASK) from previous calculation + # Calculate post qword address: aligned_dst + 8 + loop_count * 8 + + mov rcx, rax + shr rcx, DMA_PRE_AND_LOOP_BYTES_RS + + mov rcx, [rdi + rcx] # ~4 cycles - read post pre-value + mov [r12 + r13 * 8 + 16], rcx # ~4 cycles - store as third mtrace entry + add r13, 3 # 1 cycle - advance by 3 (encode + pre + post) + + jmp fast_memset # tail call to fast_memset + + # ----- BRANCH 2.1: PRE only (no POST) ----- + # Unaligned start but aligned end +.L_xmemset_mtrace_rdi_unaligned_pre_no_post: + add r13, 2 # 1 cycle - advance by 2 (encode + pre) + + jmp fast_memset # tail call to fast_memset + + # ----- BRANCH 2.2: NO PRE (start happens to be aligned) ----- + # When unaligned path was taken but PRE=0 (edge case) +.L_xmemset_mtrace_rdi_unaligned_no_pre: + # Check if POST pre-read needed + test rax, DMA_POST_COUNT_MASK # 1 cycle + jz .L_xmemset_mtrace_rdi_unaligned_no_pre_no_post # 2 cycles + + # Calculate aligned dst for post address + mov r9, rdi # 1 cycle + and r9, ALIGN_MASK # 1 cycle - r9 = aligned dst + + # Extract loop_count to calculate post address + mov rcx, rax # 1 cycle + shr rcx, DMA_LOOP_COUNT_RS # 1 cycle - rcx = loop_count + + # POST pre-read: save original value of last qword + # Address: aligned_dst + 8 + loop_count * 8 + mov rcx, [r9 + 8 + rcx * 8] # ~4 cycles - read post pre-value + mov [r12 + r13 * 8 + 8], rcx # ~4 cycles - store as second mtrace entry + add r13, 2 # 1 cycle - advance by 2 (encode + post) + + jmp fast_memset # tail call to fast_memset + + # ----- BRANCH 2.3: NO PRE, NO POST ----- + # Edge case where both ends happen to be aligned despite taking unaligned path +.L_xmemset_mtrace_rdi_unaligned_no_pre_no_post: + inc r13 # 1 cycle - advance by 1 (encode only) + + jmp fast_memset # tail call to fast_memset + + # ========== COUNT = 0 CASE ========== + # Zero-length memset: only encoding, no pre-reads needed + # Creates minimal mtrace entry for zero-byte operation +.L_xmemset_mtrace_count_zero: + + FAST_DMA_ENCODE_COUNT_ZERO 0 + + # Encode fill byte + movzx r9, sil # 1 cycle - zero-extend byte value + shl r9, DMA_FILL_BYTE_RS # 1 cycle - shift to position + + # Add template for MEMSET_ZERO operation type + add rax, r9 # 1 cycle + + + # Store encoding to mtrace (no pre-reads for zero-length) + mov [r12 + r13 * 8], rax # ~4 cycles + inc r13 # 1 cycle + + jmp fast_memset # tail call to fast_memset + + # NOTE: This label is unreachable - all paths use tail calls to fast_memset + + +# Performance Estimate (Modern x86-64, Intel Skylake/AMD Zen+, L1 cache hits): +# +# MEMSET OPERATION WITH MTRACE RECORDING: +# - FAST_DMA_ENCODE macro: ~15-20 cycles (logic + table lookup) +# - Encoding store: ~4 cycles (mov to mtrace buffer) +# - Pre pre-read (if needed): ~8-10 cycles (and + mov load + mov store) +# - Post pre-read (if needed): ~10-12 cycles (lea + mov load + mov store) +# - Fill byte insertion: ~3 cycles (movzx + shl + or) +# - Tail call jump: ~1-2 cycles +# +# PATH TIMING: +# +# FAST PATH (aligned dst, count % 8 == 0): +# 7 (setup) + 3 (fill byte) + 4 (store) + 8 (post pre-read) + 2 (tail) +# = ~24 cycles overhead + fast_memset execution +# +# BRANCH 1 (aligned dst, count % 8 != 0): +# 15 (encode) + 3 (fill byte) + 4 (store) + 8 (post pre-read) + 2 (tail) +# = ~32 cycles overhead + fast_memset execution +# +# BRANCH 2.1 (unaligned dst, PRE only): +# 20 (encode) + 3 (fill byte) + 4 (store) + 10 (pre pre-read) + 2 (tail) +# = ~39 cycles overhead + fast_memset execution +# +# BRANCH 2.2 (unaligned dst, PRE + POST): +# 20 (encode) + 3 (fill byte) + 4 (store) + 10 (pre) + 12 (post) + 2 (tail) +# = ~51 cycles overhead + fast_memset execution +# +# NOTES: +# - Mtrace overhead is independent of fill size (constant per operation) +# - Pre-reads capture original values for later verification +# - All paths use tail calls, minimizing return overhead +# - Encoding + pre-reads add ~24-51 cycles vs direct fast_memset call +# - Actual fill performance depends on fast_memset (ERMSB ~0.5 cycles/qword) + +# Mark stack as non-executable (required by modern linkers) +.section .note.GNU-stack,"",%progbits diff --git a/emulator-asm/src/dma/dma_constants.inc b/emulator-asm/src/dma/dma_constants.inc new file mode 100644 index 000000000..e968242ff --- /dev/null +++ b/emulator-asm/src/dma/dma_constants.inc @@ -0,0 +1,119 @@ +.intel_syntax noprefix +.code64 + +# GENERAL CONSTANTS + +.set R_MT_INDEX, r13 +.set R_MT_ADDR, r12 +.set R_STEP, r14 +.set R_AUX, r9 +.set R_AUX2, rcx # NOTE: used by rep +.set R_SRC, rsi # NOTE: used by rep +.set R_DST, rdi # NOTE: used by rep +.set R_COUNT, rdx +.set R_ENCODE, rax + +# GENERAL CONSTANTS + +.equ MAX_MTRACE_REGS_ACCESS_SIZE, (2 + 2 + 3) * 8 +.equ CHUNK_SIZE, (1 << 18) +.equ MAX_TRACE_CHUNK_INFO, ((44*8) + 32) +.equ MAX_BYTES_DIRECT_MTRACE, 256 +.equ MAX_BYTES_MTRACE_STEP, (MAX_BYTES_DIRECT_MTRACE + MAX_MTRACE_REGS_ACCESS_SIZE) +.equ MAX_CHUNK_TRACE_SIZE, CHUNK_SIZE * MAX_BYTES_MTRACE_STEP + MAX_TRACE_CHUNK_INFO +.equ FAST_ENCODE_TABLE_WO_NEQ_SIZE, 8 * 8 * 16 + +# 1 encoded + 2 prewrites + 2 src reads +.equ MAX_DMA_EXTRA_BYTES, (2 + 2 + 1) * 8 +.equ MAX_DMA_BYTES_DIRECT_MTRACE, (MAX_BYTES_DIRECT_MTRACE - MAX_DMA_EXTRA_BYTES) +.equ MAX_DMA_MT_MARGIN, (MAX_DMA_BYTES_DIRECT_MTRACE + MAX_DMA_EXTRA_BYTES) + +.equ EXTRA_PARAMETER_ADDR, 0xA0000F00 + +# ENCODE CONSTANTS +# +# bits offset mask +# ---- ------ -------- +# pre_count: 0-7 3 0 0x0000_0000_0000_0007 +# post_count: 0-7 4 3 0x0000_0000_0000_0078 (memcmp uses 8) +# pre_writes: 0,1,2 2 7 0x0000_0000_0000_0180 +# dst_offset: 0-7 3 9 0x0000_0000_0000_0E00 +# src_offset: 0-7 3 12 0x0000_0000_0000_7000 +# double_src_pre: 0,1 1 15 0x0000_0000_0000_8000 +# double_src_post: 0,1 1 16 0x0000_0000_0001_0000 +# extra_src_reads: 0-3 2 17 0x0000_0000_0006_0000 +# src64_inc_by_pre: 1 19 0x0000_0000_0008_0000 +# unaligned_dst_src: 1 20 0x0000_0000_0010_0000 +# fill_byte/cmp_res: 8 21 0x0000_0000_1FE0_0000 +# requires_dma: 1 29 0x0000_0000_2000_0000 +# pre_count: 0-7 3 32 only for optimization PRE_AND_LOOP_BYTES +# loop_count 32 0 0xFFFF_FFFF_0000_0000 + +# BITS 31,32,32 + +.equ DMA_PRE_COUNT_MASK, 0x0000000000000007 +.equ DMA_POST_COUNT_MASK, 0x0000000000000078 +.equ DMA_PRE_WRITES_MASK, 0x0000000000000180 +.equ DMA_DST_OFFSET_MASK, 0x0000000000000E00 +.equ DMA_SRC_OFFSET_MASK, 0x0000000000007000 +.equ DMA_DOUBLE_SRC_PRE_MASK, 0x0000000000008000 +.equ DMA_DOUBLE_SRC_POST_MASK, 0x0000000000010000 +.equ DMA_EXTRA_SRC_READS_MASK, 0x0000000000060000 +.equ DMA_SRC64_INC_BY_PRE_MASK, 0x0000000000080000 +.equ DMA_UNALIGNED_DST_SRC_MASK, 0x0000000000100000 +.equ DMA_FILL_BYTE_MASK, 0x000000000FE00000 +.equ DMA_CMP_RES_MASK, 0x000000001FE00000 +.equ DMA_REQUIRES_DMA_MASK, 0x0000000040000000 +.equ DMA_LOOP_COUNT_MASK, 0xFFFFFFFF00000000 +.equ DMA_ONLY_LOOP_COUNT_MASK, 0xFFFFFFFF00000000 +.equ DMA_FULL_ALIGN_MASK, 0x00000000001FFFFF +.equ DMA_DIRECT_MASK, 0x00000000201FFFFF +.equ DMA_CMP_RES_SIGN_TEST_MASK, 0x0000000020000000 + + +.equ DMA_PRE_COUNT_RS, 0 +.equ DMA_POST_COUNT_RS, 3 +.equ DMA_PRE_WRITES_RS, 7 +.equ DMA_DST_OFFSET_RS, 9 +.equ DMA_SRC_OFFSET_RS, 12 +.equ DMA_DOUBLE_SRC_PRE_RS, 15 +.equ DMA_DOUBLE_SRC_POST_RS, 16 +.equ DMA_EXTRA_SRC_READS_RS, 17 +.equ DMA_SRC64_INC_BY_PRE_RS, 19 +.equ DMA_UNALIGNED_DST_SRC_RS, 20 +.equ DMA_FILL_BYTE_RS, 21 +.equ DMA_CMP_RES_RS, 21 +.equ DMA_LOOP_COUNT_RS, 35 +.equ DMA_PRE_AND_LOOP_BYTES_RS, 32 +.equ ALIGN_MASK, 0xFFFFFFFFFFFFFFF8 +.equ MOPS_BLOCK_WORDS_RS, 36 +.equ MOPS_BLOCK_ONE_WORD, 0x0000001000000000 +.equ MOPS_BLOCK_READ, 0x0000000A00000000 +.equ MOPS_BLOCK_WRITE, 0x0000000B00000000 +.equ MOPS_ALIGNED_READ, 0x0000000C00000000 +.equ MOPS_ALIGNED_WRITE, 0x0000000D00000000 +.equ MOPS_ALIGNED_BLOCK_READ, 0x0000000E00000000 +.equ MOPS_ALIGNED_BLOCK_WRITE, 0x0000000F00000000 + +.equ FCALL_PARAMS_LENGTH, 386 +.equ FCALL_RESULT_LENGTH, 8193 +.equ FCALL_FUNCTION_ID, 0 +.equ FCALL_PARAMS_CAPACITY, FCALL_FUNCTION_ID + 1 +.equ FCALL_PARAMS_SIZE, FCALL_PARAMS_CAPACITY + 1 +.equ FCALL_PARAMS, FCALL_PARAMS_SIZE + 1 +.equ FCALL_RESULT_CAPACITY, FCALL_PARAMS + FCALL_PARAMS_LENGTH +.equ FCALL_RESULT_SIZE, FCALL_RESULT_CAPACITY + 1 +.equ FCALL_RESULT, FCALL_RESULT_SIZE + 1 +.equ FCALL_RESULT_GOT, FCALL_RESULT + FCALL_RESULT_LENGTH + +.equ MOPS_ALIGNED_READ_2W, ((2 << MOPS_BLOCK_WORDS_RS) + MOPS_ALIGNED_BLOCK_READ) +.equ LOOP_COUNT_TO_MOPS_BLOCK, (MOPS_BLOCK_WORDS_RS - DMA_LOOP_COUNT_RS) +.equ PRE_WRITES_TO_MOPS_BLOCK, (MOPS_BLOCK_WORDS_RS - DMA_PRE_WRITES_RS) + +# Additional encode constants for fast_dma_encode +.equ DMA_LPRE_COUNT_RS, 32 +.equ DMA_FILL_BYTE_RS, 21 +.equ DMA_FILL_BITS9_MASK, 0x1FF + +.equ ENCODE_MEMSET_ZERO, DMA_REQUIRES_DMA_MASK +.equ ENCODE_MEMSET_ALIGNED_NO_COUNT_M8, DMA_REQUIRES_DMA_MASK | (1 << DMA_PRE_WRITES_RS) diff --git a/emulator-asm/src/dma/fast_dma_encode.asm b/emulator-asm/src/dma/fast_dma_encode.asm new file mode 100644 index 000000000..1397a3ffb --- /dev/null +++ b/emulator-asm/src/dma/fast_dma_encode.asm @@ -0,0 +1,136 @@ +.intel_syntax noprefix +.code64 + +# Include DMA constants +.include "dma_constants.inc" + +################################################################################ +# fast_dma_encode - Optimized function to encode dma information +# +# REGISTER USAGE: +# Modified registers: rax, r8 +# Does NOT use XMM registers (caller doesn't need to save them) +# +# PARAMETERS (System V AMD64 ABI): +# rdi = dst (u64) - Destination address +# rsi = src (u64) - Source address +# rdx = count (usize) - Number of bytes to copy +# +# RETURN VALUE: +# rax = encoded value +# +# ENCODED METADATA (bits): +# 0-2: pre_count - Bytes to copy before alignment (0-7) +# 3-6: post_count - Bytes to copy after aligned chunks (0-8) (* 8 memset case) +# 7-8: pre_writes - Number of pre/post partial writes (0, 1, or 2) +# 9-11: dst_offset - Byte offset within dst qword (0-7) +# 12-14: src_offset - Byte offset within src qword (0-7) +# 15: double_src_pre - Flag: pre-read spans two src qwords +# 16: double_src_post - Flag: post-read spans two src qwords +# 17-18: extra_src_reads - Additional src qword reads needed (0-3) +# 19: src64_inc_by_pre - Flag: indicate loop use src64 + 8 +# 20: unaligned_dst_src - Flag: dst and src has diferent alignement +# 21-28: fill_byte/cmp_res - Byte value for fill or compare result +# 29: cmp_negative flag - Comparation between two bytes generate 9 bits (one of them for sign) +# 30: requires_dma - Flag: indicates if operation requires DMA (*) +# 31: reserved +# 32-34: pre_count (loop) - Byte value for fill or compare result +# 35-63: loop_count - Number of 8-byte chunks in main copy loop +# +# (*) when compare exists an edge case when dst is aligned and effetive_count is multiple of 8, only +# PRE_POST machine could verify the different byte, in this case POST could be 8 bytes of length. +# effective_count is the number of bytes to check to compare. +# +# (*) The requires_dma flag is set by function caller when the operation it's a memcmp, because for +# this operation always a DMA is required. +# +################################################################################ + +.global fast_dma_memcpy_encode +.global fast_dma_memcmp_encode +.global fast_dma_memset_encode +.global fast_dma_memset_with_byte_encode +.global fast_dma_inputcpy_encode +.global fast_dma_memcmp_with_result_encode +.global fast_dma_encode +.section .text + +.include "dma_constants.inc" +.include "fast_dma_encode_macro.inc" + +# PARAMETERS (System V AMD64 ABI): +# rdi = dst (u64) - Destination address +# rsi = src (u64) - Source address +# rdx = count (usize) - Number of bytes to copy +# RESULT rax = encoded value +fast_dma_memcpy_encode: + FAST_DMA_ENCODE + ret + +# PARAMETERS (System V AMD64 ABI): +# rdi = dst (u64) - Destination address +# rsi = src (u64) - Source address +# rdx = count (usize) - Number of bytes to copy +# RESULT rax = encoded value +# NOTE: This function don't encode the result, only take in consideration to calculate +# NOTE: FAST_ENCODE_TABLE_WO_NEQ_SIZE ==> DMA_REQUIRES_DMA_MASK +fast_dma_memcmp_neq_encode: + FAST_DMA_ENCODE_MEMCMP FAST_ENCODE_TABLE_WO_NEQ_SIZE + ret + +# PARAMETERS (System V AMD64 ABI): +# rdi = dst (u64) - Destination address +# rsi = src (u64) - Source address +# rdx = count (usize) - Number of bytes to copy +# RESULT rax = encoded value +# NOTE: This function don't encode the result, only take in consideration to calculate +fast_dma_memcmp_eq_encode: + FAST_DMA_ENCODE_MEMCMP 0 + ret + +# PARAMETERS (System V AMD64 ABI): +# rdi = dst (u64) - Destination address +# rsi = src (u64) - Source address +# rdx = count (usize) - Number of bytes to copy +# r9 = result (9 bits) - NOTE: value will be modified +# RESULT rax = encoded value +fast_dma_memcmp_encode: + and r9, DMA_FILL_BITS9_MASK # Ensure result is in lower 9 bits + jz .L_fast_dma_memcmp_encode_eq + FAST_DMA_ENCODE_MEMCMP FAST_ENCODE_TABLE_WO_NEQ_SIZE + shl r9, DMA_FILL_BYTE_RS # r8 has the result byte in the lower 8 bits + or rax, r9 + ret + +.L_fast_dma_memcmp_encode_eq: + FAST_DMA_ENCODE_MEMCMP 0 + ret + + +# PARAMETERS: +# rdi = dst (u64) - Destination address +# rdx = count (usize) - Number of bytes to copy + + +fast_dma_inputcpy_encode: +fast_dma_no_src_encode: + FAST_DMA_ENCODE_NO_SRC + ret + + +# PARAMETERS (System V AMD64 ABI): +# rdi = dst (u64) - Destination address +# rsi = fill byte - Source address +# rdx = count (usize) - Number of bytes to copy +fast_dma_memset_with_byte_encode: + FAST_DMA_ENCODE_NO_SRC + movzx r9, sil + shl r9, DMA_FILL_BYTE_RS # r8 has the result byte in the lower 8 bits + or rax, r9 + ret + +# Mark stack as non-executable (required by modern linkers) +.section .note.GNU-stack,"",%progbits + +# Include the lookup table in the .rodata section +.include "fast_dma_encode_table.asm" diff --git a/emulator-asm/src/dma/fast_dma_encode_macro.inc b/emulator-asm/src/dma/fast_dma_encode_macro.inc new file mode 100644 index 000000000..b0d056a63 --- /dev/null +++ b/emulator-asm/src/dma/fast_dma_encode_macro.inc @@ -0,0 +1,114 @@ +.intel_syntax noprefix +.code64 + +.extern fast_dma_encode_no_src_table +.extern fast_dma_encode_table +.extern fast_dma_encode_memcmp_table + +.macro FAST_DMA_ENCODE + mov rax, rdi + and rax, 0x07 # dst_offset (0-7) + shl rax, 7 # dst_offset << 7 + + mov r8, rsi + and r8, 0x07 # src_offset (0-7) + shl r8, 4 # src_offset << 4 + + or rax, r8 # combine dst and src offsets + + # Calculate table_count + mov r8, rdx + cmp r8, 16 + jb 1f + + # count >= 16: table_count = (count & 0x07) | 0x08 + and r8, 0x07 + or r8, 0x08 + +1: + or rax, r8 # rax = index = (dst<<7) + (src<<4) + table_count + + # Look up encoded value in table (direct access since it's in the same file) + mov rax, [fast_dma_encode_table + rax * 8] + + # Add (count >> 3) to result + mov r8, rdx + shl r8, DMA_LPRE_COUNT_RS + add rax, r8 +.endm + +.macro FAST_DMA_ENCODE_NO_SRC + mov rax, rdi + and rax, 0x07 # dst_offset (0-7) + shl rax, 4 # dst_offset << 4 + + # Calculate table_count + mov r8, rdx + cmp r8, 16 + jb 1f + + # count >= 16: table_count = (count & 0x07) | 0x08 + and r8, 0x07 + or r8, 0x08 + +1: + or rax, r8 # rax = index = (dst<<7) + (src<<4) + table_count + + mov rax, [fast_dma_encode_no_src_table + rax * 8] + + # Add (count >> 3) to result + mov r8, rdx + shl r8, DMA_LPRE_COUNT_RS + add rax, r8 +.endm + + +.macro FAST_DMA_ENCODE_MEMCMP extra_table_offset=0 + mov rax, rdi + and rax, 0x07 # dst_offset (0-7) + shl rax, 7 # dst_offset << 7 + + mov r8, rsi + and r8, 0x07 # src_offset (0-7) + shl r8, 4 # src_offset << 4 + + or rax, r8 # combine dst and src offsets + + # Calculate table_count + mov r8, rdx + cmp r8, 16 + jb 1f + + # count >= 16: table_count = (count & 0x07) | 0x08 + and r8, 0x07 + or r8, 0x08 + +1: + or rax, r8 # rax = index = (dst<<7) + (src<<4) + table_count + + # Look up encoded value in table (direct access since it's in the same file) + mov rax, [fast_dma_encode_memcmp_table + rax * 8 + \extra_table_offset * 8] + + # Add (count >> 3) to result + mov r8, rdx + shl r8, DMA_LPRE_COUNT_RS + add rax, r8 +.endm + +# always a zero encoded requires a DMA machine, because It is the only machine +# that can formally verify a DMA operation with count = 0. +.macro FAST_DMA_ENCODE_COUNT_ZERO use_src=1 + mov rax, rdi + and rax, 0x07 # dst_offset (0-7) + shl rax, DMA_DST_OFFSET_RS # dst_offset << 7 + + .if \use_src + mov r8, rsi + and r8, 0x07 # src_offset (0-7) + shl r8, DMA_SRC_OFFSET_RS # src_offset << 4 + lea rax, [rax + r8 + DMA_REQUIRES_DMA_MASK] + .else + or rax, DMA_REQUIRES_DMA_MASK + .endif +.endm + \ No newline at end of file diff --git a/emulator-asm/src/dma/fast_dma_encode_table.asm b/emulator-asm/src/dma/fast_dma_encode_table.asm new file mode 100644 index 000000000..30597027c --- /dev/null +++ b/emulator-asm/src/dma/fast_dma_encode_table.asm @@ -0,0 +1,816 @@ +.intel_syntax noprefix +.code64 + +.globl fast_dma_encode_no_src_table +.globl fast_dma_encode_table +.globl fast_dma_encode_memcmp_table + +.section .data +# generated with precompiles/helpers/src/dma +# asm_fast_encode_table test + +fast_dma_encode_no_src_table: + .quad 0x0000000040000000, 0xFFFFFFFF40000088, 0xFFFFFFFE40000090, 0xFFFFFFFD40000098 # 0 - 3 D0 C0 + .quad 0xfffffffc400000a0, 0xFFFFFFFB400000A8, 0xFFFFFFFA400000B0, 0xFFFFFFF9400000B8 # 4 - 7 D0 C4 + .quad 0x0000000000000000, 0xFFFFFFFF40000088, 0xFFFFFFFE40000090, 0xFFFFFFFD40000098 # 8 - 11 D0 C8 + .quad 0xfffffffc400000a0, 0xFFFFFFFB400000A8, 0xFFFFFFFA400000B0, 0xFFFFFFF9400000B8 # 12 - 15 D0 C12 + .quad 0x0000000040000200, 0x0000000040000281, 0x0000000040000282, 0x0000000040000283 # 16 - 19 D0 C0 + .quad 0x0000000040000284, 0x0000000040000285, 0x0000000040000286, 0x0000000040000287 # 20 - 23 D0 C4 + .quad 0xffffffff4000030f, 0xFFFFFFFE40000317, 0xFFFFFFFD4000031F, 0xFFFFFFFC40000327 # 24 - 27 D0 C8 + .quad 0xfffffffb4000032f, 0xFFFFFFFA40000337, 0xFFFFFFF94000033F, 0x0000000040000287 # 28 - 31 D0 C12 + .quad 0x0000000040000400, 0x0000000040000481, 0x0000000040000482, 0x0000000040000483 # 32 - 35 D1 C0 + .quad 0x0000000040000484, 0x0000000040000485, 0x0000000040000486, 0xFFFFFFFF4000050E # 36 - 39 D1 C4 + .quad 0xfffffffe40000516, 0xFFFFFFFD4000051E, 0xFFFFFFFC40000526, 0xFFFFFFFB4000052E # 40 - 43 D1 C8 + .quad 0xfffffffa40000536, 0xFFFFFFF94000053E, 0x0000000040000486, 0xFFFFFFFF4000050E # 44 - 47 D1 C12 + .quad 0x0000000040000600, 0x0000000040000681, 0x0000000040000682, 0x0000000040000683 # 48 - 51 D1 C0 + .quad 0x0000000040000684, 0x0000000040000685, 0xFFFFFFFF4000070D, 0xFFFFFFFE40000715 # 52 - 55 D1 C4 + .quad 0xfffffffd4000071d, 0xFFFFFFFC40000725, 0xFFFFFFFB4000072D, 0xFFFFFFFA40000735 # 56 - 59 D1 C8 + .quad 0xfffffff94000073d, 0x0000000040000685, 0xFFFFFFFF4000070D, 0xFFFFFFFE40000715 # 60 - 63 D1 C12 + .quad 0x0000000040000800, 0x0000000040000881, 0x0000000040000882, 0x0000000040000883 # 64 - 67 D2 C0 + .quad 0x0000000040000884, 0xFFFFFFFF4000090C, 0xFFFFFFFE40000914, 0xFFFFFFFD4000091C # 68 - 71 D2 C4 + .quad 0xfffffffc40000924, 0xFFFFFFFB4000092C, 0xFFFFFFFA40000934, 0xFFFFFFF94000093C # 72 - 75 D2 C8 + .quad 0x0000000040000884, 0xFFFFFFFF4000090C, 0xFFFFFFFE40000914, 0xFFFFFFFD4000091C # 76 - 79 D2 C12 + .quad 0x0000000040000a00, 0x0000000040000A81, 0x0000000040000A82, 0x0000000040000A83 # 80 - 83 D2 C0 + .quad 0xffffffff40000b0b, 0xFFFFFFFE40000B13, 0xFFFFFFFD40000B1B, 0xFFFFFFFC40000B23 # 84 - 87 D2 C4 + .quad 0xfffffffb40000b2b, 0xFFFFFFFA40000B33, 0xFFFFFFF940000B3B, 0x0000000040000A83 # 88 - 91 D2 C8 + .quad 0xffffffff40000b0b, 0xFFFFFFFE40000B13, 0xFFFFFFFD40000B1B, 0xFFFFFFFC40000B23 # 92 - 95 D2 C12 + .quad 0x0000000040000c00, 0x0000000040000C81, 0x0000000040000C82, 0xFFFFFFFF40000D0A # 96 - 99 D3 C0 + .quad 0xfffffffe40000d12, 0xFFFFFFFD40000D1A, 0xFFFFFFFC40000D22, 0xFFFFFFFB40000D2A # 100 - 103 D3 C4 + .quad 0xfffffffa40000d32, 0xFFFFFFF940000D3A, 0x0000000040000C82, 0xFFFFFFFF40000D0A # 104 - 107 D3 C8 + .quad 0xfffffffe40000d12, 0xFFFFFFFD40000D1A, 0xFFFFFFFC40000D22, 0xFFFFFFFB40000D2A # 108 - 111 D3 C12 + .quad 0x0000000040000e00, 0x0000000040000E81, 0xFFFFFFFF40000F09, 0xFFFFFFFE40000F11 # 112 - 115 D3 C0 + .quad 0xfffffffd40000f19, 0xFFFFFFFC40000F21, 0xFFFFFFFB40000F29, 0xFFFFFFFA40000F31 # 116 - 119 D3 C4 + .quad 0xfffffff940000f39, 0x0000000040000E81, 0xFFFFFFFF40000F09, 0xFFFFFFFE40000F11 # 120 - 123 D3 C8 + .quad 0xfffffffd40000f19, 0xFFFFFFFC40000F21, 0xFFFFFFFB40000F29, 0xFFFFFFFA40000F31 # 124 - 127 D3 C12 + +fast_dma_encode_table: + .quad 0x0000000040000000, 0xFFFFFFFF40020088, 0xFFFFFFFE40020090, 0xFFFFFFFD40020098 # 0 - 3 D0 S0 C0 + .quad 0xfffffffc400200a0, 0xFFFFFFFB400200A8, 0xFFFFFFFA400200B0, 0xFFFFFFF9400200B8 # 4 - 7 D0 S0 C4 + .quad 0x0000000000000000, 0xFFFFFFFF40020088, 0xFFFFFFFE40020090, 0xFFFFFFFD40020098 # 8 - 11 D0 S0 C8 + .quad 0xfffffffc400200a0, 0xFFFFFFFB400200A8, 0xFFFFFFFA400200B0, 0xFFFFFFF9400200B8 # 12 - 15 D0 S0 C12 + .quad 0x0000000040001000, 0xFFFFFFFF40121088, 0xFFFFFFFE40121090, 0xFFFFFFFD40121098 # 16 - 19 D0 S1 C0 + .quad 0xfffffffc401210a0, 0xFFFFFFFB401210A8, 0xFFFFFFFA401210B0, 0xFFFFFFF9401210B8 # 20 - 23 D0 S1 C4 + .quad 0x0000000000121000, 0xFFFFFFFF40121088, 0xFFFFFFFE40121090, 0xFFFFFFFD40121098 # 24 - 27 D0 S1 C8 + .quad 0xfffffffc401210a0, 0xFFFFFFFB401210A8, 0xFFFFFFFA401210B0, 0xFFFFFFF9401210B8 # 28 - 31 D0 S1 C12 + .quad 0x0000000040002000, 0xFFFFFFFF40122088, 0xFFFFFFFE40122090, 0xFFFFFFFD40122098 # 32 - 35 D0 S2 C0 + .quad 0xfffffffc401220a0, 0xFFFFFFFB401220A8, 0xFFFFFFFA401220B0, 0xFFFFFFF9401520B8 # 36 - 39 D0 S2 C4 + .quad 0x0000000000122000, 0xFFFFFFFF40122088, 0xFFFFFFFE40122090, 0xFFFFFFFD40122098 # 40 - 43 D0 S2 C8 + .quad 0xfffffffc401220a0, 0xFFFFFFFB401220A8, 0xFFFFFFFA401220B0, 0xFFFFFFF9401520B8 # 44 - 47 D0 S2 C12 + .quad 0x0000000040003000, 0xFFFFFFFF40123088, 0xFFFFFFFE40123090, 0xFFFFFFFD40123098 # 48 - 51 D0 S3 C0 + .quad 0xfffffffc401230a0, 0xFFFFFFFB401230A8, 0xFFFFFFFA401530B0, 0xFFFFFFF9401530B8 # 52 - 55 D0 S3 C4 + .quad 0x0000000000123000, 0xFFFFFFFF40123088, 0xFFFFFFFE40123090, 0xFFFFFFFD40123098 # 56 - 59 D0 S3 C8 + .quad 0xfffffffc401230a0, 0xFFFFFFFB401230A8, 0xFFFFFFFA401530B0, 0xFFFFFFF9401530B8 # 60 - 63 D0 S3 C12 + .quad 0x0000000040004000, 0xFFFFFFFF40124088, 0xFFFFFFFE40124090, 0xFFFFFFFD40124098 # 64 - 67 D0 S4 C0 + .quad 0xfffffffc401240a0, 0xFFFFFFFB401540A8, 0xFFFFFFFA401540B0, 0xFFFFFFF9401540B8 # 68 - 71 D0 S4 C4 + .quad 0x0000000000124000, 0xFFFFFFFF40124088, 0xFFFFFFFE40124090, 0xFFFFFFFD40124098 # 72 - 75 D0 S4 C8 + .quad 0xfffffffc401240a0, 0xFFFFFFFB401540A8, 0xFFFFFFFA401540B0, 0xFFFFFFF9401540B8 # 76 - 79 D0 S4 C12 + .quad 0x0000000040005000, 0xFFFFFFFF40125088, 0xFFFFFFFE40125090, 0xFFFFFFFD40125098 # 80 - 83 D0 S5 C0 + .quad 0xfffffffc401550a0, 0xFFFFFFFB401550A8, 0xFFFFFFFA401550B0, 0xFFFFFFF9401550B8 # 84 - 87 D0 S5 C4 + .quad 0x0000000000125000, 0xFFFFFFFF40125088, 0xFFFFFFFE40125090, 0xFFFFFFFD40125098 # 88 - 91 D0 S5 C8 + .quad 0xfffffffc401550a0, 0xFFFFFFFB401550A8, 0xFFFFFFFA401550B0, 0xFFFFFFF9401550B8 # 92 - 95 D0 S5 C12 + .quad 0x0000000040006000, 0xFFFFFFFF40126088, 0xFFFFFFFE40126090, 0xFFFFFFFD40156098 # 96 - 99 D0 S6 C0 + .quad 0xfffffffc401560a0, 0xFFFFFFFB401560A8, 0xFFFFFFFA401560B0, 0xFFFFFFF9401560B8 # 100 - 103 D0 S6 C4 + .quad 0x0000000000126000, 0xFFFFFFFF40126088, 0xFFFFFFFE40126090, 0xFFFFFFFD40156098 # 104 - 107 D0 S6 C8 + .quad 0xfffffffc401560a0, 0xFFFFFFFB401560A8, 0xFFFFFFFA401560B0, 0xFFFFFFF9401560B8 # 108 - 111 D0 S6 C12 + .quad 0x0000000040007000, 0xFFFFFFFF40127088, 0xFFFFFFFE40157090, 0xFFFFFFFD40157098 # 112 - 115 D0 S7 C0 + .quad 0xfffffffc401570a0, 0xFFFFFFFB401570A8, 0xFFFFFFFA401570B0, 0xFFFFFFF9401570B8 # 116 - 119 D0 S7 C4 + .quad 0x0000000000127000, 0xFFFFFFFF40127088, 0xFFFFFFFE40157090, 0xFFFFFFFD40157098 # 120 - 123 D0 S7 C8 + .quad 0xfffffffc401570a0, 0xFFFFFFFB401570A8, 0xFFFFFFFA401570B0, 0xFFFFFFF9401570B8 # 124 - 127 D0 S7 C12 + .quad 0x0000000040000200, 0x0000000040120281, 0x0000000040120282, 0x0000000040120283 # 128 - 131 D1 S0 C0 + .quad 0x0000000040120284, 0x0000000040120285, 0x0000000040120286, 0x0000000040120287 # 132 - 135 D1 S0 C4 + .quad 0xffffffff4012030f, 0xFFFFFFFE40150317, 0xFFFFFFFD4015031F, 0xFFFFFFFC40150327 # 136 - 139 D1 S0 C8 + .quad 0xfffffffb4015032f, 0xFFFFFFFA40150337, 0xFFFFFFF94015033F, 0x0000000040120287 # 140 - 143 D1 S0 C12 + .quad 0x0000000040001200, 0x0000000040021281, 0x0000000040021282, 0x0000000040021283 # 144 - 147 D1 S1 C0 + .quad 0x0000000040021284, 0x0000000040021285, 0x0000000040021286, 0x00000000400A1287 # 148 - 151 D1 S1 C4 + .quad 0xffffffff400c130f, 0xFFFFFFFE400C1317, 0xFFFFFFFD400C131F, 0xFFFFFFFC400C1327 # 152 - 155 D1 S1 C8 + .quad 0xfffffffb400c132f, 0xFFFFFFFA400C1337, 0xFFFFFFF9400C133F, 0x00000000400A1287 # 156 - 159 D1 S1 C12 + .quad 0x0000000040002200, 0x0000000040122281, 0x0000000040122282, 0x0000000040122283 # 160 - 163 D1 S2 C0 + .quad 0x0000000040122284, 0x0000000040122285, 0x00000000401A2286, 0x00000000401CA287 # 164 - 167 D1 S2 C4 + .quad 0xffffffff401ca30f, 0xFFFFFFFE401CA317, 0xFFFFFFFD401CA31F, 0xFFFFFFFC401CA327 # 168 - 171 D1 S2 C8 + .quad 0xfffffffb401ca32f, 0xFFFFFFFA401CA337, 0xFFFFFFF9401CA33F, 0x00000000401CA287 # 172 - 175 D1 S2 C12 + .quad 0x0000000040003200, 0x0000000040123281, 0x0000000040123282, 0x0000000040123283 # 176 - 179 D1 S3 C0 + .quad 0x0000000040123284, 0x00000000401A3285, 0x00000000401CB286, 0x00000000401CB287 # 180 - 183 D1 S3 C4 + .quad 0xffffffff401cb30f, 0xFFFFFFFE401CB317, 0xFFFFFFFD401CB31F, 0xFFFFFFFC401CB327 # 184 - 187 D1 S3 C8 + .quad 0xfffffffb401cb32f, 0xFFFFFFFA401CB337, 0xFFFFFFF9401FB33F, 0x00000000401CB287 # 188 - 191 D1 S3 C12 + .quad 0x0000000040004200, 0x0000000040124281, 0x0000000040124282, 0x0000000040124283 # 192 - 195 D1 S4 C0 + .quad 0x00000000401a4284, 0x00000000401CC285, 0x00000000401CC286, 0x00000000401CC287 # 196 - 199 D1 S4 C4 + .quad 0xffffffff401cc30f, 0xFFFFFFFE401CC317, 0xFFFFFFFD401CC31F, 0xFFFFFFFC401CC327 # 200 - 203 D1 S4 C8 + .quad 0xfffffffb401cc32f, 0xFFFFFFFA401FC337, 0xFFFFFFF9401FC33F, 0x00000000401CC287 # 204 - 207 D1 S4 C12 + .quad 0x0000000040005200, 0x0000000040125281, 0x0000000040125282, 0x00000000401A5283 # 208 - 211 D1 S5 C0 + .quad 0x00000000401cd284, 0x00000000401CD285, 0x00000000401CD286, 0x00000000401CD287 # 212 - 215 D1 S5 C4 + .quad 0xffffffff401cd30f, 0xFFFFFFFE401CD317, 0xFFFFFFFD401CD31F, 0xFFFFFFFC401CD327 # 216 - 219 D1 S5 C8 + .quad 0xfffffffb401fd32f, 0xFFFFFFFA401FD337, 0xFFFFFFF9401FD33F, 0x00000000401CD287 # 220 - 223 D1 S5 C12 + .quad 0x0000000040006200, 0x0000000040126281, 0x00000000401A6282, 0x00000000401CE283 # 224 - 227 D1 S6 C0 + .quad 0x00000000401ce284, 0x00000000401CE285, 0x00000000401CE286, 0x00000000401CE287 # 228 - 231 D1 S6 C4 + .quad 0xffffffff401ce30f, 0xFFFFFFFE401CE317, 0xFFFFFFFD401CE31F, 0xFFFFFFFC401FE327 # 232 - 235 D1 S6 C8 + .quad 0xfffffffb401fe32f, 0xFFFFFFFA401FE337, 0xFFFFFFF9401FE33F, 0x00000000401CE287 # 236 - 239 D1 S6 C12 + .quad 0x0000000040007200, 0x00000000401A7281, 0x00000000401CF282, 0x00000000401CF283 # 240 - 243 D1 S7 C0 + .quad 0x00000000401cf284, 0x00000000401CF285, 0x00000000401CF286, 0x00000000401CF287 # 244 - 247 D1 S7 C4 + .quad 0xffffffff401cf30f, 0xFFFFFFFE401CF317, 0xFFFFFFFD401FF31F, 0xFFFFFFFC401FF327 # 248 - 251 D1 S7 C8 + .quad 0xfffffffb401ff32f, 0xFFFFFFFA401FF337, 0xFFFFFFF9401FF33F, 0x00000000401CF287 # 252 - 255 D1 S7 C12 + .quad 0x0000000040000400, 0x0000000040120481, 0x0000000040120482, 0x0000000040120483 # 256 - 259 D2 S0 C0 + .quad 0x0000000040120484, 0x0000000040120485, 0x0000000040120486, 0xFFFFFFFF4012050E # 260 - 263 D2 S0 C4 + .quad 0xfffffffe40120516, 0xFFFFFFFD4015051E, 0xFFFFFFFC40150526, 0xFFFFFFFB4015052E # 264 - 267 D2 S0 C8 + .quad 0xfffffffa40150536, 0xFFFFFFF94015053E, 0x0000000040120486, 0xFFFFFFFF4012050E # 268 - 271 D2 S0 C12 + .quad 0x0000000040001400, 0x0000000040121481, 0x0000000040121482, 0x0000000040121483 # 272 - 275 D2 S1 C0 + .quad 0x0000000040121484, 0x0000000040121485, 0x0000000040121486, 0xFFFFFFFF4012150E # 276 - 279 D2 S1 C4 + .quad 0xfffffffe40151516, 0xFFFFFFFD4015151E, 0xFFFFFFFC40151526, 0xFFFFFFFB4015152E # 280 - 283 D2 S1 C8 + .quad 0xfffffffa40151536, 0xFFFFFFF94015153E, 0x0000000040121486, 0xFFFFFFFF4012150E # 284 - 287 D2 S1 C12 + .quad 0x0000000040002400, 0x0000000040022481, 0x0000000040022482, 0x0000000040022483 # 288 - 291 D2 S2 C0 + .quad 0x0000000040022484, 0x0000000040022485, 0x00000000400A2486, 0xFFFFFFFF400C250E # 292 - 295 D2 S2 C4 + .quad 0xfffffffe400c2516, 0xFFFFFFFD400C251E, 0xFFFFFFFC400C2526, 0xFFFFFFFB400C252E # 296 - 299 D2 S2 C8 + .quad 0xfffffffa400c2536, 0xFFFFFFF9400C253E, 0x00000000400A2486, 0xFFFFFFFF400C250E # 300 - 303 D2 S2 C12 + .quad 0x0000000040003400, 0x0000000040123481, 0x0000000040123482, 0x0000000040123483 # 304 - 307 D2 S3 C0 + .quad 0x0000000040123484, 0x00000000401A3485, 0x00000000401CB486, 0xFFFFFFFF401CB50E # 308 - 311 D2 S3 C4 + .quad 0xfffffffe401cb516, 0xFFFFFFFD401CB51E, 0xFFFFFFFC401CB526, 0xFFFFFFFB401CB52E # 312 - 315 D2 S3 C8 + .quad 0xfffffffa401cb536, 0xFFFFFFF9401CB53E, 0x00000000401CB486, 0xFFFFFFFF401CB50E # 316 - 319 D2 S3 C12 + .quad 0x0000000040004400, 0x0000000040124481, 0x0000000040124482, 0x0000000040124483 # 320 - 323 D2 S4 C0 + .quad 0x00000000401a4484, 0x00000000401CC485, 0x00000000401CC486, 0xFFFFFFFF401CC50E # 324 - 327 D2 S4 C4 + .quad 0xfffffffe401cc516, 0xFFFFFFFD401CC51E, 0xFFFFFFFC401CC526, 0xFFFFFFFB401CC52E # 328 - 331 D2 S4 C8 + .quad 0xfffffffa401cc536, 0xFFFFFFF9401FC53E, 0x00000000401CC486, 0xFFFFFFFF401CC50E # 332 - 335 D2 S4 C12 + .quad 0x0000000040005400, 0x0000000040125481, 0x0000000040125482, 0x00000000401A5483 # 336 - 339 D2 S5 C0 + .quad 0x00000000401cd484, 0x00000000401CD485, 0x00000000401CD486, 0xFFFFFFFF401CD50E # 340 - 343 D2 S5 C4 + .quad 0xfffffffe401cd516, 0xFFFFFFFD401CD51E, 0xFFFFFFFC401CD526, 0xFFFFFFFB401CD52E # 344 - 347 D2 S5 C8 + .quad 0xfffffffa401fd536, 0xFFFFFFF9401FD53E, 0x00000000401CD486, 0xFFFFFFFF401CD50E # 348 - 351 D2 S5 C12 + .quad 0x0000000040006400, 0x0000000040126481, 0x00000000401A6482, 0x00000000401CE483 # 352 - 355 D2 S6 C0 + .quad 0x00000000401ce484, 0x00000000401CE485, 0x00000000401CE486, 0xFFFFFFFF401CE50E # 356 - 359 D2 S6 C4 + .quad 0xfffffffe401ce516, 0xFFFFFFFD401CE51E, 0xFFFFFFFC401CE526, 0xFFFFFFFB401FE52E # 360 - 363 D2 S6 C8 + .quad 0xfffffffa401fe536, 0xFFFFFFF9401FE53E, 0x00000000401CE486, 0xFFFFFFFF401CE50E # 364 - 367 D2 S6 C12 + .quad 0x0000000040007400, 0x00000000401A7481, 0x00000000401CF482, 0x00000000401CF483 # 368 - 371 D2 S7 C0 + .quad 0x00000000401cf484, 0x00000000401CF485, 0x00000000401CF486, 0xFFFFFFFF401CF50E # 372 - 375 D2 S7 C4 + .quad 0xfffffffe401cf516, 0xFFFFFFFD401CF51E, 0xFFFFFFFC401FF526, 0xFFFFFFFB401FF52E # 376 - 379 D2 S7 C8 + .quad 0xfffffffa401ff536, 0xFFFFFFF9401FF53E, 0x00000000401CF486, 0xFFFFFFFF401CF50E # 380 - 383 D2 S7 C12 + .quad 0x0000000040000600, 0x0000000040120681, 0x0000000040120682, 0x0000000040120683 # 384 - 387 D3 S0 C0 + .quad 0x0000000040120684, 0x0000000040120685, 0xFFFFFFFF4012070D, 0xFFFFFFFE40120715 # 388 - 391 D3 S0 C4 + .quad 0xfffffffd4012071d, 0xFFFFFFFC40150725, 0xFFFFFFFB4015072D, 0xFFFFFFFA40150735 # 392 - 395 D3 S0 C8 + .quad 0xfffffff94015073d, 0x0000000040120685, 0xFFFFFFFF4012070D, 0xFFFFFFFE40120715 # 396 - 399 D3 S0 C12 + .quad 0x0000000040001600, 0x0000000040121681, 0x0000000040121682, 0x0000000040121683 # 400 - 403 D3 S1 C0 + .quad 0x0000000040121684, 0x0000000040121685, 0xFFFFFFFF4012170D, 0xFFFFFFFE40121715 # 404 - 407 D3 S1 C4 + .quad 0xfffffffd4015171d, 0xFFFFFFFC40151725, 0xFFFFFFFB4015172D, 0xFFFFFFFA40151735 # 408 - 411 D3 S1 C8 + .quad 0xfffffff94015173d, 0x0000000040121685, 0xFFFFFFFF4012170D, 0xFFFFFFFE40121715 # 412 - 415 D3 S1 C12 + .quad 0x0000000040002600, 0x0000000040122681, 0x0000000040122682, 0x0000000040122683 # 416 - 419 D3 S2 C0 + .quad 0x0000000040122684, 0x0000000040122685, 0xFFFFFFFF4012270D, 0xFFFFFFFE40152715 # 420 - 423 D3 S2 C4 + .quad 0xfffffffd4015271d, 0xFFFFFFFC40152725, 0xFFFFFFFB4015272D, 0xFFFFFFFA40152735 # 424 - 427 D3 S2 C8 + .quad 0xfffffff94015273d, 0x0000000040122685, 0xFFFFFFFF4012270D, 0xFFFFFFFE40152715 # 428 - 431 D3 S2 C12 + .quad 0x0000000040003600, 0x0000000040023681, 0x0000000040023682, 0x0000000040023683 # 432 - 435 D3 S3 C0 + .quad 0x0000000040023684, 0x00000000400A3685, 0xFFFFFFFF400C370D, 0xFFFFFFFE400C3715 # 436 - 439 D3 S3 C4 + .quad 0xfffffffd400c371d, 0xFFFFFFFC400C3725, 0xFFFFFFFB400C372D, 0xFFFFFFFA400C3735 # 440 - 443 D3 S3 C8 + .quad 0xfffffff9400c373d, 0x00000000400A3685, 0xFFFFFFFF400C370D, 0xFFFFFFFE400C3715 # 444 - 447 D3 S3 C12 + .quad 0x0000000040004600, 0x0000000040124681, 0x0000000040124682, 0x0000000040124683 # 448 - 451 D3 S4 C0 + .quad 0x00000000401a4684, 0x00000000401CC685, 0xFFFFFFFF401CC70D, 0xFFFFFFFE401CC715 # 452 - 455 D3 S4 C4 + .quad 0xfffffffd401cc71d, 0xFFFFFFFC401CC725, 0xFFFFFFFB401CC72D, 0xFFFFFFFA401CC735 # 456 - 459 D3 S4 C8 + .quad 0xfffffff9401cc73d, 0x00000000401CC685, 0xFFFFFFFF401CC70D, 0xFFFFFFFE401CC715 # 460 - 463 D3 S4 C12 + .quad 0x0000000040005600, 0x0000000040125681, 0x0000000040125682, 0x00000000401A5683 # 464 - 467 D3 S5 C0 + .quad 0x00000000401cd684, 0x00000000401CD685, 0xFFFFFFFF401CD70D, 0xFFFFFFFE401CD715 # 468 - 471 D3 S5 C4 + .quad 0xfffffffd401cd71d, 0xFFFFFFFC401CD725, 0xFFFFFFFB401CD72D, 0xFFFFFFFA401CD735 # 472 - 475 D3 S5 C8 + .quad 0xfffffff9401fd73d, 0x00000000401CD685, 0xFFFFFFFF401CD70D, 0xFFFFFFFE401CD715 # 476 - 479 D3 S5 C12 + .quad 0x0000000040006600, 0x0000000040126681, 0x00000000401A6682, 0x00000000401CE683 # 480 - 483 D3 S6 C0 + .quad 0x00000000401ce684, 0x00000000401CE685, 0xFFFFFFFF401CE70D, 0xFFFFFFFE401CE715 # 484 - 487 D3 S6 C4 + .quad 0xfffffffd401ce71d, 0xFFFFFFFC401CE725, 0xFFFFFFFB401CE72D, 0xFFFFFFFA401FE735 # 488 - 491 D3 S6 C8 + .quad 0xfffffff9401fe73d, 0x00000000401CE685, 0xFFFFFFFF401CE70D, 0xFFFFFFFE401CE715 # 492 - 495 D3 S6 C12 + .quad 0x0000000040007600, 0x00000000401A7681, 0x00000000401CF682, 0x00000000401CF683 # 496 - 499 D3 S7 C0 + .quad 0x00000000401cf684, 0x00000000401CF685, 0xFFFFFFFF401CF70D, 0xFFFFFFFE401CF715 # 500 - 503 D3 S7 C4 + .quad 0xfffffffd401cf71d, 0xFFFFFFFC401CF725, 0xFFFFFFFB401FF72D, 0xFFFFFFFA401FF735 # 504 - 507 D3 S7 C8 + .quad 0xfffffff9401ff73d, 0x00000000401CF685, 0xFFFFFFFF401CF70D, 0xFFFFFFFE401CF715 # 508 - 511 D3 S7 C12 + .quad 0x0000000040000800, 0x0000000040120881, 0x0000000040120882, 0x0000000040120883 # 512 - 515 D4 S0 C0 + .quad 0x0000000040120884, 0xFFFFFFFF4012090C, 0xFFFFFFFE40120914, 0xFFFFFFFD4012091C # 516 - 519 D4 S0 C4 + .quad 0xfffffffc40120924, 0xFFFFFFFB4015092C, 0xFFFFFFFA40150934, 0xFFFFFFF94015093C # 520 - 523 D4 S0 C8 + .quad 0x0000000040120884, 0xFFFFFFFF4012090C, 0xFFFFFFFE40120914, 0xFFFFFFFD4012091C # 524 - 527 D4 S0 C12 + .quad 0x0000000040001800, 0x0000000040121881, 0x0000000040121882, 0x0000000040121883 # 528 - 531 D4 S1 C0 + .quad 0x0000000040121884, 0xFFFFFFFF4012190C, 0xFFFFFFFE40121914, 0xFFFFFFFD4012191C # 532 - 535 D4 S1 C4 + .quad 0xfffffffc40151924, 0xFFFFFFFB4015192C, 0xFFFFFFFA40151934, 0xFFFFFFF94015193C # 536 - 539 D4 S1 C8 + .quad 0x0000000040121884, 0xFFFFFFFF4012190C, 0xFFFFFFFE40121914, 0xFFFFFFFD4012191C # 540 - 543 D4 S1 C12 + .quad 0x0000000040002800, 0x0000000040122881, 0x0000000040122882, 0x0000000040122883 # 544 - 547 D4 S2 C0 + .quad 0x0000000040122884, 0xFFFFFFFF4012290C, 0xFFFFFFFE40122914, 0xFFFFFFFD4015291C # 548 - 551 D4 S2 C4 + .quad 0xfffffffc40152924, 0xFFFFFFFB4015292C, 0xFFFFFFFA40152934, 0xFFFFFFF94015293C # 552 - 555 D4 S2 C8 + .quad 0x0000000040122884, 0xFFFFFFFF4012290C, 0xFFFFFFFE40122914, 0xFFFFFFFD4015291C # 556 - 559 D4 S2 C12 + .quad 0x0000000040003800, 0x0000000040123881, 0x0000000040123882, 0x0000000040123883 # 560 - 563 D4 S3 C0 + .quad 0x0000000040123884, 0xFFFFFFFF4012390C, 0xFFFFFFFE40153914, 0xFFFFFFFD4015391C # 564 - 567 D4 S3 C4 + .quad 0xfffffffc40153924, 0xFFFFFFFB4015392C, 0xFFFFFFFA40153934, 0xFFFFFFF94015393C # 568 - 571 D4 S3 C8 + .quad 0x0000000040123884, 0xFFFFFFFF4012390C, 0xFFFFFFFE40153914, 0xFFFFFFFD4015391C # 572 - 575 D4 S3 C12 + .quad 0x0000000040004800, 0x0000000040024881, 0x0000000040024882, 0x0000000040024883 # 576 - 579 D4 S4 C0 + .quad 0x00000000400a4884, 0xFFFFFFFF400C490C, 0xFFFFFFFE400C4914, 0xFFFFFFFD400C491C # 580 - 583 D4 S4 C4 + .quad 0xfffffffc400c4924, 0xFFFFFFFB400C492C, 0xFFFFFFFA400C4934, 0xFFFFFFF9400C493C # 584 - 587 D4 S4 C8 + .quad 0x00000000400a4884, 0xFFFFFFFF400C490C, 0xFFFFFFFE400C4914, 0xFFFFFFFD400C491C # 588 - 591 D4 S4 C12 + .quad 0x0000000040005800, 0x0000000040125881, 0x0000000040125882, 0x00000000401A5883 # 592 - 595 D4 S5 C0 + .quad 0x00000000401cd884, 0xFFFFFFFF401CD90C, 0xFFFFFFFE401CD914, 0xFFFFFFFD401CD91C # 596 - 599 D4 S5 C4 + .quad 0xfffffffc401cd924, 0xFFFFFFFB401CD92C, 0xFFFFFFFA401CD934, 0xFFFFFFF9401CD93C # 600 - 603 D4 S5 C8 + .quad 0x00000000401cd884, 0xFFFFFFFF401CD90C, 0xFFFFFFFE401CD914, 0xFFFFFFFD401CD91C # 604 - 607 D4 S5 C12 + .quad 0x0000000040006800, 0x0000000040126881, 0x00000000401A6882, 0x00000000401CE883 # 608 - 611 D4 S6 C0 + .quad 0x00000000401ce884, 0xFFFFFFFF401CE90C, 0xFFFFFFFE401CE914, 0xFFFFFFFD401CE91C # 612 - 615 D4 S6 C4 + .quad 0xfffffffc401ce924, 0xFFFFFFFB401CE92C, 0xFFFFFFFA401CE934, 0xFFFFFFF9401FE93C # 616 - 619 D4 S6 C8 + .quad 0x00000000401ce884, 0xFFFFFFFF401CE90C, 0xFFFFFFFE401CE914, 0xFFFFFFFD401CE91C # 620 - 623 D4 S6 C12 + .quad 0x0000000040007800, 0x00000000401A7881, 0x00000000401CF882, 0x00000000401CF883 # 624 - 627 D4 S7 C0 + .quad 0x00000000401cf884, 0xFFFFFFFF401CF90C, 0xFFFFFFFE401CF914, 0xFFFFFFFD401CF91C # 628 - 631 D4 S7 C4 + .quad 0xfffffffc401cf924, 0xFFFFFFFB401CF92C, 0xFFFFFFFA401FF934, 0xFFFFFFF9401FF93C # 632 - 635 D4 S7 C8 + .quad 0x00000000401cf884, 0xFFFFFFFF401CF90C, 0xFFFFFFFE401CF914, 0xFFFFFFFD401CF91C # 636 - 639 D4 S7 C12 + .quad 0x0000000040000a00, 0x0000000040120A81, 0x0000000040120A82, 0x0000000040120A83 # 640 - 643 D5 S0 C0 + .quad 0xffffffff40120b0b, 0xFFFFFFFE40120B13, 0xFFFFFFFD40120B1B, 0xFFFFFFFC40120B23 # 644 - 647 D5 S0 C4 + .quad 0xfffffffb40120b2b, 0xFFFFFFFA40150B33, 0xFFFFFFF940150B3B, 0x0000000040120A83 # 648 - 651 D5 S0 C8 + .quad 0xffffffff40120b0b, 0xFFFFFFFE40120B13, 0xFFFFFFFD40120B1B, 0xFFFFFFFC40120B23 # 652 - 655 D5 S0 C12 + .quad 0x0000000040001a00, 0x0000000040121A81, 0x0000000040121A82, 0x0000000040121A83 # 656 - 659 D5 S1 C0 + .quad 0xffffffff40121b0b, 0xFFFFFFFE40121B13, 0xFFFFFFFD40121B1B, 0xFFFFFFFC40121B23 # 660 - 663 D5 S1 C4 + .quad 0xfffffffb40151b2b, 0xFFFFFFFA40151B33, 0xFFFFFFF940151B3B, 0x0000000040121A83 # 664 - 667 D5 S1 C8 + .quad 0xffffffff40121b0b, 0xFFFFFFFE40121B13, 0xFFFFFFFD40121B1B, 0xFFFFFFFC40121B23 # 668 - 671 D5 S1 C12 + .quad 0x0000000040002a00, 0x0000000040122A81, 0x0000000040122A82, 0x0000000040122A83 # 672 - 675 D5 S2 C0 + .quad 0xffffffff40122b0b, 0xFFFFFFFE40122B13, 0xFFFFFFFD40122B1B, 0xFFFFFFFC40152B23 # 676 - 679 D5 S2 C4 + .quad 0xfffffffb40152b2b, 0xFFFFFFFA40152B33, 0xFFFFFFF940152B3B, 0x0000000040122A83 # 680 - 683 D5 S2 C8 + .quad 0xffffffff40122b0b, 0xFFFFFFFE40122B13, 0xFFFFFFFD40122B1B, 0xFFFFFFFC40152B23 # 684 - 687 D5 S2 C12 + .quad 0x0000000040003a00, 0x0000000040123A81, 0x0000000040123A82, 0x0000000040123A83 # 688 - 691 D5 S3 C0 + .quad 0xffffffff40123b0b, 0xFFFFFFFE40123B13, 0xFFFFFFFD40153B1B, 0xFFFFFFFC40153B23 # 692 - 695 D5 S3 C4 + .quad 0xfffffffb40153b2b, 0xFFFFFFFA40153B33, 0xFFFFFFF940153B3B, 0x0000000040123A83 # 696 - 699 D5 S3 C8 + .quad 0xffffffff40123b0b, 0xFFFFFFFE40123B13, 0xFFFFFFFD40153B1B, 0xFFFFFFFC40153B23 # 700 - 703 D5 S3 C12 + .quad 0x0000000040004a00, 0x0000000040124A81, 0x0000000040124A82, 0x0000000040124A83 # 704 - 707 D5 S4 C0 + .quad 0xffffffff40124b0b, 0xFFFFFFFE40154B13, 0xFFFFFFFD40154B1B, 0xFFFFFFFC40154B23 # 708 - 711 D5 S4 C4 + .quad 0xfffffffb40154b2b, 0xFFFFFFFA40154B33, 0xFFFFFFF940154B3B, 0x0000000040124A83 # 712 - 715 D5 S4 C8 + .quad 0xffffffff40124b0b, 0xFFFFFFFE40154B13, 0xFFFFFFFD40154B1B, 0xFFFFFFFC40154B23 # 716 - 719 D5 S4 C12 + .quad 0x0000000040005a00, 0x0000000040025A81, 0x0000000040025A82, 0x00000000400A5A83 # 720 - 723 D5 S5 C0 + .quad 0xffffffff400c5b0b, 0xFFFFFFFE400C5B13, 0xFFFFFFFD400C5B1B, 0xFFFFFFFC400C5B23 # 724 - 727 D5 S5 C4 + .quad 0xfffffffb400c5b2b, 0xFFFFFFFA400C5B33, 0xFFFFFFF9400C5B3B, 0x00000000400A5A83 # 728 - 731 D5 S5 C8 + .quad 0xffffffff400c5b0b, 0xFFFFFFFE400C5B13, 0xFFFFFFFD400C5B1B, 0xFFFFFFFC400C5B23 # 732 - 735 D5 S5 C12 + .quad 0x0000000040006a00, 0x0000000040126A81, 0x00000000401A6A82, 0x00000000401CEA83 # 736 - 739 D5 S6 C0 + .quad 0xffffffff401ceb0b, 0xFFFFFFFE401CEB13, 0xFFFFFFFD401CEB1B, 0xFFFFFFFC401CEB23 # 740 - 743 D5 S6 C4 + .quad 0xfffffffb401ceb2b, 0xFFFFFFFA401CEB33, 0xFFFFFFF9401CEB3B, 0x00000000401CEA83 # 744 - 747 D5 S6 C8 + .quad 0xffffffff401ceb0b, 0xFFFFFFFE401CEB13, 0xFFFFFFFD401CEB1B, 0xFFFFFFFC401CEB23 # 748 - 751 D5 S6 C12 + .quad 0x0000000040007a00, 0x00000000401A7A81, 0x00000000401CFA82, 0x00000000401CFA83 # 752 - 755 D5 S7 C0 + .quad 0xffffffff401cfb0b, 0xFFFFFFFE401CFB13, 0xFFFFFFFD401CFB1B, 0xFFFFFFFC401CFB23 # 756 - 759 D5 S7 C4 + .quad 0xfffffffb401cfb2b, 0xFFFFFFFA401CFB33, 0xFFFFFFF9401FFB3B, 0x00000000401CFA83 # 760 - 763 D5 S7 C8 + .quad 0xffffffff401cfb0b, 0xFFFFFFFE401CFB13, 0xFFFFFFFD401CFB1B, 0xFFFFFFFC401CFB23 # 764 - 767 D5 S7 C12 + .quad 0x0000000040000c00, 0x0000000040120C81, 0x0000000040120C82, 0xFFFFFFFF40120D0A # 768 - 771 D6 S0 C0 + .quad 0xfffffffe40120d12, 0xFFFFFFFD40120D1A, 0xFFFFFFFC40120D22, 0xFFFFFFFB40120D2A # 772 - 775 D6 S0 C4 + .quad 0xfffffffa40120d32, 0xFFFFFFF940150D3A, 0x0000000040120C82, 0xFFFFFFFF40120D0A # 776 - 779 D6 S0 C8 + .quad 0xfffffffe40120d12, 0xFFFFFFFD40120D1A, 0xFFFFFFFC40120D22, 0xFFFFFFFB40120D2A # 780 - 783 D6 S0 C12 + .quad 0x0000000040001c00, 0x0000000040121C81, 0x0000000040121C82, 0xFFFFFFFF40121D0A # 784 - 787 D6 S1 C0 + .quad 0xfffffffe40121d12, 0xFFFFFFFD40121D1A, 0xFFFFFFFC40121D22, 0xFFFFFFFB40121D2A # 788 - 791 D6 S1 C4 + .quad 0xfffffffa40151d32, 0xFFFFFFF940151D3A, 0x0000000040121C82, 0xFFFFFFFF40121D0A # 792 - 795 D6 S1 C8 + .quad 0xfffffffe40121d12, 0xFFFFFFFD40121D1A, 0xFFFFFFFC40121D22, 0xFFFFFFFB40121D2A # 796 - 799 D6 S1 C12 + .quad 0x0000000040002c00, 0x0000000040122C81, 0x0000000040122C82, 0xFFFFFFFF40122D0A # 800 - 803 D6 S2 C0 + .quad 0xfffffffe40122d12, 0xFFFFFFFD40122D1A, 0xFFFFFFFC40122D22, 0xFFFFFFFB40152D2A # 804 - 807 D6 S2 C4 + .quad 0xfffffffa40152d32, 0xFFFFFFF940152D3A, 0x0000000040122C82, 0xFFFFFFFF40122D0A # 808 - 811 D6 S2 C8 + .quad 0xfffffffe40122d12, 0xFFFFFFFD40122D1A, 0xFFFFFFFC40122D22, 0xFFFFFFFB40152D2A # 812 - 815 D6 S2 C12 + .quad 0x0000000040003c00, 0x0000000040123C81, 0x0000000040123C82, 0xFFFFFFFF40123D0A # 816 - 819 D6 S3 C0 + .quad 0xfffffffe40123d12, 0xFFFFFFFD40123D1A, 0xFFFFFFFC40153D22, 0xFFFFFFFB40153D2A # 820 - 823 D6 S3 C4 + .quad 0xfffffffa40153d32, 0xFFFFFFF940153D3A, 0x0000000040123C82, 0xFFFFFFFF40123D0A # 824 - 827 D6 S3 C8 + .quad 0xfffffffe40123d12, 0xFFFFFFFD40123D1A, 0xFFFFFFFC40153D22, 0xFFFFFFFB40153D2A # 828 - 831 D6 S3 C12 + .quad 0x0000000040004c00, 0x0000000040124C81, 0x0000000040124C82, 0xFFFFFFFF40124D0A # 832 - 835 D6 S4 C0 + .quad 0xfffffffe40124d12, 0xFFFFFFFD40154D1A, 0xFFFFFFFC40154D22, 0xFFFFFFFB40154D2A # 836 - 839 D6 S4 C4 + .quad 0xfffffffa40154d32, 0xFFFFFFF940154D3A, 0x0000000040124C82, 0xFFFFFFFF40124D0A # 840 - 843 D6 S4 C8 + .quad 0xfffffffe40124d12, 0xFFFFFFFD40154D1A, 0xFFFFFFFC40154D22, 0xFFFFFFFB40154D2A # 844 - 847 D6 S4 C12 + .quad 0x0000000040005c00, 0x0000000040125C81, 0x0000000040125C82, 0xFFFFFFFF40125D0A # 848 - 851 D6 S5 C0 + .quad 0xfffffffe40155d12, 0xFFFFFFFD40155D1A, 0xFFFFFFFC40155D22, 0xFFFFFFFB40155D2A # 852 - 855 D6 S5 C4 + .quad 0xfffffffa40155d32, 0xFFFFFFF940155D3A, 0x0000000040125C82, 0xFFFFFFFF40125D0A # 856 - 859 D6 S5 C8 + .quad 0xfffffffe40155d12, 0xFFFFFFFD40155D1A, 0xFFFFFFFC40155D22, 0xFFFFFFFB40155D2A # 860 - 863 D6 S5 C12 + .quad 0x0000000040006c00, 0x0000000040026C81, 0x00000000400A6C82, 0xFFFFFFFF400C6D0A # 864 - 867 D6 S6 C0 + .quad 0xfffffffe400c6d12, 0xFFFFFFFD400C6D1A, 0xFFFFFFFC400C6D22, 0xFFFFFFFB400C6D2A # 868 - 871 D6 S6 C4 + .quad 0xfffffffa400c6d32, 0xFFFFFFF9400C6D3A, 0x00000000400A6C82, 0xFFFFFFFF400C6D0A # 872 - 875 D6 S6 C8 + .quad 0xfffffffe400c6d12, 0xFFFFFFFD400C6D1A, 0xFFFFFFFC400C6D22, 0xFFFFFFFB400C6D2A # 876 - 879 D6 S6 C12 + .quad 0x0000000040007c00, 0x00000000401A7C81, 0x00000000401CFC82, 0xFFFFFFFF401CFD0A # 880 - 883 D6 S7 C0 + .quad 0xfffffffe401cfd12, 0xFFFFFFFD401CFD1A, 0xFFFFFFFC401CFD22, 0xFFFFFFFB401CFD2A # 884 - 887 D6 S7 C4 + .quad 0xfffffffa401cfd32, 0xFFFFFFF9401CFD3A, 0x00000000401CFC82, 0xFFFFFFFF401CFD0A # 888 - 891 D6 S7 C8 + .quad 0xfffffffe401cfd12, 0xFFFFFFFD401CFD1A, 0xFFFFFFFC401CFD22, 0xFFFFFFFB401CFD2A # 892 - 895 D6 S7 C12 + .quad 0x0000000040000e00, 0x0000000040120E81, 0xFFFFFFFF40120F09, 0xFFFFFFFE40120F11 # 896 - 899 D7 S0 C0 + .quad 0xfffffffd40120f19, 0xFFFFFFFC40120F21, 0xFFFFFFFB40120F29, 0xFFFFFFFA40120F31 # 900 - 903 D7 S0 C4 + .quad 0xfffffff940120f39, 0x0000000040120E81, 0xFFFFFFFF40120F09, 0xFFFFFFFE40120F11 # 904 - 907 D7 S0 C8 + .quad 0xfffffffd40120f19, 0xFFFFFFFC40120F21, 0xFFFFFFFB40120F29, 0xFFFFFFFA40120F31 # 908 - 911 D7 S0 C12 + .quad 0x0000000040001e00, 0x0000000040121E81, 0xFFFFFFFF40121F09, 0xFFFFFFFE40121F11 # 912 - 915 D7 S1 C0 + .quad 0xfffffffd40121f19, 0xFFFFFFFC40121F21, 0xFFFFFFFB40121F29, 0xFFFFFFFA40121F31 # 916 - 919 D7 S1 C4 + .quad 0xfffffff940151f39, 0x0000000040121E81, 0xFFFFFFFF40121F09, 0xFFFFFFFE40121F11 # 920 - 923 D7 S1 C8 + .quad 0xfffffffd40121f19, 0xFFFFFFFC40121F21, 0xFFFFFFFB40121F29, 0xFFFFFFFA40121F31 # 924 - 927 D7 S1 C12 + .quad 0x0000000040002e00, 0x0000000040122E81, 0xFFFFFFFF40122F09, 0xFFFFFFFE40122F11 # 928 - 931 D7 S2 C0 + .quad 0xfffffffd40122f19, 0xFFFFFFFC40122F21, 0xFFFFFFFB40122F29, 0xFFFFFFFA40152F31 # 932 - 935 D7 S2 C4 + .quad 0xfffffff940152f39, 0x0000000040122E81, 0xFFFFFFFF40122F09, 0xFFFFFFFE40122F11 # 936 - 939 D7 S2 C8 + .quad 0xfffffffd40122f19, 0xFFFFFFFC40122F21, 0xFFFFFFFB40122F29, 0xFFFFFFFA40152F31 # 940 - 943 D7 S2 C12 + .quad 0x0000000040003e00, 0x0000000040123E81, 0xFFFFFFFF40123F09, 0xFFFFFFFE40123F11 # 944 - 947 D7 S3 C0 + .quad 0xfffffffd40123f19, 0xFFFFFFFC40123F21, 0xFFFFFFFB40153F29, 0xFFFFFFFA40153F31 # 948 - 951 D7 S3 C4 + .quad 0xfffffff940153f39, 0x0000000040123E81, 0xFFFFFFFF40123F09, 0xFFFFFFFE40123F11 # 952 - 955 D7 S3 C8 + .quad 0xfffffffd40123f19, 0xFFFFFFFC40123F21, 0xFFFFFFFB40153F29, 0xFFFFFFFA40153F31 # 956 - 959 D7 S3 C12 + .quad 0x0000000040004e00, 0x0000000040124E81, 0xFFFFFFFF40124F09, 0xFFFFFFFE40124F11 # 960 - 963 D7 S4 C0 + .quad 0xfffffffd40124f19, 0xFFFFFFFC40154F21, 0xFFFFFFFB40154F29, 0xFFFFFFFA40154F31 # 964 - 967 D7 S4 C4 + .quad 0xfffffff940154f39, 0x0000000040124E81, 0xFFFFFFFF40124F09, 0xFFFFFFFE40124F11 # 968 - 971 D7 S4 C8 + .quad 0xfffffffd40124f19, 0xFFFFFFFC40154F21, 0xFFFFFFFB40154F29, 0xFFFFFFFA40154F31 # 972 - 975 D7 S4 C12 + .quad 0x0000000040005e00, 0x0000000040125E81, 0xFFFFFFFF40125F09, 0xFFFFFFFE40125F11 # 976 - 979 D7 S5 C0 + .quad 0xfffffffd40155f19, 0xFFFFFFFC40155F21, 0xFFFFFFFB40155F29, 0xFFFFFFFA40155F31 # 980 - 983 D7 S5 C4 + .quad 0xfffffff940155f39, 0x0000000040125E81, 0xFFFFFFFF40125F09, 0xFFFFFFFE40125F11 # 984 - 987 D7 S5 C8 + .quad 0xfffffffd40155f19, 0xFFFFFFFC40155F21, 0xFFFFFFFB40155F29, 0xFFFFFFFA40155F31 # 988 - 991 D7 S5 C12 + .quad 0x0000000040006e00, 0x0000000040126E81, 0xFFFFFFFF40126F09, 0xFFFFFFFE40156F11 # 992 - 995 D7 S6 C0 + .quad 0xfffffffd40156f19, 0xFFFFFFFC40156F21, 0xFFFFFFFB40156F29, 0xFFFFFFFA40156F31 # 996 - 999 D7 S6 C4 + .quad 0xfffffff940156f39, 0x0000000040126E81, 0xFFFFFFFF40126F09, 0xFFFFFFFE40156F11 # 1000 - 1003 D7 S6 C8 + .quad 0xfffffffd40156f19, 0xFFFFFFFC40156F21, 0xFFFFFFFB40156F29, 0xFFFFFFFA40156F31 # 1004 - 1007 D7 S6 C12 + .quad 0x0000000040007e00, 0x00000000400A7E81, 0xFFFFFFFF400C7F09, 0xFFFFFFFE400C7F11 # 1008 - 1011 D7 S7 C0 + .quad 0xfffffffd400c7f19, 0xFFFFFFFC400C7F21, 0xFFFFFFFB400C7F29, 0xFFFFFFFA400C7F31 # 1012 - 1015 D7 S7 C4 + .quad 0xfffffff9400c7f39, 0x00000000400A7E81, 0xFFFFFFFF400C7F09, 0xFFFFFFFE400C7F11 # 1016 - 1019 D7 S7 C8 + .quad 0xfffffffd400c7f19, 0xFFFFFFFC400C7F21, 0xFFFFFFFB400C7F29, 0xFFFFFFFA400C7F31 # 1020 - 1023 D7 S7 C12 + +fast_dma_encode_memcmp_table: + .quad 0x0000000040000000, 0xFFFFFFFF40020088, 0xFFFFFFFE40020090, 0xFFFFFFFD40020098 # 0 - 3 D0 S0 C0 + .quad 0xfffffffc400200a0, 0xFFFFFFFB400200A8, 0xFFFFFFFA400200B0, 0xFFFFFFF9400200B8 # 4 - 7 D0 S0 C4 + .quad 0x0000000040000000, 0xFFFFFFFF40020088, 0xFFFFFFFE40020090, 0xFFFFFFFD40020098 # 8 - 11 D0 S0 C8 + .quad 0xfffffffc400200a0, 0xFFFFFFFB400200A8, 0xFFFFFFFA400200B0, 0xFFFFFFF9400200B8 # 12 - 15 D0 S0 C12 + .quad 0x0000000040001000, 0xFFFFFFFF40121088, 0xFFFFFFFE40121090, 0xFFFFFFFD40121098 # 16 - 19 D0 S1 C0 + .quad 0xfffffffc401210a0, 0xFFFFFFFB401210A8, 0xFFFFFFFA401210B0, 0xFFFFFFF9401210B8 # 20 - 23 D0 S1 C4 + .quad 0x0000000040121000, 0xFFFFFFFF40121088, 0xFFFFFFFE40121090, 0xFFFFFFFD40121098 # 24 - 27 D0 S1 C8 + .quad 0xfffffffc401210a0, 0xFFFFFFFB401210A8, 0xFFFFFFFA401210B0, 0xFFFFFFF9401210B8 # 28 - 31 D0 S1 C12 + .quad 0x0000000040002000, 0xFFFFFFFF40122088, 0xFFFFFFFE40122090, 0xFFFFFFFD40122098 # 32 - 35 D0 S2 C0 + .quad 0xfffffffc401220a0, 0xFFFFFFFB401220A8, 0xFFFFFFFA401220B0, 0xFFFFFFF9401520B8 # 36 - 39 D0 S2 C4 + .quad 0x0000000040122000, 0xFFFFFFFF40122088, 0xFFFFFFFE40122090, 0xFFFFFFFD40122098 # 40 - 43 D0 S2 C8 + .quad 0xfffffffc401220a0, 0xFFFFFFFB401220A8, 0xFFFFFFFA401220B0, 0xFFFFFFF9401520B8 # 44 - 47 D0 S2 C12 + .quad 0x0000000040003000, 0xFFFFFFFF40123088, 0xFFFFFFFE40123090, 0xFFFFFFFD40123098 # 48 - 51 D0 S3 C0 + .quad 0xfffffffc401230a0, 0xFFFFFFFB401230A8, 0xFFFFFFFA401530B0, 0xFFFFFFF9401530B8 # 52 - 55 D0 S3 C4 + .quad 0x0000000040123000, 0xFFFFFFFF40123088, 0xFFFFFFFE40123090, 0xFFFFFFFD40123098 # 56 - 59 D0 S3 C8 + .quad 0xfffffffc401230a0, 0xFFFFFFFB401230A8, 0xFFFFFFFA401530B0, 0xFFFFFFF9401530B8 # 60 - 63 D0 S3 C12 + .quad 0x0000000040004000, 0xFFFFFFFF40124088, 0xFFFFFFFE40124090, 0xFFFFFFFD40124098 # 64 - 67 D0 S4 C0 + .quad 0xfffffffc401240a0, 0xFFFFFFFB401540A8, 0xFFFFFFFA401540B0, 0xFFFFFFF9401540B8 # 68 - 71 D0 S4 C4 + .quad 0x0000000040124000, 0xFFFFFFFF40124088, 0xFFFFFFFE40124090, 0xFFFFFFFD40124098 # 72 - 75 D0 S4 C8 + .quad 0xfffffffc401240a0, 0xFFFFFFFB401540A8, 0xFFFFFFFA401540B0, 0xFFFFFFF9401540B8 # 76 - 79 D0 S4 C12 + .quad 0x0000000040005000, 0xFFFFFFFF40125088, 0xFFFFFFFE40125090, 0xFFFFFFFD40125098 # 80 - 83 D0 S5 C0 + .quad 0xfffffffc401550a0, 0xFFFFFFFB401550A8, 0xFFFFFFFA401550B0, 0xFFFFFFF9401550B8 # 84 - 87 D0 S5 C4 + .quad 0x0000000040125000, 0xFFFFFFFF40125088, 0xFFFFFFFE40125090, 0xFFFFFFFD40125098 # 88 - 91 D0 S5 C8 + .quad 0xfffffffc401550a0, 0xFFFFFFFB401550A8, 0xFFFFFFFA401550B0, 0xFFFFFFF9401550B8 # 92 - 95 D0 S5 C12 + .quad 0x0000000040006000, 0xFFFFFFFF40126088, 0xFFFFFFFE40126090, 0xFFFFFFFD40156098 # 96 - 99 D0 S6 C0 + .quad 0xfffffffc401560a0, 0xFFFFFFFB401560A8, 0xFFFFFFFA401560B0, 0xFFFFFFF9401560B8 # 100 - 103 D0 S6 C4 + .quad 0x0000000040126000, 0xFFFFFFFF40126088, 0xFFFFFFFE40126090, 0xFFFFFFFD40156098 # 104 - 107 D0 S6 C8 + .quad 0xfffffffc401560a0, 0xFFFFFFFB401560A8, 0xFFFFFFFA401560B0, 0xFFFFFFF9401560B8 # 108 - 111 D0 S6 C12 + .quad 0x0000000040007000, 0xFFFFFFFF40127088, 0xFFFFFFFE40157090, 0xFFFFFFFD40157098 # 112 - 115 D0 S7 C0 + .quad 0xfffffffc401570a0, 0xFFFFFFFB401570A8, 0xFFFFFFFA401570B0, 0xFFFFFFF9401570B8 # 116 - 119 D0 S7 C4 + .quad 0x0000000040127000, 0xFFFFFFFF40127088, 0xFFFFFFFE40157090, 0xFFFFFFFD40157098 # 120 - 123 D0 S7 C8 + .quad 0xfffffffc401570a0, 0xFFFFFFFB401570A8, 0xFFFFFFFA401570B0, 0xFFFFFFF9401570B8 # 124 - 127 D0 S7 C12 + .quad 0x0000000040000200, 0x0000000040120281, 0x0000000040120282, 0x0000000040120283 # 128 - 131 D1 S0 C0 + .quad 0x0000000040120284, 0x0000000040120285, 0x0000000040120286, 0x0000000040120287 # 132 - 135 D1 S0 C4 + .quad 0xffffffff4012030f, 0xFFFFFFFE40150317, 0xFFFFFFFD4015031F, 0xFFFFFFFC40150327 # 136 - 139 D1 S0 C8 + .quad 0xfffffffb4015032f, 0xFFFFFFFA40150337, 0xFFFFFFF94015033F, 0x0000000040120287 # 140 - 143 D1 S0 C12 + .quad 0x0000000040001200, 0x0000000040021281, 0x0000000040021282, 0x0000000040021283 # 144 - 147 D1 S1 C0 + .quad 0x0000000040021284, 0x0000000040021285, 0x0000000040021286, 0x00000000400A1287 # 148 - 151 D1 S1 C4 + .quad 0xffffffff400c130f, 0xFFFFFFFE400C1317, 0xFFFFFFFD400C131F, 0xFFFFFFFC400C1327 # 152 - 155 D1 S1 C8 + .quad 0xfffffffb400c132f, 0xFFFFFFFA400C1337, 0xFFFFFFF9400C133F, 0x00000000400A1287 # 156 - 159 D1 S1 C12 + .quad 0x0000000040002200, 0x0000000040122281, 0x0000000040122282, 0x0000000040122283 # 160 - 163 D1 S2 C0 + .quad 0x0000000040122284, 0x0000000040122285, 0x00000000401A2286, 0x00000000401CA287 # 164 - 167 D1 S2 C4 + .quad 0xffffffff401ca30f, 0xFFFFFFFE401CA317, 0xFFFFFFFD401CA31F, 0xFFFFFFFC401CA327 # 168 - 171 D1 S2 C8 + .quad 0xfffffffb401ca32f, 0xFFFFFFFA401CA337, 0xFFFFFFF9401CA33F, 0x00000000401CA287 # 172 - 175 D1 S2 C12 + .quad 0x0000000040003200, 0x0000000040123281, 0x0000000040123282, 0x0000000040123283 # 176 - 179 D1 S3 C0 + .quad 0x0000000040123284, 0x00000000401A3285, 0x00000000401CB286, 0x00000000401CB287 # 180 - 183 D1 S3 C4 + .quad 0xffffffff401cb30f, 0xFFFFFFFE401CB317, 0xFFFFFFFD401CB31F, 0xFFFFFFFC401CB327 # 184 - 187 D1 S3 C8 + .quad 0xfffffffb401cb32f, 0xFFFFFFFA401CB337, 0xFFFFFFF9401FB33F, 0x00000000401CB287 # 188 - 191 D1 S3 C12 + .quad 0x0000000040004200, 0x0000000040124281, 0x0000000040124282, 0x0000000040124283 # 192 - 195 D1 S4 C0 + .quad 0x00000000401a4284, 0x00000000401CC285, 0x00000000401CC286, 0x00000000401CC287 # 196 - 199 D1 S4 C4 + .quad 0xffffffff401cc30f, 0xFFFFFFFE401CC317, 0xFFFFFFFD401CC31F, 0xFFFFFFFC401CC327 # 200 - 203 D1 S4 C8 + .quad 0xfffffffb401cc32f, 0xFFFFFFFA401FC337, 0xFFFFFFF9401FC33F, 0x00000000401CC287 # 204 - 207 D1 S4 C12 + .quad 0x0000000040005200, 0x0000000040125281, 0x0000000040125282, 0x00000000401A5283 # 208 - 211 D1 S5 C0 + .quad 0x00000000401cd284, 0x00000000401CD285, 0x00000000401CD286, 0x00000000401CD287 # 212 - 215 D1 S5 C4 + .quad 0xffffffff401cd30f, 0xFFFFFFFE401CD317, 0xFFFFFFFD401CD31F, 0xFFFFFFFC401CD327 # 216 - 219 D1 S5 C8 + .quad 0xfffffffb401fd32f, 0xFFFFFFFA401FD337, 0xFFFFFFF9401FD33F, 0x00000000401CD287 # 220 - 223 D1 S5 C12 + .quad 0x0000000040006200, 0x0000000040126281, 0x00000000401A6282, 0x00000000401CE283 # 224 - 227 D1 S6 C0 + .quad 0x00000000401ce284, 0x00000000401CE285, 0x00000000401CE286, 0x00000000401CE287 # 228 - 231 D1 S6 C4 + .quad 0xffffffff401ce30f, 0xFFFFFFFE401CE317, 0xFFFFFFFD401CE31F, 0xFFFFFFFC401FE327 # 232 - 235 D1 S6 C8 + .quad 0xfffffffb401fe32f, 0xFFFFFFFA401FE337, 0xFFFFFFF9401FE33F, 0x00000000401CE287 # 236 - 239 D1 S6 C12 + .quad 0x0000000040007200, 0x00000000401A7281, 0x00000000401CF282, 0x00000000401CF283 # 240 - 243 D1 S7 C0 + .quad 0x00000000401cf284, 0x00000000401CF285, 0x00000000401CF286, 0x00000000401CF287 # 244 - 247 D1 S7 C4 + .quad 0xffffffff401cf30f, 0xFFFFFFFE401CF317, 0xFFFFFFFD401FF31F, 0xFFFFFFFC401FF327 # 248 - 251 D1 S7 C8 + .quad 0xfffffffb401ff32f, 0xFFFFFFFA401FF337, 0xFFFFFFF9401FF33F, 0x00000000401CF287 # 252 - 255 D1 S7 C12 + .quad 0x0000000040000400, 0x0000000040120481, 0x0000000040120482, 0x0000000040120483 # 256 - 259 D2 S0 C0 + .quad 0x0000000040120484, 0x0000000040120485, 0x0000000040120486, 0xFFFFFFFF4012050E # 260 - 263 D2 S0 C4 + .quad 0xfffffffe40120516, 0xFFFFFFFD4015051E, 0xFFFFFFFC40150526, 0xFFFFFFFB4015052E # 264 - 267 D2 S0 C8 + .quad 0xfffffffa40150536, 0xFFFFFFF94015053E, 0x0000000040120486, 0xFFFFFFFF4012050E # 268 - 271 D2 S0 C12 + .quad 0x0000000040001400, 0x0000000040121481, 0x0000000040121482, 0x0000000040121483 # 272 - 275 D2 S1 C0 + .quad 0x0000000040121484, 0x0000000040121485, 0x0000000040121486, 0xFFFFFFFF4012150E # 276 - 279 D2 S1 C4 + .quad 0xfffffffe40151516, 0xFFFFFFFD4015151E, 0xFFFFFFFC40151526, 0xFFFFFFFB4015152E # 280 - 283 D2 S1 C8 + .quad 0xfffffffa40151536, 0xFFFFFFF94015153E, 0x0000000040121486, 0xFFFFFFFF4012150E # 284 - 287 D2 S1 C12 + .quad 0x0000000040002400, 0x0000000040022481, 0x0000000040022482, 0x0000000040022483 # 288 - 291 D2 S2 C0 + .quad 0x0000000040022484, 0x0000000040022485, 0x00000000400A2486, 0xFFFFFFFF400C250E # 292 - 295 D2 S2 C4 + .quad 0xfffffffe400c2516, 0xFFFFFFFD400C251E, 0xFFFFFFFC400C2526, 0xFFFFFFFB400C252E # 296 - 299 D2 S2 C8 + .quad 0xfffffffa400c2536, 0xFFFFFFF9400C253E, 0x00000000400A2486, 0xFFFFFFFF400C250E # 300 - 303 D2 S2 C12 + .quad 0x0000000040003400, 0x0000000040123481, 0x0000000040123482, 0x0000000040123483 # 304 - 307 D2 S3 C0 + .quad 0x0000000040123484, 0x00000000401A3485, 0x00000000401CB486, 0xFFFFFFFF401CB50E # 308 - 311 D2 S3 C4 + .quad 0xfffffffe401cb516, 0xFFFFFFFD401CB51E, 0xFFFFFFFC401CB526, 0xFFFFFFFB401CB52E # 312 - 315 D2 S3 C8 + .quad 0xfffffffa401cb536, 0xFFFFFFF9401CB53E, 0x00000000401CB486, 0xFFFFFFFF401CB50E # 316 - 319 D2 S3 C12 + .quad 0x0000000040004400, 0x0000000040124481, 0x0000000040124482, 0x0000000040124483 # 320 - 323 D2 S4 C0 + .quad 0x00000000401a4484, 0x00000000401CC485, 0x00000000401CC486, 0xFFFFFFFF401CC50E # 324 - 327 D2 S4 C4 + .quad 0xfffffffe401cc516, 0xFFFFFFFD401CC51E, 0xFFFFFFFC401CC526, 0xFFFFFFFB401CC52E # 328 - 331 D2 S4 C8 + .quad 0xfffffffa401cc536, 0xFFFFFFF9401FC53E, 0x00000000401CC486, 0xFFFFFFFF401CC50E # 332 - 335 D2 S4 C12 + .quad 0x0000000040005400, 0x0000000040125481, 0x0000000040125482, 0x00000000401A5483 # 336 - 339 D2 S5 C0 + .quad 0x00000000401cd484, 0x00000000401CD485, 0x00000000401CD486, 0xFFFFFFFF401CD50E # 340 - 343 D2 S5 C4 + .quad 0xfffffffe401cd516, 0xFFFFFFFD401CD51E, 0xFFFFFFFC401CD526, 0xFFFFFFFB401CD52E # 344 - 347 D2 S5 C8 + .quad 0xfffffffa401fd536, 0xFFFFFFF9401FD53E, 0x00000000401CD486, 0xFFFFFFFF401CD50E # 348 - 351 D2 S5 C12 + .quad 0x0000000040006400, 0x0000000040126481, 0x00000000401A6482, 0x00000000401CE483 # 352 - 355 D2 S6 C0 + .quad 0x00000000401ce484, 0x00000000401CE485, 0x00000000401CE486, 0xFFFFFFFF401CE50E # 356 - 359 D2 S6 C4 + .quad 0xfffffffe401ce516, 0xFFFFFFFD401CE51E, 0xFFFFFFFC401CE526, 0xFFFFFFFB401FE52E # 360 - 363 D2 S6 C8 + .quad 0xfffffffa401fe536, 0xFFFFFFF9401FE53E, 0x00000000401CE486, 0xFFFFFFFF401CE50E # 364 - 367 D2 S6 C12 + .quad 0x0000000040007400, 0x00000000401A7481, 0x00000000401CF482, 0x00000000401CF483 # 368 - 371 D2 S7 C0 + .quad 0x00000000401cf484, 0x00000000401CF485, 0x00000000401CF486, 0xFFFFFFFF401CF50E # 372 - 375 D2 S7 C4 + .quad 0xfffffffe401cf516, 0xFFFFFFFD401CF51E, 0xFFFFFFFC401FF526, 0xFFFFFFFB401FF52E # 376 - 379 D2 S7 C8 + .quad 0xfffffffa401ff536, 0xFFFFFFF9401FF53E, 0x00000000401CF486, 0xFFFFFFFF401CF50E # 380 - 383 D2 S7 C12 + .quad 0x0000000040000600, 0x0000000040120681, 0x0000000040120682, 0x0000000040120683 # 384 - 387 D3 S0 C0 + .quad 0x0000000040120684, 0x0000000040120685, 0xFFFFFFFF4012070D, 0xFFFFFFFE40120715 # 388 - 391 D3 S0 C4 + .quad 0xfffffffd4012071d, 0xFFFFFFFC40150725, 0xFFFFFFFB4015072D, 0xFFFFFFFA40150735 # 392 - 395 D3 S0 C8 + .quad 0xfffffff94015073d, 0x0000000040120685, 0xFFFFFFFF4012070D, 0xFFFFFFFE40120715 # 396 - 399 D3 S0 C12 + .quad 0x0000000040001600, 0x0000000040121681, 0x0000000040121682, 0x0000000040121683 # 400 - 403 D3 S1 C0 + .quad 0x0000000040121684, 0x0000000040121685, 0xFFFFFFFF4012170D, 0xFFFFFFFE40121715 # 404 - 407 D3 S1 C4 + .quad 0xfffffffd4015171d, 0xFFFFFFFC40151725, 0xFFFFFFFB4015172D, 0xFFFFFFFA40151735 # 408 - 411 D3 S1 C8 + .quad 0xfffffff94015173d, 0x0000000040121685, 0xFFFFFFFF4012170D, 0xFFFFFFFE40121715 # 412 - 415 D3 S1 C12 + .quad 0x0000000040002600, 0x0000000040122681, 0x0000000040122682, 0x0000000040122683 # 416 - 419 D3 S2 C0 + .quad 0x0000000040122684, 0x0000000040122685, 0xFFFFFFFF4012270D, 0xFFFFFFFE40152715 # 420 - 423 D3 S2 C4 + .quad 0xfffffffd4015271d, 0xFFFFFFFC40152725, 0xFFFFFFFB4015272D, 0xFFFFFFFA40152735 # 424 - 427 D3 S2 C8 + .quad 0xfffffff94015273d, 0x0000000040122685, 0xFFFFFFFF4012270D, 0xFFFFFFFE40152715 # 428 - 431 D3 S2 C12 + .quad 0x0000000040003600, 0x0000000040023681, 0x0000000040023682, 0x0000000040023683 # 432 - 435 D3 S3 C0 + .quad 0x0000000040023684, 0x00000000400A3685, 0xFFFFFFFF400C370D, 0xFFFFFFFE400C3715 # 436 - 439 D3 S3 C4 + .quad 0xfffffffd400c371d, 0xFFFFFFFC400C3725, 0xFFFFFFFB400C372D, 0xFFFFFFFA400C3735 # 440 - 443 D3 S3 C8 + .quad 0xfffffff9400c373d, 0x00000000400A3685, 0xFFFFFFFF400C370D, 0xFFFFFFFE400C3715 # 444 - 447 D3 S3 C12 + .quad 0x0000000040004600, 0x0000000040124681, 0x0000000040124682, 0x0000000040124683 # 448 - 451 D3 S4 C0 + .quad 0x00000000401a4684, 0x00000000401CC685, 0xFFFFFFFF401CC70D, 0xFFFFFFFE401CC715 # 452 - 455 D3 S4 C4 + .quad 0xfffffffd401cc71d, 0xFFFFFFFC401CC725, 0xFFFFFFFB401CC72D, 0xFFFFFFFA401CC735 # 456 - 459 D3 S4 C8 + .quad 0xfffffff9401cc73d, 0x00000000401CC685, 0xFFFFFFFF401CC70D, 0xFFFFFFFE401CC715 # 460 - 463 D3 S4 C12 + .quad 0x0000000040005600, 0x0000000040125681, 0x0000000040125682, 0x00000000401A5683 # 464 - 467 D3 S5 C0 + .quad 0x00000000401cd684, 0x00000000401CD685, 0xFFFFFFFF401CD70D, 0xFFFFFFFE401CD715 # 468 - 471 D3 S5 C4 + .quad 0xfffffffd401cd71d, 0xFFFFFFFC401CD725, 0xFFFFFFFB401CD72D, 0xFFFFFFFA401CD735 # 472 - 475 D3 S5 C8 + .quad 0xfffffff9401fd73d, 0x00000000401CD685, 0xFFFFFFFF401CD70D, 0xFFFFFFFE401CD715 # 476 - 479 D3 S5 C12 + .quad 0x0000000040006600, 0x0000000040126681, 0x00000000401A6682, 0x00000000401CE683 # 480 - 483 D3 S6 C0 + .quad 0x00000000401ce684, 0x00000000401CE685, 0xFFFFFFFF401CE70D, 0xFFFFFFFE401CE715 # 484 - 487 D3 S6 C4 + .quad 0xfffffffd401ce71d, 0xFFFFFFFC401CE725, 0xFFFFFFFB401CE72D, 0xFFFFFFFA401FE735 # 488 - 491 D3 S6 C8 + .quad 0xfffffff9401fe73d, 0x00000000401CE685, 0xFFFFFFFF401CE70D, 0xFFFFFFFE401CE715 # 492 - 495 D3 S6 C12 + .quad 0x0000000040007600, 0x00000000401A7681, 0x00000000401CF682, 0x00000000401CF683 # 496 - 499 D3 S7 C0 + .quad 0x00000000401cf684, 0x00000000401CF685, 0xFFFFFFFF401CF70D, 0xFFFFFFFE401CF715 # 500 - 503 D3 S7 C4 + .quad 0xfffffffd401cf71d, 0xFFFFFFFC401CF725, 0xFFFFFFFB401FF72D, 0xFFFFFFFA401FF735 # 504 - 507 D3 S7 C8 + .quad 0xfffffff9401ff73d, 0x00000000401CF685, 0xFFFFFFFF401CF70D, 0xFFFFFFFE401CF715 # 508 - 511 D3 S7 C12 + .quad 0x0000000040000800, 0x0000000040120881, 0x0000000040120882, 0x0000000040120883 # 512 - 515 D4 S0 C0 + .quad 0x0000000040120884, 0xFFFFFFFF4012090C, 0xFFFFFFFE40120914, 0xFFFFFFFD4012091C # 516 - 519 D4 S0 C4 + .quad 0xfffffffc40120924, 0xFFFFFFFB4015092C, 0xFFFFFFFA40150934, 0xFFFFFFF94015093C # 520 - 523 D4 S0 C8 + .quad 0x0000000040120884, 0xFFFFFFFF4012090C, 0xFFFFFFFE40120914, 0xFFFFFFFD4012091C # 524 - 527 D4 S0 C12 + .quad 0x0000000040001800, 0x0000000040121881, 0x0000000040121882, 0x0000000040121883 # 528 - 531 D4 S1 C0 + .quad 0x0000000040121884, 0xFFFFFFFF4012190C, 0xFFFFFFFE40121914, 0xFFFFFFFD4012191C # 532 - 535 D4 S1 C4 + .quad 0xfffffffc40151924, 0xFFFFFFFB4015192C, 0xFFFFFFFA40151934, 0xFFFFFFF94015193C # 536 - 539 D4 S1 C8 + .quad 0x0000000040121884, 0xFFFFFFFF4012190C, 0xFFFFFFFE40121914, 0xFFFFFFFD4012191C # 540 - 543 D4 S1 C12 + .quad 0x0000000040002800, 0x0000000040122881, 0x0000000040122882, 0x0000000040122883 # 544 - 547 D4 S2 C0 + .quad 0x0000000040122884, 0xFFFFFFFF4012290C, 0xFFFFFFFE40122914, 0xFFFFFFFD4015291C # 548 - 551 D4 S2 C4 + .quad 0xfffffffc40152924, 0xFFFFFFFB4015292C, 0xFFFFFFFA40152934, 0xFFFFFFF94015293C # 552 - 555 D4 S2 C8 + .quad 0x0000000040122884, 0xFFFFFFFF4012290C, 0xFFFFFFFE40122914, 0xFFFFFFFD4015291C # 556 - 559 D4 S2 C12 + .quad 0x0000000040003800, 0x0000000040123881, 0x0000000040123882, 0x0000000040123883 # 560 - 563 D4 S3 C0 + .quad 0x0000000040123884, 0xFFFFFFFF4012390C, 0xFFFFFFFE40153914, 0xFFFFFFFD4015391C # 564 - 567 D4 S3 C4 + .quad 0xfffffffc40153924, 0xFFFFFFFB4015392C, 0xFFFFFFFA40153934, 0xFFFFFFF94015393C # 568 - 571 D4 S3 C8 + .quad 0x0000000040123884, 0xFFFFFFFF4012390C, 0xFFFFFFFE40153914, 0xFFFFFFFD4015391C # 572 - 575 D4 S3 C12 + .quad 0x0000000040004800, 0x0000000040024881, 0x0000000040024882, 0x0000000040024883 # 576 - 579 D4 S4 C0 + .quad 0x00000000400a4884, 0xFFFFFFFF400C490C, 0xFFFFFFFE400C4914, 0xFFFFFFFD400C491C # 580 - 583 D4 S4 C4 + .quad 0xfffffffc400c4924, 0xFFFFFFFB400C492C, 0xFFFFFFFA400C4934, 0xFFFFFFF9400C493C # 584 - 587 D4 S4 C8 + .quad 0x00000000400a4884, 0xFFFFFFFF400C490C, 0xFFFFFFFE400C4914, 0xFFFFFFFD400C491C # 588 - 591 D4 S4 C12 + .quad 0x0000000040005800, 0x0000000040125881, 0x0000000040125882, 0x00000000401A5883 # 592 - 595 D4 S5 C0 + .quad 0x00000000401cd884, 0xFFFFFFFF401CD90C, 0xFFFFFFFE401CD914, 0xFFFFFFFD401CD91C # 596 - 599 D4 S5 C4 + .quad 0xfffffffc401cd924, 0xFFFFFFFB401CD92C, 0xFFFFFFFA401CD934, 0xFFFFFFF9401CD93C # 600 - 603 D4 S5 C8 + .quad 0x00000000401cd884, 0xFFFFFFFF401CD90C, 0xFFFFFFFE401CD914, 0xFFFFFFFD401CD91C # 604 - 607 D4 S5 C12 + .quad 0x0000000040006800, 0x0000000040126881, 0x00000000401A6882, 0x00000000401CE883 # 608 - 611 D4 S6 C0 + .quad 0x00000000401ce884, 0xFFFFFFFF401CE90C, 0xFFFFFFFE401CE914, 0xFFFFFFFD401CE91C # 612 - 615 D4 S6 C4 + .quad 0xfffffffc401ce924, 0xFFFFFFFB401CE92C, 0xFFFFFFFA401CE934, 0xFFFFFFF9401FE93C # 616 - 619 D4 S6 C8 + .quad 0x00000000401ce884, 0xFFFFFFFF401CE90C, 0xFFFFFFFE401CE914, 0xFFFFFFFD401CE91C # 620 - 623 D4 S6 C12 + .quad 0x0000000040007800, 0x00000000401A7881, 0x00000000401CF882, 0x00000000401CF883 # 624 - 627 D4 S7 C0 + .quad 0x00000000401cf884, 0xFFFFFFFF401CF90C, 0xFFFFFFFE401CF914, 0xFFFFFFFD401CF91C # 628 - 631 D4 S7 C4 + .quad 0xfffffffc401cf924, 0xFFFFFFFB401CF92C, 0xFFFFFFFA401FF934, 0xFFFFFFF9401FF93C # 632 - 635 D4 S7 C8 + .quad 0x00000000401cf884, 0xFFFFFFFF401CF90C, 0xFFFFFFFE401CF914, 0xFFFFFFFD401CF91C # 636 - 639 D4 S7 C12 + .quad 0x0000000040000a00, 0x0000000040120A81, 0x0000000040120A82, 0x0000000040120A83 # 640 - 643 D5 S0 C0 + .quad 0xffffffff40120b0b, 0xFFFFFFFE40120B13, 0xFFFFFFFD40120B1B, 0xFFFFFFFC40120B23 # 644 - 647 D5 S0 C4 + .quad 0xfffffffb40120b2b, 0xFFFFFFFA40150B33, 0xFFFFFFF940150B3B, 0x0000000040120A83 # 648 - 651 D5 S0 C8 + .quad 0xffffffff40120b0b, 0xFFFFFFFE40120B13, 0xFFFFFFFD40120B1B, 0xFFFFFFFC40120B23 # 652 - 655 D5 S0 C12 + .quad 0x0000000040001a00, 0x0000000040121A81, 0x0000000040121A82, 0x0000000040121A83 # 656 - 659 D5 S1 C0 + .quad 0xffffffff40121b0b, 0xFFFFFFFE40121B13, 0xFFFFFFFD40121B1B, 0xFFFFFFFC40121B23 # 660 - 663 D5 S1 C4 + .quad 0xfffffffb40151b2b, 0xFFFFFFFA40151B33, 0xFFFFFFF940151B3B, 0x0000000040121A83 # 664 - 667 D5 S1 C8 + .quad 0xffffffff40121b0b, 0xFFFFFFFE40121B13, 0xFFFFFFFD40121B1B, 0xFFFFFFFC40121B23 # 668 - 671 D5 S1 C12 + .quad 0x0000000040002a00, 0x0000000040122A81, 0x0000000040122A82, 0x0000000040122A83 # 672 - 675 D5 S2 C0 + .quad 0xffffffff40122b0b, 0xFFFFFFFE40122B13, 0xFFFFFFFD40122B1B, 0xFFFFFFFC40152B23 # 676 - 679 D5 S2 C4 + .quad 0xfffffffb40152b2b, 0xFFFFFFFA40152B33, 0xFFFFFFF940152B3B, 0x0000000040122A83 # 680 - 683 D5 S2 C8 + .quad 0xffffffff40122b0b, 0xFFFFFFFE40122B13, 0xFFFFFFFD40122B1B, 0xFFFFFFFC40152B23 # 684 - 687 D5 S2 C12 + .quad 0x0000000040003a00, 0x0000000040123A81, 0x0000000040123A82, 0x0000000040123A83 # 688 - 691 D5 S3 C0 + .quad 0xffffffff40123b0b, 0xFFFFFFFE40123B13, 0xFFFFFFFD40153B1B, 0xFFFFFFFC40153B23 # 692 - 695 D5 S3 C4 + .quad 0xfffffffb40153b2b, 0xFFFFFFFA40153B33, 0xFFFFFFF940153B3B, 0x0000000040123A83 # 696 - 699 D5 S3 C8 + .quad 0xffffffff40123b0b, 0xFFFFFFFE40123B13, 0xFFFFFFFD40153B1B, 0xFFFFFFFC40153B23 # 700 - 703 D5 S3 C12 + .quad 0x0000000040004a00, 0x0000000040124A81, 0x0000000040124A82, 0x0000000040124A83 # 704 - 707 D5 S4 C0 + .quad 0xffffffff40124b0b, 0xFFFFFFFE40154B13, 0xFFFFFFFD40154B1B, 0xFFFFFFFC40154B23 # 708 - 711 D5 S4 C4 + .quad 0xfffffffb40154b2b, 0xFFFFFFFA40154B33, 0xFFFFFFF940154B3B, 0x0000000040124A83 # 712 - 715 D5 S4 C8 + .quad 0xffffffff40124b0b, 0xFFFFFFFE40154B13, 0xFFFFFFFD40154B1B, 0xFFFFFFFC40154B23 # 716 - 719 D5 S4 C12 + .quad 0x0000000040005a00, 0x0000000040025A81, 0x0000000040025A82, 0x00000000400A5A83 # 720 - 723 D5 S5 C0 + .quad 0xffffffff400c5b0b, 0xFFFFFFFE400C5B13, 0xFFFFFFFD400C5B1B, 0xFFFFFFFC400C5B23 # 724 - 727 D5 S5 C4 + .quad 0xfffffffb400c5b2b, 0xFFFFFFFA400C5B33, 0xFFFFFFF9400C5B3B, 0x00000000400A5A83 # 728 - 731 D5 S5 C8 + .quad 0xffffffff400c5b0b, 0xFFFFFFFE400C5B13, 0xFFFFFFFD400C5B1B, 0xFFFFFFFC400C5B23 # 732 - 735 D5 S5 C12 + .quad 0x0000000040006a00, 0x0000000040126A81, 0x00000000401A6A82, 0x00000000401CEA83 # 736 - 739 D5 S6 C0 + .quad 0xffffffff401ceb0b, 0xFFFFFFFE401CEB13, 0xFFFFFFFD401CEB1B, 0xFFFFFFFC401CEB23 # 740 - 743 D5 S6 C4 + .quad 0xfffffffb401ceb2b, 0xFFFFFFFA401CEB33, 0xFFFFFFF9401CEB3B, 0x00000000401CEA83 # 744 - 747 D5 S6 C8 + .quad 0xffffffff401ceb0b, 0xFFFFFFFE401CEB13, 0xFFFFFFFD401CEB1B, 0xFFFFFFFC401CEB23 # 748 - 751 D5 S6 C12 + .quad 0x0000000040007a00, 0x00000000401A7A81, 0x00000000401CFA82, 0x00000000401CFA83 # 752 - 755 D5 S7 C0 + .quad 0xffffffff401cfb0b, 0xFFFFFFFE401CFB13, 0xFFFFFFFD401CFB1B, 0xFFFFFFFC401CFB23 # 756 - 759 D5 S7 C4 + .quad 0xfffffffb401cfb2b, 0xFFFFFFFA401CFB33, 0xFFFFFFF9401FFB3B, 0x00000000401CFA83 # 760 - 763 D5 S7 C8 + .quad 0xffffffff401cfb0b, 0xFFFFFFFE401CFB13, 0xFFFFFFFD401CFB1B, 0xFFFFFFFC401CFB23 # 764 - 767 D5 S7 C12 + .quad 0x0000000040000c00, 0x0000000040120C81, 0x0000000040120C82, 0xFFFFFFFF40120D0A # 768 - 771 D6 S0 C0 + .quad 0xfffffffe40120d12, 0xFFFFFFFD40120D1A, 0xFFFFFFFC40120D22, 0xFFFFFFFB40120D2A # 772 - 775 D6 S0 C4 + .quad 0xfffffffa40120d32, 0xFFFFFFF940150D3A, 0x0000000040120C82, 0xFFFFFFFF40120D0A # 776 - 779 D6 S0 C8 + .quad 0xfffffffe40120d12, 0xFFFFFFFD40120D1A, 0xFFFFFFFC40120D22, 0xFFFFFFFB40120D2A # 780 - 783 D6 S0 C12 + .quad 0x0000000040001c00, 0x0000000040121C81, 0x0000000040121C82, 0xFFFFFFFF40121D0A # 784 - 787 D6 S1 C0 + .quad 0xfffffffe40121d12, 0xFFFFFFFD40121D1A, 0xFFFFFFFC40121D22, 0xFFFFFFFB40121D2A # 788 - 791 D6 S1 C4 + .quad 0xfffffffa40151d32, 0xFFFFFFF940151D3A, 0x0000000040121C82, 0xFFFFFFFF40121D0A # 792 - 795 D6 S1 C8 + .quad 0xfffffffe40121d12, 0xFFFFFFFD40121D1A, 0xFFFFFFFC40121D22, 0xFFFFFFFB40121D2A # 796 - 799 D6 S1 C12 + .quad 0x0000000040002c00, 0x0000000040122C81, 0x0000000040122C82, 0xFFFFFFFF40122D0A # 800 - 803 D6 S2 C0 + .quad 0xfffffffe40122d12, 0xFFFFFFFD40122D1A, 0xFFFFFFFC40122D22, 0xFFFFFFFB40152D2A # 804 - 807 D6 S2 C4 + .quad 0xfffffffa40152d32, 0xFFFFFFF940152D3A, 0x0000000040122C82, 0xFFFFFFFF40122D0A # 808 - 811 D6 S2 C8 + .quad 0xfffffffe40122d12, 0xFFFFFFFD40122D1A, 0xFFFFFFFC40122D22, 0xFFFFFFFB40152D2A # 812 - 815 D6 S2 C12 + .quad 0x0000000040003c00, 0x0000000040123C81, 0x0000000040123C82, 0xFFFFFFFF40123D0A # 816 - 819 D6 S3 C0 + .quad 0xfffffffe40123d12, 0xFFFFFFFD40123D1A, 0xFFFFFFFC40153D22, 0xFFFFFFFB40153D2A # 820 - 823 D6 S3 C4 + .quad 0xfffffffa40153d32, 0xFFFFFFF940153D3A, 0x0000000040123C82, 0xFFFFFFFF40123D0A # 824 - 827 D6 S3 C8 + .quad 0xfffffffe40123d12, 0xFFFFFFFD40123D1A, 0xFFFFFFFC40153D22, 0xFFFFFFFB40153D2A # 828 - 831 D6 S3 C12 + .quad 0x0000000040004c00, 0x0000000040124C81, 0x0000000040124C82, 0xFFFFFFFF40124D0A # 832 - 835 D6 S4 C0 + .quad 0xfffffffe40124d12, 0xFFFFFFFD40154D1A, 0xFFFFFFFC40154D22, 0xFFFFFFFB40154D2A # 836 - 839 D6 S4 C4 + .quad 0xfffffffa40154d32, 0xFFFFFFF940154D3A, 0x0000000040124C82, 0xFFFFFFFF40124D0A # 840 - 843 D6 S4 C8 + .quad 0xfffffffe40124d12, 0xFFFFFFFD40154D1A, 0xFFFFFFFC40154D22, 0xFFFFFFFB40154D2A # 844 - 847 D6 S4 C12 + .quad 0x0000000040005c00, 0x0000000040125C81, 0x0000000040125C82, 0xFFFFFFFF40125D0A # 848 - 851 D6 S5 C0 + .quad 0xfffffffe40155d12, 0xFFFFFFFD40155D1A, 0xFFFFFFFC40155D22, 0xFFFFFFFB40155D2A # 852 - 855 D6 S5 C4 + .quad 0xfffffffa40155d32, 0xFFFFFFF940155D3A, 0x0000000040125C82, 0xFFFFFFFF40125D0A # 856 - 859 D6 S5 C8 + .quad 0xfffffffe40155d12, 0xFFFFFFFD40155D1A, 0xFFFFFFFC40155D22, 0xFFFFFFFB40155D2A # 860 - 863 D6 S5 C12 + .quad 0x0000000040006c00, 0x0000000040026C81, 0x00000000400A6C82, 0xFFFFFFFF400C6D0A # 864 - 867 D6 S6 C0 + .quad 0xfffffffe400c6d12, 0xFFFFFFFD400C6D1A, 0xFFFFFFFC400C6D22, 0xFFFFFFFB400C6D2A # 868 - 871 D6 S6 C4 + .quad 0xfffffffa400c6d32, 0xFFFFFFF9400C6D3A, 0x00000000400A6C82, 0xFFFFFFFF400C6D0A # 872 - 875 D6 S6 C8 + .quad 0xfffffffe400c6d12, 0xFFFFFFFD400C6D1A, 0xFFFFFFFC400C6D22, 0xFFFFFFFB400C6D2A # 876 - 879 D6 S6 C12 + .quad 0x0000000040007c00, 0x00000000401A7C81, 0x00000000401CFC82, 0xFFFFFFFF401CFD0A # 880 - 883 D6 S7 C0 + .quad 0xfffffffe401cfd12, 0xFFFFFFFD401CFD1A, 0xFFFFFFFC401CFD22, 0xFFFFFFFB401CFD2A # 884 - 887 D6 S7 C4 + .quad 0xfffffffa401cfd32, 0xFFFFFFF9401CFD3A, 0x00000000401CFC82, 0xFFFFFFFF401CFD0A # 888 - 891 D6 S7 C8 + .quad 0xfffffffe401cfd12, 0xFFFFFFFD401CFD1A, 0xFFFFFFFC401CFD22, 0xFFFFFFFB401CFD2A # 892 - 895 D6 S7 C12 + .quad 0x0000000040000e00, 0x0000000040120E81, 0xFFFFFFFF40120F09, 0xFFFFFFFE40120F11 # 896 - 899 D7 S0 C0 + .quad 0xfffffffd40120f19, 0xFFFFFFFC40120F21, 0xFFFFFFFB40120F29, 0xFFFFFFFA40120F31 # 900 - 903 D7 S0 C4 + .quad 0xfffffff940120f39, 0x0000000040120E81, 0xFFFFFFFF40120F09, 0xFFFFFFFE40120F11 # 904 - 907 D7 S0 C8 + .quad 0xfffffffd40120f19, 0xFFFFFFFC40120F21, 0xFFFFFFFB40120F29, 0xFFFFFFFA40120F31 # 908 - 911 D7 S0 C12 + .quad 0x0000000040001e00, 0x0000000040121E81, 0xFFFFFFFF40121F09, 0xFFFFFFFE40121F11 # 912 - 915 D7 S1 C0 + .quad 0xfffffffd40121f19, 0xFFFFFFFC40121F21, 0xFFFFFFFB40121F29, 0xFFFFFFFA40121F31 # 916 - 919 D7 S1 C4 + .quad 0xfffffff940151f39, 0x0000000040121E81, 0xFFFFFFFF40121F09, 0xFFFFFFFE40121F11 # 920 - 923 D7 S1 C8 + .quad 0xfffffffd40121f19, 0xFFFFFFFC40121F21, 0xFFFFFFFB40121F29, 0xFFFFFFFA40121F31 # 924 - 927 D7 S1 C12 + .quad 0x0000000040002e00, 0x0000000040122E81, 0xFFFFFFFF40122F09, 0xFFFFFFFE40122F11 # 928 - 931 D7 S2 C0 + .quad 0xfffffffd40122f19, 0xFFFFFFFC40122F21, 0xFFFFFFFB40122F29, 0xFFFFFFFA40152F31 # 932 - 935 D7 S2 C4 + .quad 0xfffffff940152f39, 0x0000000040122E81, 0xFFFFFFFF40122F09, 0xFFFFFFFE40122F11 # 936 - 939 D7 S2 C8 + .quad 0xfffffffd40122f19, 0xFFFFFFFC40122F21, 0xFFFFFFFB40122F29, 0xFFFFFFFA40152F31 # 940 - 943 D7 S2 C12 + .quad 0x0000000040003e00, 0x0000000040123E81, 0xFFFFFFFF40123F09, 0xFFFFFFFE40123F11 # 944 - 947 D7 S3 C0 + .quad 0xfffffffd40123f19, 0xFFFFFFFC40123F21, 0xFFFFFFFB40153F29, 0xFFFFFFFA40153F31 # 948 - 951 D7 S3 C4 + .quad 0xfffffff940153f39, 0x0000000040123E81, 0xFFFFFFFF40123F09, 0xFFFFFFFE40123F11 # 952 - 955 D7 S3 C8 + .quad 0xfffffffd40123f19, 0xFFFFFFFC40123F21, 0xFFFFFFFB40153F29, 0xFFFFFFFA40153F31 # 956 - 959 D7 S3 C12 + .quad 0x0000000040004e00, 0x0000000040124E81, 0xFFFFFFFF40124F09, 0xFFFFFFFE40124F11 # 960 - 963 D7 S4 C0 + .quad 0xfffffffd40124f19, 0xFFFFFFFC40154F21, 0xFFFFFFFB40154F29, 0xFFFFFFFA40154F31 # 964 - 967 D7 S4 C4 + .quad 0xfffffff940154f39, 0x0000000040124E81, 0xFFFFFFFF40124F09, 0xFFFFFFFE40124F11 # 968 - 971 D7 S4 C8 + .quad 0xfffffffd40124f19, 0xFFFFFFFC40154F21, 0xFFFFFFFB40154F29, 0xFFFFFFFA40154F31 # 972 - 975 D7 S4 C12 + .quad 0x0000000040005e00, 0x0000000040125E81, 0xFFFFFFFF40125F09, 0xFFFFFFFE40125F11 # 976 - 979 D7 S5 C0 + .quad 0xfffffffd40155f19, 0xFFFFFFFC40155F21, 0xFFFFFFFB40155F29, 0xFFFFFFFA40155F31 # 980 - 983 D7 S5 C4 + .quad 0xfffffff940155f39, 0x0000000040125E81, 0xFFFFFFFF40125F09, 0xFFFFFFFE40125F11 # 984 - 987 D7 S5 C8 + .quad 0xfffffffd40155f19, 0xFFFFFFFC40155F21, 0xFFFFFFFB40155F29, 0xFFFFFFFA40155F31 # 988 - 991 D7 S5 C12 + .quad 0x0000000040006e00, 0x0000000040126E81, 0xFFFFFFFF40126F09, 0xFFFFFFFE40156F11 # 992 - 995 D7 S6 C0 + .quad 0xfffffffd40156f19, 0xFFFFFFFC40156F21, 0xFFFFFFFB40156F29, 0xFFFFFFFA40156F31 # 996 - 999 D7 S6 C4 + .quad 0xfffffff940156f39, 0x0000000040126E81, 0xFFFFFFFF40126F09, 0xFFFFFFFE40156F11 # 1000 - 1003 D7 S6 C8 + .quad 0xfffffffd40156f19, 0xFFFFFFFC40156F21, 0xFFFFFFFB40156F29, 0xFFFFFFFA40156F31 # 1004 - 1007 D7 S6 C12 + .quad 0x0000000040007e00, 0x00000000400A7E81, 0xFFFFFFFF400C7F09, 0xFFFFFFFE400C7F11 # 1008 - 1011 D7 S7 C0 + .quad 0xfffffffd400c7f19, 0xFFFFFFFC400C7F21, 0xFFFFFFFB400C7F29, 0xFFFFFFFA400C7F31 # 1012 - 1015 D7 S7 C4 + .quad 0xfffffff9400c7f39, 0x00000000400A7E81, 0xFFFFFFFF400C7F09, 0xFFFFFFFE400C7F11 # 1016 - 1019 D7 S7 C8 + .quad 0xfffffffd400c7f19, 0xFFFFFFFC400C7F21, 0xFFFFFFFB400C7F29, 0xFFFFFFFA400C7F31 # 1020 - 1023 D7 S7 C12 + .quad 0x0000000040000000, 0xFFFFFFFF40020088, 0xFFFFFFFE40020090, 0xFFFFFFFD40020098 # 1024 - 1027 D0 S0 C0 neq + .quad 0xfffffffc400200a0, 0xFFFFFFFB400200A8, 0xFFFFFFFA400200B0, 0xFFFFFFF9400200B8 # 1028 - 1031 D0 S0 C4 neq + .quad 0xfffffff8400200c0, 0xFFFFFFFF40020088, 0xFFFFFFFE40020090, 0xFFFFFFFD40020098 # 1032 - 1035 D0 S0 C8 neq + .quad 0xfffffffc400200a0, 0xFFFFFFFB400200A8, 0xFFFFFFFA400200B0, 0xFFFFFFF9400200B8 # 1036 - 1039 D0 S0 C12 neq + .quad 0x0000000040001000, 0xFFFFFFFF40121088, 0xFFFFFFFE40121090, 0xFFFFFFFD40121098 # 1040 - 1043 D0 S1 C0 neq + .quad 0xfffffffc401210a0, 0xFFFFFFFB401210A8, 0xFFFFFFFA401210B0, 0xFFFFFFF9401210B8 # 1044 - 1047 D0 S1 C4 neq + .quad 0xfffffff8401510c0, 0xFFFFFFFF40121088, 0xFFFFFFFE40121090, 0xFFFFFFFD40121098 # 1048 - 1051 D0 S1 C8 neq + .quad 0xfffffffc401210a0, 0xFFFFFFFB401210A8, 0xFFFFFFFA401210B0, 0xFFFFFFF9401210B8 # 1052 - 1055 D0 S1 C12 neq + .quad 0x0000000040002000, 0xFFFFFFFF40122088, 0xFFFFFFFE40122090, 0xFFFFFFFD40122098 # 1056 - 1059 D0 S2 C0 neq + .quad 0xfffffffc401220a0, 0xFFFFFFFB401220A8, 0xFFFFFFFA401220B0, 0xFFFFFFF9401520B8 # 1060 - 1063 D0 S2 C4 neq + .quad 0xfffffff8401520c0, 0xFFFFFFFF40122088, 0xFFFFFFFE40122090, 0xFFFFFFFD40122098 # 1064 - 1067 D0 S2 C8 neq + .quad 0xfffffffc401220a0, 0xFFFFFFFB401220A8, 0xFFFFFFFA401220B0, 0xFFFFFFF9401520B8 # 1068 - 1071 D0 S2 C12 neq + .quad 0x0000000040003000, 0xFFFFFFFF40123088, 0xFFFFFFFE40123090, 0xFFFFFFFD40123098 # 1072 - 1075 D0 S3 C0 neq + .quad 0xfffffffc401230a0, 0xFFFFFFFB401230A8, 0xFFFFFFFA401530B0, 0xFFFFFFF9401530B8 # 1076 - 1079 D0 S3 C4 neq + .quad 0xfffffff8401530c0, 0xFFFFFFFF40123088, 0xFFFFFFFE40123090, 0xFFFFFFFD40123098 # 1080 - 1083 D0 S3 C8 neq + .quad 0xfffffffc401230a0, 0xFFFFFFFB401230A8, 0xFFFFFFFA401530B0, 0xFFFFFFF9401530B8 # 1084 - 1087 D0 S3 C12 neq + .quad 0x0000000040004000, 0xFFFFFFFF40124088, 0xFFFFFFFE40124090, 0xFFFFFFFD40124098 # 1088 - 1091 D0 S4 C0 neq + .quad 0xfffffffc401240a0, 0xFFFFFFFB401540A8, 0xFFFFFFFA401540B0, 0xFFFFFFF9401540B8 # 1092 - 1095 D0 S4 C4 neq + .quad 0xfffffff8401540c0, 0xFFFFFFFF40124088, 0xFFFFFFFE40124090, 0xFFFFFFFD40124098 # 1096 - 1099 D0 S4 C8 neq + .quad 0xfffffffc401240a0, 0xFFFFFFFB401540A8, 0xFFFFFFFA401540B0, 0xFFFFFFF9401540B8 # 1100 - 1103 D0 S4 C12 neq + .quad 0x0000000040005000, 0xFFFFFFFF40125088, 0xFFFFFFFE40125090, 0xFFFFFFFD40125098 # 1104 - 1107 D0 S5 C0 neq + .quad 0xfffffffc401550a0, 0xFFFFFFFB401550A8, 0xFFFFFFFA401550B0, 0xFFFFFFF9401550B8 # 1108 - 1111 D0 S5 C4 neq + .quad 0xfffffff8401550c0, 0xFFFFFFFF40125088, 0xFFFFFFFE40125090, 0xFFFFFFFD40125098 # 1112 - 1115 D0 S5 C8 neq + .quad 0xfffffffc401550a0, 0xFFFFFFFB401550A8, 0xFFFFFFFA401550B0, 0xFFFFFFF9401550B8 # 1116 - 1119 D0 S5 C12 neq + .quad 0x0000000040006000, 0xFFFFFFFF40126088, 0xFFFFFFFE40126090, 0xFFFFFFFD40156098 # 1120 - 1123 D0 S6 C0 neq + .quad 0xfffffffc401560a0, 0xFFFFFFFB401560A8, 0xFFFFFFFA401560B0, 0xFFFFFFF9401560B8 # 1124 - 1127 D0 S6 C4 neq + .quad 0xfffffff8401560c0, 0xFFFFFFFF40126088, 0xFFFFFFFE40126090, 0xFFFFFFFD40156098 # 1128 - 1131 D0 S6 C8 neq + .quad 0xfffffffc401560a0, 0xFFFFFFFB401560A8, 0xFFFFFFFA401560B0, 0xFFFFFFF9401560B8 # 1132 - 1135 D0 S6 C12 neq + .quad 0x0000000040007000, 0xFFFFFFFF40127088, 0xFFFFFFFE40157090, 0xFFFFFFFD40157098 # 1136 - 1139 D0 S7 C0 neq + .quad 0xfffffffc401570a0, 0xFFFFFFFB401570A8, 0xFFFFFFFA401570B0, 0xFFFFFFF9401570B8 # 1140 - 1143 D0 S7 C4 neq + .quad 0xfffffff8401570c0, 0xFFFFFFFF40127088, 0xFFFFFFFE40157090, 0xFFFFFFFD40157098 # 1144 - 1147 D0 S7 C8 neq + .quad 0xfffffffc401570a0, 0xFFFFFFFB401570A8, 0xFFFFFFFA401570B0, 0xFFFFFFF9401570B8 # 1148 - 1151 D0 S7 C12 neq + .quad 0x0000000040000200, 0x0000000040120281, 0x0000000040120282, 0x0000000040120283 # 1152 - 1155 D1 S0 C0 neq + .quad 0x0000000040120284, 0x0000000040120285, 0x0000000040120286, 0x0000000040120287 # 1156 - 1159 D1 S0 C4 neq + .quad 0xffffffff4012030f, 0xFFFFFFFE40150317, 0xFFFFFFFD4015031F, 0xFFFFFFFC40150327 # 1160 - 1163 D1 S0 C8 neq + .quad 0xfffffffb4015032f, 0xFFFFFFFA40150337, 0xFFFFFFF94015033F, 0xFFFFFFF840150347 # 1164 - 1167 D1 S0 C12 neq + .quad 0x0000000040001200, 0x0000000040021281, 0x0000000040021282, 0x0000000040021283 # 1168 - 1171 D1 S1 C0 neq + .quad 0x0000000040021284, 0x0000000040021285, 0x0000000040021286, 0x00000000400A1287 # 1172 - 1175 D1 S1 C4 neq + .quad 0xffffffff400c130f, 0xFFFFFFFE400C1317, 0xFFFFFFFD400C131F, 0xFFFFFFFC400C1327 # 1176 - 1179 D1 S1 C8 neq + .quad 0xfffffffb400c132f, 0xFFFFFFFA400C1337, 0xFFFFFFF9400C133F, 0xFFFFFFF8400C1347 # 1180 - 1183 D1 S1 C12 neq + .quad 0x0000000040002200, 0x0000000040122281, 0x0000000040122282, 0x0000000040122283 # 1184 - 1187 D1 S2 C0 neq + .quad 0x0000000040122284, 0x0000000040122285, 0x00000000401A2286, 0x00000000401CA287 # 1188 - 1191 D1 S2 C4 neq + .quad 0xffffffff401ca30f, 0xFFFFFFFE401CA317, 0xFFFFFFFD401CA31F, 0xFFFFFFFC401CA327 # 1192 - 1195 D1 S2 C8 neq + .quad 0xfffffffb401ca32f, 0xFFFFFFFA401CA337, 0xFFFFFFF9401CA33F, 0xFFFFFFF8401FA347 # 1196 - 1199 D1 S2 C12 neq + .quad 0x0000000040003200, 0x0000000040123281, 0x0000000040123282, 0x0000000040123283 # 1200 - 1203 D1 S3 C0 neq + .quad 0x0000000040123284, 0x00000000401A3285, 0x00000000401CB286, 0x00000000401CB287 # 1204 - 1207 D1 S3 C4 neq + .quad 0xffffffff401cb30f, 0xFFFFFFFE401CB317, 0xFFFFFFFD401CB31F, 0xFFFFFFFC401CB327 # 1208 - 1211 D1 S3 C8 neq + .quad 0xfffffffb401cb32f, 0xFFFFFFFA401CB337, 0xFFFFFFF9401FB33F, 0xFFFFFFF8401FB347 # 1212 - 1215 D1 S3 C12 neq + .quad 0x0000000040004200, 0x0000000040124281, 0x0000000040124282, 0x0000000040124283 # 1216 - 1219 D1 S4 C0 neq + .quad 0x00000000401a4284, 0x00000000401CC285, 0x00000000401CC286, 0x00000000401CC287 # 1220 - 1223 D1 S4 C4 neq + .quad 0xffffffff401cc30f, 0xFFFFFFFE401CC317, 0xFFFFFFFD401CC31F, 0xFFFFFFFC401CC327 # 1224 - 1227 D1 S4 C8 neq + .quad 0xfffffffb401cc32f, 0xFFFFFFFA401FC337, 0xFFFFFFF9401FC33F, 0xFFFFFFF8401FC347 # 1228 - 1231 D1 S4 C12 neq + .quad 0x0000000040005200, 0x0000000040125281, 0x0000000040125282, 0x00000000401A5283 # 1232 - 1235 D1 S5 C0 neq + .quad 0x00000000401cd284, 0x00000000401CD285, 0x00000000401CD286, 0x00000000401CD287 # 1236 - 1239 D1 S5 C4 neq + .quad 0xffffffff401cd30f, 0xFFFFFFFE401CD317, 0xFFFFFFFD401CD31F, 0xFFFFFFFC401CD327 # 1240 - 1243 D1 S5 C8 neq + .quad 0xfffffffb401fd32f, 0xFFFFFFFA401FD337, 0xFFFFFFF9401FD33F, 0xFFFFFFF8401FD347 # 1244 - 1247 D1 S5 C12 neq + .quad 0x0000000040006200, 0x0000000040126281, 0x00000000401A6282, 0x00000000401CE283 # 1248 - 1251 D1 S6 C0 neq + .quad 0x00000000401ce284, 0x00000000401CE285, 0x00000000401CE286, 0x00000000401CE287 # 1252 - 1255 D1 S6 C4 neq + .quad 0xffffffff401ce30f, 0xFFFFFFFE401CE317, 0xFFFFFFFD401CE31F, 0xFFFFFFFC401FE327 # 1256 - 1259 D1 S6 C8 neq + .quad 0xfffffffb401fe32f, 0xFFFFFFFA401FE337, 0xFFFFFFF9401FE33F, 0xFFFFFFF8401FE347 # 1260 - 1263 D1 S6 C12 neq + .quad 0x0000000040007200, 0x00000000401A7281, 0x00000000401CF282, 0x00000000401CF283 # 1264 - 1267 D1 S7 C0 neq + .quad 0x00000000401cf284, 0x00000000401CF285, 0x00000000401CF286, 0x00000000401CF287 # 1268 - 1271 D1 S7 C4 neq + .quad 0xffffffff401cf30f, 0xFFFFFFFE401CF317, 0xFFFFFFFD401FF31F, 0xFFFFFFFC401FF327 # 1272 - 1275 D1 S7 C8 neq + .quad 0xfffffffb401ff32f, 0xFFFFFFFA401FF337, 0xFFFFFFF9401FF33F, 0xFFFFFFF8401FF347 # 1276 - 1279 D1 S7 C12 neq + .quad 0x0000000040000400, 0x0000000040120481, 0x0000000040120482, 0x0000000040120483 # 1280 - 1283 D2 S0 C0 neq + .quad 0x0000000040120484, 0x0000000040120485, 0x0000000040120486, 0xFFFFFFFF4012050E # 1284 - 1287 D2 S0 C4 neq + .quad 0xfffffffe40120516, 0xFFFFFFFD4015051E, 0xFFFFFFFC40150526, 0xFFFFFFFB4015052E # 1288 - 1291 D2 S0 C8 neq + .quad 0xfffffffa40150536, 0xFFFFFFF94015053E, 0xFFFFFFF840150546, 0xFFFFFFFF4012050E # 1292 - 1295 D2 S0 C12 neq + .quad 0x0000000040001400, 0x0000000040121481, 0x0000000040121482, 0x0000000040121483 # 1296 - 1299 D2 S1 C0 neq + .quad 0x0000000040121484, 0x0000000040121485, 0x0000000040121486, 0xFFFFFFFF4012150E # 1300 - 1303 D2 S1 C4 neq + .quad 0xfffffffe40151516, 0xFFFFFFFD4015151E, 0xFFFFFFFC40151526, 0xFFFFFFFB4015152E # 1304 - 1307 D2 S1 C8 neq + .quad 0xfffffffa40151536, 0xFFFFFFF94015153E, 0xFFFFFFF840151546, 0xFFFFFFFF4012150E # 1308 - 1311 D2 S1 C12 neq + .quad 0x0000000040002400, 0x0000000040022481, 0x0000000040022482, 0x0000000040022483 # 1312 - 1315 D2 S2 C0 neq + .quad 0x0000000040022484, 0x0000000040022485, 0x00000000400A2486, 0xFFFFFFFF400C250E # 1316 - 1319 D2 S2 C4 neq + .quad 0xfffffffe400c2516, 0xFFFFFFFD400C251E, 0xFFFFFFFC400C2526, 0xFFFFFFFB400C252E # 1320 - 1323 D2 S2 C8 neq + .quad 0xfffffffa400c2536, 0xFFFFFFF9400C253E, 0xFFFFFFF8400C2546, 0xFFFFFFFF400C250E # 1324 - 1327 D2 S2 C12 neq + .quad 0x0000000040003400, 0x0000000040123481, 0x0000000040123482, 0x0000000040123483 # 1328 - 1331 D2 S3 C0 neq + .quad 0x0000000040123484, 0x00000000401A3485, 0x00000000401CB486, 0xFFFFFFFF401CB50E # 1332 - 1335 D2 S3 C4 neq + .quad 0xfffffffe401cb516, 0xFFFFFFFD401CB51E, 0xFFFFFFFC401CB526, 0xFFFFFFFB401CB52E # 1336 - 1339 D2 S3 C8 neq + .quad 0xfffffffa401cb536, 0xFFFFFFF9401CB53E, 0xFFFFFFF8401FB546, 0xFFFFFFFF401CB50E # 1340 - 1343 D2 S3 C12 neq + .quad 0x0000000040004400, 0x0000000040124481, 0x0000000040124482, 0x0000000040124483 # 1344 - 1347 D2 S4 C0 neq + .quad 0x00000000401a4484, 0x00000000401CC485, 0x00000000401CC486, 0xFFFFFFFF401CC50E # 1348 - 1351 D2 S4 C4 neq + .quad 0xfffffffe401cc516, 0xFFFFFFFD401CC51E, 0xFFFFFFFC401CC526, 0xFFFFFFFB401CC52E # 1352 - 1355 D2 S4 C8 neq + .quad 0xfffffffa401cc536, 0xFFFFFFF9401FC53E, 0xFFFFFFF8401FC546, 0xFFFFFFFF401CC50E # 1356 - 1359 D2 S4 C12 neq + .quad 0x0000000040005400, 0x0000000040125481, 0x0000000040125482, 0x00000000401A5483 # 1360 - 1363 D2 S5 C0 neq + .quad 0x00000000401cd484, 0x00000000401CD485, 0x00000000401CD486, 0xFFFFFFFF401CD50E # 1364 - 1367 D2 S5 C4 neq + .quad 0xfffffffe401cd516, 0xFFFFFFFD401CD51E, 0xFFFFFFFC401CD526, 0xFFFFFFFB401CD52E # 1368 - 1371 D2 S5 C8 neq + .quad 0xfffffffa401fd536, 0xFFFFFFF9401FD53E, 0xFFFFFFF8401FD546, 0xFFFFFFFF401CD50E # 1372 - 1375 D2 S5 C12 neq + .quad 0x0000000040006400, 0x0000000040126481, 0x00000000401A6482, 0x00000000401CE483 # 1376 - 1379 D2 S6 C0 neq + .quad 0x00000000401ce484, 0x00000000401CE485, 0x00000000401CE486, 0xFFFFFFFF401CE50E # 1380 - 1383 D2 S6 C4 neq + .quad 0xfffffffe401ce516, 0xFFFFFFFD401CE51E, 0xFFFFFFFC401CE526, 0xFFFFFFFB401FE52E # 1384 - 1387 D2 S6 C8 neq + .quad 0xfffffffa401fe536, 0xFFFFFFF9401FE53E, 0xFFFFFFF8401FE546, 0xFFFFFFFF401CE50E # 1388 - 1391 D2 S6 C12 neq + .quad 0x0000000040007400, 0x00000000401A7481, 0x00000000401CF482, 0x00000000401CF483 # 1392 - 1395 D2 S7 C0 neq + .quad 0x00000000401cf484, 0x00000000401CF485, 0x00000000401CF486, 0xFFFFFFFF401CF50E # 1396 - 1399 D2 S7 C4 neq + .quad 0xfffffffe401cf516, 0xFFFFFFFD401CF51E, 0xFFFFFFFC401FF526, 0xFFFFFFFB401FF52E # 1400 - 1403 D2 S7 C8 neq + .quad 0xfffffffa401ff536, 0xFFFFFFF9401FF53E, 0xFFFFFFF8401FF546, 0xFFFFFFFF401CF50E # 1404 - 1407 D2 S7 C12 neq + .quad 0x0000000040000600, 0x0000000040120681, 0x0000000040120682, 0x0000000040120683 # 1408 - 1411 D3 S0 C0 neq + .quad 0x0000000040120684, 0x0000000040120685, 0xFFFFFFFF4012070D, 0xFFFFFFFE40120715 # 1412 - 1415 D3 S0 C4 neq + .quad 0xfffffffd4012071d, 0xFFFFFFFC40150725, 0xFFFFFFFB4015072D, 0xFFFFFFFA40150735 # 1416 - 1419 D3 S0 C8 neq + .quad 0xfffffff94015073d, 0xFFFFFFF840150745, 0xFFFFFFFF4012070D, 0xFFFFFFFE40120715 # 1420 - 1423 D3 S0 C12 neq + .quad 0x0000000040001600, 0x0000000040121681, 0x0000000040121682, 0x0000000040121683 # 1424 - 1427 D3 S1 C0 neq + .quad 0x0000000040121684, 0x0000000040121685, 0xFFFFFFFF4012170D, 0xFFFFFFFE40121715 # 1428 - 1431 D3 S1 C4 neq + .quad 0xfffffffd4015171d, 0xFFFFFFFC40151725, 0xFFFFFFFB4015172D, 0xFFFFFFFA40151735 # 1432 - 1435 D3 S1 C8 neq + .quad 0xfffffff94015173d, 0xFFFFFFF840151745, 0xFFFFFFFF4012170D, 0xFFFFFFFE40121715 # 1436 - 1439 D3 S1 C12 neq + .quad 0x0000000040002600, 0x0000000040122681, 0x0000000040122682, 0x0000000040122683 # 1440 - 1443 D3 S2 C0 neq + .quad 0x0000000040122684, 0x0000000040122685, 0xFFFFFFFF4012270D, 0xFFFFFFFE40152715 # 1444 - 1447 D3 S2 C4 neq + .quad 0xfffffffd4015271d, 0xFFFFFFFC40152725, 0xFFFFFFFB4015272D, 0xFFFFFFFA40152735 # 1448 - 1451 D3 S2 C8 neq + .quad 0xfffffff94015273d, 0xFFFFFFF840152745, 0xFFFFFFFF4012270D, 0xFFFFFFFE40152715 # 1452 - 1455 D3 S2 C12 neq + .quad 0x0000000040003600, 0x0000000040023681, 0x0000000040023682, 0x0000000040023683 # 1456 - 1459 D3 S3 C0 neq + .quad 0x0000000040023684, 0x00000000400A3685, 0xFFFFFFFF400C370D, 0xFFFFFFFE400C3715 # 1460 - 1463 D3 S3 C4 neq + .quad 0xfffffffd400c371d, 0xFFFFFFFC400C3725, 0xFFFFFFFB400C372D, 0xFFFFFFFA400C3735 # 1464 - 1467 D3 S3 C8 neq + .quad 0xfffffff9400c373d, 0xFFFFFFF8400C3745, 0xFFFFFFFF400C370D, 0xFFFFFFFE400C3715 # 1468 - 1471 D3 S3 C12 neq + .quad 0x0000000040004600, 0x0000000040124681, 0x0000000040124682, 0x0000000040124683 # 1472 - 1475 D3 S4 C0 neq + .quad 0x00000000401a4684, 0x00000000401CC685, 0xFFFFFFFF401CC70D, 0xFFFFFFFE401CC715 # 1476 - 1479 D3 S4 C4 neq + .quad 0xfffffffd401cc71d, 0xFFFFFFFC401CC725, 0xFFFFFFFB401CC72D, 0xFFFFFFFA401CC735 # 1480 - 1483 D3 S4 C8 neq + .quad 0xfffffff9401cc73d, 0xFFFFFFF8401FC745, 0xFFFFFFFF401CC70D, 0xFFFFFFFE401CC715 # 1484 - 1487 D3 S4 C12 neq + .quad 0x0000000040005600, 0x0000000040125681, 0x0000000040125682, 0x00000000401A5683 # 1488 - 1491 D3 S5 C0 neq + .quad 0x00000000401cd684, 0x00000000401CD685, 0xFFFFFFFF401CD70D, 0xFFFFFFFE401CD715 # 1492 - 1495 D3 S5 C4 neq + .quad 0xfffffffd401cd71d, 0xFFFFFFFC401CD725, 0xFFFFFFFB401CD72D, 0xFFFFFFFA401CD735 # 1496 - 1499 D3 S5 C8 neq + .quad 0xfffffff9401fd73d, 0xFFFFFFF8401FD745, 0xFFFFFFFF401CD70D, 0xFFFFFFFE401CD715 # 1500 - 1503 D3 S5 C12 neq + .quad 0x0000000040006600, 0x0000000040126681, 0x00000000401A6682, 0x00000000401CE683 # 1504 - 1507 D3 S6 C0 neq + .quad 0x00000000401ce684, 0x00000000401CE685, 0xFFFFFFFF401CE70D, 0xFFFFFFFE401CE715 # 1508 - 1511 D3 S6 C4 neq + .quad 0xfffffffd401ce71d, 0xFFFFFFFC401CE725, 0xFFFFFFFB401CE72D, 0xFFFFFFFA401FE735 # 1512 - 1515 D3 S6 C8 neq + .quad 0xfffffff9401fe73d, 0xFFFFFFF8401FE745, 0xFFFFFFFF401CE70D, 0xFFFFFFFE401CE715 # 1516 - 1519 D3 S6 C12 neq + .quad 0x0000000040007600, 0x00000000401A7681, 0x00000000401CF682, 0x00000000401CF683 # 1520 - 1523 D3 S7 C0 neq + .quad 0x00000000401cf684, 0x00000000401CF685, 0xFFFFFFFF401CF70D, 0xFFFFFFFE401CF715 # 1524 - 1527 D3 S7 C4 neq + .quad 0xfffffffd401cf71d, 0xFFFFFFFC401CF725, 0xFFFFFFFB401FF72D, 0xFFFFFFFA401FF735 # 1528 - 1531 D3 S7 C8 neq + .quad 0xfffffff9401ff73d, 0xFFFFFFF8401FF745, 0xFFFFFFFF401CF70D, 0xFFFFFFFE401CF715 # 1532 - 1535 D3 S7 C12 neq + .quad 0x0000000040000800, 0x0000000040120881, 0x0000000040120882, 0x0000000040120883 # 1536 - 1539 D4 S0 C0 neq + .quad 0x0000000040120884, 0xFFFFFFFF4012090C, 0xFFFFFFFE40120914, 0xFFFFFFFD4012091C # 1540 - 1543 D4 S0 C4 neq + .quad 0xfffffffc40120924, 0xFFFFFFFB4015092C, 0xFFFFFFFA40150934, 0xFFFFFFF94015093C # 1544 - 1547 D4 S0 C8 neq + .quad 0xfffffff840150944, 0xFFFFFFFF4012090C, 0xFFFFFFFE40120914, 0xFFFFFFFD4012091C # 1548 - 1551 D4 S0 C12 neq + .quad 0x0000000040001800, 0x0000000040121881, 0x0000000040121882, 0x0000000040121883 # 1552 - 1555 D4 S1 C0 neq + .quad 0x0000000040121884, 0xFFFFFFFF4012190C, 0xFFFFFFFE40121914, 0xFFFFFFFD4012191C # 1556 - 1559 D4 S1 C4 neq + .quad 0xfffffffc40151924, 0xFFFFFFFB4015192C, 0xFFFFFFFA40151934, 0xFFFFFFF94015193C # 1560 - 1563 D4 S1 C8 neq + .quad 0xfffffff840151944, 0xFFFFFFFF4012190C, 0xFFFFFFFE40121914, 0xFFFFFFFD4012191C # 1564 - 1567 D4 S1 C12 neq + .quad 0x0000000040002800, 0x0000000040122881, 0x0000000040122882, 0x0000000040122883 # 1568 - 1571 D4 S2 C0 neq + .quad 0x0000000040122884, 0xFFFFFFFF4012290C, 0xFFFFFFFE40122914, 0xFFFFFFFD4015291C # 1572 - 1575 D4 S2 C4 neq + .quad 0xfffffffc40152924, 0xFFFFFFFB4015292C, 0xFFFFFFFA40152934, 0xFFFFFFF94015293C # 1576 - 1579 D4 S2 C8 neq + .quad 0xfffffff840152944, 0xFFFFFFFF4012290C, 0xFFFFFFFE40122914, 0xFFFFFFFD4015291C # 1580 - 1583 D4 S2 C12 neq + .quad 0x0000000040003800, 0x0000000040123881, 0x0000000040123882, 0x0000000040123883 # 1584 - 1587 D4 S3 C0 neq + .quad 0x0000000040123884, 0xFFFFFFFF4012390C, 0xFFFFFFFE40153914, 0xFFFFFFFD4015391C # 1588 - 1591 D4 S3 C4 neq + .quad 0xfffffffc40153924, 0xFFFFFFFB4015392C, 0xFFFFFFFA40153934, 0xFFFFFFF94015393C # 1592 - 1595 D4 S3 C8 neq + .quad 0xfffffff840153944, 0xFFFFFFFF4012390C, 0xFFFFFFFE40153914, 0xFFFFFFFD4015391C # 1596 - 1599 D4 S3 C12 neq + .quad 0x0000000040004800, 0x0000000040024881, 0x0000000040024882, 0x0000000040024883 # 1600 - 1603 D4 S4 C0 neq + .quad 0x00000000400a4884, 0xFFFFFFFF400C490C, 0xFFFFFFFE400C4914, 0xFFFFFFFD400C491C # 1604 - 1607 D4 S4 C4 neq + .quad 0xfffffffc400c4924, 0xFFFFFFFB400C492C, 0xFFFFFFFA400C4934, 0xFFFFFFF9400C493C # 1608 - 1611 D4 S4 C8 neq + .quad 0xfffffff8400c4944, 0xFFFFFFFF400C490C, 0xFFFFFFFE400C4914, 0xFFFFFFFD400C491C # 1612 - 1615 D4 S4 C12 neq + .quad 0x0000000040005800, 0x0000000040125881, 0x0000000040125882, 0x00000000401A5883 # 1616 - 1619 D4 S5 C0 neq + .quad 0x00000000401cd884, 0xFFFFFFFF401CD90C, 0xFFFFFFFE401CD914, 0xFFFFFFFD401CD91C # 1620 - 1623 D4 S5 C4 neq + .quad 0xfffffffc401cd924, 0xFFFFFFFB401CD92C, 0xFFFFFFFA401CD934, 0xFFFFFFF9401CD93C # 1624 - 1627 D4 S5 C8 neq + .quad 0xfffffff8401fd944, 0xFFFFFFFF401CD90C, 0xFFFFFFFE401CD914, 0xFFFFFFFD401CD91C # 1628 - 1631 D4 S5 C12 neq + .quad 0x0000000040006800, 0x0000000040126881, 0x00000000401A6882, 0x00000000401CE883 # 1632 - 1635 D4 S6 C0 neq + .quad 0x00000000401ce884, 0xFFFFFFFF401CE90C, 0xFFFFFFFE401CE914, 0xFFFFFFFD401CE91C # 1636 - 1639 D4 S6 C4 neq + .quad 0xfffffffc401ce924, 0xFFFFFFFB401CE92C, 0xFFFFFFFA401CE934, 0xFFFFFFF9401FE93C # 1640 - 1643 D4 S6 C8 neq + .quad 0xfffffff8401fe944, 0xFFFFFFFF401CE90C, 0xFFFFFFFE401CE914, 0xFFFFFFFD401CE91C # 1644 - 1647 D4 S6 C12 neq + .quad 0x0000000040007800, 0x00000000401A7881, 0x00000000401CF882, 0x00000000401CF883 # 1648 - 1651 D4 S7 C0 neq + .quad 0x00000000401cf884, 0xFFFFFFFF401CF90C, 0xFFFFFFFE401CF914, 0xFFFFFFFD401CF91C # 1652 - 1655 D4 S7 C4 neq + .quad 0xfffffffc401cf924, 0xFFFFFFFB401CF92C, 0xFFFFFFFA401FF934, 0xFFFFFFF9401FF93C # 1656 - 1659 D4 S7 C8 neq + .quad 0xfffffff8401ff944, 0xFFFFFFFF401CF90C, 0xFFFFFFFE401CF914, 0xFFFFFFFD401CF91C # 1660 - 1663 D4 S7 C12 neq + .quad 0x0000000040000a00, 0x0000000040120A81, 0x0000000040120A82, 0x0000000040120A83 # 1664 - 1667 D5 S0 C0 neq + .quad 0xffffffff40120b0b, 0xFFFFFFFE40120B13, 0xFFFFFFFD40120B1B, 0xFFFFFFFC40120B23 # 1668 - 1671 D5 S0 C4 neq + .quad 0xfffffffb40120b2b, 0xFFFFFFFA40150B33, 0xFFFFFFF940150B3B, 0xFFFFFFF840150B43 # 1672 - 1675 D5 S0 C8 neq + .quad 0xffffffff40120b0b, 0xFFFFFFFE40120B13, 0xFFFFFFFD40120B1B, 0xFFFFFFFC40120B23 # 1676 - 1679 D5 S0 C12 neq + .quad 0x0000000040001a00, 0x0000000040121A81, 0x0000000040121A82, 0x0000000040121A83 # 1680 - 1683 D5 S1 C0 neq + .quad 0xffffffff40121b0b, 0xFFFFFFFE40121B13, 0xFFFFFFFD40121B1B, 0xFFFFFFFC40121B23 # 1684 - 1687 D5 S1 C4 neq + .quad 0xfffffffb40151b2b, 0xFFFFFFFA40151B33, 0xFFFFFFF940151B3B, 0xFFFFFFF840151B43 # 1688 - 1691 D5 S1 C8 neq + .quad 0xffffffff40121b0b, 0xFFFFFFFE40121B13, 0xFFFFFFFD40121B1B, 0xFFFFFFFC40121B23 # 1692 - 1695 D5 S1 C12 neq + .quad 0x0000000040002a00, 0x0000000040122A81, 0x0000000040122A82, 0x0000000040122A83 # 1696 - 1699 D5 S2 C0 neq + .quad 0xffffffff40122b0b, 0xFFFFFFFE40122B13, 0xFFFFFFFD40122B1B, 0xFFFFFFFC40152B23 # 1700 - 1703 D5 S2 C4 neq + .quad 0xfffffffb40152b2b, 0xFFFFFFFA40152B33, 0xFFFFFFF940152B3B, 0xFFFFFFF840152B43 # 1704 - 1707 D5 S2 C8 neq + .quad 0xffffffff40122b0b, 0xFFFFFFFE40122B13, 0xFFFFFFFD40122B1B, 0xFFFFFFFC40152B23 # 1708 - 1711 D5 S2 C12 neq + .quad 0x0000000040003a00, 0x0000000040123A81, 0x0000000040123A82, 0x0000000040123A83 # 1712 - 1715 D5 S3 C0 neq + .quad 0xffffffff40123b0b, 0xFFFFFFFE40123B13, 0xFFFFFFFD40153B1B, 0xFFFFFFFC40153B23 # 1716 - 1719 D5 S3 C4 neq + .quad 0xfffffffb40153b2b, 0xFFFFFFFA40153B33, 0xFFFFFFF940153B3B, 0xFFFFFFF840153B43 # 1720 - 1723 D5 S3 C8 neq + .quad 0xffffffff40123b0b, 0xFFFFFFFE40123B13, 0xFFFFFFFD40153B1B, 0xFFFFFFFC40153B23 # 1724 - 1727 D5 S3 C12 neq + .quad 0x0000000040004a00, 0x0000000040124A81, 0x0000000040124A82, 0x0000000040124A83 # 1728 - 1731 D5 S4 C0 neq + .quad 0xffffffff40124b0b, 0xFFFFFFFE40154B13, 0xFFFFFFFD40154B1B, 0xFFFFFFFC40154B23 # 1732 - 1735 D5 S4 C4 neq + .quad 0xfffffffb40154b2b, 0xFFFFFFFA40154B33, 0xFFFFFFF940154B3B, 0xFFFFFFF840154B43 # 1736 - 1739 D5 S4 C8 neq + .quad 0xffffffff40124b0b, 0xFFFFFFFE40154B13, 0xFFFFFFFD40154B1B, 0xFFFFFFFC40154B23 # 1740 - 1743 D5 S4 C12 neq + .quad 0x0000000040005a00, 0x0000000040025A81, 0x0000000040025A82, 0x00000000400A5A83 # 1744 - 1747 D5 S5 C0 neq + .quad 0xffffffff400c5b0b, 0xFFFFFFFE400C5B13, 0xFFFFFFFD400C5B1B, 0xFFFFFFFC400C5B23 # 1748 - 1751 D5 S5 C4 neq + .quad 0xfffffffb400c5b2b, 0xFFFFFFFA400C5B33, 0xFFFFFFF9400C5B3B, 0xFFFFFFF8400C5B43 # 1752 - 1755 D5 S5 C8 neq + .quad 0xffffffff400c5b0b, 0xFFFFFFFE400C5B13, 0xFFFFFFFD400C5B1B, 0xFFFFFFFC400C5B23 # 1756 - 1759 D5 S5 C12 neq + .quad 0x0000000040006a00, 0x0000000040126A81, 0x00000000401A6A82, 0x00000000401CEA83 # 1760 - 1763 D5 S6 C0 neq + .quad 0xffffffff401ceb0b, 0xFFFFFFFE401CEB13, 0xFFFFFFFD401CEB1B, 0xFFFFFFFC401CEB23 # 1764 - 1767 D5 S6 C4 neq + .quad 0xfffffffb401ceb2b, 0xFFFFFFFA401CEB33, 0xFFFFFFF9401CEB3B, 0xFFFFFFF8401FEB43 # 1768 - 1771 D5 S6 C8 neq + .quad 0xffffffff401ceb0b, 0xFFFFFFFE401CEB13, 0xFFFFFFFD401CEB1B, 0xFFFFFFFC401CEB23 # 1772 - 1775 D5 S6 C12 neq + .quad 0x0000000040007a00, 0x00000000401A7A81, 0x00000000401CFA82, 0x00000000401CFA83 # 1776 - 1779 D5 S7 C0 neq + .quad 0xffffffff401cfb0b, 0xFFFFFFFE401CFB13, 0xFFFFFFFD401CFB1B, 0xFFFFFFFC401CFB23 # 1780 - 1783 D5 S7 C4 neq + .quad 0xfffffffb401cfb2b, 0xFFFFFFFA401CFB33, 0xFFFFFFF9401FFB3B, 0xFFFFFFF8401FFB43 # 1784 - 1787 D5 S7 C8 neq + .quad 0xffffffff401cfb0b, 0xFFFFFFFE401CFB13, 0xFFFFFFFD401CFB1B, 0xFFFFFFFC401CFB23 # 1788 - 1791 D5 S7 C12 neq + .quad 0x0000000040000c00, 0x0000000040120C81, 0x0000000040120C82, 0xFFFFFFFF40120D0A # 1792 - 1795 D6 S0 C0 neq + .quad 0xfffffffe40120d12, 0xFFFFFFFD40120D1A, 0xFFFFFFFC40120D22, 0xFFFFFFFB40120D2A # 1796 - 1799 D6 S0 C4 neq + .quad 0xfffffffa40120d32, 0xFFFFFFF940150D3A, 0xFFFFFFF840150D42, 0xFFFFFFFF40120D0A # 1800 - 1803 D6 S0 C8 neq + .quad 0xfffffffe40120d12, 0xFFFFFFFD40120D1A, 0xFFFFFFFC40120D22, 0xFFFFFFFB40120D2A # 1804 - 1807 D6 S0 C12 neq + .quad 0x0000000040001c00, 0x0000000040121C81, 0x0000000040121C82, 0xFFFFFFFF40121D0A # 1808 - 1811 D6 S1 C0 neq + .quad 0xfffffffe40121d12, 0xFFFFFFFD40121D1A, 0xFFFFFFFC40121D22, 0xFFFFFFFB40121D2A # 1812 - 1815 D6 S1 C4 neq + .quad 0xfffffffa40151d32, 0xFFFFFFF940151D3A, 0xFFFFFFF840151D42, 0xFFFFFFFF40121D0A # 1816 - 1819 D6 S1 C8 neq + .quad 0xfffffffe40121d12, 0xFFFFFFFD40121D1A, 0xFFFFFFFC40121D22, 0xFFFFFFFB40121D2A # 1820 - 1823 D6 S1 C12 neq + .quad 0x0000000040002c00, 0x0000000040122C81, 0x0000000040122C82, 0xFFFFFFFF40122D0A # 1824 - 1827 D6 S2 C0 neq + .quad 0xfffffffe40122d12, 0xFFFFFFFD40122D1A, 0xFFFFFFFC40122D22, 0xFFFFFFFB40152D2A # 1828 - 1831 D6 S2 C4 neq + .quad 0xfffffffa40152d32, 0xFFFFFFF940152D3A, 0xFFFFFFF840152D42, 0xFFFFFFFF40122D0A # 1832 - 1835 D6 S2 C8 neq + .quad 0xfffffffe40122d12, 0xFFFFFFFD40122D1A, 0xFFFFFFFC40122D22, 0xFFFFFFFB40152D2A # 1836 - 1839 D6 S2 C12 neq + .quad 0x0000000040003c00, 0x0000000040123C81, 0x0000000040123C82, 0xFFFFFFFF40123D0A # 1840 - 1843 D6 S3 C0 neq + .quad 0xfffffffe40123d12, 0xFFFFFFFD40123D1A, 0xFFFFFFFC40153D22, 0xFFFFFFFB40153D2A # 1844 - 1847 D6 S3 C4 neq + .quad 0xfffffffa40153d32, 0xFFFFFFF940153D3A, 0xFFFFFFF840153D42, 0xFFFFFFFF40123D0A # 1848 - 1851 D6 S3 C8 neq + .quad 0xfffffffe40123d12, 0xFFFFFFFD40123D1A, 0xFFFFFFFC40153D22, 0xFFFFFFFB40153D2A # 1852 - 1855 D6 S3 C12 neq + .quad 0x0000000040004c00, 0x0000000040124C81, 0x0000000040124C82, 0xFFFFFFFF40124D0A # 1856 - 1859 D6 S4 C0 neq + .quad 0xfffffffe40124d12, 0xFFFFFFFD40154D1A, 0xFFFFFFFC40154D22, 0xFFFFFFFB40154D2A # 1860 - 1863 D6 S4 C4 neq + .quad 0xfffffffa40154d32, 0xFFFFFFF940154D3A, 0xFFFFFFF840154D42, 0xFFFFFFFF40124D0A # 1864 - 1867 D6 S4 C8 neq + .quad 0xfffffffe40124d12, 0xFFFFFFFD40154D1A, 0xFFFFFFFC40154D22, 0xFFFFFFFB40154D2A # 1868 - 1871 D6 S4 C12 neq + .quad 0x0000000040005c00, 0x0000000040125C81, 0x0000000040125C82, 0xFFFFFFFF40125D0A # 1872 - 1875 D6 S5 C0 neq + .quad 0xfffffffe40155d12, 0xFFFFFFFD40155D1A, 0xFFFFFFFC40155D22, 0xFFFFFFFB40155D2A # 1876 - 1879 D6 S5 C4 neq + .quad 0xfffffffa40155d32, 0xFFFFFFF940155D3A, 0xFFFFFFF840155D42, 0xFFFFFFFF40125D0A # 1880 - 1883 D6 S5 C8 neq + .quad 0xfffffffe40155d12, 0xFFFFFFFD40155D1A, 0xFFFFFFFC40155D22, 0xFFFFFFFB40155D2A # 1884 - 1887 D6 S5 C12 neq + .quad 0x0000000040006c00, 0x0000000040026C81, 0x00000000400A6C82, 0xFFFFFFFF400C6D0A # 1888 - 1891 D6 S6 C0 neq + .quad 0xfffffffe400c6d12, 0xFFFFFFFD400C6D1A, 0xFFFFFFFC400C6D22, 0xFFFFFFFB400C6D2A # 1892 - 1895 D6 S6 C4 neq + .quad 0xfffffffa400c6d32, 0xFFFFFFF9400C6D3A, 0xFFFFFFF8400C6D42, 0xFFFFFFFF400C6D0A # 1896 - 1899 D6 S6 C8 neq + .quad 0xfffffffe400c6d12, 0xFFFFFFFD400C6D1A, 0xFFFFFFFC400C6D22, 0xFFFFFFFB400C6D2A # 1900 - 1903 D6 S6 C12 neq + .quad 0x0000000040007c00, 0x00000000401A7C81, 0x00000000401CFC82, 0xFFFFFFFF401CFD0A # 1904 - 1907 D6 S7 C0 neq + .quad 0xfffffffe401cfd12, 0xFFFFFFFD401CFD1A, 0xFFFFFFFC401CFD22, 0xFFFFFFFB401CFD2A # 1908 - 1911 D6 S7 C4 neq + .quad 0xfffffffa401cfd32, 0xFFFFFFF9401CFD3A, 0xFFFFFFF8401FFD42, 0xFFFFFFFF401CFD0A # 1912 - 1915 D6 S7 C8 neq + .quad 0xfffffffe401cfd12, 0xFFFFFFFD401CFD1A, 0xFFFFFFFC401CFD22, 0xFFFFFFFB401CFD2A # 1916 - 1919 D6 S7 C12 neq + .quad 0x0000000040000e00, 0x0000000040120E81, 0xFFFFFFFF40120F09, 0xFFFFFFFE40120F11 # 1920 - 1923 D7 S0 C0 neq + .quad 0xfffffffd40120f19, 0xFFFFFFFC40120F21, 0xFFFFFFFB40120F29, 0xFFFFFFFA40120F31 # 1924 - 1927 D7 S0 C4 neq + .quad 0xfffffff940120f39, 0xFFFFFFF840150F41, 0xFFFFFFFF40120F09, 0xFFFFFFFE40120F11 # 1928 - 1931 D7 S0 C8 neq + .quad 0xfffffffd40120f19, 0xFFFFFFFC40120F21, 0xFFFFFFFB40120F29, 0xFFFFFFFA40120F31 # 1932 - 1935 D7 S0 C12 neq + .quad 0x0000000040001e00, 0x0000000040121E81, 0xFFFFFFFF40121F09, 0xFFFFFFFE40121F11 # 1936 - 1939 D7 S1 C0 neq + .quad 0xfffffffd40121f19, 0xFFFFFFFC40121F21, 0xFFFFFFFB40121F29, 0xFFFFFFFA40121F31 # 1940 - 1943 D7 S1 C4 neq + .quad 0xfffffff940151f39, 0xFFFFFFF840151F41, 0xFFFFFFFF40121F09, 0xFFFFFFFE40121F11 # 1944 - 1947 D7 S1 C8 neq + .quad 0xfffffffd40121f19, 0xFFFFFFFC40121F21, 0xFFFFFFFB40121F29, 0xFFFFFFFA40121F31 # 1948 - 1951 D7 S1 C12 neq + .quad 0x0000000040002e00, 0x0000000040122E81, 0xFFFFFFFF40122F09, 0xFFFFFFFE40122F11 # 1952 - 1955 D7 S2 C0 neq + .quad 0xfffffffd40122f19, 0xFFFFFFFC40122F21, 0xFFFFFFFB40122F29, 0xFFFFFFFA40152F31 # 1956 - 1959 D7 S2 C4 neq + .quad 0xfffffff940152f39, 0xFFFFFFF840152F41, 0xFFFFFFFF40122F09, 0xFFFFFFFE40122F11 # 1960 - 1963 D7 S2 C8 neq + .quad 0xfffffffd40122f19, 0xFFFFFFFC40122F21, 0xFFFFFFFB40122F29, 0xFFFFFFFA40152F31 # 1964 - 1967 D7 S2 C12 neq + .quad 0x0000000040003e00, 0x0000000040123E81, 0xFFFFFFFF40123F09, 0xFFFFFFFE40123F11 # 1968 - 1971 D7 S3 C0 neq + .quad 0xfffffffd40123f19, 0xFFFFFFFC40123F21, 0xFFFFFFFB40153F29, 0xFFFFFFFA40153F31 # 1972 - 1975 D7 S3 C4 neq + .quad 0xfffffff940153f39, 0xFFFFFFF840153F41, 0xFFFFFFFF40123F09, 0xFFFFFFFE40123F11 # 1976 - 1979 D7 S3 C8 neq + .quad 0xfffffffd40123f19, 0xFFFFFFFC40123F21, 0xFFFFFFFB40153F29, 0xFFFFFFFA40153F31 # 1980 - 1983 D7 S3 C12 neq + .quad 0x0000000040004e00, 0x0000000040124E81, 0xFFFFFFFF40124F09, 0xFFFFFFFE40124F11 # 1984 - 1987 D7 S4 C0 neq + .quad 0xfffffffd40124f19, 0xFFFFFFFC40154F21, 0xFFFFFFFB40154F29, 0xFFFFFFFA40154F31 # 1988 - 1991 D7 S4 C4 neq + .quad 0xfffffff940154f39, 0xFFFFFFF840154F41, 0xFFFFFFFF40124F09, 0xFFFFFFFE40124F11 # 1992 - 1995 D7 S4 C8 neq + .quad 0xfffffffd40124f19, 0xFFFFFFFC40154F21, 0xFFFFFFFB40154F29, 0xFFFFFFFA40154F31 # 1996 - 1999 D7 S4 C12 neq + .quad 0x0000000040005e00, 0x0000000040125E81, 0xFFFFFFFF40125F09, 0xFFFFFFFE40125F11 # 2000 - 2003 D7 S5 C0 neq + .quad 0xfffffffd40155f19, 0xFFFFFFFC40155F21, 0xFFFFFFFB40155F29, 0xFFFFFFFA40155F31 # 2004 - 2007 D7 S5 C4 neq + .quad 0xfffffff940155f39, 0xFFFFFFF840155F41, 0xFFFFFFFF40125F09, 0xFFFFFFFE40125F11 # 2008 - 2011 D7 S5 C8 neq + .quad 0xfffffffd40155f19, 0xFFFFFFFC40155F21, 0xFFFFFFFB40155F29, 0xFFFFFFFA40155F31 # 2012 - 2015 D7 S5 C12 neq + .quad 0x0000000040006e00, 0x0000000040126E81, 0xFFFFFFFF40126F09, 0xFFFFFFFE40156F11 # 2016 - 2019 D7 S6 C0 neq + .quad 0xfffffffd40156f19, 0xFFFFFFFC40156F21, 0xFFFFFFFB40156F29, 0xFFFFFFFA40156F31 # 2020 - 2023 D7 S6 C4 neq + .quad 0xfffffff940156f39, 0xFFFFFFF840156F41, 0xFFFFFFFF40126F09, 0xFFFFFFFE40156F11 # 2024 - 2027 D7 S6 C8 neq + .quad 0xfffffffd40156f19, 0xFFFFFFFC40156F21, 0xFFFFFFFB40156F29, 0xFFFFFFFA40156F31 # 2028 - 2031 D7 S6 C12 neq + .quad 0x0000000040007e00, 0x00000000400A7E81, 0xFFFFFFFF400C7F09, 0xFFFFFFFE400C7F11 # 2032 - 2035 D7 S7 C0 neq + .quad 0xfffffffd400c7f19, 0xFFFFFFFC400C7F21, 0xFFFFFFFB400C7F29, 0xFFFFFFFA400C7F31 # 2036 - 2039 D7 S7 C4 neq + .quad 0xfffffff9400c7f39, 0xFFFFFFF8400C7F41, 0xFFFFFFFF400C7F09, 0xFFFFFFFE400C7F11 # 2040 - 2043 D7 S7 C8 neq + .quad 0xfffffffd400c7f19, 0xFFFFFFFC400C7F21, 0xFFFFFFFB400C7F29, 0xFFFFFFFA400C7F31 # 2044 - 2047 D7 S7 C12 neq diff --git a/emulator-asm/src/dma/fast_inputcpy.asm b/emulator-asm/src/dma/fast_inputcpy.asm new file mode 100644 index 000000000..16b7bab6e --- /dev/null +++ b/emulator-asm/src/dma/fast_inputcpy.asm @@ -0,0 +1,162 @@ +.intel_syntax noprefix +.code64 +.text +.global fast_inputcpy +.type fast_inputcpy, @function +.extern fcall_ctx +.extern MEM_FREE_INPUT + +.include "dma_constants.inc" + +# PARAMS +# rdi = dst +# rdx = n (bytes) +# +# Clobbers: rax, rcx, r9, rsi + +fast_inputcpy: + # load current pointer to input data + mov rcx, qword ptr [fcall_ctx + FCALL_RESULT_GOT * 8] + + # set rsi = fcall_ctx.result[rcx - 1] + lea rsi, [fcall_ctx + rcx * 8 + FCALL_RESULT * 8 - 8] + + # rcx = round_up(rdx/8) + lea rcx, [rdx + 7] + shr rcx, 3 + + # read next value, r9 = fcall_ctx.result[rcx + rdx - 1] + mov r9, [rsi + rcx * 8] + + # save next value to MEM_FREE_INPUT + mov qword ptr [MEM_FREE_INPUT], r9 + + # increase FCALL_RESULT_GOT in rcx + add qword ptr [fcall_ctx + FCALL_RESULT_GOT * 8], rcx + + # rsi = input src + # rdx = input bytes to copy + # rdi = dst + + # return dst + mov rax, rdi + + test rdx, 0xFFFFFFFFFFFFFFF8 + jz .L_fast_inputcpy_count_lt_8 + + # only first could be lt 32x8=256 bytes + movzx ecx, dl + and rdx, 0xFFFFFFFFFFFFFF07 + shr rcx, 3 + + # Jump to entry that leaves exactly q MOVSQ until the end + lea r9, [rip + .L_fast_inputcpy_jump_qword_table] + jmp [r9 + rcx*8] + +.p2align 3 +.L_fast_inputcpy_jump_qword_table: + .quad .L_q0 + .quad .L_q1 + .quad .L_q2 + .quad .L_q3 + .quad .L_q4 + .quad .L_q5 + .quad .L_q6 + .quad .L_q7 + .quad .L_q8 + .quad .L_q9 + .quad .L_q10 + .quad .L_q11 + .quad .L_q12 + .quad .L_q13 + .quad .L_q14 + .quad .L_q15 + .quad .L_q16 + .quad .L_q17 + .quad .L_q18 + .quad .L_q19 + .quad .L_q20 + .quad .L_q21 + .quad .L_q22 + .quad .L_q23 + .quad .L_q24 + .quad .L_q25 + .quad .L_q26 + .quad .L_q27 + .quad .L_q28 + .quad .L_q29 + .quad .L_q30 + .quad .L_q31 + .quad .L_q32 + +# Fallthrough chain: entering at q31 executes 31 STOSQ down to q1 +.L_q32: movsq +.L_q31: movsq +.L_q30: movsq +.L_q29: movsq +.L_q28: movsq +.L_q27: movsq +.L_q26: movsq +.L_q25: movsq +.L_q24: movsq +.L_q23: movsq +.L_q22: movsq +.L_q21: movsq +.L_q20: movsq +.L_q19: movsq +.L_q18: movsq +.L_q17: movsq +.L_q16: movsq +.L_q15: movsq +.L_q14: movsq +.L_q13: movsq +.L_q12: movsq +.L_q11: movsq +.L_q10: movsq +.L_q9: movsq +.L_q8: movsq +.L_q7: movsq +.L_q6: movsq +.L_q5: movsq +.L_q4: movsq +.L_q3: movsq +.L_q2: movsq +.L_q1: movsq +.L_q0: + + # check if remain more 32 x 8 = 256 bytes blocks + + test rdx, 0xFFFFFFFFFFFFFF00 # 0xFFFF_FFFF_FFFF_FF00 + jz .L_fast_inputcpy_count_lt_8 + sub rdx, 256 + jmp .L_q32 + + +.L_fast_inputcpy_count_lt_8: + + # Jump to byte tail entry + lea r9, [rip + .L_fast_inputcpy_jump_byte_table] + jmp [r9 + rdx*8] + +.p2align 3 +.L_fast_inputcpy_jump_byte_table: + .quad .L_b0 + .quad .L_b1 + .quad .L_b2 + .quad .L_b3 + .quad .L_b4 + .quad .L_b5 + .quad .L_b6 + .quad .L_b7 + +.L_b7: movsb +.L_b6: movsb +.L_b5: movsb +.L_b4: movsb +.L_b3: movsb +.L_b2: movsb +.L_b1: movsb +.L_b0: + ret + +.size fast_inputcpy, .-fast_inputcpy diff --git a/emulator-asm/src/dma/fast_memcmp.asm b/emulator-asm/src/dma/fast_memcmp.asm new file mode 100644 index 000000000..de6502447 --- /dev/null +++ b/emulator-asm/src/dma/fast_memcmp.asm @@ -0,0 +1,153 @@ +.intel_syntax noprefix +.code64 + + +###################################################################################### +# fast_memcmp - Optimized comparison of two memory regions. Returns (a - b) of the +# first different byte, or 0 if equal. Also updates rdx with the +# effective count (number of bytes checked to find the difference). +# PARAMETERS +# rdi: addr_a +# rsi: addr_b +# rdx: count (bytes) +# +# RESULT +# rdx: updated with effective count (bytes) +# rax: i64 comparison result of first different bytes (a - b) +# +# CLOBBERED REGS: rcx +# ─────────────────────────────────────────────────────────────────────────────────── +# fast_memcmp_count_nz - Same as fast_memcmp, but assumes count > 0 (caller must +# verify this beforehand). +# PARAMETERS +# rdi: addr_a +# rsi: addr_b +# rdx: count (bytes) +# +# RESULT +# rdx: updated with effective count (bytes) +# rax: i64 comparison result of first different bytes (a - b) +# +# CLOBBERED REGS: rcx +# ─────────────────────────────────────────────────────────────────────────────────── +# get_memcmp_effective_count - Updates rdx with the effective count, which is the +# number of bytes needed to compare a and b. +# PARAMETERS +# rdi: addr_a +# rsi: addr_b +# rdx: count (bytes) +# +# RESULT +# rdx: updated with effective count (bytes) +# +# CLOBBERED REGS: rcx +################################################################################ + +.global fast_memcmp +.global fast_memcmp_count_nz +.global get_memcmp_effective_count + +.section .text + +# PARAMETERS +# rdi: addr_a +# rsi: addr_b +# rdx: count (bytes) +# +# RESULT +# rdx: updated with effective count (bytes) +# rax: i64 comparison result of first different bytes (a - b) +# +# CLOBBERED REGS: rcx + +fast_memcmp: + + mov rcx, rdx # rcx = rdx (count) + test rcx, rcx + jz .L_dma_memcmp_zero + + repe cmpsb # Compare byte-by-byte; on mismatch, increments + # rdi, rsi and decrements rcx + + jz .L_fast_memcmp_eq # Jump if all bytes were equal + sub rdx, rcx # rdx = rdx - rcx (*) + movzx rax, byte ptr [rdi - 1] # rax = a (zero-extended) + movzx rcx, byte ptr [rsi - 1] # rcx = b (zero-extended) + sub rax, rcx # rax = a - b + sub rdi, rdx # restore rdi + sub rsi, rdx # restore rsi + ret + +.L_fast_memcmp_eq: + xor rax, rax # rax = 0 + sub rdi, rdx # restore rdi + sub rsi, rdx # restore rsi + ret + +.L_dma_memcmp_zero: + xor rax, rax # rax = 0 + ret + +# PARAMETERS +# rdi: addr_a +# rsi: addr_b +# rdx: count (bytes), must be > 0 +# +# RESULT +# rdx: updated with effective count (bytes) +# rax: i64 comparison result of first different bytes (a - b) +# +# CLOBBERED REGS: rcx + +fast_memcmp_count_nz: + + mov rcx, rdx # rcx = rdx (count) + repe cmpsb # Compare byte-by-byte; on mismatch, increments + # rdi, rsi and decrements rcx + + jz .L_fast_memcmp_count_nz_eq # Jump if all bytes were equal + + sub rdx, rcx # rdx = rdx - rcx (*) + movzx rax, byte ptr [rdi - 1] # rax = a (zero-extended) + movzx rcx, byte ptr [rsi - 1] # rcx = b (zero-extended) + sub rax, rcx # rax = a - b + add rdi, rdx # restore rdi + add rsi, rdx # restore rsi + ret + +.L_fast_memcmp_count_nz_eq: + xor rax, rax # rax = 0 (buffers are equal) + add rdi, rdx # restore rdi + add rsi, rdx # restore rsi + ret + +# PARAMETERS +# rdi: addr_a +# rsi: addr_b +# rdx: count (bytes) +# +# RESULT +# rdx: updated with effective count (bytes) +# +# CLOBBERED REGS: rcx + +get_memcmp_effective_count: + + mov rcx, rdx # rcx = rdx (count) + repe cmpsb # Compare byte-by-byte; on mismatch, increments + # rdi, rsi and decrements rcx + + jz .L_get_memcmp_effective_count_eq # Jump if all bytes were equal + + sub rdx, rcx # rdx = rdx - rcx (*) + add rdi, rdx # restore rdi + add rsi, rdx # restore rsi + ret + +.L_get_memcmp_effective_count_eq: + add rdi, rdx # restore rdi + add rsi, rdx # restore rsi + ret + +# Mark stack as non-executable (required by modern linkers) +.section .note.GNU-stack,"",%progbits diff --git a/emulator-asm/src/dma/fast_memcpy.asm b/emulator-asm/src/dma/fast_memcpy.asm new file mode 100644 index 000000000..87d5a16ef --- /dev/null +++ b/emulator-asm/src/dma/fast_memcpy.asm @@ -0,0 +1,133 @@ +.intel_syntax noprefix +.code64 +.text +.global fast_memcpy +.type fast_memcpy, @function + +# void fast_memcpy(void* dst, void *src, size_t n) +# rdi = dst +# rsi = src +# rdx = n (bytes) +# +# Clobbers: rax, rcx, r9 + +fast_memcpy: + test rdx, 0xFFFFFFFFFFFFFFF8 + jz .L_fast_memcpy_count_lt_8 + + # only first could be lt 32x8=256 bytes + movzx ecx, dl + and rdx, 0xFFFFFFFFFFFFFF07 + shr rcx, 3 + + # Jump to entry that leaves exactly q MOVSQ until the end + lea r9, [rip + .L_fast_memcpy_jump_qword_table] + jmp [r9 + rcx*8] + +.p2align 3 +.L_fast_memcpy_jump_qword_table: + .quad .L_q0 + .quad .L_q1 + .quad .L_q2 + .quad .L_q3 + .quad .L_q4 + .quad .L_q5 + .quad .L_q6 + .quad .L_q7 + .quad .L_q8 + .quad .L_q9 + .quad .L_q10 + .quad .L_q11 + .quad .L_q12 + .quad .L_q13 + .quad .L_q14 + .quad .L_q15 + .quad .L_q16 + .quad .L_q17 + .quad .L_q18 + .quad .L_q19 + .quad .L_q20 + .quad .L_q21 + .quad .L_q22 + .quad .L_q23 + .quad .L_q24 + .quad .L_q25 + .quad .L_q26 + .quad .L_q27 + .quad .L_q28 + .quad .L_q29 + .quad .L_q30 + .quad .L_q31 + .quad .L_q32 + +# Fallthrough chain: entering at q31 executes 31 STOSQ down to q1 +.L_q32: movsq +.L_q31: movsq +.L_q30: movsq +.L_q29: movsq +.L_q28: movsq +.L_q27: movsq +.L_q26: movsq +.L_q25: movsq +.L_q24: movsq +.L_q23: movsq +.L_q22: movsq +.L_q21: movsq +.L_q20: movsq +.L_q19: movsq +.L_q18: movsq +.L_q17: movsq +.L_q16: movsq +.L_q15: movsq +.L_q14: movsq +.L_q13: movsq +.L_q12: movsq +.L_q11: movsq +.L_q10: movsq +.L_q9: movsq +.L_q8: movsq +.L_q7: movsq +.L_q6: movsq +.L_q5: movsq +.L_q4: movsq +.L_q3: movsq +.L_q2: movsq +.L_q1: movsq +.L_q0: + + # check if remain more 32 x 8 = 256 bytes blocks + + test rdx, 0xFFFFFFFFFFFFFF00 # 0xFFFF_FFFF_FFFF_FF00 + jz .L_fast_memcpy_count_lt_8 + sub rdx, 256 + jmp .L_q32 + + +.L_fast_memcpy_count_lt_8: + + # Jump to byte tail entry + lea r9, [rip + .L_fast_memcpy_jump_byte_table] + jmp [r9 + rdx*8] + +.p2align 3 +.L_fast_memcpy_jump_byte_table: + .quad .L_b0 + .quad .L_b1 + .quad .L_b2 + .quad .L_b3 + .quad .L_b4 + .quad .L_b5 + .quad .L_b6 + .quad .L_b7 + +.L_b7: movsb +.L_b6: movsb +.L_b5: movsb +.L_b4: movsb +.L_b3: movsb +.L_b2: movsb +.L_b1: movsb +.L_b0: + ret + +.size fast_memcpy, .-fast_memcpy diff --git a/emulator-asm/src/dma/fast_memcpy64.asm b/emulator-asm/src/dma/fast_memcpy64.asm new file mode 100644 index 000000000..264340c5f --- /dev/null +++ b/emulator-asm/src/dma/fast_memcpy64.asm @@ -0,0 +1,111 @@ +.intel_syntax noprefix +.code64 +.text +.global fast_memcpy64 +.type fast_memcpy64, @function + +# void fast_memcpy64(uint64_t* dst, uint64_t *v, size_t n_qwords) +# rdi = dst +# rsi = src +# rdx = n (QWORDS) +# +# Clobbers: rax, rcx, r9 + +fast_memcpy64: + + # only first could be lt 32x8=256 bytes + # 256 bytes => 32 qwords + test rdx, 0x1F + jz .L_fast_memcpy64_mul256 + + mov rcx, rdx + and rdx, 0xFFFFFFFFFFFFFFE0 + sub rcx, rdx + + # Jump to entry that leaves exactly q MOVSQ until the end + lea r9, [rip + .L_fast_memcpy64_jump_qword_table] + jmp [r9 + rcx*8] + +.p2align 3 +.L_fast_memcpy64_jump_qword_table: + .quad .L_q0 + .quad .L_q1 + .quad .L_q2 + .quad .L_q3 + .quad .L_q4 + .quad .L_q5 + .quad .L_q6 + .quad .L_q7 + .quad .L_q8 + .quad .L_q9 + .quad .L_q10 + .quad .L_q11 + .quad .L_q12 + .quad .L_q13 + .quad .L_q14 + .quad .L_q15 + .quad .L_q16 + .quad .L_q17 + .quad .L_q18 + .quad .L_q19 + .quad .L_q20 + .quad .L_q21 + .quad .L_q22 + .quad .L_q23 + .quad .L_q24 + .quad .L_q25 + .quad .L_q26 + .quad .L_q27 + .quad .L_q28 + .quad .L_q29 + .quad .L_q30 + .quad .L_q31 + .quad .L_q32 + +# Fallthrough chain: entering at q31 executes 31 STOSQ down to q1 +.L_q32: movsq +.L_q31: movsq +.L_q30: movsq +.L_q29: movsq +.L_q28: movsq +.L_q27: movsq +.L_q26: movsq +.L_q25: movsq +.L_q24: movsq +.L_q23: movsq +.L_q22: movsq +.L_q21: movsq +.L_q20: movsq +.L_q19: movsq +.L_q18: movsq +.L_q17: movsq +.L_q16: movsq +.L_q15: movsq +.L_q14: movsq +.L_q13: movsq +.L_q12: movsq +.L_q11: movsq +.L_q10: movsq +.L_q9: movsq +.L_q8: movsq +.L_q7: movsq +.L_q6: movsq +.L_q5: movsq +.L_q4: movsq +.L_q3: movsq +.L_q2: movsq +.L_q1: movsq +.L_q0: + + # check if remain more 32 x 8 = 256 bytes blocks + +.L_fast_memcpy64_mul256: + test rdx, 0xFFFFFFFFFFFFFFE0 # 0xFFFF_FFFF_FFFF_FF00 + jz .L_fast_memcpy64_done + sub rdx, 32 + jmp .L_q32 + +.L_fast_memcpy64_done: + ret + +.size fast_memcpy64, .-fast_memcpy64 diff --git a/emulator-asm/src/dma/fast_memset.asm b/emulator-asm/src/dma/fast_memset.asm new file mode 100644 index 000000000..46cfd6616 --- /dev/null +++ b/emulator-asm/src/dma/fast_memset.asm @@ -0,0 +1,141 @@ +.intel_syntax noprefix +.code64 +.text +.global fast_memset +.type fast_memset, @function + +# void fast_memset(void* dst, uint8_t v, size_t n) +# rdi = dst +# rsi = v (only low 8 bits used) +# rdx = n (bytes) +# +# Clobbers: rax, rcx, r9 + +fast_memset: + movzx eax, sil + mov rsi, rdi + + test rdx, 0xFFFFFFFFFFFFFFF8 + jz .L_fast_memset_count_lt_8 + + # Build 64-bit pattern 0xvvvvvvvvvvvvvvvv in RAX (needed for all paths) + mov r9, 0x0101010101010101 + imul rax, r9 # rax = v * 0x0101010101010101 + + # only first could be lt 32x8=256 bytes + movzx ecx, dl + and rdx, 0xFFFFFFFFFFFFFF07 + + # Jump to entry that leaves exactly q STOSQ until the end + shr ecx, 3 + lea r9, [rip + .L_fast_memcpy_jump_qword_table] + jmp [r9 + rcx*8] + +.p2align 3 +.L_fast_memcpy_jump_qword_table: + .quad .L_q0 + .quad .L_q1 + .quad .L_q2 + .quad .L_q3 + .quad .L_q4 + .quad .L_q5 + .quad .L_q6 + .quad .L_q7 + .quad .L_q8 + .quad .L_q9 + .quad .L_q10 + .quad .L_q11 + .quad .L_q12 + .quad .L_q13 + .quad .L_q14 + .quad .L_q15 + .quad .L_q16 + .quad .L_q17 + .quad .L_q18 + .quad .L_q19 + .quad .L_q20 + .quad .L_q21 + .quad .L_q22 + .quad .L_q23 + .quad .L_q24 + .quad .L_q25 + .quad .L_q26 + .quad .L_q27 + .quad .L_q28 + .quad .L_q29 + .quad .L_q30 + .quad .L_q31 + .quad .L_q32 + +# Fallthrough chain: entering at q31 executes 31 STOSQ down to q1 +.L_q32: stosq +.L_q31: stosq +.L_q30: stosq +.L_q29: stosq +.L_q28: stosq +.L_q27: stosq +.L_q26: stosq +.L_q25: stosq +.L_q24: stosq +.L_q23: stosq +.L_q22: stosq +.L_q21: stosq +.L_q20: stosq +.L_q19: stosq +.L_q18: stosq +.L_q17: stosq +.L_q16: stosq +.L_q15: stosq +.L_q14: stosq +.L_q13: stosq +.L_q12: stosq +.L_q11: stosq +.L_q10: stosq +.L_q9: stosq +.L_q8: stosq +.L_q7: stosq +.L_q6: stosq +.L_q5: stosq +.L_q4: stosq +.L_q3: stosq +.L_q2: stosq +.L_q1: stosq +.L_q0: + + # check if remain more 32 x 8 = 256 bytes blocks + + test rdx, 0xFFFFFFFFFFFFFF00 # 0xFFFF_FFFF_FFFF_FF00 + jz .L_fast_memset_count_lt_8 + sub rdx, 256 + jmp .L_q32 + + +.L_fast_memset_count_lt_8: + + # Jump to byte tail entry + lea r9, [rip + .L_fast_memset_jump_byte_table] + jmp [r9 + rdx*8] + +.p2align 3 +.L_fast_memset_jump_byte_table: + .quad .L_b0 + .quad .L_b1 + .quad .L_b2 + .quad .L_b3 + .quad .L_b4 + .quad .L_b5 + .quad .L_b6 + .quad .L_b7 + +.L_b7: stosb +.L_b6: stosb +.L_b5: stosb +.L_b4: stosb +.L_b3: stosb +.L_b2: stosb +.L_b1: stosb +.L_b0: + mov rax, rsi + ret + +.size fast_memset, .-fast_memset diff --git a/emulator-asm/src/dma/memcpy_fast.asm b/emulator-asm/src/dma/memcpy_fast.asm new file mode 100644 index 000000000..0ad97dae1 --- /dev/null +++ b/emulator-asm/src/dma/memcpy_fast.asm @@ -0,0 +1,97 @@ +.intel_syntax noprefix +.code64 + +################################################################################ +# dma_memcpy_fast - Optimized memcpy using rep movsq (no tracing) +# +# Fast memory copy function optimized for performance using hardware-accelerated +# instructions. Handles overlapping memory regions correctly (like memmove). +# +# PARAMETERS (System V AMD64 ABI): +# rdi = dst (u64) - Destination address +# rsi = src (u64) - Source address +# rdx = count (usize) - Number of bytes to copy +# +# RETURN VALUE: None +# +# STRATEGY: +# For non-overlapping regions: +# 1. Copy pre_count unaligned bytes (0-7 bytes to reach alignment) +# 2. Copy aligned qwords using rep movsq (~1-2 cycles/qword) +# 3. Copy post_count remaining bytes (0-7 bytes) +# +# For overlapping regions (dst between src and src+count): +# - Copy backward byte-by-byte using rep movsb with std flag +# +# PERFORMANCE: ~10-20 cycles overhead + ~1-2 cycles per qword +# +# REGISTERS USED: rax, rcx, rdi, rsi, rdx, r8, r9 +################################################################################ + +.global dma_memcpy_fast + +.section .text + +dma_memcpy_fast: + # Check if count is 0 + test rdx, rdx # 1 cycle + jz .L_fast_done # nothing to copy + + # Save original values + mov r8, rdi # r8 = dst + mov r9, rsi # r9 = src + + # Check for overlap: if dst < src or dst >= src+count, no overlap + lea rax, [rsi + rdx] # rax = src + count + cmp rdi, rsi # compare dst with src + jb .L_fast_forward # dst < src, copy forward + cmp rdi, rax # compare dst with src+count + jae .L_fast_forward # dst >= src+count, no overlap + + # Overlap detected: copy backward + lea rsi, [r9 + rdx - 1] # rsi = src + count - 1 (use r9, original src) + lea rdi, [r8 + rdx - 1] # rdi = dst + count - 1 (use r8, original dst) + mov rcx, rdx # rcx = count + std # set direction flag (backward) + rep movsb # copy backward + cld # clear direction flag + jmp .L_fast_done + +.L_fast_forward: + # No overlap: optimized 3-phase copy + # Calculate dst_offset and pre_count + mov rax, r8 # rax = dst + and rax, 0x07 # rax = dst_offset + test rax, rax # check if already aligned + jz .L_fast_aligned # skip pre-copy if aligned + + # Copy pre_count bytes to align dst + mov rcx, 8 # rcx = 8 + sub rcx, rax # rcx = 8 - dst_offset = pre_count + cmp rcx, rdx # check if pre_count > count + jbe .L_fast_pre_ok # pre_count <= count + mov rcx, rdx # pre_count = count (copy all) +.L_fast_pre_ok: + sub rdx, rcx # count -= pre_count + rep movsb # copy pre_count bytes + # rsi and rdi are now advanced and rdi is aligned + +.L_fast_aligned: + # Copy aligned qwords using rep movsq + mov rcx, rdx # rcx = remaining count + shr rcx, 3 # rcx = count / 8 (qword count) + jz .L_fast_post # skip if no qwords to copy + rep movsq # copy qwords (~1-2 cycles each) + +.L_fast_post: + # Copy remaining bytes (0-7) + mov rcx, rdx # rcx = original count + and rcx, 0x07 # rcx = count % 8 (post_count) + jz .L_fast_done # skip if no remaining bytes + rep movsb # copy remaining bytes + +.L_fast_done: + ret + +# Mark stack as non-executable (required by modern linkers) +.section .note.GNU-stack,"",%progbits diff --git a/emulator-asm/src/dma/test/Makefile b/emulator-asm/src/dma/test/Makefile new file mode 100644 index 000000000..c421ce776 --- /dev/null +++ b/emulator-asm/src/dma/test/Makefile @@ -0,0 +1,61 @@ +# Makefile for encode_memcpy_inline test + +CXX = g++ +CXXFLAGS = -std=c++17 -O0 -Wall -Wextra -g -no-pie -I../../../../state-machines/mem-cpp/cpp +LDFLAGS = -no-pie +ASMFLAGS = -g --noexecstack -I$(CURDIR) -I.. +BDIR = build + +# Output directory for object files (current directory) +OBJDIR = . + +SRCS_ASM = ../direct_memcpy_mtrace.asm ../memcpy_fast.asm ../fast_memcpy.asm ../direct_memcpy_mops.asm \ + ../direct_memset_mops.asm ../direct_memset_mtrace.asm ../fast_memset.asm ../fast_memcpy64.asm \ + ../direct_inputcpy_mops.asm ../direct_inputcpy_mtrace.asm ../fast_dma_encode.asm \ + ../check_dynamic_mtrace.asm ../direct_memcmp_mops.asm ../direct_memcmp_mtrace.asm \ + ../fast_memcmp.asm ../fast_inputcpy.asm ../test_dma_api.asm -- + +SRCS_CPP = test_dma.cpp test_dma_encode.cpp test_dma_tools.cpp test_mock.cpp \ + test_dma_memcmp_mtrace.cpp test_dma_memcmp_mops.cpp \ + test_dma_memcpy_mtrace.cpp test_dma_memcpy_mops.cpp \ + test_dma_memset_mtrace.cpp test_dma_memset_mops.cpp \ + test_dma_inputcpy_mtrace.cpp test_dma_inputcpy_mops.cpp \ + test_dma_mem.cpp test_dma_mem_mtrace.cpp test_dma_mem_mops.cpp + + + +OBJS_ASM = $(BDIR)/direct_memcpy_mtrace.o $(BDIR)/memcpy_fast.o $(BDIR)/fast_memcpy.o \ + $(BDIR)/direct_memcpy_mops.o $(BDIR)/direct_memset_mops.o $(BDIR)/direct_memset_mtrace.o \ + $(BDIR)/fast_memset.o $(BDIR)/fast_memcpy64.o $(BDIR)/direct_inputcpy_mops.o \ + $(BDIR)/direct_inputcpy_mtrace.o $(BDIR)/fast_dma_encode.o $(BDIR)/check_dynamic_mtrace.o \ + $(BDIR)/direct_memcmp_mops.o $(BDIR)/direct_memcmp_mtrace.o $(BDIR)/fast_memcmp.o \ + $(BDIR)/fast_inputcpy.o $(BDIR)/test_dma_api.o + +OBJS = $(OBJS_ASM) test_dma.o test_dma_encode.o test_dma_tools.o test_mock.o \ + test_dma_memcmp_mtrace.o test_dma_memcmp_mops.o \ + test_dma_memcpy_mtrace.o test_dma_memcpy_mops.o \ + test_dma_memset_mtrace.o test_dma_memset_mops.o \ + test_dma_inputcpy_mtrace.o test_dma_inputcpy_mops.o \ + test_dma_mem.o test_dma_mem_mtrace.o test_dma_mem_mops.o + +TARGET = test_dma + +all: $(TARGET) + +# Rule to compile assembly files from parent directory into current directory +$(BDIR)/%.o: ../%.asm + as $(ASMFLAGS) -o $@ $< + +%.o: %.cpp + $(CXX) $(CXXFLAGS) -c -o $@ $< + +$(TARGET): $(OBJS) + $(CXX) $(LDFLAGS) -o $@ $^ + +test: $(TARGET) + ./$(TARGET) + +clean: + rm -f *.o $(TARGET) $(OBJS_ASM) + +.PHONY: all test clean diff --git a/emulator-asm/src/dma/test/test_dma.cpp b/emulator-asm/src/dma/test/test_dma.cpp new file mode 100644 index 000000000..782ddb7be --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma.cpp @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "test_dma_encode.hpp" +#include "test_dma_tools.hpp" +#include "test_mock.hpp" +#include "test_dma_memcmp_mops.hpp" +#include "test_dma_memcpy_mops.hpp" +#include "test_dma_memset_mops.hpp" +#include "test_dma_inputcpy_mops.hpp" +#include "test_dma_memcmp_mtrace.hpp" +#include "test_dma_memcpy_mtrace.hpp" +#include "test_dma_memset_mtrace.hpp" +#include "test_dma_inputcpy_mtrace.hpp" + +int main () { + test_dma_inputcpy_mops(); + test_dma_inputcpy_mtrace(); + test_dma_memcpy_mops(); + test_dma_memcmp_mops(); + test_dma_memset_mops(); + test_dma_memcpy_mtrace(); + test_dma_memcmp_mtrace(); + test_dma_memset_mtrace(); +} + + diff --git a/emulator-asm/src/dma/test/test_dma_encode.cpp b/emulator-asm/src/dma/test/test_dma_encode.cpp new file mode 100644 index 000000000..6b84a5dd5 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_encode.cpp @@ -0,0 +1,209 @@ +#include +#include +#include +#include +#include +#include + +#include "test_dma_encode.hpp" + +struct EncodeInfo { + const char *title; + uint64_t mask; + uint64_t rs_bits; +}; + +EncodeInfo encode_info[] = { + {"pre_count", DMA_PRE_COUNT_TEST_MASK, 0}, + {"post_count", DMA_POST_COUNT_TEST_MASK, DMA_POST_COUNT_RS}, + {"pre_writes", DMA_PRE_WRITES_TEST_MASK, DMA_PRE_WRITES_RS}, + {"dst_offset", DMA_DST_OFFSET_TEST_MASK, DMA_DST_OFFSET_RS}, + {"src_offset", DMA_SRC_OFFSET_TEST_MASK, DMA_SRC_OFFSET_RS}, + {"double_src_pre", DMA_DOUBLE_SRC_PRE_TEST_MASK, DMA_DOUBLE_SRC_PRE_RS}, + {"double_src_post", DMA_DOUBLE_SRC_POST_TEST_MASK, DMA_DOUBLE_SRC_POST_RS}, + {"extra_src_reads", DMA_EXTRA_SRC_READS_TEST_MASK, DMA_EXTRA_SRC_READS_RS}, + {"src64_inc_by_pre", DMA_SRC64_INC_BY_PRE_TEST_MASK, DMA_SRC64_INC_BY_PRE_RS}, + {"unaligned_dst_src", DMA_UNALIGNED_DST_SRC_TEST_MASK, DMA_UNALIGNED_DST_SRC_RS}, + {"fill_byte_cmp_negative", DMA_FILL_BYTE_CMD_RES_TEST_MASK, DMA_FILL_BYTE_RS}, + {"requires_dma", DMA_REQUIRES_DMA_TEST_MASK, DMA_REQUIRES_DMA_RS}, + {"lpre_count", DMA_LPRE_COUNT_TEST_MASK, DMA_LPRE_COUNT_RS}, + {"loop_count", DMA_LOOP_COUNT_TEST_MASK, DMA_LOOP_COUNT_RS}, + {"", 0, 0} +}; + + +// #bits bits +// pre_count: 0-7 3 0-2 +// post_count: 0-8(*) 4 3-6 (*) memcmp +// pre_writes: 0,1,2 2 7-8 +// dst_offset: 0-7 3 9-11 +// src_offset: 0-7 3 12-14 +// double_src_pre: 0,1 1 15 +// double_src_post: 0,1 1 16 +// extra_src_reads: 0-3 2 17-18 +// src64_inc_by_pre: 1 19 +// unaligned_dst_src: 1 20 +// fill_byte/cmp: 8 21-28 +// cmp_negative: 1 29 +// requires_dma: 1 30 +// (reserved) 1 31 +// lpre_count 3 32-34 +// loop_count 29 35-63 + + +uint64_t calculate_encode_memcmp(uint64_t dst, uint64_t src, size_t count, int result) { + return calculate_encode(dst, src, count, result != 0, true) | DMA_REQUIRES_DMA_TEST_MASK | ((result & DMA_FILL_BITS9_MASK) << DMA_FILL_BYTE_RS); +} + +uint64_t calculate_encode_memset(uint64_t dst, size_t count, uint64_t byte) { + return calculate_encode(dst, 0, count, false, false) | ((byte & DMA_FILL_BYTE_MASK) << DMA_FILL_BYTE_RS); +} + +uint64_t calculate_encode_inputcpy(uint64_t dst, size_t count) { + return calculate_encode(dst, 0, count, false, false); +} + +uint64_t calculate_encode(uint64_t dst, uint64_t src, size_t count, bool neq, bool has_src) { + + uint64_t dst_offset = dst & 0x07; + uint64_t src_offset = src & 0x07; + + uint64_t pre_count = 0; + uint64_t loop_count = 0; + uint64_t post_count = 0; + + if (dst_offset > 0) { + pre_count = 8 - dst_offset; + if (pre_count >= count) { + pre_count = count; + } else { + uint64_t pending = count - pre_count; + loop_count = pending >> 3; + post_count = pending & 0x7; + } + } else { + loop_count = count >> 3; + post_count = count & 0x07; + } + + uint64_t pre_writes = (pre_count > 0) + (post_count > 0); + // uint64_T to_src_offset = (src + count - 1) & 0x07; + uint64_t src_offset_pos = (src_offset + pre_count) & 0x07; + uint64_t double_src_post = (src_offset_pos + post_count) > 8; + uint64_t double_src_pre = (src_offset + pre_count) > 8; + uint64_t extra_src_reads = count == 0 ? 0 : ((((src + count - 1) >> 3) - (src >> 3) + 1) - loop_count); + + uint64_t src64_inc_by_pre = (pre_count > 0 && (src_offset + pre_count) >= 8); + uint64_t unaligned_dst_src = count > 0 && src_offset != dst_offset; + + if (neq && post_count == 0 && loop_count > 0) { + // (dst + count) 0x07 == 7 ==> (dst_offset + count) 0x07 == 7 ==> post_count == 0 + // loop = loop - 1 + // pre_writes = pre_writes + 1 + // post = 8 + // double_src_post = unaligned_dst_src ? 1:0; + // extra_src_reads = extra_src_read + 1 + loop_count -= 1; + pre_writes += 1; + post_count = 8; + double_src_post = src_offset != dst_offset; + extra_src_reads += 1; + } + uint64_t requires_dma = count == 0 || pre_count != 0 || post_count != 0; + if (has_src) { + return pre_count + | (post_count << DMA_POST_COUNT_RS) + | (pre_writes << DMA_PRE_WRITES_RS) + | (dst_offset << DMA_DST_OFFSET_RS) + | (src_offset << DMA_SRC_OFFSET_RS) + | (double_src_pre << DMA_DOUBLE_SRC_PRE_RS) + | (double_src_post << DMA_DOUBLE_SRC_POST_RS) + | (extra_src_reads << DMA_EXTRA_SRC_READS_RS) + | (src64_inc_by_pre << DMA_SRC64_INC_BY_PRE_RS) + | (unaligned_dst_src << DMA_UNALIGNED_DST_SRC_RS) + | (pre_count << DMA_LPRE_COUNT_RS) // optimization to read loop_count * 8 + pre_count + | (loop_count << DMA_LOOP_COUNT_RS) + | (requires_dma << DMA_REQUIRES_DMA_RS); + } + return + pre_count + | (post_count << DMA_POST_COUNT_RS) + | (pre_writes << DMA_PRE_WRITES_RS) + | (dst_offset << DMA_DST_OFFSET_RS) + | (pre_count << DMA_LPRE_COUNT_RS) // optimization to read loop_count * 8 + pre_count + | (loop_count << DMA_LOOP_COUNT_RS) + | (requires_dma << DMA_REQUIRES_DMA_RS); +} + +void basic_print_encode_mismatch(uint64_t expected, uint64_t found) { + static const char *_hexdigits = "0123456789ABCDF"; + char _expected[256]; + char _found[256]; + size_t _iexpected = 0; + size_t _ifound = 0; + for (size_t i_digit=0; i_digit<16; ++i_digit) { + uint8_t byte_expected = (expected >> (60 - 4 * i_digit)) & 0x0F; + uint8_t byte_found = (found >> (60 - 4 * i_digit)) & 0x0F; + if (i_digit && (i_digit % 4) == 0) { + _expected[_iexpected] = '_'; + _found[_ifound] = '_'; + ++_ifound; + ++_iexpected; + } + if (byte_expected != byte_found) { + strcpy(_expected + _iexpected, "\x1B[1;31m"); + strcpy(_found + _ifound, "\x1B[1;31m"); + _ifound += 7; + _iexpected += 7; + } + _expected[_iexpected] = _hexdigits[byte_expected]; + _found[_ifound] = _hexdigits[byte_found]; + ++_ifound; + ++_iexpected; + if (byte_expected != byte_found) { + strcpy(_expected + _iexpected, "\x1B[0m"); + strcpy(_found + _ifound, "\x1B[0m"); + _ifound += 4; + _iexpected += 4; + } + } + _expected[_iexpected] = '\0'; + _found[_ifound] = '\0'; + printf("expected:%s\n found:%s\n", _expected, _found); +} + + +void print_encode_mismatch(uint64_t expected, uint64_t found) { + static const char *_hexdigits = "0123456789ABCDF"; + std::stringstream s_expected; + std::stringstream s_found; + for (size_t i_digit=0; i_digit<16; ++i_digit) { + uint8_t byte_expected = (expected >> (60 - 4 * i_digit)) & 0x0F; + uint8_t byte_found = (found >> (60 - 4 * i_digit)) & 0x0F; + if (i_digit && (i_digit % 4) == 0) { + s_expected << '_'; + s_found << '_'; + } + if (byte_expected != byte_found) { + s_expected << "\x1B[1;31m"; + s_found << "\x1B[1;31m"; + } + s_expected << _hexdigits[byte_expected]; + s_found << _hexdigits[byte_found]; + if (byte_expected != byte_found) { + s_expected << "\x1B[0m"; + s_found << "\x1B[0m"; + } + } + size_t i_group = 0; + while (encode_info[i_group].title[0]) { + uint64_t g_expected = (expected & encode_info[i_group].mask); + uint64_t g_found = (found & encode_info[i_group].mask); + if (g_expected != g_found) { + s_expected << " " << encode_info[i_group].title << ":" << (g_expected >> encode_info[i_group].rs_bits); + s_found << " " << encode_info[i_group].title << ":" << (g_found >> encode_info[i_group].rs_bits); + } + ++i_group; + } + printf("expected:%s\n found:%s\n", s_expected.str().c_str(), s_found.str().c_str()); +} diff --git a/emulator-asm/src/dma/test/test_dma_encode.hpp b/emulator-asm/src/dma/test/test_dma_encode.hpp new file mode 100644 index 000000000..38d364822 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_encode.hpp @@ -0,0 +1,69 @@ +#ifndef __DMA_ENCODE__HPP_ +#define __DMA_ENCODE__HPP_ + +#include +#include +#include + +#define DMA_PRE_COUNT_TEST_MASK 0x07 +#define DMA_PRE_COUNT_MASK 0x07 +#define DMA_POST_COUNT_RS 3 +#define DMA_POST_COUNT_TEST_MASK 0x78 +#define DMA_POST_COUNT_MASK 0x0F +#define DMA_PRE_WRITES_RS 7 +#define DMA_PRE_WRITES_TEST_MASK 0x180 +#define DMA_PRE_WRITES_MASK 0x003 +#define DMA_DST_OFFSET_RS 9 +#define DMA_DST_OFFSET_TEST_MASK 0x0E00 +#define DMA_DST_OFFSET_MASK 0x007 +#define DMA_SRC_OFFSET_RS 12 +#define DMA_SRC_OFFSET_TEST_MASK 0x70000 +#define DMA_SRC_OFFSET_MASK 0x007 +#define DMA_DOUBLE_SRC_PRE_RS 15 +#define DMA_DOUBLE_SRC_PRE_TEST_MASK 0x08000 +#define DMA_DOUBLE_SRC_POST_RS 16 +#define DMA_DOUBLE_SRC_POST_TEST_MASK 0x10000 +#define DMA_EXTRA_SRC_READS_RS 17 +#define DMA_EXTRA_SRC_READS_TEST_MASK 0x60000 +#define DMA_EXTRA_SRC_READS_MASK 0x00003 +#define DMA_SRC64_INC_BY_PRE_RS 19 +#define DMA_SRC64_INC_BY_PRE_TEST_MASK 0x80000 +#define DMA_UNALIGNED_DST_SRC_RS 20 +#define DMA_UNALIGNED_DST_SRC_TEST_MASK 0x100000 +#define DMA_FILL_BYTE_RS 21 +#define DMA_FILL_BYTE_TEST_MASK 0x1FE00000 +#define DMA_FILL_BYTE_CMD_RES_TEST_MASK 0x3FE00000 +#define DMA_FILL_BYTE_MASK 0x000000FF +#define DMA_FILL_BITS9_MASK 0x000001FF +#define DMA_FILL_BYTE_SIGN_TEST_MASK 0x20000000 +#define DMA_LPRE_COUNT_RS 32 +#define DMA_LPRE_COUNT_TEST_MASK 0x700000000 +#define DMA_LPRE_COUNT_MASK 0x00000007 +#define DMA_REQUIRES_DMA_RS 30 +#define DMA_REQUIRES_DMA_TEST_MASK 0x40000000 +#define DMA_REQUIRES_DMA_MASK 0x00000001 +#define DMA_LOOP_COUNT_TEST_MASK 0xFFFFFFF800000000 + +#define DMA_PRE_OR_POST_TEST_MASK (DMA_PRE_COUNT_TEST_MASK | DMA_POST_COUNT_TEST_MASK) +#define DMA_LOOP_COUNT_RS 35 +#define DMA_FULL_ALIGNED_MASK (DMA_PRE_COUNT_TEST_MASK \ + | DMA_POST_COUNT_TEST_MASK \ + | DMA_PRE_WRITES_TEST_MASK \ + | DMA_DST_OFFSET_TEST_MASK \ + | DMA_SRC_OFFSET_TEST_MASK \ + | DMA_DOUBLE_SRC_PRE_TEST_MASK \ + | DMA_DOUBLE_SRC_POST_TEST_MASK \ + | DMA_EXTRA_SRC_READS_TEST_MASK \ + | DMA_SRC64_INC_BY_PRE_TEST_MASK \ + | DMA_UNALIGNED_DST_SRC_TEST_MASK) + +#define DMA_DIRECT_MASK (DMA_FULL_ALIGNED_MASK | DMA_REQUIRES_DMA_TEST_MASK) + +uint64_t calculate_encode(uint64_t dst, uint64_t src, size_t count, bool neq = false, bool has_src = true); +uint64_t calculate_encode_memset(uint64_t dst, size_t count, uint64_t byte); +uint64_t calculate_encode_memcmp(uint64_t dst, uint64_t src, size_t count, int result = 0); +uint64_t calculate_encode_inputcpy(uint64_t dst, size_t count); +void print_encode_mismatch(uint64_t expected, uint64_t found); + + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_inputcpy_mops.cpp b/emulator-asm/src/dma/test/test_dma_inputcpy_mops.cpp new file mode 100644 index 000000000..e0e697d9b --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_inputcpy_mops.cpp @@ -0,0 +1,138 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_dma_mem_mops.hpp" +#include "test_dma_inputcpy_mops.hpp" +#include "test_dma_tools.hpp" +#include "test_dma_encode.hpp" +#include "test_mock.hpp" + +extern "C" { + uint64_t test_asm_dma_inputcpy_mops(uint8_t *dst, uint8_t *src, size_t count, uint64_t *trace); +} + +class TestDmaInputCpyMops: public TestDmaMemMops { +protected: + uint64_t *prev_dst; + bool execute_single_test(void); + bool check_mop(size_t index, uint64_t expected, const char *tag); +public: + TestDmaInputCpyMops(size_t max_count = 1024); + virtual ~TestDmaInputCpyMops(); + void run(void); +}; + +TestDmaInputCpyMops::TestDmaInputCpyMops(size_t max_count): + TestDmaMemMops(max_count, false) { + prev_dst = (uint64_t *)malloc(data_size); +} + +TestDmaInputCpyMops::~TestDmaInputCpyMops(void) { + free(prev_dst); +} + +void TestDmaInputCpyMops::run(void) { + fill_pattern((uint8_t *)(fcall_ctx + FCALL_RESULT), FCALL_RESULT_LENGTH, 3013102105130209); + printf("DST:0x%08lX\n", (uint64_t)dst); + size_t total_tests = 0; + src_offset = 0; + for (dst_offset = 0; dst_offset < 7; ++dst_offset) { + for (count = 0; count < 1024; ++count) { + if (!execute_single_test()) { + printf("\nTest is [\x1B[1;31mFAIL\x1B[0m]\n"); + dump(); + return; + } + ++total_tests; + } + } + printf("\nAll %ld tests are [\x1B[1;32mOK\x1B[0m]\n", total_tests); +} + +bool TestDmaInputCpyMops::check_mop(size_t index, uint64_t expected, const char *tag) { + if (mtrace[index] != expected) { + printf("\nERROR: %s expected: 0x%016lX (%s) found: mtrace[%ld]:%016lX (%s)\n", tag, expected, + decode(expected).c_str(), index, mtrace[index], decode(mtrace[index]).c_str()); + return false; + } + return true; +} +bool TestDmaInputCpyMops::execute_single_test(void) { + fill_pattern((uint8_t *)(fcall_ctx + FCALL_RESULT), FCALL_RESULT_LENGTH, 15436 + dst_offset + count); + fcall_ctx[FCALL_RESULT_GOT] = 1; + fill_pattern(dst, data_size, 1821904675 + dst_offset + count); + uint8_t *p_dst = dst + dst_offset; + + memcpy(prev_dst, dst, data_size); + printf("\rTEST dst_offset:%ld count:%4ld", dst_offset, count); + fflush(stdout); + uint64_t res = test_asm_dma_inputcpy_mops(p_dst, 0, count, test_trace); + size_t trace_count = test_trace[0]; + uint64_t _dst = (uint64_t)dst + dst_offset; + if (res != _dst) { + printf("\nERROR: invalid result expected:0x%08lX found:0x%08lX\n", _dst, res); + return false; + } + // uint64_t encode = calculate_encode((uint64_t)p_dst, (uint64_t)p_src, count); + size_t index = 0; + size_t pre_count = (dst_offset > 0 && count > 0) ? 8 - dst_offset : 0; + if (pre_count > count) { + pre_count = count; + } + if (pre_count > 0) { + if (!check_mop(index, encode_aligned_read((uint64_t)dst), "PRE pre write")) { + return false; + } + index += 1; + } + size_t loop_count = (count - pre_count) >> 3; + size_t post_count = (count - pre_count) & 0x07; + if (post_count > 0) { + uint64_t dst_post = ((uint64_t)dst + dst_offset + pre_count + loop_count * 8) & ~0x07; + if (!check_mop(index, encode_aligned_read((uint64_t)dst_post), "POST pre write")) { + return false; + } + index += 1; + } + if (count > 0) { + size_t dst_qwords = (dst_offset + count + 7) >> 3; + if (!check_mop(index, encode_aligned_block_write((uint64_t)dst, dst_qwords), "dst write")) { + return false; + } + ++index; + } + if (trace_count != index) { + printf("ERROR: invalid mtrace len expected:%ld vs found:%ld\n", index, trace_count); + return false; + } + memcpy((uint8_t *)prev_dst + dst_offset, fcall_ctx + FCALL_RESULT, count); + if (memcmp(prev_dst, dst, data_size) != 0) { + printf("\nERROR: inputcpy operation\n"); + int errors = 0; + uint8_t *_dst = (uint8_t *)prev_dst; + for (size_t i = 0; i < data_size; ++i) { + if (_dst[i] == dst[i]) continue; + printf("[%ld] 0x%02X 0x%02X NO MATCH\n", i, _dst[i], dst[i]); + ++errors; + if (errors > 16) { + printf(".... and more\n"); + break; + } + } + printf("\nERROR: memcpy operation\n"); + return false; + } + return true; +} + +void test_dma_inputcpy_mops() { + printf("\x1B[1;34mTEST DMA INPUTCPY MOPS =================================================\x1B[0m\n"); + TestDmaInputCpyMops test(1024); + test.run(); +} diff --git a/emulator-asm/src/dma/test/test_dma_inputcpy_mops.hpp b/emulator-asm/src/dma/test/test_dma_inputcpy_mops.hpp new file mode 100644 index 000000000..841780f08 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_inputcpy_mops.hpp @@ -0,0 +1,6 @@ +#ifndef __TEST_DMA_INPUTCPY_MOPS__HPP__ +#define __TEST_DMA_INPUTCPY_MOPS__HPP__ + +void test_dma_inputcpy_mops(); + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_inputcpy_mtrace.cpp b/emulator-asm/src/dma/test/test_dma_inputcpy_mtrace.cpp new file mode 100644 index 000000000..19a393511 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_inputcpy_mtrace.cpp @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_dma_mem_mtrace.hpp" +#include "test_dma_inputcpy_mtrace.hpp" +#include "test_dma_tools.hpp" +#include "test_dma_encode.hpp" +#include "test_mock.hpp" + +extern "C" { + uint64_t test_asm_dma_inputcpy_mtrace(uint8_t *dst, uint8_t *src, size_t count, uint64_t *trace); +} + +class TestDmaInputCpyMtrace: public TestDmaMemMtrace { +protected: + uint64_t *prev_dst; + uint64_t *check_dst; + bool execute_single_test(void); +public: + TestDmaInputCpyMtrace(size_t max_count = 1024); + virtual ~TestDmaInputCpyMtrace(); + void run(void); +}; + +TestDmaInputCpyMtrace::TestDmaInputCpyMtrace(size_t max_count): + TestDmaMemMtrace(max_count, false) { + prev_dst = (uint64_t *)malloc(data_size); + check_dst = (uint64_t *)malloc(data_size); +} + +TestDmaInputCpyMtrace::~TestDmaInputCpyMtrace(void) { + free(prev_dst); + free(check_dst); +} + +void TestDmaInputCpyMtrace::run(void) { + fill_pattern((uint8_t *)(fcall_ctx + FCALL_RESULT), FCALL_RESULT_LENGTH, 3013102105130209); + size_t total_tests = 0; + for (dst_offset = 0; dst_offset < 7; ++dst_offset) { + for (count = 0; count < 1024; ++count) { + if (!execute_single_test()) { + printf("\nTest is [\x1B[1;31mFAIL\x1B[0m]\n"); + dump(); + return; + } + ++total_tests; + } + } + printf("\nAll %ld tests are [\x1B[1;32mOK\x1B[0m]\n", total_tests); +} + +bool TestDmaInputCpyMtrace::execute_single_test(void) { + fill_pattern((uint8_t *)(fcall_ctx + FCALL_RESULT), FCALL_RESULT_LENGTH, 15436 + dst_offset + count); + fcall_ctx[FCALL_RESULT_GOT] = 1; + fill_pattern(dst, data_size, 1821904675); + uint8_t *p_dst = dst + dst_offset; + + memcpy(prev_dst, dst, data_size); + printf("\rTEST dst_offset:%ld count:%4ld", dst_offset, count); + fflush(stdout); + uint64_t res = test_asm_dma_inputcpy_mtrace(p_dst, 0, count, test_trace); + size_t trace_count = test_trace[0]; + if (trace_count < 1) { + printf("\nERROR: invalid trace_count %ld\n", trace_count); + return false; + } + uint64_t _dst = (uint64_t)dst + dst_offset; + if (res != _dst) { + printf("\nERROR: invalid result expected:0x%08lX found:0x%08lX\n", _dst, res); + return false; + } + uint64_t encode = calculate_encode_inputcpy((uint64_t)p_dst, count); + if (mtrace[0] != encode) { + printf("\nERROR: invalid encoded\n"); + print_encode_mismatch(encode, mtrace[0]); + return false; + } + size_t index = 1; + size_t pre_count = (dst_offset > 0 && count > 0) ? 8 - dst_offset : 0; + if (pre_count > count) { + pre_count = count; + } + if (pre_count > 0) { + if (mtrace[index] != prev_dst[0]) { + printf("\nERROR: pre write pre-value expected: dst[0]:0x%016lX found: mtrace[%ld]:%016lX\n", prev_dst[0], index, mtrace[index]); + return false; + } + ++index; + } + + size_t post_count = (count - pre_count) & 0x07; + if (post_count > 0) { + size_t last_dst_index = (dst_offset + count - 1) >> 3; + if (mtrace[index] != prev_dst[last_dst_index]) { + printf("\nERROR: post write pre-value expected: dst[%ld]:0x%016lX vs found mtrace[%ld]:0x%016lX\n", last_dst_index, prev_dst[last_dst_index], index, mtrace[index]); + return false; + } + ++index; + } + size_t input_qwords = count > 0 ? (count + 7) >> 3 : 0; + for (size_t i = 0; i < input_qwords; ++i) { + uint64_t expected = fcall_ctx[FCALL_RESULT + i]; + if (mtrace[index] != expected) { + printf("\nERROR: input value expected: input[%ld]:0x%016lX vs found mtrace[%ld]:0x%016lX\n", i, expected, index, mtrace[index]); + return false; + } + ++index; + } + if (trace_count != index) { + printf("\nERROR: invalid mtrace len expected:%ld vs found:%ld\n", index, trace_count); + size_t _count = index > trace_count ? index + 1: trace_count + 1; + for (size_t i = 0; i <= _count; ++i) { + printf("mtrace[%ld] 0x%016lX\n", i, mtrace[i]); + } + return false; + } + memcpy((uint8_t *)prev_dst + dst_offset, fcall_ctx + FCALL_RESULT, count); + if (memcmp(prev_dst, dst, data_size) != 0) { + printf("\nERROR: inputcpy operation\n"); + + return false; + } + return true; +} + +void test_dma_inputcpy_mtrace() { + printf("\x1B[1;34mTEST DMA INPUTCPY MTRACE =================================================\x1B[0m\n"); + TestDmaInputCpyMtrace test(1024); + test.run(); +} diff --git a/emulator-asm/src/dma/test/test_dma_inputcpy_mtrace.hpp b/emulator-asm/src/dma/test/test_dma_inputcpy_mtrace.hpp new file mode 100644 index 000000000..d7ff614ec --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_inputcpy_mtrace.hpp @@ -0,0 +1,6 @@ +#ifndef __TEST_DMA_INPUTCPY_MTRACE__HPP__ +#define __TEST_DMA_INPUTCPY_MTRACE__HPP__ + +void test_dma_inputcpy_mtrace(); + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_mem.cpp b/emulator-asm/src/dma/test/test_dma_mem.cpp new file mode 100644 index 000000000..77ce7ae8e --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_mem.cpp @@ -0,0 +1,29 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_dma_mem.hpp" +#include "test_dma_tools.hpp" +#include "test_dma_encode.hpp" + +TestDmaMem::TestDmaMem(size_t max_count, bool use_src): + max_count(max_count), use_src(use_src) { + data_size = sizeof(uint64_t) * (max_count + 16); + trace_size = sizeof(uint64_t) * max_count * 4; + src = use_src ? (uint8_t *)malloc(data_size) : NULL; + dst = (uint8_t *)malloc(data_size); + aligned_dst = (uint64_t *)dst; + aligned_src = (uint64_t *)src; + test_trace = (uint64_t *)malloc(trace_size); + mtrace = test_trace + 1; +} + +TestDmaMem::~TestDmaMem(void) { + if (src) free(src); + free(dst); + free(test_trace); +} \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_mem.hpp b/emulator-asm/src/dma/test/test_dma_mem.hpp new file mode 100644 index 000000000..81cf02246 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_mem.hpp @@ -0,0 +1,32 @@ +#ifndef __TEST_DMA_MEM__HPP__ +#define __TEST_DMA_MEM__HPP__ + +#include +#include +#include + +#define EXTRA_PARAMETER_ADDR 0xA0000F00 + +class TestDmaMem { +protected: + uint8_t *dst; + uint8_t *src; + uint64_t *aligned_dst; + uint64_t *aligned_src; + uint64_t *test_trace; + uint64_t *mtrace; + size_t max_count; + size_t data_size; + size_t trace_size; + uint64_t src_offset; + uint64_t dst_offset; + bool use_src; + int diff_dst_src; + uint64_t count; +public: + TestDmaMem(size_t max_count = 1024, bool use_src = true); + virtual ~TestDmaMem(); + virtual void run(void) = 0; +}; + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_mem_mops.cpp b/emulator-asm/src/dma/test/test_dma_mem_mops.cpp new file mode 100644 index 000000000..f9a777398 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_mem_mops.cpp @@ -0,0 +1,151 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_dma_mem_mops.hpp" +#include "test_dma_tools.hpp" +#include "test_dma_encode.hpp" +#include "mem_config.hpp" + +TestDmaMemMops::TestDmaMemMops(size_t max_count, bool use_src): + TestDmaMem(max_count, use_src) { +} + +TestDmaMemMops::~TestDmaMemMops(void) { +} + +std::string TestDmaMemMops::decode(uint64_t value) { + uint32_t flags = value >> 32; + uint8_t bytes = flags & 0x0F; + uint32_t addr = value & 0xFFFF'FFFF; + uint32_t count = flags >> MOPS_BLOCK_COUNT_SBITS; + std::ostringstream oss; + oss << std::setfill('0') << std::setw(8) << std::hex << std::uppercase; + switch (bytes) { + // byte + case 1: + case 2: + case 4: + case 8: { + if (flags & MOPS_WRITE_FLAG) { + oss << "READ(0x"; + } else { + oss << "WRITE(0x"; + } + oss << addr << "," << std::setw(0) << std::dec << bytes << ")"; + return oss.str(); + } + case MOPS_ALIGNED_READ: { + oss << "ALIGNED_READ(0x" << addr << ")"; + return oss.str(); + } + case MOPS_ALIGNED_WRITE: { + oss << "ALIGNED_WRITE(0x" << addr << ")"; + return oss.str(); + } + case MOPS_BLOCK_READ: { + oss << "BLOCK_READ(0x" << addr << "," << std::setw(0) << std::dec << count << ")"; + return oss.str(); + } + case MOPS_BLOCK_WRITE: { + oss << "BLOCK_WRITE(0x" << addr << "," << std::setw(0) << std::dec << count << ")"; + return oss.str(); + } + case MOPS_ALIGNED_BLOCK_READ: { + oss << "ALIGNED_BLOCK_READ(0x" << addr << "," << std::setw(0) << std::dec << count << ")"; + return oss.str(); + } + case MOPS_ALIGNED_BLOCK_WRITE: { + oss << "ALIGNED_BLOCK_WRITE(0x" << addr << "," << std::setw(0) << std::dec << count << ")"; + return oss.str(); + } + default: { + oss << "?¿ " << std::setw(2) << bytes; + return oss.str(); + } + } +} + +void TestDmaMemMops::dump(void) { + printf("---------------------------------\n"); + size_t trace_count = test_trace[0]; + for (size_t index = 0; index < trace_count; ++index) { + uint64_t trace = test_trace[index+1]; + uint32_t addr = trace & 0xFFFF'FFFF; + uint32_t flags = trace >> 32; + printf("mops[%ld] 0x%08X_%08X %s", index, flags, addr, decode(test_trace[index+1]).c_str()); + if (src) { + if (addr >= (uint64_t)src && addr < (uint64_t)(src + max_count)) { + printf(" SRC+%ld", (uint64_t) addr - (uint64_t) src); + } + } + if (addr >= (uint64_t)dst && addr < (uint64_t)(dst + max_count)) { + printf(" DST+%ld", (uint64_t) addr - (uint64_t) dst); + } + printf("\n"); + } +} + +uint64_t TestDmaMemMops::encode_read(uint32_t addr, uint8_t bytes) { + switch (bytes) { + case 1: + return (1ull << 32) | (uint64_t)addr; + case 2: + return (2ull << 32) | (uint64_t)addr; + case 4: + return (4ull << 32) | (uint64_t)addr; + case 8: + return (8ull << 32) | (uint64_t)addr; + default: + throw std::runtime_error("encode_read: invalid bytes: " + std::to_string((int)bytes)); + } +} +uint64_t TestDmaMemMops::encode_write(uint32_t addr, uint8_t bytes) { + switch (bytes) { + case 1: + return ((1ull + MOPS_WRITE_FLAG) << 32) | (uint64_t)addr; + case 2: + return ((2ull + MOPS_WRITE_FLAG) << 32) | (uint64_t)addr; + case 4: + return ((4ull + MOPS_WRITE_FLAG) << 32) | (uint64_t)addr; + case 8: + return ((8ull + MOPS_WRITE_FLAG) << 32) | (uint64_t)addr; + default: + throw std::runtime_error("encode_write: invalid bytes: " + std::to_string((int)bytes)); + } +} +uint64_t TestDmaMemMops::encode_aligned_read(uint32_t addr) { + return ((uint64_t) MOPS_ALIGNED_READ << 32) | (uint64_t) addr; +} +uint64_t TestDmaMemMops::encode_aligned_x_read(uint32_t addr, uint32_t count) { + if (count == 1) { + return ((uint64_t) MOPS_ALIGNED_READ << 32) | (uint64_t) addr; + } + return encode_aligned_block_read(addr, count); +} +uint64_t TestDmaMemMops::encode_aligned_write(uint32_t addr) { + return ((uint64_t) MOPS_ALIGNED_WRITE << 32) | (uint64_t) addr; +} +uint64_t TestDmaMemMops::encode_block_read(uint32_t addr, uint32_t count) { + return ((uint64_t) MOPS_BLOCK_READ << 32) | ((uint64_t) count << (MOPS_BLOCK_COUNT_SBITS + 32)) | addr; +} +uint64_t TestDmaMemMops::encode_block_write(uint32_t addr, uint32_t count) { + return ((uint64_t) MOPS_BLOCK_WRITE << 32) | ((uint64_t) count << (MOPS_BLOCK_COUNT_SBITS + 32)) | addr; +} +uint64_t TestDmaMemMops::encode_aligned_block_read(uint32_t addr, uint32_t count) { + return ((uint64_t) MOPS_ALIGNED_BLOCK_READ << 32) | ((uint64_t) count << (MOPS_BLOCK_COUNT_SBITS + 32)) | addr; +} +uint64_t TestDmaMemMops::encode_aligned_block_write(uint32_t addr, uint32_t count) { + return ((uint64_t) MOPS_ALIGNED_BLOCK_WRITE << 32) | ((uint64_t) count << (MOPS_BLOCK_COUNT_SBITS + 32)) | addr; + +} + diff --git a/emulator-asm/src/dma/test/test_dma_mem_mops.hpp b/emulator-asm/src/dma/test/test_dma_mem_mops.hpp new file mode 100644 index 000000000..aa70f132c --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_mem_mops.hpp @@ -0,0 +1,28 @@ +#ifndef __TEST_DMA_MEM_MTRACE_MOPS__HPP__ +#define __TEST_DMA_MEM_MTRACE_MOPS__HPP__ + +#include +#include +#include +#include "test_dma_mem.hpp" + +class TestDmaMemMops: public TestDmaMem { +protected: + void dump(void); +public: + TestDmaMemMops(size_t max_count = 1024, bool use_src = true); + virtual ~TestDmaMemMops(); + virtual void run(void) = 0; + std::string decode(uint64_t value); + uint64_t encode_read(uint32_t addr, uint8_t bytes); + uint64_t encode_write(uint32_t addr, uint8_t bytes); + uint64_t encode_aligned_read(uint32_t addr); + uint64_t encode_aligned_write(uint32_t addr); + uint64_t encode_block_read(uint32_t addr, uint32_t count); + uint64_t encode_block_write(uint32_t addr, uint32_t count); + uint64_t encode_aligned_block_read(uint32_t addr, uint32_t count); + uint64_t encode_aligned_block_write(uint32_t addr, uint32_t count); + uint64_t encode_aligned_x_read(uint32_t addr, uint32_t count); +}; + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_mem_mtrace.cpp b/emulator-asm/src/dma/test/test_dma_mem_mtrace.cpp new file mode 100644 index 000000000..b3503be45 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_mem_mtrace.cpp @@ -0,0 +1,38 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_dma_mem_mtrace.hpp" +#include "test_dma_tools.hpp" +#include "test_dma_encode.hpp" + +TestDmaMemMtrace::TestDmaMemMtrace(size_t max_count, bool use_src): + TestDmaMem(max_count, use_src) { +} + +TestDmaMemMtrace::~TestDmaMemMtrace(void) { +} + +void TestDmaMemMtrace::dump(void) { + printf("---------------------------------\n"); + size_t dst_qwords = (dst_offset + count + 7) >> 3; + for (size_t index = 0; index < dst_qwords; ++index) { + printf("dst64[%ld] 0x%016lX\n", index, aligned_dst[index]); + } + if (src) { + printf("---------------------------------\n"); + size_t src_qwords = (src_offset + count + 7) >> 3; + for (size_t index = 0; index < src_qwords; ++index) { + printf("src64[%ld] 0x%016lX\n", index, aligned_src[index]); + } + } + printf("---------------------------------\n"); + size_t trace_count = test_trace[0]; + for (size_t index = 0; index < trace_count; ++index) { + printf("mtrace[%ld] 0x%016lX\n", index, test_trace[index+1]); + } +} diff --git a/emulator-asm/src/dma/test/test_dma_mem_mtrace.hpp b/emulator-asm/src/dma/test/test_dma_mem_mtrace.hpp new file mode 100644 index 000000000..878acbeb3 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_mem_mtrace.hpp @@ -0,0 +1,20 @@ +#ifndef __TEST_DMA_MEM_MTRACE__HPP__ +#define __TEST_DMA_MEM_MTRACE__HPP__ + +#include +#include +#include +#include "test_dma_mem.hpp" + +class TestDmaMemMtrace: public TestDmaMem { +protected: + int diff_dst_src; + uint64_t count; + void dump(void); +public: + TestDmaMemMtrace(size_t max_count = 1024, bool use_src = true); + virtual ~TestDmaMemMtrace(); + virtual void run(void) = 0; +}; + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_memcmp_mops.cpp b/emulator-asm/src/dma/test/test_dma_memcmp_mops.cpp new file mode 100644 index 000000000..15333ff33 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_memcmp_mops.cpp @@ -0,0 +1,151 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_dma_mem_mops.hpp" +#include "test_dma_memcmp_mops.hpp" +#include "test_dma_tools.hpp" +#include "test_dma_encode.hpp" + +extern "C" { + size_t test_asm_dma_memcmp_mops(uint8_t *dst, uint8_t *src, size_t count, uint64_t *trace); +} + +class TestDmaMemCmpMops: public TestDmaMemMops { +protected: + int diff_dst_src; + uint64_t bus_count; + bool execute_single_test(void); + bool check_mop(size_t index, uint64_t expected, const char *tag); +public: + TestDmaMemCmpMops(size_t max_count = 1024); + void run(void); +}; + +TestDmaMemCmpMops::TestDmaMemCmpMops(size_t max_count): + TestDmaMemMops(max_count) { +} + + +void TestDmaMemCmpMops::run(void) { + fill_pattern(src,data_size, 3013102105130209); + size_t total_tests = 0; + for (uint64_t icase = 0; icase < 3; ++icase) { + diff_dst_src = icase == 0 ? 0 : (60 * icase - 180 * (icase - 1)); + for (dst_offset = 0; dst_offset < 7; ++dst_offset) { + for (src_offset = 0; src_offset < 7; ++src_offset) { + for (count = 0; count < 1024; ++count) { + for (uint64_t i_count_case = 0; i_count_case < 3; ++i_count_case) { + if (i_count_case > 0) { + bus_count = count + 1 + (i_count_case - 1) * (dst_offset + src_offset); + } + if (!execute_single_test()) { + printf("\nTest is [\x1B[1;31mFAIL\x1B[0m]\n"); + dump(); + return; + } + ++total_tests; + } + } + } + } + } + printf("\nAll %ld tests are [\x1B[1;32mOK\x1B[0m]\n", total_tests); +} + +bool TestDmaMemCmpMops::check_mop(size_t index, uint64_t expected, const char *tag) { + if (mtrace[index] != expected) { + printf("\nERROR: %s expected: 0x%016lX (%s) found: mtrace[%ld]:%016lX (%s)\n", tag, expected, + decode(expected).c_str(), index, mtrace[index], decode(mtrace[index]).c_str()); + return false; + } + return true; +} +bool TestDmaMemCmpMops::execute_single_test(void) { + memset(test_trace, 0, trace_size); + fill_pattern(dst, data_size, 1821904675); + uint8_t *p_dst = dst + dst_offset; + uint8_t *p_src = src + src_offset; + + int cmp_res = create_memcmp_data(p_dst, p_src, count, diff_dst_src); + printf("\rTEST dst_offset:%ld src_offset:%ld count:%4ld (bus_count:%4ld) cmp_res:%4d (diff:%4d)", + dst_offset, src_offset, count, bus_count, cmp_res, diff_dst_src); + fflush(stdout); + int res = test_asm_dma_memcmp_mops(p_dst, p_src, count, test_trace); + size_t trace_count = test_trace[0]; + if (trace_count < 1) { + printf("\nERROR: invalid trace_count %ld\n", trace_count); + return false; + } + if (res != cmp_res) { + uint8_t byte_dst = dst[dst_offset + count - 1]; + uint8_t byte_src = src[src_offset + count - 1]; + printf("\nERROR: invalid result expected:%d found:%d DST:0x%02X SRC:0x%02X\n", + cmp_res, res, byte_dst, byte_src); + return false; + } + if (mtrace[0] != encode_aligned_read(EXTRA_PARAMETER_ADDR)) { + printf("\nERROR: not found valid param read\n"); + return false; + } + // uint64_t encode = calculate_encode((uint64_t)p_dst, (uint64_t)p_src, count); + size_t index = 1; + size_t pre_count = (dst_offset > 0 && count > 0) ? 8 - dst_offset : 0; + if (pre_count > count) { + pre_count = count; + } + if (pre_count > 0) { + size_t src_blocks = 1 + ((src_offset + pre_count) > 8); + if (!check_mop(index, encode_aligned_read((uint64_t)dst), "PRE pre write") || + !check_mop(index + 1, encode_aligned_x_read((uint64_t)src, src_blocks), "PRE src read")) { + return false; + } + index += 2; + } + size_t loop_count = (count - pre_count) >> 3; + size_t post_count = (count - pre_count) & 0x07; + if (loop_count > 0 && post_count == 0 && res != 0) { + loop_count -= 1; + post_count = 8; + } + if (post_count > 0) { + uint64_t src_post = ((uint64_t)src + src_offset + pre_count + loop_count * 8) & ~0x07; + uint64_t dst_post = ((uint64_t)dst + dst_offset + pre_count + loop_count * 8) & ~0x07; + size_t src_blocks = 1 + ((((src_offset + pre_count) & 0x07) + post_count) > 8); + if (!check_mop(index, encode_aligned_read((uint64_t)dst_post), "POST pre write") || + !check_mop(index + 1, encode_aligned_x_read((uint64_t)src_post, src_blocks), "POST src read")) { + return false; + } + index += 2; + } + if (loop_count > 0) { + uint64_t src_loop = ((uint64_t)src + src_offset + pre_count) & ~0x07; + uint64_t dst_loop = pre_count > 0 ? (uint64_t)dst + 8 : (uint64_t)dst; + size_t src_count = dst_offset == src_offset ? loop_count : (loop_count + 1); + + if (!check_mop(index, encode_aligned_block_read(src_loop, src_count), "LOOP src read")) { + return false; + } + if (!check_mop(index+1, encode_aligned_block_read(dst_loop, loop_count), "LOOP dst read (cmp)")) { + return false; + } + index += 2; + } + if (trace_count != index) { + printf("ERROR: invalid mtrace len expected:%ld vs found:%ld\n", index, trace_count); + return false; + } + return true; +} + + +void test_dma_memcmp_mops() { + printf("\x1B[1;34mTEST DMA MEMCMP MOPS =================================================\x1B[0m\n"); + TestDmaMemCmpMops test(1024); + test.run(); +} diff --git a/emulator-asm/src/dma/test/test_dma_memcmp_mops.hpp b/emulator-asm/src/dma/test/test_dma_memcmp_mops.hpp new file mode 100644 index 000000000..b5d543f82 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_memcmp_mops.hpp @@ -0,0 +1,6 @@ +#ifndef __TEST_DMA_MEMCMP_MOPS__HPP__ +#define __TEST_DMA_MEMCMP_MOPS__HPP__ + +void test_dma_memcmp_mops(); + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_memcmp_mtrace.cpp b/emulator-asm/src/dma/test/test_dma_memcmp_mtrace.cpp new file mode 100644 index 000000000..68aa8c78e --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_memcmp_mtrace.cpp @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_dma_mem_mtrace.hpp" +#include "test_dma_memcmp_mtrace.hpp" +#include "test_dma_tools.hpp" +#include "test_dma_encode.hpp" + +extern "C" { + size_t test_asm_dma_memcmp_mtrace(uint8_t *dst, uint8_t *src, size_t count, uint64_t *trace); +} + +class TestDmaMemCmpMtrace: public TestDmaMemMtrace { +protected: + int diff_dst_src; + uint64_t bus_count; + bool execute_single_test(void); +public: + TestDmaMemCmpMtrace(size_t max_count = 1024); + void run(void); +}; + +TestDmaMemCmpMtrace::TestDmaMemCmpMtrace(size_t max_count): + TestDmaMemMtrace(max_count) { +} + + +void TestDmaMemCmpMtrace::run(void) { + fill_pattern(src,data_size, 3013102105130209); + size_t total_tests = 0; + for (uint64_t icase = 0; icase < 3; ++icase) { + diff_dst_src = icase == 0 ? 0 : (60 * icase - 180 * (icase - 1)); + for (dst_offset = 0; dst_offset < 7; ++dst_offset) { + for (src_offset = 0; src_offset < 7; ++src_offset) { + for (count = 0; count < 1024; ++count) { + for (uint64_t i_count_case = 0; i_count_case < 3; ++i_count_case) { + if (i_count_case > 0) { + bus_count = count + 1 + (i_count_case - 1) * (dst_offset + src_offset); + } + if (!execute_single_test()) { + printf("\nTest is [\x1B[1;31mFAIL\x1B[0m]\n"); + dump(); + return; + } + ++total_tests; + } + } + } + } + } + printf("\nAll %ld tests are [\x1B[1;32mOK\x1B[0m]\n", total_tests); +} + +bool TestDmaMemCmpMtrace::execute_single_test(void) { + memset(test_trace, 0, trace_size); + fill_pattern(dst, data_size, 1821904675); + bus_count = count; + uint8_t *p_dst = dst + dst_offset; + uint8_t *p_src = src + src_offset; + + int cmp_res = create_memcmp_data(p_dst, p_src, count, diff_dst_src); + printf("\rTEST dst_offset:%ld src_offset:%ld count:%4ld (bus_count:%4ld) cmp_res:%4d (diff:%4d)", + dst_offset, src_offset, count, bus_count, cmp_res, diff_dst_src); + fflush(stdout); + int res = test_asm_dma_memcmp_mtrace(p_dst, p_src, bus_count, test_trace); + size_t trace_count = test_trace[0]; + if (trace_count < 2) { + printf("\nERROR: invalid trace_count %ld\n", trace_count); + return false; + } + if (res != cmp_res) { + uint8_t byte_dst = dst[dst_offset + count - 1]; + uint8_t byte_src = src[src_offset + count - 1]; + printf("\nERROR: invalid result expected:%d found:%d DST:0x%02X SRC:0x%02X\n", + cmp_res, res, byte_dst, byte_src); + return false; + } + uint64_t encode = calculate_encode_memcmp((uint64_t)p_dst, (uint64_t)p_src, count, res); + if (mtrace[0] != encode) { + printf("\nERROR: invalid encoded\n"); + print_encode_mismatch(encode, mtrace[0]); + return false; + } + if (mtrace[1] != bus_count) { + printf("ERROR: invalid bus_count expected:%ld found:%ld\n", bus_count, mtrace[1]); + return false; + } + size_t index = 2; + size_t pre_count = (dst_offset > 0 && count > 0) ? 8 - dst_offset : 0; + if (pre_count > count) { + pre_count = count; + } + if (pre_count > 0) { + if (mtrace[index] != aligned_dst[0]) { + printf("ERROR: pre write pre-value expected: dst[0]:0x%016lX found: mtrace[%ld]:%016lX\n", aligned_dst[0], index, mtrace[index]); + return false; + } + ++index; + } + + size_t loop_count = (count - pre_count) >> 3; + size_t post_count = (count - pre_count) & 0x07; + if (loop_count > 0 && post_count == 0 && res != 0) { + loop_count -= 1; + post_count = 8; + } + if (post_count > 0) { + size_t last_dst_index = (dst_offset + count - 1) >> 3; + if (mtrace[index] != aligned_dst[last_dst_index]) { + printf("ERROR: post write pre-value expected: dst[%ld]:0x%016lX vs found mtrace[%ld]:0x%016lX\n", last_dst_index, aligned_dst[last_dst_index], index, mtrace[index]); + return false; + } + ++index; + } + size_t src_qwords = count > 0 ? (src_offset + count + 7) >> 3 : 0; + for (size_t i_src = 0; i_src < src_qwords; ++i_src) { + if (mtrace[index] != aligned_src[i_src]) { + printf("ERROR: src value expected: src[%ld]:0x%016lX vs found mtrace[%ld]:0x%016lX\n", i_src, aligned_src[i_src], index, mtrace[index]); + return false; + } + ++index; + } + if (trace_count != index) { + printf("ERROR: invalid mtrace len expected:%ld vs found:%ld\n", index, trace_count); + size_t _count = index > trace_count ? index + 1: trace_count + 1; + for (size_t i = 0; i <= _count; ++i) { + printf("mtrace[%ld] 0x%016lX\n", i, mtrace[i]); + } + return false; + } + + return true; +} + +void test_dma_memcmp_mtrace() { + printf("\x1B[1;34mTEST DMA MEMCMP MTRACE =================================================\x1B[0m\n"); + TestDmaMemCmpMtrace test(1024); + test.run(); +} \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_memcmp_mtrace.hpp b/emulator-asm/src/dma/test/test_dma_memcmp_mtrace.hpp new file mode 100644 index 000000000..9909c2ca1 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_memcmp_mtrace.hpp @@ -0,0 +1,6 @@ +#ifndef __TEST_DMA_MEMCMP_MTRACE__HPP__ +#define __TEST_DMA_MEMCMP_MTRACE__HPP__ + +void test_dma_memcmp_mtrace(); + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_memcpy_mops.cpp b/emulator-asm/src/dma/test/test_dma_memcpy_mops.cpp new file mode 100644 index 000000000..a7b12899c --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_memcpy_mops.cpp @@ -0,0 +1,159 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_dma_mem_mops.hpp" +#include "test_dma_memcpy_mops.hpp" +#include "test_dma_tools.hpp" +#include "test_dma_encode.hpp" + +extern "C" { + uint64_t test_asm_dma_memcpy_mops(uint8_t *dst, uint8_t *src, size_t count, uint64_t *trace); +} + +class TestDmaMemCpyMops: public TestDmaMemMops { +protected: + uint64_t *prev_dst; + bool execute_single_test(void); + bool check_mop(size_t index, uint64_t expected, const char *tag); +public: + TestDmaMemCpyMops(size_t max_count = 1024); + virtual ~TestDmaMemCpyMops(); + void run(void); +}; + +TestDmaMemCpyMops::TestDmaMemCpyMops(size_t max_count): + TestDmaMemMops(max_count) { + prev_dst = (uint64_t *)malloc(data_size); +} + +TestDmaMemCpyMops::~TestDmaMemCpyMops(void) { + free(prev_dst); +} + +void TestDmaMemCpyMops::run(void) { + fill_pattern(src,data_size, 3013102105130209); + printf("SRC:0x%08lX DST:0x%08lX\n", (uint64_t)src, (uint64_t)dst); + size_t total_tests = 0; + for (dst_offset = 0; dst_offset < 7; ++dst_offset) { + for (src_offset = 0; src_offset < 7; ++src_offset) { + for (count = 0; count < 1024; ++count) { + if (!execute_single_test()) { + printf("\nTest is [\x1B[1;31mFAIL\x1B[0m]\n"); + dump(); + return; + } + ++total_tests; + } + } + } + printf("\nAll %ld tests are [\x1B[1;32mOK\x1B[0m]\n", total_tests); +} + +bool TestDmaMemCpyMops::check_mop(size_t index, uint64_t expected, const char *tag) { + if (mtrace[index] != expected) { + printf("\nERROR: %s expected: 0x%016lX (%s) found: mtrace[%ld]:%016lX (%s)\n", tag, expected, + decode(expected).c_str(), index, mtrace[index], decode(mtrace[index]).c_str()); + return false; + } + return true; +} +bool TestDmaMemCpyMops::execute_single_test(void) { + memset(test_trace, 0, trace_size); + fill_pattern(dst, data_size, 1821904675); + uint8_t *p_dst = dst + dst_offset; + uint8_t *p_src = src + src_offset; + + memcpy(prev_dst, dst, data_size); + printf("\rTEST dst_offset:%ld src_offset:%ld count:%4ld", + dst_offset, src_offset, count); + fflush(stdout); + uint64_t res = test_asm_dma_memcpy_mops(p_dst, p_src, count, test_trace); + size_t trace_count = test_trace[0]; + if (trace_count < 1) { + printf("\nERROR: invalid trace_count %ld\n", trace_count); + return false; + } + uint64_t _dst = (uint64_t)dst + dst_offset; + if (res != _dst) { + printf("\nERROR: invalid result expected:0x%08lX found:0x%08lX\n", _dst, res); + return false; + } + if (mtrace[0] != encode_aligned_read(EXTRA_PARAMETER_ADDR)) { + printf("\nERROR: not found valid param read\n"); + return false; + } + // uint64_t encode = calculate_encode((uint64_t)p_dst, (uint64_t)p_src, count); + size_t index = 1; + size_t pre_count = (dst_offset > 0 && count > 0) ? 8 - dst_offset : 0; + if (pre_count > count) { + pre_count = count; + } + if (pre_count > 0) { + size_t src_blocks = 1 + ((src_offset + pre_count) > 8); + if (!check_mop(index, encode_aligned_read((uint64_t)dst), "PRE pre write") || + !check_mop(index + 1, encode_aligned_x_read((uint64_t)src, src_blocks), "PRE src read")) { + return false; + } + index += 2; + } + size_t loop_count = (count - pre_count) >> 3; + size_t post_count = (count - pre_count) & 0x07; + if (post_count > 0) { + uint64_t src_post = ((uint64_t)src + src_offset + pre_count + loop_count * 8) & ~0x07; + uint64_t dst_post = ((uint64_t)dst + dst_offset + pre_count + loop_count * 8) & ~0x07; + size_t src_blocks = 1 + ((((src_offset + pre_count) & 0x07) + post_count) > 8); + if (!check_mop(index, encode_aligned_read((uint64_t)dst_post), "POST pre write") || + !check_mop(index + 1, encode_aligned_x_read((uint64_t)src_post, src_blocks), "POST src read")) { + return false; + } + index += 2; + } + if (loop_count > 0) { + uint64_t src_loop = ((uint64_t)src + src_offset + pre_count) & ~0x07; + size_t src_count = dst_offset == src_offset ? loop_count : (loop_count + 1); + if (!check_mop(index, encode_aligned_block_read(src_loop, src_count), "LOOP src read")) { + return false; + } + ++index; + } + if (count > 0) { + size_t dst_qwords = (dst_offset + count + 7) >> 3; + if (!check_mop(index, encode_aligned_block_write((uint64_t)dst, dst_qwords), "dst write")) { + return false; + } + ++index; + } + if (trace_count != index) { + printf("ERROR: invalid mtrace len expected:%ld vs found:%ld\n", index, trace_count); + return false; + } + memcpy((uint8_t *)prev_dst + dst_offset, src + src_offset, count); + if (memcmp(prev_dst, dst, data_size) != 0) { + int errors = 0; + uint8_t *_dst = (uint8_t *)prev_dst; + for (size_t i = 0; i < data_size; ++i) { + if (_dst[i] == src[i]) continue; + printf("[%ld] 0x%02X 0x%02X NO MATCH\n", i, _dst[i], src[i]); + ++errors; + if (errors > 16) { + printf(".... and more\n"); + break; + } + } + printf("\nERROR: memcpy operation\n"); + return false; + } + return true; +} + +void test_dma_memcpy_mops() { + printf("\x1B[1;34mTEST DMA MEMCPY MOPS =================================================\x1B[0m\n"); + TestDmaMemCpyMops test(1024); + test.run(); +} diff --git a/emulator-asm/src/dma/test/test_dma_memcpy_mops.hpp b/emulator-asm/src/dma/test/test_dma_memcpy_mops.hpp new file mode 100644 index 000000000..ef4199215 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_memcpy_mops.hpp @@ -0,0 +1,6 @@ +#ifndef __TEST_DMA_MEMCPY_MOPS__HPP__ +#define __TEST_DMA_MEMCPY_MOPS__HPP__ + +void test_dma_memcpy_mops(); + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_memcpy_mtrace.cpp b/emulator-asm/src/dma/test/test_dma_memcpy_mtrace.cpp new file mode 100644 index 000000000..6f915ebae --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_memcpy_mtrace.cpp @@ -0,0 +1,148 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_dma_mem_mtrace.hpp" +#include "test_dma_memcpy_mtrace.hpp" +#include "test_dma_tools.hpp" +#include "test_dma_encode.hpp" + +#define BUFFER_SIZE (sizeof(uint64_t) * 2 * 1024) + +extern "C" { + uint64_t test_asm_dma_memcpy_mtrace(uint8_t *dst, uint8_t *src, size_t count, uint64_t *trace); +} + +class TestDmaMemCpyMtrace: public TestDmaMemMtrace { +protected: + uint64_t *prev_dst; + uint64_t *check_dst; + bool execute_single_test(void); +public: + TestDmaMemCpyMtrace(size_t max_count = 1024); + virtual ~TestDmaMemCpyMtrace(); + void run(void); +}; + +TestDmaMemCpyMtrace::TestDmaMemCpyMtrace(size_t max_count): + TestDmaMemMtrace(max_count) { + prev_dst = (uint64_t *)malloc(data_size); + check_dst = (uint64_t *)malloc(data_size); +} + +TestDmaMemCpyMtrace::~TestDmaMemCpyMtrace(void) { + free(prev_dst); + free(check_dst); +} + +void TestDmaMemCpyMtrace::run(void) { + fill_pattern(src,data_size, 3013102105130209); + size_t total_tests = 0; + for (dst_offset = 0; dst_offset < 7; ++dst_offset) { + for (src_offset = 0; src_offset < 7; ++src_offset) { + for (count = 0; count < 1024; ++count) { + if (!execute_single_test()) { + printf("\nTest is [\x1B[1;31mFAIL\x1B[0m]\n"); + dump(); + return; + } + ++total_tests; + } + } + } + printf("\nAll %ld tests are [\x1B[1;32mOK\x1B[0m]\n", total_tests); +} + +bool TestDmaMemCpyMtrace::execute_single_test(void) { + memset(test_trace, 0, trace_size); + fill_pattern(dst, data_size, 1821904675); + uint8_t *p_dst = dst + dst_offset; + uint8_t *p_src = src + src_offset; + + memcpy(prev_dst, dst, data_size); + printf("\rTEST dst_offset:%ld src_offset:%ld count:%4ld", + dst_offset, src_offset, count); + fflush(stdout); + uint64_t res = test_asm_dma_memcpy_mtrace(p_dst, p_src, count, test_trace); + size_t trace_count = test_trace[0]; + if (trace_count < 1) { + printf("\nERROR: invalid trace_count %ld\n", trace_count); + return false; + } + uint64_t _dst = (uint64_t)dst + dst_offset; + if (res != _dst) { + printf("\nERROR: invalid result expected:0x%08lX found:0x%08lX\n", _dst, res); + return false; + } + uint64_t encode = calculate_encode((uint64_t)p_dst, (uint64_t)p_src, count); + if (mtrace[0] != encode) { + printf("\nERROR: invalid encoded\n"); + print_encode_mismatch(encode, mtrace[0]); + return false; + } + size_t index = 1; + size_t pre_count = (dst_offset > 0 && count > 0) ? 8 - dst_offset : 0; + if (pre_count > count) { + pre_count = count; + } + if (pre_count > 0) { + if (mtrace[index] != prev_dst[0]) { + printf("\nERROR: pre write pre-value expected: dst[0]:0x%016lX found: mtrace[%ld]:%016lX\n", prev_dst[0], index, mtrace[index]); + return false; + } + ++index; + } + + size_t post_count = (count - pre_count) & 0x07; + if (post_count > 0) { + size_t last_dst_index = (dst_offset + count - 1) >> 3; + if (mtrace[index] != prev_dst[last_dst_index]) { + printf("\nERROR: post write pre-value expected: dst[%ld]:0x%016lX vs found mtrace[%ld]:0x%016lX\n", last_dst_index, prev_dst[last_dst_index], index, mtrace[index]); + return false; + } + ++index; + } + size_t src_qwords = count > 0 ? (src_offset + count + 7) >> 3 : 0; + for (size_t i_src = 0; i_src < src_qwords; ++i_src) { + if (mtrace[index] != aligned_src[i_src]) { + printf("ERROR: src value expected: src[%ld]:0x%016lX vs found mtrace[%ld]:0x%016lX\n", i_src, aligned_src[i_src], index, mtrace[index]); + return false; + } + ++index; + } + if (trace_count != index) { + printf("ERROR: invalid mtrace len expected:%ld vs found:%ld\n", index, trace_count); + size_t _count = index > trace_count ? index + 1: trace_count + 1; + for (size_t i = 0; i <= _count; ++i) { + printf("mtrace[%ld] 0x%016lX\n", i, mtrace[i]); + } + return false; + } + memcpy((uint8_t *)prev_dst + dst_offset, src + src_offset, count); + if (memcmp(prev_dst, dst, data_size) != 0) { + int errors = 0; + uint8_t *_dst = (uint8_t *)prev_dst; + for (size_t i = 0; i < data_size; ++i) { + if (_dst[i] == src[i]) continue; + printf("[%ld] 0x%02X 0x%02X NO MATCH\n", i, _dst[i], src[i]); + ++errors; + if (errors > 16) { + printf(".... and more\n"); + break; + } + } + printf("\nERROR: memcpy operation\n"); + return false; + } + return true; +} + +void test_dma_memcpy_mtrace() { + printf("\x1B[1;34mTEST DMA MEMCPY MTRACE =================================================\x1B[0m\n"); + TestDmaMemCpyMtrace test(1024); + test.run(); +} diff --git a/emulator-asm/src/dma/test/test_dma_memcpy_mtrace.hpp b/emulator-asm/src/dma/test/test_dma_memcpy_mtrace.hpp new file mode 100644 index 000000000..6942343c4 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_memcpy_mtrace.hpp @@ -0,0 +1,6 @@ +#ifndef __TEST_DMA_MEMCPY_MTRACE__HPP__ +#define __TEST_DMA_MEMCPY_MTRACE__HPP__ + +void test_dma_memcpy_mtrace(); + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_memset_mops.cpp b/emulator-asm/src/dma/test/test_dma_memset_mops.cpp new file mode 100644 index 000000000..14ee6618b --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_memset_mops.cpp @@ -0,0 +1,126 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_dma_mem_mops.hpp" +#include "test_dma_memset_mops.hpp" +#include "test_dma_tools.hpp" +#include "test_dma_encode.hpp" + +extern "C" { + uint64_t test_asm_dma_memset_mops(uint8_t *dst, uint64_t byte, size_t count, uint64_t *trace); +} + +class TestDmaMemSetMops: public TestDmaMemMops { +protected: + uint64_t *prev_dst; + uint64_t byte; + bool execute_single_test(void); + bool check_mop(size_t index, uint64_t expected, const char *tag); +public: + TestDmaMemSetMops(size_t max_count = 1024); + virtual ~TestDmaMemSetMops(); + void run(void); +}; + +TestDmaMemSetMops::TestDmaMemSetMops(size_t max_count): + TestDmaMemMops(max_count, false) { + prev_dst = (uint64_t *)malloc(data_size); +} + +TestDmaMemSetMops::~TestDmaMemSetMops(void) { + free(prev_dst); +} + +void TestDmaMemSetMops::run(void) { + printf("DST:0x%08lX\n", (uint64_t)dst); + size_t total_tests = 0; + src_offset = 0; + for (byte = 0; byte <= 0xFF; ++byte) { + for (dst_offset = 0; dst_offset < 7; ++dst_offset) { + for (count = 0; count < 1024; ++count) { + if (!execute_single_test()) { + printf("\nTest is [\x1B[1;31mFAIL\x1B[0m]\n"); + dump(); + return; + } + ++total_tests; + } + } + } + printf("\nAll %ld tests are [\x1B[1;32mOK\x1B[0m]\n", total_tests); +} + +bool TestDmaMemSetMops::check_mop(size_t index, uint64_t expected, const char *tag) { + if (mtrace[index] != expected) { + printf("\nERROR: %s expected: 0x%016lX (%s) found: mtrace[%ld]:%016lX (%s)\n", tag, expected, + decode(expected).c_str(), index, mtrace[index], decode(mtrace[index]).c_str()); + return false; + } + return true; +} +bool TestDmaMemSetMops::execute_single_test(void) { + memset(test_trace, 0, trace_size); + fill_pattern(dst, data_size, 1821904675); + uint8_t *p_dst = dst + dst_offset; + + memcpy(prev_dst, dst, data_size); + printf("\rTEST byte:0x%02lX dst_offset:%ld count:%4ld", + byte, dst_offset, count); + fflush(stdout); + uint64_t res = test_asm_dma_memset_mops(p_dst, byte, count, test_trace); + size_t trace_count = test_trace[0]; + uint64_t _dst = (uint64_t)dst + dst_offset; + if (res != _dst) { + printf("\nERROR: invalid result expected:0x%08lX found:0x%08lX\n", _dst, res); + return false; + } + size_t index = 0; + size_t pre_count = (dst_offset > 0 && count > 0) ? 8 - dst_offset : 0; + if (pre_count > count) { + pre_count = count; + } + if (pre_count > 0) { + if (!check_mop(index, encode_aligned_read((uint64_t)dst), "PRE pre write") ) { + return false; + } + index += 1; + } + size_t loop_count = (count - pre_count) >> 3; + size_t post_count = (count - pre_count) & 0x07; + if (post_count > 0) { + uint64_t dst_post = ((uint64_t)dst + dst_offset + pre_count + loop_count * 8) & ~0x07; + if (!check_mop(index, encode_aligned_read((uint64_t)dst_post), "POST pre write")) { + return false; + } + index += 1; + } + if (count > 0) { + size_t dst_qwords = (dst_offset + count + 7) >> 3; + if (!check_mop(index, encode_aligned_block_write((uint64_t)dst, dst_qwords), "dst write")) { + return false; + } + ++index; + } + if (trace_count != index) { + printf("ERROR: invalid mtrace len expected:%ld vs found:%ld\n", index, trace_count); + return false; + } + memset((uint8_t *)prev_dst + dst_offset, byte, count); + if (memcmp(prev_dst, dst, data_size) != 0) { + printf("\nERROR: memset operation\n"); + return false; + } + return true; +} + +void test_dma_memset_mops() { + printf("\x1B[1;34mTEST DMA MEMSET MOPS =================================================\x1B[0m\n"); + TestDmaMemSetMops test(1024); + test.run(); +} diff --git a/emulator-asm/src/dma/test/test_dma_memset_mops.hpp b/emulator-asm/src/dma/test/test_dma_memset_mops.hpp new file mode 100644 index 000000000..e5ec86c8f --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_memset_mops.hpp @@ -0,0 +1,6 @@ +#ifndef __TEST_DMA_MEMSET_MOPS__HPP__ +#define __TEST_DMA_MEMSET_MOPS__HPP__ + +void test_dma_memset_mops(); + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_memset_mtrace.cpp b/emulator-asm/src/dma/test/test_dma_memset_mtrace.cpp new file mode 100644 index 000000000..602d63775 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_memset_mtrace.cpp @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_dma_mem_mtrace.hpp" +#include "test_dma_memset_mtrace.hpp" +#include "test_dma_tools.hpp" +#include "test_dma_encode.hpp" + +extern "C" { + uint64_t test_asm_dma_memset_mtrace(uint8_t *dst, uint64_t byte, size_t count, uint64_t *trace); +} + +class TestDmaMemSetMtrace: public TestDmaMemMtrace { +protected: + uint64_t *prev_dst; + uint64_t byte; + bool execute_single_test(void); +public: + TestDmaMemSetMtrace(size_t max_count = 1024); + virtual ~TestDmaMemSetMtrace(); + void run(void); +}; + +TestDmaMemSetMtrace::TestDmaMemSetMtrace(size_t max_count): + TestDmaMemMtrace(max_count,false) { + prev_dst = (uint64_t *)malloc(data_size); +} + +TestDmaMemSetMtrace::~TestDmaMemSetMtrace(void) { + free(prev_dst); +} + +void TestDmaMemSetMtrace::run(void) { + size_t total_tests = 0; + src_offset = 0; + for (byte = 0; byte <= 0xFF; ++byte) { + for (dst_offset = 0; dst_offset < 7; ++dst_offset) { + for (count = 0; count < 1024; ++count) { + if (!execute_single_test()) { + printf("\nTest is [\x1B[1;31mFAIL\x1B[0m]\n"); + printf("---------------------------------\n"); + size_t dst_qwords = (dst_offset + count + 7) >> 3; + for (size_t index = 0; index < dst_qwords; ++index) { + printf("prev_dst64[%ld] 0x%016lX\n", index, prev_dst[index]); + } + dump(); + return; + } + ++total_tests; + } + } + } + printf("\nAll %ld tests are [\x1B[1;32mOK\x1B[0m]\n", total_tests); +} + +bool TestDmaMemSetMtrace::execute_single_test(void) { + memset(test_trace, 0, trace_size); + fill_pattern(dst, data_size, 18219046755); + uint8_t *p_dst = dst + dst_offset; + + memcpy(prev_dst, dst, data_size); + printf("\rTEST byte:0x%02lX dst_offset:%ld count:%4ld", + byte, dst_offset, count); + fflush(stdout); + uint64_t res = test_asm_dma_memset_mtrace(p_dst, byte, count, test_trace); + size_t trace_count = test_trace[0]; + if (trace_count < 1) { + printf("\nERROR: invalid trace_count %ld\n", trace_count); + return false; + } + uint64_t _dst = (uint64_t)dst + dst_offset; + if (res != _dst) { + printf("\nERROR: invalid result expected:0x%08lX found:0x%08lX\n", _dst, res); + return false; + } + uint64_t encode = calculate_encode_memset((uint64_t)p_dst, count, byte); + if (mtrace[0] != encode) { + printf("\nERROR: invalid encoded\n"); + print_encode_mismatch(encode, mtrace[0]); + return false; + } + size_t index = 1; + size_t pre_count = (dst_offset > 0 && count > 0) ? 8 - dst_offset : 0; + if (pre_count > count) { + pre_count = count; + } + if (pre_count > 0) { + if (mtrace[index] != prev_dst[0]) { + printf("\nERROR: pre write pre-value expected: dst[0]:0x%016lX found: mtrace[%ld]:%016lX\n", prev_dst[0], index, mtrace[index]); + return false; + } + ++index; + } + + size_t post_count = (count - pre_count) & 0x07; + if (post_count > 0) { + size_t last_dst_index = (dst_offset + count - 1) >> 3; + if (mtrace[index] != prev_dst[last_dst_index]) { + printf("\nERROR: post write pre-value expected: dst[%ld]:0x%016lX vs found mtrace[%ld]:0x%016lX\n", last_dst_index, prev_dst[last_dst_index], index, mtrace[index]); + return false; + } + ++index; + } + if (trace_count != index) { + printf("ERROR: invalid mtrace len expected:%ld vs found:%ld\n", index, trace_count); + size_t _count = index > trace_count ? index + 1: trace_count + 1; + for (size_t i = 0; i <= _count; ++i) { + printf("mtrace[%ld] 0x%016lX\n", i, mtrace[i]); + } + return false; + } + return true; +} + +void test_dma_memset_mtrace() { + printf("\x1B[1;34mTEST DMA MEMSET MTRACE =================================================\x1B[0m\n"); + TestDmaMemSetMtrace test(1024); + test.run(); +} diff --git a/emulator-asm/src/dma/test/test_dma_memset_mtrace.hpp b/emulator-asm/src/dma/test/test_dma_memset_mtrace.hpp new file mode 100644 index 000000000..7c6753c82 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_memset_mtrace.hpp @@ -0,0 +1,6 @@ +#ifndef __TEST_DMA_MEMSET_MTRACE__HPP__ +#define __TEST_DMA_MEMSET_MTRACE__HPP__ + +void test_dma_memset_mtrace(); + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_dma_tools.cpp b/emulator-asm/src/dma/test/test_dma_tools.cpp new file mode 100644 index 000000000..3a7b58020 --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_tools.cpp @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "test_dma_tools.hpp" + +uint8_t *fill_pattern(uint8_t *data, size_t count, uint64_t seed) { + std::mt19937_64 rng(seed); + uint64_t *p_data = (uint64_t *)data; + + size_t count64 = count >> 3; + for (size_t i = 0; i < count64; ++i) { + p_data[i] = rng(); + } + size_t count_bytes = count & 0x07; + if (count_bytes > 0) { + uint64_t value = rng(); + uint8_t *value_bytes = (uint8_t *)&value; + uint8_t *bytes = data + (count64 << 3); + for (size_t i = 0; i < count_bytes; ++i) { + bytes[i] = value_bytes[i]; + } + + } + return data; +} + +bool check_pattern_slice(uint64_t *data, size_t from, size_t to, uint64_t seed) { + std::mt19937_64 rng(seed); + + for (size_t i = 0; i < to; ++i) { + const uint64_t rvalue = rng(); + if (i < from) continue; + if (data[i] != rvalue) { + return false; + } + } + return true; +} + +bool check_pattern_exclude_slice(uint64_t *data, size_t count, size_t from, size_t to, uint64_t seed) { + std::mt19937_64 rng(seed); + + for (size_t i = 0; i < count; ++i) { + const uint64_t rvalue = rng(); + if (i >= from && i <= to) continue; + if (data[i] != rvalue) { + return false; + } + } + return true; +} + +int create_memcmp_data(uint8_t *dst, uint8_t *src, size_t ef_count, int diff_dst_src) { + if (ef_count == 0) { + return 0; + } + + size_t count = diff_dst_src == 0 ? ef_count : ef_count - 1; + + for (size_t i = 0; i < count; ++i) { + dst[i] = src[i]; + } + if (diff_dst_src > 0) { + if (src[count] == 255) { + src[count] = 254; + dst[count] = 255; + return 1; + } + if (diff_dst_src > (255 - (int)src[count])) { + dst[count] = 255; + return 255 - (int)src[count]; + } + dst[count] = (int) src[count] + diff_dst_src; + return diff_dst_src; + } + if (diff_dst_src < 0) { + if (src[count] == 0) { + src[count] = 1; + dst[count] = 0; + return -1; + } + if (diff_dst_src < (0 - (int)src[count])) { + dst[count] = 0; + return 0 - (int)src[count]; + } + dst[count] = (int) src[count] + diff_dst_src; + return diff_dst_src; + } + return 0; +} diff --git a/emulator-asm/src/dma/test/test_dma_tools.hpp b/emulator-asm/src/dma/test/test_dma_tools.hpp new file mode 100644 index 000000000..26fb68faf --- /dev/null +++ b/emulator-asm/src/dma/test/test_dma_tools.hpp @@ -0,0 +1,15 @@ +#ifndef __TEST_DMA_TOOLS__HPP__ +#define __TEST_DMA_TOOLS__HPP__ + +#include +#include +#include + +#include "test_dma_tools.hpp" + +uint8_t *fill_pattern(uint8_t *data, size_t count, uint64_t seed); +bool check_pattern_slice(uint64_t *data, size_t from, size_t to, uint64_t seed); +bool check_pattern_exclude_slice(uint64_t *data, size_t count, size_t from, size_t to, uint64_t seed); +int create_memcmp_data(uint8_t *dst, uint8_t *src, size_t ef_count, int diff_dst_src); + +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test/test_mock.cpp b/emulator-asm/src/dma/test/test_mock.cpp new file mode 100644 index 000000000..d5edbd569 --- /dev/null +++ b/emulator-asm/src/dma/test/test_mock.cpp @@ -0,0 +1,16 @@ +#include +#include + +#include "test_mock.hpp" + +extern "C" { + uint64_t trace_address_threshold = 0; + uint64_t fcall_ctx[FCALL_CTX_LENGTH]; + uint64_t MEM_FREE_INPUT = 0; +} + +extern "C" void _realloc_trace(void) { + +} + + diff --git a/emulator-asm/src/dma/test/test_mock.hpp b/emulator-asm/src/dma/test/test_mock.hpp new file mode 100644 index 000000000..b6319cf80 --- /dev/null +++ b/emulator-asm/src/dma/test/test_mock.hpp @@ -0,0 +1,26 @@ +#ifndef __TEST_MOCK__HPP__ +#define __TEST_MOCK__HPP__ + +#include +#include + +#define FCALL_PARAMS_LENGTH 386 +#define FCALL_RESULT_LENGTH 8193 +#define FCALL_FUNCTION_ID 0 +#define FCALL_PARAMS_CAPACITY (FCALL_FUNCTION_ID + 1) +#define FCALL_PARAMS_SIZE (FCALL_PARAMS_CAPACITY + 1) +#define FCALL_PARAMS (FCALL_PARAMS_SIZE + 1) +#define FCALL_RESULT_CAPACITY (FCALL_PARAMS + FCALL_PARAMS_LENGTH) +#define FCALL_RESULT_SIZE (FCALL_RESULT_CAPACITY + 1) +#define FCALL_RESULT (FCALL_RESULT_SIZE + 1) // 391 +#define FCALL_RESULT_GOT (FCALL_RESULT + FCALL_RESULT_LENGTH) // 8584 +#define FCALL_CTX_LENGTH (FCALL_RESULT_GOT + 1) // 8585 + +extern "C" { + extern uint64_t trace_address_threshold; + extern uint64_t fcall_ctx[FCALL_CTX_LENGTH]; + extern uint64_t MEM_FREE_INPUT; +} + +extern "C" void _realloc_trace(void); +#endif \ No newline at end of file diff --git a/emulator-asm/src/dma/test_dma_api.asm b/emulator-asm/src/dma/test_dma_api.asm new file mode 100644 index 000000000..8ffbd6429 --- /dev/null +++ b/emulator-asm/src/dma/test_dma_api.asm @@ -0,0 +1,75 @@ +.intel_syntax noprefix +.code64 + +.section .text + +.macro ABI_WRAPPER abi_call asm_call +.global \abi_call +.extern \asm_call + +\abi_call: + push r12 # 1 cycle - save callee-saved register + push r13 # 1 cycle - save callee-saved register + push r9 # 1 cycle - save caller-saved register (used internally) + push rbx # 1 cycle - save callee-saved register + + mov r12, rcx # 1 cycle - setup mtrace address from count parameter + mov r13, 1 # 1 cycle - initialize mtrace index to 1, first position for count + call \asm_call # ~3 cycles + function cost + + dec r13 + mov [r12], r13 # store in first position the length + pop rbx # 1 cycle - restore register + pop r9 # 1 cycle - restore register + pop r13 # 1 cycle - restore register + pop r12 # 1 cycle - restore register + + ret +.endm + +# [memcpy] +# PARAMETERS (System V AMD64 ABI): +# rdi = dst +# rsi = src +# rdx = count +# rcx = mtrace_ptr +# RETURN: rax = dst + +ABI_WRAPPER test_asm_dma_memcpy_mops direct_dma_memcpy_mops +ABI_WRAPPER test_asm_dma_memcpy_mtrace direct_dma_memcpy_mtrace + +# [memcmp] +# PARAMETERS (System V AMD64 ABI): +# rdi = dst +# rsi = src +# rdx = count +# rcx = mtrace_ptr +# RETURN: rax = result compare + +ABI_WRAPPER test_asm_dma_memcmp_mops direct_dma_memcmp_mops +ABI_WRAPPER test_asm_dma_memcmp_mtrace direct_dma_memcmp_mtrace + +# [memset] +# PARAMETERS (System V AMD64 ABI): +# rdi = dst +# rsi = byte +# rdx = count +# rcx = mtrace_ptr +# RETURN: rax = result compare + + +ABI_WRAPPER test_asm_dma_memset_mops direct_dma_xmemset_mops +ABI_WRAPPER test_asm_dma_memset_mtrace direct_dma_xmemset_mtrace + +# [inputcpy] +# PARAMETERS (System V AMD64 ABI): +# rdi = dst +# rsi = 0 +# rdx = count +# rcx = mtrace_ptr +# RETURN: rax = result compare + +ABI_WRAPPER test_asm_dma_inputcpy_mops direct_dma_inputcpy_mops +ABI_WRAPPER test_asm_dma_inputcpy_mtrace direct_dma_inputcpy_mtrace + +.section .note.GNU-stack,"",%progbits diff --git a/emulator-asm/src/emu.c b/emulator-asm/src/emu.c index ba92f3800..f61338ae7 100644 --- a/emulator-asm/src/emu.c +++ b/emulator-asm/src/emu.c @@ -10,11 +10,14 @@ #include "emu.hpp" #include "../../lib-c/c/src/bigint/add256.hpp" #include "../../lib-c/c/src/ec/ec.hpp" +#include "../../lib-c/c/src/secp256r1/secp256r1.hpp" #include "../../lib-c/c/src/fcall/fcall.hpp" #include "../../lib-c/c/src/arith256/arith256.hpp" #include "../../lib-c/c/src/arith384/arith384.hpp" #include "../../lib-c/c/src/bn254/bn254.hpp" #include "../../lib-c/c/src/bls12_381/bls12_381.hpp" +#include "../../lib-c/c/src/poseidon2/poseidon2_goldilocks.hpp" +#include "../../lib-c/c/src/blake2/blake2.hpp" #include "bcon/bcon_sha256.hpp" extern void zisk_sha256(uint64_t state[4], uint64_t input[8]); @@ -37,6 +40,10 @@ void reset_asm_call_metrics (void) asm_call_metrics.keccak_duration = 0; asm_call_metrics.sha256_counter = 0; asm_call_metrics.sha256_duration = 0; + asm_call_metrics.blake2_counter = 0; + asm_call_metrics.blake2_duration = 0; + asm_call_metrics.poseidon2_counter = 0; + asm_call_metrics.poseidon2_duration = 0; asm_call_metrics.arith256_counter = 0; asm_call_metrics.arith256_duration = 0; asm_call_metrics.arith256_mod_counter = 0; @@ -45,6 +52,10 @@ void reset_asm_call_metrics (void) asm_call_metrics.secp256k1_add_duration = 0; asm_call_metrics.secp256k1_dbl_counter = 0; asm_call_metrics.secp256k1_dbl_duration = 0; + asm_call_metrics.secp256r1_add_counter = 0; + asm_call_metrics.secp256r1_add_duration = 0; + asm_call_metrics.secp256r1_dbl_counter = 0; + asm_call_metrics.secp256r1_dbl_duration = 0; asm_call_metrics.fcall_counter = 0; asm_call_metrics.fcall_duration = 0; asm_call_metrics.inverse_fp_ec_counter = 0; @@ -103,6 +114,26 @@ void print_asm_call_metrics (uint64_t total_duration) duration, percentage); + // Print blake2 metrics + percentage = total_duration == 0 ? 0 : (asm_call_metrics.blake2_duration * 1000) / total_duration; + duration = asm_call_metrics.blake2_counter == 0 ? 0 : (asm_call_metrics.blake2_duration * 1000) / asm_call_metrics.blake2_counter; + asm_call_total_duration += asm_call_metrics.blake2_duration; + printf("Blake2: counter = %lu, duration = %lu us, single duration = %lu ns, per thousand = %lu \n", + asm_call_metrics.blake2_counter, + asm_call_metrics.blake2_duration, + duration, + percentage); + + // Print poseidon2 metrics + percentage = total_duration == 0 ? 0 : (asm_call_metrics.poseidon2_duration * 1000) / total_duration; + duration = asm_call_metrics.poseidon2_counter == 0 ? 0 : (asm_call_metrics.poseidon2_duration * 1000) / asm_call_metrics.poseidon2_counter; + asm_call_total_duration += asm_call_metrics.poseidon2_duration; + printf("Poseidon2: counter = %lu, duration = %lu us, single duration = %lu ns, per thousand = %lu \n", + asm_call_metrics.poseidon2_counter, + asm_call_metrics.poseidon2_duration, + duration, + percentage); + // Print arith256 metrics percentage = total_duration == 0 ? 0 : (asm_call_metrics.arith256_duration * 1000) / total_duration; duration = asm_call_metrics.arith256_counter == 0 ? 0 : (asm_call_metrics.arith256_duration * 1000) / asm_call_metrics.arith256_counter; @@ -143,6 +174,26 @@ void print_asm_call_metrics (uint64_t total_duration) duration, percentage); + // Print secp256r1_add metrics + percentage = total_duration == 0 ? 0 : (asm_call_metrics.secp256r1_add_duration * 1000) / total_duration; + duration = asm_call_metrics.secp256r1_add_counter == 0 ? 0 : (asm_call_metrics.secp256r1_add_duration * 1000) / asm_call_metrics.secp256r1_add_counter; + asm_call_total_duration += asm_call_metrics.secp256r1_add_duration; + printf("secp256r1_add: counter = %lu, duration = %lu us, single duration = %lu ns, per thousand = %lu \n", + asm_call_metrics.secp256r1_add_counter, + asm_call_metrics.secp256r1_add_duration, + duration, + percentage); + + // Print secp256r1_dbl metrics + percentage = total_duration == 0 ? 0 : (asm_call_metrics.secp256r1_dbl_duration * 1000) / total_duration; + duration = asm_call_metrics.secp256r1_dbl_counter == 0 ? 0 : (asm_call_metrics.secp256r1_dbl_duration * 1000) / asm_call_metrics.secp256r1_dbl_counter; + asm_call_total_duration += asm_call_metrics.secp256r1_dbl_duration; + printf("secp256r1_dbl: counter = %lu, duration = %lu us, single duration = %lu ns, per thousand = %lu \n", + asm_call_metrics.secp256r1_dbl_counter, + asm_call_metrics.secp256r1_dbl_duration, + duration, + percentage); + // Print fcall metrics percentage = total_duration == 0 ? 0 : (asm_call_metrics.fcall_duration * 1000) / total_duration; duration = asm_call_metrics.fcall_counter == 0 ? 0 : (asm_call_metrics.fcall_duration * 1000) / asm_call_metrics.fcall_counter; @@ -347,12 +398,23 @@ void precompile_cache_cleanup(void) precompile_cache_loading = false; } +// #define ASM_PRECOMPILE_CACHE_DEBUG +#ifdef ASM_PRECOMPILE_CACHE_DEBUG +uint64_t total_precompile_cache_size = 0; +uint64_t total_precompile_cache_counter = 0; +#endif void precompile_cache_store( uint8_t* data, uint64_t size) { assert(precompile_file != NULL); assert(precompile_cache_storing == true); fwrite(data, 1, size, precompile_file); fflush(precompile_file); +#ifdef ASM_PRECOMPILE_CACHE_DEBUG + uint64_t previous_total_precompile_cache_size = total_precompile_cache_size; + total_precompile_cache_size += size; + total_precompile_cache_counter++; + printf("precompile_cache_store() Stored %lu bytes at pos=%lu file_size=%lu total_precompile_cache_size=%lu total_precompile_cache_counter=%lu\n", size, previous_total_precompile_cache_size, ftell(precompile_file), total_precompile_cache_size, total_precompile_cache_counter); +#endif } void precompile_cache_load( uint8_t* data, uint64_t size) @@ -364,6 +426,11 @@ void precompile_cache_load( uint8_t* data, uint64_t size) printf("precompile_cache_load() Error reading file %s read_size=%lu expected size=%lu pos=%lu\n", precompile_cache_filename, read_size, size, ftell(precompile_file)); exit(-1); } +#ifdef ASM_PRECOMPILE_CACHE_DEBUG + total_precompile_cache_size += size; + total_precompile_cache_counter++; + printf("precompile_cache_load() Loaded %lu bytes at pos=%lu total_precompile_cache_size=%lu total_precompile_cache_counter=%lu\n", size, ftell(precompile_file), total_precompile_cache_size, total_precompile_cache_counter); +#endif } #endif @@ -445,9 +512,17 @@ extern int _opcode_keccak(uint64_t address) #endif #ifdef DEBUG #ifdef ASM_CALL_METRICS - if (emu_verbose) printf("opcode_keccak() calling KeccakF1600() counter=%lu address=%08lx\n", asm_call_metrics.keccak_counter, address); + if (emu_verbose) printf("opcode_keccak() calling keccakf1600_generic() counter=%lu address=%08lx\n", asm_call_metrics.keccak_counter, address); #else - if (emu_verbose) printf("opcode_keccak() calling KeccakF1600() address=%08lx\n", address); + if (emu_verbose) + { + printf("opcode_keccak() calling keccakf1600_generic() address=%08lx\n", address); + for (uint64_t i=0; i<200; i++) + { + printf("%02x", ((uint8_t *)(uintptr_t)address)[i]); + } + printf("\n"); + } #endif #endif @@ -470,7 +545,15 @@ extern int _opcode_keccak(uint64_t address) #endif #ifdef DEBUG - if (emu_verbose) printf("opcode_keccak() called KeccakF1600()\n"); + if (emu_verbose) + { + printf("opcode_keccak() called keccakf1600_generic()\n"); + for (uint64_t i=0; i<200; i++) + { + printf("%02x", ((uint8_t *)(uintptr_t)address)[i]); + } + printf("\n"); + } #endif #ifdef ASM_CALL_METRICS asm_call_metrics.keccak_counter++; @@ -487,9 +570,9 @@ extern int _opcode_sha256(uint64_t * address) #endif #ifdef DEBUG #ifdef ASM_CALL_METRICS - if (emu_verbose) printf("opcode_sha256() calling sha256_transform_2() counter=%lu address=%p\n", asm_call_metrics.sha256_counter, address); + if (emu_verbose) printf("opcode_sha256() calling zisk_sha256() counter=%lu address=%p\n", asm_call_metrics.sha256_counter, address); #else - if (emu_verbose) printf("opcode_sha256() calling sha256_transform_2() address=%p\n", address); + if (emu_verbose) printf("opcode_sha256() calling zisk_sha256() address=%p\n", address); #endif #endif @@ -508,11 +591,11 @@ extern int _opcode_sha256(uint64_t * address) { // Load result from cache precompile_cache_load((uint8_t *)address[0], 4*8); - } + } #endif #ifdef DEBUG - if (emu_verbose) printf("opcode_sha256() called sha256_transform_2()\n"); + if (emu_verbose) printf("opcode_sha256() called zisk_sha256()\n"); #endif #ifdef ASM_CALL_METRICS asm_call_metrics.sha256_counter++; @@ -522,6 +605,90 @@ extern int _opcode_sha256(uint64_t * address) return 0; } +extern int _opcode_blake2(uint64_t * address) +{ +#ifdef ASM_CALL_METRICS + gettimeofday(&asm_call_start, NULL); +#endif +#ifdef DEBUG +#ifdef ASM_CALL_METRICS + if (emu_verbose) printf("opcode_blake2() calling blake2b() counter=%lu address=%p\n", asm_call_metrics.blake2_counter, address); +#else + if (emu_verbose) printf("opcode_blake2() calling blake2b() address=%p\n", address); +#endif +#endif + +#ifdef ASM_PRECOMPILE_CACHE + if (precompile_cache_storing) + { +#endif + // Call blake2b compression function + blake2b_round((uint64_t *)address[1], (uint64_t *)address[2], address[0]); + +#ifdef ASM_PRECOMPILE_CACHE + // Store result in cache + precompile_cache_store((uint8_t *)address[1], 16*8); + } + else if (precompile_cache_loading) + { + // Load result from cache + precompile_cache_load((uint8_t *)address[1], 16*8); + } +#endif + +#ifdef DEBUG + if (emu_verbose) printf("opcode_blake2() called blake2b()\n"); +#endif +#ifdef ASM_CALL_METRICS + asm_call_metrics.blake2_counter++; + gettimeofday(&asm_call_stop, NULL); + asm_call_metrics.blake2_duration += TimeDiff(asm_call_start, asm_call_stop); +#endif + return 0; +} + +extern int _opcode_poseidon2(uint64_t address) +{ +#ifdef ASM_CALL_METRICS + gettimeofday(&asm_call_start, NULL); +#endif +#ifdef DEBUG +#ifdef ASM_CALL_METRICS + if (emu_verbose) printf("opcode_poseidon2() calling poseidon2_hash() counter=%lu address=%08lx\n", asm_call_metrics.poseidon2_counter, address); +#else + if (emu_verbose) printf("opcode_poseidon2() calling poseidon2_hash() address=%08lx\n", address); +#endif +#endif + +#ifdef ASM_PRECOMPILE_CACHE + if (precompile_cache_storing) + { +#endif + // Call poseidon2 compression function + poseidon2_hash((uint64_t *)address); + +#ifdef ASM_PRECOMPILE_CACHE + // Store result in cache + precompile_cache_store((uint8_t *)address, 16*8); + } + else if (precompile_cache_loading) + { + // Load result from cache + precompile_cache_load((uint8_t *)address, 16*8); + } +#endif + +#ifdef DEBUG + if (emu_verbose) printf("opcode_poseidon2() called poseidon2_hash()\n"); +#endif +#ifdef ASM_CALL_METRICS + asm_call_metrics.poseidon2_counter++; + gettimeofday(&asm_call_stop, NULL); + asm_call_metrics.poseidon2_duration += TimeDiff(asm_call_start, asm_call_stop); +#endif + return 0; +} + extern int _opcode_arith256(uint64_t * address) { #ifdef ASM_CALL_METRICS @@ -542,9 +709,9 @@ extern int _opcode_arith256(uint64_t * address) #else printf("opcode_arith256() calling Arith256() address=%p\n", address); #endif - printf("a = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", a[3], a[2], a[1], a[0], a[3], a[2], a[1], a[0]); - printf("b = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", b[3], b[2], b[1], b[0], b[3], b[2], b[1], b[0]); - printf("c = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", c[3], c[2], c[1], c[0], c[3], c[2], c[1], c[0]); + printf("a = %lx:%lx:%lx:%lx\n", a[3], a[2], a[1], a[0]); + printf("b = %lx:%lx:%lx:%lx\n", b[3], b[2], b[1], b[0]); + printf("c = %lx:%lx:%lx:%lx\n", c[3], c[2], c[1], c[0]); } #endif @@ -577,8 +744,8 @@ extern int _opcode_arith256(uint64_t * address) if (emu_verbose) printf("opcode_arith256() called Arith256()\n"); if (emu_verbose) { - printf("dl = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", dl[3], dl[2], dl[1], dl[0], dl[3], dl[2], dl[1], dl[0]); - printf("dh = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", dh[3], dh[2], dh[1], dh[0], dh[3], dh[2], dh[1], dh[0]); + printf("dl = %lx:%lx:%lx:%lx\n", dl[3], dl[2], dl[1], dl[0]); + printf("dh = %lx:%lx:%lx:%lx\n", dh[3], dh[2], dh[1], dh[0]); } #endif #ifdef ASM_CALL_METRICS @@ -609,10 +776,10 @@ extern int _opcode_arith256_mod(uint64_t * address) #else printf("opcode_arith256_mod() calling Arith256Mod() address=%p\n", address); #endif - printf("a = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", a[3], a[2], a[1], a[0], a[3], a[2], a[1], a[0]); - printf("b = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", b[3], b[2], b[1], b[0], b[3], b[2], b[1], b[0]); - printf("c = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", c[3], c[2], c[1], c[0], c[3], c[2], c[1], c[0]); - printf("module = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", module[3], module[2], module[1], module[0], module[3], module[2], module[1], module[0]); + printf("a = %lx:%lx:%lx:%lx\n", a[3], a[2], a[1], a[0]); + printf("b = %lx:%lx:%lx:%lx\n", b[3], b[2], b[1], b[0]); + printf("c = %lx:%lx:%lx:%lx\n", c[3], c[2], c[1], c[0]); + printf("module = %lx:%lx:%lx:%lx\n", module[3], module[2], module[1], module[0]); } #endif @@ -643,7 +810,7 @@ extern int _opcode_arith256_mod(uint64_t * address) if (emu_verbose) printf("opcode_arith256_mod() called Arith256Mod()\n"); if (emu_verbose) { - printf("d = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", d[3], d[2], d[1], d[0], d[3], d[2], d[1], d[0]); + printf("d = %lx:%lx:%lx:%lx\n", d[3], d[2], d[1], d[0]); } #endif #ifdef ASM_CALL_METRICS @@ -674,10 +841,10 @@ extern int _opcode_arith384_mod(uint64_t * address) #else printf("opcode_arith384_mod() calling Arith384Mod() address=%p\n", address); #endif - printf("a = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", a[5], a[4], a[3], a[2], a[1], a[0], a[5], a[4], a[3], a[2], a[1], a[0]); - printf("b = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", b[5], b[4], b[3], b[2], b[1], b[0], b[5], b[4], b[3], b[2], b[1], b[0]); - printf("c = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", c[5], c[4], c[3], c[2], c[1], c[0], c[5], c[4], c[3], c[2], c[1], c[0]); - printf("module = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", module[5], module[4], module[3], module[2], module[1], module[0], module[5], module[4], module[3], module[2], module[1], module[0]); + printf("a = %lx:%lx:%lx:%lx:%lx:%lx\n", a[5], a[4], a[3], a[2], a[1], a[0]); + printf("b = %lx:%lx:%lx:%lx:%lx:%lx\n", b[5], b[4], b[3], b[2], b[1], b[0]); + printf("c = %lx:%lx:%lx:%lx:%lx:%lx\n", c[5], c[4], c[3], c[2], c[1], c[0]); + printf("module = %lx:%lx:%lx:%lx:%lx:%lx\n", module[5], module[4], module[3], module[2], module[1], module[0]); } #endif @@ -708,7 +875,7 @@ extern int _opcode_arith384_mod(uint64_t * address) if (emu_verbose) printf("opcode_arith384_mod() called Arith384Mod()\n"); if (emu_verbose) { - printf("d = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", d[5], d[4], d[3], d[2], d[1], d[0], d[5], d[4], d[3], d[2], d[1], d[0]); + printf("d = %lx:%lx:%lx:%lx:%lx:%lx\n", d[5], d[4], d[3], d[2], d[1], d[0]); } #endif #ifdef ASM_CALL_METRICS @@ -735,10 +902,10 @@ extern int _opcode_secp256k1_add(uint64_t * address) #else printf("opcode_secp256k1_add() calling AddPointEcP() address=%p p1_address=%p p2_address=%p\n", address, p1, p2); #endif - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); - printf("p2.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p2[3], p2[2], p2[1], p2[0], p2[3], p2[2], p2[1], p2[0]); - printf("p2.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p2[7], p2[6], p2[5], p2[4], p2[7], p2[6], p2[5], p2[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); + printf("p2.x = %lx:%lx:%lx:%lx\n", p2[3], p2[2], p2[1], p2[0]); + printf("p2.y = %lx:%lx:%lx:%lx\n", p2[7], p2[6], p2[5], p2[4]); } #endif @@ -773,7 +940,8 @@ extern int _opcode_secp256k1_add(uint64_t * address) #ifdef DEBUG if (emu_verbose) { - printf("p3 = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); + printf("p3.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p3.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); } #endif #ifdef ASM_CALL_METRICS @@ -800,8 +968,8 @@ extern int _opcode_secp256k1_dbl(uint64_t * address) #else printf("opcode_secp256k1_dbl() calling AddPointEcP() address=%p\n", address); #endif - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); } #endif @@ -836,8 +1004,8 @@ extern int _opcode_secp256k1_dbl(uint64_t * address) if (emu_verbose) printf("opcode_secp256k1_dbl() called AddPointEcP()\n"); if (emu_verbose) { - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); } #endif #ifdef ASM_CALL_METRICS @@ -848,6 +1016,135 @@ extern int _opcode_secp256k1_dbl(uint64_t * address) return 0; } +extern int _opcode_secp256r1_add(uint64_t * address) +{ +#ifdef ASM_CALL_METRICS + gettimeofday(&asm_call_start, NULL); +#endif + + uint64_t * p1 = (uint64_t *)address[0]; + uint64_t * p2 = (uint64_t *)address[1]; +#ifdef DEBUG + if (emu_verbose) + { +#ifdef ASM_CALL_METRICS + printf("opcode_secp256r1_add() calling AddPointEcP() counter=%lu address=%p p1_address=%p p2_address=%p\n", asm_call_metrics.secp256r1_add_counter, address, p1, p2); +#else + printf("opcode_secp256r1_add() calling AddPointEcP() address=%p p1_address=%p p2_address=%p\n", address, p1, p2); +#endif + printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); + printf("p2.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p2[3], p2[2], p2[1], p2[0], p2[3], p2[2], p2[1], p2[0]); + printf("p2.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p2[7], p2[6], p2[5], p2[4], p2[7], p2[6], p2[5], p2[4]); + } +#endif + +#ifdef ASM_PRECOMPILE_CACHE + if (precompile_cache_storing) + { +#endif + // Call point addition function + int result = secp256r1_add_point_ecp ( + 0, + p1, // p1 = [x1, y1] = 8x64bits + p2, // p2 = [x2, y2] = 8x64bits + p1 // p3 = [x3, y3] = 8x64bits + ); + if (result != 0) + { + printf("_opcode_secp256r1_add() failed callilng AddPointEcP() result=%d;", result); + exit(-1); + } + +#ifdef ASM_PRECOMPILE_CACHE + // Store result in cache + precompile_cache_store((uint8_t *)p1, 8*8); + } + else if (precompile_cache_loading) + { + // Load result from cache + precompile_cache_load((uint8_t *)p1, 8*8); + } +#endif + +#ifdef DEBUG + if (emu_verbose) + { + printf("p3 = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); + } +#endif +#ifdef ASM_CALL_METRICS + asm_call_metrics.secp256r1_add_counter++; + gettimeofday(&asm_call_stop, NULL); + asm_call_metrics.secp256r1_add_duration += TimeDiff(asm_call_start, asm_call_stop); +#endif + return 0; +} + +extern int _opcode_secp256r1_dbl(uint64_t * address) +{ +#ifdef ASM_CALL_METRICS + gettimeofday(&asm_call_start, NULL); +#endif + + uint64_t * p1 = address; + +#ifdef DEBUG + if (emu_verbose) + { +#ifdef ASM_CALL_METRICS + printf("opcode_secp256r1_dbl() calling AddPointEcP() counter=%lu address=%p\n", asm_call_metrics.secp256r1_dbl_counter, address); +#else + printf("opcode_secp256r1_dbl() calling AddPointEcP() address=%p\n", address); +#endif + printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); + } +#endif + +#ifdef ASM_PRECOMPILE_CACHE + if (precompile_cache_storing) + { +#endif + int result = secp256r1_add_point_ecp ( + 1, + p1, // p1 = [x1, y1] = 8x64bits + NULL, // p2 = [x2, y2] = 8x64bits + p1 // p3 = [x3, y3] = 8x64bits + ); + if (result != 0) + { + printf("_opcode_secp256r1_dbl() failed callilng secp256r1_add_point_ecp() result=%d;", result); + exit(-1); + } + +#ifdef ASM_PRECOMPILE_CACHE + // Store result in cache + precompile_cache_store((uint8_t *)p1, 8*8); + } + else if (precompile_cache_loading) + { + // Load result from cache + precompile_cache_load((uint8_t *)p1, 8*8); + } +#endif + +#ifdef DEBUG + if (emu_verbose) printf("opcode_secp256r1_dbl() called secp256r1_add_point_ecp()\n"); + if (emu_verbose) + { + printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); + } +#endif +#ifdef ASM_CALL_METRICS + asm_call_metrics.secp256r1_dbl_counter++; + gettimeofday(&asm_call_stop, NULL); + asm_call_metrics.secp256r1_dbl_duration += TimeDiff(asm_call_start, asm_call_stop); +#endif + return 0; +} + extern uint64_t MEM_TRACE_ADDRESS; extern uint64_t fcall_ctx; uint64_t print_fcall_ctx_counter = 0; @@ -880,12 +1177,21 @@ extern int _opcode_fcall(struct FcallContext * ctx) #endif #ifdef DEBUG #ifdef ASM_CALL_METRICS - if (emu_verbose) printf("_opcode_fcall() counter=%lu\n", asm_call_metrics.fcall_counter); + if (emu_verbose) printf("_opcode_fcall(%lu) counter=%lu\n", ctx->function_id, asm_call_metrics.fcall_counter); #else - if (emu_verbose) printf("_opcode_fcall()\n"); + if (emu_verbose) printf("_opcode_fcall(%lu)\n", ctx->function_id); #endif + if (emu_verbose) + { + printf("_opcode_fcall() calling Fcall() with params_size=%lu\n", ctx->params_size); + printf("params="); + for (uint64_t i=0; iparams_size; i++) + { + printf("%lx ", ctx->params[i]); + } + printf("\n"); + } #endif - int iresult; #ifdef ASM_PRECOMPILE_CACHE @@ -902,17 +1208,30 @@ extern int _opcode_fcall(struct FcallContext * ctx) #ifdef ASM_PRECOMPILE_CACHE // Store result in cache - precompile_cache_store((uint8_t *)&ctx->result_size, 8*8); + precompile_cache_store((uint8_t *)&ctx->result_size, 1*8); precompile_cache_store((uint8_t *)&ctx->result, ctx->result_size*8); } else if (precompile_cache_loading) { // Load result from cache - precompile_cache_load((uint8_t *)&ctx->result_size, 8*8); + precompile_cache_load((uint8_t *)&ctx->result_size, 1*8); precompile_cache_load((uint8_t *)&ctx->result, ctx->result_size*8); } #endif +#ifdef DEBUG + if (emu_verbose) + { + printf("_opcode_fcall() called Fcall() and got result_size=%lu\n", ctx->result_size); + printf("results="); + for (uint64_t i=0; iresult_size; i++) + { + printf("%lx ", ctx->result[i]); + } + printf("\n"); + } +#endif + #ifdef ASM_CALL_METRICS asm_call_metrics.fcall_counter++; gettimeofday(&asm_call_stop, NULL); @@ -921,6 +1240,7 @@ extern int _opcode_fcall(struct FcallContext * ctx) return iresult; } +/* extern int _opcode_inverse_fp_ec(uint64_t params, uint64_t result) { #ifdef ASM_CALL_METRICS @@ -1062,6 +1382,7 @@ extern int _opcode_sqrt_fp_ec_parity(uint64_t params, uint64_t result) #endif return 0; } +*/ /*********/ /* BN254 */ @@ -1079,14 +1400,14 @@ extern int _opcode_bn254_curve_add(uint64_t * address) if (emu_verbose) { #ifdef ASM_CALL_METRICS - printf("_opcode_bn254_curve_add() calling AddPointEcP() counter=%lu address=%p p1_address=%p p2_address=%p\n", asm_call_metrics.bn254_curve_add_counter, address, p1, p2); + printf("_opcode_bn254_curve_add() calling BN254CurveAddP() counter=%lu address=%p p1_address=%p p2_address=%p\n", asm_call_metrics.bn254_curve_add_counter, address, p1, p2); #else - printf("_opcode_bn254_curve_add() calling AddPointEcP() address=%p p1_address=%p p2_address=%p\n", address, p1, p2); + printf("_opcode_bn254_curve_add() calling BN254CurveAddP() address=%p p1_address=%p p2_address=%p\n", address, p1, p2); #endif - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); - printf("p2.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p2[3], p2[2], p2[1], p2[0], p2[3], p2[2], p2[1], p2[0]); - printf("p2.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p2[7], p2[6], p2[5], p2[4], p2[7], p2[6], p2[5], p2[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); + printf("p2.x = %lx:%lx:%lx:%lx\n", p2[3], p2[2], p2[1], p2[0]); + printf("p2.y = %lx:%lx:%lx:%lx\n", p2[7], p2[6], p2[5], p2[4]); } #endif @@ -1120,8 +1441,8 @@ extern int _opcode_bn254_curve_add(uint64_t * address) #ifdef DEBUG if (emu_verbose) { - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); } #endif #ifdef ASM_CALL_METRICS @@ -1147,8 +1468,8 @@ extern int _opcode_bn254_curve_dbl(uint64_t * address) #else printf("_opcode_bn254_curve_dbl() calling BN254CurveDblP() address=%p p1_address=%p\n", address, p1); #endif - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); } #endif @@ -1181,8 +1502,8 @@ extern int _opcode_bn254_curve_dbl(uint64_t * address) #ifdef DEBUG if (emu_verbose) { - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); } #endif #ifdef ASM_CALL_METRICS @@ -1209,10 +1530,10 @@ extern int _opcode_bn254_complex_add(uint64_t * address) #else printf("_opcode_bn254_complex_add() calling BN254ComplexAddP() address=%p p1_address=%p p2_address=%p\n", address, p1, p2); #endif - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); - printf("p2.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p2[3], p2[2], p2[1], p2[0], p2[3], p2[2], p2[1], p2[0]); - printf("p2.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p2[7], p2[6], p2[5], p2[4], p2[7], p2[6], p2[5], p2[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); + printf("p2.x = %lx:%lx:%lx:%lx\n", p2[3], p2[2], p2[1], p2[0]); + printf("p2.y = %lx:%lx:%lx:%lx\n", p2[7], p2[6], p2[5], p2[4]); } #endif @@ -1246,8 +1567,8 @@ extern int _opcode_bn254_complex_add(uint64_t * address) #ifdef DEBUG if (emu_verbose) { - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); } #endif #ifdef ASM_CALL_METRICS @@ -1274,10 +1595,10 @@ extern int _opcode_bn254_complex_sub(uint64_t * address) #else printf("_opcode_bn254_complex_sub() calling BN254ComplexSubP() address=%p p1_address=%p p2_address=%p\n", address, p1, p2); #endif - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); - printf("p2.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p2[3], p2[2], p2[1], p2[0], p2[3], p2[2], p2[1], p2[0]); - printf("p2.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p2[7], p2[6], p2[5], p2[4], p2[7], p2[6], p2[5], p2[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); + printf("p2.x = %lx:%lx:%lx:%lx\n", p2[3], p2[2], p2[1], p2[0]); + printf("p2.y = %lx:%lx:%lx:%lx\n", p2[7], p2[6], p2[5], p2[4]); } #endif @@ -1311,8 +1632,8 @@ extern int _opcode_bn254_complex_sub(uint64_t * address) #ifdef DEBUG if (emu_verbose) { - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); } #endif #ifdef ASM_CALL_METRICS @@ -1339,10 +1660,10 @@ extern int _opcode_bn254_complex_mul(uint64_t * address) #else printf("_opcode_bn254_complex_mul() calling BN254ComplexMulP() address=%p p1_address=%p p2_address=%p\n", address, p1, p2); #endif - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); - printf("p2.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p2[3], p2[2], p2[1], p2[0], p2[3], p2[2], p2[1], p2[0]); - printf("p2.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p2[7], p2[6], p2[5], p2[4], p2[7], p2[6], p2[5], p2[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); + printf("p2.x = %lx:%lx:%lx:%lx\n", p2[3], p2[2], p2[1], p2[0]); + printf("p2.y = %lx:%lx:%lx:%lx\n", p2[7], p2[6], p2[5], p2[4]); } #endif @@ -1376,8 +1697,8 @@ extern int _opcode_bn254_complex_mul(uint64_t * address) #ifdef DEBUG if (emu_verbose) { - printf("p1.x = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4], p1[7], p1[6], p1[5], p1[4]); + printf("p1.x = %lx:%lx:%lx:%lx\n", p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx\n", p1[7], p1[6], p1[5], p1[4]); } #endif #ifdef ASM_CALL_METRICS @@ -1404,14 +1725,14 @@ extern int _opcode_bls12_381_curve_add(uint64_t * address) if (emu_verbose) { #ifdef ASM_CALL_METRICS - printf("_opcode_bls12_381_curve_add() calling AddPointEcP() counter=%lu address=%p p1_address=%p p2_address=%p\n", asm_call_metrics.bl12_381_curve_add_counter, address, p1, p2); + printf("_opcode_bls12_381_curve_add() calling BLS12_381CurveAddP() counter=%lu address=%p p1_address=%p p2_address=%p\n", asm_call_metrics.bl12_381_curve_add_counter, address, p1, p2); #else - printf("_opcode_bls12_381_curve_add() calling AddPointEcP() address=%p p1_address=%p p2_address=%p\n", address, p1, p2); + printf("_opcode_bls12_381_curve_add() calling BLS12_381CurveAddP() address=%p p1_address=%p p2_address=%p\n", address, p1, p2); #endif - printf("p1.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0], p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6], p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); - printf("p2.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[5], p2[4], p2[3], p2[2], p2[1], p2[0], p2[5], p2[4], p2[3], p2[2], p2[1], p2[0]); - printf("p2.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[11], p2[10], p2[9], p2[8], p2[7], p2[6], p2[11], p2[10], p2[9], p2[8], p2[7], p2[6]); + printf("p1.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); + printf("p2.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[5], p2[4], p2[3], p2[2], p2[1], p2[0]); + printf("p2.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[11], p2[10], p2[9], p2[8], p2[7], p2[6]); } #endif @@ -1445,8 +1766,8 @@ extern int _opcode_bls12_381_curve_add(uint64_t * address) #ifdef DEBUG if (emu_verbose) { - printf("p1.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0], p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6], p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); + printf("p1.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); } #endif #ifdef ASM_CALL_METRICS @@ -1472,8 +1793,8 @@ extern int _opcode_bls12_381_curve_dbl(uint64_t * address) #else printf("_opcode_bls12_381_curve_dbl() calling BLS12_381CurveDblP() address=%p p1_address=%p\n", address, p1); #endif - printf("p1.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0], p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6], p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); + printf("p1.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); } #endif @@ -1506,8 +1827,8 @@ extern int _opcode_bls12_381_curve_dbl(uint64_t * address) #ifdef DEBUG if (emu_verbose) { - printf("p1.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0], p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6], p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); + printf("p1.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); } #endif #ifdef ASM_CALL_METRICS @@ -1534,10 +1855,10 @@ extern int _opcode_bls12_381_complex_add(uint64_t * address) #else printf("_opcode_bls12_381_complex_add() calling BLS12_381ComplexAddP() address=%p p1_address=%p p2_address=%p\n", address, p1, p2); #endif - printf("p1.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0], p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6], p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); - printf("p2.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[5], p2[4], p2[3], p2[2], p2[1], p2[0], p2[5], p2[4], p2[3], p2[2], p2[1], p2[0]); - printf("p2.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[11], p2[10], p2[9], p2[8], p2[7], p2[6], p2[11], p2[10], p2[9], p2[8], p2[7], p2[6]); + printf("p1.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); + printf("p2.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[5], p2[4], p2[3], p2[2], p2[1], p2[0]); + printf("p2.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[11], p2[10], p2[9], p2[8], p2[7], p2[6]); } #endif @@ -1571,8 +1892,8 @@ extern int _opcode_bls12_381_complex_add(uint64_t * address) #ifdef DEBUG if (emu_verbose) { - printf("p1.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0], p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6], p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); + printf("p1.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); } #endif #ifdef ASM_CALL_METRICS @@ -1599,10 +1920,10 @@ extern int _opcode_bls12_381_complex_sub(uint64_t * address) #else printf("_opcode_bls12_381_complex_sub() calling BLS12_381ComplexSubP() address=%p p1_address=%p p2_address=%p\n", address, p1, p2); #endif - printf("p1.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0], p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6], p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); - printf("p2.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[5], p2[4], p2[3], p2[2], p2[1], p2[0], p2[5], p2[4], p2[3], p2[2], p2[1], p2[0]); - printf("p2.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[11], p2[10], p2[9], p2[8], p2[7], p2[6], p2[11], p2[10], p2[9], p2[8], p2[7], p2[6]); + printf("p1.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); + printf("p2.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[5], p2[4], p2[3], p2[2], p2[1], p2[0]); + printf("p2.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[11], p2[10], p2[9], p2[8], p2[7], p2[6]); } #endif @@ -1636,8 +1957,8 @@ extern int _opcode_bls12_381_complex_sub(uint64_t * address) #ifdef DEBUG if (emu_verbose) { - printf("p1.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0], p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6], p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); + printf("p1.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); } #endif #ifdef ASM_CALL_METRICS @@ -1664,10 +1985,10 @@ extern int _opcode_bls12_381_complex_mul(uint64_t * address) #else printf("_opcode_bls12_381_complex_mul() calling BLS12_381ComplexMulP() address=%p p1_address=%p p2_address=%p\n", address, p1, p2); #endif - printf("p1.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0], p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6], p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); - printf("p2.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[5], p2[4], p2[3], p2[2], p2[1], p2[0], p2[5], p2[4], p2[3], p2[2], p2[1], p2[0]); - printf("p2.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[11], p2[10], p2[9], p2[8], p2[7], p2[6], p2[11], p2[10], p2[9], p2[8], p2[7], p2[6]); + printf("p1.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); + printf("p2.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[5], p2[4], p2[3], p2[2], p2[1], p2[0]); + printf("p2.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p2[11], p2[10], p2[9], p2[8], p2[7], p2[6]); } #endif @@ -1701,8 +2022,8 @@ extern int _opcode_bls12_381_complex_mul(uint64_t * address) #ifdef DEBUG if (emu_verbose) { - printf("p1.x = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0], p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); - printf("p1.y = %lu:%lu:%lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6], p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); + printf("p1.x = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[5], p1[4], p1[3], p1[2], p1[1], p1[0]); + printf("p1.y = %lx:%lx:%lx:%lx:%lx:%lx\n", p1[11], p1[10], p1[9], p1[8], p1[7], p1[6]); } #endif #ifdef ASM_CALL_METRICS @@ -1733,33 +2054,38 @@ extern uint64_t _opcode_add256(uint64_t * address) #else printf("opcode_add256() calling Add256() address=%p\n", address); #endif - printf("a = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", a[3], a[2], a[1], a[0], a[3], a[2], a[1], a[0]); - printf("b = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", b[3], b[2], b[1], b[0], b[3], b[2], b[1], b[0]); - printf("c = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", c[3], c[2], c[1], c[0], c[3], c[2], c[1], c[0]); + printf("a = %lx:%lx:%lx:%lx\n", a[3], a[2], a[1], a[0]); + printf("b = %lx:%lx:%lx:%lx\n", b[3], b[2], b[1], b[0]); + printf("c = %lx:%lx:%lx:%lx\n", c[3], c[2], c[1], c[0]); } #endif + + uint64_t cout = 0; + #ifdef ASM_PRECOMPILE_CACHE if (precompile_cache_storing) { #endif - // cout = [0,1] ok, cout < 0 error - int cout = Add256 (a, b, cin, c); - if (cout < 0) - { - printf("_opcode_add256() failed callilng Add256() cout=%d;", cout); - exit(-1); - } + // cout = [0,1] ok, cout < 0 error + int icout = Add256 (a, b, cin, c); + if (icout < 0) + { + printf("_opcode_add256() failed callilng Add256() cout=%d;", icout); + exit(-1); + } + cout = (uint64_t)icout; + #ifdef ASM_PRECOMPILE_CACHE // Store result in cache precompile_cache_store((uint8_t *)c, 4*8); - precompile_cache_store((uint8_t *)cout, 8); + precompile_cache_store((uint8_t *)&cout, 8); } else if (precompile_cache_loading) { // Load result from cache - precompile_cache_load((uint8_t *)cout, 8); precompile_cache_load((uint8_t *)c, 4*8); + precompile_cache_load((uint8_t *)&cout, 8); } #endif @@ -1767,8 +2093,8 @@ extern uint64_t _opcode_add256(uint64_t * address) if (emu_verbose) printf("opcode_add256() called Add256()\n"); if (emu_verbose) { - printf("cout = %u\n", cout); - printf("c = %lu:%lu:%lu:%lu = %lx:%lx:%lx:%lx\n", c[3], c[2], c[1], c[0], c[3], c[2], c[1], c[0]); + printf("cout = %lu\n", cout); + printf("c = %lx:%lx:%lx:%lx\n", c[3], c[2], c[1], c[0]); } #endif #ifdef ASM_CALL_METRICS diff --git a/emulator-asm/src/emu.hpp b/emulator-asm/src/emu.hpp index 13c36f04d..057966094 100644 --- a/emulator-asm/src/emu.hpp +++ b/emulator-asm/src/emu.hpp @@ -2,6 +2,9 @@ #define EMU_ASM_HPP #include +#include + +uint64_t TimeDiff(const struct timeval startTime, const struct timeval endTime); #ifdef DEBUG extern bool emu_verbose; @@ -27,6 +30,12 @@ typedef struct { uint64_t sha256_counter; uint64_t sha256_duration; + uint64_t blake2_counter; + uint64_t blake2_duration; + + uint64_t poseidon2_counter; + uint64_t poseidon2_duration; + uint64_t arith256_counter; uint64_t arith256_duration; diff --git a/emulator-asm/src/globals.c b/emulator-asm/src/globals.c new file mode 100644 index 000000000..d6507a034 --- /dev/null +++ b/emulator-asm/src/globals.c @@ -0,0 +1,137 @@ +#define _GNU_SOURCE +#include +#include "constants.hpp" +#include "globals.hpp" + +// Configuration globals, set by arguments +bool output = false; +bool output_riscof = false; +bool silent = false; +bool metrics = false; +bool trace = false; +bool trace_trace = false; +bool verbose = false; +bool save_to_file = false; +bool share_input_shm = false; +bool open_input_shm = false; +char input_file[4096] = {0}; +bool redirect_output_to_file = false; +bool server = false; +bool client = false; +char shm_prefix[MAX_SHM_PREFIX_LENGTH] = {0}; +int map_locked_flag = MAP_LOCKED; +uint64_t chunk_mask = 0x0; +bool do_shutdown = false; +uint64_t number_of_mt_requests = 1; +uint16_t port = 0; +uint64_t chunk_player_address = 0; +bool wait_flag = true; + +char precompile_file_name[4096] = {0}; +char shmem_control_input_name[128] = {0}; +char shmem_control_output_name[128] = {0}; +char shmem_input_name[128] = {0}; +char shmem_output_name[128] = {0}; +char shmem_mt_name[128] = {0}; +char shmem_precompile_name[128] = {0}; +char sem_prec_avail_name[128] = {0}; +char sem_prec_read_name[128] = {0}; +char sem_chunk_done_name[128] = {0}; +char sem_shutdown_done_name[128] = {0}; +char sem_input_avail_name[128] = {0}; +char file_lock_name[128] = {0}; +char log_name[128] = {0}; +bool call_chunk_done = false; + +// Configuration set by assembly code, accessed by C +bool precompile_results_enabled = false; + +// Default generation method, can be overridden by the --gen argument +GenMethod gen_method = Fast; + +// To be used when calculating partial durations +// Time measurements cannot be overlapped +struct timeval start_time; +struct timeval stop_time; +uint64_t duration; + +/*****************/ +/* SHARED MEMORY */ +/*****************/ + +// Input shared memory +int shmem_input_fd = -1; + +// Output trace shared memory +int shmem_output_fd = -1; + +// Input MT trace shared memory +int shmem_mt_fd = -1; + +// Chunk done semaphore: notifies the caller when a new chunk has been processed +sem_t * sem_chunk_done = NULL; + +/**************************/ +/* PRECOMPILE AND CONTROL */ +/**************************/ + +uint64_t * precompile_results_address = NULL; + +// Precompile results shared memory +int shmem_precompile_fd = -1; +void * shmem_precompile_address = NULL; + +// Precompile results semaphores +sem_t * sem_prec_avail = NULL; +sem_t * sem_prec_read = NULL; +sem_t * sem_input_avail = NULL; + +// Control input shared memory +int shmem_control_input_fd = -1; +uint64_t * shmem_control_input_address = NULL; +volatile uint64_t * precompile_written_address = NULL; +volatile uint64_t * precompile_exit_address = NULL; +volatile uint64_t * input_written_address = NULL; + +// Control output shared memory +int shmem_control_output_fd = -1; +uint64_t * shmem_control_output_address = NULL; +volatile uint64_t * precompile_read_address = NULL; +volatile uint64_t * waiting_for_precompile_address = NULL; +volatile uint64_t * waiting_for_input_address = NULL; + +/**************/ +/* TRACE SIZE */ +/**************/ + +uint64_t initial_trace_size = TRACE_INITIAL_SIZE; +uint64_t trace_address = TRACE_ADDR; +uint64_t trace_size = TRACE_INITIAL_SIZE; +uint64_t trace_used_size = 0; +uint64_t trace_address_threshold = TRACE_ADDR + TRACE_INITIAL_SIZE - MAX_CHUNK_TRACE_SIZE; + +// To be used when calculating the assembly duration +uint64_t assembly_duration; + +// Counters used in functions called from assembly code +uint64_t realloc_counter = 0; +uint64_t wait_prec_avail_counter = 0; +uint64_t wait_input_avail_counter = 0; +uint64_t print_pc_counter = 0; + +// Chunk player globals +uint64_t chunk_player_mt_size = TRACE_INITIAL_SIZE; + +// Maximum number of steps to execute, used by the client to limit the execution steps of the +// assembly code. +uint64_t max_steps = (1ULL << 32); + +// Pointers to the input, RAM, ROM and trace memory, used by both C and assembly code to access these memories +uint64_t * pInputTrace = (uint64_t *)TRACE_ADDR; // Used for trace consumption, i.e. chunk player +uint64_t * pOutputTrace = (uint64_t *)TRACE_ADDR; // Used for trace generation, i.e. assembly code writes the trace to this address, and client reads it from this address + +/**************/ +/* CHUNK SIZE */ +/**************/ + +uint64_t chunk_size = CHUNK_SIZE; \ No newline at end of file diff --git a/emulator-asm/src/globals.hpp b/emulator-asm/src/globals.hpp new file mode 100644 index 000000000..0d3f09b87 --- /dev/null +++ b/emulator-asm/src/globals.hpp @@ -0,0 +1,168 @@ +#ifndef EMULATOR_ASM_GLOBALS_HPP +#define EMULATOR_ASM_GLOBALS_HPP + +#include +#include +#include +#include +#include +#include "constants.hpp" + +// Configuration globals, set by arguments +extern bool output; +extern bool output_riscof; +extern bool silent; +extern bool metrics; +extern bool trace; +extern bool trace_trace; +extern bool verbose; +extern bool save_to_file; +extern bool share_input_shm; // Shares input shared memories: input, precompile results and control input, using a common name +extern bool open_input_shm; // Opens existing input shared memories, without creating them. They must be previously created by another process (assembly emulator or witness computation) +extern char input_file[4096]; +extern bool redirect_output_to_file; +extern bool server; // Indicates that this process is a server +extern bool client; // Indicates that this process is a client (used for testing the server) +extern char shm_prefix[MAX_SHM_PREFIX_LENGTH]; // Shared memory prefix +extern int map_locked_flag; // Flag used in mmap to indicate if the physical memory is locked in RAM (MAP_LOCKED) or can be swapped (0). By default it is locked, but it can be unlocked with the -u argument, which can be useful for testing and debugging purposes, e.g. to allow core dumps when the assembly code crashes +extern uint64_t chunk_mask; // ZIP: 0, 1, 2, 3, 4, 5, 6 or 7 +extern bool do_shutdown; // If true, the client will perform a shutdown request to the server when done +extern uint64_t number_of_mt_requests; // Loop to send this number of minimal trace requests +extern uint16_t port; // Service TCP port +extern uint64_t chunk_player_address; // Chunk player address, used for generation methods that use the chunk player, i.e. gen_method=8 or gen_method=10 +extern bool wait_flag; // If true, the shmem will get a flag set to 1 if we are waiting for a semaphore, and set it back to 0 when we are not waiting anymore. This can be used for debugging purposes to know if the assembly code is waiting for a semaphore or not. + +extern char precompile_file_name[4096]; // Precompile results file name (used by client) +extern char shmem_control_input_name[128]; +extern char shmem_control_output_name[128]; +extern char shmem_input_name[128]; +extern char shmem_output_name[128]; +extern char shmem_mt_name[128]; +extern char shmem_precompile_name[128]; +extern char sem_prec_avail_name[128]; +extern char sem_prec_read_name[128]; +extern char sem_chunk_done_name[128]; +extern char sem_shutdown_done_name[128]; +extern char sem_input_avail_name[128]; +extern char file_lock_name[128]; +extern char log_name[128]; +extern bool call_chunk_done; + +// Configuration set by assembly code, accessed by C +extern bool precompile_results_enabled; + +/*********************/ +/* Generation method */ +/*********************/ + +// Specifies how the assembly code generates the trace, and what information it includes. +// It is specified with the mandatory argument --gen= +// It must match the value returned by the assembly function get_gen_method() +// The enum names are equivalent to the rust ones defined in core/src/riscv2zisk.rs as AsmGenerationMethod +// ZisK uses generation methods 1 (minimal trace), 2 (ROM histogram) and 7 (memory operations) +// but the rest of methods can be used for testing and debugging purposes +typedef enum { + Fast = 0, + MinimalTrace = 1, + RomHistogram = 2, + MainTrace = 3, + ChunksOnly = 4, + //BusOp = 5, + Zip = 6, + MemOp = 7, + ChunkPlayerMTCollectMem = 8, + MemReads = 9, + ChunkPlayerMemReadsCollectMain = 10, +} GenMethod; + +// Default generation method, can be overridden by the --gen argument +extern GenMethod gen_method; + +// To be used when calculating partial durations +// Time measurements cannot be overlapped +extern struct timeval start_time; +extern struct timeval stop_time; +extern uint64_t duration; + +/*****************/ +/* SHARED MEMORY */ +/*****************/ + +// Input shared memory +extern int shmem_input_fd; + +// Output trace shared memory +extern int shmem_output_fd; + +// Input MT trace shared memory +extern int shmem_mt_fd; + +// Chunk done semaphore: notifies the caller when a new chunk has been processed +extern sem_t * sem_chunk_done; + +/**************************/ +/* PRECOMPILE AND CONTROL */ +/**************************/ + +extern uint64_t * precompile_results_address; + +// Precompile results shared memory +extern int shmem_precompile_fd; +extern void * shmem_precompile_address; + +// Precompile results semaphores +extern sem_t * sem_prec_avail; +extern sem_t * sem_prec_read; +extern sem_t * sem_input_avail; + +// Control input shared memory +extern int shmem_control_input_fd; +extern uint64_t * shmem_control_input_address; +extern volatile uint64_t * precompile_written_address; +extern volatile uint64_t * precompile_exit_address; +extern volatile uint64_t * input_written_address; + +// Control output shared memory +extern int shmem_control_output_fd; +extern uint64_t * shmem_control_output_address; +extern volatile uint64_t * precompile_read_address; +extern volatile uint64_t * waiting_for_precompile_address; +extern volatile uint64_t * waiting_for_input_address; + +/**************/ +/* TRACE SIZE */ +/**************/ + +extern uint64_t initial_trace_size; +extern uint64_t trace_address; +extern uint64_t trace_size; +extern uint64_t trace_used_size; +extern uint64_t trace_address_threshold; + +// To be used when calculating the assembly duration +extern uint64_t assembly_duration; + +// Counters used in functions called from assembly code +extern uint64_t realloc_counter; +extern uint64_t wait_prec_avail_counter; +extern uint64_t wait_input_avail_counter; +extern uint64_t print_pc_counter; + +// Chunk player globals +extern uint64_t chunk_player_mt_size; + +// Maximum number of steps to execute, used by the client to limit the execution steps of the +// assembly code. +extern uint64_t max_steps; + +// Pointers to the input, RAM, ROM and trace memory, used by both C and assembly code to access these memories +extern uint64_t * pInputTrace; // Used for trace consumption, i.e. chunk player +extern uint64_t * pOutputTrace; // Used for trace generation, i.e. assembly code writes the trace to this address, and client reads it from this address + +/**************/ +/* CHUNK SIZE */ +/**************/ + +extern uint64_t chunk_size; + +#endif // EMULATOR_ASM_GLOBALS_HPP \ No newline at end of file diff --git a/emulator-asm/src/main.c b/emulator-asm/src/main.c index 58226ea5c..a44002c12 100644 --- a/emulator-asm/src/main.c +++ b/emulator-asm/src/main.c @@ -13,171 +13,57 @@ #include #include #include -#include "../../lib-c/c/src/ec/ec.hpp" -#include "../../lib-c/c/src/fcall/fcall.hpp" -#include "../../lib-c/c/src/arith256/arith256.hpp" -#include "emu.hpp" #include #include #include #include #include - -// Assembly-provided functions -void emulator_start(void); -uint64_t get_max_bios_pc(void); -uint64_t get_max_program_pc(void); -uint64_t get_gen_method(void); - -// Address map -#define ROM_ADDR (uint64_t)0x80000000 -#define ROM_SIZE (uint64_t)0x08000000 // 128MB - -#define INPUT_ADDR (uint64_t)0x90000000 -#define MAX_INPUT_SIZE (uint64_t)0x08000000 // 128MB - -#define RAM_ADDR (uint64_t)0xa0000000 -#define RAM_SIZE (uint64_t)0x20000000 // 512MB -#define SYS_ADDR RAM_ADDR -#define SYS_SIZE (uint64_t)0x10000 -#define OUTPUT_ADDR (SYS_ADDR + SYS_SIZE) - -#define TRACE_ADDR (uint64_t)0xc0000000 -#define INITIAL_TRACE_SIZE (uint64_t)0x100000000 // 4GB - -#define REG_ADDR (uint64_t)0x70000000 -#define REG_SIZE (uint64_t)0x1000 // 4kB - -uint8_t * pInput = (uint8_t *)INPUT_ADDR; -uint8_t * pInputLast = (uint8_t *)(INPUT_ADDR + 10440504 - 64); -uint8_t * pRam = (uint8_t *)RAM_ADDR; -uint8_t * pRom = (uint8_t *)ROM_ADDR; -uint64_t * pInputTrace = (uint64_t *)TRACE_ADDR; -uint64_t * pOutputTrace = (uint64_t *)TRACE_ADDR; - -#define TYPE_PING 1 // Ping -#define TYPE_PONG 2 -#define TYPE_MT_REQUEST 3 // Minimal trace -#define TYPE_MT_RESPONSE 4 -#define TYPE_RH_REQUEST 5 // ROM histogram -#define TYPE_RH_RESPONSE 6 -#define TYPE_MO_REQUEST 7 // Memory opcode -#define TYPE_MO_RESPONSE 8 -#define TYPE_MA_REQUEST 9 // Main packed trace -#define TYPE_MA_RESPONSE 10 -#define TYPE_CM_REQUEST 11 // Collect memory trace -#define TYPE_CM_RESPONSE 12 -#define TYPE_FA_REQUEST 13 // Fast mode, do not generate any trace -#define TYPE_FA_RESPONSE 14 -#define TYPE_MR_REQUEST 15 // Mem reads -#define TYPE_MR_RESPONSE 16 -#define TYPE_CA_REQUEST 17 // Collect main trace -#define TYPE_CA_RESPONSE 18 -#define TYPE_SD_REQUEST 1000000 // Shutdown -#define TYPE_SD_RESPONSE 1000001 - -// Generation method -typedef enum { - Fast = 0, - MinimalTrace = 1, - RomHistogram = 2, - MainTrace = 3, - ChunksOnly = 4, - //BusOp = 5, - Zip = 6, - MemOp = 7, - ChunkPlayerMTCollectMem = 8, - MemReads = 9, - ChunkPlayerMemReadsCollectMain = 10, -} GenMethod; -GenMethod gen_method = Fast; - -// Service TCP parameters -#define SERVER_IP "127.0.0.1" // Change to your server IP -uint16_t port = 0; -uint16_t arguments_port = 0; - -// Type of execution -bool server = false; -bool client = false; -bool call_chunk_done = false; -bool do_shutdown = false; // If true, the client will perform a shutdown request to the server when done -uint64_t number_of_mt_requests = 1; // Loop to send this number of minimal trace requests - -char input_file[4096]; - -// To be used when calculating partial durations -// Time measurements cannot be overlapped -struct timeval start_time; -struct timeval stop_time; -uint64_t duration; +#include +#include "constants.hpp" +#include "emu.hpp" +#include "asm_provided.hpp" +#include "globals.hpp" +#include "configuration.hpp" +#include "server.hpp" +#include "client.hpp" +#include "trace.hpp" + +// Returns the acronym of the generation method, used for logging and file naming +const char * gen_method_acronym(GenMethod method) +{ + switch (method) + { + case Fast: return "FT"; + case MinimalTrace: return "MT"; + case RomHistogram: return "RH"; + case MainTrace: return "MA"; + case ChunksOnly: return "CO"; + //case BusOp: return "bus-op"; + case Zip: return "ZP"; + case MemOp: return "MO"; + case ChunkPlayerMTCollectMem: return "CPM"; + case MemReads: return "MR"; + case ChunkPlayerMemReadsCollectMain: return "CPMCM"; + default: return "?"; + } +} // To be used when calculating total duration struct timeval total_start_time; struct timeval total_stop_time; uint64_t total_duration; -// To be used when calculating the assembly duration -uint64_t assembly_duration; - -extern uint64_t MEM_STEP; -extern uint64_t MEM_END; -extern uint64_t MEM_ERROR; -extern uint64_t MEM_TRACE_ADDRESS; -extern uint64_t MEM_CHUNK_ADDRESS; -extern uint64_t MEM_CHUNK_START_STEP; - -uint64_t realloc_counter = 0; - -extern void zisk_keccakf(uint64_t state[25]); -/* Used for debugging -extern uint64_t reg_0; -extern uint64_t reg_1; -extern uint64_t reg_2; -extern uint64_t reg_3; -extern uint64_t reg_4; -extern uint64_t reg_5; -extern uint64_t reg_6; -extern uint64_t reg_7; -extern uint64_t reg_8; -extern uint64_t reg_9; -extern uint64_t reg_10; -extern uint64_t reg_11; -extern uint64_t reg_12; -extern uint64_t reg_13; -extern uint64_t reg_14; -extern uint64_t reg_15; -extern uint64_t reg_16; -extern uint64_t reg_17; -extern uint64_t reg_18; -extern uint64_t reg_19; -extern uint64_t reg_20; -extern uint64_t reg_21; -extern uint64_t reg_22; -extern uint64_t reg_23; -extern uint64_t reg_24; -extern uint64_t reg_25; -extern uint64_t reg_26; -extern uint64_t reg_27; -extern uint64_t reg_28; -extern uint64_t reg_29; -extern uint64_t reg_30; -extern uint64_t reg_31; -*/ - -bool is_power_of_two (uint64_t number) { +// Checks if a number is a power of two, used to validate the max steps and chunk size provided by the client +bool is_power_of_two (uint64_t number) +{ return (number != 0) && ((number & (number - 1)) == 0); } -#define INITIAL_CHUNK_SIZE (1ULL << 18) -uint64_t chunk_size = INITIAL_CHUNK_SIZE; -uint64_t chunk_size_mask = INITIAL_CHUNK_SIZE - 1; -uint64_t max_steps = (1ULL << 32); - -// Chunk player globals -uint64_t chunk_player_address = 0; -uint64_t chunk_player_mt_size = INITIAL_TRACE_SIZE; // TODO +/*************/ +/* MAX STEPS */ +/*************/ +// Sets the maximum number of steps provided by the client in the request void set_max_steps (uint64_t new_max_steps) { if (!is_power_of_two(new_max_steps)) @@ -190,22 +76,9 @@ void set_max_steps (uint64_t new_max_steps) max_steps = new_max_steps; } -uint64_t initial_trace_size = INITIAL_TRACE_SIZE; -uint64_t trace_address = TRACE_ADDR; -uint64_t trace_size = INITIAL_TRACE_SIZE; -uint64_t trace_used_size = 0; - -// Worst case: every chunk instruction is a keccak operation, with an input data of 200 bytes -#define MAX_CHUNK_TRACE_SIZE (INITIAL_CHUNK_SIZE * 200) + (44 * 8) + 32 -uint64_t trace_address_threshold = TRACE_ADDR + INITIAL_TRACE_SIZE - MAX_CHUNK_TRACE_SIZE; - -uint64_t print_pc_counter = 0; - -int map_locked_flag = MAP_LOCKED; - -#ifdef ASM_PRECOMPILE_CACHE -bool precompile_cache_enabled = false; -#endif +/**************/ +/* CHUNK SIZE */ +/**************/ void set_chunk_size (uint64_t new_chunk_size) { @@ -217,99 +90,31 @@ void set_chunk_size (uint64_t new_chunk_size) exit(-1); } chunk_size = new_chunk_size; - chunk_size_mask = chunk_size - 1; - trace_address_threshold = TRACE_ADDR + trace_size - ((chunk_size*200) + (44*8) + 32); -} - -void set_trace_size (uint64_t new_trace_size) -{ - // Update trace global variables - trace_size = new_trace_size; trace_address_threshold = TRACE_ADDR + trace_size - MAX_CHUNK_TRACE_SIZE; - pOutputTrace[2] = trace_size; } -void parse_arguments(int argc, char *argv[]); -uint64_t TimeDiff(const struct timeval startTime, const struct timeval endTime); - -void configure (void); -void server_setup (void); -void server_reset (void); -void server_run (void); -void server_cleanup (void); - -void client_setup (void); -void client_run (void); -void client_cleanup (void); +//#define USE_FILE_LOCK -void _chunk_done(void); - -void log_minimal_trace(void); -void log_histogram(void); -void log_main_trace(void); -void log_mem_trace(void); -void log_mem_op(void); -void save_mem_op_to_files(void); -void log_chunk_player_main_trace(void); - -int recv_all_with_timeout (int sockfd, void *buffer, size_t length, int flags, int timeout_sec); +#ifdef USE_FILE_LOCK void file_lock(void); -// Configuration -bool output = false; -bool silent = false; -bool metrics = false; -bool trace = false; -bool trace_trace = false; -bool verbose = false; -bool save_to_file = false; - -// ROM histogram -uint64_t histogram_size = 0; -uint64_t bios_size = 0; -uint64_t program_size = 0; - -// Zip -uint64_t chunk_mask = 0x0; // 0, 1, 2, 3, 4, 5, 6 or 7 -#define MAX_CHUNK_MASK 7 - -// Maximum length of the shared memory prefix, e.g. SHMZISK12345678 -#define MAX_SHM_PREFIX_LENGTH 64 -char shm_prefix[MAX_SHM_PREFIX_LENGTH]; - -// Input shared memory -char shmem_input_name[128]; -int shmem_input_fd = -1; -uint64_t shmem_input_size = 0; -void * shmem_input_address = NULL; - -// Output trace shared memory -char shmem_output_name[128]; -int shmem_output_fd = -1; - -// Input MT trace shared memory -char shmem_mt_name[128]; -int shmem_mt_fd = -1; - -// Chunk done semaphore: notifies the caller when a new chunk has been processed -char sem_chunk_done_name[128]; -sem_t * sem_chunk_done = NULL; - -// Shutdown done semaphore: notifies the caller when a shutdown has been processed -char sem_shutdown_done_name[128]; -sem_t * sem_shutdown_done = NULL; - -// File lock name -char file_lock_name[128]; +// File lock name, used to lock a file that indicates that the assembly emulator process is running, +// to prevent multiple instances of the server from running at the same time. int file_lock_fd = -1; -// Log name -char log_name[128]; +#endif // USE_FILE_LOCK +// Process id int process_id = 0; -uint64_t input_size = 0; +#ifdef ASM_PRECOMPILE_CACHE +bool precompile_cache_enabled = false; +#endif + +/********/ +/* MAIN */ +/********/ int main(int argc, char *argv[]) { @@ -324,17 +129,54 @@ int main(int argc, char *argv[]) // Get current process id process_id = getpid(); + // Get precompiled results configuration + uint64_t precompile_results = get_precompile_results(); + if (precompile_results == 1) { + precompile_results_enabled = true; + } else { + precompile_results_enabled = false; + } + // Parse arguments parse_arguments(argc, argv); + // Redirect output to file if requested + if (redirect_output_to_file) + { + char redirect_output_file[256]; + snprintf(redirect_output_file, sizeof(redirect_output_file), "/tmp/%s_%s_output.txt", shm_prefix, gen_method_acronym(gen_method)); + + // Redirect stdout to file + FILE * file_pointer = freopen(redirect_output_file, "w", stdout); + if (file_pointer == NULL) + { + printf("ERROR: Failed to redirect stdout to file %s\n", redirect_output_file); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Redirect stderr to the same file + file_pointer = freopen(redirect_output_file, "a", stderr); + if (file_pointer == NULL) + { + printf("ERROR: Failed to redirect stderr to file %s\n", redirect_output_file); + fflush(stdout); + fflush(stderr); + exit(-1); + } + } + // Configure based on parguments configure(); +#ifdef USE_FILE_LOCK // Lock file - // if (server) - // { - // file_lock(); - // } + if (server) + { + file_lock(); + } +#endif // Send a message to stderr // fprintf(stderr, "%s stderr test (not an error): Starting Ziskemu ASM emulator process id=%d server=%d client=%d gen_method=%d port=%u\n", log_name, process_id, server, client, gen_method, port); @@ -358,7 +200,9 @@ int main(int argc, char *argv[]) server_setup(); // Reset the server, i.e. reset memory - server_reset(); + server_reset_fast(); + server_reset_slow(); + server_reset_trace(); // Create socket file descriptor int server_fd; @@ -413,7 +257,12 @@ int main(int argc, char *argv[]) struct sockaddr_in address; int addrlen = sizeof(address); int client_fd; - if (!silent) printf("%s Waiting for incoming connections to port %u...\n", log_name, port); + if (!silent) + { + printf("%s Waiting for incoming connections to port %u...\n", log_name, port); + fflush(stdout); + fflush(stderr); + } client_fd = accept(server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen); if (client_fd < 0) { @@ -446,20 +295,34 @@ int main(int argc, char *argv[]) if (bytes_read < 0) { printf("%s ERROR: Failed calling recv() bytes_read=%ld errno=%d=%s\n", log_name, bytes_read, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); break; } if (bytes_read != sizeof(request)) { if ((errno != 0) && (errno != 2)) { - printf("%s ERROR: Failed calling recv() invalid bytes_read=%ld errno=%d=%s\n", log_name, bytes_read, errno, strerror(errno)); + printf("%s WARNING: Failed calling recv() invalid bytes_read=%ld errno=%d=%s\n", log_name, bytes_read, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); } break; } #ifdef DEBUG - if (verbose) printf("%s recv() returned: %ld\n", log_name, bytes_read); + if (verbose) + { + printf("%s recv() returned: %ld\n", log_name, bytes_read); + fflush(stdout); + fflush(stderr); + } #endif - if (verbose) printf("%s recv()'d request=[%lu, 0x%lx, 0x%lx, 0x%lx, 0x%lx]\n", log_name, request[0], request[1], request[2], request[3], request[4]); + if (verbose) + { + printf("%s recv()'d request=[%lu, 0x%lx, 0x%lx, 0x%lx, 0x%lx]\n", log_name, request[0], request[1], request[2], request[3], request[4]); + fflush(stdout); + fflush(stderr); + } uint64_t response[5]; bReset = false; @@ -489,6 +352,8 @@ int main(int argc, char *argv[]) server_run(); + server_reset_fast(); + response[0] = TYPE_MT_RESPONSE; response[1] = (MEM_END && !MEM_ERROR) ? 0 : 1; response[2] = trace_size; @@ -518,6 +383,8 @@ int main(int argc, char *argv[]) server_run(); + server_reset_fast(); + response[0] = TYPE_RH_RESPONSE; response[1] = MEM_END ? 0 : 1; response[2] = trace_size; @@ -548,6 +415,8 @@ int main(int argc, char *argv[]) server_run(); + server_reset_fast(); + response[0] = TYPE_MO_RESPONSE; response[1] = MEM_END ? 0 : 1; response[2] = trace_size; @@ -578,6 +447,8 @@ int main(int argc, char *argv[]) server_run(); + server_reset_fast(); + response[0] = TYPE_MA_RESPONSE; response[1] = MEM_END ? 0 : 1; response[2] = trace_size; @@ -611,6 +482,8 @@ int main(int argc, char *argv[]) server_run(); + server_reset_fast(); + response[0] = TYPE_CM_RESPONSE; response[1] = 0; response[2] = trace_size; @@ -641,6 +514,8 @@ int main(int argc, char *argv[]) server_run(); + server_reset_fast(); + response[0] = TYPE_FA_RESPONSE; response[1] = MEM_END ? 0 : 1; response[2] = 0; @@ -671,6 +546,8 @@ int main(int argc, char *argv[]) server_run(); + server_reset_fast(); + response[0] = TYPE_MR_RESPONSE; response[1] = MEM_END ? 0 : 1; response[2] = trace_size; @@ -704,6 +581,8 @@ int main(int argc, char *argv[]) server_run(); + server_reset_fast(); + response[0] = TYPE_CA_RESPONSE; response[1] = 0; response[2] = trace_size; @@ -743,20 +622,32 @@ int main(int argc, char *argv[]) } } - if (verbose) printf("%s send()'ing response=[%lu, 0x%lx, 0x%lx, 0x%lx, 0x%lx]\n", log_name, response[0], response[1], response[2], response[3], response[4]); + if (verbose) + { + printf("%s send()'ing response=[%lu, 0x%lx, 0x%lx, 0x%lx, 0x%lx]\n", log_name, response[0], response[1], response[2], response[3], response[4]); + fflush(stdout); + fflush(stderr); + } ssize_t bytes_sent = send(client_fd, response, sizeof(response), MSG_WAITALL); if (bytes_sent != sizeof(response)) { printf("%s ERROR: Failed calling send() invalid bytes_sent=%ld errno=%d=%s\n", log_name, bytes_sent, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); break; } -#ifdef DEBUG - else if (verbose) printf("Response sent to client\n"); -#endif +//#ifdef DEBUG + else if (verbose) + { + printf("Response sent to client\n"); + fflush(stdout); + fflush(stderr); + } +//#endif if (bReset) { - server_reset(); + server_reset_slow(); } if (bShutdown) @@ -780,7 +671,6 @@ int main(int argc, char *argv[]) // Close the server close(server_fd); - /************/ /* CLEAN UP */ /************/ @@ -805,3121 +695,32 @@ int main(int argc, char *argv[]) #endif } -void print_usage (void) -{ - printf("Usage: ziskemuasm\n"); - printf("\t-s(server)\n"); - printf("\t-c(client)\n"); - printf("\t-i \n"); - printf("\t-p \n"); - printf("\t--gen=0|--generate_fast\n"); - printf("\t--gen=1|--generate_minimal_trace\n"); - printf("\t--gen=2|--generate_rom_histogram\n"); - printf("\t--gen=3|--generate_main_trace\n"); - printf("\t--gen=4|--generate_chunks\n"); - printf("\t--gen=6|--generate_zip\n"); - printf("\t--gen=9|--generate_mem_reads\n"); - printf("\t--gen=10|--generate_chunk_player_mem_reads\n"); - printf("\t--chunk \n"); - printf("\t--shutdown\n"); - printf("\t--mt \n"); - printf("\t-o output on\n"); - printf("\t--silent silent on\n"); - printf("\t--shm_prefix (default: ZISK)\n"); - printf("\t-m metrics on\n"); - printf("\t-t trace on\n"); - printf("\t-tt trace_trace on\n"); - printf("\t-f(save to file)\n"); - printf("\t-a chunk_address\n"); - printf("\t-v verbose on\n"); - printf("\t-u unlock physical memory in mmap\n"); -#ifdef ASM_PRECOMPILE_CACHE - printf("\t--precompile-cache-store store precompile results in cache file\n"); - printf("\t--precompile-cache-load load precompile results from cache file\n"); -#endif - printf("\t-h/--help print this\n"); -} - -void parse_arguments(int argc, char *argv[]) -{ - strcpy(shm_prefix, "ZISK"); - uint64_t number_of_selected_generation_methods = 0; - if (argc > 1) - { - for (int i = 1; i < argc; i++) - { - if (strcmp(argv[i], "-s") == 0) - { - server = true; - continue; - } - if (strcmp(argv[i], "-c") == 0) - { - client = true; - continue; - } - if ( (strcmp(argv[i], "--gen=0") == 0) || (strcmp(argv[i], "--generate_fast") == 0)) - { - gen_method = Fast; - number_of_selected_generation_methods++; - continue; - } - if ( (strcmp(argv[i], "--gen=1") == 0) || (strcmp(argv[i], "--generate_minimal_trace") == 0)) - { - gen_method = MinimalTrace; - number_of_selected_generation_methods++; - continue; - } - if ( (strcmp(argv[i], "--gen=2") == 0) || (strcmp(argv[i], "--generate_rom_histogram") == 0)) - { - gen_method = RomHistogram; - number_of_selected_generation_methods++; - continue; - } - if ( (strcmp(argv[i], "--gen=3") == 0) || (strcmp(argv[i], "--generate_main_trace") == 0)) - { - gen_method = MainTrace; - number_of_selected_generation_methods++; - continue; - } - if ( (strcmp(argv[i], "--gen=4") == 0) || (strcmp(argv[i], "--generate_chunks") == 0)) - { - gen_method = ChunksOnly; - number_of_selected_generation_methods++; - continue; - } - if ( (strcmp(argv[i], "--gen=6") == 0) || (strcmp(argv[i], "--generate_zip") == 0)) - { - gen_method = Zip; - number_of_selected_generation_methods++; - continue; - } - if ( (strcmp(argv[i], "--gen=7") == 0) || (strcmp(argv[i], "--generate_mem_op") == 0)) - { - gen_method = MemOp; - number_of_selected_generation_methods++; - continue; - } - if ( (strcmp(argv[i], "--gen=8") == 0) || (strcmp(argv[i], "--generate_chunk_player_mt_collect_mem") == 0)) - { - gen_method = ChunkPlayerMTCollectMem; - number_of_selected_generation_methods++; - continue; - } - if ( (strcmp(argv[i], "--gen=9") == 0) || (strcmp(argv[i], "--generate_mem_reads") == 0)) - { - gen_method = MemReads; - number_of_selected_generation_methods++; - continue; - } - if ( (strcmp(argv[i], "--gen=10") == 0) || (strcmp(argv[i], "--generate_chunk_player_mem_reads") == 0)) - { - gen_method = ChunkPlayerMemReadsCollectMain; - number_of_selected_generation_methods++; - continue; - } - if (strcmp(argv[i], "-o") == 0) - { - output = true; - continue; - } - if (strcmp(argv[i], "--silent") == 0) - { - silent = true; - continue; - } - if (strcmp(argv[i], "-m") == 0) - { - metrics = true; - continue; - } - if (strcmp(argv[i], "-t") == 0) - { - trace = true; - continue; - } - if (strcmp(argv[i], "-tt") == 0) - { - trace = true; - trace_trace = true; - continue; - } - if (strcmp(argv[i], "-v") == 0) - { - verbose = true; - //emu_verbose = true; - continue; - } - if (strcmp(argv[i], "-u") == 0) - { - map_locked_flag = 0; - continue; - } - if (strcmp(argv[i], "-h") == 0) - { - print_usage(); - exit(0); - } - if (strcmp(argv[i], "--help") == 0) - { - print_usage(); - continue; - } - if (strcmp(argv[i], "-i") == 0) - { - i++; - if (i >= argc) - { - printf("ERROR: Detected argument -i in the last position; please provide input file after it\n"); - print_usage(); - exit(-1); - } - if (strlen(argv[i]) > 4095) - { - printf("ERROR: Detected argument -i but next argument is too long\n"); - print_usage(); - exit(-1); - } - strcpy(input_file, argv[i]); - continue; - } - if (strcmp(argv[i], "--shm_prefix") == 0) - { - i++; - if (i >= argc) - { - printf("ERROR: Detected argument -i in the last position; please provide shared mem prefix after it\n"); - print_usage(); - exit(-1); - } - if (strlen(argv[i]) > MAX_SHM_PREFIX_LENGTH) - { - printf("ERROR: Detected argument -i but next argument is too long\n"); - print_usage(); - exit(-1); - } - strcpy(shm_prefix, argv[i]); - continue; - } - if (strcmp(argv[i], "--chunk") == 0) - { - i++; - if (i >= argc) - { - printf("ERROR: Detected argument -c in the last position; please provide chunk number after it\n"); - print_usage(); - exit(-1); - } - errno = 0; - char *endptr; - chunk_mask = strtoul(argv[i], &endptr, 10); - - // Check for errors - if (errno == ERANGE) { - printf("ERROR: Chunk number is too large\n"); - print_usage(); - exit(-1); - } else if (endptr == argv[i]) { - printf("ERROR: No digits found while parsing chunk number\n"); - print_usage(); - exit(-1); - } else if (*endptr != '\0') { - printf("ERROR: Extra characters after chunk number: %s\n", endptr); - print_usage(); - exit(-1); - } else if (chunk_mask > MAX_CHUNK_MASK) { - printf("ERROR: Invalid chunk number: %lu\n", chunk_mask); - print_usage(); - exit(-1); - } else { - printf("Got chunk_mask= %lu\n", chunk_mask); - } - continue; - } - if (strcmp(argv[i], "--shutdown") == 0) - { - do_shutdown = true; - continue; - } - if (strcmp(argv[i], "--mt") == 0) - { - i++; - if (i >= argc) - { - printf("ERROR: Detected argument -mt in the last position; please provide number of MT requests after it\n"); - print_usage(); - exit(-1); - } - errno = 0; - char *endptr; - number_of_mt_requests = strtoul(argv[i], &endptr, 10); - - // Check for errors - if (errno == ERANGE) { - printf("ERROR: Number of MT requests is too large\n"); - print_usage(); - exit(-1); - } else if (endptr == argv[i]) { - printf("ERROR: No digits found while parsing number of MT requests\n"); - print_usage(); - exit(-1); - } else if (*endptr != '\0') { - printf("ERROR: Extra characters after number of MT requests: %s\n", endptr); - print_usage(); - exit(-1); - } else if (number_of_mt_requests > 1000000) { - printf("ERROR: Invalid number of MT requests: %lu\n", number_of_mt_requests); - print_usage(); - exit(-1); - } else { - printf("Got number of MT requests= %lu\n", number_of_mt_requests); - } - continue; - } - if (strcmp(argv[i], "-p") == 0) - { - i++; - if (i >= argc) - { - printf("ERROR: Detected argument -p in the last position; please provide port number after it\n"); - print_usage(); - exit(-1); - } - errno = 0; - char *endptr; - arguments_port = strtoul(argv[i], &endptr, 10); - - // Check for errors - if (errno == ERANGE) { - printf("ERROR: Port number is too large\n"); - print_usage(); - exit(-1); - } else if (endptr == argv[i]) { - printf("ERROR: No digits found while parsing port number\n"); - print_usage(); - exit(-1); - } else if (*endptr != '\0') { - printf("ERROR: Extra characters after port number: %s\n", endptr); - print_usage(); - exit(-1); - } else { - printf("Got port number= %u\n", arguments_port); - } - continue; - } - if (strcmp(argv[i], "-f") == 0) - { - save_to_file = true; - continue; - } - if (strcmp(argv[i], "-a") == 0) - { - i++; - if (i >= argc) - { - printf("ERROR: Detected argument -a in the last position; please provide chunk address after it\n"); - print_usage(); - exit(-1); - } - errno = 0; - char *endptr; - char * argument = argv[i]; - if ((argument[0] == '0') && (argument[1] == 'x')) argument += 2; - chunk_player_address = strtoul(argv[i], &endptr, 16); - - // Check for errors - if (errno == ERANGE) { - printf("ERROR: Chunk address is too large\n"); - print_usage(); - exit(-1); - } else if (endptr == argument) { - printf("ERROR: No digits found while parsing chunk addresss\n"); - print_usage(); - exit(-1); - } else if (*endptr != '\0') { - printf("ERROR: Extra characters after chunk address: %s\n", endptr); - print_usage(); - exit(-1); - } else { - printf("Got chunk address= %p\n", (void *)chunk_player_address); - } - continue; - } -#ifdef ASM_PRECOMPILE_CACHE - if (strcmp(argv[i], "--precompile-cache-store") == 0) - { - precompile_cache_enabled = true; - precompile_cache_store_init(); - continue; - } - if (strcmp(argv[i], "--precompile-cache-load") == 0) - { - precompile_cache_enabled = true; - precompile_cache_load_init(); - continue; - } - -#endif - printf("ERROR: parse_arguments() Unrecognized argument: %s\n", argv[i]); - print_usage(); - fflush(stdout); - fflush(stderr); - exit(-1); - } - } -#ifdef ASM_PRECOMPILE_CACHE - if (precompile_cache_enabled == false) - { - printf("ERROR: parse_arguments() when in precompile cache mode, you need to use an argument: either --precompile-cache-store or --precompile-cache-load\n"); - print_usage(); - fflush(stdout); - fflush(stderr); - exit(-1); - } -#endif +/*************/ +/* FILE LOCK */ +/*************/ - // Check that only one generation method was selected as an argument - if (number_of_selected_generation_methods != 1) - { - printf("ERROR! parse_arguments() Invalid arguments: select 1 generation method, and only one\n"); - print_usage(); - fflush(stdout); - fflush(stderr); - exit(-1); - } +#ifdef USE_FILE_LOCK - // Check that the generation method selected by the process launcher is the same as the one - // for which the assembly code was generated - uint64_t asm_gen_method = get_gen_method(); - if (asm_gen_method != gen_method) - { - printf("ERROR! parse_arguments() Inconsistency: C generation method is %u but ASM generation method is %lu\n", - gen_method, - asm_gen_method); - print_usage(); +// Lock file exclusively to ensure that only one instance of the program is running at a time +void file_lock(void) +{ + // Open (or create) the lock file. We don't need to write to it. + file_lock_fd = open(file_lock_name, O_CREAT | O_RDONLY, 0644); + if (file_lock_fd == -1) { + printf("ERROR: file_lock() failed calling open(%s) errno=%d=%s\n", file_lock_name, errno, strerror(errno)); fflush(stdout); fflush(stderr); - exit(-1); + exit(1); } - // Check server/client - if (server && client) - { - printf("ERROR! parse_arguments() Inconsistency: both server and client at the same time is not possible\n"); - print_usage(); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if (!server && !client) - { - printf("ERROR! parse_arguments() Inconsistency: select server or client\n"); - print_usage(); + // Try to acquire an exclusive lock, non-blocking. + if (flock(file_lock_fd, LOCK_EX | LOCK_NB) == -1) { + // If we fail to get the lock, another instance is running. + printf("ERROR: Another instance of this program is already running.\n"); fflush(stdout); fflush(stderr); - exit(-1); + exit(1); } } -void configure (void) -{ - // Select configuration based on generation method - switch (gen_method) - { - case Fast: - { - strcpy(shmem_input_name, shm_prefix); - strcat(shmem_input_name, "_FT_input"); - strcpy(shmem_output_name, ""); - strcpy(sem_chunk_done_name, ""); - strcpy(sem_shutdown_done_name, shm_prefix); - strcat(sem_shutdown_done_name, "_FT_shutdown_done"); - strcpy(shmem_mt_name, ""); - strcpy(file_lock_name, "/tmp/"); - strcat(file_lock_name, shm_prefix); - strcat(file_lock_name, ".lock"); - strcpy(log_name, shm_prefix); - strcat(log_name, "_FT"); - port = 23120; - break; - } - case MinimalTrace: - { - strcpy(shmem_input_name, shm_prefix); - strcat(shmem_input_name, "_MT_input"); - strcpy(shmem_output_name, shm_prefix); - strcat(shmem_output_name, "_MT_output"); - strcpy(sem_chunk_done_name, shm_prefix); - strcat(sem_chunk_done_name, "_MT_chunk_done"); - strcpy(sem_shutdown_done_name, shm_prefix); - strcat(sem_shutdown_done_name, "_MT_shutdown_done"); - strcpy(shmem_mt_name, ""); - strcpy(file_lock_name, "/tmp/"); - strcat(file_lock_name, shm_prefix); - strcat(file_lock_name, ".lock"); - strcpy(log_name, shm_prefix); - strcat(log_name, "_MT"); - call_chunk_done = true; - port = 23115; - break; - } - case RomHistogram: - { - strcpy(shmem_input_name, shm_prefix); - strcat(shmem_input_name, "_RH_input"); - strcpy(shmem_output_name, shm_prefix); - strcat(shmem_output_name, "_RH_output"); - strcpy(sem_chunk_done_name, shm_prefix); - strcat(sem_chunk_done_name, "_RH_chunk_done"); - strcpy(sem_shutdown_done_name, shm_prefix); - strcat(sem_shutdown_done_name, "_RH_shutdown_done"); - strcpy(shmem_mt_name, ""); - strcpy(file_lock_name, "/tmp/"); - strcat(file_lock_name, shm_prefix); - strcat(file_lock_name, ".lock"); - strcpy(log_name, shm_prefix); - strcat(log_name, "_RH"); - call_chunk_done = true; - port = 23116; - break; - } - case MainTrace: - { - strcpy(shmem_input_name, shm_prefix); - strcat(shmem_input_name, "_MA_input"); - strcpy(shmem_output_name, shm_prefix); - strcat(shmem_output_name, "_MA_output"); - strcpy(sem_chunk_done_name, shm_prefix); - strcat(sem_chunk_done_name, "_MA_chunk_done"); - strcpy(sem_shutdown_done_name, shm_prefix); - strcat(sem_shutdown_done_name, "_MA_shutdown_done"); - strcpy(shmem_mt_name, ""); - strcpy(file_lock_name, "/tmp/"); - strcat(file_lock_name, shm_prefix); - strcat(file_lock_name, ".lock"); - strcpy(log_name, shm_prefix); - strcat(log_name, "_MA"); - call_chunk_done = true; - port = 23118; - break; - } - case ChunksOnly: - { - strcpy(shmem_input_name, shm_prefix); - strcat(shmem_input_name, "_CH_input"); - strcpy(shmem_output_name, shm_prefix); - strcat(shmem_output_name, "_CH_output"); - strcpy(sem_chunk_done_name, shm_prefix); - strcat(sem_chunk_done_name, "_CH_chunk_done"); - strcpy(sem_shutdown_done_name, shm_prefix); - strcat(sem_shutdown_done_name, "_CH_shutdown_done"); - strcpy(shmem_mt_name, ""); - strcpy(file_lock_name, "/tmp/"); - strcat(file_lock_name, shm_prefix); - strcat(file_lock_name, ".lock"); - strcpy(log_name, shm_prefix); - strcat(log_name, "_CH"); - call_chunk_done = true; - port = 23115; - break; - } - // case BusOp: - // { - // strcpy(shmem_input_name, "ZISKBO_input"); - // strcpy(shmem_output_name, "ZISKBO_output"); - // strcpy(sem_chunk_done_name, "ZISKBO_chunk_done"); - // chunk_done = true; - // port = 23115; - // break; - // } - case Zip: - { - strcpy(shmem_input_name, shm_prefix); - strcat(shmem_input_name, "_ZP_input"); - strcpy(shmem_output_name, shm_prefix); - strcat(shmem_output_name, "_ZP_output"); - strcpy(sem_chunk_done_name, shm_prefix); - strcat(sem_chunk_done_name, "_ZP_chunk_done"); - strcpy(sem_shutdown_done_name, shm_prefix); - strcat(sem_shutdown_done_name, "_ZP_shutdown_done"); - strcpy(shmem_mt_name, ""); - strcpy(file_lock_name, "/tmp/"); - strcat(file_lock_name, shm_prefix); - strcat(file_lock_name, ".lock"); - strcpy(log_name, shm_prefix); - strcat(log_name, "_ZP"); - call_chunk_done = true; - port = 23115; - break; - } - case MemOp: - { - strcpy(shmem_input_name, shm_prefix); - strcat(shmem_input_name, "_MO_input"); - strcpy(shmem_output_name, shm_prefix); - strcat(shmem_output_name, "_MO_output"); - strcpy(sem_chunk_done_name, shm_prefix); - strcat(sem_chunk_done_name, "_MO_chunk_done"); - strcpy(sem_shutdown_done_name, shm_prefix); - strcat(sem_shutdown_done_name, "_MO_shutdown_done"); - strcpy(shmem_mt_name, ""); - strcpy(file_lock_name, "/tmp/"); - strcat(file_lock_name, shm_prefix); - strcat(file_lock_name, ".lock"); - strcpy(log_name, shm_prefix); - strcat(log_name, "_MO"); - call_chunk_done = true; - port = 23117; - break; - } - case ChunkPlayerMTCollectMem: - { - strcpy(shmem_input_name, ""); - strcpy(shmem_output_name, shm_prefix); - strcat(shmem_output_name, "_CM_output"); - strcpy(sem_chunk_done_name, ""); - strcpy(sem_shutdown_done_name, ""); - strcpy(shmem_mt_name, shm_prefix); - strcat(shmem_mt_name, "_MT_output"); - strcpy(file_lock_name, "/tmp/"); - strcat(file_lock_name, shm_prefix); - strcat(file_lock_name, ".lock"); - strcpy(log_name, shm_prefix); - strcat(log_name, "_CM"); - call_chunk_done = false; - port = 23119; - break; - } - case MemReads: - { - strcpy(shmem_input_name, shm_prefix); - strcat(shmem_input_name, "_MT_input"); - strcpy(shmem_output_name, shm_prefix); - strcat(shmem_output_name, "_MT_output"); - strcpy(sem_chunk_done_name, shm_prefix); - strcat(sem_chunk_done_name, "_MT_chunk_done"); - strcpy(sem_shutdown_done_name, shm_prefix); - strcat(sem_shutdown_done_name, "_MT_shutdown_done"); - strcpy(shmem_mt_name, ""); - strcpy(file_lock_name, "/tmp/"); - strcat(file_lock_name, shm_prefix); - strcat(file_lock_name, ".lock"); - strcpy(log_name, shm_prefix); - strcat(log_name, "_MT"); - call_chunk_done = true; - port = 23115; - break; - } - case ChunkPlayerMemReadsCollectMain: - { - strcpy(shmem_input_name, ""); - strcpy(shmem_output_name, shm_prefix); - strcat(shmem_output_name, "_CA_output"); - strcpy(sem_chunk_done_name, ""); - strcpy(sem_shutdown_done_name, ""); - strcpy(shmem_mt_name, shm_prefix); - strcat(shmem_mt_name, "_MT_output"); - strcpy(file_lock_name, "/tmp/"); - strcat(file_lock_name, shm_prefix); - strcat(file_lock_name, ".lock"); - strcpy(log_name, shm_prefix); - strcat(log_name, "_CA"); - call_chunk_done = false; - port = 23120; - break; - } - default: - { - printf("ERROR: configure() Invalid gen_method = %u\n", gen_method); - fflush(stdout); - fflush(stderr); - exit(-1); - } - } - - if (arguments_port != 0) - { - port = arguments_port; - } - - if (verbose) - { - printf("ziskemuasm configuration:\n"); - printf("\tgen_method=%u\n", gen_method); - printf("\tshm_prefix=%s\n", shm_prefix); - printf("\tfile_lock_name=%s\n", file_lock_name); - printf("\tlog_name=%s\n", log_name); - printf("\tport=%u\n", port); - printf("\tcall_chunk_done=%u\n", call_chunk_done); - printf("\tchunk_size=%lu\n", chunk_size); - printf("\tshmem_input=%s\n", shmem_input_name); - printf("\tshmem_output=%s\n", shmem_output_name); - printf("\tshmem_mt=%s\n", shmem_mt_name); - printf("\tsem_chunk_done=%s\n", sem_chunk_done_name); - printf("\tsem_shutdown_done=%s\n", sem_shutdown_done_name); - printf("\tmap_locked_flag=%d\n", map_locked_flag); - printf("\toutput=%u\n", output); - } -} - -void client_setup (void) -{ - assert(!server); - assert(client); - - int result; - - /***********************/ - /* INPUT MINIMAL TRACE */ - /***********************/ - - // Input MT trace - if ((gen_method == ChunkPlayerMTCollectMem) || (gen_method == ChunkPlayerMemReadsCollectMain)) - { - // Create the output shared memory - shmem_mt_fd = shm_open(shmem_mt_name, O_RDONLY, 0666); - if (shmem_mt_fd < 0) - { - printf("ERROR: Failed calling shm_open(%s) errno=%d=%s\n", shmem_mt_name, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Map it to the trace address -#ifdef DEBUG - gettimeofday(&start_time, NULL); -#endif - void * pTrace = mmap((void *)TRACE_ADDR, chunk_player_mt_size, PROT_READ, MAP_SHARED | MAP_FIXED | map_locked_flag, shmem_mt_fd, 0); -#ifdef DEBUG - gettimeofday(&stop_time, NULL); - duration = TimeDiff(start_time, stop_time); -#endif - if (pTrace == MAP_FAILED) - { - printf("ERROR: Failed calling mmap(MT) errno=%d=%s\n", errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if ((uint64_t)pTrace != TRACE_ADDR) - { - printf("ERROR: Called mmap(MT) but returned address = %p != 0x%lx\n", pTrace, TRACE_ADDR); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if (verbose) printf("mmap(MT) returned %p in %lu us\n", pTrace, duration); - } -} - -void client_run (void) -{ - assert(client); - assert(!server); - - int result; - - /************************/ - /* Read input file data */ - /************************/ - if ((gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain)) - { - -#ifdef DEBUG - gettimeofday(&start_time, NULL); -#endif - - // Open input file - FILE * input_fp = fopen(input_file, "r"); - if (input_fp == NULL) - { - printf("ERROR: Failed calling fopen(%s) errno=%d=%s; does it exist?\n", input_file, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Get input file size - if (fseek(input_fp, 0, SEEK_END) == -1) - { - printf("ERROR: Failed calling fseek(%s) errno=%d=%s\n", input_file, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - long input_data_size = ftell(input_fp); - if (input_data_size == -1) - { - printf("ERROR: Failed calling ftell(%s) errno=%d=%s\n", input_file, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Go back to the first byte - if (fseek(input_fp, 0, SEEK_SET) == -1) - { - printf("ERROR: Failed calling fseek(%s, 0) errno=%d=%s\n", input_file, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Check the input data size is inside the proper range - if (input_data_size > (MAX_INPUT_SIZE - 16)) - { - printf("ERROR: Size of input file (%s) is too long (%lu)\n", input_file, input_data_size); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Open input shared memory - shmem_input_fd = shm_open(shmem_input_name, O_RDWR, 0666); - if (shmem_input_fd < 0) - { - printf("ERROR: Failed calling shm_open(%s) errno=%d=%s\n", shmem_input_name, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Map the shared memory object into the process address space - shmem_input_address = mmap(NULL, MAX_INPUT_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shmem_input_fd, 0); - if (shmem_input_address == MAP_FAILED) - { - printf("ERROR: Failed calling mmap(%s) errno=%d=%s\n", shmem_input_name, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Write the input size in the first 64 bits - *(uint64_t *)shmem_input_address = (uint64_t)0; // free input - *(uint64_t *)(shmem_input_address + 8)= (uint64_t)input_data_size; - - // Copy input data into input memory - size_t input_read = fread(shmem_input_address + 16, 1, input_data_size, input_fp); - if (input_read != input_data_size) - { - printf("ERROR: Input read (%lu) != input file size (%lu)\n", input_read, input_data_size); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Close the file pointer - fclose(input_fp); - - // Unmap input - result = munmap(shmem_input_address, MAX_INPUT_SIZE); - if (result == -1) - { - printf("ERROR: Failed calling munmap(input) errno=%d=%s\n", errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - -#ifdef DEBUG - gettimeofday(&stop_time, NULL); - duration = TimeDiff(start_time, stop_time); - printf("client (input): done in %lu us\n", duration); -#endif - - } - - /*************************/ - /* Connect to the server */ - /*************************/ - - // Create socket to connect to server - int socket_fd; - socket_fd = socket(AF_INET, SOCK_STREAM, 0); - if (socket_fd < 0) - { - printf("ERROR: socket() failed socket_fd=%d errno=%d=%s\n", socket_fd, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Configure server address - struct sockaddr_in server_addr; - server_addr.sin_family = AF_INET; - server_addr.sin_port = htons(port); - - result = inet_pton(AF_INET, SERVER_IP, &server_addr.sin_addr); - if (result <= 0) - { - printf("ERROR: inet_pton() failed. Invalid address/Address not supported result=%d errno=%d=%s\n", result, errno, strerror(errno)); - exit(-1); - } - - // Connect to server - result = connect(socket_fd, (struct sockaddr *)&server_addr, sizeof(server_addr)); - if (result < 0) - { - printf("ERROR: connect() failed result=%d errno=%d=%s\n", result, errno, strerror(errno)); - exit(-1); - } - if (verbose) printf("connect()'d to port=%u\n", port); - - // Request and response - uint64_t request[5]; - uint64_t response[5]; - - /********/ - /* Ping */ - /********/ - - gettimeofday(&start_time, NULL); - - // Prepare message to send - request[0] = TYPE_PING; - request[1] = 0; - request[2] = 0; - request[3] = 0; - request[4] = 0; - - // Send data to server - result = send(socket_fd, request, sizeof(request), 0); - if (result < 0) - { - printf("ERROR: send() failed result=%d errno=%d=%s\n", result, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Read server response - ssize_t bytes_received = recv(socket_fd, response, sizeof(response), MSG_WAITALL); - if (bytes_received < 0) - { - printf("ERROR: recv_all_with_timeout() failed result=%d errno=%d=%s\n", result, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if (bytes_received != sizeof(response)) - { - printf("ERROR: recv_all_with_timeout() returned bytes_received=%ld errno=%d=%s\n", bytes_received, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if (response[0] != TYPE_PONG) - { - printf("ERROR: recv_all_with_timeout() returned unexpected type=%lu\n", response[0]); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if (response[1] != gen_method) - { - printf("ERROR: recv_all_with_timeout() returned unexpected gen_method=%lu\n", response[1]); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - gettimeofday(&stop_time, NULL); - duration = TimeDiff(start_time, stop_time); - printf("client (PING): done in %lu us\n", duration); - - /*****************/ - /* Minimal trace */ - /*****************/ - for (uint64_t i=0; i= 0x1000); - assert((max_bios_pc & 0x3) == 0); - assert(max_program_pc >= 0x80000000); - - // Calculate sizes - bios_size = ((max_bios_pc - 0x1000) >> 2) + 1; - program_size = max_program_pc - 0x80000000 + 1; - histogram_size = (4 + 1 + bios_size + 1 + program_size)*8; -#define TRACE_SIZE_GRANULARITY (1014*1014) - initial_trace_size = ((histogram_size/TRACE_SIZE_GRANULARITY) + 1) * TRACE_SIZE_GRANULARITY; - trace_size = initial_trace_size; - } - - // Output trace - if ((gen_method == MinimalTrace) || - (gen_method == RomHistogram) || - (gen_method == MainTrace) || - (gen_method == Zip) || - (gen_method == MemOp) || - (gen_method == ChunkPlayerMTCollectMem) || - (gen_method == MemReads) || - (gen_method == ChunkPlayerMemReadsCollectMain)) - { - // Make sure the output shared memory is deleted - shm_unlink(shmem_output_name); - - // Create the output shared memory - shmem_output_fd = shm_open(shmem_output_name, O_RDWR | O_CREAT | O_EXCL, 0666); - if (shmem_output_fd < 0) - { - printf("ERROR: Failed calling shm_open(%s) errno=%d=%s\n", shmem_output_name, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Size it - result = ftruncate(shmem_output_fd, trace_size); - if (result != 0) - { - printf("ERROR: Failed calling ftruncate(%s) errno=%d=%s\n", shmem_output_name, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Sync - fsync(shmem_output_fd); - - // Map it to the trace address - if (verbose) gettimeofday(&start_time, NULL); - void * requested_address; - if ((gen_method == ChunkPlayerMTCollectMem) || (gen_method == ChunkPlayerMemReadsCollectMain)) - { - requested_address = 0; - } - else - { - requested_address = (void *)TRACE_ADDR; - } - int flags = MAP_SHARED | map_locked_flag; - if ((gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain)) - { - flags |= MAP_FIXED; - } - void * pTrace = mmap(requested_address, trace_size, PROT_READ | PROT_WRITE, flags, shmem_output_fd, 0); - if (verbose) - { - gettimeofday(&stop_time, NULL); - duration = TimeDiff(start_time, stop_time); - } - if (pTrace == MAP_FAILED) - { - printf("ERROR: Failed calling mmap(pTrace) name=%s errno=%d=%s\n", shmem_output_name, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if ((gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain) && ((uint64_t)pTrace != TRACE_ADDR)) - { - printf("ERROR: Called mmap(trace) but returned address = %p != 0x%lx\n", pTrace, TRACE_ADDR); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if (verbose) printf("mmap(trace) mapped %lu B and returned address %p in %lu us\n", trace_size, pTrace, duration); - - trace_address = (uint64_t)pTrace; - pOutputTrace = pTrace; - } - - /***********************/ - /* INPUT MINIMAL TRACE */ - /***********************/ - - // Input MT trace - if ((gen_method == ChunkPlayerMTCollectMem) || (gen_method == ChunkPlayerMemReadsCollectMain)) - { - // Create the output shared memory - shmem_mt_fd = shm_open(shmem_mt_name, O_RDONLY, 0666); - if (shmem_mt_fd < 0) - { - printf("ERROR: Failed calling shm_open(%s) errno=%d=%s\n", shmem_mt_name, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Map it to the trace address -#ifdef DEBUG - gettimeofday(&start_time, NULL); -#endif - void * pTrace = mmap((void *)TRACE_ADDR, chunk_player_mt_size, PROT_READ, MAP_SHARED | MAP_FIXED | map_locked_flag, shmem_mt_fd, 0); -#ifdef DEBUG - gettimeofday(&stop_time, NULL); - duration = TimeDiff(start_time, stop_time); -#endif - if (pTrace == MAP_FAILED) - { - printf("ERROR: Failed calling mmap(MT) errno=%d=%s\n", errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if ((uint64_t)pTrace != TRACE_ADDR) - { - printf("ERROR: Called mmap(MT) but returned address = %p != 0x%lx\n", pTrace, TRACE_ADDR); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if (verbose) printf("mmap(MT) returned %p in %lu us\n", pTrace, duration); - } - - /******************/ - /* SEM CHUNK DONE */ - /******************/ - - if (call_chunk_done) - { - assert(strlen(sem_chunk_done_name) > 0); - - sem_unlink(sem_chunk_done_name); - - sem_chunk_done = sem_open(sem_chunk_done_name, O_CREAT | O_EXCL, 0666, 0); - if (sem_chunk_done == SEM_FAILED) - { - printf("ERROR: Failed calling sem_open(%s) errno=%d=%s\n", sem_chunk_done_name, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if (verbose) printf("sem_open(%s) succeeded\n", sem_chunk_done_name); - } - - /*********************/ - /* SEM SHUTDOWN DONE */ - /*********************/ - - assert(strlen(sem_shutdown_done_name) > 0); - - sem_unlink(sem_shutdown_done_name); - - sem_shutdown_done = sem_open(sem_shutdown_done_name, O_CREAT | O_EXCL, 0666, 0); - if (sem_shutdown_done == SEM_FAILED) - { - printf("ERROR: Failed calling sem_open(%s) errno=%d=%s\n", sem_shutdown_done_name, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if (verbose) printf("sem_open(%s) succeeded\n", sem_shutdown_done_name); -} - -void server_reset (void) -{ - // Reset RAM data for next emulation - if ((gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain)) - { -#ifdef DEBUG - gettimeofday(&start_time, NULL); -#endif - memset((void *)RAM_ADDR, 0, RAM_SIZE); -#ifdef DEBUG - gettimeofday(&stop_time, NULL); - duration = TimeDiff(start_time, stop_time); - if (verbose) printf("server_reset() memset(ram) in %lu us\n", duration); -#endif - if ((gen_method != Fast) && (gen_method != RomHistogram)) - { - // Reset trace: init output header data - pOutputTrace[0] = 0x000100; // Version, e.g. v1.0.0 [8] - pOutputTrace[1] = 1; // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] - pOutputTrace[2] = trace_size; // MT allocated size [8] -> to be updated after reallocation - pOutputTrace[3] = 0; // MT used size [8] -> to be updated after completion - - // Reset trace used size - trace_used_size = 0; - } - } -} - -void server_run (void) -{ - if ((gen_method == RomHistogram)) { - memset((void *)trace_address, 0, trace_size); - } - -#ifdef ASM_CALL_METRICS - reset_asm_call_metrics(); -#endif - - // Init trace header - if ((gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain) && (gen_method != Fast)) - { - // Reset trace: init output header data - pOutputTrace[0] = 0x000100; // Version, e.g. v1.0.0 [8] - pOutputTrace[1] = 1; // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] - pOutputTrace[2] = trace_size; // MT allocated size [8] -> to be updated after reallocation - pOutputTrace[3] = 0; // MT used size [8] -> to be updated after completion - - // Reset trace used size - trace_used_size = 0; - } - - // Sync input shared memory - if (msync((void *)INPUT_ADDR, MAX_INPUT_SIZE, MS_SYNC) != 0) { - printf("ERROR: msync failed for shmem_input_address errno=%d=%s\n", errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - /*******/ - /* ASM */ - /*******/ - - // Call emulator assembly code - gettimeofday(&start_time,NULL); - if (verbose) printf("trace_address=%lx\n", trace_address); - emulator_start(); - gettimeofday(&stop_time,NULL); - assembly_duration = TimeDiff(start_time, stop_time); - - uint64_t final_trace_size = MEM_CHUNK_ADDRESS - MEM_TRACE_ADDRESS; - trace_used_size = final_trace_size + 32; - - if ( metrics ) - { - uint64_t duration = assembly_duration; - uint64_t steps = MEM_STEP; - uint64_t end = MEM_END; - uint64_t error = MEM_ERROR; - uint64_t step_duration_ns = steps == 0 ? 0 : (duration * 1000) / steps; - uint64_t step_tp_sec = duration == 0 ? 0 : steps * 1000000 / duration; - uint64_t final_trace_size_percentage = (final_trace_size * 100) / trace_size; - printf("Duration = %lu us, realloc counter = %lu, steps = %lu, step duration = %lu ns, tp = %lu steps/s, trace size = 0x%lx - 0x%lx = %lu B(%lu%%), end=%lu, error=%lu, max steps=%lu, chunk size=%lu\n", - duration, - realloc_counter, - steps, - step_duration_ns, - step_tp_sec, - MEM_CHUNK_ADDRESS, - MEM_TRACE_ADDRESS, - final_trace_size, - final_trace_size_percentage, - end, - error, - max_steps, - chunk_size); - if (gen_method == RomHistogram) - { - printf("Rom histogram size=%lu\n", histogram_size); - } - } - if (MEM_ERROR) - { - printf("Emulation ended with error code %lu\n", MEM_ERROR); - } - - // Log output - if (output) - { - unsigned int * pOutput = (unsigned int *)OUTPUT_ADDR; - unsigned int output_size = *pOutput; -#ifdef DEBUG - if (verbose) printf("Output size=%d\n", output_size); -#endif - - for (unsigned int i = 0; i < output_size; i++) - { - pOutput++; - printf("%08x\n", *pOutput); - } - } - - // Complete output header data - if ((gen_method == MinimalTrace) || - (gen_method == RomHistogram) || - (gen_method == Zip) || - (gen_method == MainTrace) || - (gen_method == MemOp) || - (gen_method == MemReads) || - (gen_method == ChunkPlayerMemReadsCollectMain)) - { - uint64_t * pOutput = (uint64_t *)trace_address; - pOutput[0] = 0x000100; // Version, e.g. v1.0.0 [8] - pOutput[1] = MEM_ERROR; // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] - pOutput[2] = trace_size; // MT allocated size [8] - //assert(final_trace_size > 32); - if (gen_method == RomHistogram) - { - pOutput[3] = MEM_STEP; - pOutput[4] = bios_size; - pOutput[4 + bios_size + 1] = program_size; - } - else - { - pOutput[3] = trace_used_size; // MT used size [8] - } - } - - // Notify client - if (gen_method == RomHistogram) - { - _chunk_done(); - } - - - // Notify the caller that the trace is ready to be consumed - // if (!is_file) - // { - // result = sem_post(sem_input); - // if (result == -1) - // { - // printf("Failed calling sem_post(%s) errno=%d=%s\n", sem_input_name, errno, strerror(errno)); - // fflush(stdout); - // fflush(stderr); - // exit(-1); - // } - // } - - -#ifdef ASM_CALL_METRICS - print_asm_call_metrics(assembly_duration); -#endif - - // Log trace - if (((gen_method == MinimalTrace) || (gen_method == Zip)) && trace) - { - log_minimal_trace(); - } - if ((gen_method == RomHistogram) && trace) - { - log_histogram(); - } - if ((gen_method == MainTrace) && trace) - { - log_main_trace(); - } - if ((gen_method == MemOp) && trace) - { - log_mem_op(); - } - if ((gen_method == MemOp) && save_to_file) - { - save_mem_op_to_files(); - } - if ((gen_method == ChunkPlayerMTCollectMem) && trace) - { - log_mem_trace(); - } - if ((gen_method == MemReads) && trace) - { - log_minimal_trace(); - } - if ((gen_method == ChunkPlayerMemReadsCollectMain) && trace) - { - log_chunk_player_main_trace(); - } -} - -void server_cleanup (void) -{ - // Cleanup ROM - int result = munmap((void *)ROM_ADDR, ROM_SIZE); - if (result == -1) - { - printf("ERROR: Failed calling munmap(rom) errno=%d=%s\n", errno, strerror(errno)); - } - - // Cleanup RAM - result = munmap((void *)RAM_ADDR, RAM_SIZE); - if (result == -1) - { - printf("ERROR: Failed calling munmap(ram) errno=%d=%s\n", errno, strerror(errno)); - } - - // Cleanup INPUT - result = munmap((void *)INPUT_ADDR, MAX_INPUT_SIZE); - if (result == -1) - { - printf("ERROR: Failed calling munmap(input) errno=%d=%s\n", errno, strerror(errno)); - } - result = shm_unlink(shmem_input_name); - if (result == -1) - { - printf("ERROR: Failed calling shm_unlink(%s) errno=%d=%s\n", shmem_input_name, errno, strerror(errno)); - } - - // Cleanup trace - result = munmap((void *)TRACE_ADDR, trace_size); - if (result == -1) - { - printf("ERROR: Failed calling munmap(trace) for size=%lu errno=%d=%s\n", trace_size, errno, strerror(errno)); - } - result = shm_unlink(shmem_output_name); - if (result == -1) - { - printf("ERROR: Failed calling shm_unlink(%s) errno=%d=%s\n", shmem_output_name, errno, strerror(errno)); - } - - // Cleanup chunk done semaphore - if (call_chunk_done) - { - result = sem_close(sem_chunk_done); - if (result == -1) - { - printf("ERROR: Failed calling sem_close(%s) errno=%d=%s\n", sem_chunk_done_name, errno, strerror(errno)); - } - result = sem_unlink(sem_chunk_done_name); - if (result == -1) - { - printf("ERROR: Failed calling sem_unlink(%s) errno=%d=%s\n", sem_chunk_done_name, errno, strerror(errno)); - } - } - - // Post shutdown donw semaphore - result = sem_post(sem_shutdown_done); - if (result == -1) - { - printf("ERROR: Failed calling sem_post(%s) errno=%d=%s\n", sem_shutdown_done_name, errno, strerror(errno)); - } -} - -// extern uint64_t reg_0; -// extern uint64_t reg_1; -// extern uint64_t reg_2; -// extern uint64_t reg_3; -// extern uint64_t reg_4; -// extern uint64_t reg_5; -// extern uint64_t reg_6; -// extern uint64_t reg_7; -// extern uint64_t reg_8; -// extern uint64_t reg_9; -// extern uint64_t reg_10; -// extern uint64_t reg_11; -// extern uint64_t reg_12; -// extern uint64_t reg_13; -// extern uint64_t reg_14; -// extern uint64_t reg_15; -// extern uint64_t reg_16; -// extern uint64_t reg_17; -// extern uint64_t reg_18; -// extern uint64_t reg_19; -// extern uint64_t reg_20; -// extern uint64_t reg_21; -// extern uint64_t reg_22; -// extern uint64_t reg_23; -// extern uint64_t reg_24; -// extern uint64_t reg_25; -// extern uint64_t reg_26; -// extern uint64_t reg_27; -// extern uint64_t reg_28; -// extern uint64_t reg_29; -// extern uint64_t reg_30; -// extern uint64_t reg_31; -// extern uint64_t reg_32; -// extern uint64_t reg_33; -// extern uint64_t reg_34; - -extern int _print_regs() -{ - // printf("print_regs()\n"); - // printf("\treg[ 0]=%lu=0x%lx=@%p\n", reg_0, reg_0, ®_0); - // //printf("\treg[ 1]=%lu=0x%lx=@%p\n", reg_1, reg_1, ®_1); - // //printf("\treg[ 2]=%lu=0x%lx=@%p\n", reg_2, reg_2, ®_2); - // printf("\treg[ 3]=%lu=0x%lx=@%p\n", reg_3, reg_3, ®_3); - // printf("\treg[ 4]=%lu=0x%lx=@%p\n", reg_4, reg_4, ®_4); - // /*printf("\treg[ 5]=%lu=0x%lx=@%p\n", reg_5, reg_5, ®_5); - // printf("\treg[ 6]=%lu=0x%lx=@%p\n", reg_6, reg_6, ®_6); - // printf("\treg[ 7]=%lu=0x%lx=@%p\n", reg_7, reg_7, ®_7); - // printf("\treg[ 8]=%lu=0x%lx=@%p\n", reg_8, reg_8, ®_8); - // printf("\treg[ 9]=%lu=0x%lx=@%p\n", reg_9, reg_9, ®_9); - // printf("\treg[10]=%lu=0x%lx=@%p\n", reg_10, reg_10, ®_10); - // printf("\treg[11]=%lu=0x%lx=@%p\n", reg_11, reg_11, ®_11); - // printf("\treg[12]=%lu=0x%lx=@%p\n", reg_12, reg_12, ®_12); - // printf("\treg[13]=%lu=0x%lx=@%p\n", reg_13, reg_13, ®_13); - // printf("\treg[14]=%lu=0x%lx=@%p\n", reg_14, reg_14, ®_14); - // printf("\treg[15]=%lu=0x%lx=@%p\n", reg_15, reg_15, ®_15); - // printf("\treg[16]=%lu=0x%lx=@%p\n", reg_16, reg_16, ®_16); - // printf("\treg[17]=%lu=0x%lx=@%p\n", reg_17, reg_17, ®_17); - // printf("\treg[18]=%lu=0x%lx=@%p\n", reg_18, reg_18, ®_18);*/ - // printf("\treg[19]=%lu=0x%lx=@%p\n", reg_19, reg_19, ®_19); - // printf("\treg[20]=%lu=0x%lx=@%p\n", reg_20, reg_20, ®_20); - // printf("\treg[21]=%lu=0x%lx=@%p\n", reg_21, reg_21, ®_21); - // printf("\treg[22]=%lu=0x%lx=@%p\n", reg_22, reg_22, ®_22); - // printf("\treg[23]=%lu=0x%lx=@%p\n", reg_23, reg_23, ®_23); - // printf("\treg[24]=%lu=0x%lx=@%p\n", reg_24, reg_24, ®_24); - // printf("\treg[25]=%lu=0x%lx=@%p\n", reg_25, reg_25, ®_25); - // printf("\treg[26]=%lu=0x%lx=@%p\n", reg_26, reg_26, ®_26); - // printf("\treg[27]=%lu=0x%lx=@%p\n", reg_27, reg_27, ®_27); - // printf("\treg[28]=%lu=0x%lx=@%p\n", reg_28, reg_28, ®_28); - // printf("\treg[29]=%lu=0x%lx=@%p\n", reg_29, reg_29, ®_29); - // printf("\treg[30]=%lu=0x%lx=@%p\n", reg_30, reg_30, ®_30); - // printf("\treg[31]=%lu=0x%lx=@%p\n", reg_31, reg_31, ®_31); - // printf("\treg[32]=%lu=0x%lx=@%p\n", reg_32, reg_32, ®_32); - // printf("\treg[33]=%lu=0x%lx=@%p\n", reg_33, reg_33, ®_33); - // printf("\treg[34]=%lu=0x%lx=@%p\n", reg_34, reg_34, ®_34); - // printf("\n"); -} - -extern int _print_pc (uint64_t pc, uint64_t c) -{ - printf("s=%lu pc=%lx c=%lx", print_pc_counter, pc, c); - /* Used for debugging - printf(" r0=%lx", reg_0); - printf(" r1=%lx", reg_1); - printf(" r2=%lx", reg_2); - printf(" r3=%lx", reg_3); - printf(" r4=%lx", reg_4); - printf(" r5=%lx", reg_5); - printf(" r6=%lx", reg_6); - printf(" r7=%lx", reg_7); - printf(" r8=%lx", reg_8); - printf(" r9=%lx", reg_9); - printf(" r10=%lx", reg_10); - printf(" r11=%lx", reg_11); - printf(" r12=%lx", reg_12); - printf(" r13=%lx", reg_13); - printf(" r14=%lx", reg_14); - printf(" r15=%lx", reg_15); - printf(" r16=%lx", reg_16); - printf(" r17=%lx", reg_17); - printf(" r18=%lx", reg_18); - printf(" r19=%lx", reg_19); - printf(" r20=%lx", reg_20); - printf(" r21=%lx", reg_21); - printf(" r22=%lx", reg_22); - printf(" r23=%lx", reg_23); - printf(" r24=%lx", reg_24); - printf(" r25=%lx", reg_25); - printf(" r26=%lx", reg_26); - printf(" r27=%lx", reg_27); - printf(" r28=%lx", reg_28); - printf(" r29=%lx", reg_29); - printf(" r30=%lx", reg_30); - printf(" r31=%lx", reg_31); - */ - printf("\n"); - fflush(stdout); - print_pc_counter++; -} - -//uint64_t chunk_done_counter = 0; -// struct timeval sync_start, sync_stop; -// uint64_t sync_duration = 0; -extern void _chunk_done() -{ - //chunk_done_counter++; - //printf("chunk_done() counter=%lu\n", chunk_done_counter); - //gettimeofday(&sync_start, NULL); - __sync_synchronize(); - // gettimeofday(&sync_stop, NULL); - // sync_duration += TimeDiff(sync_start, sync_stop); - // printf("chunk_done() sync_duration=%lu\n", sync_duration); - - // Notify the caller that a new chunk is done and its trace is ready to be consumed - assert(call_chunk_done); - int result = sem_post(sem_chunk_done); - if (result == -1) - { - printf("ERROR: Failed calling sem_post(%s) errno=%d=%s\n", sem_chunk_done_name, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } -} - -extern void _realloc_trace (void) -{ - realloc_counter++; - - // Calculate new trace size - uint64_t new_trace_size = trace_size * 2; - - // Extend the underlying file to the new size - int result = ftruncate(shmem_output_fd, new_trace_size); - if (result != 0) - { - printf("ERROR: realloc_trace() failed calling ftruncate(%s) of new size=%lu errno=%d=%s\n", shmem_output_name, new_trace_size, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Sync - fsync(shmem_output_fd); - - // Remap the memory - void * new_address = mremap((void *)trace_address, trace_size, new_trace_size, 0); - if ((uint64_t)new_address != trace_address) - { - printf("ERROR: realloc_trace() failed calling mremap() from size=%lu to %lu got new_address=%p errno=%d=%s\n", trace_size, new_trace_size, new_address, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - // Update trace global variables - set_trace_size(new_trace_size); - -#ifdef DEBUG - if (verbose) printf("realloc_trace() realloc counter=%lu trace_address=0x%lx trace_size=%lu=%lx max_address=0x%lx trace_address_threshold=0x%lx chunk_size=%lu\n", realloc_counter, trace_address, trace_size, trace_size, trace_address + trace_size, trace_address_threshold, chunk_size); -#endif -} - -/* Trace data structure - [8B] Number of chunks: C - - Chunk 0: - Start state: - [8B] pc - [8B] sp - [8B] c - [8B] step - [8B] register[1] - … - [8B] register[31] - [8B] register[32] - [8B] register[33] - Last state: - [8B] c - End: - [8B] end - Steps: - [8B] steps = chunk size except for the last chunk - [8B] mem_reads_size - [8B] mem_reads[0] - [8B] mem_reads[1] - … - [8B] mem_reads[mem_reads_size - 1] - - Chunk 1: - … - Chunk C-1: - … -*/ -void log_minimal_trace(void) -{ - uint64_t * pOutput = (uint64_t *)TRACE_ADDR; - printf("Version = 0x%06lx\n", pOutput[0]); // Version, e.g. v1.0.0 [8] - printf("Exit code = %lu\n", pOutput[1]); // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] - printf("Allocated size = %lu B\n", pOutput[2]); // Allocated size [8] - printf("Minimal trace used size = %lu B\n", pOutput[3]); // Minimal trace used size [8] - - printf("Trace content:\n"); - uint64_t * trace = (uint64_t *)MEM_TRACE_ADDRESS; - uint64_t number_of_chunks = trace[0]; - printf("Number of chunks=%lu\n", number_of_chunks); - if (number_of_chunks > 1000000) - { - printf("ERROR: Number of chunks is too high=%lu\n", number_of_chunks); - fflush(stdout); - fflush(stderr); - exit(-1); - } - uint64_t * chunk = trace + 1; - for (uint64_t c=0; c 10000000) - { - printf("ERROR: Mem reads size is too high=%lu\n", mem_reads_size); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if (trace_trace) - { - for (uint64_t m=0; m 100000000) - { - printf("ERROR: Bios size is too high=%lu\n", bios_size); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if (trace_trace) - { - uint64_t * bios = trace + 1; - for (uint64_t i=0; i 100000000) - { - printf("ERROR: Program size is too high=%lu\n", program_size); - fflush(stdout); - fflush(stderr); - exit(-1); - } - if (trace_trace) - { - uint64_t * program = trace + 1 + bios_size + 1; - for (uint64_t i=0; i 1000000) - { - printf("ERROR: Number of chunks is too high=%lu\n", number_of_chunks); - fflush(stdout); - fflush(stderr); - exit(-1); - } - uint64_t * chunk = trace + 1; - for (uint64_t c=0; c 10000000) - { - printf("ERROR: Main_trace size is too high=%lu\n", main_trace_size); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - if (trace_trace) - { - for (uint64_t m=0; m 0) - { - size_t bytes_written = fwrite(buffer_address, 1, buffer_length, file); - if (bytes_written != buffer_length) - { - printf("ERROR: buffer2file() failed calling fwrite(%s) buffer_address=%p buffer_length=%lu errno=%d=%s\n", file_name, buffer_address, buffer_length, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - fclose(file); - exit(-1); - } - } - - if (fclose(file) != 0) - { - printf("ERROR: buffer2file() failed calling fclose(%s) errno=%d=%s\n", file_name, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(-1); - } -} - -/* Memory operations structure - [8B] Number of chunks = C - - Chunk 0: - [8b] end - [8B] mem_op_trace_size - [8B] mem_op_trace[0] - [8B] mem_op_trace[1] - … - [8B] mem_op_trace[mem_op_trace_size - 1] - - Chunk 1: - … - Chunk C-1: - … -*/ -void log_mem_op(void) -{ - // Log header - uint64_t * pOutput = (uint64_t *)TRACE_ADDR; - printf("Version = 0x%06lx\n", pOutput[0]); // Version, e.g. v1.0.0 [8] - printf("Exit code = %lu\n", pOutput[1]); // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] - printf("Allocated size = %lu B\n", pOutput[2]); // Allocated size [8] - printf("Memory operations trace used size = %lu B\n", pOutput[3]); // Main trace used size [8] - - printf("Trace content:\n"); - uint64_t * trace = (uint64_t *)MEM_TRACE_ADDRESS; - uint64_t number_of_chunks = trace[0]; - printf("Number of chunks=%lu\n", number_of_chunks); - if (number_of_chunks > 1000000) - { - printf("ERROR: Number of chunks is too high=%lu\n", number_of_chunks); - fflush(stdout); - fflush(stderr); - exit(-1); - } - uint64_t * chunk = trace + 1; - for (uint64_t c=0; c 10000000) - { - printf("ERROR: Mem op trace size is too high=%lu\n", mem_op_trace_size); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - for (uint64_t m=0; m> 49) & 0x1; - uint64_t write = (chunk[i] >> 48) & 0x1; - uint64_t width = (chunk[i] >> 32) & 0xF; - uint64_t address = chunk[i] & 0xFFFFFFFF; - bool inside_range = - ((address >= RAM_ADDR) && (address < (RAM_ADDR + RAM_SIZE))) || - ((address >= ROM_ADDR) && (address < (ROM_ADDR + ROM_SIZE))) || - ((address >= INPUT_ADDR) && (address < (INPUT_ADDR + MAX_INPUT_SIZE))); - if (trace_trace || !inside_range) - { - printf("\t\tchunk[%lu].mem_op_trace[%lu] = %016lx = rest_are_zeros=%lx, write=%lx, width=%lx, address=%lx%s\n", - c, - m, - chunk[i], - rest_are_zeros, - write, - width, - address, - inside_range ? "" : " ERROR!!!!!!!!!!!!!!" - ); - } - i += 1; - } - - //Set next chunk pointer - chunk = chunk + i; - } - printf("Trace=%p chunk=%p size=%lu\n", trace, chunk, (uint64_t)chunk - (uint64_t)trace); -} - -/* Memory trace structure (for 1 chunk) - [8B] mem_trace_size - [16B] mem_trace[0] - [8B] mem operacion - [4B] address (LE) - [1B] width (1, 2, 4, 8) + write (0, 1) << 4 - [3B] - [16B] mem_trace[1] - … - [16B] mem_trace[mem_trace_size - 1] -*/ -void log_mem_trace(void) -{ - printf("Trace content:\n"); - uint64_t * trace = (uint64_t *)trace_address; - printf("log_mem_trace() trace_address=%p\n", trace); - uint64_t i=0; - printf("Version = 0x%06lx\n", trace[0]); // Version, e.g. v1.0.0 [8] - printf("Exit code = %lu\n", trace[1]); // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] - printf("Allocated size = %lu B\n", trace[2]); // Allocated size [8] - printf("Memory operations trace used size = %lu B\n", trace[3]); // Main trace used size [8] - i += 4; - uint64_t number_of_entries = trace[i]; - i++; - printf("Trace size=%lu\n", number_of_entries); - - for (uint64_t m = 0; m < number_of_entries; m++) - { - uint64_t addr_step = trace[i]; - i++; - - // addr_step = [@0, @1, @2, @3, width + write<<4, supra_step] - uint64_t address = addr_step & 0xFFFFFFFF; - uint64_t width = (addr_step >> (4*8)) & 0xF; - uint64_t write = (addr_step >> ((4*8) + 4)) & 0x1; - uint64_t micro_step = (addr_step >> (5*8)) & 0x3; - uint64_t incremental_step = (addr_step >> ((5*8) + 2)); - bool address_is_inside_range = - ((address >= RAM_ADDR) && (address < (RAM_ADDR + RAM_SIZE))) || - ((address >= ROM_ADDR) && (address < (ROM_ADDR + ROM_SIZE))) || - ((address >= INPUT_ADDR) && (address < (INPUT_ADDR + MAX_INPUT_SIZE))); - bool width_is_valid = (width == 1) || (width == 2) || (width == 4) || (width == 8); - bool bError = !(address_is_inside_range && width_is_valid); - if (trace_trace || bError) - { - printf("\tmem_trace[%lu] = %016lx = [inc_step=%lu, u_step=%lu, write=%lx, width=%lx, address=%lx] %s\n", - m, - addr_step, - incremental_step, - micro_step, - write, - width, - address, - bError ? " ERROR!!!!!!!!!!!!!!" : "" - ); - } - - // u-step: - // 0: a=SRC_MEM - // 1: b=SRC_MEM or b=SRC_IND - // 2: precompiled_read - // 3: c=STORE_MEM, c=STORE_IND or precompiled_write - - bool address_is_aligned = (address & 0x7) == 0; - uint64_t aligned_address = address & 0xFFFFFFF8; - uint64_t number_of_read_values = 0; - uint64_t number_of_write_values = 0; - - switch (micro_step) - { - case 0: // a=SRC_MEM - { - assert_perror(width == 8); - if (address_is_aligned) - { - number_of_read_values = 1; - } - else - { - number_of_read_values = 2; - } - break; - } - case 1: // b=SRC_MEM or b=SRC_IND - { - if (address_is_aligned) - { - number_of_read_values = 1; - } - else - { - if (((address + width - 1) & 0xFFFFFFF8) == aligned_address) - { - number_of_read_values = 1; - } - else - { - number_of_read_values = 2; - } - } - break; - } - case 2: // precompiled_read - { - assert_perror(width == 8); - if (address_is_aligned) - { - number_of_read_values = 1; - } - else - { - number_of_read_values = 2; - } - break; - } - case 3: // c=STORE_MEM, c=STORE_IND or precompiled_write - { - if (address_is_aligned && (width == 8)) - { - number_of_read_values = 0; - } - else - { - if (((address + width - 1) & 0xFFFFFFF8) == aligned_address) - { - number_of_read_values = 1; - } - else - { - number_of_read_values = 2; - } - } - number_of_write_values = 1; - break; - } - } - - for (uint64_t r = 0; r < number_of_read_values; r++) - { - uint64_t value = trace[i]; - i++; - m++; - if (trace_trace) - { - printf("\t\tread_value[%lu] = 0x%lx\n", i, value); - } - } - - for (uint64_t w = 0; w < number_of_write_values; w++) - { - uint64_t value = trace[i]; - i++; - m++; - if (trace_trace) - { - printf("\t\twrite_value[%lu] = 0x%lx\n", i, value); - } - } - } - printf("Trace=%p number_of_entries=%lu\n", trace, number_of_entries); -} - -void save_mem_op_to_files(void) -{ - // Log header - uint64_t * pOutput = (uint64_t *)TRACE_ADDR; - printf("Version = 0x%06lx\n", pOutput[0]); // Version, e.g. v1.0.0 [8] - printf("Exit code = %lu\n", pOutput[1]); // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] - printf("Allocated size = %lu B\n", pOutput[2]); // Allocated size [8] - printf("Memory operations trace used size = %lu B\n", pOutput[3]); // Main trace used size [8] - - printf("Trace content:\n"); - uint64_t * trace = (uint64_t *)MEM_TRACE_ADDRESS; - uint64_t number_of_chunks = trace[0]; - printf("Number of chunks=%lu\n", number_of_chunks); - if (number_of_chunks > 1000000) - { - printf("ERROR: Number of chunks is too high=%lu\n", number_of_chunks); - fflush(stdout); - fflush(stderr); - exit(-1); - } - uint64_t * chunk = trace + 1; - for (uint64_t c=0; c 10000000) - { - printf("ERROR: Mem op trace size is too high=%lu\n", mem_op_trace_size); - fflush(stdout); - fflush(stderr); - exit(-1); - } - - printf("Chunk %lu: file=%s length=%lu\n", c, file_name, mem_op_trace_size); - - buffer2file(&chunk[i], mem_op_trace_size * 8, file_name); - - //Set next chunk pointer - chunk = chunk + mem_op_trace_size + 1; - } - printf("Trace=%p chunk=%p size=%lu\n", trace, chunk, (uint64_t)chunk - (uint64_t)trace); -} - -/* Trace data structure - [8B] Number of elements - - A series of elements with the following structure: - [8B] op: instruction opcode - [8B] a: register a value - [8B] b: register b value - [8B] precompiled_memory_address: memory read address of the precompiled input data -*/ -void log_chunk_player_main_trace(void) -{ - uint64_t * chunk = (uint64_t *)trace_address; - uint64_t i = 0; - - printf("Version = 0x%06lx\n", chunk[0]); // Version, e.g. v1.0.0 [8] - printf("Exit code = %lu\n", chunk[1]); // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] - printf("Allocated size = %lu B\n", chunk[2]); // Allocated size [8] - printf("Memory operations trace used size = %lu B\n", chunk[3]); // Main trace used size [8] - i = 4; - - uint64_t mem_reads_size = chunk[i]; - i++; - printf("mem_reads_size=%lu\n", mem_reads_size); - if (mem_reads_size > 10000000) - { - printf("ERROR: Mem reads size is too high=%lu\n", mem_reads_size); - fflush(stdout); - fflush(stderr); - exit(-1); - } - //if (trace_trace) - { - for (uint64_t m=0; m 0xFF) - { - printf("ERROR!! Invalid op=%lu=0x%lx\n", op, op); - } - if (trace_trace) printf("\tmem_reads[%lu] a=0x%08lx\n", m, chunk[i]); - i++; - m++; - if (trace_trace) printf("\tmem_reads[%lu] b=0x%08lx\n", m, chunk[i]); - i++; - m++; - if ( (op == 0xf1) // Keccak - || (op == 0xf9) // SHA256 - || (op == 0xf2) // Arith256 - || (op == 0xf3) // Arith256Mod - || (op == 0xf4) // Secp256k1Add - || (op == 0xf5) // Secp256k1Dbl - ) - { - if (trace_trace) printf("\tmem_reads[%lu] precompiled_address=%08lx\n", m, chunk[i]); - i++; - m++; - } - } - } - - printf("Chunk=%p size=%lu\n", chunk, mem_reads_size); -} - -void file_lock(void) -{ - // Open (or create) the lock file. We don't need to write to it. - file_lock_fd = open(file_lock_name, O_CREAT | O_RDONLY, 0644); - if (file_lock_fd == -1) { - printf("ERROR: file_lock() failed calling open(%s) errno=%d=%s\n", file_lock_name, errno, strerror(errno)); - fflush(stdout); - fflush(stderr); - exit(1); - } - - // Try to acquire an exclusive lock, non-blocking. - if (flock(file_lock_fd, LOCK_EX | LOCK_NB) == -1) { - // If we fail to get the lock, another instance is running. - printf("ERROR: Another instance of this program is already running.\n"); - fflush(stdout); - fflush(stderr); - exit(1); - } -} \ No newline at end of file +#endif // USE_FILE_LOCK \ No newline at end of file diff --git a/emulator-asm/src/server.c b/emulator-asm/src/server.c new file mode 100644 index 000000000..36b4d33c6 --- /dev/null +++ b/emulator-asm/src/server.c @@ -0,0 +1,1016 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "server.hpp" +#include "globals.hpp" +#include "asm_provided.hpp" +#include "trace_logs.hpp" +#include "trace.hpp" +#include "emu.hpp" +#include "c_provided.hpp" + +/**********/ +/* SERVER */ +/**********/ + +// ROM histogram +uint64_t histogram_size = 0; +uint64_t bios_size = 0; +uint64_t program_size = 0; + +// Shutdown done semaphore: notifies the caller when a shutdown has been processed +sem_t * sem_shutdown_done = NULL; + +void server_setup (void) +{ + assert(server); + assert(!client); + + int result; + + /*******/ + /* ROM */ + /*******/ + if ((gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain)) + { + + if (verbose) gettimeofday(&start_time, NULL); + void * pRom = mmap((void *)ROM_ADDR, ROM_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | map_locked_flag, -1, 0); + if (verbose) + { + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + } + if (pRom == MAP_FAILED) + { + printf("ERROR: Failed calling mmap(rom) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if ((uint64_t)pRom != ROM_ADDR) + { + printf("ERROR: Called mmap(rom) but returned address = %p != 0x%lx\n", pRom, ROM_ADDR); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) printf("mmap(rom) mapped %ld B and returned address %p in %lu us\n", ROM_SIZE, pRom, duration); + } + + /*********/ + /* INPUT */ + /*********/ + + if ((gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain)) + { + if (!open_input_shm) + { + // Make sure the input shared memory is deleted + shm_unlink(shmem_input_name); + + // Create the input shared memory + shmem_input_fd = shm_open(shmem_input_name, O_RDWR | O_CREAT | O_EXCL, 0666); + if (shmem_input_fd < 0) + { + printf("ERROR: Failed calling input RW shm_open(%s) as read-write errno=%d=%s\n", shmem_input_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Size it + result = ftruncate(shmem_input_fd, MAX_INPUT_SIZE); + if (result != 0) + { + printf("ERROR: Failed calling ftruncate(%s) errno=%d=%s\n", shmem_input_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Sync + fsync(shmem_input_fd); + + // Close the descriptor + if (close(shmem_input_fd) != 0) + { + printf("ERROR: Failed calling close(%s) errno=%d=%s\n", shmem_input_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + } + + // Open the input shared memory as read-only + shmem_input_fd = shm_open(shmem_input_name, O_RDONLY, 0666); + if (shmem_input_fd < 0) + { + printf("ERROR: Failed calling input RO shm_open(%s) as read-only errno=%d=%s\n", shmem_input_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Map input address space + if (verbose) gettimeofday(&start_time, NULL); + void * pInput = mmap((void *)INPUT_ADDR, MAX_INPUT_SIZE, PROT_READ, MAP_SHARED | MAP_FIXED | map_locked_flag, shmem_input_fd, 0); + if (verbose) + { + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + } + if (pInput == MAP_FAILED) + { + printf("ERROR: Failed calling mmap(input) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if ((uint64_t)pInput != INPUT_ADDR) + { + printf("ERROR: Called mmap(pInput) but returned address = %p != 0x%lx\n", pInput, INPUT_ADDR); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) { + printf("mmap(input) mapped %lu B and returned address %p in %lu us\n", MAX_INPUT_SIZE, pInput, duration); + fflush(stdout); + } + } + + /**********************/ + /* PRECOMPILE_RESULTS */ + /**********************/ + + if (precompile_results_enabled) + { + /**************/ + /* PRECOMPILE */ + /**************/ + + if (!open_input_shm) + { + // Make sure the precompile results shared memory is deleted + shm_unlink(shmem_precompile_name); + + // Create the precompile results shared memory + shmem_precompile_fd = shm_open(shmem_precompile_name, O_RDWR | O_CREAT, 0666); + if (shmem_precompile_fd < 0) + { + printf("ERROR: Failed calling precompile shm_open(%s) errno=%d=%s\n", shmem_precompile_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Size it + result = ftruncate(shmem_precompile_fd, MAX_PRECOMPILE_SIZE); + if (result != 0) + { + printf("ERROR: Failed calling ftruncate(%s) errno=%d=%s\n", shmem_precompile_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Sync + fsync(shmem_precompile_fd); + + // Close the descriptor + if (close(shmem_precompile_fd) != 0) + { + printf("ERROR: Failed calling close(%s) errno=%d=%s\n", shmem_precompile_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + } + + // Open the precompile shared memory as read-only + shmem_precompile_fd = shm_open(shmem_precompile_name, O_RDONLY, 0666); + if (shmem_precompile_fd < 0) + { + printf("ERROR: Failed calling precompile RO shm_open(%s) as read-only errno=%d=%s\n", shmem_precompile_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Map precompile address space + if (verbose) gettimeofday(&start_time, NULL); + void * pPrecompile = mmap(NULL, MAX_PRECOMPILE_SIZE, PROT_READ, MAP_SHARED | map_locked_flag, shmem_precompile_fd, 0); + if (verbose) + { + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + } + if (pPrecompile == MAP_FAILED) + { + printf("ERROR: Failed calling mmap(precompile) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + shmem_precompile_address = pPrecompile; + precompile_results_address = (uint64_t *)pPrecompile; + if (verbose) printf("mmap(precompile) mapped %lu B and returned address %p in %lu us\n", MAX_PRECOMPILE_SIZE, precompile_results_address, duration); + + /*************************/ + /* PRECOMPILE SEMAPHORES */ + /*************************/ + + // Create the semaphore for precompile results available signal + assert(strlen(sem_prec_avail_name) > 0); + + sem_unlink(sem_prec_avail_name); + + sem_prec_avail = sem_open(sem_prec_avail_name, O_CREAT | O_EXCL, 0666, 0); + if (sem_prec_avail == SEM_FAILED) + { + printf("ERROR: Failed calling sem_open(%s) errno=%d=%s\n", sem_prec_avail_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) printf("sem_open(%s) succeeded sem_prec_avail=%p\n", sem_prec_avail_name, sem_prec_avail); + + // Create the semaphore for precompile results read signal + assert(strlen(sem_prec_read_name) > 0); + + sem_unlink(sem_prec_read_name); + + sem_prec_read = sem_open(sem_prec_read_name, O_CREAT | O_EXCL, 0666, 0); + if (sem_prec_read == SEM_FAILED) + { + printf("ERROR: Failed calling sem_open(%s) errno=%d=%s\n", sem_prec_read_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) printf("sem_open(%s) succeeded sem_prec_read=%p\n", sem_prec_read_name, sem_prec_read); + } + + /*****************/ + /* CONTROL INPUT */ + /*****************/ + + if (!open_input_shm) + { + // Make sure the precompile results shared memory is deleted + shm_unlink(shmem_control_input_name); + + // Create the control shared memory + shmem_control_input_fd = shm_open(shmem_control_input_name, O_RDWR | O_CREAT, 0666); + if (shmem_control_input_fd < 0) + { + printf("ERROR: Failed calling control shm_open(%s) errno=%d=%s\n", shmem_control_input_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Size it + result = ftruncate(shmem_control_input_fd, CONTROL_INPUT_SIZE); + if (result != 0) + { + printf("ERROR: Failed calling ftruncate(%s) errno=%d=%s\n", shmem_control_input_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Sync + fsync(shmem_control_input_fd); + + // Close the descriptor + if (close(shmem_control_input_fd) != 0) + { + printf("ERROR: Failed calling close(%s) errno=%d=%s\n", shmem_control_input_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + } + + // Open the control input shared memory as read-only + shmem_control_input_fd = shm_open(shmem_control_input_name, O_RDONLY, 0666); + if (shmem_control_input_fd < 0) + { + printf("ERROR: Failed calling precompile RO shm_open(%s) as read-only errno=%d=%s\n", shmem_control_input_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Map precompile address space + if (verbose) gettimeofday(&start_time, NULL); + void * pControl = mmap((void *)CONTROL_INPUT_ADDR, CONTROL_INPUT_SIZE, PROT_READ, MAP_SHARED | MAP_FIXED | map_locked_flag, shmem_control_input_fd, 0); + if (verbose) + { + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + } + if (pControl == MAP_FAILED) + { + printf("ERROR: Failed calling mmap(control_input) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (pControl != (void *)CONTROL_INPUT_ADDR) + { + printf("ERROR: Called mmap(control_input) but returned address = %p != 0x%08lx\n", pControl, CONTROL_INPUT_ADDR); + fflush(stdout); + fflush(stderr); + exit(-1); + } + shmem_control_input_address = (uint64_t *)pControl; + precompile_written_address = &shmem_control_input_address[0]; + precompile_exit_address = &shmem_control_input_address[1]; + input_written_address = &shmem_control_input_address[2]; + if (verbose) printf("mmap(control_input) mapped %lu B and returned address %p in %lu us\n", CONTROL_INPUT_SIZE, shmem_control_input_address, duration); + + /******************/ + /* CONTROL OUTPUT */ + /******************/ + + // Make sure the precompile results shared memory is deleted + shm_unlink(shmem_control_output_name); + + // Create the control shared memory + shmem_control_output_fd = shm_open(shmem_control_output_name, O_RDWR | O_CREAT, 0666); + if (shmem_control_output_fd < 0) + { + printf("ERROR: Failed calling control shm_open(%s) errno=%d=%s\n", shmem_control_output_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Size it + result = ftruncate(shmem_control_output_fd, CONTROL_OUTPUT_SIZE); + if (result != 0) + { + printf("ERROR: Failed calling ftruncate(%s) errno=%d=%s\n", shmem_control_output_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Map precompile address space + if (verbose) gettimeofday(&start_time, NULL); + pControl = mmap((void *)CONTROL_OUTPUT_ADDR, CONTROL_OUTPUT_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | map_locked_flag, shmem_control_output_fd, 0); + if (verbose) + { + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + } + if (pControl == MAP_FAILED) + { + printf("ERROR: Failed calling mmap(control_output) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (pControl != (void *)CONTROL_OUTPUT_ADDR) + { + printf("ERROR: Called mmap(control_output) but returned address = %p != 0x%08lx\n", pControl, CONTROL_OUTPUT_ADDR); + fflush(stdout); + fflush(stderr); + exit(-1); + } + shmem_control_output_address = (uint64_t *)pControl; + precompile_read_address = &shmem_control_output_address[0]; + waiting_for_precompile_address = &shmem_control_output_address[1]; + waiting_for_input_address = &shmem_control_output_address[2]; + if (verbose) printf("mmap(control_output) mapped %lu B and returned address %p in %lu us\n", CONTROL_OUTPUT_SIZE, shmem_control_output_address, duration); + + /*******/ + /* RAM */ + /*******/ + + if ((gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain)) + { + + if (verbose) gettimeofday(&start_time, NULL); + void * pRam = mmap((void *)RAM_ADDR, RAM_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | map_locked_flag, -1, 0); + if (verbose) + { + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + } + if (pRam == MAP_FAILED) + { + printf("ERROR: Failed calling mmap(ram) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if ((uint64_t)pRam != RAM_ADDR) + { + printf("ERROR: Called mmap(ram) but returned address = %p != 0x%08lx\n", pRam, RAM_ADDR); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) printf("mmap(ram) mapped %lu B and returned address %p in %lu us\n", RAM_SIZE, pRam, duration); + } + + /****************/ + /* OUTPUT TRACE */ + /****************/ + + // If ROM histogram, configure trace size + if (gen_method == RomHistogram) + { + // Get max PC values for low and high addresses + uint64_t max_bios_pc = get_max_bios_pc(); + uint64_t max_program_pc = get_max_program_pc(); + assert(max_bios_pc >= 0x1000); + assert((max_bios_pc & 0x3) == 0); + assert(max_program_pc >= 0x80000000); + + // Calculate sizes + bios_size = ((max_bios_pc - 0x1000) >> 2) + 1; + program_size = max_program_pc - 0x80000000 + 1; + histogram_size = (4 + 1 + bios_size + 1 + program_size)*8; + initial_trace_size = ((histogram_size/TRACE_SIZE_GRANULARITY) + 1) * TRACE_SIZE_GRANULARITY; + trace_size = initial_trace_size; + } + + // Output trace + if ((gen_method == MinimalTrace) || + (gen_method == RomHistogram) || + (gen_method == MainTrace) || + (gen_method == Zip) || + (gen_method == MemOp) || + (gen_method == ChunkPlayerMTCollectMem) || + (gen_method == MemReads) || + (gen_method == ChunkPlayerMemReadsCollectMain)) + { + trace_map_initialize(); + } + + /***********************/ + /* INPUT MINIMAL TRACE */ + /***********************/ + + // Input MT trace + if ((gen_method == ChunkPlayerMTCollectMem) || (gen_method == ChunkPlayerMemReadsCollectMain)) + { + // Create the output shared memory + shmem_mt_fd = shm_open(shmem_mt_name, O_RDONLY, 0666); + if (shmem_mt_fd < 0) + { + printf("ERROR: Failed calling mt shm_open(%s) errno=%d=%s\n", shmem_mt_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Map it to the trace address +#ifdef DEBUG + gettimeofday(&start_time, NULL); +#endif + void * pTrace = mmap((void *)TRACE_ADDR, chunk_player_mt_size, PROT_READ, MAP_SHARED | MAP_FIXED | map_locked_flag, shmem_mt_fd, 0); +#ifdef DEBUG + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); +#endif + if (pTrace == MAP_FAILED) + { + printf("ERROR: Failed calling mmap(MT) errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if ((uint64_t)pTrace != TRACE_ADDR) + { + printf("ERROR: Called mmap(MT) but returned address = %p != 0x%lx\n", pTrace, TRACE_ADDR); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) printf("mmap(MT) returned %p in %lu us\n", pTrace, duration); + } + + /******************/ + /* SEM CHUNK DONE */ + /******************/ + + if (call_chunk_done) + { + assert(strlen(sem_chunk_done_name) > 0); + + sem_unlink(sem_chunk_done_name); + + sem_chunk_done = sem_open(sem_chunk_done_name, O_CREAT | O_EXCL, 0666, 0); + if (sem_chunk_done == SEM_FAILED) + { + printf("ERROR: Failed calling sem_open(%s) errno=%d=%s\n", sem_chunk_done_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) printf("sem_open(%s) succeeded\n", sem_chunk_done_name); + } + + /*********************/ + /* SEM SHUTDOWN DONE */ + /*********************/ + + assert(strlen(sem_shutdown_done_name) > 0); + + sem_unlink(sem_shutdown_done_name); + + sem_shutdown_done = sem_open(sem_shutdown_done_name, O_CREAT | O_EXCL, 0666, 0); + if (sem_shutdown_done == SEM_FAILED) + { + printf("ERROR: Failed calling sem_open(%s) errno=%d=%s\n", sem_shutdown_done_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) printf("sem_open(%s) succeeded\n", sem_shutdown_done_name); // Create the semaphore for input available signal + + /***********************/ + /* SEM INPUT AVAILABLE */ + /***********************/ + + assert(strlen(sem_input_avail_name) > 0); + + sem_unlink(sem_input_avail_name); + + sem_input_avail = sem_open(sem_input_avail_name, O_CREAT | O_EXCL, 0666, 0); + if (sem_input_avail == SEM_FAILED) + { + printf("ERROR: Failed calling sem_open(%s) errno=%d=%s\n", sem_input_avail_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) printf("sem_open(%s) succeeded\n", sem_input_avail_name); +} + +void server_reset_fast (void) +{ + // Reset precompile read address for next emulation + if (precompile_results_enabled) + { + // Set precompile read counter to 0 for next emulation + *precompile_read_address = 0; + + // Sync control output shared memory so that the writer can see the precompile reads we have + // done, and thus update the precompile_written_address if needed + if (msync((void *)shmem_control_output_address, CONTROL_OUTPUT_SIZE, MS_SYNC) != 0) { + printf("ERROR: server_reset_fast() msync failed for shmem_control_output_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + } +} + +void server_reset_slow (void) +{ + // Reset RAM data for next emulation + if ((gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain)) + { +#ifdef DEBUG + gettimeofday(&start_time, NULL); +#endif + memset((void *)RAM_ADDR, 0, RAM_SIZE); +#ifdef DEBUG + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + if (verbose) printf("server_reset_slow() memset(ram) in %lu us\n", duration); +#endif + } +} + +void server_reset_trace (void) +{ + // Reset trace header and trace_used_size for next emulation + if ( (gen_method != ChunkPlayerMTCollectMem) && + (gen_method != ChunkPlayerMemReadsCollectMain) && + (gen_method != Fast) && + (gen_method != RomHistogram) ) + { + // Reset trace: init output header data + pOutputTrace[0] = 0x000100; // Version, e.g. v1.0.0 [8] + pOutputTrace[1] = 1; // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] + pOutputTrace[2] = trace_size; // MT allocated size [8] -> to be updated after reallocation + pOutputTrace[3] = 0; // MT used size [8] -> to be updated after completion + + // Reset trace used size + trace_used_size = 0; + } + + // Reset flags + if (wait_flag) + { + *waiting_for_precompile_address = 0; + *waiting_for_input_address = 0; + } + + // Reset counters + wait_prec_avail_counter = 0; + wait_input_avail_counter = 0; + print_pc_counter = 0; +} + +void server_run (void) +{ + // If ROM histogram, reset the trace area to 0 for the histogram data since it represents the + // ROM instruction multiplicity and one of them will be increased at every executed instruction + if ((gen_method == RomHistogram)) { + memset((void *)trace_address, 0, trace_size); + } + +#ifdef ASM_CALL_METRICS + reset_asm_call_metrics(); +#endif + + // Init trace header + server_reset_trace(); + + // Sync input shared memory + if (msync((void *)INPUT_ADDR, MAX_INPUT_SIZE, MS_SYNC) != 0) + { + printf("ERROR: msync failed for shmem_input_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + if (precompile_results_enabled) + { + // Sync control input shared memory + if (msync((void *)shmem_control_input_address, CONTROL_INPUT_SIZE, MS_SYNC) != 0) { + printf("ERROR: msync failed for shmem_control_input_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Sync precompile shared memory + if (msync((void *)shmem_precompile_address, MAX_PRECOMPILE_SIZE, MS_SYNC) != 0) { + printf("ERROR: msync failed for shmem_precompile_address errno=%d=%s\n", errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + } + + /*******/ + /* ASM */ + /*******/ + + // Call emulator assembly code + gettimeofday(&start_time,NULL); + if (verbose) + { + printf("Before calling emulator_start() trace_address=%lx\n", trace_address); + fflush(stdout); + fflush(stderr); + } + emulator_start(); + if (verbose) + { + printf("After calling emulator_start() trace_address=%lx\n", trace_address); + fflush(stdout); + fflush(stderr); + } + gettimeofday(&stop_time,NULL); + assembly_duration = TimeDiff(start_time, stop_time); + + // Reset precompile read address for next emulation + if (precompile_results_enabled) + { + *precompile_read_address = 0; + } + + uint64_t final_trace_size = MEM_CHUNK_ADDRESS - MEM_TRACE_ADDRESS; + trace_used_size = final_trace_size + 32; + + if ( metrics ) + { + uint64_t duration = assembly_duration; + uint64_t steps = MEM_STEP; + uint64_t end = MEM_END; + uint64_t error = MEM_ERROR; + uint64_t step_duration_ns = steps == 0 ? 0 : (duration * 1000) / steps; + uint64_t step_tp_sec = duration == 0 ? 0 : steps * 1000000 / duration; + uint64_t final_trace_size_percentage = (final_trace_size * 100) / trace_size; + printf("Duration = %lu us, realloc counter = %lu, wait prec counter = %lu, wait input counter = %lu, steps = %lu, step duration = %lu ns, tp = %lu steps/s, trace size = 0x%lx - 0x%lx = %lu B(%lu%% of %lu), end=%lu, error=%lu, max steps=%lu, chunk size=%lu, prec_written=%lu, prec_read=%lu\n", + duration, + realloc_counter, + wait_prec_avail_counter, + wait_input_avail_counter, + steps, + step_duration_ns, + step_tp_sec, + MEM_CHUNK_ADDRESS, + MEM_TRACE_ADDRESS, + final_trace_size, + final_trace_size_percentage, + trace_size, + end, + error, + max_steps, + chunk_size, + precompile_written_address ? *precompile_written_address : 0, + precompile_read_address ? *precompile_read_address : 0 + ); + fflush(stdout); + fflush(stderr); + if (gen_method == RomHistogram) + { + printf("Rom histogram size=%lu\n", histogram_size); + fflush(stdout); + fflush(stderr); + } + } + if (MEM_ERROR) + { + printf("Emulation ended with error code %lu\n", MEM_ERROR); + fflush(stdout); + fflush(stderr); + } + + // Log output + if (output) + { + unsigned int * pOutput = (unsigned int *)OUTPUT_ADDR; + unsigned int output_size = 64; +#ifdef DEBUG + if (verbose) + { + printf("Output size=%d\n", output_size); + fflush(stdout); + fflush(stderr); + } +#endif + + for (unsigned int i = 0; i < output_size; i++) + { + printf("%08x\n", *pOutput); + pOutput++; + } + fflush(stdout); + fflush(stderr); + } + + // Log output for riscof tests + if (output_riscof) + { + unsigned int * pOutput = (unsigned int *)OUTPUT_ADDR; + unsigned int output_size = *pOutput; +#ifdef DEBUG + if (verbose) + { + printf("Output size=%d\n", output_size); + fflush(stdout); + fflush(stderr); + } +#endif + + for (unsigned int i = 0; i < output_size; i++) + { + pOutput++; + printf("%08x\n", *pOutput); + } + fflush(stdout); + fflush(stderr); + } + + // Complete output header data + if ((gen_method == MinimalTrace) || + (gen_method == RomHistogram) || + (gen_method == Zip) || + (gen_method == MainTrace) || + (gen_method == MemOp) || + (gen_method == MemReads) || + (gen_method == ChunkPlayerMemReadsCollectMain)) + { + uint64_t * pOutput = (uint64_t *)trace_address; + pOutput[0] = 0x000100; // Version, e.g. v1.0.0 [8] + pOutput[1] = MEM_ERROR; // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] + pOutput[2] = trace_size; // MT allocated size [8] + //assert(final_trace_size > 32); + if (gen_method == RomHistogram) + { + pOutput[3] = MEM_STEP; + pOutput[4] = bios_size; + pOutput[4 + bios_size + 1] = program_size; + } + else + { + pOutput[3] = trace_used_size; // MT used size [8] + } + } + + // Notify client + if (gen_method == RomHistogram) + { + _chunk_done(); + } + + + // Notify the caller that the trace is ready to be consumed + // if (!is_file) + // { + // result = sem_post(sem_input); + // if (result == -1) + // { + // printf("Failed calling sem_post(%s) errno=%d=%s\n", sem_input_name, errno, strerror(errno)); + // fflush(stdout); + // fflush(stderr); + // exit(-1); + // } + // } + + +#ifdef ASM_CALL_METRICS + print_asm_call_metrics(assembly_duration); +#endif + + // Log trace + if (((gen_method == MinimalTrace) || (gen_method == Zip)) && trace) + { + log_minimal_trace(); + } + if ((gen_method == RomHistogram) && trace) + { + log_histogram(); + } + if ((gen_method == MainTrace) && trace) + { + log_main_trace(); + } + if ((gen_method == MemOp) && trace) + { + log_mem_op(); + } + if ((gen_method == MemOp) && save_to_file) + { + save_mem_op_to_files(); + } + if ((gen_method == ChunkPlayerMTCollectMem) && trace) + { + log_mem_trace(); + } + if ((gen_method == MemReads) && trace) + { + log_minimal_trace(); + } + if ((gen_method == ChunkPlayerMemReadsCollectMain) && trace) + { + log_chunk_player_main_trace(); + } +} + +void server_cleanup (void) +{ + // Cleanup ROM + int result = munmap((void *)ROM_ADDR, ROM_SIZE); + if (result == -1) + { + printf("ERROR: Failed calling munmap(rom) errno=%d=%s\n", errno, strerror(errno)); + } + + // Cleanup RAM + result = munmap((void *)RAM_ADDR, RAM_SIZE); + if (result == -1) + { + printf("ERROR: Failed calling munmap(ram) errno=%d=%s\n", errno, strerror(errno)); + } + + // Cleanup INPUT + result = munmap((void *)INPUT_ADDR, MAX_INPUT_SIZE); + if (result == -1) + { + printf("ERROR: Failed calling munmap(input) errno=%d=%s\n", errno, strerror(errno)); + } + result = shm_unlink(shmem_input_name); + if (result == -1) + { + printf("ERROR: Failed calling shm_unlink(%s) errno=%d=%s\n", shmem_input_name, errno, strerror(errno)); + } + + if (precompile_results_enabled && (gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain)) + { + // Cleanup PRECOMPILE + result = munmap((void *)shmem_precompile_address, MAX_PRECOMPILE_SIZE); + if (result == -1) + { + printf("ERROR: Failed calling munmap(precompile) errno=%d=%s\n", errno, strerror(errno)); + } + result = shm_unlink(shmem_precompile_name); + if (result == -1) + { + printf("ERROR: Failed calling shm_unlink(%s) errno=%d=%s\n", shmem_precompile_name, errno, strerror(errno)); + } + + // Semaphores cleanup + result = sem_close(sem_prec_avail); + if (result == -1) + { + printf("ERROR: Failed calling sem_close(%s) errno=%d=%s\n", sem_prec_avail_name, errno, strerror(errno)); + } + result = sem_unlink(sem_prec_avail_name); + if (result == -1) + { + printf("ERROR: Failed calling sem_unlink(%s) errno=%d=%s\n", sem_prec_avail_name, errno, strerror(errno)); + } + result = sem_close(sem_prec_read); + if (result == -1) + { + printf("ERROR: Failed calling sem_close(%s) errno=%d=%s\n", sem_prec_read_name, errno, strerror(errno)); + } + result = sem_unlink(sem_prec_read_name); + if (result == -1) + { + printf("ERROR: Failed calling sem_unlink(%s) errno=%d=%s\n", sem_prec_read_name, errno, strerror(errno)); + } + result = sem_close(sem_input_avail); + if (result == -1) + { + printf("ERROR: Failed calling sem_close(%s) errno=%d=%s\n", sem_input_avail_name, errno, strerror(errno)); + } + } + + // Cleanup CONTROL + result = munmap((void *)shmem_control_input_address, CONTROL_INPUT_SIZE); + if (result == -1) + { + printf("ERROR: Failed calling munmap(control_input) errno=%d=%s\n", errno, strerror(errno)); + } + if (!wait_flag) + { + result = shm_unlink(shmem_control_input_name); + if (result == -1) + { + printf("ERROR: Failed calling shm_unlink(%s) errno=%d=%s\n", shmem_control_input_name, errno, strerror(errno)); + } + } + result = munmap((void *)shmem_control_output_address, CONTROL_OUTPUT_SIZE); + if (result == -1) + { + printf("ERROR: Failed calling munmap(control_output) errno=%d=%s\n", errno, strerror(errno)); + } + if (!wait_flag) + { + result = shm_unlink(shmem_control_output_name); + if (result == -1) + { + printf("ERROR: Failed calling shm_unlink(%s) errno=%d=%s\n", shmem_control_output_name, errno, strerror(errno)); + } + } + + // Cleanup trace + trace_cleanup(); + + // Cleanup chunk done semaphore + if (call_chunk_done) + { + result = sem_close(sem_chunk_done); + if (result == -1) + { + printf("ERROR: Failed calling sem_close(%s) errno=%d=%s\n", sem_chunk_done_name, errno, strerror(errno)); + } + result = sem_unlink(sem_chunk_done_name); + if (result == -1) + { + printf("ERROR: Failed calling sem_unlink(%s) errno=%d=%s\n", sem_chunk_done_name, errno, strerror(errno)); + } + } + + // Cleanup input available semaphore + result = sem_unlink(sem_input_avail_name); + if (result == -1) + { + printf("ERROR: Failed calling sem_unlink(%s) errno=%d=%s\n", sem_input_avail_name, errno, strerror(errno)); + } + + // Post shutdown done semaphore + result = sem_post(sem_shutdown_done); + if (result == -1) + { + printf("ERROR: Failed calling sem_post(%s) errno=%d=%s\n", sem_shutdown_done_name, errno, strerror(errno)); + } +} \ No newline at end of file diff --git a/emulator-asm/src/server.hpp b/emulator-asm/src/server.hpp new file mode 100644 index 000000000..c76cefee9 --- /dev/null +++ b/emulator-asm/src/server.hpp @@ -0,0 +1,11 @@ +#ifndef EMULATOR_ASM_SERVER_HPP +#define EMULATOR_ASM_SERVER_HPP + +void server_setup (void); +void server_reset_fast (void); +void server_reset_slow (void); +void server_reset_trace (void); +void server_run (void); +void server_cleanup (void); + +#endif // EMULATOR_ASM_SERVER_HPP \ No newline at end of file diff --git a/emulator-asm/src/trace.c b/emulator-asm/src/trace.c new file mode 100644 index 000000000..4020a301d --- /dev/null +++ b/emulator-asm/src/trace.c @@ -0,0 +1,218 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "trace.hpp" +#include "constants.hpp" +#include "globals.hpp" +#include "emu.hpp" + +uint64_t next_chunk_id = 0; // Next trace chunk id to be mapped, starting from 0 +int trace_chunk_fd[TRACE_NUMBER_OF_CHUNKS]; // File descriptors for each chunk +uint64_t trace_total_mapped_size = 0; // Total mapped trace size + +void * trace_get_chunk_address (uint64_t chunk_id) +{ + assert(gen_method != RomHistogram || chunk_id == 0); + + if (chunk_id == 0) + { + return (void *)TRACE_ADDR; + } + else + { + return (void *)(TRACE_ADDR + TRACE_INITIAL_SIZE + ((chunk_id - 1) * TRACE_DELTA_SIZE)); + } +} + +uint64_t trace_get_chunk_size (uint64_t chunk_id) +{ + if (gen_method == RomHistogram) { + assert(chunk_id == 0); + return trace_size; + } + + if (chunk_id == 0) + { + return TRACE_INITIAL_SIZE; + } + else + { + return TRACE_DELTA_SIZE; + } +} + +void trace_generate_shmem_chunk_name(char * shmem_chunk_name, size_t shmem_chunk_name_size, uint64_t chunk_id) +{ + int result = snprintf(shmem_chunk_name, shmem_chunk_name_size, "%s_%lu", shmem_output_name, chunk_id); + if (result < 0 || result >= (int)shmem_chunk_name_size) + { + printf("ERROR: trace_generate_shmem_chunk_name() failed to create chunk shared memory name\n"); + fflush(stdout); + fflush(stderr); + exit(-1); + } +} + +void trace_cleanup (void) +{ + // Unmap all mapped chunks + for (uint64_t chunk_id = 0; chunk_id < next_chunk_id; chunk_id++) + { + uint64_t chunk_size = trace_get_chunk_size(chunk_id); + void * chunk_address = trace_get_chunk_address(chunk_id); + int result = munmap(chunk_address, chunk_size); + if (result != 0) + { + printf("ERROR: trace_cleanup() failed calling munmap() chunk id=%lu size=%lu B address=0x%lx errno=%d=%s\n", chunk_id, chunk_size, (uint64_t)chunk_address, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Close the chunk shared memory file descriptor + close(trace_chunk_fd[chunk_id]); + trace_chunk_fd[chunk_id] = -1; + + // Build the chunk shared memory name + char shmem_chunk_name[128]; + trace_generate_shmem_chunk_name(shmem_chunk_name, sizeof(shmem_chunk_name), chunk_id); + + // Make sure the chunk shared memory is deleted + shm_unlink(shmem_chunk_name); + } + + // Reset next chunk id + next_chunk_id = 0; +} + +void trace_preventive_cleanup (void) +{ + // Unmap all mapped chunks + for (uint64_t chunk_id = 0; chunk_id < TRACE_NUMBER_OF_CHUNKS; chunk_id++) + { + // Build the chunk shared memory name + char shmem_chunk_name[128]; + trace_generate_shmem_chunk_name(shmem_chunk_name, sizeof(shmem_chunk_name), chunk_id); + + // Make sure the chunk shared memory is deleted + int result = shm_unlink(shmem_chunk_name); + if (result != 0) + { + break; + } + if (verbose) printf("trace_preventive_cleanup() unlinked chunk shared memory %s\n", shmem_chunk_name); + } +} + +void trace_map_next_chunk (void) +{ + // Get the next chunk id, size and address + uint64_t chunk_id = next_chunk_id; + if (chunk_id >= TRACE_NUMBER_OF_CHUNKS) + { + printf("ERROR: trace_map_next_chunk() exceeded maximum number of chunks %lu\n", TRACE_NUMBER_OF_CHUNKS); + fflush(stdout); + fflush(stderr); + exit(-1); + } + uint64_t chunk_size = trace_get_chunk_size(chunk_id); + void * chunk_address = trace_get_chunk_address(chunk_id); + + if (verbose) printf("trace_map_next_chunk() mapping chunk id=%lu size=%lu B address=0x%lx\n", chunk_id, chunk_size, (uint64_t)chunk_address); + + // Build the chunk shared memory name + char shmem_chunk_name[128]; + trace_generate_shmem_chunk_name(shmem_chunk_name, sizeof(shmem_chunk_name), chunk_id); + + // Make sure the chunk shared memory is deleted + shm_unlink(shmem_chunk_name); + + // Create the output shared memory + trace_chunk_fd[chunk_id] = shm_open(shmem_chunk_name, O_RDWR | O_CREAT | O_EXCL, 0666); + if (trace_chunk_fd[chunk_id] < 0) + { + printf("ERROR: trace_map_next_chunk() failed calling trace shm_open(%s) errno=%d=%s\n", shmem_chunk_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Size it + int result = ftruncate(trace_chunk_fd[chunk_id], chunk_size); + if (result != 0) + { + printf("ERROR: trace_map_next_chunk() failed calling ftruncate(%s) errno=%d=%s\n", shmem_chunk_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + // Sync + fsync(trace_chunk_fd[chunk_id]); + + // Map it to the trace address + if (verbose) gettimeofday(&start_time, NULL); + void * requested_address; + if ((gen_method == ChunkPlayerMTCollectMem) || (gen_method == ChunkPlayerMemReadsCollectMain)) + { + requested_address = 0; + } + else + { + requested_address = (void *)chunk_address; + } + int flags = MAP_SHARED | map_locked_flag; + if ((gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain)) + { + flags |= MAP_FIXED; + } + void * pTrace = mmap(requested_address, chunk_size, PROT_READ | PROT_WRITE, flags, trace_chunk_fd[chunk_id], 0); + if (verbose) + { + gettimeofday(&stop_time, NULL); + duration = TimeDiff(start_time, stop_time); + } + if (pTrace == MAP_FAILED) + { + printf("ERROR: trace_map_next_chunk() failed calling mmap(pTrace) name=%s errno=%d=%s\n", shmem_chunk_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if ((gen_method != ChunkPlayerMTCollectMem) && (gen_method != ChunkPlayerMemReadsCollectMain) && ((uint64_t)pTrace != (uint64_t)requested_address)) + { + printf("ERROR: trace_map_next_chunk() called mmap(trace) but returned address = %p != 0x%lx\n", pTrace, (uint64_t)requested_address); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (verbose) printf("trace_map_next_chunk() mapped %lu B to %s and returned address %p in %lu us\n", chunk_size, shmem_chunk_name, pTrace, duration); + + // Update total mapped size + trace_total_mapped_size += chunk_size; + + // Increment next chunk id + next_chunk_id++; +} + +void trace_map_initialize (void) +{ + // Perform preventive cleanup of any leftover shared memory chunks + trace_preventive_cleanup(); + + // Map the first chunk, i.e. chunk 0 + trace_map_next_chunk(); + + trace_address = TRACE_ADDR; + pOutputTrace = (uint64_t *)TRACE_ADDR; +} \ No newline at end of file diff --git a/emulator-asm/src/trace.hpp b/emulator-asm/src/trace.hpp new file mode 100644 index 000000000..84983df20 --- /dev/null +++ b/emulator-asm/src/trace.hpp @@ -0,0 +1,12 @@ +#ifndef EMULATOR_ASM_TRACE_HPP +#define EMULATOR_ASM_TRACE_HPP + +#include + +extern uint64_t trace_total_mapped_size; // Total mapped trace size + +void trace_map_initialize (void); +void trace_map_next_chunk (void); +void trace_cleanup (void); + +#endif // EMULATOR_ASM_TRACE_HPP \ No newline at end of file diff --git a/emulator-asm/src/trace_logs.c b/emulator-asm/src/trace_logs.c new file mode 100644 index 000000000..d63c0f973 --- /dev/null +++ b/emulator-asm/src/trace_logs.c @@ -0,0 +1,689 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include "constants.hpp" +#include "globals.hpp" +#include "asm_provided.hpp" + +// This file contains trace logging functions that are used only for debugging purposes, to log the +// content of the generated traces in a human-readable format. These functions are not used by the +// assembly code, and are not optimized for performance. + +/*****************/ +/* LOG FUNCTIONS */ +/*****************/ + +/* Trace data structure + [8B] Number of chunks: C + + Chunk 0: + Start state: + [8B] pc + [8B] sp + [8B] c + [8B] step + [8B] register[1] + … + [8B] register[31] + [8B] register[32] + [8B] register[33] + Last state: + [8B] c + End: + [8B] end + Steps: + [8B] steps = chunk size except for the last chunk + [8B] mem_reads_size + [8B] mem_reads[0] + [8B] mem_reads[1] + … + [8B] mem_reads[mem_reads_size - 1] + + Chunk 1: + … + Chunk C-1: + … +*/ +void log_minimal_trace(void) +{ + uint64_t * pOutput = (uint64_t *)TRACE_ADDR; + printf("Version = 0x%06lx\n", pOutput[0]); // Version, e.g. v1.0.0 [8] + printf("Exit code = %lu\n", pOutput[1]); // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] + printf("Allocated size = %lu B\n", pOutput[2]); // Allocated size [8] + printf("Minimal trace used size = %lu B\n", pOutput[3]); // Minimal trace used size [8] + + printf("Trace content:\n"); + uint64_t * trace = (uint64_t *)MEM_TRACE_ADDRESS; + uint64_t number_of_chunks = trace[0]; + printf("Number of chunks=%lu\n", number_of_chunks); + if (number_of_chunks > 1000000) + { + printf("ERROR: Number of chunks is too high=%lu\n", number_of_chunks); + fflush(stdout); + fflush(stderr); + exit(-1); + } + uint64_t * chunk = trace + 1; + for (uint64_t c=0; c 10000000) + { + printf("ERROR: Mem reads size is too high=%lu\n", mem_reads_size); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (trace_trace) + { + for (uint64_t m=0; m 100000000) + { + printf("ERROR: Bios size is too high=%lu\n", bios_size); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (trace_trace) + { + uint64_t * bios = trace + 1; + for (uint64_t i=0; i 100000000) + { + printf("ERROR: Program size is too high=%lu\n", program_size); + fflush(stdout); + fflush(stderr); + exit(-1); + } + if (trace_trace) + { + uint64_t * program = trace + 1 + bios_size + 1; + for (uint64_t i=0; i 1000000) + { + printf("ERROR: Number of chunks is too high=%lu\n", number_of_chunks); + fflush(stdout); + fflush(stderr); + exit(-1); + } + uint64_t * chunk = trace + 1; + for (uint64_t c=0; c 10000000) + { + printf("ERROR: Main_trace size is too high=%lu\n", main_trace_size); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + if (trace_trace) + { + for (uint64_t m=0; m 0) + { + size_t bytes_written = fwrite(buffer_address, 1, buffer_length, file); + if (bytes_written != buffer_length) + { + printf("ERROR: buffer2file() failed calling fwrite(%s) buffer_address=%p buffer_length=%zu errno=%d=%s\n", file_name, buffer_address, buffer_length, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + fclose(file); + exit(-1); + } + } + + if (fclose(file) != 0) + { + printf("ERROR: buffer2file() failed calling fclose(%s) errno=%d=%s\n", file_name, errno, strerror(errno)); + fflush(stdout); + fflush(stderr); + exit(-1); + } +} + +/* Memory operations structure + [8B] Number of chunks = C + + Chunk 0: + [8b] end + [8B] mem_op_trace_size + [8B] mem_op_trace[0] + [8B] mem_op_trace[1] + … + [8B] mem_op_trace[mem_op_trace_size - 1] + + Chunk 1: + … + Chunk C-1: + … +*/ +void log_mem_op(void) +{ + // Log header + uint64_t * pOutput = (uint64_t *)TRACE_ADDR; + printf("Version = 0x%06lx\n", pOutput[0]); // Version, e.g. v1.0.0 [8] + printf("Exit code = %lu\n", pOutput[1]); // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] + printf("Allocated size = %lu B\n", pOutput[2]); // Allocated size [8] + printf("Memory operations trace used size = %lu B\n", pOutput[3]); // Main trace used size [8] + + printf("Trace content:\n"); + uint64_t * trace = (uint64_t *)MEM_TRACE_ADDRESS; + uint64_t number_of_chunks = trace[0]; + printf("Number of chunks=%lu\n", number_of_chunks); + if (number_of_chunks > 1000000) + { + printf("ERROR: Number of chunks is too high=%lu\n", number_of_chunks); + fflush(stdout); + fflush(stderr); + exit(-1); + } + uint64_t * chunk = trace + 1; + for (uint64_t c=0; c 10000000) + { + printf("ERROR: Mem op trace size is too high=%lu\n", mem_op_trace_size); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + for (uint64_t m=0; m> 49) & 0x1; + uint64_t write = (chunk[i] >> 48) & 0x1; + uint64_t width = (chunk[i] >> 32) & 0xF; + uint64_t address = chunk[i] & 0xFFFFFFFF; + bool inside_range = + ((address >= RAM_ADDR) && (address < (RAM_ADDR + RAM_SIZE))) || + ((address >= ROM_ADDR) && (address < (ROM_ADDR + ROM_SIZE))) || + ((address >= INPUT_ADDR) && (address < (INPUT_ADDR + MAX_INPUT_SIZE))); + if (trace_trace || !inside_range) + { + printf("\t\tchunk[%lu].mem_op_trace[%lu] = %016lx = rest_are_zeros=%lx, write=%lx, width=%lx, address=%lx%s\n", + c, + m, + chunk[i], + rest_are_zeros, + write, + width, + address, + inside_range ? "" : " ERROR!!!!!!!!!!!!!!" + ); + } + i += 1; + } + + //Set next chunk pointer + chunk = chunk + i; + } + printf("Trace=%p chunk=%p size=%lu\n", trace, chunk, (uint64_t)chunk - (uint64_t)trace); +} + +/* Memory trace structure (for 1 chunk) + [8B] mem_trace_size + [16B] mem_trace[0] + [8B] mem operacion + [4B] address (LE) + [1B] width (1, 2, 4, 8) + write (0, 1) << 4 + [3B] + [16B] mem_trace[1] + … + [16B] mem_trace[mem_trace_size - 1] +*/ +void log_mem_trace(void) +{ + printf("Trace content:\n"); + uint64_t * trace = (uint64_t *)TRACE_ADDR; + printf("log_mem_trace() trace_address=%p\n", trace); + uint64_t i=0; + printf("Version = 0x%06lx\n", trace[0]); // Version, e.g. v1.0.0 [8] + printf("Exit code = %lu\n", trace[1]); // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] + printf("Allocated size = %lu B\n", trace[2]); // Allocated size [8] + printf("Memory operations trace used size = %lu B\n", trace[3]); // Main trace used size [8] + i += 4; + uint64_t number_of_entries = trace[i]; + i++; + printf("Trace size=%lu\n", number_of_entries); + + for (uint64_t m = 0; m < number_of_entries; m++) + { + uint64_t addr_step = trace[i]; + i++; + + // addr_step = [@0, @1, @2, @3, width + write<<4, supra_step] + uint64_t address = addr_step & 0xFFFFFFFF; + uint64_t width = (addr_step >> (4*8)) & 0xF; + uint64_t write = (addr_step >> ((4*8) + 4)) & 0x1; + uint64_t micro_step = (addr_step >> (5*8)) & 0x3; + uint64_t incremental_step = (addr_step >> ((5*8) + 2)); + bool address_is_inside_range = + ((address >= RAM_ADDR) && (address < (RAM_ADDR + RAM_SIZE))) || + ((address >= ROM_ADDR) && (address < (ROM_ADDR + ROM_SIZE))) || + ((address >= INPUT_ADDR) && (address < (INPUT_ADDR + MAX_INPUT_SIZE))); + bool width_is_valid = (width == 1) || (width == 2) || (width == 4) || (width == 8); + bool bError = !(address_is_inside_range && width_is_valid); + if (trace_trace || bError) + { + printf("\tmem_trace[%lu] = %016lx = [inc_step=%lu, u_step=%lu, write=%lx, width=%lx, address=%lx] %s\n", + m, + addr_step, + incremental_step, + micro_step, + write, + width, + address, + bError ? " ERROR!!!!!!!!!!!!!!" : "" + ); + } + + // u-step: + // 0: a=SRC_MEM + // 1: b=SRC_MEM or b=SRC_IND + // 2: precompiled_read + // 3: c=STORE_MEM, c=STORE_IND or precompiled_write + + bool address_is_aligned = (address & 0x7) == 0; + uint64_t aligned_address = address & 0xFFFFFFF8; + uint64_t number_of_read_values = 0; + uint64_t number_of_write_values = 0; + + switch (micro_step) + { + case 0: // a=SRC_MEM + { + assert(width == 8); + if (address_is_aligned) + { + number_of_read_values = 1; + } + else + { + number_of_read_values = 2; + } + break; + } + case 1: // b=SRC_MEM or b=SRC_IND + { + if (address_is_aligned) + { + number_of_read_values = 1; + } + else + { + if (((address + width - 1) & 0xFFFFFFF8) == aligned_address) + { + number_of_read_values = 1; + } + else + { + number_of_read_values = 2; + } + } + break; + } + case 2: // precompiled_read + { + assert(width == 8); + if (address_is_aligned) + { + number_of_read_values = 1; + } + else + { + number_of_read_values = 2; + } + break; + } + case 3: // c=STORE_MEM, c=STORE_IND or precompiled_write + { + if (address_is_aligned && (width == 8)) + { + number_of_read_values = 0; + } + else + { + if (((address + width - 1) & 0xFFFFFFF8) == aligned_address) + { + number_of_read_values = 1; + } + else + { + number_of_read_values = 2; + } + } + number_of_write_values = 1; + break; + } + } + + for (uint64_t r = 0; r < number_of_read_values; r++) + { + uint64_t value = trace[i]; + i++; + m++; + if (trace_trace) + { + printf("\t\tread_value[%lu] = 0x%lx\n", i, value); + } + } + + for (uint64_t w = 0; w < number_of_write_values; w++) + { + uint64_t value = trace[i]; + i++; + m++; + if (trace_trace) + { + printf("\t\twrite_value[%lu] = 0x%lx\n", i, value); + } + } + } + printf("Trace=%p number_of_entries=%lu\n", trace, number_of_entries); +} + +void save_mem_op_to_files(void) +{ + // Log header + uint64_t * pOutput = (uint64_t *)TRACE_ADDR; + printf("Version = 0x%06lx\n", pOutput[0]); // Version, e.g. v1.0.0 [8] + printf("Exit code = %lu\n", pOutput[1]); // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] + printf("Allocated size = %lu B\n", pOutput[2]); // Allocated size [8] + printf("Memory operations trace used size = %lu B\n", pOutput[3]); // Main trace used size [8] + + printf("Trace content:\n"); + uint64_t * trace = (uint64_t *)MEM_TRACE_ADDRESS; + uint64_t number_of_chunks = trace[0]; + printf("Number of chunks=%lu\n", number_of_chunks); + if (number_of_chunks > 1000000) + { + printf("ERROR: Number of chunks is too high=%lu\n", number_of_chunks); + fflush(stdout); + fflush(stderr); + exit(-1); + } + uint64_t * chunk = trace + 1; + for (uint64_t c=0; c= sizeof(file_name)) + { + fprintf(stderr, "ERROR: Failed to construct file name for chunk=%lu\n", c); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + uint64_t i=0; + i++; // Skip end + uint64_t mem_op_trace_size = chunk[i]; + i++; + if (mem_op_trace_size > 10000000) + { + printf("ERROR: Mem op trace size is too high=%lu\n", mem_op_trace_size); + fflush(stdout); + fflush(stderr); + exit(-1); + } + + printf("Chunk %lu: file=%s length=%lu\n", c, file_name, mem_op_trace_size); + + buffer2file(&chunk[i], mem_op_trace_size * 8, file_name); + + //Set next chunk pointer: skip [end] and [mem_op_trace_size] headers plus data + chunk = chunk + mem_op_trace_size + 2; + } + printf("Trace=%p chunk=%p size=%lu\n", trace, chunk, (uint64_t)chunk - (uint64_t)trace); +} + +/* Trace data structure + [8B] Number of elements + + A series of elements with the following structure: + [8B] op: instruction opcode + [8B] a: register a value + [8B] b: register b value + [8B] precompiled_memory_address: memory read address of the precompiled input data +*/ +void log_chunk_player_main_trace(void) +{ + uint64_t * chunk = (uint64_t *)TRACE_ADDR; + uint64_t i = 0; + + printf("Version = 0x%06lx\n", chunk[0]); // Version, e.g. v1.0.0 [8] + printf("Exit code = %lu\n", chunk[1]); // Exit code: 0=successfully completed, 1=not completed (written at the beginning of the emulation), etc. [8] + printf("Allocated size = %lu B\n", chunk[2]); // Allocated size [8] + printf("Memory operations trace used size = %lu B\n", chunk[3]); // Main trace used size [8] + i = 4; + + uint64_t mem_reads_size = chunk[i]; + i++; + printf("mem_reads_size=%lu\n", mem_reads_size); + if (mem_reads_size > 10000000) + { + printf("ERROR: Mem reads size is too high=%lu\n", mem_reads_size); + fflush(stdout); + fflush(stderr); + exit(-1); + } + //if (trace_trace) + { + for (uint64_t m=0; m 0xFF) + { + printf("ERROR!! Invalid op=%lu=0x%lx\n", op, op); + } + if (trace_trace) printf("\tmem_reads[%lu] a=0x%08lx\n", m, chunk[i]); + i++; + m++; + if (trace_trace) printf("\tmem_reads[%lu] b=0x%08lx\n", m, chunk[i]); + i++; + m++; + if ( (op == 0xf1) // Keccak + || (op == 0xf9) // SHA256 + || (op == 0xf2) // Arith256 + || (op == 0xf3) // Arith256Mod + || (op == 0xf4) // Secp256k1Add + || (op == 0xf5) // Secp256k1Dbl + ) + { + if (trace_trace) printf("\tmem_reads[%lu] precompiled_address=%08lx\n", m, chunk[i]); + i++; + m++; + } + } + } + + printf("Chunk=%p size=%lu\n", chunk, mem_reads_size); +} \ No newline at end of file diff --git a/emulator-asm/src/trace_logs.hpp b/emulator-asm/src/trace_logs.hpp new file mode 100644 index 000000000..258804f23 --- /dev/null +++ b/emulator-asm/src/trace_logs.hpp @@ -0,0 +1,14 @@ +#ifndef EMULATOR_ASM_TRACE_LOGS_HPP +#define EMULATOR_ASM_TRACE_LOGS_HPP + +#include + +void log_minimal_trace(void); +void log_histogram(void); +void log_main_trace(void); +void log_mem_trace(void); +void log_mem_op(void); +void save_mem_op_to_files(void); +void log_chunk_player_main_trace(void); + +#endif // EMULATOR_ASM_TRACE_LOGS_HPP \ No newline at end of file diff --git a/emulator/Cargo.toml b/emulator/Cargo.toml index 481b9b2a9..e8239b9fc 100644 --- a/emulator/Cargo.toml +++ b/emulator/Cargo.toml @@ -22,7 +22,6 @@ zisk-pil = { workspace = true } riscv = { workspace = true } data-bus = { workspace = true } rayon = { workspace = true } -sm-mem = { workspace = true } mem-common = { workspace = true } sm-arith = { workspace = true } sm-binary = { workspace = true } @@ -36,17 +35,13 @@ memmap2 = "0.9.8" num-format = "0.4" symbolic-demangle = { version = "12.16", features = ["rust", "cpp"] } symbolic-common = "12.16" +regex = "1.11.1" [build-dependencies] -vergen = { version = "8", default-features = false, features = [ - "build", - "git", - "git2", -] } +vergen-git2 = { workspace = true } [dev-dependencies] criterion = { version = "0.5.1", features = ["html_reports"] } -pprof = { version = "0.14.0", features = ["flamegraph", "criterion"] } [[bench]] name = "benchmark" @@ -55,8 +50,9 @@ harness = false [features] default = [] debug_stats_trace = [] +minimal_trace_index_debug = [] debug_call_stack = [] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed"] +gpu = ["packed"] +packed = [] # sp = [] no_lib_link = ["zisk-pil/no_lib_link"] diff --git a/emulator/benches/benchmark.rs b/emulator/benches/benchmark.rs index 76fe92e7c..3b445e7db 100644 --- a/emulator/benches/benchmark.rs +++ b/emulator/benches/benchmark.rs @@ -81,9 +81,10 @@ fn bench_riscv2zisk(c: &mut Criterion) { b.iter(|| { // Convert the ELF file to ZisK ROM let elf_file = "./benches/data/my.elf".to_string(); + let elf = std::fs::read(elf_file.clone()).unwrap(); let _rom: ZiskRom = { // Create an instance of the RISCV -> ZisK program converter - let rv2zk = Riscv2zisk::new(elf_file.clone()); + let rv2zk = Riscv2zisk::new(&elf); // Convert program to rom let result = rv2zk.run(); @@ -113,9 +114,10 @@ fn bench_process_rom(c: &mut Criterion) { c.bench_function("Process ROM", |b| { // Convert the ELF file to ZisK ROM let elf_file = "./benches/data/my.elf".to_string(); + let elf = std::fs::read(elf_file.clone()).unwrap(); let rom: ZiskRom = { // Create an instance of the RISCV -> ZisK program converter - let rv2zk = Riscv2zisk::new(elf_file.clone()); + let rv2zk = Riscv2zisk::new(&elf); // Convert program to rom let result = rv2zk.run(); @@ -166,9 +168,10 @@ fn bench_process_rom_callback(c: &mut Criterion) { //let elf_file = // "../riscof/riscof_work/rv64i_m/A/src/amoxor.w-01.S/dut/my.elf".to_string(); let elf_file = "./benches/data/my.elf".to_string(); + let elf = std::fs::read(elf_file.clone()).unwrap(); let zisk_rom: ZiskRom = { // Create an instance of the RISCV -> ZisK program converter - let rv2zk = Riscv2zisk::new(elf_file.clone()); + let rv2zk = Riscv2zisk::new(&elf); // Convert program to rom let result = rv2zk.run(); diff --git a/emulator/build.rs b/emulator/build.rs index c2f550fb6..b9a0832eb 100644 --- a/emulator/build.rs +++ b/emulator/build.rs @@ -1,3 +1,12 @@ fn main() { - vergen::EmitBuilder::builder().build_timestamp().git_sha(true).emit().unwrap(); + let mut builder = vergen_git2::Emitter::default(); + builder + .add_instructions( + &vergen_git2::BuildBuilder::default().build_timestamp(true).build().unwrap(), + ) + .unwrap(); + builder + .add_instructions(&vergen_git2::Git2Builder::default().sha(true).build().unwrap()) + .unwrap(); + builder.emit().unwrap(); } diff --git a/emulator/src/disasm.rs b/emulator/src/disasm.rs new file mode 100644 index 000000000..d958903f6 --- /dev/null +++ b/emulator/src/disasm.rs @@ -0,0 +1,398 @@ +//! Disassembly writer module +//! Generates objdump-like output with execution counts + +use std::collections::HashMap; +use std::fs::File; +use std::io::{BufWriter, Result, Write}; + +use crate::ElfSymbolReader; +use zisk_core::{ZiskInst, ZiskRom}; + +pub struct DisasmWriter { + file: BufWriter, + pc_histogram: HashMap, + symbols: Option, +} + +impl DisasmWriter { + pub fn new(path: &str) -> Result { + let file = File::create(path)?; + Ok(Self { file: BufWriter::new(file), pc_histogram: HashMap::new(), symbols: None }) + } + + pub fn set_pc_histogram(&mut self, histogram: HashMap) { + self.pc_histogram = histogram; + } + + pub fn set_symbols(&mut self, symbols: ElfSymbolReader) { + self.symbols = Some(symbols); + } + + pub fn write_header(&mut self, title: &str) -> Result<()> { + writeln!(&mut self.file)?; + writeln!(&mut self.file, "Disassembly with execution counts:")?; + writeln!(&mut self.file, "{}", title)?; + writeln!(&mut self.file)?; + Ok(()) + } + + pub fn write_disassembly(&mut self, rom: &ZiskRom) -> Result<()> { + let mut local_labels: HashMap = HashMap::new(); + + // First pass: identify all jump targets to generate labels + for (idx, pc) in rom.sorted_pc_list.iter().enumerate() { + let inst = rom.get_instruction(*pc); + // Get next PC for detecting fall-through + let next_pc = if idx + 1 < rom.sorted_pc_list.len() { + Some(rom.sorted_pc_list[idx + 1]) + } else { + None + }; + + // Check for jumps to generate labels + if inst.set_pc { + let target1 = (*pc as i64 + inst.jmp_offset1) as u64; + // Only create label if it's not the next instruction (not fall-through) + if Some(target1) != next_pc + && !local_labels.contains_key(&target1) + && rom.sorted_pc_list.binary_search(&target1).is_ok() + { + if let Some(ref symbols) = self.symbols { + if let Some(sym) = symbols.get_symbol_at_address(target1) { + local_labels.insert(target1, sym.name.clone()); + } else { + let label = format!(".L{}", local_labels.len()); + local_labels.insert(target1, label); + } + } else { + let label = format!(".L{}", local_labels.len()); + local_labels.insert(target1, label); + } + } + + if inst.jmp_offset2 != 0 { + let target2 = (*pc as i64 + inst.jmp_offset2) as u64; + // Only create label if it's not the next instruction (not fall-through) + if Some(target2) != next_pc + && !local_labels.contains_key(&target2) + && rom.sorted_pc_list.binary_search(&target2).is_ok() + { + if let Some(ref symbols) = self.symbols { + if let Some(sym) = symbols.get_symbol_at_address(target2) { + local_labels.insert(target2, sym.name.clone()); + } else { + let label = format!(".L{}", local_labels.len()); + local_labels.insert(target2, label); + } + } else { + let label = format!(".L{}", local_labels.len()); + local_labels.insert(target2, label); + } + } + } + } + } + + // Second pass: generate disassembly + for (idx, pc) in rom.sorted_pc_list.iter().enumerate() { + // Check if this PC is a function entry point + if let Some(ref symbols) = self.symbols { + if let Some(sym) = symbols.get_symbol_at_address(*pc) { + // Write function header + writeln!(&mut self.file)?; + writeln!(&mut self.file, "{:016x} <{}>:", pc, sym.name)?; + } + } + + // Check if this PC has a label (jump target) + if let Some(label) = local_labels.get(pc) { + if let Some(ref symbols) = self.symbols { + if symbols.get_symbol_at_address(*pc).is_none() { + writeln!(&mut self.file)?; + writeln!(&mut self.file, "{:016x} <{}>:", pc, label)?; + } + } else { + writeln!(&mut self.file)?; + writeln!(&mut self.file, "{:016x} <{}>:", pc, label)?; + } + } + + let inst = rom.get_instruction(*pc); + let exec_count = self.pc_histogram.get(pc).unwrap_or(&0); + + // Get next PC for detecting fall-through jumps + let next_pc = if idx + 1 < rom.sorted_pc_list.len() { + Some(rom.sorted_pc_list[idx + 1]) + } else { + None + }; + + // Determine if this is the first Zisk instruction for a RISC-V instruction + let is_first_zisk_for_riscv = if idx > 0 { + let prev_pc = rom.sorted_pc_list[idx - 1]; + let prev_inst = rom.get_instruction(prev_pc); + prev_inst.riscv_inst != inst.riscv_inst + } else { + true + }; + + // Format: PC | EXEC_COUNT | RISCV_INST | ZISK_INST + if is_first_zisk_for_riscv { + if let Some(ref riscv_inst) = inst.riscv_inst { + writeln!( + &mut self.file, + " {:08x}: {:12} {:30} {}", + pc, + exec_count, + riscv_inst, + inst_to_asm(inst, &local_labels, next_pc) + )?; + } else { + // Zisk instruction without RISC-V source (initialization) + writeln!( + &mut self.file, + " {:08x}: {:12} {:30} {}", + pc, + exec_count, + "", + inst_to_asm(inst, &local_labels, next_pc) + )?; + } + } else { + // Additional Zisk instruction from same RISC-V instruction + writeln!( + &mut self.file, + " {:08x}: {:12} {:30} {}", + pc, + exec_count, + "", + inst_to_asm(inst, &local_labels, next_pc) + )?; + } + } + + Ok(()) + } + + pub fn flush(&mut self) -> Result<()> { + self.file.flush() + } +} + +/// Convert a ZiskInst to assembly-like string representation +/// Format: operation dest, a, b (RISC-V like syntax) +fn inst_to_asm(inst: &ZiskInst, labels: &HashMap, next_pc: Option) -> String { + use zisk_core::{ + SRC_C, SRC_IMM, SRC_IND, SRC_MEM, SRC_REG, SRC_STEP, STORE_IND, STORE_MEM, STORE_NONE, + STORE_REG, + }; + + let mut asm = String::new(); + + // Operation name + asm.push_str(inst.op_str); + + let mut operands = Vec::new(); + + // 1. Destination (c register - where result is stored) + if inst.store != STORE_NONE { + let dest = match inst.store { + STORE_REG => { + format!("x{}", inst.store_offset) + } + STORE_MEM => { + if inst.store_use_sp { + if inst.store_offset >= 0 { + format!("[sp+{}]", inst.store_offset) + } else { + format!("[sp{}]", inst.store_offset) + } + } else { + format!("[0x{:x}]", inst.store_offset) + } + } + STORE_IND => { + if inst.store_offset >= 0 { + format!("[a+{}]", inst.store_offset) + } else { + format!("[a{}]", inst.store_offset) + } + } + _ => "?".to_string(), + }; + operands.push(dest); + } + + // 2. Source A + let src_a = match inst.a_src { + SRC_C => "c".to_string(), + SRC_REG => { + format!("x{}", inst.a_offset_imm0) + } + SRC_MEM => { + if inst.a_use_sp_imm1 != 0 { + let offset = inst.a_offset_imm0 as i64; + if offset >= 0 { + format!("[sp+{}]", offset) + } else { + format!("[sp{}]", offset) + } + } else { + format!("[0x{:x}]", inst.a_offset_imm0) + } + } + SRC_IMM => { + let imm = inst.a_offset_imm0 as i64 | ((inst.a_use_sp_imm1 as i64) << 32); + if (0..=9).contains(&imm) { + format!("{}", imm) + } else { + format!("0x{:x}", imm as u64) + } + } + SRC_STEP => "step".to_string(), + _ => "?".to_string(), + }; + operands.push(src_a); + + // 3. Source B (if used) + if inst.b_src != 0 { + let src_b = match inst.b_src { + SRC_C => "c".to_string(), + SRC_REG => { + format!("x{}", inst.b_offset_imm0) + } + SRC_MEM => { + if inst.b_use_sp_imm1 != 0 { + let offset = inst.b_offset_imm0 as i64; + if offset >= 0 { + format!("[sp+{}]", offset) + } else { + format!("[sp{}]", offset) + } + } else { + format!("[0x{:x}]", inst.b_offset_imm0) + } + } + SRC_IMM => { + let imm = inst.b_offset_imm0 as i64 | ((inst.b_use_sp_imm1 as i64) << 32); + if (0..=9).contains(&imm) { + format!("{}", imm) + } else { + format!("0x{:x}", imm as u64) + } + } + SRC_IND => { + let offset = inst.b_offset_imm0 as i64; + let width = match inst.ind_width { + 1 => "b", + 2 => "h", + 4 => "w", + 8 => "d", + _ => "", + }; + if inst.b_use_sp_imm1 != 0 { + if offset >= 0 { + format!("[a+sp+{}]{}", offset, width) + } else { + format!("[a+sp{}]{}", offset, width) + } + } else if offset >= 0 { + format!("[a+{}]{}", offset, width) + } else { + format!("[a{}]{}", offset, width) + } + } + _ => "?".to_string(), + }; + operands.push(src_b); + } + + // Format operands + if !operands.is_empty() { + asm.push(' '); + asm.push_str(&operands.join(", ")); + } + + // 4. Jump targets (Zisk peculiarity: two jump offsets) + // jmp_offset1: used if flag is active + // jmp_offset2: used as default jump + // Don't show jumps to next instruction (fall-through) to be more RISC-V like + if inst.set_pc { + let mut jump_targets = Vec::new(); + + let target1_is_next = + inst.jmp_offset1 != 0 && Some((inst.paddr as i64 + inst.jmp_offset1) as u64) == next_pc; + let target2_is_next = + inst.jmp_offset2 != 0 && Some((inst.paddr as i64 + inst.jmp_offset2) as u64) == next_pc; + + if inst.jmp_offset1 != 0 && !target1_is_next { + let target = (inst.paddr as i64 + inst.jmp_offset1) as u64; + if let Some(label) = labels.get(&target) { + jump_targets.push((true, label.clone())); + } else { + jump_targets.push((true, format!("0x{:x}", target))); + } + } + + if inst.jmp_offset2 != 0 && !target2_is_next { + let target = (inst.paddr as i64 + inst.jmp_offset2) as u64; + if let Some(label) = labels.get(&target) { + jump_targets.push((false, label.clone())); + } else { + jump_targets.push((false, format!("0x{:x}", target))); + } + } + + if !jump_targets.is_empty() { + if operands.is_empty() { + asm.push(' '); + } else { + asm.push_str(", "); + } + + // If only one target, don't use prefix (it's implicit) + // If both targets, use true:/false: prefix to distinguish + if jump_targets.len() == 1 { + asm.push_str(&jump_targets[0].1); + } else { + let formatted: Vec = jump_targets + .iter() + .map(|(is_true, label)| { + if *is_true { + format!("true:{}", label) + } else { + format!("false:{}", label) + } + }) + .collect(); + asm.push_str(&formatted.join(", ")); + } + } + } + + // 5. Additional flags/modifiers as comments + let mut comments = Vec::new(); + + if inst.m32 { + comments.push("32-bit"); + } + if inst.end { + comments.push("END"); + } + if inst.is_external_op { + comments.push("external"); + } + if inst.store_pc { + comments.push("store_pc"); + } + if inst.is_precompiled { + comments.push("with_step"); + } + + if !comments.is_empty() { + asm.push_str(" ; "); + asm.push_str(&comments.join(", ")); + } + + asm +} diff --git a/emulator/src/elf_symbol_reader.rs b/emulator/src/elf_symbol_reader.rs index 09563f0ef..df324b931 100644 --- a/emulator/src/elf_symbol_reader.rs +++ b/emulator/src/elf_symbol_reader.rs @@ -1,5 +1,7 @@ use memmap2::Mmap; +use object::elf::STT_GNU_IFUNC; use object::{elf::STT_FUNC, Object, ObjectSymbol, Symbol, SymbolFlags, SymbolKind}; +use regex::Regex; use std::fs::File; use std::io::Result; @@ -9,11 +11,13 @@ pub struct SymbolInfo { pub name: String, pub address: u64, pub size: u64, + pub is_selected_roi: bool, } pub struct ElfSymbolReader { functions: Vec, profile_tags: Vec<(u16, String)>, + roi_filter: Option, } impl Default for ElfSymbolReader { @@ -23,7 +27,13 @@ impl Default for ElfSymbolReader { } impl ElfSymbolReader { pub fn new() -> Self { - Self { functions: Vec::new(), profile_tags: Vec::new() } + Self { functions: Vec::new(), profile_tags: Vec::new(), roi_filter: None } + } + + /// Sets a regex filter to mark matching symbols as ROI + pub fn set_roi_filter(&mut self, pattern: &str) -> std::result::Result<(), regex::Error> { + self.roi_filter = Some(Regex::new(pattern)?); + Ok(()) } pub fn load_from_file(&mut self, path: &str) -> Result<()> { @@ -48,11 +58,17 @@ impl ElfSymbolReader { if let Ok(name) = symbol.name() { if !name.is_empty() { if let SymbolFlags::Elf { st_info, .. } = symbol.flags() { - if (st_info & STT_FUNC) != 0 { + let st_type = st_info & 0x0f; + if st_type == STT_FUNC || st_type == STT_GNU_IFUNC { let name = self.demangle_name(name); let address = symbol.address(); let size = symbol.size(); - let symbol_info = SymbolInfo { name, address, size }; + let is_selected_roi = self + .roi_filter + .as_ref() + .map(|re| re.is_match(&name)) + .unwrap_or(false); + let symbol_info = SymbolInfo { name, address, size, is_selected_roi }; self.functions.push(symbol_info); } } @@ -105,4 +121,14 @@ impl ElfSymbolReader { pub fn functions(&self) -> impl Iterator { self.functions.iter() } + + /// Returns an iterator over all ROI functions (those matching the filter) + pub fn roi_functions(&self) -> impl Iterator { + self.functions.iter().filter(|s| s.is_selected_roi) + } + + /// Returns the symbol at the given address, if it exists + pub fn get_symbol_at_address(&self, address: u64) -> Option<&SymbolInfo> { + self.functions.iter().find(|s| s.address == address) + } } diff --git a/emulator/src/emu.rs b/emulator/src/emu.rs index 07e898289..38f08abfe 100644 --- a/emulator/src/emu.rs +++ b/emulator/src/emu.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::mem; use crate::{ @@ -20,6 +21,8 @@ use zisk_core::{ STORE_IND, STORE_MEM, STORE_NONE, STORE_REG, }; +pub const ZISK_PUBLICS: usize = 64; + /// ZisK emulator structure, containing the ZisK rom, the list of ZisK operations, and the /// execution context pub struct Emu<'a> { @@ -90,9 +93,9 @@ impl<'a> Emu<'a> { emu } - pub fn create_emu_context(&mut self, inputs: Vec) -> EmuContext { + pub fn create_emu_context(&mut self, inputs: Vec, options: &EmuOptions) -> EmuContext { // Initialize an empty instance - let mut ctx = EmuContext::new(inputs); + let mut ctx = EmuContext::new(inputs, options); // Create a new read section for every RO data entry of the rom for i in 0..self.rom.ro_data.len() { @@ -320,7 +323,7 @@ impl<'a> Emu<'a> { 8, [self.ctx.inst_ctx.a, 0], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } else { let (required_address_1, required_address_2) = Mem::required_addresses(address, 8); @@ -340,7 +343,7 @@ impl<'a> Emu<'a> { 8, [raw_data_1, raw_data_2], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } /*println!( "Emu::source_a_mem_reads_consume() mem_leads_index={} value={:x}", @@ -728,7 +731,7 @@ impl<'a> Emu<'a> { 8, [self.ctx.inst_ctx.b, 0], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } else { let (required_address_1, required_address_2) = Mem::required_addresses(address, 8); @@ -745,7 +748,7 @@ impl<'a> Emu<'a> { 8, [raw_data, 0], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } else { assert!(*mem_reads_index < mem_reads.len()); let raw_data_1 = mem_reads[*mem_reads_index]; @@ -762,7 +765,7 @@ impl<'a> Emu<'a> { 8, [raw_data_1, raw_data_2], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } } /*println!( @@ -797,8 +800,14 @@ impl<'a> Emu<'a> { 8, [self.ctx.inst_ctx.b, 0], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } else { + if instruction.ind_width == 0 || address > 0xFFFF_FFFF { + println!( + "ILLEGAL INSTRUCTION/ADDRESS 0x{:08X} {} S:{} {:?}", + address, instruction.ind_width, self.ctx.inst_ctx.step, instruction + ); + } let (required_address_1, required_address_2) = Mem::required_addresses(address, instruction.ind_width); if required_address_1 == required_address_2 { @@ -817,7 +826,7 @@ impl<'a> Emu<'a> { instruction.ind_width as u8, [raw_data, 0], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } else { assert!(*mem_reads_index < mem_reads.len()); let raw_data_1 = mem_reads[*mem_reads_index]; @@ -838,7 +847,7 @@ impl<'a> Emu<'a> { 8, [raw_data_1, raw_data_2], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } } /*println!( @@ -1015,7 +1024,7 @@ impl<'a> Emu<'a> { } STORE_IND => { // Calculate value - let val: i64 = if instruction.store_ra { + let val: i64 = if instruction.store_pc { self.ctx.inst_ctx.pc as i64 + instruction.jmp_offset2 } else { self.ctx.inst_ctx.c as i64 @@ -1237,7 +1246,7 @@ impl<'a> Emu<'a> { value, [value, 0], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } // Otherwise, if not aligned, get old raw data from memory, then write it else { @@ -1256,7 +1265,7 @@ impl<'a> Emu<'a> { value, [raw_data, 0], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } else { assert!(*mem_reads_index < mem_reads.len()); let raw_data_1 = mem_reads[*mem_reads_index]; @@ -1273,7 +1282,7 @@ impl<'a> Emu<'a> { value, [raw_data_1, raw_data_2], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } } } @@ -1300,7 +1309,7 @@ impl<'a> Emu<'a> { value, [value, 0], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } // Otherwise, if not aligned, get old raw data from memory, then write it else { @@ -1319,7 +1328,7 @@ impl<'a> Emu<'a> { value, [raw_data, 0], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } else { assert!(*mem_reads_index < mem_reads.len()); let raw_data_1 = mem_reads[*mem_reads_index]; @@ -1336,7 +1345,7 @@ impl<'a> Emu<'a> { value, [raw_data_1, raw_data_2], ); - data_bus.write_to_bus(MEM_BUS_ID, &payload); + data_bus.write_to_bus(MEM_BUS_ID, &payload, &[]); } } } @@ -1445,8 +1454,14 @@ impl<'a> Emu<'a> { /// Run the whole program, fast #[inline(always)] pub fn run_fast(&mut self, options: &EmuOptions) { - while !self.ctx.inst_ctx.end && (self.ctx.inst_ctx.step < options.max_steps) { - self.step_fast(); + if options.with_progress { + while !self.ctx.inst_ctx.end && (self.ctx.inst_ctx.step < options.max_steps) { + self.step_fast_with_progress(); + } + } else { + while !self.ctx.inst_ctx.end && (self.ctx.inst_ctx.step < options.max_steps) { + self.step_fast(); + } } // Detect and report error @@ -1458,10 +1473,40 @@ impl<'a> Emu<'a> { } } + #[inline(always)] + pub fn step_fast_with_progress(&mut self) { + let instruction = self.rom.get_instruction(self.ctx.inst_ctx.pc); + if self.ctx.inst_ctx.step & 0xFF_FFFF == 0 { + let pc = self.ctx.inst_ctx.pc; + println!( + "running 0x{pc:08x} MS:{} {}", + self.ctx.inst_ctx.step >> 20, + instruction.verbose + ); + } + self.source_a(instruction); + self.source_b(instruction); + if instruction.input_size > 0 { + self.ctx.inst_ctx.extended_arg = instruction.jmp_offset1; + } else { + self.ctx.inst_ctx.extended_arg = 0; + } + (instruction.func)(&mut self.ctx.inst_ctx); + self.store_c(instruction); + + // #[cfg(feature = "sp")] + // self.set_sp(instruction); + + self.set_pc(instruction); + self.ctx.inst_ctx.end = instruction.end; + self.ctx.inst_ctx.step += 1; + } + /// Performs one single step of the emulation #[inline(always)] pub fn step_fast(&mut self) { let instruction = self.rom.get_instruction(self.ctx.inst_ctx.pc); + // println!("TRACE PC:0x{:0X} {}", self.ctx.inst_ctx.pc, instruction.verbose); // let debug = instruction.op >= 0xf6; // let initial_regs = if debug { // print!( @@ -1488,6 +1533,13 @@ impl<'a> Emu<'a> { // }; self.source_a(instruction); self.source_b(instruction); + + if instruction.input_size > 0 { + self.ctx.inst_ctx.extended_arg = instruction.jmp_offset1; + } else { + self.ctx.inst_ctx.extended_arg = 0; + } + (instruction.func)(&mut self.ctx.inst_ctx); self.store_c(instruction); @@ -1525,14 +1577,27 @@ impl<'a> Emu<'a> { callback: Option, ) { // Context, where the state of the execution is stored and modified at every execution step - self.ctx = self.create_emu_context(inputs.clone()); + self.ctx = self.create_emu_context(inputs.clone(), options); let mut elf = ElfSymbolReader::new(); + println!("READ SYMBOLS={}", options.read_symbols); if options.read_symbols { if let Some(elf_file) = &options.elf { println!("Loading symbols from ELF file: {elf_file}"); + + // Set ROI filter if provided + if let Some(roi_filter) = &options.roi_filter { + match elf.set_roi_filter(roi_filter) { + Ok(_) => println!("ROI filter applied: {}", roi_filter), + Err(e) => eprintln!("Invalid ROI filter regex '{}': {}", roi_filter, e), + } + } + elf.load_from_file(elf_file).unwrap(); let mut count = 0; + let mut roi_count = 0; + + // First pass: add all ROIs for symbol in elf.functions() { count += 1; self.ctx.stats.add_roi( @@ -1541,7 +1606,46 @@ impl<'a> Emu<'a> { &symbol.name, ); } - println!("Loaded {} function symbols", count); + + // Second pass: mark selected ROIs for tracking + for symbol in elf.functions() { + if symbol.is_selected_roi { + roi_count += 1; + println!(" [Selected ROI] {}", symbol.name); + self.ctx + .stats + .mark_roi_as_selected(symbol.address as u32, options.track_calls); + } + } + + println!( + "Loaded {} function symbols ({} marked as selected ROI)", + count, roi_count + ); + + // Setup call tracking if requested + if options.track_calls > 0 { + if roi_count > 0 { + println!( + "Call tracking enabled for {} ROI function(s) (tracking {} parameters)", + roi_count, options.track_calls + ); + println!("Output path: {}", options.track_output_path); + println!("Separator: '{}'", options.track_separator); + + // Initialize tracking files + if let Err(e) = self + .ctx + .stats + .init_roi_tracking(&options.track_output_path, &options.track_separator) + { + eprintln!("Error initializing ROI tracking: {}", e); + } + } else { + eprintln!("Warning: --track-calls specified but no ROI symbols found"); + } + } + count = 0; for (id, tag) in elf.profile_tags() { count += 1; @@ -1552,18 +1656,25 @@ impl<'a> Emu<'a> { self.ctx.stats.set_roi_callers(options.roi_callers); self.ctx.stats.set_top_roi_detail(options.top_roi_detail); self.ctx.stats.set_main_name(options.main_name.clone()); + self.ctx.stats.set_use_thousands_sep(!options.no_thousands_sep); + self.ctx.stats.set_top_rois_filter(options.top_roi_filter); } } if options.coverage && !options.stats { panic!("Coverage feature needs at least stats option"); } + if options.top_histogram > 0 && !options.stats { + panic!("Top Histogram feature needs at least stats option"); + } + + self.ctx.stats.set_top_histogram(options.top_histogram); self.ctx.stats.set_coverage(options.coverage); self.ctx.stats.set_legacy_stats(options.legacy_stats); self.ctx.stats.set_store_ops(options.store_op_output.is_some()); // Check that callback is provided if chunk size is specified - if options.chunk_size.is_some() { + if let Some(chunk_size) = options.chunk_size { // Check callback consistency if callback.is_none() { panic!("Emu::run() called with chunk size but no callback"); @@ -1571,7 +1682,7 @@ impl<'a> Emu<'a> { // Record callback into context self.ctx.do_callback = true; - self.ctx.callback_steps = options.chunk_size.unwrap(); + self.ctx.callback_steps = chunk_size; // Check steps value if self.ctx.callback_steps == 0 { @@ -1579,7 +1690,7 @@ impl<'a> Emu<'a> { } // Reserve enough entries for all the requested steps between callbacks - self.ctx.trace.mem_reads.reserve(self.ctx.callback_steps as usize); + self.ctx.trace.mem_reads.to_mut().reserve(self.ctx.callback_steps as usize); // Init pc to the rom entry address self.ctx.trace.start_state.pc = ROM_ENTRY; @@ -1587,7 +1698,10 @@ impl<'a> Emu<'a> { // Call run_fast if only essential work is needed if options.is_fast() { - return self.run_fast(options); + self.run_fast(options); + if options.steps { + println!("STEPS: {}", self.ctx.inst_ctx.step); + } } if options.generate_minimal_traces { let par_emu_options = @@ -1624,6 +1738,10 @@ impl<'a> Emu<'a> { // While not done while !self.ctx.inst_ctx.end { + // println!( + // "DEBUG_TRACE {:09} 0x{:08x} {:?}", + // self.ctx.inst_ctx.step, self.ctx.inst_ctx.pc, self.ctx.inst_ctx.regs + // ); if options.verbose { println!( "Emu::run() step={} ctx.pc={}", @@ -1703,6 +1821,32 @@ impl<'a> Emu<'a> { if let Some(store_op_output_file) = &options.store_op_output { self.ctx.stats.flush_op_data_to_file(store_op_output_file).unwrap(); } + + // Generate disassembly if requested + if let Some(disasm_file) = &options.disasm { + println!("Writing disassembly to: {}", disasm_file); + // Try to load symbols if not already loaded + let symbols = if options.read_symbols { + if let Some(elf_file) = &options.elf { + let mut elf = ElfSymbolReader::new(); + if let Some(roi_filter) = &options.roi_filter { + let _ = elf.set_roi_filter(roi_filter); + } + elf.load_from_file(elf_file).ok(); + Some(elf) + } else { + None + } + } else { + None + }; + + if let Err(e) = self.ctx.stats.write_disassembly(self.rom, disasm_file, symbols) { + eprintln!("Error writing disassembly: {}", e); + } else { + println!("Disassembly written successfully"); + } + } } } @@ -1714,7 +1858,7 @@ impl<'a> Emu<'a> { par_options: &ParEmuOptions, ) -> Vec { // Context, where the state of the execution is stored and modified at every execution step - self.ctx = self.create_emu_context(inputs); + self.ctx = self.create_emu_context(inputs, options); // Init pc to the rom entry address self.ctx.trace.start_state.pc = ROM_ENTRY; @@ -1747,7 +1891,7 @@ impl<'a> Emu<'a> { }, last_c: 0, steps: 0, - mem_reads: Vec::with_capacity(par_options.num_steps), + mem_reads: Cow::Owned(Vec::with_capacity(par_options.num_steps)), end: false, }); } @@ -1801,7 +1945,7 @@ impl<'a> Emu<'a> { }, last_c: 0, steps: 0, - mem_reads: Vec::with_capacity(par_options.num_steps), + mem_reads: Cow::Owned(Vec::with_capacity(par_options.num_steps)), end: false, }); } @@ -1823,14 +1967,14 @@ impl<'a> Emu<'a> { let pc = self.ctx.inst_ctx.pc; let instruction = self.rom.get_instruction(self.ctx.inst_ctx.pc); - // println!( - // "Emu::step() executing step={} pc={:x} inst={}", - // self.ctx.inst_ctx.step, - // self.ctx.inst_ctx.pc, - // instruction.to_text() - // ); - - //println!("PCLOG={}", instruction.to_text()); + if options.with_progress && self.ctx.inst_ctx.step & 0xF_FFFF == 0 { + println!( + "running 0x{pc:08x} MS:{} {}", + self.ctx.inst_ctx.step >> 20, + instruction.verbose + ); + } + // println!("PCLOG={}", instruction.to_text()); // Build the 'a' register value based on the source specified by the current instruction self.source_a(instruction); @@ -1839,6 +1983,11 @@ impl<'a> Emu<'a> { self.source_b(instruction); // Call the operation + if instruction.input_size > 0 { + self.ctx.inst_ctx.extended_arg = instruction.jmp_offset1; + } else { + self.ctx.inst_ctx.extended_arg = 0; + } (instruction.func)(&mut self.ctx.inst_ctx); // Retrieve statistics data @@ -1864,6 +2013,11 @@ impl<'a> Emu<'a> { // #[cfg(feature = "sp")] // self.set_sp(instruction); + // println!( + // "s={} pc={:x} c={:x}", + // self.ctx.inst_ctx.step, self.ctx.inst_ctx.pc, self.ctx.inst_ctx.c + // ); + // Set PC, based on current PC, current flag and current instruction self.set_pc(instruction); @@ -1914,7 +2068,7 @@ impl<'a> Emu<'a> { // Swap the emulator trace to avoid memory copies let mut trace = EmuTrace::default(); - trace.mem_reads.reserve(self.ctx.callback_steps as usize); + trace.mem_reads.to_mut().reserve(self.ctx.callback_steps as usize); mem::swap(&mut self.ctx.trace, &mut trace); (callback)(trace); @@ -1938,28 +2092,60 @@ impl<'a> Emu<'a> { #[inline(always)] pub fn par_step_my_block(&mut self, emu_full_trace_vec: &mut EmuTrace) { let instruction = self.rom.get_instruction(self.ctx.inst_ctx.pc); + // println!("TRACE PC:0x{:0X} {}", self.ctx.inst_ctx.pc, instruction.verbose); + + // Extract the Vec once for all mem_reads operations + let mem_reads = emu_full_trace_vec.mem_reads.to_mut(); + + #[cfg(feature = "minimal_trace_index_debug")] + println!( + "MINIMAL_TRACE par_step_my_block {} {}", + self.ctx.inst_ctx.step, + mem_reads.len() + ); + + // println!("PC:0x{:08X} {}", self.ctx.inst_ctx.pc, instruction.verbose); + // Build the 'a' register value based on the source specified by the current instruction - self.source_a_mem_reads_generate(instruction, &mut emu_full_trace_vec.mem_reads); + self.source_a_mem_reads_generate(instruction, mem_reads); // Build the 'b' register value based on the source specified by the current instruction - self.source_b_mem_reads_generate(instruction, &mut emu_full_trace_vec.mem_reads); + self.source_b_mem_reads_generate(instruction, mem_reads); // If this is a precompiled, get the required input data to copy it to mem_reads if instruction.input_size > 0 { self.ctx.inst_ctx.precompiled.input_data.clear(); self.ctx.inst_ctx.precompiled.output_data.clear(); + self.ctx.inst_ctx.extended_arg = instruction.jmp_offset1; + } else { + self.ctx.inst_ctx.extended_arg = 0; } // Call the operation (instruction.func)(&mut self.ctx.inst_ctx); // If this is a precompiled, copy input data generated by precompile call to mem_reads. + // when generate mem traces the input data containts also data_ext. if instruction.input_size > 0 { - emu_full_trace_vec.mem_reads.append(&mut self.ctx.inst_ctx.precompiled.input_data); + #[cfg(feature = "minimal_trace_index_debug")] + { + let input_data_bytes = self.ctx.inst_ctx.precompiled.input_data.len() * 8; + if input_data_bytes > instruction.input_size as usize { + println!( + "MINIMAL_TRACE data_ext_len:{} input_data:{} input_size:{} mem_reads[{}..{}]", + input_data_bytes - instruction.input_size as usize, + input_data_bytes, + instruction.input_size, + mem_reads.len(), + mem_reads.len() + (input_data_bytes >> 3) + ); + } + } + mem_reads.append(&mut self.ctx.inst_ctx.precompiled.input_data); } // Store the 'c' register value based on the storage specified by the current instruction - self.store_c_mem_reads_generate(instruction, &mut emu_full_trace_vec.mem_reads); + self.store_c_mem_reads_generate(instruction, mem_reads); // Set SP, if specified by the current instruction // #[cfg(feature = "sp")] @@ -1990,6 +2176,13 @@ impl<'a> Emu<'a> { // Build the 'b' register value based on the source specified by the current instruction self.source_b(instruction); + // If this is a precompiled, prepare extended argument + if instruction.input_size > 0 { + self.ctx.inst_ctx.extended_arg = instruction.jmp_offset1; + } else { + self.ctx.inst_ctx.extended_arg = 0; + } + // Call the operation (instruction.func)(&mut self.ctx.inst_ctx); @@ -2019,6 +2212,11 @@ impl<'a> Emu<'a> { data_bus: &mut DB, ) -> bool { let instruction = self.rom.get_instruction(self.ctx.inst_ctx.pc); + #[cfg(feature = "minimal_trace_index_debug")] + println!( + "MINIMAL_TRACE step_emu_trace {} {}", + self.ctx.inst_ctx.step, mem_reads_index + ); self.source_a_mem_reads_consume_databus(instruction, mem_reads, mem_reads_index, data_bus); self.source_b_mem_reads_consume_databus(instruction, mem_reads, mem_reads_index, data_bus); @@ -2026,7 +2224,7 @@ impl<'a> Emu<'a> { if instruction.input_size > 0 { self.ctx.inst_ctx.precompiled.input_data.clear(); self.ctx.inst_ctx.precompiled.output_data.clear(); - + self.ctx.inst_ctx.extended_arg = instruction.jmp_offset1; // round_up => (size + 7) >> 3 let number_of_mem_reads = (instruction.input_size + 7) >> 3; for _ in 0..number_of_mem_reads { @@ -2034,8 +2232,11 @@ impl<'a> Emu<'a> { *mem_reads_index += 1; self.ctx.inst_ctx.precompiled.input_data.push(mem_read); } + } else { + self.ctx.inst_ctx.extended_arg = 0; } + self.ctx.inst_ctx.data_ext_len = 0; (instruction.func)(&mut self.ctx.inst_ctx); self.store_c_mem_reads_consume_databus(instruction, mem_reads, mem_reads_index, data_bus); @@ -2049,7 +2250,22 @@ impl<'a> Emu<'a> { &self.ctx.inst_ctx, &mut self.static_array, ); - data_bus.write_to_bus(OPERATION_BUS_ID, operation_payload); + if self.ctx.inst_ctx.data_ext_len > 0 { + if mem_reads.len() < *mem_reads_index + self.ctx.inst_ctx.data_ext_len { + println!( + "OUT_OF_DATA_EXT({}) S:{}", + self.ctx.inst_ctx.data_ext_len, self.ctx.inst_ctx.step + ); + } + data_bus.write_to_bus( + OPERATION_BUS_ID, + operation_payload, + &mem_reads[*mem_reads_index..*mem_reads_index + self.ctx.inst_ctx.data_ext_len], + ); + *mem_reads_index += self.ctx.inst_ctx.data_ext_len; + } else { + data_bus.write_to_bus(OPERATION_BUS_ID, operation_payload, &[]); + } } // #[cfg(feature = "sp")] @@ -2073,13 +2289,20 @@ impl<'a> Emu<'a> { ) -> bool { let instruction = self.rom.get_instruction(self.ctx.inst_ctx.pc); + #[cfg(feature = "minimal_trace_index_debug")] + println!( + "MINIMAL_TRACE step_emu_trace_no_mem_ops {} {}", + self.ctx.inst_ctx.step, mem_reads_index + ); + + // println!("TRACE PC:0x{:0X} {}", self.ctx.inst_ctx.pc, instruction.verbose); self.source_a_mem_reads_consume_no_mem_ops(instruction, mem_reads, mem_reads_index); self.source_b_mem_reads_consume_no_mem_ops(instruction, mem_reads, mem_reads_index); // If this is a precompiled, get the required input data from mem_reads if instruction.input_size > 0 { self.ctx.inst_ctx.precompiled.input_data.clear(); self.ctx.inst_ctx.precompiled.output_data.clear(); - + self.ctx.inst_ctx.extended_arg = instruction.jmp_offset1; // round_up => (size + 7) >> 3 let number_of_mem_reads = (instruction.input_size + 7) >> 3; for _ in 0..number_of_mem_reads { @@ -2087,8 +2310,11 @@ impl<'a> Emu<'a> { *mem_reads_index += 1; self.ctx.inst_ctx.precompiled.input_data.push(mem_read); } + } else { + self.ctx.inst_ctx.extended_arg = 0; } + self.ctx.inst_ctx.data_ext_len = 0; (instruction.func)(&mut self.ctx.inst_ctx); self.store_c_mem_reads_consume_no_mem_ops(instruction, mem_reads, mem_reads_index); @@ -2102,7 +2328,16 @@ impl<'a> Emu<'a> { &self.ctx.inst_ctx, &mut self.static_array, ); - data_bus.write_to_bus(OPERATION_BUS_ID, operation_payload); + if self.ctx.inst_ctx.data_ext_len > 0 { + data_bus.write_to_bus( + OPERATION_BUS_ID, + operation_payload, + &mem_reads[*mem_reads_index..*mem_reads_index + self.ctx.inst_ctx.data_ext_len], + ); + *mem_reads_index += self.ctx.inst_ctx.data_ext_len; + } else { + data_bus.write_to_bus(OPERATION_BUS_ID, operation_payload, &[]); + } } // #[cfg(feature = "sp")] @@ -2196,19 +2431,31 @@ impl<'a> Emu<'a> { ) -> bool { let mut _continue = true; let instruction = self.rom.get_instruction(self.ctx.inst_ctx.pc); + + #[cfg(feature = "minimal_trace_index_debug")] + println!( + "MINIMAL_TRACE step_emu_traces {} {} 0x{:08x}", + self.ctx.inst_ctx.step, mem_reads_index, self.ctx.inst_ctx.pc + ); + self.source_a_mem_reads_consume_databus(instruction, mem_reads, mem_reads_index, data_bus); self.source_b_mem_reads_consume_databus(instruction, mem_reads, mem_reads_index, data_bus); // If this is a precompiled, get the required input data from mem_reads if instruction.input_size > 0 { self.ctx.inst_ctx.precompiled.input_data.clear(); self.ctx.inst_ctx.precompiled.output_data.clear(); + self.ctx.inst_ctx.extended_arg = instruction.jmp_offset1; let number_of_mem_reads = (instruction.input_size + 7) >> 3; for _ in 0..number_of_mem_reads { let mem_read = mem_reads[*mem_reads_index]; *mem_reads_index += 1; self.ctx.inst_ctx.precompiled.input_data.push(mem_read); } + } else { + self.ctx.inst_ctx.extended_arg = 0; } + + self.ctx.inst_ctx.data_ext_len = 0; (instruction.func)(&mut self.ctx.inst_ctx); self.store_c_mem_reads_consume_databus(instruction, mem_reads, mem_reads_index, data_bus); @@ -2221,14 +2468,24 @@ impl<'a> Emu<'a> { &self.ctx.inst_ctx, &mut self.static_array, ); - _continue = data_bus.write_to_bus(OPERATION_BUS_ID, operation_payload); + _continue = if self.ctx.inst_ctx.data_ext_len > 0 { + let data_ext_index = *mem_reads_index; + *mem_reads_index += self.ctx.inst_ctx.data_ext_len; + data_bus.write_to_bus( + OPERATION_BUS_ID, + operation_payload, + &mem_reads[data_ext_index..*mem_reads_index], + ) + } else { + data_bus.write_to_bus(OPERATION_BUS_ID, operation_payload, &[]) + } } // Get rom bus data let rom_payload = RomBusData::from_instruction(instruction, &self.ctx.inst_ctx); // Write rom bus data to rom bus - data_bus.write_to_bus(ROM_BUS_ID, &rom_payload); + data_bus.write_to_bus(ROM_BUS_ID, &rom_payload, &[]); // #[cfg(feature = "sp")] // self.set_sp(instruction); @@ -2240,6 +2497,18 @@ impl<'a> Emu<'a> { _continue } + #[allow(dead_code)] + fn get_slice_from_mem_reads<'b>( + &mut self, + mem_reads: &'b [u64], + mem_reads_index: &mut usize, + len: usize, + ) -> &'b [u64] { + let slice = &mem_reads[*mem_reads_index..*mem_reads_index + len]; + *mem_reads_index += len; + slice + } + /// Performs one single step of the emulation #[inline(always)] pub fn step_slice_full_trace( @@ -2254,6 +2523,12 @@ impl<'a> Emu<'a> { } let instruction = self.rom.get_instruction(self.ctx.inst_ctx.pc); + #[cfg(feature = "minimal_trace_index_debug")] + println!( + "MINIMAL_TRACE step_slice_full_trace {} {} 0x{:08x}", + self.ctx.inst_ctx.step, mem_reads_index, self.ctx.inst_ctx.pc + ); + reg_trace.clear_reg_step_ranges(); self.source_a_mem_reads_consume(instruction, mem_reads, mem_reads_index, reg_trace); @@ -2263,14 +2538,18 @@ impl<'a> Emu<'a> { if instruction.input_size > 0 { self.ctx.inst_ctx.precompiled.input_data.clear(); self.ctx.inst_ctx.precompiled.output_data.clear(); + self.ctx.inst_ctx.extended_arg = instruction.jmp_offset1; let number_of_mem_reads = (instruction.input_size + 7) >> 3; for _ in 0..number_of_mem_reads { let mem_read = mem_reads[*mem_reads_index]; *mem_reads_index += 1; self.ctx.inst_ctx.precompiled.input_data.push(mem_read); } + } else { + self.ctx.inst_ctx.extended_arg = 0; } + self.ctx.inst_ctx.data_ext_len = 0; (instruction.func)(&mut self.ctx.inst_ctx); self.store_c_mem_reads_consume(instruction, mem_reads, mem_reads_index, reg_trace); @@ -2287,6 +2566,7 @@ impl<'a> Emu<'a> { let full_trace_step = Self::build_full_trace_step(instruction, &self.ctx.inst_ctx, reg_trace); + *mem_reads_index += self.ctx.inst_ctx.data_ext_len; self.ctx.inst_ctx.step += 1; full_trace_step @@ -2367,7 +2647,7 @@ impl<'a> Emu<'a> { // trace.set_a_src_sp(inst.a_src == SRC_SP), // #[cfg(feature = "sp")] // trace.set_a_use_sp_imm1(inst.a_use_sp_imm1), - trace.set_a_src_step(inst.a_src == SRC_STEP); + trace.set_is_precompiled(inst.is_precompiled); trace.set_b_src_imm(inst.b_src == SRC_IMM); trace.set_b_src_mem(inst.b_src == SRC_MEM); trace.set_b_src_reg(inst.b_src == SRC_REG); @@ -2391,7 +2671,7 @@ impl<'a> Emu<'a> { inst.op }, ); - trace.set_store_ra(inst.store_ra); + trace.set_store_pc(inst.store_pc); trace.set_store_mem(inst.store == STORE_MEM); trace.set_store_reg(inst.store == STORE_REG); trace.set_store_ind(inst.store == STORE_IND); @@ -2425,21 +2705,21 @@ impl<'a> Emu<'a> { self.ctx.inst_ctx.step } - /// Get the output as a vector of u64 - pub fn get_output(&self) -> Vec { - let n = self.ctx.inst_ctx.mem.read(OUTPUT_ADDR, 8); - let mut addr = OUTPUT_ADDR + 8; - - let mut output: Vec = Vec::with_capacity(n as usize); + /// Get the output as a vector of u32 + pub fn get_output_32(&self) -> Vec { + let n = ZISK_PUBLICS; + let mut addr = OUTPUT_ADDR; + let mut output: Vec = Vec::with_capacity(n); for _i in 0..n { - output.push(self.ctx.inst_ctx.mem.read(addr, 8)); - addr += 8; + output.push(self.ctx.inst_ctx.mem.read(addr, 4) as u32); + addr += 4; } + output } /// Get the output as a vector of u32 - pub fn get_output_32(&self) -> Vec { + pub fn get_output_riscof_32(&self) -> Vec { let n = self.ctx.inst_ctx.mem.read(OUTPUT_ADDR, 4); let mut addr = OUTPUT_ADDR + 4; let mut output: Vec = Vec::with_capacity(n as usize); @@ -2448,22 +2728,15 @@ impl<'a> Emu<'a> { addr += 4; } - // let mut addr = OUTPUT_ADDR; - // let mut output: Vec = Vec::with_capacity(32); - // for _i in 0..32 { - // output.push(self.ctx.inst_ctx.mem.read(addr, 4) as u32); - // addr += 4; - // } - output } /// Get the output as a vector of u8 pub fn get_output_8(&self) -> Vec { - let n = self.ctx.inst_ctx.mem.read(OUTPUT_ADDR, 4); - let mut addr = OUTPUT_ADDR + 4; + let n = ZISK_PUBLICS; + let mut addr = OUTPUT_ADDR; - let mut output: Vec = Vec::with_capacity(n as usize); + let mut output: Vec = Vec::with_capacity(n); for _i in 0..n { output.push(self.ctx.inst_ctx.mem.read(addr, 1) as u8); output.push(self.ctx.inst_ctx.mem.read(addr + 1, 1) as u8); @@ -2515,7 +2788,7 @@ impl<'a> Emu<'a> { #[inline(always)] pub fn get_value_to_store(&self, instruction: &ZiskInst) -> u64 { - if instruction.store_ra { + if instruction.store_pc { (self.ctx.inst_ctx.pc as i64 + instruction.jmp_offset2) as u64 } else { self.ctx.inst_ctx.c diff --git a/emulator/src/emu_context.rs b/emulator/src/emu_context.rs index d5487768d..872a26c5d 100644 --- a/emulator/src/emu_context.rs +++ b/emulator/src/emu_context.rs @@ -1,9 +1,6 @@ -use crate::Stats; +use crate::{EmuOptions, Stats}; use zisk_common::EmuTrace; -use zisk_core::{ - EmulationMode, FcallInstContext, InstContext, Mem, PrecompiledInstContext, INPUT_ADDR, - MAX_INPUT_SIZE, RAM_ADDR, RAM_SIZE, REGS_IN_MAIN_TOTAL_NUMBER, ROM_ENTRY, -}; +use zisk_core::{InstContext, INPUT_ADDR, RAM_ADDR, RAM_SIZE, REGS_IN_MAIN_TOTAL_NUMBER}; /// ZisK emulator context data container, storing the state of the emulation pub struct EmuContext { @@ -24,24 +21,9 @@ pub struct EmuContext { /// RisK emulator context implementation impl EmuContext { /// RisK emulator context constructor - pub fn new(input: Vec) -> EmuContext { + pub fn new(input: Vec, options: &EmuOptions) -> EmuContext { let mut ctx = EmuContext { - inst_ctx: InstContext { - mem: Mem::default(), - a: 0, - b: 0, - c: 0, - flag: false, - sp: 0, - pc: ROM_ENTRY, - step: 0, - end: false, - error: false, - regs: [0; REGS_IN_MAIN_TOTAL_NUMBER], - emulation_mode: EmulationMode::default(), - precompiled: PrecompiledInstContext::default(), - fcall: FcallInstContext::default(), - }, + inst_ctx: InstContext::default(), tracerv: Vec::new(), tracerv_step: 0, tracerv_current_regs: [0; REGS_IN_MAIN_TOTAL_NUMBER], @@ -55,16 +37,17 @@ impl EmuContext { }; // Check the input data size is inside the proper range - if input.len() > (MAX_INPUT_SIZE - 16) as usize { + if input.len() > (options.max_input_mem - 8) as usize { panic!("EmuContext::new() input size too big size={}", input.len()); } + if input.len() & 7 != 0 { + panic!("EmuContext::new() input size must be a multiple of 8 size={}", input.len()); + } - // Add the length and input data read sections - let input_len = input.len() as u64; + ctx.inst_ctx.input_len = input.len() as u64; let free_input = 0u64; ctx.inst_ctx.mem.add_read_section(INPUT_ADDR, &free_input.to_le_bytes()); - ctx.inst_ctx.mem.add_read_section(INPUT_ADDR + 8, &input_len.to_le_bytes()); - ctx.inst_ctx.mem.add_read_section(INPUT_ADDR + 16, &input); + ctx.inst_ctx.mem.add_read_section(INPUT_ADDR + 8, &input); // Add the write section ctx.inst_ctx.mem.add_write_section(RAM_ADDR, RAM_SIZE); @@ -75,6 +58,6 @@ impl EmuContext { impl Default for EmuContext { fn default() -> Self { - Self::new(Vec::new()) + Self::new(Vec::new(), &EmuOptions::default()) } } diff --git a/emulator/src/emu_options.rs b/emulator/src/emu_options.rs index 1d13b1eed..078690f8c 100644 --- a/emulator/src/emu_options.rs +++ b/emulator/src/emu_options.rs @@ -2,7 +2,7 @@ use clap::Parser; use std::fmt; -use zisk_core::DEFAULT_MAX_STEPS_STR; +use zisk_core::{DEFAULT_MAX_STEPS, DEFAULT_MAX_STEPS_STR, MAX_INPUT_SIZE}; pub const ZISK_VERSION_MESSAGE: &str = concat!( env!("CARGO_PKG_VERSION"), @@ -27,6 +27,9 @@ pub struct EmuOptions { /// Sets the input data file path #[clap(short, long, value_name = "INPUT_FILE")] pub inputs: Option, + /// Sets the legacy input data file path + #[clap(long, value_name = "LEGACY_INPUT_FILE")] + pub legacy_inputs: Option, /// Sets the output data file path #[clap(short, long, value_name = "OUTPUT_FILE")] pub output: Option, @@ -48,8 +51,11 @@ pub struct EmuOptions { pub log_step: bool, /// Log the output to console. This option is set by default to true as a requirement to pass /// the riscof GHA tests. Enabled with `-c`. - #[clap(short = 'c', long, value_name = "LOG_OUTPUT", default_value = "true")] + #[clap(short = 'c', long, value_name = "LOG_OUTPUT", default_value = "false")] pub log_output: bool, + /// Log the output to console in riscof format. Enabled with `-f`. + #[clap(short = 'f', long, value_name = "LOG_OUTPUT_RISCOF", default_value = "false")] + pub log_output_riscof: bool, /// Trace every this number of steps. pub chunk_size: Option, /// Log performance metrics. Enabled with `-m`. @@ -77,6 +83,10 @@ pub struct EmuOptions { /// Requires options: -S -X #[clap(short = 'T', long, value_name = "TOP_ROI", default_value = "25")] pub top_roi: usize, + /// Set the number of top frequent instructions (histogram) + /// Requires options: -X + #[clap(short = 'H', long, value_name = "TOP_HISTOGRAM", default_value = "0")] + pub top_histogram: usize, /// Set the number of top caller functions to show for each top ROI. /// Requires options: -S -X -D #[clap(short = 'C', long, value_name = "ROI_CALLERS", default_value = "10")] @@ -93,6 +103,41 @@ pub struct EmuOptions { /// Requires option: -X #[clap(long, value_name = "COVERAGE", default_value = "false")] pub coverage: bool, + /// Filter symbols using regular expression to mark as special ROI. + /// Requires option: -S + #[clap(long, value_name = "ROI_FILTER")] + pub roi_filter: Option, + /// Track function calls to filtered symbols, specifying number of parameters to log. + /// Requires options: -S --roi-filter + #[clap(long, value_name = "TRACK_CALLS", default_value = "0")] + pub track_calls: usize, + /// Separator for tracked call parameters in output files. + /// Requires option: --track-calls + #[clap(long, value_name = "TRACK_SEPARATOR", default_value = ";")] + pub track_separator: String, + /// Output directory path for tracked call files. + /// Requires option: --track-calls + #[clap(long, value_name = "TRACK_OUTPUT_PATH", default_value = ".")] + pub track_output_path: String, + /// Disable thousands separator in statistics reports. + #[clap(long, value_name = "NO_THOUSANDS_SEP", default_value = "false")] + pub no_thousands_sep: bool, + /// Consider only filtered ROIs when calculating top ROI statistics. + /// Requires options: -S -X --roi-filter + #[clap(long, value_name = "TOP_ROI_FILTER", default_value = "false")] + pub top_roi_filter: bool, + /// Generate disassembly file with execution counts (objdump-like format). + /// Requires options: -S -X + #[clap(long, value_name = "DISASM_FILE")] + pub disasm: Option, + #[clap(long, value_name = "MAX_INPUT_MEM", default_value = "134217728")] // 128 MiB + pub max_input_mem: u64, + /// In mode fast, without stats, show coverage steps consumed without stats. + #[clap(long, default_value = "false")] + pub steps: bool, + /// In mode fast, without stats, show executing lines each 16Msteps. + #[clap(long, default_value = "false")] + pub with_progress: bool, } impl Default for EmuOptions { @@ -103,12 +148,13 @@ impl Default for EmuOptions { elf: None, inputs: None, output: None, - max_steps: 0xFFFFFFFFFFFFFFFF, + max_steps: DEFAULT_MAX_STEPS, print_step: None, trace: None, verbose: false, log_step: false, log_output: false, + log_output_riscof: false, chunk_size: None, log_metrics: false, tracerv: false, @@ -121,7 +167,19 @@ impl Default for EmuOptions { top_roi_detail: false, legacy_stats: false, coverage: false, + top_histogram: 0, main_name: "main".to_string(), + roi_filter: None, + track_calls: 0, + track_separator: ";".to_string(), + track_output_path: ".".to_string(), + no_thousands_sep: false, + top_roi_filter: false, + disasm: None, + max_input_mem: MAX_INPUT_SIZE, + steps: false, + with_progress: false, + legacy_inputs: None, } } } @@ -137,6 +195,7 @@ impl fmt::Display for EmuOptions { writeln!(f, "TRACE: {:?}", self.trace)?; writeln!(f, "OUTPUT: {:?}", self.output)?; writeln!(f, "LOG_OUTPUT: {:?}", self.log_output)?; + writeln!(f, "LOG_OUTPUT_RISCOF: {:?}", self.log_output_riscof)?; writeln!(f, "VERBOSE: {}", self.verbose)?; writeln!(f, "CHUNK_SIZE: {:?}", self.chunk_size)?; writeln!(f, "METRICS: {:?}", self.log_metrics)?; @@ -149,6 +208,17 @@ impl fmt::Display for EmuOptions { writeln!(f, "TOP_ROI: {:?}", self.top_roi)?; writeln!(f, "ROI_CALLERS: {:?}", self.roi_callers)?; writeln!(f, "TOP_ROI_DETAIL: {:?}", self.top_roi_detail)?; + writeln!(f, "TOP_HISTOGRAM: {:?}", self.top_histogram)?; + writeln!(f, "ROI_FILTER: {:?}", self.roi_filter)?; + writeln!(f, "TRACK_CALLS: {:?}", self.track_calls)?; + writeln!(f, "TRACK_SEPARATOR: {:?}", self.track_separator)?; + writeln!(f, "TRACK_OUTPUT_PATH: {:?}", self.track_output_path)?; + writeln!(f, "NO_THOUSANDS_SEP: {:?}", self.no_thousands_sep)?; + writeln!(f, "TOP_ROI_FILTER: {:?}", self.top_roi_filter)?; + writeln!(f, "DISASM: {:?}", self.disasm)?; + writeln!(f, "MAX_INPUT_MEM: {:?}", self.max_input_mem)?; + writeln!(f, "STEPS: {:?}", self.steps)?; + writeln!(f, "WITH_PROGRESS: {:?}", self.with_progress)?; Ok(()) } } @@ -165,5 +235,6 @@ impl EmuOptions { && !self.stats && !self.generate_minimal_traces && !self.log_output + && !self.log_output_riscof } } diff --git a/emulator/src/emulator.rs b/emulator/src/emulator.rs index 0369af1e3..e50bd6ac3 100644 --- a/emulator/src/emulator.rs +++ b/emulator/src/emulator.rs @@ -76,9 +76,12 @@ impl ZiskEmulator { println!("process_elf_file() elf_file={elf_filename}"); } + let elf = fs::read(&elf_filename) + .map_err(|e| ZiskEmulatorErr::Unknown(format!("Error reading ELF file: {e}")))?; + // Create an instance of the RISC-V -> ZisK program transpiler (Riscv2zisk) with the ELF // file name - let riscv2zisk = Riscv2zisk::new(elf_filename); + let riscv2zisk = Riscv2zisk::new(&elf); // Convert the ELF file to ZisK ROM calling the transpiler run() method let zisk_rom = riscv2zisk.run().map_err(|err| ZiskEmulatorErr::Unknown(err.to_string()))?; @@ -153,9 +156,8 @@ impl ZiskEmulator { // OUTPUT: // Save output to a file if requested - if options.output.is_some() { - fs::write(options.output.as_ref().unwrap(), &output) - .map_err(|e| ZiskEmulatorErr::Unknown(e.to_string()))? + if let Some(output_path) = &options.output { + fs::write(output_path, &output).map_err(|e| ZiskEmulatorErr::Unknown(e.to_string()))? } // Log output to console if requested @@ -169,6 +171,17 @@ impl ZiskEmulator { } } + // Log output to console if requested + if options.log_output_riscof { + // Get the emulation output as a u32 vector + let output = emu.get_output_riscof_32(); + + // Log the output to console + for o in &output { + println!("{o:08x}"); + } + } + Ok(output) } @@ -312,6 +325,29 @@ impl Emulator for ZiskEmulator { inputs = fs::read(path).expect("Could not read inputs file"); } + // Build an input data buffer either from the provided inputs path (if provided), or leave + // it empty + if options.legacy_inputs.is_some() { + if options.inputs.is_some() { + return Err(ZiskEmulatorErr::WrongArguments(ErrWrongArguments::new( + "Legacy input file and input file options are incompatible", + ))); + } + // Read inputs data from the provided inputs path + let path = PathBuf::from(options.legacy_inputs.clone().unwrap()); + let file_data = fs::read(path).expect("Could not read inputs file"); + + // Build legacy format: 8 bytes length (native endianness) + file content + padding to multiple of 8 + let file_len = file_data.len() as u64; + let total_len = 8 + file_data.len(); + let padding = (8 - (total_len % 8)) % 8; + + inputs = Vec::with_capacity(total_len + padding); + inputs.extend_from_slice(&file_len.to_ne_bytes()); + inputs.extend_from_slice(&file_data); + inputs.resize(total_len + padding, 0); + } + // If a rom file path is provided, load the rom from it if options.rom.is_some() { // Get the rom file name diff --git a/emulator/src/lib.rs b/emulator/src/lib.rs index 2663497e8..0a9b33e97 100644 --- a/emulator/src/lib.rs +++ b/emulator/src/lib.rs @@ -12,6 +12,7 @@ //! User configuration -------> EmuOptions / //! ``` +mod disasm; mod elf_symbol_reader; mod emu; mod emu_context; @@ -31,6 +32,7 @@ mod stats_costs; pub mod stats_coverage_report; pub mod stats_report; +pub use disasm::*; pub use elf_symbol_reader::*; pub use emu::*; pub use emu_context::*; diff --git a/emulator/src/regions_of_interest.rs b/emulator/src/regions_of_interest.rs index f1a60f941..e48745f1d 100644 --- a/emulator/src/regions_of_interest.rs +++ b/emulator/src/regions_of_interest.rs @@ -1,14 +1,16 @@ use std::collections::BTreeMap; +use std::fs::{self, File}; +use std::io::{BufWriter, Write}; use crate::{get_ops_costs, StatsCosts, MAIN_COST}; -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct CallerInfo { pub calls: usize, pub steps: usize, } -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct RegionsOfInterest { pub id: usize, pub from_pc: u32, @@ -19,6 +21,10 @@ pub struct RegionsOfInterest { pub callers: BTreeMap, pub call_stack_rc: usize, call_stack_depth: Option, + pub is_selected_roi: bool, + pub track_calls: usize, + tracked_calls: Vec>, + track_file: Option>, } impl RegionsOfInterest { @@ -33,8 +39,77 @@ impl RegionsOfInterest { callers: BTreeMap::new(), call_stack_rc: 0, call_stack_depth: None, + is_selected_roi: false, + track_calls: 0, + tracked_calls: Vec::new(), + track_file: None, } } + + pub fn set_selected_roi(&mut self, track_calls: usize) { + self.is_selected_roi = true; + self.track_calls = track_calls; + } + + pub fn init_tracking( + &mut self, + output_path: &str, + separator: &str, + filename: &str, + ) -> std::io::Result<()> { + if self.track_calls == 0 { + return Ok(()); + } + + // Create output directory if it doesn't exist + fs::create_dir_all(output_path)?; + + let filepath = format!("{}/{}.txt", output_path, filename); + let file = File::create(&filepath)?; + let mut writer = BufWriter::new(file); + + // Write header + writeln!(writer, "# ROI: {} (PC: 0x{:08x}-0x{:08x})", self.name, self.from_pc, self.to_pc)?; + writeln!(writer, "# Separator: '{}'", separator)?; + writeln!(writer, "# Parameters: a0-a{}", self.track_calls.min(8) - 1)?; + + self.track_file = Some(writer); + Ok(()) + } + + pub fn track_call_parameters(&mut self, registers: &[u64], separator: &str, caller: &str) { + if self.track_calls == 0 { + return; + } + + // RISC-V registers a0-a7 are at indices 10-17 + let num_params = self.track_calls.min(8); + let mut params = Vec::with_capacity(num_params); + + for i in 0..num_params { + if 10 + i < registers.len() { + params.push(registers[10 + i]); + } else { + params.push(0); + } + } + + // Write to file if available + if let Some(ref mut file) = self.track_file { + let line = params.iter().map(|p| p.to_string()).collect::>().join(separator); + if caller.is_empty() { + let _ = writeln!(file, "{line}"); + } else { + let _ = writeln!(file, "{line};{caller}"); + } + } + + self.tracked_calls.push(params); + } + + pub fn get_tracked_calls(&self) -> &[Vec] { + &self.tracked_calls + } pub fn contains(&self, pc: u32) -> bool { pc >= self.from_pc && pc <= self.to_pc } @@ -42,11 +117,10 @@ impl RegionsOfInterest { self.call_stack_rc += 1; } pub fn update_call_depth(&mut self, call_stack_depth: usize) { - if self.call_stack_depth.is_none() { - self.call_stack_depth = Some(call_stack_depth); + if let Some(depth) = self.call_stack_depth { + self.call_stack_depth = Some(std::cmp::min(depth, call_stack_depth)); } else { - self.call_stack_depth = - Some(std::cmp::min(self.call_stack_depth.unwrap(), call_stack_depth)); + self.call_stack_depth = Some(call_stack_depth); } } pub fn call(&mut self, caller: Option, call_stack_depth: usize) { diff --git a/emulator/src/stats.rs b/emulator/src/stats.rs index 1b340f54b..c91800535 100644 --- a/emulator/src/stats.rs +++ b/emulator/src/stats.rs @@ -40,7 +40,7 @@ const OP_DATA_BUFFER_DEFAULT_CAPACITY: usize = 128 * 1024 * 1024; const REG_RA_IDX: usize = 1; /// Keeps statistics of the emulator operations -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct Stats { /// Counter of FROPS (FRequentOPs) frops: u64, @@ -65,6 +65,7 @@ pub struct Stats { roi_callers: usize, top_rois_detail: bool, coverage: bool, + top_histogram: usize, legacy_stats: bool, /// PC histogram, i.e. number of times each PC was executed pc_histogram: HashMap, @@ -78,6 +79,9 @@ pub struct Stats { individual_cost_marks: bool, main_name: String, profile_tags: HashMap, + track_separator: String, + use_thousands_sep: bool, + top_rois_filter: bool, #[cfg(feature = "debug_stats_trace")] debug_step_stack: Vec, #[cfg(feature = "debug_stats_trace")] @@ -116,6 +120,10 @@ impl Default for Stats { individual_cost_marks: false, main_name: "main".to_string(), profile_tags: HashMap::new(), + top_histogram: 0, + track_separator: ";".to_string(), + use_thousands_sep: true, + top_rois_filter: false, #[cfg(feature = "debug_stats_trace")] debug_step_stack: Vec::new(), #[cfg(feature = "debug_stats_trace")] @@ -317,9 +325,12 @@ impl Stats { if pc >= self.rois[roi_index].from_pc && pc <= self.rois[roi_index].to_pc { if self.is_call { assert!(!self.is_return); - if let Some(previous_roi_index) = previous_roi_index { + let caller_name = if let Some(previous_roi_index) = previous_roi_index { self.rois[previous_roi_index].caller_call(); - } + &self.rois[previous_roi_index].name.clone() + } else { + "" + }; #[cfg(feature = "debug_call_stack")] println!( "CALL_STACK_DEBUG: CALL P_PC:0x{:08x} => PC:0x{pc:08x} CALLER_ROI:{} CALLED_ROI:{}", @@ -338,6 +349,16 @@ impl Stats { self.call_return_reg = 0; self.rois[roi_index].call(previous_roi_index, self.call_stack.len()); + + // Track call parameters for selected ROIs + if self.rois[roi_index].is_selected_roi && self.rois[roi_index].track_calls > 0 + { + self.rois[roi_index].track_call_parameters( + regs, + &self.track_separator, + caller_name, + ); + } } else if !self.is_return { // JMP: This is a tail call. Replace the top of the call stack if it exists if let Some(top) = self.call_stack.last_mut() { @@ -493,12 +514,12 @@ impl Stats { if is_jmp { // CALL: set_pc=true, store_ra=true, store_offset=1 (stores PC+4 or PC+2 in ra) // self.is_call = instruction.store_ra && instruction.store_offset == 1; - self.is_call = instruction.store_ra; + self.is_call = instruction.store_pc; self.call_return_reg = if self.is_call { instruction.store_offset as u8 } else { 0 }; - // RETURN: set_pc=true, store_ra=false (no stores RA), b_src=SRC_REG, b_offset_imm0=1 (jumps to ra/x1) + // RETURN: set_pc=true, store_pc=false (no stores RA), b_src=SRC_REG, b_offset_imm0=1 (jumps to ra/x1) // Additionally, verify that the target PC matches the expected return address from the call stack - let is_jalr_ra = !instruction.store_ra + let is_jalr_ra = !instruction.store_pc && instruction.set_pc && instruction.b_src == SRC_REG && instruction.b_offset_imm0 == 1; @@ -513,7 +534,7 @@ impl Stats { self.is_return = false; } } else if let Some(top) = self.call_stack.last() { - self.is_return = !instruction.store_ra + self.is_return = !instruction.store_pc && instruction.b_src == SRC_REG && instruction.b_offset_imm0 == top.return_reg as u64; } else { @@ -588,6 +609,7 @@ impl Stats { .rois .iter() .enumerate() + .filter(|(_, roi)| !self.top_rois_filter || roi.is_selected_roi) .map(|(index, roi)| (index, if by_step { roi.get_steps() } else { roi.get_cost() })) .collect(); top_rois.sort_by(|a, b| b.1.cmp(&a.1)); @@ -612,7 +634,13 @@ impl Stats { self.ops_cost = ops_cost; self.precompiled_cost = precompiled_cost; } - pub fn report_opcodes(&self, report: &mut StatsReport, ops: &[u64], title: &str) { + pub fn report_opcodes( + &self, + report: &mut StatsReport, + ops: &[u64], + title: &str, + steps_perc: bool, + ) { let ranks = get_ops_ranks(ops); for (opcode, op_count) in ops.iter().enumerate() { if opcode > 1 && *op_count > 0 { @@ -622,12 +650,21 @@ impl Stats { } else { String::new() }; - report.add_count_cost_perc( - &format!("{title} {:}", inst.name()), - *op_count, - *op_count * inst.steps(), - &rank, - ); + if steps_perc { + report.add_count_cost_perc2( + &format!("{title} {:}", inst.name()), + *op_count, + *op_count * inst.steps(), + &rank, + ); + } else { + report.add_count_cost_perc( + &format!("{title} {:}", inst.name()), + *op_count, + *op_count * inst.steps(), + &rank, + ); + } } } } @@ -696,6 +733,7 @@ impl Stats { let base_cost = BASE_COST as u64; let total_cost = base_cost + mem_cost + main_cost + ops_cost + precompiled_cost; let mut report = StatsReport::new(); + report.use_thousands_sep = self.use_thousands_sep; report.set_total_cost(total_cost); report.set_steps(self.costs.steps); report.title_cost("REPORT", ""); @@ -712,8 +750,8 @@ impl Stats { report.ln(); report.add_cost_perc("FROPS", self.frops_cost); report.add_perc("RAM USAGE", self.costs.mops.get_max_ram_address() - RAM_ADDR + 1, 1 << 29); - report.title_count_cost_perc("COST BY OPCODE", "COUNT", "COST", " RANK"); - self.report_opcodes(&mut report, &self.costs.ops, "OP"); + report.title_count_cost_perc2("COST BY OPCODE", "COUNT", "COST", " RANK"); + self.report_opcodes(&mut report, &self.costs.ops, "OP", true); report.title_count_perc_cost_perc("FROPS BY OPCODE", "COUNT", "HIT", "COST", " RANK"); self.report_opcodes_hit(&mut report, &self.costs.frops_ops, &self.costs.ops, "FROP"); @@ -761,6 +799,7 @@ impl Stats { for index in final_top_cost_rois.iter() { let roi = &self.rois[*index]; let mut roi_report = StatsReport::new(); + roi_report.use_thousands_sep = self.use_thousands_sep; roi_report.set_total_cost(roi.get_cost()); roi_report.set_steps(roi.get_steps()); roi_report.title(&format!("DETAIL FUNCTION {}", roi.name)); @@ -769,7 +808,7 @@ impl Stats { roi_report.set_identation(1); roi_report.title_count_cost_perc("COST BY OPCODE", "COUNT", "COST", " RANK"); - self.report_opcodes(&mut roi_report, roi.get_ops_costs(), "OP"); + self.report_opcodes(&mut roi_report, roi.get_ops_costs(), "OP", false); roi_report.title_top_count_perc("TOP STEP CALLERS (calls, steps)"); let mut callers: Vec<_> = roi.get_callers().collect(); @@ -845,6 +884,54 @@ impl Stats { } } } + if self.top_histogram > 0 { + report.title_autowidth("TOP PC HISTOGRAM (EXECUTIONS, % EXECUTIONS, PC)"); + + // Convert HashMap to Vec and sort by execution count (descending), then by PC (ascending) + let mut pc_vec: Vec<_> = self.pc_histogram.iter().collect(); + pc_vec.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0))); + + // Show only top N entries + let mut previous_count = 0; + let mut initial_address = 0; + let mut block_count = 0; + let mut block_label = ""; + let last_index = std::cmp::min(self.top_histogram, pc_vec.len()) - 1; + for (index, (pc, count)) in pc_vec.iter().take(self.top_histogram).enumerate() { + let is_same_block = previous_count == **count + && **pc > initial_address + && (**pc - initial_address) < 512; + + if is_same_block { + block_count += **count; + } else { + if block_count > 0 { + report.add_top_step_perc( + &format!(" ----------- {block_label}\n"), + block_count, + ); + } + previous_count = **count; + initial_address = **pc; + block_count = **count; + block_label = if let Some((_, index)) = + self.rois_by_address.range(..=initial_address as u32).next_back() + { + &self.rois[*index as usize].name + } else { + "" + }; + } + let instruction = rom.get_instruction(**pc); + let pc_str = format!(" 0x{pc:08x}: {}", instruction.verbose); + report.add_top_step_perc(&pc_str, **count); + if index == last_index { + report + .add_top_step_perc(&format!(" ----------- {block_label}\n"), block_count); + } + } + } + report.output } pub fn add_profile_tag(&mut self, id: u16, name: &str) { @@ -856,9 +943,51 @@ impl Stats { self.rois.push(roi); self.rois_by_address.insert(from_pc, index); } + pub fn mark_roi_as_selected(&mut self, from_pc: u32, track_calls: usize) { + if let Some(&index) = self.rois_by_address.get(&from_pc) { + if let Some(roi) = self.rois.get_mut(index as usize) { + roi.set_selected_roi(track_calls); + } + } + } + pub fn init_roi_tracking(&mut self, output_path: &str, separator: &str) -> std::io::Result<()> { + self.track_separator = separator.to_string(); + + // Track used filenames to detect collisions + let mut used_filenames = std::collections::HashSet::new(); + + for roi in &mut self.rois { + if roi.is_selected_roi && roi.track_calls > 0 { + // Clean function name: keep only alphanumeric and underscore + let clean_name: String = + roi.name.chars().filter(|c| c.is_alphanumeric() || *c == '_').collect(); + + // Check for collision + let filename = if used_filenames.contains(&clean_name) { + // Collision detected, add ROI id + format!("{}_roi_{}", clean_name, roi.id) + } else { + clean_name.clone() + }; + + used_filenames.insert(clean_name); + roi.init_tracking(output_path, separator, &filename)?; + } + } + Ok(()) + } + pub fn set_track_separator(&mut self, separator: String) { + self.track_separator = separator; + } + pub fn set_use_thousands_sep(&mut self, value: bool) { + self.use_thousands_sep = value; + } pub fn set_top_rois(&mut self, value: usize) { self.top_rois = value; } + pub fn set_top_histogram(&mut self, value: usize) { + self.top_histogram = value; + } pub fn set_legacy_stats(&mut self, value: bool) { self.legacy_stats = value; } @@ -874,6 +1003,31 @@ impl Stats { pub fn set_main_name(&mut self, value: String) { self.main_name = value; } + pub fn set_top_rois_filter(&mut self, value: bool) { + self.top_rois_filter = value; + } + + /// Write disassembly to file with execution counts + pub fn write_disassembly( + &self, + rom: &ZiskRom, + path: &str, + symbols: Option, + ) -> std::io::Result<()> { + use crate::DisasmWriter; + + let mut disasm_writer = DisasmWriter::new(path)?; + disasm_writer.set_pc_histogram(self.pc_histogram.clone()); + if let Some(syms) = symbols { + disasm_writer.set_symbols(syms); + } + disasm_writer.write_header("ZisK Disassembly")?; + disasm_writer.write_disassembly(rom)?; + disasm_writer.flush()?; + + Ok(()) + } + #[cfg(feature = "debug_stats_trace")] pub fn debug_stats_trace(&mut self, pc: u64) { if self.costs.steps == 1 || self.previous_roi != self.current_roi { @@ -920,4 +1074,9 @@ impl OpStats for Stats { self.on_memory_write(addr + 8 * index as u64, 8, 0); } } + fn add_extras(&mut self, extras: &[(u8, usize)]) { + for (opcode, count) in extras { + self.costs.ops[*opcode as usize] += *count as u64; + } + } } diff --git a/emulator/src/stats_report.rs b/emulator/src/stats_report.rs index 3f5f44207..f081ac953 100644 --- a/emulator/src/stats_report.rs +++ b/emulator/src/stats_report.rs @@ -9,6 +9,7 @@ pub struct StatsReport { pub label_width: usize, pub short_label_width: usize, pub label_width_stack: Vec, + pub use_thousands_sep: bool, } impl Default for StatsReport { fn default() -> Self { @@ -26,6 +27,7 @@ impl StatsReport { label_width: 24, short_label_width: 10, label_width_stack: Vec::new(), + use_thousands_sep: true, } } @@ -54,6 +56,14 @@ impl StatsReport { } } + fn format_number(&self, num: u64) -> String { + if self.use_thousands_sep { + num.to_formatted_string(&Locale::en) + } else { + num.to_string() + } + } + pub fn add(&mut self, text: &str) { self.output += text; } @@ -62,7 +72,7 @@ impl StatsReport { "{}{:15}\n", self.identation, label, - cost.to_formatted_string(&Locale::en), + self.format_number(cost), label_width = self.label_width ); } @@ -97,7 +107,7 @@ impl StatsReport { self.output += &format!( "{}{label:15} {:6.2}%\n", self.identation, - cost.to_formatted_string(&Locale::en), + self.format_number(cost), (cost as f64 * 100.0) / total as f64, label_width = self.label_width, ); @@ -107,7 +117,7 @@ impl StatsReport { self.output += &format!( "{}{label:15} {:6.2}%\n", self.identation, - cost.to_formatted_string(&Locale::en), + self.format_number(cost), cost as f64 / self.cost_divisor, label_width = self.label_width, ); @@ -137,7 +147,7 @@ impl StatsReport { self.output += &format!( "{}{:>15} {:6.2}% {label}\n", self.identation, - cost.to_formatted_string(&Locale::en), + self.format_number(cost), cost as f64 / self.cost_divisor ); } @@ -147,7 +157,7 @@ impl StatsReport { self.output += &format!( "{}{:>15} {:6.2}% {depth:2} {label}\n", self.identation, - cost.to_formatted_string(&Locale::en), + self.format_number(cost), cost as f64 / self.cost_divisor ); return; @@ -155,7 +165,7 @@ impl StatsReport { self.output += &format!( "{}{:>15} {:6.2}% {label}\n", self.identation, - cost.to_formatted_string(&Locale::en), + self.format_number(cost), cost as f64 / self.cost_divisor ); } @@ -164,9 +174,9 @@ impl StatsReport { self.output += &format!( "{}{:>15} {:6.2}% {:>10} {label}\n", self.identation, - cost.to_formatted_string(&Locale::en), + self.format_number(cost), cost as f64 / self.cost_divisor, - calls.to_formatted_string(&Locale::en) + self.format_number(calls as u64) ); } @@ -174,9 +184,9 @@ impl StatsReport { self.output += &format!( "{}{:>15} {:6.2}% {:>10} {label}\n", self.identation, - steps.to_formatted_string(&Locale::en), + self.format_number(steps), steps as f64 / self.step_divisor, - calls.to_formatted_string(&Locale::en) + self.format_number(calls as u64) ); } @@ -184,7 +194,7 @@ impl StatsReport { self.output += &format!( "{}{:>15} {:6.2}% {label}\n", self.identation, - cost.to_formatted_string(&Locale::en), + self.format_number(cost), cost as f64 / self.step_divisor ); } @@ -194,7 +204,7 @@ impl StatsReport { self.output += &format!( "{}{:>15} {:6.2}% {depth:2} {label}\n", self.identation, - cost.to_formatted_string(&Locale::en), + self.format_number(cost), cost as f64 / self.step_divisor ); return; @@ -202,7 +212,7 @@ impl StatsReport { self.output += &format!( "{}{:>15} {:6.2}% {label}\n", self.identation, - cost.to_formatted_string(&Locale::en), + self.format_number(cost), cost as f64 / self.step_divisor ); } @@ -219,8 +229,8 @@ impl StatsReport { self.output += &format!( "{}{:>15} {:>15} {:6.2}% {label}\n", self.identation, - count.to_formatted_string(&Locale::en), - step.to_formatted_string(&Locale::en), + self.format_number(count), + self.format_number(step), step as f64 / self.step_divisor ); } @@ -243,8 +253,34 @@ impl StatsReport { "{}{:15} {:>15} {:6.2}%{comment}\n", self.identation, label, - count.to_formatted_string(&Locale::en), - cost.to_formatted_string(&Locale::en), + self.format_number(count), + self.format_number(cost), + cost as f64 / self.cost_divisor, + label_width = self.label_width, + ); + } + + pub fn title_count_cost_perc2( + &mut self, + label: &str, + count_label: &str, + cost_label: &str, + comment: &str, + ) { + self.line_from_title(&format!( + "{label:15} % {cost_label:>15} %{comment}", + label_width = self.label_width, + )); + } + + pub fn add_count_cost_perc2(&mut self, label: &str, count: u64, cost: u64, comment: &str) { + self.output += &format!( + "{}{:15} {:6.2}% {:>15} {:6.2}%{comment}\n", + self.identation, + label, + self.format_number(count), + count as f64 / self.step_divisor, + self.format_number(cost), cost as f64 / self.cost_divisor, label_width = self.label_width, ); @@ -276,9 +312,9 @@ impl StatsReport { "{}{:15} {:6.2}% {:>15} {:6.2}%{comment}\n", self.identation, label, - count.to_formatted_string(&Locale::en), + self.format_number(count), perc, - cost.to_formatted_string(&Locale::en), + self.format_number(cost), cost as f64 / self.cost_divisor, label_width = self.label_width, ); @@ -320,16 +356,16 @@ impl StatsReport { "{}{:10} {:>10} {:>15} {:6.2}% {:>15} {:6.2}% {:>15} {:>15} {:>15} {:>15}{comment}\n", self.identation, label, - index.to_formatted_string(&Locale::en), - count.to_formatted_string(&Locale::en), - step.to_formatted_string(&Locale::en), + self.format_number(index as u64), + self.format_number(count), + self.format_number(step), step as f64 / self.step_divisor, - cost.to_formatted_string(&Locale::en), + self.format_number(cost), cost as f64 / self.cost_divisor, - cost_main.to_formatted_string(&Locale::en), - cost_ops.to_formatted_string(&Locale::en), - cost_precomp.to_formatted_string(&Locale::en), - cost_mem.to_formatted_string(&Locale::en), + self.format_number(cost_main), + self.format_number(cost_ops), + self.format_number(cost_precomp), + self.format_number(cost_mem), label_width = self.label_width, ); } diff --git a/examples/Cargo.lock b/examples/Cargo.lock new file mode 100644 index 000000000..b5e1a962b --- /dev/null +++ b/examples/Cargo.lock @@ -0,0 +1,4808 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aggregation-host" +version = "0.1.0" +dependencies = [ + "anyhow", + "serde", + "sha2", + "zisk-sdk", +] + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "ark-bls12-381" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3df4dcc01ff89867cd86b0da835f23c3f02738353aaee7dde7495af71363b8d5" +dependencies = [ + "ark-ec", + "ark-ff", + "ark-serialize", + "ark-std", +] + +[[package]] +name = "ark-bn254" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d69eab57e8d2663efa5c63135b2af4f396d66424f88954c21104125ab6b3e6bc" +dependencies = [ + "ark-ec", + "ark-ff", + "ark-std", +] + +[[package]] +name = "ark-ec" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d68f2d516162846c1238e755a7c4d131b892b70cc70c471a8e3ca3ed818fce" +dependencies = [ + "ahash", + "ark-ff", + "ark-poly", + "ark-serialize", + "ark-std", + "educe", + "fnv", + "hashbrown 0.15.5", + "itertools 0.13.0", + "num-bigint", + "num-integer", + "num-traits", + "zeroize", +] + +[[package]] +name = "ark-ff" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a177aba0ed1e0fbb62aa9f6d0502e9b46dad8c2eab04c14258a1212d2557ea70" +dependencies = [ + "ark-ff-asm", + "ark-ff-macros", + "ark-serialize", + "ark-std", + "arrayvec", + "digest", + "educe", + "itertools 0.13.0", + "num-bigint", + "num-traits", + "paste", + "zeroize", +] + +[[package]] +name = "ark-ff-asm" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62945a2f7e6de02a31fe400aa489f0e0f5b2502e69f95f853adb82a96c7a6b60" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "ark-ff-macros" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09be120733ee33f7693ceaa202ca41accd5653b779563608f1234f78ae07c4b3" +dependencies = [ + "num-bigint", + "num-traits", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ark-poly" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "579305839da207f02b89cd1679e50e67b4331e2f9294a57693e5051b7703fe27" +dependencies = [ + "ahash", + "ark-ff", + "ark-serialize", + "ark-std", + "educe", + "fnv", + "hashbrown 0.15.5", +] + +[[package]] +name = "ark-secp256k1" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8bd211c48debd3037b48873a7aa22c3aba034e83388aa4124795c9f220b88c7" +dependencies = [ + "ark-ec", + "ark-ff", + "ark-std", +] + +[[package]] +name = "ark-secp256r1" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cf8be5820de567729bfa73a410ddd07cec8ad102d9a4bf61fd6b2e60db264e8" +dependencies = [ + "ark-ec", + "ark-ff", + "ark-std", +] + +[[package]] +name = "ark-serialize" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f4d068aaf107ebcd7dfb52bc748f8030e0fc930ac8e360146ca54c1203088f7" +dependencies = [ + "ark-serialize-derive", + "ark-std", + "arrayvec", + "digest", + "num-bigint", +] + +[[package]] +name = "ark-serialize-derive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213888f660fddcca0d257e88e54ac05bca01885f258ccdf695bafd77031bb69d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ark-std" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "246a225cc6131e9ee4f24619af0f19d67761fff15d7ccc22e42b80846e69449a" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "asm-runner" +version = "0.16.0" +dependencies = [ + "anyhow", + "libc", + "mem-common", + "mem-planner-cpp", + "named-sem", + "proofman-common", + "rayon", + "thiserror 2.0.18", + "tracing", + "zisk-common", + "zisk-core", +] + +[[package]] +name = "asn1-rs" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56624a96882bb8c26d61312ae18cb45868e5a9992ea73c58e45c3101e56a1e60" +dependencies = [ + "asn1-rs-derive", + "asn1-rs-impl", + "displaydoc", + "nom", + "num-traits", + "rusticata-macros", + "thiserror 2.0.18", + "time", +] + +[[package]] +name = "asn1-rs-derive" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3109e49b1e4909e9db6515a30c633684d68cdeaa252f215214cb4fa1a5bfee2c" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "asn1-rs-impl" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "aws-lc-rs" +version = "1.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94bffc006df10ac2a68c83692d734a465f8ee6c5b384d8545a636f81d858f4bf" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4321e568ed89bb5a7d291a7f37997c2c0df89809d7b6d12062c81ddb54aa782e" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "big-program-guest" +version = "0.1.0" +dependencies = [ + "byteorder", + "serde", + "sha2", + "ziskos", +] + +[[package]] +name = "big-program-host" +version = "0.1.0" +dependencies = [ + "anyhow", + "serde", + "sha2", + "zisk-sdk", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bindgen" +version = "0.69.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.12.1", + "lazy_static", + "lazycell", + "proc-macro2", + "quote", + "regex", + "rustc-hash 1.1.0", + "shlex", + "syn", +] + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "blake3" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "borsh" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1da5ab77c1437701eeff7c88d968729e7766172279eab0676857b3d63af7a6f" +dependencies = [ + "borsh-derive", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0686c856aa6aac0c4498f936d7d6a02df690f614c03e4d906d1018062b5c5e2c" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "build-probe-mpi" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d78ace2bb02fc18ad937f1599a853fcf3da2327bc1eb3c8e62b1f2fe4573bfd6" +dependencies = [ + "pkg-config", + "shell-words", +] + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "camino" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629a66d692cb9ff1a1c664e41771b3dcaf961985a9774c0eb0bd1b51cf60a48" +dependencies = [ + "serde_core", +] + +[[package]] +name = "cargo-platform" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87a0c0e6148f11f01f32650a2ea02d532b2ad4e81d8bd41e6e565b5adc5e6082" +dependencies = [ + "serde", + "serde_core", +] + +[[package]] +name = "cargo_metadata" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef987d17b0a113becdd19d3d0022d04d7ef41f9efe4f3fb63ac44ba61df3ade9" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", + "thiserror 2.0.18", +] + +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link 0.2.1", +] + +[[package]] +name = "circuit" +version = "0.16.0" + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + +[[package]] +name = "cmake" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "colored" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + +[[package]] +name = "conv" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ff10625fd0ac447827aa30ea8b861fead473bb60aeb73af6c1c58caf0d1299" +dependencies = [ + "custom_derive", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpp_demangle" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0667304c32ea56cb4cd6d2d7c0cfe9a2f8041229db8c033af7f8d69492429def" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "ctor" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "curves" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0#c590eccceb4a88e7f65f998f02f59e3b487d0317" +dependencies = [ + "fields", + "num-bigint", + "num-traits", +] + +[[package]] +name = "custom_derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" + +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "data-bus" +version = "0.16.0" +dependencies = [ + "zisk-common", + "zisk-core", +] + +[[package]] +name = "data-encoding" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" + +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" +dependencies = [ + "uuid", +] + +[[package]] +name = "der-parser" +version = "10.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07da5016415d5a3c4dd39b11ed26f915f52fc4e0dc197d87908bc916e51bc1a6" +dependencies = [ + "asn1-rs", + "displaydoc", + "nom", + "num-bigint", + "num-traits", + "rusticata-macros", +] + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + +[[package]] +name = "educe" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417" +dependencies = [ + "enum-ordinalize", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "elf" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b" + +[[package]] +name = "enum-ordinalize" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0" +dependencies = [ + "enum-ordinalize-derive", +] + +[[package]] +name = "enum-ordinalize-derive" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "env" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc95de49ad098572c02d3fbf368c9a020bfff5ae78483685b77f51d8a7e9486d" +dependencies = [ + "num_threads", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "executor" +version = "0.16.0" +dependencies = [ + "anyhow", + "asm-runner", + "crossbeam", + "data-bus", + "fields", + "itertools 0.14.0", + "mem-common", + "mem-planner-cpp", + "named-sem", + "pil-std-lib", + "precomp-arith-eq", + "precomp-arith-eq-384", + "precomp-big-int", + "precomp-blake2", + "precomp-dma", + "precomp-keccakf", + "precomp-poseidon2", + "precomp-sha256f", + "precompiles-common", + "precompiles-hints", + "proofman", + "proofman-common", + "proofman-util", + "rayon", + "sm-arith", + "sm-binary", + "sm-frequent-ops", + "sm-main", + "sm-mem", + "sm-rom", + "tracing", + "witness", + "zisk-common", + "zisk-core", + "zisk-pil", + "ziskemu", +] + +[[package]] +name = "fastbloom" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7f34442dbe69c60fe8eaf58a8cafff81a1f278816d8ab4db255b3bef4ac3c4" +dependencies = [ + "getrandom 0.3.4", + "libm", + "rand 0.9.2", + "siphasher", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fields" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0#c590eccceb4a88e7f65f998f02f59e3b487d0317" +dependencies = [ + "cfg-if", + "num-bigint", + "paste", + "serde", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 5.3.0", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "git2" +version = "0.20.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b88256088d75a56f8ecfa070513a775dd9107f6530ef14919dac831af9cfe2b" +dependencies = [ + "bitflags", + "libc", + "libgit2-sys", + "log", + "url", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "guest" +version = "0.1.0" +dependencies = [ + "byteorder", + "ziskos", +] + +[[package]] +name = "guest-agg" +version = "0.1.0" +dependencies = [ + "byteorder", + "ziskos", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core 0.62.2", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "jni" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" +dependencies = [ + "cesu8", + "cfg-if", + "combine", + "jni-sys", + "log", + "thiserror 1.0.69", + "walkdir", + "windows-sys 0.45.0", +] + +[[package]] +name = "jni-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "lib-c" +version = "0.16.0" + +[[package]] +name = "lib-float" +version = "0.16.0" + +[[package]] +name = "libc" +version = "0.2.183" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" + +[[package]] +name = "libffi" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0498fe5655f857803e156523e644dcdcdc3b3c7edda42ea2afdae2e09b2db87b" +dependencies = [ + "libc", + "libffi-sys", +] + +[[package]] +name = "libffi-sys" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d4f1d4ce15091955144350b75db16a96d4a63728500122706fb4d29a26afbb" +dependencies = [ + "cc", +] + +[[package]] +name = "libgit2-sys" +version = "0.18.3+1.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9b3acc4b91781bb0b3386669d325163746af5f6e4f73e6d2d630e09a35f3487" +dependencies = [ + "cc", + "libc", + "libz-sys", + "pkg-config", +] + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link 0.2.1", +] + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "libz-sys" +version = "1.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52f4c29e2a68ac30c9087e1b772dc9f44a2b66ed44edf2266cf2be9b03dafc1" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "mem-common" +version = "0.16.0" +dependencies = [ + "clap", + "fields", + "num-bigint", + "num-traits", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "static_assertions", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "mem-planner-cpp" +version = "0.16.0" +dependencies = [ + "mem-common", + "proofman-common", + "proofman-util", + "tracing", + "zisk-common", + "zisk-pil", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", +] + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "mpi" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41457b69d35846af2fec1877a4f3b866a72b6ab2c9500218f115e65e10993b21" +dependencies = [ + "build-probe-mpi", + "conv", + "libffi", + "mpi-sys", + "once_cell", + "smallvec", + "thiserror 2.0.18", +] + +[[package]] +name = "mpi-sys" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f655543f54b263cbc3d2456bf714bd807d66a33eff8f70136687f0776d34f76" +dependencies = [ + "bindgen", + "build-probe-mpi", + "cc", +] + +[[package]] +name = "msvc-demangler" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbeff6bd154a309b2ada5639b2661ca6ae4599b34e8487dc276d2cd637da2d76" +dependencies = [ + "bitflags", + "itoa", +] + +[[package]] +name = "multiple-program-guest" +version = "0.1.0" +dependencies = [ + "byteorder", + "ziskos", +] + +[[package]] +name = "multiple-program-guest-2" +version = "0.1.0" +dependencies = [ + "byteorder", + "ziskos", +] + +[[package]] +name = "multiple-program-host" +version = "0.1.0" +dependencies = [ + "anyhow", + "serde", + "sha2", + "zisk-sdk", +] + +[[package]] +name = "named-sem" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0875efe1a57a20d0cee7034499aa9d764b3c7525563fa3c3f16a2ccf01ddfa04" +dependencies = [ + "libc", + "thiserror 2.0.18", + "windows 0.61.3", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" + +[[package]] +name = "num-format" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" +dependencies = [ + "arrayvec", + "itoa", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "flate2", + "memchr", + "ruzstd", +] + +[[package]] +name = "oid-registry" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12f40cff3dde1b6087cc5d5f5d4d65712f34016a03ed60e9c08dcc392736b5b7" +dependencies = [ + "asn1-rs", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link 0.2.1", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "path-clean" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef" + +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64", + "serde_core", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pil-std-lib" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0#c590eccceb4a88e7f65f998f02f59e3b487d0317" +dependencies = [ + "colored", + "fields", + "num-bigint", + "num-traits", + "proofman-common", + "proofman-hints", + "proofman-util", + "rayon", + "rustc-hash 2.1.1", + "serde", + "serde_json", + "tracing", + "witness", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "precomp-arith-eq" +version = "0.16.0" +dependencies = [ + "ark-bn254", + "ark-ff", + "ark-secp256k1", + "ark-secp256r1", + "ark-std", + "fields", + "lazy_static", + "lib-c", + "mem-common", + "num-bigint", + "num-traits", + "path-clean", + "pil-std-lib", + "precompiles-common", + "precompiles-helpers", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "regex", + "rustfmt-wrapper", + "serde", + "serde_json", + "sm-mem", + "tracing", + "typenum", + "witness", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "precomp-arith-eq-384" +version = "0.16.0" +dependencies = [ + "ark-bls12-381", + "ark-bn254", + "ark-ff", + "ark-secp256k1", + "ark-std", + "fields", + "lazy_static", + "lib-c", + "mem-common", + "num-bigint", + "num-traits", + "path-clean", + "pil-std-lib", + "precomp-arith-eq", + "precompiles-common", + "precompiles-helpers", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "regex", + "rustfmt-wrapper", + "serde", + "serde_json", + "tracing", + "typenum", + "witness", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "precomp-big-int" +version = "0.16.0" +dependencies = [ + "fields", + "generic-array", + "lib-c", + "mem-common", + "pil-std-lib", + "precompiles-common", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "sm-mem", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "precomp-blake2" +version = "0.16.0" +dependencies = [ + "fields", + "mem-common", + "pil-std-lib", + "precompiles-common", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "sm-mem", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "precomp-dma" +version = "0.16.0" +dependencies = [ + "fields", + "generic-array", + "lib-c", + "mem-common", + "pil-std-lib", + "precompiles-common", + "precompiles-helpers", + "proofman", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "sm-mem", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "precomp-keccakf" +version = "0.16.0" +dependencies = [ + "circuit", + "fields", + "path-clean", + "pil-std-lib", + "precompiles-common", + "precompiles-helpers", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "tiny-keccak", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "precomp-poseidon2" +version = "0.16.0" +dependencies = [ + "fields", + "mem-common", + "pil-std-lib", + "precompiles-common", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "sha2", + "sm-mem", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "precomp-sha256f" +version = "0.16.0" +dependencies = [ + "fields", + "mem-common", + "pil-std-lib", + "precompiles-common", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "sm-mem", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "precompiles-common" +version = "0.16.0" +dependencies = [ + "fields", + "mem-common", + "sm-mem", + "zisk-common", + "zisk-core", +] + +[[package]] +name = "precompiles-helpers" +version = "0.16.0" +dependencies = [ + "ark-bls12-381", + "ark-bn254", + "ark-ff", + "ark-secp256k1", + "ark-secp256r1", + "ark-std", + "cfg-if", + "circuit", + "lib-c", + "num-bigint", + "num-traits", +] + +[[package]] +name = "precompiles-hints" +version = "0.16.0" +dependencies = [ + "anyhow", + "borsh", + "lib-c", + "precompiles-helpers", + "rayon", + "rustls", + "tracing", + "zisk-common", + "zisk-distributed-common", + "ziskos-hints", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro-crate" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" +dependencies = [ + "toml_edit 0.25.4+spec-1.1.0", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proofman" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0#c590eccceb4a88e7f65f998f02f59e3b487d0317" +dependencies = [ + "bincode", + "blake3", + "borsh", + "bytemuck", + "chrono", + "colored", + "crossbeam-channel", + "csv", + "curves", + "fields", + "libloading", + "mpi", + "num-bigint", + "num-traits", + "pil-std-lib", + "proofman-common", + "proofman-hints", + "proofman-macros", + "proofman-starks-lib-c", + "proofman-util", + "proofman-verifier", + "rayon", + "serde", + "serde_json", + "tokio", + "tokio-util", + "tracing", + "witness", +] + +[[package]] +name = "proofman-common" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0#c590eccceb4a88e7f65f998f02f59e3b487d0317" +dependencies = [ + "bincode", + "borsh", + "bytemuck", + "colored", + "crossbeam-channel", + "crossbeam-queue", + "csv", + "env", + "fields", + "lazy_static", + "libloading", + "mpi", + "num_cpus", + "proofman-macros", + "proofman-starks-lib-c", + "proofman-util", + "rayon", + "serde", + "serde_json", + "sysinfo 0.35.2", + "thiserror 2.0.18", + "tracing", + "tracing-subscriber", + "yansi", +] + +[[package]] +name = "proofman-hints" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0#c590eccceb4a88e7f65f998f02f59e3b487d0317" +dependencies = [ + "fields", + "itoa", + "proofman-common", + "proofman-starks-lib-c", + "proofman-util", + "tracing", +] + +[[package]] +name = "proofman-macros" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0#c590eccceb4a88e7f65f998f02f59e3b487d0317" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "proofman-starks-lib-c" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0#c590eccceb4a88e7f65f998f02f59e3b487d0317" +dependencies = [ + "crossbeam-channel", + "tracing", +] + +[[package]] +name = "proofman-util" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0#c590eccceb4a88e7f65f998f02f59e3b487d0317" +dependencies = [ + "bincode", + "bytemuck", + "colored", + "serde", + "sysinfo 0.35.2", +] + +[[package]] +name = "proofman-verifier" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0#c590eccceb4a88e7f65f998f02f59e3b487d0317" +dependencies = [ + "bytemuck", + "fields", + "proofman-util", + "rayon", + "tracing", +] + +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash 2.1.1", + "rustls", + "socket2", + "thiserror 2.0.18", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +dependencies = [ + "bytes", + "fastbloom", + "getrandom 0.3.4", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash 2.1.1", + "rustls", + "rustls-pki-types", + "rustls-platform-verifier", + "slab", + "thiserror 2.0.18", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.60.2", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "rcgen" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10b99e0098aa4082912d4c649628623db6aba77335e4f4569ff5083a6448b32e" +dependencies = [ + "pem", + "ring", + "rustls-pki-types", + "time", + "x509-parser", + "yasna", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "riscv" +version = "0.16.0" + +[[package]] +name = "rom-setup" +version = "0.16.0" +dependencies = [ + "anyhow", + "blake3", + "colored", + "fields", + "proofman-common", + "sm-rom", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustfmt-wrapper" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1adc9dfed5cc999077978cc7163b9282c5751c8d39827c4ea8c8c220ca5a440" +dependencies = [ + "serde", + "tempfile", + "thiserror 1.0.69", + "toml", + "toolchain_find", +] + +[[package]] +name = "rusticata-macros" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632" +dependencies = [ + "nom", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.23.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +dependencies = [ + "aws-lc-rs", + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +dependencies = [ + "web-time", + "zeroize", +] + +[[package]] +name = "rustls-platform-verifier" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +dependencies = [ + "core-foundation", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls", + "rustls-native-certs", + "rustls-platform-verifier-android", + "rustls-webpki", + "security-framework", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + +[[package]] +name = "rustls-webpki" +version = "0.103.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +dependencies = [ + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ruzstd" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5ff0cc5e135c8870a775d3320910cd9b564ec036b4dc0b8741629020be63f01" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +dependencies = [ + "serde", + "serde_core", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_arrays" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94a16b99c5ea4fe3daccd14853ad260ec00ea043b2708d1fd1da3106dcd8d9df" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + +[[package]] +name = "sha-hasher-guest" +version = "0.1.0" +dependencies = [ + "byteorder", + "serde", + "sha2", + "ziskos", +] + +[[package]] +name = "sha-hasher-host" +version = "0.1.0" +dependencies = [ + "anyhow", + "serde", + "sha2", + "zisk-sdk", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shell-words" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77" + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "sm-arith" +version = "0.16.0" +dependencies = [ + "fields", + "num-bigint", + "pil-std-lib", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "sm-binary", + "sm-frequent-ops", + "static_assertions", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "sm-binary" +version = "0.16.0" +dependencies = [ + "fields", + "num-bigint", + "pil-std-lib", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "sm-frequent-ops", + "static_assertions", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "sm-frequent-ops" +version = "0.16.0" +dependencies = [ + "clap", + "fields", + "num-bigint", + "proofman-common", + "proofman-util", + "rayon", + "static_assertions", + "tracing", + "zisk-core", +] + +[[package]] +name = "sm-main" +version = "0.16.0" +dependencies = [ + "fields", + "mem-common", + "num-bigint", + "pil-std-lib", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", + "ziskemu", +] + +[[package]] +name = "sm-mem" +version = "0.16.0" +dependencies = [ + "fields", + "mem-common", + "num-bigint", + "num-traits", + "pil-std-lib", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "tracing", + "witness", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "sm-rom" +version = "0.16.0" +dependencies = [ + "asm-runner", + "fields", + "itertools 0.14.0", + "proofman-common", + "proofman-macros", + "proofman-util", + "rayon", + "tracing", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "symbolic-common" +version = "12.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "751a2823d606b5d0a7616499e4130a516ebd01a44f39811be2b9600936509c23" +dependencies = [ + "debugid", + "memmap2", + "stable_deref_trait", + "uuid", +] + +[[package]] +name = "symbolic-demangle" +version = "12.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79b237cfbe320601dd24b4ac817a5b68bb28f5508e33f08d42be0682cadc8ac9" +dependencies = [ + "cc", + "cpp_demangle", + "msvc-demangler", + "rustc-demangle", + "symbolic-common", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "sysinfo" +version = "0.35.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c3ffa3e4ff2b324a57f7aeb3c349656c7b127c3c189520251a648102a92496e" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows 0.61.3", +] + +[[package]] +name = "sysinfo" +version = "0.38.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ab6a2f8bfe508deb3c6406578252e491d299cbbf3bc0529ecc3313aee4a52f" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows 0.62.2", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime 0.6.11", + "toml_edit 0.22.27", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_datetime" +version = "1.0.0+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime 0.6.11", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_edit" +version = "0.25.4+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7193cbd0ce53dc966037f54351dbbcf0d5a642c7f0038c382ef9e677ce8c13f2" +dependencies = [ + "indexmap", + "toml_datetime 1.0.0+spec-1.1.0", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.9+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" +dependencies = [ + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + +[[package]] +name = "toolchain_find" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc8c9a7f0a2966e1acdaf0461023d0b01471eeead645370cf4c3f5cff153f2a" +dependencies = [ + "home", + "once_cell", + "regex", + "semver", + "walkdir", +] + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-appender" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "786d480bce6247ab75f005b14ae1624ad978d3029d9113f0a22fa1ac773faeaf" +dependencies = [ + "crossbeam-channel", + "thiserror 2.0.18", + "time", + "tracing-subscriber", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "serde", + "serde_json", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", + "tracing-serde", +] + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "vergen" +version = "9.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b849a1f6d8639e8de261e81ee0fc881e3e3620db1af9f2e0da015d4382ceaf75" +dependencies = [ + "anyhow", + "derive_builder", + "rustversion", + "time", + "vergen-lib", +] + +[[package]] +name = "vergen-git2" +version = "9.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d51ab55ddf1188c8d679f349775362b0fa9e90bd7a4ac69838b2a087623f0d57" +dependencies = [ + "anyhow", + "derive_builder", + "git2", + "rustversion", + "time", + "vergen", + "vergen-lib", +] + +[[package]] +name = "vergen-lib" +version = "9.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34a29ba7e9c59e62f229ae1932fb1b8fb8a6fdcc99215a641913f5f5a59a569" +dependencies = [ + "anyhow", + "derive_builder", + "rustversion", +] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-root-certs" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.61.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" +dependencies = [ + "windows-collections 0.2.0", + "windows-core 0.61.2", + "windows-future 0.2.1", + "windows-link 0.1.3", + "windows-numerics 0.2.0", +] + +[[package]] +name = "windows" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" +dependencies = [ + "windows-collections 0.3.2", + "windows-core 0.62.2", + "windows-future 0.3.2", + "windows-numerics 0.3.1", +] + +[[package]] +name = "windows-collections" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" +dependencies = [ + "windows-core 0.61.2", +] + +[[package]] +name = "windows-collections" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" +dependencies = [ + "windows-core 0.62.2", +] + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.1.3", + "windows-result 0.3.4", + "windows-strings 0.4.2", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.2.1", + "windows-result 0.4.1", + "windows-strings 0.5.1", +] + +[[package]] +name = "windows-future" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" +dependencies = [ + "windows-core 0.61.2", + "windows-link 0.1.3", + "windows-threading 0.1.0", +] + +[[package]] +name = "windows-future" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" +dependencies = [ + "windows-core 0.62.2", + "windows-link 0.2.1", + "windows-threading 0.2.1", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-numerics" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" +dependencies = [ + "windows-core 0.61.2", + "windows-link 0.1.3", +] + +[[package]] +name = "windows-numerics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" +dependencies = [ + "windows-core 0.62.2", + "windows-link 0.2.1", +] + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link 0.2.1", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link 0.2.1", +] + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link 0.2.1", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link 0.2.1", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows-threading" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows-threading" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +dependencies = [ + "windows-link 0.2.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "winnow" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "witness" +version = "0.16.0" +source = "git+https://github.com/0xPolygonHermez/pil2-proofman.git?branch=pre-develop-0.16.0#c590eccceb4a88e7f65f998f02f59e3b487d0317" +dependencies = [ + "colored", + "fields", + "libloading", + "proofman-common", + "proofman-util", + "serde_json", + "tracing", +] + +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "x509-parser" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d43b0f71ce057da06bc0851b23ee24f3f86190b07203dd8f567d0b706a185202" +dependencies = [ + "asn1-rs", + "data-encoding", + "der-parser", + "lazy_static", + "nom", + "oid-registry", + "ring", + "rusticata-macros", + "thiserror 2.0.18", + "time", +] + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "yasna" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" +dependencies = [ + "time", +] + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zisk-build" +version = "0.16.0" +dependencies = [ + "anyhow", + "cargo_metadata", + "clap", + "rom-setup", + "tracing", + "vergen-git2", +] + +[[package]] +name = "zisk-common" +version = "0.16.0" +dependencies = [ + "anyhow", + "bincode", + "fields", + "libc", + "mpi", + "proofman", + "proofman-common", + "proofman-util", + "quinn", + "rcgen", + "rustls", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tracing", + "tracing-subscriber", + "zisk-core", +] + +[[package]] +name = "zisk-core" +version = "0.16.0" +dependencies = [ + "elf", + "fields", + "lib-c", + "lib-float", + "paste", + "precompiles-helpers", + "rayon", + "riscv", + "serde", + "sha2", + "tiny-keccak", + "zisk-definitions", + "ziskos-hints", +] + +[[package]] +name = "zisk-definitions" +version = "0.16.0" + +[[package]] +name = "zisk-distributed-common" +version = "0.1.0" +dependencies = [ + "anyhow", + "borsh", + "chrono", + "proofman", + "proofman-common", + "proofman-util", + "serde", + "serde_json", + "thiserror 2.0.18", + "tracing", + "tracing-appender", + "tracing-subscriber", + "uuid", + "zisk-common", +] + +[[package]] +name = "zisk-pil" +version = "0.16.0" +dependencies = [ + "fields", + "proofman-common", + "proofman-macros", + "rayon", + "serde", + "serde_arrays", +] + +[[package]] +name = "zisk-sdk" +version = "0.16.0" +dependencies = [ + "anyhow", + "asm-runner", + "bincode", + "colored", + "executor", + "fields", + "precompiles-hints", + "proofman", + "proofman-common", + "proofman-util", + "proofman-verifier", + "rom-setup", + "serde", + "sha2", + "tracing", + "zisk-build", + "zisk-common", + "zisk-core", + "zisk-distributed-common", + "ziskemu", +] + +[[package]] +name = "zisk-verifier" +version = "0.16.0" +dependencies = [ + "proofman-verifier", +] + +[[package]] +name = "ziskemu" +version = "0.16.0" +dependencies = [ + "clap", + "data-bus", + "fields", + "mem-common", + "memmap2", + "num-format", + "object", + "proofman-common", + "rayon", + "regex", + "riscv", + "sm-arith", + "sm-binary", + "symbolic-common", + "symbolic-demangle", + "sysinfo 0.38.4", + "vergen-git2", + "zisk-common", + "zisk-core", + "zisk-pil", +] + +[[package]] +name = "ziskos" +version = "0.16.0" +dependencies = [ + "anyhow", + "bincode", + "bytes", + "cfg-if", + "ctor", + "fields", + "getrandom 0.2.17", + "lazy_static", + "lib-c", + "num-bigint", + "num-integer", + "num-traits", + "once_cell", + "paste", + "precompiles-helpers", + "rand 0.8.5", + "serde", + "sha2", + "tiny-keccak", + "tokio", + "zisk-common", + "zisk-definitions", + "zisk-verifier", +] + +[[package]] +name = "ziskos-hints" +version = "0.16.0" +dependencies = [ + "anyhow", + "bincode", + "cfg-if", + "fields", + "getrandom 0.2.17", + "lazy_static", + "lib-c", + "num-bigint", + "num-integer", + "num-traits", + "paste", + "precompiles-helpers", + "rand 0.8.5", + "serde", + "sha2", + "tiny-keccak", + "zisk-verifier", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/examples/Cargo.toml b/examples/Cargo.toml new file mode 100644 index 000000000..898efa3fa --- /dev/null +++ b/examples/Cargo.toml @@ -0,0 +1,28 @@ +[workspace] +members = [ + "sha-hasher/host", + "sha-hasher/guest", + "multiple-programs/host", + "multiple-programs/guest", + "multiple-programs/guest_2", + "aggregation/host", + "aggregation/guest", + "aggregation/guest_agg", + "big-program/host", + "big-program/guest", +] + +resolver = "2" + +[profile.release] +opt-level = 3 + +[profile.release.package."proofman-verifier"] +opt-level = 0 + +[workspace.dependencies] +# Guest dependencies +ziskos = { path = "../ziskos/entrypoint" } + +# Host dependencies +zisk-sdk = { path = "../sdk" } \ No newline at end of file diff --git a/examples/aggregation/guest/Cargo.lock b/examples/aggregation/guest/Cargo.lock new file mode 100644 index 000000000..a31235f0c --- /dev/null +++ b/examples/aggregation/guest/Cargo.lock @@ -0,0 +1,293 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "generic-array" +version = "0.14.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lib-c" +version = "0.13.1" + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "sha-hasher-guest" +version = "0.1.0" +dependencies = [ + "byteorder", + "sha2", + "ziskos", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ziskos" +version = "0.13.1" +dependencies = [ + "cfg-if", + "getrandom", + "lazy_static", + "lib-c", + "num-bigint", + "num-integer", + "num-traits", + "rand", + "static_assertions", + "tiny-keccak", +] diff --git a/examples/aggregation/guest/Cargo.toml b/examples/aggregation/guest/Cargo.toml new file mode 100644 index 000000000..2f3da51c9 --- /dev/null +++ b/examples/aggregation/guest/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "guest" +version = "0.1.0" +edition = "2021" + +[dependencies] +byteorder = "1.5.0" +ziskos = { path = "../../../ziskos/entrypoint" } diff --git a/examples/aggregation/guest/src/main.rs b/examples/aggregation/guest/src/main.rs new file mode 100644 index 000000000..ed657d058 --- /dev/null +++ b/examples/aggregation/guest/src/main.rs @@ -0,0 +1,26 @@ +// This example program takes a number `n` as input and computes the SHA-256 hash `n` times sequentially. + +// Mark the main function as the entry point for ZisK +#![no_main] +ziskos::entrypoint!(main); + +fn main() { + // Read the input data + let n: u32 = ziskos::io::read(); + + let module = 233; + + ziskos::io::commit(&n); + ziskos::io::commit(&module); + + let mut a = 0; + let mut b = 1; + for _ in 0..n { + let mut c = a + b; + c %= module; + a = b; + b = c; + } + + ziskos::io::commit(&b); +} diff --git a/examples/aggregation/guest_agg/Cargo.lock b/examples/aggregation/guest_agg/Cargo.lock new file mode 100644 index 000000000..a31235f0c --- /dev/null +++ b/examples/aggregation/guest_agg/Cargo.lock @@ -0,0 +1,293 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "generic-array" +version = "0.14.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lib-c" +version = "0.13.1" + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "sha-hasher-guest" +version = "0.1.0" +dependencies = [ + "byteorder", + "sha2", + "ziskos", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ziskos" +version = "0.13.1" +dependencies = [ + "cfg-if", + "getrandom", + "lazy_static", + "lib-c", + "num-bigint", + "num-integer", + "num-traits", + "rand", + "static_assertions", + "tiny-keccak", +] diff --git a/examples/aggregation/guest_agg/Cargo.toml b/examples/aggregation/guest_agg/Cargo.toml new file mode 100644 index 000000000..ba42fdd0d --- /dev/null +++ b/examples/aggregation/guest_agg/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "guest-agg" +version = "0.1.0" +edition = "2021" + +[dependencies] +byteorder = "1.5.0" +ziskos = { path = "../../../ziskos/entrypoint" } diff --git a/examples/aggregation/guest_agg/src/main.rs b/examples/aggregation/guest_agg/src/main.rs new file mode 100644 index 000000000..aebbcc3a9 --- /dev/null +++ b/examples/aggregation/guest_agg/src/main.rs @@ -0,0 +1,22 @@ +// This example program takes a number `n` as input and computes the SHA-256 hash `n` times sequentially. + +// Mark the main function as the entry point for ZisK +#![no_main] +ziskos::entrypoint!(main); + +fn main() { + let proof1 = ziskos::io::read_proof(); + let proof2 = ziskos::io::read_proof(); + + // Verify the first proof + let valid_proof1 = ziskos::io::verify_zisk_proof(&proof1); + if !valid_proof1 { + panic!("Proof 1 verification failed"); + } + + // Verify the second proof + let valid_proof2 = ziskos::io::verify_zisk_proof(&proof2); + if !valid_proof2 { + panic!("Proof 2 verification failed"); + } +} diff --git a/examples/aggregation/host/Cargo.toml b/examples/aggregation/host/Cargo.toml new file mode 100644 index 000000000..c2c6993e5 --- /dev/null +++ b/examples/aggregation/host/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "aggregation-host" +version = "0.1.0" +edition = "2021" + +[dependencies] +zisk-sdk = { workspace = true } +anyhow = "1.0" +serde = { version = "1.0", default-features = false, features = ["derive"] } +sha2 = "0.10.8" + +[build-dependencies] +zisk-sdk = { workspace = true } + +[features] +default = [] +packed = ["zisk-sdk/packed"] +gpu = ["zisk-sdk/gpu", "packed"] \ No newline at end of file diff --git a/examples/aggregation/host/build.rs b/examples/aggregation/host/build.rs new file mode 100644 index 000000000..a45234df9 --- /dev/null +++ b/examples/aggregation/host/build.rs @@ -0,0 +1,4 @@ +fn main() { + zisk_sdk::build_program("../guest"); + zisk_sdk::build_program("../guest_agg"); +} diff --git a/examples/aggregation/host/src/main.rs b/examples/aggregation/host/src/main.rs new file mode 100644 index 000000000..f0a393c28 --- /dev/null +++ b/examples/aggregation/host/src/main.rs @@ -0,0 +1,71 @@ +use anyhow::Result; +use zisk_sdk::{include_elf, ElfBinary, ProofOpts, ProverClient, ZiskStdin}; + +pub const ELF: ElfBinary = include_elf!("guest"); +pub const ELF2: ElfBinary = include_elf!("guest-agg"); + +fn main() -> Result<()> { + println!("Starting ZisK Prover Client...\n"); + + // Create an input stream and write '1000' to it. + let n = 1000u32; + let stdin = ZiskStdin::new(); + stdin.write(&n); + + // Create a `ProverClient` method. + let client = ProverClient::builder().build().unwrap(); + + println!("Setting up first program..."); + let (pk, vkey) = client.setup(&ELF)?; + + println!("Setting up second program..."); + let (pk2, vkey2) = client.setup(&ELF2)?; + + // Execute the program using the `ProverClient.execute` method, without generating a proof. + println!("Executing first program..."); + let result = client.execute(&pk, stdin.clone())?; + + println!( + "Program executed successfully: {} cycles in {:.2?}", + result.get_execution_steps(), + result.get_duration() + ); + + println!("Generating first proof for program..."); + let proof_opts = ProofOpts::default().minimal_memory(); + let vadcop_result1 = client.prove(&pk, stdin).with_proof_options(proof_opts).run()?; + + let n = 2000u32; + let stdin2 = ZiskStdin::new(); + stdin2.write(&n); + + println!("Generating second proof for program..."); + let proof_opts = ProofOpts::default().minimal_memory(); + let vadcop_result2 = client.prove(&pk, stdin2).with_proof_options(proof_opts).run()?; + + // Write the proofs, publics, and verification keys to be verified by the guest + let stdin_aggregation = ZiskStdin::new(); + + let proof1 = client.prepare_send_proof( + &vadcop_result1.get_proof(), + &vadcop_result1.get_publics(), + &vkey, + )?; + let proof2 = client.prepare_send_proof( + &vadcop_result2.get_proof(), + &vadcop_result2.get_publics(), + &vkey, + )?; + + stdin_aggregation.write_proof(&proof1); + stdin_aggregation.write_proof(&proof2); + + let proof_opts = ProofOpts::default().minimal_memory(); + + let result_aggregation = + client.prove(&pk2, stdin_aggregation).with_proof_options(proof_opts).run()?; + + client.verify(result_aggregation.get_proof(), result_aggregation.get_publics(), &vkey2)?; + + Ok(()) +} diff --git a/examples/big-program/guest/Cargo.lock b/examples/big-program/guest/Cargo.lock new file mode 100644 index 000000000..a31235f0c --- /dev/null +++ b/examples/big-program/guest/Cargo.lock @@ -0,0 +1,293 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "generic-array" +version = "0.14.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lib-c" +version = "0.13.1" + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "sha-hasher-guest" +version = "0.1.0" +dependencies = [ + "byteorder", + "sha2", + "ziskos", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ziskos" +version = "0.13.1" +dependencies = [ + "cfg-if", + "getrandom", + "lazy_static", + "lib-c", + "num-bigint", + "num-integer", + "num-traits", + "rand", + "static_assertions", + "tiny-keccak", +] diff --git a/examples/big-program/guest/Cargo.toml b/examples/big-program/guest/Cargo.toml new file mode 100644 index 000000000..738c86f86 --- /dev/null +++ b/examples/big-program/guest/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "big-program-guest" +version = "0.1.0" +edition = "2021" + +[dependencies] +byteorder = "1.5.0" +sha2 = "0.10.8" +serde = { version = "1.0", default-features = false, features = ["derive"] } +ziskos = { path = "../../../ziskos/entrypoint" } diff --git a/examples/big-program/guest/src/main.rs b/examples/big-program/guest/src/main.rs new file mode 100644 index 000000000..9957a2d87 --- /dev/null +++ b/examples/big-program/guest/src/main.rs @@ -0,0 +1,24 @@ +// This example program processes large u64 input data (250MB - 1GB+) +// Input size is controlled by INPUT_SIZE_MB environment variable in host build.rs + +#![no_main] +ziskos::entrypoint!(main); + +fn main() { + // Get zero-copy slice directly from INPUT_ADDR (no RAM allocation!) + let data_bytes = ziskos::io::read_input_slice(); + + // Reinterpret bytes as &[u64] - still zero-copy + let data: &[u64] = unsafe { + core::slice::from_raw_parts(data_bytes.as_ptr() as *const u64, data_bytes.len() / 8) + }; + + // Sum all values - no heap allocation needed + let mut sum: u64 = 0; + for &value in data { + sum = sum.wrapping_add(value); + } + + // Commit the result + ziskos::io::commit(&sum); +} diff --git a/examples/big-program/host/Cargo.toml b/examples/big-program/host/Cargo.toml new file mode 100644 index 000000000..5ce9625c0 --- /dev/null +++ b/examples/big-program/host/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "big-program-host" +version = "0.1.0" +edition = "2021" + +[dependencies] +zisk-sdk = { workspace = true } +anyhow = "1.0" +serde = { version = "1.0", default-features = false, features = ["derive"] } +sha2 = "0.10.8" + +[build-dependencies] +zisk-sdk = { workspace = true } + +[features] +default = [] +packed = [] +gpu = ["zisk-sdk/gpu"] \ No newline at end of file diff --git a/examples/big-program/host/build.rs b/examples/big-program/host/build.rs new file mode 100644 index 000000000..d9218b4c6 --- /dev/null +++ b/examples/big-program/host/build.rs @@ -0,0 +1,45 @@ +use std::path::PathBuf; +use zisk_sdk::{build_program, ZiskStdin}; + +fn main() { + build_program("../guest"); + + // Read input size from environment variable (in MB), default to 250MB + let size_mb: usize = + std::env::var("INPUT_SIZE_MB").ok().and_then(|s| s.parse().ok()).unwrap_or(250); + + // Make the size available to main.rs at compile time + println!("cargo:rustc-env=INPUT_SIZE_MB={}", size_mb); + println!("cargo:rerun-if-env-changed=INPUT_SIZE_MB"); + + // Calculate number of u64 values + // 1MB = 1,048,576 bytes = 131,072 u64 values + const BYTES_PER_MB: usize = 1024 * 1024; + const NUM_U64_PER_MB: usize = BYTES_PER_MB / 8; + let num_u64 = size_mb * NUM_U64_PER_MB; + + println!("Generating {} u64 values (~{}MB of data)...", num_u64, size_mb); + + let mut data = Vec::with_capacity(num_u64); + for i in 0..num_u64 { + // Generate pseudo-random but deterministic data + data.push((i as u64).wrapping_mul(1103515245).wrapping_add(12345)); + } + + println!("Writing input data..."); + let stdin_save = ZiskStdin::new(); + stdin_save.write(&data); + + // Save to file + let path = PathBuf::from(format!("tmp/big_program_input_{}mb.bin", size_mb)); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).unwrap(); + } + stdin_save.save(&path).unwrap(); + + println!("Input data saved to: {}", path.display()); + let file_size_mb = std::fs::metadata(&path).unwrap().len() / 1024 / 1024; + println!("File size: {}MB", file_size_mb); + println!("\nConfigured size: {}MB", size_mb); + println!("To change: INPUT_SIZE_MB=512 cargo build --release"); +} diff --git a/examples/big-program/host/src/main.rs b/examples/big-program/host/src/main.rs new file mode 100644 index 000000000..ea0f12e21 --- /dev/null +++ b/examples/big-program/host/src/main.rs @@ -0,0 +1,46 @@ +use anyhow::Result; +use std::path::PathBuf; +use zisk_sdk::{include_elf, ElfBinary, ProverClient, ZiskStdin}; + +pub const ELF: ElfBinary = include_elf!("big-program-guest"); + +fn main() -> Result<()> { + println!("Starting ZisK Prover Client..."); + + // Read the input size that was configured during build + let size_mb: usize = env!("INPUT_SIZE_MB").parse().unwrap(); + + // Use CARGO_MANIFEST_DIR to get absolute path to the crate directory + let manifest_dir = env!("CARGO_MANIFEST_DIR"); + let input_path = + PathBuf::from(manifest_dir).join(format!("tmp/big_program_input_{}mb.bin", size_mb)); + println!("Loading input from: {} ({}MB)", input_path.display(), size_mb); + + let stdin = ZiskStdin::from_file(&input_path)?; + println!("Input loaded successfully"); + + // Create a `ProverClient` method. + let client = ProverClient::builder() + .asm() + .verify_constraints() + .proving_key_path_opt(Some("/home/roger/zisk/build/provingKey".into())) + .build() + .unwrap(); + + let (pk, _vkey) = client.setup(&ELF)?; + + // Execute the program using the `ProverClient.execute` method, without generating a proof. + let result = client.execute(&pk, stdin.clone())?; + + println!( + "ZisK has executed program with {} cycles in {:?}", + result.executor_summary.steps, result.total_duration + ); + + println!("Verifying constraints (no proof generation)..."); + client.verify_constraints(&pk, stdin.clone())?; + + println!("\u{2713} VerifyConstraints completed successfully!"); + + Ok(()) +} diff --git a/examples/multiple-programs/guest/Cargo.lock b/examples/multiple-programs/guest/Cargo.lock new file mode 100644 index 000000000..a31235f0c --- /dev/null +++ b/examples/multiple-programs/guest/Cargo.lock @@ -0,0 +1,293 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "generic-array" +version = "0.14.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lib-c" +version = "0.13.1" + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "sha-hasher-guest" +version = "0.1.0" +dependencies = [ + "byteorder", + "sha2", + "ziskos", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ziskos" +version = "0.13.1" +dependencies = [ + "cfg-if", + "getrandom", + "lazy_static", + "lib-c", + "num-bigint", + "num-integer", + "num-traits", + "rand", + "static_assertions", + "tiny-keccak", +] diff --git a/examples/multiple-programs/guest/Cargo.toml b/examples/multiple-programs/guest/Cargo.toml new file mode 100644 index 000000000..b9fc0e0f4 --- /dev/null +++ b/examples/multiple-programs/guest/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "multiple-program-guest" +version = "0.1.0" +edition = "2021" + +[dependencies] +byteorder = "1.5.0" +ziskos = { path = "../../../ziskos/entrypoint" } diff --git a/examples/multiple-programs/guest/src/main.rs b/examples/multiple-programs/guest/src/main.rs new file mode 100644 index 000000000..ed657d058 --- /dev/null +++ b/examples/multiple-programs/guest/src/main.rs @@ -0,0 +1,26 @@ +// This example program takes a number `n` as input and computes the SHA-256 hash `n` times sequentially. + +// Mark the main function as the entry point for ZisK +#![no_main] +ziskos::entrypoint!(main); + +fn main() { + // Read the input data + let n: u32 = ziskos::io::read(); + + let module = 233; + + ziskos::io::commit(&n); + ziskos::io::commit(&module); + + let mut a = 0; + let mut b = 1; + for _ in 0..n { + let mut c = a + b; + c %= module; + a = b; + b = c; + } + + ziskos::io::commit(&b); +} diff --git a/examples/multiple-programs/guest_2/Cargo.lock b/examples/multiple-programs/guest_2/Cargo.lock new file mode 100644 index 000000000..a31235f0c --- /dev/null +++ b/examples/multiple-programs/guest_2/Cargo.lock @@ -0,0 +1,293 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "generic-array" +version = "0.14.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lib-c" +version = "0.13.1" + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "sha-hasher-guest" +version = "0.1.0" +dependencies = [ + "byteorder", + "sha2", + "ziskos", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ziskos" +version = "0.13.1" +dependencies = [ + "cfg-if", + "getrandom", + "lazy_static", + "lib-c", + "num-bigint", + "num-integer", + "num-traits", + "rand", + "static_assertions", + "tiny-keccak", +] diff --git a/examples/multiple-programs/guest_2/Cargo.toml b/examples/multiple-programs/guest_2/Cargo.toml new file mode 100644 index 000000000..e0d7a89e2 --- /dev/null +++ b/examples/multiple-programs/guest_2/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "multiple-program-guest-2" +version = "0.1.0" +edition = "2021" + +[dependencies] +byteorder = "1.5.0" +ziskos = { path = "../../../ziskos/entrypoint" } diff --git a/examples/multiple-programs/guest_2/src/main.rs b/examples/multiple-programs/guest_2/src/main.rs new file mode 100644 index 000000000..5bc024070 --- /dev/null +++ b/examples/multiple-programs/guest_2/src/main.rs @@ -0,0 +1,26 @@ +// This example program takes a number `n` as input and computes the SHA-256 hash `n` times sequentially. + +// Mark the main function as the entry point for ZisK +#![no_main] +ziskos::entrypoint!(main); + +fn main() { + // Read the input data + let n: u32 = ziskos::io::read(); + + let module = 253; + + ziskos::io::commit(&n); + ziskos::io::commit(&module); + + let mut a = 0; + let mut b = 1; + for _ in 0..n { + let mut c = a + b; + c %= module; + a = b; + b = c; + } + + ziskos::io::commit(&b); +} diff --git a/examples/multiple-programs/host/Cargo.toml b/examples/multiple-programs/host/Cargo.toml new file mode 100644 index 000000000..98db47fda --- /dev/null +++ b/examples/multiple-programs/host/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "multiple-program-host" +version = "0.1.0" +edition = "2021" + +[dependencies] +zisk-sdk = { workspace = true } +anyhow = "1.0" +serde = { version = "1.0", default-features = false, features = ["derive"] } +sha2 = "0.10.8" + +[build-dependencies] +zisk-sdk = { workspace = true } + +[features] +default = [] +packed = ["zisk-sdk/packed"] +gpu = ["zisk-sdk/gpu", "packed"] \ No newline at end of file diff --git a/examples/multiple-programs/host/build.rs b/examples/multiple-programs/host/build.rs new file mode 100644 index 000000000..9a7c14db5 --- /dev/null +++ b/examples/multiple-programs/host/build.rs @@ -0,0 +1,4 @@ +fn main() { + zisk_sdk::build_program("../guest"); + zisk_sdk::build_program("../guest_2"); +} diff --git a/examples/multiple-programs/host/src/main.rs b/examples/multiple-programs/host/src/main.rs new file mode 100644 index 000000000..de8493274 --- /dev/null +++ b/examples/multiple-programs/host/src/main.rs @@ -0,0 +1,69 @@ +use anyhow::Result; +use zisk_sdk::{include_elf, ElfBinary, ProofOpts, ProverClient, ZiskStdin}; + +pub const ELF: ElfBinary = include_elf!("multiple-program-guest"); +pub const ELF2: ElfBinary = include_elf!("multiple-program-guest-2"); + +fn main() -> Result<()> { + println!("Starting ZisK Prover Client...\n"); + + // Create an input stream and write '1000' to it. + let n = 1000u32; + let stdin = ZiskStdin::new(); + stdin.write(&n); + + // Create a `ProverClient` method. + let client = ProverClient::builder().build().unwrap(); + + println!("Setting up first program..."); + let (pk, vkey) = client.setup(&ELF)?; + + println!("Setting up second program..."); + let (pk2, vkey2) = client.setup(&ELF2)?; + + // Execute the program using the `ProverClient.execute` method, without generating a proof. + println!("Executing first program..."); + let result = client.execute(&pk, stdin.clone())?; + + println!( + "Program executed successfully: {} cycles in {:.2?}", + result.get_execution_steps(), + result.get_duration() + ); + + println!("Generating proof for first program..."); + let proof_opts = ProofOpts::default().minimal_memory(); + let vadcop_result = client.prove(&pk, stdin).with_proof_options(proof_opts).run()?; + + println!("Verifying proof..."); + client.verify(vadcop_result.get_proof(), vadcop_result.get_publics(), &vkey)?; + + println!("Successfully generated and verified proof for first program!\n"); + + let n = 2000u32; + let stdin2 = ZiskStdin::new(); + stdin2.write(&n); + + // Execute the program using the `ProverClient.execute` method, without generating a proof. + println!("Executing second program..."); + let result2 = client.execute(&pk2, stdin2.clone())?; + + println!( + "Program executed successfully: {} cycles in {:.2?}", + result2.get_execution_steps(), + result2.get_duration() + ); + + println!("Generating proof for second program..."); + let proof_opts = ProofOpts::default().minimal_memory(); + let vadcop_result2 = client.prove(&pk2, stdin2).with_proof_options(proof_opts).run()?; + + println!("Verifying proof..."); + client.verify(vadcop_result2.get_proof(), vadcop_result2.get_publics(), &vkey2)?; + + println!("Successfully generated and verified proof for second program!\n"); + + println!("All proofs generated and verified successfully!"); + + Ok(()) +} diff --git a/examples/sha-hasher/guest/Cargo.lock b/examples/sha-hasher/guest/Cargo.lock new file mode 100644 index 000000000..a31235f0c --- /dev/null +++ b/examples/sha-hasher/guest/Cargo.lock @@ -0,0 +1,293 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "generic-array" +version = "0.14.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lib-c" +version = "0.13.1" + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "sha-hasher-guest" +version = "0.1.0" +dependencies = [ + "byteorder", + "sha2", + "ziskos", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ziskos" +version = "0.13.1" +dependencies = [ + "cfg-if", + "getrandom", + "lazy_static", + "lib-c", + "num-bigint", + "num-integer", + "num-traits", + "rand", + "static_assertions", + "tiny-keccak", +] diff --git a/examples/sha-hasher/guest/Cargo.toml b/examples/sha-hasher/guest/Cargo.toml new file mode 100644 index 000000000..5c7540ff9 --- /dev/null +++ b/examples/sha-hasher/guest/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "sha-hasher-guest" +version = "0.1.0" +edition = "2021" + +[dependencies] +byteorder = "1.5.0" +sha2 = "0.10.8" +serde = { version = "1.0", default-features = false, features = ["derive"] } +ziskos = { path = "../../../ziskos/entrypoint" } diff --git a/examples/sha-hasher/guest/src/main.rs b/examples/sha-hasher/guest/src/main.rs new file mode 100644 index 000000000..0dba31050 --- /dev/null +++ b/examples/sha-hasher/guest/src/main.rs @@ -0,0 +1,37 @@ +// This example program takes a number `n` as input and computes the SHA-256 hash `n` times sequentially. + +// Mark the main function as the entry point for ZisK +#![no_main] +ziskos::entrypoint!(main); + +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; + +#[derive(Serialize, Deserialize, Debug)] +struct Output { + hash: [u8; 32], + iterations: u32, + magic_number: u32, +} + +fn main() { + // Read the input data + let n: u32 = ziskos::io::read(); + + let mut hash = [0u8; 32]; + + // Compute SHA-256 hashing 'n' times + for _ in 0..n { + let mut hasher = Sha256::new(); + hasher.update(hash); + let digest = &hasher.finalize(); + hash = Into::<[u8; 32]>::into(*digest); + } + + let output = Output { hash, iterations: n, magic_number: 0xDEADBEEF }; + + println!("Computed hash: {:02x?}", output.hash); + println!("Iterations: {}", output.iterations); + + ziskos::io::commit(&output); +} diff --git a/examples/sha-hasher/host/Cargo.toml b/examples/sha-hasher/host/Cargo.toml new file mode 100644 index 000000000..b3e85874f --- /dev/null +++ b/examples/sha-hasher/host/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "sha-hasher-host" +version = "0.1.0" +edition = "2021" + +[dependencies] +zisk-sdk = { workspace = true } +anyhow = "1.0" +serde = { version = "1.0", default-features = false, features = ["derive"] } +sha2 = "0.10.8" + +[build-dependencies] +zisk-sdk = { workspace = true } + +[features] +default = [] +packed = [] +gpu = ["zisk-sdk/gpu"] + +[[bin]] +name = "prove" +path = "bin/prove.rs" + +[[bin]] +name = "execute" +path = "bin/execute.rs" + +[[bin]] +name = "plonk" +path = "bin/plonk.rs" + +[[bin]] +name = "compressed" +path = "bin/compressed.rs" + +[[bin]] +name = "verify-constraints" +path = "bin/verify-constraints.rs" + +[[bin]] +name = "ziskemu" +path = "bin/ziskemu.rs" \ No newline at end of file diff --git a/examples/sha-hasher/host/bin/compressed.rs b/examples/sha-hasher/host/bin/compressed.rs new file mode 100644 index 000000000..9d5db0ccb --- /dev/null +++ b/examples/sha-hasher/host/bin/compressed.rs @@ -0,0 +1,46 @@ +use anyhow::Result; +use zisk_sdk::{include_elf, ElfBinary, ProofOpts, ProverClient, ZiskStdin}; + +pub const ELF: ElfBinary = include_elf!("sha-hasher-guest"); + +fn main() -> Result<()> { + println!("Starting ZisK Prover Client (Compressed proof mode)..."); + + // Create an input stream and write '1000' to it. + let n = 1000u32; + let stdin = ZiskStdin::new(); + stdin.write(&n); + println!("Input prepared: {} iterations", n); + + // Create a `ProverClient` method. + println!("Building prover client..."); + let client = ProverClient::builder().build().unwrap(); + + println!("Setting up program..."); + let (pk, vkey) = client.setup(&ELF)?; + println!("Setup completed successfully"); + + println!("Generating Vadcop proof..."); + let proof_opts = ProofOpts::default().minimal_memory(); + let vadcop_result = client.prove(&pk, stdin).with_proof_options(proof_opts).run()?; + println!("Vadcop proof generated in {:?}", vadcop_result.get_duration()); + + println!("Compressing proof (this may take a while)..."); + let compressed_result = + client.compress(vadcop_result.get_proof(), vadcop_result.get_publics(), &vkey)?; + + // Alternatively, you can also call `compressed()` on the `ProverClient.prove` method to generate a compressed proof directly. + // let result = client.prove(&pk, stdin).with_proof_options(proof_opts).compressed().run()?; + + println!("Verifying compressed proof..."); + client.verify( + compressed_result.get_proof(), + compressed_result.get_publics(), + compressed_result.get_program_vk(), + )?; + println!("Compressed proof verification successful!"); + + println!("\u{2713} Successfully generated and verified compressed proof!"); + + Ok(()) +} diff --git a/examples/sha-hasher/host/bin/execute.rs b/examples/sha-hasher/host/bin/execute.rs new file mode 100644 index 000000000..8575054e1 --- /dev/null +++ b/examples/sha-hasher/host/bin/execute.rs @@ -0,0 +1,47 @@ +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use zisk_sdk::{include_elf, ElfBinary, ProverClient, ZiskStdin}; + +pub const ELF: ElfBinary = include_elf!("sha-hasher-guest"); + +#[derive(Serialize, Deserialize, Debug)] +struct Output { + hash: [u8; 32], + iterations: u32, + magic_number: u32, +} + +fn main() -> Result<()> { + println!("Starting ZisK Prover Client..."); + + // Create an input stream and write '1000' to it. + let n = 1000u32; + let stdin = ZiskStdin::new(); + stdin.write(&n); + println!("Input prepared: {} iterations", n); + + // Create a `ProverClient` method. + println!("Building prover client..."); + let client = ProverClient::builder().emu().verify_constraints().build().unwrap(); + + println!("Setting up program..."); + let (pk, _) = client.setup(&ELF)?; + println!("Setup completed successfully"); + + // Execute the program using the `ProverClient.execute` method, without generating a proof. + println!("Executing program (no proof generation)..."); + let result = client.execute(&pk, stdin.clone())?; + + println!("\u{2713} Execution completed successfully!"); + println!("Cycles: {}", result.get_execution_steps()); + println!("Duration: {:?}", result.get_duration()); + + println!("Reading public outputs..."); + let output: Output = result.get_public_values()?; + println!("Public outputs:"); + println!(" Hash: {:02x?}", output.hash); + println!(" Iterations: {}", output.iterations); + println!(" Magic number: 0x{:08x}", output.magic_number); + + Ok(()) +} diff --git a/examples/sha-hasher/host/bin/plonk.rs b/examples/sha-hasher/host/bin/plonk.rs new file mode 100644 index 000000000..a34731218 --- /dev/null +++ b/examples/sha-hasher/host/bin/plonk.rs @@ -0,0 +1,49 @@ +use anyhow::Result; +use zisk_sdk::{include_elf, ElfBinary, ProverClient, ZiskProofWithPublicValues, ZiskStdin}; + +pub const ELF: ElfBinary = include_elf!("sha-hasher-guest"); + +fn main() -> Result<()> { + println!("Starting ZisK Prover Client (SNARK mode)..."); + + // Create an input stream and write '1000' to it. + let n = 1000u32; + let stdin = ZiskStdin::new(); + stdin.write(&n); + println!("Input prepared: {} iterations", n); + + // Create a `ProverClient` method. + println!("Building prover client with SNARK support..."); + let client = ProverClient::builder().asm().base_port(54321).snark().build().unwrap(); + + println!("Setting up program and generating verification key..."); + let (pk, vkey) = client.setup(&ELF)?; + println!("Setup completed successfully"); + + println!("Generating PLONK proof (this may take a while)..."); + let snark_proof = client.prove(&pk, stdin).plonk().run()?; + println!("PLONK proof generated successfully in {:?}", snark_proof.get_duration()); + println!("Execution steps: {}", snark_proof.get_execution_steps()); + + // Alternatively, it can also be done in two steps + // let vadcop_result = client.prove(&pk, stdin).run()?; + // let snark_proof = client.prove_snark(&vadcop_result.get_proof(), &vadcop_result.get_publics(), &vkey)?; + + println!("Verifying PLONK proof..."); + client.verify(snark_proof.get_proof(), snark_proof.get_publics(), &vkey)?; + println!("PLONK proof verification successful!"); + + println!("Saving PLONK proof to disk..."); + snark_proof.save_proof_with_publics("/tmp/sha_hasher_proof_snark_with_publics.bin")?; + println!("Proof saved to /tmp/sha_hasher_proof_snark_with_publics.bin"); + + println!("Loading and verifying saved PLONK proof..."); + let proof = ZiskProofWithPublicValues::load("/tmp/sha_hasher_proof_snark_with_publics.bin")?; + let vk = client.vk(&ELF)?; + client.verify(proof.get_proof(), proof.get_publics(), &vk)?; + println!("Saved PLONK proof verification successful!"); + + println!("\u{2713} Successfully generated and verified PLONK proof!"); + + Ok(()) +} diff --git a/examples/sha-hasher/host/bin/prove.rs b/examples/sha-hasher/host/bin/prove.rs new file mode 100644 index 000000000..d1254aa4a --- /dev/null +++ b/examples/sha-hasher/host/bin/prove.rs @@ -0,0 +1,80 @@ +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use zisk_sdk::{ + include_elf, ElfBinary, ProofOpts, ProverClient, ZiskProof, ZiskProofWithPublicValues, + ZiskPublics, ZiskStdin, +}; + +pub const ELF: ElfBinary = include_elf!("sha-hasher-guest"); + +#[derive(Serialize, Deserialize, Debug)] +struct Output { + hash: [u8; 32], + iterations: u32, + magic_number: u32, +} + +fn main() -> Result<()> { + println!("Starting ZisK Prover Client..."); + + // Create an input stream and write '1000' to it. + let n = 1000u32; + let stdin = ZiskStdin::new(); + stdin.write(&n); + println!("Input prepared: {} iterations", n); + + // Create a `ProverClient` method. + println!("Building prover client..."); + let client = ProverClient::builder().asm().base_port(54321).build().unwrap(); + + println!("Setting up program..."); + let (pk, _) = client.setup(&ELF)?; + println!("Setup completed successfully"); + + println!("Generating proof (this may take a while)..."); + let proof_opts = ProofOpts::default().minimal_memory(); + let result = client.prove(&pk, stdin).with_proof_options(proof_opts).run()?; + println!("Proof generated successfully in {:?}", result.get_duration()); + println!("Execution steps: {}", result.get_execution_steps()); + + println!("Verifying proof..."); + client.verify(result.get_proof(), result.get_publics(), result.get_program_vk())?; + println!("Proof verification successful!"); + + println!("Saving proof to disk..."); + result.save_proof_with_publics("tmp/sha_hasher_proof_with_publics.bin")?; + result.get_proof().save("tmp/sha_hasher_proof.bin")?; + println!("Proofs saved to tmp/ directory"); + + let mut hash = [0u8; 32]; + for _ in 0..n { + let mut hasher = Sha256::new(); + hasher.update(hash); + let digest = &hasher.finalize(); + hash = Into::<[u8; 32]>::into(*digest); + } + + let output = Output { hash, iterations: n, magic_number: 0xDEADBEEF }; + println!("Expected output hash: {:02x?}", &hash[..8]); + + println!("Verifying saved proofs from disk..."); + let publics = ZiskPublics::write(&output)?; + println!("Loading proof from disk..."); + let proof = ZiskProof::load("tmp/sha_hasher_proof.bin")?; + let vk = client.vk(&ELF)?; + println!("Verifying standalone proof..."); + client.verify(&proof, &publics, &vk)?; + println!("Standalone proof verification successful!"); + + println!("Loading proof with publics from disk..."); + let proof_with_publics = + ZiskProofWithPublicValues::load("tmp/sha_hasher_proof_with_publics.bin")?; + println!("Verifying proof with publics..."); + client.verify(&proof_with_publics.proof, &proof_with_publics.publics, &vk)?; + println!("Proof with publics verification successful!"); + + println!("\u{2713} Successfully generated and verified all proofs!"); + + Ok(()) +} diff --git a/examples/sha-hasher/host/bin/verify-constraints.rs b/examples/sha-hasher/host/bin/verify-constraints.rs new file mode 100644 index 000000000..0013596b7 --- /dev/null +++ b/examples/sha-hasher/host/bin/verify-constraints.rs @@ -0,0 +1,47 @@ +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use zisk_sdk::{include_elf, ElfBinary, ProverClient, ZiskStdin}; + +pub const ELF: ElfBinary = include_elf!("sha-hasher-guest"); + +#[derive(Serialize, Deserialize, Debug)] +struct Output { + hash: [u8; 32], + iterations: u32, + magic_number: u32, +} + +fn main() -> Result<()> { + println!("Starting ZisK Prover Client..."); + + let current_dir = std::env::current_dir()?; + let stdin = + ZiskStdin::from_file(current_dir.join("sha-hasher/host/tmp/verify_constraints_input.bin"))?; + + let n: u32 = stdin.read()?; + println!("Input prepared: {} iterations", n); + + // Create a `ProverClient` method. + println!("Building prover client..."); + let client = ProverClient::builder().emu().verify_constraints().build().unwrap(); + + println!("Setting up program..."); + let (pk, _vkey) = client.setup(&ELF)?; + println!("Setup completed successfully"); + + println!("Verifying constraints (no proof generation)..."); + let result = client.verify_constraints(&pk, stdin.clone())?; + + println!("\u{2713} VerifyConstraints completed successfully!"); + println!("Cycles: {}", result.get_execution_steps()); + println!("Duration: {:?}", result.get_duration()); + + println!("Reading public outputs..."); + let output: Output = result.get_public_values()?; + println!("Public outputs:"); + println!(" Hash: {:02x?}", output.hash); + println!(" Iterations: {}", output.iterations); + println!(" Magic number: 0x{:08x}", output.magic_number); + + Ok(()) +} diff --git a/examples/sha-hasher/host/bin/ziskemu.rs b/examples/sha-hasher/host/bin/ziskemu.rs new file mode 100644 index 000000000..2d0b52d29 --- /dev/null +++ b/examples/sha-hasher/host/bin/ziskemu.rs @@ -0,0 +1,25 @@ +use anyhow::Result; +use zisk_sdk::{include_elf, ziskemu, ElfBinary, EmuOptions, ZiskStdin}; + +pub const ELF: ElfBinary = include_elf!("sha-hasher-guest"); + +fn main() -> Result<()> { + let current_dir = std::env::current_dir()?; + let stdin = + ZiskStdin::from_file(current_dir.join("sha-hasher/host/tmp/verify_constraints_input.bin"))?; + + let n: u32 = stdin.read()?; + println!("Input prepared: {} iterations", n); + + println!("Running ZisK Emulator..."); + let emu_options = EmuOptions { + stats: true, + read_symbols: true, + top_roi_detail: true, + ..EmuOptions::default() + }; + ziskemu(&ELF, stdin, &emu_options)?; + println!("ZisK Emulator completed successfully!"); + + Ok(()) +} diff --git a/examples/sha-hasher/host/build.rs b/examples/sha-hasher/host/build.rs new file mode 100644 index 000000000..ecc7c594a --- /dev/null +++ b/examples/sha-hasher/host/build.rs @@ -0,0 +1,15 @@ +use std::path::PathBuf; +use zisk_sdk::{build_program, ZiskStdin}; + +fn main() { + build_program("../guest"); + let n = 1000u32; + let stdin_save = ZiskStdin::new(); + stdin_save.write(&n); + // Check if path exists, if not write + let path = PathBuf::from("tmp/verify_constraints_input.bin"); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).unwrap(); + } + stdin_save.save(&path).unwrap(); +} diff --git a/examples/sha-hasher/host/src/main.rs b/examples/sha-hasher/host/src/main.rs new file mode 100644 index 000000000..bd5139592 --- /dev/null +++ b/examples/sha-hasher/host/src/main.rs @@ -0,0 +1,35 @@ +use anyhow::Result; +use zisk_sdk::{include_elf, ElfBinary, ProofOpts, ProverClient, ZiskStdin}; + +pub const ELF: ElfBinary = include_elf!("sha-hasher-guest"); + +fn main() -> Result<()> { + println!("Starting ZisK Prover Client..."); + + // Create an input stream and write '1000' to it. + let n = 1000u32; + let stdin = ZiskStdin::new(); + stdin.write(&n); + + // Create a `ProverClient` method. + let client = ProverClient::builder().asm().build().unwrap(); + + let (pk, vkey) = client.setup(&ELF)?; + + // Execute the program using the `ProverClient.execute` method, without generating a proof. + let result = client.execute(&pk, stdin.clone())?; + + println!( + "ZisK has executed program with {} cycles in {:?}", + result.get_execution_steps(), + result.get_duration() + ); + + let proof_opts = ProofOpts::default().minimal_memory(); + let vadcop_result = client.prove(&pk, stdin).with_proof_options(proof_opts).run()?; + client.verify(vadcop_result.get_proof(), vadcop_result.get_publics(), &vkey)?; + + println!("successfully generated and verified proof for the program!"); + + Ok(()) +} diff --git a/executor/Cargo.toml b/executor/Cargo.toml index 6487a4499..1bce1efde 100644 --- a/executor/Cargo.toml +++ b/executor/Cargo.toml @@ -10,7 +10,6 @@ categories = { workspace = true } [dependencies] sm-main = { workspace = true } sm-rom = { workspace = true } -rom-setup = { workspace = true } zisk-pil = { workspace = true } ziskemu = { workspace = true } zisk-core = { workspace = true } @@ -29,22 +28,47 @@ tracing = { workspace = true } itertools = { workspace = true } rayon = { workspace = true } pil-std-lib = { workspace = true } +precompiles-hints = { workspace = true } +anyhow = { workspace = true } crossbeam = "0.8.4" +precompiles-common = { workspace = true } precomp-keccakf = { workspace = true } precomp-sha256f = { workspace = true } +precomp-poseidon2 = { workspace = true } +precomp-blake2 = { workspace = true } precomp-arith-eq = { workspace = true } precomp-arith-eq-384 = { workspace = true } precomp-big-int = { workspace = true } +precomp-dma = { workspace = true } sm-arith = { workspace = true } sm-binary = { workspace = true } sm-mem = { workspace = true } sm-frequent-ops = { workspace = true } +[target.'cfg(all(target_os = "linux", target_arch = "x86_64"))'.dependencies] +named-sem = { workspace = true } + [features] default = [] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] -gpu = ["proofman-common/gpu", "proofman/gpu" ] -packed = ["proofman-common/packed"] +disable_distributed = [] +gpu = ["proofman-common/gpu", "proofman/gpu", "packed"] +packed = [ + "proofman/packed", + "proofman-common/packed", + "precomp-keccakf/packed", + "precomp-sha256f/packed", + "precomp-poseidon2/packed", + "precomp-arith-eq/packed", + "precomp-arith-eq-384/packed", + "precomp-big-int/packed", + "precomp-dma/packed", + "sm-arith/packed", + "sm-binary/packed", + "sm-main/packed", + "sm-mem/packed", + "sm-frequent-ops/packed", + "sm-rom/packed", +] stats = [] diff --git a/witness-computation/NEW_SM.md b/executor/NEW_SM.md similarity index 100% rename from witness-computation/NEW_SM.md rename to executor/NEW_SM.md diff --git a/executor/src/air_classifier.rs b/executor/src/air_classifier.rs new file mode 100644 index 000000000..b004c591f --- /dev/null +++ b/executor/src/air_classifier.rs @@ -0,0 +1,81 @@ +//! AIR classification helpers. +//! +//! This module provides helpers for classifying AIR types based on their IDs, +//! centralizing the scattered `*_AIR_IDS.contains()` checks throughout the executor. + +use zisk_pil::{ + INPUT_DATA_AIR_IDS, KECCAKF_AIR_IDS, MAIN_AIR_IDS, MEM_AIR_IDS, ROM_AIR_IDS, ROM_DATA_AIR_IDS, + ZISK_AIRGROUP_ID, +}; + +/// Helper for classifying AIR instances by their ID. +/// +/// Centralizes the logic for determining AIR types, replacing scattered +/// `*_AIR_IDS.contains()` checks throughout the codebase. +pub struct AirClassifier; + +impl AirClassifier { + /// Checks if the AIR ID corresponds to a main state machine. + #[inline] + pub fn is_main(air_id: usize) -> bool { + MAIN_AIR_IDS.contains(&air_id) + } + + /// Checks if the AIR ID corresponds to the ROM state machine. + #[inline] + pub fn is_rom(air_id: usize) -> bool { + air_id == ROM_AIR_IDS[0] + } + + #[inline] + pub fn is_keccakf(air_id: usize) -> bool { + air_id == KECCAKF_AIR_IDS[0] + } + + /// Checks if the plan targets the ROM instance that requires special handling. + /// + /// ROM instances need to be added to the proof context with first partition assignment. + #[inline] + pub fn is_rom_instance(airgroup_id: usize, air_id: usize) -> bool { + airgroup_id == ZISK_AIRGROUP_ID && Self::is_rom(air_id) + } + + #[inline] + pub fn is_keccakf_instance(airgroup_id: usize, air_id: usize) -> bool { + airgroup_id == ZISK_AIRGROUP_ID && Self::is_keccakf(air_id) + } + + /// Checks if the AIR ID corresponds to a memory-related state machine. + /// + /// Memory-related AIRs include MEM, ROM_DATA, and INPUT_DATA. + #[inline] + pub fn is_memory_related(air_id: usize) -> bool { + air_id == MEM_AIR_IDS[0] || air_id == ROM_DATA_AIR_IDS[0] || air_id == INPUT_DATA_AIR_IDS[0] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_is_main() { + for &air_id in MAIN_AIR_IDS { + assert!(AirClassifier::is_main(air_id)); + } + } + + #[test] + fn test_is_rom() { + for &air_id in ROM_AIR_IDS { + assert!(AirClassifier::is_rom(air_id)); + } + } + + #[test] + fn test_is_memory_related() { + assert!(AirClassifier::is_memory_related(MEM_AIR_IDS[0])); + assert!(AirClassifier::is_memory_related(ROM_DATA_AIR_IDS[0])); + assert!(AirClassifier::is_memory_related(INPUT_DATA_AIR_IDS[0])); + } +} diff --git a/executor/src/asm_resources.rs b/executor/src/asm_resources.rs new file mode 100644 index 000000000..b56752654 --- /dev/null +++ b/executor/src/asm_resources.rs @@ -0,0 +1,201 @@ +use std::sync::{Arc, Mutex}; + +use anyhow::Result; +use asm_runner::{AsmServices, ControlShmem, HintsShmem, InputsShmemWriter}; +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +use asm_runner::{MOShMemReader, MTShMemReader, RHShMemReader}; +use precompiles_hints::{HintsProcessor, MpiBroadcastFn}; +use zisk_common::io::{StreamSink, StreamSource, ZiskStdin, ZiskStream}; + +/// Configuration for assembly resources. +#[derive(Clone)] +pub struct AsmResourcesConfig { + /// Optional baseline port to communicate with assembly microservices. + pub base_port: Option, + + /// Local rank for distributed execution. + pub local_rank: i32, + + /// Map unlocked flag. + pub unlock_mapped_memory: bool, +} + +impl std::fmt::Debug for AsmResourcesConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("AsmResources") + .field("base_port", &self.base_port) + .field("local_rank", &self.local_rank) + .field("unlock_mapped_memory", &self.unlock_mapped_memory) + .finish_non_exhaustive() + } +} + +/// Encapsulates assembly-related resources including shared memory and hints stream. +#[derive(Clone)] +pub struct AsmResources { + /// Configuration for assembly resources. + config: AsmResourcesConfig, + + /// Shared memory for writing inputs to the assembly microservices. + pub inputs_shmem_writer: Arc, + + /// Pipeline for handling precompile hints. + hints_stream: Option>>>>, + + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + pub mt_shmem_reader: Arc>, + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + pub mo_shmem_reader: Arc>, + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + pub rh_shmem_reader: Arc>>, +} + +impl std::fmt::Debug for AsmResources { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("AsmResources") + .field("config", &self.config) + .field("hints_stream", &self.hints_stream.is_some()) + .finish_non_exhaustive() + } +} + +impl AsmResources { + pub fn new( + local_rank: i32, + base_port: Option, + unlock_mapped_memory: bool, + verbose_mode: proofman_common::VerboseMode, + with_hints: bool, + mpi_broadcast_fn: Option, + init_rom: bool, + ) -> Result { + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + let asm_shmem_mt = MTShMemReader::new(local_rank, base_port, unlock_mapped_memory)?; + + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + let asm_shmem_mo = MOShMemReader::new(local_rank, base_port, unlock_mapped_memory)?; + + let control_writer = + Arc::new(ControlShmem::new(base_port, local_rank, unlock_mapped_memory)?); + + let config = AsmResourcesConfig { base_port, local_rank, unlock_mapped_memory }; + + let inputs_shmem_writer = Arc::new(InputsShmemWriter::new( + base_port, + local_rank, + unlock_mapped_memory, + control_writer.clone(), + )?); + + let hints_stream = if with_hints { + let active_services = + if init_rom { &AsmServices::SERVICES[..] } else { &AsmServices::SERVICES[..2] }; + + let hints_shmem = Arc::new(HintsShmem::new( + base_port, + local_rank, + unlock_mapped_memory, + control_writer, + active_services, + )?); + + let mut builder = + HintsProcessor::builder(hints_shmem, Some(inputs_shmem_writer.clone())) + .enable_stats(verbose_mode != proofman_common::VerboseMode::Info); + + if let Some(broadcast_fn) = mpi_broadcast_fn { + builder = builder.with_mpi_broadcast(move |data| broadcast_fn(data)); + } + + let hints_processor = builder.build()?; + + Some(Arc::new(Mutex::new(ZiskStream::new(hints_processor)))) + } else { + None + }; + + Ok(Self { + config, + hints_stream, + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + mt_shmem_reader: Arc::new(Mutex::new(asm_shmem_mt)), + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + mo_shmem_reader: Arc::new(Mutex::new(asm_shmem_mo)), + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + rh_shmem_reader: Arc::new(Mutex::new(None)), + inputs_shmem_writer, + }) + } + + /// Returns the concrete hints processor for passing to `StreamOrderingActor`. + pub fn get_hints_processor(&self) -> Option>> { + self.hints_stream.as_ref().map(|stream| stream.lock().unwrap().get_processor()) + } + + /// Update the active ASM services for this partition. + /// + /// Call once per partition start (not per stream reset). + /// `is_first_partition` controls whether the ROM histogram service (RH) is active. + pub fn set_active_services(&self, is_first_partition: bool) -> Result<()> { + if let Some(stream) = &self.hints_stream { + let processor = stream.lock().unwrap().get_processor(); + let sink = processor.hints_sink(); + let services = if is_first_partition { + &AsmServices::SERVICES[..] + } else { + &AsmServices::SERVICES[..2] + }; + sink.set_active_services(services)?; + } + Ok(()) + } + + /// Submit hint data directly to the shmem sink, bypassing the processing pipeline. + /// + /// Used in the gRPC streaming path where hints arrive pre-processed. + pub fn submit_hint_direct(&self, data: &[u64]) -> Result<()> { + if let Some(stream) = &self.hints_stream { + let processor = stream.lock().unwrap().get_processor(); + processor.hints_sink().submit(data) + } else { + Err(anyhow::anyhow!("Hints stream not configured")) + } + } + + pub fn start_stream(&self) -> Result<()> { + if let Some(hints_stream) = &self.hints_stream { + hints_stream.lock().unwrap().start_stream() + } else { + Ok(()) + } + } + + pub fn set_hints_stream_src(&self, stream: StreamSource) -> Result<()> { + if let Some(hints_stream) = &self.hints_stream { + hints_stream.lock().unwrap().set_hints_stream_src(stream) + } else { + Err(anyhow::anyhow!("Hints stream not initialized")) + } + } + + pub fn is_hints_stream_initialized(&self) -> bool { + self.hints_stream.as_ref().map(|s| s.lock().unwrap().is_initialized()).unwrap_or(false) + } + + pub fn reset(&self) { + if let Some(hints_stream) = &self.hints_stream { + hints_stream.lock().unwrap().reset(); + } + self.inputs_shmem_writer.reset(); + } + + pub fn config(&self) -> &AsmResourcesConfig { + &self.config + } + + pub fn write_input(&self, stdin: &ZiskStdin) -> Result<()> { + let inputs = stdin.read_raw_bytes(); + + self.inputs_shmem_writer.write_input(&inputs) + } +} diff --git a/executor/src/collector.rs b/executor/src/collector.rs new file mode 100644 index 000000000..23fa30575 --- /dev/null +++ b/executor/src/collector.rs @@ -0,0 +1,345 @@ +//! Chunk collector component. +//! +//! ## Overview +//! +//! During witness computation, secondary state machines need data from +//! specific execution chunks. This component: +//! +//! 1. Determines which chunks each instance needs (via checkpoints) +//! 2. Orders chunks for optimal parallel processing +//! 3. Executes chunks and routes data to the appropriate collectors +//! +//! ## Chunk Ordering Strategy +//! +//! Uses a greedy algorithm that prioritizes completing instances that need +//! fewer remaining chunks, minimizing time-to-first-completion. + +use anyhow::Result; +use crossbeam::atomic::AtomicCell; +use data_bus::DataBusTrait; +use fields::PrimeField64; +use proofman_common::ProofCtx; +use std::{ + collections::HashMap, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, Mutex, + }, + time::Instant, +}; +use zisk_common::{CheckPoint, EmuTrace, Instance, Stats}; +use zisk_core::ZiskRom; +use ziskemu::ZiskEmulator; + +use crate::{state::ChunkCollector, ExecutionState, StaticSMBundle}; + +pub struct ChunkDataCollector { + /// State machine bundle for building data buses. + sm_bundle: Arc>, +} + +impl ChunkDataCollector { + /// Creates a new `ChunkDataCollector`. + /// + /// # Arguments + /// * `sm_bundle` - State machine bundle. + pub fn new(sm_bundle: Arc>) -> Self { + Self { sm_bundle } + } + + pub fn set_rom(&self, zisk_rom: Arc) { + self.sm_bundle.set_rom(zisk_rom); + } + + /// Computes which chunks need to be executed for each instance. + /// + /// # Arguments + /// * `min_traces` - Minimal traces from execution. + /// * `secn_instances` - Map of global ID to secondary instances. + /// + /// # Returns + /// Tuple of (chunks_to_execute, global_id_chunks) where: + /// - chunks_to_execute[chunk_id] = list of global_ids that need this chunk + /// - global_id_chunks[global_id] = list of chunk_ids this instance needs + #[allow(clippy::borrowed_box)] + pub fn compute_chunks_to_execute( + &self, + min_traces: &[EmuTrace], + secn_instances: &HashMap>>, + ) -> (Vec>, HashMap>) { + let mut chunks_to_execute = vec![Vec::new(); min_traces.len()]; + let mut global_id_chunks: HashMap> = HashMap::new(); + + secn_instances.iter().for_each(|(global_idx, secn_instance)| { + match secn_instance.check_point() { + CheckPoint::None => {} + CheckPoint::Single(chunk_id) => { + chunks_to_execute[chunk_id.as_usize()].push(*global_idx); + global_id_chunks.entry(*global_idx).or_default().push(chunk_id.as_usize()); + } + CheckPoint::Multiple(chunk_ids) => { + chunk_ids.iter().for_each(|&chunk_id| { + chunks_to_execute[chunk_id.as_usize()].push(*global_idx); + global_id_chunks.entry(*global_idx).or_default().push(chunk_id.as_usize()); + }); + } + } + }); + + for chunk_ids in global_id_chunks.values_mut() { + chunk_ids.sort(); + } + + (chunks_to_execute, global_id_chunks) + } + + /// Orders chunks for optimal processing. + /// + /// Uses a greedy algorithm to minimize the time until any instance + /// has all its chunks collected. + /// + /// # Arguments + /// * `chunks_to_execute` - Which instances need each chunk. + /// * `global_id_chunks` - Which chunks each instance needs. + /// + /// # Returns + /// Ordered list of chunk IDs to process. + pub fn order_chunks( + &self, + chunks_to_execute: &[Vec], + global_id_chunks: &HashMap>, + ) -> Vec { + let mut ordered_chunks = Vec::new(); + let mut already_selected_chunks = vec![false; chunks_to_execute.len()]; + + let mut n_global_ids_incompleted = global_id_chunks.len(); + let mut n_chunks_by_global_id: HashMap = + global_id_chunks.iter().map(|(global_id, chunks)| (*global_id, chunks.len())).collect(); + + while n_global_ids_incompleted > 0 { + let selected_global_id = n_chunks_by_global_id + .iter() + .filter(|(_, &count)| count > 0) + .min_by_key(|(_, &count)| count) + .map(|(&global_id, _)| global_id); + + if let Some(global_id) = selected_global_id { + for chunk_id in global_id_chunks[&global_id].iter() { + if already_selected_chunks[*chunk_id] { + continue; + } + ordered_chunks.push(*chunk_id); + already_selected_chunks[*chunk_id] = true; + for global_idx in chunks_to_execute[*chunk_id].iter() { + if let Some(count) = n_chunks_by_global_id.get_mut(global_idx) { + *count -= 1; + if *count == 0 { + n_chunks_by_global_id.remove(global_idx); + n_global_ids_incompleted -= 1; + } + } + } + } + } else { + break; + } + } + + ordered_chunks + } + + /// Collects chunk data for a single secondary instance. + /// + /// Convenience method that wraps `collect()` for single-instance collection. + /// Avoids the caller needing to create a HashMap for one instance. + /// + /// # Arguments + /// * `pctx` - Proof context. + /// * `state` - Execution state for storing collectors. + /// * `global_id` - Global ID of the instance. + /// * `instance` - The secondary instance to collect for. + #[allow(clippy::borrowed_box)] + pub fn collect_single( + &self, + pctx: &ProofCtx, + state: &ExecutionState, + global_id: usize, + instance: &Box>, + ) -> Result<()> { + let mut map = HashMap::with_capacity(1); + map.insert(global_id, instance); + self.collect(pctx, state, map)?; + Ok(()) + } + + /// Collects chunk data for the given secondary instances. + /// + /// Processes chunks in parallel, collecting data into the execution state's + /// collectors_by_instance map. + /// + /// # Arguments + /// * `pctx` - Proof context. + /// * `state` - Execution state for storing collectors. + /// * `secn_instances` - Map of global ID to secondary instances. + #[allow(clippy::borrowed_box)] + pub fn collect( + &self, + pctx: &ProofCtx, + state: &ExecutionState, + secn_instances: HashMap>>, + ) -> Result<()> { + let min_traces_guard = state.min_traces.read().unwrap(); + let min_traces = min_traces_guard.as_ref().expect("min_traces should not be None"); + + // Compute chunks to execute + let (chunks_to_execute, global_id_chunks) = + self.compute_chunks_to_execute(min_traces, &secn_instances); + + let ordered_chunks = self.order_chunks(&chunks_to_execute, &global_id_chunks); + let global_ids: Vec = secn_instances.keys().copied().collect(); + + let collect_start_times: Vec>> = + global_ids.iter().map(|_| AtomicCell::new(None)).collect(); + + let global_ids_map: HashMap = + global_ids.iter().enumerate().map(|(idx, &id)| (id, idx)).collect(); + + // Create data buses for each chunk + let data_buses = self + .sm_bundle + .build_data_bus_collectors(pctx, &secn_instances, &chunks_to_execute) + .into_iter() + .map(Mutex::new) + .collect::>(); + + let n_chunks_left: Vec = global_ids + .iter() + .map(|global_id| AtomicUsize::new(global_id_chunks[global_id].len())) + .collect(); + + // Initialize collectors and stats + for global_id in global_ids.iter() { + let (airgroup_id, air_id) = + pctx.dctx_get_instance_info(*global_id).expect("Failed to get instance info"); + let stats = Stats::new_pending_collection( + airgroup_id, + air_id, + global_id_chunks[global_id].len(), + ); + + state + .collectors_by_instance + .write() + .unwrap() + .insert(*global_id, (0..global_id_chunks[global_id].len()).map(|_| None).collect()); + state.stats.insert_witness_stats(*global_id, stats); + } + + let next_chunk = AtomicUsize::new(0); + let zisk_rom = state.get_rom()?; + + rayon::in_place_scope(|scope| { + for _ in 0..rayon::current_num_threads() { + let next_chunk = &next_chunk; + let n_chunks_left = &n_chunks_left; + let collectors_by_instance = &state.collectors_by_instance; + let collect_start_times = &collect_start_times; + let stats = &state.stats; + let min_traces = &min_traces; + let data_buses = &data_buses; + let zisk_rom = &zisk_rom; + let global_ids_map = &global_ids_map; + let global_id_chunks = &global_id_chunks; + let ordered_chunks = &ordered_chunks; + let chunks_to_execute = &chunks_to_execute; + let pctx = &pctx; + + scope.spawn(move |_| loop { + let next_chunk_id = next_chunk.fetch_add(1, Ordering::Relaxed); + if next_chunk_id >= ordered_chunks.len() { + break; + } + let chunk_id = ordered_chunks[next_chunk_id]; + + if let Some(mut data_bus) = data_buses[chunk_id].lock().unwrap().take() { + for global_id in chunks_to_execute[chunk_id].iter() { + let start_time_cell = &collect_start_times[global_ids_map[global_id]]; + if start_time_cell.load().is_none() { + start_time_cell.store(Some(Instant::now())); + } + } + + ZiskEmulator::process_emu_traces::( + zisk_rom, + min_traces, + chunk_id, + &mut data_bus, + ); + + // Collect all device results locally + let devices = data_bus.into_devices(false); + let mut entries: Vec<(usize, usize, Option)> = Vec::new(); + let mut affected_globals: Vec<(usize, usize)> = Vec::new(); + + for (global_id, collector) in devices { + if let Some(global_id) = global_id { + let global_id_idx = *global_ids_map + .get(&global_id) + .expect("Global ID not found in map"); + + let chunk_order = &global_id_chunks[&global_id]; + let position = chunk_order + .iter() + .position(|&id| id == chunk_id) + .expect("Chunk ID not found in order"); + + entries.push(( + global_id, + position, + Some((chunk_id, collector.unwrap())), + )); + affected_globals.push((global_id, global_id_idx)); + } + } + + // Single write-lock acquisition + { + let mut guard = collectors_by_instance.write().unwrap(); + for (global_id, position, entry) in entries.iter_mut() { + guard.get_mut(global_id).unwrap()[*position] = entry.take(); + } + } + + // Update atomic counters and mark ready instances + for (global_id, global_id_idx) in affected_globals { + if n_chunks_left[global_id_idx].fetch_sub(1, Ordering::SeqCst) == 1 { + pctx.set_witness_ready(global_id, true); + + let collect_start_time = collect_start_times[global_id_idx] + .load() + .expect("Collect start time was not set"); + let collect_duration = + collect_start_time.elapsed().as_millis() as u64; + + let (airgroup_id, air_id) = pctx + .dctx_get_instance_info(global_id) + .expect("Failed to get instance info"); + let new_stats = Stats::new_with_collection( + airgroup_id, + air_id, + global_id_chunks[&global_id].len(), + collect_start_time, + collect_duration, + ); + + stats.insert_witness_stats(global_id, new_stats); + } + } + } + }); + } + }); + + Ok(()) + } +} diff --git a/executor/src/dummy_counter.rs b/executor/src/dummy_counter.rs index ea3c3400b..4be2099a5 100644 --- a/executor/src/dummy_counter.rs +++ b/executor/src/dummy_counter.rs @@ -3,9 +3,9 @@ //! This counter is used as a default implementation when no actual counting or metrics //! collection is required. -use std::{any::Any, collections::VecDeque}; +use std::any::Any; -use zisk_common::{BusDevice, BusId, MemCollectorInfo, Metrics}; +use zisk_common::{BusDevice, Metrics}; /// The `DummyCounter` struct serves as a placeholder counter that performs no actions /// when connected to the data bus. @@ -15,6 +15,12 @@ use zisk_common::{BusDevice, BusId, MemCollectorInfo, Metrics}; #[derive(Default)] pub struct DummyCounter {} +impl DummyCounter { + #[inline(always)] + pub fn process_data(&mut self) -> bool { + true + } +} impl Metrics for DummyCounter { /// Does nothing when tracking activity on the bus. /// @@ -36,25 +42,6 @@ impl Metrics for DummyCounter { } impl BusDevice for DummyCounter { - #[inline(always)] - fn process_data( - &mut self, - _bus_id: &BusId, - _data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { - true - } - - /// Returns an empty vector as this counter is not associated with any bus IDs. - /// - /// # Returns - /// An empty vector of bus IDs. - fn bus_id(&self) -> Vec { - vec![] - } - /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/executor/src/emu_asm.rs b/executor/src/emu_asm.rs new file mode 100644 index 000000000..2d003517c --- /dev/null +++ b/executor/src/emu_asm.rs @@ -0,0 +1,313 @@ +use std::{ + collections::HashMap, + sync::{Arc, Mutex}, + thread::JoinHandle, +}; + +use crate::AsmResources; +use crate::{ + DeviceMetricsList, DummyCounter, NestedDeviceMetricsList, StaticSMBundle, MAX_NUM_STEPS, +}; +use asm_runner::{AsmRunnerMO, AsmRunnerMT, AsmRunnerRH}; +use data_bus::DataBusTrait; +use fields::PrimeField64; +use proofman_common::ProofCtx; +use sm_rom::RomSM; +use zisk_common::{ + io::ZiskStdin, stats_begin, stats_end, AsmExecutionInfo, ChunkId, EmuTrace, + ExecutorStatsHandle, StatsScope, +}; +use zisk_core::ZiskRom; +use ziskemu::ZiskEmulator; + +use anyhow::Result; + +pub struct EmulatorAsm { + /// World rank for distributed execution. Default to 0 for single-node execution. + world_rank: i32, + + /// Local rank for distributed execution. Default to 0 for single-node execution. + local_rank: i32, + + /// Map unlocked flag + /// This is used to unlock the memory map for the ROM file. + unlock_mapped_memory: bool, + + /// Chunk size for processing. + chunk_size: u64, + + /// Optional ROM state machine, used for assembly ROM execution. + rom_sm: Option>, + + /// Assembly resources including shared memory and hints stream. + asm_resources: Mutex>, + + asm_execution_info: Mutex>, +} + +impl EmulatorAsm { + #[allow(clippy::too_many_arguments)] + pub fn new( + world_rank: i32, + local_rank: i32, + unlock_mapped_memory: bool, + chunk_size: u64, + rom_sm: Option>, + _verbose_mode: proofman_common::VerboseMode, + ) -> Self { + Self { + world_rank, + local_rank, + unlock_mapped_memory, + chunk_size, + rom_sm, + asm_resources: Mutex::new(None), + asm_execution_info: Mutex::new(None), + } + } + + pub fn get_chunk_size(&self) -> u64 { + self.chunk_size + } + + pub fn get_asm_execution_info(&self) -> Option { + self.asm_execution_info.lock().unwrap().clone() + } + + pub fn set_asm_resources(&self, asm_resources: AsmResources) { + *self.asm_resources.lock().unwrap() = Some(asm_resources); + } + + pub fn reset_hints_stream(&self) { + self.asm_resources.lock().unwrap().as_ref().unwrap().reset(); + } + + pub fn set_rh_data(&self, rh_data: AsmRunnerRH) { + self.rom_sm.as_ref().unwrap().set_rh_data(rh_data); + } + + /// Computes minimal traces by processing the ZisK ROM with given public inputs. + /// + /// # Arguments + /// * `stdin` - Shared mutable access to the ZiskStdin providing public inputs. + /// * `pctx` - Proof context used during execution. + /// * `sm_bundle` - Static shared-memory bundle used by the executor. + /// * `stats` - Handle for collecting executor statistics. + /// * `_caller_stats_id` - Identifier used to attribute collected statistics to the caller. + /// + /// # Returns + /// A tuple containing: + /// * `Vec` - The computed minimal traces. + /// * `DeviceMetricsList` - Flat device metrics collected during execution. + /// * `NestedDeviceMetricsList` - Hierarchical device metrics collected during execution. + /// * `Option>` - Optional join handle for the memory-only ASM runner. + /// * `u64` - Total number of steps. + #[allow(clippy::type_complexity)] + #[allow(clippy::too_many_arguments)] + pub fn execute( + &self, + zisk_rom: &ZiskRom, + stdin: &Mutex, + pctx: &ProofCtx, + sm_bundle: &StaticSMBundle, + use_hints: bool, + stats: &ExecutorStatsHandle, + _caller_stats_scope: &StatsScope, + ) -> Result<( + Vec, + DeviceMetricsList, + NestedDeviceMetricsList, + Option>, + Option>, + u64, + )> { + let asm_resources_guard = self.asm_resources.lock().unwrap(); + let asm_resources = asm_resources_guard + .as_ref() + .ok_or_else(|| anyhow::anyhow!("AsmResources not initialized"))?; + + let has_hints_stream = asm_resources.is_hints_stream_initialized(); + if use_hints && has_hints_stream { + asm_resources.start_stream()?; + } + + stats_begin!(stats, _caller_stats_scope, _exec_scope, "EXECUTE_WITH_ASSEMBLY", 0); + + stats_begin!(stats, &_exec_scope, _write_scope, "ASM_WRITE_INPUT", 0); + + let config = asm_resources.config(); + + asm_resources.write_input(&stdin.lock().unwrap())?; + + stats_end!(stats, &_write_scope); + + let chunk_size = self.chunk_size; + let (world_rank, local_rank) = (self.world_rank, self.local_rank); + + let _stats = stats.clone(); + + // Run the assembly Memory Operations (MO) runner thread + let handle_mo = std::thread::spawn({ + let asm_shmem_mo = asm_resources.mo_shmem_reader.clone(); + let base_port = config.base_port; + move || { + AsmRunnerMO::run( + &mut asm_shmem_mo.lock().unwrap(), + MAX_NUM_STEPS, + chunk_size, + world_rank, + local_rank, + base_port, + _stats, + ) + .expect("Error during Assembly Memory Operations execution") + } + }); + + // Run the ROM histogram only on partition 0 as it is always computed by this partition + let has_rom_sm = pctx.dctx_is_first_process(); + + let _stats = stats.clone(); + + let handle_rh = (has_rom_sm).then(|| { + let asm_shmem_rh = asm_resources.rh_shmem_reader.clone(); + let base_port = config.base_port; + let unlock_mapped_memory = self.unlock_mapped_memory; + std::thread::spawn(move || { + AsmRunnerRH::run( + &mut asm_shmem_rh.lock().unwrap(), + MAX_NUM_STEPS, + world_rank, + local_rank, + base_port, + unlock_mapped_memory, + _stats, + ) + .expect("Error during ROM Histogram execution") + }) + }); + drop(asm_resources_guard); + + let (min_traces, main_count, secn_count) = self.run_mt_assembly(zisk_rom, sm_bundle, stats); + // Store execute steps + let steps = min_traces.iter().map(|trace| trace.steps).sum::(); + + stats_end!(stats, &_exec_scope); + + Ok((min_traces, main_count, secn_count, Some(handle_mo), handle_rh, steps)) + } + + fn run_mt_assembly( + &self, + zisk_rom: &ZiskRom, + sm_bundle: &StaticSMBundle, + stats: &ExecutorStatsHandle, + ) -> (Vec, DeviceMetricsList, NestedDeviceMetricsList) { + stats_begin!(stats, 0, _mt_scope, "RUN_MT_ASSEMBLY", 0); + + let results_mu: Mutex> = Mutex::new(Vec::new()); + + // Capture the parent scope ID so it can be copied into the closure + #[allow(unused_variables)] + let mt_scope_id = _mt_scope.id(); + + let (emu_traces, asm_execution_info) = rayon::in_place_scope(|scope| { + let on_chunk = |idx: usize, emu_trace: std::sync::Arc| { + let chunk_id = ChunkId(idx); + let results_ref = &results_mu; + scope.spawn(move |_| { + stats_begin!(stats, mt_scope_id, _chunk_scope, "MT_CHUNK_PLAYER", 0); + + let mut data_bus = sm_bundle.build_data_bus_counters(); + + ZiskEmulator::process_emu_trace::( + zisk_rom, + &emu_trace, + &mut data_bus, + false, + ); + + data_bus.on_close(); + + stats_end!(stats, &_chunk_scope); + + results_ref.lock().unwrap().push((chunk_id, data_bus)); + }); + }; + + let asm_resources_guard = self.asm_resources.lock().unwrap(); + let asm_resources = asm_resources_guard.as_ref().expect("AsmResources not initialized"); + let result = AsmRunnerMT::run_and_count( + &mut asm_resources.mt_shmem_reader.lock().unwrap(), + MAX_NUM_STEPS, + self.chunk_size, + on_chunk, + self.world_rank, + self.local_rank, + asm_resources.config().base_port, + stats.clone(), + ) + .expect("Error during ASM execution"); + drop(asm_resources_guard); + result + }); + + self.asm_execution_info.lock().unwrap().replace(asm_execution_info); + + // Unwrap the Arc pointers now that all rayon tasks have completed + let emu_traces = emu_traces + .into_iter() + .map(|arc| Arc::try_unwrap(arc).expect("Arc should have single owner after scope")) + .collect(); + + let mut data_buses = results_mu.into_inner().unwrap(); + + data_buses.sort_by_key(|(chunk_id, _)| chunk_id.0); + + let mut main_count = Vec::with_capacity(data_buses.len()); + let mut secn_count = HashMap::new(); + + for (chunk_id, data_bus) in data_buses { + let databus_counters = data_bus.into_devices(false); + + for (idx, counter) in databus_counters.into_iter() { + match idx { + None => { + main_count.push((chunk_id, counter.unwrap_or(Box::new(DummyCounter {})))); + } + Some(idx) => { + secn_count + .entry(idx) + .or_insert_with(Vec::new) + .push((chunk_id, counter.unwrap())); + } + } + } + } + + stats_end!(stats, &_mt_scope); + (emu_traces, main_count, secn_count) + } +} + +impl crate::Emulator for EmulatorAsm { + fn execute( + &self, + zisk_rom: &ZiskRom, + stdin: &Mutex, + pctx: &ProofCtx, + sm_bundle: &StaticSMBundle, + use_hints: bool, + stats: &ExecutorStatsHandle, + caller_stats_scope: &StatsScope, + ) -> Result<( + Vec, + DeviceMetricsList, + NestedDeviceMetricsList, + Option>, + Option>, + u64, + )> { + self.execute(zisk_rom, stdin, pctx, sm_bundle, use_hints, stats, caller_stats_scope) + } +} diff --git a/executor/src/emu_asm_stub.rs b/executor/src/emu_asm_stub.rs new file mode 100644 index 000000000..aabefe8e6 --- /dev/null +++ b/executor/src/emu_asm_stub.rs @@ -0,0 +1,73 @@ +use std::{ + sync::{Arc, Mutex}, + thread::JoinHandle, +}; + +use crate::{DeviceMetricsList, NestedDeviceMetricsList, StaticSMBundle}; +use anyhow::Result; +use asm_runner::{AsmRunnerMO, AsmRunnerRH}; + +use crate::AsmResources; +use fields::PrimeField64; +use proofman_common::ProofCtx; +use sm_rom::RomSM; +use zisk_common::{io::ZiskStdin, AsmExecutionInfo, EmuTrace, ExecutorStatsHandle, StatsScope}; +use zisk_core::ZiskRom; + +pub struct EmulatorAsm {} + +impl EmulatorAsm { + #[allow(clippy::too_many_arguments)] + pub fn new( + _world_rank: i32, + _local_rank: i32, + _unlock_mapped_memory: bool, + _chunk_size: u64, + _rom_sm: Option>, + _verbose_mode: proofman_common::VerboseMode, + ) -> Self { + unimplemented!("AsmRunner is only supported on Linux x86_64 platforms."); + } + + #[allow(clippy::type_complexity)] + #[allow(clippy::too_many_arguments)] + pub fn execute( + &self, + _zisk_rom: &ZiskRom, + _stdin: &Mutex, + _pctx: &ProofCtx, + _sm_bundle: &StaticSMBundle, + _use_hints: bool, + _stats: &ExecutorStatsHandle, + _caller_stats_scope: &StatsScope, + ) -> Result<( + Vec, + DeviceMetricsList, + NestedDeviceMetricsList, + Option>, + Option>, + u64, + )> { + unimplemented!("AsmRunner is only supported on Linux x86_64 platforms."); + } + + pub fn set_asm_resources(&self, _asm_resources: AsmResources) { + unimplemented!("AsmRunner is only supported on Linux x86_64 platforms."); + } + + pub fn set_rh_data(&self, _rh_data: AsmRunnerRH) { + unimplemented!("AsmRunner is only supported on Linux x86_64 platforms."); + } + + pub fn get_chunk_size(&self) -> u64 { + unimplemented!("AsmRunner is only supported on Linux x86_64 platforms."); + } + + pub fn reset_hints_stream(&self) { + unimplemented!("AsmRunner is only supported on Linux x86_64 platforms."); + } + + pub fn get_asm_execution_info(&self) -> Option { + unimplemented!("AsmRunner is only supported on Linux x86_64 platforms."); + } +} diff --git a/executor/src/emu_rust.rs b/executor/src/emu_rust.rs new file mode 100644 index 000000000..ee848b55c --- /dev/null +++ b/executor/src/emu_rust.rs @@ -0,0 +1,170 @@ +use std::{collections::HashMap, sync::Mutex}; + +use data_bus::DataBusTrait; +use fields::PrimeField64; +use proofman_common::ProofCtx; +use proofman_util::{timer_start_info, timer_stop_and_log_info}; +use rayon::prelude::*; +use zisk_common::{io::ZiskStdin, ChunkId, EmuTrace, ExecutorStatsHandle}; +use zisk_core::ZiskRom; +use ziskemu::{EmuOptions, ZiskEmulator}; + +use crate::{ + DeviceMetricsList, DummyCounter, EmulatorResult, NestedDeviceMetricsList, StaticSMBundle, + MAX_NUM_STEPS, +}; + +use anyhow::Result; + +pub struct EmulatorRust { + /// Chunk size for processing. + chunk_size: u64, +} + +impl EmulatorRust { + /// The number of threads to use for parallel processing when computing minimal traces. + const NUM_THREADS: usize = 16; + + pub fn new(chunk_size: u64) -> Self { + Self { chunk_size } + } + + pub fn get_chunk_size(&self) -> u64 { + self.chunk_size + } + + /// Computes minimal traces by processing the ZisK ROM with the given public inputs. + /// + /// # Arguments + /// * `stdin` - Shared standard input source used to feed data into the emulator. + /// * `_pctx` - Proof context carrying field-parameterized configuration for execution. + /// * `sm_bundle` - Static state machine bundle used for counting device metrics. + /// * `_stats` - Handle to executor statistics collection. + /// * `_caller_stats_scope` - Stats scope used to associate collected statistics with the caller. + /// + /// # Returns + /// A tuple containing: + /// * `Vec` - The minimal traces produced by the emulator. + /// * `DeviceMetricsList` - Metrics for primary devices. + /// * `NestedDeviceMetricsList` - Metrics for secondary/nested devices. + /// * `None`. + /// * `u64` - Total number of steps. + #[allow(clippy::type_complexity)] + pub fn execute( + &self, + zisk_rom: &ZiskRom, + stdin: &Mutex, + sm_bundle: &StaticSMBundle, + ) -> Result { + let min_traces = self.run_emulator(zisk_rom, Self::NUM_THREADS, &stdin.lock().unwrap()); + + // Store execute steps + let steps = min_traces.iter().map(|trace| trace.steps).sum::(); + + timer_start_info!(COUNT); + let (main_count, secn_count) = self.count(zisk_rom, &min_traces, sm_bundle); + timer_stop_and_log_info!(COUNT); + + Ok((min_traces, main_count, secn_count, None, None, steps)) + } + + fn run_emulator( + &self, + zisk_rom: &ZiskRom, + num_threads: usize, + stdin: &ZiskStdin, + ) -> Vec { + // Call emulate with these options + let input_data = stdin.read_raw_bytes(); + + // Settings for the emulator + let emu_options = EmuOptions { + chunk_size: Some(self.chunk_size), + max_steps: MAX_NUM_STEPS, + ..EmuOptions::default() + }; + + ZiskEmulator::compute_minimal_traces(zisk_rom, &input_data, &emu_options, num_threads) + .expect("Error during emulator execution") + } + + /// Counts metrics for secondary state machines based on minimal traces. + /// + /// # Arguments + /// * `min_traces` - Minimal traces obtained from the ROM execution. + /// + /// # Returns + /// A tuple containing two vectors: + /// * A vector of main state machine metrics grouped by chunk ID. + /// * A vector of secondary state machine metrics grouped by chunk ID. The vector is nested, + /// with the outer vector representing the secondary state machines and the inner vector + /// containing the metrics for each chunk. + fn count( + &self, + zisk_rom: &ZiskRom, + min_traces: &[EmuTrace], + sm_bundle: &StaticSMBundle, + ) -> (DeviceMetricsList, NestedDeviceMetricsList) { + let metrics_slices: Vec<_> = min_traces + .par_iter() + .map(|minimal_trace| { + let mut data_bus = sm_bundle.build_data_bus_counters(); + + ZiskEmulator::process_emu_trace::( + zisk_rom, + minimal_trace, + &mut data_bus, + true, + ); + + let mut counters = Vec::new(); + + let databus_counters = data_bus.into_devices(true); + for counter in databus_counters.into_iter() { + counters.push(counter); + } + + counters + }) + .collect(); + + let mut main_count = Vec::new(); + let mut secn_count = HashMap::new(); + + for (chunk_id, counter_slice) in metrics_slices.into_iter().enumerate() { + for (idx, counter) in counter_slice.into_iter() { + match idx { + None => { + main_count.push(( + ChunkId(chunk_id), + counter.unwrap_or_else(|| Box::new(DummyCounter {})), + )); + } + Some(idx) => { + secn_count + .entry(idx) + .or_insert_with(Vec::new) + .push((ChunkId(chunk_id), counter.unwrap())); + } + } + } + } + + (main_count, secn_count) + } +} + +impl crate::Emulator for EmulatorRust { + fn execute( + &self, + zisk_rom: &ZiskRom, + stdin: &Mutex, + _pctx: &ProofCtx, + sm_bundle: &StaticSMBundle, + _use_hints: bool, + _stats: &ExecutorStatsHandle, + _caller_stats_scope: &zisk_common::StatsScope, + ) -> Result { + self.execute(zisk_rom, stdin, sm_bundle) + } +} diff --git a/executor/src/executor.rs b/executor/src/executor.rs index 25601ea21..e6ac901c0 100644 --- a/executor/src/executor.rs +++ b/executor/src/executor.rs @@ -1,10 +1,9 @@ //! The `ZiskExecutor` module serves as the core orchestrator for executing the ZisK ROM program -//! and generating witness computations. It manages the execution of the state machines, from initial -//! planning to witness computation, ensuring efficient parallel processing and resource -//! utilization. +//! and generating witness computations. It manages the execution of the state machines, +//! from initial planning to witness computation. //! -//! This module handles both main and secondary state machines, integrating complex tasks such as -//! planning, configuration, and witness generation into a streamlined process. +//! This module handles both main and secondary state machines, integrating tasks such as +//! planning, configuration, and witness computation. //! //! ## Executor Workflow //! The execution is divided into distinct, sequential phases: @@ -19,1369 +18,280 @@ //! By structuring these phases, the `ZiskExecutor` ensures high-performance execution while //! maintaining clarity and modularity in the computation process. -use asm_runner::{ - write_input, AsmMTHeader, AsmRunnerMO, AsmRunnerMT, AsmRunnerRH, AsmServices, AsmSharedMemory, - MinimalTraces, PreloadedMO, PreloadedMT, PreloadedRH, SharedMemoryWriter, Task, TaskFactory, +use crate::{ + state::ExecutionState, witness_orchestrator::WitnessContext, AirClassifier, AsmResources, + EmulatorKind, InstancePlanner, InstanceRegistry, RomExecutor, StaticSMBundle, + WitnessOrchestrator, }; use fields::PrimeField64; -use pil_std_lib::Std; -use proofman_common::{create_pool, BufferPool, ProofCtx, ProofmanError, ProofmanResult, SetupCtx}; +use proofman_common::{create_pool, BufferPool, ProofCtx, ProofmanResult, SetupCtx}; use proofman_util::{timer_start_info, timer_stop_and_log_info}; -use rayon::prelude::*; -use rom_setup::gen_elf_hash; -use sm_rom::{RomInstance, RomSM}; -use std::sync::atomic::{AtomicUsize, Ordering}; +use sm_main::MainSM; +use std::{ + sync::{Arc, RwLock}, + time::Instant, +}; use witness::WitnessComponent; -use zisk_common::io::{ZiskIO, ZiskStdin}; - -use crate::DummyCounter; -use data_bus::DataBusTrait; -use sm_main::{MainInstance, MainPlanner, MainSM}; use zisk_common::{ - BusDevice, BusDeviceMetrics, CheckPoint, ExecutorStats, ExecutorStatsHandle, Instance, - InstanceCtx, InstanceType, Plan, Stats, ZiskExecutionResult, + io::ZiskStdin, stats_begin, stats_end, BusDeviceMetrics, ChunkId, ExecutorStatsHandle, + StatsCostPerType, StatsType, ZiskExecutorSummary, ZiskExecutorTime, }; -use zisk_common::{ChunkId, PayloadType}; +use zisk_core::ZiskRom; +use zisk_pil::ZiskPublicValues; use zisk_pil::{ - RomRomTrace, ZiskPublicValues, INPUT_DATA_AIR_IDS, MAIN_AIR_IDS, MEM_AIR_IDS, ROM_AIR_IDS, - ROM_DATA_AIR_IDS, ZISK_AIRGROUP_ID, -}; - -use std::thread::JoinHandle; -use std::time::Instant; -use std::{ - collections::HashMap, - path::PathBuf, - sync::{Arc, Mutex, RwLock}, + SPECIFIED_RANGES_AIR_IDS, VIRTUAL_TABLE_0_AIR_IDS, VIRTUAL_TABLE_1_AIR_IDS, ZISK_AIRGROUP_ID, }; -#[cfg(feature = "stats")] -use zisk_common::ExecutorStatsEvent; - -use crossbeam::atomic::AtomicCell; - -use zisk_common::EmuTrace; -use zisk_core::{ZiskRom, MAX_INPUT_SIZE}; -use ziskemu::{EmuOptions, ZiskEmulator}; -use crate::StaticSMBundle; +pub type DeviceMetricsByChunk = (ChunkId, Box); // (chunk_id, metrics) -type DeviceMetricsByChunk = (ChunkId, Box); // (chunk_id, metrics) -type DeviceMetricsList = Vec; -pub type NestedDeviceMetricsList = HashMap; - -#[allow(dead_code)] -enum MinimalTraceExecutionMode { - Emulator, - AsmWithCounter, -} +/// The maximum number of steps to execute in the emulator or assembly runner. +pub const MAX_NUM_STEPS: u64 = 1 << 36; /// The `ZiskExecutor` struct orchestrates the execution of the ZisK ROM program, managing state /// machines, planning, and witness computation. pub struct ZiskExecutor { - stdin: Mutex, - - /// ZisK ROM, a binary file containing the ZisK program to be executed. - pub zisk_rom: Arc, - - /// Path to the ZisK ROM file. - pub rom_path: PathBuf, - - /// Path to the assembly minimal trace binary file, if applicable. - pub asm_runner_path: Option, - - /// Path to the assembly ROM binary file, if applicable. - pub asm_rom_path: Option, - - /// Planning information for main state machines. - pub min_traces: Arc>, - - /// Planning information for secondary state machines. - pub secn_planning: RwLock>, - - /// Main state machine instances, indexed by their global ID. - pub main_instances: RwLock>>, - - /// Secondary state machine instances, indexed by their global ID. - pub secn_instances: RwLock>>>, - - /// Standard library instance, providing common functionalities. - std: Arc>, - - /// Execution result, including the number of executed steps. - execution_result: Mutex, - - /// State machine bundle, containing the state machines and their configurations. - sm_bundle: StaticSMBundle, - - /// Optional ROM state machine, used for assembly ROM execution. - rom_sm: Option>, - - /// Collectors by instance, storing statistics and collectors for each instance. - #[allow(clippy::type_complexity)] - collectors_by_instance: - Arc>)>>>>>, - - /// Statistics collected during the execution, including time taken for collection and witness computation. - stats: ExecutorStatsHandle, - - chunk_size: u64, - - /// World rank for distributed execution. Default to 0 for single-node execution. - world_rank: i32, - - /// Local rank for distributed execution. Default to 0 for single-node execution. - local_rank: i32, - - /// Optional baseline port to communicate with assembly microservices. - base_port: Option, - - /// Map unlocked flag - /// This is used to unlock the memory map for the ROM file. - unlock_mapped_memory: bool, - - asm_shmem_mt: Arc>>, - asm_shmem_mo: Arc>>, - asm_shmem_rh: Arc>>, - - shmem_input_writer: [Arc>>; AsmServices::SERVICES.len()], + /// Shared execution state. + state: ExecutionState, + /// ROM executor component. + rom_executor: RomExecutor, + /// Instance planner component. + planner: InstancePlanner, + /// Instance registry component. + registry: InstanceRegistry, + /// Witness orchestrator component. + orchestrator: WitnessOrchestrator, } impl ZiskExecutor { - /// The number of threads to use for parallel processing when computing minimal traces. - const NUM_THREADS: usize = 16; - - /// The maximum number of steps to execute in the emulator or assembly runner. - const MAX_NUM_STEPS: u64 = 1 << 32; - /// Creates a new instance of the `ZiskExecutor`. /// + /// The ROM can be set or changed via `set_rom()` before calling `execute()`. + /// /// # Arguments - /// * `zisk_rom` - An `Arc`-wrapped ZisK ROM instance. + /// * `std` - Standard library instance. + /// * `sm_bundle` - State machine bundle. + /// * `chunk_size` - Chunk size for processing. + /// * `emulator` - Emulator backend to use. + /// * `hints_stream` - Optional hints stream for processing precompile hints. #[allow(clippy::too_many_arguments)] - pub fn new( - rom_path: PathBuf, - asm_path: Option, - asm_rom_path: Option, - zisk_rom: Arc, - std: Arc>, - sm_bundle: StaticSMBundle, - rom_sm: Option>, - chunk_size: u64, - world_rank: i32, - local_rank: i32, - base_port: Option, - unlock_mapped_memory: bool, - ) -> Self { - #[cfg(not(all(target_os = "linux", target_arch = "x86_64")))] - let (asm_shmem_mt, asm_shmem_mo) = (None, None); - - #[cfg(all(target_os = "linux", target_arch = "x86_64"))] - let (asm_shmem_mt, asm_shmem_mo) = if asm_path.is_some() { - let mt = PreloadedMT::new(local_rank, base_port, unlock_mapped_memory) - .expect("Failed to create PreloadedMT"); - let mo = PreloadedMO::new(local_rank, base_port, unlock_mapped_memory) - .expect("Failed to create PreloadedMO"); - (Some(mt), Some(mo)) - } else { - (None, None) - }; + pub fn new(sm_bundle: StaticSMBundle, emulator: EmulatorKind) -> Self { + let sm_bundle = Arc::new(sm_bundle); + let is_asm_emulator = emulator.is_asm_emulator(); + let chunk_size = emulator.get_chunk_size(); Self { - stdin: Mutex::new(ZiskStdin::null()), - rom_path, - asm_runner_path: asm_path, - asm_rom_path, - zisk_rom, - min_traces: Arc::new(RwLock::new(MinimalTraces::None)), - secn_planning: RwLock::new(Vec::new()), - main_instances: RwLock::new(HashMap::new()), - secn_instances: RwLock::new(HashMap::new()), - collectors_by_instance: Arc::new(RwLock::new(HashMap::new())), - std, - execution_result: Mutex::new(ZiskExecutionResult::default()), - sm_bundle, - rom_sm, - stats: ExecutorStatsHandle::new(), - chunk_size, - world_rank, - local_rank, - base_port, - unlock_mapped_memory, - asm_shmem_mt: Arc::new(Mutex::new(asm_shmem_mt)), - asm_shmem_mo: Arc::new(Mutex::new(asm_shmem_mo)), - asm_shmem_rh: Arc::new(Mutex::new(None)), - shmem_input_writer: std::array::from_fn(|_| Arc::new(Mutex::new(None))), - } - } - - pub fn set_stdin(&self, stdin: ZiskStdin) { - let mut guard = self.stdin.lock().unwrap(); - *guard = stdin; - } - - #[allow(clippy::type_complexity)] - pub fn get_execution_result(&self) -> (ZiskExecutionResult, ExecutorStats) { - (self.execution_result.lock().unwrap().clone(), self.stats.get_inner()) - } - - pub fn store_stats(&self) { - self.stats.store_stats(); - } - - /// Computes minimal traces by processing the ZisK ROM with given public inputs. - /// - /// # Arguments - /// * `input_data` - Input data for the ROM execution. - /// * `num_threads` - Number of threads to use for parallel execution. - /// - /// # Returns - /// A vector of `EmuTrace` instances representing minimal traces. - fn execute_with_emulator(&self) -> MinimalTraces { - let min_traces = self.run_emulator(Self::NUM_THREADS, &mut self.stdin.lock().unwrap()); - - // Store execute steps - let steps = if let MinimalTraces::EmuTrace(min_traces) = &min_traces { - min_traces.iter().map(|trace| trace.steps).sum::() - } else { - panic!("Expected EmuTrace, got something else"); - }; - - self.execution_result.lock().unwrap().executed_steps = steps; - - min_traces - } - - /// Computes minimal traces by processing the ZisK ROM with given public inputs. - /// - /// # Arguments - /// * `input_data` - Input data for the ROM execution. - /// * `num_threads` - Number of threads to use for parallel execution. - /// - /// # Returns - /// A vector of `EmuTrace` instances representing minimal traces. - #[allow(clippy::type_complexity)] - fn execute_with_assembly( - &self, - pctx: &ProofCtx, - _caller_stats_id: u64, - ) -> (MinimalTraces, DeviceMetricsList, NestedDeviceMetricsList, Option>) - { - #[cfg(feature = "stats")] - let parent_stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat( - _caller_stats_id, - parent_stats_id, - "EXECUTE_WITH_ASSEMBLY", - 0, - ExecutorStatsEvent::Begin, - ); - - AsmServices::SERVICES.par_iter().enumerate().for_each(|(idx, service)| { - #[cfg(feature = "stats")] - let stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat( - parent_stats_id, - stats_id, - "ASM_WRITE_INPUT", - 0, - ExecutorStatsEvent::Begin, - ); - - let port = if let Some(base_port) = self.base_port { - AsmServices::port_for(service, base_port, self.local_rank) - } else { - AsmServices::default_port(service, self.local_rank) - }; - - let shmem_input_name = - AsmSharedMemory::::shmem_input_name(port, *service, self.local_rank); - - let mut input_writer = self.shmem_input_writer[idx].lock().unwrap(); - if input_writer.is_none() { - tracing::info!( - "Initializing SharedMemoryWriter for service {:?} at '{}'", - service, - shmem_input_name - ); - *input_writer = Some( - SharedMemoryWriter::new( - &shmem_input_name, - MAX_INPUT_SIZE as usize, - self.unlock_mapped_memory, - ) - .expect("Failed to create SharedMemoryWriter"), - ); - } - - write_input(&mut self.stdin.lock().unwrap(), input_writer.as_ref().unwrap()); - - // Add to executor stats - #[cfg(feature = "stats")] - self.stats.add_stat( - parent_stats_id, - stats_id, - "ASM_WRITE_INPUT", - 0, - ExecutorStatsEvent::End, - ); - }); - - let chunk_size = self.chunk_size; - let (world_rank, local_rank, base_port) = - (self.world_rank, self.local_rank, self.base_port); - - let stats = self.stats.clone(); - - // Run the assembly Memory Operations (MO) runner thread - let handle_mo = std::thread::spawn({ - let asm_shmem_mo = self.asm_shmem_mo.clone(); - move || { - AsmRunnerMO::run( - asm_shmem_mo.lock().unwrap().as_mut().unwrap(), - Self::MAX_NUM_STEPS, - chunk_size, - world_rank, - local_rank, - base_port, - stats, - ) - .expect("Error during Assembly Memory Operations execution") - } - }); - - let stats = self.stats.clone(); - - // Run the ROM histogram only on partition 0 as it is always computed by this partition - let has_rom_sm = pctx.dctx_is_first_partition(); - - let handle_rh = (has_rom_sm).then(|| { - let asm_shmem_rh = self.asm_shmem_rh.clone(); - let unlock_mapped_memory = self.unlock_mapped_memory; - std::thread::spawn(move || { - AsmRunnerRH::run( - &mut asm_shmem_rh.lock().unwrap(), - Self::MAX_NUM_STEPS, - world_rank, - local_rank, - base_port, - unlock_mapped_memory, - stats, - ) - .expect("Error during ROM Histogram execution") - }) - }); - - let (min_traces, main_count, secn_count) = self.run_mt_assembly(); - - // Store execute steps - let steps = if let MinimalTraces::AsmEmuTrace(asm_min_traces) = &min_traces { - asm_min_traces.vec_chunks.iter().map(|trace| trace.steps).sum::() - } else { - panic!("Expected AsmEmuTrace, got something else"); - }; - - self.execution_result.lock().unwrap().executed_steps = steps; - - // If the world rank is 0, wait for the ROM Histogram thread to finish and set the handler - if has_rom_sm { - self.rom_sm.as_ref().unwrap().set_asm_runner_handler( - handle_rh.expect("Error during Assembly ROM Histogram thread execution"), - ); - } - - #[cfg(feature = "stats")] - self.stats.add_stat( - 0, - parent_stats_id, - "EXECUTE_WITH_ASSEMBLY", - 0, - ExecutorStatsEvent::End, - ); - - (min_traces, main_count, secn_count, Some(handle_mo)) - } - - fn run_mt_assembly(&self) -> (MinimalTraces, DeviceMetricsList, NestedDeviceMetricsList) { - #[cfg(feature = "stats")] - let parent_stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat(0, parent_stats_id, "RUN_MT_ASSEMBLY", 0, ExecutorStatsEvent::Begin); - - struct CounterTask - where - DB: DataBusTrait>, - { - chunk_id: ChunkId, - emu_trace: Arc, - data_bus: DB, - zisk_rom: Arc, - _phantom: std::marker::PhantomData, - _stats: ExecutorStatsHandle, - _parent_stats_id: u64, - } - - impl Task for CounterTask - where - F: PrimeField64, - DB: DataBusTrait> + Send + Sync + 'static, - { - type Output = (ChunkId, DB); - - fn execute(mut self) -> Self::Output { - #[cfg(feature = "stats")] - let stats_id = self._stats.next_id(); - #[cfg(feature = "stats")] - self._stats.add_stat( - self._parent_stats_id, - stats_id, - "MT_CHUNK_PLAYER", - 0, - ExecutorStatsEvent::Begin, - ); - - ZiskEmulator::process_emu_trace::( - &self.zisk_rom, - &self.emu_trace, - &mut self.data_bus, - false, - ); - - self.data_bus.on_close(); - - // Add to executor stats - #[cfg(feature = "stats")] - self._stats.add_stat( - self._parent_stats_id, - stats_id, - "MT_CHUNK_PLAYER", - 0, - ExecutorStatsEvent::End, - ); - - (self.chunk_id, self.data_bus) - } - } - - let task_factory: TaskFactory<_> = - Box::new(|chunk_id: ChunkId, emu_trace: Arc| { - let data_bus = self.sm_bundle.build_data_bus_counters(); - CounterTask { - chunk_id, - emu_trace, - data_bus, - zisk_rom: self.zisk_rom.clone(), - _phantom: std::marker::PhantomData::, - _stats: self.stats.clone(), - #[cfg(feature = "stats")] - _parent_stats_id: parent_stats_id, - #[cfg(not(feature = "stats"))] - _parent_stats_id: 0, - } - }); - - let (asm_runner_mt, mut data_buses) = AsmRunnerMT::run_and_count( - self.asm_shmem_mt.lock().unwrap().as_mut().unwrap(), - Self::MAX_NUM_STEPS, - self.chunk_size, - task_factory, - self.world_rank, - self.local_rank, - self.base_port, - self.stats.clone(), - ) - .expect("Error during ASM execution"); - - data_buses.sort_by_key(|(chunk_id, _)| chunk_id.0); - - let mut main_count = Vec::with_capacity(data_buses.len()); - let mut secn_count = HashMap::new(); - - for (chunk_id, data_bus) in data_buses { - let databus_counters = data_bus.into_devices(false); - - for (idx, counter) in databus_counters.into_iter() { - match idx { - None => { - main_count.push((chunk_id, counter.unwrap_or(Box::new(DummyCounter {})))); - } - Some(idx) => { - secn_count - .entry(idx) - .or_insert_with(Vec::new) - .push((chunk_id, counter.unwrap())); - } - } - } - } - - #[cfg(feature = "stats")] - self.stats.add_stat(0, parent_stats_id, "RUN_MT_ASSEMBLY", 0, ExecutorStatsEvent::End); - (MinimalTraces::AsmEmuTrace(asm_runner_mt), main_count, secn_count) - } - - fn run_emulator(&self, num_threads: usize, stdin: &mut ZiskStdin) -> MinimalTraces { - // Call emulate with these options - let input_data = stdin.read(); - - // Settings for the emulator - let emu_options = EmuOptions { - chunk_size: Some(self.chunk_size), - max_steps: Self::MAX_NUM_STEPS, - ..EmuOptions::default() - }; - - let min_traces = ZiskEmulator::compute_minimal_traces( - &self.zisk_rom, - &input_data, - &emu_options, - num_threads, - ) - .expect("Error during emulator execution"); - - MinimalTraces::EmuTrace(min_traces) - } - - /// Adds main state machine instances to the proof context and assigns global IDs. - /// - /// # Arguments - /// * `pctx` - Proof context. - /// * `main_planning` - Planning information for main state machines. - fn assign_main_instances( - &self, - pctx: &ProofCtx, - global_ids: &RwLock>, - main_planning: Vec, - ) { - let mut main_instances = self.main_instances.write().unwrap(); - - for mut plan in main_planning { - let global_id = pctx - .add_instance_assign(plan.airgroup_id, plan.air_id) - .expect("Failed to add instance"); - plan.set_global_id(global_id); - global_ids.write().unwrap().push(global_id); - main_instances - .entry(global_id) - .or_insert_with(|| self.create_main_instance(plan, global_id)); - } - } - - /// Creates main state machine instance based on a main planning. - /// - /// # Arguments - /// * `global_id` - Global ID of the main instance to be created. - /// - /// # Returns - /// A main instance for the provided global ID. - fn create_main_instance(&self, plan: Plan, global_id: usize) -> MainInstance { - MainInstance::new(InstanceCtx::new(global_id, plan), self.std.clone()) - } - - /// Counts metrics for secondary state machines based on minimal traces. - /// - /// # Arguments - /// * `min_traces` - Minimal traces obtained from the ROM execution. - /// - /// # Returns - /// A tuple containing two vectors: - /// * A vector of main state machine metrics grouped by chunk ID. - /// * A vector of secondary state machine metrics grouped by chunk ID. The vector is nested, - /// with the outer vector representing the secondary state machines and the inner vector - /// containing the metrics for each chunk. - fn count(&self, min_traces: &MinimalTraces) -> (DeviceMetricsList, NestedDeviceMetricsList) { - let min_traces = match min_traces { - MinimalTraces::EmuTrace(min_traces) => min_traces, - MinimalTraces::AsmEmuTrace(asm_min_traces) => &asm_min_traces.vec_chunks, - _ => unreachable!(), - }; - - let metrics_slices: Vec<_> = min_traces - .par_iter() - .map(|minimal_trace| { - let mut data_bus = self.sm_bundle.build_data_bus_counters(); - - ZiskEmulator::process_emu_trace::( - &self.zisk_rom, - minimal_trace, - &mut data_bus, - true, - ); - - let mut counters = Vec::new(); - - let databus_counters = data_bus.into_devices(true); - for counter in databus_counters.into_iter() { - counters.push(counter); - } - - counters - }) - .collect(); - - let mut main_count = Vec::new(); - let mut secn_count = HashMap::new(); - - for (chunk_id, counter_slice) in metrics_slices.into_iter().enumerate() { - for (idx, counter) in counter_slice.into_iter() { - match idx { - None => { - main_count.push(( - ChunkId(chunk_id), - counter.unwrap_or_else(|| Box::new(DummyCounter {})), - )); - } - Some(idx) => { - secn_count - .entry(idx) - .or_insert_with(Vec::new) - .push((ChunkId(chunk_id), counter.unwrap())); - } - } - } - } - - (main_count, secn_count) - } - - /// Adds secondary state machine instances to the proof context and assigns global IDs. - /// - /// # Arguments - /// * `pctx` - Proof context. - /// * `secn_planning` - Planning information for secondary state machines. - fn assign_secn_instances( - &self, - pctx: &ProofCtx, - global_ids: &RwLock>, - secn_planning: &mut [Plan], - ) { - for plan in secn_planning.iter_mut() { - // If the node has rank 0 and the plan targets the ROM instance, - // we need to add it to the proof context using a special method. - // This method allows us to mark it as an instance to be computed by node 0. - let global_id = if plan.airgroup_id == ZISK_AIRGROUP_ID && plan.air_id == ROM_AIR_IDS[0] - { - // If this is the ROM instance, we need to add it to the proof context - // with the rank 0. - pctx.add_instance_assign_first_partition(plan.airgroup_id, plan.air_id) - .expect("Failed to add ROM instance") - } else { - match plan.instance_type { - InstanceType::Instance => pctx - .add_instance(plan.airgroup_id, plan.air_id) - .expect("Failed to add instance"), - InstanceType::Table => { - pctx.add_table(plan.airgroup_id, plan.air_id).expect("Failed to add table") - } - } - }; - - global_ids.write().unwrap().push(global_id); - plan.set_global_id(global_id); - } - } - - /// Creates a secondary state machine instance based on the provided global ID. - /// - /// # Arguments - /// * `global_id` - Global ID of the secondary state machine instance. - /// - /// # Returns - /// A secondary state machine instance for the provided global ID. - fn create_secn_instance(&self, global_id: usize) -> Box> { - let mut secn_planning_guard = self.secn_planning.write().unwrap(); - - let plan_idx = - secn_planning_guard.iter().position(|plan| plan.global_id.unwrap() == global_id); - if plan_idx.is_none() { - panic!("Secondary instance not found"); + state: ExecutionState::new(), + rom_executor: RomExecutor::new(emulator), + planner: InstancePlanner::new(chunk_size), + registry: InstanceRegistry::new(sm_bundle.clone()), + orchestrator: WitnessOrchestrator::new(chunk_size, sm_bundle, is_asm_emulator), } - - let plan_idx = plan_idx.unwrap(); - let plan = secn_planning_guard.remove(plan_idx); - - let global_id = plan.global_id.unwrap(); - - let ictx = InstanceCtx::new(global_id, plan); - self.sm_bundle.build_instance(ictx) } - /// Expands and computes witnesses for a main instance. + /// Sets the ZisK ROM (ELF) for execution. /// - /// # Arguments - /// * `pctx` - Proof context. - /// * `main_instance` - Main instance to compute witness for - fn witness_main_instance( - &self, - pctx: &ProofCtx, - main_instance: &MainInstance, - trace_buffer: Vec, - _caller_stats_id: u64, - ) -> ProofmanResult<()> { - let (airgroup_id, air_id) = pctx - .dctx_get_instance_info(main_instance.ictx.global_id) - .expect("Failed to get instance info"); - let witness_start_time = Instant::now(); - - #[cfg(feature = "stats")] - let stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat( - _caller_stats_id, - stats_id, - "AIR_MAIN_WITNESS", - air_id, - ExecutorStatsEvent::Begin, - ); - - let min_traces_guard = self.min_traces.read().unwrap(); - let min_traces = &*min_traces_guard; - - let min_traces = match min_traces { - MinimalTraces::EmuTrace(min_traces) => min_traces, - MinimalTraces::AsmEmuTrace(asm_min_traces) => &asm_min_traces.vec_chunks, - _ => unreachable!(), - }; - - let air_instance = main_instance.compute_witness( - &self.zisk_rom, - min_traces, - self.chunk_size, - main_instance, - trace_buffer, - )?; - - pctx.add_air_instance(air_instance, main_instance.ictx.global_id); - - #[cfg(feature = "stats")] - self.stats.add_stat( - _caller_stats_id, - stats_id, - "AIR_MAIN_WITNESS", - air_id, - ExecutorStatsEvent::End, - ); - - let stats = Stats { - airgroup_id, - air_id, - collect_start_time: Instant::now(), - collect_duration: 0, - witness_start_time: Instant::now(), - witness_duration: witness_start_time.elapsed().as_millis(), - num_chunks: 0, - }; - - self.stats.insert_witness_stats(main_instance.ictx.global_id, stats); - - Ok(()) - } - - /// computes witness for a secondary state machines instance. + /// This method allows changing the ROM between executions without + /// recreating the executor, making the executor more reusable. /// /// # Arguments - /// * `pctx` - Proof context. - /// * `sctx` - Setup context. - /// * `global_id` - Global ID of the secondary state machine instance. - /// * `secn_instance` - Secondary state machine instance to compute witness for - fn witness_secn_instance( - &self, - pctx: &ProofCtx, - sctx: &SetupCtx, - global_id: usize, - secn_instance: &dyn Instance, - trace_buffer: Vec, - _caller_stats_id: u64, - ) -> ProofmanResult<()> { - let witness_start_time = Instant::now(); - - #[cfg(feature = "stats")] - let (_airgroup_id, air_id) = pctx.dctx_get_instance_info(global_id); - #[cfg(feature = "stats")] - let stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat( - _caller_stats_id, - stats_id, - "AIR_SECN_WITNESS", - air_id, - ExecutorStatsEvent::Begin, - ); - - let collectors_by_instance = { - let mut guard = self.collectors_by_instance.write().unwrap(); - - guard - .remove(&global_id) - .expect("Missing collectors for given global_id") - .into_iter() - .map(Option::unwrap) // All are guaranteed to be Some - .collect() - }; - - if let Some(air_instance) = - secn_instance.compute_witness(pctx, sctx, collectors_by_instance, trace_buffer)? - { - pctx.add_air_instance(air_instance, global_id); - } - #[cfg(feature = "stats")] - { - self.stats.add_stat( - _caller_stats_id, - stats_id, - "AIR_SECN_WITNESS", - air_id, - ExecutorStatsEvent::End, - ); - } - self.stats.set_witness_duration(global_id, witness_start_time.elapsed().as_millis()); - Ok(()) - } - - fn order_chunks( - &self, - chunks_to_execute: &[Vec], - global_id_chunks: &HashMap>, - ) -> Vec { - let mut ordered_chunks = Vec::new(); - let mut already_selected_chunks = vec![false; chunks_to_execute.len()]; - - let mut n_global_ids_incompleted = global_id_chunks.len(); - let mut n_chunks_by_global_id: HashMap = - global_id_chunks.iter().map(|(global_id, chunks)| (*global_id, chunks.len())).collect(); - - while n_global_ids_incompleted > 0 { - let selected_global_id = n_chunks_by_global_id - .iter() - .filter(|(_, &count)| count > 0) - .min_by_key(|(_, &count)| count) - .map(|(&global_id, _)| global_id); - - if let Some(global_id) = selected_global_id { - for chunk_id in global_id_chunks[&global_id].iter() { - if already_selected_chunks[*chunk_id] { - continue; - } - ordered_chunks.push(*chunk_id); - already_selected_chunks[*chunk_id] = true; - for global_idx in chunks_to_execute[*chunk_id].iter() { - if let Some(count) = n_chunks_by_global_id.get_mut(global_idx) { - *count -= 1; - if *count == 0 { - n_chunks_by_global_id.remove(global_idx); - n_global_ids_incompleted -= 1; - } - } - } - } - } else { - break; - } - } - - ordered_chunks + /// * `zisk_rom` - The ZisK ROM to execute. + pub fn set_rom(&self, zisk_rom: Arc, use_hints: bool) { + self.state.set_rom(zisk_rom.clone(), use_hints); + self.orchestrator.set_rom(zisk_rom); } - /// Expands for a secondary state machines instance. - /// - /// # Arguments - /// * `pctx` - Proof context. - /// * `sctx` - Setup context. - /// * `global_id` - Global ID of the secondary state machine instance. - /// * `secn_instance` - Secondary state machine instance to compute witness for - #[allow(clippy::borrowed_box)] - fn witness_collect_instances( - &self, - pctx: Arc>, - secn_instances: HashMap>>, - ) { - let min_traces = self.min_traces.read().unwrap(); - - let min_traces = match &*min_traces { - MinimalTraces::EmuTrace(min_traces) => min_traces, - MinimalTraces::AsmEmuTrace(asm_min_traces) => &asm_min_traces.vec_chunks, - _ => unreachable!(), - }; - - // Group the instances by the chunk they need to process - let (chunks_to_execute, global_id_chunks) = - self.chunks_to_execute(min_traces, &secn_instances); - - let ordered_chunks = self.order_chunks(&chunks_to_execute, &global_id_chunks); - let global_ids: Vec = secn_instances.keys().copied().collect(); - - let collect_start_times: Vec>>> = - global_ids.iter().map(|_| Arc::new(AtomicCell::new(None))).collect(); - - let chunks_to_execute_clone = chunks_to_execute.clone(); - - let global_ids_map: HashMap = - global_ids.iter().enumerate().map(|(idx, &id)| (id, idx)).collect(); - - // Create data buses for each chunk - let data_buses = self - .sm_bundle - .build_data_bus_collectors(&pctx, &secn_instances, &chunks_to_execute) - .into_iter() - .map(|db| Arc::new(Mutex::new(db))) - .collect::>(); - - let n_chunks_left: Vec> = global_ids - .iter() - .map(|global_id| Arc::new(AtomicUsize::new(global_id_chunks[global_id].len()))) - .collect(); - - for global_id in global_ids.iter() { - let (airgroup_id, air_id) = - pctx.dctx_get_instance_info(*global_id).expect("Failed to get instance info"); - let stats = Stats { - airgroup_id, - air_id, - collect_start_time: Instant::now(), - collect_duration: 0, - witness_start_time: Instant::now(), - witness_duration: 0, - num_chunks: global_id_chunks[global_id].len(), - }; - - self.collectors_by_instance - .write() - .unwrap() - .insert(*global_id, (0..global_id_chunks[global_id].len()).map(|_| None).collect()); - self.stats.insert_witness_stats(*global_id, stats); - } - - let next_chunk = Arc::new(AtomicUsize::new(0)); - let n_threads = rayon::current_num_threads(); - - let mut handles = Vec::with_capacity(n_threads); - for _ in 0..n_threads { - let next_chunk = Arc::clone(&next_chunk); - let min_traces_lock = Arc::clone(&self.min_traces); - let data_buses = data_buses.clone(); - let zisk_rom = self.zisk_rom.clone(); - let n_chunks_left = n_chunks_left.clone(); - let global_ids_map = global_ids_map.clone(); - let global_id_chunks = global_id_chunks.clone(); - let collectors_by_instance = self.collectors_by_instance.clone(); - let ordered_chunks_clone = ordered_chunks.clone(); - - let pctx_clone = pctx.clone(); - - let chunks_to_execute = chunks_to_execute_clone.clone(); - - let collect_start_times = collect_start_times.clone(); - - let _stats = self.stats.clone(); - handles.push(std::thread::spawn(move || { - let guard = min_traces_lock.read().unwrap(); - let min_traces = match &*guard { - MinimalTraces::EmuTrace(v) => v, - MinimalTraces::AsmEmuTrace(a) => &a.vec_chunks, - _ => unreachable!(), - }; - loop { - let next_chunk_id = next_chunk.fetch_add(1, Ordering::SeqCst); - if next_chunk_id >= ordered_chunks_clone.len() { - break; - } - let chunk_id = ordered_chunks_clone[next_chunk_id]; - - if let Some(mut data_bus) = data_buses[chunk_id].lock().unwrap().take() { - for global_id in chunks_to_execute[chunk_id].iter() { - let start_time_cell = &collect_start_times[global_ids_map[global_id]]; - if start_time_cell.load().is_none() { - start_time_cell.store(Some(Instant::now())); - } - } - - ZiskEmulator::process_emu_traces::( - &zisk_rom, - min_traces, - chunk_id, - &mut data_bus, - ); - - for (global_id, collector) in data_bus.into_devices(false) { - if let Some(global_id) = global_id { - let global_id_idx = global_ids_map - .get(&global_id) - .expect("Global ID not found in map"); - - let chunk_order = &global_id_chunks[&global_id]; - let position = chunk_order - .iter() - .position(|&id| id == chunk_id) - .expect("Chunk ID not found in order"); - - collectors_by_instance - .write() - .unwrap() - .get_mut(&global_id) - .unwrap()[position] = Some((chunk_id, collector.unwrap())); - - if n_chunks_left[*global_id_idx].fetch_sub(1, Ordering::SeqCst) == 1 - { - pctx_clone.set_witness_ready(global_id, true); - - let collect_start_time = collect_start_times[*global_id_idx] - .load() - .expect("Collect start time was not set"); - let collect_duration = - collect_start_time.elapsed().as_millis() as u64; - - let (airgroup_id, air_id) = pctx_clone - .dctx_get_instance_info(global_id) - .expect("Failed to get instance info"); - let stats = Stats { - airgroup_id, - air_id, - collect_start_time, - collect_duration, - witness_start_time: Instant::now(), - witness_duration: 0, - num_chunks: global_id_chunks[&global_id].len(), - }; - - _stats.insert_witness_stats(global_id, stats); - } - } - } - } - } - })); - } - - for handle in handles { - handle.join().unwrap(); - } + /// Sets the standard input for execution. + pub fn set_stdin(&self, stdin: ZiskStdin) { + self.rom_executor.set_stdin(stdin); } - /// Computes and generates witness for secondary state machine instance of type `Table`. - /// - /// # Arguments - /// * `pctx` - Proof context. - /// * `sctx` - Setup context. - /// * `global_id` - Global ID of the secondary state machine instance. - /// * `table_instance` - Secondary state machine table instance to compute witness for - fn witness_table( - &self, - pctx: &ProofCtx, - sctx: &SetupCtx, - global_id: usize, - table_instance: &dyn Instance, - trace_buffer: Vec, - _caller_stats_id: u64, - ) -> ProofmanResult<()> { - #[cfg(feature = "stats")] - let (_airgroup_id, air_id) = pctx.dctx_get_instance_info(global_id); - #[cfg(feature = "stats")] - let stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat( - _caller_stats_id, - stats_id, - "AIR_WITNESS_TABLE", - air_id, - ExecutorStatsEvent::Begin, - ); - assert_eq!(table_instance.instance_type(), InstanceType::Table, "Instance is not a table"); - - if let Some(air_instance) = - table_instance.compute_witness(pctx, sctx, vec![], trace_buffer)? - { - if pctx - .dctx_is_my_process_instance(global_id) - .expect("Failed to check instance ownership") - { - pctx.add_air_instance(air_instance, global_id); - } - } - - #[cfg(feature = "stats")] - self.stats.add_stat( - _caller_stats_id, - stats_id, - "AIR_WITNESS_TABLE", - air_id, - ExecutorStatsEvent::Begin, - ); - - Ok(()) + /// Sets ASM resources for execution (only applicable for ASM emulator). + pub fn set_asm_resources(&self, asm_resources: AsmResources) { + self.rom_executor.set_asm_resources(asm_resources); } - /// Computes all the chunks to be executed to generate the witness given an instance. - /// - /// # Arguments - /// * `min_traces` - Minimal traces - /// * `secn_instance` - Secondary state machine instance to group. - /// - /// # Returns - /// A vector of booleans indicating which chunks to execute. - #[allow(clippy::borrowed_box)] - fn chunks_to_execute( - &self, - min_traces: &[EmuTrace], - secn_instances: &HashMap>>, - ) -> (Vec>, HashMap>) { - let mut chunks_to_execute = vec![Vec::new(); min_traces.len()]; - let mut global_id_chunks: HashMap> = HashMap::new(); - secn_instances.iter().for_each(|(global_idx, secn_instance)| { - match secn_instance.check_point() { - CheckPoint::None => {} - CheckPoint::Single(chunk_id) => { - chunks_to_execute[chunk_id.as_usize()].push(*global_idx); - global_id_chunks.entry(*global_idx).or_default().push(chunk_id.as_usize()); - } - CheckPoint::Multiple(chunk_ids) => { - chunk_ids.iter().for_each(|&chunk_id| { - chunks_to_execute[chunk_id.as_usize()].push(*global_idx); - global_id_chunks.entry(*global_idx).or_default().push(chunk_id.as_usize()); - }); - } - } - }); - - for chunk_ids in global_id_chunks.values_mut() { - chunk_ids.sort(); - } - - (chunks_to_execute, global_id_chunks) + /// Gets the execution result and stats. + #[allow(clippy::type_complexity)] + pub fn get_execution_result(&self) -> (ZiskExecutorSummary, ExecutorStatsHandle) { + (self.state.get_execution_result(), self.state.get_stats()) } - fn reset(&self) { - // Reset the internal state of the executor - *self.execution_result.lock().unwrap() = ZiskExecutionResult::default(); - *self.min_traces.write().unwrap() = MinimalTraces::None; - *self.secn_planning.write().unwrap() = Vec::new(); - self.main_instances.write().unwrap().clear(); - self.secn_instances.write().unwrap().clear(); - self.collectors_by_instance.write().unwrap().clear(); - self.stats.reset(); + /// Stores statistics to persistent storage. + pub fn store_stats(&self) { + self.state.stats.store_stats(); } } impl WitnessComponent for ZiskExecutor { /// Executes the ZisK ROM program and calculate the plans for main and secondary state machines. - /// - /// # Arguments - /// * `pctx` - Proof context. - /// - /// # Returns - /// A vector of global IDs for the instances to compute witness for. fn execute( &self, pctx: Arc>, + sctx: Arc>, global_ids: &RwLock>, ) -> ProofmanResult<()> { - #[cfg(feature = "stats")] - let parent_stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat(0, parent_stats_id, "EXECUTE", 0, ExecutorStatsEvent::Begin); + let start_total = Instant::now(); + self.state.reset(); - self.reset(); + stats_begin!(self.state.stats, 0, _exec_scope, "EXECUTE", 0); // Set the start time of the current execution - self.stats.set_start_time(Instant::now()); + self.state.stats.set_start_time(Instant::now()); - // Process the ROM to collect the Minimal Traces + // Phase 1: Execute ROM to collect minimal traces timer_start_info!(COMPUTE_MINIMAL_TRACE); - - assert_eq!(self.asm_runner_path.is_some(), self.asm_rom_path.is_some()); - - let (min_traces, main_count, mut secn_count, handle_mo) = if self.asm_runner_path.is_some() - { - // If we are executing in assembly mode - self.execute_with_assembly( + let start_partial = Instant::now(); + + let zisk_rom = self + .state + .get_rom() + .map_err(|e| proofman_common::ProofmanError::InvalidSetup(e.to_string()))?; + let output = self + .rom_executor + .execute( + &zisk_rom, &pctx, - #[cfg(feature = "stats")] - parent_stats_id, - #[cfg(not(feature = "stats"))] - 0, + self.registry.sm_bundle(), + self.state.use_hints.load(std::sync::atomic::Ordering::SeqCst), + &self.state.stats, + &_exec_scope, ) - } else { - // Otherwise, use the emulator - let min_traces = self.execute_with_emulator(); - - timer_start_info!(COUNT); - let (main_count, secn_count) = self.count(&min_traces); - timer_stop_and_log_info!(COUNT); + .expect("Failed to execute ROM and collect minimal traces"); - (min_traces, main_count, secn_count, None) - }; + let execution_duration = start_partial.elapsed(); timer_stop_and_log_info!(COMPUTE_MINIMAL_TRACE); - // Plan the main and secondary instances using the counted metrics - // stats_begin!(stats_next_id!(), parent_stats_id, "PLAN"); - #[cfg(feature = "stats")] - let stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat(parent_stats_id, stats_id, "MAIN_PLAN", 0, ExecutorStatsEvent::Begin); + // Phase 2: Plan main instances + stats_begin!(self.state.stats, &_exec_scope, _main_plan_scope, "MAIN_PLAN", 0); timer_start_info!(PLAN); - let (main_planning, public_values) = - MainPlanner::plan::(&min_traces, main_count, self.chunk_size); - *self.min_traces.write().unwrap() = min_traces; - self.assign_main_instances(&pctx, global_ids, main_planning); + let start_partial = Instant::now(); + + let main_output = self.planner.plan_main::(&output.min_traces, output.main_count); + *self.state.min_traces.write().unwrap() = Some(output.min_traces); - // Add to executor stats - #[cfg(feature = "stats")] - self.stats.add_stat(parent_stats_id, stats_id, "MAIN_PLAN", 0, ExecutorStatsEvent::End); - #[cfg(feature = "stats")] - let stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat(parent_stats_id, stats_id, "SECN_PLAN", 0, ExecutorStatsEvent::Begin); + let (main_assignments, cost_main) = + self.planner.assign_main_instances(&pctx, &sctx, global_ids, main_output.plans); + self.registry.populate_main_instances(&pctx, &self.state, main_assignments)?; - let mut secn_planning = self.sm_bundle.plan_sec(&mut secn_count); + stats_end!(self.state.stats, &_main_plan_scope); + // Phase 3: Plan secondary instances + stats_begin!(self.state.stats, &_exec_scope, _secn_plan_scope, "SECN_PLAN", 0); + + let mut secn_count = output.secn_count; + let mut secn_planning = + self.planner.plan_secondary(self.registry.sm_bundle(), &mut secn_count); + + let count_and_plan_duration = start_partial.elapsed(); timer_stop_and_log_info!(PLAN); - timer_start_info!(PLAN_MEM_CPP); - // Add to executor stats - #[cfg(feature = "stats")] - self.stats.add_stat(parent_stats_id, stats_id, "SECN_PLAN", 0, ExecutorStatsEvent::End); + timer_start_info!(WAIT_PLAN_MEM_CPP); + stats_end!(self.state.stats, &_secn_plan_scope); + let start_partial = Instant::now(); - if let Some(handle_mo) = handle_mo { - #[cfg(feature = "stats")] - let stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat( - parent_stats_id, - stats_id, - "MO_PLAN_WAIT", - 0, - ExecutorStatsEvent::Begin, - ); + // Handle memory operations from ASM runner + if let Some(handle_mo) = output.handle_mo { + stats_begin!(self.state.stats, &_exec_scope, _mo_wait_scope, "MO_PLAN_WAIT", 0); - // Wait for the memory operations thread to finish let asm_runner_mo = handle_mo.join().expect("Error during Assembly Memory Operations thread execution"); - // Add to executor stats - #[cfg(feature = "stats")] - self.stats.add_stat( - parent_stats_id, - stats_id, - "MO_PLAN_WAIT", - 0, - ExecutorStatsEvent::End, - ); - #[cfg(feature = "stats")] - let stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat( - parent_stats_id, - stats_id, - "MO_PLAN_ADD", - 0, - ExecutorStatsEvent::Begin, - ); + stats_end!(self.state.stats, &_mo_wait_scope); + stats_begin!(self.state.stats, &_exec_scope, _mo_add_scope, "MO_PLAN_ADD", 0); secn_planning - .entry(self.sm_bundle.get_mem_sm_id()) + .entry(self.registry.sm_bundle().get_mem_sm_id()) .or_default() .extend(asm_runner_mo.plans); - // Add to executor stats - #[cfg(feature = "stats")] - self.stats.add_stat( - parent_stats_id, - stats_id, - "MO_PLAN_ADD", - 0, - ExecutorStatsEvent::End, - ); + stats_end!(self.state.stats, &_mo_add_scope); + } + + let count_and_plan_mo_duration = start_partial.elapsed(); + timer_stop_and_log_info!(WAIT_PLAN_MEM_CPP); + + if let Some(handle_rh) = output.handle_rh { + timer_start_info!(WAIT_ASM_RH); + let rh_data = handle_rh.join().expect("Error during ROM Histogram thread execution"); + + self.rom_executor.set_rh_data(rh_data); + timer_stop_and_log_info!(WAIT_ASM_RH); } - timer_stop_and_log_info!(PLAN_MEM_CPP); + // Phase 4: Configure and assign secondary instances + stats_begin!(self.state.stats, &_exec_scope, _config_scope, "CONFIGURE_INSTANCES", 0); - #[cfg(feature = "stats")] - let stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat( - parent_stats_id, - stats_id, - "CONFIGURE_INSTANCES", - 0, - ExecutorStatsEvent::Begin, - ); + // Configure secondary state machine instances based on planning + self.registry.configure_sm_instances(&pctx, &secn_planning); - // Configure the instances - self.sm_bundle.configure_instances(&pctx, &secn_planning); + let mut cost_per_type = StatsCostPerType::default(); + cost_per_type.add_cost(StatsType::Main, cost_main); - // Flatten all plans - let mut secn_planning = - secn_planning.into_iter().flat_map(|(_, plans)| plans).collect::>(); + let mut secn_planning: Vec<_> = + secn_planning.into_iter().flat_map(|(_, plans)| plans).collect(); - // Assign the instances - self.assign_secn_instances(&pctx, global_ids, &mut secn_planning); + self.planner.assign_secn_instances(&pctx, global_ids, &mut secn_planning); - // Get the global IDs of the instances to compute witness for - let secn_global_ids = - secn_planning.iter().map(|plan| plan.global_id.unwrap()).collect::>(); - let secn_global_ids_vec: Vec = secn_global_ids.to_vec(); + let secn_global_ids: Vec = + secn_planning.iter().map(|plan| plan.global_id.unwrap()).collect(); // Add public values to the proof context let mut publics = ZiskPublicValues::from_vec_guard(pctx.get_publics()); - for (index, value) in public_values.iter() { + for (index, value) in main_output.public_values.iter() { publics.inputs[*index as usize] = F::from_u32(*value); } drop(publics); - // Update internal state with the computed minimal traces and planning. - *self.secn_planning.write().unwrap() = secn_planning; + // Store secondary planning in execution state + *self.state.secn_planning.write().unwrap() = secn_planning; - let mut secn_instances = self.secn_instances.write().unwrap(); - for global_id in &secn_global_ids_vec { - secn_instances - .entry(*global_id) - .or_insert_with(|| self.create_secn_instance(*global_id)); - secn_instances[global_id].reset(); - if secn_instances[global_id].instance_type() == InstanceType::Instance { - let checkpoint = secn_instances[global_id].check_point(); - let chunks = match checkpoint { - CheckPoint::None => vec![], - CheckPoint::Single(chunk_id) => vec![chunk_id.as_usize()], - CheckPoint::Multiple(chunk_ids) => { - chunk_ids.iter().map(|id| id.as_usize()).collect() - } - }; - let (_, air_id) = - pctx.dctx_get_instance_info(*global_id).expect("Failed to get instance info"); - let mem_global_id = air_id == MEM_AIR_IDS[0] - || air_id == ROM_DATA_AIR_IDS[0] - || air_id == INPUT_DATA_AIR_IDS[0]; - pctx.dctx_set_chunks(*global_id, chunks, mem_global_id); - } - } + // Create secondary instances + self.registry.populate_secn_instances(&self.state, &secn_global_ids); - // Add to executor stats - #[cfg(feature = "stats")] - self.stats.add_stat( - parent_stats_id, - stats_id, - "CONFIGURE_INSTANCES", - 0, - ExecutorStatsEvent::End, - ); + // Configure instance checkpoints using registry method + self.registry.configure_checkpoints(&pctx, &self.state, &secn_global_ids); - #[cfg(feature = "stats")] - self.stats.add_stat(0, parent_stats_id, "EXECUTE", 0, ExecutorStatsEvent::End); + // Reset hints stream + self.rom_executor.reset_hints_stream(); - // #[cfg(feature = "stats")] - // self.stats.lock().unwrap().store_stats(); + stats_end!(self.state.stats, &_config_scope); + stats_end!(self.state.stats, &_exec_scope); + + let tables_air_ids = + [SPECIFIED_RANGES_AIR_IDS[0], VIRTUAL_TABLE_0_AIR_IDS[0], VIRTUAL_TABLE_1_AIR_IDS[0]]; + for air_id in tables_air_ids { + let setup = sctx.get_setup(ZISK_AIRGROUP_ID, air_id)?; + let n_bits = setup.stark_info.stark_struct.n_bits; + let total_cols: u64 = setup + .stark_info + .map_sections_n + .iter() + .filter(|(key, _)| *key != "const") + .map(|(_, value)| *value) + .sum(); + let cost = (1 << n_bits) * total_cols; + cost_per_type.add_cost(StatsType::Tables, cost); + } + + let zisk_execution_time = ZiskExecutorTime { + execution_duration, + count_and_plan_duration, + count_and_plan_mo_duration, + total_duration: start_total.elapsed(), + asm_execution_duration: self.rom_executor.get_asm_execution_info(), + }; + // Store the execution result + let execution_result = + ZiskExecutorSummary::new(output.steps, zisk_execution_time, cost_per_type); + + // Store the execution result + self.state.set_execution_result(execution_result); Ok(()) } /// Computes the witness for the main and secondary state machines. - /// - /// # Arguments - /// * `stage` - The current stage id - /// * `pctx` - Proof context. - /// * `sctx` - Setup context. - /// * `global_ids` - Global IDs of the instances to compute witness for. fn calculate_witness( &self, stage: u32, @@ -1395,90 +305,18 @@ impl WitnessComponent for ZiskExecutor { return Ok(()); } - #[cfg(feature = "stats")] - let parent_stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat(0, parent_stats_id, "CALCULATE_WITNESS", 0, ExecutorStatsEvent::Begin); + stats_begin!(self.state.stats, 0, _witness_scope, "CALCULATE_WITNESS", 0); let pool = create_pool(n_cores); pool.install(|| -> ProofmanResult<()> { + let ctx = WitnessContext::new(&pctx, &sctx, &self.state, buffer_pool, &_witness_scope); for &global_id in global_ids { - let (airgroup_id, air_id) = - pctx.dctx_get_instance_info(global_id).expect("Failed to get instance info"); - - if MAIN_AIR_IDS.contains(&air_id) { - let main_instance = &self.main_instances.read().unwrap()[&global_id]; - - self.witness_main_instance( - &pctx, - main_instance, - buffer_pool.take_buffer(), - #[cfg(feature = "stats")] - parent_stats_id, - #[cfg(not(feature = "stats"))] - 0, - )?; - } else { - let secn_instance = &self.secn_instances.read().unwrap()[&global_id]; - - match secn_instance.instance_type() { - InstanceType::Instance => { - if !self.collectors_by_instance.read().unwrap().contains_key(&global_id) - { - if air_id == ROM_AIR_IDS[0] && self.asm_runner_path.is_some() { - let stats = Stats { - airgroup_id, - air_id, - collect_start_time: Instant::now(), - collect_duration: 0, - witness_start_time: Instant::now(), - witness_duration: 0, - num_chunks: 0, - }; - - self.collectors_by_instance - .write() - .unwrap() - .insert(global_id, Vec::new()); - self.stats.insert_witness_stats(global_id, stats); - } else { - let mut secn_instances = HashMap::new(); - secn_instances.insert(global_id, secn_instance); - self.witness_collect_instances(pctx.clone(), secn_instances); - } - } - self.witness_secn_instance( - &pctx, - &sctx, - global_id, - &**secn_instance, - buffer_pool.take_buffer(), - #[cfg(feature = "stats")] - parent_stats_id, - #[cfg(not(feature = "stats"))] - 0, - )?; - } - InstanceType::Table => self.witness_table( - &pctx, - &sctx, - global_id, - &**secn_instance, - Vec::new(), - #[cfg(feature = "stats")] - parent_stats_id, - #[cfg(not(feature = "stats"))] - 0, - )?, - } - } + self.orchestrator.compute_witness_for_instance(&ctx, global_id)?; } Ok(()) })?; - // Add to executor stats - #[cfg(feature = "stats")] - self.stats.add_stat(0, parent_stats_id, "CALCULATE_WITNESS", 0, ExecutorStatsEvent::End); + stats_end!(self.state.stats, &_witness_scope); Ok(()) } @@ -1492,91 +330,22 @@ impl WitnessComponent for ZiskExecutor { n_cores: usize, _buffer_pool: &dyn BufferPool, ) -> ProofmanResult<()> { - #[cfg(feature = "stats")] - let parent_stats_id = self.stats.next_id(); - #[cfg(feature = "stats")] - self.stats.add_stat( - 0, - parent_stats_id, - "PRE_CALCULATE_WITNESS", - 0, - ExecutorStatsEvent::Begin, - ); + stats_begin!(self.state.stats, 0, _pre_scope, "PRE_CALCULATE_WITNESS", 0); if stage != 1 { return Ok(()); } - let secn_instances_guard = self.secn_instances.read().unwrap(); - - let mut secn_instances = HashMap::new(); - for &global_id in global_ids { - let (airgroup_id, air_id) = - pctx.dctx_get_instance_info(global_id).expect("Failed to get instance info"); - if MAIN_AIR_IDS.contains(&air_id) { - pctx.set_witness_ready(global_id, false); - } else if air_id == ROM_AIR_IDS[0] { - if self.asm_runner_path.is_some() { - pctx.set_witness_ready(global_id, false); - } else { - let secn_instance = &secn_instances_guard[&global_id]; - let rom_instance = - secn_instance.as_any().downcast_ref::().unwrap(); - if rom_instance.skip_collector() { - let stats = Stats { - airgroup_id, - air_id, - collect_start_time: Instant::now(), - collect_duration: 0, - witness_start_time: Instant::now(), - witness_duration: 0, - num_chunks: 0, - }; - - self.collectors_by_instance.write().unwrap().insert(global_id, Vec::new()); - self.stats.insert_witness_stats(global_id, stats); - pctx.set_witness_ready(global_id, true); - } else { - secn_instances.insert(global_id, secn_instance); - } - } - } else { - let secn_instance = &secn_instances_guard[&global_id]; - - if secn_instance.instance_type() == InstanceType::Instance - && !self.collectors_by_instance.read().unwrap().contains_key(&global_id) - { - secn_instances.insert(global_id, secn_instance); - } else { - pctx.set_witness_ready(global_id, true); - } - } - } let pool = create_pool(n_cores); - pool.install(|| { - if !secn_instances.is_empty() { - self.witness_collect_instances(pctx.clone(), secn_instances); - } - }); + let result = + pool.install(|| self.orchestrator.pre_calculate(&pctx, &self.state, global_ids)); + result?; - // Add to executor stats - #[cfg(feature = "stats")] - self.stats.add_stat( - 0, - parent_stats_id, - "PRE_CALCULATE_WITNESS", - 0, - ExecutorStatsEvent::End, - ); + stats_end!(self.state.stats, &_pre_scope); Ok(()) } /// Debugs the main and secondary state machines. - /// - /// # Arguments - /// * `pctx` - Proof context. - /// * `sctx` - Setup context. - /// * `global_ids` - Global IDs of the instances to debug. fn debug( &self, pctx: Arc>, @@ -1587,10 +356,10 @@ impl WitnessComponent for ZiskExecutor { let (_airgroup_id, air_id) = pctx.dctx_get_instance_info(global_id).expect("Failed to get instance info"); - if MAIN_AIR_IDS.contains(&air_id) { + if AirClassifier::is_main(air_id) { MainSM::debug(&pctx, &sctx); } else { - let secn_instances = self.secn_instances.read().unwrap(); + let secn_instances = self.state.secn_instances.read().unwrap(); let secn_instance = secn_instances.get(&global_id).expect("Instance not found"); secn_instance.debug(&pctx, &sctx); @@ -1598,28 +367,4 @@ impl WitnessComponent for ZiskExecutor { } Ok(()) } - - fn gen_custom_commits_fixed( - &self, - pctx: Arc>, - sctx: Arc>, - check: bool, - ) -> ProofmanResult<()> { - let file_name = pctx.get_custom_commits_fixed_buffer("rom", false)?; - - let setup = sctx.get_setup(RomRomTrace::::AIRGROUP_ID, RomRomTrace::::AIR_ID)?; - let blowup_factor = - 1 << (setup.stark_info.stark_struct.n_bits_ext - setup.stark_info.stark_struct.n_bits); - let arity = setup.stark_info.stark_struct.merkle_tree_arity; - - gen_elf_hash(&self.rom_path, file_name.as_path(), blowup_factor, arity, check).map_err( - |e| { - ProofmanError::ProofmanError(format!( - "Failed to generate custom commits fixed: {}", - e - )) - }, - )?; - Ok(()) - } } diff --git a/executor/src/lib.rs b/executor/src/lib.rs index 936a187d4..cc22d63ee 100644 --- a/executor/src/lib.rs +++ b/executor/src/lib.rs @@ -1,11 +1,146 @@ +mod air_classifier; +mod asm_resources; +mod collector; mod dummy_counter; +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +mod emu_asm; +#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))] +mod emu_asm_stub; +mod emu_rust; mod executor; +mod planner; +mod registry; +mod rom_executor; mod sm_static_bundle; +mod state; mod static_data_bus; mod static_data_bus_collect; +mod utils; +mod witness_generator; +mod witness_orchestrator; + +use air_classifier::*; +pub use asm_resources::*; +use collector::*; pub use dummy_counter::*; +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +pub use emu_asm::*; +#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))] +pub use emu_asm_stub::*; +pub use emu_rust::*; pub use executor::*; +use planner::*; +use registry::*; +use rom_executor::*; pub use sm_static_bundle::*; +pub use state::*; pub use static_data_bus::*; pub use static_data_bus_collect::*; +pub use utils::*; +use witness_generator::*; +use witness_orchestrator::*; +use zisk_core::ZiskRom; + +pub type DeviceMetricsList = Vec; +pub type NestedDeviceMetricsList = HashMap; + +use asm_runner::{AsmRunnerMO, AsmRunnerRH}; +use fields::PrimeField64; +use proofman_common::ProofCtx; +use std::{collections::HashMap, sync::Mutex, thread::JoinHandle}; +use zisk_common::{io::ZiskStdin, AsmExecutionInfo, EmuTrace, ExecutorStatsHandle, StatsScope}; + +pub type EmulatorResult = ( + Vec, + DeviceMetricsList, + NestedDeviceMetricsList, + Option>, + Option>, + u64, +); + +use anyhow::Result; +/// Trait for unified execution across different emulator backends +#[allow(clippy::too_many_arguments)] +#[allow(clippy::type_complexity)] +pub trait Emulator: Send + Sync { + /// Execute the emulator + fn execute( + &self, + zisk_rom: &ZiskRom, + stdin: &Mutex, + pctx: &ProofCtx, + sm_bundle: &StaticSMBundle, + use_hints: bool, + stats: &ExecutorStatsHandle, + caller_stats_scope: &StatsScope, + ) -> Result; +} + +/// Enum wrapper for different emulator backends (no heap allocation) +pub enum EmulatorKind { + Asm(EmulatorAsm), + Rust(EmulatorRust), +} + +impl EmulatorKind { + /// Check if this is an ASM emulator (non-generic, can be called without F) + pub fn is_asm_emulator(&self) -> bool { + matches!(self, Self::Asm(_)) + } + + pub fn get_chunk_size(&self) -> u64 { + match self { + Self::Asm(e) => e.get_chunk_size(), + Self::Rust(e) => e.get_chunk_size(), + } + } + + pub fn set_asm_resources(&self, asm_resources: AsmResources) { + match self { + Self::Asm(e) => e.set_asm_resources(asm_resources), + Self::Rust(_) => (), // No ASM resources in Rust emulator + }; + } + + pub fn get_asm_execution_info(&self) -> Option { + match self { + Self::Asm(e) => e.get_asm_execution_info(), + Self::Rust(_) => None, // No ASM execution info in Rust emulator + } + } + pub fn reset_hints_stream(&self) { + match self { + Self::Asm(e) => e.reset_hints_stream(), + Self::Rust(_) => (), // No hints stream in Rust emulator + } + } + + pub fn set_rh_data(&self, rh_data: AsmRunnerRH) { + match self { + Self::Asm(e) => e.set_rh_data(rh_data), + Self::Rust(_) => (), + } + } +} + +impl Emulator for EmulatorKind { + fn execute( + &self, + zisk_rom: &ZiskRom, + stdin: &Mutex, + pctx: &ProofCtx, + sm_bundle: &StaticSMBundle, + use_hints: bool, + stats: &ExecutorStatsHandle, + caller_stats_scope: &StatsScope, + ) -> Result { + match self { + Self::Asm(e) => { + e.execute(zisk_rom, stdin, pctx, sm_bundle, use_hints, stats, caller_stats_scope) + } + Self::Rust(e) => e.execute(zisk_rom, stdin, sm_bundle), + } + } +} diff --git a/executor/src/planner.rs b/executor/src/planner.rs new file mode 100644 index 000000000..9d8d856cf --- /dev/null +++ b/executor/src/planner.rs @@ -0,0 +1,156 @@ +//! Instance planning component. +//! +//! This module handles the planning and assignment of main and secondary +//! state machine instances to the proof context. + +use fields::PrimeField64; +use proofman_common::{ProofCtx, SetupCtx}; +use sm_main::MainPlanner; +use std::{collections::BTreeMap, sync::RwLock}; +use zisk_common::{EmuTrace, InstanceType, Plan}; +use zisk_pil::{MAIN_AIR_IDS, ZISK_AIRGROUP_ID}; + +use crate::AirClassifier; +use crate::{DeviceMetricsList, NestedDeviceMetricsList, StaticSMBundle}; + +/// Output from main planning. +pub struct MainPlanningOutput { + /// Plans for main instances. + pub plans: Vec, + /// Public values extracted during planning. + pub public_values: Vec<(u64, u32)>, +} + +/// Component responsible for instance planning. +/// +/// Handles the strategic planning of main and secondary state machine +/// instances based on execution metrics. Planning determines: +/// - How many instances of each state machine type are needed +/// - How work is distributed across instances +/// - Global ID assignments for proof context registration +pub struct InstancePlanner { + /// Chunk size for dividing execution into manageable pieces. + chunk_size: u64, +} + +impl InstancePlanner { + /// Creates a new `InstancePlanner`. + /// + /// # Arguments + /// * `chunk_size` - The chunk size for processing. + pub fn new(chunk_size: u64) -> Self { + Self { chunk_size } + } + + /// Plans main state machine instances. + /// + /// # Arguments + /// * `min_traces` - Minimal traces from execution. + /// * `main_count` - Device metrics for main instances. + /// + /// # Returns + /// Planning output with plans and public values. + pub fn plan_main( + &self, + min_traces: &[EmuTrace], + main_count: DeviceMetricsList, + ) -> MainPlanningOutput { + let (plans, public_values) = + MainPlanner::plan::(min_traces, main_count, self.chunk_size); + MainPlanningOutput { plans, public_values } + } + + /// Plans secondary state machine instances. + /// + /// # Arguments + /// * `sm_bundle` - State machine bundle. + /// * `secn_count` - Device metrics for secondary instances. + /// + /// # Returns + /// BTreeMap of SM type ID to plans. + pub fn plan_secondary( + &self, + sm_bundle: &StaticSMBundle, + secn_count: &mut NestedDeviceMetricsList, + ) -> BTreeMap> { + sm_bundle.plan_sec(secn_count) + } + + /// Assigns main instances to the proof context. + /// + /// # Arguments + /// * `pctx` - Proof context. + /// * `global_ids` - Lock for storing assigned global IDs. + /// * `plans` - Plans to assign. + /// + /// # Returns + /// Vector of (global_id, plan) pairs for instance creation. + pub fn assign_main_instances( + &self, + pctx: &ProofCtx, + sctx: &SetupCtx, + global_ids: &RwLock>, + plans: Vec, + ) -> (Vec<(usize, Plan)>, u64) { + let mut assignments = Vec::with_capacity(plans.len()); + + let setup_main = sctx.get_setup(ZISK_AIRGROUP_ID, MAIN_AIR_IDS[0]).unwrap(); + let n_bits = setup_main.stark_info.stark_struct.n_bits; + let total_cols: u64 = setup_main + .stark_info + .map_sections_n + .iter() + .filter(|(key, _)| *key != "const") + .map(|(_, value)| *value) + .sum(); + let cost = (1 << n_bits) * total_cols; + let total_cost = cost * plans.len() as u64; + + for mut plan in plans { + let global_id = pctx + .add_instance_assign(plan.airgroup_id, plan.air_id) + .expect("Failed to add instance"); + plan.set_global_id(global_id); + global_ids.write().unwrap().push(global_id); + assignments.push((global_id, plan)); + } + + (assignments, total_cost) + } + + /// Assigns secondary instances to the proof context. + /// + /// # Arguments + /// * `pctx` - Proof context. + /// * `global_ids` - Lock for storing assigned global IDs. + /// * `plans` - Plans to assign (will be mutated with global IDs). + pub fn assign_secn_instances( + &self, + pctx: &ProofCtx, + global_ids: &RwLock>, + plans: &mut [Plan], + ) { + for plan in plans.iter_mut() { + // ROM instances need special first partition assignment + let global_id = if AirClassifier::is_rom_instance(plan.airgroup_id, plan.air_id) { + pctx.add_instance_assign_first_process(plan.airgroup_id, plan.air_id) + .expect("Failed to add ROM instance") + } else if AirClassifier::is_keccakf_instance(plan.airgroup_id, plan.air_id) { + pctx.add_instance_assign(plan.airgroup_id, plan.air_id) + .expect("Failed to add KeccakF instance") + } else { + match plan.instance_type { + InstanceType::Instance => pctx + .add_instance(plan.airgroup_id, plan.air_id) + .expect("Failed to add instance"), + InstanceType::Table => { + pctx.add_table(plan.airgroup_id, plan.air_id).expect("Failed to add table") + } + } + }; + + global_ids.write().unwrap().push(global_id); + plan.set_global_id(global_id); + } + } +} diff --git a/executor/src/registry.rs b/executor/src/registry.rs new file mode 100644 index 000000000..21cf6dcfc --- /dev/null +++ b/executor/src/registry.rs @@ -0,0 +1,161 @@ +//! Instance registry component. +//! +//! This module handles the creation and lifecycle management of main and secondary +//! state machine instances. + +use fields::PrimeField64; +use proofman_common::{ProofCtx, ProofmanResult}; +use sm_main::MainInstance; +use std::sync::Arc; +use zisk_common::{CheckPoint, Instance, InstanceCtx, InstanceType, Plan}; + +use crate::AirClassifier; +use crate::{state::ExecutionState, StaticSMBundle}; + +pub struct InstanceRegistry { + /// State machine bundle for secondary instance creation. + sm_bundle: Arc>, +} + +impl InstanceRegistry { + /// Creates a new `InstanceRegistry`. + /// + /// # Arguments + /// * `sm_bundle` - State machine bundle. + pub fn new(sm_bundle: Arc>) -> Self { + Self { sm_bundle } + } + + /// Creates a main state machine instance. + /// + /// # Arguments + /// * `plan` - The plan for the instance. + /// * `global_id` - The global ID assigned to this instance. + pub fn create_main_instance(&self, plan: Plan, global_id: usize) -> MainInstance { + MainInstance::new(InstanceCtx::new(global_id, plan), self.sm_bundle.get_std()) + } + + /// Creates a secondary state machine instance. + /// + /// # Arguments + /// * `plan` - The plan for the instance. + /// * `global_id` - The global ID assigned to this instance. + pub fn create_secn_instance(&self, plan: Plan, global_id: usize) -> Box> { + let ictx = InstanceCtx::new(global_id, plan); + self.sm_bundle.build_instance(ictx) + } + + /// Creates a secondary instance by looking up the plan in execution state. + /// + /// # Arguments + /// * `state` - The execution state containing the plans. + /// * `global_id` - The global ID to look up. + pub fn create_secn_instance_from_state( + &self, + state: &ExecutionState, + global_id: usize, + ) -> Box> { + let mut secn_planning_guard = state.secn_planning.write().unwrap(); + + // Find and remove in single operation using swap_remove for O(1) removal + let plan = secn_planning_guard + .iter() + .position(|plan| plan.global_id == Some(global_id)) + .map(|idx| secn_planning_guard.swap_remove(idx)) + .unwrap_or_else(|| panic!("Secondary instance not found for global_id: {}", global_id)); + + self.create_secn_instance(plan, global_id) + } + + /// Populates main instances in the execution state. + /// + /// # Arguments + /// * `state` - The execution state to populate. + /// * `assignments` - Vector of (global_id, plan) pairs. + pub fn populate_main_instances( + &self, + pctx: &ProofCtx, + state: &ExecutionState, + assignments: Vec<(usize, Plan)>, + ) -> ProofmanResult<()> { + let mut main_instances = state.main_instances.write().unwrap(); + for (global_id, plan) in assignments { + main_instances + .entry(global_id) + .or_insert_with(|| self.create_main_instance(plan, global_id)); + + let is_mine = pctx.dctx_is_my_process_instance(global_id)?; + if is_mine { + pctx.set_witness_ready(global_id, false); + } + } + Ok(()) + } + + /// Populates secondary instances in the execution state. + /// + /// # Arguments + /// * `state` - The execution state to populate. + /// * `global_ids` - Vector of global IDs for instances to create. + pub fn populate_secn_instances(&self, state: &ExecutionState, global_ids: &[usize]) { + let mut secn_instances = state.secn_instances.write().unwrap(); + for &global_id in global_ids { + secn_instances + .entry(global_id) + .or_insert_with(|| self.create_secn_instance_from_state(state, global_id)); + } + } + + /// Gets a reference to the state machine bundle. + pub fn sm_bundle(&self) -> &StaticSMBundle { + &self.sm_bundle + } + + /// Configures secondary state machine instances based on planning. + /// + /// # Arguments + /// * `pctx` - Proof context. + /// * `plannings` - Map of SM ID to plans. + pub fn configure_sm_instances( + &self, + pctx: &ProofCtx, + plannings: &std::collections::BTreeMap>, + ) { + self.sm_bundle.configure_instances(pctx, plannings); + } + + /// Configures checkpoints for secondary instances. + /// + /// # Arguments + /// * `pctx` - Proof context. + /// * `state` - Execution state containing the instances. + /// * `global_ids` - Global IDs of secondary instances to configure. + pub fn configure_checkpoints( + &self, + pctx: &ProofCtx, + state: &ExecutionState, + global_ids: &[usize], + ) { + let secn_instances = state.secn_instances.read().unwrap(); + + for &global_id in global_ids { + secn_instances[&global_id].reset(); + + if secn_instances[&global_id].instance_type() == InstanceType::Instance { + let checkpoint = secn_instances[&global_id].check_point(); + let chunks = match checkpoint { + CheckPoint::None => vec![], + CheckPoint::Single(chunk_id) => vec![chunk_id.as_usize()], + CheckPoint::Multiple(chunk_ids) => { + chunk_ids.iter().map(|id| id.as_usize()).collect() + } + }; + + let (_, air_id) = + pctx.dctx_get_instance_info(global_id).expect("Failed to get instance info"); + let is_memory_related = AirClassifier::is_memory_related(air_id); + pctx.dctx_set_chunks(global_id, chunks, is_memory_related); + } + } + } +} diff --git a/executor/src/rom_executor.rs b/executor/src/rom_executor.rs new file mode 100644 index 000000000..de6ca38d5 --- /dev/null +++ b/executor/src/rom_executor.rs @@ -0,0 +1,109 @@ +//! ROM executor +//! +//! This module handles the execution of a ZisK ROM program, coordinating +//! the emulator backend and hints stream processing. + +use crate::{ + AsmResources, DeviceMetricsList, Emulator, EmulatorKind, NestedDeviceMetricsList, + StaticSMBundle, +}; +use asm_runner::{AsmRunnerMO, AsmRunnerRH}; +use fields::PrimeField64; +use proofman_common::ProofCtx; +use std::{sync::Mutex, thread::JoinHandle}; +use zisk_common::{io::ZiskStdin, AsmExecutionInfo, EmuTrace, ExecutorStatsHandle, StatsScope}; +use zisk_core::ZiskRom; + +use anyhow::Result; + +/// Output from ROM execution. +pub struct RomExecutionOutput { + /// Minimal traces collected during execution. + pub min_traces: Vec, + /// Device metrics for main state machines. + pub main_count: DeviceMetricsList, + /// Device metrics for secondary state machines. + pub secn_count: NestedDeviceMetricsList, + /// Handle to memory operations thread (for ASM emulator). + pub handle_mo: Option>, + /// Handle to hints runner thread (for ASM emulator). + pub handle_rh: Option>, + /// Execution result with step counts. + pub steps: u64, +} + +pub struct RomExecutor { + /// The emulator backend used for execution. + emulator: EmulatorKind, + + /// Standard input for the ZisK program execution. + stdin: Mutex, +} + +impl RomExecutor { + /// Creates a new `RomExecutor`. + /// + /// # Arguments + /// * `emulator` - The emulator backend to use. + /// * `hints_stream` - Optional hints stream for precompile processing. + pub fn new(emulator: EmulatorKind) -> Self { + Self { emulator, stdin: Mutex::new(ZiskStdin::null()) } + } + + /// Sets the standard input for execution. + pub fn set_stdin(&self, stdin: ZiskStdin) { + *self.stdin.lock().unwrap() = stdin; + } + + pub fn set_asm_resources(&self, asm_resources: AsmResources) { + self.emulator.set_asm_resources(asm_resources); + } + + /// Resets the hints stream if configured. + pub fn reset_hints_stream(&self) { + self.emulator.reset_hints_stream() + } + + pub fn get_asm_execution_info(&self) -> Option { + self.emulator.get_asm_execution_info() + } + + pub fn set_rh_data(&self, rh_data: AsmRunnerRH) { + self.emulator.set_rh_data(rh_data); + } + + /// Executes the ROM program and collects minimal traces. + /// + /// # Arguments + /// * `zisk_rom` - The ROM to execute. + /// * `pctx` - Proof context. + /// * `sm_bundle` - State machine bundle. + /// * `use_hints` - Flag to indicate whether to use hints. + /// * `stats` - Statistics handle. + /// * `caller_stats_scope` - Parent statistics scope. + /// + /// # Returns + /// Execution output containing traces, metrics, and results. + pub fn execute( + &self, + zisk_rom: &ZiskRom, + pctx: &ProofCtx, + sm_bundle: &StaticSMBundle, + use_hints: bool, + stats: &ExecutorStatsHandle, + caller_stats_scope: &StatsScope, + ) -> Result { + let (min_traces, main_count, secn_count, handle_mo, handle_rh, steps) = + self.emulator.execute( + zisk_rom, + &self.stdin, + pctx, + sm_bundle, + use_hints, + stats, + caller_stats_scope, + )?; + + Ok(RomExecutionOutput { min_traces, main_count, secn_count, handle_mo, handle_rh, steps }) + } +} diff --git a/executor/src/sm_static_bundle.rs b/executor/src/sm_static_bundle.rs index 3aa8edb76..32f07db8c 100644 --- a/executor/src/sm_static_bundle.rs +++ b/executor/src/sm_static_bundle.rs @@ -1,14 +1,21 @@ use std::sync::Arc; -use crate::NestedDeviceMetricsList; -use crate::StaticDataBusCollect; +use crate::{NestedDeviceMetricsList, StaticDataBusCollect}; use data_bus::DataBusTrait; use fields::PrimeField64; +use pil_std_lib::Std; use precomp_arith_eq::{ArithEqInstance, ArithEqManager}; use precomp_arith_eq_384::ArithEq384Instance; use precomp_arith_eq_384::ArithEq384Manager; use precomp_big_int::{Add256Instance, Add256Manager}; +use precomp_blake2::{Blake2Instance, Blake2Manager}; +use precomp_dma::Dma64AlignedInstance; +use precomp_dma::DmaInstance; +use precomp_dma::DmaManager; +use precomp_dma::DmaPrePostInstance; +use precomp_dma::DmaUnalignedInstance; use precomp_keccakf::{KeccakfInstance, KeccakfManager}; +use precomp_poseidon2::{Poseidon2Instance, Poseidon2Manager}; use precomp_sha256f::{Sha256fInstance, Sha256fManager}; use proofman_common::ProofCtx; use sm_arith::{ArithFullInstance, ArithSM}; @@ -21,14 +28,27 @@ use sm_rom::{RomInstance, RomSM}; use std::collections::{BTreeMap, HashMap}; use zisk_common::{BusDeviceMetrics, ChunkId, ComponentBuilder, Instance, InstanceCtx, Plan}; use zisk_pil::ADD_256_AIR_IDS; +use zisk_pil::DMA_64_ALIGNED_AIR_IDS; +use zisk_pil::DMA_64_ALIGNED_INPUT_CPY_AIR_IDS; +use zisk_pil::DMA_64_ALIGNED_MEM_AIR_IDS; +use zisk_pil::DMA_64_ALIGNED_MEM_CPY_AIR_IDS; +use zisk_pil::DMA_64_ALIGNED_MEM_SET_AIR_IDS; +use zisk_pil::DMA_AIR_IDS; +use zisk_pil::DMA_INPUT_CPY_AIR_IDS; +use zisk_pil::DMA_MEM_CPY_AIR_IDS; +use zisk_pil::DMA_PRE_POST_AIR_IDS; +use zisk_pil::DMA_PRE_POST_INPUT_CPY_AIR_IDS; +use zisk_pil::DMA_PRE_POST_MEM_CPY_AIR_IDS; +use zisk_pil::DMA_UNALIGNED_AIR_IDS; use zisk_pil::{ ARITH_AIR_IDS, ARITH_EQ_384_AIR_IDS, ARITH_EQ_AIR_IDS, BINARY_ADD_AIR_IDS, BINARY_AIR_IDS, - BINARY_EXTENSION_AIR_IDS, INPUT_DATA_AIR_IDS, KECCAKF_AIR_IDS, MEM_AIR_IDS, MEM_ALIGN_AIR_IDS, - MEM_ALIGN_BYTE_AIR_IDS, MEM_ALIGN_READ_BYTE_AIR_IDS, MEM_ALIGN_WRITE_BYTE_AIR_IDS, ROM_AIR_IDS, - ROM_DATA_AIR_IDS, SHA_256_F_AIR_IDS, ZISK_AIRGROUP_ID, + BINARY_EXTENSION_AIR_IDS, BLAKE_2_BR_AIR_IDS, INPUT_DATA_AIR_IDS, KECCAKF_AIR_IDS, MEM_AIR_IDS, + MEM_ALIGN_AIR_IDS, MEM_ALIGN_BYTE_AIR_IDS, MEM_ALIGN_READ_BYTE_AIR_IDS, + MEM_ALIGN_WRITE_BYTE_AIR_IDS, POSEIDON_2_AIR_IDS, ROM_AIR_IDS, ROM_DATA_AIR_IDS, + SHA_256_F_AIR_IDS, ZISK_AIRGROUP_ID, }; -use crate::StaticDataBus; +use crate::{StaticDataBus, ZiskRom}; use rayon::prelude::*; type SMAirType = Vec<(usize, usize)>; @@ -41,9 +61,12 @@ pub enum StateMachines { ArithSM(Arc>), KeccakfManager(Arc>), Sha256fManager(Arc>), + Poseidon2Manager(Arc>), + Blake2Manager(Arc>), ArithEqManager(Arc>), ArithEq384Manager(Arc>), Add256Manager(Arc>), + DmaManager(Arc>), } impl StateMachines { @@ -55,9 +78,12 @@ impl StateMachines { StateMachines::ArithSM(_) => 3, StateMachines::KeccakfManager(_) => 4, StateMachines::Sha256fManager(_) => 5, - StateMachines::ArithEqManager(_) => 6, - StateMachines::ArithEq384Manager(_) => 7, - StateMachines::Add256Manager(_) => 8, + StateMachines::Poseidon2Manager(_) => 6, + StateMachines::Blake2Manager(_) => 7, + StateMachines::ArithEqManager(_) => 8, + StateMachines::ArithEq384Manager(_) => 9, + StateMachines::Add256Manager(_) => 10, + StateMachines::DmaManager(_) => 11, } } @@ -75,9 +101,12 @@ impl StateMachines { StateMachines::ArithSM(sm) => (**sm).build_planner(), StateMachines::KeccakfManager(sm) => (**sm).build_planner(), StateMachines::Sha256fManager(sm) => (**sm).build_planner(), + StateMachines::Poseidon2Manager(sm) => (**sm).build_planner(), + StateMachines::Blake2Manager(sm) => (**sm).build_planner(), StateMachines::ArithEqManager(sm) => (**sm).build_planner(), StateMachines::ArithEq384Manager(sm) => (**sm).build_planner(), StateMachines::Add256Manager(sm) => (**sm).build_planner(), + StateMachines::DmaManager(sm) => (**sm).build_planner(), } } @@ -91,9 +120,12 @@ impl StateMachines { StateMachines::ArithSM(sm) => (**sm).configure_instances(pctx, plans), StateMachines::KeccakfManager(sm) => (**sm).configure_instances(pctx, plans), StateMachines::Sha256fManager(sm) => (**sm).configure_instances(pctx, plans), + StateMachines::Poseidon2Manager(sm) => (**sm).configure_instances(pctx, plans), + StateMachines::Blake2Manager(sm) => (**sm).configure_instances(pctx, plans), StateMachines::ArithEqManager(sm) => (**sm).configure_instances(pctx, plans), StateMachines::ArithEq384Manager(sm) => (**sm).configure_instances(pctx, plans), StateMachines::Add256Manager(sm) => (**sm).configure_instances(pctx, plans), + StateMachines::DmaManager(sm) => (**sm).configure_instances(pctx, plans), } } @@ -105,9 +137,12 @@ impl StateMachines { StateMachines::ArithSM(sm) => (**sm).build_instance(ictx), StateMachines::KeccakfManager(sm) => (**sm).build_instance(ictx), StateMachines::Sha256fManager(sm) => (**sm).build_instance(ictx), + StateMachines::Poseidon2Manager(sm) => (**sm).build_instance(ictx), + StateMachines::Blake2Manager(sm) => (**sm).build_instance(ictx), StateMachines::ArithEqManager(sm) => (**sm).build_instance(ictx), StateMachines::ArithEq384Manager(sm) => (**sm).build_instance(ictx), StateMachines::Add256Manager(sm) => (**sm).build_instance(ictx), + StateMachines::DmaManager(sm) => (**sm).build_instance(ictx), } } } @@ -115,19 +150,37 @@ impl StateMachines { pub struct StaticSMBundle { process_only_operation_bus: bool, sm: BTreeMap>, + std: Arc>, } impl StaticSMBundle { #[allow(clippy::too_many_arguments)] - pub fn new(process_only_operation_bus: bool, sm: Vec<(SMAirType, StateMachines)>) -> Self { + pub fn new( + process_only_operation_bus: bool, + std: Arc>, + sm: Vec<(SMAirType, StateMachines)>, + ) -> Self { Self { process_only_operation_bus, sm: BTreeMap::from_iter( sm.into_iter().map(|(air_ids, sm)| (sm.type_id(), (air_ids, sm))), ), + std, } } + pub fn set_rom(&self, zisk_rom: Arc) { + for (_, sm) in self.sm.values() { + if let StateMachines::RomSM(rom_sm) = sm { + rom_sm.set_rom(zisk_rom.clone()); + } + } + } + + pub fn get_std(&self) -> Arc> { + self.std.clone() + } + pub fn get_mem_sm_id(&self) -> usize { 1 } @@ -184,9 +237,12 @@ impl StaticSMBundle { let mut arith_counter = None; let mut keccakf_counter = None; let mut sha256f_counter = None; + let mut poseidon2_counter = None; + let mut blake2_counter = None; let mut arith_eq_counter = None; let mut arith_eq_384_counter = None; let mut add256_counter = None; + let mut dma_counter = None; for (_, sm) in self.sm.values() { match sm { @@ -204,20 +260,52 @@ impl StaticSMBundle { arith_counter = Some((sm.type_id(), arith_sm.build_arith_counter())); } StateMachines::KeccakfManager(keccak_sm) => { - keccakf_counter = Some((sm.type_id(), keccak_sm.build_keccakf_counter())); + keccakf_counter = Some(( + sm.type_id(), + keccak_sm.build_keccakf_counter(self.process_only_operation_bus), + )); } StateMachines::Sha256fManager(sha256_sm) => { - sha256f_counter = Some((sm.type_id(), sha256_sm.build_sha256f_counter())); + sha256f_counter = Some(( + sm.type_id(), + sha256_sm.build_sha256f_counter(self.process_only_operation_bus), + )); + } + StateMachines::Poseidon2Manager(poseidon2_sm) => { + poseidon2_counter = Some(( + sm.type_id(), + poseidon2_sm.build_poseidon2_counter(self.process_only_operation_bus), + )); + } + StateMachines::Blake2Manager(blake2_sm) => { + blake2_counter = Some(( + sm.type_id(), + blake2_sm.build_blake2_counter(self.process_only_operation_bus), + )); } StateMachines::ArithEqManager(arith_eq_sm) => { - arith_eq_counter = Some((sm.type_id(), arith_eq_sm.build_arith_eq_counter())); + arith_eq_counter = Some(( + sm.type_id(), + arith_eq_sm.build_arith_eq_counter(self.process_only_operation_bus), + )); } StateMachines::ArithEq384Manager(arith_eq_384_sm) => { - arith_eq_384_counter = - Some((sm.type_id(), arith_eq_384_sm.build_arith_eq_384_counter())); + arith_eq_384_counter = Some(( + sm.type_id(), + arith_eq_384_sm.build_arith_eq_384_counter(self.process_only_operation_bus), + )); } StateMachines::Add256Manager(add256_sm) => { - add256_counter = Some((sm.type_id(), add256_sm.build_add256_counter())); + add256_counter = Some(( + sm.type_id(), + add256_sm.build_add256_counter(self.process_only_operation_bus), + )); + } + StateMachines::DmaManager(dma_sm) => { + dma_counter = Some(( + sm.type_id(), + dma_sm.build_dma_counter(self.process_only_operation_bus), + )); } StateMachines::RomSM(_) => {} } @@ -230,9 +318,12 @@ impl StaticSMBundle { arith_counter.expect("Arith counter not found"), keccakf_counter.expect("Keccakf counter not found"), sha256f_counter.expect("Sha256f counter not found"), + poseidon2_counter.expect("Poseidon2 counter not found"), + blake2_counter.expect("Blake2 counter not found"), arith_eq_counter.expect("ArithEq counter not found"), arith_eq_384_counter.expect("ArithEq384 counter not found"), add256_counter.expect("Add256 counter not found"), + dma_counter.expect("Dma counter not found"), Some(0), ) } @@ -243,7 +334,7 @@ impl StaticSMBundle { pctx: &ProofCtx, secn_instances: &HashMap>>, chunks_to_execute: &[Vec], - ) -> Vec>> { + ) -> Vec>> { chunks_to_execute .par_iter() .enumerate() @@ -260,10 +351,16 @@ impl StaticSMBundle { let mut arith_collectors = Vec::new(); let mut keccakf_collectors = Vec::new(); let mut sha256f_collectors = Vec::new(); + let mut poseidon2_collectors = Vec::new(); + let mut blake2_collectors = Vec::new(); let mut arith_eq_collectors = Vec::new(); let mut arith_eq_384_collectors = Vec::new(); let mut add256_collectors = Vec::new(); let mut rom_collectors = Vec::new(); + let mut dma_collectors = Vec::new(); + let mut dma_pre_post_collectors = Vec::new(); + let mut dma_64_aligned_collectors = Vec::new(); + let mut dma_unaligned_collectors = Vec::new(); for global_idx in global_idxs { let secn_instance = secn_instances.get(global_idx).unwrap(); @@ -374,6 +471,22 @@ impl StaticSMBundle { sha256f_instance.build_sha256f_collector(ChunkId(chunk_id)); sha256f_collectors.push((*global_idx, sha256f_collector)); } + air_id if air_id == POSEIDON_2_AIR_IDS[0] => { + let poseidon2_instance = secn_instance + .as_any() + .downcast_ref::>() + .unwrap(); + let poseidon2_collector = + poseidon2_instance.build_poseidon2_collector(ChunkId(chunk_id)); + poseidon2_collectors.push((*global_idx, poseidon2_collector)); + } + air_id if air_id == BLAKE_2_BR_AIR_IDS[0] => { + let blake2_instance = + secn_instance.as_any().downcast_ref::>().unwrap(); + let blake2_collector = + blake2_instance.build_blake2_collector(ChunkId(chunk_id)); + blake2_collectors.push((*global_idx, blake2_collector)); + } air_id if air_id == ARITH_EQ_AIR_IDS[0] => { let arith_eq_instance = secn_instance .as_any() @@ -399,6 +512,54 @@ impl StaticSMBundle { add256_instance.build_add256_collector(ChunkId(chunk_id)); add256_collectors.push((*global_idx, add256_collector)); } + // DMA AIRS + air_id + if air_id == DMA_AIR_IDS[0] + || air_id == DMA_MEM_CPY_AIR_IDS[0] + || air_id == DMA_INPUT_CPY_AIR_IDS[0] => + { + let dma_instance = + secn_instance.as_any().downcast_ref::>().unwrap(); + let dma_collector = dma_instance.build_dma_collector(ChunkId(chunk_id)); + dma_collectors.push((*global_idx, dma_collector)); + } + air_id + if air_id == DMA_PRE_POST_AIR_IDS[0] + || air_id == DMA_PRE_POST_MEM_CPY_AIR_IDS[0] + || air_id == DMA_PRE_POST_INPUT_CPY_AIR_IDS[0] => + { + let dma_pre_post_instance = secn_instance + .as_any() + .downcast_ref::>() + .unwrap(); + let dma_pre_post_collector = + dma_pre_post_instance.build_dma_collector(ChunkId(chunk_id)); + dma_pre_post_collectors.push((*global_idx, dma_pre_post_collector)); + } + air_id + if air_id == DMA_64_ALIGNED_AIR_IDS[0] + || air_id == DMA_64_ALIGNED_MEM_CPY_AIR_IDS[0] + || air_id == DMA_64_ALIGNED_INPUT_CPY_AIR_IDS[0] + || air_id == DMA_64_ALIGNED_MEM_SET_AIR_IDS[0] + || air_id == DMA_64_ALIGNED_MEM_AIR_IDS[0] => + { + let dma_64_aligned_instance = secn_instance + .as_any() + .downcast_ref::>() + .unwrap(); + let dma_64_aligned_collector = + dma_64_aligned_instance.build_dma_collector(ChunkId(chunk_id)); + dma_64_aligned_collectors.push((*global_idx, dma_64_aligned_collector)); + } + air_id if air_id == DMA_UNALIGNED_AIR_IDS[0] => { + let dma_unaligned_instance = secn_instance + .as_any() + .downcast_ref::>() + .unwrap(); + let dma_unaligned_collector = + dma_unaligned_instance.build_dma_collector(ChunkId(chunk_id)); + dma_unaligned_collectors.push((*global_idx, dma_unaligned_collector)); + } air_id if air_id == ROM_AIR_IDS[0] => { let rom_instance = secn_instance.as_any().downcast_ref::().unwrap(); @@ -417,8 +578,11 @@ impl StaticSMBundle { let mut arith_eq_384_inputs_generator = None; let mut keccakf_inputs_generator = None; let mut sha256f_inputs_generator = None; + let mut poseidon2_inputs_generator = None; + let mut blake2_inputs_generator = None; let mut arith_inputs_generator = None; let mut add256_inputs_generator = None; + let mut dma_inputs_generator = None; for (_, sm) in self.sm.values() { match sm { StateMachines::ArithSM(arith_sm) => { @@ -432,6 +596,14 @@ impl StaticSMBundle { sha256f_inputs_generator = Some(sha256_sm.build_sha256f_input_generator()); } + StateMachines::Poseidon2Manager(poseidon2_sm) => { + poseidon2_inputs_generator = + Some(poseidon2_sm.build_poseidon2_input_generator()); + } + StateMachines::Blake2Manager(blake2_sm) => { + blake2_inputs_generator = + Some(blake2_sm.build_blake2_input_generator()); + } StateMachines::ArithEqManager(arith_eq_sm) => { arith_eq_inputs_generator = Some(arith_eq_sm.build_arith_eq_input_generator()); @@ -444,6 +616,9 @@ impl StaticSMBundle { add256_inputs_generator = Some(add256_sm.build_add256_input_generator()); } + StateMachines::DmaManager(dma_sm) => { + dma_inputs_generator = Some(dma_sm.build_dma_input_generator()); + } _ => {} } } @@ -457,16 +632,25 @@ impl StaticSMBundle { arith_collectors, keccakf_collectors, sha256f_collectors, + poseidon2_collectors, + blake2_collectors, arith_eq_collectors, arith_eq_384_collectors, add256_collectors, + dma_collectors, + dma_pre_post_collectors, + dma_64_aligned_collectors, + dma_unaligned_collectors, rom_collectors, arith_eq_inputs_generator.expect("ArithEq input generator not found"), arith_eq_384_inputs_generator.expect("ArithEq384 input generator not found"), keccakf_inputs_generator.expect("KeccakF input generator not found"), sha256f_inputs_generator.expect("SHA256F input generator not found"), + poseidon2_inputs_generator.expect("Poseidon2 input generator not found"), + blake2_inputs_generator.expect("Blake2 input generator not found"), arith_inputs_generator.expect("Arith input generator not found"), add256_inputs_generator.expect("Add256 input generator not found"), + dma_inputs_generator.expect("Dma input generator not found"), ); Some(data_bus) diff --git a/executor/src/state.rs b/executor/src/state.rs new file mode 100644 index 000000000..b3931be41 --- /dev/null +++ b/executor/src/state.rs @@ -0,0 +1,127 @@ +//! Shared execution state for the ZisK executor components. + +use anyhow::Result; +use fields::PrimeField64; +use sm_main::MainInstance; +use std::{ + collections::HashMap, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Mutex, RwLock, + }, +}; +use zisk_common::{BusDevice, EmuTrace, ExecutorStatsHandle, Instance, Plan, ZiskExecutorSummary}; +use zisk_core::ZiskRom; + +/// Type alias for chunk collectors: (chunk_id, collector) +pub type ChunkCollector = (usize, Box>); + +pub struct ExecutionState { + /// ZisK ROM (ELF), can be changed between executions. + pub zisk_rom: RwLock>>, + + /// Planning information for main state machines (minimal traces from emulation). + pub min_traces: Arc>>>, + + /// Planning information for secondary state machines. + pub secn_planning: RwLock>, + + /// Main state machine instances, indexed by their global ID. + pub main_instances: RwLock>>, + + /// Secondary state machine instances, indexed by their global ID. + pub secn_instances: RwLock>>>, + + /// Collectors by instance, storing statistics and collectors for each instance. + pub collectors_by_instance: Arc>>>>, + + /// Execution result, including the number of executed steps. + pub execution_result: Mutex, + + /// Statistics collected during the execution. + pub stats: ExecutorStatsHandle, + + /// Flag to indicate if the ROM has been initialized + pub is_rom_initialized: AtomicBool, + + /// Flag to indicate whether to use hints during execution + pub use_hints: AtomicBool, +} + +impl ExecutionState { + /// Creates a new `ExecutionState` with default values. + pub fn new() -> Self { + Self { + zisk_rom: RwLock::new(None), + min_traces: Arc::new(RwLock::new(None)), + secn_planning: RwLock::new(Vec::new()), + main_instances: RwLock::new(HashMap::new()), + secn_instances: RwLock::new(HashMap::new()), + collectors_by_instance: Arc::new(RwLock::new(HashMap::new())), + execution_result: Mutex::new(ZiskExecutorSummary::default()), + stats: ExecutorStatsHandle::new(), + is_rom_initialized: AtomicBool::new(false), + use_hints: AtomicBool::new(false), + } + } + + /// Sets the ZisK ROM for execution. + /// + /// This can be called between executions to change the ROM/ELF + /// without recreating the executor. + pub fn set_rom(&self, rom: Arc, use_hints: bool) { + *self.zisk_rom.write().unwrap() = Some(rom); + self.is_rom_initialized.store(true, Ordering::SeqCst); + self.use_hints.store(use_hints, Ordering::SeqCst); + } + + /// Gets the current ZisK ROM. + /// + /// # Panics + /// Panics if no ROM has been set. + pub fn get_rom(&self) -> Result> { + if !self.is_rom_initialized.load(Ordering::SeqCst) { + return Err(anyhow::anyhow!("ROM not initialized. Call set_rom() before get_rom()")); + } + + Ok(self + .zisk_rom + .read() + .unwrap() + .as_ref() + .expect("ROM not set. Call set_rom() before execute()") + .clone()) + } + + /// Resets all internal state to default values. + pub fn reset(&self) { + *self.execution_result.lock().unwrap() = ZiskExecutorSummary::default(); + *self.min_traces.write().unwrap() = None; + *self.secn_planning.write().unwrap() = Vec::new(); + self.main_instances.write().unwrap().clear(); + self.secn_instances.write().unwrap().clear(); + self.collectors_by_instance.write().unwrap().clear(); + self.stats.reset(); + } + + /// Gets a clone of the execution result. + pub fn get_execution_result(&self) -> ZiskExecutorSummary { + self.execution_result.lock().unwrap().clone() + } + + /// Sets the execution result. + pub fn set_execution_result(&self, result: ZiskExecutorSummary) { + *self.execution_result.lock().unwrap() = result; + } + + /// Gets a clone of the stats handle. + pub fn get_stats(&self) -> ExecutorStatsHandle { + self.stats.clone() + } +} + +impl Default for ExecutionState { + fn default() -> Self { + Self::new() + } +} diff --git a/executor/src/static_data_bus.rs b/executor/src/static_data_bus.rs index 92a893f75..de495aa6e 100644 --- a/executor/src/static_data_bus.rs +++ b/executor/src/static_data_bus.rs @@ -10,16 +10,20 @@ use mem_common::MemCounters; use precomp_arith_eq::ArithEqCounterInputGen; use precomp_arith_eq_384::ArithEq384CounterInputGen; use precomp_big_int::Add256CounterInputGen; +use precomp_blake2::Blake2CounterInputGen; +use precomp_dma::DmaCounterInputGen; use precomp_keccakf::KeccakfCounterInputGen; +use precomp_poseidon2::Poseidon2CounterInputGen; use precomp_sha256f::Sha256fCounterInputGen; +use precompiles_common::MemCounterProcessor; use sm_arith::ArithCounterInputGen; use sm_binary::BinaryCounter; use sm_main::MainCounter; -use zisk_common::{BusDevice, BusDeviceMetrics, BusId, PayloadType, MEM_BUS_ID, OPERATION_BUS_ID}; +use zisk_common::{BusDeviceMetrics, BusId, PayloadType, MEM_BUS_ID, OPERATION_BUS_ID}; use zisk_core::{ ARITH_EQ_384_OP_TYPE_ID, ARITH_EQ_OP_TYPE_ID, ARITH_OP_TYPE_ID, BIG_INT_OP_TYPE_ID, - BINARY_E_OP_TYPE_ID, BINARY_OP_TYPE_ID, KECCAK_OP_TYPE_ID, PUB_OUT_OP_TYPE_ID, - SHA256_OP_TYPE_ID, + BINARY_E_OP_TYPE_ID, BINARY_OP_TYPE_ID, BLAKE2_OP_TYPE_ID, DMA_OP_TYPE_ID, KECCAK_OP_TYPE_ID, + POSEIDON2_OP_TYPE_ID, PUB_OUT_OP_TYPE_ID, SHA256_OP_TYPE_ID, }; /// A bus system facilitating communication between multiple publishers and subscribers. @@ -42,12 +46,15 @@ pub struct StaticDataBus { pub arith_counter: (usize, ArithCounterInputGen), pub keccakf_counter: (usize, KeccakfCounterInputGen), pub sha256f_counter: (usize, Sha256fCounterInputGen), + pub poseidon2_counter: (usize, Poseidon2CounterInputGen), + pub blake2_counter: (usize, Blake2CounterInputGen), pub arith_eq_counter: (usize, ArithEqCounterInputGen), pub arith_eq_384_counter: (usize, ArithEq384CounterInputGen), pub add_256_counter: (usize, Add256CounterInputGen), + pub dma_counter: (usize, DmaCounterInputGen), pub rom_counter_id: Option, /// Queue of pending data transfers to be processed. - pending_transfers: VecDeque<(BusId, Vec)>, + pending_transfers: VecDeque<(BusId, Vec, Vec)>, } impl StaticDataBus { @@ -60,9 +67,12 @@ impl StaticDataBus { arith_counter: (usize, ArithCounterInputGen), keccakf_counter: (usize, KeccakfCounterInputGen), sha256f_counter: (usize, Sha256fCounterInputGen), + poseidon2_counter: (usize, Poseidon2CounterInputGen), + blake2_counter: (usize, Blake2CounterInputGen), arith_eq_counter: (usize, ArithEqCounterInputGen), arith_eq_384_counter: (usize, ArithEq384CounterInputGen), add_256_counter: (usize, Add256CounterInputGen), + dma_counter: (usize, DmaCounterInputGen), rom_counter_id: Option, ) -> Self { Self { @@ -73,9 +83,12 @@ impl StaticDataBus { arith_counter, keccakf_counter, sha256f_counter, + poseidon2_counter, + blake2_counter, arith_eq_counter, arith_eq_384_counter, add_256_counter, + dma_counter, rom_counter_id, pending_transfers: VecDeque::new(), } @@ -92,71 +105,71 @@ impl StaticDataBus { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn route_data(&mut self, bus_id: BusId, payload: &[PayloadType]) -> bool { + fn route_data( + &mut self, + bus_id: BusId, + data: &[PayloadType], + data_ext: &[PayloadType], + ) -> bool { match bus_id { MEM_BUS_ID => { let mut _continue = true; if !self.process_only_operation_bus { if let Some(mem_counter) = self.mem_counter.1.as_mut() { // If we are not processing only operation bus, we process memory bus data. - _continue &= mem_counter.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ); + _continue &= mem_counter.process_data(&bus_id, data); } } _continue } - OPERATION_BUS_ID => match payload[1] as u32 { - PUB_OUT_OP_TYPE_ID => self.main_counter.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ), - BINARY_OP_TYPE_ID | BINARY_E_OP_TYPE_ID => self.binary_counter.1.process_data( + OPERATION_BUS_ID => match data[1] as u32 { + PUB_OUT_OP_TYPE_ID => self.main_counter.process_data(&bus_id, data), + BINARY_OP_TYPE_ID | BINARY_E_OP_TYPE_ID => { + self.binary_counter.1.process_data(&bus_id, data) + } + ARITH_OP_TYPE_ID => { + self.arith_counter.1.process_data(&bus_id, data, &mut self.pending_transfers) + } + KECCAK_OP_TYPE_ID => self.keccakf_counter.1.process_data( &bus_id, - payload, - &mut self.pending_transfers, - None, + data, + &mut MemCounterProcessor::new(self.mem_counter.1.as_mut()), ), - ARITH_OP_TYPE_ID => self.arith_counter.1.process_data( + SHA256_OP_TYPE_ID => self.sha256f_counter.1.process_data( &bus_id, - payload, - &mut self.pending_transfers, - None, + data, + &mut MemCounterProcessor::new(self.mem_counter.1.as_mut()), ), - KECCAK_OP_TYPE_ID => self.keccakf_counter.1.process_data( + POSEIDON2_OP_TYPE_ID => self.poseidon2_counter.1.process_data( &bus_id, - payload, - &mut self.pending_transfers, - None, + data, + &mut MemCounterProcessor::new(self.mem_counter.1.as_mut()), ), - SHA256_OP_TYPE_ID => self.sha256f_counter.1.process_data( + BLAKE2_OP_TYPE_ID => self.blake2_counter.1.process_data( &bus_id, - payload, - &mut self.pending_transfers, - None, + data, + &mut MemCounterProcessor::new(self.mem_counter.1.as_mut()), ), ARITH_EQ_OP_TYPE_ID => self.arith_eq_counter.1.process_data( &bus_id, - payload, - &mut self.pending_transfers, - None, + data, + &mut MemCounterProcessor::new(self.mem_counter.1.as_mut()), ), ARITH_EQ_384_OP_TYPE_ID => self.arith_eq_384_counter.1.process_data( &bus_id, - payload, - &mut self.pending_transfers, - None, + data, + &mut MemCounterProcessor::new(self.mem_counter.1.as_mut()), ), BIG_INT_OP_TYPE_ID => self.add_256_counter.1.process_data( &bus_id, - payload, - &mut self.pending_transfers, - None, + data, + &mut MemCounterProcessor::new(self.mem_counter.1.as_mut()), + ), + DMA_OP_TYPE_ID => self.dma_counter.1.process_data( + &bus_id, + data, + data_ext, + &mut MemCounterProcessor::new(self.mem_counter.1.as_mut()), ), _ => true, }, @@ -167,28 +180,25 @@ impl StaticDataBus { impl DataBusTrait> for StaticDataBus { #[inline(always)] - fn write_to_bus(&mut self, bus_id: BusId, payload: &[PayloadType]) -> bool { - let mut _continue = self.route_data(bus_id, payload); + fn write_to_bus( + &mut self, + bus_id: BusId, + data: &[PayloadType], + data_ext: &[PayloadType], + ) -> bool { + let mut _continue = self.route_data(bus_id, data, data_ext); - while let Some((bus_id, payload)) = self.pending_transfers.pop_front() { - _continue &= self.route_data(bus_id, &payload); + while let Some((bus_id, data, data_ext)) = self.pending_transfers.pop_front() { + _continue &= self.route_data(bus_id, &data, &data_ext); } _continue } fn on_close(&mut self) { - self.main_counter.on_close(); if let Some(mem_counter) = self.mem_counter.1.as_mut() { - mem_counter.on_close(); + mem_counter.close(); } - self.binary_counter.1.on_close(); - self.arith_counter.1.on_close(); - self.keccakf_counter.1.on_close(); - self.sha256f_counter.1.on_close(); - self.arith_eq_counter.1.on_close(); - self.arith_eq_384_counter.1.on_close(); - self.add_256_counter.1.on_close(); } fn into_devices( @@ -207,9 +217,12 @@ impl DataBusTrait> for StaticDataBus { +pub struct StaticDataBusCollect { /// Memory-related collectors (grouped for cache locality) pub mem_collector: Vec<(usize, MemModuleCollector)>, pub mem_align_collector: Vec<(usize, MemAlignCollector)>, /// Binary operation collectors (grouped for cache locality) - pub binary_basic_collector: Vec<(usize, BinaryBasicCollector)>, - pub binary_add_collector: Vec<(usize, BinaryAddCollector)>, - pub binary_extension_collector: Vec<(usize, BinaryExtensionCollector)>, + pub binary_basic_collector: Vec<(usize, BinaryBasicCollector)>, + pub binary_add_collector: Vec<(usize, BinaryAddCollector)>, + pub binary_extension_collector: Vec<(usize, BinaryExtensionCollector)>, /// Arithmetic collectors (grouped for cache locality) - pub arith_collector: Vec<(usize, ArithInstanceCollector)>, + pub arith_collector: Vec<(usize, ArithInstanceCollector)>, pub arith_inputs_generator: ArithCounterInputGen, /// Cryptographic hash collectors (grouped for cache locality) @@ -54,6 +64,10 @@ pub struct StaticDataBusCollect { pub keccakf_inputs_generator: KeccakfCounterInputGen, pub sha256f_collector: Vec<(usize, Sha256fCollector)>, pub sha256f_inputs_generator: Sha256fCounterInputGen, + pub poseidon2_collector: Vec<(usize, Poseidon2Collector)>, + pub poseidon2_inputs_generator: Poseidon2CounterInputGen, + pub blake2_collector: Vec<(usize, Blake2Collector)>, + pub blake2_inputs_generator: Blake2CounterInputGen, /// Arithmetic equality collectors pub arith_eq_collector: Vec<(usize, ArithEqCollector)>, @@ -67,13 +81,18 @@ pub struct StaticDataBusCollect { pub add256_collector: Vec<(usize, Add256Collector)>, pub add256_inputs_generator: Add256CounterInputGen, + /// Dma collectors + pub dma_collector: Vec<(usize, DmaCollector)>, + pub dma_pre_post_collector: Vec<(usize, DmaPrePostCollector)>, + pub dma_64_aligned_collector: Vec<(usize, Dma64AlignedCollector)>, + pub dma_unaligned_collector: Vec<(usize, DmaUnalignedCollector)>, + pub dma_inputs_generator: DmaCounterInputGen, + /// ROM collector pub rom_collector: Vec<(usize, RomCollector)>, /// Queue of pending data transfers to be processed. - pending_transfers: VecDeque<(BusId, Vec)>, - - mem_collectors_info: Vec, + pending_transfers: VecDeque<(BusId, Vec, Vec)>, } const BINARY_TYPE: u64 = ZiskOperationType::Binary as u64; @@ -81,36 +100,45 @@ const BINARY_E_TYPE: u64 = ZiskOperationType::BinaryE as u64; const ARITH_TYPE: u64 = ZiskOperationType::Arith as u64; const KECCAK_TYPE: u64 = ZiskOperationType::Keccak as u64; const SHA256_TYPE: u64 = ZiskOperationType::Sha256 as u64; +const POSEIDON2_TYPE: u64 = ZiskOperationType::Poseidon2 as u64; +const BLAKE2_TYPE: u64 = ZiskOperationType::Blake2 as u64; const ARITH_EQ_TYPE: u64 = ZiskOperationType::ArithEq as u64; const ARITH_EQ_384_TYPE: u64 = ZiskOperationType::ArithEq384 as u64; const BIG_INT_OP_TYPE_ID: u64 = ZiskOperationType::BigInt as u64; +const DMA_OP_TYPE_ID: u64 = ZiskOperationType::Dma as u64; -impl StaticDataBusCollect { +impl StaticDataBusCollect { /// Creates a new `DataBus` instance. #[allow(clippy::too_many_arguments)] pub fn new( mem_collector: Vec<(usize, MemModuleCollector)>, mem_align_collector: Vec<(usize, MemAlignCollector)>, - binary_basic_collector: Vec<(usize, BinaryBasicCollector)>, - binary_add_collector: Vec<(usize, BinaryAddCollector)>, - binary_extension_collector: Vec<(usize, BinaryExtensionCollector)>, - arith_collector: Vec<(usize, ArithInstanceCollector)>, + binary_basic_collector: Vec<(usize, BinaryBasicCollector)>, + binary_add_collector: Vec<(usize, BinaryAddCollector)>, + binary_extension_collector: Vec<(usize, BinaryExtensionCollector)>, + arith_collector: Vec<(usize, ArithInstanceCollector)>, keccakf_collector: Vec<(usize, KeccakfCollector)>, sha256f_collector: Vec<(usize, Sha256fCollector)>, + poseidon2_collector: Vec<(usize, Poseidon2Collector)>, + blake2_collector: Vec<(usize, Blake2Collector)>, arith_eq_collector: Vec<(usize, ArithEqCollector)>, arith_eq_384_collector: Vec<(usize, ArithEq384Collector)>, add256_collector: Vec<(usize, Add256Collector)>, + dma_collector: Vec<(usize, DmaCollector)>, + dma_pre_post_collector: Vec<(usize, DmaPrePostCollector)>, + dma_64_aligned_collector: Vec<(usize, Dma64AlignedCollector)>, + dma_unaligned_collector: Vec<(usize, DmaUnalignedCollector)>, rom_collector: Vec<(usize, RomCollector)>, arith_eq_inputs_generator: ArithEqCounterInputGen, arith_eq_384_inputs_generator: ArithEq384CounterInputGen, keccakf_inputs_generator: KeccakfCounterInputGen, sha256f_inputs_generator: Sha256fCounterInputGen, + poseidon2_inputs_generator: Poseidon2CounterInputGen, + blake2_inputs_generator: Blake2CounterInputGen, arith_inputs_generator: ArithCounterInputGen, add256_inputs_generator: Add256CounterInputGen, + dma_inputs_generator: DmaCounterInputGen, ) -> Self { - let mem_collectors_info: Vec = - mem_collector.iter().map(|(_, collector)| collector.get_mem_collector_info()).collect(); - Self { mem_collector, mem_align_collector, @@ -120,18 +148,26 @@ impl StaticDataBusCollect { arith_collector, keccakf_collector, sha256f_collector, + poseidon2_collector, + blake2_collector, arith_eq_collector, arith_eq_384_collector, add256_collector, + dma_collector, + dma_pre_post_collector, + dma_64_aligned_collector, + dma_unaligned_collector, rom_collector, arith_eq_inputs_generator, arith_eq_384_inputs_generator, keccakf_inputs_generator, sha256f_inputs_generator, + poseidon2_inputs_generator, + blake2_inputs_generator, arith_inputs_generator, add256_inputs_generator, + dma_inputs_generator, pending_transfers: VecDeque::with_capacity(64), - mem_collectors_info, } } @@ -146,161 +182,163 @@ impl StaticDataBusCollect { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn route_data(&mut self, bus_id: BusId, payload: &[PayloadType]) { + fn route_data(&mut self, bus_id: BusId, data: &[PayloadType], data_ext: &[PayloadType]) { match bus_id { MEM_BUS_ID => { - // Process mem collectors - inverted condition to avoid continue - for (_, mem_collector) in &mut self.mem_collector { - mem_collector.process_data(&bus_id, payload, &mut self.pending_transfers, None); - } - - // Only process align collectors if needed - for (_, mem_align_collector) in &mut self.mem_align_collector { - mem_align_collector.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ); - } + MemCollectorProcessor::new(&mut self.mem_collector, &mut self.mem_align_collector) + .process_mem_data(&data.try_into().unwrap()); } - OPERATION_BUS_ID => match payload[OP_TYPE] { + OPERATION_BUS_ID => match data[OP_TYPE] { BINARY_TYPE => { for (_, binary_add_collector) in &mut self.binary_add_collector { - binary_add_collector.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ); + binary_add_collector.process_data(&bus_id, data); } for (_, binary_basic_collector) in &mut self.binary_basic_collector { - binary_basic_collector.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ); + binary_basic_collector.process_data(&bus_id, data); } } BINARY_E_TYPE => { for (_, binary_extension_collector) in &mut self.binary_extension_collector { - binary_extension_collector.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ); + binary_extension_collector.process_data(&bus_id, data); } } ARITH_TYPE => { for (_, arith_collector) in &mut self.arith_collector { - arith_collector.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ); + arith_collector.process_data(&bus_id, data); } self.arith_inputs_generator.process_data( &bus_id, - payload, + data, &mut self.pending_transfers, - None, ); } KECCAK_TYPE => { for (_, keccakf_collector) in &mut self.keccakf_collector { - keccakf_collector.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ); + keccakf_collector.process_data(&bus_id, data); } self.keccakf_inputs_generator.process_data( &bus_id, - payload, - &mut self.pending_transfers, - Some(&self.mem_collectors_info), + data, + &mut MemCollectorProcessor::new( + &mut self.mem_collector, + &mut self.mem_align_collector, + ), ); } SHA256_TYPE => { for (_, sha256f_collector) in &mut self.sha256f_collector { - sha256f_collector.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ); + sha256f_collector.process_data(&bus_id, data); } self.sha256f_inputs_generator.process_data( &bus_id, - payload, - &mut self.pending_transfers, - Some(&self.mem_collectors_info), + data, + &mut MemCollectorProcessor::new( + &mut self.mem_collector, + &mut self.mem_align_collector, + ), + ); + } + POSEIDON2_TYPE => { + for (_, poseidon2_collector) in &mut self.poseidon2_collector { + poseidon2_collector.process_data(&bus_id, data); + } + self.poseidon2_inputs_generator.process_data( + &bus_id, + data, + &mut MemCollectorProcessor::new( + &mut self.mem_collector, + &mut self.mem_align_collector, + ), + ); + } + BLAKE2_TYPE => { + for (_, blake2_collector) in &mut self.blake2_collector { + blake2_collector.process_data(&bus_id, data); + } + self.blake2_inputs_generator.process_data( + &bus_id, + data, + &mut MemCollectorProcessor::new( + &mut self.mem_collector, + &mut self.mem_align_collector, + ), ); } ARITH_EQ_TYPE => { for (_, arith_eq_collector) in &mut self.arith_eq_collector { - arith_eq_collector.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ); + arith_eq_collector.process_data(&bus_id, data); } self.arith_eq_inputs_generator.process_data( &bus_id, - payload, - &mut self.pending_transfers, - Some(&self.mem_collectors_info), + data, + &mut MemCollectorProcessor::new( + &mut self.mem_collector, + &mut self.mem_align_collector, + ), ); } ARITH_EQ_384_TYPE => { for (_, arith_eq_384_collector) in &mut self.arith_eq_384_collector { - arith_eq_384_collector.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ); + arith_eq_384_collector.process_data(&bus_id, data); } self.arith_eq_384_inputs_generator.process_data( &bus_id, - payload, - &mut self.pending_transfers, - Some(&self.mem_collectors_info), + data, + &mut MemCollectorProcessor::new( + &mut self.mem_collector, + &mut self.mem_align_collector, + ), ); } BIG_INT_OP_TYPE_ID => { for (_, add256_collector) in &mut self.add256_collector { - add256_collector.process_data( - &bus_id, - payload, - &mut self.pending_transfers, - None, - ); + add256_collector.process_data(&bus_id, data); } self.add256_inputs_generator.process_data( &bus_id, - payload, - &mut self.pending_transfers, - Some(&self.mem_collectors_info), + data, + &mut MemCollectorProcessor::new( + &mut self.mem_collector, + &mut self.mem_align_collector, + ), + ); + } + DMA_OP_TYPE_ID => { + for (_, dma_collector) in &mut self.dma_collector { + dma_collector.process_data(&bus_id, data, data_ext); + } + for (_, dma_pre_post_collector) in &mut self.dma_pre_post_collector { + dma_pre_post_collector.process_data(&bus_id, data, data_ext); + } + for (_, dma_64_aligned_collector) in &mut self.dma_64_aligned_collector { + dma_64_aligned_collector.process_data(&bus_id, data, data_ext); + } + for (_, dma_unaligned_collector) in &mut self.dma_unaligned_collector { + dma_unaligned_collector.process_data(&bus_id, data, data_ext); + } + + self.dma_inputs_generator.process_data( + &bus_id, + data, + data_ext, + &mut MemCollectorProcessor::new( + &mut self.mem_collector, + &mut self.mem_align_collector, + ), ); } _ => {} }, ROM_BUS_ID => { for (_, rom_collector) in &mut self.rom_collector { - rom_collector.process_data(&bus_id, payload, &mut self.pending_transfers, None); + rom_collector.process_data(&bus_id, data); } } _ => {} @@ -308,16 +346,23 @@ impl StaticDataBusCollect { } } -impl DataBusTrait>> - for StaticDataBusCollect +impl DataBusTrait>> + for StaticDataBusCollect { #[inline(always)] - fn write_to_bus(&mut self, bus_id: BusId, payload: &[PayloadType]) -> bool { - self.route_data(bus_id, payload); + fn write_to_bus( + &mut self, + bus_id: BusId, + data: &[PayloadType], + data_ext: &[PayloadType], + ) -> bool { + self.route_data(bus_id, data, data_ext); // Process all pending transfers in a batch to improve cache locality - while let Some((pending_bus_id, pending_payload)) = self.pending_transfers.pop_front() { - self.route_data(pending_bus_id, &pending_payload); + while let Some((pending_bus_id, pending_payload, pending_data_ext)) = + self.pending_transfers.pop_front() + { + self.route_data(pending_bus_id, &pending_payload, &pending_data_ext); } true @@ -326,13 +371,9 @@ impl DataBusTrait>> fn on_close(&mut self) {} fn into_devices( - mut self, - execute_on_close: bool, + self, + _execute_on_close: bool, ) -> Vec<(Option, Option>>)> { - if execute_on_close { - self.on_close(); - } - let mut result = Vec::new(); // Add all collectors to the result @@ -368,6 +409,14 @@ impl DataBusTrait>> result.push((Some(id), Some(Box::new(collector) as Box>))); } + for (id, collector) in self.poseidon2_collector { + result.push((Some(id), Some(Box::new(collector) as Box>))); + } + + for (id, collector) in self.blake2_collector { + result.push((Some(id), Some(Box::new(collector) as Box>))); + } + for (id, collector) in self.arith_eq_collector { result.push((Some(id), Some(Box::new(collector) as Box>))); } @@ -380,6 +429,22 @@ impl DataBusTrait>> result.push((Some(id), Some(Box::new(collector) as Box>))); } + for (id, collector) in self.dma_collector { + result.push((Some(id), Some(Box::new(collector) as Box>))); + } + + for (id, collector) in self.dma_pre_post_collector { + result.push((Some(id), Some(Box::new(collector) as Box>))); + } + + for (id, collector) in self.dma_64_aligned_collector { + result.push((Some(id), Some(Box::new(collector) as Box>))); + } + + for (id, collector) in self.dma_unaligned_collector { + result.push((Some(id), Some(Box::new(collector) as Box>))); + } + for (id, collector) in self.rom_collector { result.push((Some(id), Some(Box::new(collector) as Box>))); } diff --git a/executor/src/utils.rs b/executor/src/utils.rs new file mode 100644 index 000000000..441257183 --- /dev/null +++ b/executor/src/utils.rs @@ -0,0 +1,208 @@ +use crate::{EmulatorAsm, EmulatorKind, EmulatorRust, StateMachines, StaticSMBundle, ZiskExecutor}; +use fields::PrimeField64; +use pil_std_lib::Std; +use precomp_arith_eq::ArithEqManager; +use precomp_arith_eq_384::ArithEq384Manager; +use precomp_big_int::Add256Manager; +use precomp_blake2::Blake2Manager; +use precomp_dma::DmaManager; +use precomp_keccakf::KeccakfManager; +use precomp_poseidon2::Poseidon2Manager; +use precomp_sha256f::Sha256fManager; +use proofman::register_std; +use proofman_common::PackedInfo; +use proofman_common::VerboseMode; +use sm_arith::ArithSM; +use sm_binary::BinarySM; +use sm_mem::Mem; +use sm_rom::RomSM; +use std::{collections::HashMap, sync::Arc}; +use tracing::debug; +use witness::WitnessManager; + +use zisk_core::CHUNK_SIZE; +#[cfg(feature = "packed")] +use zisk_pil::PACKED_INFO; +use zisk_pil::{ + ADD_256_AIR_IDS, ARITH_AIR_IDS, ARITH_EQ_384_AIR_IDS, ARITH_EQ_AIR_IDS, BINARY_ADD_AIR_IDS, + BINARY_AIR_IDS, BINARY_EXTENSION_AIR_IDS, BLAKE_2_BR_AIR_IDS, DMA_64_ALIGNED_AIR_IDS, + DMA_64_ALIGNED_INPUT_CPY_AIR_IDS, DMA_64_ALIGNED_MEM_AIR_IDS, DMA_64_ALIGNED_MEM_CPY_AIR_IDS, + DMA_64_ALIGNED_MEM_SET_AIR_IDS, DMA_AIR_IDS, DMA_INPUT_CPY_AIR_IDS, DMA_MEM_CPY_AIR_IDS, + DMA_PRE_POST_AIR_IDS, DMA_PRE_POST_INPUT_CPY_AIR_IDS, DMA_PRE_POST_MEM_CPY_AIR_IDS, + DMA_UNALIGNED_AIR_IDS, INPUT_DATA_AIR_IDS, KECCAKF_AIR_IDS, MEM_AIR_IDS, MEM_ALIGN_AIR_IDS, + MEM_ALIGN_BYTE_AIR_IDS, MEM_ALIGN_READ_BYTE_AIR_IDS, MEM_ALIGN_WRITE_BYTE_AIR_IDS, + POSEIDON_2_AIR_IDS, ROM_AIR_IDS, ROM_DATA_AIR_IDS, SHA_256_F_AIR_IDS, ZISK_AIRGROUP_ID, +}; + +use anyhow::Result; + +pub fn get_packed_info() -> HashMap<(usize, usize), PackedInfo> { + let mut _packed_info = HashMap::new(); + #[cfg(feature = "packed")] + { + for packed_info in PACKED_INFO.iter() { + _packed_info.insert( + (packed_info.0, packed_info.1), + PackedInfo::new( + packed_info.2.is_packed, + packed_info.2.num_packed_words, + packed_info.2.unpack_info.to_vec(), + ), + ); + } + } + _packed_info +} + +/// Registers the witness components +/// +/// # Arguments +/// * `wcm` - An `Arc`-wrapped `WitnessManager` instance that orchestrates witness generation. +/// +/// This method performs the following steps: +/// 2. Initializes core and secondary state machines for witness generation. +/// 3. Registers the state machines with the `ZiskExecutor`. +/// 4. Registers the `ZiskExecutor` as a component in the `WitnessManager`. +fn initialize_executor( + verbose_mode: proofman_common::VerboseMode, + shared_tables: bool, + is_asm_emulator: bool, + unlock_mapped_memory: Option, + wcm: &WitnessManager, +) -> Result>> { + let rank_info = wcm.get_rank_info(); + + proofman_common::initialize_logger(verbose_mode, Some(&rank_info)); + + // Step 3: Initialize the secondary state machines + let std = Std::new(wcm.get_pctx(), wcm.get_sctx(), shared_tables)?; + register_std(wcm, &std); + + let rom_sm = RomSM::new(is_asm_emulator); + let binary_sm = BinarySM::new(std.clone()); + let arith_sm = ArithSM::new(std.clone()); + let mem_sm = Mem::new(std.clone()); + // Step 4: Initialize the precompiles state machines + let keccakf_sm = KeccakfManager::new(std.clone()); + let sha256f_sm = Sha256fManager::new(std.clone()); + let poseidon2_sm = Poseidon2Manager::new(); + let blake2_sm = Blake2Manager::new(std.clone()); + let arith_eq_sm = ArithEqManager::new(std.clone()); + let arith_eq_384_sm = ArithEq384Manager::new(std.clone()); + let add256_sm = Add256Manager::new(std.clone()); + let dma_sm = DmaManager::new(std.clone()); + + let mem_instances = vec![ + (ZISK_AIRGROUP_ID, MEM_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, ROM_DATA_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, INPUT_DATA_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, MEM_ALIGN_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, MEM_ALIGN_BYTE_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, MEM_ALIGN_WRITE_BYTE_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, MEM_ALIGN_READ_BYTE_AIR_IDS[0]), + ]; + + let binary_instances = vec![ + (ZISK_AIRGROUP_ID, BINARY_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, BINARY_ADD_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, BINARY_EXTENSION_AIR_IDS[0]), + ]; + + let dma_instances = vec![ + (ZISK_AIRGROUP_ID, DMA_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, DMA_PRE_POST_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, DMA_64_ALIGNED_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, DMA_UNALIGNED_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, DMA_MEM_CPY_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, DMA_INPUT_CPY_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, DMA_PRE_POST_MEM_CPY_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, DMA_PRE_POST_INPUT_CPY_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, DMA_64_ALIGNED_MEM_CPY_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, DMA_64_ALIGNED_MEM_SET_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, DMA_64_ALIGNED_INPUT_CPY_AIR_IDS[0]), + (ZISK_AIRGROUP_ID, DMA_64_ALIGNED_MEM_AIR_IDS[0]), + ]; + + let sm_bundle = StaticSMBundle::new( + is_asm_emulator, + std.clone(), + vec![ + (vec![(ZISK_AIRGROUP_ID, ROM_AIR_IDS[0])], StateMachines::RomSM(rom_sm.clone())), + (mem_instances, StateMachines::MemSM(mem_sm.clone())), + (binary_instances, StateMachines::BinarySM(binary_sm.clone())), + (vec![(ZISK_AIRGROUP_ID, ARITH_AIR_IDS[0])], StateMachines::ArithSM(arith_sm.clone())), + // The precompiles state machines + ( + vec![(ZISK_AIRGROUP_ID, KECCAKF_AIR_IDS[0])], + StateMachines::KeccakfManager(keccakf_sm.clone()), + ), + ( + vec![(ZISK_AIRGROUP_ID, SHA_256_F_AIR_IDS[0])], + StateMachines::Sha256fManager(sha256f_sm.clone()), + ), + ( + vec![(ZISK_AIRGROUP_ID, POSEIDON_2_AIR_IDS[0])], + StateMachines::Poseidon2Manager(poseidon2_sm.clone()), + ), + ( + vec![(ZISK_AIRGROUP_ID, BLAKE_2_BR_AIR_IDS[0])], + StateMachines::Blake2Manager(blake2_sm.clone()), + ), + ( + vec![(ZISK_AIRGROUP_ID, ARITH_EQ_AIR_IDS[0])], + StateMachines::ArithEqManager(arith_eq_sm.clone()), + ), + ( + vec![(ZISK_AIRGROUP_ID, ARITH_EQ_384_AIR_IDS[0])], + StateMachines::ArithEq384Manager(arith_eq_384_sm.clone()), + ), + ( + vec![(ZISK_AIRGROUP_ID, ADD_256_AIR_IDS[0])], + StateMachines::Add256Manager(add256_sm.clone()), + ), + (dma_instances, StateMachines::DmaManager(dma_sm.clone())), + ], + ); + + let emulator = if is_asm_emulator { + debug!("Using ASM emulator"); + EmulatorKind::Asm(EmulatorAsm::new( + rank_info.world_rank, + rank_info.local_rank, + unlock_mapped_memory.unwrap_or(false), + CHUNK_SIZE, + Some(rom_sm.clone()), + verbose_mode, + )) + } else { + debug!("Using Rust emulator"); + EmulatorKind::Rust(EmulatorRust::new(CHUNK_SIZE)) + }; + + let executor = Arc::new(ZiskExecutor::new(sm_bundle, emulator)); + + // Step 7: Register the executor as a component in the Witness Manager + wcm.register_component(executor.clone()); + + wcm.set_witness_initialized(); + + Ok(executor) +} + +pub fn init_executor_emu( + verbose: VerboseMode, + shared_tables: bool, + wcm: &WitnessManager, +) -> Result>> { + initialize_executor(verbose, shared_tables, false, None, wcm) +} + +#[allow(clippy::too_many_arguments)] +pub fn init_executor_asm( + verbose: VerboseMode, + shared_tables: bool, + unlock_mapped_memory: bool, + wcm: &WitnessManager, +) -> Result>> { + initialize_executor(verbose, shared_tables, true, Some(unlock_mapped_memory), wcm) +} diff --git a/executor/src/witness_generator.rs b/executor/src/witness_generator.rs new file mode 100644 index 000000000..bc5398789 --- /dev/null +++ b/executor/src/witness_generator.rs @@ -0,0 +1,139 @@ +//! Witness computation component. +//! +//! This module handles the computation of witnesses for main and +//! secondary state machine instances. + +use fields::PrimeField64; +use proofman_common::{ProofCtx, ProofmanResult, SetupCtx}; +use sm_main::MainInstance; +use std::time::Instant; +use zisk_common::{stats_begin, stats_end, BusDevice, Instance, InstanceType, Stats}; + +use crate::state::ExecutionState; + +/// Component responsible for witness computation. +/// +/// Handles the computation of witnesses for: +/// - **Main instances**: Compute from minimal traces with chunk processing +/// - **Secondary instances**: Compute from collected chunk data +/// - **Table instances**: Compute static lookup tables +pub struct WitnessGenerator { + /// Chunk size for trace processing. + chunk_size: u64, +} + +impl WitnessGenerator { + /// Creates a new `WitnessGenerator`. + /// + /// # Arguments + /// * `chunk_size` - Chunk size for processing. + pub fn new(chunk_size: u64) -> Self { + Self { chunk_size } + } + + /// Computes witness for a main state machine instance. + /// + /// # Arguments + /// * `pctx` - Proof context. + /// * `state` - Execution state. + /// * `main_instance` - The main instance to compute witness for. + /// * `trace_buffer` - Buffer for trace data. + /// * `caller_stats_id` - Parent stats scope ID. + pub fn compute_main_witness( + &self, + pctx: &ProofCtx, + state: &ExecutionState, + main_instance: &MainInstance, + trace_buffer: Vec, + _caller_stats_id: u64, + ) -> ProofmanResult<()> { + let witness_start_time = Instant::now(); + + let (airgroup_id, air_id) = pctx + .dctx_get_instance_info(main_instance.ictx.global_id) + .expect("Failed to get instance info"); + + stats_begin!(state.stats, _caller_stats_id, _stats_scope, "AIR_MAIN_WITNESS", air_id); + + let zisk_rom = state + .get_rom() + .map_err(|e| proofman_common::ProofmanError::InvalidConfiguration(e.to_string()))?; + let min_traces_guard = state.min_traces.read().unwrap(); + let min_traces = min_traces_guard.as_ref().expect("min_traces should not be None"); + + let air_instance = main_instance.compute_witness( + &zisk_rom, + min_traces, + self.chunk_size, + main_instance, + trace_buffer, + )?; + + pctx.add_air_instance(air_instance, main_instance.ictx.global_id); + + stats_end!(state.stats, &_stats_scope); + + let stats = Stats::new_main_completed(airgroup_id, air_id, witness_start_time); + + state.stats.insert_witness_stats(main_instance.ictx.global_id, stats); + + Ok(()) + } + + /// Computes witness for a secondary state machine instance. + /// + /// # Arguments + /// * `pctx` - Proof context. + /// * `sctx` - Setup context. + /// * `state` - Execution state. + /// * `global_id` - Global ID of the instance. + /// * `secn_instance` - The secondary instance to compute witness for. + /// * `collectors` - Collectors for chunk data. + /// * `should_add_instance` - Whether to add the computed AIR instance to the proof + /// * `trace_buffer` - Buffer for trace data. + /// * `_caller_stats_id` - Parent stats scope ID. + #[allow(clippy::too_many_arguments)] + pub fn compute_secn_witness( + &self, + pctx: &ProofCtx, + sctx: &SetupCtx, + state: &ExecutionState, + global_id: usize, + secn_instance: &dyn Instance, + collectors: Vec<(usize, Box>)>, + trace_buffer: Vec, + _caller_stats_id: u64, + ) -> ProofmanResult<()> { + let witness_start_time = Instant::now(); + + let _stats_msg = match secn_instance.instance_type() { + InstanceType::Instance => "AIR_SECN_WITNESS", + InstanceType::Table => "AIR_WITNESS_TABLE", + }; + + let (_airgroup_id, _air_id) = + pctx.dctx_get_instance_info(global_id).expect("Failed to get instance info"); + + stats_begin!(state.stats, _caller_stats_id, _stats_scope, _stats_msg, _air_id); + + let air_instance = secn_instance.compute_witness(pctx, sctx, collectors, trace_buffer)?; + + if let Some(air_instance) = air_instance { + let should_add_instance = secn_instance.instance_type() == InstanceType::Instance + || (secn_instance.instance_type() == InstanceType::Table + && pctx + .dctx_is_my_process_instance(global_id) + .expect("Failed to check instance ownership")); + + if should_add_instance { + pctx.add_air_instance(air_instance, global_id); + } + } + + stats_end!(state.stats, &_stats_scope); + + state.stats.set_witness_duration(global_id, witness_start_time.elapsed().as_millis()); + + Ok(()) + } +} diff --git a/executor/src/witness_orchestrator.rs b/executor/src/witness_orchestrator.rs new file mode 100644 index 000000000..4800df895 --- /dev/null +++ b/executor/src/witness_orchestrator.rs @@ -0,0 +1,366 @@ +//! Witness orchestrator component. +//! +//! This module handles the logic for witness computation, coordinating between collectors and +//! witness generators + +use crate::{ + state::ExecutionState, AirClassifier, ChunkDataCollector, StaticSMBundle, WitnessGenerator, +}; +use fields::PrimeField64; +use proofman_common::{BufferPool, ProofCtx, ProofmanResult, SetupCtx}; +use sm_rom::RomInstance; +use std::collections::HashMap; +use std::sync::Arc; +use zisk_common::{BusDevice, Instance, InstanceType, Stats, StatsScope}; +use zisk_core::ZiskRom; + +/// Type alias for the secondary instances map (owned). +type SecnInstanceMap = HashMap>>; + +/// Type alias for the secondary instances map (borrowed). +type SecnInstanceMapRef<'a, F> = HashMap>>; + +/// Context for witness computation operations. +pub struct WitnessContext<'a, F: PrimeField64> { + /// Proof context. + pub pctx: &'a ProofCtx, + + /// Setup context. + pub sctx: &'a SetupCtx, + + /// Execution state. + pub state: &'a ExecutionState, + + /// Buffer pool for trace data. + pub buffer_pool: &'a dyn BufferPool, + + /// Statistics scope. + pub stats_scope: &'a StatsScope, +} + +impl<'a, F: PrimeField64> WitnessContext<'a, F> { + /// Creates a new witness context. + pub fn new( + pctx: &'a ProofCtx, + sctx: &'a SetupCtx, + state: &'a ExecutionState, + buffer_pool: &'a dyn BufferPool, + stats_scope: &'a StatsScope, + ) -> Self { + Self { pctx, sctx, state, buffer_pool, stats_scope } + } + + /// Gets instance info (airgroup_id, air_id) for a global ID. + pub fn get_instance_info(&self, global_id: usize) -> (usize, usize) { + self.pctx.dctx_get_instance_info(global_id).expect("Failed to get instance info") + } +} + +/// Component responsible for orchestrating witness computation. +pub struct WitnessOrchestrator { + /// Chunk data collector for secondary instances. + collector: ChunkDataCollector, + + /// Witness computer for all instance types. + witness_generator: WitnessGenerator, + + /// Whether using ASM emulator (cached to avoid passing through all calls). + is_asm_emulator: bool, +} + +impl WitnessOrchestrator { + /// Creates a new `WitnessOrchestrator`. + /// + /// # Arguments + /// * `chunk_size` - Chunk size for trace processing. + /// * `sm_bundle` - Static state machine bundle for collector initialization. + /// * `is_asm_emulator` - Whether using ASM emulator. + pub fn new(chunk_size: u64, sm_bundle: Arc>, is_asm_emulator: bool) -> Self { + let collector = ChunkDataCollector::new(sm_bundle.clone()); + let witness_generator = WitnessGenerator::new(chunk_size); + + Self { collector, witness_generator, is_asm_emulator } + } + + pub fn set_rom(&self, zisk_rom: Arc) { + self.collector.set_rom(zisk_rom.clone()); + } + + /// Computes witness for a single global ID. + /// + /// Routes to the appropriate witness computation method based on + /// instance type and handles special cases like ROM with ASM emulator. + /// + /// # Arguments + /// * `ctx` - Witness context with shared references. + /// * `global_id` - Global ID of the instance. + pub fn compute_witness_for_instance( + &self, + ctx: &WitnessContext<'_, F>, + global_id: usize, + ) -> ProofmanResult<()> { + let (airgroup_id, air_id) = ctx.get_instance_info(global_id); + + if AirClassifier::is_main(air_id) { + self.compute_main_witness( + ctx.pctx, + ctx.state, + global_id, + ctx.buffer_pool, + ctx.stats_scope, + ) + } else { + self.compute_secondary_witness( + ctx.pctx, + ctx.sctx, + ctx.state, + global_id, + airgroup_id, + air_id, + ctx.buffer_pool, + ctx.stats_scope, + ) + } + } + + /// Computes witness for a main instance. + /// + /// # Arguments + /// * `pctx` - Proof context. + /// * `state` - Execution state. + /// * `global_id` - Global ID of the main instance. + /// * `buffer_pool` - Buffer pool for trace data. + /// * `stats_scope` - Statistics scope for recording stats. + fn compute_main_witness( + &self, + pctx: &ProofCtx, + state: &ExecutionState, + global_id: usize, + buffer_pool: &dyn BufferPool, + stats_scope: &StatsScope, + ) -> ProofmanResult<()> { + let main_instances = state.main_instances.read().unwrap(); + let main_instance = &main_instances[&global_id]; + + self.witness_generator.compute_main_witness( + pctx, + state, + main_instance, + buffer_pool.take_buffer(), + stats_scope.id(), + ) + } + + /// Computes witness for a secondary instance. + /// + /// # Arguments + /// * `pctx` - Proof context. + /// * `sctx` - Setup context. + /// * `state` - Execution state. + /// * `global_id` - Global ID of the secondary instance. + /// * `airgroup_id` - AIR group ID of the instance. + /// * `air_id` - AIR ID of the instance. + /// * `buffer_pool` - Buffer pool for trace data. + /// * `stats_scope` - Statistics scope for recording stats. + #[allow(clippy::too_many_arguments)] + fn compute_secondary_witness( + &self, + pctx: &ProofCtx, + sctx: &SetupCtx, + state: &ExecutionState, + global_id: usize, + airgroup_id: usize, + air_id: usize, + buffer_pool: &dyn BufferPool, + stats_scope: &StatsScope, + ) -> ProofmanResult<()> { + let secn_instances = state.secn_instances.read().unwrap(); + let secn_instance = &secn_instances[&global_id]; + + if secn_instance.instance_type() == InstanceType::Instance { + let needs_collection = + !state.collectors_by_instance.read().unwrap().contains_key(&global_id); + + if needs_collection { + if AirClassifier::is_rom(air_id) && self.is_asm_emulator { + // ROM with ASM emulator: skip collection + self.register_empty_collector(state, global_id, airgroup_id, air_id); + } else { + // Collect data for this instance + self.collector.collect_single(pctx, state, global_id, secn_instance).map_err( + |e| proofman_common::ProofmanError::InvalidConfiguration(e.to_string()), + )?; + } + } + } + + let instance = &**secn_instance; + let collectors = + Self::take_collectors_for_instance(state, global_id, instance.instance_type()); + + self.witness_generator.compute_secn_witness( + pctx, + sctx, + state, + global_id, + instance, + collectors, + buffer_pool.take_buffer(), + stats_scope.id(), + ) + } + + /// Registers an empty collector for instances that skip collection. + /// + /// # Arguments + /// * `state` - Execution state. + /// * `global_id` - Global ID of the instance. + /// * `airgroup_id` - AIR group ID of the instance. + /// * `air_id` - AIR ID of the instance. + fn register_empty_collector( + &self, + state: &ExecutionState, + global_id: usize, + airgroup_id: usize, + air_id: usize, + ) { + let stats = Stats::new_no_collection(airgroup_id, air_id); + + state.collectors_by_instance.write().unwrap().insert(global_id, Vec::new()); + state.stats.insert_witness_stats(global_id, stats); + } + + /// Extracts collectors from state, returning an empty list for table instances. + /// + /// # Arguments + /// * `state` - Execution state. + /// * `global_id` - Global ID of the instance. + /// * `instance_type` - Type of the instance (Instance or Table). + fn take_collectors_for_instance( + state: &ExecutionState, + global_id: usize, + instance_type: InstanceType, + ) -> Vec<(usize, Box>)> { + match instance_type { + InstanceType::Instance => { + let mut guard = state.collectors_by_instance.write().unwrap(); + + guard + .remove(&global_id) + .expect("Missing collectors for given global_id") + .into_iter() + .enumerate() + .map(|(idx, opt)| { + opt.unwrap_or_else(|| { + panic!("Collector at index {} for global_id {} is None", idx, global_id) + }) + }) + .collect() + } + InstanceType::Table => { + vec![] + } + } + } + + /// Pre-calculates witnesses by determining which instances need collection. + /// + /// Sets witness readiness flags and collects data for instances that need it. + /// + /// # Arguments + /// * `pctx` - Proof context. + /// * `state` - Execution state. + /// * `global_ids` - Global IDs to pre-calculate. + pub fn pre_calculate( + &self, + pctx: &ProofCtx, + state: &ExecutionState, + global_ids: &[usize], + ) -> ProofmanResult<()> { + let secn_instances_guard = state.secn_instances.read().unwrap(); + + let mut instances_to_collect = HashMap::new(); + + for &global_id in global_ids { + let (airgroup_id, air_id) = + pctx.dctx_get_instance_info(global_id).expect("Failed to get instance info"); + + if AirClassifier::is_main(air_id) { + pctx.set_witness_ready(global_id, false); + } else if AirClassifier::is_rom(air_id) { + self.handle_rom_pre_calculate( + pctx, + state, + &secn_instances_guard, + &mut instances_to_collect, + global_id, + airgroup_id, + air_id, + ); + } else { + self.handle_secondary_pre_calculate( + pctx, + state, + &secn_instances_guard, + &mut instances_to_collect, + global_id, + ); + } + } + + // Collect all instances that need collection + if !instances_to_collect.is_empty() { + self.collector + .collect(pctx, state, instances_to_collect) + .map_err(|e| proofman_common::ProofmanError::InvalidConfiguration(e.to_string()))?; + } + Ok(()) + } + + /// Handles ROM instance pre-calculation. + #[allow(clippy::too_many_arguments)] + fn handle_rom_pre_calculate<'a>( + &self, + pctx: &ProofCtx, + state: &ExecutionState, + secn_instances: &'a SecnInstanceMap, + instances_to_collect: &mut SecnInstanceMapRef<'a, F>, + global_id: usize, + airgroup_id: usize, + air_id: usize, + ) { + if self.is_asm_emulator { + pctx.set_witness_ready(global_id, false); + } else { + let secn_instance = &secn_instances[&global_id]; + let rom_instance = secn_instance.as_any().downcast_ref::().unwrap(); + + if rom_instance.skip_collector() { + self.register_empty_collector(state, global_id, airgroup_id, air_id); + pctx.set_witness_ready(global_id, true); + } else { + instances_to_collect.insert(global_id, secn_instance); + } + } + } + + /// Handles secondary instance pre-calculation. + fn handle_secondary_pre_calculate<'a>( + &self, + pctx: &ProofCtx, + state: &ExecutionState, + secn_instances: &'a SecnInstanceMap, + instances_to_collect: &mut SecnInstanceMapRef<'a, F>, + global_id: usize, + ) { + let secn_instance = &secn_instances[&global_id]; + + if secn_instance.instance_type() == InstanceType::Instance + && !state.collectors_by_instance.read().unwrap().contains_key(&global_id) + { + instances_to_collect.insert(global_id, secn_instance); + } else { + pctx.set_witness_ready(global_id, true); + } + } +} diff --git a/lib-c/build.rs b/lib-c/build.rs index b96388738..cfbc6f5df 100644 --- a/lib-c/build.rs +++ b/lib-c/build.rs @@ -1,8 +1,5 @@ -use std::env; -use std::fs; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::process::Command; -use std::time::UNIX_EPOCH; fn main() { if cfg!(target_os = "macos") { @@ -10,140 +7,84 @@ fn main() { return; } - // // **Check if the `no_lib_link` feature is enabled** - // if env::var("CARGO_FEATURE_NO_LIB_LINK").is_ok() { - // println!("Skipping linking because `no_lib_link` feature is enabled."); - // return; - // } - // Paths let c_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("c"); if !c_path.exists() { panic!("Missing c_path = {}", c_path.display()); } let library_folder = c_path.join("lib"); + let build_folder = c_path.join("build"); let library_name = "ziskc"; let lib_file = library_folder.join(format!("lib{library_name}.a")); - // Check if the C++ library exists before recompiling - if !lib_file.exists() { - println!("`{}` not found! Compiling...", lib_file.display()); - run_command("make", &["clean"], &c_path); - run_command("make", &[], &c_path); - } else { - println!("C++ library already compiled, skipping rebuild."); - } + // Ensure build and lib directories exist before running make + std::fs::create_dir_all(&build_folder) + .unwrap_or_else(|e| panic!("Failed to create build directory: {e}")); + std::fs::create_dir_all(&library_folder) + .unwrap_or_else(|e| panic!("Failed to create lib directory: {e}")); - // Absolute path to the library - let abs_lib_path = library_folder.canonicalize().unwrap_or_else(|_| library_folder.clone()); + // Run make (incremental build - only recompiles changed files) + let status = Command::new("make") + .current_dir(&c_path) + .status() + .unwrap_or_else(|e| panic!("Failed to execute `make`: {e}")); + + if !status.success() { + panic!("Command `make` failed with exit code {:?}", status.code()); + } + // Verify the library exists after build if !lib_file.exists() { - panic!("`{}` was not found", lib_file.display()); + panic!("`{}` was not found after build", lib_file.display()); } - // Ensure Rust triggers a rebuild if the C++ source code changes - track_cpp_changes(&c_path); + // Absolute path to the library + let abs_lib_path = library_folder.canonicalize().unwrap_or_else(|_| library_folder.clone()); // Link the static library println!("cargo:rustc-link-search=native={}", abs_lib_path.display()); println!("cargo:rustc-link-lib=static={library_name}"); + // Track C source files for recompilation + track_sources(&c_path); + // Link required libraries for lib in &["pthread", "gmp", "stdc++", "gmpxx", "c"] { println!("cargo:rustc-link-lib={lib}"); } } -// /// Runs an external command and checks for errors -fn run_command(cmd: &str, args: &[&str], dir: &Path) { - let status = Command::new(cmd) - .args(args) - .current_dir(dir) - .status() - .unwrap_or_else(|e| panic!("Failed to execute `{cmd}`: {e}")); - - if !status.success() { - panic!("Command `{}` failed with exit code {:?}", cmd, status.code()); - } -} - -/// Tracks changes in the `pil2-stark` directory to trigger recompilation only when needed -fn track_cpp_changes(c_path: &Path) { - let cpp_files = find_cpp_files(c_path); - let lib_file = c_path.join("lib/libziskc.a"); - - // Print tracked files for debugging - eprintln!("Tracking {} C++ source files:", cpp_files.len()); - for file in &cpp_files { - eprintln!(" - {}", file.display()); - println!("cargo:rerun-if-changed={}", file.display()); - } - - // If any C++ source file changed, force a rebuild - if cpp_files_have_changed(&cpp_files, &lib_file) { - eprintln!("Changes detected! Running `make clean` and recompiling..."); - run_command("make", &["clean"], c_path); - run_command("make", &[], c_path); - } else { - println!("No C++ source changes detected, skipping rebuild."); - } -} -/// Checks if any `.cpp`, `.h`, or `.hpp` file has changed since the last build -fn cpp_files_have_changed(cpp_files: &[PathBuf], lib_file: &Path) -> bool { - let mut modified_files: Vec = Vec::new(); - - // Get the modification time of `libstarks.a` - let lib_modified_time = match fs::metadata(lib_file) { - Ok(metadata) => { - let modified = metadata.modified().unwrap_or(UNIX_EPOCH); - eprintln!("`{}` last modified: {:?}", lib_file.display(), modified); - modified - } - Err(_) => { - eprintln!("Library `{}` does not exist, triggering rebuild.", lib_file.display()); - return true; // If `libstarks.a` is missing, we must rebuild. - } - }; - - // Check if any `.cpp`, `.h`, or `.hpp` file has been modified after `libstarks.a` - for file in cpp_files { - if let Ok(metadata) = fs::metadata(file) { - if let Ok(modified_time) = metadata.modified() { - if modified_time > lib_modified_time { - modified_files.push(file.clone()); +/// Tell Cargo to track C source files for changes +fn track_sources(dir: &Path) { + // Track all C/C++ source files and headers recursively + if let Ok(entries) = std::fs::read_dir(dir.join("src")) { + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + track_sources_recursive(&path); + } else if let Some(ext) = path.extension() { + if ext == "c" || ext == "cpp" || ext == "h" || ext == "hpp" || ext == "asm" { + println!("cargo:rerun-if-changed={}", path.display()); } } } } - // Print the list of modified files (if any) - if !modified_files.is_empty() { - eprintln!("Modified files detected:"); - for file in &modified_files { - eprintln!(" - {}", file.display()); - } - return true; - } - - false // No changes detected + // Also track the Makefile itself + println!("cargo:rerun-if-changed={}", dir.join("Makefile").display()); } -/// Finds all `.cpp`, `.h`, and `.hpp` files in `pil2-stark` (recursive search) -fn find_cpp_files(dir: &Path) -> Vec { - let mut cpp_files = Vec::new(); - if let Ok(entries) = fs::read_dir(dir) { +fn track_sources_recursive(dir: &Path) { + if let Ok(entries) = std::fs::read_dir(dir) { for entry in entries.flatten() { let path = entry.path(); if path.is_dir() { - cpp_files.extend(find_cpp_files(&path)); + track_sources_recursive(&path); } else if let Some(ext) = path.extension() { - if (ext == "cpp" || ext == "h" || ext == "hpp") - && path.file_name() != Some(std::ffi::OsStr::new("starks_lib.h")) - { - cpp_files.push(path); + if ext == "c" || ext == "cpp" || ext == "h" || ext == "hpp" || ext == "asm" { + println!("cargo:rerun-if-changed={}", path.display()); } } } } - cpp_files } diff --git a/lib-c/c/Makefile b/lib-c/c/Makefile index e196df67b..97ad92fdd 100644 --- a/lib-c/c/Makefile +++ b/lib-c/c/Makefile @@ -6,46 +6,173 @@ else CFLAGS = -O3 -fPIC endif -all: - mkdir -p build - nasm -felf64 src/ffiasm/fec.asm -o build/fec.o - nasm -felf64 src/ffiasm/fnec.asm -o build/fnec.o - nasm -felf64 src/ffiasm/fq.asm -o build/fq.o - nasm -felf64 src/ffiasm/bls12_381_384.asm -o build/bls12_381_384.o - gcc $(CFLAGS) -c src/ffiasm/fec.cpp -o build/fecc.o - gcc $(CFLAGS) -c src/ffiasm/fnec.cpp -o build/fnecc.o - gcc $(CFLAGS) -c src/ffiasm/fq.cpp -o build/fqc.o - gcc $(CFLAGS) -c src/ffiasm/bls12_381_384.cpp -o build/bls12_381_384c.o - gcc $(CFLAGS) -c src/ec/ec.cpp -o build/ec.o - gcc $(CFLAGS) -c src/bn254/bn254.cpp -o build/bn254.o - gcc $(CFLAGS) -c src/bls12_381/bls12_381.cpp -o build/bls12_381.o - gcc $(CFLAGS) -c src/fcall/fcall.cpp -o build/fcall.o - gcc $(CFLAGS) -c src/arith256/arith256.cpp -o build/arith256.o - gcc $(CFLAGS) -c src/arith384/arith384.cpp -o build/arith384.o - gcc $(CFLAGS) -c src/bigint/add256.cpp -o build/add256.o - gcc $(CFLAGS) -c src/common/globals.cpp -o build/globals.o - ar rcs\ - build/libziskc.a\ - build/fec.o\ - build/fnec.o\ - build/fq.o\ - build/bls12_381_384.o\ - build/ec.o\ - build/bn254.o\ - build/bls12_381.o\ - build/fecc.o\ - build/fnecc.o\ - build/fqc.o\ - build/bls12_381_384c.o\ - build/fcall.o\ - build/arith256.o\ - build/arith384.o\ - build/add256.o\ - build/globals.o - gcc $(CFLAGS) src/main.cpp -lc build/libziskc.a -o build/clib -lgmp -lstdc++ -lgmpxx - mkdir -p lib - cp build/libziskc.a lib/ +# Directories +BUILD_DIR = build +LIB_DIR = lib +SRC_DIR = src + +# Output library +LIB_NAME = libziskc.a +LIB_FILE = $(LIB_DIR)/$(LIB_NAME) + +# Object files +ASM_OBJS = $(BUILD_DIR)/fec.o \ + $(BUILD_DIR)/fnec.o \ + $(BUILD_DIR)/fq.o \ + $(BUILD_DIR)/fr.o \ + $(BUILD_DIR)/bls12_381_384.o \ + $(BUILD_DIR)/bls12_381_asm.o \ + $(BUILD_DIR)/nsecp256r1.o \ + $(BUILD_DIR)/psecp256r1.o + +CPP_OBJS = $(BUILD_DIR)/fecc.o \ + $(BUILD_DIR)/fnecc.o \ + $(BUILD_DIR)/fqc.o \ + $(BUILD_DIR)/frc.o \ + $(BUILD_DIR)/bls12_381_384c.o \ + $(BUILD_DIR)/bls12_381c.o \ + $(BUILD_DIR)/alt_bn128.o \ + $(BUILD_DIR)/nsecp256r1c.o \ + $(BUILD_DIR)/psecp256r1c.o \ + $(BUILD_DIR)/secp256r1.o \ + $(BUILD_DIR)/misc.o \ + $(BUILD_DIR)/naf.o \ + $(BUILD_DIR)/splitparstr.o \ + $(BUILD_DIR)/ec.o \ + $(BUILD_DIR)/bn254.o \ + $(BUILD_DIR)/bls12_381.o \ + $(BUILD_DIR)/fcall.o \ + $(BUILD_DIR)/arith256.o \ + $(BUILD_DIR)/arith384.o \ + $(BUILD_DIR)/add256.o \ + $(BUILD_DIR)/globals.o \ + $(BUILD_DIR)/goldilocks_base_field.o \ + $(BUILD_DIR)/poseidon2_goldilocks.o \ + $(BUILD_DIR)/blake2.o + +ALL_OBJS = $(ASM_OBJS) $(CPP_OBJS) + +# Header directories for include path +INCLUDES = -I$(SRC_DIR) + +# Default target +all: $(LIB_FILE) $(BUILD_DIR)/clib + +# Create directories +$(BUILD_DIR): + mkdir -p $(BUILD_DIR) + +$(LIB_DIR): + mkdir -p $(LIB_DIR) + +# Library target +$(LIB_FILE): $(ALL_OBJS) | $(LIB_DIR) + ar rcs $@ $(ALL_OBJS) + +# Test binary +$(BUILD_DIR)/clib: $(SRC_DIR)/main.cpp $(LIB_FILE) + gcc $(CFLAGS) $(INCLUDES) $< -lc $(LIB_FILE) -o $@ -lgmp -lstdc++ -lgmpxx + +# Assembly rules +$(BUILD_DIR)/fec.o: $(SRC_DIR)/ffiasm/fec.asm | $(BUILD_DIR) + nasm -felf64 $< -o $@ + +$(BUILD_DIR)/fnec.o: $(SRC_DIR)/ffiasm/fnec.asm | $(BUILD_DIR) + nasm -felf64 $< -o $@ + +$(BUILD_DIR)/fq.o: $(SRC_DIR)/ffiasm/fq.asm | $(BUILD_DIR) + nasm -felf64 $< -o $@ + +$(BUILD_DIR)/fr.o: $(SRC_DIR)/ffiasm/fr.asm | $(BUILD_DIR) + nasm -felf64 $< -o $@ + +$(BUILD_DIR)/bls12_381_384.o: $(SRC_DIR)/ffiasm/bls12_381_384.asm | $(BUILD_DIR) + nasm -felf64 $< -o $@ + +$(BUILD_DIR)/bls12_381_asm.o: $(SRC_DIR)/ffiasm/bls12_381.asm | $(BUILD_DIR) + nasm -felf64 $< -o $@ + +$(BUILD_DIR)/nsecp256r1.o: $(SRC_DIR)/ffiasm/nsecp256r1.asm | $(BUILD_DIR) + nasm -felf64 $< -o $@ + +$(BUILD_DIR)/psecp256r1.o: $(SRC_DIR)/ffiasm/psecp256r1.asm | $(BUILD_DIR) + nasm -felf64 $< -o $@ + +# C++ compilation rules +$(BUILD_DIR)/fecc.o: $(SRC_DIR)/ffiasm/fec.cpp $(SRC_DIR)/ffiasm/fec.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/fnecc.o: $(SRC_DIR)/ffiasm/fnec.cpp $(SRC_DIR)/ffiasm/fnec.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/fqc.o: $(SRC_DIR)/ffiasm/fq.cpp $(SRC_DIR)/ffiasm/fq.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/frc.o: $(SRC_DIR)/ffiasm/fr.cpp $(SRC_DIR)/ffiasm/fr.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/bls12_381_384c.o: $(SRC_DIR)/ffiasm/bls12_381_384.cpp $(SRC_DIR)/ffiasm/bls12_381_384.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/bls12_381c.o: $(SRC_DIR)/ffiasm/bls12_381.cpp $(SRC_DIR)/ffiasm/bls12_381.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/alt_bn128.o: $(SRC_DIR)/ffiasm/alt_bn128.cpp $(SRC_DIR)/ffiasm/alt_bn128.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/nsecp256r1c.o: $(SRC_DIR)/ffiasm/nsecp256r1.cpp $(SRC_DIR)/ffiasm/nsecp256r1.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/psecp256r1c.o: $(SRC_DIR)/ffiasm/psecp256r1.cpp $(SRC_DIR)/ffiasm/psecp256r1.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/secp256r1.o: $(SRC_DIR)/secp256r1/secp256r1.cpp $(SRC_DIR)/secp256r1/secp256r1.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/misc.o: $(SRC_DIR)/ffiasm/misc.cpp $(SRC_DIR)/ffiasm/misc.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/naf.o: $(SRC_DIR)/ffiasm/naf.cpp $(SRC_DIR)/ffiasm/naf.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/splitparstr.o: $(SRC_DIR)/ffiasm/splitparstr.cpp $(SRC_DIR)/ffiasm/splitparstr.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/ec.o: $(SRC_DIR)/ec/ec.cpp $(SRC_DIR)/ec/ec.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/bn254.o: $(SRC_DIR)/bn254/bn254.cpp $(SRC_DIR)/bn254/bn254.hpp $(SRC_DIR)/bn254/bn254_fe.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/bls12_381.o: $(SRC_DIR)/bls12_381/bls12_381.cpp $(SRC_DIR)/bls12_381/bls12_381.hpp $(SRC_DIR)/bls12_381/bls12_381_fe.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/fcall.o: $(SRC_DIR)/fcall/fcall.cpp $(SRC_DIR)/fcall/fcall.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/arith256.o: $(SRC_DIR)/arith256/arith256.cpp $(SRC_DIR)/arith256/arith256.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/arith384.o: $(SRC_DIR)/arith384/arith384.cpp $(SRC_DIR)/arith384/arith384.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/add256.o: $(SRC_DIR)/bigint/add256.cpp $(SRC_DIR)/bigint/add256.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/globals.o: $(SRC_DIR)/common/globals.cpp $(SRC_DIR)/common/globals.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/goldilocks_base_field.o: $(SRC_DIR)/poseidon2/goldilocks_base_field.cpp $(SRC_DIR)/poseidon2/goldilocks_base_field.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/poseidon2_goldilocks.o: $(SRC_DIR)/poseidon2/poseidon2_goldilocks.cpp $(SRC_DIR)/poseidon2/poseidon2_goldilocks.hpp $(SRC_DIR)/poseidon2/poseidon2_goldilocks_constants.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/blake2.o: $(SRC_DIR)/blake2/blake2.cpp $(SRC_DIR)/blake2/blake2.hpp | $(BUILD_DIR) + gcc $(CFLAGS) $(INCLUDES) -c $< -o $@ clean: - rm -rf build - rm -rf lib + rm -rf $(BUILD_DIR) + rm -rf $(LIB_DIR) + +.PHONY: all clean \ No newline at end of file diff --git a/lib-c/c/src/blake2/blake2.cpp b/lib-c/c/src/blake2/blake2.cpp new file mode 100644 index 000000000..c9580b2bb --- /dev/null +++ b/lib-c/c/src/blake2/blake2.cpp @@ -0,0 +1,76 @@ +#include "blake2.hpp" +#include + +/// Message word permutation schedule +const size_t SIGMA[10][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, +}; + +/// Rotation constants for G function +const uint32_t R1 = 32; +const uint32_t R2 = 24; +const uint32_t R3 = 16; +const uint32_t R4 = 63; + +// U64 rotate left and right functions +static inline uint64_t rotate_left_64(uint64_t x, unsigned int n) { + n &= 63; + return (x << n) | (x >> (64 - n)); +} +static inline uint64_t rotate_right_64(uint64_t x, unsigned int n) { + n &= 63; + return (x >> n) | (x << (64 - n)); +} + +/// G mixing function +/// +/// The G function mixes two input words `x` and `y` from the message block into the state. +/// It operates on 4 state words: v[a], v[b], v[c], v[d] +static inline void g(uint64_t v[16], size_t a, size_t b, size_t c, size_t d, uint64_t x, uint64_t y) { + uint64_t va = v[a]; + uint64_t vb = v[b]; + uint64_t vc = v[c]; + uint64_t vd = v[d]; + + va = va + vb + x; + vd = rotate_right_64(vd ^ va, R1); + vc = vc + vd; + vb = rotate_right_64(vb ^ vc, R2); + + va = va + vb + y; + vd = rotate_right_64(vd ^ va, R3); + vc = vc + vd; + vb = rotate_right_64(vb ^ vc, R4); + + v[a] = va; + v[b] = vb; + v[c] = vc; + v[d] = vd; +} + +/// BLAKE2b round function +void blake2b_round(uint64_t v[16], const uint64_t m[16], uint64_t round) { + // Message word selection permutation for this round + const size_t* s = SIGMA[round % 10]; + + // Column step + g(v, 0, 4, 8, 12, m[s[0]], m[s[1]]); + g(v, 1, 5, 9, 13, m[s[2]], m[s[3]]); + g(v, 2, 6, 10, 14, m[s[4]], m[s[5]]); + g(v, 3, 7, 11, 15, m[s[6]], m[s[7]]); + + // Diagonal step + g(v, 0, 5, 10, 15, m[s[8]], m[s[9]]); + g(v, 1, 6, 11, 12, m[s[10]], m[s[11]]); + g(v, 2, 7, 8, 13, m[s[12]], m[s[13]]); + g(v, 3, 4, 9, 14, m[s[14]], m[s[15]]); +} diff --git a/lib-c/c/src/blake2/blake2.hpp b/lib-c/c/src/blake2/blake2.hpp new file mode 100644 index 000000000..41d3f7b80 --- /dev/null +++ b/lib-c/c/src/blake2/blake2.hpp @@ -0,0 +1,16 @@ +#ifndef LIB_C_BLAKE2_HPP +#define LIB_C_BLAKE2_HPP + +#include // uint64_t + +#ifdef __cplusplus +extern "C" { +#endif + +void blake2b_round(uint64_t v[16], const uint64_t m[16], uint64_t round); + +#ifdef __cplusplus +} +#endif + +#endif // LIB_C_BLAKE2_HPP \ No newline at end of file diff --git a/lib-c/c/src/bls12_381/bls12_381.cpp b/lib-c/c/src/bls12_381/bls12_381.cpp index 032e1e8b5..7ffc7fefb 100644 --- a/lib-c/c/src/bls12_381/bls12_381.cpp +++ b/lib-c/c/src/bls12_381/bls12_381.cpp @@ -187,6 +187,48 @@ int BLS12_381ComplexMulP (const uint64_t * p1, const uint64_t * p2, uint64_t * p return result; } +/**************************/ +/* BLS12_381 complex sqrt */ +/**************************/ + +int BLS12_381ComplexSqrt ( + const uint64_t * _x1, // 6 x 64 bits + const uint64_t * _y1, // 6 x 64 bits + uint64_t * _x2, // 6 x 64 bits + uint64_t * _y2, // 6 x 64 bits + uint64_t * is_qr // 1 x 64 bits +) +{ + RawBLS12_381_384::Element x1, y1, x2, y2; + array2fe(_x1, x1); + array2fe(_y1, y1); + + int result = BLS12_381ComplexSqrtFe (x1, y1, x2, y2, *is_qr); + + fe2array(x2, _x2); + fe2array(y2, _y2); + + return result; +} + +int BLS12_381ComplexSqrtP ( + const uint64_t * p1, // 12 x 64 bits + uint64_t * p2, // 12 x 64 bits + uint64_t * is_qr // 1 x 64 bits +) +{ + RawBLS12_381_384::Element x1, y1, x2, y2; + array2fe(p1, x1); + array2fe(p1 + 6, y1); + + int result = BLS12_381ComplexSqrtFe (x1, y1, x2, y2, *is_qr); + + fe2array(x2, p2); + fe2array(y2, p2 + 6); + + return result; +} + #ifdef __cplusplus } // extern "C" #endif \ No newline at end of file diff --git a/lib-c/c/src/bls12_381/bls12_381.hpp b/lib-c/c/src/bls12_381/bls12_381.hpp index c1871f2ca..ca3627f7a 100644 --- a/lib-c/c/src/bls12_381/bls12_381.hpp +++ b/lib-c/c/src/bls12_381/bls12_381.hpp @@ -99,6 +99,24 @@ int BLS12_381ComplexMulP ( uint64_t * p3 // 12 x 64 bits ); +/**************************/ +/* BLS12_381 complex sqrt */ +/**************************/ + +int BLS12_381ComplexSqrt ( + const uint64_t * x1, // 6 x 64 bits + const uint64_t * y1, // 6 x 64 bits + uint64_t * x2, // 6 x 64 bits + uint64_t * y3, // 6 x 64 bits + uint64_t * is_qr // 1 x 64 bits +); + +int BLS12_381ComplexSqrtP ( + const uint64_t * p1, // 12 x 64 bits + uint64_t * p2, // 12 x 64 bits + uint64_t * is_qr // 1 x 64 bits +); + #ifdef __cplusplus } // extern "C" #endif diff --git a/lib-c/c/src/bls12_381/bls12_381_fe.hpp b/lib-c/c/src/bls12_381/bls12_381_fe.hpp index 739889e00..a42ea28a2 100644 --- a/lib-c/c/src/bls12_381/bls12_381_fe.hpp +++ b/lib-c/c/src/bls12_381/bls12_381_fe.hpp @@ -136,6 +136,116 @@ int inline BLS12_381ComplexInvFe (const RawBLS12_381_384::Element &real, const R return 0; }; +int inline BLS12_381ComplexExpFe (const RawBLS12_381_384::Element &x1, const RawBLS12_381_384::Element &y1, const mpz_class &_exp, RawBLS12_381_384::Element &x2, RawBLS12_381_384::Element &y2) +{ + // Exponentiation of a complex number using square-and-multiply algorithm + + // Get a local copy of the base to modify it + RawBLS12_381_384::Element base_x, base_y; + base_x = x1; + base_y = y1; + + // Get a scalar copy of the exponent to modify it + mpz_class exp(_exp); + + // Initialize result to 1 + 0i + x2 = bls12_381.one(); // x2 = 1 + y2 = bls12_381.zero(); // y2 = 0 + + // Loop until exponent becomes zero + while (exp != 0) + { + // If exponent is odd, multiply the result by the base + if ((exp & 1) == 1) + { + BLS12_381ComplexMulFe(x2, y2, base_x, base_y, x2, y2); + } + + // Square the base + BLS12_381ComplexMulFe(base_x, base_y, base_x, base_y, base_x, base_y); + + // Divide exponent by 2 + exp = exp >> 1; + } + + return 0; +} + +int inline BLS12_381ComplexSqrtFe (const RawBLS12_381_384::Element &x1, const RawBLS12_381_384::Element &y1, RawBLS12_381_384::Element &x2, RawBLS12_381_384::Element &y2, uint64_t &is_qr) +{ + /// Algorithm 9 from https://eprint.iacr.org/2012/685.pdf + /// Square root computation over F_p^2, with p ≡ 3 (mod 4) + + // Step 1: a1 ← a^((p-3)/4) + RawBLS12_381_384::Element a1_x, a1_y; + BLS12_381ComplexExpFe(x1, y1, ScalarP_MINUS_3_DIV_4, a1_x, a1_y); + + // Step 2: α ← a1 * a1 * a + RawBLS12_381_384::Element a1_a_x, a1_a_y; + BLS12_381ComplexMulFe(a1_x, a1_y, x1, y1, a1_a_x, a1_a_y); + RawBLS12_381_384::Element alpha_x, alpha_y; + BLS12_381ComplexMulFe(a1_x, a1_y, a1_a_x, a1_a_y, alpha_x, alpha_y); + + // Step 3: a0 ← α^p * α = conjugate(α) * α + RawBLS12_381_384::Element alpha_conj_x, alpha_conj_y; + bls12_381.copy(alpha_conj_x, alpha_x); + bls12_381.neg(alpha_conj_y, alpha_y); + RawBLS12_381_384::Element a0_x, a0_y; + BLS12_381ComplexMulFe(alpha_conj_x, alpha_conj_y, alpha_x, alpha_y, a0_x, a0_y); + + // Step 4-6: if a0 == -1 then return false (no square root) + if (bls12_381.eq(a0_x, bls12_381.negOne()) && bls12_381.isZero(a0_y)) + { + // Return false (no square root exists) + is_qr = 0; + x2 = bls12_381.zero(); + y2 = bls12_381.zero(); + return 0; + } + + // Step 7: x0 ← a1 * a + #define x0_x a1_a_x + #define x0_y a1_a_y + + // Step 8-13: compute x based on α + // If α == -1 then x ← i * x0 else x ← b * x0 + if (bls12_381.eq(a0_x, bls12_381.negOne()) && bls12_381.isZero(a0_y)) + { + // Step 9: x ← i * x0 + BLS12_381ComplexMulFe( + bls12_381.zero(), // i real part = 0 + bls12_381.one(), // i imaginary part = 1 + x0_x, + x0_y, + x2, + y2 + ); + } + else + { + // Step 11: b ← (1 + α)^((p-1)/2) + RawBLS12_381_384::Element one_plus_alpha_x, one_plus_alpha_y; + BLS12_381ComplexAddFe( + bls12_381.one(), // 1 real part = 1 + bls12_381.zero(), // 1 imaginary part = 0 + alpha_x, + alpha_y, + one_plus_alpha_x, + one_plus_alpha_y + ); + RawBLS12_381_384::Element b_x, b_y; + BLS12_381ComplexExpFe(one_plus_alpha_x, one_plus_alpha_y, ScalarP_MINUS_1_DIV_2, b_x, b_y); + + // Step 12: x ← b * x0 + BLS12_381ComplexMulFe(b_x, b_y, x0_x, x0_y, x2, y2); + } + + // Return true (square root exists) + is_qr = 1; + + return 0; +} + #ifdef __cplusplus } // extern "C" #endif diff --git a/lib-c/c/src/common/globals.cpp b/lib-c/c/src/common/globals.cpp index 77de2158e..a4cb36e6f 100644 --- a/lib-c/c/src/common/globals.cpp +++ b/lib-c/c/src/common/globals.cpp @@ -4,9 +4,13 @@ RawFec fec; RawFnec fnec; RawFq bn254; RawBLS12_381_384 bls12_381; +RawpSecp256r1 secp256r1; +RawnSecp256r1 secp256r1n; mpz_class ScalarMask256 ("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", 16); mpz_class ScalarMask384 ("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", 16); mpz_class ScalarP_DIV_4 ("680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffeaab", 16); mpz_class ScalarP ("1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab", 16); -mpz_class ScalarNQR ("2", 16); // First non-quadratic residue in Fp \ No newline at end of file +mpz_class ScalarNQR_FP ("2", 16); // First non-quadratic residue in Fp +mpz_class ScalarP_MINUS_3_DIV_4 ("680447A8E5FF9A692C6E9ED90D2EB35D91DD2E13CE144AFD9CC34A83DAC3D8907AAFFFFAC54FFFFEE7FBFFFFFFFEAAA", 16); +mpz_class ScalarP_MINUS_1_DIV_2 ("D0088F51CBFF34D258DD3DB21A5D66BB23BA5C279C2895FB39869507B587B120F55FFFF58A9FFFFDCFF7FFFFFFFD555", 16); \ No newline at end of file diff --git a/lib-c/c/src/common/globals.hpp b/lib-c/c/src/common/globals.hpp index 2774092cc..b939d6ddf 100644 --- a/lib-c/c/src/common/globals.hpp +++ b/lib-c/c/src/common/globals.hpp @@ -6,16 +6,22 @@ #include "../ffiasm/fnec.hpp" #include "../ffiasm/fq.hpp" #include "../ffiasm/bls12_381_384.hpp" +#include "../ffiasm/psecp256r1.hpp" +#include "../ffiasm/nsecp256r1.hpp" extern RawFec fec; extern RawFnec fnec; extern RawFq bn254; extern RawBLS12_381_384 bls12_381; +extern RawpSecp256r1 secp256r1; +extern RawnSecp256r1 secp256r1n; extern mpz_class ScalarMask256; extern mpz_class ScalarMask384; extern mpz_class ScalarP_DIV_4; extern mpz_class ScalarP; -extern mpz_class ScalarNQR; +extern mpz_class ScalarNQR_FP; +extern mpz_class ScalarP_MINUS_3_DIV_4; +extern mpz_class ScalarP_MINUS_1_DIV_2; #endif \ No newline at end of file diff --git a/lib-c/c/src/common/utils.hpp b/lib-c/c/src/common/utils.hpp index cf52af1fd..baa018e28 100644 --- a/lib-c/c/src/common/utils.hpp +++ b/lib-c/c/src/common/utils.hpp @@ -107,4 +107,36 @@ inline void fe2array (const RawBLS12_381_384::Element &fe, uint64_t * a) scalar2array6(s, a); } +// Converts an array of 4 u64 LE to a Fq (Secp256r1) element +inline void array2fe (const uint64_t * a, RawpSecp256r1::Element &fe) +{ + mpz_class s; + array2scalar(a, s); + secp256r1.fromMpz(fe, s.get_mpz_t()); +} + +// Converts a Fq (Secp256r1) element to an array of 4 u64 LE +inline void fe2array (const RawpSecp256r1::Element &fe, uint64_t * a) +{ + mpz_class s; + secp256r1.toMpz(s.get_mpz_t(), fe); + scalar2array(s, a); +} + +// Converts an array of 4 u64 LE to a Fq (nSecp256r1) element +inline void array2fe (const uint64_t * a, RawnSecp256r1::Element &fe) +{ + mpz_class s; + array2scalar(a, s); + secp256r1n.fromMpz(fe, s.get_mpz_t()); +} + +// Converts a Fq (nSecp256r1) element to an array of 4 u64 LE +inline void fe2array (const RawnSecp256r1::Element &fe, uint64_t * a) +{ + mpz_class s; + secp256r1n.toMpz(s.get_mpz_t(), fe); + scalar2array(s, a); +} + #endif \ No newline at end of file diff --git a/lib-c/c/src/ec/ec.cpp b/lib-c/c/src/ec/ec.cpp index b25f59113..ac2f994af 100644 --- a/lib-c/c/src/ec/ec.cpp +++ b/lib-c/c/src/ec/ec.cpp @@ -98,7 +98,7 @@ int inline AddPointEcDblFe (RawFec::Element &x1, RawFec::Element &y1) fec.add(aux2, y1, y1); if (fec.isZero(aux2)) { - printf("AddPointEc() got denominator=0 1\n"); + printf("AddPointEcDbl() got denominator=0 1\n"); return -1; } fec.div(s, aux1, aux2); @@ -182,6 +182,263 @@ int AddPointEcP (uint64_t _dbl, const uint64_t * p1, const uint64_t * p2, uint64 return result; } +uint64_t G[8] = { + 0x59F2815B16F81798, + 0x029BFCDB2DCE28D9, + 0x55A06295CE870B07, + 0x79BE667EF9DCBBAC, + 0x9C47D08FFB10D4B8, + 0xFD17B448A6855419, + 0x5DA4FBFC0E1108A8, + 0x483ADA7726A3C465, +}; + +int secp256k1_ecdsa_verify ( + const uint64_t * pk, // 8 x 64 bits + const uint64_t * _z, // 4 x 64 bits + const uint64_t * _r, // 4 x 64 bits + const uint64_t * _s, // 4 x 64 bits + uint64_t * result // 8 x 64 bits +) +{ + // Convert z, r, s inputs to field elements + RawFnec::Element z, r, s; + array2fe(_z, z); + array2fe(_r, r); + array2fe(_s, s); + + // Given the public key pk and the signature (r, s) over the message hash z: + // 1. Computes s_inv = s⁻¹ mod n + // 2. Computes u1 = z·s_inv mod n + // 3. Computes u2 = r·s_inv mod n + // 4. Computes and returns the curve point p = u1·G + u2·PK + + // s_inv = s⁻¹ mod n + RawFnec::Element s_inv; + fnec.inv(s_inv, s); + + // u1 = z·s_inv mod n + RawFnec::Element u1; + fnec.mul(u1, z, s_inv); + + // u2 = r·s_inv mod n + RawFnec::Element u2; + fnec.mul(u2, r, s_inv); + uint64_t u1_array[4]; + uint64_t u2_array[4]; + fe2array(u1, u1_array); + fe2array(u2, u2_array); + + secp256k1_curve_dbl_scalar_mul(u1_array, G, u2_array, pk, result); + + return 0; +} + +const uint64_t IDENTITY[8] = {0,0,0,0,0,0,0,0}; + +void secp256k1_curve_add( + const uint64_t * p, // 8 x 64 bits + const uint64_t * q, // 8 x 64 bits + uint64_t * r // 8 x 64 bits +) +{ + // Get the 2 points coordinates + const uint64_t * x1 = &p[0]; + const uint64_t * y1 = &p[4]; + const uint64_t * x2 = &q[0]; + const uint64_t * y2 = &q[4]; + + // If p==q return dbl(p) + if (x1[0] == x2[0] && + x1[1] == x2[1] && + x1[2] == x2[2] && + x1[3] == x2[3]) + { + if (y1[0] == y2[0] && + y1[1] == y2[1] && + y1[2] == y2[2] && + y1[3] == y2[3]) { + secp256k1_curve_dbl(p, r); + return; + } else { + for (int i = 0; i < 8; i++) { + r[i] = IDENTITY[i]; + } + return; + } + } + + // If p==0 return q + if ( p[0] == IDENTITY[0] && + p[1] == IDENTITY[1] && + p[2] == IDENTITY[2] && + p[3] == IDENTITY[3] && + p[4] == IDENTITY[4] && + p[5] == IDENTITY[5] && + p[6] == IDENTITY[6] && + p[7] == IDENTITY[7] ) + { + for (int i = 0; i < 8; i++) + { + r[i] = q[i]; + } + return; + } + // if q == 0 return p + else if ( q[0] == IDENTITY[0] && + q[1] == IDENTITY[1] && + q[2] == IDENTITY[2] && + q[3] == IDENTITY[3] && + q[4] == IDENTITY[4] && + q[5] == IDENTITY[5] && + q[6] == IDENTITY[6] && + q[7] == IDENTITY[7] ) + { + for (int i = 0; i < 8; i++) + { + r[i] = p[i]; + } + return; + } + + // Convert coordinates to field elements + RawFec::Element x1_fe, y1_fe, x2_fe, y2_fe; + array2fe(x1, x1_fe); + array2fe(y1, y1_fe); + array2fe(x2, x2_fe); + array2fe(y2, y2_fe); + + // Calculate lambda = (y2 - y1) / (x2 - x1) + RawFec::Element y2_minus_y1; + fec.sub(y2_minus_y1, y2_fe, y1_fe); + RawFec::Element x2_minus_x1; + fec.sub(x2_minus_x1, x2_fe, x1_fe); + RawFec::Element x2_minus_x1_inv; + fec.inv(x2_minus_x1_inv, x2_minus_x1); + RawFec::Element lambda; + fec.mul(lambda, y2_minus_y1, x2_minus_x1_inv); + + // Calculate x3 = lambda^2 - (x1 + x2) + RawFec::Element x3_fe; + RawFec::Element lambda_sq; + fec.square(lambda_sq, lambda); + RawFec::Element x1_plus_x2; + fec.add(x1_plus_x2, x1_fe, x2_fe); + fec.sub(x3_fe, lambda_sq, x1_plus_x2); + + // Calculate y3 = lambda * (x1 - x3) - y1 + RawFec::Element y3_fe; + RawFec::Element x1_minus_x3; + fec.sub(x1_minus_x3, x1_fe, x3_fe); + RawFec::Element lambda_x1_minus_x3; + fec.mul(lambda_x1_minus_x3, lambda, x1_minus_x3); + fec.sub(y3_fe, lambda_x1_minus_x3, y1_fe); + + // Convert to result + fe2array(x3_fe, r); + fe2array(y3_fe, r + 4); +} + +void secp256k1_curve_dbl( + const uint64_t * p, // 8 x 64 bits + uint64_t * r // 8 x 64 bits +) +{ + // If p==0 return p + if ( p[0] == IDENTITY[0] && + p[1] == IDENTITY[1] && + p[2] == IDENTITY[2] && + p[3] == IDENTITY[3] && + p[4] == IDENTITY[4] && + p[5] == IDENTITY[5] && + p[6] == IDENTITY[6] && + p[7] == IDENTITY[7] ) + { + for (int i = 0; i < 8; i++) + { + r[i] = p[i]; + } + return; + } + + // Convert coordinates to field elements + uint64_t * x = (uint64_t *)&p[0]; + uint64_t * y = (uint64_t *)&p[4]; + RawFec::Element x_fe, y_fe; + array2fe(x, x_fe); + array2fe(y, y_fe); + + // Calculate lambda = (3*x1^2) / (2*y1) + RawFec::Element x1_sq; + fec.square(x1_sq, x_fe); + RawFec::Element three; + fec.fromUI(three, 3); + RawFec::Element three_x1_sq; + fec.mul(three_x1_sq, x1_sq, three); + RawFec::Element two_y1; + fec.add(two_y1, y_fe, y_fe); + RawFec::Element two_y1_inv; + fec.inv(two_y1_inv, two_y1); + RawFec::Element lambda; + fec.mul(lambda, three_x1_sq, two_y1_inv); + + // Calculate x3 = lambda^2 - 2*x1 + RawFec::Element lambda_sq; + fec.square(lambda_sq, lambda); + RawFec::Element two_x1; + fec.add(two_x1, x_fe, x_fe); + RawFec::Element x3_fe; + fec.sub(x3_fe, lambda_sq, two_x1); + + // Calculate y3 = lambda * (x1 - x3) - y1 + RawFec::Element x1_minus_x3; + fec.sub(x1_minus_x3, x_fe, x3_fe); + RawFec::Element lambda_x1_minus_x3; + fec.mul(lambda_x1_minus_x3, lambda, x1_minus_x3); + RawFec::Element y3_fe; + fec.sub(y3_fe, lambda_x1_minus_x3, y_fe); + + // Convert to result + fe2array(x3_fe, r); + fe2array(y3_fe, r + 4); +} + +int secp256k1_curve_dbl_scalar_mul( + const uint64_t * k1, // 4 x 64 bits + const uint64_t * p1, // 8 x 64 bits + const uint64_t * k2, // 4 x 64 bits + const uint64_t * p2, // 8 x 64 bits + uint64_t * r // 8 x 64 bits +) +{ + for (uint64_t i = 0; i < 8; i++) { + r[i] = 0; + } + + for (int64_t ii=255; ii>=0; ii--) { + uint64_t i = ii; + + // r = r + r + secp256k1_curve_dbl(r, r); + + // If k1[i] == 1 then r = r + p1 + uint64_t k1_bit = (k1[i / 64] >> (i % 64)) & 1; + if (k1_bit == 1) + { + secp256k1_curve_add(r, p1, r); + } + + // If k2[i] == 1 then r = r + p2 + uint64_t k2_bit = (k2[i / 64] >> (i % 64)) & 1; + if (k2_bit == 1) + { + secp256k1_curve_add(r, p2, r); + } + } + + return 0; +} + #ifdef __cplusplus } // extern "C" #endif \ No newline at end of file diff --git a/lib-c/c/src/ec/ec.hpp b/lib-c/c/src/ec/ec.hpp index cec4e836d..a3fadab21 100644 --- a/lib-c/c/src/ec/ec.hpp +++ b/lib-c/c/src/ec/ec.hpp @@ -24,6 +24,32 @@ int AddPointEcP ( uint64_t * p3 // 8 x 64 bits ); +int secp256k1_ecdsa_verify ( + const uint64_t * pk, // 8 x 64 bits + const uint64_t * z, // 4 x 64 bits + const uint64_t * r, // 4 x 64 bits + const uint64_t * s, // 4 x 64 bits + uint64_t * result // 8 x 64 bits +); + +void secp256k1_curve_add( + const uint64_t * p, // 8 x 64 bits + const uint64_t * q, // 8 x 64 bits + uint64_t * r // 8 x 64 bits +); + +void secp256k1_curve_dbl( + const uint64_t * p, // 8 x 64 bits + uint64_t * r // 8 x 64 bits +); + +int secp256k1_curve_dbl_scalar_mul( + const uint64_t * k1, // 4 x 64 bits + const uint64_t * p1, // 8 x 64 bits + const uint64_t * k2, // 4 x 64 bits + const uint64_t * p2, // 8 x 64 bits + uint64_t * r // 8 x 64 bits +); #ifdef __cplusplus } // extern "C" diff --git a/lib-c/c/src/fcall/fcall.cpp b/lib-c/c/src/fcall/fcall.cpp index b5dfa1321..55f6dd354 100644 --- a/lib-c/c/src/fcall/fcall.cpp +++ b/lib-c/c/src/fcall/fcall.cpp @@ -2,6 +2,9 @@ #include "../common/utils.hpp" #include "../bn254/bn254_fe.hpp" #include "../bls12_381/bls12_381_fe.hpp" +#include "../bls12_381/bls12_381.hpp" +#include "../ec/ec.hpp" +#include "../secp256r1/secp256r1.hpp" #include #include @@ -98,6 +101,21 @@ int Fcall ( iresult = BinDecompCtx(ctx); break; } + case FCALL_BLS12_381_FP2_SQRT_ID: + { + iresult = BLS12_381Fp2SqrtCtx(ctx); + break; + } + case FCALL_SECP256K1_ECDSA_VERIFY_ID: + { + iresult = Secp256k1EcdsaVerifyCtx(ctx); + break; + } + case FCALL_SECP256R1_ECDSA_VERIFY_ID: + { + iresult = Secp256r1EcdsaVerifyCtx(ctx); + break; + } default: { printf("Fcall() found unsupported function_id=%lu\n", ctx->function_id); @@ -295,19 +313,28 @@ int MsbPos256 ( uint64_t * r // 2 x 64 bits ) { - const uint64_t * x = a; - const uint64_t * y = &a[4]; + const uint64_t n = a[0]; // number of inputs + const uint64_t * params = &a[1]; - for (int i=3; i>=0; i--) + for (int limb=3; limb>=0; limb--) { - if ((x[i] != 0) || (y[i] != 0)) + // Find max value at this limb position across all inputs + uint64_t max_word = 0; + for (uint64_t i=0; i y[i] ? x[i] : y[i]; - r[0] = i; - r[1] = msb_pos(word); + uint64_t word = params[i * 4 + limb]; + if (word > max_word) { + max_word = word; + } + } + if (max_word != 0) + { + r[0] = limb; + r[1] = msb_pos(max_word); return 0; } } + printf("MsbPos256() error: both x and y are zero\n"); exit(-1); } @@ -598,7 +625,7 @@ int BLS12_381FpSqrt ( { // To check that a is indeed a non-quadratic residue, we check that // a * NQR is a quadratic residue for some fixed known non-quadratic residue NQR - mpz_class a_nqr = (a * ScalarNQR) % ScalarP; + mpz_class a_nqr = (a * ScalarNQR_FP) % ScalarP; // Compute the square root of a * NQR mpz_powm(r.get_mpz_t(), a_nqr.get_mpz_t(), ScalarP_DIV_4.get_mpz_t(), ScalarP.get_mpz_t()); @@ -915,7 +942,12 @@ int BigIntDivCtx ( ctx->result[2 + quotient_size + i] = 0; } - return 2 + quotient_size + remainder_size; + uint64_t total_size = 2 + quotient_size + remainder_size; + assert(total_size < FCALL_RESULT_MAX_SIZE); + + ctx->result_size = total_size; + + return total_size; } /************************/ @@ -963,5 +995,72 @@ int BinDecompCtx ( ctx->result[0] = ctx->result_size; ctx->result_size++; + return 0; +} + +/**********************/ +/* BLS12 381 FP2 SQRT */ +/**********************/ + +uint64_t NQR[12] = {1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}; + +/// Computes the square root of a non-zero field element in Fp2 +int BLS12_381Fp2SqrtCtx ( + struct FcallContext * ctx // fcall context +) +{ + int result; + + // Perform the square root + result = BLS12_381ComplexSqrtP( + &ctx->params[0], // 12 x 64 bits input parameter: real(6) + imaginary(6) + &ctx->result[1], // 12 x 64 bits output parameter: real(6) + imaginary(6) + &ctx->result[0] // 1 x 64 bits output parameter: is_quadratic_residue (1) + ); + if (result != 0) return result; + + // Check if a is a quadratic residue + if (!ctx->result[0]) + { + // To check that a is indeed a non-quadratic residue, we check that + // a * NQR is a quadratic residue for some fixed known non-quadratic residue NQR + uint64_t a_nqr[12]; + result = BLS12_381ComplexMulP( + &ctx->params[0], // 12 x 64 bits input parameter: real(6) + imaginary(6) + &NQR[0], // 12 x 64 bits input parameter: real(6) + imaginary(6) + &a_nqr[0] // 12 x 64 bits output parameter: real(6) + imaginary(6) + ); + if (result != 0) return result; + + // Compute the square root of a * NQR + uint64_t aux; // Unused + result = BLS12_381ComplexSqrtP( + &a_nqr[0], // 12 x 64 bits input parameter: real(6) + imaginary(6) + &ctx->result[1], // 12 x 64 bits output parameter: real(6) + imaginary(6) + &aux // 1 x 64 bits output parameter: is_quadratic_residue (1) + ); + if (result != 0) return result; + } + + ctx->result_size = 13; + + return 0; +} + +int Secp256k1EcdsaVerifyCtx( + struct FcallContext * ctx // fcall context +) +{ + secp256k1_ecdsa_verify( &ctx->params[0], &ctx->params[8], &ctx->params[12], &ctx->params[16], &ctx->result[0]); + ctx->result_size = 8; + return 0; +} + +int Secp256r1EcdsaVerifyCtx( + struct FcallContext * ctx // fcall context +) +{ + secp256r1_ecdsa_verify( &ctx->params[0], &ctx->params[8], &ctx->params[12], &ctx->params[16], &ctx->result[0]); + ctx->result_size = 8; return 0; } \ No newline at end of file diff --git a/lib-c/c/src/fcall/fcall.hpp b/lib-c/c/src/fcall/fcall.hpp index 768777e81..b2509745d 100644 --- a/lib-c/c/src/fcall/fcall.hpp +++ b/lib-c/c/src/fcall/fcall.hpp @@ -25,6 +25,9 @@ extern "C" { #define FCALL_BIGINT256_DIV_ID 16 #define FCALL_BIG_INT_DIV_ID 17 #define FCALL_BIN_DECOMP_ID 18 +#define FCALL_BLS12_381_FP2_SQRT_ID 19 +#define FCALL_SECP256K1_ECDSA_VERIFY_ID 20 +#define FCALL_SECP256R1_ECDSA_VERIFY_ID 21 #define FCALL_PARAMS_MAX_SIZE 386 #define FCALL_RESULT_MAX_SIZE 8193 @@ -98,6 +101,15 @@ int BigIntDivCtx ( int BinDecompCtx ( struct FcallContext * ctx // fcall context ); +int BLS12_381Fp2SqrtCtx ( + struct FcallContext * ctx // fcall context +); +int Secp256k1EcdsaVerifyCtx ( + struct FcallContext * ctx // fcall context +); +int Secp256r1EcdsaVerifyCtx ( + struct FcallContext * ctx // fcall context +); // Functions supported by fcall, in u64 array format int InverseFpEc ( diff --git a/lib-c/c/src/ffiasm/bls12_381.asm b/lib-c/c/src/ffiasm/bls12_381.asm index aceee87d4..04e85b56f 100644 --- a/lib-c/c/src/ffiasm/bls12_381.asm +++ b/lib-c/c/src/ffiasm/bls12_381.asm @@ -8792,3 +8792,6 @@ R3 dq 0xc62c1807439b73af,0x1b3e0d188cf06990,0x73d13c71c7b5f418,0x6e2a5 lboMask dq 0x7fffffffffffffff np dq 0xfffffffeffffffff + +; Mark stack as non-executable +section .note.GNU-stack noalloc noexec nowrite progbits diff --git a/lib-c/c/src/ffiasm/bls12_381_384.asm b/lib-c/c/src/ffiasm/bls12_381_384.asm index 7f065aec7..b737e40f5 100644 --- a/lib-c/c/src/ffiasm/bls12_381_384.asm +++ b/lib-c/c/src/ffiasm/bls12_381_384.asm @@ -10910,3 +10910,6 @@ R3 dq 0xed48ac6bd94ca1e0,0x315f831e03a7adf8,0x9a53352a615e29dd,0x34c04 lboMask dq 0x1fffffffffffffff np dq 0x89f3fffcfffcfffd + +; Mark stack as non-executable +section .note.GNU-stack noalloc noexec nowrite progbits diff --git a/lib-c/c/src/ffiasm/fec.asm b/lib-c/c/src/ffiasm/fec.asm index 4d1465e25..21b8e1d2c 100644 --- a/lib-c/c/src/ffiasm/fec.asm +++ b/lib-c/c/src/ffiasm/fec.asm @@ -8874,3 +8874,6 @@ R3 dq 0x002bb1e33795f671,0x0000000100000b73,0x0000000000000000,0x00000 lboMask dq 0xffffffffffffffff np dq 0xd838091dd2253531 + +; Mark stack as non-executable +section .note.GNU-stack noalloc noexec nowrite progbits diff --git a/lib-c/c/src/ffiasm/fnec.asm b/lib-c/c/src/ffiasm/fnec.asm index 3b6b36d3d..fe8abd610 100644 --- a/lib-c/c/src/ffiasm/fnec.asm +++ b/lib-c/c/src/ffiasm/fnec.asm @@ -8874,3 +8874,6 @@ R3 dq 0x7bc0cfe0e9ff41ed,0x0017648444d4322c,0xb1b31347f1d0b2da,0x555d8 lboMask dq 0xffffffffffffffff np dq 0x4b0dff665588b13f + +; Mark stack as non-executable +section .note.GNU-stack noalloc noexec nowrite progbits diff --git a/lib-c/c/src/ffiasm/fq.asm b/lib-c/c/src/ffiasm/fq.asm index 34d7bd01a..93c36ba05 100644 --- a/lib-c/c/src/ffiasm/fq.asm +++ b/lib-c/c/src/ffiasm/fq.asm @@ -8791,3 +8791,6 @@ R3 dq 0xb1cd6dafda1530df,0x62f210e6a7283db6,0xef7f0b0c0ada0afb,0x20fd6 lboMask dq 0x3fffffffffffffff np dq 0x87d20782e4866389 + +; Mark stack as non-executable +section .note.GNU-stack noalloc noexec nowrite progbits diff --git a/lib-c/c/src/ffiasm/fr.asm b/lib-c/c/src/ffiasm/fr.asm index 806afaac0..0cac981ae 100644 --- a/lib-c/c/src/ffiasm/fr.asm +++ b/lib-c/c/src/ffiasm/fr.asm @@ -8791,3 +8791,6 @@ R3 dq 0x5e94d8e1b4bf0040,0x2a489cbe1cfbb6b8,0x893cc664a19fcfed,0x0cf85 lboMask dq 0x3fffffffffffffff np dq 0xc2e1f593efffffff + +; Mark stack as non-executable +section .note.GNU-stack noalloc noexec nowrite progbits diff --git a/lib-c/c/src/ffiasm/nsecp256r1.asm b/lib-c/c/src/ffiasm/nsecp256r1.asm index 963327487..e7f3e4693 100644 --- a/lib-c/c/src/ffiasm/nsecp256r1.asm +++ b/lib-c/c/src/ffiasm/nsecp256r1.asm @@ -8875,3 +8875,6 @@ R3 dq 0xac8ebec90b65a624,0x111f28ae0c0555c9,0x2543b9246ba5e93f,0x503a5 lboMask dq 0xffffffffffffffff np dq 0xccd1c8aaee00bc4f + +; Mark stack as non-executable +section .note.GNU-stack noalloc noexec nowrite progbits diff --git a/lib-c/c/src/ffiasm/psecp256r1.asm b/lib-c/c/src/ffiasm/psecp256r1.asm index ba7233133..a97b6e8f0 100644 --- a/lib-c/c/src/ffiasm/psecp256r1.asm +++ b/lib-c/c/src/ffiasm/psecp256r1.asm @@ -8875,3 +8875,6 @@ R3 dq 0xfffffffd0000000a,0xffffffedfffffff7,0x00000005fffffffc,0x00000 lboMask dq 0xffffffffffffffff np dq 0x1 + +; Mark stack as non-executable +section .note.GNU-stack noalloc noexec nowrite progbits diff --git a/lib-c/c/src/poseidon2/goldilocks_base_field.cpp b/lib-c/c/src/poseidon2/goldilocks_base_field.cpp new file mode 100644 index 000000000..1991c9830 --- /dev/null +++ b/lib-c/c/src/poseidon2/goldilocks_base_field.cpp @@ -0,0 +1,128 @@ +#include "goldilocks_base_field.hpp" +#include "goldilocks_base_field_tools.hpp" +#include "goldilocks_base_field_scalar.hpp" + +const Goldilocks::Element Goldilocks::ZR = {(uint64_t)0x0000000000000000LL}; +const Goldilocks::Element Goldilocks::Q = {(uint64_t)0xFFFFFFFF00000001LL}; +const Goldilocks::Element Goldilocks::MM = {(uint64_t)0xFFFFFFFeFFFFFFFFLL}; +const Goldilocks::Element Goldilocks::CQ = {(uint64_t)0x00000000FFFFFFFFLL}; +const Goldilocks::Element Goldilocks::R2 = {(uint64_t)0xFFFFFFFe00000001LL}; + +const Goldilocks::Element Goldilocks::W[33] = { + Goldilocks::fromU64(0x1), + Goldilocks::fromU64(18446744069414584320ULL), + Goldilocks::fromU64(281474976710656ULL), + Goldilocks::fromU64(16777216ULL), + Goldilocks::fromU64(4096ULL), + Goldilocks::fromU64(64ULL), + Goldilocks::fromU64(8ULL), + Goldilocks::fromU64(2198989700608ULL), + Goldilocks::fromU64(4404853092538523347ULL), + Goldilocks::fromU64(6434636298004421797ULL), + Goldilocks::fromU64(4255134452441852017ULL), + Goldilocks::fromU64(9113133275150391358ULL), + Goldilocks::fromU64(4355325209153869931ULL), + Goldilocks::fromU64(4308460244895131701ULL), + Goldilocks::fromU64(7126024226993609386ULL), + Goldilocks::fromU64(1873558160482552414ULL), + Goldilocks::fromU64(8167150655112846419ULL), + Goldilocks::fromU64(5718075921287398682ULL), + Goldilocks::fromU64(3411401055030829696ULL), + Goldilocks::fromU64(8982441859486529725ULL), + Goldilocks::fromU64(1971462654193939361ULL), + Goldilocks::fromU64(6553637399136210105ULL), + Goldilocks::fromU64(8124823329697072476ULL), + Goldilocks::fromU64(5936499541590631774ULL), + Goldilocks::fromU64(2709866199236980323ULL), + Goldilocks::fromU64(8877499657461974390ULL), + Goldilocks::fromU64(3757607247483852735ULL), + Goldilocks::fromU64(4969973714567017225ULL), + Goldilocks::fromU64(2147253751702802259ULL), + Goldilocks::fromU64(2530564950562219707ULL), + Goldilocks::fromU64(1905180297017055339ULL), + Goldilocks::fromU64(3524815499551269279ULL), + Goldilocks::fromU64(7277203076849721926ULL)}; + +const Goldilocks::Element Goldilocks::ONE = {(uint64_t)0x0000000000000001LL}; +const Goldilocks::Element Goldilocks::ZERO = {(uint64_t)0x0000000000000000LL}; +const Goldilocks::Element Goldilocks::NEGONE = {(uint64_t)0xFFFFFFFF00000000LL}; +const Goldilocks::Element Goldilocks::TWO32 = {0x0000000100000000LL}; +const Goldilocks::Element Goldilocks::SHIFT = Goldilocks::fromU64(7); + +/* + Scalar operations +*/ +void Goldilocks::parcpy(Element *dst, const Element *src, uint64_t size, int num_threads_copy) +{ + if (num_threads_copy < 1) + { + num_threads_copy = 1; + } + uint64_t components_thread = (size + num_threads_copy - 1) / num_threads_copy; + +#pragma omp parallel for num_threads(num_threads_copy) + for (uint64_t i = 0; i < size; i += components_thread) + { + uint64_t dim_ = components_thread * sizeof(Goldilocks::Element); + if (size - i < components_thread) + { + dim_ = (size - i) * sizeof(Goldilocks::Element); + } + std::memcpy(&dst[i], &src[i], dim_); + } +} + +void Goldilocks::parSetZero(Element *dst, uint64_t size, int num_threads_copy) +{ + + if (num_threads_copy < 1) + { + num_threads_copy = 1; + } + uint64_t components_thread = (size + num_threads_copy - 1) / num_threads_copy; + +#pragma omp parallel for num_threads(num_threads_copy) + for (uint64_t i = 0; i < size; i += components_thread) + { + uint64_t dim_ = components_thread * sizeof(Goldilocks::Element); + if (size - i < components_thread) + { + dim_ = (size - i) * sizeof(Goldilocks::Element); + } + std::memset(&dst[i], 0, dim_); + } +} + +// TODO: Review and optimize inv imlementation +void Goldilocks::inv(Element &result, const Element &in1) +{ + if (Goldilocks::isZero(in1)) + { + throw std::runtime_error("Error: Goldilocks::inv called with zero"); + } + u_int64_t t = 0; + u_int64_t r = GOLDILOCKS_PRIME; + u_int64_t newt = 1; + + u_int64_t newr = Goldilocks::toU64(in1); + Element q; + Element aux1; + Element aux2; + while (newr != 0) + { + q = Goldilocks::fromU64(r / newr); + aux1 = Goldilocks::fromU64(t); + aux2 = Goldilocks::fromU64(newt); + t = Goldilocks::toU64(aux2); + newt = Goldilocks::toU64(Goldilocks::sub(aux1, Goldilocks::mul(q, aux2))); + aux1 = Goldilocks::fromU64(r); + aux2 = Goldilocks::fromU64(newr); + r = Goldilocks::toU64(aux2); + newr = Goldilocks::toU64(Goldilocks::sub(aux1, Goldilocks::mul(q, aux2))); + } + + Goldilocks::fromU64(result, t); +#if GOLDILOCKS_DEBUG == 1 + result.fe = result.fe % GOLDILOCKS_PRIME; +#endif +}; \ No newline at end of file diff --git a/lib-c/c/src/poseidon2/goldilocks_base_field.hpp b/lib-c/c/src/poseidon2/goldilocks_base_field.hpp new file mode 100644 index 000000000..63787ec59 --- /dev/null +++ b/lib-c/c/src/poseidon2/goldilocks_base_field.hpp @@ -0,0 +1,170 @@ +#ifndef GOLDILOCKS_BASE +#define GOLDILOCKS_BASE + +#include // uint64_t +#include // string +#include +#include // string +#include +#include + +#define GOLDILOCKS_DEBUG 0 +#ifndef USE_ASSEMBLY +#define USE_ASSEMBLY 1 // Default value if not set by the Makefile +#endif +#define GOLDILOCKS_NUM_ROOTS 33 +#define GOLDILOCKS_PRIME 0xFFFFFFFF00000001ULL +#define GOLDILOCKS_PRIME_NEG 0xFFFFFFFF +#define MSB_ 0x8000000000000000 // Most Significant Bit + +class Goldilocks +{ +public: + typedef struct + { + uint64_t fe; + } Element; + +private: + static const Element ZR; + static const Element Q; + static const Element MM; + static const Element CQ; + static const Element R2; + static const Element TWO32; + + static const Element ZERO; + static const Element ONE; + static const Element NEGONE; + static const Element SHIFT; + static const Element W[GOLDILOCKS_NUM_ROOTS]; + +public: + /* + Basic functionality + */ + + static const Element &zero(); + static void zero(Element &result); + + static const Element &one(); + static void one(Element &result); + + static const Element &negone(); + static void negone(Element &result); + + static const Element &shift(); + static void shift(Element &result); + + static const Element &w(uint64_t i); + static void w(Element &result, uint64_t i); + + static Element fromU64(uint64_t in1); + static void fromU64(Element &result, uint64_t in1); + static Element fromS64(int64_t in1); + static void fromS64(Element &result, int64_t in1); + static Element fromS32(int32_t in1); + static void fromS32(Element &result, int32_t in1); + static Element fromString(const std::string &in1, int radix = 10); + static void fromString(Element &result, const std::string &in1, int radix = 10); + static Element fromScalar(const mpz_class &scalar); + static void fromScalar(Element &result, const mpz_class &scalar); + + static uint64_t toU64(const Element &in1); + static void toU64(uint64_t &result, const Element &in1); + static int64_t toS64(const Element &in1); + static void toS64(int64_t &result, const Element &in1); + static bool toS32(int32_t &result, const Element &in1); + static std::string toString(const Element &in1, int radix = 10); + static void toString(std::string &result, const Element &in1, int radix = 10); + static std::string toString(const Element *in1, const uint64_t size, int radix = 10); + + /* + Scalar operations + */ + static void copy(Element &dst, const Element &src); + static void copy(Element *dst, const Element *src); + + static void parcpy(Element *dst, const Element *src, uint64_t size, int num_threads_copy = 64); + static void parSetZero(Element *dst, uint64_t size, int num_threads_copy = 64); + + static Element add(const Element &in1, const Element &in2); + static void add(Element &result, const Element &in1, const Element &in2); + static void add_no_double_carry(uint64_t &result, const uint64_t &in1, const uint64_t &in2); + static Element inc(const Goldilocks::Element &fe); + + static Element sub(const Element &in1, const Element &in2); + static void sub(Element &result, const Element &in1, const Element &in2); + static Element dec(const Goldilocks::Element &fe); + + static Element mul(const Element &in1, const Element &in2); + static void mul(Element &result, const Element &in1, const Element &in2); + static void mul1(Element &result, const Element &in1, const Element &in2); + static void mul2(Element &result, const Element &in1, const Element &in2); + + static Element square(const Element &in1); + static void square(Element &result, const Element &in1); + + static Element pow(const Element& base, uint64_t exp); + + static Element div(const Element &in1, const Element &in2); + static void div(Element &result, const Element &in1, const Element &in2); + + static Element neg(const Element &in1); + static void neg(Element &result, const Element &in1); + + static bool isZero(const Element &in1); + static bool isOne(const Element &in1); + static bool isNegone(const Element &in1); + + static bool equal(const Element &in1, const Element &in2); + + static Element inv(const Element &in1); + static void inv(Element &result, const Element &in1); + + static Element mulScalar(const Element &base, const uint64_t &scalar); + static void mulScalar(Element &result, const Element &base, const uint64_t &scalar); + + static Element exp(Element base, uint64_t exp); + static void exp(Element &result, Element base, uint64_t exps); + + static void batchInverse(Element *res, const Element *src, uint64_t size) + { + Element* tmp = new Element[size]; + copy(tmp[0], src[0]); + + for (uint64_t i = 1; i < size; i++) + { + mul(tmp[i], tmp[i - 1], src[i]); + } + + Element z, z2; + inv(z, tmp[size - 1]); + + for (uint64_t i = size - 1; i > 0; i--) + { + mul(z2, z, src[i]); + mul(res[i], z, tmp[i - 1]); + copy(z, z2); + } + copy(res[0], z); + + delete[] tmp; + } +}; + +/* + Operator Overloading +*/ +inline Goldilocks::Element operator+(const Goldilocks::Element &in1, const Goldilocks::Element &in2) { return Goldilocks::add(in1, in2); } +inline Goldilocks::Element operator*(const Goldilocks::Element &in1, const Goldilocks::Element &in2) { return Goldilocks::mul(in1, in2); } +inline Goldilocks::Element operator-(const Goldilocks::Element &in1, const Goldilocks::Element &in2) { return Goldilocks::sub(in1, in2); } +inline Goldilocks::Element operator/(const Goldilocks::Element &in1, const Goldilocks::Element &in2) { return Goldilocks::div(in1, in2); } +inline bool operator==(const Goldilocks::Element &in1, const Goldilocks::Element &in2) { return Goldilocks::equal(in1, in2); } +inline Goldilocks::Element operator-(const Goldilocks::Element &in1) { return Goldilocks::neg(in1); } +inline Goldilocks::Element operator+(const Goldilocks::Element &in1) { return in1; } + +#include "goldilocks_base_field_tools.hpp" +#include "goldilocks_base_field_scalar.hpp" + +#endif // GOLDILOCKS_BASE diff --git a/lib-c/c/src/poseidon2/goldilocks_base_field_scalar.hpp b/lib-c/c/src/poseidon2/goldilocks_base_field_scalar.hpp new file mode 100644 index 000000000..fa54c520a --- /dev/null +++ b/lib-c/c/src/poseidon2/goldilocks_base_field_scalar.hpp @@ -0,0 +1,317 @@ +#ifndef GOLDILOCKS_SCALAR +#define GOLDILOCKS_SCALAR +#include "goldilocks_base_field.hpp" + +inline void Goldilocks::copy(Element &dst, const Element &src) { dst.fe = src.fe; }; + +inline void Goldilocks::copy(Element *dst, const Element *src) { dst->fe = src->fe; }; + +inline Goldilocks::Element Goldilocks::add(const Element &in1, const Element &in2) +{ + Goldilocks::Element result; + Goldilocks::add(result, in1, in2); + return result; +} + +inline void Goldilocks::add(Element &result, const Element &in1, const Element &in2) +{ +#ifdef __USE_ASSEMBLY__ + uint64_t in_1 = in1.fe; + uint64_t in_2 = in2.fe; + __asm__("xor %%r10, %%r10\n\t" + "mov %1, %0\n\t" + "add %2, %0\n\t" + "cmovc %3, %%r10\n\t" + "add %%r10, %0\n\t" + "jnc 1f\n\t" + "add %3, %0\n\t" + "1: \n\t" + : "=&a"(result.fe) + : "r"(in_1), "r"(in_2), "m"(CQ), "m"(ZR) + : "%r10"); +#else + uint64_t in_1 = in1.fe; + if(in_1 >= GOLDILOCKS_PRIME){ + in_1 -= GOLDILOCKS_PRIME; + } + result.fe = in_1 + in2.fe; + if(in_1 > result.fe){ + result.fe -= GOLDILOCKS_PRIME; + } +#endif +} + +inline Goldilocks::Element Goldilocks::inc(const Goldilocks::Element &fe) +{ + Goldilocks::Element result; + if (fe.fe < GOLDILOCKS_PRIME - 2) + { + result.fe = fe.fe + 1; + } + else if (fe.fe == GOLDILOCKS_PRIME - 1) + { + result.fe = 0; + } + else + { + result = Goldilocks::add(fe, Goldilocks::one()); + } + return result; +} + +inline Goldilocks::Element Goldilocks::sub(const Element &in1, const Element &in2) +{ + Goldilocks::Element result; + Goldilocks::sub(result, in1, in2); + return result; +} + +inline void Goldilocks::sub(Element &result, const Element &in1, const Element &in2) +{ +#ifdef __USE_ASSEMBLY__ + uint64_t in_1 = in1.fe; + uint64_t in_2 = in2.fe; + __asm__("xor %%r10, %%r10\n\t" + "mov %1, %0\n\t" + "sub %2, %0\n\t" + "cmovc %3, %%r10\n\t" + "sub %%r10, %0\n\t" + "jnc 1f\n\t" + "sub %3, %0\n\t" + "1: \n\t" + : "=&a"(result.fe) + : "r"(in_1), "r"(in_2), "m"(CQ), "m"(ZR) + : "%r10"); +#else + uint64_t in_2 = in2.fe; + if(in_2 >= GOLDILOCKS_PRIME){ + in_2 -= GOLDILOCKS_PRIME; + } + result.fe = in1.fe - in_2; + if(in_2 > in1.fe){ + result.fe += GOLDILOCKS_PRIME; + } +#endif +#if GOLDILOCKS_DEBUG == 1 + result.fe = result.fe % GOLDILOCKS_PRIME; +#endif +} + +inline Goldilocks::Element Goldilocks::dec(const Goldilocks::Element &fe) +{ + Goldilocks::Element result; + if (fe.fe > 0) + { + result.fe = fe.fe - 1; + } + else + { + result.fe = GOLDILOCKS_PRIME - 1; + } + return result; +} + +inline Goldilocks::Element Goldilocks::mul(const Element &in1, const Element &in2) +{ + Goldilocks::Element result; + Goldilocks::mul(result, in1, in2); + return result; +} + +inline Goldilocks::Element Goldilocks::pow(const Element& base, uint64_t exp) +{ + Element result; + one(result); + Element temp; + copy(temp, base); + while (exp > 0) + { + if (exp % 2 == 1) + { + mul(result, result, temp); + } + mul(temp, temp, temp); + exp /= 2; + } + return result; +} + +/* +* Stable version used until new optimization based on branch_hint was introduced (see mul function) +*/ +inline void Goldilocks::mul1(Element &result, const Element &in1, const Element &in2) +{ + +#ifdef __USE_ASSEMBLY__ + __asm__("mov %1, %0\n\t" + "mul %2\n\t" + // "xor %%rbx, %%rbx\n\t" + "mov %%edx, %%ebx\n\t" + "sub %4, %%rbx\n\t" + "rol $32, %%rdx\n\t" + //"xor %%rcx, %%rcx;\n\t" + "mov %%edx, %%ecx\n\t" + "sub %%rcx, %%rdx\n\t" + "add %4, %%rcx\n\t" + "sub %%rbx, %%rdx\n\t" + //"mov %3,%%r10 \n\t" + "xor %%rbx, %%rbx\n\t" + "add %%rdx, %0\n\t" + "cmovc %3, %%rbx\n\t" + "add %%rbx, %0\n\t" + // TODO: migrate to labels + //"xor %%rbx, %%rbx\n\t" + //"sub %%rcx, %0\n\t" + //"cmovc %%r10, %%rbx\n\t" + //"sub %%rbx, %0\n\t" + "sub %%rcx, %0\n\t" + "jnc 1f\n\t" + "sub %3, %0\n\t" + "1: \n\t" + : "=&a"(result.fe) + : "r"(in1.fe), "r"(in2.fe), "m"(CQ), "m"(TWO32) + : "%rbx", "%rcx", "%rdx"); + +#if GOLDILOCKS_DEBUG == 1 + result.fe = result.fe % GOLDILOCKS_PRIME; +#endif +#else + mul(result, in1, in2); +#endif +} + +inline void Goldilocks::mul2(Element &result, const Element &in1, const Element &in2) +{ + +#ifdef __USE_ASSEMBLY__ + __asm__( + "mov %1, %%rax\n\t" + "mul %2\n\t" + "divq %3\n\t" + : "=&d"(result.fe) + : "r"(in1.fe), "r"(in2.fe), "m"(Q) + : "%rax"); + +#if GOLDILOCKS_DEBUG == 1 + result.fe = result.fe % GOLDILOCKS_PRIME; +#endif +#else + mul(result, in1, in2); +#endif +} + +inline void branch_hint() { + asm("nop"); +} +inline void Goldilocks::add_no_double_carry(uint64_t &result, const uint64_t &in1, const uint64_t &in2) +{ + +#ifdef __USE_ASSEMBLY__ + __asm__("xor %%r10, %%r10\n\t" + "mov %1, %0\n\t" + "add %2, %0\n\t" + "cmovc %3, %%r10\n\t" + "add %%r10, %0\n\t" + : "=&a"(result) + : "r"(in1), "r"(in2), "m"(CQ) + : "%r10"); +#endif +} +/** + * Optimized version inspired in Plonky3 optimizations, using branch_hint hint the processor that the branch is unlikely to be taken + */ + +inline void Goldilocks::mul(Element &result, const Element &in1, const Element &in2){ + + + uint64_t rh; + uint64_t rl; + + __uint128_t res = static_cast<__uint128_t>(in1.fe) * static_cast<__uint128_t>(in2.fe); + rl = (uint64_t)res; + rh = (uint64_t)(res>>64); + uint64_t rhh = rh >> 32; + uint64_t rhl = rh & 0xFFFFFFFF; + + uint64_t aux1; + aux1 = rl - rhh; + if(rhh>rl){ //this branch is unlikely to be taken + branch_hint(); + aux1-=0xFFFFFFFF; + } + uint64_t aux = 0xFFFFFFFF* rhl; + // aux1 <= 2^64-1 + // aux <= (2^32-1)*(2^32-1) = 2^64-2^32+1-2^32 = P-2^32 + // aux1 + aux <= 2^64-1 + P-2^32 = P+P-2=2P-2 + #ifdef __USE_ASSEMBLY__ + add_no_double_carry(result.fe, aux1, aux); + #else + Goldilocks::Element aux1_, aux2_; + aux1_.fe = aux1; + aux2_.fe = aux; + add(result, aux2_, aux1_); + #endif + +} + +inline Goldilocks::Element Goldilocks::square(const Element &in1) { return mul(in1, in1); }; + +inline void Goldilocks::square(Element &result, const Element &in1) { return mul(result, in1, in1); }; + +inline Goldilocks::Element Goldilocks::div(const Element &in1, const Element &in2) { return mul(in1, inv(in2)); }; + +inline void Goldilocks::div(Element &result, const Element &in1, const Element &in2) { mul(result, in1, inv(in2)); }; + +inline Goldilocks::Element Goldilocks::neg(const Element &in1) { return sub(Goldilocks::zero(), in1); }; + +inline void Goldilocks::neg(Element &result, const Element &in1) { return sub(result, Goldilocks::zero(), in1); }; + +inline bool Goldilocks::isZero(const Element &in1) { return equal(in1, Goldilocks::zero()); }; + +inline bool Goldilocks::isOne(const Element &in1) { return equal(in1, Goldilocks::one()); }; + +inline bool Goldilocks::isNegone(const Element &in1) { return equal(in1, Goldilocks::negone()); }; + +inline bool Goldilocks::equal(const Element &in1, const Element &in2) { return Goldilocks::toU64(in1) == Goldilocks::toU64(in2); } + +inline Goldilocks::Element Goldilocks::inv(const Element &in1) +{ + Goldilocks::Element result; + Goldilocks::inv(result, in1); + return result; +}; + +inline Goldilocks::Element Goldilocks::mulScalar(const Element &base, const uint64_t &scalar) +{ + Goldilocks::Element result; + Goldilocks::mulScalar(result, base, scalar); + return result; +}; +inline void Goldilocks::mulScalar(Element &result, const Element &base, const uint64_t &scalar) +{ + Element eScalar = fromU64(scalar); + mul(result, base, eScalar); +}; + +inline Goldilocks::Element Goldilocks::exp(Element base, uint64_t exp) +{ + Goldilocks::Element result; + Goldilocks::exp(result, base, exp); + return result; +}; + +inline void Goldilocks::exp(Element &result, Element base, uint64_t exp) +{ + result = Goldilocks::one(); + + for (;;) + { + if (exp & 1) + mul(result, result, base); + exp >>= 1; + if (!exp) + break; + mul(base, base, base); + } +}; +#endif \ No newline at end of file diff --git a/lib-c/c/src/poseidon2/goldilocks_base_field_tools.hpp b/lib-c/c/src/poseidon2/goldilocks_base_field_tools.hpp new file mode 100644 index 000000000..7c9feb76a --- /dev/null +++ b/lib-c/c/src/poseidon2/goldilocks_base_field_tools.hpp @@ -0,0 +1,191 @@ +#ifndef GOLDILOCKS_BASIC +#define GOLDILOCKS_BASIC +#include "goldilocks_base_field.hpp" + + +inline const Goldilocks::Element &Goldilocks::zero() { return ZERO; }; +inline void Goldilocks::zero(Element &result) { result.fe = ZERO.fe; }; + +inline const Goldilocks::Element &Goldilocks::one() { return ONE; }; +inline void Goldilocks::one(Element &result) { result.fe = ONE.fe; }; + +inline const Goldilocks::Element &Goldilocks::negone() { return NEGONE; }; +inline void Goldilocks::negone(Element &result) { result.fe = NEGONE.fe; }; + +inline const Goldilocks::Element &Goldilocks::shift() { return SHIFT; }; +inline void Goldilocks::shift(Element &result) { result.fe = SHIFT.fe; }; + +inline const Goldilocks::Element &Goldilocks::w(uint64_t i) { return W[i]; }; +inline void Goldilocks::w(Element &result, uint64_t i) { result.fe = W[i].fe; }; + +inline Goldilocks::Element Goldilocks::fromU64(uint64_t in1) +{ + Goldilocks::Element res; + Goldilocks::fromU64(res, in1); + return res; +} + +inline void Goldilocks::fromU64(Element &result, uint64_t in1) +{ + result.fe = in1; +} + +inline Goldilocks::Element Goldilocks::fromS64(int64_t in1) +{ + Goldilocks::Element res; + Goldilocks::fromS64(res, in1); + return res; +} + +inline void Goldilocks::fromS64(Element &result, int64_t in1) +{ + uint64_t aux; + (in1 < 0) ? aux = static_cast(in1) + GOLDILOCKS_PRIME : aux = static_cast(in1); + result.fe = aux; +} + + +inline Goldilocks::Element Goldilocks::fromS32(int32_t in1) +{ + Goldilocks::Element res; + Goldilocks::fromS32(res, in1); + return res; +} + +inline void Goldilocks::fromS32(Element &result, int32_t in1) +{ + uint64_t aux; + (in1 < 0) ? aux = static_cast(in1) + GOLDILOCKS_PRIME : aux = static_cast(in1); + result.fe = aux; + +} + +inline Goldilocks::Element Goldilocks::fromString(const std::string &in1, int radix) +{ + Goldilocks::Element result; + Goldilocks::fromString(result, in1, radix); + return result; +}; + +inline void Goldilocks::fromString(Element &result, const std::string &in1, int radix) +{ + mpz_class aux(in1, radix); + mpz_class gl(0xFFFFFFFF00000001); + + aux = (aux + gl) % gl; + result.fe = aux.get_ui(); + +}; + +inline Goldilocks::Element Goldilocks::fromScalar(const mpz_class &scalar) +{ + Goldilocks::Element result; + Goldilocks::fromScalar(result, scalar); + return result; +}; + +inline void Goldilocks::fromScalar(Element &result, const mpz_class &scalar) +{ + mpz_class gl(0xFFFFFFFF00000001); + mpz_class aux = (scalar + gl) % gl; + result.fe = aux.get_ui(); +}; + +inline uint64_t Goldilocks::toU64(const Element &in1) +{ + uint64_t res; + Goldilocks::toU64(res, in1); + return res; +}; +inline void Goldilocks::toU64(uint64_t &result, const Element &in1) +{ + result = in1.fe; + if( result >= GOLDILOCKS_PRIME ) + result -= GOLDILOCKS_PRIME; +}; + +inline int64_t Goldilocks::toS64(const Element &in1) +{ + int64_t res; + Goldilocks::toS64(res, in1); + return res; +} + +/* Converts a field element into a signed 64bits integer */ +inline void Goldilocks::toS64(int64_t &result, const Element &in1) +{ + + mpz_class out(std::to_string(Goldilocks::toU64(in1))); + mpz_class gl(0xFFFFFFFF00000001); + + mpz_class maxInt = (gl - 1) / 2; + + if (out > maxInt) + { + mpz_class onegative = gl - out; + result = -onegative.get_si(); + } + else + { + result = out.get_si(); + } +} + +/* Converts a field element into a signed 32bits integer */ +/* Precondition: Goldilocks::Element < 2^31 */ +inline bool Goldilocks::toS32(int32_t &result, const Element &in1) +{ + mpz_class out(std::to_string(Goldilocks::toU64(in1))); + mpz_class gl(0xFFFFFFFF00000001); + + mpz_class maxInt(0x7FFFFFFF); + mpz_class minInt = gl - mpz_class(0x80000000); + + if (out > maxInt) + { + mpz_class onegative = gl - out; + if (out > minInt) + { + result = -onegative.get_si(); + } + else + { + std::cerr << "Error: Goldilocks::toS32 accessing a non-32bit value: " << Goldilocks::toString(in1, 16) << " out=" << out.get_str(16) << " minInt=" << minInt.get_str(16) << " maxInt=" << maxInt.get_str(16) << std::endl; + return false; + } + } + else + { + result = out.get_si(); + } + return true; +} + +inline std::string Goldilocks::toString(const Element &in1, int radix) +{ + std::string result; + Goldilocks::toString(result, in1, radix); + return result; +} + +inline void Goldilocks::toString(std::string &result, const Element &in1, int radix) +{ + mpz_class aux; + uint64_t value = Goldilocks::toU64(in1); + mpz_import(aux.get_mpz_t(), 1, -1, sizeof(value), 0, 0, &value); + result = aux.get_str(radix); +} + +inline std::string Goldilocks::toString(const Element *in1, const uint64_t size, int radix) +{ + std::string result = ""; + for (uint64_t i = 0; i < size; i++) + { + mpz_class aux; + uint64_t value = Goldilocks::toU64(in1[i]); + mpz_import(aux.get_mpz_t(), 1, -1, sizeof(value), 0, 0, &value); + result += std::to_string(i) + ": " + aux.get_str(radix) + "\n"; + } + return result; +} +#endif \ No newline at end of file diff --git a/lib-c/c/src/poseidon2/poseidon2_goldilocks.cpp b/lib-c/c/src/poseidon2/poseidon2_goldilocks.cpp new file mode 100644 index 000000000..1fc5f1d13 --- /dev/null +++ b/lib-c/c/src/poseidon2/poseidon2_goldilocks.cpp @@ -0,0 +1,134 @@ +#ifndef POSEIDON2_GOLDILOCKS +#define POSEIDON2_GOLDILOCKS + +#include +#include "poseidon2_goldilocks_constants.hpp" +#include "goldilocks_base_field.hpp" + +#define WIDTH 16 + +inline void pow7(Goldilocks::Element &x) +{ + Goldilocks::Element x2 = x * x; + Goldilocks::Element x3 = x * x2; + Goldilocks::Element x4 = x2 * x2; + x = x3 * x4; +}; + +inline void add_(Goldilocks::Element &x, const Goldilocks::Element *st) +{ + for (int i = 0; i < WIDTH; ++i) + { + x = x + st[i]; + } +} +inline void prodadd_(Goldilocks::Element *x, const Goldilocks::Element *D, const Goldilocks::Element &sum) +{ + for (int i = 0; i < WIDTH; ++i) + { + x[i] = x[i]*D[i] + sum; + } +} + +inline void pow7add_(Goldilocks::Element *x, const Goldilocks::Element *C) +{ + Goldilocks::Element x2[WIDTH], x3[WIDTH], x4[WIDTH]; + + for (int i = 0; i < WIDTH; ++i) + { + Goldilocks::Element xi = x[i] + C[i]; + x2[i] = xi * xi; + x3[i] = xi * x2[i]; + x4[i] = x2[i] * x2[i]; + x[i] = x3[i] * x4[i]; + } +}; + +inline void matmul_m4_(Goldilocks::Element *x) { + Goldilocks::Element t0 = x[0] + x[1]; + Goldilocks::Element t1 = x[2] + x[3]; + Goldilocks::Element t2 = x[1] + x[1] + t1; + Goldilocks::Element t3 = x[3] + x[3] + t0; + Goldilocks::Element t1_2 = t1 + t1; + Goldilocks::Element t0_2 = t0 + t0; + Goldilocks::Element t4 = t1_2 + t1_2 + t3; + Goldilocks::Element t5 = t0_2 + t0_2 + t2; + Goldilocks::Element t6 = t3 + t5; + Goldilocks::Element t7 = t2 + t4; + + x[0] = t6; + x[1] = t5; + x[2] = t7; + x[3] = t4; +} + +inline void matmul_external_(Goldilocks::Element *x) { + for (int i = 0; i < WIDTH/4; ++i) { + matmul_m4_(&x[i*4]); + } + + Goldilocks::Element stored[4] = {Goldilocks::zero(), Goldilocks::zero(), Goldilocks::zero(), Goldilocks::zero()}; + + for(int i = 0; i < 4; ++i) { + for (int j = 0; j < WIDTH/4; ++j) { + stored[i] = stored[i] + x[j*4 + i]; + } + } + + for (int i = 0; i < WIDTH; ++i) + { + x[i] = x[i] + stored[i % 4]; + } +} + +void Poseidon2(Goldilocks::Element *state) +{ + const Goldilocks::Element *RC = Poseidon2GoldilocksConstants::RC; + const Goldilocks::Element *D = Poseidon2GoldilocksConstants::DIAG; + + matmul_external_(state); + + for (int r = 0; r < 4; r++) + { + pow7add_(state, &(RC[WIDTH * r])); + matmul_external_(state); + } + + for (int r = 0; r < 22; r++) + { + state[0] = state[0] + RC[4 * WIDTH + r]; + pow7(state[0]); + Goldilocks::Element sum_ = Goldilocks::zero(); + add_(sum_, state); + prodadd_(state, D, sum_); + } + + for (int r = 0; r < 4; r++) + { + pow7add_(state, &(RC[4 * WIDTH + 22 + r * WIDTH])); + matmul_external_(state); + } +} + +#ifdef __cplusplus +extern "C" { +#endif + +void poseidon2_hash(uint64_t *state) +{ + Goldilocks::Element stateGL[16]; + for(uint64_t i = 0; i < 16; ++i) { + stateGL[i] = Goldilocks::fromU64(state[i]); + } + Poseidon2(stateGL); + + for(uint64_t i = 0; i < WIDTH; ++i) { + state[i] = Goldilocks::toU64(stateGL[i]); + } +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif \ No newline at end of file diff --git a/lib-c/c/src/poseidon2/poseidon2_goldilocks.hpp b/lib-c/c/src/poseidon2/poseidon2_goldilocks.hpp new file mode 100644 index 000000000..dbdde90ec --- /dev/null +++ b/lib-c/c/src/poseidon2/poseidon2_goldilocks.hpp @@ -0,0 +1,16 @@ +#ifndef POSEIDON2_GOLDILOCKS_HPP +#define POSEIDON2_GOLDILOCKS_HPP + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +void poseidon2_hash(uint64_t *state); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif \ No newline at end of file diff --git a/lib-c/c/src/poseidon2/poseidon2_goldilocks_constants.hpp b/lib-c/c/src/poseidon2/poseidon2_goldilocks_constants.hpp new file mode 100644 index 000000000..8c36738e9 --- /dev/null +++ b/lib-c/c/src/poseidon2/poseidon2_goldilocks_constants.hpp @@ -0,0 +1,181 @@ +#ifndef POSEIDON2_GOLDILOCKS_CONSTANTS +#define POSEIDON2_GOLDILOCKS_CONSTANTS +#endif // POSEIDON2_GOLDILOCKS_CONSTANTS +#include "goldilocks_base_field.hpp" + +namespace Poseidon2GoldilocksConstants +{ + inline constexpr static Goldilocks::Element DIAG[16] = { + {0xde9b91a467d6afc0}, + {0xc5f16b9c76a9be17}, + {0x0ab0fef2d540ac55}, + {0x3001d27009d05773}, + {0xed23b1f906d3d9eb}, + {0x5ce73743cba97054}, + {0x1c3bab944af4ba24}, + {0x2faa105854dbafae}, + {0x53ffb3ae6d421a10}, + {0xbcda9df8884ba396}, + {0xfc1273e4a31807bb}, + {0xc77952573d5142c0}, + {0x56683339a819b85e}, + {0x328fcbd8f0ddc8eb}, + {0xb5101e303fce9cb7}, + {0x774487b8c40089bb}, + }; + + inline constexpr static Goldilocks::Element RC[150] = { + {0x15ebea3fc73397c3}, + {0xd73cd9fbfe8e275c}, + {0x8c096bfce77f6c26}, + {0x4e128f68b53d8fea}, + {0x29b779a36b2763f6}, + {0xfe2adc6fb65acd08}, + {0x8d2520e725ad0955}, + {0x1c2392b214624d2a}, + {0x37482118206dcc6e}, + {0x2f829bed19be019a}, + {0x2fe298cb6f8159b0}, + {0x2bbad982deccdbbf}, + {0xbad568b8cc60a81e}, + {0xb86a814265baad10}, + {0xbec2005513b3acb3}, + {0x6bf89b59a07c2a94}, + {0xa25deeb835e230f5}, + {0x3c5bad8512b8b12a}, + {0x7230f73c3cb7a4f2}, + {0xa70c87f095c74d0f}, + {0x6b7606b830bb2e80}, + {0x6cd467cfc4f24274}, + {0xfeed794df42a9b0a}, + {0x8cf7cf6163b7dbd3}, + {0x9a6e9dda597175a0}, + {0xaa52295a684faf7b}, + {0x017b811cc3589d8d}, + {0x55bfb699b6181648}, + {0xc2ccaf71501c2421}, + {0x1707950327596402}, + {0xdd2fcdcd42a8229f}, + {0x8b9d7d5b27778a21}, + {0xac9a05525f9cf512}, + {0x2ba125c58627b5e8}, + {0xc74e91250a8147a5}, + {0xa3e64b640d5bb384}, + {0xf53047d18d1f9292}, + {0xbaaeddacae3a6374}, + {0xf2d0914a808b3db1}, + {0x18af1a3742bfa3b0}, + {0x9a621ef50c55bdb8}, + {0xc615f4d1cc5466f3}, + {0xb7fbac19a35cf793}, + {0xd2b1a15ba517e46d}, + {0x4a290c4d7fd26f6f}, + {0x4f0cf1bb1770c4c4}, + {0x548345386cd377f5}, + {0x33978d2789fddd42}, + {0xab78c59deb77e211}, + {0xc485b2a933d2be7f}, + {0xbde3792c00c03c53}, + {0xab4cefe8f893d247}, + {0xc5c0e752eab7f85f}, + {0xdbf5a76f893bafea}, + {0xa91f6003e3d984de}, + {0x099539077f311e87}, + {0x097ec52232f9559e}, + {0x53641bdf8991e48c}, + {0x2afe9711d5ed9d7c}, + {0xa7b13d3661b5d117}, + {0x5a0e243fe7af6556}, + {0x1076fae8932d5f00}, + {0x9b53a83d434934e3}, + {0xed3fd595a3c0344a}, + {0x28eff4b01103d100}, + {0x60400ca3e2685a45}, + {0x1c8636beb3389b84}, + {0xac1332b60e13eff0}, + {0x2adafcc364e20f87}, + {0x79ffc2b14054ea0b}, + {0x3f98e4c0908f0a05}, + {0xcdb230bc4e8a06c4}, + {0x1bcaf7705b152a74}, + {0xd9bca249a82a7470}, + {0x91e24af19bf82551}, + {0xa62b43ba5cb78858}, + {0xb4898117472e797f}, + {0xb3228bca606cdaa0}, + {0x844461051bca39c9}, + {0xf3411581f6617d68}, + {0xf7fd50646782b533}, + {0x6ca664253c18fb48}, + {0x2d2fcdec0886a08f}, + {0x29da00dd799b575e}, + {0x47d966cc3b6e1e93}, + {0xde884e9a17ced59e}, + {0xdacf46dc1c31a045}, + {0x5d2e3c121eb387f2}, + {0x51f8b0658b124499}, + {0x1e7dbd1daa72167d}, + {0x8275015a25c55b88}, + {0xe8521c24ac7a70b3}, + {0x6521d121c40b3f67}, + {0xac12de797de135b0}, + {0xafa28ead79f6ed6a}, + {0x685174a7a8d26f0b}, + {0xeff92a08d35d9874}, + {0x3058734b76dd123a}, + {0xfa55dcfba429f79c}, + {0x559294d4324c7728}, + {0x7a770f53012dc178}, + {0xedd8f7c408f3883b}, + {0x39b533cf8d795fa5}, + {0x160ef9de243a8c0a}, + {0x431d52da6215fe3f}, + {0x54c51a2a2ef6d528}, + {0x9b13892b46ff9d16}, + {0x263c46fcee210289}, + {0xb738c96d25aabdc4}, + {0x5c33a5203996d38f}, + {0x2626496e7c98d8dd}, + {0xc669e0a52785903a}, + {0xaecde726c8ae1f47}, + {0x039343ef3a81e999}, + {0x2615ceaf044a54f9}, + {0x7e41e834662b66e1}, + {0x4ca5fd4895335783}, + {0x64b334d02916f2b0}, + {0x87268837389a6981}, + {0x034b75bcb20a6274}, + {0x58e658296cc2cd6e}, + {0xe2d0f759acc31df4}, + {0x81a652e435093e20}, + {0x0b72b6e0172eaf47}, + {0x4aec43cec577d66d}, + {0xde78365b028a84e6}, + {0x444e19569adc0ee4}, + {0x942b2451fa40d1da}, + {0xe24506623ea5bd6c}, + {0x082854bf2ef7c743}, + {0x69dbbc566f59d62e}, + {0x248c38d02a7b5cb2}, + {0x4f4e8f8c09d15edb}, + {0xd96682f188d310cf}, + {0x6f9a25d56818b54c}, + {0xb6cefed606546cd9}, + {0x5bc07523da38a67b}, + {0x7df5a3c35b8111cf}, + {0xaaa2cc5d4db34bb0}, + {0x9e673ff22a4653f8}, + {0xbd8b278d60739c62}, + {0xe10d20f6925b8815}, + {0xf6c87b91dd4da2bf}, + {0xfed623e2f71b6f1a}, + {0xa0f02fa52a94d0d3}, + {0xbb5794711b39fa16}, + {0xd3b94fba9d005c7f}, + {0x15a26e89fad946c9}, + {0xf3cb87db8a67cf49}, + {0x400d2bf56aa2a577}, + }; + + +} \ No newline at end of file diff --git a/lib-c/c/src/secp256r1/secp256r1.cpp b/lib-c/c/src/secp256r1/secp256r1.cpp new file mode 100644 index 000000000..2d4d22b31 --- /dev/null +++ b/lib-c/c/src/secp256r1/secp256r1.cpp @@ -0,0 +1,446 @@ + +#include +#include "secp256r1.hpp" +#include "../ffiasm/psecp256r1.hpp" +#include "../ffiasm/nsecp256r1.hpp" +#include "../common/utils.hpp" +#include "../common/globals.hpp" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int inline secp256r1_add_point_ec_fe (bool dbl, const RawpSecp256r1::Element &x1, const RawpSecp256r1::Element &y1, const RawpSecp256r1::Element &x2, const RawpSecp256r1::Element &y2, RawpSecp256r1::Element &x3, RawpSecp256r1::Element &y3) +{ + // Check if results are buffered +#ifdef ENABLE_EXPERIMENTAL_CODE + if(ctx.ecRecoverPrecalcBuffer.filled == true){ + if(ctx.ecRecoverPrecalcBuffer.pos < 2){ + zklog.error("ecRecoverPrecalcBuffer.buffer buffer is not filled, but pos < 2 (pos=" + to_string(ctx.ecRecoverPrecalcBuffer.pos) + ")"); + exitProcess(); + } + x3 = ctx.ecRecoverPrecalcBuffer.buffer[ctx.ecRecoverPrecalcBuffer.pos-2]; + y3 = ctx.ecRecoverPrecalcBuffer.buffer[ctx.ecRecoverPrecalcBuffer.pos-1]; + return ZKR_SUCCESS; + } +#endif + + RawpSecp256r1::Element aux1, aux2, s; + + if (dbl) + { + // s = (3*x1*x1 + (p-3))/2*y1 = 3*(x1^2 - 1)/2*y1 + secp256r1.mul(aux1, x1, x1); + secp256r1.fromUI(aux2, 3); + secp256r1.add(aux1, aux1, secp256r1.negOne()); + secp256r1.mul(aux1, aux1, aux2); + secp256r1.add(aux2, y1, y1); + if (secp256r1.isZero(aux2)) + { + printf("secp256r1_add_point_ec_fe() got denominator=0 1\n"); + return -1; + } + secp256r1.div(s, aux1, aux2); + + // Required for x3 calculation + secp256r1.add(aux2, x1, x1); + } + else + { + // s = (y2-y1)/(x2-x1) + secp256r1.sub(aux1, y2, y1); + secp256r1.sub(aux2, x2, x1); + if (secp256r1.isZero(aux2)) + { + printf("secp256r1_add_point_ec_fe() got denominator=0 2\n"); + return -1; + } + secp256r1.div(s, aux1, aux2); + + // Required for x3 calculation + secp256r1.add(aux2, x1, x2); + } + + // x3 = s*s - (x1+x2) + secp256r1.mul(aux1, s, s); + // aux2 was calculated before + secp256r1.sub(x3, aux1, aux2); + + // y3 = s*(x1-x3) - y1 + secp256r1.sub(aux1, x1, x3);; + secp256r1.mul(aux1, aux1, s); + secp256r1.sub(y3, aux1, y1); + + return 0; +} + +int inline secp256r1_add_point_ec_dbl_fe (RawpSecp256r1::Element &x1, RawpSecp256r1::Element &y1) +{ + // Check if results are buffered +#ifdef ENABLE_EXPERIMENTAL_CODE + if(ctx.ecRecoverPrecalcBuffer.filled == true){ + if(ctx.ecRecoverPrecalcBuffer.pos < 2){ + zklog.error("ecRecoverPrecalcBuffer.buffer buffer is not filled, but pos < 2 (pos=" + to_string(ctx.ecRecoverPrecalcBuffer.pos) + ")"); + exitProcess(); + } + x3 = ctx.ecRecoverPrecalcBuffer.buffer[ctx.ecRecoverPrecalcBuffer.pos-2]; + y3 = ctx.ecRecoverPrecalcBuffer.buffer[ctx.ecRecoverPrecalcBuffer.pos-1]; + return ZKR_SUCCESS; + } +#endif + + RawpSecp256r1::Element aux1, aux2, aux3, s; + + // s = 3*x1*x1/2*y1 + secp256r1.mul(aux1, x1, x1); + secp256r1.fromUI(aux2, 3); + secp256r1.mul(aux1, aux1, aux2); + secp256r1.add(aux2, y1, y1); + if (secp256r1.isZero(aux2)) + { + printf("secp256r1_add_point_ec_dbl_fe() got denominator=0 1\n"); + return -1; + } + secp256r1.div(s, aux1, aux2); + + // Required for x3 calculation + secp256r1.add(aux2, x1, x1); + + // x3 = s*s - (x1+x2) + secp256r1.mul(aux1, s, s); + // aux2 was calculated before + + secp256r1.sub(aux3, aux1, aux2); + + // y3 = s*(x1-x3) - y1 + secp256r1.sub(aux1, x1, aux3); + x1 = aux3; + secp256r1.mul(aux1, aux1, s); + secp256r1.sub(y1, aux1, y1); + + return 0; +} + +int secp256r1_add_point_ec (uint64_t _dbl, const uint64_t * _x1, const uint64_t * _y1, const uint64_t * _x2, const uint64_t * _y2, uint64_t * _x3, uint64_t * _y3) +{ + bool dbl = _dbl; + + RawpSecp256r1::Element x1, y1, x2, y2, x3, y3; + array2fe(_x1, x1); + array2fe(_y1, y1); + if (!dbl) + { + array2fe(_x2, x2); + array2fe(_y2, y2); + } + + int result = secp256r1_add_point_ec_fe (dbl, x1, y1, x2, y2, x3, y3); + + fe2array(x3, _x3); + fe2array(y3, _y3); + + return result; +} + +int secp256r1_add_point_ec_dbl (uint64_t * _x1, uint64_t * _y1) +{ + RawpSecp256r1::Element x1, y1; + array2fe(_x1, x1); + array2fe(_y1, y1); + + int result = secp256r1_add_point_ec_dbl_fe (x1, y1); + + fe2array(x1, _x1); + fe2array(y1, _y1); + + return result; +} + +int secp256r1_add_point_ecp (uint64_t _dbl, const uint64_t * p1, const uint64_t * p2, uint64_t * p3) +{ + bool dbl = _dbl; + + RawpSecp256r1::Element x1, y1, x2, y2, x3, y3; + array2fe(p1, x1); + array2fe(p1 + 4, y1); + if (!dbl) + { + array2fe(p2, x2); + array2fe(p2 + 4, y2); + } + + // printf("secp256r1_add_point_ecp() x1=%s\n", secp256r1.toString(x1, 16).c_str()); + // printf("secp256r1_add_point_ecp() y1=%s\n", secp256r1.toString(y1, 16).c_str()); + // printf("secp256r1_add_point_ecp() x2=%s\n", secp256r1.toString(x2, 16).c_str()); + // printf("secp256r1_add_point_ecp() y2=%s\n", secp256r1.toString(y2, 16).c_str()); + + int result = secp256r1_add_point_ec_fe (dbl, x1, y1, x2, y2, x3, y3); + + fe2array(x3, p3); + fe2array(y3, p3 + 4); + + return result; +} + +uint64_t SECP256R1_G[8] = { + 0xF4A13945D898C296, + 0x77037D812DEB33A0, + 0xF8BCE6E563A440F2, + 0x6B17D1F2E12C4247, + 0xCBB6406837BF51F5, + 0x2BCE33576B315ECE, + 0x8EE7EB4A7C0F9E16, + 0x4FE342E2FE1A7F9B +}; + +int secp256r1_ecdsa_verify ( + const uint64_t * pk, // 8 x 64 bits + const uint64_t * _z, // 4 x 64 bits + const uint64_t * _r, // 4 x 64 bits + const uint64_t * _s, // 4 x 64 bits + uint64_t * result // 8 x 64 bits +) +{ + // Convert z, r, s inputs to field elements + RawnSecp256r1::Element z, r, s; + array2fe(_z, z); + array2fe(_r, r); + array2fe(_s, s); + + // Given the public key pk and the signature (r, s) over the message hash z: + // 1. Computes s_inv = s⁻¹ mod n + // 2. Computes u1 = z·s_inv mod n + // 3. Computes u2 = r·s_inv mod n + // 4. Computes and returns the curve point p = u1·G + u2·PK + + // s_inv = s⁻¹ mod n + RawnSecp256r1::Element s_inv; + secp256r1n.inv(s_inv, s); + + // u1 = z·s_inv mod n + RawnSecp256r1::Element u1; + secp256r1n.mul(u1, z, s_inv); + + // u2 = r·s_inv mod n + RawnSecp256r1::Element u2; + secp256r1n.mul(u2, r, s_inv); + uint64_t u1_array[4]; + uint64_t u2_array[4]; + fe2array(u1, u1_array); + fe2array(u2, u2_array); + + secp256r1_curve_dbl_scalar_mul(u1_array, SECP256R1_G, u2_array, pk, result); + + return 0; +} + +const uint64_t SECP256R1_IDENTITY[8] = {0,0,0,0,0,0,0,0}; + +void secp256r1_curve_add( + const uint64_t * p, // 8 x 64 bits + const uint64_t * q, // 8 x 64 bits + uint64_t * r // 8 x 64 bits +) +{ + // Get the 2 points coordinates + const uint64_t * x1 = &p[0]; + const uint64_t * y1 = &p[4]; + const uint64_t * x2 = &q[0]; + const uint64_t * y2 = &q[4]; + + // If p==q return dbl(p) + if (x1[0] == x2[0] && + x1[1] == x2[1] && + x1[2] == x2[2] && + x1[3] == x2[3]) + { + if (y1[0] == y2[0] && + y1[1] == y2[1] && + y1[2] == y2[2] && + y1[3] == y2[3]) { + secp256r1_curve_dbl(p, r); + return; + } else { + for (int i = 0; i < 8; i++) { + r[i] = SECP256R1_IDENTITY[i]; + } + return; + } + } + + // If p==0 return q + if ( p[0] == SECP256R1_IDENTITY[0] && + p[1] == SECP256R1_IDENTITY[1] && + p[2] == SECP256R1_IDENTITY[2] && + p[3] == SECP256R1_IDENTITY[3] && + p[4] == SECP256R1_IDENTITY[4] && + p[5] == SECP256R1_IDENTITY[5] && + p[6] == SECP256R1_IDENTITY[6] && + p[7] == SECP256R1_IDENTITY[7] ) + { + for (int i = 0; i < 8; i++) + { + r[i] = q[i]; + } + return; + } + // if q == 0 return p + else if ( q[0] == SECP256R1_IDENTITY[0] && + q[1] == SECP256R1_IDENTITY[1] && + q[2] == SECP256R1_IDENTITY[2] && + q[3] == SECP256R1_IDENTITY[3] && + q[4] == SECP256R1_IDENTITY[4] && + q[5] == SECP256R1_IDENTITY[5] && + q[6] == SECP256R1_IDENTITY[6] && + q[7] == SECP256R1_IDENTITY[7] ) + { + for (int i = 0; i < 8; i++) + { + r[i] = p[i]; + } + return; + } + + // Convert coordinates to field elements + RawpSecp256r1::Element x1_fe, y1_fe, x2_fe, y2_fe; + array2fe(x1, x1_fe); + array2fe(y1, y1_fe); + array2fe(x2, x2_fe); + array2fe(y2, y2_fe); + + // Calculate lambda = (y2 - y1) / (x2 - x1) + RawpSecp256r1::Element y2_minus_y1; + secp256r1.sub(y2_minus_y1, y2_fe, y1_fe); + RawpSecp256r1::Element x2_minus_x1; + secp256r1.sub(x2_minus_x1, x2_fe, x1_fe); + RawpSecp256r1::Element x2_minus_x1_inv; + secp256r1.inv(x2_minus_x1_inv, x2_minus_x1); + RawpSecp256r1::Element lambda; + secp256r1.mul(lambda, y2_minus_y1, x2_minus_x1_inv); + + // Calculate x3 = lambda^2 - (x1 + x2) + RawpSecp256r1::Element x3_fe; + RawpSecp256r1::Element lambda_sq; + secp256r1.square(lambda_sq, lambda); + RawpSecp256r1::Element x1_plus_x2; + secp256r1.add(x1_plus_x2, x1_fe, x2_fe); + secp256r1.sub(x3_fe, lambda_sq, x1_plus_x2); + + // Calculate y3 = lambda * (x1 - x3) - y1 + RawpSecp256r1::Element y3_fe; + RawpSecp256r1::Element x1_minus_x3; + secp256r1.sub(x1_minus_x3, x1_fe, x3_fe); + RawpSecp256r1::Element lambda_x1_minus_x3; + secp256r1.mul(lambda_x1_minus_x3, lambda, x1_minus_x3); + secp256r1.sub(y3_fe, lambda_x1_minus_x3, y1_fe); + + // Convert to result + fe2array(x3_fe, r); + fe2array(y3_fe, r + 4); +} + +void secp256r1_curve_dbl( + const uint64_t * p, // 8 x 64 bits + uint64_t * r // 8 x 64 bits +) +{ + // If p==0 return p + if ( p[0] == SECP256R1_IDENTITY[0] && + p[1] == SECP256R1_IDENTITY[1] && + p[2] == SECP256R1_IDENTITY[2] && + p[3] == SECP256R1_IDENTITY[3] && + p[4] == SECP256R1_IDENTITY[4] && + p[5] == SECP256R1_IDENTITY[5] && + p[6] == SECP256R1_IDENTITY[6] && + p[7] == SECP256R1_IDENTITY[7] ) + { + for (int i = 0; i < 8; i++) + { + r[i] = p[i]; + } + return; + } + + // Convert coordinates to field elements + uint64_t * x = (uint64_t *)&p[0]; + uint64_t * y = (uint64_t *)&p[4]; + RawpSecp256r1::Element x_fe, y_fe; + array2fe(x, x_fe); + array2fe(y, y_fe); + + // Calculate lambda = (3*x1^2) / (2*y1) + RawpSecp256r1::Element x1_sq; + secp256r1.square(x1_sq, x_fe); + secp256r1.add(x1_sq, x1_sq, secp256r1.negOne()); + RawpSecp256r1::Element three; + secp256r1.fromUI(three, 3); + RawpSecp256r1::Element three_x1_sq; + secp256r1.mul(three_x1_sq, x1_sq, three); + RawpSecp256r1::Element two_y1; + secp256r1.add(two_y1, y_fe, y_fe); + RawpSecp256r1::Element two_y1_inv; + secp256r1.inv(two_y1_inv, two_y1); + RawpSecp256r1::Element lambda; + secp256r1.mul(lambda, three_x1_sq, two_y1_inv); + + // Calculate x3 = lambda^2 - 2*x1 + RawpSecp256r1::Element lambda_sq; + secp256r1.square(lambda_sq, lambda); + RawpSecp256r1::Element two_x1; + secp256r1.add(two_x1, x_fe, x_fe); + RawpSecp256r1::Element x3_fe; + secp256r1.sub(x3_fe, lambda_sq, two_x1); + + // Calculate y3 = lambda * (x1 - x3) - y1 + RawpSecp256r1::Element x1_minus_x3; + secp256r1.sub(x1_minus_x3, x_fe, x3_fe); + RawpSecp256r1::Element lambda_x1_minus_x3; + secp256r1.mul(lambda_x1_minus_x3, lambda, x1_minus_x3); + RawpSecp256r1::Element y3_fe; + secp256r1.sub(y3_fe, lambda_x1_minus_x3, y_fe); + + // Convert to result + fe2array(x3_fe, r); + fe2array(y3_fe, r + 4); +} + +int secp256r1_curve_dbl_scalar_mul( + const uint64_t * k1, // 4 x 64 bits + const uint64_t * p1, // 8 x 64 bits + const uint64_t * k2, // 4 x 64 bits + const uint64_t * p2, // 8 x 64 bits + uint64_t * r // 8 x 64 bits +) +{ + for (uint64_t i = 0; i < 8; i++) { + r[i] = 0; + } + + for (int64_t ii=255; ii>=0; ii--) { + uint64_t i = ii; + + // r = r + r + secp256r1_curve_dbl(r, r); + + // If k1[i] == 1 then r = r + p1 + uint64_t k1_bit = (k1[i / 64] >> (i % 64)) & 1; + if (k1_bit == 1) + { + secp256r1_curve_add(r, p1, r); + } + + // If k2[i] == 1 then r = r + p2 + uint64_t k2_bit = (k2[i / 64] >> (i % 64)) & 1; + if (k2_bit == 1) + { + secp256r1_curve_add(r, p2, r); + } + } + + return 0; +} + +#ifdef __cplusplus +} // extern "C" +#endif \ No newline at end of file diff --git a/lib-c/c/src/secp256r1/secp256r1.hpp b/lib-c/c/src/secp256r1/secp256r1.hpp new file mode 100644 index 000000000..56e37e010 --- /dev/null +++ b/lib-c/c/src/secp256r1/secp256r1.hpp @@ -0,0 +1,58 @@ +#ifndef SECP256R1_HPP +#define SECP256R1_HPP + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int secp256r1_add_point_ec ( + uint64_t dbl, + const uint64_t * x1, // 4 x 64 bits + const uint64_t * y1, // 4 x 64 bits + const uint64_t * x2, // 4 x 64 bits + const uint64_t * y2, // 4 x 64 bits + uint64_t * x3, // 4 x 64 bits + uint64_t * y3 // 4 x 64 bits +); + +int secp256r1_add_point_ecp ( + const uint64_t dbl, + const uint64_t * p1, // 8 x 64 bits + const uint64_t * p2, // 8 x 64 bits + uint64_t * p3 // 8 x 64 bits +); + +int secp256r1_ecdsa_verify ( + const uint64_t * pk, // 8 x 64 bits + const uint64_t * z, // 4 x 64 bits + const uint64_t * r, // 4 x 64 bits + const uint64_t * s, // 4 x 64 bits + uint64_t * result // 8 x 64 bits +); + +void secp256r1_curve_add( + const uint64_t * p, // 8 x 64 bits + const uint64_t * q, // 8 x 64 bits + uint64_t * r // 8 x 64 bits +); + +void secp256r1_curve_dbl( + const uint64_t * p, // 8 x 64 bits + uint64_t * r // 8 x 64 bits +); + +int secp256r1_curve_dbl_scalar_mul( + const uint64_t * k1, // 4 x 64 bits + const uint64_t * p1, // 8 x 64 bits + const uint64_t * k2, // 4 x 64 bits + const uint64_t * p2, // 8 x 64 bits + uint64_t * r // 8 x 64 bits +); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/lib-float/c/lib/libziskfloat.a b/lib-float/c/lib/libziskfloat.a index 194d15c63..6f69bf238 100644 Binary files a/lib-float/c/lib/libziskfloat.a and b/lib-float/c/lib/libziskfloat.a differ diff --git a/lib-float/c/lib/ziskfloat.elf b/lib-float/c/lib/ziskfloat.elf index f3987097e..640341da0 100755 Binary files a/lib-float/c/lib/ziskfloat.elf and b/lib-float/c/lib/ziskfloat.elf differ diff --git a/pil/Cargo.toml b/pil/Cargo.toml index e386c08db..997e549cd 100644 --- a/pil/Cargo.toml +++ b/pil/Cargo.toml @@ -9,7 +9,6 @@ categories = { workspace = true } [dependencies] proofman-common = { workspace = true } -proofman = { workspace = true } proofman-macros = { workspace = true } fields = { workspace = true } serde = { workspace = true } @@ -20,6 +19,4 @@ serde_arrays = "0.2" [features] default = [] dev = [] -no_lib_link = ["proofman-common/no_lib_link"] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] \ No newline at end of file +no_lib_link = ["proofman-common/no_lib_link"] \ No newline at end of file diff --git a/pil/config.pil b/pil/config.pil new file mode 100644 index 000000000..0ac39215d --- /dev/null +++ b/pil/config.pil @@ -0,0 +1,7 @@ +const int MAIN_STEP_BITS = 36; +const int MAX_STEPS = 1 << MAIN_STEP_BITS; +const int MEM_STEP_BITS = MAIN_STEP_BITS + 2; +const int REG_STEP_BITS = MAIN_STEP_BITS + 2; +const int ADDR_BITS = 32; +const int ADDR_W_BITS = ADDR_BITS - 3; +const int EXTRA_PARAMS_ADDR = 0xA0000F00; \ No newline at end of file diff --git a/pil/operations.pil b/pil/operations.pil index 2d36e2dc2..e9b4d553d 100644 --- a/pil/operations.pil +++ b/pil/operations.pil @@ -8,8 +8,10 @@ - 0x21-0x29 - Arithmetic Operations: - 0xB0-0xBF + - DMA Operations: + - 0xDA-0xDF - Precompiles: - - 0xE2-0xE7 + - 0xE1-0xEA - 0xF0-0xF5 - 0xF9-0xFE - Misc: @@ -83,6 +85,22 @@ const int OP_REMU_W = 0xBD; const int OP_DIV_W = 0xBE; const int OP_REM_W = 0xBF; +const int OP_DMA_MEMCPY = 0xD0; +const int OP_DMA_MEMCMP = 0xD1; +const int OP_DMA_INPUTCPY = 0xD2; +const int __OP_DMA_MEMSET__ = 0xD3; // not implemented, unsual and too expensive +const int __OP_DMA_MEMEQ__ = 0xD4; // main don't known in compilation time + +const int OP_DMA_X_OFFSET = 6; + +const int OP_DMA_XMEMCPY = OP_DMA_MEMCPY + OP_DMA_X_OFFSET; // 0xD6 +const int OP_DMA_XMEMCMP = OP_DMA_MEMCMP + OP_DMA_X_OFFSET; // 0xD7 +const int OP_DMA_XMEMSET = __OP_DMA_MEMSET__ + OP_DMA_X_OFFSET; // 0xD9 +const int OP_DMA_XMEMEQ = __OP_DMA_MEMEQ__ + OP_DMA_X_OFFSET; // 0xDA + + +const int OP_POSEIDON2 = 0xE1; + const int OP_ARITH_384_MOD = 0xE2; const int OP_EC_ADD_BLS12_381 = 0xE3; const int OP_EC_DBL_BLS12_381 = 0xE4; @@ -90,6 +108,11 @@ const int OP_COMPLEX_ADD_BLS12_381 = 0xE5; const int OP_COMPLEX_SUB_BLS12_381 = 0xE6; const int OP_COMPLEX_MUL_BLS12_381 = 0xE7; +const int OP_EC_ADD_SECP256R1 = 0xE8; +const int OP_EC_DBL_SECP256R1 = 0xE9; + +const int OP_BLAKE2BR = 0xEA; + const int OP_ADD256 = 0xF0; const int OP_KECCAKF = 0xF1; const int OP_ARITH_256 = 0xF2; @@ -110,3 +133,36 @@ const int OP_COMPLEX_SUB_BN254 = 0xFD; const int OP_COMPLEX_MUL_BN254 = 0xFE; const int OP_HALT = 0xFF; + +function assumes_operation (const expr op, const expr a[] = [0,0], const expr b[] = [0,0], const expr c[] = [0,0], + const expr flag = 0, const expr main_step = 0, + const expr extended_arg = 0, + const expr sel = 1, + const expr extra_args[] = [0]) +{ + assert(length(extra_args) == 1); + lookup_assumes(OPERATION_BUS_ID, [op, ...a, ...b, ...c, flag, main_step, extended_arg, ...extra_args], + sel:); +} + +function proves_operation (const expr op, const expr a[] = [0, 0], const expr b[] = [0, 0], const expr c[] = [0, 0], + const expr flag = 0, const expr main_step = 0, + const expr extended_arg = 0, + const expr mul = 1, + const expr extra_args[] = [0], + const int table_id = -1) +{ + assert(length(extra_args) == 1); + lookup_proves(OPERATION_BUS_ID, [op, ...a, ...b, ...c, flag, main_step, extended_arg, ...extra_args], table_id:, mul:); +} + + +function assumes_padding_operation (const expr op, const expr a[] = [0,0], const expr b[] = [0,0], const expr c[] = [0,0], + const expr flag = 0, const expr main_step = 0, + const expr extended_arg = 0, + const expr extra_args[] = [0], + const expr padding_size = 1) +{ + assert(length(extra_args) == 1); + direct_update_assumes(OPERATION_BUS_ID, [op, ...a, ...b, ...c, flag, main_step, extended_arg, ...extra_args], sel:padding_size); +} diff --git a/pil/opids.pil b/pil/opids.pil index 1378e1529..0dc881af7 100644 --- a/pil/opids.pil +++ b/pil/opids.pil @@ -11,7 +11,6 @@ const int ROM_BUS_ID = 7890; // Memory ids const int MEMORY_ID = 10; const int MEMORY_ALIGN_ROM_ID = 133; -const int DUAL_BYTE_TABLE_ID = 88; // Arith table ids const int ARITH_TABLE_ID = 331; @@ -28,4 +27,15 @@ const int BINARY_EXTENSION_FROPS_TABLE_ID = 5012; // Precompiles const int ARITH_EQ_LT_TABLE_ID = 5002; -const int KECCAKF_TABLE_ID = 126; \ No newline at end of file +const int KECCAKF_TABLE_ID = 126; +const int BLAKE2BR_PERMUTATION_ID = 127; + +// DMA +const int DMA_BUS_ID = 8000; +const int DMA_ROM_ID = 8001; +const int DMA_PRE_POST_TABLE_ID = 8002; +const int DMA_BYTE_CMP_TABLE_ID = 8003; + +// Ranges +const int DUAL_RANGE_7_BITS_ID = 77; +const int DUAL_RANGE_BYTE_ID = 88; diff --git a/pil/src/constants.rs b/pil/src/constants.rs new file mode 100644 index 000000000..1ffe73072 --- /dev/null +++ b/pil/src/constants.rs @@ -0,0 +1,6 @@ +pub const DUAL_RANGE_BYTE_ID: usize = 88; +pub const DUAL_RANGE_7_BITS_ID: usize = 77; +pub const DMA_ROM_ID: usize = 8001; +pub const DMA_PRE_POST_TABLE_ID: usize = 8002; +pub const DMA_PRE_POST_TABLE_SIZE: usize = 1152; +pub const DMA_BYTE_CMP_TABLE_ID: usize = 8003; diff --git a/pil/src/lib.rs b/pil/src/lib.rs index 77de853a8..92c1ba497 100644 --- a/pil/src/lib.rs +++ b/pil/src/lib.rs @@ -1,3 +1,5 @@ +mod constants; mod pil_helpers; +pub use constants::*; pub use pil_helpers::*; diff --git a/pil/src/pil_helpers/traces.rs b/pil/src/pil_helpers/traces.rs index 35243ece3..ad332037b 100644 --- a/pil/src/pil_helpers/traces.rs +++ b/pil/src/pil_helpers/traces.rs @@ -16,7 +16,7 @@ use std::fmt; #[allow(dead_code)] type FieldExtension = [F; 3]; -pub const PILOUT_HASH: &str = "b6a95d37ee456885c594ed4f5dcabe263897c7117efc5992e6f54e42c61217eb"; +pub const PILOUT_HASH: &str = "34091f7d4eb8b70ad073856a46151341e3beabded43ca6c43eaca854c797df12"; pub const MERKLE_TREE_ARITY: u64 = 4; @@ -26,47 +26,75 @@ pub const ZISK_AIRGROUP_ID: usize = 0; //AIR CONSTANTS -pub const MAIN_AIR_IDS: &[usize] = &[0]; +pub const DMA_AIR_IDS: &[usize] = &[0]; -pub const ROM_AIR_IDS: &[usize] = &[1]; +pub const DMA_MEM_CPY_AIR_IDS: &[usize] = &[1]; -pub const MEM_AIR_IDS: &[usize] = &[2]; +pub const DMA_INPUT_CPY_AIR_IDS: &[usize] = &[2]; -pub const ROM_DATA_AIR_IDS: &[usize] = &[3]; +pub const DMA_64_ALIGNED_AIR_IDS: &[usize] = &[3]; -pub const INPUT_DATA_AIR_IDS: &[usize] = &[4]; +pub const DMA_64_ALIGNED_INPUT_CPY_AIR_IDS: &[usize] = &[4]; -pub const MEM_ALIGN_AIR_IDS: &[usize] = &[5]; +pub const DMA_64_ALIGNED_MEM_SET_AIR_IDS: &[usize] = &[5]; -pub const MEM_ALIGN_BYTE_AIR_IDS: &[usize] = &[6]; +pub const DMA_64_ALIGNED_MEM_AIR_IDS: &[usize] = &[6]; -pub const MEM_ALIGN_READ_BYTE_AIR_IDS: &[usize] = &[7]; +pub const DMA_64_ALIGNED_MEM_CPY_AIR_IDS: &[usize] = &[7]; -pub const MEM_ALIGN_WRITE_BYTE_AIR_IDS: &[usize] = &[8]; +pub const DMA_UNALIGNED_AIR_IDS: &[usize] = &[8]; -pub const ARITH_AIR_IDS: &[usize] = &[9]; +pub const DMA_PRE_POST_AIR_IDS: &[usize] = &[9]; -pub const BINARY_AIR_IDS: &[usize] = &[10]; +pub const DMA_PRE_POST_MEM_CPY_AIR_IDS: &[usize] = &[10]; -pub const BINARY_ADD_AIR_IDS: &[usize] = &[11]; +pub const DMA_PRE_POST_INPUT_CPY_AIR_IDS: &[usize] = &[11]; -pub const BINARY_EXTENSION_AIR_IDS: &[usize] = &[12]; +pub const MAIN_AIR_IDS: &[usize] = &[12]; -pub const ADD_256_AIR_IDS: &[usize] = &[13]; +pub const ROM_AIR_IDS: &[usize] = &[13]; -pub const ARITH_EQ_AIR_IDS: &[usize] = &[14]; +pub const MEM_AIR_IDS: &[usize] = &[14]; -pub const ARITH_EQ_384_AIR_IDS: &[usize] = &[15]; +pub const ROM_DATA_AIR_IDS: &[usize] = &[15]; -pub const KECCAKF_AIR_IDS: &[usize] = &[16]; +pub const INPUT_DATA_AIR_IDS: &[usize] = &[16]; -pub const SHA_256_F_AIR_IDS: &[usize] = &[17]; +pub const MEM_ALIGN_AIR_IDS: &[usize] = &[17]; -pub const SPECIFIED_RANGES_AIR_IDS: &[usize] = &[18]; +pub const MEM_ALIGN_BYTE_AIR_IDS: &[usize] = &[18]; -pub const VIRTUAL_TABLE_0_AIR_IDS: &[usize] = &[19]; +pub const MEM_ALIGN_READ_BYTE_AIR_IDS: &[usize] = &[19]; -pub const VIRTUAL_TABLE_1_AIR_IDS: &[usize] = &[20]; +pub const MEM_ALIGN_WRITE_BYTE_AIR_IDS: &[usize] = &[20]; + +pub const ARITH_AIR_IDS: &[usize] = &[21]; + +pub const BINARY_AIR_IDS: &[usize] = &[22]; + +pub const BINARY_ADD_AIR_IDS: &[usize] = &[23]; + +pub const BINARY_EXTENSION_AIR_IDS: &[usize] = &[24]; + +pub const ADD_256_AIR_IDS: &[usize] = &[25]; + +pub const ARITH_EQ_AIR_IDS: &[usize] = &[26]; + +pub const ARITH_EQ_384_AIR_IDS: &[usize] = &[27]; + +pub const KECCAKF_AIR_IDS: &[usize] = &[28]; + +pub const SHA_256_F_AIR_IDS: &[usize] = &[29]; + +pub const POSEIDON_2_AIR_IDS: &[usize] = &[30]; + +pub const BLAKE_2_BR_AIR_IDS: &[usize] = &[31]; + +pub const SPECIFIED_RANGES_AIR_IDS: &[usize] = &[32]; + +pub const VIRTUAL_TABLE_0_AIR_IDS: &[usize] = &[33]; + +pub const VIRTUAL_TABLE_1_AIR_IDS: &[usize] = &[34]; //PUBLICS @@ -107,296 +135,516 @@ values!(ZiskPublicValues { }); values!(ZiskProofValues { - enable_input_data: F, enable_rom_data: F, + enable_input_data: F, enable_rom_data: F, enable_dma_64_aligned: F, enable_dma_64_aligned_inputcpy: F, enable_dma_64_aligned_mem: F, enable_dma_64_aligned_memcpy: F, enable_dma_64_aligned_memset: F, enable_dma_unaligned: F, }); +trace_row!(DmaFixedRow { + __L1__: F, +}); +pub type DmaFixed = GenericTrace, 2097152, 0, 0>; + +trace_row!(DmaTraceRow { + sel_memcpy:bit, sel_memcmp:bit, sel_memset:bit, fill_byte:u8, sel_extended:bit, sel_inputcpy:bit, h_count:ubit(24), count_lt_256:bit, l_count:ubit(9), count_diff_chunks:[u16; 2], h_dst64:ubit(22), l_dst64:ubit(7), dst_offset:ubit(3), main_step:ubit(36), h_src64:ubit(22), l_src64:ubit(7), src_offset:ubit(3), src_offset_after_pre:ubit(3), src64_inc_by_pre:bit, use_pre:bit, use_loop:bit, use_post:bit, pre_count:ubit(3), l_count64:ubit(9), pre_result_nz:bit, post_result_nz:bit, bus_pre_result:[u32; 2], bus_post_result:[u32; 2], loop_b0:u32, loop_extended_arg:u32, static_count:u32, +}); +pub type DmaTrace = GenericTrace, 2097152, 0, 0>; + + +pub type DmaTracePacked = GenericTrace, 2097152, 0, 0>; + + +trace_row!(DmaMemCpyFixedRow { + __L1__: F, +}); +pub type DmaMemCpyFixed = GenericTrace, 2097152, 0, 1>; + +trace_row!(DmaMemCpyTraceRow { + sel_memcpy:bit, sel_extended:bit, h_count:ubit(24), count_lt_256:bit, l_count:ubit(9), h_dst64:ubit(22), l_dst64:ubit(7), dst_offset:ubit(3), main_step:ubit(36), h_src64:ubit(22), l_src64:ubit(7), src_offset:ubit(3), src_offset_after_pre:ubit(3), src64_inc_by_pre:bit, use_pre:bit, use_loop:bit, use_post:bit, pre_count:ubit(3), l_count64:ubit(9), loop_b0:u32, loop_extended_arg:u32, static_count:u32, +}); +pub type DmaMemCpyTrace = GenericTrace, 2097152, 0, 1>; + + +pub type DmaMemCpyTracePacked = GenericTrace, 2097152, 0, 1>; + + +trace_row!(DmaInputCpyFixedRow { + __L1__: F, +}); +pub type DmaInputCpyFixed = GenericTrace, 2097152, 0, 2>; + +trace_row!(DmaInputCpyTraceRow { + sel_extended:bit, sel_inputcpy:bit, h_count:ubit(24), count_lt_256:bit, l_count:ubit(9), h_dst64:ubit(22), l_dst64:ubit(7), dst_offset:ubit(3), main_step:ubit(36), use_pre:bit, use_loop:bit, use_post:bit, pre_count:ubit(3), l_count64:ubit(9), loop_b0:u32, static_count:u32, +}); +pub type DmaInputCpyTrace = GenericTrace, 2097152, 0, 2>; + + +pub type DmaInputCpyTracePacked = GenericTrace, 2097152, 0, 2>; + + +trace_row!(Dma64AlignedFixedRow { + __L1__: F, +}); +pub type Dma64AlignedFixed = GenericTrace, 2097152, 0, 3>; + +trace_row!(Dma64AlignedTraceRow { + src64:ubit(29), seq_end:bit, previous_seq_end:bit, sel_memcpy:bit, sel_memeq:bit, sel_memset:bit, fill_byte:u8, sel_memcpy_count_load:bit, sel_inputcpy:bit, main_step:ubit(36), dst64:ubit(29), count64:u32, sel_op_from_1:[bit; 3], l_value_chunks:[[u8; 2]; 4], h_value_chunks:[[ubit(24); 2]; 4], sel_op_mem_load:[bit; 4], +}); +pub type Dma64AlignedTrace = GenericTrace, 2097152, 0, 3>; + + +pub type Dma64AlignedTracePacked = GenericTrace, 2097152, 0, 3>; + + +trace_row!(Dma64AlignedInputCpyFixedRow { + __L1__: F, +}); +pub type Dma64AlignedInputCpyFixed = GenericTrace, 2097152, 0, 4>; + +trace_row!(Dma64AlignedInputCpyTraceRow { + seq_end:bit, previous_seq_end:bit, sel_inputcpy:bit, main_step:ubit(36), dst64:ubit(29), count64:u32, sel_op_from_1:[bit; 3], l_value_chunks:[[u8; 2]; 4], h_value_chunks:[[ubit(24); 2]; 4], +}); +pub type Dma64AlignedInputCpyTrace = GenericTrace, 2097152, 0, 4>; + + +pub type Dma64AlignedInputCpyTracePacked = GenericTrace, 2097152, 0, 4>; + + +trace_row!(Dma64AlignedMemSetFixedRow { + __L1__: F, +}); +pub type Dma64AlignedMemSetFixed = GenericTrace, 2097152, 0, 5>; + +trace_row!(Dma64AlignedMemSetTraceRow { + seq_end:bit, previous_seq_end:bit, sel_memset:bit, fill_byte:u8, main_step:ubit(36), dst64:ubit(29), count64:u32, sel_op_from_1:[bit; 7], +}); +pub type Dma64AlignedMemSetTrace = GenericTrace, 2097152, 0, 5>; + + +pub type Dma64AlignedMemSetTracePacked = GenericTrace, 2097152, 0, 5>; + + +trace_row!(Dma64AlignedMemFixedRow { + __L1__: F, +}); +pub type Dma64AlignedMemFixed = GenericTrace, 2097152, 0, 6>; + +trace_row!(Dma64AlignedMemTraceRow { + src64:ubit(29), seq_end:bit, previous_seq_end:bit, sel_memcpy:bit, sel_memeq:bit, sel_memset:bit, fill_byte:u8, sel_memcpy_count_load:bit, main_step:ubit(36), dst64:ubit(29), count64:u32, sel_op_from_1:[bit; 3], value:[[u32; 2]; 4], sel_op_mem_load:[bit; 4], +}); +pub type Dma64AlignedMemTrace = GenericTrace, 2097152, 0, 6>; + + +pub type Dma64AlignedMemTracePacked = GenericTrace, 2097152, 0, 6>; + + +trace_row!(Dma64AlignedMemCpyFixedRow { + __L1__: F, +}); +pub type Dma64AlignedMemCpyFixed = GenericTrace, 2097152, 0, 7>; + +trace_row!(Dma64AlignedMemCpyTraceRow { + src64:ubit(29), seq_end:bit, previous_seq_end:bit, sel_memcpy:bit, sel_memcpy_count_load:bit, main_step:ubit(36), dst64:ubit(29), count64:u32, sel_op_from_1:[bit; 7], value:[[u32; 2]; 8], +}); +pub type Dma64AlignedMemCpyTrace = GenericTrace, 2097152, 0, 7>; + + +pub type Dma64AlignedMemCpyTracePacked = GenericTrace, 2097152, 0, 7>; + + +trace_row!(DmaUnalignedFixedRow { + __L1__: F, +}); +pub type DmaUnalignedFixed = GenericTrace, 2097152, 0, 8>; + +trace_row!(DmaUnalignedTraceRow { + main_step:ubit(36), src64:ubit(29), dst64:ubit(29), count:u32, seq_end:bit, previous_seq_end:bit, is_memeq:bit, offset_7:bit, offset_6:bit, offset_5:bit, offset_4:bit, offset_3:bit, offset_2:bit, read_bytes:[u8; 8], no_last_no_seq_end:bit, write_value:[u32; 2], +}); +pub type DmaUnalignedTrace = GenericTrace, 2097152, 0, 8>; + + +pub type DmaUnalignedTracePacked = GenericTrace, 2097152, 0, 8>; + + +trace_row!(DmaPrePostFixedRow { + __L1__: F, +}); +pub type DmaPrePostFixed = GenericTrace, 2097152, 0, 9>; + +trace_row!(DmaPrePostTraceRow { + main_step:ubit(36), dst64:ubit(29), dst_offset:ubit(3), count:ubit(4), is_post:bit, sel_memcpy:bit, sel_memcmp:bit, memcmp_result_nz:bit, l_memcmp_result:u32, sel_inputcpy:bit, sel_memset:bit, selr:[bit; 7], dst_offset_gt_src_offset:bit, src64:ubit(29), src_offset:ubit(3), enabled_second_read:bit, fill_byte:u8, rb:[u8; 16], pb:[u8; 8], sb:[bit; 8], last_dst_byte:u8, abs_diff_dst_src:u8, memcmp_result_is_negative:bit, diff_factor:[u64; 2], bus_write_value:[u32; 2], write_value:[u32; 4], +}); +pub type DmaPrePostTrace = GenericTrace, 2097152, 0, 9>; + + +pub type DmaPrePostTracePacked = GenericTrace, 2097152, 0, 9>; + + +trace_row!(DmaPrePostMemCpyFixedRow { + __L1__: F, +}); +pub type DmaPrePostMemCpyFixed = GenericTrace, 2097152, 0, 10>; + +trace_row!(DmaPrePostMemCpyTraceRow { + main_step:ubit(36), dst64:ubit(29), dst_offset:ubit(3), count:ubit(4), is_post:bit, sel_memcpy:bit, selr:[bit; 7], dst_offset_gt_src_offset:bit, src64:ubit(29), src_offset:ubit(3), enabled_second_read:bit, rb:[u8; 16], pb:[u8; 8], sb:[bit; 8], bus_write_value:[u32; 2], write_value:[u32; 4], +}); +pub type DmaPrePostMemCpyTrace = GenericTrace, 2097152, 0, 10>; + + +pub type DmaPrePostMemCpyTracePacked = GenericTrace, 2097152, 0, 10>; + + +trace_row!(DmaPrePostInputCpyFixedRow { + __L1__: F, +}); +pub type DmaPrePostInputCpyFixed = GenericTrace, 2097152, 0, 11>; + +trace_row!(DmaPrePostInputCpyTraceRow { + main_step:ubit(36), dst64:ubit(29), dst_offset:ubit(3), count:ubit(4), is_post:bit, sel_inputcpy:bit, rb:[u8; 8], pb:[u8; 8], sb:[bit; 8], bus_write_value:[u32; 2], +}); +pub type DmaPrePostInputCpyTrace = GenericTrace, 2097152, 0, 11>; + + +pub type DmaPrePostInputCpyTracePacked = GenericTrace, 2097152, 0, 11>; + + trace_row!(MainFixedRow { SEGMENT_L1: F, SEGMENT_STEP: F, __L1__: F, }); -pub type MainFixed = GenericTrace, 4194304, 0, 0>; +pub type MainFixed = GenericTrace, 4194304, 0, 12>; trace_row!(MainTraceRow { - a:[u32; 2], b:[u32; 2], c:[u32; 2], flag:bit, pc:u32, a_src_imm:bit, a_src_mem:bit, a_offset_imm0:u64, a_imm1:u32, a_src_step:bit, b_src_imm:bit, b_src_mem:bit, b_offset_imm0:u64, b_imm1:u32, b_src_ind:bit, ind_width:ubit(4), is_external_op:bit, op:u8, store_ra:bit, store_mem:bit, store_ind:bit, store_offset:u64, set_pc:bit, jmp_offset1:u64, jmp_offset2:u64, m32:bit, addr1:u32, a_reg_prev_mem_step:ubit(40), b_reg_prev_mem_step:ubit(40), store_reg_prev_mem_step:ubit(40), store_reg_prev_value:[u32; 2], a_src_reg:bit, b_src_reg:bit, store_reg:bit, + a:[u32; 2], b:[u32; 2], c:[u32; 2], flag:bit, pc:u32, a_src_imm:bit, a_src_mem:bit, a_offset_imm0:u64, a_imm1:u32, is_precompiled:bit, b_src_imm:bit, b_src_mem:bit, b_offset_imm0:u64, b_imm1:u32, b_src_ind:bit, ind_width:ubit(4), is_external_op:bit, op:u8, store_pc:bit, store_mem:bit, store_ind:bit, store_offset:u64, set_pc:bit, jmp_offset1:u64, jmp_offset2:u64, m32:bit, addr1:u32, a_reg_prev_mem_step:ubit(38), b_reg_prev_mem_step:ubit(38), store_reg_prev_mem_step:ubit(38), store_reg_prev_value:[u32; 2], a_src_reg:bit, b_src_reg:bit, store_reg:bit, }); -pub type MainTrace = GenericTrace, 4194304, 0, 0>; +pub type MainTrace = GenericTrace, 4194304, 0, 12>; -pub type MainTracePacked = GenericTrace, 4194304, 0, 0>; +pub type MainTracePacked = GenericTrace, 4194304, 0, 12>; trace_row!(RomFixedRow { __L1__: F, }); -pub type RomFixed = GenericTrace, 4194304, 0, 1>; +pub type RomFixed = GenericTrace, 4194304, 0, 13>; trace_row!(RomTraceRow { multiplicity:F, }); -pub type RomTrace = GenericTrace, 4194304, 0, 1>; +pub type RomTrace = GenericTrace, 4194304, 0, 13>; trace_row!(MemFixedRow { SEGMENT_L1: F, __L1__: F, }); -pub type MemFixed = GenericTrace, 4194304, 0, 2>; +pub type MemFixed = GenericTrace, 4194304, 0, 14>; trace_row!(MemTraceRow { - addr:ubit(29), step:ubit(40), sel:bit, addr_changes:bit, step_dual:ubit(40), sel_dual:bit, value:[u32; 2], wr:bit, previous_step:ubit(40), increment:[ubit(18); 2], read_same_addr:bit, + addr:ubit(29), step:ubit(38), sel:bit, addr_changes:bit, step_dual:ubit(38), sel_dual:bit, value:[u32; 2], wr:bit, previous_step:ubit(40), l_increment:ubit(22), h_increment:u16, read_same_addr:bit, }); -pub type MemTrace = GenericTrace, 4194304, 0, 2>; +pub type MemTrace = GenericTrace, 4194304, 0, 14>; -pub type MemTracePacked = GenericTrace, 4194304, 0, 2>; +pub type MemTracePacked = GenericTrace, 4194304, 0, 14>; trace_row!(RomDataFixedRow { SEGMENT_L1: F, __L1__: F, }); -pub type RomDataFixed = GenericTrace, 2097152, 0, 3>; +pub type RomDataFixed = GenericTrace, 2097152, 0, 15>; trace_row!(RomDataTraceRow { - addr:ubit(29), step:ubit(40), sel:bit, addr_changes:bit, value:[u32; 2], + addr:ubit(29), step:ubit(38), sel:bit, addr_changes:bit, value:[u32; 2], }); -pub type RomDataTrace = GenericTrace, 2097152, 0, 3>; +pub type RomDataTrace = GenericTrace, 2097152, 0, 15>; -pub type RomDataTracePacked = GenericTrace, 2097152, 0, 3>; +pub type RomDataTracePacked = GenericTrace, 2097152, 0, 15>; trace_row!(InputDataFixedRow { SEGMENT_L1: F, __L1__: F, }); -pub type InputDataFixed = GenericTrace, 2097152, 0, 4>; +pub type InputDataFixed = GenericTrace, 2097152, 0, 16>; trace_row!(InputDataTraceRow { - addr:ubit(29), step:ubit(40), sel:bit, addr_changes:bit, value_word:[u16; 4], is_free_read:bit, + addr:ubit(29), step:ubit(38), sel:bit, addr_changes:bit, value_word:[u16; 4], is_free_read:bit, }); -pub type InputDataTrace = GenericTrace, 2097152, 0, 4>; +pub type InputDataTrace = GenericTrace, 2097152, 0, 16>; -pub type InputDataTracePacked = GenericTrace, 2097152, 0, 4>; +pub type InputDataTracePacked = GenericTrace, 2097152, 0, 16>; trace_row!(MemAlignFixedRow { L1: F, __L1__: F, }); -pub type MemAlignFixed = GenericTrace, 2097152, 0, 5>; +pub type MemAlignFixed = GenericTrace, 2097152, 0, 17>; trace_row!(MemAlignTraceRow { addr:ubit(29), offset:ubit(3), width:ubit(4), wr:bit, pc:u8, reset:bit, sel_up_to_down:bit, sel_down_to_up:bit, reg:[u8; 8], sel:[bit; 8], step:ubit(40), delta_addr:u64, sel_prove:bit, value:[u32; 2], }); -pub type MemAlignTrace = GenericTrace, 2097152, 0, 5>; +pub type MemAlignTrace = GenericTrace, 2097152, 0, 17>; -pub type MemAlignTracePacked = GenericTrace, 2097152, 0, 5>; +pub type MemAlignTracePacked = GenericTrace, 2097152, 0, 17>; trace_row!(MemAlignByteFixedRow { __L1__: F, }); -pub type MemAlignByteFixed = GenericTrace, 4194304, 0, 6>; +pub type MemAlignByteFixed = GenericTrace, 4194304, 0, 18>; trace_row!(MemAlignByteTraceRow { sel_high_4b:bit, sel_high_2b:bit, sel_high_b:bit, direct_value:u32, composed_value:u32, written_composed_value:u32, written_byte_value:u8, value_16b:u16, value_8b:u8, byte_value:u8, addr_w:ubit(29), step:ubit(40), is_write:bit, mem_write_values:[u32; 2], bus_byte:u8, }); -pub type MemAlignByteTrace = GenericTrace, 4194304, 0, 6>; +pub type MemAlignByteTrace = GenericTrace, 4194304, 0, 18>; -pub type MemAlignByteTracePacked = GenericTrace, 4194304, 0, 6>; +pub type MemAlignByteTracePacked = GenericTrace, 4194304, 0, 18>; trace_row!(MemAlignReadByteFixedRow { __L1__: F, }); -pub type MemAlignReadByteFixed = GenericTrace, 4194304, 0, 7>; +pub type MemAlignReadByteFixed = GenericTrace, 4194304, 0, 19>; trace_row!(MemAlignReadByteTraceRow { sel_high_4b:bit, sel_high_2b:bit, sel_high_b:bit, direct_value:u32, composed_value:u32, value_16b:u16, value_8b:u8, byte_value:u8, addr_w:ubit(29), step:ubit(40), }); -pub type MemAlignReadByteTrace = GenericTrace, 4194304, 0, 7>; +pub type MemAlignReadByteTrace = GenericTrace, 4194304, 0, 19>; -pub type MemAlignReadByteTracePacked = GenericTrace, 4194304, 0, 7>; +pub type MemAlignReadByteTracePacked = GenericTrace, 4194304, 0, 19>; trace_row!(MemAlignWriteByteFixedRow { __L1__: F, }); -pub type MemAlignWriteByteFixed = GenericTrace, 4194304, 0, 8>; +pub type MemAlignWriteByteFixed = GenericTrace, 4194304, 0, 20>; trace_row!(MemAlignWriteByteTraceRow { sel_high_4b:bit, sel_high_2b:bit, sel_high_b:bit, direct_value:u32, composed_value:u32, written_composed_value:u32, written_byte_value:u8, value_16b:u16, value_8b:u8, byte_value:u8, addr_w:ubit(29), step:ubit(40), mem_write_values:[u32; 2], }); -pub type MemAlignWriteByteTrace = GenericTrace, 4194304, 0, 8>; +pub type MemAlignWriteByteTrace = GenericTrace, 4194304, 0, 20>; -pub type MemAlignWriteByteTracePacked = GenericTrace, 4194304, 0, 8>; +pub type MemAlignWriteByteTracePacked = GenericTrace, 4194304, 0, 20>; trace_row!(ArithFixedRow { __L1__: F, }); -pub type ArithFixed = GenericTrace, 2097152, 0, 9>; +pub type ArithFixed = GenericTrace, 2097152, 0, 21>; trace_row!(ArithTraceRow { carry:[u64; 7], a:[u16; 4], b:[u16; 4], c:[u16; 4], d:[u16; 4], na:bit, nb:bit, nr:bit, np:bit, sext:bit, m32:bit, div:bit, fab:u64, na_fb:u64, nb_fa:u64, main_div:bit, main_mul:bit, signed:bit, div_by_zero:bit, div_overflow:bit, inv_sum_all_bs:u64, op:u8, bus_res1:u32, multiplicity:bit, range_ab:ubit(7), range_cd:ubit(7), }); -pub type ArithTrace = GenericTrace, 2097152, 0, 9>; +pub type ArithTrace = GenericTrace, 2097152, 0, 21>; -pub type ArithTracePacked = GenericTrace, 2097152, 0, 9>; +pub type ArithTracePacked = GenericTrace, 2097152, 0, 21>; trace_row!(BinaryFixedRow { __L1__: F, }); -pub type BinaryFixed = GenericTrace, 4194304, 0, 10>; +pub type BinaryFixed = GenericTrace, 4194304, 0, 22>; trace_row!(BinaryTraceRow { b_op:ubit(7), free_in_a:[u8; 8], free_in_b:[u8; 8], free_in_c:[u8; 8], carry:[bit; 8], mode32:bit, result_is_a:bit, use_first_byte:bit, c_is_signed:bit, b_op_or_sext:ubit(10), mode32_and_c_is_signed:bit, }); -pub type BinaryTrace = GenericTrace, 4194304, 0, 10>; +pub type BinaryTrace = GenericTrace, 4194304, 0, 22>; -pub type BinaryTracePacked = GenericTrace, 4194304, 0, 10>; +pub type BinaryTracePacked = GenericTrace, 4194304, 0, 22>; trace_row!(BinaryAddFixedRow { __L1__: F, }); -pub type BinaryAddFixed = GenericTrace, 4194304, 0, 11>; +pub type BinaryAddFixed = GenericTrace, 4194304, 0, 23>; trace_row!(BinaryAddTraceRow { a:[u32; 2], b:[u32; 2], c_chunks:[u16; 4], cout:[bit; 2], }); -pub type BinaryAddTrace = GenericTrace, 4194304, 0, 11>; +pub type BinaryAddTrace = GenericTrace, 4194304, 0, 23>; -pub type BinaryAddTracePacked = GenericTrace, 4194304, 0, 11>; +pub type BinaryAddTracePacked = GenericTrace, 4194304, 0, 23>; trace_row!(BinaryExtensionFixedRow { __L1__: F, }); -pub type BinaryExtensionFixed = GenericTrace, 4194304, 0, 12>; +pub type BinaryExtensionFixed = GenericTrace, 4194304, 0, 24>; trace_row!(BinaryExtensionTraceRow { op:ubit(6), free_in_a:[u8; 8], free_in_b:u8, free_in_c:[[u32; 2]; 8], op_is_shift:bit, b:[u32; 2], }); -pub type BinaryExtensionTrace = GenericTrace, 4194304, 0, 12>; +pub type BinaryExtensionTrace = GenericTrace, 4194304, 0, 24>; -pub type BinaryExtensionTracePacked = GenericTrace, 4194304, 0, 12>; +pub type BinaryExtensionTracePacked = GenericTrace, 4194304, 0, 24>; trace_row!(Add256FixedRow { __L1__: F, }); -pub type Add256Fixed = GenericTrace, 1048576, 0, 13>; +pub type Add256Fixed = GenericTrace, 1048576, 0, 25>; trace_row!(Add256TraceRow { a:[[u32; 2]; 4], b:[[u32; 2]; 4], c_chunks:[[u16; 4]; 4], cout:[[bit; 2]; 4], addr_params:u32, addr_a:u32, addr_b:u32, addr_c:u32, step:ubit(40), cin:bit, sel:bit, }); -pub type Add256Trace = GenericTrace, 1048576, 0, 13>; +pub type Add256Trace = GenericTrace, 1048576, 0, 25>; -pub type Add256TracePacked = GenericTrace, 1048576, 0, 13>; +pub type Add256TracePacked = GenericTrace, 1048576, 0, 25>; trace_row!(ArithEqFixedRow { CLK_0: F, __L1__: F, }); -pub type ArithEqFixed = GenericTrace, 1048576, 0, 14>; +pub type ArithEqFixed = GenericTrace, 1048576, 0, 26>; trace_row!(ArithEqTraceRow { - x1:u16, y1:u16, x2:u16, y2:u16, x3:u16, y3:u16, q0:ubit(22), q1:ubit(22), q2:ubit(22), s:ubit(22), sel_op:[bit; 9], sel_op_clk0:[bit; 9], x_delta_chunk_inv:u64, x_are_different:bit, x3_lt:bit, y3_lt:bit, carry:[[u64; 2]; 3], step_addr:ubit(40), + x1:u16, y1:u16, x2:u16, y2:u16, x3:u16, y3:u16, q0:ubit(22), q1:ubit(22), q2:ubit(22), s:ubit(22), sel_op:[bit; 11], sel_op_clk0:[bit; 11], x_delta_chunk_inv:u64, x_are_different:bit, x3_lt:bit, y3_lt:bit, carry:[[u64; 2]; 3], step_addr:ubit(40), }); -pub type ArithEqTrace = GenericTrace, 1048576, 0, 14>; +pub type ArithEqTrace = GenericTrace, 1048576, 0, 26>; -pub type ArithEqTracePacked = GenericTrace, 1048576, 0, 14>; +pub type ArithEqTracePacked = GenericTrace, 1048576, 0, 26>; trace_row!(ArithEq384FixedRow { CLK_0: F, __L1__: F, }); -pub type ArithEq384Fixed = GenericTrace, 1048576, 0, 15>; +pub type ArithEq384Fixed = GenericTrace, 1048576, 0, 27>; trace_row!(ArithEq384TraceRow { x1:u16, y1:u16, x2:u16, y2:u16, x3:u16, y3:u16, q0:ubit(22), q1:ubit(22), q2:ubit(22), s:ubit(22), sel_op:[bit; 6], sel_op_clk0:[bit; 6], x_delta_chunk_inv:u64, x_are_different:bit, x3_lt:bit, y3_lt:bit, carry:[[u64; 2]; 3], step_addr:ubit(40), }); -pub type ArithEq384Trace = GenericTrace, 1048576, 0, 15>; +pub type ArithEq384Trace = GenericTrace, 1048576, 0, 27>; -pub type ArithEq384TracePacked = GenericTrace, 1048576, 0, 15>; +pub type ArithEq384TracePacked = GenericTrace, 1048576, 0, 27>; trace_row!(KeccakfFixedRow { CLK_0: F, __L1__: F, }); -pub type KeccakfFixed = GenericTrace, 131072, 0, 16>; +pub type KeccakfFixed = GenericTrace, 131072, 0, 28>; trace_row!(KeccakfTraceRow { in_use_clk_0:bit, in_use:bit, state:[bit; 1600], chunk_acc:[ubit(22); 533], rem_acc:u8, step_addr:ubit(40), }); -pub type KeccakfTrace = GenericTrace, 131072, 0, 16>; +pub type KeccakfTrace = GenericTrace, 131072, 0, 28>; -pub type KeccakfTracePacked = GenericTrace, 131072, 0, 16>; +pub type KeccakfTracePacked = GenericTrace, 131072, 0, 28>; trace_row!(Sha256fFixedRow { CLK_0: F, __L1__: F, }); -pub type Sha256fFixed = GenericTrace, 262144, 0, 17>; +pub type Sha256fFixed = GenericTrace, 262144, 0, 29>; trace_row!(Sha256fTraceRow { a:[bit; 32], e:[bit; 32], w:[bit; 32], new_a_carry_bits:u8, new_e_carry_bits:u8, new_w_carry_bits:ubit(4), step_addr:ubit(40), in_use_clk_0:bit, in_use:bit, }); -pub type Sha256fTrace = GenericTrace, 262144, 0, 17>; +pub type Sha256fTrace = GenericTrace, 262144, 0, 29>; + + +pub type Sha256fTracePacked = GenericTrace, 262144, 0, 29>; + + +trace_row!(Poseidon2FixedRow { + CLK_0: F, __L1__: F, +}); +pub type Poseidon2Fixed = GenericTrace, 131072, 0, 30>; + +trace_row!(Poseidon2TraceRow { + in_use_clk_0:bit, in_use:bit, chunks:[[u32; 2]; 16], step_addr:ubit(40), +}); +pub type Poseidon2Trace = GenericTrace, 131072, 0, 30>; + + +pub type Poseidon2TracePacked = GenericTrace, 131072, 0, 30>; + + +trace_row!(Blake2brFixedRow { + CLK_0: F, MSG_IDX: F, __L1__: F, +}); +pub type Blake2brFixed = GenericTrace, 262144, 0, 31>; + +trace_row!(Blake2brTraceRow { + in_use:bit, round_idx:ubit(4), round_idx_sel:[bit; 10], sigma_idx:ubit(4), m_limbs:[[u16; 2]; 2], ms:[u32; 2], perm_active:bit, g_active:bit, va_limbs:[[u16; 2]; 2], vc_limbs:[[u16; 2]; 2], vb:[[bit; 32]; 2], vd:[[bit; 32]; 2], step_addr:ubit(40), in_use_clk_0:bit, +}); +pub type Blake2brTrace = GenericTrace, 262144, 0, 31>; -pub type Sha256fTracePacked = GenericTrace, 262144, 0, 17>; +pub type Blake2brTracePacked = GenericTrace, 262144, 0, 31>; trace_row!(SpecifiedRangesFixedRow { - RANGE: [F; 33], __L1__: F, + OPID: [F; 29], VALS: [F; 29], __L1__: F, }); -pub type SpecifiedRangesFixed = GenericTrace, 1048576, 0, 18>; +pub type SpecifiedRangesFixed = GenericTrace, 1048576, 0, 32>; trace_row!(SpecifiedRangesTraceRow { - mul:[F; 33], + mul:[F; 29], }); -pub type SpecifiedRangesTrace = GenericTrace, 1048576, 0, 18>; +pub type SpecifiedRangesTrace = GenericTrace, 1048576, 0, 32>; trace_row!(VirtualTable0FixedRow { UID: [F; 8], column: [F; 43], __L1__: F, }); -pub type VirtualTable0Fixed = GenericTrace, 2097152, 0, 19>; +pub type VirtualTable0Fixed = GenericTrace, 2097152, 0, 33>; trace_row!(VirtualTable0TraceRow { multiplicity:[F; 8], }); -pub type VirtualTable0Trace = GenericTrace, 2097152, 0, 19>; +pub type VirtualTable0Trace = GenericTrace, 2097152, 0, 33>; trace_row!(VirtualTable1FixedRow { UID: [F; 8], column: [F; 64], __L1__: F, }); -pub type VirtualTable1Fixed = GenericTrace, 2097152, 0, 20>; +pub type VirtualTable1Fixed = GenericTrace, 2097152, 0, 34>; trace_row!(VirtualTable1TraceRow { multiplicity:[F; 8], }); -pub type VirtualTable1Trace = GenericTrace, 2097152, 0, 20>; +pub type VirtualTable1Trace = GenericTrace, 2097152, 0, 34>; trace_row!(RomRomTraceRow { line: F, a_offset_imm0: F, a_imm1: F, b_offset_imm0: F, b_imm1: F, ind_width: F, op: F, store_offset: F, jmp_offset1: F, jmp_offset2: F, flags: F, }); -pub type RomRomTrace = GenericTrace, 4194304, 0, 1, 0>; +pub type RomRomTrace = GenericTrace, 4194304, 0, 13, 0>; + + +values!(Dma64AlignedAirValues { + segment_id: F, segment_previous_seq_end: F, segment_previous_dst64: F, segment_previous_main_step: F, segment_previous_count64: F, segment_previous_flags: F, segment_last_seq_end: F, segment_last_dst64: F, segment_last_main_step: F, segment_last_count64: F, segment_last_flags: F, is_last_segment: F, segment_previous_src64: F, segment_last_src64: F, segment_previous_fill_byte: F, segment_last_fill_byte: F, last_count_chunk: [F; 2], padding_size: F, im_direct: [FieldExtension; 5], +}); + +values!(Dma64AlignedInputCpyAirValues { + segment_id: F, segment_previous_seq_end: F, segment_previous_dst64: F, segment_previous_main_step: F, segment_previous_count64: F, segment_previous_flags: F, segment_last_seq_end: F, segment_last_dst64: F, segment_last_main_step: F, segment_last_count64: F, segment_last_flags: F, is_last_segment: F, last_count_chunk: [F; 2], padding_size: F, im_direct: [FieldExtension; 5], +}); + +values!(Dma64AlignedMemSetAirValues { + segment_id: F, segment_previous_seq_end: F, segment_previous_dst64: F, segment_previous_main_step: F, segment_previous_count64: F, segment_previous_flags: F, segment_last_seq_end: F, segment_last_dst64: F, segment_last_main_step: F, segment_last_count64: F, segment_last_flags: F, is_last_segment: F, segment_previous_fill_byte: F, segment_last_fill_byte: F, last_count_chunk: [F; 2], padding_size: F, im_direct: [FieldExtension; 5], +}); + +values!(Dma64AlignedMemAirValues { + segment_id: F, segment_previous_seq_end: F, segment_previous_dst64: F, segment_previous_main_step: F, segment_previous_count64: F, segment_previous_flags: F, segment_last_seq_end: F, segment_last_dst64: F, segment_last_main_step: F, segment_last_count64: F, segment_last_flags: F, is_last_segment: F, segment_previous_src64: F, segment_last_src64: F, segment_previous_fill_byte: F, segment_last_fill_byte: F, last_count_chunk: [F; 2], padding_size: F, im_direct: [FieldExtension; 5], +}); + +values!(Dma64AlignedMemCpyAirValues { + segment_id: F, segment_previous_seq_end: F, segment_previous_dst64: F, segment_previous_main_step: F, segment_previous_count64: F, segment_previous_flags: F, segment_last_seq_end: F, segment_last_dst64: F, segment_last_main_step: F, segment_last_count64: F, segment_last_flags: F, is_last_segment: F, segment_previous_src64: F, segment_last_src64: F, last_count_chunk: [F; 2], padding_size: F, im_direct: [FieldExtension; 5], +}); +values!(DmaUnalignedAirValues { + segment_id: F, segment_previous_seq_end: F, segment_previous_src64: F, segment_previous_dst64: F, segment_previous_main_step: F, segment_previous_offset: F, segment_previous_count: F, segment_first_bytes: [F; 8], segment_last_seq_end: F, segment_last_src64: F, segment_last_dst64: F, segment_last_main_step: F, segment_last_offset: F, segment_last_count: F, segment_next_bytes: [F; 8], is_last_segment: F, segment_previous_is_memeq: F, segment_last_is_memeq: F, padding_size: F, last_count_chunk: [F; 2], im_direct: [FieldExtension; 6], +}); values!(MainAirValues { main_last_segment: F, main_segment: F, segment_initial_pc: F, segment_previous_c: [F; 2], segment_next_pc: F, segment_last_c: [F; 2], last_reg_value: [[F; 2]; 31], last_reg_mem_step: [F; 31], im_direct: [FieldExtension; 96], @@ -411,7 +659,7 @@ values!(RomDataAirValues { }); values!(InputDataAirValues { - segment_id: F, is_first_segment: F, is_last_segment: F, previous_segment_value: [F; 2], previous_segment_step: F, previous_segment_addr: F, segment_last_value: [F; 2], segment_last_step: F, segment_last_addr: F, im_direct: [FieldExtension; 4], + segment_id: F, is_first_segment: F, is_last_segment: F, previous_segment_value: [F; 2], previous_segment_step: F, previous_segment_addr: F, segment_last_value: [F; 2], segment_last_step: F, segment_last_addr: F, distance_base: [F; 2], distance_end: [F; 2], im_direct: [FieldExtension; 6], }); values!(MemAlignByteAirValues { @@ -438,6 +686,54 @@ values!(BinaryExtensionAirValues { padding_size: F, im_direct: [FieldExtension; 1], }); +values!(DmaAirGroupValues { + gsum_result: FieldExtension, +}); + +values!(DmaMemCpyAirGroupValues { + gsum_result: FieldExtension, +}); + +values!(DmaInputCpyAirGroupValues { + gsum_result: FieldExtension, +}); + +values!(Dma64AlignedAirGroupValues { + gsum_result: FieldExtension, +}); + +values!(Dma64AlignedInputCpyAirGroupValues { + gsum_result: FieldExtension, +}); + +values!(Dma64AlignedMemSetAirGroupValues { + gsum_result: FieldExtension, +}); + +values!(Dma64AlignedMemAirGroupValues { + gsum_result: FieldExtension, +}); + +values!(Dma64AlignedMemCpyAirGroupValues { + gsum_result: FieldExtension, +}); + +values!(DmaUnalignedAirGroupValues { + gsum_result: FieldExtension, +}); + +values!(DmaPrePostAirGroupValues { + gsum_result: FieldExtension, +}); + +values!(DmaPrePostMemCpyAirGroupValues { + gsum_result: FieldExtension, +}); + +values!(DmaPrePostInputCpyAirGroupValues { + gsum_result: FieldExtension, +}); + values!(MainAirGroupValues { gsum_result: FieldExtension, }); @@ -510,6 +806,14 @@ values!(Sha256fAirGroupValues { gsum_result: FieldExtension, }); +values!(Poseidon2AirGroupValues { + gsum_result: FieldExtension, +}); + +values!(Blake2brAirGroupValues { + gsum_result: FieldExtension, +}); + values!(SpecifiedRangesAirGroupValues { gsum_result: FieldExtension, }); @@ -525,87 +829,157 @@ values!(VirtualTable1AirGroupValues { pub const PACKED_INFO: &[(usize, usize, PackedInfoConst)] = &[ (0, 0, PackedInfoConst { is_packed: true, - num_packed_words: 14, - unpack_info: &[32, 32, 32, 32, 32, 32, 1, 32, 1, 1, 64, 32, 1, 1, 1, 64, 32, 1, 4, 1, 8, 1, 1, 1, 64, 1, 64, 64, 1, 32, 40, 40, 40, 32, 32, 1, 1, 1], + num_packed_words: 7, + unpack_info: &[1, 1, 1, 8, 1, 1, 24, 1, 9, 16, 16, 22, 7, 3, 36, 22, 7, 3, 3, 1, 1, 1, 1, 3, 9, 1, 1, 32, 32, 32, 32, 32, 32, 32], }), - (0, 2, PackedInfoConst { + (0, 1, PackedInfoConst { is_packed: true, num_packed_words: 4, - unpack_info: &[29, 40, 1, 1, 40, 1, 32, 32, 1, 40, 18, 18, 1], + unpack_info: &[1, 1, 24, 1, 9, 22, 7, 3, 36, 22, 7, 3, 3, 1, 1, 1, 1, 3, 9, 32, 32, 32], }), - (0, 3, PackedInfoConst { + (0, 2, PackedInfoConst { is_packed: true, num_packed_words: 3, - unpack_info: &[29, 40, 1, 1, 32, 32], + unpack_info: &[1, 1, 24, 1, 9, 22, 7, 3, 36, 1, 1, 1, 3, 9, 32, 32], + }), + (0, 3, PackedInfoConst { + is_packed: true, + num_packed_words: 7, + unpack_info: &[29, 1, 1, 1, 1, 1, 8, 1, 1, 36, 29, 32, 1, 1, 1, 8, 8, 8, 8, 8, 8, 8, 8, 24, 24, 24, 24, 24, 24, 24, 24, 1, 1, 1, 1], }), (0, 4, PackedInfoConst { is_packed: true, - num_packed_words: 3, - unpack_info: &[29, 40, 1, 1, 16, 16, 16, 16, 1], + num_packed_words: 6, + unpack_info: &[1, 1, 1, 36, 29, 32, 1, 1, 1, 8, 8, 8, 8, 8, 8, 8, 8, 24, 24, 24, 24, 24, 24, 24, 24], }), (0, 5, PackedInfoConst { + is_packed: true, + num_packed_words: 2, + unpack_info: &[1, 1, 1, 8, 36, 29, 32, 1, 1, 1, 1, 1, 1, 1], + }), + (0, 6, PackedInfoConst { + is_packed: true, + num_packed_words: 7, + unpack_info: &[29, 1, 1, 1, 1, 1, 8, 1, 36, 29, 32, 1, 1, 1, 32, 32, 32, 32, 32, 32, 32, 32, 1, 1, 1, 1], + }), + (0, 7, PackedInfoConst { + is_packed: true, + num_packed_words: 11, + unpack_info: &[29, 1, 1, 1, 1, 36, 29, 32, 1, 1, 1, 1, 1, 1, 1, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32], + }), + (0, 8, PackedInfoConst { + is_packed: true, + num_packed_words: 5, + unpack_info: &[36, 29, 29, 32, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 8, 8, 8, 8, 8, 8, 8, 1, 32, 32], + }), + (0, 9, PackedInfoConst { + is_packed: true, + num_packed_words: 11, + unpack_info: &[36, 29, 3, 4, 1, 1, 1, 1, 32, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 29, 3, 1, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1, 8, 8, 1, 64, 64, 32, 32, 32, 32, 32, 32], + }), + (0, 10, PackedInfoConst { + is_packed: true, + num_packed_words: 8, + unpack_info: &[36, 29, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 29, 3, 1, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1, 32, 32, 32, 32, 32, 32], + }), + (0, 11, PackedInfoConst { + is_packed: true, + num_packed_words: 5, + unpack_info: &[36, 29, 3, 4, 1, 1, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1, 32, 32], + }), + (0, 12, PackedInfoConst { + is_packed: true, + num_packed_words: 14, + unpack_info: &[32, 32, 32, 32, 32, 32, 1, 32, 1, 1, 64, 32, 1, 1, 1, 64, 32, 1, 4, 1, 8, 1, 1, 1, 64, 1, 64, 64, 1, 32, 38, 38, 38, 32, 32, 1, 1, 1], + }), + (0, 14, PackedInfoConst { + is_packed: true, + num_packed_words: 4, + unpack_info: &[29, 38, 1, 1, 38, 1, 32, 32, 1, 40, 22, 16, 1], + }), + (0, 15, PackedInfoConst { + is_packed: true, + num_packed_words: 3, + unpack_info: &[29, 38, 1, 1, 32, 32], + }), + (0, 16, PackedInfoConst { + is_packed: true, + num_packed_words: 3, + unpack_info: &[29, 38, 1, 1, 16, 16, 16, 16, 1], + }), + (0, 17, PackedInfoConst { is_packed: true, num_packed_words: 5, unpack_info: &[29, 3, 4, 1, 8, 1, 1, 1, 8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1, 40, 64, 1, 32, 32], }), - (0, 6, PackedInfoConst { + (0, 18, PackedInfoConst { is_packed: true, num_packed_words: 5, unpack_info: &[1, 1, 1, 32, 32, 32, 8, 16, 8, 8, 29, 40, 1, 32, 32, 8], }), - (0, 7, PackedInfoConst { + (0, 19, PackedInfoConst { is_packed: true, num_packed_words: 3, unpack_info: &[1, 1, 1, 32, 32, 16, 8, 8, 29, 40], }), - (0, 8, PackedInfoConst { + (0, 20, PackedInfoConst { is_packed: true, num_packed_words: 5, unpack_info: &[1, 1, 1, 32, 32, 32, 8, 16, 8, 8, 29, 40, 32, 32], }), - (0, 9, PackedInfoConst { + (0, 21, PackedInfoConst { is_packed: true, num_packed_words: 17, unpack_info: &[64, 64, 64, 64, 64, 64, 64, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 1, 1, 1, 1, 1, 1, 64, 64, 64, 1, 1, 1, 1, 1, 64, 8, 32, 1, 7, 7], }), - (0, 10, PackedInfoConst { + (0, 22, PackedInfoConst { is_packed: true, num_packed_words: 4, unpack_info: &[7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 10, 1], }), - (0, 11, PackedInfoConst { + (0, 23, PackedInfoConst { is_packed: true, num_packed_words: 4, unpack_info: &[32, 32, 32, 32, 16, 16, 16, 16, 1, 1], }), - (0, 12, PackedInfoConst { + (0, 24, PackedInfoConst { is_packed: true, num_packed_words: 11, unpack_info: &[6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 32, 32], }), - (0, 13, PackedInfoConst { + (0, 25, PackedInfoConst { is_packed: true, num_packed_words: 15, unpack_info: &[32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 1, 1, 1, 1, 1, 1, 1, 32, 32, 32, 32, 40, 1, 1], }), - (0, 14, PackedInfoConst { + (0, 26, PackedInfoConst { is_packed: true, num_packed_words: 11, - unpack_info: &[16, 16, 16, 16, 16, 16, 22, 22, 22, 22, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 1, 1, 1, 64, 64, 64, 64, 64, 64, 40], + unpack_info: &[16, 16, 16, 16, 16, 16, 22, 22, 22, 22, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 1, 1, 1, 64, 64, 64, 64, 64, 64, 40], }), - (0, 15, PackedInfoConst { + (0, 27, PackedInfoConst { is_packed: true, num_packed_words: 11, unpack_info: &[16, 16, 16, 16, 16, 16, 22, 22, 22, 22, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 1, 1, 1, 64, 64, 64, 64, 64, 64, 40], }), - (0, 16, PackedInfoConst { + (0, 28, PackedInfoConst { is_packed: true, num_packed_words: 209, unpack_info: &[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 8, 40], }), - (0, 17, PackedInfoConst { + (0, 29, PackedInfoConst { is_packed: true, num_packed_words: 3, unpack_info: &[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 8, 4, 40, 1, 1], }), + (0, 30, PackedInfoConst { + is_packed: true, + num_packed_words: 17, + unpack_info: &[1, 1, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40], + }), + (0, 31, PackedInfoConst { + is_packed: true, + num_packed_words: 7, + unpack_info: &[1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 16, 16, 16, 16, 32, 32, 1, 1, 16, 16, 16, 16, 16, 16, 16, 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40, 1], + }), ]; \ No newline at end of file diff --git a/pil/src/pil_helpers/traces_dev.rs b/pil/src/pil_helpers/traces_dev.rs index 3754c7955..a04f6d83f 100644 --- a/pil/src/pil_helpers/traces_dev.rs +++ b/pil/src/pil_helpers/traces_dev.rs @@ -108,7 +108,7 @@ trace!(MainFixed { }, 0, 0, 4194304 ); trace!(MainTrace { - a: [F; 2], b: [F; 2], c: [F; 2], flag: F, pc: F, a_src_imm: F, a_src_mem: F, a_offset_imm0: F, a_imm1: F, a_src_step: F, b_src_imm: F, b_src_mem: F, b_offset_imm0: F, b_imm1: F, b_src_ind: F, ind_width: F, is_external_op: F, op: F, store_ra: F, store_mem: F, store_ind: F, store_offset: F, set_pc: F, jmp_offset1: F, jmp_offset2: F, m32: F, addr1: F, a_reg_prev_mem_step: F, b_reg_prev_mem_step: F, store_reg_prev_mem_step: F, store_reg_prev_value: [F; 2], a_src_reg: F, b_src_reg: F, store_reg: F, + a: [F; 2], b: [F; 2], c: [F; 2], flag: F, pc: F, a_src_imm: F, a_src_mem: F, a_offset_imm0: F, a_imm1: F, a_src_step: F, b_src_imm: F, b_src_mem: F, b_offset_imm0: F, b_imm1: F, b_src_ind: F, ind_width: F, is_external_op: F, op: F, store_pc: F, store_mem: F, store_ind: F, store_offset: F, set_pc: F, jmp_offset1: F, jmp_offset2: F, m32: F, addr1: F, a_reg_prev_mem_step: F, b_reg_prev_mem_step: F, store_reg_prev_mem_step: F, store_reg_prev_value: [F; 2], a_src_reg: F, b_src_reg: F, store_reg: F, }, 0, 0, 4194304 ); trace!(RomFixed { diff --git a/pil/zisk.pil b/pil/zisk.pil index f8b058082..1f5e2a1ef 100644 --- a/pil/zisk.pil +++ b/pil/zisk.pil @@ -1,8 +1,15 @@ +// #pragma arg -I pil,../pil2-proofman/pil2-components/lib/std/pil,state-machines,precompiles +// #pragma arg -o pil/zisk.pilout +// #pragma arg -O fixed-to-file +// #pragma arg -u tmp/fixed + require "std_direct.pil" require "operations.pil" require "opids.pil" +require "config.pil" +require "dma/pil/dual_range.pil" require "rom/pil/rom.pil" require "main/pil/main.pil" require "mem/pil/mem.pil" @@ -18,6 +25,17 @@ require "arith_eq/pil/arith_eq.pil" require "arith_eq_384/pil/arith_eq_384.pil" require "keccakf/pil/keccakf.pil" require "sha256f/pil/sha256f.pil" +require "dma/pil/dma.pil" +require "dma/pil/dma_rom.pil" +require "dma/pil/dma_pre_post.pil" +require "dma/pil/dma_pre_post_table.pil" +require "dma/pil/dma_byte_cmp_table.pil" +require "dma/pil/dma_64_aligned.pil" +require "dma/pil/dma_unaligned.pil" +require "poseidon2/pil/poseidon2.pil" +require "blake2/pil/blake2br.pil" + +enable_range_stats(); proofval enable_input_data; enable_input_data * (1 - enable_input_data); @@ -25,17 +43,62 @@ enable_input_data * (1 - enable_input_data); proofval enable_rom_data; enable_rom_data * (1 - enable_rom_data); +proofval enable_dma_64_aligned; +enable_dma_64_aligned * (1 - enable_dma_64_aligned); + +proofval enable_dma_64_aligned_inputcpy; +enable_dma_64_aligned_inputcpy * (1 - enable_dma_64_aligned_inputcpy); + +proofval enable_dma_64_aligned_mem; +enable_dma_64_aligned_mem * (1 - enable_dma_64_aligned_mem); + +proofval enable_dma_64_aligned_memcpy; +enable_dma_64_aligned_memcpy * (1 - enable_dma_64_aligned_memcpy); + +proofval enable_dma_64_aligned_memset; +enable_dma_64_aligned_memset * (1 - enable_dma_64_aligned_memset); + +proofval enable_dma_unaligned; +enable_dma_unaligned * (1 - enable_dma_unaligned); + const int PUBLIC_INPUTS_64_BITS = 32; // 32 x 64 bits = 2048 bits public inputs[PUBLIC_INPUTS_64_BITS * 2]; // 2 x 32-bits = 64 bits +function range_dual_byte(expr byte_a, expr byte_b, expr sel = 1) { + lookup_assumes(DUAL_RANGE_BYTE_ID, expressions: [byte_a, byte_b], sel: sel); +} + airgroup Zisk { // Virtual Tables Configuration set_max_std_tables_bits(20); set_max_num_rows_virtual(1 << 21); // Set the maximum rows for virtual tables set_max_num_virtual_tables(2); - set_group_virtual_tables(table_ids: [ARITH_TABLE_ID, ARITH_RANGE_TABLE_ID, ARITH_EQ_LT_TABLE_ID, BINARY_EXTENSION_TABLE_ID, BINARY_TABLE_ID, MEMORY_ALIGN_ROM_ID, KECCAKF_TABLE_ID]); + set_group_virtual_tables(table_ids: [ARITH_TABLE_ID, ARITH_RANGE_TABLE_ID, ARITH_EQ_LT_TABLE_ID, BINARY_EXTENSION_TABLE_ID, + BINARY_TABLE_ID, MEMORY_ALIGN_ROM_ID, KECCAKF_TABLE_ID, DMA_ROM_ID, DMA_PRE_POST_TABLE_ID]); set_group_virtual_tables(table_ids: [BINARY_EXTENSION_FROPS_TABLE_ID, BINARY_FROPS_TABLE_ID, ARITH_FROPS_TABLE_ID]); + virtual DualRange(id: DUAL_RANGE_7_BITS_ID, min1: 0, max1: P2_7-1, min2: 0, max2: P2_7-1) alias DualRange7Bits; + virtual DualRange(id: DUAL_RANGE_BYTE_ID, min1: 0, max1: P2_8-1, min2: 0, max2: P2_8-1) alias DualByte; + + Dma(); + Dma(enable: E_DMA_MEMCPY) alias DmaMemCpy; + Dma(enable: E_DMA_INPUTCPY) alias DmaInputCpy; + + Dma64Aligned(enable_flag: enable_dma_64_aligned); + Dma64Aligned(enable: E_DMA_INPUTCPY, enable_flag: enable_dma_64_aligned_inputcpy) alias Dma64AlignedInputCpy; + Dma64Aligned(enable: E_DMA_MEMSET, enable_flag: enable_dma_64_aligned_memset, op_x_row: 8) alias Dma64AlignedMemSet; + Dma64Aligned(enable: E_DMA_MEMCPY|E_DMA_MEMCMP|E_DMA_MEMSET, enable_flag: enable_dma_64_aligned_mem) alias Dma64AlignedMem; + Dma64Aligned(enable: E_DMA_MEMCPY, enable_flag: enable_dma_64_aligned_memcpy, op_x_row: 8) alias Dma64AlignedMemCpy; + + DmaUnaligned(enable_flag: enable_dma_unaligned); + + DmaPrePost(); + DmaPrePost(enable: E_DMA_MEMCPY) alias DmaPrePostMemCpy; + DmaPrePost(enable: E_DMA_INPUTCPY) alias DmaPrePostInputCpy; + + virtual DmaByteCmpTable(); + virtual DmaRom(); + virtual DmaPrePostTable(); // Main Program Main(N: 2**22); Rom(N: 2**22); @@ -43,15 +106,14 @@ airgroup Zisk { // Memory Mem(N: 2**22, base_address: 0xA000_0000, size_mb: 512, large_mem: 1, dual_mem: 1); Mem(N: 2**21, base_address: 0x8000_0000, size_mb: 128, immutable: 1, enable_flag: enable_rom_data) alias RomData; - Mem(N: 2**21, base_address: 0x9000_0000, size_mb: 128, free_input_mem: 1, enable_flag: enable_input_data, use_predefined_ranges: 0) alias InputData; + Mem(N: 2**21, base_address: 0x4000_0000, size_mb: 1024, large_mem: 1, free_input_mem: 1, enable_flag: enable_input_data) alias InputData; - MemAlign(N: 2**21, use_predefined_ranges: 0); + MemAlign(N: 2**21); MemAlignByte(N: 2**22, read: 1, write: 1); MemAlignByte(N: 2**22, read: 1, write: 0) alias MemAlignReadByte; MemAlignByte(N: 2**22, read: 0, write: 1) alias MemAlignWriteByte; virtual MemAlignRom(); - virtual DualByte(); - + // Standard operations Arith(N: 2**21); virtual ArithTable(); @@ -81,8 +143,12 @@ airgroup Zisk { Sha256f(N: 2**18); + Poseidon2(N: 2**17); + + Blake2br(N: 2**18); + // Public Inputs for (int i = 0; i < PUBLIC_INPUTS_64_BITS; i++) { - direct_global_update_proves(OPERATION_BUS_ID, [OP_PUBOUT, i, 0, inputs[i*2], inputs[i*2 + 1], inputs[i*2], inputs[i*2 + 1], 0]); + direct_global_update_proves(OPERATION_BUS_ID, [OP_PUBOUT, i, 0, inputs[i*2], inputs[i*2 + 1], inputs[i*2], inputs[i*2 + 1]], surname: PIOP_SURNAME_DYNAMIC); } } diff --git a/precompiles/arith_eq/Cargo.toml b/precompiles/arith_eq/Cargo.toml index e5711f81d..0c9210e87 100644 --- a/precompiles/arith_eq/Cargo.toml +++ b/precompiles/arith_eq/Cargo.toml @@ -30,6 +30,11 @@ name = "arith_eq_test_secp256k1" path = "src/arith_eq_test_secp256k1.rs" required-features = ["test_data"] +[[bin]] +name = "arith_eq_test_secp256r1" +path = "src/arith_eq_test_secp256r1.rs" +required-features = ["test_data"] + [dependencies] zisk-core = { workspace = true } zisk-pil = { workspace = true } @@ -38,7 +43,6 @@ precompiles-helpers = { workspace = true } zisk-common = { workspace = true } lib-c = { workspace = true } -proofman = { workspace = true } proofman-common = { workspace = true } proofman-macros = { workspace = true } proofman-util = { workspace = true } @@ -53,6 +57,7 @@ rayon = { workspace = true } ark-ff = { workspace = true } ark-std = { workspace = true } ark-secp256k1 = { workspace = true } +ark-secp256r1 = { workspace = true } ark-bn254 = { workspace = true } num-bigint = { workspace = true } num-traits = { workspace = true } @@ -61,17 +66,14 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" regex = "1.11.1" rustfmt-wrapper = "0.2.1" -k256 = {version = "0.13", features = ["arithmetic"] } typenum = "1.16" lazy_static = "1.4" path-clean = "1.0" -nom = "7" [features] default = [] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed"] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -no_lib_link = ["proofman-common/no_lib_link"] +gpu = ["packed"] +packed = [] test_data = [] -test_data_secp256k1 = [] \ No newline at end of file +test_data_secp256k1 = [] +test_data_secp256r1 = [] \ No newline at end of file diff --git a/precompiles/arith_eq/pil/arith_eq.pil b/precompiles/arith_eq/pil/arith_eq.pil index d8b36984e..94b630cab 100644 --- a/precompiles/arith_eq/pil/arith_eq.pil +++ b/precompiles/arith_eq/pil/arith_eq.pil @@ -4,7 +4,7 @@ require "operations.pil" require "opids.pil" require "arith_eq_lt_table.pil" -airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION_BUS_ID) { +airtemplate ArithEq (const int N = 2**18) { // TODO: introduction, map // TODO: explain concept of q @@ -27,15 +27,22 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION EQ13: y1 - y2 - y3 + (q2 * p2) y3 COMPLEX_SUB_BN254 EQ14: x1 * x2 - y1 * y2 - x3 + (q1 * p2) x3 COMPLEX_MUL_BN254 EQ15: y1 * x2 + x1 * y2 - y3 + (q2 * p2) y3 COMPLEX_MUL_BN254 + EQ16: s * x2 - s * x1 - y2 + y1 + (q0 * p3) lambda - ADD EC_ADD_SECP256R1 + EQ17: 2 * s * y1 - 3 * x1 * x1 - a + (q0 * p3) lambda - DBL EC_DBL_SECP256R1 + EQ18: s * s - x1 - x2 - x3 + (q1 * p3) x3 EC_ADD_SECP256R1, EC_DBL_SECP256R1 + EQ19: s * x1 - s * x3 - y1 - y3 + (q2 * p3) y3 EC_ADD_SECP256R1, EC_DBL_SECP256R1 where p1 refers to the base field order of: · Secp256k1: 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F and p2 refers to the base field order of: · BN254: 0x30644E72E131A029B85045B68181585D97816A916871CA8D3C208C16D87CFD47 + and p3 refers to the base field order of: + · Secp256r1: 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF */ const int SECP256K1_PRIME = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F; const int BN254_PRIME = 0x30644E72E131A029B85045B68181585D97816A916871CA8D3C208C16D87CFD47; + const int SECP256R1_PRIME = 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF; const int CLOCKS = 16; col fixed CLK_0 = [1, 0:(CLOCKS-1)]...; @@ -59,8 +66,8 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION const int CARRY_MIN = -(2**CARRY_BITS - 1); const int CARRY_MAX = 2**CARRY_BITS; - const int EQS = 16; // Number of equations - const int OPS = 9; // Number of operations + const int EQS = 20; // Number of equations + const int OPS = 11; // Number of operations const int MAX_CEQS = 3; // Max concurrent equations const int QS = 3; // Number of quotients @@ -85,17 +92,20 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION const expr sel_bn254_complex_add = sel_op[6]; const expr sel_bn254_complex_sub = sel_op[7]; const expr sel_bn254_complex_mul = sel_op[8]; + const expr sel_secp256r1_add = sel_op[9]; + const expr sel_secp256r1_dbl = sel_op[10]; // groups of selectors const expr sel_any_arith256 = sel_arith256 + sel_arith256_mod; const expr sel_secp256k1 = sel_secp256k1_add + sel_secp256k1_dbl; const expr sel_bn254_curve = sel_bn254_curve_add + sel_bn254_curve_dbl; const expr sel_bn254_complex = sel_bn254_complex_add + sel_bn254_complex_sub + sel_bn254_complex_mul; - const expr sel_check_diff = sel_secp256k1_add + sel_bn254_curve_add; + const expr sel_secp256r1 = sel_secp256r1_add + sel_secp256r1_dbl; + const expr sel_check_diff = sel_secp256k1_add + sel_bn254_curve_add + sel_secp256r1_add; const expr sel_check_x_lt_prime = sel_arith256_mod + sel_secp256k1 + - sel_bn254_curve + sel_bn254_complex; + sel_bn254_curve + sel_bn254_complex + sel_secp256r1; const expr sel_check_y_lt_prime = sel_secp256k1 + sel_bn254_curve + - sel_bn254_complex; + sel_bn254_complex + sel_secp256r1; const expr eq_selectors[EQS] = [sel_arith256, sel_arith256_mod, sel_secp256k1_add, sel_secp256k1_dbl, @@ -104,11 +114,13 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION sel_bn254_curve, sel_bn254_curve, sel_bn254_complex_add, sel_bn254_complex_add, sel_bn254_complex_sub, sel_bn254_complex_sub, - sel_bn254_complex_mul, sel_bn254_complex_mul]; + sel_bn254_complex_mul, sel_bn254_complex_mul, + sel_secp256r1_add, sel_secp256r1_dbl, + sel_secp256r1, sel_secp256r1]; // constraint to set (x1,y1) = (x2,y2) - (sel_secp256k1_dbl + sel_bn254_curve_dbl) * (x1 - x2) === 0; - (sel_secp256k1_dbl + sel_bn254_curve_dbl) * (y1 - y2) === 0; + (sel_secp256k1_dbl + sel_bn254_curve_dbl + sel_secp256r1_dbl) * (x1 - x2) === 0; + (sel_secp256k1_dbl + sel_bn254_curve_dbl + sel_secp256r1_dbl) * (y1 - y2) === 0; const expr chunk_cols[7] = [x1, y1, x2, y2, x3, y3, s]; const expr qs[QS] = [q0, q1, q2]; @@ -131,6 +143,10 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION include "equations/bn254_complex_sub_y3.pil" include "equations/bn254_complex_mul_x3.pil" include "equations/bn254_complex_mul_y3.pil" + include "equations/secp256r1_add.pil" + include "equations/secp256r1_dbl.pil" + include "equations/secp256r1_x3.pil" + include "equations/secp256r1_y3.pil" col witness bits(1) sel_op_clk0[OPS]; @@ -145,6 +161,9 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION const expr bn254_complex_add_clk0 = sel_op_clk0[6]; const expr bn254_complex_sub_clk0 = sel_op_clk0[7]; const expr bn254_complex_mul_clk0 = sel_op_clk0[8]; + const expr secp256r1_add_clk0 = sel_op_clk0[9]; + const expr secp256r1_dbl_clk0 = sel_op_clk0[10]; + const expr secp256r1_clk0 = sel_op_clk0[9] + sel_op_clk0[10]; expr sum_sel_op = 0; expr sum_sel_op_clk0 = 0; @@ -181,17 +200,21 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION + expr_group_by_cbc(secp256k1_add_clk0, eq_secp256k1_add_chunks, i) + expr_group_by_cbc(secp256k1_dbl_clk0, eq_secp256k1_dbl_chunks, i) + expr_group_by_cbc(bn254_curve_add_clk0, eq_bn254_curve_add_chunks, i) - + expr_group_by_cbc(bn254_curve_dbl_clk0, eq_bn254_curve_dbl_chunks, i); + + expr_group_by_cbc(bn254_curve_dbl_clk0, eq_bn254_curve_dbl_chunks, i) + + expr_group_by_cbc(secp256r1_add_clk0, eq_secp256r1_add_chunks, i) + + expr_group_by_cbc(secp256r1_dbl_clk0, eq_secp256r1_dbl_chunks, i); eq[1][i] = expr_group_by_cbc(secp256k1_clk0, eq_secp256k1_x3_chunks, i) + expr_group_by_cbc(bn254_curve_clk0, eq_bn254_curve_x3_chunks, i) + expr_group_by_cbc(bn254_complex_add_clk0, eq_bn254_complex_add_x3_chunks, i) + expr_group_by_cbc(bn254_complex_sub_clk0, eq_bn254_complex_sub_x3_chunks, i) - + expr_group_by_cbc(bn254_complex_mul_clk0, eq_bn254_complex_mul_x3_chunks, i); + + expr_group_by_cbc(bn254_complex_mul_clk0, eq_bn254_complex_mul_x3_chunks, i) + + expr_group_by_cbc(secp256r1_clk0, eq_secp256r1_x3_chunks, i); eq[2][i] = expr_group_by_cbc(secp256k1_clk0, eq_secp256k1_y3_chunks, i) + expr_group_by_cbc(bn254_curve_clk0, eq_bn254_curve_y3_chunks, i) + expr_group_by_cbc(bn254_complex_add_clk0, eq_bn254_complex_add_y3_chunks, i) + expr_group_by_cbc(bn254_complex_sub_clk0, eq_bn254_complex_sub_y3_chunks, i) - + expr_group_by_cbc(bn254_complex_mul_clk0, eq_bn254_complex_mul_y3_chunks, i); + + expr_group_by_cbc(bn254_complex_mul_clk0, eq_bn254_complex_mul_y3_chunks, i) + + expr_group_by_cbc(secp256r1_clk0, eq_secp256r1_y3_chunks, i); } for (int i = 0; i < length(chunk_cols); ++i) { @@ -357,7 +380,8 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION + clk_cte_selector(bn254_curve_clk0, BN254_PRIME) + clk_cte_selector(bn254_complex_add_clk0, BN254_PRIME) + clk_cte_selector(bn254_complex_sub_clk0, BN254_PRIME) - + clk_cte_selector(bn254_complex_mul_clk0, BN254_PRIME); + + clk_cte_selector(bn254_complex_mul_clk0, BN254_PRIME) + + clk_cte_selector(secp256r1_clk0, SECP256R1_PRIME); const expr delta_x3 = x3 - y2 * sel_arith256_mod - lt_cte; const expr delta_y3 = y3 - lt_cte; @@ -412,6 +436,8 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION // bn254_complex_add x1,y1,x2,y2 x3,y3 x1,y1,x2,y2 // bn254_complex_sub x1,y1,x2,y2 x3,y3 x1,y1,x2,y2 // bn254_complex_mul x1,y1,x2,y2 x3,y3 x1,y1,x2,y2 + // secp256r1_add x1,y1,x2,y2 x3,y3 x1,y1,x2,y2 + // secp256r1_dbl x1,y2 x3,y3 x1,y1 const int ADDR_OP = MAIN_STEP + 1; const int ADDR_X1 = ADDR_OP + 1; @@ -448,10 +474,11 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION // 15 --- ---------- ------- ---------- --------- const expr use_x2 = sel_arith256 + sel_arith256_mod + sel_secp256k1_add + sel_bn254_curve_add + - sel_bn254_complex; - const expr use_y2 = sel_arith256_mod + sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex; + sel_bn254_complex + sel_secp256r1_add; + const expr use_y2 = sel_arith256_mod + sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex + + sel_secp256r1_add; const expr use_y3 = sel_arith256 + sel_secp256k1_add + sel_secp256k1_dbl + sel_bn254_curve_add + - sel_bn254_curve_dbl + sel_bn254_complex; + sel_bn254_curve_dbl + sel_bn254_complex + sel_secp256r1_add + sel_secp256r1_dbl; // [any_arith256] ADDR_X1 === ADDR_IND_0 // @@ -474,17 +501,24 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION // [bn254_complex] ADDR_X1 === ADDR_IND_0 // ADDR_Y1 === ADDR_IND_0 + 32 // ADDR_X3 === ADDR_X1, ADDR_Y3 === ADDR_Y1 + // + // [secp256r1_add] ADDR_X1 === ADDR_IND_0 + // ADDR_Y1 === ADDR_IND_0 + 32 + // ADDR_X3 === ADDR_X1, ADDR_Y3 === ADDR_Y1 + // + // [secp256r1_dbl] ADDR_X1 === ADDR_OP, + // ADDR_Y1 === ADDR_OP + 32 + // ADDR_X3 === ADDR_X1, ADDR_Y3 === ADDR_Y1 - const expr use_ind_0 = sel_any_arith256 + sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex; - - (sel_any_arith256 + sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex) * clock_eq(step_addr, ADDR_X1, ADDR_IND_0) === 0; - (sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex) * clock_eq(step_addr, ADDR_Y1, ADDR_IND_0, 32) === 0; + const expr use_ind_0 = sel_any_arith256 + sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex + sel_secp256r1_add; - (sel_secp256k1_dbl + sel_bn254_curve_dbl) * clock_eq(step_addr, ADDR_X1, ADDR_OP) === 0; - (sel_secp256k1_dbl + sel_bn254_curve_dbl) * clock_eq(step_addr, ADDR_Y1, ADDR_OP, 32) === 0; + (sel_any_arith256 + sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex + sel_secp256r1_add) * clock_eq(step_addr, ADDR_X1, ADDR_IND_0) === 0; + (sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex + sel_secp256r1_add) * clock_eq(step_addr, ADDR_Y1, ADDR_IND_0, 32) === 0; - (sel_secp256k1 + sel_bn254_curve + sel_bn254_complex) * clock_eq(step_addr, ADDR_X1, ADDR_X3) === 0; - (sel_secp256k1 + sel_bn254_curve + sel_bn254_complex) * clock_eq(step_addr, ADDR_Y1, ADDR_Y3) === 0; + (sel_secp256k1_dbl + sel_bn254_curve_dbl + sel_secp256r1_dbl) * clock_eq(step_addr, ADDR_X1, ADDR_OP) === 0; + (sel_secp256k1_dbl + sel_bn254_curve_dbl + sel_secp256r1_dbl) * clock_eq(step_addr, ADDR_Y1, ADDR_OP, 32) === 0; + (sel_secp256k1 + sel_bn254_curve + sel_bn254_complex + sel_secp256r1) * clock_eq(step_addr, ADDR_X1, ADDR_X3) === 0; + (sel_secp256k1 + sel_bn254_curve + sel_bn254_complex + sel_secp256r1) * clock_eq(step_addr, ADDR_Y1, ADDR_Y3) === 0; // [any_arith256] ADDR_Y1 === ADDR_IND_1 // @@ -496,13 +530,16 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION // // [bn254_complex] ADDR_X2 === ADDR_IND_1 // ADDR_Y2 === ADDR_IND_1 + 32 + // + // [secp256r1_add] ADDR_X2 === ADDR_IND_1 + // ADDR_Y2 === ADDR_IND_1 + 32 - const expr use_ind_1 = sel_any_arith256 + sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex; + const expr use_ind_1 = sel_any_arith256 + sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex + sel_secp256r1_add; sel_any_arith256 * clock_eq(step_addr, ADDR_Y1, ADDR_IND_1) === 0; - (sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex) * clock_eq(step_addr, ADDR_X2, ADDR_IND_1) === 0; - (sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex) * clock_eq(step_addr, ADDR_Y2, ADDR_IND_1, 32) === 0; + (sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex + sel_secp256r1_add) * clock_eq(step_addr, ADDR_X2, ADDR_IND_1) === 0; + (sel_secp256k1_add + sel_bn254_curve_add + sel_bn254_complex + sel_secp256r1_add) * clock_eq(step_addr, ADDR_Y2, ADDR_IND_1, 32) === 0; // [any_arith256] ADDR_X2 === ADDR_IND_2 @@ -543,7 +580,6 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION const expr main_step = clock_map(step_addr, MAIN_STEP, start: 0, end: 14); - const expr mem_is_write = CLK[8] + CLK[9] + CLK[10] + CLK[11]; const expr mem_value[2][2]; @@ -613,9 +649,12 @@ airtemplate ArithEq (const int N = 2**18, const int operation_bus_id = OPERATION sel_bn254_curve_dbl * OP_EC_DBL_BN254 + sel_bn254_complex_add * OP_COMPLEX_ADD_BN254 + sel_bn254_complex_sub * OP_COMPLEX_SUB_BN254 + - sel_bn254_complex_mul * OP_COMPLEX_MUL_BN254; + sel_bn254_complex_mul * OP_COMPLEX_MUL_BN254 + + sel_secp256r1_add * OP_EC_ADD_SECP256R1 + + sel_secp256r1_dbl * OP_EC_DBL_SECP256R1; - lookup_proves(operation_bus_id, [bus_op, step_addr'(MAIN_STEP), 0, step_addr'(ADDR_OP), 0, 0, 0, 0], mul: in_use_clk0); + proves_operation(op: bus_op, a:[0, 0], b:[step_addr'(ADDR_OP), 0], c:[0, 0], flag:0, + main_step: step_addr'(MAIN_STEP), mul: in_use_clk0); // selclk0 is the clock 0 for dedicated to one operation function expr_group_by_cbc(const expr selclk0, const expr chunks[], const int index ): const expr { diff --git a/precompiles/arith_eq/pil/equations/secp256r1_add.pil b/precompiles/arith_eq/pil/equations/secp256r1_add.pil new file mode 100644 index 000000000..6dc8c4b64 --- /dev/null +++ b/precompiles/arith_eq/pil/equations/secp256r1_add.pil @@ -0,0 +1,777 @@ +// code generated +// +// equation: s*x2-s*x1-y2+y1-p*q0+p*offset +// +// p: 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF +// offset: 0x20000000000000000000000000000000000000000000000000000000000000000 +// (p*offset): 0x1FFFFFFFE00000002000000000000000000000001FFFFFFFFFFFFFFFFFFFFFFFE0000000000000000000000000000000000000000000000000000000000000000 +// +// chunks:16 +// chunk_bits:16 +// terms_by_clock: 2 + + +const expr eq_secp256r1_add_chunks[32]; + +// clock #0 + +eq_secp256r1_add_chunks[ 0] = s * x2 // s[0] * x2[0] + - s * x1 // - s[0] * x1[0] + - y2 // - y2[0] + + y1 // + y1[0] + - 0xFFFF * q0; // - p[0] * q0[0] + +eq_secp256r1_add_chunks[ 1] = s' * x2 // s[1] * x2[0] + + s * x2' // + s[0] * x2[1] + - s' * x1 // - s[1] * x1[0] + - s * x1' // - s[0] * x1[1] + - y2' // - y2[1] + + y1' // + y1[1] + - 0xFFFF * q0 // - p[1] * q0[0] + - 0xFFFF * q0'; // - p[0] * q0[1] + +// clock #1 + +eq_secp256r1_add_chunks[ 2] = s' * 'x2 // s[2] * x2[0] + + s * x2 // + s[1] * x2[1] + + 's * x2' // + s[0] * x2[2] + - s' * 'x1 // - s[2] * x1[0] + - s * x1 // - s[1] * x1[1] + - 's * x1' // - s[0] * x1[2] + - y2' // - y2[2] + + y1' // + y1[2] + - 0xFFFF * 'q0 // - p[2] * q0[0] + - 0xFFFF * q0 // - p[1] * q0[1] + - 0xFFFF * q0'; // - p[0] * q0[2] + +eq_secp256r1_add_chunks[ 3] = s'2 * 'x2 // s[3] * x2[0] + + s' * x2 // + s[2] * x2[1] + + s * x2' // + s[1] * x2[2] + + 's * x2'2 // + s[0] * x2[3] + - s'2 * 'x1 // - s[3] * x1[0] + - s' * x1 // - s[2] * x1[1] + - s * x1' // - s[1] * x1[2] + - 's * x1'2 // - s[0] * x1[3] + - y2'2 // - y2[3] + + y1'2 // + y1[3] + - 0xFFFF * 'q0 // - p[3] * q0[0] + - 0xFFFF * q0 // - p[2] * q0[1] + - 0xFFFF * q0' // - p[1] * q0[2] + - 0xFFFF * q0'2; // - p[0] * q0[3] + +// clock #2 + +eq_secp256r1_add_chunks[ 4] = s'2 * 2'x2 // s[4] * x2[0] + + s' * 'x2 // + s[3] * x2[1] + + s * x2 // + s[2] * x2[2] + + 's * x2' // + s[1] * x2[3] + + 2's * x2'2 // + s[0] * x2[4] + - s'2 * 2'x1 // - s[4] * x1[0] + - s' * 'x1 // - s[3] * x1[1] + - s * x1 // - s[2] * x1[2] + - 's * x1' // - s[1] * x1[3] + - 2's * x1'2 // - s[0] * x1[4] + - y2'2 // - y2[4] + + y1'2 // + y1[4] + - 0xFFFF * 2'q0 // - p[4] * q0[0] + - 0xFFFF * 'q0 // - p[3] * q0[1] + - 0xFFFF * q0 // - p[2] * q0[2] + - 0xFFFF * q0' // - p[1] * q0[3] + - 0xFFFF * q0'2; // - p[0] * q0[4] + +eq_secp256r1_add_chunks[ 5] = s'3 * 2'x2 // s[5] * x2[0] + + s'2 * 'x2 // + s[4] * x2[1] + + s' * x2 // + s[3] * x2[2] + + s * x2' // + s[2] * x2[3] + + 's * x2'2 // + s[1] * x2[4] + + 2's * x2'3 // + s[0] * x2[5] + - s'3 * 2'x1 // - s[5] * x1[0] + - s'2 * 'x1 // - s[4] * x1[1] + - s' * x1 // - s[3] * x1[2] + - s * x1' // - s[2] * x1[3] + - 's * x1'2 // - s[1] * x1[4] + - 2's * x1'3 // - s[0] * x1[5] + - y2'3 // - y2[5] + + y1'3 // + y1[5] + - 0xFFFF * 2'q0 // - p[5] * q0[0] + - 0xFFFF * 'q0 // - p[4] * q0[1] + - 0xFFFF * q0 // - p[3] * q0[2] + - 0xFFFF * q0' // - p[2] * q0[3] + - 0xFFFF * q0'2 // - p[1] * q0[4] + - 0xFFFF * q0'3; // - p[0] * q0[5] + +// clock #3 + +eq_secp256r1_add_chunks[ 6] = s'3 * 3'x2 // s[6] * x2[0] + + s'2 * 2'x2 // + s[5] * x2[1] + + s' * 'x2 // + s[4] * x2[2] + + s * x2 // + s[3] * x2[3] + + 's * x2' // + s[2] * x2[4] + + 2's * x2'2 // + s[1] * x2[5] + + 3's * x2'3 // + s[0] * x2[6] + - s'3 * 3'x1 // - s[6] * x1[0] + - s'2 * 2'x1 // - s[5] * x1[1] + - s' * 'x1 // - s[4] * x1[2] + - s * x1 // - s[3] * x1[3] + - 's * x1' // - s[2] * x1[4] + - 2's * x1'2 // - s[1] * x1[5] + - 3's * x1'3 // - s[0] * x1[6] + - y2'3 // - y2[6] + + y1'3 // + y1[6] + - 0xFFFF * 2'q0 // - p[5] * q0[1] + - 0xFFFF * 'q0 // - p[4] * q0[2] + - 0xFFFF * q0 // - p[3] * q0[3] + - 0xFFFF * q0' // - p[2] * q0[4] + - 0xFFFF * q0'2 // - p[1] * q0[5] + - 0xFFFF * q0'3; // - p[0] * q0[6] + +eq_secp256r1_add_chunks[ 7] = s'4 * 3'x2 // s[7] * x2[0] + + s'3 * 2'x2 // + s[6] * x2[1] + + s'2 * 'x2 // + s[5] * x2[2] + + s' * x2 // + s[4] * x2[3] + + s * x2' // + s[3] * x2[4] + + 's * x2'2 // + s[2] * x2[5] + + 2's * x2'3 // + s[1] * x2[6] + + 3's * x2'4 // + s[0] * x2[7] + - s'4 * 3'x1 // - s[7] * x1[0] + - s'3 * 2'x1 // - s[6] * x1[1] + - s'2 * 'x1 // - s[5] * x1[2] + - s' * x1 // - s[4] * x1[3] + - s * x1' // - s[3] * x1[4] + - 's * x1'2 // - s[2] * x1[5] + - 2's * x1'3 // - s[1] * x1[6] + - 3's * x1'4 // - s[0] * x1[7] + - y2'4 // - y2[7] + + y1'4 // + y1[7] + - 0xFFFF * 'q0 // - p[5] * q0[2] + - 0xFFFF * q0 // - p[4] * q0[3] + - 0xFFFF * q0' // - p[3] * q0[4] + - 0xFFFF * q0'2 // - p[2] * q0[5] + - 0xFFFF * q0'3 // - p[1] * q0[6] + - 0xFFFF * q0'4; // - p[0] * q0[7] + +// clock #4 + +eq_secp256r1_add_chunks[ 8] = s'4 * 4'x2 // s[8] * x2[0] + + s'3 * 3'x2 // + s[7] * x2[1] + + s'2 * 2'x2 // + s[6] * x2[2] + + s' * 'x2 // + s[5] * x2[3] + + s * x2 // + s[4] * x2[4] + + 's * x2' // + s[3] * x2[5] + + 2's * x2'2 // + s[2] * x2[6] + + 3's * x2'3 // + s[1] * x2[7] + + 4's * x2'4 // + s[0] * x2[8] + - s'4 * 4'x1 // - s[8] * x1[0] + - s'3 * 3'x1 // - s[7] * x1[1] + - s'2 * 2'x1 // - s[6] * x1[2] + - s' * 'x1 // - s[5] * x1[3] + - s * x1 // - s[4] * x1[4] + - 's * x1' // - s[3] * x1[5] + - 2's * x1'2 // - s[2] * x1[6] + - 3's * x1'3 // - s[1] * x1[7] + - 4's * x1'4 // - s[0] * x1[8] + - y2'4 // - y2[8] + + y1'4 // + y1[8] + - 0xFFFF * 'q0 // - p[5] * q0[3] + - 0xFFFF * q0 // - p[4] * q0[4] + - 0xFFFF * q0' // - p[3] * q0[5] + - 0xFFFF * q0'2 // - p[2] * q0[6] + - 0xFFFF * q0'3 // - p[1] * q0[7] + - 0xFFFF * q0'4; // - p[0] * q0[8] + +eq_secp256r1_add_chunks[ 9] = s'5 * 4'x2 // s[9] * x2[0] + + s'4 * 3'x2 // + s[8] * x2[1] + + s'3 * 2'x2 // + s[7] * x2[2] + + s'2 * 'x2 // + s[6] * x2[3] + + s' * x2 // + s[5] * x2[4] + + s * x2' // + s[4] * x2[5] + + 's * x2'2 // + s[3] * x2[6] + + 2's * x2'3 // + s[2] * x2[7] + + 3's * x2'4 // + s[1] * x2[8] + + 4's * x2'5 // + s[0] * x2[9] + - s'5 * 4'x1 // - s[9] * x1[0] + - s'4 * 3'x1 // - s[8] * x1[1] + - s'3 * 2'x1 // - s[7] * x1[2] + - s'2 * 'x1 // - s[6] * x1[3] + - s' * x1 // - s[5] * x1[4] + - s * x1' // - s[4] * x1[5] + - 's * x1'2 // - s[3] * x1[6] + - 2's * x1'3 // - s[2] * x1[7] + - 3's * x1'4 // - s[1] * x1[8] + - 4's * x1'5 // - s[0] * x1[9] + - y2'5 // - y2[9] + + y1'5 // + y1[9] + - 0xFFFF * q0 // - p[5] * q0[4] + - 0xFFFF * q0' // - p[4] * q0[5] + - 0xFFFF * q0'2 // - p[3] * q0[6] + - 0xFFFF * q0'3 // - p[2] * q0[7] + - 0xFFFF * q0'4 // - p[1] * q0[8] + - 0xFFFF * q0'5; // - p[0] * q0[9] + +// clock #5 + +eq_secp256r1_add_chunks[10] = s'5 * 5'x2 // s[10] * x2[0] + + s'4 * 4'x2 // + s[9] * x2[1] + + s'3 * 3'x2 // + s[8] * x2[2] + + s'2 * 2'x2 // + s[7] * x2[3] + + s' * 'x2 // + s[6] * x2[4] + + s * x2 // + s[5] * x2[5] + + 's * x2' // + s[4] * x2[6] + + 2's * x2'2 // + s[3] * x2[7] + + 3's * x2'3 // + s[2] * x2[8] + + 4's * x2'4 // + s[1] * x2[9] + + 5's * x2'5 // + s[0] * x2[10] + - s'5 * 5'x1 // - s[10] * x1[0] + - s'4 * 4'x1 // - s[9] * x1[1] + - s'3 * 3'x1 // - s[8] * x1[2] + - s'2 * 2'x1 // - s[7] * x1[3] + - s' * 'x1 // - s[6] * x1[4] + - s * x1 // - s[5] * x1[5] + - 's * x1' // - s[4] * x1[6] + - 2's * x1'2 // - s[3] * x1[7] + - 3's * x1'3 // - s[2] * x1[8] + - 4's * x1'4 // - s[1] * x1[9] + - 5's * x1'5 // - s[0] * x1[10] + - y2'5 // - y2[10] + + y1'5 // + y1[10] + - 0xFFFF * q0 // - p[5] * q0[5] + - 0xFFFF * q0' // - p[4] * q0[6] + - 0xFFFF * q0'2 // - p[3] * q0[7] + - 0xFFFF * q0'3 // - p[2] * q0[8] + - 0xFFFF * q0'4 // - p[1] * q0[9] + - 0xFFFF * q0'5; // - p[0] * q0[10] + +eq_secp256r1_add_chunks[11] = s'6 * 5'x2 // s[11] * x2[0] + + s'5 * 4'x2 // + s[10] * x2[1] + + s'4 * 3'x2 // + s[9] * x2[2] + + s'3 * 2'x2 // + s[8] * x2[3] + + s'2 * 'x2 // + s[7] * x2[4] + + s' * x2 // + s[6] * x2[5] + + s * x2' // + s[5] * x2[6] + + 's * x2'2 // + s[4] * x2[7] + + 2's * x2'3 // + s[3] * x2[8] + + 3's * x2'4 // + s[2] * x2[9] + + 4's * x2'5 // + s[1] * x2[10] + + 5's * x2'6 // + s[0] * x2[11] + - s'6 * 5'x1 // - s[11] * x1[0] + - s'5 * 4'x1 // - s[10] * x1[1] + - s'4 * 3'x1 // - s[9] * x1[2] + - s'3 * 2'x1 // - s[8] * x1[3] + - s'2 * 'x1 // - s[7] * x1[4] + - s' * x1 // - s[6] * x1[5] + - s * x1' // - s[5] * x1[6] + - 's * x1'2 // - s[4] * x1[7] + - 2's * x1'3 // - s[3] * x1[8] + - 3's * x1'4 // - s[2] * x1[9] + - 4's * x1'5 // - s[1] * x1[10] + - 5's * x1'6 // - s[0] * x1[11] + - y2'6 // - y2[11] + + y1'6 // + y1[11] + - 0xFFFF * q0' // - p[5] * q0[6] + - 0xFFFF * q0'2 // - p[4] * q0[7] + - 0xFFFF * q0'3 // - p[3] * q0[8] + - 0xFFFF * q0'4 // - p[2] * q0[9] + - 0xFFFF * q0'5 // - p[1] * q0[10] + - 0xFFFF * q0'6; // - p[0] * q0[11] + +// clock #6 + +eq_secp256r1_add_chunks[12] = s'6 * 6'x2 // s[12] * x2[0] + + s'5 * 5'x2 // + s[11] * x2[1] + + s'4 * 4'x2 // + s[10] * x2[2] + + s'3 * 3'x2 // + s[9] * x2[3] + + s'2 * 2'x2 // + s[8] * x2[4] + + s' * 'x2 // + s[7] * x2[5] + + s * x2 // + s[6] * x2[6] + + 's * x2' // + s[5] * x2[7] + + 2's * x2'2 // + s[4] * x2[8] + + 3's * x2'3 // + s[3] * x2[9] + + 4's * x2'4 // + s[2] * x2[10] + + 5's * x2'5 // + s[1] * x2[11] + + 6's * x2'6 // + s[0] * x2[12] + - s'6 * 6'x1 // - s[12] * x1[0] + - s'5 * 5'x1 // - s[11] * x1[1] + - s'4 * 4'x1 // - s[10] * x1[2] + - s'3 * 3'x1 // - s[9] * x1[3] + - s'2 * 2'x1 // - s[8] * x1[4] + - s' * 'x1 // - s[7] * x1[5] + - s * x1 // - s[6] * x1[6] + - 's * x1' // - s[5] * x1[7] + - 2's * x1'2 // - s[4] * x1[8] + - 3's * x1'3 // - s[3] * x1[9] + - 4's * x1'4 // - s[2] * x1[10] + - 5's * x1'5 // - s[1] * x1[11] + - 6's * x1'6 // - s[0] * x1[12] + - y2'6 // - y2[12] + + y1'6 // + y1[12] + - 6'q0 // - q0[0] + - 0xFFFF * q0' // - p[5] * q0[7] + - 0xFFFF * q0'2 // - p[4] * q0[8] + - 0xFFFF * q0'3 // - p[3] * q0[9] + - 0xFFFF * q0'4 // - p[2] * q0[10] + - 0xFFFF * q0'5 // - p[1] * q0[11] + - 0xFFFF * q0'6; // - p[0] * q0[12] + +eq_secp256r1_add_chunks[13] = s'7 * 6'x2 // s[13] * x2[0] + + s'6 * 5'x2 // + s[12] * x2[1] + + s'5 * 4'x2 // + s[11] * x2[2] + + s'4 * 3'x2 // + s[10] * x2[3] + + s'3 * 2'x2 // + s[9] * x2[4] + + s'2 * 'x2 // + s[8] * x2[5] + + s' * x2 // + s[7] * x2[6] + + s * x2' // + s[6] * x2[7] + + 's * x2'2 // + s[5] * x2[8] + + 2's * x2'3 // + s[4] * x2[9] + + 3's * x2'4 // + s[3] * x2[10] + + 4's * x2'5 // + s[2] * x2[11] + + 5's * x2'6 // + s[1] * x2[12] + + 6's * x2'7 // + s[0] * x2[13] + - s'7 * 6'x1 // - s[13] * x1[0] + - s'6 * 5'x1 // - s[12] * x1[1] + - s'5 * 4'x1 // - s[11] * x1[2] + - s'4 * 3'x1 // - s[10] * x1[3] + - s'3 * 2'x1 // - s[9] * x1[4] + - s'2 * 'x1 // - s[8] * x1[5] + - s' * x1 // - s[7] * x1[6] + - s * x1' // - s[6] * x1[7] + - 's * x1'2 // - s[5] * x1[8] + - 2's * x1'3 // - s[4] * x1[9] + - 3's * x1'4 // - s[3] * x1[10] + - 4's * x1'5 // - s[2] * x1[11] + - 5's * x1'6 // - s[1] * x1[12] + - 6's * x1'7 // - s[0] * x1[13] + - y2'7 // - y2[13] + + y1'7 // + y1[13] + - 5'q0 // - q0[1] + - 0xFFFF * q0'2 // - p[5] * q0[8] + - 0xFFFF * q0'3 // - p[4] * q0[9] + - 0xFFFF * q0'4 // - p[3] * q0[10] + - 0xFFFF * q0'5 // - p[2] * q0[11] + - 0xFFFF * q0'6 // - p[1] * q0[12] + - 0xFFFF * q0'7; // - p[0] * q0[13] + +// clock #7 + +eq_secp256r1_add_chunks[14] = s'7 * 7'x2 // s[14] * x2[0] + + s'6 * 6'x2 // + s[13] * x2[1] + + s'5 * 5'x2 // + s[12] * x2[2] + + s'4 * 4'x2 // + s[11] * x2[3] + + s'3 * 3'x2 // + s[10] * x2[4] + + s'2 * 2'x2 // + s[9] * x2[5] + + s' * 'x2 // + s[8] * x2[6] + + s * x2 // + s[7] * x2[7] + + 's * x2' // + s[6] * x2[8] + + 2's * x2'2 // + s[5] * x2[9] + + 3's * x2'3 // + s[4] * x2[10] + + 4's * x2'4 // + s[3] * x2[11] + + 5's * x2'5 // + s[2] * x2[12] + + 6's * x2'6 // + s[1] * x2[13] + + 7's * x2'7 // + s[0] * x2[14] + - s'7 * 7'x1 // - s[14] * x1[0] + - s'6 * 6'x1 // - s[13] * x1[1] + - s'5 * 5'x1 // - s[12] * x1[2] + - s'4 * 4'x1 // - s[11] * x1[3] + - s'3 * 3'x1 // - s[10] * x1[4] + - s'2 * 2'x1 // - s[9] * x1[5] + - s' * 'x1 // - s[8] * x1[6] + - s * x1 // - s[7] * x1[7] + - 's * x1' // - s[6] * x1[8] + - 2's * x1'2 // - s[5] * x1[9] + - 3's * x1'3 // - s[4] * x1[10] + - 4's * x1'4 // - s[3] * x1[11] + - 5's * x1'5 // - s[2] * x1[12] + - 6's * x1'6 // - s[1] * x1[13] + - 7's * x1'7 // - s[0] * x1[14] + - y2'7 // - y2[14] + + y1'7 // + y1[14] + - 0xFFFF * 7'q0 // - p[14] * q0[0] + - 5'q0 // - q0[2] + - 0xFFFF * q0'2 // - p[5] * q0[9] + - 0xFFFF * q0'3 // - p[4] * q0[10] + - 0xFFFF * q0'4 // - p[3] * q0[11] + - 0xFFFF * q0'5 // - p[2] * q0[12] + - 0xFFFF * q0'6 // - p[1] * q0[13] + - 0xFFFF * q0'7; // - p[0] * q0[14] + +eq_secp256r1_add_chunks[15] = s'8 * 7'x2 // s[15] * x2[0] + + s'7 * 6'x2 // + s[14] * x2[1] + + s'6 * 5'x2 // + s[13] * x2[2] + + s'5 * 4'x2 // + s[12] * x2[3] + + s'4 * 3'x2 // + s[11] * x2[4] + + s'3 * 2'x2 // + s[10] * x2[5] + + s'2 * 'x2 // + s[9] * x2[6] + + s' * x2 // + s[8] * x2[7] + + s * x2' // + s[7] * x2[8] + + 's * x2'2 // + s[6] * x2[9] + + 2's * x2'3 // + s[5] * x2[10] + + 3's * x2'4 // + s[4] * x2[11] + + 4's * x2'5 // + s[3] * x2[12] + + 5's * x2'6 // + s[2] * x2[13] + + 6's * x2'7 // + s[1] * x2[14] + + 7's * x2'8 // + s[0] * x2[15] + - s'8 * 7'x1 // - s[15] * x1[0] + - s'7 * 6'x1 // - s[14] * x1[1] + - s'6 * 5'x1 // - s[13] * x1[2] + - s'5 * 4'x1 // - s[12] * x1[3] + - s'4 * 3'x1 // - s[11] * x1[4] + - s'3 * 2'x1 // - s[10] * x1[5] + - s'2 * 'x1 // - s[9] * x1[6] + - s' * x1 // - s[8] * x1[7] + - s * x1' // - s[7] * x1[8] + - 's * x1'2 // - s[6] * x1[9] + - 2's * x1'3 // - s[5] * x1[10] + - 3's * x1'4 // - s[4] * x1[11] + - 4's * x1'5 // - s[3] * x1[12] + - 5's * x1'6 // - s[2] * x1[13] + - 6's * x1'7 // - s[1] * x1[14] + - 7's * x1'8 // - s[0] * x1[15] + - y2'8 // - y2[15] + + y1'8 // + y1[15] + - 0xFFFF * 7'q0 // - p[15] * q0[0] + - 0xFFFF * 6'q0 // - p[14] * q0[1] + - 4'q0 // - q0[3] + - 0xFFFF * q0'3 // - p[5] * q0[10] + - 0xFFFF * q0'4 // - p[4] * q0[11] + - 0xFFFF * q0'5 // - p[3] * q0[12] + - 0xFFFF * q0'6 // - p[2] * q0[13] + - 0xFFFF * q0'7 // - p[1] * q0[14] + - 0xFFFF * q0'8; // - p[0] * q0[15] + +// clock #8 + +eq_secp256r1_add_chunks[16] = s'7 * 7'x2 // s[15] * x2[1] + + s'6 * 6'x2 // + s[14] * x2[2] + + s'5 * 5'x2 // + s[13] * x2[3] + + s'4 * 4'x2 // + s[12] * x2[4] + + s'3 * 3'x2 // + s[11] * x2[5] + + s'2 * 2'x2 // + s[10] * x2[6] + + s' * 'x2 // + s[9] * x2[7] + + s * x2 // + s[8] * x2[8] + + 's * x2' // + s[7] * x2[9] + + 2's * x2'2 // + s[6] * x2[10] + + 3's * x2'3 // + s[5] * x2[11] + + 4's * x2'4 // + s[4] * x2[12] + + 5's * x2'5 // + s[3] * x2[13] + + 6's * x2'6 // + s[2] * x2[14] + + 7's * x2'7 // + s[1] * x2[15] + - s'7 * 7'x1 // - s[15] * x1[1] + - s'6 * 6'x1 // - s[14] * x1[2] + - s'5 * 5'x1 // - s[13] * x1[3] + - s'4 * 4'x1 // - s[12] * x1[4] + - s'3 * 3'x1 // - s[11] * x1[5] + - s'2 * 2'x1 // - s[10] * x1[6] + - s' * 'x1 // - s[9] * x1[7] + - s * x1 // - s[8] * x1[8] + - 's * x1' // - s[7] * x1[9] + - 2's * x1'2 // - s[6] * x1[10] + - 3's * x1'3 // - s[5] * x1[11] + - 4's * x1'4 // - s[4] * x1[12] + - 5's * x1'5 // - s[3] * x1[13] + - 6's * x1'6 // - s[2] * x1[14] + - 7's * x1'7 // - s[1] * x1[15] + - 0xFFFF * 7'q0 // - p[15] * q0[1] + - 0xFFFF * 6'q0 // - p[14] * q0[2] + - 4'q0 // - q0[4] + - 0xFFFF * q0'3 // - p[5] * q0[11] + - 0xFFFF * q0'4 // - p[4] * q0[12] + - 0xFFFF * q0'5 // - p[3] * q0[13] + - 0xFFFF * q0'6 // - p[2] * q0[14] + - 0xFFFF * q0'7 // - p[1] * q0[15] + + 0xFFFE; // + (p*offset)[16] + +eq_secp256r1_add_chunks[17] = s'7 * 6'x2 // s[15] * x2[2] + + s'6 * 5'x2 // + s[14] * x2[3] + + s'5 * 4'x2 // + s[13] * x2[4] + + s'4 * 3'x2 // + s[12] * x2[5] + + s'3 * 2'x2 // + s[11] * x2[6] + + s'2 * 'x2 // + s[10] * x2[7] + + s' * x2 // + s[9] * x2[8] + + s * x2' // + s[8] * x2[9] + + 's * x2'2 // + s[7] * x2[10] + + 2's * x2'3 // + s[6] * x2[11] + + 3's * x2'4 // + s[5] * x2[12] + + 4's * x2'5 // + s[4] * x2[13] + + 5's * x2'6 // + s[3] * x2[14] + + 6's * x2'7 // + s[2] * x2[15] + - s'7 * 6'x1 // - s[15] * x1[2] + - s'6 * 5'x1 // - s[14] * x1[3] + - s'5 * 4'x1 // - s[13] * x1[4] + - s'4 * 3'x1 // - s[12] * x1[5] + - s'3 * 2'x1 // - s[11] * x1[6] + - s'2 * 'x1 // - s[10] * x1[7] + - s' * x1 // - s[9] * x1[8] + - s * x1' // - s[8] * x1[9] + - 's * x1'2 // - s[7] * x1[10] + - 2's * x1'3 // - s[6] * x1[11] + - 3's * x1'4 // - s[5] * x1[12] + - 4's * x1'5 // - s[4] * x1[13] + - 5's * x1'6 // - s[3] * x1[14] + - 6's * x1'7 // - s[2] * x1[15] + - 0xFFFF * 6'q0 // - p[15] * q0[2] + - 0xFFFF * 5'q0 // - p[14] * q0[3] + - 3'q0 // - q0[5] + - 0xFFFF * q0'4 // - p[5] * q0[12] + - 0xFFFF * q0'5 // - p[4] * q0[13] + - 0xFFFF * q0'6 // - p[3] * q0[14] + - 0xFFFF * q0'7 // - p[2] * q0[15] + + 0xFFFF; // + (p*offset)[17] + +// clock #9 + +eq_secp256r1_add_chunks[18] = s'6 * 6'x2 // s[15] * x2[3] + + s'5 * 5'x2 // + s[14] * x2[4] + + s'4 * 4'x2 // + s[13] * x2[5] + + s'3 * 3'x2 // + s[12] * x2[6] + + s'2 * 2'x2 // + s[11] * x2[7] + + s' * 'x2 // + s[10] * x2[8] + + s * x2 // + s[9] * x2[9] + + 's * x2' // + s[8] * x2[10] + + 2's * x2'2 // + s[7] * x2[11] + + 3's * x2'3 // + s[6] * x2[12] + + 4's * x2'4 // + s[5] * x2[13] + + 5's * x2'5 // + s[4] * x2[14] + + 6's * x2'6 // + s[3] * x2[15] + - s'6 * 6'x1 // - s[15] * x1[3] + - s'5 * 5'x1 // - s[14] * x1[4] + - s'4 * 4'x1 // - s[13] * x1[5] + - s'3 * 3'x1 // - s[12] * x1[6] + - s'2 * 2'x1 // - s[11] * x1[7] + - s' * 'x1 // - s[10] * x1[8] + - s * x1 // - s[9] * x1[9] + - 's * x1' // - s[8] * x1[10] + - 2's * x1'2 // - s[7] * x1[11] + - 3's * x1'3 // - s[6] * x1[12] + - 4's * x1'4 // - s[5] * x1[13] + - 5's * x1'5 // - s[4] * x1[14] + - 6's * x1'6 // - s[3] * x1[15] + - 0xFFFF * 6'q0 // - p[15] * q0[3] + - 0xFFFF * 5'q0 // - p[14] * q0[4] + - 3'q0 // - q0[6] + - 0xFFFF * q0'4 // - p[5] * q0[13] + - 0xFFFF * q0'5 // - p[4] * q0[14] + - 0xFFFF * q0'6 // - p[3] * q0[15] + + 0xFFFF; // + (p*offset)[18] + +eq_secp256r1_add_chunks[19] = s'6 * 5'x2 // s[15] * x2[4] + + s'5 * 4'x2 // + s[14] * x2[5] + + s'4 * 3'x2 // + s[13] * x2[6] + + s'3 * 2'x2 // + s[12] * x2[7] + + s'2 * 'x2 // + s[11] * x2[8] + + s' * x2 // + s[10] * x2[9] + + s * x2' // + s[9] * x2[10] + + 's * x2'2 // + s[8] * x2[11] + + 2's * x2'3 // + s[7] * x2[12] + + 3's * x2'4 // + s[6] * x2[13] + + 4's * x2'5 // + s[5] * x2[14] + + 5's * x2'6 // + s[4] * x2[15] + - s'6 * 5'x1 // - s[15] * x1[4] + - s'5 * 4'x1 // - s[14] * x1[5] + - s'4 * 3'x1 // - s[13] * x1[6] + - s'3 * 2'x1 // - s[12] * x1[7] + - s'2 * 'x1 // - s[11] * x1[8] + - s' * x1 // - s[10] * x1[9] + - s * x1' // - s[9] * x1[10] + - 's * x1'2 // - s[8] * x1[11] + - 2's * x1'3 // - s[7] * x1[12] + - 3's * x1'4 // - s[6] * x1[13] + - 4's * x1'5 // - s[5] * x1[14] + - 5's * x1'6 // - s[4] * x1[15] + - 0xFFFF * 5'q0 // - p[15] * q0[4] + - 0xFFFF * 4'q0 // - p[14] * q0[5] + - 2'q0 // - q0[7] + - 0xFFFF * q0'5 // - p[5] * q0[14] + - 0xFFFF * q0'6 // - p[4] * q0[15] + + 0xFFFF; // + (p*offset)[19] + +// clock #10 + +eq_secp256r1_add_chunks[20] = s'5 * 5'x2 // s[15] * x2[5] + + s'4 * 4'x2 // + s[14] * x2[6] + + s'3 * 3'x2 // + s[13] * x2[7] + + s'2 * 2'x2 // + s[12] * x2[8] + + s' * 'x2 // + s[11] * x2[9] + + s * x2 // + s[10] * x2[10] + + 's * x2' // + s[9] * x2[11] + + 2's * x2'2 // + s[8] * x2[12] + + 3's * x2'3 // + s[7] * x2[13] + + 4's * x2'4 // + s[6] * x2[14] + + 5's * x2'5 // + s[5] * x2[15] + - s'5 * 5'x1 // - s[15] * x1[5] + - s'4 * 4'x1 // - s[14] * x1[6] + - s'3 * 3'x1 // - s[13] * x1[7] + - s'2 * 2'x1 // - s[12] * x1[8] + - s' * 'x1 // - s[11] * x1[9] + - s * x1 // - s[10] * x1[10] + - 's * x1' // - s[9] * x1[11] + - 2's * x1'2 // - s[8] * x1[12] + - 3's * x1'3 // - s[7] * x1[13] + - 4's * x1'4 // - s[6] * x1[14] + - 5's * x1'5 // - s[5] * x1[15] + - 0xFFFF * 5'q0 // - p[15] * q0[5] + - 0xFFFF * 4'q0 // - p[14] * q0[6] + - 2'q0 // - q0[8] + - 0xFFFF * q0'5 // - p[5] * q0[15] + + 0xFFFF; // + (p*offset)[20] + +eq_secp256r1_add_chunks[21] = s'5 * 4'x2 // s[15] * x2[6] + + s'4 * 3'x2 // + s[14] * x2[7] + + s'3 * 2'x2 // + s[13] * x2[8] + + s'2 * 'x2 // + s[12] * x2[9] + + s' * x2 // + s[11] * x2[10] + + s * x2' // + s[10] * x2[11] + + 's * x2'2 // + s[9] * x2[12] + + 2's * x2'3 // + s[8] * x2[13] + + 3's * x2'4 // + s[7] * x2[14] + + 4's * x2'5 // + s[6] * x2[15] + - s'5 * 4'x1 // - s[15] * x1[6] + - s'4 * 3'x1 // - s[14] * x1[7] + - s'3 * 2'x1 // - s[13] * x1[8] + - s'2 * 'x1 // - s[12] * x1[9] + - s' * x1 // - s[11] * x1[10] + - s * x1' // - s[10] * x1[11] + - 's * x1'2 // - s[9] * x1[12] + - 2's * x1'3 // - s[8] * x1[13] + - 3's * x1'4 // - s[7] * x1[14] + - 4's * x1'5 // - s[6] * x1[15] + - 0xFFFF * 4'q0 // - p[15] * q0[6] + - 0xFFFF * 3'q0 // - p[14] * q0[7] + - 'q0 // - q0[9] + + 0xFFFF; // + (p*offset)[21] + +// clock #11 + +eq_secp256r1_add_chunks[22] = s'4 * 4'x2 // s[15] * x2[7] + + s'3 * 3'x2 // + s[14] * x2[8] + + s'2 * 2'x2 // + s[13] * x2[9] + + s' * 'x2 // + s[12] * x2[10] + + s * x2 // + s[11] * x2[11] + + 's * x2' // + s[10] * x2[12] + + 2's * x2'2 // + s[9] * x2[13] + + 3's * x2'3 // + s[8] * x2[14] + + 4's * x2'4 // + s[7] * x2[15] + - s'4 * 4'x1 // - s[15] * x1[7] + - s'3 * 3'x1 // - s[14] * x1[8] + - s'2 * 2'x1 // - s[13] * x1[9] + - s' * 'x1 // - s[12] * x1[10] + - s * x1 // - s[11] * x1[11] + - 's * x1' // - s[10] * x1[12] + - 2's * x1'2 // - s[9] * x1[13] + - 3's * x1'3 // - s[8] * x1[14] + - 4's * x1'4 // - s[7] * x1[15] + - 0xFFFF * 4'q0 // - p[15] * q0[7] + - 0xFFFF * 3'q0 // - p[14] * q0[8] + - 'q0 // - q0[10] + + 0x1; // + (p*offset)[22] + +eq_secp256r1_add_chunks[23] = s'4 * 3'x2 // s[15] * x2[8] + + s'3 * 2'x2 // + s[14] * x2[9] + + s'2 * 'x2 // + s[13] * x2[10] + + s' * x2 // + s[12] * x2[11] + + s * x2' // + s[11] * x2[12] + + 's * x2'2 // + s[10] * x2[13] + + 2's * x2'3 // + s[9] * x2[14] + + 3's * x2'4 // + s[8] * x2[15] + - s'4 * 3'x1 // - s[15] * x1[8] + - s'3 * 2'x1 // - s[14] * x1[9] + - s'2 * 'x1 // - s[13] * x1[10] + - s' * x1 // - s[12] * x1[11] + - s * x1' // - s[11] * x1[12] + - 's * x1'2 // - s[10] * x1[13] + - 2's * x1'3 // - s[9] * x1[14] + - 3's * x1'4 // - s[8] * x1[15] + - 0xFFFF * 3'q0 // - p[15] * q0[8] + - 0xFFFF * 2'q0 // - p[14] * q0[9] + - q0; // - q0[11] + +// clock #12 + +eq_secp256r1_add_chunks[24] = s'3 * 3'x2 // s[15] * x2[9] + + s'2 * 2'x2 // + s[14] * x2[10] + + s' * 'x2 // + s[13] * x2[11] + + s * x2 // + s[12] * x2[12] + + 's * x2' // + s[11] * x2[13] + + 2's * x2'2 // + s[10] * x2[14] + + 3's * x2'3 // + s[9] * x2[15] + - s'3 * 3'x1 // - s[15] * x1[9] + - s'2 * 2'x1 // - s[14] * x1[10] + - s' * 'x1 // - s[13] * x1[11] + - s * x1 // - s[12] * x1[12] + - 's * x1' // - s[11] * x1[13] + - 2's * x1'2 // - s[10] * x1[14] + - 3's * x1'3 // - s[9] * x1[15] + - 0xFFFF * 3'q0 // - p[15] * q0[9] + - 0xFFFF * 2'q0 // - p[14] * q0[10] + - q0; // - q0[12] + +eq_secp256r1_add_chunks[25] = s'3 * 2'x2 // s[15] * x2[10] + + s'2 * 'x2 // + s[14] * x2[11] + + s' * x2 // + s[13] * x2[12] + + s * x2' // + s[12] * x2[13] + + 's * x2'2 // + s[11] * x2[14] + + 2's * x2'3 // + s[10] * x2[15] + - s'3 * 2'x1 // - s[15] * x1[10] + - s'2 * 'x1 // - s[14] * x1[11] + - s' * x1 // - s[13] * x1[12] + - s * x1' // - s[12] * x1[13] + - 's * x1'2 // - s[11] * x1[14] + - 2's * x1'3 // - s[10] * x1[15] + - 0xFFFF * 2'q0 // - p[15] * q0[10] + - 0xFFFF * 'q0 // - p[14] * q0[11] + - q0'; // - q0[13] + +// clock #13 + +eq_secp256r1_add_chunks[26] = s'2 * 2'x2 // s[15] * x2[11] + + s' * 'x2 // + s[14] * x2[12] + + s * x2 // + s[13] * x2[13] + + 's * x2' // + s[12] * x2[14] + + 2's * x2'2 // + s[11] * x2[15] + - s'2 * 2'x1 // - s[15] * x1[11] + - s' * 'x1 // - s[14] * x1[12] + - s * x1 // - s[13] * x1[13] + - 's * x1' // - s[12] * x1[14] + - 2's * x1'2 // - s[11] * x1[15] + - 0xFFFF * 2'q0 // - p[15] * q0[11] + - 0xFFFF * 'q0 // - p[14] * q0[12] + - q0'; // - q0[14] + +eq_secp256r1_add_chunks[27] = s'2 * 'x2 // s[15] * x2[12] + + s' * x2 // + s[14] * x2[13] + + s * x2' // + s[13] * x2[14] + + 's * x2'2 // + s[12] * x2[15] + - s'2 * 'x1 // - s[15] * x1[12] + - s' * x1 // - s[14] * x1[13] + - s * x1' // - s[13] * x1[14] + - 's * x1'2 // - s[12] * x1[15] + - 0xFFFF * 'q0 // - p[15] * q0[12] + - 0xFFFF * q0 // - p[14] * q0[13] + - q0'2; // - q0[15] + +// clock #14 + +eq_secp256r1_add_chunks[28] = s' * 'x2 // s[15] * x2[13] + + s * x2 // + s[14] * x2[14] + + 's * x2' // + s[13] * x2[15] + - s' * 'x1 // - s[15] * x1[13] + - s * x1 // - s[14] * x1[14] + - 's * x1' // - s[13] * x1[15] + - 0xFFFF * 'q0 // - p[15] * q0[13] + - 0xFFFF * q0 // - p[14] * q0[14] + + 0x2; // + (p*offset)[28] + +eq_secp256r1_add_chunks[29] = s' * x2 // s[15] * x2[14] + + s * x2' // + s[14] * x2[15] + - s' * x1 // - s[15] * x1[14] + - s * x1' // - s[14] * x1[15] + - 0xFFFF * q0 // - p[15] * q0[14] + - 0xFFFF * q0'; // - p[14] * q0[15] + +// clock #15 + +eq_secp256r1_add_chunks[30] = s * x2 // s[15] * x2[15] + - s * x1 // - s[15] * x1[15] + - 0xFFFF * q0 // - p[15] * q0[15] + + 0xFFFE; // + (p*offset)[30] + +eq_secp256r1_add_chunks[31] = 0x1FFFF; // (p*offset)[31] + diff --git a/precompiles/arith_eq/pil/equations/secp256r1_dbl.pil b/precompiles/arith_eq/pil/equations/secp256r1_dbl.pil new file mode 100644 index 000000000..7b3041d1c --- /dev/null +++ b/precompiles/arith_eq/pil/equations/secp256r1_dbl.pil @@ -0,0 +1,757 @@ +// code generated +// +// equation: 2*s*y1-3*x1*x1-a+p*q0-p*offset +// +// a: 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC +// p: 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF +// offset: 0x40000000000000000000000000000000000000000000000000000000000000000 +// 2: 2 +// 3: 3 +// (p*offset): 0x3FFFFFFFC00000004000000000000000000000003FFFFFFFFFFFFFFFFFFFFFFFC0000000000000000000000000000000000000000000000000000000000000000 +// +// chunks:16 +// chunk_bits:16 +// terms_by_clock: 2 + + +const expr eq_secp256r1_dbl_chunks[32]; + +// clock #0 + +eq_secp256r1_dbl_chunks[ 0] = 2 * s * y1 // 2[0] * s[0] * y1[0] + - 3 * x1 * x1 // - 3[0] * x1[0] * x1[0] + - 0xFFFC // - a[0] + + 0xFFFF * q0; // + p[0] * q0[0] + +eq_secp256r1_dbl_chunks[ 1] = 2 * s' * y1 // 2[0] * s[1] * y1[0] + + 2 * s * y1' // + 2[0] * s[0] * y1[1] + - 3 * x1' * x1 // - 3[0] * x1[1] * x1[0] + - 3 * x1 * x1' // - 3[0] * x1[0] * x1[1] + - 0xFFFF // - a[1] + + 0xFFFF * q0 // + p[1] * q0[0] + + 0xFFFF * q0'; // + p[0] * q0[1] + +// clock #1 + +eq_secp256r1_dbl_chunks[ 2] = 2 * s' * 'y1 // 2[0] * s[2] * y1[0] + + 2 * s * y1 // + 2[0] * s[1] * y1[1] + + 2 * 's * y1' // + 2[0] * s[0] * y1[2] + - 3 * x1' * 'x1 // - 3[0] * x1[2] * x1[0] + - 3 * x1 * x1 // - 3[0] * x1[1] * x1[1] + - 3 * 'x1 * x1' // - 3[0] * x1[0] * x1[2] + - 0xFFFF // - a[2] + + 0xFFFF * 'q0 // + p[2] * q0[0] + + 0xFFFF * q0 // + p[1] * q0[1] + + 0xFFFF * q0'; // + p[0] * q0[2] + +eq_secp256r1_dbl_chunks[ 3] = 2 * s'2 * 'y1 // 2[0] * s[3] * y1[0] + + 2 * s' * y1 // + 2[0] * s[2] * y1[1] + + 2 * s * y1' // + 2[0] * s[1] * y1[2] + + 2 * 's * y1'2 // + 2[0] * s[0] * y1[3] + - 3 * x1'2 * 'x1 // - 3[0] * x1[3] * x1[0] + - 3 * x1' * x1 // - 3[0] * x1[2] * x1[1] + - 3 * x1 * x1' // - 3[0] * x1[1] * x1[2] + - 3 * 'x1 * x1'2 // - 3[0] * x1[0] * x1[3] + - 0xFFFF // - a[3] + + 0xFFFF * 'q0 // + p[3] * q0[0] + + 0xFFFF * q0 // + p[2] * q0[1] + + 0xFFFF * q0' // + p[1] * q0[2] + + 0xFFFF * q0'2; // + p[0] * q0[3] + +// clock #2 + +eq_secp256r1_dbl_chunks[ 4] = 2 * s'2 * 2'y1 // 2[0] * s[4] * y1[0] + + 2 * s' * 'y1 // + 2[0] * s[3] * y1[1] + + 2 * s * y1 // + 2[0] * s[2] * y1[2] + + 2 * 's * y1' // + 2[0] * s[1] * y1[3] + + 2 * 2's * y1'2 // + 2[0] * s[0] * y1[4] + - 3 * x1'2 * 2'x1 // - 3[0] * x1[4] * x1[0] + - 3 * x1' * 'x1 // - 3[0] * x1[3] * x1[1] + - 3 * x1 * x1 // - 3[0] * x1[2] * x1[2] + - 3 * 'x1 * x1' // - 3[0] * x1[1] * x1[3] + - 3 * 2'x1 * x1'2 // - 3[0] * x1[0] * x1[4] + - 0xFFFF // - a[4] + + 0xFFFF * 2'q0 // + p[4] * q0[0] + + 0xFFFF * 'q0 // + p[3] * q0[1] + + 0xFFFF * q0 // + p[2] * q0[2] + + 0xFFFF * q0' // + p[1] * q0[3] + + 0xFFFF * q0'2; // + p[0] * q0[4] + +eq_secp256r1_dbl_chunks[ 5] = 2 * s'3 * 2'y1 // 2[0] * s[5] * y1[0] + + 2 * s'2 * 'y1 // + 2[0] * s[4] * y1[1] + + 2 * s' * y1 // + 2[0] * s[3] * y1[2] + + 2 * s * y1' // + 2[0] * s[2] * y1[3] + + 2 * 's * y1'2 // + 2[0] * s[1] * y1[4] + + 2 * 2's * y1'3 // + 2[0] * s[0] * y1[5] + - 3 * x1'3 * 2'x1 // - 3[0] * x1[5] * x1[0] + - 3 * x1'2 * 'x1 // - 3[0] * x1[4] * x1[1] + - 3 * x1' * x1 // - 3[0] * x1[3] * x1[2] + - 3 * x1 * x1' // - 3[0] * x1[2] * x1[3] + - 3 * 'x1 * x1'2 // - 3[0] * x1[1] * x1[4] + - 3 * 2'x1 * x1'3 // - 3[0] * x1[0] * x1[5] + - 0xFFFF // - a[5] + + 0xFFFF * 2'q0 // + p[5] * q0[0] + + 0xFFFF * 'q0 // + p[4] * q0[1] + + 0xFFFF * q0 // + p[3] * q0[2] + + 0xFFFF * q0' // + p[2] * q0[3] + + 0xFFFF * q0'2 // + p[1] * q0[4] + + 0xFFFF * q0'3; // + p[0] * q0[5] + +// clock #3 + +eq_secp256r1_dbl_chunks[ 6] = 2 * s'3 * 3'y1 // 2[0] * s[6] * y1[0] + + 2 * s'2 * 2'y1 // + 2[0] * s[5] * y1[1] + + 2 * s' * 'y1 // + 2[0] * s[4] * y1[2] + + 2 * s * y1 // + 2[0] * s[3] * y1[3] + + 2 * 's * y1' // + 2[0] * s[2] * y1[4] + + 2 * 2's * y1'2 // + 2[0] * s[1] * y1[5] + + 2 * 3's * y1'3 // + 2[0] * s[0] * y1[6] + - 3 * x1'3 * 3'x1 // - 3[0] * x1[6] * x1[0] + - 3 * x1'2 * 2'x1 // - 3[0] * x1[5] * x1[1] + - 3 * x1' * 'x1 // - 3[0] * x1[4] * x1[2] + - 3 * x1 * x1 // - 3[0] * x1[3] * x1[3] + - 3 * 'x1 * x1' // - 3[0] * x1[2] * x1[4] + - 3 * 2'x1 * x1'2 // - 3[0] * x1[1] * x1[5] + - 3 * 3'x1 * x1'3 // - 3[0] * x1[0] * x1[6] + + 0xFFFF * 2'q0 // + p[5] * q0[1] + + 0xFFFF * 'q0 // + p[4] * q0[2] + + 0xFFFF * q0 // + p[3] * q0[3] + + 0xFFFF * q0' // + p[2] * q0[4] + + 0xFFFF * q0'2 // + p[1] * q0[5] + + 0xFFFF * q0'3; // + p[0] * q0[6] + +eq_secp256r1_dbl_chunks[ 7] = 2 * s'4 * 3'y1 // 2[0] * s[7] * y1[0] + + 2 * s'3 * 2'y1 // + 2[0] * s[6] * y1[1] + + 2 * s'2 * 'y1 // + 2[0] * s[5] * y1[2] + + 2 * s' * y1 // + 2[0] * s[4] * y1[3] + + 2 * s * y1' // + 2[0] * s[3] * y1[4] + + 2 * 's * y1'2 // + 2[0] * s[2] * y1[5] + + 2 * 2's * y1'3 // + 2[0] * s[1] * y1[6] + + 2 * 3's * y1'4 // + 2[0] * s[0] * y1[7] + - 3 * x1'4 * 3'x1 // - 3[0] * x1[7] * x1[0] + - 3 * x1'3 * 2'x1 // - 3[0] * x1[6] * x1[1] + - 3 * x1'2 * 'x1 // - 3[0] * x1[5] * x1[2] + - 3 * x1' * x1 // - 3[0] * x1[4] * x1[3] + - 3 * x1 * x1' // - 3[0] * x1[3] * x1[4] + - 3 * 'x1 * x1'2 // - 3[0] * x1[2] * x1[5] + - 3 * 2'x1 * x1'3 // - 3[0] * x1[1] * x1[6] + - 3 * 3'x1 * x1'4 // - 3[0] * x1[0] * x1[7] + + 0xFFFF * 'q0 // + p[5] * q0[2] + + 0xFFFF * q0 // + p[4] * q0[3] + + 0xFFFF * q0' // + p[3] * q0[4] + + 0xFFFF * q0'2 // + p[2] * q0[5] + + 0xFFFF * q0'3 // + p[1] * q0[6] + + 0xFFFF * q0'4; // + p[0] * q0[7] + +// clock #4 + +eq_secp256r1_dbl_chunks[ 8] = 2 * s'4 * 4'y1 // 2[0] * s[8] * y1[0] + + 2 * s'3 * 3'y1 // + 2[0] * s[7] * y1[1] + + 2 * s'2 * 2'y1 // + 2[0] * s[6] * y1[2] + + 2 * s' * 'y1 // + 2[0] * s[5] * y1[3] + + 2 * s * y1 // + 2[0] * s[4] * y1[4] + + 2 * 's * y1' // + 2[0] * s[3] * y1[5] + + 2 * 2's * y1'2 // + 2[0] * s[2] * y1[6] + + 2 * 3's * y1'3 // + 2[0] * s[1] * y1[7] + + 2 * 4's * y1'4 // + 2[0] * s[0] * y1[8] + - 3 * x1'4 * 4'x1 // - 3[0] * x1[8] * x1[0] + - 3 * x1'3 * 3'x1 // - 3[0] * x1[7] * x1[1] + - 3 * x1'2 * 2'x1 // - 3[0] * x1[6] * x1[2] + - 3 * x1' * 'x1 // - 3[0] * x1[5] * x1[3] + - 3 * x1 * x1 // - 3[0] * x1[4] * x1[4] + - 3 * 'x1 * x1' // - 3[0] * x1[3] * x1[5] + - 3 * 2'x1 * x1'2 // - 3[0] * x1[2] * x1[6] + - 3 * 3'x1 * x1'3 // - 3[0] * x1[1] * x1[7] + - 3 * 4'x1 * x1'4 // - 3[0] * x1[0] * x1[8] + + 0xFFFF * 'q0 // + p[5] * q0[3] + + 0xFFFF * q0 // + p[4] * q0[4] + + 0xFFFF * q0' // + p[3] * q0[5] + + 0xFFFF * q0'2 // + p[2] * q0[6] + + 0xFFFF * q0'3 // + p[1] * q0[7] + + 0xFFFF * q0'4; // + p[0] * q0[8] + +eq_secp256r1_dbl_chunks[ 9] = 2 * s'5 * 4'y1 // 2[0] * s[9] * y1[0] + + 2 * s'4 * 3'y1 // + 2[0] * s[8] * y1[1] + + 2 * s'3 * 2'y1 // + 2[0] * s[7] * y1[2] + + 2 * s'2 * 'y1 // + 2[0] * s[6] * y1[3] + + 2 * s' * y1 // + 2[0] * s[5] * y1[4] + + 2 * s * y1' // + 2[0] * s[4] * y1[5] + + 2 * 's * y1'2 // + 2[0] * s[3] * y1[6] + + 2 * 2's * y1'3 // + 2[0] * s[2] * y1[7] + + 2 * 3's * y1'4 // + 2[0] * s[1] * y1[8] + + 2 * 4's * y1'5 // + 2[0] * s[0] * y1[9] + - 3 * x1'5 * 4'x1 // - 3[0] * x1[9] * x1[0] + - 3 * x1'4 * 3'x1 // - 3[0] * x1[8] * x1[1] + - 3 * x1'3 * 2'x1 // - 3[0] * x1[7] * x1[2] + - 3 * x1'2 * 'x1 // - 3[0] * x1[6] * x1[3] + - 3 * x1' * x1 // - 3[0] * x1[5] * x1[4] + - 3 * x1 * x1' // - 3[0] * x1[4] * x1[5] + - 3 * 'x1 * x1'2 // - 3[0] * x1[3] * x1[6] + - 3 * 2'x1 * x1'3 // - 3[0] * x1[2] * x1[7] + - 3 * 3'x1 * x1'4 // - 3[0] * x1[1] * x1[8] + - 3 * 4'x1 * x1'5 // - 3[0] * x1[0] * x1[9] + + 0xFFFF * q0 // + p[5] * q0[4] + + 0xFFFF * q0' // + p[4] * q0[5] + + 0xFFFF * q0'2 // + p[3] * q0[6] + + 0xFFFF * q0'3 // + p[2] * q0[7] + + 0xFFFF * q0'4 // + p[1] * q0[8] + + 0xFFFF * q0'5; // + p[0] * q0[9] + +// clock #5 + +eq_secp256r1_dbl_chunks[10] = 2 * s'5 * 5'y1 // 2[0] * s[10] * y1[0] + + 2 * s'4 * 4'y1 // + 2[0] * s[9] * y1[1] + + 2 * s'3 * 3'y1 // + 2[0] * s[8] * y1[2] + + 2 * s'2 * 2'y1 // + 2[0] * s[7] * y1[3] + + 2 * s' * 'y1 // + 2[0] * s[6] * y1[4] + + 2 * s * y1 // + 2[0] * s[5] * y1[5] + + 2 * 's * y1' // + 2[0] * s[4] * y1[6] + + 2 * 2's * y1'2 // + 2[0] * s[3] * y1[7] + + 2 * 3's * y1'3 // + 2[0] * s[2] * y1[8] + + 2 * 4's * y1'4 // + 2[0] * s[1] * y1[9] + + 2 * 5's * y1'5 // + 2[0] * s[0] * y1[10] + - 3 * x1'5 * 5'x1 // - 3[0] * x1[10] * x1[0] + - 3 * x1'4 * 4'x1 // - 3[0] * x1[9] * x1[1] + - 3 * x1'3 * 3'x1 // - 3[0] * x1[8] * x1[2] + - 3 * x1'2 * 2'x1 // - 3[0] * x1[7] * x1[3] + - 3 * x1' * 'x1 // - 3[0] * x1[6] * x1[4] + - 3 * x1 * x1 // - 3[0] * x1[5] * x1[5] + - 3 * 'x1 * x1' // - 3[0] * x1[4] * x1[6] + - 3 * 2'x1 * x1'2 // - 3[0] * x1[3] * x1[7] + - 3 * 3'x1 * x1'3 // - 3[0] * x1[2] * x1[8] + - 3 * 4'x1 * x1'4 // - 3[0] * x1[1] * x1[9] + - 3 * 5'x1 * x1'5 // - 3[0] * x1[0] * x1[10] + + 0xFFFF * q0 // + p[5] * q0[5] + + 0xFFFF * q0' // + p[4] * q0[6] + + 0xFFFF * q0'2 // + p[3] * q0[7] + + 0xFFFF * q0'3 // + p[2] * q0[8] + + 0xFFFF * q0'4 // + p[1] * q0[9] + + 0xFFFF * q0'5; // + p[0] * q0[10] + +eq_secp256r1_dbl_chunks[11] = 2 * s'6 * 5'y1 // 2[0] * s[11] * y1[0] + + 2 * s'5 * 4'y1 // + 2[0] * s[10] * y1[1] + + 2 * s'4 * 3'y1 // + 2[0] * s[9] * y1[2] + + 2 * s'3 * 2'y1 // + 2[0] * s[8] * y1[3] + + 2 * s'2 * 'y1 // + 2[0] * s[7] * y1[4] + + 2 * s' * y1 // + 2[0] * s[6] * y1[5] + + 2 * s * y1' // + 2[0] * s[5] * y1[6] + + 2 * 's * y1'2 // + 2[0] * s[4] * y1[7] + + 2 * 2's * y1'3 // + 2[0] * s[3] * y1[8] + + 2 * 3's * y1'4 // + 2[0] * s[2] * y1[9] + + 2 * 4's * y1'5 // + 2[0] * s[1] * y1[10] + + 2 * 5's * y1'6 // + 2[0] * s[0] * y1[11] + - 3 * x1'6 * 5'x1 // - 3[0] * x1[11] * x1[0] + - 3 * x1'5 * 4'x1 // - 3[0] * x1[10] * x1[1] + - 3 * x1'4 * 3'x1 // - 3[0] * x1[9] * x1[2] + - 3 * x1'3 * 2'x1 // - 3[0] * x1[8] * x1[3] + - 3 * x1'2 * 'x1 // - 3[0] * x1[7] * x1[4] + - 3 * x1' * x1 // - 3[0] * x1[6] * x1[5] + - 3 * x1 * x1' // - 3[0] * x1[5] * x1[6] + - 3 * 'x1 * x1'2 // - 3[0] * x1[4] * x1[7] + - 3 * 2'x1 * x1'3 // - 3[0] * x1[3] * x1[8] + - 3 * 3'x1 * x1'4 // - 3[0] * x1[2] * x1[9] + - 3 * 4'x1 * x1'5 // - 3[0] * x1[1] * x1[10] + - 3 * 5'x1 * x1'6 // - 3[0] * x1[0] * x1[11] + + 0xFFFF * q0' // + p[5] * q0[6] + + 0xFFFF * q0'2 // + p[4] * q0[7] + + 0xFFFF * q0'3 // + p[3] * q0[8] + + 0xFFFF * q0'4 // + p[2] * q0[9] + + 0xFFFF * q0'5 // + p[1] * q0[10] + + 0xFFFF * q0'6; // + p[0] * q0[11] + +// clock #6 + +eq_secp256r1_dbl_chunks[12] = 2 * s'6 * 6'y1 // 2[0] * s[12] * y1[0] + + 2 * s'5 * 5'y1 // + 2[0] * s[11] * y1[1] + + 2 * s'4 * 4'y1 // + 2[0] * s[10] * y1[2] + + 2 * s'3 * 3'y1 // + 2[0] * s[9] * y1[3] + + 2 * s'2 * 2'y1 // + 2[0] * s[8] * y1[4] + + 2 * s' * 'y1 // + 2[0] * s[7] * y1[5] + + 2 * s * y1 // + 2[0] * s[6] * y1[6] + + 2 * 's * y1' // + 2[0] * s[5] * y1[7] + + 2 * 2's * y1'2 // + 2[0] * s[4] * y1[8] + + 2 * 3's * y1'3 // + 2[0] * s[3] * y1[9] + + 2 * 4's * y1'4 // + 2[0] * s[2] * y1[10] + + 2 * 5's * y1'5 // + 2[0] * s[1] * y1[11] + + 2 * 6's * y1'6 // + 2[0] * s[0] * y1[12] + - 3 * x1'6 * 6'x1 // - 3[0] * x1[12] * x1[0] + - 3 * x1'5 * 5'x1 // - 3[0] * x1[11] * x1[1] + - 3 * x1'4 * 4'x1 // - 3[0] * x1[10] * x1[2] + - 3 * x1'3 * 3'x1 // - 3[0] * x1[9] * x1[3] + - 3 * x1'2 * 2'x1 // - 3[0] * x1[8] * x1[4] + - 3 * x1' * 'x1 // - 3[0] * x1[7] * x1[5] + - 3 * x1 * x1 // - 3[0] * x1[6] * x1[6] + - 3 * 'x1 * x1' // - 3[0] * x1[5] * x1[7] + - 3 * 2'x1 * x1'2 // - 3[0] * x1[4] * x1[8] + - 3 * 3'x1 * x1'3 // - 3[0] * x1[3] * x1[9] + - 3 * 4'x1 * x1'4 // - 3[0] * x1[2] * x1[10] + - 3 * 5'x1 * x1'5 // - 3[0] * x1[1] * x1[11] + - 3 * 6'x1 * x1'6 // - 3[0] * x1[0] * x1[12] + - 0x1 // - a[12] + + 6'q0 // + q0[0] + + 0xFFFF * q0' // + p[5] * q0[7] + + 0xFFFF * q0'2 // + p[4] * q0[8] + + 0xFFFF * q0'3 // + p[3] * q0[9] + + 0xFFFF * q0'4 // + p[2] * q0[10] + + 0xFFFF * q0'5 // + p[1] * q0[11] + + 0xFFFF * q0'6; // + p[0] * q0[12] + +eq_secp256r1_dbl_chunks[13] = 2 * s'7 * 6'y1 // 2[0] * s[13] * y1[0] + + 2 * s'6 * 5'y1 // + 2[0] * s[12] * y1[1] + + 2 * s'5 * 4'y1 // + 2[0] * s[11] * y1[2] + + 2 * s'4 * 3'y1 // + 2[0] * s[10] * y1[3] + + 2 * s'3 * 2'y1 // + 2[0] * s[9] * y1[4] + + 2 * s'2 * 'y1 // + 2[0] * s[8] * y1[5] + + 2 * s' * y1 // + 2[0] * s[7] * y1[6] + + 2 * s * y1' // + 2[0] * s[6] * y1[7] + + 2 * 's * y1'2 // + 2[0] * s[5] * y1[8] + + 2 * 2's * y1'3 // + 2[0] * s[4] * y1[9] + + 2 * 3's * y1'4 // + 2[0] * s[3] * y1[10] + + 2 * 4's * y1'5 // + 2[0] * s[2] * y1[11] + + 2 * 5's * y1'6 // + 2[0] * s[1] * y1[12] + + 2 * 6's * y1'7 // + 2[0] * s[0] * y1[13] + - 3 * x1'7 * 6'x1 // - 3[0] * x1[13] * x1[0] + - 3 * x1'6 * 5'x1 // - 3[0] * x1[12] * x1[1] + - 3 * x1'5 * 4'x1 // - 3[0] * x1[11] * x1[2] + - 3 * x1'4 * 3'x1 // - 3[0] * x1[10] * x1[3] + - 3 * x1'3 * 2'x1 // - 3[0] * x1[9] * x1[4] + - 3 * x1'2 * 'x1 // - 3[0] * x1[8] * x1[5] + - 3 * x1' * x1 // - 3[0] * x1[7] * x1[6] + - 3 * x1 * x1' // - 3[0] * x1[6] * x1[7] + - 3 * 'x1 * x1'2 // - 3[0] * x1[5] * x1[8] + - 3 * 2'x1 * x1'3 // - 3[0] * x1[4] * x1[9] + - 3 * 3'x1 * x1'4 // - 3[0] * x1[3] * x1[10] + - 3 * 4'x1 * x1'5 // - 3[0] * x1[2] * x1[11] + - 3 * 5'x1 * x1'6 // - 3[0] * x1[1] * x1[12] + - 3 * 6'x1 * x1'7 // - 3[0] * x1[0] * x1[13] + + 5'q0 // + q0[1] + + 0xFFFF * q0'2 // + p[5] * q0[8] + + 0xFFFF * q0'3 // + p[4] * q0[9] + + 0xFFFF * q0'4 // + p[3] * q0[10] + + 0xFFFF * q0'5 // + p[2] * q0[11] + + 0xFFFF * q0'6 // + p[1] * q0[12] + + 0xFFFF * q0'7; // + p[0] * q0[13] + +// clock #7 + +eq_secp256r1_dbl_chunks[14] = 2 * s'7 * 7'y1 // 2[0] * s[14] * y1[0] + + 2 * s'6 * 6'y1 // + 2[0] * s[13] * y1[1] + + 2 * s'5 * 5'y1 // + 2[0] * s[12] * y1[2] + + 2 * s'4 * 4'y1 // + 2[0] * s[11] * y1[3] + + 2 * s'3 * 3'y1 // + 2[0] * s[10] * y1[4] + + 2 * s'2 * 2'y1 // + 2[0] * s[9] * y1[5] + + 2 * s' * 'y1 // + 2[0] * s[8] * y1[6] + + 2 * s * y1 // + 2[0] * s[7] * y1[7] + + 2 * 's * y1' // + 2[0] * s[6] * y1[8] + + 2 * 2's * y1'2 // + 2[0] * s[5] * y1[9] + + 2 * 3's * y1'3 // + 2[0] * s[4] * y1[10] + + 2 * 4's * y1'4 // + 2[0] * s[3] * y1[11] + + 2 * 5's * y1'5 // + 2[0] * s[2] * y1[12] + + 2 * 6's * y1'6 // + 2[0] * s[1] * y1[13] + + 2 * 7's * y1'7 // + 2[0] * s[0] * y1[14] + - 3 * x1'7 * 7'x1 // - 3[0] * x1[14] * x1[0] + - 3 * x1'6 * 6'x1 // - 3[0] * x1[13] * x1[1] + - 3 * x1'5 * 5'x1 // - 3[0] * x1[12] * x1[2] + - 3 * x1'4 * 4'x1 // - 3[0] * x1[11] * x1[3] + - 3 * x1'3 * 3'x1 // - 3[0] * x1[10] * x1[4] + - 3 * x1'2 * 2'x1 // - 3[0] * x1[9] * x1[5] + - 3 * x1' * 'x1 // - 3[0] * x1[8] * x1[6] + - 3 * x1 * x1 // - 3[0] * x1[7] * x1[7] + - 3 * 'x1 * x1' // - 3[0] * x1[6] * x1[8] + - 3 * 2'x1 * x1'2 // - 3[0] * x1[5] * x1[9] + - 3 * 3'x1 * x1'3 // - 3[0] * x1[4] * x1[10] + - 3 * 4'x1 * x1'4 // - 3[0] * x1[3] * x1[11] + - 3 * 5'x1 * x1'5 // - 3[0] * x1[2] * x1[12] + - 3 * 6'x1 * x1'6 // - 3[0] * x1[1] * x1[13] + - 3 * 7'x1 * x1'7 // - 3[0] * x1[0] * x1[14] + - 0xFFFF // - a[14] + + 0xFFFF * 7'q0 // + p[14] * q0[0] + + 5'q0 // + q0[2] + + 0xFFFF * q0'2 // + p[5] * q0[9] + + 0xFFFF * q0'3 // + p[4] * q0[10] + + 0xFFFF * q0'4 // + p[3] * q0[11] + + 0xFFFF * q0'5 // + p[2] * q0[12] + + 0xFFFF * q0'6 // + p[1] * q0[13] + + 0xFFFF * q0'7; // + p[0] * q0[14] + +eq_secp256r1_dbl_chunks[15] = 2 * s'8 * 7'y1 // 2[0] * s[15] * y1[0] + + 2 * s'7 * 6'y1 // + 2[0] * s[14] * y1[1] + + 2 * s'6 * 5'y1 // + 2[0] * s[13] * y1[2] + + 2 * s'5 * 4'y1 // + 2[0] * s[12] * y1[3] + + 2 * s'4 * 3'y1 // + 2[0] * s[11] * y1[4] + + 2 * s'3 * 2'y1 // + 2[0] * s[10] * y1[5] + + 2 * s'2 * 'y1 // + 2[0] * s[9] * y1[6] + + 2 * s' * y1 // + 2[0] * s[8] * y1[7] + + 2 * s * y1' // + 2[0] * s[7] * y1[8] + + 2 * 's * y1'2 // + 2[0] * s[6] * y1[9] + + 2 * 2's * y1'3 // + 2[0] * s[5] * y1[10] + + 2 * 3's * y1'4 // + 2[0] * s[4] * y1[11] + + 2 * 4's * y1'5 // + 2[0] * s[3] * y1[12] + + 2 * 5's * y1'6 // + 2[0] * s[2] * y1[13] + + 2 * 6's * y1'7 // + 2[0] * s[1] * y1[14] + + 2 * 7's * y1'8 // + 2[0] * s[0] * y1[15] + - 3 * x1'8 * 7'x1 // - 3[0] * x1[15] * x1[0] + - 3 * x1'7 * 6'x1 // - 3[0] * x1[14] * x1[1] + - 3 * x1'6 * 5'x1 // - 3[0] * x1[13] * x1[2] + - 3 * x1'5 * 4'x1 // - 3[0] * x1[12] * x1[3] + - 3 * x1'4 * 3'x1 // - 3[0] * x1[11] * x1[4] + - 3 * x1'3 * 2'x1 // - 3[0] * x1[10] * x1[5] + - 3 * x1'2 * 'x1 // - 3[0] * x1[9] * x1[6] + - 3 * x1' * x1 // - 3[0] * x1[8] * x1[7] + - 3 * x1 * x1' // - 3[0] * x1[7] * x1[8] + - 3 * 'x1 * x1'2 // - 3[0] * x1[6] * x1[9] + - 3 * 2'x1 * x1'3 // - 3[0] * x1[5] * x1[10] + - 3 * 3'x1 * x1'4 // - 3[0] * x1[4] * x1[11] + - 3 * 4'x1 * x1'5 // - 3[0] * x1[3] * x1[12] + - 3 * 5'x1 * x1'6 // - 3[0] * x1[2] * x1[13] + - 3 * 6'x1 * x1'7 // - 3[0] * x1[1] * x1[14] + - 3 * 7'x1 * x1'8 // - 3[0] * x1[0] * x1[15] + - 0xFFFF // - a[15] + + 0xFFFF * 7'q0 // + p[15] * q0[0] + + 0xFFFF * 6'q0 // + p[14] * q0[1] + + 4'q0 // + q0[3] + + 0xFFFF * q0'3 // + p[5] * q0[10] + + 0xFFFF * q0'4 // + p[4] * q0[11] + + 0xFFFF * q0'5 // + p[3] * q0[12] + + 0xFFFF * q0'6 // + p[2] * q0[13] + + 0xFFFF * q0'7 // + p[1] * q0[14] + + 0xFFFF * q0'8; // + p[0] * q0[15] + +// clock #8 + +eq_secp256r1_dbl_chunks[16] = 2 * s'7 * 7'y1 // 2[0] * s[15] * y1[1] + + 2 * s'6 * 6'y1 // + 2[0] * s[14] * y1[2] + + 2 * s'5 * 5'y1 // + 2[0] * s[13] * y1[3] + + 2 * s'4 * 4'y1 // + 2[0] * s[12] * y1[4] + + 2 * s'3 * 3'y1 // + 2[0] * s[11] * y1[5] + + 2 * s'2 * 2'y1 // + 2[0] * s[10] * y1[6] + + 2 * s' * 'y1 // + 2[0] * s[9] * y1[7] + + 2 * s * y1 // + 2[0] * s[8] * y1[8] + + 2 * 's * y1' // + 2[0] * s[7] * y1[9] + + 2 * 2's * y1'2 // + 2[0] * s[6] * y1[10] + + 2 * 3's * y1'3 // + 2[0] * s[5] * y1[11] + + 2 * 4's * y1'4 // + 2[0] * s[4] * y1[12] + + 2 * 5's * y1'5 // + 2[0] * s[3] * y1[13] + + 2 * 6's * y1'6 // + 2[0] * s[2] * y1[14] + + 2 * 7's * y1'7 // + 2[0] * s[1] * y1[15] + - 3 * x1'7 * 7'x1 // - 3[0] * x1[15] * x1[1] + - 3 * x1'6 * 6'x1 // - 3[0] * x1[14] * x1[2] + - 3 * x1'5 * 5'x1 // - 3[0] * x1[13] * x1[3] + - 3 * x1'4 * 4'x1 // - 3[0] * x1[12] * x1[4] + - 3 * x1'3 * 3'x1 // - 3[0] * x1[11] * x1[5] + - 3 * x1'2 * 2'x1 // - 3[0] * x1[10] * x1[6] + - 3 * x1' * 'x1 // - 3[0] * x1[9] * x1[7] + - 3 * x1 * x1 // - 3[0] * x1[8] * x1[8] + - 3 * 'x1 * x1' // - 3[0] * x1[7] * x1[9] + - 3 * 2'x1 * x1'2 // - 3[0] * x1[6] * x1[10] + - 3 * 3'x1 * x1'3 // - 3[0] * x1[5] * x1[11] + - 3 * 4'x1 * x1'4 // - 3[0] * x1[4] * x1[12] + - 3 * 5'x1 * x1'5 // - 3[0] * x1[3] * x1[13] + - 3 * 6'x1 * x1'6 // - 3[0] * x1[2] * x1[14] + - 3 * 7'x1 * x1'7 // - 3[0] * x1[1] * x1[15] + + 0xFFFF * 7'q0 // + p[15] * q0[1] + + 0xFFFF * 6'q0 // + p[14] * q0[2] + + 4'q0 // + q0[4] + + 0xFFFF * q0'3 // + p[5] * q0[11] + + 0xFFFF * q0'4 // + p[4] * q0[12] + + 0xFFFF * q0'5 // + p[3] * q0[13] + + 0xFFFF * q0'6 // + p[2] * q0[14] + + 0xFFFF * q0'7 // + p[1] * q0[15] + - 0xFFFC; // - (p*offset)[16] + +eq_secp256r1_dbl_chunks[17] = 2 * s'7 * 6'y1 // 2[0] * s[15] * y1[2] + + 2 * s'6 * 5'y1 // + 2[0] * s[14] * y1[3] + + 2 * s'5 * 4'y1 // + 2[0] * s[13] * y1[4] + + 2 * s'4 * 3'y1 // + 2[0] * s[12] * y1[5] + + 2 * s'3 * 2'y1 // + 2[0] * s[11] * y1[6] + + 2 * s'2 * 'y1 // + 2[0] * s[10] * y1[7] + + 2 * s' * y1 // + 2[0] * s[9] * y1[8] + + 2 * s * y1' // + 2[0] * s[8] * y1[9] + + 2 * 's * y1'2 // + 2[0] * s[7] * y1[10] + + 2 * 2's * y1'3 // + 2[0] * s[6] * y1[11] + + 2 * 3's * y1'4 // + 2[0] * s[5] * y1[12] + + 2 * 4's * y1'5 // + 2[0] * s[4] * y1[13] + + 2 * 5's * y1'6 // + 2[0] * s[3] * y1[14] + + 2 * 6's * y1'7 // + 2[0] * s[2] * y1[15] + - 3 * x1'7 * 6'x1 // - 3[0] * x1[15] * x1[2] + - 3 * x1'6 * 5'x1 // - 3[0] * x1[14] * x1[3] + - 3 * x1'5 * 4'x1 // - 3[0] * x1[13] * x1[4] + - 3 * x1'4 * 3'x1 // - 3[0] * x1[12] * x1[5] + - 3 * x1'3 * 2'x1 // - 3[0] * x1[11] * x1[6] + - 3 * x1'2 * 'x1 // - 3[0] * x1[10] * x1[7] + - 3 * x1' * x1 // - 3[0] * x1[9] * x1[8] + - 3 * x1 * x1' // - 3[0] * x1[8] * x1[9] + - 3 * 'x1 * x1'2 // - 3[0] * x1[7] * x1[10] + - 3 * 2'x1 * x1'3 // - 3[0] * x1[6] * x1[11] + - 3 * 3'x1 * x1'4 // - 3[0] * x1[5] * x1[12] + - 3 * 4'x1 * x1'5 // - 3[0] * x1[4] * x1[13] + - 3 * 5'x1 * x1'6 // - 3[0] * x1[3] * x1[14] + - 3 * 6'x1 * x1'7 // - 3[0] * x1[2] * x1[15] + + 0xFFFF * 6'q0 // + p[15] * q0[2] + + 0xFFFF * 5'q0 // + p[14] * q0[3] + + 3'q0 // + q0[5] + + 0xFFFF * q0'4 // + p[5] * q0[12] + + 0xFFFF * q0'5 // + p[4] * q0[13] + + 0xFFFF * q0'6 // + p[3] * q0[14] + + 0xFFFF * q0'7 // + p[2] * q0[15] + - 0xFFFF; // - (p*offset)[17] + +// clock #9 + +eq_secp256r1_dbl_chunks[18] = 2 * s'6 * 6'y1 // 2[0] * s[15] * y1[3] + + 2 * s'5 * 5'y1 // + 2[0] * s[14] * y1[4] + + 2 * s'4 * 4'y1 // + 2[0] * s[13] * y1[5] + + 2 * s'3 * 3'y1 // + 2[0] * s[12] * y1[6] + + 2 * s'2 * 2'y1 // + 2[0] * s[11] * y1[7] + + 2 * s' * 'y1 // + 2[0] * s[10] * y1[8] + + 2 * s * y1 // + 2[0] * s[9] * y1[9] + + 2 * 's * y1' // + 2[0] * s[8] * y1[10] + + 2 * 2's * y1'2 // + 2[0] * s[7] * y1[11] + + 2 * 3's * y1'3 // + 2[0] * s[6] * y1[12] + + 2 * 4's * y1'4 // + 2[0] * s[5] * y1[13] + + 2 * 5's * y1'5 // + 2[0] * s[4] * y1[14] + + 2 * 6's * y1'6 // + 2[0] * s[3] * y1[15] + - 3 * x1'6 * 6'x1 // - 3[0] * x1[15] * x1[3] + - 3 * x1'5 * 5'x1 // - 3[0] * x1[14] * x1[4] + - 3 * x1'4 * 4'x1 // - 3[0] * x1[13] * x1[5] + - 3 * x1'3 * 3'x1 // - 3[0] * x1[12] * x1[6] + - 3 * x1'2 * 2'x1 // - 3[0] * x1[11] * x1[7] + - 3 * x1' * 'x1 // - 3[0] * x1[10] * x1[8] + - 3 * x1 * x1 // - 3[0] * x1[9] * x1[9] + - 3 * 'x1 * x1' // - 3[0] * x1[8] * x1[10] + - 3 * 2'x1 * x1'2 // - 3[0] * x1[7] * x1[11] + - 3 * 3'x1 * x1'3 // - 3[0] * x1[6] * x1[12] + - 3 * 4'x1 * x1'4 // - 3[0] * x1[5] * x1[13] + - 3 * 5'x1 * x1'5 // - 3[0] * x1[4] * x1[14] + - 3 * 6'x1 * x1'6 // - 3[0] * x1[3] * x1[15] + + 0xFFFF * 6'q0 // + p[15] * q0[3] + + 0xFFFF * 5'q0 // + p[14] * q0[4] + + 3'q0 // + q0[6] + + 0xFFFF * q0'4 // + p[5] * q0[13] + + 0xFFFF * q0'5 // + p[4] * q0[14] + + 0xFFFF * q0'6 // + p[3] * q0[15] + - 0xFFFF; // - (p*offset)[18] + +eq_secp256r1_dbl_chunks[19] = 2 * s'6 * 5'y1 // 2[0] * s[15] * y1[4] + + 2 * s'5 * 4'y1 // + 2[0] * s[14] * y1[5] + + 2 * s'4 * 3'y1 // + 2[0] * s[13] * y1[6] + + 2 * s'3 * 2'y1 // + 2[0] * s[12] * y1[7] + + 2 * s'2 * 'y1 // + 2[0] * s[11] * y1[8] + + 2 * s' * y1 // + 2[0] * s[10] * y1[9] + + 2 * s * y1' // + 2[0] * s[9] * y1[10] + + 2 * 's * y1'2 // + 2[0] * s[8] * y1[11] + + 2 * 2's * y1'3 // + 2[0] * s[7] * y1[12] + + 2 * 3's * y1'4 // + 2[0] * s[6] * y1[13] + + 2 * 4's * y1'5 // + 2[0] * s[5] * y1[14] + + 2 * 5's * y1'6 // + 2[0] * s[4] * y1[15] + - 3 * x1'6 * 5'x1 // - 3[0] * x1[15] * x1[4] + - 3 * x1'5 * 4'x1 // - 3[0] * x1[14] * x1[5] + - 3 * x1'4 * 3'x1 // - 3[0] * x1[13] * x1[6] + - 3 * x1'3 * 2'x1 // - 3[0] * x1[12] * x1[7] + - 3 * x1'2 * 'x1 // - 3[0] * x1[11] * x1[8] + - 3 * x1' * x1 // - 3[0] * x1[10] * x1[9] + - 3 * x1 * x1' // - 3[0] * x1[9] * x1[10] + - 3 * 'x1 * x1'2 // - 3[0] * x1[8] * x1[11] + - 3 * 2'x1 * x1'3 // - 3[0] * x1[7] * x1[12] + - 3 * 3'x1 * x1'4 // - 3[0] * x1[6] * x1[13] + - 3 * 4'x1 * x1'5 // - 3[0] * x1[5] * x1[14] + - 3 * 5'x1 * x1'6 // - 3[0] * x1[4] * x1[15] + + 0xFFFF * 5'q0 // + p[15] * q0[4] + + 0xFFFF * 4'q0 // + p[14] * q0[5] + + 2'q0 // + q0[7] + + 0xFFFF * q0'5 // + p[5] * q0[14] + + 0xFFFF * q0'6 // + p[4] * q0[15] + - 0xFFFF; // - (p*offset)[19] + +// clock #10 + +eq_secp256r1_dbl_chunks[20] = 2 * s'5 * 5'y1 // 2[0] * s[15] * y1[5] + + 2 * s'4 * 4'y1 // + 2[0] * s[14] * y1[6] + + 2 * s'3 * 3'y1 // + 2[0] * s[13] * y1[7] + + 2 * s'2 * 2'y1 // + 2[0] * s[12] * y1[8] + + 2 * s' * 'y1 // + 2[0] * s[11] * y1[9] + + 2 * s * y1 // + 2[0] * s[10] * y1[10] + + 2 * 's * y1' // + 2[0] * s[9] * y1[11] + + 2 * 2's * y1'2 // + 2[0] * s[8] * y1[12] + + 2 * 3's * y1'3 // + 2[0] * s[7] * y1[13] + + 2 * 4's * y1'4 // + 2[0] * s[6] * y1[14] + + 2 * 5's * y1'5 // + 2[0] * s[5] * y1[15] + - 3 * x1'5 * 5'x1 // - 3[0] * x1[15] * x1[5] + - 3 * x1'4 * 4'x1 // - 3[0] * x1[14] * x1[6] + - 3 * x1'3 * 3'x1 // - 3[0] * x1[13] * x1[7] + - 3 * x1'2 * 2'x1 // - 3[0] * x1[12] * x1[8] + - 3 * x1' * 'x1 // - 3[0] * x1[11] * x1[9] + - 3 * x1 * x1 // - 3[0] * x1[10] * x1[10] + - 3 * 'x1 * x1' // - 3[0] * x1[9] * x1[11] + - 3 * 2'x1 * x1'2 // - 3[0] * x1[8] * x1[12] + - 3 * 3'x1 * x1'3 // - 3[0] * x1[7] * x1[13] + - 3 * 4'x1 * x1'4 // - 3[0] * x1[6] * x1[14] + - 3 * 5'x1 * x1'5 // - 3[0] * x1[5] * x1[15] + + 0xFFFF * 5'q0 // + p[15] * q0[5] + + 0xFFFF * 4'q0 // + p[14] * q0[6] + + 2'q0 // + q0[8] + + 0xFFFF * q0'5 // + p[5] * q0[15] + - 0xFFFF; // - (p*offset)[20] + +eq_secp256r1_dbl_chunks[21] = 2 * s'5 * 4'y1 // 2[0] * s[15] * y1[6] + + 2 * s'4 * 3'y1 // + 2[0] * s[14] * y1[7] + + 2 * s'3 * 2'y1 // + 2[0] * s[13] * y1[8] + + 2 * s'2 * 'y1 // + 2[0] * s[12] * y1[9] + + 2 * s' * y1 // + 2[0] * s[11] * y1[10] + + 2 * s * y1' // + 2[0] * s[10] * y1[11] + + 2 * 's * y1'2 // + 2[0] * s[9] * y1[12] + + 2 * 2's * y1'3 // + 2[0] * s[8] * y1[13] + + 2 * 3's * y1'4 // + 2[0] * s[7] * y1[14] + + 2 * 4's * y1'5 // + 2[0] * s[6] * y1[15] + - 3 * x1'5 * 4'x1 // - 3[0] * x1[15] * x1[6] + - 3 * x1'4 * 3'x1 // - 3[0] * x1[14] * x1[7] + - 3 * x1'3 * 2'x1 // - 3[0] * x1[13] * x1[8] + - 3 * x1'2 * 'x1 // - 3[0] * x1[12] * x1[9] + - 3 * x1' * x1 // - 3[0] * x1[11] * x1[10] + - 3 * x1 * x1' // - 3[0] * x1[10] * x1[11] + - 3 * 'x1 * x1'2 // - 3[0] * x1[9] * x1[12] + - 3 * 2'x1 * x1'3 // - 3[0] * x1[8] * x1[13] + - 3 * 3'x1 * x1'4 // - 3[0] * x1[7] * x1[14] + - 3 * 4'x1 * x1'5 // - 3[0] * x1[6] * x1[15] + + 0xFFFF * 4'q0 // + p[15] * q0[6] + + 0xFFFF * 3'q0 // + p[14] * q0[7] + + 'q0 // + q0[9] + - 0xFFFF; // - (p*offset)[21] + +// clock #11 + +eq_secp256r1_dbl_chunks[22] = 2 * s'4 * 4'y1 // 2[0] * s[15] * y1[7] + + 2 * s'3 * 3'y1 // + 2[0] * s[14] * y1[8] + + 2 * s'2 * 2'y1 // + 2[0] * s[13] * y1[9] + + 2 * s' * 'y1 // + 2[0] * s[12] * y1[10] + + 2 * s * y1 // + 2[0] * s[11] * y1[11] + + 2 * 's * y1' // + 2[0] * s[10] * y1[12] + + 2 * 2's * y1'2 // + 2[0] * s[9] * y1[13] + + 2 * 3's * y1'3 // + 2[0] * s[8] * y1[14] + + 2 * 4's * y1'4 // + 2[0] * s[7] * y1[15] + - 3 * x1'4 * 4'x1 // - 3[0] * x1[15] * x1[7] + - 3 * x1'3 * 3'x1 // - 3[0] * x1[14] * x1[8] + - 3 * x1'2 * 2'x1 // - 3[0] * x1[13] * x1[9] + - 3 * x1' * 'x1 // - 3[0] * x1[12] * x1[10] + - 3 * x1 * x1 // - 3[0] * x1[11] * x1[11] + - 3 * 'x1 * x1' // - 3[0] * x1[10] * x1[12] + - 3 * 2'x1 * x1'2 // - 3[0] * x1[9] * x1[13] + - 3 * 3'x1 * x1'3 // - 3[0] * x1[8] * x1[14] + - 3 * 4'x1 * x1'4 // - 3[0] * x1[7] * x1[15] + + 0xFFFF * 4'q0 // + p[15] * q0[7] + + 0xFFFF * 3'q0 // + p[14] * q0[8] + + 'q0 // + q0[10] + - 0x3; // - (p*offset)[22] + +eq_secp256r1_dbl_chunks[23] = 2 * s'4 * 3'y1 // 2[0] * s[15] * y1[8] + + 2 * s'3 * 2'y1 // + 2[0] * s[14] * y1[9] + + 2 * s'2 * 'y1 // + 2[0] * s[13] * y1[10] + + 2 * s' * y1 // + 2[0] * s[12] * y1[11] + + 2 * s * y1' // + 2[0] * s[11] * y1[12] + + 2 * 's * y1'2 // + 2[0] * s[10] * y1[13] + + 2 * 2's * y1'3 // + 2[0] * s[9] * y1[14] + + 2 * 3's * y1'4 // + 2[0] * s[8] * y1[15] + - 3 * x1'4 * 3'x1 // - 3[0] * x1[15] * x1[8] + - 3 * x1'3 * 2'x1 // - 3[0] * x1[14] * x1[9] + - 3 * x1'2 * 'x1 // - 3[0] * x1[13] * x1[10] + - 3 * x1' * x1 // - 3[0] * x1[12] * x1[11] + - 3 * x1 * x1' // - 3[0] * x1[11] * x1[12] + - 3 * 'x1 * x1'2 // - 3[0] * x1[10] * x1[13] + - 3 * 2'x1 * x1'3 // - 3[0] * x1[9] * x1[14] + - 3 * 3'x1 * x1'4 // - 3[0] * x1[8] * x1[15] + + 0xFFFF * 3'q0 // + p[15] * q0[8] + + 0xFFFF * 2'q0 // + p[14] * q0[9] + + q0; // + q0[11] + +// clock #12 + +eq_secp256r1_dbl_chunks[24] = 2 * s'3 * 3'y1 // 2[0] * s[15] * y1[9] + + 2 * s'2 * 2'y1 // + 2[0] * s[14] * y1[10] + + 2 * s' * 'y1 // + 2[0] * s[13] * y1[11] + + 2 * s * y1 // + 2[0] * s[12] * y1[12] + + 2 * 's * y1' // + 2[0] * s[11] * y1[13] + + 2 * 2's * y1'2 // + 2[0] * s[10] * y1[14] + + 2 * 3's * y1'3 // + 2[0] * s[9] * y1[15] + - 3 * x1'3 * 3'x1 // - 3[0] * x1[15] * x1[9] + - 3 * x1'2 * 2'x1 // - 3[0] * x1[14] * x1[10] + - 3 * x1' * 'x1 // - 3[0] * x1[13] * x1[11] + - 3 * x1 * x1 // - 3[0] * x1[12] * x1[12] + - 3 * 'x1 * x1' // - 3[0] * x1[11] * x1[13] + - 3 * 2'x1 * x1'2 // - 3[0] * x1[10] * x1[14] + - 3 * 3'x1 * x1'3 // - 3[0] * x1[9] * x1[15] + + 0xFFFF * 3'q0 // + p[15] * q0[9] + + 0xFFFF * 2'q0 // + p[14] * q0[10] + + q0; // + q0[12] + +eq_secp256r1_dbl_chunks[25] = 2 * s'3 * 2'y1 // 2[0] * s[15] * y1[10] + + 2 * s'2 * 'y1 // + 2[0] * s[14] * y1[11] + + 2 * s' * y1 // + 2[0] * s[13] * y1[12] + + 2 * s * y1' // + 2[0] * s[12] * y1[13] + + 2 * 's * y1'2 // + 2[0] * s[11] * y1[14] + + 2 * 2's * y1'3 // + 2[0] * s[10] * y1[15] + - 3 * x1'3 * 2'x1 // - 3[0] * x1[15] * x1[10] + - 3 * x1'2 * 'x1 // - 3[0] * x1[14] * x1[11] + - 3 * x1' * x1 // - 3[0] * x1[13] * x1[12] + - 3 * x1 * x1' // - 3[0] * x1[12] * x1[13] + - 3 * 'x1 * x1'2 // - 3[0] * x1[11] * x1[14] + - 3 * 2'x1 * x1'3 // - 3[0] * x1[10] * x1[15] + + 0xFFFF * 2'q0 // + p[15] * q0[10] + + 0xFFFF * 'q0 // + p[14] * q0[11] + + q0'; // + q0[13] + +// clock #13 + +eq_secp256r1_dbl_chunks[26] = 2 * s'2 * 2'y1 // 2[0] * s[15] * y1[11] + + 2 * s' * 'y1 // + 2[0] * s[14] * y1[12] + + 2 * s * y1 // + 2[0] * s[13] * y1[13] + + 2 * 's * y1' // + 2[0] * s[12] * y1[14] + + 2 * 2's * y1'2 // + 2[0] * s[11] * y1[15] + - 3 * x1'2 * 2'x1 // - 3[0] * x1[15] * x1[11] + - 3 * x1' * 'x1 // - 3[0] * x1[14] * x1[12] + - 3 * x1 * x1 // - 3[0] * x1[13] * x1[13] + - 3 * 'x1 * x1' // - 3[0] * x1[12] * x1[14] + - 3 * 2'x1 * x1'2 // - 3[0] * x1[11] * x1[15] + + 0xFFFF * 2'q0 // + p[15] * q0[11] + + 0xFFFF * 'q0 // + p[14] * q0[12] + + q0'; // + q0[14] + +eq_secp256r1_dbl_chunks[27] = 2 * s'2 * 'y1 // 2[0] * s[15] * y1[12] + + 2 * s' * y1 // + 2[0] * s[14] * y1[13] + + 2 * s * y1' // + 2[0] * s[13] * y1[14] + + 2 * 's * y1'2 // + 2[0] * s[12] * y1[15] + - 3 * x1'2 * 'x1 // - 3[0] * x1[15] * x1[12] + - 3 * x1' * x1 // - 3[0] * x1[14] * x1[13] + - 3 * x1 * x1' // - 3[0] * x1[13] * x1[14] + - 3 * 'x1 * x1'2 // - 3[0] * x1[12] * x1[15] + + 0xFFFF * 'q0 // + p[15] * q0[12] + + 0xFFFF * q0 // + p[14] * q0[13] + + q0'2; // + q0[15] + +// clock #14 + +eq_secp256r1_dbl_chunks[28] = 2 * s' * 'y1 // 2[0] * s[15] * y1[13] + + 2 * s * y1 // + 2[0] * s[14] * y1[14] + + 2 * 's * y1' // + 2[0] * s[13] * y1[15] + - 3 * x1' * 'x1 // - 3[0] * x1[15] * x1[13] + - 3 * x1 * x1 // - 3[0] * x1[14] * x1[14] + - 3 * 'x1 * x1' // - 3[0] * x1[13] * x1[15] + + 0xFFFF * 'q0 // + p[15] * q0[13] + + 0xFFFF * q0 // + p[14] * q0[14] + - 0x4; // - (p*offset)[28] + +eq_secp256r1_dbl_chunks[29] = 2 * s' * y1 // 2[0] * s[15] * y1[14] + + 2 * s * y1' // + 2[0] * s[14] * y1[15] + - 3 * x1' * x1 // - 3[0] * x1[15] * x1[14] + - 3 * x1 * x1' // - 3[0] * x1[14] * x1[15] + + 0xFFFF * q0 // + p[15] * q0[14] + + 0xFFFF * q0'; // + p[14] * q0[15] + +// clock #15 + +eq_secp256r1_dbl_chunks[30] = 2 * s * y1 // 2[0] * s[15] * y1[15] + - 3 * x1 * x1 // - 3[0] * x1[15] * x1[15] + + 0xFFFF * q0 // + p[15] * q0[15] + - 0xFFFC; // - (p*offset)[30] + +eq_secp256r1_dbl_chunks[31] = - 0x3FFFF; // - (p*offset)[31] + diff --git a/precompiles/arith_eq/pil/equations/secp256r1_x3.pil b/precompiles/arith_eq/pil/equations/secp256r1_x3.pil new file mode 100644 index 000000000..8a5baf6ad --- /dev/null +++ b/precompiles/arith_eq/pil/equations/secp256r1_x3.pil @@ -0,0 +1,537 @@ +// code generated +// +// equation: s*s-x1-x2-x3-p*q1+p*offset +// +// p: 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF +// offset: 0x4 +// (p*offset): 0x3FFFFFFFC00000004000000000000000000000003FFFFFFFFFFFFFFFFFFFFFFFC +// +// chunks:16 +// chunk_bits:16 +// terms_by_clock: 2 + + +const expr eq_secp256r1_x3_chunks[31]; + +// clock #0 + +eq_secp256r1_x3_chunks[ 0] = s * s // s[0] * s[0] + - x1 // - x1[0] + - x2 // - x2[0] + - x3 // - x3[0] + - 0xFFFF * q1 // - p[0] * q1[0] + + 0xFFFC; // + (p*offset)[0] + +eq_secp256r1_x3_chunks[ 1] = s' * s // s[1] * s[0] + + s * s' // + s[0] * s[1] + - x1' // - x1[1] + - x2' // - x2[1] + - x3' // - x3[1] + - 0xFFFF * q1 // - p[1] * q1[0] + - 0xFFFF * q1' // - p[0] * q1[1] + + 0xFFFF; // + (p*offset)[1] + +// clock #1 + +eq_secp256r1_x3_chunks[ 2] = s' * 's // s[2] * s[0] + + s * s // + s[1] * s[1] + + 's * s' // + s[0] * s[2] + - x1' // - x1[2] + - x2' // - x2[2] + - x3' // - x3[2] + - 0xFFFF * 'q1 // - p[2] * q1[0] + - 0xFFFF * q1 // - p[1] * q1[1] + - 0xFFFF * q1' // - p[0] * q1[2] + + 0xFFFF; // + (p*offset)[2] + +eq_secp256r1_x3_chunks[ 3] = s'2 * 's // s[3] * s[0] + + s' * s // + s[2] * s[1] + + s * s' // + s[1] * s[2] + + 's * s'2 // + s[0] * s[3] + - x1'2 // - x1[3] + - x2'2 // - x2[3] + - x3'2 // - x3[3] + - 0xFFFF * 'q1 // - p[3] * q1[0] + - 0xFFFF * q1 // - p[2] * q1[1] + - 0xFFFF * q1' // - p[1] * q1[2] + - 0xFFFF * q1'2 // - p[0] * q1[3] + + 0xFFFF; // + (p*offset)[3] + +// clock #2 + +eq_secp256r1_x3_chunks[ 4] = s'2 * 2's // s[4] * s[0] + + s' * 's // + s[3] * s[1] + + s * s // + s[2] * s[2] + + 's * s' // + s[1] * s[3] + + 2's * s'2 // + s[0] * s[4] + - x1'2 // - x1[4] + - x2'2 // - x2[4] + - x3'2 // - x3[4] + - 0xFFFF * 2'q1 // - p[4] * q1[0] + - 0xFFFF * 'q1 // - p[3] * q1[1] + - 0xFFFF * q1 // - p[2] * q1[2] + - 0xFFFF * q1' // - p[1] * q1[3] + - 0xFFFF * q1'2 // - p[0] * q1[4] + + 0xFFFF; // + (p*offset)[4] + +eq_secp256r1_x3_chunks[ 5] = s'3 * 2's // s[5] * s[0] + + s'2 * 's // + s[4] * s[1] + + s' * s // + s[3] * s[2] + + s * s' // + s[2] * s[3] + + 's * s'2 // + s[1] * s[4] + + 2's * s'3 // + s[0] * s[5] + - x1'3 // - x1[5] + - x2'3 // - x2[5] + - x3'3 // - x3[5] + - 0xFFFF * 2'q1 // - p[5] * q1[0] + - 0xFFFF * 'q1 // - p[4] * q1[1] + - 0xFFFF * q1 // - p[3] * q1[2] + - 0xFFFF * q1' // - p[2] * q1[3] + - 0xFFFF * q1'2 // - p[1] * q1[4] + - 0xFFFF * q1'3 // - p[0] * q1[5] + + 0xFFFF; // + (p*offset)[5] + +// clock #3 + +eq_secp256r1_x3_chunks[ 6] = s'3 * 3's // s[6] * s[0] + + s'2 * 2's // + s[5] * s[1] + + s' * 's // + s[4] * s[2] + + s * s // + s[3] * s[3] + + 's * s' // + s[2] * s[4] + + 2's * s'2 // + s[1] * s[5] + + 3's * s'3 // + s[0] * s[6] + - x1'3 // - x1[6] + - x2'3 // - x2[6] + - x3'3 // - x3[6] + - 0xFFFF * 2'q1 // - p[5] * q1[1] + - 0xFFFF * 'q1 // - p[4] * q1[2] + - 0xFFFF * q1 // - p[3] * q1[3] + - 0xFFFF * q1' // - p[2] * q1[4] + - 0xFFFF * q1'2 // - p[1] * q1[5] + - 0xFFFF * q1'3 // - p[0] * q1[6] + + 0x3; // + (p*offset)[6] + +eq_secp256r1_x3_chunks[ 7] = s'4 * 3's // s[7] * s[0] + + s'3 * 2's // + s[6] * s[1] + + s'2 * 's // + s[5] * s[2] + + s' * s // + s[4] * s[3] + + s * s' // + s[3] * s[4] + + 's * s'2 // + s[2] * s[5] + + 2's * s'3 // + s[1] * s[6] + + 3's * s'4 // + s[0] * s[7] + - x1'4 // - x1[7] + - x2'4 // - x2[7] + - x3'4 // - x3[7] + - 0xFFFF * 'q1 // - p[5] * q1[2] + - 0xFFFF * q1 // - p[4] * q1[3] + - 0xFFFF * q1' // - p[3] * q1[4] + - 0xFFFF * q1'2 // - p[2] * q1[5] + - 0xFFFF * q1'3 // - p[1] * q1[6] + - 0xFFFF * q1'4; // - p[0] * q1[7] + +// clock #4 + +eq_secp256r1_x3_chunks[ 8] = s'4 * 4's // s[8] * s[0] + + s'3 * 3's // + s[7] * s[1] + + s'2 * 2's // + s[6] * s[2] + + s' * 's // + s[5] * s[3] + + s * s // + s[4] * s[4] + + 's * s' // + s[3] * s[5] + + 2's * s'2 // + s[2] * s[6] + + 3's * s'3 // + s[1] * s[7] + + 4's * s'4 // + s[0] * s[8] + - x1'4 // - x1[8] + - x2'4 // - x2[8] + - x3'4 // - x3[8] + - 0xFFFF * 'q1 // - p[5] * q1[3] + - 0xFFFF * q1 // - p[4] * q1[4] + - 0xFFFF * q1' // - p[3] * q1[5] + - 0xFFFF * q1'2 // - p[2] * q1[6] + - 0xFFFF * q1'3 // - p[1] * q1[7] + - 0xFFFF * q1'4; // - p[0] * q1[8] + +eq_secp256r1_x3_chunks[ 9] = s'5 * 4's // s[9] * s[0] + + s'4 * 3's // + s[8] * s[1] + + s'3 * 2's // + s[7] * s[2] + + s'2 * 's // + s[6] * s[3] + + s' * s // + s[5] * s[4] + + s * s' // + s[4] * s[5] + + 's * s'2 // + s[3] * s[6] + + 2's * s'3 // + s[2] * s[7] + + 3's * s'4 // + s[1] * s[8] + + 4's * s'5 // + s[0] * s[9] + - x1'5 // - x1[9] + - x2'5 // - x2[9] + - x3'5 // - x3[9] + - 0xFFFF * q1 // - p[5] * q1[4] + - 0xFFFF * q1' // - p[4] * q1[5] + - 0xFFFF * q1'2 // - p[3] * q1[6] + - 0xFFFF * q1'3 // - p[2] * q1[7] + - 0xFFFF * q1'4 // - p[1] * q1[8] + - 0xFFFF * q1'5; // - p[0] * q1[9] + +// clock #5 + +eq_secp256r1_x3_chunks[10] = s'5 * 5's // s[10] * s[0] + + s'4 * 4's // + s[9] * s[1] + + s'3 * 3's // + s[8] * s[2] + + s'2 * 2's // + s[7] * s[3] + + s' * 's // + s[6] * s[4] + + s * s // + s[5] * s[5] + + 's * s' // + s[4] * s[6] + + 2's * s'2 // + s[3] * s[7] + + 3's * s'3 // + s[2] * s[8] + + 4's * s'4 // + s[1] * s[9] + + 5's * s'5 // + s[0] * s[10] + - x1'5 // - x1[10] + - x2'5 // - x2[10] + - x3'5 // - x3[10] + - 0xFFFF * q1 // - p[5] * q1[5] + - 0xFFFF * q1' // - p[4] * q1[6] + - 0xFFFF * q1'2 // - p[3] * q1[7] + - 0xFFFF * q1'3 // - p[2] * q1[8] + - 0xFFFF * q1'4 // - p[1] * q1[9] + - 0xFFFF * q1'5; // - p[0] * q1[10] + +eq_secp256r1_x3_chunks[11] = s'6 * 5's // s[11] * s[0] + + s'5 * 4's // + s[10] * s[1] + + s'4 * 3's // + s[9] * s[2] + + s'3 * 2's // + s[8] * s[3] + + s'2 * 's // + s[7] * s[4] + + s' * s // + s[6] * s[5] + + s * s' // + s[5] * s[6] + + 's * s'2 // + s[4] * s[7] + + 2's * s'3 // + s[3] * s[8] + + 3's * s'4 // + s[2] * s[9] + + 4's * s'5 // + s[1] * s[10] + + 5's * s'6 // + s[0] * s[11] + - x1'6 // - x1[11] + - x2'6 // - x2[11] + - x3'6 // - x3[11] + - 0xFFFF * q1' // - p[5] * q1[6] + - 0xFFFF * q1'2 // - p[4] * q1[7] + - 0xFFFF * q1'3 // - p[3] * q1[8] + - 0xFFFF * q1'4 // - p[2] * q1[9] + - 0xFFFF * q1'5 // - p[1] * q1[10] + - 0xFFFF * q1'6; // - p[0] * q1[11] + +// clock #6 + +eq_secp256r1_x3_chunks[12] = s'6 * 6's // s[12] * s[0] + + s'5 * 5's // + s[11] * s[1] + + s'4 * 4's // + s[10] * s[2] + + s'3 * 3's // + s[9] * s[3] + + s'2 * 2's // + s[8] * s[4] + + s' * 's // + s[7] * s[5] + + s * s // + s[6] * s[6] + + 's * s' // + s[5] * s[7] + + 2's * s'2 // + s[4] * s[8] + + 3's * s'3 // + s[3] * s[9] + + 4's * s'4 // + s[2] * s[10] + + 5's * s'5 // + s[1] * s[11] + + 6's * s'6 // + s[0] * s[12] + - x1'6 // - x1[12] + - x2'6 // - x2[12] + - x3'6 // - x3[12] + - 6'q1 // - q1[0] + - 0xFFFF * q1' // - p[5] * q1[7] + - 0xFFFF * q1'2 // - p[4] * q1[8] + - 0xFFFF * q1'3 // - p[3] * q1[9] + - 0xFFFF * q1'4 // - p[2] * q1[10] + - 0xFFFF * q1'5 // - p[1] * q1[11] + - 0xFFFF * q1'6 // - p[0] * q1[12] + + 0x4; // + (p*offset)[12] + +eq_secp256r1_x3_chunks[13] = s'7 * 6's // s[13] * s[0] + + s'6 * 5's // + s[12] * s[1] + + s'5 * 4's // + s[11] * s[2] + + s'4 * 3's // + s[10] * s[3] + + s'3 * 2's // + s[9] * s[4] + + s'2 * 's // + s[8] * s[5] + + s' * s // + s[7] * s[6] + + s * s' // + s[6] * s[7] + + 's * s'2 // + s[5] * s[8] + + 2's * s'3 // + s[4] * s[9] + + 3's * s'4 // + s[3] * s[10] + + 4's * s'5 // + s[2] * s[11] + + 5's * s'6 // + s[1] * s[12] + + 6's * s'7 // + s[0] * s[13] + - x1'7 // - x1[13] + - x2'7 // - x2[13] + - x3'7 // - x3[13] + - 5'q1 // - q1[1] + - 0xFFFF * q1'2 // - p[5] * q1[8] + - 0xFFFF * q1'3 // - p[4] * q1[9] + - 0xFFFF * q1'4 // - p[3] * q1[10] + - 0xFFFF * q1'5 // - p[2] * q1[11] + - 0xFFFF * q1'6 // - p[1] * q1[12] + - 0xFFFF * q1'7; // - p[0] * q1[13] + +// clock #7 + +eq_secp256r1_x3_chunks[14] = s'7 * 7's // s[14] * s[0] + + s'6 * 6's // + s[13] * s[1] + + s'5 * 5's // + s[12] * s[2] + + s'4 * 4's // + s[11] * s[3] + + s'3 * 3's // + s[10] * s[4] + + s'2 * 2's // + s[9] * s[5] + + s' * 's // + s[8] * s[6] + + s * s // + s[7] * s[7] + + 's * s' // + s[6] * s[8] + + 2's * s'2 // + s[5] * s[9] + + 3's * s'3 // + s[4] * s[10] + + 4's * s'4 // + s[3] * s[11] + + 5's * s'5 // + s[2] * s[12] + + 6's * s'6 // + s[1] * s[13] + + 7's * s'7 // + s[0] * s[14] + - x1'7 // - x1[14] + - x2'7 // - x2[14] + - x3'7 // - x3[14] + - 0xFFFF * 7'q1 // - p[14] * q1[0] + - 5'q1 // - q1[2] + - 0xFFFF * q1'2 // - p[5] * q1[9] + - 0xFFFF * q1'3 // - p[4] * q1[10] + - 0xFFFF * q1'4 // - p[3] * q1[11] + - 0xFFFF * q1'5 // - p[2] * q1[12] + - 0xFFFF * q1'6 // - p[1] * q1[13] + - 0xFFFF * q1'7 // - p[0] * q1[14] + + 0xFFFC; // + (p*offset)[14] + +eq_secp256r1_x3_chunks[15] = s'8 * 7's // s[15] * s[0] + + s'7 * 6's // + s[14] * s[1] + + s'6 * 5's // + s[13] * s[2] + + s'5 * 4's // + s[12] * s[3] + + s'4 * 3's // + s[11] * s[4] + + s'3 * 2's // + s[10] * s[5] + + s'2 * 's // + s[9] * s[6] + + s' * s // + s[8] * s[7] + + s * s' // + s[7] * s[8] + + 's * s'2 // + s[6] * s[9] + + 2's * s'3 // + s[5] * s[10] + + 3's * s'4 // + s[4] * s[11] + + 4's * s'5 // + s[3] * s[12] + + 5's * s'6 // + s[2] * s[13] + + 6's * s'7 // + s[1] * s[14] + + 7's * s'8 // + s[0] * s[15] + - x1'8 // - x1[15] + - x2'8 // - x2[15] + - x3'8 // - x3[15] + - 0xFFFF * 7'q1 // - p[15] * q1[0] + - 0xFFFF * 6'q1 // - p[14] * q1[1] + - 4'q1 // - q1[3] + - 0xFFFF * q1'3 // - p[5] * q1[10] + - 0xFFFF * q1'4 // - p[4] * q1[11] + - 0xFFFF * q1'5 // - p[3] * q1[12] + - 0xFFFF * q1'6 // - p[2] * q1[13] + - 0xFFFF * q1'7 // - p[1] * q1[14] + - 0xFFFF * q1'8 // - p[0] * q1[15] + + 0xFFFF; // + (p*offset)[15] + +// clock #8 + +eq_secp256r1_x3_chunks[16] = s'7 * 7's // s[15] * s[1] + + s'6 * 6's // + s[14] * s[2] + + s'5 * 5's // + s[13] * s[3] + + s'4 * 4's // + s[12] * s[4] + + s'3 * 3's // + s[11] * s[5] + + s'2 * 2's // + s[10] * s[6] + + s' * 's // + s[9] * s[7] + + s * s // + s[8] * s[8] + + 's * s' // + s[7] * s[9] + + 2's * s'2 // + s[6] * s[10] + + 3's * s'3 // + s[5] * s[11] + + 4's * s'4 // + s[4] * s[12] + + 5's * s'5 // + s[3] * s[13] + + 6's * s'6 // + s[2] * s[14] + + 7's * s'7 // + s[1] * s[15] + - 0xFFFF * 7'q1 // - p[15] * q1[1] + - 0xFFFF * 6'q1 // - p[14] * q1[2] + - 4'q1 // - q1[4] + - 0xFFFF * q1'3 // - p[5] * q1[11] + - 0xFFFF * q1'4 // - p[4] * q1[12] + - 0xFFFF * q1'5 // - p[3] * q1[13] + - 0xFFFF * q1'6 // - p[2] * q1[14] + - 0xFFFF * q1'7 // - p[1] * q1[15] + + 0x3; // + (p*offset)[16] + +eq_secp256r1_x3_chunks[17] = s'7 * 6's // s[15] * s[2] + + s'6 * 5's // + s[14] * s[3] + + s'5 * 4's // + s[13] * s[4] + + s'4 * 3's // + s[12] * s[5] + + s'3 * 2's // + s[11] * s[6] + + s'2 * 's // + s[10] * s[7] + + s' * s // + s[9] * s[8] + + s * s' // + s[8] * s[9] + + 's * s'2 // + s[7] * s[10] + + 2's * s'3 // + s[6] * s[11] + + 3's * s'4 // + s[5] * s[12] + + 4's * s'5 // + s[4] * s[13] + + 5's * s'6 // + s[3] * s[14] + + 6's * s'7 // + s[2] * s[15] + - 0xFFFF * 6'q1 // - p[15] * q1[2] + - 0xFFFF * 5'q1 // - p[14] * q1[3] + - 3'q1 // - q1[5] + - 0xFFFF * q1'4 // - p[5] * q1[12] + - 0xFFFF * q1'5 // - p[4] * q1[13] + - 0xFFFF * q1'6 // - p[3] * q1[14] + - 0xFFFF * q1'7; // - p[2] * q1[15] + +// clock #9 + +eq_secp256r1_x3_chunks[18] = s'6 * 6's // s[15] * s[3] + + s'5 * 5's // + s[14] * s[4] + + s'4 * 4's // + s[13] * s[5] + + s'3 * 3's // + s[12] * s[6] + + s'2 * 2's // + s[11] * s[7] + + s' * 's // + s[10] * s[8] + + s * s // + s[9] * s[9] + + 's * s' // + s[8] * s[10] + + 2's * s'2 // + s[7] * s[11] + + 3's * s'3 // + s[6] * s[12] + + 4's * s'4 // + s[5] * s[13] + + 5's * s'5 // + s[4] * s[14] + + 6's * s'6 // + s[3] * s[15] + - 0xFFFF * 6'q1 // - p[15] * q1[3] + - 0xFFFF * 5'q1 // - p[14] * q1[4] + - 3'q1 // - q1[6] + - 0xFFFF * q1'4 // - p[5] * q1[13] + - 0xFFFF * q1'5 // - p[4] * q1[14] + - 0xFFFF * q1'6; // - p[3] * q1[15] + +eq_secp256r1_x3_chunks[19] = s'6 * 5's // s[15] * s[4] + + s'5 * 4's // + s[14] * s[5] + + s'4 * 3's // + s[13] * s[6] + + s'3 * 2's // + s[12] * s[7] + + s'2 * 's // + s[11] * s[8] + + s' * s // + s[10] * s[9] + + s * s' // + s[9] * s[10] + + 's * s'2 // + s[8] * s[11] + + 2's * s'3 // + s[7] * s[12] + + 3's * s'4 // + s[6] * s[13] + + 4's * s'5 // + s[5] * s[14] + + 5's * s'6 // + s[4] * s[15] + - 0xFFFF * 5'q1 // - p[15] * q1[4] + - 0xFFFF * 4'q1 // - p[14] * q1[5] + - 2'q1 // - q1[7] + - 0xFFFF * q1'5 // - p[5] * q1[14] + - 0xFFFF * q1'6; // - p[4] * q1[15] + +// clock #10 + +eq_secp256r1_x3_chunks[20] = s'5 * 5's // s[15] * s[5] + + s'4 * 4's // + s[14] * s[6] + + s'3 * 3's // + s[13] * s[7] + + s'2 * 2's // + s[12] * s[8] + + s' * 's // + s[11] * s[9] + + s * s // + s[10] * s[10] + + 's * s' // + s[9] * s[11] + + 2's * s'2 // + s[8] * s[12] + + 3's * s'3 // + s[7] * s[13] + + 4's * s'4 // + s[6] * s[14] + + 5's * s'5 // + s[5] * s[15] + - 0xFFFF * 5'q1 // - p[15] * q1[5] + - 0xFFFF * 4'q1 // - p[14] * q1[6] + - 2'q1 // - q1[8] + - 0xFFFF * q1'5; // - p[5] * q1[15] + +eq_secp256r1_x3_chunks[21] = s'5 * 4's // s[15] * s[6] + + s'4 * 3's // + s[14] * s[7] + + s'3 * 2's // + s[13] * s[8] + + s'2 * 's // + s[12] * s[9] + + s' * s // + s[11] * s[10] + + s * s' // + s[10] * s[11] + + 's * s'2 // + s[9] * s[12] + + 2's * s'3 // + s[8] * s[13] + + 3's * s'4 // + s[7] * s[14] + + 4's * s'5 // + s[6] * s[15] + - 0xFFFF * 4'q1 // - p[15] * q1[6] + - 0xFFFF * 3'q1 // - p[14] * q1[7] + - 'q1; // - q1[9] + +// clock #11 + +eq_secp256r1_x3_chunks[22] = s'4 * 4's // s[15] * s[7] + + s'3 * 3's // + s[14] * s[8] + + s'2 * 2's // + s[13] * s[9] + + s' * 's // + s[12] * s[10] + + s * s // + s[11] * s[11] + + 's * s' // + s[10] * s[12] + + 2's * s'2 // + s[9] * s[13] + + 3's * s'3 // + s[8] * s[14] + + 4's * s'4 // + s[7] * s[15] + - 0xFFFF * 4'q1 // - p[15] * q1[7] + - 0xFFFF * 3'q1 // - p[14] * q1[8] + - 'q1; // - q1[10] + +eq_secp256r1_x3_chunks[23] = s'4 * 3's // s[15] * s[8] + + s'3 * 2's // + s[14] * s[9] + + s'2 * 's // + s[13] * s[10] + + s' * s // + s[12] * s[11] + + s * s' // + s[11] * s[12] + + 's * s'2 // + s[10] * s[13] + + 2's * s'3 // + s[9] * s[14] + + 3's * s'4 // + s[8] * s[15] + - 0xFFFF * 3'q1 // - p[15] * q1[8] + - 0xFFFF * 2'q1 // - p[14] * q1[9] + - q1; // - q1[11] + +// clock #12 + +eq_secp256r1_x3_chunks[24] = s'3 * 3's // s[15] * s[9] + + s'2 * 2's // + s[14] * s[10] + + s' * 's // + s[13] * s[11] + + s * s // + s[12] * s[12] + + 's * s' // + s[11] * s[13] + + 2's * s'2 // + s[10] * s[14] + + 3's * s'3 // + s[9] * s[15] + - 0xFFFF * 3'q1 // - p[15] * q1[9] + - 0xFFFF * 2'q1 // - p[14] * q1[10] + - q1; // - q1[12] + +eq_secp256r1_x3_chunks[25] = s'3 * 2's // s[15] * s[10] + + s'2 * 's // + s[14] * s[11] + + s' * s // + s[13] * s[12] + + s * s' // + s[12] * s[13] + + 's * s'2 // + s[11] * s[14] + + 2's * s'3 // + s[10] * s[15] + - 0xFFFF * 2'q1 // - p[15] * q1[10] + - 0xFFFF * 'q1 // - p[14] * q1[11] + - q1'; // - q1[13] + +// clock #13 + +eq_secp256r1_x3_chunks[26] = s'2 * 2's // s[15] * s[11] + + s' * 's // + s[14] * s[12] + + s * s // + s[13] * s[13] + + 's * s' // + s[12] * s[14] + + 2's * s'2 // + s[11] * s[15] + - 0xFFFF * 2'q1 // - p[15] * q1[11] + - 0xFFFF * 'q1 // - p[14] * q1[12] + - q1'; // - q1[14] + +eq_secp256r1_x3_chunks[27] = s'2 * 's // s[15] * s[12] + + s' * s // + s[14] * s[13] + + s * s' // + s[13] * s[14] + + 's * s'2 // + s[12] * s[15] + - 0xFFFF * 'q1 // - p[15] * q1[12] + - 0xFFFF * q1 // - p[14] * q1[13] + - q1'2; // - q1[15] + +// clock #14 + +eq_secp256r1_x3_chunks[28] = s' * 's // s[15] * s[13] + + s * s // + s[14] * s[14] + + 's * s' // + s[13] * s[15] + - 0xFFFF * 'q1 // - p[15] * q1[13] + - 0xFFFF * q1; // - p[14] * q1[14] + +eq_secp256r1_x3_chunks[29] = s' * s // s[15] * s[14] + + s * s' // + s[14] * s[15] + - 0xFFFF * q1 // - p[15] * q1[14] + - 0xFFFF * q1'; // - p[14] * q1[15] + +// clock #15 + +eq_secp256r1_x3_chunks[30] = s * s // s[15] * s[15] + - 0xFFFF * q1; // - p[15] * q1[15] + diff --git a/precompiles/arith_eq/pil/equations/secp256r1_y3.pil b/precompiles/arith_eq/pil/equations/secp256r1_y3.pil new file mode 100644 index 000000000..a5e356d34 --- /dev/null +++ b/precompiles/arith_eq/pil/equations/secp256r1_y3.pil @@ -0,0 +1,777 @@ +// code generated +// +// equation: s*x1-s*x3-y1-y3+p*q2-p*offset +// +// p: 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF +// offset: 0x20000000000000000000000000000000000000000000000000000000000000000 +// (p*offset): 0x1FFFFFFFE00000002000000000000000000000001FFFFFFFFFFFFFFFFFFFFFFFE0000000000000000000000000000000000000000000000000000000000000000 +// +// chunks:16 +// chunk_bits:16 +// terms_by_clock: 2 + + +const expr eq_secp256r1_y3_chunks[32]; + +// clock #0 + +eq_secp256r1_y3_chunks[ 0] = s * x1 // s[0] * x1[0] + - s * x3 // - s[0] * x3[0] + - y1 // - y1[0] + - y3 // - y3[0] + + 0xFFFF * q2; // + p[0] * q2[0] + +eq_secp256r1_y3_chunks[ 1] = s' * x1 // s[1] * x1[0] + + s * x1' // + s[0] * x1[1] + - s' * x3 // - s[1] * x3[0] + - s * x3' // - s[0] * x3[1] + - y1' // - y1[1] + - y3' // - y3[1] + + 0xFFFF * q2 // + p[1] * q2[0] + + 0xFFFF * q2'; // + p[0] * q2[1] + +// clock #1 + +eq_secp256r1_y3_chunks[ 2] = s' * 'x1 // s[2] * x1[0] + + s * x1 // + s[1] * x1[1] + + 's * x1' // + s[0] * x1[2] + - s' * 'x3 // - s[2] * x3[0] + - s * x3 // - s[1] * x3[1] + - 's * x3' // - s[0] * x3[2] + - y1' // - y1[2] + - y3' // - y3[2] + + 0xFFFF * 'q2 // + p[2] * q2[0] + + 0xFFFF * q2 // + p[1] * q2[1] + + 0xFFFF * q2'; // + p[0] * q2[2] + +eq_secp256r1_y3_chunks[ 3] = s'2 * 'x1 // s[3] * x1[0] + + s' * x1 // + s[2] * x1[1] + + s * x1' // + s[1] * x1[2] + + 's * x1'2 // + s[0] * x1[3] + - s'2 * 'x3 // - s[3] * x3[0] + - s' * x3 // - s[2] * x3[1] + - s * x3' // - s[1] * x3[2] + - 's * x3'2 // - s[0] * x3[3] + - y1'2 // - y1[3] + - y3'2 // - y3[3] + + 0xFFFF * 'q2 // + p[3] * q2[0] + + 0xFFFF * q2 // + p[2] * q2[1] + + 0xFFFF * q2' // + p[1] * q2[2] + + 0xFFFF * q2'2; // + p[0] * q2[3] + +// clock #2 + +eq_secp256r1_y3_chunks[ 4] = s'2 * 2'x1 // s[4] * x1[0] + + s' * 'x1 // + s[3] * x1[1] + + s * x1 // + s[2] * x1[2] + + 's * x1' // + s[1] * x1[3] + + 2's * x1'2 // + s[0] * x1[4] + - s'2 * 2'x3 // - s[4] * x3[0] + - s' * 'x3 // - s[3] * x3[1] + - s * x3 // - s[2] * x3[2] + - 's * x3' // - s[1] * x3[3] + - 2's * x3'2 // - s[0] * x3[4] + - y1'2 // - y1[4] + - y3'2 // - y3[4] + + 0xFFFF * 2'q2 // + p[4] * q2[0] + + 0xFFFF * 'q2 // + p[3] * q2[1] + + 0xFFFF * q2 // + p[2] * q2[2] + + 0xFFFF * q2' // + p[1] * q2[3] + + 0xFFFF * q2'2; // + p[0] * q2[4] + +eq_secp256r1_y3_chunks[ 5] = s'3 * 2'x1 // s[5] * x1[0] + + s'2 * 'x1 // + s[4] * x1[1] + + s' * x1 // + s[3] * x1[2] + + s * x1' // + s[2] * x1[3] + + 's * x1'2 // + s[1] * x1[4] + + 2's * x1'3 // + s[0] * x1[5] + - s'3 * 2'x3 // - s[5] * x3[0] + - s'2 * 'x3 // - s[4] * x3[1] + - s' * x3 // - s[3] * x3[2] + - s * x3' // - s[2] * x3[3] + - 's * x3'2 // - s[1] * x3[4] + - 2's * x3'3 // - s[0] * x3[5] + - y1'3 // - y1[5] + - y3'3 // - y3[5] + + 0xFFFF * 2'q2 // + p[5] * q2[0] + + 0xFFFF * 'q2 // + p[4] * q2[1] + + 0xFFFF * q2 // + p[3] * q2[2] + + 0xFFFF * q2' // + p[2] * q2[3] + + 0xFFFF * q2'2 // + p[1] * q2[4] + + 0xFFFF * q2'3; // + p[0] * q2[5] + +// clock #3 + +eq_secp256r1_y3_chunks[ 6] = s'3 * 3'x1 // s[6] * x1[0] + + s'2 * 2'x1 // + s[5] * x1[1] + + s' * 'x1 // + s[4] * x1[2] + + s * x1 // + s[3] * x1[3] + + 's * x1' // + s[2] * x1[4] + + 2's * x1'2 // + s[1] * x1[5] + + 3's * x1'3 // + s[0] * x1[6] + - s'3 * 3'x3 // - s[6] * x3[0] + - s'2 * 2'x3 // - s[5] * x3[1] + - s' * 'x3 // - s[4] * x3[2] + - s * x3 // - s[3] * x3[3] + - 's * x3' // - s[2] * x3[4] + - 2's * x3'2 // - s[1] * x3[5] + - 3's * x3'3 // - s[0] * x3[6] + - y1'3 // - y1[6] + - y3'3 // - y3[6] + + 0xFFFF * 2'q2 // + p[5] * q2[1] + + 0xFFFF * 'q2 // + p[4] * q2[2] + + 0xFFFF * q2 // + p[3] * q2[3] + + 0xFFFF * q2' // + p[2] * q2[4] + + 0xFFFF * q2'2 // + p[1] * q2[5] + + 0xFFFF * q2'3; // + p[0] * q2[6] + +eq_secp256r1_y3_chunks[ 7] = s'4 * 3'x1 // s[7] * x1[0] + + s'3 * 2'x1 // + s[6] * x1[1] + + s'2 * 'x1 // + s[5] * x1[2] + + s' * x1 // + s[4] * x1[3] + + s * x1' // + s[3] * x1[4] + + 's * x1'2 // + s[2] * x1[5] + + 2's * x1'3 // + s[1] * x1[6] + + 3's * x1'4 // + s[0] * x1[7] + - s'4 * 3'x3 // - s[7] * x3[0] + - s'3 * 2'x3 // - s[6] * x3[1] + - s'2 * 'x3 // - s[5] * x3[2] + - s' * x3 // - s[4] * x3[3] + - s * x3' // - s[3] * x3[4] + - 's * x3'2 // - s[2] * x3[5] + - 2's * x3'3 // - s[1] * x3[6] + - 3's * x3'4 // - s[0] * x3[7] + - y1'4 // - y1[7] + - y3'4 // - y3[7] + + 0xFFFF * 'q2 // + p[5] * q2[2] + + 0xFFFF * q2 // + p[4] * q2[3] + + 0xFFFF * q2' // + p[3] * q2[4] + + 0xFFFF * q2'2 // + p[2] * q2[5] + + 0xFFFF * q2'3 // + p[1] * q2[6] + + 0xFFFF * q2'4; // + p[0] * q2[7] + +// clock #4 + +eq_secp256r1_y3_chunks[ 8] = s'4 * 4'x1 // s[8] * x1[0] + + s'3 * 3'x1 // + s[7] * x1[1] + + s'2 * 2'x1 // + s[6] * x1[2] + + s' * 'x1 // + s[5] * x1[3] + + s * x1 // + s[4] * x1[4] + + 's * x1' // + s[3] * x1[5] + + 2's * x1'2 // + s[2] * x1[6] + + 3's * x1'3 // + s[1] * x1[7] + + 4's * x1'4 // + s[0] * x1[8] + - s'4 * 4'x3 // - s[8] * x3[0] + - s'3 * 3'x3 // - s[7] * x3[1] + - s'2 * 2'x3 // - s[6] * x3[2] + - s' * 'x3 // - s[5] * x3[3] + - s * x3 // - s[4] * x3[4] + - 's * x3' // - s[3] * x3[5] + - 2's * x3'2 // - s[2] * x3[6] + - 3's * x3'3 // - s[1] * x3[7] + - 4's * x3'4 // - s[0] * x3[8] + - y1'4 // - y1[8] + - y3'4 // - y3[8] + + 0xFFFF * 'q2 // + p[5] * q2[3] + + 0xFFFF * q2 // + p[4] * q2[4] + + 0xFFFF * q2' // + p[3] * q2[5] + + 0xFFFF * q2'2 // + p[2] * q2[6] + + 0xFFFF * q2'3 // + p[1] * q2[7] + + 0xFFFF * q2'4; // + p[0] * q2[8] + +eq_secp256r1_y3_chunks[ 9] = s'5 * 4'x1 // s[9] * x1[0] + + s'4 * 3'x1 // + s[8] * x1[1] + + s'3 * 2'x1 // + s[7] * x1[2] + + s'2 * 'x1 // + s[6] * x1[3] + + s' * x1 // + s[5] * x1[4] + + s * x1' // + s[4] * x1[5] + + 's * x1'2 // + s[3] * x1[6] + + 2's * x1'3 // + s[2] * x1[7] + + 3's * x1'4 // + s[1] * x1[8] + + 4's * x1'5 // + s[0] * x1[9] + - s'5 * 4'x3 // - s[9] * x3[0] + - s'4 * 3'x3 // - s[8] * x3[1] + - s'3 * 2'x3 // - s[7] * x3[2] + - s'2 * 'x3 // - s[6] * x3[3] + - s' * x3 // - s[5] * x3[4] + - s * x3' // - s[4] * x3[5] + - 's * x3'2 // - s[3] * x3[6] + - 2's * x3'3 // - s[2] * x3[7] + - 3's * x3'4 // - s[1] * x3[8] + - 4's * x3'5 // - s[0] * x3[9] + - y1'5 // - y1[9] + - y3'5 // - y3[9] + + 0xFFFF * q2 // + p[5] * q2[4] + + 0xFFFF * q2' // + p[4] * q2[5] + + 0xFFFF * q2'2 // + p[3] * q2[6] + + 0xFFFF * q2'3 // + p[2] * q2[7] + + 0xFFFF * q2'4 // + p[1] * q2[8] + + 0xFFFF * q2'5; // + p[0] * q2[9] + +// clock #5 + +eq_secp256r1_y3_chunks[10] = s'5 * 5'x1 // s[10] * x1[0] + + s'4 * 4'x1 // + s[9] * x1[1] + + s'3 * 3'x1 // + s[8] * x1[2] + + s'2 * 2'x1 // + s[7] * x1[3] + + s' * 'x1 // + s[6] * x1[4] + + s * x1 // + s[5] * x1[5] + + 's * x1' // + s[4] * x1[6] + + 2's * x1'2 // + s[3] * x1[7] + + 3's * x1'3 // + s[2] * x1[8] + + 4's * x1'4 // + s[1] * x1[9] + + 5's * x1'5 // + s[0] * x1[10] + - s'5 * 5'x3 // - s[10] * x3[0] + - s'4 * 4'x3 // - s[9] * x3[1] + - s'3 * 3'x3 // - s[8] * x3[2] + - s'2 * 2'x3 // - s[7] * x3[3] + - s' * 'x3 // - s[6] * x3[4] + - s * x3 // - s[5] * x3[5] + - 's * x3' // - s[4] * x3[6] + - 2's * x3'2 // - s[3] * x3[7] + - 3's * x3'3 // - s[2] * x3[8] + - 4's * x3'4 // - s[1] * x3[9] + - 5's * x3'5 // - s[0] * x3[10] + - y1'5 // - y1[10] + - y3'5 // - y3[10] + + 0xFFFF * q2 // + p[5] * q2[5] + + 0xFFFF * q2' // + p[4] * q2[6] + + 0xFFFF * q2'2 // + p[3] * q2[7] + + 0xFFFF * q2'3 // + p[2] * q2[8] + + 0xFFFF * q2'4 // + p[1] * q2[9] + + 0xFFFF * q2'5; // + p[0] * q2[10] + +eq_secp256r1_y3_chunks[11] = s'6 * 5'x1 // s[11] * x1[0] + + s'5 * 4'x1 // + s[10] * x1[1] + + s'4 * 3'x1 // + s[9] * x1[2] + + s'3 * 2'x1 // + s[8] * x1[3] + + s'2 * 'x1 // + s[7] * x1[4] + + s' * x1 // + s[6] * x1[5] + + s * x1' // + s[5] * x1[6] + + 's * x1'2 // + s[4] * x1[7] + + 2's * x1'3 // + s[3] * x1[8] + + 3's * x1'4 // + s[2] * x1[9] + + 4's * x1'5 // + s[1] * x1[10] + + 5's * x1'6 // + s[0] * x1[11] + - s'6 * 5'x3 // - s[11] * x3[0] + - s'5 * 4'x3 // - s[10] * x3[1] + - s'4 * 3'x3 // - s[9] * x3[2] + - s'3 * 2'x3 // - s[8] * x3[3] + - s'2 * 'x3 // - s[7] * x3[4] + - s' * x3 // - s[6] * x3[5] + - s * x3' // - s[5] * x3[6] + - 's * x3'2 // - s[4] * x3[7] + - 2's * x3'3 // - s[3] * x3[8] + - 3's * x3'4 // - s[2] * x3[9] + - 4's * x3'5 // - s[1] * x3[10] + - 5's * x3'6 // - s[0] * x3[11] + - y1'6 // - y1[11] + - y3'6 // - y3[11] + + 0xFFFF * q2' // + p[5] * q2[6] + + 0xFFFF * q2'2 // + p[4] * q2[7] + + 0xFFFF * q2'3 // + p[3] * q2[8] + + 0xFFFF * q2'4 // + p[2] * q2[9] + + 0xFFFF * q2'5 // + p[1] * q2[10] + + 0xFFFF * q2'6; // + p[0] * q2[11] + +// clock #6 + +eq_secp256r1_y3_chunks[12] = s'6 * 6'x1 // s[12] * x1[0] + + s'5 * 5'x1 // + s[11] * x1[1] + + s'4 * 4'x1 // + s[10] * x1[2] + + s'3 * 3'x1 // + s[9] * x1[3] + + s'2 * 2'x1 // + s[8] * x1[4] + + s' * 'x1 // + s[7] * x1[5] + + s * x1 // + s[6] * x1[6] + + 's * x1' // + s[5] * x1[7] + + 2's * x1'2 // + s[4] * x1[8] + + 3's * x1'3 // + s[3] * x1[9] + + 4's * x1'4 // + s[2] * x1[10] + + 5's * x1'5 // + s[1] * x1[11] + + 6's * x1'6 // + s[0] * x1[12] + - s'6 * 6'x3 // - s[12] * x3[0] + - s'5 * 5'x3 // - s[11] * x3[1] + - s'4 * 4'x3 // - s[10] * x3[2] + - s'3 * 3'x3 // - s[9] * x3[3] + - s'2 * 2'x3 // - s[8] * x3[4] + - s' * 'x3 // - s[7] * x3[5] + - s * x3 // - s[6] * x3[6] + - 's * x3' // - s[5] * x3[7] + - 2's * x3'2 // - s[4] * x3[8] + - 3's * x3'3 // - s[3] * x3[9] + - 4's * x3'4 // - s[2] * x3[10] + - 5's * x3'5 // - s[1] * x3[11] + - 6's * x3'6 // - s[0] * x3[12] + - y1'6 // - y1[12] + - y3'6 // - y3[12] + + 6'q2 // + q2[0] + + 0xFFFF * q2' // + p[5] * q2[7] + + 0xFFFF * q2'2 // + p[4] * q2[8] + + 0xFFFF * q2'3 // + p[3] * q2[9] + + 0xFFFF * q2'4 // + p[2] * q2[10] + + 0xFFFF * q2'5 // + p[1] * q2[11] + + 0xFFFF * q2'6; // + p[0] * q2[12] + +eq_secp256r1_y3_chunks[13] = s'7 * 6'x1 // s[13] * x1[0] + + s'6 * 5'x1 // + s[12] * x1[1] + + s'5 * 4'x1 // + s[11] * x1[2] + + s'4 * 3'x1 // + s[10] * x1[3] + + s'3 * 2'x1 // + s[9] * x1[4] + + s'2 * 'x1 // + s[8] * x1[5] + + s' * x1 // + s[7] * x1[6] + + s * x1' // + s[6] * x1[7] + + 's * x1'2 // + s[5] * x1[8] + + 2's * x1'3 // + s[4] * x1[9] + + 3's * x1'4 // + s[3] * x1[10] + + 4's * x1'5 // + s[2] * x1[11] + + 5's * x1'6 // + s[1] * x1[12] + + 6's * x1'7 // + s[0] * x1[13] + - s'7 * 6'x3 // - s[13] * x3[0] + - s'6 * 5'x3 // - s[12] * x3[1] + - s'5 * 4'x3 // - s[11] * x3[2] + - s'4 * 3'x3 // - s[10] * x3[3] + - s'3 * 2'x3 // - s[9] * x3[4] + - s'2 * 'x3 // - s[8] * x3[5] + - s' * x3 // - s[7] * x3[6] + - s * x3' // - s[6] * x3[7] + - 's * x3'2 // - s[5] * x3[8] + - 2's * x3'3 // - s[4] * x3[9] + - 3's * x3'4 // - s[3] * x3[10] + - 4's * x3'5 // - s[2] * x3[11] + - 5's * x3'6 // - s[1] * x3[12] + - 6's * x3'7 // - s[0] * x3[13] + - y1'7 // - y1[13] + - y3'7 // - y3[13] + + 5'q2 // + q2[1] + + 0xFFFF * q2'2 // + p[5] * q2[8] + + 0xFFFF * q2'3 // + p[4] * q2[9] + + 0xFFFF * q2'4 // + p[3] * q2[10] + + 0xFFFF * q2'5 // + p[2] * q2[11] + + 0xFFFF * q2'6 // + p[1] * q2[12] + + 0xFFFF * q2'7; // + p[0] * q2[13] + +// clock #7 + +eq_secp256r1_y3_chunks[14] = s'7 * 7'x1 // s[14] * x1[0] + + s'6 * 6'x1 // + s[13] * x1[1] + + s'5 * 5'x1 // + s[12] * x1[2] + + s'4 * 4'x1 // + s[11] * x1[3] + + s'3 * 3'x1 // + s[10] * x1[4] + + s'2 * 2'x1 // + s[9] * x1[5] + + s' * 'x1 // + s[8] * x1[6] + + s * x1 // + s[7] * x1[7] + + 's * x1' // + s[6] * x1[8] + + 2's * x1'2 // + s[5] * x1[9] + + 3's * x1'3 // + s[4] * x1[10] + + 4's * x1'4 // + s[3] * x1[11] + + 5's * x1'5 // + s[2] * x1[12] + + 6's * x1'6 // + s[1] * x1[13] + + 7's * x1'7 // + s[0] * x1[14] + - s'7 * 7'x3 // - s[14] * x3[0] + - s'6 * 6'x3 // - s[13] * x3[1] + - s'5 * 5'x3 // - s[12] * x3[2] + - s'4 * 4'x3 // - s[11] * x3[3] + - s'3 * 3'x3 // - s[10] * x3[4] + - s'2 * 2'x3 // - s[9] * x3[5] + - s' * 'x3 // - s[8] * x3[6] + - s * x3 // - s[7] * x3[7] + - 's * x3' // - s[6] * x3[8] + - 2's * x3'2 // - s[5] * x3[9] + - 3's * x3'3 // - s[4] * x3[10] + - 4's * x3'4 // - s[3] * x3[11] + - 5's * x3'5 // - s[2] * x3[12] + - 6's * x3'6 // - s[1] * x3[13] + - 7's * x3'7 // - s[0] * x3[14] + - y1'7 // - y1[14] + - y3'7 // - y3[14] + + 0xFFFF * 7'q2 // + p[14] * q2[0] + + 5'q2 // + q2[2] + + 0xFFFF * q2'2 // + p[5] * q2[9] + + 0xFFFF * q2'3 // + p[4] * q2[10] + + 0xFFFF * q2'4 // + p[3] * q2[11] + + 0xFFFF * q2'5 // + p[2] * q2[12] + + 0xFFFF * q2'6 // + p[1] * q2[13] + + 0xFFFF * q2'7; // + p[0] * q2[14] + +eq_secp256r1_y3_chunks[15] = s'8 * 7'x1 // s[15] * x1[0] + + s'7 * 6'x1 // + s[14] * x1[1] + + s'6 * 5'x1 // + s[13] * x1[2] + + s'5 * 4'x1 // + s[12] * x1[3] + + s'4 * 3'x1 // + s[11] * x1[4] + + s'3 * 2'x1 // + s[10] * x1[5] + + s'2 * 'x1 // + s[9] * x1[6] + + s' * x1 // + s[8] * x1[7] + + s * x1' // + s[7] * x1[8] + + 's * x1'2 // + s[6] * x1[9] + + 2's * x1'3 // + s[5] * x1[10] + + 3's * x1'4 // + s[4] * x1[11] + + 4's * x1'5 // + s[3] * x1[12] + + 5's * x1'6 // + s[2] * x1[13] + + 6's * x1'7 // + s[1] * x1[14] + + 7's * x1'8 // + s[0] * x1[15] + - s'8 * 7'x3 // - s[15] * x3[0] + - s'7 * 6'x3 // - s[14] * x3[1] + - s'6 * 5'x3 // - s[13] * x3[2] + - s'5 * 4'x3 // - s[12] * x3[3] + - s'4 * 3'x3 // - s[11] * x3[4] + - s'3 * 2'x3 // - s[10] * x3[5] + - s'2 * 'x3 // - s[9] * x3[6] + - s' * x3 // - s[8] * x3[7] + - s * x3' // - s[7] * x3[8] + - 's * x3'2 // - s[6] * x3[9] + - 2's * x3'3 // - s[5] * x3[10] + - 3's * x3'4 // - s[4] * x3[11] + - 4's * x3'5 // - s[3] * x3[12] + - 5's * x3'6 // - s[2] * x3[13] + - 6's * x3'7 // - s[1] * x3[14] + - 7's * x3'8 // - s[0] * x3[15] + - y1'8 // - y1[15] + - y3'8 // - y3[15] + + 0xFFFF * 7'q2 // + p[15] * q2[0] + + 0xFFFF * 6'q2 // + p[14] * q2[1] + + 4'q2 // + q2[3] + + 0xFFFF * q2'3 // + p[5] * q2[10] + + 0xFFFF * q2'4 // + p[4] * q2[11] + + 0xFFFF * q2'5 // + p[3] * q2[12] + + 0xFFFF * q2'6 // + p[2] * q2[13] + + 0xFFFF * q2'7 // + p[1] * q2[14] + + 0xFFFF * q2'8; // + p[0] * q2[15] + +// clock #8 + +eq_secp256r1_y3_chunks[16] = s'7 * 7'x1 // s[15] * x1[1] + + s'6 * 6'x1 // + s[14] * x1[2] + + s'5 * 5'x1 // + s[13] * x1[3] + + s'4 * 4'x1 // + s[12] * x1[4] + + s'3 * 3'x1 // + s[11] * x1[5] + + s'2 * 2'x1 // + s[10] * x1[6] + + s' * 'x1 // + s[9] * x1[7] + + s * x1 // + s[8] * x1[8] + + 's * x1' // + s[7] * x1[9] + + 2's * x1'2 // + s[6] * x1[10] + + 3's * x1'3 // + s[5] * x1[11] + + 4's * x1'4 // + s[4] * x1[12] + + 5's * x1'5 // + s[3] * x1[13] + + 6's * x1'6 // + s[2] * x1[14] + + 7's * x1'7 // + s[1] * x1[15] + - s'7 * 7'x3 // - s[15] * x3[1] + - s'6 * 6'x3 // - s[14] * x3[2] + - s'5 * 5'x3 // - s[13] * x3[3] + - s'4 * 4'x3 // - s[12] * x3[4] + - s'3 * 3'x3 // - s[11] * x3[5] + - s'2 * 2'x3 // - s[10] * x3[6] + - s' * 'x3 // - s[9] * x3[7] + - s * x3 // - s[8] * x3[8] + - 's * x3' // - s[7] * x3[9] + - 2's * x3'2 // - s[6] * x3[10] + - 3's * x3'3 // - s[5] * x3[11] + - 4's * x3'4 // - s[4] * x3[12] + - 5's * x3'5 // - s[3] * x3[13] + - 6's * x3'6 // - s[2] * x3[14] + - 7's * x3'7 // - s[1] * x3[15] + + 0xFFFF * 7'q2 // + p[15] * q2[1] + + 0xFFFF * 6'q2 // + p[14] * q2[2] + + 4'q2 // + q2[4] + + 0xFFFF * q2'3 // + p[5] * q2[11] + + 0xFFFF * q2'4 // + p[4] * q2[12] + + 0xFFFF * q2'5 // + p[3] * q2[13] + + 0xFFFF * q2'6 // + p[2] * q2[14] + + 0xFFFF * q2'7 // + p[1] * q2[15] + - 0xFFFE; // - (p*offset)[16] + +eq_secp256r1_y3_chunks[17] = s'7 * 6'x1 // s[15] * x1[2] + + s'6 * 5'x1 // + s[14] * x1[3] + + s'5 * 4'x1 // + s[13] * x1[4] + + s'4 * 3'x1 // + s[12] * x1[5] + + s'3 * 2'x1 // + s[11] * x1[6] + + s'2 * 'x1 // + s[10] * x1[7] + + s' * x1 // + s[9] * x1[8] + + s * x1' // + s[8] * x1[9] + + 's * x1'2 // + s[7] * x1[10] + + 2's * x1'3 // + s[6] * x1[11] + + 3's * x1'4 // + s[5] * x1[12] + + 4's * x1'5 // + s[4] * x1[13] + + 5's * x1'6 // + s[3] * x1[14] + + 6's * x1'7 // + s[2] * x1[15] + - s'7 * 6'x3 // - s[15] * x3[2] + - s'6 * 5'x3 // - s[14] * x3[3] + - s'5 * 4'x3 // - s[13] * x3[4] + - s'4 * 3'x3 // - s[12] * x3[5] + - s'3 * 2'x3 // - s[11] * x3[6] + - s'2 * 'x3 // - s[10] * x3[7] + - s' * x3 // - s[9] * x3[8] + - s * x3' // - s[8] * x3[9] + - 's * x3'2 // - s[7] * x3[10] + - 2's * x3'3 // - s[6] * x3[11] + - 3's * x3'4 // - s[5] * x3[12] + - 4's * x3'5 // - s[4] * x3[13] + - 5's * x3'6 // - s[3] * x3[14] + - 6's * x3'7 // - s[2] * x3[15] + + 0xFFFF * 6'q2 // + p[15] * q2[2] + + 0xFFFF * 5'q2 // + p[14] * q2[3] + + 3'q2 // + q2[5] + + 0xFFFF * q2'4 // + p[5] * q2[12] + + 0xFFFF * q2'5 // + p[4] * q2[13] + + 0xFFFF * q2'6 // + p[3] * q2[14] + + 0xFFFF * q2'7 // + p[2] * q2[15] + - 0xFFFF; // - (p*offset)[17] + +// clock #9 + +eq_secp256r1_y3_chunks[18] = s'6 * 6'x1 // s[15] * x1[3] + + s'5 * 5'x1 // + s[14] * x1[4] + + s'4 * 4'x1 // + s[13] * x1[5] + + s'3 * 3'x1 // + s[12] * x1[6] + + s'2 * 2'x1 // + s[11] * x1[7] + + s' * 'x1 // + s[10] * x1[8] + + s * x1 // + s[9] * x1[9] + + 's * x1' // + s[8] * x1[10] + + 2's * x1'2 // + s[7] * x1[11] + + 3's * x1'3 // + s[6] * x1[12] + + 4's * x1'4 // + s[5] * x1[13] + + 5's * x1'5 // + s[4] * x1[14] + + 6's * x1'6 // + s[3] * x1[15] + - s'6 * 6'x3 // - s[15] * x3[3] + - s'5 * 5'x3 // - s[14] * x3[4] + - s'4 * 4'x3 // - s[13] * x3[5] + - s'3 * 3'x3 // - s[12] * x3[6] + - s'2 * 2'x3 // - s[11] * x3[7] + - s' * 'x3 // - s[10] * x3[8] + - s * x3 // - s[9] * x3[9] + - 's * x3' // - s[8] * x3[10] + - 2's * x3'2 // - s[7] * x3[11] + - 3's * x3'3 // - s[6] * x3[12] + - 4's * x3'4 // - s[5] * x3[13] + - 5's * x3'5 // - s[4] * x3[14] + - 6's * x3'6 // - s[3] * x3[15] + + 0xFFFF * 6'q2 // + p[15] * q2[3] + + 0xFFFF * 5'q2 // + p[14] * q2[4] + + 3'q2 // + q2[6] + + 0xFFFF * q2'4 // + p[5] * q2[13] + + 0xFFFF * q2'5 // + p[4] * q2[14] + + 0xFFFF * q2'6 // + p[3] * q2[15] + - 0xFFFF; // - (p*offset)[18] + +eq_secp256r1_y3_chunks[19] = s'6 * 5'x1 // s[15] * x1[4] + + s'5 * 4'x1 // + s[14] * x1[5] + + s'4 * 3'x1 // + s[13] * x1[6] + + s'3 * 2'x1 // + s[12] * x1[7] + + s'2 * 'x1 // + s[11] * x1[8] + + s' * x1 // + s[10] * x1[9] + + s * x1' // + s[9] * x1[10] + + 's * x1'2 // + s[8] * x1[11] + + 2's * x1'3 // + s[7] * x1[12] + + 3's * x1'4 // + s[6] * x1[13] + + 4's * x1'5 // + s[5] * x1[14] + + 5's * x1'6 // + s[4] * x1[15] + - s'6 * 5'x3 // - s[15] * x3[4] + - s'5 * 4'x3 // - s[14] * x3[5] + - s'4 * 3'x3 // - s[13] * x3[6] + - s'3 * 2'x3 // - s[12] * x3[7] + - s'2 * 'x3 // - s[11] * x3[8] + - s' * x3 // - s[10] * x3[9] + - s * x3' // - s[9] * x3[10] + - 's * x3'2 // - s[8] * x3[11] + - 2's * x3'3 // - s[7] * x3[12] + - 3's * x3'4 // - s[6] * x3[13] + - 4's * x3'5 // - s[5] * x3[14] + - 5's * x3'6 // - s[4] * x3[15] + + 0xFFFF * 5'q2 // + p[15] * q2[4] + + 0xFFFF * 4'q2 // + p[14] * q2[5] + + 2'q2 // + q2[7] + + 0xFFFF * q2'5 // + p[5] * q2[14] + + 0xFFFF * q2'6 // + p[4] * q2[15] + - 0xFFFF; // - (p*offset)[19] + +// clock #10 + +eq_secp256r1_y3_chunks[20] = s'5 * 5'x1 // s[15] * x1[5] + + s'4 * 4'x1 // + s[14] * x1[6] + + s'3 * 3'x1 // + s[13] * x1[7] + + s'2 * 2'x1 // + s[12] * x1[8] + + s' * 'x1 // + s[11] * x1[9] + + s * x1 // + s[10] * x1[10] + + 's * x1' // + s[9] * x1[11] + + 2's * x1'2 // + s[8] * x1[12] + + 3's * x1'3 // + s[7] * x1[13] + + 4's * x1'4 // + s[6] * x1[14] + + 5's * x1'5 // + s[5] * x1[15] + - s'5 * 5'x3 // - s[15] * x3[5] + - s'4 * 4'x3 // - s[14] * x3[6] + - s'3 * 3'x3 // - s[13] * x3[7] + - s'2 * 2'x3 // - s[12] * x3[8] + - s' * 'x3 // - s[11] * x3[9] + - s * x3 // - s[10] * x3[10] + - 's * x3' // - s[9] * x3[11] + - 2's * x3'2 // - s[8] * x3[12] + - 3's * x3'3 // - s[7] * x3[13] + - 4's * x3'4 // - s[6] * x3[14] + - 5's * x3'5 // - s[5] * x3[15] + + 0xFFFF * 5'q2 // + p[15] * q2[5] + + 0xFFFF * 4'q2 // + p[14] * q2[6] + + 2'q2 // + q2[8] + + 0xFFFF * q2'5 // + p[5] * q2[15] + - 0xFFFF; // - (p*offset)[20] + +eq_secp256r1_y3_chunks[21] = s'5 * 4'x1 // s[15] * x1[6] + + s'4 * 3'x1 // + s[14] * x1[7] + + s'3 * 2'x1 // + s[13] * x1[8] + + s'2 * 'x1 // + s[12] * x1[9] + + s' * x1 // + s[11] * x1[10] + + s * x1' // + s[10] * x1[11] + + 's * x1'2 // + s[9] * x1[12] + + 2's * x1'3 // + s[8] * x1[13] + + 3's * x1'4 // + s[7] * x1[14] + + 4's * x1'5 // + s[6] * x1[15] + - s'5 * 4'x3 // - s[15] * x3[6] + - s'4 * 3'x3 // - s[14] * x3[7] + - s'3 * 2'x3 // - s[13] * x3[8] + - s'2 * 'x3 // - s[12] * x3[9] + - s' * x3 // - s[11] * x3[10] + - s * x3' // - s[10] * x3[11] + - 's * x3'2 // - s[9] * x3[12] + - 2's * x3'3 // - s[8] * x3[13] + - 3's * x3'4 // - s[7] * x3[14] + - 4's * x3'5 // - s[6] * x3[15] + + 0xFFFF * 4'q2 // + p[15] * q2[6] + + 0xFFFF * 3'q2 // + p[14] * q2[7] + + 'q2 // + q2[9] + - 0xFFFF; // - (p*offset)[21] + +// clock #11 + +eq_secp256r1_y3_chunks[22] = s'4 * 4'x1 // s[15] * x1[7] + + s'3 * 3'x1 // + s[14] * x1[8] + + s'2 * 2'x1 // + s[13] * x1[9] + + s' * 'x1 // + s[12] * x1[10] + + s * x1 // + s[11] * x1[11] + + 's * x1' // + s[10] * x1[12] + + 2's * x1'2 // + s[9] * x1[13] + + 3's * x1'3 // + s[8] * x1[14] + + 4's * x1'4 // + s[7] * x1[15] + - s'4 * 4'x3 // - s[15] * x3[7] + - s'3 * 3'x3 // - s[14] * x3[8] + - s'2 * 2'x3 // - s[13] * x3[9] + - s' * 'x3 // - s[12] * x3[10] + - s * x3 // - s[11] * x3[11] + - 's * x3' // - s[10] * x3[12] + - 2's * x3'2 // - s[9] * x3[13] + - 3's * x3'3 // - s[8] * x3[14] + - 4's * x3'4 // - s[7] * x3[15] + + 0xFFFF * 4'q2 // + p[15] * q2[7] + + 0xFFFF * 3'q2 // + p[14] * q2[8] + + 'q2 // + q2[10] + - 0x1; // - (p*offset)[22] + +eq_secp256r1_y3_chunks[23] = s'4 * 3'x1 // s[15] * x1[8] + + s'3 * 2'x1 // + s[14] * x1[9] + + s'2 * 'x1 // + s[13] * x1[10] + + s' * x1 // + s[12] * x1[11] + + s * x1' // + s[11] * x1[12] + + 's * x1'2 // + s[10] * x1[13] + + 2's * x1'3 // + s[9] * x1[14] + + 3's * x1'4 // + s[8] * x1[15] + - s'4 * 3'x3 // - s[15] * x3[8] + - s'3 * 2'x3 // - s[14] * x3[9] + - s'2 * 'x3 // - s[13] * x3[10] + - s' * x3 // - s[12] * x3[11] + - s * x3' // - s[11] * x3[12] + - 's * x3'2 // - s[10] * x3[13] + - 2's * x3'3 // - s[9] * x3[14] + - 3's * x3'4 // - s[8] * x3[15] + + 0xFFFF * 3'q2 // + p[15] * q2[8] + + 0xFFFF * 2'q2 // + p[14] * q2[9] + + q2; // + q2[11] + +// clock #12 + +eq_secp256r1_y3_chunks[24] = s'3 * 3'x1 // s[15] * x1[9] + + s'2 * 2'x1 // + s[14] * x1[10] + + s' * 'x1 // + s[13] * x1[11] + + s * x1 // + s[12] * x1[12] + + 's * x1' // + s[11] * x1[13] + + 2's * x1'2 // + s[10] * x1[14] + + 3's * x1'3 // + s[9] * x1[15] + - s'3 * 3'x3 // - s[15] * x3[9] + - s'2 * 2'x3 // - s[14] * x3[10] + - s' * 'x3 // - s[13] * x3[11] + - s * x3 // - s[12] * x3[12] + - 's * x3' // - s[11] * x3[13] + - 2's * x3'2 // - s[10] * x3[14] + - 3's * x3'3 // - s[9] * x3[15] + + 0xFFFF * 3'q2 // + p[15] * q2[9] + + 0xFFFF * 2'q2 // + p[14] * q2[10] + + q2; // + q2[12] + +eq_secp256r1_y3_chunks[25] = s'3 * 2'x1 // s[15] * x1[10] + + s'2 * 'x1 // + s[14] * x1[11] + + s' * x1 // + s[13] * x1[12] + + s * x1' // + s[12] * x1[13] + + 's * x1'2 // + s[11] * x1[14] + + 2's * x1'3 // + s[10] * x1[15] + - s'3 * 2'x3 // - s[15] * x3[10] + - s'2 * 'x3 // - s[14] * x3[11] + - s' * x3 // - s[13] * x3[12] + - s * x3' // - s[12] * x3[13] + - 's * x3'2 // - s[11] * x3[14] + - 2's * x3'3 // - s[10] * x3[15] + + 0xFFFF * 2'q2 // + p[15] * q2[10] + + 0xFFFF * 'q2 // + p[14] * q2[11] + + q2'; // + q2[13] + +// clock #13 + +eq_secp256r1_y3_chunks[26] = s'2 * 2'x1 // s[15] * x1[11] + + s' * 'x1 // + s[14] * x1[12] + + s * x1 // + s[13] * x1[13] + + 's * x1' // + s[12] * x1[14] + + 2's * x1'2 // + s[11] * x1[15] + - s'2 * 2'x3 // - s[15] * x3[11] + - s' * 'x3 // - s[14] * x3[12] + - s * x3 // - s[13] * x3[13] + - 's * x3' // - s[12] * x3[14] + - 2's * x3'2 // - s[11] * x3[15] + + 0xFFFF * 2'q2 // + p[15] * q2[11] + + 0xFFFF * 'q2 // + p[14] * q2[12] + + q2'; // + q2[14] + +eq_secp256r1_y3_chunks[27] = s'2 * 'x1 // s[15] * x1[12] + + s' * x1 // + s[14] * x1[13] + + s * x1' // + s[13] * x1[14] + + 's * x1'2 // + s[12] * x1[15] + - s'2 * 'x3 // - s[15] * x3[12] + - s' * x3 // - s[14] * x3[13] + - s * x3' // - s[13] * x3[14] + - 's * x3'2 // - s[12] * x3[15] + + 0xFFFF * 'q2 // + p[15] * q2[12] + + 0xFFFF * q2 // + p[14] * q2[13] + + q2'2; // + q2[15] + +// clock #14 + +eq_secp256r1_y3_chunks[28] = s' * 'x1 // s[15] * x1[13] + + s * x1 // + s[14] * x1[14] + + 's * x1' // + s[13] * x1[15] + - s' * 'x3 // - s[15] * x3[13] + - s * x3 // - s[14] * x3[14] + - 's * x3' // - s[13] * x3[15] + + 0xFFFF * 'q2 // + p[15] * q2[13] + + 0xFFFF * q2 // + p[14] * q2[14] + - 0x2; // - (p*offset)[28] + +eq_secp256r1_y3_chunks[29] = s' * x1 // s[15] * x1[14] + + s * x1' // + s[14] * x1[15] + - s' * x3 // - s[15] * x3[14] + - s * x3' // - s[14] * x3[15] + + 0xFFFF * q2 // + p[15] * q2[14] + + 0xFFFF * q2'; // + p[14] * q2[15] + +// clock #15 + +eq_secp256r1_y3_chunks[30] = s * x1 // s[15] * x1[15] + - s * x3 // - s[15] * x3[15] + + 0xFFFF * q2 // + p[15] * q2[15] + - 0xFFFE; // - (p*offset)[30] + +eq_secp256r1_y3_chunks[31] = - 0x1FFFF; // - (p*offset)[31] + diff --git a/precompiles/arith_eq/src/arith_eq.rs b/precompiles/arith_eq/src/arith_eq.rs index 82e9c12d1..92e7fc573 100644 --- a/precompiles/arith_eq/src/arith_eq.rs +++ b/precompiles/arith_eq/src/arith_eq.rs @@ -23,8 +23,9 @@ use crate::{ arith_eq_constants::*, executors, Arith256Input, Arith256ModInput, ArithEqInput, ArithEqLtTableSM, Bn254ComplexAddInput, Bn254ComplexMulInput, Bn254ComplexSubInput, Bn254CurveAddInput, Bn254CurveDblInput, Secp256k1AddInput, Secp256k1DblInput, - SECP256K1_PRIME_CHUNKS, SEL_OP_ARITH256, SEL_OP_ARITH256_MOD, SEL_OP_SECP256K1_ADD, - SEL_OP_SECP256K1_DBL, + Secp256r1AddInput, Secp256r1DblInput, SECP256K1_PRIME_CHUNKS, SECP256R1_PRIME_CHUNKS, + SEL_OP_ARITH256, SEL_OP_ARITH256_MOD, SEL_OP_SECP256K1_ADD, SEL_OP_SECP256K1_DBL, + SEL_OP_SECP256R1_ADD, SEL_OP_SECP256R1_DBL, }; use rayon::prelude::*; @@ -303,6 +304,52 @@ impl ArithEqSM { ); } + fn process_secp256r1_add( + &self, + input: &Secp256r1AddInput, + trace: &mut [ArithEqTraceRowType], + ) { + let data = executors::Secp256r1::execute_add(&input.p1, &input.p2); + self.expand_data_on_trace(&data, trace, SEL_OP_SECP256R1_ADD); + Self::expand_addr_step_on_trace( + &ArithEqStepAddr { + main_step: input.step, + addr_op: input.addr, + addr_x1: input.p1_addr, + addr_y1: input.p1_addr + 32, + addr_x2: input.p2_addr, + addr_y2: input.p2_addr + 32, + addr_x3: input.p1_addr, + addr_y3: input.p1_addr + 32, + addr_ind: [input.p1_addr, input.p2_addr, 0, 0, 0], + }, + trace, + ); + } + + fn process_secp256r1_dbl( + &self, + input: &Secp256r1DblInput, + trace: &mut [ArithEqTraceRowType], + ) { + let data = executors::Secp256r1::execute_dbl(&input.p1); + self.expand_data_on_trace(&data, trace, SEL_OP_SECP256R1_DBL); + Self::expand_addr_step_on_trace( + &ArithEqStepAddr { + main_step: input.step, + addr_op: input.addr, + addr_x1: input.addr, + addr_y1: input.addr + 32, + addr_x2: input.addr, + addr_y2: input.addr + 32, + addr_x3: input.addr, + addr_y3: input.addr + 32, + addr_ind: [0, 0, 0, 0, 0], + }, + trace, + ); + } + #[inline(always)] fn to_ranged_field(&self, value: i64, range_id: usize) -> u64 { self.std.range_check(range_id, value, 1); @@ -425,12 +472,38 @@ impl ArithEqSM { self.std.inc_virtual_row(self.table_id, row as u64, 1); prev_y3_lt = y3_lt; } + SEL_OP_SECP256R1_ADD | SEL_OP_SECP256R1_DBL => { + let x3_lt = data.x3[i] < SECP256R1_PRIME_CHUNKS[i] + || (data.x3[i] == SECP256R1_PRIME_CHUNKS[i] && prev_x3_lt); + trace[i].set_x3_lt(x3_lt); + let row = ArithEqLtTableSM::calculate_table_row( + prev_x3_lt, + x3_lt, + data.x3[i] - SECP256R1_PRIME_CHUNKS[i], + ); + self.std.inc_virtual_row(self.table_id, row as u64, 1); + prev_x3_lt = x3_lt; + + let y3_lt = data.y3[i] < SECP256R1_PRIME_CHUNKS[i] + || (data.y3[i] == SECP256R1_PRIME_CHUNKS[i] && prev_y3_lt); + trace[i].set_y3_lt(y3_lt); + let row = ArithEqLtTableSM::calculate_table_row( + prev_y3_lt, + y3_lt, + data.y3[i] - SECP256R1_PRIME_CHUNKS[i], + ); + self.std.inc_virtual_row(self.table_id, row as u64, 1); + prev_y3_lt = y3_lt; + } _ => { trace[i].set_x3_lt(false); trace[i].set_y3_lt(false); } } - if (sel_op == SEL_OP_SECP256K1_ADD) || (sel_op == SEL_OP_BN254_CURVE_ADD) { + if (sel_op == SEL_OP_SECP256K1_ADD) + || (sel_op == SEL_OP_BN254_CURVE_ADD) + || (sel_op == SEL_OP_SECP256R1_ADD) + { if x1_x2_different { trace[i].set_x_are_different(true); trace[i].set_x_delta_chunk_inv(0); @@ -510,6 +583,8 @@ impl ArithEqSM { ArithEqInput::Bn254ComplexMul(idata) => { self.process_bn254_complex_mul(idata, trace); } + ArithEqInput::Secp256r1Add(idata) => self.process_secp256r1_add(idata, trace), + ArithEqInput::Secp256r1Dbl(idata) => self.process_secp256r1_dbl(idata, trace), } }); diff --git a/precompiles/arith_eq/src/arith_eq_bus_device.rs b/precompiles/arith_eq/src/arith_eq_bus_device.rs index 3774e69f0..b3b125a37 100644 --- a/precompiles/arith_eq/src/arith_eq_bus_device.rs +++ b/precompiles/arith_eq/src/arith_eq_bus_device.rs @@ -2,11 +2,12 @@ //! sent over the data bus. It connects to the bus and gathers metrics for specific //! `ZiskOperationType::ArithEq` instructions. -use std::{collections::VecDeque, ops::Add}; +use std::ops::Add; -use zisk_common::MemCollectorInfo; +use precompiles_common::MemProcessor; +use zisk_common::STEP; use zisk_common::{ - BusDevice, BusDeviceMode, BusId, Counter, Metrics, A, B, OP, OPERATION_BUS_ID, OP_TYPE, + BusDevice, BusDeviceMode, BusId, Counter, Metrics, B, OP, OPERATION_BUS_ID, OP_TYPE, }; use zisk_core::{zisk_ops::ZiskOp, ZiskOperationType}; @@ -15,14 +16,16 @@ use crate::mem_inputs::{ generate_bn254_complex_add_mem_inputs, generate_bn254_complex_mul_mem_inputs, generate_bn254_complex_sub_mem_inputs, generate_bn254_curve_add_mem_inputs, generate_bn254_curve_dbl_mem_inputs, generate_secp256k1_add_mem_inputs, - generate_secp256k1_dbl_mem_inputs, + generate_secp256k1_dbl_mem_inputs, generate_secp256r1_add_mem_inputs, + generate_secp256r1_dbl_mem_inputs, }; use crate::mem_inputs::{ skip_arith256_mem_inputs, skip_arith256_mod_mem_inputs, skip_bn254_complex_add_mem_inputs, skip_bn254_complex_mul_mem_inputs, skip_bn254_complex_sub_mem_inputs, skip_bn254_curve_add_mem_inputs, skip_bn254_curve_dbl_mem_inputs, - skip_secp256k1_add_mem_inputs, skip_secp256k1_dbl_mem_inputs, + skip_secp256k1_add_mem_inputs, skip_secp256k1_dbl_mem_inputs, skip_secp256r1_add_mem_inputs, + skip_secp256r1_dbl_mem_inputs, }; const ARITH256_OP: u8 = ZiskOp::Arith256.code(); @@ -34,6 +37,8 @@ const BN254_CURVE_DBL_OP: u8 = ZiskOp::Bn254CurveDbl.code(); const BN254_COMPLEX_ADD_OP: u8 = ZiskOp::Bn254ComplexAdd.code(); const BN254_COMPLEX_SUB_OP: u8 = ZiskOp::Bn254ComplexSub.code(); const BN254_COMPLEX_MUL_OP: u8 = ZiskOp::Bn254ComplexMul.code(); +const SECP256R1_ADD_OP: u8 = ZiskOp::Secp256r1Add.code(); +const SECP256R1_DBL_OP: u8 = ZiskOp::Secp256r1Dbl.code(); /// The `ArithEqCounter` struct represents a counter that monitors and measures /// arith_eq-related operations on the data bus. @@ -72,93 +77,49 @@ impl ArithEqCounterInputGen { (op_type == ZiskOperationType::ArithEq).then_some(self.counter.inst_count) } - fn skip_data(&self, data: &[u64], mem_collectors_info: &[MemCollectorInfo]) -> bool { + fn skip_data(&self, data: &[u64], mem_processors: &mut P) -> bool { let addr_main = data[B] as u32; match data[OP] as u8 { - ARITH256_OP => skip_arith256_mem_inputs(addr_main, data, mem_collectors_info), - ARITH256_MOD_OP => skip_arith256_mod_mem_inputs(addr_main, data, mem_collectors_info), - SECP256K1_ADD_OP => skip_secp256k1_add_mem_inputs(addr_main, data, mem_collectors_info), - SECP256K1_DBL_OP => skip_secp256k1_dbl_mem_inputs(addr_main, data, mem_collectors_info), - BN254_CURVE_ADD_OP => { - skip_bn254_curve_add_mem_inputs(addr_main, data, mem_collectors_info) - } - BN254_CURVE_DBL_OP => { - skip_bn254_curve_dbl_mem_inputs(addr_main, data, mem_collectors_info) - } + ARITH256_OP => skip_arith256_mem_inputs(addr_main, data, mem_processors), + ARITH256_MOD_OP => skip_arith256_mod_mem_inputs(addr_main, data, mem_processors), + SECP256K1_ADD_OP => skip_secp256k1_add_mem_inputs(addr_main, data, mem_processors), + SECP256K1_DBL_OP => skip_secp256k1_dbl_mem_inputs(addr_main, data, mem_processors), + BN254_CURVE_ADD_OP => skip_bn254_curve_add_mem_inputs(addr_main, data, mem_processors), + BN254_CURVE_DBL_OP => skip_bn254_curve_dbl_mem_inputs(addr_main, data, mem_processors), BN254_COMPLEX_ADD_OP => { - skip_bn254_complex_add_mem_inputs(addr_main, data, mem_collectors_info) + skip_bn254_complex_add_mem_inputs(addr_main, data, mem_processors) } BN254_COMPLEX_SUB_OP => { - skip_bn254_complex_sub_mem_inputs(addr_main, data, mem_collectors_info) + skip_bn254_complex_sub_mem_inputs(addr_main, data, mem_processors) } BN254_COMPLEX_MUL_OP => { - skip_bn254_complex_mul_mem_inputs(addr_main, data, mem_collectors_info) + skip_bn254_complex_mul_mem_inputs(addr_main, data, mem_processors) } + SECP256R1_ADD_OP => skip_secp256r1_add_mem_inputs(addr_main, data, mem_processors), + SECP256R1_DBL_OP => skip_secp256r1_dbl_mem_inputs(addr_main, data, mem_processors), _ => { panic!("ArithEqCounterInputGen: Unsupported data length {}", data.len(),); } } } -} - -impl Metrics for ArithEqCounterInputGen { - /// Tracks activity on the connected bus and updates counters for recognized operations. - /// - /// # Arguments - /// * `_bus_id` - The ID of the bus (unused in this implementation). - /// * `_data` - The data received from the bus. - /// - /// # Returns - /// An empty vector, as this implementation does not produce any derived inputs for the bus. - #[inline(always)] - fn measure(&mut self, _data: &[u64]) { - self.counter.update(1); - } - - /// Provides a dynamic reference for downcasting purposes. - /// - /// # Returns - /// A reference to `self` as `dyn std::any::Any`. - fn as_any(&self) -> &dyn std::any::Any { - self - } -} - -impl Add for ArithEqCounterInputGen { - type Output = ArithEqCounterInputGen; - /// Combines two `Arith256Counter` instances by summing their counters. - /// - /// # Arguments - /// * `self` - The first `Arith256Counter` instance. - /// * `other` - The second `Arith256Counter` instance. - /// - /// # Returns - /// A new `Arith256Counter` with combined counters. - fn add(self, other: Self) -> ArithEqCounterInputGen { - ArithEqCounterInputGen { counter: &self.counter + &other.counter, mode: self.mode } - } -} - -impl BusDevice for ArithEqCounterInputGen { /// Processes data received on the bus, updating counters and generating inputs when applicable. /// /// # Arguments /// * `bus_id` - The ID of the bus sending the data. /// * `data` - The data received from the bus. - /// * `pending` – A queue of pending bus operations used to send derived inputs. + /// * `mem_processors` – A collection of memory processors used to send derived inputs. /// /// # Returns /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( + pub fn process_data( &mut self, bus_id: &BusId, data: &[u64], - pending: &mut VecDeque<(BusId, Vec)>, - mem_collector_info: Option<&[MemCollectorInfo]>, + mem_processors: &mut P, ) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); @@ -168,24 +129,36 @@ impl BusDevice for ArithEqCounterInputGen { return true; } - if let Some(mem_collectors_info) = mem_collector_info { - if self.skip_data(data, mem_collectors_info) { - return true; - } - } - let op = data[OP] as u8; - let step_main = data[A]; + let step_main = data[STEP]; let addr_main = data[B] as u32; - let only_counters = self.mode == BusDeviceMode::Counter; - if only_counters { - self.measure(data); - } + let only_counters = match self.mode { + BusDeviceMode::Counter => { + self.measure(data); + true + } + BusDeviceMode::CounterAsm => { + self.measure(data); + return true; + } + BusDeviceMode::InputGenerator => { + if self.skip_data(data, mem_processors) { + return true; + } + false + } + }; match op { ARITH256_OP => { - generate_arith256_mem_inputs(addr_main, step_main, data, only_counters, pending); + generate_arith256_mem_inputs( + addr_main, + step_main, + data, + only_counters, + mem_processors, + ); } ARITH256_MOD_OP => { generate_arith256_mod_mem_inputs( @@ -193,7 +166,7 @@ impl BusDevice for ArithEqCounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } SECP256K1_ADD_OP => { @@ -202,7 +175,7 @@ impl BusDevice for ArithEqCounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } SECP256K1_DBL_OP => { @@ -211,7 +184,7 @@ impl BusDevice for ArithEqCounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } BN254_CURVE_ADD_OP => { @@ -220,7 +193,7 @@ impl BusDevice for ArithEqCounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } BN254_CURVE_DBL_OP => { @@ -229,7 +202,7 @@ impl BusDevice for ArithEqCounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } BN254_COMPLEX_ADD_OP => { @@ -238,7 +211,7 @@ impl BusDevice for ArithEqCounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } BN254_COMPLEX_SUB_OP => { @@ -247,7 +220,7 @@ impl BusDevice for ArithEqCounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } BN254_COMPLEX_MUL_OP => { @@ -256,7 +229,25 @@ impl BusDevice for ArithEqCounterInputGen { step_main, data, only_counters, - pending, + mem_processors, + ); + } + SECP256R1_ADD_OP => { + generate_secp256r1_add_mem_inputs( + addr_main, + step_main, + data, + only_counters, + mem_processors, + ); + } + SECP256R1_DBL_OP => { + generate_secp256r1_dbl_mem_inputs( + addr_main, + step_main, + data, + only_counters, + mem_processors, ); } @@ -267,15 +258,48 @@ impl BusDevice for ArithEqCounterInputGen { true } +} + +impl Metrics for ArithEqCounterInputGen { + /// Tracks activity on the connected bus and updates counters for recognized operations. + /// + /// # Arguments + /// * `_bus_id` - The ID of the bus (unused in this implementation). + /// * `_data` - The data received from the bus. + /// + /// # Returns + /// An empty vector, as this implementation does not produce any derived inputs for the bus. + #[inline(always)] + fn measure(&mut self, _data: &[u64]) { + self.counter.update(1); + } - /// Returns the bus IDs associated with this counter. + /// Provides a dynamic reference for downcasting purposes. /// /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] + /// A reference to `self` as `dyn std::any::Any`. + fn as_any(&self) -> &dyn std::any::Any { + self } +} +impl Add for ArithEqCounterInputGen { + type Output = ArithEqCounterInputGen; + + /// Combines two `Arith256Counter` instances by summing their counters. + /// + /// # Arguments + /// * `self` - The first `Arith256Counter` instance. + /// * `other` - The second `Arith256Counter` instance. + /// + /// # Returns + /// A new `Arith256Counter` with combined counters. + fn add(self, other: Self) -> ArithEqCounterInputGen { + ArithEqCounterInputGen { counter: &self.counter + &other.counter, mode: self.mode } + } +} + +impl BusDevice for ArithEqCounterInputGen { /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/precompiles/arith_eq/src/arith_eq_constants.rs b/precompiles/arith_eq/src/arith_eq_constants.rs index cfb8ec0e0..ea5469cbf 100644 --- a/precompiles/arith_eq/src/arith_eq_constants.rs +++ b/precompiles/arith_eq/src/arith_eq_constants.rs @@ -7,7 +7,7 @@ pub const ARITH_EQ_CHUNKS: usize = 16; pub const ARITH_EQ_CHUNK_BITS: usize = 16; pub const ARITH_EQ_CHUNK_SIZE: usize = 1 << ARITH_EQ_CHUNK_BITS; pub const ARITH_EQ_CHUNK_BASE_MAX: usize = ARITH_EQ_CHUNK_SIZE - 1; -pub const ARITH_EQ_OP_NUM: usize = 9; +pub const ARITH_EQ_OP_NUM: usize = 11; pub const SEL_OP_ARITH256: usize = 0; pub const SEL_OP_ARITH256_MOD: usize = 1; @@ -18,6 +18,8 @@ pub const SEL_OP_BN254_CURVE_DBL: usize = 5; pub const SEL_OP_BN254_COMPLEX_ADD: usize = 6; pub const SEL_OP_BN254_COMPLEX_SUB: usize = 7; pub const SEL_OP_BN254_COMPLEX_MUL: usize = 8; +pub const SEL_OP_SECP256R1_ADD: usize = 9; +pub const SEL_OP_SECP256R1_DBL: usize = 10; pub const SECP256K1_PRIME_CHUNKS: [i64; 16] = [ 0xFC2F, 0xFFFF, 0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, @@ -28,3 +30,8 @@ pub const BN254_PRIME_CHUNKS: [i64; 16] = [ 0xFD47, 0xD87C, 0x8C16, 0x3C20, 0xCA8D, 0x6871, 0x6A91, 0x9781, 0x585D, 0x8181, 0x45B6, 0xB850, 0xA029, 0xE131, 0x4E72, 0x3064, ]; + +pub const SECP256R1_PRIME_CHUNKS: [i64; 16] = [ + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0001, 0x0000, 0xFFFF, 0xFFFF, +]; diff --git a/precompiles/arith_eq/src/arith_eq_generator.rs b/precompiles/arith_eq/src/arith_eq_generator.rs index faf67ca13..b45a0fc8f 100644 --- a/precompiles/arith_eq/src/arith_eq_generator.rs +++ b/precompiles/arith_eq/src/arith_eq_generator.rs @@ -296,4 +296,75 @@ fn main() { let pil_file = pil_code_path.join("bn254_complex_mul_y3.pil"); eq.generate_pil_code_to_file("eq_bn254_complex_mul_y3", pil_file.to_str().unwrap()); + + // SECP256R1 + + // s - different points + + let mut eq = Equation::new(&config); + eq.parse( + "s*x2-s*x1-y2+y1-p*q0+p*offset", + &[ + ("p", "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff"), + ("offset", "0x20000000000000000000000000000000000000000000000000000000000000000"), + ], + ); + + let rust_file = rust_code_path.join("secp256r1_add.rs"); + eq.generate_rust_code_to_file("Secp256r1Add", "x1,y1,x2,y2,s,q0", rust_file.to_str().unwrap()); + + let pil_file = pil_code_path.join("secp256r1_add.pil"); + eq.generate_pil_code_to_file("eq_secp256r1_add", pil_file.to_str().unwrap()); + + // s - duplicate points + + let mut eq = Equation::new(&config); + eq.parse( + "2*s*y1-3*x1*x1-a+p*q0-p*offset", + &[ + ("a", "0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc"), + ("p", "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff"), + ("offset", "0x40000000000000000000000000000000000000000000000000000000000000000"), + ], + ); + + let rust_file = rust_code_path.join("secp256r1_dbl.rs"); + eq.generate_rust_code_to_file("Secp256r1Dbl", "x1,y1,s,q0", rust_file.to_str().unwrap()); + + let pil_file = pil_code_path.join("secp256r1_dbl.pil"); + eq.generate_pil_code_to_file("eq_secp256r1_dbl", pil_file.to_str().unwrap()); + + // x3 + + let mut eq = Equation::new(&config); + eq.parse( + "s*s-x1-x2-x3-p*q1+p*offset", + &[ + ("p", "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff"), + ("offset", "0x4"), + ], + ); + + let rust_file = rust_code_path.join("secp256r1_x3.rs"); + eq.generate_rust_code_to_file("Secp256r1X3", "x1,x2,x3,s,q1", rust_file.to_str().unwrap()); + + let pil_file = pil_code_path.join("secp256r1_x3.pil"); + eq.generate_pil_code_to_file("eq_secp256r1_x3", pil_file.to_str().unwrap()); + + // y3 + + let mut eq = Equation::new(&config); + eq.parse( + "s*x1-s*x3-y1-y3+p*q2-p*offset", + &[ + ("p", "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff"), + ("offset", "0x20000000000000000000000000000000000000000000000000000000000000000"), + ], + ); + + let rust_file = rust_code_path.join("secp256r1_y3.rs"); + eq.generate_rust_code_to_file("Secp256r1Y3", "x1,y1,x3,y3,s,q2", rust_file.to_str().unwrap()); + + let pil_file = pil_code_path.join("secp256r1_y3.pil"); + eq.generate_pil_code_to_file("eq_secp256r1_y3", pil_file.to_str().unwrap()); } diff --git a/precompiles/arith_eq/src/arith_eq_input.rs b/precompiles/arith_eq/src/arith_eq_input.rs index 648f43fb8..568151aa8 100644 --- a/precompiles/arith_eq/src/arith_eq_input.rs +++ b/precompiles/arith_eq/src/arith_eq_input.rs @@ -2,6 +2,7 @@ use zisk_common::{ OperationArith256Data, OperationArith256ModData, OperationBn254ComplexAddData, OperationBn254ComplexMulData, OperationBn254ComplexSubData, OperationBn254CurveAddData, OperationBn254CurveDblData, OperationSecp256k1AddData, OperationSecp256k1DblData, + OperationSecp256r1AddData, OperationSecp256r1DblData, }; #[derive(Debug)] @@ -15,6 +16,8 @@ pub enum ArithEqInput { Bn254ComplexAdd(Bn254ComplexAddInput), Bn254ComplexSub(Bn254ComplexSubInput), Bn254ComplexMul(Bn254ComplexMulInput), + Secp256r1Add(Secp256r1AddInput), + Secp256r1Dbl(Secp256r1DblInput), } #[derive(Debug)] @@ -35,15 +38,15 @@ impl Arith256Input { pub fn from(values: &OperationArith256Data) -> Self { Self { addr: values[3] as u32, - a_addr: values[4] as u32, - b_addr: values[5] as u32, - c_addr: values[6] as u32, - dl_addr: values[7] as u32, - dh_addr: values[8] as u32, - step: values[2], - a: values[9..13].try_into().unwrap(), - b: values[13..17].try_into().unwrap(), - c: values[17..21].try_into().unwrap(), + a_addr: values[5] as u32, + b_addr: values[6] as u32, + c_addr: values[7] as u32, + dl_addr: values[8] as u32, + dh_addr: values[9] as u32, + step: values[4], + a: values[10..14].try_into().unwrap(), + b: values[14..18].try_into().unwrap(), + c: values[18..22].try_into().unwrap(), } } } @@ -67,16 +70,16 @@ impl Arith256ModInput { pub fn from(values: &OperationArith256ModData) -> Self { Self { addr: values[3] as u32, - a_addr: values[4] as u32, - b_addr: values[5] as u32, - c_addr: values[6] as u32, - module_addr: values[7] as u32, - d_addr: values[8] as u32, - step: values[2], - a: values[9..13].try_into().unwrap(), - b: values[13..17].try_into().unwrap(), - c: values[17..21].try_into().unwrap(), - module: values[21..25].try_into().unwrap(), + a_addr: values[5] as u32, + b_addr: values[6] as u32, + c_addr: values[7] as u32, + module_addr: values[8] as u32, + d_addr: values[9] as u32, + step: values[4], + a: values[10..14].try_into().unwrap(), + b: values[14..18].try_into().unwrap(), + c: values[18..22].try_into().unwrap(), + module: values[22..26].try_into().unwrap(), } } } @@ -95,11 +98,11 @@ impl Secp256k1AddInput { pub fn from(values: &OperationSecp256k1AddData) -> Self { Self { addr: values[3] as u32, - p1_addr: values[4] as u32, - p2_addr: values[5] as u32, - step: values[2], - p1: values[6..14].try_into().unwrap(), - p2: values[14..22].try_into().unwrap(), + p1_addr: values[5] as u32, + p2_addr: values[6] as u32, + step: values[4], + p1: values[7..15].try_into().unwrap(), + p2: values[15..23].try_into().unwrap(), } } } @@ -113,7 +116,7 @@ pub struct Secp256k1DblInput { impl Secp256k1DblInput { pub fn from(values: &OperationSecp256k1DblData) -> Self { - Self { addr: values[3] as u32, step: values[2], p1: values[4..12].try_into().unwrap() } + Self { addr: values[3] as u32, step: values[4], p1: values[5..13].try_into().unwrap() } } } @@ -131,11 +134,11 @@ impl Bn254CurveAddInput { pub fn from(values: &OperationBn254CurveAddData) -> Self { Self { addr: values[3] as u32, - p1_addr: values[4] as u32, - p2_addr: values[5] as u32, - step: values[2], - p1: values[6..14].try_into().unwrap(), - p2: values[14..22].try_into().unwrap(), + p1_addr: values[5] as u32, + p2_addr: values[6] as u32, + step: values[4], + p1: values[7..15].try_into().unwrap(), + p2: values[15..23].try_into().unwrap(), } } } @@ -149,7 +152,7 @@ pub struct Bn254CurveDblInput { impl Bn254CurveDblInput { pub fn from(values: &OperationBn254CurveDblData) -> Self { - Self { addr: values[3] as u32, step: values[2], p1: values[4..12].try_into().unwrap() } + Self { addr: values[3] as u32, step: values[4], p1: values[5..13].try_into().unwrap() } } } @@ -167,11 +170,11 @@ impl Bn254ComplexAddInput { pub fn from(values: &OperationBn254ComplexAddData) -> Self { Self { addr: values[3] as u32, - f1_addr: values[4] as u32, - f2_addr: values[5] as u32, - step: values[2], - f1: values[6..14].try_into().unwrap(), - f2: values[14..22].try_into().unwrap(), + f1_addr: values[5] as u32, + f2_addr: values[6] as u32, + step: values[4], + f1: values[7..15].try_into().unwrap(), + f2: values[15..23].try_into().unwrap(), } } } @@ -190,11 +193,11 @@ impl Bn254ComplexSubInput { pub fn from(values: &OperationBn254ComplexSubData) -> Self { Self { addr: values[3] as u32, - f1_addr: values[4] as u32, - f2_addr: values[5] as u32, - step: values[2], - f1: values[6..14].try_into().unwrap(), - f2: values[14..22].try_into().unwrap(), + f1_addr: values[5] as u32, + f2_addr: values[6] as u32, + step: values[4], + f1: values[7..15].try_into().unwrap(), + f2: values[15..23].try_into().unwrap(), } } } @@ -213,11 +216,47 @@ impl Bn254ComplexMulInput { pub fn from(values: &OperationBn254ComplexMulData) -> Self { Self { addr: values[3] as u32, - f1_addr: values[4] as u32, - f2_addr: values[5] as u32, - step: values[2], - f1: values[6..14].try_into().unwrap(), - f2: values[14..22].try_into().unwrap(), + f1_addr: values[5] as u32, + f2_addr: values[6] as u32, + step: values[4], + f1: values[7..15].try_into().unwrap(), + f2: values[15..23].try_into().unwrap(), } } } + +#[derive(Debug)] +pub struct Secp256r1AddInput { + pub addr: u32, + pub p1_addr: u32, + pub p2_addr: u32, + pub step: u64, + pub p1: [u64; 8], + pub p2: [u64; 8], +} + +impl Secp256r1AddInput { + pub fn from(values: &OperationSecp256r1AddData) -> Self { + Self { + addr: values[3] as u32, + p1_addr: values[5] as u32, + p2_addr: values[6] as u32, + step: values[4], + p1: values[7..15].try_into().unwrap(), + p2: values[15..23].try_into().unwrap(), + } + } +} + +#[derive(Debug)] +pub struct Secp256r1DblInput { + pub addr: u32, + pub step: u64, + pub p1: [u64; 8], +} + +impl Secp256r1DblInput { + pub fn from(values: &OperationSecp256r1DblData) -> Self { + Self { addr: values[3] as u32, step: values[4], p1: values[5..13].try_into().unwrap() } + } +} diff --git a/precompiles/arith_eq/src/arith_eq_instance.rs b/precompiles/arith_eq/src/arith_eq_instance.rs index 6d15e9800..fac251ddd 100644 --- a/precompiles/arith_eq/src/arith_eq_instance.rs +++ b/precompiles/arith_eq/src/arith_eq_instance.rs @@ -7,18 +7,17 @@ use crate::{ Arith256Input, Arith256ModInput, ArithEqInput, ArithEqSM, Bn254ComplexAddInput, Bn254ComplexMulInput, Bn254ComplexSubInput, Bn254CurveAddInput, Bn254CurveDblInput, - Secp256k1AddInput, Secp256k1DblInput, + Secp256k1AddInput, Secp256k1DblInput, Secp256r1AddInput, Secp256r1DblInput, }; use fields::PrimeField64; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; -use std::collections::VecDeque; use std::{any::Any, collections::HashMap, sync::Arc}; use zisk_common::ChunkId; +use zisk_common::StatsType; use zisk_common::{ BusDevice, BusId, CheckPoint, CollectSkipper, ExtOperationData, Instance, InstanceCtx, - InstanceType, MemCollectorInfo, OperationBusData, PayloadType, OPERATION_BUS_ID, + InstanceType, OperationBusData, PayloadType, OPERATION_BUS_ID, }; - use zisk_core::ZiskOperationType; use zisk_pil::ArithEqTrace; @@ -120,6 +119,10 @@ impl Instance for ArithEqInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Precompiled + } + fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { let (num_ops, collect_skipper) = self.collect_info[&chunk_id]; Some(Box::new(ArithEqCollector::new(num_ops, collect_skipper))) @@ -159,9 +162,7 @@ impl ArithEqCollector { collect_skipper, } } -} -impl BusDevice for ArithEqCollector { /// Processes data received on the bus, collecting the inputs necessary for witness computation. /// /// # Arguments @@ -173,13 +174,7 @@ impl BusDevice for ArithEqCollector { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[PayloadType], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[PayloadType]) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); if self.inputs.len() == self.num_operations as usize { @@ -225,21 +220,21 @@ impl BusDevice for ArithEqCollector { ExtOperationData::OperationBn254ComplexMulData(bus_data) => { ArithEqInput::Bn254ComplexMul(Bn254ComplexMulInput::from(&bus_data)) } + ExtOperationData::OperationSecp256r1AddData(bus_data) => { + ArithEqInput::Secp256r1Add(Secp256r1AddInput::from(&bus_data)) + } + ExtOperationData::OperationSecp256r1DblData(bus_data) => { + ArithEqInput::Secp256r1Dbl(Secp256r1DblInput::from(&bus_data)) + } // Add here new operations _ => panic!("Expected ExtOperationData::OperationData"), }); self.inputs.len() < self.num_operations as usize } +} - /// Returns the bus IDs associated with this instance. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - +impl BusDevice for ArithEqCollector { fn as_any(self: Box) -> Box { self } diff --git a/precompiles/arith_eq/src/arith_eq_manager.rs b/precompiles/arith_eq/src/arith_eq_manager.rs index 2d8835a68..86bc95250 100644 --- a/precompiles/arith_eq/src/arith_eq_manager.rs +++ b/precompiles/arith_eq/src/arith_eq_manager.rs @@ -2,11 +2,8 @@ use std::sync::Arc; use fields::PrimeField64; use pil_std_lib::Std; -use zisk_common::{BusDevice, PayloadType}; -use zisk_common::{ - BusDeviceMetrics, BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, InstanceInfo, Planner, -}; +use zisk_common::{BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, InstanceInfo, Planner}; use zisk_core::ZiskOperationType; use zisk_pil::ArithEqTrace; @@ -31,8 +28,11 @@ impl ArithEqManager { Arc::new(Self { arith_eq_sm }) } - pub fn build_arith_eq_counter(&self) -> ArithEqCounterInputGen { - ArithEqCounterInputGen::new(BusDeviceMode::Counter) + pub fn build_arith_eq_counter(&self, asm_execution: bool) -> ArithEqCounterInputGen { + match asm_execution { + true => ArithEqCounterInputGen::new(BusDeviceMode::CounterAsm), + false => ArithEqCounterInputGen::new(BusDeviceMode::Counter), + } } pub fn build_arith_eq_input_generator(&self) -> ArithEqCounterInputGen { @@ -41,14 +41,6 @@ impl ArithEqManager { } impl ComponentBuilder for ArithEqManager { - /// Builds and returns a new counter for monitoring arith256 operations. - /// - /// # Returns - /// A boxed implementation of `RegularCounters` configured for arith256 operations. - fn build_counter(&self) -> Option> { - Some(Box::new(ArithEqCounterInputGen::new(BusDeviceMode::Counter))) - } - /// Builds a planner to plan arith256-related instances. /// /// # Returns @@ -86,8 +78,4 @@ impl ComponentBuilder for ArithEqManager { } } } - - fn build_inputs_generator(&self) -> Option>> { - Some(Box::new(ArithEqCounterInputGen::new(BusDeviceMode::InputGenerator))) - } } diff --git a/precompiles/arith_eq/src/arith_eq_test_bigint.rs b/precompiles/arith_eq/src/arith_eq_test_bigint.rs index 9b24bfe67..e24be9f27 100644 --- a/precompiles/arith_eq/src/arith_eq_test_bigint.rs +++ b/precompiles/arith_eq/src/arith_eq_test_bigint.rs @@ -1,13 +1,15 @@ mod test_data; use test_data::{ get_arith256_mod_test_data, get_arith256_test_data, get_bn254_curve_add_test_data, - get_secp256k1_add_test_data, get_secp256k1_dbl_test_data, + get_secp256k1_add_test_data, get_secp256k1_dbl_test_data, get_secp256r1_add_test_data, + get_secp256r1_dbl_test_data, }; mod equations; mod executors; use executors::{ arith256::Arith256, arith256_mod::Arith256Mod, bn254_curve::Bn254Curve, secp256k1::Secp256k1, + secp256r1::Secp256r1, }; // cargo run --release --features="test_data" --bin arith_eq_test_bigint @@ -63,4 +65,23 @@ fn main() { // Bn254Curve::verify_add(&p1, &p2, &p3); // index += 1; // } + + index = 0; + while let Some((p1, p2, p3)) = get_secp256r1_add_test_data(index) { + println!("testing index secp256r1_add #{} ....", index); + if verbose { + println!("SECP256R1_ADD\n p1: {:?},\n p2: {:?},\n p3: {:?}", p1, p2, p3); + } + Secp256r1::verify_add(&p1, &p2, &p3); + index += 1; + } + index = 0; + while let Some((p1, p3)) = get_secp256r1_dbl_test_data(index) { + println!("testing index secp256r1_dbl #{} ....", index); + if verbose { + println!("SECP256R1_DBL\n p1: {:?},\n p3: {:?}", p1, p3); + } + Secp256r1::verify_dbl(&p1, &p3); + index += 1; + } } diff --git a/precompiles/arith_eq/src/arith_eq_test_generator.rs b/precompiles/arith_eq/src/arith_eq_test_generator.rs index 422a29f68..763e1cc1c 100644 --- a/precompiles/arith_eq/src/arith_eq_test_generator.rs +++ b/precompiles/arith_eq/src/arith_eq_test_generator.rs @@ -4,24 +4,19 @@ use test_data::{ get_arith256_mod_test_data, get_arith256_test_data, get_bn254_complex_add_test_data, get_bn254_complex_mul_test_data, get_bn254_complex_sub_test_data, get_bn254_curve_add_test_data, get_bn254_curve_dbl_test_data, get_secp256k1_add_test_data, - get_secp256k1_dbl_test_data, + get_secp256k1_dbl_test_data, get_secp256r1_add_test_data, get_secp256r1_dbl_test_data, }; mod arith_eq_constants; use arith_eq_constants::ARITH_EQ_ROWS_BY_OP; -// cargo run --release --features="test_data" --bin arith_eq_test_generator +// cargo run --release --features="test_data" --bin arith_eq_test_generator > ../zisk-testvectors/zisk-programs/arith_eq_gen/program/src/main.rs fn main() { let mut code = String::new(); code += "#![no_main]\n"; - code += "#![cfg(all(target_os = \"zkvm\", target_vendor = \"zisk\"))]\n"; code += "ziskos::entrypoint!(main);\n\n"; - code += "use ziskos::{\n"; - code += "\tarith256::*, arith256_mod::*, bn254_complex_add::*, bn254_complex_mul::*, bn254_complex_sub::*,\n"; - code += "\tbn254_curve_add::*, bn254_curve_dbl::*, complex::*, point::*, secp256k1_add::*,\n"; - code += "\tsecp256k1_dbl::*,\n"; - code += "};\n\n"; + code += "use zisk_definitions::*;\n\n"; code += "fn main() {\n"; code += "\tlet mut a: [u64;4] = [0,0,0,0];\n"; code += "\tlet mut b: [u64;4] = [0,0,0,0];\n"; @@ -139,6 +134,59 @@ fn main() { index += 1; } + code += "\tlet mut params = SyscallSecp256r1AddParams { p1: &mut p1, p2: &p2 };\n"; + + let initial_index = index; + while let Some((p1, p2, p3)) = get_secp256r1_add_test_data(index - initial_index) { + code += &format!( + "\t// secp256r1_add test rows: {}-{}\n\n", + index * ARITH_EQ_ROWS_BY_OP, + (index + 1) * ARITH_EQ_ROWS_BY_OP - 1 + ); + let p1_x: [u64; 4] = p1[0..4].try_into().unwrap(); + let p1_y: [u64; 4] = p1[4..8].try_into().unwrap(); + code += &format!( + "\tlet mut p1 = SyscallPoint256 {{\n\t\tx: {p1_x:?},\n\t\ty: {p1_y:?}\n\t}};\n" + ); + let p2_x: [u64; 4] = p2[0..4].try_into().unwrap(); + let p2_y: [u64; 4] = p2[4..8].try_into().unwrap(); + code += + &format!("\tlet p2 = SyscallPoint256 {{\n\t\tx: {p2_x:?},\n\t\ty: {p2_y:?}\n\t}};\n"); + code += "\tparams.p1 = &mut p1;\n"; + code += "\tparams.p2 = &p2;\n"; + code += "\tsyscall_secp256r1_add(&mut params);\n"; + + let p3_x: [u64; 4] = p3[0..4].try_into().unwrap(); + let p3_y: [u64; 4] = p3[4..8].try_into().unwrap(); + code += + &format!("\tlet p3 = SyscallPoint256 {{\n\t\tx: {p3_x:?},\n\t\ty: {p3_y:?}\n\t}};\n"); + code += "\tassert_eq!(params.p1.x, p3.x);\n"; + code += "\tassert_eq!(params.p1.y, p3.y);\n\n"; + index += 1; + } + + let initial_index = index; + while let Some((p1, p3)) = get_secp256r1_dbl_test_data(index - initial_index) { + code += &format!( + "\t// secp256r1_dbl test rows: {}-{}\n\n", + index * ARITH_EQ_ROWS_BY_OP, + (index + 1) * ARITH_EQ_ROWS_BY_OP - 1 + ); + let p1_x: [u64; 4] = p1[0..4].try_into().unwrap(); + let p1_y: [u64; 4] = p1[4..8].try_into().unwrap(); + code += &format!( + "\tlet mut p1 = SyscallPoint256 {{\n\t\tx: {p1_x:?},\n\t\ty: {p1_y:?}\n\t}};\n" + ); + code += "\tsyscall_secp256r1_dbl(&mut p1);\n"; + let p3_x: [u64; 4] = p3[0..4].try_into().unwrap(); + let p3_y: [u64; 4] = p3[4..8].try_into().unwrap(); + code += + &format!("\tlet p3 = SyscallPoint256 {{\n\t\tx: {p3_x:?},\n\t\ty: {p3_y:?}\n\t}};\n"); + code += "\tassert_eq!(&p1.x, &p3.x);\n"; + code += "\tassert_eq!(&p1.y, &p3.y);\n\n"; + index += 1; + } + code += "\tlet mut params = SyscallBn254CurveAddParams { p1: &mut p1, p2: &p2 };\n"; let initial_index = index; while let Some((p1, p2, p3)) = get_bn254_curve_add_test_data(index - initial_index) { diff --git a/precompiles/arith_eq/src/arith_eq_test_secp256r1.rs b/precompiles/arith_eq/src/arith_eq_test_secp256r1.rs new file mode 100644 index 000000000..e34981fd5 --- /dev/null +++ b/precompiles/arith_eq/src/arith_eq_test_secp256r1.rs @@ -0,0 +1,67 @@ +use ark_ff::BigInt; +use ark_secp256r1::Fq as Secp256r1Field; +use ark_std::{One, Zero}; +use std::time::Instant; +#[cfg(any(feature = "test_data", feature = "test_data_secp256r1"))] +mod test_data; +#[cfg(any(feature = "test_data", feature = "test_data_secp256r1"))] +use precompiles_helpers::{secp256r1_add, secp256r1_dbl}; +#[cfg(any(feature = "test_data", feature = "test_data_secp256r1"))] +use test_data::{get_secp256r1_add_test_data, get_secp256r1_dbl_test_data}; + +fn verify_secp256r1_add(test_id: usize, p1: &[u64; 8], p2: &[u64; 8], p: &mut [u64; 8]) { + let mut _p = [0u64; 8]; + secp256r1_add(p1, p2, &mut _p); + assert_eq!(&p[..], &_p[..8], "fail test {}", test_id); + println!("Test #{} (secp256r1_add) .... [\x1B[32mOK\x1B[0m]", test_id) +} + +fn verify_secp256r1_dbl(test_id: usize, p1: &[u64; 8], p: &mut [u64; 8]) { + let mut _p = [0u64; 8]; + secp256r1_dbl(p1, &mut _p); + assert_eq!(&p[..], &_p[..8], "fail test {}", test_id); + println!("Test #{} (secp256r1_dbl) .... [\x1B[32mOK\x1B[0m]", test_id) +} + +#[cfg(any(feature = "test_data", feature = "test_data_secp256r1"))] +fn test() { + let mut index = 0; + while let Some((p1, p2, mut p3)) = get_secp256r1_add_test_data(index) { + verify_secp256r1_add(index, &p1, &p2, &mut p3); + index += 1; + } + index = 0; + while let Some((p1, mut p3)) = get_secp256r1_dbl_test_data(index) { + verify_secp256r1_dbl(index, &p1, &mut p3); + index += 1; + } + + // Run the first test a million times to measure performance + if let Some((p1, p2, mut p3)) = get_secp256r1_add_test_data(0) { + let start = Instant::now(); + for _ in 0..1000000 { + secp256r1_add(&p1, &p2, &mut p3); + } + let duration = start.elapsed(); + let secs = duration.as_secs_f64(); + let tp = if secs == 0.0 { 1_f64 } else { 1_f64 / secs }; + println!("Duration = {:.4} sec, TP = {:.4} M/sec", secs, tp); + } +} + +fn main() { + let arr = BigInt::<4>([ + 0xfffffffffffffffe, + 0x00000000ffffffff, + 0x0000000000000000, + 0xffffffff00000001, + ]); + + let element = Secp256r1Field::from(arr); + println!("Element: {:?}", element); + let one = Secp256r1Field::one(); + let zero = Secp256r1Field::zero(); + let sum = zero - one; + println!("0-1: {:?}", sum); + test(); +} diff --git a/precompiles/arith_eq/src/equations/mod.rs b/precompiles/arith_eq/src/equations/mod.rs index 9e6e56faf..e97a10437 100644 --- a/precompiles/arith_eq/src/equations/mod.rs +++ b/precompiles/arith_eq/src/equations/mod.rs @@ -14,6 +14,10 @@ mod secp256k1_add; mod secp256k1_dbl; mod secp256k1_x3; mod secp256k1_y3; +mod secp256r1_add; +mod secp256r1_dbl; +mod secp256r1_x3; +mod secp256r1_y3; pub use arith256::*; pub use arith256_mod::*; @@ -31,3 +35,7 @@ pub use secp256k1_add::*; pub use secp256k1_dbl::*; pub use secp256k1_x3::*; pub use secp256k1_y3::*; +pub use secp256r1_add::*; +pub use secp256r1_dbl::*; +pub use secp256r1_x3::*; +pub use secp256r1_y3::*; diff --git a/precompiles/arith_eq/src/equations/secp256r1_add.rs b/precompiles/arith_eq/src/equations/secp256r1_add.rs new file mode 100644 index 000000000..f53775ac8 --- /dev/null +++ b/precompiles/arith_eq/src/equations/secp256r1_add.rs @@ -0,0 +1,755 @@ +// code generated +// +// equation: s*x2-s*x1-y2+y1-p*q0+p*offset +// +// p: 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF +// offset: 0x20000000000000000000000000000000000000000000000000000000000000000 +// (p*offset): 0x1FFFFFFFE00000002000000000000000000000001FFFFFFFFFFFFFFFFFFFFFFFE0000000000000000000000000000000000000000000000000000000000000000 +// +// chunks:16 +// chunk_bits:16 +// terms_by_clock: 2 + +pub struct Secp256r1Add {} + +impl Secp256r1Add { + #[allow(clippy::too_many_arguments)] + pub fn calculate( + icol: u8, + x1: &[i64; 16], + y1: &[i64; 16], + x2: &[i64; 16], + y2: &[i64; 16], + s: &[i64; 16], + q0: &[i64; 16], + ) -> i64 { + match icol { + 0 => s[0] * x2[0] - s[0] * x1[0] - y2[0] + y1[0] - 0xFFFF * q0[0], + 1 => { + s[1] * x2[0] + s[0] * x2[1] - s[1] * x1[0] - s[0] * x1[1] - y2[1] + y1[1] + - 0xFFFF * q0[0] + - 0xFFFF * q0[1] + } + 2 => { + s[2] * x2[0] + s[1] * x2[1] + s[0] * x2[2] + - s[2] * x1[0] + - s[1] * x1[1] + - s[0] * x1[2] + - y2[2] + + y1[2] + - 0xFFFF * q0[0] + - 0xFFFF * q0[1] + - 0xFFFF * q0[2] + } + 3 => { + s[3] * x2[0] + s[2] * x2[1] + s[1] * x2[2] + s[0] * x2[3] + - s[3] * x1[0] + - s[2] * x1[1] + - s[1] * x1[2] + - s[0] * x1[3] + - y2[3] + + y1[3] + - 0xFFFF * q0[0] + - 0xFFFF * q0[1] + - 0xFFFF * q0[2] + - 0xFFFF * q0[3] + } + 4 => { + s[4] * x2[0] + s[3] * x2[1] + s[2] * x2[2] + s[1] * x2[3] + s[0] * x2[4] + - s[4] * x1[0] + - s[3] * x1[1] + - s[2] * x1[2] + - s[1] * x1[3] + - s[0] * x1[4] + - y2[4] + + y1[4] + - 0xFFFF * q0[0] + - 0xFFFF * q0[1] + - 0xFFFF * q0[2] + - 0xFFFF * q0[3] + - 0xFFFF * q0[4] + } + 5 => { + s[5] * x2[0] + + s[4] * x2[1] + + s[3] * x2[2] + + s[2] * x2[3] + + s[1] * x2[4] + + s[0] * x2[5] + - s[5] * x1[0] + - s[4] * x1[1] + - s[3] * x1[2] + - s[2] * x1[3] + - s[1] * x1[4] + - s[0] * x1[5] + - y2[5] + + y1[5] + - 0xFFFF * q0[0] + - 0xFFFF * q0[1] + - 0xFFFF * q0[2] + - 0xFFFF * q0[3] + - 0xFFFF * q0[4] + - 0xFFFF * q0[5] + } + 6 => { + s[6] * x2[0] + + s[5] * x2[1] + + s[4] * x2[2] + + s[3] * x2[3] + + s[2] * x2[4] + + s[1] * x2[5] + + s[0] * x2[6] + - s[6] * x1[0] + - s[5] * x1[1] + - s[4] * x1[2] + - s[3] * x1[3] + - s[2] * x1[4] + - s[1] * x1[5] + - s[0] * x1[6] + - y2[6] + + y1[6] + - 0xFFFF * q0[1] + - 0xFFFF * q0[2] + - 0xFFFF * q0[3] + - 0xFFFF * q0[4] + - 0xFFFF * q0[5] + - 0xFFFF * q0[6] + } + 7 => { + s[7] * x2[0] + + s[6] * x2[1] + + s[5] * x2[2] + + s[4] * x2[3] + + s[3] * x2[4] + + s[2] * x2[5] + + s[1] * x2[6] + + s[0] * x2[7] + - s[7] * x1[0] + - s[6] * x1[1] + - s[5] * x1[2] + - s[4] * x1[3] + - s[3] * x1[4] + - s[2] * x1[5] + - s[1] * x1[6] + - s[0] * x1[7] + - y2[7] + + y1[7] + - 0xFFFF * q0[2] + - 0xFFFF * q0[3] + - 0xFFFF * q0[4] + - 0xFFFF * q0[5] + - 0xFFFF * q0[6] + - 0xFFFF * q0[7] + } + 8 => { + s[8] * x2[0] + + s[7] * x2[1] + + s[6] * x2[2] + + s[5] * x2[3] + + s[4] * x2[4] + + s[3] * x2[5] + + s[2] * x2[6] + + s[1] * x2[7] + + s[0] * x2[8] + - s[8] * x1[0] + - s[7] * x1[1] + - s[6] * x1[2] + - s[5] * x1[3] + - s[4] * x1[4] + - s[3] * x1[5] + - s[2] * x1[6] + - s[1] * x1[7] + - s[0] * x1[8] + - y2[8] + + y1[8] + - 0xFFFF * q0[3] + - 0xFFFF * q0[4] + - 0xFFFF * q0[5] + - 0xFFFF * q0[6] + - 0xFFFF * q0[7] + - 0xFFFF * q0[8] + } + 9 => { + s[9] * x2[0] + + s[8] * x2[1] + + s[7] * x2[2] + + s[6] * x2[3] + + s[5] * x2[4] + + s[4] * x2[5] + + s[3] * x2[6] + + s[2] * x2[7] + + s[1] * x2[8] + + s[0] * x2[9] + - s[9] * x1[0] + - s[8] * x1[1] + - s[7] * x1[2] + - s[6] * x1[3] + - s[5] * x1[4] + - s[4] * x1[5] + - s[3] * x1[6] + - s[2] * x1[7] + - s[1] * x1[8] + - s[0] * x1[9] + - y2[9] + + y1[9] + - 0xFFFF * q0[4] + - 0xFFFF * q0[5] + - 0xFFFF * q0[6] + - 0xFFFF * q0[7] + - 0xFFFF * q0[8] + - 0xFFFF * q0[9] + } + 10 => { + s[10] * x2[0] + + s[9] * x2[1] + + s[8] * x2[2] + + s[7] * x2[3] + + s[6] * x2[4] + + s[5] * x2[5] + + s[4] * x2[6] + + s[3] * x2[7] + + s[2] * x2[8] + + s[1] * x2[9] + + s[0] * x2[10] + - s[10] * x1[0] + - s[9] * x1[1] + - s[8] * x1[2] + - s[7] * x1[3] + - s[6] * x1[4] + - s[5] * x1[5] + - s[4] * x1[6] + - s[3] * x1[7] + - s[2] * x1[8] + - s[1] * x1[9] + - s[0] * x1[10] + - y2[10] + + y1[10] + - 0xFFFF * q0[5] + - 0xFFFF * q0[6] + - 0xFFFF * q0[7] + - 0xFFFF * q0[8] + - 0xFFFF * q0[9] + - 0xFFFF * q0[10] + } + 11 => { + s[11] * x2[0] + + s[10] * x2[1] + + s[9] * x2[2] + + s[8] * x2[3] + + s[7] * x2[4] + + s[6] * x2[5] + + s[5] * x2[6] + + s[4] * x2[7] + + s[3] * x2[8] + + s[2] * x2[9] + + s[1] * x2[10] + + s[0] * x2[11] + - s[11] * x1[0] + - s[10] * x1[1] + - s[9] * x1[2] + - s[8] * x1[3] + - s[7] * x1[4] + - s[6] * x1[5] + - s[5] * x1[6] + - s[4] * x1[7] + - s[3] * x1[8] + - s[2] * x1[9] + - s[1] * x1[10] + - s[0] * x1[11] + - y2[11] + + y1[11] + - 0xFFFF * q0[6] + - 0xFFFF * q0[7] + - 0xFFFF * q0[8] + - 0xFFFF * q0[9] + - 0xFFFF * q0[10] + - 0xFFFF * q0[11] + } + 12 => { + s[12] * x2[0] + + s[11] * x2[1] + + s[10] * x2[2] + + s[9] * x2[3] + + s[8] * x2[4] + + s[7] * x2[5] + + s[6] * x2[6] + + s[5] * x2[7] + + s[4] * x2[8] + + s[3] * x2[9] + + s[2] * x2[10] + + s[1] * x2[11] + + s[0] * x2[12] + - s[12] * x1[0] + - s[11] * x1[1] + - s[10] * x1[2] + - s[9] * x1[3] + - s[8] * x1[4] + - s[7] * x1[5] + - s[6] * x1[6] + - s[5] * x1[7] + - s[4] * x1[8] + - s[3] * x1[9] + - s[2] * x1[10] + - s[1] * x1[11] + - s[0] * x1[12] + - y2[12] + + y1[12] + - q0[0] + - 0xFFFF * q0[7] + - 0xFFFF * q0[8] + - 0xFFFF * q0[9] + - 0xFFFF * q0[10] + - 0xFFFF * q0[11] + - 0xFFFF * q0[12] + } + 13 => { + s[13] * x2[0] + + s[12] * x2[1] + + s[11] * x2[2] + + s[10] * x2[3] + + s[9] * x2[4] + + s[8] * x2[5] + + s[7] * x2[6] + + s[6] * x2[7] + + s[5] * x2[8] + + s[4] * x2[9] + + s[3] * x2[10] + + s[2] * x2[11] + + s[1] * x2[12] + + s[0] * x2[13] + - s[13] * x1[0] + - s[12] * x1[1] + - s[11] * x1[2] + - s[10] * x1[3] + - s[9] * x1[4] + - s[8] * x1[5] + - s[7] * x1[6] + - s[6] * x1[7] + - s[5] * x1[8] + - s[4] * x1[9] + - s[3] * x1[10] + - s[2] * x1[11] + - s[1] * x1[12] + - s[0] * x1[13] + - y2[13] + + y1[13] + - q0[1] + - 0xFFFF * q0[8] + - 0xFFFF * q0[9] + - 0xFFFF * q0[10] + - 0xFFFF * q0[11] + - 0xFFFF * q0[12] + - 0xFFFF * q0[13] + } + 14 => { + s[14] * x2[0] + + s[13] * x2[1] + + s[12] * x2[2] + + s[11] * x2[3] + + s[10] * x2[4] + + s[9] * x2[5] + + s[8] * x2[6] + + s[7] * x2[7] + + s[6] * x2[8] + + s[5] * x2[9] + + s[4] * x2[10] + + s[3] * x2[11] + + s[2] * x2[12] + + s[1] * x2[13] + + s[0] * x2[14] + - s[14] * x1[0] + - s[13] * x1[1] + - s[12] * x1[2] + - s[11] * x1[3] + - s[10] * x1[4] + - s[9] * x1[5] + - s[8] * x1[6] + - s[7] * x1[7] + - s[6] * x1[8] + - s[5] * x1[9] + - s[4] * x1[10] + - s[3] * x1[11] + - s[2] * x1[12] + - s[1] * x1[13] + - s[0] * x1[14] + - y2[14] + + y1[14] + - 0xFFFF * q0[0] + - q0[2] + - 0xFFFF * q0[9] + - 0xFFFF * q0[10] + - 0xFFFF * q0[11] + - 0xFFFF * q0[12] + - 0xFFFF * q0[13] + - 0xFFFF * q0[14] + } + 15 => { + s[15] * x2[0] + + s[14] * x2[1] + + s[13] * x2[2] + + s[12] * x2[3] + + s[11] * x2[4] + + s[10] * x2[5] + + s[9] * x2[6] + + s[8] * x2[7] + + s[7] * x2[8] + + s[6] * x2[9] + + s[5] * x2[10] + + s[4] * x2[11] + + s[3] * x2[12] + + s[2] * x2[13] + + s[1] * x2[14] + + s[0] * x2[15] + - s[15] * x1[0] + - s[14] * x1[1] + - s[13] * x1[2] + - s[12] * x1[3] + - s[11] * x1[4] + - s[10] * x1[5] + - s[9] * x1[6] + - s[8] * x1[7] + - s[7] * x1[8] + - s[6] * x1[9] + - s[5] * x1[10] + - s[4] * x1[11] + - s[3] * x1[12] + - s[2] * x1[13] + - s[1] * x1[14] + - s[0] * x1[15] + - y2[15] + + y1[15] + - 0xFFFF * q0[0] + - 0xFFFF * q0[1] + - q0[3] + - 0xFFFF * q0[10] + - 0xFFFF * q0[11] + - 0xFFFF * q0[12] + - 0xFFFF * q0[13] + - 0xFFFF * q0[14] + - 0xFFFF * q0[15] + } + 16 => { + s[15] * x2[1] + + s[14] * x2[2] + + s[13] * x2[3] + + s[12] * x2[4] + + s[11] * x2[5] + + s[10] * x2[6] + + s[9] * x2[7] + + s[8] * x2[8] + + s[7] * x2[9] + + s[6] * x2[10] + + s[5] * x2[11] + + s[4] * x2[12] + + s[3] * x2[13] + + s[2] * x2[14] + + s[1] * x2[15] + - s[15] * x1[1] + - s[14] * x1[2] + - s[13] * x1[3] + - s[12] * x1[4] + - s[11] * x1[5] + - s[10] * x1[6] + - s[9] * x1[7] + - s[8] * x1[8] + - s[7] * x1[9] + - s[6] * x1[10] + - s[5] * x1[11] + - s[4] * x1[12] + - s[3] * x1[13] + - s[2] * x1[14] + - s[1] * x1[15] + - 0xFFFF * q0[1] + - 0xFFFF * q0[2] + - q0[4] + - 0xFFFF * q0[11] + - 0xFFFF * q0[12] + - 0xFFFF * q0[13] + - 0xFFFF * q0[14] + - 0xFFFF * q0[15] + + 0xFFFE + } + 17 => { + s[15] * x2[2] + + s[14] * x2[3] + + s[13] * x2[4] + + s[12] * x2[5] + + s[11] * x2[6] + + s[10] * x2[7] + + s[9] * x2[8] + + s[8] * x2[9] + + s[7] * x2[10] + + s[6] * x2[11] + + s[5] * x2[12] + + s[4] * x2[13] + + s[3] * x2[14] + + s[2] * x2[15] + - s[15] * x1[2] + - s[14] * x1[3] + - s[13] * x1[4] + - s[12] * x1[5] + - s[11] * x1[6] + - s[10] * x1[7] + - s[9] * x1[8] + - s[8] * x1[9] + - s[7] * x1[10] + - s[6] * x1[11] + - s[5] * x1[12] + - s[4] * x1[13] + - s[3] * x1[14] + - s[2] * x1[15] + - 0xFFFF * q0[2] + - 0xFFFF * q0[3] + - q0[5] + - 0xFFFF * q0[12] + - 0xFFFF * q0[13] + - 0xFFFF * q0[14] + - 0xFFFF * q0[15] + + 0xFFFF + } + 18 => { + s[15] * x2[3] + + s[14] * x2[4] + + s[13] * x2[5] + + s[12] * x2[6] + + s[11] * x2[7] + + s[10] * x2[8] + + s[9] * x2[9] + + s[8] * x2[10] + + s[7] * x2[11] + + s[6] * x2[12] + + s[5] * x2[13] + + s[4] * x2[14] + + s[3] * x2[15] + - s[15] * x1[3] + - s[14] * x1[4] + - s[13] * x1[5] + - s[12] * x1[6] + - s[11] * x1[7] + - s[10] * x1[8] + - s[9] * x1[9] + - s[8] * x1[10] + - s[7] * x1[11] + - s[6] * x1[12] + - s[5] * x1[13] + - s[4] * x1[14] + - s[3] * x1[15] + - 0xFFFF * q0[3] + - 0xFFFF * q0[4] + - q0[6] + - 0xFFFF * q0[13] + - 0xFFFF * q0[14] + - 0xFFFF * q0[15] + + 0xFFFF + } + 19 => { + s[15] * x2[4] + + s[14] * x2[5] + + s[13] * x2[6] + + s[12] * x2[7] + + s[11] * x2[8] + + s[10] * x2[9] + + s[9] * x2[10] + + s[8] * x2[11] + + s[7] * x2[12] + + s[6] * x2[13] + + s[5] * x2[14] + + s[4] * x2[15] + - s[15] * x1[4] + - s[14] * x1[5] + - s[13] * x1[6] + - s[12] * x1[7] + - s[11] * x1[8] + - s[10] * x1[9] + - s[9] * x1[10] + - s[8] * x1[11] + - s[7] * x1[12] + - s[6] * x1[13] + - s[5] * x1[14] + - s[4] * x1[15] + - 0xFFFF * q0[4] + - 0xFFFF * q0[5] + - q0[7] + - 0xFFFF * q0[14] + - 0xFFFF * q0[15] + + 0xFFFF + } + 20 => { + s[15] * x2[5] + + s[14] * x2[6] + + s[13] * x2[7] + + s[12] * x2[8] + + s[11] * x2[9] + + s[10] * x2[10] + + s[9] * x2[11] + + s[8] * x2[12] + + s[7] * x2[13] + + s[6] * x2[14] + + s[5] * x2[15] + - s[15] * x1[5] + - s[14] * x1[6] + - s[13] * x1[7] + - s[12] * x1[8] + - s[11] * x1[9] + - s[10] * x1[10] + - s[9] * x1[11] + - s[8] * x1[12] + - s[7] * x1[13] + - s[6] * x1[14] + - s[5] * x1[15] + - 0xFFFF * q0[5] + - 0xFFFF * q0[6] + - q0[8] + - 0xFFFF * q0[15] + + 0xFFFF + } + 21 => { + s[15] * x2[6] + + s[14] * x2[7] + + s[13] * x2[8] + + s[12] * x2[9] + + s[11] * x2[10] + + s[10] * x2[11] + + s[9] * x2[12] + + s[8] * x2[13] + + s[7] * x2[14] + + s[6] * x2[15] + - s[15] * x1[6] + - s[14] * x1[7] + - s[13] * x1[8] + - s[12] * x1[9] + - s[11] * x1[10] + - s[10] * x1[11] + - s[9] * x1[12] + - s[8] * x1[13] + - s[7] * x1[14] + - s[6] * x1[15] + - 0xFFFF * q0[6] + - 0xFFFF * q0[7] + - q0[9] + + 0xFFFF + } + 22 => { + s[15] * x2[7] + + s[14] * x2[8] + + s[13] * x2[9] + + s[12] * x2[10] + + s[11] * x2[11] + + s[10] * x2[12] + + s[9] * x2[13] + + s[8] * x2[14] + + s[7] * x2[15] + - s[15] * x1[7] + - s[14] * x1[8] + - s[13] * x1[9] + - s[12] * x1[10] + - s[11] * x1[11] + - s[10] * x1[12] + - s[9] * x1[13] + - s[8] * x1[14] + - s[7] * x1[15] + - 0xFFFF * q0[7] + - 0xFFFF * q0[8] + - q0[10] + + 0x1 + } + 23 => { + s[15] * x2[8] + + s[14] * x2[9] + + s[13] * x2[10] + + s[12] * x2[11] + + s[11] * x2[12] + + s[10] * x2[13] + + s[9] * x2[14] + + s[8] * x2[15] + - s[15] * x1[8] + - s[14] * x1[9] + - s[13] * x1[10] + - s[12] * x1[11] + - s[11] * x1[12] + - s[10] * x1[13] + - s[9] * x1[14] + - s[8] * x1[15] + - 0xFFFF * q0[8] + - 0xFFFF * q0[9] + - q0[11] + } + 24 => { + s[15] * x2[9] + + s[14] * x2[10] + + s[13] * x2[11] + + s[12] * x2[12] + + s[11] * x2[13] + + s[10] * x2[14] + + s[9] * x2[15] + - s[15] * x1[9] + - s[14] * x1[10] + - s[13] * x1[11] + - s[12] * x1[12] + - s[11] * x1[13] + - s[10] * x1[14] + - s[9] * x1[15] + - 0xFFFF * q0[9] + - 0xFFFF * q0[10] + - q0[12] + } + 25 => { + s[15] * x2[10] + + s[14] * x2[11] + + s[13] * x2[12] + + s[12] * x2[13] + + s[11] * x2[14] + + s[10] * x2[15] + - s[15] * x1[10] + - s[14] * x1[11] + - s[13] * x1[12] + - s[12] * x1[13] + - s[11] * x1[14] + - s[10] * x1[15] + - 0xFFFF * q0[10] + - 0xFFFF * q0[11] + - q0[13] + } + 26 => { + s[15] * x2[11] + s[14] * x2[12] + s[13] * x2[13] + s[12] * x2[14] + s[11] * x2[15] + - s[15] * x1[11] + - s[14] * x1[12] + - s[13] * x1[13] + - s[12] * x1[14] + - s[11] * x1[15] + - 0xFFFF * q0[11] + - 0xFFFF * q0[12] + - q0[14] + } + 27 => { + s[15] * x2[12] + s[14] * x2[13] + s[13] * x2[14] + s[12] * x2[15] + - s[15] * x1[12] + - s[14] * x1[13] + - s[13] * x1[14] + - s[12] * x1[15] + - 0xFFFF * q0[12] + - 0xFFFF * q0[13] + - q0[15] + } + 28 => { + s[15] * x2[13] + s[14] * x2[14] + s[13] * x2[15] + - s[15] * x1[13] + - s[14] * x1[14] + - s[13] * x1[15] + - 0xFFFF * q0[13] + - 0xFFFF * q0[14] + + 0x2 + } + 29 => { + s[15] * x2[14] + s[14] * x2[15] + - s[15] * x1[14] + - s[14] * x1[15] + - 0xFFFF * q0[14] + - 0xFFFF * q0[15] + } + 30 => s[15] * x2[15] - s[15] * x1[15] - 0xFFFF * q0[15] + 0xFFFE, + 31 => 0x1FFFF, + _ => 0, + } + } +} diff --git a/precompiles/arith_eq/src/equations/secp256r1_dbl.rs b/precompiles/arith_eq/src/equations/secp256r1_dbl.rs new file mode 100644 index 000000000..65395b8c8 --- /dev/null +++ b/precompiles/arith_eq/src/equations/secp256r1_dbl.rs @@ -0,0 +1,741 @@ +// code generated +// +// equation: 2*s*y1-3*x1*x1-a+p*q0-p*offset +// +// a: 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC +// p: 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF +// offset: 0x40000000000000000000000000000000000000000000000000000000000000000 +// 2: 2 +// 3: 3 +// (p*offset): 0x3FFFFFFFC00000004000000000000000000000003FFFFFFFFFFFFFFFFFFFFFFFC0000000000000000000000000000000000000000000000000000000000000000 +// +// chunks:16 +// chunk_bits:16 +// terms_by_clock: 2 + +pub struct Secp256r1Dbl {} + +impl Secp256r1Dbl { + #[allow(clippy::too_many_arguments)] + pub fn calculate( + icol: u8, + x1: &[i64; 16], + y1: &[i64; 16], + s: &[i64; 16], + q0: &[i64; 16], + ) -> i64 { + match icol { + 0 => 2 * s[0] * y1[0] - 3 * x1[0] * x1[0] - 0xFFFC + 0xFFFF * q0[0], + 1 => { + 2 * s[1] * y1[0] + 2 * s[0] * y1[1] - 3 * x1[1] * x1[0] - 3 * x1[0] * x1[1] - 0xFFFF + + 0xFFFF * q0[0] + + 0xFFFF * q0[1] + } + 2 => { + 2 * s[2] * y1[0] + 2 * s[1] * y1[1] + 2 * s[0] * y1[2] + - 3 * x1[2] * x1[0] + - 3 * x1[1] * x1[1] + - 3 * x1[0] * x1[2] + - 0xFFFF + + 0xFFFF * q0[0] + + 0xFFFF * q0[1] + + 0xFFFF * q0[2] + } + 3 => { + 2 * s[3] * y1[0] + 2 * s[2] * y1[1] + 2 * s[1] * y1[2] + 2 * s[0] * y1[3] + - 3 * x1[3] * x1[0] + - 3 * x1[2] * x1[1] + - 3 * x1[1] * x1[2] + - 3 * x1[0] * x1[3] + - 0xFFFF + + 0xFFFF * q0[0] + + 0xFFFF * q0[1] + + 0xFFFF * q0[2] + + 0xFFFF * q0[3] + } + 4 => { + 2 * s[4] * y1[0] + + 2 * s[3] * y1[1] + + 2 * s[2] * y1[2] + + 2 * s[1] * y1[3] + + 2 * s[0] * y1[4] + - 3 * x1[4] * x1[0] + - 3 * x1[3] * x1[1] + - 3 * x1[2] * x1[2] + - 3 * x1[1] * x1[3] + - 3 * x1[0] * x1[4] + - 0xFFFF + + 0xFFFF * q0[0] + + 0xFFFF * q0[1] + + 0xFFFF * q0[2] + + 0xFFFF * q0[3] + + 0xFFFF * q0[4] + } + 5 => { + 2 * s[5] * y1[0] + + 2 * s[4] * y1[1] + + 2 * s[3] * y1[2] + + 2 * s[2] * y1[3] + + 2 * s[1] * y1[4] + + 2 * s[0] * y1[5] + - 3 * x1[5] * x1[0] + - 3 * x1[4] * x1[1] + - 3 * x1[3] * x1[2] + - 3 * x1[2] * x1[3] + - 3 * x1[1] * x1[4] + - 3 * x1[0] * x1[5] + - 0xFFFF + + 0xFFFF * q0[0] + + 0xFFFF * q0[1] + + 0xFFFF * q0[2] + + 0xFFFF * q0[3] + + 0xFFFF * q0[4] + + 0xFFFF * q0[5] + } + 6 => { + 2 * s[6] * y1[0] + + 2 * s[5] * y1[1] + + 2 * s[4] * y1[2] + + 2 * s[3] * y1[3] + + 2 * s[2] * y1[4] + + 2 * s[1] * y1[5] + + 2 * s[0] * y1[6] + - 3 * x1[6] * x1[0] + - 3 * x1[5] * x1[1] + - 3 * x1[4] * x1[2] + - 3 * x1[3] * x1[3] + - 3 * x1[2] * x1[4] + - 3 * x1[1] * x1[5] + - 3 * x1[0] * x1[6] + + 0xFFFF * q0[1] + + 0xFFFF * q0[2] + + 0xFFFF * q0[3] + + 0xFFFF * q0[4] + + 0xFFFF * q0[5] + + 0xFFFF * q0[6] + } + 7 => { + 2 * s[7] * y1[0] + + 2 * s[6] * y1[1] + + 2 * s[5] * y1[2] + + 2 * s[4] * y1[3] + + 2 * s[3] * y1[4] + + 2 * s[2] * y1[5] + + 2 * s[1] * y1[6] + + 2 * s[0] * y1[7] + - 3 * x1[7] * x1[0] + - 3 * x1[6] * x1[1] + - 3 * x1[5] * x1[2] + - 3 * x1[4] * x1[3] + - 3 * x1[3] * x1[4] + - 3 * x1[2] * x1[5] + - 3 * x1[1] * x1[6] + - 3 * x1[0] * x1[7] + + 0xFFFF * q0[2] + + 0xFFFF * q0[3] + + 0xFFFF * q0[4] + + 0xFFFF * q0[5] + + 0xFFFF * q0[6] + + 0xFFFF * q0[7] + } + 8 => { + 2 * s[8] * y1[0] + + 2 * s[7] * y1[1] + + 2 * s[6] * y1[2] + + 2 * s[5] * y1[3] + + 2 * s[4] * y1[4] + + 2 * s[3] * y1[5] + + 2 * s[2] * y1[6] + + 2 * s[1] * y1[7] + + 2 * s[0] * y1[8] + - 3 * x1[8] * x1[0] + - 3 * x1[7] * x1[1] + - 3 * x1[6] * x1[2] + - 3 * x1[5] * x1[3] + - 3 * x1[4] * x1[4] + - 3 * x1[3] * x1[5] + - 3 * x1[2] * x1[6] + - 3 * x1[1] * x1[7] + - 3 * x1[0] * x1[8] + + 0xFFFF * q0[3] + + 0xFFFF * q0[4] + + 0xFFFF * q0[5] + + 0xFFFF * q0[6] + + 0xFFFF * q0[7] + + 0xFFFF * q0[8] + } + 9 => { + 2 * s[9] * y1[0] + + 2 * s[8] * y1[1] + + 2 * s[7] * y1[2] + + 2 * s[6] * y1[3] + + 2 * s[5] * y1[4] + + 2 * s[4] * y1[5] + + 2 * s[3] * y1[6] + + 2 * s[2] * y1[7] + + 2 * s[1] * y1[8] + + 2 * s[0] * y1[9] + - 3 * x1[9] * x1[0] + - 3 * x1[8] * x1[1] + - 3 * x1[7] * x1[2] + - 3 * x1[6] * x1[3] + - 3 * x1[5] * x1[4] + - 3 * x1[4] * x1[5] + - 3 * x1[3] * x1[6] + - 3 * x1[2] * x1[7] + - 3 * x1[1] * x1[8] + - 3 * x1[0] * x1[9] + + 0xFFFF * q0[4] + + 0xFFFF * q0[5] + + 0xFFFF * q0[6] + + 0xFFFF * q0[7] + + 0xFFFF * q0[8] + + 0xFFFF * q0[9] + } + 10 => { + 2 * s[10] * y1[0] + + 2 * s[9] * y1[1] + + 2 * s[8] * y1[2] + + 2 * s[7] * y1[3] + + 2 * s[6] * y1[4] + + 2 * s[5] * y1[5] + + 2 * s[4] * y1[6] + + 2 * s[3] * y1[7] + + 2 * s[2] * y1[8] + + 2 * s[1] * y1[9] + + 2 * s[0] * y1[10] + - 3 * x1[10] * x1[0] + - 3 * x1[9] * x1[1] + - 3 * x1[8] * x1[2] + - 3 * x1[7] * x1[3] + - 3 * x1[6] * x1[4] + - 3 * x1[5] * x1[5] + - 3 * x1[4] * x1[6] + - 3 * x1[3] * x1[7] + - 3 * x1[2] * x1[8] + - 3 * x1[1] * x1[9] + - 3 * x1[0] * x1[10] + + 0xFFFF * q0[5] + + 0xFFFF * q0[6] + + 0xFFFF * q0[7] + + 0xFFFF * q0[8] + + 0xFFFF * q0[9] + + 0xFFFF * q0[10] + } + 11 => { + 2 * s[11] * y1[0] + + 2 * s[10] * y1[1] + + 2 * s[9] * y1[2] + + 2 * s[8] * y1[3] + + 2 * s[7] * y1[4] + + 2 * s[6] * y1[5] + + 2 * s[5] * y1[6] + + 2 * s[4] * y1[7] + + 2 * s[3] * y1[8] + + 2 * s[2] * y1[9] + + 2 * s[1] * y1[10] + + 2 * s[0] * y1[11] + - 3 * x1[11] * x1[0] + - 3 * x1[10] * x1[1] + - 3 * x1[9] * x1[2] + - 3 * x1[8] * x1[3] + - 3 * x1[7] * x1[4] + - 3 * x1[6] * x1[5] + - 3 * x1[5] * x1[6] + - 3 * x1[4] * x1[7] + - 3 * x1[3] * x1[8] + - 3 * x1[2] * x1[9] + - 3 * x1[1] * x1[10] + - 3 * x1[0] * x1[11] + + 0xFFFF * q0[6] + + 0xFFFF * q0[7] + + 0xFFFF * q0[8] + + 0xFFFF * q0[9] + + 0xFFFF * q0[10] + + 0xFFFF * q0[11] + } + 12 => { + 2 * s[12] * y1[0] + + 2 * s[11] * y1[1] + + 2 * s[10] * y1[2] + + 2 * s[9] * y1[3] + + 2 * s[8] * y1[4] + + 2 * s[7] * y1[5] + + 2 * s[6] * y1[6] + + 2 * s[5] * y1[7] + + 2 * s[4] * y1[8] + + 2 * s[3] * y1[9] + + 2 * s[2] * y1[10] + + 2 * s[1] * y1[11] + + 2 * s[0] * y1[12] + - 3 * x1[12] * x1[0] + - 3 * x1[11] * x1[1] + - 3 * x1[10] * x1[2] + - 3 * x1[9] * x1[3] + - 3 * x1[8] * x1[4] + - 3 * x1[7] * x1[5] + - 3 * x1[6] * x1[6] + - 3 * x1[5] * x1[7] + - 3 * x1[4] * x1[8] + - 3 * x1[3] * x1[9] + - 3 * x1[2] * x1[10] + - 3 * x1[1] * x1[11] + - 3 * x1[0] * x1[12] + - 0x1 + + q0[0] + + 0xFFFF * q0[7] + + 0xFFFF * q0[8] + + 0xFFFF * q0[9] + + 0xFFFF * q0[10] + + 0xFFFF * q0[11] + + 0xFFFF * q0[12] + } + 13 => { + 2 * s[13] * y1[0] + + 2 * s[12] * y1[1] + + 2 * s[11] * y1[2] + + 2 * s[10] * y1[3] + + 2 * s[9] * y1[4] + + 2 * s[8] * y1[5] + + 2 * s[7] * y1[6] + + 2 * s[6] * y1[7] + + 2 * s[5] * y1[8] + + 2 * s[4] * y1[9] + + 2 * s[3] * y1[10] + + 2 * s[2] * y1[11] + + 2 * s[1] * y1[12] + + 2 * s[0] * y1[13] + - 3 * x1[13] * x1[0] + - 3 * x1[12] * x1[1] + - 3 * x1[11] * x1[2] + - 3 * x1[10] * x1[3] + - 3 * x1[9] * x1[4] + - 3 * x1[8] * x1[5] + - 3 * x1[7] * x1[6] + - 3 * x1[6] * x1[7] + - 3 * x1[5] * x1[8] + - 3 * x1[4] * x1[9] + - 3 * x1[3] * x1[10] + - 3 * x1[2] * x1[11] + - 3 * x1[1] * x1[12] + - 3 * x1[0] * x1[13] + + q0[1] + + 0xFFFF * q0[8] + + 0xFFFF * q0[9] + + 0xFFFF * q0[10] + + 0xFFFF * q0[11] + + 0xFFFF * q0[12] + + 0xFFFF * q0[13] + } + 14 => { + 2 * s[14] * y1[0] + + 2 * s[13] * y1[1] + + 2 * s[12] * y1[2] + + 2 * s[11] * y1[3] + + 2 * s[10] * y1[4] + + 2 * s[9] * y1[5] + + 2 * s[8] * y1[6] + + 2 * s[7] * y1[7] + + 2 * s[6] * y1[8] + + 2 * s[5] * y1[9] + + 2 * s[4] * y1[10] + + 2 * s[3] * y1[11] + + 2 * s[2] * y1[12] + + 2 * s[1] * y1[13] + + 2 * s[0] * y1[14] + - 3 * x1[14] * x1[0] + - 3 * x1[13] * x1[1] + - 3 * x1[12] * x1[2] + - 3 * x1[11] * x1[3] + - 3 * x1[10] * x1[4] + - 3 * x1[9] * x1[5] + - 3 * x1[8] * x1[6] + - 3 * x1[7] * x1[7] + - 3 * x1[6] * x1[8] + - 3 * x1[5] * x1[9] + - 3 * x1[4] * x1[10] + - 3 * x1[3] * x1[11] + - 3 * x1[2] * x1[12] + - 3 * x1[1] * x1[13] + - 3 * x1[0] * x1[14] + - 0xFFFF + + 0xFFFF * q0[0] + + q0[2] + + 0xFFFF * q0[9] + + 0xFFFF * q0[10] + + 0xFFFF * q0[11] + + 0xFFFF * q0[12] + + 0xFFFF * q0[13] + + 0xFFFF * q0[14] + } + 15 => { + 2 * s[15] * y1[0] + + 2 * s[14] * y1[1] + + 2 * s[13] * y1[2] + + 2 * s[12] * y1[3] + + 2 * s[11] * y1[4] + + 2 * s[10] * y1[5] + + 2 * s[9] * y1[6] + + 2 * s[8] * y1[7] + + 2 * s[7] * y1[8] + + 2 * s[6] * y1[9] + + 2 * s[5] * y1[10] + + 2 * s[4] * y1[11] + + 2 * s[3] * y1[12] + + 2 * s[2] * y1[13] + + 2 * s[1] * y1[14] + + 2 * s[0] * y1[15] + - 3 * x1[15] * x1[0] + - 3 * x1[14] * x1[1] + - 3 * x1[13] * x1[2] + - 3 * x1[12] * x1[3] + - 3 * x1[11] * x1[4] + - 3 * x1[10] * x1[5] + - 3 * x1[9] * x1[6] + - 3 * x1[8] * x1[7] + - 3 * x1[7] * x1[8] + - 3 * x1[6] * x1[9] + - 3 * x1[5] * x1[10] + - 3 * x1[4] * x1[11] + - 3 * x1[3] * x1[12] + - 3 * x1[2] * x1[13] + - 3 * x1[1] * x1[14] + - 3 * x1[0] * x1[15] + - 0xFFFF + + 0xFFFF * q0[0] + + 0xFFFF * q0[1] + + q0[3] + + 0xFFFF * q0[10] + + 0xFFFF * q0[11] + + 0xFFFF * q0[12] + + 0xFFFF * q0[13] + + 0xFFFF * q0[14] + + 0xFFFF * q0[15] + } + 16 => { + 2 * s[15] * y1[1] + + 2 * s[14] * y1[2] + + 2 * s[13] * y1[3] + + 2 * s[12] * y1[4] + + 2 * s[11] * y1[5] + + 2 * s[10] * y1[6] + + 2 * s[9] * y1[7] + + 2 * s[8] * y1[8] + + 2 * s[7] * y1[9] + + 2 * s[6] * y1[10] + + 2 * s[5] * y1[11] + + 2 * s[4] * y1[12] + + 2 * s[3] * y1[13] + + 2 * s[2] * y1[14] + + 2 * s[1] * y1[15] + - 3 * x1[15] * x1[1] + - 3 * x1[14] * x1[2] + - 3 * x1[13] * x1[3] + - 3 * x1[12] * x1[4] + - 3 * x1[11] * x1[5] + - 3 * x1[10] * x1[6] + - 3 * x1[9] * x1[7] + - 3 * x1[8] * x1[8] + - 3 * x1[7] * x1[9] + - 3 * x1[6] * x1[10] + - 3 * x1[5] * x1[11] + - 3 * x1[4] * x1[12] + - 3 * x1[3] * x1[13] + - 3 * x1[2] * x1[14] + - 3 * x1[1] * x1[15] + + 0xFFFF * q0[1] + + 0xFFFF * q0[2] + + q0[4] + + 0xFFFF * q0[11] + + 0xFFFF * q0[12] + + 0xFFFF * q0[13] + + 0xFFFF * q0[14] + + 0xFFFF * q0[15] + - 0xFFFC + } + 17 => { + 2 * s[15] * y1[2] + + 2 * s[14] * y1[3] + + 2 * s[13] * y1[4] + + 2 * s[12] * y1[5] + + 2 * s[11] * y1[6] + + 2 * s[10] * y1[7] + + 2 * s[9] * y1[8] + + 2 * s[8] * y1[9] + + 2 * s[7] * y1[10] + + 2 * s[6] * y1[11] + + 2 * s[5] * y1[12] + + 2 * s[4] * y1[13] + + 2 * s[3] * y1[14] + + 2 * s[2] * y1[15] + - 3 * x1[15] * x1[2] + - 3 * x1[14] * x1[3] + - 3 * x1[13] * x1[4] + - 3 * x1[12] * x1[5] + - 3 * x1[11] * x1[6] + - 3 * x1[10] * x1[7] + - 3 * x1[9] * x1[8] + - 3 * x1[8] * x1[9] + - 3 * x1[7] * x1[10] + - 3 * x1[6] * x1[11] + - 3 * x1[5] * x1[12] + - 3 * x1[4] * x1[13] + - 3 * x1[3] * x1[14] + - 3 * x1[2] * x1[15] + + 0xFFFF * q0[2] + + 0xFFFF * q0[3] + + q0[5] + + 0xFFFF * q0[12] + + 0xFFFF * q0[13] + + 0xFFFF * q0[14] + + 0xFFFF * q0[15] + - 0xFFFF + } + 18 => { + 2 * s[15] * y1[3] + + 2 * s[14] * y1[4] + + 2 * s[13] * y1[5] + + 2 * s[12] * y1[6] + + 2 * s[11] * y1[7] + + 2 * s[10] * y1[8] + + 2 * s[9] * y1[9] + + 2 * s[8] * y1[10] + + 2 * s[7] * y1[11] + + 2 * s[6] * y1[12] + + 2 * s[5] * y1[13] + + 2 * s[4] * y1[14] + + 2 * s[3] * y1[15] + - 3 * x1[15] * x1[3] + - 3 * x1[14] * x1[4] + - 3 * x1[13] * x1[5] + - 3 * x1[12] * x1[6] + - 3 * x1[11] * x1[7] + - 3 * x1[10] * x1[8] + - 3 * x1[9] * x1[9] + - 3 * x1[8] * x1[10] + - 3 * x1[7] * x1[11] + - 3 * x1[6] * x1[12] + - 3 * x1[5] * x1[13] + - 3 * x1[4] * x1[14] + - 3 * x1[3] * x1[15] + + 0xFFFF * q0[3] + + 0xFFFF * q0[4] + + q0[6] + + 0xFFFF * q0[13] + + 0xFFFF * q0[14] + + 0xFFFF * q0[15] + - 0xFFFF + } + 19 => { + 2 * s[15] * y1[4] + + 2 * s[14] * y1[5] + + 2 * s[13] * y1[6] + + 2 * s[12] * y1[7] + + 2 * s[11] * y1[8] + + 2 * s[10] * y1[9] + + 2 * s[9] * y1[10] + + 2 * s[8] * y1[11] + + 2 * s[7] * y1[12] + + 2 * s[6] * y1[13] + + 2 * s[5] * y1[14] + + 2 * s[4] * y1[15] + - 3 * x1[15] * x1[4] + - 3 * x1[14] * x1[5] + - 3 * x1[13] * x1[6] + - 3 * x1[12] * x1[7] + - 3 * x1[11] * x1[8] + - 3 * x1[10] * x1[9] + - 3 * x1[9] * x1[10] + - 3 * x1[8] * x1[11] + - 3 * x1[7] * x1[12] + - 3 * x1[6] * x1[13] + - 3 * x1[5] * x1[14] + - 3 * x1[4] * x1[15] + + 0xFFFF * q0[4] + + 0xFFFF * q0[5] + + q0[7] + + 0xFFFF * q0[14] + + 0xFFFF * q0[15] + - 0xFFFF + } + 20 => { + 2 * s[15] * y1[5] + + 2 * s[14] * y1[6] + + 2 * s[13] * y1[7] + + 2 * s[12] * y1[8] + + 2 * s[11] * y1[9] + + 2 * s[10] * y1[10] + + 2 * s[9] * y1[11] + + 2 * s[8] * y1[12] + + 2 * s[7] * y1[13] + + 2 * s[6] * y1[14] + + 2 * s[5] * y1[15] + - 3 * x1[15] * x1[5] + - 3 * x1[14] * x1[6] + - 3 * x1[13] * x1[7] + - 3 * x1[12] * x1[8] + - 3 * x1[11] * x1[9] + - 3 * x1[10] * x1[10] + - 3 * x1[9] * x1[11] + - 3 * x1[8] * x1[12] + - 3 * x1[7] * x1[13] + - 3 * x1[6] * x1[14] + - 3 * x1[5] * x1[15] + + 0xFFFF * q0[5] + + 0xFFFF * q0[6] + + q0[8] + + 0xFFFF * q0[15] + - 0xFFFF + } + 21 => { + 2 * s[15] * y1[6] + + 2 * s[14] * y1[7] + + 2 * s[13] * y1[8] + + 2 * s[12] * y1[9] + + 2 * s[11] * y1[10] + + 2 * s[10] * y1[11] + + 2 * s[9] * y1[12] + + 2 * s[8] * y1[13] + + 2 * s[7] * y1[14] + + 2 * s[6] * y1[15] + - 3 * x1[15] * x1[6] + - 3 * x1[14] * x1[7] + - 3 * x1[13] * x1[8] + - 3 * x1[12] * x1[9] + - 3 * x1[11] * x1[10] + - 3 * x1[10] * x1[11] + - 3 * x1[9] * x1[12] + - 3 * x1[8] * x1[13] + - 3 * x1[7] * x1[14] + - 3 * x1[6] * x1[15] + + 0xFFFF * q0[6] + + 0xFFFF * q0[7] + + q0[9] + - 0xFFFF + } + 22 => { + 2 * s[15] * y1[7] + + 2 * s[14] * y1[8] + + 2 * s[13] * y1[9] + + 2 * s[12] * y1[10] + + 2 * s[11] * y1[11] + + 2 * s[10] * y1[12] + + 2 * s[9] * y1[13] + + 2 * s[8] * y1[14] + + 2 * s[7] * y1[15] + - 3 * x1[15] * x1[7] + - 3 * x1[14] * x1[8] + - 3 * x1[13] * x1[9] + - 3 * x1[12] * x1[10] + - 3 * x1[11] * x1[11] + - 3 * x1[10] * x1[12] + - 3 * x1[9] * x1[13] + - 3 * x1[8] * x1[14] + - 3 * x1[7] * x1[15] + + 0xFFFF * q0[7] + + 0xFFFF * q0[8] + + q0[10] + - 0x3 + } + 23 => { + 2 * s[15] * y1[8] + + 2 * s[14] * y1[9] + + 2 * s[13] * y1[10] + + 2 * s[12] * y1[11] + + 2 * s[11] * y1[12] + + 2 * s[10] * y1[13] + + 2 * s[9] * y1[14] + + 2 * s[8] * y1[15] + - 3 * x1[15] * x1[8] + - 3 * x1[14] * x1[9] + - 3 * x1[13] * x1[10] + - 3 * x1[12] * x1[11] + - 3 * x1[11] * x1[12] + - 3 * x1[10] * x1[13] + - 3 * x1[9] * x1[14] + - 3 * x1[8] * x1[15] + + 0xFFFF * q0[8] + + 0xFFFF * q0[9] + + q0[11] + } + 24 => { + 2 * s[15] * y1[9] + + 2 * s[14] * y1[10] + + 2 * s[13] * y1[11] + + 2 * s[12] * y1[12] + + 2 * s[11] * y1[13] + + 2 * s[10] * y1[14] + + 2 * s[9] * y1[15] + - 3 * x1[15] * x1[9] + - 3 * x1[14] * x1[10] + - 3 * x1[13] * x1[11] + - 3 * x1[12] * x1[12] + - 3 * x1[11] * x1[13] + - 3 * x1[10] * x1[14] + - 3 * x1[9] * x1[15] + + 0xFFFF * q0[9] + + 0xFFFF * q0[10] + + q0[12] + } + 25 => { + 2 * s[15] * y1[10] + + 2 * s[14] * y1[11] + + 2 * s[13] * y1[12] + + 2 * s[12] * y1[13] + + 2 * s[11] * y1[14] + + 2 * s[10] * y1[15] + - 3 * x1[15] * x1[10] + - 3 * x1[14] * x1[11] + - 3 * x1[13] * x1[12] + - 3 * x1[12] * x1[13] + - 3 * x1[11] * x1[14] + - 3 * x1[10] * x1[15] + + 0xFFFF * q0[10] + + 0xFFFF * q0[11] + + q0[13] + } + 26 => { + 2 * s[15] * y1[11] + + 2 * s[14] * y1[12] + + 2 * s[13] * y1[13] + + 2 * s[12] * y1[14] + + 2 * s[11] * y1[15] + - 3 * x1[15] * x1[11] + - 3 * x1[14] * x1[12] + - 3 * x1[13] * x1[13] + - 3 * x1[12] * x1[14] + - 3 * x1[11] * x1[15] + + 0xFFFF * q0[11] + + 0xFFFF * q0[12] + + q0[14] + } + 27 => { + 2 * s[15] * y1[12] + 2 * s[14] * y1[13] + 2 * s[13] * y1[14] + 2 * s[12] * y1[15] + - 3 * x1[15] * x1[12] + - 3 * x1[14] * x1[13] + - 3 * x1[13] * x1[14] + - 3 * x1[12] * x1[15] + + 0xFFFF * q0[12] + + 0xFFFF * q0[13] + + q0[15] + } + 28 => { + 2 * s[15] * y1[13] + 2 * s[14] * y1[14] + 2 * s[13] * y1[15] + - 3 * x1[15] * x1[13] + - 3 * x1[14] * x1[14] + - 3 * x1[13] * x1[15] + + 0xFFFF * q0[13] + + 0xFFFF * q0[14] + - 0x4 + } + 29 => { + 2 * s[15] * y1[14] + 2 * s[14] * y1[15] - 3 * x1[15] * x1[14] - 3 * x1[14] * x1[15] + + 0xFFFF * q0[14] + + 0xFFFF * q0[15] + } + 30 => 2 * s[15] * y1[15] - 3 * x1[15] * x1[15] + 0xFFFF * q0[15] - 0xFFFC, + 31 => -0x3FFFF, + _ => 0, + } + } +} diff --git a/precompiles/arith_eq/src/equations/secp256r1_x3.rs b/precompiles/arith_eq/src/equations/secp256r1_x3.rs new file mode 100644 index 000000000..8a884346f --- /dev/null +++ b/precompiles/arith_eq/src/equations/secp256r1_x3.rs @@ -0,0 +1,502 @@ +// code generated +// +// equation: s*s-x1-x2-x3-p*q1+p*offset +// +// p: 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF +// offset: 0x4 +// (p*offset): 0x3FFFFFFFC00000004000000000000000000000003FFFFFFFFFFFFFFFFFFFFFFFC +// +// chunks:16 +// chunk_bits:16 +// terms_by_clock: 2 + +pub struct Secp256r1X3 {} + +impl Secp256r1X3 { + #[allow(clippy::too_many_arguments)] + pub fn calculate( + icol: u8, + x1: &[i64; 16], + x2: &[i64; 16], + x3: &[i64; 16], + s: &[i64; 16], + q1: &[i64; 16], + ) -> i64 { + match icol { + 0 => s[0] * s[0] - x1[0] - x2[0] - x3[0] - 0xFFFF * q1[0] + 0xFFFC, + 1 => { + s[1] * s[0] + s[0] * s[1] - x1[1] - x2[1] - x3[1] - 0xFFFF * q1[0] - 0xFFFF * q1[1] + + 0xFFFF + } + 2 => { + s[2] * s[0] + s[1] * s[1] + s[0] * s[2] + - x1[2] + - x2[2] + - x3[2] + - 0xFFFF * q1[0] + - 0xFFFF * q1[1] + - 0xFFFF * q1[2] + + 0xFFFF + } + 3 => { + s[3] * s[0] + s[2] * s[1] + s[1] * s[2] + s[0] * s[3] + - x1[3] + - x2[3] + - x3[3] + - 0xFFFF * q1[0] + - 0xFFFF * q1[1] + - 0xFFFF * q1[2] + - 0xFFFF * q1[3] + + 0xFFFF + } + 4 => { + s[4] * s[0] + s[3] * s[1] + s[2] * s[2] + s[1] * s[3] + s[0] * s[4] + - x1[4] + - x2[4] + - x3[4] + - 0xFFFF * q1[0] + - 0xFFFF * q1[1] + - 0xFFFF * q1[2] + - 0xFFFF * q1[3] + - 0xFFFF * q1[4] + + 0xFFFF + } + 5 => { + s[5] * s[0] + s[4] * s[1] + s[3] * s[2] + s[2] * s[3] + s[1] * s[4] + s[0] * s[5] + - x1[5] + - x2[5] + - x3[5] + - 0xFFFF * q1[0] + - 0xFFFF * q1[1] + - 0xFFFF * q1[2] + - 0xFFFF * q1[3] + - 0xFFFF * q1[4] + - 0xFFFF * q1[5] + + 0xFFFF + } + 6 => { + s[6] * s[0] + + s[5] * s[1] + + s[4] * s[2] + + s[3] * s[3] + + s[2] * s[4] + + s[1] * s[5] + + s[0] * s[6] + - x1[6] + - x2[6] + - x3[6] + - 0xFFFF * q1[1] + - 0xFFFF * q1[2] + - 0xFFFF * q1[3] + - 0xFFFF * q1[4] + - 0xFFFF * q1[5] + - 0xFFFF * q1[6] + + 0x3 + } + 7 => { + s[7] * s[0] + + s[6] * s[1] + + s[5] * s[2] + + s[4] * s[3] + + s[3] * s[4] + + s[2] * s[5] + + s[1] * s[6] + + s[0] * s[7] + - x1[7] + - x2[7] + - x3[7] + - 0xFFFF * q1[2] + - 0xFFFF * q1[3] + - 0xFFFF * q1[4] + - 0xFFFF * q1[5] + - 0xFFFF * q1[6] + - 0xFFFF * q1[7] + } + 8 => { + s[8] * s[0] + + s[7] * s[1] + + s[6] * s[2] + + s[5] * s[3] + + s[4] * s[4] + + s[3] * s[5] + + s[2] * s[6] + + s[1] * s[7] + + s[0] * s[8] + - x1[8] + - x2[8] + - x3[8] + - 0xFFFF * q1[3] + - 0xFFFF * q1[4] + - 0xFFFF * q1[5] + - 0xFFFF * q1[6] + - 0xFFFF * q1[7] + - 0xFFFF * q1[8] + } + 9 => { + s[9] * s[0] + + s[8] * s[1] + + s[7] * s[2] + + s[6] * s[3] + + s[5] * s[4] + + s[4] * s[5] + + s[3] * s[6] + + s[2] * s[7] + + s[1] * s[8] + + s[0] * s[9] + - x1[9] + - x2[9] + - x3[9] + - 0xFFFF * q1[4] + - 0xFFFF * q1[5] + - 0xFFFF * q1[6] + - 0xFFFF * q1[7] + - 0xFFFF * q1[8] + - 0xFFFF * q1[9] + } + 10 => { + s[10] * s[0] + + s[9] * s[1] + + s[8] * s[2] + + s[7] * s[3] + + s[6] * s[4] + + s[5] * s[5] + + s[4] * s[6] + + s[3] * s[7] + + s[2] * s[8] + + s[1] * s[9] + + s[0] * s[10] + - x1[10] + - x2[10] + - x3[10] + - 0xFFFF * q1[5] + - 0xFFFF * q1[6] + - 0xFFFF * q1[7] + - 0xFFFF * q1[8] + - 0xFFFF * q1[9] + - 0xFFFF * q1[10] + } + 11 => { + s[11] * s[0] + + s[10] * s[1] + + s[9] * s[2] + + s[8] * s[3] + + s[7] * s[4] + + s[6] * s[5] + + s[5] * s[6] + + s[4] * s[7] + + s[3] * s[8] + + s[2] * s[9] + + s[1] * s[10] + + s[0] * s[11] + - x1[11] + - x2[11] + - x3[11] + - 0xFFFF * q1[6] + - 0xFFFF * q1[7] + - 0xFFFF * q1[8] + - 0xFFFF * q1[9] + - 0xFFFF * q1[10] + - 0xFFFF * q1[11] + } + 12 => { + s[12] * s[0] + + s[11] * s[1] + + s[10] * s[2] + + s[9] * s[3] + + s[8] * s[4] + + s[7] * s[5] + + s[6] * s[6] + + s[5] * s[7] + + s[4] * s[8] + + s[3] * s[9] + + s[2] * s[10] + + s[1] * s[11] + + s[0] * s[12] + - x1[12] + - x2[12] + - x3[12] + - q1[0] + - 0xFFFF * q1[7] + - 0xFFFF * q1[8] + - 0xFFFF * q1[9] + - 0xFFFF * q1[10] + - 0xFFFF * q1[11] + - 0xFFFF * q1[12] + + 0x4 + } + 13 => { + s[13] * s[0] + + s[12] * s[1] + + s[11] * s[2] + + s[10] * s[3] + + s[9] * s[4] + + s[8] * s[5] + + s[7] * s[6] + + s[6] * s[7] + + s[5] * s[8] + + s[4] * s[9] + + s[3] * s[10] + + s[2] * s[11] + + s[1] * s[12] + + s[0] * s[13] + - x1[13] + - x2[13] + - x3[13] + - q1[1] + - 0xFFFF * q1[8] + - 0xFFFF * q1[9] + - 0xFFFF * q1[10] + - 0xFFFF * q1[11] + - 0xFFFF * q1[12] + - 0xFFFF * q1[13] + } + 14 => { + s[14] * s[0] + + s[13] * s[1] + + s[12] * s[2] + + s[11] * s[3] + + s[10] * s[4] + + s[9] * s[5] + + s[8] * s[6] + + s[7] * s[7] + + s[6] * s[8] + + s[5] * s[9] + + s[4] * s[10] + + s[3] * s[11] + + s[2] * s[12] + + s[1] * s[13] + + s[0] * s[14] + - x1[14] + - x2[14] + - x3[14] + - 0xFFFF * q1[0] + - q1[2] + - 0xFFFF * q1[9] + - 0xFFFF * q1[10] + - 0xFFFF * q1[11] + - 0xFFFF * q1[12] + - 0xFFFF * q1[13] + - 0xFFFF * q1[14] + + 0xFFFC + } + 15 => { + s[15] * s[0] + + s[14] * s[1] + + s[13] * s[2] + + s[12] * s[3] + + s[11] * s[4] + + s[10] * s[5] + + s[9] * s[6] + + s[8] * s[7] + + s[7] * s[8] + + s[6] * s[9] + + s[5] * s[10] + + s[4] * s[11] + + s[3] * s[12] + + s[2] * s[13] + + s[1] * s[14] + + s[0] * s[15] + - x1[15] + - x2[15] + - x3[15] + - 0xFFFF * q1[0] + - 0xFFFF * q1[1] + - q1[3] + - 0xFFFF * q1[10] + - 0xFFFF * q1[11] + - 0xFFFF * q1[12] + - 0xFFFF * q1[13] + - 0xFFFF * q1[14] + - 0xFFFF * q1[15] + + 0xFFFF + } + 16 => { + s[15] * s[1] + + s[14] * s[2] + + s[13] * s[3] + + s[12] * s[4] + + s[11] * s[5] + + s[10] * s[6] + + s[9] * s[7] + + s[8] * s[8] + + s[7] * s[9] + + s[6] * s[10] + + s[5] * s[11] + + s[4] * s[12] + + s[3] * s[13] + + s[2] * s[14] + + s[1] * s[15] + - 0xFFFF * q1[1] + - 0xFFFF * q1[2] + - q1[4] + - 0xFFFF * q1[11] + - 0xFFFF * q1[12] + - 0xFFFF * q1[13] + - 0xFFFF * q1[14] + - 0xFFFF * q1[15] + + 0x3 + } + 17 => { + s[15] * s[2] + + s[14] * s[3] + + s[13] * s[4] + + s[12] * s[5] + + s[11] * s[6] + + s[10] * s[7] + + s[9] * s[8] + + s[8] * s[9] + + s[7] * s[10] + + s[6] * s[11] + + s[5] * s[12] + + s[4] * s[13] + + s[3] * s[14] + + s[2] * s[15] + - 0xFFFF * q1[2] + - 0xFFFF * q1[3] + - q1[5] + - 0xFFFF * q1[12] + - 0xFFFF * q1[13] + - 0xFFFF * q1[14] + - 0xFFFF * q1[15] + } + 18 => { + s[15] * s[3] + + s[14] * s[4] + + s[13] * s[5] + + s[12] * s[6] + + s[11] * s[7] + + s[10] * s[8] + + s[9] * s[9] + + s[8] * s[10] + + s[7] * s[11] + + s[6] * s[12] + + s[5] * s[13] + + s[4] * s[14] + + s[3] * s[15] + - 0xFFFF * q1[3] + - 0xFFFF * q1[4] + - q1[6] + - 0xFFFF * q1[13] + - 0xFFFF * q1[14] + - 0xFFFF * q1[15] + } + 19 => { + s[15] * s[4] + + s[14] * s[5] + + s[13] * s[6] + + s[12] * s[7] + + s[11] * s[8] + + s[10] * s[9] + + s[9] * s[10] + + s[8] * s[11] + + s[7] * s[12] + + s[6] * s[13] + + s[5] * s[14] + + s[4] * s[15] + - 0xFFFF * q1[4] + - 0xFFFF * q1[5] + - q1[7] + - 0xFFFF * q1[14] + - 0xFFFF * q1[15] + } + 20 => { + s[15] * s[5] + + s[14] * s[6] + + s[13] * s[7] + + s[12] * s[8] + + s[11] * s[9] + + s[10] * s[10] + + s[9] * s[11] + + s[8] * s[12] + + s[7] * s[13] + + s[6] * s[14] + + s[5] * s[15] + - 0xFFFF * q1[5] + - 0xFFFF * q1[6] + - q1[8] + - 0xFFFF * q1[15] + } + 21 => { + s[15] * s[6] + + s[14] * s[7] + + s[13] * s[8] + + s[12] * s[9] + + s[11] * s[10] + + s[10] * s[11] + + s[9] * s[12] + + s[8] * s[13] + + s[7] * s[14] + + s[6] * s[15] + - 0xFFFF * q1[6] + - 0xFFFF * q1[7] + - q1[9] + } + 22 => { + s[15] * s[7] + + s[14] * s[8] + + s[13] * s[9] + + s[12] * s[10] + + s[11] * s[11] + + s[10] * s[12] + + s[9] * s[13] + + s[8] * s[14] + + s[7] * s[15] + - 0xFFFF * q1[7] + - 0xFFFF * q1[8] + - q1[10] + } + 23 => { + s[15] * s[8] + + s[14] * s[9] + + s[13] * s[10] + + s[12] * s[11] + + s[11] * s[12] + + s[10] * s[13] + + s[9] * s[14] + + s[8] * s[15] + - 0xFFFF * q1[8] + - 0xFFFF * q1[9] + - q1[11] + } + 24 => { + s[15] * s[9] + + s[14] * s[10] + + s[13] * s[11] + + s[12] * s[12] + + s[11] * s[13] + + s[10] * s[14] + + s[9] * s[15] + - 0xFFFF * q1[9] + - 0xFFFF * q1[10] + - q1[12] + } + 25 => { + s[15] * s[10] + + s[14] * s[11] + + s[13] * s[12] + + s[12] * s[13] + + s[11] * s[14] + + s[10] * s[15] + - 0xFFFF * q1[10] + - 0xFFFF * q1[11] + - q1[13] + } + 26 => { + s[15] * s[11] + s[14] * s[12] + s[13] * s[13] + s[12] * s[14] + s[11] * s[15] + - 0xFFFF * q1[11] + - 0xFFFF * q1[12] + - q1[14] + } + 27 => { + s[15] * s[12] + s[14] * s[13] + s[13] * s[14] + s[12] * s[15] + - 0xFFFF * q1[12] + - 0xFFFF * q1[13] + - q1[15] + } + 28 => s[15] * s[13] + s[14] * s[14] + s[13] * s[15] - 0xFFFF * q1[13] - 0xFFFF * q1[14], + 29 => s[15] * s[14] + s[14] * s[15] - 0xFFFF * q1[14] - 0xFFFF * q1[15], + 30 => s[15] * s[15] - 0xFFFF * q1[15], + _ => 0, + } + } +} diff --git a/precompiles/arith_eq/src/equations/secp256r1_y3.rs b/precompiles/arith_eq/src/equations/secp256r1_y3.rs new file mode 100644 index 000000000..fbfdb4add --- /dev/null +++ b/precompiles/arith_eq/src/equations/secp256r1_y3.rs @@ -0,0 +1,753 @@ +// code generated +// +// equation: s*x1-s*x3-y1-y3+p*q2-p*offset +// +// p: 0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF +// offset: 0x20000000000000000000000000000000000000000000000000000000000000000 +// (p*offset): 0x1FFFFFFFE00000002000000000000000000000001FFFFFFFFFFFFFFFFFFFFFFFE0000000000000000000000000000000000000000000000000000000000000000 +// +// chunks:16 +// chunk_bits:16 +// terms_by_clock: 2 + +pub struct Secp256r1Y3 {} + +impl Secp256r1Y3 { + #[allow(clippy::too_many_arguments)] + pub fn calculate( + icol: u8, + x1: &[i64; 16], + y1: &[i64; 16], + x3: &[i64; 16], + y3: &[i64; 16], + s: &[i64; 16], + q2: &[i64; 16], + ) -> i64 { + match icol { + 0 => s[0] * x1[0] - s[0] * x3[0] - y1[0] - y3[0] + 0xFFFF * q2[0], + 1 => { + s[1] * x1[0] + s[0] * x1[1] - s[1] * x3[0] - s[0] * x3[1] - y1[1] - y3[1] + + 0xFFFF * q2[0] + + 0xFFFF * q2[1] + } + 2 => { + s[2] * x1[0] + s[1] * x1[1] + s[0] * x1[2] + - s[2] * x3[0] + - s[1] * x3[1] + - s[0] * x3[2] + - y1[2] + - y3[2] + + 0xFFFF * q2[0] + + 0xFFFF * q2[1] + + 0xFFFF * q2[2] + } + 3 => { + s[3] * x1[0] + s[2] * x1[1] + s[1] * x1[2] + s[0] * x1[3] + - s[3] * x3[0] + - s[2] * x3[1] + - s[1] * x3[2] + - s[0] * x3[3] + - y1[3] + - y3[3] + + 0xFFFF * q2[0] + + 0xFFFF * q2[1] + + 0xFFFF * q2[2] + + 0xFFFF * q2[3] + } + 4 => { + s[4] * x1[0] + s[3] * x1[1] + s[2] * x1[2] + s[1] * x1[3] + s[0] * x1[4] + - s[4] * x3[0] + - s[3] * x3[1] + - s[2] * x3[2] + - s[1] * x3[3] + - s[0] * x3[4] + - y1[4] + - y3[4] + + 0xFFFF * q2[0] + + 0xFFFF * q2[1] + + 0xFFFF * q2[2] + + 0xFFFF * q2[3] + + 0xFFFF * q2[4] + } + 5 => { + s[5] * x1[0] + + s[4] * x1[1] + + s[3] * x1[2] + + s[2] * x1[3] + + s[1] * x1[4] + + s[0] * x1[5] + - s[5] * x3[0] + - s[4] * x3[1] + - s[3] * x3[2] + - s[2] * x3[3] + - s[1] * x3[4] + - s[0] * x3[5] + - y1[5] + - y3[5] + + 0xFFFF * q2[0] + + 0xFFFF * q2[1] + + 0xFFFF * q2[2] + + 0xFFFF * q2[3] + + 0xFFFF * q2[4] + + 0xFFFF * q2[5] + } + 6 => { + s[6] * x1[0] + + s[5] * x1[1] + + s[4] * x1[2] + + s[3] * x1[3] + + s[2] * x1[4] + + s[1] * x1[5] + + s[0] * x1[6] + - s[6] * x3[0] + - s[5] * x3[1] + - s[4] * x3[2] + - s[3] * x3[3] + - s[2] * x3[4] + - s[1] * x3[5] + - s[0] * x3[6] + - y1[6] + - y3[6] + + 0xFFFF * q2[1] + + 0xFFFF * q2[2] + + 0xFFFF * q2[3] + + 0xFFFF * q2[4] + + 0xFFFF * q2[5] + + 0xFFFF * q2[6] + } + 7 => { + s[7] * x1[0] + + s[6] * x1[1] + + s[5] * x1[2] + + s[4] * x1[3] + + s[3] * x1[4] + + s[2] * x1[5] + + s[1] * x1[6] + + s[0] * x1[7] + - s[7] * x3[0] + - s[6] * x3[1] + - s[5] * x3[2] + - s[4] * x3[3] + - s[3] * x3[4] + - s[2] * x3[5] + - s[1] * x3[6] + - s[0] * x3[7] + - y1[7] + - y3[7] + + 0xFFFF * q2[2] + + 0xFFFF * q2[3] + + 0xFFFF * q2[4] + + 0xFFFF * q2[5] + + 0xFFFF * q2[6] + + 0xFFFF * q2[7] + } + 8 => { + s[8] * x1[0] + + s[7] * x1[1] + + s[6] * x1[2] + + s[5] * x1[3] + + s[4] * x1[4] + + s[3] * x1[5] + + s[2] * x1[6] + + s[1] * x1[7] + + s[0] * x1[8] + - s[8] * x3[0] + - s[7] * x3[1] + - s[6] * x3[2] + - s[5] * x3[3] + - s[4] * x3[4] + - s[3] * x3[5] + - s[2] * x3[6] + - s[1] * x3[7] + - s[0] * x3[8] + - y1[8] + - y3[8] + + 0xFFFF * q2[3] + + 0xFFFF * q2[4] + + 0xFFFF * q2[5] + + 0xFFFF * q2[6] + + 0xFFFF * q2[7] + + 0xFFFF * q2[8] + } + 9 => { + s[9] * x1[0] + + s[8] * x1[1] + + s[7] * x1[2] + + s[6] * x1[3] + + s[5] * x1[4] + + s[4] * x1[5] + + s[3] * x1[6] + + s[2] * x1[7] + + s[1] * x1[8] + + s[0] * x1[9] + - s[9] * x3[0] + - s[8] * x3[1] + - s[7] * x3[2] + - s[6] * x3[3] + - s[5] * x3[4] + - s[4] * x3[5] + - s[3] * x3[6] + - s[2] * x3[7] + - s[1] * x3[8] + - s[0] * x3[9] + - y1[9] + - y3[9] + + 0xFFFF * q2[4] + + 0xFFFF * q2[5] + + 0xFFFF * q2[6] + + 0xFFFF * q2[7] + + 0xFFFF * q2[8] + + 0xFFFF * q2[9] + } + 10 => { + s[10] * x1[0] + + s[9] * x1[1] + + s[8] * x1[2] + + s[7] * x1[3] + + s[6] * x1[4] + + s[5] * x1[5] + + s[4] * x1[6] + + s[3] * x1[7] + + s[2] * x1[8] + + s[1] * x1[9] + + s[0] * x1[10] + - s[10] * x3[0] + - s[9] * x3[1] + - s[8] * x3[2] + - s[7] * x3[3] + - s[6] * x3[4] + - s[5] * x3[5] + - s[4] * x3[6] + - s[3] * x3[7] + - s[2] * x3[8] + - s[1] * x3[9] + - s[0] * x3[10] + - y1[10] + - y3[10] + + 0xFFFF * q2[5] + + 0xFFFF * q2[6] + + 0xFFFF * q2[7] + + 0xFFFF * q2[8] + + 0xFFFF * q2[9] + + 0xFFFF * q2[10] + } + 11 => { + s[11] * x1[0] + + s[10] * x1[1] + + s[9] * x1[2] + + s[8] * x1[3] + + s[7] * x1[4] + + s[6] * x1[5] + + s[5] * x1[6] + + s[4] * x1[7] + + s[3] * x1[8] + + s[2] * x1[9] + + s[1] * x1[10] + + s[0] * x1[11] + - s[11] * x3[0] + - s[10] * x3[1] + - s[9] * x3[2] + - s[8] * x3[3] + - s[7] * x3[4] + - s[6] * x3[5] + - s[5] * x3[6] + - s[4] * x3[7] + - s[3] * x3[8] + - s[2] * x3[9] + - s[1] * x3[10] + - s[0] * x3[11] + - y1[11] + - y3[11] + + 0xFFFF * q2[6] + + 0xFFFF * q2[7] + + 0xFFFF * q2[8] + + 0xFFFF * q2[9] + + 0xFFFF * q2[10] + + 0xFFFF * q2[11] + } + 12 => { + s[12] * x1[0] + + s[11] * x1[1] + + s[10] * x1[2] + + s[9] * x1[3] + + s[8] * x1[4] + + s[7] * x1[5] + + s[6] * x1[6] + + s[5] * x1[7] + + s[4] * x1[8] + + s[3] * x1[9] + + s[2] * x1[10] + + s[1] * x1[11] + + s[0] * x1[12] + - s[12] * x3[0] + - s[11] * x3[1] + - s[10] * x3[2] + - s[9] * x3[3] + - s[8] * x3[4] + - s[7] * x3[5] + - s[6] * x3[6] + - s[5] * x3[7] + - s[4] * x3[8] + - s[3] * x3[9] + - s[2] * x3[10] + - s[1] * x3[11] + - s[0] * x3[12] + - y1[12] + - y3[12] + + q2[0] + + 0xFFFF * q2[7] + + 0xFFFF * q2[8] + + 0xFFFF * q2[9] + + 0xFFFF * q2[10] + + 0xFFFF * q2[11] + + 0xFFFF * q2[12] + } + 13 => { + s[13] * x1[0] + + s[12] * x1[1] + + s[11] * x1[2] + + s[10] * x1[3] + + s[9] * x1[4] + + s[8] * x1[5] + + s[7] * x1[6] + + s[6] * x1[7] + + s[5] * x1[8] + + s[4] * x1[9] + + s[3] * x1[10] + + s[2] * x1[11] + + s[1] * x1[12] + + s[0] * x1[13] + - s[13] * x3[0] + - s[12] * x3[1] + - s[11] * x3[2] + - s[10] * x3[3] + - s[9] * x3[4] + - s[8] * x3[5] + - s[7] * x3[6] + - s[6] * x3[7] + - s[5] * x3[8] + - s[4] * x3[9] + - s[3] * x3[10] + - s[2] * x3[11] + - s[1] * x3[12] + - s[0] * x3[13] + - y1[13] + - y3[13] + + q2[1] + + 0xFFFF * q2[8] + + 0xFFFF * q2[9] + + 0xFFFF * q2[10] + + 0xFFFF * q2[11] + + 0xFFFF * q2[12] + + 0xFFFF * q2[13] + } + 14 => { + s[14] * x1[0] + + s[13] * x1[1] + + s[12] * x1[2] + + s[11] * x1[3] + + s[10] * x1[4] + + s[9] * x1[5] + + s[8] * x1[6] + + s[7] * x1[7] + + s[6] * x1[8] + + s[5] * x1[9] + + s[4] * x1[10] + + s[3] * x1[11] + + s[2] * x1[12] + + s[1] * x1[13] + + s[0] * x1[14] + - s[14] * x3[0] + - s[13] * x3[1] + - s[12] * x3[2] + - s[11] * x3[3] + - s[10] * x3[4] + - s[9] * x3[5] + - s[8] * x3[6] + - s[7] * x3[7] + - s[6] * x3[8] + - s[5] * x3[9] + - s[4] * x3[10] + - s[3] * x3[11] + - s[2] * x3[12] + - s[1] * x3[13] + - s[0] * x3[14] + - y1[14] + - y3[14] + + 0xFFFF * q2[0] + + q2[2] + + 0xFFFF * q2[9] + + 0xFFFF * q2[10] + + 0xFFFF * q2[11] + + 0xFFFF * q2[12] + + 0xFFFF * q2[13] + + 0xFFFF * q2[14] + } + 15 => { + s[15] * x1[0] + + s[14] * x1[1] + + s[13] * x1[2] + + s[12] * x1[3] + + s[11] * x1[4] + + s[10] * x1[5] + + s[9] * x1[6] + + s[8] * x1[7] + + s[7] * x1[8] + + s[6] * x1[9] + + s[5] * x1[10] + + s[4] * x1[11] + + s[3] * x1[12] + + s[2] * x1[13] + + s[1] * x1[14] + + s[0] * x1[15] + - s[15] * x3[0] + - s[14] * x3[1] + - s[13] * x3[2] + - s[12] * x3[3] + - s[11] * x3[4] + - s[10] * x3[5] + - s[9] * x3[6] + - s[8] * x3[7] + - s[7] * x3[8] + - s[6] * x3[9] + - s[5] * x3[10] + - s[4] * x3[11] + - s[3] * x3[12] + - s[2] * x3[13] + - s[1] * x3[14] + - s[0] * x3[15] + - y1[15] + - y3[15] + + 0xFFFF * q2[0] + + 0xFFFF * q2[1] + + q2[3] + + 0xFFFF * q2[10] + + 0xFFFF * q2[11] + + 0xFFFF * q2[12] + + 0xFFFF * q2[13] + + 0xFFFF * q2[14] + + 0xFFFF * q2[15] + } + 16 => { + s[15] * x1[1] + + s[14] * x1[2] + + s[13] * x1[3] + + s[12] * x1[4] + + s[11] * x1[5] + + s[10] * x1[6] + + s[9] * x1[7] + + s[8] * x1[8] + + s[7] * x1[9] + + s[6] * x1[10] + + s[5] * x1[11] + + s[4] * x1[12] + + s[3] * x1[13] + + s[2] * x1[14] + + s[1] * x1[15] + - s[15] * x3[1] + - s[14] * x3[2] + - s[13] * x3[3] + - s[12] * x3[4] + - s[11] * x3[5] + - s[10] * x3[6] + - s[9] * x3[7] + - s[8] * x3[8] + - s[7] * x3[9] + - s[6] * x3[10] + - s[5] * x3[11] + - s[4] * x3[12] + - s[3] * x3[13] + - s[2] * x3[14] + - s[1] * x3[15] + + 0xFFFF * q2[1] + + 0xFFFF * q2[2] + + q2[4] + + 0xFFFF * q2[11] + + 0xFFFF * q2[12] + + 0xFFFF * q2[13] + + 0xFFFF * q2[14] + + 0xFFFF * q2[15] + - 0xFFFE + } + 17 => { + s[15] * x1[2] + + s[14] * x1[3] + + s[13] * x1[4] + + s[12] * x1[5] + + s[11] * x1[6] + + s[10] * x1[7] + + s[9] * x1[8] + + s[8] * x1[9] + + s[7] * x1[10] + + s[6] * x1[11] + + s[5] * x1[12] + + s[4] * x1[13] + + s[3] * x1[14] + + s[2] * x1[15] + - s[15] * x3[2] + - s[14] * x3[3] + - s[13] * x3[4] + - s[12] * x3[5] + - s[11] * x3[6] + - s[10] * x3[7] + - s[9] * x3[8] + - s[8] * x3[9] + - s[7] * x3[10] + - s[6] * x3[11] + - s[5] * x3[12] + - s[4] * x3[13] + - s[3] * x3[14] + - s[2] * x3[15] + + 0xFFFF * q2[2] + + 0xFFFF * q2[3] + + q2[5] + + 0xFFFF * q2[12] + + 0xFFFF * q2[13] + + 0xFFFF * q2[14] + + 0xFFFF * q2[15] + - 0xFFFF + } + 18 => { + s[15] * x1[3] + + s[14] * x1[4] + + s[13] * x1[5] + + s[12] * x1[6] + + s[11] * x1[7] + + s[10] * x1[8] + + s[9] * x1[9] + + s[8] * x1[10] + + s[7] * x1[11] + + s[6] * x1[12] + + s[5] * x1[13] + + s[4] * x1[14] + + s[3] * x1[15] + - s[15] * x3[3] + - s[14] * x3[4] + - s[13] * x3[5] + - s[12] * x3[6] + - s[11] * x3[7] + - s[10] * x3[8] + - s[9] * x3[9] + - s[8] * x3[10] + - s[7] * x3[11] + - s[6] * x3[12] + - s[5] * x3[13] + - s[4] * x3[14] + - s[3] * x3[15] + + 0xFFFF * q2[3] + + 0xFFFF * q2[4] + + q2[6] + + 0xFFFF * q2[13] + + 0xFFFF * q2[14] + + 0xFFFF * q2[15] + - 0xFFFF + } + 19 => { + s[15] * x1[4] + + s[14] * x1[5] + + s[13] * x1[6] + + s[12] * x1[7] + + s[11] * x1[8] + + s[10] * x1[9] + + s[9] * x1[10] + + s[8] * x1[11] + + s[7] * x1[12] + + s[6] * x1[13] + + s[5] * x1[14] + + s[4] * x1[15] + - s[15] * x3[4] + - s[14] * x3[5] + - s[13] * x3[6] + - s[12] * x3[7] + - s[11] * x3[8] + - s[10] * x3[9] + - s[9] * x3[10] + - s[8] * x3[11] + - s[7] * x3[12] + - s[6] * x3[13] + - s[5] * x3[14] + - s[4] * x3[15] + + 0xFFFF * q2[4] + + 0xFFFF * q2[5] + + q2[7] + + 0xFFFF * q2[14] + + 0xFFFF * q2[15] + - 0xFFFF + } + 20 => { + s[15] * x1[5] + + s[14] * x1[6] + + s[13] * x1[7] + + s[12] * x1[8] + + s[11] * x1[9] + + s[10] * x1[10] + + s[9] * x1[11] + + s[8] * x1[12] + + s[7] * x1[13] + + s[6] * x1[14] + + s[5] * x1[15] + - s[15] * x3[5] + - s[14] * x3[6] + - s[13] * x3[7] + - s[12] * x3[8] + - s[11] * x3[9] + - s[10] * x3[10] + - s[9] * x3[11] + - s[8] * x3[12] + - s[7] * x3[13] + - s[6] * x3[14] + - s[5] * x3[15] + + 0xFFFF * q2[5] + + 0xFFFF * q2[6] + + q2[8] + + 0xFFFF * q2[15] + - 0xFFFF + } + 21 => { + s[15] * x1[6] + + s[14] * x1[7] + + s[13] * x1[8] + + s[12] * x1[9] + + s[11] * x1[10] + + s[10] * x1[11] + + s[9] * x1[12] + + s[8] * x1[13] + + s[7] * x1[14] + + s[6] * x1[15] + - s[15] * x3[6] + - s[14] * x3[7] + - s[13] * x3[8] + - s[12] * x3[9] + - s[11] * x3[10] + - s[10] * x3[11] + - s[9] * x3[12] + - s[8] * x3[13] + - s[7] * x3[14] + - s[6] * x3[15] + + 0xFFFF * q2[6] + + 0xFFFF * q2[7] + + q2[9] + - 0xFFFF + } + 22 => { + s[15] * x1[7] + + s[14] * x1[8] + + s[13] * x1[9] + + s[12] * x1[10] + + s[11] * x1[11] + + s[10] * x1[12] + + s[9] * x1[13] + + s[8] * x1[14] + + s[7] * x1[15] + - s[15] * x3[7] + - s[14] * x3[8] + - s[13] * x3[9] + - s[12] * x3[10] + - s[11] * x3[11] + - s[10] * x3[12] + - s[9] * x3[13] + - s[8] * x3[14] + - s[7] * x3[15] + + 0xFFFF * q2[7] + + 0xFFFF * q2[8] + + q2[10] + - 0x1 + } + 23 => { + s[15] * x1[8] + + s[14] * x1[9] + + s[13] * x1[10] + + s[12] * x1[11] + + s[11] * x1[12] + + s[10] * x1[13] + + s[9] * x1[14] + + s[8] * x1[15] + - s[15] * x3[8] + - s[14] * x3[9] + - s[13] * x3[10] + - s[12] * x3[11] + - s[11] * x3[12] + - s[10] * x3[13] + - s[9] * x3[14] + - s[8] * x3[15] + + 0xFFFF * q2[8] + + 0xFFFF * q2[9] + + q2[11] + } + 24 => { + s[15] * x1[9] + + s[14] * x1[10] + + s[13] * x1[11] + + s[12] * x1[12] + + s[11] * x1[13] + + s[10] * x1[14] + + s[9] * x1[15] + - s[15] * x3[9] + - s[14] * x3[10] + - s[13] * x3[11] + - s[12] * x3[12] + - s[11] * x3[13] + - s[10] * x3[14] + - s[9] * x3[15] + + 0xFFFF * q2[9] + + 0xFFFF * q2[10] + + q2[12] + } + 25 => { + s[15] * x1[10] + + s[14] * x1[11] + + s[13] * x1[12] + + s[12] * x1[13] + + s[11] * x1[14] + + s[10] * x1[15] + - s[15] * x3[10] + - s[14] * x3[11] + - s[13] * x3[12] + - s[12] * x3[13] + - s[11] * x3[14] + - s[10] * x3[15] + + 0xFFFF * q2[10] + + 0xFFFF * q2[11] + + q2[13] + } + 26 => { + s[15] * x1[11] + s[14] * x1[12] + s[13] * x1[13] + s[12] * x1[14] + s[11] * x1[15] + - s[15] * x3[11] + - s[14] * x3[12] + - s[13] * x3[13] + - s[12] * x3[14] + - s[11] * x3[15] + + 0xFFFF * q2[11] + + 0xFFFF * q2[12] + + q2[14] + } + 27 => { + s[15] * x1[12] + s[14] * x1[13] + s[13] * x1[14] + s[12] * x1[15] + - s[15] * x3[12] + - s[14] * x3[13] + - s[13] * x3[14] + - s[12] * x3[15] + + 0xFFFF * q2[12] + + 0xFFFF * q2[13] + + q2[15] + } + 28 => { + s[15] * x1[13] + s[14] * x1[14] + s[13] * x1[15] + - s[15] * x3[13] + - s[14] * x3[14] + - s[13] * x3[15] + + 0xFFFF * q2[13] + + 0xFFFF * q2[14] + - 0x2 + } + 29 => { + s[15] * x1[14] + s[14] * x1[15] - s[15] * x3[14] - s[14] * x3[15] + + 0xFFFF * q2[14] + + 0xFFFF * q2[15] + } + 30 => s[15] * x1[15] - s[15] * x3[15] + 0xFFFF * q2[15] - 0xFFFE, + 31 => -0x1FFFF, + _ => 0, + } + } +} diff --git a/precompiles/arith_eq/src/executors/mod.rs b/precompiles/arith_eq/src/executors/mod.rs index 2d114fbf7..52a9df04e 100644 --- a/precompiles/arith_eq/src/executors/mod.rs +++ b/precompiles/arith_eq/src/executors/mod.rs @@ -4,6 +4,7 @@ pub(crate) mod arith_eq_data; pub(crate) mod bn254_complex; pub(crate) mod bn254_curve; pub(crate) mod secp256k1; +pub(crate) mod secp256r1; #[allow(unused_imports)] pub use arith256::*; @@ -15,5 +16,7 @@ pub use bn254_complex::*; pub use bn254_curve::*; #[allow(unused_imports)] pub use secp256k1::*; +#[allow(unused_imports)] +pub use secp256r1::*; pub use arith_eq_data::*; diff --git a/precompiles/arith_eq/src/executors/secp256r1.rs b/precompiles/arith_eq/src/executors/secp256r1.rs new file mode 100644 index 000000000..08db6b884 --- /dev/null +++ b/precompiles/arith_eq/src/executors/secp256r1.rs @@ -0,0 +1,182 @@ +use super::ArithEqData; +use lazy_static::lazy_static; +use num_bigint::BigInt; +use num_traits::Zero; +use precompiles_helpers::{bigint2_to_8_u64, bigint_from_field, bigint_to_16_chunks}; + +use crate::equations; +use ark_secp256r1::Fq as Secp256r1Field; + +const COLS: u8 = 32; + +lazy_static! { + pub static ref SECP256R1_PRIME: BigInt = BigInt::parse_bytes( + b"ffffffff00000001000000000000000000000000ffffffffffffffffffffffff", + 16 + ) + .unwrap(); + pub static ref SECP256R1_A: BigInt = BigInt::parse_bytes( + b"ffffffff00000001000000000000000000000000fffffffffffffffffffffffc", + 16 + ) + .unwrap(); + pub static ref SECP256R1_ADD_Q0_OFFSET: BigInt = BigInt::from(1) << 257; + pub static ref SECP256R1_DBL_Q0_OFFSET: BigInt = BigInt::from(1) << 258; + pub static ref SECP256R1_Q1_OFFSET: BigInt = BigInt::from(4); + pub static ref SECP256R1_Q2_OFFSET: BigInt = BigInt::from(1) << 257; +} + +pub struct Secp256r1 {} + +impl Secp256r1 { + #[allow(dead_code)] + pub fn calculate_add(p1: &[u64; 8], p2: &[u64; 8], p3: &mut [u64; 8]) { + Self::prepare(false, p1, p2, Some(p3)); + } + #[allow(dead_code)] + pub fn calculate_dbl(p1: &[u64; 8], p3: &mut [u64; 8]) { + Self::prepare(true, p1, p1, Some(p3)); + } + + fn point_from_8x64(p: &[u64; 8]) -> (Secp256r1Field, Secp256r1Field) { + ( + Secp256r1Field::from(ark_ff::BigInt::<4>(p[0..4].try_into().unwrap())), + Secp256r1Field::from(ark_ff::BigInt::<4>(p[4..8].try_into().unwrap())), + ) + } + fn prepare( + is_dbl: bool, + p1: &[u64; 8], + p2: &[u64; 8], + p3: Option<&mut [u64; 8]>, + ) -> Option { + let (x1, y1) = Self::point_from_8x64(p1); + let (x2, y2) = if is_dbl { (x1, y1) } else { Self::point_from_8x64(p2) }; + + let s = if is_dbl { + (Secp256r1Field::from(3u64) * x1 * x1 + Secp256r1Field::from(-3)) / (y1 + y1) + } else { + (y2 - y1) / (x2 - x1) + }; + let x3 = s * s - (x1 + x2); + let y3 = s * (x1 - x3) - y1; + + let s = bigint_from_field(&s); + let x1 = bigint_from_field(&x1); + let y1 = bigint_from_field(&y1); + let x2 = bigint_from_field(&x2); + let y2 = bigint_from_field(&y2); + let x3 = bigint_from_field(&x3); + let y3 = bigint_from_field(&y3); + + let q0 = if is_dbl { + let _q0: BigInt = 2 * &s * &y1 - 3 * &x1 * &x1 - &*SECP256R1_A; + assert!((&_q0 % &*SECP256R1_PRIME).is_zero()); + &*SECP256R1_DBL_Q0_OFFSET - (&_q0 / &*SECP256R1_PRIME) + } else { + let _q0: BigInt = &s * (&x2 - &x1) - &y2 + &y1; + assert!((&_q0 % &*SECP256R1_PRIME).is_zero()); + (&_q0 / &*SECP256R1_PRIME) + &*SECP256R1_ADD_Q0_OFFSET + }; + + let _q1 = &s * &s - &x1 - &x2 - &x3; + assert!((&_q1 % &*SECP256R1_PRIME).is_zero()); + let q1 = (&_q1 / &*SECP256R1_PRIME) + &*SECP256R1_Q1_OFFSET; + + let _q2 = &s * &x1 - &s * &x3 - &y1 - &y3; + assert!((&_q2 % &*SECP256R1_PRIME).is_zero()); + let q2 = &*SECP256R1_Q2_OFFSET - (&_q2 / &*SECP256R1_PRIME); + + if let Some(p3) = p3 { + bigint2_to_8_u64(&x3, &y3, p3); + return None; + } + + let mut data = ArithEqData::default(); + bigint_to_16_chunks(&q0, &mut data.q0); + bigint_to_16_chunks(&q1, &mut data.q1); + bigint_to_16_chunks(&q2, &mut data.q2); + bigint_to_16_chunks(&s, &mut data.s); + bigint_to_16_chunks(&x1, &mut data.x1); + bigint_to_16_chunks(&y1, &mut data.y1); + bigint_to_16_chunks(&x2, &mut data.x2); + bigint_to_16_chunks(&y2, &mut data.y2); + bigint_to_16_chunks(&x3, &mut data.x3); + bigint_to_16_chunks(&y3, &mut data.y3); + Some(data) + } + #[inline(always)] + #[allow(dead_code)] + pub fn execute_add(p1: &[u64; 8], p2: &[u64; 8]) -> ArithEqData { + Self::execute_add_dbl(false, p1, p2) + } + + #[inline(always)] + #[allow(dead_code)] + pub fn execute_dbl(p1: &[u64; 8]) -> ArithEqData { + Self::execute_add_dbl(true, p1, p1) + } + pub fn execute_add_dbl(is_dbl: bool, p1: &[u64; 8], p2: &[u64; 8]) -> ArithEqData { + let mut data = Self::prepare(is_dbl, p1, p2, None).unwrap(); + for icol in 0..COLS { + let index = icol as usize; + data.eq[index] = [ + if is_dbl { + equations::Secp256r1Dbl::calculate(icol, &data.x1, &data.y1, &data.s, &data.q0) + } else { + equations::Secp256r1Add::calculate( + icol, &data.x1, &data.y1, &data.x2, &data.y2, &data.s, &data.q0, + ) + }, + equations::Secp256r1X3::calculate( + icol, &data.x1, &data.x2, &data.x3, &data.s, &data.q1, + ), + equations::Secp256r1Y3::calculate( + icol, &data.x1, &data.y1, &data.x3, &data.y3, &data.s, &data.q2, + ), + ]; + for ieq in 0..3 { + let cin = if index > 0 { data.cout[index - 1][ieq] } else { 0 }; + let value = data.eq[index][ieq] + cin; + if icol != COLS - 1 { + data.cout[index][ieq] = value / 0x10000; + } + debug_assert!( + 0 == if icol == COLS - 1 { value } else { value % 0x10000 }, + "EqSecp256r1 residue eq{ieq} ({index}) #:{value} cin:{cin}" + ); + } + } + data + } + #[cfg(feature = "test_data")] + #[allow(dead_code)] + pub fn verify_add_dbl(is_dbl: bool, p1: &[u64; 8], p2: &[u64; 8], p: &[u64; 8]) { + let data = Self::execute_add_dbl(is_dbl, p1, p2); + data.check_ranges(); + let op = if is_dbl { "Secp256r1Dbl" } else { "Secp256r1Add" }; + for i in 0..2 { + let offset = (i + 1) * 4 - 1; + let mut x3 = data.x3[offset] as u64; + let mut y3 = data.y3[offset] as u64; + for j in 1..4 { + x3 <<= 16; + y3 <<= 16; + x3 += data.x3[offset - j] as u64; + y3 += data.y3[offset - j] as u64; + } + assert!(p[i] == x3, "{} p[{}]:{} not match with x3:{}", op, i, p[i], x3); + assert!(p[i + 4] == y3, "{} p[{}]:{} not match with y3:{}", op, i + 4, p[i + 4], y3); + } + } + #[cfg(feature = "test_data")] + #[allow(dead_code)] + pub fn verify_add(p1: &[u64; 8], p2: &[u64; 8], p: &[u64; 8]) { + Self::verify_add_dbl(false, p1, p2, p); + } + #[cfg(feature = "test_data")] + #[allow(dead_code)] + pub fn verify_dbl(p1: &[u64; 8], p: &[u64; 8]) { + Self::verify_add_dbl(true, p1, p1, p); + } +} diff --git a/precompiles/arith_eq/src/generator/equation.rs b/precompiles/arith_eq/src/generator/equation.rs index 00180a795..7b895c0e2 100644 --- a/precompiles/arith_eq/src/generator/equation.rs +++ b/precompiles/arith_eq/src/generator/equation.rs @@ -382,7 +382,20 @@ impl Equation { } else { " " }); - for (i, term) in addt.terms.iter().enumerate() { + + // Filter out BigInt terms with value 1 when there are other terms + let terms_to_output: Vec<_> = if addt.terms.len() > 1 { + addt.terms + .iter() + .filter(|t| { + !matches!(t, ProductTerm::BigInt { value, .. } if *value == BigInt::one()) + }) + .collect() + } else { + addt.terms.iter().collect() + }; + + for (i, term) in terms_to_output.iter().enumerate() { if i > 0 { line.append(" * "); } diff --git a/precompiles/arith_eq/src/mem_inputs/arith256.rs b/precompiles/arith_eq/src/mem_inputs/arith256.rs index dc6dc5084..e47286031 100644 --- a/precompiles/arith_eq/src/mem_inputs/arith256.rs +++ b/precompiles/arith_eq/src/mem_inputs/arith256.rs @@ -1,8 +1,6 @@ use super::ArithEqMemInputConfig; use crate::executors::Arith256; -use std::collections::VecDeque; -use zisk_common::BusId; -use zisk_common::MemCollectorInfo; +use precompiles_common::MemProcessor; pub const ARITH_256_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfig { indirect_params: 5, @@ -12,17 +10,17 @@ pub const ARITH_256_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfig { chunks_per_param: 4, }; -pub fn generate_arith256_mem_inputs( +pub fn generate_arith256_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { // op,op_type,a,b,addr[5],... - let a: &[u64; 4] = &data[9..13].try_into().unwrap(); - let b: &[u64; 4] = &data[13..17].try_into().unwrap(); - let c: &[u64; 4] = &data[17..21].try_into().unwrap(); + let a: &[u64; 4] = &data[10..14].try_into().unwrap(); + let b: &[u64; 4] = &data[14..18].try_into().unwrap(); + let c: &[u64; 4] = &data[18..22].try_into().unwrap(); // let mut dh = [0u64; 4]; // let mut dl = [0u64; 4]; let mut d: [u64; 8] = [0u64; 8]; @@ -38,15 +36,15 @@ pub fn generate_arith256_mem_inputs( data, Some(&d), only_counters, - pending, + mem_processors, &ARITH_256_MEM_CONFIG, ); } -pub fn skip_arith256_mem_inputs( +pub fn skip_arith256_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &ARITH_256_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &ARITH_256_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq/src/mem_inputs/arith256_mod.rs b/precompiles/arith_eq/src/mem_inputs/arith256_mod.rs index a8c0b8d45..04aa76153 100644 --- a/precompiles/arith_eq/src/mem_inputs/arith256_mod.rs +++ b/precompiles/arith_eq/src/mem_inputs/arith256_mod.rs @@ -1,8 +1,6 @@ use super::ArithEqMemInputConfig; use crate::executors::Arith256Mod; -use std::collections::VecDeque; -use zisk_common::BusId; -use zisk_common::MemCollectorInfo; +use precompiles_common::MemProcessor; pub const ARITH_256_MOD_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfig { indirect_params: 5, @@ -11,18 +9,18 @@ pub const ARITH_256_MOD_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfi write_params: 1, chunks_per_param: 4, }; -pub fn generate_arith256_mod_mem_inputs( +pub fn generate_arith256_mod_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { // op,op_type,a,b,addr[5],... - let a: &[u64; 4] = &data[9..13].try_into().unwrap(); - let b: &[u64; 4] = &data[13..17].try_into().unwrap(); - let c: &[u64; 4] = &data[17..21].try_into().unwrap(); - let module: &[u64; 4] = &data[21..25].try_into().unwrap(); + let a: &[u64; 4] = &data[10..14].try_into().unwrap(); + let b: &[u64; 4] = &data[14..18].try_into().unwrap(); + let c: &[u64; 4] = &data[18..22].try_into().unwrap(); + let module: &[u64; 4] = &data[22..26].try_into().unwrap(); let mut d: [u64; 4] = [0u64; 4]; Arith256Mod::calculate(a, b, c, module, &mut d); @@ -32,15 +30,15 @@ pub fn generate_arith256_mod_mem_inputs( data, Some(&d), only_counters, - pending, + mem_processors, &ARITH_256_MOD_MEM_CONFIG, ); } -pub fn skip_arith256_mod_mem_inputs( +pub fn skip_arith256_mod_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &ARITH_256_MOD_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &ARITH_256_MOD_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq/src/mem_inputs/bn254_complex_add.rs b/precompiles/arith_eq/src/mem_inputs/bn254_complex_add.rs index fca141932..3e4e97826 100644 --- a/precompiles/arith_eq/src/mem_inputs/bn254_complex_add.rs +++ b/precompiles/arith_eq/src/mem_inputs/bn254_complex_add.rs @@ -1,8 +1,6 @@ use super::ArithEqMemInputConfig; use crate::executors::Bn254Complex; -use std::collections::VecDeque; -use zisk_common::BusId; -use zisk_common::MemCollectorInfo; +use precompiles_common::MemProcessor; pub const BN254_COMPLEX_ADD_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfig { indirect_params: 2, @@ -12,16 +10,16 @@ pub const BN254_COMPLEX_ADD_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputC chunks_per_param: 8, }; -pub fn generate_bn254_complex_add_mem_inputs( +pub fn generate_bn254_complex_add_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { // op,op_type,a,b,addr[2],... - let f1: &[u64; 8] = &data[6..14].try_into().unwrap(); - let f2: &[u64; 8] = &data[14..22].try_into().unwrap(); + let f1: &[u64; 8] = &data[7..15].try_into().unwrap(); + let f2: &[u64; 8] = &data[15..23].try_into().unwrap(); let mut f3 = [0u64; 8]; Bn254Complex::calculate_add(f1, f2, &mut f3); @@ -31,15 +29,15 @@ pub fn generate_bn254_complex_add_mem_inputs( data, Some(&f3), only_counters, - pending, + mem_processors, &BN254_COMPLEX_ADD_MEM_CONFIG, ); } -pub fn skip_bn254_complex_add_mem_inputs( +pub fn skip_bn254_complex_add_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &BN254_COMPLEX_ADD_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &BN254_COMPLEX_ADD_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq/src/mem_inputs/bn254_complex_mul.rs b/precompiles/arith_eq/src/mem_inputs/bn254_complex_mul.rs index 6c70af05b..4ee73bc18 100644 --- a/precompiles/arith_eq/src/mem_inputs/bn254_complex_mul.rs +++ b/precompiles/arith_eq/src/mem_inputs/bn254_complex_mul.rs @@ -1,8 +1,6 @@ use super::ArithEqMemInputConfig; use crate::executors::Bn254Complex; -use std::collections::VecDeque; -use zisk_common::BusId; -use zisk_common::MemCollectorInfo; +use precompiles_common::MemProcessor; pub const BN254_COMPLEX_MUL_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfig { indirect_params: 2, @@ -12,16 +10,16 @@ pub const BN254_COMPLEX_MUL_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputC chunks_per_param: 8, }; -pub fn generate_bn254_complex_mul_mem_inputs( +pub fn generate_bn254_complex_mul_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { // op,op_type,a,b,addr[2],... - let f1: &[u64; 8] = &data[6..14].try_into().unwrap(); - let f2: &[u64; 8] = &data[14..22].try_into().unwrap(); + let f1: &[u64; 8] = &data[7..15].try_into().unwrap(); + let f2: &[u64; 8] = &data[15..23].try_into().unwrap(); let mut f3 = [0u64; 8]; Bn254Complex::calculate_mul(f1, f2, &mut f3); @@ -31,15 +29,15 @@ pub fn generate_bn254_complex_mul_mem_inputs( data, Some(&f3), only_counters, - pending, + mem_processors, &BN254_COMPLEX_MUL_MEM_CONFIG, ); } -pub fn skip_bn254_complex_mul_mem_inputs( +pub fn skip_bn254_complex_mul_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &BN254_COMPLEX_MUL_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &BN254_COMPLEX_MUL_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq/src/mem_inputs/bn254_complex_sub.rs b/precompiles/arith_eq/src/mem_inputs/bn254_complex_sub.rs index df7d18f56..11f1e7573 100644 --- a/precompiles/arith_eq/src/mem_inputs/bn254_complex_sub.rs +++ b/precompiles/arith_eq/src/mem_inputs/bn254_complex_sub.rs @@ -1,8 +1,6 @@ use super::ArithEqMemInputConfig; use crate::executors::Bn254Complex; -use std::collections::VecDeque; -use zisk_common::BusId; -use zisk_common::MemCollectorInfo; +use precompiles_common::MemProcessor; pub const BN254_COMPLEX_SUB_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfig { indirect_params: 2, @@ -12,16 +10,16 @@ pub const BN254_COMPLEX_SUB_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputC chunks_per_param: 8, }; -pub fn generate_bn254_complex_sub_mem_inputs( +pub fn generate_bn254_complex_sub_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { // op,op_type,a,b,addr[2],... - let f1: &[u64; 8] = &data[6..14].try_into().unwrap(); - let f2: &[u64; 8] = &data[14..22].try_into().unwrap(); + let f1: &[u64; 8] = &data[7..15].try_into().unwrap(); + let f2: &[u64; 8] = &data[15..23].try_into().unwrap(); let mut f3 = [0u64; 8]; Bn254Complex::calculate_sub(f1, f2, &mut f3); @@ -31,15 +29,15 @@ pub fn generate_bn254_complex_sub_mem_inputs( data, Some(&f3), only_counters, - pending, + mem_processors, &BN254_COMPLEX_SUB_MEM_CONFIG, ); } -pub fn skip_bn254_complex_sub_mem_inputs( +pub fn skip_bn254_complex_sub_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &BN254_COMPLEX_SUB_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &BN254_COMPLEX_SUB_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq/src/mem_inputs/bn254_curve_add.rs b/precompiles/arith_eq/src/mem_inputs/bn254_curve_add.rs index e160843ee..b43bc2828 100644 --- a/precompiles/arith_eq/src/mem_inputs/bn254_curve_add.rs +++ b/precompiles/arith_eq/src/mem_inputs/bn254_curve_add.rs @@ -1,8 +1,8 @@ use super::ArithEqMemInputConfig; use crate::executors::Bn254Curve; -use std::collections::VecDeque; -use zisk_common::BusId; -use zisk_common::MemCollectorInfo; +use precompiles_common::MemProcessor; + +use zisk_common::OPERATION_PRECOMPILED_BUS_DATA_SIZE; pub const BN254_CURVE_ADD_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfig { indirect_params: 2, @@ -12,16 +12,20 @@ pub const BN254_CURVE_ADD_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputCon chunks_per_param: 8, }; -pub fn generate_bn254_curve_add_mem_inputs( +pub fn generate_bn254_curve_add_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { // op,op_type,a,b,addr[2],... - let p1: &[u64; 8] = &data[6..14].try_into().unwrap(); - let p2: &[u64; 8] = &data[14..22].try_into().unwrap(); + let p1_start = OPERATION_PRECOMPILED_BUS_DATA_SIZE + BN254_CURVE_ADD_MEM_CONFIG.indirect_params; + let p1: &[u64; 8] = + &data[p1_start..p1_start + BN254_CURVE_ADD_MEM_CONFIG.chunks_per_param].try_into().unwrap(); + let p2_start = p1_start + BN254_CURVE_ADD_MEM_CONFIG.chunks_per_param; + let p2: &[u64; 8] = + &data[p2_start..p2_start + BN254_CURVE_ADD_MEM_CONFIG.chunks_per_param].try_into().unwrap(); let mut p3 = [0u64; 8]; Bn254Curve::calculate_add(p1, p2, &mut p3); @@ -31,15 +35,15 @@ pub fn generate_bn254_curve_add_mem_inputs( data, Some(&p3), only_counters, - pending, + mem_processors, &BN254_CURVE_ADD_MEM_CONFIG, ); } -pub fn skip_bn254_curve_add_mem_inputs( +pub fn skip_bn254_curve_add_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &BN254_CURVE_ADD_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &BN254_CURVE_ADD_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq/src/mem_inputs/bn254_curve_dbl.rs b/precompiles/arith_eq/src/mem_inputs/bn254_curve_dbl.rs index 1c683f1f7..0273c3aa0 100644 --- a/precompiles/arith_eq/src/mem_inputs/bn254_curve_dbl.rs +++ b/precompiles/arith_eq/src/mem_inputs/bn254_curve_dbl.rs @@ -1,8 +1,8 @@ use super::ArithEqMemInputConfig; use crate::executors::Bn254Curve; -use std::collections::VecDeque; -use zisk_common::BusId; -use zisk_common::MemCollectorInfo; +use precompiles_common::MemProcessor; + +use zisk_common::OPERATION_PRECOMPILED_BUS_DATA_SIZE; pub const BN254_CURVE_DBL_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfig { indirect_params: 0, @@ -12,15 +12,18 @@ pub const BN254_CURVE_DBL_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputCon chunks_per_param: 8, }; -pub fn generate_bn254_curve_dbl_mem_inputs( +pub fn generate_bn254_curve_dbl_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { // op,op_type,a,b,addr[2],... - let p1: &[u64; 8] = &data[4..12].try_into().unwrap(); + let p1: &[u64; 8] = &data + [OPERATION_PRECOMPILED_BUS_DATA_SIZE..OPERATION_PRECOMPILED_BUS_DATA_SIZE + 8] + .try_into() + .unwrap(); let mut p3 = [0u64; 8]; Bn254Curve::calculate_dbl(p1, &mut p3); @@ -30,15 +33,15 @@ pub fn generate_bn254_curve_dbl_mem_inputs( data, Some(&p3), only_counters, - pending, + mem_processors, &BN254_CURVE_DBL_MEM_CONFIG, ); } -pub fn skip_bn254_curve_dbl_mem_inputs( +pub fn skip_bn254_curve_dbl_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &BN254_CURVE_DBL_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &BN254_CURVE_DBL_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq/src/mem_inputs/generate_mem_inputs.rs b/precompiles/arith_eq/src/mem_inputs/generate_mem_inputs.rs index 2b1b7d943..cc9061695 100644 --- a/precompiles/arith_eq/src/mem_inputs/generate_mem_inputs.rs +++ b/precompiles/arith_eq/src/mem_inputs/generate_mem_inputs.rs @@ -1,7 +1,7 @@ use precompiles_common::MemBusHelpers; -use std::collections::VecDeque; -use zisk_common::MemCollectorInfo; -use zisk_common::{BusId, OPERATION_BUS_DATA_SIZE}; +use precompiles_common::MemProcessor; + +use zisk_common::OPERATION_PRECOMPILED_BUS_DATA_SIZE; #[derive(Debug)] pub struct ArithEqMemInputConfig { @@ -11,24 +11,24 @@ pub struct ArithEqMemInputConfig { pub write_params: usize, pub chunks_per_param: usize, } -pub fn generate_mem_inputs( +pub fn generate_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], write_data: Option<&[u64]>, only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, config: &ArithEqMemInputConfig, ) { let params_count = config.read_params + config.write_params; - let params_offset = OPERATION_BUS_DATA_SIZE + config.indirect_params; + let params_offset = OPERATION_PRECOMPILED_BUS_DATA_SIZE + config.indirect_params; for iparam in 0..config.indirect_params { - MemBusHelpers::mem_aligned_load( + MemBusHelpers::mem_aligned_read( addr_main + iparam as u32 * 8, step_main, - data[OPERATION_BUS_DATA_SIZE + iparam], - pending, + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + iparam], + mem_processors, ) } for iparam in 0..params_count { @@ -39,7 +39,7 @@ pub fn generate_mem_inputs( }; let param_addr = if config.indirect_params > 0 { // read indirect parameters, means stored the address of parameter - data[OPERATION_BUS_DATA_SIZE + param_index] as u32 + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + param_index] as u32 } else { addr_main + (param_index * 8 * config.chunks_per_param) as u32 }; @@ -66,27 +66,25 @@ pub fn generate_mem_inputs( step_main, chunk_data, is_write, - pending, + mem_processors, ) } } } -pub fn skip_mem_inputs( +pub fn skip_mem_inputs( addr_main: u32, data: &[u64], config: &ArithEqMemInputConfig, - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { let params_count = config.read_params + config.write_params; // Check indirect loads for iparam in 0..config.indirect_params { let addr = addr_main + iparam as u32 * 8; - for mem_collector in mem_collectors_info { - if !mem_collector.skip_addr(addr) { - return false; - } + if !mem_processors.skip_addr(addr) { + return false; } } @@ -97,16 +95,14 @@ pub fn skip_mem_inputs( iparam }; let param_addr = if config.indirect_params > 0 { - data[OPERATION_BUS_DATA_SIZE + param_index] as u32 + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + param_index] as u32 } else { addr_main + (param_index * 8 * config.chunks_per_param) as u32 }; for ichunk in 0..config.chunks_per_param { let addr = param_addr + ichunk as u32 * 8; - for mem_collector in mem_collectors_info { - if !mem_collector.skip_addr(addr) { - return false; - } + if !mem_processors.skip_addr(addr) { + return false; } } } diff --git a/precompiles/arith_eq/src/mem_inputs/mod.rs b/precompiles/arith_eq/src/mem_inputs/mod.rs index 0701a1db3..665159f2a 100644 --- a/precompiles/arith_eq/src/mem_inputs/mod.rs +++ b/precompiles/arith_eq/src/mem_inputs/mod.rs @@ -8,6 +8,8 @@ mod bn254_curve_dbl; mod generate_mem_inputs; mod secp256k1_add; mod secp256k1_dbl; +mod secp256r1_add; +mod secp256r1_dbl; pub use arith256::*; pub use arith256_mod::*; @@ -19,3 +21,5 @@ pub use bn254_curve_dbl::*; pub use generate_mem_inputs::*; pub use secp256k1_add::*; pub use secp256k1_dbl::*; +pub use secp256r1_add::*; +pub use secp256r1_dbl::*; diff --git a/precompiles/arith_eq/src/mem_inputs/secp256k1_add.rs b/precompiles/arith_eq/src/mem_inputs/secp256k1_add.rs index 0c08a0ba0..bdd2ff7aa 100644 --- a/precompiles/arith_eq/src/mem_inputs/secp256k1_add.rs +++ b/precompiles/arith_eq/src/mem_inputs/secp256k1_add.rs @@ -1,8 +1,6 @@ use super::ArithEqMemInputConfig; use crate::executors::Secp256k1; -use std::collections::VecDeque; -use zisk_common::BusId; -use zisk_common::MemCollectorInfo; +use precompiles_common::MemProcessor; pub const SECP256K1_ADD_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfig { indirect_params: 2, @@ -12,16 +10,16 @@ pub const SECP256K1_ADD_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfi chunks_per_param: 8, }; -pub fn generate_secp256k1_add_mem_inputs( +pub fn generate_secp256k1_add_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { // op,op_type,a,b,addr[2],... - let p1: &[u64; 8] = &data[6..14].try_into().unwrap(); - let p2: &[u64; 8] = &data[14..22].try_into().unwrap(); + let p1: &[u64; 8] = &data[7..15].try_into().unwrap(); + let p2: &[u64; 8] = &data[15..23].try_into().unwrap(); let mut p3 = [0u64; 8]; Secp256k1::calculate_add(p1, p2, &mut p3); @@ -31,15 +29,15 @@ pub fn generate_secp256k1_add_mem_inputs( data, Some(&p3), only_counters, - pending, + mem_processors, &SECP256K1_ADD_MEM_CONFIG, ); } -pub fn skip_secp256k1_add_mem_inputs( +pub fn skip_secp256k1_add_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &SECP256K1_ADD_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &SECP256K1_ADD_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq/src/mem_inputs/secp256k1_dbl.rs b/precompiles/arith_eq/src/mem_inputs/secp256k1_dbl.rs index b86bf868a..451e4e065 100644 --- a/precompiles/arith_eq/src/mem_inputs/secp256k1_dbl.rs +++ b/precompiles/arith_eq/src/mem_inputs/secp256k1_dbl.rs @@ -1,8 +1,6 @@ use super::ArithEqMemInputConfig; use crate::executors::Secp256k1; -use std::collections::VecDeque; -use zisk_common::BusId; -use zisk_common::MemCollectorInfo; +use precompiles_common::MemProcessor; pub const SECP256K1_DBL_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfig { indirect_params: 0, @@ -12,15 +10,15 @@ pub const SECP256K1_DBL_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfi chunks_per_param: 8, }; -pub fn generate_secp256k1_dbl_mem_inputs( +pub fn generate_secp256k1_dbl_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + processor: &mut P, ) { // op,op_type,a,b,... - let p1: &[u64; 8] = &data[4..12].try_into().unwrap(); + let p1: &[u64; 8] = &data[5..13].try_into().unwrap(); let mut p3 = [0u64; 8]; Secp256k1::calculate_dbl(p1, &mut p3); @@ -30,15 +28,15 @@ pub fn generate_secp256k1_dbl_mem_inputs( data, Some(&p3), only_counters, - pending, + processor, &SECP256K1_DBL_MEM_CONFIG, ); } -pub fn skip_secp256k1_dbl_mem_inputs( +pub fn skip_secp256k1_dbl_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &SECP256K1_DBL_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &SECP256K1_DBL_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq/src/mem_inputs/secp256r1_add.rs b/precompiles/arith_eq/src/mem_inputs/secp256r1_add.rs new file mode 100644 index 000000000..c76966895 --- /dev/null +++ b/precompiles/arith_eq/src/mem_inputs/secp256r1_add.rs @@ -0,0 +1,43 @@ +use super::ArithEqMemInputConfig; +use crate::executors::Secp256r1; +use precompiles_common::MemProcessor; + +pub const SECP256R1_ADD_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfig { + indirect_params: 2, + rewrite_params: true, + read_params: 2, + write_params: 1, + chunks_per_param: 8, +}; + +pub fn generate_secp256r1_add_mem_inputs( + addr_main: u32, + step_main: u64, + data: &[u64], + only_counters: bool, + mem_processors: &mut P, +) { + // op,op_type,a,b,addr[2],... + let p1: &[u64; 8] = &data[7..15].try_into().unwrap(); + let p2: &[u64; 8] = &data[15..23].try_into().unwrap(); + let mut p3 = [0u64; 8]; + + Secp256r1::calculate_add(p1, p2, &mut p3); + super::generate_mem_inputs( + addr_main, + step_main, + data, + Some(&p3), + only_counters, + mem_processors, + &SECP256R1_ADD_MEM_CONFIG, + ); +} + +pub fn skip_secp256r1_add_mem_inputs( + addr_main: u32, + data: &[u64], + mem_processors: &mut P, +) -> bool { + super::skip_mem_inputs(addr_main, data, &SECP256R1_ADD_MEM_CONFIG, mem_processors) +} diff --git a/precompiles/arith_eq/src/mem_inputs/secp256r1_dbl.rs b/precompiles/arith_eq/src/mem_inputs/secp256r1_dbl.rs new file mode 100644 index 000000000..b7c48897d --- /dev/null +++ b/precompiles/arith_eq/src/mem_inputs/secp256r1_dbl.rs @@ -0,0 +1,42 @@ +use super::ArithEqMemInputConfig; +use crate::executors::Secp256r1; +use precompiles_common::MemProcessor; + +pub const SECP256R1_DBL_MEM_CONFIG: ArithEqMemInputConfig = ArithEqMemInputConfig { + indirect_params: 0, + rewrite_params: true, + read_params: 1, + write_params: 1, + chunks_per_param: 8, +}; + +pub fn generate_secp256r1_dbl_mem_inputs( + addr_main: u32, + step_main: u64, + data: &[u64], + only_counters: bool, + processor: &mut P, +) { + // op,op_type,a,b,... + let p1: &[u64; 8] = &data[5..13].try_into().unwrap(); + let mut p3 = [0u64; 8]; + + Secp256r1::calculate_dbl(p1, &mut p3); + super::generate_mem_inputs( + addr_main, + step_main, + data, + Some(&p3), + only_counters, + processor, + &SECP256R1_DBL_MEM_CONFIG, + ); +} + +pub fn skip_secp256r1_dbl_mem_inputs( + addr_main: u32, + data: &[u64], + mem_processors: &mut P, +) -> bool { + super::skip_mem_inputs(addr_main, data, &SECP256R1_DBL_MEM_CONFIG, mem_processors) +} diff --git a/precompiles/arith_eq/src/test_data/mod.rs b/precompiles/arith_eq/src/test_data/mod.rs index 7056634f2..12a785ddc 100644 --- a/precompiles/arith_eq/src/test_data/mod.rs +++ b/precompiles/arith_eq/src/test_data/mod.rs @@ -8,6 +8,8 @@ mod bn254_curve_add_test_data; mod bn254_curve_dbl_test_data; mod secp256k1_add_test_data; mod secp256k1_dbl_test_data; +mod secp256r1_add_test_data; +mod secp256r1_dbl_test_data; mod str_test_data; pub use arith256_mod_test_data::*; @@ -19,4 +21,6 @@ pub use bn254_curve_add_test_data::*; pub use bn254_curve_dbl_test_data::*; pub use secp256k1_add_test_data::*; pub use secp256k1_dbl_test_data::*; +pub use secp256r1_add_test_data::*; +pub use secp256r1_dbl_test_data::*; pub use str_test_data::*; diff --git a/precompiles/arith_eq/src/test_data/secp256r1_add_test_data.rs b/precompiles/arith_eq/src/test_data/secp256r1_add_test_data.rs new file mode 100644 index 000000000..5dda8b76c --- /dev/null +++ b/precompiles/arith_eq/src/test_data/secp256r1_add_test_data.rs @@ -0,0 +1,428 @@ +use super::str_test_data; + +pub fn get_secp256r1_add_test_data(index: usize) -> Option<([u64; 8], [u64; 8], [u64; 8])> { + if let Some(sdata) = get_secp256r1_add_test_str_data(index) { + let bdata = str_test_data::<6, 8>(index, "secp256r1_add_test", sdata); + Some((bdata[0], bdata[1], bdata[2])) + } else { + None + } +} + +/* +p = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff +a = 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc +b = 0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b +F = GF(p) +E = EllipticCurve(F, (a, b)) +for i in range(50): + P = E.random_point(); + Q = E.random_point(); + R = P + Q + print(f"{i} => Some([\"{P[0]}\",\"{P[1]}\",\"{Q[0]}\",\"{Q[1]}\",\"{R[0]}\",\"{R[1]}\"]),"); +*/ +pub fn get_secp256r1_add_test_str_data(index: usize) -> Option<[&'static str; 6]> { + match index { + 0 => Some([ + "17684782685806207390954577263779598973526381175946291755799324209262768957344", + "92167691407581242143601595837485822613797650653636052564997068795998294585315", + "73984533821977080713866179493522290598614194355644276021965064794957344172596", + "25220164086070734051904195238061809059634781413542833942170478608760396143401", + "112171174882501884528082106717813190259999700649843659774819863553444790677722", + "38217399076316596903440542314150866607868328433182787710786920250876662996546", + ]), + 1 => Some([ + "82918273272589823868042929503759319670759008984275041506509079652257071194521", + "61224585167429861024102734372621389555494697000978287314851561976642958452244", + "67310915831906821858103204863888318834564940463611273649349677923602187616233", + "51814378521375348755702907304644887862192343269383631658543556746676469157830", + "77203751959144651428250626283649437330076229869597141272893263683006993559929", + "67344331379487595029094632279940506068248676066019697791912043900620046856013", + ]), + 2 => Some([ + "93111698684678093939605103683080290397897724258093264284317661355069057630808", + "35675818774711895574491415838927782765793396943357279529533888204119443078802", + "96544594863230548499306260104755349234540376563330938978557078346969342385220", + "110188329108817311801109847817685558648859748784587130029696651128198391411170", + "55406363010319599318791981827531471269195981263250380326076602674363183979546", + "80187187215393328353937446610893129036121989978504756983389948399116563153899", + ]), + 3 => Some([ + "109792441664755217847584901718687659931344458985210623403156605941433490235562", + "13672769320709405707218937931583674538845507455548724277425218320749068446824", + "38687032845773596096569999620157205944238436689184055065468202061825530140413", + "85394043624262560205157547608063947866391005887241800198754175377855117746319", + "53842541175280800529597949933301239418747219522402878122636476673093344807096", + "85028463323583941565197628685128161692305398288598561754474353670768548890109", + ]), + 4 => Some([ + "111046606926526837876522959178243244999552433237100111673240488583131028784991", + "30865955838698820275812672384269028129284981326041433797245757418940757931288", + "25115970043795004262482663636387713014533227375501854235219685135338149402223", + "58677217951910880232368327840967535372058919419693130862518037877021177730467", + "51835429297154079647474400111135195081670903668751067082006513570458550334753", + "49963073480457359940942527575761352637668977353404502946418927921633348201017", + ]), + 5 => Some([ + "56716310841055871819822533195423997545886613427643009335772467969509251528982", + "64774184185872237889875447604630858017971380400746453022020026633771596864715", + "67440228314388245033149888721824888298468314735195926940350696944539969178383", + "38253130939000485854773109995881400131962639704062457685413726276301316660839", + "110008225213432446344888784765089079768890827616417156232722879918666196039391", + "68455554716316449349213273103787842483668147082745949068312805763810897213318", + ]), + 6 => Some([ + "10711371137774951565075017512938527972196596954031454094456874742414877754922", + "10979325994147291644193898262920514822778310295654136614674184725119802042368", + "103118243427658940385528043976310308098606077756589810400151657401414766130507", + "93782498337492415246158407900656434766812279585430391592239869365183866375555", + "51788787192610256325779347918095837410829899638556998910764153476027741992579", + "66827727791564345726359173612923110597811907765712045298521380068116711548260", + ]), + 7 => Some([ + "37878582505801285302116644905259385594851471580997806424818245790989358942746", + "3954867695159406499019530537199039649186077483484343374511573856022884343885", + "15954034272742718634920131612614825254413138500513377939178226732036408341417", + "78294798114425485839393414355181903431219656006283736777111163420095295683942", + "114507089246636857174017359524370096126339431136935841344681560435492754667849", + "63142883043554587034959529206801513237434737868484377801071458081196270035831", + ]), + 8 => Some([ + "33550053298798125312683244411504391660388321336315520983909658171623560514091", + "31582360259010038393481741519828217163955429470960819312510044046349274020609", + "75176112042124483069595148862822617085802089111830682967463865996787645016048", + "2932340845413258164776964798617768268605887815062269192858510956466958935973", + "89599095453192155946840139555393621753171863523719103613729148019210406414939", + "69562167672104032160611733500691587785973814928241336686794033626566330555264", + ]), + 9 => Some([ + "94823042128298259500654179528002304592138382315135147006121846477324829311273", + "99906940534375788963644081843967870473312301256166670894408207440018140151479", + "85613919116770679219788316084323512693744412000122326672150148641564001318398", + "103146823225288187202733820965146317648710151494325480234705786985988275268655", + "96791634910682562993887711843304952493943431947706491565722374279494742752228", + "113144214650399993038874400932041418225203539779667359872130127433295594801204", + ]), + 10 => Some([ + "3924293741150057366064332225528023499563338199182095965555578837053505998259", + "66873024892857422435221094361651117209003246442158284596207674673367578708949", + "109298453923049928929026883598539553189485119784740188785061901690209373576555", + "83480763728226906765148281399893822950673958852988467138695175156078878982994", + "9585465302163048573719869770885934779171894251260901734741813172711790573552", + "36580229740635493024586893283432496581936112516452384421778917660278670379205", + ]), + 11 => Some([ + "18310530620295480468572934814176808997048007995002733256546647990508177628495", + "73840740082159408202648255163410263148570126042167268033205813190171840460729", + "110524227115255716942075764723157634370474802155898135889569164884230304931574", + "110768478803701575395251932670901325946021336657137380316423865518882701453077", + "86848462668105028049027273369135648339749859252554207169657766605664687038235", + "103951338529650365319959162200642544339524568925410996102416607861607493173359", + ]), + 12 => Some([ + "99026000812720516094198217504658807704529606677040629671690224871721994009866", + "102283167815738585734079225269834039547914441148344541189360146911122240529378", + "47030337388076660274220789896103266916807259984607814932407577130242858996530", + "13988063588848201446959776861162160594895507686034814948277302761786467365367", + "68963083552307892403665499897704699081845623816023270639734553736595722927729", + "88516918828671556877288187103380179464665131773376519067341033009089322289871", + ]), + 13 => Some([ + "38487995441962658887226186976171889915668616244117603539058056811075303395665", + "30729290185158340402230787453296886837781643907390297912367958322597124855735", + "46377664604144300952370182291547699817924119708455288990002188270179749941596", + "78011506084475784490315938908345250336210574034788332544774927562237068135115", + "44849552197990575942636252844937561490512552613659834659176260937609879733076", + "52667203167598901110147452523952598762344006520849334796894159504486191385966", + ]), + 14 => Some([ + "107323871096141655023648133824881321033974511825709097974777326668514404430186", + "25876968499913150169795244747395736904102969467863417392071209018709682667871", + "14987872774801053572489943273001700127722261309709505847450248474445802930149", + "113761216433816835891945223129831698304508251814618734494593960340282523326599", + "45652559185939258244295551039101331351471636935897381037972987043202864089589", + "109846613246848823431335575806177887162399192846740045730679001049149689360781", + ]), + 15 => Some([ + "89498624753157943574848869547714395176575531584463061175358098074964274548237", + "54610500846976780115167326368939014701633330820351986760119188118026446396172", + "95140673971388669355462722845028729563858704443940420306818524437258583643764", + "33372883152878338678481880215734848164890267885445926530505844306681822831192", + "58052714241912431298421833412422781913243185039601912297088735600684521051387", + "101430933770040428979875496205448661542353192781356287757602374631579200560473", + ]), + 16 => Some([ + "20076577295121672562412962777148154728710138141363032581329189442228756250980", + "84275991458571559479283216723081968429043855843498809147798329351941749450622", + "17925867131735381996928284478794942700284969337102328149800785605282738098817", + "37621115907253832086334801875641895037364746119996579693954641922285954197272", + "31758381054468008065966754331296926026063698834207480276589462782231723619324", + "96280061408269851056578038229643687160614513133720649050290010088707768986190", + ]), + 17 => Some([ + "37833252873912169524349932076511664774170190212025289576052077155435451536339", + "53444662663525260248083762828343868780347220955549460221713254441547310615471", + "43864924226398863070516914810877092088739179987027735292280176204228288218844", + "61005705831629982989436785681121304807121444435528518201484013535773956527579", + "99480240365553947808422113855848188966386992208895265013295687188736307649082", + "87536301283601900687312639272033287940222528700243293560604854292813186922404", + ]), + 18 => Some([ + "82262089720121462595945763654356490365121875935383936536996893316797471892471", + "43505406937748654235816434495553014184099200289777144288995445971344568638598", + "37928360437627000803527366093122380390125981680530416956692770746909699333363", + "96927347055660015057982480998533256325068043525049747439124102704436442998964", + "16212424997607839624955014540613147385929385816592333594424024375317443919449", + "37691497548960493344162615415721531757555993305083789143947631329012108542126", + ]), + 19 => Some([ + "1367365622087344939351945828053936856061499931539283999396846816508794452252", + "83799125362740311566530398272339378559428930014331061571738445063846734504951", + "105919145000244084049580642211428333984193142445263769646392623126960508956661", + "52886877527805697672283964864216990888753423817134777213153377256726324598249", + "115760698842416184204790826679138312305753824704381087052900277549322444920156", + "75240750283097166896899089057864412206714386510014748200266404108094097511380", + ]), + 20 => Some([ + "12654877640585584941390969918023113422749533710886242612568395102294595144458", + "87880387757558380320173739872126572094227539260928934411743227331433368901774", + "98036551135651205790432805807164301015456219642410417085929765389200972441773", + "77546413552649808426580209521162971565591754878695040582777056092346246556907", + "48942679803473092458570336426447518800299566137794197814519683760932791314077", + "58187128405356536097790573896414569139095436177480649375592124810025437524565", + ]), + 21 => Some([ + "34724128525181436934184252551946588572272360650226861914547682412204499271442", + "85078812506403798249947331221949111629861910300409971103013469709968384966134", + "68780544494574478086432450902260402510058643791491361069254750999420272923028", + "709822679066441527422428232812964558258976246726291156594716899010966528392", + "51246220245483583339618360572710688775594049285038923222741388176973453648219", + "40080380480941938145513302461059815248501148539318074581535780765762362205841", + ]), + 22 => Some([ + "70431055459072866888593600666475469825668392770979273745039573158458682324701", + "86867137214653338771212863237013126068915160634255968731936950104374121720364", + "98139461395927289302484436275575259197661915413598596910410433324419277413072", + "9623318446644888671815849599669450078797425124502623626155673010743177752348", + "76929609807364453541418579982904016607090782495571035444607895649037963729123", + "73463845371689733045095247304740596879796165941396449430049816696271139550850", + ]), + 23 => Some([ + "9441554039850447067436133580199088375433916940157917376785533450201617198990", + "110899367047838333158229392583362187984010486558832854496925755036415811046678", + "106052240812411914734793733638511395589573453787088042497837472345934028779997", + "21286873478661319893957432565714681230654086426508335597590975998514450145775", + "110284032788425371286216948717330028002551389199813251716120143779532408427756", + "94039413261401628026497136029100643968966679998651852077901395000726685057459", + ]), + 24 => Some([ + "75969892884820301220070407837486168482129896286078105470831958068154011066253", + "68029562791644718983404506086125821645287589528436959429914264532568861147313", + "48041522956484263320739131189084347731880804634345989375732322670139185566988", + "55862608788643415931350866922256321607280397147632577024468873036849092308964", + "50544743925708136174630587392895274130726065884144829401401852483613075929571", + "47660039615364704328901816741870550256933370358893928106796995393361194601147", + ]), + 25 => Some([ + "93672567375930934807356337490671551016358128651604941091082401597864881914654", + "97107591870001992228553911690339053410231905825769475984890055827634100572832", + "14511678543903603372701087598500786602321721366666782193112090490791630698480", + "3995161265837882039098595643598119671851511259014710566285491863571630337214", + "110646106285265692147884703939174854914286298305591431361676422578809859575333", + "77400618114030994900310583895910478803534286430821025347359253744610751022915", + ]), + 26 => Some([ + "102703062322189224954472739851094742674716572505363993033934707090358928153093", + "29423914759435986269535995913517418616500690926863780423277331742056718682877", + "63734164515543282401013686266437391979973894861124524260778180483339482872550", + "103912386011641552388391244838421172011186150106027961659334909684801444205548", + "9693306655266152369632858065548028931695448445687527765165665993196642740014", + "30015251212531507543313524860541630284887571290440586894903412149823191925774", + ]), + 27 => Some([ + "2210111107897660734573797030186664834182221613246114325746835133127115459259", + "52143443862739299051820998984417230645937564079376260657234806900694798367995", + "63171057060175692913217361114292263068839516853085376463019791345857695343995", + "103381239874046770219850656826381869813584200270849218618004252520493488821519", + "43500770603565630307579686262694316821021686465090697669357203951794872643183", + "56590132770472356844155242547622282714504842705857656373738368510015788734163", + ]), + 28 => Some([ + "14157835229576069119792407133151679287854718553245428114052281219203289750083", + "84647122890378799262817713401375547836089036902220927224570978018804126730131", + "86845608676718187144879739102363207118010952841099531790143952234149577367876", + "74581012233007728408593077758601562949593508594705092935811810499627280008504", + "77117088110901940204182947123917770397208857951115824769198059648021292443535", + "58731842680597213121773459196104380619451164668845205911288220541490845806391", + ]), + 29 => Some([ + "69928742639936344048598373230638130201775271938079774940079796375546123270292", + "94813290892602790142920344449639250364723227256486132066787437258110718616780", + "77180544937058093189876662268912659635354078759523772855870632487132794629922", + "12063256824651236274306611963240579663713326075914535867495818160095664576629", + "110734189220580761880219761041494779496266289937646949824468951110217803643967", + "100048470574657061403918312748922531118841950091365014201468116991291064139737", + ]), + 30 => Some([ + "40833647432394108188067079585841213557652279160464787563924201568269653962383", + "12061143888578525077744469124109987156016597738239013281079135363586294808505", + "53896309055379180089376690242838975277819315715656788790307075477000125239526", + "24617509855908834659320618573662804335655054140987591902549923110640938653241", + "30003163742531165339899480376456992857407863515231096726747580583988780269787", + "18528065410133402654463440343628206298821836924916229509718822212123758991988", + ]), + 31 => Some([ + "115377147796467875644599880350836837934363458179313277312050652191531974714817", + "40892464273221617347382496161869344706374780758075574308796670465355451631291", + "66466553840427100072823726111186038661942090484840976109514020353762816170942", + "54906531779533059685610440115844619484422316722425159575704549897586736791773", + "13444311971109807431434466220145694942603035874708503110793914589176019332459", + "13849728853253979615348616092492843774889838749033582601557952492541966545529", + ]), + 32 => Some([ + "105203689550274114040945430032566499276588290839350226077864208823595393864902", + "96120821386051133717286482581846213136576051605105852796000322925693714768175", + "43263202689208777345594739741906244922053437154232280819157161529579539262633", + "107625820720206849148100722855677738125317433569995332059467331772729734734412", + "23985402459150634182847873136793777576882033323594566363227924602177596119989", + "45362220531791375766178258142092197795394535110240251315180322128700354409444", + ]), + 33 => Some([ + "75632894459563536446672852762250993526049205374450388805777735713895520961799", + "114393201063217157751647165476340953918159290026566225269962531299510195930148", + "59686045302710549367350964181911697640000727020287489066619430590943247600091", + "27528136108182499296332864648142601033691769630039974559954879016698731510816", + "92212711138896264019328128182078619374143813809018818264414395399464232290423", + "44356685718472702247422606172339888465290994667798888501447111057059912924226", + ]), + 34 => Some([ + "39138368728492398486954793309659662296785514207706895315010445591098773185577", + "107091543455417075768966137778406791232503450034180530824166635869852107342299", + "94924517089109757011197781535262382902947734688053865564187390231352986750529", + "113506473115036544550546281248450205770893710891082588438658485146634651080985", + "56249288162224753278326598155200248778345417526366519397502878285299712606719", + "93901626464075017486934112989826223091809007341422383612245573547092478088734", + ]), + 35 => Some([ + "62977172685037110948072903218410698141451968449389910589615614867943361358085", + "45257362169644012289441951793513371860466242621255659286272104001422567870713", + "112704413806640622877558248130034421586972967616046023010121610541929192747523", + "51239398917791240207839425730936680501197530002225723654384928811755031874311", + "30405018237698908524092926703827606961814334376436030834082402529535815206925", + "91600553196968549458095806412166321658673718577831723438748164982747038192587", + ]), + 36 => Some([ + "37316781919864858250508825820799252762236093548215309526063240983115388307002", + "92065293952034728177596775770123547035123140295906654822791835417523439805136", + "3784716451994793383674369635777476753196300164268017025023308099900391616990", + "24877455512103777870127143674243127569045061301086789275010282989523175224471", + "21668122133833805213728598021274639736092880226692011951677383211964753973887", + "87341549546719749904482695414117510401381125508536981042745723373587776523983", + ]), + 37 => Some([ + "41733557441873379557853633089158052587664975289193675916822158555184062363973", + "54829584665559693213380027669915792369192340992508720502846759571214860960700", + "112288755455850118760736937917607296859127879751229035687737477602207927993339", + "81461988233585863252395036489492268043973570005091746944122953476172359896066", + "110064749350856327858286213250464823407601673454335142087056018527058836366529", + "42162432163526560057171976138878928948333534297779587982656976644155978523222", + ]), + 38 => Some([ + "108601842112476324003732869565410732658138086604590138648292170312970050187474", + "91033881143222487565964973929748982039663649368694920620769113163005584815170", + "72305020943925161946048359754747104510479968176883671535827791972643243772790", + "31825568583101363325459876750009019926418630861622657747104220196492974157481", + "56729930381091281268466561575890153768124925626415802731035270515413722258728", + "6382192644419839591991134610005356538565265137776538430306489893072434829381", + ]), + 39 => Some([ + "110057956689440083657668611755060721791990236478131128473407034349282568815310", + "35946149523420923935145040155586965807313521928838670446369440195608124681621", + "40909871195474608839942711765943092130092405565416192851992151970259936012416", + "106870661082258282632444857881509209670126926118079384308670768736318476719941", + "22315267136428264121532618884559001934721851805546727456223809866918874321148", + "44547522409324652743586736865842067699639592608216711736422643721509254995900", + ]), + 40 => Some([ + "107485325347796407671388015359823178242451996312768816574123323416026044370106", + "50839488651363674558135671194854888566328967053910495912509821129252169140659", + "63335002634940318370439611952914952237204048637686196443287115507597016766648", + "38672283700544518322945051533075923533755916907891482757544393899631898169224", + "108662579882817052805050986004143032723168511241431143745184654575629630513704", + "19649199210347160877422315697683791643136727231603438115232511244704017341461", + ]), + 41 => Some([ + "25411496175355141746890415235431503068530373142155374862823924034395005790002", + "91068968875633819382539516229780881778723670173045281627457761054560271763868", + "76085561714047110662465771018526765363077780214846971696956444592221164383037", + "86731160596420773154191635324679096016311659217151045383265708010546536090949", + "96725493169835689858956240877643845116166095991989735780879551374356814859610", + "79376344051414450246732447941609138972322414996041037311858151646373314061375", + ]), + 42 => Some([ + "56102996560984849487069966786194128890511072505522656184665103355112974294435", + "51553998015901583108872218227013316539828964340037843751423430863367725141022", + "15998429368760911518905700713406934848181272864292598503817390491845521965866", + "2310508821683633536668724705994946078848602036239474768940008735486350225159", + "37769289690297247294534085860166450383987445341376560287512976563613610591887", + "5296357534529156265415504441491928300888550971621159139157948012711877767344", + ]), + 43 => Some([ + "96719930252444052597642041345793011228619227459711070098526834248460214041996", + "23865165653542290858733630831761052913038817763220581878899324862888507530700", + "2946311160093038764289336502321462775322681633833175557230625591532725229883", + "30069956481863829211278337776074372612030845987890649118170772744927706473743", + "46915285708143942297479393945222755020998491702106394728671025707096919993996", + "69266120427482287576712223686333054664226987014112019366422528680014576960606", + ]), + 44 => Some([ + "65051462413891671129225867179546758143726585398078388607814068124960649447940", + "31381833805609080618489036414733779351330041681173105828422676953587093339016", + "51011745484506694561450015429988019077819035803612110543366161322884819534285", + "113844475431868304872090350127567971455785020925147288644866668506265982255165", + "84705462504278282272851002088982770927677824734985070660159553684882397510753", + "73457374232190677143927780263693416700558585140264548669718573638669710850199", + ]), + 45 => Some([ + "112522999038853079314988559604351880524612924077818163227902186734640579219556", + "5619492670117547164920188890999129789310713295821655768403557553886101368976", + "79942246241615802867477105465475440646383212201742028570681477707358654705198", + "29634807148720351364631869355763087983721300756064618344987903470208344524872", + "29762887570221357868960666817455326077050302536085512351138128019976126994517", + "8917888557981321787427029090591731315024561091113350452915823603832748755969", + ]), + 46 => Some([ + "104207887061711157445362963152034955840923990059437458918209951756591677376977", + "74006063057168174946440710863673448132292493904672628703260856685806444692900", + "40640613915236513486536299056279097059754020679320172628435226138772383138453", + "92619313530806343161465449209945726767501192538038081070281908092420852750145", + "64816721658334116711561117572944084882733804453039892851067376443016627555702", + "19142518236398157846109356659205888879891198776238436777826236827013759329687", + ]), + 47 => Some([ + "32702286579289147034182559797231313755360054223391478073573037828909117062528", + "50865582199050470556560603675346151929078007930284746451564776344678109518855", + "65736118316401465497403098908912532899029906668895970931675340839692704831107", + "65864822934892145754006315762328140452231287739130167705959543074012029001215", + "45531241932389678988757631100611223553088514341465278158877033343241235790421", + "106758084880369418117102272879151094010617932285830471190124604391958674331368", + ]), + 48 => Some([ + "9274515152587056471384582364877717622404425981251102336564216988502165066424", + "80580737068393435827809175626846409127867474156780974360780328085412111192569", + "58466884242639961077864176140504526297662814780252177865746636700182117203484", + "89461614052461727522178669535051318181492096248574725014848740966256864813471", + "64500294620894692026409882776417842379823140219092683029132615428962645192231", + "35875911528397224238602620074464252981730926403820622552293133236459887760472", + ]), + 49 => Some([ + "51821397846560117243251460672909334643040296891791662155544247214090802464509", + "67925887814568610026304093613646998768677837897685270659066718846290586647576", + "105832849288959482777670265488407868460057738392187111231133654125954176337272", + "97687616214762711968100756989214763066786608857690239294623507469853299138420", + "100144853711840929258130164913346820143407027068659221904542527171658184022281", + "87692150256066340963075598693539984014822637248252508403877581559194736715297", + ]), + _ => None, + } +} diff --git a/precompiles/arith_eq/src/test_data/secp256r1_dbl_test_data.rs b/precompiles/arith_eq/src/test_data/secp256r1_dbl_test_data.rs new file mode 100644 index 000000000..fb127f7ed --- /dev/null +++ b/precompiles/arith_eq/src/test_data/secp256r1_dbl_test_data.rs @@ -0,0 +1,327 @@ +use super::str_test_data; + +pub fn get_secp256r1_dbl_test_data(index: usize) -> Option<([u64; 8], [u64; 8])> { + if let Some(sdata) = get_secp256r1_dbl_test_str_data(index) { + let bdata = str_test_data::<4, 8>(index, "secp256r1_dbl_test", sdata); + Some((bdata[0], bdata[1])) + } else { + None + } +} + +/* +p = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff +a = 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc +b = 0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b +F = GF(p) +E = EllipticCurve(F, (a, b)) +for i in range(50): + P = E.random_point(); + R = 2*P + print(f"{i} => Some([\"{P[0]}\",\"{P[1]}\",\"{R[0]}\",\"{R[1]}\"]),"); +*/ +pub fn get_secp256r1_dbl_test_str_data(index: usize) -> Option<[&'static str; 4]> { + match index { + 0 => Some([ + "27675493881189608185000019896557472664993584052492607765862010472799068084386", + "7352244569642907517516338217145955072413095899750742887468663360455917814611", + "79037361267200367168785815871744887643164632725736514592166483490619230121075", + "4077933234470343714276241883240920906139062404301557560973844876092588323988", + ]), + 1 => Some([ + "3360376272603723971624701532604720898676706339789378925682489753859349836709", + "43974730703370341091550222085681175563853832276728365259120521493842901894638", + "52853259462882411969640510256518235858172147991597135999425041457772314775863", + "68030551993853624445614927051536516109504529419190077317254253330368193311403", + ]), + 2 => Some([ + "88676035452794063749057713437014174922784706808285202738350243652079149130848", + "50824802278369076535825198791773071013146180029413341048526379677412529985812", + "72997795821879687104072496376499913353753964526499672523434804669196718469500", + "53762373387355522063466547667122148388804866346281354009917298229100535229898", + ]), + 3 => Some([ + "71372090362814190117397057916958324888323655186932282733112070467895401464045", + "111078344811409713697956617838312642415112432605771182890795220317205222879342", + "107192017635467963612225109436024041761976193253449255543057737880777787424394", + "66432532050590218574089936703773699841012078667162567282180177066526754582860", + ]), + 4 => Some([ + "31794533443528416677323423022454608423413062407393976650457453651471044087593", + "10825871155769179903674149509522662127600696279366454156648831938944439227339", + "61562171207971025081273194822135660173739778880688146968647561830202311477043", + "68513288579896028646973003634202406296825969838469972129649526467266918162572", + ]), + 5 => Some([ + "91060368462428685546946494344746410879554238660838080424175100582724711970328", + "44618099832436814463676281645434359880211089763096176914976894141915094186022", + "48042260235037936576812642413787606194822455404860397369779429598327736582433", + "55591103925453110525440241481629898238706089664546814584468937596549986619793", + ]), + 6 => Some([ + "103965886319419460503461474648787158389315813863288110577385007120529004326270", + "92527803764805458617665276672768135155565125500566011891297680408757047050103", + "96987638986373391379724462307346631952435654568241522815748739193441366499518", + "47374945241639497121296667606175204211725572815381139171523661194929544012544", + ]), + 7 => Some([ + "65219326653226218931128974159670307413494269555373006268070592938984912873583", + "28760857951737509711442707692992902113718450640177089688234917623278702478945", + "65888248906982506067834341692105543875511022545617620335574067809473067919741", + "93711731565602791746384191345707242822637247270636674067520708184331360428596", + ]), + 8 => Some([ + "106210426944164244332900103221252827801696290984990191607033822921403699315591", + "66033879474684143638864677646736962638245492768184401626396449385215185079527", + "97001551816916045057440501215891575169935415297206405924580499588082170702215", + "15313907143426807302847678555561409196571233221759673790380019028238856536708", + ]), + 9 => Some([ + "81563981051532074510470434314933288696795114080113201674765829984492109486247", + "94674495020699996024474089925556673141693997390438745495314754098592636485900", + "70579020275693309056763353367363433388874088838321890677499690050146010172181", + "67532799562931560324535916150414138770617059854724626133688711653168011828562", + ]), + 10 => Some([ + "105926744943182775251389926603443779857488256819460597607587603328742557603551", + "89577085024073186154133911806520967358216088648102277366282334438354735657829", + "36969790594839896046009511659892773317207198219378886190623842770738386817863", + "3176710786499933388496208045770416504923970016417545654543582917258692384339", + ]), + 11 => Some([ + "82952801040030046591653359323115156132588706551236277788806789798511802077341", + "67742512949797851642969519083149394508321283247151086013805783326991120608730", + "18839059680713021002945713993319317691522923507949439596538311131459950100096", + "91749667201212490259065072726948349028602535295787384286392321911710656353203", + ]), + 12 => Some([ + "22249079430472750171094101149699481011007467462254290937028389796347578947965", + "110451857460863006329308647518239512245462245343091310286371274298354769356867", + "51018571064862945609971891259190336795507255409361316715413486315339248406194", + "13348224299011701169985097248159921338180531951451600523286675222818843730914", + ]), + 13 => Some([ + "20484629867944456759671199726804749562668465495566813175379512349152466919404", + "69903748127630776090713848568240576795730885931070138238857012507383688147507", + "111580365324163467359918940152662651124553825640652378688252993921053010482487", + "64388698672712248371625002437296025913342008627987120257603958806391456496624", + ]), + 14 => Some([ + "96537501690400388864304028251258137628846976823895141796583036867993993579900", + "98827348280558554936402817469868835961178438953500929397002088255878035202936", + "107043732074358154753389140602001488371796425269635314156202806901221637108811", + "25720630990896256983664489172451501236091949689965543387711136596715013493811", + ]), + 15 => Some([ + "82964332720597251906863047406034107806204475340942193345880545959223161824757", + "2535705325786516421399361925342224235740784167765863940836092666238884145878", + "74975954962092129827075131486493034123049767154498679732752780100130495721780", + "37486673765435147797773619302439219485825392891339272513129446812828990307073", + ]), + 16 => Some([ + "80882199979706522570998083928753031262041303012506640217916233556544354791914", + "39227900293525267745120623191429459615976130195792597481572786788050036079078", + "80157914499418311940685319482509009427116245762577395716883422644319491876401", + "1129448055158863327588719303480731505190665335377534896973127904371693995043", + ]), + 17 => Some([ + "34641704536088193550079102190647247315424978742846182992457402433345347219295", + "90544308526668958962250641732315596397992903264864385620881118500444134086491", + "74069842740832456785103147548357508233091392981149031026712944888912932864868", + "38595262063118161000564729964611435602495437057846194302717818225702965593488", + ]), + 18 => Some([ + "38041273931034400682131345618719147367267652239101263153549641389166797025805", + "8576277969817328064421002238004850377503021104554593250727903538068307018853", + "7644947780884249836683427700565231870908265110028398427877728739930650676562", + "328209809804861647965920601674391453752039760419779127506949862375352123044", + ]), + 19 => Some([ + "8807143131588724563187546356572887144829343259549349928752248026524571566309", + "71764049905439050593922539421751972431097533016322006535867892917433028764473", + "23458790796645888403187291802917321680007079214960418481959483134884565534763", + "111924358597078312991433457376953377492712605551203682978487745838169539088610", + ]), + 20 => Some([ + "63046303122414836857224945991520306458421317653897286450867387616910328505302", + "68321673072516662136375058424032953039446318778956851743367881469345795571488", + "105609519772040867633323682815853052290089931431224707019153475958466586956217", + "48018313219458211946382006202812789635140218122120147934600538758473417440637", + ]), + 21 => Some([ + "102850103546686443020777382132243955419052591850564364137968313810076658648496", + "1254184153782947799803255341743511148898695450011429568204904192310559388673", + "34701454034912545367190186926154646880963419792730319801590280911209128145083", + "5393716746037739427030194481726031199116389264008785647551233681919420827579", + ]), + 22 => Some([ + "62125292866235572735647937972691049010068228868522712378077493442262158281702", + "99503402427857905262229444564080614638102331332799423074576486003566320336076", + "75941257376561286389079890532683795873852315520311718401442439244384069352630", + "43410723719737593730678318955332439501752729276328362873620500901115161656950", + ]), + 23 => Some([ + "64002322064408361853299819563502381355621924280333335533869684528122027965061", + "84495687164229915249162290734988559477746746377164916107694314118818232962524", + "42898088288940134681766700375196999964990414107254048963459932412730534424485", + "29567662893995207927990509632456334189154106280859269336843413690718319464856", + ]), + 24 => Some([ + "12230674786972538120739240328212053191294726317188869883061405966970746595705", + "31516260071695450510879817050566422129794141579982164039599932703606874916061", + "89724391829108235533785922249105481431224544947348739771699403896925352102088", + "86059397811547285278094778759838953930413515056327197861905859368936505580885", + ]), + 25 => Some([ + "58911865631286240286286539642126777192947789330399378943775821308950784285059", + "106618713328730944840192222484431068598783463090223974086782442308115955545190", + "27119460803600800453434644673040121454455038496677588224477119999976355504530", + "111350586119120342537866962125681396060167593324927256000078238332947571537442", + ]), + 26 => Some([ + "17516364029844442641337075032263298201873493073042943428522943752637722956884", + "99780914298702666179771354652927927952146668873384802596545252230428779009011", + "82358107210864043857952883304094862908976337621695822836748665056314939088447", + "13715968981053881371721835537936146409767904603911336360276385845766049746422", + ]), + 27 => Some([ + "74676228708033920986476364862754304451632046939799453186857911444661407501891", + "5927547383987894337822663704444138312583278465380838092641381201302317593589", + "98502616564885478487235974276837215120346349879844786094914300546915810617728", + "75885107513472011330622269146806285453016887708274925865578929439188084362800", + ]), + 28 => Some([ + "24157278883353118388239010875288245752318985749430363083628571624681771367032", + "84961028276250472053794415497313644852366248374320110870723180287329394555999", + "3330552996162391742832531233626085633537169937294466797389676474135741805139", + "81503738788380421890123773740525891797551407910254861173192877911391987693810", + ]), + 29 => Some([ + "17655551802530837699364385806086954367370263699138951734258195446283051940946", + "98236955804386266924558244073713230219746501710518799298106066085008964705579", + "57373187902213099141557276910127760835812365635279458399636976452428314017903", + "9636580600280637726687443346287348575766474677970441092861343995111248178274", + ]), + 30 => Some([ + "92022461209860697440521006840256200564038622301844592762370643753640651534991", + "60558887757685951339215977663629837935727625839126000879651344761701232882130", + "91068447104669134346553826252864994723246512397081355308357996049153969413034", + "114436257797237415761486717557225249123444379935578269119188334930600216061100", + ]), + 31 => Some([ + "40735563338348272209711080311152397013729625167694629904889796587190384159878", + "103394991638383348741611031375232586820926278961195153765593574672767509666946", + "7767882644474144882688122335458137792113104721225153739076142233830789110880", + "21420242927622805275896998550026136085544516609245995982558646703074913955120", + ]), + 32 => Some([ + "87037739712144167008889348413849026685553472861758898069048907403689426969264", + "69811747734495700577564216321023837046241672496286717669840584631876922566008", + "4408995650263083241075343365037204209717714840832666019099705245657855977893", + "77519099661396457842203008707072557749026719773815850614662057747366057170371", + ]), + 33 => Some([ + "102307323108974896425869759287684534597753535918441079037597093081066898905523", + "42264319808047447998438717210540350015751262088840890747437915648429111651182", + "67100401758586308051929785437996646296320231818129368260495465036591455823021", + "107194883327677412413837098740699799247735320308927394536601938897972894830923", + ]), + 34 => Some([ + "102929584491481651932553963305865068019795484110342028010262347695551448209928", + "104315708275376527763160005390439347397781353204748765719436768555321906257952", + "10403471979638198626278497210025626524860064482229064684808713578165907868298", + "49336784843051988726052384190907456418108387966940856140766606450916396674854", + ]), + 35 => Some([ + "38473870230989310668264274838632438522446363421372475860190301222545472809780", + "97345650640090374352059605999075195801309291071431682904319809615819123624985", + "18746312099851846921879737588510086104615122710877875369690874313180242176304", + "63852631640261386482515242207657436643396006625142718652569168015656644612958", + ]), + 36 => Some([ + "111172365718777275261553049424428529843132810242786988596766731423856070474763", + "32894689523743969696950044240462150378016563604261509322210792502679911101598", + "21001578149873794620866081274221900471032662007616757226205252751061799172978", + "83491776935661227994808240284899338462513069457427691158188743983077458455237", + ]), + 37 => Some([ + "96946461112700250258584173246125407860519408199485590262165671530140251267253", + "63833705914298844770895413071881165278384966242232556079275216948879800758898", + "53313054758482710952584912174011882402989680261576083898969060616217867673777", + "69935351470681056441519422734336191119513717666082220696982519579791724292977", + ]), + 38 => Some([ + "42340520195350827366266644689985470833831335294106115895278918186457640808106", + "34161995772621826684243404258534800600675860756541900224421043769840774351397", + "27688047487626059001558633704427386371949511389832012274777469895652170986654", + "69879424116441455155756606426803509348361249825720648607126586477708296240444", + ]), + 39 => Some([ + "95995450170096461230228761655365532900866210643271944071491176089373079855005", + "37223595982737666989884270765650773530495097325417815961173155837912976958883", + "5454086141833025197796219662821159066398337835428881252109615110578757165363", + "81614047057994862372520469438146729029679722278625644677073368093893804430747", + ]), + 40 => Some([ + "22805561457752636094721086973193770193044553761898008645735770860067309849542", + "76122302100612426549304461560020749139367314089832284841993225181404923861170", + "40338732334467043512259459638567318527155753186418690373091973991852276338236", + "88977652408711377264669794973375958023041300003319958641864909530050057384069", + ]), + 41 => Some([ + "73884916476412840787432457274089533668123750611740988703793990873479668341267", + "95375953396453830641479439091149343986838296783284410546155159111696077466671", + "13278794831264317878740049962945340832281608095403907754296647279357981691070", + "93479632508352372310355930177768467644813000592911557048584067102644563839360", + ]), + 42 => Some([ + "94941193142043240036059928178683880901688999529090111670381882971844837503037", + "114371587718742372061688208831213894214366851291820397231992995224259969622625", + "72777706257975327042753004132486596699962560376026966469545399199160716463074", + "114540614316974213053899847712189585924251711944228773093364280994383190154957", + ]), + 43 => Some([ + "4253189979396770887746703699548781354801499301957512157857615543224980036648", + "58877397710734925298226969980530754719555255196436220096277790458128792906250", + "50483388696246046430618742841874776760005205365821788982791359957721720516986", + "59495465481421930390441071589159802068466910095526901052457542972222451310945", + ]), + 44 => Some([ + "10394902551382846129674861822500670503694149627817912602881936017490906790392", + "72341768474514678671359875990601473484533978345834760046963654129453630085782", + "102812627892202637177795752111556094509063075418756313273952952148617094876122", + "28626981733687699848179882938274423067928389533757862828457083548325085613961", + ]), + 45 => Some([ + "16215521976833204209050248497110404538897337563635192783616736445368048831192", + "44161797662527352894621865841326779649853068942784278731876887905470170816256", + "96701684562904329883895684844683627284420234618267449701968521327399111835811", + "86467583438555269372399863894802164298514241124505016282687302672455382615999", + ]), + 46 => Some([ + "77304614138727580166467654224182477075304448916355029313519480279527026362685", + "4097294974765126542919353376388476485070593452805769406824288484172598351160", + "50768358693567732372714579880181823673916620610363234297674956719343270986769", + "112104006189746323710806096486930425242587209629621022190465721944855454525692", + ]), + 47 => Some([ + "46630979645136848407503076566838422657349763976789907884318182230480133505164", + "50039112218205956891168840689072767346876077125226222747266575979778032021694", + "61247552980881057576144354591383932614484085471246070341570683599992494819988", + "68136838446751300763551527780117141304059332794759369321508784589076628277655", + ]), + 48 => Some([ + "79492169635876824167413866720993449454663528481176426987777389681340468912177", + "106977025309353131750871442464209539184863629176593136980629258721474651776580", + "112345830952717412914379261907614526402048162012954114300835209619748870016011", + "107001196952579848275877468070801422289566544783405231404190478281531942515168", + ]), + 49 => Some([ + "83848287480453503544287194869475671289089650965884747542818004494192759435413", + "14423946942137591196388014316114088651825974510710115056815066159144287107799", + "18171416013064719205219973633800359850758400149900498404718542060411164329311", + "25249779374936489305192546248778458932225984848949286977981267947408203584264", + ]), + _ => None, + } +} diff --git a/precompiles/arith_eq_384/Cargo.toml b/precompiles/arith_eq_384/Cargo.toml index d97145c3c..6c7c29d48 100644 --- a/precompiles/arith_eq_384/Cargo.toml +++ b/precompiles/arith_eq_384/Cargo.toml @@ -30,7 +30,6 @@ precomp-arith-eq = { workspace = true } zisk-common = { workspace = true } lib-c = { workspace = true } -proofman = { workspace = true } proofman-common = { workspace = true } proofman-macros = { workspace = true } proofman-util = { workspace = true } @@ -53,16 +52,12 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" regex = "1.11.1" rustfmt-wrapper = "0.2.1" -k256 = {version = "0.13", features = ["arithmetic"] } typenum = "1.16" lazy_static = "1.4" path-clean = "1.0" -nom = "7" [features] default = [] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed"] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -no_lib_link = ["proofman-common/no_lib_link"] +gpu = ["packed"] +packed = [] test_data = [] \ No newline at end of file diff --git a/precompiles/arith_eq_384/pil/arith_eq_384.pil b/precompiles/arith_eq_384/pil/arith_eq_384.pil index 7a621320c..efce94aeb 100644 --- a/precompiles/arith_eq_384/pil/arith_eq_384.pil +++ b/precompiles/arith_eq_384/pil/arith_eq_384.pil @@ -4,7 +4,7 @@ require "operations.pil" require "opids.pil" require "arith_eq_lt_table.pil" -airtemplate ArithEq384(const int N, const int operation_bus_id = OPERATION_BUS_ID) { +airtemplate ArithEq384(const int N) { /* EQ0 : x1 * y1 + x2 - x3 - q1 * y2 * p2_384 - q0 * y2 modular arith ARITH_384_MOD x3 = mod(x1*y1+x2, y2) EQ1 : s * x2 - s * x1 - y2 + y1 + (q0 * p) lambda - ADD EC_ADD_BLS12_381 @@ -567,7 +567,8 @@ airtemplate ArithEq384(const int N, const int operation_bus_id = OPERATION_BUS_I sel_bls12_381_complex_sub * OP_COMPLEX_SUB_BLS12_381 + sel_bls12_381_complex_mul * OP_COMPLEX_MUL_BLS12_381; - lookup_proves(operation_bus_id, [bus_op, step_addr'(MAIN_STEP), 0, step_addr'(ADDR_OP), 0, 0, 0, 0], mul: in_use_clk0); + proves_operation(op: bus_op, a:[0, 0], b:[step_addr'(ADDR_OP), 0], c:[0, 0], flag:0, + main_step: step_addr'(MAIN_STEP), mul: in_use_clk0); // selclk0 is the clock 0 for dedicated to one operation function expr_group_by_cbc(const expr selclk0, const expr chunks[], const int index): const expr { diff --git a/precompiles/arith_eq_384/src/arith_eq_384.rs b/precompiles/arith_eq_384/src/arith_eq_384.rs index cd145a3dc..26f453a83 100644 --- a/precompiles/arith_eq_384/src/arith_eq_384.rs +++ b/precompiles/arith_eq_384/src/arith_eq_384.rs @@ -397,10 +397,11 @@ impl ArithEq384SM { let num_available_ops = self.num_available_ops; let total_inputs: usize = inputs.iter().map(|x| x.len()).sum(); + let all_ops_used = total_inputs == num_available_ops; let num_rows_filled = total_inputs * ARITH_EQ_384_ROWS_BY_OP; let num_rows_needed = if total_inputs < num_available_ops { total_inputs * ARITH_EQ_384_ROWS_BY_OP - } else if total_inputs == num_available_ops { + } else if all_ops_used { num_rows } else { panic!( @@ -468,7 +469,16 @@ impl ArithEq384SM { self.std.range_check(self.chunk_range_id, 0, chunk_range_mult); self.std.range_check(self.carry_range_id, 0, carry_range_mult); - let padding_row = ArithEq384TraceRowType::default(); + let mut padding_row = ArithEq384TraceRowType::default(); + + // In the no-op rows, the first x_are_different val should be the same as the previous one + // To make the constraint `x_are_different === 'x_are_different * (1 - CLK_0) + x_chunk_different;` + // be satisfied + if all_ops_used { + let prev_x_are_different = trace.buffer[num_rows_filled - 1].get_x_are_different(); + + padding_row.set_x_are_different(prev_x_are_different); + } trace.buffer[num_rows_filled..num_rows].par_iter_mut().for_each(|slot| *slot = padding_row); diff --git a/precompiles/arith_eq_384/src/arith_eq_384_bus_device.rs b/precompiles/arith_eq_384/src/arith_eq_384_bus_device.rs index 5f06e8f25..c30fc1293 100644 --- a/precompiles/arith_eq_384/src/arith_eq_384_bus_device.rs +++ b/precompiles/arith_eq_384/src/arith_eq_384_bus_device.rs @@ -2,11 +2,11 @@ //! sent over the data bus. It connects to the bus and gathers metrics for specific //! `ZiskOperationType::ArithEq384` instructions. -use std::{collections::VecDeque, ops::Add}; +use std::ops::Add; +use precompiles_common::MemProcessor; use zisk_common::{ - BusDevice, BusDeviceMode, BusId, Counter, MemCollectorInfo, Metrics, A, B, OP, - OPERATION_BUS_ID, OP_TYPE, + BusDevice, BusDeviceMode, BusId, Counter, Metrics, B, OP, OPERATION_BUS_ID, OP_TYPE, STEP, }; use zisk_core::{zisk_ops::ZiskOp, ZiskOperationType}; @@ -63,90 +63,48 @@ impl ArithEq384CounterInputGen { (op_type == ZiskOperationType::ArithEq384).then_some(self.counter.inst_count) } - fn skip_data(&self, data: &[u64], mem_collectors_info: &[MemCollectorInfo]) -> bool { + fn skip_data(&self, data: &[u64], mem_processors: &mut P) -> bool { let addr_main = data[B] as u32; match data[OP] as u8 { - ARITH384_MOD_OP => skip_arith384_mod_mem_inputs(addr_main, data, mem_collectors_info), + ARITH384_MOD_OP => skip_arith384_mod_mem_inputs(addr_main, data, mem_processors), BLS12_381_CURVE_ADD_OP => { - skip_bls12_381_curve_add_mem_inputs(addr_main, data, mem_collectors_info) + skip_bls12_381_curve_add_mem_inputs(addr_main, data, mem_processors) } BLS12_381_CURVE_DBL_OP => { - skip_bls12_381_curve_dbl_mem_inputs(addr_main, data, mem_collectors_info) + skip_bls12_381_curve_dbl_mem_inputs(addr_main, data, mem_processors) } BLS12_381_COMPLEX_ADD_OP => { - skip_bls12_381_complex_add_mem_inputs(addr_main, data, mem_collectors_info) + skip_bls12_381_complex_add_mem_inputs(addr_main, data, mem_processors) } BLS12_381_COMPLEX_SUB_OP => { - skip_bls12_381_complex_sub_mem_inputs(addr_main, data, mem_collectors_info) + skip_bls12_381_complex_sub_mem_inputs(addr_main, data, mem_processors) } BLS12_381_COMPLEX_MUL_OP => { - skip_bls12_381_complex_mul_mem_inputs(addr_main, data, mem_collectors_info) + skip_bls12_381_complex_mul_mem_inputs(addr_main, data, mem_processors) } _ => { panic!("ArithEq384CounterInputGen: Unsupported data length {}", data.len()); } } } -} - -impl Metrics for ArithEq384CounterInputGen { - /// Tracks activity on the connected bus and updates counters for recognized operations. - /// - /// # Arguments - /// * `_bus_id` - The ID of the bus (unused in this implementation). - /// * `_data` - The data received from the bus. - /// - /// # Returns - /// An empty vector, as this implementation does not produce any derived inputs for the bus. - #[inline(always)] - fn measure(&mut self, _data: &[u64]) { - self.counter.update(1); - } - - /// Provides a dynamic reference for downcasting purposes. - /// - /// # Returns - /// A reference to `self` as `dyn std::any::Any`. - fn as_any(&self) -> &dyn std::any::Any { - self - } -} - -impl Add for ArithEq384CounterInputGen { - type Output = ArithEq384CounterInputGen; - - /// Combines two `Arith384Counter` instances by summing their counters. - /// - /// # Arguments - /// * `self` - The first `Arith384Counter` instance. - /// * `other` - The second `Arith384Counter` instance. - /// - /// # Returns - /// A new `Arith384Counter` with combined counters. - fn add(self, other: Self) -> ArithEq384CounterInputGen { - ArithEq384CounterInputGen { counter: &self.counter + &other.counter, mode: self.mode } - } -} -impl BusDevice for ArithEq384CounterInputGen { /// Processes data received on the bus, updating counters and generating inputs when applicable. /// /// # Arguments /// * `bus_id` - The ID of the bus sending the data. /// * `data` - The data received from the bus. - /// * `pending` – A queue of pending bus operations used to send derived inputs. + /// * `mem_processors` – A queue of mem_processors bus operations used to send derived inputs. /// /// # Returns /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( + pub fn process_data( &mut self, bus_id: &BusId, data: &[u64], - pending: &mut VecDeque<(BusId, Vec)>, - mem_collector_info: Option<&[MemCollectorInfo]>, + mem_processors: &mut P, ) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); @@ -156,20 +114,26 @@ impl BusDevice for ArithEq384CounterInputGen { return true; } - if let Some(mem_collectors_info) = mem_collector_info { - if self.skip_data(data, mem_collectors_info) { - return true; - } - } - let op = data[OP] as u8; - let step_main = data[A]; + let step_main = data[STEP]; let addr_main = data[B] as u32; - let only_counters = self.mode == BusDeviceMode::Counter; - if only_counters { - self.measure(data); - } + let only_counters = match self.mode { + BusDeviceMode::Counter => { + self.measure(data); + true + } + BusDeviceMode::CounterAsm => { + self.measure(data); + return true; + } + BusDeviceMode::InputGenerator => { + if self.skip_data(data, mem_processors) { + return true; + } + false + } + }; match op { ARITH384_MOD_OP => { @@ -178,7 +142,7 @@ impl BusDevice for ArithEq384CounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } BLS12_381_CURVE_ADD_OP => { @@ -187,7 +151,7 @@ impl BusDevice for ArithEq384CounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } BLS12_381_CURVE_DBL_OP => { @@ -196,7 +160,7 @@ impl BusDevice for ArithEq384CounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } BLS12_381_COMPLEX_ADD_OP => { @@ -205,7 +169,7 @@ impl BusDevice for ArithEq384CounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } BLS12_381_COMPLEX_SUB_OP => { @@ -214,7 +178,7 @@ impl BusDevice for ArithEq384CounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } BLS12_381_COMPLEX_MUL_OP => { @@ -223,7 +187,7 @@ impl BusDevice for ArithEq384CounterInputGen { step_main, data, only_counters, - pending, + mem_processors, ); } _ => { @@ -233,15 +197,48 @@ impl BusDevice for ArithEq384CounterInputGen { true } +} - /// Returns the bus IDs associated with this counter. +impl Metrics for ArithEq384CounterInputGen { + /// Tracks activity on the connected bus and updates counters for recognized operations. + /// + /// # Arguments + /// * `_bus_id` - The ID of the bus (unused in this implementation). + /// * `_data` - The data received from the bus. /// /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] + /// An empty vector, as this implementation does not produce any derived inputs for the bus. + #[inline(always)] + fn measure(&mut self, _data: &[u64]) { + self.counter.update(1); } + /// Provides a dynamic reference for downcasting purposes. + /// + /// # Returns + /// A reference to `self` as `dyn std::any::Any`. + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + +impl Add for ArithEq384CounterInputGen { + type Output = ArithEq384CounterInputGen; + + /// Combines two `Arith384Counter` instances by summing their counters. + /// + /// # Arguments + /// * `self` - The first `Arith384Counter` instance. + /// * `other` - The second `Arith384Counter` instance. + /// + /// # Returns + /// A new `Arith384Counter` with combined counters. + fn add(self, other: Self) -> ArithEq384CounterInputGen { + ArithEq384CounterInputGen { counter: &self.counter + &other.counter, mode: self.mode } + } +} + +impl BusDevice for ArithEq384CounterInputGen { /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/precompiles/arith_eq_384/src/arith_eq_384_input.rs b/precompiles/arith_eq_384/src/arith_eq_384_input.rs index c5f8dbfbc..ba3ce8a59 100644 --- a/precompiles/arith_eq_384/src/arith_eq_384_input.rs +++ b/precompiles/arith_eq_384/src/arith_eq_384_input.rs @@ -35,16 +35,16 @@ impl Arith384ModInput { pub fn from(values: &OperationArith384ModData) -> Self { Self { addr: values[3] as u32, - a_addr: values[4] as u32, - b_addr: values[5] as u32, - c_addr: values[6] as u32, - module_addr: values[7] as u32, - d_addr: values[8] as u32, - step: values[2], - a: values[9..15].try_into().unwrap(), - b: values[15..21].try_into().unwrap(), - c: values[21..27].try_into().unwrap(), - module: values[27..33].try_into().unwrap(), + a_addr: values[5] as u32, + b_addr: values[6] as u32, + c_addr: values[7] as u32, + module_addr: values[8] as u32, + d_addr: values[9] as u32, + step: values[4], + a: values[10..16].try_into().unwrap(), + b: values[16..22].try_into().unwrap(), + c: values[22..28].try_into().unwrap(), + module: values[28..34].try_into().unwrap(), } } } @@ -63,11 +63,11 @@ impl Bls12_381CurveAddInput { pub fn from(values: &OperationBls12_381CurveAddData) -> Self { Self { addr: values[3] as u32, - p1_addr: values[4] as u32, - p2_addr: values[5] as u32, - step: values[2], - p1: values[6..18].try_into().unwrap(), - p2: values[18..30].try_into().unwrap(), + p1_addr: values[5] as u32, + p2_addr: values[6] as u32, + step: values[4], + p1: values[7..19].try_into().unwrap(), + p2: values[19..31].try_into().unwrap(), } } } @@ -81,7 +81,7 @@ pub struct Bls12_381CurveDblInput { impl Bls12_381CurveDblInput { pub fn from(values: &OperationBls12_381CurveDblData) -> Self { - Self { addr: values[3] as u32, step: values[2], p1: values[4..16].try_into().unwrap() } + Self { addr: values[3] as u32, step: values[4], p1: values[5..17].try_into().unwrap() } } } @@ -99,11 +99,11 @@ impl Bls12_381ComplexAddInput { pub fn from(values: &OperationBls12_381ComplexAddData) -> Self { Self { addr: values[3] as u32, - f1_addr: values[4] as u32, - f2_addr: values[5] as u32, - step: values[2], - f1: values[6..18].try_into().unwrap(), - f2: values[18..30].try_into().unwrap(), + f1_addr: values[5] as u32, + f2_addr: values[6] as u32, + step: values[4], + f1: values[7..19].try_into().unwrap(), + f2: values[19..31].try_into().unwrap(), } } } @@ -122,11 +122,11 @@ impl Bls12_381ComplexSubInput { pub fn from(values: &OperationBls12_381ComplexSubData) -> Self { Self { addr: values[3] as u32, - f1_addr: values[4] as u32, - f2_addr: values[5] as u32, - step: values[2], - f1: values[6..18].try_into().unwrap(), - f2: values[18..30].try_into().unwrap(), + f1_addr: values[5] as u32, + f2_addr: values[6] as u32, + step: values[4], + f1: values[7..19].try_into().unwrap(), + f2: values[19..31].try_into().unwrap(), } } } @@ -145,11 +145,11 @@ impl Bls12_381ComplexMulInput { pub fn from(values: &OperationBls12_381ComplexMulData) -> Self { Self { addr: values[3] as u32, - f1_addr: values[4] as u32, - f2_addr: values[5] as u32, - step: values[2], - f1: values[6..18].try_into().unwrap(), - f2: values[18..30].try_into().unwrap(), + f1_addr: values[5] as u32, + f2_addr: values[6] as u32, + step: values[4], + f1: values[7..19].try_into().unwrap(), + f2: values[19..31].try_into().unwrap(), } } } diff --git a/precompiles/arith_eq_384/src/arith_eq_384_instance.rs b/precompiles/arith_eq_384/src/arith_eq_384_instance.rs index 81ebc371e..b14626b46 100644 --- a/precompiles/arith_eq_384/src/arith_eq_384_instance.rs +++ b/precompiles/arith_eq_384/src/arith_eq_384_instance.rs @@ -6,14 +6,14 @@ use fields::PrimeField64; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; -use std::collections::VecDeque; use std::{any::Any, collections::HashMap, sync::Arc}; +use zisk_common::ChunkId; +use zisk_common::StatsType; use zisk_common::{ BusDevice, BusId, CheckPoint, CollectSkipper, ExtOperationData, Instance, InstanceCtx, InstanceType, OperationBusData, PayloadType, OPERATION_BUS_ID, }; -use zisk_common::{ChunkId, MemCollectorInfo}; use zisk_core::ZiskOperationType; use zisk_pil::ArithEq384Trace; @@ -123,6 +123,10 @@ impl Instance for ArithEq384Instance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Precompiled + } + fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { let (num_ops, collect_skipper) = self.collect_info[&chunk_id]; Some(Box::new(ArithEq384Collector::new(num_ops, collect_skipper))) @@ -158,9 +162,7 @@ impl ArithEq384Collector { pub fn new(num_operations: u64, collect_skipper: CollectSkipper) -> Self { Self { inputs: Vec::new(), num_operations, collect_skipper } } -} -impl BusDevice for ArithEq384Collector { /// Processes data received on the bus, collecting the inputs necessary for witness computation. /// /// # Arguments @@ -171,13 +173,7 @@ impl BusDevice for ArithEq384Collector { /// # Returns /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. - fn process_data( - &mut self, - bus_id: &BusId, - data: &[PayloadType], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[PayloadType]) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); if self.inputs.len() == self.num_operations as usize { @@ -220,15 +216,9 @@ impl BusDevice for ArithEq384Collector { self.inputs.len() < self.num_operations as usize } +} - /// Returns the bus IDs associated with this instance. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - +impl BusDevice for ArithEq384Collector { fn as_any(self: Box) -> Box { self } diff --git a/precompiles/arith_eq_384/src/arith_eq_384_manager.rs b/precompiles/arith_eq_384/src/arith_eq_384_manager.rs index 033c8d158..7d79ea4d4 100644 --- a/precompiles/arith_eq_384/src/arith_eq_384_manager.rs +++ b/precompiles/arith_eq_384/src/arith_eq_384_manager.rs @@ -2,11 +2,8 @@ use std::sync::Arc; use fields::PrimeField64; use pil_std_lib::Std; -use zisk_common::{BusDevice, PayloadType}; -use zisk_common::{ - BusDeviceMetrics, BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, InstanceInfo, Planner, -}; +use zisk_common::{BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, InstanceInfo, Planner}; use zisk_core::ZiskOperationType; use zisk_pil::ArithEq384Trace; @@ -31,8 +28,11 @@ impl ArithEq384Manager { Arc::new(Self { arith_eq_384_sm }) } - pub fn build_arith_eq_384_counter(&self) -> ArithEq384CounterInputGen { - ArithEq384CounterInputGen::new(BusDeviceMode::Counter) + pub fn build_arith_eq_384_counter(&self, asm_execution: bool) -> ArithEq384CounterInputGen { + match asm_execution { + true => ArithEq384CounterInputGen::new(BusDeviceMode::CounterAsm), + false => ArithEq384CounterInputGen::new(BusDeviceMode::Counter), + } } pub fn build_arith_eq_384_input_generator(&self) -> ArithEq384CounterInputGen { @@ -41,14 +41,6 @@ impl ArithEq384Manager { } impl ComponentBuilder for ArithEq384Manager { - /// Builds and returns a new counter for monitoring arith256 operations. - /// - /// # Returns - /// A boxed implementation of `RegularCounters` configured for arith256 operations. - fn build_counter(&self) -> Option> { - Some(Box::new(ArithEq384CounterInputGen::new(BusDeviceMode::Counter))) - } - /// Builds a planner to plan arith256-related instances. /// /// # Returns @@ -89,8 +81,4 @@ impl ComponentBuilder for ArithEq384Manager { } } } - - fn build_inputs_generator(&self) -> Option>> { - Some(Box::new(ArithEq384CounterInputGen::new(BusDeviceMode::InputGenerator))) - } } diff --git a/precompiles/arith_eq_384/src/mem_inputs/arith384_mod.rs b/precompiles/arith_eq_384/src/mem_inputs/arith384_mod.rs index a73fa3d93..e7e4b8f3a 100644 --- a/precompiles/arith_eq_384/src/mem_inputs/arith384_mod.rs +++ b/precompiles/arith_eq_384/src/mem_inputs/arith384_mod.rs @@ -1,5 +1,4 @@ -use std::collections::VecDeque; -use zisk_common::{BusId, MemCollectorInfo}; +use precompiles_common::MemProcessor; use super::ArithEq384MemInputConfig; use crate::{executors::Arith384Mod, ARITH_EQ_384_U64S}; @@ -12,14 +11,14 @@ pub const ARITH_384_MOD_MEM_CONFIG: ArithEq384MemInputConfig = ArithEq384MemInpu chunks_per_param: ARITH_EQ_384_U64S, }; -pub fn generate_arith384_mod_mem_inputs( +pub fn generate_arith384_mod_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { - let mut pos_offset: usize = 9; // op,op_type,a,b,addr[5],... + let mut pos_offset: usize = 10; // op,op_type,a,b,addr[5],... let a: &[u64; ARITH_EQ_384_U64S] = &data[pos_offset..(pos_offset + ARITH_EQ_384_U64S)].try_into().unwrap(); pos_offset += ARITH_EQ_384_U64S; @@ -40,15 +39,15 @@ pub fn generate_arith384_mod_mem_inputs( data, Some(&d), only_counters, - pending, + mem_processors, &ARITH_384_MOD_MEM_CONFIG, ); } -pub fn skip_arith384_mod_mem_inputs( +pub fn skip_arith384_mod_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &ARITH_384_MOD_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &ARITH_384_MOD_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq_384/src/mem_inputs/bls12_381_complex_add.rs b/precompiles/arith_eq_384/src/mem_inputs/bls12_381_complex_add.rs index c10a6e3c7..d2c88f709 100644 --- a/precompiles/arith_eq_384/src/mem_inputs/bls12_381_complex_add.rs +++ b/precompiles/arith_eq_384/src/mem_inputs/bls12_381_complex_add.rs @@ -1,5 +1,4 @@ -use std::collections::VecDeque; -use zisk_common::{BusId, MemCollectorInfo}; +use precompiles_common::MemProcessor; use super::ArithEq384MemInputConfig; use crate::{executors::Bls12_381Complex, ARITH_EQ_384_U64S_DOUBLE}; @@ -12,14 +11,14 @@ pub const BLS12_381_COMPLEX_ADD_MEM_CONFIG: ArithEq384MemInputConfig = ArithEq38 chunks_per_param: ARITH_EQ_384_U64S_DOUBLE, }; -pub fn generate_bls12_381_complex_add_mem_inputs( +pub fn generate_bls12_381_complex_add_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { - let mut pos_offset: usize = 6; // op,op_type,a,b,addr[2],... + let mut pos_offset: usize = 7; // op,op_type,a,b,addr[2],... let f1: &[u64; ARITH_EQ_384_U64S_DOUBLE] = &data[pos_offset..(pos_offset + ARITH_EQ_384_U64S_DOUBLE)].try_into().unwrap(); pos_offset += ARITH_EQ_384_U64S_DOUBLE; @@ -34,15 +33,15 @@ pub fn generate_bls12_381_complex_add_mem_inputs( data, Some(&f3), only_counters, - pending, + mem_processors, &BLS12_381_COMPLEX_ADD_MEM_CONFIG, ); } -pub fn skip_bls12_381_complex_add_mem_inputs( +pub fn skip_bls12_381_complex_add_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &BLS12_381_COMPLEX_ADD_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &BLS12_381_COMPLEX_ADD_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq_384/src/mem_inputs/bls12_381_complex_mul.rs b/precompiles/arith_eq_384/src/mem_inputs/bls12_381_complex_mul.rs index fec7de46d..bd444258c 100644 --- a/precompiles/arith_eq_384/src/mem_inputs/bls12_381_complex_mul.rs +++ b/precompiles/arith_eq_384/src/mem_inputs/bls12_381_complex_mul.rs @@ -1,5 +1,4 @@ -use std::collections::VecDeque; -use zisk_common::{BusId, MemCollectorInfo}; +use precompiles_common::MemProcessor; use super::ArithEq384MemInputConfig; use crate::{executors::Bls12_381Complex, ARITH_EQ_384_U64S_DOUBLE}; @@ -12,14 +11,14 @@ pub const BLS12_381_COMPLEX_MUL_MEM_CONFIG: ArithEq384MemInputConfig = ArithEq38 chunks_per_param: ARITH_EQ_384_U64S_DOUBLE, }; -pub fn generate_bls12_381_complex_mul_mem_inputs( +pub fn generate_bls12_381_complex_mul_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { - let mut pos_offset: usize = 6; // op,op_type,a,b,addr[2],... + let mut pos_offset: usize = 7; // op,op_type,a,b,addr[2],... let f1: &[u64; ARITH_EQ_384_U64S_DOUBLE] = &data[pos_offset..(pos_offset + ARITH_EQ_384_U64S_DOUBLE)].try_into().unwrap(); pos_offset += ARITH_EQ_384_U64S_DOUBLE; @@ -34,15 +33,15 @@ pub fn generate_bls12_381_complex_mul_mem_inputs( data, Some(&f3), only_counters, - pending, + mem_processors, &BLS12_381_COMPLEX_MUL_MEM_CONFIG, ); } -pub fn skip_bls12_381_complex_mul_mem_inputs( +pub fn skip_bls12_381_complex_mul_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &BLS12_381_COMPLEX_MUL_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &BLS12_381_COMPLEX_MUL_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq_384/src/mem_inputs/bls12_381_complex_sub.rs b/precompiles/arith_eq_384/src/mem_inputs/bls12_381_complex_sub.rs index 5d15fbc29..d3e7b4fe4 100644 --- a/precompiles/arith_eq_384/src/mem_inputs/bls12_381_complex_sub.rs +++ b/precompiles/arith_eq_384/src/mem_inputs/bls12_381_complex_sub.rs @@ -1,5 +1,4 @@ -use std::collections::VecDeque; -use zisk_common::{BusId, MemCollectorInfo}; +use precompiles_common::MemProcessor; use super::ArithEq384MemInputConfig; use crate::{executors::Bls12_381Complex, ARITH_EQ_384_U64S_DOUBLE}; @@ -12,14 +11,14 @@ pub const BLS12_381_COMPLEX_SUB_MEM_CONFIG: ArithEq384MemInputConfig = ArithEq38 chunks_per_param: ARITH_EQ_384_U64S_DOUBLE, }; -pub fn generate_bls12_381_complex_sub_mem_inputs( +pub fn generate_bls12_381_complex_sub_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { - let mut pos_offset: usize = 6; // op,op_type,a,b,addr[2],... + let mut pos_offset: usize = 7; // op,op_type,a,b,addr[2],... let f1: &[u64; ARITH_EQ_384_U64S_DOUBLE] = &data[pos_offset..(pos_offset + ARITH_EQ_384_U64S_DOUBLE)].try_into().unwrap(); pos_offset += ARITH_EQ_384_U64S_DOUBLE; @@ -34,15 +33,15 @@ pub fn generate_bls12_381_complex_sub_mem_inputs( data, Some(&f3), only_counters, - pending, + mem_processors, &BLS12_381_COMPLEX_SUB_MEM_CONFIG, ); } -pub fn skip_bls12_381_complex_sub_mem_inputs( +pub fn skip_bls12_381_complex_sub_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &BLS12_381_COMPLEX_SUB_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &BLS12_381_COMPLEX_SUB_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq_384/src/mem_inputs/bls12_381_curve_add.rs b/precompiles/arith_eq_384/src/mem_inputs/bls12_381_curve_add.rs index 983b3c7f6..9fa1ece0a 100644 --- a/precompiles/arith_eq_384/src/mem_inputs/bls12_381_curve_add.rs +++ b/precompiles/arith_eq_384/src/mem_inputs/bls12_381_curve_add.rs @@ -1,5 +1,4 @@ -use std::collections::VecDeque; -use zisk_common::{BusId, MemCollectorInfo}; +use precompiles_common::MemProcessor; use super::ArithEq384MemInputConfig; use crate::{executors::Bls12_381Curve, ARITH_EQ_384_U64S_DOUBLE}; @@ -12,14 +11,14 @@ pub const BLS12_381_CURVE_ADD_MEM_CONFIG: ArithEq384MemInputConfig = ArithEq384M chunks_per_param: ARITH_EQ_384_U64S_DOUBLE, }; -pub fn generate_bls12_381_curve_add_mem_inputs( +pub fn generate_bls12_381_curve_add_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { - let mut pos_offset: usize = 6; // op,op_type,a,b,addr[2],... + let mut pos_offset: usize = 7; // op,op_type,a,b,addr[2],... let p1: &[u64; ARITH_EQ_384_U64S_DOUBLE] = &data[pos_offset..(pos_offset + ARITH_EQ_384_U64S_DOUBLE)].try_into().unwrap(); pos_offset += ARITH_EQ_384_U64S_DOUBLE; @@ -34,15 +33,15 @@ pub fn generate_bls12_381_curve_add_mem_inputs( data, Some(&p3), only_counters, - pending, + mem_processors, &BLS12_381_CURVE_ADD_MEM_CONFIG, ); } -pub fn skip_bls12_381_curve_add_mem_inputs( +pub fn skip_bls12_381_curve_add_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &BLS12_381_CURVE_ADD_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &BLS12_381_CURVE_ADD_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq_384/src/mem_inputs/bls12_381_curve_dbl.rs b/precompiles/arith_eq_384/src/mem_inputs/bls12_381_curve_dbl.rs index b61f8da15..d2ee68ce0 100644 --- a/precompiles/arith_eq_384/src/mem_inputs/bls12_381_curve_dbl.rs +++ b/precompiles/arith_eq_384/src/mem_inputs/bls12_381_curve_dbl.rs @@ -1,5 +1,4 @@ -use std::collections::VecDeque; -use zisk_common::{BusId, MemCollectorInfo}; +use precompiles_common::MemProcessor; use super::ArithEq384MemInputConfig; use crate::{executors::Bls12_381Curve, ARITH_EQ_384_U64S_DOUBLE}; @@ -12,14 +11,14 @@ pub const BLS12_381_CURVE_DBL_MEM_CONFIG: ArithEq384MemInputConfig = ArithEq384M chunks_per_param: ARITH_EQ_384_U64S_DOUBLE, }; -pub fn generate_bls12_381_curve_dbl_mem_inputs( +pub fn generate_bls12_381_curve_dbl_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { - let pos_offset: usize = 4; // op,op_type,a,b,... + let pos_offset: usize = 5; // op,op_type,a,b,... let p1: &[u64; ARITH_EQ_384_U64S_DOUBLE] = &data[pos_offset..(pos_offset + ARITH_EQ_384_U64S_DOUBLE)].try_into().unwrap(); let mut p3 = [0u64; ARITH_EQ_384_U64S_DOUBLE]; @@ -31,15 +30,15 @@ pub fn generate_bls12_381_curve_dbl_mem_inputs( data, Some(&p3), only_counters, - pending, + mem_processors, &BLS12_381_CURVE_DBL_MEM_CONFIG, ); } -pub fn skip_bls12_381_curve_dbl_mem_inputs( +pub fn skip_bls12_381_curve_dbl_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { - super::skip_mem_inputs(addr_main, data, &BLS12_381_CURVE_DBL_MEM_CONFIG, mem_collectors_info) + super::skip_mem_inputs(addr_main, data, &BLS12_381_CURVE_DBL_MEM_CONFIG, mem_processors) } diff --git a/precompiles/arith_eq_384/src/mem_inputs/generate_mem_inputs.rs b/precompiles/arith_eq_384/src/mem_inputs/generate_mem_inputs.rs index 2b1602d29..b2c0d5f95 100644 --- a/precompiles/arith_eq_384/src/mem_inputs/generate_mem_inputs.rs +++ b/precompiles/arith_eq_384/src/mem_inputs/generate_mem_inputs.rs @@ -1,6 +1,6 @@ use precompiles_common::MemBusHelpers; -use std::collections::VecDeque; -use zisk_common::{BusId, MemCollectorInfo, OPERATION_BUS_DATA_SIZE}; +use precompiles_common::MemProcessor; +use zisk_common::OPERATION_PRECOMPILED_BUS_DATA_SIZE; #[derive(Debug)] pub struct ArithEq384MemInputConfig { @@ -10,24 +10,24 @@ pub struct ArithEq384MemInputConfig { pub write_params: usize, pub chunks_per_param: usize, } -pub fn generate_mem_inputs( +pub fn generate_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], write_data: Option<&[u64]>, only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, config: &ArithEq384MemInputConfig, ) { let params_count = config.read_params + config.write_params; - let params_offset = OPERATION_BUS_DATA_SIZE + config.indirect_params; + let params_offset = OPERATION_PRECOMPILED_BUS_DATA_SIZE + config.indirect_params; for iparam in 0..config.indirect_params { - MemBusHelpers::mem_aligned_load( + MemBusHelpers::mem_aligned_read( addr_main + iparam as u32 * 8, step_main, - data[OPERATION_BUS_DATA_SIZE + iparam], - pending, + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + iparam], + mem_processors, ) } for iparam in 0..params_count { @@ -38,7 +38,7 @@ pub fn generate_mem_inputs( }; let param_addr = if config.indirect_params > 0 { // read indirect parameters, means stored the address of parameter - data[OPERATION_BUS_DATA_SIZE + param_index] as u32 + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + param_index] as u32 } else { addr_main + (param_index * 8 * config.chunks_per_param) as u32 }; @@ -65,27 +65,25 @@ pub fn generate_mem_inputs( step_main, chunk_data, is_write, - pending, + mem_processors, ) } } } -pub fn skip_mem_inputs( +pub fn skip_mem_inputs( addr_main: u32, data: &[u64], config: &ArithEq384MemInputConfig, - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { let params_count = config.read_params + config.write_params; // Check indirect loads for iparam in 0..config.indirect_params { let addr = addr_main + iparam as u32 * 8; - for mem_collector in mem_collectors_info { - if !mem_collector.skip_addr(addr) { - return false; - } + if !mem_processors.skip_addr(addr) { + return false; } } @@ -96,16 +94,14 @@ pub fn skip_mem_inputs( iparam }; let param_addr = if config.indirect_params > 0 { - data[OPERATION_BUS_DATA_SIZE + param_index] as u32 + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + param_index] as u32 } else { addr_main + (param_index * 8 * config.chunks_per_param) as u32 }; for ichunk in 0..config.chunks_per_param { let addr = param_addr + ichunk as u32 * 8; - for mem_collector in mem_collectors_info { - if !mem_collector.skip_addr(addr) { - return false; - } + if !mem_processors.skip_addr(addr) { + return false; } } } diff --git a/precompiles/big_int/Cargo.toml b/precompiles/big_int/Cargo.toml index 9b7aaa0ba..9ddbe5bfd 100644 --- a/precompiles/big_int/Cargo.toml +++ b/precompiles/big_int/Cargo.toml @@ -16,7 +16,6 @@ sm-mem = { workspace = true } mem-common = { workspace = true } lib-c = { workspace = true } -proofman = { workspace = true } proofman-common = { workspace = true } proofman-macros = { workspace = true } proofman-util = { workspace = true } @@ -28,8 +27,5 @@ generic-array = "0.14" [features] default = [] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed"] -no_lib_link = ["proofman-common/no_lib_link"] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] \ No newline at end of file +gpu = ["packed"] +packed = [] \ No newline at end of file diff --git a/precompiles/big_int/pil/big_int_add.pil b/precompiles/big_int/pil/big_int_add.pil index 0e6f93af4..4f7bbf072 100644 --- a/precompiles/big_int/pil/big_int_add.pil +++ b/precompiles/big_int/pil/big_int_add.pil @@ -12,14 +12,13 @@ require "opids.pil" /// Parameters: /// /// - N: number of rows by instance -/// - operation_bus_id: the opid of bus used by precompile /// - bits: number of bits of the addition (N_RC * RC * 32) /// - RC: number of chunks of 32 bits defined in native architecture /// - operation_code: the operation code used to identify the precompile /// /// Integration: /// -/// - main assumes an operation on operation_bus_id: +/// - main assumes an operation: /// [ operation_code, a: [step, 0], b: [addr_params, 0], c: [carry_out, 0], flag: carry_out ] /// /// - precompiles "load" params from memory: @@ -45,7 +44,7 @@ require "opids.pil" /// addr_c + 8 * (N_RC - 1) <- c[N_RC-1] (c_chunks[N_RC-1][0] + c_chunks[N_RC-1][1] << 16, /// c_chunks[N_RC-1][2] + c_chunks[N_RC-1][3] << 16) -airtemplate BigIntAdd(const int N = 2**21, const int bits = 256, const int operation_code, const int operation_bus_id = OPERATION_BUS_ID) { +airtemplate BigIntAdd(const int N = 2**21, const int bits = 256, const int operation_code) { const int RC = 2; const int N_RC = bits / (RC * 32); @@ -134,5 +133,6 @@ airtemplate BigIntAdd(const int N = 2**21, const int bits = 256, const int opera const expr final_cout = cout[N_RC-1][RC-1]; // proves the operation lauched by main, c = flag = carry_out - lookup_proves(operation_bus_id, [operation_code, step, 0, addr_params, 0, final_cout, 0, final_cout], sel); + proves_operation(op: operation_code, a:[0, 0], b:[addr_params, 0], c:[final_cout, 0], + flag:0, main_step: step, mul:sel); } \ No newline at end of file diff --git a/precompiles/big_int/src/add256.rs b/precompiles/big_int/src/add256.rs index acea6702d..c339491d7 100644 --- a/precompiles/big_int/src/add256.rs +++ b/precompiles/big_int/src/add256.rs @@ -62,9 +62,9 @@ impl Add256SM { trace: &mut Add256TraceRowType, multiplicities: &mut [u32], ) { + debug_assert!(input.cin < 2); trace.set_cin(input.cin != 0); let mut cout_2 = input.cin as u32; - for i in 0..4 { let al = input.a[i] as u32; let ah = (input.a[i] >> 32) as u32; @@ -140,19 +140,19 @@ impl Add256SM { let flat_inputs: Vec<_> = inputs.iter().flatten().collect(); let trace_rows = trace.buffer.as_mut_slice(); - // Determinar tamaño óptimo de chunks + // Calculate optimal chunk size let num_threads = rayon::current_num_threads(); let chunk_size = std::cmp::max(1, flat_inputs.len() / num_threads); - // Procesar en chunks para compartir arrays locales de multiplicities + // Process in chunks to allow per-chunk local multiplicities arrays let local_multiplicities_vec: Vec> = flat_inputs .par_chunks(chunk_size) .zip(trace_rows.par_chunks_mut(chunk_size)) .map(|(input_chunk, trace_chunk)| { - // Array local compartido por este chunk + // Local array shared by this chunk let mut local_multiplicities = vec![0u32; 1 << 16]; - // Procesar todos los inputs del chunk + // Sum all local arrays into a global one for (input, trace_row) in input_chunk.iter().zip(trace_chunk.iter_mut()) { self.process_slice(input, trace_row, &mut local_multiplicities); } @@ -161,7 +161,7 @@ impl Add256SM { }) .collect(); - // Sumar todos los arrays locales en uno global + // Sum all local arrays into a global one let mut global_multiplicities = vec![0u32; 1 << 16]; for local_multiplicities in local_multiplicities_vec { for (i, count) in local_multiplicities.iter().enumerate() { @@ -169,7 +169,7 @@ impl Add256SM { } } - // Enviar el resultado final al std + // Send final result to std self.std.range_checks(self.range_id, global_multiplicities); timer_stop_and_log_trace!(ADD256_TRACE); diff --git a/precompiles/big_int/src/add256_bus_device.rs b/precompiles/big_int/src/add256_bus_device.rs index d65dcff9f..71cf20ec5 100644 --- a/precompiles/big_int/src/add256_bus_device.rs +++ b/precompiles/big_int/src/add256_bus_device.rs @@ -2,11 +2,12 @@ //! sent over the data bus. It connects to the bus and gathers metrics for specific //! `ZiskOperationType::Add256` instructions. -use std::{collections::VecDeque, ops::Add}; +use std::ops::Add; + +use precompiles_common::MemProcessor; -use zisk_common::MemCollectorInfo; use zisk_common::{ - BusDevice, BusDeviceMode, BusId, Counter, Metrics, A, B, OPERATION_BUS_ID, OP_TYPE, + BusDevice, BusDeviceMode, BusId, Counter, Metrics, B, OPERATION_BUS_ID, OP_TYPE, STEP, }; use zisk_core::ZiskOperationType; @@ -48,6 +49,51 @@ impl Add256CounterInputGen { pub fn inst_count(&self, op_type: ZiskOperationType) -> Option { (op_type == ZiskOperationType::BigInt).then_some(self.counter.inst_count) } + + /// Processes data received on the bus, updating counters and generating inputs when applicable. + /// + /// # Arguments + /// * `bus_id` - The ID of the bus sending the data. + /// * `data` - The data received from the bus. + /// * `mem_processors` – A queue of mem_processors bus operations used to send derived inputs. + /// + /// # Returns + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data( + &mut self, + bus_id: &BusId, + data: &[u64], + mem_processors: &mut P, + ) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + if data[OP_TYPE] as u32 != ZiskOperationType::BigInt as u32 { + return true; + } + + let step_main = data[STEP]; + let addr_main = data[B] as u32; + + match self.mode { + BusDeviceMode::Counter => { + self.measure(data); + generate_add256_mem_inputs(addr_main, step_main, data, true, mem_processors); + } + BusDeviceMode::CounterAsm => { + self.measure(data); + } + BusDeviceMode::InputGenerator => { + if skip_add256_mem_inputs(addr_main, data, mem_processors) { + return true; + } + generate_add256_mem_inputs(addr_main, step_main, data, false, mem_processors); + } + } + + true + } } impl Metrics for Add256CounterInputGen { @@ -90,57 +136,6 @@ impl Add for Add256CounterInputGen { } impl BusDevice for Add256CounterInputGen { - /// Processes data received on the bus, updating counters and generating inputs when applicable. - /// - /// # Arguments - /// * `bus_id` - The ID of the bus sending the data. - /// * `data` - The data received from the bus. - /// * `pending` – A queue of pending bus operations used to send derived inputs. - /// - /// # Returns - /// A boolean indicating whether the program should continue execution or terminate. - /// Returns `true` to continue execution, `false` to stop. - #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - pending: &mut VecDeque<(BusId, Vec)>, - mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { - debug_assert!(*bus_id == OPERATION_BUS_ID); - - if data[OP_TYPE] as u32 != ZiskOperationType::BigInt as u32 { - return true; - } - - if let Some(mem_collectors_info) = mem_collector_info { - if skip_add256_mem_inputs(data[B] as u32, data, mem_collectors_info) { - return true; - } - } - - let step_main = data[A]; - let addr_main = data[B] as u32; - - let only_counters = self.mode == BusDeviceMode::Counter; - if only_counters { - self.measure(data); - } - - generate_add256_mem_inputs(addr_main, step_main, data, only_counters, pending); - - true - } - - /// Returns the bus IDs associated with this counter. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/precompiles/big_int/src/add256_constants.rs b/precompiles/big_int/src/add256_constants.rs index 4b51efd21..6b358c74b 100644 --- a/precompiles/big_int/src/add256_constants.rs +++ b/precompiles/big_int/src/add256_constants.rs @@ -1,4 +1,4 @@ -use zisk_common::OPERATION_BUS_DATA_SIZE; +use zisk_common::OPERATION_PRECOMPILED_BUS_DATA_SIZE; pub const PARAMS: usize = 4; pub const READ_PARAMS: usize = 2; @@ -6,7 +6,7 @@ pub const DIRECT_READ_PARAMS: usize = 1; pub const WRITE_PARAMS: usize = 1; pub const RESULT_PARAMS: usize = 1; pub const PARAM_CHUNKS: usize = 4; -pub const START_READ_PARAMS: usize = OPERATION_BUS_DATA_SIZE + PARAMS; +pub const START_READ_PARAMS: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + PARAMS; pub const START_WRITE_PARAMS: usize = START_READ_PARAMS + READ_PARAMS * PARAM_CHUNKS + RESULT_PARAMS; pub const WRITE_ADDR_PARAM: usize = READ_PARAMS + DIRECT_READ_PARAMS; diff --git a/precompiles/big_int/src/add256_gen_mem_inputs.rs b/precompiles/big_int/src/add256_gen_mem_inputs.rs index 4f26b6c69..3f6dbc936 100644 --- a/precompiles/big_int/src/add256_gen_mem_inputs.rs +++ b/precompiles/big_int/src/add256_gen_mem_inputs.rs @@ -2,9 +2,9 @@ use lib_c::add256; use crate::add256_constants::*; use precompiles_common::MemBusHelpers; -use std::collections::VecDeque; -use zisk_common::MemCollectorInfo; -use zisk_common::{BusId, OPERATION_BUS_DATA_SIZE}; +use precompiles_common::MemProcessor; + +use zisk_common::OPERATION_PRECOMPILED_BUS_DATA_SIZE; #[derive(Debug)] pub struct Add256MemInputConfig { @@ -15,32 +15,32 @@ pub struct Add256MemInputConfig { pub chunks_per_param: usize, } -pub fn generate_add256_mem_inputs( +pub fn generate_add256_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { // Start by generating the params (indirection read, direct, indirection write) for iparam in 0..PARAMS { - MemBusHelpers::mem_aligned_load( + MemBusHelpers::mem_aligned_read( addr_main + iparam as u32 * 8, step_main, - data[OPERATION_BUS_DATA_SIZE + iparam], - pending, + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + iparam], + mem_processors, ); } // generate load params for iparam in 0..READ_PARAMS { - let param_addr = data[OPERATION_BUS_DATA_SIZE + iparam] as u32; + let param_addr = data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + iparam] as u32; for ichunk in 0..PARAM_CHUNKS { - MemBusHelpers::mem_aligned_load( + MemBusHelpers::mem_aligned_read( param_addr + ichunk as u32 * 8, step_main, data[START_READ_PARAMS + iparam * PARAM_CHUNKS + ichunk], - pending, + mem_processors, ); } } @@ -53,14 +53,14 @@ pub fn generate_add256_mem_inputs( [START_READ_PARAMS + PARAM_CHUNKS..START_READ_PARAMS + 2 * PARAM_CHUNKS] .try_into() .unwrap(); - add256(&a, &b, data[OPERATION_BUS_DATA_SIZE + READ_PARAMS], &mut write_data); + add256(&a, &b, data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + READ_PARAMS], &mut write_data); } // verify write param - let write_addr = data[OPERATION_BUS_DATA_SIZE + WRITE_ADDR_PARAM] as u32; + let write_addr = data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + WRITE_ADDR_PARAM] as u32; for (ichunk, write_data) in write_data.iter().enumerate().take(PARAM_CHUNKS) { let param_addr = write_addr + ichunk as u32 * 8; - MemBusHelpers::mem_aligned_write(param_addr, step_main, *write_data, pending); + MemBusHelpers::mem_aligned_write(param_addr, step_main, *write_data, mem_processors); } } @@ -68,42 +68,36 @@ pub fn generate_add256_mem_inputs( // op_b = addr_main // mem_trace: @a, @b, cin, @c, a[0..3], b[0..3], cout, [ c[0..3] ] -pub fn skip_add256_mem_inputs( +pub fn skip_add256_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { // verify main params "struct" of indirections for iparam in 0..PARAMS { let addr = addr_main + iparam as u32 * 8; - for mem_collector in mem_collectors_info { - if !mem_collector.skip_addr(addr) { - return false; - } + if !mem_processors.skip_addr(addr) { + return false; } } // verify read params for iparam in 0..READ_PARAMS { - let param_addr = data[OPERATION_BUS_DATA_SIZE + iparam] as u32; + let param_addr = data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + iparam] as u32; for ichunk in 0..PARAM_CHUNKS { let addr = param_addr + ichunk as u32 * 8; - for mem_collector in mem_collectors_info { - if !mem_collector.skip_addr(addr) { - return false; - } + if !mem_processors.skip_addr(addr) { + return false; } } } // verify write param - let write_addr = data[OPERATION_BUS_DATA_SIZE + WRITE_ADDR_PARAM] as u32; + let write_addr = data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + WRITE_ADDR_PARAM] as u32; for ichunk in 0..PARAM_CHUNKS { let addr = write_addr + ichunk as u32 * 8; - for mem_collector in mem_collectors_info { - if !mem_collector.skip_addr(addr) { - return false; - } + if !mem_processors.skip_addr(addr) { + return false; } } diff --git a/precompiles/big_int/src/add256_input.rs b/precompiles/big_int/src/add256_input.rs index 71e7bc755..746b58593 100644 --- a/precompiles/big_int/src/add256_input.rs +++ b/precompiles/big_int/src/add256_input.rs @@ -1,6 +1,6 @@ use crate::add256_constants::*; use zisk_common::OperationAdd256Data; -use zisk_common::{A, B, OPERATION_BUS_DATA_SIZE}; +use zisk_common::{B, OPERATION_PRECOMPILED_BUS_DATA_SIZE, STEP}; #[derive(Debug)] pub struct Add256Input { @@ -17,12 +17,12 @@ pub struct Add256Input { impl Add256Input { pub fn from(values: &OperationAdd256Data) -> Self { Self { - step_main: values[A], + step_main: values[STEP], addr_main: values[B] as u32, - addr_a: values[OPERATION_BUS_DATA_SIZE] as u32, - addr_b: values[OPERATION_BUS_DATA_SIZE + 1] as u32, - addr_c: values[OPERATION_BUS_DATA_SIZE + READ_PARAMS + DIRECT_READ_PARAMS] as u32, - cin: values[OPERATION_BUS_DATA_SIZE + READ_PARAMS], + addr_a: values[OPERATION_PRECOMPILED_BUS_DATA_SIZE] as u32, + addr_b: values[OPERATION_PRECOMPILED_BUS_DATA_SIZE + 1] as u32, + cin: values[OPERATION_PRECOMPILED_BUS_DATA_SIZE + 2], + addr_c: values[OPERATION_PRECOMPILED_BUS_DATA_SIZE + 3] as u32, a: values[START_READ_PARAMS..START_READ_PARAMS + PARAM_CHUNKS].try_into().unwrap(), b: values[START_READ_PARAMS + PARAM_CHUNKS..START_READ_PARAMS + 2 * PARAM_CHUNKS] .try_into() diff --git a/precompiles/big_int/src/add256_instance.rs b/precompiles/big_int/src/add256_instance.rs index 48799c65d..5594fc50d 100644 --- a/precompiles/big_int/src/add256_instance.rs +++ b/precompiles/big_int/src/add256_instance.rs @@ -7,12 +7,12 @@ use crate::{Add256Input, Add256SM}; use fields::PrimeField64; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; -use std::collections::VecDeque; use std::{any::Any, collections::HashMap, sync::Arc}; use zisk_common::ChunkId; +use zisk_common::StatsType; use zisk_common::{ BusDevice, BusId, CheckPoint, CollectSkipper, ExtOperationData, Instance, InstanceCtx, - InstanceType, MemCollectorInfo, PayloadType, OPERATION_BUS_ID, OP_TYPE, + InstanceType, PayloadType, OPERATION_BUS_ID, OP_TYPE, }; use zisk_core::ZiskOperationType; use zisk_pil::Add256Trace; @@ -101,6 +101,10 @@ impl Instance for Add256Instance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Precompiled + } + fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { assert_eq!( self.ictx.plan.air_id, @@ -149,9 +153,7 @@ impl Add256Collector { collect_skipper, } } -} -impl BusDevice for Add256Collector { /// Processes data received on the bus, collecting the inputs necessary for witness computation. /// /// # Arguments @@ -164,13 +166,7 @@ impl BusDevice for Add256Collector { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[PayloadType], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[PayloadType]) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); if self.inputs.len() == self.num_operations as usize { @@ -195,15 +191,9 @@ impl BusDevice for Add256Collector { self.inputs.len() < self.num_operations as usize } +} - /// Returns the bus IDs associated with this instance. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - +impl BusDevice for Add256Collector { fn as_any(self: Box) -> Box { self } diff --git a/precompiles/big_int/src/add256_manager.rs b/precompiles/big_int/src/add256_manager.rs index 99a471749..bd3b99fc9 100644 --- a/precompiles/big_int/src/add256_manager.rs +++ b/precompiles/big_int/src/add256_manager.rs @@ -2,10 +2,7 @@ use std::sync::Arc; use fields::PrimeField64; use pil_std_lib::Std; -use zisk_common::{ - BusDevice, BusDeviceMetrics, BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, - InstanceInfo, PayloadType, Planner, -}; +use zisk_common::{BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, InstanceInfo, Planner}; use zisk_core::ZiskOperationType; #[cfg(not(feature = "packed"))] use zisk_pil::Add256Trace; @@ -38,8 +35,11 @@ impl Add256Manager { Arc::new(Self { add256_sm }) } - pub fn build_add256_counter(&self) -> Add256CounterInputGen { - Add256CounterInputGen::new(BusDeviceMode::Counter) + pub fn build_add256_counter(&self, asm_execution: bool) -> Add256CounterInputGen { + match asm_execution { + true => Add256CounterInputGen::new(BusDeviceMode::CounterAsm), + false => Add256CounterInputGen::new(BusDeviceMode::Counter), + } } pub fn build_add256_input_generator(&self) -> Add256CounterInputGen { @@ -48,14 +48,6 @@ impl Add256Manager { } impl ComponentBuilder for Add256Manager { - /// Builds and returns a new counter for monitoring Add256 operations. - /// - /// # Returns - /// A boxed implementation of `RegularCounters` configured for Add256 operations. - fn build_counter(&self) -> Option> { - Some(Box::new(Add256CounterInputGen::new(BusDeviceMode::Counter))) - } - /// Builds a planner to plan Add256-related instances. /// /// # Returns @@ -93,8 +85,4 @@ impl ComponentBuilder for Add256Manager { } } } - - fn build_inputs_generator(&self) -> Option>> { - Some(Box::new(Add256CounterInputGen::new(BusDeviceMode::InputGenerator))) - } } diff --git a/precompiles/blake2/Cargo.toml b/precompiles/blake2/Cargo.toml new file mode 100644 index 000000000..43cad97e4 --- /dev/null +++ b/precompiles/blake2/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "precomp-blake2" +version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +keywords = { workspace = true } +repository = { workspace = true } +categories = { workspace = true } + +[dependencies] +zisk-core = { workspace = true } +zisk-common = { workspace = true } +zisk-pil = { workspace = true } +precompiles-common = { workspace = true } +sm-mem = { workspace = true } +mem-common = { workspace = true } + +proofman-common = { workspace = true } +proofman-macros = { workspace = true } +proofman-util = { workspace = true } +pil-std-lib = { workspace = true } +fields = { workspace=true } +tracing = { workspace = true } +rayon = { workspace = true } + +[features] +default = [] +gpu = ["packed"] +packed = [] \ No newline at end of file diff --git a/precompiles/blake2/pil/blake2br.pil b/precompiles/blake2/pil/blake2br.pil new file mode 100644 index 000000000..779e159b6 --- /dev/null +++ b/precompiles/blake2/pil/blake2br.pil @@ -0,0 +1,722 @@ +require "std_constants.pil" +require "std_lookup.pil" +require "std_permutation.pil" +require "std_range_check.pil" +require "operations.pil" +require "opids.pil" + +// Precompile in charge of performing the BLAKE2b round function. +// For reference: https://datatracker.ietf.org/doc/html/rfc7693 or https://www.blake2.net/ + +// We use little-endian representation + +airtemplate Blake2br(const int N = 2**20) { + /* + ═══════════════════════════════════════════════════════════════════════════ + BLAKE2B ROUND FUNCTION SPECIFICATION + ═══════════════════════════════════════════════════════════════════════════ + + BLAKE2BR(v: [u64; 16], m: [u64; 16], round_idx: u32) -> [u64; 16] + + Each round consists of 8 G function calls: + • 4 Column steps (G0-G3): Mix columns independently + • 4 Diagonal steps (G0-G3): Mix diagonals independently + + Each G function performs 8 sequential operations on 4 u64 values: + v[a] := (v[a] + v[b] + x) mod 2⁶⁴ + v[d] := (v[d] ^ v[a]) >>> 32 + v[c] := (v[c] + v[d]) mod 2⁶⁴ + v[b] := (v[b] ^ v[c]) >>> 24 + v[a] := (v[a] + v[b] + y) mod 2⁶⁴ + v[d] := (v[d] ^ v[a]) >>> 16 + v[c] := (v[c] + v[d]) mod 2⁶⁴ + v[b] := (v[b] ^ v[c]) >>> 63 + + Message words x,y selected by SIGMA[round_idx % 10] + + ═══════════════════════════════════════════════════════════════════════════ + EXECUTION TRACE LAYOUT (24 clocks per round) + ═══════════════════════════════════════════════════════════════════════════ + + Each G function uses 3 rows: + Row 0: Initial state + first message word (input) + Row 1: Intermediate state + second message (intermediate) + Row 2: Final state (output) + + ROW va[2] vb[2][32] vc[2] vd[2][32] ms0[2] STAGE + |------|----------|----------|----------|----------|----------|----------------| + | 0 | v[0] | v[4] | v[8] | v[12] | m[s[0]] | G1 - COL MIX | <-- V/M IN + | 1 | v[0]i | v[4]i | v[8]i | v[12]i | m[s[1]] | G1 - COL MIX | <-- M IN + | 2 | v[0]o | v[4]o | v[8]o | v[12]o | XXXXXXX | G1 - COL MIX | + |------|----------|----------|----------|----------|----------|----------------| + | 3 | v[1] | v[5] | v[9] | v[13] | m[s[2]] | G2 - COL MIX | <-- V/M IN + | 4 | v[1]i | v[5]i | v[9]i | v[13]i | m[s[3]] | G2 - COL MIX | <-- M IN + | 5 | v[1]o | v[5]o | v[9]o | v[13]o | XXXXXXX | G2 - COL MIX | + |------|----------|----------|----------|----------|----------|----------------| + | 6 | v[2] | v[6] | v[10] | v[14] | m[s[4]] | G3 - COL MIX | <-- V/M IN + | 7 | v[2]i | v[6]i | v[10]i | v[14]i | m[s[5]] | G3 - COL MIX | <-- M IN + | 8 | v[2]o | v[6]o | v[10]o | v[14]o | XXXXXXX | G3 - COL MIX | + |------|----------|----------|----------|----------|----------|----------------| + | 9 | v[3] | v[7] | v[11] | v[15] | m[s[6]] | G4 - COL MIX | <-- V/M IN + | 10 | v[3]i | v[7]i | v[11]i | v[15]i | m[s[7]] | G4 - COL MIX | <-- M IN + | 11 | v[3]o | v[7]o | v[11]o | v[15]o | XXXXXXX | G4 - COL MIX | + |------|----------|----------|----------|----------|----------|----------------| + | 12 | v[0] | v[5] | v[10] | v[15] | m[s[8]] | G1 - DIA MIX | <-- M IN + | 13 | v[0]i | v[5]i | v[10]i | v[15]i | m[s[9]] | G1 - DIA MIX | <-- M IN + | 14 | v[0]o | v[5]o | v[10]o | v[15]o | XXXXXXX | G1 - DIA MIX | <-- V OUT + |------|----------|----------|----------|----------|----------|----------------| + | 15 | v[1] | v[6] | v[11] | v[12] | m[s[10]] | G2 - DIA MIX | <-- M IN + | 16 | v[1]i | v[6]i | v[11]i | v[12]i | m[s[11]] | G2 - DIA MIX | <-- M IN + | 17 | v[1]o | v[6]o | v[11]o | v[12]o | XXXXXXXX | G2 - DIA MIX | <-- V OUT + |------|----------|----------|----------|----------|----------|----------------| + | 18 | v[2] | v[7] | v[8] | v[13] | m[s[12]] | G3 - DIA MIX | <-- M IN + | 19 | v[2]i | v[7]i | v[8]i | v[13]i | m[s[13]] | G3 - DIA MIX | <-- M IN + | 20 | v[2]o | v[7]o | v[8]o | v[13]o | XXXXXXXX | G3 - DIA MIX | <-- V OUT + |------|----------|----------|----------|----------|----------|----------------| + | 21 | v[3] | v[4] | v[9] | v[14] | m[s[14]] | G4 - DIA MIX | <-- M IN + | 22 | v[3]i | v[4]i | v[9]i | v[14]i | m[s[15]] | G4 - DIA MIX | <-- M IN + | 23 | v[3]o | v[4]o | v[9]o | v[14]o | XXXXXXXX | G4 - DIA MIX | <-- V OUT + |------|----------|----------|----------|----------|----------|----------------| + */ + + const int CLOCKS_PER_G = 3; + const int NUM_G_FUNCTIONS = 8; + const int CLOCKS = CLOCKS_PER_G * NUM_G_FUNCTIONS; + + /// Message word permutation schedule + const int SIGMA[10][16]; + SIGMA[0] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + SIGMA[1] = [14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3]; + SIGMA[2] = [11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4]; + SIGMA[3] = [7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8]; + SIGMA[4] = [9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13]; + SIGMA[5] = [2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9]; + SIGMA[6] = [12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11]; + SIGMA[7] = [13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10]; + SIGMA[8] = [6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5]; + SIGMA[9] = [10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0]; + + // Ensure that the Blake2br fits + if (N < 2*CLOCKS) { + error(`Blake2br requires N >= ${2*CLOCKS}, got N=${N}`); + } + + // Calculate usable capacity + const int NUM_NON_USABLE_ROWS = N % CLOCKS; + const int NUM_BLAKE2BR; + if (NUM_NON_USABLE_ROWS == 0) { + NUM_BLAKE2BR = N / CLOCKS; + } else { + NUM_BLAKE2BR = (N - NUM_NON_USABLE_ROWS) / CLOCKS - 1; // The -1 is because CLOCKS is not a divisor of N + } + println(`Blake2br capacity: ${NUM_BLAKE2BR} rounds (N=${N})`); + + // ═══════════════════════════════════════════════════════════════════════════ + // CLOCK SELECTORS + // ═══════════════════════════════════════════════════════════════════════════ + + // Base clock: 1 at row 0, 0 elsewhere within each 24-row cycle + col fixed CLK_0 = [[1, 0:(CLOCKS-1)]:NUM_BLAKE2BR, 0...]; + + // Clock selectors for each row in the cycle + const expr CLK[CLOCKS]; + for (int i = 0; i < CLOCKS; i++) { + CLK[i] = (i)'CLK_0; + } + + col witness bits(1) in_use; + + in_use * (1 - in_use) === 0; + + // Selector latching: once activated, stays active for full cycle + const expr in_use_latched = clock_set(start: 1, end: CLOCKS); + in_use_latched * (in_use - 'in_use) === 0; + + // ═══════════════════════════════════════════════════════════════════════════ + // ROUND INDEX AND SIGMA PERMUTATION + // ═══════════════════════════════════════════════════════════════════════════ + + // 1] Start by deriving m[SIGMA[round_idx][off + 2*i]], m[SIGMA[round_idx][off + 2*i + 1]] + + // Round index (0-9, determines which SIGMA row to use) + col witness bits(4) round_idx; + + // Round index is constant within a round + (1 - CLK_0) * (round_idx - 'round_idx) === 0; + + // One-hot encoding for round index selection + col witness bits(1) round_idx_sel[10]; + + expr sum_sel = 0; + expr sum_weighted_sel = 0; + for (int i = 0; i < 10; i++) { + // Selectors are bits + round_idx_sel[i] * (round_idx_sel[i] - 1) === 0; + + // Add all selectors + sum_sel += round_idx_sel[i]; + + // Add weighted selectors to compute round_idx + sum_weighted_sel += round_idx_sel[i] * i; + } + sum_sel * (sum_sel - 1) === 0; // At most one of the round_idx_sel is active + round_idx === sum_weighted_sel; // Check that round_idx is the index of the active round_idx_sel + // Moreover, this ensures that round_idx is in [0,10) + // Since round_idx is fixed within the cycle, all round_idx_sel are fixed as well + + // Compute permuted message index: SIGMA[round_idx] = ∑ᵢ round_idx_sel[i]·SIGMA[i] + col witness bits(4) sigma_idx; + expr sigma_idx_expr = 0; + for (int j = 0; j < 10; j++) { + expr sigma_sum = 0; + int msg_pos = 0; + for (int k = 0; k < CLOCKS; k++) { + // Skip output rows (no message word needed) + if (k > 0 && k % CLOCKS_PER_G == (CLOCKS_PER_G - 1)) { + continue; + } + sigma_sum += CLK[k] * SIGMA[j][msg_pos]; + msg_pos++; + } + sigma_idx_expr += round_idx_sel[j] * sigma_sum; + } + sigma_idx <== sigma_idx_expr; + + // ═══════════════════════════════════════════════════════════════════════════ + // MESSAGE WORDS (m and ms) + // ═══════════════════════════════════════════════════════════════════════════ + + // Message index for permutation + col fixed MSG_IDX = [0, 1, 0, 2, 3, 0, 4, 5, 0, 6, 7, 0, + 8, 9, 0, 10, 11, 0, 12, 13, 0, 14, 15, 0]...; + + // Original message words (16-bit limbs for range checking) + col witness bits(16) m_limbs[2][2]; + const expr m[2] = [m_limbs[0][0] + P2_16 * m_limbs[0][1], m_limbs[1][0] + P2_16 * m_limbs[1][1]]; + + // Range check message limbs + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + range_check(expression: m_limbs[i][j], min: 0, max: P2_16 - 1); + } + } + + // Permuted message words (selected by sigma_idx) + col witness bits(32) ms[2]; + + // Permutation: map message index -> permuted message + // Perform the permutation + col witness bits(1) perm_active; + const expr perm_active_expr = clock_set(in_use, start: 0, end: CLOCKS - NUM_G_FUNCTIONS, step: 2, skip: 1); + perm_active <== perm_active_expr; + + permutation_assumes(opid: BLAKE2BR_PERMUTATION_ID, expressions: [MSG_IDX, m[0], m[1]], sel: perm_active); + permutation_proves(opid: BLAKE2BR_PERMUTATION_ID, expressions: [sigma_idx, ms[0], ms[1]], sel: perm_active); + + // ═══════════════════════════════════════════════════════════════════════════ + // G-FUNCTION STATE VARIABLES + // ═══════════════════════════════════════════════════════════════════════════ + + // G-function active selector (every 3 rows, skipping output rows) + col witness bits(1) g_active; + const expr g_active_expr = clock_set(in_use, start: 0, end: CLOCKS / CLOCKS_PER_G, step: 1, skip: 2); + g_active <== g_active_expr; + + // State variables: va, vb, vc, vd (each 64-bit) + // va, vc stored as 16-bit limbs for range checking + // vb, vd stored as individual bits for rotation operations + col witness bits(16) va_limbs[2][2]; + col witness bits(16) vc_limbs[2][2]; + col witness bits(1) vb[2][32]; + col witness bits(1) vd[2][32]; + + // Reconstructed 32-bit values + const expr va[2] = [va_limbs[0][0] + P2_16 * va_limbs[0][1], va_limbs[1][0] + P2_16 * va_limbs[1][1]]; + const expr vc[2] = [vc_limbs[0][0] + P2_16 * vc_limbs[0][1], vc_limbs[1][0] + P2_16 * vc_limbs[1][1]]; + + // Range check va and vc limbs + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + range_check(expression: va_limbs[i][j], min: 0, max: P2_16 - 1); + range_check(expression: vc_limbs[i][j], min: 0, max: P2_16 - 1); + } + } + + // Bit constraints for vb and vd + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 32; j++) { + vb[i][j] * (1 - vb[i][j]) === 0; + vd[i][j] * (1 - vd[i][j]) === 0; + } + } + + // Packed bit representations + const expr vb_packed[2] = [pack_bits(vb[0]), pack_bits(vb[1])]; + const expr vd_packed[2] = [pack_bits(vd[0]), pack_bits(vd[1])]; + + // ═══════════════════════════════════════════════════════════════════════════ + // G-FUNCTION INTERMEDIATE AND OUTPUT VALUES + // ═══════════════════════════════════════════════════════════════════════════ + + // Intermediate values + const expr va_im[2] = [va[0]', va[1]']; + const expr vc_im[2] = [vc[0]', vc[1]']; + const expr vb_im[2][32]; + const expr vd_im[2][32]; + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 32; j++) { + vb_im[i][j] = vb[i][j]'; + vd_im[i][j] = vd[i][j]'; + } + } + + const expr vb_im_packed[2] = [pack_bits(vb_im[0]), pack_bits(vb_im[1])]; + const expr vd_im_packed[2] = [pack_bits(vd_im[0]), pack_bits(vd_im[1])]; + + // Output values + const expr va_out[2] = [va[0]'2, va[1]'2]; + const expr vc_out[2] = [vc[0]'2, vc[1]'2]; + const expr vb_out[2][32]; + const expr vd_out[2][32]; + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 32; j++) { + vb_out[i][j] = vb[i][j]'2; + vd_out[i][j] = vd[i][j]'2; + } + } + + const expr vd_out_packed[2] = [pack_bits(vd_out[0]), pack_bits(vd_out[1])]; + + + // ═══════════════════════════════════════════════════════════════════════════ + // G-FUNCTION CONSTRAINTS (First Half) + // ═══════════════════════════════════════════════════════════════════════════ + // Operations: va' = va + vb + m[0] + // vd' = (vd ^ va') >>> 32 [iff va' == (vd' <<< 32) ^ vd] + // vc' = vc + vd' + // vb' = (vb ^ vc') >>> 24 [iff vc' == (vb' <<< 24) ^ vb] + + add3_check(va_im, va, vb_packed, ms); + rotl_xor_check(va_im, vd_im, vd, 32); + add2_check(vc_im, vc, vd_im_packed); + rotl_xor_check(vc_im, vb_im, vb, 24); + + // ═══════════════════════════════════════════════════════════════════════════ + // G-FUNCTION CONSTRAINTS (Second Half) + // ═══════════════════════════════════════════════════════════════════════════ + // Operations: va'' = va' + vb' + m[1] + // vd'' = (vd' ^ va'') >>> 16 [va'' == (vd'' <<< 16) ^ vd'] + // vc'' = vc' + vd'' + // vb'' = (vb' ^ vc'') >>> 63 [vc'' == (vb'' <<< 63) ^ vb'] + + const expr ms_next[2] = [ms[0]', ms[1]']; + add3_check(va_out, va_im, vb_im_packed, ms_next); + rotl_xor_check(va_out, vd_out, vd_im, 16); + add2_check(vc_out, vc_im, vd_out_packed); + rotl_xor_check(vc_out, vb_out, vb_im, 63); + + // ═══════════════════════════════════════════════════════════════════════════ + // COLUMN-TO-DIAGONAL TRANSITION CONSTRAINTS + // ═══════════════════════════════════════════════════════════════════════════ + // Ensure diagonal mixing inputs match column mixing outputs + + // va: diagonal inputs come from column outputs at specific offsets + (CLK[12] + CLK[15] + CLK[18] + CLK[21]) * (va[0] - 10'va[0]) === 0; + + // vb: different routing for each diagonal + (CLK[12] + CLK[15] + CLK[18]) * (vb_packed[0] - 7'vb_packed[0]) === 0; + CLK[21] * (vb_packed[0] - 19'vb_packed[0]) === 0; + + // vc: pairs have same routing + (CLK[12] + CLK[15]) * (vc[0] - 4'vc[0]) === 0; + (CLK[18] + CLK[21]) * (vc[0] - 16'vc[0]) === 0; + + // vd: first diagonal different from rest + (CLK[12]) * (vd_packed[0] - 'vd_packed[0]) === 0; + (CLK[15] + CLK[18] + CLK[21]) * (vd_packed[0] - 13'vd_packed[0]) === 0; + + // ═══════════════════════════════════════════════════════════════════════════ + // MEMORY INTERFACE + // ═══════════════════════════════════════════════════════════════════════════ + // + // MEMORY ACCESS MAP + // ========================================================= + // 0 | STEP_MAIN | R | ADDR_STATE | state[0] + // | ... | | ... | ... + // 15 | STEP_MAIN | R | ADDR_STATE + 120 | state[15] + // 16 | STEP_MAIN | R | ADDR_INPUT | input[0] + // | ... | | ... | ... + // 31 | STEP_MAIN | R | ADDR_INPUT + 120 | input[15] + // 32 | STEP_MAIN + 1 | W | ADDR_STATE | state[0] + // | ... | | ... | ... + // 47 | STEP_MAIN + 1 | W | ADDR_STATE + 120 | state[15] + // 48 | STEP_MAIN | R | ADDR_OP | index + // 49 | STEP_MAIN | R | ADDR_OP + 8 | ADDR_IND_0 + // 50 | STEP_MAIN | R | ADDR_OP + 16 | ADDR_IND_1 + // ========================================================= + // + // Memory Access Schedule (51 total operations, 4 in parallel): + // Slots 0-3: Read state[0..15] (16 ops) + // Slots 4-7: Read input[0..15] (16 ops) + // Slots 8-11: Write state'[0..15] (16 ops) + // Slot 12: Read index, addr_ind_0, addr_ind_1 (3 ops) + // + // Address indirection: + // - addr_state is read from addr_op + 8 + // - addr_input is read from addr_op + 16 + // - Both must match the actual memory addresses used + + col witness bits(40) step_addr; + + const int STEP_MAIN = 0; + const int ADDR_OP = STEP_MAIN + 1; + const int ADDR_STATE = ADDR_OP + 1; + const int ADDR_INPUT = ADDR_STATE + 1; + const int ADDR_IND_0 = ADDR_INPUT + 1; + const int ADDR_IND_1 = ADDR_IND_0 + 1; + + // Verify address indirections match + clock_eq(step_addr, ADDR_STATE, ADDR_IND_0) === 0; + clock_eq(step_addr, ADDR_INPUT, ADDR_IND_1) === 0; + + // Memory operation counts + const int NUM_STATE_WORDS = 16; + const int NUM_INPUT_WORDS = 16; + const int NUM_PARALLEL_MEM_OPS = 4; + const int WORD_SIZE = 8; // bytes per u64 + + const int CLOCKS_READ_STATE = NUM_STATE_WORDS / NUM_PARALLEL_MEM_OPS; + const int CLOCKS_READ_INPUT = NUM_INPUT_WORDS / NUM_PARALLEL_MEM_OPS; + const int CLOCKS_WRITE_STATE = NUM_STATE_WORDS / NUM_PARALLEL_MEM_OPS; + + // Memory operation configuration + const int CLK_SEQ_READ_PARAMS[1] = [2]; + const int CLK_SEQ_READ_STATE[4] = [0, 3, 6, 9]; + const int CLK_SEQ_READ_INPUT[4] = [1, 7, 13, 19]; + const int CLK_SEQ_WRITE_STATE[4] = [14, 17, 20, 23]; + const int CLK_SEQ_ALL_MEM_OPS[13] = [2, 0, 3, 6, 9, 1, 7, 13, 19, 14, 17, 20, 23]; // All the previous + + expr mem_sel = clock_set_with_seq(in_use, seq: CLK_SEQ_ALL_MEM_OPS); + const expr mem_is_write = clock_set_with_seq(seq: CLK_SEQ_WRITE_STATE); + const expr main_step = clock_shift_with_seq(step_addr, STEP_MAIN, seq: CLK_SEQ_ALL_MEM_OPS); + + // Generate 4 parallel memory ports + const expr clk_read_params = clock_set_with_seq(seq: CLK_SEQ_READ_PARAMS); + const expr clk_read_state = clock_set_with_seq(seq: CLK_SEQ_READ_STATE); + const expr clk_read_input = clock_set_with_seq(seq: CLK_SEQ_READ_INPUT); + const expr clk_write_state = clock_set_with_seq(seq: CLK_SEQ_WRITE_STATE); + for (int port = 0; port < NUM_PARALLEL_MEM_OPS; port++) { + const expr mem_addr = + // Op structure access: round_idx, ADDR_STATE ptr, ADDR_INPUT ptr + clock_shift(step_addr, ADDR_OP, + start: 2, + offset: WORD_SIZE * port, + delta: WORD_SIZE) + + // Read state: ports read state[port], state[port+4], state[port+8], state[port+12] + clock_shift(step_addr, ADDR_STATE, + start: 0, end: CLOCKS_READ_STATE, + step: 1, skip: 2, + offset: WORD_SIZE * NUM_PARALLEL_MEM_OPS * port, + delta: WORD_SIZE) + + // Read input: ports read input[port*4+0..3] across 4 clocks + clock_shift(step_addr, ADDR_INPUT, + start: 1, end: 1 + CLOCKS_READ_INPUT, + step: 1, skip: 5, + offset: WORD_SIZE * port, + delta: WORD_SIZE * NUM_PARALLEL_MEM_OPS) + + // Write state: ports write state'[port], state'[port+4], etc. + clock_shift(step_addr, ADDR_STATE, + start: 14, end: 14 + CLOCKS_WRITE_STATE, + step: 1, skip: 2, + offset_shift: port, + offset: WORD_SIZE * NUM_PARALLEL_MEM_OPS * port, + delta: WORD_SIZE); + + // Value calculation for each port + expr mem_value[2] = [0, 0]; + switch (port) { + case 0: + // Input index + mem_value[0] += clk_read_params * round_idx; + + // Input v + mem_value[0] += clk_read_state * va[0]; + mem_value[1] += clk_read_state * va[1]; + + // Input m + mem_value[0] += clk_read_input * 'm[0]; + mem_value[1] += clk_read_input * 'm[1]; + + // Output v + mem_value[0] += clk_write_state * va[0]; + mem_value[1] += clk_write_state * va[1]; + + case 1: + // Input addr_ind_0 + mem_value[0] += clock_shift(step_addr, ADDR_IND_0, 2); + + // Input v + mem_value[0] += clk_read_state * vb_packed[0]; + mem_value[1] += clk_read_state * vb_packed[1]; + + // Input m + mem_value[0] += clk_read_input * m[0]; + mem_value[1] += clk_read_input * m[1]; + + // Output v + mem_value[0] += clk_write_state * vb_packed[0]; + mem_value[1] += clk_write_state * vb_packed[1]; + + case 2: + // Input addr_ind_1 + mem_value[0] += clock_shift(step_addr, ADDR_IND_1, 2); + + // Input v + mem_value[0] += clk_read_state * vc[0]; + mem_value[1] += clk_read_state * vc[1]; + + // Input m + mem_value[0] += clk_read_input * m[0]'2; + mem_value[1] += clk_read_input * m[1]'2; + + // Output v + mem_value[0] += clk_write_state * vc[0]; + mem_value[1] += clk_write_state * vc[1]; + + case 3: + // Input v + mem_value[0] += clk_read_state * vd_packed[0]; + mem_value[1] += clk_read_state * vd_packed[1]; + + // Input m + mem_value[0] += clk_read_input * m[0]'3; + mem_value[1] += clk_read_input * m[1]'3; + + // Output v + mem_value[0] += clk_write_state * vd_packed[0]; + mem_value[1] += clk_write_state * vd_packed[1]; + + default: + error(`Unexpected port index: ${port}`); + } + + // Port 3 is unused at clock 2 (only 3 ops: round_idx, ADDR_STATE, ADDR_INPUT) + if (port == NUM_PARALLEL_MEM_OPS - 1) { + mem_sel -= CLK[2] * in_use; + } + + precompiled_mem_op( + is_write: mem_is_write, + sel: mem_sel, + main_step: main_step, + addr: mem_addr, + value: mem_value + ); + } + + // ═══════════════════════════════════════════════════════════════════════════ + // MAIN PROCESSOR INTEGRATION + // ═══════════════════════════════════════════════════════════════════════════ + col witness bits(1) in_use_clk_0; // 1 at the first clock cycle of the Blake2br operation, 0 otherwise + in_use_clk_0 <== CLK_0 * in_use; + + proves_operation(op: OP_BLAKE2BR, b: [step_addr'(ADDR_OP), 0], main_step: step_addr'(STEP_MAIN), + mul: in_use_clk_0); + + function pack_bits(const expr a[]): expr { + const int len = length(a); + expr packed = 0; + for (int j = 0; j < len; j++) { + packed += a[j] * 2**j; + } + return packed; + } + + // Checks a == (b + c + d) mod 2⁶⁴ + // Assumes a,b,c,d are all given as 2 limbs of 32-bits each, i.e., a = a0 + 2³²·a1 < 2⁶⁴ + // It also assumes that p > 3·2³² + // + // Adapted on https://github.com/Plonky3/Plonky3/blob/main/air/src/utils.rs#L82 + function add3_check(const expr a[], const expr b[], const expr c[], const expr d[]): expr[] { + assert(length(a) == 2 && length(b) == 2 && length(c) == 2 && length(d) == 2); + // a == (b + c + d) mod 2⁶⁴ iff a - b - c - d = 0, -2⁶⁴, -2·2⁶⁴ (over the integers) + + // Since we work over a finite field, we instead check that + // (1) a0 - b0 - c0 - d0 = 0, -2³², -2·2³² (mod p) + // (2) a - b - c - d = 0, -2⁶⁴, -2·2⁶⁴ (mod p) + + // Notice that since p > 2·2³² we have that (1) holds over the integers + // Now since a - b - c - d = (a0 - b0 - c0 - d0) + 2³²·(a1 - b1 - c1 - d1), + // we have that a - b - c - d = 0 (mod 2³²) (3) + + // Using CRT with (2) and (3) we obtain: + // a - b - c - d = 0, -2⁶⁴, -2·2⁶⁴ (mod 2³²·p) (4) + // Now, since a,b,c,d < 2⁶⁴ and 2³²·p > 3·2⁶⁴, we have that (4) holds over the integers + + expr sum_0 = a[0] - b[0] - c[0] - d[0]; + expr sum_1 = a[1] - b[1] - c[1] - d[1]; + expr sum = sum_0 + P2_32*sum_1; + + air.g_active * (sum_0 * (sum_0 + P2_32) * (sum_0 + 2*P2_32)) === 0; + air.g_active * (sum * (sum + P2_64) * (sum + 2*P2_64)) === 0; + } + + // Checks a == (b + c) mod 2⁶⁴ + // Assumes a,b,c are all given as 2 limbs of 32-bits each, i.e., a = a0 + 2³²·a1 < 2⁶⁴ + // It also assumes that p > 2·2³² + function add2_check(const expr a[], const expr b[], const expr c[]): expr[] { + assert(length(a) == 2 && length(b) == 2 && length(c) == 2); + + // a == (b + c) mod 2⁶⁴ iff a - b - c = 0, -2⁶⁴ (over the integers) + + // Since we work over a finite field, we instead check that + // (1) a0 - b0 - c0 = 0, -2³² (mod p) + // (2) a - b - c = 0, -2⁶⁴ (mod p) + + // Notice that since p > 2³² we have that (1) holds over the integers + // Now since a - b - c = (a0 - b0 - c0) + 2³²·(a1 - b1 - c1), + // we have that a - b - c = 0 (mod 2³²) (3) + + // Using CRT with (2) and (3) we obtain: + // a - b - c = 0, -2⁶⁴ (mod 2³²·p) (4) + // Now, since a,b,c < 2⁶⁴ and 2³²·p > 2·2⁶⁴, we have that (4) holds over the integers + + expr sum_0 = a[0] - b[0] - c[0]; + expr sum_1 = a[1] - b[1] - c[1]; + expr sum = sum_0 + P2_32*sum_1; + + air.g_active * (sum_0 * (sum_0 + P2_32)) === 0; + air.g_active * (sum * (sum + P2_64)) === 0; + } + + // Checks a == (b <<< n) ^ c + // Assumes a is given as 2 limbs of 32-bits each, i.e., a = a0 + 2³²·a1 + // Assumes b,c are both given unpacked as 64 bits, i.e., b = b0 + 2·b1 + ... + 2⁶³·b63 + function rotl_xor_check(const expr a[], const expr b[][], const expr c[][], const int n) { + assert(length(a) == 2 && length(b) == 2 && length(c) == 2); + assert(length(b[0]) == 32 && length(b[1]) == 32 && length(c[0]) == 32 && length(c[1]) == 32); + + // Unpack b and c into 64-bit arrays + expr b_unpacked[64]; + expr c_unpacked[64]; + for (int i = 0; i < 32; i++) { + b_unpacked[i] = b[0][i]; + b_unpacked[32 + i] = b[1][i]; + c_unpacked[i] = c[0][i]; + c_unpacked[32 + i] = c[1][i]; + } + + // Rotate + expr b_rot[64]; + for (int i = 0; i < 64; i++) { + b_rot[i] = b_unpacked[(i + 64 - n) % 64]; + } + + // XOR + expr xor_result[64]; + for (int i = 0; i < 64; i++) { + xor_result[i] = b_rot[i] + c_unpacked[i] - 2 * b_rot[i] * c_unpacked[i]; + } + + // Pack into two 32-bit limbs + expr packed_low = 0; + expr packed_high = 0; + for (int i = 0; i < 32; i++) { + packed_low += xor_result[i] * 2**i; + packed_high += xor_result[32 + i] * 2**i; + } + + // Check equality + air.g_active * (a[0] - packed_low) === 0; + air.g_active * (a[1] - packed_high) === 0; + + // Notice that these equalities also ensure that a[0], a[1] are range checked + } + + function clock_eq(const expr mvcol, int pos1, int pos2): const expr { + return air.CLK_0 * (mvcol'(pos1) - mvcol'(pos2)); + } + + function clock_set(const expr mvcol = 1, int start = 0, int end = -1, const int step = 1, const int skip = 0): const expr { + expr result = 0; + if (end == -1) { + end = start + 1; + } + + int clock_idx = start; + int count = 0; + int in_group = 0; + while (count < end - start) { + result += air.CLK[clock_idx]; + count++; + in_group++; + clock_idx++; + + // After 'step' consecutive clocks, skip 'skip' clocks + if (skip > 0 && in_group == step) { + clock_idx += skip; + in_group = 0; + } + } + return result * mvcol; + } + + function clock_set_with_seq(const expr mvcol = 1, const int seq[]): const expr { + const int len = length(seq); + assert(len > 0); + + expr result = 0; + for (int i = 0; i < len; i++) { + result += air.CLK[seq[i]]; + } + return result * mvcol; + } + + function clock_shift(const expr mvcol, const int pos, const int start = 0, int end = -1, const int step = 1, const int skip = 0, int offset = 0, const int delta = 0, const int offset_shift = 0): const expr { + expr result = 0; + if (end == -1) { + end = start + 1; + } + + const int num_clocks = end - start; + + int clock_idx = start; + int count = 0; + int in_group = 0; + while (count < num_clocks) { + const int col_idx = (pos - clock_idx) % air.CLOCKS; + // Compute the shifted offset index: (count + offset_shift) mod num_clocks + const int shifted_count = (count + offset_shift) % num_clocks; + const int current_offset = offset + delta * shifted_count; + + if (current_offset != 0) { + result += air.CLK[clock_idx] * (mvcol'(col_idx) + current_offset); + } else { + result += air.CLK[clock_idx] * mvcol'(col_idx); + } + + count++; + in_group++; + clock_idx++; + + // After 'step' consecutive clocks, skip 'skip' clocks + if (skip > 0 && in_group == step) { + clock_idx += skip; + in_group = 0; + } + } + return result; + } + + function clock_shift_with_seq(const expr mvcol, const int pos, const int seq[]): const expr { + const int len = length(seq); + assert(len > 0); + + expr result = 0; + for (int i = 0; i < len; i++) { + const int col_idx = (pos - seq[i]) % air.CLOCKS; + result += air.CLK[seq[i]] * mvcol'(col_idx); + } + return result; + } +} \ No newline at end of file diff --git a/precompiles/blake2/src/blake2.rs b/precompiles/blake2/src/blake2.rs new file mode 100644 index 000000000..36bdbddbf --- /dev/null +++ b/precompiles/blake2/src/blake2.rs @@ -0,0 +1,397 @@ +use core::panic; +use std::sync::Arc; + +use fields::PrimeField64; +use rayon::prelude::*; + +use pil_std_lib::Std; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +#[cfg(not(feature = "packed"))] +use zisk_pil::{Blake2brTrace, Blake2brTraceRow}; +#[cfg(feature = "packed")] +use zisk_pil::{Blake2brTracePacked, Blake2brTraceRowPacked}; +#[cfg(feature = "packed")] +type Blake2TraceRowType = Blake2brTraceRowPacked; +#[cfg(feature = "packed")] +type Blake2TraceType = Blake2brTracePacked; + +#[cfg(not(feature = "packed"))] +type Blake2TraceRowType = Blake2brTraceRow; +#[cfg(not(feature = "packed"))] +type Blake2TraceType = Blake2brTrace; + +use super::{ + blake2_constants::{CLOCKS, CLOCKS_PER_G, R1_G, R2_G, R3_G, R4_G, SIGMA}, + Blake2Input, +}; + +/// The `Blake2SM` struct encapsulates the logic of the Blake2 State Machine. +pub struct Blake2SM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + /// Number of available blake2s in the trace. + pub num_available_blake2s: usize, + + num_non_usable_rows: usize, + + range_id: usize, +} + +impl Blake2SM { + /// Creates a new Blake2 State Machine instance. + /// + /// # Returns + /// A new `Blake2SM` instance. + pub fn new(std: Arc>) -> Arc { + // Compute some useful values + let num_non_usable_rows = Blake2TraceType::::NUM_ROWS % CLOCKS; + let num_available_blake2s = + Blake2TraceType::::NUM_ROWS / CLOCKS - (num_non_usable_rows != 0) as usize; + + let range_id = std.get_range_id(0, (1 << 16) - 1, None).expect("Failed to get range ID"); + + Arc::new(Self { std, num_available_blake2s, num_non_usable_rows, range_id }) + } + + /// Processes a slice of operation data, updating the trace and multiplicities. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Blake2 trace. + /// * `num_circuits` - The number of circuits to process. + /// * `input` - The operation data to process. + /// * `multiplicity` - A mutable slice to update with multiplicities for the operation. + #[inline(always)] + pub fn process_input( + &self, + input: &Blake2Input, + trace: &mut [Blake2TraceRowType], + ) -> [u32; 65536] { + let mut range_checks = [0u32; 65536]; // 2^16 range checks for the 16-bit limbs + + let step_main = input.step_main; + let addr_main = input.addr_main; + let state_addr = input.state_addr; + let input_addr = input.input_addr; + let index = input.index as u8; + let state = &input.state; + let input = &input.input; + + // Fill the step_addr + trace[0].set_step_addr(step_main); // STEP_MAIN + trace[1].set_step_addr(addr_main as u64); // ADDR_OP + trace[2].set_step_addr(state_addr as u64); // ADDR_STATE + trace[3].set_step_addr(input_addr as u64); // ADDR_INPUT + trace[4].set_step_addr(state_addr as u64); // ADDR_IND_0 + trace[5].set_step_addr(input_addr as u64); // ADDR_IND_1 + + // Set latched columns + let idx_usize = index as usize; + for row in trace.iter_mut().take(CLOCKS) { + // Activate the in_use selector + row.set_in_use(true); + + // Set idx + row.set_round_idx(index); + + // Set idx_sel + row.set_round_idx_sel(idx_usize, true); + } + + // Set m columns + let mut m_idx = 0; + for (i, &inp) in input.iter().enumerate() { + let low_inp = [inp as u16, (inp >> 16) as u16]; + let high_inp = [(inp >> 32) as u16, (inp >> 48) as u16]; + for j in 0..2 { + trace[m_idx].set_m_limbs(0, j, low_inp[j]); + trace[m_idx].set_m_limbs(1, j, high_inp[j]); + } + range_checks[low_inp[0] as usize] += 1; + range_checks[low_inp[1] as usize] += 1; + range_checks[high_inp[0] as usize] += 1; + range_checks[high_inp[1] as usize] += 1; + + m_idx += 1; + if (i + 1) % (CLOCKS_PER_G - 1) == 0 { + m_idx += 1; + } + } + + // Set ms columns + let s = &SIGMA[idx_usize]; + let mut ms: [u64; 16] = [0u64; 16]; + m_idx = 0; + for i in 0..input.len() { + let inp = input[s[i]]; + ms[i] = inp; + + trace[m_idx].set_ms(0, inp as u32); + trace[m_idx].set_ms(1, (inp >> 32) as u32); + m_idx += 1; + if (i + 1) % (CLOCKS_PER_G - 1) == 0 { + m_idx += 1; + } + } + + // Column mixing + let (state0, state4, state8, state12) = compute_g_and_set_vs( + trace, + &mut range_checks, + 0, + &[state[0], state[4], state[8], state[12]], + &[ms[0], ms[1]], + ); + let (state1, state5, state9, state13) = compute_g_and_set_vs( + trace, + &mut range_checks, + 1, + &[state[1], state[5], state[9], state[13]], + &[ms[2], ms[3]], + ); + let (state2, state6, state10, state14) = compute_g_and_set_vs( + trace, + &mut range_checks, + 2, + &[state[2], state[6], state[10], state[14]], + &[ms[4], ms[5]], + ); + let (state3, state7, state11, state15) = compute_g_and_set_vs( + trace, + &mut range_checks, + 3, + &[state[3], state[7], state[11], state[15]], + &[ms[6], ms[7]], + ); + + // Diagonal mixing + compute_g_and_set_vs( + trace, + &mut range_checks, + 4, + &[state0, state5, state10, state15], + &[ms[8], ms[9]], + ); + compute_g_and_set_vs( + trace, + &mut range_checks, + 5, + &[state1, state6, state11, state12], + &[ms[10], ms[11]], + ); + compute_g_and_set_vs( + trace, + &mut range_checks, + 6, + &[state2, state7, state8, state13], + &[ms[12], ms[13]], + ); + compute_g_and_set_vs( + trace, + &mut range_checks, + 7, + &[state3, state4, state9, state14], + &[ms[14], ms[15]], + ); + + return range_checks; + + fn compute_g_and_set_vs( + trace: &mut [Blake2TraceRowType], + range_checks: &mut [u32; 65536], + i: usize, + v: &[u64; 4], + m: &[u64; 2], + ) -> (u64, u64, u64, u64) { + // Compute the g function + let (va, vb, vc, vd) = (v[0], v[1], v[2], v[3]); + let (va_i, vb_i, vc_i, vd_i) = compute_half_g(va, vb, vc, vd, m[0], R1_G, R2_G); + let (va_o, vb_o, vc_o, vd_o) = compute_half_g(va_i, vb_i, vc_i, vd_i, m[1], R3_G, R4_G); + + // Set va, vb, vc, vd columns + set_vs(&mut trace[3 * i], range_checks, va, vb, vc, vd); + set_vs(&mut trace[3 * i + 1], range_checks, va_i, vb_i, vc_i, vd_i); + set_vs(&mut trace[3 * i + 2], range_checks, va_o, vb_o, vc_o, vd_o); + + (va_o, vb_o, vc_o, vd_o) + } + + fn compute_half_g( + va: u64, + vb: u64, + vc: u64, + vd: u64, + m: u64, + r1: u32, + r2: u32, + ) -> (u64, u64, u64, u64) { + let va = va.wrapping_add(vb).wrapping_add(m); + let vd = (vd ^ va).rotate_right(r1); + let vc = vc.wrapping_add(vd); + let vb = (vb ^ vc).rotate_right(r2); + (va, vb, vc, vd) + } + + fn set_vs( + row: &mut Blake2TraceRowType, + range_checks: &mut [u32; 65536], + va: u64, + vb: u64, + vc: u64, + vd: u64, + ) { + let low_va = [va as u16, (va >> 16) as u16]; + let high_va = [(va >> 32) as u16, (va >> 48) as u16]; + for j in 0..2 { + row.set_va_limbs(0, j, low_va[j]); + row.set_va_limbs(1, j, high_va[j]); + } + range_checks[low_va[0] as usize] += 1; + range_checks[low_va[1] as usize] += 1; + range_checks[high_va[0] as usize] += 1; + range_checks[high_va[1] as usize] += 1; + + let low_vb = vb as u32; + let low_vb = u32_to_le_bits(low_vb); + let high_vb = (vb >> 32) as u32; + let high_vb = u32_to_le_bits(high_vb); + for j in 0..32 { + row.set_vb(0, j, low_vb[j]); + row.set_vb(1, j, high_vb[j]); + } + + let low_vc = [vc as u16, (vc >> 16) as u16]; + let high_vc = [(vc >> 32) as u16, (vc >> 48) as u16]; + for j in 0..2 { + row.set_vc_limbs(0, j, low_vc[j]); + row.set_vc_limbs(1, j, high_vc[j]); + } + range_checks[low_vc[0] as usize] += 1; + range_checks[low_vc[1] as usize] += 1; + range_checks[high_vc[0] as usize] += 1; + range_checks[high_vc[1] as usize] += 1; + + let low_vd = vd as u32; + let low_vd = u32_to_le_bits(low_vd); + let high_vd = (vd >> 32) as u32; + let high_vd = u32_to_le_bits(high_vd); + for j in 0..32 { + row.set_vd(0, j, low_vd[j]); + row.set_vd(1, j, high_vd[j]); + } + } + + fn u32_to_le_bits(x: u32) -> [bool; 32] { + let mut bits = [false; 32]; + for (i, bit) in bits.iter_mut().enumerate() { + if ((x >> i) & 1) != 0 { + *bit = true; + } + } + bits + } + } + + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + pub fn compute_witness( + &self, + inputs: &[Vec], + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = Blake2TraceType::new_from_vec_zeroes(trace_buffer)?; + let num_rows = trace.num_rows(); + let num_available_blake2s = self.num_available_blake2s; + + // Check that we can fit all the blake2s in the trace + let num_inputs = inputs.iter().map(|v| v.len()).sum::(); + let all_ops_used = num_inputs == num_available_blake2s; + let num_rows_filled = num_inputs * CLOCKS; + let num_rows_needed = if num_inputs < num_available_blake2s { + num_inputs * CLOCKS + } else if all_ops_used { + num_rows + } else { + panic!( + "Exceeded available Blake2s inputs: requested {}, but only {} are available.", + num_inputs, self.num_available_blake2s + ); + }; + + tracing::debug!( + "··· Creating Blake2 instance [{} / {} rows filled {:.2}%]", + num_rows_needed, + num_rows, + num_rows_needed as f64 / num_rows as f64 * 100.0 + ); + + timer_start_trace!(BLAKE2_TRACE); + + // Split trace into chunks for parallel processing + let mut trace_rows = trace.buffer.as_mut_slice(); + let mut par_traces = Vec::new(); + let mut inputs_indexes = Vec::new(); + for (i, inputs) in inputs.iter().enumerate() { + for (j, _) in inputs.iter().enumerate() { + let (head, tail) = trace_rows.split_at_mut(CLOCKS); + par_traces.push(head); + inputs_indexes.push((i, j)); + trace_rows = tail; + } + } + + // Fill the trace and collect range checks + let range_checks_vec: Vec<[u32; 65536]> = par_traces + .into_par_iter() + .enumerate() + .map(|(index, trace)| { + let input_index = inputs_indexes[index]; + let input = &inputs[input_index.0][input_index.1]; + self.process_input(input, trace) + }) + .collect(); + + // Aggregate all range checks + let mut range_checks = vec![0; 65536]; + for rc in range_checks_vec { + for i in 0..65536 { + range_checks[i] += rc[i]; + } + } + + timer_stop_and_log_trace!(BLAKE2_TRACE); + + let mut padding_row = Blake2TraceRowType::default(); + // In the no-op rows, the `idx` should be the same as the previous one until the end + // to make the constraint `(1 - CLK_0) * (idx - 'idx) === 0;` be satisfied + // As a consequence, one should also set idx_sel + if all_ops_used { + let prev_idx = trace.buffer[num_rows_filled - 1].get_round_idx(); + padding_row.set_round_idx(prev_idx); + padding_row.set_round_idx_sel(prev_idx as usize, true); + } + + trace.buffer[num_rows_filled..num_rows].par_iter_mut().for_each(|slot| *slot = padding_row); + + // Perform the zero range checks + let mut count_zeros = ((num_available_blake2s - num_inputs + + (self.num_non_usable_rows != 0) as usize) + * CLOCKS + + self.num_non_usable_rows) + * 12; // 12 range checked columns, 8 from va and vc, and 4 from m16 + count_zeros += 8 * num_inputs * 4; // m16 columns have one padding row per g function + // and there are 8 g functions per blake2 + range_checks[0] += count_zeros as u32; + + self.std.range_checks(self.range_id, range_checks); + + Ok(AirInstance::new_from_trace(FromTrace::new(&mut trace))) + } +} diff --git a/precompiles/blake2/src/blake2_bus_device.rs b/precompiles/blake2/src/blake2_bus_device.rs new file mode 100644 index 000000000..d4e798375 --- /dev/null +++ b/precompiles/blake2/src/blake2_bus_device.rs @@ -0,0 +1,143 @@ +//! The `Blake2Counter` module defines a counter for tracking blake2-related operations +//! sent over the data bus. It connects to the bus and gathers metrics for specific +//! `ZiskOperationType::Blake2` instructions. + +use std::ops::Add; + +use precompiles_common::MemProcessor; +use zisk_common::STEP; +use zisk_common::{ + BusDevice, BusDeviceMode, BusId, Counter, Metrics, B, OPERATION_BUS_ID, OP_TYPE, +}; +use zisk_core::ZiskOperationType; + +use crate::{generate_blake2_mem_inputs, skip_blake2_mem_inputs}; + +/// The `Blake2Counter` struct represents a counter that monitors and measures +/// blake2-related operations on the data bus. +/// +/// It tracks specific operation types (`ZiskOperationType`) and updates counters for each +/// accepted operation type whenever data is processed on the bus. +pub struct Blake2CounterInputGen { + /// Blake2 counter. + counter: Counter, + + /// Bus device mode (counter or input generator). + mode: BusDeviceMode, +} + +impl Blake2CounterInputGen { + /// Creates a new instance of `Blake2Counter`. + /// + /// # Arguments + /// * `bus_id` - The ID of the bus to which this counter is connected. + /// * `op_type` - A vector of `ZiskOperationType` instructions to monitor. + /// + /// # Returns + /// A new `Blake2Counter` instance. + pub fn new(mode: BusDeviceMode) -> Self { + Self { counter: Counter::default(), mode } + } + + /// Retrieves the count of instructions for a specific `ZiskOperationType`. + /// + /// # Arguments + /// * `op_type` - The operation type to retrieve the count for. + /// + /// # Returns + /// Returns the count of instructions for the specified operation type. + pub fn inst_count(&self, op_type: ZiskOperationType) -> Option { + (op_type == ZiskOperationType::Blake2).then_some(self.counter.inst_count) + } + + /// Processes data received on the bus, updating counters and generating inputs when applicable. + /// + /// # Arguments + /// * `bus_id` - The ID of the bus sending the data. + /// * `data` - The data received from the bus. + /// * `mem_processors` – A queue of mem_processors bus operations used to send derived inputs. + /// + /// # Returns + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data( + &mut self, + bus_id: &BusId, + data: &[u64], + mem_processors: &mut P, + ) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + if data[OP_TYPE] as u32 != ZiskOperationType::Blake2 as u32 { + return true; + } + + let step_main = data[STEP]; + let addr_main = data[B] as u32; + + match self.mode { + BusDeviceMode::Counter => { + self.measure(data); + generate_blake2_mem_inputs(addr_main, step_main, data, true, mem_processors); + } + BusDeviceMode::CounterAsm => { + self.measure(data); + } + BusDeviceMode::InputGenerator => { + if skip_blake2_mem_inputs(addr_main, data, mem_processors) { + return true; + } + generate_blake2_mem_inputs(addr_main, step_main, data, false, mem_processors); + } + } + + true + } +} + +impl Metrics for Blake2CounterInputGen { + /// Tracks activity on the connected bus and updates counters for recognized operations. + /// + /// # Arguments + /// * `_bus_id` - The ID of the bus (unused in this implementation). + /// * `_data` - The data received from the bus. + /// + /// # Returns + /// An empty vector, as this implementation does not produce any derived inputs for the bus. + #[inline(always)] + fn measure(&mut self, _data: &[u64]) { + self.counter.update(1); + } + + /// Provides a dynamic reference for downcasting purposes. + /// + /// # Returns + /// A reference to `self` as `dyn std::any::Any`. + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + +impl Add for Blake2CounterInputGen { + type Output = Blake2CounterInputGen; + + /// Combines two `Blake2Counter` instances by summing their counters. + /// + /// # Arguments + /// * `self` - The first `Blake2Counter` instance. + /// * `other` - The second `Blake2Counter` instance. + /// + /// # Returns + /// A new `Blake2Counter` with combined counters. + fn add(self, other: Self) -> Blake2CounterInputGen { + Blake2CounterInputGen { counter: &self.counter + &other.counter, mode: self.mode } + } +} + +impl BusDevice for Blake2CounterInputGen { + /// Provides a dynamic reference for downcasting purposes. + fn as_any(self: Box) -> Box { + self + } +} diff --git a/precompiles/blake2/src/blake2_constants.rs b/precompiles/blake2/src/blake2_constants.rs new file mode 100644 index 000000000..09418437b --- /dev/null +++ b/precompiles/blake2/src/blake2_constants.rs @@ -0,0 +1,39 @@ +use zisk_common::OPERATION_PRECOMPILED_BUS_DATA_SIZE; + +// Memory layout +pub const PARAMS: usize = 3; +pub const READ_PARAMS: usize = 2; +pub const DIRECT_READ_PARAMS: usize = 1; +pub const DIRECT_READ_PARAM_POS: usize = 0; +pub const WRITE_PARAMS: usize = 1; +pub const RESULT_PARAMS: usize = 0; +pub const PARAM_CHUNKS: usize = 16; +pub const START_READ_PARAMS: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + PARAMS; + +// Generic Parameters +pub const CLOCKS_PER_G: usize = 3; +pub const G_FUNCTIONS_COLUMN_MIXING: usize = 4; +pub const G_FUNCTIONS_DIAGONAL_MIXING: usize = 4; +pub const CLOCKS_COLUMN_MIXING: usize = CLOCKS_PER_G * G_FUNCTIONS_COLUMN_MIXING; +pub const CLOCKS_DIAGONAL_MIXING: usize = CLOCKS_PER_G * G_FUNCTIONS_DIAGONAL_MIXING; +pub const CLOCKS: usize = CLOCKS_COLUMN_MIXING + CLOCKS_DIAGONAL_MIXING; + +/// Message word permutation schedule +pub const SIGMA: [[usize; 16]; 10] = [ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3], + [11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4], + [7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8], + [9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13], + [2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9], + [12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11], + [13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10], + [6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5], + [10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0], +]; + +/// Rotation constants for G function +pub const R1_G: u32 = 32; +pub const R2_G: u32 = 24; +pub const R3_G: u32 = 16; +pub const R4_G: u32 = 63; diff --git a/precompiles/blake2/src/blake2_gen_mem_inputs.rs b/precompiles/blake2/src/blake2_gen_mem_inputs.rs new file mode 100644 index 000000000..0028eb5a4 --- /dev/null +++ b/precompiles/blake2/src/blake2_gen_mem_inputs.rs @@ -0,0 +1,111 @@ +use precompiles_common::MemBusHelpers; +use precompiles_common::MemProcessor; + +use zisk_common::OPERATION_PRECOMPILED_BUS_DATA_SIZE; +use zisk_core::blake2br; + +use crate::blake2_constants::{ + DIRECT_READ_PARAMS, PARAMS, PARAM_CHUNKS, READ_PARAMS, START_READ_PARAMS, +}; + +#[derive(Debug)] +pub struct Blake2MemInputConfig { + pub indirect_params: usize, + pub rewrite_params: bool, + pub read_params: usize, + pub write_params: usize, + pub chunks_per_param: usize, +} + +pub fn generate_blake2_mem_inputs( + addr_main: u32, + step_main: u64, + data: &[u64], + only_counters: bool, + mem_processors: &mut P, +) { + // data = [op,op_type,a,b,step,index,addr[2],state[16],input[16]] + + // Start by generating the params (direct, indirection write, indirection read) + for iparam in 0..PARAMS { + MemBusHelpers::mem_aligned_read( + addr_main + iparam as u32 * 8, + step_main, + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + iparam], + mem_processors, + ); + } + + // Generate memory load params + for iparam in 0..READ_PARAMS { + // let param_idx = if iparam >= DIRECT_READ_PARAM_POS { iparam + 1 } else { iparam }; + let param_idx = iparam + 1; + + let param_addr = data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + param_idx] as u32; + for ichunk in 0..PARAM_CHUNKS { + MemBusHelpers::mem_aligned_read( + param_addr + ichunk as u32 * 8, + step_main, + data[START_READ_PARAMS + iparam * PARAM_CHUNKS + ichunk], + mem_processors, + ); + } + } + + let mut write_data = [0u64; PARAM_CHUNKS]; + if !only_counters { + let index = data[OPERATION_PRECOMPILED_BUS_DATA_SIZE]; + let mut state: [u64; 16] = + data[START_READ_PARAMS..START_READ_PARAMS + PARAM_CHUNKS].try_into().unwrap(); + let input: [u64; 16] = data + [START_READ_PARAMS + PARAM_CHUNKS..START_READ_PARAMS + 2 * PARAM_CHUNKS] + .try_into() + .unwrap(); + blake2br(index, &mut state, &input); + write_data.copy_from_slice(&state); + } + + // verify write param + let write_addr = data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + DIRECT_READ_PARAMS] as u32; + for (ichunk, write_data) in write_data.iter().enumerate().take(PARAM_CHUNKS) { + let param_addr = write_addr + ichunk as u32 * 8; + MemBusHelpers::mem_aligned_write(param_addr, step_main, *write_data, mem_processors); + } +} + +pub fn skip_blake2_mem_inputs( + addr_main: u32, + data: &[u64], + mem_processors: &mut P, +) -> bool { + // Check all PARAMS words at addr_main (index, addr_state, addr_input) + for iparam in 0..PARAMS { + let addr = addr_main + iparam as u32 * 8; + if !mem_processors.skip_addr(addr) { + return false; + } + } + + // Check READ_PARAMS arrays (state and input, each PARAM_CHUNKS u64s) + for iparam in 0..READ_PARAMS { + let param_idx = iparam + 1; + let param_addr = data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + param_idx] as u32; + for ichunk in 0..PARAM_CHUNKS { + let addr = param_addr + ichunk as u32 * 8; + if !mem_processors.skip_addr(addr) { + return false; + } + } + } + + // Check write address (output state array) + let write_addr = data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + DIRECT_READ_PARAMS] as u32; + for ichunk in 0..PARAM_CHUNKS { + let addr = write_addr + ichunk as u32 * 8; + if !mem_processors.skip_addr(addr) { + return false; + } + } + + true +} diff --git a/precompiles/blake2/src/blake2_input.rs b/precompiles/blake2/src/blake2_input.rs new file mode 100644 index 000000000..3da43dc67 --- /dev/null +++ b/precompiles/blake2/src/blake2_input.rs @@ -0,0 +1,26 @@ +use zisk_common::OperationBlake2Data; + +#[derive(Debug)] +pub struct Blake2Input { + pub addr_main: u32, + pub step_main: u64, + pub index: u64, + pub state_addr: u32, + pub input_addr: u32, + pub state: [u64; 16], + pub input: [u64; 16], +} + +impl Blake2Input { + pub fn from(values: &OperationBlake2Data) -> Self { + Self { + addr_main: values[3] as u32, + step_main: values[4], + index: values[5], + state_addr: values[6] as u32, + input_addr: values[7] as u32, + state: values[8..24].try_into().unwrap(), + input: values[24..40].try_into().unwrap(), + } + } +} diff --git a/precompiles/blake2/src/blake2_instance.rs b/precompiles/blake2/src/blake2_instance.rs new file mode 100644 index 000000000..827f74bb8 --- /dev/null +++ b/precompiles/blake2/src/blake2_instance.rs @@ -0,0 +1,195 @@ +//! The `Blake2Instance` module defines an instance to perform the witness computation +//! for the Blake2 State Machine. +//! +//! It manages collected inputs and interacts with the `Blake2SM` to compute witnesses for +//! execution plans. + +use crate::{Blake2Input, Blake2SM}; +use fields::PrimeField64; +use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; +use std::{any::Any, collections::HashMap, sync::Arc}; +use zisk_common::ChunkId; +use zisk_common::{ + BusDevice, BusId, CheckPoint, CollectSkipper, ExtOperationData, Instance, InstanceCtx, + InstanceType, PayloadType, OPERATION_BUS_ID, OP_TYPE, +}; +use zisk_core::ZiskOperationType; +use zisk_pil::Blake2brTrace; + +/// The `Blake2Instance` struct represents an instance for the Blake2 State Machine. +/// +/// It encapsulates the `Blake2SM` and its associated context, and it processes input data +/// to compute witnesses for the Blake2 State Machine. +pub struct Blake2Instance { + /// Blake2 state machine. + blake2_sm: Arc>, + + /// Instance context. + ictx: InstanceCtx, +} + +impl Blake2Instance { + /// Creates a new `Blake2Instance`. + /// + /// # Arguments + /// * `blake2_sm` - An `Arc`-wrapped reference to the Blake2 State Machine. + /// * `ictx` - The `InstanceCtx` associated with this instance, containing the execution plan. + /// * `bus_id` - The bus ID associated with this instance. + /// + /// # Returns + /// A new `Blake2Instance` instance initialized with the provided state machine and + /// context. + pub fn new(blake2_sm: Arc>, ictx: InstanceCtx) -> Self { + Self { blake2_sm, ictx } + } + + pub fn build_blake2_collector(&self, chunk_id: ChunkId) -> Blake2Collector { + assert_eq!( + self.ictx.plan.air_id, + Blake2brTrace::::AIR_ID, + "Blake2Instance: Unsupported air_id: {:?}", + self.ictx.plan.air_id + ); + + let meta = self.ictx.plan.meta.as_ref().unwrap(); + let collect_info = meta.downcast_ref::>().unwrap(); + let (num_ops, collect_skipper) = collect_info[&chunk_id]; + Blake2Collector::new(num_ops, collect_skipper) + } +} + +impl Instance for Blake2Instance { + /// Computes the witness for the blake2 execution plan. + /// + /// This method leverages the `Blake2SM` to generate an `AirInstance` using the collected + /// inputs. + /// + /// # Arguments + /// * `_pctx` - The proof context, unused in this implementation. + /// + /// # Returns + /// An `Option` containing the computed `AirInstance`. + fn compute_witness( + &self, + _pctx: &ProofCtx, + _sctx: &SetupCtx, + collectors: Vec<(usize, Box>)>, + trace_buffer: Vec, + ) -> ProofmanResult>> { + let inputs: Vec<_> = collectors + .into_iter() + .map(|(_, collector)| collector.as_any().downcast::().unwrap().inputs) + .collect(); + + Ok(Some(self.blake2_sm.compute_witness(&inputs, trace_buffer)?)) + } + + /// Retrieves the checkpoint associated with this instance. + /// + /// # Returns + /// A `CheckPoint` object representing the checkpoint of the execution plan. + fn check_point(&self) -> &CheckPoint { + &self.ictx.plan.check_point + } + + /// Retrieves the type of this instance. + /// + /// # Returns + /// An `InstanceType` representing the type of this instance (`InstanceType::Instance`). + fn instance_type(&self) -> InstanceType { + InstanceType::Instance + } + + fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { + assert_eq!( + self.ictx.plan.air_id, + Blake2brTrace::::AIR_ID, + "Blake2Instance: Unsupported air_id: {:?}", + self.ictx.plan.air_id + ); + + let meta = self.ictx.plan.meta.as_ref().unwrap(); + let collect_info = meta.downcast_ref::>().unwrap(); + let (num_ops, collect_skipper) = collect_info[&chunk_id]; + Some(Box::new(Blake2Collector::new(num_ops, collect_skipper))) + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + +pub struct Blake2Collector { + /// Collected inputs for witness computation. + inputs: Vec, + + /// The number of operations to collect. + num_operations: u64, + + /// Helper to skip instructions based on the plan's configuration. + collect_skipper: CollectSkipper, +} + +impl Blake2Collector { + /// Creates a new `Blake2Collector`. + /// + /// # Arguments + /// + /// * `bus_id` - The connected bus ID. + /// * `num_operations` - The number of operations to collect. + /// * `collect_skipper` - The helper to skip instructions based on the plan's configuration. + /// + /// # Returns + /// A new `ArithInstanceCollector` instance initialized with the provided parameters. + pub fn new(num_operations: u64, collect_skipper: CollectSkipper) -> Self { + Self { + inputs: Vec::with_capacity(num_operations as usize), + num_operations, + collect_skipper, + } + } + + /// Processes data received on the bus, collecting the inputs necessary for witness computation. + /// + /// # Arguments + /// * `_bus_id` - The ID of the bus (unused in this implementation). + /// * `data` - The data received from the bus. + /// * `pending` – A queue of pending bus operations used to send derived inputs. + /// + /// # Returns + /// A tuple where: + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data(&mut self, bus_id: &BusId, data: &[PayloadType]) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + if self.inputs.len() == self.num_operations as usize { + return false; + } + + if data[OP_TYPE] as u32 != ZiskOperationType::Blake2 as u32 { + return true; + } + + if self.collect_skipper.should_skip() { + return true; + } + + let data: ExtOperationData = + data.try_into().expect("Regular Metrics: Failed to convert data"); + if let ExtOperationData::OperationBlake2Data(data) = data { + self.inputs.push(Blake2Input::from(&data)); + } else { + panic!("Expected ExtOperationData::OperationBlake2Data"); + } + + self.inputs.len() < self.num_operations as usize + } +} + +impl BusDevice for Blake2Collector { + fn as_any(self: Box) -> Box { + self + } +} diff --git a/precompiles/blake2/src/blake2_manager.rs b/precompiles/blake2/src/blake2_manager.rs new file mode 100644 index 000000000..18d073dee --- /dev/null +++ b/precompiles/blake2/src/blake2_manager.rs @@ -0,0 +1,80 @@ +use std::sync::Arc; + +use fields::PrimeField64; +use pil_std_lib::Std; +use zisk_common::{BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, InstanceInfo, Planner}; +use zisk_core::ZiskOperationType; +use zisk_pil::Blake2brTrace; + +use crate::{Blake2CounterInputGen, Blake2Instance, Blake2Planner, Blake2SM}; + +/// The `Blake2Manager` struct represents the Blake2 manager, +/// which is responsible for managing the Blake2 state machine and its table state machine. +#[allow(dead_code)] +pub struct Blake2Manager { + /// Blake2 state machine + blake2_sm: Arc>, +} + +impl Blake2Manager { + /// Creates a new instance of `Blake2Manager`. + /// + /// # Returns + /// An `Arc`-wrapped instance of `Blake2Manager`. + pub fn new(std: Arc>) -> Arc { + let blake2_sm = Blake2SM::new(std); + + Arc::new(Self { blake2_sm }) + } + + pub fn build_blake2_counter(&self, asm_execution: bool) -> Blake2CounterInputGen { + match asm_execution { + true => Blake2CounterInputGen::new(BusDeviceMode::CounterAsm), + false => Blake2CounterInputGen::new(BusDeviceMode::Counter), + } + } + + pub fn build_blake2_input_generator(&self) -> Blake2CounterInputGen { + Blake2CounterInputGen::new(BusDeviceMode::InputGenerator) + } +} + +impl ComponentBuilder for Blake2Manager { + /// Builds a planner to plan blake2-related instances. + /// + /// # Returns + /// A boxed implementation of `RegularPlanner`. + fn build_planner(&self) -> Box { + // Get the number of blake2s that a single blake2 instance can handle + let num_available_blake2s = self.blake2_sm.num_available_blake2s; + + Box::new(Blake2Planner::new().add_instance(InstanceInfo::new( + Blake2brTrace::::AIRGROUP_ID, + Blake2brTrace::::AIR_ID, + num_available_blake2s, + ZiskOperationType::Blake2, + ))) + } + + /// Builds an inputs data collector for blake2 operations. + /// + /// # Arguments + /// * `ictx` - The context of the instance, containing the plan and its associated + /// configurations. + /// + /// # Returns + /// A boxed implementation of `BusDeviceInstance` specific to the requested `air_id` instance. + /// + /// # Panics + /// Panics if the provided `air_id` is not supported. + fn build_instance(&self, ictx: InstanceCtx) -> Box> { + match ictx.plan.air_id { + id if id == Blake2brTrace::::AIR_ID => { + Box::new(Blake2Instance::new(self.blake2_sm.clone(), ictx)) + } + _ => { + panic!("Blake2Manager::build_instance() Unsupported air_id: {:?}", ictx.plan.air_id) + } + } + } +} diff --git a/precompiles/blake2/src/blake2_planner.rs b/precompiles/blake2/src/blake2_planner.rs new file mode 100644 index 000000000..e4cacaa82 --- /dev/null +++ b/precompiles/blake2/src/blake2_planner.rs @@ -0,0 +1,136 @@ +//! The `Blake2Planner` module defines a planner for generating execution plans specific to +//! arithmetic operations. +//! +//! It organizes execution plans for both regular instances and table instances, +//! leveraging arithmetic operation counts and metadata to construct detailed plans. + +use std::any::Any; + +use crate::Blake2CounterInputGen; + +use zisk_common::{ + plan, BusDeviceMetrics, CheckPoint, ChunkId, InstCount, InstanceInfo, InstanceType, Metrics, + Plan, Planner, TableInfo, +}; + +/// The `Blake2Planner` struct organizes execution plans for arithmetic instances and tables. +/// +/// It allows adding metadata about instances and tables and generates plans +/// based on the provided counters. +#[derive(Default)] +pub struct Blake2Planner { + /// Arithmetic instances info to be planned. + instances_info: Vec, + + /// Arithmetic table instances info to be planned. + tables_info: Vec, +} + +impl Blake2Planner { + /// Creates a new `Blake2Planner`. + /// + /// # Returns + /// A new `Blake2Planner` instance with no preconfigured instances or tables. + pub fn new() -> Self { + Self { instances_info: Vec::new(), tables_info: Vec::new() } + } + + /// Adds an arithmetic instance to the planner. + /// + /// # Arguments + /// * `instance_info` - The `InstanceInfo` describing the arithmetic instance to be added. + /// + /// # Returns + /// The updated `Blake2Planner` instance. + pub fn add_instance(mut self, instance_info: InstanceInfo) -> Self { + self.instances_info.push(instance_info); + self + } + + /// Adds an arithmetic table instance to the planner. + /// + /// # Arguments + /// * `table_info` - The `TableInfo` describing the arithmetic table instance to be added. + /// + /// # Returns + /// The updated `Blake2Planner` instance. + pub fn add_table_instance(mut self, table_info: TableInfo) -> Self { + self.tables_info.push(table_info); + self + } +} + +impl Planner for Blake2Planner { + /// Generates execution plans for arithmetic instances and tables. + /// + /// # Arguments + /// * `counters` - A vector of counters, each associated with a `ChunkId` and `ArithCounter` + /// metrics data. + /// + /// # Returns + /// A vector of `Plan` instances representing execution configurations for the instances and + /// tables. + /// + /// # Panics + /// Panics if any counter cannot be downcasted to an `ArithCounter`. + fn plan(&self, counters: Vec<(ChunkId, Box)>) -> Vec { + // Prepare counts + let mut count: Vec> = Vec::with_capacity(self.instances_info.len()); + + for _ in 0..self.instances_info.len() { + count.push(Vec::new()); + } + + counters.iter().for_each(|(chunk_id, counter)| { + let reg_counter = + Metrics::as_any(&**counter).downcast_ref::().unwrap(); + + // Iterate over `instances_info` and add `InstCount` objects to the correct vector + for (index, instance_info) in self.instances_info.iter().enumerate() { + let inst_count = InstCount::new( + *chunk_id, + reg_counter.inst_count(instance_info.op_type).unwrap(), + ); + + // Add the `InstCount` to the corresponding inner vector + count[index].push(inst_count); + } + }); + + let mut plan_result = Vec::new(); + + for (idx, instance) in self.instances_info.iter().enumerate() { + let plan: Vec<_> = plan(&count[idx], instance.num_ops as u64) + .into_iter() + .map(|(check_point, collect_info)| { + let converted: Box = Box::new(collect_info); + Plan::new( + instance.airgroup_id, + instance.air_id, + None, + InstanceType::Instance, + check_point, + Some(converted), + ) + }) + .collect(); + + plan_result.extend(plan); + } + + if !plan_result.is_empty() { + for table_instance in self.tables_info.iter() { + plan_result.push(Plan::new( + table_instance.airgroup_id, + table_instance.air_id, + None, + InstanceType::Table, + CheckPoint::None, + None, + )); + } + } + + plan_result + } +} diff --git a/precompiles/blake2/src/lib.rs b/precompiles/blake2/src/lib.rs new file mode 100644 index 000000000..5b2c9ccb6 --- /dev/null +++ b/precompiles/blake2/src/lib.rs @@ -0,0 +1,17 @@ +mod blake2; +mod blake2_bus_device; +mod blake2_constants; +mod blake2_gen_mem_inputs; +mod blake2_input; +mod blake2_instance; +mod blake2_manager; +mod blake2_planner; + +pub use blake2::*; +pub use blake2_bus_device::*; +pub use blake2_constants::*; +pub use blake2_gen_mem_inputs::*; +pub use blake2_input::*; +pub use blake2_instance::*; +pub use blake2_manager::*; +pub use blake2_planner::*; diff --git a/precompiles/common/Cargo.toml b/precompiles/common/Cargo.toml index dd772a536..aee745b1b 100644 --- a/precompiles/common/Cargo.toml +++ b/precompiles/common/Cargo.toml @@ -10,5 +10,7 @@ categories = { workspace = true } [dependencies] zisk-core = { workspace = true } zisk-common = { workspace = true } +sm-mem = { workspace = true } +mem-common = { workspace = true } -fields = { workspace = true } \ No newline at end of file +fields = { workspace = true } diff --git a/precompiles/common/src/lib.rs b/precompiles/common/src/lib.rs index f37f9081a..7b918f4d3 100644 --- a/precompiles/common/src/lib.rs +++ b/precompiles/common/src/lib.rs @@ -1,19 +1,25 @@ +//! Common utilities and helpers for Zisk precompiles. + mod goldilocks_constants; pub use goldilocks_constants::{get_ks, GOLDILOCKS_GEN, GOLDILOCKS_K}; -use std::collections::VecDeque; -use zisk_common::{BusId, MEM_BUS_ID}; +use mem_common::MemCounters; +use sm_mem::{MemAlignCollector, MemModuleCollector}; +use zisk_common::MEM_BUS_ID; use zisk_core::InstContext; +/// Represents a precompile operation code. #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] pub struct PrecompileCode(u16); impl PrecompileCode { + /// Creates a new precompile code from a u16 value. pub fn new(value: u16) -> Self { PrecompileCode(value) } + /// Returns the underlying u16 value of the precompile code. pub fn value(&self) -> u16 { self.0 } @@ -31,83 +37,339 @@ impl From for u16 { } } +/// Context for precompile execution. pub struct PrecompileContext {} +/// Trait for implementing precompile calls. pub trait PrecompileCall: Send + Sync { + /// Executes the precompile operation with the given opcode and instruction context. + /// Returns an optional tuple containing the result value and a boolean flag. fn execute(&self, opcode: PrecompileCode, ctx: &mut InstContext) -> Option<(u64, bool)>; } +/// Helper functions for memory bus operations. pub struct MemBusHelpers {} +/// Memory load operation code. const MEMORY_LOAD_OP: u64 = 1; +/// Memory store operation code. const MEMORY_STORE_OP: u64 = 2; +/// Base step for memory operations. const MEM_STEP_BASE: u64 = 1; +/// Maximum number of memory operations per main step. const MAX_MEM_OPS_BY_MAIN_STEP: u64 = 4; +/// Trait for processing memory operations - allows static dispatch +pub trait MemProcessor { + fn process_mem_data(&mut self, data: &[u64; 7]); + fn skip_addr(&mut self, addr: u32) -> bool; + fn skip_addr_range(&mut self, addr_from: u32, addr_to: u32) -> bool; +} + +/// Collector-based memory mem_processor +pub struct MemCollectorProcessor<'a> { + pub mem: &'a mut [(usize, MemModuleCollector)], + pub align: &'a mut [(usize, MemAlignCollector)], +} + +impl<'a> MemCollectorProcessor<'a> { + #[inline(always)] + pub fn new( + mem: &'a mut [(usize, MemModuleCollector)], + align: &'a mut [(usize, MemAlignCollector)], + ) -> Self { + Self { mem, align } + } +} + +impl MemProcessor for MemCollectorProcessor<'_> { + #[inline(always)] + fn process_mem_data(&mut self, data: &[u64; 7]) { + for collector in self.mem.iter_mut() { + collector.1.process_data(&MEM_BUS_ID, data); + } + for collector in self.align.iter_mut() { + collector.1.process_data(&MEM_BUS_ID, data); + } + } + + #[inline(always)] + fn skip_addr(&mut self, addr: u32) -> bool { + for collector in self.mem.iter_mut() { + if !collector.1.skip_addr(addr) { + return false; + } + } + true + } + + #[inline(always)] + fn skip_addr_range(&mut self, addr_from: u32, addr_to: u32) -> bool { + for collector in self.mem.iter_mut() { + if !collector.1.skip_addr_range(addr_from, addr_to) { + return false; + } + } + true + } +} + +/// Counter-based memory mem_processor +pub struct MemCounterProcessor<'a> { + pub counters: Option<&'a mut MemCounters>, +} + +impl<'a> MemCounterProcessor<'a> { + #[inline(always)] + pub fn new(counters: Option<&'a mut MemCounters>) -> Self { + Self { counters } + } +} + +impl MemProcessor for MemCounterProcessor<'_> { + #[inline(always)] + fn process_mem_data(&mut self, data: &[u64; 7]) { + if let Some(counters) = &mut self.counters { + counters.process_data(&MEM_BUS_ID, data); + } + } + + fn skip_addr(&mut self, _addr: u32) -> bool { + false + } + + fn skip_addr_range(&mut self, _addr_from: u32, _addr_to: u32) -> bool { + false + } +} + impl MemBusHelpers { - pub fn mem_aligned_load( + /// Generates an aligned memory read operation. + /// The address must be 8-byte aligned. + pub fn mem_aligned_read( addr: u32, step: u64, mem_value: u64, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processor: &mut P, ) { - assert!(addr % 8 == 0); - pending.push_back(( - MEM_BUS_ID, - vec![ - MEMORY_LOAD_OP, - addr as u64, - MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + 2, - 8, - mem_value, - 0, - 0, - ], - )); - } - pub fn mem_aligned_write( + debug_assert!(addr % 8 == 0); + let data: [u64; 7] = [ + MEMORY_LOAD_OP, + addr as u64, + MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + 2, + 8, + mem_value, + 0, + 0, + ]; + mem_processor.process_mem_data(&data); + } + + /// Generates an aligned memory write operation. + /// The address must be 8-byte aligned. + pub fn mem_aligned_write( addr: u32, step: u64, value: u64, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processor: &mut P, ) { - assert!(addr % 8 == 0); - pending.push_back(( - MEM_BUS_ID, - vec![ - MEMORY_STORE_OP, - addr as u64, - MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + 3, - 8, - 0, - 0, - value, - ], - )); - } - pub fn mem_aligned_op( + debug_assert!(addr % 8 == 0); + let data: [u64; 7] = [ + MEMORY_STORE_OP, + addr as u64, + MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + 3, + 8, + 0, + 0, + value, + ]; + mem_processor.process_mem_data(&data); + } + + /// Generates an aligned memory operation (load or write). + /// The address must be 8-byte aligned. + pub fn mem_aligned_op( addr: u32, step: u64, value: u64, is_write: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processor: &mut P, + ) { + let data: [u64; 7] = [ + if is_write { MEMORY_STORE_OP } else { MEMORY_LOAD_OP }, + addr as u64, + MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + if is_write { 3 } else { 2 }, + 8, + if is_write { 0 } else { value }, + 0, + if is_write { value } else { 0 }, + ]; + + mem_processor.process_mem_data(&data); + } + + /// Generates multiple aligned memory load operations from a slice of values. + /// The address must be 8-byte aligned. + pub fn mem_aligned_read_from_slice( + addr: u32, + step: u64, + values: &[u64], + mem_processor: &mut P, + ) { + assert!(addr % 8 == 0); + let mem_step = MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + 2; + for (i, &value) in values.iter().enumerate() { + let data: [u64; 7] = + [MEMORY_LOAD_OP, (addr as usize + i * 8) as u64, mem_step, 8, value, 0, 0]; + + mem_processor.process_mem_data(&data); + } + } + + /// Generates multiple aligned memory double load operations from a slice of values. This function + /// is useful for memcmp when are aligned because the words must be the same. At same time do dst + /// and src read. The address must be 8-byte aligned. + pub fn mem_double_aligned_read_from_slice( + dst: u32, + src: u32, + step: u64, + values: &[u64], + mem_processor: &mut P, + ) { + assert!(dst % 8 == 0); + assert!(src % 8 == 0); + let mut dst = dst as u64; + let mut src = src as u64; + let mem_step = MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + 2; + for value in values.iter() { + let mut data: [u64; 7] = [MEMORY_LOAD_OP, dst, mem_step, 8, *value, 0, 0]; + mem_processor.process_mem_data(&data); + data[1] = src; + mem_processor.process_mem_data(&data); + dst += 8; + src += 8; + } + } + /// Generates multiple aligned memory write operations from a slice of values. + /// The address must be 8-byte aligned. + pub fn mem_aligned_write_from_slice( + addr: u32, + step: u64, + values: &[u64], + mem_processor: &mut P, ) { - pending.push_back(( - MEM_BUS_ID, - vec![ - if is_write { MEMORY_STORE_OP } else { MEMORY_LOAD_OP }, - addr as u64, - MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + if is_write { 3 } else { 2 }, - 8, - if is_write { 0 } else { value }, - 0, - if is_write { value } else { 0 }, - ], - )); + assert!(addr % 8 == 0); + let mem_step = MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + 3; + for (i, &value) in values.iter().enumerate() { + let data: [u64; 7] = + [MEMORY_STORE_OP, (addr as usize + i * 8) as u64, mem_step, 8, 0, 0, value]; + mem_processor.process_mem_data(&data); + } + } + /// Generates multiple aligned memory write operations with same fill pattern + /// The address must be 8-byte aligned. + pub fn mem_aligned_write_pattern( + addr: u32, + step: u64, + value: u64, + count64: usize, + mem_processor: &mut P, + ) { + assert!(addr % 8 == 0); + let mem_step = MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + 3; + for i in 0..count64 { + let data: [u64; 7] = + [MEMORY_STORE_OP, (addr as usize + i * 8) as u64, mem_step, 8, 0, 0, value]; + + mem_processor.process_mem_data(&data); + } + } + /// Generates aligned memory writes from an unaligned read slice using the specified source offset. + /// The number of writes generated is `values.len() - 1` because the last value is not enough to + /// create a full 8-byte write. This function is useful to use the same slice of values to generate + /// first aligned reads and then aligned writes. + /// The address must be 8-byte aligned. + pub fn mem_aligned_write_from_read_unaligned_slice( + addr: u32, + step: u64, + src_offset: u8, + values: &[u64], + mem_processor: &mut P, + ) { + assert!(addr % 8 == 0); + let mem_step = MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + 3; + let write_count = values.len() - 1; + for i in 0..write_count { + let write_value = match src_offset { + 1 => (values[i] >> 8) | (values[i + 1] << 56), + 2 => (values[i] >> 16) | (values[i + 1] << 48), + 3 => (values[i] >> 24) | (values[i + 1] << 40), + 4 => (values[i] >> 32) | (values[i + 1] << 32), + 5 => (values[i] >> 40) | (values[i + 1] << 24), + 6 => (values[i] >> 48) | (values[i + 1] << 16), + 7 => (values[i] >> 56) | (values[i + 1] << 8), + _ => panic!("invalid src_offset {src_offset} on DmaUnaligned"), + }; + let data: [u64; 7] = + [MEMORY_STORE_OP, (addr as usize + i * 8) as u64, mem_step, 8, 0, 0, write_value]; + + mem_processor.process_mem_data(&data); + } + } + + /// Generates aligned memory reads from an unaligned read slice using the specified source offset. + /// This function is useful for memcmp, because at same time read src and dst like memcpy but only + /// with reads. The number of dst reads generated is `values.len() - 1` because the last value is not + /// enough to create a full 8-byte dst read. The address must be 8-byte aligned. + pub fn mem_aligned_read_from_read_unaligned_slice( + dst: u32, + src: u32, + step: u64, + src_offset: u8, + values: &[u64], + mem_processor: &mut P, + ) { + assert!(dst % 8 == 0); + assert!(src % 8 == 0); + let mut dst = dst as u64; + let mut src = src as u64; + let mem_step = MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + 2; + let write_count = values.len() - 1; + for i in 0..write_count { + let dst_value = match src_offset { + 1 => (values[i] >> 8) | (values[i + 1] << 56), + 2 => (values[i] >> 16) | (values[i + 1] << 48), + 3 => (values[i] >> 24) | (values[i + 1] << 40), + 4 => (values[i] >> 32) | (values[i + 1] << 32), + 5 => (values[i] >> 40) | (values[i + 1] << 24), + 6 => (values[i] >> 48) | (values[i + 1] << 16), + 7 => (values[i] >> 56) | (values[i + 1] << 8), + _ => panic!("invalid src_offset {src_offset} on DmaUnaligned"), + }; + let mut data: [u64; 7] = [MEMORY_LOAD_OP, dst, mem_step, 8, dst_value, 0, 0]; + mem_processor.process_mem_data(&data); + data[1] = src; + data[4] = values[i]; + mem_processor.process_mem_data(&data); + dst += 8; + src += 8; + } + let data: [u64; 7] = [MEMORY_LOAD_OP, src, mem_step, 8, values[write_count], 0, 0]; + mem_processor.process_mem_data(&data); + } + + /// Returns the memory read step for the given step number. + pub fn get_mem_read_step(step: u64) -> u64 { + MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + 2 + } + /// Returns the memory write step for the given step number. + pub fn get_mem_write_step(step: u64) -> u64 { + MEM_STEP_BASE + MAX_MEM_OPS_BY_MAIN_STEP * step + 3 } } +/// Calculates the base-2 logarithm of n (floor). pub fn log2(n: usize) -> usize { let mut res = 0; let mut n = n; diff --git a/precompiles/dma/Cargo.toml b/precompiles/dma/Cargo.toml new file mode 100644 index 000000000..c4dec9390 --- /dev/null +++ b/precompiles/dma/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "precomp-dma" +version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +keywords = { workspace = true } +repository = { workspace = true } +categories = { workspace = true } + +[dependencies] +zisk-core = { workspace = true } +zisk-common = { workspace = true } +zisk-pil = { workspace = true } +precompiles-common = { workspace = true } +precompiles-helpers = { workspace = true } +sm-mem = { workspace = true } +mem-common = { workspace = true } +lib-c = { workspace = true } + +proofman = { workspace = true } +proofman-common = { workspace = true } +proofman-macros = { workspace = true } +proofman-util = { workspace = true } +pil-std-lib = { workspace = true } +fields = { workspace=true } +tracing = { workspace = true } +rayon = { workspace = true } +generic-array = "0.14" + +[features] +default = [] +dma_memcmp = [] +debug_dma = [] +save_dma_collectors = [] +save_dma_inputs = [] +save_dma_plans = [] +debug_dma_gen_mem_inputs = [] +gpu = ["packed"] +packed = [] diff --git a/precompiles/dma/pil/dma.pil b/precompiles/dma/pil/dma.pil new file mode 100644 index 000000000..036ac9be4 --- /dev/null +++ b/precompiles/dma/pil/dma.pil @@ -0,0 +1,553 @@ +const int DMA_MEM_CPY = 1; +const int DMA_MEM_PRE_POST = 2; +const int DMA_MEM_EQ = 3; + +const int E_DMA_MEMCPY = 0x01; +const int E_DMA_MEMCMP = 0x02; +const int E_DMA_INPUTCPY = 0x04; +const int E_DMA_MEMSET = 0x08; + +const int E_DMA_ALL = E_DMA_MEMCPY | E_DMA_MEMCMP | E_DMA_INPUTCPY | E_DMA_MEMSET; + +const int DMA_64_ALIGNED_CONT_ID = 8200; +const int DMA_UNALIGNED_CONT_ID = 8201; + +const int OP_DMA_PRE_XMEMCPY = 1; +const int OP_DMA_PRE_INPUTCPY = 2; +const int OP_DMA_PRE_XMEMEQ = 3; +const int OP_DMA_PRE_XMEMCMP = 4; +const int OP_DMA_PRE_XMEMSET = 5; + +const int OP_DMA_POST_OFFSET = 10; + +const int OP_DMA_POST_XMEMCPY = OP_DMA_PRE_XMEMCPY + OP_DMA_POST_OFFSET; +const int OP_DMA_POST_INPUTCPY = OP_DMA_PRE_INPUTCPY + OP_DMA_POST_OFFSET; +const int OP_DMA_POST_XMEMEQ = OP_DMA_PRE_XMEMEQ + OP_DMA_POST_OFFSET; +const int OP_DMA_POST_XMEMCMP = OP_DMA_PRE_XMEMCMP + OP_DMA_POST_OFFSET; +const int OP_DMA_POST_XMEMSET = OP_DMA_PRE_XMEMSET + OP_DMA_POST_OFFSET; + +airtemplate Dma(int N = 2**21, + const int RC = 2, + const int enable = E_DMA_ALL, + const int enable_extended = 1) { + + const int enable_memcpy = (enable & E_DMA_MEMCPY) ? 1 : 0; + const int enable_memcmp = (enable & E_DMA_MEMCMP) ? 1 : 0; + const int enable_inputcpy = (enable & E_DMA_INPUTCPY) ? 1 : 0; + const int enable_memset = (enable & E_DMA_MEMSET) ? 1 : 0; + + assert(enable_extended == 0 || enable_extended == 1); + assert((enable_memcpy + enable_memcmp + enable_inputcpy + enable_memset) > 0); + assert(RC == 2); + + const int has_src = (enable_memcpy || enable_memcmp); + + // DMA: MEMCPY + // + // Unlike a CPU memcpy, the DMA memcpy is instantaneous from a temporal point of view all reads + // occur in the same step and all writes in the next step. In the case of reads there are no + // problems in performing the same read several times in the same step because the result is + // always the same in this moment. In the case of writes, this is not the case, because it would + // create an ambiguity to know the written value. Therefore, we must guarantee that each 64-bit + // value is written only once. + // + // The strategy that DMA follows in the case of memcpy is to divide the operation into 3 phases: + // + // 1. Pre-copy: initial bytes are copied to achieve at least destination alignment, to avoid + // double writes. + // + // 2. Fully or partially aligned copy. It's a loop where 64-bit values are copied. There are + // two variants: + // a) Aligned 64-bit copy: both source and destination addresses are aligned to 8 bytes. + // b) Unaligned 64-bit copy: the source address is not aligned to 8 bytes, but it's read in + // bytes and 64-bit values are reassembled. + // + // 3. Post-copy: final bytes that don't complete a 64-bit word are copied. + // + // Not all phases are necessary, they depend on the copy parameters. + // + // Example of fully aligned memcpy operation: + // + // PRE MEMCPY POST + // ┌──────────┐ ┌──────────────────────────────┐ ┌──────────┐ + // + // ┌──────┐ ┌────────┬────────┐ ┌────────┐ ┌────┐ + // src │ 0 │ │ 1 │ 2 │...│ n │ │ n+1│ + // └──────┘ └────────┴────────┘ └────────┘ └────┘ + // ↓ ↓ ↓ ↓ ↓ + // ┌───┬──────┐ ┌────────┬────────┐ ┌────────┐ ┌────┬─────┐ + // dst │ 0 │ │ 1 │ 2 │...│ n │ │ n+1 │ + // └───┴──────┘ └────────┴────────┘ └────────┘ └────┴─────┘ + // ^dst64 ^dst64+1 ^dst64+2 ^dst64+n ^dst64+n+1 <-- each dst64 address is + // written only once + // + // + // Example of partially aligned memcpy operation: + // + // PRE MEMCPY POST + // ┌──────────┐ ┌────────────────────────────────────────┐ ┌──────────┐ + // ┌────┐┌─ ┌────────┬────────┐ ┌────────┐┌─┬──────┐ ┌─┬───┐ + // src │ 0 ││1 │ 1 │ 2 │...│ n ││ n+1 │ │ n+1│ + // └────┘└─ └────────┴────────┘ └────────┘└─┴──────┘ └─┴───┘ + // ↓ ↓ ↓ ↙ ↓ ↙ ↓ ↙ ↙ + // ┌───┬──────┐ ┌────────┬────────┐ ┌────────┐ ┌────┬─────┐ + // dst │ 0 │ │ 1 │ 2 │...│ n │ ↑ │ n+1 │ + // └───┴──────┘ └────────┴────────┘ └────────┘ │ └────┴─────┘ + // ^dst64 ^dst64+1 ^dst64+2 ^dst64+n │ ^dst64+n+1 <-- each dst64 address is + // without written only once + // write + // + + + // main_step MAIN_STEP_BITS + // fill_byte 8 + // sel_memcpy 1 + // sel_memcmp 1 + // sel_memset 1 + // sel_extended 1 + // sel_inputcpy 1 + + // count_lt_256 1 + // h_count 24 + // l_count 9 + // count_diff_chunks 16 <=== + + // h_dst64 22 + // l_dst64 7 + // dst_offset 3 + + // h_src64 22 + // l_src64 7 + // src_offset 3 + // src_offset_after_pre 3 + // src64_inc_by_pre 1 + + // use_pre 1 + // use_loop 1 + // use_post 1 + // pre_count 3 + // l_count64 9 + + // pre_result_nz 1 + // post_result_nz 1 + // bus_pre_result[2] 32 + // bus_post_result[2] 32 + + // INTERMEDIATES + // loop_b0 32 + // loop_extended_arg 32 + // static_count 32 + // b0 32 + // extended_arg 32 + + + // temporal building expression for selector of any operation + expr _sel = 0; + + // temporal building expressions for operations codes of loop, pre and op_bus + expr _bus_op = 0; + expr _loop_op = 0; + expr _pre_op = 0; + if (enable_memcpy) { + col witness bits(1) air.sel_memcpy; + sel_memcpy * (1 - sel_memcpy) === 0; + _sel += sel_memcpy; + _bus_op += sel_memcpy * OP_DMA_MEMCPY; + _loop_op += sel_memcpy * OP_DMA_XMEMCPY; + _pre_op += sel_memcpy * OP_DMA_PRE_XMEMCPY; + } else { + const expr air.sel_memcpy = 0; + } + + if (enable_memcmp) { + col witness bits(1) air.sel_memcmp; + sel_memcmp * (1 - sel_memcmp) === 0; + _sel += sel_memcmp; + _bus_op += sel_memcmp * OP_DMA_MEMCMP; + _loop_op += sel_memcmp * OP_DMA_XMEMEQ; + _pre_op += sel_memcmp * OP_DMA_PRE_XMEMCMP; + } else { + const expr air.sel_memcmp = 0; + } + + if (enable_memset) { + col witness bits(1) air.sel_memset; + sel_memset * (1 - sel_memset) === 0; + + // fill_byte is send from main in extended_arg, this argument is static, known in + // compilation time, for this reason we don't need to validate its range check. + + col witness bits(8) air.fill_byte; + + _sel += sel_memset; + _bus_op += sel_memset * __OP_DMA_MEMSET__; + _loop_op += sel_memset * OP_DMA_XMEMSET; + _pre_op += sel_memset * OP_DMA_PRE_XMEMSET; + } else { + const int air.sel_memset = 0; + const int air.fill_byte = 0; + } + + if (enable_extended) { + col witness bits(1) air.sel_extended; + sel_extended * (1 - sel_extended) === 0; + _bus_op += sel_extended * OP_DMA_X_OFFSET; + } else { + const int air.sel_extended = 0; + } + + const expr sel_load_count_from_mem = _sel; + + if (enable_inputcpy) { + col witness bits(1) air.sel_inputcpy; + sel_inputcpy * (1 - sel_inputcpy) === 0; + + _sel += sel_inputcpy; + _bus_op += sel_inputcpy * OP_DMA_INPUTCPY; + _loop_op += sel_inputcpy * OP_DMA_INPUTCPY; + _pre_op += sel_inputcpy * OP_DMA_PRE_INPUTCPY; + } else { + const expr air.sel_inputcpy = 0; + } + + const expr use_src = sel_memcpy + sel_memcmp; + + const expr bus_op = _bus_op; + const expr loop_op = _loop_op; + const expr pre_op = _pre_op; + const expr post_op = _pre_op + OP_DMA_POST_OFFSET; + const expr sel = _sel; + sel * (1 - sel) === 0; + + // assuming max memory of 4GB (32 bits) + // count: 24 bits RC + 8 bits T + col witness bits(24) h_count; + + // What is count_lt_256 used for? + // + // If count were split in the traditional way into h_count|l_count there would be a problem + // because the DMA ROM has to check whether the count is small. + // + // But what would happen if the bits of l_count were 0 while h_count was not? + // The DMA ROM could not distinguish whether count is 0 or, for example, 1024 = 4|0 + // + // To avoid this, was defined that in such a case one is subtracted from h_count and 256 is added + // to l_count so that the overall count value remains the same and so that for any k up to 511, + // if l_count > k ==> count > k. + // + // To ensure this condition a flag count_lt_256 must be enforced to verify that h_count is actually + // 0 when l_count < 256, and the ROM verifies the consistency between count_lt_256 and l_count. + + col witness bits(1) count_lt_256; + col witness bits(9) l_count; // 0..255 + 256 = 9 bits + const expr count = h_count * 256 + l_count; + + count_lt_256 * (1 - count_lt_256) === 0; + + // constraint (1) + count_lt_256 * h_count === 0; + + // count count count cons Range h_count * 256 + // 31..8 7..0 h_count lt_256 (1) l_count Check Rom + l_count + // ----- ----- ------- ------ ---- ------- ----- ---- ------------- + // 0 x 0 1 OK x OK OK 0 | x OK + // 0 x 0 0 OK x OK FAIL 0 | x OK <== DETECTED + // 1 x 0 0 OK 256 + x OK OK 1 | x OK + // 1 x 0 1 OK 256 + x OK FAIL 1 | x OK <== DETECTED + // 1 x 1 1 FAIL x OK OK 1 | x OK <== DETECTED + // y > 1 x y - 1 0 OK 256 + x OK y | x OK + // MAX x MAX - 1 0 OK 256 + x OK MAX | x OK + // 0 x -1 0 OK 256 + x FAIL 0 | x OK <== DETECTED + // MAX x MAX 0 OK 256 + x FAIL MAX+1 | x FAIL <== DETECTED + // - - - - - x < 0 - FAIL - | - - <== DETECTED + // - - - - - x >=512 - FAIL - | - - <== DETECTED + + range_check(expression: h_count, min: 0, max: 2**24-1, sel: sel); + + // count_bus (well formed) + // count (chunks divided) + // count_bus - count === count_diff (chunks divided) + + const expr count_bus; + const expr count_diff; + if (enable_memcmp) { + count_bus = count + count_diff; + col witness bits(16) air.count_diff_chunks[2]; + count_diff = count_diff_chunks[0] + count_diff_chunks[1] * P2_16; + + range_check(expression: count_diff_chunks[0], min: 0, max: 0xFFFF, sel: sel_memcmp); + range_check(expression: count_diff_chunks[1], min: 0, max: 0xFFFF, sel: sel_memcmp); + + (sel_memcpy + sel_inputcpy + sel_memset) * count_diff === 0; + } else { + count_bus = count; + count_diff = 0; + } + // src_offset range verified with dma_rom table + + + // 32 bit address + // 22 bits RC + 7 bits RC + 3 bits T + col witness bits(22) h_dst64; + col witness bits(7) l_dst64; + col witness bits(3) dst_offset; + const expr dst = h_dst64 * 2**10 + l_dst64 * 2**3 + dst_offset; + const expr dst64 = h_dst64 * 2**10 + l_dst64 * 2**3; + // dst_offset range verified with dma_rom table + + col witness bits(MAIN_STEP_BITS) main_step; + + range_check(expression: h_dst64, min: 0, max: 2**22-1, sel: sel); + + // 32 bit address + // 22 bits RC + 5 bits RC + 3 bits T + if (has_src) { + col witness bits(22) air.h_src64; + col witness bits(7) air.l_src64; + col witness bits(3) air.src_offset; + col witness bits(3) air.src_offset_after_pre; + + col witness bits(1) air.src64_inc_by_pre; // src64 increment after apply memcpy_pre operation + src64_inc_by_pre * (1 - src64_inc_by_pre) === 0; + + const expr air.src = h_src64 * 2**10 + l_src64 * 2**3 + src_offset; + const expr air.src64 = h_src64 * 2**10 + l_src64 * 2**3; + if (enable_memset) { + (sel_memset + sel_inputcpy) * src_offset_after_pre === 0; + } + + range_check(expression: h_src64, min: 0, max: 2**22-1, sel: sel); + lookup_assumes(DUAL_RANGE_7_BITS_ID, expressions: [l_src64, l_dst64], sel: sel); + } else { + const expr air.src = 0; + const expr air.src64 = 0; + const expr air.src_offset = 0; + const expr air.src64_inc_by_pre = 0; + const expr air.src_offset_after_pre = 0; + range_check(expression: l_dst64, min: 0, max: 0x7F, sel: sel); + } + + col witness bits(1) use_pre; // use pre operation + col witness bits(1) use_loop; // use loop (aligned, unaligned) operation + col witness bits(1) use_post; // use post operation + + use_pre * (1 - use_pre) === 0; + use_loop * (1 - use_loop) === 0; + use_post * (1 - use_post) === 0; + + col witness bits(3) pre_count; // number of bytes of memcpy_pre operation + col witness bits(9) l_count64; // number of 64 bits words in l_count after substract pre_count and post_count + + const expr post_count = count - pre_count - h_count * 256 - l_count64 * 8; + + // if operation not selected then other selectors must be 0. + (1 - sel) * use_loop === 0; + (1 - sel) * use_pre === 0; + (1 - sel) * use_post === 0; + + const expr result_nz; + const expr bus_result[2]; + + if (enable_memcmp) { + col witness bits(1) air.pre_result_nz; + col witness bits(1) air.post_result_nz; + + pre_result_nz * (1 - pre_result_nz) === 0; + post_result_nz * (1 - post_result_nz) === 0; + + pre_result_nz * post_result_nz === 0; + + // previous constraint => result_nz * (1 - result_nz) === 0; + result_nz = pre_result_nz + post_result_nz; + + col witness bits(32) air.bus_pre_result[2]; + col witness bits(32) air.bus_post_result[2]; + + bus_result[0] = bus_pre_result[0] + bus_post_result[0] + dst * (1 - sel_memcmp); + bus_result[1] = bus_pre_result[1] + bus_post_result[1]; + + (bus_pre_result[0] + bus_pre_result[1]) * (bus_post_result[0] + bus_post_result[1]) === 0; + + // if use_pre == 0 the pre result must be zero + bus_pre_result[0] * (1 - use_pre) === 0; + bus_pre_result[1] * (1 - use_pre) === 0; + pre_result_nz * (1 - use_pre) === 0; + + // if use_post == 0 the post result must be zero + bus_post_result[0] * (1 - use_post) === 0; + bus_post_result[1] * (1 - use_post) === 0; + post_result_nz * (1 - use_post) === 0; + + // if use_post == 1 the result must come from post ==> bus_result_pre must be zero + bus_pre_result[0] * use_post === 0; + bus_pre_result[1] * use_post === 0; + pre_result_nz * use_post === 0; + + (1 - sel_memcmp) * pre_result_nz === 0; + (1 - sel_memcmp) * post_result_nz === 0; + + // if memcmp_result_is_zero means that bus_result must be zero + (1 - pre_result_nz) * bus_pre_result[0] === 0; + (1 - pre_result_nz) * bus_pre_result[1] === 0; + (1 - post_result_nz) * bus_post_result[0] === 0; + (1 - post_result_nz) * bus_post_result[1] === 0; + + // if memcmp_result_is_zero means count and count_eq are equals + (1 - result_nz) * count_diff === 0; + result_nz * (1 - sel_memcmp) === 0; + } else { + result_nz = 0; + // return dst address + bus_result[0] = dst; + bus_result[1] = 0; + const int air.post_result_nz = 0; + const int air.pre_result_nz = 0; + const int air.bus_pre_result[2] = [0, 0]; + const int air.bus_post_result[2] = [0, 0]; + } + + const expr flags = use_pre * DMA_ROM_USE_PRE_F + + use_loop * DMA_ROM_LOOP_F + + use_post * DMA_ROM_USE_POST_F + + src64_inc_by_pre * DMA_ROM_SRC64_INC_BY_PRE_F + + count_lt_256 * DMA_ROM_COUNT_LT_256_F + + result_nz * DMA_ROM_NEQ_F + + use_src * DMA_ROM_USE_SRC_F; + + lookup_assumes(DMA_ROM_ID, expressions: [dst_offset, src_offset, l_count, flags, pre_count, src_offset_after_pre, l_count64], sel: sel); + + // OP_DMA_MEMSET isn't implemented because usually memset used with static value known + // in compilation time, and for this case we could use OP_DMA_XMEMSET. For a OP_DMA_MEMSET + // we will need divide two extra columns for garbage in value to send to bus, value bits 8-31 and + // bits 32-63, and two additional range checks (8 bits, 24 bits) is too expensive for a few + // values. + + // OPCODES + // --------------------------------------------- + // OP_DMA_MEMCPY 0xD0 OP_DMA_XMEMCPY 0xD6 + // OP_DMA_MEMCMP 0xD1 OP_DMA_XMEMCMP 0xD7 + // OP_DMA_INPUTCPY 0xD2 + // OP_DMA_XMEMSET 0xD9 + // OP_DMA_XMEMEQ 0xDA + + + // BUS: DMA ===> DMA_64_ALIGNED (Definitions) + // BUS: DMA ===> DMA_UNALIGNED (Definitions) + + const expr loop_count = h_count * 256 + l_count64 * 8; + const expr loop_dst = dst64 + use_pre * 8; + const expr loop_src = src64 + src64_inc_by_pre * 8; + + expr _loop_b0 = loop_src * (sel_memcpy + sel_memcmp) + + loop_count * (sel_inputcpy + sel_memset); + + if (degree(_loop_b0) > 1) { + col witness bits(32) air.loop_b0; + loop_b0 <== _loop_b0; + } else { + const expr air.loop_b0 = _loop_b0; + } + + expr _loop_extended_arg = loop_count * (sel_memcpy + sel_memcmp) + fill_byte * sel_memset; + + if (degree(_loop_extended_arg) > 1) { + col witness bits(32) air.loop_extended_arg; + loop_extended_arg <== _loop_extended_arg; + } else { + const expr air.loop_extended_arg = _loop_extended_arg; + } + + // DMA ===> DMA_64_ALIGNED + // DMA ===> DMA_UNALIGNED + // -------------------------------------------------------------------------------- + // OP_DMA_XMEMCPY (dst, 0, src, 0, 0, 0, 0, main_step, count, src_offset_after_pre) + // OP_DMA_INPUTCPY (dst, 0, count, 0, 0, 0, 0, main_step, 0 , src_offset_after_pre) + // OP_DMA_XMEMEQ (dst, 0, src, 0, 0, 0, 0, main_step, count, src_offset_after_pre) + // OP_DMA_XMEMSET (dst, 0, count, 0, 0, 0, 0, main_step, value, src_offset_after_pre) + + assumes_operation(op: loop_op, a: [loop_dst, 0], + b: [loop_b0, 0], + c: [loop_dst, 0], + main_step:, + extended_arg: loop_extended_arg, + extra_args: [src_offset_after_pre], + sel: use_loop); + + // DMA ===> DMA_PRE_POST (PRE) + // -------------------------------------------------------------------------------- + // OP_DMA_PRE_XMEMCPY (dst, src, dst_offset, src_offset, pre_count, main_step) + // OP_DMA_PRE_INPUTCPY (dst, 0, dst_offset, 0, pre_count, main_step) + // OP_DMA_PRE_XMEMEQ (dst, src, dst_offset, src_offset, pre_count, main_step) + // OP_DMA_PRE_XMEMCMP (dst, src, dst_offset, src_offset, pre_count, main_step, 0, r_nz, R0, R1) + // OP_DMA_PRE_XMEMSET (dst, 0, dst_offset, 0, pre_count, main_step, value) + + permutation_assumes(DMA_BUS_ID, [pre_op, + dst64, + src64, + dst_offset, + src_offset, + pre_count, + main_step, + fill_byte, + pre_result_nz, + ...bus_pre_result + ], sel: use_pre); + + // DMA ===> DMA_PRE_POST (POST) + // -------------------------------------------------------------------------------- + // OP_DMA_POST_XMEMCPY (dst, src, 0, src_offset, post_count, main_step) + // OP_DMA_POST_INPUTCPY (dst, 0, 0, 0, post_count, main_step) + // OP_DMA_POST_XMEMEQ (dst, src, 0, src_offset, post_count, main_step) + // OP_DMA_POST_XMEMCMP (dst, src, 0, src_offset, post_count, main_step, r_nz, R0, R1) + // OP_DMA_POST_XMEMSET (dst, 0, 0, 0, post_count, main_step, value) + + const expr post_dst64 = dst64 + use_pre * 8 + l_count64 * 8 + h_count * 256; + const expr post_src64 = src64 + src64_inc_by_pre * 8 + l_count64 * 8 + h_count * 256; + permutation_assumes(DMA_BUS_ID, [post_op, + post_dst64, + post_src64, + 0, + src_offset_after_pre, + post_count, + main_step, + fill_byte, + post_result_nz, + ...bus_post_result], sel: use_post); + + // BUS: DMA ===> MAIN (Definitions) + + if (enable_extended) { + col witness bits(32) air.static_count; + static_count <== count_bus * sel_extended; + } else { + const expr air.static_count = 0; + } + + // BUS: DMA ===> MAIN + // -------------------------------------------------------------------------------- + // OP_DMA_MEMCPY (dst, 0, src, 0, 0, 0, 0, main_step) + MEM(count) + // OP_DMA_XMEMCPY (dst, 0, src, 0, 0, 0, 0, main_step, count) + // OP_DMA_INPUTCPY (dst, 0, count, 0, 0, 0, 0, main_step) + // OP_DMA_MEMCMP (dst, 0, src, 0, 0, R0, R1, main_step) + MEM(count) + // OP_DMA_XMEMCMP (dst, 0, src, 0, 0, R0, R1, main_step, count) + // OP_DMA_XMEMSET (dst, 0, count, 0, 0, 0, 0, main_step, value) + + proves_operation(op: bus_op, + a: [dst, 0], + b: [src * (sel_memcpy + sel_memcmp) + count * (sel_inputcpy + sel_memset), 0], + c: bus_result, + flag: 0, + main_step:, + extended_arg: static_count * (sel_memcpy + sel_memcmp) + fill_byte * sel_memset, + mul: sel); + + // BUS: DMA ===> MEM + // + // For OP_DMA_MEMCPY and OP_DMA_MEMCMP we need read the third parameter (count), to do it + // we read a specific position on RAM, EXTRA_PARAM_ADDR. + + precompiled_mem_load(sel: sel_load_count_from_mem - sel_extended, + main_step:, + addr: EXTRA_PARAMS_ADDR, + value: [count_bus, 0]); +} \ No newline at end of file diff --git a/precompiles/dma/pil/dma_64_aligned.pil b/precompiles/dma/pil/dma_64_aligned.pil new file mode 100644 index 000000000..ccd63598b --- /dev/null +++ b/precompiles/dma/pil/dma_64_aligned.pil @@ -0,0 +1,650 @@ +// The Dma64Aligned state-machine is used to verify full aligned 64-bit operations. +// +// This machine can be configured to enable each specific command individually, as well as to +// define the number of operations per row. Depending on the type of machine, it makes sense to +// support up to 8 ops. +// +// This machine can also be configured to support "direct operations". These are operations that +// do not require the involvement of the DMA controller, nor the pre or post dma operations. +// +// For an operation to be direct, its addresses must be aligned and its count must be a multiple of 8. +// On the other hand, this does not apply to memcmp, since it requires DMA in order to demonstrate +// the relationship between the count, the effective count, and the result of the operation. +// +// This machine has continuations, since an operation can span multiple rows. Each instantiation of +// this airtemplate generates a different air with an unique continuation ID, because they must not +// be mixed. +// +// This implies that an operation that has been launched for a specific AIR (airtemplate instantiation +// with a particular configuration) must be completed within the same AIR. + +airtemplate Dma64Aligned(int N = 2**21, // Rows of instance + const int RC = 2, // Number of chunks for native value + const int op_x_row = 4, // Number of operations by rows + const expr enable_flag = 1, // Expression to active the beggining of + // continuations, used for executions that + // not generates instances. + // + const int enable = E_DMA_ALL, // Enable inputcpy operations + int enable_count_load = -1, // Enable that dma_64_align prove directly + // Not extendend memcpy where need load count + // From memory position + // + int cont_subid = 0) { // Force subid used in continuations + + const int enable_memcpy = (enable & E_DMA_MEMCPY) ? 1 : 0; + const int enable_memcmp = (enable & E_DMA_MEMCMP) ? 1 : 0; + const int enable_inputcpy = (enable & E_DMA_INPUTCPY) ? 1 : 0; + const int enable_memset = (enable & E_DMA_MEMSET) ? 1 : 0; + const int enables_count = enable_memcpy + enable_memcmp + enable_inputcpy + enable_memset; + + if (enable_count_load == -1) { + enable_count_load = enable_memcpy; + } + // parameters verification + + assert(enable_count_load == 0 || enable_count_load == 1); + assert(cont_subid >= 0); + + // the enable_count_load to load count from memory it's used only by direct from main + // memcpy operation + + assert(enable_count_load == 0 || enable_memcpy == 1); + assert((enable_memcpy + enable_memcmp + enable_inputcpy + enable_memset) > 0); + assert(op_x_row >= 1); + assert(RC == 2); + + const int has_src = (enable_memcpy || enable_memcmp); + + // continuation control, used to create a new continuation subid to avoid collision + // between Dma64Aligned continuations. If you instance diferent airs with same template + // need diferenciate them. If parameter cont_subid = 0 means that the subid is auto-generated + + container proof.dma_64_aligned { + int cont_subids_count = 0; + int cont_subids[64]; + } + use proof.dma_64_aligned; + + if (cont_subid == 0) { + cont_subid = cont_subids_count + 1; + for (int i = 0; i < cont_subids_count; ++i) { + if (cont_subids[i] >= cont_subid) { + cont_subid = cont_subids[i] + 1; + } + } + } else { + for (int i = 0; i < cont_subids_count; ++i) { + assert(cont_subids[i] != cont_subid, `duplicated cont_subid ${cont_subid}`); + } + } + cont_subids[cont_subids_count] = cont_subid; + cont_subids_count += 1; + + // get reference to L1 fixed column and create LAST from L1. + + const expr L1 = get_L1(); + const expr LAST = L1'; + + // WITNESS + // ───────────────────────────────────────────────────────────────────────────────────────────── + // dst64 ADDR_W_BITS → 64-bit address memory dst + // continuations (increment + ops_selected) @[dst64] + // ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ + // src64 ADDR_W_BITS → 64-bit address memory src + // continuations (increment + ops_selected) @[src64] + // ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ + // count64 32 → 64-bit remain count + // continuations (increment - ops_selected) @[count64] + // no uses inversa, only check pass to zero, and in the segment + // was verified that no negative @[count64_zero_check] + // ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ + // seq_end 1 → indicator that it's last row of the sequence + // continuations, if seq_end at last row, the flags, and other + // values throw segments are zero. + // binary contraint @[seq_end_binary] + // ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ + // fill_byte 8 → continuations (latch) + // ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ + // main_step MAIN_STEP_BITS → continuations (latch) + // ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ + // sel_op_from_1 1 → row + // ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ + // l_value_chunks 8 → Use asymethric range check to reduce number of range checks + // h_value_chunks 24 → for 64-bits, 2 x 24 bits + dual 8 bits = 3 range checks + // ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ + // value 32 → row + // ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ + // sel_xmemcpy 1 ┐ + // sel_memeq 1 │ + // sel_memset 1 ├ @[flags] → continuations (latch) + // sel_inputcpy 1 ┘ + // sel_memcpy_count_load 1 this only was active first clock + // ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ + // b0 32 intermediate + // extended_arg 32 intermediate + // previous_seq_end 1 intermediate + + airval segment_id; // Id of current segment + airval segment_previous_seq_end; // Last value of @[seq_end] in previous segment. + segment_previous_seq_end * (1 - segment_previous_seq_end) === 0; + + airval segment_previous_dst64; // Last value of @[dst64] in previous segment. + airval segment_previous_main_step; // Last value of @[main_step] in previous segment. + airval segment_previous_count64; // Last value of @[count64] in previous segment. + airval segment_previous_flags; // Last value of @[flags] in previous segment. + + airval segment_last_seq_end; // Last value of @[seq_end] in current segment. + segment_last_seq_end * (1 - segment_last_seq_end) === 0; + + airval segment_last_dst64; // Last value of @[dst64] in current segment. + airval segment_last_main_step; // Last value of @[main_step] in current segment. + airval segment_last_count64; // Last value of @[count64] in current segment. + airval segment_last_flags; // Last value of @[flags] in current segment. + + airval is_last_segment; // 1 if this is the last segment, 0 otherwise. + is_last_segment * (1 - is_last_segment) === 0; + + // the inputcpy or memset doesn't has source, they don't use it + + if (has_src) { + col witness bits(ADDR_W_BITS) air.src64; + airval air.segment_previous_src64; // Last value of @[src64] in previous segment. + airval air.segment_last_src64; // Last value of @[src64] in current segment. + } else { + const int air.segment_previous_src64 = 0; + const int air.segment_last_src64 = 0; + } + + // This witness is used to mark the last row of an onperation. + col witness bits(1) seq_end; + seq_end * (1 - seq_end) === 0; + + col witness bits(1) previous_seq_end; + previous_seq_end <== L1 * (segment_previous_seq_end - 'seq_end) + 'seq_end; + + // temporal building expression for selector of any operation + expr _sel = 0; + + // temporal building expression for operation code + expr _op = 0; + + // temporal building expression for segment flags + expr _flags = 0; + + expr _sel_mem_load = 0; + + const int enable_full = enable_memcpy && enable_memcmp && enable_memset && enable_inputcpy && + enable_count_load; + + const int F_SEL_MEMCPY = 1; + const int F_SEL_MEMCMP = 2; + const int F_SEL_INPUTCPY = 4; + const int F_SEL_MEMSET = 8; + + if (enable_memcpy) { + col witness bits(1) air.sel_memcpy; + sel_memcpy * (1 - sel_memcpy) === 0; + _sel += sel_memcpy; + _sel_mem_load += sel_memcpy; + _op += sel_memcpy * OP_DMA_XMEMCPY; + _flags += F_SEL_MEMCPY * sel_memcpy; + } else { + const expr air.sel_memcpy = 0; + } + + if (enable_memcmp) { + col witness bits(1) air.sel_memeq; + sel_memeq * (1 - sel_memeq) === 0; + _sel += sel_memeq; + _sel_mem_load += sel_memeq; + _op += sel_memeq * OP_DMA_XMEMEQ; + _flags += F_SEL_MEMCMP * sel_memeq; + } else { + const expr air.sel_memeq = 0; + } + + if (enable_memset) { + col witness bits(1) air.sel_memset; + sel_memset * (1 - sel_memset) === 0; + + // fill_byte is send from main in extended_arg, this argument is static, known in + // compilation time, for this reason we don't need to validate its range check. + + col witness bits(8) air.fill_byte; + const expr air.fill_dword = fill_byte + fill_byte * P2_8 + fill_byte * P2_16 + fill_byte * P2_24; + + airval air.segment_previous_fill_byte; // Last value of `fill_byte` in previous segment. + airval air.segment_last_fill_byte; // Last value of `fill_byte` in current segment. + + LAST * (1 - seq_end) * (fill_byte - segment_last_fill_byte) === 0; + + // if sel_memset is disabled fill_byte must be zero, it's an optimization for extended_arg + (1 - sel_memset) * fill_byte === 0; + + // when segment_last_seq_end = 1, means that the next segment starts with new input, no need to + // compare with previous, to simplify WC of continuations, zeros are sent to bus + // these constraints aren't strictly necessary, because these values are used only when seq_end = 0 + + segment_last_fill_byte * segment_last_seq_end === 0; + segment_previous_fill_byte * segment_previous_seq_end === 0; + + _sel += sel_memset; + _op += sel_memset * OP_DMA_XMEMSET; + _flags += F_SEL_MEMSET * sel_memset; + } else { + const int air.sel_memset = 0; + const int air.fill_byte = 0; + const int air.fill_dword = 0; + const int air.segment_previous_fill_byte = 0; // Last value of `fill_byte` in previous segment. + const int air.segment_last_fill_byte = 0; // Last value of `fill_byte` in current segment. + } + + if (enable_count_load) { + col witness bits(1) air.sel_memcpy_count_load; + sel_memcpy_count_load * (1 - sel_memcpy_count_load) === 0; + + // enable read the counter from memory only could be active if operation memcpy is active. + sel_memcpy_count_load * (1 - sel_memcpy) === 0; + + // this selector only could be active the first clock of operation, after that must be 0. + sel_memcpy_count_load * (1 - previous_seq_end) === 0; + + // the selector "update" the operation from XMEMCPY to regular MEMCPY + _op += sel_memcpy_count_load * (OP_DMA_MEMCPY - OP_DMA_XMEMCPY); + } else { + const int air.sel_memcpy_count_load = 0; + } + + if (enable_inputcpy) { + col witness bits(1) air.sel_inputcpy; + sel_inputcpy * (1 - sel_inputcpy) === 0; + + _sel += sel_inputcpy; + _op += sel_inputcpy * OP_DMA_INPUTCPY; + _flags += F_SEL_INPUTCPY * sel_inputcpy; + } else { + const expr air.sel_inputcpy = 0; + } + + const expr op = _op; + const expr flags = _flags; + const expr sel = _sel; + const expr sel_mem_load = _sel_mem_load; + sel * (1 - sel) === 0; + + // Binary constraints + + // main step is "time" when operation is done + + col witness bits(MAIN_STEP_BITS) main_step; // @[main_step] timestamp of operation + col witness bits(ADDR_W_BITS) dst64; // @[dst64] 64-bit destination address + col witness bits(32) count64; // @[count64] number of operation 64-bits words + + // We have the column sel to indicate when row is used, and if row is used means at least + // sel_op[0] must be 1, for this reason if we have multiple operations for rows define an + // array of columns (sel_op_from_1) defined from 1 to op_x_row-1 + + const expr sel_op[op_x_row]; + sel_op[0] = sel; + + if (op_x_row > 1) { + col witness bits(1) air.sel_op_from_1[op_x_row-1]; + expr _ops_selected = sel_op[0]; + for (int i = 1; i < op_x_row; ++i) { + sel_op_from_1[i-1] * (1 - sel_op_from_1[i-1]) === 0; + sel_op[i] = sel_op_from_1[i-1]; + // if previous sel_op it's zero, the rest must be zero. + (1 - sel_op[i-1]) * sel_op[i] === 0; + _ops_selected += sel_op[i]; + } + const expr air.ops_selected = _ops_selected; + } else { + const expr air.ops_selected = sel; + } + + // The flag is_mem_eq is used to mark and memcmp equal operation, means that the operations was + // read - read and value it's the same to match + + // To enable that this machine could be used to operations without dma controller, for these + // reason used a operation bus like rest of operations. If this machine was used on non aligned + // address, there are two posibilities + // - count64 ∈ [0,1] ⇒ it's correct, too expensive (use mem_aligns) but correct. + // - count64 > 1 ⇒ has two write in same "timestamp" and address, memory can't demostrait it. + // + // This machine has three modes to demostrate operations: + // - secondary of dma, in this case all parameters are sent to bus in same assume/prove (@[sel_xmemcpy]) + // - direct using memory, loading the count64 from memory (@[sel_memcpy_load_count]) + // - direct using bus (extended argument with static value) (@[sel_xmemcpy]) + // + + if (enable_count_load) { + precompiled_mem_load(addr: EXTRA_PARAMS_ADDR, + value: [count64 * 8, 0], + main_step:, + sel: sel_memcpy_count_load); + } + + LAST * (seq_end - segment_last_seq_end) === 0; + LAST * (1 - seq_end) * (dst64 - segment_last_dst64) === 0; // continuation: last row @[dst64_cont] + LAST * (1 - seq_end) * (main_step - segment_last_main_step) === 0; + LAST * (1 - seq_end) * (count64 - segment_last_count64) === 0; + LAST * (1 - seq_end) * (flags - segment_last_flags) === 0; + + // when segment_last_seq_end = 1, means that the next segment starts with new input, no need to + // compare with previous, to simplify WC of continuations, zeros are sent to bus + // these constraints aren't strictly necessary, because these values are used only when seq_end = 0 + + segment_last_dst64 * segment_last_seq_end === 0; // continuation: reset by seq end @[dst64_cont] + segment_last_count64 * segment_last_seq_end === 0; + segment_last_main_step * segment_last_seq_end === 0; + segment_last_flags * segment_last_seq_end === 0; + + // when segment_previous_seq_end = 1, means that this segment starts with new input, no need to + // compare with previous, to simplify WC of continuations, zeros are sent to bus + // these constraints aren't strictly necessary, because these values are used only when seq_end = 0 + + segment_previous_dst64 * segment_previous_seq_end === 0; + segment_previous_count64 * segment_previous_seq_end === 0; + segment_previous_main_step * segment_previous_seq_end === 0; + segment_previous_flags * segment_previous_seq_end === 0; + + if (has_src) { + LAST * (1 - seq_end) * (src64 - segment_last_src64) === 0; + const expr air.previous_src64 = L1 * (segment_previous_src64 - 'src64) + 'src64; + segment_previous_src64 * segment_previous_seq_end === 0; + segment_last_src64 * segment_last_seq_end === 0; + } + + // Continuations + + // AIR_ID, segment_id, seq_end, src64, dst64, count64, main_step + + direct_global_update_proves(DMA_64_ALIGNED_CONT_ID, + [0, + 0, + cont_subid, // continuations sub id + 1, // initial seq_end + 0, // initial src64 + 0, // initial dst64 + 0, // initial count64 + 0, // initial main_step + 0, // initial fill byte + 0], // initial flags + sel: enable_flag); + + + direct_update_assumes(DMA_64_ALIGNED_CONT_ID, + [segment_id, + 0, + cont_subid, + segment_previous_seq_end, + segment_previous_src64, + segment_previous_dst64, + segment_previous_count64, + segment_previous_main_step, + segment_previous_fill_byte, + segment_previous_flags]); + + direct_update_proves(DMA_64_ALIGNED_CONT_ID, [ + segment_id + 1, + is_last_segment, + cont_subid, + segment_last_seq_end, + segment_last_src64, + segment_last_dst64, + segment_last_count64, + segment_last_main_step, + segment_last_fill_byte, + segment_last_flags] + , sel: 1 - is_last_segment); + + + if (enable_inputcpy) { + const expr air.value[op_x_row][RC]; + col witness bits(8) l_value_chunks[op_x_row][RC]; + col witness bits(24) h_value_chunks[op_x_row][RC]; + assert(RC == 2); + for (int i = 0; i < op_x_row; i++) { + value[i][0] = l_value_chunks[i][0] + h_value_chunks[i][0] * P2_8; + value[i][1] = l_value_chunks[i][1] + h_value_chunks[i][1] * P2_8; + + // PERFORMANCE OPTIMIZATION: In this instance it's a non-specific instance, only use + // this range checks with the input it's select, to avoid "compute" this + // range check in other situation where it didn't used. + + if (enables_count >= 2) { + range_check(h_value_chunks[i][0], 0, P2_24-1, sel_inputcpy); + range_check(h_value_chunks[i][1], 0, P2_24-1, sel_inputcpy); + range_dual_byte(l_value_chunks[i][1], l_value_chunks[i][0], sel_inputcpy); + } else { + range_check(h_value_chunks[i][0], 0, P2_24-1, sel_op[i]); + range_check(h_value_chunks[i][1], 0, P2_24-1, sel_op[i]); + range_dual_byte(l_value_chunks[i][1], l_value_chunks[i][0], sel_op[i]); + } + } + } else if (has_src) { + col witness bits(32) air.value[op_x_row][RC]; + } else if (enable_memset) { + const expr air.value[op_x_row][RC]; + for (int i = 0; i < op_x_row; i++) { + for (int irc = 0; irc < RC; ++ irc) { + value[i][irc] = fill_dword; + } + } + } else { + assert(false, "Invalid case"); + } + + if (enable_memset && has_src) { + // a new col must be separated for memload. + col witness bits(1) air.sel_op_mem_load[op_x_row]; + } else { + const expr air.sel_op_mem_load[op_x_row]; + } + + for (int i = 0; i < op_x_row; i++) { + if (has_src) { + if (enable_memset) { + sel_op_mem_load[i] * (1 - sel_op_mem_load[i]) === 0; + sel_op_mem_load[i] <== sel_op[i] * sel_mem_load; + } else { + sel_op_mem_load[i] = sel_op[i]; + } + precompiled_mem_load( + sel: sel_op_mem_load[i], + main_step:, + addr: src64 * 8 + 8 * i, + value: value[i] + ); + } + + + if (enable_memset && enables_count > 1) { + // add constraint to force that memset operation value "read" must be fill word + // only if this instance isn't a dedicated memset instance. In case of dedicated + // instance value really was directly fill_dword. + sel_op[i] * sel_memset * (value[i][0] - fill_dword) === 0; + sel_op[i] * sel_memset * (value[i][1] - fill_dword) === 0; + } + + // load previous value before write operation, because only some bytes must be written + // only in case of memcpy, inputcpy and memset really write to memory, in memcmp do + // a read with same value, this value must match with src and dst. + precompiled_mem_op( + is_write: sel_memcpy + sel_memset + sel_inputcpy, + sel: sel_op[i], + main_step:, + addr: dst64 * 8 + 8 * i, + value: value[i] + ); + } + + const expr continue_seq_on_l1 = L1 * (1 - segment_previous_seq_end); + const expr continue_seq_on_no_l1 = (1 - L1) * (1 - 'seq_end); + + // const expr new_seq_on_l1 = L1 * segment_previous_seq_end; + // const expr new_seq_on_no_l1 = (1 - L1) * 'seq_end; + + // TRANSITIONS: + // + // If not finish sequence in previous row, means current it's a continuation of previous sequence + // count64 match with previous value plus ops_selected. + continue_seq_on_l1 * (count64 - (segment_previous_count64 - op_x_row)) === 0; + continue_seq_on_no_l1 * (count64 - ('count64 - op_x_row)) === 0; + + // If finish sequence count64 match with ops_selected, because final count64 must be 0. + // new_seq_on_l1 * (count64 - ops_selected) === 0; + // new_seq_on_no_l1 * (count64 - ops_selected) === 0; + seq_end * (count64 - ops_selected) === 0; + (1 - seq_end) * (ops_selected - op_x_row) === 0; + + // If previous row was a seq_end, start with new dst/src addresses, else increment by ops_selected + continue_seq_on_l1 * (dst64 - (segment_previous_dst64 + op_x_row)) === 0; // continuation: L1 @[dst64_cont] + continue_seq_on_no_l1 * (dst64 - ('dst64 + op_x_row)) === 0; // continuation: !L1 @[dst64_cont] + + if (has_src) { + continue_seq_on_l1 * (src64 - (segment_previous_src64 + op_x_row)) === 0; + continue_seq_on_no_l1 * (src64 - ('src64 + op_x_row)) === 0; + } + + // NOTE: transition of src64 is defined only + + // LATCHS: + // + // flags = 'flags + // main_step = 'main_step + // fill_byte = 'fill_byte + + continue_seq_on_l1 * (flags - segment_previous_flags) === 0; + continue_seq_on_no_l1 * (flags - 'flags) === 0; + + continue_seq_on_l1 * (main_step - segment_previous_main_step) === 0; + continue_seq_on_no_l1 * (main_step - 'main_step) === 0; + + if (enable_memset) { + continue_seq_on_l1 * (fill_byte - segment_previous_fill_byte) === 0; + continue_seq_on_no_l1 * (fill_byte - 'fill_byte) === 0; + } + + // SECURITY: control count64 no negative @[count64_zero_check] + // + // if the seq_end it isn't active when count64 = 0, in each rows continues decreasing 8 x op_x_row + // units, means in 2^22 rows * 2^3 * 2^2 = 2^27. It's secure, because if any row lies, at end of + // instance this constraint fails. + + airval last_count_chunk[2]; + range_check(expression: last_count_chunk[0], min: 0, max: 2**16-1); + range_check(expression: last_count_chunk[1], min: 0, max: 2**16-1); + last_count_chunk[0] + last_count_chunk[1] * P2_16 === segment_last_count64; + + // BUS + // + // enable_memcpy: 2x mem + 1 op = 2x + 1 + // enable_memset: x mem + 1 op = x + 1 + // enable_memcmp: 2x mem + 1 op = 2x + 1 + // enable_memcpy + enable_memcmp + enable_memset: 2x + 1 + // enable_memcpy + enable_memcmp + enable_memset + enable_count_load: 2x + 2 + // + // enable_count_load: 2x mem + 1 op + 1 mem = 2x + 2 + // enable_input: x mem + 4x rc16 + 1 op = 5x + 1 + // enable_input: x mem + 3x rc16 + 1 op = 4x + 1 (24+D8)*2 + // + // full: 2x mem + 4x rc16 + 1 op + 1 mem = 6x + 2 + // full: 2x mem + 3x rc16 + 1 op + 1 mem = 5x + 2 (24+D8)*2 + + const int bus_count = 1 + op_x_row + + ((enable_memcpy || enable_memcmp) ? op_x_row : 0) + + (enable_inputcpy ? 3 * op_x_row: 0) + + (enable_count_load ? 1 : 0); + + const int max_bus_op_degree = (bus_count & 0x01) == 0 ? 2 : 1; + + println(`bus_count: ${bus_count}`); + println(`max_bus_op_degree: ${max_bus_op_degree}`); + + // DMA_64_ALIGNED ==> DMA + // DMA_64_ALIGNED ==> MAIN + // -------------------------------------------------------------------------------- + // OP_DMA_MEMCPY (dst, 0, src, 0, 0, 0, 0, main_step, 0) + MEM(count) (*) ¿? + // OP_DMA_XMEMCPY (dst, 0, src, 0, 0, 0, 0, main_step, count) + // OP_DMA_INPUTCPY (dst, 0, count, 0, 0, 0, 0, main_step, 0) + // OP_DMA_XMEMSET (dst, 0, count, 0, 0, 0, 0, main_step, value) + // OP_DMA_XMEMEQ (dst, 0, src, 0, 0, 0, 0, main_step, count) + + const int b0_could_be_count = enable_inputcpy || enable_memset; + expr _b0 = 0; + + if (has_src && !b0_could_be_count) { + _b0 = src64 * 8; + } else if (!has_src && b0_could_be_count) { + _b0 = count64 * 8; + } else { + _b0 = count64 * 8 * (sel_inputcpy + sel_memset) + + src64 * 8 * (sel_memcpy + sel_memeq); + } + + if (degree(_b0) > max_bus_op_degree) { + col witness bits(32) air.b0; + b0 <== _b0; + } else { + const expr air.b0 = _b0; + } + + const int extended_arg_could_be_count = enable_memcpy || (enable_memcmp && !enable_count_load); + const int extended_arg_could_be_zero = enable_count_load || enable_inputcpy; + + expr _extended_arg; + + if (extended_arg_could_be_count && !enable_memset && !extended_arg_could_be_zero) { + _extended_arg = count64 * 8; + } else if (!extended_arg_could_be_count) { + _extended_arg = fill_byte; + } else { + // sel_memcpy_count_load only could be active when sel_memcpy is active, means that + // this pair of selector take following values: + // + // ┌─ sel_memcpy + // │ ┌─── sel_memcpy_count_load + // │ │ ┌─── sel_memcpy - sel_memcpy_count_load + // + // 0 0 0 disable, only active if it's memcmp(memeq) + // 0 1 -1 not allowed by constraints + // 1 0 1 means a xmemcpy, the count must be on extended_arg + // 1 1 1 means memcpy, the count value must be read from memory + + _extended_arg = count64 * 8 * (sel_memcpy - sel_memcpy_count_load + sel_memeq) + fill_byte; + } + + if (degree(_extended_arg) > max_bus_op_degree) { + col witness bits(32) air.extended_arg; + extended_arg <== _extended_arg; + } else { + const expr air.extended_arg = _extended_arg; + } + + proves_operation(op:, + a: [dst64 * 8, 0], + b: [b0, 0], + c: [dst64 * 8, 0], + flag: 0, + main_step:, + extended_arg:, + mul: previous_seq_end); + + airval padding_size; + + // Used to cancel padding operations + assumes_padding_operation(op: 0, padding_size: padding_size); + + // FUTURE IMPROVEMENTS + // + // Delegate/Split a operation, if a operation uses more than 1 row, the final row could + // be used to send to bus as assume the "rest" of operation. This is special usefull if + // instance large and cheap instances, without selectors, that only admins full rows, and + // the remaing part is done by other small instantation that has selectors. With this design + // the cost of one memcpy could be near 6-7 columns + costs of memory. + // + // Analyze if it's possible that the equal memcmp could be done directly by Dma64Aligned without + // using controller, pre or post. +} \ No newline at end of file diff --git a/precompiles/dma/pil/dma_byte_cmp_table.pil b/precompiles/dma/pil/dma_byte_cmp_table.pil new file mode 100644 index 000000000..ba5f45762 --- /dev/null +++ b/precompiles/dma/pil/dma_byte_cmp_table.pil @@ -0,0 +1,43 @@ +require "std_lookup.pil" + +// this table not includes the equal values +const int DMA_BYTE_CMP_TABLE_SIZE = P2_8 * (P2_8 - 1); + +airtemplate DmaByteCmpTable(int N = DMA_BYTE_CMP_TABLE_SIZE) { + col fixed BYTE = [0:255..255:255]; + col fixed IS_NEG; + col fixed BYTE_DIFF; + + int index = 0; + #pragma transpile + for (int byte = 0; byte < P2_8; ++byte) { + for (int byte2 = 0; byte2 < byte; ++byte2) { + IS_NEG[index] = 0; + BYTE_DIFF[index] = byte - byte2; + ++index; + } + for (int byte2 = byte+1; byte2 < P2_8; ++byte2) { + IS_NEG[index] = 1; + BYTE_DIFF[index] = byte2 - byte; + ++index; + } + } + + assert_eq(index, N); + + // for (int byte = 0; byte < P2_8; ++byte) { + // for (int byte2 = 0; byte2 < P2_8; ++byte2) { + // if (byte == byte2) continue; + // const int is_neg = byte2 > byte ? 1 : 0; + // const int row = byte * 255 + byte2 - is_neg; + // const int byte_diff = byte2 > byte ? (byte2 - byte) : (byte - byte2); + // assert(BYTE[row] == byte, `FAIL BYTE[${row}] byte:${byte} byte2:${byte2}`); + // assert_eq(BYTE[row], byte, `FAIL BYTE[${row}] byte:${byte} byte2:${byte2}`); + // assert_eq(IS_NEG[row], is_neg, `FAIL IS_NEG[${row}] byte:${byte} byte2:${byte2}`); + // assert_eq(BYTE_DIFF[row], byte_diff, `FAIL BYTE_DIFF[${row}] byte:${byte} byte2:${byte2}`); + // } + // } + + col witness multiplicity; + lookup_proves(DMA_BYTE_CMP_TABLE_ID, mul: multiplicity, expressions: [BYTE, IS_NEG, BYTE_DIFF]); +} \ No newline at end of file diff --git a/precompiles/dma/pil/dma_pre_post.pil b/precompiles/dma/pil/dma_pre_post.pil new file mode 100644 index 000000000..797133b2d --- /dev/null +++ b/precompiles/dma/pil/dma_pre_post.pil @@ -0,0 +1,478 @@ +// This machine is used for DMA operations performed at the beginning, to align +// writes and at the end in case aligned writes need to work with less than +// 8 bytes. The part that handles alignment of writes is known as PRE, while +// the part that handles the final part is known as POST. +// +// This machine needs to read one or two 64-bit words from src to be able to write or compare +// the destination. +// +// Another feature this machine has is to resolve the last different byte of a memcmp. In +// reality a mem_cmp(src, dst, count) = mem_eq(src, dst, count_eq) + cmp_byte(src + count_eq, dst + count_eq) +// where count_eq <= count. + +airtemplate DmaPrePost(int N = 2**21, // Rows of instance + const int RC = 2, // Number of chunks for native value + const int enable = E_DMA_ALL) // Enable operations +{ + const int enable_memcpy = (enable & E_DMA_MEMCPY) ? 1 : 0; + const int enable_memcmp = (enable & E_DMA_MEMCMP) ? 1 : 0; + const int enable_inputcpy = (enable & E_DMA_INPUTCPY) ? 1 : 0; + const int enable_memset = (enable & E_DMA_MEMSET) ? 1 : 0; + + const int has_src = (enable_memcpy || enable_memcmp); + const int enabled_count = enable_memcpy + enable_memcmp + enable_inputcpy + enable_memset; + const int enable_write_mem = enable_memcpy || enable_inputcpy || enable_memset; + const int enabled_all = enabled_count == 4; + + col witness bits(MAIN_STEP_BITS) main_step; + col witness bits(ADDR_W_BITS) dst64; + col witness bits(3) dst_offset; + col witness bits(4) count; + + // bytes + // has sel ┌──────────────┐ + // src read rb rb2 pb + // ───────────────────────────────────────────── + // enable_memcpy ✓ ✓ ✓ ✓ ✓ + // enable_memcmp ✓ ✓ ✓ ✓ ✓ + // enable_inputcpy x x ✓ x ✓ + // enable_memset x x x x ✓ + + col witness bits(1) is_post; + is_post * (1 - is_post) === 0; + + // prepare operation selectors + + expr _sel = 0; + expr _sel_write = 0; + expr _sel_byte_rc = 0; + + // definition @[sel_memcpy] + expr _pre_op = 0; + if (enable_memcpy) { + col witness bits(1) air.sel_memcpy; + sel_memcpy * (1 - sel_memcpy) === 0; + _sel += sel_memcpy; + _sel_write += sel_memcpy; + _sel_byte_rc += sel_memcpy; + _pre_op += sel_memcpy * OP_DMA_PRE_XMEMCPY; + } else { + const int air.sel_memcpy = 0; + } + + // definition @[sel_memcmp] and @[memcmp_result_nz] + + if (enable_memcmp) { + col witness bits(1) air.sel_memcmp; + sel_memcmp * (1 - sel_memcmp) === 0; + _sel += sel_memcmp; + // @[_sel_write] it's active for sel_memcmp because read the same that preloads before read, + // only need to verify that the value will written and pre read value are the same + + _sel_byte_rc += sel_memcmp; + col witness bits(1) air.memcmp_result_nz; + memcmp_result_nz * (1 - memcmp_result_nz) === 0; + // memcmp_result_nz only could be 1 if it's an memcmp operation + memcmp_result_nz * (1 - sel_memcmp) === 0; + + col witness bits(32) air.l_memcmp_result; + // memcmp_result_nz is 0 the memcmp_result must be 0 + (1 - memcmp_result_nz) * l_memcmp_result === 0; + + _pre_op += sel_memcmp * OP_DMA_PRE_XMEMCMP; + } else { + const int air.sel_memcmp = 0; + const int air.memcmp_result[2] = [0, 0]; + } + + // definition @[sel_inputcpy] + + if (enable_inputcpy) { + col witness bits(1) air.sel_inputcpy; + sel_inputcpy * (1 - sel_inputcpy) === 0; + _sel += sel_inputcpy; + _sel_write += sel_inputcpy; + _sel_byte_rc += sel_inputcpy; + _pre_op += sel_inputcpy * OP_DMA_PRE_INPUTCPY; + } else { + const int air.sel_inputcpy = 0; + } + + // definition @[sel_memset] + + if (enable_memset) { + col witness bits(1) air.sel_memset; + sel_memset * (1 - sel_memset) === 0; + _sel += sel_memset; + _sel_write += sel_memset; + _pre_op += sel_memset * OP_DMA_PRE_XMEMSET; + } else { + const int air.sel_memset = 0; + } + + // definition @[sel] + + const expr sel = _sel; + + // sel_write and sel_byte_rc are a subset of sel, how sel is binary sel_write and sel_byte_rc + // are also binary + const expr sel_write = _sel_write; + const expr sel_byte_rc = _sel_byte_rc; + + if (enabled_count > 1) { + sel * (1 - sel) === 0; + } + + const expr pre_post_op = _pre_op + is_post * OP_DMA_POST_OFFSET; + + // Prepare source and flags to selected the "row" + const int RB; + expr rom_flags = 0; + + if (has_src) { + col witness bits(1) air.selr[7]; + + const expr air.sr[8]; + expr _sr_0to6 = 0; + expr _sr_value = 0; + for (int i = 0; i < 7; ++i) { + selr[i] * (1 - selr[i]) === 0; + sr[i] = selr[i]; + _sr_0to6 = _sr_0to6 + selr[i]; + if (i > 0) { + _sr_value = _sr_value + i * sr[i]; + } + } + const expr air.sr_0to6 = _sr_0to6; + _sr_value = _sr_value + 7 * sr[7]; + + const expr air.load_src = sel_memcpy + sel_memcmp; + const expr sr_value = _sr_value; + + sr_0to6 * (1 - sr_0to6) === 0; + sr[7] = (1 - sr_0to6); + + col witness bits(1) air.dst_offset_gt_src_offset; + dst_offset_gt_src_offset * (1 - dst_offset_gt_src_offset) === 0; + + + col witness bits(ADDR_W_BITS) air.src64; + col witness bits(3) air.src_offset; + + col witness bits(1) air.enabled_second_read; + enabled_second_read * (1 - enabled_second_read) === 0; + + // to force use table with enable when enabled_second_read is 1. If enable is 1, with table + // it's verified the correct value of enabled_second_read using offset an count + enabled_second_read * (1 - sel) === 0; + + // update rom flags + // sr_value is a expression of bits for this reason range check of sr_value is restricted + // to range [0, 7] + + rom_flags += DMA_PRE_POST_DST_OFFSET_GT_SRC_F * dst_offset_gt_src_offset + + DMA_PRE_POST_ENABLED_SECOND_READ_F * enabled_second_read + + DMA_PRE_POST_SR_VALUE_F * sr_value + + DMA_PRE_POST_IS_POST * is_post + + DMA_PRE_POST_LOAD_SRC * load_src; + + RB = RC * 4 * 2; + } else { + const int air.src64 = 0; + const int air.src_offset = 0; + if (enable_inputcpy) { + RB = RC * 4; + } else { + RB = 0; + } + } + + if (enable_memset) { + // if memset it's enable, take first rb as fill_free, and define a constraint + // to be sure that all are equals (sr must be 0). + col witness bits(8) air.fill_byte; + (1 - sel_memset) * fill_byte === 0; + } + + const int RV = RB / 4; + + if (RB > 0) { + col witness bits(8) air.rb[RB]; + const expr air.r_values[RV]; + for (int i = 0; i < RB; i+=2) { + range_dual_byte(rb[i+1], rb[i], sel_byte_rc); + if (enable_memset) { + sel_memset * (rb[i] - fill_byte) === 0; + sel_memset * (rb[i+1] - fill_byte) === 0; + } + } + + for (int i = 0; i < RV; ++i) { + r_values[i] = rb[i*4] + P2_8 * rb[i*4+1] + P2_16 * rb[i*4+2] + P2_24 * rb[i*4+3]; + } + } + + const int PRB = RC * 4; // Pre-write bytes + col witness bits(8) pb[PRB]; + + const expr p_values[RC]; + + for (int i = 0; i < PRB; i+=2) { + range_dual_byte(pb[i+1], pb[i], sel); + } + for (int i = 0; i < RC; ++i) { + p_values[i] = pb[i*4] + P2_8 * pb[i*4+1] + P2_16 * pb[i*4+2] + P2_24 * pb[i*4+3]; + } + + col witness bits(1) sb[8]; + for (int i = 0; i < 8; ++i) { + sb[i] * (1 - sb[i]) === 0; + rom_flags += sb[i] * (1 << (7-i)); + } + if (enable_memcmp) { + // The DMA controller send throw bus, an flag @[memcmp_nz] to indicate that operation + // must detect that last byte of count was different. This flag only could be active + // if @[sel_memcmp] it's active. + // + // The count send by DMA controller includes the different bytes, the idea es generate + // a value that "compenses" this difference. + // + // To detect it, we detect the byte previous to a falling edge of selector of bytes + // read. See @[last_dst_byte] + // + // Using PrePost Rom verifies the factors associated to this position, PrePost sent to + // Rom also if difference is negative @[memcmp_result_is_negative], because if it's + // negative also the factor was negative. + // + // We need other table to verify @[last_dst_byte] with @[abs_diff_dst_src] and + // @[memcmp_result_is_negative]. This table doesn't include the zero for abs_diff_dst_src. + // The lookup to this tables is only active when @[memcmp_result_nz] + + // For byte that's first different byte + // write_byte == dst_byte + // write_byte ≠ src_byte + // dst_minus_src = dst_byte - src_byte + // write_byte == dst_minus_src + src_byte + + col witness bits(8) air.last_dst_byte; + col witness bits(8) air.abs_diff_dst_src; + col witness bits(1) air.memcmp_result_is_negative; + col witness bits(64, signed) air.diff_factor[2]; + const expr air.diff_dst_src[2]; + + memcmp_result_is_negative * (1 - memcmp_result_is_negative) === 0; + + last_dst_byte <== sb[0] * (1 - sb[1]) * (pb[0] - pb[7]) + + sb[1] * (1 - sb[2]) * (pb[1] - pb[7]) + + sb[2] * (1 - sb[3]) * (pb[2] - pb[7]) + + sb[3] * (1 - sb[4]) * (pb[3] - pb[7]) + + sb[4] * (1 - sb[5]) * (pb[4] - pb[7]) + + sb[5] * (1 - sb[6]) * (pb[5] - pb[7]) + + sb[6] * (1 - sb[7]) * (pb[6] - pb[7]) + + pb[7]; + + diff_dst_src[0] = diff_factor[0] * abs_diff_dst_src * memcmp_result_nz; + diff_dst_src[1] = diff_factor[1] * abs_diff_dst_src * memcmp_result_nz; + + l_memcmp_result <== 0x1_0000_0000 * memcmp_result_is_negative + (1 - 2 * memcmp_result_is_negative) * abs_diff_dst_src; + + const expr air.memcmp_result[2]; + memcmp_result[0] = l_memcmp_result; + memcmp_result[1] = 0xFFFF_FFFF * memcmp_result_is_negative; + rom_flags += DMA_PRE_POST_MEMCMP_RESULT_IS_NEG_F * memcmp_result_is_negative + + DMA_PRE_POST_MEMCMP_RESULT_NZ_F * memcmp_result_nz; + lookup_assumes(DMA_BYTE_CMP_TABLE_ID, [last_dst_byte, + memcmp_result_is_negative, + abs_diff_dst_src + ], sel: memcmp_result_nz); + } else { + const int air.diff_dst_src[2] = [0, 0]; + const int air.diff_factor[2] = [0, 0]; + const int air.diff_zero = 0; + const int air.memcmp_result[2] = [0, 0]; + const int air.memcmp_result_nz = 0; + } + if (!enable_memset) { + const int air.fill_byte; + } + + col witness bits(32) bus_write_value[2]; + + if (has_src) { + col witness bits(32) air.write_value[4]; + + // byte_0, byte_1, byte_2, byte_3, byte_4, byte_5, byte_6, byte_7, byte_0', byte_1' ... + + // │ src_offset: + // selr_value │ 0 1 2 3 4 5 6 7 + // ──────────────┼─────────────────────── + // dst_offset: 0 │ 0 1 2 3 4 5 6 7 + // 1 │ 1 0 1 2 3 4 5 6 + // 2 │ 2 1 0 1 2 3 4 5 + // 3 │ 3 2 1 0 1 2 3 4 + // 4 │ 4 3 2 1 0 1 2 3 + // 5 │ 5 4 3 2 1 0 1 2 + // 6 │ 6 5 4 3 2 1 0 1 + // 7 │ 7 6 5 4 3 2 1 0 + // + // NOTE: selr_value = ABS(src_offset - dst_offset) + + // R0..R3,R4..R7 = current row read bytes + // R8..R11,R12..R15 = R0'..R3',R4'..R7' bytes of next row + // + // ┌────── src_offset - dst_offset + // │ + // │ 0 1 2 3 4 5 6 7 + // ┌─────────────────┬─────────────────┐ + // 0 │ R0 R1 R2 R3 │ R4 R5 R6 R7 │ + // 1 │ R1 R2 R3 R4 │ R5 R6 R7 R8 │ + // 2 │ R2 R3 R4 R5 │ R6 R7 R8 R9 │ + // 3 │ R3 R4 R5 R6 │ R7 R8 R9 R10 │ + // 4 │ R4 R5 R6 R7 │ R8 R9 R10 R11 │ + // 5 │ R5 R6 R7 R8 │ R9 R10 R11 R12 │ + // 6 │ R6 R7 R8 R9 │ R10 R11 R12 R13 │ + // 7 │ R7 R8 R9 R10 │ R11 R12 R13 R14 │ + // └─────────────────┴─────────────────┘ + + // dst_offset <= src_offset + + expr _wr_0 = 0; + expr _wr_1 = 0; + for (int j = 0; j < 8; ++j) { + expr _sr_wr_0 = 0; + expr _sr_wr_1 = 0; + for (int i = 0; i < 4; ++i) { + const int factor = 1 << (i * 8); + _sr_wr_0 += factor * sb[i] * rb[i+j]; + _sr_wr_1 += factor * sb[i+4] * rb[i+4+j]; + } + _wr_0 += sr[j] * _sr_wr_0; + _wr_1 += sr[j] * _sr_wr_1; + } + for (int i = 0; i < 4; ++i) { + const int factor = 1 << (i * 8); + _wr_0 += factor * (1 - sb[i]) * pb[i]; + _wr_1 += factor * (1 - sb[i+4]) * pb[i+4]; + } + write_value[0] <== _wr_0; + write_value[1] <== _wr_1; + + + // dst_offset > src_offset + // ┌────── src_offset - dst_offset + // │ ┌── abs(src_offset - dst_offset) + // │ │ + // │ │ 0 1 2 3 4 5 6 7 + // ┌─────────────────┬─────────────────┐ + // -7 7 │ │ R0 │ + // -6 6 │ │ R0 R1 │ + // -5 5 │ │ R0 R1 R2 │ + // -4 4 │ │ R0 R1 R2 R3 │ + // -3 3 │ R0 │ R1 R2 R3 R4 │ + // -2 2 │ R0 R1 │ R2 R3 R4 R5 │ + // -1 1 │ R0 R1 R2 │ R3 R4 R5 R6 │ + // └─────────────────┴─────────────────┘ + + write_value[2] <== sr[3] * ( sb[3] * P2_24 * rb[0]) + + sr[2] * ( sb[2] * P2_16 * rb[0] + sb[3] * P2_24 * rb[1]) + + sr[1] * ( sb[1] * P2_8 * rb[0] + sb[2] * P2_16 * rb[1] + sb[3] * P2_24 * rb[2]) + + (1 - sb[0]) * pb[0] + (1 - sb[1]) * P2_8 * pb[1] + (1 - sb[2]) * P2_16 * pb[2] + (1 - sb[3]) * P2_24 * pb[3]; + + write_value[3] <== sr[7] * ( + sb[7] * P2_24 * rb[0]) + + sr[6] * ( + sb[6] * P2_16 * rb[0] + sb[7] * P2_24 * rb[1]) + + sr[5] * ( + sb[5] * P2_8 * rb[0] + sb[6] * P2_16 * rb[1] + sb[7] * P2_24 * rb[2]) + + sr[4] * (sb[4] * rb[0] + sb[5] * P2_8 * rb[1] + sb[6] * P2_16 * rb[2] + sb[7] * P2_24 * rb[3]) + + sr[3] * (sb[4] * rb[1] + sb[5] * P2_8 * rb[2] + sb[6] * P2_16 * rb[3] + sb[7] * P2_24 * rb[4]) + + sr[2] * (sb[4] * rb[2] + sb[5] * P2_8 * rb[3] + sb[6] * P2_16 * rb[4] + sb[7] * P2_24 * rb[5]) + + sr[1] * (sb[4] * rb[3] + sb[5] * P2_8 * rb[4] + sb[6] * P2_16 * rb[5] + sb[7] * P2_24 * rb[6]) + + (1 - sb[4]) * pb[4] + (1 - sb[5]) * P2_8 * pb[5] + (1 - sb[6]) * P2_16 * pb[6] + (1 - sb[7]) * P2_24 * pb[7]; + + bus_write_value[0] <== dst_offset_gt_src_offset * (write_value[2] - write_value[0]) + write_value[0]; + bus_write_value[1] <== dst_offset_gt_src_offset * (write_value[3] - write_value[1]) + write_value[1]; + + // MEMORY ACCESS + // + // Read first 64 bits + // Read next 64 bits (optional, only if enabled_second_read) + // Read previous value before write + // Write final value + + precompiled_mem_load(sel: load_src, main_step:, addr: src64 * 8, value: [r_values[0], r_values[1]]); + precompiled_mem_load(sel: enabled_second_read, main_step:, addr: src64 * 8 + 8, value: [r_values[2], r_values[3]]); + + } else if (enable_inputcpy) { + // at this point memcpy, memcmp are disabled, but enable_memset could be enabled + expr _write_value_0 = 0; + expr _write_value_1 = 0; + for (int i = 0; i < 4; ++i) { + const int factor = 1 << (i * 8); + _write_value_0 += factor * (sb[i] * (rb[i] - pb[i]) + pb[i]); + _write_value_1 += factor * (sb[i+4] * (rb[i+4] - pb[i+4]) + pb[i+4]); + } + bus_write_value[0] <== _write_value_0; + bus_write_value[1] <== _write_value_1; + } else { + expr _write_value_0 = 0; + expr _write_value_1 = 0; + for (int i = 0; i < 4; ++i) { + const int factor = 1 << (i * 8); + _write_value_0 += factor * (sb[i] * (fill_byte - pb[i]) + pb[i]); + _write_value_1 += factor * (sb[i+4] * (fill_byte - pb[i+4]) + pb[i+4]); + } + bus_write_value[0] <== _write_value_0; + bus_write_value[1] <== _write_value_1; + } + + + // MEMORY ACCESS + // + // Read previous value before write + // Write final value + + // this load is always used to read previous value that was partially modified or verified + precompiled_mem_load(sel:, main_step:, addr: dst64 * 8, value: p_values); + + if (enable_write_mem) { + // when is a memcmp reads, no writes + precompiled_mem_store(sel: sel_write, main_step:, addr: dst64 * 8, value: bus_write_value); + } + if (enable_memcmp) { + // for memcmp we need to verify that value read from memory are the same that value generated to write, because this value is the one that "compensate" the difference between src and dst, if this value is different that value read from memory, the memcmp result will be different that expected. + sel_memcmp * (p_values[0] - bus_write_value[0] - diff_dst_src[0]) === 0; + sel_memcmp * (p_values[1] - bus_write_value[1] - diff_dst_src[1]) === 0; + } + + // DMA ===> DMA_PRE_POST (PRE) + // -------------------------------------------------------------------------------- + // OP_DMA_PRE_XMEMCPY (dst, src, dst_offset, src_offset, pre_count, main_step) + // OP_DMA_PRE_INPUTCPY (dst, 0, dst_offset, 0, pre_count, main_step) + // OP_DMA_PRE_XMEMEQ (dst, src, dst_offset, src_offset, pre_count, main_step) + // OP_DMA_PRE_XMEMCMP (dst, src, dst_offset, src_offset, pre_count, main_step, 0, r_nz, R0, R1) + // OP_DMA_PRE_XMEMSET (dst, 0, dst_offset, 0, pre_count, main_step, value) + + // Send operation to bus + permutation_proves(DMA_BUS_ID, [pre_post_op, + dst64 * 8, + src64 * 8, + dst_offset, + src_offset, + count, + main_step, + fill_byte, + memcmp_result_nz, + ...memcmp_result + ], sel:); + + + // @[selectors_mask] (8 bits) + // DMA_PRE_POST_ENABLED_SECOND_READ_F = P2_8 @[enabled_second_read] (1 bit) + // DMA_PRE_POST_DST_OFFSET_GT_SRC_F = P2_9 @[dst_offset_gt_src_offset] (1 bit) + // DMA_PRE_POST_SR_VALUE_F = P2_10 @[sr_value] (0-7) (3 bits) + // DMA_PRE_POST_MEMCMP_RESULT_IS_NEG_F = P2_13 @[memcmp_result_is_negative] (1 bit) + // DMA_PRE_POST_MEMCMP_RESULT_NZ = P2_14 @[memcmp_result_nz] (1 bit) + + lookup_assumes(DMA_PRE_POST_TABLE_ID, [rom_flags, dst_offset, src_offset, count, ...diff_factor], sel: sel); + + // TBO: For single memcmp operation, we only need two reads, and after generate two values to compare, less + // columns. +} \ No newline at end of file diff --git a/precompiles/dma/pil/dma_pre_post_table.pil b/precompiles/dma/pil/dma_pre_post_table.pil new file mode 100644 index 000000000..acf703554 --- /dev/null +++ b/precompiles/dma/pil/dma_pre_post_table.pil @@ -0,0 +1,152 @@ +require "std_lookup.pil" + +const int DMA_PRE_POST_TABLE_SIZE = 288 * 4; + +const int DMA_PRE_POST_ENABLED_SECOND_READ_F = P2_8; +const int DMA_PRE_POST_DST_OFFSET_GT_SRC_F = P2_9; +const int DMA_PRE_POST_SR_VALUE_F = P2_10; +const int DMA_PRE_POST_MEMCMP_RESULT_IS_NEG_F = P2_13; +const int DMA_PRE_POST_MEMCMP_RESULT_NZ_F = P2_14; +const int DMA_PRE_POST_IS_POST = P2_15; +const int DMA_PRE_POST_LOAD_SRC = P2_16; + +airtemplate DmaPrePostTable(int N = DMA_PRE_POST_TABLE_SIZE) { + col fixed FLAGS; + col fixed DST_OFFSET; + col fixed SRC_OFFSET; + col fixed COUNT; + col fixed L_FACTOR; + col fixed H_FACTOR; + + int index = 0; + + // OUT @[selectors_mask] (8 bits) + // DMA_PRE_POST_ENABLED_SECOND_READ_F = P2_8 OUT @[enabled_second_read] (1 bit) + // DMA_PRE_POST_DST_OFFSET_GT_SRC_F = P2_9 OUT @[dst_offset_gt_src_offset] (1 bit) + // DMA_PRE_POST_SR_VALUE_F = P2_10 OUT @[sr_value] (0-7) (3 bits) + // DMA_PRE_POST_MEMCMP_RESULT_IS_NEG_F = P2_13 IN @[memcmp_result_is_negative] (1 bit) + // DMA_PRE_POST_MEMCMP_RESULT_NZ_F = P2_14 IN @[memcmp_result_nz] (1 bit) + // DMA_PRE_POST_IS_POST = P2_15 IN @[is_post] (1 bit) + // POST OPERATION objective execute the last incomplete "write": + // dst_offset = 0 + // count ∈ [1,7] + + int table_offsets[64]; + + const int flags_memcmp_result_positive = DMA_PRE_POST_MEMCMP_RESULT_NZ_F; + const int flags_memcmp_result_negative = DMA_PRE_POST_MEMCMP_RESULT_NZ_F + DMA_PRE_POST_MEMCMP_RESULT_IS_NEG_F; + + const int l_factors[8] = [ 1, P2_8, P2_16, P2_24, 0, 0, 0, 0]; + const int h_factors[8] = [ 0, 0, 0, 0, 1, P2_8, P2_16, P2_24]; + + for (int src_offset = 0; src_offset < 8; ++src_offset) { + table_offsets[src_offset] = index; + // For the edge case of memcmp when dst_offset = 0 and count is a multiple of 8 that need + // to be validate for DmaPrePost, at least if last byte is the first different. + for (int count = 1; count < 9; ++ count) { + for (int k = 0; k < 4; ++k) { + const int lindex = index + k; + SRC_OFFSET[lindex] = src_offset; + DST_OFFSET[lindex] = 0; + COUNT[lindex] = count; + } + const int selectors = (0xFF << (8 - count)) & 0xFF; + assert(selectors != 0); + assert(selectors <= 0xFF); + + const int enabled_second_read = (src_offset + count) > 8 ? 1:0; + // selr_value = src_offset + // dst_offset_gt_src_offset = 0 + expr flags_load_src = selectors + DMA_PRE_POST_ENABLED_SECOND_READ_F * enabled_second_read + + DMA_PRE_POST_LOAD_SRC + + DMA_PRE_POST_SR_VALUE_F * src_offset + + DMA_PRE_POST_IS_POST; // dst_offset = 0 ==> post operation + + expr flags_no_load_src = selectors + DMA_PRE_POST_SR_VALUE_F * src_offset + + DMA_PRE_POST_IS_POST; // dst_offset = 0 ==> post operation + + FLAGS[index] = flags_load_src; + FLAGS[index + 1] = flags_load_src + flags_memcmp_result_positive; + FLAGS[index + 2] = flags_load_src + flags_memcmp_result_negative; + FLAGS[index + 3] = flags_no_load_src; + + const int factor_index = count - 1; + L_FACTOR[index] = 0; + L_FACTOR[index + 1] = l_factors[factor_index]; + L_FACTOR[index + 2] = -l_factors[factor_index]; + L_FACTOR[index + 3] = 0; + + H_FACTOR[index] = 0; + H_FACTOR[index + 1] = h_factors[factor_index]; + H_FACTOR[index + 2] = -h_factors[factor_index]; + H_FACTOR[index + 3] = 0; + index += 4; + } + } + + // PRE OPERATION objective execute the "first" incomplete "write" + // dst_offset > 0 + // count = 8 - dst_offset + + for (int dst_offset = 1; dst_offset < 8; ++dst_offset) { + const int mask = 0xFF >> dst_offset; + for (int src_offset = 0; src_offset < 8; ++src_offset) { + table_offsets[dst_offset * 8 + src_offset] = index; + for (int count = 1; count < (9 - dst_offset); ++ count) { + for (int k = 0; k < 4; ++k) { + const int lindex = index + k; + SRC_OFFSET[lindex] = src_offset; + DST_OFFSET[lindex] = dst_offset; + COUNT[lindex] = count; + } + + const int selectors = mask & (0xFF << (8 - (dst_offset + count))); + assert(selectors != 0); + assert(selectors <= 0xFF); + + const int enabled_second_read = (src_offset + count) > 8 ? 1:0; + const int dst_offset_gt_src_offset = dst_offset > src_offset ? 1:0; + const int selr_value = dst_offset > src_offset ? dst_offset - src_offset: src_offset - dst_offset; + // selr_value = src_offset + // dst_offset_gt_src_offset = 0 + + expr flags_load_src = selectors + DMA_PRE_POST_ENABLED_SECOND_READ_F * enabled_second_read + + DMA_PRE_POST_DST_OFFSET_GT_SRC_F * dst_offset_gt_src_offset + + DMA_PRE_POST_LOAD_SRC + + DMA_PRE_POST_SR_VALUE_F * selr_value; + + expr flags_no_load_src = selectors + DMA_PRE_POST_DST_OFFSET_GT_SRC_F * dst_offset_gt_src_offset + + DMA_PRE_POST_SR_VALUE_F * selr_value; + + FLAGS[index] = flags_load_src; + FLAGS[index + 1] = flags_load_src + flags_memcmp_result_positive; + FLAGS[index + 2] = flags_load_src + flags_memcmp_result_negative; + FLAGS[index + 3] = flags_no_load_src; + + const int factor_index = dst_offset + count - 1; + L_FACTOR[index] = 0; + L_FACTOR[index + 1] = l_factors[factor_index]; + L_FACTOR[index + 2] = -l_factors[factor_index]; + L_FACTOR[index + 3] = 0; + + H_FACTOR[index] = 0; + H_FACTOR[index + 1] = h_factors[factor_index]; + H_FACTOR[index + 2] = -h_factors[factor_index]; + H_FACTOR[index + 3] = 0; + + index += 4; + } + } + } + println(`TABLE_OFFSETS[${length(table_offsets)}] = [` ,table_offsets, "]"); + println(`DMA_PRE_POST_TABLE_SIZE=${DMA_PRE_POST_TABLE_SIZE}`); + assert_eq(index, DMA_PRE_POST_TABLE_SIZE); + + col witness multiplicity; + lookup_proves(DMA_PRE_POST_TABLE_ID, expressions: [FLAGS, + DST_OFFSET, + SRC_OFFSET, + COUNT, + L_FACTOR, + H_FACTOR], mul: multiplicity ); +} \ No newline at end of file diff --git a/precompiles/dma/pil/dma_rom.pil b/precompiles/dma/pil/dma_rom.pil new file mode 100644 index 000000000..4abba74e3 --- /dev/null +++ b/precompiles/dma/pil/dma_rom.pil @@ -0,0 +1,77 @@ +require "std_lookup.pil" + +// 3+3+9+1 = 16 bits = 2^16 TABLE +const int DMA_ROM_TABLE_SIZE = 8 * 8 * P2_9 * 3; + +const int DMA_ROM_USE_PRE_F = 1; +const int DMA_ROM_LOOP_F = 2; +const int DMA_ROM_USE_POST_F = 4; +const int DMA_ROM_SRC64_INC_BY_PRE_F = 8; +const int DMA_ROM_COUNT_LT_256_F = 16; +const int DMA_ROM_NEQ_F = 32; +const int DMA_ROM_USE_SRC_F = 64; + +airtemplate DmaRom(int N = DMA_ROM_TABLE_SIZE) { + + col fixed DST_OFFSET; + col fixed SRC_OFFSET; + col fixed L_COUNT; + col fixed FLAGS; + col fixed PRE_COUNT; + col fixed SRC_OFFSET_AFTER_PRE; + col fixed L_COUNT64; + + int i = 0; + for (int icase = 0; icase < 3; ++icase) { + const int use_src = icase < 2; + const int neq = icase == 1 ? 1 : 0; + for (int dst_offset = 0; dst_offset < 8; ++dst_offset) { + for (int src_offset = 0; src_offset < 8; ++src_offset) { + for (int l_count = 0; l_count < P2_9; ++l_count) { + DST_OFFSET[i] = dst_offset; + SRC_OFFSET[i] = src_offset; + + if (l_count == 0) { + L_COUNT[i] = 0; + FLAGS[i] = 16 + neq * 32 + use_src * DMA_ROM_USE_SRC_F; + PRE_COUNT[i] = 0; + SRC_OFFSET_AFTER_PRE[i] = src_offset; + L_COUNT64[i] = 0; + } else { + const int use_pre = dst_offset > 0; + const int pre_count = use_pre ? (((8 - dst_offset) < l_count) ? (8 - dst_offset) : l_count) : 0; + int post_count = (l_count - pre_count) % 8; + int loop_count = (l_count - pre_count - post_count) / 8; + if (neq && post_count == 0 && loop_count > 0) { + loop_count -= 1; + post_count = 8; + } + const int use_post = post_count > 0; + const int use_loop = loop_count > 0; + const int src64_inc_by_pre = (use_pre && (src_offset + pre_count) >= 8) ? 1 : 0; + const int count_lt_256 = l_count < 256 ? 1 : 0; + + L_COUNT[i] = l_count; + FLAGS[i] = use_pre * DMA_ROM_USE_PRE_F + + use_loop * DMA_ROM_LOOP_F + + use_post * DMA_ROM_USE_POST_F + + src64_inc_by_pre * DMA_ROM_SRC64_INC_BY_PRE_F + + count_lt_256 * DMA_ROM_COUNT_LT_256_F + + neq * DMA_ROM_NEQ_F + + use_src * DMA_ROM_USE_SRC_F; + + + PRE_COUNT[i] = pre_count; + SRC_OFFSET_AFTER_PRE[i] = use_src ? (src_offset + pre_count) % 8 : 0; + L_COUNT64[i] = loop_count; + } + i += 1; + } + } + } + } + println(`${i} rows`); + + col witness multiplicity; + lookup_proves(DMA_ROM_ID, [DST_OFFSET, SRC_OFFSET, L_COUNT, FLAGS, PRE_COUNT, SRC_OFFSET_AFTER_PRE, L_COUNT64], multiplicity); +} \ No newline at end of file diff --git a/precompiles/dma/pil/dma_unaligned.pil b/precompiles/dma/pil/dma_unaligned.pil new file mode 100644 index 000000000..c5829148a --- /dev/null +++ b/precompiles/dma/pil/dma_unaligned.pil @@ -0,0 +1,398 @@ + +// DmaUnaligned +// +// Basically this machine performs a 64-bit read and breaks it down into bytes, +// taking those it needs from the current row or the next row using the row offset +// for this purpose. In the last row, it commits to some bytes that it adds in the +// continuations so that in the first row of the next instance it verifies that it +// matches the read data. +// +// Writes must be aligned, since more than one write can be done at the same instant +// of time. This means that count + 1 rows are needed to prove this unaligned operation. +// +// ROW i ┊ i+1 ... i+count-1 ┊ i+count +// ┊ ┊ +// bytes 0..7 ┊ 8..15 ┊ +// ┌──────────────┐ ┊ ┌──────────────┐ ┊ +// ┊ ┊ +// READ ┌─┬─┬─┬─┐┌─┬─┬─┬─┐ ┊ ┌─┬─┬─┬─┐┌─┬─┬─┬─┐ ┊ ┌─┬─┬─┬─┐┌─┬─┬─┬─┐ +// └─┴─┴─┴─┘└─┴─┴─┴─┘ ┊ └─┴─┴─┴─┘└─┴─┴─┴─┘ ┊ └─┴─┴─┴─┘└─┴─┴─┴─┘ +// ┌───┘ │ │ │ │ │ ┊ │ │ │ ┌────┊──┘ │ +// │ ┌───┘ │ │ │ │ ┊ │ │ │ │ ┌──┊────┘ +// │ │ ┌────┘ │ │ │ ┊ │ │ │ │ │ ┊ +// │ │ │ ┌────┘ │ │ ┊ │ │ │ ...... │ │ ┊ +// │ │ │ │ ┌───┘ │ ┊ │ │ │ │ │ ┊ +// │ │ │ │ │ ┌───┘ ┊ │ │ │ │ │ ┊ +// │ │ │ │ │ │ ┌────┊──┘ │ │ ...... │ │ ┊ +// │ │ │ │ │ │ │ ┌──┊────┘ │ │ │ ┊ +// │ │ │ │ │ │ │ │ ┊ │ │ │ ┊ +// │ │ │ │ │ │ │ │ ┊ ┌───┘ │ │ ┊ +// WRITE ┌─┬─┬─┬─┐┌─┬─┬─┬─┐ ┊ ┌─┬─┬─┬─┐┌─┬─┬─┬─┐ ┊ In the last row there +// └─┴─┴─┴─┘└─┴─┴─┴─┘ ┊ └─┴─┴─┴─┘└─┴─┴─┴─┘ ┊ aren't writes +// ┊ ┊ +// └──────────────┘ ┊ ┊ +// bytes 16..23 ┊ ┊ +// +// NOTE: Data from several programs has been analyzed to see if a special version +// for 32-bit displacements would make sense because then it wouldn't be necessary +// to decompose into bytes since the chunks would be taken directly. It has been seen +// that these operations are not the most frequent. + +airtemplate DmaUnaligned(int N = 2**21, // Rows of instance + const int RC = 2, // Number of chunks for native value + const expr enable_flag = 1, // Expression to active the beggining of + // continuations, used for executions that + // not generates instances. + // + const int enable = E_DMA_ALL, // Enable operations + int cont_subid = 0) { // Force subid used in continuations + + const int enable_memcpy = (enable & E_DMA_MEMCPY) ? 1 : 0; + const int enable_memcmp = (enable & E_DMA_MEMCMP) ? 1 : 0; + assert((enable_memcmp + enable_memcpy) >= 1); + + const int enable_full = enable_memcpy && enable_memcmp; + + // continuation control, used to create a new continuation subid to avoid collision + // between DmaUnaligned continuations. If you instance diferent airs with same template + // need diferenciate them. If parameter cont_subid = 0 means that the subid is auto-generated + + container proof.dma_unaligned { + int cont_subids_count = 0; + int cont_subids[64]; + } + use proof.dma_unaligned; + + if (cont_subid == 0) { + cont_subid = cont_subids_count + 1; + for (int i = 0; i < cont_subids_count; ++i) { + if (cont_subids[i] >= cont_subid) { + cont_subid = cont_subids[i] + 1; + } + } + } else { + for (int i = 0; i < cont_subids_count; ++i) { + assert(cont_subids[i] != cont_subid, `duplicated cont_subid ${cont_subid}`); + } + } + cont_subids[cont_subids_count] = cont_subid; + cont_subids_count += 1; + + const expr L1 = get_L1(); + const expr LAST = L1'; + + airval segment_id; // Id of current segment + airval segment_previous_seq_end; // Last value of @[seq_end] in previous segment. + airval segment_previous_src64; // Last value of @[src64] in previous segment. + airval segment_previous_dst64; // Last value of @[dst64] in previous segment. + airval segment_previous_main_step; // Last value of @[main_step] in previous segment. + airval segment_previous_offset; // Last value of @[offset] in previous segment. + airval segment_previous_count; // Last value of @[count] in previous segment. + + + + airval segment_first_bytes[8]; // bytes of next block + + airval segment_last_seq_end; // Last value of @[seq_end] in current segment. + airval segment_last_src64; // Last value of @[src64] in current segment. + airval segment_last_dst64; // Last value of @[dst64] in current segment. + airval segment_last_main_step; // Last value of @[main_step] in current segment. + airval segment_last_offset; // Last value of @[offset] in current segment. + airval segment_last_count; // Last value of @[count] in current segment. + + airval segment_next_bytes[8]; // bytes of next block + + airval is_last_segment; // 1 if this is the last segment, 0 otherwise. + + if (enable_full) { + airval air.segment_previous_is_memeq; // Last value of @[is_memeq] in previous segment. + airval air.segment_last_is_memeq; // Last value of @[is_memeq] in previous segment. + + segment_previous_is_memeq * (1 - segment_previous_is_memeq) === 0; + segment_last_is_memeq * (1 - segment_last_is_memeq) === 0; + } else { + const int air.segment_previous_is_memeq = 0; + const int air.segment_last_is_memeq = 0; + } + + segment_previous_seq_end * (1 - segment_previous_seq_end) === 0; + segment_last_seq_end * (1 - segment_last_seq_end) === 0; + + is_last_segment * (1 - is_last_segment) === 0; + + // if it's last segment must be the end of sequence or padding that use end of sequence + is_last_segment * (1 - segment_last_seq_end) === 0; + + col witness bits(MAIN_STEP_BITS) main_step; + col witness bits(ADDR_W_BITS) src64; + col witness bits(ADDR_W_BITS) dst64; + col witness bits(32) count; // number of words + + col witness bits(1) seq_end; + seq_end * (1 - seq_end) === 0; + + col witness bits(1) previous_seq_end; + previous_seq_end <== L1 * (segment_previous_seq_end - 'seq_end) + 'seq_end; + + LAST * (seq_end - segment_last_seq_end) === 0; + LAST * (1 - seq_end) * (src64 - segment_last_src64) === 0; + LAST * (1 - seq_end) * (dst64 - segment_last_dst64) === 0; + LAST * (1 - seq_end) * (main_step - segment_last_main_step) === 0; + LAST * (1 - seq_end) * (count - segment_last_count) === 0; + + if (enable_full) { + col witness bits(1) air.is_memeq; // Last value of @[is_memeq] in previous segment. + is_memeq * (1 - is_memeq) === 0; + LAST * (1 - seq_end) * (is_memeq - segment_last_is_memeq) === 0; + } + + col witness bits(1) offset_7; + col witness bits(1) offset_6; + col witness bits(1) offset_5; + col witness bits(1) offset_4; + col witness bits(1) offset_3; + col witness bits(1) offset_2; + + // To save one witness column, define offset_1 using other columns, because no offset_0 supported + // offset = 0 means aligned memcpy. + // CONSIDERATION: support offset_0 to prove also aligned memcpy. + const expr offset_7to2 = offset_7 + offset_6 + offset_5 + offset_4 + offset_3 + offset_2; + const expr offset_1 = (1 - offset_7to2); + + offset_7to2 * (offset_7to2 - 1) === 0; + const expr offset = offset_1 + offset_2 * 2 + offset_3 * 3 + offset_4 * 4 + offset_5 * 5 + + offset_6 * 6 + offset_7 * 7; + + offset_7 * (1 - offset_7) === 0; + offset_6 * (1 - offset_6) === 0; + offset_5 * (1 - offset_5) === 0; + offset_4 * (1 - offset_4) === 0; + offset_3 * (1 - offset_3) === 0; + offset_2 * (1 - offset_2) === 0; + + const expr previous_offset = L1 * (segment_previous_offset - 'offset) + 'offset; + LAST * (1 - seq_end) * (offset - segment_last_offset) === 0; + + col witness bits(8) read_bytes[8]; + + range_dual_byte(read_bytes[1], read_bytes[0]); + range_dual_byte(read_bytes[3], read_bytes[2]); + range_dual_byte(read_bytes[5], read_bytes[4]); + range_dual_byte(read_bytes[7], read_bytes[6]); + + const expr read_value[2]; + + read_value[0] = read_bytes[0] + P2_8 * read_bytes[1] + P2_16 * read_bytes[2] + P2_24 * read_bytes[3]; + read_value[1] = read_bytes[4] + P2_8 * read_bytes[5] + P2_16 * read_bytes[6] + P2_24 * read_bytes[7]; + + // byte_0, byte_1, byte_2, byte_3, byte_4, byte_5, byte_6, byte_7, byte_0', byte_1' ... + + const expr next_bytes[8]; + + // when an instance last row has seq_end = 1, means that next_bytes don't need to match + // with first read_bytes, because it's new dma operation, in this situation set value + // of segment_next_bytes to 0. + + col witness bits(1) no_last_no_seq_end; + no_last_no_seq_end <== (1 - LAST) * (1 - seq_end); + + for (int i = 0; i < 8; ++i) { + // if LAST next_bytes are bytes sent to bus, if no LAST but is a seq_end, no sense + // use bytes of the next input, in this case next_bytes = 0, otherwise next_bytes are + // the next read (src64 + 1) * 8 + next_bytes[i] = LAST * segment_next_bytes[i] + no_last_no_seq_end * read_bytes[i]'; + + // if last row of previous instance aren't the the last row of an input, segment_first_bytes + // must match with bytes "received" from bus. Otherwise, zeros was sent to bus and them not + // need to match with read_bytes because are bytes of other input. + (1 - segment_previous_seq_end) * L1 * (segment_first_bytes[i] - read_bytes[i]) === 0; + + // force segment_next_bytes to zero, when it's finish of input + segment_last_seq_end * segment_next_bytes[i] === 0; + + // to force the "received" values from the bus are 0 when the last row of the previous segment + // is final of input (seq_end). + segment_previous_seq_end * L1 * segment_first_bytes[i] === 0; + } + + col witness bits(32) write_value[2]; + + write_value[0] <== offset_1 * (read_bytes[1] + P2_8 * read_bytes[2] + P2_16 * read_bytes[3] + P2_24 * read_bytes[4]) + + offset_2 * (read_bytes[2] + P2_8 * read_bytes[3] + P2_16 * read_bytes[4] + P2_24 * read_bytes[5]) + + offset_3 * (read_bytes[3] + P2_8 * read_bytes[4] + P2_16 * read_bytes[5] + P2_24 * read_bytes[6]) + + offset_4 * (read_bytes[4] + P2_8 * read_bytes[5] + P2_16 * read_bytes[6] + P2_24 * read_bytes[7]) + + offset_5 * (read_bytes[5] + P2_8 * read_bytes[6] + P2_16 * read_bytes[7] + P2_24 * next_bytes[0]) + + offset_6 * (read_bytes[6] + P2_8 * read_bytes[7] + P2_16 * next_bytes[0] + P2_24 * next_bytes[1]) + + offset_7 * (read_bytes[7] + P2_8 * next_bytes[0] + P2_16 * next_bytes[1] + P2_24 * next_bytes[2]); + + write_value[1] <== offset_1 * (read_bytes[5] + P2_8 * read_bytes[6] + P2_16 * read_bytes[7] + P2_24 * next_bytes[0]) + + offset_2 * (read_bytes[6] + P2_8 * read_bytes[7] + P2_16 * next_bytes[0] + P2_24 * next_bytes[1]) + + offset_3 * (read_bytes[7] + P2_8 * next_bytes[0] + P2_16 * next_bytes[1] + P2_24 * next_bytes[2]) + + offset_4 * (next_bytes[0] + P2_8 * next_bytes[1] + P2_16 * next_bytes[2] + P2_24 * next_bytes[3]) + + offset_5 * (next_bytes[1] + P2_8 * next_bytes[2] + P2_16 * next_bytes[3] + P2_24 * next_bytes[4]) + + offset_6 * (next_bytes[2] + P2_8 * next_bytes[3] + P2_16 * next_bytes[4] + P2_24 * next_bytes[5]) + + offset_7 * (next_bytes[3] + P2_8 * next_bytes[4] + P2_16 * next_bytes[5] + P2_24 * next_bytes[6]); + + // At begining of sequence + // DMA BUS [op, dst64, src64, dst_offset, src_offset, bytes, main_step] + + const expr op; + const expr padding_op; + const expr is_write; + + if (enable_full) { + op = is_memeq * (OP_DMA_XMEMEQ - OP_DMA_XMEMCPY) + OP_DMA_XMEMCPY; + padding_op = OP_DMA_XMEMCPY; + is_write = (1 - is_memeq); + } else if (enable_memcpy) { + op = OP_DMA_XMEMCPY; + padding_op = OP_DMA_XMEMCPY; + is_write = 1; + } else { + op = OP_DMA_XMEMEQ; + padding_op = OP_DMA_XMEMEQ; + is_write = 0; + } + + // DMA_UNALIGNED op_bus.prove <== + // -------------------------------------------------------------------------------- + // OP_DMA_XMEMCPY (dst, 0, src, 0, 0, 0, 0, main_step, count, src_offset) + // OP_DMA_XMEMEQ (dst, 0, src, 0, 0, 0, 0, main_step, count, src_offset) + + proves_operation(op:, + a: [dst64 * 8, 0], + b: [src64 * 8, 0], + c: [dst64 * 8, 0], + main_step:, + extended_arg: count * 8, + extra_args: [offset], + mul: previous_seq_end); + + // memory access + precompiled_mem_load( + sel: 1, + main_step: , + addr: src64 * 8, + value: read_value + ); + + precompiled_mem_op( + is_write:, + sel: (1 - seq_end), + main_step:, + addr: dst64 * 8, + value: write_value + ); + + + // PADDING + // + // cancel operation sent to bus src=0, dst=0, offset=1, count=0, main_step=0 + // precompiled_mem_load_padding demostrate read addr=0 width=8 main_step=0 value=0 + // in padding rows seq_end is active, means no precompiled_mem_store + + airval padding_size; + + precompiled_mem_load_padding(padding: padding_size); + assumes_padding_operation(op: padding_op, extra_args: [1], padding_size:); + + const expr continue_seq_on_l1 = L1 * (1 - segment_previous_seq_end); + const expr continue_seq_on_no_l1 = (1 - L1) * (1 - 'seq_end); + + const expr new_seq_on_l1 = L1 * segment_previous_seq_end; + const expr new_seq_on_no_l1 = (1 - L1) * 'seq_end; + + // TRANSITIONS: + // + // After first element of sequence + // count = 'count - 1 + // src64 = 'src64 + 1 + // dst64 = 'dst64 + 1 + + continue_seq_on_l1 * (count - (segment_previous_count - 1)) === 0; + continue_seq_on_no_l1 * (count - ('count - 1)) === 0; + + continue_seq_on_l1 * (src64 - (segment_previous_src64 + 1)) === 0; + continue_seq_on_no_l1 * (src64 - ('src64 + 1)) === 0; + + continue_seq_on_l1 * (dst64 - (segment_previous_dst64 + 1)) === 0; + continue_seq_on_no_l1 * (dst64 - ('dst64 + 1)) === 0; + + // LATCHS: + // + // offset = 'offset + // is_memeq = 'is_memeq + // main_step = 'main_step + continue_seq_on_l1 * (offset - segment_previous_offset) === 0; + continue_seq_on_no_l1 * (offset - 'offset) === 0; + + if (enable_full) { + continue_seq_on_l1 * (is_memeq - segment_previous_is_memeq) === 0; + continue_seq_on_no_l1 * (is_memeq - 'is_memeq) === 0; + } + continue_seq_on_l1 * (main_step - segment_previous_main_step) === 0; + continue_seq_on_no_l1 * (main_step - 'main_step) === 0; + + // At end of sequence + // count must be 0 at end of sequence + count * seq_end === 0; + + // SECURITY: control count no negative + // + // if the seq_end it isn't active when count = 0, in each rows continues decreasing 8 units, + // means in 2^22 rows * 2^3 = 2^25. It's secure, because if any row lies, at end of instance + // this constraint fails. + + airval last_count_chunk[2]; + range_check(expression: last_count_chunk[0], min: 0, max: 2**16-1); + range_check(expression: last_count_chunk[1], min: 0, max: 2**16-1); + last_count_chunk[0] + last_count_chunk[1] * P2_16 === segment_last_count; + + // CONTINUATIONS + + // AIR_ID, segment_id, seq_end, src64, dst64, count, main_step + + direct_global_update_proves(DMA_UNALIGNED_CONT_ID, + [0, // initial segment_id + 0, // security flag no_final @[] indicate that not final + cont_subid, // continuation subid + 1, // initial @[seq_end] + 0, // initial @[src64] + 0, // initial @[dst64] + 0, // initial @[offset] + 0, // initial @[count] + 0, // initial @[main_step] + 0, // initial @[is_memeq] + 0,0,0,0,0,0,0,0], // first bytes + sel: enable_flag); + + + direct_update_assumes(DMA_UNALIGNED_CONT_ID, + [segment_id, // current segment_id + 0, // security flag to indicate that not final + cont_subid, // continuation subid @[cont_subid] + segment_previous_seq_end, // previous @[seq_end] + segment_previous_src64, // previous @[src64] + segment_previous_dst64, // previous @[dst64] + segment_previous_offset, // previous @[offset] + segment_previous_count, // previous @[count] + segment_previous_main_step,// previous @[main_step] + segment_previous_is_memeq, // previous @[is_memeq] + ...segment_first_bytes // first bytes read from memory + ]); + + direct_update_proves(DMA_UNALIGNED_CONT_ID, + [segment_id + 1, // next segment_id + is_last_segment, // security flag to indicate that not final + cont_subid, // continuation subid @[cont_subid] + segment_last_seq_end, // last @[seq_end] + segment_last_src64, // last @[src64] + segment_last_dst64, // last @[dst64] + segment_last_offset, // last @[offset] + segment_last_count, // last @[count] + segment_last_main_step, // last @[main_step] + segment_last_is_memeq, // last @[is_memeq] + ...segment_next_bytes // next bytes read from memory + ], sel: 1 - is_last_segment); + +} \ No newline at end of file diff --git a/precompiles/dma/pil/dual_range.pil b/precompiles/dma/pil/dual_range.pil new file mode 100644 index 000000000..c8dd82f55 --- /dev/null +++ b/precompiles/dma/pil/dual_range.pil @@ -0,0 +1,17 @@ +require "std_lookup.pil" + +// Template to verify two small ranges of values in the same lookup (typically used for bytes). +// The lookup table size is limited by (range1_size * range2_size). + +airtemplate DualRange(int N = 0, const int id, const int min1, const int max1, const int min2, const int max2 ) { + // Number of elements in B + const int COUNT_B = (max2 - min2 + 1); + N = (max1 - min1 + 1) * COUNT_B; + + // Repeat each A element COUNT_B times so every A value pairs with every B value + col fixed A = [min1:COUNT_B..max1:COUNT_B]...; + col fixed B = [min2..max2]...; + + col witness multiplicity; + lookup_proves(id, mul: multiplicity, expressions: [A, B]); +} \ No newline at end of file diff --git a/precompiles/dma/pil/tools.pil b/precompiles/dma/pil/tools.pil new file mode 100644 index 000000000..57c6325ff --- /dev/null +++ b/precompiles/dma/pil/tools.pil @@ -0,0 +1,13 @@ + +function get_continuation_id(const int base_cont_id, const int max_cont): int { + container proof.continuation.`${AIRTEMPLATE}` alias continuation { + int id = base_cont_id; + } + const int id = continuation.id; + assert(id < base_cont_id); + continuation.id += continuation.id; + return id; +} + +// const int continuation_id = get_new_continuation_id(MEM_CPY_BYTE_CONT_ID, 1); + diff --git a/precompiles/dma/src/dma/dma.rs b/precompiles/dma/src/dma/dma.rs new file mode 100644 index 000000000..b4e804e32 --- /dev/null +++ b/precompiles/dma/src/dma/dma.rs @@ -0,0 +1,362 @@ +use std::sync::Arc; + +use fields::PrimeField64; +use rayon::prelude::*; + +use pil_std_lib::Std; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +use zisk_core::zisk_ops::ZiskOp; +use zisk_pil::{DMA_ROM_ID, DUAL_RANGE_7_BITS_ID}; + +use crate::{dma::dma_rom::DmaRom, dma_trace, DmaInput, DmaModule, DMA_ROM_WITH_MEMCMP_SIZE}; +use precompiles_helpers::DmaInfo; + +#[cfg(feature = "packed")] +pub use zisk_pil::{DmaTracePacked, DmaTraceRowPacked}; + +#[cfg(not(feature = "packed"))] +pub use zisk_pil::{DmaTrace, DmaTraceRow}; + +#[cfg(feature = "packed")] +type DmaTraceRowType = DmaTraceRowPacked; +#[cfg(feature = "packed")] +type DmaTraceType = DmaTracePacked; + +#[cfg(not(feature = "packed"))] +type DmaTraceRowType = DmaTraceRow; +#[cfg(not(feature = "packed"))] +type DmaTraceType = DmaTrace; + +/// The `DmaSM` struct encapsulates the logic of the Dma State Machine. +pub struct DmaSM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + pub rom_table_id: usize, + pub dual_range_7_bits_id: usize, + pub range_22_bits_id: usize, + pub range_24_bits_id: usize, + pub range_16_bits_id: usize, +} + +impl DmaSM { + /// Creates a new Dma State Machine instance. + /// + /// # Returns + /// A new `DmaSM` instance. + pub fn new(std: Arc>) -> Arc { + Arc::new(Self { + std: std.clone(), + rom_table_id: std.get_virtual_table_id(DMA_ROM_ID).expect("Failed to get dma rom ID"), + dual_range_7_bits_id: std + .get_virtual_table_id(DUAL_RANGE_7_BITS_ID) + .expect("Failed to get dual 7-bits table ID"), + range_22_bits_id: std + .get_range_id(0, 0x3F_FFFF, None) + .expect("Failed to get 22b table ID"), + range_24_bits_id: std + .get_range_id(0, 0xFF_FFFF, None) + .expect("Failed to get 24b table ID"), + range_16_bits_id: std + .get_range_id(0, 0xFFFF, None) + .expect("Failed to get 16b table ID"), + }) + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[allow(clippy::too_many_arguments)] + #[inline(always)] + pub fn process_slice( + &self, + input: &DmaInput, + // row_offset: usize, + trace: &mut DmaTraceRowType, + local_dual_7_bits_multiplicities: &mut [u64], + local_22_bits_values: &mut Vec, + local_24_bits_values: &mut Vec, + local_24_bits_low_values: &mut [u32], + local_16_bits_multiplicities: &mut [u32], + local_rom_multiplicities: &mut [u64], + ) { + let count = DmaInfo::get_count(input.encoded); + let count_lt_256 = count < 256; + let count_ge_256 = 1 - count_lt_256 as usize; + let h_count = ((count >> 8) - count_ge_256) as u32; + trace.set_count_lt_256(count_lt_256); + trace.set_h_count(h_count); + let l_count = (count & 0xFF) as u16 + 256 * count_ge_256 as u16; + trace.set_l_count(l_count); + + // to increase performance because the 99.99% of count is < 64K => h_count < 256 + if h_count < 256 { + local_24_bits_low_values[h_count as usize] += 1; + } else { + local_24_bits_values.push(h_count); + } + + let h_src64 = input.src >> 10; + let h_dst64 = input.dst >> 10; + let l_src64 = (input.src >> 3) as u8 & 0x7F; + let l_dst64 = (input.dst >> 3) as u8 & 0x7F; + + trace.set_h_src64(h_src64); + trace.set_l_src64(l_src64); + let src_offset = input.src as u8 & 0x07; + trace.set_src_offset(src_offset); + + trace.set_h_dst64(h_dst64); + trace.set_l_dst64(l_dst64); + trace.set_dst_offset(input.dst as u8 & 0x07); + + local_22_bits_values.push(h_src64); + local_22_bits_values.push(h_dst64); + let dual_7_bits_row = ((l_src64 as usize) << 7) | l_dst64 as usize; + local_dual_7_bits_multiplicities[dual_7_bits_row] += 1; + + trace.set_main_step(input.step); + + let pre_count = DmaInfo::get_pre_count(input.encoded) as u8; + let loop_count = DmaInfo::get_loop_count(input.encoded); + let post_count = DmaInfo::get_post_count(input.encoded); + trace.set_use_pre(pre_count > 0); + trace.set_use_loop(loop_count > 0); + trace.set_use_post(post_count > 0); + + trace.set_src64_inc_by_pre(DmaInfo::get_src64_inc_by_pre(input.encoded) > 0); + + trace.set_pre_count(pre_count); + trace.set_l_count64((l_count - pre_count as u16 - post_count as u16) >> 3); + + let use_src = input.op != ZiskOp::DMA_INPUTCPY && input.op != ZiskOp::DMA_XMEMSET; + if use_src { + trace.set_src_offset_after_pre((src_offset + pre_count) % 8); + } + let mut result_nz = false; + match input.op { + ZiskOp::DMA_MEMCPY => trace.set_sel_memcpy(true), + ZiskOp::DMA_XMEMCPY => { + trace.set_sel_memcpy(true); + trace.set_sel_extended(true); + } + ZiskOp::DMA_MEMCMP | ZiskOp::DMA_XMEMCMP => { + trace.set_sel_memcmp(true); + trace.set_sel_extended(input.op == ZiskOp::DMA_XMEMCMP); + let pre_result_nz = DmaInfo::get_memcmp_pre_result_nz(input.encoded); + let post_result_nz = DmaInfo::get_memcmp_post_result_nz(input.encoded); + trace.set_pre_result_nz(pre_result_nz); + trace.set_post_result_nz(post_result_nz); + let count_diff = input.count_bus - count as u32; + + // INVALID ASSERT BECAUSE count_diff == 0 and diffent, case last byte is + // different. + // assert!( + // (count_diff == 0 && (pre_result_nz as u32 + post_result_nz as u32) == 0) + // || (count_diff != 0 && (pre_result_nz as u32 + post_result_nz as u32) == 1), + // "Invalid memcmp result for count_diff {count_diff}: ({}-{count}) \p + // pre_result_nz={pre_result_nz}, post_result_nz={post_result_nz} {}", + // input.count_bus, + // DmaInfo::to_string(input.encoded) + // ); + + let diff_chunk = count_diff as u16; + trace.set_count_diff_chunks(0, diff_chunk); + local_16_bits_multiplicities[diff_chunk as usize] += 1; + + let diff_chunk = (count_diff >> 16) as u16; + trace.set_count_diff_chunks(1, diff_chunk); + local_16_bits_multiplicities[diff_chunk as usize] += 1; + if pre_result_nz { + let result = DmaInfo::get_memcmp_res_as_u64(input.encoded); + trace.set_bus_pre_result(0, result as u32); + trace.set_bus_pre_result(1, (result >> 32) as u32); + result_nz = true; + } + if post_result_nz { + let result = DmaInfo::get_memcmp_res_as_u64(input.encoded); + trace.set_bus_post_result(0, result as u32); + trace.set_bus_post_result(1, (result >> 32) as u32); + result_nz = true; + } + } + ZiskOp::DMA_INPUTCPY => trace.set_sel_inputcpy(true), + ZiskOp::DMA_XMEMSET => { + trace.set_sel_memset(true); + trace.set_sel_extended(true); + trace.set_fill_byte(DmaInfo::get_fill_byte(input.encoded)); + // println!("XMEMSET fill_byte: 0x{:02X}", DmaInfo::get_fill_byte(input.encoded)); + } + _ => panic!("Invalid DMA operation {}", input.op), + } + + let rom_index = + DmaRom::get_row(input.dst & 0x07, input.src & 0x07, count, result_nz, use_src); + local_rom_multiplicities[rom_index] += 1; + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_empty_slice(&self, trace: &mut DmaTraceRowType) { + // trace was initialized with zeroes + trace.set_count_lt_256(true); + } +} +impl DmaModule for DmaSM { + fn get_name(&self) -> &'static str { + "dma" + } + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + fn compute_witness( + &self, + inputs: &[Vec], + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = DmaTraceType::::new_from_vec_zeroes(trace_buffer)?; + let num_rows = trace.num_rows(); + + let total_inputs: usize = inputs.iter().map(|c| c.len()).sum(); + assert!(total_inputs <= num_rows); + + dma_trace("Dma", total_inputs, num_rows); + + timer_start_trace!(DMA_TRACE); + + // Split the dma_trace.buffer into slices matching each inner vector’s length. + let flat_inputs: Vec<_> = inputs.iter().flatten().collect(); + let trace_rows = trace.buffer.as_mut_slice(); + + // Calculate optimal chunk size + let num_threads = rayon::current_num_threads(); + let chunk_size = std::cmp::max(1, flat_inputs.len() / num_threads); + + // TODO: add new interface with u32 to std to be used with global_rom_multiplicities + // Split the add256_trace.buffer into slices matching each inner vector’s length. + let ( + global_dual_7_bits_multiplicities, + global_22_bits_values, + global_24_bits_values, + global_24_bits_low_values, + global_16_bits_multiplicities, + global_rom_multiplicities, + ) = flat_inputs + .par_chunks(chunk_size) + .zip(trace_rows.par_chunks_mut(chunk_size)) + // .enumerate() + // .map(|(chunk_idx, (input_chunk, trace_chunk))| { + .map(|(input_chunk, trace_chunk)| { + // Local array shared by this chunk + let mut local_dual_7_bits_multiplicities = vec![0u64; 1 << 14]; + let mut local_22_bits_values = Vec::::with_capacity(inputs.len() * 2); + let mut local_24_bits_values = Vec::::new(); + let mut local_24_bits_low_values = vec![0u32; 256]; + let mut local_16_bits_multiplicities = vec![0u32; 1 << 16]; + let mut local_rom_multiplicities = vec![0u64; DMA_ROM_WITH_MEMCMP_SIZE]; + + // let chunk_offset = chunk_idx * chunk_size; + // Sum all local arrays into a global one + // for (local_idx, (input, trace_row)) in + // input_chunk.iter().zip(trace_chunk.iter_mut()).enumerate() + for (input, trace_row) in input_chunk.iter().zip(trace_chunk.iter_mut()) { + // let row_offset = chunk_offset + local_idx; + self.process_slice( + input, + //row_offset, + trace_row, + &mut local_dual_7_bits_multiplicities, + &mut local_22_bits_values, + &mut local_24_bits_values, + &mut local_24_bits_low_values, + &mut local_16_bits_multiplicities, + &mut local_rom_multiplicities, + ); + } + ( + local_dual_7_bits_multiplicities, + local_22_bits_values, + local_24_bits_values, + local_24_bits_low_values, + local_16_bits_multiplicities, + local_rom_multiplicities, + ) + }) + .reduce( + // Identity: create empty accumulators + || { + ( + vec![0u64; 1 << 14], + Vec::new(), + Vec::new(), + vec![0u32; 256], + vec![0u32; 1 << 16], + vec![0u64; DMA_ROM_WITH_MEMCMP_SIZE], + ) + }, + // Combine two results + |mut acc, local| { + // Merge multiplicities (element-wise addition) + for (i, &val) in local.0.iter().enumerate() { + acc.0[i] += val; + } + // Concatenate value vectors + acc.1.extend(local.1); + acc.2.extend(local.2); + // Merge low values (element-wise addition) + for (i, &val) in local.3.iter().enumerate() { + acc.3[i] += val; + } + for (i, &val) in local.4.iter().enumerate() { + acc.4[i] += val; + } + for (i, &val) in local.5.iter().enumerate() { + acc.5[i] += val; + } + acc + }, + ); + + // for i in [ + // 78643, 78832, 78833, 78834, 82529, 82530, 82531, 85171, 85172, 85173, 87342, 87343, + // 87344, 103310, 103470, 103471, 103472, 105228, 105229, 105230, 105444, 53605, 86086, + // ] { + // println!("TRACE[{i}]={:?}", trace_rows[i]); + // } + self.std + .inc_virtual_rows_ranged(self.dual_range_7_bits_id, &global_dual_7_bits_multiplicities); + self.std.range_checks(self.range_24_bits_id, global_24_bits_low_values); + self.std.inc_virtual_rows_ranged(self.rom_table_id, &global_rom_multiplicities); + self.std.range_checks(self.range_16_bits_id, global_16_bits_multiplicities); + + for value in global_22_bits_values { + self.std.range_check(self.range_22_bits_id, value as i64, 1); + } + for value in global_24_bits_values { + self.std.range_check(self.range_24_bits_id, value as i64, 1); + } + + if total_inputs < num_rows { + self.process_empty_slice(&mut trace_rows[total_inputs]); + let empty_row = trace_rows[total_inputs]; + trace_rows[total_inputs + 1..].par_iter_mut().for_each(|row| { + *row = empty_row; + }); + } + timer_stop_and_log_trace!(DMA_TRACE); + let from_trace = FromTrace::new(&mut trace); + Ok(AirInstance::new_from_trace(from_trace)) + } +} diff --git a/precompiles/dma/src/dma/dma_collector.rs b/precompiles/dma/src/dma/dma_collector.rs new file mode 100644 index 000000000..7cece1b88 --- /dev/null +++ b/precompiles/dma/src/dma/dma_collector.rs @@ -0,0 +1,131 @@ +//! The `DmaCollector` module defines a collector to gather all inputs for an instance +//! of the DMA State Machine. + +use std::any::Any; + +use precompiles_helpers::DmaInfo; +use zisk_common::{BusDevice, BusId, ChunkId, DMA_ENCODED, OP, OPERATION_BUS_ID, OP_TYPE}; +use zisk_core::{zisk_ops::ZiskOp, ZiskOperationType}; + +use crate::{DmaCollectCounters, DmaCollectorRoutingLog, DmaInput}; + +pub struct DmaCollector { + /// The chunk identifier being collected (used for tracing/debugging). + pub chunk_id: ChunkId, + + /// Collected inputs for witness computation. + pub inputs: Vec, + + /// Routing log for debugging and tracking collection operations. + pub rlog: DmaCollectorRoutingLog, + + /// The number of operations to collect. + pub num_operations: u64, + + /// Counters to determine which operations to collect based on the plan's configuration. + pub collect_counters: DmaCollectCounters, +} + +impl DmaCollector { + /// Creates a new `DmaCollector`. + /// + /// # Arguments + /// + /// * `chunk_id` - The chunk identifier for this collector instance. + /// * `num_operations` - The number of operations to collect. + /// * `collect_counters` - Counters to determine which operations to collect based on the plan's configuration. + /// + /// # Returns + /// A new `DmaCollector` instance initialized with the provided parameters. + pub fn new( + chunk_id: ChunkId, + num_operations: u64, + collect_counters: DmaCollectCounters, + ) -> Self { + Self { + chunk_id, + inputs: Vec::with_capacity(num_operations as usize), + num_operations, + collect_counters, + rlog: DmaCollectorRoutingLog::new(chunk_id), + } + } + + /// Processes data received on the bus, collecting the inputs necessary for witness computation. + /// + /// # Arguments + /// * `bus_id` - The ID of the bus (validated to be OPERATION_BUS_ID). + /// * `data` - The main data array received from the bus containing operation information. + /// * `data_ext` - Extended data array containing additional operation-specific information. + /// + /// # Returns + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64], data_ext: &[u64]) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + if data[OP_TYPE] != ZiskOperationType::Dma as u64 { + return true; + } + + if self.inputs.len() == self.num_operations as usize { + debug_assert!(self.collect_counters.is_final_skip()); + return self.rlog.log_discard_cond(false, 1, data, false); + } + + let encoded = data[DMA_ENCODED]; + let op = data[OP] as u8; + if DmaInfo::is_direct(encoded) { + if op == ZiskOp::DMA_MEMCMP || op == ZiskOp::DMA_XMEMCMP { + // We need to collect all memcmp/memcpy operations for the pre/post processing. + panic!("Direct memcmp/memcpy operations are not supported"); + } + self.rlog.log_discard(2, data); + return true; + } + + if self.collect_counters.should_collect_single_row(op) { + self.rlog.log_collect(1, data, 0, 0); + self.inputs.push(if op == ZiskOp::DMA_XMEMSET { + DmaInput::from_memset(encoded, op, data, data_ext) + } else { + DmaInput::from(encoded, op, data, data_ext) + }); + if self.inputs.len() >= self.num_operations as usize { + debug_assert!(self.collect_counters.is_final_skip()); + self.rlog.log_discard(4, data); + return true; + } + } else { + self.rlog.log_discard(3, data); + } + + true + } + /// Returns debug information about the collector's state. + /// + /// When the `save_dma_collectors` feature is enabled, this returns detailed information + /// including chunk ID, number of collected inputs, counter information, and routing log. + /// Otherwise, returns an empty string. + /// + /// # Returns + /// A formatted string with debug information. + pub fn get_debug_info(&self) -> String { + #[cfg(feature = "save_dma_collectors")] + return format!( + "CC|{}|{}|{}\n", + self.chunk_id, + self.inputs.len(), + self.collect_counters.get_debug_info(), + ) + &self.rlog.get_debug_info(); + #[cfg(not(feature = "save_dma_collectors"))] + String::new() + } +} + +impl BusDevice for DmaCollector { + fn as_any(self: Box) -> Box { + self + } +} diff --git a/precompiles/dma/src/dma/dma_input.rs b/precompiles/dma/src/dma/dma_input.rs new file mode 100644 index 000000000..ce7af899a --- /dev/null +++ b/precompiles/dma/src/dma/dma_input.rs @@ -0,0 +1,79 @@ +use precompiles_helpers::DmaInfo; +use zisk_common::{A, B, DMA_MEMCMP_COUNT_BUS, STEP}; +use zisk_core::zisk_ops::ZiskOp; + +#[derive(Debug)] +pub struct DmaInput { + pub src: u32, + pub dst: u32, + pub op: u8, + pub encoded: u64, + pub count_bus: u32, + pub step: u64, // main step +} + +impl DmaInput { + pub fn from(encoded: u64, op: u8, data: &[u64], _data_ext: &[u64]) -> Self { + Self { + dst: data[A] as u32, + src: data[B] as u32, + step: data[STEP], + encoded, + op, + count_bus: if op == ZiskOp::DMA_MEMCMP || op == ZiskOp::DMA_XMEMCMP { + data[DMA_MEMCMP_COUNT_BUS] as u32 + } else { + 0 + }, + } + } + pub fn from_memset(encoded: u64, op: u8, data: &[u64], _data_ext: &[u64]) -> Self { + Self { + dst: data[A] as u32, + // src: (data[A] & 0x7) as u32, + src: 0, + step: data[STEP], + encoded, + op, + count_bus: DmaInfo::get_count(encoded) as u32, + } + } + + #[cfg(feature = "save_dma_inputs")] + /// Writes a list of DmaInput instances to a text file with columns separated by |. + /// Path is taken from DEBUG_OUTPUT_PATH environment variable, defaulting to "tmp/". + pub fn dump_to_file(inputs: &[Vec], filename: &str) -> std::io::Result<()> { + use std::io::Write; + let path = std::env::var("DEBUG_OUTPUT_PATH").unwrap_or_else(|_| "tmp/".to_string()); + let full_path = format!("{}{}", path, filename); + + let mut file = std::fs::File::create(&full_path)?; + + // Write header + writeln!( + file, + "{:>8}|{:>10}|{:>10}|{:>2}|{:>18}|{:>8}|{:>14}|{}|{}|{:>10}", + "pos", "src", "dst", "op", "encoded", "count_bus", "step", "pre", "post", "loop" + )?; + + // Write data rows + for (pos, input) in inputs.iter().flatten().enumerate() { + writeln!( + file, + "{:>8}|0x{:08X}|0x{:08X}|{:02X}|0x{:016X}|{:>8}|{:>14}|{}|{}|{:>10}", + pos, + input.src, + input.dst, + input.op, + input.encoded, + input.count_bus, + input.step, + DmaInfo::get_pre_count(input.encoded), + DmaInfo::get_post_count(input.encoded), + DmaInfo::get_loop_count(input.encoded), + )?; + } + + Ok(()) + } +} diff --git a/precompiles/dma/src/dma/dma_inputcpy.rs b/precompiles/dma/src/dma/dma_inputcpy.rs new file mode 100644 index 000000000..559ee5bea --- /dev/null +++ b/precompiles/dma/src/dma/dma_inputcpy.rs @@ -0,0 +1,254 @@ +use std::sync::Arc; + +use fields::PrimeField64; +use rayon::prelude::*; + +use pil_std_lib::Std; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +use zisk_pil::DMA_ROM_ID; + +use crate::{dma::dma_rom::DmaRom, dma_trace, DmaInput, DmaModule, DMA_ROM_WITHOUT_MEMCMP_SIZE}; +use precompiles_helpers::DmaInfo; + +#[cfg(feature = "packed")] +pub use zisk_pil::{ + DmaInputCpyTracePacked as DmaInputCpyTrace, DmaInputCpyTraceRowPacked as DmaInputCpyTraceRow, +}; + +#[cfg(not(feature = "packed"))] +pub use zisk_pil::{DmaInputCpyTrace, DmaInputCpyTraceRow}; + +/// The `DmaInputCpySM` struct encapsulates the logic of the Dma State Machine. +pub struct DmaInputCpySM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + pub rom_table_id: usize, + pub range_7_bits_id: usize, + pub range_22_bits_id: usize, + pub range_24_bits_id: usize, +} + +impl DmaInputCpySM { + /// Creates a new Dma State Machine instance. + /// + /// # Returns + /// A new `DmaInputCpySM` instance. + pub fn new(std: Arc>) -> Arc { + Arc::new(Self { + std: std.clone(), + rom_table_id: std.get_virtual_table_id(DMA_ROM_ID).expect("Failed to get dma rom ID"), + range_7_bits_id: std + .get_range_id(0, 0x07F, None) + .expect("Failed to get 7-bits range ID"), + range_22_bits_id: std + .get_range_id(0, 0x3F_FFFF, None) + .expect("Failed to get 22b range ID"), + range_24_bits_id: std + .get_range_id(0, 0xFF_FFFF, None) + .expect("Failed to get 24b range ID"), + }) + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[allow(clippy::too_many_arguments)] + #[inline(always)] + pub fn process_slice( + &self, + input: &DmaInput, + trace: &mut DmaInputCpyTraceRow, + local_7_bits_multiplicities: &mut [u32], + local_22_bits_values: &mut Vec, + local_24_bits_values: &mut Vec, + local_24_bits_low_values: &mut [u32], + local_rom_multiplicities: &mut [u64], + ) { + let count = DmaInfo::get_count(input.encoded); + let count_lt_256 = count < 256; + let count_ge_256 = 1 - count_lt_256 as usize; + let h_count = ((count >> 8) - count_ge_256) as u32; + trace.set_count_lt_256(count_lt_256); + trace.set_h_count(h_count); + let l_count = (count & 0xFF) as u16 + 256 * count_ge_256 as u16; + trace.set_l_count(l_count); + + // to increase performance because the 99.99% of count is < 64K => h_count < 256 + if h_count < 256 { + local_24_bits_low_values[h_count as usize] += 1; + } else { + local_24_bits_values.push(h_count); + } + + let h_dst64 = input.dst >> 10; + let l_dst64 = (input.dst >> 3) as u8 & 0x7F; + + trace.set_h_dst64(h_dst64); + trace.set_l_dst64(l_dst64); + trace.set_dst_offset(input.dst as u8 & 0x07); + + local_22_bits_values.push(h_dst64); + local_7_bits_multiplicities[l_dst64 as usize] += 1; + + let rom_index = DmaRom::get_row(input.dst & 0x07, input.src & 0x07, count, false, false); + + local_rom_multiplicities[rom_index] += 1; + + trace.set_main_step(input.step); + + let pre_count = DmaInfo::get_pre_count(input.encoded) as u8; + let loop_count = DmaInfo::get_loop_count(input.encoded); + let post_count = DmaInfo::get_post_count(input.encoded); + trace.set_use_pre(pre_count > 0); + trace.set_use_loop(loop_count > 0); + trace.set_use_post(post_count > 0); + + trace.set_pre_count(pre_count); + trace.set_l_count64((l_count - pre_count as u16 - post_count as u16) >> 3); + + trace.set_sel_inputcpy(true); + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the DmaInputCpy trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_empty_slice(&self, trace: &mut DmaInputCpyTraceRow) { + trace.set_count_lt_256(true); + } +} + +impl DmaModule for DmaInputCpySM { + fn get_name(&self) -> &'static str { + "dma_inputcpy" + } + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + fn compute_witness( + &self, + inputs: &[Vec], + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = DmaInputCpyTrace::::new_from_vec_zeroes(trace_buffer)?; + let num_rows = trace.num_rows(); + + let total_inputs: usize = inputs.iter().map(|c| c.len()).sum(); + assert!(total_inputs <= num_rows); + + dma_trace("DmaInputCpy", total_inputs, num_rows); + + timer_start_trace!(DMA_TRACE); + + // Split the dma_trace.buffer into slices matching each inner vector’s length. + let flat_inputs: Vec<_> = inputs.iter().flatten().collect(); + let trace_rows = trace.buffer.as_mut_slice(); + + // Calculate optimal chunk size + let num_threads = rayon::current_num_threads(); + let chunk_size = std::cmp::max(1, flat_inputs.len() / num_threads); + + // TODO: add new interface with u32 to std to be used with global_rom_multiplicities + // Split the add256_trace.buffer into slices matching each inner vector’s length. + let ( + global_7_bits_multiplicities, + global_22_bits_values, + global_24_bits_values, + global_24_bits_low_values, + global_rom_multiplicities, + ) = flat_inputs + .par_chunks(chunk_size) + .zip(trace_rows.par_chunks_mut(chunk_size)) + .map(|(input_chunk, trace_chunk)| { + // Local array shared by this chunk + let mut local_7_bits_multiplicities = vec![0u32; 1 << 14]; + let mut local_22_bits_values = Vec::::with_capacity(inputs.len() * 2); + let mut local_24_bits_values = Vec::::new(); + let mut local_24_bits_low_values = vec![0u32; 256]; + let mut local_rom_multiplicities = vec![0u64; DMA_ROM_WITHOUT_MEMCMP_SIZE]; + // Sum all local arrays into a global one + for (input, trace_row) in input_chunk.iter().zip(trace_chunk.iter_mut()) { + self.process_slice( + input, + trace_row, + &mut local_7_bits_multiplicities, + &mut local_22_bits_values, + &mut local_24_bits_values, + &mut local_24_bits_low_values, + &mut local_rom_multiplicities, + ); + } + ( + local_7_bits_multiplicities, + local_22_bits_values, + local_24_bits_values, + local_24_bits_low_values, + local_rom_multiplicities, + ) + }) + .reduce( + // Identity: create empty accumulators + || { + ( + vec![0u32; 1 << 14], + Vec::new(), + Vec::new(), + vec![0u32; 256], + vec![0u64; DMA_ROM_WITHOUT_MEMCMP_SIZE], + ) + }, + // Combine two results + |mut acc, local| { + // Merge multiplicities (element-wise addition) + for (i, &val) in local.0.iter().enumerate() { + acc.0[i] += val; + } + // Concatenate value vectors + acc.1.extend(local.1); + acc.2.extend(local.2); + // Merge low values (element-wise addition) + for (i, &val) in local.3.iter().enumerate() { + acc.3[i] += val; + } + for (i, &val) in local.4.iter().enumerate() { + acc.4[i] += val; + } + acc + }, + ); + + self.std.range_checks(self.range_7_bits_id, global_7_bits_multiplicities); + self.std.range_checks(self.range_24_bits_id, global_24_bits_low_values); + self.std.inc_virtual_rows_ranged(self.rom_table_id, &global_rom_multiplicities); + + for value in global_22_bits_values { + self.std.range_check(self.range_22_bits_id, value as i64, 1); + } + for value in global_24_bits_values { + self.std.range_check(self.range_24_bits_id, value as i64, 1); + } + + if total_inputs < num_rows { + self.process_empty_slice(&mut trace_rows[total_inputs]); + let empty_row = trace_rows[total_inputs]; + trace_rows[total_inputs + 1..].par_iter_mut().for_each(|row| { + *row = empty_row; + }); + } + + timer_stop_and_log_trace!(DMA_TRACE); + let from_trace = FromTrace::new(&mut trace); + Ok(AirInstance::new_from_trace(from_trace)) + } +} diff --git a/precompiles/dma/src/dma/dma_instance.rs b/precompiles/dma/src/dma/dma_instance.rs new file mode 100644 index 000000000..b6cc8b5b2 --- /dev/null +++ b/precompiles/dma/src/dma/dma_instance.rs @@ -0,0 +1,151 @@ +//! The `DmaInstance` module defines an instance to perform the witness computation +//! for the Dma State Machine. +//! +//! It manages collected inputs and interacts with the `DmaSM` to compute witnesses for +//! execution plans. + +use crate::dma::dma_collector::DmaCollector; +#[cfg(feature = "save_dma_collectors")] +use crate::save_dma_collectors; +#[cfg(feature = "save_dma_inputs")] +use crate::DmaInput; +use crate::{DmaCheckPoint, DmaModule}; +use fields::PrimeField64; +use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; +use std::sync::Arc; +use zisk_common::{ + BusDevice, CheckPoint, ChunkId, Instance, InstanceCtx, InstanceType, PayloadType, StatsType, +}; +use zisk_pil::{DmaInputCpyTrace, DmaMemCpyTrace, DmaTrace}; + +/// The `DmaInstance` struct represents an instance for the Dma State Machine. +/// +/// It encapsulates the `DmaSM` and its associated context, and it processes input data +/// to compute witnesses for the Dma State Machine. +pub struct DmaInstance { + /// Dma state machine. + module: Arc>, + + /// Instance context. + ictx: InstanceCtx, +} + +impl DmaInstance { + /// Creates a new `DmaInstance`. + /// + /// # Arguments + /// * `dma_sm` - An `Arc`-wrapped reference to the Dma State Machine. + /// * `ictx` - The `InstanceCtx` associated with this instance, containing the execution plan. + /// * `bus_id` - The bus ID associated with this instance. + /// + /// # Returns + /// A new `DmaInstance` instance initialized with the provided state machine and + /// context. + pub fn new(module: Arc>, ictx: InstanceCtx) -> Self { + Self { module, ictx } + } + + pub fn build_dma_collector(&self, chunk_id: ChunkId) -> DmaCollector { + debug_assert!( + [DmaTrace::::AIR_ID, DmaMemCpyTrace::::AIR_ID, DmaInputCpyTrace::::AIR_ID,] + .contains(&self.ictx.plan.air_id), + "DmaInstance: Unsupported air_id: {:?}", + self.ictx.plan.air_id + ); + + let meta = self.ictx.plan.meta.as_ref().unwrap(); + let collect_info = meta.downcast_ref::().unwrap(); + let (num_ops, collect_counters) = collect_info.chunks[&chunk_id]; + DmaCollector::new(chunk_id, num_ops, collect_counters) + } +} + +impl Instance for DmaInstance { + /// Computes the witness for the Dma execution plan. + /// + /// This method leverages the `DmaSM` to generate an `AirInstance` using the collected + /// inputs. + /// + /// # Arguments + /// * `_pctx` - The proof context, unused in this implementation. + /// + /// # Returns + /// An `Option` containing the computed `AirInstance`. + fn compute_witness( + &self, + _pctx: &ProofCtx, + _sctx: &SetupCtx, + collectors: Vec<(usize, Box>)>, + trace_buffer: Vec, + ) -> ProofmanResult>> { + #[cfg(feature = "save_dma_collectors")] + let (debug, inputs): (Vec<_>, Vec<_>) = collectors + .into_iter() + .map(|(_, collector)| { + let collector = collector.as_any().downcast::().unwrap(); + (collector.get_debug_info(), collector.inputs) + }) + .unzip(); + #[cfg(not(feature = "save_dma_collectors"))] + let inputs: Vec<_> = collectors + .into_iter() + .map(|(_, collector)| collector.as_any().downcast::().unwrap().inputs) + .collect(); + + #[cfg(any(feature = "save_dma_collectors", feature = "save_dma_inputs"))] + let air_instance_id = + _pctx.dctx_find_air_instance_id(self.ictx.plan.global_id.unwrap()).unwrap(); + + #[cfg(feature = "save_dma_collectors")] + save_dma_collectors( + &format!("{}_collector_{air_instance_id:04}.txt", self.module.get_name()), + debug, + )?; + + #[cfg(feature = "save_dma_inputs")] + DmaInput::dump_to_file( + &inputs, + &format!("{}_inputs_{air_instance_id:04}.txt", self.module.get_name()), + )?; + + Ok(Some(self.module.compute_witness(&inputs, trace_buffer)?)) + } + + /// Retrieves the checkpoint associated with this instance. + /// + /// # Returns + /// A `CheckPoint` object representing the checkpoint of the execution plan. + fn check_point(&self) -> &CheckPoint { + &self.ictx.plan.check_point + } + + /// Retrieves the type of this instance. + /// + /// # Returns + /// An `InstanceType` representing the type of this instance (`InstanceType::Instance`). + fn instance_type(&self) -> InstanceType { + InstanceType::Instance + } + + fn stats_type(&self) -> StatsType { + StatsType::Precompiled + } + + fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { + assert_eq!( + self.ictx.plan.air_id, + DmaTrace::::AIR_ID, + "DmaInstance: Unsupported air_id: {:?}", + self.ictx.plan.air_id + ); + + let meta = self.ictx.plan.meta.as_ref().unwrap(); + let collect_info = meta.downcast_ref::().unwrap(); + let (num_ops, collect_counter) = collect_info.chunks[&chunk_id]; + Some(Box::new(DmaCollector::new(chunk_id, num_ops, collect_counter))) + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} diff --git a/precompiles/dma/src/dma/dma_memcpy.rs b/precompiles/dma/src/dma/dma_memcpy.rs new file mode 100644 index 000000000..de3a145c0 --- /dev/null +++ b/precompiles/dma/src/dma/dma_memcpy.rs @@ -0,0 +1,286 @@ +use std::sync::Arc; + +use fields::PrimeField64; +use rayon::prelude::*; + +use pil_std_lib::Std; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +use zisk_core::zisk_ops::ZiskOp; +use zisk_pil::{DMA_ROM_ID, DUAL_RANGE_7_BITS_ID}; + +use crate::{dma::dma_rom::DmaRom, dma_trace, DmaInput, DmaModule, DMA_ROM_WITHOUT_MEMCMP_SIZE}; +use precompiles_helpers::DmaInfo; + +#[cfg(feature = "packed")] +pub use zisk_pil::{ + DmaMemCpyTracePacked as DmaMemCpyTrace, DmaMemCpyTraceRowPacked as DmaMemCpyTraceRow, +}; + +#[cfg(not(feature = "packed"))] +pub use zisk_pil::{DmaMemCpyTrace, DmaMemCpyTraceRow}; + +/// The `DmaSM` struct encapsulates the logic of the Dma State Machine. +pub struct DmaMemCpySM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + pub rom_table_id: usize, + pub dual_range_7_bits_id: usize, + pub range_22_bits_id: usize, + pub range_24_bits_id: usize, +} + +impl DmaMemCpySM { + /// Creates a new Dma State Machine instance. + /// + /// # Returns + /// A new `DmaSM` instance. + pub fn new(std: Arc>) -> Arc { + Arc::new(Self { + std: std.clone(), + rom_table_id: std.get_virtual_table_id(DMA_ROM_ID).expect("Failed to get dma rom ID"), + dual_range_7_bits_id: std + .get_virtual_table_id(DUAL_RANGE_7_BITS_ID) + .expect("Failed to get dual 7-bits table ID"), + range_22_bits_id: std + .get_range_id(0, 0x3F_FFFF, None) + .expect("Failed to get 22b table ID"), + range_24_bits_id: std + .get_range_id(0, 0xFF_FFFF, None) + .expect("Failed to get 24b table ID"), + }) + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[allow(clippy::too_many_arguments)] + #[inline(always)] + pub fn process_slice( + &self, + input: &DmaInput, + trace: &mut DmaMemCpyTraceRow, + local_dual_7_bits_multiplicities: &mut [u64], + local_22_bits_values: &mut Vec, + local_24_bits_values: &mut Vec, + local_24_bits_low_values: &mut [u32], + local_rom_multiplicities: &mut [u64], + ) { + let count = DmaInfo::get_count(input.encoded); + let count_lt_256 = count < 256; + let count_ge_256 = 1 - count_lt_256 as usize; + let h_count = ((count >> 8) - count_ge_256) as u32; + trace.set_count_lt_256(count_lt_256); + trace.set_h_count(h_count); + let l_count = (count & 0xFF) as u16 + 256 * count_ge_256 as u16; + trace.set_l_count(l_count); + + // to increase performance because the 99.99% of count is < 64K => h_count < 256 + if h_count < 256 { + local_24_bits_low_values[h_count as usize] += 1; + } else { + local_24_bits_values.push(h_count); + } + + let h_src64 = input.src >> 10; + let h_dst64 = input.dst >> 10; + let l_src64 = (input.src >> 3) as u8 & 0x7F; + let l_dst64 = (input.dst >> 3) as u8 & 0x7F; + + trace.set_h_src64(h_src64); + trace.set_l_src64(l_src64); + let src_offset = input.src as u8 & 0x07; + trace.set_src_offset(src_offset); + + trace.set_h_dst64(h_dst64); + trace.set_l_dst64(l_dst64); + trace.set_dst_offset(input.dst as u8 & 0x07); + + local_22_bits_values.push(h_src64); + local_22_bits_values.push(h_dst64); + let dual_7_bits_row = ((l_src64 as usize) << 7) | l_dst64 as usize; + local_dual_7_bits_multiplicities[dual_7_bits_row] += 1; + // println!( + // "local_dual_7_bits_multiplicities[{dual_7_bits_row} ({l_src64}|{l_dst64})] = {}", + // local_dual_7_bits_multiplicities[dual_7_bits_row] + // ); + + let rom_index = DmaRom::get_row(input.dst & 0x07, input.src & 0x07, count, false, true); + // println!( + // "\x1B[1;35m[DmaMemCpy] ROM index: {rom_index} [dst_offset:{}, src_offset:{} count:{count}] E:0x{:016X} => {rom_index} \ + // OP:0x{:02X} S:{}\x1B[0m", + // input.dst & 0x07, + // input.src & 0x07, + // input.encoded, + // input.op, + // input.step + // ); + + local_rom_multiplicities[rom_index] += 1; + + trace.set_main_step(input.step); + + let pre_count = DmaInfo::get_pre_count(input.encoded) as u8; + let loop_count = DmaInfo::get_loop_count(input.encoded); + let post_count = DmaInfo::get_post_count(input.encoded); + trace.set_use_pre(pre_count > 0); + trace.set_use_loop(loop_count > 0); + trace.set_use_post(post_count > 0); + + trace.set_src64_inc_by_pre(DmaInfo::get_src64_inc_by_pre(input.encoded) > 0); + + trace.set_pre_count(pre_count); + trace.set_l_count64((l_count - pre_count as u16 - post_count as u16) >> 3); + trace.set_src_offset_after_pre((src_offset + pre_count) % 8); + + trace.set_sel_memcpy(true); + trace.set_sel_extended(input.op == ZiskOp::DMA_XMEMCPY); + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_empty_slice(&self, trace: &mut DmaMemCpyTraceRow) { + trace.set_count_lt_256(true); + } +} + +impl DmaModule for DmaMemCpySM { + fn get_name(&self) -> &'static str { + "dma_memcpy" + } + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + fn compute_witness( + &self, + inputs: &[Vec], + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = DmaMemCpyTrace::::new_from_vec_zeroes(trace_buffer)?; + let num_rows = trace.num_rows(); + + let total_inputs: usize = inputs.iter().map(|c| c.len()).sum(); + assert!(total_inputs <= num_rows); + + dma_trace("DmaMemCpy", total_inputs, num_rows); + + timer_start_trace!(DMA_TRACE); + + // Split the dma_trace.buffer into slices matching each inner vector’s length. + let flat_inputs: Vec<_> = inputs.iter().flatten().collect(); + let trace_rows = trace.buffer.as_mut_slice(); + + // Calculate optimal chunk size + let num_threads = rayon::current_num_threads(); + let chunk_size = std::cmp::max(1, flat_inputs.len() / num_threads); + + // TODO: add new interface with u32 to std to be used with global_rom_multiplicities + // Split the add256_trace.buffer into slices matching each inner vector’s length. + let ( + global_dual_7_bits_multiplicities, + global_22_bits_values, + global_24_bits_values, + global_24_bits_low_values, + global_rom_multiplicities, + ) = flat_inputs + .par_chunks(chunk_size) + .zip(trace_rows.par_chunks_mut(chunk_size)) + .map(|(input_chunk, trace_chunk)| { + // Local array shared by this chunk + let mut local_dual_7_bits_multiplicities = vec![0u64; 1 << 14]; + let mut local_22_bits_values = Vec::::with_capacity(inputs.len() * 2); + let mut local_24_bits_values = Vec::::new(); + let mut local_24_bits_low_values = vec![0u32; 256]; + let mut local_rom_multiplicities = vec![0u64; DMA_ROM_WITHOUT_MEMCMP_SIZE]; + // Sum all local arrays into a global one + for (input, trace_row) in input_chunk.iter().zip(trace_chunk.iter_mut()) { + self.process_slice( + input, + trace_row, + &mut local_dual_7_bits_multiplicities, + &mut local_22_bits_values, + &mut local_24_bits_values, + &mut local_24_bits_low_values, + &mut local_rom_multiplicities, + ); + } + ( + local_dual_7_bits_multiplicities, + local_22_bits_values, + local_24_bits_values, + local_24_bits_low_values, + local_rom_multiplicities, + ) + }) + .reduce( + // Identity: create empty accumulators + || { + ( + vec![0u64; 1 << 14], + Vec::new(), + Vec::new(), + vec![0u32; 256], + vec![0u64; DMA_ROM_WITHOUT_MEMCMP_SIZE], + ) + }, + // Combine two results + |mut acc, local| { + // Merge multiplicities (element-wise addition) + for (i, &val) in local.0.iter().enumerate() { + acc.0[i] += val; + } + // Concatenate value vectors + acc.1.extend(local.1); + acc.2.extend(local.2); + // Merge low values (element-wise addition) + for (i, &val) in local.3.iter().enumerate() { + acc.3[i] += val; + } + for (i, &val) in local.4.iter().enumerate() { + acc.4[i] += val; + } + acc + }, + ); + + // for (index, value) in global_dual_7_bits_multiplicities.iter().enumerate() { + // if *value != 0 { + // println!("DUAL_7_BITS[{index}]={value}") + // } + // } + self.std + .inc_virtual_rows_ranged(self.dual_range_7_bits_id, &global_dual_7_bits_multiplicities); + self.std.range_checks(self.range_24_bits_id, global_24_bits_low_values); + self.std.inc_virtual_rows_ranged(self.rom_table_id, &global_rom_multiplicities); + + for value in global_22_bits_values { + self.std.range_check(self.range_22_bits_id, value as i64, 1); + } + for value in global_24_bits_values { + self.std.range_check(self.range_24_bits_id, value as i64, 1); + } + + if total_inputs < num_rows { + self.process_empty_slice(&mut trace_rows[total_inputs]); + let empty_row = trace_rows[total_inputs]; + trace_rows[total_inputs + 1..].par_iter_mut().for_each(|row| { + *row = empty_row; + }); + } + timer_stop_and_log_trace!(DMA_TRACE); + let from_trace = FromTrace::new(&mut trace); + Ok(AirInstance::new_from_trace(from_trace)) + } +} diff --git a/precompiles/dma/src/dma/dma_module.rs b/precompiles/dma/src/dma/dma_module.rs new file mode 100644 index 000000000..869e2b269 --- /dev/null +++ b/precompiles/dma/src/dma/dma_module.rs @@ -0,0 +1,11 @@ +use crate::DmaInput; +use proofman_common::{AirInstance, ProofmanResult}; + +pub trait DmaModule: Send + Sync { + fn compute_witness( + &self, + inputs: &[Vec], + trace_buffer: Vec, + ) -> ProofmanResult>; + fn get_name(&self) -> &'static str; +} diff --git a/precompiles/dma/src/dma/dma_rom.rs b/precompiles/dma/src/dma/dma_rom.rs new file mode 100644 index 000000000..37a872c70 --- /dev/null +++ b/precompiles/dma/src/dma/dma_rom.rs @@ -0,0 +1,22 @@ +pub enum DmaRom {} + +impl DmaRom { + #[allow(dead_code)] + pub fn get_row( + dst_offset: u32, + src_offset: u32, + count: usize, + neq: bool, + use_src: bool, + ) -> usize { + assert!(dst_offset < 8, "dst_offset too big {dst_offset}"); + assert!(src_offset < 8, "src_offset too big {src_offset}"); + assert!(count < u32::MAX as usize, "count too big {count}"); + assert!(!neq || use_src); + let count = if count >= 256 { (count & 0xFF) + 256 } else { count & 0xFF }; + (dst_offset as usize * 8 + src_offset as usize) * 512 + + count + + if neq { 1 << 15 } else { 0 } + + if use_src { 0 } else { 1 << 16 } + } +} diff --git a/precompiles/dma/src/dma/mod.rs b/precompiles/dma/src/dma/mod.rs new file mode 100644 index 000000000..ca0a29662 --- /dev/null +++ b/precompiles/dma/src/dma/mod.rs @@ -0,0 +1,17 @@ +#[allow(clippy::module_inception)] +mod dma; +mod dma_collector; +mod dma_input; +mod dma_inputcpy; +mod dma_instance; +mod dma_memcpy; +mod dma_module; +mod dma_rom; + +pub use dma::*; +pub use dma_collector::*; +pub use dma_input::*; +pub use dma_inputcpy::*; +pub use dma_instance::*; +pub use dma_memcpy::*; +pub use dma_module::*; diff --git a/precompiles/dma/src/dma_64_aligned/dma_64_aligned.rs b/precompiles/dma/src/dma_64_aligned/dma_64_aligned.rs new file mode 100644 index 000000000..8fd0a6482 --- /dev/null +++ b/precompiles/dma/src/dma_64_aligned/dma_64_aligned.rs @@ -0,0 +1,351 @@ +use std::sync::Arc; + +use fields::PrimeField64; + +use pil_std_lib::Std; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +use zisk_common::SegmentId; +use zisk_core::zisk_ops::ZiskOp; +use zisk_pil::{Dma64AlignedAirValues, DUAL_RANGE_BYTE_ID}; + +#[cfg(feature = "packed")] +pub use zisk_pil::{ + Dma64AlignedTracePacked as Dma64AlignedTrace, + Dma64AlignedTraceRowPacked as Dma64AlignedTraceRow, +}; + +#[cfg(not(feature = "packed"))] +pub use zisk_pil::{Dma64AlignedTrace, Dma64AlignedTraceRow}; + +use crate::{ + dma_trace, Dma64AlignedInput, Dma64AlignedModule, DMA_64_ALIGNED_OPS_BY_ROW, F_SEL_INPUTCPY, + F_SEL_MEMCMP, F_SEL_MEMCPY, F_SEL_MEMSET, +}; +use precompiles_helpers::DmaInfo; + +/// The `Dma64AlignedSM` struct encapsulates the logic of the Dma64Aligned State Machine. +pub struct Dma64AlignedSM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + /// Range checks ID's + range_16_bits_id: usize, + range_24_bits_id: usize, + dual_range_byte_id: usize, + + op_x_rows: usize, +} + +impl Dma64AlignedSM { + /// Creates a new Dma State Machine instance. + /// + /// # Returns + /// A new `Dma64AlignedSM` instance. + pub fn new(std: Arc>) -> Arc { + Arc::new(Self { + std: std.clone(), + range_16_bits_id: std + .get_range_id(0, 0xFFFF, None) + .expect("Failed to get 16b table ID"), + range_24_bits_id: std + .get_range_id(0, (1 << 24) - 1, None) + .expect("Failed to get 24b table ID"), + dual_range_byte_id: std + .get_virtual_table_id(DUAL_RANGE_BYTE_ID) + .expect("Failed to get tabl eDUAL_RANGE_BYTE ID ID"), + op_x_rows: DMA_64_ALIGNED_OPS_BY_ROW, + }) + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_input( + &self, + input: &Dma64AlignedInput, + trace: &mut [Dma64AlignedTraceRow], + dual_byte_range_check_values: &mut Vec, + range_check_24b_values: &mut Vec, + range_check_non_used_ops: &mut u64, + air_values: &mut Dma64AlignedAirValues, + ) -> usize { + let rows = input.rows as usize; + let is_last_instance_input = rows >= trace.len(); + let skip_count = input.skip_rows as usize * self.op_x_rows; + let initial_count = DmaInfo::get_loop_count(input.encoded) - skip_count; + let mut count64 = initial_count; + // println!( + // "DMA_64_ALIGNED INPUT {input:?} count:{count64} rows:{rows} dma_info:{}", + // DmaInfo::to_string(input.encoded) + // ); + let mut src_values_index = 0; + let mut dst64 = ((input.dst + 7) >> 3) + skip_count as u32; + let mut src64 = ((input.src + 7) >> 3) + skip_count as u32; + let mut seq_end = false; + let addr_incr_by_row = self.op_x_rows as u32; + + let is_memcpy = input.op == ZiskOp::DMA_XMEMCPY || input.op == ZiskOp::DMA_MEMCPY; + let is_memeq = input.op == ZiskOp::DMA_MEMCMP || input.op == ZiskOp::DMA_XMEMCMP; + let is_memset = input.op == ZiskOp::DMA_XMEMSET; + let is_inputcpy = input.op == ZiskOp::DMA_INPUTCPY; + let fill_byte = if is_memset { DmaInfo::get_fill_byte(input.encoded) } else { 0 }; + for (irow, row) in trace.iter_mut().enumerate().take(rows) { + row.set_main_step(input.step); + + row.set_sel_memcpy(is_memcpy); + row.set_sel_memeq(is_memeq); + row.set_sel_memset(is_memset); + row.set_sel_inputcpy(is_inputcpy); + if irow == 0 && input.skip_rows == 0 { + row.set_sel_memcpy_count_load(input.op == ZiskOp::DMA_MEMCPY); + } + row.set_previous_seq_end(irow == 0 && input.skip_rows == 0); + + // calculate the first aligned address + // if dst is aligned is same address if not it's addr + 8 + row.set_dst64(dst64); + row.set_src64(src64); + dst64 += addr_incr_by_row; + src64 += addr_incr_by_row; + + row.set_count64(count64 as u32); + let use_count = if count64 <= self.op_x_rows { + seq_end = true; + count64 + } else { + count64 -= self.op_x_rows; + self.op_x_rows + }; + row.set_seq_end(seq_end); + if !is_memset { + for index in 0..use_count { + if index > 0 { + row.set_sel_op_from_1(index - 1, true); + } + let value = input.src_values[src_values_index]; + src_values_index += 1; + let h0 = ((value >> 8) & 0xFFFFFF) as u32; + let h1 = ((value >> 40) & 0xFFFFFF) as u32; + let l0 = value as u8; + let l1 = (value >> 32) as u8; + row.set_h_value_chunks(index, 0, h0); + row.set_h_value_chunks(index, 1, h1); + row.set_l_value_chunks(index, 0, l0); + row.set_l_value_chunks(index, 1, l1); + if is_inputcpy { + dual_byte_range_check_values.push(l0 as u16 + ((l1 as u16) << 8)); + range_check_24b_values.push(h0); + range_check_24b_values.push(h1); + } + } + if is_inputcpy && use_count < self.op_x_rows { + *range_check_non_used_ops += (self.op_x_rows - use_count) as u64; + } + } else { + let fill_bytes = fill_byte as u32 * 0x010101; + row.set_fill_byte(fill_byte); + for index in 0..use_count { + if index > 0 { + row.set_sel_op_from_1(index - 1, true); + } + row.set_h_value_chunks(index, 0, fill_bytes); + row.set_h_value_chunks(index, 1, fill_bytes); + row.set_l_value_chunks(index, 0, fill_byte); + row.set_l_value_chunks(index, 1, fill_byte); + } + } + } + + if is_last_instance_input { + if seq_end { + air_values.segment_last_seq_end = F::ONE; + air_values.segment_last_src64 = F::ZERO; + air_values.segment_last_dst64 = F::ZERO; + air_values.segment_last_main_step = F::ZERO; + air_values.segment_last_count64 = F::ZERO; + air_values.last_count_chunk[0] = F::ZERO; + air_values.last_count_chunk[1] = F::ZERO; + air_values.segment_last_flags = F::ZERO; + air_values.segment_last_fill_byte = F::ZERO; + } else { + air_values.segment_last_seq_end = F::ZERO; + air_values.segment_last_src64 = F::from_u32(src64 - addr_incr_by_row); + air_values.segment_last_dst64 = F::from_u32(dst64 - addr_incr_by_row); + air_values.segment_last_main_step = F::from_u64(input.step); + let last_count = initial_count - (rows - 1) * self.op_x_rows; + air_values.segment_last_count64 = F::from_u32(last_count as u32); + air_values.last_count_chunk[0] = F::from_u16(last_count as u16); + air_values.last_count_chunk[1] = F::from_u16((last_count >> 16) as u16); + air_values.segment_last_flags = F::from_u16(match input.op { + ZiskOp::DMA_MEMCPY | ZiskOp::DMA_XMEMCPY => F_SEL_MEMCPY, + ZiskOp::DMA_MEMCMP | ZiskOp::DMA_XMEMCMP => F_SEL_MEMCMP, + ZiskOp::DMA_INPUTCPY => F_SEL_INPUTCPY, + ZiskOp::DMA_XMEMSET => F_SEL_MEMSET, + _ => panic!("Invalid operation 0x{:02X}", input.op), + } as u16); + air_values.segment_last_fill_byte = F::from_u8(fill_byte); + } + } + rows + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_empty_slice(&self, trace: &mut Dma64AlignedTraceRow) { + trace.set_seq_end(true); + trace.set_previous_seq_end(true); + } +} +impl Dma64AlignedModule for Dma64AlignedSM { + fn get_name(&self) -> &'static str { + "dma_64_aligned" + } + + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + fn compute_witness( + &self, + inputs: &[Vec], + segment_id: SegmentId, + is_last_segment: bool, + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = Dma64AlignedTrace::::new_from_vec_zeroes(trace_buffer)?; + let num_rows = trace.num_rows(); + + let total_inputs: usize = inputs + .iter() + .map(|inputs| inputs.iter().map(|input| input.rows as usize).sum::()) + .sum(); + + assert!(total_inputs > 0); + // println!("LAST INPUT: {:?}", inputs.last().unwrap()); + // println!("DMA_64_ALIGNED TOTALS total_inputs:{total_inputs} num_rows:{num_rows}"); + assert!( + total_inputs <= num_rows, + "Too many inputs, total_inputs:{total_inputs} num_rows:{num_rows}" + ); + + dma_trace("Dma64Aligned", total_inputs, num_rows); + + timer_start_trace!(DMA_64_ALIGNED_TRACE); + + // Flatten and reorder inputs to ensure first, last are in theirs positions + let flat_inputs = crate::flatten_and_reorder_inputs(inputs); + let trace_rows = trace.buffer.as_mut_slice(); + + let mut air_values = Dma64AlignedAirValues::::new(); + let mut dual_byte_range_check_values = Vec::new(); + let mut range_check_24b_values = Vec::new(); + let mut range_check_non_used_ops = 0u64; + // TODO: inputs between instances + let mut row_offset = 0; + for input in flat_inputs.iter() { + let rows_used = self.process_input( + input, + &mut trace_rows[row_offset..], + &mut dual_byte_range_check_values, + &mut range_check_24b_values, + &mut range_check_non_used_ops, + &mut air_values, + ); + row_offset += rows_used; + } + + // padding + let padding_size = num_rows.saturating_sub(row_offset); + air_values.padding_size = F::from_u32(padding_size as u32); + + for padding_row in trace_rows.iter_mut().take(num_rows).skip(row_offset) { + self.process_empty_slice(padding_row); + } + if row_offset < num_rows { + air_values.segment_last_seq_end = F::ONE; + air_values.segment_last_src64 = F::ZERO; + air_values.segment_last_dst64 = F::ZERO; + air_values.segment_last_main_step = F::ZERO; + air_values.segment_last_count64 = F::ZERO; + air_values.last_count_chunk[0] = F::ZERO; + air_values.last_count_chunk[1] = F::ZERO; + air_values.segment_last_flags = F::ZERO; + air_values.segment_last_fill_byte = F::ZERO; + } + + // add range check of count to check that it's a positive 32-bits number + let last_count = air_values.segment_last_count64.as_canonical_u64(); + self.std.range_check(self.range_16_bits_id, (last_count & 0xFFFF) as i64, 1); + self.std.range_check(self.range_16_bits_id, ((last_count >> 16) & 0xFFFF) as i64, 1); + + // range check of 24 must be multiplied by 2 because there are two values, but dual range check + // it's dual, no need to multiply by 2. + self.std.inc_virtual_row(self.dual_range_byte_id, 0u64, range_check_non_used_ops); + self.std.range_check(self.range_24_bits_id, 0, range_check_non_used_ops * 2); + for value in dual_byte_range_check_values { + self.std.inc_virtual_row(self.dual_range_byte_id, value as u64, 1); + } + for value in range_check_24b_values { + self.std.range_check(self.range_24_bits_id, value as i64, 1); + } + + let segment_id = segment_id.into(); + air_values.segment_id = F::from_usize(segment_id); + air_values.is_last_segment = F::from_bool(is_last_segment); + + let first_input = flat_inputs.first().unwrap(); + if first_input.skip_rows == 0 { + air_values.segment_previous_seq_end = F::ONE; + air_values.segment_previous_dst64 = F::ZERO; + air_values.segment_previous_src64 = F::ZERO; + air_values.segment_previous_main_step = F::ZERO; + air_values.segment_previous_count64 = F::ZERO; + air_values.segment_previous_flags = F::ZERO; + air_values.segment_previous_fill_byte = F::ZERO; + } else { + assert!(segment_id > 0); + air_values.segment_previous_seq_end = F::ZERO; + air_values.segment_previous_dst64 = + F::from_u32(trace_rows[0].get_dst64() - self.op_x_rows as u32); + air_values.segment_previous_src64 = + F::from_u32(trace_rows[0].get_src64() - self.op_x_rows as u32); + air_values.segment_previous_main_step = F::from_u64(trace_rows[0].get_main_step()); + air_values.segment_previous_count64 = + F::from_u32(trace_rows[0].get_count64() + self.op_x_rows as u32); + air_values.segment_previous_flags = F::from_u16(match first_input.op { + ZiskOp::DMA_MEMCPY | ZiskOp::DMA_XMEMCPY => F_SEL_MEMCPY, + ZiskOp::DMA_MEMCMP | ZiskOp::DMA_XMEMCMP => F_SEL_MEMCMP, + ZiskOp::DMA_INPUTCPY => F_SEL_INPUTCPY, + ZiskOp::DMA_XMEMSET => F_SEL_MEMSET, + _ => panic!("Invalid operation 0x{:02X}", first_input.op), + } as u16); + air_values.segment_previous_fill_byte = F::from_u8(trace_rows[0].get_fill_byte()); + } + + #[cfg(feature = "debug_dma")] + { + println!("TRACE Dma64AlignedSM @{segment_id} [0] {:?}", trace[0]); + println!( + "TRACE Dma64AlignedSM @{segment_id} [{}] {:?}", + num_rows - 1, + trace[num_rows - 1] + ); + println!("TRACE Dma64AlignedSM AIR_VALUES {:?}", air_values); + } + timer_stop_and_log_trace!(DMA_64_ALIGNED_TRACE); + let from_trace = FromTrace::new(&mut trace).with_air_values(&mut air_values); + Ok(AirInstance::new_from_trace(from_trace)) + } +} diff --git a/precompiles/dma/src/dma_64_aligned/dma_64_aligned_collector.rs b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_collector.rs new file mode 100644 index 000000000..1e929c773 --- /dev/null +++ b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_collector.rs @@ -0,0 +1,216 @@ +//! The `Dma64AlignedInstance` module defines an instance to perform the witness computation +//! for the Dma State Machine. +//! +//! It manages collected inputs and interacts with the `DmaSM` to compute witnesses for +//! execution plans. + +use crate::{Dma64AlignedInput, DmaCollectCounters, DmaCollectorRoutingLog, DmaInputPosition}; +use precompiles_helpers::DmaInfo; +use std::any::Any; +use zisk_common::{BusDevice, BusId, ChunkId, DMA_ENCODED, OP, OPERATION_BUS_ID, OP_TYPE}; +use zisk_core::{zisk_ops::ZiskOp, ZiskOperationType}; +#[derive(Debug)] +pub struct Dma64AlignedCollector { + /// Collected inputs for witness computation. + pub inputs: Vec, + + /// index inside inputs of the last input, because at last stage must be swapped + /// with the last one, to ensure that it's the last one in the trace. + pub last_input_index: Option, + + pub chunk_id: ChunkId, + + pub rlog: DmaCollectorRoutingLog, + + /// The number of inputs to collect. + pub num_inputs: u64, + + /// Helper to skip instructions based on the plan's configuration. + pub collect_counters: DmaCollectCounters, + + pub trace_offset: usize, + pub ops_by_row: usize, + pub last_segment_collector: bool, +} + +impl Dma64AlignedCollector { + /// Creates a new `Dma64AlignedCollector`. + /// + /// # Arguments + /// + /// * `bus_id` - The connected bus ID. + /// * `num_inputs` - The number of inputs to collect. + /// * `collect_counter` - The helper to skip instructions based on the plan's configuration. + /// * `ops_by_row` - The number of operations per row. + /// * `last_segment_collector` - Indicates if this is the last segment collector. + /// + /// # Returns + /// A new `Dma64AlignedCollector` instance initialized with the provided parameters. + pub fn new( + chunk_id: ChunkId, + num_inputs: u64, + collect_counters: DmaCollectCounters, + ops_by_row: usize, + last_segment_collector: bool, + ) -> Self { + Self { + inputs: Vec::with_capacity(num_inputs as usize), + num_inputs, + collect_counters, + trace_offset: 0, + ops_by_row, + last_segment_collector, + rlog: DmaCollectorRoutingLog::new(chunk_id), + chunk_id, + last_input_index: None, + } + } + + /// Processes data received on the bus, collecting the inputs necessary for witness computation. + /// + /// # Arguments + /// * `_bus_id` - The ID of the bus (unused in this implementation). + /// * `data` - The data received from the bus. + /// * `pending` – A queue of pending bus operations used to send derived inputs. + /// + /// # Returns + /// A tuple where: + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64], data_ext: &[u64]) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + if data[OP_TYPE] != ZiskOperationType::Dma as u64 { + return true; + } + + if self.inputs.len() == self.num_inputs as usize { + debug_assert!(self.collect_counters.is_final_skip()); + return self.rlog.log_discard_cond(false, 1, data, false); + } + + let op = data[OP] as u8; + let has_src = op == ZiskOp::DMA_MEMCPY + || op == ZiskOp::DMA_XMEMCPY + || op == ZiskOp::DMA_MEMCMP + || op == ZiskOp::DMA_XMEMCMP; + let encoded = data[DMA_ENCODED]; + + if has_src && !DmaInfo::dst_is_aligned_with_src(encoded) { + self.rlog.log_discard(2, data); + return true; + } + + let rows = DmaInfo::get_loop_count(encoded).div_ceil(self.ops_by_row); + if rows == 0 { + self.rlog.log_discard(3, data); + return true; + } + // self.collect_counters.memcpy.should_process(rows) + if let Some((skip, max_count)) = self.collect_counters.should_collect(rows as u64, op) { + self.rlog.log_collect(rows as u32, data, skip, max_count); + self.add_input(match op { + ZiskOp::DMA_XMEMSET => Dma64AlignedInput::from_memset( + data, + self.trace_offset, + skip as usize, + self.ops_by_row, + max_count as usize, + ), + ZiskOp::DMA_MEMCMP | ZiskOp::DMA_XMEMCMP => Dma64AlignedInput::from( + data, + data_ext, + self.trace_offset, + skip as usize, + self.ops_by_row, + max_count as usize, + ), + ZiskOp::DMA_INPUTCPY | ZiskOp::DMA_MEMCPY | ZiskOp::DMA_XMEMCPY => { + Dma64AlignedInput::from( + data, + data_ext, + self.trace_offset, + skip as usize, + self.ops_by_row, + max_count as usize, + ) + } + _ => panic!("Invalid operation 0x{op:02X}"), + }); + // Update trace offset + self.trace_offset += max_count as usize; + } else { + self.rlog.log_discard(10, data); + } + if self.inputs.len() >= self.num_inputs as usize { + debug_assert!(self.collect_counters.is_final_skip()); + return self.rlog.log_discard_cond(true, 11, data, false); + } + true + } + + /// Adds an input to the collector with proper ordering management. + /// + /// This method handles: + /// - Adding the input to the vector + /// - Managing inputs that must be first (swaps to position 0) + /// - Tracking inputs that must be last (stores index for later swap) + /// + /// # Arguments + /// * `input` - The input to add + #[inline(always)] + fn add_input(&mut self, input: Dma64AlignedInput) { + // Check if input must be first before pushing + let must_be_first = input.must_be_first(); + let must_be_last = input.must_be_last(); + let current_index = self.inputs.len(); + + // Push the input + self.inputs.push(input); + + // Handle ordering requirements + if must_be_first { + // Swap with position 0 if not already first + if current_index > 0 { + self.inputs.swap(0, current_index); + } + } else if must_be_last { + // Edge case: if an input is huge and it's both first and last, + // must_be_first takes precedence and this branch won't execute + assert!(self.last_input_index.is_none(), "Multiple inputs marked as last input"); + self.last_input_index = Some(current_index); + } + } + + pub fn get_debug_info(&self) -> String { + #[cfg(feature = "save_dma_collectors")] + return format!( + "CC|{}|{}|{}\n", + self.chunk_id, + self.inputs.len(), + self.collect_counters.get_debug_info(), + ) + &self.rlog.get_debug_info(); + #[cfg(not(feature = "save_dma_collectors"))] + String::new() + } + pub fn take_inputs(&mut self) -> Vec { + if let Some(last_index) = self.last_input_index { + // If there's a last input index, swap it with the last element to ensure it's the last one in the trace. + let current_last_index = self.inputs.len() - 1; + self.inputs.swap(last_index, current_last_index); + } + std::mem::take(&mut self.inputs) + } + pub fn take_debug_inputs(&mut self) -> (String, Vec) { + let debug_info = self.get_debug_info(); + let inputs = self.take_inputs(); + (debug_info, inputs) + } +} + +impl BusDevice for Dma64AlignedCollector { + fn as_any(self: Box) -> Box { + self + } +} diff --git a/precompiles/dma/src/dma_64_aligned/dma_64_aligned_input.rs b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_input.rs new file mode 100644 index 000000000..4deab6413 --- /dev/null +++ b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_input.rs @@ -0,0 +1,209 @@ +use precompiles_helpers::DmaInfo; +use zisk_common::{A, B, DMA_ENCODED, OP, STEP}; +use zisk_core::zisk_ops::ZiskOp; + +use crate::DMA_64_ALIGNED_OPS_BY_ROW; + +#[derive(Debug)] +pub struct Dma64AlignedInput { + pub src: u32, + pub dst: u32, + pub is_last_instance_input: bool, + pub op: u8, + pub trace_offset: u32, // offset inside trace to paralelize + pub skip_rows: u32, // inside input how many rows skip + pub rows: u32, // number of rows used + pub step: u64, + pub encoded: u64, + pub src_values: Vec, +} + +impl Dma64AlignedInput { + pub fn get_rows(data: &[u64]) -> usize { + let encoded = data[DMA_ENCODED]; + if DmaInfo::get_dst_offset(encoded) == DmaInfo::get_src_offset(encoded) { + let count = DmaInfo::get_loop_count(encoded); + if count > 0 { + count.div_ceil(DMA_64_ALIGNED_OPS_BY_ROW) + } else { + 0 + } + } else { + 0 + } + } + pub fn get_count(data: &[u64]) -> usize { + let encoded = data[DMA_ENCODED]; + if DmaInfo::get_dst_offset(encoded) == DmaInfo::get_src_offset(encoded) { + DmaInfo::get_loop_count(encoded) + } else { + 0 + } + } + pub fn from( + data: &[u64], + data_ext: &[u64], + trace_offset: usize, + skip_rows: usize, + ops_x_rows: usize, + max_rows: usize, + ) -> Self { + let op = data[OP] as u8; + let encoded = data[DMA_ENCODED]; + let pre_count = DmaInfo::get_pre_count(encoded) as u32; + let skip_count = skip_rows * ops_x_rows; + let data_offset = DmaInfo::get_loop_data_offset(encoded) + skip_count; + let count = DmaInfo::get_loop_count(encoded) - skip_count; + let total_rows = DmaInfo::get_loop_count(encoded).div_ceil(ops_x_rows); + let rows = std::cmp::min(total_rows - skip_rows, max_rows) as u32; + Self { + dst: data[A] as u32 + pre_count, + src: data[B] as u32 + pre_count, + trace_offset: trace_offset as u32, + is_last_instance_input: max_rows < (total_rows - skip_rows), + step: data[STEP], + skip_rows: skip_rows as u32, + rows, + encoded, + src_values: data_ext[data_offset..data_offset + count].to_vec(), + op: match op { + ZiskOp::DMA_MEMCPY => { + if DmaInfo::is_direct(encoded) { + ZiskOp::DMA_MEMCPY + } else { + ZiskOp::DMA_XMEMCPY + } + } + _ => op, + }, + } + } + pub fn from_memset( + data: &[u64], + trace_offset: usize, + skip_rows: usize, + ops_x_rows: usize, + max_rows: usize, + ) -> Self { + let op = data[OP] as u8; + let encoded = data[DMA_ENCODED]; + let pre_count = DmaInfo::get_pre_count(encoded) as u32; + let total_rows = DmaInfo::get_loop_count(encoded).div_ceil(ops_x_rows); + let rows = std::cmp::min(total_rows - skip_rows, max_rows) as u32; + Self { + dst: data[A] as u32 + pre_count, + src: 0, + trace_offset: trace_offset as u32, + is_last_instance_input: max_rows < (total_rows - skip_rows), + step: data[STEP], + skip_rows: skip_rows as u32, + rows, + encoded, + src_values: vec![], + op, + } + } + pub fn from_memcmp( + data: &[u64], + data_ext: &[u64], + trace_offset: usize, + skip_rows: usize, + ops_x_rows: usize, + max_rows: usize, + ) -> Self { + let dst = data[A] as u32; + let src = data[B] as u32; + let op = data[OP] as u8; + let encoded = data[DMA_ENCODED]; + let pre_count = DmaInfo::get_pre_count(encoded) as u32; + let skip_count = skip_rows * ops_x_rows; + let data_offset = (pre_count as usize) + skip_count; + let count = DmaInfo::get_loop_count(encoded) - skip_count; + let total_rows = DmaInfo::get_loop_count(encoded).div_ceil(ops_x_rows); + let rows = std::cmp::min(total_rows - skip_rows, max_rows) as u32; + Self { + dst: dst + pre_count, + src: src + pre_count, + trace_offset: trace_offset as u32, + is_last_instance_input: max_rows < (total_rows - skip_rows), + step: data[STEP], + skip_rows: skip_rows as u32, + rows, + encoded, + src_values: data_ext[data_offset..data_offset + count].to_vec(), + op: match op { + ZiskOp::DMA_MEMCPY => { + if DmaInfo::is_direct(encoded) { + ZiskOp::DMA_MEMCPY + } else { + ZiskOp::DMA_XMEMCPY + } + } + _ => op, + }, + } + } + + #[cfg(feature = "save_dma_inputs")] + /// Writes a list of Dma64AlignedInput instances to a text file with columns separated by |. + /// Path is taken from DEBUG_OUTPUT_PATH environment variable, defaulting to "tmp/". + pub fn save_debug_info(filename: &str, inputs: &[Vec]) -> std::io::Result<()> { + use std::io::Write; + + let path = std::env::var("DEBUG_OUTPUT_PATH").unwrap_or_else(|_| "tmp/".to_string()); + let full_path = format!("{}{}", path, filename); + + let mut file = std::fs::File::create(&full_path)?; + + // Write header + writeln!( + file, + "{:>8}|{:>10}|{:>10}|{:>22}|{:>4}|{:>12}|{:>9}|{:>8}|{:>14}|{:>18}|{:>9}|src_values", + "pos", + "src", + "dst", + "is_last_input", + "op", + "trace_offset", + "skip_rows", + "rows", + "step", + "encoded", + "fill_byte" + )?; + + // Write data rows + for (pos, input) in inputs.iter().flatten().enumerate() { + let src_values_hex: Vec = + input.src_values.iter().map(|v| format!("0x{:016X}", v)).collect(); + writeln!( + file, + "{:>8}|0x{:08X}|0x{:08X}|{:>22}|{:>4}|{:>12}|{:>9}|{:>8}|{:>14}|0x{:016X}|{:>9}|{}", + pos, + input.src, + input.dst, + input.is_last_instance_input, + input.op, + input.trace_offset, + input.skip_rows, + input.rows, + input.step, + input.encoded, + input.fill_byte, + src_values_hex.join(",") + )?; + } + + Ok(()) + } +} + +impl crate::DmaInputPosition for Dma64AlignedInput { + fn must_be_first(&self) -> bool { + self.skip_rows > 0 + } + + fn must_be_last(&self) -> bool { + self.is_last_instance_input + } +} diff --git a/precompiles/dma/src/dma_64_aligned/dma_64_aligned_inputcpy.rs b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_inputcpy.rs new file mode 100644 index 000000000..2f731ffc7 --- /dev/null +++ b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_inputcpy.rs @@ -0,0 +1,270 @@ +use std::sync::Arc; + +use fields::PrimeField64; + +use pil_std_lib::Std; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +use zisk_common::SegmentId; +use zisk_core::zisk_ops::ZiskOp; +use zisk_pil::{Dma64AlignedInputCpyAirValues, DUAL_RANGE_BYTE_ID}; + +#[cfg(feature = "packed")] +pub use zisk_pil::{ + Dma64AlignedInputCpyTracePacked as Dma64AlignedInputCpyTrace, + Dma64AlignedInputCpyTraceRowPacked as Dma64AlignedInputCpyTraceRow, +}; + +#[cfg(not(feature = "packed"))] +pub use zisk_pil::{Dma64AlignedInputCpyTrace, Dma64AlignedInputCpyTraceRow}; + +use crate::{ + dma_trace, Dma64AlignedInput, Dma64AlignedModule, DMA_64_ALIGNED_INPUTCPY_OPS_BY_ROW, + F_SEL_INPUTCPY, +}; +use precompiles_helpers::DmaInfo; + +/// The `Dma64AlignedInputCpySM` struct encapsulates the logic of the Dma64Aligned State Machine. +pub struct Dma64AlignedInputCpySM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + /// Range checks ID's + range_16_bits_id: usize, + range_24_bits_id: usize, + dual_range_byte_id: usize, + op_x_rows: usize, +} + +impl Dma64AlignedInputCpySM { + /// Creates a new Dma State Machine instance. + /// + /// # Returns + /// A new `Dma64AlignedInputCpySM` instance. + pub fn new(std: Arc>) -> Arc { + Arc::new(Self { + std: std.clone(), + range_16_bits_id: std + .get_range_id(0, 0xFFFF, None) + .expect("Failed to get 16b table ID"), + range_24_bits_id: std + .get_range_id(0, (1 << 24) - 1, None) + .expect("Failed to get 24b table ID"), + dual_range_byte_id: std + .get_virtual_table_id(DUAL_RANGE_BYTE_ID) + .expect("Failed to get tabl eDUAL_RANGE_BYTE ID ID"), + op_x_rows: DMA_64_ALIGNED_INPUTCPY_OPS_BY_ROW, + }) + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_input( + &self, + input: &Dma64AlignedInput, + trace: &mut [Dma64AlignedInputCpyTraceRow], + local_dual_byte: &mut [u64], // for input_cpy + values_24_bits: &mut Vec, + air_values: &mut Dma64AlignedInputCpyAirValues, + ) -> usize { + let mut values_index = 0; + let rows = input.rows as usize; + let is_last_instance_input = rows >= trace.len(); + let skip_count = input.skip_rows as usize * self.op_x_rows; + let initial_count = DmaInfo::get_loop_count(input.encoded) - skip_count; + let mut count64 = initial_count; + let mut dst64 = ((input.dst + 7) >> 3) + skip_count as u32; + let mut seq_end = false; + let addr_incr_by_row = self.op_x_rows as u32; + for (irow, row) in trace.iter_mut().enumerate().take(rows) { + row.set_main_step(input.step); + debug_assert!(input.op == ZiskOp::DMA_INPUTCPY); + row.set_sel_inputcpy(true); + row.set_previous_seq_end(irow == 0 && input.skip_rows == 0); + + // calculate the first aligned address + // if dst is aligned is same address if not it's addr + 8 + row.set_dst64(dst64); + dst64 += addr_incr_by_row; + + row.set_count64(count64 as u32); + let use_count = if count64 <= self.op_x_rows { + seq_end = true; + count64 + } else { + count64 -= self.op_x_rows; + self.op_x_rows + }; + row.set_seq_end(seq_end); + for index in 0..use_count { + if index > 0 { + row.set_sel_op_from_1(index - 1, true); + } + let value = input.src_values[values_index]; + values_index += 1; + let h0 = ((value & 0xFFFF_FF00) >> 8) as u32; + let h1 = (value >> 40) as u32; + let l0: u8 = value as u8; + let l1 = (value >> 32) as u8; + row.set_h_value_chunks(index, 0, h0); + row.set_h_value_chunks(index, 1, h1); + row.set_l_value_chunks(index, 0, l0); + row.set_l_value_chunks(index, 1, l1); + let pos = ((l1 as usize) << 8) | (l0 as usize); + local_dual_byte[pos] += 1; + values_24_bits.push(h0); + values_24_bits.push(h1); + } + } + + if is_last_instance_input { + if seq_end { + air_values.segment_last_seq_end = F::ONE; + air_values.segment_last_dst64 = F::ZERO; + air_values.segment_last_main_step = F::ZERO; + air_values.segment_last_count64 = F::ZERO; + air_values.last_count_chunk[0] = F::ZERO; + air_values.last_count_chunk[1] = F::ZERO; + air_values.segment_last_flags = F::ZERO; + } else { + air_values.segment_last_seq_end = F::ZERO; + air_values.segment_last_dst64 = F::from_u32(dst64 - addr_incr_by_row); + air_values.segment_last_main_step = F::from_u64(input.step); + let last_count = initial_count - (rows - 1) * self.op_x_rows; + air_values.segment_last_count64 = F::from_u32(last_count as u32); + air_values.last_count_chunk[0] = F::from_u16(last_count as u16); + air_values.last_count_chunk[1] = F::from_u16((last_count >> 16) as u16); + air_values.segment_last_flags = F::from_u16(F_SEL_INPUTCPY as u16); + } + } + rows + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_empty_slice(&self, trace: &mut Dma64AlignedInputCpyTraceRow) { + trace.set_seq_end(true); + trace.set_previous_seq_end(true); + } +} +impl Dma64AlignedModule for Dma64AlignedInputCpySM { + fn get_name(&self) -> &'static str { + "dma_64_aligned_inputcpy" + } + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + fn compute_witness( + &self, + inputs: &[Vec], + segment_id: SegmentId, + is_last_segment: bool, + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = Dma64AlignedInputCpyTrace::::new_from_vec_zeroes(trace_buffer)?; + let num_rows = trace.num_rows(); + + let total_inputs: usize = inputs + .iter() + .map(|inputs| inputs.iter().map(|input| input.rows as usize).sum::()) + .sum(); + + assert!(total_inputs > 0); + assert!( + total_inputs <= num_rows, + "Too many inputs, total_inputs:{total_inputs} num_rows:{num_rows}" + ); + + dma_trace("Dma64AlignedInputCpy", total_inputs, num_rows); + + timer_start_trace!(DMA_64_ALIGNED_TRACE); + + // Flat the inputs and reorder to ensure first, last are in theirs positions. + let flat_inputs = crate::flatten_and_reorder_inputs(inputs); + let trace_rows = trace.buffer.as_mut_slice(); + + let mut values_24_bits = Vec::with_capacity(num_rows * self.op_x_rows * 2); + let mut local_dual_byte = [0u64; 1 << 16]; + let mut air_values = Dma64AlignedInputCpyAirValues::::new(); + + let mut row_offset = 0; + for input in flat_inputs.iter() { + let rows_used = self.process_input( + input, + &mut trace_rows[row_offset..], + &mut local_dual_byte, + &mut values_24_bits, + &mut air_values, + ); + row_offset += rows_used; + } + + // padding + let padding_size = num_rows.saturating_sub(row_offset); + air_values.padding_size = F::from_u32(padding_size as u32); + + if padding_size > 0 { + for padding_row in trace_rows.iter_mut().take(num_rows).skip(row_offset) { + self.process_empty_slice(padding_row); + } + air_values.segment_last_seq_end = F::ONE; + air_values.segment_last_dst64 = F::ZERO; + air_values.segment_last_main_step = F::ZERO; + air_values.segment_last_count64 = F::ZERO; + air_values.last_count_chunk[0] = F::ZERO; + air_values.last_count_chunk[1] = F::ZERO; + air_values.segment_last_flags = F::ZERO; + } + + // local_dual_byte[0] += padding_size as u64; + // self.std.range_check(self.range_24_bits_id, 0, padding_size as u64 * 2); + + // add range check of count to check that it's a positive 32-bits number + let last_count = air_values.segment_last_count64.as_canonical_u64(); + + self.std.range_check(self.range_16_bits_id, (last_count & 0xFFFF) as i64, 1); + self.std.range_check(self.range_16_bits_id, ((last_count >> 16) & 0xFFFF) as i64, 1); + self.std.inc_virtual_rows_ranged(self.dual_range_byte_id, &local_dual_byte); + for value in values_24_bits.iter() { + self.std.range_check(self.range_24_bits_id, *value as i64, 1); + } + + let segment_id = segment_id.into(); + air_values.segment_id = F::from_usize(segment_id); + air_values.is_last_segment = F::from_bool(is_last_segment); + + let first_input = flat_inputs.first().unwrap(); + if first_input.skip_rows == 0 { + air_values.segment_previous_seq_end = F::ONE; + air_values.segment_previous_dst64 = F::ZERO; + air_values.segment_previous_main_step = F::ZERO; + air_values.segment_previous_count64 = F::ZERO; + air_values.segment_previous_flags = F::ZERO; + } else { + assert!(segment_id > 0); + air_values.segment_previous_seq_end = F::ZERO; + air_values.segment_previous_dst64 = + F::from_u32(trace_rows[0].get_dst64() - self.op_x_rows as u32); + air_values.segment_previous_main_step = F::from_u64(trace_rows[0].get_main_step()); + air_values.segment_previous_count64 = + F::from_u32(trace_rows[0].get_count64() + self.op_x_rows as u32); + air_values.segment_previous_flags = F::from_u16(F_SEL_INPUTCPY as u16); + } + timer_stop_and_log_trace!(DMA_64_ALIGNED_TRACE); + let from_trace = FromTrace::new(&mut trace).with_air_values(&mut air_values); + Ok(AirInstance::new_from_trace(from_trace)) + } +} diff --git a/precompiles/dma/src/dma_64_aligned/dma_64_aligned_instance.rs b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_instance.rs new file mode 100644 index 000000000..ec55b681e --- /dev/null +++ b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_instance.rs @@ -0,0 +1,172 @@ +//! The `Dma64AlignedInstance` module defines an instance to perform the witness computation +//! for the Dma State Machine. +//! +//! It manages collected inputs and interacts with the `DmaSM` to compute witnesses for +//! execution plans. + +#[cfg(feature = "save_dma_collectors")] +use crate::save_dma_collectors; +#[cfg(feature = "save_dma_inputs")] +use crate::Dma64AlignedInput; +use crate::{ + Dma64AlignedCollector, Dma64AlignedModule, DmaCheckPoint, DMA_64_ALIGNED_INPUTCPY_OPS_BY_ROW, + DMA_64_ALIGNED_MEMCPY_OPS_BY_ROW, DMA_64_ALIGNED_MEMSET_OPS_BY_ROW, + DMA_64_ALIGNED_MEM_OPS_BY_ROW, DMA_64_ALIGNED_OPS_BY_ROW, +}; +use fields::PrimeField64; +use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; +use std::sync::Arc; +use zisk_common::ChunkId; +use zisk_common::StatsType; +use zisk_common::{BusDevice, CheckPoint, Instance, InstanceCtx, InstanceType, PayloadType}; +use zisk_pil::{ + Dma64AlignedInputCpyTrace, Dma64AlignedMemCpyTrace, Dma64AlignedMemSetTrace, + Dma64AlignedMemTrace, Dma64AlignedTrace, +}; + +pub const F_SEL_MEMCPY: u64 = 1; +pub const F_SEL_MEMCMP: u64 = 2; +pub const F_SEL_INPUTCPY: u64 = 4; +pub const F_SEL_MEMSET: u64 = 8; + +/// The `Dma64AlignedInstance` struct represents an instance for the Dma State Machine. +/// +/// It encapsulates the `Dma64AlignedSM` and its associated context, and it processes input data +/// to compute witnesses for the Dma64Aligned State Machine. +pub struct Dma64AlignedInstance { + /// Dma state machine. + module: Arc>, + + /// Instance context. + ictx: InstanceCtx, + + /// Flag to define that it's last segment + is_last_segment: bool, +} + +impl Dma64AlignedInstance { + /// Creates a new `Dma64AlignedInstance`. + /// + /// # Arguments + /// * `module` - An `Arc`-wrapped reference to the Dma 64 Aligned Module. + /// * `ictx` - The `InstanceCtx` associated with this instance, containing the execution plan. + /// * `bus_id` - The bus ID associated with this instance. + /// + /// # Returns + /// A new `Dma64AlignedInstance` instance initialized with the provided state machine and + /// context. + pub fn new(module: Arc>, ictx: InstanceCtx) -> Self { + let is_last_segment = { + let meta = ictx.plan.meta.as_ref().unwrap(); + let checkpoint = meta.downcast_ref::().unwrap(); + checkpoint.is_last_segment + }; + Self { module, ictx, is_last_segment } + } + + pub fn build_dma_collector(&self, chunk_id: ChunkId) -> Dma64AlignedCollector { + let ops_by_row = match self.ictx.plan.air_id { + Dma64AlignedTrace::::AIR_ID => DMA_64_ALIGNED_OPS_BY_ROW, + Dma64AlignedMemCpyTrace::::AIR_ID => DMA_64_ALIGNED_MEMCPY_OPS_BY_ROW, + Dma64AlignedInputCpyTrace::::AIR_ID => DMA_64_ALIGNED_INPUTCPY_OPS_BY_ROW, + Dma64AlignedMemSetTrace::::AIR_ID => DMA_64_ALIGNED_MEMSET_OPS_BY_ROW, + Dma64AlignedMemTrace::::AIR_ID => DMA_64_ALIGNED_MEM_OPS_BY_ROW, + _ => panic!("Dma64AlignedInstance: Unsupported air_id: {:?}", self.ictx.plan.air_id), + }; + + let meta = self.ictx.plan.meta.as_ref().unwrap(); + let collect_info = meta.downcast_ref::().unwrap(); + let (num_inputs, collect_counters) = collect_info.chunks[&chunk_id]; + Dma64AlignedCollector::new( + chunk_id, + num_inputs, + collect_counters, + ops_by_row, + Some(chunk_id) == collect_info.last_chunk, + ) + } +} + +impl Instance for Dma64AlignedInstance { + /// Computes the witness for the Dma execution plan. + /// + /// This method leverages the `Dma64AlignedSM` to generate an `AirInstance` using the collected + /// inputs. + /// + /// # Arguments + /// * `_pctx` - The proof context, unused in this implementation. + /// + /// # Returns + /// An `Option` containing the computed `AirInstance`. + fn compute_witness( + &self, + _pctx: &ProofCtx, + _sctx: &SetupCtx, + collectors: Vec<(usize, Box>)>, + trace_buffer: Vec, + ) -> ProofmanResult>> { + #[cfg(feature = "save_dma_collectors")] + let (debug, inputs): (Vec<_>, Vec<_>) = collectors + .into_iter() + .map(|(_, collector)| { + collector.as_any().downcast::().unwrap().take_debug_inputs() + }) + .unzip(); + #[cfg(not(feature = "save_dma_collectors"))] + let inputs: Vec<_> = collectors + .into_iter() + .map(|(_, collector)| { + collector.as_any().downcast::().unwrap().take_inputs() + }) + .collect(); + + let segment_id = self.ictx.plan.segment_id.unwrap(); + + #[cfg(feature = "save_dma_collectors")] + save_dma_collectors( + &format!("{}_collector_{segment_id:04}.txt", self.module.get_name()), + debug, + )?; + + #[cfg(feature = "save_dma_inputs")] + Dma64AlignedInput::save_debug_info( + &format!("{}_inputs_{segment_id:04}.txt", self.module.get_name()), + &inputs, + )?; + + Ok(Some(self.module.compute_witness( + &inputs, + segment_id, + self.is_last_segment, + trace_buffer, + )?)) + } + + /// Retrieves the checkpoint associated with this instance. + /// + /// # Returns + /// A `CheckPoint` object representing the checkpoint of the execution plan. + fn check_point(&self) -> &CheckPoint { + &self.ictx.plan.check_point + } + + /// Retrieves the type of this instance. + /// + /// # Returns + /// An `InstanceType` representing the type of this instance (`InstanceType::Instance`). + fn instance_type(&self) -> InstanceType { + InstanceType::Instance + } + + fn stats_type(&self) -> StatsType { + StatsType::Precompiled + } + + fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { + Some(Box::new(self.build_dma_collector(chunk_id))) + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} diff --git a/precompiles/dma/src/dma_64_aligned/dma_64_aligned_mem.rs b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_mem.rs new file mode 100644 index 000000000..ee144d5c5 --- /dev/null +++ b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_mem.rs @@ -0,0 +1,299 @@ +use std::sync::Arc; + +use fields::PrimeField64; + +use pil_std_lib::Std; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +use zisk_common::SegmentId; +use zisk_core::zisk_ops::ZiskOp; +use zisk_pil::Dma64AlignedMemAirValues; + +#[cfg(feature = "packed")] +pub use zisk_pil::{ + Dma64AlignedMemTracePacked as Dma64AlignedMemTrace, + Dma64AlignedMemTraceRowPacked as Dma64AlignedMemTraceRow, +}; + +#[cfg(not(feature = "packed"))] +pub use zisk_pil::{Dma64AlignedMemTrace, Dma64AlignedMemTraceRow}; + +use crate::{ + dma_trace, Dma64AlignedInput, Dma64AlignedModule, DMA_64_ALIGNED_MEM_OPS_BY_ROW, + F_SEL_INPUTCPY, F_SEL_MEMCMP, F_SEL_MEMCPY, F_SEL_MEMSET, +}; +use precompiles_helpers::DmaInfo; + +/// The `Dma64AlignedMemSM` struct encapsulates the logic of the Dma64Aligned State Machine. +pub struct Dma64AlignedMemSM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + /// Range checks ID's + range_16_bits_id: usize, + op_x_rows: usize, +} + +impl Dma64AlignedMemSM { + /// Creates a new Dma State Machine instance. + /// + /// # Returns + /// A new `Dma64AlignedMemSM` instance. + pub fn new(std: Arc>) -> Arc { + Arc::new(Self { + std: std.clone(), + range_16_bits_id: std + .get_range_id(0, 0xFFFF, None) + .expect("Failed to get 16b table ID"), + op_x_rows: DMA_64_ALIGNED_MEM_OPS_BY_ROW, + }) + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_input( + &self, + input: &Dma64AlignedInput, + trace: &mut [Dma64AlignedMemTraceRow], + _local_16_bits_table: &mut [u32], // for input_cpy + air_values: &mut Dma64AlignedMemAirValues, + ) -> usize { + let rows = input.rows as usize; + let is_last_instance_input = rows >= trace.len(); + let skip_count = input.skip_rows as usize * self.op_x_rows; + let initial_count = DmaInfo::get_loop_count(input.encoded) - skip_count; + let mut count64 = initial_count; + // println!( + // "DMA_64_ALIGNED INPUT {input:?} count:{count64} rows:{rows} dma_info:{} S:{}", + // DmaInfo::to_string(input.encoded), + // input.step + // ); + let mut src_values_index = 0; + let mut dst64 = ((input.dst + 7) >> 3) + skip_count as u32; + let mut src64 = ((input.src + 7) >> 3) + skip_count as u32; + let mut seq_end = false; + let addr_incr_by_row = self.op_x_rows as u32; + let is_memcpy = input.op == ZiskOp::DMA_XMEMCPY || input.op == ZiskOp::DMA_MEMCPY; + let is_memeq = input.op == ZiskOp::DMA_MEMCMP || input.op == ZiskOp::DMA_XMEMCMP; + let is_memset = input.op == ZiskOp::DMA_XMEMSET; + let fill_byte = if is_memset { DmaInfo::get_fill_byte(input.encoded) } else { 0 }; + + for (irow, row) in trace.iter_mut().enumerate().take(rows) { + row.set_main_step(input.step); + row.set_sel_memcpy(is_memcpy); + row.set_sel_memeq(is_memeq); + if irow == 0 && input.skip_rows == 0 { + row.set_sel_memcpy_count_load(input.op == ZiskOp::DMA_MEMCPY); + } + row.set_sel_memset(is_memset); + row.set_previous_seq_end(irow == 0 && input.skip_rows == 0); + + // calculate the first aligned address + // if dst is aligned is same address if not it's addr + 8 + row.set_dst64(dst64); + row.set_src64(src64); + dst64 += addr_incr_by_row; + src64 += addr_incr_by_row; + + row.set_count64(count64 as u32); + let use_count = if count64 <= self.op_x_rows { + seq_end = true; + count64 + } else { + count64 -= self.op_x_rows; + self.op_x_rows + }; + row.set_seq_end(seq_end); + if !is_memset { + for index in 0..use_count { + if index > 0 { + row.set_sel_op_from_1(index - 1, true); + } + let value = input.src_values[src_values_index]; + row.set_value(index, 0, value as u32); + row.set_value(index, 1, (value >> 32) as u32); + src_values_index += 1; + } + } else { + let fill_bytes = fill_byte as u32 * 0x01010101; + row.set_fill_byte(fill_byte); + for index in 0..use_count { + if index > 0 { + row.set_sel_op_from_1(index - 1, true); + } + row.set_value(index, 0, fill_bytes); + row.set_value(index, 1, fill_bytes); + } + } + } + + if is_last_instance_input { + if seq_end { + air_values.segment_last_seq_end = F::ONE; + air_values.segment_last_src64 = F::ZERO; + air_values.segment_last_dst64 = F::ZERO; + air_values.segment_last_main_step = F::ZERO; + air_values.segment_last_count64 = F::ZERO; + air_values.last_count_chunk[0] = F::ZERO; + air_values.last_count_chunk[1] = F::ZERO; + air_values.segment_last_flags = F::ZERO; + air_values.segment_last_fill_byte = F::ZERO; + } else { + air_values.segment_last_seq_end = F::ZERO; + air_values.segment_last_src64 = F::from_u32(src64 - addr_incr_by_row); + air_values.segment_last_dst64 = F::from_u32(dst64 - addr_incr_by_row); + air_values.segment_last_main_step = F::from_u64(input.step); + let last_count = initial_count - (rows - 1) * self.op_x_rows; + air_values.segment_last_count64 = F::from_u32(last_count as u32); + air_values.last_count_chunk[0] = F::from_u16(last_count as u16); + air_values.last_count_chunk[1] = F::from_u16((last_count >> 16) as u16); + air_values.segment_last_flags = F::from_u16(match input.op { + ZiskOp::DMA_MEMCPY | ZiskOp::DMA_XMEMCPY => F_SEL_MEMCPY, + ZiskOp::DMA_MEMCMP | ZiskOp::DMA_XMEMCMP => F_SEL_MEMCMP, + ZiskOp::DMA_INPUTCPY => F_SEL_INPUTCPY, + ZiskOp::DMA_XMEMSET => F_SEL_MEMSET, + _ => panic!("Invalid operation 0x{:02X}", input.op), + } as u16); + air_values.segment_last_fill_byte = F::from_u8(fill_byte); + } + } + rows + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_empty_slice(&self, trace: &mut Dma64AlignedMemTraceRow) { + trace.set_seq_end(true); + trace.set_previous_seq_end(true); + } +} +impl Dma64AlignedModule for Dma64AlignedMemSM { + fn get_name(&self) -> &'static str { + "dma_64_aligned_mem" + } + + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + fn compute_witness( + &self, + inputs: &[Vec], + segment_id: SegmentId, + is_last_segment: bool, + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = Dma64AlignedMemTrace::::new_from_vec_zeroes(trace_buffer)?; + let num_rows = trace.num_rows(); + + let total_inputs: usize = inputs + .iter() + .map(|inputs| inputs.iter().map(|input| input.rows as usize).sum::()) + .sum(); + + assert!(total_inputs > 0); + // println!("LAST INPUT: {:?}", inputs.last().unwrap()); + // println!("DMA_64_ALIGNED TOTALS total_inputs:{total_inputs} num_rows:{num_rows}"); + assert!( + total_inputs <= num_rows, + "Too many inputs, total_inputs:{total_inputs} num_rows:{num_rows}" + ); + + dma_trace("Dma64AlignedMem", total_inputs, num_rows); + + timer_start_trace!(DMA_64_ALIGNED_TRACE); + // Flat the inputs and reorder to ensure first, last are in theirs positions. + let flat_inputs = crate::flatten_and_reorder_inputs(inputs); + let trace_rows = trace.buffer.as_mut_slice(); + + let mut local_16_bits_table = vec![0u32; 1 << 16]; + let mut air_values = Dma64AlignedMemAirValues::::new(); + + // TODO: inputs between instances + let mut row_offset = 0; + for input in flat_inputs.iter() { + let rows_used = self.process_input( + input, + &mut trace_rows[row_offset..], + &mut local_16_bits_table, + &mut air_values, + ); + row_offset += rows_used; + } + + // padding + let padding_size = num_rows.saturating_sub(row_offset); + air_values.padding_size = F::from_u32(padding_size as u32); + + if row_offset < num_rows { + for padding_row in trace_rows.iter_mut().take(num_rows).skip(row_offset) { + self.process_empty_slice(padding_row); + } + + air_values.segment_last_seq_end = F::ONE; + air_values.segment_last_src64 = F::ZERO; + air_values.segment_last_dst64 = F::ZERO; + air_values.segment_last_main_step = F::ZERO; + air_values.segment_last_count64 = F::ZERO; + air_values.last_count_chunk[0] = F::ZERO; + air_values.last_count_chunk[1] = F::ZERO; + air_values.segment_last_flags = F::ZERO; + air_values.segment_last_fill_byte = F::ZERO; + } + + // add range check of count to check that it's a positive 32-bits number + let last_count = air_values.segment_last_count64.as_canonical_u64(); + local_16_bits_table[(last_count & 0xFFFF) as usize] += 1; + local_16_bits_table[((last_count >> 16) & 0xFFFF) as usize] += 1; + + self.std.range_checks(self.range_16_bits_id, local_16_bits_table); + + let segment_id = segment_id.into(); + air_values.segment_id = F::from_usize(segment_id); + air_values.is_last_segment = F::from_bool(is_last_segment); + + let first_input = flat_inputs.first().unwrap(); + if first_input.skip_rows == 0 { + air_values.segment_previous_seq_end = F::ONE; + air_values.segment_previous_dst64 = F::ZERO; + air_values.segment_previous_src64 = F::ZERO; + air_values.segment_previous_main_step = F::ZERO; + air_values.segment_previous_count64 = F::ZERO; + air_values.segment_previous_flags = F::ZERO; + air_values.segment_previous_fill_byte = F::ZERO; + } else { + assert!(segment_id > 0); + air_values.segment_previous_seq_end = F::ZERO; + air_values.segment_previous_dst64 = + F::from_u32(trace_rows[0].get_dst64() - self.op_x_rows as u32); + air_values.segment_previous_src64 = + F::from_u32(trace_rows[0].get_src64() - self.op_x_rows as u32); + air_values.segment_previous_main_step = F::from_u64(trace_rows[0].get_main_step()); + air_values.segment_previous_count64 = + F::from_u32(trace_rows[0].get_count64() + self.op_x_rows as u32); + air_values.segment_previous_flags = F::from_u16(match first_input.op { + ZiskOp::DMA_MEMCPY | ZiskOp::DMA_XMEMCPY => F_SEL_MEMCPY, + ZiskOp::DMA_MEMCMP | ZiskOp::DMA_XMEMCMP => F_SEL_MEMCMP, + ZiskOp::DMA_INPUTCPY => F_SEL_INPUTCPY, + ZiskOp::DMA_XMEMSET => F_SEL_MEMSET, + _ => panic!("Invalid operation 0x{:02X}", first_input.op), + } as u16); + air_values.segment_previous_fill_byte = F::from_u8(trace_rows[0].get_fill_byte()); + } + timer_stop_and_log_trace!(DMA_64_ALIGNED_TRACE); + let from_trace = FromTrace::new(&mut trace).with_air_values(&mut air_values); + Ok(AirInstance::new_from_trace(from_trace)) + } +} diff --git a/precompiles/dma/src/dma_64_aligned/dma_64_aligned_memcpy.rs b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_memcpy.rs new file mode 100644 index 000000000..537005a91 --- /dev/null +++ b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_memcpy.rs @@ -0,0 +1,261 @@ +use std::sync::Arc; + +use fields::PrimeField64; + +use pil_std_lib::Std; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +use zisk_common::SegmentId; +use zisk_core::zisk_ops::ZiskOp; +use zisk_pil::Dma64AlignedMemCpyAirValues; + +#[cfg(feature = "packed")] +pub use zisk_pil::{ + Dma64AlignedMemCpyTracePacked as Dma64AlignedMemCpyTrace, + Dma64AlignedMemCpyTraceRowPacked as Dma64AlignedMemCpyTraceRow, +}; + +#[cfg(not(feature = "packed"))] +pub use zisk_pil::{Dma64AlignedMemCpyTrace, Dma64AlignedMemCpyTraceRow}; + +use crate::{ + dma_trace, Dma64AlignedInput, Dma64AlignedModule, DMA_64_ALIGNED_MEMCPY_OPS_BY_ROW, + F_SEL_MEMCPY, +}; +use precompiles_helpers::DmaInfo; + +/// The `Dma64AlignedMemCpySM` struct encapsulates the logic of the Dma64Aligned State Machine. +pub struct Dma64AlignedMemCpySM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + /// Range checks ID's + range_16_bits_id: usize, + op_x_rows: usize, +} + +impl Dma64AlignedMemCpySM { + /// Creates a new Dma State Machine instance. + /// + /// # Returns + /// A new `Dma64AlignedMemCpySM` instance. + pub fn new(std: Arc>) -> Arc { + Arc::new(Self { + std: std.clone(), + range_16_bits_id: std + .get_range_id(0, 0xFFFF, None) + .expect("Failed to get 16b table ID"), + op_x_rows: DMA_64_ALIGNED_MEMCPY_OPS_BY_ROW, + }) + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_input( + &self, + input: &Dma64AlignedInput, + trace: &mut [Dma64AlignedMemCpyTraceRow], + _local_16_bits_table: &mut [u32], // for input_cpy + air_values: &mut Dma64AlignedMemCpyAirValues, + ) -> usize { + let rows = input.rows as usize; + let is_last_instance_input = rows >= trace.len(); + let skip_count = input.skip_rows as usize * self.op_x_rows; + let initial_count = DmaInfo::get_loop_count(input.encoded) - skip_count; + let mut count64 = initial_count; + + let mut src_values_index = 0; + let mut dst64 = ((input.dst + 7) >> 3) + skip_count as u32; + let mut src64 = ((input.src + 7) >> 3) + skip_count as u32; + let mut seq_end = false; + let addr_incr_by_row = self.op_x_rows as u32; + for (irow, row) in trace.iter_mut().enumerate().take(rows) { + row.set_main_step(input.step); + row.set_sel_memcpy(input.op == ZiskOp::DMA_XMEMCPY || input.op == ZiskOp::DMA_MEMCPY); + row.set_previous_seq_end(irow == 0 && input.skip_rows == 0); + + if irow == 0 && input.skip_rows == 0 { + row.set_sel_memcpy_count_load(input.op == ZiskOp::DMA_MEMCPY); + } + + // calculate the first aligned address + // if dst is aligned is same address if not it's addr + 8 + row.set_dst64(dst64); + row.set_src64(src64); + dst64 += addr_incr_by_row; + src64 += addr_incr_by_row; + + row.set_count64(count64 as u32); + let use_count = if count64 <= self.op_x_rows { + seq_end = true; + // trace i zerofilled, not set values zero + count64 + } else { + count64 -= self.op_x_rows; + self.op_x_rows + }; + row.set_seq_end(seq_end); + for index in 0..use_count { + if index > 0 { + row.set_sel_op_from_1(index - 1, true); + } + let value = input.src_values[src_values_index]; + src_values_index += 1; + row.set_value(index, 0, value as u32); + row.set_value(index, 1, (value >> 32) as u32); + } + } + + if is_last_instance_input { + if seq_end { + air_values.segment_last_seq_end = F::ONE; + air_values.segment_last_src64 = F::ZERO; + air_values.segment_last_dst64 = F::ZERO; + air_values.segment_last_main_step = F::ZERO; + air_values.segment_last_count64 = F::ZERO; + air_values.last_count_chunk[0] = F::ZERO; + air_values.last_count_chunk[1] = F::ZERO; + air_values.segment_last_flags = F::ZERO; + } else { + air_values.segment_last_seq_end = F::ZERO; + air_values.segment_last_src64 = F::from_u32(src64 - addr_incr_by_row); + air_values.segment_last_dst64 = F::from_u32(dst64 - addr_incr_by_row); + air_values.segment_last_main_step = F::from_u64(input.step); + let last_count = initial_count - (rows - 1) * self.op_x_rows; + air_values.segment_last_count64 = F::from_u32(last_count as u32); + air_values.last_count_chunk[0] = F::from_u16(last_count as u16); + air_values.last_count_chunk[1] = F::from_u16((last_count >> 16) as u16); + air_values.segment_last_flags = F::from_u16(F_SEL_MEMCPY as u16); + } + } + rows + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_empty_slice(&self, trace: &mut Dma64AlignedMemCpyTraceRow) { + trace.set_seq_end(true); + trace.set_previous_seq_end(true); + } +} +impl Dma64AlignedModule for Dma64AlignedMemCpySM { + fn get_name(&self) -> &'static str { + "dma_64_aligned_memcpy" + } + + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + fn compute_witness( + &self, + inputs: &[Vec], + segment_id: SegmentId, + is_last_segment: bool, + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = Dma64AlignedMemCpyTrace::::new_from_vec_zeroes(trace_buffer)?; + let num_rows = trace.num_rows(); + + let total_inputs: usize = inputs + .iter() + .map(|inputs| inputs.iter().map(|input| input.rows as usize).sum::()) + .sum(); + + assert!(total_inputs > 0); + // println!("LAST INPUT: {:?}", inputs.last().unwrap()); + // println!("DMA_64_ALIGNED TOTALS total_inputs:{total_inputs} num_rows:{num_rows}"); + assert!( + total_inputs <= num_rows, + "Too many inputs, total_inputs:{total_inputs} num_rows:{num_rows}" + ); + + dma_trace("Dma64AlignedMemCpy", total_inputs, num_rows); + + timer_start_trace!(DMA_64_ALIGNED_TRACE); + + // Flat the inputs and reorder to ensure first, last are in theirs positions. + let flat_inputs = crate::flatten_and_reorder_inputs(inputs); + let trace_rows = trace.buffer.as_mut_slice(); + + let mut local_16_bits_table = vec![0u32; 1 << 16]; + let mut air_values = Dma64AlignedMemCpyAirValues::::new(); + + // TODO: inputs between instances + let mut row_offset = 0; + for input in flat_inputs.iter() { + let rows_used = self.process_input( + input, + &mut trace_rows[row_offset..], + &mut local_16_bits_table, + &mut air_values, + ); + row_offset += rows_used; + } + + // padding + let padding_size = num_rows.saturating_sub(row_offset); + air_values.padding_size = F::from_u32(padding_size as u32); + + if row_offset < num_rows { + for padding_row in trace_rows.iter_mut().take(num_rows).skip(row_offset) { + self.process_empty_slice(padding_row); + } + air_values.segment_last_seq_end = F::ONE; + air_values.segment_last_src64 = F::ZERO; + air_values.segment_last_dst64 = F::ZERO; + air_values.segment_last_main_step = F::ZERO; + air_values.segment_last_count64 = F::ZERO; + air_values.last_count_chunk[0] = F::ZERO; + air_values.last_count_chunk[1] = F::ZERO; + air_values.segment_last_flags = F::ZERO; + } + + // add range check of count to check that it's a positive 32-bits number + let last_count = air_values.segment_last_count64.as_canonical_u64(); + local_16_bits_table[(last_count & 0xFFFF) as usize] += 1; + local_16_bits_table[((last_count >> 16) & 0xFFFF) as usize] += 1; + + self.std.range_checks(self.range_16_bits_id, local_16_bits_table); + + let segment_id = segment_id.into(); + air_values.segment_id = F::from_usize(segment_id); + air_values.is_last_segment = F::from_bool(is_last_segment); + + let first_input = flat_inputs.first().unwrap(); + if first_input.skip_rows == 0 { + air_values.segment_previous_seq_end = F::ONE; + air_values.segment_previous_dst64 = F::ZERO; + air_values.segment_previous_src64 = F::ZERO; + air_values.segment_previous_main_step = F::ZERO; + air_values.segment_previous_count64 = F::ZERO; + air_values.segment_previous_flags = F::ZERO; + } else { + assert!(segment_id > 0); + air_values.segment_previous_seq_end = F::ZERO; + air_values.segment_previous_dst64 = + F::from_u32(trace_rows[0].get_dst64() - self.op_x_rows as u32); + air_values.segment_previous_src64 = + F::from_u32(trace_rows[0].get_src64() - self.op_x_rows as u32); + air_values.segment_previous_main_step = F::from_u64(trace_rows[0].get_main_step()); + air_values.segment_previous_count64 = + F::from_u32(trace_rows[0].get_count64() + self.op_x_rows as u32); + air_values.segment_previous_flags = F::from_u16(F_SEL_MEMCPY as u16); + } + timer_stop_and_log_trace!(DMA_64_ALIGNED_TRACE); + let from_trace = FromTrace::new(&mut trace).with_air_values(&mut air_values); + Ok(AirInstance::new_from_trace(from_trace)) + } +} diff --git a/precompiles/dma/src/dma_64_aligned/dma_64_aligned_memset.rs b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_memset.rs new file mode 100644 index 000000000..94732c7db --- /dev/null +++ b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_memset.rs @@ -0,0 +1,252 @@ +use std::sync::Arc; + +use fields::PrimeField64; + +use pil_std_lib::Std; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +use zisk_common::SegmentId; +use zisk_pil::Dma64AlignedMemSetAirValues; + +#[cfg(feature = "packed")] +pub use zisk_pil::{ + Dma64AlignedMemSetTracePacked as Dma64AlignedMemSetTrace, + Dma64AlignedMemSetTraceRowPacked as Dma64AlignedMemSetTraceRow, +}; + +#[cfg(not(feature = "packed"))] +pub use zisk_pil::{Dma64AlignedMemSetTrace, Dma64AlignedMemSetTraceRow}; + +use crate::{ + dma_trace, Dma64AlignedInput, Dma64AlignedModule, DMA_64_ALIGNED_MEMSET_OPS_BY_ROW, + F_SEL_MEMSET, +}; +use precompiles_helpers::DmaInfo; + +/// The `Dma64AlignedMemSetSM` struct encapsulates the logic of the Dma64Aligned State Machine. +pub struct Dma64AlignedMemSetSM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + /// Range checks ID's + range_16_bits_id: usize, + op_x_rows: usize, +} + +impl Dma64AlignedMemSetSM { + /// Creates a new Dma State Machine instance. + /// + /// # Returns + /// A new `Dma64AlignedMemSetSM` instance. + pub fn new(std: Arc>) -> Arc { + Arc::new(Self { + std: std.clone(), + range_16_bits_id: std + .get_range_id(0, 0xFFFF, None) + .expect("Failed to get 16b table ID"), + op_x_rows: DMA_64_ALIGNED_MEMSET_OPS_BY_ROW, + }) + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_input( + &self, + input: &Dma64AlignedInput, + trace: &mut [Dma64AlignedMemSetTraceRow], + _local_16_bits_table: &mut [u32], // for input_cpy + air_values: &mut Dma64AlignedMemSetAirValues, + ) -> usize { + let rows = input.rows as usize; + let is_last_instance_input = rows >= trace.len(); + let skip_count = input.skip_rows as usize * self.op_x_rows; + let initial_count = DmaInfo::get_loop_count(input.encoded) - skip_count; + let mut count64 = initial_count; + // println!( + // "DMA_64_ALIGNED INPUT {input:?} count:{count64} rows:{rows} dma_info:{}", + // DmaInfo::to_string(input.encoded) + // ); + let mut dst64 = ((input.dst + 7) >> 3) + skip_count as u32; + let mut seq_end = false; + let addr_incr_by_row = self.op_x_rows as u32; + + let fill_byte = DmaInfo::get_fill_byte(input.encoded); + for (irow, row) in trace.iter_mut().enumerate().take(rows) { + row.set_main_step(input.step); + row.set_sel_memset(true); + row.set_fill_byte(fill_byte); + row.set_previous_seq_end(irow == 0 && input.skip_rows == 0); + + // calculate the first aligned address + // if dst is aligned is same address if not it's addr + 8 + row.set_dst64(dst64); + dst64 += addr_incr_by_row; + + row.set_count64(count64 as u32); + let use_count = if count64 <= self.op_x_rows { + seq_end = true; + count64 + } else { + count64 -= self.op_x_rows; + self.op_x_rows + }; + row.set_seq_end(seq_end); + for index in 0..use_count { + if index > 0 { + row.set_sel_op_from_1(index - 1, true); + } + } + } + + if is_last_instance_input { + if seq_end { + air_values.segment_last_seq_end = F::ONE; + air_values.segment_last_dst64 = F::ZERO; + air_values.segment_last_main_step = F::ZERO; + air_values.segment_last_count64 = F::ZERO; + air_values.last_count_chunk[0] = F::ZERO; + air_values.last_count_chunk[1] = F::ZERO; + air_values.segment_last_flags = F::ZERO; + air_values.segment_last_fill_byte = F::ZERO; + } else { + air_values.segment_last_seq_end = F::ZERO; + air_values.segment_last_dst64 = F::from_u32(dst64 - addr_incr_by_row); + air_values.segment_last_main_step = F::from_u64(input.step); + let last_count = initial_count - (rows - 1) * self.op_x_rows; + air_values.segment_last_count64 = F::from_u32(last_count as u32); + air_values.last_count_chunk[0] = F::from_u16(last_count as u16); + air_values.last_count_chunk[1] = F::from_u16((last_count >> 16) as u16); + air_values.segment_last_flags = F::from_u16(F_SEL_MEMSET as u16); + air_values.segment_last_fill_byte = F::from_u8(fill_byte); + } + } + rows + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_empty_slice(&self, trace: &mut Dma64AlignedMemSetTraceRow) { + trace.set_seq_end(true); + trace.set_previous_seq_end(true); + } +} +impl Dma64AlignedModule for Dma64AlignedMemSetSM { + fn get_name(&self) -> &'static str { + "dma_64_aligned_memset" + } + + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + fn compute_witness( + &self, + inputs: &[Vec], + segment_id: SegmentId, + is_last_segment: bool, + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = Dma64AlignedMemSetTrace::::new_from_vec_zeroes(trace_buffer)?; + let num_rows = trace.num_rows(); + + let total_inputs: usize = inputs + .iter() + .map(|inputs| inputs.iter().map(|input| input.rows as usize).sum::()) + .sum(); + + assert!(total_inputs > 0); + // println!("LAST INPUT: {:?}", inputs.last().unwrap()); + // println!("DMA_64_ALIGNED TOTALS total_inputs:{total_inputs} num_rows:{num_rows}"); + assert!( + total_inputs <= num_rows, + "Too many inputs, total_inputs:{total_inputs} num_rows:{num_rows}" + ); + + dma_trace("Dma64AlignedMemSet", total_inputs, num_rows); + + timer_start_trace!(DMA_64_ALIGNED_TRACE); + + // Flat the inputs and to ensure that the first and the last inputs are placed in the right order, + let flat_inputs = crate::flatten_and_reorder_inputs(inputs); + + let trace_rows = trace.buffer.as_mut_slice(); + + let mut local_16_bits_table = vec![0u32; 1 << 16]; + let mut air_values = Dma64AlignedMemSetAirValues::::new(); + + let mut row_offset = 0; + for input in flat_inputs.iter() { + let rows_used = self.process_input( + input, + &mut trace_rows[row_offset..], + &mut local_16_bits_table, + &mut air_values, + ); + row_offset += rows_used; + } + + // padding + let padding_size = num_rows.saturating_sub(row_offset); + air_values.padding_size = F::from_u32(padding_size as u32); + + if row_offset < num_rows { + for padding_row in trace_rows.iter_mut().take(num_rows).skip(row_offset) { + self.process_empty_slice(padding_row); + } + air_values.segment_last_seq_end = F::ONE; + air_values.segment_last_dst64 = F::ZERO; + air_values.segment_last_main_step = F::ZERO; + air_values.segment_last_count64 = F::ZERO; + air_values.last_count_chunk[0] = F::ZERO; + air_values.last_count_chunk[1] = F::ZERO; + air_values.segment_last_flags = F::ZERO; + air_values.segment_last_fill_byte = F::ZERO; + } + + // add range check of count to check that it's a positive 32-bits number + let last_count = air_values.segment_last_count64.as_canonical_u64(); + local_16_bits_table[(last_count & 0xFFFF) as usize] += 1; + local_16_bits_table[((last_count >> 16) & 0xFFFF) as usize] += 1; + + self.std.range_checks(self.range_16_bits_id, local_16_bits_table); + + let segment_id = segment_id.into(); + air_values.segment_id = F::from_usize(segment_id); + air_values.is_last_segment = F::from_bool(is_last_segment); + + let first_input = flat_inputs.first().unwrap(); + if first_input.skip_rows == 0 { + air_values.segment_previous_seq_end = F::ONE; + air_values.segment_previous_dst64 = F::ZERO; + air_values.segment_previous_main_step = F::ZERO; + air_values.segment_previous_count64 = F::ZERO; + air_values.segment_previous_flags = F::ZERO; + air_values.segment_previous_fill_byte = F::ZERO; + } else { + assert!(segment_id > 0); + air_values.segment_previous_seq_end = F::ZERO; + air_values.segment_previous_dst64 = + F::from_u32(trace_rows[0].get_dst64() - self.op_x_rows as u32); + air_values.segment_previous_main_step = F::from_u64(trace_rows[0].get_main_step()); + air_values.segment_previous_count64 = + F::from_u32(trace_rows[0].get_count64() + self.op_x_rows as u32); + air_values.segment_previous_flags = F::from_u16(F_SEL_MEMSET as u16); + air_values.segment_previous_fill_byte = F::from_u8(trace_rows[0].get_fill_byte()); + } + timer_stop_and_log_trace!(DMA_64_ALIGNED_TRACE); + let from_trace = FromTrace::new(&mut trace).with_air_values(&mut air_values); + Ok(AirInstance::new_from_trace(from_trace)) + } +} diff --git a/precompiles/dma/src/dma_64_aligned/dma_64_aligned_module.rs b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_module.rs new file mode 100644 index 000000000..16364b9f9 --- /dev/null +++ b/precompiles/dma/src/dma_64_aligned/dma_64_aligned_module.rs @@ -0,0 +1,14 @@ +use crate::Dma64AlignedInput; +use proofman_common::{AirInstance, ProofmanResult}; +use zisk_common::SegmentId; + +pub trait Dma64AlignedModule: Send + Sync { + fn compute_witness( + &self, + inputs: &[Vec], + segment_id: SegmentId, + is_last_segment: bool, + trace_buffer: Vec, + ) -> ProofmanResult>; + fn get_name(&self) -> &'static str; +} diff --git a/precompiles/dma/src/dma_64_aligned/mod.rs b/precompiles/dma/src/dma_64_aligned/mod.rs new file mode 100644 index 000000000..059ade510 --- /dev/null +++ b/precompiles/dma/src/dma_64_aligned/mod.rs @@ -0,0 +1,20 @@ +#[allow(clippy::module_inception)] +mod dma_64_aligned; +mod dma_64_aligned_collector; +mod dma_64_aligned_input; +mod dma_64_aligned_inputcpy; +mod dma_64_aligned_instance; +mod dma_64_aligned_mem; +mod dma_64_aligned_memcpy; +mod dma_64_aligned_memset; +mod dma_64_aligned_module; + +pub use dma_64_aligned::*; +pub use dma_64_aligned_collector::*; +pub use dma_64_aligned_input::*; +pub use dma_64_aligned_inputcpy::*; +pub use dma_64_aligned_instance::*; +pub use dma_64_aligned_mem::*; +pub use dma_64_aligned_memcpy::*; +pub use dma_64_aligned_memset::*; +pub use dma_64_aligned_module::*; diff --git a/precompiles/dma/src/dma_bus_device.rs b/precompiles/dma/src/dma_bus_device.rs new file mode 100644 index 000000000..257f22c1d --- /dev/null +++ b/precompiles/dma/src/dma_bus_device.rs @@ -0,0 +1,314 @@ +//! The `DmaCounter` module defines a counter for tracking dma-related operations +//! sent over the data bus. It connects to the bus and gathers metrics for specific +//! `ZiskOperationType::Dma` instructions. + +use std::fmt; +use std::ops::Add; + +use precompiles_common::MemProcessor; +use precompiles_helpers::DmaInfo; +use zisk_common::{BusDevice, BusDeviceMode, BusId, Metrics, OPERATION_BUS_ID, OP_TYPE, STEP}; +use zisk_common::{OP, OPERATION_PRECOMPILED_BUS_DATA_SIZE}; +use zisk_core::zisk_ops::ZiskOp; +use zisk_core::ZiskOperationType; + +use crate::{generate_dma_mem_inputs, skip_dma_mem_inputs}; + +// The `DmaOpMultiCounter` struct represents a counter that monitors and measures +// dma specific operation on the data bus. +// +// Dma Full OnlyMemCpy OnlyInputCpy +// Dma64Aligned Full4 OnlyMemCpy8 OnlyInputCpy4 OnlyMemSet8 Mem4 +// DmaUnaligned Full +// DmaPrePost Full OnlyMemCpy OnlyInputCpy +// +// MEMCPY + XMEMCPY +// dma_memcpy | dma_pre_post_memcpy | dma_unaligned (unaligned_dst_src) +// dma_memcpy | dma_pre_post_memcpy | dma_64_aligned_memcpy (aligned_dst_src + lcount > 4) +// dma_memcpy | dma_pre_post_memcpy | dma_64_aligned_mem (aligned_dst_src + lcount <= 4) +// +// MEMCMP +// dma | dma_pre_post | dma_unaligned (unaligned_dst_src) +// dma | dma_pre_post | dma_64_aligned_mem (aligned_dst_src) +// +// INPUTCPY +// dma_inputcpy | dma_pre_post_inputcpy | dma_64_aligned_inputcpy +// +// XMEMSET +// dma | dma_pre_post | dma_64_aligned_memset +// +// With this config, for the memcpy the limit was 4 words of 64-bits, more than 4 it's +// better a OnlyMemCpy8 vs Mem4 +// +// DMA => 4 counters +// DMA_PRE_POST => 4 +// DMA_UNALIGNED = 4 +// DMA_UNALIGNED_INPUTS = 4 +// DMA_64_ALIGNED_ROWS = 6 +// DMA_64_ALIGNED_INPUTS = 4 + +pub const DMA_OFFSET: usize = 0; +pub const DMA_PRE_POST_OFFSET: usize = 4; +pub const DMA_UNALIGNED_OFFSET: usize = 8; +pub const DMA_UNALIGNED_INPUTS_OFFSET: usize = 12; +pub const DMA_64_ALIGNED_OFFSET: usize = 16; +pub const DMA_64_ALIGNED_INPUTS_OFFSET: usize = 22; +pub const DMA_INPUT_GEN_COUNTERS: usize = 26; + +pub const DMA_COUNTER_MEMCPY: usize = 0; +pub const DMA_COUNTER_MEMSET: usize = 1; +pub const DMA_COUNTER_MEMCMP: usize = 2; +pub const DMA_COUNTER_INPUTCPY: usize = 3; +pub const DMA_COUNTER_MEMCPY_8: usize = 4; +pub const DMA_COUNTER_MEMSET_8: usize = 5; + +pub const DMA_COUNTER_OPS: usize = 4; +pub const DMA_COUNTER_OPS_EXT: usize = 6; +#[derive(Debug)] +pub struct DmaCounterInputGen { + pub counters: [usize; DMA_INPUT_GEN_COUNTERS], + + mode: BusDeviceMode, +} + +impl fmt::Display for DmaCounterInputGen { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "ROWS:\n \ + memcpy4 memcpy8 memcmp inputcpy memset4 memset8\n \ + dma {:>8} {:>8} {:>8} {:>8} \n \ + dma_pre_post {:>8} {:>8} {:>8} {:>8} \n \ + dma_64_aligned {:>8} {:>8} {:>8} {:>8} {:>8} {:>8}\n \ + dma_unaligned {:>8} {:>8} {:>8} {:>8} \n\n \ + INPUTS\n \ + memcpy4 memcpy8 memcmp inputcpy memset4 memset8\n \ + dma_64_aligned {:>8} {:>8} {:>8} {:>8} \n \ + dma_unaligned {:>8} {:>8} {:>8} {:>8} \n\n", + self.counters[DMA_OFFSET + DMA_COUNTER_MEMCPY], + self.counters[DMA_OFFSET + DMA_COUNTER_MEMCMP], + self.counters[DMA_OFFSET + DMA_COUNTER_INPUTCPY], + self.counters[DMA_OFFSET + DMA_COUNTER_MEMSET], + self.counters[DMA_PRE_POST_OFFSET + DMA_COUNTER_MEMCPY], + self.counters[DMA_PRE_POST_OFFSET + DMA_COUNTER_MEMCMP], + self.counters[DMA_PRE_POST_OFFSET + DMA_COUNTER_INPUTCPY], + self.counters[DMA_PRE_POST_OFFSET + DMA_COUNTER_MEMSET], + self.counters[DMA_64_ALIGNED_OFFSET + DMA_COUNTER_MEMCPY], + self.counters[DMA_64_ALIGNED_OFFSET + DMA_COUNTER_MEMCPY_8], + self.counters[DMA_64_ALIGNED_OFFSET + DMA_COUNTER_MEMCMP], + self.counters[DMA_64_ALIGNED_OFFSET + DMA_COUNTER_INPUTCPY], + self.counters[DMA_64_ALIGNED_OFFSET + DMA_COUNTER_MEMSET], + self.counters[DMA_64_ALIGNED_OFFSET + DMA_COUNTER_MEMSET_8], + self.counters[DMA_UNALIGNED_OFFSET + DMA_COUNTER_MEMCPY], + self.counters[DMA_UNALIGNED_OFFSET + DMA_COUNTER_MEMCMP], + self.counters[DMA_UNALIGNED_OFFSET + DMA_COUNTER_INPUTCPY], + self.counters[DMA_UNALIGNED_OFFSET + DMA_COUNTER_MEMSET], + self.counters[DMA_64_ALIGNED_INPUTS_OFFSET + DMA_COUNTER_MEMCPY], + self.counters[DMA_64_ALIGNED_INPUTS_OFFSET + DMA_COUNTER_MEMCMP], + self.counters[DMA_64_ALIGNED_INPUTS_OFFSET + DMA_COUNTER_INPUTCPY], + self.counters[DMA_64_ALIGNED_INPUTS_OFFSET + DMA_COUNTER_MEMSET], + self.counters[DMA_UNALIGNED_INPUTS_OFFSET + DMA_COUNTER_MEMCPY], + self.counters[DMA_UNALIGNED_INPUTS_OFFSET + DMA_COUNTER_MEMCMP], + self.counters[DMA_UNALIGNED_INPUTS_OFFSET + DMA_COUNTER_INPUTCPY], + self.counters[DMA_UNALIGNED_INPUTS_OFFSET + DMA_COUNTER_MEMSET], + ) + } +} + +impl DmaCounterInputGen { + /// Creates a new instance of `DmaCounter`. + /// + /// # Arguments + /// * `mode` - The ID of the bus to which this counter is connected. + /// + /// # Returns + /// A new `DmaCounter` instance. + pub fn new(mode: BusDeviceMode) -> Self { + Self { counters: [0; DMA_INPUT_GEN_COUNTERS], mode } + } + const OPS_X_ROW: [usize; 6] = [ + 4, // MEMCPY_4 + 4, // MEMSET_4 + 4, // MEMCMP + 4, // INPUTCPY + 8, // MEMCPY_8 + 8, // MEMSET_8 + ]; + const IS_DOUBLE: [usize; 6] = [ + 1, // MEMCPY_4 + 1, // MEMSET_4 + 0, // MEMCMP + 0, // INPUTCPY + 0, // MEMCPY_8 + 0, // MEMSET_8 + ]; + + fn incr_counters(&mut self, encoded: u64, operation: usize, _step: u64) { + if !DmaInfo::is_direct(encoded) { + if DmaInfo::get_pre_count(encoded) > 0 { + self.counters[DMA_PRE_POST_OFFSET + operation] += 1; + } + if DmaInfo::get_post_count(encoded) > 0 { + self.counters[DMA_PRE_POST_OFFSET + operation] += 1; + } + self.counters[DMA_OFFSET + operation] += 1; + } + let loop_count = DmaInfo::get_loop_count(encoded); + // it's effective loop count + let use_src = operation != DMA_COUNTER_MEMSET && operation != DMA_COUNTER_INPUTCPY; + if loop_count > 0 { + if DmaInfo::dst_is_aligned_with_src(encoded) || !use_src { + let rows = loop_count.div_ceil(Self::OPS_X_ROW[operation]); + self.counters[DMA_64_ALIGNED_OFFSET + operation] += rows; + self.counters[DMA_64_ALIGNED_INPUTS_OFFSET + operation] += 1; + if Self::IS_DOUBLE[operation] == 1 { + let rows = loop_count.div_ceil(Self::OPS_X_ROW[operation + 4]); + self.counters[DMA_64_ALIGNED_OFFSET + operation + 4] += rows; + } + } else { + self.counters[DMA_UNALIGNED_OFFSET + operation] += loop_count + 1; + self.counters[DMA_UNALIGNED_INPUTS_OFFSET + operation] += 1; + } + } + } + + /// Retrieves the count of instructions for a specific `ZiskOperationType`. + /// + /// # Arguments + /// * `dst` - The destination address of operation. + /// * `src` - The source address of operation. + /// * `count` - The bytes of operation. + pub fn inst_count(&mut self, encoded: u64, op: u8, step: u64) { + // count and plan no need the count, need the effective count: + // effective_count = if is_equal { count } else { count_eq + 1 } + // the count encoded was effective + match op { + ZiskOp::DMA_MEMCPY | ZiskOp::DMA_XMEMCPY => { + // if DmaInfo::dst_is_aligned_with_src(encoded) { + self.incr_counters(encoded, DMA_COUNTER_MEMCPY, step); + // } + } + ZiskOp::DMA_MEMCMP | ZiskOp::DMA_XMEMCMP => { + self.incr_counters(encoded, DMA_COUNTER_MEMCMP, step) + } + ZiskOp::DMA_INPUTCPY => self.incr_counters(encoded, DMA_COUNTER_INPUTCPY, step), + ZiskOp::DMA_XMEMSET => self.incr_counters(encoded, DMA_COUNTER_MEMSET, step), + _ => panic!("Unknown DMA Cmd 0x{op:02X}"), + } + } + + /// Processes data received on the bus, updating counters and generating inputs when applicable. + /// + /// # Arguments + /// * `bus_id` - The ID of the bus sending the data. + /// * `data` - The data received from the bus. + /// * `mem_processors` – A queue of mem_processors bus operations used to send derived inputs. + /// + /// # Returns + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data( + &mut self, + bus_id: &BusId, + data: &[u64], + data_ext: &[u64], + mem_processors: &mut P, + ) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + if data[OP_TYPE] as u32 != ZiskOperationType::Dma as u32 { + return true; + } + + match self.mode { + BusDeviceMode::Counter => { + self.measure(data); + generate_dma_mem_inputs(data, data_ext, true, mem_processors); + } + BusDeviceMode::CounterAsm => { + self.measure(data); + } + BusDeviceMode::InputGenerator => { + if skip_dma_mem_inputs(data, data_ext, mem_processors) { + return true; + } + generate_dma_mem_inputs(data, data_ext, false, mem_processors); + } + } + + true + } +} + +impl Metrics for DmaCounterInputGen { + /// Tracks activity on the connected bus and updates counters for recognized operations. + /// + /// # Arguments + /// * `_bus_id` - The ID of the bus (unused in this implementation). + /// * `_data` - The data received from the bus. + /// + /// # Returns + /// An empty vector, as this implementation does not produce any derived inputs for the bus. + #[inline(always)] + fn measure(&mut self, data: &[u64]) { + if data[OP_TYPE] != ZiskOperationType::Dma as u64 { + return; + } + let op = data[OP] as u8; + let encoded = data[OPERATION_PRECOMPILED_BUS_DATA_SIZE]; + self.inst_count(encoded, op, data[STEP]); + } + + /// Provides a dynamic reference for downcasting purposes. + /// + /// # Returns + /// A reference to `self` as `dyn std::any::Any`. + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + +impl Add for DmaCounterInputGen { + type Output = DmaCounterInputGen; + + /// Combines two `DmaCounter` instances by summing their counters. + /// + /// # Arguments + /// * `self` - The first `DmaCounter` instance. + /// * `other` - The second `DmaCounter` instance. + /// + /// # Returns + /// A new `DmaCounter` with combined counters. + fn add(self, other: Self) -> DmaCounterInputGen { + DmaCounterInputGen { + counters: std::array::from_fn(|i| self.counters[i] + other.counters[i]), + mode: self.mode.clone(), + } + } +} + +impl Add<&DmaCounterInputGen> for &DmaCounterInputGen { + type Output = DmaCounterInputGen; + + /// Combines two `DmaCounter` references by summing their counters. + /// + /// # Arguments + /// * `self` - Reference to the first `DmaCounter` instance. + /// * `other` - Reference to the second `DmaCounter` instance. + /// + /// # Returns + /// A new `DmaCounter` with combined counters. + fn add(self, other: &DmaCounterInputGen) -> DmaCounterInputGen { + DmaCounterInputGen { + counters: std::array::from_fn(|i| self.counters[i] + other.counters[i]), + mode: self.mode.clone(), + } + } +} +impl BusDevice for DmaCounterInputGen { + /// Provides a dynamic reference for downcasting purposes. + fn as_any(self: Box) -> Box { + self + } +} diff --git a/precompiles/dma/src/dma_checkpoint.rs b/precompiles/dma/src/dma_checkpoint.rs new file mode 100644 index 000000000..1eac30f1d --- /dev/null +++ b/precompiles/dma/src/dma_checkpoint.rs @@ -0,0 +1,30 @@ +use std::collections::HashMap; + +use zisk_common::ChunkId; + +use crate::DmaCollectCounters; + +#[derive(Default, Debug)] +pub struct DmaCheckPoint { + pub chunks: HashMap, + pub last_chunk: Option, + pub is_last_segment: bool, +} + +impl DmaCheckPoint { + #[cfg(any(feature = "save_dma_collectors", feature = "save_dma_plans"))] + pub fn get_debug_info(&self, title: &str, segment_id: u64) -> String { + self.chunks + .iter() + .map(|(chunk_id, (num_inputs, collect_counters))| { + format!( + "{title} #{segment_id}@{chunk_id} [{num_inputs}|{}]{}{}", + collect_counters.get_debug_info(), + if Some(*chunk_id) == self.last_chunk { " [last_chunk]" } else { "" }, + if self.is_last_segment { " [last_segment]" } else { "" }, + ) + }) + .collect::>() + .join("\n") + } +} diff --git a/precompiles/dma/src/dma_collect_counters.rs b/precompiles/dma/src/dma_collect_counters.rs new file mode 100644 index 000000000..90a6092a1 --- /dev/null +++ b/precompiles/dma/src/dma_collect_counters.rs @@ -0,0 +1,83 @@ +use zisk_common::CollectCounter; +use zisk_core::zisk_ops::ZiskOp; + +#[derive(Debug, Clone, Copy)] +pub struct DmaCollectCounters { + // This counters are for a specific instance, means that only need to know of collect + // for each different operation, no for destination + pub memcpy: CollectCounter, + pub inputcpy: CollectCounter, + pub memset: CollectCounter, + pub memcmp: CollectCounter, +} + +impl DmaCollectCounters { + pub fn debug_assert_is_final_skip(&self) { + debug_assert!( + self.is_final_skip(), + "pending to collect => memcpy: {}/{}|inputcpy: {}/{}|memset: {}/{}|memcmp: {}/{}", + self.memcpy.collected, + self.memcpy.collect_count, + self.inputcpy.collected, + self.inputcpy.collect_count, + self.memset.collected, + self.memset.collect_count, + self.memcmp.collected, + self.memcmp.collect_count + ); + } + pub fn is_final_skip(&self) -> bool { + self.memcpy.is_final_skip() + && self.inputcpy.is_final_skip() + && self.memset.is_final_skip() + && self.memcmp.is_final_skip() + } + pub fn should_collect(&mut self, rows: u64, op: u8) -> Option<(u32, u32)> { + match op { + ZiskOp::DMA_MEMCPY | ZiskOp::DMA_XMEMCPY => self.memcpy.should_process(rows as u32), + ZiskOp::DMA_MEMCMP | ZiskOp::DMA_XMEMCMP => self.memcmp.should_process(rows as u32), + ZiskOp::DMA_INPUTCPY => self.inputcpy.should_process(rows as u32), + ZiskOp::DMA_XMEMSET => self.memset.should_process(rows as u32), + _ => panic!("Invalid operation 0x{op:02X} for DmaCollectCounters"), + } + } + #[inline(always)] + pub fn should_collect_single_row(&mut self, op: u8) -> bool { + self.should_collect(1, op).is_some() + } + + #[cfg(feature = "save_dma_collectors")] + pub fn get_full_debug_info(&self) -> String { + format!( + "CY:{}/{}|IC:{}/{}|MS:{}/{}|MC:{}/{}", + self.memcpy.collected, + self.memcpy.collect_count, + self.inputcpy.collected, + self.inputcpy.collect_count, + self.memset.collected, + self.memset.collect_count, + self.memcmp.collected, + self.memcmp.collect_count, + ) + } + #[cfg(any(feature = "save_dma_collectors", feature = "save_dma_plans"))] + pub fn get_debug_info(&self) -> String { + (if self.memcpy.initial_skip == 0 { + format!("CY:{}|", self.memcpy.collect_count) + } else { + format!("CY:({}){}|", self.memcpy.collect_count, self.memcpy.initial_skip) + }) + &(if self.inputcpy.initial_skip == 0 { + format!("IC:{}|", self.inputcpy.collect_count) + } else { + format!("IC:({}){}|", self.inputcpy.collect_count, self.inputcpy.initial_skip) + }) + &(if self.memset.initial_skip == 0 { + format!("MS:{}|", self.memset.collect_count) + } else { + format!("MS:({}){}|", self.memset.collect_count, self.memset.initial_skip) + }) + &(if self.memcmp.initial_skip == 0 { + format!("MC:{}", self.memcmp.collect_count) + } else { + format!("MC:({}){}", self.memcmp.collect_count, self.memcmp.initial_skip) + }) + } +} diff --git a/precompiles/dma/src/dma_collector_routing_log.rs b/precompiles/dma/src/dma_collector_routing_log.rs new file mode 100644 index 000000000..642423151 --- /dev/null +++ b/precompiles/dma/src/dma_collector_routing_log.rs @@ -0,0 +1,74 @@ +use zisk_common::ChunkId; + +#[cfg(feature = "save_dma_collectors")] +use zisk_common::STEP; + +#[cfg(feature = "save_dma_collectors")] +#[derive(Debug)] +pub struct DmaCollectorRoutingLog { + pub chunk_id: ChunkId, + pub log: Vec<(u8, u64, u32, u32, u32)>, +} + +#[cfg(not(feature = "save_dma_collectors"))] +#[derive(Debug)] +pub struct DmaCollectorRoutingLog {} + +#[cfg(not(feature = "save_dma_collectors"))] +impl DmaCollectorRoutingLog { + pub fn new(_chunk_id: ChunkId) -> Self { + Self {} + } + #[inline(always)] + pub fn log_collect(&mut self, _rows: u32, _data: &[u64], _skip: u32, _max_count: u32) {} + #[inline(always)] + pub fn log_discard(&mut self, _reason: u8, _data: &[u64]) {} + #[inline(always)] + pub fn log_discard_cond( + &mut self, + cond: bool, + _reason: u8, + _data: &[u64], + _result: bool, + ) -> bool { + cond + } +} + +#[cfg(feature = "save_dma_collectors")] +impl DmaCollectorRoutingLog { + pub fn new(chunk_id: ChunkId) -> Self { + Self { chunk_id, log: Vec::new() } + } + + pub fn get_debug_info(&self) -> String { + self.log + .iter() + .map(|(reason, step, rows, skip, max_count)| { + format!( + "{}|{reason}|@{}|C:{rows}|K:{skip}|M:{max_count}|S:{step}", + if *reason == 0 { "COLLECT" } else { "SKIP" }, + self.chunk_id + ) + }) + .collect::>() + .join("\n") + + "\n" + } + + #[inline(always)] + pub fn log_discard(&mut self, reason: u8, data: &[u64]) { + self.log.push((reason, data[zisk_common::STEP], 0, 0, 0)); + } + + #[inline(always)] + pub fn log_collect(&mut self, rows: u32, data: &[u64], skip: u32, max_count: u32) { + self.log.push((0, data[zisk_common::STEP], rows, skip, max_count)); + } + + #[inline(always)] + pub fn log_discard_cond(&mut self, cond: bool, reason: u8, data: &[u64], result: bool) -> bool { + self.log.push((reason + cond as u8, data[STEP], 0, 0, 0)); + result + } +} diff --git a/precompiles/dma/src/dma_common.rs b/precompiles/dma/src/dma_common.rs new file mode 100644 index 000000000..58219182f --- /dev/null +++ b/precompiles/dma/src/dma_common.rs @@ -0,0 +1,139 @@ +use fields::PrimeField64; +use zisk_pil::{ + Dma64AlignedInputCpyTrace, Dma64AlignedMemCpyTrace, Dma64AlignedMemSetTrace, + Dma64AlignedMemTrace, Dma64AlignedTrace, DmaInputCpyTrace, DmaMemCpyTrace, + DmaPrePostInputCpyTrace, DmaPrePostMemCpyTrace, DmaPrePostTrace, DmaTrace, DmaUnalignedTrace, +}; + +pub fn get_dma_air_name(air_id: usize) -> &'static str { + match air_id { + DmaTrace::::AIR_ID => "Dma", + DmaMemCpyTrace::::AIR_ID => "DmaMemCpy", + DmaInputCpyTrace::::AIR_ID => "DmaInputCpy", + DmaPrePostTrace::::AIR_ID => "DmaPrePost", + DmaPrePostMemCpyTrace::::AIR_ID => "DmaPrePostMemCpy", + DmaPrePostInputCpyTrace::::AIR_ID => "DmaPrePostInputCpy", + Dma64AlignedTrace::::AIR_ID => "Dma64Aligned", + Dma64AlignedMemSetTrace::::AIR_ID => "Dma64AlignedMemSet", + Dma64AlignedMemCpyTrace::::AIR_ID => "Dma64AlignedMemCpy", + Dma64AlignedInputCpyTrace::::AIR_ID => "Dma64AlignedInputCpy", + Dma64AlignedMemTrace::::AIR_ID => "Dma64AlignedMem", + DmaUnalignedTrace::::AIR_ID => "DmaUnaligned", + _ => "Unknown", + } +} + +pub fn dma_trace(title: &str, rows: usize, num_rows: usize) { + tracing::debug!( + "··· Creating {title} instance [{rows} / {num_rows} rows filled {:.2}%]", + rows as f64 / num_rows as f64 * 100.0 + ); +} + +/// Flattens and reorders input vectors to ensure proper sequencing. +/// +/// This function reorders vectors so that: +/// - The vector whose first element has `must_be_first()` == true is placed first +/// - The vector whose last element has `must_be_last()` == true is placed last +/// +/// This is necessary for DMA operations to maintain proper sequencing when +/// operations span multiple chunks or segments. +/// +/// # Type Parameters +/// * `T` - The input type, must implement `DmaInputPosition` +/// +/// # Arguments +/// * `inputs` - Slice of vectors containing DMA inputs +/// +/// # Returns +/// A flattened vector with references to inputs, properly ordered +pub fn flatten_and_reorder_inputs(inputs: &[Vec]) -> Vec<&T> +where + T: DmaInputPosition, +{ + if inputs.is_empty() { + return Vec::new(); + } + + // Find indices of vectors that must be first/last + let first_idx = + inputs.iter().position(|vec| vec.first().is_some_and(|input| input.must_be_first())); + + let last_idx = + inputs.iter().position(|vec| vec.last().is_some_and(|input| input.must_be_last())); + + match (first_idx, last_idx) { + (None, None) => { + // No special ordering required, simple flatten + inputs.iter().flatten().collect() + } + (Some(0), None) => { + // First is already at position 0, simple flatten + inputs.iter().flatten().collect() + } + (Some(f_idx), None) => { + // Only first needs reordering: move to beginning + std::iter::once(&inputs[f_idx]) + .chain(inputs[..f_idx].iter()) + .chain(inputs[f_idx + 1..].iter()) + .flatten() + .collect() + } + (None, Some(l_idx)) if l_idx == inputs.len() - 1 => { + // Last is already at final position, simple flatten + inputs.iter().flatten().collect() + } + (None, Some(l_idx)) => { + // Only last needs reordering: move to end + inputs[..l_idx] + .iter() + .chain(inputs[l_idx + 1..].iter()) + .chain(std::iter::once(&inputs[l_idx])) + .flatten() + .collect() + } + (Some(f_idx), Some(l_idx)) if f_idx == l_idx => { + // Same vector is both first and last: all constrained inputs belong to one + // large ("huge") DMA operation that spans from its first to its last element. + // The only case in which this can happen is when there is a single collector and, + // therefore, the length of the collector’s input list is 1. Within a collector, + // the number of inputs does not necessarily have to be 1. + assert!(f_idx == 0); + assert!(inputs.len() == 1); + inputs.iter().flatten().collect() + } + (Some(f_idx), Some(l_idx)) if f_idx == 0 && l_idx == inputs.len() - 1 => { + // Already in correct order + inputs.iter().flatten().collect() + } + (Some(f_idx), Some(l_idx)) => { + // Both need reordering: first at beginning, last at end + // Handle different cases to avoid double-including indices + if f_idx < l_idx { + // first comes before last in original order + std::iter::once(&inputs[f_idx]) + .chain(inputs[..f_idx].iter()) + .chain(inputs[f_idx + 1..l_idx].iter()) + .chain(inputs[l_idx + 1..].iter()) + .chain(std::iter::once(&inputs[l_idx])) + .flatten() + .collect() + } else { + // last comes before first in original order + std::iter::once(&inputs[f_idx]) + .chain(inputs[..l_idx].iter()) + .chain(inputs[l_idx + 1..f_idx].iter()) + .chain(inputs[f_idx + 1..].iter()) + .chain(std::iter::once(&inputs[l_idx])) + .flatten() + .collect() + } + } + } +} + +/// Trait for types that have a skip_rows field +pub trait DmaInputPosition { + fn must_be_last(&self) -> bool; + fn must_be_first(&self) -> bool; +} diff --git a/precompiles/dma/src/dma_constants.rs b/precompiles/dma/src/dma_constants.rs new file mode 100644 index 000000000..049829dc8 --- /dev/null +++ b/precompiles/dma/src/dma_constants.rs @@ -0,0 +1,19 @@ +use zisk_common::OPERATION_PRECOMPILED_BUS_DATA_SIZE; + +pub const PARAMS: usize = 4; +pub const READ_PARAMS: usize = 2; +pub const DIRECT_READ_PARAMS: usize = 1; +pub const WRITE_PARAMS: usize = 1; +pub const RESULT_PARAMS: usize = 1; +pub const PARAM_CHUNKS: usize = 4; +pub const START_READ_PARAMS: usize = OPERATION_PRECOMPILED_BUS_DATA_SIZE + PARAMS; +pub const START_WRITE_PARAMS: usize = + START_READ_PARAMS + READ_PARAMS * PARAM_CHUNKS + RESULT_PARAMS; +pub const WRITE_ADDR_PARAM: usize = READ_PARAMS + DIRECT_READ_PARAMS; +pub const DMA_64_ALIGNED_OPS_BY_ROW: usize = 4; +pub const DMA_64_ALIGNED_INPUTCPY_OPS_BY_ROW: usize = 4; +pub const DMA_64_ALIGNED_MEMCPY_OPS_BY_ROW: usize = 8; +pub const DMA_64_ALIGNED_MEMSET_OPS_BY_ROW: usize = 8; +pub const DMA_64_ALIGNED_MEM_OPS_BY_ROW: usize = 4; +pub const DMA_ROM_WITHOUT_MEMCMP_SIZE: usize = 1 << 15; +pub const DMA_ROM_WITH_MEMCMP_SIZE: usize = 3 * (1 << 15); diff --git a/precompiles/dma/src/dma_gen_inputcpy_mem_inputs.rs b/precompiles/dma/src/dma_gen_inputcpy_mem_inputs.rs new file mode 100644 index 000000000..9fcc42315 --- /dev/null +++ b/precompiles/dma/src/dma_gen_inputcpy_mem_inputs.rs @@ -0,0 +1,101 @@ +use precompiles_common::MemBusHelpers; +use precompiles_common::MemProcessor; +use precompiles_helpers::DmaInfo; +use zisk_common::{A, DMA_ENCODED, STEP}; + +pub fn generate_dma_inputcpy_mem_inputs( + data: &[u64], + data_ext: &[u64], + mem_processors: &mut P, +) { + // inputcpy has same offset that dst, but when prepare data add zero-bytes before the input + // to emulate aligned operation. + + let dst = data[A]; + let dst_offset = dst & 0x07; + let encoded = data[DMA_ENCODED]; + let dst64 = (dst & !0x07) as u32; + let main_step = data[STEP]; + let pre_count = DmaInfo::get_pre_count(encoded) as u64; + + // NOTE: for dual memories it's very important to keep the order of loads and stores because + // stores happend after loads. + + let pre_value = if pre_count > 0 { + let pre_data_offset = DmaInfo::get_pre_data_offset(encoded); + + // pre-load of write address before unaligned write + let value_before_write = data_ext[DmaInfo::get_pre_write_offset(encoded)]; + MemBusHelpers::mem_aligned_read(dst64, main_step, value_before_write, mem_processors); + + // if src and dst have the same offset, no double read + // TBO: calculate_write_value with same offset + + let dst_offset_bits = dst_offset * 8; + let mask = 0xFFFF_FFFF_FFFF_FFFF << dst_offset_bits; + Some((value_before_write & !mask) | (data_ext[pre_data_offset] & mask)) + } else { + None + }; + + let post_count = DmaInfo::get_post_count(encoded) as u64; + let loop_count = DmaInfo::get_loop_count(encoded); + + let loop_values = if loop_count > 0 { + let loop_data_offset = DmaInfo::get_loop_data_offset(encoded); + let loop_data_count = DmaInfo::get_loop_count(encoded); + let loop_data_end = loop_data_offset + loop_data_count; + + Some(&data_ext[loop_data_offset..loop_data_end]) + } else { + None + }; + + let post_value = if post_count > 0 { + let post_data_offset = DmaInfo::get_post_data_offset(encoded); + let dst64 = dst as u32 + pre_count as u32 + loop_count as u32 * 8; + + // pre-load of write address before unaligned write + let value_before_write = data_ext[DmaInfo::get_post_write_offset(encoded)]; + MemBusHelpers::mem_aligned_read(dst64, main_step, value_before_write, mem_processors); + + let post_bits = post_count * 8; + let mask = 0xFFFF_FFFF_FFFF_FFFF << post_bits; + Some((value_before_write & mask) | (data_ext[post_data_offset] & !mask)) + } else { + None + }; + + // Before writes, all reads should be done, to avoid issues with dual memory + + if let Some(pre_value) = pre_value { + MemBusHelpers::mem_aligned_write(dst64, main_step, pre_value, mem_processors); + } + if let Some(loop_values) = loop_values { + let dst64 = (dst as u32 + pre_count as u32) & !0x07; + MemBusHelpers::mem_aligned_write_from_slice(dst64, main_step, loop_values, mem_processors); + } + if let Some(post_value) = post_value { + let dst64 = dst as u32 + pre_count as u32 + loop_count as u32 * 8; + MemBusHelpers::mem_aligned_write(dst64, main_step, post_value, mem_processors); + } +} + +pub fn skip_dma_inputcpy_mem_inputs(data: &[u64], mem_processors: &mut P) -> bool { + let dst = data[A]; + + let count = DmaInfo::get_count(data[DMA_ENCODED]) as u64; + // calculate range for dst and src to verify if any of them are included + // in the memcollector addresses. + + let dst64_from = dst as u32 & !0x07; + let dst64_to = (dst + count + 7) as u32 & !0x07; + + if !mem_processors.skip_addr_range(dst64_from, dst64_to) { + return false; + } + + // If any mem_collector includes this addresses we could skip this precompiles + // at mem input data generation. + true +} diff --git a/precompiles/dma/src/dma_gen_mem_inputs.rs b/precompiles/dma/src/dma_gen_mem_inputs.rs new file mode 100644 index 000000000..ee8b16f3c --- /dev/null +++ b/precompiles/dma/src/dma_gen_mem_inputs.rs @@ -0,0 +1,49 @@ +use precompiles_common::MemProcessor; +use zisk_common::OP; +use zisk_core::zisk_ops::ZiskOp; + +use crate::generate_dma_inputcpy_mem_inputs; +use crate::generate_dma_memcmp_mem_inputs; +use crate::generate_dma_memcpy_mem_inputs; +use crate::generate_dma_memset_mem_inputs; +use crate::skip_dma_inputcpy_mem_inputs; +use crate::skip_dma_memcmp_mem_inputs; +use crate::skip_dma_memcpy_mem_inputs; +use crate::skip_dma_memset_mem_inputs; + +pub fn generate_dma_mem_inputs( + data: &[u64], + data_ext: &[u64], + _only_counters: bool, + mem_processors: &mut P, +) { + match data[OP] as u8 { + ZiskOp::DMA_INPUTCPY => generate_dma_inputcpy_mem_inputs(data, data_ext, mem_processors), + ZiskOp::DMA_MEMCMP | ZiskOp::DMA_XMEMCMP => { + generate_dma_memcmp_mem_inputs(data, data_ext, mem_processors) + } + ZiskOp::DMA_XMEMSET => generate_dma_memset_mem_inputs(data, data_ext, mem_processors), + ZiskOp::DMA_MEMCPY | ZiskOp::DMA_XMEMCPY => { + generate_dma_memcpy_mem_inputs(data, data_ext, mem_processors) + } + _ => panic!("Invalid op 0x{:02X}", data[OP]), + } +} + +pub fn skip_dma_mem_inputs( + data: &[u64], + _data_ext: &[u64], + mem_processors: &mut P, +) -> bool { + match data[OP] as u8 { + ZiskOp::DMA_INPUTCPY => skip_dma_inputcpy_mem_inputs(data, mem_processors), + ZiskOp::DMA_MEMCMP | ZiskOp::DMA_XMEMCMP => { + skip_dma_memcmp_mem_inputs(data, mem_processors) + } + ZiskOp::DMA_XMEMSET => skip_dma_memset_mem_inputs(data, mem_processors), + ZiskOp::DMA_MEMCPY | ZiskOp::DMA_XMEMCPY => { + skip_dma_memcpy_mem_inputs(data, mem_processors) + } + _ => panic!("Invalid op 0x{:02X}", data[OP]), + } +} diff --git a/precompiles/dma/src/dma_gen_memcmp_mem_inputs.rs b/precompiles/dma/src/dma_gen_memcmp_mem_inputs.rs new file mode 100644 index 000000000..cc0b7db25 --- /dev/null +++ b/precompiles/dma/src/dma_gen_memcmp_mem_inputs.rs @@ -0,0 +1,194 @@ +use precompiles_common::MemBusHelpers; +use precompiles_common::MemProcessor; +use precompiles_helpers::DmaInfo; +use zisk_common::DMA_MEMCMP_COUNT_BUS; +use zisk_common::{A, B, DMA_ENCODED, OP, STEP}; +use zisk_core::{zisk_ops::ZiskOp, EXTRA_PARAMS_ADDR}; + +pub fn generate_dma_memcmp_mem_inputs( + data: &[u64], + data_ext: &[u64], + mem_processors: &mut P, +) { + // encoding of count was done with effective count, means that if dst and src are equals, + // effective_count = count while if dst and src are different effective_count = count_eq + 1 + // count_eq is the number of beggining bytes equal between src and dst + let op = data[OP] as u8; + + let dst = data[A]; + let src = data[B]; + let encoded = data[DMA_ENCODED]; + let dst64 = (dst & !0x07) as u32; + let src64 = (src & !0x07) as u32; + let main_step = data[STEP]; + let pre_count = DmaInfo::get_pre_count(encoded) as u64; + let dst_offset = dst & 0x07; + let src_offset = src & 0x07; + let aligned = dst_offset == src_offset; + + // NOTE: for dual memories it's very important to keep the order of loads and stores because + // stores happend after loads. + + if op == ZiskOp::DMA_MEMCMP { + MemBusHelpers::mem_aligned_read( + EXTRA_PARAMS_ADDR as u32, + main_step, + data[DMA_MEMCMP_COUNT_BUS], + mem_processors, + ); + } + + if pre_count > 0 { + let pre_data_offset = DmaInfo::get_pre_data_offset(encoded); + let read_value = data_ext[pre_data_offset]; + + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_load@pre 0x{src64:08X} S:{main_step} V:{read_value} (0x{read_value:016X})"); + MemBusHelpers::mem_aligned_read(src64, main_step, read_value, mem_processors); + // pre-load of write address before unaligned write + let pre_value = data_ext[DmaInfo::get_pre_write_offset(encoded)]; + + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_load@pre-p 0x{dst64:08X} S:{main_step} V:{pre_value} (0x{pre_value:016X})"); + + MemBusHelpers::mem_aligned_read(dst64, main_step, pre_value, mem_processors); + + if DmaInfo::is_double_read_pre(encoded) { + let second_read_value = data_ext[pre_data_offset + 1]; + #[cfg(feature = "debug_dma")] + println!( + "DMA: mem_aligned_load@pre2 0x{:08X} S:{main_step} V:{second_read_value} (0x{second_read_value:016X})", + src64 + 8 + ); + MemBusHelpers::mem_aligned_read( + src64 + 8, + main_step, + second_read_value, + mem_processors, + ); + } + } + + // this is part of words loop + let post_count = DmaInfo::get_post_count(encoded) as u64; + let loop_count = DmaInfo::get_loop_count(encoded); + if loop_count > 0 { + let loop_src = src as u32 + pre_count as u32; + let dst64 = (dst as u32 + pre_count as u32) & !0x07; + let src64 = loop_src & !0x07; + let loop_data_offset = DmaInfo::get_loop_data_offset(encoded); + let loop_data_count = DmaInfo::get_loop_count(encoded); + let loop_src_data_end = + loop_data_offset + loop_data_count + ((loop_src & 0x07) > 0) as usize; + let values = &data_ext[loop_data_offset..loop_src_data_end]; + let src_offset = (src_offset + pre_count) & 0x07; + if aligned { + #[cfg(feature = "debug_dma")] + println!( + "DMA: mem_double_aligned_read_from_slice 0x{dst64:08X} 0x{src64:08X} \ + S:{main_step} V:{values:?}" + ); + MemBusHelpers::mem_double_aligned_read_from_slice( + dst64, + src64, + main_step, + values, + mem_processors, + ); + + // MemBusHelpers::mem_aligned_read_from_slice(dst64, main_step, values, mem_processors); + } else { + #[cfg(feature = "debug_dma")] + println!( + "DMA: mem_aligned_read_from_read_unaligned_slice 0x{dst64:08X} 0x{src64:08X} \ + SO:{src_offset} S:{main_step} V:{values:?}" + ); + MemBusHelpers::mem_aligned_read_from_read_unaligned_slice( + dst64, + src64, + main_step, + src_offset as u8, + values, + mem_processors, + ); + } + } + if post_count > 0 { + let post_data_offset = DmaInfo::get_post_data_offset(encoded); + let src64 = (src as u32 + pre_count as u32 + loop_count as u32 * 8) & !0x07; + let dst64 = dst as u32 + pre_count as u32 + loop_count as u32 * 8; + let read_value = data_ext[post_data_offset]; + + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_load@post 0x{src64:08X} S:{main_step} V:{read_value} (0x{read_value:016X})"); + MemBusHelpers::mem_aligned_read(src64, main_step, read_value, mem_processors); + + // pre-load of write address before unaligned write + let pre_value = data_ext[DmaInfo::get_post_write_offset(encoded)]; + + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_load@post-p 0x{dst64:08X} S:{main_step} V:{pre_value} (0x{pre_value:016X})"); + + MemBusHelpers::mem_aligned_read(dst64, main_step, pre_value, mem_processors); + + if DmaInfo::is_double_read_post(encoded) { + let second_read_value = data_ext[post_data_offset + 1]; + #[cfg(feature = "debug_dma")] + println!( + "DMA: mem_aligned_load@post2 0x{:08X} S:{main_step} V:{second_read_value} (0x{second_read_value:016X})", + src64 + 8 + ); + MemBusHelpers::mem_aligned_read( + src64 + 8, + main_step, + second_read_value, + mem_processors, + ); + } + } +} + +pub fn skip_dma_memcmp_mem_inputs(data: &[u64], mem_processors: &mut P) -> bool { + let dst = data[A]; + let src = data[B]; + let count = DmaInfo::get_count(data[DMA_ENCODED]) as u64; + let op = data[OP] as u8; + let load_count_from_mem = op == ZiskOp::DMA_MEMCMP; + + // calculate range for dst and src to verify if any of them are included + // in the memcollector addresses. + + let dst64_from = dst as u32 & !0x07; + let dst64_to = (dst + count + 7) as u32 & !0x07; + #[cfg(feature = "debug_dma_gen_mem_inputs")] + let (count64, step) = (dst64_to as u64 - dst64_from as u64 + 1, data[STEP]); + #[cfg(feature = "debug_dma_gen_mem_inputs")] + println!("[dma_memcmp] SKIP DST:[0x{dst64_from:08X}..=0x{dst64_to:08X}] C:{count} S:{step}"); + + if load_count_from_mem { + #[cfg(feature = "debug_dma_gen_mem_inputs")] + println!("[dma_memcmp] SKIP PARAM 0x{EXTRA_PARAMS_ADDR:08X} S:{step}"); + if !mem_processors.skip_addr(EXTRA_PARAMS_ADDR as u32) { + return false; + } + } + + if !mem_processors.skip_addr_range(dst64_from, dst64_to) { + return false; + } + + let src64_from = src as u32 & !0x07; + let src64_to = (src + count + 7) as u32 & !0x07; + #[cfg(feature = "debug_dma_gen_mem_inputs")] + let (count64, step) = (dst64_to as u64 - dst64_from as u64 + 1, data[STEP]); + + #[cfg(feature = "debug_dma_gen_mem_inputs")] + println!("[dma_memcmp] SKIP SRC:[0x{src64_from:08X}..=0x{src64_to:08X}] C:{count} S:{step}"); + if !mem_processors.skip_addr_range(src64_from, src64_to) { + return false; + } + + // If any mem_collector includes this addresses we could skip this precompiles + // at mem input data generation. + true +} diff --git a/precompiles/dma/src/dma_gen_memcpy_mem_inputs.rs b/precompiles/dma/src/dma_gen_memcpy_mem_inputs.rs new file mode 100644 index 000000000..a9997b4ad --- /dev/null +++ b/precompiles/dma/src/dma_gen_memcpy_mem_inputs.rs @@ -0,0 +1,208 @@ +use precompiles_common::MemBusHelpers; +use precompiles_common::MemProcessor; +use precompiles_helpers::{DmaHelpers, DmaInfo}; +use zisk_common::{A, B, DMA_ENCODED, OP, STEP}; +use zisk_core::{zisk_ops::ZiskOp, EXTRA_PARAMS_ADDR}; + +pub fn generate_dma_memcpy_mem_inputs( + data: &[u64], + data_ext: &[u64], + mem_processors: &mut P, +) { + let op = data[OP] as u8; + + let dst = data[A]; + let src = data[B]; + let encoded = data[DMA_ENCODED]; + let dst64 = (dst & !0x07) as u32; + let src64 = (src & !0x07) as u32; + let main_step = data[STEP]; + let pre_count = DmaInfo::get_pre_count(encoded) as u64; + let dst_offset = dst & 0x07; + let src_offset = src & 0x07; + let aligned = dst_offset == src_offset; + + // NOTE: for dual memories it's very important to keep the order of loads and stores because + // stores happend after loads. + + if op == ZiskOp::DMA_MEMCPY { + MemBusHelpers::mem_aligned_read( + EXTRA_PARAMS_ADDR as u32, + main_step, + DmaInfo::get_count(encoded) as u64, + mem_processors, + ); + } + + let pre_write_value = if pre_count > 0 { + let pre_data_offset = DmaInfo::get_pre_data_offset(encoded); + let read_value = data_ext[pre_data_offset]; + + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_load@pre 0x{src64:08X} S:{main_step} V:{read_value} (0x{read_value:016X})"); + MemBusHelpers::mem_aligned_read(src64, main_step, read_value, mem_processors); + // pre-load of write address before unaligned write + let pre_value = data_ext[DmaInfo::get_pre_write_offset(encoded)]; + + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_load@pre-p 0x{dst64:08X} S:{main_step} V:{pre_value} (0x{pre_value:016X})"); + + MemBusHelpers::mem_aligned_read(dst64, main_step, pre_value, mem_processors); + + if DmaInfo::is_double_read_pre(encoded) { + let second_read_value = data_ext[pre_data_offset + 1]; + #[cfg(feature = "debug_dma")] + println!( + "DMA: mem_aligned_load@pre2 0x{:08X} S:{main_step} V:{second_read_value} (0x{second_read_value:016X})", + src64 + 8 + ); + MemBusHelpers::mem_aligned_read( + src64 + 8, + main_step, + second_read_value, + mem_processors, + ); + Some(DmaHelpers::calculate_write_value( + dst_offset, + src_offset, + pre_count, + pre_value, + &[read_value, second_read_value], + )) + } else { + Some(DmaHelpers::calculate_write_value( + dst_offset, + src_offset, + pre_count, + pre_value, + &[read_value], + )) + } + } else { + None + }; + + let post_count = DmaInfo::get_post_count(encoded) as u64; + let loop_count = DmaInfo::get_loop_count(encoded); + + let post_write_value = if post_count > 0 { + let src_offset = src & 0x07; + + let post_data_offset = DmaInfo::get_post_data_offset(encoded); + let src64 = (src as u32 + pre_count as u32 + loop_count as u32 * 8) & !0x07; + let dst64 = dst as u32 + pre_count as u32 + loop_count as u32 * 8; + let read_value = data_ext[post_data_offset]; + + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_load@post 0x{src64:08X} S:{main_step} V:{read_value} (0x{read_value:016X})"); + MemBusHelpers::mem_aligned_read(src64, main_step, read_value, mem_processors); + + // pre-load of write address before unaligned write + let pre_value = data_ext[DmaInfo::get_post_write_offset(encoded)]; + + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_load@post-p 0x{dst64:08X} S:{main_step} V:{pre_value} (0x{pre_value:016X})"); + + MemBusHelpers::mem_aligned_read(dst64, main_step, pre_value, mem_processors); + + if DmaInfo::is_double_read_post(encoded) { + let second_read_value = data_ext[post_data_offset + 1]; + #[cfg(feature = "debug_dma")] + println!( + "DMA: mem_aligned_load@post2 0x{:08X} S:{main_step} V:{second_read_value} (0x{second_read_value:016X})", + src64 + 8 + ); + MemBusHelpers::mem_aligned_read( + src64 + 8, + main_step, + second_read_value, + mem_processors, + ); + Some(DmaHelpers::calculate_write_value( + 0, // in post offset it's 0 + (src_offset + pre_count) & 0x07, // src_offset it's modified by pre, aligned/unaligned no change offset + post_count, + pre_value, + &[read_value, second_read_value], + )) + } else { + Some(DmaHelpers::calculate_write_value( + 0, // in post offset it's 0 + (src_offset + pre_count) & 0x07, // src_offset it's modified by pre, aligned/unaligned no change offset + post_count, + pre_value, + &[read_value], + )) + } + } else { + None + }; + + // NOTE: loop it's done at end to do loop write after loop reads in same scope to avoid + // recalculate values or extract them from scope. + + if loop_count > 0 { + let loop_src = src as u32 + pre_count as u32; + let dst64 = (dst as u32 + pre_count as u32) & !0x07; + let src64 = loop_src & !0x07; + let loop_data_offset = DmaInfo::get_loop_data_offset(encoded); + let loop_data_count = DmaInfo::get_loop_count(encoded); + let loop_src_data_end = + loop_data_offset + loop_data_count + ((loop_src & 0x07) > 0) as usize; + let values = &data_ext[loop_data_offset..loop_src_data_end]; + + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_load_from_slice 0x{src64:08X} S:{main_step} V:{values:?}"); + MemBusHelpers::mem_aligned_read_from_slice(src64, main_step, values, mem_processors); + + let src_offset = (src_offset + pre_count) & 0x07; + if aligned { + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_write_from_slice 0x{dst64:08X} S:{main_step} V:{values:?}"); + + MemBusHelpers::mem_aligned_write_from_slice(dst64, main_step, values, mem_processors); + } else { + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_write_from_read_unaligned_slice 0x{dst64:08X} S:{main_step} V:{values:?}"); + MemBusHelpers::mem_aligned_write_from_read_unaligned_slice( + dst64, + main_step, + src_offset as u8, + values, + mem_processors, + ); + } + } + + if let Some(write_value) = pre_write_value { + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_write@pre 0x{dst64:08X} S:{main_step} V:{write_value} (0x{write_value:016X})"); + + MemBusHelpers::mem_aligned_write(dst64, main_step, write_value, mem_processors); + } + if let Some(write_value) = post_write_value { + #[cfg(feature = "debug_dma")] + println!("DMA: mem_aligned_write@post 0x{dst64:08X} S:{main_step} V:{write_value} (0x{write_value:016X})"); + let dst64 = dst as u32 + pre_count as u32 + loop_count as u32 * 8; + MemBusHelpers::mem_aligned_write(dst64, main_step, write_value, mem_processors); + } +} + +pub fn skip_dma_memcpy_mem_inputs(data: &[u64], mem_processors: &mut P) -> bool { + let dst = data[A]; + let src = data[B]; + + let count = DmaInfo::get_count(data[DMA_ENCODED]) as u64; + + // calculate range for dst and src to verify if any of them are included + // in the memcollector addresses. + + let dst64_from = dst as u32 & !0x07; + let src64_from = src as u32 & !0x07; + let dst64_to = (dst + count + 7) as u32 & !0x07; + let src64_to = (src + count + 7) as u32 & !0x07; + + mem_processors.skip_addr(EXTRA_PARAMS_ADDR as u32) + && mem_processors.skip_addr_range(dst64_from, dst64_to) + && mem_processors.skip_addr_range(src64_from, src64_to) +} diff --git a/precompiles/dma/src/dma_gen_memset_mem_inputs.rs b/precompiles/dma/src/dma_gen_memset_mem_inputs.rs new file mode 100644 index 000000000..25195f7bc --- /dev/null +++ b/precompiles/dma/src/dma_gen_memset_mem_inputs.rs @@ -0,0 +1,103 @@ +use precompiles_common::MemBusHelpers; +use precompiles_common::MemProcessor; +use precompiles_helpers::DmaInfo; +use zisk_common::{A, DMA_ENCODED, STEP}; + +pub fn generate_dma_memset_mem_inputs( + data: &[u64], + data_ext: &[u64], + mem_processors: &mut P, +) { + let dst = data[A]; + let encoded = data[DMA_ENCODED]; + let dst64 = (dst & !0x07) as u32; + let dst_offset = dst & 0x07; + let step = data[STEP]; + let pre_count = DmaInfo::get_pre_count(encoded) as u64; + let post_count = DmaInfo::get_post_count(encoded) as u64; + + debug_assert_eq!( + (pre_count > 0) as usize + (post_count > 0) as usize, + data_ext.len(), + "[dma_memset] data length mismatch DATA:[{}] INFO={}", + data.iter().map(|v| format!("0x{v:016X}")).collect::>().join(", "), + DmaInfo::to_string(encoded) + ); + + // the memset operation was simple, no unaligned loop, on aligned loop no need to read previous value + // really only need read previous value if has pre o post + + if pre_count > 0 { + #[cfg(feature = "debug_dma_gen_mem_inputs")] + println!("[dma_memset] INPUT PRE DST:0x{dst64:08X} S:{step}"); + MemBusHelpers::mem_aligned_read(dst64, step, data_ext[0], mem_processors); + } + + let loop_count = DmaInfo::get_loop_count(encoded); + + if post_count > 0 { + let dst64_post = ((dst + pre_count) as usize + loop_count * 8) as u32; + + #[cfg(feature = "debug_dma_gen_mem_inputs")] + println!("[dma_memset] INPUT POST DST:0x{dst64_post:08X} S:{step}"); + MemBusHelpers::mem_aligned_read( + dst64_post, + step, + data_ext[(pre_count > 0) as usize], + mem_processors, + ); + } + + let fill_byte = DmaInfo::get_fill_byte(encoded) as u64; + let fill_word = fill_byte + | fill_byte << 8 + | fill_byte << 16 + | fill_byte << 24 + | fill_byte << 32 + | fill_byte << 40 + | fill_byte << 48 + | fill_byte << 56; + + if pre_count > 0 { + #[cfg(feature = "debug_dma_gen_mem_inputs")] + println!("[dma_memset] INPUT PRE WRITE DST:0x{dst64:08X} S:{step}"); + let mask = (0xFFFF_FFFF_FFFF_FFFFu64 >> (64 - pre_count * 8)) << (dst_offset * 8); + let write_value = (fill_word & mask) | (data_ext[0] & !mask); + MemBusHelpers::mem_aligned_write(dst64, step, write_value, mem_processors); + } + + if loop_count > 0 { + let dst64_loop = dst as u32 + pre_count as u32; + #[cfg(feature = "debug_dma_gen_mem_inputs")] + println!("[dma_memset] INPUT LOOP DST:0x{dst64_loop:08X} C:{loop_count} S:{step}"); + MemBusHelpers::mem_aligned_write_pattern( + dst64_loop, + step, + fill_word, + loop_count, + mem_processors, + ); + } + if post_count > 0 { + let dst64_post = ((dst + pre_count) as usize + loop_count * 8) as u32; + #[cfg(feature = "debug_dma_gen_mem_inputs")] + println!("[dma_memset] INPUT POST WRITE DST:0x{dst64_post:08X} S:{step}"); + let mask = 0xFFFF_FFFF_FFFF_FFFFu64 >> (64 - post_count * 8); + let write_value = (fill_word & mask) | (data_ext[(pre_count > 0) as usize] & !mask); + MemBusHelpers::mem_aligned_write(dst64_post, step, write_value, mem_processors); + } +} + +pub fn skip_dma_memset_mem_inputs(data: &[u64], mem_processors: &mut P) -> bool { + let dst = data[A]; + let encoded = data[DMA_ENCODED]; + let dst64 = (dst & !0x07) as u32; + let dst64_to = (dst + DmaInfo::get_count(encoded) as u64 - 1) as u32 & !0x07; + + #[cfg(feature = "debug_dma_gen_mem_inputs")] + let step = data[STEP]; + #[cfg(feature = "debug_dma_gen_mem_inputs")] + println!("[dma_memset] SKIP DST:[0x{dst64:08X}..=0x{dst64_to:08X}] S:{step}"); + + mem_processors.skip_addr_range(dst64, dst64_to) +} diff --git a/precompiles/dma/src/dma_instance_info.rs b/precompiles/dma/src/dma_instance_info.rs new file mode 100644 index 000000000..bd669ef1f --- /dev/null +++ b/precompiles/dma/src/dma_instance_info.rs @@ -0,0 +1,11 @@ +use std::collections::HashMap; + +use zisk_common::ChunkId; + +use crate::DmaCollectCounters; + +#[derive(Debug)] +pub struct DmaInstanceInfo { + pub chunks: HashMap, + pub last_chunk: Option, +} diff --git a/precompiles/dma/src/dma_instances_builder.rs b/precompiles/dma/src/dma_instances_builder.rs new file mode 100644 index 000000000..90358d526 --- /dev/null +++ b/precompiles/dma/src/dma_instances_builder.rs @@ -0,0 +1,244 @@ +use core::panic; +use std::collections::HashMap; + +use zisk_common::{CheckPoint, ChunkId, CollectCounter}; + +use crate::{ + DmaCheckPoint, DmaCollectCounters, DmaInstanceInfo, DMA_COUNTER_INPUTCPY, DMA_COUNTER_MEMCMP, + DMA_COUNTER_MEMCPY, DMA_COUNTER_MEMSET, +}; +#[derive(Debug)] +pub struct DmaInstancesBuilder { + pub tag: String, + pub current_chunk: Option, + pub max_instances: usize, + pub rows: usize, + pub rows_available: usize, + pub instances: Vec, + pub count_memcpy_rows: usize, + pub count_memset_rows: usize, + pub count_memcmp_rows: usize, + pub count_inputcpy_rows: usize, + pub skip_memcpy_rows: usize, + pub skip_memset_rows: usize, + pub skip_memcmp_rows: usize, + pub skip_inputcpy_rows: usize, + pub inputs_counter: usize, +} + +impl DmaInstancesBuilder { + pub fn new(tag: &str, max_instances: usize, rows: usize) -> Self { + Self { + tag: tag.to_string(), + current_chunk: None, + max_instances, + rows, + rows_available: 0, + instances: Vec::new(), + skip_memcpy_rows: 0, + skip_memset_rows: 0, + skip_memcmp_rows: 0, + skip_inputcpy_rows: 0, + count_memcpy_rows: 0, + count_memset_rows: 0, + count_memcmp_rows: 0, + count_inputcpy_rows: 0, + inputs_counter: 0, + } + } + pub fn count_to_skip(&mut self) { + self.skip_memcpy_rows += self.count_memcpy_rows; + self.skip_memset_rows += self.count_memset_rows; + self.skip_memcmp_rows += self.count_memcmp_rows; + self.skip_inputcpy_rows += self.count_inputcpy_rows; + self.count_memcpy_rows = 0; + self.count_memset_rows = 0; + self.count_memcmp_rows = 0; + self.count_inputcpy_rows = 0; + } + pub fn reset_count_and_skip(&mut self) { + self.skip_memcpy_rows = 0; + self.skip_memset_rows = 0; + self.skip_memcmp_rows = 0; + self.skip_inputcpy_rows = 0; + self.count_memcpy_rows = 0; + self.count_memset_rows = 0; + self.count_memcmp_rows = 0; + self.count_inputcpy_rows = 0; + } + + pub fn open_new_instance(&mut self) { + if self.rows_available > 0 { + panic!( + "[{}] Cannot open new instance, rows still available: {}", + self.tag, self.rows_available + ); + } + if self.instances.len() >= self.max_instances { + println!("{:?}", self); + panic!( + "[{}] Too many instances {} max: {}, cannot create more", + self.tag, + self.instances.len(), + self.max_instances + ); + } + self.instances.push(DmaInstanceInfo { chunks: HashMap::new(), last_chunk: None }); + self.rows_available = self.rows; + } + pub fn flush_current_chunk(&mut self) { + if let Some(chunk_id) = self.current_chunk { + if self.count_memcpy_rows == 0 + && self.count_inputcpy_rows == 0 + && self.count_memset_rows == 0 + && self.count_memcmp_rows == 0 + { + return; + } + if self.instances.is_empty() { + self.open_new_instance(); + } + let collect_counters = DmaCollectCounters { + memcpy: CollectCounter::new( + self.skip_memcpy_rows as u32, + self.count_memcpy_rows as u32, + ), + inputcpy: CollectCounter::new( + self.skip_inputcpy_rows as u32, + self.count_inputcpy_rows as u32, + ), + memset: CollectCounter::new( + self.skip_memset_rows as u32, + self.count_memset_rows as u32, + ), + memcmp: CollectCounter::new( + self.skip_memcmp_rows as u32, + self.count_memcmp_rows as u32, + ), + }; + self.instances + .last_mut() + .unwrap() + .chunks + .insert(chunk_id, (self.inputs_counter as u64, collect_counters)); + self.instances.last_mut().unwrap().last_chunk = Some(chunk_id); + } + } + #[inline(always)] + pub fn add_memcpy_rows(&mut self, chunk_id: ChunkId, skip: usize, rows: usize, inputs: usize) { + self.add_op_rows(chunk_id, skip, rows, inputs, DMA_COUNTER_MEMCPY); + } + #[inline(always)] + pub fn add_memset_rows(&mut self, chunk_id: ChunkId, skip: usize, rows: usize, inputs: usize) { + self.add_op_rows(chunk_id, skip, rows, inputs, DMA_COUNTER_MEMSET); + } + #[inline(always)] + pub fn add_memcmp_rows(&mut self, chunk_id: ChunkId, skip: usize, rows: usize, inputs: usize) { + self.add_op_rows(chunk_id, skip, rows, inputs, DMA_COUNTER_MEMCMP); + } + #[inline(always)] + pub fn add_inputcpy_rows( + &mut self, + chunk_id: ChunkId, + skip: usize, + rows: usize, + inputs: usize, + ) { + self.add_op_rows(chunk_id, skip, rows, inputs, DMA_COUNTER_INPUTCPY); + } + + pub fn add_op_rows( + &mut self, + chunk_id: ChunkId, + skip: usize, + rows: usize, + inputs: usize, + op: usize, + ) { + if Some(chunk_id) != self.current_chunk { + self.flush_current_chunk(); + self.reset_count_and_skip(); + self.current_chunk = Some(chunk_id); + } + let mut rows = rows; + while rows > 0 { + if self.rows_available == 0 { + self.flush_current_chunk(); + self.count_to_skip(); + self.open_new_instance(); + } + let rows_applicable = std::cmp::min(self.rows_available, rows); + rows -= rows_applicable; + self.rows_available -= rows_applicable; + match op { + DMA_COUNTER_MEMCPY => { + if skip > 0 { + assert!( + self.count_memcpy_rows == 0, + "Cannot have both skip and count for memcpy in the same chunk", + ); + self.skip_memcpy_rows += skip; + } + self.count_memcpy_rows += rows_applicable; + } + DMA_COUNTER_MEMSET => { + if skip > 0 { + assert!( + self.count_memset_rows == 0, + "Cannot have both skip and count for memset in the same chunk", + ); + self.skip_memset_rows += skip; + } + self.count_memset_rows += rows_applicable; + } + DMA_COUNTER_MEMCMP => { + if skip > 0 { + assert!( + self.count_memcmp_rows == 0, + "Cannot have both skip and count for memcmp in the same chunk", + ); + self.skip_memcmp_rows += skip; + } + self.count_memcmp_rows += rows_applicable; + } + DMA_COUNTER_INPUTCPY => { + if skip > 0 { + assert!( + self.count_inputcpy_rows == 0, + "Cannot have both skip and count for inputcpy in the same chunk", + ); + self.skip_inputcpy_rows += skip; + } + self.count_inputcpy_rows += rows_applicable; + } + _ => { + panic!("Unsupported operation for DMA instance builder 0x{op:02X}") + } + } + self.inputs_counter += inputs; + } + } + pub fn flush(&mut self) { + self.flush_current_chunk(); + self.reset_count_and_skip(); + self.current_chunk = None; + } + pub fn get_plan(&mut self) -> Vec<(CheckPoint, DmaCheckPoint)> { + self.flush(); + let mut checkpoints = Vec::new(); + let last_segment_id = self.instances.len().saturating_sub(1); + for (segment_id, dma_info) in self.instances.iter_mut().enumerate() { + let keys = dma_info.chunks.keys().cloned().collect::>(); + + checkpoints.push(( + CheckPoint::Multiple(keys), + DmaCheckPoint { + chunks: std::mem::take(&mut dma_info.chunks), + last_chunk: dma_info.last_chunk, + is_last_segment: segment_id == last_segment_id, + }, + )); + } + checkpoints + } +} diff --git a/precompiles/dma/src/dma_manager.rs b/precompiles/dma/src/dma_manager.rs new file mode 100644 index 000000000..905779e14 --- /dev/null +++ b/precompiles/dma/src/dma_manager.rs @@ -0,0 +1,175 @@ +use std::sync::Arc; + +use fields::PrimeField64; +use pil_std_lib::Std; +use proofman_common::ProofCtx; +use zisk_common::{BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, Plan, Planner}; +use zisk_pil::{ + Dma64AlignedInputCpyTrace, Dma64AlignedMemCpyTrace, Dma64AlignedMemSetTrace, + Dma64AlignedMemTrace, Dma64AlignedTrace, DmaInputCpyTrace, DmaMemCpyTrace, + DmaPrePostInputCpyTrace, DmaPrePostMemCpyTrace, DmaPrePostTrace, DmaTrace, DmaUnalignedTrace, + ZiskProofValues, +}; + +use crate::{ + Dma64AlignedInputCpySM, Dma64AlignedInstance, Dma64AlignedMemCpySM, Dma64AlignedMemSM, + Dma64AlignedMemSetSM, Dma64AlignedSM, DmaCounterInputGen, DmaInputCpySM, DmaInstance, + DmaMemCpySM, DmaPlanner, DmaPrePostInputCpySM, DmaPrePostInstance, DmaPrePostMemCpySM, + DmaPrePostSM, DmaSM, DmaUnalignedInstance, DmaUnalignedSM, +}; + +/// The `DmaManager` struct represents the Dma manager, +/// which is responsible for managing the Dma state machine and its table state machine. +#[allow(dead_code)] +pub struct DmaManager { + /// Dma state machine + dma_sm: Arc>, + dma_memcpy_sm: Arc>, + dma_inputcpy_sm: Arc>, + dma_pre_post_sm: Arc>, + dma_pre_post_memcpy_sm: Arc>, + dma_pre_post_inputcpy_sm: Arc>, + dma_64_aligned_sm: Arc>, + dma_64_aligned_mem_sm: Arc>, + dma_64_aligned_memcpy_sm: Arc>, + dma_64_aligned_memset_sm: Arc>, + dma_64_aligned_inputcpy_sm: Arc>, + dma_unaligned_sm: Arc>, +} + +impl DmaManager { + /// Creates a new instance of `DmaManager`. + /// + /// # Returns + /// An `Arc`-wrapped instance of `DmaManager`. + pub fn new(std: Arc>) -> Arc { + let dma_sm = DmaSM::new(std.clone()); + let dma_memcpy_sm = DmaMemCpySM::new(std.clone()); + let dma_inputcpy_sm = DmaInputCpySM::new(std.clone()); + let dma_pre_post_sm = DmaPrePostSM::new(std.clone()); + let dma_pre_post_inputcpy_sm = DmaPrePostInputCpySM::new(std.clone()); + let dma_pre_post_memcpy_sm = DmaPrePostMemCpySM::new(std.clone()); + let dma_64_aligned_sm = Dma64AlignedSM::new(std.clone()); + let dma_64_aligned_mem_sm = Dma64AlignedMemSM::new(std.clone()); + let dma_64_aligned_memcpy_sm = Dma64AlignedMemCpySM::new(std.clone()); + let dma_64_aligned_memset_sm = Dma64AlignedMemSetSM::new(std.clone()); + let dma_64_aligned_inputcpy_sm = Dma64AlignedInputCpySM::new(std.clone()); + let dma_unaligned_sm = DmaUnalignedSM::new(std); + + Arc::new(Self { + dma_sm, + dma_memcpy_sm, + dma_inputcpy_sm, + dma_pre_post_sm, + dma_pre_post_inputcpy_sm, + dma_pre_post_memcpy_sm, + dma_64_aligned_sm, + dma_64_aligned_mem_sm, + dma_64_aligned_memcpy_sm, + dma_64_aligned_memset_sm, + dma_64_aligned_inputcpy_sm, + dma_unaligned_sm, + }) + } + + pub fn build_dma_counter(&self, asm_execution: bool) -> DmaCounterInputGen { + match asm_execution { + true => DmaCounterInputGen::new(BusDeviceMode::CounterAsm), + false => DmaCounterInputGen::new(BusDeviceMode::Counter), + } + } + + pub fn build_dma_input_generator(&self) -> DmaCounterInputGen { + DmaCounterInputGen::new(BusDeviceMode::InputGenerator) + } +} + +impl ComponentBuilder for DmaManager { + /// Builds a planner to plan Dma-related instances. + /// + /// # Returns + /// A boxed implementation of `RegularPlanner`. + fn build_planner(&self) -> Box { + // Get the number of Dmas that a single Dma instance can handle + Box::new(DmaPlanner::::new()) + } + + /// Builds an inputs data collector for Dma operations. + /// + /// # Arguments + /// * `ictx` - The context of the instance, containing the plan and its associated + /// configurations. + /// + /// # Returns + /// A boxed implementation of `BusDeviceInstance` specific to the requested `air_id` instance. + /// + /// # Panics + /// Panics if the provided `air_id` is not supported. + fn build_instance(&self, ictx: InstanceCtx) -> Box> { + match ictx.plan.air_id { + // DMA controller instances + DmaTrace::::AIR_ID => Box::new(DmaInstance::new(self.dma_sm.clone(), ictx)), + DmaMemCpyTrace::::AIR_ID => { + Box::new(DmaInstance::new(self.dma_memcpy_sm.clone(), ictx)) + } + DmaInputCpyTrace::::AIR_ID => { + Box::new(DmaInstance::new(self.dma_inputcpy_sm.clone(), ictx)) + } + // DMA pre post instances + DmaPrePostTrace::::AIR_ID => { + Box::new(DmaPrePostInstance::new(self.dma_pre_post_sm.clone(), ictx)) + } + DmaPrePostMemCpyTrace::::AIR_ID => { + Box::new(DmaPrePostInstance::new(self.dma_pre_post_memcpy_sm.clone(), ictx)) + } + DmaPrePostInputCpyTrace::::AIR_ID => { + Box::new(DmaPrePostInstance::new(self.dma_pre_post_inputcpy_sm.clone(), ictx)) + } + // DMA 64 aligned instances + Dma64AlignedTrace::::AIR_ID => { + Box::new(Dma64AlignedInstance::new(self.dma_64_aligned_sm.clone(), ictx)) + } + Dma64AlignedMemCpyTrace::::AIR_ID => { + Box::new(Dma64AlignedInstance::new(self.dma_64_aligned_memcpy_sm.clone(), ictx)) + } + Dma64AlignedInputCpyTrace::::AIR_ID => { + Box::new(Dma64AlignedInstance::new(self.dma_64_aligned_inputcpy_sm.clone(), ictx)) + } + Dma64AlignedMemSetTrace::::AIR_ID => { + Box::new(Dma64AlignedInstance::new(self.dma_64_aligned_memset_sm.clone(), ictx)) + } + Dma64AlignedMemTrace::::AIR_ID => { + Box::new(Dma64AlignedInstance::new(self.dma_64_aligned_mem_sm.clone(), ictx)) + } + // DMA unaligned instances + DmaUnalignedTrace::::AIR_ID => { + Box::new(DmaUnalignedInstance::new(self.dma_unaligned_sm.clone(), ictx)) + } + _ => { + panic!("DmaBuilder::get_instance() Unsupported air_id: {:?}", ictx.plan.air_id) + } + } + } + + fn configure_instances(&self, pctx: &ProofCtx, plannings: &[Plan]) { + let enable_dma_64_aligned = + plannings.iter().any(|p| p.air_id == Dma64AlignedTrace::::AIR_ID); + let enable_dma_64_aligned_memcpy = + plannings.iter().any(|p| p.air_id == Dma64AlignedMemCpyTrace::::AIR_ID); + let enable_dma_64_aligned_memset = + plannings.iter().any(|p| p.air_id == Dma64AlignedMemSetTrace::::AIR_ID); + let enable_dma_64_aligned_inputcpy = + plannings.iter().any(|p| p.air_id == Dma64AlignedInputCpyTrace::::AIR_ID); + let enable_dma_64_aligned_mem = + plannings.iter().any(|p| p.air_id == Dma64AlignedMemTrace::::AIR_ID); + let enable_dma_unaligned = + plannings.iter().any(|p| p.air_id == DmaUnalignedTrace::::AIR_ID); + let mut proof_values = ZiskProofValues::from_vec_guard(pctx.get_proof_values()); + proof_values.enable_dma_64_aligned = F::from_bool(enable_dma_64_aligned); + proof_values.enable_dma_unaligned = F::from_bool(enable_dma_unaligned); + proof_values.enable_dma_64_aligned_memcpy = F::from_bool(enable_dma_64_aligned_memcpy); + proof_values.enable_dma_64_aligned_memset = F::from_bool(enable_dma_64_aligned_memset); + proof_values.enable_dma_64_aligned_inputcpy = F::from_bool(enable_dma_64_aligned_inputcpy); + proof_values.enable_dma_64_aligned_mem = F::from_bool(enable_dma_64_aligned_mem); + } +} diff --git a/precompiles/dma/src/dma_planner.rs b/precompiles/dma/src/dma_planner.rs new file mode 100644 index 000000000..0fa94e1a9 --- /dev/null +++ b/precompiles/dma/src/dma_planner.rs @@ -0,0 +1,63 @@ +//! The `DmaPlanner` module defines a planner for generating execution plans specific to +//! arithmetic operations. +//! +//! It organizes execution plans for both regular instances and table instances, +//! leveraging arithmetic operation counts and metadata to construct detailed plans. + +use crate::DmaStrategy; + +use fields::PrimeField64; +use zisk_common::{BusDeviceMetrics, ChunkId, InstanceType, Plan, Planner, SegmentId}; +use zisk_pil::ZISK_AIRGROUP_ID; + +/// The `DmaPlanner` struct organizes execution plans for arithmetic instances and tables. +/// +/// It allows adding metadata about instances and tables and generates plans +/// based on the provided counters. +#[derive(Default)] +pub struct DmaPlanner { + _marker: std::marker::PhantomData, +} + +impl DmaPlanner { + /// Creates a new `DmaPlanner`. + /// + /// # Returns + /// A new `DmaPlanner` instance with no preconfigured instances or tables. + pub fn new() -> Self { + Self::default() + } +} + +impl Planner for DmaPlanner { + /// Generates execution plans for Dma instances. + /// + /// # Arguments + /// * `counters` - A vector of counters, each associated with a `ChunkId` and `DmaCounter` + /// metrics data. + /// + /// # Returns + /// A vector of `Plan` instances representing execution configurations for the instances + /// + /// # Panics + /// Panics if any counter cannot be downcasted to an `DmaCounter`. + fn plan(&self, counters: Vec<(ChunkId, Box)>) -> Vec { + // Calculate total counters by summing all DmaCounterInputGen instances + let mut dma_strategy = DmaStrategy::::default(); + let _plans = dma_strategy.calculate(counters); + let mut plans: Vec = Vec::new(); + for (air_id, segments) in _plans.into_iter() { + for (segment_id, (check_point, collect_info)) in segments.into_iter().enumerate() { + plans.push(Plan::new( + ZISK_AIRGROUP_ID, + air_id, + Some(SegmentId(segment_id)), + InstanceType::Instance, + check_point.clone(), + Some(Box::new(collect_info)), + )); + } + } + plans + } +} diff --git a/precompiles/dma/src/dma_pre_post/dma_pre_post.rs b/precompiles/dma/src/dma_pre_post/dma_pre_post.rs new file mode 100644 index 000000000..9765897b5 --- /dev/null +++ b/precompiles/dma/src/dma_pre_post/dma_pre_post.rs @@ -0,0 +1,467 @@ +use std::sync::Arc; + +use fields::PrimeField64; + +use pil_std_lib::Std; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +use rayon::{ + iter::{IndexedParallelIterator, ParallelIterator}, + slice::{ParallelSlice, ParallelSliceMut}, +}; +use zisk_core::zisk_ops::ZiskOp; +use zisk_pil::{ + DMA_BYTE_CMP_TABLE_ID, DMA_PRE_POST_TABLE_ID, DMA_PRE_POST_TABLE_SIZE, DUAL_RANGE_BYTE_ID, +}; + +#[cfg(feature = "packed")] +pub use zisk_pil::{ + DmaPrePostTracePacked as DmaPrePostTrace, DmaPrePostTraceRowPacked as DmaPrePostTraceRow, +}; + +#[cfg(not(feature = "packed"))] +pub use zisk_pil::{DmaPrePostTrace, DmaPrePostTraceRow}; + +use crate::{dma_trace, DmaPrePostInput, DmaPrePostModule, DmaPrePostRom}; +use precompiles_helpers::DmaInfo; + +// Type aliases to simplify complex types +type MultTable = Vec>; +type PrePostAndByteCmpTables = (MultTable, MultTable); +type GlobalMultiplicities = (PrePostAndByteCmpTables, MultTable); + +/// The `DmaPrePostSM` struct encapsulates the logic of the DmaPrePost State Machine. +pub struct DmaPrePostSM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + /// Range checks ID's + pre_post_table_id: usize, + + /// Table to verify byte comparison + byte_cmp_table_id: usize, + + /// Dual Byte Range checks + dual_range_byte_id: usize, +} + +impl DmaPrePostSM { + /// Creates a new Dma State Machine instance. + /// + /// # Returns + /// A new `DmaPrePostSM` instance. + pub fn new(std: Arc>) -> Arc { + Arc::new(Self { + std: std.clone(), + dual_range_byte_id: std + .get_virtual_table_id(DUAL_RANGE_BYTE_ID) + .expect("Failed to get table DUAL_RANGE_BYTE indentifer"), + byte_cmp_table_id: std + .get_virtual_table_id(DMA_BYTE_CMP_TABLE_ID) + .expect("Failed to get table DMA_BYTE_CMP_TABLE indentifier"), + pre_post_table_id: std + .get_virtual_table_id(DMA_PRE_POST_TABLE_ID) + .expect("Failed to get table DMA_PRE_POST_TABLE identifier"), + }) + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_slice( + &self, + input: &DmaPrePostInput, + trace: &mut DmaPrePostTraceRow, + pre_post_table_mul: &mut [u64], + byte_cmp_table_mul: &mut [u64], + local_dual_range_byte_mul: &mut [u64], + ) { + let is_memcmp = input.op == ZiskOp::DMA_MEMCMP || input.op == ZiskOp::DMA_XMEMCMP; + let is_memcpy = input.op == ZiskOp::DMA_MEMCPY || input.op == ZiskOp::DMA_XMEMCPY; + let is_memset = input.op == ZiskOp::DMA_XMEMSET; + let is_inputcpy = input.op == ZiskOp::DMA_INPUTCPY; + let load_src = is_memcpy || is_memcmp; + + let dst_offset = input.dst & 0x07; + let src_offset = if load_src { input.src & 0x07 } else { 0 }; + let is_pre = dst_offset > 0; + let step = input.step; + + let dst64 = input.dst >> 3; + let src64 = input.src >> 3; + + trace.set_main_step(input.step); + trace.set_dst64(dst64); + trace.set_src64(src64); + trace.set_dst_offset(dst_offset as u8); + trace.set_src_offset(src_offset as u8); + trace.set_is_post(!is_pre); + + let count = if is_pre { + DmaInfo::get_pre_count(input.encoded) + } else { + DmaInfo::get_post_count(input.encoded) + }; + + trace.set_count(count as u8); + + trace.set_sel_memcpy(is_memcpy); + trace.set_sel_memset(is_memset); + trace.set_sel_inputcpy(is_inputcpy); + trace.set_sel_memcmp(is_memcmp); + + let fill_byte = DmaInfo::get_fill_byte(input.encoded); + if is_memset { + trace.set_fill_byte(fill_byte); + } + let second_read = (src_offset as usize + count) > 8; + //println!("SECOND_READ: {second_read}"); + trace.set_enabled_second_read(second_read); + + let mut value = input.src_values[0]; + let mut rb = [0u8; 16]; + let mut pb = [0u8; 8]; + + if is_memset { + for rb in rb.iter_mut() { + *rb = fill_byte; + } + } else { + rb[0] = value as u8; + rb[1] = (value >> 8) as u8; + rb[2] = (value >> 16) as u8; + rb[3] = (value >> 24) as u8; + rb[4] = (value >> 32) as u8; + rb[5] = (value >> 40) as u8; + rb[6] = (value >> 48) as u8; + rb[7] = (value >> 56) as u8; + + local_dual_range_byte_mul[(value & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 16) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 32) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 48) & 0xFFFF) as usize] += 1; + + if second_read { + value = input.src_values[1]; + rb[8] = value as u8; + rb[9] = (value >> 8) as u8; + rb[10] = (value >> 16) as u8; + rb[11] = (value >> 24) as u8; + rb[12] = (value >> 32) as u8; + rb[13] = (value >> 40) as u8; + rb[14] = (value >> 48) as u8; + rb[15] = (value >> 56) as u8; + local_dual_range_byte_mul[(value & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 16) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 32) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 48) & 0xFFFF) as usize] += 1; + } else { + local_dual_range_byte_mul[0] += 4; + } + } + + value = input.dst_pre_value; + pb[0] = value as u8; + pb[1] = (value >> 8) as u8; + pb[2] = (value >> 16) as u8; + pb[3] = (value >> 24) as u8; + pb[4] = (value >> 32) as u8; + pb[5] = (value >> 40) as u8; + pb[6] = (value >> 48) as u8; + pb[7] = (value >> 56) as u8; + + local_dual_range_byte_mul[(value & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 16) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 32) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 48) & 0xFFFF) as usize] += 1; + + let selr_value = if dst_offset > src_offset { + trace.set_dst_offset_gt_src_offset(true); + dst_offset - src_offset + } else { + trace.set_dst_offset_gt_src_offset(false); + src_offset - dst_offset + }; + + // NOTE: special case of count = 8 for memcmp, the mask must be all 0s, for this reason + // apply mask to count before left shift. + let mask = if count == 8 { + assert!(dst_offset == 0); + 0xFFFF_FFFF_FFFF_FFFFu64 + } else { + let _mask = 0xFFFF_FFFF_FFFF_FFFFu64 << (dst_offset * 8); + _mask ^ (_mask << (count * 8)) + }; + + trace.set_sb(0, (mask & 0x0000_0000_0000_00FF) != 0); + trace.set_sb(1, (mask & 0x0000_0000_0000_FF00) != 0); + trace.set_sb(2, (mask & 0x0000_0000_00FF_0000) != 0); + trace.set_sb(3, (mask & 0x0000_0000_FF00_0000) != 0); + trace.set_sb(4, (mask & 0x0000_00FF_0000_0000) != 0); + trace.set_sb(5, (mask & 0x0000_FF00_0000_0000) != 0); + trace.set_sb(6, (mask & 0x00FF_0000_0000_0000) != 0); + trace.set_sb(7, (mask & 0xFF00_0000_0000_0000) != 0); + + for (index, byte) in rb.iter().enumerate() { + // println!("PRE-POST bytes[{index}]: 0x{byte:02X}"); + trace.set_rb(index, *byte); + } + for (index, byte) in pb.iter().enumerate() { + // println!("PRE-POST bytes[{index}]: 0x{byte:02X}"); + trace.set_pb(index, *byte); + } + + trace.set_selr(0, selr_value == 0); + trace.set_selr(1, selr_value == 1); + trace.set_selr(2, selr_value == 2); + trace.set_selr(3, selr_value == 3); + trace.set_selr(4, selr_value == 4); + trace.set_selr(5, selr_value == 5); + trace.set_selr(6, selr_value == 6); + + let table_row = if is_memcmp { + let post_count = DmaInfo::get_post_count(input.encoded); + let result = if !is_pre || post_count == 0 { + DmaInfo::get_memcmp_res_as_u64(input.encoded) + } else { + 0 + }; + let is_negative = result != 0 && DmaInfo::is_memcmp_negative(input.encoded); + let is_nz = result != 0; + trace.set_memcmp_result_is_negative(is_negative); + trace.set_memcmp_result_nz(is_nz); + let abs_diff_dst_src = if is_negative { (!result).wrapping_add(1) } else { result }; + assert!(abs_diff_dst_src <= 0xFF); + let abs_diff_dst_src = abs_diff_dst_src as u8; + trace.set_abs_diff_dst_src(abs_diff_dst_src); + trace.set_bus_write_value(0, input.dst_pre_value as u32); + trace.set_bus_write_value(1, (input.dst_pre_value >> 32) as u32); + + // the index of different byte determines the factor + let dst_index = dst_offset as usize + count - 1; + if is_negative { + // implies that count > 0 + if dst_index < 4 { + trace.set_diff_factor(0, F::ORDER_U64 - (1 << (8 * dst_index))); + trace.set_diff_factor(1, 0); + } else { + trace.set_diff_factor(0, 0); + trace.set_diff_factor(1, F::ORDER_U64 - (1 << (8 * (dst_index - 4)))); + } + } else if is_nz { + if dst_index < 4 { + trace.set_diff_factor(0, 1 << (8 * dst_index)); + trace.set_diff_factor(1, 0); + } else { + trace.set_diff_factor(0, 0); + trace.set_diff_factor(1, 1 << (8 * (dst_index - 4))); + } + } + + // calculate the contribution to byte_cmp_table multiplicity + if is_nz { + let last_dst_byte = pb[dst_index]; + let row_byte_cmp_table = if is_negative { + assert!( + abs_diff_dst_src <= (255 - last_dst_byte) && abs_diff_dst_src > 0, + "abs_diff_dst_src: {abs_diff_dst_src} last_dst_byte: 0x{last_dst_byte:02X} result: 0x{result:016X} S:{step} \ + index:{dst_index} DST64:0x{:08X} SRC64:0x{:08X} DST_O:{dst_offset} SRC_O:{src_offset} VALUE:0x{value:016X} \ + is_pre:{is_pre} dst_offset:{dst_offset} count:{count}", dst64 * 8, src64 * 8, + ); + last_dst_byte as usize * 255 + (abs_diff_dst_src + last_dst_byte) as usize - 1 + } else { + assert!( + abs_diff_dst_src <= last_dst_byte && abs_diff_dst_src > 0, + "abs_diff_dst_src: {abs_diff_dst_src} last_dst_byte: 0x{last_dst_byte:02X} result: 0x{result:016X} S:{step} index:{dst_index} DST:0x{dst64:08X} SRC:0x{src64:08X} \ + index:{dst_index} DST64:0x{:08X} SRC64:0x{:08X} DST_O:{dst_offset} SRC_O:{src_offset} VALUE:0x{value:016X} \ + is_pre:{is_pre} dst_offset:{dst_offset} count:{count}", dst64 * 8, src64 * 8 + ); + last_dst_byte as usize * 255 + (last_dst_byte - abs_diff_dst_src) as usize + }; + // println!("\x1B[1;41mBYTE_CMP_TABLE[{row_byte_cmp_table}] abs_diff_dst_src: {abs_diff_dst_src} last_dst_byte: 0x{last_dst_byte:02X} is_negative:{is_negative} result: 0x{result:016X} S:{step}\x1B[0m"); + byte_cmp_table_mul[row_byte_cmp_table] += 1; + } + DmaPrePostRom::get_row( + dst_offset as usize, + src_offset as usize, + count, + is_nz, + is_negative, + true, + ) + } else { + DmaPrePostRom::get_row( + dst_offset as usize, + src_offset as usize, + count, + false, + false, + load_src, + ) + }; + + pre_post_table_mul[table_row] += 1; + } +} + +impl DmaPrePostModule for DmaPrePostSM { + fn get_name(&self) -> &'static str { + "dma_pre_post" + } + + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + fn compute_witness( + &self, + inputs: &[Vec], + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = DmaPrePostTrace::::new_from_vec_zeroes(trace_buffer)?; + let num_rows = trace.num_rows(); + + let total_inputs: usize = inputs.iter().map(|inputs| inputs.len()).sum(); + + assert!(total_inputs <= num_rows); + assert!(total_inputs > 0); + + dma_trace("DmaPrePost", total_inputs, num_rows); + + timer_start_trace!(DMA_PRE_POST_TRACE); + + // Split the dma_trace.buffer into slices matching each inner vector’s length. + let flat_inputs: Vec<_> = inputs.iter().flatten().collect(); + let trace_rows = trace.buffer.as_mut_slice(); + + // Calculate optimal chunk size + let num_threads = rayon::current_num_threads(); + let chunk_size = std::cmp::max(1, flat_inputs.len() / num_threads); + + // Process in chunks to allow per-chunk local multiplicities arrays + let ((global_pre_post_table_mul, global_byte_cmp_table_mul), global_dual_range_byte_mul): GlobalMultiplicities = + flat_inputs + .par_chunks(chunk_size) + .zip(trace_rows.par_chunks_mut(chunk_size)) + .map(|(input_chunk, trace_chunk)| { + // Local array shared by this chunk + let mut local_pre_post_table_mul = vec![0u64; DMA_PRE_POST_TABLE_SIZE]; + let mut local_dual_range_byte_mul = vec![0u64; 1 << 16]; + let mut local_byte_cmp_table_mul = vec![0u64; 256 * 255]; + + // Sum all local arrays into a global one + for (input, trace_row) in input_chunk.iter().zip(trace_chunk.iter_mut()) { + self.process_slice( + input, + trace_row, + &mut local_pre_post_table_mul, + &mut local_byte_cmp_table_mul, + &mut local_dual_range_byte_mul, + ) + } + + // Return nested tuple for unzip + ((local_pre_post_table_mul, local_byte_cmp_table_mul), local_dual_range_byte_mul) + }) + .unzip(); + for pre_post_table_mul in global_pre_post_table_mul.iter() { + // println!("PRE_POST_TABLE_MUL {:?}", pre_post_table_mul); + self.std.inc_virtual_rows_ranged(self.pre_post_table_id, pre_post_table_mul); + } + + for byte_cmp_table_mul in global_byte_cmp_table_mul.iter() { + // println!("PRE_POST_TABLE_MUL {:?}", pre_post_table_mul); + self.std.inc_virtual_rows_ranged(self.byte_cmp_table_id, byte_cmp_table_mul); + } + + for dual_range_byte_mul in global_dual_range_byte_mul.iter() { + self.std.inc_virtual_rows_ranged(self.dual_range_byte_id, dual_range_byte_mul); + } + // for i in [ + // 4538, 4541, 4542, 4544, 4545, 4546, 4549, 4550, 4551, 4739, 147059, 147215, 147258, + // 147261, 162643, 171955, 172130, 172133, 172136, 172137, 70114, 104010, 104123, 104124, + // 104125, 130422, 131634, 131635, 131636, 131789, + // ] { + // let p_values: [u64; 2] = [ + // trace[i].get_pb(0) as u64 + // + 256 * trace[i].get_pb(1) as u64 + // + 65536 * trace[i].get_pb(2) as u64 + // + 16777216 * trace[i].get_pb(3) as u64, + // trace[i].get_pb(4) as u64 + // + 256 * trace[i].get_pb(5) as u64 + // + 65536 * trace[i].get_pb(6) as u64 + // + 16777216 * trace[i].get_pb(7) as u64, + // ]; + // let bus_write_values: [u64; 2] = if trace[i].get_dst_offset_gt_src_offset() { + // [trace[i].get_write_value(2) as u64, trace[i].get_write_value(3) as u64] + // } else { + // [trace[i].get_write_value(0) as u64, trace[i].get_write_value(1) as u64] + // }; + // if trace[i].get_diff_factor(0) >= 0xFFFF_FFFF_8000_0000 { + // println!( + // "get_diff_factor[0]=\x1B[1;31m-0x{:08X}\x1B[0m", + // 0xFFFF_FFFF_0000_0001 - trace[i].get_diff_factor(0) + // ); + // } else { + // println!("get_diff_factor[0]=0x{:08X}", trace[i].get_diff_factor(0)); + // } + // if trace[i].get_diff_factor(1) as u64 >= 0xFFFF_FFFF_8000_0000 { + // println!( + // "get_diff_factor[1]=\x1B[1;31m-0x{:08X}\x1B[0m", + // 0xFFFF_FFFF_0000_0001 - trace[i].get_diff_factor(1) as u64 + // ); + // } else { + // println!("get_diff_factor[1]=0x{:08X}", trace[i].get_diff_factor(1)); + // } + // if p_values[0] >= bus_write_values[0] { + // println!( + // "p_value[0]=0x{:08X} - bus_write_value[0]=0x{:08X} = 0x{:08X}", + // p_values[0], + // bus_write_values[0], + // p_values[0] - bus_write_values[0] + // ); + // } else { + // println!( + // "p_value[0]=0x{:08X} - bus_write_value[0]=0x{:08X} = \x1B[1;31m-0x{:08X}\x1B[0m", + // p_values[0], + // bus_write_values[0], + // bus_write_values[0] - p_values[0] + // ); + // } + // if trace[i].get_memcmp_result_is_negative() { + // println!("\x1B[1;31mis negative\x1B[0m"); + // } + // if p_values[1] >= bus_write_values[1] { + // println!( + // "p_value[1]=0x{:08X} - bus_write_value[1]=0x{:08X} = 0x{:08X}", + // p_values[1], + // bus_write_values[1], + // p_values[1] - bus_write_values[1] + // ); + // } else { + // println!( + // "p_value[1]=0x{:08X} - bus_write_value[1]=0x{:08X} = \x1B[1;31m-0x{:08X}\x1B[0m", + // p_values[1], + // bus_write_values[1], + // bus_write_values[1] - p_values[1] + // ); + // } + // bus_write_value[0] <== dst_offset_gt_src_offset * (write_value[2] - write_value[0]) + write_value[0]; + // bus_write_value[1] <== dst_offset_gt_src_offset * (write_value[3] - write_value[1]) + write_value[1]; + // sel_memcmp * (p_values[0] - bus_write_value[0] + diff_dst_src[0]) === 0; + // sel_memcmp * (p_values[1] - bus_write_value[1] + diff_dst_src[1]) === 0; + // for i in [70114, 104010, 104123, 104124, 104125, 130422, 131634, 131635, 131636, 131789] { + // println!("TRACE[{i}]={:?}", trace[i]); + // } + let from_trace = FromTrace::new(&mut trace); + timer_stop_and_log_trace!(DMA_PRE_POST_TRACE); + Ok(AirInstance::new_from_trace(from_trace)) + } +} diff --git a/precompiles/dma/src/dma_pre_post/dma_pre_post_collector.rs b/precompiles/dma/src/dma_pre_post/dma_pre_post_collector.rs new file mode 100644 index 000000000..df3b54777 --- /dev/null +++ b/precompiles/dma/src/dma_pre_post/dma_pre_post_collector.rs @@ -0,0 +1,125 @@ +//! The `DmaPrePostCollector` module defines an collector to calculate all inputs of an instance +//! for the DmaPrePost State Machine. + +use std::any::Any; + +use precompiles_helpers::DmaInfo; +use zisk_common::{BusDevice, BusId, ChunkId, DMA_ENCODED, OP, OPERATION_BUS_ID, OP_TYPE}; +use zisk_core::{zisk_ops::ZiskOp, ZiskOperationType}; + +use crate::{DmaCollectCounters, DmaCollectorRoutingLog, DmaPrePostInput}; + +pub struct DmaPrePostCollector { + pub chunk_id: ChunkId, + /// Collected inputs for witness computation. + pub inputs: Vec, + + /// Routing log for debugging and tracking collection operations. + pub rlog: DmaCollectorRoutingLog, + + /// The number of operations to collect. + pub num_inputs: u64, + + /// Helper to skip instructions based on the plan's configuration. + pub collect_counters: DmaCollectCounters, +} + +impl DmaPrePostCollector { + /// Creates a new `DmaPrePostCollector`. + /// + /// # Arguments + /// + /// * `bus_id` - The connected bus ID. + /// * `num_inputs` - The number of inputs to collect. + /// * `collect_skipper` - The helper to skip instructions based on the plan's configuration. + /// + /// # Returns + /// A new `DmaPrePostCollector` instance initialized with the provided parameters. + pub fn new(chunk_id: ChunkId, num_inputs: u64, collect_counters: DmaCollectCounters) -> Self { + Self { + chunk_id, + inputs: Vec::with_capacity(num_inputs as usize), + num_inputs, + collect_counters, + rlog: DmaCollectorRoutingLog::new(chunk_id), + } + } + + /// Processes data received on the bus, collecting the inputs necessary for witness computation. + /// + /// # Arguments + /// * `_bus_id` - The ID of the bus (unused in this implementation). + /// * `data` - The data received from the bus. + /// * `pending` – A queue of pending bus operations used to send derived inputs. + /// + /// # Returns + /// A tuple where: + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64], data_ext: &[u64]) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + if data[OP_TYPE] != ZiskOperationType::Dma as u64 { + return true; + } + + if self.inputs.len() == self.num_inputs as usize { + return self.rlog.log_discard_cond(false, 1, data, false); + } + + let op = data[OP] as u8; + let encoded = data[DMA_ENCODED]; + if DmaInfo::is_direct(encoded) { + if op == ZiskOp::DMA_MEMCMP || op == ZiskOp::DMA_XMEMCMP { + // We need to collect all memcmp/memcpy operations for the pre/post processing. + panic!("Direct memcmp/memcpy operations are not supported"); + } + self.rlog.log_discard(2, data); + return true; + } + + let rows = DmaInfo::get_pre_writes(encoded); + if rows == 0 { + self.rlog.log_discard(3, data); + return true; + } + + if let Some((skip, max_count)) = self.collect_counters.should_collect(rows as u64, op) { + self.rlog.log_collect(rows as u32, data, skip, max_count); + self.inputs.extend(match op { + ZiskOp::DMA_XMEMSET => { + DmaPrePostInput::from_memset(data, data_ext, skip, max_count) + } + ZiskOp::DMA_MEMCMP | ZiskOp::DMA_XMEMCMP => { + DmaPrePostInput::from(data, data_ext, skip, max_count) + } + ZiskOp::DMA_INPUTCPY | ZiskOp::DMA_MEMCPY | ZiskOp::DMA_XMEMCPY => { + DmaPrePostInput::from(data, data_ext, skip, max_count) + } + _ => panic!("Invalid operation 0x{op:02X}"), + }); + } else { + self.rlog.log_discard(10, data); + } + self.rlog.log_discard_cond(self.inputs.len() < self.num_inputs as usize, 13, data, true) + } + + pub fn get_debug_info(&self) -> String { + #[cfg(feature = "save_dma_collectors")] + return format!( + "CC|{}|{}|{}\n", + self.chunk_id, + self.inputs.len(), + self.collect_counters.get_debug_info(), + ) + &self.rlog.get_debug_info(); + #[cfg(not(feature = "save_dma_collectors"))] + String::new() + } +} + +impl BusDevice for DmaPrePostCollector { + fn as_any(self: Box) -> Box { + self + } +} diff --git a/precompiles/dma/src/dma_pre_post/dma_pre_post_input.rs b/precompiles/dma/src/dma_pre_post/dma_pre_post_input.rs new file mode 100644 index 000000000..a7d51d953 --- /dev/null +++ b/precompiles/dma/src/dma_pre_post/dma_pre_post_input.rs @@ -0,0 +1,219 @@ +use precompiles_helpers::DmaInfo; +use zisk_common::{A, B, DMA_ENCODED, OP, STEP}; + +#[derive(Debug)] +pub struct DmaPrePostInput { + pub src: u32, + pub dst: u32, + pub step: u64, + pub encoded: u64, // contains fill_byte/memcmp_result + pub src_values: [u64; 2], + pub dst_pre_value: u64, + pub op: u8, +} +impl DmaPrePostInput { + pub fn get_count(data: &[u64]) -> usize { + let encoded = data[DMA_ENCODED]; + (DmaInfo::get_pre_count(encoded) > 0) as usize + + (DmaInfo::get_post_count(encoded) > 0) as usize + } + pub fn from(data: &[u64], data_ext: &[u64], skip: u32, max_count: u32) -> Vec { + let encoded = data[DMA_ENCODED]; + let op = data[OP] as u8; + let mut inputs = Vec::new(); + let pre_count = DmaInfo::get_pre_count(encoded); + let mut skipped = 0; + + if pre_count > 0 { + if skipped < skip { + skipped += 1; + } else { + let src_offset = DmaInfo::get_pre_data_offset(encoded); + let input = Self { + dst: data[A] as u32, + src: data[B] as u32, + step: data[STEP], + encoded, + src_values: [ + data_ext[src_offset], + if DmaInfo::is_double_read_pre(encoded) { + data_ext[src_offset + 1] + } else { + 0 + }, + ], + op, + dst_pre_value: data_ext[DmaInfo::get_pre_write_offset(encoded)], + }; + inputs.push(input); + } + } + let post_count = DmaInfo::get_post_count(encoded); + if post_count > 0 && skipped >= skip && max_count as usize > inputs.len() { + let src_offset = DmaInfo::get_post_data_offset(encoded); + let loop_count = DmaInfo::get_loop_count(encoded); + let input = Self { + dst: data[A] as u32 + pre_count as u32 + loop_count as u32 * 8, + src: data[B] as u32 + pre_count as u32 + loop_count as u32 * 8, + step: data[STEP], + encoded, + src_values: [ + data_ext[src_offset], + if DmaInfo::is_double_read_post(encoded) { + data_ext[src_offset + 1] + } else { + 0 + }, + ], + dst_pre_value: data_ext[DmaInfo::get_post_write_offset(encoded)], + op, + }; + inputs.push(input); + } + inputs + } + pub fn from_memset(data: &[u64], data_ext: &[u64], skip: u32, max_count: u32) -> Vec { + let encoded = data[DMA_ENCODED]; + let op = data[OP] as u8; + let mut inputs = Vec::new(); + let pre_count = DmaInfo::get_pre_count(encoded); + let mut skipped = 0; + if pre_count > 0 { + if skipped < skip { + skipped += 1; + } else { + inputs.push(Self { + dst: data[A] as u32, + src: 0, + step: data[STEP], + encoded, + src_values: [0, 0], + op, + dst_pre_value: data_ext[0], + }); + } + } + let post_count = DmaInfo::get_post_count(encoded); + if post_count > 0 && skipped >= skip && max_count as usize > inputs.len() { + let loop_count = DmaInfo::get_loop_count(encoded); + inputs.push(Self { + dst: data[A] as u32 + pre_count as u32 + loop_count as u32 * 8, + src: pre_count as u32 + loop_count as u32 * 8, + step: data[STEP], + encoded, + src_values: [0, 0], + // pre value words are at begging + dst_pre_value: data_ext[(pre_count > 0) as usize], + op, + }); + } + inputs + } + // memcmp has different format, because need to read dst and src, for this reason has a more + // easy format, first all dst (a), and after all src (b) + pub fn from_memcmp(data: &[u64], data_ext: &[u64], skip: u32, max_count: u32) -> Vec { + let dst = data[A] as u32; + let src = data[B] as u32; + let encoded = data[DMA_ENCODED]; + let count = DmaInfo::get_count(encoded); + let op = data[OP] as u8; + let dst_words = (((dst + count as u32 + 7) >> 3) - (dst >> 3)) as usize; + let src_words = (((src + count as u32 + 7) >> 3) - (src >> 3)) as usize; + let mut inputs = Vec::new(); + let pre_count = DmaInfo::get_pre_count(encoded); + let mut skipped = 0; + if data[STEP] == 31841694 { + println!( + "DATA data:{data:?} data_ext:{data_ext:?} S:{} PRE_COUNT:{pre_count} POST_COUNT:{} SKIP:{skip} MAX_COUNT:{max_count}", + data[STEP], DmaInfo::get_post_count(encoded) + ); + } + + if pre_count > 0 { + if skipped < skip { + skipped += 1; + } else { + let input = Self { + dst, + src, + step: data[STEP], + encoded, + src_values: [ + data_ext[dst_words], + if DmaInfo::is_double_read_pre(encoded) { + data_ext[dst_words + 1] + } else { + 0 + }, + ], + op, + dst_pre_value: data_ext[0], + }; + inputs.push(input); + } + } + let post_count = DmaInfo::get_post_count(encoded); + if post_count > 0 && skipped >= skip && max_count as usize > inputs.len() { + // src_offset it's last src words + let src_offset = + dst_words + src_words - 1 - DmaInfo::is_double_read_post(encoded) as usize; + let loop_count = DmaInfo::get_loop_count(encoded); + let input = Self { + dst: dst + pre_count as u32 + loop_count as u32 * 8, + src: src + pre_count as u32 + loop_count as u32 * 8, + step: data[STEP], + encoded, + src_values: [ + data_ext[src_offset], + if DmaInfo::is_double_read_post(encoded) { + data_ext[src_offset + 1] + } else { + 0 + }, + ], + dst_pre_value: data_ext[dst_words - 1], + op, + }; + inputs.push(input); + } + inputs + } + + #[cfg(feature = "save_dma_inputs")] + /// Writes a list of DmaPrePostInput instances to a text file with columns separated by |. + /// Path is taken from DEBUG_OUTPUT_PATH environment variable, defaulting to "tmp/". + pub fn dump_to_file(inputs: &[Vec], filename: &str) -> std::io::Result<()> { + use std::io::Write; + let path = std::env::var("DEBUG_OUTPUT_PATH").unwrap_or_else(|_| "tmp/".to_string()); + let full_path = format!("{}{}", path, filename); + + let mut file = std::fs::File::create(&full_path)?; + + // Write header + writeln!( + file, + "{:>8}|{:>10}|{:>10}|{:>14}|{:>18}|{:>18}|{:>4}|src_values", + "pos", "src", "dst", "step", "encoded", "dst_pre_value", "op" + )?; + + // Write data rows + for (pos, input) in inputs.iter().flatten().enumerate() { + let src_values_hex: Vec = + input.src_values.iter().map(|v| format!("0x{:016X}", v)).collect(); + writeln!( + file, + "{:>8}|0x{:08X}|0x{:08X}|{:>14}|0x{:016X}|0x{:016X}|{:>4}|{}", + pos, + input.src, + input.dst, + input.step, + input.encoded, + input.dst_pre_value, + input.op, + src_values_hex.join(",") + )?; + } + + Ok(()) + } +} diff --git a/precompiles/dma/src/dma_pre_post/dma_pre_post_inputcpy.rs b/precompiles/dma/src/dma_pre_post/dma_pre_post_inputcpy.rs new file mode 100644 index 000000000..d62d49e99 --- /dev/null +++ b/precompiles/dma/src/dma_pre_post/dma_pre_post_inputcpy.rs @@ -0,0 +1,256 @@ +use std::sync::Arc; + +use fields::PrimeField64; + +use pil_std_lib::Std; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +use rayon::{ + iter::{IndexedParallelIterator, IntoParallelRefMutIterator, ParallelIterator}, + slice::{ParallelSlice, ParallelSliceMut}, +}; +use zisk_pil::{DMA_PRE_POST_TABLE_ID, DMA_PRE_POST_TABLE_SIZE, DUAL_RANGE_BYTE_ID}; + +#[cfg(feature = "packed")] +pub use zisk_pil::{ + DmaPrePostInputCpyTracePacked as DmaPrePostInputCpyTrace, + DmaPrePostInputCpyTraceRowPacked as DmaPrePostInputCpyTraceRow, +}; + +#[cfg(not(feature = "packed"))] +pub use zisk_pil::{DmaPrePostInputCpyTrace, DmaPrePostInputCpyTraceRow}; + +use crate::{dma_trace, DmaPrePostInput, DmaPrePostModule, DmaPrePostRom}; +use precompiles_helpers::DmaInfo; + +/// The `DmaPrePostSM` struct encapsulates the logic of the DmaPrePost State Machine. +pub struct DmaPrePostInputCpySM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + /// Range checks ID's + pre_post_table_id: usize, + + /// Dual Byte Range checks + dual_range_byte_id: usize, +} + +impl DmaPrePostInputCpySM { + /// Creates a new Dma State Machine instance. + /// + /// # Returns + /// A new `DmaPrePostInputCpySM` instance. + pub fn new(std: Arc>) -> Arc { + Arc::new(Self { + std: std.clone(), + dual_range_byte_id: std + .get_virtual_table_id(DUAL_RANGE_BYTE_ID) + .expect("Failed to get table DUAL_RANGE_BYTE ID"), + pre_post_table_id: std + .get_virtual_table_id(DMA_PRE_POST_TABLE_ID) + .expect("Failed to get table DMA_PRE_POST_TABLE_ID ID"), + }) + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_slice( + &self, + input: &DmaPrePostInput, + trace: &mut DmaPrePostInputCpyTraceRow, + pre_post_table_mul: &mut [u64], + local_dual_range_byte_mul: &mut [u64], + ) { + let dst_offset = input.dst & 0x07; + let is_pre = dst_offset > 0; + + let dst64 = input.dst >> 3; + + trace.set_main_step(input.step); + trace.set_dst64(dst64); + trace.set_dst_offset(dst_offset as u8); + trace.set_is_post(!is_pre); + + let count = if is_pre { + DmaInfo::get_pre_count(input.encoded) + } else { + DmaInfo::get_post_count(input.encoded) + }; + + trace.set_count(count as u8); + trace.set_sel_inputcpy(false); + + let mut value = input.src_values[0]; + let mut rb = [0u8; 16]; + let mut pb = [0u8; 8]; + + rb[0] = value as u8; + rb[1] = (value >> 8) as u8; + rb[2] = (value >> 16) as u8; + rb[3] = (value >> 24) as u8; + rb[4] = (value >> 32) as u8; + rb[5] = (value >> 40) as u8; + rb[6] = (value >> 48) as u8; + rb[7] = (value >> 56) as u8; + + local_dual_range_byte_mul[(value & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 16) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 32) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 48) & 0xFFFF) as usize] += 1; + + value = input.dst_pre_value; + pb[0] = value as u8; + pb[1] = (value >> 8) as u8; + pb[2] = (value >> 16) as u8; + pb[3] = (value >> 24) as u8; + pb[4] = (value >> 32) as u8; + pb[5] = (value >> 40) as u8; + pb[6] = (value >> 48) as u8; + pb[7] = (value >> 56) as u8; + + local_dual_range_byte_mul[(value & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 16) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 32) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 48) & 0xFFFF) as usize] += 1; + + let _mask = 0xFFFF_FFFF_FFFF_FFFFu64 << (dst_offset * 8); + let mask = _mask ^ (_mask << (count * 8)); + + trace.set_sb(0, (mask & 0x0000_0000_0000_00FF) != 0); + trace.set_sb(1, (mask & 0x0000_0000_0000_FF00) != 0); + trace.set_sb(2, (mask & 0x0000_0000_00FF_0000) != 0); + trace.set_sb(3, (mask & 0x0000_0000_FF00_0000) != 0); + trace.set_sb(4, (mask & 0x0000_00FF_0000_0000) != 0); + trace.set_sb(5, (mask & 0x0000_FF00_0000_0000) != 0); + trace.set_sb(6, (mask & 0x00FF_0000_0000_0000) != 0); + trace.set_sb(7, (mask & 0xFF00_0000_0000_0000) != 0); + + for (index, byte) in rb.iter().enumerate() { + // println!("PRE-POST bytes[{index}]: 0x{byte:02X}"); + trace.set_rb(index, *byte); + } + for (index, byte) in pb.iter().enumerate() { + // println!("PRE-POST bytes[{index}]: 0x{byte:02X}"); + trace.set_pb(index, *byte); + } + + let table_row = DmaPrePostRom::get_row(dst_offset as usize, 0, count, false, false, false); + // println!("PRE-POST-ROM [{table_row}] dst_offset: {dst_offset} src_offset: {src_offset} count: {count}"); + pre_post_table_mul[table_row] += 1; + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_empty_slice(&self, trace: &mut DmaPrePostInputCpyTraceRow) { + trace.set_main_step(0); + trace.set_dst64(0); + trace.set_dst_offset(0); + + trace.set_count(0); + trace.set_sel_inputcpy(false); + // intermediate: trace.set_last_dst_byte(0); + + for index in 0..16 { + trace.set_rb(index, 0); + } + for index in 0..8 { + trace.set_pb(index, 0); + } + for index in 0..8 { + trace.set_sb(index, false); + } + } +} +impl DmaPrePostModule for DmaPrePostInputCpySM { + fn get_name(&self) -> &'static str { + "dma_pre_post_inputcpy" + } + + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + fn compute_witness( + &self, + inputs: &[Vec], + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = DmaPrePostInputCpyTrace::::new_from_vec(trace_buffer)?; + let num_rows = trace.num_rows(); + + let total_inputs: usize = inputs.iter().map(|inputs| inputs.len()).sum(); + + assert!(total_inputs <= num_rows); + assert!(total_inputs > 0); + + dma_trace("DmaPrePostInputCpy", total_inputs, num_rows); + + timer_start_trace!(DMA_PRE_POST_TRACE); + + // Split the dma_trace.buffer into slices matching each inner vector’s length. + let flat_inputs: Vec<_> = inputs.iter().flatten().collect(); + let trace_rows = trace.buffer.as_mut_slice(); + + // Calculate optimal chunk size + let num_threads = rayon::current_num_threads(); + let chunk_size = std::cmp::max(1, flat_inputs.len() / num_threads); + + // Process in chunks to allow per-chunk local multiplicities arrays + let (global_pre_post_table_mul, global_dual_range_byte_mul): ( + Vec>, + Vec>, + ) = flat_inputs + .par_chunks(chunk_size) + .zip(trace_rows.par_chunks_mut(chunk_size)) + .map(|(input_chunk, trace_chunk)| { + // Local array shared by this chunk + let mut local_pre_post_table_mul = vec![0u64; DMA_PRE_POST_TABLE_SIZE]; + let mut local_dual_range_byte_mul = vec![0u64; 1 << 16]; + + // Sum all local arrays into a global one + for (input, trace_row) in input_chunk.iter().zip(trace_chunk.iter_mut()) { + self.process_slice( + input, + trace_row, + &mut local_pre_post_table_mul, + &mut local_dual_range_byte_mul, + ); + } + + (local_pre_post_table_mul, local_dual_range_byte_mul) + }) + .collect(); + + for pre_post_table_mul in global_pre_post_table_mul.iter() { + // println!("PRE_POST_TABLE_MUL {:?}", pre_post_table_mul); + self.std.inc_virtual_rows_ranged(self.pre_post_table_id, pre_post_table_mul); + } + + for dual_range_byte_mul in global_dual_range_byte_mul.iter() { + self.std.inc_virtual_rows_ranged(self.dual_range_byte_id, dual_range_byte_mul); + } + + if total_inputs < num_rows { + self.process_empty_slice(&mut trace_rows[total_inputs]); + let empty_row = trace_rows[total_inputs]; + trace_rows[total_inputs + 1..].par_iter_mut().for_each(|row| { + *row = empty_row; + }); + } + let from_trace = FromTrace::new(&mut trace); + timer_stop_and_log_trace!(DMA_PRE_POST_TRACE); + Ok(AirInstance::new_from_trace(from_trace)) + } +} diff --git a/precompiles/dma/src/dma_pre_post/dma_pre_post_instance.rs b/precompiles/dma/src/dma_pre_post/dma_pre_post_instance.rs new file mode 100644 index 000000000..8fc012189 --- /dev/null +++ b/precompiles/dma/src/dma_pre_post/dma_pre_post_instance.rs @@ -0,0 +1,166 @@ +//! The `DmaPrePostInstance` module defines an instance to perform the witness computation +//! for the DmaPrePost State Machine. +//! +//! It manages collected inputs and interacts with the `DmaPrePostSM` to compute witnesses for +//! execution plans. + +#[cfg(feature = "save_dma_inputs")] +use crate::DmaPrePostInput; +use crate::{DmaCheckPoint, DmaPrePostCollector, DmaPrePostModule}; +use fields::PrimeField64; +use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; +use std::sync::Arc; + +use zisk_common::{ + BusDevice, CheckPoint, ChunkId, Instance, InstanceCtx, InstanceType, PayloadType, StatsType, +}; +use zisk_pil::{DmaPrePostInputCpyTrace, DmaPrePostMemCpyTrace, DmaPrePostTrace}; + +/// The `DmaPrePostInstance` struct represents an instance for the DmaPrePost State Machine. +/// +/// It encapsulates the `DmaPrePostModule` and its associated context, and it processes input data +/// to compute witnesses for the DmaPrePost State Machine. +pub struct DmaPrePostInstance { + /// DmaPrePost State machine. + module: Arc>, + + /// Instance context. + ictx: InstanceCtx, +} + +impl DmaPrePostInstance { + /// Creates a new `DmaPrePostInstance`. + /// + /// # Arguments + /// * `dma_sm` - An `Arc`-wrapped reference to the DmaPrePost State Machine. + /// * `ictx` - The `InstanceCtx` associated with this instance, containing the execution plan. + /// * `bus_id` - The bus ID associated with this instance. + /// + /// # Returns + /// A new `DmaPrePostInstance` instance initialized with the provided state machine and + /// context. + pub fn new(module: Arc>, ictx: InstanceCtx) -> Self { + Self { module, ictx } + } + + pub fn build_dma_collector(&self, chunk_id: ChunkId) -> DmaPrePostCollector { + debug_assert!( + [ + DmaPrePostTrace::::AIR_ID, + DmaPrePostMemCpyTrace::::AIR_ID, + DmaPrePostInputCpyTrace::::AIR_ID, + ] + .contains(&self.ictx.plan.air_id), + "DmaPrePostInstance: Unsupported air_id: {:?}", + self.ictx.plan.air_id + ); + + let meta = self.ictx.plan.meta.as_ref().unwrap(); + let collect_info: &DmaCheckPoint = meta.downcast_ref::().unwrap(); + let (num_ops, collect_counters) = collect_info.chunks[&chunk_id]; + DmaPrePostCollector::new(chunk_id, num_ops, collect_counters) + } +} + +impl Instance for DmaPrePostInstance { + /// Computes the witness for the Dma execution plan. + /// + /// This method leverages the `DmaPrePostSM` to generate an `AirInstance` using the collected + /// inputs. + /// + /// # Arguments + /// * `_pctx` - The proof context, unused in this implementation. + /// + /// # Returns + /// An `Option` containing the computed `AirInstance`. + fn compute_witness( + &self, + _pctx: &ProofCtx, + _sctx: &SetupCtx, + collectors: Vec<(usize, Box>)>, + trace_buffer: Vec, + ) -> ProofmanResult>> { + #[cfg(feature = "save_dma_collectors")] + let (debug, inputs): (Vec<_>, Vec<_>) = collectors + .into_iter() + .map(|(_, collector)| { + let collector = collector.as_any().downcast::().unwrap(); + (collector.get_debug_info(), collector.inputs) + }) + .unzip(); + #[cfg(not(feature = "save_dma_collectors"))] + let inputs: Vec<_> = collectors + .into_iter() + .map(|(_, collector)| { + collector.as_any().downcast::().unwrap().inputs + }) + .collect(); + + #[cfg(any(feature = "save_dma_collectors", feature = "save_dma_inputs"))] + let air_instance_id = + _pctx.dctx_find_air_instance_id(self.ictx.plan.global_id.unwrap()).unwrap(); + + #[cfg(feature = "save_dma_collectors")] + save_dma_collectors( + &format!("{}_collector_{air_instance_id:04}.txt", self.module.get_name()), + debug, + )?; + + #[cfg(feature = "save_dma_inputs")] + DmaPrePostInput::dump_to_file( + &inputs, + &format!("{}_inputs_{air_instance_id:04}.txt", self.module.get_name()), + )?; + + Ok(Some(self.module.compute_witness(&inputs, trace_buffer)?)) + } + + /// Retrieves the checkpoint associated with this instance. + /// + /// # Returns + /// A `CheckPoint` object representing the checkpoint of the execution plan. + fn check_point(&self) -> &CheckPoint { + &self.ictx.plan.check_point + } + + /// Retrieves the type of this instance. + /// + /// # Returns + /// An `InstanceType` representing the type of this instance (`InstanceType::Instance`). + fn instance_type(&self) -> InstanceType { + InstanceType::Instance + } + + fn stats_type(&self) -> StatsType { + StatsType::Precompiled + } + + fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { + assert_eq!( + self.ictx.plan.air_id, + DmaPrePostTrace::::AIR_ID, + "DmaPrePostInstance: Unsupported air_id: {:?}", + self.ictx.plan.air_id + ); + + let meta = self.ictx.plan.meta.as_ref().unwrap(); + let collect_info = meta.downcast_ref::().unwrap(); + let (num_ops, collect_counters) = collect_info.chunks[&chunk_id]; + Some(Box::new(DmaPrePostCollector::new(chunk_id, num_ops, collect_counters))) + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + +#[cfg(feature = "save_dma_collectors")] +pub fn save_dma_collectors(filename: &str, debug: Vec) -> std::io::Result<()> { + use std::fs; + + let path = std::env::var("DEBUG_OUTPUT_PATH").unwrap_or_else(|_| "tmp/".to_string()); + let full_path = format!("{}{}", path, filename); + + fs::write(&full_path, debug.join("\n"))?; + Ok(()) +} diff --git a/precompiles/dma/src/dma_pre_post/dma_pre_post_memcpy.rs b/precompiles/dma/src/dma_pre_post/dma_pre_post_memcpy.rs new file mode 100644 index 000000000..edc7b0c9b --- /dev/null +++ b/precompiles/dma/src/dma_pre_post/dma_pre_post_memcpy.rs @@ -0,0 +1,296 @@ +use std::sync::Arc; + +use fields::PrimeField64; + +use pil_std_lib::Std; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +use rayon::{ + iter::{IndexedParallelIterator, ParallelIterator}, + slice::{ParallelSlice, ParallelSliceMut}, +}; +use zisk_pil::{DMA_PRE_POST_TABLE_ID, DMA_PRE_POST_TABLE_SIZE, DUAL_RANGE_BYTE_ID}; + +#[cfg(feature = "packed")] +pub use zisk_pil::{ + DmaPrePostMemCpyTracePacked as DmaPrePostMemCpyTrace, + DmaPrePostMemCpyTraceRowPacked as DmaPrePostMemCpyTraceRow, +}; + +#[cfg(not(feature = "packed"))] +pub use zisk_pil::{DmaPrePostMemCpyTrace, DmaPrePostMemCpyTraceRow}; + +use crate::{dma_trace, DmaPrePostInput, DmaPrePostModule, DmaPrePostRom}; +use precompiles_helpers::DmaInfo; + +/// The `DmaPrePostMemCpySM` struct encapsulates the logic of the DmaPrePost State Machine. +pub struct DmaPrePostMemCpySM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + /// Range checks ID's + pre_post_table_id: usize, + + /// Dual Byte Range checks + dual_range_byte_id: usize, +} + +impl DmaPrePostMemCpySM { + /// Creates a new Dma State Machine instance. + /// + /// # Returns + /// A new `DmaPrePostMemCpySM` instance. + pub fn new(std: Arc>) -> Arc { + Arc::new(Self { + std: std.clone(), + dual_range_byte_id: std + .get_virtual_table_id(DUAL_RANGE_BYTE_ID) + .expect("Failed to get table DUAL_RANGE_BYTE ID"), + pre_post_table_id: std + .get_virtual_table_id(DMA_PRE_POST_TABLE_ID) + .expect("Failed to get table DMA_PRE_POST_TABLE_ID ID"), + }) + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_slice( + &self, + input: &DmaPrePostInput, + trace: &mut DmaPrePostMemCpyTraceRow, + pre_post_table_mul: &mut [u64], + local_dual_range_byte_mul: &mut [u64], + ) { + let dst_offset = input.dst & 0x07; + let src_offset = input.src & 0x07; + let is_pre = dst_offset > 0; + + let dst64 = input.dst >> 3; + let src64 = input.src >> 3; + + trace.set_main_step(input.step); + trace.set_dst64(dst64); + trace.set_src64(src64); + trace.set_dst_offset(dst_offset as u8); + trace.set_src_offset(src_offset as u8); + trace.set_is_post(!is_pre); + + let count = if is_pre { + DmaInfo::get_pre_count(input.encoded) + } else { + DmaInfo::get_post_count(input.encoded) + }; + + trace.set_count(count as u8); + trace.set_sel_memcpy(true); + // intermediate: trace.last_dst_byte(0); + let second_read = (src_offset as usize + count) > 8; + //println!("SECOND_READ: {second_read}"); + trace.set_enabled_second_read(second_read); + + let mut value = input.src_values[0]; + let mut rb = [0u8; 16]; + let mut pb = [0u8; 8]; + + rb[0] = value as u8; + rb[1] = (value >> 8) as u8; + rb[2] = (value >> 16) as u8; + rb[3] = (value >> 24) as u8; + rb[4] = (value >> 32) as u8; + rb[5] = (value >> 40) as u8; + rb[6] = (value >> 48) as u8; + rb[7] = (value >> 56) as u8; + + local_dual_range_byte_mul[(value & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 16) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 32) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 48) & 0xFFFF) as usize] += 1; + + // println!("DUAL_RANGE_BYTE_1({:08X})", (value & 0xFFFF)); + // println!("DUAL_RANGE_BYTE_1({:08X})", ((value >> 16) & 0xFFFF)); + // println!("DUAL_RANGE_BYTE_1({:08X})", ((value >> 32) & 0xFFFF)); + // println!("DUAL_RANGE_BYTE_1({:08X})", ((value >> 48) & 0xFFFF)); + + if second_read { + value = input.src_values[1]; + rb[8] = value as u8; + rb[9] = (value >> 8) as u8; + rb[10] = (value >> 16) as u8; + rb[11] = (value >> 24) as u8; + rb[12] = (value >> 32) as u8; + rb[13] = (value >> 40) as u8; + rb[14] = (value >> 48) as u8; + rb[15] = (value >> 56) as u8; + local_dual_range_byte_mul[(value & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 16) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 32) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 48) & 0xFFFF) as usize] += 1; + // println!("DUAL_RANGE_BYTE_2({:08X})", (value & 0xFFFF)); + // println!("DUAL_RANGE_BYTE_2({:08X})", ((value >> 16) & 0xFFFF)); + // println!("DUAL_RANGE_BYTE_2({:08X})", ((value >> 32) & 0xFFFF)); + // println!("DUAL_RANGE_BYTE_2({:08X})", ((value >> 48) & 0xFFFF)); + } else { + local_dual_range_byte_mul[0] += 4; + } + + value = input.dst_pre_value; + pb[0] = value as u8; + pb[1] = (value >> 8) as u8; + pb[2] = (value >> 16) as u8; + pb[3] = (value >> 24) as u8; + pb[4] = (value >> 32) as u8; + pb[5] = (value >> 40) as u8; + pb[6] = (value >> 48) as u8; + pb[7] = (value >> 56) as u8; + + local_dual_range_byte_mul[(value & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 16) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 32) & 0xFFFF) as usize] += 1; + local_dual_range_byte_mul[((value >> 48) & 0xFFFF) as usize] += 1; + + // println!("DUAL_RANGE_BYTE_3({:08X})", (value & 0xFFFF)); + // println!("DUAL_RANGE_BYTE_3({:08X})", ((value >> 16) & 0xFFFF)); + // println!("DUAL_RANGE_BYTE_3({:08X})", ((value >> 32) & 0xFFFF)); + // println!("DUAL_RANGE_BYTE_3({:08X})", ((value >> 48) & 0xFFFF)); + + let selr_value = if dst_offset > src_offset { + trace.set_dst_offset_gt_src_offset(true); + dst_offset - src_offset + } else { + trace.set_dst_offset_gt_src_offset(false); + src_offset - dst_offset + }; + + let _mask = 0xFFFF_FFFF_FFFF_FFFFu64 << (dst_offset * 8); + let mask = _mask ^ (_mask << (count * 8)); + + trace.set_sb(0, (mask & 0x0000_0000_0000_00FF) != 0); + trace.set_sb(1, (mask & 0x0000_0000_0000_FF00) != 0); + trace.set_sb(2, (mask & 0x0000_0000_00FF_0000) != 0); + trace.set_sb(3, (mask & 0x0000_0000_FF00_0000) != 0); + trace.set_sb(4, (mask & 0x0000_00FF_0000_0000) != 0); + trace.set_sb(5, (mask & 0x0000_FF00_0000_0000) != 0); + trace.set_sb(6, (mask & 0x00FF_0000_0000_0000) != 0); + trace.set_sb(7, (mask & 0xFF00_0000_0000_0000) != 0); + + for (index, byte) in rb.iter().enumerate() { + // println!("PRE-POST bytes[{index}]: 0x{byte:02X}"); + trace.set_rb(index, *byte); + } + for (index, byte) in pb.iter().enumerate() { + // println!("PRE-POST bytes[{index}]: 0x{byte:02X}"); + trace.set_pb(index, *byte); + } + + trace.set_selr(0, selr_value == 0); + trace.set_selr(1, selr_value == 1); + trace.set_selr(2, selr_value == 2); + trace.set_selr(3, selr_value == 3); + trace.set_selr(4, selr_value == 4); + trace.set_selr(5, selr_value == 5); + trace.set_selr(6, selr_value == 6); + + // println!("PRE-POST write_value: 0x{write_value_01:016X} 0x{write_value_23:016X}"); + + let table_row = DmaPrePostRom::get_row( + dst_offset as usize, + src_offset as usize, + count, + false, + false, + true, + ); + // println!("PRE-POST-ROM [{table_row}] dst_offset: {dst_offset} src_offset: {src_offset} count: {count}"); + pre_post_table_mul[table_row] += 1; + } +} +impl DmaPrePostModule for DmaPrePostMemCpySM { + fn get_name(&self) -> &'static str { + "dma_pre_post_memcpy" + } + + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + fn compute_witness( + &self, + inputs: &[Vec], + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = DmaPrePostMemCpyTrace::::new_from_vec_zeroes(trace_buffer)?; + let num_rows = trace.num_rows(); + + let total_inputs: usize = inputs.iter().map(|inputs| inputs.len()).sum(); + + assert!(total_inputs <= num_rows); + assert!(total_inputs > 0); + + dma_trace("DmaPrePostMemCpy", total_inputs, num_rows); + + timer_start_trace!(DMA_PRE_POST_TRACE); + + // Split the dma_trace.buffer into slices matching each inner vector’s length. + let flat_inputs: Vec<_> = inputs.iter().flatten().collect(); + let trace_rows = trace.buffer.as_mut_slice(); + + // Calculate optimal chunk size + let num_threads = rayon::current_num_threads(); + let chunk_size = std::cmp::max(1, flat_inputs.len() / num_threads); + + // Process in chunks to allow per-chunk local multiplicities arrays + let (global_pre_post_table_mul, global_dual_range_byte_mul): ( + Vec>, + Vec>, + ) = flat_inputs + .par_chunks(chunk_size) + .zip(trace_rows.par_chunks_mut(chunk_size)) + .map(|(input_chunk, trace_chunk)| { + // Local array shared by this chunk + let mut local_pre_post_table_mul = vec![0u64; DMA_PRE_POST_TABLE_SIZE]; + let mut local_dual_range_byte_mul = vec![0u64; 1 << 16]; + + // Sum all local arrays into a global one + for (input, trace_row) in input_chunk.iter().zip(trace_chunk.iter_mut()) { + self.process_slice( + input, + trace_row, + &mut local_pre_post_table_mul, + &mut local_dual_range_byte_mul, + ); + } + + (local_pre_post_table_mul, local_dual_range_byte_mul) + }) + .collect(); + + for pre_post_table_mul in global_pre_post_table_mul.iter() { + // println!("PRE_POST_TABLE_MUL {:?}", pre_post_table_mul); + self.std.inc_virtual_rows_ranged(self.pre_post_table_id, pre_post_table_mul); + } + + for dual_range_byte_mul in global_dual_range_byte_mul.iter() { + self.std.inc_virtual_rows_ranged(self.dual_range_byte_id, dual_range_byte_mul); + } + + /* + if total_inputs < num_rows { + self.process_empty_slice(&mut trace_rows[total_inputs]); + let empty_row = trace_rows[total_inputs]; + trace_rows[total_inputs + 1..].par_iter_mut().for_each(|row| { + *row = empty_row; + }); + }*/ + let from_trace = FromTrace::new(&mut trace); + timer_stop_and_log_trace!(DMA_PRE_POST_TRACE); + Ok(AirInstance::new_from_trace(from_trace)) + } +} diff --git a/precompiles/dma/src/dma_pre_post/dma_pre_post_module.rs b/precompiles/dma/src/dma_pre_post/dma_pre_post_module.rs new file mode 100644 index 000000000..e30441178 --- /dev/null +++ b/precompiles/dma/src/dma_pre_post/dma_pre_post_module.rs @@ -0,0 +1,11 @@ +use crate::DmaPrePostInput; +use proofman_common::{AirInstance, ProofmanResult}; + +pub trait DmaPrePostModule: Send + Sync { + fn compute_witness( + &self, + inputs: &[Vec], + trace_buffer: Vec, + ) -> ProofmanResult>; + fn get_name(&self) -> &'static str; +} diff --git a/precompiles/dma/src/dma_pre_post/dma_pre_post_rom.rs b/precompiles/dma/src/dma_pre_post/dma_pre_post_rom.rs new file mode 100644 index 000000000..b11fe283f --- /dev/null +++ b/precompiles/dma/src/dma_pre_post/dma_pre_post_rom.rs @@ -0,0 +1,47 @@ +use precompiles_helpers::DmaInfo; + +pub enum DmaPrePostRom {} + +impl DmaPrePostRom { + // Table generated from pil + const TABLE_OFFSETS: [usize; 64] = [ + 0, 32, 64, 96, 128, 160, 192, 224, 256, 284, 312, 340, 368, 396, 424, 452, 480, 504, 528, + 552, 576, 600, 624, 648, 672, 692, 712, 732, 752, 772, 792, 812, 832, 848, 864, 880, 896, + 912, 928, 944, 960, 972, 984, 996, 1008, 1020, 1032, 1044, 1056, 1064, 1072, 1080, 1088, + 1096, 1104, 1112, 1120, 1124, 1128, 1132, 1136, 1140, 1144, 1148, + ]; + + #[allow(dead_code)] + pub fn get_row_from_encoded( + encoded: u64, + memcmp_result_nz: bool, + memcmp_result_is_neg: bool, + load_src: bool, + ) -> usize { + let src_offset = DmaInfo::get_src_offset(encoded); + let dst_offset = DmaInfo::get_dst_offset(encoded); + let count = DmaInfo::get_count(encoded); + Self::get_row( + dst_offset, + src_offset, + count, + memcmp_result_nz, + memcmp_result_is_neg, + load_src, + ) + } + pub fn get_row( + dst_offset: usize, + src_offset: usize, + count: usize, + memcmp_result_nz: bool, + memcmp_result_is_neg: bool, + load_src: bool, + ) -> usize { + debug_assert!(!memcmp_result_is_neg || memcmp_result_nz); + debug_assert!(load_src || (!memcmp_result_is_neg && !memcmp_result_nz)); + Self::TABLE_OFFSETS[dst_offset * 8 + src_offset] + + (count - 1) * 4 + + if load_src { memcmp_result_is_neg as usize + memcmp_result_nz as usize } else { 3 } + } +} diff --git a/precompiles/dma/src/dma_pre_post/mod.rs b/precompiles/dma/src/dma_pre_post/mod.rs new file mode 100644 index 000000000..3808e8179 --- /dev/null +++ b/precompiles/dma/src/dma_pre_post/mod.rs @@ -0,0 +1,18 @@ +#[allow(clippy::module_inception)] +mod dma_pre_post; +mod dma_pre_post_collector; +mod dma_pre_post_input; +mod dma_pre_post_inputcpy; +mod dma_pre_post_instance; +mod dma_pre_post_memcpy; +mod dma_pre_post_module; +mod dma_pre_post_rom; + +pub use dma_pre_post::*; +pub use dma_pre_post_collector::*; +pub use dma_pre_post_input::*; +pub use dma_pre_post_inputcpy::*; +pub use dma_pre_post_instance::*; +pub use dma_pre_post_memcpy::*; +pub use dma_pre_post_module::*; +pub use dma_pre_post_rom::*; diff --git a/precompiles/dma/src/dma_strategy.rs b/precompiles/dma/src/dma_strategy.rs new file mode 100644 index 000000000..05e51a22a --- /dev/null +++ b/precompiles/dma/src/dma_strategy.rs @@ -0,0 +1,603 @@ +//! The `DmaPlanner` module defines a planner for generating execution plans specific to +//! arithmetic operations. +//! +//! It organizes execution plans for both regular instances and table instances, +//! leveraging arithmetic operation counts and metadata to construct detailed plans. + +use core::panic; +use std::fmt; + +use crate::{ + DmaCheckPoint, DmaCounterInputGen, DmaInstancesBuilder, DMA_64_ALIGNED_INPUTS_OFFSET, + DMA_64_ALIGNED_OFFSET, DMA_COUNTER_INPUTCPY, DMA_COUNTER_MEMCMP, DMA_COUNTER_MEMCPY, + DMA_COUNTER_MEMCPY_8, DMA_COUNTER_MEMSET, DMA_COUNTER_MEMSET_8, DMA_COUNTER_OPS, + DMA_COUNTER_OPS_EXT, DMA_INPUT_GEN_COUNTERS, DMA_OFFSET, DMA_PRE_POST_OFFSET, + DMA_UNALIGNED_INPUTS_OFFSET, DMA_UNALIGNED_OFFSET, +}; + +#[cfg(feature = "save_dma_plans")] +use crate::get_dma_air_name; + +use fields::PrimeField64; +use zisk_common::{BusDeviceMetrics, BusDeviceMode, CheckPoint, ChunkId}; +#[cfg(not(feature = "packed"))] +use zisk_pil::{ + Dma64AlignedInputCpyTrace, Dma64AlignedMemCpyTrace, Dma64AlignedMemSetTrace, + Dma64AlignedMemTrace, Dma64AlignedTrace, DmaInputCpyTrace, DmaMemCpyTrace, + DmaPrePostInputCpyTrace, DmaPrePostMemCpyTrace, DmaPrePostTrace, DmaTrace, DmaUnalignedTrace, +}; + +#[cfg(feature = "packed")] +use zisk_pil::{ + Dma64AlignedInputCpyTracePacked as Dma64AlignedInputCpyTrace, + Dma64AlignedMemCpyTracePacked as Dma64AlignedMemCpyTrace, + Dma64AlignedMemSetTracePacked as Dma64AlignedMemSetTrace, + Dma64AlignedMemTracePacked as Dma64AlignedMemTrace, + Dma64AlignedTracePacked as Dma64AlignedTrace, DmaInputCpyTracePacked as DmaInputCpyTrace, + DmaMemCpyTracePacked as DmaMemCpyTrace, + DmaPrePostInputCpyTracePacked as DmaPrePostInputCpyTrace, + DmaPrePostMemCpyTracePacked as DmaPrePostMemCpyTrace, DmaPrePostTracePacked as DmaPrePostTrace, + DmaTracePacked as DmaTrace, DmaUnalignedTracePacked as DmaUnalignedTrace, +}; + +#[derive(Debug, Default, Clone)] +pub struct DmaInstances { + // memcpy: memcpy ==> full + // memcmp: full + // memset: full + // inputcpy: input_cpy ==> full + pub full: usize, + pub memcpy: usize, + pub inputcpy: usize, + pub rows_memcpy_to_full: usize, + pub rows_inputcpy_to_full: usize, +} + +impl fmt::Display for DmaInstances { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + " full {:>3}\n \ + memcpy {:>3} {:>6} → full\n \ + inputcpy {:>3} {:>6} → full\n", + self.full, + self.memcpy, + self.rows_memcpy_to_full, + self.inputcpy, + self.rows_inputcpy_to_full + ) + } +} + +#[derive(Debug, Default, Clone)] +pub struct Dma64AlignedInstances { + // memcpy: memcpy ==> mem ==> full + // memcmp: mem ==> full + // memset: memset ==> mem ==> full + // inputcpy: input_cpy ==> full + pub full: usize, + pub memcpy: usize, + pub inputcpy: usize, + pub mem: usize, + pub memset: usize, + pub rows_memcpy_to_mem: usize, + pub rows_memcpy_to_full: usize, + pub rows_inputcpy_to_full: usize, + pub rows_memset_to_mem: usize, + pub rows_memset_to_full: usize, + pub rows_memcmp_to_full: usize, +} + +impl fmt::Display for Dma64AlignedInstances { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + " full {:>3}\n \ + memcpy {:>3} {:>6} → mem {:>6} → full\n \ + inputcpy {:>3} {:>6} → full\n \ + mem {:>3}\n \ + memset {:>3} {:>6} → mem {:>6} → full\n \ + memcmp - {:>6} → full\n", + self.full, + self.memcpy, + self.rows_memcpy_to_mem, + self.rows_memcpy_to_full, + self.inputcpy, + self.rows_inputcpy_to_full, + self.mem, + self.memset, + self.rows_memset_to_mem, + self.rows_memset_to_full, + self.rows_memcmp_to_full + ) + } +} + +/// The `DmaStrategy` struct organizes execution plans for arithmetic instances and tables. +/// +/// It allows adding metadata about instances and tables and generates plans +/// based on the provided counters. +#[derive(Default)] +pub struct DmaStrategy { + pub dma: DmaInstances, + pub dma_pre_post: DmaInstances, + pub dma_64_aligned: Dma64AlignedInstances, + pub dma_unaligned: usize, + _marker: std::marker::PhantomData, +} + +impl fmt::Display for DmaStrategy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "────────────────────────────────────────── DMA\n\ + {}\ + ───────────────────────────────── DMA_PRE_POST\n\ + {}\ + ─────────────────────────────── DMA_64_ALIGNED\n\ + {}\ + ──────────────────────────────── DMA_UNALIGNED\n \ + full {:>3}\n\n", + self.dma, self.dma_pre_post, self.dma_64_aligned, self.dma_unaligned, + ) + } +} + +impl DmaStrategy { + /// Creates a new `DmaStrategy`. + /// + /// # Returns + /// A new `DmaStrategy` instance with no preconfigured instances or tables. + pub fn new() -> Self { + Self::default() + } + + // define_plan_for_field!(plan_dma_controller, DmaCounterInputGen, dma_ops); + // define_plan_for_field!(plan_dma_pre_post, DmaCounterInputGen, dma_pre_post_ops); + // define_plan_for_field!( + // plan_dma_unaligned, + // DmaCounterInputGen, + // dma_unaligned_rows, + // dma_unaligned_inputs + // ); + // define_plan_for_field!( + // plan_dma_64_aligned, + // DmaCounterInputGen, + // dma_64_aligned_rows, + // dma_64_aligned_inputs + // ); + + fn calculate_totals( + &self, + counters: &Vec<(ChunkId, Box)>, + ) -> DmaCounterInputGen { + let mut totals = DmaCounterInputGen::new(BusDeviceMode::Counter); + for (_, counter) in counters.iter() { + let counter = (**counter).as_any().downcast_ref::().unwrap(); + for i in 0..DMA_INPUT_GEN_COUNTERS { + totals.counters[i] += counter.counters[i]; + } + } + totals + } + + const DMA_ROWS: usize = DmaTrace::::NUM_ROWS; + const DMA_MEMCPY_ROWS: usize = DmaMemCpyTrace::::NUM_ROWS; + const DMA_INPUTCPY_ROWS: usize = DmaInputCpyTrace::::NUM_ROWS; + const DMA_PRE_POST_ROWS: usize = DmaPrePostTrace::::NUM_ROWS; + const DMA_PRE_POST_MEMCPY_ROWS: usize = DmaPrePostMemCpyTrace::::NUM_ROWS; + const DMA_PRE_POST_INPUTCPY_ROWS: usize = DmaPrePostInputCpyTrace::::NUM_ROWS; + const DMA_64_ALIGNED_ROWS: usize = Dma64AlignedTrace::::NUM_ROWS; + const DMA_64_ALIGNED_MEMCPY_ROWS: usize = Dma64AlignedMemCpyTrace::::NUM_ROWS; + const DMA_64_ALIGNED_MEMSET_ROWS: usize = Dma64AlignedMemSetTrace::::NUM_ROWS; + const DMA_64_ALIGNED_INPUTCPY_ROWS: usize = Dma64AlignedInputCpyTrace::::NUM_ROWS; + const DMA_64_ALIGNED_MEM_ROWS: usize = Dma64AlignedMemTrace::::NUM_ROWS; + const DMA_UNALIGNED_ROWS: usize = DmaUnalignedTrace::::NUM_ROWS; + // Dma + // DmaMemCpy + // DmaInputCpy + pub fn calculate_dma_strategy( + rows: &[usize], + rows_x_full_instance: usize, + rows_x_memcpy_instance: usize, + rows_x_inputcpy_instance: usize, + info: &mut DmaInstances, + ) { + let rows_full = rows[DMA_COUNTER_MEMSET] + rows[DMA_COUNTER_MEMCMP]; + let rows_memcpy = rows[DMA_COUNTER_MEMCPY]; + let rows_inputcpy = rows[DMA_COUNTER_INPUTCPY]; + + info.full = rows_full.div_ceil(rows_x_full_instance); + info.memcpy = rows_memcpy.div_ceil(rows_x_memcpy_instance); + info.inputcpy = rows_inputcpy.div_ceil(rows_x_inputcpy_instance); + + let remain_dma = rows_full % rows_x_full_instance; + let available_on_dma = if rows_full == 0 { 0 } else { rows_x_full_instance - remain_dma }; + let remain_dma_memcpy = rows_memcpy % rows_x_memcpy_instance; + let remain_dma_inputcpy = rows_inputcpy % rows_x_inputcpy_instance; + let remain = remain_dma_memcpy + remain_dma_inputcpy; + + if remain <= available_on_dma { + if remain_dma_memcpy > 0 { + info.memcpy -= 1; + info.rows_memcpy_to_full = remain_dma_memcpy; + } + if remain_dma_inputcpy > 0 { + info.inputcpy -= 1; + info.rows_inputcpy_to_full = remain_dma_inputcpy; + } + } else if remain_dma_memcpy <= available_on_dma { + if remain_dma_memcpy > 0 { + info.memcpy -= 1; + info.rows_memcpy_to_full = remain_dma_memcpy; + } + } else if remain_dma_inputcpy <= available_on_dma { + if remain_dma_inputcpy > 0 { + info.inputcpy -= 1; + info.rows_inputcpy_to_full = remain_dma_inputcpy; + } + } else if remain_dma_memcpy > 0 + && remain_dma_inputcpy > 0 + && remain <= (available_on_dma + rows_x_full_instance) + { + // COST(Dma) < COST(DmaMemCpy) + COST(DmaInputCpy) + info.memcpy -= 1; + info.inputcpy -= 1; + info.full += 1; + info.rows_memcpy_to_full = remain_dma_memcpy; + info.rows_inputcpy_to_full = remain_dma_inputcpy; + } + } + // DmaPrePost + // DmaPrePostMemCpy + // DmaPrePostInputCpy + // memcpy: memcpy ==> mem ==> full + // memcmp: mem ==> full + // memset: memset ==> mem ==> full + // inputcpy: input_cpy ==> full + + pub fn calculate_dma_64_alignment_strategy(rows: &[usize], info: &mut Dma64AlignedInstances) { + info.full = 0; + info.memcpy = rows[DMA_COUNTER_MEMCPY_8].div_ceil(Self::DMA_64_ALIGNED_MEMCPY_ROWS); + info.memset = rows[DMA_COUNTER_MEMSET_8].div_ceil(Self::DMA_64_ALIGNED_MEMSET_ROWS); + info.inputcpy = rows[DMA_COUNTER_INPUTCPY].div_ceil(Self::DMA_64_ALIGNED_INPUTCPY_ROWS); + + let rows_mem = rows[DMA_COUNTER_MEMCMP]; + info.mem = rows_mem.div_ceil(Self::DMA_64_ALIGNED_ROWS); + // TBO: To Be Optimized + info.rows_inputcpy_to_full = 0; + info.rows_memcpy_to_mem = 0; + info.rows_memcpy_to_full = 0; + info.rows_memset_to_mem = 0; + info.rows_memset_to_full = 0; + info.rows_memcmp_to_full = 0; + } + pub fn calculate_dma_unalignment_strategy(rows: &[usize]) -> usize { + let rows = rows[DMA_COUNTER_MEMCPY] + + rows[DMA_COUNTER_INPUTCPY] + + rows[DMA_COUNTER_MEMSET] + + rows[DMA_COUNTER_MEMCMP]; + rows.div_ceil(Self::DMA_UNALIGNED_ROWS) + } + fn calculate_strategy(&mut self, totals: &DmaCounterInputGen) { + Self::calculate_dma_strategy( + &totals.counters[DMA_OFFSET..DMA_OFFSET + DMA_COUNTER_OPS], + Self::DMA_ROWS, + Self::DMA_MEMCPY_ROWS, + Self::DMA_INPUTCPY_ROWS, + &mut self.dma, + ); + Self::calculate_dma_strategy( + &totals.counters[DMA_PRE_POST_OFFSET..DMA_PRE_POST_OFFSET + DMA_COUNTER_OPS], + Self::DMA_PRE_POST_ROWS, + Self::DMA_PRE_POST_MEMCPY_ROWS, + Self::DMA_PRE_POST_INPUTCPY_ROWS, + &mut self.dma_pre_post, + ); + Self::calculate_dma_64_alignment_strategy( + &totals.counters[DMA_64_ALIGNED_OFFSET..DMA_64_ALIGNED_OFFSET + DMA_COUNTER_OPS_EXT], + &mut self.dma_64_aligned, + ); + self.dma_unaligned = Self::calculate_dma_unalignment_strategy( + &totals.counters[DMA_UNALIGNED_OFFSET..DMA_UNALIGNED_OFFSET + DMA_COUNTER_OPS], + ); + } + // DmaUnaligned => + // Dma64Aligned => decision by chunk + // pub send_memcpy_to_mem: bool, + // pub send_memcpy_to_full: bool, + // pub send_inputcpy_to_full: bool, + // pub send_memset_to_mem: bool, + // pub send_memset_to_full: bool, + // pub send_mem_to_full: bool, + // + // pub rows_memcpy_to_full: usize, + // pub rows_inputcpy_to_full: usize, + + pub fn calculate( + &mut self, + counters: Vec<(ChunkId, Box)>, + ) -> Vec<(usize, Vec<(CheckPoint, DmaCheckPoint)>)> { + let totals: DmaCounterInputGen = self.calculate_totals(&counters); + #[cfg(feature = "save_dma_plans")] + let totals_debug_info = format!("{}", totals); + + self.calculate_strategy(&totals); + + let mut dma_pre_post_full = DmaInstancesBuilder::new( + "dma_pre_post_full", + self.dma_pre_post.full, + Self::DMA_PRE_POST_ROWS, + ); + let mut dma_pre_post_memcpy = DmaInstancesBuilder::new( + "dma_pre_post_memcpy", + self.dma_pre_post.memcpy, + Self::DMA_PRE_POST_MEMCPY_ROWS, + ); + let mut dma_pre_post_inputcpy = DmaInstancesBuilder::new( + "dma_pre_post_inputcpy", + self.dma_pre_post.inputcpy, + Self::DMA_PRE_POST_INPUTCPY_ROWS, + ); + + let mut dma_full = DmaInstancesBuilder::new("dma_full", self.dma.full, Self::DMA_ROWS); + let mut dma_memcpy = + DmaInstancesBuilder::new("dma_memcpy", self.dma.memcpy, Self::DMA_MEMCPY_ROWS); + let mut dma_inputcpy = + DmaInstancesBuilder::new("dma_inputcpy", self.dma.inputcpy, Self::DMA_INPUTCPY_ROWS); + + let mut dma_64_aligned_full = DmaInstancesBuilder::new( + "dma_64_aligned_full", + self.dma_64_aligned.full, + Self::DMA_64_ALIGNED_ROWS, + ); + let mut dma_64_aligned_memset = DmaInstancesBuilder::new( + "dma_64_aligned_memset", + self.dma_64_aligned.memset, + Self::DMA_64_ALIGNED_MEMSET_ROWS, + ); + let mut dma_64_aligned_memcpy = DmaInstancesBuilder::new( + "dma_64_aligned_memcpy", + self.dma_64_aligned.memcpy, + Self::DMA_64_ALIGNED_MEMCPY_ROWS, + ); + let mut dma_64_aligned_inputcpy = DmaInstancesBuilder::new( + "dma_64_aligned_inputcpy", + self.dma_64_aligned.inputcpy, + Self::DMA_64_ALIGNED_INPUTCPY_ROWS, + ); + let mut dma_64_aligned_mem = DmaInstancesBuilder::new( + "dma_64_aligned_mem", + self.dma_64_aligned.mem, + Self::DMA_64_ALIGNED_MEM_ROWS, + ); + + let mut dma_unaligned = + DmaInstancesBuilder::new("dma_unaligned", self.dma_unaligned, Self::DMA_UNALIGNED_ROWS); + + for (current_chunk, dyn_counter) in counters.iter() { + let counters = + (**dyn_counter).as_any().downcast_ref::().unwrap().counters; + + // DMA + + let rows = counters[DMA_OFFSET + DMA_COUNTER_MEMSET]; + if rows > 0 { + dma_full.add_op_rows(*current_chunk, 0, rows, rows, DMA_COUNTER_MEMSET); + } + + let rows = counters[DMA_OFFSET + DMA_COUNTER_MEMCMP]; + if rows > 0 { + dma_full.add_op_rows(*current_chunk, 0, rows, rows, DMA_COUNTER_MEMCMP); + } + + let mut rows = counters[DMA_OFFSET + DMA_COUNTER_MEMCPY]; + let skip = if rows > 0 && self.dma.rows_memcpy_to_full > 0 { + let rows_applicable = std::cmp::min(rows, self.dma.rows_memcpy_to_full); + dma_full.add_op_rows( + *current_chunk, + 0, + rows_applicable, + rows_applicable, + DMA_COUNTER_MEMCPY, + ); + rows -= rows_applicable; + self.dma.rows_memcpy_to_full -= rows_applicable; + rows_applicable + } else { + 0 + }; + if rows > 0 { + dma_memcpy.add_op_rows(*current_chunk, skip, rows, rows, DMA_COUNTER_MEMCPY); + } + + let mut rows = counters[DMA_OFFSET + DMA_COUNTER_INPUTCPY]; + let skip = if self.dma.rows_inputcpy_to_full > 0 { + let rows_applicable = std::cmp::min(rows, self.dma.rows_inputcpy_to_full); + dma_full.add_op_rows( + *current_chunk, + 0, + rows_applicable, + rows_applicable, + DMA_COUNTER_INPUTCPY, + ); + rows -= rows_applicable; + self.dma.rows_inputcpy_to_full -= rows_applicable; + rows_applicable + } else { + 0 + }; + if rows > 0 { + dma_inputcpy.add_op_rows(*current_chunk, skip, rows, rows, DMA_COUNTER_INPUTCPY); + } + + // DMA_PRE_POST + + let rows = counters[DMA_PRE_POST_OFFSET + DMA_COUNTER_MEMSET]; + if rows > 0 { + dma_pre_post_full.add_op_rows(*current_chunk, 0, rows, rows, DMA_COUNTER_MEMSET); + } + + let rows = counters[DMA_PRE_POST_OFFSET + DMA_COUNTER_MEMCMP]; + if rows > 0 { + dma_pre_post_full.add_op_rows(*current_chunk, 0, rows, rows, DMA_COUNTER_MEMCMP); + } + + let mut rows = counters[DMA_PRE_POST_OFFSET + DMA_COUNTER_MEMCPY]; + let skip = if rows > 0 && self.dma_pre_post.rows_memcpy_to_full > 0 { + let rows_applicable = std::cmp::min(rows, self.dma_pre_post.rows_memcpy_to_full); + dma_pre_post_full.add_op_rows( + *current_chunk, + 0, + rows_applicable, + rows_applicable, + DMA_COUNTER_MEMCPY, + ); + rows -= rows_applicable; + self.dma_pre_post.rows_memcpy_to_full -= rows_applicable; + rows_applicable + } else { + 0 + }; + if rows > 0 { + dma_pre_post_memcpy.add_op_rows( + *current_chunk, + skip, + rows, + rows, + DMA_COUNTER_MEMCPY, + ); + } + + let mut rows = counters[DMA_PRE_POST_OFFSET + DMA_COUNTER_INPUTCPY]; + let skip = if self.dma_pre_post.rows_inputcpy_to_full > 0 { + let rows_applicable = std::cmp::min(rows, self.dma_pre_post.rows_inputcpy_to_full); + dma_pre_post_full.add_op_rows( + *current_chunk, + 0, + rows_applicable, + rows_applicable, + DMA_COUNTER_INPUTCPY, + ); + rows -= rows_applicable; + self.dma_pre_post.rows_inputcpy_to_full -= rows_applicable; + rows_applicable + } else { + 0 + }; + if rows > 0 { + dma_pre_post_inputcpy.add_op_rows( + *current_chunk, + skip, + rows, + rows, + DMA_COUNTER_INPUTCPY, + ); + } + + // DMA_64_ALIGNED + + for op in 0..DMA_COUNTER_OPS { + let inputs = counters[DMA_64_ALIGNED_INPUTS_OFFSET + op]; + match op { + DMA_COUNTER_INPUTCPY => dma_64_aligned_inputcpy.add_op_rows( + *current_chunk, + 0, + counters[DMA_64_ALIGNED_OFFSET + op], + inputs, + op, + ), + DMA_COUNTER_MEMSET => dma_64_aligned_memset.add_op_rows( + *current_chunk, + 0, + counters[DMA_64_ALIGNED_OFFSET + DMA_COUNTER_MEMSET_8], + inputs, + op, + ), + DMA_COUNTER_MEMCMP => dma_64_aligned_mem.add_op_rows( + *current_chunk, + 0, + counters[DMA_64_ALIGNED_OFFSET + op], + inputs, + op, + ), + DMA_COUNTER_MEMCPY => dma_64_aligned_memcpy.add_op_rows( + *current_chunk, + 0, + counters[DMA_64_ALIGNED_OFFSET + DMA_COUNTER_MEMCPY_8], + inputs, + op, + ), + _ => panic!("Unexpected op code {op} in DMA 64 aligned counters"), + }; + } + + // DMA_UNALIGNED + + for op in 0..DMA_COUNTER_OPS { + let rows = counters[DMA_UNALIGNED_OFFSET + op]; + let inputs = counters[DMA_UNALIGNED_INPUTS_OFFSET + op]; + if rows > 0 { + dma_unaligned.add_op_rows(*current_chunk, 0, rows, inputs, op); + } + } + + // println!("chunk {current_chunk} counter: {counters:?}"); + } + + let plans = vec![ + (DmaTrace::::AIR_ID, dma_full.get_plan()), + (DmaMemCpyTrace::::AIR_ID, dma_memcpy.get_plan()), + (DmaInputCpyTrace::::AIR_ID, dma_inputcpy.get_plan()), + (DmaPrePostTrace::::AIR_ID, dma_pre_post_full.get_plan()), + (DmaPrePostMemCpyTrace::::AIR_ID, dma_pre_post_memcpy.get_plan()), + (DmaPrePostInputCpyTrace::::AIR_ID, dma_pre_post_inputcpy.get_plan()), + (Dma64AlignedTrace::::AIR_ID, dma_64_aligned_full.get_plan()), + (Dma64AlignedMemSetTrace::::AIR_ID, dma_64_aligned_memset.get_plan()), + (Dma64AlignedMemCpyTrace::::AIR_ID, dma_64_aligned_memcpy.get_plan()), + (Dma64AlignedInputCpyTrace::::AIR_ID, dma_64_aligned_inputcpy.get_plan()), + (Dma64AlignedMemTrace::::AIR_ID, dma_64_aligned_mem.get_plan()), + (DmaUnalignedTrace::::AIR_ID, dma_unaligned.get_plan()), + ]; + + #[cfg(feature = "save_dma_plans")] + self.save_plans("dma_plans.txt", totals_debug_info, &plans).unwrap(); + + plans + } + + #[cfg(feature = "save_dma_plans")] + fn save_plans( + &self, + filename: &str, + totals_debug_info: String, + plans: &Vec<(usize, Vec<(CheckPoint, DmaCheckPoint)>)>, + ) -> std::io::Result<()> { + let mut debug_info = format!( + "───────────────────────────────────────────────────── TOTALS\n{}\n{}", + totals_debug_info, self + ); + for (air_id, plan) in plans { + if plan.is_empty() { + continue; + } + let title = &format!("{}", get_dma_air_name::(*air_id)); + debug_info += &plan + .iter() + .enumerate() + .map(|(segment_id, (_checkpoint, dma_checkpoint))| { + dma_checkpoint.get_debug_info(title, segment_id as u64) + }) + .collect::>() + .join("\n"); + debug_info += "\n"; + } + use std::fs; + + let path = std::env::var("DEBUG_OUTPUT_PATH").unwrap_or_else(|_| "tmp/".to_string()); + let full_path = format!("{}{}", path, filename); + + fs::write(&full_path, debug_info)?; + Ok(()) + } +} diff --git a/precompiles/dma/src/dma_unaligned/dma_unaligned.rs b/precompiles/dma/src/dma_unaligned/dma_unaligned.rs new file mode 100644 index 000000000..b01a6759a --- /dev/null +++ b/precompiles/dma/src/dma_unaligned/dma_unaligned.rs @@ -0,0 +1,300 @@ +use std::sync::Arc; + +use fields::PrimeField64; + +use crate::{dma_trace, DmaUnalignedInput}; +use pil_std_lib::Std; +use precompiles_helpers::DmaInfo; +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +use zisk_common::SegmentId; +use zisk_pil::{DmaUnalignedAirValues, DUAL_RANGE_BYTE_ID}; + +#[cfg(feature = "packed")] +pub use zisk_pil::{DmaUnalignedTracePacked, DmaUnalignedTraceRowPacked}; + +#[cfg(not(feature = "packed"))] +pub use zisk_pil::{DmaUnalignedTrace, DmaUnalignedTraceRow}; + +#[cfg(feature = "packed")] +type DmaUnalignedTraceRowType = DmaUnalignedTraceRowPacked; +#[cfg(feature = "packed")] +type DmaUnalignedTraceType = DmaUnalignedTracePacked; + +#[cfg(not(feature = "packed"))] +type DmaUnalignedTraceRowType = DmaUnalignedTraceRow; +#[cfg(not(feature = "packed"))] +type DmaUnalignedTraceType = DmaUnalignedTrace; + +pub struct DmaUnalignedPrevSegment { + pub seq_end: bool, + pub dst64: u32, + pub src64: u32, + pub src_offset: u8, + pub main_step: u64, + pub count: u32, + pub is_mem_eq: bool, +} + +/// The `DmaUnalignedSM` struct encapsulates the logic of the DmaUnaligned State Machine. +pub struct DmaUnalignedSM { + /// Reference to the PIL2 standard library. + pub std: Arc>, + + /// Range checks ID's + range_16_bits_id: usize, + dual_range_byte_id: usize, +} + +impl DmaUnalignedSM { + /// Creates a new Dma State Machine instance. + /// + /// # Returns + /// A new `DmaUnalignedSM` instance. + pub fn new(std: Arc>) -> Arc { + Arc::new(Self { + std: std.clone(), + dual_range_byte_id: std + .get_virtual_table_id(DUAL_RANGE_BYTE_ID) + .expect("Failed to get tabl eDUAL_RANGE_BYTE ID ID"), + range_16_bits_id: std + .get_range_id(0, 0xFFFF, None) + .expect("Failed to get 16b table ID"), + }) + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_input( + &self, + input: &DmaUnalignedInput, + trace: &mut [DmaUnalignedTraceRowType], + local_dual_byte_table: &mut [u64], + air_values: &mut DmaUnalignedAirValues, + ) -> usize { + let rows = input.count as usize; + let is_last_instance_input = rows >= trace.len(); + let initial_count = DmaInfo::get_loop_count(input.encoded) - input.skip as usize; + let mut count = initial_count; + let src_offset = DmaInfo::get_loop_src_offset(input.encoded); + let mut dst64 = (input.dst >> 3) + input.skip; + let mut src64 = (input.src >> 3) + input.skip; + + let mut src_values_index = 0; + + let mut seq_end = false; + let mut next_value = 0; + assert!(rows > 0); + for (irow, row) in trace.iter_mut().enumerate().take(rows) { + row.set_main_step(input.step); + row.set_is_memeq(input.is_mem_eq); + row.set_no_last_no_seq_end(count != 0); + row.set_previous_seq_end(input.skip == 0 && irow == 0); + + row.set_dst64(dst64); + row.set_src64(src64); + dst64 += 1; + src64 += 1; + + row.set_offset_2(src_offset == 2); + row.set_offset_3(src_offset == 3); + row.set_offset_4(src_offset == 4); + row.set_offset_5(src_offset == 5); + row.set_offset_6(src_offset == 6); + row.set_offset_7(src_offset == 7); + + row.set_count(count as u32); + // println!("DMA_UNALIGNED: trace[{irow}] count:{count}"); + row.set_seq_end(count == 0); + + let value = input.src_values[src_values_index]; + src_values_index += 1; + if count == 0 { + seq_end = true; + next_value = 0; + } else { + count -= 1; + if src_values_index >= input.src_values.len() { + println!( + "DMA_UNALIGNED INPUT src_values_index out of bounds {} / {} count:{count} irow:{irow} INPUT:{:?}", + src_values_index, + input.src_values.len(), + input + ); + } + next_value = input.src_values[src_values_index]; + }; + + row.set_read_bytes(0, value as u8); + row.set_read_bytes(1, (value >> 8) as u8); + row.set_read_bytes(2, (value >> 16) as u8); + row.set_read_bytes(3, (value >> 24) as u8); + row.set_read_bytes(4, (value >> 32) as u8); + row.set_read_bytes(5, (value >> 40) as u8); + row.set_read_bytes(6, (value >> 48) as u8); + row.set_read_bytes(7, (value >> 56) as u8); + + // row.set_write_value(0, write_value as u32); + // row.set_write_value(1, (write_value >> 32) as u32); + + let value = value as usize; + local_dual_byte_table[value & 0xFFFF] += 1; + local_dual_byte_table[(value >> 16) & 0xFFFF] += 1; + local_dual_byte_table[(value >> 32) & 0xFFFF] += 1; + local_dual_byte_table[(value >> 48) & 0xFFFF] += 1; + } + + if is_last_instance_input { + if seq_end { + air_values.segment_last_seq_end = F::ONE; + air_values.segment_last_src64 = F::ZERO; + air_values.segment_last_dst64 = F::ZERO; + air_values.segment_last_main_step = F::ZERO; + air_values.segment_last_count = F::ZERO; + air_values.segment_last_count = F::ZERO; + air_values.segment_last_offset = F::ZERO; + air_values.last_count_chunk[0] = F::ZERO; + air_values.last_count_chunk[1] = F::ZERO; + air_values.segment_last_is_memeq = F::ZERO; + air_values.segment_next_bytes = [F::ZERO; 8]; + } else { + let last_row = rows - 1; + air_values.segment_last_seq_end = F::ZERO; + air_values.segment_last_src64 = F::from_u32(trace[last_row].get_src64()); + air_values.segment_last_dst64 = F::from_u32(trace[last_row].get_dst64()); + air_values.segment_last_main_step = F::from_u64(trace[last_row].get_main_step()); + air_values.segment_last_count = F::from_u32(trace[last_row].get_count()); + air_values.segment_last_offset = F::from_u8(src_offset); + let count = trace[last_row].get_count(); + air_values.last_count_chunk[0] = F::from_u16(count as u16); + air_values.last_count_chunk[1] = F::from_u16((count >> 16) as u16); + air_values.segment_last_is_memeq = F::from_bool(trace[last_row].get_is_memeq()); + for (index, byte) in air_values.segment_next_bytes.iter_mut().enumerate() { + *byte = F::from_u8((next_value >> (index * 8)) as u8); + } + } + } + rows + } + + /// Processes a slice of operation data, updating the trace. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Dma trace. + /// * `input` - The operation data to process. + #[inline(always)] + pub fn process_empty_slice(&self, trace: &mut DmaUnalignedTraceRowType) { + trace.set_seq_end(true); + trace.set_previous_seq_end(true); + } + + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + pub fn compute_witness( + &self, + inputs: &[Vec], + segment_id: SegmentId, + is_last_segment: bool, + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut trace = DmaUnalignedTraceType::::new_from_vec_zeroes(trace_buffer)?; + let num_rows = trace.num_rows(); + + let total_inputs: usize = inputs + .iter() + .map(|inputs| inputs.iter().map(|input| input.count as usize).sum::()) + .sum(); + + assert!(total_inputs <= num_rows, "total_inputs({total_inputs}) > num_rows({num_rows})"); + assert!(total_inputs > 0); + + dma_trace("DmaUnaligned", total_inputs, num_rows); + + timer_start_trace!(DMA_UNALIGNED_TRACE); + + let flat_inputs = crate::flatten_and_reorder_inputs(inputs); + // Split the dma_trace.buffer into slices matching each inner vector’s length. + let trace_rows = trace.buffer.as_mut_slice(); + + // TODO: add std method to used short table, no sense with instances around 2^22 use 64 bits, need more space. + let mut local_dual_byte_table = vec![0u64; 1 << 16]; + let mut air_values = DmaUnalignedAirValues::::new(); + let mut row_offset = 0; + for input in flat_inputs.iter() { + let rows_used = self.process_input( + input, + &mut trace_rows[row_offset..], + &mut local_dual_byte_table, + &mut air_values, + ); + row_offset += rows_used; + } + + let padding_size = num_rows - row_offset; + let last_count = if padding_size == 0 && !trace_rows[num_rows - 1].get_seq_end() { + trace_rows[num_rows - 1].get_count() + } else { + 0 + }; + self.std.range_check(self.range_16_bits_id, (last_count & 0xFFFF) as i64, 1); + self.std.range_check(self.range_16_bits_id, ((last_count >> 16) & 0xFFFF) as i64, 1); + + local_dual_byte_table[0] += (padding_size * 4) as u64; + self.std.inc_virtual_rows_ranged(self.dual_range_byte_id, &local_dual_byte_table); + + air_values.segment_id = F::from_usize(segment_id.into()); + air_values.is_last_segment = F::from_bool(is_last_segment); + + let first_input = flat_inputs.first().unwrap(); + if first_input.skip == 0 { + air_values.segment_previous_seq_end = F::ONE; + air_values.segment_previous_dst64 = F::ZERO; + air_values.segment_previous_src64 = F::ZERO; + air_values.segment_previous_main_step = F::ZERO; + air_values.segment_previous_count = F::ZERO; + air_values.segment_previous_is_memeq = F::ZERO; + air_values.segment_previous_offset = F::ZERO; + air_values.segment_first_bytes = [F::ZERO; 8]; + } else { + air_values.segment_previous_seq_end = F::ZERO; + air_values.segment_previous_dst64 = F::from_u32(trace_rows[0].get_dst64() - 1); + air_values.segment_previous_src64 = F::from_u32(trace_rows[0].get_src64() - 1); + air_values.segment_previous_main_step = F::from_u64(trace_rows[0].get_main_step()); + air_values.segment_previous_count = F::from_u32(trace_rows[0].get_count() + 1); + air_values.segment_previous_is_memeq = F::from_bool(trace_rows[0].get_is_memeq()); + air_values.segment_previous_offset = + F::from_u8(DmaInfo::get_loop_src_offset(first_input.encoded)); + for (index, byte) in air_values.segment_first_bytes.iter_mut().enumerate() { + *byte = F::from_u8(trace_rows[0].get_read_bytes(index)); + } + } + + // padding + if padding_size > 0 { + air_values.padding_size = F::from_u32(padding_size as u32); + for row in trace_rows.iter_mut().take(num_rows).skip(row_offset) { + self.process_empty_slice(row); + } + air_values.segment_last_seq_end = F::ONE; + air_values.segment_last_src64 = F::ZERO; + air_values.segment_last_dst64 = F::ZERO; + air_values.segment_last_main_step = F::ZERO; + air_values.segment_last_count = F::ZERO; + air_values.segment_last_is_memeq = F::ZERO; + air_values.segment_next_bytes = [F::ZERO; 8]; + } + timer_stop_and_log_trace!(DMA_UNALIGNED_TRACE); + let from_trace = FromTrace::new(&mut trace).with_air_values(&mut air_values); + Ok(AirInstance::new_from_trace(from_trace)) + } +} diff --git a/precompiles/dma/src/dma_unaligned/dma_unaligned_collector.rs b/precompiles/dma/src/dma_unaligned/dma_unaligned_collector.rs new file mode 100644 index 000000000..810c74726 --- /dev/null +++ b/precompiles/dma/src/dma_unaligned/dma_unaligned_collector.rs @@ -0,0 +1,194 @@ +//! The `DmaUnalignedInstance` module defines an instance to perform the witness computation +//! for the Dma State Machine. +//! +//! It manages collected inputs and interacts with the `DmaSM` to compute witnesses for +//! execution plans. + +use crate::{DmaCollectCounters, DmaCollectorRoutingLog, DmaInputPosition, DmaUnalignedInput}; +use std::any::Any; +use zisk_common::{BusDevice, BusId, ChunkId, OP, OPERATION_BUS_ID, OP_TYPE}; +use zisk_core::{zisk_ops::ZiskOp, ZiskOperationType}; + +pub struct DmaUnalignedCollector { + /// Collected inputs for witness computation. + pub inputs: Vec, + pub last_input_index: Option, + + pub chunk_id: ChunkId, + + /// Routing log for debugging and tracking collection operations. + pub rlog: DmaCollectorRoutingLog, + + /// The number of operations to collect. + pub num_inputs: u64, + + /// Helper to skip instructions based on the plan's configuration. + pub collect_counters: DmaCollectCounters, + + pub trace_offset: usize, + pub last_segment_collector: bool, +} + +impl DmaUnalignedCollector { + /// Creates a new `DmaUnalignedCollector`. + /// + /// # Arguments + /// + /// * `bus_id` - The connected bus ID. + /// * `num_inputs` - The number of inputs to collect. + /// * `collect_counter` - The helper to skip instructions based on the plan's configuration. + /// + /// # Returns + /// A new `DmaUnalignedCollector` instance initialized with the provided parameters. + pub fn new( + chunk_id: zisk_common::ChunkId, + num_inputs: u64, + collect_counters: DmaCollectCounters, + last_segment_collector: bool, + ) -> Self { + Self { + inputs: Vec::with_capacity(num_inputs as usize), + num_inputs, + collect_counters, + trace_offset: 0, + last_segment_collector, + chunk_id, + rlog: DmaCollectorRoutingLog::new(chunk_id), + last_input_index: None, + } + } + + const DMA_UNALIGNED_OPS: [u8; 4] = + [ZiskOp::DMA_MEMCPY, ZiskOp::DMA_XMEMCPY, ZiskOp::DMA_MEMCMP, ZiskOp::DMA_XMEMCMP]; + + /// Processes data received on the bus, collecting the inputs necessary for witness computation. + /// + /// # Arguments + /// * `_bus_id` - The ID of the bus (unused in this implementation). + /// * `data` - The data received from the bus. + /// * `pending` – A queue of pending bus operations used to send derived inputs. + /// + /// # Returns + /// A tuple where: + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64], data_ext: &[u64]) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + if data[OP_TYPE] != ZiskOperationType::Dma as u64 { + return true; + } + + // Method get_count get the rows that applies, means that if a + // input has src, dst aligned not applies + let rows = DmaUnalignedInput::get_count(data) as u64; + if rows == 0 { + return true; + } + + let op = data[OP] as u8; + + if !Self::DMA_UNALIGNED_OPS.contains(&op) { + return true; + } + + if self.inputs.len() == self.num_inputs as usize { + self.collect_counters.debug_assert_is_final_skip(); + return self.rlog.log_discard_cond(false, 3, data, true); + } + + if let Some((skip, max_count)) = self.collect_counters.should_collect(rows, op) { + self.rlog.log_collect(rows as u32, data, skip, max_count); + self.add_input(DmaUnalignedInput::from( + data, + data_ext, + self.trace_offset, + skip as usize, + max_count as usize, + )); + + self.trace_offset += max_count as usize; + if self.inputs.len() >= self.num_inputs as usize { + self.collect_counters.debug_assert_is_final_skip(); + self.rlog.log_discard(10, data); + return false; + } + } else { + self.rlog.log_discard(11, data); + } + true + } + + /// Adds an input to the collector with proper ordering management. + /// + /// This method handles: + /// - Adding the input to the vector + /// - Managing inputs that must be first (swaps to position 0) + /// - Tracking inputs that must be last (stores index for later swap) + /// + /// # Arguments + /// * `input` - The input to add + #[inline(always)] + fn add_input(&mut self, input: DmaUnalignedInput) { + // Check if input must be first before pushing + let must_be_first = input.must_be_first(); + let must_be_last = input.must_be_last(); + let current_index = self.inputs.len(); + + // Push the input + self.inputs.push(input); + + // Handle ordering requirements + if must_be_first { + // Swap with position 0 if not already first + if current_index > 0 { + self.inputs.swap(0, current_index); + } + } else if must_be_last { + // Edge case: if an input is huge and it's both first and last, + // must_be_first takes precedence and this branch won't execute + assert!(self.last_input_index.is_none(), "Multiple inputs marked as last input"); + self.last_input_index = Some(current_index); + } + } + + /// Returns debug information about the collector's state. + /// + /// When the `save_dma_collectors` feature is enabled, this returns detailed information + /// including chunk ID, number of collected inputs, counter information, and routing log. + /// Otherwise, returns an empty string. + /// + /// # Returns + /// A formatted string with debug information. + pub fn get_debug_info(&self) -> String { + #[cfg(feature = "save_dma_collectors")] + return format!( + "CC|{}|{}|{}\n", + self.chunk_id, + self.inputs.len(), + self.collect_counters.get_debug_info(), + ) + &self.rlog.get_debug_info(); + #[cfg(not(feature = "save_dma_collectors"))] + String::new() + } + pub fn take_inputs(&mut self) -> Vec { + if let Some(last_index) = self.last_input_index { + // If there's a last input index, swap it with the last element to ensure it's the last one in the trace. + let current_last_index = self.inputs.len() - 1; + self.inputs.swap(last_index, current_last_index); + } + std::mem::take(&mut self.inputs) + } + pub fn take_debug_inputs(&mut self) -> (String, Vec) { + let debug_info = self.get_debug_info(); + let inputs = self.take_inputs(); + (debug_info, inputs) + } +} + +impl BusDevice for DmaUnalignedCollector { + fn as_any(self: Box) -> Box { + self + } +} diff --git a/precompiles/dma/src/dma_unaligned/dma_unaligned_input.rs b/precompiles/dma/src/dma_unaligned/dma_unaligned_input.rs new file mode 100644 index 000000000..5a6e8dc79 --- /dev/null +++ b/precompiles/dma/src/dma_unaligned/dma_unaligned_input.rs @@ -0,0 +1,144 @@ +use precompiles_helpers::DmaInfo; +use zisk_common::{A, B, DMA_ENCODED, OP, STEP}; +use zisk_core::zisk_ops::ZiskOp; + +#[derive(Debug)] +pub struct DmaUnalignedInput { + pub src: u32, + pub dst: u32, + pub is_last_instance_input: bool, + pub is_mem_eq: bool, + pub trace_offset: u32, // offset inside trace to paralelize + pub skip: u32, // inside input how many rows skip + pub count: u32, // number of rows used + pub step: u64, + pub encoded: u64, + pub src_values: Vec, +} + +impl DmaUnalignedInput { + pub fn get_count(data: &[u64]) -> usize { + let encoded = data[DMA_ENCODED]; + if DmaInfo::get_dst_offset(encoded) == DmaInfo::get_src_offset(encoded) { + 0 + } else { + let count = DmaInfo::get_loop_count(encoded); + if count > 0 { + count + 1 + } else { + 0 + } + } + } + pub fn get_last_count(&self) -> usize { + let rows = self.count as usize; + let initial_count = self.get_initial_count(); + initial_count - rows + 1 + } + pub fn get_initial_count(&self) -> usize { + DmaInfo::get_count(self.encoded) - self.skip as usize + } + pub fn from( + data: &[u64], + data_ext: &[u64], + trace_offset: usize, + skip: usize, + max_count: usize, + ) -> Self { + let encoded = data[DMA_ENCODED]; + let op = data[OP] as u8; + debug_assert!( + op == ZiskOp::DMA_MEMCPY + || op == ZiskOp::DMA_XMEMCPY + || op == ZiskOp::DMA_MEMCMP + || op == ZiskOp::DMA_XMEMCMP, + "Unexpected operation on DmaUnalignedInput 0x{op:02X}", + ); + let pre_count = DmaInfo::get_pre_count(encoded) as u32; + let data_offset = DmaInfo::get_loop_data_offset(encoded) + skip; + + // unaligned need an extra row to read part of next bytes + let pending_count = DmaInfo::get_loop_count(encoded) + 1 - skip; + let count: usize = std::cmp::min(pending_count, max_count); + + // if count not enough to finish unaligned memcpy, add extra source because one row + // use next source value + let src_values_count = if count < pending_count { count + 1 } else { count }; + let op = data[OP] as u8; + assert!(DmaInfo::get_loop_count(encoded) > 0); + Self { + dst: data[A] as u32 + pre_count, + src: data[B] as u32 + DmaInfo::get_src64_inc_by_pre(encoded) as u32 * 8, + trace_offset: trace_offset as u32, + is_last_instance_input: max_count < pending_count, + step: data[STEP], + skip: skip as u32, + count: count as u32, + encoded, + is_mem_eq: op == ZiskOp::DMA_MEMCMP || op == ZiskOp::DMA_XMEMCMP, + src_values: data_ext[data_offset..data_offset + src_values_count].to_vec(), + } + } + pub fn get_rows(&self) -> usize { + DmaInfo::get_loop_count(self.encoded) + } + + #[cfg(feature = "save_dma_inputs")] + /// Writes a list of DmaUnalignedInput instances to a text file with columns separated by |. + /// Path is taken from DEBUG_OUTPUT_PATH environment variable, defaulting to "tmp/". + pub fn dump_to_file(inputs: &[Vec], filename: &str) -> std::io::Result<()> { + use std::io::Write; + let path = std::env::var("DEBUG_OUTPUT_PATH").unwrap_or_else(|_| "tmp/".to_string()); + let full_path = format!("{}{}", path, filename); + + let mut file = std::fs::File::create(&full_path)?; + + // Write header + writeln!( + file, + "{:>8}|{:>10}|{:>10}|{:>22}|{:>9}|{:>12}|{:>8}|{:>8}|{:>14}|{:>18}|src_values", + "pos", + "src", + "dst", + "is_last_instance_input", + "is_mem_eq", + "trace_offset", + "skip", + "count", + "step", + "encoded" + )?; + + // Write data rows + for (pos, input) in inputs.iter().flatten().enumerate() { + let src_values_hex: Vec = + input.src_values.iter().map(|v| format!("0x{:016X}", v)).collect(); + writeln!( + file, + "{:>8}|0x{:08X}|0x{:08X}|{:>22}|{:>9}|{:>12}|{:>8}|{:>8}|{:>14}|0x{:016X}|{}", + pos, + input.src, + input.dst, + input.is_last_instance_input, + input.is_mem_eq, + input.trace_offset, + input.skip, + input.count, + input.step, + input.encoded, + src_values_hex.join(",") + )?; + } + + Ok(()) + } +} + +impl crate::DmaInputPosition for DmaUnalignedInput { + fn must_be_first(&self) -> bool { + self.skip > 0 + } + fn must_be_last(&self) -> bool { + self.is_last_instance_input + } +} diff --git a/precompiles/dma/src/dma_unaligned/dma_unaligned_instance.rs b/precompiles/dma/src/dma_unaligned/dma_unaligned_instance.rs new file mode 100644 index 000000000..611b49956 --- /dev/null +++ b/precompiles/dma/src/dma_unaligned/dma_unaligned_instance.rs @@ -0,0 +1,169 @@ +//! The `DmaUnalignedInstance` module defines an instance to perform the witness computation +//! for the Dma State Machine. +//! +//! It manages collected inputs and interacts with the `DmaSM` to compute witnesses for +//! execution plans. + +#[cfg(feature = "save_dma_collectors")] +use crate::save_dma_collectors; +#[cfg(feature = "save_dma_inputs")] +use crate::DmaUnalignedInput; +use crate::{DmaCheckPoint, DmaUnalignedCollector, DmaUnalignedSM}; +use fields::PrimeField64; +use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; +use std::sync::Arc; +use zisk_common::ChunkId; +use zisk_common::StatsType; +use zisk_common::{BusDevice, CheckPoint, Instance, InstanceCtx, InstanceType, PayloadType}; +use zisk_pil::DmaUnalignedTrace; + +/// The `DmaUnalignedInstance` struct represents an instance for the Dma State Machine. +/// +/// It encapsulates the `DmaUnalignedSM` and its associated context, and it processes input data +/// to compute witnesses for the DmaUnaligned State Machine. +pub struct DmaUnalignedInstance { + /// Dma state machine. + dma_unaligned_sm: Arc>, + + /// Instance context. + ictx: InstanceCtx, + + /// Flag to define that it's last segment + is_last_segment: bool, +} + +impl DmaUnalignedInstance { + /// Creates a new `DmaUnalignedInstance`. + /// + /// # Arguments + /// * `dma_unaligned_sm` - An `Arc`-wrapped reference to the Dma Unaligned State Machine. + /// * `ictx` - The `InstanceCtx` associated with this instance, containing the execution plan. + /// * `bus_id` - The bus ID associated with this instance. + /// + /// # Returns + /// A new `DmaUnalignedInstance` instance initialized with the provided state machine and + /// context. + pub fn new(dma_unaligned_sm: Arc>, ictx: InstanceCtx) -> Self { + let is_last_segment = { + let meta = ictx.plan.meta.as_ref().unwrap(); + let checkpoint = meta.downcast_ref::().unwrap(); + checkpoint.is_last_segment + }; + Self { dma_unaligned_sm, ictx, is_last_segment } + } + + pub fn build_dma_collector(&self, chunk_id: ChunkId) -> DmaUnalignedCollector { + assert_eq!( + self.ictx.plan.air_id, + DmaUnalignedTrace::::AIR_ID, + "DmaUnalignedInstance: Unsupported air_id: {:?}", + self.ictx.plan.air_id + ); + + let meta = self.ictx.plan.meta.as_ref().unwrap(); + let collect_info = meta.downcast_ref::().unwrap(); + let (num_inputs, collect_counter) = collect_info.chunks[&chunk_id]; + DmaUnalignedCollector::new( + chunk_id, + num_inputs, + collect_counter, + Some(chunk_id) == collect_info.last_chunk, + ) + } +} + +impl Instance for DmaUnalignedInstance { + /// Computes the witness for the Dma execution plan. + /// + /// This method leverages the `DmaUnalignedSM` to generate an `AirInstance` using the collected + /// inputs. + /// + /// # Arguments + /// * `_pctx` - The proof context, unused in this implementation. + /// + /// # Returns + /// An `Option` containing the computed `AirInstance`. + fn compute_witness( + &self, + _pctx: &ProofCtx, + _sctx: &SetupCtx, + collectors: Vec<(usize, Box>)>, + trace_buffer: Vec, + ) -> ProofmanResult>> { + #[cfg(feature = "save_dma_collectors")] + let (debug, inputs): (Vec<_>, Vec<_>) = collectors + .into_iter() + .map(|(_, collector)| { + collector.as_any().downcast::().unwrap().take_debug_inputs() + }) + .unzip(); + #[cfg(not(feature = "save_dma_collectors"))] + let inputs: Vec<_> = collectors + .into_iter() + .map(|(_, collector)| { + collector.as_any().downcast::().unwrap().take_inputs() + }) + .collect(); + + let segment_id = self.ictx.plan.segment_id.unwrap(); + + #[cfg(feature = "save_dma_collectors")] + save_dma_collectors(&format!("dma_unaligned_collector_{segment_id:04}.txt"), debug)?; + + #[cfg(feature = "save_dma_inputs")] + DmaUnalignedInput::dump_to_file( + &inputs, + &format!("dma_unaligned_inputs_{segment_id:04}.txt"), + )?; + + Ok(Some(self.dma_unaligned_sm.compute_witness( + &inputs, + segment_id, + self.is_last_segment, + trace_buffer, + )?)) + } + + /// Retrieves the checkpoint associated with this instance. + /// + /// # Returns + /// A `CheckPoint` object representing the checkpoint of the execution plan. + fn check_point(&self) -> &CheckPoint { + &self.ictx.plan.check_point + } + + /// Retrieves the type of this instance. + /// + /// # Returns + /// An `InstanceType` representing the type of this instance (`InstanceType::Instance`). + fn instance_type(&self) -> InstanceType { + InstanceType::Instance + } + + fn stats_type(&self) -> StatsType { + StatsType::Precompiled + } + + fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { + assert_eq!( + self.ictx.plan.air_id, + DmaUnalignedTrace::::AIR_ID, + "DmaUnalignedInstance: Unsupported air_id: {:?}", + self.ictx.plan.air_id + ); + + let meta = self.ictx.plan.meta.as_ref().unwrap(); + let collect_info = meta.downcast_ref::().unwrap(); + let (num_inputs, collect_counter) = collect_info.chunks[&chunk_id]; + Some(Box::new(DmaUnalignedCollector::new( + chunk_id, + num_inputs, + collect_counter, + Some(chunk_id) == collect_info.last_chunk, + ))) + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} diff --git a/precompiles/dma/src/dma_unaligned/mod.rs b/precompiles/dma/src/dma_unaligned/mod.rs new file mode 100644 index 000000000..4949c3c18 --- /dev/null +++ b/precompiles/dma/src/dma_unaligned/mod.rs @@ -0,0 +1,10 @@ +#[allow(clippy::module_inception)] +mod dma_unaligned; +mod dma_unaligned_collector; +mod dma_unaligned_input; +mod dma_unaligned_instance; + +pub use dma_unaligned::*; +pub use dma_unaligned_collector::*; +pub use dma_unaligned_input::*; +pub use dma_unaligned_instance::*; diff --git a/precompiles/dma/src/lib.rs b/precompiles/dma/src/lib.rs new file mode 100644 index 000000000..1894ebba2 --- /dev/null +++ b/precompiles/dma/src/lib.rs @@ -0,0 +1,41 @@ +mod dma; +mod dma_64_aligned; +mod dma_bus_device; +mod dma_checkpoint; +mod dma_collect_counters; +mod dma_collector_routing_log; +mod dma_common; +mod dma_constants; +mod dma_gen_inputcpy_mem_inputs; +mod dma_gen_mem_inputs; +mod dma_gen_memcmp_mem_inputs; +mod dma_gen_memcpy_mem_inputs; +mod dma_gen_memset_mem_inputs; +mod dma_instance_info; +mod dma_instances_builder; +mod dma_manager; +mod dma_planner; +mod dma_pre_post; +mod dma_strategy; +mod dma_unaligned; + +pub use dma::*; +pub use dma_64_aligned::*; +pub use dma_bus_device::*; +pub use dma_checkpoint::*; +pub use dma_collect_counters::*; +pub use dma_collector_routing_log::*; +pub use dma_common::*; +pub use dma_constants::*; +pub use dma_gen_inputcpy_mem_inputs::*; +pub use dma_gen_mem_inputs::*; +pub use dma_gen_memcmp_mem_inputs::*; +pub use dma_gen_memcpy_mem_inputs::*; +pub use dma_gen_memset_mem_inputs::*; +pub use dma_instance_info::*; +pub use dma_instances_builder::*; +pub use dma_manager::*; +pub use dma_planner::*; +pub use dma_pre_post::*; +pub use dma_strategy::*; +pub use dma_unaligned::*; diff --git a/precompiles/helpers/Cargo.toml b/precompiles/helpers/Cargo.toml index 65a7e9764..6839a050e 100644 --- a/precompiles/helpers/Cargo.toml +++ b/precompiles/helpers/Cargo.toml @@ -14,6 +14,7 @@ circuit = { workspace = true } ark-ff = { workspace = true } ark-std = { workspace = true } ark-secp256k1 = { workspace = true } +ark-secp256r1 = { workspace = true } ark-bn254 = { workspace = true } ark-bls12-381 = { workspace = true } num-bigint = { workspace = true } @@ -22,4 +23,5 @@ num-traits = { workspace = true } cfg-if = "1.0" [features] -default = [] \ No newline at end of file +default = [] +debug_dma = [] \ No newline at end of file diff --git a/precompiles/helpers/src/arith_eq/mod.rs b/precompiles/helpers/src/arith_eq/mod.rs index ce92bb1a8..8bfcd8522 100644 --- a/precompiles/helpers/src/arith_eq/mod.rs +++ b/precompiles/helpers/src/arith_eq/mod.rs @@ -2,8 +2,10 @@ mod arith256; mod bn254complex; mod bn254curve; mod secp256k1; +mod secp256r1; pub use arith256::*; pub use bn254complex::*; pub use bn254curve::*; pub use secp256k1::*; +pub use secp256r1::*; diff --git a/precompiles/helpers/src/arith_eq/secp256r1.rs b/precompiles/helpers/src/arith_eq/secp256r1.rs new file mode 100644 index 000000000..fa10fba4c --- /dev/null +++ b/precompiles/helpers/src/arith_eq/secp256r1.rs @@ -0,0 +1,30 @@ +// TODO: Implement these functions in assembly to speed things up! + +use ark_ff::{BigInt, PrimeField}; +use ark_secp256r1::Fq as Secp256r1Field; + +pub fn secp256r1_add(p1: &[u64; 8], p2: &[u64; 8], p: &mut [u64; 8]) { + let x1 = Secp256r1Field::from(BigInt::<4>(p1[0..4].try_into().unwrap())); + let y1 = Secp256r1Field::from(BigInt::<4>(p1[4..8].try_into().unwrap())); + let x2 = Secp256r1Field::from(BigInt::<4>(p2[0..4].try_into().unwrap())); + let y2 = Secp256r1Field::from(BigInt::<4>(p2[4..8].try_into().unwrap())); + + let s = (y2 - y1) / (x2 - x1); + let x3 = s * s - (x1 + x2); + let y3 = s * (x1 - x3) - y1; + + p[..4].copy_from_slice(&x3.into_bigint().0); + p[4..].copy_from_slice(&y3.into_bigint().0); +} + +pub fn secp256r1_dbl(p1: &[u64; 8], p: &mut [u64; 8]) { + let x1 = Secp256r1Field::from(BigInt::<4>(p1[0..4].try_into().unwrap())); + let y1 = Secp256r1Field::from(BigInt::<4>(p1[4..8].try_into().unwrap())); + + let s = (Secp256r1Field::from(3u64) * x1 * x1 + Secp256r1Field::from(-3)) / (y1 + y1); + let x3 = s * s - (x1 + x1); + let y3 = s * (x1 - x3) - y1; + + p[..4].copy_from_slice(&x3.into_bigint().0); + p[4..].copy_from_slice(&y3.into_bigint().0); +} diff --git a/precompiles/helpers/src/blake2/blake2b/mod.rs b/precompiles/helpers/src/blake2/blake2b/mod.rs new file mode 100644 index 000000000..b5a63501f --- /dev/null +++ b/precompiles/helpers/src/blake2/blake2b/mod.rs @@ -0,0 +1,347 @@ +mod round; + +pub use round::blake2b_round; + +/// BLAKE2b initialization vectors +const IV: [u64; 8] = [ + 0x6A09E667F3BCC908, + 0xBB67AE8584CAA73B, + 0x3C6EF372FE94F82B, + 0xA54FF53A5F1D36F1, + 0x510E527FADE682D1, + 0x9B05688C2B3E6C1F, + 0x1F83D9ABFB41BD6B, + 0x5BE0CD19137E2179, +]; + +/// BLAKE2b compression function +/// +/// # Arguments +/// * `rounds` - Number of rounds (typically 12 for BLAKE2b) +/// * `state` - The internal state h (8 x 64-bit words as bits) +/// * `message` - The message block m (16 x 64-bit words as bits) +/// * `t` - Offset counters (2 x 64-bit words) +/// * `f` - Final block flag +pub fn blake2b_compress(rounds: u32, h: &mut [u64; 8], m: &[u64; 16], t: &[u64; 2], f: bool) { + let mut v = [0u64; 16]; + + v[..8].copy_from_slice(h); + v[8..16].copy_from_slice(&IV); + + v[12] ^= t[0]; + v[13] ^= t[1]; + + if f { + v[14] = !v[14]; + } + + for r in 0..rounds { + blake2b_round(&mut v, m, r); + } + + for i in 0..8 { + h[i] ^= v[i] ^ v[i + 8]; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_blake2b_eip152_vector1() { + // Test vector from EIP-152 + // Input: + // rounds = 0 + // h = 48c9bdf267e6096a 3ba7ca8485ae67bb 2bf894fe72f36e3c f1361d5f3af54fa5 + // d182e6ad7f520e51 1f6c3e2b8c68059b 6bbd41fbabd9831f 79217e1319cde05b + // m = 6162630000000000 0000000000000000 0000000000000000 0000000000000000 + // 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + // 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + // 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + // t = 03 00 00 00 00 00 00 00, 00 00 00 00 00 00 00 00 + // f = true + // + // Expected output: + // 08c9bcf367e6096a 3ba7ca8485ae67bb 2bf894fe72f36e3c f1361d5f3af54fa5 + // d282e6ad7f520e51 1f6c3e2b8c68059b 9442be0454267ce0 79217e1319cde05b + + let rounds = 0u32; + + let mut h: [u64; 8] = [ + 0x48c9bdf267e6096au64.swap_bytes(), + 0x3ba7ca8485ae67bbu64.swap_bytes(), + 0x2bf894fe72f36e3cu64.swap_bytes(), + 0xf1361d5f3af54fa5u64.swap_bytes(), + 0xd182e6ad7f520e51u64.swap_bytes(), + 0x1f6c3e2b8c68059bu64.swap_bytes(), + 0x6bbd41fbabd9831fu64.swap_bytes(), + 0x79217e1319cde05bu64.swap_bytes(), + ]; + + let m: [u64; 16] = [ + 0x6162630000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + ]; + + let t: [u64; 2] = [0x0300000000000000u64.swap_bytes(), 0x0000000000000000u64.swap_bytes()]; + + let f = true; + + blake2b_compress(rounds, &mut h, &m, &t, f); + + // Expected output (8 × u64, little-endian) + let expected = [ + 0x08c9bcf367e6096au64.swap_bytes(), + 0x3ba7ca8485ae67bbu64.swap_bytes(), + 0x2bf894fe72f36e3cu64.swap_bytes(), + 0xf1361d5f3af54fa5u64.swap_bytes(), + 0xd282e6ad7f520e51u64.swap_bytes(), + 0x1f6c3e2b8c68059bu64.swap_bytes(), + 0x9442be0454267ce0u64.swap_bytes(), + 0x79217e1319cde05bu64.swap_bytes(), + ]; + + assert_eq!( + h, expected, + "Blake2b does not match:\n exp: {:02x?},\n got: {:02x?}", + expected, h + ); + } + + #[test] + fn test_blake2b_eip152_vector2() { + // Test vector from EIP-152 + // Input: + // rounds = 12 + // h = 48c9bdf267e6096a 3ba7ca8485ae67bb 2bf894fe72f36e3c f1361d5f3af54fa5 + // d182e6ad7f520e51 1f6c3e2b8c68059b 6bbd41fbabd9831f 79217e1319cde05b + // m = 6162630000000000 0000000000000000 0000000000000000 0000000000000000 + // 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + // 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + // 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + // t = 03 00 00 00 00 00 00 00, 00 00 00 00 00 00 00 00 + // f = true + // + // Expected output: + // ba80a53f981c4d0d 6a2797b69f12f6e9 4c212f14685ac4b7 4b12bb6fdbffa2d1 + // 7d87c5392aab792d c252d5de4533cc95 18d38aa8dbf1925a b92386edd4009923 + + let rounds = 12; + + let mut h: [u64; 8] = [ + 0x48c9bdf267e6096au64.swap_bytes(), + 0x3ba7ca8485ae67bbu64.swap_bytes(), + 0x2bf894fe72f36e3cu64.swap_bytes(), + 0xf1361d5f3af54fa5u64.swap_bytes(), + 0xd182e6ad7f520e51u64.swap_bytes(), + 0x1f6c3e2b8c68059bu64.swap_bytes(), + 0x6bbd41fbabd9831fu64.swap_bytes(), + 0x79217e1319cde05bu64.swap_bytes(), + ]; + + let m: [u64; 16] = [ + 0x6162630000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + ]; + + let t: [u64; 2] = [0x0300000000000000u64.swap_bytes(), 0x0000000000000000u64.swap_bytes()]; + + let f = true; + + blake2b_compress(rounds, &mut h, &m, &t, f); + + let expected: [u64; 8] = [ + 0xba80a53f981c4d0du64.swap_bytes(), + 0x6a2797b69f12f6e9u64.swap_bytes(), + 0x4c212f14685ac4b7u64.swap_bytes(), + 0x4b12bb6fdbffa2d1u64.swap_bytes(), + 0x7d87c5392aab792du64.swap_bytes(), + 0xc252d5de4533cc95u64.swap_bytes(), + 0x18d38aa8dbf1925au64.swap_bytes(), + 0xb92386edd4009923u64.swap_bytes(), + ]; + + assert_eq!( + h, expected, + "Blake2b does not match:\n exp: {:016x?},\n got: {:016x?}", + expected, h + ); + } + + #[test] + fn test_blake2b_eip152_vector3() { + // Test vector from EIP-152 + // Input: + // rounds = 12 + // h = 48c9bdf267e6096a 3ba7ca8485ae67bb 2bf894fe72f36e3c f1361d5f3af54fa5 + // d182e6ad7f520e51 1f6c3e2b8c68059b 6bbd41fbabd9831f 79217e1319cde05b + // m = 6162630000000000 0000000000000000 0000000000000000 0000000000000000 + // 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + // 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + // 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + // t = 03 00 00 00 00 00 00 00, 00 00 00 00 00 00 00 00 + // f = false + // + // Expected output: + // 75ab69d3190a562c 51aef8d88f1c2775 876944407270c42c 9844252c26d28752 + // 98743e7f6d5ea2f2 d3e8d226039cd31b 4e426ac4f2d3d666 a610c2116fde4735 + + let rounds = 12; + + let mut h: [u64; 8] = [ + 0x48c9bdf267e6096au64.swap_bytes(), + 0x3ba7ca8485ae67bbu64.swap_bytes(), + 0x2bf894fe72f36e3cu64.swap_bytes(), + 0xf1361d5f3af54fa5u64.swap_bytes(), + 0xd182e6ad7f520e51u64.swap_bytes(), + 0x1f6c3e2b8c68059bu64.swap_bytes(), + 0x6bbd41fbabd9831fu64.swap_bytes(), + 0x79217e1319cde05bu64.swap_bytes(), + ]; + + let m: [u64; 16] = [ + 0x6162630000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + ]; + + let t: [u64; 2] = [0x0300000000000000u64.swap_bytes(), 0x0000000000000000u64.swap_bytes()]; + + let f = false; + + blake2b_compress(rounds, &mut h, &m, &t, f); + + let expected: [u64; 8] = [ + 0x75ab69d3190a562cu64.swap_bytes(), + 0x51aef8d88f1c2775u64.swap_bytes(), + 0x876944407270c42cu64.swap_bytes(), + 0x9844252c26d28752u64.swap_bytes(), + 0x98743e7f6d5ea2f2u64.swap_bytes(), + 0xd3e8d226039cd31bu64.swap_bytes(), + 0x4e426ac4f2d3d666u64.swap_bytes(), + 0xa610c2116fde4735u64.swap_bytes(), + ]; + + assert_eq!( + h, expected, + "Blake2b does not match:\n exp: {:016x?},\n got: {:016x?}", + expected, h + ); + } + + #[test] + fn test_blake2b_eip152_vector4() { + // Test vector from EIP-152 + // Input: + // rounds = 1 + // h = 48c9bdf267e6096a 3ba7ca8485ae67bb 2bf894fe72f36e3c f1361d5f3af54fa5 + // d182e6ad7f520e51 1f6c3e2b8c68059b 6bbd41fbabd9831f 79217e1319cde05b + // m = 6162630000000000 0000000000000000 0000000000000000 0000000000000000 + // 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + // 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + // 0000000000000000 0000000000000000 0000000000000000 0000000000000000 + // t = 03 00 00 00 00 00 00 00, 00 00 00 00 00 00 00 00 + // f = true + // + // Expected output: + // b63a380cb2897d52 1994a85234ee2c18 1b5f844d2c624c00 2677e9703449d2fb + // a551b3a8333bcdf5 f2f7e08993d53923 de3d64fcc68c034e 717b9293fed7a421 + + let rounds = 1; + + let mut h: [u64; 8] = [ + 0x48c9bdf267e6096au64.swap_bytes(), + 0x3ba7ca8485ae67bbu64.swap_bytes(), + 0x2bf894fe72f36e3cu64.swap_bytes(), + 0xf1361d5f3af54fa5u64.swap_bytes(), + 0xd182e6ad7f520e51u64.swap_bytes(), + 0x1f6c3e2b8c68059bu64.swap_bytes(), + 0x6bbd41fbabd9831fu64.swap_bytes(), + 0x79217e1319cde05bu64.swap_bytes(), + ]; + + let m: [u64; 16] = [ + 0x6162630000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + 0x0000000000000000u64.swap_bytes(), + ]; + + let t: [u64; 2] = [0x0300000000000000u64.swap_bytes(), 0x0000000000000000u64.swap_bytes()]; + + let f = true; + + blake2b_compress(rounds, &mut h, &m, &t, f); + + let expected: [u64; 8] = [ + 0xb63a380cb2897d52u64.swap_bytes(), + 0x1994a85234ee2c18u64.swap_bytes(), + 0x1b5f844d2c624c00u64.swap_bytes(), + 0x2677e9703449d2fbu64.swap_bytes(), + 0xa551b3a8333bcdf5u64.swap_bytes(), + 0xf2f7e08993d53923u64.swap_bytes(), + 0xde3d64fcc68c034eu64.swap_bytes(), + 0x717b9293fed7a421u64.swap_bytes(), + ]; + + assert_eq!( + h, expected, + "Blake2b does not match:\n exp: {:016x?},\n got: {:016x?}", + expected, h + ); + } +} diff --git a/precompiles/helpers/src/blake2/blake2b/round.rs b/precompiles/helpers/src/blake2/blake2b/round.rs new file mode 100644 index 000000000..66da64619 --- /dev/null +++ b/precompiles/helpers/src/blake2/blake2b/round.rs @@ -0,0 +1,64 @@ +/// Message word permutation schedule +const SIGMA: [[usize; 16]; 10] = [ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3], + [11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4], + [7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8], + [9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13], + [2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9], + [12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11], + [13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10], + [6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5], + [10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0], +]; + +/// Rotation constants for G function +const R1: u32 = 32; +const R2: u32 = 24; +const R3: u32 = 16; +const R4: u32 = 63; + +/// BLAKE2b round function +pub fn blake2b_round(v: &mut [u64; 16], m: &[u64; 16], round: u32) { + // Message word selection permutation for this round + let s = &SIGMA[(round % 10) as usize]; + + // Column step + g(v, 0, 4, 8, 12, m[s[0]], m[s[1]]); + g(v, 1, 5, 9, 13, m[s[2]], m[s[3]]); + g(v, 2, 6, 10, 14, m[s[4]], m[s[5]]); + g(v, 3, 7, 11, 15, m[s[6]], m[s[7]]); + + // Diagonal step + g(v, 0, 5, 10, 15, m[s[8]], m[s[9]]); + g(v, 1, 6, 11, 12, m[s[10]], m[s[11]]); + g(v, 2, 7, 8, 13, m[s[12]], m[s[13]]); + g(v, 3, 4, 9, 14, m[s[14]], m[s[15]]); +} + +/// G mixing function +/// +/// The G function mixes two input words `x` and `y` from the message block into the state. +/// It operates on 4 state words: v[a], v[b], v[c], v[d] +#[allow(clippy::too_many_arguments)] +fn g(v: &mut [u64; 16], a: usize, b: usize, c: usize, d: usize, x: u64, y: u64) { + let mut va = v[a]; + let mut vb = v[b]; + let mut vc = v[c]; + let mut vd = v[d]; + + va = va.wrapping_add(vb).wrapping_add(x); + vd = (vd ^ va).rotate_right(R1); + vc = vc.wrapping_add(vd); + vb = (vb ^ vc).rotate_right(R2); + + va = va.wrapping_add(vb).wrapping_add(y); + vd = (vd ^ va).rotate_right(R3); + vc = vc.wrapping_add(vd); + vb = (vb ^ vc).rotate_right(R4); + + v[a] = va; + v[b] = vb; + v[c] = vc; + v[d] = vd; +} diff --git a/precompiles/helpers/src/blake2/mod.rs b/precompiles/helpers/src/blake2/mod.rs new file mode 100644 index 000000000..5513f49c8 --- /dev/null +++ b/precompiles/helpers/src/blake2/mod.rs @@ -0,0 +1,7 @@ +mod blake2b; +// mod blake2b_expr; +// mod blake2b_state; + +pub use blake2b::*; +// pub use blake2b_expr::blake2b_expr; +// pub use blake2b_state::blake2b_state; diff --git a/precompiles/helpers/src/common.rs b/precompiles/helpers/src/common.rs index 61a28cd2c..b979f9da1 100644 --- a/precompiles/helpers/src/common.rs +++ b/precompiles/helpers/src/common.rs @@ -2,6 +2,7 @@ use ark_bls12_381::Fq as Bls12_381Field; use ark_bn254::Fq as Bn254Field; use ark_ff::PrimeField; use ark_secp256k1::Fq as Secp256k1Field; +use ark_secp256r1::Fq as Secp256r1Field; use num_bigint::{BigInt, Sign}; use num_traits::Zero; @@ -183,6 +184,17 @@ impl FieldToBigInt for Bls12_381Field { } } +impl FieldToBigInt for Secp256r1Field { + fn to_bigint(&self) -> BigInt { + let mut result = BigInt::zero(); + for &word in self.into_bigint().0.iter().rev() { + result <<= 64; + result += word; + } + result + } +} + pub fn bigint_from_field(value: &F) -> BigInt { value.to_bigint() } diff --git a/precompiles/helpers/src/dma.rs b/precompiles/helpers/src/dma.rs new file mode 100644 index 000000000..5744520b5 --- /dev/null +++ b/precompiles/helpers/src/dma.rs @@ -0,0 +1,925 @@ +// use static_assertions::const_assert; +// const_assert!(CHUNK_MEM_STEP_BITS <= 24); + +pub struct DmaHelpers {} + +pub struct DmaValues { + pub dst64: u64, + pub src64: u64, + pub src_offset: u64, + pub dst_offset: u64, + pub pre_count: u64, + pub post_count: u64, + pub memcpy_count: u64, + pub src64_inc_by_pre: u64, + pub src_offset_after_pre: u64, +} + +// #bits bits +// pre_count: 0-7 3 0-2 +// post_count: 0-8(*) 4 3-6 (*) memcmp +// pre_writes: 0,1,2 2 7-8 +// dst_offset: 0-7 3 9-11 +// src_offset: 0-7 3 12-14 +// double_src_pre: 0,1 1 15 +// double_src_post: 0,1 1 16 +// extra_src_reads: 0-3 2 17-18 +// src64_inc_by_pre: 1 19 +// unaligned_dst_src: 1 20 +// fill_byte/cmp: 8 21-28 +// cmp_negative: 1 29 +// requires_dma: 1 30 +// (reserved) 1 31 +// lpre_count 3 32-34 +// loop_count 29 35-63 + +const FAST_ENCODE_TABLE_SIZE: usize = 8 * 8 * 16; +const FAST_ENCODE_TABLE_MEMCMP_SIZE: usize = FAST_ENCODE_TABLE_SIZE * 2; +const FAST_ENCODE_NO_SRC_TABLE_SIZE: usize = 8 * 16; +const FAST_ENCODE_TABLE: [u64; FAST_ENCODE_TABLE_SIZE] = generate_fast_encode_table(); +const FAST_ENCODE_MEMCMP_TABLE: [u64; FAST_ENCODE_TABLE_MEMCMP_SIZE] = + generate_fast_encode_memcmp_table(); +const FAST_ENCODE_NO_SRC_TABLE: [u64; FAST_ENCODE_NO_SRC_TABLE_SIZE] = + generate_fast_encode_no_src_table(); + +const fn generate_fast_encode_table() -> [u64; FAST_ENCODE_TABLE_SIZE] { + let mut table = [0u64; FAST_ENCODE_TABLE_SIZE]; + // fill table + let mut dst_offset: u64 = 0; + while dst_offset < 8 { + let base_index = dst_offset << 7; + let mut src_offset: u64 = 0; + while src_offset < 8 { + let index = (base_index + (src_offset << 4)) as usize; + let mut count: usize = 0; + while count < 16 { + let value = DmaInfo::calculate_encode(dst_offset, src_offset, count, false, true); + let loop_count = DmaInfo::get_loop_count(value) as u64; + // The table is create to add directly de loop count and after all values + // are correct, for this reason substract de count, because we need diference + // between loop_count (shifted 32) and count (shifted 29) + table[index + count] = ((value & 0x0000_0007_FFFF_FFFF) + + (loop_count << DmaInfo::DMA_LOOP_COUNT_RS)) + .wrapping_sub((count as u64) << DmaInfo::DMA_LPRE_COUNT_RS); + count += 1; + } + src_offset += 1; + } + dst_offset += 1; + } + table +} + +const fn generate_fast_encode_memcmp_table() -> [u64; FAST_ENCODE_TABLE_MEMCMP_SIZE] { + let mut table = [0u64; FAST_ENCODE_TABLE_MEMCMP_SIZE]; + // fill table + let mut neq_index = 0; + while neq_index < 2 { + let neq = neq_index != 0; + let neq_base = 8 * 8 * 16 * neq_index; + let mut dst_offset: u64 = 0; + while dst_offset < 8 { + let base_index = (dst_offset << 7) + neq_base as u64; + let mut src_offset: u64 = 0; + while src_offset < 8 { + let index = (base_index + (src_offset << 4)) as usize; + let mut count: usize = 0; + while count < 16 { + let value = DmaInfo::calculate_encode(dst_offset, src_offset, count, neq, true) + | DmaInfo::DMA_REQUIRES_DMA_TEST_MASK; + let loop_count = DmaInfo::get_loop_count(value) as u64; + // The table is create to add directly de loop count and after all values + // are correct, for this reason substract de count, because we need diference + // between loop_count (shifted 32) and count (shifted 29) + table[index + count] = ((value & 0x0000_0007_FFFF_FFFF) + + (loop_count << DmaInfo::DMA_LOOP_COUNT_RS)) + .wrapping_sub((count as u64) << DmaInfo::DMA_LPRE_COUNT_RS); + count += 1; + } + src_offset += 1; + } + dst_offset += 1; + } + neq_index += 1; + } + table +} + +const fn generate_fast_encode_no_src_table() -> [u64; FAST_ENCODE_NO_SRC_TABLE_SIZE] { + let mut table = [0u64; FAST_ENCODE_NO_SRC_TABLE_SIZE]; + // fill table + let mut dst_offset: u64 = 0; + while dst_offset < 8 { + let index = (dst_offset << 4) as usize; + let mut count: usize = 0; + while count < 16 { + let value = DmaInfo::calculate_encode_no_src(dst_offset, count); + let loop_count = DmaInfo::get_loop_count(value) as u64; + // The table is create to add directly de loop count and after all values + // are correct, for this reason substract de count, because we need diference + // between loop_count (shifted 32) and count (shifted 29) + table[index + count] = ((value & 0x0000_0007_FFFF_FFFF) + + (loop_count << DmaInfo::DMA_LOOP_COUNT_RS)) + .wrapping_sub((count as u64) << DmaInfo::DMA_LPRE_COUNT_RS); + count += 1; + } + dst_offset += 1; + } + table +} + +pub struct DmaInfo {} + +impl DmaInfo { + #[inline(always)] + pub fn to_string(encoded: u64) -> String { + format!("LC:{}|PWR:{}|DO:{}|SO:{}|PRE:{}|POST:{}|ESR:{}|DPRE:{}|DPOS:{}|SIBP:{}|DA:{}|FB:{:02X}", + Self::get_loop_count(encoded), + Self::get_pre_writes(encoded), + Self::get_dst_offset(encoded), + Self::get_src_offset(encoded), + Self::get_pre_count(encoded), + Self::get_post_count(encoded), + Self::get_extra_src_reads(encoded), + Self::is_double_read_pre(encoded) as usize, + Self::is_double_read_post(encoded) as usize, + Self::get_src64_inc_by_pre(encoded), + Self::dst_is_aligned_with_src(encoded) as usize, + Self::get_fill_byte(encoded)) + } + #[inline(always)] + pub const fn encode_memcmp_neq(dst: u64, src: u64, count: usize, neq: bool) -> u64 { + let table_count = if count >= 16 { count & 0x07 | 0x08 } else { count }; + (FAST_ENCODE_MEMCMP_TABLE[(((dst & 0x07) << 7) + ((src & 0x07) << 4)) as usize + + table_count + + FAST_ENCODE_TABLE_SIZE * neq as usize]) + .wrapping_add((count as u64) << Self::DMA_LPRE_COUNT_RS) + } + + #[inline(always)] + pub const fn encode_memcmp(dst: u64, src: u64, count: usize, result: u64) -> u64 { + let table_count = if count >= 16 { count & 0x07 | 0x08 } else { count }; + (FAST_ENCODE_MEMCMP_TABLE[(((dst & 0x07) << 7) + ((src & 0x07) << 4)) as usize + + table_count + + FAST_ENCODE_TABLE_SIZE * (result != 0) as usize] + + ((result & Self::DMA_FILL_BITS9_MASK) << Self::DMA_FILL_BYTE_RS)) + .wrapping_add((count as u64) << Self::DMA_LPRE_COUNT_RS) + } + + #[inline(always)] + pub const fn encode_memcpy(dst: u64, src: u64, count: usize) -> u64 { + let table_count = if count >= 16 { count & 0x07 | 0x08 } else { count }; + FAST_ENCODE_TABLE[(((dst & 0x07) << 7) + ((src & 0x07) << 4)) as usize + table_count] + .wrapping_add((count as u64) << Self::DMA_LPRE_COUNT_RS) + } + + #[inline(always)] + pub const fn encode_inputcpy(dst: u64, count: usize) -> u64 { + let table_count = if count >= 16 { count & 0x07 | 0x08 } else { count }; + FAST_ENCODE_NO_SRC_TABLE[((dst & 0x07) << 4) as usize + table_count] + .wrapping_add((count as u64) << Self::DMA_LPRE_COUNT_RS) + } + + #[inline(always)] + pub const fn encode_memset(dst: u64, count: usize, fill_byte: u8) -> u64 { + let table_count = if count >= 16 { count & 0x07 | 0x08 } else { count }; + (FAST_ENCODE_NO_SRC_TABLE[((dst & 0x07) << 4) as usize + table_count] + .wrapping_add((count as u64) << Self::DMA_LPRE_COUNT_RS)) + | ((fill_byte as u64) << Self::DMA_FILL_BYTE_RS) + } + + pub const DMA_PRE_COUNT_TEST_MASK: u64 = 0x07; + pub const DMA_PRE_COUNT_MASK: u64 = 0x07; + + pub const DMA_POST_COUNT_RS: u64 = 3; + pub const DMA_POST_COUNT_TEST_MASK: u64 = 0x78; + pub const DMA_POST_COUNT_MASK: u64 = 0x0F; + + pub const DMA_PRE_WRITES_RS: u64 = 7; + pub const DMA_PRE_WRITES_TEST_MASK: u64 = 0x180; + pub const DMA_PRE_WRITES_MASK: u64 = 0x003; + + pub const DMA_DST_OFFSET_RS: u64 = 9; + pub const DMA_DST_OFFSET_TEST_MASK: u64 = 0x0E00; + pub const DMA_DST_OFFSET_MASK: u64 = 0x007; + + pub const DMA_SRC_OFFSET_RS: u64 = 12; + pub const DMA_SRC_OFFSET_TEST_MASK: u64 = 0x70000; + pub const DMA_SRC_OFFSET_MASK: u64 = 0x007; + + pub const DMA_DOUBLE_SRC_PRE_RS: u64 = 15; + pub const DMA_DOUBLE_SRC_PRE_TEST_MASK: u64 = 0x08000; + + pub const DMA_DOUBLE_SRC_POST_RS: u64 = 16; + pub const DMA_DOUBLE_SRC_POST_TEST_MASK: u64 = 0x10000; + + pub const DMA_EXTRA_SRC_READS_RS: u64 = 17; + pub const DMA_EXTRA_SRC_READS_TEST_MASK: u64 = 0x60000; + pub const DMA_EXTRA_SRC_READS_MASK: u64 = 0x00003; + + pub const DMA_SRC64_INC_BY_PRE_RS: u64 = 19; + pub const DMA_SRC64_INC_BY_PRE_TEST_MASK: u64 = 0x80000; + + pub const DMA_UNALIGNED_DST_SRC_RS: u64 = 20; + pub const DMA_UNALIGNED_DST_SRC_TEST_MASK: u64 = 0x100000; + + pub const DMA_FILL_BYTE_RS: u64 = 21; + pub const DMA_FILL_BYTE_TEST_MASK: u64 = 0x1FE00000; + pub const DMA_FILL_BYTE_MASK: u64 = 0x000000FF; + + pub const DMA_FILL_BITS9_MASK: u64 = 0x000001FF; + + pub const DMA_FILL_BYTE_SIGN_TEST_MASK: u64 = 0x20000000; + + pub const DMA_LPRE_COUNT_RS: u64 = 32; + pub const DMA_LPRE_COUNT_TEST_MASK: u64 = 0x70000000; + pub const DMA_LPRE_COUNT_MASK: u64 = 0x00000007; + + // the REQUIRES_DMA flag is set after when operation is memcmp where + // dma need to calculate count_eq and verify it. + + pub const DMA_REQUIRES_DMA_RS: u64 = 30; + pub const DMA_REQUIRES_DMA_TEST_MASK: u64 = 0x40000000; + pub const DMA_REQUIRES_DMA_MASK: u64 = 0x00000001; + + pub const DMA_PRE_OR_POST_TEST_MASK: u64 = + Self::DMA_PRE_COUNT_TEST_MASK | Self::DMA_POST_COUNT_TEST_MASK; + + pub const DMA_LOOP_COUNT_RS: u64 = 35; + const DMA_FULL_ALIGNED_MASK: u64 = Self::DMA_PRE_COUNT_TEST_MASK + | Self::DMA_POST_COUNT_TEST_MASK + | Self::DMA_PRE_WRITES_TEST_MASK + | Self::DMA_DST_OFFSET_TEST_MASK + | Self::DMA_SRC_OFFSET_TEST_MASK + | Self::DMA_DOUBLE_SRC_PRE_TEST_MASK + | Self::DMA_DOUBLE_SRC_POST_TEST_MASK + | Self::DMA_EXTRA_SRC_READS_TEST_MASK + | Self::DMA_SRC64_INC_BY_PRE_TEST_MASK + | Self::DMA_UNALIGNED_DST_SRC_TEST_MASK; + + const DMA_DIRECT_MASK: u64 = Self::DMA_FULL_ALIGNED_MASK | Self::DMA_REQUIRES_DMA_TEST_MASK; + + #[inline(always)] + pub const fn calculate_encode( + dst: u64, + src: u64, + count: usize, + neq: bool, + has_src: bool, + ) -> u64 { + let dst_offset = dst & 0x07; + let src_offset = src & 0x07; + + let count = count as u64; + let (pre_count, mut loop_count, mut post_count) = if dst_offset > 0 { + let _pre_count = 8 - dst_offset; + if _pre_count >= count { + (count, 0, 0) + } else { + let pending = count - _pre_count; + (_pre_count, pending >> 3, pending & 0x07) + } + } else { + (0, count >> 3, count & 0x07) + }; + let mut pre_writes = (pre_count > 0) as u64 + (post_count > 0) as u64; + // let to_src_offset = (src + count - 1) & 0x07; + let src_offset_pos = (src_offset + pre_count) & 0x07; + let mut double_src_post = (src_offset_pos + post_count) > 8; + let double_src_pre = (src_offset + pre_count) > 8; + let mut extra_src_reads = + if count == 0 { 0 } else { (((src + count - 1) >> 3) - (src >> 3) + 1) - loop_count }; + + let src64_inc_by_pre = (pre_count > 0 && (src_offset + pre_count) >= 8) as u64; + let unaligned_dst_src = (count > 0 && src_offset != dst_offset) as u64; + + if neq && post_count == 0 && loop_count > 0 { + // (dst + count) 0x07 == 7 ==> (dst_offset + count) 0x07 == 7 ==> post_count == 0 + // loop = loop - 1 + // pre_writes = pre_writes + 1 + // post = 8 + // double_src_post = unaligned_dst_src ? 1:0; + // extra_src_reads = extra_src_read + 1 + loop_count -= 1; + pre_writes += 1; + post_count = 8; + double_src_post = src_offset != dst_offset; + extra_src_reads += 1; + } + let requires_dma = count == 0 || pre_count != 0 || post_count != 0; + if has_src { + pre_count + | (post_count << Self::DMA_POST_COUNT_RS) + | (pre_writes << Self::DMA_PRE_WRITES_RS) + | (dst_offset << Self::DMA_DST_OFFSET_RS) + | (src_offset << Self::DMA_SRC_OFFSET_RS) + | ((double_src_pre as u64) << Self::DMA_DOUBLE_SRC_PRE_RS) + | ((double_src_post as u64) << Self::DMA_DOUBLE_SRC_POST_RS) + | (extra_src_reads << Self::DMA_EXTRA_SRC_READS_RS) + | (src64_inc_by_pre << Self::DMA_SRC64_INC_BY_PRE_RS) + | (unaligned_dst_src << Self::DMA_UNALIGNED_DST_SRC_RS) + | (pre_count << Self::DMA_LPRE_COUNT_RS) // optimization to read loop_count * 8 + pre_count + | (loop_count << Self::DMA_LOOP_COUNT_RS) + | ((requires_dma as u64) << Self::DMA_REQUIRES_DMA_RS) + } else { + pre_count + | (post_count << Self::DMA_POST_COUNT_RS) + | (pre_writes << Self::DMA_PRE_WRITES_RS) + | (dst_offset << Self::DMA_DST_OFFSET_RS) + | (pre_count << Self::DMA_LPRE_COUNT_RS) // optimization to read loop_count * 8 + pre_count + | (loop_count << Self::DMA_LOOP_COUNT_RS) + | ((requires_dma as u64) << Self::DMA_REQUIRES_DMA_RS) + } + } + + #[inline(always)] + pub const fn calculate_encode_no_src(dst: u64, count: usize) -> u64 { + let dst_offset = dst & 0x07; + + let count = count as u64; + let (pre_count, loop_count, post_count) = if dst_offset > 0 { + let _pre_count = 8 - dst_offset; + if _pre_count >= count { + (count, 0, 0) + } else { + let pending = count - _pre_count; + (_pre_count, pending >> 3, pending & 0x07) + } + } else { + (0, count >> 3, count & 0x07) + }; + let pre_writes = (pre_count > 0) as u64 + (post_count > 0) as u64; + let requires_dma = count == 0 || pre_count != 0 || post_count != 0; + pre_count + | (post_count << Self::DMA_POST_COUNT_RS) + | (pre_writes << Self::DMA_PRE_WRITES_RS) + | (dst_offset << Self::DMA_DST_OFFSET_RS) + | (pre_count << Self::DMA_LPRE_COUNT_RS) // optimization to read loop_count * 8 + pre_count + | (loop_count << Self::DMA_LOOP_COUNT_RS) + | ((requires_dma as u64) << Self::DMA_REQUIRES_DMA_RS) + } + + #[inline(always)] + pub const fn get_extra_src_reads(encoded: u64) -> usize { + (encoded as usize) >> Self::DMA_EXTRA_SRC_READS_RS & Self::DMA_EXTRA_SRC_READS_MASK as usize + } + #[inline(always)] + pub const fn get_count(encoded: u64) -> usize { + Self::get_loop_count(encoded) * 8 + + Self::get_pre_count(encoded) + + Self::get_post_count(encoded) + } + #[inline(always)] + pub const fn get_dst_offset(encoded: u64) -> usize { + (encoded as usize >> Self::DMA_DST_OFFSET_RS) & Self::DMA_DST_OFFSET_MASK as usize + } + + #[inline(always)] + pub const fn get_src_offset(encoded: u64) -> usize { + (encoded as usize >> Self::DMA_SRC_OFFSET_RS) & Self::DMA_SRC_OFFSET_MASK as usize + } + + #[inline(always)] + pub const fn get_loop_count(encoded: u64) -> usize { + (encoded >> Self::DMA_LOOP_COUNT_RS) as usize + } + + #[inline(always)] + pub const fn get_pre_writes(encoded: u64) -> usize { + (encoded as usize >> Self::DMA_PRE_WRITES_RS) & Self::DMA_PRE_WRITES_MASK as usize + } + + #[inline(always)] + pub const fn is_double_read_pre(encoded: u64) -> bool { + encoded & Self::DMA_DOUBLE_SRC_PRE_TEST_MASK != 0 + } + + #[inline(always)] + pub const fn is_double_read_post(encoded: u64) -> bool { + encoded & Self::DMA_DOUBLE_SRC_POST_TEST_MASK != 0 + } + + #[inline(always)] + pub const fn get_pre_count(encoded: u64) -> usize { + (encoded as usize) & Self::DMA_PRE_COUNT_MASK as usize + } + + #[inline(always)] + pub const fn get_post_count(encoded: u64) -> usize { + (encoded as usize >> Self::DMA_POST_COUNT_RS) & Self::DMA_POST_COUNT_MASK as usize + } + + #[inline(always)] + pub const fn get_pre(encoded: u64) -> u8 { + (Self::get_pre_count(encoded) > 0) as u8 + Self::is_double_read_pre(encoded) as u8 + } + + #[inline(always)] + pub const fn get_post(encoded: u64) -> u8 { + (Self::get_post_count(encoded) > 0) as u8 + Self::is_double_read_post(encoded) as u8 + } + + #[inline(always)] + pub const fn get_src64_inc_by_pre(encoded: u64) -> usize { + (encoded & Self::DMA_SRC64_INC_BY_PRE_TEST_MASK != 0) as usize + } + + #[inline(always)] + pub const fn get_loop_data_offset(encoded: u64) -> usize { + let pre_count = Self::get_pre_count(encoded); + Self::get_pre_writes(encoded) + + (pre_count > 0 && (Self::get_src_offset(encoded) + pre_count) >= 8) as usize + } + + #[inline(always)] + pub const fn get_loop_src_offset(encoded: u64) -> u8 { + (Self::get_src_offset(encoded) + Self::get_pre_count(encoded)) as u8 & 0x07 + } + + #[inline(always)] + pub const fn get_src_size(encoded: u64) -> usize { + Self::get_loop_count(encoded) + Self::get_extra_src_reads(encoded) + } + #[inline(always)] + pub const fn get_data_size(encoded: u64) -> usize { + Self::get_pre_writes(encoded) + Self::get_src_size(encoded) + } + #[inline(always)] + pub const fn get_post_data_offset(encoded: u64) -> usize { + Self::get_pre_writes(encoded) + Self::get_src_size(encoded) + - (Self::is_double_read_post(encoded) as usize + 1) + } + #[inline(always)] + pub const fn get_pre_write_offset(_encoded: u64) -> usize { + 0 + } + #[inline(always)] + pub const fn get_post_write_offset(encoded: u64) -> usize { + (Self::get_pre_count(encoded) != 0) as usize + } + #[inline(always)] + pub const fn get_pre_data_offset(encoded: u64) -> usize { + Self::get_pre_writes(encoded) + } + #[inline(always)] + pub const fn dst_is_unaligned_with_src(encoded: u64) -> bool { + (encoded & Self::DMA_UNALIGNED_DST_SRC_TEST_MASK) != 0 + } + #[inline(always)] + pub const fn dst_is_aligned_with_src(encoded: u64) -> bool { + (encoded & Self::DMA_UNALIGNED_DST_SRC_TEST_MASK) == 0 + } + #[inline(always)] + pub const fn is_full_aligned(encoded: u64) -> bool { + (Self::DMA_FULL_ALIGNED_MASK & encoded) == 0 + } + #[inline(always)] + pub const fn is_direct(encoded: u64) -> bool { + (Self::DMA_DIRECT_MASK & encoded) == 0 && Self::get_loop_count(encoded) > 0 + } + #[inline(always)] + pub const fn get_fill_byte(encoded: u64) -> u8 { + (encoded >> Self::DMA_FILL_BYTE_RS) as u8 + } + #[inline(always)] + pub const fn is_memcmp_negative(encoded: u64) -> bool { + (encoded & Self::DMA_FILL_BYTE_SIGN_TEST_MASK) != 0 + } + + #[inline(always)] + pub const fn get_memcmp_res_as_u64(encoded: u64) -> u64 { + if (encoded & Self::DMA_FILL_BYTE_SIGN_TEST_MASK) != 0 { + (encoded >> Self::DMA_FILL_BYTE_RS) | !Self::DMA_FILL_BYTE_MASK + } else { + (encoded >> Self::DMA_FILL_BYTE_RS) & Self::DMA_FILL_BYTE_MASK + } + } + + #[inline(always)] + pub const fn get_memcmp_pre_result_nz(encoded: u64) -> bool { + (encoded & Self::DMA_FILL_BYTE_TEST_MASK) != 0 + && (encoded & Self::DMA_POST_COUNT_TEST_MASK) == 0 + && (encoded & Self::DMA_PRE_COUNT_TEST_MASK) != 0 + } + #[inline(always)] + pub const fn get_memcmp_post_result_nz(encoded: u64) -> bool { + (encoded & Self::DMA_FILL_BYTE_TEST_MASK) != 0 + && (encoded & Self::DMA_POST_COUNT_TEST_MASK) != 0 + } + #[inline(always)] + pub const fn get_memcmp_result_nz(encoded: u64) -> bool { + (encoded & Self::DMA_FILL_BYTE_TEST_MASK) != 0 + } + + #[inline(always)] + pub const fn has_pre_or_post(encoded: u64) -> bool { + (encoded & Self::DMA_PRE_OR_POST_TEST_MASK) != 0 + } +} + +impl DmaHelpers { + pub fn calculate_write_value( + dst_offset: u64, + src_offset: u64, + count: u64, + pre_value: u64, + src_values: &[u64], + ) -> u64 { + let write_mask = + (0xFFFF_FFFF_FFFF_FFFF << ((8 - count) * 8)) >> ((8 - dst_offset - count) * 8); + let value = if dst_offset <= src_offset { + (src_values[0] >> ((src_offset - dst_offset) * 8)) + | if dst_offset == src_offset { + 0 + } else if (src_offset + count) > 8 { + if src_values.len() < 2 { + panic!("ERROR src_values: {:?} dst_offset: {dst_offset} src_offset: {src_offset} count: {count}", src_values); + } + src_values[1] << ((8 - src_offset + dst_offset) * 8) + } else { + 0 + } + } else if dst_offset > src_offset { + src_values[0] << ((dst_offset - src_offset) * 8) + } else { + // dst_offset = src_offset + src_values[0] + }; + #[cfg(feature = "debug_dma")] + println!( + "WRITE_MASK 0x{write_mask:016X} VALUE 0x{value:016X} SRC_VALUES 0x{:016X},0x{:016X} PRE_VALUE:{pre_value:016X} DST_OFFSET:{dst_offset} SRC_OFFSET:{src_offset} COUNT:{count}", + src_values[0], if src_values.len() > 1 { src_values[1] } else { 0 } + ); + (pre_value & !write_mask) | (value & write_mask) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Helper function to compute expected value using byte-by-byte copy + fn expected_write_value( + dst_offset: u64, + src_offset: u64, + count: u64, + pre_value: u64, + src_values: &[u64], + ) -> u64 { + // Convert pre_value to bytes (big-endian layout as used in the function) + let mut result_bytes = pre_value.to_le_bytes(); + + // Convert src_values to a contiguous byte array (big-endian) + let mut src_bytes = Vec::new(); + for &val in src_values { + src_bytes.extend_from_slice(&val.to_le_bytes()); + } + + // Copy count bytes from src_bytes[src_offset..] to result_bytes[dst_offset..] + for i in 0..count as usize { + result_bytes[dst_offset as usize + i] = src_bytes[src_offset as usize + i]; + } + + u64::from_le_bytes(result_bytes) + } + + #[test] + fn test_calculate_write_value_all_combinations() { + // Test patterns for src_values + let src0: u64 = 0x0102030405060708; + let src1: u64 = 0x1112131415161718; + + // Test pattern for pre_value + let pre_value: u64 = 0xAABBCCDDEEFF0011; + + // Iterate over all dst_offset values (0..8) + for dst_offset in 0..8 { + // For each dst_offset, count can be 1 to (8 - dst_offset) + for count in 1..=(8 - dst_offset) { + // For each valid (dst_offset, count), test all src_offset values + // src_offset can be 0..8, but we need to ensure we have enough src data + for src_offset in 0..8 { + // Determine if we need one or two src values + // We need two src values if (src_offset + count) > 8 + let needs_two_src = (src_offset + count) > 8; + let src_values: Vec = if needs_two_src { + vec![src0, src1] + } else { + vec![src0, 0] // Always provide both for safety + }; + + let result = DmaHelpers::calculate_write_value( + dst_offset, + src_offset, + count, + pre_value, + &src_values, + ); + + let expected = + expected_write_value(dst_offset, src_offset, count, pre_value, &src_values); + + assert_eq!( + result, expected, + "Failed for dst_offset={}, src_offset={}, count={}\n\ + pre_value: 0x{:016X}\n\ + src[0]: 0x{:016X}\n\ + src[1]: 0x{:016X}\n\ + expected: 0x{:016X}\n\ + got: 0x{:016X}", + dst_offset, src_offset, count, pre_value, src0, src1, expected, result + ); + } + } + } + } + + #[test] + fn test_calculate_write_value_edge_cases() { + let src0: u64 = 0x0102030405060708; + let src1: u64 = 0x1112131415161718; + let pre_value: u64 = 0xAABBCCDDEEFF0011; + + // Test case: dst_offset=0, count=8 (full overwrite) + let result = DmaHelpers::calculate_write_value(0, 0, 8, pre_value, &[src0, src1]); + assert_eq!(result, src0, "Full overwrite with aligned offsets failed"); + + // Test case: dst_offset=0, count=1 (single byte at start) + let result = DmaHelpers::calculate_write_value(0, 0, 1, pre_value, &[src0, src1]); + let expected = 0xAABBCCDDEEFF0008u64; + assert_eq!(result, expected, "Single byte at start failed"); + + // Test case: dst_offset=7, count=1 (single byte at end) + let result = DmaHelpers::calculate_write_value(7, 0, 1, pre_value, &[src0, src1]); + let expected = 0x08BBCCDDEEFF0011; + assert_eq!(result, expected, "Single byte at end failed"); + + // Test case: src spans two values (src_offset=7, count=2) + let result = DmaHelpers::calculate_write_value(0, 7, 2, pre_value, &[src0, src1]); + let expected = 0xAABBCCDDEEFF1801; + assert_eq!(result, expected, "Src spanning two values failed"); + } + + #[test] + fn test_calculate_write_value_zero_patterns() { + let src0: u64 = 0x0000000000000000; + let src1: u64 = 0x0000000000000000; + let pre_value: u64 = 0xFFFFFFFFFFFFFFFF; + + // Writing zeros should clear the appropriate bytes + for dst_offset in 0..8 { + for count in 1..=(8 - dst_offset) { + let result = DmaHelpers::calculate_write_value( + dst_offset, + 0, + count, + pre_value, + &[src0, src1], + ); + let expected = expected_write_value(dst_offset, 0, count, pre_value, &[src0, src1]); + assert_eq!( + result, expected, + "Zero pattern failed for dst_offset={}, count={}", + dst_offset, count + ); + } + } + } + + #[test] + fn test_calculate_write_value_ff_patterns() { + let src0: u64 = 0xFFFFFFFFFFFFFFFF; + let src1: u64 = 0xFFFFFFFFFFFFFFFF; + let pre_value: u64 = 0x0000000000000000; + + // Writing 0xFF should set the appropriate bytes + for dst_offset in 0..8 { + for count in 1..=(8 - dst_offset) { + let result = DmaHelpers::calculate_write_value( + dst_offset, + 0, + count, + pre_value, + &[src0, src1], + ); + let expected = expected_write_value(dst_offset, 0, count, pre_value, &[src0, src1]); + assert_eq!( + result, expected, + "FF pattern failed for dst_offset={}, count={}", + dst_offset, count + ); + } + } + } + + /// Byte-based implementation for comparison + #[inline(always)] + fn calculate_write_value_bytes( + dst_offset: usize, + src_offset: usize, + count: usize, + pre_value: u64, + src_values: &[u64], + ) -> u64 { + let mut result_bytes = pre_value.to_le_bytes(); + let src0_bytes = src_values[0].to_le_bytes(); + let src1_bytes = src_values[1].to_le_bytes(); + + for i in 0..count { + let src_idx = src_offset + i; + result_bytes[dst_offset + i] = + if src_idx < 8 { src0_bytes[src_idx] } else { src1_bytes[src_idx - 8] }; + } + + u64::from_le_bytes(result_bytes) + } + + #[test] + fn benchmark_calculate_write_value() { + use std::time::Instant; + + let src0: u64 = 0x0102030405060708; + let src1: u64 = 0x1112131415161718; + let pre_value: u64 = 0xAABBCCDDEEFF0011; + let src_values = [src0, src1]; + + const ITERATIONS: usize = 1_000_000; + + // Warm up + let mut sum_bitwise: u64 = 0; + let mut sum_bytes: u64 = 0; + + // Benchmark bitwise implementation + let start = Instant::now(); + for _ in 0..ITERATIONS { + for dst_offset in 0..8 { + for count in 1..=(8 - dst_offset) { + for src_offset in 0..8 { + sum_bitwise = sum_bitwise.wrapping_add(DmaHelpers::calculate_write_value( + dst_offset, + src_offset, + count, + pre_value, + &src_values, + )); + } + } + } + } + let bitwise_duration = start.elapsed(); + + // Benchmark byte-based implementation + let start = Instant::now(); + for _ in 0..ITERATIONS { + for dst_offset in 0..8 { + for count in 1..=(8 - dst_offset) { + for src_offset in 0..8 { + sum_bytes = sum_bytes.wrapping_add(calculate_write_value_bytes( + dst_offset, + src_offset, + count, + pre_value, + &src_values, + )); + } + } + } + } + let bytes_duration = start.elapsed(); + + // Verify both produce same results + assert_eq!(sum_bitwise, sum_bytes, "Results differ!"); + + // 288 combinations per iteration (8 dst * varying count * 8 src) + let total_ops = ITERATIONS * 288; + + println!("\n=== Benchmark Results ==="); + println!("Iterations: {} ({} total operations)", ITERATIONS, total_ops); + println!("Bitwise implementation: {:?}", bitwise_duration); + println!("Byte-based implementation: {:?}", bytes_duration); + println!( + "Bitwise ops/sec: {:.2}M", + total_ops as f64 / bitwise_duration.as_secs_f64() / 1_000_000.0 + ); + println!( + "Bytes ops/sec: {:.2}M", + total_ops as f64 / bytes_duration.as_secs_f64() / 1_000_000.0 + ); + println!( + "Speedup (bitwise vs bytes): {:.2}x", + bytes_duration.as_secs_f64() / bitwise_duration.as_secs_f64() + ); + println!("Checksum (to prevent optimization): {}", sum_bitwise); + } + + #[test] + fn asm_fast_encode_table() { + let table = generate_fast_encode_table(); + for i in 0..256 { + let dst_offset = (i >> 5) & 0x7; + let src_offset = (i >> 2) & 0x7; + println!( + "\t.quad 0x{:016x}, 0x{:016X}, 0x{:016X}, 0x{:016X} # {:4} - {:4} D{dst_offset} S{src_offset} C{}{}", + table[i * 4], + table[i * 4 + 1], + table[i * 4 + 2], + table[i * 4 + 3], + i * 4, + i * 4 + 3, + (i * 4) & 0xF, + if i >= 256 { " neq" } else { "" } + ); + } + assert!(table.len() == 1024); + } + + #[test] + fn asm_fast_encode_memcmp_table() { + let table = generate_fast_encode_memcmp_table(); + for i in 0..512 { + let dst_offset = (i >> 5) & 0x7; + let src_offset = (i >> 2) & 0x7; + println!( + "\t.quad 0x{:016x}, 0x{:016X}, 0x{:016X}, 0x{:016X} # {:4} - {:4} D{dst_offset} S{src_offset} C{}{}", + table[i * 4], + table[i * 4 + 1], + table[i * 4 + 2], + table[i * 4 + 3], + i * 4, + i * 4 + 3, + (i * 4) & 0xF, + if i >= 256 { " neq" } else { "" } + ); + } + assert!(table.len() == 2048); + } + + #[test] + fn asm_fast_encode_no_src_table() { + let table = generate_fast_encode_no_src_table(); + for i in 0..32 { + let dst_offset = (i >> 3) & 0x7; + println!( + "\t.quad 0x{:016x}, 0x{:016X}, 0x{:016X}, 0x{:016X} # {:4} - {:4} D{dst_offset} C{}", + table[i * 4], + table[i * 4 + 1], + table[i * 4 + 2], + table[i * 4 + 3], + i * 4, + i * 4 + 3, + (i * 4) & 0xF, + ); + } + assert!(table.len() == 128); + } + + #[test] + fn test_simple() { + let dst = 0xA011FE70; + let src = 0xA011F4D0; + let count = 5; + + let encode = DmaInfo::calculate_encode(dst, src, count, false, true); + let fast_encode = DmaInfo::encode_memcpy(dst, src, count); + println!("encode: 0x{encode:016X} {}", DmaInfo::to_string(encode)); + println!("fast_encode: 0x{fast_encode:016X} {}", DmaInfo::to_string(fast_encode)); + let encode = DmaInfo::calculate_encode(dst, src, count, true, true); + let fast_encode = DmaInfo::encode_memcmp(dst, src, count, 0xDB); + println!("encode: 0x{encode:016X} {}", DmaInfo::to_string(encode)); + println!("fast_encode: 0x{fast_encode:016X} {}", DmaInfo::to_string(fast_encode)); + assert_eq!(encode, encode, + "testing with memcpy dst:0x{dst:08X} src:0x{src:08X} count:{count} E:0x{encode:016X} FE:0x{fast_encode:016X}"); + } + #[test] + fn test_fast_encode_table() { + for dst in 0..256 { + for src in 0..256 { + for count in 0..256 { + let encode = DmaInfo::calculate_encode(dst, src, count, false, true); + let fast_encode = DmaInfo::encode_memcpy(dst, src, count); + assert_eq!(encode, fast_encode, + "testing with memcpy dst:0x{dst:08X} src:0x{src:08X} count:{count} E:0x{encode:016X} FE:0x{fast_encode:016X}" + ); + assert_eq!(count, DmaInfo::get_count(encode), "testing with memcpy dst:0x{dst:08X} src:0x{src:08X} count:{count} E:0x{encode:016X} FE:0x{fast_encode:016X}"); + } + } + } + for neq in [false, true] { + for dst in 0..256 { + for src in 0..256 { + for count in 0..256 { + let encode = DmaInfo::calculate_encode(dst, src, count, neq, true) + | DmaInfo::DMA_REQUIRES_DMA_TEST_MASK; + let fast_encode = DmaInfo::encode_memcmp_neq(dst, src, count, neq); + assert_eq!( + encode, + fast_encode, + "testing NEQ with memcmp dst:0x{dst:08X} src:0x{src:08X} count:{count} E:0x{encode:016X} FE:0x{fast_encode:016X}" + ); + assert_eq!(count, DmaInfo::get_count(encode), "testing NEQ with memcmp dst:0x{dst:08X} src:0x{src:08X} count:{count} E:0x{encode:016X} FE:0x{fast_encode:016X}"); + } + } + } + } + } +} diff --git a/precompiles/helpers/src/lib.rs b/precompiles/helpers/src/lib.rs index 5c2c38842..b747221ec 100644 --- a/precompiles/helpers/src/lib.rs +++ b/precompiles/helpers/src/lib.rs @@ -1,13 +1,17 @@ mod arith_eq; mod arith_eq_384; mod big_int; +mod blake2; mod common; +mod dma; mod keccak; pub use arith_eq::*; pub use arith_eq_384::*; pub use big_int::*; +pub use blake2::{blake2b_compress, blake2b_round}; pub use common::*; +pub use dma::*; pub use keccak::{ keccak_f, keccak_f_expr, keccak_f_round_states, keccak_f_rounds, keccak_f_state, keccakf_idx_pos, keccakf_state_from_linear, keccakf_state_to_linear, diff --git a/precompiles/hints/Cargo.toml b/precompiles/hints/Cargo.toml new file mode 100644 index 000000000..1e842a52d --- /dev/null +++ b/precompiles/hints/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "precompiles-hints" +version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +keywords = { workspace = true } +repository = { workspace = true } +categories = { workspace = true } + +[lib] +name = "precompiles_hints" +path = "src/lib.rs" + +[[bin]] +name = "hints-socket-server" +path = "src/bin/hints_socket_server.rs" + +[[bin]] +name = "hints-quic-server" +path = "src/bin/hints_quic_server.rs" + +[dependencies] +ziskos-hints = { workspace = true } +lib-c = { workspace = true } +precompiles-helpers = { workspace = true } +anyhow = { workspace = true } +rayon = { workspace = true } +tracing = { workspace = true } +zisk-common = { workspace = true } +rustls = { version = "0.23", features = ["ring"] } +borsh = { workspace = true } +zisk-distributed-common = { workspace = true } + +[dev-dependencies] +criterion = "0.8" + +[[bench]] +name = "hints_benchmarks" +harness = false diff --git a/precompiles/hints/benches/hints_benchmarks.rs b/precompiles/hints/benches/hints_benchmarks.rs new file mode 100644 index 000000000..9e84eb1e4 --- /dev/null +++ b/precompiles/hints/benches/hints_benchmarks.rs @@ -0,0 +1,279 @@ +use anyhow::Result; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use precompiles_hints::{HintHandlers, HintsProcessor}; +use std::hint::black_box; +use std::sync::{Arc, Mutex}; +use std::thread; +use std::time::Duration; +use zisk_common::io::StreamSink; + +struct BenchSink { + received: Arc>>>, +} + +impl StreamSink for BenchSink { + fn submit(&self, processed: &[u64]) -> Result<()> { + self.received.lock().unwrap().push(processed.to_vec()); + Ok(()) + } +} + +fn make_header(hint_type: u32, length: u32) -> u64 { + ((hint_type as u64) << 32) | (length as u64) +} + +fn parallel_speedup_benchmark(c: &mut Criterion) { + // Define custom hints with known processing times (use high values to avoid built-in conflicts) + const FAST_HINT: u32 = 0x7FFF_0000; // 1ms + const MEDIUM_HINT: u32 = 0x7FFF_0001; // 5ms + const SLOW_HINT: u32 = 0x7FFF_0002; // 10ms + + // Test configuration + const NUM_FAST: usize = 100; + const NUM_MEDIUM: usize = 50; + const NUM_SLOW: usize = 20; + + let mut group = c.benchmark_group("parallel_speedup"); + group.sample_size(10); // Reduce sample size for slower benchmarks + + let thread_counts = [1, 2, 4, 8, 16]; + + for &num_threads in &thread_counts { + group.bench_with_input( + BenchmarkId::from_parameter(format!("{}_threads", num_threads)), + &num_threads, + |b, &threads| { + b.iter(|| { + let received = Arc::new(Mutex::new(Vec::new())); + let received_clone = received.clone(); + let sink = Arc::new(BenchSink { received: received_clone }); + + let p = HintsProcessor::builder(sink, None::>) + .num_threads(threads) + .with_hint_handlers( + HintHandlers::default() + .register(FAST_HINT, |data: &[u64]| -> Result> { + thread::sleep(Duration::from_millis(1)); + Ok(vec![data[0] + 1]) + }) + .register(MEDIUM_HINT, |data: &[u64]| -> Result> { + thread::sleep(Duration::from_millis(5)); + Ok(vec![data[0] + 2]) + }) + .register(SLOW_HINT, |data: &[u64]| -> Result> { + thread::sleep(Duration::from_millis(10)); + Ok(vec![data[0] + 3]) + }), + ) + .build() + .unwrap(); + + let mut data = Vec::new(); + let mut hint_idx = 0; + + for _ in 0..NUM_FAST { + data.push(make_header(FAST_HINT, 1)); + data.push(hint_idx); + hint_idx += 1; + } + + for _ in 0..NUM_MEDIUM { + data.push(make_header(MEDIUM_HINT, 1)); + data.push(hint_idx); + hint_idx += 1; + } + + for _ in 0..NUM_SLOW { + data.push(make_header(SLOW_HINT, 1)); + data.push(hint_idx); + hint_idx += 1; + } + + p.process_hints(black_box(&data), false).unwrap(); + p.wait_for_completion().unwrap(); + + let results = received.lock().unwrap(); + assert_eq!(results.len(), NUM_FAST + NUM_MEDIUM + NUM_SLOW); + }); + }, + ); + } + + group.finish(); +} + +fn microsecond_hints_benchmark(c: &mut Criterion) { + const ULTRA_FAST: u32 = 0x7FFF_0010; // 10µs + const VERY_FAST: u32 = 0x7FFF_0011; // 50µs + const FAST: u32 = 0x7FFF_0012; // 100µs + const NUM_HINTS: usize = 1000; + + let mut group = c.benchmark_group("microsecond_hints"); + group.sample_size(50); + + let test_cases = vec![ + ("ultra_fast_10us", ULTRA_FAST, 10u64), + ("very_fast_50us", VERY_FAST, 50u64), + ("fast_100us", FAST, 100u64), + ]; + + for (name, hint_code, micros) in test_cases { + group.bench_function(name, |b| { + b.iter(|| { + let received = Arc::new(Mutex::new(Vec::new())); + let received_clone = received.clone(); + let sink = Arc::new(BenchSink { received: received_clone }); + + let p = HintsProcessor::builder(sink, None::>) + .num_threads(16) + .with_hint_handlers(HintHandlers::default().register( + hint_code, + move |data: &[u64]| { + thread::sleep(Duration::from_micros(micros)); + Ok(vec![data[0] + 1]) + }, + )) + .build() + .unwrap(); + + let mut data = Vec::new(); + for i in 0..NUM_HINTS { + data.push(make_header(hint_code, 1)); + data.push(i as u64); + } + + p.process_hints(black_box(&data), false).unwrap(); + p.wait_for_completion().unwrap(); + + let results = received.lock().unwrap(); + assert_eq!(results.len(), NUM_HINTS); + }); + }); + } + + group.finish(); +} + +fn workload_patterns_benchmark(c: &mut Criterion) { + const VERY_FAST: u32 = 0x7FFF_0020; // 0.5ms + const FAST: u32 = 0x7FFF_0021; // 2ms + const MEDIUM: u32 = 0x7FFF_0022; // 5ms + const SLOW: u32 = 0x7FFF_0023; // 10ms + const VERY_SLOW: u32 = 0x7FFF_0024; // 20ms + + let mut group = c.benchmark_group("workload_patterns"); + group.sample_size(10); + + let patterns = vec![ + ("uniform_fast", vec![(FAST, 100)]), + ("uniform_slow", vec![(SLOW, 50)]), + ("mixed_balanced", vec![(FAST, 40), (MEDIUM, 20), (SLOW, 10)]), + ("skewed_fast", vec![(VERY_FAST, 80), (SLOW, 10), (VERY_SLOW, 10)]), + ("heavy_tail", vec![(FAST, 50), (VERY_SLOW, 5)]), + ]; + + for (name, hints) in patterns { + group.bench_function(name, |b| { + b.iter(|| { + let received = Arc::new(Mutex::new(Vec::new())); + let received_clone = received.clone(); + let sink = Arc::new(BenchSink { received: received_clone }); + + let handlers = HintHandlers::default() + .register(VERY_FAST, |data: &[u64]| { + thread::sleep(Duration::from_micros(500)); + Ok(vec![data[0] + 1]) + }) + .register(FAST, |data: &[u64]| { + thread::sleep(Duration::from_millis(2)); + Ok(vec![data[0] + 1]) + }) + .register(MEDIUM, |data: &[u64]| { + thread::sleep(Duration::from_millis(5)); + Ok(vec![data[0] + 1]) + }) + .register(SLOW, |data: &[u64]| { + thread::sleep(Duration::from_millis(10)); + Ok(vec![data[0] + 1]) + }) + .register(VERY_SLOW, |data: &[u64]| { + thread::sleep(Duration::from_millis(20)); + Ok(vec![data[0] + 1]) + }); + let p = HintsProcessor::builder(sink, None::>) + .num_threads(8) + .with_hint_handlers(handlers) + .build() + .unwrap(); + + let mut data = Vec::new(); + let mut idx = 0; + for (hint_code, count) in &hints { + for _ in 0..*count { + data.push(make_header(*hint_code, 1)); + data.push(idx); + idx += 1; + } + } + + p.process_hints(black_box(&data), false).unwrap(); + p.wait_for_completion().unwrap(); + + let total_hints: usize = hints.iter().map(|(_, count)| count).sum(); + let results = received.lock().unwrap(); + assert_eq!(results.len(), total_hints); + }); + }); + } + + group.finish(); +} + +fn noop_throughput_benchmark(c: &mut Criterion) { + struct NullSink; + + impl StreamSink for NullSink { + fn submit(&self, _processed: &[u64]) -> Result<()> { + Ok(()) + } + } + + let mut group = c.benchmark_group("noop_throughput"); + group.sample_size(20); + + let hint_counts = [1000, 10000, 100000]; + + // Pass-through hint code (bit 31 set = pass-through, no computation needed) + const PASSTHROUGH_HINT: u32 = 0x8000_1000; + + for &count in &hint_counts { + group.bench_with_input(BenchmarkId::from_parameter(count), &count, |b, &num_hints| { + b.iter(|| { + let p = HintsProcessor::builder(Arc::new(NullSink), None::>) + .num_threads(32) + .build() + .unwrap(); + + let mut data = Vec::with_capacity(num_hints * 2); + for i in 0..num_hints { + data.push(make_header(PASSTHROUGH_HINT, 1)); + data.push(i as u64); + } + + p.process_hints(black_box(&data), false).unwrap(); + p.wait_for_completion().unwrap(); + }); + }); + } + + group.finish(); +} + +criterion_group!( + benches, + parallel_speedup_benchmark, + microsecond_hints_benchmark, + workload_patterns_benchmark, + noop_throughput_benchmark +); +criterion_main!(benches); diff --git a/precompiles/hints/src/bin/hints_quic_server.rs b/precompiles/hints/src/bin/hints_quic_server.rs new file mode 100644 index 000000000..fa4e60c02 --- /dev/null +++ b/precompiles/hints/src/bin/hints_quic_server.rs @@ -0,0 +1,159 @@ +//! Hints QUIC Server +//! +//! A development tool that opens a QUIC server, writes binary file contents to it, +//! and waits for the user to press '0' to close. +//! +//! Usage: hints-quic-server +//! Example: hints-quic-server hints.bin 127.0.0.1:8080 + +use std::fs::File; +use std::io::{self, Read}; +use std::path::Path; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::thread; +use std::time::Duration; +use zisk_common::io::{QuicStreamWriter, StreamWrite}; + +/// Reads binary file and returns its contents +fn read_binary_file>(path: P) -> io::Result> { + let mut file = File::open(path)?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer)?; + Ok(buffer) +} + +fn main() -> io::Result<()> { + // Initialize crypto provider for QUIC + let _ = rustls::crypto::ring::default_provider().install_default(); + + let args: Vec = std::env::args().collect(); + + if args.len() != 3 { + eprintln!("Usage: {} ", args[0]); + eprintln!("Example: {} hints.bin 127.0.0.1:8080", args[0]); + std::process::exit(1); + } + + let file_path = &args[1]; + let bind_address = &args[2]; + + // Parse bind address + let bind_addr: std::net::SocketAddr = bind_address.parse().map_err(|e| { + io::Error::new(io::ErrorKind::InvalidInput, format!("Invalid bind address: {}", e)) + })?; + + // Read the binary file + let file_data = read_binary_file(file_path)?; + println!("Read {} bytes from: {}", file_data.len(), file_path); + + println!("========================================"); + println!("Hints QUIC Server"); + println!("========================================"); + println!("Binary file: {}", file_path); + println!("Bind address: {}", bind_addr); + println!(); + + // Create the QUIC writer (server) + let mut writer = QuicStreamWriter::new(bind_addr).map_err(io::Error::other)?; + + println!("QUIC server created successfully"); + println!("Waiting for client connection..."); + + // Open the connection (waits for client to connect) + writer.open().map_err(io::Error::other)?; + + println!("Client connected! Starting data transfer..."); + + let shutdown = Arc::new(AtomicBool::new(false)); + + // Spawn shutdown listener thread + let shutdown_clone = Arc::clone(&shutdown); + thread::spawn(move || { + println!("Press '0' + Enter to close at any time"); + let stdin = io::stdin(); + let mut buffer = String::new(); + loop { + buffer.clear(); + if stdin.read_line(&mut buffer).is_ok() && buffer.trim() == "0" { + println!("Shutdown signal received!"); + shutdown_clone.store(true, Ordering::Relaxed); + break; + } + } + }); + + // File structure: + // - First 8 bytes: header + // - Middle: batches of hints (each hint = 26 * 8 = 208 bytes) + // - Last 8 bytes: footer + + const HINT_SIZE: usize = 26 * 8; // 208 bytes per hint + const HINTS_PER_BATCH: usize = 100; + const BATCH_SIZE: usize = HINTS_PER_BATCH * HINT_SIZE; // 20,800 bytes + + if file_data.len() < 16 { + eprintln!("Error: File too small (need at least 16 bytes for header+footer)"); + return Ok(()); + } + + let mut offset = 0; + let mut message_num = 0; + + loop { + if shutdown.load(Ordering::Relaxed) { + println!("\nShutdown requested, exiting..."); + break; + } + + if offset >= file_data.len() { + // All data sent + println!("All data sent successfully!"); + println!("Connection active. Press '0' to close..."); + while !shutdown.load(Ordering::Relaxed) { + thread::sleep(Duration::from_millis(100)); + } + break; + } + + // Determine what to send in this message + let (start, end) = if offset == 0 { + // First message: 8 bytes header + (0, 8) + } else if offset + 8 >= file_data.len() { + // Last message: final 8 bytes + (file_data.len() - 8, file_data.len()) + } else { + // Middle messages: batches of hints + let data_end = file_data.len() - 8; // Before footer + let remaining_data = data_end - offset; + let batch_size = std::cmp::min(BATCH_SIZE, remaining_data); + (offset, offset + batch_size) + }; + + let chunk = &file_data[start..end]; + + match writer.write(chunk) { + Ok(_) => { + message_num += 1; + println!( + "Message {}: Sent {} bytes (offset {}-{})", + message_num, + chunk.len(), + start, + end + ); + offset = end; + } + Err(e) => { + eprintln!("Error writing to QUIC stream: {}", e); + break; + } + } + } + + println!("Closing connection..."); + let _ = writer.close(); + println!("Server shutting down..."); + Ok(()) +} diff --git a/precompiles/hints/src/bin/hints_socket_server.rs b/precompiles/hints/src/bin/hints_socket_server.rs new file mode 100644 index 000000000..6769bead3 --- /dev/null +++ b/precompiles/hints/src/bin/hints_socket_server.rs @@ -0,0 +1,155 @@ +//! Hints Unix Socket Server +//! +//! A development tool that opens a Unix domain socket, writes binary file contents to it, +//! and waits for the user to press '0' to close. +//! +//! Usage: hints-socket-server +//! Example: hints-socket-server hints.bin /tmp/hints.sock + +use std::fs::File; +use std::io::{self, Read}; +use std::path::Path; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::thread; +use std::time::Duration; +use zisk_common::io::{StreamWrite, UnixSocketStreamWriter}; + +/// Reads binary file and returns its contents +fn read_binary_file>(path: P) -> io::Result> { + let mut file = File::open(path)?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer)?; + Ok(buffer) +} + +fn main() -> io::Result<()> { + let args: Vec = std::env::args().collect(); + + if args.len() != 3 { + eprintln!("Usage: {} ", args[0]); + eprintln!("Example: {} hints.bin /tmp/hints.sock", args[0]); + std::process::exit(1); + } + + let file_path = &args[1]; + let socket_path = &args[2]; + + // Read the binary file + let file_data = read_binary_file(file_path)?; + println!("Read {} bytes from: {}", file_data.len(), file_path); + + println!("========================================"); + println!("Hints Unix Socket Server"); + println!("========================================"); + println!("Binary file: {}", file_path); + println!("Socket path: {}", socket_path); + println!(); + + // Create the Unix socket writer (server) + let mut writer = UnixSocketStreamWriter::new(socket_path).map_err(io::Error::other)?; + + println!("Unix socket server created successfully"); + println!("Waiting for client connection..."); + + // Open the connection (waits for client to connect) + writer.open().map_err(io::Error::other)?; + + println!("Client connected! Starting hint data transfer..."); + + let shutdown = Arc::new(AtomicBool::new(false)); + + // Spawn shutdown listener thread + let shutdown_clone = Arc::clone(&shutdown); + thread::spawn(move || { + println!("Press '0' + Enter to close at any time"); + let stdin = io::stdin(); + let mut buffer = String::new(); + loop { + buffer.clear(); + if stdin.read_line(&mut buffer).is_ok() && buffer.trim() == "0" { + println!("Shutdown signal received!"); + shutdown_clone.store(true, Ordering::Relaxed); + break; + } + } + }); + + // Sleep 500ms + thread::sleep(Duration::from_millis(500)); + + let mut offset = 0; + let mut hint_count = 0; + let mut first_hint = true; + + let start_time = std::time::Instant::now(); + loop { + if shutdown.load(Ordering::Relaxed) { + println!("\nShutdown requested, exiting..."); + break; + } + + if offset >= file_data.len() { + panic!("Reached end of file data unexpectedly!"); + } + + let mut hint_total_len = 8; + + let hint_header = u64::from_le_bytes(file_data[offset..offset + 8].try_into().unwrap()); + let hint_id = (hint_header >> 32) as u32 & 0x7FFF_FFFF; + + if first_hint { + // HINT_START + assert!(hint_id == 0, "Invalid hint file format: first hint must be START"); + println!("Received START hint"); + first_hint = false; + } + + let hint_data_len = hint_header & 0x_FFFF_FFFF; + let pad = (8 - (hint_data_len % 8)) % 8; // Padding to align to 8 bytes + let data_len_with_pad = hint_data_len + pad as u64; + + hint_total_len += data_len_with_pad; + + if (offset + hint_total_len as usize) > file_data.len() { + eprintln!("Error: Hint data length exceeds file ends"); + return Ok(()); + } + + let data_with_pad = &file_data[offset..offset + hint_total_len as usize]; + match writer.write(data_with_pad) { + Ok(_) => { + if hint_count % 100 == 0 && hint_id != 0 && hint_id != 1 { + println!( + "#{} Hint id: 0x{:x}, sent: {} bytes, offset: {}", + hint_count, hint_id, hint_total_len, offset + ); + } + } + Err(e) => { + eprintln!("Error writing to Unix socket: {}", e); + break; + } + } + offset += hint_total_len as usize; + + if hint_id != 1 && hint_id != 0 { + hint_count += 1; + } + + if hint_id == 1 { + // HINT_END + println!( + "Received END hint. All hints sent, total: {}, time elapsed: {:?}", + hint_count, + start_time.elapsed() + ); + break; + } + } + + println!("Closing connection..."); + let _ = writer.close(); + println!("Server shutting down..."); + Ok(()) +} diff --git a/precompiles/hints/src/hint_handlers.rs b/precompiles/hints/src/hint_handlers.rs new file mode 100644 index 000000000..628be6ceb --- /dev/null +++ b/precompiles/hints/src/hint_handlers.rs @@ -0,0 +1,121 @@ +use anyhow::Result; +use std::collections::HashMap; +use zisk_common::{BuiltInHint, HintCode, PrecompileHint}; +use ziskos_hints::handlers::blake2b::blake2b_compress_hint; +use ziskos_hints::handlers::bls381::{ + bls12_381_fp2_to_g2_hint, bls12_381_fp_to_g1_hint, bls12_381_g1_add_hint, + bls12_381_g1_msm_hint, bls12_381_g2_add_hint, bls12_381_g2_msm_hint, + bls12_381_pairing_check_hint, +}; +use ziskos_hints::handlers::bn254::{ + bn254_g1_add_hint, bn254_g1_mul_hint, bn254_pairing_check_hint, +}; +use ziskos_hints::handlers::keccak256::keccak256_hint; +use ziskos_hints::handlers::kzg::verify_kzg_proof_hint; +use ziskos_hints::handlers::modexp::modexp_hint; +use ziskos_hints::handlers::secp256k1::{ + secp256k1_ecdsa_address_recover, secp256k1_ecdsa_verify_address_recover, +}; +use ziskos_hints::handlers::secp256r1::secp256r1_ecdsa_verify_hint; +use ziskos_hints::handlers::sha256::sha256_hint; + +/// Type alias for custom hint handler functions. +pub type CustomHintHandler = Box Result> + Send + Sync>; + +/// Bundles built-in and custom hint dispatch logic. +/// +/// This is the single table that maps hint codes to compute functions. +/// Passed via `Arc` to each Rayon worker for parallel, allocation-free dispatch. +#[derive(Default)] +pub struct HintHandlers { + custom: HashMap, +} + +impl HintHandlers { + /// Register a custom hint handler for the given hint code. + pub fn register(mut self, hint_code: u32, handler: F) -> Self + where + F: Fn(&[u64]) -> Result> + Send + Sync + 'static, + { + self.custom.insert(hint_code, Box::new(handler)); + self + } + + pub fn has_custom_hint_code(&self, code: u32) -> bool { + self.custom.contains_key(&code) + } + + /// Dispatch a hint to the appropriate handler. + /// + /// Control hints and Input hints must be handled before calling this. + #[inline] + pub fn dispatch(&self, hint: PrecompileHint) -> Result> { + match hint.hint_code { + HintCode::BuiltIn(builtin) => { + Self::dispatch_builtin(builtin, hint.data, hint.data_len_bytes) + } + HintCode::Custom(code) => self + .custom + .get(&code) + .map(|handler| handler(&hint.data)) + .unwrap_or_else(|| Err(anyhow::anyhow!("Unknown custom hint"))), + _ => unreachable!("Control hints handled before dispatch"), + } + } + + /// Dispatches built-in hints to their corresponding handler functions. + /// The `data_len_bytes` parameter is used for hints that operate on byte arrays (e.g., SHA256, Keccak256) + /// to indicate the actual length of the data in bytes, since the `data` field is a `Vec` and may contain padding. + /// The BuiltInHint::Input is intentionally not handled here, as input hints require special handling and should be processed separately before dispatching to workers. + #[inline] + fn dispatch_builtin( + hint: BuiltInHint, + data: Vec, + data_len_bytes: usize, + ) -> Result> { + match hint { + // SHA256 Hint Codes + BuiltInHint::Sha256 => sha256_hint(&data, data_len_bytes), + + // BN254 Hint Codes + BuiltInHint::Bn254G1Add => bn254_g1_add_hint(&data), + BuiltInHint::Bn254G1Mul => bn254_g1_mul_hint(&data), + BuiltInHint::Bn254PairingCheck => bn254_pairing_check_hint(&data), + + // Secp256k1 Hints + BuiltInHint::Secp256k1EcdsaAddressRecover => secp256k1_ecdsa_address_recover(&data), + BuiltInHint::Secp256k1EcdsaVerifyAddressRecover => { + secp256k1_ecdsa_verify_address_recover(&data) + } + + // Secp256r1 Hints + BuiltInHint::Secp256r1EcdsaVerify => secp256r1_ecdsa_verify_hint(&data), + + // BLS12-381 Hint Codes + BuiltInHint::Bls12_381G1Add => bls12_381_g1_add_hint(&data), + BuiltInHint::Bls12_381G1Msm => bls12_381_g1_msm_hint(&data), + BuiltInHint::Bls12_381G2Add => bls12_381_g2_add_hint(&data), + BuiltInHint::Bls12_381G2Msm => bls12_381_g2_msm_hint(&data), + BuiltInHint::Bls12_381PairingCheck => bls12_381_pairing_check_hint(&data), + BuiltInHint::Bls12_381FpToG1 => bls12_381_fp_to_g1_hint(&data), + BuiltInHint::Bls12_381Fp2ToG2 => bls12_381_fp2_to_g2_hint(&data), + + // Modular Exponentiation Hint Codes + BuiltInHint::ModExp => modexp_hint(&data), + + // KZG Hint Codes + BuiltInHint::VerifyKzgProof => verify_kzg_proof_hint(&data), + + // Keccak256 Hint Codes + BuiltInHint::Keccak256 => keccak256_hint(&data, data_len_bytes), + + // Blake2b Hint Codes + BuiltInHint::Blake2bCompress => blake2b_compress_hint(&data), + + // Input Hint Codes + BuiltInHint::Input => unreachable!( + "Input hints should be handled separately and not dispatched to workers" + ), + } + } +} diff --git a/precompiles/hints/src/hints_processor.rs b/precompiles/hints/src/hints_processor.rs new file mode 100644 index 000000000..15967e4cb --- /dev/null +++ b/precompiles/hints/src/hints_processor.rs @@ -0,0 +1,1646 @@ +//! Precompile Hints Processor +//! +//! This module provides functionality for processing precompile hints +//! that are received as a stream of `u64` values. Hints are used to provide preprocessed +//! data to precompile operations in the ZisK zkVM. + +use anyhow::Result; +use rayon::{ThreadPool, ThreadPoolBuilder}; +use std::collections::{HashMap, VecDeque}; +use std::mem::ManuallyDrop; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::{Arc, Condvar, Mutex}; +use std::time::Instant; +use tracing::{debug, info}; +use zisk_common::io::{StreamProcessor, StreamSink}; +use zisk_common::{ + BuiltInHint, CtrlHint, HintCode, PartialPrecompileHint, PrecompileHint, + PrecompileHintParseResult, +}; +use zisk_distributed_common::{JobPhase, StreamMessage}; + +use crate::hint_handlers::HintHandlers; + +/// Ordered result buffer with drain state. +/// +/// This structure maintains a VecDeque that holds processed results in order, +/// allowing out-of-order completion while ensuring in-order output. +struct ResultQueue { + /// The result buffer: None = pending, Some(Ok(...)) = ready, Some(Err(...)) = error + buffer: VecDeque>>>, + /// Sequence ID of the next result to drain from buffer[0] + next_drain_seq: usize, +} + +/// Thread-safe shared state for parallel hint processing. +struct HintProcessorState { + /// Ordered results ready for draining + queue: Mutex, + /// Notifies drainer thread when a hint completes + drain_signal: Condvar, + /// Next sequence ID to assign to incoming hints + next_seq: AtomicUsize, + /// Signals processing should stop + error_flag: AtomicBool, + /// Signals drainer thread to shut down + shutdown: AtomicBool, + /// Invalidates stale workers after reset + generation: AtomicUsize, +} + +impl HintProcessorState { + fn new() -> Self { + Self { + queue: Mutex::new(ResultQueue { buffer: VecDeque::new(), next_drain_seq: 0 }), + drain_signal: Condvar::new(), + next_seq: AtomicUsize::new(0), + error_flag: AtomicBool::new(false), + shutdown: AtomicBool::new(false), + generation: AtomicUsize::new(0), + } + } + + fn reset(&self) { + self.error_flag.store(false, Ordering::Release); + self.next_seq.store(0, Ordering::Relaxed); + self.generation.fetch_add(1, Ordering::SeqCst); + let mut queue = self.queue.lock().unwrap(); + queue.buffer.clear(); + queue.next_drain_seq = 0; + } +} + +/// Type alias for MPI broadcast callback function. +pub type MpiBroadcastFn = Arc) -> Result<()> + Send + Sync>; + +/// Builder for configuring and constructing a [`HintsProcessor`]. +pub struct HintsProcessorBuilder { + hints_sink: Arc, + inputs_sink: Option>, + num_threads: usize, + enable_stats: bool, + handlers: HintHandlers, + mpi_broadcast_fn: Option, +} + +impl HintsProcessorBuilder { + /// Sets the number of worker threads in the thread pool. + pub fn num_threads(mut self, num_threads: usize) -> Self { + self.num_threads = num_threads; + self + } + + /// Enables or disables statistics collection. + pub fn enable_stats(mut self, enable: bool) -> Self { + self.enable_stats = enable; + self + } + + /// Sets an MPI broadcast function to be called during initialization. + /// + /// This allows synchronization of initialization data across MPI ranks. + pub fn with_mpi_broadcast(mut self, broadcast_fn: F) -> Self + where + F: Fn(&mut Vec) -> Result<()> + Send + Sync + 'static, + { + self.mpi_broadcast_fn = Some(Arc::new(broadcast_fn)); + self + } + + /// Sets the hint dispatch table. + pub fn with_hint_handlers(mut self, handlers: HintHandlers) -> Self { + self.handlers = handlers; + self + } + + /// Builds the [`HintsProcessor`] with the configured settings. + /// + /// # Returns + /// + /// * `Ok(HintsProcessor)` - The configured hints processor + /// * `Err` - If the thread pool fails to initialize + pub fn build(self) -> Result> { + let pool = ThreadPoolBuilder::new() + .num_threads(self.num_threads) + .build() + .map_err(|e| anyhow::anyhow!("Failed to create thread pool: {}", e))?; + + let state = Arc::new(HintProcessorState::new()); + let hints_sink = self.hints_sink; + let inputs_sink = self.inputs_sink; + + // Spawn drainer thread + let drainer_state = Arc::clone(&state); + let drainer_sink = Arc::clone(&hints_sink); + let drainer_broadcast = self.mpi_broadcast_fn.clone(); + let drainer_thread = std::thread::spawn(move || { + HintsProcessor::drainer_thread(drainer_state, drainer_sink, drainer_broadcast); + }); + + Ok(HintsProcessor { + pool, + num_hint: AtomicUsize::new(0), + state, + stats: if self.enable_stats { Some(Mutex::new(HashMap::new())) } else { None }, + hints_sink, + inputs_sink, + drainer_thread: ManuallyDrop::new(drainer_thread), + handlers: Arc::new(self.handlers), + stream_active: AtomicBool::new(false), + instant: Mutex::new(None), + pending_partial: Mutex::new(None), + mpi_broadcast_fn: self.mpi_broadcast_fn.clone(), + }) + } +} + +/// Processor for precompile hints that supports parallel execution. +/// +/// This struct provides methods to parse and process a stream of concatenated +/// hints, using a dedicated Rayon thread pool for parallel processing while +/// preserving the original order of results. +pub struct HintsProcessor { + /// The thread pool used for parallel hint processing. + pool: ThreadPool, + + num_hint: AtomicUsize, + + /// Shared state for parallel hint processing + state: Arc, + + /// Optional statistics collected during hint processing (for debugging). + stats: Option>>, + + /// The hints sink used to submit processed hints. + hints_sink: Arc, + + /// The inputs sink used to submit processed input hints (if any). + inputs_sink: Option>, + + /// Handle to the drainer thread (wrapped in ManuallyDrop to join in Drop) + drainer_thread: ManuallyDrop>, + + /// Hint dispatch table (built-in + custom) + handlers: Arc, + + /// Tracks whether a stream is currently active (between CTRL_START and CTRL_END) + stream_active: AtomicBool, + + /// Timestamp of when the current stream started (for performance metrics) + instant: Mutex>, + + /// Buffer for incomplete hint data between batches + pending_partial: Mutex>, + + /// Optional MPI broadcast function for distributed execution + mpi_broadcast_fn: Option, +} + +impl HintsProcessor { + /// Default number of worker threads in the thread pool. + const DEFAULT_NUM_THREADS: usize = 32; + + /// Creates a builder for configuring a [`HintsProcessor`]. + /// + /// # Arguments + /// + /// * `hints_sink` - The sink used to submit processed hints (any type implementing StreamSink) + /// + /// # Examples + /// + /// ```ignore + /// let processor = HintsProcessor::builder(my_sink) + /// .num_threads(16) + /// .enable_stats(false) + /// .build()?; + /// ``` + pub fn builder( + hints_sink: Arc, + inputs_sink: Option>, + ) -> HintsProcessorBuilder { + HintsProcessorBuilder { + hints_sink, + inputs_sink: inputs_sink.map(|s| s as Arc), + num_threads: Self::DEFAULT_NUM_THREADS, + enable_stats: false, + handlers: HintHandlers::default(), + mpi_broadcast_fn: None, + } + } + + /// Processes hints in parallel with non-blocking, ordered output. + /// + /// This method dispatches each hint to the thread pool for parallel processing. + /// Results are collected in a reorder buffer and submitted to the sink in the original + /// order as soon as consecutive results become available. + /// + /// # Key characteristics: + /// - **Non-blocking**: Returns immediately after enqueuing hints + /// - **Global sequence**: Sequence IDs maintained across multiple batch calls + /// - **Ordered submission**: Results submitted to sink in order hints were received + /// - **Error handling**: Stops processing on first error + /// + /// # Concurrency Warning + /// + /// This method takes is designed for **sequential usage only**. + /// Concurrent calls may cause incorrect processing. + /// + /// # Arguments + /// + /// * `hints` - A slice of `u64` values containing concatenated hints + /// * `first_batch` - Whether this is the first batch (for CTRL_START validation) + /// + /// # Returns + /// + /// * `Ok(true)` - CTRL_END was encountered + /// * `Ok(false)` - Batch processed successfully, no CTRL_END + /// * `Err` - If a previous error occurred or hints are malformed + pub fn process_hints(&self, hints: &[u64], first_batch: bool) -> Result { + let mut has_ctrl_end = false; + + // Take any pending partial hint from previous batch + let mut pending_partial = self.pending_partial.lock().unwrap().take(); + + // Parse hints and dispatch to pool + let mut idx = 0; + while idx < hints.len() { + // Check for error before processing each hint + if self.state.error_flag.load(Ordering::Acquire) { + return Err(anyhow::anyhow!("Processing stopped due to previous error")); + } + let (parsed_hint, consumed) = + PrecompileHint::from_u64_slice(hints, idx, true, pending_partial.take())?; + + let hint = match parsed_hint { + PrecompileHintParseResult::Complete(hint) => hint, + PrecompileHintParseResult::Partial(partial) => { + // Store partial for next batch and exit loop + *self.pending_partial.lock().unwrap() = Some(partial); + break; + } + }; + + // println!("Received Hint <= {:?}:", hint); + + self.num_hint.fetch_add(1, Ordering::Relaxed); + + // Fail fast if a custom hint code has no registered handler (skip pass-through) + if !hint.is_passthrough { + if let HintCode::Custom(code) = hint.hint_code { + if !self.handlers.has_custom_hint_code(code) { + return Err(anyhow::anyhow!( + "Unknown custom hint code {:#x}: no handler registered", + code + )); + } + } + } + + let length = consumed; + + if let Some(stats) = &self.stats { + if !matches!(hint.hint_code, HintCode::Ctrl(_)) { + stats + .lock() + .unwrap() + .entry(hint.hint_code) + .and_modify(|c| *c += 1) + .or_insert(1); + } + } + + // Check if this is a control code + match hint.hint_code { + HintCode::Ctrl(CtrlHint::Start) => { + // CTRL_START must be the first message of the first batch + if !first_batch { + return Err(anyhow::anyhow!( + "CTRL_START can only be sent as the first message in the stream" + )); + } + if idx != 0 { + return Err(anyhow::anyhow!( + "CTRL_START must be the first hint in the batch, but found at index {}", + idx + )); + } + + debug!("CTRL_START received, starting new stream"); + + // Mark stream as active + self.stream_active.store(true, Ordering::Release); + // Control hint only; skip processing + idx += length; + *self.instant.lock().unwrap() = Some(Instant::now()); + continue; + } + HintCode::Ctrl(CtrlHint::End) => { + // CTRL_END requires a prior CTRL_START + if !self.stream_active.swap(false, Ordering::AcqRel) { + return Err(anyhow::anyhow!( + "CTRL_END received without a prior CTRL_START" + )); + } + + // Control hint only; wait for completion then set flag + self.wait_for_completion()?; + has_ctrl_end = true; + idx += length; + + debug!("CTRL_END received, all hints processed"); + + // CTRL_END should be the last message - verify and break + if idx < hints.len() { + return Err(anyhow::anyhow!( + "CTRL_END must be the last hint, but {} bytes remain", + hints.len() - idx + )); + } + + self.print_num_processed_hints(); + + break; + } + HintCode::Ctrl(CtrlHint::Cancel) => { + // Cancel current stream: set error and notify + self.state.error_flag.store(true, Ordering::Release); + self.state.drain_signal.notify_all(); + return Err(anyhow::anyhow!("Stream cancelled")); + } + HintCode::Ctrl(CtrlHint::Error) => { + // External error signal + self.state.error_flag.store(true, Ordering::Release); + self.state.drain_signal.notify_all(); + return Err(anyhow::anyhow!("Stream error signalled")); + } + _ => {} // Built-in data hint or custom hint; continue processing + } + + // If the hint is an input hint, write it to the inputs sink instead of processing + if hint.hint_code == HintCode::BuiltIn(BuiltInHint::Input) { + if let Some(broadcast_fn) = &self.mpi_broadcast_fn { + let mut serialized = borsh::to_vec(&( + JobPhase::ContributionsInputsStream, + StreamMessage { data: hint.data.clone() }, + )) + .unwrap(); + + broadcast_fn(&mut serialized).expect("MPI broadcast failed for input hint"); + } + + self.inputs_sink + .as_ref() + .ok_or_else(|| { + anyhow::anyhow!("Received input hint but no inputs sink configured") + })? + .submit(&hint.data)?; + // Continue to next hint without spawning worker + idx += length; + continue; + } + + let generation = self.state.generation.load(Ordering::SeqCst); + + // Atomically reserve slot - use Relaxed for seq since mutex provides ordering + let seq_id = { + let mut queue = self.state.queue.lock().unwrap(); + let seq = self.state.next_seq.fetch_add(1, Ordering::Relaxed); + + // Handle pass-through hints immediately + if hint.is_passthrough { + queue.buffer.push_back(Some(Ok(hint.data.clone()))); + // Notify immediately while holding the lock to ensure drainer sees the result + // Release lock after this block, avoiding duplicate notification + drop(queue); + // Use notify_all since wait_for_completion also waits on this condvar + self.state.drain_signal.notify_all(); + // Continue to next hint without spawning worker + idx += length; + continue; + } else { + queue.buffer.push_back(None); + } + + seq + }; + + // Spawn processing task for async hints (Noop already handled above) + let state = Arc::clone(&self.state); + let handlers = Arc::clone(&self.handlers); + self.pool.spawn(move || { + Self::worker_thread(state, hint, generation, seq_id, handlers); + }); + + idx += length; + } + + if has_ctrl_end { + if let Some(stats) = &self.stats { + debug!("Hints stats:"); + let stats = stats.lock().unwrap(); + let mut sorted_stats: Vec<_> = stats.iter().collect(); + sorted_stats.sort_by_key(|(&hint_code, _)| hint_code.to_u32()); + for (hint_code, count) in sorted_stats { + debug!(" {}: {}", hint_code, count); + } + } + } + + Ok(has_ctrl_end) + } + + /// Prints the total number of processed hints and processing rate if in debug mode. + fn print_num_processed_hints(&self) { + let num_hints = self.num_hint.load(Ordering::Relaxed); + + if tracing::enabled!(tracing::Level::DEBUG) { + let elapsed = self.instant.lock().as_ref().unwrap().unwrap().elapsed(); + let rate = num_hints as f64 / elapsed.as_secs_f64(); + + let (value, unit) = if rate >= 1_000_000.0 { + (rate / 1_000_000.0, "MHz") + } else if rate >= 1_000.0 { + (rate / 1_000.0, "kHz") + } else { + (rate, "Hz") + }; + + debug!("Processed {} hints in {:.0?} ({}{})", num_hints, elapsed, value.round(), unit); + } else { + info!("··· Processed {} hints", num_hints); + } + } + + /// Worker thread that processes a single hint and stores the result. + /// + /// # Arguments + /// + /// * `state` - Shared processor state + /// * `hint` - The hint to process + /// * `generation` - Generation number for detecting stale workers + /// * `seq_id` - Sequence ID for ordering results + /// * `handlers` - Hint Handlers for dispatching the hint + fn worker_thread( + state: Arc, + hint: PrecompileHint, + generation: usize, + seq_id: usize, + handlers: Arc, + ) { + // Check generation first to detect stale workers (before processing) + let current_gen = state.generation.load(Ordering::SeqCst); + if generation != current_gen { + return; + } + + // Catch panics to prevent permanently-stuck None slots in the buffer. + // If dispatch_hint panics, Rayon catches it silently but the slot would + // stay None forever, blocking the drainer from making progress. + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + if state.error_flag.load(Ordering::Acquire) { + Err(anyhow::anyhow!("Processing stopped due to error")) + } else { + handlers.dispatch(hint) + } + })) + .unwrap_or_else(|panic_info| { + let msg = if let Some(s) = panic_info.downcast_ref::<&str>() { + s.to_string() + } else if let Some(s) = panic_info.downcast_ref::() { + s.clone() + } else { + "unknown panic".to_string() + }; + Err(anyhow::anyhow!("Worker panicked processing hint: {}", msg)) + }); + + // Store result - MUST fill slot even if error occurred + let mut queue = state.queue.lock().unwrap(); + + // Check generation again in case reset happened during processing + let current_gen = state.generation.load(Ordering::SeqCst); + if generation != current_gen { + return; + } + + // Calculate offset in buffer; handle drained slots + if seq_id < queue.next_drain_seq { + return; + } + let offset = seq_id - queue.next_drain_seq; + + // Check if slot exists - if not, drainer already processed and removed it + if offset >= queue.buffer.len() { + return; + } + + // Fill the slot to allow drainer to proceed (critical for ordering) + queue.buffer[offset] = Some(result); + + // Only wake the drainer when we filled the FRONT slot (offset == 0). + // The drainer can only make progress when buffer[0] is ready; waking it + // for any other slot causes it to re-check, find nothing drainable, and + // go back to sleep — pure overhead (O(N) context switches for N hints). + // When offset > 0, the drainer will reach this slot naturally during its + // current drain cycle after the front slots are consumed. + // Notify WHILE holding the lock to prevent a missed-wakeup race: the + // drainer cannot enter condvar.wait() while we hold the lock, so this + // notification cannot be lost. + if offset == 0 { + state.drain_signal.notify_all(); + } + + // Release lock after notifying + drop(queue); + } + + /// Drainer thread that waits for hints to complete and drains ready results from queue. + fn drainer_thread( + state: Arc, + hints_sink: Arc, + mpi_broadcast_fn: Option, + ) { + loop { + let mut queue = state.queue.lock().unwrap(); + + // Check for shutdown + if state.shutdown.load(Ordering::Acquire) { + break; + } + + // Drain all consecutive ready results from the front + let mut drained_any = false; + while let Some(Some(res)) = queue.buffer.front() { + drained_any = true; + match res { + Ok(data) => { + // Clone data before dropping lock + let data_to_submit = data.clone(); + queue.buffer.pop_front(); + queue.next_drain_seq += 1; + + // Drop lock before submitting to avoid blocking workers + drop(queue); + + // Submit to sink + if let Err(e) = hints_sink.submit(&data_to_submit) { + eprintln!("Error submitting to sink: {}", e); + state.error_flag.store(true, Ordering::Release); + state.drain_signal.notify_all(); + return; + } + + if let Some(broadcast_fn) = &mpi_broadcast_fn { + let mut serialized = borsh::to_vec(&( + JobPhase::ContributionsHintsStream, + StreamMessage { data: data_to_submit.clone() }, + )) + .unwrap(); + + broadcast_fn(&mut serialized) + .expect("MPI broadcast failed in drainer thread"); + } + + // Re-acquire lock for next iteration + queue = state.queue.lock().unwrap(); + } + Err(e) => { + // Error found - signal to stop + state.error_flag.store(true, Ordering::Release); + eprintln!("[seq={}] Error: {}", queue.next_drain_seq, e); + queue.buffer.pop_front(); + queue.next_drain_seq += 1; + state.drain_signal.notify_all(); + return; + } + } + } + + // If we drained any results, notify wait_for_completion that buffer changed + if drained_any { + state.drain_signal.notify_all(); + } + + // Check for shutdown again before waiting + if state.shutdown.load(Ordering::Acquire) { + break; + } + + // Wait for notification that a hint completed + #[allow(unused_assignments)] + { + queue = state.drain_signal.wait(queue).unwrap(); + } + } + } + + /// Waits for all pending hints to be processed and drained. + /// + /// This method blocks until the reorder buffer is empty, meaning all + /// dispatched hints have been processed and their results printed. + /// + /// # Returns + /// + /// * `Ok(())` - All hints processed successfully + /// * `Err` - If an error occurred during processing + pub fn wait_for_completion(&self) -> Result<()> { + let mut queue = self.state.queue.lock().unwrap(); + + while !queue.buffer.is_empty() { + if self.state.error_flag.load(Ordering::Acquire) { + return Err(anyhow::anyhow!("Processing stopped due to error")); + } + // Wait for notification that buffer state changed + queue = self.state.drain_signal.wait(queue).unwrap(); + } + + if self.state.error_flag.load(Ordering::Acquire) { + return Err(anyhow::anyhow!("Processing stopped due to error")); + } + + Ok(()) + } + + pub fn reset_state(&self) { + self.num_hint.store(0, Ordering::Relaxed); + self.state.reset(); + if let Some(stats) = self.stats.as_ref() { + stats.lock().unwrap().clear(); + } + self.hints_sink.reset(); + self.stream_active.store(false, Ordering::Release); + self.instant.lock().unwrap().take(); + self.pending_partial.lock().unwrap().take(); + } + + pub fn hints_sink(&self) -> Arc { + Arc::clone(&self.hints_sink) + } +} + +impl Drop for HintsProcessor { + fn drop(&mut self) { + // Signal drainer thread to shut down + self.state.shutdown.store(true, Ordering::Release); + self.state.drain_signal.notify_all(); + + // Join the drainer thread to ensure clean shutdown + // Safety: We only take the value once in drop + unsafe { + let handle = ManuallyDrop::take(&mut self.drainer_thread); + let _ = handle.join(); + } + } +} + +impl StreamProcessor for HintsProcessor { + fn process_hints(&self, data: &[u64], first_batch: bool) -> Result { + self.process_hints(data, first_batch) + } + + fn reset(&self) { + self.reset_state(); + } +} + +#[cfg(test)] +mod tests { + use zisk_common::HintCode; + + use super::*; + + struct NullHints; + + impl StreamSink for NullHints { + fn submit(&self, _processed: &[u64]) -> Result<()> { + Ok(()) + } + } + + fn make_header(hint_type: u32, length: u32) -> u64 { + ((hint_type as u64) << 32) | (length as u64) + } + + fn make_ctrl_header(ctrl: u32, length: u32) -> u64 { + make_header(ctrl, length) + } + + // Pass-through hint code for testing (bit 31 set = pass-through) + // Use high value (0x7FFF_xxxx range) to avoid conflicting with any built-in hint codes + const TEST_PASSTHROUGH_HINT: u32 = 0x8000_0000 | 0x7FFF_0000; + + fn processor() -> HintsProcessor { + HintsProcessor::builder(Arc::new(NullHints), None::>) + .num_threads(2) + .build() + .unwrap() + } + + // Positive tests + #[test] + fn test_single_result_hint_non_blocking() { + let p = processor(); + // length=16 means 16 bytes = 2 u64s of data + let data = vec![make_header(TEST_PASSTHROUGH_HINT, 16), 0x111, 0x222]; + + // Dispatch should succeed and be non-blocking + assert!(p.process_hints(&data, false).is_ok()); + // Wait for completion should succeed + assert!(p.wait_for_completion().is_ok()); + + // Buffer should be empty after completion + let queue = p.state.queue.lock().unwrap(); + assert!(queue.buffer.is_empty()); + assert_eq!(queue.next_drain_seq, 1); + } + + #[test] + fn test_multiple_hints_ordered_output() { + let p = processor(); + // length is in bytes: 8 bytes = 1 u64 + let data = vec![ + make_header(TEST_PASSTHROUGH_HINT, 8), + 0x111, + make_header(TEST_PASSTHROUGH_HINT, 8), + 0x222, + make_header(TEST_PASSTHROUGH_HINT, 8), + 0x333, + ]; + assert!(p.process_hints(&data, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Verify all hints were processed (buffer empty, next_drain_seq advanced) + let queue = p.state.queue.lock().unwrap(); + assert!(queue.buffer.is_empty()); + assert_eq!(queue.next_drain_seq, 3); + } + + #[test] + fn test_multiple_calls_global_sequence() { + let p = processor(); + // length is in bytes: 8 bytes = 1 u64 + let data1 = vec![make_header(TEST_PASSTHROUGH_HINT, 8), 0xAAA]; + let data2 = vec![make_header(TEST_PASSTHROUGH_HINT, 8), 0xBBB]; + + assert!(p.process_hints(&data1, false).is_ok()); + assert!(p.process_hints(&data2, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Verify sequence continued across calls + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 2); + assert!(queue.buffer.is_empty()); + } + + #[test] + fn test_empty_input_ok() { + let p = processor(); + let data: Vec = vec![]; + assert!(p.process_hints(&data, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // No hints processed + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 0); + } + + // Negative tests + #[test] + fn test_unknown_hint_type_returns_error() { + let p = processor(); + // length is in bytes: 8 bytes = 1 u64 + let data = vec![make_header(999, 8), 0x1234]; + + // Should return error immediately during validation + let result = p.process_hints(&data, false); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("Unknown custom hint code")); + } + + #[test] + fn test_error_stops_wait() { + let p = processor(); + // First valid (8 bytes = 1 u64), then invalid type with 0 bytes + let data = vec![make_header(TEST_PASSTHROUGH_HINT, 8), 0x111, make_header(999, 0)]; + + // Should error immediately when encountering invalid hint type + let result = p.process_hints(&data, false); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("Unknown custom hint code")); + } + + #[test] + fn test_reset_clears_error() { + let p = processor(); + let bad = vec![make_header(999, 0)]; + let result = p.process_hints(&bad, false); + + // Should get synchronous error for invalid hint type + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("Unknown custom hint code")); + + // Reset should clear any error state + p.reset_state(); + assert!(!p.state.error_flag.load(Ordering::Acquire)); + + // Should be able to process new hints after reset (8 bytes = 1 u64) + let good = vec![make_header(TEST_PASSTHROUGH_HINT, 8), 0x42]; + assert!(p.process_hints(&good, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 1); + } + + #[test] + fn test_stream_end_waits_until_completion() { + let p = processor(); + + // Send CTRL_START first (required before CTRL_END) + let start = vec![make_ctrl_header(HintCode::Ctrl(CtrlHint::Start).to_u32(), 0)]; + p.process_hints(&start, true).unwrap(); + + // Dispatch hints (8 bytes = 1 u64 each) + let data = vec![ + make_header(TEST_PASSTHROUGH_HINT, 8), + 0x10, + make_header(TEST_PASSTHROUGH_HINT, 8), + 0x20, + ]; + p.process_hints(&data, false).unwrap(); + + // END should wait internally + let end = vec![make_ctrl_header(HintCode::Ctrl(CtrlHint::End).to_u32(), 0)]; + p.process_hints(&end, false).unwrap(); + + // Buffer should already be empty + { + let queue = p.state.queue.lock().unwrap(); + assert!(queue.buffer.is_empty()); + assert_eq!(queue.next_drain_seq, 2); + } + + // Explicit wait should be instant + assert!(p.wait_for_completion().is_ok()); + } + + #[test] + fn test_stream_cancel_returns_error() { + let p = processor(); + let cancel = vec![make_ctrl_header(HintCode::Ctrl(CtrlHint::Cancel).to_u32(), 0)]; + + let result = p.process_hints(&cancel, false); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("cancelled")); + + // Error flag should be set + assert!(p.state.error_flag.load(Ordering::Acquire)); + } + + #[test] + fn test_stream_error_signal_returns_error() { + let p = processor(); + let signal_err = vec![make_ctrl_header(HintCode::Ctrl(CtrlHint::Error).to_u32(), 0)]; + + let result = p.process_hints(&signal_err, false); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("error")); + + // Error flag should be set + assert!(p.state.error_flag.load(Ordering::Acquire)); + } + + #[test] + fn test_ctrl_start_must_be_first_in_batch() { + let p = processor(); + + // CTRL_START not at position 0 should fail (8 bytes = 1 u64) + let data = vec![ + make_header(TEST_PASSTHROUGH_HINT, 8), + 0x42, + make_ctrl_header(HintCode::Ctrl(CtrlHint::Start).to_u32(), 0), + ]; + + let result = p.process_hints(&data, true); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("must be the first hint")); + } + + #[test] + fn test_ctrl_start_only_in_first_batch() { + let p = processor(); + + // First batch is ok (8 bytes = 1 u64) + let batch1 = vec![make_header(TEST_PASSTHROUGH_HINT, 8), 0x01]; + p.process_hints(&batch1, false).unwrap(); + + // CTRL_START in non-first batch should fail + let start = vec![make_ctrl_header(HintCode::Ctrl(CtrlHint::Start).to_u32(), 0)]; + let result = p.process_hints(&start, false); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("first message in the stream")); + } + + #[test] + fn test_ctrl_end_must_be_last() { + let p = processor(); + + // Send CTRL_START first (required before CTRL_END) + let start = vec![make_ctrl_header(HintCode::Ctrl(CtrlHint::Start).to_u32(), 0)]; + p.process_hints(&start, true).unwrap(); + + // CTRL_END not at end should fail (8 bytes = 1 u64) + let data = vec![ + make_ctrl_header(HintCode::Ctrl(CtrlHint::End).to_u32(), 0), + make_header(TEST_PASSTHROUGH_HINT, 8), + 0x42, + ]; + + let result = p.process_hints(&data, false); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("must be the last hint")); + } + + #[test] + fn test_sink_receives_correct_data() { + use std::sync::{Arc, Mutex}; + + struct RecordingSink { + received: Arc>>>, + } + + impl StreamSink for RecordingSink { + fn submit(&self, processed: &[u64]) -> Result<()> { + self.received.lock().unwrap().push(processed.to_vec()); + Ok(()) + } + } + + let received = Arc::new(Mutex::new(Vec::new())); + let sink = RecordingSink { received: Arc::clone(&received) }; + let p = HintsProcessor::builder(Arc::new(sink), None::>) + .num_threads(2) + .build() + .unwrap(); + + // Send some data (16 bytes = 2 u64s, 8 bytes = 1 u64) + let data = vec![ + make_header(TEST_PASSTHROUGH_HINT, 16), + 0xAAA, + 0xBBB, + make_header(TEST_PASSTHROUGH_HINT, 8), + 0xCCC, + ]; + + p.process_hints(&data, false).unwrap(); + p.wait_for_completion().unwrap(); + + // Verify sink received correct data in order + let received = received.lock().unwrap(); + assert_eq!(received.len(), 2); + assert_eq!(received[0], vec![0xAAA, 0xBBB]); + assert_eq!(received[1], vec![0xCCC]); + } + + #[test] + fn test_sink_error_stops_processing() { + use std::sync::atomic::{AtomicBool, Ordering}; + use std::sync::Arc; + + struct FailingSink { + should_fail: Arc, + } + + impl StreamSink for FailingSink { + fn submit(&self, _processed: &[u64]) -> Result<()> { + if self.should_fail.load(Ordering::Acquire) { + Err(anyhow::anyhow!("Sink error")) + } else { + Ok(()) + } + } + } + + let should_fail = Arc::new(AtomicBool::new(false)); + let sink = FailingSink { should_fail: Arc::clone(&should_fail) }; + let p = HintsProcessor::builder(Arc::new(sink), None::>) + .num_threads(2) + .build() + .unwrap(); + + // First batch succeeds (8 bytes = 1 u64) + let data1 = vec![make_header(TEST_PASSTHROUGH_HINT, 8), 0x01]; + assert!(p.process_hints(&data1, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Make sink fail + should_fail.store(true, Ordering::Release); + + // Second batch should trigger sink error + let data2 = vec![make_header(TEST_PASSTHROUGH_HINT, 1), 0x02]; + assert!(p.process_hints(&data2, false).is_ok()); + + // Wait should detect the error from drainer thread + std::thread::sleep(std::time::Duration::from_millis(100)); + assert!(p.state.error_flag.load(Ordering::Acquire)); + } + + // Builder tests + #[test] + fn test_builder_configuration() { + // Default builder - stats disabled + let p1 = + HintsProcessor::builder(Arc::new(NullHints), None::>).build().unwrap(); + assert!(p1.stats.is_none()); + + // Explicitly disabled stats + let p2 = HintsProcessor::builder(Arc::new(NullHints), None::>) + .enable_stats(false) + .build() + .unwrap(); + assert!(p2.stats.is_none()); + + // Stats enabled + let p3 = HintsProcessor::builder(Arc::new(NullHints), None::>) + .enable_stats(true) + .build() + .unwrap(); + assert!(p3.stats.is_some()); + + // Custom threads + let p4 = HintsProcessor::builder(Arc::new(NullHints), None::>) + .num_threads(4) + .build() + .unwrap(); + let data = vec![make_header(TEST_PASSTHROUGH_HINT, 1), 0x42]; + assert!(p4.process_hints(&data, false).is_ok()); + assert!(p4.wait_for_completion().is_ok()); + + // Chaining multiple options + let p5 = HintsProcessor::builder(Arc::new(NullHints), None::>) + .num_threads(8) + .enable_stats(true) + .build() + .unwrap(); + assert!(p5.stats.is_some()); + } + + // Stress test + #[test] + fn test_stress_throughput() { + use std::time::Instant; + + let p = HintsProcessor::builder(Arc::new(NullHints), None::>) + .num_threads(32) + .build() + .unwrap(); + + // Generate a large batch of hints + const NUM_HINTS: usize = 100_000; + let mut data = Vec::with_capacity(NUM_HINTS * 2); + + for i in 0..NUM_HINTS { + data.push(make_header(TEST_PASSTHROUGH_HINT, 1)); + data.push(i as u64); + } + + let start = Instant::now(); + p.process_hints(&data, false).unwrap(); + p.wait_for_completion().unwrap(); + let duration = start.elapsed(); + + let ops_per_sec = NUM_HINTS as f64 / duration.as_secs_f64(); + println!("\n========================================"); + println!("Stress Test Results:"); + println!(" Total hints: {}", NUM_HINTS); + println!(" Duration: {:.3}s", duration.as_secs_f64()); + println!(" Throughput: {:.0} ops/sec", ops_per_sec); + println!(" Avg latency: {:.2}µs per hint", duration.as_micros() as f64 / NUM_HINTS as f64); + println!("========================================\n"); + + // Sanity check: should be able to process at least 10k ops/sec + assert!(ops_per_sec > 10_000.0, "Throughput too low: {:.0} ops/sec", ops_per_sec); + } + + #[test] + fn test_stress_concurrent_batches() { + use std::time::Instant; + + let p = HintsProcessor::builder(Arc::new(NullHints), None::>) + .num_threads(32) + .build() + .unwrap(); + + const NUM_BATCHES: usize = 1_000; + const HINTS_PER_BATCH: usize = 100; + + let start = Instant::now(); + + // Call process_hints multiple times with small batches + for batch_id in 0..NUM_BATCHES { + let mut data = Vec::with_capacity(HINTS_PER_BATCH * 2); + for i in 0..HINTS_PER_BATCH { + data.push(make_header(TEST_PASSTHROUGH_HINT, 1)); + data.push((batch_id * HINTS_PER_BATCH + i) as u64); + } + p.process_hints(&data, false).unwrap(); + } + + p.wait_for_completion().unwrap(); + let duration = start.elapsed(); + + let total_hints = NUM_BATCHES * HINTS_PER_BATCH; + let ops_per_sec = total_hints as f64 / duration.as_secs_f64(); + + println!("\n========================================"); + println!("Multiple Batches Stress Test:"); + println!(" Number of batches: {}", NUM_BATCHES); + println!(" Hints per batch: {}", HINTS_PER_BATCH); + println!(" Total hints: {}", total_hints); + println!(" Duration: {:.3}s", duration.as_secs_f64()); + println!(" Throughput: {:.0} ops/sec", ops_per_sec); + println!("========================================\n"); + + assert!(ops_per_sec > 10_000.0, "Throughput too low: {:.0} ops/sec", ops_per_sec); + } + + #[test] + fn test_stress_with_resets() { + use std::time::Instant; + + let p = HintsProcessor::builder(Arc::new(NullHints), None::>) + .num_threads(32) + .build() + .unwrap(); + + const ITERATIONS: usize = 100; + const HINTS_PER_ITER: usize = 1_000; + + let start = Instant::now(); + + for _iter in 0..ITERATIONS { + // Reset at start of each iteration + let reset = vec![make_ctrl_header(HintCode::Ctrl(CtrlHint::Start).to_u32(), 0)]; + p.process_hints(&reset, true).unwrap(); + + // Process batch + let mut data = Vec::with_capacity(HINTS_PER_ITER * 2); + for i in 0..HINTS_PER_ITER { + data.push(make_header(TEST_PASSTHROUGH_HINT, 1)); + data.push(i as u64); + } + p.process_hints(&data, false).unwrap(); + + // End stream + let end = vec![make_ctrl_header(HintCode::Ctrl(CtrlHint::End).to_u32(), 0)]; + p.process_hints(&end, false).unwrap(); + } + + let duration = start.elapsed(); + let total_hints = ITERATIONS * HINTS_PER_ITER; + let ops_per_sec = total_hints as f64 / duration.as_secs_f64(); + + println!("\n========================================"); + println!("Reset Stress Test:"); + println!(" Iterations: {}", ITERATIONS); + println!(" Hints per iteration: {}", HINTS_PER_ITER); + println!(" Total hints: {}", total_hints); + println!(" Duration: {:.3}s", duration.as_secs_f64()); + println!(" Throughput: {:.0} ops/sec", ops_per_sec); + println!("========================================\n"); + + assert!( + ops_per_sec > 5_000.0, + "Throughput too low with resets: {:.0} ops/sec", + ops_per_sec + ); + } + + #[test] + fn test_custom_handlers_ordered_with_delays() { + use std::sync::{Arc, Mutex}; + use std::thread; + use std::time::Duration; + + struct RecordingSink { + received: Arc>>>, + } + + impl StreamSink for RecordingSink { + fn submit(&self, processed: &[u64]) -> Result<()> { + self.received.lock().unwrap().push(processed.to_vec()); + Ok(()) + } + } + + let received = Arc::new(Mutex::new(Vec::new())); + let sink = RecordingSink { received: Arc::clone(&received) }; + + // Custom hint codes (use high values to avoid conflicts with built-ins) + const FAST_HINT: u32 = 0x7FFF_0000; // Processes instantly + const SLOW_HINT: u32 = 0x7FFF_0001; // Delays 10ms + const MED_HINT: u32 = 0x7FFF_0002; // Delays 5ms + + let handlers = HintHandlers::default() + .register(FAST_HINT, |data| Ok(vec![data[0] * 2])) + .register(SLOW_HINT, |data| { + thread::sleep(Duration::from_millis(10)); + Ok(vec![data[0] * 3]) + }) + .register(MED_HINT, |data| { + thread::sleep(Duration::from_millis(5)); + Ok(vec![data[0] * 4]) + }); + let p = HintsProcessor::builder(Arc::new(sink), None::>) + .num_threads(8) + .with_hint_handlers(handlers) + .build() + .unwrap(); + + // Send hints in order: SLOW, FAST, MED + // They should complete in order: FAST, MED, SLOW + // But results should be returned in submission order: SLOW, FAST, MED + let data = vec![ + make_header(SLOW_HINT, 1), + 10, // Will complete last but should be first result + make_header(FAST_HINT, 1), + 20, // Will complete first but should be second result + make_header(MED_HINT, 1), + 30, // Will complete second but should be third result + make_header(FAST_HINT, 1), + 40, // Fast again + make_header(SLOW_HINT, 1), + 50, // Slow again + ]; + + p.process_hints(&data, false).unwrap(); + p.wait_for_completion().unwrap(); + + // Verify results are in submission order, not completion order + let results = received.lock().unwrap(); + assert_eq!(results.len(), 5); + assert_eq!(results[0], vec![30]); // SLOW: 10 * 3 + assert_eq!(results[1], vec![40]); // FAST: 20 * 2 + assert_eq!(results[2], vec![120]); // MED: 30 * 4 + assert_eq!(results[3], vec![80]); // FAST: 40 * 2 + assert_eq!(results[4], vec![150]); // SLOW: 50 * 3 + } + + // Partial hint tests + #[test] + fn test_partial_hint_header_data_split() { + let p = processor(); + + // Hint split exactly at boundary: header in first batch, data in second + // Header indicates 8 bytes (1 u64) of data + let batch1 = vec![make_header(TEST_PASSTHROUGH_HINT, 8)]; + let batch2 = vec![0x1234]; // The data + + // First batch should succeed but not complete the hint + assert!(p.process_hints(&batch1, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Verify no results yet (hint is partial) + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 0); + assert!(queue.buffer.is_empty()); + } + + // Second batch completes the hint + assert!(p.process_hints(&batch2, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Now we should have the complete result + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 1); + assert!(queue.buffer.is_empty()); + } + } + + #[test] + fn test_partial_hint_partial_data_split() { + let p = processor(); + + // Hint with header + some data in first batch, remaining data in second + // Header indicates 16 bytes (2 u64s) of data + let batch1 = vec![make_header(TEST_PASSTHROUGH_HINT, 16), 0x1111]; // Header + 1 u64 + let batch2 = vec![0x2222]; // Remaining 1 u64 + + // First batch has partial data + assert!(p.process_hints(&batch1, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Verify no results yet + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 0); + } + + // Second batch completes the hint + assert!(p.process_hints(&batch2, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Now we should have the complete result + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 1); + assert!(queue.buffer.is_empty()); + } + } + + #[test] + fn test_partial_hint_across_multiple_batches() { + let p = processor(); + + // Large hint split across 3 batches + // Header indicates 32 bytes (4 u64s) of data + let batch1 = vec![make_header(TEST_PASSTHROUGH_HINT, 32), 0x1111]; // Header + 1 u64 + let batch2 = vec![0x2222, 0x3333]; // 2 more u64s + let batch3 = vec![0x4444]; // Final u64 + + // First batch + assert!(p.process_hints(&batch1, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Second batch + assert!(p.process_hints(&batch2, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Still no complete results + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 0); + } + + // Third batch completes the hint + assert!(p.process_hints(&batch3, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Now we should have the complete result + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 1); + assert!(queue.buffer.is_empty()); + } + } + + #[test] + fn test_multiple_partial_hints_interleaved() { + let p = processor(); + + // Start first hint in first batch, start second hint in second batch, and + // complete them in different subsequent batches. + let batch1 = vec![ + make_header(TEST_PASSTHROUGH_HINT, 16), + 0x1111, // First hint: header + partial data (incomplete) + ]; + let batch2 = vec![ + 0x2222, // Completes first hint + make_header(TEST_PASSTHROUGH_HINT, 8), // Second hint: header only + ]; + let batch3 = vec![0x3333]; // Completes second hint + + // First batch has a single partial hint + assert!(p.process_hints(&batch1, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // No results yet + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 0); + } + + // Complete first hint + assert!(p.process_hints(&batch2, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Should have first result now + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 1); + } + + // Complete second hint + assert!(p.process_hints(&batch3, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Should have both results + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 2); + assert!(queue.buffer.is_empty()); + } + } + + #[test] + fn test_partial_hint_with_complete_hints() { + let p = processor(); + + // Mix partial and complete hints in same batch + let batch1 = vec![ + make_header(TEST_PASSTHROUGH_HINT, 8), + 0x1111, // Complete hint + make_header(TEST_PASSTHROUGH_HINT, 16), + 0x2222, // Partial hint (needs 1 more u64) + ]; + let batch2 = vec![0x3333]; // Completes partial hint + + // First batch processes one complete, one partial + assert!(p.process_hints(&batch1, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Should have result for complete hint + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 1); + } + + // Complete the partial hint + assert!(p.process_hints(&batch2, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Should have both results + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 2); + assert!(queue.buffer.is_empty()); + } + } + + #[test] + fn test_partial_hint_zero_length() { + let p = processor(); + + // Hint with zero data length split across batches + let batch1 = vec![make_header(TEST_PASSTHROUGH_HINT, 0)]; // Header only, zero data + + // Should complete immediately since no data needed + assert!(p.process_hints(&batch1, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Should have result + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 1); + assert!(queue.buffer.is_empty()); + } + } + + #[test] + fn test_partial_hint_stream_control() { + let p = processor(); + + // Test partial hints with stream control messages + let start = vec![make_ctrl_header(HintCode::Ctrl(CtrlHint::Start).to_u32(), 0)]; + let batch1 = vec![make_header(TEST_PASSTHROUGH_HINT, 8)]; // Header only + let batch2 = vec![0x1234]; // Complete the hint + let end = vec![make_ctrl_header(HintCode::Ctrl(CtrlHint::End).to_u32(), 0)]; + + // Start stream + assert!(p.process_hints(&start, true).is_ok()); + + // Partial hint + assert!(p.process_hints(&batch1, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Complete hint + assert!(p.process_hints(&batch2, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // End stream should wait for all hints to complete + assert!(p.process_hints(&end, false).is_ok()); + + // Everything should be processed + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 1); + assert!(queue.buffer.is_empty()); + } + } + + #[test] + fn test_partial_hint_reset_clears_pending() { + let p = processor(); + + // Start a partial hint + let batch1 = vec![make_header(TEST_PASSTHROUGH_HINT, 16), 0x1111]; // Needs 1 more u64 + assert!(p.process_hints(&batch1, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Verify partial hint is pending + { + let partial = p.pending_partial.lock().unwrap(); + assert!(partial.is_some()); + } + + // Reset should clear pending partial + p.reset_state(); + + // Verify pending partial is cleared + { + let partial = p.pending_partial.lock().unwrap(); + assert!(partial.is_none()); + } + + // Should be able to process new hints normally + let batch2 = vec![make_header(TEST_PASSTHROUGH_HINT, 8), 0x2222]; + assert!(p.process_hints(&batch2, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 1); + } + } + + #[test] + fn test_partial_hint_large_data() { + let p = processor(); + + // Test with larger data size (80 bytes = 10 u64s) + let mut batch1 = vec![make_header(TEST_PASSTHROUGH_HINT, 80)]; + batch1.extend([0x1111, 0x2222, 0x3333]); // Header + 3 u64s + + let batch2 = vec![0x4444, 0x5555, 0x6666, 0x7777]; // 4 more u64s + let batch3 = vec![0x8888, 0x9999, 0xAAAA]; // Final 3 u64s + + // Process all batches + assert!(p.process_hints(&batch1, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + assert!(p.process_hints(&batch2, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + assert!(p.process_hints(&batch3, false).is_ok()); + assert!(p.wait_for_completion().is_ok()); + + // Should have complete result + { + let queue = p.state.queue.lock().unwrap(); + assert_eq!(queue.next_drain_seq, 1); + assert!(queue.buffer.is_empty()); + } + } + + #[test] + fn test_custom_handlers_stress_ordering() { + use std::sync::{Arc, Mutex}; + use std::thread; + use std::time::Duration; + + struct RecordingSink { + received: Arc>>>, + } + + impl StreamSink for RecordingSink { + fn submit(&self, processed: &[u64]) -> Result<()> { + self.received.lock().unwrap().push(processed.to_vec()); + Ok(()) + } + } + + let received = Arc::new(Mutex::new(Vec::new())); + let sink = RecordingSink { received: Arc::clone(&received) }; + + const VARIABLE_HINT: u32 = 0x7FFF_0100; + + let handlers = HintHandlers::default().register(VARIABLE_HINT, |data| { + let hash = data[0].wrapping_mul(2654435761); + let delay_ms = hash % 16; + if delay_ms > 0 { + thread::sleep(Duration::from_millis(delay_ms)); + } + Ok(vec![data[0] + 1000]) + }); + let p = HintsProcessor::builder(Arc::new(sink), None::>) + .num_threads(16) + .with_hint_handlers(handlers) + .build() + .unwrap(); + + // Generate pseudo-random number of hints between 100 and 500 + // Using current time as seed for variation across test runs + use std::time::SystemTime; + let seed = + SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_nanos() as u64; + let num_hints = 100 + (seed % 401) as usize; // 100 to 500 inclusive + + let mut data = Vec::with_capacity(num_hints * 2); + for i in 0..num_hints { + data.push(make_header(VARIABLE_HINT, 1)); + data.push(i as u64); + } + + p.process_hints(&data, false).unwrap(); + p.wait_for_completion().unwrap(); + + // Verify all results are in correct order despite random completion times + let results = received.lock().unwrap(); + assert_eq!(results.len(), num_hints, "Expected {} results", num_hints); + for i in 0..num_hints { + assert_eq!(results[i][0], i as u64 + 1000, "Result {} out of order", i); + } + } +} diff --git a/precompiles/hints/src/lib.rs b/precompiles/hints/src/lib.rs new file mode 100644 index 000000000..de936e3cc --- /dev/null +++ b/precompiles/hints/src/lib.rs @@ -0,0 +1,5 @@ +mod hint_handlers; +mod hints_processor; + +pub use hint_handlers::HintHandlers; +pub use hints_processor::{HintsProcessor, MpiBroadcastFn}; diff --git a/precompiles/keccakf/Cargo.toml b/precompiles/keccakf/Cargo.toml index 15f3250c2..731b61df2 100644 --- a/precompiles/keccakf/Cargo.toml +++ b/precompiles/keccakf/Cargo.toml @@ -16,7 +16,6 @@ zisk-core = { workspace = true } zisk-common = { workspace = true } zisk-pil = { workspace = true } -proofman = { workspace = true } proofman-common = { workspace = true } proofman-macros = { workspace = true } proofman-util = { workspace = true } @@ -38,8 +37,5 @@ tiny-keccak = { version = "2.0.2", features = ["keccak"] } [features] default = [] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed"] -no_lib_link = ["proofman-common/no_lib_link"] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] +gpu = ["packed"] +packed = [] \ No newline at end of file diff --git a/precompiles/keccakf/pil/keccakf.pil b/precompiles/keccakf/pil/keccakf.pil index cccb54c01..d42eeba3a 100644 --- a/precompiles/keccakf/pil/keccakf.pil +++ b/precompiles/keccakf/pil/keccakf.pil @@ -6,7 +6,7 @@ require "keccakf_table.pil" // Precompile in charge of performing the Keccak-f[1600] permutation. // For reference: https://keccak.team/files/Keccak-reference-3.0.pdf -airtemplate Keccakf(const int N = 2**17, const int operation_bus_id = OPERATION_BUS_ID, const int mem_ops_in_parallel = 25) { +airtemplate Keccakf(const int N = 2**17, const int mem_ops_in_parallel = 25) { // Validate individual inputs const int WIDTH = 1600; const int BITS_PER_LIMB = 32; @@ -181,7 +181,8 @@ airtemplate Keccakf(const int N = 2**17, const int operation_bus_id = OPERATION_ } // --> Constraints to make sure that this coprocessor is called from the main processor - lookup_proves(operation_bus_id, [OP_KECCAKF, step_addr'(STEP_MAIN), 0, step_addr'(ADDR_STATE), 0, 0, 0, 0], mul: in_use_clk_0); + proves_operation(op: OP_KECCAKF, b: [step_addr'(ADDR_STATE), 0], main_step: step_addr'(STEP_MAIN), + mul: in_use_clk_0); function add(const expr b[]): const expr { const int len = length(b); diff --git a/precompiles/keccakf/src/keccakf_bus_device.rs b/precompiles/keccakf/src/keccakf_bus_device.rs index 2a1884298..87ca467b2 100644 --- a/precompiles/keccakf/src/keccakf_bus_device.rs +++ b/precompiles/keccakf/src/keccakf_bus_device.rs @@ -2,11 +2,12 @@ //! sent over the data bus. It connects to the bus and gathers metrics for specific //! `ZiskOperationType::Keccakf` instructions. -use std::{collections::VecDeque, ops::Add}; +use std::ops::Add; -use zisk_common::MemCollectorInfo; +use precompiles_common::MemProcessor; +use zisk_common::STEP; use zisk_common::{ - BusDevice, BusDeviceMode, BusId, Counter, Metrics, A, B, OPERATION_BUS_ID, OP_TYPE, + BusDevice, BusDeviceMode, BusId, Counter, Metrics, B, OPERATION_BUS_ID, OP_TYPE, }; use zisk_core::ZiskOperationType; @@ -48,6 +49,51 @@ impl KeccakfCounterInputGen { pub fn inst_count(&self, op_type: ZiskOperationType) -> Option { (op_type == ZiskOperationType::Keccak).then_some(self.counter.inst_count) } + + /// Processes data received on the bus, updating counters and generating inputs when applicable. + /// + /// # Arguments + /// * `bus_id` - The ID of the bus sending the data. + /// * `data` - The data received from the bus. + /// * `mem_processors` – A queue of mem_processors bus operations used to send derived inputs. + /// + /// # Returns + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data( + &mut self, + bus_id: &BusId, + data: &[u64], + mem_processors: &mut P, + ) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + if data[OP_TYPE] as u32 != ZiskOperationType::Keccak as u32 { + return true; + } + + let step_main = data[STEP]; + let addr_main = data[B] as u32; + + match self.mode { + BusDeviceMode::Counter => { + self.measure(data); + generate_keccakf_mem_inputs(addr_main, step_main, data, true, mem_processors); + } + BusDeviceMode::CounterAsm => { + self.measure(data); + } + BusDeviceMode::InputGenerator => { + if skip_keccakf_mem_inputs(addr_main, mem_processors) { + return true; + } + generate_keccakf_mem_inputs(addr_main, step_main, data, false, mem_processors); + } + } + + true + } } impl Metrics for KeccakfCounterInputGen { @@ -90,57 +136,6 @@ impl Add for KeccakfCounterInputGen { } impl BusDevice for KeccakfCounterInputGen { - /// Processes data received on the bus, updating counters and generating inputs when applicable. - /// - /// # Arguments - /// * `bus_id` - The ID of the bus sending the data. - /// * `data` - The data received from the bus. - /// * `pending` – A queue of pending bus operations used to send derived inputs. - /// - /// # Returns - /// A boolean indicating whether the program should continue execution or terminate. - /// Returns `true` to continue execution, `false` to stop. - #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - pending: &mut VecDeque<(BusId, Vec)>, - mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { - debug_assert!(*bus_id == OPERATION_BUS_ID); - - if data[OP_TYPE] as u32 != ZiskOperationType::Keccak as u32 { - return true; - } - - if let Some(mem_collectors_info) = mem_collector_info { - if skip_keccakf_mem_inputs(data[B] as u32, mem_collectors_info) { - return true; - } - } - - let step_main = data[A]; - let addr_main = data[B] as u32; - - let only_counters = self.mode == BusDeviceMode::Counter; - if only_counters { - self.measure(data); - } - - generate_keccakf_mem_inputs(addr_main, step_main, data, only_counters, pending); - - true - } - - /// Returns the bus IDs associated with this counter. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/precompiles/keccakf/src/keccakf_gen_mem_inputs.rs b/precompiles/keccakf/src/keccakf_gen_mem_inputs.rs index 3c648530d..a35672ca7 100644 --- a/precompiles/keccakf/src/keccakf_gen_mem_inputs.rs +++ b/precompiles/keccakf/src/keccakf_gen_mem_inputs.rs @@ -1,10 +1,9 @@ -use std::collections::VecDeque; +use precompiles_common::MemProcessor; use tiny_keccak::keccakf; use precompiles_common::MemBusHelpers; -use zisk_common::MemCollectorInfo; -use zisk_common::{BusId, OPERATION_BUS_DATA_SIZE}; +use zisk_common::OPERATION_PRECOMPILED_BUS_DATA_SIZE; #[derive(Debug)] pub struct KeccakfMemInputConfig { @@ -14,16 +13,16 @@ pub struct KeccakfMemInputConfig { pub chunks_per_param: usize, } -pub fn generate_keccakf_mem_inputs( +pub fn generate_keccakf_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { // Get the basic data from the input // op,op_type,a,b,... - let state: &mut [u64; 25] = &mut data[4..29].try_into().unwrap(); + let state: &mut [u64; 25] = &mut data[5..30].try_into().unwrap(); // Apply the keccakf function keccakf(state); @@ -33,7 +32,7 @@ pub fn generate_keccakf_mem_inputs( let write_params = 1; let chunks_per_param = 25; let params_count = read_params + write_params; - let params_offset = OPERATION_BUS_DATA_SIZE; + let params_offset = OPERATION_PRECOMPILED_BUS_DATA_SIZE; for iparam in 0..params_count { let is_write = iparam >= read_params; let param_index = if is_write { iparam - read_params } else { iparam }; @@ -59,23 +58,21 @@ pub fn generate_keccakf_mem_inputs( step_main, chunk_data, is_write, - pending, + mem_processors, ); } } } -pub fn skip_keccakf_mem_inputs(addr_main: u32, mem_collectors_info: &[MemCollectorInfo]) -> bool { +pub fn skip_keccakf_mem_inputs(addr_main: u32, mem_processors: &mut P) -> bool { let write_params = 1; let chunks_per_param = 25; for param_index in 0..write_params { let param_addr = addr_main + (param_index * 8 * chunks_per_param) as u32; for ichunk in 0..chunks_per_param { let addr = param_addr + ichunk as u32 * 8; - for mem_collector in mem_collectors_info { - if !mem_collector.skip_addr(addr) { - return false; - } + if !mem_processors.skip_addr(addr) { + return false; } } } diff --git a/precompiles/keccakf/src/keccakf_input.rs b/precompiles/keccakf/src/keccakf_input.rs index ebbb38dc3..daeb20f76 100644 --- a/precompiles/keccakf/src/keccakf_input.rs +++ b/precompiles/keccakf/src/keccakf_input.rs @@ -10,9 +10,9 @@ pub struct KeccakfInput { impl KeccakfInput { pub fn from(values: &OperationKeccakData) -> Self { Self { - step_main: values[2], + step_main: values[4], addr_main: values[3] as u32, - state: values[4..29].try_into().unwrap(), + state: values[5..30].try_into().unwrap(), } } } diff --git a/precompiles/keccakf/src/keccakf_instance.rs b/precompiles/keccakf/src/keccakf_instance.rs index 13f2187cc..de7dbc41c 100644 --- a/precompiles/keccakf/src/keccakf_instance.rs +++ b/precompiles/keccakf/src/keccakf_instance.rs @@ -6,14 +6,11 @@ use crate::{KeccakfInput, KeccakfSM}; use fields::PrimeField64; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; -use std::{ - any::Any, - collections::{HashMap, VecDeque}, - sync::Arc, -}; +use std::{any::Any, collections::HashMap, sync::Arc}; +use zisk_common::StatsType; use zisk_common::{ BusDevice, BusId, CheckPoint, ChunkId, CollectSkipper, ExtOperationData, Instance, InstanceCtx, - InstanceType, MemCollectorInfo, PayloadType, OPERATION_BUS_ID, OP_TYPE, + InstanceType, PayloadType, OPERATION_BUS_ID, OP_TYPE, }; use zisk_core::ZiskOperationType; use zisk_pil::KeccakfTrace; @@ -116,6 +113,10 @@ impl Instance for KeccakfInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Precompiled + } + fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { assert_eq!( self.ictx.plan.air_id, @@ -162,9 +163,7 @@ impl KeccakfCollector { collect_skipper, } } -} -impl BusDevice for KeccakfCollector { /// Processes data received on the bus, collecting the inputs necessary for witness computation. /// /// # Arguments @@ -177,13 +176,7 @@ impl BusDevice for KeccakfCollector { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[PayloadType], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[PayloadType]) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); if self.inputs.len() == self.num_operations as usize { @@ -208,15 +201,9 @@ impl BusDevice for KeccakfCollector { self.inputs.len() < self.num_operations as usize } +} - /// Returns the bus IDs associated with this instance. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - +impl BusDevice for KeccakfCollector { fn as_any(self: Box) -> Box { self } diff --git a/precompiles/keccakf/src/keccakf_manager.rs b/precompiles/keccakf/src/keccakf_manager.rs index e3a206cdf..28f9e97d0 100644 --- a/precompiles/keccakf/src/keccakf_manager.rs +++ b/precompiles/keccakf/src/keccakf_manager.rs @@ -2,11 +2,8 @@ use std::sync::Arc; use fields::PrimeField64; use pil_std_lib::Std; -use zisk_common::{BusDevice, PayloadType}; -use zisk_common::{ - BusDeviceMetrics, BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, InstanceInfo, Planner, -}; +use zisk_common::{BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, InstanceInfo, Planner}; use zisk_core::ZiskOperationType; use zisk_pil::KeccakfTrace; @@ -31,8 +28,11 @@ impl KeccakfManager { Arc::new(Self { keccakf_sm }) } - pub fn build_keccakf_counter(&self) -> KeccakfCounterInputGen { - KeccakfCounterInputGen::new(BusDeviceMode::Counter) + pub fn build_keccakf_counter(&self, asm_execution: bool) -> KeccakfCounterInputGen { + match asm_execution { + true => KeccakfCounterInputGen::new(BusDeviceMode::CounterAsm), + false => KeccakfCounterInputGen::new(BusDeviceMode::Counter), + } } pub fn build_keccakf_input_generator(&self) -> KeccakfCounterInputGen { @@ -41,14 +41,6 @@ impl KeccakfManager { } impl ComponentBuilder for KeccakfManager { - /// Builds and returns a new counter for monitoring keccakf operations. - /// - /// # Returns - /// A boxed implementation of `RegularCounters` configured for keccakf operations. - fn build_counter(&self) -> Option> { - Some(Box::new(KeccakfCounterInputGen::new(BusDeviceMode::Counter))) - } - /// Builds a planner to plan keccakf-related instances. /// /// # Returns @@ -86,8 +78,4 @@ impl ComponentBuilder for KeccakfManager { } } } - - fn build_inputs_generator(&self) -> Option>> { - Some(Box::new(KeccakfCounterInputGen::new(BusDeviceMode::InputGenerator))) - } } diff --git a/precompiles/poseidon2/Cargo.toml b/precompiles/poseidon2/Cargo.toml new file mode 100644 index 000000000..7291a99a9 --- /dev/null +++ b/precompiles/poseidon2/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "precomp-poseidon2" +version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +keywords = { workspace = true } +repository = { workspace = true } +categories = { workspace = true } + +[dependencies] +zisk-core = { workspace = true } +zisk-common = { workspace = true } +zisk-pil = { workspace = true } +precompiles-common = { workspace = true } +sm-mem = { workspace = true } +mem-common = { workspace = true } + +proofman-common = { workspace = true } +proofman-macros = { workspace = true } +proofman-util = { workspace = true } +pil-std-lib = { workspace = true } +fields = { workspace=true } +tracing = { workspace = true } +rayon = { workspace = true } +sha2 = { workspace = true } + +[features] +default = [] +gpu = ["packed"] +packed = [] \ No newline at end of file diff --git a/precompiles/poseidon2/pil/poseidon2.pil b/precompiles/poseidon2/pil/poseidon2.pil new file mode 100644 index 000000000..0a85e5c27 --- /dev/null +++ b/precompiles/poseidon2/pil/poseidon2.pil @@ -0,0 +1,263 @@ +require "std_lookup.pil" +require "operations.pil" +require "opids.pil" +require "poseidon2_constants.pil" + +// Precompile in charge of performing the Poseidon2 permutation. +// For reference: https://eprint.iacr.org/2023/323.pdf + + +airtemplate Poseidon2(const int N = 2**17) { + const int n = 16; + + // Compute some stats + const int CLOCKS = 14; + + const int NUM_NON_USABLE_ROWS = N % CLOCKS; + const int NUM_POSEIDON2; + if (NUM_NON_USABLE_ROWS == 0) { + // N is perfectly divisible by CLOCKS + if (N < CLOCKS) { + error(`N must be at least ${CLOCKS} to fit the Poseidon2 arithmetization, but received N=${N}`); + } + + NUM_POSEIDON2 = N / CLOCKS; + } else { + // N is not divisible by CLOCKS + if (N < 2*CLOCKS) { + error(`N must be at least ${2*CLOCKS} to fit the Poseidon2 arithmetization, but received N=${N}`); + } + + // Subtract 1 because we can't fit a complete cycle in the remaining rows + NUM_POSEIDON2 = (N - NUM_NON_USABLE_ROWS) / CLOCKS - 1; + } + + col fixed CLK_0 = [[1, 0:(CLOCKS-1)]:NUM_POSEIDON2, 0...]; + + const expr CLK[CLOCKS]; + for (int i = 0; i < CLOCKS; i++) { + CLK[i] = (i)'CLK_0; + } + + const expr r0 = 'CLK_0; + const expr r1 = (2)'CLK_0; + const expr r2 = (3)'CLK_0; + const expr r3 = (4)'CLK_0; + const expr r4 = (5)'CLK_0; + const expr p1 = (6)'CLK_0; + const expr r15 = (7)'CLK_0; + const expr p2 = (8)'CLK_0; + const expr r26 = (9)'CLK_0; + const expr r27 = (10)'CLK_0; + const expr r28 = (11)'CLK_0; + const expr r29 = (12)'CLK_0; + + const expr full_round = r0 + r1 + r2 + r3 + r26 + r27 + r28 + r29; + + // Define the clock selectors + col witness bits(1) in_use_clk_0; // 1 at the first clock cycle of the Poseidon2 operation, 0 otherwise + col witness bits(1) in_use; // 1 when the Poseidon2 operation is in use, 0 otherwise + + in_use_clk_0 * (1 - in_use_clk_0) === 0; + in_use * (1 - in_use) === 0; + + in_use_clk_0 - CLK_0 * in_use === 0; // This constraint is two-fold: + // · in_use_clk_0 can only be active when CLK_0 is active + // · if in_use is active then so is in_use_clk_0 + + const expr in_use_active = clock_set(start: 1, end: CLOCKS); + in_use_active * (in_use - 'in_use) === 0; // selector latching + + // --> Constraints to assert the Poseidon2 permutation + + // Poseidon2 state represented as a 1D array of elements of 64 bits, represented in two limbs of 32 bits each + col witness bits(32) chunks[n][2]; + + const expr prev_state[n]; + const expr state[n]; + const expr next_state[n]; + for (int i = 0; i < n; i++) { + prev_state[i] = 'chunks[i][1] * 2**32 + 'chunks[i][0]; + state[i] = chunks[i][1] * 2**32 + chunks[i][0]; + next_state[i] = chunks[i][1]' * 2**32 + chunks[i][0]'; + } + + poseidonFullRound(n, state, next_state, in_use_clk_0); + + const expr state_7[n]; + const expr rc[n]; + const expr im_values[11]; + const expr im_values_7[11]; + for (int i = 0; i < n; i++) { + rc[i] = r0 * RC_16[i] + r1 * RC_16[i + n] + r2 * RC_16[i + 2*n] + r3 * RC_16[i + 3*n] + p1 * RC_16[i + 4*n] + p2 * RC_16[i + 4*n + 11] + r26 * RC_16[i + 4*n + 22] + r27 * RC_16[i + 5*n + 22] + r28 * RC_16[i + 6*n + 22] + r29 * RC_16[i + 7*n + 22]; + state_7[i] = pow7(state[i] + rc[i]); + if (i < 11) { + im_values[i] = state[i]; + im_values_7[i] = state_7[i]; + } + } + + poseidonFullRound(n, state_7, next_state, full_round); + + poseidonPartialRound(n, 11, prev_state, next_state, im_values, im_values_7, D_16, p1 + p2); + + // --> Constraints to read inputs from memory and write outputs to memory + /* + MEMORY ACCESS MAP + ===================================================================================== + 0 | STEP_MAIN | R | ADDR_STATE | input_state[0] + 1 | STEP_MAIN | R | ADDR_STATE + 8 | input_state[1] + 2 | STEP_MAIN | R | ADDR_STATE + 16 | input_state[2] + . | ... | . | ... | ... + n - 1 | STEP_MAIN | R | ADDR_STATE + (n - 1)*8 | input_state[n-1] + n | STEP_MAIN + 1 | W | ADDR_STATE | output_state[0] + n + 1 | STEP_MAIN + 1 | W | ADDR_STATE + 8 | output_state[1] + n + 2 | STEP_MAIN + 1 | W | ADDR_STATE + 16 | output_state[2] + . | ... | . | ... | ... + 2*n - 1 | STEP_MAIN + 1 | . | ADDR_STATE + (n - 1)*8 | output_state[n-1] + ===================================================================================== + */ + col witness bits(40) step_addr; + + const int STEP_MAIN = 0; + const int ADDR_STATE = STEP_MAIN + 1; + + const int MEM_OPS = n * 64; + const int MEM_OPS_PARALLEL = n / 4; + + const expr mem_sel = clock_set(in_use, start: 0, end: 4) + clock_set(in_use, start: CLOCKS - 4, end: CLOCKS); + + const expr mem_is_write = clock_set(start: CLOCKS - 4, end: CLOCKS); + + const expr main_step = clock_shift(step_addr, STEP_MAIN, start: 0, end: 4) + clock_shift(step_addr, STEP_MAIN, start: CLOCKS - 4, end: CLOCKS); + + for (int i = 0; i < MEM_OPS_PARALLEL; i++) { + const expr mem_addr = clock_shift(step_addr, ADDR_STATE, start: 0, end: 4, offset: 8*i, delta: 8 * 4) + clock_shift(step_addr, ADDR_STATE, start: CLOCKS - 4, end: CLOCKS, offset: 8*i, delta: 8 * 4); + + expr mem_value[2]; + mem_value[0] = 0; + mem_value[1] = 0; + + // Reads + for(int j = 0; j < 4; j++) { + mem_value[0] += (j)'CLK_0 * (j)'chunks[MEM_OPS_PARALLEL*j + i][0]; + mem_value[1] += (j)'CLK_0 * (j)'chunks[MEM_OPS_PARALLEL*j + i][1]; + } + + // Writes + for(int j = 0; j < 4; j++) { + mem_value[0] += (CLOCKS - 4 + j)'CLK_0 * chunks[MEM_OPS_PARALLEL*j + i][0]'(3 - j); + mem_value[1] += (CLOCKS - 4 + j)'CLK_0 * chunks[MEM_OPS_PARALLEL*j + i][1]'(3 - j); + } + + precompiled_mem_op( + sel: mem_sel, + is_write: mem_is_write, + main_step: main_step, + addr: mem_addr, + value: mem_value + ); + } + + + // --> Constraints to make sure that this coprocessor is called from the main processor + proves_operation(op: OP_POSEIDON2, b: [step_addr'(ADDR_STATE), 0], main_step: step_addr'(STEP_MAIN), mul: in_use_clk_0); + + function clock_set(const expr mvcol = 1, const int start = 0, int end = -1): const expr { + expr result = 0; + if (end == -1) { + end = start; + } + for (int i = start; i < end; i++) { + result += air.CLK[i]; + } + return result * mvcol; + } + + function clock_shift(const expr mvcol, const int pos, const int start = 0, int end = -1, int offset = 0, const int delta = 0): const expr { + expr result = 0; + if (end == -1) { + end = start + 1; + } + for (int i = start; i < end; i++) { + const int iclock = (pos - i) % air.CLOCKS; + if (offset != 0) { + result += air.CLK[i] * (mvcol'(iclock) + offset); + } else { + result += air.CLK[i] * mvcol'(iclock); + } + offset += delta; + } + return result; + } + + function pow7(const expr input): const expr { + const expr input3 = input * input * input; + const expr input6 = input3 * input3; + return input6 * input; + } + + function poseidonFullRound(const int n, const expr input[], const expr output[], const expr sel) { + const expr mat[n]; + + const expr t0[n/4], t1[n/4], t2[n/4], t3[n/4]; + for (int i = 0; i < n/4; i++) { + t0[i] = input[4*i] + input[4*i + 1]; + t1[i] = input[4*i + 2] + input[4*i + 3]; + t2[i] = 2*input[4*i + 1] + t1[i]; + t3[i] = 2*input[4*i + 3] + t0[i]; + + mat[4*i + 3] = 4*t1[i] + t3[i]; + mat[4*i + 1] = 4*t0[i] + t2[i]; + mat[4*i] = t3[i] + mat[4*i + 1]; + mat[4*i + 2] = t2[i] + mat[4*i + 3]; + + } + + expr stored[n/4]; + for (int i = 0; i < n/4; ++i) { + stored[i] = 0; + } + + for (int i = 0; i < n/4; i++) { + for (int j = 0; j < 4; j++) { + stored[j] += mat[4*i + j]; + } + } + + for (int i = 0; i < n; i++) { + sel * (output[i] - (mat[i] + stored[i%4])) === 0; + } + } + + function poseidonPartialRound(const int n, const int rounds, const expr input[], const expr output[], const expr st0[], const expr intermediateValues[], const int D[], const expr sel) { + const expr sR[rounds + 1][n]; + const expr sum_partial[rounds]; + for (int i = 0; i < n; ++i) { + sR[0][i] = input[i]; + } + + for (int i = 0; i < rounds; i++) { + sel * (st0[i] - sR[i][0]) === 0; + sum_partial[i] = partial_sum(n, sR[i], intermediateValues[i]); + + sR[i+1][0] = intermediateValues[i] * D[0] + sum_partial[i]; + for (int j = 1; j < n; j++) { + sR[i+1][j] = sR[i][j] * D[j] + sum_partial[i]; + } + } + + for (int i = 0; i < n; i++) { + sel * (output[i] - sR[rounds][i]) === 0; + } + } + + function partial_sum(const int n, const expr sR[], const expr intermediateValue) : const expr { + const expr res[n-1]; + res[0] = intermediateValue + sR[1]; + for (int i = 2; i < n; i++) { + res[i - 1] = res[i - 2] + sR[i]; + } + return res[n-2]; + } +} \ No newline at end of file diff --git a/precompiles/poseidon2/pil/poseidon2_constants.pil b/precompiles/poseidon2/pil/poseidon2_constants.pil new file mode 100644 index 000000000..3e0e69c45 --- /dev/null +++ b/precompiles/poseidon2/pil/poseidon2_constants.pil @@ -0,0 +1,271 @@ +const int D_8[8] = [ + 0xa98811a1fed4e3a5, + 0x1cc48b54f377e2a0, + 0xe40cd4f6c5609a26, + 0x11de79ebca97a4a3, + 0x9177c73d8b7e929c, + 0x2a6fe8085797e791, + 0x3de6e93329f8d5ad, + 0x3f7af9125da962fe +]; + +const int D_16[16] = [ + 0xde9b91a467d6afc0, + 0xc5f16b9c76a9be17, + 0x0ab0fef2d540ac55, + 0x3001d27009d05773, + 0xed23b1f906d3d9eb, + 0x5ce73743cba97054, + 0x1c3bab944af4ba24, + 0x2faa105854dbafae, + 0x53ffb3ae6d421a10, + 0xbcda9df8884ba396, + 0xfc1273e4a31807bb, + 0xc77952573d5142c0, + 0x56683339a819b85e, + 0x328fcbd8f0ddc8eb, + 0xb5101e303fce9cb7, + 0x774487b8c40089bb +]; + +const int RC_8[86] = [ + 0xdd5743e7f2a5a5d9, + 0xcb3a864e58ada44b, + 0xffa2449ed32f8cdc, + 0x42025f65d6bd13ee, + 0x7889175e25506323, + 0x34b98bb03d24b737, + 0xbdcc535ecc4faa2a, + 0x5b20ad869fc0d033, + 0xf1dda5b9259dfcb4, + 0x27515210be112d59, + 0x4227d1718c766c3f, + 0x26d333161a5bd794, + 0x49b938957bf4b026, + 0x4a56b5938b213669, + 0x1120426b48c8353d, + 0x6b323c3f10a56cad, + 0xce57d6245ddca6b2, + 0xb1fc8d402bba1eb1, + 0xb5c5096ca959bd04, + 0x6db55cd306d31f7f, + 0xc49d293a81cb9641, + 0x1ce55a4fe979719f, + 0xa92e60a9d178a4d1, + 0x002cc64973bcfd8c, + 0xcea721cce82fb11b, + 0xe5b55eb8098ece81, + 0x4e30525c6f1ddd66, + 0x43c6702827070987, + 0xaca68430a7b5762a, + 0x3674238634df9c93, + 0x88cee1c825e33433, + 0xde99ae8d74b57176, + 0x488897d85ff51f56, + 0x1140737ccb162218, + 0xa7eeb9215866ed35, + 0x9bd2976fee49fcc9, + 0xc0c8f0de580a3fcc, + 0x4fb2dae6ee8fc793, + 0x343a89f35f37395b, + 0x223b525a77ca72c8, + 0x56ccb62574aaa918, + 0xc4d507d8027af9ed, + 0xa080673cf0b7e95c, + 0xf0184884eb70dcf8, + 0x044f10b0cb3d5c69, + 0xe9e3f7993938f186, + 0x1b761c80e772f459, + 0x606cec607a1b5fac, + 0x14a0c2e1d45f03cd, + 0x4eace8855398574f, + 0xf905ca7103eff3e6, + 0xf8c8f8d20862c059, + 0xb524fe8bdd678e5a, + 0xfbb7865901a1ec41, + 0x014ef1197d341346, + 0x9725e20825d07394, + 0xfdb25aef2c5bae3b, + 0xbe5402dc598c971e, + 0x93a5711f04cdca3d, + 0xc45a9a5b2f8fb97b, + 0xfe8946a924933545, + 0x2af997a27369091c, + 0xaa62c88e0b294011, + 0x058eb9d810ce9f74, + 0xb3cb23eced349ae4, + 0xa3648177a77b4a84, + 0x43153d905992d95d, + 0xf4e2a97cda44aa4b, + 0x5baa2702b908682f, + 0x082923bdf4f750d1, + 0x98ae09a325893803, + 0xf8a6475077968838, + 0xceb0735bf00b2c5f, + 0x0a1a5d953888e072, + 0x2fcb190489f94475, + 0xb5be06270dec69fc, + 0x739cb934b09acf8b, + 0x537750b75ec7f25b, + 0xe9dd318bae1f3961, + 0xf7462137299efe1a, + 0xb1f6b8eee9adb940, + 0xbdebcc8a809dfe6b, + 0x40fc1f791b178113, + 0x3ac1c3362d014864, + 0x9a016184bdb8aeba, + 0x95f2394459fbc25e +]; + +const int RC_16[150] = [ + 0x15ebea3fc73397c3, + 0xd73cd9fbfe8e275c, + 0x8c096bfce77f6c26, + 0x4e128f68b53d8fea, + 0x29b779a36b2763f6, + 0xfe2adc6fb65acd08, + 0x8d2520e725ad0955, + 0x1c2392b214624d2a, + 0x37482118206dcc6e, + 0x2f829bed19be019a, + 0x2fe298cb6f8159b0, + 0x2bbad982deccdbbf, + 0xbad568b8cc60a81e, + 0xb86a814265baad10, + 0xbec2005513b3acb3, + 0x6bf89b59a07c2a94, + 0xa25deeb835e230f5, + 0x3c5bad8512b8b12a, + 0x7230f73c3cb7a4f2, + 0xa70c87f095c74d0f, + 0x6b7606b830bb2e80, + 0x6cd467cfc4f24274, + 0xfeed794df42a9b0a, + 0x8cf7cf6163b7dbd3, + 0x9a6e9dda597175a0, + 0xaa52295a684faf7b, + 0x017b811cc3589d8d, + 0x55bfb699b6181648, + 0xc2ccaf71501c2421, + 0x1707950327596402, + 0xdd2fcdcd42a8229f, + 0x8b9d7d5b27778a21, + 0xac9a05525f9cf512, + 0x2ba125c58627b5e8, + 0xc74e91250a8147a5, + 0xa3e64b640d5bb384, + 0xf53047d18d1f9292, + 0xbaaeddacae3a6374, + 0xf2d0914a808b3db1, + 0x18af1a3742bfa3b0, + 0x9a621ef50c55bdb8, + 0xc615f4d1cc5466f3, + 0xb7fbac19a35cf793, + 0xd2b1a15ba517e46d, + 0x4a290c4d7fd26f6f, + 0x4f0cf1bb1770c4c4, + 0x548345386cd377f5, + 0x33978d2789fddd42, + 0xab78c59deb77e211, + 0xc485b2a933d2be7f, + 0xbde3792c00c03c53, + 0xab4cefe8f893d247, + 0xc5c0e752eab7f85f, + 0xdbf5a76f893bafea, + 0xa91f6003e3d984de, + 0x099539077f311e87, + 0x097ec52232f9559e, + 0x53641bdf8991e48c, + 0x2afe9711d5ed9d7c, + 0xa7b13d3661b5d117, + 0x5a0e243fe7af6556, + 0x1076fae8932d5f00, + 0x9b53a83d434934e3, + 0xed3fd595a3c0344a, + 0x28eff4b01103d100, + 0x60400ca3e2685a45, + 0x1c8636beb3389b84, + 0xac1332b60e13eff0, + 0x2adafcc364e20f87, + 0x79ffc2b14054ea0b, + 0x3f98e4c0908f0a05, + 0xcdb230bc4e8a06c4, + 0x1bcaf7705b152a74, + 0xd9bca249a82a7470, + 0x91e24af19bf82551, + 0xa62b43ba5cb78858, + 0xb4898117472e797f, + 0xb3228bca606cdaa0, + 0x844461051bca39c9, + 0xf3411581f6617d68, + 0xf7fd50646782b533, + 0x6ca664253c18fb48, + 0x2d2fcdec0886a08f, + 0x29da00dd799b575e, + 0x47d966cc3b6e1e93, + 0xde884e9a17ced59e, + 0xdacf46dc1c31a045, + 0x5d2e3c121eb387f2, + 0x51f8b0658b124499, + 0x1e7dbd1daa72167d, + 0x8275015a25c55b88, + 0xe8521c24ac7a70b3, + 0x6521d121c40b3f67, + 0xac12de797de135b0, + 0xafa28ead79f6ed6a, + 0x685174a7a8d26f0b, + 0xeff92a08d35d9874, + 0x3058734b76dd123a, + 0xfa55dcfba429f79c, + 0x559294d4324c7728, + 0x7a770f53012dc178, + 0xedd8f7c408f3883b, + 0x39b533cf8d795fa5, + 0x160ef9de243a8c0a, + 0x431d52da6215fe3f, + 0x54c51a2a2ef6d528, + 0x9b13892b46ff9d16, + 0x263c46fcee210289, + 0xb738c96d25aabdc4, + 0x5c33a5203996d38f, + 0x2626496e7c98d8dd, + 0xc669e0a52785903a, + 0xaecde726c8ae1f47, + 0x039343ef3a81e999, + 0x2615ceaf044a54f9, + 0x7e41e834662b66e1, + 0x4ca5fd4895335783, + 0x64b334d02916f2b0, + 0x87268837389a6981, + 0x034b75bcb20a6274, + 0x58e658296cc2cd6e, + 0xe2d0f759acc31df4, + 0x81a652e435093e20, + 0x0b72b6e0172eaf47, + 0x4aec43cec577d66d, + 0xde78365b028a84e6, + 0x444e19569adc0ee4, + 0x942b2451fa40d1da, + 0xe24506623ea5bd6c, + 0x082854bf2ef7c743, + 0x69dbbc566f59d62e, + 0x248c38d02a7b5cb2, + 0x4f4e8f8c09d15edb, + 0xd96682f188d310cf, + 0x6f9a25d56818b54c, + 0xb6cefed606546cd9, + 0x5bc07523da38a67b, + 0x7df5a3c35b8111cf, + 0xaaa2cc5d4db34bb0, + 0x9e673ff22a4653f8, + 0xbd8b278d60739c62, + 0xe10d20f6925b8815, + 0xf6c87b91dd4da2bf, + 0xfed623e2f71b6f1a, + 0xa0f02fa52a94d0d3, + 0xbb5794711b39fa16, + 0xd3b94fba9d005c7f, + 0x15a26e89fad946c9, + 0xf3cb87db8a67cf49, + 0x400d2bf56aa2a577 +]; \ No newline at end of file diff --git a/precompiles/poseidon2/src/lib.rs b/precompiles/poseidon2/src/lib.rs new file mode 100644 index 000000000..b673e370e --- /dev/null +++ b/precompiles/poseidon2/src/lib.rs @@ -0,0 +1,15 @@ +mod poseidon2; +mod poseidon2_bus_device; +mod poseidon2_gen_mem_inputs; +mod poseidon2_input; +mod poseidon2_instance; +mod poseidon2_manager; +mod poseidon2_planner; + +pub use poseidon2::*; +pub use poseidon2_bus_device::*; +pub use poseidon2_gen_mem_inputs::*; +pub use poseidon2_input::*; +pub use poseidon2_instance::*; +pub use poseidon2_manager::*; +pub use poseidon2_planner::*; diff --git a/precompiles/poseidon2/src/poseidon2.rs b/precompiles/poseidon2/src/poseidon2.rs new file mode 100644 index 000000000..e8ebbf9ef --- /dev/null +++ b/precompiles/poseidon2/src/poseidon2.rs @@ -0,0 +1,226 @@ +use core::panic; +use std::sync::Arc; + +use fields::{ + add, matmul_external, pow7, pow7add, prodadd, Poseidon16, Poseidon2Constants, PrimeField64, +}; +use rayon::prelude::*; + +use proofman_common::{AirInstance, FromTrace, ProofmanResult}; +use proofman_util::{timer_start_trace, timer_stop_and_log_trace}; +#[cfg(not(feature = "packed"))] +use zisk_pil::{Poseidon2Trace, Poseidon2TraceRow}; +#[cfg(feature = "packed")] +use zisk_pil::{Poseidon2TracePacked, Poseidon2TraceRowPacked}; + +#[cfg(feature = "packed")] +type Poseidon2TraceRowType = Poseidon2TraceRowPacked; +#[cfg(feature = "packed")] +type Poseidon2TraceType = Poseidon2TracePacked; + +#[cfg(not(feature = "packed"))] +type Poseidon2TraceRowType = Poseidon2TraceRow; +#[cfg(not(feature = "packed"))] +type Poseidon2TraceType = Poseidon2Trace; + +use super::Poseidon2Input; + +/// The `Poseidon2SM` struct encapsulates the logic of the Poseidon2 State Machine. +pub struct Poseidon2SM { + /// Number of available poseidon2s in the trace. + pub num_available_poseidon2s: usize, + _phantom: std::marker::PhantomData, +} + +pub const CLOCKS: usize = 14; + +impl Poseidon2SM { + /// Creates a new Poseidon2 State Machine instance. + /// + /// # Returns + /// A new `Poseidon2SM` instance. + pub fn new() -> Arc { + // Compute some useful values + let num_available_poseidon2s = Poseidon2TraceType::::NUM_ROWS / CLOCKS - 1; + + Arc::new(Self { num_available_poseidon2s, _phantom: std::marker::PhantomData }) + } + + /// Processes a slice of operation data, updating the trace and multiplicities. + /// + /// # Arguments + /// * `trace` - A mutable reference to the Poseidon2 trace. + /// * `num_circuits` - The number of circuits to process. + /// * `input` - The operation data to process. + /// * `multiplicity` - A mutable slice to update with multiplicities for the operation. + #[inline(always)] + pub fn process_input( + &self, + trace: &mut [Poseidon2TraceRowType], + input: &Poseidon2Input, + is_active: bool, + ) { + // Fill the states + let mut round_states = [[0u64; 16]; CLOCKS]; + round_states[0] = input.state; + + let mut state = input.state.map(|x| F::from_u64(x)); + matmul_external::(&mut state); + round_states[1] = state.map(|x| x.as_canonical_u64()); + + for r in 0..Poseidon16::HALF_ROUNDS { + let mut c_slice = [F::ZERO; 16]; + for (i, c) in c_slice.iter_mut().enumerate() { + *c = F::from_u64(Poseidon16::RC[r * 16 + i]); + } + pow7add::(&mut state, &c_slice); + matmul_external::(&mut state); + round_states[2 + r] = state.map(|x| x.as_canonical_u64()); + } + + let mut row = 6; + let mut index = 0; + for r in 0..Poseidon16::N_PARTIAL_ROUNDS { + round_states[row][index] = state[0].as_canonical_u64(); + index += 1; + + state[0] += F::from_u64(Poseidon16::RC[Poseidon16::HALF_ROUNDS * 16 + r]); + state[0] = pow7(state[0]); + let sum = add::(&state); + prodadd::(&mut state, Poseidon16::DIAG, sum); + if r == 10 { + round_states[7] = state.map(|x| x.as_canonical_u64()); + row = 8; + index = 0; + } + } + + round_states[9] = state.map(|x| x.as_canonical_u64()); + + for r in 0..Poseidon16::HALF_ROUNDS { + let mut c_slice = [F::ZERO; 16]; + for (i, c) in c_slice.iter_mut().enumerate() { + *c = F::from_u64( + Poseidon16::RC + [Poseidon16::HALF_ROUNDS * 16 + Poseidon16::N_PARTIAL_ROUNDS + r * 16 + i], + ); + } + pow7add::(&mut state, &c_slice); + matmul_external::(&mut state); + round_states[10 + r] = state.map(|x| x.as_canonical_u64()); + } + + for r in 0..CLOCKS { + for (i, &state) in round_states[r].iter().enumerate() { + trace[r].set_chunks(i, 0, state as u32); + trace[r].set_chunks(i, 1, (state >> 32) as u32); + } + } + + if !is_active { + return; + } + + // Fill step and addr + trace[0].set_step_addr(input.step_main); + trace[1].set_step_addr(input.addr_main as u64); + + // Fill in_use_clk_0 + trace[0].set_in_use_clk_0(true); + + // Fill in_use + for item in trace.iter_mut().take(CLOCKS) { + item.set_in_use(true); + } + } + + /// Computes the witness for a series of inputs and produces an `AirInstance`. + /// + /// # Arguments + /// * `sctx` - The setup context containing the setup data. + /// * `inputs` - A slice of operations to process. + /// + /// # Returns + /// An `AirInstance` containing the computed witness data. + pub fn compute_witness( + &self, + inputs: &[Vec], + trace_buffer: Vec, + ) -> ProofmanResult> { + let mut poseidon2_trace = Poseidon2TraceType::new_from_vec_zeroes(trace_buffer)?; + let num_rows = poseidon2_trace.num_rows(); + let num_available_poseidon2s = self.num_available_poseidon2s; + + // Check that we can fit all the poseidon2s in the trace + let num_inputs = inputs.iter().map(|v| v.len()).sum::(); + let num_rows_needed = if num_inputs < num_available_poseidon2s { + num_inputs * CLOCKS + } else if num_inputs == num_available_poseidon2s { + num_rows + } else { + panic!( + "Exceeded available Poseidon2 inputs: requested {}, but only {} are available.", + num_inputs, self.num_available_poseidon2s + ); + }; + + tracing::debug!( + "··· Creating Poseidon2 instance [{}{{}} / {} rows filled {:.2}%]", + num_rows_needed, + num_rows, + (num_rows_needed as f64 / num_rows as f64 * 100.0) as usize + ); + + timer_start_trace!(POSEIDON2_TRACE); + let mut trace_rows = poseidon2_trace.buffer.as_mut_slice(); + let mut par_traces = Vec::new(); + let mut inputs_indexes = Vec::new(); + for (i, inputs) in inputs.iter().enumerate() { + for (j, _) in inputs.iter().enumerate() { + let (head, tail) = trace_rows.split_at_mut(CLOCKS); + par_traces.push(head); + inputs_indexes.push((i, j)); + trace_rows = tail; + } + } + + // Fill the trace + par_traces.into_par_iter().enumerate().for_each(|(index, trace)| { + let input_index = inputs_indexes[index]; + let input = &inputs[input_index.0][input_index.1]; + self.process_input(trace, input, true); + }); + + timer_stop_and_log_trace!(POSEIDON2_TRACE); + + timer_start_trace!(POSEIDON2_PADDING); + + // 3] Fill the padding rows with Poseidon2(0) + let padding_rows_start = num_rows_needed; + let padding_rows_end: usize = + padding_rows_start + ((num_available_poseidon2s - num_inputs) * CLOCKS); + + // Split the padding trace into padding chunks + let padding_trace = &mut poseidon2_trace.buffer[padding_rows_start..padding_rows_end]; + let mut padding_chunks: Vec<_> = padding_trace.chunks_mut(CLOCKS).collect(); + + // Process padding in parallel + if let Some((first, rest)) = padding_chunks.split_first_mut() { + self.process_input( + first, + &Poseidon2Input { state: [0; 16], step_main: 0, addr_main: 0 }, + false, + ); + + rest.par_iter_mut().for_each(|chunk| { + chunk.copy_from_slice(first); + }); + } + + // 4] The non-usable rows should be zeroes, which are already set at initialization + + timer_stop_and_log_trace!(POSEIDON2_PADDING); + + Ok(AirInstance::new_from_trace(FromTrace::new(&mut poseidon2_trace))) + } +} diff --git a/precompiles/poseidon2/src/poseidon2_bus_device.rs b/precompiles/poseidon2/src/poseidon2_bus_device.rs new file mode 100644 index 000000000..4663ad03a --- /dev/null +++ b/precompiles/poseidon2/src/poseidon2_bus_device.rs @@ -0,0 +1,143 @@ +//! The `Poseidon2Counter` module defines a counter for tracking poseidon2-related operations +//! sent over the data bus. It connects to the bus and gathers metrics for specific +//! `ZiskOperationType::Poseidon2` instructions. + +use std::ops::Add; + +use precompiles_common::MemProcessor; + +use zisk_common::{ + BusDevice, BusDeviceMode, BusId, Counter, Metrics, B, OPERATION_BUS_ID, OP_TYPE, STEP, +}; +use zisk_core::ZiskOperationType; + +use crate::{generate_poseidon2_mem_inputs, skip_poseidon2_mem_inputs}; + +/// The `Poseidon2Counter` struct represents a counter that monitors and measures +/// poseidon2-related operations on the data bus. +/// +/// It tracks specific operation types (`ZiskOperationType`) and updates counters for each +/// accepted operation type whenever data is processed on the bus. +pub struct Poseidon2CounterInputGen { + /// Poseidon2 counter. + counter: Counter, + + /// Bus device mode (counter or input generator). + mode: BusDeviceMode, +} + +impl Poseidon2CounterInputGen { + /// Creates a new instance of `Poseidon2Counter`. + /// + /// # Arguments + /// * `bus_id` - The ID of the bus to which this counter is connected. + /// * `op_type` - A vector of `ZiskOperationType` instructions to monitor. + /// + /// # Returns + /// A new `Poseidon2Counter` instance. + pub fn new(mode: BusDeviceMode) -> Self { + Self { counter: Counter::default(), mode } + } + + /// Retrieves the count of instructions for a specific `ZiskOperationType`. + /// + /// # Arguments + /// * `op_type` - The operation type to retrieve the count for. + /// + /// # Returns + /// Returns the count of instructions for the specified operation type. + pub fn inst_count(&self, op_type: ZiskOperationType) -> Option { + (op_type == ZiskOperationType::Poseidon2).then_some(self.counter.inst_count) + } + + /// Processes data received on the bus, updating counters and generating inputs when applicable. + /// + /// # Arguments + /// * `bus_id` - The ID of the bus sending the data. + /// * `data` - The data received from the bus. + /// * `mem_processors` – A queue of mem_processors bus operations used to send derived inputs. + /// + /// # Returns + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data( + &mut self, + bus_id: &BusId, + data: &[u64], + mem_processors: &mut P, + ) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + if data[OP_TYPE] as u32 != ZiskOperationType::Poseidon2 as u32 { + return true; + } + + let step_main = data[STEP]; + let addr_main = data[B] as u32; + + match self.mode { + BusDeviceMode::Counter => { + self.measure(data); + generate_poseidon2_mem_inputs(addr_main, step_main, data, true, mem_processors); + } + BusDeviceMode::CounterAsm => { + self.measure(data); + } + BusDeviceMode::InputGenerator => { + if skip_poseidon2_mem_inputs(addr_main, mem_processors) { + return true; + } + generate_poseidon2_mem_inputs(addr_main, step_main, data, false, mem_processors); + } + } + + true + } +} + +impl Metrics for Poseidon2CounterInputGen { + /// Tracks activity on the connected bus and updates counters for recognized operations. + /// + /// # Arguments + /// * `_bus_id` - The ID of the bus (unused in this implementation). + /// * `_data` - The data received from the bus. + /// + /// # Returns + /// An empty vector, as this implementation does not produce any derived inputs for the bus. + #[inline(always)] + fn measure(&mut self, _data: &[u64]) { + self.counter.update(1); + } + + /// Provides a dynamic reference for downcasting purposes. + /// + /// # Returns + /// A reference to `self` as `dyn std::any::Any`. + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + +impl Add for Poseidon2CounterInputGen { + type Output = Poseidon2CounterInputGen; + + /// Combines two `Poseidon2Counter` instances by summing their counters. + /// + /// # Arguments + /// * `self` - The first `Poseidon2Counter` instance. + /// * `other` - The second `Poseidon2Counter` instance. + /// + /// # Returns + /// A new `Poseidon2Counter` with combined counters. + fn add(self, other: Self) -> Poseidon2CounterInputGen { + Poseidon2CounterInputGen { counter: &self.counter + &other.counter, mode: self.mode } + } +} + +impl BusDevice for Poseidon2CounterInputGen { + /// Provides a dynamic reference for downcasting purposes. + fn as_any(self: Box) -> Box { + self + } +} diff --git a/precompiles/poseidon2/src/poseidon2_gen_mem_inputs.rs b/precompiles/poseidon2/src/poseidon2_gen_mem_inputs.rs new file mode 100644 index 000000000..c87432c12 --- /dev/null +++ b/precompiles/poseidon2/src/poseidon2_gen_mem_inputs.rs @@ -0,0 +1,82 @@ +use fields::{poseidon2_hash, Goldilocks, Poseidon16, PrimeField64}; +use precompiles_common::MemBusHelpers; +use precompiles_common::MemProcessor; + +use zisk_common::OPERATION_PRECOMPILED_BUS_DATA_SIZE; + +#[derive(Debug)] +pub struct Poseidon2MemInputConfig { + pub rewrite_params: bool, + pub read_params: usize, + pub write_params: usize, + pub chunks_per_param: usize, +} + +pub fn generate_poseidon2_mem_inputs( + addr_main: u32, + step_main: u64, + data: &[u64], + only_counters: bool, + mem_processors: &mut P, +) { + // Get the basic data from the input + // op,op_type,a,b,... + let state: &mut [u64; 16] = &mut data[5..21].try_into().unwrap(); + + // Apply the poseidon2 hash function + let state_gl = state.map(Goldilocks::new); + let res_gl = poseidon2_hash::(&state_gl); + for (i, d) in state.iter_mut().enumerate() { + *d = res_gl[i].as_canonical_u64(); + } + + let read_params = 1; + let write_params = 1; + let chunks_per_param = 16; + let params_count = read_params + write_params; + let params_offset = OPERATION_PRECOMPILED_BUS_DATA_SIZE; + for iparam in 0..params_count { + let is_write = iparam >= read_params; + let param_index = if is_write { iparam - read_params } else { iparam }; + let param_addr = addr_main + (param_index * 8 * chunks_per_param) as u32; + + // read/write all chunks of the iparam parameter + let current_param_offset = if is_write { + // if write calculate index over write_data + chunks_per_param * (iparam - read_params) + } else { + params_offset + chunks_per_param * iparam + }; + for ichunk in 0..chunks_per_param { + let chunk_data = if only_counters && is_write { + 0 + } else if is_write { + state[current_param_offset + ichunk] + } else { + data[current_param_offset + ichunk] + }; + MemBusHelpers::mem_aligned_op( + param_addr + ichunk as u32 * 8, + step_main, + chunk_data, + is_write, + mem_processors, + ); + } + } +} + +pub fn skip_poseidon2_mem_inputs(addr_main: u32, mem_processors: &mut P) -> bool { + let write_params = 1; + let chunks_per_param = 16; + for param_index in 0..write_params { + let param_addr = addr_main + (param_index * 8 * chunks_per_param) as u32; + for ichunk in 0..chunks_per_param { + let addr = param_addr + ichunk as u32 * 8; + if !mem_processors.skip_addr(addr) { + return false; + } + } + } + true +} diff --git a/precompiles/poseidon2/src/poseidon2_input.rs b/precompiles/poseidon2/src/poseidon2_input.rs new file mode 100644 index 000000000..716829a7c --- /dev/null +++ b/precompiles/poseidon2/src/poseidon2_input.rs @@ -0,0 +1,18 @@ +use zisk_common::OperationPoseidon2Data; + +#[derive(Debug)] +pub struct Poseidon2Input { + pub step_main: u64, + pub addr_main: u32, + pub state: [u64; 16], +} + +impl Poseidon2Input { + pub fn from(values: &OperationPoseidon2Data) -> Self { + Self { + step_main: values[4], + addr_main: values[3] as u32, + state: values[5..21].try_into().unwrap(), + } + } +} diff --git a/precompiles/poseidon2/src/poseidon2_instance.rs b/precompiles/poseidon2/src/poseidon2_instance.rs new file mode 100644 index 000000000..0955b0f1e --- /dev/null +++ b/precompiles/poseidon2/src/poseidon2_instance.rs @@ -0,0 +1,202 @@ +//! The `Poseidon2Instance` module defines an instance to perform the witness computation +//! for the Poseidon2 State Machine. +//! +//! It manages collected inputs and interacts with the `Poseidon2SM` to compute witnesses for +//! execution plans. + +use crate::{Poseidon2Input, Poseidon2SM}; +use fields::PrimeField64; +use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; +use std::{any::Any, collections::HashMap, sync::Arc}; +use zisk_common::ChunkId; +use zisk_common::StatsType; +use zisk_common::{ + BusDevice, BusId, CheckPoint, CollectSkipper, ExtOperationData, Instance, InstanceCtx, + InstanceType, PayloadType, OPERATION_BUS_ID, OP_TYPE, +}; +use zisk_core::ZiskOperationType; +use zisk_pil::Poseidon2Trace; + +/// The `Poseidon2Instance` struct represents an instance for the Poseidon2 State Machine. +/// +/// It encapsulates the `Poseidon2SM` and its associated context, and it processes input data +/// to compute witnesses for the Poseidon2 State Machine. +pub struct Poseidon2Instance { + /// Poseidon2 state machine. + poseidon2_sm: Arc>, + + /// Instance context. + ictx: InstanceCtx, +} + +impl Poseidon2Instance { + /// Creates a new `Poseidon2Instance`. + /// + /// # Arguments + /// * `poseidon2_sm` - An `Arc`-wrapped reference to the Poseidon2 State Machine. + /// * `ictx` - The `InstanceCtx` associated with this instance, containing the execution plan. + /// * `bus_id` - The bus ID associated with this instance. + /// + /// # Returns + /// A new `Poseidon2Instance` instance initialized with the provided state machine and + /// context. + pub fn new(poseidon2_sm: Arc>, ictx: InstanceCtx) -> Self { + Self { poseidon2_sm, ictx } + } + + pub fn build_poseidon2_collector(&self, chunk_id: ChunkId) -> Poseidon2Collector { + assert_eq!( + self.ictx.plan.air_id, + Poseidon2Trace::::AIR_ID, + "Poseidon2Instance: Unsupported air_id: {:?}", + self.ictx.plan.air_id + ); + + let meta = self.ictx.plan.meta.as_ref().unwrap(); + let collect_info = meta.downcast_ref::>().unwrap(); + let (num_ops, collect_skipper) = collect_info[&chunk_id]; + Poseidon2Collector::new(num_ops, collect_skipper) + } +} + +impl Instance for Poseidon2Instance { + /// Computes the witness for the poseidon2 execution plan. + /// + /// This method leverages the `Poseidon2SM` to generate an `AirInstance` using the collected + /// inputs. + /// + /// # Arguments + /// * `_pctx` - The proof context, unused in this implementation. + /// + /// # Returns + /// An `Option` containing the computed `AirInstance`. + fn compute_witness( + &self, + _pctx: &ProofCtx, + _sctx: &SetupCtx, + collectors: Vec<(usize, Box>)>, + trace_buffer: Vec, + ) -> ProofmanResult>> { + let inputs: Vec<_> = collectors + .into_iter() + .map(|(_, collector)| { + collector.as_any().downcast::().unwrap().inputs + }) + .collect(); + + Ok(Some(self.poseidon2_sm.compute_witness(&inputs, trace_buffer)?)) + } + + /// Retrieves the checkpoint associated with this instance. + /// + /// # Returns + /// A `CheckPoint` object representing the checkpoint of the execution plan. + fn check_point(&self) -> &CheckPoint { + &self.ictx.plan.check_point + } + + /// Retrieves the type of this instance. + /// + /// # Returns + /// An `InstanceType` representing the type of this instance (`InstanceType::Instance`). + fn instance_type(&self) -> InstanceType { + InstanceType::Instance + } + + fn stats_type(&self) -> StatsType { + StatsType::Precompiled + } + + fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { + assert_eq!( + self.ictx.plan.air_id, + Poseidon2Trace::::AIR_ID, + "Poseidon2Instance: Unsupported air_id: {:?}", + self.ictx.plan.air_id + ); + + let meta = self.ictx.plan.meta.as_ref().unwrap(); + let collect_info = meta.downcast_ref::>().unwrap(); + let (num_ops, collect_skipper) = collect_info[&chunk_id]; + Some(Box::new(Poseidon2Collector::new(num_ops, collect_skipper))) + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + +pub struct Poseidon2Collector { + /// Collected inputs for witness computation. + inputs: Vec, + + /// The number of operations to collect. + num_operations: u64, + + /// Helper to skip instructions based on the plan's configuration. + collect_skipper: CollectSkipper, +} + +impl Poseidon2Collector { + /// Creates a new `Poseidon2Collector`. + /// + /// # Arguments + /// + /// * `bus_id` - The connected bus ID. + /// * `num_operations` - The number of operations to collect. + /// * `collect_skipper` - The helper to skip instructions based on the plan's configuration. + /// + /// # Returns + /// A new `ArithInstanceCollector` instance initialized with the provided parameters. + pub fn new(num_operations: u64, collect_skipper: CollectSkipper) -> Self { + Self { + inputs: Vec::with_capacity(num_operations as usize), + num_operations, + collect_skipper, + } + } + + /// Processes data received on the bus, collecting the inputs necessary for witness computation. + /// + /// # Arguments + /// * `_bus_id` - The ID of the bus (unused in this implementation). + /// * `data` - The data received from the bus. + /// * `pending` – A queue of pending bus operations used to send derived inputs. + /// + /// # Returns + /// A tuple where: + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data(&mut self, bus_id: &BusId, data: &[PayloadType]) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + if self.inputs.len() == self.num_operations as usize { + return false; + } + + if data[OP_TYPE] as u32 != ZiskOperationType::Poseidon2 as u32 { + return true; + } + + if self.collect_skipper.should_skip() { + return true; + } + + let data: ExtOperationData = + data.try_into().expect("Regular Metrics: Failed to convert data"); + if let ExtOperationData::OperationPoseidon2Data(data) = data { + self.inputs.push(Poseidon2Input::from(&data)); + } else { + panic!("Expected ExtOperationData::OperationData"); + } + + self.inputs.len() < self.num_operations as usize + } +} + +impl BusDevice for Poseidon2Collector { + fn as_any(self: Box) -> Box { + self + } +} diff --git a/precompiles/poseidon2/src/poseidon2_manager.rs b/precompiles/poseidon2/src/poseidon2_manager.rs new file mode 100644 index 000000000..a025941c8 --- /dev/null +++ b/precompiles/poseidon2/src/poseidon2_manager.rs @@ -0,0 +1,82 @@ +use std::sync::Arc; + +use fields::PrimeField64; +use zisk_common::{BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, InstanceInfo, Planner}; +use zisk_core::ZiskOperationType; +use zisk_pil::Poseidon2Trace; + +use crate::{Poseidon2CounterInputGen, Poseidon2Instance, Poseidon2Planner, Poseidon2SM}; + +/// The `Poseidon2Manager` struct represents the Poseidon2 manager, +/// which is responsible for managing the Poseidon2 state machine and its table state machine. +#[allow(dead_code)] +pub struct Poseidon2Manager { + /// Poseidon2 state machine + poseidon2_sm: Arc>, +} + +impl Poseidon2Manager { + /// Creates a new instance of `Poseidon2Manager`. + /// + /// # Returns + /// An `Arc`-wrapped instance of `Poseidon2Manager`. + pub fn new() -> Arc { + let poseidon2_sm = Poseidon2SM::new(); + + Arc::new(Self { poseidon2_sm }) + } + + pub fn build_poseidon2_counter(&self, asm_execution: bool) -> Poseidon2CounterInputGen { + match asm_execution { + true => Poseidon2CounterInputGen::new(BusDeviceMode::CounterAsm), + false => Poseidon2CounterInputGen::new(BusDeviceMode::Counter), + } + } + + pub fn build_poseidon2_input_generator(&self) -> Poseidon2CounterInputGen { + Poseidon2CounterInputGen::new(BusDeviceMode::InputGenerator) + } +} + +impl ComponentBuilder for Poseidon2Manager { + /// Builds a planner to plan poseidon2-related instances. + /// + /// # Returns + /// A boxed implementation of `RegularPlanner`. + fn build_planner(&self) -> Box { + // Get the number of poseidon2s that a single poseidon2 instance can handle + let num_available_poseidon2s = self.poseidon2_sm.num_available_poseidon2s; + + Box::new(Poseidon2Planner::new().add_instance(InstanceInfo::new( + Poseidon2Trace::::AIRGROUP_ID, + Poseidon2Trace::::AIR_ID, + num_available_poseidon2s, + ZiskOperationType::Poseidon2, + ))) + } + + /// Builds an inputs data collector for poseidon2 operations. + /// + /// # Arguments + /// * `ictx` - The context of the instance, containing the plan and its associated + /// configurations. + /// + /// # Returns + /// A boxed implementation of `BusDeviceInstance` specific to the requested `air_id` instance. + /// + /// # Panics + /// Panics if the provided `air_id` is not supported. + fn build_instance(&self, ictx: InstanceCtx) -> Box> { + match ictx.plan.air_id { + id if id == Poseidon2Trace::::AIR_ID => { + Box::new(Poseidon2Instance::new(self.poseidon2_sm.clone(), ictx)) + } + _ => { + panic!( + "Poseidon2Builder::get_instance() Unsupported air_id: {:?}", + ictx.plan.air_id + ) + } + } + } +} diff --git a/precompiles/poseidon2/src/poseidon2_planner.rs b/precompiles/poseidon2/src/poseidon2_planner.rs new file mode 100644 index 000000000..b6eba1e75 --- /dev/null +++ b/precompiles/poseidon2/src/poseidon2_planner.rs @@ -0,0 +1,136 @@ +//! The `Poseidon2Planner` module defines a planner for generating execution plans specific to +//! arithmetic operations. +//! +//! It organizes execution plans for both regular instances and table instances, +//! leveraging arithmetic operation counts and metadata to construct detailed plans. + +use std::any::Any; + +use crate::Poseidon2CounterInputGen; + +use zisk_common::{ + plan, BusDeviceMetrics, CheckPoint, ChunkId, InstCount, InstanceInfo, InstanceType, Metrics, + Plan, Planner, TableInfo, +}; + +/// The `Poseidon2Planner` struct organizes execution plans for arithmetic instances and tables. +/// +/// It allows adding metadata about instances and tables and generates plans +/// based on the provided counters. +#[derive(Default)] +pub struct Poseidon2Planner { + /// Arithmetic instances info to be planned. + instances_info: Vec, + + /// Arithmetic table instances info to be planned. + tables_info: Vec, +} + +impl Poseidon2Planner { + /// Creates a new `Poseidon2Planner`. + /// + /// # Returns + /// A new `Poseidon2Planner` instance with no preconfigured instances or tables. + pub fn new() -> Self { + Self { instances_info: Vec::new(), tables_info: Vec::new() } + } + + /// Adds an arithmetic instance to the planner. + /// + /// # Arguments + /// * `instance_info` - The `InstanceInfo` describing the arithmetic instance to be added. + /// + /// # Returns + /// The updated `Poseidon2Planner` instance. + pub fn add_instance(mut self, instance_info: InstanceInfo) -> Self { + self.instances_info.push(instance_info); + self + } + + /// Adds an arithmetic table instance to the planner. + /// + /// # Arguments + /// * `table_info` - The `TableInfo` describing the arithmetic table instance to be added. + /// + /// # Returns + /// The updated `Poseidon2Planner` instance. + pub fn add_table_instance(mut self, table_info: TableInfo) -> Self { + self.tables_info.push(table_info); + self + } +} + +impl Planner for Poseidon2Planner { + /// Generates execution plans for arithmetic instances and tables. + /// + /// # Arguments + /// * `counters` - A vector of counters, each associated with a `ChunkId` and `ArithCounter` + /// metrics data. + /// + /// # Returns + /// A vector of `Plan` instances representing execution configurations for the instances and + /// tables. + /// + /// # Panics + /// Panics if any counter cannot be downcasted to an `ArithCounter`. + fn plan(&self, counters: Vec<(ChunkId, Box)>) -> Vec { + // Prepare counts + let mut count: Vec> = Vec::with_capacity(self.instances_info.len()); + + for _ in 0..self.instances_info.len() { + count.push(Vec::new()); + } + + counters.iter().for_each(|(chunk_id, counter)| { + let reg_counter = + Metrics::as_any(&**counter).downcast_ref::().unwrap(); + + // Iterate over `instances_info` and add `InstCount` objects to the correct vector + for (index, instance_info) in self.instances_info.iter().enumerate() { + let inst_count = InstCount::new( + *chunk_id, + reg_counter.inst_count(instance_info.op_type).unwrap(), + ); + + // Add the `InstCount` to the corresponding inner vector + count[index].push(inst_count); + } + }); + + let mut plan_result = Vec::new(); + + for (idx, instance) in self.instances_info.iter().enumerate() { + let plan: Vec<_> = plan(&count[idx], instance.num_ops as u64) + .into_iter() + .map(|(check_point, collect_info)| { + let converted: Box = Box::new(collect_info); + Plan::new( + instance.airgroup_id, + instance.air_id, + None, + InstanceType::Instance, + check_point, + Some(converted), + ) + }) + .collect(); + + plan_result.extend(plan); + } + + if !plan_result.is_empty() { + for table_instance in self.tables_info.iter() { + plan_result.push(Plan::new( + table_instance.airgroup_id, + table_instance.air_id, + None, + InstanceType::Table, + CheckPoint::None, + None, + )); + } + } + + plan_result + } +} diff --git a/precompiles/sha256f/Cargo.toml b/precompiles/sha256f/Cargo.toml index 3220332d9..205f81f56 100644 --- a/precompiles/sha256f/Cargo.toml +++ b/precompiles/sha256f/Cargo.toml @@ -15,7 +15,6 @@ precompiles-common = { workspace = true } sm-mem = { workspace = true } mem-common = { workspace = true } -proofman = { workspace = true } proofman-common = { workspace = true } proofman-macros = { workspace = true } proofman-util = { workspace = true } @@ -23,12 +22,8 @@ pil-std-lib = { workspace = true } fields = { workspace=true } tracing = { workspace = true } rayon = { workspace = true } -sha2 = { workspace = true } [features] default = [] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed"] -no_lib_link = ["proofman-common/no_lib_link"] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] \ No newline at end of file +gpu = ["packed"] +packed = [] \ No newline at end of file diff --git a/precompiles/sha256f/pil/sha256f.pil b/precompiles/sha256f/pil/sha256f.pil index ff1660a55..d44db4788 100644 --- a/precompiles/sha256f/pil/sha256f.pil +++ b/precompiles/sha256f/pil/sha256f.pil @@ -11,7 +11,7 @@ require "opids.pil" // Note: We use little endian representation. -airtemplate Sha256f(const int N = 2**22, const int operation_bus_id = OPERATION_BUS_ID) { +airtemplate Sha256f(const int N = 2**22) { /* ROW a[0..32] e[0..32] w[0..32] STAGE | 0 | 0bD₁D₂..D₃₂ | 0bH₁H₂..H₃₂ | XXXXXXXXXXXXXXXX | LOAD STATE | @@ -201,18 +201,21 @@ airtemplate Sha256f(const int N = 2**22, const int operation_bus_id = OPERATION_ clock_eq(step_addr, ADDR_STATE, ADDR_IND_0) === 0; clock_eq(step_addr, ADDR_INPUT, ADDR_IND_1) === 0; + // Swap w bytes for memory consistency + const expr w_spacked = swap_bytes_and_pack(w); + expr mem_value[2]; - mem_value[0] = CLK[0] * a_packed'2 + CLK[1] * 'a_packed + CLK[2] * e_packed + CLK[3] * 3'e_packed + - CLK[4] * w_packed' + CLK[5] * w_packed'2 + CLK[6] * w_packed'3 + CLK[7] * w_packed'4 + - CLK[8] * w_packed'5 + CLK[9] * w_packed'6 + CLK[10] * w_packed'7 + CLK[11] * w_packed'8 + - CLK[12] * a_packed'58 + CLK[13] * a_packed'55 + CLK[14] * e_packed'56 + CLK[15] * e_packed'53 + + mem_value[0] = CLK[0] * a_packed'3 + CLK[1] * a_packed + CLK[2] * e_packed' + CLK[3] * 2'e_packed + + CLK[4] * w_spacked + CLK[5] * w_spacked' + CLK[6] * w_spacked'2 + CLK[7] * w_spacked'3 + + CLK[8] * w_spacked'4 + CLK[9] * w_spacked'5 + CLK[10] * w_spacked'6 + CLK[11] * w_spacked'7 + + CLK[12] * a_packed'59 + CLK[13] * a_packed'56 + CLK[14] * e_packed'57 + CLK[15] * e_packed'54 + clock_map(step_addr, ADDR_IND_0, 16) + clock_map(step_addr, ADDR_IND_1, 17); - mem_value[1] = CLK[0] * a_packed'3 + CLK[1] * a_packed + CLK[2] * e_packed' + CLK[3] * 2'e_packed + - CLK[4] * w_packed + CLK[5] * w_packed' + CLK[6] * w_packed'2 + CLK[7] * w_packed'3 + - CLK[8] * w_packed'4 + CLK[9] * w_packed'5 + CLK[10] * w_packed'6 + CLK[11] * w_packed'7 + - CLK[12] * a_packed'59 + CLK[13] * a_packed'56 + CLK[14] * e_packed'57 + CLK[15] * e_packed'54; - // high bits of ADDR_IND_0 and ADDR_IND_ are 0 + mem_value[1] = CLK[0] * a_packed'2 + CLK[1] * 'a_packed + CLK[2] * e_packed + CLK[3] * 3'e_packed + + CLK[4] * w_spacked' + CLK[5] * w_spacked'2 + CLK[6] * w_spacked'3 + CLK[7] * w_spacked'4 + + CLK[8] * w_spacked'5 + CLK[9] * w_spacked'6 + CLK[10] * w_spacked'7 + CLK[11] * w_spacked'8 + + CLK[12] * a_packed'58 + CLK[13] * a_packed'55 + CLK[14] * e_packed'56 + CLK[15] * e_packed'53; + // addresses are 32-bit values const expr mem_addr = clock_map(step_addr, ADDR_STATE, start: 0, end: 3, delta: 8) + clock_map(step_addr, ADDR_INPUT, start: 4, end: 11, delta: 8) + @@ -246,7 +249,8 @@ airtemplate Sha256f(const int N = 2**22, const int operation_bus_id = OPERATION_ ); // --> Constraints to make sure that this coprocessor is called from the main processor - lookup_proves(operation_bus_id, [OP_SHA256F, step_addr'(STEP_MAIN), 0, step_addr'(ADDR_OP), 0, 0, 0, 0], mul: in_use_clk_0); + + proves_operation(op: OP_SHA256F, b: [step_addr'(ADDR_OP), 0], main_step: step_addr'(STEP_MAIN), mul: in_use_clk_0); function pack(const expr a[]): expr { const int len = length(a); @@ -257,6 +261,19 @@ airtemplate Sha256f(const int N = 2**22, const int operation_bus_id = OPERATION_ return packed; } + function swap_bytes_and_pack(const expr a[]): expr { + const int len = length(a); + expr result = 0; + for (int i = 0; i < len; i++) { + int byte_idx = i / 8; + int bit_in_byte = i % 8; + int swapped_byte = 3 - byte_idx; + int swapped_idx = swapped_byte * 8 + bit_in_byte; + result += a[swapped_idx] * 2**i; + } + return result; + } + // Given an old w, computes the new w for the next round function compute_w(const expr old_w[][]): expr { expr [old_w2, old_w7, old_w15, old_w16] = [old_w[0], old_w[1], old_w[2], old_w[3]]; diff --git a/precompiles/sha256f/src/sha256f.rs b/precompiles/sha256f/src/sha256f.rs index 2141ca0cb..9891e039d 100644 --- a/precompiles/sha256f/src/sha256f.rs +++ b/precompiles/sha256f/src/sha256f.rs @@ -99,19 +99,22 @@ impl Sha256fSM { let mut prev_state = [0u32; 8]; for i in 0..CLOCKS_LOAD_STATE { let word = state[i]; - let word_high = (word >> 32) as u32; - let word_low = (word & 0xFFFF_FFFF) as u32; + + // First word is the low significant 32 bits of word + // Second word is the high significant 32 bits of word + let word_first = (word & 0xFFFF_FFFF) as u32; + let word_second = (word >> 32) as u32; // Store the state as u32 for further processing - prev_state[2 * i] = word_high; - prev_state[2 * i + 1] = word_low; + prev_state[2 * i] = word_first; + prev_state[2 * i + 1] = word_second; let mut row = if i == 1 || i == 3 { offset + 1 } else { offset + 3 }; // Locate the state bits in the trace let is_a = i < 2; for j in 0..32 { - let bit = ((word_high >> j) & 1) != 0; + let bit = ((word_first >> j) & 1) != 0; if is_a { trace[row].set_a(j, bit); } else { @@ -120,7 +123,7 @@ impl Sha256fSM { } row -= 1; for j in 0..32 { - let bit = ((word_low >> j) & 1) != 0; + let bit = ((word_second >> j) & 1) != 0; if is_a { trace[row].set_a(j, bit); } else { @@ -133,10 +136,13 @@ impl Sha256fSM { // Compute the load input stage let mut w = [0u32; 16]; for i in 0..CLOCKS_LOAD_INPUT { - let word = input[i / 2]; + // Input is received as little-endian u64 words, so we need to swap bytes + let word = input[i / 2].swap_bytes(); + let word_low = (word & 0xFFFF_FFFF) as u32; + let word_high = (word >> 32) as u32; // Store the input as u32 for further processing - w[i] = if i % 2 == 0 { (word >> 32) as u32 } else { (word & 0xFFFF_FFFF) as u32 }; + w[i] = if i % 2 == 0 { word_high } else { word_low }; // Compute the a and e values for the current input let [old_a, old_b, old_c, old_d, old_e, old_f, old_g, old_h] = prev_state; @@ -232,32 +238,36 @@ impl Sha256fSM { for i in 0..CLOCKS_WRITE_STATE { let prev = state[i]; - let prev_high = prev >> 32; - let prev_low = prev & 0xFFFF_FFFF; - let curr_high = (prev_state[2 * i]) as u64; - let curr_low = (prev_state[2 * i + 1]) as u64; + // First word is the low significant 32 bits of word + // Second word is the high significant 32 bits of word + let prev_first = prev & 0xFFFF_FFFF; + let prev_second = prev >> 32; + + let curr_first = (prev_state[2 * i]) as u64; + let curr_second = (prev_state[2 * i + 1]) as u64; - let new_high = curr_high + prev_high; - let new_low = curr_low + prev_low; - let (new_high_carry, new_high) = - ((new_high >> 32) as u8, (new_high & 0xFFFF_FFFF) as u32); - let (new_low_carry, new_low) = ((new_low >> 32) as u8, (new_low & 0xFFFF_FFFF) as u32); + let new_first = curr_first + prev_first; + let new_second = curr_second + prev_second; + let (new_first_carry, new_first) = + ((new_first >> 32) as u8, (new_first & 0xFFFF_FFFF) as u32); + let (new_second_carry, new_second) = + ((new_second >> 32) as u8, (new_second & 0xFFFF_FFFF) as u32); let mut row = if i == 1 || i == 3 { offset + 1 } else { offset + 3 }; // Locate the state bits in the trace let is_a = i < 2; if is_a { - trace[row].set_new_a_carry_bits(new_high_carry); - a_range_checks[new_high_carry as usize] += 1; + trace[row].set_new_a_carry_bits(new_first_carry); + a_range_checks[new_first_carry as usize] += 1; } else { - trace[row].set_new_e_carry_bits(new_high_carry); - e_range_checks[new_high_carry as usize] += 1; + trace[row].set_new_e_carry_bits(new_first_carry); + e_range_checks[new_first_carry as usize] += 1; } for j in 0..32 { - let bit = ((new_high >> j) & 1) != 0; + let bit = ((new_first >> j) & 1) != 0; if is_a { trace[row].set_a(j, bit); } else { @@ -267,15 +277,15 @@ impl Sha256fSM { row -= 1; if is_a { - trace[row].set_new_a_carry_bits(new_low_carry); - a_range_checks[new_low_carry as usize] += 1; + trace[row].set_new_a_carry_bits(new_second_carry); + a_range_checks[new_second_carry as usize] += 1; } else { - trace[row].set_new_e_carry_bits(new_low_carry); - e_range_checks[new_low_carry as usize] += 1; + trace[row].set_new_e_carry_bits(new_second_carry); + e_range_checks[new_second_carry as usize] += 1; } for j in 0..32 { - let bit = ((new_low >> j) & 1) != 0; + let bit = ((new_second >> j) & 1) != 0; if is_a { trace[row].set_a(j, bit); } else { @@ -300,7 +310,6 @@ impl Sha256fSM { let a = (t1 as u64) + (t2 as u64); let e = (old_d as u64) + (t1 as u64); (a, e) - // (s0 as u64, s1 as u64) } fn compute_w(old_w2: u32, old_w7: u32, old_w15: u32, old_w16: u32) -> u64 { diff --git a/precompiles/sha256f/src/sha256f_bus_device.rs b/precompiles/sha256f/src/sha256f_bus_device.rs index e232b50b3..a9814a51c 100644 --- a/precompiles/sha256f/src/sha256f_bus_device.rs +++ b/precompiles/sha256f/src/sha256f_bus_device.rs @@ -2,11 +2,12 @@ //! sent over the data bus. It connects to the bus and gathers metrics for specific //! `ZiskOperationType::Sha256f` instructions. -use std::{collections::VecDeque, ops::Add}; +use std::ops::Add; -use zisk_common::MemCollectorInfo; +use precompiles_common::MemProcessor; +use zisk_common::STEP; use zisk_common::{ - BusDevice, BusDeviceMode, BusId, Counter, Metrics, A, B, OPERATION_BUS_ID, OP_TYPE, + BusDevice, BusDeviceMode, BusId, Counter, Metrics, B, OPERATION_BUS_ID, OP_TYPE, }; use zisk_core::ZiskOperationType; @@ -48,6 +49,51 @@ impl Sha256fCounterInputGen { pub fn inst_count(&self, op_type: ZiskOperationType) -> Option { (op_type == ZiskOperationType::Sha256).then_some(self.counter.inst_count) } + + /// Processes data received on the bus, updating counters and generating inputs when applicable. + /// + /// # Arguments + /// * `bus_id` - The ID of the bus sending the data. + /// * `data` - The data received from the bus. + /// * `mem_processors` – A queue of mem_processors bus operations used to send derived inputs. + /// + /// # Returns + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data( + &mut self, + bus_id: &BusId, + data: &[u64], + mem_processors: &mut P, + ) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + if data[OP_TYPE] as u32 != ZiskOperationType::Sha256 as u32 { + return true; + } + + let step_main = data[STEP]; + let addr_main = data[B] as u32; + + match self.mode { + BusDeviceMode::Counter => { + self.measure(data); + generate_sha256f_mem_inputs(addr_main, step_main, data, true, mem_processors); + } + BusDeviceMode::CounterAsm => { + self.measure(data); + } + BusDeviceMode::InputGenerator => { + if skip_sha256f_mem_inputs(addr_main, data, mem_processors) { + return true; + } + generate_sha256f_mem_inputs(addr_main, step_main, data, false, mem_processors); + } + } + + true + } } impl Metrics for Sha256fCounterInputGen { @@ -90,57 +136,6 @@ impl Add for Sha256fCounterInputGen { } impl BusDevice for Sha256fCounterInputGen { - /// Processes data received on the bus, updating counters and generating inputs when applicable. - /// - /// # Arguments - /// * `bus_id` - The ID of the bus sending the data. - /// * `data` - The data received from the bus. - /// * `pending` – A queue of pending bus operations used to send derived inputs. - /// - /// # Returns - /// A boolean indicating whether the program should continue execution or terminate. - /// Returns `true` to continue execution, `false` to stop. - #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - pending: &mut VecDeque<(BusId, Vec)>, - mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { - debug_assert!(*bus_id == OPERATION_BUS_ID); - - if data[OP_TYPE] as u32 != ZiskOperationType::Sha256 as u32 { - return true; - } - - if let Some(mem_collectors_info) = mem_collector_info { - if skip_sha256f_mem_inputs(data[B] as u32, data, mem_collectors_info) { - return true; - } - } - - let step_main = data[A]; - let addr_main = data[B] as u32; - - let only_counters = self.mode == BusDeviceMode::Counter; - if only_counters { - self.measure(data); - } - - generate_sha256f_mem_inputs(addr_main, step_main, data, only_counters, pending); - - true - } - - /// Returns the bus IDs associated with this counter. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/precompiles/sha256f/src/sha256f_gen_mem_inputs.rs b/precompiles/sha256f/src/sha256f_gen_mem_inputs.rs index 4b4f7c43e..d037a96f8 100644 --- a/precompiles/sha256f/src/sha256f_gen_mem_inputs.rs +++ b/precompiles/sha256f/src/sha256f_gen_mem_inputs.rs @@ -1,10 +1,8 @@ -use sha2::compress256; - use precompiles_common::MemBusHelpers; -use std::collections::VecDeque; -use zisk_common::MemCollectorInfo; -use zisk_common::{BusId, OPERATION_BUS_DATA_SIZE}; -use zisk_core::{convert_u32_to_u64, convert_u64_to_generic_array_bytes, convert_u64_to_u32}; +use precompiles_common::MemProcessor; + +use zisk_common::OPERATION_PRECOMPILED_BUS_DATA_SIZE; +use zisk_core::sha256f; #[derive(Debug)] pub struct Sha256MemInputConfig { @@ -15,35 +13,31 @@ pub struct Sha256MemInputConfig { pub chunks_per_param: usize, } -pub fn generate_sha256f_mem_inputs( +pub fn generate_sha256f_mem_inputs( addr_main: u32, step_main: u64, data: &[u64], only_counters: bool, - pending: &mut VecDeque<(BusId, Vec)>, + mem_processors: &mut P, ) { // Get the basic data from the input // op,op_type,a,b,addr[2],... - let state: &mut [u64; 4] = &mut data[6..10].try_into().unwrap(); - let input: &[u64; 8] = &data[10..18].try_into().unwrap(); + let state: &mut [u64; 4] = &mut data[7..11].try_into().unwrap(); + let input: &[u64; 8] = &data[11..19].try_into().unwrap(); // Apply the sha256f function and get the output - let mut state_u32: [u32; 8] = convert_u64_to_u32(state).try_into().unwrap(); - let block = convert_u64_to_generic_array_bytes(input); - compress256(&mut state_u32, &[block]); - - *state = convert_u32_to_u64(&state_u32); + sha256f(state, input); // Generate the memory reads/writes let indirect_params = 2; // Start by generating the indirection reads for iparam in 0..indirect_params { - MemBusHelpers::mem_aligned_load( + MemBusHelpers::mem_aligned_read( addr_main + iparam as u32 * 8, step_main, - data[OPERATION_BUS_DATA_SIZE + iparam], - pending, + data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + iparam], + mem_processors, ); } @@ -52,12 +46,12 @@ pub fn generate_sha256f_mem_inputs( let write_params = 1; let chunks_per_param = [4usize, 8, 4]; let params_count = read_params + write_params; - let params_offset = OPERATION_BUS_DATA_SIZE + indirect_params; + let params_offset = OPERATION_PRECOMPILED_BUS_DATA_SIZE + indirect_params; let mut read_chunks = 0; for (iparam, &chunks) in chunks_per_param.iter().enumerate().take(params_count) { let is_write = iparam >= read_params; let param_index = if is_write { iparam - read_params } else { iparam }; - let param_addr = data[OPERATION_BUS_DATA_SIZE + param_index] as u32; + let param_addr = data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + param_index] as u32; // read/write all chunks of the iparam parameter let current_param_offset = if is_write { // if write calculate index over write_data @@ -81,16 +75,16 @@ pub fn generate_sha256f_mem_inputs( step_main, chunk_data, is_write, - pending, + mem_processors, ); } } } -pub fn skip_sha256f_mem_inputs( +pub fn skip_sha256f_mem_inputs( addr_main: u32, data: &[u64], - mem_collectors_info: &[MemCollectorInfo], + mem_processors: &mut P, ) -> bool { let indirect_params = 2; let read_params = 2; @@ -99,24 +93,20 @@ pub fn skip_sha256f_mem_inputs( for iparam in 0..indirect_params { let addr = addr_main + iparam as u32 * 8; - for mem_collector in mem_collectors_info { - if !mem_collector.skip_addr(addr) { - return false; - } + if !mem_processors.skip_addr(addr) { + return false; } } for (iparam, &chunks) in chunks_per_param.iter().enumerate().take(read_params + write_params) { let is_write = iparam >= read_params; let param_index = if is_write { iparam - read_params } else { iparam }; - let param_addr = data[OPERATION_BUS_DATA_SIZE + param_index] as u32; + let param_addr = data[OPERATION_PRECOMPILED_BUS_DATA_SIZE + param_index] as u32; for ichunk in 0..chunks { let addr = param_addr + ichunk as u32 * 8; - for mem_collector in mem_collectors_info { - if !mem_collector.skip_addr(addr) { - return false; - } + if !mem_processors.skip_addr(addr) { + return false; } } } diff --git a/precompiles/sha256f/src/sha256f_input.rs b/precompiles/sha256f/src/sha256f_input.rs index 9e2ba78e3..f3e8229c3 100644 --- a/precompiles/sha256f/src/sha256f_input.rs +++ b/precompiles/sha256f/src/sha256f_input.rs @@ -13,12 +13,12 @@ pub struct Sha256fInput { impl Sha256fInput { pub fn from(values: &OperationSha256Data) -> Self { Self { - step_main: values[2], + step_main: values[4], addr_main: values[3] as u32, - state_addr: values[4] as u32, - input_addr: values[5] as u32, - state: values[6..10].try_into().unwrap(), - input: values[10..18].try_into().unwrap(), + state_addr: values[5] as u32, + input_addr: values[6] as u32, + state: values[7..11].try_into().unwrap(), + input: values[11..19].try_into().unwrap(), } } } diff --git a/precompiles/sha256f/src/sha256f_instance.rs b/precompiles/sha256f/src/sha256f_instance.rs index 891e79425..b06713aa2 100644 --- a/precompiles/sha256f/src/sha256f_instance.rs +++ b/precompiles/sha256f/src/sha256f_instance.rs @@ -7,12 +7,12 @@ use crate::{Sha256fInput, Sha256fSM}; use fields::PrimeField64; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; -use std::collections::VecDeque; use std::{any::Any, collections::HashMap, sync::Arc}; use zisk_common::ChunkId; +use zisk_common::StatsType; use zisk_common::{ BusDevice, BusId, CheckPoint, CollectSkipper, ExtOperationData, Instance, InstanceCtx, - InstanceType, MemCollectorInfo, PayloadType, OPERATION_BUS_ID, OP_TYPE, + InstanceType, PayloadType, OPERATION_BUS_ID, OP_TYPE, }; use zisk_core::ZiskOperationType; use zisk_pil::Sha256fTrace; @@ -101,6 +101,10 @@ impl Instance for Sha256fInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Precompiled + } + fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { assert_eq!( self.ictx.plan.air_id, @@ -149,9 +153,7 @@ impl Sha256fCollector { collect_skipper, } } -} -impl BusDevice for Sha256fCollector { /// Processes data received on the bus, collecting the inputs necessary for witness computation. /// /// # Arguments @@ -164,13 +166,7 @@ impl BusDevice for Sha256fCollector { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[PayloadType], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[PayloadType]) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); if self.inputs.len() == self.num_operations as usize { @@ -195,15 +191,9 @@ impl BusDevice for Sha256fCollector { self.inputs.len() < self.num_operations as usize } +} - /// Returns the bus IDs associated with this instance. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - +impl BusDevice for Sha256fCollector { fn as_any(self: Box) -> Box { self } diff --git a/precompiles/sha256f/src/sha256f_manager.rs b/precompiles/sha256f/src/sha256f_manager.rs index 658c6201f..03312d2e1 100644 --- a/precompiles/sha256f/src/sha256f_manager.rs +++ b/precompiles/sha256f/src/sha256f_manager.rs @@ -2,10 +2,7 @@ use std::sync::Arc; use fields::PrimeField64; use pil_std_lib::Std; -use zisk_common::{ - BusDevice, BusDeviceMetrics, BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, - InstanceInfo, PayloadType, Planner, -}; +use zisk_common::{BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, InstanceInfo, Planner}; use zisk_core::ZiskOperationType; use zisk_pil::Sha256fTrace; @@ -30,8 +27,11 @@ impl Sha256fManager { Arc::new(Self { sha256f_sm }) } - pub fn build_sha256f_counter(&self) -> Sha256fCounterInputGen { - Sha256fCounterInputGen::new(BusDeviceMode::Counter) + pub fn build_sha256f_counter(&self, asm_execution: bool) -> Sha256fCounterInputGen { + match asm_execution { + true => Sha256fCounterInputGen::new(BusDeviceMode::CounterAsm), + false => Sha256fCounterInputGen::new(BusDeviceMode::Counter), + } } pub fn build_sha256f_input_generator(&self) -> Sha256fCounterInputGen { @@ -40,14 +40,6 @@ impl Sha256fManager { } impl ComponentBuilder for Sha256fManager { - /// Builds and returns a new counter for monitoring sha256f operations. - /// - /// # Returns - /// A boxed implementation of `RegularCounters` configured for sha256f operations. - fn build_counter(&self) -> Option> { - Some(Box::new(Sha256fCounterInputGen::new(BusDeviceMode::Counter))) - } - /// Builds a planner to plan sha256f-related instances. /// /// # Returns @@ -85,8 +77,4 @@ impl ComponentBuilder for Sha256fManager { } } } - - fn build_inputs_generator(&self) -> Option>> { - Some(Box::new(Sha256fCounterInputGen::new(BusDeviceMode::InputGenerator))) - } } diff --git a/riscv/Cargo.toml b/riscv/Cargo.toml index 10fd17018..a3a27ac98 100644 --- a/riscv/Cargo.toml +++ b/riscv/Cargo.toml @@ -6,6 +6,3 @@ license = { workspace = true } keywords = { workspace = true } repository = { workspace = true } categories = { workspace = true } - -[dependencies] -elf = "0.7.4" diff --git a/riscv/src/riscv_inst.rs b/riscv/src/riscv_inst.rs index 6fdacdafd..679ea33dd 100644 --- a/riscv/src/riscv_inst.rs +++ b/riscv/src/riscv_inst.rs @@ -33,7 +33,7 @@ //! See /// RISC-V instruction data -#[derive(Default, Debug)] +#[derive(Default, Debug, Clone)] pub struct RiscvInstruction { /// Instruction ROM address, i.e. program counter value pub rom_address: u64, diff --git a/rom-setup/Cargo.toml b/rom-setup/Cargo.toml index 194b0fcfe..19a546f5c 100644 --- a/rom-setup/Cargo.toml +++ b/rom-setup/Cargo.toml @@ -11,17 +11,16 @@ categories = { workspace = true } sm-rom = { workspace = true } zisk-core = { workspace = true } zisk-pil = { workspace = true } +zisk-common = { workspace = true } tracing = { workspace = true } fields = { workspace = true } proofman-common = { workspace = true } colored = { workspace = true } anyhow = { workspace = true } - blake3 = "1.3.1" [features] default = [] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed"] -no_lib_link = ["proofman-common/no_lib_link"] +gpu = ["packed"] +packed = [] diff --git a/rom-setup/src/asm_setup.rs b/rom-setup/src/asm_setup.rs index 525827c17..0fb4aa945 100644 --- a/rom-setup/src/asm_setup.rs +++ b/rom-setup/src/asm_setup.rs @@ -1,30 +1,239 @@ +use anyhow::Context; +use anyhow::Result; use std::{ path::{Path, PathBuf}, process::{Command, Stdio}, }; - -use anyhow::Result; use zisk_core::{is_elf_file, AsmGenerationMethod, Riscv2zisk}; -pub fn generate_assembly( +use crate::get_elf_data_hash_from_path; + +fn find_workspace_root(start: &Path) -> Option { + let mut current = Some(start); + + while let Some(dir) = current { + let cargo_toml = dir.join("Cargo.toml"); + + if cargo_toml.exists() { + if let Ok(contents) = std::fs::read_to_string(&cargo_toml) { + if contents.contains("[workspace]") { + return Some(dir.to_path_buf()); + } + } + } + + current = dir.parent(); + } + + None +} + +pub fn resolve_emulator_asm() -> Result { + let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + + let workspace_root = + if manifest_dir.exists() { find_workspace_root(&manifest_dir) } else { None }; + + let cargo_available = Command::new("cargo") + .arg("--version") + .status() + .map(|status| status.success()) + .unwrap_or(false); + + // Check if we can build from workspace (need both cargo and workspace with ziskclib) + let can_build_from_workspace = cargo_available + && if let Some(ref root) = workspace_root { + let candidate = root.join("emulator-asm"); + let ziskclib_path = root.join("ziskclib"); + candidate.exists() && ziskclib_path.exists() + } else { + false + }; + + let installed_path = crate::get_default_zisk_path(); + let installed_asm_path = installed_path.join("zisk/emulator-asm"); + + let emulator_asm_path = if can_build_from_workspace { + let candidate = workspace_root.unwrap().join("emulator-asm"); + tracing::debug!("Using emulator-asm from workspace: {}", candidate.display()); + candidate + } else { + if !cargo_available { + tracing::debug!( + "Cargo not available, using installed path: {}", + installed_asm_path.display() + ); + } else if workspace_root.is_none() { + tracing::debug!( + "No workspace found, using installed path: {}", + installed_asm_path.display() + ); + } else { + tracing::debug!( + "Workspace missing ziskclib source, using installed path: {}", + installed_asm_path.display() + ); + } + + installed_asm_path.clone() + }; + + tracing::info!("Looking for emulator-asm at: {}", emulator_asm_path.display()); + + if !emulator_asm_path.exists() { + anyhow::bail!("emulator-asm directory not found at: {}", emulator_asm_path.display()); + } + + let emulator_parent = + emulator_asm_path.parent().context("Failed to get parent directory of emulator-asm")?; + let ziskclib_path = emulator_parent.join("ziskclib"); + + let target_lib_path = if emulator_asm_path == installed_asm_path { + // For installed path, look in .zisk/bin/ + installed_path.join("bin").join("libziskclib.a") + } else { + // For workspace builds, look in target/release/ + emulator_parent.join("target/release/libziskclib.a") + }; + + tracing::info!("Looking for ziskclib at: {}", target_lib_path.display()); + + // Only try to build if cargo is available and ziskclib source exists + if cargo_available && ziskclib_path.exists() { + tracing::debug!("Found ziskclib at: {}", ziskclib_path.display()); + tracing::debug!("Building ziskclib..."); + + let output = Command::new("cargo") + .args(["build", "--release", "-p", "ziskclib"]) + .current_dir(emulator_parent) + .output() + .context("Failed to execute cargo build for ziskclib")?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + let stdout = String::from_utf8_lossy(&output.stdout); + anyhow::bail!("Failed to build ziskclib:\nstdout: {}\nstderr: {}", stdout, stderr); + } + + if !target_lib_path.exists() { + anyhow::bail!( + "ziskclib build succeeded but library not found at: {}", + target_lib_path.display() + ); + } + + tracing::debug!("ziskclib built successfully at: {}", target_lib_path.display()); + } else { + if !target_lib_path.exists() { + if emulator_asm_path == installed_path { + anyhow::bail!( + "Pre-built libziskclib.a not found at: {}\nPlease ensure zisk is properly installed", + target_lib_path.display() + ); + } else if cargo_available { + anyhow::bail!( + "libziskclib.a not found at: {}\nziskclib directory not found at: {}\nCannot build or locate ziskclib library", + target_lib_path.display(), + ziskclib_path.display() + ); + } else { + anyhow::bail!( + "libziskclib.a not found at: {}\nCargo not available for building from source\nConsider using the installed version instead", + target_lib_path.display() + ); + } + } + tracing::debug!("Using existing ziskclib at: {}", target_lib_path.display()); + } + + Ok(emulator_asm_path) +} + +/// Get the paths to all assembly binary files for a given ELF and output path +pub fn get_assembly_file_paths( elf: &Path, - elf_hash: &str, - zisk_path: &Path, output_path: &Path, + hints: bool, +) -> Result> { + let elf_hash = get_elf_data_hash_from_path(elf)?; + + let stem = elf + .file_stem() + .context("Failed to extract file stem from ELF path")? + .to_str() + .context("Failed to convert ELF file stem to string")?; + let stem = if hints { format!("{stem}-hints") } else { stem.to_string() }; + let new_filename = format!("{stem}-{elf_hash}.tmp"); + let base_path = output_path.join(new_filename); + let file_stem = base_path + .file_stem() + .context("Failed to extract file stem from base path")? + .to_str() + .context("Failed to convert file stem to string")?; + + let bin_mt_file = format!("{file_stem}-mt.bin"); + let bin_mt_file = base_path.with_file_name(bin_mt_file); + + let bin_rh_file = format!("{file_stem}-rh.bin"); + let bin_rh_file = base_path.with_file_name(bin_rh_file); + + let bin_mo_file = format!("{file_stem}-mo.bin"); + let bin_mo_file = base_path.with_file_name(bin_mo_file); + + Ok(vec![bin_mt_file, bin_rh_file, bin_mo_file]) +} + +/// Check if all assembly binary files exist for a given ELF and output path +pub fn assembly_files_exist(elf: &Path, output_path: &Path, hints: bool) -> Result { + let files = get_assembly_file_paths(elf, output_path, hints)?; + Ok(files.iter().all(|f| f.exists())) +} + +pub fn gen_assembly( + _elf: &Path, + _output_dir: &Option, + _hints: bool, + _verbose: bool, +) -> Result<(), anyhow::Error> { + // Assembly setup is not needed on macOS due to the lack of support for assembly generation. + #[cfg(not(target_os = "macos"))] + { + let output_path = crate::get_output_path(_output_dir)?; + let elf_data = + std::fs::read(_elf).with_context(|| format!("Error reading ELF file: {_elf:?}"))?; + let stem = _elf + .file_stem() + .context("Failed to extract file stem from ELF path")? + .to_str() + .context("Failed to convert ELF file stem to string")?; + tracing::info!("Computing assembly setup"); + generate_assembly(&elf_data, stem, output_path.as_path(), _hints, _verbose)?; + tracing::info!("Assembly setup generated at {}", output_path.display()); + } + Ok(()) +} + +pub fn generate_assembly( + elf: &[u8], + elf_name: &str, + output_path: &Path, + hints: bool, verbose: bool, ) -> Result<(), anyhow::Error> { - // Read the ELF file and check if it is a valid ELF file - let elf_file_path = PathBuf::from(elf); - let file_data = std::fs::read(&elf_file_path)?; + let elf_hash = blake3::hash(elf).to_hex().to_string(); - if !is_elf_file(&file_data).unwrap_or_else(|_| panic!("Error reading ROM file")) { - panic!("ROM file is not a valid ELF file"); + if !is_elf_file(elf).context("Error reading ROM file")? { + anyhow::bail!("ROM file is not a valid ELF file"); } - let stem = elf.file_stem().unwrap().to_str().unwrap(); + let stem = if hints { format!("{elf_name}-hints") } else { elf_name.to_string() }; let new_filename = format!("{stem}-{elf_hash}.tmp"); let base_path = output_path.join(new_filename); - let file_stem = base_path.file_stem().unwrap().to_str().unwrap(); + let file_stem = base_path + .file_stem() + .context("Failed to extract file stem from base path")? + .to_str() + .context("Failed to convert file stem to string")?; let bin_mt_file = format!("{file_stem}-mt.bin"); let bin_mt_file = base_path.with_file_name(bin_mt_file); @@ -35,22 +244,24 @@ pub fn generate_assembly( let bin_mo_file = format!("{file_stem}-mo.bin"); let bin_mo_file = base_path.with_file_name(bin_mo_file); - [ - (bin_mt_file, AsmGenerationMethod::AsmMinimalTraces), - (bin_rh_file, AsmGenerationMethod::AsmRomHistogram), - (bin_mo_file, AsmGenerationMethod::AsmMemOp), - ] - .iter() - .for_each(|(file, gen_method)| { + let emulator_asm_path = resolve_emulator_asm()?; + + let emulator_asm_path = + emulator_asm_path.to_str().context("Failed to convert emulator-asm path to string")?; + + for (file, gen_method, trace_target) in [ + (bin_mt_file, AsmGenerationMethod::AsmMinimalTraces, "MT"), + (bin_rh_file, AsmGenerationMethod::AsmRomHistogram, "RH"), + (bin_mo_file, AsmGenerationMethod::AsmMemOp, "MO"), + ] { let asm_file = file.with_extension("asm"); // Convert the ELF file to Zisk format and generates an assembly file - let rv2zk = Riscv2zisk::new(elf_file_path.to_str().unwrap().to_string()); + let rv2zk = Riscv2zisk::new(elf); + let asm_file_str = + asm_file.to_str().context("Failed to convert asm_file path to string")?; rv2zk - .runfile(asm_file.to_str().unwrap().to_string(), *gen_method, false, false) - .expect("Error converting elf to assembly"); - - let emulator_asm_path = zisk_path.join("emulator-asm"); - let emulator_asm_path = emulator_asm_path.to_str().unwrap(); + .runfile(asm_file_str.to_string(), gen_method, false, false, hints) + .map_err(|e| anyhow::anyhow!("Error converting ELF to assembly: {}", e))?; // Build the emulator assembly let status = Command::new("make") @@ -59,27 +270,28 @@ pub fn generate_assembly( .stdout(if verbose { Stdio::inherit() } else { Stdio::null() }) .stderr(if verbose { Stdio::inherit() } else { Stdio::null() }) .status() - .expect("Failed to run make clean"); + .context("Failed to execute 'make clean' command")?; if !status.success() { - eprintln!("make clean failed"); - std::process::exit(1); + anyhow::bail!("'make clean' failed with exit code: {:?}", status.code()); } + let out_file_str = file.to_str().context("Failed to convert output file path to string")?; + let status = Command::new("make") - .arg(format!("EMU_PATH={}", asm_file.to_str().unwrap())) - .arg(format!("OUT_PATH={}", file.to_str().unwrap())) + .arg(format!("EMU_PATH={}", asm_file_str)) + .arg(format!("OUT_PATH={}", out_file_str)) + .arg(format!("TRACE_TARGET={trace_target}")) .current_dir(emulator_asm_path) .stdout(if verbose { Stdio::inherit() } else { Stdio::null() }) .stderr(if verbose { Stdio::inherit() } else { Stdio::null() }) .status() - .expect("Failed to run make"); + .context("Failed to execute 'make' command")?; if !status.success() { - eprintln!("make failed"); - std::process::exit(1); + anyhow::bail!("'make' failed with exit code: {:?}", status.code()); } - }); + } Ok(()) } diff --git a/rom-setup/src/lib.rs b/rom-setup/src/lib.rs index 8ce3a89e0..1b64aea08 100644 --- a/rom-setup/src/lib.rs +++ b/rom-setup/src/lib.rs @@ -1,11 +1,7 @@ mod asm_setup; -mod rom_full_setup; mod rom_merkle; -mod rom_vkey; mod utils; pub use asm_setup::*; -pub use rom_full_setup::*; pub use rom_merkle::*; -pub use rom_vkey::*; pub use utils::*; diff --git a/rom-setup/src/rom_full_setup.rs b/rom-setup/src/rom_full_setup.rs deleted file mode 100644 index 95d5c963e..000000000 --- a/rom-setup/src/rom_full_setup.rs +++ /dev/null @@ -1,70 +0,0 @@ -use std::{ - fs, - path::{Path, PathBuf}, -}; - -use colored::Colorize; - -use crate::{get_elf_data_hash, DEFAULT_CACHE_PATH}; - -#[allow(unused_variables)] -pub fn rom_full_setup( - elf: &Path, - proving_key: &Path, - zisk_path: &Path, - output_dir: &Option, - verbose: bool, -) -> std::result::Result<(), anyhow::Error> { - let output_path = if output_dir.is_none() { - let cache_path = std::env::var("HOME") - .map(PathBuf::from) - .map(|home| home.join(DEFAULT_CACHE_PATH)) - .unwrap_or_else(|_| panic!("$HOME environment variable is not set")); - - ensure_dir_exists(&cache_path); - cache_path - } else { - ensure_dir_exists(output_dir.as_ref().unwrap()); - output_dir.clone().unwrap() - }; - - let output_path = fs::canonicalize(&output_path) - .unwrap_or_else(|_| panic!("Failed to get absolute path for {output_path:?}")); - - println!(); - - tracing::info!("Computing setup for ROM {}", elf.display()); - - tracing::info!("Computing ELF hash"); - let elf_hash = get_elf_data_hash(elf)?; - - tracing::info!("Computing merkle root"); - crate::rom_merkle_setup(elf, &elf_hash, output_path.as_path(), proving_key, false)?; - - tracing::info!("Computing Verification key"); - crate::rom_vkey()?; - - // Assembly setup is not needed on macOS due to the lack of support for assembly generation. - #[cfg(not(target_os = "macos"))] - { - tracing::info!("Computing assembly setup"); - crate::generate_assembly(elf, &elf_hash, zisk_path, output_path.as_path(), verbose)?; - } - - println!(); - tracing::info!( - "{} {}", - "ROM setup successfully completed at".bright_green().bold(), - output_path.display() - ); - - Ok(()) -} - -fn ensure_dir_exists(path: &PathBuf) { - if let Err(e) = std::fs::create_dir_all(path) { - if e.kind() != std::io::ErrorKind::AlreadyExists { - panic!("Failed to create cache directory {path:?}: {e}"); - } - } -} diff --git a/rom-setup/src/rom_merkle.rs b/rom-setup/src/rom_merkle.rs index 2a57ebc48..d94db60f0 100644 --- a/rom-setup/src/rom_merkle.rs +++ b/rom-setup/src/rom_merkle.rs @@ -1,37 +1,93 @@ -use std::path::Path; +use fields::PrimeField64; +use proofman_common::ProofCtx; +use std::path::{Path, PathBuf}; +use zisk_common::ElfBinaryLike; -use crate::{gen_elf_hash, get_elf_bin_file_path_with_hash, get_rom_blowup_factor_and_arity}; +use crate::{ + gen_elf_hash, get_elf_bin_file_path_with_hash, get_elf_bin_verkey_file_path_with_hash, + get_elf_data_hash, get_elf_vk, get_output_path, get_rom_info, +}; -pub fn rom_merkle_setup( - elf: &Path, - elf_hash: &str, - output_path: &Path, - proving_key: &Path, - mut check: bool, -) -> Result<(), anyhow::Error> { - // Check if the path is a file and not a directory - if !elf.is_file() { - tracing::error!("Error: The specified ROM path is not a file: {}", elf.display()); - std::process::exit(1); - } +pub fn rom_merkle_setup( + pctx: &ProofCtx, + elf: &impl ElfBinaryLike, + output_dir: &Option, +) -> Result<(PathBuf, Vec), anyhow::Error> { + let output_path = get_output_path(output_dir)?; + + let elf_hash = get_elf_data_hash(elf)?; - let (blowup_factor, merkle_tree_arity) = get_rom_blowup_factor_and_arity(proving_key); + let rom_info = get_rom_info(&pctx.global_info.get_proving_key_path())?; let elf_bin_path = get_elf_bin_file_path_with_hash( - elf, - elf_hash, - output_path, - blowup_factor, - merkle_tree_arity, + &elf_hash, + &output_path, + rom_info.blowup_factor, + rom_info.merkle_tree_arity, )?; - if !elf_bin_path.exists() { - check = false; + let elf_verkey_bin_path = get_elf_bin_verkey_file_path_with_hash( + &elf_hash, + &output_path, + rom_info.blowup_factor, + rom_info.merkle_tree_arity, + )?; + + if elf_bin_path.exists() && elf_verkey_bin_path.exists() { + let verkey = get_elf_vk(elf_verkey_bin_path.as_path())? + .ok_or_else(|| anyhow::anyhow!("Failed to read existing verkey file"))?; + + return Ok((elf_bin_path, verkey)); } - let root = gen_elf_hash(elf, elf_bin_path.as_path(), blowup_factor, merkle_tree_arity, check)?; + let root = gen_elf_hash::( + pctx, + elf.elf(), + elf_bin_path.as_path(), + rom_info.blowup_factor, + rom_info.merkle_tree_arity, + )?; tracing::info!("Root hash: {:?}", root); - Ok(()) + let verkey: Vec = root.iter().flat_map(|x| x.as_canonical_u64().to_le_bytes()).collect(); + + std::fs::write(&elf_verkey_bin_path, &verkey)?; + + Ok((elf_bin_path, verkey)) +} + +pub fn rom_merkle_setup_verkey( + elf: &impl ElfBinaryLike, + output_dir: &Option, + proving_key: &Path, +) -> Result, anyhow::Error> { + let output_path = get_output_path(output_dir)?; + + let elf_hash = get_elf_data_hash(elf)?; + + let rom_info = get_rom_info(proving_key)?; + + let elf_bin_path = get_elf_bin_file_path_with_hash( + &elf_hash, + &output_path, + rom_info.blowup_factor, + rom_info.merkle_tree_arity, + )?; + + let elf_verkey_bin_path = get_elf_bin_verkey_file_path_with_hash( + &elf_hash, + &output_path, + rom_info.blowup_factor, + rom_info.merkle_tree_arity, + )?; + + if elf_bin_path.exists() && elf_verkey_bin_path.exists() { + let verkey = get_elf_vk(elf_verkey_bin_path.as_path())? + .ok_or_else(|| anyhow::anyhow!("Failed to read existing verkey file"))?; + + Ok(verkey) + } else { + Err(anyhow::anyhow!("ROM merkle setup has not been performed yet")) + } } diff --git a/rom-setup/src/rom_vkey.rs b/rom-setup/src/rom_vkey.rs deleted file mode 100644 index aee4e6f9a..000000000 --- a/rom-setup/src/rom_vkey.rs +++ /dev/null @@ -1,31 +0,0 @@ -// use std::path::Path; - -// use tracing::info; - -// use crate::{gen_elf_hash, get_elf_bin_file_path, get_rom_blowup_factor}; - -pub fn rom_vkey(// elf: &Path, - // output_path: &Path, - // proving_key: &Path, - // mut check: bool, -) -> Result<(), anyhow::Error> { - // // Check if the path is a file and not a directory - // if !elf.is_file() { - // log::error!("Error: The specified ROM path is not a file: {}", elf.display()); - // std::process::exit(1); - // } - - // let blowup_factor = get_rom_blowup_factor(proving_key); - - // let elf_bin_path = get_elf_bin_file_path(elf, output_path, blowup_factor)?; - - // if !elf_bin_path.exists() { - // check = false; - // } - - // let root = gen_elf_hash(elf, elf_bin_path.as_path(), blowup_factor, check)?; - - // info!("Root hash: {:?}", root); - - Ok(()) -} diff --git a/rom-setup/src/utils.rs b/rom-setup/src/utils.rs index 2a2676d33..144511d29 100644 --- a/rom-setup/src/utils.rs +++ b/rom-setup/src/utils.rs @@ -1,40 +1,83 @@ use anyhow::{Context, Result}; -use fields::{Field, Goldilocks}; +use fields::{Goldilocks, PrimeField64}; use proofman_common::{ - write_custom_commit_trace, GlobalInfo, ProofType, ProofmanResult, StarkInfo, + write_custom_commit_trace, GlobalInfo, ProofCtx, ProofType, ProofmanResult, StarkInfo, }; use sm_rom::RomSM; +use std::env; use std::fs; +use std::fs::File; +use std::io::Read; use std::path::{Path, PathBuf}; +use zisk_common::ElfBinaryLike; use zisk_pil::{RomRomTrace, PILOUT_HASH}; pub const DEFAULT_CACHE_PATH: &str = ".zisk/cache"; -pub fn gen_elf_hash( - rom_path: &Path, +/// Gets the user's home directory as specified by the HOME environment variable. +pub fn get_home_dir() -> String { + env::var("HOME").expect("get_home_dir() failed to get HOME environment variable") +} + +/// Gets the default zisk folder location in the home installation directory. +pub fn get_default_zisk_path() -> PathBuf { + let zisk_path = format!("{}/.zisk", get_home_dir()); + PathBuf::from(zisk_path) +} + +pub fn get_output_path(output_dir: &Option) -> Result { + let output_path = if output_dir.is_none() { + let cache_path = std::env::var("HOME") + .map(PathBuf::from) + .map(|home| home.join(DEFAULT_CACHE_PATH)) + .unwrap_or_else(|_| panic!("$HOME environment variable is not set")); + + ensure_dir_exists(&cache_path); + cache_path + } else { + ensure_dir_exists(output_dir.as_ref().unwrap()); + output_dir.clone().unwrap() + }; + + let output_path = fs::canonicalize(&output_path) + .unwrap_or_else(|_| panic!("Failed to get absolute path for {output_path:?}")); + + Ok(output_path) +} + +pub fn gen_elf_hash( + pctx: &ProofCtx, + elf: &[u8], rom_buffer_path: &Path, blowup_factor: u64, merkle_tree_arity: u64, - check: bool, -) -> ProofmanResult> { - let buffer = vec![ - Goldilocks::ZERO; - RomRomTrace::::NUM_ROWS * RomRomTrace::::ROW_SIZE - ]; - let mut custom_rom_trace: RomRomTrace = RomRomTrace::new_from_vec(buffer)?; +) -> ProofmanResult> { + let buffer = vec![F::ZERO; RomRomTrace::::NUM_ROWS * RomRomTrace::::ROW_SIZE]; + let mut custom_rom_trace: RomRomTrace = RomRomTrace::new_from_vec(buffer)?; - RomSM::compute_custom_trace_rom(rom_path.to_path_buf(), &mut custom_rom_trace); + RomSM::compute_custom_trace_rom(elf, &mut custom_rom_trace); write_custom_commit_trace( + pctx, &mut custom_rom_trace, blowup_factor, merkle_tree_arity, rom_buffer_path, - check, ) } -pub fn get_elf_data_hash(elf_path: &Path) -> Result { +pub fn get_elf_vk(verkey_path: &Path) -> Result>> { + if !verkey_path.exists() { + return Ok(None); + } + + let mut file = File::open(verkey_path)?; + let mut root_bytes = [0u8; 32]; + file.read_exact(&mut root_bytes)?; + Ok(Some(root_bytes.to_vec())) +} + +pub fn get_elf_data_hash_from_path(elf_path: &Path) -> Result { let elf_data = fs::read(elf_path).with_context(|| format!("Error reading ELF file: {elf_path:?}"))?; @@ -43,40 +86,48 @@ pub fn get_elf_data_hash(elf_path: &Path) -> Result { Ok(hash) } -pub fn get_elf_bin_file_path( - elf_path: &Path, +pub fn get_elf_data_hash(elf: &impl ElfBinaryLike) -> Result { + let hash = blake3::hash(elf.elf()).to_hex().to_string(); + Ok(hash) +} + +pub fn get_elf_bin_file_path_with_hash( + hash: &str, default_cache_path: &Path, blowup_factor: u64, arity: u64, ) -> Result { - let elf_data = - fs::read(elf_path).with_context(|| format!("Error reading ELF file: {elf_path:?}"))?; + let pilout_hash = PILOUT_HASH; - let hash = blake3::hash(&elf_data).to_hex().to_string(); + let n = RomRomTrace::::NUM_ROWS; - get_elf_bin_file_path_with_hash(elf_path, &hash, default_cache_path, blowup_factor, arity) + let gpu = if cfg!(feature = "gpu") { "_gpu" } else { "" }; + let rom_cache_file_name = format!( + "{}_{}_{}_{}_{}{}.bin", + hash, + pilout_hash, + &n.to_string(), + &blowup_factor.to_string(), + &arity.to_string(), + gpu + ); + + Ok(default_cache_path.join(rom_cache_file_name)) } -pub fn get_elf_bin_file_path_with_hash( - elf_path: &Path, +pub fn get_elf_bin_verkey_file_path_with_hash( hash: &str, default_cache_path: &Path, blowup_factor: u64, arity: u64, ) -> Result { - if !elf_path.is_file() { - return Err(anyhow::anyhow!( - "Error: The specified ROM path is not a file: {}", - elf_path.display() - )); - } let pilout_hash = PILOUT_HASH; let n = RomRomTrace::::NUM_ROWS; let gpu = if cfg!(feature = "gpu") { "_gpu" } else { "" }; let rom_cache_file_name = format!( - "{}_{}_{}_{}_{}{}.bin", + "{}_{}_{}_{}_{}{}.verkey.bin", hash, pilout_hash, &n.to_string(), @@ -88,7 +139,12 @@ pub fn get_elf_bin_file_path_with_hash( Ok(default_cache_path.join(rom_cache_file_name)) } -pub fn get_rom_blowup_factor_and_arity(proving_key_path: &Path) -> (u64, u64) { +pub struct RomInfo { + pub blowup_factor: u64, + pub merkle_tree_arity: u64, +} + +pub fn get_rom_info(proving_key_path: &Path) -> ProofmanResult { let global_info = GlobalInfo::new(proving_key_path).expect("Failed to load global info from proving key"); let (airgroup_id, air_id) = global_info.get_air_id("Zisk", "Rom"); @@ -97,9 +153,16 @@ pub fn get_rom_blowup_factor_and_arity(proving_key_path: &Path) -> (u64, u64) { let stark_info_json = std::fs::read_to_string(&stark_info_path) .unwrap_or_else(|_| panic!("Failed to read file {}", &stark_info_path)); let stark_info = StarkInfo::from_json(&stark_info_json); + Ok(RomInfo { + blowup_factor: 1 << (stark_info.stark_struct.n_bits_ext - stark_info.stark_struct.n_bits), + merkle_tree_arity: stark_info.stark_struct.merkle_tree_arity, + }) +} - ( - 1 << (stark_info.stark_struct.n_bits_ext - stark_info.stark_struct.n_bits), - stark_info.stark_struct.merkle_tree_arity, - ) +pub fn ensure_dir_exists(path: &PathBuf) { + if let Err(e) = std::fs::create_dir_all(path) { + if e.kind() != std::io::ErrorKind::AlreadyExists { + panic!("Failed to create cache directory {path:?}: {e}"); + } + } } diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml index 8b9a9e66c..8218763df 100644 --- a/sdk/Cargo.toml +++ b/sdk/Cargo.toml @@ -8,26 +8,47 @@ repository = { workspace = true } categories = { workspace = true } [dependencies] +proofman-verifier = { workspace = true } proofman-common = { workspace = true } proofman-util = { workspace = true } zisk-common = { workspace = true } fields = { workspace = true } -libloading = { workspace = true } anyhow = { workspace = true } proofman = { workspace = true } rom-setup = { workspace = true } asm-runner = { workspace = true } colored = { workspace = true } tracing = { workspace = true } -zstd = { workspace = true } -bytemuck = { workspace = true } zisk-distributed-common = { workspace = true } +ziskemu = { workspace = true } +zisk-core = { workspace = true } +zisk-build = { workspace = true } +sha2 = { workspace = true } +executor = { workspace = true } +bincode = { workspace = true } +serde = { workspace = true } +precompiles-hints = { workspace = true } [features] default = [] -gpu = [] +gpu = [ + "proofman/gpu", + "proofman-common/gpu", + "rom-setup/gpu", + "executor/gpu", + "ziskemu/gpu", + "packed", +] +packed = ["proofman/packed", "proofman-common/packed", "executor/packed"] stats = [] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] +disable_distributed = [ + "proofman/disable_distributed", + "proofman-common/disable_distributed", + "executor/disable_distributed", + "zisk-common/disable_distributed", + "zisk-build/disable_distributed", +] +diagnostic = ["proofman-common/diagnostic"] [lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(distributed)'] } \ No newline at end of file +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(distributed)'] } diff --git a/sdk/src/builder.rs b/sdk/src/builder.rs index 0734b0505..76f6b9c29 100644 --- a/sdk/src/builder.rs +++ b/sdk/src/builder.rs @@ -1,7 +1,7 @@ use std::path::{Path, PathBuf}; use crate::{ - get_asm_paths, get_proving_key, get_witness_computation_lib, + get_proving_key, get_proving_key_snark, prover::{Asm, AsmProver, Emu, EmuProver, ZiskProver}, }; use colored::Colorize; @@ -28,21 +28,16 @@ pub struct Prove; /// ```rust,no_run /// use zisk_sdk::ProverClientBuilder; /// -/// let elf_path = std::path::PathBuf::from("path/to/program.elf"); /// let output_path = std::path::PathBuf::from("path/to/output"); /// /// let prover_emu = ProverClientBuilder::new() /// .emu() /// .verify_constraints() -/// .elf_path(elf_path.clone()) /// .build(); /// /// let prover_asm = ProverClientBuilder::new() /// .asm() /// .prove() -/// .elf_path(elf_path) -/// .save_proofs(true) -/// .output_dir(output_path) /// .unlock_mapped_memory(true) /// .build(); /// ``` @@ -50,11 +45,9 @@ pub struct Prove; pub struct ProverClientBuilder { // Common fields for both EMU and ASM aggregation: bool, - rma: bool, - final_snark: bool, - witness_lib: Option, + snark_wrapper: bool, proving_key: Option, - elf: Option, + proving_key_snark: Option, verify_constraints: bool, witness: bool, verbose: u8, @@ -66,13 +59,15 @@ pub struct ProverClientBuilder { asm_path: Option, base_port: Option, unlock_mapped_memory: bool, + asm_out_file: bool, + no_auto_setup: bool, + is_distributed: bool, // Prove-specific fields (only available when Operation = Prove) - save_proofs: bool, - output_dir: Option, - verify_proofs: bool, - minimal_memory: bool, - gpu_params: Option, + gpu_params: ParamsGPU, + + // Indicates if building a verifier only + verifier: bool, // Phantom data to track state _backend: std::marker::PhantomData, @@ -82,7 +77,11 @@ pub struct ProverClientBuilder { impl ProverClientBuilder<(), ()> { #[must_use] pub fn new() -> Self { - Self { aggregation: true, rma: true, ..Default::default() } + Self { aggregation: true, snark_wrapper: false, ..Default::default() } + } + + pub fn new_verifier() -> Self { + Self { verifier: true, ..Default::default() } } /// Configure for Emulator backend @@ -96,6 +95,11 @@ impl ProverClientBuilder<(), ()> { pub fn asm(self) -> ProverClientBuilder { self.into() } + + pub fn build(self) -> Result> { + let builder: ProverClientBuilder = self.emu().into(); + builder.build_emu() + } } // Common methods available for any backend @@ -135,29 +139,15 @@ impl ProverClientBuilder { self } - /// Set RMA. - #[must_use] - pub fn rma(mut self, use_rma: bool) -> Self { - self.rma = use_rma; - self - } - - /// Enables final SNARK generation. #[must_use] - pub fn final_snark(mut self, enable: bool) -> Self { - self.final_snark = enable; + pub fn snark(mut self) -> Self { + self.snark_wrapper = true; self } #[must_use] - pub fn witness_lib_path(mut self, witness_lib: PathBuf) -> Self { - self.witness_lib = Some(witness_lib); - self - } - - #[must_use] - pub fn witness_lib_path_opt(mut self, witness_lib: Option) -> Self { - self.witness_lib = witness_lib; + pub fn with_snark(mut self, snark: bool) -> Self { + self.snark_wrapper = snark; self } @@ -174,8 +164,14 @@ impl ProverClientBuilder { } #[must_use] - pub fn elf_path(mut self, elf_path: PathBuf) -> Self { - self.elf = Some(elf_path); + pub fn proving_key_snark_path(mut self, proving_key_snark: PathBuf) -> Self { + self.proving_key_snark = Some(proving_key_snark); + self + } + + #[must_use] + pub fn proving_key_snark_path_opt(mut self, proving_key_snark: Option) -> Self { + self.proving_key_snark = proving_key_snark; self } @@ -219,53 +215,49 @@ impl ProverClientBuilder { } #[must_use] - pub fn base_port(mut self, base_port: u16) -> Self { - self.base_port = Some(base_port); + pub fn no_auto_setup(mut self, no_auto_setup: bool) -> Self { + self.no_auto_setup = no_auto_setup; self } #[must_use] - pub fn base_port_opt(mut self, base_port: Option) -> Self { - self.base_port = base_port; - self - } - - #[must_use] - pub fn unlock_mapped_memory(mut self, unlock: bool) -> Self { - self.unlock_mapped_memory = unlock; + pub fn is_distributed(mut self, is_distributed: bool) -> Self { + self.is_distributed = is_distributed; self } -} -// Prove-specific methods (available for both backends when operation is Prove) -impl ProverClientBuilder { #[must_use] - pub fn save_proofs(mut self, save: bool) -> Self { - self.save_proofs = save; + pub fn base_port(mut self, base_port: u16) -> Self { + self.base_port = Some(base_port); self } #[must_use] - pub fn output_dir(mut self, output_dir: PathBuf) -> Self { - self.output_dir = Some(output_dir); + pub fn base_port_opt(mut self, base_port: Option) -> Self { + self.base_port = base_port; self } #[must_use] - pub fn verify_proofs(mut self, verify: bool) -> Self { - self.verify_proofs = verify; + pub fn unlock_mapped_memory(mut self, unlock: bool) -> Self { + self.unlock_mapped_memory = unlock; self } #[must_use] - pub fn minimal_memory(mut self, minimal: bool) -> Self { - self.minimal_memory = minimal; + pub fn asm_out_file(mut self, asm_out_file: bool) -> Self { + self.asm_out_file = asm_out_file; self } +} +// Prove-specific methods (available for any operation state - will use defaults if not in Prove mode) +impl ProverClientBuilder { #[must_use] - pub fn gpu(mut self, gpu_params: ParamsGPU) -> Self { - self.gpu_params = Some(gpu_params); + pub fn gpu(mut self, gpu_params: Option) -> Self { + if let Some(gpu_params) = gpu_params { + self.gpu_params = gpu_params; + } self } } @@ -278,12 +270,10 @@ impl ProverClientBuilder { /// ```rust,no_run /// use zisk_sdk::ProverClientBuilder; /// - /// let elf_path = std::path::PathBuf::from("path/to/program.elf"); /// /// let prover = ProverClientBuilder::new() /// .emu() /// .verify_constraints() - /// .elf_path(elf_path) /// .build(); /// ``` pub fn build(self) -> Result> { @@ -291,6 +281,13 @@ impl ProverClientBuilder { } } +impl ProverClientBuilder { + pub fn build(self) -> Result> { + let builder: ProverClientBuilder = self.into(); + builder.build_emu() + } +} + impl ProverClientBuilder { /// Builds an [`EmuProver`] configured for proof generation. /// @@ -298,12 +295,9 @@ impl ProverClientBuilder { /// ```rust,no_run /// use zisk_sdk::ProverClientBuilder; /// - /// let elf_path = std::path::PathBuf::from("path/to/program.elf"); - /// /// let prover = ProverClientBuilder::new() /// .emu() /// .prove() - /// .elf_path(elf_path) /// .build(); /// ``` pub fn build(self) -> Result> { @@ -313,76 +307,41 @@ impl ProverClientBuilder { impl ProverClientBuilder { fn build_emu(self) -> Result> { - let witness_lib = get_witness_computation_lib(self.witness_lib.as_ref()); let proving_key = get_proving_key(self.proving_key.as_ref()); - let elf = self.elf.ok_or_else(|| anyhow::anyhow!("ELF path is required"))?; - - let output_dir = if !self.verify_constraints { - Some(self.output_dir.unwrap_or_else(|| "tmp".into())) - } else { - None - }; + let proving_key_snark = get_proving_key_snark(self.proving_key_snark.as_ref()); if self.print_command_info { - Self::print_emu_command_info( - self.witness, - self.verify_constraints, - &witness_lib, - &proving_key, - &elf, - output_dir.as_ref(), - ); + Self::print_emu_command_info(&proving_key, &proving_key_snark); } - let emu = EmuProver::new( - self.verify_constraints, - self.aggregation, - self.rma, - self.final_snark, - witness_lib, - proving_key, - elf, - self.verbose, - self.shared_tables, - self.gpu_params.filter(|_| !self.verify_constraints).unwrap_or_default(), - self.verify_proofs, - self.minimal_memory, - self.save_proofs, - output_dir.clone(), - self.logging_config, - )?; + let emu = if self.verifier { + EmuProver::new_verifier(proving_key, proving_key_snark)? + } else { + EmuProver::new( + self.verify_constraints || self.witness, + self.aggregation, + self.snark_wrapper, + proving_key, + proving_key_snark, + self.verbose, + self.shared_tables, + self.gpu_params, + self.logging_config, + )? + }; Ok(ZiskProver::::new(emu)) } - fn print_emu_command_info( - witness: bool, - verify_constraints: bool, - witness_lib: &Path, - proving_key: &Path, - elf: &Path, - output_dir: Option<&PathBuf>, - ) { - if witness { - println!("{: >12} StatsConstraints", "Command".bright_green().bold()); - } else if verify_constraints { - println!("{: >12} VerifyConstraints", "Command".bright_green().bold()); - } else { - println!("{: >12} Prove", "Command".bright_green().bold()); - } - - println!("{: >12} {}", "Witness Lib".bright_green().bold(), witness_lib.display()); - println!("{: >12} {}", "Elf".bright_green().bold(), elf.display()); + fn print_emu_command_info(proving_key: &Path, proving_key_snark: &Path) { println!( "{: >12} {}", "Emulator".bright_green().bold(), "Running in emulator mode".bright_yellow() ); - println!("{: >12} {}", "Proving key".bright_green().bold(), proving_key.display()); + println!("{: >12} {}", "Proving Key".bright_green().bold(), proving_key.display()); - if let Some(output_dir) = output_dir { - println!("{: >12} {}", "Output Dir".bright_green().bold(), output_dir.display()); - } + println!("{: >12} {}", "SNARK Key".bright_green().bold(), proving_key_snark.display()); println!(); } @@ -396,12 +355,9 @@ impl ProverClientBuilder { /// ```rust,no_run /// use zisk_sdk::ProverClientBuilder; /// - /// let elf_path = std::path::PathBuf::from("path/to/program.elf"); - /// /// let prover = ProverClientBuilder::new() /// .asm() /// .verify_constraints() - /// .elf_path(elf_path) /// .build(); /// ``` pub fn build(self) -> Result> @@ -413,6 +369,17 @@ impl ProverClientBuilder { } } +impl ProverClientBuilder { + pub fn build(self) -> Result> + where + F: PrimeField64, + GoldilocksQuinticExtension: ExtensionField, + { + let builder: ProverClientBuilder = self.into(); + builder.build_asm() + } +} + impl ProverClientBuilder { /// Builds an [`AsmProver`] configured for proof generation. /// @@ -420,12 +387,9 @@ impl ProverClientBuilder { /// ```rust,no_run /// use zisk_sdk::ProverClientBuilder; /// - /// let elf_path = std::path::PathBuf::from("path/to/program.elf"); - /// /// let prover = ProverClientBuilder::new() /// .asm() /// .prove() - /// .elf_path(elf_path) /// .build(); /// ``` pub fn build(self) -> Result> @@ -443,77 +407,41 @@ impl ProverClientBuilder { F: PrimeField64, GoldilocksQuinticExtension: ExtensionField, { - let witness_lib = get_witness_computation_lib(self.witness_lib.as_ref()); let proving_key = get_proving_key(self.proving_key.as_ref()); - let elf = self.elf.ok_or_else(|| anyhow::anyhow!("ELF path is required"))?; - - let output_dir = if !self.verify_constraints { - Some(self.output_dir.unwrap_or_else(|| "tmp".into())) - } else { - None - }; - - let (asm_mt_filename, asm_rh_filename) = get_asm_paths(&elf)?; + let proving_key_snark = get_proving_key_snark(self.proving_key_snark.as_ref()); if self.print_command_info { - Self::print_asm_command_info( - self.witness, - self.verify_constraints, - &witness_lib, - &proving_key, - &elf, - output_dir.as_ref(), - ); + Self::print_asm_command_info(&proving_key, &proving_key_snark); } - let asm = AsmProver::new( - self.verify_constraints, - self.aggregation, - self.rma, - self.final_snark, - witness_lib, - proving_key, - elf, - self.verbose, - self.shared_tables, - asm_mt_filename, - asm_rh_filename, - self.base_port, - self.unlock_mapped_memory, - self.gpu_params.filter(|_| !self.verify_constraints).unwrap_or_default(), - self.verify_proofs, - self.minimal_memory, - self.save_proofs, - output_dir.clone(), - self.logging_config, - )?; + let asm = if self.verifier { + AsmProver::new_verifier(proving_key, proving_key_snark)? + } else { + AsmProver::new( + self.verify_constraints || self.witness, + self.aggregation, + self.snark_wrapper, + proving_key, + proving_key_snark, + self.verbose, + self.shared_tables, + self.base_port, + self.unlock_mapped_memory, + self.asm_out_file, + self.no_auto_setup, + self.gpu_params, + self.is_distributed, + self.logging_config, + )? + }; Ok(ZiskProver::::new(asm)) } - fn print_asm_command_info( - witness: bool, - verify_constraints: bool, - witness_lib: &Path, - proving_key: &Path, - elf: &Path, - output_dir: Option<&PathBuf>, - ) { - if witness { - println!("{: >12} StatsConstraints", "Command".bright_green().bold()); - } else if verify_constraints { - println!("{: >12} VerifyConstraints", "Command".bright_green().bold()); - } else { - println!("{: >12} Prove", "Command".bright_green().bold()); - } + fn print_asm_command_info(proving_key: &Path, proving_key_snark: &Path) { + println!("{: >12} {}", "Proving Key".bright_green().bold(), proving_key.display()); - println!("{: >12} {}", "Witness Lib".bright_green().bold(), witness_lib.display()); - println!("{: >12} {}", "Elf".bright_green().bold(), elf.display()); - println!("{: >12} {}", "Proving key".bright_green().bold(), proving_key.display()); - - if let Some(output_dir) = output_dir { - println!("{: >12} {}", "Output Dir".bright_green().bold(), output_dir.display()); - } + println!("{: >12} {}", "SNARK Key".bright_green().bold(), proving_key_snark.display()); println!(); } @@ -524,30 +452,27 @@ impl From> for ProverClientBuilder { fn from(builder: ProverClientBuilder<(), ()>) -> Self { Self { // Preserve common fields + verifier: builder.verifier, aggregation: builder.aggregation, witness: builder.witness, - rma: builder.rma, - final_snark: builder.final_snark, - witness_lib: builder.witness_lib, + snark_wrapper: builder.snark_wrapper, proving_key: builder.proving_key, + proving_key_snark: builder.proving_key_snark, verify_constraints: builder.verify_constraints, - elf: builder.elf, verbose: builder.verbose, shared_tables: builder.shared_tables, print_command_info: builder.print_command_info, logging_config: builder.logging_config, + gpu_params: builder.gpu_params, // Reset ASM-specific fields for EMU backend asm_path: None, base_port: None, unlock_mapped_memory: false, + asm_out_file: false, - // Reset prove-specific fields (will be set when choosing operation) - save_proofs: false, - output_dir: None, - verify_proofs: false, - minimal_memory: false, - gpu_params: None, + no_auto_setup: false, + is_distributed: false, _backend: std::marker::PhantomData, _operation: std::marker::PhantomData, @@ -559,30 +484,26 @@ impl From> for ProverClientBuilder { fn from(builder: ProverClientBuilder<(), ()>) -> Self { Self { // Preserve common fields + verifier: builder.verifier, aggregation: builder.aggregation, + snark_wrapper: builder.snark_wrapper, witness: builder.witness, - rma: builder.rma, - final_snark: builder.final_snark, - witness_lib: builder.witness_lib, proving_key: builder.proving_key, + proving_key_snark: builder.proving_key_snark, verify_constraints: builder.verify_constraints, - elf: builder.elf, verbose: builder.verbose, shared_tables: builder.shared_tables, print_command_info: builder.print_command_info, logging_config: builder.logging_config, + gpu_params: builder.gpu_params, // Preserve ASM-specific fields (user may have set defaults) asm_path: builder.asm_path, base_port: builder.base_port, unlock_mapped_memory: builder.unlock_mapped_memory, - - // Reset prove-specific fields (will be set when choosing operation) - save_proofs: false, - output_dir: None, - verify_proofs: false, - minimal_memory: false, - gpu_params: None, + asm_out_file: builder.asm_out_file, + no_auto_setup: builder.no_auto_setup, + is_distributed: builder.is_distributed, _backend: std::marker::PhantomData, _operation: std::marker::PhantomData, @@ -596,30 +517,26 @@ impl From> fn from(builder: ProverClientBuilder) -> Self { Self { // Preserve common fields + verifier: builder.verifier, aggregation: builder.aggregation, + snark_wrapper: builder.snark_wrapper, witness: builder.witness, - rma: builder.rma, - final_snark: builder.final_snark, - witness_lib: builder.witness_lib, proving_key: builder.proving_key, + proving_key_snark: builder.proving_key_snark, verify_constraints: builder.verify_constraints, - elf: builder.elf, verbose: builder.verbose, shared_tables: builder.shared_tables, print_command_info: builder.print_command_info, logging_config: builder.logging_config, + gpu_params: builder.gpu_params, // Preserve backend-specific fields (ASM or EMU) asm_path: builder.asm_path, base_port: builder.base_port, unlock_mapped_memory: builder.unlock_mapped_memory, - - // Initialize prove-specific fields to defaults for verify_constraints mode - save_proofs: false, // Not relevant for constraint verification - output_dir: None, // Not needed for constraint verification - verify_proofs: false, // Not applicable for constraint verification - minimal_memory: false, // Not relevant for constraint verification - gpu_params: None, // Not relevant for constraint verification + asm_out_file: builder.asm_out_file, + no_auto_setup: builder.no_auto_setup, + is_distributed: builder.is_distributed, _backend: std::marker::PhantomData, _operation: std::marker::PhantomData, @@ -631,14 +548,14 @@ impl From> for ProverClientBuilder) -> Self { Self { // Preserve common fields + verifier: builder.verifier, aggregation: builder.aggregation, + snark_wrapper: builder.snark_wrapper, witness: builder.witness, - rma: builder.rma, - final_snark: builder.final_snark, - witness_lib: builder.witness_lib, proving_key: builder.proving_key, + proving_key_snark: builder.proving_key_snark, verify_constraints: false, - elf: builder.elf, + gpu_params: builder.gpu_params, verbose: builder.verbose, shared_tables: builder.shared_tables, print_command_info: builder.print_command_info, @@ -648,13 +565,9 @@ impl From> for ProverClientBuilder ProverClientBuilder { + if PROVER_CLIENT_CREATED.swap(true, Ordering::SeqCst) { + panic!( + "ProverClient::builder() can only be called once! \ + Multiple ProverClient instances are not supported. \ + Reuse the existing client for all operations." + ); + } ProverClientBuilder::new() } } diff --git a/sdk/src/lib.rs b/sdk/src/lib.rs index 54dd45946..df13db768 100644 --- a/sdk/src/lib.rs +++ b/sdk/src/lib.rs @@ -2,27 +2,33 @@ mod builder; mod client; mod prover; mod utils; -mod zisk_lib_loader; +mod verifier; +mod ziskemu; pub use builder::*; pub use client::ProverClient; pub use prover::*; pub use utils::*; -pub use zisk_lib_loader::*; +pub use verifier::*; -pub struct RankInfo { - pub world_rank: i32, - pub local_rank: i32, -} +pub use ziskemu::*; -pub struct Proof { - pub id: Option, - pub proof: Option>, -} +pub use proofman_common::VerboseMode; + +pub use zisk_common::{io::*, ElfBinary, ElfBinaryFromFile}; + +pub use zisk_build::*; #[macro_export] macro_rules! include_elf { - ($arg:tt) => {{ - include_bytes!(env!(concat!("ZISK_ELF_", $arg))) + ($arg:literal) => {{ + const WITH_HINTS: bool = option_env!(concat!("ZISK_ELF_", $arg, "_WITH_HINTS")).is_some(); + + ElfBinary { + elf: include_bytes!(env!(concat!("ZISK_ELF_", $arg))), + name: $arg, + with_hints: WITH_HINTS, + path: Some(env!(concat!("ZISK_ELF_", $arg))), + } }}; } diff --git a/sdk/src/proof.rs b/sdk/src/proof.rs deleted file mode 100644 index 952430cc9..000000000 --- a/sdk/src/proof.rs +++ /dev/null @@ -1,317 +0,0 @@ -/// Strongly-typed proof formats -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum ProofFormat { - /// Raw, unprocessed proof data - Raw(RawProof), - /// Compressed proof using compression algorithms - Compressed(CompressedProof), - /// Wrapped proof with additional metadata - Wrapped(WrappedProof), -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Proof { - pub id: Option, - pub proof: Option>, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct RawProof(pub Proof); - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct CompressedProof { - pub data: Proof, - pub compression_info: CompressionInfo, -} - -/// Information about compression applied to a proof -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct CompressionInfo { - pub algorithm: String, - pub level: u32, - pub original_size: usize, - pub compressed_size: usize, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct WrappedProof { - pub data: Proof, - pub metadata: ProofMetadata, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ProofMetadata { - pub created_at: std::time::SystemTime, - pub version: String, - pub additional_data: std::collections::HashMap, -} - - - - - -use anyhow::{Result, Context}; - -// ...existing code... - -// Utility functions for converting between bytes and u64 vectors -fn bytes_to_u64_vec(bytes: &[u8]) -> Vec { - bytes.chunks(8) - .map(|chunk| { - let mut array = [0u8; 8]; - for (i, &byte) in chunk.iter().enumerate() { - if i < 8 { - array[i] = byte; - } - } - u64::from_le_bytes(array) - }) - .collect() -} - -fn u64_vec_to_bytes(data: &[u64]) -> Vec { - data.iter() - .flat_map(|&num| num.to_le_bytes()) - .collect() -} - -impl CompressedProof { - /// Create a compressed proof from raw proof with default compression level (3) - pub fn from_raw(raw: RawProof) -> Result { - Self::from_raw_with_level(raw, 3) - } - - /// Create a compressed proof from raw proof with specified compression level (1-22) - pub fn from_raw_with_level(raw: RawProof, level: i32) -> Result { - // Serialize the proof to bytes - let original_data = bincode::serialize(&raw.0) - .context("Failed to serialize proof for compression")?; - - let original_size = original_data.len(); - - // Compress using zstd - let compressed_data = zstd::encode_all(original_data.as_slice(), level) - .context("Failed to compress proof with zstd")?; - - let compressed_size = compressed_data.len(); - - // Create a new proof with compressed data - let compressed_proof = Proof { - id: raw.0.id.clone(), - proof: Some(bytes_to_u64_vec(&compressed_data)), - }; - - let compression_info = CompressionInfo { - algorithm: "zstd".to_string(), - level: level as u32, - original_size, - compressed_size, - }; - - Ok(CompressedProof { - data: compressed_proof, - compression_info, - }) - } - - /// Get compression ratio (original_size / compressed_size) - pub fn compression_ratio(&self) -> f64 { - if self.compression_info.compressed_size == 0 { - return 1.0; - } - self.compression_info.original_size as f64 / self.compression_info.compressed_size as f64 - } - - /// Get space saved as percentage - pub fn space_saved_percent(&self) -> f64 { - if self.compression_info.original_size == 0 { - return 0.0; - } - let saved = self.compression_info.original_size.saturating_sub(self.compression_info.compressed_size); - (saved as f64 / self.compression_info.original_size as f64) * 100.0 - } -} - -impl RawProof { - /// Decompress from a compressed proof - pub fn from_compressed(compressed: CompressedProof) -> Result { - if compressed.compression_info.algorithm != "zstd" { - return Err(anyhow::anyhow!( - "Unsupported compression algorithm: {}", - compressed.compression_info.algorithm - )); - } - - // Extract compressed bytes from the proof - let compressed_bytes = match &compressed.data.proof { - Some(data) => u64_vec_to_bytes(data), - None => return Err(anyhow::anyhow!("No proof data found")), - }; - - // Decompress using zstd - let decompressed_data = zstd::decode_all(compressed_bytes.as_slice()) - .context("Failed to decompress proof with zstd")?; - - // Deserialize back to proof - let original_proof: Proof = bincode::deserialize(&decompressed_data) - .context("Failed to deserialize decompressed proof")?; - - Ok(RawProof(original_proof)) - } -} - -// From trait implementations for basic conversions -impl From for RawProof { - fn from(proof: Proof) -> Self { - RawProof(proof) - } -} - -impl From for ProofFormat { - fn from(raw: RawProof) -> Self { - ProofFormat::Raw(raw) - } -} - -impl From for ProofFormat { - fn from(compressed: CompressedProof) -> Self { - ProofFormat::Compressed(compressed) - } -} - -impl From for ProofFormat { - fn from(wrapped: WrappedProof) -> Self { - ProofFormat::Wrapped(wrapped) - } -} - -// TryFrom for fallible conversions (compression/decompression) -impl TryFrom for CompressedProof { - type Error = anyhow::Error; - - fn try_from(raw: RawProof) -> Result { - CompressedProof::from_raw(raw) - } -} - -impl TryFrom for RawProof { - type Error = anyhow::Error; - - fn try_from(compressed: CompressedProof) -> Result { - RawProof::from_compressed(compressed) - } -} - -// Additional From implementations for convenience -impl From<&RawProof> for Result { - fn from(raw: &RawProof) -> Self { - CompressedProof::from_raw(raw.clone()) - } -} - -impl From<&CompressedProof> for Result { - fn from(compressed: &CompressedProof) -> Self { - RawProof::from_compressed(compressed.clone()) - } -} - -// ProofFormat convenience methods -impl ProofFormat { - /// Extract the underlying proof data regardless of format - pub fn proof(&self) -> &Proof { - match self { - ProofFormat::Raw(raw) => &raw.0, - ProofFormat::Compressed(compressed) => &compressed.data, - ProofFormat::Wrapped(wrapped) => &wrapped.data, - } - } - - /// Check if this is a raw proof - pub fn is_raw(&self) -> bool { - matches!(self, ProofFormat::Raw(_)) - } - - /// Check if this is a compressed proof - pub fn is_compressed(&self) -> bool { - matches!(self, ProofFormat::Compressed(_)) - } - - /// Check if this is a wrapped proof - pub fn is_wrapped(&self) -> bool { - matches!(self, ProofFormat::Wrapped(_)) - } - - /// Try to compress this proof format - pub fn try_compress(self) -> Result { - match self { - ProofFormat::Raw(raw) => Ok(ProofFormat::Compressed(raw.try_into()?)), - ProofFormat::Compressed(_) => Ok(self), // Already compressed - ProofFormat::Wrapped(wrapped) => { - let raw = RawProof(wrapped.data); - Ok(ProofFormat::Compressed(raw.try_into()?)) - } - } - } - - /// Try to decompress this proof format - pub fn try_decompress(self) -> Result { - match self { - ProofFormat::Raw(_) => Ok(self), // Already raw - ProofFormat::Compressed(compressed) => Ok(ProofFormat::Raw(compressed.try_into()?)), - ProofFormat::Wrapped(_) => Ok(self), // Keep as wrapped - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_compression_with_from() -> Result<()> { - let original_proof = Proof { - id: Some("test_proof".to_string()), - proof: Some(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), - }; - - let raw_proof = RawProof(original_proof.clone()); - - // Using From trait - let compressed: CompressedProof = raw_proof.try_into()?; - - // Check compression info - assert_eq!(compressed.compression_info.algorithm, "zstd"); - assert!(compressed.compression_info.original_size > 0); - - // Using From trait to decompress - let decompressed: RawProof = compressed.try_into()?; - - // Should match original - assert_eq!(decompressed.0, original_proof); - - Ok(()) - } - - #[test] - fn test_compression_methods() -> Result<()> { - let proof = Proof { - id: Some("test".to_string()), - proof: Some(vec![42; 100]), // Larger data for better compression - }; - - let raw = RawProof(proof); - - // Test different compression levels - let compressed_default = CompressedProof::from_raw(raw.clone())?; - let compressed_high = CompressedProof::from_raw_with_level(raw.clone(), 9)?; - - // Higher compression should result in smaller size (usually) - assert!(compressed_high.compression_info.level > compressed_default.compression_info.level); - - // Test compression ratio - assert!(compressed_default.compression_ratio() > 1.0); - assert!(compressed_default.space_saved_percent() > 0.0); - - Ok(()) - } -} \ No newline at end of file diff --git a/sdk/src/prover/asm.rs b/sdk/src/prover/asm.rs index 69b141b78..c63600470 100644 --- a/sdk/src/prover/asm.rs +++ b/sdk/src/prover/asm.rs @@ -1,18 +1,28 @@ +use crate::get_asm_paths; use crate::{ - check_paths_exist, create_debug_info, ensure_custom_commits, + check_paths_exist, ensure_custom_commits, prover::{ProverBackend, ProverEngine, ZiskBackend}, - RankInfo, ZiskAggPhaseResult, ZiskExecuteResult, ZiskLibLoader, ZiskPhaseResult, - ZiskProveResult, ZiskVerifyConstraintsResult, + ZiskAggPhaseResult, ZiskExecuteResult, ZiskPhaseResult, ZiskProgramPK, ZiskProgramVK, + ZiskProof, ZiskProofWithPublicValues, ZiskProveResult, ZiskPublics, + ZiskVerifyConstraintsResult, }; +use crate::{ProofMode, ProofOpts}; use asm_runner::{AsmRunnerOptions, AsmServices}; -use proofman::{AggProofs, ProofMan, ProvePhase, ProvePhaseInputs}; -use proofman_common::{initialize_logger, ParamsGPU, ProofOptions}; +use executor::{get_packed_info, init_executor_asm, AsmResources}; +use proofman::{ + AggProofs, AggProofsRegister, ProofMan, ProvePhase, ProvePhaseInputs, SnarkWrapper, WitnessInfo, +}; +use proofman_common::{initialize_logger, ParamsGPU, ProofOptions, RankInfo, RowInfo, VerboseMode}; use proofman_util::{timer_start_info, timer_stop_and_log_info}; -use rom_setup::DEFAULT_CACHE_PATH; -use std::{collections::HashMap, path::PathBuf}; -use tracing::info; +use rom_setup::{generate_assembly, get_output_path, DEFAULT_CACHE_PATH}; +use std::path::PathBuf; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; use zisk_common::io::ZiskStdin; -use zisk_common::ExecutorStats; +use zisk_common::ElfBinaryLike; +use zisk_common::ExecutorStatsHandle; +use zisk_common::ZiskExecutorTime; +use zisk_core::Riscv2zisk; use zisk_distributed_common::LoggingConfig; use anyhow::Result; @@ -32,48 +42,44 @@ impl AsmProver { pub fn new( verify_constraints: bool, aggregation: bool, - rma: bool, - final_snark: bool, - witness_lib: PathBuf, + snark_wrapper: bool, proving_key: PathBuf, - elf: PathBuf, + proving_key_snark: PathBuf, verbose: u8, shared_tables: bool, - asm_mt_filename: String, - asm_rh_filename: String, base_port: Option, unlock_mapped_memory: bool, + asm_out_file: bool, + no_auto_setup: bool, gpu_params: ParamsGPU, - verify_proofs: bool, - minimal_memory: bool, - save_proofs: bool, - output_dir: Option, + is_distributed: bool, logging_config: Option, ) -> Result { let core_prover = AsmCoreProver::new( verify_constraints, aggregation, - rma, - final_snark, - witness_lib, + snark_wrapper, proving_key, - elf, + proving_key_snark, verbose, shared_tables, - asm_mt_filename, - asm_rh_filename, base_port, unlock_mapped_memory, + asm_out_file, + no_auto_setup, gpu_params, - verify_proofs, - minimal_memory, - save_proofs, - output_dir, + is_distributed, logging_config, )?; Ok(Self { core_prover }) } + + pub fn new_verifier(proving_key: PathBuf, proving_key_snark: PathBuf) -> Result { + let core_prover = AsmCoreProver::new_verifier(proving_key, proving_key_snark)?; + + Ok(Self { core_prover }) + } } impl ProverEngine for AsmProver { @@ -85,52 +91,234 @@ impl ProverEngine for AsmProver { self.core_prover.rank_info.local_rank } - fn set_stdin(&self, stdin: ZiskStdin) { - self.core_prover.backend.witness_lib.set_stdin(stdin); + fn set_stdin(&self, stdin: ZiskStdin) -> Result<()> { + self.core_prover.backend.set_stdin(stdin) + } + + fn register_program(&self, pk: &ZiskProgramPK) -> Result<()> { + self.core_prover.backend.register_program(pk) } fn executed_steps(&self) -> u64 { self.core_prover .backend - .witness_lib .execution_result() - .map(|(exec_result, _)| exec_result.executed_steps) + .map(|(exec_result, _)| exec_result.steps) .unwrap_or(0) } - fn execute(&self, stdin: ZiskStdin, output_path: Option) -> Result { - self.core_prover.backend.execute(stdin, output_path) + fn setup(&self, elf: &impl ElfBinaryLike) -> Result<(ZiskProgramPK, ZiskProgramVK)> { + let pctx = self.core_prover.backend.get_pctx()?; + let (rom_bin_path, vk) = ensure_custom_commits(&pctx, elf)?; + + let world_rank = self.core_prover.rank_info.world_rank; + let local_rank = self.core_prover.rank_info.local_rank; + let n_processes = self.core_prover.rank_info.n_processes; + let is_distributed = self.core_prover.asm_info.is_distributed; + let unlock_mapped_memory = self.core_prover.asm_info.unlock_mapped_memory; + let asm_out_file = self.core_prover.asm_info.asm_out_file; + let verbose_mode = self.core_prover.asm_info.verbose; + let rank_info = self.core_prover.rank_info.clone(); + let base_port = Some(AsmServices::port_base_offset( + self.core_prover.asm_info.base_port, + n_processes, + self.core_prover.asm_info.n_setups.load(Ordering::SeqCst), + )); + + let rv2zk = Riscv2zisk::new(elf.elf()); + + let zisk_rom = rv2zk.run().unwrap_or_else(|e| panic!("Application error: {e}")); + let zisk_rom = Arc::new(zisk_rom); + + let default_cache_path = std::env::var("HOME") + .map(PathBuf::from) + .map_err(|e| anyhow::anyhow!("Failed to read HOME environment variable: {e}"))? + .join(DEFAULT_CACHE_PATH); + + let (asm_mt_filename, asm_rh_filename) = get_asm_paths(elf)?; + + let asm_mt_path = default_cache_path.join(asm_mt_filename); + let asm_rh_path = default_cache_path.join(asm_rh_filename); + + if check_paths_exist(&asm_mt_path).is_err() || check_paths_exist(&asm_rh_path).is_err() { + if self.core_prover.asm_info.no_auto_setup { + return Err(anyhow::anyhow!( + "Assembly files not found for ELF {}. Force ROM setup is enabled, but assembly files are still missing. Please ensure that the assembly generation process has been completed successfully.", + elf.name() + )); + } + + if pctx.mpi_ctx.rank == 0 { + tracing::info!( + ">>> ROM SETUP (one time only) - Generating assembly files for ELF: {}", + elf.name() + ); + timer_start_info!(ROM_SETUP); + let output_path = get_output_path(&None)?; + generate_assembly( + elf.elf(), + elf.name(), + &output_path, + elf.with_hints(), + self.core_prover.asm_info.verbose != VerboseMode::Info, + )?; + timer_stop_and_log_info!(ROM_SETUP); + tracing::info!("<<< ROM SETUP complete - Assembly files cached for future use"); + } + pctx.mpi_ctx.barrier(); + } + + pctx.mpi_ctx.barrier(); + + timer_start_info!(STARTING_ASM_MICROSERVICES); + let asm_services = AsmServices::new(world_rank, local_rank, base_port); + + let asm_runner_options = AsmRunnerOptions::new() + .with_base_port(base_port) + .with_world_rank(world_rank) + .with_local_rank(local_rank) + .with_verbose(verbose_mode == VerboseMode::Debug) + .with_metrics(verbose_mode == VerboseMode::Debug) + .with_unlock_mapped_memory(unlock_mapped_memory) + .with_asm_out_file(asm_out_file); + + asm_services.start_asm_services(&asm_mt_path, asm_runner_options)?; + timer_stop_and_log_info!(STARTING_ASM_MICROSERVICES); + + let mpi_broadcast_fn = (is_distributed && n_processes > 1 && elf.with_hints()).then(|| { + let pctx = pctx.clone(); + Arc::new(move |data: &mut Vec| { + pctx.mpi_ctx.broadcast(data); + Ok(()) + }) as Arc) -> Result<()> + Send + Sync> + }); + + let init_rom = !is_distributed && world_rank == 0; + + let asm_resources = AsmResources::new( + local_rank, + base_port, + unlock_mapped_memory, + verbose_mode, + elf.with_hints(), + mpi_broadcast_fn, + init_rom, + )?; + + self.core_prover.asm_info.n_setups.fetch_add(1, Ordering::SeqCst); + + Ok(( + ZiskProgramPK { + zisk_rom, + elf_bin_path: rom_bin_path, + asm_services: Some(asm_services), + asm_resources: Some(asm_resources), + rank_info, + use_hints: elf.with_hints(), + }, + ZiskProgramVK { vk }, + )) + } + + fn get_execution_info(&self) -> Result<(WitnessInfo, ZiskExecutorTime)> { + self.core_prover.backend.get_execution_info() + } + + fn execute( + &self, + pk: &ZiskProgramPK, + stdin: ZiskStdin, + output_path: Option, + ) -> Result { + self.core_prover.backend.execute(pk, stdin, output_path) } fn stats( &self, + pk: &ZiskProgramPK, stdin: ZiskStdin, debug_info: Option>, + minimal_memory: bool, mpi_node: Option, - ) -> Result<(i32, i32, Option)> { - let debug_info = - create_debug_info(debug_info, self.core_prover.backend.proving_key.clone())?; + ) -> Result<(i32, i32, Option)> { + self.core_prover.backend.stats(pk, stdin, debug_info, minimal_memory, mpi_node) + } + + fn get_instance_trace( + &self, + instance_id: usize, + first_row: usize, + num_rows: usize, + offset: Option, + ) -> Result> { + self.core_prover.backend.get_instance_trace(instance_id, first_row, num_rows, offset) + } - self.core_prover.backend.stats(stdin, debug_info, mpi_node) + fn get_instance_air_values(&self, instance_id: usize) -> Result> { + self.core_prover.backend.get_instance_air_values(instance_id) + } + + fn get_instance_fixed( + &self, + instance_id: usize, + first_row: usize, + num_rows: usize, + offset: Option, + ) -> Result> { + self.core_prover.backend.get_instance_fixed(instance_id, first_row, num_rows, offset) } fn verify_constraints_debug( &self, + pk: &ZiskProgramPK, stdin: ZiskStdin, debug_info: Option>, ) -> Result { - let debug_info = - create_debug_info(debug_info, self.core_prover.backend.proving_key.clone())?; + self.core_prover.backend.verify_constraints_debug(pk, stdin, debug_info) + } - self.core_prover.backend.verify_constraints_debug(stdin, debug_info) + fn verify_constraints( + &self, + pk: &ZiskProgramPK, + stdin: ZiskStdin, + ) -> Result { + self.core_prover.backend.verify_constraints(pk, stdin) } - fn verify_constraints(&self, stdin: ZiskStdin) -> Result { - self.core_prover.backend.verify_constraints(stdin) + fn vk(&self, elf: &impl ElfBinaryLike) -> Result { + self.core_prover.backend.vk(elf) } - fn prove(&self, stdin: ZiskStdin) -> Result { - self.core_prover.backend.prove(stdin) + fn verify(&self, proof: &ZiskProof, publics: &ZiskPublics, vk: &ZiskProgramVK) -> Result<()> { + self.core_prover.backend.verify(proof, publics, vk) + } + + fn prove( + &self, + pk: &ZiskProgramPK, + stdin: ZiskStdin, + mode: ProofMode, + proof_options: ProofOpts, + ) -> Result { + self.core_prover.backend.prove(pk, stdin, mode, proof_options) + } + + fn prove_snark( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + vk: &ZiskProgramVK, + ) -> Result { + self.core_prover.backend.prove_snark(proof, publics, vk) + } + + fn compress( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + vk: &ZiskProgramVK, + ) -> Result { + self.core_prover.backend.compress(proof, publics, vk) } fn prove_phase( @@ -142,6 +330,19 @@ impl ProverEngine for AsmProver { self.core_prover.backend.prove_phase(phase_inputs, options, phase) } + fn set_partition( + &self, + total_compute_units: usize, + allocation: Vec, + rank_id: usize, + ) -> Result<()> { + self.core_prover.backend.set_partition(total_compute_units, allocation, rank_id) + } + + fn register_aggregated_proofs(&self, agg_proofs: Vec) -> Result<()> { + self.core_prover.backend.register_aggregated_proofs(agg_proofs) + } + fn aggregate_proofs( &self, agg_proofs: Vec, @@ -152,29 +353,34 @@ impl ProverEngine for AsmProver { self.core_prover.backend.aggregate_proofs(agg_proofs, last_proof, final_proof, options) } - fn mpi_broadcast(&self, data: &mut Vec) { - self.core_prover.backend.mpi_broadcast(data); + fn mpi_broadcast(&self, data: &mut Vec) -> Result<()> { + self.core_prover.backend.mpi_broadcast(data) } + + fn prepare_send_proof( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + program_vk: &ZiskProgramVK, + ) -> Result> { + self.core_prover.backend.prepare_send_proof(proof, publics, program_vk) + } +} + +pub struct AsmInfo { + pub is_distributed: bool, + pub base_port: Option, + pub unlock_mapped_memory: bool, + pub asm_out_file: bool, + pub verbose: VerboseMode, + pub no_auto_setup: bool, + pub n_setups: AtomicU64, } pub struct AsmCoreProver { backend: ProverBackend, - asm_services: AsmServices, rank_info: RankInfo, -} - -impl Drop for AsmCoreProver { - fn drop(&mut self) { - // Shut down ASM microservices - info!(">>> [{}] Stopping ASM microservices.", self.rank_info.world_rank); - if let Err(e) = self.asm_services.stop_asm_services() { - tracing::error!( - ">>> [{}] Failed to stop ASM microservices: {}", - self.rank_info.world_rank, - e - ); - } - } + asm_info: AsmInfo, } impl AsmCoreProver { @@ -182,105 +388,99 @@ impl AsmCoreProver { pub fn new( verify_constraints: bool, aggregation: bool, - rma: bool, - final_snark: bool, - witness_lib: PathBuf, + use_snark_wrapper: bool, proving_key: PathBuf, - elf: PathBuf, + proving_key_snark: PathBuf, verbose: u8, shared_tables: bool, - asm_mt_filename: String, - asm_rh_filename: String, base_port: Option, unlock_mapped_memory: bool, + asm_out_file: bool, + no_auto_setup: bool, gpu_params: ParamsGPU, - verify_proofs: bool, - minimal_memory: bool, - save_proofs: bool, - output_dir: Option, + is_distributed: bool, logging_config: Option, ) -> Result { - let rom_bin_path = ensure_custom_commits(&proving_key, &elf)?; - let custom_commits_map = HashMap::from([("rom".to_string(), rom_bin_path)]); - - let default_cache_path = std::env::var("HOME") - .map(PathBuf::from) - .map_err(|e| anyhow::anyhow!("Failed to read HOME environment variable: {e}"))? - .join(DEFAULT_CACHE_PATH); - - let asm_mt_path = default_cache_path.join(asm_mt_filename); - let asm_rh_path = default_cache_path.join(asm_rh_filename); - - check_paths_exist(&witness_lib)?; check_paths_exist(&proving_key)?; - check_paths_exist(&elf)?; - check_paths_exist(&asm_mt_path)?; - check_paths_exist(&asm_rh_path)?; - - let (library, mut witness_lib) = ZiskLibLoader::load_asm( - witness_lib, - elf, - verbose.into(), - shared_tables, - asm_mt_path.clone(), - asm_rh_path, - base_port, - unlock_mapped_memory, - )?; - let proofman = ProofMan::new( proving_key.clone(), - custom_commits_map, verify_constraints, aggregation, - final_snark, gpu_params, verbose.into(), - witness_lib.get_packed_info(), + get_packed_info(), ) .map_err(|e| anyhow::anyhow!(e.to_string()))?; - let world_rank = proofman.get_world_rank(); - let local_rank = proofman.get_local_rank(); + let rank_info = proofman.get_rank_info(); if logging_config.is_some() { - zisk_distributed_common::init(logging_config.as_ref(), Some(world_rank))?; + zisk_distributed_common::init(logging_config.as_ref(), Some(&rank_info))?; } else { - initialize_logger(verbose.into(), Some(world_rank)); + initialize_logger(verbose.into(), Some(&rank_info)); } - timer_start_info!(STARTING_ASM_MICROSERVICES); - let asm_services = AsmServices::new(world_rank, local_rank, base_port); - - let asm_runner_options = AsmRunnerOptions::new() - .with_verbose(verbose > 0) - .with_base_port(base_port) - .with_world_rank(world_rank) - .with_local_rank(local_rank) - .with_unlock_mapped_memory(unlock_mapped_memory); - - asm_services.start_asm_services(&asm_mt_path, asm_runner_options)?; - timer_stop_and_log_info!(STARTING_ASM_MICROSERVICES); + proofman.set_barrier(); - proofman.register_witness(&mut *witness_lib, library)?; + let mut snark_wrapper = None; + if use_snark_wrapper { + check_paths_exist(&proving_key_snark)?; + let (aux_trace, d_buffers, reload_fixed_pols_gpu) = proofman.get_preallocated_buffers(); + snark_wrapper = Some(SnarkWrapper::new_with_preallocated_buffers( + &proving_key_snark, + verbose.into(), + Some(aux_trace), + Some(d_buffers), + Some(reload_fixed_pols_gpu), + )?); + } - proofman.set_barrier(); + let executor = init_executor_asm( + verbose.into(), + shared_tables, + unlock_mapped_memory, + &proofman.get_wcm(), + )?; - let core = ProverBackend { - verify_constraints, - aggregation, - rma, - final_snark, - witness_lib, - proving_key: proving_key.clone(), - verify_proofs, - minimal_memory, - save_proofs, - output_dir, + let core = ProverBackend::new( proofman, - rank_info: RankInfo { world_rank, local_rank }, - }; + snark_wrapper, + executor, + proving_key, + Some(proving_key_snark), + ); + + Ok(Self { + backend: core, + rank_info, + asm_info: AsmInfo { + is_distributed, + base_port, + unlock_mapped_memory, + asm_out_file, + verbose: verbose.into(), + no_auto_setup, + n_setups: AtomicU64::new(0), + }, + }) + } - Ok(Self { backend: core, asm_services, rank_info: RankInfo { world_rank, local_rank } }) + #[allow(clippy::too_many_arguments)] + pub fn new_verifier(proving_key: PathBuf, proving_key_snark: PathBuf) -> Result { + let core_prover = ProverBackend::new_verifier(proving_key, Some(proving_key_snark)); + + Ok(Self { + backend: core_prover, + rank_info: RankInfo { world_rank: 0, local_rank: 0, n_processes: 1 }, + asm_info: AsmInfo { + is_distributed: false, + base_port: None, + unlock_mapped_memory: false, + asm_out_file: false, + verbose: VerboseMode::Info, + no_auto_setup: false, + n_setups: AtomicU64::new(0), + }, + }) } } diff --git a/sdk/src/prover/backend.rs b/sdk/src/prover/backend.rs index abb82c33e..e5ad4dd7e 100644 --- a/sdk/src/prover/backend.rs +++ b/sdk/src/prover/backend.rs @@ -1,224 +1,514 @@ +use crate::create_debug_info; +use crate::ZiskProofWithPublicValues; +use crate::ZiskPublics; use crate::{ - Proof, RankInfo, ZiskAggPhaseResult, ZiskExecuteResult, ZiskPhaseResult, ZiskProveResult, - ZiskVerifyConstraintsResult, + get_program_vk_with_proving_key, verify_zisk_proof_with_proving_key, + verify_zisk_snark_proof_with_proving_key, ZISK_PUBLICS, +}; +use crate::{ProofMode, ProofOpts}; +use crate::{ + ZiskAggPhaseResult, ZiskExecuteResult, ZiskPhaseResult, ZiskProgramPK, ZiskProgramVK, + ZiskProof, ZiskProveResult, ZiskVerifyConstraintsResult, }; use anyhow::Result; -use bytemuck::cast_slice; use colored::Colorize; +use executor::ZiskExecutor; use fields::Goldilocks; -use proofman::{AggProofs, ProofInfo, ProofMan, ProvePhase, ProvePhaseInputs, ProvePhaseResult}; -use proofman_common::{DebugInfo, ProofOptions}; -use std::{fs::File, io::Write, path::PathBuf}; -use zisk_common::{io::ZiskStdin, ExecutorStats, ProofLog, ZiskExecutionResult, ZiskLib}; +use proofman::{ + get_vadcop_final_proof_vkey, AggProofs, AggProofsRegister, ProofMan, ProvePhase, + ProvePhaseInputs, ProvePhaseResult, SnarkProtocol, SnarkWrapper, WitnessInfo, +}; +use proofman_common::{ProofCtx, ProofOptions, RowInfo}; +use proofman_util::VadcopFinalProof; +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; +use zisk_common::stats_mark; +use zisk_common::ZiskExecutorTime; +use zisk_common::{io::ZiskStdin, ElfBinaryLike, ExecutorStatsHandle, ZiskExecutorSummary}; pub(crate) struct ProverBackend { - pub verify_constraints: bool, - pub aggregation: bool, - pub rma: bool, - pub final_snark: bool, - pub witness_lib: Box>, - pub proving_key: PathBuf, - pub verify_proofs: bool, - pub minimal_memory: bool, - pub save_proofs: bool, - pub output_dir: Option, - pub proofman: ProofMan, - pub rank_info: RankInfo, + proofman: Option>, + snark_wrapper: Option>, + executor: Option>>, + proving_key_path: PathBuf, + proving_key_snark_path: Option, } impl ProverBackend { + pub fn new( + proofman: ProofMan, + snark_wrapper: Option>, + executor: Arc>, + proving_key_path: PathBuf, + proving_key_snark_path: Option, + ) -> Self { + Self { + proofman: Some(proofman), + snark_wrapper, + executor: Some(executor), + proving_key_path, + proving_key_snark_path, + } + } + + pub fn new_verifier( + proving_key_path: PathBuf, + proving_key_snark_path: Option, + ) -> Self { + Self { + proofman: None, + snark_wrapper: None, + executor: None, + proving_key_path, + proving_key_snark_path, + } + } + + pub fn get_pctx(&self) -> Result>> { + let proofman = self.proofman.as_ref().ok_or_else(|| { + anyhow::anyhow!("Proofman is not initialized. Please initialize it before use.") + })?; + Ok(proofman.get_wcm().get_pctx()) + } + + pub fn register_program(&self, program_pk: &ZiskProgramPK) -> Result<()> { + let executor = self.executor.as_ref().ok_or_else(|| { + anyhow::anyhow!("Executor is not initialized. Please initialize it before use.") + })?; + + let proofman = self.proofman.as_ref().ok_or_else(|| { + anyhow::anyhow!("Proofman is not initialized. Please initialize it before use.") + })?; + + if let Some(asm_resources) = &program_pk.asm_resources { + executor.set_asm_resources(asm_resources.clone()); + } + + executor.set_rom(program_pk.zisk_rom.clone(), program_pk.use_hints()); + + let custom_commits_map = + HashMap::from([("rom".to_string(), program_pk.elf_bin_path.clone())]); + proofman + .register_custom_commits(custom_commits_map) + .map_err(|e| anyhow::anyhow!(e.to_string())) + } + + pub fn set_stdin(&self, stdin: ZiskStdin) -> Result<()> { + let executor = self.executor.as_ref().ok_or_else(|| { + anyhow::anyhow!("Executor is not initialized. Please initialize it before use.") + })?; + executor.set_stdin(stdin); + Ok(()) + } + + pub fn execution_result(&self) -> Result<(ZiskExecutorSummary, ExecutorStatsHandle)> { + let executor = self.executor.as_ref().ok_or_else(|| { + anyhow::anyhow!("Executor is not initialized. Please initialize it before use.") + })?; + + let (result, stats) = executor.get_execution_result(); + + Ok((result, stats)) + } + pub(crate) fn execute( &self, + pk: &ZiskProgramPK, stdin: ZiskStdin, output_path: Option, ) -> Result { - self.witness_lib.set_stdin(stdin); + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot execute in verifier mode"))?; + + let executor = self.executor.as_ref().ok_or_else(|| { + anyhow::anyhow!("Executor is not initialized. Please initialize it before use.") + })?; + + self.register_program(pk)?; + + executor.set_stdin(stdin); let start = std::time::Instant::now(); - self.proofman + let planning_info = proofman .execute_from_lib(output_path) .map_err(|e| anyhow::anyhow!("Error generating execution: {}", e))?; let elapsed = start.elapsed(); - let (result, _) = self.witness_lib.execution_result().ok_or_else(|| { - anyhow::anyhow!("Failed to get execution result from emulator prover") - })?; + let (result, _) = executor.get_execution_result(); - Ok(ZiskExecuteResult { execution: result, duration: elapsed }) + let publics = proofman.get_publics(); + + Ok(ZiskExecuteResult::new(elapsed, result, planning_info, &publics)) } pub(crate) fn stats( &self, + pk: &ZiskProgramPK, stdin: ZiskStdin, - debug_info: DebugInfo, + debug_info: Option>, + minimal_memory: bool, _mpi_node: Option, - ) -> Result<(i32, i32, Option)> { - self.witness_lib.set_stdin(stdin); + ) -> Result<(i32, i32, Option)> { + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot compute stats in verifier mode"))?; - let world_rank = self.proofman.get_world_rank(); - let local_rank = self.proofman.get_local_rank(); - let n_processes = self.proofman.get_n_processes(); + let executor = self.executor.as_ref().ok_or_else(|| { + anyhow::anyhow!("Executor is not initialized. Please initialize it before use.") + })?; + + let debug_info = create_debug_info(debug_info, self.proving_key_path.clone())?; + + self.register_program(pk)?; + + executor.set_stdin(stdin); + + let rank_info = proofman.get_rank_info(); let mut is_active = true; if let Some(mpi_node) = _mpi_node { - if local_rank != mpi_node as i32 { + if rank_info.local_rank != mpi_node as i32 { is_active = false; } } - self.proofman.split_active_processes(is_active); + proofman.split_active_processes(is_active); if !is_active { println!( "{}: {}", - format!("Rank {local_rank}").bright_yellow().bold(), + format!("Rank {}", rank_info.local_rank).bright_yellow().bold(), "Inactive rank, skipping computation.".bright_yellow() ); - return Ok((world_rank, n_processes, None)); + return Ok((rank_info.world_rank, rank_info.n_processes, None)); } - self.proofman + proofman .compute_witness_from_lib( &debug_info, - ProofOptions::new( - false, - false, - false, - false, - false, - self.minimal_memory, - false, - PathBuf::new(), - ), + ProofOptions::new(false, false, false, false, false, minimal_memory, false, None), ) .map_err(|e| anyhow::anyhow!("Error generating execution: {}", e))?; - let (_, stats): (ZiskExecutionResult, ExecutorStats) = - self.witness_lib.execution_result().ok_or_else(|| { - anyhow::anyhow!("Failed to get execution result from emulator prover") - })?; + let (_, stats): (ZiskExecutorSummary, ExecutorStatsHandle) = + executor.get_execution_result(); + + Ok((rank_info.world_rank, rank_info.n_processes, Some(stats))) + } + + pub(crate) fn get_instance_trace( + &self, + instance_id: usize, + first_row: usize, + num_rows: usize, + offset: Option, + ) -> Result> { + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot get instance trace in verifier mode"))?; + + proofman + .get_instance_trace(instance_id, first_row, num_rows, offset) + .map_err(|e| anyhow::anyhow!("Error getting instance trace: {}", e)) + } + + pub(crate) fn get_instance_air_values(&self, instance_id: usize) -> Result> { + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot get instance AIR values in verifier mode"))?; + + proofman + .get_instance_air_values(instance_id) + .map_err(|e| anyhow::anyhow!("Error getting instance AIR values: {}", e)) + } + + pub(crate) fn get_instance_fixed( + &self, + instance_id: usize, + first_row: usize, + num_rows: usize, + offset: Option, + ) -> Result> { + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot get instance fixed in verifier mode"))?; - Ok((world_rank, n_processes, Some(stats))) + proofman + .get_instance_fixed(instance_id, first_row, num_rows, offset) + .map_err(|e| anyhow::anyhow!("Error getting instance fixed: {}", e)) } pub(crate) fn verify_constraints_debug( &self, + pk: &ZiskProgramPK, stdin: ZiskStdin, - debug_info: DebugInfo, + debug_info: Option>, ) -> Result { - if !self.verify_constraints { - return Err(anyhow::anyhow!("Constraint verification is disabled for this prover.")); - } + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot verify constraints in verifier mode"))?; + + let executor = self.executor.as_ref().ok_or_else(|| { + anyhow::anyhow!("Executor is not initialized. Please initialize it before use.") + })?; let start = std::time::Instant::now(); - self.witness_lib.set_stdin(stdin); + let debug_info = create_debug_info(debug_info, self.proving_key_path.clone())?; + + self.register_program(pk)?; - self.proofman + executor.set_stdin(stdin); + + proofman .verify_proof_constraints_from_lib(&debug_info, false) .map_err(|e| anyhow::anyhow!("Error generating proof: {}", e))?; let elapsed = start.elapsed(); - let (result, stats) = self.witness_lib.execution_result().ok_or_else(|| { - anyhow::anyhow!("Failed to get execution result from emulator prover") - })?; + let (result, stats) = executor.get_execution_result(); + + stats_mark!(stats, 0, "END", 0); - // Store the stats in stats.json #[cfg(feature = "stats")] - { - let stats_id = _stats.lock().unwrap().get_id(); - _stats.lock().unwrap().add_stat(0, stats_id, "END", 0, ExecutorStatsEvent::Mark); - _stats.lock().unwrap().store_stats(); - } + stats.store_stats(); + + let publics = proofman.get_publics(); - Ok(ZiskVerifyConstraintsResult { execution: result, duration: elapsed, stats }) + Ok(ZiskVerifyConstraintsResult::new(result, elapsed, stats, &publics)) } pub(crate) fn verify_constraints( &self, + pk: &ZiskProgramPK, stdin: ZiskStdin, ) -> Result { - self.verify_constraints_debug(stdin, DebugInfo::default()) + self.verify_constraints_debug(pk, stdin, None) } - pub(crate) fn prove(&self, stdin: ZiskStdin) -> Result { - if self.verify_constraints { + pub(crate) fn vk(&self, elf: &impl ElfBinaryLike) -> Result { + get_program_vk_with_proving_key(elf, self.proving_key_path.clone()) + } + + pub(crate) fn prove( + &self, + pk: &ZiskProgramPK, + stdin: ZiskStdin, + mode: ProofMode, + proof_options: ProofOpts, + ) -> Result { + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot prove in verifier mode"))?; + + let executor = self.executor.as_ref().ok_or_else(|| { + anyhow::anyhow!("Executor is not initialized. Please initialize it before use.") + })?; + + self.register_program(pk)?; + + if mode == ProofMode::Snark && self.snark_wrapper.is_none() { return Err(anyhow::anyhow!( - "Prover initialized with constraint verification enabled. Use `prove` instead." + "Snark wrapper is not initialized. Cannot generate snark proof." )); } let start = std::time::Instant::now(); - self.witness_lib.set_stdin(stdin); + executor.set_stdin(stdin); - self.proofman.set_barrier(); - let proof = self - .proofman + let compressed = matches!(mode, ProofMode::VadcopFinalCompressed); + + proofman.set_partition(1, vec![0], 0)?; + + proofman.set_barrier(); + let proof = proofman .generate_proof_from_lib( - ProvePhaseInputs::Full(ProofInfo::new(None, 1, vec![0], 0)), + ProvePhaseInputs::Full(), ProofOptions::new( - self.verify_constraints, - self.aggregation, - self.rma, - self.final_snark, - self.verify_proofs, - self.minimal_memory, - self.save_proofs, - self.output_dir.clone().expect("output_dir must be set, unreachable"), + false, + proof_options.aggregation, + proof_options.rma, + compressed, + proof_options.verify_proofs, + proof_options.minimal_memory, + proof_options.save_proofs, + proof_options.output_dir_path.clone(), ), ProvePhase::Full, ) .map_err(|e| anyhow::anyhow!("Error generating proof: {}", e))?; - let elapsed = start.elapsed(); - let (proof_id, proof) = match proof { ProvePhaseResult::Full(proof_id, proof) => (proof_id, proof), _ => (None, None), }; - let (execution_result, stats) = self.witness_lib.execution_result().ok_or_else(|| { - anyhow::anyhow!("Failed to get execution result from emulator prover") - })?; + let (execution_result, stats) = executor.get_execution_result(); - let proof = Proof { id: proof_id, proof }; + // Store the stats in stats.json + stats_mark!(stats, 0, "END", 0); - if let Some(proof_id) = proof.id.clone() { - let output_dir = self.output_dir.as_ref().unwrap(); + #[cfg(feature = "stats")] + stats.store_stats(); + + proofman.set_barrier(); + + match (mode, proof) { + (ProofMode::Snark, Some(vadcop_proof)) => { + let snark_proof = self.snark_wrapper.as_ref().unwrap().generate_final_snark_proof( + &vadcop_proof, + proof_options.output_dir_path.clone(), + )?; + + let publics = ZiskPublics::new(&vadcop_proof.public_values); + let program_vk = ZiskProgramVK::new_from_publics(&vadcop_proof.public_values); + if snark_proof.protocol_id == SnarkProtocol::Plonk.protocol_id() { + Ok(ZiskProveResult::new( + execution_result, + start.elapsed(), + stats, + proof_id, + ZiskProofWithPublicValues { + proof: ZiskProof::Plonk(snark_proof.proof_bytes), + publics, + program_vk, + }, + )) + } else if snark_proof.protocol_id == SnarkProtocol::Fflonk.protocol_id() { + Ok(ZiskProveResult::new( + execution_result, + start.elapsed(), + stats, + proof_id, + ZiskProofWithPublicValues { + proof: ZiskProof::Fflonk(snark_proof.proof_bytes), + publics, + program_vk, + }, + )) + } else { + Err(anyhow::anyhow!( + "Unsupported snark protocol id: {}", + snark_proof.protocol_id + )) + } + } + (_, Some(p)) => { + let proof = if compressed { + ZiskProof::VadcopFinalCompressed(p.proof) + } else { + ZiskProof::VadcopFinal(p.proof) + }; + Ok(ZiskProveResult::new( + execution_result, + start.elapsed(), + stats, + proof_id, + ZiskProofWithPublicValues { + proof, + publics: ZiskPublics::new(&p.public_values), + program_vk: ZiskProgramVK::new_from_publics(&p.public_values), + }, + )) + } + (_, None) => Ok(ZiskProveResult::new_null(execution_result, start.elapsed(), stats)), + } + } - if self.rank_info.local_rank == 0 && !output_dir.exists() { - std::fs::create_dir_all(output_dir)?; + pub(crate) fn compress( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + program_vk: &ZiskProgramVK, + ) -> Result { + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot compress in verifier mode"))?; + + let proof_bytes = match proof { + ZiskProof::VadcopFinal(bytes) => bytes.clone(), + _ => { + return Err(anyhow::anyhow!( + "Cannot generate SNARK proof. Only VadcopFinal proofs can be converted to SNARK proofs.", + )); } + }; - let logs = - ProofLog::new(execution_result.executed_steps, proof_id, elapsed.as_secs_f64()); - let log_path = output_dir.join("result.json"); - ProofLog::write_json_log(&log_path, &logs) - .map_err(|e| anyhow::anyhow!("Error generating log: {}", e))?; - - // Save the uncompressed vadcop final proof - let output_file_path = output_dir.join("vadcop_final_proof.bin"); - let vadcop_proof = proof.proof.clone().unwrap(); - let mut file = File::create(output_file_path)?; - file.write_all(cast_slice(&vadcop_proof))?; - } + let mut pubs = program_vk.vk.clone(); + pubs.extend(publics.public_bytes()); + let vadcop_final_proof = VadcopFinalProof::new(proof_bytes, pubs, false); - // Store the stats in stats.json - #[cfg(feature = "stats")] - { - let stats_id = _stats.lock().unwrap().get_id(); - _stats.lock().unwrap().add_stat(0, stats_id, "END", 0, ExecutorStatsEvent::Mark); - _stats.lock().unwrap().store_stats(); + let compressed_proof = proofman + .generate_vadcop_final_proof_compressed(&vadcop_final_proof, None, false) + .map_err(|e| anyhow::anyhow!("Error generating compressed proof: {}", e))?; + + Ok(ZiskProofWithPublicValues { + proof: ZiskProof::VadcopFinalCompressed(compressed_proof.proof), + publics: ZiskPublics::new(&compressed_proof.public_values), + program_vk: ZiskProgramVK::new_from_publics(&compressed_proof.public_values), + }) + } + + pub(crate) fn prove_snark( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + program_vk: &ZiskProgramVK, + ) -> Result { + if self.snark_wrapper.is_none() { + return Err(anyhow::anyhow!( + "Snark wrapper is not initialized. Cannot generate snark proof." + )); } - self.proofman.set_barrier(); + let proof_bytes = match proof { + ZiskProof::VadcopFinal(bytes) => bytes.clone(), + _ => { + return Err(anyhow::anyhow!( + "Cannot generate SNARK proof. Only VadcopFinal proofs can be converted to SNARK proofs.", + )); + } + }; - Ok(ZiskProveResult { execution: execution_result, duration: elapsed, stats, proof }) + let mut pubs = program_vk.vk.clone(); + pubs.extend(publics.public_bytes()); + let vadcop_final_proof = VadcopFinalProof::new(proof_bytes, pubs, false); + + let snark_proof = self + .snark_wrapper + .as_ref() + .unwrap() + .generate_final_snark_proof(&vadcop_final_proof, None)?; + + if snark_proof.protocol_id == SnarkProtocol::Plonk.protocol_id() { + Ok(ZiskProofWithPublicValues { + proof: ZiskProof::Plonk(snark_proof.proof_bytes), + publics: publics.clone(), + program_vk: program_vk.clone(), + }) + } else if snark_proof.protocol_id == SnarkProtocol::Fflonk.protocol_id() { + Ok(ZiskProofWithPublicValues { + proof: ZiskProof::Fflonk(snark_proof.proof_bytes), + publics: publics.clone(), + program_vk: program_vk.clone(), + }) + } else { + Err(anyhow::anyhow!("Unsupported snark protocol id: {}", snark_proof.protocol_id)) + } } pub(crate) fn prove_phase( @@ -227,11 +517,61 @@ impl ProverBackend { options: ProofOptions, phase: ProvePhase, ) -> Result { - self.proofman + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot prove in verifier mode"))?; + + proofman .generate_proof_from_lib(phase_inputs, options, phase.clone()) .map_err(|e| anyhow::anyhow!("Error generating proof in phase {:?}: {}", phase, e)) } + pub(crate) fn set_partition( + &self, + total_compute_units: usize, + allocation: Vec, + rank_id: usize, + ) -> Result<()> { + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot set partition in verifier mode"))?; + + Ok(proofman.set_partition(total_compute_units, allocation, rank_id)?) + } + + pub(crate) fn get_execution_info(&self) -> Result<(WitnessInfo, ZiskExecutorTime)> { + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot get execution info in verifier mode"))?; + + let witness_info = proofman.get_witness_info(); + + let executor = self.executor.as_ref().ok_or_else(|| { + anyhow::anyhow!("Executor is not initialized. Please initialize it before use.") + })?; + + let (execution_result, _) = executor.get_execution_result(); + + Ok((witness_info, execution_result.executor_time)) + } + + pub(crate) fn register_aggregated_proofs( + &self, + agg_proofs: Vec, + ) -> Result<()> { + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot aggregate proofs in verifier mode"))?; + + proofman + .register_aggregated_proofs(agg_proofs) + .map_err(|e| anyhow::anyhow!("Error registering aggregate proof: {}", e)) + } + pub(crate) fn aggregate_proofs( &self, agg_proofs: Vec, @@ -239,15 +579,82 @@ impl ProverBackend { final_proof: bool, options: &ProofOptions, ) -> Result> { - let result = self + let proofman = self .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot aggregate proofs in verifier mode"))?; + + let result = proofman .receive_aggregated_proofs(agg_proofs, last_proof, final_proof, options) .map_err(|e| anyhow::anyhow!("Error aggregating proofs: {}", e))?; Ok(result.map(|agg| ZiskAggPhaseResult { agg_proofs: agg })) } - pub(crate) fn mpi_broadcast(&self, data: &mut Vec) { - self.proofman.mpi_broadcast(data); + pub(crate) fn mpi_broadcast(&self, data: &mut Vec) -> Result<()> { + let proofman = self + .proofman + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Cannot broadcast in verifier mode"))?; + + proofman.mpi_broadcast(data); + Ok(()) + } + + pub(crate) fn prepare_send_proof( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + program_vk: &ZiskProgramVK, + ) -> Result> { + match &proof { + ZiskProof::Null() | ZiskProof::Plonk(_) | ZiskProof::Fflonk(_) => Err(anyhow::anyhow!("Proof not suitable for preparing to send. Only VadcopFinal and VadcopFinalCompressed proofs can be prepared for sending.")), + ZiskProof::VadcopFinal(proof_bytes) | ZiskProof::VadcopFinalCompressed(proof_bytes) => { + let compressed = matches!(proof, ZiskProof::VadcopFinalCompressed(_)); + + let vk = get_vadcop_final_proof_vkey(&self.proving_key_path, compressed)?; + + let mut pubs = program_vk.vk.clone(); + pubs.extend(publics.public_bytes()); + + // Format: [compressed(8)][pubs_len(8)][pubs][proof_bytes][zisk_vk] + let mut proof = Vec::new(); + proof.extend_from_slice(&(compressed as u64).to_le_bytes()); + proof.extend_from_slice(&(ZISK_PUBLICS + 4).to_le_bytes()); + proof.extend_from_slice(&pubs); + proof.extend_from_slice(proof_bytes); + proof.extend_from_slice(&vk); + + Ok(proof) + } + } + } + + pub(crate) fn verify( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + program_vk: &ZiskProgramVK, + ) -> Result<()> { + match &proof { + ZiskProof::Null() => Err(anyhow::anyhow!("No proof found to verify.")), + ZiskProof::Plonk(_) | ZiskProof::Fflonk(_) => verify_zisk_snark_proof_with_proving_key( + proof, + publics, + program_vk, + self.proving_key_path.clone(), + self.proving_key_snark_path + .clone() + .expect("Proving key snark path is required for snark proofs"), + ), + ZiskProof::VadcopFinal(_) | ZiskProof::VadcopFinalCompressed(_) => { + verify_zisk_proof_with_proving_key( + proof, + publics, + program_vk, + self.proving_key_path.clone(), + ) + } + } } } diff --git a/sdk/src/prover/emu.rs b/sdk/src/prover/emu.rs index e49e69d2f..07679ca82 100644 --- a/sdk/src/prover/emu.rs +++ b/sdk/src/prover/emu.rs @@ -1,14 +1,23 @@ use crate::{ - check_paths_exist, create_debug_info, get_custom_commits_map, + check_paths_exist, prover::{ProverBackend, ProverEngine, ZiskBackend}, - RankInfo, ZiskAggPhaseResult, ZiskExecuteResult, ZiskLibLoader, ZiskPhaseResult, - ZiskProveResult, ZiskVerifyConstraintsResult, + ZiskAggPhaseResult, ZiskExecuteResult, ZiskPhaseResult, ZiskProgramPK, ZiskProgramVK, + ZiskProof, ZiskProofWithPublicValues, ZiskProveResult, ZiskPublics, + ZiskVerifyConstraintsResult, }; -use proofman::{AggProofs, ProofMan, ProvePhase, ProvePhaseInputs}; -use proofman_common::{initialize_logger, ParamsGPU, ProofOptions}; +use crate::{ensure_custom_commits, ProofMode, ProofOpts}; +use executor::{get_packed_info, init_executor_emu}; +use proofman::{ + AggProofs, AggProofsRegister, ProofMan, ProvePhase, ProvePhaseInputs, SnarkWrapper, WitnessInfo, +}; +use proofman_common::{initialize_logger, ParamsGPU, ProofOptions, RankInfo, RowInfo}; use std::path::PathBuf; +use std::sync::Arc; use zisk_common::io::ZiskStdin; -use zisk_common::ExecutorStats; +use zisk_common::ElfBinaryLike; +use zisk_common::ExecutorStatsHandle; +use zisk_common::ZiskExecutorTime; +use zisk_core::Riscv2zisk; use zisk_distributed_common::LoggingConfig; use anyhow::Result; @@ -28,40 +37,34 @@ impl EmuProver { pub fn new( verify_constraints: bool, aggregation: bool, - rma: bool, - final_snark: bool, - witness_lib: PathBuf, + snark_wrapper: bool, proving_key: PathBuf, - elf: PathBuf, + proving_key_snark: PathBuf, verbose: u8, shared_tables: bool, gpu_params: ParamsGPU, - verify_proofs: bool, - minimal_memory: bool, - save_proofs: bool, - output_dir: Option, logging_config: Option, ) -> Result { let core_prover = EmuCoreProver::new( verify_constraints, aggregation, - rma, - final_snark, - witness_lib, + snark_wrapper, proving_key, - elf, + proving_key_snark, verbose, shared_tables, gpu_params, - verify_proofs, - minimal_memory, - save_proofs, - output_dir, logging_config, )?; Ok(Self { core_prover }) } + + pub fn new_verifier(proving_key: PathBuf, proving_key_snark: PathBuf) -> Result { + let core_prover = EmuCoreProver::new_verifier(proving_key, proving_key_snark)?; + + Ok(Self { core_prover }) + } } impl ProverEngine for EmuProver { @@ -73,52 +76,144 @@ impl ProverEngine for EmuProver { self.core_prover.rank_info.local_rank } - fn set_stdin(&self, stdin: ZiskStdin) { - self.core_prover.backend.witness_lib.set_stdin(stdin); + fn set_stdin(&self, stdin: ZiskStdin) -> Result<()> { + self.core_prover.backend.set_stdin(stdin) + } + + fn register_program(&self, pk: &ZiskProgramPK) -> Result<()> { + self.core_prover.backend.register_program(pk) + } + + fn setup(&self, elf: &impl ElfBinaryLike) -> Result<(ZiskProgramPK, ZiskProgramVK)> { + let pctx = self.core_prover.backend.get_pctx()?; + + let (rom_bin_path, vk) = ensure_custom_commits(&pctx, elf)?; + + let rv2zk = Riscv2zisk::new(elf.elf()); + + let zisk_rom = rv2zk.run().unwrap_or_else(|e| panic!("Application error: {e}")); + let zisk_rom = Arc::new(zisk_rom); + + Ok(( + ZiskProgramPK { + zisk_rom, + elf_bin_path: rom_bin_path, + asm_resources: None, + asm_services: None, + rank_info: self.core_prover.rank_info.clone(), + use_hints: false, + }, + ZiskProgramVK { vk }, + )) } fn executed_steps(&self) -> u64 { self.core_prover .backend - .witness_lib .execution_result() - .map(|(exec_result, _)| exec_result.executed_steps) + .map(|(exec_result, _)| exec_result.steps) .unwrap_or(0) } - fn execute(&self, stdin: ZiskStdin, output_path: Option) -> Result { - self.core_prover.backend.execute(stdin, output_path) + fn get_execution_info(&self) -> Result<(WitnessInfo, ZiskExecutorTime)> { + self.core_prover.backend.get_execution_info() + } + + fn execute( + &self, + pk: &ZiskProgramPK, + stdin: ZiskStdin, + output_path: Option, + ) -> Result { + self.core_prover.backend.execute(pk, stdin, output_path) } fn stats( &self, + pk: &ZiskProgramPK, stdin: ZiskStdin, debug_info: Option>, + minimal_memory: bool, mpi_node: Option, - ) -> Result<(i32, i32, Option)> { - let debug_info = - create_debug_info(debug_info, self.core_prover.backend.proving_key.clone())?; + ) -> Result<(i32, i32, Option)> { + self.core_prover.backend.stats(pk, stdin, debug_info, minimal_memory, mpi_node) + } - self.core_prover.backend.stats(stdin, debug_info, mpi_node) + fn get_instance_trace( + &self, + instance_id: usize, + first_row: usize, + num_rows: usize, + offset: Option, + ) -> Result> { + self.core_prover.backend.get_instance_trace(instance_id, first_row, num_rows, offset) + } + + fn get_instance_air_values(&self, instance_id: usize) -> Result> { + self.core_prover.backend.get_instance_air_values(instance_id) + } + + fn get_instance_fixed( + &self, + instance_id: usize, + first_row: usize, + num_rows: usize, + offset: Option, + ) -> Result> { + self.core_prover.backend.get_instance_fixed(instance_id, first_row, num_rows, offset) } fn verify_constraints_debug( &self, + pk: &ZiskProgramPK, stdin: ZiskStdin, debug_info: Option>, ) -> Result { - let debug_info = - create_debug_info(debug_info, self.core_prover.backend.proving_key.clone())?; + self.core_prover.backend.verify_constraints_debug(pk, stdin, debug_info) + } + + fn verify_constraints( + &self, + pk: &ZiskProgramPK, + stdin: ZiskStdin, + ) -> Result { + self.core_prover.backend.verify_constraints(pk, stdin) + } + + fn vk(&self, elf: &impl ElfBinaryLike) -> Result { + self.core_prover.backend.vk(elf) + } + + fn verify(&self, proof: &ZiskProof, publics: &ZiskPublics, vk: &ZiskProgramVK) -> Result<()> { + self.core_prover.backend.verify(proof, publics, vk) + } - self.core_prover.backend.verify_constraints_debug(stdin, debug_info) + fn prove( + &self, + pk: &ZiskProgramPK, + stdin: ZiskStdin, + mode: ProofMode, + proof_options: ProofOpts, + ) -> Result { + self.core_prover.backend.prove(pk, stdin, mode, proof_options) } - fn verify_constraints(&self, stdin: ZiskStdin) -> Result { - self.core_prover.backend.verify_constraints(stdin) + fn prove_snark( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + vk: &ZiskProgramVK, + ) -> Result { + self.core_prover.backend.prove_snark(proof, publics, vk) } - fn prove(&self, stdin: ZiskStdin) -> Result { - self.core_prover.backend.prove(stdin) + fn compress( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + vk: &ZiskProgramVK, + ) -> Result { + self.core_prover.backend.compress(proof, publics, vk) } fn prove_phase( @@ -130,6 +225,19 @@ impl ProverEngine for EmuProver { self.core_prover.backend.prove_phase(phase_inputs, options, phase) } + fn set_partition( + &self, + total_compute_units: usize, + allocation: Vec, + rank_id: usize, + ) -> Result<()> { + self.core_prover.backend.set_partition(total_compute_units, allocation, rank_id) + } + + fn register_aggregated_proofs(&self, agg_proofs: Vec) -> Result<()> { + self.core_prover.backend.register_aggregated_proofs(agg_proofs) + } + fn aggregate_proofs( &self, agg_proofs: Vec, @@ -140,8 +248,17 @@ impl ProverEngine for EmuProver { self.core_prover.backend.aggregate_proofs(agg_proofs, last_proof, final_proof, options) } - fn mpi_broadcast(&self, data: &mut Vec) { - self.core_prover.backend.mpi_broadcast(data); + fn mpi_broadcast(&self, data: &mut Vec) -> Result<()> { + self.core_prover.backend.mpi_broadcast(data) + } + + fn prepare_send_proof( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + program_vk: &ZiskProgramVK, + ) -> Result> { + self.core_prover.backend.prepare_send_proof(proof, publics, program_vk) } } @@ -155,70 +272,69 @@ impl EmuCoreProver { pub fn new( verify_constraints: bool, aggregation: bool, - rma: bool, - final_snark: bool, - witness_lib: PathBuf, + use_snark_wrapper: bool, proving_key: PathBuf, - elf: PathBuf, + proving_key_snark: PathBuf, verbose: u8, shared_tables: bool, gpu_params: ParamsGPU, - verify_proofs: bool, - minimal_memory: bool, - save_proofs: bool, - output_dir: Option, logging_config: Option, ) -> Result { - let custom_commits_map = get_custom_commits_map(&proving_key, &elf)?; - - check_paths_exist(&witness_lib)?; check_paths_exist(&proving_key)?; - check_paths_exist(&elf)?; - - // Build emulator library - let (library, mut witness_lib) = - ZiskLibLoader::load_emu(witness_lib, elf, verbose.into(), shared_tables)?; let proofman = ProofMan::new( proving_key.clone(), - custom_commits_map, verify_constraints, aggregation, - final_snark, gpu_params, verbose.into(), - witness_lib.get_packed_info(), + get_packed_info(), ) .map_err(|e| anyhow::anyhow!(e.to_string()))?; - let world_rank = proofman.get_world_rank(); - let local_rank = proofman.get_local_rank(); + let rank_info = proofman.get_rank_info(); if logging_config.is_some() { - zisk_distributed_common::init(logging_config.as_ref(), Some(world_rank))?; + zisk_distributed_common::init(logging_config.as_ref(), Some(&rank_info))?; } else { - initialize_logger(verbose.into(), Some(world_rank)); + initialize_logger(verbose.into(), Some(&rank_info)); } - proofman.register_witness(&mut *witness_lib, library)?; - proofman.set_barrier(); - let core = ProverBackend { - verify_constraints, - aggregation, - rma, - final_snark, - witness_lib, - proving_key: proving_key.clone(), - verify_proofs, - minimal_memory, - save_proofs, - output_dir, + let mut snark_wrapper = None; + if use_snark_wrapper { + check_paths_exist(&proving_key_snark)?; + let (aux_trace, d_buffers, reload_fixed_pols_gpu) = proofman.get_preallocated_buffers(); + snark_wrapper = Some(SnarkWrapper::new_with_preallocated_buffers( + &proving_key_snark, + verbose.into(), + Some(aux_trace), + Some(d_buffers), + Some(reload_fixed_pols_gpu), + )?); + } + + let executor = init_executor_emu(verbose.into(), shared_tables, &proofman.get_wcm())?; + + let core = ProverBackend::new( proofman, - rank_info: RankInfo { world_rank, local_rank }, - }; + snark_wrapper, + executor, + proving_key, + Some(proving_key_snark), + ); + + Ok(Self { backend: core, rank_info }) + } + + #[allow(clippy::too_many_arguments)] + pub fn new_verifier(proving_key: PathBuf, proving_key_snark: PathBuf) -> Result { + let core_prover = ProverBackend::new_verifier(proving_key, Some(proving_key_snark)); - Ok(Self { backend: core, rank_info: RankInfo { world_rank, local_rank } }) + Ok(Self { + backend: core_prover, + rank_info: RankInfo { world_rank: 0, local_rank: 0, n_processes: 1 }, + }) } } diff --git a/sdk/src/prover/mod.rs b/sdk/src/prover/mod.rs index efb3f9ce0..002cce3b4 100644 --- a/sdk/src/prover/mod.rs +++ b/sdk/src/prover/mod.rs @@ -1,34 +1,746 @@ mod asm; mod backend; mod emu; - pub use asm::*; use backend::*; pub use emu::*; -use proofman::{AggProofs, ProvePhase, ProvePhaseInputs, ProvePhaseResult}; -use proofman_common::ProofOptions; +use precompiles_hints::HintsProcessor; +use proofman::{ + AggProofs, AggProofsRegister, ProvePhase, ProvePhaseInputs, ProvePhaseResult, SnarkProtocol, + WitnessInfo, +}; +use proofman_common::{ProofOptions, RankInfo, RowInfo}; +use proofman_util::VadcopFinalProof; +use sha2::{Digest, Sha256}; -use crate::Proof; -use anyhow::Result; -use std::{path::PathBuf, time::Duration}; -use zisk_common::{io::ZiskStdin, ExecutorStats, ZiskExecutionResult}; +use anyhow::{Context, Result}; +use asm_runner::{AsmServices, HintsShmem}; +use executor::AsmResources; +use proofman::PlanningInfo; +use serde::{Deserialize, Serialize}; +use std::fs::File; +use std::{ + cell::Cell, + path::{Path, PathBuf}, + sync::Arc, + time::Duration, +}; +use tracing::info; +use zisk_common::{ + io::StreamSource, io::ZiskStdin, ElfBinaryLike, ExecutorStatsHandle, StatsCostPerType, + ZiskExecutorSummary, ZiskExecutorTime, +}; +use zisk_core::ZiskRom; pub struct ZiskExecuteResult { - pub execution: ZiskExecutionResult, - pub duration: Duration, + pub total_duration: Duration, + pub executor_summary: ZiskExecutorSummary, + pub planning_info: PlanningInfo, + pub publics: ZiskPublics, +} + +impl ZiskExecuteResult { + pub fn new( + total_duration: Duration, + executor_summary: ZiskExecutorSummary, + planning_info: PlanningInfo, + publics: &[u8], + ) -> Self { + Self { total_duration, executor_summary, planning_info, publics: ZiskPublics::new(publics) } + } + + pub fn get_publics(&self) -> &ZiskPublics { + &self.publics + } + + pub fn get_public_values( + &self, + ) -> Result { + self.publics.read() + } + + pub fn get_execution_steps(&self) -> u64 { + self.executor_summary.steps + } + + pub fn get_execution_total_cost(&self) -> u64 { + self.executor_summary.cost_per_type.total_cost() + } + + pub fn get_execution_cost_per_type(&self) -> &StatsCostPerType { + &self.executor_summary.cost_per_type + } + + pub fn get_duration(&self) -> Duration { + self.total_duration + } } pub struct ZiskVerifyConstraintsResult { - pub execution: ZiskExecutionResult, + pub executor_summary: ZiskExecutorSummary, pub duration: Duration, - pub stats: ExecutorStats, + pub stats: ExecutorStatsHandle, + pub publics: ZiskPublics, +} + +impl ZiskVerifyConstraintsResult { + pub fn new( + execution: ZiskExecutorSummary, + duration: Duration, + stats: ExecutorStatsHandle, + publics: &[u8], + ) -> Self { + Self { executor_summary: execution, duration, stats, publics: ZiskPublics::new(publics) } + } + + pub fn get_publics(&self) -> &ZiskPublics { + &self.publics + } + + pub fn get_public_values( + &self, + ) -> Result { + self.publics.read() + } + + pub fn get_execution_steps(&self) -> u64 { + self.executor_summary.steps + } + + pub fn get_execution_total_cost(&self) -> u64 { + self.executor_summary.cost_per_type.total_cost() + } + + pub fn get_execution_cost_per_type(&self) -> &StatsCostPerType { + &self.executor_summary.cost_per_type + } + + pub fn get_duration(&self) -> Duration { + self.duration + } +} + +#[derive(Debug, Clone)] +pub struct ZiskProgramPK { + pub zisk_rom: Arc, + pub elf_bin_path: PathBuf, + pub asm_resources: Option, + pub asm_services: Option, + pub rank_info: RankInfo, + use_hints: bool, +} + +impl ZiskProgramPK { + pub fn use_hints(&self) -> bool { + self.use_hints + } + + pub fn register_hints_stream(&self, stream: StreamSource) -> Result<()> { + if self.use_hints() { + if let Some(asm_resources) = &self.asm_resources { + asm_resources + .set_hints_stream_src(stream) + .map_err(|e| anyhow::anyhow!("Failed to set hints stream source: {}", e))?; + } else { + return Err(anyhow::anyhow!( + "ASM resources not initialized, cannot register hints stream" + )); + } + } else { + return Err(anyhow::anyhow!( + "Hints not enabled for this program, cannot register hints stream" + )); + } + Ok(()) + } + + pub fn is_asm(&self) -> bool { + self.asm_services.is_some() + } + + pub fn get_hints_processor(&self) -> Option>> { + self.asm_resources.as_ref().and_then(|r| r.get_hints_processor()) + } + + pub fn reset(&self) { + if let Some(asm_resources) = &self.asm_resources { + asm_resources.reset(); + } + } +} + +impl Drop for ZiskProgramPK { + fn drop(&mut self) { + // Shut down ASM microservices + if let Some(asm_services) = &self.asm_services { + info!(">>> [{}] Stopping ASM microservices.", self.rank_info.world_rank); + if let Err(e) = asm_services.stop_asm_services() { + tracing::error!( + ">>> [{}] Failed to stop ASM microservices: {}", + self.rank_info.world_rank, + e + ); + } + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ZiskProgramVK { + pub vk: Vec, +} + +impl ZiskProgramVK { + pub fn new_from_publics(publics: &[u8]) -> Self { + assert!( + publics.len() >= 32, + "Not enough bytes to extract program VK (expected at least 32 bytes)" + ); + + Self { vk: publics[0..32].to_vec() } + } + + pub fn new_empty() -> Self { + Self { vk: vec![0u8; 32] } + } +} + +#[derive(Debug, Clone)] +pub struct ProofOpts { + pub aggregation: bool, + pub verify_proofs: bool, + pub rma: bool, + pub minimal_memory: bool, + pub output_dir_path: Option, + pub save_proofs: bool, +} + +impl Default for ProofOpts { + fn default() -> Self { + Self { + aggregation: true, + verify_proofs: false, + rma: false, + minimal_memory: false, + output_dir_path: None, + save_proofs: false, + } + } +} + +impl ProofOpts { + pub fn output_dir(mut self, path: PathBuf) -> Self { + self.output_dir_path = Some(path); + self + } + + pub fn save_proofs(mut self) -> Self { + self.save_proofs = true; + self + } + + pub fn verify_proofs(mut self) -> Self { + self.verify_proofs = true; + self + } + + pub fn minimal_memory(mut self) -> Self { + self.minimal_memory = true; + self + } + + pub fn no_aggregation(mut self) -> Self { + self.aggregation = false; + self + } +} + +#[derive(Debug, PartialEq, Eq)] +pub enum ProofMode { + VadcopFinal, + VadcopFinalCompressed, + Snark, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum ZiskProof { + Null(), + VadcopFinal(Vec), + VadcopFinalCompressed(Vec), + Plonk(Vec), + Fflonk(Vec), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ZiskVadcopFinalProof { + pub proof: Vec, + pub compressed: bool, +} + +impl ZiskVadcopFinalProof { + pub fn new(proof: Vec, compressed: bool) -> Self { + Self { proof, compressed } + } + + pub fn save( + &self, + path: impl AsRef, + ) -> Result<(), Box> { + let path = path.as_ref(); + + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + + let file = File::create(path).map_err(|e| { + std::io::Error::new( + e.kind(), + format!( + "Failed to create file for saving Vadcop Final proof: {}: {}", + path.display(), + e + ), + ) + })?; + + bincode::serialize_into(file, self)?; + Ok(()) + } + + pub fn load(path: impl AsRef) -> Result> { + let file = File::open(path.as_ref()).map_err(|e| { + std::io::Error::new( + e.kind(), + format!( + "Failed to open file for loading proof: {}: {}", + path.as_ref().display(), + e + ), + ) + })?; + let proof: ZiskVadcopFinalProof = bincode::deserialize_from(file)?; + Ok(proof) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ZiskSnarkProof { + pub proof: Vec, + pub protocol_id: u64, +} + +impl ZiskSnarkProof { + pub fn new(proof: Vec, protocol_id: u64) -> Self { + Self { proof, protocol_id } + } + + pub fn save( + &self, + path: impl AsRef, + ) -> Result<(), Box> { + let path = path.as_ref(); + + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + + let file = File::create(path).map_err(|e| { + std::io::Error::new( + e.kind(), + format!("Failed to create file for saving SNARK proof: {}: {}", path.display(), e), + ) + })?; + + bincode::serialize_into(file, self)?; + Ok(()) + } + + pub fn load(path: impl AsRef) -> Result> { + let file = File::open(path.as_ref()).map_err(|e| { + std::io::Error::new( + e.kind(), + format!( + "Failed to open file for loading SNARK proof: {}: {}", + path.as_ref().display(), + e + ), + ) + })?; + let proof: ZiskSnarkProof = bincode::deserialize_from(file)?; + Ok(proof) + } +} + +impl ZiskProof { + pub fn save(&self, path: impl AsRef) -> Result<()> { + match self { + ZiskProof::Null() => Err(anyhow::anyhow!("No proof to save")), + ZiskProof::VadcopFinal(proof) | ZiskProof::VadcopFinalCompressed(proof) => { + let compressed = matches!(self, ZiskProof::VadcopFinalCompressed(_)); + let zisk_proof = ZiskVadcopFinalProof::new(proof.clone(), compressed); + zisk_proof.save(path).map_err(|e| anyhow::anyhow!("{}", e)) + } + ZiskProof::Plonk(snark_proof) | ZiskProof::Fflonk(snark_proof) => { + let protocol_id = match self { + ZiskProof::Plonk(_) => SnarkProtocol::Plonk.protocol_id(), + ZiskProof::Fflonk(_) => SnarkProtocol::Fflonk.protocol_id(), + _ => unreachable!(), + }; + let snark_proof = ZiskSnarkProof::new(snark_proof.clone(), protocol_id); + snark_proof.save(path).map_err(|e| anyhow::anyhow!("{}", e)) + } + } + } + + pub fn load(path: impl AsRef) -> Result { + if let Ok(vadcop_proof) = ZiskVadcopFinalProof::load(path.as_ref()) { + let proof = if vadcop_proof.compressed { + ZiskProof::VadcopFinalCompressed(vadcop_proof.proof) + } else { + ZiskProof::VadcopFinal(vadcop_proof.proof) + }; + return Ok(proof); + } + + if let Ok(snark_proof) = ZiskSnarkProof::load(path.as_ref()) { + let proof = match SnarkProtocol::from_protocol_id(snark_proof.protocol_id)? { + SnarkProtocol::Plonk => ZiskProof::Plonk(snark_proof.proof), + SnarkProtocol::Fflonk => ZiskProof::Fflonk(snark_proof.proof), + }; + return Ok(proof); + } + + Err(anyhow::anyhow!("Failed to load proof: unsupported format or corrupted file")) + } +} + +pub const ZISK_PUBLICS: usize = 64; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ZiskPublics { + data: Vec, + ptr: Cell, +} + +impl ZiskPublics { + pub fn new(publics_bytes: &[u8]) -> Self { + assert!( + publics_bytes.len() == ZISK_PUBLICS * 8 + 32, + "Not enough bytes to fill ZiskPublics" + ); + + let mut data = [0u8; ZISK_PUBLICS * 4]; + for (i, chunk) in publics_bytes[32..].chunks_exact(8).enumerate() { + let v32 = u32::from_le_bytes(chunk[0..4].try_into().unwrap()); + data[i * 4..(i + 1) * 4].copy_from_slice(&v32.to_le_bytes()); + } + + Self { data: data.to_vec(), ptr: Cell::new(0) } + } + + pub fn new_empty() -> Self { + Self { data: [0u8; ZISK_PUBLICS * 4].to_vec(), ptr: Cell::new(0) } + } + + /// Create ZiskPublics from a serializable value. + /// The value is serialized with bincode and stored in the public outputs as 64-bit chunks. + pub fn write(value: &T) -> Result { + let serialized = bincode::serialize(value) + .map_err(|e| anyhow::anyhow!("Serialization failed: {}", e))?; + + if serialized.len() > ZISK_PUBLICS * 4 { + return Err(anyhow::anyhow!( + "Serialized data too large: {} bytes (max {} bytes)", + serialized.len(), + ZISK_PUBLICS * 4 + )); + } + + let mut data = [0u8; ZISK_PUBLICS * 4]; + // Chunk into 8-byte (u64) values + for (i, chunk) in serialized.chunks(4).enumerate() { + // copy chunk into 32-bit slot, padding with zeros if chunk < 4 bytes + let mut buf = [0u8; 4]; + buf[..chunk.len()].copy_from_slice(chunk); + data[i * 4..(i + 1) * 4].copy_from_slice(&buf); + } + + Ok(Self { data: data.to_vec(), ptr: Cell::new(0) }) + } + + /// Reset the reading pointer to the beginning. + pub fn head(&self) { + self.ptr.set(0); + } + + /// Read raw bytes from public outputs. + pub fn read_slice(&self, slice: &mut [u8]) { + let ptr = self.ptr.get(); + slice.copy_from_slice(&self.data[ptr..ptr + slice.len()]); + self.ptr.set(ptr + slice.len()); + } + + /// Deserialize a value from public outputs. + /// The value must have been previously written with bincode serialization using `commit()`. + pub fn read(&self) -> Result { + let ptr = self.ptr.get(); + let result: T = bincode::deserialize(&self.data[ptr..]) + .map_err(|e| anyhow::anyhow!("Deserialization failed: {}", e))?; + let nb_bytes = bincode::serialized_size(&result) + .map_err(|e| anyhow::anyhow!("Failed to get serialized size: {}", e))?; + self.ptr.set(ptr + nb_bytes as usize); + Ok(result) + } + + pub fn public_bytes(&self) -> Vec { + let mut bytes = [0u8; ZISK_PUBLICS * 8]; + + // Convert the 256 bytes back to ZISK_PUBLICS u64 values (padding upper 32 bits with zeros) + for i in 0..ZISK_PUBLICS { + let start = i * 4; + let val32 = u32::from_le_bytes([ + self.data[start], + self.data[start + 1], + self.data[start + 2], + self.data[start + 3], + ]); + let val64 = val32 as u64; + bytes[i * 8..(i + 1) * 8].copy_from_slice(&val64.to_le_bytes()); + } + + bytes.to_vec() + } + + pub fn public_bytes_solidity(&self) -> Vec { + let mut bytes = [0u8; ZISK_PUBLICS * 4]; + + for i in 0..ZISK_PUBLICS { + let start = i * 4; + let val32 = u32::from_le_bytes([ + self.data[start], + self.data[start + 1], + self.data[start + 2], + self.data[start + 3], + ]); + bytes[i * 4..(i + 1) * 4].copy_from_slice(&val32.to_be_bytes()); + } + + bytes.to_vec() + } + + pub fn hash_solidity(&self, program_vk: &ZiskProgramVK, vadcop_verkey: &[u8]) -> Vec { + let bytes = self.bytes_solidity(program_vk, vadcop_verkey); + + // SHA-256 + let hash = Sha256::digest(&bytes); + + hash.to_vec() + } +} + +impl ZiskPublics { + pub fn bytes_u64(&self, program_vk: &ZiskProgramVK) -> Vec { + let mut bytes = Vec::with_capacity(program_vk.vk.len() + ZISK_PUBLICS * 8); + + bytes.extend(&program_vk.vk); + bytes.extend(self.public_bytes()); + + bytes + } + + pub fn bytes_solidity(&self, program_vk: &ZiskProgramVK, vadcop_verkey: &[u8]) -> Vec { + let mut prefix = [0u8; 32]; + for (i, chunk) in program_vk.vk.chunks_exact(8).enumerate() { + let val = u64::from_le_bytes(chunk.try_into().unwrap()); + prefix[i * 8..(i + 1) * 8].copy_from_slice(&val.to_be_bytes()); + } + + let mut bytes = prefix.to_vec(); + bytes.extend(self.public_bytes_solidity()); + let mut suffix = [0u8; 32]; + for (i, chunk) in vadcop_verkey.chunks_exact(8).enumerate() { + let val = u64::from_le_bytes(chunk.try_into().unwrap()); + suffix[i * 8..(i + 1) * 8].copy_from_slice(&val.to_be_bytes()); + } + bytes.extend(&suffix); + bytes + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ZiskProofWithPublicValues { + pub proof: ZiskProof, + pub publics: ZiskPublics, + pub program_vk: ZiskProgramVK, +} + +impl ZiskProofWithPublicValues { + pub fn new(proof: ZiskProof, publics: ZiskPublics, program_vk: ZiskProgramVK) -> Self { + Self { proof, publics, program_vk } + } + + pub fn save(&self, path: impl AsRef) -> Result<()> { + bincode::serialize_into( + File::create(path.as_ref()).with_context(|| { + format!("failed to create file for saving proof: {}", path.as_ref().display()) + })?, + self, + ) + .map_err(Into::into) + } + + pub fn load(path: impl AsRef) -> Result { + let file = File::open(path.as_ref()).with_context(|| { + format!("failed to open file for loading proof: {}", path.as_ref().display()) + })?; + let proof_with_publics: ZiskProofWithPublicValues = bincode::deserialize_from(file)?; + Ok(proof_with_publics) + } + + pub fn get_vadcop_final_proof(&self) -> Result { + match &self.proof { + ZiskProof::VadcopFinal(proof_bytes) | ZiskProof::VadcopFinalCompressed(proof_bytes) => { + let compressed = matches!(self.proof, ZiskProof::VadcopFinalCompressed(_)); + let mut pubs = self.program_vk.vk.clone(); + pubs.extend(self.publics.public_bytes()); + Ok(VadcopFinalProof::new(proof_bytes.clone(), pubs, compressed)) + } + + _ => Err(anyhow::anyhow!("Proof is not a Vadcop final proof")), + } + } + + pub fn get_proof(&self) -> &ZiskProof { + &self.proof + } + + pub fn get_publics(&self) -> &ZiskPublics { + &self.publics + } + + pub fn get_program_vk(&self) -> &ZiskProgramVK { + &self.program_vk + } + + /// Create ZiskProofWithPublicValues directly from a Vadcop proof byte array. + /// + /// This method parses the proof format (n_publics, publics..., proof...) and extracts + /// the public values and program VK directly, without creating an intermediate VadcopFinalProof. + /// + /// # Parameters + /// + /// * `proof` - The proof as a slice of u64 values + /// * `compressed` - Whether the proof is compressed + /// + /// # Returns + /// + /// A ZiskProofWithPublicValues containing the parsed proof, publics, and program VK + pub fn new_from_vadcop_proof(proof: &[u64], compressed: bool) -> Result { + let vadcop_proof = VadcopFinalProof::new_from_proof(proof, compressed) + .map_err(|e| anyhow::anyhow!("Failed to parse Vadcop proof: {}", e))?; + + let zisk_proof = if compressed { + ZiskProof::VadcopFinalCompressed(vadcop_proof.proof) + } else { + ZiskProof::VadcopFinal(vadcop_proof.proof) + }; + + Ok(Self { + proof: zisk_proof, + publics: ZiskPublics::new(&vadcop_proof.public_values), + program_vk: ZiskProgramVK::new_from_publics(&vadcop_proof.public_values), + }) + } } pub struct ZiskProveResult { - pub execution: ZiskExecutionResult, + pub executor_summary: ZiskExecutorSummary, pub duration: Duration, - pub stats: ExecutorStats, - pub proof: Proof, + stats: ExecutorStatsHandle, + proof_id: Option, + proof_with_publics: ZiskProofWithPublicValues, +} + +impl ZiskProveResult { + pub fn new( + execution: ZiskExecutorSummary, + duration: Duration, + stats: ExecutorStatsHandle, + proof_id: Option, + proof_with_publics: ZiskProofWithPublicValues, + ) -> Self { + Self { executor_summary: execution, duration, stats, proof_id, proof_with_publics } + } + + pub fn new_null( + execution: ZiskExecutorSummary, + duration: Duration, + stats: ExecutorStatsHandle, + ) -> Self { + Self { + executor_summary: execution, + duration, + stats, + proof_id: None, + proof_with_publics: ZiskProofWithPublicValues { + proof: ZiskProof::Null(), + publics: ZiskPublics::new_empty(), + program_vk: ZiskProgramVK::new_empty(), + }, + } + } + + pub fn get_stats(&self) -> &ExecutorStatsHandle { + &self.stats + } + + pub fn get_duration(&self) -> Duration { + self.duration + } + + pub fn get_execution_steps(&self) -> u64 { + self.executor_summary.steps + } + + pub fn get_execution_total_cost(&self) -> u64 { + self.executor_summary.cost_per_type.total_cost() + } + + pub fn get_execution_cost_per_type(&self) -> &StatsCostPerType { + &self.executor_summary.cost_per_type + } + + pub fn get_proof_id(&self) -> Option<&String> { + self.proof_id.as_ref() + } + + pub fn get_proof(&self) -> &ZiskProof { + &self.proof_with_publics.proof + } + + pub fn get_publics(&self) -> &ZiskPublics { + &self.proof_with_publics.publics + } + + pub fn get_program_vk(&self) -> &ZiskProgramVK { + &self.proof_with_publics.program_vk + } + + pub fn get_proof_with_publics(&self) -> &ZiskProofWithPublicValues { + &self.proof_with_publics + } + + pub fn save_proof_with_publics(&self, path: impl AsRef) -> Result<()> { + self.proof_with_publics.save(path) + } + + /// Deserialize a value from public outputs. + /// The value must have been previously written with bincode serialization using `commit()`. + pub fn get_public_values( + &self, + ) -> Result { + self.proof_with_publics.publics.read() + } } pub type ZiskPhaseResult = ProvePhaseResult; @@ -38,32 +750,92 @@ pub struct ZiskAggPhaseResult { } pub trait ProverEngine { + fn setup(&self, elf: &impl ElfBinaryLike) -> Result<(ZiskProgramPK, ZiskProgramVK)>; + fn world_rank(&self) -> i32; fn local_rank(&self) -> i32; - fn set_stdin(&self, stdin: ZiskStdin); + fn set_stdin(&self, stdin: ZiskStdin) -> Result<()>; + + fn register_program(&self, pk: &ZiskProgramPK) -> Result<()>; fn executed_steps(&self) -> u64; - fn execute(&self, stdin: ZiskStdin, output_path: Option) -> Result; + fn get_execution_info(&self) -> Result<(WitnessInfo, ZiskExecutorTime)>; + + fn get_instance_trace( + &self, + instance_id: usize, + first_row: usize, + num_rows: usize, + offset: Option, + ) -> Result>; + + fn get_instance_air_values(&self, instance_id: usize) -> Result>; + + fn get_instance_fixed( + &self, + instance_id: usize, + first_row: usize, + num_rows: usize, + offset: Option, + ) -> Result>; + + fn execute( + &self, + pk: &ZiskProgramPK, + stdin: ZiskStdin, + output_path: Option, + ) -> Result; fn stats( &self, + pk: &ZiskProgramPK, stdin: ZiskStdin, debug_info: Option>, + minimal_memory: bool, mpi_node: Option, - ) -> Result<(i32, i32, Option)>; + ) -> Result<(i32, i32, Option)>; fn verify_constraints_debug( &self, + pk: &ZiskProgramPK, stdin: ZiskStdin, debug_info: Option>, ) -> Result; - fn verify_constraints(&self, stdin: ZiskStdin) -> Result; + fn verify_constraints( + &self, + pk: &ZiskProgramPK, + stdin: ZiskStdin, + ) -> Result; + + fn vk(&self, elf: &impl ElfBinaryLike) -> Result; + + fn verify(&self, proof: &ZiskProof, publics: &ZiskPublics, vk: &ZiskProgramVK) -> Result<()>; - fn prove(&self, stdin: ZiskStdin) -> Result; + fn prove( + &self, + pk: &ZiskProgramPK, + stdin: ZiskStdin, + mode: ProofMode, + proof_options: ProofOpts, + ) -> Result; + + fn prove_snark( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + vk: &ZiskProgramVK, + ) -> Result; + + fn compress( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + vk: &ZiskProgramVK, + ) -> Result; fn prove_phase( &self, @@ -72,6 +844,15 @@ pub trait ProverEngine { phase: ProvePhase, ) -> Result; + fn set_partition( + &self, + total_compute_units: usize, + allocation: Vec, + rank_id: usize, + ) -> Result<()>; + + fn register_aggregated_proofs(&self, agg_proofs: Vec) -> Result<()>; + fn aggregate_proofs( &self, agg_proofs: Vec, @@ -80,7 +861,14 @@ pub trait ProverEngine { options: &ProofOptions, ) -> Result>; - fn mpi_broadcast(&self, data: &mut Vec); + fn mpi_broadcast(&self, data: &mut Vec) -> Result<()>; + + fn prepare_send_proof( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + program_vk: &ZiskProgramVK, + ) -> Result>; } pub trait ZiskBackend: Send + Sync { @@ -97,9 +885,17 @@ impl ZiskProver { Self { prover } } + pub fn setup(&self, elf: &impl ElfBinaryLike) -> Result<(ZiskProgramPK, ZiskProgramVK)> { + self.prover.setup(elf) + } + /// Set the standard input for the current proof. - pub fn set_stdin(&self, stdin: ZiskStdin) { - self.prover.set_stdin(stdin); + pub fn set_stdin(&self, stdin: ZiskStdin) -> Result<()> { + self.prover.set_stdin(stdin) + } + + pub fn register_program(&self, pk: &ZiskProgramPK) -> Result<()> { + self.prover.register_program(pk) } /// Get the world rank of the prover. The world rank is the rank of the prover in the global MPI context. @@ -119,39 +915,114 @@ impl ZiskProver { self.prover.executed_steps() } + pub fn get_execution_info(&self) -> Result<(WitnessInfo, ZiskExecutorTime)> { + self.prover.get_execution_info() + } + /// Execute the prover with the given standard input and output path. /// It only runs the execution without generating a proof. - pub fn execute(&self, stdin: ZiskStdin) -> Result { - self.prover.execute(stdin, None) + pub fn execute(&self, pk: &ZiskProgramPK, stdin: ZiskStdin) -> Result { + self.prover.execute(pk, stdin, None) } /// Get the execution statistics with the given standard input and debug information. pub fn stats( &self, + pk: &ZiskProgramPK, stdin: ZiskStdin, debug_info: Option>, + minimal_memory: bool, mpi_node: Option, - ) -> Result<(i32, i32, Option)> { - self.prover.stats(stdin, debug_info, mpi_node) + ) -> Result<(i32, i32, Option)> { + self.prover.stats(pk, stdin, debug_info, minimal_memory, mpi_node) + } + + /// Get the instance trace for a given instance ID and row range. + pub fn get_instance_trace( + &self, + instance_id: usize, + first_row: usize, + num_rows: usize, + offset: Option, + ) -> Result> { + self.prover.get_instance_trace(instance_id, first_row, num_rows, offset) + } + + /// Get the instance AIR values for a given instance ID. + pub fn get_instance_air_values(&self, instance_id: usize) -> Result> { + self.prover.get_instance_air_values(instance_id) + } + + /// Get the instance fixed for a given instance ID and row range. + pub fn get_instance_fixed( + &self, + instance_id: usize, + first_row: usize, + num_rows: usize, + offset: Option, + ) -> Result> { + self.prover.get_instance_fixed(instance_id, first_row, num_rows, offset) } /// Verify the constraints with the given standard input and debug information. pub fn verify_constraints_debug( &self, + pk: &ZiskProgramPK, stdin: ZiskStdin, debug_info: Option>, ) -> Result { - self.prover.verify_constraints_debug(stdin, debug_info) + self.prover.verify_constraints_debug(pk, stdin, debug_info) } /// Verify the constraints with the given standard input. - pub fn verify_constraints(&self, stdin: ZiskStdin) -> Result { - self.prover.verify_constraints(stdin) + pub fn verify_constraints( + &self, + pk: &ZiskProgramPK, + stdin: ZiskStdin, + ) -> Result { + self.prover.verify_constraints(pk, stdin) + } + + pub fn vk(&self, elf: &impl ElfBinaryLike) -> Result { + self.prover.vk(elf) + } + + pub fn verify( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + vk: &ZiskProgramVK, + ) -> Result<()> { + self.prover.verify(proof, publics, vk) } /// Generate a proof with the given standard input. - pub fn prove(&self, stdin: ZiskStdin) -> Result { - self.prover.prove(stdin) + /// Returns a `ProveBuilder` that allows setting per-proof options before running. + /// + /// # Example + /// ```ignore + /// let result = prover.prove(&pk, stdin).compressed().run()?; + /// ``` + pub fn prove<'a>(&'a self, pk: &'a ZiskProgramPK, stdin: ZiskStdin) -> ProveBuilder<'a, C> { + ProveBuilder::new(&self.prover, pk, stdin) + } + + pub fn prove_snark( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + vk: &ZiskProgramVK, + ) -> Result { + self.prover.prove_snark(proof, publics, vk) + } + + pub fn compress( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + vk: &ZiskProgramVK, + ) -> Result { + self.prover.compress(proof, publics, vk) } pub fn prove_phase( @@ -163,6 +1034,19 @@ impl ZiskProver { self.prover.prove_phase(phase_inputs, options, phase) } + pub fn set_partition( + &self, + total_compute_units: usize, + allocation: Vec, + rank_id: usize, + ) -> Result<()> { + self.prover.set_partition(total_compute_units, allocation, rank_id) + } + + pub fn register_aggregated_proofs(&self, agg_proofs: Vec) -> Result<()> { + self.prover.register_aggregated_proofs(agg_proofs) + } + pub fn aggregate_proofs( &self, agg_proofs: Vec, @@ -174,7 +1058,66 @@ impl ZiskProver { } /// Broadcast data to all MPI processes. - pub fn mpi_broadcast(&self, data: &mut Vec) { - self.prover.mpi_broadcast(data); + pub fn mpi_broadcast(&self, data: &mut Vec) -> Result<()> { + self.prover.mpi_broadcast(data) + } + + pub fn prepare_send_proof( + &self, + proof: &ZiskProof, + publics: &ZiskPublics, + program_vk: &ZiskProgramVK, + ) -> Result> { + self.prover.prepare_send_proof(proof, publics, program_vk) + } +} + +/// Builder for configuring and running a proof. +/// +/// This struct provides a fluent API for setting per-proof options +/// before executing the proof generation. +/// +/// # Example +/// ```ignore +/// let result = prover.prove(stdin).compressed().run()?; +/// ``` +pub struct ProveBuilder<'a, C: ZiskBackend> { + prover: &'a C::Prover, + pk: &'a ZiskProgramPK, + stdin: ZiskStdin, + mode: ProofMode, + proof_options: ProofOpts, +} + +impl<'a, C: ZiskBackend> ProveBuilder<'a, C> { + fn new(prover: &'a C::Prover, pk: &'a ZiskProgramPK, stdin: ZiskStdin) -> Self { + Self { + prover, + pk, + stdin, + mode: ProofMode::VadcopFinal, + proof_options: ProofOpts::default(), + } + } + + /// Enable compressed proof generation. + pub fn compressed(mut self) -> Self { + self.mode = ProofMode::VadcopFinalCompressed; + self + } + + pub fn plonk(mut self) -> Self { + self.mode = ProofMode::Snark; + self + } + + pub fn with_proof_options(mut self, options: ProofOpts) -> Self { + self.proof_options = options; + self + } + + /// Execute the proof generation with the configured options. + pub fn run(self) -> Result { + self.prover.prove(self.pk, self.stdin, self.mode, self.proof_options) } } diff --git a/sdk/src/utils.rs b/sdk/src/utils.rs index f3acf7c70..5dfa1f30f 100644 --- a/sdk/src/utils.rs +++ b/sdk/src/utils.rs @@ -1,43 +1,37 @@ +use fields::PrimeField64; use std::collections::HashMap; -use std::path::{Path, PathBuf}; -use std::{env, fs}; +use std::env; +use std::path::PathBuf; use anyhow::Result; -use proofman_common::{json_to_debug_instances_map, DebugInfo, ProofmanResult}; -use rom_setup::{ - gen_elf_hash, get_elf_bin_file_path, get_elf_data_hash, get_rom_blowup_factor_and_arity, - DEFAULT_CACHE_PATH, +use proofman_common::{ + initialize_logger, json_to_debug_instances_map, DebugInfo, ProofCtx, ProofmanResult, + VerboseMode, }; +use rom_setup::{get_elf_data_hash, rom_merkle_setup}; +use zisk_common::ElfBinaryLike; /// Gets the user's home directory as specified by the HOME environment variable. pub fn get_home_dir() -> String { env::var("HOME").expect("get_home_dir() failed to get HOME environment variable") } -/// Gets the default witness computation library file location in the home installation directory. -pub fn get_default_witness_computation_lib() -> PathBuf { - let extension = if cfg!(target_os = "macos") { "dylib" } else { "so" }; - let witness_computation_lib = - format!("{}/.zisk/bin/libzisk_witness.{}", get_home_dir(), extension); - PathBuf::from(witness_computation_lib) -} - /// Gets the default proving key file location in the home installation directory. pub fn get_default_proving_key() -> PathBuf { let proving_key = format!("{}/.zisk/provingKey", get_home_dir()); PathBuf::from(proving_key) } -/// Gets the default zisk folder location in the home installation directory. -pub fn get_home_zisk_path() -> PathBuf { - let zisk_path = format!("{}/.zisk", get_home_dir()); - PathBuf::from(zisk_path) +/// Gets the default proving key file location in the home installation directory. +pub fn get_default_proving_key_snark() -> PathBuf { + let proving_key_snark = format!("{}/.zisk/provingKeySnark", get_home_dir()); + PathBuf::from(proving_key_snark) } /// Gets the default zisk folder location in the home installation directory. -pub fn get_default_zisk_path() -> PathBuf { - let zisk_path = format!("{}/.zisk/zisk", get_home_dir()); +pub fn get_home_zisk_path() -> PathBuf { + let zisk_path = format!("{}/.zisk", get_home_dir()); PathBuf::from(zisk_path) } @@ -73,70 +67,36 @@ pub fn cli_fail_if_macos() -> anyhow::Result<()> { } } -/// If the feature "gpu" is enabled, returns an error indicating that the command is not supported. -pub fn cli_fail_if_gpu_mode() -> anyhow::Result<()> { - if cfg!(feature = "gpu") { - Err(anyhow::anyhow!("Command is not supported on GPU mode")) - } else { - Ok(()) - } -} - -/// Gets the witness computation library file location. -/// Uses the default one if not specified by user. -pub fn get_witness_computation_lib(witness_lib: Option<&PathBuf>) -> PathBuf { - witness_lib.cloned().unwrap_or_else(get_default_witness_computation_lib) -} - /// Gets the proving key file location. /// Uses the default one if not specified by user. pub fn get_proving_key(proving_key: Option<&PathBuf>) -> PathBuf { proving_key.cloned().unwrap_or_else(get_default_proving_key) } -/// Gets the zisk folder. +/// Gets the proving key file location. /// Uses the default one if not specified by user. -pub fn get_zisk_path(zisk_path: Option<&PathBuf>) -> PathBuf { - zisk_path.cloned().unwrap_or_else(get_default_zisk_path) +pub fn get_proving_key_snark(proving_key_snark: Option<&PathBuf>) -> PathBuf { + proving_key_snark.cloned().unwrap_or_else(get_default_proving_key_snark) } -pub fn ensure_custom_commits(proving_key: &Path, elf: &Path) -> Result { - // Ensure cache directory exists - let default_cache_path = std::env::var("HOME") - .map(PathBuf::from) - .map_err(|e| anyhow::anyhow!("Failed to read HOME environment variable: {e}"))? - .join(DEFAULT_CACHE_PATH); - - if let Err(e) = fs::create_dir_all(&default_cache_path) { - if e.kind() != std::io::ErrorKind::AlreadyExists { - panic!("Failed to create cache directory: {e:?}"); - } - } - - // Get the blowup factor as the custom commits filename is formed using it - // {ELF_HASH}_{PILOUT_HASH}_{ROM_NUM_ROWS}_{BLOWUP_FACTOR}.bin - let (blowup_factor, merkle_tree_arity) = get_rom_blowup_factor_and_arity(proving_key); - - // Compute the path for the custom commits file - let rom_bin_path = - get_elf_bin_file_path(elf, &default_cache_path, blowup_factor, merkle_tree_arity)?; - - // Check if the custom commits file exists, if not generate it - if !rom_bin_path.exists() { - let _ = gen_elf_hash(elf, rom_bin_path.as_path(), blowup_factor, merkle_tree_arity, false) - .map_err(|e| anyhow::anyhow!("Error generating elf hash: {}", e)); - } - - Ok(rom_bin_path) +pub fn ensure_custom_commits( + pctx: &ProofCtx, + elf: &impl ElfBinaryLike, +) -> Result<(PathBuf, Vec)> { + rom_merkle_setup(pctx, elf, &None) } -pub fn get_custom_commits_map(proving_key: &Path, elf: &Path) -> Result> { - let rom_bin_path = ensure_custom_commits(proving_key, elf)?; +pub fn get_custom_commits_map( + pctx: &ProofCtx, + elf: &impl ElfBinaryLike, +) -> Result> { + let (rom_bin_path, _) = ensure_custom_commits(pctx, elf)?; Ok(HashMap::from([("rom".to_string(), rom_bin_path)])) } -pub fn get_asm_paths(elf: &Path) -> Result<(String, String)> { - let stem = elf.file_stem().unwrap().to_str().unwrap(); +pub fn get_asm_paths(elf: &impl ElfBinaryLike) -> Result<(String, String)> { + let stem = elf.name(); + let stem = if elf.with_hints() { format!("{stem}-hints") } else { stem.to_string() }; let hash = get_elf_data_hash(elf).map_err(|e| anyhow::anyhow!("Error computing ELF hash: {}", e))?; @@ -160,3 +120,7 @@ pub fn create_debug_info( Some(Some(debug_value)) => json_to_debug_instances_map(proving_key, debug_value.clone()), } } + +pub fn setup_logger(verbose: VerboseMode) { + initialize_logger(verbose, None); +} diff --git a/sdk/src/verifier.rs b/sdk/src/verifier.rs new file mode 100644 index 000000000..303cf13ce --- /dev/null +++ b/sdk/src/verifier.rs @@ -0,0 +1,126 @@ +use crate::{ + get_default_proving_key, get_default_proving_key_snark, ZiskProgramVK, ZiskProof, ZiskPublics, +}; +use anyhow::{anyhow, Ok, Result}; +use proofman::{get_vadcop_final_proof_vkey, verify_snark_proof, SnarkProof, SnarkProtocol}; +use proofman_util::VadcopFinalProof; +use proofman_verifier::{verify_vadcop_final, verify_vadcop_final_compressed}; +use rom_setup::rom_merkle_setup_verkey; +use sha2::{Digest, Sha256}; +use std::path::PathBuf; +use zisk_common::ElfBinaryLike; + +pub fn verify_zisk_snark_proof( + proof: &ZiskProof, + publics: &ZiskPublics, + program_vk: &ZiskProgramVK, +) -> Result<()> { + let proving_key = get_default_proving_key(); + let proving_key_snark = get_default_proving_key_snark(); + + verify_zisk_snark_proof_with_proving_key( + proof, + publics, + program_vk, + proving_key, + proving_key_snark, + ) +} + +pub fn verify_zisk_proof( + zisk_proof: &ZiskProof, + publics: &ZiskPublics, + program_vk: &ZiskProgramVK, +) -> Result<()> { + let proving_key = get_default_proving_key(); + verify_zisk_proof_with_proving_key(zisk_proof, publics, program_vk, proving_key) +} + +pub fn get_program_vk(elf: &impl ElfBinaryLike) -> Result { + let proving_key_path = get_default_proving_key(); + get_program_vk_with_proving_key(elf, proving_key_path) +} + +pub fn verify_zisk_snark_proof_with_proving_key( + proof: &ZiskProof, + publics: &ZiskPublics, + program_vk: &ZiskProgramVK, + proving_key: PathBuf, + proving_key_snark: PathBuf, +) -> Result<()> { + match &proof { + ZiskProof::Plonk(proof_bytes) | ZiskProof::Fflonk(proof_bytes) => { + let protocol_id = if let ZiskProof::Plonk(_) = &proof { + SnarkProtocol::Plonk.protocol_id() + } else { + SnarkProtocol::Fflonk.protocol_id() + }; + + if !proving_key_snark.exists() { + return Err(anyhow!( + "Proving key snark path does not exist: {}", + proving_key_snark.display() + )); + } + + let verkey = get_vadcop_final_proof_vkey(&proving_key, false)?; + + let pubs = publics.bytes_solidity(program_vk, &verkey); + let hash = Sha256::digest(&pubs).to_vec(); + + let snark_proof = SnarkProof { + proof_bytes: proof_bytes.clone(), + public_bytes: pubs, + public_snark_bytes: hash, + protocol_id, + }; + + let verkey_path = PathBuf::from(format!( + "{}/{}/{}.verkey.json", + proving_key_snark.display(), + "final", + "final" + )); + Ok(verify_snark_proof(&snark_proof, &verkey_path)?) + } + _ => Err(anyhow!("Not a snark proof.")), + } +} + +pub fn verify_zisk_proof_with_proving_key( + zisk_proof: &ZiskProof, + publics: &ZiskPublics, + program_vk: &ZiskProgramVK, + proving_key: PathBuf, +) -> Result<()> { + match &zisk_proof { + ZiskProof::VadcopFinal(proof_bytes) | ZiskProof::VadcopFinalCompressed(proof_bytes) => { + let compressed = matches!(zisk_proof, ZiskProof::VadcopFinalCompressed(_)); + let mut pubs = program_vk.vk.clone(); + pubs.extend(publics.public_bytes()); + let vadcop_final_proof = VadcopFinalProof::new(proof_bytes.clone(), pubs, compressed); + + let vk = get_vadcop_final_proof_vkey(&proving_key, compressed)?; + let is_valid = if compressed { + verify_vadcop_final_compressed(&vadcop_final_proof, &vk) + } else { + verify_vadcop_final(&vadcop_final_proof, &vk) + }; + + if !is_valid { + Err(anyhow!("Zisk Proof was not verified")) + } else { + Ok(()) + } + } + _ => Err(anyhow!("Not a Vadcop final proof.")), + } +} + +pub fn get_program_vk_with_proving_key( + elf: &impl ElfBinaryLike, + proving_key_path: PathBuf, +) -> Result { + let vk = rom_merkle_setup_verkey(elf, &None, &proving_key_path)?; + Ok(ZiskProgramVK { vk }) +} diff --git a/sdk/src/zisk_lib_loader.rs b/sdk/src/zisk_lib_loader.rs deleted file mode 100644 index 62b47fc36..000000000 --- a/sdk/src/zisk_lib_loader.rs +++ /dev/null @@ -1,78 +0,0 @@ -use std::path::PathBuf; - -use fields::PrimeField64; -use libloading::{Library, Symbol}; -use proofman_common::VerboseMode; -use zisk_common::{ZiskLib, ZiskLibInitFn}; - -use anyhow::Result; - -use crate::get_witness_computation_lib; - -#[derive(Default)] -pub struct ZiskLibLoader; - -impl ZiskLibLoader { - #[allow(clippy::too_many_arguments)] - fn load_library( - witness_lib: PathBuf, - elf: PathBuf, - verbose: VerboseMode, - shared_tables: bool, - asm_mt_filename: Option, - asm_rh_filename: Option, - base_port: Option, - unlock_mapped_memory: Option, - ) -> Result<(Library, Box>)> { - let lib_path = get_witness_computation_lib(Some(&witness_lib)); - let library = unsafe { Library::new(lib_path) }?; - - let witness_lib_constructor: Symbol> = - unsafe { library.get(b"init_library")? }; - - let witness_lib = witness_lib_constructor( - verbose, - elf, - asm_mt_filename, - asm_rh_filename, - base_port, - unlock_mapped_memory.unwrap_or(false), - shared_tables, - ) - .expect("Failed to initialize witness library"); - - Ok((library, witness_lib)) - } - - pub fn load_emu( - witness_lib: PathBuf, - elf: PathBuf, - verbose: VerboseMode, - shared_tables: bool, - ) -> Result<(Library, Box>)> { - Self::load_library(witness_lib, elf, verbose, shared_tables, None, None, None, None) - } - - #[allow(clippy::too_many_arguments)] - pub fn load_asm( - witness_lib: PathBuf, - elf: PathBuf, - verbose: VerboseMode, - shared_tables: bool, - asm_mt_filename: PathBuf, - asm_rh_filename: PathBuf, - base_port: Option, - unlock_mapped_memory: bool, - ) -> Result<(Library, Box>)> { - Self::load_library( - witness_lib, - elf, - verbose, - shared_tables, - Some(asm_mt_filename), - Some(asm_rh_filename), - base_port, - Some(unlock_mapped_memory), - ) - } -} diff --git a/sdk/src/ziskemu.rs b/sdk/src/ziskemu.rs new file mode 100644 index 000000000..513a08706 --- /dev/null +++ b/sdk/src/ziskemu.rs @@ -0,0 +1,40 @@ +use std::fmt::Write; +use zisk_common::io::ZiskStdin; +use zisk_common::ElfBinaryLike; +use zisk_core::Riscv2zisk; +pub use ziskemu::EmuOptions; +use ziskemu::ZiskEmulator; + +pub fn ziskemu( + elf: &impl ElfBinaryLike, + stdin: ZiskStdin, + options: &EmuOptions, +) -> anyhow::Result<()> { + let riscv2zisk = Riscv2zisk::new(elf.elf()); + + let zisk_rom = riscv2zisk + .run() + .map_err(|e| anyhow::anyhow!("Failed to convert ELF to ZISK ROM: {e:?}"))?; + + let callback = None::>; + + let inputs = stdin.read_raw_bytes(); + + let options = EmuOptions { elf: elf.path(), ..options.clone() }; + let result = ZiskEmulator::process_rom(&zisk_rom, &inputs, &options, callback); + match result { + Ok(result) => { + // println!("Emulation completed successfully"); + result.iter().fold(String::new(), |mut acc, byte| { + write!(&mut acc, "{byte:02x}").unwrap(); + acc + }); + Ok(()) + // print!("Result: 0x{}", hex_string); + } + Err(e) => { + eprintln!("Error during emulation: {e:?}"); + Err(anyhow::anyhow!("Emulation failed")) + } + } +} diff --git a/server/Cargo.toml b/server/Cargo.toml deleted file mode 100644 index 1f0464cec..000000000 --- a/server/Cargo.toml +++ /dev/null @@ -1,47 +0,0 @@ -[package] -name = "server" -version = { workspace = true } -edition = { workspace = true } -license = { workspace = true } -keywords = { workspace = true } -repository = { workspace = true } -categories = { workspace = true } - -build = "build.rs" - -[dependencies] -zisk-common = { workspace = true } -executor = { workspace = true } -zisk-witness = { workspace = true } -asm-runner = { workspace = true } - -proofman = { workspace = true } -proofman-common = { workspace = true } -witness = { workspace = true } -anyhow = { workspace = true} -libloading = { workspace = true } -colored = { workspace = true } -fields = { workspace = true } -tracing = { workspace = true} - -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -uuid = { version = "1.6", features = ["v4"] } -clap = { workspace = true } -bytemuck = { workspace = true } -zstd = { workspace = true } - -# Distributed mode (mpi) is only supported on Linux x86_64 -[target.'cfg(all(target_os = "linux", target_arch = "x86_64"))'.dependencies] -mpi = { workspace = true } -named-sem = { workspace = true } - -[features] -default = [] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed"] -stats = [] - -[lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(distributed)'] } \ No newline at end of file diff --git a/server/build.rs b/server/build.rs deleted file mode 100644 index 8da52f323..000000000 --- a/server/build.rs +++ /dev/null @@ -1,11 +0,0 @@ -fn main() { - let disable_distributed = - std::env::vars().any(|(k, _)| k == "CARGO_FEATURE_DISABLE_DISTRIBUTED"); - let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); - let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); - - // Distributed feature is only available on linux x86_64 - if !disable_distributed && target_os == "linux" && target_arch == "x86_64" { - println!("cargo:rustc-cfg=distributed"); - } -} diff --git a/server/src/handler_prove.rs b/server/src/handler_prove.rs deleted file mode 100644 index a0eed6cd8..000000000 --- a/server/src/handler_prove.rs +++ /dev/null @@ -1,182 +0,0 @@ -use bytemuck::cast_slice; -use colored::Colorize; -use fields::Goldilocks; -use proofman::ProofMan; -use proofman::{ProofInfo, ProvePhase, ProvePhaseInputs, ProvePhaseResult}; -use proofman_common::ProofOptions; -use serde::{Deserialize, Serialize}; -use std::io::Write; -use std::sync::Arc; -use std::thread::JoinHandle; -use std::{fs::File, path::PathBuf}; -use zisk_common::{ExecutorStats, ProofLog, ZiskExecutionResult, ZiskLib}; - -use crate::{ - ServerConfig, ZiskBaseResponse, ZiskCmdResult, ZiskResponse, ZiskResultCode, ZiskService, -}; - -#[cfg(feature = "stats")] -use zisk_common::ExecutorStatsEvent; - -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] -pub struct ZiskProveRequest { - pub input: PathBuf, - pub aggregation: bool, - pub rma: bool, - pub final_snark: bool, - pub verify_proofs: bool, - pub minimal_memory: bool, - pub folder: PathBuf, - pub prefix: String, -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct ZiskProveResponse { - #[serde(flatten)] - pub base: ZiskBaseResponse, - - server_id: String, - elf_file: String, - input: String, -} -pub struct ZiskServiceProveHandler; - -impl ZiskServiceProveHandler { - pub fn handle( - config: Arc, - request: ZiskProveRequest, - // It is important to keep the witness_lib declaration before the proofman declaration - // to ensure that the witness library is dropped before the proofman. - witness_lib: Arc>>, - proofman: Arc>, - is_busy: Arc, - ) -> (ZiskResponse, Option>) { - is_busy.store(true, std::sync::atomic::Ordering::SeqCst); - - let handle = std::thread::spawn({ - let request_input = request.input.clone(); - let config = config.clone(); - move || { - let start = std::time::Instant::now(); - - let result = proofman - .generate_proof_from_lib( - ProvePhaseInputs::Full(ProofInfo::new(Some(request_input), 1, vec![0], 0)), - ProofOptions::new( - false, - request.aggregation, - request.rma, - request.final_snark, - request.verify_proofs, - request.minimal_memory, - false, - request.folder.clone(), - ), - ProvePhase::Full, - ) - .map_err(|e| anyhow::anyhow!("Error generating proof: {}", e)) - .expect("Failed to generate proof"); - - let (proof_id, vadcop_final_proof) = - if let ProvePhaseResult::Full(proof_id, vadcop_final_proof) = result { - (proof_id, vadcop_final_proof) - } else { - (None, None) - }; - - let elapsed = start.elapsed(); - - if proofman.rank().unwrap() == 0 { - #[allow(clippy::type_complexity)] - let (result, mut _stats): ( - ZiskExecutionResult, - ExecutorStats, - ) = witness_lib.execution_result().expect("Failed to get execution result"); - - proofman.set_barrier(); - let elapsed = elapsed.as_secs_f64(); - tracing::info!(""); - tracing::info!( - "{}", - "--- PROVE SUMMARY ------------------------".bright_green().bold() - ); - if let Some(proof_id) = &proof_id { - tracing::info!(" Proof ID: {}", proof_id); - } - tracing::info!(" ► Statistics"); - tracing::info!( - " time: {} seconds, steps: {}", - elapsed, - result.executed_steps - ); - - // Store the stats in stats.json - #[cfg(feature = "stats")] - { - let stats_id = _stats.next_id(); - _stats.add_stat(0, stats_id, "END", 0, ExecutorStatsEvent::Mark); - _stats.store_stats(); - } - - if let Some(proof_id) = proof_id { - let logs = ProofLog::new(result.executed_steps, proof_id, elapsed); - let log_path = - request.folder.join(format!("{}-result.json", request.prefix)); - println!("Writing proof log to: {}", log_path.display()); - ProofLog::write_json_log(&log_path, &logs) - .map_err(|e| anyhow::anyhow!("Error generating log: {}", e)) - .expect("Failed to generate proof"); - // Save the uncompressed vadcop final proof - let output_file_path = request - .folder - .join(format!("{}-vadcop_final_proof.bin", request.prefix)); - - let vadcop_proof = vadcop_final_proof.unwrap(); - let proof_data = cast_slice(&vadcop_proof); - let mut file = - File::create(&output_file_path).expect("Error while creating file"); - file.write_all(proof_data).expect("Error while writing to file"); - } - } - is_busy.store(false, std::sync::atomic::Ordering::SeqCst); - ZiskService::print_waiting_message(&config); - } - }); - - ( - ZiskResponse::ZiskProveResponse(ZiskProveResponse { - base: ZiskBaseResponse { - cmd: "prove".to_string(), - result: ZiskCmdResult::InProgress, - code: ZiskResultCode::Ok, - msg: None, - node: config.asm_runner_options.world_rank, - }, - server_id: config.server_id.to_string(), - elf_file: config.elf.display().to_string(), - input: request.input.display().to_string(), - }), - Some(handle), - ) - } - pub fn process_handle(request: ZiskProveRequest, proofman: Arc>) { - proofman - .generate_proof_from_lib( - ProvePhaseInputs::Full(ProofInfo::new(Some(request.input), 1, vec![0], 0)), - ProofOptions::new( - false, - request.aggregation, - request.rma, - request.final_snark, - request.verify_proofs, - request.minimal_memory, - false, - request.folder.clone(), - ), - ProvePhase::Full, - ) - .map_err(|e| anyhow::anyhow!("Error generating proof: {}", e)) - .expect("Failed to generate proof"); - proofman.set_barrier(); - } -} diff --git a/server/src/handler_shutdown.rs b/server/src/handler_shutdown.rs deleted file mode 100644 index 4185a9d12..000000000 --- a/server/src/handler_shutdown.rs +++ /dev/null @@ -1,64 +0,0 @@ -use std::thread::JoinHandle; - -use asm_runner::AsmServices; - -use serde::{Deserialize, Serialize}; - -use crate::{ServerConfig, ZiskBaseResponse, ZiskResponse}; - -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] -pub struct ZiskShutdownRequest; - -#[derive(Serialize, Deserialize, Debug)] -pub struct ZiskShutdownResponse { - #[serde(flatten)] - pub base: ZiskBaseResponse, -} - -pub struct ZiskServiceShutdownHandler; - -impl ZiskServiceShutdownHandler { - pub fn handle( - config: &ServerConfig, - _payload: ZiskShutdownRequest, - asm_services: Option<&AsmServices>, - ) -> (ZiskResponse, Option>) { - tracing::info!( - "<<< [{}] Shutting down ASM microservices.", - config.asm_runner_options.world_rank - ); - - if let Some(asm_services) = asm_services { - let shutdown_result = asm_services.stop_asm_services(); - - if let Err(e) = shutdown_result { - tracing::error!("Failed to stop ASM services: {}", e); - return ( - ZiskResponse::ZiskShutdownResponse(ZiskShutdownResponse { - base: ZiskBaseResponse { - cmd: "shutdown".to_string(), - result: crate::ZiskCmdResult::Error, - code: crate::ZiskResultCode::Error, - msg: Some(format!("Failed to stop ASM services: {e}")), - node: config.asm_runner_options.world_rank, - }, - }), - None, - ); - } - } - - ( - ZiskResponse::ZiskShutdownResponse(ZiskShutdownResponse { - base: ZiskBaseResponse { - cmd: "shutdown".to_string(), - result: crate::ZiskCmdResult::Ok, - code: crate::ZiskResultCode::Ok, - msg: None, - node: config.asm_runner_options.world_rank, - }, - }), - None, - ) - } -} diff --git a/server/src/handler_status.rs b/server/src/handler_status.rs deleted file mode 100644 index 11f244589..000000000 --- a/server/src/handler_status.rs +++ /dev/null @@ -1,62 +0,0 @@ -use std::{ - sync::{atomic::AtomicBool, Arc}, - thread::JoinHandle, -}; - -use crate::{ServerConfig, ZiskBaseResponse, ZiskResponse}; - -use serde::{Deserialize, Serialize}; - -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] -pub struct ZiskStatusRequest; - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "snake_case")] -pub enum ZiskStatus { - Idle, - Working, -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct ZiskStatusResponse { - #[serde(flatten)] - pub base: ZiskBaseResponse, - - server_id: String, - elf_file: String, - uptime: String, - status: ZiskStatus, -} - -pub struct ZiskServiceStatusHandler; - -impl ZiskServiceStatusHandler { - pub fn handle( - config: &ServerConfig, - _payload: ZiskStatusRequest, - is_busy: Arc, - ) -> (ZiskResponse, Option>) { - let uptime = config.launch_time.elapsed(); - - ( - ZiskResponse::ZiskStatusResponse(ZiskStatusResponse { - base: ZiskBaseResponse { - cmd: "status".to_string(), - result: crate::ZiskCmdResult::Ok, - code: crate::ZiskResultCode::Ok, - msg: None, - node: config.asm_runner_options.world_rank, - }, - server_id: config.server_id.to_string(), - elf_file: config.elf.display().to_string(), - uptime: format!("{uptime:.2?}"), - status: if is_busy.load(std::sync::atomic::Ordering::SeqCst) { - ZiskStatus::Working - } else { - ZiskStatus::Idle - }, - }), - None, - ) - } -} diff --git a/server/src/handler_verify_constraints.rs b/server/src/handler_verify_constraints.rs deleted file mode 100644 index 518b88863..000000000 --- a/server/src/handler_verify_constraints.rs +++ /dev/null @@ -1,114 +0,0 @@ -use std::{path::PathBuf, sync::Arc, thread::JoinHandle}; - -use crate::{ - ServerConfig, ZiskBaseResponse, ZiskCmdResult, ZiskResponse, ZiskResultCode, ZiskService, -}; -use colored::Colorize; -use fields::Goldilocks; -use proofman::ProofMan; -use proofman_common::DebugInfo; -use serde::{Deserialize, Serialize}; -use zisk_common::{ExecutorStats, ZiskExecutionResult, ZiskLib}; - -#[cfg(feature = "stats")] -use zisk_common::ExecutorStatsEvent; - -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] -pub struct ZiskVerifyConstraintsRequest { - pub input: PathBuf, -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct ZiskVerifyConstraintsResponse { - #[serde(flatten)] - pub base: ZiskBaseResponse, - - server_id: String, - elf_file: String, - input: String, -} - -pub struct ZiskServiceVerifyConstraintsHandler; - -impl ZiskServiceVerifyConstraintsHandler { - pub fn handle( - config: Arc, - request: ZiskVerifyConstraintsRequest, - // It is important to keep the witness_lib declaration before the proofman declaration - // to ensure that the witness library is dropped before the proofman. - witness_lib: Arc>>, - proofman: Arc>, - is_busy: Arc, - debug_info: Arc, - ) -> (ZiskResponse, Option>) { - is_busy.store(true, std::sync::atomic::Ordering::SeqCst); - - let handle = std::thread::spawn({ - let config = config.clone(); - move || { - let start = std::time::Instant::now(); - - proofman - .verify_proof_constraints_from_lib(&debug_info, false) - .map_err(|e| anyhow::anyhow!("Error verifying proof: {}", e)) - .expect("Failed to generate proof"); - proofman.set_barrier(); - let elapsed = start.elapsed(); - - #[allow(clippy::type_complexity)] - let (result, mut _stats): (ZiskExecutionResult, ExecutorStats) = - witness_lib.execution_result().expect("Failed to get execution result"); - - println!(); - tracing::info!( - "{}", - "--- VERIFY CONSTRAINTS SUMMARY ------------------------".bright_green().bold() - ); - tracing::info!(" ► Statistics"); - tracing::info!( - " time: {} seconds, steps: {}", - elapsed.as_secs_f32(), - result.executed_steps - ); - - is_busy.store(false, std::sync::atomic::Ordering::SeqCst); - ZiskService::print_waiting_message(&config); - - // Store the stats in stats.json - #[cfg(feature = "stats")] - { - let stats_id = _stats.next_id(); - _stats.add_stat(0, stats_id, "END", 0, ExecutorStatsEvent::Mark); - _stats.store_stats(); - } - } - }); - - ( - ZiskResponse::ZiskVerifyConstraintsResponse(ZiskVerifyConstraintsResponse { - base: ZiskBaseResponse { - cmd: "verify_constraints".to_string(), - result: ZiskCmdResult::InProgress, - code: ZiskResultCode::Ok, - msg: None, - node: config.asm_runner_options.world_rank, - }, - server_id: config.server_id.to_string(), - elf_file: config.elf.display().to_string(), - input: request.input.display().to_string(), - }), - Some(handle), - ) - } - pub fn process_handle( - _request: ZiskVerifyConstraintsRequest, - proofman: Arc>, - debug_info: Arc, - ) { - proofman - .verify_proof_constraints_from_lib(&debug_info, false) - .map_err(|e| anyhow::anyhow!("Error verifying proof: {}", e)) - .expect("Failed to generate proof"); - proofman.set_barrier(); - } -} diff --git a/server/src/lib.rs b/server/src/lib.rs deleted file mode 100644 index 3ac5597ba..000000000 --- a/server/src/lib.rs +++ /dev/null @@ -1,11 +0,0 @@ -mod handler_prove; -mod handler_shutdown; -mod handler_status; -mod handler_verify_constraints; -mod zisk_service; - -pub use handler_prove::*; -pub use handler_shutdown::*; -pub use handler_status::*; -pub use handler_verify_constraints::*; -pub use zisk_service::*; diff --git a/server/src/zisk_service.rs b/server/src/zisk_service.rs deleted file mode 100644 index e23d32fe9..000000000 --- a/server/src/zisk_service.rs +++ /dev/null @@ -1,609 +0,0 @@ -use std::{ - collections::HashMap, - fmt, - io::{BufRead, BufReader, Write}, - net::{TcpListener, TcpStream}, - path::PathBuf, - sync::{atomic::AtomicBool, Arc}, - time::Instant, -}; - -use asm_runner::{AsmRunnerOptions, AsmServices}; -use fields::Goldilocks; -use libloading::{Library, Symbol}; -use proofman::ProofMan; -use proofman_common::{initialize_logger, DebugInfo, ParamsGPU}; -use serde::{Deserialize, Serialize}; -use tracing::error; -use uuid::Uuid; -use zisk_common::{info_file, ZiskLib, ZiskLibInitFn}; - -use anyhow::Result; - -use crate::{ - handler_prove::{ZiskProveRequest, ZiskServiceProveHandler}, - handler_shutdown::ZiskServiceShutdownHandler, - handler_status::{ZiskStatusRequest, ZiskStatusResponse}, - handler_verify_constraints::{ - ZiskServiceVerifyConstraintsHandler, ZiskVerifyConstraintsRequest, - }, - ZiskProveResponse, ZiskServiceStatusHandler, ZiskShutdownRequest, ZiskShutdownResponse, - ZiskVerifyConstraintsResponse, -}; - -pub struct ZiskServerParams { - /// Port number for the server to listen on - pub port: u16, - - /// Path to the ELF file - pub elf: PathBuf, - - /// Path to the witness computation dynamic library - pub witness_lib: PathBuf, - - /// Path to the ASM file (optional) - pub asm: Option, - - /// Path to the ASM ROM file (optional) - pub asm_rom: Option, - - pub asm_port: Option, - - /// Map of custom commits - pub custom_commits_map: HashMap, - - /// Flag indicating whether to use the prebuilt emulator - pub emulator: bool, - - /// Path to the proving key - pub proving_key: PathBuf, - - /// Verbosity level for logging - pub verbose: u8, - - /// Debug information - pub debug_info: DebugInfo, - - /// Time when the server was launched - pub launch_time: Instant, - - /// Unique identifier for the server instance - pub server_id: Uuid, - - pub verify_constraints: bool, - pub aggregation: bool, - pub final_snark: bool, - - pub gpu_params: ParamsGPU, - - pub unlock_mapped_memory: bool, - - pub shared_tables: bool, -} - -#[allow(clippy::too_many_arguments)] -impl ZiskServerParams { - pub fn new( - port: u16, - elf: PathBuf, - witness_lib: PathBuf, - asm: Option, - asm_rom: Option, - asm_port: Option, - custom_commits_map: HashMap, - emulator: bool, - proving_key: PathBuf, - verbose: u8, - debug: DebugInfo, - verify_constraints: bool, - aggregation: bool, - final_snark: bool, - gpu_params: ParamsGPU, - unlock_mapped_memory: bool, - shared_tables: bool, - ) -> Self { - Self { - port, - elf, - witness_lib, - asm, - asm_rom, - asm_port, - custom_commits_map, - emulator, - proving_key, - verbose, - debug_info: debug, - launch_time: Instant::now(), - server_id: Uuid::new_v4(), - verify_constraints, - aggregation, - final_snark, - gpu_params, - unlock_mapped_memory, - shared_tables, - } - } -} - -pub struct ServerConfig { - /// Port number for the server to listen on - pub port: u16, - - /// Path to the ELF file - pub elf: PathBuf, - - /// Path to the witness computation dynamic library - pub witness_lib: PathBuf, - - /// Path to the ASM file (optional) - pub asm: Option, - - /// Path to the ASM ROM file (optional) - pub asm_rom: Option, - - /// Map of custom commits - pub custom_commits_map: HashMap, - - /// Flag indicating whether to use the prebuilt emulator - pub emulator: bool, - - /// Path to the proving key - pub proving_key: PathBuf, - - /// Verbosity level for logging - pub verbose: u8, - - /// Debug information - pub debug_info: Arc, - - /// Time when the server was launched - pub launch_time: Instant, - - /// Unique identifier for the server instance - pub server_id: Uuid, - - /// Additional options for the ASM runner - pub asm_runner_options: AsmRunnerOptions, - - pub verify_constraints: bool, - pub aggregation: bool, - pub final_snark: bool, - - pub gpu_params: ParamsGPU, - - pub shared_tables: bool, -} - -#[allow(clippy::too_many_arguments)] -impl ServerConfig { - pub fn new( - port: u16, - elf: PathBuf, - witness_lib: PathBuf, - asm: Option, - asm_rom: Option, - custom_commits_map: HashMap, - emulator: bool, - proving_key: PathBuf, - verbose: u8, - debug: DebugInfo, - asm_runner_options: AsmRunnerOptions, - verify_constraints: bool, - aggregation: bool, - final_snark: bool, - gpu_params: ParamsGPU, - shared_tables: bool, - ) -> Self { - Self { - port, - elf, - witness_lib, - asm, - asm_rom, - custom_commits_map, - emulator, - proving_key, - verbose, - debug_info: Arc::new(debug), - launch_time: Instant::now(), - server_id: Uuid::new_v4(), - asm_runner_options, - verify_constraints, - aggregation, - final_snark, - gpu_params, - shared_tables, - } - } -} - -#[derive(Serialize, Deserialize, Debug)] -#[serde(tag = "command", rename_all = "snake_case")] -pub enum ZiskRequest { - Status { - #[serde(flatten)] - payload: ZiskStatusRequest, - }, - Shutdown { - #[serde(flatten)] - payload: ZiskShutdownRequest, - }, - Prove { - #[serde(flatten)] - payload: ZiskProveRequest, - }, - VerifyConstraints { - #[serde(flatten)] - payload: ZiskVerifyConstraintsRequest, - }, -} - -impl fmt::Display for ZiskRequest { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let variant = match self { - ZiskRequest::Status { .. } => "Status", - ZiskRequest::Shutdown { .. } => "Shutdown", - ZiskRequest::Prove { .. } => "Prove", - ZiskRequest::VerifyConstraints { .. } => "VerifyConstraints", - }; - write!(f, "{variant}") - } -} - -#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)] -#[serde(rename_all = "snake_case")] -pub enum ZiskCmdResult { - Ok, - Error, - InProgress, - Busy, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(u32)] -pub enum ZiskResultCode { - Ok = 0, - Error = 1001, - InvalidRequest = 1002, - Busy = 1003, -} - -// Serialize as a number -impl Serialize for ZiskResultCode { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - serializer.serialize_u32(*self as u32) - } -} - -// Deserialize from a number -impl<'de> Deserialize<'de> for ZiskResultCode { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let value = u32::deserialize(deserializer)?; - match value { - 0 => Ok(ZiskResultCode::Ok), - 1001 => Ok(ZiskResultCode::Error), - 1002 => Ok(ZiskResultCode::InvalidRequest), - 1003 => Ok(ZiskResultCode::Busy), - _ => Err(serde::de::Error::custom(format!("Unknown ZiskResultCode: {value}"))), - } - } -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct ZiskBaseResponse { - pub cmd: String, - pub result: ZiskCmdResult, - pub code: ZiskResultCode, - pub node: i32, - - #[serde(skip_serializing_if = "Option::is_none")] - pub msg: Option, -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct ZiskInvalidRequestResponse { - #[serde(flatten)] - pub base: ZiskBaseResponse, -} - -#[derive(Serialize, Deserialize, Debug)] -#[serde(tag = "zisk_response", rename_all = "snake_case")] -pub enum ZiskResponse { - ZiskStatusResponse(ZiskStatusResponse), - ZiskShutdownResponse(ZiskShutdownResponse), - ZiskProveResponse(ZiskProveResponse), - ZiskVerifyConstraintsResponse(ZiskVerifyConstraintsResponse), - ZiskErrorResponse(ZiskBaseResponse), - ZiskInvalidRequestResponse { base: ZiskBaseResponse }, -} - -pub struct ZiskService { - config: Arc, - // It is important to keep the witness_lib declaration before the proofman declaration - // to ensure that the witness library is dropped before the proofman. - witness_lib: Arc>>, - proofman: Arc>, - asm_services: Option, - is_busy: Arc, - pending_handles: Vec>, -} - -impl ZiskService { - pub fn new(params: &ZiskServerParams) -> Result { - info_file!("Starting asm microservices..."); - let library = - unsafe { Library::new(params.witness_lib.clone()).expect("Failed to load library") }; - let witness_lib_constructor: Symbol> = - unsafe { library.get(b"init_library").expect("Failed to get symbol") }; - - let unlock_mapped_memory = params.unlock_mapped_memory; - - let mut witness_lib = witness_lib_constructor( - params.verbose.into(), - params.elf.clone(), - params.asm.clone(), - params.asm_rom.clone(), - params.asm_port, - unlock_mapped_memory, - params.shared_tables, - ) - .expect("Failed to initialize witness library"); - - let proofman = ProofMan::::new( - params.proving_key.clone(), - params.custom_commits_map.clone(), - params.verify_constraints, - params.aggregation, - params.final_snark, - params.gpu_params.clone(), - params.verbose.into(), - witness_lib.get_packed_info(), - ) - .expect("Failed to initialize proofman"); - - let world_rank = proofman.get_world_rank(); - let local_rank = proofman.get_local_rank(); - - initialize_logger(params.verbose.into(), Some(world_rank)); - - let port = params.port + local_rank as u16; - - let asm_runner_options = AsmRunnerOptions::new() - .with_verbose(params.verbose > 0) - .with_base_port(params.asm_port) - .with_world_rank(world_rank) - .with_local_rank(local_rank) - .with_unlock_mapped_memory(params.unlock_mapped_memory); - - let asm_services = if params.emulator { - None - } else { - let asm_services = AsmServices::new(world_rank, local_rank, params.asm_port); - asm_services - .start_asm_services(params.asm.as_ref().unwrap(), asm_runner_options.clone())?; - Some(asm_services) - }; - - proofman.register_witness(witness_lib.as_mut(), library)?; - - let witness_lib = Arc::new(witness_lib); - - let config = ServerConfig::new( - port, - params.elf.clone(), - params.witness_lib.clone(), - params.asm.clone(), - params.asm_rom.clone(), - params.custom_commits_map.clone(), - params.emulator, - params.proving_key.clone(), - params.verbose, - params.debug_info.clone(), - asm_runner_options, - params.verify_constraints, - params.aggregation, - params.final_snark, - params.gpu_params.clone(), - params.shared_tables, - ); - - Ok(Self { - config: Arc::new(config), - proofman: Arc::new(proofman), - witness_lib, - asm_services, - is_busy: Arc::new(AtomicBool::new(false)), - pending_handles: Vec::new(), - }) - } - - pub fn print_waiting_message(config: &ServerConfig) { - info_file!( - "ZisK Server waiting for requests on port {} for ELF '{}'", - config.port, - config.elf.display() - ); - } - - pub fn run(&mut self) -> std::io::Result<()> { - if self.proofman.rank() == Some(0) || self.proofman.rank().is_none() { - let listener = TcpListener::bind(("127.0.0.1", self.config.port))?; - Self::print_waiting_message(&self.config); - - for stream in listener.incoming() { - match stream { - Ok(stream) => { - let config = Arc::clone(&self.config); - if let Ok(should_shutdown) = self.handle_client(stream, config) { - if should_shutdown { - info_file!("{}", "Shutdown signal received. Exiting."); - break; - } - } - } - Err(e) => error!("Connection failed: {}", e), - } - } - } else { - // Other MPI ranks just wait for rank 0 instructions - loop { - self.receive_request()?; - } - } - - Ok(()) - } - - fn handle_client( - &mut self, - mut stream: TcpStream, - config: Arc, - ) -> std::io::Result { - let mut reader = BufReader::new(&stream); - let mut line = String::new(); - - reader.read_line(&mut line)?; - - let request: ZiskRequest = match serde_json::from_str(&line) { - Ok(req) => req, - Err(e) => { - let response = ZiskResponse::ZiskInvalidRequestResponse { - base: ZiskBaseResponse { - cmd: "invalid_request".to_string(), - result: ZiskCmdResult::Error, - code: ZiskResultCode::InvalidRequest, - msg: Some(format!("Invalid request format or data. {e}")), - node: config.asm_runner_options.world_rank, - }, - }; - Self::send_json(&mut stream, &response)?; - return Ok(false); - } - }; - - info_file!("Received '{}' request", request); - - let mut must_shutdown = false; - - if self.is_busy.load(std::sync::atomic::Ordering::SeqCst) - && !matches!(request, ZiskRequest::Status { .. }) - { - let response = ZiskResponse::ZiskErrorResponse(ZiskBaseResponse { - cmd: "busy".to_string(), - result: ZiskCmdResult::InProgress, - code: ZiskResultCode::Busy, - msg: Some("Server is busy, please try again later.".to_string()), - node: config.asm_runner_options.world_rank, - }); - Self::send_json(&mut stream, &response)?; - return Ok(false); - } - - // Wait for all pending handles to finish - for handle in self.pending_handles.drain(..) { - handle.join().expect("Failed to join thread"); - } - - let (response, handle) = match request { - ZiskRequest::Status { payload } => { - let result = - ZiskServiceStatusHandler::handle(&config, payload, self.is_busy.clone()); - Self::print_waiting_message(&config); - result - } - ZiskRequest::Shutdown { payload } => { - must_shutdown = true; - ZiskServiceShutdownHandler::handle(&config, payload, self.asm_services.as_ref()) - } - ZiskRequest::VerifyConstraints { payload } => { - let mut bytes = Vec::new(); - bytes.push(0u8); // option 0 for verify constraints - let serialized: Vec = - serde_json::to_vec(&payload).expect("Failed to serialize payload"); - bytes.extend_from_slice(&serialized); - self.proofman.mpi_broadcast(&mut bytes); - - ZiskServiceVerifyConstraintsHandler::handle( - config.clone(), - payload, - self.witness_lib.clone(), - self.proofman.clone(), - self.is_busy.clone(), - self.config.debug_info.clone(), - ) - } - ZiskRequest::Prove { payload } => { - let mut bytes = Vec::new(); - bytes.push(1u8); // option 1 for prove - let serialized: Vec = - serde_json::to_vec(&payload).expect("Failed to serialize payload"); - bytes.extend_from_slice(&serialized); - self.proofman.mpi_broadcast(&mut bytes); - - ZiskServiceProveHandler::handle( - config.clone(), - payload, - self.witness_lib.clone(), - self.proofman.clone(), - self.is_busy.clone(), - ) - } - }; - if let Some(handle) = handle { - self.pending_handles.push(handle); - } - - Self::send_json(&mut stream, &response)?; - Ok(must_shutdown) - } - - fn send_json(stream: &mut TcpStream, response: &ZiskResponse) -> std::io::Result<()> { - let json = serde_json::to_string(response)?; - stream.write_all(json.as_bytes())?; - stream.flush() - } - - fn receive_request(&self) -> std::io::Result<()> { - let mut bytes: Vec = Vec::new(); - self.proofman.mpi_broadcast(&mut bytes); - - // extract byte 0 to decide the option - let option = bytes.first().cloned(); - match option { - Some(0) => { - info_file!("Received process 'VerifyConstraints' request"); - // Deserialize the rest of bytes into ZiskVerifyConstraintsRequest - let payload: ZiskVerifyConstraintsRequest = - serde_json::from_slice(&bytes[1..]).expect("Failed to deserialize payload"); - ZiskServiceVerifyConstraintsHandler::process_handle( - payload, - self.proofman.clone(), - self.config.debug_info.clone(), - ); - } - Some(1) => { - info_file!("Received process 'Prove' request"); - // Prove request - // Deserialize the rest of bytes into ZiskProveRequest - let payload: ZiskProveRequest = - serde_json::from_slice(&bytes[1..]).expect("Failed to deserialize payload"); - ZiskServiceProveHandler::process_handle(payload, self.proofman.clone()); - } - _ => { - info_file!( - "Rank {} received unknown request: {:?}", - self.proofman.rank().unwrap_or(0), - option - ); - } - } - Ok(()) - } -} diff --git a/state-machines/arith/Cargo.toml b/state-machines/arith/Cargo.toml index 3831c52b7..8e89cddfb 100644 --- a/state-machines/arith/Cargo.toml +++ b/state-machines/arith/Cargo.toml @@ -19,7 +19,6 @@ sm-binary = { workspace = true } sm-frequent-ops = { workspace = true } fields = { workspace=true } -proofman = { workspace = true } proofman-common = { workspace = true } proofman-macros = { workspace = true } proofman-util = { workspace = true } @@ -33,9 +32,6 @@ static_assertions = { workspace = true } [features] default = [] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed"] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -generate_code_arith_range_table = [] -no_lib_link = ["proofman-common/no_lib_link"] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] \ No newline at end of file +gpu = ["packed"] +packed = [] +generate_code_arith_range_table = [] \ No newline at end of file diff --git a/state-machines/arith/pil/arith.pil b/state-machines/arith/pil/arith.pil index 4bc4e49b7..6ff146009 100644 --- a/state-machines/arith/pil/arith.pil +++ b/state-machines/arith/pil/arith.pil @@ -8,7 +8,7 @@ require "arith_range_table.pil" // full mul_64 full_32 mul_32 // TOTAL 88 77 57 44 -airtemplate Arith(const int N = 2**18, const int operation_bus_id = OPERATION_BUS_ID) { +airtemplate Arith(const int N = 2**18) { const int CHUNK_SIZE = 2**16; const int CHUNKS_INPUT = 4; @@ -266,19 +266,15 @@ airtemplate Arith(const int N = 2**18, const int operation_bus_id = OPERATION_BU col witness bits(1) multiplicity; - lookup_proves(operation_bus_id, [op, - bus_a0, bus_a1, - bus_b0, bus_b1, - bus_res0, bus_res1, - div_by_zero /*+ div_overflow*/], mul: multiplicity); + proves_operation(op:, a: [bus_a0, bus_a1], b: [bus_b0, bus_b1], c: [bus_res0, bus_res1], + flag: div_by_zero, mul: multiplicity); // Check that remainder (d) is lower than divisor (b) when division is performed // Specifically, we ensure that 0 <= |d| < |b| - lookup_assumes(operation_bus_id, [(1 - nr) * (1 - nb) * OP_LTU + nr * (1 - nb) * OP_LT_ABS_NP + (1 - nr) * nb * OP_LT_ABS_PN + nr * nb * OP_GT, - (d[0] + CHUNK_SIZE * d[1]), (d[2] + CHUNK_SIZE * d[3]) + m32 * nr * 0xFFFFFFFF, // remainder - (b[0] + CHUNK_SIZE * b[1]), (b[2] + CHUNK_SIZE * b[3]) + m32 * nb * 0xFFFFFFFF, // divisor - 1, 0, - 1], sel: div * (1 - div_by_zero)); + assumes_operation(op: (1 - nr) * (1 - nb) * OP_LTU + nr * (1 - nb) * OP_LT_ABS_NP + (1 - nr) * nb * OP_LT_ABS_PN + nr * nb * OP_GT, + a: [(d[0] + CHUNK_SIZE * d[1]), (d[2] + CHUNK_SIZE * d[3]) + m32 * nr * 0xFFFFFFFF], // remainder + b: [(b[0] + CHUNK_SIZE * b[1]), (b[2] + CHUNK_SIZE * b[3]) + m32 * nb * 0xFFFFFFFF], // divisor + c: [1, 0], flag: 1, sel: div * (1 - div_by_zero)); for (int index = 0; index < length(carry); ++index) { arith_range_table_assumes(ARITH_RANGE_CARRY, carry[index]); // TODO: review carry range diff --git a/state-machines/arith/pil/arith_mul64.pil b/state-machines/arith/pil/arith_mul64.pil index 57372a4ed..7d1757e29 100644 --- a/state-machines/arith/pil/arith_mul64.pil +++ b/state-machines/arith/pil/arith_mul64.pil @@ -8,7 +8,7 @@ require "arith_range_table.pil" // full mul_64 full_32 mul_32 // TOTAL 88 77 57 44 -airtemplate ArithMul64(const int N = 2**18, const int operation_bus_id = OPERATION_BUS_ID, const int dual_result = 0) { +airtemplate ArithMul64(const int N = 2**18, const int dual_result = 0) { const int CHUNK_SIZE = 2**16; const int CHUNKS_INPUT = 4; const int CHUNKS_OP = CHUNKS_INPUT * 2; @@ -206,11 +206,12 @@ airtemplate ArithMul64(const int N = 2**18, const int operation_bus_id = OPERATI // Check that remainder (d) is lower than divisor (b) when division is performed // Specifically, we ensure that 0 <= |d| < |b| - lookup_assumes(operation_bus_id, [(1 - nr) * (1 - nb) * OP_LTU + nr * (1 - nb) * OP_LT_ABS_NP + (1 - nr) * nb * OP_LT_ABS_PN + nr * nb * OP_GT, - (d[0] + CHUNK_SIZE * d[1]), (d[2] + CHUNK_SIZE * d[3]) + m32 * nr * 0xFFFFFFFF, // remainder - (b[0] + CHUNK_SIZE * b[1]), (b[2] + CHUNK_SIZE * b[3]) + m32 * nb * 0xFFFFFFFF, // divisor - 1, 0, - 1], sel: div * (1 - div_by_zero)); + assumes_operation(op: [(1 - nr) * (1 - nb) * OP_LTU + nr * (1 - nb) * OP_LT_ABS_NP + (1 - nr) * nb * OP_LT_ABS_PN + nr * nb * OP_GT, + a: [d[0] + CHUNK_SIZE * d[1], (d[2] + CHUNK_SIZE * d[3]) + m32 * nr * 0xFFFFFFFF], // remainder + b: (b[0] + CHUNK_SIZE * b[1]), (b[2] + CHUNK_SIZE * b[3]) + m32 * nb * 0xFFFFFFFF], // divisor + c: [1, 0], + flag: 1, + sel: div * (1 - div_by_zero)); for (int index = 0; index < length(carry); ++index) { arith_range_table_assumes(ARITH_RANGE_CARRY, carry[index]); // TODO: review carry range diff --git a/state-machines/arith/src/arith.rs b/state-machines/arith/src/arith.rs index 92c9c6c8f..103f3bb85 100644 --- a/state-machines/arith/src/arith.rs +++ b/state-machines/arith/src/arith.rs @@ -10,10 +10,7 @@ use std::sync::Arc; use fields::PrimeField64; use pil_std_lib::Std; -use zisk_common::{ - BusDevice, BusDeviceMetrics, BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, - InstanceInfo, PayloadType, Planner, -}; +use zisk_common::{BusDeviceMode, ComponentBuilder, Instance, InstanceCtx, InstanceInfo, Planner}; use zisk_core::ZiskOperationType; use zisk_pil::ArithTrace; @@ -24,6 +21,9 @@ use crate::{ArithCounterInputGen, ArithFullInstance, ArithFullSM, ArithPlanner}; pub struct ArithSM { /// Arith Full state machine arith_full_sm: Arc>, + + /// Standard library instance, providing common functionalities. + std: Arc>, } impl ArithSM { @@ -32,9 +32,9 @@ impl ArithSM { /// # Returns /// An `Arc`-wrapped instance of `ArithSM` containing initialized sub-state machines. pub fn new(std: Arc>) -> Arc { - let arith_full_sm = ArithFullSM::new(std); + let arith_full_sm = ArithFullSM::new(std.clone()); - Arc::new(Self { arith_full_sm }) + Arc::new(Self { arith_full_sm, std }) } pub fn build_arith_counter(&self) -> ArithCounterInputGen { @@ -47,14 +47,6 @@ impl ArithSM { } impl ComponentBuilder for ArithSM { - /// Builds and returns a new counter for monitoring arithmetic operations. - /// - /// # Returns - /// A boxed implementation of `ArithCounter`. - fn build_counter(&self) -> Option> { - Some(Box::new(ArithCounterInputGen::new(BusDeviceMode::Counter))) - } - /// Builds a planner to plan arithmetic-related instances. /// /// # Returns @@ -78,17 +70,9 @@ impl ComponentBuilder for ArithSM { fn build_instance(&self, ictx: InstanceCtx) -> Box> { match ictx.plan.air_id { ArithTrace::::AIR_ID => { - Box::new(ArithFullInstance::new(self.arith_full_sm.clone(), ictx)) + Box::new(ArithFullInstance::new(self.arith_full_sm.clone(), ictx, self.std.clone())) } _ => panic!("BinarySM::get_instance() Unsupported air_id: {:?}", ictx.plan.air_id), } } - - /// Creates and returns an input generator for arithmetic state machine computations. - /// - /// # Returns - /// A boxed implementation of `ArithInputGenerator`. - fn build_inputs_generator(&self) -> Option>> { - Some(Box::new(ArithCounterInputGen::new(BusDeviceMode::InputGenerator))) - } } diff --git a/state-machines/arith/src/arith_bus_device.rs b/state-machines/arith/src/arith_bus_device.rs index c4b4349ff..f59970ce4 100644 --- a/state-machines/arith/src/arith_bus_device.rs +++ b/state-machines/arith/src/arith_bus_device.rs @@ -9,8 +9,7 @@ use fields::Goldilocks; use std::collections::VecDeque; use zisk_common::{ - BusDevice, BusDeviceMode, BusId, Counter, MemCollectorInfo, Metrics, A, B, OP, - OPERATION_BUS_ID, OP_TYPE, + BusDevice, BusDeviceMode, BusId, Counter, Metrics, A, B, OP, OPERATION_BUS_ID, OP_TYPE, }; use zisk_core::ZiskOperationType; @@ -55,31 +54,7 @@ impl ArithCounterInputGen { pub fn frops_count(&self, op_type: ZiskOperationType) -> Option { (op_type == ZiskOperationType::Arith).then_some(self.counter.frops_count) } -} - -impl Metrics for ArithCounterInputGen { - /// Tracks activity on the connected bus and updates counters for recognized operations. - /// - /// # Arguments - /// * `data` - The data received from the bus. - /// - /// # Returns - /// An empty vector, as this implementation does not produce any derived inputs for the bus. - #[inline(always)] - fn measure(&mut self, _data: &[u64]) { - self.counter.update(1); - } - - /// Provides a dynamic reference for downcasting purposes. - /// - /// # Returns - /// A reference to `self` as `dyn std::any::Any`. - fn as_any(&self) -> &dyn std::any::Any { - self - } -} -impl BusDevice for ArithCounterInputGen { /// Processes data received on the bus, updating counters and generating inputs when applicable. /// /// # Arguments @@ -91,12 +66,11 @@ impl BusDevice for ArithCounterInputGen { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( + pub fn process_data( &mut self, bus_id: &BusId, data: &[u64], - pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, + pending: &mut VecDeque<(BusId, Vec, Vec)>, ) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); @@ -126,15 +100,31 @@ impl BusDevice for ArithCounterInputGen { true } +} + +impl Metrics for ArithCounterInputGen { + /// Tracks activity on the connected bus and updates counters for recognized operations. + /// + /// # Arguments + /// * `data` - The data received from the bus. + /// + /// # Returns + /// An empty vector, as this implementation does not produce any derived inputs for the bus. + #[inline(always)] + fn measure(&mut self, _data: &[u64]) { + self.counter.update(1); + } - /// Returns the bus IDs associated with this counter. + /// Provides a dynamic reference for downcasting purposes. /// /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] + /// A reference to `self` as `dyn std::any::Any`. + fn as_any(&self) -> &dyn std::any::Any { + self } +} +impl BusDevice for ArithCounterInputGen { /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/state-machines/arith/src/arith_full.rs b/state-machines/arith/src/arith_full.rs index 86b673fe9..0cdea3e04 100644 --- a/state-machines/arith/src/arith_full.rs +++ b/state-machines/arith/src/arith_full.rs @@ -8,8 +8,7 @@ use std::collections::VecDeque; use std::sync::Arc; use crate::{ - ArithFrops, ArithOperation, ArithRangeTableInputs, ArithRangeTableSM, ArithTableInputs, - ArithTableSM, + ArithOperation, ArithRangeTableInputs, ArithRangeTableSM, ArithTableInputs, ArithTableSM, }; use fields::PrimeField64; use pil_std_lib::Std; @@ -49,9 +48,6 @@ pub struct ArithFullSM { /// The table ID for the Range Table State Machine range_table_id: usize, - - /// The table ID for the FROPS - frops_table_id: usize, } impl ArithFullSM { @@ -72,11 +68,7 @@ impl ArithFullSM { .get_virtual_table_id(ArithRangeTableSM::TABLE_ID) .expect("Failed to get range table ID"); - // Get the Arithmetic FROPS table ID - let frops_table_id = - std.get_virtual_table_id(ArithFrops::TABLE_ID).expect("Failed to get FROPS table ID"); - - Arc::new(Self { std, table_id, range_table_id, frops_table_id }) + Arc::new(Self { std, table_id, range_table_id }) } /// Computes the witness for arithmetic operations and updates associated tables. @@ -177,15 +169,12 @@ impl ArithFullSM { Ok(AirInstance::new_from_trace(FromTrace::new(&mut arith_trace))) } - pub fn compute_frops(&self, frops_inputs: &Vec) { - for row in frops_inputs { - self.std.inc_virtual_row(self.frops_table_id, *row as u64, 1); - } - } - /// Generates binary inputs for operations requiring additional validation (e.g., division). #[inline(always)] - pub fn generate_inputs(input: &OperationData, pending: &mut VecDeque<(BusId, Vec)>) { + pub fn generate_inputs( + input: &OperationData, + pending: &mut VecDeque<(BusId, Vec, Vec)>, + ) { let mut aop = ArithOperation::new(); let input_data = ExtOperationData::OperationData(*input); diff --git a/state-machines/arith/src/arith_full_instance.rs b/state-machines/arith/src/arith_full_instance.rs index e25017b01..f3a549d85 100644 --- a/state-machines/arith/src/arith_full_instance.rs +++ b/state-machines/arith/src/arith_full_instance.rs @@ -6,15 +6,13 @@ use crate::{ArithFrops, ArithFullSM}; use fields::PrimeField64; +use pil_std_lib::Std; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; -use std::{ - collections::{HashMap, VecDeque}, - sync::Arc, -}; +use std::{collections::HashMap, sync::Arc}; +use zisk_common::StatsType; use zisk_common::{ BusDevice, BusId, CheckPoint, ChunkId, CollectSkipper, ExtOperationData, Instance, InstanceCtx, - InstanceType, MemCollectorInfo, OperationData, PayloadType, A, B, OP, OPERATION_BUS_ID, - OP_TYPE, + InstanceType, OperationData, PayloadType, A, B, OP, OPERATION_BUS_ID, OP_TYPE, }; use zisk_core::ZiskOperationType; use zisk_pil::ArithTrace; @@ -29,10 +27,13 @@ pub struct ArithFullInstance { arith_full_sm: Arc>, /// Collect info for each chunk ID, containing the number of rows and a skipper for collection. - collect_info: HashMap, + collect_info: HashMap, /// The instance context. ictx: InstanceCtx, + + /// Standard library instance, providing common functionalities. + std: Arc>, } impl ArithFullInstance { @@ -44,7 +45,11 @@ impl ArithFullInstance { /// /// # Returns /// A new `ArithFullInstance` instance initialized with the provided state machine and context. - pub fn new(arith_full_sm: Arc>, mut ictx: InstanceCtx) -> Self { + pub fn new( + arith_full_sm: Arc>, + mut ictx: InstanceCtx, + std: Arc>, + ) -> Self { assert_eq!( ictx.plan.air_id, ArithTrace::::AIR_ID, @@ -55,16 +60,20 @@ impl ArithFullInstance { let meta = ictx.plan.meta.take().expect("Expected metadata in ictx.plan.meta"); let collect_info = *meta - .downcast::>() + .downcast::>() .expect("Failed to downcast ictx.plan.meta to expected type"); - Self { arith_full_sm, collect_info, ictx } + Self { arith_full_sm, collect_info, ictx, std } } - pub fn build_arith_collector(&self, chunk_id: ChunkId) -> ArithInstanceCollector { - let (num_ops, num_freq_ops, force_execute_to_end, collect_skipper) = - self.collect_info[&chunk_id]; - ArithInstanceCollector::new(num_ops, num_freq_ops, collect_skipper, force_execute_to_end) + pub fn build_arith_collector(&self, chunk_id: ChunkId) -> ArithInstanceCollector { + let (num_ops, force_execute_to_end, collect_skipper) = self.collect_info[&chunk_id]; + ArithInstanceCollector::new( + num_ops, + collect_skipper, + force_execute_to_end, + self.std.clone(), + ) } } @@ -91,8 +100,8 @@ impl Instance for ArithFullInstance { let inputs: Vec<_> = collectors .into_iter() .map(|(_, collector)| { - let _collector = collector.as_any().downcast::().unwrap(); - self.arith_full_sm.compute_frops(&_collector.frops_inputs); + let _collector = + collector.as_any().downcast::>().unwrap(); _collector.inputs }) .collect(); @@ -115,6 +124,10 @@ impl Instance for ArithFullInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Opcodes + } + /// Builds an input collector for the instance. /// /// # Arguments @@ -123,13 +136,12 @@ impl Instance for ArithFullInstance { /// # Returns /// An `Option` containing the input collector for the instance. fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { - let (num_ops, num_freq_ops, force_execute_to_end, collect_skipper) = - self.collect_info[&chunk_id]; + let (num_ops, force_execute_to_end, collect_skipper) = self.collect_info[&chunk_id]; Some(Box::new(ArithInstanceCollector::new( num_ops, - num_freq_ops, collect_skipper, force_execute_to_end, + self.std.clone(), ))) } @@ -139,11 +151,9 @@ impl Instance for ArithFullInstance { } /// The `ArithInstanceCollector` struct represents an input collector for arithmetic state machines. -pub struct ArithInstanceCollector { +pub struct ArithInstanceCollector { /// Collected inputs for witness computation. inputs: Vec>, - /// Collected rows for FROPS - frops_inputs: Vec, /// The number of operations to collect. num_operations: u64, @@ -153,9 +163,15 @@ pub struct ArithInstanceCollector { /// Flag to indicate that force to execute to end of chunk force_execute_to_end: bool, + + /// The table ID for the Arith FROPS + frops_table_id: usize, + + /// Standard library instance, providing common functionalities. + std: Arc>, } -impl ArithInstanceCollector { +impl ArithInstanceCollector { /// Creates a new `ArithInstanceCollector`. /// /// # Arguments @@ -168,21 +184,22 @@ impl ArithInstanceCollector { /// A new `ArithInstanceCollector` instance initialized with the provided parameters. pub fn new( num_operations: u64, - num_freq_ops: u64, collect_skipper: CollectSkipper, force_execute_to_end: bool, + std: Arc>, ) -> Self { + let frops_table_id = + std.get_virtual_table_id(ArithFrops::TABLE_ID).expect("Failed to get FROPS table ID"); Self { inputs: Vec::with_capacity(num_operations as usize), num_operations, collect_skipper, - frops_inputs: Vec::with_capacity(num_freq_ops as usize), force_execute_to_end, + std, + frops_table_id, } } -} -impl BusDevice for ArithInstanceCollector { /// Processes data received on the bus, collecting the inputs necessary for witness computation. /// /// # Arguments @@ -194,13 +211,7 @@ impl BusDevice for ArithInstanceCollector { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64]) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); let instance_complete = self.inputs.len() == self.num_operations as usize; @@ -219,7 +230,7 @@ impl BusDevice for ArithInstanceCollector { } if frops_row != ArithFrops::NO_FROPS { - self.frops_inputs.push(frops_row as u32); + self.std.inc_virtual_row(self.frops_table_id, frops_row as u64, 1); return true; } @@ -236,15 +247,9 @@ impl BusDevice for ArithInstanceCollector { self.inputs.len() < self.num_operations as usize || self.force_execute_to_end } +} - /// Returns the bus IDs associated with this instance. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - +impl BusDevice for ArithInstanceCollector { /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/state-machines/arith/src/arith_table_helpers.rs b/state-machines/arith/src/arith_table_helpers.rs index 336d9ab14..d9dda0c4f 100644 --- a/state-machines/arith/src/arith_table_helpers.rs +++ b/state-machines/arith/src/arith_table_helpers.rs @@ -73,6 +73,7 @@ impl ArithTableHelpers { /// Retrieves the row index during testing (optimized for release mode). #[cfg(not(debug_assertions))] #[cfg(test)] + #[allow(clippy::too_many_arguments)] pub fn get_row( op: u8, na: bool, diff --git a/state-machines/binary/Cargo.toml b/state-machines/binary/Cargo.toml index e3339a688..a1559fc95 100644 --- a/state-machines/binary/Cargo.toml +++ b/state-machines/binary/Cargo.toml @@ -22,7 +22,6 @@ zisk-pil = { workspace = true } sm-frequent-ops = { workspace = true } fields = { workspace=true } -proofman = { workspace = true } proofman-common = { workspace = true } proofman-macros = { workspace = true } proofman-util = { workspace = true } @@ -35,8 +34,5 @@ static_assertions = { workspace = true } [features] default = [] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed"] -no_lib_link = ["proofman-common/no_lib_link"] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] +gpu = ["packed"] +packed = [] diff --git a/state-machines/binary/pil/binary.pil b/state-machines/binary/pil/binary.pil index 121176f6b..07a79b1f6 100644 --- a/state-machines/binary/pil/binary.pil +++ b/state-machines/binary/pil/binary.pil @@ -51,7 +51,7 @@ require "binary_table.pil" Note: op = b_op + 0x10*mode32 */ -airtemplate Binary(const int N = 2**21, const int RC = 2, const int bits = 64, const int operation_bus_id = OPERATION_BUS_ID) { +airtemplate Binary(const int N = 2**21, const int RC = 2, const int bits = 64) { if (RC != 2 || bits != 64) { error(`Currently only RC=2 and bits=64 are supported, got RC=${RC}, bits=${bits}`); } @@ -153,8 +153,8 @@ airtemplate Binary(const int N = 2**21, const int RC = 2, const int bits = 64, c // Otherwise, the result is simply c c[0] += cout; - lookup_proves(operation_bus_id, [b_op + 0x10 * mode32, ...a, ...b, ...c, cout]); + proves_operation(op: b_op + 0x10 * mode32, a:, b:, c:, flag:cout); airval padding_size; - direct_update_assumes(operation_bus_id, [OP_ADD, 0, 0, 0, 0, 0, 0, 0], sel: padding_size); + assumes_padding_operation(op: OP_ADD, padding_size:); } diff --git a/state-machines/binary/pil/binary_add.pil b/state-machines/binary/pil/binary_add.pil index ef564dfbb..2b97eeb1b 100644 --- a/state-machines/binary/pil/binary_add.pil +++ b/state-machines/binary/pil/binary_add.pil @@ -3,7 +3,7 @@ require "std_range_check.pil" require "operations.pil" require "opids.pil" -airtemplate BinaryAdd(const int N = 2**21, const int operation_bus_id = OPERATION_BUS_ID, const int RC = 2) { +airtemplate BinaryAdd(const int N = 2**21, const int RC = 2) { col witness bits(32) a[RC]; col witness bits(32) b[RC]; col witness bits(16) c_chunks[RC*2]; @@ -22,8 +22,8 @@ airtemplate BinaryAdd(const int N = 2**21, const int operation_bus_id = OPERATIO range_check(expression: c_chunks[i * 2 + 1], min: 0, max: 2**16 - 1); } - lookup_proves(operation_bus_id, [OP_ADD, ...a, ...b, ...c, 0]); + proves_operation(op: OP_ADD, a:, b:, c:); airval padding_size; - direct_update_assumes(operation_bus_id, [OP_ADD, 0, 0, 0, 0, 0, 0, 0], sel: padding_size); + assumes_padding_operation(op: OP_ADD, padding_size:); } \ No newline at end of file diff --git a/state-machines/binary/pil/binary_extension.pil b/state-machines/binary/pil/binary_extension.pil index 4a1e013b1..276e7cd81 100644 --- a/state-machines/binary/pil/binary_extension.pil +++ b/state-machines/binary/pil/binary_extension.pil @@ -68,7 +68,7 @@ x in2[x] out[x][0] out[x][1] Result: 0xFFFF8abc 0xFFFFFFFF */ -airtemplate BinaryExtension(const int N = 2**18, const int bits = 64, const int operation_bus_id = OPERATION_BUS_ID) { +airtemplate BinaryExtension(const int N = 2**18, const int bits = 64) { if (bits != 64) { error(`Currently only bits=64 is supported, got bits=${bits}`); } @@ -115,20 +115,10 @@ airtemplate BinaryExtension(const int N = 2**18, const int bits = 64, const int // if op_is_shift == 1 => [op, a[0], a[1], free_in_b + 256 * b[0], b[1], sum(free_in_c[*][0]), sum(free_in_c[*][1]), 0] // if op_is_shift == 0 => [op, b[0], b[1], a[0], a[1], sum(free_in_c[*][0]), sum(free_in_c[*][1]), 0] - lookup_proves( - operation_bus_id, - [ - op, - op_is_shift * (a[0] - b[0]) + b[0], - op_is_shift * (a[1] - b[1]) + b[1], - op_is_shift * (free_in_b + BYTE_BASE * b[0] - a[0]) + a[0], - op_is_shift * (b[1] - a[1]) + a[1], - c[0], - c[1], - 0 - ] - ); + proves_operation(op:, a: [op_is_shift * (a[0] - b[0]) + b[0], op_is_shift * (a[1] - b[1]) + b[1]], + b: [op_is_shift * (free_in_b + BYTE_BASE * b[0] - a[0]) + a[0], op_is_shift * (b[1] - a[1]) + a[1]], + c:); airval padding_size; - direct_update_assumes(operation_bus_id, [OP_SEXT_B, 0, 0, 0, 0, 0, 0, 0], sel: padding_size); + assumes_padding_operation(op: OP_SEXT_B, padding_size:); } diff --git a/state-machines/binary/src/binary.rs b/state-machines/binary/src/binary.rs index 508009833..4d6798af8 100644 --- a/state-machines/binary/src/binary.rs +++ b/state-machines/binary/src/binary.rs @@ -15,7 +15,7 @@ use crate::{ }; use fields::PrimeField64; use pil_std_lib::Std; -use zisk_common::{BusDeviceMetrics, ComponentBuilder, Instance, InstanceCtx, Planner}; +use zisk_common::{ComponentBuilder, Instance, InstanceCtx, Planner}; use zisk_pil::{BinaryAddTrace, BinaryExtensionTrace, BinaryTrace}; /// The `BinarySM` struct represents the Binary State Machine, @@ -30,6 +30,8 @@ pub struct BinarySM { /// Binary Add state machine (optimal only for addition) binary_add_sm: Arc>, + + std: Arc>, } impl BinarySM { @@ -45,9 +47,9 @@ impl BinarySM { let binary_extension_sm = BinaryExtensionSM::new(std.clone()); - let binary_add_sm = BinaryAddSM::new(std); + let binary_add_sm = BinaryAddSM::new(std.clone()); - Arc::new(Self { binary_basic_sm, binary_extension_sm, binary_add_sm }) + Arc::new(Self { binary_basic_sm, binary_extension_sm, binary_add_sm, std }) } pub fn build_binary_counter(&self) -> BinaryCounter { @@ -56,15 +58,6 @@ impl BinarySM { } impl ComponentBuilder for BinarySM { - /// Builds and returns a new counter for monitoring binary operations. - /// - /// # Returns - /// A boxed implementation of `RegularCounters` configured for binary and extension binary - /// operations. - fn build_counter(&self) -> Option> { - Some(Box::new(BinaryCounter::new())) - } - /// Builds a planner to plan binary-related instances. /// /// # Returns @@ -82,14 +75,18 @@ impl ComponentBuilder for BinarySM { /// A boxed implementation of `Instance` for binary operations. fn build_instance(&self, ictx: InstanceCtx) -> Box> { match ictx.plan.air_id { - BinaryTrace::::AIR_ID => { - Box::new(BinaryBasicInstance::new(self.binary_basic_sm.clone(), ictx)) - } - BinaryExtensionTrace::::AIR_ID => { - Box::new(BinaryExtensionInstance::new(self.binary_extension_sm.clone(), ictx)) - } + BinaryTrace::::AIR_ID => Box::new(BinaryBasicInstance::new( + self.binary_basic_sm.clone(), + ictx, + self.std.clone(), + )), + BinaryExtensionTrace::::AIR_ID => Box::new(BinaryExtensionInstance::new( + self.binary_extension_sm.clone(), + ictx, + self.std.clone(), + )), BinaryAddTrace::::AIR_ID => { - Box::new(BinaryAddInstance::new(self.binary_add_sm.clone(), ictx)) + Box::new(BinaryAddInstance::new(self.binary_add_sm.clone(), ictx, self.std.clone())) } _ => panic!("BinarySM::get_instance() Unsupported air_id: {:?}", ictx.plan.air_id), } diff --git a/state-machines/binary/src/binary_add.rs b/state-machines/binary/src/binary_add.rs index 608a2ad30..d30f61250 100644 --- a/state-machines/binary/src/binary_add.rs +++ b/state-machines/binary/src/binary_add.rs @@ -2,7 +2,6 @@ //! //! This state machine processes binary-related operations. -use crate::BinaryBasicFrops; use fields::PrimeField64; use pil_std_lib::Std; use proofman_common::{AirInstance, FromTrace, ProofmanResult}; @@ -31,9 +30,6 @@ pub struct BinaryAddSM { /// Reference to the PIL2 standard library. std: Arc>, range_id: usize, - - /// The table ID for the FROPS - frops_table_id: usize, } impl BinaryAddSM { @@ -47,12 +43,8 @@ impl BinaryAddSM { pub fn new(std: Arc>) -> Arc { let range_id = std.get_range_id(0, 0xFFFF, None).expect("Failed to get range ID"); - // Get the Arithmetic FROPS table ID - let frops_table_id = std - .get_virtual_table_id(BinaryBasicFrops::TABLE_ID) - .expect("Failed to get FROPS table ID"); // Create the BinaryAdd state machine - Arc::new(Self { std, range_id, frops_table_id }) + Arc::new(Self { std, range_id }) } /// Processes a slice of operation data, generating a trace row and updating multiplicities. @@ -168,10 +160,4 @@ impl BinaryAddSM { FromTrace::new(&mut add_trace).with_air_values(&mut air_values), )) } - - pub fn compute_frops(&self, frops_inputs: &Vec) { - for row in frops_inputs { - self.std.inc_virtual_row(self.frops_table_id, *row as u64, 1); - } - } } diff --git a/state-machines/binary/src/binary_add_collector.rs b/state-machines/binary/src/binary_add_collector.rs index 26390c8dc..6827464d9 100644 --- a/state-machines/binary/src/binary_add_collector.rs +++ b/state-machines/binary/src/binary_add_collector.rs @@ -1,28 +1,35 @@ //! The `BinaryAddCollector` struct represents an input collector for binary add operations. use crate::BinaryBasicFrops; -use std::collections::VecDeque; use zisk_common::{ - BusDevice, BusId, CollectSkipper, ExtOperationData, MemCollectorInfo, OperationBusData, A, B, - OP, OPERATION_BUS_ID, + BusDevice, BusId, CollectSkipper, ExtOperationData, OperationBusData, A, B, OP, + OPERATION_BUS_ID, }; use zisk_core::zisk_ops::ZiskOp; +use fields::PrimeField64; +use pil_std_lib::Std; +use std::sync::Arc; + /// The `BinaryAddCollector` struct represents an input collector for binary add operations. -pub struct BinaryAddCollector { +pub struct BinaryAddCollector { /// Collected inputs for witness computation. pub inputs: Vec<[u64; 2]>, - /// Collected rows for FROPS - pub frops_inputs: Vec, pub num_operations: usize, pub collect_skipper: CollectSkipper, /// Flag to indicate that force to execute to end of chunk force_execute_to_end: bool, + + /// The table ID for the Binary Add FROPS + frops_table_id: usize, + + /// Standard library instance, providing common functionalities. + std: Arc>, } -impl BinaryAddCollector { +impl BinaryAddCollector { /// Creates a new `BinaryAddCollector`. /// /// # Arguments @@ -33,21 +40,23 @@ impl BinaryAddCollector { /// A new `BinaryAddCollector` instance initialized with the provided parameters. pub fn new( num_operations: usize, - num_freq_ops: usize, collect_skipper: CollectSkipper, force_execute_to_end: bool, + std: Arc>, ) -> Self { + let frops_table_id = std + .get_virtual_table_id(BinaryBasicFrops::TABLE_ID) + .expect("Failed to get FROPS table ID"); Self { inputs: Vec::with_capacity(num_operations), num_operations, collect_skipper, - frops_inputs: Vec::with_capacity(num_freq_ops), force_execute_to_end, + frops_table_id, + std, } } -} -impl BusDevice for BinaryAddCollector { /// Processes data received on the bus, collecting the inputs necessary for witness computation. /// /// # Arguments @@ -59,13 +68,7 @@ impl BusDevice for BinaryAddCollector { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64]) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); let instance_complete = self.inputs.len() == self.num_operations; @@ -89,7 +92,7 @@ impl BusDevice for BinaryAddCollector { } if frops_row != BinaryBasicFrops::NO_FROPS { - self.frops_inputs.push(frops_row as u32); + self.std.inc_virtual_row(self.frops_table_id, frops_row as u64, 1); return true; } @@ -102,15 +105,9 @@ impl BusDevice for BinaryAddCollector { self.inputs.len() < self.num_operations || self.force_execute_to_end } +} - /// Returns the bus IDs associated with this instance. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - +impl BusDevice for BinaryAddCollector { /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/state-machines/binary/src/binary_add_instance.rs b/state-machines/binary/src/binary_add_instance.rs index 3642b70f8..9c05b8f1b 100644 --- a/state-machines/binary/src/binary_add_instance.rs +++ b/state-machines/binary/src/binary_add_instance.rs @@ -6,8 +6,10 @@ use crate::{BinaryAddCollector, BinaryAddSM}; use fields::PrimeField64; +use pil_std_lib::Std; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; use std::{collections::HashMap, sync::Arc}; +use zisk_common::StatsType; use zisk_common::{ BusDevice, CheckPoint, ChunkId, CollectSkipper, Instance, InstanceCtx, InstanceType, PayloadType, @@ -23,10 +25,13 @@ pub struct BinaryAddInstance { binary_add_sm: Arc>, /// Collect info for each chunk ID, containing the number of rows and a skipper for collection. - collect_info: HashMap, + collect_info: HashMap, /// Instance context. ictx: InstanceCtx, + + /// Standard library instance, providing common functionalities. + std: Arc>, } impl BinaryAddInstance { @@ -39,7 +44,11 @@ impl BinaryAddInstance { /// # Returns /// A new `BinaryAddInstance` instance initialized with the provided state machine and /// context. - pub fn new(binary_add_sm: Arc>, mut ictx: InstanceCtx) -> Self { + pub fn new( + binary_add_sm: Arc>, + mut ictx: InstanceCtx, + std: Arc>, + ) -> Self { assert_eq!( ictx.plan.air_id, BinaryAddTrace::::AIR_ID, @@ -50,26 +59,25 @@ impl BinaryAddInstance { let meta = ictx.plan.meta.take().expect("Expected metadata in ictx.plan.meta"); let collect_info = *meta - .downcast::>() + .downcast::>() .expect("Failed to downcast ictx.plan.meta to expected type"); - Self { binary_add_sm, collect_info, ictx } + Self { binary_add_sm, collect_info, ictx, std } } - pub fn build_binary_add_collector(&self, chunk_id: ChunkId) -> BinaryAddCollector { + pub fn build_binary_add_collector(&self, chunk_id: ChunkId) -> BinaryAddCollector { assert_eq!( self.ictx.plan.air_id, BinaryAddTrace::::AIR_ID, "BinaryAddInstance: Unsupported air_id: {:?}", self.ictx.plan.air_id ); - let (num_ops, num_freq_ops, force_execute_to_end, collect_skipper) = - self.collect_info[&chunk_id]; + let (num_ops, force_execute_to_end, collect_skipper) = self.collect_info[&chunk_id]; BinaryAddCollector::new( num_ops as usize, - num_freq_ops as usize, collect_skipper, force_execute_to_end, + self.std.clone(), ) } } @@ -97,8 +105,7 @@ impl Instance for BinaryAddInstance { let inputs: Vec<_> = collectors .into_iter() .map(|(_, collector)| { - let _collector = collector.as_any().downcast::().unwrap(); - self.binary_add_sm.compute_frops(&_collector.frops_inputs); + let _collector = collector.as_any().downcast::>().unwrap(); _collector.inputs }) .collect(); @@ -122,6 +129,10 @@ impl Instance for BinaryAddInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Opcodes + } + /// Builds an input collector for the instance. /// /// # Arguments @@ -136,13 +147,12 @@ impl Instance for BinaryAddInstance { "BinaryAddInstance: Unsupported air_id: {:?}", self.ictx.plan.air_id ); - let (num_ops, num_freq_ops, force_execute_to_end, collect_skipper) = - self.collect_info[&chunk_id]; + let (num_ops, force_execute_to_end, collect_skipper) = self.collect_info[&chunk_id]; Some(Box::new(BinaryAddCollector::new( num_ops as usize, - num_freq_ops as usize, collect_skipper, force_execute_to_end, + self.std.clone(), ))) } diff --git a/state-machines/binary/src/binary_basic.rs b/state-machines/binary/src/binary_basic.rs index 7c376df88..51e4f9fe6 100644 --- a/state-machines/binary/src/binary_basic.rs +++ b/state-machines/binary/src/binary_basic.rs @@ -4,9 +4,7 @@ use std::sync::Arc; -use crate::{ - binary_constants::*, BinaryBasicFrops, BinaryBasicTableOp, BinaryBasicTableSM, BinaryInput, -}; +use crate::{binary_constants::*, BinaryBasicTableOp, BinaryBasicTableSM, BinaryInput}; use fields::PrimeField64; use pil_std_lib::Std; use proofman_common::{AirInstance, FromTrace, ProofmanResult}; @@ -39,9 +37,6 @@ pub struct BinaryBasicSM { /// The table ID for the Binary Basic State Machine table_id: usize, - - /// The table ID for the FROPS - frops_table_id: usize, } impl BinaryBasicSM { @@ -57,12 +52,7 @@ impl BinaryBasicSM { let table_id = std.get_virtual_table_id(BinaryBasicTableSM::TABLE_ID).expect("Failed to get range ID"); - // Get the FROPS table ID - let frops_table_id = std - .get_virtual_table_id(BinaryBasicFrops::TABLE_ID) - .expect("Failed to get FROPS table ID"); - - Arc::new(Self { std, table_id, frops_table_id }) + Arc::new(Self { std, table_id }) } /// Determines if an opcode corresponds to a 32-bit operation. @@ -958,9 +948,4 @@ impl BinaryBasicSM { FromTrace::new(&mut binary_trace).with_air_values(&mut air_values), )) } - pub fn compute_frops(&self, frops_inputs: &Vec) { - for row in frops_inputs { - self.std.inc_virtual_row(self.frops_table_id, *row as u64, 1); - } - } } diff --git a/state-machines/binary/src/binary_basic_collector.rs b/state-machines/binary/src/binary_basic_collector.rs index b03703592..841cf5cbc 100644 --- a/state-machines/binary/src/binary_basic_collector.rs +++ b/state-machines/binary/src/binary_basic_collector.rs @@ -2,21 +2,21 @@ //! //! It manages collected inputs for the `BinaryExtensionSM` to compute witnesses -use std::collections::VecDeque; - use crate::{BinaryBasicFrops, BinaryInput}; use zisk_common::{ - BusDevice, BusId, CollectSkipper, ExtOperationData, MemCollectorInfo, OperationBusData, A, B, - OP, OPERATION_BUS_ID, + BusDevice, BusId, CollectSkipper, ExtOperationData, OperationBusData, A, B, OP, + OPERATION_BUS_ID, }; use zisk_core::{zisk_ops::ZiskOp, ZiskOperationType}; +use fields::PrimeField64; +use pil_std_lib::Std; +use std::sync::Arc; + /// The `BinaryBasicCollector` struct represents an input collector for binary-related operations. -pub struct BinaryBasicCollector { +pub struct BinaryBasicCollector { /// Collected inputs for witness computation. pub inputs: Vec, - /// Collected rows for FROPS - pub frops_inputs: Vec, pub num_operations: usize, @@ -27,9 +27,15 @@ pub struct BinaryBasicCollector { /// Flag to indicate that force to execute to end of chunk force_execute_to_end: bool, + + /// The table ID for the Binary FROPS + frops_table_id: usize, + + /// Standard library instance, providing common functionalities. + std: Arc>, } -impl BinaryBasicCollector { +impl BinaryBasicCollector { /// Creates a new `BinaryBasicCollector`. /// /// # Arguments @@ -40,23 +46,26 @@ impl BinaryBasicCollector { /// A new `BinaryBasicCollector` instance initialized with the provided parameters. pub fn new( num_operations: usize, - num_freq_ops: usize, collect_skipper: CollectSkipper, with_adds: bool, force_execute_to_end: bool, + std: Arc>, ) -> Self { + let frops_table_id = std + .get_virtual_table_id(BinaryBasicFrops::TABLE_ID) + .expect("Failed to get FROPS table ID"); + Self { inputs: Vec::with_capacity(num_operations), num_operations, collect_skipper, with_adds, - frops_inputs: Vec::with_capacity(num_freq_ops), force_execute_to_end, + frops_table_id, + std, } } -} -impl BusDevice for BinaryBasicCollector { /// Processes data received on the bus, collecting the inputs necessary for witness computation. /// /// # Arguments @@ -68,13 +77,7 @@ impl BusDevice for BinaryBasicCollector { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64]) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); let instance_complete = self.inputs.len() == self.num_operations; @@ -102,7 +105,7 @@ impl BusDevice for BinaryBasicCollector { } if frops_row != BinaryBasicFrops::NO_FROPS { - self.frops_inputs.push(frops_row as u32); + self.std.inc_virtual_row(self.frops_table_id, frops_row as u64, 1); return true; } @@ -114,15 +117,9 @@ impl BusDevice for BinaryBasicCollector { self.inputs.len() < self.num_operations || self.force_execute_to_end } +} - /// Returns the bus IDs associated with this instance. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - +impl BusDevice for BinaryBasicCollector { /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/state-machines/binary/src/binary_basic_instance.rs b/state-machines/binary/src/binary_basic_instance.rs index 2ebd2ddd8..861b36930 100644 --- a/state-machines/binary/src/binary_basic_instance.rs +++ b/state-machines/binary/src/binary_basic_instance.rs @@ -6,13 +6,14 @@ use crate::{BinaryBasicCollector, BinaryBasicSM}; use fields::PrimeField64; +use pil_std_lib::Std; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; use std::{collections::HashMap, sync::Arc}; +use zisk_common::StatsType; use zisk_common::{ BusDevice, CheckPoint, ChunkId, CollectSkipper, Instance, InstanceCtx, InstanceType, PayloadType, }; - use zisk_pil::BinaryTrace; /// The `BinaryBasicInstance` struct represents an instance for binary-related witness computations. @@ -30,7 +31,10 @@ pub struct BinaryBasicInstance { with_adds: bool, /// Collect info for each chunk ID, containing the number of rows and a skipper for collection. - collect_info: HashMap, + collect_info: HashMap, + + /// Standard library instance, providing common functionalities. + std: Arc>, } impl BinaryBasicInstance { @@ -43,7 +47,11 @@ impl BinaryBasicInstance { /// # Returns /// A new `BinaryBasicInstance` instance initialized with the provided state machine and /// context. - pub fn new(binary_basic_sm: Arc>, mut ictx: InstanceCtx) -> Self { + pub fn new( + binary_basic_sm: Arc>, + mut ictx: InstanceCtx, + std: Arc>, + ) -> Self { assert_eq!( ictx.plan.air_id, BinaryTrace::::AIR_ID, @@ -54,21 +62,20 @@ impl BinaryBasicInstance { let meta = ictx.plan.meta.take().expect("Expected metadata in ictx.plan.meta"); let (with_adds, collect_info) = *meta - .downcast::<(bool, HashMap)>() + .downcast::<(bool, HashMap)>() .expect("Failed to downcast ictx.plan.meta to expected type"); - Self { binary_basic_sm, ictx, with_adds, collect_info } + Self { binary_basic_sm, ictx, with_adds, collect_info, std } } - pub fn build_binary_basic_collector(&self, chunk_id: ChunkId) -> BinaryBasicCollector { - let (num_ops, num_freq_ops, force_execute_to_end, collect_skipper) = - self.collect_info[&chunk_id]; + pub fn build_binary_basic_collector(&self, chunk_id: ChunkId) -> BinaryBasicCollector { + let (num_ops, force_execute_to_end, collect_skipper) = self.collect_info[&chunk_id]; BinaryBasicCollector::new( num_ops as usize, - num_freq_ops as usize, collect_skipper, self.with_adds, force_execute_to_end, + self.std.clone(), ) } } @@ -96,8 +103,7 @@ impl Instance for BinaryBasicInstance { let inputs: Vec<_> = collectors .into_iter() .map(|(_, collector)| { - let _collector = collector.as_any().downcast::().unwrap(); - self.binary_basic_sm.compute_frops(&_collector.frops_inputs); + let _collector = collector.as_any().downcast::>().unwrap(); _collector.inputs }) .collect(); @@ -121,6 +127,10 @@ impl Instance for BinaryBasicInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Opcodes + } + /// Builds an input collector for the instance. /// /// # Arguments @@ -129,14 +139,13 @@ impl Instance for BinaryBasicInstance { /// # Returns /// An `Option` containing the input collector for the instance. fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { - let (num_ops, num_freq_ops, force_execute_to_end, collect_skipper) = - self.collect_info[&chunk_id]; + let (num_ops, force_execute_to_end, collect_skipper) = self.collect_info[&chunk_id]; Some(Box::new(BinaryBasicCollector::new( num_ops as usize, - num_freq_ops as usize, collect_skipper, self.with_adds, force_execute_to_end, + self.std.clone(), ))) } diff --git a/state-machines/binary/src/binary_counter.rs b/state-machines/binary/src/binary_counter.rs index 2c2b789ce..3d00b8264 100644 --- a/state-machines/binary/src/binary_counter.rs +++ b/state-machines/binary/src/binary_counter.rs @@ -6,10 +6,7 @@ //! the system bus for both monitoring and input generation. use crate::{BinaryBasicFrops, BinaryExtensionFrops}; -use std::collections::VecDeque; -use zisk_common::{ - BusDevice, BusId, Counter, MemCollectorInfo, Metrics, A, B, OP, OPERATION_BUS_ID, OP_TYPE, -}; +use zisk_common::{BusDevice, BusId, Counter, Metrics, A, B, OP, OPERATION_BUS_ID, OP_TYPE}; use zisk_core::{zisk_ops::ZiskOp, ZiskOperationType}; /// The `BinaryCounter` struct represents a counter that monitors and measures @@ -40,6 +37,25 @@ impl BinaryCounter { pub fn new() -> Self { Self::default() } + + /// Processes data received on the bus, updating counters and generating inputs when applicable. + /// + /// # Arguments + /// * `bus_id` - The ID of the bus sending the data. + /// * `data` - The data received from the bus. + /// * `pending` – A queue of pending bus operations used to send derived inputs. + /// + /// # Returns + /// A boolean indicating whether the program should continue execution or terminate. + /// Returns `true` to continue execution, `false` to stop. + #[inline(always)] + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64]) -> bool { + debug_assert!(*bus_id == OPERATION_BUS_ID); + + self.measure(data); + + true + } } impl Metrics for BinaryCounter { @@ -91,39 +107,6 @@ impl Metrics for BinaryCounter { } impl BusDevice for BinaryCounter { - /// Processes data received on the bus, updating counters and generating inputs when applicable. - /// - /// # Arguments - /// * `bus_id` - The ID of the bus sending the data. - /// * `data` - The data received from the bus. - /// * `pending` – A queue of pending bus operations used to send derived inputs. - /// - /// # Returns - /// A boolean indicating whether the program should continue execution or terminate. - /// Returns `true` to continue execution, `false` to stop. - #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { - debug_assert!(*bus_id == OPERATION_BUS_ID); - - self.measure(data); - - true - } - - /// Returns the bus IDs associated with this counter. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/state-machines/binary/src/binary_extension.rs b/state-machines/binary/src/binary_extension.rs index 988903fc0..84098153e 100644 --- a/state-machines/binary/src/binary_extension.rs +++ b/state-machines/binary/src/binary_extension.rs @@ -5,10 +5,7 @@ use std::sync::Arc; -use crate::{ - binary_constants::*, BinaryExtensionFrops, BinaryExtensionTableOp, BinaryExtensionTableSM, - BinaryInput, -}; +use crate::{binary_constants::*, BinaryExtensionTableOp, BinaryExtensionTableSM, BinaryInput}; use fields::PrimeField64; use pil_std_lib::Std; @@ -58,9 +55,6 @@ pub struct BinaryExtensionSM { /// The table ID for the Binary Basic State Machine table_id: usize, - - /// The table ID for the Binary Extension FROPS - frops_table_id: usize, } impl BinaryExtensionSM { @@ -80,12 +74,7 @@ impl BinaryExtensionSM { .get_virtual_table_id(BinaryExtensionTableSM::TABLE_ID) .expect("Failed to get table ID"); - // Get the FROPS table ID - let frops_table_id = std - .get_virtual_table_id(BinaryExtensionFrops::TABLE_ID) - .expect("Failed to get FROPS table ID"); - - Arc::new(Self { std, range_id, table_id, frops_table_id }) + Arc::new(Self { std, range_id, table_id }) } /// Determines if the given opcode represents a shift operation. @@ -413,9 +402,4 @@ impl BinaryExtensionSM { FromTrace::new(&mut binary_e_trace).with_air_values(&mut air_values), )) } - pub fn compute_frops(&self, frops_inputs: &Vec) { - for row in frops_inputs { - self.std.inc_virtual_row(self.frops_table_id, *row as u64, 1); - } - } } diff --git a/state-machines/binary/src/binary_extension_collector.rs b/state-machines/binary/src/binary_extension_collector.rs index 0caa5dfc3..733f65232 100644 --- a/state-machines/binary/src/binary_extension_collector.rs +++ b/state-machines/binary/src/binary_extension_collector.rs @@ -2,47 +2,56 @@ //! //! It manages collected inputs for the `BinaryExtensionSM` to compute witnesses -use std::collections::VecDeque; - use crate::{BinaryExtensionFrops, BinaryInput}; use zisk_common::{ - BusDevice, BusId, CollectSkipper, ExtOperationData, MemCollectorInfo, OperationBusData, A, B, - OP, OPERATION_BUS_ID, + BusDevice, BusId, CollectSkipper, ExtOperationData, OperationBusData, A, B, OP, + OPERATION_BUS_ID, }; + +use fields::PrimeField64; +use pil_std_lib::Std; +use std::sync::Arc; + use zisk_core::ZiskOperationType; /// The `BinaryExtensionCollector` struct represents an input collector for binary extension -pub struct BinaryExtensionCollector { +pub struct BinaryExtensionCollector { /// Collected inputs for witness computation. pub inputs: Vec, - /// Collected rows for FROPS - pub frops_inputs: Vec, pub num_operations: usize, pub collect_skipper: CollectSkipper, /// Flag to indicate that force to execute to end of chunk force_execute_to_end: bool, + + /// The table ID for the Binary Extension FROPS + frops_table_id: usize, + + /// Standard library instance, providing common functionalities. + std: Arc>, } -impl BinaryExtensionCollector { +impl BinaryExtensionCollector { pub fn new( num_operations: usize, - num_freq_ops: usize, collect_skipper: CollectSkipper, force_execute_to_end: bool, + std: Arc>, ) -> Self { + let frops_table_id = std + .get_virtual_table_id(BinaryExtensionFrops::TABLE_ID) + .expect("Failed to get FROPS table ID"); Self { inputs: Vec::with_capacity(num_operations), num_operations, collect_skipper, - frops_inputs: Vec::with_capacity(num_freq_ops), force_execute_to_end, + frops_table_id, + std, } } -} -impl BusDevice for BinaryExtensionCollector { /// Processes data received on the bus, collecting the inputs necessary for witness computation. /// /// # Arguments @@ -54,13 +63,7 @@ impl BusDevice for BinaryExtensionCollector { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64]) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); let instance_complete = self.inputs.len() == self.num_operations; @@ -84,7 +87,7 @@ impl BusDevice for BinaryExtensionCollector { } if frops_row != BinaryExtensionFrops::NO_FROPS { - self.frops_inputs.push(frops_row as u32); + self.std.inc_virtual_row(self.frops_table_id, frops_row as u64, 1); return true; } @@ -97,15 +100,9 @@ impl BusDevice for BinaryExtensionCollector { self.inputs.len() < self.num_operations || self.force_execute_to_end } +} - /// Returns the bus IDs associated with this instance. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] - } - +impl BusDevice for BinaryExtensionCollector { /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/state-machines/binary/src/binary_extension_instance.rs b/state-machines/binary/src/binary_extension_instance.rs index ac0c83372..d01307a49 100644 --- a/state-machines/binary/src/binary_extension_instance.rs +++ b/state-machines/binary/src/binary_extension_instance.rs @@ -6,8 +6,10 @@ use crate::{BinaryExtensionCollector, BinaryExtensionSM}; use fields::PrimeField64; +use pil_std_lib::Std; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; use std::{collections::HashMap, sync::Arc}; +use zisk_common::StatsType; use zisk_common::{ BusDevice, CheckPoint, ChunkId, CollectSkipper, Instance, InstanceCtx, InstanceType, PayloadType, @@ -24,10 +26,13 @@ pub struct BinaryExtensionInstance { binary_extension_sm: Arc>, /// Collect info for each chunk ID, containing the number of rows and a skipper for collection. - collect_info: HashMap, + collect_info: HashMap, /// Instance context. ictx: InstanceCtx, + + /// Standard library instance, providing common functionalities. + std: Arc>, } impl BinaryExtensionInstance { @@ -41,7 +46,11 @@ impl BinaryExtensionInstance { /// # Returns /// A new `BinaryExtensionInstance` instance initialized with the provided state machine and /// context. - pub fn new(binary_extension_sm: Arc>, mut ictx: InstanceCtx) -> Self { + pub fn new( + binary_extension_sm: Arc>, + mut ictx: InstanceCtx, + std: Arc>, + ) -> Self { assert_eq!( ictx.plan.air_id, BinaryExtensionTrace::::AIR_ID, @@ -52,13 +61,16 @@ impl BinaryExtensionInstance { let meta = ictx.plan.meta.take().expect("Expected metadata in ictx.plan.meta"); let collect_info = *meta - .downcast::>() + .downcast::>() .expect("Failed to downcast ictx.plan.meta to expected type"); - Self { binary_extension_sm, collect_info, ictx } + Self { binary_extension_sm, collect_info, ictx, std } } - pub fn build_binary_extension_collector(&self, chunk_id: ChunkId) -> BinaryExtensionCollector { + pub fn build_binary_extension_collector( + &self, + chunk_id: ChunkId, + ) -> BinaryExtensionCollector { assert_eq!( self.ictx.plan.air_id, BinaryExtensionTrace::::AIR_ID, @@ -66,13 +78,12 @@ impl BinaryExtensionInstance { self.ictx.plan.air_id ); - let (num_ops, num_freq_ops, force_execute_to_end, collect_skipper) = - self.collect_info[&chunk_id]; + let (num_ops, force_execute_to_end, collect_skipper) = self.collect_info[&chunk_id]; BinaryExtensionCollector::new( num_ops as usize, - num_freq_ops as usize, collect_skipper, force_execute_to_end, + self.std.clone(), ) } } @@ -100,8 +111,8 @@ impl Instance for BinaryExtensionInstance { let inputs: Vec<_> = collectors .into_iter() .map(|(_, collector)| { - let _collector = collector.as_any().downcast::().unwrap(); - self.binary_extension_sm.compute_frops(&_collector.frops_inputs); + let _collector = + collector.as_any().downcast::>().unwrap(); _collector.inputs }) .collect(); @@ -125,6 +136,10 @@ impl Instance for BinaryExtensionInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Opcodes + } + /// Builds an input collector for the instance. /// /// # Arguments @@ -133,13 +148,12 @@ impl Instance for BinaryExtensionInstance { /// # Returns /// An `Option` containing the input collector for the instance. fn build_inputs_collector(&self, chunk_id: ChunkId) -> Option>> { - let (num_ops, num_freq_ops, force_execute_to_end, collect_skipper) = - self.collect_info[&chunk_id]; + let (num_ops, force_execute_to_end, collect_skipper) = self.collect_info[&chunk_id]; Some(Box::new(BinaryExtensionCollector::new( num_ops as usize, - num_freq_ops as usize, collect_skipper, force_execute_to_end, + self.std.clone(), ))) } diff --git a/state-machines/frequent-ops/Cargo.toml b/state-machines/frequent-ops/Cargo.toml index 718b0d92a..19ee0e932 100644 --- a/state-machines/frequent-ops/Cargo.toml +++ b/state-machines/frequent-ops/Cargo.toml @@ -13,15 +13,10 @@ path = "src/frequent_ops_test.rs" [dependencies] zisk-core = { workspace = true } -zisk-common = { workspace = true } -zisk-pil = { workspace = true } fields = { workspace=true } -proofman = { workspace = true } proofman-common = { workspace = true } -proofman-macros = { workspace = true } proofman-util = { workspace = true } -pil-std-lib = { workspace = true } tracing = { workspace = true } clap = "4.0" @@ -31,7 +26,6 @@ static_assertions = { workspace = true } [features] default = [] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -generate_code_arith_range_table = [] -no_lib_link = ["proofman-common/no_lib_link"] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] \ No newline at end of file +gpu = ["packed"] +packed = [] +generate_code_arith_range_table = [] \ No newline at end of file diff --git a/state-machines/frequent-ops/pil/frequent_ops.pil b/state-machines/frequent-ops/pil/frequent_ops.pil index 65cb005fe..c4eddd9f4 100644 --- a/state-machines/frequent-ops/pil/frequent_ops.pil +++ b/state-machines/frequent-ops/pil/frequent_ops.pil @@ -1,8 +1,8 @@ require "std_lookup.pil" require "opids.pil" -airtemplate FrequentOps(const int N = 2**24, int RC = 2, const int operation_bus_id = OPERATION_BUS_ID, - const string bin_file = "../src/frequent_ops_fixed.bin", const int table_id = -1) { +airtemplate FrequentOps(const int N = 2**24, int RC = 2, const string bin_file = "../src/frequent_ops_fixed.bin", + const int table_id = -1) { #pragma extern_fixed_file `${bin_file}` @@ -14,5 +14,5 @@ airtemplate FrequentOps(const int N = 2**24, int RC = 2, const int operation_bus col fixed C[RC]; col fixed FLAG; - lookup_proves(operation_bus_id, [OP, ...A, ...B, ...C, FLAG], mul: multiplicity, table_id: table_id); + lookup_proves(OPERATION_BUS_ID, [OP, ...A, ...B, ...C, FLAG], table_id:, mul: multiplicity, surname: PIOP_SURNAME_DYNAMIC); } \ No newline at end of file diff --git a/state-machines/main/Cargo.toml b/state-machines/main/Cargo.toml index 506988046..a2e93d7e3 100644 --- a/state-machines/main/Cargo.toml +++ b/state-machines/main/Cargo.toml @@ -12,11 +12,8 @@ ziskemu = { workspace = true } zisk-core = { workspace = true } zisk-common = { workspace = true } zisk-pil = { workspace = true } -sm-mem = { workspace = true } mem-common = { workspace = true } -asm-runner = { workspace = true } -proofman = { workspace = true } proofman-common = { workspace = true } proofman-util = { workspace = true } proofman-macros = { workspace = true } @@ -28,8 +25,5 @@ pil-std-lib = { workspace = true } [features] default = [] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed"] -no_lib_link = ["proofman-common/no_lib_link"] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] +gpu = ["packed"] +packed = [] diff --git a/state-machines/main/pil/main.pil b/state-machines/main/pil/main.pil index 9f4054d1d..71d8d6a7f 100644 --- a/state-machines/main/pil/main.pil +++ b/state-machines/main/pil/main.pil @@ -1,7 +1,8 @@ -require "std_lookup.pil" -require "std_permutation.pil" -require "std_direct.pil" -require "opids.pil" +require "std_lookup.pil"; +require "std_permutation.pil"; +require "std_direct.pil"; +require "registers.pil" +require "opids.pil"; const int BOOT_ADDR = 0x1000; const int END_PC_ADDR = 0x1004; @@ -11,7 +12,6 @@ const int MAX_RANGE = (1 << 24) - 1; const int REG_BASE_ADDR = 0xA000_0000; airtemplate Main(int N = 2**21, int RC = 2, int stack_enabled = 0, - const int operation_bus_id = OPERATION_BUS_ID, const int REGS_IN_MAIN_FROM = 1, const int REGS_IN_MAIN_TO = 31) { const int REGS_IN_MAIN = REGS_IN_MAIN_TO - REGS_IN_MAIN_FROM + 1; @@ -111,7 +111,7 @@ airtemplate Main(int N = 2**21, int RC = 2, int stack_enabled = 0, } else { col witness bits(32) air.a_imm1; } - col witness bits(1) a_src_step; + col witness bits(1) is_precompiled; // Selector to include step on operation bus // Source B @@ -139,7 +139,7 @@ airtemplate Main(int N = 2**21, int RC = 2, int stack_enabled = 0, // Destination C - col witness bits(1) store_ra; + col witness bits(1) store_pc; col witness bits(1) store_mem; @@ -155,9 +155,11 @@ airtemplate Main(int N = 2**21, int RC = 2, int stack_enabled = 0, col witness bits(64, signed) air.inc_sp; } - + // branch jump if flag = 1, jump to pc + jmp_offset1 + // if set_pc = 1, jump to c[0] + jmp_offset1 col witness bits(64, signed) jmp_offset1; + // default jump if flag = 0, jump to pc + jmp_offset2 col witness bits(64, signed) jmp_offset2; col witness bits(1) m32; @@ -256,9 +258,9 @@ airtemplate Main(int N = 2**21, int RC = 2, int stack_enabled = 0, // region. As a result, no memory state machine can validate memory access in this area; // only the main state machine can 'prove' such memory access. - col witness bits(40) a_reg_prev_mem_step; - col witness bits(40) b_reg_prev_mem_step; - col witness bits(40) store_reg_prev_mem_step; + col witness bits(REG_STEP_BITS) a_reg_prev_mem_step; + col witness bits(REG_STEP_BITS) b_reg_prev_mem_step; + col witness bits(REG_STEP_BITS) store_reg_prev_mem_step; col witness bits(32) store_reg_prev_value[RC]; col witness bits(1) a_src_reg; col witness bits(1) b_src_reg; @@ -306,8 +308,8 @@ airtemplate Main(int N = 2**21, int RC = 2, int stack_enabled = 0, const expr store_value[2]; - store_value[0] = store_ra*(pc + jmp_offset2 - c[0]) + c[0]; - store_value[1] = (1 - store_ra) * c[1]; + store_value[0] = store_pc*(pc + jmp_offset2 - c[0]) + c[0]; + store_value[1] = (1 - store_pc) * c[1]; // Memory function to prove that previous register store_offset access. @@ -330,11 +332,7 @@ airtemplate Main(int N = 2**21, int RC = 2, int stack_enabled = 0, range_check(expression: a_mem_step - a_reg_prev_mem_step - 1, min: 0, max: MAX_RANGE, sel: a_src_reg); range_check(expression: b_mem_step - b_reg_prev_mem_step - 1, min: 0, max: MAX_RANGE, sel: b_src_reg); - range_check(expression: store_mem_step - store_reg_prev_mem_step -1 , min: 0, max: MAX_RANGE, sel: store_reg); - - // Sent to bus the external operation - - lookup_assumes(operation_bus_id, [op, a[0], (1 - m32) * a[1], b[0], (1 - m32) * b[1], ...c, flag], sel: is_external_op); + range_check(expression: store_mem_step - store_reg_prev_mem_step - 1 , min: 0, max: MAX_RANGE, sel: store_reg); const expr a_src_c; const expr b_src_c; @@ -348,17 +346,32 @@ airtemplate Main(int N = 2**21, int RC = 2, int stack_enabled = 0, // Optimization to avoid use extra columns when c is source for a or b. if (stack_enabled) { - a_src_c = 1 - a_src_step - a_src_mem - a_src_imm - a_src_sp; + a_src_c = 1 - a_src_mem - a_src_imm - a_src_sp; b_src_c = 1 - b_src_mem - b_src_imm - b_src_ind - b_src_reg; a_imm[1] = a_use_sp_imm1; b_imm[1] = b_use_sp_imm1; } else { - a_src_c = 1 - a_src_step - a_src_mem - a_src_imm - a_src_reg; + a_src_c = 1 - a_src_mem - a_src_imm - a_src_reg; b_src_c = 1 - b_src_mem - b_src_imm - b_src_ind - b_src_reg; a_imm[1] = a_imm1; b_imm[1] = b_imm1; } + // Sent to bus the external operation + + // Precompiles are generated throw transpilation because them not exists directly in RISCV + + // The flag m32 is used to avoid send "garbage" values to bus, to simplify the other state + // state machines and to increase the hits to FROPS (frequent operations). + + assumes_operation(op: , + a: [a[0], (1 - m32) * a[1]], + b: [b[0], (1 - m32) * b[1]], + c: , + flag:, + main_step: STEP * is_precompiled, + extended_arg: jmp_offset1 * is_precompiled, + sel: is_external_op); for (int index = 0; index < RC; ++index) { const expr previous_c = SEGMENT_L1 * (segment_previous_c[index] - 'c[index]) + 'c[index]; @@ -368,9 +381,6 @@ airtemplate Main(int N = 2**21, int RC = 2, int stack_enabled = 0, a_src_sp * (a[index] - (index == 0 ? sp: 0 )) === 0; } - // If source is step, value must be same as STEP - a_src_step * (a[index] - (index == 0 ? STEP : 0)) === 0; - // If source is c, value must be same as previous_c a_src_c * (a[index] - previous_c) === 0; b_src_c * (b[index] - previous_c) === 0; @@ -456,11 +466,11 @@ airtemplate Main(int N = 2**21, int RC = 2, int stack_enabled = 0, a_src_imm * (1 - a_src_imm) === 0; a_src_mem * (1 - a_src_mem) === 0; - a_src_step * (1 - a_src_step) === 0; + is_precompiled * (1 - is_precompiled) === 0; b_src_imm * (1 - b_src_imm) === 0; b_src_mem * (1 - b_src_mem) === 0; is_external_op * (1 - is_external_op) === 0; - store_ra * (1 - store_ra) === 0; + store_pc * (1 - store_pc) === 0; store_mem * (1 - store_mem) === 0; store_ind * (1 - store_ind) === 0; set_pc * (1 - set_pc) === 0; @@ -470,8 +480,8 @@ airtemplate Main(int N = 2**21, int RC = 2, int stack_enabled = 0, b_src_reg * (1 - b_src_reg) === 0; store_reg * (1 - store_reg) === 0; - const expr rom_flags = 1 + 2 * a_src_imm + 4 * a_src_mem + 8 * a_src_step + 16 * b_src_imm - + 32 * b_src_mem + 64 * is_external_op + 128 * store_ra + 256 * store_mem + const expr rom_flags = 1 + 2 * a_src_imm + 4 * a_src_mem + 8 * is_precompiled + 16 * b_src_imm + + 32 * b_src_mem + 64 * is_external_op + 128 * store_pc + 256 * store_mem + 512 * store_ind + 1024 * set_pc + 2048 * m32 + 4096 * b_src_ind + 8192 * a_src_reg + 16384 * b_src_reg + 32768 * store_reg; @@ -528,5 +538,5 @@ airtemplate Main(int N = 2**21, int RC = 2, int stack_enabled = 0, // Main Segment constraint - range_check(main_segment, min: 0, max: ((2**32)/N) - 1); + range_check(main_segment, min: 0, max: ((2**MAIN_STEP_BITS)/N) - 1); } \ No newline at end of file diff --git a/state-machines/main/pil/registers.pil b/state-machines/main/pil/registers.pil new file mode 100644 index 000000000..6488dea12 --- /dev/null +++ b/state-machines/main/pil/registers.pil @@ -0,0 +1,67 @@ +const int REG_ZE = 0; +const int REG_RA = 1; +const int REG_SP = 2; +const int REG_GP = 3; +const int REG_TP = 4; +const int REG_T0 = 5; +const int REG_T1 = 6; +const int REG_T2 = 7; +const int REG_S0 = 8; +const int REG_S1 = 9; +const int REG_A0 = 10; +const int REG_A1 = 11; +const int REG_A2 = 12; +const int REG_A3 = 13; +const int REG_A4 = 14; +const int REG_A5 = 15; +const int REG_A6 = 16; +const int REG_A7 = 17; +const int REG_S2 = 18; +const int REG_S3 = 19; +const int REG_S4 = 20; +const int REG_S5 = 21; +const int REG_S6 = 22; +const int REG_S7 = 23; +const int REG_S8 = 24; +const int REG_S9 = 25; +const int REG_S10 = 26; +const int REG_S11 = 27; +const int REG_T3 = 28; +const int REG_T4 = 29; +const int REG_T5 = 30; +const int REG_T6 = 31; + +const int REG_FP = 8; + +const int REG_X0 = 0; +const int REG_X1 = 1; +const int REG_X2 = 2; +const int REG_X3 = 3; +const int REG_X4 = 4; +const int REG_X5 = 5; +const int REG_X6 = 6; +const int REG_X7 = 7; +const int REG_X8 = 8; +const int REG_X9 = 9; +const int REG_X10 = 10; +const int REG_X11 = 11; +const int REG_X12 = 12; +const int REG_X13 = 13; +const int REG_X14 = 14; +const int REG_X15 = 15; +const int REG_X16 = 16; +const int REG_X17 = 17; +const int REG_X18 = 18; +const int REG_X19 = 19; +const int REG_X20 = 20; +const int REG_X21 = 21; +const int REG_X22 = 22; +const int REG_X23 = 23; +const int REG_X24 = 24; +const int REG_X25 = 25; +const int REG_X26 = 26; +const int REG_X27 = 27; +const int REG_X28 = 28; +const int REG_X29 = 29; +const int REG_X30 = 30; +const int REG_X31 = 31; \ No newline at end of file diff --git a/state-machines/main/src/main_counter.rs b/state-machines/main/src/main_counter.rs index 6f055358d..f939569b4 100644 --- a/state-machines/main/src/main_counter.rs +++ b/state-machines/main/src/main_counter.rs @@ -2,8 +2,7 @@ //! sent over the data bus. It connects to the bus and gathers metrics for specific //! `ZiskOperationType::PubOut` instructions. -use std::collections::VecDeque; -use zisk_common::{BusDevice, BusId, MemCollectorInfo, Metrics, A, B, OPERATION_BUS_ID, OP_TYPE}; +use zisk_common::{BusDevice, BusId, Metrics, A, B, OPERATION_BUS_ID, OP_TYPE}; use zisk_core::ZiskOperationType; /// The `MainCounter` struct represents a counter that monitors and measures @@ -33,22 +32,7 @@ impl MainCounter { pub fn new() -> Self { Self { publics: Vec::new() } } -} - -impl Metrics for MainCounter { - #[inline(always)] - fn measure(&mut self, _data: &[u64]) {} - - /// Provides a dynamic reference for downcasting purposes. - /// - /// # Returns - /// A reference to `self` as `dyn std::any::Any`. - fn as_any(&self) -> &dyn std::any::Any { - self - } -} -impl BusDevice for MainCounter { /// Processes data received on the bus, updating counters and generating inputs when applicable. /// /// # Arguments @@ -60,13 +44,7 @@ impl BusDevice for MainCounter { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64]) -> bool { debug_assert!(*bus_id == OPERATION_BUS_ID); const PUBOUT: u64 = ZiskOperationType::PubOut as u64; @@ -83,15 +61,22 @@ impl BusDevice for MainCounter { true } +} + +impl Metrics for MainCounter { + #[inline(always)] + fn measure(&mut self, _data: &[u64]) {} - /// Returns the bus IDs associated with this counter. + /// Provides a dynamic reference for downcasting purposes. /// /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![OPERATION_BUS_ID] + /// A reference to `self` as `dyn std::any::Any`. + fn as_any(&self) -> &dyn std::any::Any { + self } +} +impl BusDevice for MainCounter { /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/state-machines/main/src/main_planner.rs b/state-machines/main/src/main_planner.rs index d08e43e09..479c0e215 100644 --- a/state-machines/main/src/main_planner.rs +++ b/state-machines/main/src/main_planner.rs @@ -5,9 +5,10 @@ use std::any::Any; -use asm_runner::MinimalTraces; use fields::PrimeField64; -use zisk_common::{BusDeviceMetrics, CheckPoint, ChunkId, InstanceType, Metrics, Plan, SegmentId}; +use zisk_common::{ + BusDeviceMetrics, CheckPoint, ChunkId, EmuTrace, InstanceType, Metrics, Plan, SegmentId, +}; use zisk_pil::{MainTrace, MAIN_AIR_IDS, ZISK_AIRGROUP_ID}; use crate::MainCounter; @@ -32,18 +33,10 @@ impl MainPlanner { /// # Returns /// A vector of `Plan` instances, each corresponding to a segment of the main trace. pub fn plan( - min_traces: &MinimalTraces, + min_traces: &[EmuTrace], main_counters: Vec<(ChunkId, Box)>, min_traces_size: u64, ) -> (Vec, Vec<(u64, u32)>) { - let min_traces = match min_traces { - MinimalTraces::AsmEmuTrace(asm_min_traces) => &asm_min_traces.vec_chunks, - MinimalTraces::EmuTrace(vec_chunks) => vec_chunks, - MinimalTraces::None => { - panic!("Minimal traces are required for planning the main state machine."); - } - }; - let num_rows = MainTrace::::NUM_ROWS as u64; let mut publics = Vec::new(); diff --git a/state-machines/main/src/main_sm.rs b/state-machines/main/src/main_sm.rs index 280f1a8f8..74eec2573 100644 --- a/state-machines/main/src/main_sm.rs +++ b/state-machines/main/src/main_sm.rs @@ -9,14 +9,13 @@ use std::sync::Arc; -use crate::MainCounter; use fields::PrimeField64; use mem_common::{MemHelpers, MEM_REGS_MAX_DIFF, MEM_STEPS_BY_MAIN_STEP}; use pil_std_lib::Std; use proofman_common::{AirInstance, FromTrace, ProofCtx, ProofmanResult, SetupCtx}; use rayon::prelude::*; -use zisk_common::{BusDeviceMetrics, EmuTrace, InstanceCtx, SegmentId}; -use zisk_core::{ZiskRom, REGS_IN_MAIN, REGS_IN_MAIN_FROM, REGS_IN_MAIN_TO}; +use zisk_common::{EmuTrace, InstanceCtx, SegmentId}; +use zisk_core::{ZiskRom, DEFAULT_MAX_STEPS, REGS_IN_MAIN, REGS_IN_MAIN_FROM, REGS_IN_MAIN_TO}; use zisk_pil::MainAirValues; use ziskemu::{Emu, EmuRegTrace}; @@ -45,7 +44,8 @@ pub struct MainInstance { } impl MainInstance { - const MAX_SEGMENT_ID: usize = ((1 << 32) / MainTraceType::::NUM_ROWS) - 1; + const MAX_SEGMENT_ID: usize = + ((DEFAULT_MAX_STEPS + 1) as usize / MainTraceType::::NUM_ROWS) - 1; /// Creates a new `MainInstance`. /// @@ -209,7 +209,6 @@ impl MainInstance { &mut large_range_checks, ); self.update_std_range_checks(segment_id, step_range_check, &large_range_checks); - // Generate and add the AIR instance let from_trace = FromTrace::new(&mut main_trace).with_air_values(&mut air_values); Ok(AirInstance::new_from_trace(from_trace)) @@ -372,8 +371,4 @@ impl MainSM { pub fn debug(_pctx: &ProofCtx, _sctx: &SetupCtx) { // No debug information to display } - - pub fn build_counter() -> Box { - Box::new(MainCounter::new()) - } } diff --git a/state-machines/mem-common/Cargo.toml b/state-machines/mem-common/Cargo.toml index b4703285f..e65b51e38 100644 --- a/state-machines/mem-common/Cargo.toml +++ b/state-machines/mem-common/Cargo.toml @@ -18,10 +18,7 @@ zisk-pil = { workspace = true } proofman-common = { workspace = true } proofman-macros = { workspace = true } -proofman = { workspace = true } proofman-util = { workspace = true } -witness = { workspace = true } -pil-std-lib = { workspace = true } fields = { workspace=true } tracing = { workspace = true } rayon = { workspace = true } @@ -32,9 +29,6 @@ num-traits = "0.2" [features] default = [] -no_lib_link = ["proofman-common/no_lib_link"] -diagnostic = ["proofman-macros/diagnostic"] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] debug_mem = [] debug_mem_align = [] save_mem_bus_data = [] diff --git a/state-machines/mem-common/src/mem_align_planner.rs b/state-machines/mem-common/src/mem_align_planner.rs index 72a6e5ace..f477f5a6a 100644 --- a/state-machines/mem-common/src/mem_align_planner.rs +++ b/state-machines/mem-common/src/mem_align_planner.rs @@ -1,5 +1,11 @@ use core::panic; -use std::{collections::HashMap, sync::Arc}; +use std::{ + collections::HashMap, + fs::File, + io::{BufRead, BufReader, BufWriter, Write}, + path::Path, + sync::Arc, +}; use crate::{MemAlignCheckPoint, MemAlignCounters}; use crate::{MemAlignInstanceCounter, MemCounters}; @@ -80,6 +86,76 @@ impl<'a> MemAlignPlanner<'a> { full, } } + + /// Saves the counters to a file. + /// + /// # Parameters + /// - `path`: Path to the file where counters will be saved + /// + /// # Returns + /// Result indicating success or an IO error + pub fn save_counters_to_file>(&self, path: P) -> std::io::Result<()> { + let file = File::create(path)?; + let mut writer = BufWriter::new(file); + + for (chunk_id, mem_counters) in self.counters.as_ref() { + let mc = &mem_counters.mem_align_counters; + writeln!( + writer, + "{} {} {} {} {} {}", + chunk_id.0, mc.full_5, mc.full_3, mc.full_2, mc.read_byte, mc.write_byte + )?; + } + + writer.flush()?; + Ok(()) + } + + /// Loads counters from a file and calculates totals for use with align_plan_from_counters. + /// Returns the loaded counters and calculated totals (full_rows, read_byte, write_byte). + /// + /// # Parameters + /// - `path`: Path to the file containing saved counters + /// + /// # Returns + /// A tuple with (counters, full_rows, read_byte, write_byte) + pub fn load_counters_from_file>( + path: P, + ) -> std::io::Result<(Vec, u32, u32, u32)> { + let file = File::open(path)?; + let reader = BufReader::new(file); + + let mut counters: Vec = Vec::new(); + let mut full_rows = 0; + let mut read_byte = 0; + let mut write_byte = 0; + + for line in reader.lines() { + let line = line?; + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() != 6 { + continue; + } + + let counter = MemAlignCounters { + chunk_id: parts[0].parse().unwrap_or(0), + full_5: parts[1].parse().unwrap_or(0), + full_3: parts[2].parse().unwrap_or(0), + full_2: parts[3].parse().unwrap_or(0), + read_byte: parts[4].parse().unwrap_or(0), + write_byte: parts[5].parse().unwrap_or(0), + }; + + full_rows += counter.full_2 * 2 + counter.full_3 * 3 + counter.full_5 * 5; + read_byte += counter.read_byte; + write_byte += counter.write_byte; + + counters.push(counter); + } + + Ok((counters, full_rows, read_byte, write_byte)) + } + fn check_pendings(&self, pendings: &[u32; 5]) { if pendings.iter().any(|&x| x > 0) { println!( @@ -115,6 +191,7 @@ impl<'a> MemAlignPlanner<'a> { self.full.get_used() ); println!("[Pending] (F5,F3,F2,RB,WB) {pendings:?}"); + let _ = self.save_counters_to_file("tmp/mem_align_counters_crash.txt"); panic!("Some counters are pending"); } } diff --git a/state-machines/mem-common/src/mem_constants.rs b/state-machines/mem-common/src/mem_constants.rs index 8fd746c85..0920d6a2f 100644 --- a/state-machines/mem-common/src/mem_constants.rs +++ b/state-machines/mem-common/src/mem_constants.rs @@ -32,8 +32,3 @@ pub const MAX_MEM_ADDR: u64 = 0xFFFF_FFFF; pub const SEGMENT_ADDR_MAX_RANGE: usize = (1 << 24) - 1; pub const SEGMENT_LARGE_ADDR_C_MAX_RANGE: usize = (1 << 16) - 1; - -pub const MEM_INC_C_BITS: usize = 18; -pub const MEM_INC_C_SIZE: usize = 1 << MEM_INC_C_BITS; -pub const MEM_INC_C_MAX_RANGE: usize = MEM_INC_C_SIZE - 1; -pub const MEM_INC_C_MASK: usize = MEM_INC_C_SIZE - 1; diff --git a/state-machines/mem-common/src/mem_counters.rs b/state-machines/mem-common/src/mem_counters.rs index 31cc51e3c..64d4851f8 100644 --- a/state-machines/mem-common/src/mem_counters.rs +++ b/state-machines/mem-common/src/mem_counters.rs @@ -2,18 +2,12 @@ use rayon::prelude::*; #[cfg(feature = "save_mem_bus_data")] use std::{env, io::Write, slice}; -use std::{ - collections::{HashMap, VecDeque}, - fs::File, - io::Read, -}; +use std::{collections::HashMap, fs::File, io::Read}; use zisk_common::ChunkId; use crate::{MemAlignCounters, MemHelpers}; use std::fmt; -use zisk_common::{ - BusDevice, BusId, MemBusData, MemCollectorInfo, Metrics, MEM_BUS_DATA_SIZE, MEM_BUS_ID, -}; +use zisk_common::{BusDevice, BusId, MemBusData, Metrics, MEM_BUS_DATA_SIZE, MEM_BUS_ID}; const ST_BITS_OFFSET: u32 = 30; const ST_INI: u8 = 0; @@ -78,10 +72,10 @@ impl MemCounters { let point = addr_vector.partition_point(|x| x.0 < (0xA000_0000 / 8)); self.addr_sorted[2] = addr_vector.split_off(point); - let point = addr_vector.partition_point(|x| x.0 < (0x9000_0000 / 8)); - self.addr_sorted[1] = addr_vector.split_off(point); + let point = addr_vector.partition_point(|x| x.0 < (0x8000_0000 / 8)); + self.addr_sorted[0] = addr_vector.split_off(point); - self.addr_sorted[0] = addr_vector; + self.addr_sorted[1] = addr_vector; } #[inline(always)] fn incr_st_counter_aligned(count: u32, is_write: bool) -> u32 { @@ -309,28 +303,9 @@ impl MemCounters { self.mem_measure(data); } } -} - -impl Metrics for MemCounters { - #[inline(always)] - fn measure(&mut self, data: &[u64]) { - self.mem_measure(data); - } - fn as_any(&self) -> &dyn std::any::Any { - self - } -} - -impl BusDevice for MemCounters { #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64]) -> bool { debug_assert!(bus_id == &MEM_BUS_ID); #[cfg(feature = "save_mem_bus_data")] @@ -343,17 +318,22 @@ impl BusDevice for MemCounters { true } +} - fn bus_id(&self) -> Vec { - vec![MEM_BUS_ID] +impl Metrics for MemCounters { + #[inline(always)] + fn measure(&mut self, data: &[u64]) { + self.mem_measure(data); } - /// Provides a dynamic reference for downcasting purposes. - fn as_any(self: Box) -> Box { + fn as_any(&self) -> &dyn std::any::Any { self } +} - fn on_close(&mut self) { - self.close(); +impl BusDevice for MemCounters { + /// Provides a dynamic reference for downcasting purposes. + fn as_any(self: Box) -> Box { + self } } diff --git a/state-machines/mem-cpp/cpp/Makefile b/state-machines/mem-cpp/cpp/Makefile index 9c12e3405..dcae195f2 100644 --- a/state-machines/mem-cpp/cpp/Makefile +++ b/state-machines/mem-cpp/cpp/Makefile @@ -14,12 +14,16 @@ SRCS := tools.cpp api.cpp mem_count_and_plan.cpp immutable_mem_planner.cpp \ OBJS := $(addprefix $(OUT_DIR)/, $(SRCS:.cpp=.o)) +# Find all header files to track dependencies +HEADERS := $(wildcard *.hpp) + all: $(OUT_DIR)/$(TARGET) $(OUT_DIR)/$(TARGET): $(OBJS) ar rcs $@ $^ -$(OUT_DIR)/%.o: %.cpp +# Each object file depends on all header files to trigger rebuild when headers change +$(OUT_DIR)/%.o: %.cpp $(HEADERS) mkdir -p $(OUT_DIR) $(CXX) $(CXXFLAGS) -c $< -o $@ diff --git a/state-machines/mem-cpp/cpp/mem_align_counter.cpp b/state-machines/mem-cpp/cpp/mem_align_counter.cpp index 103b9b455..b329b3547 100644 --- a/state-machines/mem-cpp/cpp/mem_align_counter.cpp +++ b/state-machines/mem-cpp/cpp/mem_align_counter.cpp @@ -6,17 +6,6 @@ #include #include - -#define FLAGS_1_BYTE_READ 1 -#define FLAGS_2_BYTES_READ 2 -#define FLAGS_4_BYTES_READ 4 -#define FLAGS_8_BYTES_READ 8 -#define FLAGS_1_BYTE_CLEAR_WRITE (MEM_WRITE_FLAG + MEM_WRITE_BYTE_CLEAR_FLAG + 1) -#define FLAGS_1_BYTE_WRITE (MEM_WRITE_FLAG + 1) -#define FLAGS_2_BYTES_WRITE (MEM_WRITE_FLAG + 2) -#define FLAGS_4_BYTES_WRITE (MEM_WRITE_FLAG + 4) -#define FLAGS_8_BYTES_WRITE (MEM_WRITE_FLAG + 8) - MemAlignCounter::MemAlignCounter(std::shared_ptr context) :context(context) { total_counters.chunk_id = 0xFFFFFFFF; total_counters.full_5 = 0; @@ -33,9 +22,17 @@ void MemAlignCounter::execute() uint32_t chunk_id = 0; int64_t elapsed_us = 0; #ifdef MEM_CONTEXT_SEM + #ifdef CHUNK_STATS while ((chunk = context->get_chunk(MAX_THREADS, chunk_id, elapsed_us)) != nullptr) #else + while ((chunk = context->get_chunk(MAX_THREADS, chunk_id)) != nullptr) + #endif + #else + #ifdef CHUNK_STATS while ((chunk = context->get_chunk(chunk_id, elapsed_us)) != nullptr) + #else + while ((chunk = context->get_chunk(chunk_id)) != nullptr) + #endif #endif { execute_chunk(chunk_id, chunk->data, chunk->count); @@ -59,13 +56,13 @@ void MemAlignCounter::execute_chunk(uint32_t chunk_id, const MemCountersBusData uint32_t write_byte = 0; for (uint32_t i = 0; i < chunk_size; i++) { - switch (chunk_data[i].flags & 0xFF) { + switch (chunk_data[i].flags & 0x3F) { // 1 byte read - case FLAGS_1_BYTE_READ: + case MOPS_READ_1: read_byte += 1; break; // 2 bytes read - case FLAGS_2_BYTES_READ: + case MOPS_READ_2: if ((chunk_data[i].addr & 0x07) > 6) { full_3 += 1; } else { @@ -73,7 +70,7 @@ void MemAlignCounter::execute_chunk(uint32_t chunk_id, const MemCountersBusData } break; // 4 bytes read - case FLAGS_4_BYTES_READ: + case MOPS_READ_4: if ((chunk_data[i].addr & 0x07) > 4) { full_3 += 1; } else { @@ -81,23 +78,22 @@ void MemAlignCounter::execute_chunk(uint32_t chunk_id, const MemCountersBusData } break; // 8 bytes read - case FLAGS_8_BYTES_READ: + case MOPS_READ_8: if ((chunk_data[i].addr & 0x07) > 0) { full_3 += 1; } // if chunk_data[i].addr & 0x07 == 0 ==> aligned read break; // 1 byte write (clear) - case FLAGS_1_BYTE_CLEAR_WRITE: + case MOPS_CWRITE_1: write_byte += 1; break; - // 1 byte write - case FLAGS_1_BYTE_WRITE: + case MOPS_WRITE_1: full_3 += 1; break; // 2 bytes write - case FLAGS_2_BYTES_WRITE: + case MOPS_WRITE_2: if ((chunk_data[i].addr & 0x07) > 6) { full_5 += 1; } else { @@ -105,7 +101,7 @@ void MemAlignCounter::execute_chunk(uint32_t chunk_id, const MemCountersBusData } break; // 4 bytes write - case FLAGS_4_BYTES_WRITE: + case MOPS_WRITE_4: if ((chunk_data[i].addr & 0x07) > 4) { full_5 += 1; } else { @@ -113,12 +109,48 @@ void MemAlignCounter::execute_chunk(uint32_t chunk_id, const MemCountersBusData } break; // 8 bytes write - case FLAGS_8_BYTES_WRITE: + case MOPS_WRITE_8: if ((chunk_data[i].addr & 0x07) > 0) { full_5 += 1; } // if chunk_data[i].addr & 0x07 == 0 ==> aligned write break; + case MOPS_BLOCK_READ + 0x00: + case MOPS_BLOCK_READ + 0x10: + case MOPS_BLOCK_READ + 0x20: + case MOPS_BLOCK_READ + 0x30: + if ((chunk_data[i].addr & 0x07) > 0) { + const uint32_t count = chunk_data[i].flags >> MOPS_BLOCK_COUNT_SBITS; + full_5 += count; + } + break; + case MOPS_BLOCK_WRITE + 0x00: + case MOPS_BLOCK_WRITE + 0x10: + case MOPS_BLOCK_WRITE + 0x20: + case MOPS_BLOCK_WRITE + 0x30: + if ((chunk_data[i].addr & 0x07) > 0) { + const uint32_t count = chunk_data[i].flags >> MOPS_BLOCK_COUNT_SBITS; + full_5 += count; + } + break; + + case MOPS_ALIGNED_READ + 0x00: + case MOPS_ALIGNED_READ + 0x10: + case MOPS_ALIGNED_READ + 0x20: + case MOPS_ALIGNED_READ + 0x30: + case MOPS_ALIGNED_WRITE + 0x00: + case MOPS_ALIGNED_WRITE + 0x10: + case MOPS_ALIGNED_WRITE + 0x20: + case MOPS_ALIGNED_WRITE + 0x30: + case MOPS_ALIGNED_BLOCK_READ + 0x00: + case MOPS_ALIGNED_BLOCK_READ + 0x10: + case MOPS_ALIGNED_BLOCK_READ + 0x20: + case MOPS_ALIGNED_BLOCK_READ + 0x30: + case MOPS_ALIGNED_BLOCK_WRITE + 0x00: + case MOPS_ALIGNED_BLOCK_WRITE + 0x10: + case MOPS_ALIGNED_BLOCK_WRITE + 0x20: + case MOPS_ALIGNED_BLOCK_WRITE + 0x30: + break; default: printf("MemAlignCounter: Unknown flags: 0x%X\n", chunk_data[i].flags); assert(false && "Unknown flags in MemAlignCounter"); diff --git a/state-machines/mem-cpp/cpp/mem_config.hpp b/state-machines/mem-cpp/cpp/mem_config.hpp index 9e60f471b..135c8b200 100644 --- a/state-machines/mem-cpp/cpp/mem_config.hpp +++ b/state-machines/mem-cpp/cpp/mem_config.hpp @@ -2,9 +2,13 @@ #define __MEM_CONFIG_HPP__ #define ROM_ADDR 0x80000000 -#define INPUT_ADDR 0x90000000 +#define INPUT_ADDR 0x40000000 #define RAM_ADDR 0xA0000000 +#define ROM_SIZE_MB 128 +#define INPUT_SIZE_MB 1024 +#define RAM_SIZE_MB 512 + #define CHUNK_SIZE_BITS 18 #define CHUNK_SIZE (1 << CHUNK_SIZE_BITS) #define MAX_LOCATORS 2048 @@ -28,7 +32,7 @@ #define ROM_ROWS (1 << 21) #define INPUT_ROWS (1 << 21) #define MEM_ROWS (1 << 22) -#define MAX_CHUNKS 8192 // 2^13 * 2^18 = 2^31 +#define MAX_CHUNKS (1 << 18) // 2^36 / 2^18 = 2^18 // THREAD_BITS >= 1 #define THREAD_BITS 2 @@ -36,7 +40,7 @@ #define MAX_THREADS (1 << THREAD_BITS) #define ADDR_MASK ((MAX_THREADS - 1) * 8) -#define MAX_PAGES 12 +#define MAX_PAGES 26 #define ADDR_PAGE_BITS (23 - THREAD_BITS) #define ADDR_PAGE_SIZE (1 << ADDR_PAGE_BITS) #define RELATIVE_OFFSET_MASK (ADDR_PAGE_SIZE - 1) @@ -44,10 +48,13 @@ #define OFFSET_BITS (25 + 4 - THREAD_BITS) // 4 bits (3 bits for 6 pages, 1 bit security) #define OFFSET_PAGE_SHIFT_BITS (OFFSET_BITS - 3) +#define MAX_SLOT_GB 6 #define ADDR_SLOT_BITS 5 +#define ADDR_SLOT_BYTES 4 #define ADDR_SLOT_SIZE (1 << ADDR_SLOT_BITS) #define ADDR_SLOT_MASK (0xFFFFFFFF << ADDR_SLOT_BITS) -#define ADDR_SLOTS ((1024 * 1024 * 32) / MAX_THREADS) +#define ADDR_TOTAL_SLOTS ((((size_t)MAX_SLOT_GB) << 30) / (ADDR_SLOT_SIZE * ADDR_SLOT_BYTES)) +#define ADDR_SLOTS (ADDR_TOTAL_SLOTS / MAX_THREADS) #define ADDR_SLOTS_SIZE (ADDR_SLOT_SIZE * ADDR_SLOTS) #define TIME_US_BY_CHUNK 173 @@ -55,7 +62,41 @@ #define NO_CHUNK_ID 0xFFFFFFFF #define EMPTY_PAGE 0xFFFFFFFF -#define MEM_WRITE_FLAG 0x10 -#define MEM_WRITE_BYTE_CLEAR_FLAG 0x20 + +// SINGLE WRITE FLAGS +// bits +// bytes(4) 0-3 (values 1,2,4,8) +// write_flag (1) 4 +// clear_flag (1) 8 + +// ALIGNED WRITE BLOCKS FLAGS +// bits +// bytes(4) 0-3 (14 read block/15 write block) +// word_count(28) 4-31 2^28 * 2^3 = 2^31 bytes = 2GB MAX_MEMCPY_SIZE + + +#define MOPS_WRITE_FLAG 0x10 +#define MOPS_WRITE_BYTE_CLEAR_FLAG 0x20 + +#define MOPS_READ_8 0x08 +#define MOPS_READ_4 0x04 +#define MOPS_READ_2 0x02 +#define MOPS_READ_1 0x01 + +#define MOPS_WRITE_8 0x18 +#define MOPS_WRITE_4 0x14 +#define MOPS_WRITE_2 0x12 +#define MOPS_WRITE_1 0x11 + +#define MOPS_CWRITE_1 0x31 + +#define MOPS_BLOCK_READ 0x0A +#define MOPS_BLOCK_WRITE 0x0B +#define MOPS_ALIGNED_READ 0x0C +#define MOPS_ALIGNED_WRITE 0x0D +#define MOPS_ALIGNED_BLOCK_READ 0x0E +#define MOPS_ALIGNED_BLOCK_WRITE 0x0F + +#define MOPS_BLOCK_COUNT_SBITS 4 #endif diff --git a/state-machines/mem-cpp/cpp/mem_context.cpp b/state-machines/mem-cpp/cpp/mem_context.cpp index dbce22d8b..2bbb03c55 100644 --- a/state-machines/mem-cpp/cpp/mem_context.cpp +++ b/state-machines/mem-cpp/cpp/mem_context.cpp @@ -23,9 +23,14 @@ void MemContext::clear () { } #ifdef MEM_CONTEXT_SEM +#ifdef CHUNK_STATS const MemChunk *MemContext::get_chunk(uint32_t thread_id, uint32_t chunk_id, int64_t &elapsed_us) { +#else +const MemChunk *MemContext::get_chunk(uint32_t thread_id, uint32_t chunk_id) { +#endif + #ifdef CHUNK_STATS uint64_t t_ini = get_usec(); - + #endif // semaphore used for synchronization, means that a new chunk data is available while (sem_wait(&semaphores[thread_id]) < 0) { if (errno != EINTR) { @@ -37,28 +42,30 @@ const MemChunk *MemContext::get_chunk(uint32_t thread_id, uint32_t chunk_id, int #ifdef COUNT_CHUNK_STATS #ifdef CHUNK_STATS elapsed_us = (int64_t)chunks_us[chunk_id] - (int64_t)get_usec(); - #else - elapsed_us = 0; #endif #endif return &chunks[chunk_id]; } if (chunks_completed.load(std::memory_order_acquire)) { + #ifdef CHUNK_STATS elapsed_us = get_usec() - t_ini; + #endif return nullptr; } assert(false); } #else +#ifdef CHUNK_STATS const MemChunk *MemContext::get_chunk(uint32_t chunk_id, int64_t &elapsed_us) { +#else +const MemChunk *MemContext::get_chunk(uint32_t chunk_id) { +#endif if (chunk_id < chunks_count.load(std::memory_order_acquire)) { #ifdef COUNT_CHUNK_STATS #ifdef CHUNK_STATS elapsed_us = (int64_t)chunks_us[chunk_id] - (int64_t)get_usec(); - #else - elapsed_us = 0; #endif #endif return &chunks[chunk_id]; diff --git a/state-machines/mem-cpp/cpp/mem_context.hpp b/state-machines/mem-cpp/cpp/mem_context.hpp index 27681eff0..022554119 100644 --- a/state-machines/mem-cpp/cpp/mem_context.hpp +++ b/state-machines/mem-cpp/cpp/mem_context.hpp @@ -47,9 +47,17 @@ class MemContext { #endif void clear (); #ifdef MEM_CONTEXT_SEM +#ifdef CHUNK_STATS const MemChunk *get_chunk(uint32_t thread_id, uint32_t chunk_id, int64_t &elapsed_us); #else + const MemChunk *get_chunk(uint32_t thread_id, uint32_t chunk_id); +#endif +#else +#ifdef CHUNK_STATS const MemChunk *get_chunk(uint32_t chunk_id, int64_t &elapsed_us); +#else + const MemChunk *get_chunk(uint32_t chunk_id); +#endif #endif MemContext(); ~MemContext(); diff --git a/state-machines/mem-cpp/cpp/mem_count_and_plan.cpp b/state-machines/mem-cpp/cpp/mem_count_and_plan.cpp index 9a710bc3c..ca1902570 100644 --- a/state-machines/mem-cpp/cpp/mem_count_and_plan.cpp +++ b/state-machines/mem-cpp/cpp/mem_count_and_plan.cpp @@ -45,8 +45,9 @@ void MemCountAndPlan::clear() { context->clear(); } void MemCountAndPlan::prepare() { +#ifdef MEM_STATS_ACTIVE uint64_t init = get_usec(); - +#endif // Clear existing workers to avoid memory leaks if prepare() called multiple times for (auto* worker : count_workers) { delete worker; @@ -63,14 +64,16 @@ void MemCountAndPlan::prepare() { mem_align_counter = std::make_unique(context); plan_workers.clear(); plan_workers.reserve(MAX_MEM_PLANNERS); - rom_data_planner = std::make_unique(ROM_ROWS, ROM_ADDR, 128, false); + rom_data_planner = std::make_unique(ROM_ROWS, ROM_ADDR, ROM_SIZE_MB, false); rom_data_planner->set_last_addr(ROM_ADDR - 8); - input_data_planner = std::make_unique(INPUT_ROWS, INPUT_ADDR, 128, false); - quick_mem_planner = std::make_unique(0, RAM_ROWS, RAM_ADDR, 512); + input_data_planner = std::make_unique(INPUT_ROWS, INPUT_ADDR, INPUT_SIZE_MB, false); + quick_mem_planner = std::make_unique(0, RAM_ROWS, RAM_ADDR, RAM_SIZE_MB); for (int i = 0; i < MAX_MEM_PLANNERS; ++i) { - plan_workers.emplace_back(i+1, RAM_ROWS, RAM_ADDR, 512); + plan_workers.emplace_back(i+1, RAM_ROWS, RAM_ADDR, RAM_SIZE_MB); } +#ifdef MEM_STATS_ACTIVE t_prepare_us = get_usec() - init; +#endif } void MemCountAndPlan::add_chunk(MemCountersBusData *chunk_data, uint32_t chunk_size) { @@ -92,7 +95,9 @@ void MemCountAndPlan::count_phase() { clock_gettime(CLOCK_REALTIME, &start_time); #endif // MEM_STATS_ACTIVE +#ifdef MEM_STATS_ACTIVE uint64_t init = t_init_us = get_usec(); +#endif std::vector threads; context->init(); @@ -124,9 +129,9 @@ void MemCountAndPlan::count_phase() { wait_mem_align_counters(); +#ifdef MEM_STATS_ACTIVE t_count_us = (uint32_t) (get_usec() - init); -#ifdef MEM_STATS_ACTIVE // Add stats for count phase struct timespec end_time; clock_gettime(CLOCK_REALTIME, &end_time); @@ -145,9 +150,9 @@ void MemCountAndPlan::plan_phase() { // Get start time for stats struct timespec start_time; clock_gettime(CLOCK_REALTIME, &start_time); -#endif // MEM_STATS_ACTIVE uint64_t init = get_usec(); +#endif // MEM_STATS_ACTIVE std::vector threads; plan_threads.emplace_back([this](){ quick_mem_planner->generate_locators(count_workers, context->locators);}); @@ -163,8 +168,9 @@ void MemCountAndPlan::plan_phase() { for (auto& t : plan_threads) { t.join(); } +#ifdef MEM_STATS_ACTIVE t_plan_us = (uint32_t) (get_usec() - init); - +#endif segments[ROM_ID].clear(); rom_data_planner->collect_segments(segments[ROM_ID]); @@ -332,7 +338,7 @@ void MemCountAndPlan::wait() { void MemCountAndPlan::detach_execute() { count_phase(); plan_phase(); - //stats(); + // stats(); // printf("MemCountAndPlan count(ms):%ld plan(ms):%ld tot(ms):%ld\n", // t_count_us / 1000, t_plan_us / 1000, (t_count_us + t_plan_us) / 1000); } diff --git a/state-machines/mem-cpp/cpp/mem_counter.cpp b/state-machines/mem-cpp/cpp/mem_counter.cpp index cc03b8254..39e4195f2 100644 --- a/state-machines/mem-cpp/cpp/mem_counter.cpp +++ b/state-machines/mem-cpp/cpp/mem_counter.cpp @@ -1,6 +1,9 @@ #include "mem_counter.hpp" +#include +#include #include #include +#include #define ST_INI 0 #define ST_READ 1 @@ -10,6 +13,8 @@ #define ST_READ_TO_WRITE ((ST_WRITE - ST_READ) << ST_BITS_OFFSET) #define ST_X_TO_INI_MASK (0xFFFFFFFF >> (32 - ST_BITS_OFFSET)) +#define ALIGN_MASK 0xFFFF'FFFF'FFFF'FFF8ULL + MemCounter::MemCounter(uint32_t id, std::shared_ptr context) :id(id), context(context), addr_mask(id * 8) { count = 0; @@ -35,15 +40,24 @@ MemCounter::~MemCounter() { } void MemCounter::execute() { +#ifdef COUNT_CHUNK_STATS uint64_t init_us = get_usec(); int64_t elapsed_us = 0; - +#endif const MemChunk *chunk = #ifdef MEM_CONTEXT_SEM +#ifdef CHUNK_STATS context->get_chunk(id, 0, elapsed_us); #else - context->get_chunk(0, elapsed_us); + context->get_chunk(id, 0); +#endif +#else +#ifdef CHUNK_STATS + context->get_chunk(0, elapsed_us); +#else + context->get_chunk(0); +#endif #endif #ifdef COUNT_CHUNK_STATS wait_chunks_us[0] = elapsed_us; @@ -55,15 +69,27 @@ void MemCounter::execute() { chunks_us[0] = get_usec() - start_execute_us; tot_wait_us += elapsed_us > 0 ? elapsed_us : 0; #else + #ifdef CHUNK_STATS tot_wait_us += elapsed_us; #endif + #endif + #ifdef CHUNK_STATS first_chunk_us = get_usec() - init_us; + #endif uint32_t chunk_id = 1; #ifdef MEM_CONTEXT_SEM +#ifdef CHUNK_STATS while ((chunk = context->get_chunk(id, chunk_id, elapsed_us)) != nullptr) #else + while ((chunk = context->get_chunk(id, chunk_id)) != nullptr) +#endif +#else +#ifdef CHUNK_STATS while ((chunk = context->get_chunk(chunk_id, elapsed_us)) != nullptr) +#else + while ((chunk = context->get_chunk(chunk_id)) != nullptr) +#endif #endif { #ifdef COUNT_CHUNK_STATS @@ -75,15 +101,19 @@ void MemCounter::execute() { chunks_us[chunk_id] = get_usec() - start_execute_us; tot_wait_us += elapsed_us > 0 ? elapsed_us : 0; #else + #ifdef CHUNK_STATS tot_wait_us += elapsed_us; #endif + #endif ++chunk_id; } #ifdef COUNT_CHUNK_STATS wait_chunks_us[chunk_id] = elapsed_us; #endif } + #ifdef CHUNK_STATS elapsed_ms = ((get_usec() - init_us) / 1000); + #endif } void MemCounter::execute_chunk(uint32_t chunk_id, const MemCountersBusData *chunk_data, uint32_t chunk_size) { @@ -100,32 +130,140 @@ void MemCounter::execute_chunk(uint32_t chunk_id, const MemCountersBusData *chun const uint8_t bytes = chunk_data->flags & 0x0F; const uint32_t addr = chunk_data->addr; switch (bytes) { - case 1: // byte - case 2: // half word - case 4: // word - case 8: // double word + // byte + case 1: + if ((addr & ADDR_MASK) != addr_mask) { + continue; + } + incr_counter(addr & ALIGN_MASK, chunk_id, false, chunk_data->flags & MOPS_WRITE_FLAG); + break; + + // half word + case 2: + if ((addr & ADDR_MASK) == addr_mask) { + incr_counter(addr & ALIGN_MASK, chunk_id, false, chunk_data->flags & MOPS_WRITE_FLAG); + } + else if (((addr + 1) & ADDR_MASK) == addr_mask) { + incr_counter((addr & ALIGN_MASK) + 8 , chunk_id, false, chunk_data->flags & MOPS_WRITE_FLAG); + } + break; + + // word + case 4: + if ((addr & ADDR_MASK) == addr_mask) { + incr_counter(addr & ALIGN_MASK, chunk_id, false, chunk_data->flags & MOPS_WRITE_FLAG); + } + else if (((addr + 3) & ADDR_MASK) == addr_mask) { + incr_counter((addr & ALIGN_MASK) + 8, chunk_id, false, chunk_data->flags & MOPS_WRITE_FLAG); + } break; + + // double word + case 8: + if ((addr & 0x07) == 0) { + // aligned access + if ((addr & ADDR_MASK) != addr_mask) { + continue; + } + incr_counter(addr, chunk_id, true, chunk_data->flags & MOPS_WRITE_FLAG); + } else { + const uint32_t aligned_addr = addr & ALIGN_MASK; + + if ((aligned_addr & ADDR_MASK) == addr_mask) { + incr_counter(aligned_addr, chunk_id, false, chunk_data->flags & MOPS_WRITE_FLAG); + } + else if (((aligned_addr + 7) & ADDR_MASK) == addr_mask) { + incr_counter(aligned_addr + 8 , chunk_id, false, chunk_data->flags & MOPS_WRITE_FLAG); + } + } + break; + + case MOPS_ALIGNED_READ: { + assert((addr & 0x07) == 0); + if ((addr & ADDR_MASK) == addr_mask) { + incr_counter(addr , chunk_id, true, false); + } + break; + } + + case MOPS_ALIGNED_WRITE: { + assert((addr & 0x07) == 0); + if ((addr & ADDR_MASK) == addr_mask) { + incr_counter(addr , chunk_id, true, true); + } + break; + } + + case MOPS_BLOCK_READ: + case MOPS_BLOCK_WRITE: { + bool write = bytes == MOPS_BLOCK_WRITE; + const uint32_t count = chunk_data->flags >> MOPS_BLOCK_COUNT_SBITS; + if ((addr & 0x07) == 0) { + uint32_t to_addr = addr + count * 8; + uint32_t c_addr = (addr & ~ADDR_MASK) + addr_mask; + if (c_addr < addr) { + c_addr += (MAX_THREADS * 8); + } + while (c_addr < to_addr) { + incr_counter(c_addr , chunk_id, true, write); + c_addr += (MAX_THREADS * 8); + } + } else { + // increase range, because if width = 8 and not aligned means + // each access is double, addr and addr + 8 + const uint32_t from_addr = (addr & ~0x07); + const uint32_t to_addr = from_addr + (count + 1) * 8; + uint32_t c_addr = (from_addr & ~ADDR_MASK) + addr_mask; + if (c_addr < from_addr) { + c_addr += (MAX_THREADS * 8); + } + while (c_addr < to_addr) { + incr_counter(c_addr , chunk_id, false, write); + c_addr += (MAX_THREADS * 8); + } + } + break; + } + + case MOPS_ALIGNED_BLOCK_READ: + case MOPS_ALIGNED_BLOCK_WRITE: { + assert((addr & 0x07) == 0); + bool write = bytes == MOPS_ALIGNED_BLOCK_WRITE; + uint32_t count = chunk_data->flags >> 4; + uint32_t to_addr = addr + count * 8; + uint32_t c_addr = (addr & ~ADDR_MASK) + addr_mask; + if (c_addr < addr) { + c_addr += (MAX_THREADS * 8); + } + while (c_addr < to_addr) { + incr_counter(c_addr , chunk_id, true, write); + c_addr += (MAX_THREADS * 8); + } + break; + } + + default: std::ostringstream msg; msg << "ERROR: MemCounter execute_chunk: invalid bytes size " << bytes << " at chunk_id " << chunk_id << " addr 0x" << std::hex << addr; throw std::runtime_error(msg.str()); } - if (bytes == 8 && (addr & 0x07) == 0) { - // aligned access - if ((addr & ADDR_MASK) != addr_mask) { - continue; - } - incr_counter(addr, chunk_id, true, chunk_data->flags & MEM_WRITE_FLAG); - } else { - const uint32_t aligned_addr = addr & 0xFFFFFFF8; + // if (bytes == 8 && (addr & 0x07) == 0) { + // // aligned access + // if ((addr & ADDR_MASK) != addr_mask) { + // continue; + // } + // incr_counter(addr, chunk_id, true, chunk_data->flags & MEM_WRITE_FLAG); + // } else { + // const uint32_t aligned_addr = addr & 0xFFFFFFF8; - if ((aligned_addr & ADDR_MASK) == addr_mask) { - incr_counter(aligned_addr, chunk_id, false, chunk_data->flags & MEM_WRITE_FLAG); - } - else if ((bytes + (addr & 0x07)) > 8 && ((aligned_addr + 8) & ADDR_MASK) == addr_mask) { - incr_counter(aligned_addr + 8 , chunk_id, false, chunk_data->flags & MEM_WRITE_FLAG); - } - } + // if ((aligned_addr & ADDR_MASK) == addr_mask) { + // incr_counter(aligned_addr, chunk_id, false, chunk_data->flags & MEM_WRITE_FLAG); + // } + // else if ((bytes + (addr & 0x07)) > 8 && ((aligned_addr + 8) & ADDR_MASK) == addr_mask) { + // incr_counter(aligned_addr + 8 , chunk_id, false, chunk_data->flags & MEM_WRITE_FLAG); + // } + // } } #ifdef MEM_STATS_ACTIVE diff --git a/state-machines/mem-cpp/cpp/mem_counter.hpp b/state-machines/mem-cpp/cpp/mem_counter.hpp index 075c0eb2c..9193a1a85 100644 --- a/state-machines/mem-cpp/cpp/mem_counter.hpp +++ b/state-machines/mem-cpp/cpp/mem_counter.hpp @@ -190,7 +190,7 @@ uint32_t MemCounter::get_count_table(uint32_t index) const { uint32_t MemCounter::get_next_slot_pos() { if (free_slot >= ADDR_SLOTS) { std::ostringstream msg; - msg << "ERROR: MemCounter no more free slots on thread" << id; + msg << "ERROR: MemCounter: no free slots left for this thread(" << id << "). Increase MAX_SLOT_GB in state-machines/mem-cpp/cpp/mem_config.hpp and recompile zisk."; throw std::runtime_error(msg.str()); } return (free_slot++) * ADDR_SLOT_SIZE; @@ -221,18 +221,32 @@ uint32_t MemCounter::addr_to_offset(uint32_t addr, uint32_t chunk_id) { // ROM: 128 MB case (ROM_ADDR_MASK + 0x00): return ((addr - (ROM_ADDR + 0x00000000)) >> (ADDR_LOW_BITS)); case (ROM_ADDR_MASK + 0x04): return ((addr - (ROM_ADDR + 0x04000000)) >> (ADDR_LOW_BITS)) + ADDR_PAGE_SIZE; - // INPUT: 128 MB + // INPUT: 1024 MB case (INPUT_ADDR_MASK + 0x00): return ((addr - (INPUT_ADDR + 0x00000000)) >> (ADDR_LOW_BITS)) + 2 * ADDR_PAGE_SIZE; case (INPUT_ADDR_MASK + 0x04): return ((addr - (INPUT_ADDR + 0x04000000)) >> (ADDR_LOW_BITS)) + 3 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x08): return ((addr - (INPUT_ADDR + 0x08000000)) >> (ADDR_LOW_BITS)) + 4 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x0C): return ((addr - (INPUT_ADDR + 0x0C000000)) >> (ADDR_LOW_BITS)) + 5 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x10): return ((addr - (INPUT_ADDR + 0x10000000)) >> (ADDR_LOW_BITS)) + 6 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x14): return ((addr - (INPUT_ADDR + 0x14000000)) >> (ADDR_LOW_BITS)) + 7 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x18): return ((addr - (INPUT_ADDR + 0x18000000)) >> (ADDR_LOW_BITS)) + 8 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x1C): return ((addr - (INPUT_ADDR + 0x1C000000)) >> (ADDR_LOW_BITS)) + 9 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x20): return ((addr - (INPUT_ADDR + 0x20000000)) >> (ADDR_LOW_BITS)) + 10 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x24): return ((addr - (INPUT_ADDR + 0x24000000)) >> (ADDR_LOW_BITS)) + 11 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x28): return ((addr - (INPUT_ADDR + 0x28000000)) >> (ADDR_LOW_BITS)) + 12 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x2C): return ((addr - (INPUT_ADDR + 0x2C000000)) >> (ADDR_LOW_BITS)) + 13 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x30): return ((addr - (INPUT_ADDR + 0x30000000)) >> (ADDR_LOW_BITS)) + 14 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x34): return ((addr - (INPUT_ADDR + 0x34000000)) >> (ADDR_LOW_BITS)) + 15 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x38): return ((addr - (INPUT_ADDR + 0x38000000)) >> (ADDR_LOW_BITS)) + 16 * ADDR_PAGE_SIZE; + case (INPUT_ADDR_MASK + 0x3C): return ((addr - (INPUT_ADDR + 0x3C000000)) >> (ADDR_LOW_BITS)) + 17 * ADDR_PAGE_SIZE; // RAM: 512 MB - case (RAM_ADDR_MASK + 0x00): return ((addr - (RAM_ADDR + 0x00000000)) >> (ADDR_LOW_BITS)) + 4 * ADDR_PAGE_SIZE; - case (RAM_ADDR_MASK + 0x04): return ((addr - (RAM_ADDR + 0x04000000)) >> (ADDR_LOW_BITS)) + 5 * ADDR_PAGE_SIZE; - case (RAM_ADDR_MASK + 0x08): return ((addr - (RAM_ADDR + 0x08000000)) >> (ADDR_LOW_BITS)) + 6 * ADDR_PAGE_SIZE; - case (RAM_ADDR_MASK + 0x0C): return ((addr - (RAM_ADDR + 0x0C000000)) >> (ADDR_LOW_BITS)) + 7 * ADDR_PAGE_SIZE; - case (RAM_ADDR_MASK + 0x10): return ((addr - (RAM_ADDR + 0x10000000)) >> (ADDR_LOW_BITS)) + 8 * ADDR_PAGE_SIZE; - case (RAM_ADDR_MASK + 0x14): return ((addr - (RAM_ADDR + 0x14000000)) >> (ADDR_LOW_BITS)) + 9 * ADDR_PAGE_SIZE; - case (RAM_ADDR_MASK + 0x18): return ((addr - (RAM_ADDR + 0x18000000)) >> (ADDR_LOW_BITS)) + 10 * ADDR_PAGE_SIZE; - case (RAM_ADDR_MASK + 0x1C): return ((addr - (RAM_ADDR + 0x1C000000)) >> (ADDR_LOW_BITS)) + 11 * ADDR_PAGE_SIZE; + case (RAM_ADDR_MASK + 0x00): return ((addr - (RAM_ADDR + 0x00000000)) >> (ADDR_LOW_BITS)) + 18 * ADDR_PAGE_SIZE; + case (RAM_ADDR_MASK + 0x04): return ((addr - (RAM_ADDR + 0x04000000)) >> (ADDR_LOW_BITS)) + 19 * ADDR_PAGE_SIZE; + case (RAM_ADDR_MASK + 0x08): return ((addr - (RAM_ADDR + 0x08000000)) >> (ADDR_LOW_BITS)) + 20 * ADDR_PAGE_SIZE; + case (RAM_ADDR_MASK + 0x0C): return ((addr - (RAM_ADDR + 0x0C000000)) >> (ADDR_LOW_BITS)) + 21 * ADDR_PAGE_SIZE; + case (RAM_ADDR_MASK + 0x10): return ((addr - (RAM_ADDR + 0x10000000)) >> (ADDR_LOW_BITS)) + 22 * ADDR_PAGE_SIZE; + case (RAM_ADDR_MASK + 0x14): return ((addr - (RAM_ADDR + 0x14000000)) >> (ADDR_LOW_BITS)) + 23 * ADDR_PAGE_SIZE; + case (RAM_ADDR_MASK + 0x18): return ((addr - (RAM_ADDR + 0x18000000)) >> (ADDR_LOW_BITS)) + 24 * ADDR_PAGE_SIZE; + case (RAM_ADDR_MASK + 0x1C): return ((addr - (RAM_ADDR + 0x1C000000)) >> (ADDR_LOW_BITS)) + 25 * ADDR_PAGE_SIZE; } std::ostringstream msg; msg << "ERROR: addr_to_offset: 0x" << std::hex << addr << " (" << std::dec << chunk_id << ")"; @@ -244,18 +258,32 @@ uint32_t MemCounter::addr_to_page(uint32_t addr, uint32_t chunk_id) { // ROM: 128 MB case (ROM_ADDR_MASK + 0x00): return 0; case (ROM_ADDR_MASK + 0x04): return 1; - // INPUT: 128 MB + // INPUT: 1024 MB case (INPUT_ADDR_MASK + 0x00): return 2; case (INPUT_ADDR_MASK + 0x04): return 3; + case (INPUT_ADDR_MASK + 0x08): return 4; + case (INPUT_ADDR_MASK + 0x0C): return 5; + case (INPUT_ADDR_MASK + 0x10): return 6; + case (INPUT_ADDR_MASK + 0x14): return 7; + case (INPUT_ADDR_MASK + 0x18): return 8; + case (INPUT_ADDR_MASK + 0x1C): return 9; + case (INPUT_ADDR_MASK + 0x20): return 10; + case (INPUT_ADDR_MASK + 0x24): return 11; + case (INPUT_ADDR_MASK + 0x28): return 12; + case (INPUT_ADDR_MASK + 0x2C): return 13; + case (INPUT_ADDR_MASK + 0x30): return 14; + case (INPUT_ADDR_MASK + 0x34): return 15; + case (INPUT_ADDR_MASK + 0x38): return 16; + case (INPUT_ADDR_MASK + 0x3C): return 17; // RAM: 512 MB - case (RAM_ADDR_MASK + 0x00): return 4; - case (RAM_ADDR_MASK + 0x04): return 5; - case (RAM_ADDR_MASK + 0x08): return 6; - case (RAM_ADDR_MASK + 0x0C): return 7; - case (RAM_ADDR_MASK + 0x10): return 8; - case (RAM_ADDR_MASK + 0x14): return 9; - case (RAM_ADDR_MASK + 0x18): return 10; - case (RAM_ADDR_MASK + 0x1C): return 11; + case (RAM_ADDR_MASK + 0x00): return 18; + case (RAM_ADDR_MASK + 0x04): return 19; + case (RAM_ADDR_MASK + 0x08): return 20; + case (RAM_ADDR_MASK + 0x0C): return 21; + case (RAM_ADDR_MASK + 0x10): return 22; + case (RAM_ADDR_MASK + 0x14): return 23; + case (RAM_ADDR_MASK + 0x18): return 24; + case (RAM_ADDR_MASK + 0x1C): return 25; } std::ostringstream msg; msg << "ERROR: addr_to_page: 0x" << std::hex << addr << " (" << std::dec << chunk_id << ")"; @@ -267,18 +295,32 @@ uint32_t MemCounter::page_to_addr(uint8_t page) { // ROM: 128 MB case 0: return (ROM_ADDR + 0x00000000); case 1: return (ROM_ADDR + 0x04000000); - // INPUT: 128 MB + // INPUT: 1024 MB case 2: return (INPUT_ADDR + 0x00000000); case 3: return (INPUT_ADDR + 0x04000000); + case 4: return (INPUT_ADDR + 0x08000000); + case 5: return (INPUT_ADDR + 0x0C000000); + case 6: return (INPUT_ADDR + 0x10000000); + case 7: return (INPUT_ADDR + 0x14000000); + case 8: return (INPUT_ADDR + 0x18000000); + case 9: return (INPUT_ADDR + 0x1C000000); + case 10: return (INPUT_ADDR + 0x20000000); + case 11: return (INPUT_ADDR + 0x24000000); + case 12: return (INPUT_ADDR + 0x28000000); + case 13: return (INPUT_ADDR + 0x2C000000); + case 14: return (INPUT_ADDR + 0x30000000); + case 15: return (INPUT_ADDR + 0x34000000); + case 16: return (INPUT_ADDR + 0x38000000); + case 17: return (INPUT_ADDR + 0x3C000000); // RAM: 512 MB - case 4: return (RAM_ADDR + 0x00000000); - case 5: return (RAM_ADDR + 0x04000000); - case 6: return (RAM_ADDR + 0x08000000); - case 7: return (RAM_ADDR + 0x0C000000); - case 8: return (RAM_ADDR + 0x10000000); - case 9: return (RAM_ADDR + 0x14000000); - case 10: return (RAM_ADDR + 0x18000000); - case 11: return (RAM_ADDR + 0x1C000000); + case 18: return (RAM_ADDR + 0x00000000); + case 19: return (RAM_ADDR + 0x04000000); + case 20: return (RAM_ADDR + 0x08000000); + case 21: return (RAM_ADDR + 0x0C000000); + case 22: return (RAM_ADDR + 0x10000000); + case 23: return (RAM_ADDR + 0x14000000); + case 24: return (RAM_ADDR + 0x18000000); + case 25: return (RAM_ADDR + 0x1C000000); case 0xFF: return 0xFFFFFFFF; } std::ostringstream msg; diff --git a/state-machines/mem-cpp/cpp/mem_planner.cpp b/state-machines/mem-cpp/cpp/mem_planner.cpp index 18a9f06a0..5f3f1b6ac 100644 --- a/state-machines/mem-cpp/cpp/mem_planner.cpp +++ b/state-machines/mem-cpp/cpp/mem_planner.cpp @@ -67,7 +67,9 @@ const MemLocator *MemPlanner::get_next_locator(MemLocators &locators, uint32_t & } void MemPlanner::execute_from_locators(const std::vector &workers, MemLocators &locators, MemSegments &segments) { + #ifdef MEM_PLANNER_STATS uint64_t init = get_usec(); + #endif const MemLocator *locator; uint32_t segment_id = 0; while (true) { @@ -78,7 +80,9 @@ void MemPlanner::execute_from_locators(const std::vector &workers, segments.set(segment_id, current_segment); current_segment = nullptr; } + #ifdef MEM_PLANNER_STATS elapsed = get_usec() - init; + #endif } void MemPlanner::execute_from_locator(const std::vector &workers, uint32_t segment_id, const MemLocator *locator) { @@ -153,7 +157,9 @@ void MemPlanner::update_segment_stats(uint32_t addr_count, uint32_t offset_count #endif void MemPlanner::generate_locators(const std::vector &workers, MemLocators &locators) { + #ifdef MEM_PLANNER_STATS uint64_t init = get_usec(); + #endif rows_available = rows; uint32_t count; uint32_t offset, max_offset; @@ -203,7 +209,9 @@ void MemPlanner::generate_locators(const std::vector &workers, Mem } } locators.set_completed(); + #ifdef MEM_PLANNER_STATS elapsed = get_usec() - init; + #endif } void MemPlanner::get_offset_limits(const std::vector &workers, uint32_t page, uint32_t &first_offset, uint32_t &last_offset) { diff --git a/state-machines/mem-cpp/cpp/mem_test.hpp b/state-machines/mem-cpp/cpp/mem_test.hpp index 0e9968162..1a2250815 100644 --- a/state-machines/mem-cpp/cpp/mem_test.hpp +++ b/state-machines/mem-cpp/cpp/mem_test.hpp @@ -70,7 +70,7 @@ class MemTest { // if (chunk_id == 999999) { for (int32_t i = 0; i < chunk_size; ++i) { const uint32_t addr = chunk_data[i].addr; - if (addr < 0x80000000 || addr >= 0x90000000) continue; + if (addr < 0x40000000 || addr >= 0x80000000) continue; const uint8_t bytes = chunk_data[i].flags & 0xFF; const uint8_t is_write = chunk_data[i].flags >> 16; if (addr == 0x80000000) printf("=================> ADDR 0x80000000\n"); diff --git a/state-machines/mem-cpp/cpp/tools.hpp b/state-machines/mem-cpp/cpp/tools.hpp index be1313c72..af166d9ac 100644 --- a/state-machines/mem-cpp/cpp/tools.hpp +++ b/state-machines/mem-cpp/cpp/tools.hpp @@ -60,7 +60,7 @@ inline uint32_t count_operations(MemCountersBusData *chunk_data, int count) { for (int i = 0; i < count; ++i) { const uint32_t bytes = chunk_data[i].flags & 0x0F; const uint32_t offset = chunk_data[i].addr & 0x07; - const bool wr = (chunk_data[i].flags & MEM_WRITE_FLAG) != 0; + const bool wr = (chunk_data[i].flags & MOPS_WRITE_FLAG) != 0; if (offset == 0 && bytes == 8) { cops = 1; } else if (offset + bytes > 8) { diff --git a/state-machines/mem/Cargo.toml b/state-machines/mem/Cargo.toml index 75169f396..727b6c3d8 100644 --- a/state-machines/mem/Cargo.toml +++ b/state-machines/mem/Cargo.toml @@ -15,10 +15,8 @@ path = "src/mem_sim.rs" zisk-core = { workspace = true } zisk-common = { workspace = true } zisk-pil = { workspace = true } -mem-planner-cpp = { workspace = true } mem-common = { workspace = true } -proofman = { workspace = true } proofman-common = { workspace = true } proofman-macros = { workspace = true } proofman-util = { workspace = true } @@ -34,11 +32,8 @@ num-traits = "0.2" [features] default = [] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed"] -no_lib_link = ["proofman-common/no_lib_link"] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] +gpu = ["packed"] +packed = [] debug_mem = [] debug_mem_align = [] save_mem_bus_data = [] diff --git a/state-machines/mem/pil/dual_byte.pil b/state-machines/mem/pil/dual_byte.pil deleted file mode 100644 index 693a651b5..000000000 --- a/state-machines/mem/pil/dual_byte.pil +++ /dev/null @@ -1,12 +0,0 @@ -require "std_lookup.pil" -require "opids.pil" - -airtemplate DualByte(const int N = 2**16) { - - col fixed BYTE_A = [0:256..255:256]...; - col fixed BYTE_B = [0..255]...; - - col witness multiplicity; - - lookup_proves(DUAL_BYTE_TABLE_ID, mul: multiplicity, expressions: [BYTE_A, BYTE_B]); -} \ No newline at end of file diff --git a/state-machines/mem/pil/mem.pil b/state-machines/mem/pil/mem.pil index 429d10a7e..0c142b4f4 100644 --- a/state-machines/mem/pil/mem.pil +++ b/state-machines/mem/pil/mem.pil @@ -80,7 +80,7 @@ const int MAX_RANGE_CHECK_CHUNK = 2**16; airtemplate Mem(const int N = 2**21, const int id = MEMORY_ID, const int RC = 2, const int bytes = 8, const int base_address = 0, const int size_mb = 128, int immutable = 0, - const int free_input_mem = 0, const expr enable_flag = 1, const int use_predefined_ranges = 0, + const int free_input_mem = 0, const expr enable_flag = 1, const int large_mem = 0, const int dual_mem = 0, const int continuous_addresses = 0) { col fixed SEGMENT_L1 = [1,0...]; @@ -107,7 +107,7 @@ airtemplate Mem(const int N = 2**21, const int id = MEMORY_ID, const int RC = 2, is_first_segment * segment_id === 0; col witness bits(29) addr; - col witness bits(40) step; + col witness bits(MEM_STEP_BITS) step; col witness bits(1) sel; col witness bits(1) addr_changes; @@ -119,7 +119,7 @@ airtemplate Mem(const int N = 2**21, const int id = MEMORY_ID, const int RC = 2, // Two columns, one for step (timestap) of second operation, and other // to enable dual operation in the row. - col witness bits(40) air.step_dual; + col witness bits(MEM_STEP_BITS) air.step_dual; col witness bits(1) air.sel_dual; sel_dual * (1 - sel_dual) === 0; @@ -149,8 +149,8 @@ airtemplate Mem(const int N = 2**21, const int id = MEMORY_ID, const int RC = 2, value[index] = value_word[index*2] + MAX_RANGE_CHECK_CHUNK * value_word[index*2 + 1]; // how value is a free-input, must be checked that it's 32-bit well formed value - range_check(value_word[index*2], 0, MAX_RANGE_CHECK_CHUNK - 1, predefined: use_predefined_ranges); - range_check(value_word[index*2+1], 0, MAX_RANGE_CHECK_CHUNK - 1, predefined: use_predefined_ranges); + range_check(value_word[index*2], 0, MAX_RANGE_CHECK_CHUNK - 1); + range_check(value_word[index*2+1], 0, MAX_RANGE_CHECK_CHUNK - 1); } } else { col witness bits(32) air.value[RC]; @@ -370,17 +370,19 @@ airtemplate Mem(const int N = 2**21, const int id = MEMORY_ID, const int RC = 2, } const expr delta_step = step - previous_step + (1 - wr); - col witness bits(18) air.increment[2]; - increment[0] + 2**18 * increment[1] + 1 === addr_changes * (delta_addr - delta_step) + delta_step; + col witness bits(22) air.l_increment; + col witness bits(16) air.h_increment; + l_increment + 2**22 * h_increment + 1 === addr_changes * (delta_addr - delta_step) + delta_step; is_first_segment * SEGMENT_L1 * (1 - addr_changes) === 0; - // addr_change == 1 => [0,2^18-1] => 18 + 18 = 36 bits + 3 => 39 bits => 512 GB - // addr_change == 0 => [0,2^18-1] => 18 + 18 = 36 bits => STEP upto 2^34 => 16 GB steps - // 36 bits x 2^22 = 2^58 + 2^32 address = 2^59 secure. + // addr_change == 1 => 22 + 16 = 38 bits + 3 => 41 bits => 2 TB (limit address 32 bits) + // addr_change == 0 => 22 + 16 = 38 bits => 4 slots => 2^36 => 32 GB steps (16K main instances) + // 38 bits x 2^22 = 2^60 + 2^32 address = 2^61 secure. + // using control in the middle of instance, reduce to 2^60 - range_check(expression: increment[0], min: 0, max: 2**18 - 1); - range_check(expression: increment[1], min: 0, max: 2**18 - 1); + range_check(expression: l_increment, min: 0, max: 2**22 - 1); + range_check(expression: h_increment, min: 0, max: 2**16 - 1); // to avoid intermediate column col witness bits(1) air.read_same_addr; @@ -477,6 +479,10 @@ function precompiled_mem_load(int id = MEMORY_ID, expr addr, expr main_step, exp mem_assumes(id, MEMORY_LOAD_OP, addr, main_step_to_precompiled_mem_step(main_step), 8, value, sel, name); } +function precompiled_mem_load_padding(int id = MEMORY_ID, expr padding = 0, int name = PIOP_NAME_DEFAULT) { + mem_proves(id, MEMORY_LOAD_OP, 0, main_step_to_precompiled_mem_step(0), [0, 0], padding, name); +} + function precompiled_mem_store(int id = MEMORY_ID, expr addr, expr main_step, expr value[], expr sel = 1, int name = PIOP_NAME_DEFAULT ) { mem_assumes(id, MEMORY_STORE_OP, addr, main_step_to_precompiled_mem_step(main_step, 1), 8, value, sel, name); } @@ -487,6 +493,12 @@ function reg_pre_load(int id = MEMORY_ID, expr addr, expr prev_mem_step, expr va mem_proves(id, MEMORY_REG_OP, addr, prev_mem_step, value, sel, name: name); } +function precompiled_reg_load(int id = MEMORY_ID, expr reg, expr prev_mem_step, expr main_step, expr value[], expr sel = 1, int name = PIOP_NAME_DEFAULT) { + mem_proves(id, MEMORY_REG_OP, reg, prev_mem_step, value, sel, name: name); + range_check(expression: main_step_to_precompiled_mem_step(main_step) - prev_mem_step, min: 0, max: MAX_RANGE); + mem_assumes(id, MEMORY_REG_OP, reg, main_step_to_precompiled_mem_step(main_step), 8, value, sel, name: name); +} + function reg_pre_store(int id = MEMORY_ID, expr addr, expr prev_mem_step, expr value[], expr sel = 1, int name = PIOP_NAME_DEFAULT) { mem_proves(id, MEMORY_REG_OP, addr, prev_mem_step, value, sel, name); } diff --git a/state-machines/mem/pil/mem_align.pil b/state-machines/mem/pil/mem_align.pil index 650bcfcf1..2d0db39fc 100644 --- a/state-machines/mem/pil/mem_align.pil +++ b/state-machines/mem/pil/mem_align.pil @@ -30,7 +30,7 @@ require "mem_align_rom.pil" +---+---+---+---+ +---+===+===+---+ | 0 | 1 | 2 | 3 | | 4 | 5 | 6 | 7 | +---+---+---+---+ +---+===+===+---+ - |<- v ->| + |<- v ->| [R] In the first clock cycle, we perform an aligned read to w [W] In the second clock cycle, we compute an aligned write of v to w @@ -41,7 +41,7 @@ require "mem_align_rom.pil" +---+---+---+---+ +---+===+===+===+ +===+===+===+===+ +===+---+---+---+ | 0 | 1 | 2 | 3 | | 4 | 5 | 6 | 7 | | 0 | 1 | 2 | 3 | | 4 | 5 | 6 | 7 | +---+---+---+---+ +---+===+===+===+ +===+===+===+===+ +===+---+---+---+ - |<---------------- v ---------------->| + |<---------------- v ---------------->| [R] In the first clock cycle, we perform an aligned read to w1 [V] In the second clock cycle, we return the demanded value v from w1 and w2 @@ -89,7 +89,7 @@ require "mem_align_rom.pil" Notice that it is enough with 8 combinations. */ -airtemplate MemAlign(const int N = 2**10, const int RC = 2, const int CHUNK_NUM = 8, const int CHUNK_BITS = 8, const int use_predefined_ranges = 0) { +airtemplate MemAlign(const int N = 2**10, const int RC = 2, const int CHUNK_NUM = 8, const int CHUNK_BITS = 8) { const int CHUNKS_BY_RC = CHUNK_NUM / RC; col witness bits(29) addr; @@ -111,7 +111,7 @@ airtemplate MemAlign(const int N = 2**10, const int RC = 2, const int CHUNK_NUM // - 'reg == reg in transitions V <- W, W <- R, // in any case, sel_up_to_down,sel_down_to_up are 0 in [V] steps. for (int i = 0; i < CHUNK_NUM; i++) { - range_check(reg[i], 0, 2**CHUNK_BITS-1, predefined: use_predefined_ranges); + range_check(reg[i], 0, 2**CHUNK_BITS-1); (reg[i]' - reg[i]) * sel[i] * sel_up_to_down === 0; ('reg[i] - reg[i]) * sel[i] * sel_down_to_up === 0; diff --git a/state-machines/mem/pil/mem_align_byte.pil b/state-machines/mem/pil/mem_align_byte.pil index 4e2964461..9915376c1 100644 --- a/state-machines/mem/pil/mem_align_byte.pil +++ b/state-machines/mem/pil/mem_align_byte.pil @@ -1,8 +1,7 @@ -require "std_permutation.pil" -require "std_lookup.pil" -require "std_range_check.pil" -require "opids.pil" -require "dual_byte.pil" +require "std_permutation.pil"; +require "std_lookup.pil"; +require "std_range_check.pil"; +require "opids.pil"; // Specific low cost machine for specific byte unaligned memory access. @@ -102,7 +101,7 @@ airtemplate MemAlignByte(const int N = 2**10, const int read = 1, const int writ } range_check(min: 0, max: 0xFFFF, expression: value_16b); - lookup_assumes(DUAL_BYTE_TABLE_ID, [byte_value, value_8b]); + range_dual_byte(byte_value, value_8b); airval padding_size; direct_update_proves(MEMORY_ID, [MEMORY_LOAD_OP, 0, 0, 8, 0, 0], sel: padding_size); diff --git a/state-machines/mem/pil/mem_align_rom.pil b/state-machines/mem/pil/mem_align_rom.pil index b198a5747..a926257dc 100644 --- a/state-machines/mem/pil/mem_align_rom.pil +++ b/state-machines/mem/pil/mem_align_rom.pil @@ -3,21 +3,11 @@ require "opids.pil" const int MEM_ALIGN_ROM_SIZE = P2_8; -airtemplate MemAlignRom(const int N = MEM_ALIGN_ROM_SIZE, const int CHUNK_NUM = 8, const int DEFAULT_OFFSET = 0, const int DEFAULT_WIDTH = 8, const int disable_fixed = 0) { +airtemplate MemAlignRom(const int N = MEM_ALIGN_ROM_SIZE, const int CHUNK_NUM = 8, const int DEFAULT_OFFSET = 0, const int DEFAULT_WIDTH = 8) { if (N < MEM_ALIGN_ROM_SIZE) { error(`N must be at least ${MEM_ALIGN_ROM_SIZE}, but N=${N} was provided`); } - col witness multiplicity; - - if (disable_fixed) { - col fixed _K = [0...]; - multiplicity * _K === 0; - - println("*** DISABLE_FIXED ***"); - return; - } - // Define the size of each sub-program: RV, RWV, RVR, RWVWR const int spsize[4] = [2, 3, 3, 5]; @@ -319,5 +309,6 @@ airtemplate MemAlignRom(const int N = MEM_ALIGN_ROM_SIZE, const int CHUNK_NUM = } // Ensure the program is being followed by the MemAlign + col witness multiplicity; lookup_proves(MEMORY_ALIGN_ROM_ID, [PC, DELTA_PC, DELTA_ADDR, OFFSET, WIDTH, FLAGS], multiplicity); } \ No newline at end of file diff --git a/state-machines/mem/src/input_data_sm.rs b/state-machines/mem/src/input_data_sm.rs index 227aa802c..2f198b511 100644 --- a/state-machines/mem/src/input_data_sm.rs +++ b/state-machines/mem/src/input_data_sm.rs @@ -30,8 +30,8 @@ const _: () = { "INPUT_DATA memory exceeds the 32-bit addressable range" ); assert!( - (MAX_INPUT_SIZE - 1) <= (128 << 20), - "INPUT_DATA is too large. Input size must be <= 128MB" + (MAX_INPUT_SIZE - 1) <= (1024 << 20), + "INPUT_DATA is too large. Input size must be <= 1024MB" ); }; @@ -43,7 +43,7 @@ pub struct InputDataSM { range_id: usize, /// Range check ID for the 16-bit chunks of the input values - range_chunks_id: usize, + range_16bits_id: usize, } #[allow(unused, unused_variables)] @@ -55,7 +55,7 @@ impl InputDataSM { let range_chunks_id = std.get_range_id(0, (1 << 16) - 1, None).expect("Failed to get range ID"); - Arc::new(Self { range_chunks_id, std: std.clone(), range_id }) + Arc::new(Self { range_16bits_id: range_chunks_id, std: std.clone(), range_id }) } fn get_u16_values(&self, value: u64) -> [u16; 4] { [value as u16, (value >> 16) as u16, (value >> 32) as u16, (value >> 48) as u16] @@ -105,17 +105,11 @@ impl MemModule for InputDataSM { num_rows ); - let mut range_check_data: Vec = vec![0; 1 << 16]; - - // range of instance - self.std.range_check( - self.range_id, - (previous_segment.addr - INPUT_DATA_W_ADDR_INIT) as i64, - 1, - ); + let mut range_16bits: Vec = vec![0; 1 << 16]; let mut max_range_distance_count = 0; + let distance_base = previous_segment.addr - INPUT_DATA_W_ADDR_INIT; let mut last_addr: u32 = previous_segment.addr; let mut last_step: u64 = previous_segment.step; let mut last_value: u64 = previous_segment.value; @@ -161,7 +155,7 @@ impl MemModule for InputDataSM { i += 1; } - range_check_data[0] += 4 * internal_reads; + range_16bits[0] += 4 * internal_reads; if incomplete { break; } @@ -175,7 +169,7 @@ impl MemModule for InputDataSM { let value = mem_op.value; let value_words = self.get_u16_values(value); for j in 0..4 { - range_check_data[value_words[j] as usize] += 1; + range_16bits[value_words[j] as usize] += 1; trace[i].set_value_word(j, value_words[j]); } @@ -218,19 +212,19 @@ impl MemModule for InputDataSM { // address doesn't change in padding rows, no range check is required } + let distance_end = INPUT_DATA_W_ADDR_END - last_addr; + self.std.range_check( self.range_id, SEGMENT_ADDR_MAX_RANGE as i64, max_range_distance_count, ); - self.std.range_check(self.range_id, (INPUT_DATA_W_ADDR_END - last_addr) as i64, 1); // range of chunks for j in 0..4 { let value = trace[last_row_idx].get_value_word(j); - range_check_data[value as usize] += padding_size as u32; + range_16bits[value as usize] += padding_size as u32; } - self.std.range_checks(self.range_chunks_id, range_check_data); let mut air_values = InputDataAirValues::::new(); air_values.segment_id = F::from_usize(segment_id.into()); @@ -247,6 +241,22 @@ impl MemModule for InputDataSM { air_values.segment_last_value[0] = F::from_u32(last_value as u32); air_values.segment_last_value[1] = F::from_u32((last_value >> 32) as u32); + let distance_base = [distance_base as u16, (distance_base >> 16) as u16]; + let distance_end = [distance_end as u16, (distance_end >> 16) as u16]; + + air_values.distance_base[0] = F::from_u16(distance_base[0]); + air_values.distance_base[1] = F::from_u16(distance_base[1]); + + air_values.distance_end[0] = F::from_u16(distance_end[0]); + air_values.distance_end[1] = F::from_u16(distance_end[1]); + + range_16bits[distance_base[0] as usize] += 1; + range_16bits[distance_base[1] as usize] += 1; + range_16bits[distance_end[0] as usize] += 1; + range_16bits[distance_end[1] as usize] += 1; + + self.std.range_checks(self.range_16bits_id, range_16bits); + Ok(AirInstance::new_from_trace(FromTrace::new(&mut trace).with_air_values(&mut air_values))) } } diff --git a/state-machines/mem/src/mem.rs b/state-machines/mem/src/mem.rs index 510b02b35..0cc8d0102 100644 --- a/state-machines/mem/src/mem.rs +++ b/state-machines/mem/src/mem.rs @@ -9,7 +9,7 @@ use fields::PrimeField64; use mem_common::MemCounters; use pil_std_lib::Std; use proofman_common::ProofCtx; -use zisk_common::{BusDeviceMetrics, ComponentBuilder, Instance, InstanceCtx, Plan, Planner}; +use zisk_common::{ComponentBuilder, Instance, InstanceCtx, Plan, Planner}; use zisk_pil::{ InputDataTrace, MemAlignByteTrace, MemAlignReadByteTrace, MemAlignTrace, MemAlignWriteByteTrace, MemTrace, RomDataTrace, ZiskProofValues, @@ -46,10 +46,6 @@ impl Mem { } impl ComponentBuilder for Mem { - fn build_counter(&self) -> Option> { - Some(Box::new(MemCounters::new())) - } - fn build_planner(&self) -> Box { Box::new(MemPlanner::new()) } diff --git a/state-machines/mem/src/mem_align_byte_instance.rs b/state-machines/mem/src/mem_align_byte_instance.rs index e7a4fe5f2..e7e66b9a1 100644 --- a/state-machines/mem/src/mem_align_byte_instance.rs +++ b/state-machines/mem/src/mem_align_byte_instance.rs @@ -5,6 +5,7 @@ use crate::mem_align_byte_sm::{MemAlignByteTraceRowType, MemAlignByteTraceType}; use fields::PrimeField64; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; use std::{collections::HashMap, sync::Arc}; +use zisk_common::StatsType; use zisk_common::{ BusDevice, CheckPoint, ChunkId, Instance, InstanceCtx, InstanceType, PayloadType, }; @@ -72,6 +73,10 @@ impl Instance for MemAlignByteInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Memory + } + fn as_any(&self) -> &dyn std::any::Any { self } diff --git a/state-machines/mem/src/mem_align_byte_sm.rs b/state-machines/mem/src/mem_align_byte_sm.rs index 542b0e915..3cb653675 100644 --- a/state-machines/mem/src/mem_align_byte_sm.rs +++ b/state-machines/mem/src/mem_align_byte_sm.rs @@ -9,7 +9,10 @@ use rayon::prelude::*; use crate::MemAlignInput; use proofman_common::{AirInstance, FromTrace, ProofmanResult}; -use zisk_pil::{MemAlignByteAirValues, MemAlignReadByteAirValues, MemAlignWriteByteAirValues}; +use zisk_pil::{ + MemAlignByteAirValues, MemAlignReadByteAirValues, MemAlignWriteByteAirValues, + DUAL_RANGE_BYTE_ID, +}; #[cfg(not(feature = "packed"))] use zisk_pil::{ @@ -356,7 +359,6 @@ impl MemAlignByteRow> const OFFSET_MASK: u32 = 0x07; const OFFSET_BITS: u32 = 3; -const DUAL_BYTE_TABLE_ID: usize = 88; pub struct MemAlignByteSM { /// PIL2 standard library @@ -378,7 +380,7 @@ impl MemAlignByteSM { Arc::new(Self { std: std.clone(), table_dual_byte_id: std - .get_virtual_table_id(DUAL_BYTE_TABLE_ID) + .get_virtual_table_id(DUAL_RANGE_BYTE_ID) .expect("Failed to get dual byte table ID"), table_16b_id: std.get_range_id(0, 0xFFFF, None).expect("Failed to get 16b table ID"), table_8b_id: std.get_range_id(0, 0xFF, None).expect("Failed to get 8b table ID"), diff --git a/state-machines/mem/src/mem_align_collector.rs b/state-machines/mem/src/mem_align_collector.rs index 7e8097629..018902cdb 100644 --- a/state-machines/mem/src/mem_align_collector.rs +++ b/state-machines/mem/src/mem_align_collector.rs @@ -1,10 +1,7 @@ use crate::MemAlignInput; use mem_common::{MemAlignCheckPoint, MemHelpers}; -use std::collections::VecDeque; -use zisk_common::{ - BusDevice, BusId, ChunkId, CollectCounter, MemBusData, MemCollectorInfo, MEM_BUS_ID, -}; +use zisk_common::{BusDevice, BusId, ChunkId, CollectCounter, MemBusData, MEM_BUS_ID}; pub struct MemAlignCollector { /// Collected inputs @@ -65,17 +62,9 @@ impl MemAlignCollector { + self.read_byte.count() + self.write_byte.count() } -} -impl BusDevice for MemAlignCollector { #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64]) -> bool { debug_assert!(*bus_id == MEM_BUS_ID); let bytes = MemBusData::get_bytes(data); @@ -129,11 +118,9 @@ impl BusDevice for MemAlignCollector { }; true } +} - fn bus_id(&self) -> Vec { - vec![MEM_BUS_ID] - } - +impl BusDevice for MemAlignCollector { /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/state-machines/mem/src/mem_align_instance.rs b/state-machines/mem/src/mem_align_instance.rs index bedd3756d..08193ce44 100644 --- a/state-machines/mem/src/mem_align_instance.rs +++ b/state-machines/mem/src/mem_align_instance.rs @@ -4,6 +4,7 @@ use mem_common::MemAlignCheckPoint; use fields::PrimeField64; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; use std::{collections::HashMap, sync::Arc}; +use zisk_common::StatsType; use zisk_common::{ BusDevice, CheckPoint, ChunkId, Instance, InstanceCtx, InstanceType, PayloadType, }; @@ -64,6 +65,10 @@ impl Instance for MemAlignInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Memory + } + /// Builds an input collector for the instance. /// /// # Arguments diff --git a/state-machines/mem/src/mem_align_read_byte_instance.rs b/state-machines/mem/src/mem_align_read_byte_instance.rs index 597398498..7b6d7b35e 100644 --- a/state-machines/mem/src/mem_align_read_byte_instance.rs +++ b/state-machines/mem/src/mem_align_read_byte_instance.rs @@ -5,6 +5,7 @@ use crate::mem_align_byte_sm::{MemAlignReadByteTraceRowType, MemAlignReadByteTra use fields::PrimeField64; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; use std::{collections::HashMap, sync::Arc}; +use zisk_common::StatsType; use zisk_common::{ BusDevice, CheckPoint, ChunkId, Instance, InstanceCtx, InstanceType, PayloadType, }; @@ -72,6 +73,10 @@ impl Instance for MemAlignReadByteInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Memory + } + fn as_any(&self) -> &dyn std::any::Any { self } diff --git a/state-machines/mem/src/mem_align_write_byte_instance.rs b/state-machines/mem/src/mem_align_write_byte_instance.rs index eb497c1b9..d55a06a9e 100644 --- a/state-machines/mem/src/mem_align_write_byte_instance.rs +++ b/state-machines/mem/src/mem_align_write_byte_instance.rs @@ -5,6 +5,7 @@ use crate::mem_align_byte_sm::{MemAlignWriteByteTraceRowType, MemAlignWriteByteT use fields::PrimeField64; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; use std::{collections::HashMap, sync::Arc}; +use zisk_common::StatsType; use zisk_common::{ BusDevice, CheckPoint, ChunkId, Instance, InstanceCtx, InstanceType, PayloadType, }; @@ -72,6 +73,10 @@ impl Instance for MemAlignWriteByteInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Memory + } + fn as_any(&self) -> &dyn std::any::Any { self } diff --git a/state-machines/mem/src/mem_counters_cursor.rs b/state-machines/mem/src/mem_counters_cursor.rs index fe04e756d..403291f55 100644 --- a/state-machines/mem/src/mem_counters_cursor.rs +++ b/state-machines/mem/src/mem_counters_cursor.rs @@ -81,7 +81,7 @@ impl MemCountersCursor { return sorted_boxes.first().cloned().unwrap_or_default(); } let total_size: usize = sorted_boxes.iter().map(|b| b.len()).sum(); - let target_size: usize = arity * (total_size / sorted_boxes.len()); + let target_size: usize = std::cmp::max(arity * (total_size / sorted_boxes.len()), 1); let mut groups: Vec<&[Vec]> = Vec::new(); let mut group_weight = 0; diff --git a/state-machines/mem/src/mem_module_collector.rs b/state-machines/mem/src/mem_module_collector.rs index c5dc3392e..1575a0e1a 100644 --- a/state-machines/mem/src/mem_module_collector.rs +++ b/state-machines/mem/src/mem_module_collector.rs @@ -1,8 +1,6 @@ -use std::collections::VecDeque; - use crate::{MemInput, MemPreviousSegment}; use mem_common::{MemHelpers, MemModuleCheckPoint, MEM_BYTES, MEM_BYTES_BITS}; -use zisk_common::{BusDevice, BusId, MemBusData, MemCollectorInfo, SegmentId, MEM_BUS_ID}; +use zisk_common::{BusDevice, BusId, MemBusData, SegmentId, MEM_BUS_ID}; #[derive(Debug, PartialEq, Eq)] enum InputAction { @@ -469,20 +467,22 @@ impl MemModuleCollector { } } - pub fn get_mem_collector_info(&self) -> MemCollectorInfo { - MemCollectorInfo { from_addr: self.filter_min_addr, to_addr: self.filter_max_addr } + pub fn skip_addr(&self, addr: u32) -> bool { + if addr > self.filter_max_addr || addr < self.filter_min_addr { + return true; + } + false + } + + pub fn skip_addr_range(&self, addr_from: u32, addr_to: u32) -> bool { + if addr_from > self.filter_max_addr || addr_to < self.filter_min_addr { + return true; + } + false } -} -impl BusDevice for MemModuleCollector { #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64]) -> bool { debug_assert!(*bus_id == MEM_BUS_ID); let addr = MemBusData::get_addr(data); @@ -492,11 +492,9 @@ impl BusDevice for MemModuleCollector { } true } +} - fn bus_id(&self) -> Vec { - vec![MEM_BUS_ID] - } - +impl BusDevice for MemModuleCollector { /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/state-machines/mem/src/mem_module_instance.rs b/state-machines/mem/src/mem_module_instance.rs index cd572277a..23508ebcd 100644 --- a/state-machines/mem/src/mem_module_instance.rs +++ b/state-machines/mem/src/mem_module_instance.rs @@ -5,6 +5,7 @@ use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; use proofman_util::{timer_start_debug, timer_stop_and_log_debug}; use rayon::prelude::*; use std::sync::Arc; +use zisk_common::StatsType; use zisk_common::{ BusDevice, CheckPoint, ChunkId, Instance, InstanceCtx, InstanceType, PayloadType, }; @@ -136,6 +137,10 @@ impl Instance for MemModuleInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Memory + } + fn as_any(&self) -> &dyn std::any::Any { self } diff --git a/state-machines/mem/src/mem_sm.rs b/state-machines/mem/src/mem_sm.rs index 8d001fd75..eb3fab87f 100644 --- a/state-machines/mem/src/mem_sm.rs +++ b/state-machines/mem/src/mem_sm.rs @@ -12,21 +12,15 @@ type MemTraceType = MemTracePacked; #[cfg(not(feature = "packed"))] type MemTraceType = MemTrace; #[cfg(feature = "debug_mem")] -use { - num_bigint::ToBigInt, - std::{ - env, - fs::File, - io::{BufWriter, Write}, - }, +use std::{ + env, + fs::File, + io::{BufWriter, Write}, }; use crate::{MemInput, MemModule}; use fields::PrimeField64; -use mem_common::{ - MemHelpers, MEM_INC_C_BITS, MEM_INC_C_MASK, MEM_INC_C_MAX_RANGE, MEM_INC_C_SIZE, - RAM_W_ADDR_END, RAM_W_ADDR_INIT, -}; +use mem_common::{MemHelpers, RAM_W_ADDR_END, RAM_W_ADDR_INIT}; use pil_std_lib::Std; use proofman_common::{AirInstance, FromTrace, ProofmanResult}; use zisk_core::{RAM_ADDR, RAM_SIZE}; @@ -38,10 +32,8 @@ pub struct MemSM { /// PIL2 standard library std: Arc>, - range_id: usize, - + range_22bits_id: usize, dual_range_id: usize, - range_16bits_id: usize, } #[derive(Debug, Default)] @@ -54,14 +46,14 @@ pub struct MemPreviousSegment { #[allow(unused, unused_variables)] impl MemSM { pub fn new(std: Arc>) -> Arc { - let range_id = - std.get_range_id(0, MEM_INC_C_MAX_RANGE as i64, None).expect("Failed to get range ID"); + let range_22bits_id = + std.get_range_id(0, (1 << 22) - 1, None).expect("Failed to get 22 bits range ID"); let dual_range_id = std.get_range_id(0, DUAL_RANGE_MAX as i64, None).expect("Failed to get dual range ID"); let range_16bits_id = - std.get_range_id(0, 0xFFFF, None).expect("Failed to get 16 bits range ID"); + std.get_range_id(0, (1 << 16) - 1, None).expect("Failed to get 16 bits range ID"); - Arc::new(Self { range_id, dual_range_id, range_16bits_id, std: std.clone() }) + Arc::new(Self { range_22bits_id, dual_range_id, range_16bits_id, std: std.clone() }) } pub fn get_to_addr() -> u32 { @@ -72,17 +64,27 @@ impl MemSM { println!("[MemDebug] writing information {} .....", file_name); let file = File::create(file_name).unwrap(); let mut writer = BufWriter::new(file); - let num_rows = MemTrace::NUM_ROWS; + let num_rows = MemTrace::::NUM_ROWS; for i in 0..num_rows { - let addr = trace[i].addr.as_canonical_biguint().to_bigint().unwrap() * 8; - let step = trace[i].step.as_canonical_biguint().to_bigint().unwrap(); + let addr = trace[i].addr.as_canonical_u64() * 8; + let step = trace[i].step.as_canonical_u64(); + let main_step = MemHelpers::mem_step_to_main_step(step); + let op = if trace[i].wr.is_zero() { 'R' } else { 'W' }; + let values = + [trace[i].value[0].as_canonical_u64(), trace[i].value[1].as_canonical_u64()]; + let value = values[0] | (values[1] << 32); writeln!( writer, - "{:#010X} {} {} {:?}", - addr, trace[i].step, trace[i].wr, trace[i].value + "{i:<8} {addr:#010X} {step:>13} {main_step:>12} {op} {values:?} 0x{value:016X}" ) .unwrap(); + let dual = !trace[i].sel_dual.is_zero(); + if dual { + let step = trace[i].step_dual.as_canonical_u64(); + writeln!(writer, "{i:<8} {addr:#010X} {step:>13} {main_step:>12} R {values:?} 0x{value:016X} DUAL") + .unwrap(); + } } println!("[MemDebug] done"); } @@ -112,7 +114,8 @@ impl MemModule for MemSM { ) -> ProofmanResult> { let mut trace = MemTraceType::::new_from_vec(trace_buffer)?; - let mut range_check_data: Vec = vec![0; MEM_INC_C_SIZE]; + let mut range_22bits: Vec = vec![0; 1 << 22]; + let mut range_16bits: Vec = vec![0; 1 << 16]; // 2^20 * 2 = 2^21 = 2MB let mut dual_partial_range: Vec = vec![0; DUAL_PARTIAL_RANGE_MAX]; @@ -131,13 +134,6 @@ impl MemModule for MemSM { for index in 0..mem_op_count { let mem_op = &mem_ops[index]; step = mem_op.step; - // if step >= 28184622 && step <= 28184624 { - // println!( - // "@@@@@@@@@@ 0x{:08X} {step} 8 OP:{}", - // mem_op.addr * 8, - // if mem_op.is_write { 2 } else { 1 } - // ); - // } let addr_changes = last_addr != mem_op.addr; if dual_candidate { @@ -222,20 +218,20 @@ impl MemModule for MemSM { } else { trace[i].set_read_same_addr(true); } - let lsb_increment = increment & MEM_INC_C_MASK; - let msb_increment = increment >> MEM_INC_C_BITS; - trace[i].set_increment(0, lsb_increment as u32); - trace[i].set_increment(1, msb_increment as u32); + let l_increment = increment & ((1 << 22) - 1); + let h_increment = increment >> 22; + trace[i].set_l_increment(l_increment as u32); + trace[i].set_h_increment(h_increment as u16); trace[i].set_wr(mem_op.is_write); #[cfg(feature = "debug_mem")] - if (lsb_increment >= MEM_INC_C_SIZE) || (msb_increment > MEM_INC_C_SIZE) { + if (l_increment >= (1 << 22)) || (h_increment >= (1 << 16)) { panic!("MemSM: increment's out of range: {} i:{} addr_changes:{} mem_op.addr:0x{:X} last_addr:0x{:X} mem_op.step:{} last_step:{}", increment, i, addr_changes as u8, mem_op.addr, last_addr, mem_op.step, last_step); } - range_check_data[lsb_increment] += 1; - range_check_data[msb_increment] += 1; + range_22bits[l_increment] += 1; + range_16bits[h_increment] += 1; last_addr = mem_op.addr; last_value = mem_op.value; @@ -273,8 +269,8 @@ impl MemModule for MemSM { trace[i].set_value(1, high_value); trace[i].set_addr_changes(false); - trace[i].set_increment(0, 0); - trace[i].set_increment(1, 0); + trace[i].set_h_increment(0); + trace[i].set_l_increment(0); trace[i].set_read_same_addr(true); trace[i].set_sel_dual(false); trace[i].set_step_dual(0); @@ -282,15 +278,14 @@ impl MemModule for MemSM { if padding_size > 0 { // Store the padding range checks - range_check_data[0] += (2 * padding_size) as u32; + range_16bits[0] += padding_size as u32; + range_22bits[0] += padding_size as u32; } // no add extra +1 because index = value - 1 // RAM_W_ADDR_END - last_addr + 1 - 1 = RAM_W_ADDR_END - last_addr let distance_end = RAM_W_ADDR_END - last_addr; - self.std.range_checks(self.range_id, range_check_data); - // Add one in range_check_data_max because it's used by intermediate reads, and reads // add one to distance to allow same step on read operations. @@ -318,10 +313,13 @@ impl MemModule for MemSM { air_values.distance_end[0] = F::from_u16(distance_end[0]); air_values.distance_end[1] = F::from_u16(distance_end[1]); - self.std.range_check(self.range_16bits_id, distance_base[0] as i64, 1); - self.std.range_check(self.range_16bits_id, distance_base[1] as i64, 1); - self.std.range_check(self.range_16bits_id, distance_end[0] as i64, 1); - self.std.range_check(self.range_16bits_id, distance_end[1] as i64, 1); + range_16bits[distance_base[0] as usize] += 1; + range_16bits[distance_base[1] as usize] += 1; + range_16bits[distance_end[0] as usize] += 1; + range_16bits[distance_end[1] as usize] += 1; + + self.std.range_checks(self.range_22bits_id, range_22bits); + self.std.range_checks(self.range_16bits_id, range_16bits); for (value, count) in dual_partial_range.iter().enumerate() { if *count == 0 { diff --git a/state-machines/mem/src/mem_test.rs b/state-machines/mem/src/mem_test.rs index 6cf625d6b..c51332657 100644 --- a/state-machines/mem/src/mem_test.rs +++ b/state-machines/mem/src/mem_test.rs @@ -1,9 +1,9 @@ #![cfg(test)] -use std::{collections::VecDeque, sync::Arc}; +use std::sync::Arc; use crate::{MemModulePlanner, MemModulePlannerConfig, MemPlanCalculator}; use mem_common::{MemCounters, MEMORY_LOAD_OP, MEMORY_STORE_OP}; -use zisk_common::{BusDevice, ChunkId, Plan, MEM_BUS_ID}; +use zisk_common::{ChunkId, Plan, MEM_BUS_ID}; fn generate_test_plans( from_addr: u32, @@ -12,7 +12,7 @@ fn generate_test_plans( ) -> Vec { let addr_index = match from_addr { 0x8000_0000 => 0, - 0x9000_0000 => 1, + 0x4000_0000 => 1, 0xA000_0000 => 2, _ => panic!("invalid addr 0x{from_addr:X}"), }; @@ -44,8 +44,6 @@ fn add_test_aligned_mem_reads( counter.process_data( &MEM_BUS_ID, &[MEMORY_LOAD_OP as u64, addr as u64, step + i * step_delta, 8, value], - &mut VecDeque::new(), - None, ); } } @@ -72,12 +70,7 @@ fn add_mem_data( let mut step = step; let op = if is_write { MEMORY_STORE_OP } else { MEMORY_LOAD_OP } as u64; for i in 0..count { - counter.process_data( - &MEM_BUS_ID, - &[op, addr, step, width, value], - &mut VecDeque::new(), - None, - ); + counter.process_data(&MEM_BUS_ID, &[op, addr, step, width, value]); if config.step_cycle > 0 { if i > 0 && (config.step_cycle % i) == 0 { step += config.step_delta; @@ -104,21 +97,11 @@ fn add_mem_data( // } fn add_mem_read64(counter: &mut MemCounters, addr: u32, step: u64, value: u64) { - counter.process_data( - &MEM_BUS_ID, - &[MEMORY_LOAD_OP as u64, addr as u64, step, 8, value], - &mut VecDeque::new(), - None, - ); + counter.process_data(&MEM_BUS_ID, &[MEMORY_LOAD_OP as u64, addr as u64, step, 8, value]); } fn add_mem_write64(counter: &mut MemCounters, addr: u32, step: u64, value: u64) { - counter.process_data( - &MEM_BUS_ID, - &[MEMORY_STORE_OP as u64, addr as u64, step, 8, value], - &mut VecDeque::new(), - None, - ); + counter.process_data(&MEM_BUS_ID, &[MEMORY_STORE_OP as u64, addr as u64, step, 8, value]); } #[test] @@ -165,43 +148,10 @@ fn test_counters() { add_mem_write64(&mut counter, 0xA000_0000, 80, 0x2222_2222_6666_1111); add_mem_read64(&mut counter, 0xA000_0016, 85, 0x3333_3333_3333_3333); add_mem_read64(&mut counter, 0xA000_0000, 90, 0x2222_2222_6666_1111); // dual => 2 (A000_0002) + 5 rows = 7 - add_mem_data(&mut counter, 10, 0x9000_0000, 10, 0x4041_4243_4445_4647, 8, false, &cfg); + add_mem_data(&mut counter, 10, 0x4000_0000, 10, 0x4041_4243_4445_4647, 8, false, &cfg); counter.close(); - assert_eq!(format!("{counter:?}"), "[MEM_0,#:10 => 0x80000000:2 0x80000008:2 0x80000010:2 0x80000018:2 0x80000020:2 0x80000028:2 0x80000030:2 0x80000038:2 0x80000040:2 0x80000048:2][MEM_1,#:10 => 0x90000000:1 0x90000008:1 0x90000010:1 0x90000018:1 0x90000020:1 0x90000028:1 0x90000030:1 0x90000038:1 0x90000040:1 0x90000048:1][MEM_2,#:4 => 0xA0000000:7 0xA0000008:2 0xA0000010:1 0xA0000018:1]"); + assert_eq!(format!("{counter:?}"), "[MEM_0,#:10 => 0x80000000:2 0x80000008:2 0x80000010:2 0x80000018:2 0x80000020:2 0x80000028:2 0x80000030:2 0x80000038:2 0x80000040:2 0x80000048:2][MEM_1,#:10 => 0x40000000:1 0x40000008:1 0x40000010:1 0x40000018:1 0x40000020:1 0x40000028:1 0x40000030:1 0x40000038:1 0x40000040:1 0x40000048:1][MEM_2,#:4 => 0xA0000000:7 0xA0000008:2 0xA0000010:1 0xA0000018:1]"); } -/* -#[test] -fn test_mem() { - let mem_sm = MemSM::new(); - let std_sm = - - let mem_bus_device = >::build_counter(&mem_sm); - - let mut data_bus = DataBus::::new(); - data_bus.connect_device( - vec![OPERATION_BUS_ID], - Box::new(BusDeviceMetricsWrapper::new(arith_bus_device, false)), - ); - - let data = vec![ - (OPERATION_BUS_ID, OperationBusData::from_values(Mul as u8, Arith as u64, 1, 2).into()), - (OPERATION_BUS_ID, OperationBusData::from_values(Div as u8, Arith as u64, 1, 2).into()), - (OPERATION_BUS_ID, OperationBusData::from_values(Add as u8, Binary as u64, 1, 2).into()), - (OPERATION_BUS_ID, OperationBusData::from_values(Sub as u8, Binary as u64, 1, 2).into()), - ]; - - DataBusPlayer::play(&mut data_bus, data); - - let arith_counter = data_bus.devices.remove(0).inner; - - let arith_planner = - >::build_planner(&arith_sm); - - let plan = arith_planner.plan(vec![(0, arith_counter)]); - - println!("Plan: {:?}", plan); -} -*/ #[test] fn full() {} diff --git a/state-machines/mem/src/rom_data_sm.rs b/state-machines/mem/src/rom_data_sm.rs index 315d120f8..6233588d7 100644 --- a/state-machines/mem/src/rom_data_sm.rs +++ b/state-machines/mem/src/rom_data_sm.rs @@ -65,7 +65,7 @@ impl RomDataSM { pub fn save_to_file(trace: &RomDataTrace, file_name: &str) { let file = File::create(file_name).unwrap(); let mut writer = BufWriter::new(file); - let num_rows = RomDataTrace::NUM_ROWS; + let num_rows = RomDataTrace::::NUM_ROWS; for i in 0..num_rows { let addr = trace[i].get_addr() * 8; diff --git a/state-machines/publics.json b/state-machines/publics.json index 9e12a690d..3f4911c6e 100644 --- a/state-machines/publics.json +++ b/state-machines/publics.json @@ -1,7 +1,8 @@ { "nPublics": 68, "definitions": [ - { "name": "rom_root", "initialPos": 0, "chunks": [4, 64] }, - { "name": "inputs", "initialPos": 4, "chunks": [64, 32] } - ] + { "name": "rom_root", "initialPos": 0, "nValues": 4, "chunks": [1, 64], "verificationKey": true }, + { "name": "inputs", "initialPos": 4, "nValues": 64, "chunks": [1, 32] } + ], + "hasProgramVK": true } \ No newline at end of file diff --git a/state-machines/rom/Cargo.toml b/state-machines/rom/Cargo.toml index ed4653957..597ad69cd 100644 --- a/state-machines/rom/Cargo.toml +++ b/state-machines/rom/Cargo.toml @@ -14,7 +14,6 @@ zisk-pil = { workspace = true } asm-runner = { workspace = true } fields = { workspace=true } -proofman = { workspace = true } proofman-common = { workspace = true } proofman-macros = { workspace = true } proofman-util = { workspace = true } @@ -25,6 +24,5 @@ itertools = { workspace = true } [features] default = [] -no_lib_link = ["proofman-common/no_lib_link"] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] +gpu = ["packed"] +packed = [] \ No newline at end of file diff --git a/state-machines/rom/src/rom.rs b/state-machines/rom/src/rom.rs index 8858f3114..f12bc563a 100644 --- a/state-machines/rom/src/rom.rs +++ b/state-machines/rom/src/rom.rs @@ -8,14 +8,7 @@ //! - `ComponentBuilder` trait implementations for creating counters, planners, and input //! collectors. -use std::{ - path::PathBuf, - sync::{ - atomic::{AtomicBool, AtomicU32}, - Arc, Mutex, - }, - thread::JoinHandle, -}; +use std::sync::{atomic::AtomicU64, Arc, Mutex}; use crate::{RomInstance, RomPlanner}; use asm_runner::{AsmRHData, AsmRunnerRH}; @@ -23,8 +16,7 @@ use fields::PrimeField64; use itertools::Itertools; use proofman_common::{AirInstance, FromTrace, ProofmanResult}; use zisk_common::{ - create_atomic_vec, BusDeviceMetrics, ComponentBuilder, CounterStats, Instance, InstanceCtx, - Planner, + create_atomic_vec, ComponentBuilder, CounterStats, Instance, InstanceCtx, Planner, }; use zisk_core::{ zisk_ops::ZiskOp, Riscv2zisk, ZiskRom, ROM_ADDR, ROM_ADDR_MAX, ROM_ENTRY, ROM_EXIT, SRC_IMM, @@ -34,15 +26,15 @@ use zisk_pil::{MainTrace, RomRomTrace, RomRomTraceRow, RomTrace}; /// The `RomSM` struct represents the ROM State Machine pub struct RomSM { /// Zisk Rom - zisk_rom: Arc, + zisk_rom: Mutex>>, /// Shared biod instruction counter for monitoring ROM operations. - bios_inst_count: Arc>, + bios_inst_count: Arc>, /// Shared program instruction counter for monitoring ROM operations. - prog_inst_count: Arc>, + prog_inst_count: Arc>, - asm_runner_handler: Mutex>>, + rh_data: Mutex>, } impl RomSM { @@ -53,8 +45,8 @@ impl RomSM { /// /// # Returns /// An `Arc`-wrapped instance of `RomSM`. - pub fn new(zisk_rom: Arc, asm_rom_path: Option) -> Arc { - let (bios_inst_count, prog_inst_count) = if asm_rom_path.is_some() { + pub fn new(is_asm_emulator: bool) -> Arc { + let (bios_inst_count, prog_inst_count) = if is_asm_emulator { (vec![], vec![]) } else { ( @@ -64,15 +56,19 @@ impl RomSM { }; Arc::new(Self { - zisk_rom, + zisk_rom: Mutex::new(None), bios_inst_count: Arc::new(bios_inst_count), prog_inst_count: Arc::new(prog_inst_count), - asm_runner_handler: Mutex::new(None), + rh_data: Mutex::new(None), }) } - pub fn set_asm_runner_handler(&self, handler: JoinHandle) { - *self.asm_runner_handler.lock().unwrap() = Some(handler); + pub fn set_rh_data(&self, handler: AsmRunnerRH) { + *self.rh_data.lock().unwrap() = Some(handler); + } + + pub fn set_rom(&self, zisk_rom: Arc) { + *self.zisk_rom.lock().unwrap() = Some(zisk_rom); } /// Computes the witness for the provided plan using the given ROM. @@ -86,7 +82,6 @@ impl RomSM { pub fn compute_witness( rom: &ZiskRom, counter_stats: &CounterStats, - calculated: &AtomicBool, trace_buffer: Vec, ) -> ProofmanResult> { let mut rom_trace = RomTrace::new_from_vec_zeroes(trace_buffer)?; @@ -107,20 +102,9 @@ impl RomSM { if counter_stats.bios_inst_count.is_empty() { multiplicity = 1; // If the histogram is empty, we use 1 for all pc's } else { - match calculated.load(std::sync::atomic::Ordering::Relaxed) { - true => { - multiplicity = counter_stats.bios_inst_count - [((inst.paddr - ROM_ENTRY) as usize) >> 2] - .swap(0, std::sync::atomic::Ordering::Relaxed) - as u64; - } - false => { - multiplicity = counter_stats.bios_inst_count - [((inst.paddr - ROM_ENTRY) as usize) >> 2] - .load(std::sync::atomic::Ordering::Relaxed) - as u64; - } - } + multiplicity = counter_stats.bios_inst_count + [((inst.paddr - ROM_ENTRY) as usize) >> 2] + .load(std::sync::atomic::Ordering::Relaxed); if multiplicity == 0 { continue; @@ -130,20 +114,8 @@ impl RomSM { } } } else { - match calculated.load(std::sync::atomic::Ordering::Relaxed) { - true => { - multiplicity = counter_stats.prog_inst_count - [(inst.paddr - ROM_ADDR) as usize] - .swap(0, std::sync::atomic::Ordering::Relaxed) - as u64 - } - false => { - multiplicity = counter_stats.prog_inst_count - [(inst.paddr - ROM_ADDR) as usize] - .load(std::sync::atomic::Ordering::Relaxed) - as u64 - } - } + multiplicity = counter_stats.prog_inst_count[(inst.paddr - ROM_ADDR) as usize] + .load(std::sync::atomic::Ordering::Relaxed); if multiplicity == 0 { continue; } @@ -282,17 +254,15 @@ impl RomSM { /// * `rom_path` - The path to the ELF file. /// * `rom_custom_trace` - Reference to the custom ROM trace. pub fn compute_custom_trace_rom( - rom_path: PathBuf, + elf: &[u8], rom_custom_trace: &mut RomRomTrace, ) { - // Get the ELF file path as a string - let elf_filename: String = rom_path.to_str().unwrap().into(); tracing::info!("Computing custom trace ROM"); // Load and parse the ELF file, and transpile it into a ZisK ROM using Riscv2zisk // Create an instance of the RISCV -> ZisK program converter - let riscv2zisk = Riscv2zisk::new(elf_filename); + let riscv2zisk = Riscv2zisk::new(elf); // Convert program to rom let rom = riscv2zisk.run().expect("RomSM::prover() failed converting elf to rom"); @@ -311,14 +281,6 @@ impl RomSM { } impl ComponentBuilder for RomSM { - /// Builds and returns a new counter for monitoring ROM operations. - /// - /// # Returns - /// A boxed implementation of `RomCounter`. - fn build_counter(&self) -> Option> { - None - } - /// Builds a planner for ROM-related instances. /// /// # Returns @@ -335,15 +297,12 @@ impl ComponentBuilder for RomSM { /// # Returns /// A boxed implementation of `RomInstance`. fn build_instance(&self, ictx: InstanceCtx) -> Box> { - let mut handle_rh_guard = self.asm_runner_handler.lock().unwrap(); - let handle_rh = handle_rh_guard.take(); - Box::new(RomInstance::new( - self.zisk_rom.clone(), + self.zisk_rom.lock().unwrap().as_ref().unwrap().clone(), ictx, self.bios_inst_count.clone(), self.prog_inst_count.clone(), - handle_rh, + self.rh_data.lock().unwrap().take(), )) } } diff --git a/state-machines/rom/src/rom_counter.rs b/state-machines/rom/src/rom_counter.rs index 8c5a033ff..1f47d2a9b 100644 --- a/state-machines/rom/src/rom_counter.rs +++ b/state-machines/rom/src/rom_counter.rs @@ -4,7 +4,7 @@ use std::{ any::Any, - sync::{atomic::AtomicU32, Arc}, + sync::{atomic::AtomicU64, Arc}, }; use zisk_common::{CounterStats, Metrics, RomBusData, RomData}; @@ -24,7 +24,7 @@ impl RomCounter { /// /// # Returns /// A new `RomCounter` instance. - pub fn new(bios_inst_count: Arc>, prog_inst_count: Arc>) -> Self { + pub fn new(bios_inst_count: Arc>, prog_inst_count: Arc>) -> Self { let counter_stats = CounterStats::new(bios_inst_count, prog_inst_count); Self { counter_stats } } diff --git a/state-machines/rom/src/rom_instance.rs b/state-machines/rom/src/rom_instance.rs index 8a3489d71..3f8bacfd9 100644 --- a/state-machines/rom/src/rom_instance.rs +++ b/state-machines/rom/src/rom_instance.rs @@ -2,23 +2,17 @@ //! //! It is responsible for computing witnesses for ROM-related execution plans, -use std::{ - collections::VecDeque, - sync::{ - atomic::{AtomicBool, AtomicU32}, - Arc, - }, - thread::JoinHandle, -}; +use std::sync::{atomic::AtomicU64, Arc}; use crate::{rom_counter::RomCounter, RomSM}; use asm_runner::AsmRunnerRH; use fields::PrimeField64; use proofman_common::{AirInstance, ProofCtx, ProofmanResult, SetupCtx}; use std::sync::Mutex; +use zisk_common::StatsType; use zisk_common::{ create_atomic_vec, BusDevice, BusId, CheckPoint, ChunkId, CounterStats, Instance, InstanceCtx, - InstanceType, MemCollectorInfo, Metrics, PayloadType, ROM_BUS_ID, + InstanceType, Metrics, PayloadType, ROM_BUS_ID, }; use zisk_core::ZiskRom; @@ -35,21 +29,19 @@ pub struct RomInstance { ictx: InstanceCtx, /// Shared biod instruction counter for monitoring ROM operations. - bios_inst_count: Mutex>>, + bios_inst_count: Mutex>>, /// Shared program instruction counter for monitoring ROM operations. - prog_inst_count: Mutex>>, + prog_inst_count: Mutex>>, /// Execution statistics counter for ROM instructions. counter_stats: Mutex>, - /// Optional handle for the ROM assembly runner thread. - handle_rh: Mutex>>, + /// Rom Histogram data from the assembly runner thread. + rh_data: Mutex>, /// Cached result from the assembly runner thread. asm_result: Mutex>, - - calculated: AtomicBool, } impl RomInstance { @@ -64,9 +56,9 @@ impl RomInstance { pub fn new( zisk_rom: Arc, ictx: InstanceCtx, - bios_inst_count: Arc>, - prog_inst_count: Arc>, - handle_rh: Option>, + bios_inst_count: Arc>, + prog_inst_count: Arc>, + rh_data: Option, ) -> Self { Self { zisk_rom, @@ -74,9 +66,8 @@ impl RomInstance { bios_inst_count: Mutex::new(bios_inst_count), prog_inst_count: Mutex::new(prog_inst_count), counter_stats: Mutex::new(None), - handle_rh: Mutex::new(handle_rh), + rh_data: Mutex::new(rh_data), asm_result: Mutex::new(None), - calculated: AtomicBool::new(false), } } @@ -85,7 +76,7 @@ impl RomInstance { } pub fn is_asm_execution(&self) -> bool { - self.handle_rh.lock().unwrap().is_some() || self.asm_result.lock().unwrap().is_some() + self.rh_data.lock().unwrap().is_some() || self.asm_result.lock().unwrap().is_some() } pub fn build_rom_collector(&self, _chunk_id: ChunkId) -> Option { @@ -126,11 +117,9 @@ impl Instance for RomInstance { if self.is_asm_execution() { // Check if we already have the result cached if self.asm_result.lock().unwrap().is_none() { - // Join the thread and cache the result - let handle_rh = self.handle_rh.lock().unwrap().take().unwrap(); - let result_rh = - handle_rh.join().expect("Error during Rom Histogram thread execution"); - *self.asm_result.lock().unwrap() = Some(result_rh); + // Retrieve the data from the assembly runner + let rh_data = self.rh_data.lock().unwrap().take().unwrap(); + *self.asm_result.lock().unwrap() = Some(rh_data); } // Use the cached result @@ -172,17 +161,32 @@ impl Instance for RomInstance { let air_instance = Some(RomSM::compute_witness( &self.zisk_rom, self.counter_stats.lock().unwrap().as_ref().unwrap(), - &self.calculated, trace_buffer, )?); - self.calculated.store(true, std::sync::atomic::Ordering::Relaxed); Ok(air_instance) } fn reset(&self) { *self.counter_stats.lock().unwrap() = None; *self.asm_result.lock().unwrap() = None; - self.calculated.store(false, std::sync::atomic::Ordering::Relaxed); + + let bios_counts = self.bios_inst_count.lock().unwrap().clone(); + let prog_counts = self.prog_inst_count.lock().unwrap().clone(); + + rayon::join( + || { + use rayon::prelude::*; + bios_counts + .par_iter() + .for_each(|i| i.store(0, std::sync::atomic::Ordering::Relaxed)); + }, + || { + use rayon::prelude::*; + prog_counts + .par_iter() + .for_each(|i| i.store(0, std::sync::atomic::Ordering::Relaxed)); + }, + ); } /// Retrieves the checkpoint associated with this instance. @@ -201,6 +205,10 @@ impl Instance for RomInstance { InstanceType::Instance } + fn stats_type(&self) -> StatsType { + StatsType::Memory + } + /// Builds an input collector for the instance. /// /// # Arguments @@ -240,15 +248,13 @@ impl RomCollector { /// A new `RomCounter` instance. pub fn new( computed: bool, - bios_inst_count: Arc>, - prog_inst_count: Arc>, + bios_inst_count: Arc>, + prog_inst_count: Arc>, ) -> Self { let rom_counter = RomCounter::new(bios_inst_count, prog_inst_count); Self { already_computed: computed, rom_counter } } -} -impl BusDevice for RomCollector { /// Processes data received on the bus, updating ROM metrics. /// /// # Arguments @@ -260,13 +266,7 @@ impl BusDevice for RomCollector { /// A boolean indicating whether the program should continue execution or terminate. /// Returns `true` to continue execution, `false` to stop. #[inline(always)] - fn process_data( - &mut self, - bus_id: &BusId, - data: &[u64], - _pending: &mut VecDeque<(BusId, Vec)>, - _mem_collector_info: Option<&[MemCollectorInfo]>, - ) -> bool { + pub fn process_data(&mut self, bus_id: &BusId, data: &[u64]) -> bool { debug_assert!(*bus_id == ROM_BUS_ID); if !self.already_computed { @@ -275,15 +275,9 @@ impl BusDevice for RomCollector { true } +} - /// Returns the bus IDs associated with this counter. - /// - /// # Returns - /// A vector containing the connected bus ID. - fn bus_id(&self) -> Vec { - vec![ROM_BUS_ID] - } - +impl BusDevice for RomCollector { /// Provides a dynamic reference for downcasting purposes. fn as_any(self: Box) -> Box { self diff --git a/state-machines/starkstructs.json b/state-machines/starkstructs.json index ee3018636..3e4636601 100644 --- a/state-machines/starkstructs.json +++ b/state-machines/starkstructs.json @@ -4,5 +4,8 @@ "ArithEq": { "hasCompressor": true }, "ArithEq384": { "hasCompressor": true }, "VirtualTable0": { "lastLevelVerification": 1 }, - "VirtualTable1": { "lastLevelVerification": 1 } + "VirtualTable1": { "lastLevelVerification": 1 }, + "SpecifiedRanges": { "lastLevelVerification": 1 }, + "Poseidon2": { "blowupFactor": 2 }, + "Rom": { "powBits": 20 } } \ No newline at end of file diff --git a/tools/emulate_all.sh b/tools/emulate_all.sh index c922a3ccc..8818242f2 100755 --- a/tools/emulate_all.sh +++ b/tools/emulate_all.sh @@ -107,7 +107,7 @@ do echo "Emulating file ${COUNTER} of ${MAX_COUNTER}: ${ELF_FILE}" # Execute it and save output - ./target/debug/ziskemu -e $ELF_FILE -i $INPUT_FILE 2>&1|tee output + ./target/debug/ziskemu -e $ELF_FILE -i $INPUT_FILE -f 2>&1|tee output # Compare output vs reference REFERENCE_FILE=${ELF_FILE%%my.elf}../ref/Reference-sail_c_simulator.signature diff --git a/tools/emulate_asm_all.sh b/tools/emulate_asm_all.sh index 1a87d7115..5f8fb2eed 100755 --- a/tools/emulate_asm_all.sh +++ b/tools/emulate_asm_all.sh @@ -125,12 +125,12 @@ do make # Execute it and save output - build/ziskemuasm -s --gen=1 -o --silent > output 2>&1 & + build/ziskemuasm -s --gen=1 --output_riscof --silent > output 2>&1 & # Store the PID of the background process # BG_PID=$! - echo "Sleeping for 5 seconds to let the emulator server initialize..." - sleep 5 + echo "Sleeping for 8 seconds to let the emulator server initialize..." + sleep 8 build/ziskemuasm -c -i empty_input.bin --gen=1 --shutdown echo "Sleeping for 2 seconds to let the emulator server complete..." sleep 2 diff --git a/tools/hint_file/Cargo.lock b/tools/hint_file/Cargo.lock new file mode 100644 index 000000000..64c945643 --- /dev/null +++ b/tools/hint_file/Cargo.lock @@ -0,0 +1,193 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "clap" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hint_file" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] diff --git a/tools/hint_file/Cargo.toml b/tools/hint_file/Cargo.toml new file mode 100644 index 000000000..b339c5b24 --- /dev/null +++ b/tools/hint_file/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "hint_file" +version = "0.1.0" +edition = "2021" + +[workspace] + +[[bin]] +name = "hint_file" +path = "src/main.rs" + +[dependencies] +anyhow = "1.0" +clap = { version = "4.5", features = ["derive"] } diff --git a/tools/hint_file/README.md b/tools/hint_file/README.md new file mode 100644 index 000000000..c2f0ce09e --- /dev/null +++ b/tools/hint_file/README.md @@ -0,0 +1,310 @@ +# Hint File + +Utility to read, analyze, and filter Zisk hints files. + +## Features + +- **Summary mode**: Display statistics by hint type (count, total bytes, min/max) +- **Detail mode**: Show detailed list of all hints in the file +- **Filter mode**: Exclude specific hint types from output (remove unwanted hints) +- **Extract mode**: Include only specific hint types in output (keep wanted hints) +- **Inject mode**: Interleave binary input file chunks with existing hints +- **Validation**: Check file integrity and detect garbage data + +## Hints File Format + +The hints file has the following structure: + +1. **8-byte header** at the beginning (skipped during processing) +2. **Multiple hints**, each with: + - 8-byte header (length + type) + - Data payload (8-byte aligned) +3. **Final tag**: A hint with type=1 and length=0 marking the end + +Each hint has the following structure: + +``` +┌────────────────────────────────┐ +│ Length (u32 LE) - 4 bytes │ ← Actual length in bytes +├────────────────────────────────┤ +│ Type (u32 LE) - 4 bytes │ ← Hint type +├────────────────────────────────┤ +│ Data - aligned bytes │ ← Hint payload (padded to 8-byte boundary) +└────────────────────────────────┘ +``` + +- **Length**: Actual number of bytes of data (u32, little-endian) +- **Type**: Hint type identifier (e.g., 0xF0000 for INPUT, 0x0100 for SHA256, etc.) +- **Data**: The hint payload, stored in multiples of 8 bytes (padded if needed) + +⚠️ **Important notes**: +- The length field contains the actual data size in bytes +- Data is stored aligned to 8-byte boundaries (e.g., 12 bytes of data will occupy 16 bytes in the file) +- The length does NOT include the 8-byte hint header (u32 + u32) +- The file ends when a hint with type=1 and length=0 is found +- The tool verifies there is no garbage data after the final tag + +## Build + +```bash +cd tools/hint_file +cargo build --release +``` + +The binary will be generated at `target/release/hint_file`. + +## Usage + +### Basic Usage (Summary Mode) + +By default, the tool shows a summary of hints found in the file: + +```bash +./target/release/hint_file +``` + +Or: + +```bash +cargo run --release -- +``` + +### Show Detailed List + +Use `-d` or `--detail` to see all hints: + +```bash +./target/release/hint_file --detail +``` + +### Filter Hints (Exclude) + +Remove specific hint types from the output file using `-f` or `--filter` and `-o` or `--output`: + +```bash +# Filter out single type (hex) - keeps all EXCEPT 0xF0000 +./target/release/hint_file -f 0xF0000 -o output.bin input.bin + +# Filter out multiple types - keeps all EXCEPT these +./target/release/hint_file -f 0xF0000,256,0x0100 -o output.bin input.bin + +# Filter with detail view +./target/release/hint_file -d -f 0x0100 -o no_sha256.bin hints.bin +``` + +### Extract Hints (Include) + +Keep only specific hint types in the output file using `-e` or `--extract` and `-o` or `--output`: + +```bash +# Extract single type (hex) - keeps ONLY 0xF0000 +./target/release/hint_file -e 0xF0000 -o output.bin input.bin + +# Extract multiple types - keeps ONLY these +./target/release/hint_file -e 0xF0000,256,0x0100 -o output.bin input.bin + +# Extract with detail view +./target/release/hint_file -d -e 0x0100 -o sha256_only.bin hints.bin +``` + +### Inject Input (Interleave) + +Interleave chunks from a binary input file with existing hints using `--inject-input` and related options: + +```bash +# Basic inject - split input.dat into chunks and interleave with hints +./target/release/hint_file --inject-input input.dat -o output.bin hints.bin + +# Custom chunk size (1024 bytes including 8-byte header = 1016 bytes of data per chunk) +./target/release/hint_file --inject-input input.dat --inject-chunk-size 1024 -o output.bin hints.bin + +# Start injection after first 10 hints +./target/release/hint_file --inject-input input.dat --inject-start 10 -o output.bin hints.bin + +# Inject 2 chunks at a time, then skip 3 original hints, repeat +./target/release/hint_file --inject-input input.dat \ + --inject-group-size 2 \ + --inject-distance 3 \ + -o output.bin hints.bin + +# Full example with all parameters +./target/release/hint_file --inject-input input.dat \ + --inject-start 5 \ + --inject-chunk-size 512 \ + --inject-group-size 3 \ + --inject-distance 2 \ + --inject-type 0xF0000 \ + -o output.bin hints.bin +``` + +**Inject Parameters:** +- `--inject-input `: Binary file to split and inject (required for inject mode) +- `--inject-start `: Start position (0 = beginning, 1 = after first hint) [default: 0] +- `--inject-chunk-size `: Size of each chunk including 8-byte header [default: 1024] +- `--inject-group-size `: Number of chunks to inject together [default: 1] +- `--inject-distance `: Number of original hints between groups [default: 1] +- `--inject-type `: Hint type for injected chunks (hex/decimal) [default: 0xF0000] + +**How it works:** +1. The input file is split into chunks of `inject-chunk-size` (minus 8 bytes for header) +2. Injection starts at position `inject-start` in the original hints +3. Each group of `inject-group-size` chunks is written together +4. Between groups, `inject-distance` original hints are preserved +5. Any remaining chunks are appended at the end + +**Example pattern** (start=1, group=2, distance=3): +``` +Original: [H0, H1, H2, H3, H4, H5, H6, H7, ...] +Result: [H0, I0, I1, H1, H2, H3, I2, I3, H4, H5, H6, I4, I5, ...] + └─┘ └────┘ └────────┘ └────┘ └────────┘ └────┘ + start group distance group distance group +``` + +### Combined Options + +```bash +# Show both detail and summary +./target/release/hint_file --detail --summary + +# Filter (exclude types) and show summary +./target/release/hint_file -f 0xF0000 -o filtered.bin -s input.bin + +# Extract (include only types) and show summary +./target/release/hint_file -e 0x0100,0x0200 -o extracted.bin -s input.bin + +# Inject with detail view +./target/release/hint_file --inject-input data.bin --detail -o output.bin hints.bin +``` + +## Command-Line Options + +- ``: Input hints file (required) +- `-d, --detail`: Show detailed list of all hints +- `-s, --summary`: Show summary statistics (default if --detail not used) +- `-o, --output `: Output file for filtered/extracted/injected hints +- `-f, --filter `: Exclude hint types (comma-separated, keeps all EXCEPT these) + - Supports decimal: `-f 256,512` + - Supports hexadecimal: `-f 0xF0000,0x0100` + - Mixed formats: `-f 0xF0000,256,0x0100` + - Cannot be used with `--extract` +- `-e, --extract `: Include only hint types (comma-separated, keeps ONLY these) + - Supports decimal: `-e 256,512` + - Supports hexadecimal: `-e 0xF0000,0x0100` + - Mixed formats: `-e 0xF0000,256,0x0100` + - Cannot be used with `--filter` +- `--inject-input `: Binary input file to inject as hints (requires `--output`) +- `--inject-start `: Start position for injection [default: 0] +- `--inject-chunk-size `: Chunk size including header [default: 1024] +- `--inject-group-size `: Chunks per group [default: 1] +- `--inject-distance `: Original hints between groups [default: 1] +- `--inject-type `: Hint type for injected chunks [default: 0xF0000] + +## Example Outputs + +### Summary Mode (Default) + +``` +=== Summary === +File: hints_results_0.bin +Total hints: 150 + + Type (hex) | Count | Total (bytes) | Min (bytes) | Max (bytes) +-------------------------------------------------------------------------------- + 0x00000100 | 10 | 2560 | 256 | 256 + 0x00000200 | 20 | 2560 | 128 | 128 + 0x000F0000 | 120 | 122880 | 1024 | 1024 +``` + +### Detail Mode + +``` +Reading hints file: hints_results_0.bin +Header: [00, 00, 00, 00, 00, 00, 00, 00] +-------------------------------------------------------------------------------- + Index | Type (hex) | Len (bytes) | Aligned (bytes) +-------------------------------------------------------------------------------- + 0 | 0x000F0000 | 1024 | 1024 + 1 | 0x00000100 | 256 | 256 + 2 | 0x00000200 | 128 | 128 + 3 | 0x00000300 | 12 | 16 +-------------------------------------------------------------------------------- +Total hints processed: 4 +Final tag: length=0, type=1 (0x00000001) +``` + +### Filter Mode (Exclude) + +``` +=== Summary === +File: input.bin +Total hints: 150 + + Type (hex) | Count | Total (bytes) | Min (bytes) | Max (bytes) +-------------------------------------------------------------------------------- + 0x00000100 | 10 | 2560 | 256 | 256 + 0x00000200 | 20 | 2560 | 128 | 128 + +Filtered hints written to: output.bin (excluded types: 0x000F0000) +``` + +### Extract Mode (Include) + +``` +=== Summary === +File: input.bin +Total hints: 150 + + Type (hex) | Count | Total (bytes) | Min (bytes) | Max (bytes) +-------------------------------------------------------------------------------- + 0x000F0000 | 120 | 122880 | 1024 | 1024 + +Extracted hints written to: output.bin (included types: 0x000F0000) +``` + +### Inject Mode (Interleave) + +``` +=== Summary === +File: input.bin +Total hints: 150 + + Type (hex) | Count | Total (bytes) | Min (bytes) | Max (bytes) +-------------------------------------------------------------------------------- + 0x00000100 | 10 | 2560 | 256 | 256 + 0x00000200 | 20 | 2560 | 128 | 128 + 0x000F0000 | 145 | 147456 | 1016 | 1024 + +Hints with injected input written to: output.bin (25 chunks injected, type: 0x000F0000) + Inject parameters: start=0, chunk_size=1024, group_size=1, distance=5 +``` + +## Known Hint Codes + +### Control Codes +- `0x0000` - CTRL_START: Reset state +- `0x0001` - CTRL_END: End processing +- `0x0002` - CTRL_CANCEL: Cancel stream +- `0x0003` - CTRL_ERROR: Error + +### Data Hints +- `0xF0000` - HINT_INPUT: Input data +- `0x0100` - HINT_SHA256: SHA-256 +- `0x0200` - HINT_BN254_G1_ADD: BN254 G1 Add +- `0x0201` - HINT_BN254_G1_MUL: BN254 G1 Mul +- `0x0205` - HINT_BN254_PAIRING_CHECK: BN254 Pairing +- `0x0300` - HINT_SECP256K1_ECDSA_ADDRESS_RECOVER: secp256k1 recover +- `0x0301` - HINT_SECP256K1_ECDSA_VERIFY_ADDRESS_RECOVER: secp256k1 verify+recover +- `0x0380` - HINT_SECP256R1_ECDSA_VERIFY: secp256r1 verify +- `0x0400` - HINT_BLS12_381_G1_ADD: BLS12-381 G1 Add +- `0x0401` - HINT_BLS12_381_G1_MSM: BLS12-381 G1 MSM +- `0x0405` - HINT_BLS12_381_G2_ADD: BLS12-381 G2 Add +- `0x0406` - HINT_BLS12_381_G2_MSM: BLS12-381 G2 MSM +- `0x040A` - HINT_BLS12_381_PAIRING_CHECK: BLS12-381 Pairing +- `0x0410` - HINT_BLS12_381_FP_TO_G1: BLS12-381 Fp to G1 +- `0x0411` - HINT_BLS12_381_FP2_TO_G2: BLS12-381 Fp2 to G2 +- `0x0500` - HINT_MODEXP: Modular exponentiation +- `0x0600` - HINT_VERIFY_KZG_PROOF: KZG verification +- `0x0700` - HINT_KECCAK256: Keccak-256 +- `0x0800` - HINT_BLAKE2B_COMPRESS: Blake2b + +For more details, see `common/src/hints.rs`. diff --git a/tools/hint_file/src/main.rs b/tools/hint_file/src/main.rs new file mode 100644 index 000000000..cb6c57896 --- /dev/null +++ b/tools/hint_file/src/main.rs @@ -0,0 +1,515 @@ +use anyhow::{Context, Result}; +use clap::Parser; +use std::collections::HashMap; +use std::fs::File; +use std::io::{BufReader, BufWriter, Read, Write}; +use std::path::PathBuf; + +/// Utility to read, analyze, and filter Zisk hints files +#[derive(Parser, Debug)] +#[command(name = "hint_file")] +#[command(about = "Read and process Zisk hints files", long_about = None)] +struct Args { + /// Input hints file + input: PathBuf, + + /// Show detailed list of hints + #[arg(short, long)] + detail: bool, + + /// Show summary statistics (enabled by default if --detail is not used) + #[arg(short, long)] + summary: bool, + + /// Output file for filtered hints + #[arg(short, long)] + output: Option, + + /// Filter out (exclude) hint types from output (comma-separated, decimal or hex with 0x prefix) + /// Example: --filter 0xF0000,256,0x0100 + #[arg(short, long, value_delimiter = ',', conflicts_with = "extract")] + filter: Vec, + + /// Extract (include only) specific hint types to output (comma-separated, decimal or hex with 0x prefix) + /// Example: --extract 0xF0000,256,0x0100 + #[arg(short, long, value_delimiter = ',', conflicts_with = "filter")] + extract: Vec, + + /// Input file to inject as hints (binary file without format) + #[arg(long, requires = "output")] + inject_input: Option, + + /// Index position where to start injecting input chunks (0 = beginning, 1 = after first hint, etc.) + #[arg(long, default_value = "0", requires = "inject_input")] + inject_start: usize, + + /// Size of each input chunk in bytes (including 8-byte header) + #[arg(long, default_value = "1024", requires = "inject_input")] + inject_chunk_size: usize, + + /// Number of input chunks to inject together as a group + #[arg(long, default_value = "1", requires = "inject_input")] + inject_group_size: usize, + + /// Number of original hints to keep between injected groups + #[arg(long, default_value = "1", requires = "inject_input")] + inject_distance: usize, + + /// Hint type for injected input chunks (hex or decimal) + #[arg(long, default_value = "0xF0000", requires = "inject_input")] + inject_type: String, +} + +/// Reads a u32 in little-endian format from the buffer +fn read_u32_le(reader: &mut R) -> std::io::Result { + let mut buf = [0u8; 4]; + reader.read_exact(&mut buf)?; + Ok(u32::from_le_bytes(buf)) +} + +/// Writes a u32 in little-endian format to the writer +fn write_u32_le(writer: &mut W, value: u32) -> std::io::Result<()> { + writer.write_all(&value.to_le_bytes()) +} + +/// Structure representing a hint +#[derive(Debug, Clone)] +struct Hint { + length_bytes: u32, // Actual length in bytes (from header) + length_aligned: usize, // Aligned length (multiple of 8 bytes) + hint_type: u32, // Hint type + data: Vec, // Hint data (aligned) +} + +impl Hint { + fn from_reader(reader: &mut R) -> std::io::Result> { + // Try to read the first 4 bytes (length in bytes) + let length_bytes = match read_u32_le(reader) { + Ok(len) => len, + Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(None), + Err(e) => return Err(e), + }; + + // Read the next 4 bytes (hint type) + let hint_type = read_u32_le(reader)?; + + // Calculate aligned length (rounded up to multiple of 8 bytes) + let length_aligned = ((length_bytes as usize + 7) / 8) * 8; + + // Read the hint data (aligned to 8 bytes) + let mut data = vec![0u8; length_aligned]; + reader.read_exact(&mut data)?; + + Ok(Some(Hint { length_bytes, length_aligned, hint_type, data })) + } + + fn write_to(&self, writer: &mut W) -> std::io::Result<()> { + write_u32_le(writer, self.length_bytes)?; + write_u32_le(writer, self.hint_type)?; + writer.write_all(&self.data)?; + Ok(()) + } +} + +#[derive(Debug, Default)] +struct HintStats { + count: usize, + total_bytes: u64, + min_bytes: u32, + max_bytes: u32, +} + +impl HintStats { + fn update(&mut self, length: u32) { + self.count += 1; + self.total_bytes += length as u64; + if self.count == 1 { + self.min_bytes = length; + self.max_bytes = length; + } else { + self.min_bytes = self.min_bytes.min(length); + self.max_bytes = self.max_bytes.max(length); + } + } +} + +fn parse_hint_types(filter_strings: &[String]) -> Result> { + let mut types = Vec::new(); + for s in filter_strings { + let s = s.trim(); + let value = if s.starts_with("0x") || s.starts_with("0X") { + u32::from_str_radix(&s[2..], 16) + .with_context(|| format!("Invalid hex hint type: {}", s))? + } else { + s.parse::().with_context(|| format!("Invalid decimal hint type: {}", s))? + }; + types.push(value); + } + Ok(types) +} + +fn parse_hint_type(type_string: &str) -> Result { + let s = type_string.trim(); + let value = if s.starts_with("0x") || s.starts_with("0X") { + u32::from_str_radix(&s[2..], 16).with_context(|| format!("Invalid hex hint type: {}", s))? + } else { + s.parse::().with_context(|| format!("Invalid decimal hint type: {}", s))? + }; + Ok(value) +} + +/// Read input file and split it into chunks as Hints +fn create_input_chunks( + input_path: &PathBuf, + chunk_size: usize, + hint_type: u32, +) -> Result> { + if chunk_size <= 8 { + anyhow::bail!("Chunk size must be greater than 8 bytes (to accommodate header)"); + } + + let mut file = File::open(input_path) + .with_context(|| format!("Cannot open inject input file: {}", input_path.display()))?; + + let mut input_data = Vec::new(); + file.read_to_end(&mut input_data).context("Failed to read inject input file")?; + + if input_data.is_empty() { + anyhow::bail!("Inject input file is empty"); + } + + let data_size_per_chunk = chunk_size - 8; // Subtract 8 bytes for header + let mut chunks = Vec::new(); + + for chunk_data in input_data.chunks(data_size_per_chunk) { + let length_bytes = chunk_data.len() as u32; + let length_aligned = ((length_bytes as usize + 7) / 8) * 8; + + // Pad data to 8-byte alignment + let mut data = chunk_data.to_vec(); + data.resize(length_aligned, 0); + + chunks.push(Hint { length_bytes, length_aligned, hint_type, data }); + } + + Ok(chunks) +} + +/// Interleave input chunks with original hints and write to output +fn write_interleaved_hints( + writer: &mut W, + original_hints: &[Hint], + inject_chunks: &[Hint], + start_index: usize, + group_size: usize, + distance: usize, + stats: &mut HashMap, +) -> Result<()> { + let mut original_idx = 0; + let mut inject_idx = 0; + + // Write initial hints before inject_start + while original_idx < start_index && original_idx < original_hints.len() { + original_hints[original_idx].write_to(writer)?; + original_idx += 1; + } + + // Interleave inject chunks with original hints + while inject_idx < inject_chunks.len() && original_idx < original_hints.len() { + // Write a group of inject chunks + let mut group_count = 0; + while group_count < group_size && inject_idx < inject_chunks.len() { + let chunk = &inject_chunks[inject_idx]; + chunk.write_to(writer)?; + + // Update statistics for injected chunk + stats + .entry(chunk.hint_type) + .or_insert_with(HintStats::default) + .update(chunk.length_bytes); + + inject_idx += 1; + group_count += 1; + } + + // Write distance number of original hints + let mut distance_count = 0; + while distance_count < distance && original_idx < original_hints.len() { + original_hints[original_idx].write_to(writer)?; + original_idx += 1; + distance_count += 1; + } + } + + // Write remaining original hints + while original_idx < original_hints.len() { + original_hints[original_idx].write_to(writer)?; + original_idx += 1; + } + + // Write remaining inject chunks (if any left) + while inject_idx < inject_chunks.len() { + let chunk = &inject_chunks[inject_idx]; + chunk.write_to(writer)?; + + // Update statistics for injected chunk + stats.entry(chunk.hint_type).or_insert_with(HintStats::default).update(chunk.length_bytes); + + inject_idx += 1; + } + + Ok(()) +} + +fn process_hints_file(args: &Args) -> Result<()> { + let file = File::open(&args.input) + .with_context(|| format!("Cannot open file: {}", args.input.display()))?; + + let mut reader = BufReader::new(file); + + // Read the 8-byte header at the beginning of the file + let mut header = [0u8; 8]; + reader + .read_exact(&mut header) + .context("Failed to read 8-byte header at the beginning of the file")?; + + // Parse filter types (exclude) or extract types (include only) + let filter_types = + if !args.filter.is_empty() { Some(parse_hint_types(&args.filter)?) } else { None }; + + let extract_types = + if !args.extract.is_empty() { Some(parse_hint_types(&args.extract)?) } else { None }; + + // Read input chunks if inject mode is enabled + let inject_chunks = if let Some(ref inject_path) = args.inject_input { + let inject_type = parse_hint_type(&args.inject_type)?; + Some(create_input_chunks(inject_path, args.inject_chunk_size, inject_type)?) + } else { + None + }; + + let mut hints = Vec::new(); + let mut stats: HashMap = HashMap::new(); + let mut hint_index = 0; + + // Show detail header if requested + if args.detail { + println!("Reading hints file: {}", args.input.display()); + println!("Header: {:02x?}", header); + println!("{:-<80}", ""); + println!( + "{:>6} | {:>12} | {:>12} | {:>14}", + "Index", "Type (hex)", "Len (bytes)", "Aligned (bytes)" + ); + println!("{:-<80}", ""); + } + + // Read all hints from the input file + let mut final_tag: Option = None; + loop { + match Hint::from_reader(&mut reader) { + Ok(Some(hint)) => { + // Check if this is the final tag (type=1) + if hint.hint_type == 1 { + if args.detail { + println!("{:-<80}", ""); + println!("Total hints processed: {}", hint_index); + println!( + "Final tag: length={}, type=1 (0x{:08X})", + hint.length_bytes, hint.hint_type + ); + } + + if hint.length_bytes != 0 { + eprintln!( + "Warning: Expected length=0 in final tag, got {}", + hint.length_bytes + ); + } + + // Check for garbage after the final tag + let mut garbage_buf = [0u8; 1]; + match reader.read(&mut garbage_buf) { + Ok(0) => { + // No more data, good + } + Ok(n) => { + eprintln!( + "Warning: Found {} extra bytes after final tag (garbage data)", + n + ); + // Try to read more to see how much garbage there is + let mut extra_buf = Vec::new(); + if let Ok(extra) = reader.read_to_end(&mut extra_buf) { + eprintln!("Warning: Total garbage bytes: {}", n + extra); + } + } + Err(_) => { + // Error reading, probably end of file + } + } + + final_tag = Some(hint); + break; + } + + // Not the final tag, it's a normal hint + if args.detail { + println!( + "{:>6} | {:>12} | {:>12} | {:>14}", + hint_index, + format!("0x{:08X}", hint.hint_type), + hint.length_bytes, + hint.length_aligned + ); + } + + // Update statistics + stats + .entry(hint.hint_type) + .or_insert_with(HintStats::default) + .update(hint.length_bytes); + + hints.push(hint); + hint_index += 1; + } + Ok(None) => { + // End of file without final tag + if args.detail { + println!("{:-<80}", ""); + println!("Total hints processed: {}", hint_index); + } + eprintln!("Warning: Reached end of file without finding final tag (type=1)"); + break; + } + Err(e) => { + // Read error + if hint_index == 0 { + return Err(e).context("Error reading first hint"); + } else { + eprintln!("Error reading hint {}: {}", hint_index, e); + break; + } + } + } + } + + // Process output if needed (inject mode or filter/extract mode) + if let Some(ref output_path) = args.output { + let output_file = File::create(output_path) + .with_context(|| format!("Cannot create output file: {}", output_path.display()))?; + let mut writer = BufWriter::new(output_file); + + // Write the header to output file + writer.write_all(&header)?; + + if let Some(ref inject_chunks) = inject_chunks { + // Inject mode: interleave input chunks with original hints + write_interleaved_hints( + &mut writer, + &hints, + inject_chunks, + args.inject_start, + args.inject_group_size, + args.inject_distance, + &mut stats, + )?; + } else { + // Filter/extract mode: write hints based on filter/extract criteria + for hint in &hints { + let should_write = if let Some(ref extract) = extract_types { + // Extract mode: write only if hint type is in the list + extract.contains(&hint.hint_type) + } else if let Some(ref filter) = filter_types { + // Filter mode: write only if hint type is NOT in the list (exclude) + !filter.contains(&hint.hint_type) + } else { + // No filter or extract, write all hints + true + }; + + if should_write { + hint.write_to(&mut writer)?; + } + } + } + + // Write final tag if present + if let Some(ref tag) = final_tag { + tag.write_to(&mut writer)?; + } + + writer.flush()?; + } + + // Show summary if requested or if detail is not shown + if args.summary || !args.detail { + println!(); + println!("=== Summary ==="); + println!("File: {}", args.input.display()); + println!("Total hints: {}", hint_index); + println!(); + println!( + "{:>12} | {:>8} | {:>14} | {:>12} | {:>12}", + "Type (hex)", "Count", "Total (bytes)", "Min (bytes)", "Max (bytes)" + ); + println!("{:-<80}", ""); + + // Sort by hint type for consistent output + let mut sorted_stats: Vec<_> = stats.iter().collect(); + sorted_stats.sort_by_key(|(type_id, _)| *type_id); + + for (hint_type, stat) in sorted_stats { + println!( + "{:>12} | {:>8} | {:>14} | {:>12} | {:>12}", + format!("0x{:08X}", hint_type), + stat.count, + stat.total_bytes, + stat.min_bytes, + stat.max_bytes + ); + } + } + + // Report output file if created + if let Some(ref output_path) = args.output { + println!(); + if let Some(ref inject_chunks) = inject_chunks { + let inject_type = parse_hint_type(&args.inject_type)?; + println!( + "Hints with injected input written to: {} ({} chunks injected, type: 0x{:08X})", + output_path.display(), + inject_chunks.len(), + inject_type + ); + println!( + " Inject parameters: start={}, chunk_size={}, group_size={}, distance={}", + args.inject_start, + args.inject_chunk_size, + args.inject_group_size, + args.inject_distance + ); + } else if let Some(ref extract) = extract_types { + println!( + "Extracted hints written to: {} (included types: {})", + output_path.display(), + extract.iter().map(|t| format!("0x{:08X}", t)).collect::>().join(", ") + ); + } else if let Some(ref filter) = filter_types { + println!( + "Filtered hints written to: {} (excluded types: {})", + output_path.display(), + filter.iter().map(|t| format!("0x{:08X}", t)).collect::>().join(", ") + ); + } else { + println!("All hints written to: {}", output_path.display()); + } + } + + Ok(()) +} + +fn main() -> Result<()> { + let args = Args::parse(); + process_hints_file(&args) +} diff --git a/tools/test-env/.env b/tools/test-env/.env index fd4f2f696..bb48f0488 100644 --- a/tools/test-env/.env +++ b/tools/test-env/.env @@ -1,17 +1,18 @@ -ZISK_BRANCH=tags/v0.15.0 -PIL2_PROOFMAN_BRANCH=tags/v0.15.0 -PIL2_PROOFMAN_JS_BRANCH=tags/v0.15.0 -PIL2_COMPILER_BRANCH=tags/v0.8.0 +ZISK_BRANCH=main +PIL2_PROOFMAN_BRANCH=tags/v0.16.0 +PIL2_PROOFMAN_JS_BRANCH=tags/v0.16.0 +PIL2_COMPILER_BRANCH=tags/v0.9.0 ZISK_TESTVECTORS_BRANCH=main +ZISK_TEMPLATE_BRANCH=pre-develop-0.16.0 -ZISK_SETUP_FILE=zisk-provingkey-0.14.0.tar.gz -PACKAGE_SETUP_VERSION=0.14.0 +ZISK_SETUP_FILE=zisk-provingkey-0.16.0.tar.gz +PACKAGE_SETUP_VERSION=0.16.0 SETUP_ADD_DYLIBS=0 PP_INPUTS=pp_input_1_1.bin PP_INPUTS_DISTRIBUTED=pp_input_1_1.bin,pp_input_20_20.bin -BLOCK_INPUTS=20852412_38_3_rsp.bin -BLOCK_INPUTS_DISTRIBUTED=20852412_38_3_rsp.bin,21077746_52_26_rsp.bin,18885301_210_24_rsp.bin +BLOCK_INPUTS=mainnet_24628607_66_7_zec_reth.bin +BLOCK_INPUTS_DISTRIBUTED=mainnet_24628607_66_7_zec_reth.bin,mainnet_24626900_221_16_zec_reth.bin BLOCK_FOLDER= DISTRIBUTED_PROCESSES=2 diff --git a/tools/test-env/build_setup.sh b/tools/test-env/build_setup.sh index bba13bc9b..a3368f081 100755 --- a/tools/test-env/build_setup.sh +++ b/tools/test-env/build_setup.sh @@ -83,7 +83,7 @@ main() { cached=0 if [[ "${USE_CACHE_SETUP}" == "1" ]]; then # Compute setup hash - HASH_SUM=$(sha256sum pil/zisk.pilout tmp/fixed/*.fixed \ + HASH_SUM=$(sha256sum pil/zisk.pilout tmp/fixed/*.fixed state-machines/starkstructs.json \ | sort -k2 \ | sha256sum \ | awk '{print $1}' \ diff --git a/tools/test-env/build_zisk.sh b/tools/test-env/build_zisk.sh index 954b9d7b3..46360e51c 100755 --- a/tools/test-env/build_zisk.sh +++ b/tools/test-env/build_zisk.sh @@ -115,11 +115,19 @@ main() { step "Building ZisK tools..." ensure cargo clean || return 1 ensure cargo update || return 1 - BUILD_FEATURES="" + + # We build features in that way to be ready to support more feature in the future + FEATURES=() if [[ "${BUILD_GPU}" == "1" ]]; then - BUILD_FEATURES="--features gpu" + FEATURES+=("gpu") warn "Building with GPU support..." fi + + BUILD_FEATURES="" + if (( ${#FEATURES[@]} > 0 )); then + BUILD_FEATURES="--features $(IFS=,; echo "${FEATURES[*]}")" + fi + if ! (cargo build --release --target ${TARGET} ${BUILD_FEATURES}); then warn "Build failed. Trying to fix missing stddef.h..." @@ -162,7 +170,6 @@ main() { LIB_EXT="dylib" fi - ensure cp target/${TARGET}/release/libzisk_witness.${LIB_EXT} "${ZISK_BIN_DIR}" || return 1 ensure cp ziskup/ziskup "${ZISK_BIN_DIR}" || return 1 ensure cp target/${TARGET}/release/libziskclib.a "${ZISK_BIN_DIR}" || return 1 diff --git a/tools/test-env/test_eth_block.sh b/tools/test-env/test_eth_block.sh index cc46824f8..3c056d9b3 100755 --- a/tools/test-env/test_eth_block.sh +++ b/tools/test-env/test_eth_block.sh @@ -5,7 +5,7 @@ source "./test_elf.sh" main() { info "▶️ Running $(basename "$0") script..." - ELF_FILE="eth-client/elf/zec-rsp.elf" + ELF_FILE="eth-client/elf/zec-reth.elf" INPUTS_PATH="eth-client/inputs" test_elf "${ELF_FILE}" "${INPUTS_PATH}" "BLOCK_INPUTS" "BLOCK_INPUTS_DISTRIBUTED" "Ethereum blocks" || return 1 diff --git a/tools/test-env/test_sha_hasher.sh b/tools/test-env/test_sha_hasher.sh index 12b5c9f27..3f0ce9bd6 100755 --- a/tools/test-env/test_sha_hasher.sh +++ b/tools/test-env/test_sha_hasher.sh @@ -2,8 +2,8 @@ source "./utils.sh" -PROJECT_NAME="sha_hasher" -EXPECTED_OUTPUT="98211882|bd13089b|6ccf1fca|81f7f0e4|abf6352a|0c39c9b1|1f142cac|233f1280" +PROJECT_NAME="guest" +EXPECTED_OUTPUT="4fcbc136|2ce46a82|2248a8eb|785f0c7e|9dca7861|7267cace|d028d7e5|f6a2309c|000003e8|deadbeef" main() { info "▶️ Running $(basename "$0") script..." @@ -40,25 +40,18 @@ main() { cd "$PROJECT_NAME" step "Building program..." - ensure cargo-zisk build --release || return 1 + ensure cargo build --bin host --release || return 1 - ELF_PATH="target/riscv64ima-zisk-zkvm-elf/release/$PROJECT_NAME" - INPUT_BIN="build/input.bin" + ELF_PATH="target/elf/riscv64ima-zisk-zkvm-elf/release/$PROJECT_NAME" + INPUT_BIN="host/tmp/input.bin" step "Running program with ziskemu..." - ensure ziskemu -e "$ELF_PATH" -i "$INPUT_BIN" | tee ziskemu_output.log || return 1 + ensure ziskemu -e "$ELF_PATH" -i "$INPUT_BIN" -c | tee ziskemu_output.log || return 1 if ! grep -qE ${EXPECTED_OUTPUT} ziskemu_output.log; then err "run ziskemu failed" return 1 fi - step "Running program with cargo-zisk run..." - ensure cargo-zisk run --release -i build/input.bin | tee run_output.log || return 1 - if ! grep -qE ${EXPECTED_OUTPUT} run_output.log; then - err "run program failed" - return 1 - fi - if is_gha && [[ "${PLATFORM}" == "darwin" ]]; then warn "Skipping prove and verify steps on macOS as it's not supported in GHA" else diff --git a/tools/test-env/utils.sh b/tools/test-env/utils.sh index e5b631f7b..0396c1a59 100755 --- a/tools/test-env/utils.sh +++ b/tools/test-env/utils.sh @@ -150,7 +150,7 @@ is_proving_key_installed() { if [[ -d "$HOME/.zisk/provingKey" ]]; then return 0 else - err "Proving key not installed. Please install it first." + err "Proving Key not installed. Please install it first." return 1 fi } diff --git a/tools/update-rust/update-rust.sh b/tools/update-rust/update-rust.sh index 60317e334..1483e49b7 100755 --- a/tools/update-rust/update-rust.sh +++ b/tools/update-rust/update-rust.sh @@ -103,7 +103,7 @@ fi for line in "${commits_array[@]}"; do commit=$(echo "$line" | awk '{print $1}') msg=$(echo "$line" | cut -d' ' -f2-) - + log_info "Applying cherry pick for commit: ${msg} (${commit})" output=$(git cherry-pick $commit -n 2>&1) if ! [[ $? -eq 0 ]]; then @@ -121,7 +121,9 @@ done # Final instructions echo -log_info "Now test build Zisk tool chain using the rust code in the directory ${ZISK_RUST_DIR} and new branch 'zisk-rust-${TO_VERSION}'" +log_info "Now test build Zisk tool chain using the rust code in the directory ${ZISK_RUST_DIR} and new branch 'zisk-rust-${TO_VERSION}':" +log "ZISK_BUILD_DIR=${ZISK_RUST_DIR} cargo-zisk sdk build-toolchain" +echo log_info "When successfully tested, execute the following command to commit/merge the changes to 'zisk' branch and generate the release:" echo log "./release-rust.sh ${TO_VERSION} " diff --git a/tools/verify_all.sh b/tools/verify_all.sh index 6e3f5f97d..b7fa6019d 100755 --- a/tools/verify_all.sh +++ b/tools/verify_all.sh @@ -224,7 +224,6 @@ if [[ $elf_mode -eq 0 ]]; then if (cargo run --release --bin cargo-zisk verify-constraints \ --emulator \ - --witness-lib target/release/libzisk_witness.so \ --elf "$elf_file" \ --proving-key "$proving_key"); then record_result "$elf_file" "PASSED" "$counter" @@ -248,7 +247,6 @@ else if (cargo run --release --bin cargo-zisk verify-constraints \ --emulator \ - --witness-lib target/release/libzisk_witness.so \ --elf "$elf_file" \ --proving-key "$proving_key"); then record_result "$elf_file" "PASSED" @@ -262,7 +260,6 @@ else if (cargo run --release --bin cargo-zisk verify-constraints \ --emulator \ - --witness-lib target/release/libzisk_witness.so \ --elf "$elf_file" \ --input "$input_path" \ --proving-key "$proving_key"); then @@ -295,7 +292,6 @@ else if (cargo run --release --bin cargo-zisk verify-constraints \ --emulator \ - --witness-lib target/release/libzisk_witness.so \ --elf "$elf_file" \ --input "$input_file" \ --proving-key "$proving_key"); then diff --git a/verifier/Cargo.toml b/verifier/Cargo.toml new file mode 100644 index 000000000..734ae3378 --- /dev/null +++ b/verifier/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "zisk-verifier" +version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +keywords = { workspace = true } +repository = { workspace = true } +categories = { workspace = true } + +[dependencies] +proofman-verifier = { workspace = true, features = ["verify"] } + +[features] +default = [] diff --git a/verifier/src/lib.rs b/verifier/src/lib.rs new file mode 100644 index 000000000..a1a47de57 --- /dev/null +++ b/verifier/src/lib.rs @@ -0,0 +1,3 @@ +mod verifier; + +pub use verifier::*; diff --git a/verifier/src/verifier.rs b/verifier/src/verifier.rs new file mode 100644 index 000000000..b11227508 --- /dev/null +++ b/verifier/src/verifier.rs @@ -0,0 +1,25 @@ +use proofman_verifier::{verify_vadcop_final_bytes, verify_vadcop_final_compressed_bytes}; + +pub fn verify_vadcop_final_proof(zisk_proof: &[u8], vadcop_final_vk: &[u8]) -> bool { + // Format: [compressed(8)][pubs_len(8)][pubs][proof_bytes] + + // Read compressed flag (8 bytes, u64 little-endian) + let compressed = u64::from_le_bytes([ + zisk_proof[0], + zisk_proof[1], + zisk_proof[2], + zisk_proof[3], + zisk_proof[4], + zisk_proof[5], + zisk_proof[6], + zisk_proof[7], + ]) == 1; + + let vadcop_proof = &zisk_proof[8..]; + + if compressed { + verify_vadcop_final_compressed_bytes(vadcop_proof, vadcop_final_vk) + } else { + verify_vadcop_final_bytes(vadcop_proof, vadcop_final_vk) + } +} diff --git a/witness-computation/Cargo.toml b/witness-computation/Cargo.toml deleted file mode 100644 index 9b82eb41f..000000000 --- a/witness-computation/Cargo.toml +++ /dev/null @@ -1,51 +0,0 @@ -[package] -name = "zisk-witness" -version = { workspace = true } -edition = { workspace = true } -license = { workspace = true } -keywords = { workspace = true } -repository = { workspace = true } -categories = { workspace = true } - -[lib] -crate-type = ["dylib"] - -[dependencies] -executor = { workspace = true } -sm-arith = { workspace = true } -sm-binary = { workspace = true } -sm-main = { workspace = true } -sm-mem = { workspace = true } -mem-common = { workspace = true } -sm-rom = { workspace = true } -sm-frequent-ops = { workspace = true } -data-bus = { workspace = true } -precomp-keccakf = { workspace = true } -precomp-sha256f = { workspace = true } -precomp-big-int = { workspace = true } -precomp-arith-eq = { workspace = true } -precomp-arith-eq-384 = { workspace = true } -zisk-pil = { workspace = true } -ziskemu = { workspace = true } -zisk-core = { workspace = true } -zisk-common = { workspace = true } - -proofman = { workspace = true } -proofman-common = { workspace = true } -proofman-util = { workspace = true } -proofman-macros = { workspace = true } -witness = { workspace = true } -fields = { workspace=true } -pil-std-lib = { workspace = true } -tracing = { workspace = true } - -env_logger = "0.11" -rayon = { workspace = true } - -[features] -default = [] -gpu = ["proofman-common/gpu", "packed"] -packed = ["proofman-common/packed", "proofman/packed"] -no_lib_link = ["proofman-common/no_lib_link"] -diagnostic = ["proofman-macros/diagnostic", "proofman/diagnostic"] -disable_distributed = ["proofman/disable_distributed", "proofman-common/disable_distributed"] \ No newline at end of file diff --git a/witness-computation/README.md b/witness-computation/README.md deleted file mode 100644 index 558deef8d..000000000 --- a/witness-computation/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# Quickstart - -This guide will show you how to compile zisk-wc library and create all the staff needed to generate a proof. - -## Requirements - -Before starting, make sure you have [Rust](https://www.rust-lang.org/tools/install) installed on your system. - -Optional recommendations: - -- [rust-analyzer](https://marketplace.visualstudio.com/items?itemName=rust-lang.rust-analyzer) extension if you are using VS Code to assist you when writing Rust code. -- [PIL2 Highlight syntax code](https://marketplace.visualstudio.com/items?itemName=rust-lang.rust-analyzer) if you are using VS Code to highlight your code when writing PIL2 code. - -Install the following repositories: - -```bash -git clone https://github.com/0xPolygonHermez/pil2-compiler.git -git clone https://github.com/0xPolygonHermez/pil2-proofman-js.git -git clone https://github.com/0xPolygonHermez/pil2-components.git -git clone https://github.com/0xPolygonHermez/pil2-proofman.git -``` - -## Compile the PIL files to generate a PILOUT - -Compiling the PIL using the [PIL2 compiler repository](https://github.com/0xPolygonHermez/pil2-compiler.git) you generate a PILOUT file. Compile the PIL2 compiler by running the following commands: - -```bash -node ../pil2-compiler/src/pil.js ./zkevm/zisk-wc/pil/zisk.pil -I ../pil2-components/lib/std/pil -``` - -This command will generate a `zisk.pilout` file that contains the arithmetization, public inputs, constraints, constant values, and other proof-generation-specific details described by the PIL2 project. - -## Generate the setup files - -``` -node ../pil2-proofman-js/src/setup/main_genSetup.js -a ./zkvm/zisk-wc/pil/zisk.pilout -s ./zkevm/zisk-wc/setup/stark_structs.json -b ./zkvm/zisk-wc/setup -/build -``` - -## Compile the dynamic library - -``` -cd zisk-wc -cargo build - -``` - -## Launch the proof generation - -``` -cd ../pil2-proofman -cargo run --bin proofman-cli prove --wc-lib ../zisk/target/debug/libzisk_wc.dylib --proving-key ../zisk/zkvm/zisk-wc/setup -/build/provingKey --public-inputs ../zisk/zkvm/zisk-wc/inputs/inputs.hex -``` - - - - // fn execute(&self, pctx: &mut ProofCtx, wneeds: &WitnessNeeds) { - // Creates the ectx with the workers pool inside - // TODO let mut ectx = self.wcm.createExecutionContext(wneeds); - self.main_sm.execute(pctx, ectx); - // TODO ectx.terminate(); diff --git a/witness-computation/rom/input.bin b/witness-computation/rom/input.bin deleted file mode 100644 index 515d6f34b..000000000 Binary files a/witness-computation/rom/input.bin and /dev/null differ diff --git a/witness-computation/rom/zisk.elf b/witness-computation/rom/zisk.elf deleted file mode 100755 index d81ec7806..000000000 Binary files a/witness-computation/rom/zisk.elf and /dev/null differ diff --git a/witness-computation/src/lib.rs b/witness-computation/src/lib.rs deleted file mode 100644 index 39ef75d65..000000000 --- a/witness-computation/src/lib.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod zisk_lib; - -pub use zisk_lib::*; diff --git a/witness-computation/src/zisk_lib.rs b/witness-computation/src/zisk_lib.rs deleted file mode 100644 index 399e06c9f..000000000 --- a/witness-computation/src/zisk_lib.rs +++ /dev/null @@ -1,227 +0,0 @@ -//! The `WitnessLib` library defines the core witness computation framework, -//! integrating the ZisK execution environment with state machines and witness components. -//! -//! This module leverages `WitnessLibrary` to orchestrate the setup of state machines, -//! program conversion, and execution pipelines to generate required witnesses. - -use executor::{StateMachines, StaticSMBundle, ZiskExecutor}; -use fields::{Goldilocks, PrimeField64}; -use pil_std_lib::Std; -use precomp_arith_eq::ArithEqManager; -use precomp_arith_eq_384::ArithEq384Manager; -use precomp_big_int::Add256Manager; -use precomp_keccakf::KeccakfManager; -use precomp_sha256f::Sha256fManager; -use proofman::register_std; -use proofman_common::{PackedInfo, ProofmanResult}; -use sm_arith::ArithSM; -use sm_binary::BinarySM; -use sm_mem::Mem; -use sm_rom::RomSM; -use std::{collections::HashMap, path::PathBuf, sync::Arc}; -use witness::{WitnessLibrary, WitnessManager}; -use zisk_common::{io::ZiskStdin, ExecutorStats, ZiskExecutionResult, ZiskLib, ZiskWitnessLibrary}; -use zisk_core::{Riscv2zisk, CHUNK_SIZE}; -#[cfg(feature = "packed")] -use zisk_pil::PACKED_INFO; -use zisk_pil::{ - ADD_256_AIR_IDS, ARITH_AIR_IDS, ARITH_EQ_384_AIR_IDS, ARITH_EQ_AIR_IDS, BINARY_ADD_AIR_IDS, - BINARY_AIR_IDS, BINARY_EXTENSION_AIR_IDS, INPUT_DATA_AIR_IDS, KECCAKF_AIR_IDS, MEM_AIR_IDS, - MEM_ALIGN_AIR_IDS, MEM_ALIGN_BYTE_AIR_IDS, MEM_ALIGN_READ_BYTE_AIR_IDS, - MEM_ALIGN_WRITE_BYTE_AIR_IDS, ROM_AIR_IDS, ROM_DATA_AIR_IDS, SHA_256_F_AIR_IDS, - ZISK_AIRGROUP_ID, -}; - -pub struct WitnessLib { - elf_path: PathBuf, - asm_path: Option, - asm_rom_path: Option, - executor: Option>>, - chunk_size: u64, - base_port: Option, - unlock_mapped_memory: bool, - shared_tables: bool, - verbose_mode: proofman_common::VerboseMode, -} - -#[no_mangle] -#[allow(clippy::too_many_arguments)] -fn init_library( - verbose_mode: proofman_common::VerboseMode, - elf_path: PathBuf, - asm_path: Option, - asm_rom_path: Option, - base_port: Option, - unlock_mapped_memory: bool, - shared_tables: bool, -) -> Result>, Box> { - let chunk_size = CHUNK_SIZE; - - let result = Box::new(WitnessLib { - elf_path, - asm_path, - asm_rom_path, - executor: None, - chunk_size, - base_port, - unlock_mapped_memory, - shared_tables, - verbose_mode, - }); - - Ok(result) -} - -impl WitnessLibrary for WitnessLib { - /// Registers the witness components and initializes the execution pipeline. - /// - /// # Arguments - /// * `wcm` - An `Arc`-wrapped `WitnessManager` instance that orchestrates witness generation. - /// - /// This method performs the following steps: - /// 1. Converts a RISC-V program to the ZisK ROM format using `Riscv2zisk`. - /// 2. Initializes core and secondary state machines for witness generation. - /// 3. Registers the state machines with the `ZiskExecutor`. - /// 4. Registers the `ZiskExecutor` as a component in the `WitnessManager`. - /// - /// # Panics - /// Panics if the `Riscv2zisk` conversion fails or if required paths cannot be resolved. - fn register_witness(&mut self, wcm: &WitnessManager) -> ProofmanResult<()> { - let world_rank = wcm.get_world_rank(); - let local_rank = wcm.get_local_rank(); - - proofman_common::initialize_logger(self.verbose_mode, Some(world_rank)); - - // Step 1: Create an instance of the RISCV -> ZisK program converter - let rv2zk = Riscv2zisk::new(self.elf_path.display().to_string()); - - // Step 2: Convert program to ROM - let zisk_rom = rv2zk.run().unwrap_or_else(|e| panic!("Application error: {e}")); - let zisk_rom = Arc::new(zisk_rom); - - // Step 3: Initialize the secondary state machines - let std = Std::new(wcm.get_pctx(), wcm.get_sctx(), self.shared_tables)?; - register_std(wcm, &std); - - let rom_sm = RomSM::new(zisk_rom.clone(), self.asm_rom_path.clone()); - let binary_sm = BinarySM::new(std.clone()); - let arith_sm = ArithSM::new(std.clone()); - let mem_sm = Mem::new(std.clone()); - // Step 4: Initialize the precompiles state machines - let keccakf_sm = KeccakfManager::new(std.clone()); - let sha256f_sm = Sha256fManager::new(std.clone()); - let arith_eq_sm = ArithEqManager::new(std.clone()); - let arith_eq_384_sm = ArithEq384Manager::new(std.clone()); - let add256_sm = Add256Manager::new(std.clone()); - - let mem_instances = vec![ - (ZISK_AIRGROUP_ID, MEM_AIR_IDS[0]), - (ZISK_AIRGROUP_ID, ROM_DATA_AIR_IDS[0]), - (ZISK_AIRGROUP_ID, INPUT_DATA_AIR_IDS[0]), - (ZISK_AIRGROUP_ID, MEM_ALIGN_AIR_IDS[0]), - (ZISK_AIRGROUP_ID, MEM_ALIGN_BYTE_AIR_IDS[0]), - (ZISK_AIRGROUP_ID, MEM_ALIGN_WRITE_BYTE_AIR_IDS[0]), - (ZISK_AIRGROUP_ID, MEM_ALIGN_READ_BYTE_AIR_IDS[0]), - ]; - - let binary_instances = vec![ - (ZISK_AIRGROUP_ID, BINARY_AIR_IDS[0]), - (ZISK_AIRGROUP_ID, BINARY_ADD_AIR_IDS[0]), - (ZISK_AIRGROUP_ID, BINARY_EXTENSION_AIR_IDS[0]), - ]; - - let sm_bundle = StaticSMBundle::new( - self.asm_path.is_some(), - vec![ - (vec![(ZISK_AIRGROUP_ID, ROM_AIR_IDS[0])], StateMachines::RomSM(rom_sm.clone())), - (mem_instances, StateMachines::MemSM(mem_sm.clone())), - (binary_instances, StateMachines::BinarySM(binary_sm.clone())), - ( - vec![(ZISK_AIRGROUP_ID, ARITH_AIR_IDS[0])], - StateMachines::ArithSM(arith_sm.clone()), - ), - // The precompiles state machines - ( - vec![(ZISK_AIRGROUP_ID, KECCAKF_AIR_IDS[0])], - StateMachines::KeccakfManager(keccakf_sm.clone()), - ), - ( - vec![(ZISK_AIRGROUP_ID, SHA_256_F_AIR_IDS[0])], - StateMachines::Sha256fManager(sha256f_sm.clone()), - ), - ( - vec![(ZISK_AIRGROUP_ID, ARITH_EQ_AIR_IDS[0])], - StateMachines::ArithEqManager(arith_eq_sm.clone()), - ), - ( - vec![(ZISK_AIRGROUP_ID, ARITH_EQ_384_AIR_IDS[0])], - StateMachines::ArithEq384Manager(arith_eq_384_sm.clone()), - ), - ( - vec![(ZISK_AIRGROUP_ID, ADD_256_AIR_IDS[0])], - StateMachines::Add256Manager(add256_sm.clone()), - ), - ], - ); - - // Step 5: Create the executor and register the secondary state machines - let executor: ZiskExecutor = ZiskExecutor::new( - self.elf_path.clone(), - self.asm_path.clone(), - self.asm_rom_path.clone(), - zisk_rom, - std, - sm_bundle, - Some(rom_sm.clone()), - self.chunk_size, - world_rank, - local_rank, - self.base_port, - self.unlock_mapped_memory, - ); - - let executor = Arc::new(executor); - - // Step 7: Register the executor as a component in the Witness Manager - wcm.register_component(executor.clone()); - - self.executor = Some(executor); - Ok(()) - } - - fn get_packed_info(&self) -> HashMap<(usize, usize), PackedInfo> { - let mut _packed_info = HashMap::new(); - #[cfg(feature = "packed")] - { - for packed_info in PACKED_INFO.iter() { - _packed_info.insert( - (packed_info.0, packed_info.1), - PackedInfo::new( - packed_info.2.is_packed, - packed_info.2.num_packed_words, - packed_info.2.unpack_info.to_vec(), - ), - ); - } - } - _packed_info - } -} - -impl ZiskWitnessLibrary for WitnessLib { - fn set_stdin(&self, stdin: ZiskStdin) { - if let Some(executor) = &self.executor { - executor.set_stdin(stdin); - } - } - - /// Returns the execution result of the witness computation. - /// - /// # Returns - /// * `u16` - The execution result code. - fn execution_result(&self) -> Option<(ZiskExecutionResult, ExecutorStats)> { - self.executor.as_ref().map(|executor| executor.get_execution_result()) - } -} - -impl ZiskLib for WitnessLib {} diff --git a/zisk-contracts/IZiskVerifier.sol b/zisk-contracts/IZiskVerifier.sol new file mode 100644 index 000000000..88fb1eb1c --- /dev/null +++ b/zisk-contracts/IZiskVerifier.sol @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: AGPL-3.0 +pragma solidity ^0.8.20; + +/// @title Zisk Verifier Interface +/// @author SilentSig +/// @notice This contract is the interface for the Zisk Verifier. +interface IZiskVerifier { + /// @notice Verifies a proof with given public values and vkey. + /// @param programVK The verification key for the RISC-V program. + /// @param publicValues The public values encoded as bytes. + /// @param proofBytes The proof of the program execution the Zisk zkVM encoded as bytes. + function verifySnarkProof( + uint64[4] calldata programVK, + uint64[4] calldata rootCVadcopFinal, + bytes calldata publicValues, + bytes calldata proofBytes + ) external view; +} \ No newline at end of file diff --git a/zisk-contracts/PlonkVerifier.sol b/zisk-contracts/PlonkVerifier.sol new file mode 100644 index 000000000..782982af5 --- /dev/null +++ b/zisk-contracts/PlonkVerifier.sol @@ -0,0 +1,784 @@ +// SPDX-License-Identifier: GPL-3.0 +/* + Copyright 2021 0KIMS association. + + This file is generated with [snarkJS](https://github.com/iden3/snarkjs). + + snarkJS is a free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + snarkJS is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with snarkJS. If not, see . +*/ + + +pragma solidity >=0.7.0 <0.9.0; + +contract PlonkVerifier { + // Omega + uint256 constant w1 = 5709868443893258075976348696661355716898495876243883251619397131511003808859; + // Scalar field size + uint256 constant q = 21888242871839275222246405745257275088548364400416034343698204186575808495617; + // Base field size + uint256 constant qf = 21888242871839275222246405745257275088696311157297823662689037894645226208583; + + // [1]_1 + uint256 constant G1x = 1; + uint256 constant G1y = 2; + // [1]_2 + uint256 constant G2x1 = 10857046999023057135944570762232829481370756359578518086990519993285655852781; + uint256 constant G2x2 = 11559732032986387107991004021392285783925812861821192530917403151452391805634; + uint256 constant G2y1 = 8495653923123431417604973247489272438418190587263600148770280649306958101930; + uint256 constant G2y2 = 4082367875863433681332203403145435568316851327593401208105741076214120093531; + + // Verification Key data + uint32 constant n = 16777216; + uint16 constant nPublic = 1; + uint16 constant nLagrange = 1; + + uint256 constant Qmx = 15728078222621428176160834771923049445267207177815563086756963442982537322555; + uint256 constant Qmy = 10368134109772487360967338421067531937191459049728961159479449769737397414266; + uint256 constant Qlx = 6266903827059262482682470537784423436414327073503210506957823438202715076858; + uint256 constant Qly = 9930975912673080337533028672610162866366123480191667478375977723280338682627; + uint256 constant Qrx = 7123060314877137702038864858686611023352874626900523603313267911196395216007; + uint256 constant Qry = 1770043357969798948108297406468647422219312923587004610833401748122605130485; + uint256 constant Qox = 429670666919728039002263956209270185134416998857239332019860571923582607053; + uint256 constant Qoy = 11024111753296480975480562321760673479457513637396733200807061983645283791758; + uint256 constant Qcx = 18759539946247966109746686045765624942782666785179752870440507130849473521082; + uint256 constant Qcy = 3887166330695441169968198076272085557699876114516022583985382505001285608090; + uint256 constant S1x = 14529212361763788633811869842580421415134713687938570863064659590110934903740; + uint256 constant S1y = 15130539367607292893898013497807485005818775366682240859886811844322272868379; + uint256 constant S2x = 88575883471378427909504541476422256478241201143920420405836063934407519240; + uint256 constant S2y = 3538048320551462152620488185558124425114005176021275627227409411627962175942; + uint256 constant S3x = 18339485453472040659646089143258381971176107908300548337940061504006306000991; + uint256 constant S3y = 401317798322731345836300355016089251665269411090279566075041830406289046834; + uint256 constant k1 = 2; + uint256 constant k2 = 3; + uint256 constant X2x1 = 21831381940315734285607113342023901060522397560371972897001948545212302161822; + uint256 constant X2x2 = 17231025384763736816414546592865244497437017442647097510447326538965263639101; + uint256 constant X2y1 = 2388026358213174446665280700919698872609886601280537296205114254867301080648; + uint256 constant X2y2 = 11507326595632554467052522095592665270651932854513688777769618397986436103170; + + // Proof calldata + // Byte offset of every parameter of the calldata + // Polynomial commitments + uint16 constant pA = 4 + 0; + uint16 constant pB = 4 + 64; + uint16 constant pC = 4 + 128; + uint16 constant pZ = 4 + 192; + uint16 constant pT1 = 4 + 256; + uint16 constant pT2 = 4 + 320; + uint16 constant pT3 = 4 + 384; + uint16 constant pWxi = 4 + 448; + uint16 constant pWxiw = 4 + 512; + // Opening evaluations + uint16 constant pEval_a = 4 + 576; + uint16 constant pEval_b = 4 + 608; + uint16 constant pEval_c = 4 + 640; + uint16 constant pEval_s1 = 4 + 672; + uint16 constant pEval_s2 = 4 + 704; + uint16 constant pEval_zw = 4 + 736; + + // Memory data + // Challenges + uint16 constant pAlpha = 0; + uint16 constant pBeta = 32; + uint16 constant pGamma = 64; + uint16 constant pXi = 96; + uint16 constant pXin = 128; + uint16 constant pBetaXi = 160; + uint16 constant pV1 = 192; + uint16 constant pV2 = 224; + uint16 constant pV3 = 256; + uint16 constant pV4 = 288; + uint16 constant pV5 = 320; + uint16 constant pU = 352; + + uint16 constant pPI = 384; + uint16 constant pEval_r0 = 416; + uint16 constant pD = 448; + uint16 constant pF = 512; + uint16 constant pE = 576; + uint16 constant pTmp = 640; + uint16 constant pAlpha2 = 704; + uint16 constant pZh = 736; + uint16 constant pZhInv = 768; + + + uint16 constant pEval_l1 = 800; + + + + uint16 constant lastMem = 832; + + function verifyProof(uint256[24] calldata _proof, uint256[1] calldata _pubSignals) public view returns (bool) { + assembly { + ///////// + // Computes the inverse using the extended euclidean algorithm + ///////// + function inverse(a, q) -> inv { + let t := 0 + let newt := 1 + let r := q + let newr := a + let quotient + let aux + + for { } newr { } { + quotient := sdiv(r, newr) + aux := sub(t, mul(quotient, newt)) + t:= newt + newt:= aux + + aux := sub(r,mul(quotient, newr)) + r := newr + newr := aux + } + + if gt(r, 1) { revert(0,0) } + if slt(t, 0) { t:= add(t, q) } + + inv := t + } + + /////// + // Computes the inverse of an array of values + // See https://vitalik.ca/general/2018/07/21/starks_part_3.html in section where explain fields operations + ////// + function inverseArray(pVals, n) { + + let pAux := mload(0x40) // Point to the next free position + let pIn := pVals + let lastPIn := add(pVals, mul(n, 32)) // Read n elements + let acc := mload(pIn) // Read the first element + pIn := add(pIn, 32) // Point to the second element + let inv + + + for { } lt(pIn, lastPIn) { + pAux := add(pAux, 32) + pIn := add(pIn, 32) + } + { + mstore(pAux, acc) + acc := mulmod(acc, mload(pIn), q) + } + acc := inverse(acc, q) + + // At this point pAux pint to the next free position we subtract 1 to point to the last used + pAux := sub(pAux, 32) + // pIn points to the n+1 element, we subtract to point to n + pIn := sub(pIn, 32) + lastPIn := pVals // We don't process the first element + for { } gt(pIn, lastPIn) { + pAux := sub(pAux, 32) + pIn := sub(pIn, 32) + } + { + inv := mulmod(acc, mload(pAux), q) + acc := mulmod(acc, mload(pIn), q) + mstore(pIn, inv) + } + // pIn points to first element, we just set it. + mstore(pIn, acc) + } + + function checkField(v) { + if iszero(lt(v, q)) { + mstore(0, 0) + return(0,0x20) + } + } + + function checkPointBelongsToBN128Curve(p) { + let x := calldataload(p) + let y := calldataload(add(p, 32)) + + // Check that the point is on the curve + // y^2 = x^3 + 3 + let x3_3 := addmod(mulmod(x, mulmod(x, x, qf), qf), 3, qf) + let y2 := mulmod(y, y, qf) + + if iszero(eq(x3_3, y2)) { + mstore(0, 0) + return(0, 0x20) + } + } + + function checkProofData() { + // Check proof commitments belong to the bn128 curve + checkPointBelongsToBN128Curve(pA) + checkPointBelongsToBN128Curve(pB) + checkPointBelongsToBN128Curve(pC) + checkPointBelongsToBN128Curve(pZ) + checkPointBelongsToBN128Curve(pT1) + checkPointBelongsToBN128Curve(pT2) + checkPointBelongsToBN128Curve(pT3) + checkPointBelongsToBN128Curve(pWxi) + checkPointBelongsToBN128Curve(pWxiw) + + // Check proof commitments coordinates are in the field + checkField(calldataload(pA)) + checkField(calldataload(add(pA, 32))) + checkField(calldataload(pB)) + checkField(calldataload(add(pB, 32))) + checkField(calldataload(pC)) + checkField(calldataload(add(pC, 32))) + checkField(calldataload(pZ)) + checkField(calldataload(add(pZ, 32))) + checkField(calldataload(pT1)) + checkField(calldataload(add(pT1, 32))) + checkField(calldataload(pT2)) + checkField(calldataload(add(pT2, 32))) + checkField(calldataload(pT3)) + checkField(calldataload(add(pT3, 32))) + checkField(calldataload(pWxi)) + checkField(calldataload(add(pWxi, 32))) + checkField(calldataload(pWxiw)) + checkField(calldataload(add(pWxiw, 32))) + + // Check proof evaluations are in the field + checkField(calldataload(pEval_a)) + checkField(calldataload(pEval_b)) + checkField(calldataload(pEval_c)) + checkField(calldataload(pEval_s1)) + checkField(calldataload(pEval_s2)) + checkField(calldataload(pEval_zw)) + } + + function calculateChallenges(pMem, pPublic) { + let beta + let aux + + let mIn := mload(0x40) // Pointer to the next free memory position + + // Compute challenge.beta & challenge.gamma + mstore(mIn, Qmx) + mstore(add(mIn, 32), Qmy) + mstore(add(mIn, 64), Qlx) + mstore(add(mIn, 96), Qly) + mstore(add(mIn, 128), Qrx) + mstore(add(mIn, 160), Qry) + mstore(add(mIn, 192), Qox) + mstore(add(mIn, 224), Qoy) + mstore(add(mIn, 256), Qcx) + mstore(add(mIn, 288), Qcy) + mstore(add(mIn, 320), S1x) + mstore(add(mIn, 352), S1y) + mstore(add(mIn, 384), S2x) + mstore(add(mIn, 416), S2y) + mstore(add(mIn, 448), S3x) + mstore(add(mIn, 480), S3y) + + + mstore(add(mIn, 512), calldataload(add(pPublic, 0))) + + mstore(add(mIn, 544 ), calldataload(pA)) + mstore(add(mIn, 576 ), calldataload(add(pA, 32))) + mstore(add(mIn, 608 ), calldataload(pB)) + mstore(add(mIn, 640 ), calldataload(add(pB, 32))) + mstore(add(mIn, 672 ), calldataload(pC)) + mstore(add(mIn, 704 ), calldataload(add(pC, 32))) + + beta := mod(keccak256(mIn, 736), q) + mstore(add(pMem, pBeta), beta) + + // challenges.gamma + mstore(add(pMem, pGamma), mod(keccak256(add(pMem, pBeta), 32), q)) + + // challenges.alpha + mstore(mIn, mload(add(pMem, pBeta))) + mstore(add(mIn, 32), mload(add(pMem, pGamma))) + mstore(add(mIn, 64), calldataload(pZ)) + mstore(add(mIn, 96), calldataload(add(pZ, 32))) + + aux := mod(keccak256(mIn, 128), q) + mstore(add(pMem, pAlpha), aux) + mstore(add(pMem, pAlpha2), mulmod(aux, aux, q)) + + // challenges.xi + mstore(mIn, aux) + mstore(add(mIn, 32), calldataload(pT1)) + mstore(add(mIn, 64), calldataload(add(pT1, 32))) + mstore(add(mIn, 96), calldataload(pT2)) + mstore(add(mIn, 128), calldataload(add(pT2, 32))) + mstore(add(mIn, 160), calldataload(pT3)) + mstore(add(mIn, 192), calldataload(add(pT3, 32))) + + aux := mod(keccak256(mIn, 224), q) + mstore( add(pMem, pXi), aux) + + // challenges.v + mstore(mIn, aux) + mstore(add(mIn, 32), calldataload(pEval_a)) + mstore(add(mIn, 64), calldataload(pEval_b)) + mstore(add(mIn, 96), calldataload(pEval_c)) + mstore(add(mIn, 128), calldataload(pEval_s1)) + mstore(add(mIn, 160), calldataload(pEval_s2)) + mstore(add(mIn, 192), calldataload(pEval_zw)) + + let v1 := mod(keccak256(mIn, 224), q) + mstore(add(pMem, pV1), v1) + + // challenges.beta * challenges.xi + mstore(add(pMem, pBetaXi), mulmod(beta, aux, q)) + + // challenges.xi^n + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + aux:= mulmod(aux, aux, q) + + mstore(add(pMem, pXin), aux) + + // Zh + aux:= mod(add(sub(aux, 1), q), q) + mstore(add(pMem, pZh), aux) + mstore(add(pMem, pZhInv), aux) // We will invert later together with lagrange pols + + // challenges.v^2, challenges.v^3, challenges.v^4, challenges.v^5 + aux := mulmod(v1, v1, q) + mstore(add(pMem, pV2), aux) + aux := mulmod(aux, v1, q) + mstore(add(pMem, pV3), aux) + aux := mulmod(aux, v1, q) + mstore(add(pMem, pV4), aux) + aux := mulmod(aux, v1, q) + mstore(add(pMem, pV5), aux) + + // challenges.u + mstore(mIn, calldataload(pWxi)) + mstore(add(mIn, 32), calldataload(add(pWxi, 32))) + mstore(add(mIn, 64), calldataload(pWxiw)) + mstore(add(mIn, 96), calldataload(add(pWxiw, 32))) + + mstore(add(pMem, pU), mod(keccak256(mIn, 128), q)) + } + + function calculateLagrange(pMem) { + let w := 1 + + mstore( + add(pMem, pEval_l1), + mulmod( + n, + mod( + add( + sub( + mload(add(pMem, pXi)), + w + ), + q + ), + q + ), + q + ) + ) + + + + inverseArray(add(pMem, pZhInv), 2 ) + + let zh := mload(add(pMem, pZh)) + w := 1 + + + mstore( + add(pMem, pEval_l1 ), + mulmod( + mload(add(pMem, pEval_l1 )), + zh, + q + ) + ) + + + + + + } + + function calculatePI(pMem, pPub) { + let pl := 0 + + + pl := mod( + add( + sub( + pl, + mulmod( + mload(add(pMem, pEval_l1)), + calldataload(add(pPub, 0)), + q + ) + ), + q + ), + q + ) + + + mstore(add(pMem, pPI), pl) + } + + function calculateR0(pMem) { + let e1 := mload(add(pMem, pPI)) + + let e2 := mulmod(mload(add(pMem, pEval_l1)), mload(add(pMem, pAlpha2)), q) + + let e3a := addmod( + calldataload(pEval_a), + mulmod(mload(add(pMem, pBeta)), calldataload(pEval_s1), q), + q) + e3a := addmod(e3a, mload(add(pMem, pGamma)), q) + + let e3b := addmod( + calldataload(pEval_b), + mulmod(mload(add(pMem, pBeta)), calldataload(pEval_s2), q), + q) + e3b := addmod(e3b, mload(add(pMem, pGamma)), q) + + let e3c := addmod( + calldataload(pEval_c), + mload(add(pMem, pGamma)), + q) + + let e3 := mulmod(mulmod(e3a, e3b, q), e3c, q) + e3 := mulmod(e3, calldataload(pEval_zw), q) + e3 := mulmod(e3, mload(add(pMem, pAlpha)), q) + + let r0 := addmod(e1, mod(sub(q, e2), q), q) + r0 := addmod(r0, mod(sub(q, e3), q), q) + + mstore(add(pMem, pEval_r0) , r0) + } + + function g1_set(pR, pP) { + mstore(pR, mload(pP)) + mstore(add(pR, 32), mload(add(pP,32))) + } + + function g1_setC(pR, x, y) { + mstore(pR, x) + mstore(add(pR, 32), y) + } + + function g1_calldataSet(pR, pP) { + mstore(pR, calldataload(pP)) + mstore(add(pR, 32), calldataload(add(pP, 32))) + } + + function g1_acc(pR, pP) { + let mIn := mload(0x40) + mstore(mIn, mload(pR)) + mstore(add(mIn,32), mload(add(pR, 32))) + mstore(add(mIn,64), mload(pP)) + mstore(add(mIn,96), mload(add(pP, 32))) + + let success := staticcall(sub(gas(), 2000), 6, mIn, 128, pR, 64) + + if iszero(success) { + mstore(0, 0) + return(0,0x20) + } + } + + function g1_mulAcc(pR, pP, s) { + let success + let mIn := mload(0x40) + mstore(mIn, mload(pP)) + mstore(add(mIn,32), mload(add(pP, 32))) + mstore(add(mIn,64), s) + + success := staticcall(sub(gas(), 2000), 7, mIn, 96, mIn, 64) + + if iszero(success) { + mstore(0, 0) + return(0,0x20) + } + + mstore(add(mIn,64), mload(pR)) + mstore(add(mIn,96), mload(add(pR, 32))) + + success := staticcall(sub(gas(), 2000), 6, mIn, 128, pR, 64) + + if iszero(success) { + mstore(0, 0) + return(0,0x20) + } + + } + + function g1_mulAccC(pR, x, y, s) { + let success + let mIn := mload(0x40) + mstore(mIn, x) + mstore(add(mIn,32), y) + mstore(add(mIn,64), s) + + success := staticcall(sub(gas(), 2000), 7, mIn, 96, mIn, 64) + + if iszero(success) { + mstore(0, 0) + return(0,0x20) + } + + mstore(add(mIn,64), mload(pR)) + mstore(add(mIn,96), mload(add(pR, 32))) + + success := staticcall(sub(gas(), 2000), 6, mIn, 128, pR, 64) + + if iszero(success) { + mstore(0, 0) + return(0,0x20) + } + } + + function g1_mulSetC(pR, x, y, s) { + let success + let mIn := mload(0x40) + mstore(mIn, x) + mstore(add(mIn,32), y) + mstore(add(mIn,64), s) + + success := staticcall(sub(gas(), 2000), 7, mIn, 96, pR, 64) + + if iszero(success) { + mstore(0, 0) + return(0,0x20) + } + } + + function g1_mulSet(pR, pP, s) { + g1_mulSetC(pR, mload(pP), mload(add(pP, 32)), s) + } + + function calculateD(pMem) { + let _pD:= add(pMem, pD) + let gamma := mload(add(pMem, pGamma)) + let mIn := mload(0x40) + mstore(0x40, add(mIn, 256)) // d1, d2, d3 & d4 (4*64 bytes) + + g1_setC(_pD, Qcx, Qcy) + g1_mulAccC(_pD, Qmx, Qmy, mulmod(calldataload(pEval_a), calldataload(pEval_b), q)) + g1_mulAccC(_pD, Qlx, Qly, calldataload(pEval_a)) + g1_mulAccC(_pD, Qrx, Qry, calldataload(pEval_b)) + g1_mulAccC(_pD, Qox, Qoy, calldataload(pEval_c)) + + let betaxi := mload(add(pMem, pBetaXi)) + let val1 := addmod( + addmod(calldataload(pEval_a), betaxi, q), + gamma, q) + + let val2 := addmod( + addmod( + calldataload(pEval_b), + mulmod(betaxi, k1, q), + q), gamma, q) + + let val3 := addmod( + addmod( + calldataload(pEval_c), + mulmod(betaxi, k2, q), + q), gamma, q) + + let d2a := mulmod( + mulmod(mulmod(val1, val2, q), val3, q), + mload(add(pMem, pAlpha)), + q + ) + + let d2b := mulmod( + mload(add(pMem, pEval_l1)), + mload(add(pMem, pAlpha2)), + q + ) + + // We'll use mIn to save d2 + g1_calldataSet(add(mIn, 192), pZ) + g1_mulSet( + mIn, + add(mIn, 192), + addmod(addmod(d2a, d2b, q), mload(add(pMem, pU)), q)) + + + val1 := addmod( + addmod( + calldataload(pEval_a), + mulmod(mload(add(pMem, pBeta)), calldataload(pEval_s1), q), + q), gamma, q) + + val2 := addmod( + addmod( + calldataload(pEval_b), + mulmod(mload(add(pMem, pBeta)), calldataload(pEval_s2), q), + q), gamma, q) + + val3 := mulmod( + mulmod(mload(add(pMem, pAlpha)), mload(add(pMem, pBeta)), q), + calldataload(pEval_zw), q) + + + // We'll use mIn + 64 to save d3 + g1_mulSetC( + add(mIn, 64), + S3x, + S3y, + mulmod(mulmod(val1, val2, q), val3, q)) + + // We'll use mIn + 128 to save d4 + g1_calldataSet(add(mIn, 128), pT1) + + g1_mulAccC(add(mIn, 128), calldataload(pT2), calldataload(add(pT2, 32)), mload(add(pMem, pXin))) + let xin2 := mulmod(mload(add(pMem, pXin)), mload(add(pMem, pXin)), q) + g1_mulAccC(add(mIn, 128), calldataload(pT3), calldataload(add(pT3, 32)) , xin2) + + g1_mulSetC(add(mIn, 128), mload(add(mIn, 128)), mload(add(mIn, 160)), mload(add(pMem, pZh))) + + mstore(add(add(mIn, 64), 32), mod(sub(qf, mload(add(add(mIn, 64), 32))), qf)) + mstore(add(mIn, 160), mod(sub(qf, mload(add(mIn, 160))), qf)) + g1_acc(_pD, mIn) + g1_acc(_pD, add(mIn, 64)) + g1_acc(_pD, add(mIn, 128)) + } + + function calculateF(pMem) { + let p := add(pMem, pF) + + g1_set(p, add(pMem, pD)) + g1_mulAccC(p, calldataload(pA), calldataload(add(pA, 32)), mload(add(pMem, pV1))) + g1_mulAccC(p, calldataload(pB), calldataload(add(pB, 32)), mload(add(pMem, pV2))) + g1_mulAccC(p, calldataload(pC), calldataload(add(pC, 32)), mload(add(pMem, pV3))) + g1_mulAccC(p, S1x, S1y, mload(add(pMem, pV4))) + g1_mulAccC(p, S2x, S2y, mload(add(pMem, pV5))) + } + + function calculateE(pMem) { + let s := mod(sub(q, mload(add(pMem, pEval_r0))), q) + + s := addmod(s, mulmod(calldataload(pEval_a), mload(add(pMem, pV1)), q), q) + s := addmod(s, mulmod(calldataload(pEval_b), mload(add(pMem, pV2)), q), q) + s := addmod(s, mulmod(calldataload(pEval_c), mload(add(pMem, pV3)), q), q) + s := addmod(s, mulmod(calldataload(pEval_s1), mload(add(pMem, pV4)), q), q) + s := addmod(s, mulmod(calldataload(pEval_s2), mload(add(pMem, pV5)), q), q) + s := addmod(s, mulmod(calldataload(pEval_zw), mload(add(pMem, pU)), q), q) + + g1_mulSetC(add(pMem, pE), G1x, G1y, s) + } + + function checkPairing(pMem) -> isOk { + let mIn := mload(0x40) + mstore(0x40, add(mIn, 576)) // [0..383] = pairing data, [384..447] = pWxi, [448..512] = pWxiw + + let _pWxi := add(mIn, 384) + let _pWxiw := add(mIn, 448) + let _aux := add(mIn, 512) + + g1_calldataSet(_pWxi, pWxi) + g1_calldataSet(_pWxiw, pWxiw) + + // A1 + g1_mulSet(mIn, _pWxiw, mload(add(pMem, pU))) + g1_acc(mIn, _pWxi) + mstore(add(mIn, 32), mod(sub(qf, mload(add(mIn, 32))), qf)) + + // [X]_2 + mstore(add(mIn,64), X2x2) + mstore(add(mIn,96), X2x1) + mstore(add(mIn,128), X2y2) + mstore(add(mIn,160), X2y1) + + // B1 + g1_mulSet(add(mIn, 192), _pWxi, mload(add(pMem, pXi))) + + let s := mulmod(mload(add(pMem, pU)), mload(add(pMem, pXi)), q) + s := mulmod(s, w1, q) + g1_mulSet(_aux, _pWxiw, s) + g1_acc(add(mIn, 192), _aux) + g1_acc(add(mIn, 192), add(pMem, pF)) + mstore(add(pMem, add(pE, 32)), mod(sub(qf, mload(add(pMem, add(pE, 32)))), qf)) + g1_acc(add(mIn, 192), add(pMem, pE)) + + // [1]_2 + mstore(add(mIn,256), G2x2) + mstore(add(mIn,288), G2x1) + mstore(add(mIn,320), G2y2) + mstore(add(mIn,352), G2y1) + + let success := staticcall(sub(gas(), 2000), 8, mIn, 384, mIn, 0x20) + + isOk := and(success, mload(mIn)) + } + + let pMem := mload(0x40) + mstore(0x40, add(pMem, lastMem)) + + checkProofData() + calculateChallenges(pMem, _pubSignals) + calculateLagrange(pMem) + calculatePI(pMem, _pubSignals) + calculateR0(pMem) + calculateD(pMem) + calculateF(pMem) + calculateE(pMem) + let isValid := checkPairing(pMem) + + mstore(0x40, sub(pMem, lastMem)) + mstore(0, isValid) + return(0,0x20) + } + + } +} diff --git a/zisk-contracts/ZiskVerifier.sol b/zisk-contracts/ZiskVerifier.sol new file mode 100644 index 000000000..2debb8e1d --- /dev/null +++ b/zisk-contracts/ZiskVerifier.sol @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: AGPL-3.0 +pragma solidity ^0.8.20; + +import {IZiskVerifier} from "./IZiskVerifier.sol"; +import {PlonkVerifier} from "./PlonkVerifier.sol"; + +/// @title Zisk Verifier +/// @author SilentSig +/// @notice This contracts implements a solidity verifier for Zisk. +contract ZiskVerifier is PlonkVerifier, IZiskVerifier { + /// @notice Thrown when the verifier selector from this proof does not match the one in this + /// verifier. This indicates that this proof was sent to the wrong verifier. + + /// @notice Thrown when the proof is invalid. + error InvalidProof(); + + function VERSION() external pure returns (string memory) { + return "v0.16.0"; + } + + function getRootCVadcopFinal() external pure returns (uint64[4] memory) { + return [uint64(9211010158316595036), uint64(7055235338110277438), uint64(2391371252028311145), uint64(10691781997660262077)]; + } + + // Modulus zkSNARK + uint256 internal constant _RFIELD = + 21888242871839275222246405745257275088548364400416034343698204186575808495617; + + /// @notice Hashes the public values to a field elements inside Bn254. + /// @param publicValues The public values. + function hashPublicValues( + uint64[4] calldata programVK, + uint64[4] calldata rootCVadcopFinal, + bytes calldata publicValues + ) public pure returns (uint256) { + return uint256(sha256(abi.encodePacked(bytes8(programVK[0]), bytes8(programVK[1]), bytes8(programVK[2]), bytes8(programVK[3]), publicValues, bytes8(rootCVadcopFinal[0]), bytes8(rootCVadcopFinal[1]), bytes8(rootCVadcopFinal[2]), bytes8(rootCVadcopFinal[3])))) % _RFIELD; + } + + /// @notice Verifies a proof with given public values and vkey. + /// @param programVK The verification key for the RISC-V program. + /// @param rootCVadcopFinal The rootC value for the Vadcop final. + /// @param publicValues The public values encoded as bytes. + /// @param proofBytes The proof of the program execution the Zisk zkVM encoded as bytes. + function verifySnarkProof( + uint64[4] calldata programVK, + uint64[4] calldata rootCVadcopFinal, + bytes calldata publicValues, + bytes calldata proofBytes + ) external view { + uint256 publicValuesDigest = hashPublicValues(programVK, rootCVadcopFinal, publicValues); + + uint256[24] memory proofDecoded = abi.decode(proofBytes, (uint256[24])); + + bool success = this.verifyProof( + proofDecoded, + [publicValuesDigest] + ); + + if (!success) { + revert InvalidProof(); + } + } +} \ No newline at end of file diff --git a/ziskbuild/Cargo.toml b/ziskbuild/Cargo.toml index 03320ef23..de1cdfff4 100644 --- a/ziskbuild/Cargo.toml +++ b/ziskbuild/Cargo.toml @@ -11,10 +11,11 @@ categories.workspace = true clap = { workspace = true } cargo_metadata = "0.23.0" anyhow = { workspace = true } +tracing = { workspace = true } +rom-setup = { workspace = true } [build-dependencies] -vergen = { version = "8", default-features = false, features = [ - "build", - "git", - "git2", -] } +vergen-git2.workspace = true + +[features] +disable_distributed = [] \ No newline at end of file diff --git a/ziskbuild/build.rs b/ziskbuild/build.rs index 0a689d13e..a9af6f870 100644 --- a/ziskbuild/build.rs +++ b/ziskbuild/build.rs @@ -1,5 +1,15 @@ fn main() { - vergen::EmitBuilder::builder().build_timestamp().git_sha(true).emit().unwrap(); + let mut builder = vergen_git2::Emitter::default(); + builder + .add_instructions( + &vergen_git2::BuildBuilder::default().build_timestamp(true).build().unwrap(), + ) + .unwrap(); + builder + .add_instructions(&vergen_git2::Git2Builder::default().sha(true).build().unwrap()) + .unwrap(); + builder.emit().unwrap(); + let disable_distributed = std::env::vars().any(|(k, _)| k == "CARGO_FEATURE_DISABLE_DISTRIBUTED"); let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); diff --git a/ziskbuild/src/build.rs b/ziskbuild/src/build.rs index 0bb105c08..df9987efe 100644 --- a/ziskbuild/src/build.rs +++ b/ziskbuild/src/build.rs @@ -3,6 +3,7 @@ use crate::{ ZISK_TARGET, }; use cargo_metadata::camino::Utf8PathBuf; +use rom_setup::{assembly_files_exist, gen_assembly, get_assembly_file_paths, get_output_path}; use std::{ io::{BufRead, BufReader}, path::PathBuf, @@ -32,8 +33,12 @@ pub(crate) fn build_program_internal(path: &str, args: Option) { if is_clippy_driver { // Still need to set ELF env vars even if build is skipped. let target_elf_paths = generate_elf_paths(&metadata, args.as_ref()); - - print_elf_paths_cargo_directives(&target_elf_paths); + let hints = args + .as_ref() + .and_then(|a| a.hints) + .or_else(|| std::env::var("ZISK_HINTS").ok().and_then(|v| v.parse().ok())) + .unwrap_or(false); + print_elf_paths_cargo_directives(&target_elf_paths, hints); println!("cargo:warning=Skipping build due to clippy invocation."); return; @@ -64,15 +69,23 @@ pub fn execute_build_program( let program_dir: Utf8PathBuf = program_dir.try_into().expect("Failed to convert PathBuf to Utf8PathBuf"); + // Check for ZISK_HINTS environment variables if not set in args + let mut args = args.clone(); + if args.hints.is_none() { + if let Ok(env_hints) = std::env::var("ZISK_HINTS") { + args.hints = env_hints.parse().ok(); + } + } + // Get the program metadata. let program_metadata_file = program_dir.join("Cargo.toml"); let mut program_metadata_cmd = cargo_metadata::MetadataCommand::new(); let program_metadata = program_metadata_cmd.manifest_path(program_metadata_file).exec()?; // Get the command corresponding to Docker or local build. - let cmd = create_command(args, &program_dir, &program_metadata); + let cmd = create_command(&args, &program_dir, &program_metadata); - let target_elf_paths = generate_elf_paths(&program_metadata, Some(args)); + let target_elf_paths = generate_elf_paths(&program_metadata, Some(&args)); if target_elf_paths.len() > 1 && args.elf_name.is_some() { anyhow::bail!("--elf-name is not supported when --output-directory is used and multiple ELFs are built."); @@ -80,6 +93,38 @@ pub fn execute_build_program( execute_command(cmd)?; + // Generate assembly for all ELF files (only if not already generated) + let hints = args.hints.unwrap_or(false); + println!("cargo:rerun-if-env-changed=ZISK_HINTS"); + + let output_path = get_output_path(&None)?; + for (_, elf_path) in target_elf_paths.iter() { + let elf_path_std = elf_path.as_std_path(); + + let assembly_exists = assembly_files_exist(elf_path_std, &output_path, hints)?; + let hints_marker = output_path.join(format!( + "{}.assembly_hints", + elf_path_std.file_name().unwrap().to_string_lossy() + )); + let new_value = if hints { "on" } else { "off" }; + + let hints_changed = match std::fs::read_to_string(&hints_marker) { + Ok(prev) => prev != new_value, + Err(_) => true, + }; + + if !assembly_exists || hints_changed { + gen_assembly(elf_path_std, &None, hints, true)?; + std::fs::write(&hints_marker, new_value)?; + } + + // Tell cargo to rerun if any assembly file is deleted + let assembly_files = get_assembly_file_paths(elf_path_std, &output_path, hints)?; + for asm_file in assembly_files { + println!("cargo:rerun-if-changed={}", asm_file.display()); + } + } + if let Some(output_directory) = &args.output_directory { // The path to the output directory, maybe relative or absolute. let output_directory = PathBuf::from(output_directory); @@ -102,7 +147,7 @@ pub fn execute_build_program( } } - print_elf_paths_cargo_directives(&target_elf_paths); + print_elf_paths_cargo_directives(&target_elf_paths, hints); Ok(target_elf_paths) } @@ -157,8 +202,13 @@ pub fn generate_elf_paths( vec![(bin_target.name.to_owned(), target_elf_path)] } -fn print_elf_paths_cargo_directives(target_elf_paths: &[(String, Utf8PathBuf)]) { +fn print_elf_paths_cargo_directives(target_elf_paths: &[(String, Utf8PathBuf)], hints: bool) { + println!("cargo:rerun-if-env-changed=ZISK_HINTS"); + for (target_name, elf_path) in target_elf_paths.iter() { println!("cargo:rustc-env=ZISK_ELF_{target_name}={elf_path}"); + if hints { + println!("cargo:rustc-env=ZISK_ELF_{target_name}_WITH_HINTS=1"); + } } } diff --git a/ziskbuild/src/lib.rs b/ziskbuild/src/lib.rs index 113af763f..4341902cf 100644 --- a/ziskbuild/src/lib.rs +++ b/ziskbuild/src/lib.rs @@ -43,6 +43,9 @@ pub struct BuildArgs { #[clap(long, value_name = "ELF_NAME")] elf_name: Option, + + #[clap(long, value_name = "HINTS")] + pub hints: Option, } pub fn build_program(path: &str) { diff --git a/ziskclib/src/helpers.rs b/ziskclib/src/helpers.rs index cea9672af..76882c1e4 100644 --- a/ziskclib/src/helpers.rs +++ b/ziskclib/src/helpers.rs @@ -1,41 +1,12 @@ use sha2::compress256; + #[allow(deprecated)] use sha2::digest::generic_array::{typenum::U64, GenericArray}; -pub fn sha256f(state: &mut [u64; 4], input: &[u64; 8]) { - // Convert both the state and the input to appropriate types - let mut state_u32: [u32; 8] = convert_u64_to_u32(state).try_into().unwrap(); - let block = convert_u64_to_generic_array_bytes(input); - compress256(&mut state_u32, &[block]); - - // Convert the state back to u64 and write it to the memory address - *state = convert_u32_to_u64(&state_u32); -} - -pub fn convert_u64_to_u32(input: &[u64]) -> Vec { - let mut out = Vec::with_capacity(input.len() * 2); - for &word in input { - out.push((word >> 32) as u32); - out.push((word & 0xFFFFFFFF) as u32); - } - out -} - #[allow(deprecated)] -pub fn convert_u64_to_generic_array_bytes(input: &[u64; 8]) -> GenericArray { - let mut out = [0u8; 64]; - for (i, word) in input.iter().enumerate() { - for j in 0..8 { - out[i * 8 + j] = (word >> (56 - j * 8)) as u8; - } - } - GenericArray::::clone_from_slice(&out) -} - -pub fn convert_u32_to_u64(words: &[u32; 8]) -> [u64; 4] { - let mut out = [0u64; 4]; - for i in 0..4 { - out[i] = ((words[2 * i] as u64) << 32) | (words[2 * i + 1] as u64); - } - out +pub fn sha256f(state: &mut [u64; 4], input: &[u64; 8]) { + let state_u32: &mut [u32; 8] = unsafe { &mut *(state.as_mut_ptr() as *mut [u32; 8]) }; + let input_u8: &[GenericArray; 1] = + unsafe { &*(input.as_ptr() as *const [GenericArray; 1]) }; + compress256(state_u32, input_u8); } diff --git a/ziskclib/src/lib.rs b/ziskclib/src/lib.rs index c3ee53bf1..6722535ef 100644 --- a/ziskclib/src/lib.rs +++ b/ziskclib/src/lib.rs @@ -5,14 +5,10 @@ use helpers::sha256f; #[no_mangle] pub extern "C" fn zisk_keccakf(data: &mut [u64; 25]) { - //println!("zisk_keccakf() starting..."); keccakf(data); - //println!("zisk_keccakf() ...done"); } #[no_mangle] pub extern "C" fn zisk_sha256(state: &mut [u64; 4], input: &[u64; 8]) { - //println!("zisk_sha256f() starting..."); sha256f(state, input); - //println!("zisk_sha256f() ...done"); } diff --git a/ziskos-hints/Cargo.toml b/ziskos-hints/Cargo.toml new file mode 100644 index 000000000..0d1427842 --- /dev/null +++ b/ziskos-hints/Cargo.toml @@ -0,0 +1,49 @@ +# Wrapper crate around ziskos that compiles with hints feature enabled. +# +# src/core/ is a symlink to ../../ziskos/entrypoint/src/ - same source, different features. +# src/lib.rs wraps the symlinked source and adds hints-specific modules. +# Exports C symbols with "hints_" prefix to avoid linker conflicts with ziskos. +# See README.md for details. + +[package] +name = "ziskos-hints" +version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +keywords = { workspace = true } +repository = { workspace = true } +categories = { workspace = true } + +[dependencies] +lib-c = { workspace = true } + +num-bigint = { workspace = true } +num-integer = { workspace = true } +num-traits = { workspace = true } + +precompiles-helpers = { workspace = true } + +lazy_static = "1.5.0" +rand = "0.8.5" +getrandom = { version = "0.2", features = ["custom"] } +cfg-if = "1.0" +tiny-keccak = { version = "2.0.0", features = ["keccak"] } +serde = { workspace = true, features = ["derive"] } +bincode = { workspace = true } +paste = "1.0" +sha2 = { workspace = true } +fields = { workspace = true } + +anyhow = { workspace = true } +zisk-verifier = { workspace = true } + +[features] +default = ["hints"] +hints = [] + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = [ +'cfg(zisk_hints)', +'cfg(zisk_hints_metrics)', +'cfg(zisk_hints_debug)', +'cfg(zisk_hints_single_thread)'] } diff --git a/ziskos-hints/README.md b/ziskos-hints/README.md new file mode 100644 index 000000000..857b756ed --- /dev/null +++ b/ziskos-hints/README.md @@ -0,0 +1,60 @@ +# ziskos-hints + +This crate is a **wrapper around `ziskos`** that compiles the same source code with the `hints` feature enabled. + +## How it works + +### Symlinked Source +The `src/` directory in this crate is a **symlink** to `../ziskos/entrypoint/src/`. This means: +- Both `ziskos` and `ziskos-hints` compile from the **same source files** +- No code duplication is needed +- Changes to the source are automatically reflected in both crates + +### Conditional Compilation +The source code uses conditional compilation to export different C symbols: + +```rust +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_")] +pub extern "C" fn (...) { ... } +``` + +When compiled: +- **ziskos** (no hints feature): Exports C symbol `` +- **ziskos-hints** (hints feature enabled): Exports C symbol `hints_` + +### Why This Pattern? + +This solves a Cargo limitation: **feature unification**. In a single build, if multiple crates depend on the same crate, Cargo unifies their features. This means you cannot have different feature sets for the same dependency. + +By creating a separate crate (`ziskos-hints`) that always enables the `hints` feature, we can: +1. Use `ziskos` without hints in most places +2. Use `ziskos-hints` with hints where needed (e.g., in `precompiles-hints`) +3. Link both into the same binary without symbol conflicts + +The different C symbol names (`` vs `hints_`) prevent linker duplicate symbol errors. + +## Usage + +In your `Cargo.toml`: +```toml +# For normal usage without hints: +ziskos = { workspace = true } + +# For usage with hints enabled: +ziskos-hints = { workspace = true } +``` + +From Rust code, both have the same API: +```rust +use ziskos::syscall_arith256_mod; + +// or +use ziskos_hints::syscall_arith256_mod; + +// or rename for consistency: +use ziskos_hints as ziskos; +use ziskos::syscall_arith256_mod; +``` + +The function name in Rust is the same; only the exported C symbol differs. diff --git a/ziskos-hints/src/core b/ziskos-hints/src/core new file mode 120000 index 000000000..4da0cb079 --- /dev/null +++ b/ziskos-hints/src/core @@ -0,0 +1 @@ +../../ziskos/entrypoint/src \ No newline at end of file diff --git a/ziskos-hints/src/handlers/blake2b.rs b/ziskos-hints/src/handlers/blake2b.rs new file mode 100644 index 000000000..7920334d5 --- /dev/null +++ b/ziskos-hints/src/handlers/blake2b.rs @@ -0,0 +1,22 @@ +use crate::{handlers::validate_hint_length, hint_fields, zisklib}; + +use anyhow::Result; + +/// Processes an `HINT_BLAKE2B_COMPRESS` hint. +#[inline] +pub fn blake2b_compress_hint(data: &[u64]) -> Result> { + hint_fields![ROUNDS: 1, STATE: 8, MESSAGE: 16, OFFSET: 2, FINAL_BLOCK: 1]; + + validate_hint_length(data, EXPECTED_LEN, "HINT_BLAKE2B_COMPRESS")?; + + let rounds = data[ROUNDS_OFFSET] as u32; + let mut state: [u64; 8] = data[STATE_OFFSET..STATE_OFFSET + STATE_SIZE].try_into().unwrap(); + let message = data[MESSAGE_OFFSET..MESSAGE_OFFSET + MESSAGE_SIZE].try_into().unwrap(); + let offset = data[OFFSET_OFFSET..OFFSET_OFFSET + OFFSET_SIZE].try_into().unwrap(); + let final_block = data[FINAL_BLOCK_OFFSET] != 0; + + let mut hints = Vec::new(); + zisklib::blake2b_compress(rounds, &mut state, message, offset, final_block, &mut hints); + + Ok(hints) +} diff --git a/ziskos-hints/src/handlers/bls381.rs b/ziskos-hints/src/handlers/bls381.rs new file mode 100644 index 000000000..113309730 --- /dev/null +++ b/ziskos-hints/src/handlers/bls381.rs @@ -0,0 +1,173 @@ +use crate::{handlers::validate_hint_length, hint_fields, zisklib}; + +use anyhow::Result; + +/// Processes an `HINT_BLS12_381_G1_ADD` hint. +#[inline] +pub fn bls12_381_g1_add_hint(data: &[u64]) -> Result> { + hint_fields![A: 96, B: 96]; + + let bytes = unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * 8) }; + + validate_hint_length(bytes, EXPECTED_LEN, "HINT_BLS12_381_G1_ADD")?; + + let a: &[u8; A_SIZE] = bytes[A_OFFSET..A_OFFSET + A_SIZE].try_into().unwrap(); + let b: &[u8; B_SIZE] = bytes[B_OFFSET..B_OFFSET + B_SIZE].try_into().unwrap(); + + let mut hints = Vec::new(); + let result: &mut [u8; 96] = &mut [0u8; 96]; + unsafe { + zisklib::bls12_381_g1_add_c(result.as_mut_ptr(), a.as_ptr(), b.as_ptr(), &mut hints); + } + + Ok(hints) +} + +/// Processes an `HINT_BLS12_381_G1_MSM` hint. +#[inline] +pub fn bls12_381_g1_msm_hint(data: &[u64]) -> Result> { + if data.is_empty() { + anyhow::bail!("HINT_BLS12_381_G1_MSM: data is empty"); + } + + let num_pairs = data[0] as usize; + + const POINT_SIZE: usize = 96; + const SCALAR_SIZE: usize = 32; + const PAIR_SIZE_BYTES: usize = POINT_SIZE + SCALAR_SIZE; + const PAIR_SIZE: usize = PAIR_SIZE_BYTES.div_ceil(8); + + let expected_len = 1 + num_pairs * PAIR_SIZE; + + validate_hint_length(data, expected_len, "HINT_BLS12_381_G1_MSM")?; + + let bytes = unsafe { + std::slice::from_raw_parts(data.as_ptr().add(1) as *const u8, num_pairs * PAIR_SIZE_BYTES) + }; + + let mut hints = Vec::new(); + let result: &mut [u8; 96] = &mut [0u8; 96]; + unsafe { + zisklib::bls12_381_g1_msm_c(result.as_mut_ptr(), bytes.as_ptr(), num_pairs, &mut hints); + } + + Ok(hints) +} + +/// Processes an `HINT_BLS12_381_G2_ADD` hint. +#[inline] +pub fn bls12_381_g2_add_hint(data: &[u64]) -> Result> { + hint_fields![A: 192, B: 192]; + + let bytes = unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * 8) }; + + validate_hint_length(bytes, EXPECTED_LEN, "HINT_BLS12_381_G2_ADD")?; + + let a: &[u8; A_SIZE] = bytes[A_OFFSET..A_OFFSET + A_SIZE].try_into().unwrap(); + let b: &[u8; B_SIZE] = bytes[B_OFFSET..B_OFFSET + B_SIZE].try_into().unwrap(); + + let mut hints = Vec::new(); + let result: &mut [u8; 192] = &mut [0u8; 192]; + unsafe { + zisklib::bls12_381_g2_add_c(result.as_mut_ptr(), a.as_ptr(), b.as_ptr(), &mut hints); + } + + Ok(hints) +} + +/// Processes an `HINT_BLS12_381_G2_MSM` hint. +#[inline] +pub fn bls12_381_g2_msm_hint(data: &[u64]) -> Result> { + if data.is_empty() { + anyhow::bail!("HINT_BLS12_381_G1_MSM: data is empty"); + } + + let num_pairs = data[0] as usize; + + const POINT_SIZE: usize = 192; + const SCALAR_SIZE: usize = 32; + const PAIR_SIZE_BYTES: usize = POINT_SIZE + SCALAR_SIZE; + const PAIR_SIZE: usize = PAIR_SIZE_BYTES.div_ceil(8); + + let expected_len = 1 + num_pairs * PAIR_SIZE; + + validate_hint_length(data, expected_len, "HINT_BLS12_381_G1_MSM")?; + + let bytes = unsafe { + std::slice::from_raw_parts(data.as_ptr().add(1) as *const u8, num_pairs * PAIR_SIZE_BYTES) + }; + + let mut hints = Vec::new(); + let result: &mut [u8; 192] = &mut [0u8; 192]; + unsafe { + zisklib::bls12_381_g2_msm_c(result.as_mut_ptr(), bytes.as_ptr(), num_pairs, &mut hints); + } + + Ok(hints) +} + +/// Processes an `HINT_BLS12_381_PAIRING_CHECK` hint. +#[inline] +pub fn bls12_381_pairing_check_hint(data: &[u64]) -> Result> { + if data.is_empty() { + anyhow::bail!("HINT_BLS12_381_G1_MSM: data is empty"); + } + + let num_pairs = data[0] as usize; + + const G1_SIZE: usize = 96; + const G2_SIZE: usize = 192; + const PAIR_SIZE_BYTES: usize = G1_SIZE + G2_SIZE; + const PAIR_SIZE: usize = PAIR_SIZE_BYTES.div_ceil(8); + + let expected_len = 1 + num_pairs * PAIR_SIZE; + + validate_hint_length(data, expected_len, "HINT_BLS12_381_PAIRING_CHECK")?; + + let pairs = unsafe { + std::slice::from_raw_parts(data.as_ptr().add(1) as *const u8, num_pairs * PAIR_SIZE_BYTES) + }; + + let mut hints = Vec::new(); + unsafe { + zisklib::bls12_381_pairing_check_c(pairs.as_ptr(), num_pairs, &mut hints); + } + + Ok(hints) +} + +/// Processes an `HINT_BLS12_381_FP_TO_G1` hint. +#[inline] +pub fn bls12_381_fp_to_g1_hint(data: &[u64]) -> Result> { + hint_fields![FP: 6]; + + validate_hint_length(data, EXPECTED_LEN, "HINT_BLS12_381_FP_TO_G1")?; + + let fp: &[u64; FP_SIZE] = data[FP_OFFSET..FP_OFFSET + FP_SIZE].try_into().unwrap(); + + let mut hints = Vec::new(); + let result: &mut [u8; 96] = &mut [0u8; 96]; + unsafe { + zisklib::bls12_381_fp_to_g1_c(result.as_mut_ptr(), fp.as_ptr() as *const u8, &mut hints); + } + + Ok(hints) +} + +/// Processes an `HINT_BLS12_381_FP2_TO_G2` hint. +#[inline] +pub fn bls12_381_fp2_to_g2_hint(data: &[u64]) -> Result> { + hint_fields![FP2: 12]; + + validate_hint_length(data, EXPECTED_LEN, "HINT_BLS12_381_FP2_TO_G2")?; + + let fp2: &[u64; FP2_SIZE] = data[FP2_OFFSET..FP2_OFFSET + FP2_SIZE].try_into().unwrap(); + + let mut hints = Vec::new(); + let result: &mut [u8; 192] = &mut [0u8; 192]; + unsafe { + zisklib::bls12_381_fp2_to_g2_c(result.as_mut_ptr(), fp2.as_ptr() as *const u8, &mut hints); + } + + Ok(hints) +} diff --git a/ziskos-hints/src/handlers/bn254.rs b/ziskos-hints/src/handlers/bn254.rs new file mode 100644 index 000000000..2c5953925 --- /dev/null +++ b/ziskos-hints/src/handlers/bn254.rs @@ -0,0 +1,74 @@ +use crate::{handlers::validate_hint_length, hint_fields, zisklib}; + +use anyhow::Result; + +/// Processes an `HINT_BN254_G1_ADD` hint. +#[inline] +pub fn bn254_g1_add_hint(data: &[u64]) -> Result> { + hint_fields![P1: 64, P2: 64]; + + let bytes = unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * 8) }; + + validate_hint_length(bytes, EXPECTED_LEN, "HINT_BN254_G1_ADD")?; + + let p1: &[u8; P1_SIZE] = bytes[P1_OFFSET..P1_OFFSET + P1_SIZE].try_into().unwrap(); + let p2: &[u8; P2_SIZE] = bytes[P2_OFFSET..P2_OFFSET + P2_SIZE].try_into().unwrap(); + + let mut hints = Vec::new(); + let result: &mut [u8; 64] = &mut [0u8; 64]; + unsafe { + zisklib::bn254_g1_add_c(p1.as_ptr(), p2.as_ptr(), result.as_mut_ptr(), &mut hints); + } + + Ok(hints) +} + +/// Processes an `HINT_BN254_G1_MUL` hint. +#[inline] +pub fn bn254_g1_mul_hint(data: &[u64]) -> Result> { + hint_fields![POINT: 64, SCALAR: 32]; + + let bytes = unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * 8) }; + + validate_hint_length(bytes, EXPECTED_LEN, "HINT_BN254_G1_MUL")?; + + let point: &[u8; POINT_SIZE] = + bytes[POINT_OFFSET..POINT_OFFSET + POINT_SIZE].try_into().unwrap(); + let scalar: &[u8; SCALAR_SIZE] = + bytes[SCALAR_OFFSET..SCALAR_OFFSET + SCALAR_SIZE].try_into().unwrap(); + + let mut hints = Vec::new(); + let result: &mut [u8; 64] = &mut [0u8; 64]; + unsafe { + zisklib::bn254_g1_mul_c(point.as_ptr(), scalar.as_ptr(), result.as_mut_ptr(), &mut hints); + } + + Ok(hints) +} + +/// Processes an `HINT_BN254_PAIRING_CHECK` hint. +#[inline] +pub fn bn254_pairing_check_hint(data: &[u64]) -> Result> { + const G1_WORDS: usize = 8; + const G2_WORDS: usize = 16; + const PAIR_WORDS: usize = G1_WORDS + G2_WORDS; + + if data.is_empty() { + anyhow::bail!("BN254_PAIRING_CHECK: data is empty"); + } + + let num_pairs = data[0] as usize; + + let expected_len = 1 + num_pairs * PAIR_WORDS; + + validate_hint_length(data, expected_len, "PAIRING_BATCH_BN254")?; + + let pairs_data = &data[1..]; + + let mut hints = Vec::new(); + unsafe { + zisklib::bn254_pairing_check_c(pairs_data.as_ptr() as *const u8, num_pairs, &mut hints); + } + + Ok(hints) +} diff --git a/ziskos-hints/src/handlers/keccak256.rs b/ziskos-hints/src/handlers/keccak256.rs new file mode 100644 index 000000000..c4b054dc8 --- /dev/null +++ b/ziskos-hints/src/handlers/keccak256.rs @@ -0,0 +1,25 @@ +use crate::zisklib; + +use anyhow::Result; + +/// Processes an `HINT_KECCAK256` hint. +#[inline] +pub fn keccak256_hint(data: &[u64], data_len_bytes: usize) -> Result> { + let data_len_words = data_len_bytes.div_ceil(8); + + if data.len() != data_len_words { + anyhow::bail!( + "HINT_KECCAK256: expected data length of {} bytes ({} words), got {} words", + data_len_bytes, + data_len_words, + data.len() + ); + } + + let bytes = unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, data_len_bytes) }; + + let mut hints = Vec::new(); + zisklib::keccak256(bytes, &mut hints); + + Ok(hints) +} diff --git a/ziskos-hints/src/handlers/kzg.rs b/ziskos-hints/src/handlers/kzg.rs new file mode 100644 index 000000000..ebf7407c5 --- /dev/null +++ b/ziskos-hints/src/handlers/kzg.rs @@ -0,0 +1,33 @@ +use crate::{handlers::validate_hint_length, hint_fields, zisklib}; + +use anyhow::Result; + +/// Processes an `HINT_VERIFY_KZG_PROOF` hint. +#[inline] +pub fn verify_kzg_proof_hint(data: &[u64]) -> Result> { + hint_fields![Z: 32, Y: 32, COMMITMENT: 48, PROOF: 48]; + + let bytes = unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * 8) }; + + validate_hint_length(bytes, EXPECTED_LEN, "HINT_VERIFY_KZG_PROOF")?; + + let z: &[u8; Z_SIZE] = bytes[Z_OFFSET..Z_OFFSET + Z_SIZE].try_into().unwrap(); + let y: &[u8; Y_SIZE] = bytes[Y_OFFSET..Y_OFFSET + Y_SIZE].try_into().unwrap(); + let commitment: &[u8; COMMITMENT_SIZE] = + bytes[COMMITMENT_OFFSET..COMMITMENT_OFFSET + COMMITMENT_SIZE].try_into().unwrap(); + let proof: &[u8; PROOF_SIZE] = + bytes[PROOF_OFFSET..PROOF_OFFSET + PROOF_SIZE].try_into().unwrap(); + + let mut hints = Vec::new(); + unsafe { + zisklib::verify_kzg_proof_c( + z.as_ptr(), + y.as_ptr(), + commitment.as_ptr(), + proof.as_ptr(), + &mut hints, + ) + }; + + Ok(hints) +} diff --git a/ziskos-hints/src/handlers/mod.rs b/ziskos-hints/src/handlers/mod.rs new file mode 100644 index 000000000..6149bde03 --- /dev/null +++ b/ziskos-hints/src/handlers/mod.rs @@ -0,0 +1,123 @@ +pub mod blake2b; +pub mod bls381; +pub mod bn254; +pub mod keccak256; +pub mod kzg; +pub mod modexp; +pub mod secp256k1; +pub mod secp256r1; +pub mod sha256; + +/// Macro to generate size, offset, and expected length constants for hint data fields. +/// +/// # Example +/// ```ignore +/// hint_fields![A: 4, B: 4, M: 4] +/// ``` +/// Generates: +/// - `A_SIZE`, `B_SIZE`, `M_SIZE` constants +/// - `A_OFFSET`, `B_OFFSET`, `M_OFFSET` constants (cumulative offsets) +/// - `EXPECTED_LEN` constant (sum of all sizes) +#[macro_export] +macro_rules! hint_fields { + ($($name:ident: $size:expr),+ $(,)?) => { + paste::paste! { + $( + #[allow(dead_code)] + const [<$name _SIZE>]: usize = $size; + )+ + } + + hint_fields!(@offsets 0, $($name: $size),+); + + #[allow(unused)] + const EXPECTED_LEN: usize = hint_fields!(@sum $($size),+); + }; + + (@offsets $offset:expr, $name:ident: $size:expr) => { + paste::paste! { + #[allow(dead_code)] + const [<$name _OFFSET>]: usize = $offset; + } + }; + + (@offsets $offset:expr, $name:ident: $size:expr, $($rest_name:ident: $rest_size:expr),+) => { + paste::paste! { + const [<$name _OFFSET>]: usize = $offset; + } + hint_fields!(@offsets $offset + $size, $($rest_name: $rest_size),+); + }; + + (@sum $size:expr) => { $size }; + (@sum $size:expr, $($rest:expr),+) => { + $size + hint_fields!(@sum $($rest),+) + }; +} + +// #[inline] +// fn read_field<'a>(data: &'a [u64], pos: &mut usize) -> anyhow::Result<&'a [u64]> { +// let len = +// *data.get(*pos).ok_or("MODEXP hint data too short").map_err(anyhow::Error::msg)? as usize; +// *pos += 1; +// let field = data +// .get(*pos..*pos + len) +// .ok_or("MODEXP hint data too short") +// .map_err(anyhow::Error::msg)?; +// *pos += len; +// Ok(field) +// } + +#[inline] +fn read_field_bytes<'a>(data: &'a [u64], pos: &mut usize) -> anyhow::Result<(&'a [u8], usize)> { + // Treat the entire u64 slice as bytes + let byte_data: &[u8] = unsafe { + std::slice::from_raw_parts(data.as_ptr() as *const u8, std::mem::size_of_val(data)) + }; + + // Make sure we have at least 8 bytes for the length header + if *pos + 8 > byte_data.len() { + anyhow::bail!("MODEXP hint data too short to read length"); + } + + // Read length as native-endian u64 + let len_bytes = + u64::from_ne_bytes(byte_data[*pos..*pos + 8].try_into().expect("slice length checked")) + as usize; + *pos += 8; + + // Ensure there are enough bytes for the field + if *pos + len_bytes > byte_data.len() { + anyhow::bail!("MODEXP hint data too short for field"); + } + + // Get the slice + let field = &byte_data[*pos..*pos + len_bytes]; + *pos += len_bytes; + + Ok((field, len_bytes)) +} + +/// Validates that the hint data has the expected length. +/// +/// # Arguments +/// +/// * `data` - The hint data to validate +/// * `expected_len` - The expected number of u64 values +/// * `hint_name` - The name of the hint type for error messages +/// +/// # Returns +/// +/// * `Ok(())` - If the length is correct +/// * `Err(anyhow::Error)` - If the length is incorrect +#[inline] +fn validate_hint_length(data: &[T], expected_len: usize, hint_name: &str) -> anyhow::Result<()> { + if data.len() != expected_len { + anyhow::bail!( + "Invalid {} hint length: expected {}, got {}", + hint_name, + expected_len, + data.len(), + ); + } + Ok(()) +} diff --git a/ziskos-hints/src/handlers/modexp.rs b/ziskos-hints/src/handlers/modexp.rs new file mode 100644 index 000000000..c7578576f --- /dev/null +++ b/ziskos-hints/src/handlers/modexp.rs @@ -0,0 +1,40 @@ +use crate::{handlers::read_field_bytes, zisklib}; + +use anyhow::Result; + +// Processes a `MODEXP` hint. +#[inline] +pub fn modexp_hint(data: &[u64]) -> Result> { + let mut pos = 0; + let (base, base_len) = read_field_bytes(data, &mut pos)?; + let (exp, exp_len) = read_field_bytes(data, &mut pos)?; + let (modulus, modulus_len) = read_field_bytes(data, &mut pos)?; + + // Verify the data length matches: 3 length prefixes (8 bytes each) + field data, + // converted to u64 words (rounded up for alignment). + let expected_words = (24 + base_len + exp_len + modulus_len).div_ceil(8); + if expected_words != data.len() { + anyhow::bail!( + "MODEXP hint data length mismatch: expected {} words, got {} words", + expected_words, + data.len() + ); + } + + let mut hints = Vec::new(); + let mut result = vec![0u8; modulus_len]; + unsafe { + zisklib::modexp_bytes_c( + base.as_ptr(), + base_len, + exp.as_ptr(), + exp_len, + modulus.as_ptr(), + modulus_len, + result.as_mut_ptr(), + &mut hints, + ); + } + + Ok(hints) +} diff --git a/ziskos-hints/src/handlers/secp256k1.rs b/ziskos-hints/src/handlers/secp256k1.rs new file mode 100644 index 000000000..e0ca2321d --- /dev/null +++ b/ziskos-hints/src/handlers/secp256k1.rs @@ -0,0 +1,57 @@ +use crate::handlers::validate_hint_length; +use crate::hint_fields; +use crate::zisklib; + +use anyhow::Result; + +/// Processes an `HINT_SECP256K1_ECDSA_ADDRESS_RECOVER` hint. +#[inline] +pub fn secp256k1_ecdsa_address_recover(data: &[u64]) -> Result> { + hint_fields![SIG: 8, RECID: 1, MSG: 4]; + + validate_hint_length(data, EXPECTED_LEN, "HINT_SECP256K1_ECDSA_ADDRESS_RECOVER")?; + + let sig: &[u64; SIG_SIZE] = data[SIG_OFFSET..SIG_OFFSET + SIG_SIZE].try_into().unwrap(); + let recid: u8 = data[RECID_OFFSET] as u8; + let msg: &[u64; MSG_SIZE] = data[MSG_OFFSET..MSG_OFFSET + MSG_SIZE].try_into().unwrap(); + + let mut hints = Vec::new(); + let result: &mut [u8; 32] = &mut [0u8; 32]; + unsafe { + zisklib::secp256k1_ecdsa_address_recover_c( + sig.as_ptr() as *const u8, + recid, + msg.as_ptr() as *const u8, + result.as_mut_ptr(), + &mut hints, + ); + } + + Ok(hints) +} + +/// Processes an `HINT_SECP256K1_ECDSA_VERIFY_ADDRESS_RECOVER` hint. +#[inline] +pub fn secp256k1_ecdsa_verify_address_recover(data: &[u64]) -> Result> { + hint_fields![SIG: 8, MSG: 4, PK: 8]; + + validate_hint_length(data, EXPECTED_LEN, "HINT_SECP256K1_ECDSA_VERIFY_ADDRESS_RECOVER")?; + + let sig: &[u64; SIG_SIZE] = data[SIG_OFFSET..SIG_OFFSET + SIG_SIZE].try_into().unwrap(); + let msg: &[u64; MSG_SIZE] = data[MSG_OFFSET..MSG_OFFSET + MSG_SIZE].try_into().unwrap(); + let pk: &[u64; PK_SIZE] = data[PK_OFFSET..PK_OFFSET + PK_SIZE].try_into().unwrap(); + + let mut hints = Vec::new(); + let result: &mut [u8; 32] = &mut [0u8; 32]; + unsafe { + zisklib::secp256k1_ecdsa_verify_and_address_recover_c( + sig.as_ptr() as *const u8, + msg.as_ptr() as *const u8, + pk.as_ptr() as *const u8, + result.as_mut_ptr(), + &mut hints, + ); + } + + Ok(hints) +} diff --git a/ziskos-hints/src/handlers/secp256r1.rs b/ziskos-hints/src/handlers/secp256r1.rs new file mode 100644 index 000000000..b2973a0b3 --- /dev/null +++ b/ziskos-hints/src/handlers/secp256r1.rs @@ -0,0 +1,29 @@ +use crate::handlers::validate_hint_length; +use crate::hint_fields; +use crate::zisklib; + +use anyhow::Result; + +/// Processes an `HINT_SECP256R1_ECDSA_VERIFY` hint. +#[inline] +pub fn secp256r1_ecdsa_verify_hint(data: &[u64]) -> Result> { + hint_fields![MSG: 4, SIG: 8, PK: 8]; + + validate_hint_length(data, EXPECTED_LEN, "HINT_SECP256R1_ECDSA_VERIFY")?; + + let msg: &[u64; MSG_SIZE] = data[MSG_OFFSET..MSG_OFFSET + MSG_SIZE].try_into().unwrap(); + let sig: &[u64; SIG_SIZE] = data[SIG_OFFSET..SIG_OFFSET + SIG_SIZE].try_into().unwrap(); + let pk: &[u64; PK_SIZE] = data[PK_OFFSET..PK_OFFSET + PK_SIZE].try_into().unwrap(); + + let mut hints = Vec::new(); + unsafe { + zisklib::secp256r1_ecdsa_verify_c( + msg.as_ptr() as *const u8, + sig.as_ptr() as *const u8, + pk.as_ptr() as *const u8, + &mut hints, + ); + } + + Ok(hints) +} diff --git a/ziskos-hints/src/handlers/sha256.rs b/ziskos-hints/src/handlers/sha256.rs new file mode 100644 index 000000000..1971563ea --- /dev/null +++ b/ziskos-hints/src/handlers/sha256.rs @@ -0,0 +1,25 @@ +use crate::zisklib; + +use anyhow::Result; + +/// Processes an `HINT_SHA256` hint. +#[inline] +pub fn sha256_hint(data: &[u64], data_len_bytes: usize) -> Result> { + let data_len_words = data_len_bytes.div_ceil(8); + + if data.len() != data_len_words { + anyhow::bail!( + "HINT_SHA256: expected data length of {} bytes ({} words), got {} words", + data_len_bytes, + data_len_words, + data.len() + ); + } + + let bytes = unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, data_len_bytes) }; + + let mut hints = Vec::new(); + zisklib::sha256(bytes, &mut hints); + + Ok(hints) +} diff --git a/ziskos-hints/src/lib.rs b/ziskos-hints/src/lib.rs new file mode 100644 index 000000000..a3c64b23e --- /dev/null +++ b/ziskos-hints/src/lib.rs @@ -0,0 +1,14 @@ +//! ziskos-hints: ziskos compiled with hints feature enabled +//! +//! This crate compiles the symlinked core/ (which points to ziskos/entrypoint/src) +//! with the hints feature enabled, and adds hints-specific processing utilities. + +// Include the symlinked source as a module +#[path = "core/lib.rs"] +mod core; + +// Re-export everything from the symlinked implementation +pub use core::*; + +// Add hints-specific modules that only exist in ziskos-hints +pub mod handlers; diff --git a/ziskos/entrypoint/Cargo.toml b/ziskos/entrypoint/Cargo.toml index 5974a5968..d972133f7 100644 --- a/ziskos/entrypoint/Cargo.toml +++ b/ziskos/entrypoint/Cargo.toml @@ -14,11 +14,52 @@ num-bigint = { workspace = true } num-integer = { workspace = true } num-traits = { workspace = true } +precompiles-helpers = { workspace = true } + lazy_static = "1.5.0" -static_assertions = "1.1" rand = "0.8.5" getrandom = { version = "0.2", features = ["custom"] } cfg-if = "1.0" tiny-keccak = { version = "2.0.0", features = ["keccak"] } serde = { workspace = true, features = ["derive"] } -bincode = "2.0" \ No newline at end of file + +zisk-verifier = { workspace = true } +bincode = { workspace = true } +zisk-definitions = { path = "../../definitions" } +critical-section = { version = "1.2.0", optional = true } +embedded-alloc = { version = "0.6.0", optional = true } +talc = { version = "4.4.3", features = ["lock_api"], optional = true } +dlmalloc = { version = "0.2", default-features = false, optional = true } + +sha2 = { workspace = true } +fields = { git = "https://github.com/0xPolygonHermez/pil2-proofman.git", branch = "pre-develop-0.16.0", features = [ + "verify", +] } + +[target.'cfg(all(not(all(target_os = "zkvm", target_vendor = "zisk")), any(zisk_hints, zisk_hints_debug)))'.dependencies] +bytes = { version = "1.11.0", optional = true } +once_cell = { version = "1.21.3", optional = true } +paste = { version = "1.0", optional = true } +zisk-common = { path = "../../common", optional = true } +anyhow = { workspace = true, optional = true } +tokio = { workspace = true, optional = true} + +[target.'cfg(all(not(all(target_os = "zkvm", target_vendor = "zisk")), zisk_hints_metrics))'.dependencies] +ctor = { version = "0.2", optional = true } + +[features] +default = ["user-hints", "inputcpy"] +user-hints = ["dep:zisk-common", "dep:bytes", "dep:paste", "dep:once_cell", "dep:ctor", "dep:anyhow", "dep:tokio"] +inputcpy = [] +zisk-custom-alloc = [] +zisk-embedded-alloc = ["zisk-embedded-dlmalloc-alloc"] +zisk-embedded-dlmalloc-alloc = ["dep:embedded-alloc", "dep:critical-section", "dep:dlmalloc"] +zisk-embedded-talc-alloc = ["dep:embedded-alloc", "dep:critical-section", "dep:talc"] +zisk-embedded-tlfs-alloc = ["dep:embedded-alloc", "dep:critical-section"] + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = [ +'cfg(zisk_hints)', +'cfg(zisk_hints_metrics)', +'cfg(zisk_hints_debug)', +'cfg(zisk_hints_single_thread)'] } diff --git a/ziskos/entrypoint/src/alloc/alloc.rs b/ziskos/entrypoint/src/alloc/alloc.rs new file mode 100644 index 000000000..0fec67ade --- /dev/null +++ b/ziskos/entrypoint/src/alloc/alloc.rs @@ -0,0 +1,58 @@ +static mut HEAP_POS: usize = 0; +static mut HEAP_TOP: usize = 0; + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +#[no_mangle] +#[warn(dead_code)] +pub unsafe extern "C" fn init_sys_alloc() { + extern "C" { + static _kernel_heap_bottom: u8; + static _kernel_heap_top: u8; + } + + unsafe { + HEAP_POS = &_kernel_heap_bottom as *const u8 as usize; + HEAP_TOP = &_kernel_heap_top as *const u8 as usize; + }; +} + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +#[no_mangle] +#[inline(never)] +pub unsafe extern "C" fn sys_alloc_aligned(bytes: usize, align: usize) -> *mut u8 { + // SAFETY: Single threaded, so nothing else can touch this while we're working. + let mut heap_pos = unsafe { HEAP_POS }; + + let offset = heap_pos & (align - 1); + if offset != 0 { + heap_pos += align - offset; + } + + let ptr = heap_pos as *mut u8; + heap_pos += bytes; + + // Check to make sure heap doesn't collide with SYSTEM memory. + if HEAP_TOP < heap_pos { + panic!("OOM limit of heap with bump allocator"); + } + + unsafe { HEAP_POS = heap_pos }; + + ptr +} + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +use std::ptr; + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +#[no_mangle] +static mut SINK: u64 = 0; + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +#[no_mangle] +#[inline(never)] +pub unsafe extern "C" fn sys_alloc_log(op: u64, ptr: *mut u8, bytes: usize, align: usize) { + unsafe { + ptr::write_volatile(&raw mut SINK, bytes as u64 + op + (ptr as u64 & 0x02) + align as u64); + } +} diff --git a/ziskos/entrypoint/src/alloc/bump.rs b/ziskos/entrypoint/src/alloc/bump.rs new file mode 100644 index 000000000..0a040fb69 --- /dev/null +++ b/ziskos/entrypoint/src/alloc/bump.rs @@ -0,0 +1,39 @@ +use core::alloc::{GlobalAlloc, Layout}; + +use crate::alloc::{sys_alloc_aligned, sys_alloc_log}; +use crate::ziskos_memcpy; + +#[global_allocator] +pub static HEAP: BumpPointerAlloc = BumpPointerAlloc; + +pub struct BumpPointerAlloc; + +unsafe impl GlobalAlloc for BumpPointerAlloc { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + sys_alloc_aligned(layout.size(), layout.align()) + // let ptr = sys_alloc_aligned(layout.size(), layout.align()); + // sys_alloc_log(0, ptr, layout.size(), layout.align()); + // ptr + } + + unsafe fn dealloc(&self, _ptr: *mut u8, _layout: Layout) { + // sys_alloc_log(1, ptr, layout.size(), layout.align()) + // this allocator never deallocates memory + } + + unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { + self.alloc(layout) + } + unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { + let new_layout = Layout::from_size_align_unchecked(new_size, layout.align()); + + let new_ptr = self.alloc(new_layout); + + if !new_ptr.is_null() { + let copy_size = layout.size().min(new_size); + ziskos_memcpy!(ptr: new_ptr, ptr, copy_size); + } + + new_ptr + } +} diff --git a/ziskos/entrypoint/src/alloc/embedded_buddy.rs b/ziskos/entrypoint/src/alloc/embedded_buddy.rs new file mode 100644 index 000000000..a0fe557a3 --- /dev/null +++ b/ziskos/entrypoint/src/alloc/embedded_buddy.rs @@ -0,0 +1,38 @@ +use core::alloc::{GlobalAlloc, Layout}; +use critical_section::RawRestoreState; +use embedded_alloc::TlsfHeap as Heap; + +use super::kernel_heap::*; + +#[global_allocator] +static HEAP: EmbeddedAlloc = EmbeddedAlloc; + +static INNER_HEAP: Heap = Heap::empty(); + +struct CriticalSection; +critical_section::set_impl!(CriticalSection); + +unsafe impl critical_section::Impl for CriticalSection { + unsafe fn acquire() -> RawRestoreState {} + unsafe fn release(_token: RawRestoreState) {} +} + +pub fn init() { + unsafe { + let heap_start = &_kernel_heap_bottom as *const u8 as usize; + let heap_size = &_kernel_heap_size as *const u8 as usize; + INNER_HEAP.init(heap_start, heap_size); + } +} + +struct EmbeddedAlloc; + +unsafe impl GlobalAlloc for EmbeddedAlloc { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + INNER_HEAP.alloc(layout) + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + INNER_HEAP.dealloc(ptr, layout) + } +} diff --git a/ziskos/entrypoint/src/alloc/embedded_dlmalloc.rs b/ziskos/entrypoint/src/alloc/embedded_dlmalloc.rs new file mode 100644 index 000000000..32a093354 --- /dev/null +++ b/ziskos/entrypoint/src/alloc/embedded_dlmalloc.rs @@ -0,0 +1,86 @@ +use crate::ziskos::sys_write; +use core::alloc::{GlobalAlloc, Layout}; +use core::ptr::addr_of_mut; +use dlmalloc::{Allocator as DlAllocator, Dlmalloc}; + +use super::kernel_heap::*; + +// Implementar el backend que le da memoria a dlmalloc +struct ZiskSystem; + +unsafe impl DlAllocator for ZiskSystem { + // Equivalente a sbrk — dlmalloc pide más memoria aquí + fn alloc(&self, size: usize) -> (*mut u8, usize, u32) { + unsafe { + // Devuelves un bloque de tu heap reservado + let ptr = BUMP_PTR; + let aligned = (ptr + 7) & !7; + BUMP_PTR = aligned + size; + if BUMP_PTR > BUMP_END { + return (core::ptr::null_mut(), 0, 0); + } + (aligned as *mut u8, size, 0) + } + } + + fn remap(&self, _ptr: *mut u8, _oldsize: usize, _newsize: usize, _can_move: bool) -> *mut u8 { + core::ptr::null_mut() // no soportado + } + + fn free_part(&self, _ptr: *mut u8, _oldsize: usize, _newsize: usize) -> bool { + false // no soportado + } + + fn free(&self, _ptr: *mut u8, _size: usize) -> bool { + false // devolver memoria al sistema — no necesario + } + + fn can_release_part(&self, _flags: u32) -> bool { + false + } + + fn allocates_zeros(&self) -> bool { + false + } + + fn page_size(&self) -> usize { + 4096 + } +} + +static mut BUMP_PTR: usize = 0; +static mut BUMP_END: usize = 0; + +static mut DLMALLOC: Dlmalloc = Dlmalloc::new_with_allocator(ZiskSystem); + +struct Allocator; + +#[global_allocator] +static GLOBAL: Allocator = Allocator; + +unsafe impl core::alloc::GlobalAlloc for Allocator { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + (*addr_of_mut!(DLMALLOC)).malloc(layout.size(), layout.align()) + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + (*addr_of_mut!(DLMALLOC)).free(ptr, layout.size(), layout.align()) + } + + unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { + (*addr_of_mut!(DLMALLOC)).calloc(layout.size(), layout.align()) + } + + unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { + (*addr_of_mut!(DLMALLOC)).realloc(ptr, layout.size(), layout.align(), new_size) + } +} + +pub fn init() { + unsafe { + let heap_start = &_kernel_heap_bottom as *const u8 as usize; + let heap_end = &_kernel_heap_top as *const u8 as usize; + BUMP_PTR = heap_start; + BUMP_END = heap_end; + } +} diff --git a/ziskos/entrypoint/src/alloc/embedded_lla.rs b/ziskos/entrypoint/src/alloc/embedded_lla.rs new file mode 100644 index 000000000..7427a3a7c --- /dev/null +++ b/ziskos/entrypoint/src/alloc/embedded_lla.rs @@ -0,0 +1,35 @@ +use core::alloc::{GlobalAlloc, Layout}; +use core::ptr::addr_of_mut; +use linked_list_allocator::Heap; + +use super::kernel_heap::*; + +static mut HEAP: Heap = Heap::empty(); + +struct Allocator; + +#[global_allocator] +static GLOBAL: Allocator = Allocator; + +unsafe impl GlobalAlloc for Allocator { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + (*addr_of_mut!(HEAP)) + .allocate_first_fit(layout) + .map(|p| p.as_ptr()) + .unwrap_or(core::ptr::null_mut()) + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + if let Some(nn) = core::ptr::NonNull::new(ptr) { + (*addr_of_mut!(HEAP)).deallocate(nn, layout); + } + } +} + +pub fn init() { + unsafe { + let heap_start = &_kernel_heap_bottom as *const u8 as usize; + let heap_size = &_kernel_heap_size as *const u8 as usize; + (*addr_of_mut!(HEAP)).init(heap_start as *mut u8, heap_size); + } +} diff --git a/ziskos/entrypoint/src/alloc/embedded_llff.rs b/ziskos/entrypoint/src/alloc/embedded_llff.rs new file mode 100644 index 000000000..efa73b612 --- /dev/null +++ b/ziskos/entrypoint/src/alloc/embedded_llff.rs @@ -0,0 +1,22 @@ +use core::alloc::{GlobalAlloc, Layout}; +use critical_section::RawRestoreState; +use embedded_alloc::LlffHeap as Heap; + +#[global_allocator] +static HEAP: Heap = Heap::empty(); + +struct CriticalSection; +critical_section::set_impl!(CriticalSection); + +unsafe impl critical_section::Impl for CriticalSection { + unsafe fn acquire() -> RawRestoreState {} + unsafe fn release(_token: RawRestoreState) {} +} + +pub fn init() { + unsafe { + let heap_start = &_kernel_heap_bottom as *const u8 as usize; + let heap_size = &_kernel_heap_size as *const u8 as usize; + HEAP.init(heap_start, heap_size) + } +} diff --git a/ziskos/entrypoint/src/alloc/embedded_talc.rs b/ziskos/entrypoint/src/alloc/embedded_talc.rs new file mode 100644 index 000000000..b8bea9f9a --- /dev/null +++ b/ziskos/entrypoint/src/alloc/embedded_talc.rs @@ -0,0 +1,15 @@ +use super::kernel_heap::*; +use core::alloc::{GlobalAlloc, Layout}; +use talc::{ErrOnOom, Talc, Talck}; + +#[global_allocator] +static HEAP: Talck = Talc::new(ErrOnOom).lock(); + +pub fn init() { + unsafe { + let heap_start = &_kernel_heap_bottom as *const u8 as usize; + let heap_size = &_kernel_heap_size as *const u8 as usize; + let heap_span = talc::Span::from_base_size(heap_start as *mut u8, heap_size); + HEAP.lock().claim(heap_span).unwrap(); + } +} diff --git a/ziskos/entrypoint/src/alloc/embedded_tlfs.rs b/ziskos/entrypoint/src/alloc/embedded_tlfs.rs new file mode 100644 index 000000000..a0fe557a3 --- /dev/null +++ b/ziskos/entrypoint/src/alloc/embedded_tlfs.rs @@ -0,0 +1,38 @@ +use core::alloc::{GlobalAlloc, Layout}; +use critical_section::RawRestoreState; +use embedded_alloc::TlsfHeap as Heap; + +use super::kernel_heap::*; + +#[global_allocator] +static HEAP: EmbeddedAlloc = EmbeddedAlloc; + +static INNER_HEAP: Heap = Heap::empty(); + +struct CriticalSection; +critical_section::set_impl!(CriticalSection); + +unsafe impl critical_section::Impl for CriticalSection { + unsafe fn acquire() -> RawRestoreState {} + unsafe fn release(_token: RawRestoreState) {} +} + +pub fn init() { + unsafe { + let heap_start = &_kernel_heap_bottom as *const u8 as usize; + let heap_size = &_kernel_heap_size as *const u8 as usize; + INNER_HEAP.init(heap_start, heap_size); + } +} + +struct EmbeddedAlloc; + +unsafe impl GlobalAlloc for EmbeddedAlloc { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + INNER_HEAP.alloc(layout) + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + INNER_HEAP.dealloc(ptr, layout) + } +} diff --git a/ziskos/entrypoint/src/alloc/kernel_heap.rs b/ziskos/entrypoint/src/alloc/kernel_heap.rs new file mode 100644 index 000000000..abee0a76a --- /dev/null +++ b/ziskos/entrypoint/src/alloc/kernel_heap.rs @@ -0,0 +1,7 @@ +//! Kernel heap symbols defined in the linker script + +extern "C" { + pub static _kernel_heap_bottom: u8; + pub static _kernel_heap_size: u8; + pub static _kernel_heap_top: u8; +} diff --git a/ziskos/entrypoint/src/alloc/mod.rs b/ziskos/entrypoint/src/alloc/mod.rs new file mode 100644 index 000000000..25743448f --- /dev/null +++ b/ziskos/entrypoint/src/alloc/mod.rs @@ -0,0 +1,37 @@ +mod alloc; +pub use alloc::*; + +mod kernel_heap; + +#[cfg(all( + not(feature = "zisk-embedded-alloc"), + not(feature = "zisk-custom-alloc"), + not(feature = "zisk-embedded-dlmalloc-alloc"), + not(feature = "zisk-embedded-talc-alloc"), + not(feature = "zisk-embedded-tlfs-alloc") +))] +pub mod bump; + +#[cfg(any(feature = "zisk-embedded-alloc", feature = "zisk-embedded-dlmalloc-alloc"))] +pub mod embedded_dlmalloc; + +#[cfg(feature = "zisk-embedded-talc-alloc")] +pub mod embedded_talc; + +#[cfg(feature = "zisk-embedded-tlfs-alloc")] +pub mod embedded_tlfs; + +#[cfg(any(feature = "zisk-embedded-alloc", feature = "zisk-embedded-dlmalloc-alloc"))] +pub use embedded_dlmalloc as embedded; + +#[cfg(feature = "zisk-embedded-talc-alloc")] +pub use embedded_talc as embedded; + +#[cfg(feature = "zisk-embedded-tlfs-alloc")] +pub use embedded_tlfs as embedded; + +// disabled, worse performance +// pub mod embedded_lla; +// pub mod embedded_llff; +// pub use embedded_llff as embedded; +// pub use embedded_lla as embedded; diff --git a/ziskos/entrypoint/src/dma.rs b/ziskos/entrypoint/src/dma.rs new file mode 100644 index 000000000..2df391440 --- /dev/null +++ b/ziskos/entrypoint/src/dma.rs @@ -0,0 +1,220 @@ +/// Copies free-input data, as fcall result, directly to a memory location. +/// +/// This macro writes free-input data to the specified pointer using +/// custom CSR instructions. The memory does not need to be initialized. +/// +/// # Arguments +/// * `$dest` - Mutable reference to the destination (array, slice, or MaybeUninit) +/// * `$size` - Size in bytes (must be a const literal) +/// +/// # Safety +/// The caller must ensure the destination is valid and properly aligned. + +#[macro_export] +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +macro_rules! ziskos_inputcpy { + ($dest:expr, $size:literal) => {{ + unsafe { + core::arch::asm!( + "csrs {port}, {ptr}", + "addi x0, {ptr}, {size}", + port = const zisk_definitions::SYSCALL_DMA_INPUTCPY_ID, + size = const $size, + ptr = in(reg) $dest.as_mut_ptr(), + options(nostack, preserves_flags), + ); + } + }}; + ($dest:expr, $size:expr) => {{ + unsafe { + core::arch::asm!( + "csrs {port}, {ptr}", + "add x0, {ptr}, {size}", + port = const zisk_definitions::SYSCALL_DMA_INPUTCPY_ID, + size = in(reg) $size, + ptr = in(reg) $dest.as_mut_ptr(), + options(nostack, preserves_flags), + ); + } + }}; +} + +/// Copies memory from source to destination using DMA operations. +/// +/// This macro performs a memory copy operation using custom CSR instructions +/// for optimized performance in the zkVM environment. +/// +/// # Arguments +/// * `$dst` - Mutable reference to the destination (array, slice, or MaybeUninit) +/// * `$src` - Reference to the source (array or slice) +/// * `$size` - Size in bytes (can be a literal or expression) +/// +/// # Safety +/// The caller must ensure both source and destination are valid and properly aligned, +/// and that they do not overlap in memory. + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +#[macro_export] +macro_rules! ziskos_memcpy { + ($dst:expr, $src: expr, $size:literal) => {{ + unsafe { + core::arch::asm!( + "csrs {port}, {src}", + "addi x0, {dst}, {size}", + port = const zisk_definitions::SYSCALL_DMA_MEMCPY_ID, + size = const $size, + dst = in(reg) $dst.as_mut_ptr(), + src = in(reg) $src.as_ptr(), + options(nostack, preserves_flags), + ); + } + }}; + ($dst:expr, $src: expr, $size:expr) => {{ + unsafe { + core::arch::asm!( + "csrs {port}, {src}", + "add x0, {dst}, {size}", + port = const zisk_definitions::SYSCALL_DMA_MEMCPY_ID, + size = in(reg) $size, + dst = in(reg) $dst.as_mut_ptr(), + src = in(reg) $src.as_ptr(), + options(nostack, preserves_flags), + ); + } + }}; + (ptr: $dst:expr, $src:expr, $size:expr) => {{ + unsafe { + core::arch::asm!( + "csrs {port}, {src}", + "add x0, {dst}, {size}", + port = const zisk_definitions::SYSCALL_DMA_MEMCPY_ID, + size = in(reg) $size, + dst = in(reg) $dst, // ya es *mut u8, sin as_mut_ptr() + src = in(reg) $src, // ya es *mut u8, sin as_ptr() + options(nostack, preserves_flags), + ); + } + }}; +} + +/// Compares two memory regions for equality using DMA operations. +/// +/// This macro performs a memory comparison operation using custom CSR instructions +/// for optimized performance in the zkVM environment. The result is stored in a register. +/// +/// # Arguments +/// * `$dst` - Mutable reference to the first memory region (array or slice) +/// * `$src` - Reference to the second memory region (array or slice) +/// * `$size` - Size in bytes to compare (can be a literal or expression) +/// +/// # Safety +/// The caller must ensure both memory regions are valid and properly aligned. + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +#[macro_export] +macro_rules! ziskos_memcmp { + ($dst:expr, $src: expr, $size:literal) => {{ + let v: i64; + unsafe { + core::arch::asm!( + "csrs {port}, {src}", + "addi {res}, {dst}, {size}", + port = const zisk_definitions::SYSCALL_DMA_MEMCMP_ID, + size = const $size, + dst = in(reg) $dst.as_ptr(), + src = in(reg) $src.as_ptr(), + res = out(reg) v, + options(nostack, preserves_flags), + ); + } + v + }}; + ($dst:expr, $src: expr, $size:expr) => {{ + let v: i64; + unsafe { + core::arch::asm!( + "csrs {port}, {src}", + "add {res}, {dst}, {size}", + port = const zisk_definitions::SYSCALL_DMA_MEMCMP_ID, + size = in(reg) $size, + dst = in(reg) $dst.as_ptr(), + src = in(reg) $src.as_ptr(), + res = out(reg) v, + options(nostack, preserves_flags), + ); + } + v + }}; +} + +/// Fills a memory region with a constant byte value using DMA operations. +/// +/// This macro performs a memory set operation using custom CSR instructions +/// for optimized performance in the zkVM environment. +/// +/// # Arguments +/// * `$dst` - Mutable reference to the destination memory (array, slice, or MaybeUninit) +/// * `$value` - Byte value to fill (can be a literal or expression) +/// * `$size` - Size in bytes (can be a literal or expression) +/// +/// # Safety +/// The caller must ensure the destination is valid and properly aligned. + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +#[macro_export] +macro_rules! ziskos_memset { + ($dst:expr, $value: literal, $size:literal) => {{ + unsafe { + core::arch::asm!( + "csrsi {port}, 2", + "addi x0, {dst}, {size}", + "addi x0, {dst}, {value}", + port = const zisk_definitions::SYSCALL_DMA_MEMSET_ID, + size = const $size, + value = const $value, + dst = in(reg) $dst.as_mut_ptr(), + options(nostack, preserves_flags), + ); + } + }}; + ($dst:expr, $value: literal, $size:expr) => {{ + unsafe { + core::arch::asm!( + "csrs {port}, {dst}", + "addi x0, {size}, {value}", + port = const zisk_definitions::SYSCALL_DMA_MEMSET_ID, + size = in(reg) $size, + value = const $value, + dst = in(reg) $dst.as_mut_ptr(), + options(nostack, preserves_flags), + ); + } + }}; + (ptr: $dst:expr, $value: literal, $size:expr) => {{ + unsafe { + core::arch::asm!( + "csrs {port}, {dst}", + "addi x0, {size}, {value}", + port = const zisk_definitions::SYSCALL_DMA_MEMSET_ID, + size = in(reg) $size, + value = const $value, + dst = in(reg) $dst, + options(nostack, preserves_flags), + ); + } + }}; + ($dst:expr, $value: expr, $size:expr) => {{ + unsafe { + core::arch::asm!( + "call memset", + in("a0") $dst.as_mut_ptr(), + in("a1") $value, + in("a2") $size, + lateout("t0") _, + lateout("a1") _, + lateout("ra") _, + options(nostack, preserves_flags), + ); + } + }}; +} diff --git a/ziskos/entrypoint/src/dma/inputcpy.s b/ziskos/entrypoint/src/dma/inputcpy.s new file mode 100644 index 000000000..cd2fddd03 --- /dev/null +++ b/ziskos/entrypoint/src/dma/inputcpy.s @@ -0,0 +1,14 @@ + .section ".note.GNU-stack","",@progbits + .text + .attribute 4, 16 + .attribute 5, "rv64im" + .globl inputcpy + .p2align 4 + .type inputcpy,@function +inputcpy: + csrs 0x815, a1 # Marker: Write count (a2) to CSR 0x813 + add x0,a0,a2 + ret + + .size inputcpy, .-inputcpy + .section .text.hot,"ax",@progbits \ No newline at end of file diff --git a/ziskos/entrypoint/src/dma/memcmp.s b/ziskos/entrypoint/src/dma/memcmp.s new file mode 100644 index 000000000..2e6835d69 --- /dev/null +++ b/ziskos/entrypoint/src/dma/memcmp.s @@ -0,0 +1,14 @@ + .section ".note.GNU-stack","",@progbits + .text + .attribute 4, 16 + .attribute 5, "rv64im" + .globl memcmp + .p2align 4 + .type memcmp,@function +memcmp: + csrs 0x814, a1 # Marker: Write count (a2) to CSR 0x814 + add a0,a0,a2 + ret + + .size memcmp, .-memcmp + .section .text.hot,"ax",@progbits \ No newline at end of file diff --git a/ziskos/entrypoint/src/dma/memcpy.s b/ziskos/entrypoint/src/dma/memcpy.s new file mode 100644 index 000000000..a087f7d11 --- /dev/null +++ b/ziskos/entrypoint/src/dma/memcpy.s @@ -0,0 +1,14 @@ + .section ".note.GNU-stack","",@progbits + .text + .attribute 4, 16 + .attribute 5, "rv64im" + .globl memcpy + .p2align 4 + .type memcpy,@function +memcpy: + csrs 0x813, a1 # Marker: Write count (a2) to CSR 0x813 + add x0,a0,a2 + ret + + .size memcpy, .-memcpy + .section .text.hot,"ax",@progbits \ No newline at end of file diff --git a/ziskos/entrypoint/src/dma/memmove.s b/ziskos/entrypoint/src/dma/memmove.s new file mode 100644 index 000000000..af3655cac --- /dev/null +++ b/ziskos/entrypoint/src/dma/memmove.s @@ -0,0 +1,13 @@ + .section ".note.GNU-stack","",@progbits + .text + .attribute 4, 16 + .attribute 5, "rv64im" + .globl memmove + .p2align 4 + .type memmove,@function +memmove: + csrs 0x813, a1 # Marker: Write count (a2) to CSR 0x813 + add x0,a0,a2 + ret + .size memmove, .-memmove + .section .text.hot,"ax",@progbits \ No newline at end of file diff --git a/ziskos/entrypoint/src/dma/memset.s b/ziskos/entrypoint/src/dma/memset.s new file mode 100644 index 000000000..fc70badeb --- /dev/null +++ b/ziskos/entrypoint/src/dma/memset.s @@ -0,0 +1,57 @@ + .section ".note.GNU-stack","",@progbits + .text + .attribute 4, 16 + .attribute 5, "rv64im" + .globl memset + .p2align 4 + .type memset,@function +# memset(void *s, int c, size_t n) +# a0 = destination pointer (s) +# a1 = fill value (c) +# a2 = byte count (n) +memset: + bnez a1, .L_memset_non_zero + csrs 0x816, a0 + addi x0, a2, 0 + ret + +.L_memset_non_zero: + andi a1, a1, 0xff # Mask to byte value + slli a1, a1, 4 + la t0, .Ljump_table + add t0, t0, a1 + jr t0 + + # csrs 0x816, a1; addi x0, a0, fill_byte; ret; nop + # memset(a0, fill_byte, a2) + + # Jump table: 256 entries, each 16 bytes (4 instructions) + .p2align 4 +.Ljump_table: +.irp val, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + csrs 0x816, a0; addi x0, a2, \val; ret; nop +.endr +.irp val, 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 + csrs 0x816, a0; addi x0, a2, \val; ret; nop +.endr +.irp val, 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95 + csrs 0x816, a0; addi x0, a2, \val; ret; nop +.endr +.irp val, 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127 + csrs 0x816, a0; addi x0, a2, \val; ret; nop +.endr +.irp val, 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159 + csrs 0x816, a0; addi x0, a2, \val; ret; nop +.endr +.irp val, 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191 + csrs 0x816, a0; addi x0, a2, \val; ret; nop +.endr +.irp val, 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223 + csrs 0x816, a0; addi x0, a2, \val; ret; nop +.endr +.irp val, 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 + csrs 0x816, a0; addi x0, a2, \val; ret; nop +.endr + + .size memset, .-memset + .section .text.hot,"ax",@progbits \ No newline at end of file diff --git a/ziskos/entrypoint/src/hints/blake2b.rs b/ziskos/entrypoint/src/hints/blake2b.rs new file mode 100644 index 000000000..30fdb07d9 --- /dev/null +++ b/ziskos/entrypoint/src/hints/blake2b.rs @@ -0,0 +1,9 @@ +#[no_mangle] +pub unsafe extern "C" fn hint_blake2b_compress( + _rounds: u32, + _state: *mut u64, + _message: *const u64, + _offset: *const u64, + _final_block: u8, +) { +} diff --git a/ziskos/entrypoint/src/hints/bls12_381.rs b/ziskos/entrypoint/src/hints/bls12_381.rs new file mode 100644 index 000000000..d224ec80a --- /dev/null +++ b/ziskos/entrypoint/src/hints/bls12_381.rs @@ -0,0 +1,62 @@ +use crate::hints::macros::{define_hint, define_hint_pairs}; +use zisk_common::{ + HINT_BLS12_381_FP2_TO_G2, HINT_BLS12_381_FP_TO_G1, HINT_BLS12_381_G1_ADD, + HINT_BLS12_381_G1_MSM, HINT_BLS12_381_G2_ADD, HINT_BLS12_381_G2_MSM, + HINT_BLS12_381_PAIRING_CHECK, +}; + +define_hint! { + bls12_381_g1_add => { + hint_id: HINT_BLS12_381_G1_ADD, + params: (a: 96, b: 96), + is_result: false, + } +} + +define_hint_pairs! { + bls12_381_g1_msm => { + hint_id: HINT_BLS12_381_G1_MSM, + pair_len: 96 + 32, + is_result: false, + } +} + +define_hint! { + bls12_381_g2_add => { + hint_id: HINT_BLS12_381_G2_ADD, + params: (a: 192, b: 192), + is_result: false, + } +} + +define_hint_pairs! { + bls12_381_g2_msm => { + hint_id: HINT_BLS12_381_G2_MSM, + pair_len: 192 + 32, + is_result: false, + } +} + +define_hint_pairs! { + bls12_381_pairing_check => { + hint_id: HINT_BLS12_381_PAIRING_CHECK, + pair_len: 96 + 192, + is_result: false, + } +} + +define_hint! { + bls12_381_fp_to_g1 => { + hint_id: HINT_BLS12_381_FP_TO_G1, + params: (fp: 48), + is_result: false, + } +} + +define_hint! { + bls12_381_fp2_to_g2 => { + hint_id: HINT_BLS12_381_FP2_TO_G2, + params: (fp2: 96), + is_result: false, + } +} diff --git a/ziskos/entrypoint/src/hints/bn254.rs b/ziskos/entrypoint/src/hints/bn254.rs new file mode 100644 index 000000000..f495248de --- /dev/null +++ b/ziskos/entrypoint/src/hints/bn254.rs @@ -0,0 +1,26 @@ +use crate::hints::macros::{define_hint, define_hint_pairs}; +use zisk_common::{HINT_BN254_G1_ADD, HINT_BN254_G1_MUL, HINT_BN254_PAIRING_CHECK}; + +define_hint! { + bn254_g1_add => { + hint_id: HINT_BN254_G1_ADD, + params: (p1: 64, p2: 64), + is_result: false, + } +} + +define_hint! { + bn254_g1_mul => { + hint_id: HINT_BN254_G1_MUL, + params: (point: 64, scalar: 32), + is_result: false, + } +} + +define_hint_pairs! { + bn254_pairing_check => { + hint_id: HINT_BN254_PAIRING_CHECK, + pair_len: 64 + 128, + is_result: false, + } +} diff --git a/ziskos/entrypoint/src/hints/custom.rs b/ziskos/entrypoint/src/hints/custom.rs new file mode 100644 index 000000000..01e1f6032 --- /dev/null +++ b/ziskos/entrypoint/src/hints/custom.rs @@ -0,0 +1,30 @@ +use crate::hints::HINT_BUFFER; + +#[no_mangle] +pub unsafe extern "C" fn hint_custom( + hint_id: u32, + data_ptr: *const u8, + data_len: usize, + is_result: u8, +) { + if !HINT_BUFFER.is_enabled() { + return; + } + + #[cfg(zisk_hints_single_thread)] + if !crate::hints::check_main_thread() { + return; + } + + let mut w = HINT_BUFFER.begin_hint(hint_id, data_len, is_result != 0); + + w.write_data_ptr(data_ptr, data_len); + + let pad = (8 - (data_len & 7)) & 7; + if pad > 0 { + const ZERO_PAD: [u8; 8] = [0; 8]; + w.write_data_slice(&ZERO_PAD[..pad]); + } + + w.commit(); +} diff --git a/ziskos/entrypoint/src/hints/hint_buffer.rs b/ziskos/entrypoint/src/hints/hint_buffer.rs new file mode 100644 index 000000000..c52fc8934 --- /dev/null +++ b/ziskos/entrypoint/src/hints/hint_buffer.rs @@ -0,0 +1,303 @@ +use bytes::{Bytes, BytesMut}; +use std::io::{self, Write}; +use std::sync::{Arc, Condvar, Mutex, MutexGuard}; +use zisk_common::{CTRL_END, CTRL_START, HINT_INPUT}; + +pub const DEFAULT_BUFFER_LEN: usize = 1 << 20; // 1 MiB + // TODO: Set MAX_WRITE_LEN based on writer type (file or socket) +pub const MAX_WRITER_LEN: usize = 128 * 1024; // 128KB is the max write size for Unix sockets +pub const WRITE_BUFFER_FLUSH_LEN: usize = 64 * 1024; // Flush writer buffer once it exceeds 64KB +const MAX_INPUT_DATA_CHUNK: usize = 128 * 1024 - 8; // Max input data chunk size is 128KB minus 8 bytes for the header (length) +pub const HEADER_LEN: usize = 8; + +pub struct HintBuffer { + precompiles: Mutex, + input_data: Mutex, + not_empty: Condvar, + closed: Mutex, + paused: Mutex, +} + +struct HintBufferInner { + buf: BytesMut, + commit_pos: usize, +} + +pub struct WriteBuffer<'a> { + hb: &'a HintBuffer, + g: MutexGuard<'a, HintBufferInner>, +} + +pub fn build_hint_buffer() -> Arc { + Arc::new(HintBuffer { + precompiles: Mutex::new(HintBufferInner { + buf: BytesMut::with_capacity(DEFAULT_BUFFER_LEN), + commit_pos: 0, + }), + input_data: Mutex::new(HintBufferInner { + buf: BytesMut::with_capacity(DEFAULT_BUFFER_LEN), + commit_pos: 0, + }), + not_empty: Condvar::new(), + closed: Mutex::new(true), + paused: Mutex::new(false), + }) +} + +impl HintBufferInner { + #[inline(always)] + fn write_bytes(&mut self, src: &[u8]) { + self.buf.extend_from_slice(src); + } + + #[inline(always)] + fn commit(&mut self) { + self.commit_pos = self.buf.len(); + } +} + +impl HintBuffer { + pub fn close(&self) { + *self.closed.lock().unwrap() = true; + self.not_empty.notify_all(); + } + + pub fn reset(&self) { + let mut g = self.precompiles.lock().unwrap(); + g.buf.clear(); + g.commit_pos = 0; + let mut i = self.input_data.lock().unwrap(); + i.buf.clear(); + i.commit_pos = 0; + + *self.closed.lock().unwrap() = false; + *self.paused.lock().unwrap() = false; + self.not_empty.notify_all(); + } + + #[inline(always)] + pub fn pause(&self) { + *self.paused.lock().unwrap() = true; + } + + #[inline(always)] + pub fn resume(&self) { + *self.paused.lock().unwrap() = false; + } + + #[inline(always)] + pub fn is_paused(&self) -> bool { + *self.paused.lock().unwrap() + } + + #[inline(always)] + pub fn is_enabled(&self) -> bool { + let paused = *self.paused.lock().unwrap(); + let closed = *self.closed.lock().unwrap(); + !paused && !closed + } + + #[inline(always)] + pub fn begin_hint(&self, hint_id: u32, len: usize, is_result: bool) -> WriteBuffer<'_> { + let header = ((((if is_result { 0x8000_0000u64 } else { 0 }) | hint_id as u64) << 32) + | (len as u64)) + .to_le_bytes(); + + let mut g = self.precompiles.lock().unwrap(); + g.write_bytes(&header); + + WriteBuffer { hb: self, g } + } + + #[inline(always)] + pub fn write_hint_start(&self) { + let w = self.begin_hint(CTRL_START, 0, false); + w.commit(); + } + + #[inline(always)] + pub fn write_hint_end(&self) { + let w = self.begin_hint(CTRL_END, 0, false); + w.commit(); + } + + #[inline(always)] + pub fn begin_input_data(&self) -> WriteBuffer<'_> { + WriteBuffer { hb: self, g: self.input_data.lock().unwrap() } + } + + pub fn drain_to_writer( + &self, + writer: &mut W, + mut debug_writer: Option<&mut D>, + write_flush_threshold: usize, + ) -> io::Result<()> + where + W: Write + ?Sized, + D: Write + ?Sized, + { + // Write hints from the buffer to the writer and optionally to a debug writer + let mut write_all = |buf: &[u8]| -> io::Result<()> { + writer.write_all(buf)?; + + if let Some(debug_writer) = debug_writer.as_deref_mut() { + debug_writer.write_all(buf)?; + } + + Ok(()) + }; + + fn flush_write_buf(write_all: &mut F, buf: &mut Vec) -> io::Result<()> + where + F: FnMut(&[u8]) -> io::Result<()>, + { + if buf.is_empty() { + return Ok(()); + } + + debug_assert!(buf.len() <= MAX_WRITER_LEN); + write_all(buf)?; + buf.clear(); + + Ok(()) + } + + let mut flush_threshold = std::cmp::min(write_flush_threshold, MAX_WRITER_LEN); + flush_threshold = flush_threshold.max(1); + + let mut write_buf = Vec::with_capacity(flush_threshold); + 'drain: loop { + // Get chunk of hints to write from HintBuffer (under lock) + let chunk: Bytes = loop { + let mut g = self.precompiles.lock().unwrap(); + let mut i = self.input_data.lock().unwrap(); + let closed = *self.closed.lock().unwrap(); + + if g.commit_pos == 0 && i.commit_pos == 0 && !closed { + drop(i); // Release input_data lock before waiting + g = self.not_empty.wait(g).unwrap(); + continue; // Re-acquire both locks in the next iteration + } + + if g.commit_pos == 0 && i.commit_pos == 0 && closed { + break 'drain; + } + + break if g.commit_pos > 0 { + let n = g.commit_pos; + g.commit_pos = 0; + g.buf.split_to(n).freeze() + } else { + let n = i.commit_pos.min(MAX_INPUT_DATA_CHUNK); + i.commit_pos -= n; + let input_chunk = i.buf.split_to(n); + let header = (((HINT_INPUT as u64) << 32) | n as u64).to_le_bytes(); + let mut chunk = BytesMut::with_capacity(HEADER_LEN + n); + chunk.extend_from_slice(&header); + chunk.unsplit(input_chunk); + chunk.freeze() + }; + }; + + // Write hints from chunk without holding the lock + let mut chunk_pos = 0usize; + let chunk_len = chunk.len(); + let chunk_base = chunk.as_ptr(); + + while chunk_pos < chunk_len { + let hint_header = unsafe { + let header_bytes = core::slice::from_raw_parts(chunk_base.add(chunk_pos), 8); + u64::from_le_bytes(header_bytes.try_into().unwrap()) + }; + + let hint_data_len = (hint_header & 0xFFFF_FFFF) as usize; + let pad = (8 - (hint_data_len & 7)) & 7; + let hint_len = HEADER_LEN + hint_data_len + pad; + + #[cfg(zisk_hints_metrics)] + { + use std::hint; + + let hint_id = (hint_header >> 32) as u32 & 0x7FFF_FFFF; + crate::hints::metrics::inc_hint_count(hint_id, hint_len as u64); + } + + // If single hint exceeds MAX_WRITER_LEN, write it in chunks directly + if hint_len > MAX_WRITER_LEN { + flush_write_buf(&mut write_all, &mut write_buf)?; + + let mut hint_pos = 0usize; + while hint_pos < hint_len { + let chunk_size = std::cmp::min(MAX_WRITER_LEN, hint_len - hint_pos); + let hint_bytes: &[u8] = unsafe { + core::slice::from_raw_parts( + chunk_base.add(chunk_pos + hint_pos), + chunk_size, + ) + }; + + write_all(hint_bytes)?; + + hint_pos += chunk_size; + } + + chunk_pos += hint_len; + continue; + } + + let hint_bytes: &[u8] = + unsafe { core::slice::from_raw_parts(chunk_base.add(chunk_pos), hint_len) }; + + if write_buf.len() + hint_len > MAX_WRITER_LEN { + flush_write_buf(&mut write_all, &mut write_buf)?; + } + + write_buf.extend_from_slice(hint_bytes); + + chunk_pos += hint_len; + } + + if write_buf.len() >= flush_threshold { + flush_write_buf(&mut write_all, &mut write_buf)?; + } + } + + flush_write_buf(&mut write_all, &mut write_buf)?; + + // Flush the writer and debug writer at the end + writer.flush()?; + if let Some(debug_writer) = debug_writer.as_deref_mut() { + debug_writer.flush()?; + } + + Ok(()) + } +} + +impl<'a> WriteBuffer<'a> { + #[inline(always)] + pub fn write_data_ptr(&mut self, data: *const u8, len: usize) { + if len == 0 { + return; + } + debug_assert!(!data.is_null(), "write_data_ptr called with null data pointer"); + let payload = unsafe { std::slice::from_raw_parts(data, len) }; + self.g.write_bytes(payload); + } + + #[inline(always)] + pub fn write_data_slice(&mut self, payload: &[u8]) { + if payload.is_empty() { + return; + } + self.g.write_bytes(payload); + } + + #[inline(always)] + pub fn commit(mut self) { + self.g.commit(); + + drop(self.g); + self.hb.not_empty.notify_one(); + } +} diff --git a/ziskos/entrypoint/src/hints/input_data.rs b/ziskos/entrypoint/src/hints/input_data.rs new file mode 100644 index 000000000..574e64e65 --- /dev/null +++ b/ziskos/entrypoint/src/hints/input_data.rs @@ -0,0 +1,34 @@ +use crate::hints::macros::define_hint_ptr; +use zisk_common::HINT_INPUT; + +#[no_mangle] +pub unsafe extern "C" fn hint_input_data(input_data_ptr: *const u8, input_data_len: usize) { + if !crate::hints::HINT_BUFFER.is_enabled() { + return; + } + + #[cfg(zisk_hints_single_thread)] + if !crate::hints::check_main_thread() { + return; + } + + let pad = (8 - (input_data_len & 7)) & 7; + let mut w = crate::hints::HINT_BUFFER.begin_input_data(); + + // Write the length of the input data as the first 8 bytes of the hint data, + // followed by the input data itself, and then pad with zeros if necessary + let input_data_len_bytes: [u8; 8] = (input_data_len as u64).to_le_bytes(); + w.write_data_slice(&input_data_len_bytes); + w.write_data_ptr(input_data_ptr, input_data_len); + if pad > 0 { + const ZERO_PAD: [u8; 8] = [0; 8]; + w.write_data_slice(&ZERO_PAD[..pad]); + } + w.commit(); +} + +#[cfg(zisk_hints_metrics)] +#[ctor::ctor] +fn input_data_register_meta() { + crate::hints::metrics::register_hint(HINT_INPUT, stringify!(input_data).to_string()); +} diff --git a/ziskos/entrypoint/src/hints/keccak256.rs b/ziskos/entrypoint/src/hints/keccak256.rs new file mode 100644 index 000000000..82b8058c2 --- /dev/null +++ b/ziskos/entrypoint/src/hints/keccak256.rs @@ -0,0 +1,2 @@ +#[no_mangle] +pub unsafe extern "C" fn hint_keccak256(_input_ptr: *const u8, _input_len: usize) {} diff --git a/ziskos/entrypoint/src/hints/kzg.rs b/ziskos/entrypoint/src/hints/kzg.rs new file mode 100644 index 000000000..0985b86d5 --- /dev/null +++ b/ziskos/entrypoint/src/hints/kzg.rs @@ -0,0 +1,10 @@ +use crate::hints::macros::define_hint; +use zisk_common::HINT_VERIFY_KZG_PROOF; + +define_hint! { + verify_kzg_proof => { + hint_id: HINT_VERIFY_KZG_PROOF, + params: (z: 32, y: 32, commitment: 48, proof: 48), + is_result: false, + } +} diff --git a/ziskos/entrypoint/src/hints/macros.rs b/ziskos/entrypoint/src/hints/macros.rs new file mode 100644 index 000000000..97c6f63b8 --- /dev/null +++ b/ziskos/entrypoint/src/hints/macros.rs @@ -0,0 +1,189 @@ +macro_rules! define_hint { + ( + $name:ident => { + hint_id: $hint_id:expr, + params: ( $( $arg:ident : $len:literal ),+ $(,)? ), + is_result: $is_result:expr, + } + ) => { + paste::paste! { + #[no_mangle] + pub unsafe extern "C" fn []($( $arg: *const u8 ),+) { + if !$crate::hints::HINT_BUFFER.is_enabled() { + return; + } + + #[cfg(zisk_hints_single_thread)] + if !$crate::hints::check_main_thread() { return; } + + let mut total_len = 0usize; + $( + total_len += $len; + )+ + + let mut w = $crate::hints::HINT_BUFFER.begin_hint( + $hint_id, + total_len, + $is_result, + ); + + $( + w.write_data_ptr($arg, $len); + )+ + + w.commit(); + } + + $crate::hints::macros::register_hint_meta!($name, $hint_id); + } + }; +} + +macro_rules! define_hint_pairs { + ( + $name:ident => { + hint_id: $hint_id:expr, + pair_len: $pair_len:expr, + is_result: $is_result:expr, + } + ) => { + paste::paste! { + #[no_mangle] + pub unsafe extern "C" fn [](pairs: *const u8, num_pairs: usize) { + if !$crate::hints::HINT_BUFFER.is_enabled() { + return; + } + + #[cfg(zisk_hints_single_thread)] + if !$crate::hints::check_main_thread() { return; } + + let total_len = 8 + (num_pairs * ($pair_len as usize)); + + let mut w = $crate::hints::HINT_BUFFER.begin_hint( + $hint_id, + total_len, + $is_result, + ); + + let num_pairs_bytes: [u8; 8] = (num_pairs as u64).to_le_bytes(); + w.write_data_slice(&num_pairs_bytes); + + w.write_data_ptr(pairs, num_pairs * ($pair_len as usize)); + + w.commit(); + } + + $crate::hints::macros::register_hint_meta!($name, $hint_id); + } + }; +} + +macro_rules! define_hint_ptr { + ( + $name:ident => { + hint_id: $hint_id:expr, + param: $arg:ident, + is_result: $is_result:expr, + } + ) => { + paste::paste! { + #[no_mangle] + pub unsafe extern "C" fn []([<$arg _ptr>]: *const u8, [<$arg _len>]: usize) { + if !$crate::hints::HINT_BUFFER.is_enabled() { + return; + } + + #[cfg(zisk_hints_single_thread)] + if !$crate::hints::check_main_thread() { return; } + + let pad = (8 - ([<$arg _len>] & 7)) & 7; + + let mut w = $crate::hints::HINT_BUFFER.begin_hint( + $hint_id, + [<$arg _len>], + $is_result, + ); + + w.write_data_ptr([<$arg _ptr>], [<$arg _len>]); + + if pad > 0 { + const ZERO_PAD: [u8; 8] = [0; 8]; + w.write_data_slice(&ZERO_PAD[..pad]); + } + + w.commit(); + } + + $crate::hints::macros::register_hint_meta!($name, $hint_id); + } + }; + ( + $name:ident => { + hint_id: $hint_id:expr, + params: ( $( $arg:ident ),+ $(,)? ), + is_result: $is_result:expr, + } + ) => { + paste::paste! { + #[no_mangle] + pub unsafe extern "C" fn []( + $( [<$arg _ptr>]: *const u8, [<$arg _len>]: usize ),+ + ) { + if !$crate::hints::HINT_BUFFER.is_enabled() { + return; + } + + #[cfg(zisk_hints_single_thread)] + if !$crate::hints::check_main_thread() { return; } + + let mut total_len = 0usize; + $( + total_len += 8 + [<$arg _len>]; + )+ + + let pad = (8 - (total_len & 7)) & 7; + + let mut w = $crate::hints::HINT_BUFFER.begin_hint( + $hint_id, + total_len, + $is_result, + ); + + $( + { + let len_bytes: [u8; 8] = ([<$arg _len>] as u64).to_le_bytes(); + w.write_data_slice(&len_bytes); + + w.write_data_ptr([<$arg _ptr>], [<$arg _len>]); + } + )+ + + if pad > 0 { + const ZERO_PAD: [u8; 8] = [0; 8]; + w.write_data_slice(&ZERO_PAD[..pad]); + } + + w.commit(); + } + + $crate::hints::macros::register_hint_meta!($name, $hint_id); + } + }; +} + +macro_rules! register_hint_meta { + ($name:ident, $hint_id:expr) => { + paste::paste! { + #[cfg(zisk_hints_metrics)] + #[ctor::ctor] + fn [<$name _register_meta>]() { + $crate::hints::metrics::register_hint($hint_id, stringify!($name).to_string()); + } + } + }; +} + +pub(crate) use define_hint; +pub(crate) use define_hint_pairs; +pub(crate) use define_hint_ptr; +pub(crate) use register_hint_meta; diff --git a/ziskos/entrypoint/src/hints/metrics.rs b/ziskos/entrypoint/src/hints/metrics.rs new file mode 100644 index 000000000..40ab92aca --- /dev/null +++ b/ziskos/entrypoint/src/hints/metrics.rs @@ -0,0 +1,62 @@ +use once_cell::sync::Lazy; +use std::{collections::HashMap, sync::RwLock}; + +pub(crate) static HINTS_METRICS: Lazy>> = + Lazy::new(|| RwLock::new(HashMap::new())); + +#[derive(Clone, Debug)] +pub(crate) struct HintRegisterInfo { + pub hint_name: String, + pub count: u64, + pub size: u64, +} + +pub(crate) fn register_hint(hint_id: u32, hint_name: String) { + HINTS_METRICS + .write() + .expect("HINTS_METRICS poisoned") + .insert(hint_id, HintRegisterInfo { hint_name, count: 0, size: 0 }); +} + +pub(crate) fn inc_hint_count(hint_id: u32, hint_size: u64) { + if let Ok(mut hints) = HINTS_METRICS.write() { + if let Some(info) = hints.get_mut(&hint_id) { + info.count += 1; + info.size += hint_size; + } + } +} + +pub(crate) fn print_metrics() { + let hints = HINTS_METRICS.read().expect("HINTS_METRICS poisoned"); + let mut total_hints = 0; + let mut total_size = 0; + println!("Hints usage summary:"); + for (_, info) in hints.iter() { + total_hints += info.count; + total_size += info.size; + } + for (_, info) in hints.iter() { + if info.count > 0 { + let percentage = if total_size == 0 { + 0.0 + } else { + ((info.size as f64 * 100.0) / total_size as f64 * 10.0).round() / 10.0 + }; + println!( + " {}: {}, {} bytes ({:.1}%)", + info.hint_name, info.count, info.size, percentage + ); + } + } + println!("Total hints: {}", total_hints); + println!("Total size: {} bytes", total_size); +} + +pub(crate) fn reset_metrics() { + let mut hints = HINTS_METRICS.write().expect("HINTS_METRICS poisoned"); + for (_, info) in hints.iter_mut() { + info.count = 0; + info.size = 0; + } +} diff --git a/ziskos/entrypoint/src/hints/mod.rs b/ziskos/entrypoint/src/hints/mod.rs new file mode 100644 index 000000000..f6c25bbc7 --- /dev/null +++ b/ziskos/entrypoint/src/hints/mod.rs @@ -0,0 +1,348 @@ +mod blake2b; +mod bls12_381; +mod bn254; +mod custom; +mod hint_buffer; +mod input_data; +mod keccak256; +mod kzg; +mod macros; +mod modexp; +mod secp256k1; +mod secp256r1; +mod sha256f; + +#[cfg(zisk_hints_metrics)] +mod metrics; + +use crate::hints::hint_buffer::{ + build_hint_buffer, HintBuffer, MAX_WRITER_LEN, WRITE_BUFFER_FLUSH_LEN, +}; +use anyhow::{anyhow, Result}; +use once_cell::sync::Lazy; +use std::cell::UnsafeCell; +use std::path::PathBuf; +use std::thread::{self, JoinHandle}; +use std::time::{Duration, Instant}; +use std::{ffi::CStr, os::raw::c_char}; +use std::{ + io::{self, BufWriter, Write}, + sync::Arc, +}; +use tokio::sync::oneshot; +use zisk_common::io::{StreamWrite, UnixSocketStreamWriter}; + +#[cfg(zisk_hints_single_thread)] +use std::sync::Mutex; +#[cfg(zisk_hints_single_thread)] +use std::thread::ThreadId; + +pub use blake2b::*; +pub use bls12_381::*; +pub use bn254::*; +pub use custom::*; +pub use input_data::*; +pub use keccak256::*; +pub use kzg::*; +pub use modexp::*; +pub use secp256k1::*; +pub use secp256r1::*; +pub use sha256f::*; + +pub const CLIENT_CONNECT_TIMEOUT: Duration = Duration::from_secs(3); +pub const WAIT_FOR_CLIENT_RETRY_DELAY: Duration = Duration::from_millis(5); + +static HINT_BUFFER: Lazy> = Lazy::new(|| build_hint_buffer()); +static HINT_WRITER_HANDLE: Lazy = + Lazy::new(HintFileWriterHandleCell::new); + +pub struct HintFileWriterHandleCell { + inner: UnsafeCell>>>, +} + +unsafe impl Sync for HintFileWriterHandleCell {} + +impl HintFileWriterHandleCell { + pub const fn new() -> Self { + Self { inner: UnsafeCell::new(None) } + } + + pub fn take(&self) -> Option>> { + unsafe { (*self.inner.get()).take() } + } + + pub fn store(&self, handle: JoinHandle>) { + // Safety: caller guarantees single-threaded access when mutating the handle. + unsafe { + *self.inner.get() = Some(handle); + } + } +} + +fn wait_for_hints_writer() -> Result<()> { + if let Some(handle) = HINT_WRITER_HANDLE.take() { + HINT_BUFFER.close(); + match handle.join() { + Ok(result) => { + if let Err(err) = result { + return Err(anyhow!( + "Failed previous hints writer thread result, error: {}", + err + )); + } + } + Err(e) => { + return Err(anyhow!("Failed previous hints writer thread, error: {:?}", e)); + } + } + } + + Ok(()) +} +pub fn init_hints() { + // Initialize the main thread ID for single-threaded assert (if enabled) + #[cfg(zisk_hints_single_thread)] + { + let tid = std::thread::current().id(); + *MAIN_TID.lock().unwrap() = Some(tid); + } + + #[cfg(zisk_hints_metrics)] + crate::hints::metrics::reset_metrics(); + + HINT_BUFFER.reset(); + + // Write HINT_START + HINT_BUFFER.write_hint_start(); +} + +pub fn init_hints_file(hints_file_path: PathBuf, ready: Option>) -> Result<()> { + wait_for_hints_writer()?; + + if let Some(tx) = ready { + let _ = tx.send(()); + } + + init_hints(); + + let handle = thread::spawn(move || write_hints_to_file(hints_file_path)); + HINT_WRITER_HANDLE.store(handle); + + Ok(()) +} + +pub fn init_hints_socket( + socket_path: PathBuf, + debug_file: Option, + write_flush_threshold: Option, + ready: Option>, +) -> Result<()> { + wait_for_hints_writer()?; + + // Create the Unix socket writer (server) + let mut socket_writer = UnixSocketWriter::new(&socket_path)?; + + // Open the connection + socket_writer.open()?; + + // Notify that socket is ready + if let Some(tx) = ready { + let _ = tx.send(()); + } + + // Wait for client to connect with a timeout + if let Err(e) = socket_writer.wait_for_client(CLIENT_CONNECT_TIMEOUT) { + return Err(anyhow!("Failed to wait for client to connect to hints socket, error: {}", e)); + } + + init_hints(); + + let handle = thread::spawn(move || { + let flush_threshold = write_flush_threshold.unwrap_or(WRITE_BUFFER_FLUSH_LEN); + write_hints_to_socket(socket_writer, debug_file, flush_threshold) + }); + HINT_WRITER_HANDLE.store(handle); + + Ok(()) +} + +pub fn close_hints() -> Result<()> { + #[cfg(zisk_hints_single_thread)] + { + *MAIN_TID.lock().unwrap() = None; + } + + // Write HINT_END + HINT_BUFFER.write_hint_end(); + + // Close the hint buffer to signal the writer thread to finish + HINT_BUFFER.close(); + + // Wait for the writer thread to finish and check for errors + let handle = HINT_WRITER_HANDLE.take(); + if let Some(handle) = handle { + match handle.join() { + Ok(result) => match result { + Ok(()) => Ok(()), + Err(e) => return Err(anyhow!("Failed hints writer thread result, error: {}", e)), + }, + Err(e) => Err(anyhow!("Failed hints writer thread, error: {:?}", e)), + } + } else { + Ok(()) + } +} + +pub fn write_hints( + writer: &mut W, + debug_writer: Option<&mut dyn Write>, + write_flush_threshold: usize, +) -> io::Result<()> { + // Write hints from the buffer + HINT_BUFFER.drain_to_writer(writer, debug_writer, write_flush_threshold)?; + + #[cfg(zisk_hints_metrics)] + crate::hints::metrics::print_metrics(); + + Ok(()) +} + +fn write_hints_to_file(path: PathBuf) -> io::Result<()> { + debug_assert!(cfg!(target_endian = "little")); + + let file = std::fs::File::create(path)?; + let mut file_writer = BufWriter::with_capacity(1 << 20, file); + + write_hints(&mut file_writer, None, MAX_WRITER_LEN)?; + + Ok(()) +} + +struct UnixSocketWriter { + inner: UnixSocketStreamWriter, +} + +impl UnixSocketWriter { + pub fn new(path: &PathBuf) -> Result { + let writer = UnixSocketStreamWriter::new(path)?; + Ok(Self { inner: writer }) + } + + pub fn open(&mut self) -> Result<()> { + self.inner.open() + } + + pub fn wait_for_client(&mut self, timeout: Duration) -> Result<()> { + let start = Instant::now(); + while !self.inner.is_client_connected() { + if start.elapsed() >= timeout { + return Err(anyhow!("Timeout waiting for client to connect to socket")); + } + thread::sleep(WAIT_FOR_CLIENT_RETRY_DELAY); + } + + Ok(()) + } + + pub fn close(&mut self) -> Result<()> { + self.inner.close() + } +} + +impl Write for UnixSocketWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.inner.write(buf).map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string())) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush().map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string())) + } +} + +fn write_hints_to_socket( + mut socket_writer: UnixSocketWriter, + debug_file: Option, + write_flush_threshold: usize, +) -> io::Result<()> { + debug_assert!(cfg!(target_endian = "little")); + + if let Some(path) = debug_file { + let file = std::fs::File::create(path)?; + let mut debug_writer = BufWriter::with_capacity(1 << 20, file); // 1 MiB buffer + write_hints( + &mut socket_writer, + Some(&mut debug_writer as &mut dyn Write), + write_flush_threshold, + )?; + } else { + write_hints(&mut socket_writer, None, write_flush_threshold)?; + } + + socket_writer.close().map_err(io::Error::other)?; + + Ok(()) +} + +#[cfg(zisk_hints_single_thread)] +static MAIN_TID: Mutex> = Mutex::new(None); + +#[cfg(zisk_hints_single_thread)] +#[inline(always)] +pub(crate) fn check_main_thread() -> bool { + let tid = std::thread::current().id(); + let guard = MAIN_TID.lock().unwrap(); + + match *guard { + Some(main_tid) => { + if main_tid != tid { + println!("Warning: trying to write hint from thread {:?} but MAIN_TID is {:?}. Ignoring...", tid, main_tid); + return false; + } + true + } + None => { + println!("Warning: trying to write hint from thread {:?} before MAIN_TID is initialized. Ignoring...", tid); + false + } + } +} + +// Logs hint message; gated by `hints_enabled()` on non-Zisk targets and always-on for Zisk +#[inline(always)] +pub fn hint_log>(msg: S) { + // We check if hints are enable only for non-zisk targets, since in zisk targets hints are not used + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + if !HINT_BUFFER.is_enabled() { + return; + } + + println!("{}", msg.as_ref()); +} + +// Extern functions for C interface + +#[no_mangle] +pub extern "C" fn pause_hints() -> bool { + let already_paused = HINT_BUFFER.is_paused(); + HINT_BUFFER.pause(); + already_paused +} + +#[no_mangle] +pub extern "C" fn resume_hints() { + HINT_BUFFER.resume(); +} + +#[no_mangle] +pub unsafe extern "C" fn hint_log_c(msg: *const c_char) { + if msg.is_null() { + return; + } + + let c_str = unsafe { CStr::from_ptr(msg) }; + + match c_str.to_str() { + Ok(s) => hint_log(s), + Err(_) => return, + } +} diff --git a/ziskos/entrypoint/src/hints/modexp.rs b/ziskos/entrypoint/src/hints/modexp.rs new file mode 100644 index 000000000..e038a81de --- /dev/null +++ b/ziskos/entrypoint/src/hints/modexp.rs @@ -0,0 +1,10 @@ +use crate::hints::macros::define_hint_ptr; +use zisk_common::HINT_MODEXP; + +define_hint_ptr! { + modexp_bytes => { + hint_id: HINT_MODEXP, + params: (base, exp, modulus), + is_result: false, + } +} diff --git a/ziskos/entrypoint/src/hints/secp256k1.rs b/ziskos/entrypoint/src/hints/secp256k1.rs new file mode 100644 index 000000000..4ca4dd3c8 --- /dev/null +++ b/ziskos/entrypoint/src/hints/secp256k1.rs @@ -0,0 +1,20 @@ +use crate::hints::macros::define_hint; +use zisk_common::{ + HINT_SECP256K1_ECDSA_ADDRESS_RECOVER, HINT_SECP256K1_ECDSA_VERIFY_ADDRESS_RECOVER, +}; + +define_hint! { + secp256k1_ecdsa_address_recover => { + hint_id: HINT_SECP256K1_ECDSA_ADDRESS_RECOVER, + params: (sig: 64, recid: 8, msg: 32), + is_result: false, + } +} + +define_hint! { + secp256k1_ecdsa_verify_and_address_recover => { + hint_id: HINT_SECP256K1_ECDSA_VERIFY_ADDRESS_RECOVER, + params: (sig: 64, msg: 32, pk: 64), + is_result: false, + } +} diff --git a/ziskos/entrypoint/src/hints/secp256r1.rs b/ziskos/entrypoint/src/hints/secp256r1.rs new file mode 100644 index 000000000..41ef46ec7 --- /dev/null +++ b/ziskos/entrypoint/src/hints/secp256r1.rs @@ -0,0 +1,10 @@ +use crate::hints::macros::define_hint; +use zisk_common::HINT_SECP256R1_ECDSA_VERIFY; + +define_hint! { + secp256r1_ecdsa_verify => { + hint_id: HINT_SECP256R1_ECDSA_VERIFY, + params: (msg: 32, sig: 64, pk: 64), + is_result: false, + } +} diff --git a/ziskos/entrypoint/src/hints/sha256f.rs b/ziskos/entrypoint/src/hints/sha256f.rs new file mode 100644 index 000000000..4379df561 --- /dev/null +++ b/ziskos/entrypoint/src/hints/sha256f.rs @@ -0,0 +1,2 @@ +#[no_mangle] +pub unsafe extern "C" fn hint_sha256(_f_ptr: *const u8, _f_len: usize) {} diff --git a/ziskos/entrypoint/src/io.rs b/ziskos/entrypoint/src/io.rs new file mode 100644 index 000000000..8d9deaebf --- /dev/null +++ b/ziskos/entrypoint/src/io.rs @@ -0,0 +1,86 @@ +//! I/O utilities for Zisk zkVM programs. +//! +//! This module provides a high-level API for reading inputs and committing public outputs. + +use crate::{read_input, set_output}; +use serde::{de::DeserializeOwned, Serialize}; + +/// Read a deserializable object from the input stream. +/// +/// ### Examples +/// ```ignore +/// use serde::{Deserialize, Serialize}; +/// +/// #[derive(Serialize, Deserialize)] +/// struct MyStruct { +/// a: u32, +/// b: u32, +/// } +/// +/// let data: MyStruct = ziskos::io::read(); +/// ``` +/// +/// Note: This uses zero-copy deserialization on zkvm to avoid unnecessary data copies. +pub fn read() -> T { + let bytes = read_input_slice(); + bincode::deserialize(&bytes).expect("Deserialization failed") +} + +/// Read raw bytes from the input stream. +/// +/// ### Examples +/// ```ignore +/// let data: Vec = ziskos::io::read_vec(); +/// ``` +pub fn read_vec() -> Vec { + read_input() +} + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +pub fn read_input_slice<'a>() -> &'a [u8] { + crate::read_slice_zerocopy() +} + +#[allow(unused)] +#[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] +pub fn read_input_slice() -> Box<[u8]> { + read_input().into_boxed_slice() +} + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +pub fn read_proof<'a>() -> &'a [u8] { + crate::read_slice_zerocopy() +} + +#[allow(unused)] +#[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] +pub fn read_proof() -> Box<[u8]> { + read_input().into_boxed_slice() +} + +/// Commit a serializable value to public outputs. +/// The value is serialized with bincode and written as 32-bit chunks. +pub fn commit(value: &T) { + let bytes = bincode::serialize(value).expect("Serialization failed"); + write(&bytes); +} + +/// Write raw bytes to public outputs. +/// Bytes are written as 32-bit little-endian values. +pub fn write(buf: &[u8]) { + let chunks = buf.len().div_ceil(4); + + for i in 0..chunks { + let start = i * 4; + let end = (start + 4).min(buf.len()); + let mut bytes = [0u8; 4]; + bytes[..end - start].copy_from_slice(&buf[start..end]); + let val = u32::from_le_bytes(bytes); + set_output(i, val); + } +} + +pub fn verify_zisk_proof(zisk_proof: &[u8]) -> bool { + let (proof, vk) = zisk_proof.split_at(zisk_proof.len() - 32); + zisk_verifier::verify_vadcop_final_proof(proof, vk) +} diff --git a/ziskos/entrypoint/src/lib.rs b/ziskos/entrypoint/src/lib.rs index ae6ae0903..c5a434091 100644 --- a/ziskos/entrypoint/src/lib.rs +++ b/ziskos/entrypoint/src/lib.rs @@ -4,17 +4,56 @@ #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] use core::arch::asm; #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +mod dma; +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] mod fcall; + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +mod alloc; + mod profile; #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] pub use fcall::*; +pub mod io; pub use profile::*; - +pub mod syscalls; pub mod zisklib; +pub mod ziskos_definitions; -pub mod syscalls; +#[cfg(all( + not(all(target_os = "zkvm", target_vendor = "zisk")), + any(zisk_hints, zisk_hints_debug), + feature = "user-hints" +))] +pub mod hints; -pub mod ziskos_definitions; +#[cfg(all(not(all(target_os = "zkvm", target_vendor = "zisk")), zisk_hints))] +extern "C" { + fn hint_input_data(input_data_ptr: *const u8, input_data_len: usize); +} + +#[cfg(all(not(all(target_os = "zkvm", target_vendor = "zisk")), zisk_hints_debug))] +extern "C" { + fn hint_log_c(msg: *const std::os::raw::c_char); +} + +#[cfg(zisk_hints_debug)] +pub fn hint_log>(msg: S) { + // On native we call external C function to log hints, since it controls if hints are paused or not + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + use std::ffi::CString; + + if let Ok(c) = CString::new(msg.as_ref()) { + unsafe { hint_log_c(c.as_ptr()) }; + } + } + // On zkvm/zisk, we can just print directly + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + println!("{}", msg.as_ref()); + } +} #[macro_export] macro_rules! entrypoint { @@ -40,41 +79,123 @@ macro_rules! entrypoint { #[allow(unused_imports)] use crate::ziskos_definitions::ziskos_config::*; +/// Initial offset for input reading. +/// zkvm: 8 bytes offset due to INPUT_ADDR memory layout +/// native: 0 bytes offset (file starts at position 0) #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] -pub fn read_input() -> Vec { - read_input_slice().to_vec() -} - +const INPUT_INITIAL_OFFSET: usize = 8; #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] -pub fn read_input() -> Vec { - use std::{fs::File, io::Read}; +const INPUT_INITIAL_OFFSET: usize = 0; - let mut file = - File::open("build/input.bin").expect("Error opening input file at: build/input.bin"); - let mut buffer = Vec::new(); - file.read_to_end(&mut buffer).unwrap(); - buffer +/// Pointer to the current position in the input buffer/file. +static mut INPUT_POS: usize = INPUT_INITIAL_OFFSET; + +/// Reset the input position to the beginning. +pub fn read_reset() { + unsafe { INPUT_POS = INPUT_INITIAL_OFFSET }; } +/// Read a slice directly from INPUT_ADDR without copying (zero-copy). +/// +/// This returns a slice pointing directly to the input memory region. +/// Use this when you want to deserialize directly without an intermediate copy. +/// The INPUT_POS is advanced after this call. #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] -pub fn read_input_slice<'a>() -> &'a [u8] { - // Create a slice of the first 8 bytes to get the size - let bytes = unsafe { core::slice::from_raw_parts((INPUT_ADDR as *const u8).add(8), 8) }; - // Convert the slice to a u64 (little-endian) - let size: u64 = u64::from_le_bytes(bytes.try_into().unwrap()); +pub(crate) fn read_slice_zerocopy<'a>() -> &'a [u8] { + // SAFETY: Single threaded, so nothing else can touch INPUT_POS while we're working. + let input_pos = unsafe { INPUT_POS }; + let addr = (INPUT_ADDR as usize) + input_pos; + + // Ensure the 8-byte length prefix is ready and read it + crate::zisklib::fcall_input_ready(&((addr + 7) as u64)); + let len = unsafe { + let bytes = core::slice::from_raw_parts(addr as *const u8, 8); + u64::from_le_bytes(bytes.try_into().unwrap()) as usize + }; + + // Ensure the data is ready (8-byte aligned) + let data_addr = addr + 8; + let aligned_len = (len + 7) & !0x7; + crate::zisklib::fcall_input_ready(&((data_addr + aligned_len - 1) as u64)); + + // Update input position: move past length (8 bytes) + data (8-byte aligned) + unsafe { INPUT_POS = input_pos + 8 + aligned_len }; + + let data_slice = unsafe { core::slice::from_raw_parts(data_addr as *const u8, len) }; + + #[cfg(zisk_hints_debug)] + { + let start_bytes = &data_slice[..data_slice.len().min(64)]; + let ellipsis = if data_slice.len() > 64 { "..." } else { "" }; + hint_log(format!( + "hint_input_data (input_data: {:x?}{} , input_data_len: {}", + start_bytes, + ellipsis, + data_slice.len() + )); + } - unsafe { core::slice::from_raw_parts((INPUT_ADDR as *const u8).add(16), size as usize) } + data_slice +} + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +pub(crate) fn read_input() -> Vec { + read_slice_zerocopy().to_vec() } #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] -pub fn read_input_slice() -> Box<[u8]> { - read_input().into_boxed_slice() +pub(crate) fn read_input() -> Vec { + use std::{ + fs::File, + io::{Read, Seek, SeekFrom}, + }; + + let input_pos = unsafe { INPUT_POS }; + + let mut file = + File::open("build/input.bin").expect("Error opening input file at: build/input.bin"); + + // Seek to the current position + file.seek(SeekFrom::Start(input_pos as u64)).expect("Failed to seek in input file"); + + // Read the 8-byte length prefix + let mut len_bytes = [0u8; 8]; + file.read_exact(&mut len_bytes).expect("Failed to read length prefix from input file"); + let len = u64::from_le_bytes(len_bytes) as usize; + + // Read the actual data + let mut data = vec![0u8; len]; + file.read_exact(&mut data).expect("Failed to read data from input file"); + + // Advance INPUT_POS: 8 bytes for length + 8-byte aligned data + let aligned_len = (len + 7) & !0x7; + unsafe { + INPUT_POS = input_pos + 8 + aligned_len; + } + + #[cfg(zisk_hints)] + unsafe { + hint_input_data(data.as_ptr(), data.len()); + } + + #[cfg(zisk_hints_debug)] + { + let start_bytes = &data[..data.len().min(64)]; + let ellipsis = if data.len() > 64 { "..." } else { "" }; + hint_log(format!( + "hint_input_data (input_data: {:x?}{} , input_data_len: {})", + start_bytes, + ellipsis, + data.len() + )); + } + + data } #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] -pub fn set_output(id: usize, value: u32) { +pub(crate) fn set_output(id: usize, value: u32) { use std::arch::asm; - let addr_n: *mut u32; let addr_v: *mut u32; let arch_id_zisk: usize; @@ -88,33 +209,21 @@ pub fn set_output(id: usize, value: u32) { assert!(id < 64, "Maximum number of public outputs: 64"); if arch_id_zisk == ARCH_ID_ZISK as usize { - addr_n = OUTPUT_ADDR as *mut u32; - addr_v = (OUTPUT_ADDR + 4 + 4 * (id as u64)) as *mut u32; + addr_v = (OUTPUT_ADDR + 4 * (id as u64)) as *mut u32; } else { - addr_n = 0x1000_0000 as *mut u32; - addr_v = (0x1000_0000 + 4 + 4 * (id as u64)) as *mut u32; - } - - let n; - - unsafe { - n = core::ptr::read(addr_n) as usize; - } - - if id + 1 > n { - unsafe { core::ptr::write_volatile(addr_n, (id + 1) as u32) }; + addr_v = (0x1000_0000 + 4 * (id as u64)) as *mut u32; } unsafe { core::ptr::write_volatile(addr_v, value) }; } #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] -pub fn set_output(id: usize, value: u32) { +pub(crate) fn set_output(id: usize, value: u32) { println!("public {id}: {value:#010x}"); } #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] -mod ziskos { +pub mod ziskos { use crate::ziskos_definitions::ziskos_config::*; use core::arch::asm; @@ -174,12 +283,27 @@ mod ziskos { extern "C" { fn main(); } + #[cfg(any( + feature = "zisk-embedded-alloc", + feature = "zisk-embedded-dlmalloc-alloc", + feature = "zisk-embedded-talc-alloc", + feature = "zisk-embedded-tlfs-alloc" + ))] + crate::alloc::embedded::init(); + #[cfg(all( + not(feature = "zisk-embedded-alloc"), + not(feature = "zisk-embedded-dlmalloc-alloc"), + not(feature = "zisk-embedded-talc-alloc"), + not(feature = "zisk-embedded-tlfs-alloc") + ))] + crate::alloc::init_sys_alloc(); + main() } } #[no_mangle] - extern "C" fn sys_write(_fd: u32, write_ptr: *const u8, nbytes: usize) { + pub extern "C" fn sys_write(_fd: u32, write_ptr: *const u8, nbytes: usize) { let arch_id_zisk: usize; let mut addr: *mut u8 = 0x1000_0000 as *mut u8; @@ -244,46 +368,26 @@ mod ziskos { unimplemented!("sys_argv"); } - #[no_mangle] - pub unsafe extern "C" fn sys_alloc_aligned(bytes: usize, align: usize) -> *mut u8 { - use core::arch::asm; - let heap_bottom: usize; - // UNSAFE: This is fine, just loading some constants. - unsafe { - // using inline assembly is easier to access linker constants - asm!( - "la {heap_bottom}, _kernel_heap_bottom", - heap_bottom = out(reg) heap_bottom, - options(nomem) - ) - }; - - // Pointer to next heap address to use, or 0 if the heap has not yet been - // initialized. - static mut HEAP_POS: usize = 0; - - // SAFETY: Single threaded, so nothing else can touch this while we're working. - let mut heap_pos = unsafe { HEAP_POS }; - - if heap_pos == 0 { - heap_pos = heap_bottom; + pub extern "C" fn sys_print_hex(val: usize, ln: bool) { + let mut buf = [0u8; 19]; // "0x" + 16 hex + \n — stack, no heap + buf[0] = b'0'; + buf[1] = b'x'; + let mut v = val; + for i in (2..18).rev() { + buf[i] = b"0123456789abcdef"[v & 0xF]; + v >>= 4; } - - let offset = heap_pos & (align - 1); - if offset != 0 { - heap_pos += align - offset; + if ln { + buf[18] = b'\n'; + sys_write(1, buf.as_ptr(), buf.len()); + } else { + sys_write(1, buf.as_ptr(), buf.len() - 1); } - - let ptr = heap_pos as *mut u8; - heap_pos += bytes; - - // Check to make sure heap doesn't collide with SYSTEM memory. - //if SYSTEM_START < heap_pos { - // panic!(); - // } - - unsafe { HEAP_POS = heap_pos }; - - ptr } + + core::arch::global_asm!(include_str!("dma/memcpy.s")); + core::arch::global_asm!(include_str!("dma/memmove.s")); + core::arch::global_asm!(include_str!("dma/memcmp.s")); + //core::arch::global_asm!(include_str!("dma/inputcpy.s")); + core::arch::global_asm!(include_str!("dma/memset.s")); } diff --git a/ziskos/entrypoint/src/memcpy_test.rs b/ziskos/entrypoint/src/memcpy_test.rs new file mode 100644 index 000000000..074a7c635 --- /dev/null +++ b/ziskos/entrypoint/src/memcpy_test.rs @@ -0,0 +1,489 @@ +#[cfg(test)] +mod memcpy_tests { + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + use super::ziskos::memcpy; + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + fn memcpy(dst: *mut u8, src: *const u8, len: usize) -> *mut u8 { + unsafe { + std::ptr::copy(src, dst, len); + } + dst + } + use std::alloc::{alloc, dealloc, Layout}; + + // Helper function to create aligned memory + unsafe fn alloc_aligned(size: usize, align: usize) -> *mut u8 { + let layout = Layout::from_size_align(size + align, align).unwrap(); + let ptr = alloc(layout); + if ptr.is_null() { + panic!("Failed to allocate memory"); + } + // Align the pointer + let aligned = (ptr as usize + align - 1) & !(align - 1); + aligned as *mut u8 + } + + // Helper function to deallocate aligned memory + unsafe fn dealloc_aligned(ptr: *mut u8, size: usize, align: usize) { + let layout = Layout::from_size_align(size + align, align).unwrap(); + // We need to get back to the original pointer, but for simplicity in tests, + // we'll use a different approach + dealloc(ptr, layout); + } + + #[test] + fn test_memcpy_zero_length() { + unsafe { + let src = [1u8, 2, 3, 4]; + let mut dst = [0u8; 4]; + + let result = memcpy(dst.as_mut_ptr(), src.as_ptr(), 0); + + assert_eq!(result, dst.as_mut_ptr()); + assert_eq!(dst, [0, 0, 0, 0]); // Should remain unchanged + } + } + + #[test] + fn test_memcpy_single_byte() { + unsafe { + let src = [0x42u8]; + let mut dst = [0u8; 1]; + + memcpy(dst.as_mut_ptr(), src.as_ptr(), 1); + + assert_eq!(dst[0], 0x42); + } + } + + #[test] + fn test_memcpy_aligned_8_small() { + unsafe { + // Test 8-byte aligned pointers with small copy (< 32 bytes) + let src = alloc_aligned(64, 8); + let dst = alloc_aligned(64, 8); + + // Initialize source data + for i in 0..16 { + *src.add(i) = (i + 1) as u8; + } + + memcpy(dst, src, 16); + + // Verify copy + for i in 0..16 { + assert_eq!(*dst.add(i), (i + 1) as u8, "Mismatch at byte {}", i); + } + + dealloc_aligned(src, 64, 8); + dealloc_aligned(dst, 64, 8); + } + } + + #[test] + fn test_memcpy_aligned_8_large() { + unsafe { + // Test 8-byte aligned pointers with large copy (> 32 bytes) + let src = alloc_aligned(128, 8); + let dst = alloc_aligned(128, 8); + + // Initialize source data + for i in 0..64 { + *src.add(i) = (i % 256) as u8; + } + + memcpy(dst, src, 64); + + // Verify copy + for i in 0..64 { + assert_eq!(*dst.add(i), (i % 256) as u8, "Mismatch at byte {}", i); + } + + dealloc_aligned(src, 128, 8); + dealloc_aligned(dst, 128, 8); + } + } + + #[test] + fn test_memcpy_src_unaligned() { + unsafe { + // Test unaligned source pointer + let src_base = alloc_aligned(64, 8); + let dst = alloc_aligned(64, 8); + let src = src_base.add(3); // Unaligned by 3 bytes + + // Initialize source data + for i in 0..20 { + *src.add(i) = (i + 0x10) as u8; + } + + memcpy(dst, src, 20); + + // Verify copy + for i in 0..20 { + assert_eq!(*dst.add(i), (i + 0x10) as u8, "Mismatch at byte {}", i); + } + + dealloc_aligned(src_base, 64, 8); + dealloc_aligned(dst, 64, 8); + } + } + + #[test] + fn test_memcpy_dst_unaligned() { + unsafe { + // Test unaligned destination pointer + let src = alloc_aligned(64, 8); + let dst_base = alloc_aligned(64, 8); + let dst = dst_base.add(5); // Unaligned by 5 bytes + + // Initialize source data + for i in 0..20 { + *src.add(i) = (i + 0x20) as u8; + } + + memcpy(dst, src, 20); + + // Verify copy + for i in 0..20 { + assert_eq!(*dst.add(i), (i + 0x20) as u8, "Mismatch at byte {}", i); + } + + dealloc_aligned(src, 64, 8); + dealloc_aligned(dst_base, 64, 8); + } + } + + #[test] + fn test_memcpy_both_unaligned() { + unsafe { + // Test both pointers unaligned + let src_base = alloc_aligned(64, 8); + let dst_base = alloc_aligned(64, 8); + let src = src_base.add(2); // Unaligned by 2 bytes + let dst = dst_base.add(6); // Unaligned by 6 bytes + + // Initialize source data + for i in 0..25 { + *src.add(i) = (i + 0x30) as u8; + } + + memcpy(dst, src, 25); + + // Verify copy + for i in 0..25 { + assert_eq!(*dst.add(i), (i + 0x30) as u8, "Mismatch at byte {}", i); + } + + dealloc_aligned(src_base, 64, 8); + dealloc_aligned(dst_base, 64, 8); + } + } + + #[test] + fn test_memcpy_edge_sizes() { + unsafe { + let sizes = [1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65]; + + for &size in &sizes { + let src = alloc_aligned(128, 8); + let dst = alloc_aligned(128, 8); + + // Initialize source data + for i in 0..size { + *src.add(i) = ((i * 3 + 7) % 256) as u8; + } + + memcpy(dst, src, size); + + // Verify copy + for i in 0..size { + assert_eq!( + *dst.add(i), + ((i * 3 + 7) % 256) as u8, + "Size {} mismatch at byte {}", + size, + i + ); + } + + dealloc_aligned(src, 128, 8); + dealloc_aligned(dst, 128, 8); + } + } + } + + #[test] + fn test_memcpy_overlapping_forward() { + unsafe { + // Test overlapping memory (src before dst) - should work + let mut buffer = [0u8; 20]; + + // Initialize data + for i in 0..10 { + buffer[i] = (i + 0x40) as u8; + } + + // Copy from buffer[0..10] to buffer[5..15] + memcpy(buffer.as_mut_ptr().add(5), buffer.as_ptr(), 10); + + // Verify - first 5 bytes unchanged, next 10 are the copy + for i in 0..5 { + assert_eq!(buffer[i], (i + 0x40) as u8); + } + for i in 5..15 { + assert_eq!(buffer[i], (i - 5 + 0x40) as u8); + } + } + } + + #[test] + fn test_memcpy_return_value() { + unsafe { + let src = [1u8, 2, 3, 4]; + let mut dst = [0u8; 4]; + let dst_ptr = dst.as_mut_ptr(); + + let result = memcpy(dst_ptr, src.as_ptr(), 4); + + assert_eq!(result, dst_ptr, "Return value should be original dst pointer"); + } + } + + #[test] + fn test_memcpy_large_unaligned() { + unsafe { + // Test large copy with unaligned pointers + let src_base = alloc_aligned(256, 8); + let dst_base = alloc_aligned(256, 8); + let src = src_base.add(3); // Unaligned + let dst = dst_base.add(1); // Unaligned + + // Initialize source data with pattern + for i in 0..200 { + *src.add(i) = ((i * 7 + 13) % 256) as u8; + } + + memcpy(dst, src, 200); + + // Verify copy + for i in 0..200 { + assert_eq!( + *dst.add(i), + ((i * 7 + 13) % 256) as u8, + "Large unaligned mismatch at byte {}", + i + ); + } + + dealloc_aligned(src_base, 256, 8); + dealloc_aligned(dst_base, 256, 8); + } + } + + #[test] + fn test_memcpy_debug_print() { + unsafe { + let src = [0x12u8, 0x34, 0x56, 0x78]; + let mut dst = [0u8; 4]; + + println!("Before memcpy:"); + println!(" src: {:p} = {:02x?}", src.as_ptr(), src); + println!(" dst: {:p} = {:02x?}", dst.as_ptr(), dst); + + memcpy(dst.as_mut_ptr(), src.as_ptr(), 4); + + println!("After memcpy:"); + println!(" dst: {:p} = {:02x?}", dst.as_ptr(), dst); + + assert_eq!(dst, src); + } + } + + #[test] + fn test_pointer_printing_formats() { + unsafe { + let data = [0xDEu8, 0xAD, 0xBE, 0xEF]; + let ptr = data.as_ptr(); + + println!("\nDifferent ways to print pointers:"); + println!("Standard format: {:p}", ptr); + println!("Hex lowercase: 0x{:x}", ptr as usize); + println!("Hex UPPERCASE: 0x{:X}", ptr as usize); + println!("With padding: 0x{:016x}", ptr as usize); + println!("Auto-prefixed: {:#x}", ptr as usize); + println!("Debug format: {:?}", ptr); + + // También mostrar como imprimir la data + println!("\nData at pointer:"); + println!("Hex bytes: {:02x?}", data); + println!("Hex UPPER bytes: {:02X?}", data); + println!("Pretty debug: {:#02x?}", data); + } + } + + #[test] + fn test_memcpy_large_aligned_568699() { + unsafe { + const SIZE: usize = 568699; + + // Allocate aligned memory for both source and destination + let src = alloc_aligned(SIZE + 64, 8); + let dst = alloc_aligned(SIZE + 64, 8); + + println!("\nTest memcpy with aligned pointers and size: {}", SIZE); + println!("Source pointer: {:p} (0x{:016x})", src, src as usize); + println!("Destination pointer: {:p} (0x{:016x})", dst, dst as usize); + println!("Alignment check src: {} (should be 0)", (src as usize) & 7); + println!("Alignment check dst: {} (should be 0)", (dst as usize) & 7); + + // Initialize source data with a predictable pattern + for i in 0..SIZE { + *src.add(i) = ((i * 73 + 127) % 256) as u8; + } + + // Perform the copy + let start = std::time::Instant::now(); + let result = memcpy(dst, src, SIZE); + let elapsed = start.elapsed(); + + println!("Copy completed in: {:?}", elapsed); + println!( + "Throughput: {:.2} MB/s", + (SIZE as f64) / (1024.0 * 1024.0) / elapsed.as_secs_f64() + ); + + // Verify the return value + assert_eq!(result, dst, "Return value should be original dst pointer"); + + // Verify the copy by checking every byte + let mut mismatches = 0; + for i in 0..SIZE { + let expected = ((i * 73 + 127) % 256) as u8; + let actual = *dst.add(i); + if actual != expected { + if mismatches < 10 { + // Only print first 10 mismatches + println!( + "Mismatch at byte {}: expected 0x{:02x}, got 0x{:02x}", + i, expected, actual + ); + } + mismatches += 1; + } + } + + if mismatches > 0 { + println!("Total mismatches: {}", mismatches); + panic!("Copy verification failed with {} mismatches", mismatches); + } + + println!("✓ All {} bytes copied correctly", SIZE); + + // Test some specific boundary checks + println!("\nBoundary checks:"); + println!("First byte: src[0]=0x{:02x}, dst[0]=0x{:02x}", *src, *dst); + let last_idx = SIZE - 1; + println!( + "Last byte: src[{}]=0x{:02x}, dst[{}]=0x{:02x}", + last_idx, + *src.add(last_idx), + last_idx, + *dst.add(last_idx) + ); + + // Check bytes at key positions (32-byte boundaries, etc.) + let check_positions = [31, 32, 63, 64, 127, 128, 255, 256, 511, 512, 1023, 1024]; + for &pos in &check_positions { + if pos < SIZE { + let expected = ((pos * 73 + 127) % 256) as u8; + let actual = *dst.add(pos); + println!( + "Position {}: expected=0x{:02x}, actual=0x{:02x} {}", + pos, + expected, + actual, + if expected == actual { "✓" } else { "✗" } + ); + assert_eq!(actual, expected, "Mismatch at position {}", pos); + } + } + + dealloc_aligned(src, SIZE + 64, 8); + dealloc_aligned(dst, SIZE + 64, 8); + } + } + + #[test] + fn test_memcpy_various_large_sizes() { + // Test various large sizes to ensure robustness + let test_sizes = [ + 568699, // Original request + 568700, // Just one more + 568698, // Just one less + 1048576, // 1 MB + 524288, // 512 KB + 131072, // 128 KB + 65536, // 64 KB + 32768, // 32 KB + 16384, // 16 KB + ]; + + for &size in &test_sizes { + unsafe { + println!("\nTesting size: {} bytes", size); + + let src = alloc_aligned(size + 64, 8); + let dst = alloc_aligned(size + 64, 8); + + // Simple pattern for faster initialization + for i in 0..size { + *src.add(i) = (i % 256) as u8; + } + + let start = std::time::Instant::now(); + memcpy(dst, src, size); + let elapsed = start.elapsed(); + + println!( + " Time: {:?}, Throughput: {:.2} MB/s", + elapsed, + (size as f64) / (1024.0 * 1024.0) / elapsed.as_secs_f64() + ); + + // Quick verification - check first, last, and some middle bytes + assert_eq!(*dst, 0, "First byte mismatch for size {}", size); + if size > 1 { + let last_idx = size - 1; + let expected_last = (last_idx % 256) as u8; + assert_eq!( + *dst.add(last_idx), + expected_last, + "Last byte mismatch for size {}", + size + ); + } + + // Check a few strategic positions + let check_positions = [size / 4, size / 2, 3 * size / 4]; + for &pos in &check_positions { + if pos < size { + let expected = (pos % 256) as u8; + assert_eq!( + *dst.add(pos), + expected, + "Mismatch at position {} for size {}", + pos, + size + ); + } + } + + dealloc_aligned(src, size + 64, 8); + dealloc_aligned(dst, size + 64, 8); + } + } + + println!("\n✓ All large size tests passed!"); + } +} diff --git a/ziskos/entrypoint/src/syscalls/add256.rs b/ziskos/entrypoint/src/syscalls/add256.rs index e895b524f..58dd63e6b 100644 --- a/ziskos/entrypoint/src/syscalls/add256.rs +++ b/ziskos/entrypoint/src/syscalls/add256.rs @@ -28,10 +28,22 @@ pub struct SyscallAdd256Params<'a> { /// /// The caller must ensure that the data is aligned to a 64-bit boundary. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_add256(params: &mut SyscallAdd256Params) -> u64 { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_add256")] +pub extern "C" fn syscall_add256( + params: &mut SyscallAdd256Params, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u64 { #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); + { + let cout = precompiles_helpers::add256(params.a, params.b, params.cin, params.c); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(params.c); + hints.push(cout); + } + cout + } #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall_ret_u64!(0x811, params) + ziskos_syscall_ret_u64!(zisk_definitions::SYSCALL_ADD256_ID, params) } diff --git a/ziskos/entrypoint/src/syscalls/arith256.rs b/ziskos/entrypoint/src/syscalls/arith256.rs index f9b77b7c0..0505f942a 100644 --- a/ziskos/entrypoint/src/syscalls/arith256.rs +++ b/ziskos/entrypoint/src/syscalls/arith256.rs @@ -31,10 +31,21 @@ pub struct SyscallArith256Params<'a> { /// /// The caller must ensure that the data is aligned to a 64-bit boundary. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_arith256(params: &mut SyscallArith256Params) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_arith256")] +pub extern "C" fn syscall_arith256( + params: &mut SyscallArith256Params, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x801, params); + ziskos_syscall!(zisk_definitions::SYSCALL_ARITH256_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + precompiles_helpers::arith256(params.a, params.b, params.c, params.dl, params.dh); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(params.dl); + hints.extend_from_slice(params.dh); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/arith256_mod.rs b/ziskos/entrypoint/src/syscalls/arith256_mod.rs index 60e9f6bcb..9b77f878d 100644 --- a/ziskos/entrypoint/src/syscalls/arith256_mod.rs +++ b/ziskos/entrypoint/src/syscalls/arith256_mod.rs @@ -34,10 +34,20 @@ pub struct SyscallArith256ModParams<'a> { /// /// The caller must ensure that the data is aligned to a 64-bit boundary. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_arith256_mod(params: &mut SyscallArith256ModParams) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_arith256_mod")] +pub extern "C" fn syscall_arith256_mod( + params: &mut SyscallArith256ModParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x802, params); + ziskos_syscall!(zisk_definitions::SYSCALL_ARITH256_MOD_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + precompiles_helpers::arith256_mod(params.a, params.b, params.c, params.module, params.d); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(params.d); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/arith384_mod.rs b/ziskos/entrypoint/src/syscalls/arith384_mod.rs index d030519ed..a9a9dfde2 100644 --- a/ziskos/entrypoint/src/syscalls/arith384_mod.rs +++ b/ziskos/entrypoint/src/syscalls/arith384_mod.rs @@ -34,10 +34,20 @@ pub struct SyscallArith384ModParams<'a> { /// /// The caller must ensure that the data is aligned to a 64-bit boundary. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_arith384_mod(params: &mut SyscallArith384ModParams) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_arith384_mod")] +pub extern "C" fn syscall_arith384_mod( + params: &mut SyscallArith384ModParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x80B, params); + ziskos_syscall!(zisk_definitions::SYSCALL_ARITH384_MOD_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + precompiles_helpers::arith384_mod(params.a, params.b, params.c, params.module, params.d); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(params.d); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/blake2br.rs b/ziskos/entrypoint/src/syscalls/blake2br.rs new file mode 100644 index 000000000..8337cd897 --- /dev/null +++ b/ziskos/entrypoint/src/syscalls/blake2br.rs @@ -0,0 +1,39 @@ +//! Blake2br system call interception + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +use core::arch::asm; + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +use crate::ziskos_syscall; + +#[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] +use precompiles_helpers::blake2b_round; + +#[derive(Debug)] +#[repr(C)] +pub struct SyscallBlake2bRoundParams<'a> { + pub index: u64, // a number in [0,10) + pub state: &'a mut [u64; 16], + pub input: &'a [u64; 16], +} + +#[allow(unused_variables)] +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_blake2b_round")] +pub extern "C" fn syscall_blake2b_round( + params: &mut SyscallBlake2bRoundParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + ziskos_syscall!(zisk_definitions::SYSCALL_BLAKE2B_ROUND_ID, params); + + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + blake2b_round(params.state, params.input, params.index as u32); + + #[cfg(feature = "hints")] + { + hints.extend_from_slice(params.state); + } + } +} diff --git a/ziskos/entrypoint/src/syscalls/bls12_381_complex_add.rs b/ziskos/entrypoint/src/syscalls/bls12_381_complex_add.rs index 7e5539e95..ef50980ac 100644 --- a/ziskos/entrypoint/src/syscalls/bls12_381_complex_add.rs +++ b/ziskos/entrypoint/src/syscalls/bls12_381_complex_add.rs @@ -34,10 +34,25 @@ pub struct SyscallBls12_381ComplexAddParams<'a> { /// /// The resulting field element will have both coordinates in the range of the BLS12-381 base field. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_bls12_381_complex_add(params: &mut SyscallBls12_381ComplexAddParams) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_bls12_381_complex_add")] +pub extern "C" fn syscall_bls12_381_complex_add( + params: &mut SyscallBls12_381ComplexAddParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x80E, params); + ziskos_syscall!(zisk_definitions::SYSCALL_BLS12_381_COMPLEX_ADD_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + let f1 = [params.f1.x, params.f1.y].concat().try_into().unwrap(); + let f2 = [params.f2.x, params.f2.y].concat().try_into().unwrap(); + let mut f3: [u64; 12] = [0; 12]; + precompiles_helpers::bls12_381_complex_add(&f1, &f2, &mut f3); + params.f1.x.copy_from_slice(&f3[0..6]); + params.f1.y.copy_from_slice(&f3[6..12]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&f3); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/bls12_381_complex_mul.rs b/ziskos/entrypoint/src/syscalls/bls12_381_complex_mul.rs index 72add5b87..ab2d83c57 100644 --- a/ziskos/entrypoint/src/syscalls/bls12_381_complex_mul.rs +++ b/ziskos/entrypoint/src/syscalls/bls12_381_complex_mul.rs @@ -34,10 +34,25 @@ pub struct SyscallBls12_381ComplexMulParams<'a> { /// /// The resulting field element will have both coordinates in the range of the BLS12-381 base field. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_bls12_381_complex_mul(params: &mut SyscallBls12_381ComplexMulParams) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_bls12_381_complex_mul")] +pub extern "C" fn syscall_bls12_381_complex_mul( + params: &mut SyscallBls12_381ComplexMulParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x810, params); + ziskos_syscall!(zisk_definitions::SYSCALL_BLS12_381_COMPLEX_MUL_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + let f1 = [params.f1.x, params.f1.y].concat().try_into().unwrap(); + let f2 = [params.f2.x, params.f2.y].concat().try_into().unwrap(); + let mut f3: [u64; 12] = [0; 12]; + precompiles_helpers::bls12_381_complex_mul(&f1, &f2, &mut f3); + params.f1.x.copy_from_slice(&f3[0..6]); + params.f1.y.copy_from_slice(&f3[6..12]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&f3); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/bls12_381_complex_sub.rs b/ziskos/entrypoint/src/syscalls/bls12_381_complex_sub.rs index b448dd368..d20bc9623 100644 --- a/ziskos/entrypoint/src/syscalls/bls12_381_complex_sub.rs +++ b/ziskos/entrypoint/src/syscalls/bls12_381_complex_sub.rs @@ -34,10 +34,25 @@ pub struct SyscallBls12_381ComplexSubParams<'a> { /// /// The resulting field element will have both coordinates in the range of the BLS12-381 base field. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_bls12_381_complex_sub(params: &mut SyscallBls12_381ComplexSubParams) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_bls12_381_complex_sub")] +pub extern "C" fn syscall_bls12_381_complex_sub( + params: &mut SyscallBls12_381ComplexSubParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x80F, params); + ziskos_syscall!(zisk_definitions::SYSCALL_BLS12_381_COMPLEX_SUB_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + let f1 = [params.f1.x, params.f1.y].concat().try_into().unwrap(); + let f2 = [params.f2.x, params.f2.y].concat().try_into().unwrap(); + let mut f3: [u64; 12] = [0; 12]; + precompiles_helpers::bls12_381_complex_sub(&f1, &f2, &mut f3); + params.f1.x.copy_from_slice(&f3[0..6]); + params.f1.y.copy_from_slice(&f3[6..12]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&f3); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/bls12_381_curve_add.rs b/ziskos/entrypoint/src/syscalls/bls12_381_curve_add.rs index bb6b7a726..37d28c55e 100644 --- a/ziskos/entrypoint/src/syscalls/bls12_381_curve_add.rs +++ b/ziskos/entrypoint/src/syscalls/bls12_381_curve_add.rs @@ -33,10 +33,25 @@ pub struct SyscallBls12_381CurveAddParams<'a> { /// /// The resulting point will have both coordinates in the range of the BLS12-381 base field. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_bls12_381_curve_add(params: &mut SyscallBls12_381CurveAddParams) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_bls12_381_curve_add")] +pub extern "C" fn syscall_bls12_381_curve_add( + params: &mut SyscallBls12_381CurveAddParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x80C, params); + ziskos_syscall!(zisk_definitions::SYSCALL_BLS12_381_CURVE_ADD_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + let p1 = [params.p1.x, params.p1.y].concat().try_into().unwrap(); + let p2 = [params.p2.x, params.p2.y].concat().try_into().unwrap(); + let mut p3: [u64; 12] = [0; 12]; + precompiles_helpers::bls12_381_curve_add(&p1, &p2, &mut p3); + params.p1.x.copy_from_slice(&p3[0..6]); + params.p1.y.copy_from_slice(&p3[6..12]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&p3); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/bls12_381_curve_dbl.rs b/ziskos/entrypoint/src/syscalls/bls12_381_curve_dbl.rs index 8ed580b7e..1d73bf051 100644 --- a/ziskos/entrypoint/src/syscalls/bls12_381_curve_dbl.rs +++ b/ziskos/entrypoint/src/syscalls/bls12_381_curve_dbl.rs @@ -25,10 +25,24 @@ use super::point::SyscallPoint384; /// /// The resulting point will have both coordinates in the range of the BLS12-381 base field. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_bls12_381_curve_dbl(p1: &mut SyscallPoint384) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_bls12_381_curve_dbl")] +pub extern "C" fn syscall_bls12_381_curve_dbl( + p1: &mut SyscallPoint384, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x80D, p1); + ziskos_syscall!(zisk_definitions::SYSCALL_BLS12_381_CURVE_DBL_ID, p1); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + let _p1 = [p1.x, p1.y].concat().try_into().unwrap(); + let mut p2: [u64; 12] = [0; 12]; + precompiles_helpers::bls12_381_curve_dbl(&_p1, &mut p2); + p1.x.copy_from_slice(&p2[0..6]); + p1.y.copy_from_slice(&p2[6..12]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&p2); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/bn254_complex_add.rs b/ziskos/entrypoint/src/syscalls/bn254_complex_add.rs index 870470ce3..25e338ef8 100644 --- a/ziskos/entrypoint/src/syscalls/bn254_complex_add.rs +++ b/ziskos/entrypoint/src/syscalls/bn254_complex_add.rs @@ -34,10 +34,25 @@ pub struct SyscallBn254ComplexAddParams<'a> { /// /// The resulting field element will have both coordinates in the range of the BN254 base field. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_bn254_complex_add(params: &mut SyscallBn254ComplexAddParams) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_bn254_complex_add")] +pub extern "C" fn syscall_bn254_complex_add( + params: &mut SyscallBn254ComplexAddParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x808, params); + ziskos_syscall!(zisk_definitions::SYSCALL_BN254_COMPLEX_ADD_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + let f1 = [params.f1.x, params.f1.y].concat().try_into().unwrap(); + let f2 = [params.f2.x, params.f2.y].concat().try_into().unwrap(); + let mut f3: [u64; 8] = [0; 8]; + precompiles_helpers::bn254_complex_add(&f1, &f2, &mut f3); + params.f1.x.copy_from_slice(&f3[0..4]); + params.f1.y.copy_from_slice(&f3[4..8]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&f3); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/bn254_complex_mul.rs b/ziskos/entrypoint/src/syscalls/bn254_complex_mul.rs index 86b1ed80d..f0946ffb2 100644 --- a/ziskos/entrypoint/src/syscalls/bn254_complex_mul.rs +++ b/ziskos/entrypoint/src/syscalls/bn254_complex_mul.rs @@ -34,10 +34,25 @@ pub struct SyscallBn254ComplexMulParams<'a> { /// /// The resulting field element will have both coordinates in the range of the BN254 base field. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_bn254_complex_mul(params: &mut SyscallBn254ComplexMulParams) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_bn254_complex_mul")] +pub extern "C" fn syscall_bn254_complex_mul( + params: &mut SyscallBn254ComplexMulParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x80A, params); + ziskos_syscall!(zisk_definitions::SYSCALL_BN254_COMPLEX_MUL_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + let f1 = [params.f1.x, params.f1.y].concat().try_into().unwrap(); + let f2 = [params.f2.x, params.f2.y].concat().try_into().unwrap(); + let mut f3: [u64; 8] = [0; 8]; + precompiles_helpers::bn254_complex_mul(&f1, &f2, &mut f3); + params.f1.x.copy_from_slice(&f3[0..4]); + params.f1.y.copy_from_slice(&f3[4..8]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&f3); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/bn254_complex_sub.rs b/ziskos/entrypoint/src/syscalls/bn254_complex_sub.rs index 00a7f9bb4..28d6a3014 100644 --- a/ziskos/entrypoint/src/syscalls/bn254_complex_sub.rs +++ b/ziskos/entrypoint/src/syscalls/bn254_complex_sub.rs @@ -34,10 +34,25 @@ pub struct SyscallBn254ComplexSubParams<'a> { /// /// The resulting field element will have both coordinates in the range of the BN254 base field. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_bn254_complex_sub(params: &mut SyscallBn254ComplexSubParams) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_bn254_complex_sub")] +pub extern "C" fn syscall_bn254_complex_sub( + params: &mut SyscallBn254ComplexSubParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x809, params); + ziskos_syscall!(zisk_definitions::SYSCALL_BN254_COMPLEX_SUB_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + let f1 = [params.f1.x, params.f1.y].concat().try_into().unwrap(); + let f2 = [params.f2.x, params.f2.y].concat().try_into().unwrap(); + let mut f3: [u64; 8] = [0; 8]; + precompiles_helpers::bn254_complex_sub(&f1, &f2, &mut f3); + params.f1.x.copy_from_slice(&f3[0..4]); + params.f1.y.copy_from_slice(&f3[4..8]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&f3); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/bn254_curve_add.rs b/ziskos/entrypoint/src/syscalls/bn254_curve_add.rs index 85d29b490..20cdbc1c5 100644 --- a/ziskos/entrypoint/src/syscalls/bn254_curve_add.rs +++ b/ziskos/entrypoint/src/syscalls/bn254_curve_add.rs @@ -33,10 +33,25 @@ pub struct SyscallBn254CurveAddParams<'a> { /// /// The resulting point will have both coordinates in the range of the BN254 base field. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_bn254_curve_add(params: &mut SyscallBn254CurveAddParams) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_bn254_curve_add")] +pub extern "C" fn syscall_bn254_curve_add( + params: &mut SyscallBn254CurveAddParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x806, params); + ziskos_syscall!(zisk_definitions::SYSCALL_BN254_CURVE_ADD_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + let p1 = [params.p1.x, params.p1.y].concat().try_into().unwrap(); + let p2 = [params.p2.x, params.p2.y].concat().try_into().unwrap(); + let mut p3: [u64; 8] = [0; 8]; + precompiles_helpers::bn254_curve_add(&p1, &p2, &mut p3); + params.p1.x.copy_from_slice(&p3[0..4]); + params.p1.y.copy_from_slice(&p3[4..8]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&p3); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/bn254_curve_dbl.rs b/ziskos/entrypoint/src/syscalls/bn254_curve_dbl.rs index c066f8ad8..01a339a7f 100644 --- a/ziskos/entrypoint/src/syscalls/bn254_curve_dbl.rs +++ b/ziskos/entrypoint/src/syscalls/bn254_curve_dbl.rs @@ -25,10 +25,24 @@ use super::point::SyscallPoint256; /// /// The resulting point will have both coordinates in the range of the BN254 base field. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_bn254_curve_dbl(p1: &mut SyscallPoint256) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_bn254_curve_dbl")] +pub extern "C" fn syscall_bn254_curve_dbl( + p1: &mut SyscallPoint256, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x807, p1); + ziskos_syscall!(zisk_definitions::SYSCALL_BN254_CURVE_DBL_ID, p1); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + let _p1 = [p1.x, p1.y].concat().try_into().unwrap(); + let mut p2: [u64; 8] = [0; 8]; + precompiles_helpers::bn254_curve_dbl(&_p1, &mut p2); + p1.x.copy_from_slice(&p2[0..4]); + p1.y.copy_from_slice(&p2[4..8]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&p2); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/keccakf.rs b/ziskos/entrypoint/src/syscalls/keccakf.rs index a527739de..88e247c30 100644 --- a/ziskos/entrypoint/src/syscalls/keccakf.rs +++ b/ziskos/entrypoint/src/syscalls/keccakf.rs @@ -6,6 +6,9 @@ use core::arch::asm; #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] use crate::ziskos_syscall; +#[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] +use tiny_keccak::keccakf; + /// Executes the Keccak256 permutation on the given state. /// /// The `Keccak` system call executes a CSR set on a custom port. When transpiling from RISC-V to Zisk, @@ -18,10 +21,23 @@ use crate::ziskos_syscall; /// /// The caller must ensure that the data is aligned to a 64-bit boundary. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_keccak_f(state: *mut [u64; 25]) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_keccak_f")] +pub unsafe extern "C" fn syscall_keccak_f( + state: *mut [u64; 25], + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x800, state); + ziskos_syscall!(zisk_definitions::SYSCALL_KECCAKF_ID, state); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + // Call keccakf + keccakf(unsafe { &mut *state }); + + // Store results in hints vector + #[cfg(feature = "hints")] + { + hints.extend_from_slice(unsafe { &*state }); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/mod.rs b/ziskos/entrypoint/src/syscalls/mod.rs index 4822af0c2..81a5efbcb 100644 --- a/ziskos/entrypoint/src/syscalls/mod.rs +++ b/ziskos/entrypoint/src/syscalls/mod.rs @@ -2,6 +2,7 @@ mod add256; mod arith256; mod arith256_mod; mod arith384_mod; +mod blake2br; mod bls12_381_complex_add; mod bls12_381_complex_mul; mod bls12_381_complex_sub; @@ -15,15 +16,18 @@ mod bn254_curve_dbl; mod complex; mod keccakf; mod point; +mod poseidon2; mod secp256k1_add; mod secp256k1_dbl; +mod secp256r1_add; +mod secp256r1_dbl; mod sha256f; -mod syscall; pub use add256::*; pub use arith256::*; pub use arith256_mod::*; pub use arith384_mod::*; +pub use blake2br::*; pub use bls12_381_complex_add::*; pub use bls12_381_complex_mul::*; pub use bls12_381_complex_sub::*; @@ -37,32 +41,49 @@ pub use bn254_curve_dbl::*; pub use complex::*; pub use keccakf::*; pub use point::*; +pub use poseidon2::*; pub use secp256k1_add::*; pub use secp256k1_dbl::*; +pub use secp256r1_add::*; +pub use secp256r1_dbl::*; pub use sha256f::*; -pub use syscall::*; #[macro_export] macro_rules! ziskos_syscall { - ($csr_addr:literal, $addr:expr) => {{ + ($csr_addr:expr, $addr:expr) => {{ unsafe { asm!( - concat!("csrs ", stringify!($csr_addr), ", {value}"), + concat!("csrs {port}, {value}"), + port = const $csr_addr, value = in(reg) $addr ); } }}; + ($csr_addr:expr, $arg0:expr, $arg1:expr, $arg2: expr) => {{ + unsafe { + asm!( + concat!("csrs {port}, {p0}"), + "add x0, {p1}, {p2}", + port = const $csr_addr, + p0 = in(reg) $arg0, // {0} + p1 = in(reg) $arg1, // {1} + p2 = in(reg) $arg2, // {2} + options(nostack) + ); + } + }}; } #[macro_export] macro_rules! ziskos_syscall_ret_u64 { - ($csr_addr:literal, $addr:expr) => {{ + ($csr_addr:expr, $addr:expr) => {{ let v: u64; unsafe { asm!( - concat!("csrrs {0}, ", stringify!($csr_addr), ", {1}"), - out(reg) v, - in(reg) $addr, + concat!("csrrs {rd}, {port}, {rs1}"), + port = const $csr_addr, + rd = out(reg) v, + rs1 = in(reg) $addr, options(nostack) ); } diff --git a/ziskos/entrypoint/src/syscalls/poseidon2.rs b/ziskos/entrypoint/src/syscalls/poseidon2.rs new file mode 100644 index 000000000..afa123196 --- /dev/null +++ b/ziskos/entrypoint/src/syscalls/poseidon2.rs @@ -0,0 +1,50 @@ +//! Poseidon2 system call interception + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +use core::arch::asm; + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +use crate::ziskos_syscall; + +#[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] +use fields::{poseidon2_hash, Goldilocks, Poseidon16, PrimeField64}; + +/// Executes the Poseidon2 permutation on the given state. +/// +/// The `Poseidon2` system call executes a CSR set on a custom port. When transpiling from RISC-V to Zisk, +/// this instruction is replaced with a precompiled operation—specifically, `Poseidon2`. +/// +/// The syscall takes as a parameter the address of a state data (1024 bits = 128 bytes) +/// and the result of the poseidon2 operation is stored at the same location +/// +/// ### Safety +/// +/// The caller must ensure that the data is aligned to a 64-bit boundary. +#[allow(unused_variables)] +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_poseidon2")] +pub unsafe extern "C" fn syscall_poseidon2( + state: *mut [u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) { + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + ziskos_syscall!(zisk_definitions::SYSCALL_POSEIDON2_ID, state); + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + // Get a mutable reference to the state + let state: &mut [u64; 16] = unsafe { &mut *(state) }; + + // Call poseidon2, mapping u64 to Goldilocks elements + let state_gl = state.map(Goldilocks::new); + let new_state_gl = poseidon2_hash::(&state_gl); + for (i, d) in state.iter_mut().enumerate() { + *d = new_state_gl[i].as_canonical_u64(); + } + + #[cfg(feature = "hints")] + { + // For hints, we store the new state in the hints vector + hints.extend_from_slice(state); + } + } +} diff --git a/ziskos/entrypoint/src/syscalls/secp256k1_add.rs b/ziskos/entrypoint/src/syscalls/secp256k1_add.rs index bdc4ba335..d11e3f007 100644 --- a/ziskos/entrypoint/src/syscalls/secp256k1_add.rs +++ b/ziskos/entrypoint/src/syscalls/secp256k1_add.rs @@ -33,10 +33,25 @@ pub struct SyscallSecp256k1AddParams<'a> { /// /// The resulting point will have both coordinates in the range of the Secp256k1 base field. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_secp256k1_add(params: &mut SyscallSecp256k1AddParams) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_secp256k1_add")] +pub extern "C" fn syscall_secp256k1_add( + params: &mut SyscallSecp256k1AddParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x803, params); + ziskos_syscall!(zisk_definitions::SYSCALL_SECP256K1_ADD_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + let p1 = [params.p1.x, params.p1.y].concat().try_into().unwrap(); + let p2 = [params.p2.x, params.p2.y].concat().try_into().unwrap(); + let mut p3: [u64; 8] = [0; 8]; + precompiles_helpers::secp256k1_add(&p1, &p2, &mut p3); + params.p1.x.copy_from_slice(&p3[0..4]); + params.p1.y.copy_from_slice(&p3[4..8]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&p3); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/secp256k1_dbl.rs b/ziskos/entrypoint/src/syscalls/secp256k1_dbl.rs index a5788f6b1..33ca29619 100644 --- a/ziskos/entrypoint/src/syscalls/secp256k1_dbl.rs +++ b/ziskos/entrypoint/src/syscalls/secp256k1_dbl.rs @@ -25,10 +25,24 @@ use super::point::SyscallPoint256; /// /// The resulting point will have both coordinates in the range of the Secp256k1 base field. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_secp256k1_dbl(p1: &mut SyscallPoint256) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_secp256k1_dbl")] +pub extern "C" fn syscall_secp256k1_dbl( + p1: &mut SyscallPoint256, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x804, p1); + ziskos_syscall!(zisk_definitions::SYSCALL_SECP256K1_DBL_ID, p1); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + let _p1 = [p1.x, p1.y].concat().try_into().unwrap(); + let mut p3: [u64; 8] = [0; 8]; + precompiles_helpers::secp256k1_dbl(&_p1, &mut p3); + p1.x.copy_from_slice(&p3[0..4]); + p1.y.copy_from_slice(&p3[4..8]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&p3); + } + } } diff --git a/ziskos/entrypoint/src/syscalls/secp256r1_add.rs b/ziskos/entrypoint/src/syscalls/secp256r1_add.rs new file mode 100644 index 000000000..fbee9615c --- /dev/null +++ b/ziskos/entrypoint/src/syscalls/secp256r1_add.rs @@ -0,0 +1,57 @@ +//! Secp256r1Add system call interception + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +use crate::ziskos_syscall; + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +use core::arch::asm; + +use super::point::SyscallPoint256; + +#[derive(Debug)] +#[repr(C)] +pub struct SyscallSecp256r1AddParams<'a> { + pub p1: &'a mut SyscallPoint256, + pub p2: &'a SyscallPoint256, +} + +/// Performs the addition of two points on the Secp256r1 curve, storing the result in the first point. +/// +/// The `Secp256r1Add` system call executes a CSR set on a custom port. When transpiling from RISC-V to Zisk, +/// this instruction is replaced with a precompiled operation—specifically, `Secp256r1Add`. +/// +/// `Secp256r1Add` operates on two points, each with two coordinates of 256 bits. +/// Each coordinate is represented as an array of four `u64` elements. +/// The syscall takes as a parameter the address of a structure containing points `p1` and `p2`. +/// The result of the addition is stored in `p1`. +/// +/// ### Safety +/// +/// The caller must ensure that `p1` is a valid pointer to data that is aligned to an eight-byte boundary. +/// +/// The caller must ensure that both `p1` and `p2` coordinates are within the range of the Secp256r1 base field. +/// +/// The resulting point will have both coordinates in the range of the Secp256r1 base field. +#[allow(unused_variables)] +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_secp256r1_add")] +pub extern "C" fn syscall_secp256r1_add( + params: &mut SyscallSecp256r1AddParams, + #[cfg(feature = "hints")] hints: &mut Vec, +) { + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + ziskos_syscall!(zisk_definitions::SYSCALL_SECP256R1_ADD_ID, params); + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let p1 = [params.p1.x, params.p1.y].concat().try_into().unwrap(); + let p2 = [params.p2.x, params.p2.y].concat().try_into().unwrap(); + let mut p3: [u64; 8] = [0; 8]; + precompiles_helpers::secp256r1_add(&p1, &p2, &mut p3); + params.p1.x.copy_from_slice(&p3[0..4]); + params.p1.y.copy_from_slice(&p3[4..8]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&p3); + } + } +} diff --git a/ziskos/entrypoint/src/syscalls/secp256r1_dbl.rs b/ziskos/entrypoint/src/syscalls/secp256r1_dbl.rs new file mode 100644 index 000000000..c20d2a703 --- /dev/null +++ b/ziskos/entrypoint/src/syscalls/secp256r1_dbl.rs @@ -0,0 +1,48 @@ +//! syscall_secp256r1_dbl system call interception + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +use crate::ziskos_syscall; + +#[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] +use core::arch::asm; + +use super::point::SyscallPoint256; + +/// Executes the doubling of a point on the Secp256r1 curve. +/// +/// The `syscall_secp256r1_dbl` system call executes a CSR set on a custom port. When transpiling from RISC-V to Zisk, +/// this instruction is replaced with a precompiled operation—specifically, `Secp256r1Dbl`. +/// +/// `syscall_secp256r1_dbl` operates on a point with two coordinates, each consisting of 256 bits. +/// Each coordinate is represented as an array of four `u64` elements. The syscall takes as a parameter +/// the address of the point, and the result of the doubling operation is stored at the same location. +/// +/// ### Safety +/// +/// The caller must ensure that `p1` is a valid pointer to data that is aligned to an eight-byte boundary. +/// +/// The caller must ensure that `p1` coordinates are within the range of the Secp256r1 base field. +/// +/// The resulting point will have both coordinates in the range of the Secp256r1 base field. +#[allow(unused_variables)] +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_secp256r1_dbl")] +pub extern "C" fn syscall_secp256r1_dbl( + p1: &mut SyscallPoint256, + #[cfg(feature = "hints")] hints: &mut Vec, +) { + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + ziskos_syscall!(zisk_definitions::SYSCALL_SECP256R1_DBL_ID, p1); + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let _p1 = [p1.x, p1.y].concat().try_into().unwrap(); + let mut p3: [u64; 8] = [0; 8]; + precompiles_helpers::secp256r1_dbl(&_p1, &mut p3); + p1.x.copy_from_slice(&p3[0..4]); + p1.y.copy_from_slice(&p3[4..8]); + #[cfg(feature = "hints")] + { + hints.extend_from_slice(&p3); + } + } +} diff --git a/ziskos/entrypoint/src/syscalls/sha256f.rs b/ziskos/entrypoint/src/syscalls/sha256f.rs index 57a2ad040..f51448a4c 100644 --- a/ziskos/entrypoint/src/syscalls/sha256f.rs +++ b/ziskos/entrypoint/src/syscalls/sha256f.rs @@ -6,6 +6,13 @@ use core::arch::asm; #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] use crate::ziskos_syscall; +#[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] +use sha2::compress256; + +#[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] +#[allow(deprecated)] +use sha2::digest::generic_array::{typenum::U64, GenericArray}; + #[derive(Debug)] #[repr(C)] pub struct SyscallSha256Params<'a> { @@ -26,10 +33,30 @@ pub struct SyscallSha256Params<'a> { /// /// The caller must ensure that the data is aligned to a 64-bit boundary. #[allow(unused_variables)] -#[no_mangle] -pub extern "C" fn syscall_sha256_f(params: &mut SyscallSha256Params) { +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_syscall_sha256_f")] +pub extern "C" fn syscall_sha256_f( + params: &mut SyscallSha256Params, + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - ziskos_syscall!(0x805, params); + ziskos_syscall!(zisk_definitions::SYSCALL_SHA256F_ID, params); #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!() + { + sha256f(params.state, params.input); + + #[cfg(feature = "hints")] + { + hints.extend_from_slice(params.state); + } + } +} + +#[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] +#[allow(deprecated)] +fn sha256f(state: &mut [u64; 4], input: &[u64; 8]) { + let state_u32: &mut [u32; 8] = unsafe { &mut *(state.as_mut_ptr() as *mut [u32; 8]) }; + let input_u8: &[GenericArray; 1] = + unsafe { &*(input.as_ptr() as *const [GenericArray; 1]) }; + compress256(state_u32, input_u8); } diff --git a/ziskos/entrypoint/src/zisklib/fcalls/big_int256_div.rs b/ziskos/entrypoint/src/zisklib/fcalls/big_int256_div.rs index 56fba88df..976529ffd 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls/big_int256_div.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls/big_int256_div.rs @@ -1,10 +1,16 @@ -//! fcall_bigint256_div free call use cfg_if::cfg_if; + cfg_if! { if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { use core::arch::asm; - use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; + use crate::{ziskos_fcall, ziskos_fcall_param}; use super::FCALL_BIG_INT256_DIV_ID; + #[cfg(not(feature = "inputcpy"))] + use crate::ziskos_fcall_get; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } else { + use crate::zisklib::fcalls_impl::big_int256_div::big_int256_div; } } @@ -22,17 +28,45 @@ cfg_if! { /// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness /// of the result. It is the caller's responsibility to ensure it. #[allow(unused_variables)] -pub fn fcall_bigint256_div(a_value: &[u64; 4], b_value: &[u64; 4]) -> ([u64; 4], [u64; 4]) { +pub fn fcall_bigint256_div( + a_value: &[u64; 4], + b_value: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> ([u64; 4], [u64; 4]) { #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); + { + let (quotient, remainder) = big_int256_div(a_value, b_value); + #[cfg(feature = "hints")] + { + hints.push(8); + hints.extend_from_slice("ient); + hints.extend_from_slice(&remainder); + } + + (quotient, remainder) + } #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { ziskos_fcall_param!(a_value, 4); ziskos_fcall_param!(b_value, 4); ziskos_fcall!(FCALL_BIG_INT256_DIV_ID); - ( - [ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get()], - [ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get()], - ) + #[cfg(not(feature = "inputcpy"))] + { + ( + [ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get()], + [ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get()], + ) + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + // TODO: generate an [u64;8] and after return 2 slides + let mut quotient: MaybeUninit<[u64; 4]> = MaybeUninit::uninit(); + ziskos_inputcpy!(quotient, 32); + + let mut remainder: MaybeUninit<[u64; 4]> = MaybeUninit::uninit(); + ziskos_inputcpy!(remainder, 32); + (unsafe { quotient.assume_init() }, unsafe { remainder.assume_init() }) + } } } diff --git a/ziskos/entrypoint/src/zisklib/fcalls/big_int_div.rs b/ziskos/entrypoint/src/zisklib/fcalls/big_int_div.rs index a289ca1dd..9c9f4865d 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls/big_int_div.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls/big_int_div.rs @@ -1,10 +1,14 @@ -//! fcall_division free call use cfg_if::cfg_if; + cfg_if! { if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { use core::arch::asm; use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; use super::FCALL_BIG_INT_DIV_ID; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } else { + use crate::zisklib::fcalls_impl::big_int_div::big_int_div_into; } } @@ -22,9 +26,28 @@ pub fn fcall_division( b_value: &[u64], quo: &mut [u64], rem: &mut [u64], + #[cfg(feature = "hints")] hints: &mut Vec, ) -> (usize, usize) { #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); + { + let mut quo_vector: Vec = Vec::new(); + let mut rem_vector: Vec = Vec::new(); + big_int_div_into(a_value, b_value, &mut quo_vector, &mut rem_vector); + quo[..quo_vector.len()].copy_from_slice(&quo_vector); + rem[..rem_vector.len()].copy_from_slice(&rem_vector); + let len_quo = quo_vector.len(); + let len_rem = rem_vector.len(); + #[cfg(feature = "hints")] + { + hints.push(len_quo as u64 + len_rem as u64 + 2); + hints.push(len_quo as u64); + hints.extend_from_slice(&quo_vector); + hints.push(len_rem as u64); + hints.extend_from_slice(&rem_vector); + } + + (len_quo, len_rem) + } #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { let len_a = a_value.len() as usize; @@ -41,16 +64,28 @@ pub fn fcall_division( ziskos_fcall!(FCALL_BIG_INT_DIV_ID); - let len_quo = ziskos_fcall_get() as usize; - for i in 0..len_quo { - quo[i] = ziskos_fcall_get(); - } + #[cfg(not(feature = "inputcpy"))] + { + let len_quo = ziskos_fcall_get() as usize; + for i in 0..len_quo { + quo[i] = ziskos_fcall_get(); + } + + let len_rem = ziskos_fcall_get() as usize; + for i in 0..len_rem { + rem[i] = ziskos_fcall_get(); + } - let len_rem = ziskos_fcall_get() as usize; - for i in 0..len_rem { - rem[i] = ziskos_fcall_get(); + (len_quo, len_rem) } + #[cfg(feature = "inputcpy")] + { + let len_quo = ziskos_fcall_get() as usize; + ziskos_inputcpy!(quo, len_quo * 8); + let len_rem = ziskos_fcall_get() as usize; + ziskos_inputcpy!(rem, len_rem * 8); - (len_quo, len_rem) + (len_quo, len_rem) + } } } diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bin_decomp.rs b/ziskos/entrypoint/src/zisklib/fcalls/bin_decomp.rs index f506e4491..5be46c9aa 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls/bin_decomp.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls/bin_decomp.rs @@ -1,17 +1,38 @@ use cfg_if::cfg_if; + cfg_if! { if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { use core::arch::asm; use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; use super::FCALL_BIN_DECOMP_ID; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } else { + use crate::zisklib::fcalls_impl::bin_decomp::bin_decomp; } } /// Computes the binary decomposition of a NON-ZERO unsigned integer `x` into its bits. #[allow(unused_variables)] -pub fn fcall_bin_decomp(x_val: &[u64]) -> (usize, Vec) { +pub fn fcall_bin_decomp( + x_val: &[u64], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> (usize, Vec) { #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); + { + let len_x = x_val.len(); + let bits = bin_decomp(x_val, len_x); + let len_bits = bits.len(); + let bits_u64: Vec = bits.into_iter().map(|b| b as u64).collect(); + #[cfg(feature = "hints")] + { + hints.push(len_bits as u64 + 1); + hints.push(len_bits as u64); + hints.extend_from_slice(&bits_u64); + } + + (len_bits, bits_u64) + } #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { let len_x = x_val.len() as usize; @@ -23,11 +44,23 @@ pub fn fcall_bin_decomp(x_val: &[u64]) -> (usize, Vec) { ziskos_fcall!(FCALL_BIN_DECOMP_ID); let len_bits = ziskos_fcall_get() as usize; - let mut bits = vec![0u64; len_bits]; - for i in 0..len_bits { - bits[i] = ziskos_fcall_get(); - } + #[cfg(not(feature = "inputcpy"))] + { + let mut bits = vec![0u64; len_bits]; + for i in 0..len_bits { + bits[i] = ziskos_fcall_get(); + } - (len_bits, bits) + (len_bits, bits) + } + #[cfg(feature = "inputcpy")] + { + let mut bits: Vec = Vec::with_capacity(len_bits); + ziskos_inputcpy!(bits, len_bits * 8); + unsafe { + bits.set_len(len_bits); + } + (len_bits, bits) + } } } diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bls12_381/fp.rs b/ziskos/entrypoint/src/zisklib/fcalls/bls12_381/fp.rs new file mode 100644 index 000000000..4c58544a2 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/bls12_381/fp.rs @@ -0,0 +1,125 @@ +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { + use core::arch::asm; + use crate::{ + ziskos_fcall, ziskos_fcall_param, + zisklib::{FCALL_BLS12_381_FP_INV_ID, FCALL_BLS12_381_FP_SQRT_ID} + }; + #[cfg(not(feature = "inputcpy"))] + use crate::ziskos_fcall_get; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } else { + use crate::zisklib::fcalls_impl::bls12_381::{bls12_381_fp_inv, bls12_381_fp_sqrt}; + } +} + +/// Executes the multiplicative inverse computation over the base field of the `bls12_381` curve. +/// +/// `fcall_bls12_381_fp_inv` performs an inversion of a 256-bit field element, +/// represented as an array of four `u64` values. +/// +/// - `fcall_bls12_381_fp_inv` performs the inversion and **returns the result directly**. +/// +/// ### Safety +/// +/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_bls12_381_fp_inv( + p_value: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 6] { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let result: [u64; 6] = bls12_381_fp_inv(p_value); + #[cfg(feature = "hints")] + { + hints.push(result.len() as u64); + hints.extend_from_slice(&result); + } + result + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p_value, 8); + ziskos_fcall!(FCALL_BLS12_381_FP_INV_ID); + #[cfg(not(feature = "inputcpy"))] + { + [ + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ] + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut result: MaybeUninit<[u64; 6]> = MaybeUninit::uninit(); + ziskos_inputcpy!(result, 48); + unsafe { result.assume_init() } + } + } +} + +/// Executes the multiplicative inverse computation over the base field of the `bls12_381` curve. +/// +/// `fcall_bls12_381_fp_sqrt` performs an inversion of a 256-bit field element, +/// represented as an array of four `u64` values. +/// +/// - `fcall_bls12_381_fp_sqrt` performs the inversion and **returns the result directly**. +/// +/// ### Safety +/// +/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_bls12_381_fp_sqrt( + p_value: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 7] { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let mut result: [u64; 7] = [0; 7]; + bls12_381_fp_sqrt(p_value, &mut result); + #[cfg(feature = "hints")] + { + hints.push(result.len() as u64); + hints.extend_from_slice(&result); + } + result + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p_value, 8); + ziskos_fcall!(FCALL_BLS12_381_FP_SQRT_ID); + #[cfg(not(feature = "inputcpy"))] + { + [ + ziskos_fcall_get(), // results[0] - indicates if a sqrt exists (1) or not (0) + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ] + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut result: MaybeUninit<[u64; 7]> = MaybeUninit::uninit(); + ziskos_inputcpy!(result, 56); + unsafe { result.assume_init() } + } + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bls12_381/fp2.rs b/ziskos/entrypoint/src/zisklib/fcalls/bls12_381/fp2.rs new file mode 100644 index 000000000..b82524b3b --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/bls12_381/fp2.rs @@ -0,0 +1,137 @@ +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { + use core::arch::asm; + use crate::{ + ziskos_fcall, ziskos_fcall_param, + zisklib::{FCALL_BLS12_381_FP2_INV_ID, FCALL_BLS12_381_FP2_SQRT_ID} + }; + #[cfg(not(feature = "inputcpy"))] + use crate::ziskos_fcall_get; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } else { + use crate::zisklib::fcalls_impl::bls12_381::{bls12_381_fp2_inv, bls12_381_fp2_sqrt_13}; + } + +} + +/// Executes the multiplicative inverse computation over the complex extension field of the `bls12_381` curve. +/// +/// `fcall_bls12_381_fp2_inv` performs an inversion of a 512-bit extension field element, +/// represented as an array of eight `u64` values. +/// +/// - `fcall_bls12_381_fp2_inv` performs the inversion and **returns the result directly**. +/// +/// ### Safety +/// +/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_bls12_381_fp2_inv( + p_value: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let result: [u64; 12] = bls12_381_fp2_inv(p_value); + #[cfg(feature = "hints")] + { + hints.push(result.len() as u64); + hints.extend_from_slice(&result); + } + result + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p_value, 12); + ziskos_fcall!(FCALL_BLS12_381_FP2_INV_ID); + #[cfg(not(feature = "inputcpy"))] + { + [ + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ] + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut res: MaybeUninit<[u64; 12]> = MaybeUninit::uninit(); + ziskos_inputcpy!(res, 96); + unsafe { res.assume_init() } + } + } +} + +/// Executes the multiplicative inverse computation over the base field of the `bls12_381` curve. +/// +/// `fcall_bls12_381_fp2_sqrt` performs an inversion of a 256-bit field element, +/// represented as an array of four `u64` values. +/// +/// - `fcall_bls12_381_fp2_sqrt` performs the inversion and **returns the result directly**. +/// +/// ### Safety +/// +/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_bls12_381_fp2_sqrt( + p_value: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 13] { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let result: [u64; 13] = bls12_381_fp2_sqrt_13(p_value); + #[cfg(feature = "hints")] + { + hints.push(result.len() as u64); + hints.extend_from_slice(&result); + } + result + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p_value, 16); + ziskos_fcall!(FCALL_BLS12_381_FP2_SQRT_ID); + #[cfg(not(feature = "inputcpy"))] + { + [ + ziskos_fcall_get(), // results[0] - indicates if a sqrt exists (1) or not (0) + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ] + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut res: MaybeUninit<[u64; 13]> = MaybeUninit::uninit(); + ziskos_inputcpy!(res, 104); + unsafe { res.assume_init() } + } + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bls12_381/mod.rs b/ziskos/entrypoint/src/zisklib/fcalls/bls12_381/mod.rs new file mode 100644 index 000000000..a494177c3 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/bls12_381/mod.rs @@ -0,0 +1,7 @@ +mod fp; +mod fp2; +mod twist; + +pub use fp::*; +pub use fp2::*; +pub use twist::*; diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bls12_381/twist.rs b/ziskos/entrypoint/src/zisklib/fcalls/bls12_381/twist.rs new file mode 100644 index 000000000..6edf3e2a6 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/bls12_381/twist.rs @@ -0,0 +1,173 @@ +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { + use core::arch::asm; + use crate::{ + ziskos_fcall, ziskos_fcall_param, + zisklib::{FCALL_BLS12_381_TWIST_ADD_LINE_COEFFS_ID, FCALL_BLS12_381_TWIST_DBL_LINE_COEFFS_ID}, + }; + #[cfg(not(feature = "inputcpy"))] + use crate::ziskos_fcall_get; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } else { + use crate::zisklib::fcalls_impl::bls12_381::{bls12_381_twist_add_line_coeffs, bls12_381_twist_dbl_line_coeffs}; + } +} + +/// Computes the coefficients for the line defining the addition of two points on the `bls12_381` twist. +/// +/// ### Safety +/// +/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_bls12_381_twist_add_line_coeffs( + p1_value: &[u64; 24], + p2_value: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> ([u64; 12], [u64; 12]) { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let x1: [u64; 12] = p1_value[0..12].try_into().unwrap(); + let y1: [u64; 12] = p1_value[12..24].try_into().unwrap(); + let x2: [u64; 12] = p2_value[0..12].try_into().unwrap(); + let y2: [u64; 12] = p2_value[12..24].try_into().unwrap(); + let (lambda, mu): ([u64; 12], [u64; 12]) = + bls12_381_twist_add_line_coeffs(&x1, &y1, &x2, &y2); + #[cfg(feature = "hints")] + { + hints.push(24); + hints.extend_from_slice(&lambda); + hints.extend_from_slice(&mu); + } + + (lambda, mu) + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p1_value, 24); + ziskos_fcall_param!(p2_value, 24); + ziskos_fcall!(FCALL_BLS12_381_TWIST_ADD_LINE_COEFFS_ID); + #[cfg(not(feature = "inputcpy"))] + { + ( + [ + ziskos_fcall_get(), // 0 + ziskos_fcall_get(), // 1 + ziskos_fcall_get(), // 2 + ziskos_fcall_get(), // 3 + ziskos_fcall_get(), // 4 + ziskos_fcall_get(), // 5 + ziskos_fcall_get(), // 6 + ziskos_fcall_get(), // 7 + ziskos_fcall_get(), // 8 + ziskos_fcall_get(), // 9 + ziskos_fcall_get(), // 10 + ziskos_fcall_get(), // 11 + ], + [ + ziskos_fcall_get(), // 0 + ziskos_fcall_get(), // 1 + ziskos_fcall_get(), // 2 + ziskos_fcall_get(), // 3 + ziskos_fcall_get(), // 4 + ziskos_fcall_get(), // 5 + ziskos_fcall_get(), // 6 + ziskos_fcall_get(), // 7 + ziskos_fcall_get(), // 8 + ziskos_fcall_get(), // 9 + ziskos_fcall_get(), // 10 + ziskos_fcall_get(), // 11 + ], + ) + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut lambda: MaybeUninit<[u64; 12]> = MaybeUninit::uninit(); + ziskos_inputcpy!(lambda, 12 * 8); + let mut mu: MaybeUninit<[u64; 12]> = MaybeUninit::uninit(); + ziskos_inputcpy!(mu, 12 * 8); + (unsafe { lambda.assume_init() }, unsafe { mu.assume_init() }) + } + } +} + +/// Computes the coefficients for the line defining the doubling of a point on the `bls12_381` twist. +/// +/// ### Safety +/// +/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_bls12_381_twist_dbl_line_coeffs( + p_value: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> ([u64; 12], [u64; 12]) { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let x: [u64; 12] = p_value[0..12].try_into().unwrap(); + let y: [u64; 12] = p_value[12..24].try_into().unwrap(); + let (lambda, mu): ([u64; 12], [u64; 12]) = bls12_381_twist_dbl_line_coeffs(&x, &y); + #[cfg(feature = "hints")] + { + hints.push(24); + hints.extend_from_slice(&lambda); + hints.extend_from_slice(&mu); + } + (lambda, mu) + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p_value, 24); + ziskos_fcall!(FCALL_BLS12_381_TWIST_DBL_LINE_COEFFS_ID); + #[cfg(not(feature = "inputcpy"))] + { + ( + [ + ziskos_fcall_get(), // 0 + ziskos_fcall_get(), // 1 + ziskos_fcall_get(), // 2 + ziskos_fcall_get(), // 3 + ziskos_fcall_get(), // 4 + ziskos_fcall_get(), // 5 + ziskos_fcall_get(), // 6 + ziskos_fcall_get(), // 7 + ziskos_fcall_get(), // 8 + ziskos_fcall_get(), // 9 + ziskos_fcall_get(), // 10 + ziskos_fcall_get(), // 11 + ], + [ + ziskos_fcall_get(), // 0 + ziskos_fcall_get(), // 1 + ziskos_fcall_get(), // 2 + ziskos_fcall_get(), // 3 + ziskos_fcall_get(), // 4 + ziskos_fcall_get(), // 5 + ziskos_fcall_get(), // 6 + ziskos_fcall_get(), // 7 + ziskos_fcall_get(), // 8 + ziskos_fcall_get(), // 9 + ziskos_fcall_get(), // 10 + ziskos_fcall_get(), // 11 + ], + ) + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut lambda: MaybeUninit<[u64; 12]> = MaybeUninit::uninit(); + ziskos_inputcpy!(lambda, 12 * 8); + let mut mu: MaybeUninit<[u64; 12]> = MaybeUninit::uninit(); + ziskos_inputcpy!(mu, 12 * 8); + (unsafe { lambda.assume_init() }, unsafe { mu.assume_init() }) + } + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bls12_381_fp2_inv.rs b/ziskos/entrypoint/src/zisklib/fcalls/bls12_381_fp2_inv.rs deleted file mode 100644 index 8cf9cf8b9..000000000 --- a/ziskos/entrypoint/src/zisklib/fcalls/bls12_381_fp2_inv.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! fcall_bls12_381_fp2_inv free call -use cfg_if::cfg_if; -cfg_if! { - if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { - use core::arch::asm; - use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; - use super::FCALL_BLS12_381_FP2_INV_ID; - } -} - -/// Executes the multiplicative inverse computation over the complex extension field of the `bls12_381` curve. -/// -/// `fcall_bls12_381_fp2_inv` performs an inversion of a 512-bit extension field element, -/// represented as an array of eight `u64` values. -/// -/// - `fcall_bls12_381_fp2_inv` performs the inversion and **returns the result directly**. -/// -/// ### Safety -/// -/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. -/// -/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness -/// of the result. It is the caller's responsibility to ensure it. -#[allow(unused_variables)] -pub fn fcall_bls12_381_fp2_inv(p_value: &[u64; 12]) -> [u64; 12] { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p_value, 12); - ziskos_fcall!(FCALL_BLS12_381_FP2_INV_ID); - [ - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ] - } -} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bls12_381_fp_inv.rs b/ziskos/entrypoint/src/zisklib/fcalls/bls12_381_fp_inv.rs deleted file mode 100644 index 9f70241f1..000000000 --- a/ziskos/entrypoint/src/zisklib/fcalls/bls12_381_fp_inv.rs +++ /dev/null @@ -1,41 +0,0 @@ -//! fcall_bls12_381_fp_inv free call -use cfg_if::cfg_if; -cfg_if! { - if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { - use core::arch::asm; - use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; - use super::FCALL_BLS12_381_FP_INV_ID; - } -} - -/// Executes the multiplicative inverse computation over the base field of the `bls12_381` curve. -/// -/// `fcall_bls12_381_fp_inv` performs an inversion of a 256-bit field element, -/// represented as an array of four `u64` values. -/// -/// - `fcall_bls12_381_fp_inv` performs the inversion and **returns the result directly**. -/// -/// ### Safety -/// -/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. -/// -/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness -/// of the result. It is the caller's responsibility to ensure it. -#[allow(unused_variables)] -pub fn fcall_bls12_381_fp_inv(p_value: &[u64; 6]) -> [u64; 6] { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p_value, 8); - ziskos_fcall!(FCALL_BLS12_381_FP_INV_ID); - [ - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ] - } -} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bls12_381_fp_sqrt.rs b/ziskos/entrypoint/src/zisklib/fcalls/bls12_381_fp_sqrt.rs deleted file mode 100644 index 8c3059895..000000000 --- a/ziskos/entrypoint/src/zisklib/fcalls/bls12_381_fp_sqrt.rs +++ /dev/null @@ -1,42 +0,0 @@ -//! fcall_bls12_381_fp_sqrt free call -use cfg_if::cfg_if; -cfg_if! { - if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { - use core::arch::asm; - use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; - use super::FCALL_BLS12_381_FP_SQRT_ID; - } -} - -/// Executes the multiplicative inverse computation over the base field of the `bls12_381` curve. -/// -/// `fcall_bls12_381_fp_sqrt` performs an inversion of a 256-bit field element, -/// represented as an array of four `u64` values. -/// -/// - `fcall_bls12_381_fp_sqrt` performs the inversion and **returns the result directly**. -/// -/// ### Safety -/// -/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. -/// -/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness -/// of the result. It is the caller's responsibility to ensure it. -#[allow(unused_variables)] -pub fn fcall_bls12_381_fp_sqrt(p_value: &[u64; 6]) -> [u64; 7] { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p_value, 8); - ziskos_fcall!(FCALL_BLS12_381_FP_SQRT_ID); - [ - ziskos_fcall_get(), // results[0] - indicates if a sqrt exists (1) or not (0) - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ] - } -} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bls12_381_twist.rs b/ziskos/entrypoint/src/zisklib/fcalls/bls12_381_twist.rs deleted file mode 100644 index 83dd00247..000000000 --- a/ziskos/entrypoint/src/zisklib/fcalls/bls12_381_twist.rs +++ /dev/null @@ -1,110 +0,0 @@ -use cfg_if::cfg_if; -cfg_if! { - if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { - use core::arch::asm; - use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; - use super::{FCALL_BLS12_381_TWIST_ADD_LINE_COEFFS_ID, FCALL_BLS12_381_TWIST_DBL_LINE_COEFFS_ID}; - } -} - -/// Computes the coefficients for the line defining the addition of two points on the `bls12_381` twist. -/// -/// ### Safety -/// -/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. -/// -/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness -/// of the result. It is the caller's responsibility to ensure it. -#[allow(unused_variables)] -pub fn fcall_bls12_381_add_line_coeffs( - p1_value: &[u64; 24], - p2_value: &[u64; 24], -) -> ([u64; 12], [u64; 12]) { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p1_value, 24); - ziskos_fcall_param!(p2_value, 24); - ziskos_fcall!(FCALL_BLS12_381_TWIST_ADD_LINE_COEFFS_ID); - ( - [ - ziskos_fcall_get(), // 0 - ziskos_fcall_get(), // 1 - ziskos_fcall_get(), // 2 - ziskos_fcall_get(), // 3 - ziskos_fcall_get(), // 4 - ziskos_fcall_get(), // 5 - ziskos_fcall_get(), // 6 - ziskos_fcall_get(), // 7 - ziskos_fcall_get(), // 8 - ziskos_fcall_get(), // 9 - ziskos_fcall_get(), // 10 - ziskos_fcall_get(), // 11 - ], - [ - ziskos_fcall_get(), // 0 - ziskos_fcall_get(), // 1 - ziskos_fcall_get(), // 2 - ziskos_fcall_get(), // 3 - ziskos_fcall_get(), // 4 - ziskos_fcall_get(), // 5 - ziskos_fcall_get(), // 6 - ziskos_fcall_get(), // 7 - ziskos_fcall_get(), // 8 - ziskos_fcall_get(), // 9 - ziskos_fcall_get(), // 10 - ziskos_fcall_get(), // 11 - ], - ) - } -} - -/// Computes the coefficients for the line defining the doubling of a point on the `bls12_381` twist. -/// -/// ### Safety -/// -/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. -/// -/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness -/// of the result. It is the caller's responsibility to ensure it. -#[allow(unused_variables)] -pub fn fcall_bls12_381_dbl_line_coeffs(p_value: &[u64; 24]) -> ([u64; 12], [u64; 12]) { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p_value, 24); - ziskos_fcall!(FCALL_BLS12_381_TWIST_DBL_LINE_COEFFS_ID); - ( - [ - ziskos_fcall_get(), // 0 - ziskos_fcall_get(), // 1 - ziskos_fcall_get(), // 2 - ziskos_fcall_get(), // 3 - ziskos_fcall_get(), // 4 - ziskos_fcall_get(), // 5 - ziskos_fcall_get(), // 6 - ziskos_fcall_get(), // 7 - ziskos_fcall_get(), // 8 - ziskos_fcall_get(), // 9 - ziskos_fcall_get(), // 10 - ziskos_fcall_get(), // 11 - ], - [ - ziskos_fcall_get(), // 0 - ziskos_fcall_get(), // 1 - ziskos_fcall_get(), // 2 - ziskos_fcall_get(), // 3 - ziskos_fcall_get(), // 4 - ziskos_fcall_get(), // 5 - ziskos_fcall_get(), // 6 - ziskos_fcall_get(), // 7 - ziskos_fcall_get(), // 8 - ziskos_fcall_get(), // 9 - ziskos_fcall_get(), // 10 - ziskos_fcall_get(), // 11 - ], - ) - } -} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bn254/fp.rs b/ziskos/entrypoint/src/zisklib/fcalls/bn254/fp.rs new file mode 100644 index 000000000..596ee11e5 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/bn254/fp.rs @@ -0,0 +1,60 @@ +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { + use core::arch::asm; + use crate::{ziskos_fcall, ziskos_fcall_param, zisklib::FCALL_BN254_FP_INV_ID}; + #[cfg(not(feature = "inputcpy"))] + use crate::ziskos_fcall_get; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } else { + use crate::zisklib::fcalls_impl::bn254::bn254_fp_inv; + } +} + +/// Executes the multiplicative inverse computation over the base field of the `bn254` curve. +/// +/// `fcall_bn254_fp_inv` performs an inversion of a 256-bit field element, +/// represented as an array of four `u64` values. +/// +/// - `fcall_bn254_fp_inv` performs the inversion and **returns the result directly**. +/// +/// ### Safety +/// +/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_bn254_fp_inv( + p_value: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let result: [u64; 4] = bn254_fp_inv(p_value); + #[cfg(feature = "hints")] + { + hints.push(result.len() as u64); + hints.extend_from_slice(&result); + } + result + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p_value, 4); + ziskos_fcall!(FCALL_BN254_FP_INV_ID); + #[cfg(not(feature = "inputcpy"))] + { + [ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get()] + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut result: MaybeUninit<[u64; 4]> = MaybeUninit::uninit(); + ziskos_inputcpy!(result, 4 * 8); + unsafe { result.assume_init() } + } + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bn254/fp2.rs b/ziskos/entrypoint/src/zisklib/fcalls/bn254/fp2.rs new file mode 100644 index 000000000..d5512cbcb --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/bn254/fp2.rs @@ -0,0 +1,69 @@ +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { + use core::arch::asm; + use crate::{ziskos_fcall, ziskos_fcall_param, zisklib::FCALL_BN254_FP2_INV_ID}; + #[cfg(not(feature = "inputcpy"))] + use crate::ziskos_fcall_get; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } else { + use crate::zisklib::fcalls_impl::bn254::bn254_fp2_inv; + } +} + +/// Executes the multiplicative inverse computation over the complex extension field of the `bn254` curve. +/// +/// `fcall_bn254_fp2_inv` performs an inversion of a 512-bit extension field element, +/// represented as an array of eight `u64` values. +/// +/// - `fcall_bn254_fp2_inv` performs the inversion and **returns the result directly**. +/// +/// ### Safety +/// +/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_bn254_fp2_inv( + p_value: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 8] { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let result: [u64; 8] = bn254_fp2_inv(p_value); + #[cfg(feature = "hints")] + { + hints.push(result.len() as u64); + hints.extend_from_slice(&result); + } + result + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p_value, 8); + ziskos_fcall!(FCALL_BN254_FP2_INV_ID); + #[cfg(not(feature = "inputcpy"))] + { + [ + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ] + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut result: MaybeUninit<[u64; 8]> = MaybeUninit::uninit(); + ziskos_inputcpy!(result, 8 * 8); + unsafe { result.assume_init() } + } + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bn254/mod.rs b/ziskos/entrypoint/src/zisklib/fcalls/bn254/mod.rs new file mode 100644 index 000000000..a494177c3 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/bn254/mod.rs @@ -0,0 +1,7 @@ +mod fp; +mod fp2; +mod twist; + +pub use fp::*; +pub use fp2::*; +pub use twist::*; diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bn254/twist.rs b/ziskos/entrypoint/src/zisklib/fcalls/bn254/twist.rs new file mode 100644 index 000000000..fb2dd065a --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/bn254/twist.rs @@ -0,0 +1,156 @@ +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { + use core::arch::asm; + use crate::{ + ziskos_fcall, ziskos_fcall_param, + zisklib::{FCALL_BN254_TWIST_ADD_LINE_COEFFS_ID, FCALL_BN254_TWIST_DBL_LINE_COEFFS_ID} + }; + #[cfg(not(feature = "inputcpy"))] + use crate::ziskos_fcall_get; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } else { + use crate::zisklib::fcalls_impl::bn254::{bn254_twist_add_line_coeffs, bn254_twist_dbl_line_coeffs}; + } + +} + +/// Computes the coefficients for the line defining the addition of two points on the `bn254` twist. +/// +/// ### Safety +/// +/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_bn254_twist_add_line_coeffs( + p1_value: &[u64; 16], + p2_value: &[u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> ([u64; 8], [u64; 8]) { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let x1: [u64; 8] = p1_value[0..8].try_into().unwrap(); + let y1: [u64; 8] = p1_value[8..16].try_into().unwrap(); + let x2: [u64; 8] = p2_value[0..8].try_into().unwrap(); + let y2: [u64; 8] = p2_value[8..16].try_into().unwrap(); + let (lambda, mu): ([u64; 8], [u64; 8]) = bn254_twist_add_line_coeffs(&x1, &y1, &x2, &y2); + #[cfg(feature = "hints")] + { + hints.push(16); + hints.extend_from_slice(&lambda); + hints.extend_from_slice(&mu); + } + (lambda, mu) + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p1_value, 16); + ziskos_fcall_param!(p2_value, 16); + ziskos_fcall!(FCALL_BN254_TWIST_ADD_LINE_COEFFS_ID); + #[cfg(not(feature = "inputcpy"))] + { + ( + [ + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ], + [ + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ], + ) + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut lambda: MaybeUninit<[u64; 8]> = MaybeUninit::uninit(); + ziskos_inputcpy!(lambda, 8 * 8); + let mut mu: MaybeUninit<[u64; 8]> = MaybeUninit::uninit(); + ziskos_inputcpy!(mu, 8 * 8); + unsafe { (lambda.assume_init(), mu.assume_init()) } + } + } +} + +/// Computes the coefficients for the line defining the doubling of a point on the `bn254` twist. +/// +/// ### Safety +/// +/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_bn254_twist_dbl_line_coeffs( + p_value: &[u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> ([u64; 8], [u64; 8]) { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let x1: [u64; 8] = p_value[0..8].try_into().unwrap(); + let y1: [u64; 8] = p_value[8..16].try_into().unwrap(); + let (lambda, mu): ([u64; 8], [u64; 8]) = bn254_twist_dbl_line_coeffs(&x1, &y1); + #[cfg(feature = "hints")] + { + hints.push(16); + hints.extend_from_slice(&lambda); + hints.extend_from_slice(&mu); + } + (lambda, mu) + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p_value, 16); + ziskos_fcall!(FCALL_BN254_TWIST_DBL_LINE_COEFFS_ID); + #[cfg(not(feature = "inputcpy"))] + { + ( + [ + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ], + [ + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ], + ) + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut lambda: MaybeUninit<[u64; 8]> = MaybeUninit::uninit(); + ziskos_inputcpy!(lambda, 8 * 8); + let mut mu: MaybeUninit<[u64; 8]> = MaybeUninit::uninit(); + ziskos_inputcpy!(mu, 8 * 8); + unsafe { (lambda.assume_init(), mu.assume_init()) } + } + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bn254_fp.rs b/ziskos/entrypoint/src/zisklib/fcalls/bn254_fp.rs deleted file mode 100644 index c34957e5f..000000000 --- a/ziskos/entrypoint/src/zisklib/fcalls/bn254_fp.rs +++ /dev/null @@ -1,34 +0,0 @@ -//! fcall_bn254_fp_inv free call -use cfg_if::cfg_if; -cfg_if! { - if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { - use core::arch::asm; - use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; - use super::FCALL_BN254_FP_INV_ID; - } -} - -/// Executes the multiplicative inverse computation over the base field of the `bn254` curve. -/// -/// `fcall_bn254_fp_inv` performs an inversion of a 256-bit field element, -/// represented as an array of four `u64` values. -/// -/// - `fcall_bn254_fp_inv` performs the inversion and **returns the result directly**. -/// -/// ### Safety -/// -/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. -/// -/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness -/// of the result. It is the caller's responsibility to ensure it. -#[allow(unused_variables)] -pub fn fcall_bn254_fp_inv(p_value: &[u64; 4]) -> [u64; 4] { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p_value, 4); - ziskos_fcall!(FCALL_BN254_FP_INV_ID); - [ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get()] - } -} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bn254_fp2.rs b/ziskos/entrypoint/src/zisklib/fcalls/bn254_fp2.rs deleted file mode 100644 index 5d27436ec..000000000 --- a/ziskos/entrypoint/src/zisklib/fcalls/bn254_fp2.rs +++ /dev/null @@ -1,43 +0,0 @@ -//! fcall_bn254_fp2_inv free call -use cfg_if::cfg_if; -cfg_if! { - if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { - use core::arch::asm; - use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; - use super::FCALL_BN254_FP2_INV_ID; - } -} - -/// Executes the multiplicative inverse computation over the complex extension field of the `bn254` curve. -/// -/// `fcall_bn254_fp2_inv` performs an inversion of a 512-bit extension field element, -/// represented as an array of eight `u64` values. -/// -/// - `fcall_bn254_fp2_inv` performs the inversion and **returns the result directly**. -/// -/// ### Safety -/// -/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. -/// -/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness -/// of the result. It is the caller's responsibility to ensure it. -#[allow(unused_variables)] -pub fn fcall_bn254_fp2_inv(p_value: &[u64; 8]) -> [u64; 8] { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p_value, 8); - ziskos_fcall!(FCALL_BN254_FP2_INV_ID); - [ - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ] - } -} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/bn254_twist.rs b/ziskos/entrypoint/src/zisklib/fcalls/bn254_twist.rs deleted file mode 100644 index 756e3976b..000000000 --- a/ziskos/entrypoint/src/zisklib/fcalls/bn254_twist.rs +++ /dev/null @@ -1,94 +0,0 @@ -use cfg_if::cfg_if; -cfg_if! { - if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { - use core::arch::asm; - use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; - use super::{FCALL_BN254_TWIST_ADD_LINE_COEFFS_ID, FCALL_BN254_TWIST_DBL_LINE_COEFFS_ID}; - } -} - -/// Computes the coefficients for the line defining the addition of two points on the `bn254` twist. -/// -/// ### Safety -/// -/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. -/// -/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness -/// of the result. It is the caller's responsibility to ensure it. -#[allow(unused_variables)] -pub fn fcall_bn254_add_line_coeffs( - p1_value: &[u64; 16], - p2_value: &[u64; 16], -) -> ([u64; 8], [u64; 8]) { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p1_value, 16); - ziskos_fcall_param!(p2_value, 16); - ziskos_fcall!(FCALL_BN254_TWIST_ADD_LINE_COEFFS_ID); - ( - [ - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ], - [ - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ], - ) - } -} - -/// Computes the coefficients for the line defining the doubling of a point on the `bn254` twist. -/// -/// ### Safety -/// -/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. -/// -/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness -/// of the result. It is the caller's responsibility to ensure it. -#[allow(unused_variables)] -pub fn fcall_bn254_dbl_line_coeffs(p_value: &[u64; 16]) -> ([u64; 8], [u64; 8]) { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p_value, 16); - ziskos_fcall!(FCALL_BN254_TWIST_DBL_LINE_COEFFS_ID); - ( - [ - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ], - [ - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ], - ) - } -} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/input.rs b/ziskos/entrypoint/src/zisklib/fcalls/input.rs new file mode 100644 index 000000000..0a25488a1 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/input.rs @@ -0,0 +1,23 @@ +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { + use core::arch::asm; + use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; + use super::FCALL_INPUT_READY_ID; + } +} + +#[allow(unused_variables)] +pub fn fcall_input_ready(address: &u64) { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + // TODO: wait for input to be ready at the given address, then check the input length vs. address and return an error if the input is not long enough. For now, we just return immediately. + unimplemented!("fcall_input_ready is not implemented for non-zisk targets"); + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(*address, 1); // Number of inputs + ziskos_fcall!(FCALL_INPUT_READY_ID); + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/mod.rs b/ziskos/entrypoint/src/zisklib/fcalls/mod.rs index 616b890ec..769a702f7 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls/mod.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls/mod.rs @@ -18,35 +18,29 @@ pub const FCALL_MSB_POS_384_ID: u16 = 15; pub const FCALL_BIG_INT256_DIV_ID: u16 = 16; pub const FCALL_BIG_INT_DIV_ID: u16 = 17; pub const FCALL_BIN_DECOMP_ID: u16 = 18; +pub const FCALL_BLS12_381_FP2_SQRT_ID: u16 = 19; +pub const FCALL_SECP256K1_ECDSA_VERIFY_ID: u16 = 20; +pub const FCALL_SECP256R1_ECDSA_VERIFY_ID: u16 = 21; +pub const FCALL_INPUT_READY_ID: u16 = 22; mod big_int256_div; mod big_int_div; mod bin_decomp; -mod bls12_381_fp2_inv; -mod bls12_381_fp_inv; -mod bls12_381_fp_sqrt; -mod bls12_381_twist; -mod bn254_fp; -mod bn254_fp2; -mod bn254_twist; +mod bls12_381; +mod bn254; +mod input; mod msb_pos_256; mod msb_pos_384; -mod secp256k1_fn_inv; -mod secp256k1_fp_inv; -mod secp256k1_fp_sqrt; +mod secp256k1; +mod secp256r1; pub use big_int256_div::*; pub use big_int_div::*; pub use bin_decomp::*; -pub use bls12_381_fp2_inv::*; -pub use bls12_381_fp_inv::*; -pub use bls12_381_fp_sqrt::*; -pub use bls12_381_twist::*; -pub use bn254_fp::*; -pub use bn254_fp2::*; -pub use bn254_twist::*; +pub use bls12_381::*; +pub use bn254::*; +pub use input::*; pub use msb_pos_256::*; pub use msb_pos_384::*; -pub use secp256k1_fn_inv::*; -pub use secp256k1_fp_inv::*; -pub use secp256k1_fp_sqrt::*; +pub use secp256k1::*; +pub use secp256r1::*; diff --git a/ziskos/entrypoint/src/zisklib/fcalls/msb_pos_256.rs b/ziskos/entrypoint/src/zisklib/fcalls/msb_pos_256.rs index 8af583b12..2cf69da19 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls/msb_pos_256.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls/msb_pos_256.rs @@ -1,19 +1,69 @@ use cfg_if::cfg_if; + cfg_if! { if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { use core::arch::asm; use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; use super::FCALL_MSB_POS_256_ID; + } else { + use crate::zisklib::fcalls_impl::msb_pos_256::msb_pos_256; } } + #[allow(unused_variables)] -pub fn fcall_msb_pos_256(x: &[u64; 4], y: &[u64; 4]) -> (u64, u64) { +pub fn fcall_msb_pos_256( + x: &[u64; 4], + y: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> (u64, u64) { #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); + { + let tmp: [u64; 8] = [x[0], x[1], x[2], x[3], y[0], y[1], y[2], y[3]]; + let (i, pos) = msb_pos_256(&tmp, 2); + #[cfg(feature = "hints")] + { + hints.push(2); + hints.push(i as u64); + hints.push(pos as u64); + } + (i as u64, pos as u64) + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(2, 1); // Number of inputs + ziskos_fcall_param!(x, 4); + ziskos_fcall_param!(y, 4); + ziskos_fcall!(FCALL_MSB_POS_256_ID); + (ziskos_fcall_get(), ziskos_fcall_get()) + } +} + +#[allow(unused_variables)] +pub fn fcall_msb_pos_256_3( + x: &[u64; 4], + y: &[u64; 4], + z: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> (u64, u64) { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let tmp: [u64; 12] = + [x[0], x[1], x[2], x[3], y[0], y[1], y[2], y[3], z[0], z[1], z[2], z[3]]; + let (i, pos) = msb_pos_256(&tmp, 3); + #[cfg(feature = "hints")] + { + hints.push(2); + hints.push(i as u64); + hints.push(pos as u64); + } + (i as u64, pos as u64) + } #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { + ziskos_fcall_param!(3, 1); // Number of inputs ziskos_fcall_param!(x, 4); ziskos_fcall_param!(y, 4); + ziskos_fcall_param!(z, 4); ziskos_fcall!(FCALL_MSB_POS_256_ID); (ziskos_fcall_get(), ziskos_fcall_get()) } diff --git a/ziskos/entrypoint/src/zisklib/fcalls/msb_pos_384.rs b/ziskos/entrypoint/src/zisklib/fcalls/msb_pos_384.rs index 629063c9f..f370b4627 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls/msb_pos_384.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls/msb_pos_384.rs @@ -1,15 +1,32 @@ use cfg_if::cfg_if; + cfg_if! { if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { use core::arch::asm; use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; use super::FCALL_MSB_POS_384_ID; + } else { + use crate::zisklib::fcalls_impl::msb_pos_384::msb_pos_384; } } + #[allow(unused_variables)] -pub fn fcall_msb_pos_384(x: &[u64; 6], y: &[u64; 6]) -> (u64, u64) { +pub fn fcall_msb_pos_384( + x: &[u64; 6], + y: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> (u64, u64) { #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); + { + let (i, pos) = msb_pos_384(x, y); + #[cfg(feature = "hints")] + { + hints.push(2); + hints.push(i as u64); + hints.push(pos as u64); + } + (i as u64, pos as u64) + } #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { ziskos_fcall_param!(x, 8); diff --git a/ziskos/entrypoint/src/zisklib/fcalls/secp256k1/ecdsa.rs b/ziskos/entrypoint/src/zisklib/fcalls/secp256k1/ecdsa.rs new file mode 100644 index 000000000..035d548ad --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/secp256k1/ecdsa.rs @@ -0,0 +1,103 @@ +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { + use core::arch::asm; + use crate::{ziskos_fcall, ziskos_fcall_param, zisklib::FCALL_SECP256K1_ECDSA_VERIFY_ID}; + #[cfg(not(feature = "inputcpy"))] + use crate::ziskos_fcall_get; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } +} + +/// Hints the ECDSA recovery computation over the `secp256k1` curve. +/// +/// Given the public key `PK`, a message hash `z`, and signature components `(r, s)`, +/// this function hints a curve point `P` such that: +/// +/// ```text +/// P = [s⁻¹·z (mod n)]G + [s⁻¹·r (mod n)]PK +/// ``` +/// +/// ### Parameters +/// +/// - `pk_value`: The public key `PK = (x, y)`, +/// represented as 8 `u64` limbs in little-endian order: `[x₀, x₁, x₂, x₃, y₀, y₁, y₂, y₃]` +/// - `z_value`: The message hash (prehash), represented as 4 `u64` limbs in little-endian order +/// - `r_value`: The signature `r` component, represented as 4 `u64` limbs in little-endian order +/// - `s_value`: The signature `s` component, represented as 4 `u64` limbs in little-endian order +/// +/// ### Returns +/// +/// The curve point `P = (x, y)` as 8 `u64` limbs in little-endian order: +/// `[x₀, x₁, x₂, x₃, y₀, y₁, y₂, y₃]` +/// +/// ### Safety +/// +/// The caller must ensure that all input pointers (`pk_value`, `z_value`, `r_value`, `s_value`) are +/// valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_secp256k1_ecdsa_verify( + pk_value: &[u64; 8], + z_value: &[u64; 4], + r_value: &[u64; 4], + s_value: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 8] { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + use crate::zisklib::fcalls_impl; + + // Convert inputs into a single params array + let mut params: [u64; 20] = [0u64; 20]; + params[0..8].copy_from_slice(pk_value); + params[8..12].copy_from_slice(z_value); + params[12..16].copy_from_slice(r_value); + params[16..20].copy_from_slice(s_value); + + // Call the implementation + let mut results = [0u64; 8]; + fcalls_impl::secp256k1::fcall_secp256k1_ecdsa_verify(¶ms, &mut results); + + // Hint the result + #[cfg(feature = "hints")] + { + hints.push(results.len() as u64); + hints.extend_from_slice(&results); + } + + results + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(pk_value, 8); + ziskos_fcall_param!(z_value, 4); + ziskos_fcall_param!(r_value, 4); + ziskos_fcall_param!(s_value, 4); + ziskos_fcall!(FCALL_SECP256K1_ECDSA_VERIFY_ID); + #[cfg(not(feature = "inputcpy"))] + { + [ + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ] + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut res: MaybeUninit<[u64; 8]> = MaybeUninit::uninit(); + ziskos_inputcpy!(res, 64); + unsafe { res.assume_init() } + } + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/secp256k1_fn_inv.rs b/ziskos/entrypoint/src/zisklib/fcalls/secp256k1/fn.rs similarity index 51% rename from ziskos/entrypoint/src/zisklib/fcalls/secp256k1_fn_inv.rs rename to ziskos/entrypoint/src/zisklib/fcalls/secp256k1/fn.rs index 51dceae11..dc71c98e0 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls/secp256k1_fn_inv.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls/secp256k1/fn.rs @@ -1,12 +1,18 @@ -//! fcall_secp256k1_fn_inv free call use cfg_if::cfg_if; + cfg_if! { if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { use core::arch::asm; - use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; - use super::FCALL_SECP256K1_FN_INV_ID; + use crate::{ziskos_fcall, ziskos_fcall_param, zisklib::FCALL_SECP256K1_FN_INV_ID}; + #[cfg(not(feature = "inputcpy"))] + use crate::ziskos_fcall_get; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } else { + use lib_c::secp256k1_fn_inv_c; } } + /// Executes the multiplicative inverse computation over the scalar field of the `secp256k1` curve. /// /// Both `fcall_secp256k1_fn_inv` and `fcall2_secp256k1_fn_inv` perform an inversion of a 256-bit @@ -23,21 +29,53 @@ cfg_if! { /// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness /// of the result. It is the caller's responsibility to ensure it. #[allow(unused_variables)] -pub fn fcall_secp256k1_fn_inv(p_value: &[u64; 4]) -> [u64; 4] { +pub fn fcall_secp256k1_fn_inv( + p_value: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); + { + let mut result: [u64; 4] = [0; 4]; + secp256k1_fn_inv_c(p_value, &mut result); + #[cfg(feature = "hints")] + { + hints.push(result.len() as u64); + hints.extend_from_slice(&result); + } + result + } #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { ziskos_fcall_param!(p_value, 4); ziskos_fcall!(FCALL_SECP256K1_FN_INV_ID); - [ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get()] + #[cfg(not(feature = "inputcpy"))] + { + [ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get()] + } + #[cfg(feature = "inputcpy")] + { + let mut res: core::mem::MaybeUninit<[u64; 4]> = core::mem::MaybeUninit::uninit(); + ziskos_inputcpy!(res, 32); + unsafe { res.assume_init() } + } } } #[allow(unused_variables)] -pub fn fcall2_secp256k1_fn_inv(p_value: &[u64; 4]) { +pub fn fcall_secp256k1_fn_inv_in_place( + p_value: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) { #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); + { + let mut result: [u64; 4] = [0; 4]; + secp256k1_fn_inv_c(p_value, &mut result); + #[cfg(feature = "hints")] + { + hints.push(result.len() as u64); + hints.extend_from_slice(&result); + } + } #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { ziskos_fcall_param!(p_value, 4); diff --git a/ziskos/entrypoint/src/zisklib/fcalls/secp256k1/fp.rs b/ziskos/entrypoint/src/zisklib/fcalls/secp256k1/fp.rs new file mode 100644 index 000000000..359eb866f --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/secp256k1/fp.rs @@ -0,0 +1,163 @@ +use std::{ffi::c_void, mem::MaybeUninit}; + +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { + use core::arch::asm; + use crate::{ + ziskos_fcall, ziskos_fcall_param, + zisklib::{FCALL_SECP256K1_FP_INV_ID, FCALL_SECP256K1_FP_SQRT_ID} + }; + #[cfg(not(feature = "inputcpy"))] + use crate::ziskos_fcall_get; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } else { + use lib_c::{secp256k1_fp_inv_c}; + use crate::zisklib::fcalls_impl::secp256k1::secp256k1_fp_sqrt; + } + +} + +/// Executes the multiplicative inverse computation over the base field of the `secp256k1` curve. +/// +/// Both `fcall_secp256k1_fp_inv` and `fcall2_secp256k1_fp_inv` perform an inversion of a 256-bit field element, +/// represented as an array of four `u64` values. +/// +/// - `fcall_secp256k1_fp_inv` performs the inversion and **returns the result directly**. +/// - `fcall2_secp256k1_fp_inv` performs the inversion but does **not return the result immediately**. +/// You must explicitly retrieve the result using four (4) `fcall_get` instructions. +/// +/// ### Safety +/// +/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_secp256k1_fp_inv( + p_value: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let mut result: [u64; 4] = [0; 4]; + secp256k1_fp_inv_c(p_value, &mut result); + #[cfg(feature = "hints")] + { + hints.push(result.len() as u64); + hints.extend_from_slice(&result); + } + result + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p_value, 4); + ziskos_fcall!(FCALL_SECP256K1_FP_INV_ID); + #[cfg(not(feature = "inputcpy"))] + { + [ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get()] + } + #[cfg(feature = "inputcpy")] + { + let mut res: MaybeUninit<[u64; 4]> = MaybeUninit::uninit(); + ziskos_inputcpy!(res, 32); + unsafe { res.assume_init() } + } + } +} + +#[allow(unused_variables)] +pub fn fcall_secp256k1_fp_inv_in_place( + p_value: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let mut result: [u64; 4] = [0; 4]; + secp256k1_fp_inv_c(p_value, &mut result); + #[cfg(feature = "hints")] + { + hints.push(result.len() as u64); + hints.extend_from_slice(&result); + } + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p_value, 4); + ziskos_fcall!(FCALL_SECP256K1_FP_INV_ID); + } +} + +/// Executes the square root computation over the base field of the `secp256k1` curve. +/// +/// Both `fcall_secp256k1_fp_inv` and `fcall2_secp256k1_fp_inv` perform an square root of a 256-bit +/// field element, represented as an array of four `u64` values. +/// +/// - `fcall_secp256k1_fp_inv` performs the sqrt and **returns the result directly**. +/// - `fcall2_secp256k1_fp_inv` performs the sqrt but does **not return the result immediately**. +/// You must explicitly retrieve the result using four (4) `fcall_get` instructions. +/// +/// ### Safety +/// +/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_secp256k1_fp_sqrt( + p_value: &[u64; 4], + parity: u64, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 5] { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + let mut result: [u64; 5] = [0; 5]; + secp256k1_fp_sqrt(p_value, parity, &mut result); + #[cfg(feature = "hints")] + { + hints.push(result.len() as u64); + hints.extend_from_slice(&result); + } + result + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p_value, 4); + ziskos_fcall_param!(parity, 1); + ziskos_fcall!(FCALL_SECP256K1_FP_SQRT_ID); + #[cfg(not(feature = "inputcpy"))] + { + [ + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ] + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut res: MaybeUninit<[u64; 5]> = MaybeUninit::uninit(); + ziskos_inputcpy!(res, 40); + unsafe { res.assume_init() } + } + } +} + +#[cfg(feature = "inputcpy")] +#[allow(unused_variables)] +#[inline(always)] +pub fn fcall_secp256k1_fp_sqrt_p(p_value: &[u64; 4], parity: u64, res: &mut [u64; 5]) { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + unreachable!(); + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(p_value, 4); + ziskos_fcall_param!(parity, 1); + ziskos_fcall!(FCALL_SECP256K1_FP_SQRT_ID); + ziskos_inputcpy!(res, 40); + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/secp256k1/mod.rs b/ziskos/entrypoint/src/zisklib/fcalls/secp256k1/mod.rs new file mode 100644 index 000000000..36d3d36d9 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/secp256k1/mod.rs @@ -0,0 +1,7 @@ +mod ecdsa; +mod r#fn; +mod fp; + +pub use ecdsa::*; +pub use fp::*; +pub use r#fn::*; diff --git a/ziskos/entrypoint/src/zisklib/fcalls/secp256k1_fp_inv.rs b/ziskos/entrypoint/src/zisklib/fcalls/secp256k1_fp_inv.rs deleted file mode 100644 index 84ad3c4ae..000000000 --- a/ziskos/entrypoint/src/zisklib/fcalls/secp256k1_fp_inv.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! fcall_secp256k1_fp_inv free call -use cfg_if::cfg_if; -cfg_if! { - if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { - use core::arch::asm; - use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; - use super::FCALL_SECP256K1_FP_INV_ID; - } -} - -/// Executes the multiplicative inverse computation over the base field of the `secp256k1` curve. -/// -/// Both `fcall_secp256k1_fp_inv` and `fcall2_secp256k1_fp_inv` perform an inversion of a 256-bit field element, -/// represented as an array of four `u64` values. -/// -/// - `fcall_secp256k1_fp_inv` performs the inversion and **returns the result directly**. -/// - `fcall2_secp256k1_fp_inv` performs the inversion but does **not return the result immediately**. -/// You must explicitly retrieve the result using four (4) `fcall_get` instructions. -/// -/// ### Safety -/// -/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. -/// -/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness -/// of the result. It is the caller's responsibility to ensure it. -#[allow(unused_variables)] -pub fn fcall_secp256k1_fp_inv(p_value: &[u64; 4]) -> [u64; 4] { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p_value, 4); - ziskos_fcall!(FCALL_SECP256K1_FP_INV_ID); - [ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get(), ziskos_fcall_get()] - } -} - -#[allow(unused_variables)] -pub fn fcall2_secp256k1_fp_inv(p_value: &[u64; 4]) { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p_value, 4); - ziskos_fcall!(FCALL_SECP256K1_FP_INV_ID); - } -} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/secp256k1_fp_sqrt.rs b/ziskos/entrypoint/src/zisklib/fcalls/secp256k1_fp_sqrt.rs deleted file mode 100644 index ed64347aa..000000000 --- a/ziskos/entrypoint/src/zisklib/fcalls/secp256k1_fp_sqrt.rs +++ /dev/null @@ -1,55 +0,0 @@ -//! fcall_secp256k1_fp_sqrt free call -use cfg_if::cfg_if; -cfg_if! { - if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { - use core::arch::asm; - use crate::{ziskos_fcall, ziskos_fcall_get, ziskos_fcall_param}; - use super::FCALL_SECP256K1_FP_SQRT_ID; - } -} - -/// Executes the square root computation over the base field of the `secp256k1` curve. -/// -/// Both `fcall_secp256k1_fp_inv` and `fcall2_secp256k1_fp_inv` perform an square root of a 256-bit -/// field element, represented as an array of four `u64` values. -/// -/// - `fcall_secp256k1_fp_inv` performs the sqrt and **returns the result directly**. -/// - `fcall2_secp256k1_fp_inv` performs the sqrt but does **not return the result immediately**. -/// You must explicitly retrieve the result using four (4) `fcall_get` instructions. -/// -/// ### Safety -/// -/// The caller must ensure that the input pointer (`p_value`) is valid and aligned to an 8-byte boundary. -/// -/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness -/// of the result. It is the caller's responsibility to ensure it. -#[allow(unused_variables)] -pub fn fcall_secp256k1_fp_sqrt(p_value: &[u64; 4], parity: u64) -> [u64; 5] { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p_value, 4); - ziskos_fcall_param!(parity, 1); - ziskos_fcall!(FCALL_SECP256K1_FP_SQRT_ID); - [ - ziskos_fcall_get(), // results[0] - indicates if a sqrt exists (1) or not (0) - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ziskos_fcall_get(), - ] - } -} - -#[allow(unused_variables)] -pub fn fcall2_secp256k1_fp_sqrt(p_value: &[u64; 4], parity: u64) { - #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] - unreachable!(); - #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] - { - ziskos_fcall_param!(p_value, 4); - ziskos_fcall_param!(parity, 1); - ziskos_fcall!(FCALL_SECP256K1_FP_SQRT_ID); - } -} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/secp256r1/ecdsa.rs b/ziskos/entrypoint/src/zisklib/fcalls/secp256r1/ecdsa.rs new file mode 100644 index 000000000..2ddc45dde --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/secp256r1/ecdsa.rs @@ -0,0 +1,103 @@ +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] { + use core::arch::asm; + use crate::{ziskos_fcall, ziskos_fcall_param, zisklib::FCALL_SECP256R1_ECDSA_VERIFY_ID}; + #[cfg(not(feature = "inputcpy"))] + use crate::ziskos_fcall_get; + #[cfg(feature = "inputcpy")] + use crate::ziskos_inputcpy; + } +} + +/// Hints the ECDSA recovery computation over the `secp256r1` curve. +/// +/// Given the public key `PK`, a message hash `z`, and signature components `(r, s)`, +/// this function hints a curve point `P` such that: +/// +/// ```text +/// P = [s⁻¹·z (mod n)]G + [s⁻¹·r (mod n)]PK +/// ``` +/// +/// ### Parameters +/// +/// - `pk_value`: The public key `PK = (x, y)`, +/// represented as 8 `u64` limbs in little-endian order: `[x₀, x₁, x₂, x₃, y₀, y₁, y₂, y₃]` +/// - `z_value`: The message hash (prehash), represented as 4 `u64` limbs in little-endian order +/// - `r_value`: The signature `r` component, represented as 4 `u64` limbs in little-endian order +/// - `s_value`: The signature `s` component, represented as 4 `u64` limbs in little-endian order +/// +/// ### Returns +/// +/// The curve point `P = (x, y)` as 8 `u64` limbs in little-endian order: +/// `[x₀, x₁, x₂, x₃, y₀, y₁, y₂, y₃]` +/// +/// ### Safety +/// +/// The caller must ensure that all input pointers (`pk_value`, `z_value`, `r_value`, `s_value`) are +/// valid and aligned to an 8-byte boundary. +/// +/// Note that this is a *free-input call*, meaning the Zisk VM does not automatically verify the correctness +/// of the result. It is the caller's responsibility to ensure it. +#[allow(unused_variables)] +pub fn fcall_secp256r1_ecdsa_verify( + pk_value: &[u64; 8], + z_value: &[u64; 4], + r_value: &[u64; 4], + s_value: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 8] { + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + use crate::zisklib::fcalls_impl; + + // Convert inputs into a single params array + let mut params: [u64; 20] = [0u64; 20]; + params[0..8].copy_from_slice(pk_value); + params[8..12].copy_from_slice(z_value); + params[12..16].copy_from_slice(r_value); + params[16..20].copy_from_slice(s_value); + + // Call the implementation + let mut results = [0u64; 8]; + fcalls_impl::secp256r1::fcall_secp256r1_ecdsa_verify(¶ms, &mut results); + + // Hint the result + #[cfg(feature = "hints")] + { + hints.push(results.len() as u64); + hints.extend_from_slice(&results); + } + + results + } + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + ziskos_fcall_param!(pk_value, 8); + ziskos_fcall_param!(z_value, 4); + ziskos_fcall_param!(r_value, 4); + ziskos_fcall_param!(s_value, 4); + ziskos_fcall!(FCALL_SECP256R1_ECDSA_VERIFY_ID); + #[cfg(not(feature = "inputcpy"))] + { + [ + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ziskos_fcall_get(), + ] + } + #[cfg(feature = "inputcpy")] + { + use core::mem::MaybeUninit; + let mut res: MaybeUninit<[u64; 8]> = MaybeUninit::uninit(); + ziskos_inputcpy!(res, 64); + unsafe { res.assume_init() } + } + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls/secp256r1/mod.rs b/ziskos/entrypoint/src/zisklib/fcalls/secp256r1/mod.rs new file mode 100644 index 000000000..c39dd2576 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls/secp256r1/mod.rs @@ -0,0 +1,3 @@ +mod ecdsa; + +pub use ecdsa::*; diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/big_int256_div.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/big_int256_div.rs index 0133ea26f..cddcf523d 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/big_int256_div.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/big_int256_div.rs @@ -18,7 +18,7 @@ pub fn fcall_big_int256_div(params: &[u64], results: &mut [u64]) -> i64 { 8 } -fn big_int256_div(a: &[u64; 4], b: &[u64; 4]) -> ([u64; 4], [u64; 4]) { +pub fn big_int256_div(a: &[u64; 4], b: &[u64; 4]) -> ([u64; 4], [u64; 4]) { let a_big = biguint_from_u64_digits(a); let b_big = biguint_from_u64_digits(b); let (quotient, remainder) = a_big.div_rem(&b_big); diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/big_int_div.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/big_int_div.rs index 8ab86be34..9c6c4b5e2 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/big_int_div.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/big_int_div.rs @@ -1,6 +1,6 @@ use num_integer::Integer; -use super::utils::{biguint_from_u64_digits, u64_digits_from_biguint}; +use crate::zisklib::fcalls_impl::utils::{biguint_from_u64_digits, u64_digits_from_biguint}; /// Perform the division of an unsigned integer `a` by another unsigned integer `b`, /// returning the quotient `q` and the remainder `r`, such that `a = b * q + r` @@ -26,7 +26,7 @@ pub fn fcall_big_int_div(params: &[u64], results: &mut [u64]) -> i64 { (2 + len_q + len_r) as i64 } -fn big_int_div_into(a: &[u64], b: &[u64], q: &mut Vec, r: &mut Vec) { +pub fn big_int_div_into(a: &[u64], b: &[u64], q: &mut Vec, r: &mut Vec) { let a_big = biguint_from_u64_digits(a); let b_big = biguint_from_u64_digits(b); diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bin_decomp.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bin_decomp.rs index 67f51e122..2986b20f7 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/bin_decomp.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bin_decomp.rs @@ -12,7 +12,7 @@ pub fn fcall_bin_decomp(parameters: &[u64], results: &mut [u64]) -> i64 { (1 + len_bits) as i64 } -fn bin_decomp(x: &[u64], len_x: usize) -> Vec { +pub fn bin_decomp(x: &[u64], len_x: usize) -> Vec { let mut decomposition = Vec::new(); let mut started = false; diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/constants.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/constants.rs new file mode 100644 index 000000000..6f016f9d8 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/constants.rs @@ -0,0 +1,57 @@ +use lazy_static::lazy_static; +use num_bigint::BigUint; + +lazy_static! { + pub(crate) static ref P: BigUint = BigUint::parse_bytes( + b"1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab", + 16 + ) + .unwrap(); + + pub static ref P_HALF: BigUint = BigUint::parse_bytes( + b"d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd555", + 16 + ) + .unwrap(); + + pub static ref P_DIV_4: BigUint = BigUint::parse_bytes( + b"680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffeaab", + 16 + ) + .unwrap(); + + pub static ref P_MINUS_3_DIV_4: BigUint = BigUint::parse_bytes( + b"680447A8E5FF9A692C6E9ED90D2EB35D91DD2E13CE144AFD9CC34A83DAC3D8907AAFFFFAC54FFFFEE7FBFFFFFFFEAAA", + 16 + ) + .unwrap(); + + pub static ref P_MINUS_1_DIV_2: BigUint = BigUint::parse_bytes( + b"D0088F51CBFF34D258DD3DB21A5D66BB23BA5C279C2895FB39869507B587B120F55FFFF58A9FFFFDCFF7FFFFFFFD555", + 16 + ) + .unwrap(); + + pub static ref NQR_FP: BigUint = BigUint::from(2u64); // First non-quadratic residue in Fp +} + +pub const ONE: [u64; 12] = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + +pub const P_MINUS_ONE: [u64; 12] = [ + 0xB9FEFFFFFFFFAAAA, + 0x1EABFFFEB153FFFF, + 0x6730D2A0F6B0F624, + 0x64774B84F38512BF, + 0x4B1BA7B6434BACD7, + 0x1A0111EA397FE69A, + 0, + 0, + 0, + 0, + 0, + 0, +]; + +pub const I: [u64; 12] = [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]; // 0 + 1*u + +pub const NQR_FP2: [u64; 12] = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]; // 1 + 1*u, a known non-quadratic residue in Fp2 diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381_fp2_inv.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/fp2_inv.rs similarity index 99% rename from ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381_fp2_inv.rs rename to ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/fp2_inv.rs index 2dcafce29..54529a237 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381_fp2_inv.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/fp2_inv.rs @@ -1,7 +1,7 @@ use lazy_static::lazy_static; use num_bigint::BigUint; -use super::bls12_381_fp_inv::{ +use super::fp_inv::{ bls12_381_fp_add, bls12_381_fp_dbl, bls12_381_fp_inv, bls12_381_fp_mul, bls12_381_fp_neg, bls12_381_fp_square, bls12_381_fp_sub, }; diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/fp2_sqrt.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/fp2_sqrt.rs new file mode 100644 index 000000000..53bb79925 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/fp2_sqrt.rs @@ -0,0 +1,222 @@ +use lazy_static::lazy_static; +use num_bigint::BigUint; +use num_traits::{One, Zero}; + +use crate::zisklib::fcalls_impl::utils::{biguint_from_u64_digits, u64_digits_from_biguint}; + +use super::{ + fp2_inv::{bls12_381_fp2_mul, bls12_381_fp2_square}, + fp_inv::{bls12_381_fp_add, bls12_381_fp_neg}, + I, NQR_FP2, ONE, P_MINUS_1_DIV_2, P_MINUS_3_DIV_4, P_MINUS_ONE, +}; + +/// Computes the square root of a non-zero field element in Fp2 +pub fn fcall_bls12_381_fp2_sqrt(params: &[u64], results: &mut [u64]) -> i64 { + // Get the input + let a: &[u64; 12] = ¶ms[0..12].try_into().unwrap(); + + // Perform the square root + let _results = bls12_381_fp2_sqrt_13(a); + results[0..13].copy_from_slice(&_results); + + 13 +} + +pub fn bls12_381_fp2_sqrt_13(a: &[u64; 12]) -> [u64; 13] { + let mut results = [0u64; 13]; + + // Perform the square root + let (sqrt, is_qr) = bls12_381_fp2_sqrt(a); + results[0] = is_qr as u64; + if !is_qr { + // To check that a is indeed a non-quadratic residue, we check that + // a * NQR is a quadratic residue for some fixed known non-quadratic residue NQR + let a_nqr = bls12_381_fp2_mul(a, &NQR_FP2); + + // Compute the square root of a * NQR + let sqrt_nqr = bls12_381_fp2_sqrt(&a_nqr).0; + + results[1..13].copy_from_slice(&sqrt_nqr); + } else { + results[1..13].copy_from_slice(&sqrt); + } + results +} + +/// Algorithm 9 from https://eprint.iacr.org/2012/685.pdf +/// Square root computation over F_p^2, with p ≡ 3 (mod 4) +fn bls12_381_fp2_sqrt(a: &[u64; 12]) -> ([u64; 12], bool) { + // Step 1: a1 ← a^((p-3)/4) + let a1 = bls12_381_fp2_exp(a, &P_MINUS_3_DIV_4); + + // Step 2: α ← a1 * a1 * a + let a1_a = bls12_381_fp2_mul(&a1, a); + let alpha = bls12_381_fp2_mul(&a1, &a1_a); + + // Step 3: a0 ← α^p * α = conjugate(α) * α + let a0 = bls12_381_fp2_mul(&bls12_381_fp2_conjugate(&alpha), &alpha); + + // Step 4-6: if a0 == -1 then return false (no square root) + if a0 == P_MINUS_ONE { + return ([0u64; 12], false); + } + + // Step 7: x0 ← a1 * a + let x0 = a1_a; + + // Step 8-13: compute x based on α + let x = if alpha == P_MINUS_ONE { + // Step 9: x ← i * x0 + bls12_381_fp2_mul(&I, &x0) + } else { + // Step 11: b ← (1 + α)^((p-1)/2) + let one_plus_alpha = bls12_381_fp2_add(&ONE, &alpha); + let b = bls12_381_fp2_exp(&one_plus_alpha, &P_MINUS_1_DIV_2); + + // Step 12: x ← b * x0 + bls12_381_fp2_mul(&b, &x0) + }; + + (x, true) +} + +pub(crate) fn bls12_381_fp2_conjugate(a: &[u64; 12]) -> [u64; 12] { + let mut result = [0u64; 12]; + result[0..6].copy_from_slice(&a[0..6]); + let imaginary_part: &[u64; 6] = &a[6..12].try_into().unwrap(); + let neg_imaginary_part = bls12_381_fp_neg(imaginary_part); + result[6..12].copy_from_slice(&neg_imaginary_part); + result +} + +pub(crate) fn bls12_381_fp2_add(a: &[u64; 12], b: &[u64; 12]) -> [u64; 12] { + let a_real = &a[0..6].try_into().unwrap(); + let a_imaginary = &a[6..12].try_into().unwrap(); + let b_real = &b[0..6].try_into().unwrap(); + let b_imaginary = &b[6..12].try_into().unwrap(); + + let real_part = bls12_381_fp_add(a_real, b_real); + let imaginary_part = bls12_381_fp_add(a_imaginary, b_imaginary); + + let mut result = [0u64; 12]; + result[0..6].copy_from_slice(&real_part); + result[6..12].copy_from_slice(&imaginary_part); + result +} + +pub(crate) fn bls12_381_fp2_exp(a: &[u64; 12], e: &BigUint) -> [u64; 12] { + let mut result = [0u64; 12]; + result[0] = 1; + + let mut base = *a; + let mut exp = e.clone(); + + while !exp.is_zero() { + if (&exp & BigUint::one()) == BigUint::one() { + result = bls12_381_fp2_mul(&result, &base); + } + base = bls12_381_fp2_mul(&base, &base); + exp >>= 1; + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sqrt_one() { + let x = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let expected_sqrt = P_MINUS_ONE; + + let mut results = [0; 13]; + fcall_bls12_381_fp2_sqrt(&x, &mut results); + let has_sqrt = results[0]; + let sqrt = &results[1..13].try_into().unwrap(); + assert_eq!(has_sqrt, 1); + assert_eq!(sqrt, &expected_sqrt); + assert_eq!(bls12_381_fp2_mul(sqrt, sqrt), x); + } + + #[test] + fn test_sqrt() { + let x = [ + 0x10486089be1876e9, + 0xcf0c3012bf0c13ef, + 0x51621421d2c37a8d, + 0xd52db71259449a47, + 0x370fd7a0a4be29da, + 0xc3d4fd75c076215, + 0x3e6ff1a3151b0959, + 0x9f0b2a8dea2c9f82, + 0xb83d47ccb71501e2, + 0xa8c917818d857f05, + 0xc48150d1cd95e0c6, + 0x112ca78116187cc8, + ]; + let expected_sqrt = [ + 0xcca66dfc0d7f69c9, + 0xaf22cf40d2f4555, + 0x92a6870798aff4d7, + 0xe595438fb87ee1fc, + 0x6f5e96c633b39798, + 0x215675032da3de5, + 0x1ef8b538e151e6f3, + 0x94b37a0021182ef6, + 0xea0d1db797288ba2, + 0x567c72d5af34be56, + 0x5470d2ed597db716, + 0x10b61243878d0170, + ]; + + let mut results = [0; 13]; + fcall_bls12_381_fp2_sqrt(&x, &mut results); + let has_sqrt = results[0]; + let sqrt = &results[1..13].try_into().unwrap(); + assert_eq!(has_sqrt, 1); + assert_eq!(sqrt, &expected_sqrt); + assert_eq!(bls12_381_fp2_mul(sqrt, sqrt), x); + } + + #[test] + fn test_no_sqrt() { + let x = [ + 0x5531f66e0c366bf8, + 0x35f8f154ff2974e6, + 0xaa81eb7e92ae7b5e, + 0x8a521c9ff4654bc0, + 0xa224f0e84356bba8, + 0xffbbc4bdd5425cb, + 0xf16972261c97a569, + 0xbf071b2a52d05a68, + 0xbaa99b2bc5260f74, + 0xedbd0c20e26eb5e5, + 0x6f3229e291d1d67a, + 0x119353ab08784f06, + ]; + let expected_sqrt = [ + 0x6d8e1fc1edb82644, + 0xa6964afc770dab5d, + 0x37d90a0e925a572d, + 0x3547fbc3f051b409, + 0xd3cdef010df23067, + 0x159b8fd2cca0a180, + 0xe0c163a5a7441092, + 0xf61c7202d7c3af80, + 0xf80c7aa929cb1e62, + 0xa076467c356a64cf, + 0x695e3d70b6a86704, + 0xb1ecd8ecdb0e8d2, + ]; // sqrt(x * NQR) + + let mut results = [0; 13]; + fcall_bls12_381_fp2_sqrt(&x, &mut results); + let has_sqrt = results[0]; + let sqrt = &results[1..13].try_into().unwrap(); + assert_eq!(has_sqrt, 0); + assert_eq!(sqrt, &expected_sqrt); + assert_eq!(bls12_381_fp2_mul(sqrt, sqrt), bls12_381_fp2_mul(&x, &NQR_FP2)); + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381_fp_inv.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/fp_inv.rs similarity index 87% rename from ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381_fp_inv.rs rename to ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/fp_inv.rs index 0402d9e48..2a076f153 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381_fp_inv.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/fp_inv.rs @@ -2,15 +2,9 @@ use lazy_static::lazy_static; use num_bigint::BigUint; use num_traits::Zero; -use super::utils::{biguint_from_u64_digits, n_u64_digits_from_biguint}; - -lazy_static! { - pub static ref P: BigUint = BigUint::parse_bytes( - b"1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab", - 16 - ) - .unwrap(); -} +use crate::zisklib::fcalls_impl::utils::{biguint_from_u64_digits, n_u64_digits_from_biguint}; + +use super::P; /// Perform the inversion of a non-zero field element in Fp pub fn fcall_bls12_381_fp_inv(params: &[u64], results: &mut [u64]) -> i64 { @@ -31,10 +25,7 @@ pub(crate) fn bls12_381_fp_inv(a: &[u64; 6]) -> [u64; 6] { let inv = a_big.modinv(&P); match inv { Some(inverse) => n_u64_digits_from_biguint::<6>(&inverse), - None => { - // Handle the case where the inverse does not exist - panic!("Inverse does not exist"); - } + None => panic!("Inverse does not exist"), } } diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381_fp_sqrt.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/fp_sqrt.rs similarity index 78% rename from ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381_fp_sqrt.rs rename to ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/fp_sqrt.rs index 6f0c6192d..58df26ec0 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381_fp_sqrt.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/fp_sqrt.rs @@ -1,29 +1,9 @@ use lazy_static::lazy_static; use num_bigint::BigUint; -use super::utils::{biguint_from_u64_digits, n_u64_digits_from_biguint}; - -lazy_static! { - pub static ref P: BigUint = BigUint::parse_bytes( - b"1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab", - 16 - ) - .unwrap(); - - pub static ref P_HALF: BigUint = BigUint::parse_bytes( - b"d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd555", - 16 - ) - .unwrap(); - - pub static ref P_DIV_4: BigUint = BigUint::parse_bytes( - b"680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffeaab", - 16 - ) - .unwrap(); - - pub static ref NQR: BigUint = BigUint::from(2u64); // First non-quadratic residue in Fp -} +use crate::zisklib::fcalls_impl::utils::{biguint_from_u64_digits, n_u64_digits_from_biguint}; + +use super::{NQR_FP, P, P_DIV_4}; /// Computes the square root of a non-zero field element in Fp pub fn fcall_bls12_381_fp_sqrt(params: &[u64], results: &mut [u64]) -> i64 { @@ -36,7 +16,7 @@ pub fn fcall_bls12_381_fp_sqrt(params: &[u64], results: &mut [u64]) -> i64 { 7 } -fn bls12_381_fp_sqrt(a: &[u64; 6], results: &mut [u64]) { +pub fn bls12_381_fp_sqrt(a: &[u64; 6], results: &mut [u64]) { let a_big = biguint_from_u64_digits(a); // Attempt to compute the square root of a @@ -49,7 +29,7 @@ fn bls12_381_fp_sqrt(a: &[u64; 6], results: &mut [u64]) { if !a_is_qr { // To check that a is indeed a non-quadratic residue, we check that // a * NQR is a quadratic residue for some fixed known non-quadratic residue NQR - let a_nqr = (a_big * &*NQR) % &*P; + let a_nqr = (a_big * &*NQR_FP) % &*P; // Compute the square root of a * NQR let sqrt_nqr = a_nqr.modpow(&P_DIV_4, &P); @@ -139,7 +119,7 @@ mod tests { let sqrt = &results[1..7].try_into().unwrap(); assert_eq!(has_sqrt, 0); assert_eq!(sqrt, &expected_sqrt); - let nqr = n_u64_digits_from_biguint::<6>(&NQR); + let nqr = n_u64_digits_from_biguint::<6>(&NQR_FP); assert_eq!(bls12_381_fp_mul(sqrt, sqrt), bls12_381_fp_mul(&x, &nqr)); } } diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/mod.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/mod.rs new file mode 100644 index 000000000..a8aa55e58 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/mod.rs @@ -0,0 +1,13 @@ +mod constants; +mod fp2_inv; +mod fp2_sqrt; +mod fp_inv; +mod fp_sqrt; +mod twist; + +use constants::*; +pub use fp2_inv::*; +pub use fp2_sqrt::*; +pub use fp_inv::*; +pub use fp_sqrt::*; +pub use twist::*; diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381_twist.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/twist.rs similarity index 77% rename from ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381_twist.rs rename to ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/twist.rs index c8f045fa0..cd2e203c6 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381_twist.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bls12_381/twist.rs @@ -1,4 +1,4 @@ -use super::bls12_381_fp2_inv::{ +use super::fp2_inv::{ bls12_381_fp2_dbl, bls12_381_fp2_inv, bls12_381_fp2_mul, bls12_381_fp2_scalar_mul, bls12_381_fp2_square, bls12_381_fp2_sub, }; @@ -11,6 +11,22 @@ pub fn fcall_bls12_381_twist_add_line_coeffs(params: &[u64], results: &mut [u64] let x2: &[u64; 12] = ¶ms[24..36].try_into().unwrap(); let y2: &[u64; 12] = ¶ms[36..48].try_into().unwrap(); + // Compute the line coefficients + let (lambda, mu) = bls12_381_twist_add_line_coeffs(x1, y1, x2, y2); + + // Store the result + results[0..12].copy_from_slice(&lambda); + results[12..24].copy_from_slice(&mu); + + 24 +} + +pub fn bls12_381_twist_add_line_coeffs( + x1: &[u64; 12], + y1: &[u64; 12], + x2: &[u64; 12], + y2: &[u64; 12], +) -> ([u64; 12], [u64; 12]) { // Compute 𝜆 = (y2 - y1)/(x2 - x1) let mut lambda = bls12_381_fp2_inv(&bls12_381_fp2_sub(x2, x1)); lambda = bls12_381_fp2_mul(&lambda, &bls12_381_fp2_sub(y2, y1)); @@ -18,11 +34,7 @@ pub fn fcall_bls12_381_twist_add_line_coeffs(params: &[u64], results: &mut [u64] // Compute 𝜇 = y - 𝜆x let mu = bls12_381_fp2_sub(y1, &bls12_381_fp2_mul(&lambda, x1)); - // Store the result - results[0..12].copy_from_slice(&lambda); - results[12..24].copy_from_slice(&mu); - - 24 + (lambda, mu) } /// Computes the coefficients (𝜆,𝜇) of the tangent line at the point (x,y) @@ -31,6 +43,17 @@ pub fn fcall_bls12_381_twist_dbl_line_coeffs(params: &[u64], results: &mut [u64] let x: &[u64; 12] = ¶ms[0..12].try_into().unwrap(); let y: &[u64; 12] = ¶ms[12..24].try_into().unwrap(); + // Compute the line coefficients + let (lambda, mu) = bls12_381_twist_dbl_line_coeffs(x, y); + + // Store the result + results[0..12].copy_from_slice(&lambda); + results[12..24].copy_from_slice(&mu); + + 24 +} + +pub fn bls12_381_twist_dbl_line_coeffs(x: &[u64; 12], y: &[u64; 12]) -> ([u64; 12], [u64; 12]) { // Compute 𝜆 = 3x²/2y let mut lambda = bls12_381_fp2_inv(&bls12_381_fp2_dbl(y)); let x_sq = bls12_381_fp2_square(x); @@ -39,9 +62,5 @@ pub fn fcall_bls12_381_twist_dbl_line_coeffs(params: &[u64], results: &mut [u64] // Compute 𝜇 = y - 𝜆x let mu = bls12_381_fp2_sub(y, &bls12_381_fp2_mul(&lambda, x)); - // Store the result - results[0..12].copy_from_slice(&lambda); - results[12..24].copy_from_slice(&mu); - - 24 + (lambda, mu) } diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/constants.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/constants.rs new file mode 100644 index 000000000..669cbe1f1 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/constants.rs @@ -0,0 +1,10 @@ +use lazy_static::lazy_static; +use num_bigint::BigUint; + +lazy_static! { + pub(crate) static ref P: BigUint = BigUint::parse_bytes( + b"30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47", + 16 + ) + .unwrap(); +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254_fp.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/fp.rs similarity index 91% rename from ziskos/entrypoint/src/zisklib/fcalls_impl/bn254_fp.rs rename to ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/fp.rs index df3bc0858..9a0fc0728 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254_fp.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/fp.rs @@ -1,15 +1,10 @@ use lazy_static::lazy_static; use num_bigint::BigUint; +use num_traits::Zero; -use super::utils::{biguint_from_u64_digits, n_u64_digits_from_biguint}; +use crate::zisklib::fcalls_impl::utils::{biguint_from_u64_digits, n_u64_digits_from_biguint}; -lazy_static! { - pub static ref P: BigUint = BigUint::parse_bytes( - b"30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47", - 16 - ) - .unwrap(); -} +use super::P; /// Perform the inversion of a non-zero field element in Fp pub fn fcall_bn254_fp_inv(params: &[u64], results: &mut [u64]) -> i64 { @@ -47,6 +42,9 @@ pub fn bn254_fp_sub(a: &[u64; 4], b: &[u64; 4]) -> [u64; 4] { pub fn bn254_fp_neg(a: &[u64; 4]) -> [u64; 4] { let a_big = biguint_from_u64_digits(a); + if a_big.is_zero() { + return [0u64; 4]; + } let neg = &*P - a_big; n_u64_digits_from_biguint(&neg) } diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254_fp2.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/fp2.rs similarity index 96% rename from ziskos/entrypoint/src/zisklib/fcalls_impl/bn254_fp2.rs rename to ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/fp2.rs index 4b16b4c3e..b22e92cea 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254_fp2.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/fp2.rs @@ -1,18 +1,12 @@ use lazy_static::lazy_static; use num_bigint::BigUint; -use super::bn254_fp::{ +use super::fp::{ bn254_fp_add, bn254_fp_dbl, bn254_fp_inv, bn254_fp_mul, bn254_fp_neg, bn254_fp_square, bn254_fp_sub, }; -lazy_static! { - static ref P: BigUint = BigUint::parse_bytes( - b"30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47", - 16 - ) - .unwrap(); -} +use super::P; /// Perform the inversion of a non-zero field element in Fp2 pub fn fcall_bn254_fp2_inv(params: &[u64], results: &mut [u64]) -> i64 { diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/mod.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/mod.rs new file mode 100644 index 000000000..57acbee6b --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/mod.rs @@ -0,0 +1,9 @@ +mod constants; +mod fp; +mod fp2; +mod twist; + +use constants::*; +pub use fp::*; +pub use fp2::*; +pub use twist::*; diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254_twist.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/twist.rs similarity index 90% rename from ziskos/entrypoint/src/zisklib/fcalls_impl/bn254_twist.rs rename to ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/twist.rs index af0bd98c9..e800cea50 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254_twist.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/bn254/twist.rs @@ -1,4 +1,4 @@ -use super::bn254_fp2::{ +use super::fp2::{ bn254_fp2_dbl, bn254_fp2_inv, bn254_fp2_mul, bn254_fp2_scalar_mul, bn254_fp2_square, bn254_fp2_sub, }; @@ -11,6 +11,22 @@ pub fn fcall_bn254_twist_add_line_coeffs(params: &[u64], results: &mut [u64]) -> let x2: &[u64; 8] = ¶ms[16..24].try_into().unwrap(); let y2: &[u64; 8] = ¶ms[24..32].try_into().unwrap(); + // Call the addition line coefficients function + let (lambda, mu) = bn254_twist_add_line_coeffs(x1, y1, x2, y2); + + // Store the result + results[0..8].copy_from_slice(&lambda); + results[8..16].copy_from_slice(&mu); + + 16 +} + +pub fn bn254_twist_add_line_coeffs( + x1: &[u64; 8], + y1: &[u64; 8], + x2: &[u64; 8], + y2: &[u64; 8], +) -> ([u64; 8], [u64; 8]) { // Compute 𝜆 = (y2 - y1)/(x2 - x1) let mut lambda = bn254_fp2_inv(&bn254_fp2_sub(x2, x1)); lambda = bn254_fp2_mul(&lambda, &bn254_fp2_sub(y2, y1)); @@ -18,11 +34,7 @@ pub fn fcall_bn254_twist_add_line_coeffs(params: &[u64], results: &mut [u64]) -> // Compute 𝜇 = y - 𝜆x let mu = bn254_fp2_sub(y1, &bn254_fp2_mul(&lambda, x1)); - // Store the result - results[0..8].copy_from_slice(&lambda); - results[8..16].copy_from_slice(&mu); - - 16 + (lambda, mu) } /// Computes the coefficients (𝜆,𝜇) of the tangent line at the point (x,y) @@ -31,6 +43,17 @@ pub fn fcall_bn254_twist_dbl_line_coeffs(params: &[u64], results: &mut [u64]) -> let x: &[u64; 8] = ¶ms[0..8].try_into().unwrap(); let y: &[u64; 8] = ¶ms[8..16].try_into().unwrap(); + // Call the doubling line coefficients function + let (lambda, mu) = bn254_twist_dbl_line_coeffs(x, y); + + // Store the result + results[0..8].copy_from_slice(&lambda); + results[8..16].copy_from_slice(&mu); + + 16 +} + +pub fn bn254_twist_dbl_line_coeffs(x: &[u64; 8], y: &[u64; 8]) -> ([u64; 8], [u64; 8]) { // Compute 𝜆 = 3x²/2y let mut lambda = bn254_fp2_inv(&bn254_fp2_dbl(y)); let x_sq = bn254_fp2_square(x); @@ -39,11 +62,7 @@ pub fn fcall_bn254_twist_dbl_line_coeffs(params: &[u64], results: &mut [u64]) -> // Compute 𝜇 = y - 𝜆x let mu = bn254_fp2_sub(y, &bn254_fp2_mul(&lambda, x)); - // Store the result - results[0..8].copy_from_slice(&lambda); - results[8..16].copy_from_slice(&mu); - - 16 + (lambda, mu) } #[cfg(test)] diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/mod.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/mod.rs index 4c6102389..de7565c5a 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/mod.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/mod.rs @@ -1,19 +1,13 @@ -mod big_int256_div; -mod big_int_div; -mod bin_decomp; -mod bls12_381_fp2_inv; -mod bls12_381_fp_inv; -mod bls12_381_fp_sqrt; -mod bls12_381_twist; -mod bn254_fp; -mod bn254_fp2; -mod bn254_twist; -mod msb_pos_256; -mod msb_pos_384; +pub mod big_int256_div; +pub mod big_int_div; +pub mod bin_decomp; +pub mod bls12_381; +pub mod bn254; +pub mod msb_pos_256; +pub mod msb_pos_384; mod proxy; -mod secp256k1_fn_inv; -mod secp256k1_fp_inv; -mod secp256k1_fp_sqrt; +pub mod secp256k1; +pub mod secp256r1; mod utils; pub use proxy::*; diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/msb_pos_256.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/msb_pos_256.rs index ac5808270..4ca3132a4 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/msb_pos_256.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/msb_pos_256.rs @@ -1,23 +1,34 @@ -pub fn fcall_msb_pos_256(parameters: &[u64], results: &mut [u64]) -> i64 { - // Check if the parameters are valid - let x = ¶meters[0..4].try_into().unwrap(); - let y = ¶meters[4..8].try_into().unwrap(); +pub fn fcall_msb_pos_256(params: &[u64], results: &mut [u64]) -> i64 { + let n = params[0] as usize; - let (i, pos) = msb_pos_256(x, y); - results[0] = i as u64; - results[1] = pos as u64; + let (limb, bit) = msb_pos_256(¶ms[1..], n); + + results[0] = limb as u64; + results[1] = bit as u64; 2 } // Q: Do we prefer constant time functions? -fn msb_pos_256(x: &[u64; 4], y: &[u64; 4]) -> (usize, usize) { - for i in (0..4).rev() { - if x[i] != 0 || y[i] != 0 { - let word = if x[i] > y[i] { x[i] } else { y[i] }; - return (i, msb_pos(word)); +// Finds the most significant bit position among n 256-bit integers +// some of which may be zero, but not all +pub fn msb_pos_256(params: &[u64], n: usize) -> (usize, usize) { + debug_assert!(params.len() >= n * 4, "Not enough data for {} inputs", n); + + for limb in (0..4).rev() { + // Find max value at this limb position across all inputs + let mut max_word = 0u64; + for i in 0..n { + let word = params[i * 4 + limb]; + if word > max_word { + max_word = word; + } + } + + if max_word != 0 { + return (limb, msb_pos(max_word)); } } - panic!("Invalid input: x and y are both zero"); + panic!("Invalid input: all values are zero"); } // Q: Do we prefer constant time functions? diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/msb_pos_384.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/msb_pos_384.rs index 68985ad00..98898a706 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/msb_pos_384.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/msb_pos_384.rs @@ -9,7 +9,7 @@ pub fn fcall_msb_pos_384(parameters: &[u64], results: &mut [u64]) -> i64 { 2 } -fn msb_pos_384(x: &[u64; 6], y: &[u64; 6]) -> (usize, usize) { +pub fn msb_pos_384(x: &[u64; 6], y: &[u64; 6]) -> (usize, usize) { for i in (0..6).rev() { if x[i] != 0 || y[i] != 0 { let word = if x[i] > y[i] { x[i] } else { y[i] }; diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/proxy.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/proxy.rs index 831785435..407235cc5 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/proxy.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/proxy.rs @@ -1,16 +1,16 @@ use crate::zisklib::{ FCALL_BIG_INT256_DIV_ID, FCALL_BIG_INT_DIV_ID, FCALL_BIN_DECOMP_ID, FCALL_BLS12_381_FP2_INV_ID, - FCALL_BLS12_381_FP_INV_ID, FCALL_BLS12_381_FP_SQRT_ID, + FCALL_BLS12_381_FP2_SQRT_ID, FCALL_BLS12_381_FP_INV_ID, FCALL_BLS12_381_FP_SQRT_ID, FCALL_BLS12_381_TWIST_ADD_LINE_COEFFS_ID, FCALL_BLS12_381_TWIST_DBL_LINE_COEFFS_ID, FCALL_BN254_FP2_INV_ID, FCALL_BN254_FP_INV_ID, FCALL_BN254_TWIST_ADD_LINE_COEFFS_ID, FCALL_BN254_TWIST_DBL_LINE_COEFFS_ID, FCALL_MSB_POS_256_ID, FCALL_MSB_POS_384_ID, - FCALL_SECP256K1_FN_INV_ID, FCALL_SECP256K1_FP_INV_ID, FCALL_SECP256K1_FP_SQRT_ID, + FCALL_SECP256K1_ECDSA_VERIFY_ID, FCALL_SECP256K1_FN_INV_ID, FCALL_SECP256K1_FP_INV_ID, + FCALL_SECP256K1_FP_SQRT_ID, FCALL_SECP256R1_ECDSA_VERIFY_ID, }; use super::{ - big_int256_div::*, big_int_div::*, bin_decomp::*, bls12_381_fp2_inv::*, bls12_381_fp_inv::*, - bls12_381_fp_sqrt::*, bls12_381_twist::*, bn254_fp::*, bn254_fp2::*, bn254_twist::*, - msb_pos_256::*, msb_pos_384::*, secp256k1_fn_inv::*, secp256k1_fp_inv::*, secp256k1_fp_sqrt::*, + big_int256_div::*, big_int_div::*, bin_decomp::*, bls12_381::*, bn254::*, msb_pos_256::*, + msb_pos_384::*, secp256k1::*, secp256r1::*, }; pub fn fcall_proxy(id: u64, params: &[u64], results: &mut [u64]) -> i64 { @@ -18,7 +18,7 @@ pub fn fcall_proxy(id: u64, params: &[u64], results: &mut [u64]) -> i64 { FCALL_SECP256K1_FN_INV_ID => fcall_secp256k1_fn_inv(params, results), FCALL_SECP256K1_FP_INV_ID => fcall_secp256k1_fp_inv(params, results), FCALL_SECP256K1_FP_SQRT_ID => fcall_secp256k1_fp_sqrt(params, results), - FCALL_MSB_POS_256_ID => fcall_msb_pos_256(params, results), + FCALL_SECP256K1_ECDSA_VERIFY_ID => fcall_secp256k1_ecdsa_verify(params, results), FCALL_BN254_FP_INV_ID => fcall_bn254_fp_inv(params, results), FCALL_BN254_FP2_INV_ID => fcall_bn254_fp2_inv(params, results), FCALL_BN254_TWIST_ADD_LINE_COEFFS_ID => fcall_bn254_twist_add_line_coeffs(params, results), @@ -32,10 +32,13 @@ pub fn fcall_proxy(id: u64, params: &[u64], results: &mut [u64]) -> i64 { FCALL_BLS12_381_TWIST_DBL_LINE_COEFFS_ID => { fcall_bls12_381_twist_dbl_line_coeffs(params, results) } + FCALL_BLS12_381_FP2_SQRT_ID => fcall_bls12_381_fp2_sqrt(params, results), + FCALL_MSB_POS_256_ID => fcall_msb_pos_256(params, results), FCALL_MSB_POS_384_ID => fcall_msb_pos_384(params, results), FCALL_BIG_INT256_DIV_ID => fcall_big_int256_div(params, results), FCALL_BIG_INT_DIV_ID => fcall_big_int_div(params, results), FCALL_BIN_DECOMP_ID => fcall_bin_decomp(params, results), + FCALL_SECP256R1_ECDSA_VERIFY_ID => fcall_secp256r1_ecdsa_verify(params, results), _ => panic!("Unsupported fcall ID {id}"), } } diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/constants.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/constants.rs new file mode 100644 index 000000000..0b298eb52 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/constants.rs @@ -0,0 +1,43 @@ +use lazy_static::lazy_static; +use num_bigint::BigUint; + +lazy_static! { + pub static ref P: BigUint = BigUint::parse_bytes( + b"fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f", + 16 + ) + .unwrap(); + + pub static ref P_HALF: BigUint = BigUint::parse_bytes( + b"7fffffffffffffffffffffffffffffffffffffffffffffffffffffff7ffffe17", + 16 + ) + .unwrap(); + + pub static ref P_DIV_4: BigUint = BigUint::parse_bytes( + b"3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c", + 16 + ) + .unwrap(); + + pub static ref NQR: BigUint = BigUint::from(3u64); // First non-quadratic residue in Fp + + pub static ref N: BigUint = BigUint::parse_bytes( + b"fffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141", + 16 + ) + .unwrap(); +} + +pub const IDENTITY: [u64; 8] = [0u64; 8]; + +pub const G: [u64; 8] = [ + 0x59F2815B16F81798, + 0x029BFCDB2DCE28D9, + 0x55A06295CE870B07, + 0x79BE667EF9DCBBAC, + 0x9C47D08FFB10D4B8, + 0xFD17B448A6855419, + 0x5DA4FBFC0E1108A8, + 0x483ADA7726A3C465, +]; diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/ecdsa.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/ecdsa.rs new file mode 100644 index 000000000..7c6661bc4 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/ecdsa.rs @@ -0,0 +1,296 @@ +use num_traits::Zero; + +use crate::zisklib::fcalls_impl::utils::{ + biguint_from_u64, biguint_from_u64_digits, n_u64_digits_from_biguint, +}; + +use super::constants::{G, IDENTITY, N, P}; + +pub fn fcall_secp256k1_ecdsa_verify(params: &[u64], results: &mut [u64]) -> i64 { + // Get the input + let pk: &[u64; 8] = ¶ms[0..8].try_into().unwrap(); + let z: &[u64; 4] = ¶ms[8..12].try_into().unwrap(); + let r: &[u64; 4] = ¶ms[12..16].try_into().unwrap(); + let s: &[u64; 4] = ¶ms[16..20].try_into().unwrap(); + + // Get the curve point P + let p = secp256k1_ecdsa_verify(pk, z, r, s); + + // Store the result + results[0..8].copy_from_slice(&p); + + 8 +} + +pub fn secp256k1_ecdsa_verify(pk: &[u64; 8], z: &[u64; 4], r: &[u64; 4], s: &[u64; 4]) -> [u64; 8] { + // Given the public key pk and the signature (r, s) over the message hash z: + // 1. Computes s_inv = s⁻¹ mod n + // 2. Computes u1 = z·s_inv mod n + // 3. Computes u2 = r·s_inv mod n + // 4. Computes and returns the curve point p = u1·G + u2·PK + let s_inv = secp256k1_fn_inv(s); + let u1 = secp256k1_fn_mul(z, &s_inv); + let u2 = secp256k1_fn_mul(r, &s_inv); + secp256k1_curve_dbl_scalar_mul(&u1, &G, &u2, pk) +} + +fn secp256k1_fn_mul(a: &[u64; 4], b: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let b_big = biguint_from_u64_digits(b); + let product = (a_big * b_big) % &*N; + n_u64_digits_from_biguint(&product) +} + +fn secp256k1_fn_inv(a: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let inv = a_big.modinv(&N); + match inv { + Some(inverse) => n_u64_digits_from_biguint(&inverse), + None => panic!("Inverse does not exist"), + } +} + +fn secp256k1_fp_add(a: &[u64; 4], b: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let b_big = biguint_from_u64_digits(b); + let sum = (a_big + b_big) % &*P; + n_u64_digits_from_biguint(&sum) +} + +fn secp256k1_fp_sub(a: &[u64; 4], b: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let b_big = biguint_from_u64_digits(b); + let diff = if a_big >= b_big { a_big - b_big } else { (a_big + &*P) - b_big }; + n_u64_digits_from_biguint(&diff) +} + +fn secp256k1_fp_scalar_mul(a: &[u64; 4], scalar: u64) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let scalar_big = biguint_from_u64(scalar); + let product = (a_big * scalar_big) % &*P; + n_u64_digits_from_biguint(&product) +} + +fn secp256k1_fp_mul(a: &[u64; 4], b: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let b_big = biguint_from_u64_digits(b); + let product = (a_big * b_big) % &*P; + n_u64_digits_from_biguint(&product) +} + +fn secp256k1_fp_square(a: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let square = (a_big.clone() * a_big) % &*P; + n_u64_digits_from_biguint(&square) +} + +fn secp256k1_fp_inv(a: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let inv = a_big.modinv(&P); + match inv { + Some(inverse) => n_u64_digits_from_biguint(&inverse), + None => panic!("Inverse does not exist"), + } +} + +fn secp256k1_curve_add(p: &[u64; 8], q: &[u64; 8]) -> [u64; 8] { + let x1: &[u64; 4] = &p[0..4].try_into().unwrap(); + let y1: &[u64; 4] = &p[4..8].try_into().unwrap(); + let x2: &[u64; 4] = &q[0..4].try_into().unwrap(); + let y2: &[u64; 4] = &q[4..8].try_into().unwrap(); + + if x1 == x2 { + if y1 == y2 { + return secp256k1_curve_dbl(p); + } else { + return IDENTITY; + } + } + + if p == &IDENTITY { + return *q; + } else if q == &IDENTITY { + return *p; + } + + let lambda = { + let y2_minus_y1 = secp256k1_fp_sub(y2, y1); + let x2_minus_x1 = secp256k1_fp_sub(x2, x1); + let x2_minus_x1_inv = secp256k1_fp_inv(&x2_minus_x1); + secp256k1_fp_mul(&y2_minus_y1, &x2_minus_x1_inv) + }; + + let x3 = { + let lambda_sq = secp256k1_fp_square(&lambda); + let x1_plus_x2 = secp256k1_fp_add(x1, x2); + secp256k1_fp_sub(&lambda_sq, &x1_plus_x2) + }; + + let y3 = { + let lambda_x1_minus_x3 = { + let x1_minus_x3 = secp256k1_fp_sub(x1, &x3); + secp256k1_fp_mul(&lambda, &x1_minus_x3) + }; + secp256k1_fp_sub(&lambda_x1_minus_x3, y1) + }; + + let mut result = [0u64; 8]; + result[0..4].copy_from_slice(&x3); + result[4..8].copy_from_slice(&y3); + result +} + +fn secp256k1_curve_dbl(p: &[u64; 8]) -> [u64; 8] { + if p == &IDENTITY { + return *p; + } + + let x: &[u64; 4] = &p[0..4].try_into().unwrap(); + let y: &[u64; 4] = &p[4..8].try_into().unwrap(); + + let lambda = { + let three_x1_sq = { + let x1_sq = secp256k1_fp_square(x); + secp256k1_fp_scalar_mul(&x1_sq, 3) + }; + + let two_y1 = secp256k1_fp_scalar_mul(y, 2); + let two_y1_inv = secp256k1_fp_inv(&two_y1); + + secp256k1_fp_mul(&three_x1_sq, &two_y1_inv) + }; + + let x3 = { + let lambda_sq = secp256k1_fp_square(&lambda); + let two_x1 = secp256k1_fp_scalar_mul(x, 2); + secp256k1_fp_sub(&lambda_sq, &two_x1) + }; + + let y3 = { + let lambda_x1_minus_x3 = { + let x1_minus_x3 = secp256k1_fp_sub(x, &x3); + secp256k1_fp_mul(&lambda, &x1_minus_x3) + }; + secp256k1_fp_sub(&lambda_x1_minus_x3, y) + }; + + let mut result = [0u64; 8]; + result[0..4].copy_from_slice(&x3); + result[4..8].copy_from_slice(&y3); + result +} + +fn secp256k1_curve_dbl_scalar_mul( + k1: &[u64; 4], + p1: &[u64; 8], + k2: &[u64; 4], + p2: &[u64; 8], +) -> [u64; 8] { + let mut r = IDENTITY; + for i in (0..256).rev() { + r = secp256k1_curve_dbl(&r); + + let k1_bit = (k1[i / 64] >> (i % 64)) & 1; + let k2_bit = (k2[i / 64] >> (i % 64)) & 1; + + if k1_bit == 1 { + r = secp256k1_curve_add(&r, p1); + } + if k2_bit == 1 { + r = secp256k1_curve_add(&r, p2); + } + } + + r +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_dbl_scalar_mul() { + // 0 * IDENTITY + 0 * IDENTITY = IDENTITY + let k1 = [0u64; 4]; + let p1 = IDENTITY; + let k2 = [0u64; 4]; + let p2 = IDENTITY; + + let result = secp256k1_curve_dbl_scalar_mul(&k1, &p1, &k2, &p2); + assert_eq!(result, IDENTITY); + + // 1 * G + 0 * IDENTITY = G + let k1 = [1u64, 0, 0, 0]; + let p1 = G; + let k2 = [0u64; 4]; + let p2 = IDENTITY; + + let result = secp256k1_curve_dbl_scalar_mul(&k1, &p1, &k2, &p2); + assert_eq!(result, G); + + // 0 * IDENTITY + 1 * G = G + let k1 = [0u64; 4]; + let p1 = IDENTITY; + let k2 = [1u64, 0, 0, 0]; + let p2 = G; + + let result = secp256k1_curve_dbl_scalar_mul(&k1, &p1, &k2, &p2); + assert_eq!(result, G); + + // 2 * G + 3 * G = 5 * G + let k1 = [2u64, 0, 0, 0]; + let p1 = G; + let k2 = [3u64, 0, 0, 0]; + let p2 = G; + + let result = secp256k1_curve_dbl_scalar_mul(&k1, &p1, &k2, &p2); + let expected = [ + 0xcba8d569b240efe4, + 0xe88b84bddc619ab7, + 0x55b4a7250a5c5128, + 0x2f8bde4d1a072093, + 0xdca87d3aa6ac62d6, + 0xf788271bab0d6840, + 0xd4dba9dda6c9c426, + 0xd8ac222636e5e3d6, + ]; + assert_eq!(result, expected); + + // Random test + let k1 = [0x761923728d37303, 0x1f0e6f2fa8a32ab5, 0x7bb7458c6ea47f08, 0xe2cf4fd21aef19e1]; + let p1 = [ + 0xd77a8f3f445d2c43, + 0xd8404b226e191e33, + 0x3f542469b3a1f4ce, + 0x73613de6799853d9, + 0x9722df4889803b47, + 0x9055e100179fe79a, + 0xdf46f38d013fda72, + 0xd769a27efc36598c, + ]; + let k2 = [0xe9c44fa1510380c0, 0x16d1daea9be6a28, 0x2a4bb6bbdc0a031e, 0xefda864ae6c22f24]; + let p2 = [ + 0x77fb10949fdba7d6, + 0x84e5d96e491b9daf, + 0x66c77ea552e760cd, + 0x434feb1463e34ff8, + 0x5258fc8877bdff59, + 0x25586ed50053a57f, + 0x55858e1de54a18ac, + 0x3393bec7dd4067f7, + ]; + + let result = secp256k1_curve_dbl_scalar_mul(&k1, &p1, &k2, &p2); + let expected = [ + 0xb0531ccb6c1c9b1, + 0xc7c48529c9569495, + 0x18edf1edb9351c8d, + 0x572d78c95d7f964, + 0x9d41caf8f65f3690, + 0x21eea422b3a37e0a, + 0x1c10371d5a68938c, + 0xcc37bbabaf4204de, + ]; + assert_eq!(result, expected); + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1_fn_inv.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/fn_inv.rs similarity index 85% rename from ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1_fn_inv.rs rename to ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/fn_inv.rs index d882b71c8..d3b39c32f 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1_fn_inv.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/fn_inv.rs @@ -15,15 +15,9 @@ cfg_if::cfg_if! { use lazy_static::lazy_static; use num_bigint::BigUint; - use super::utils::{biguint_from_u64_digits, n_u64_digits_from_biguint}; + use crate::zisklib::fcalls_impl::utils::{biguint_from_u64_digits, n_u64_digits_from_biguint}; - lazy_static! { - pub static ref N: BigUint = BigUint::parse_bytes( - b"fffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141", - 16 - ) - .unwrap(); - } + use super::N; pub fn fcall_secp256k1_fn_inv(params: &[u64], results: &mut [u64]) -> i64 { // Get the input diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1_fp_inv.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/fp_inv.rs similarity index 85% rename from ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1_fp_inv.rs rename to ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/fp_inv.rs index 78bd6c866..b4a732cf7 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1_fp_inv.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/fp_inv.rs @@ -15,15 +15,9 @@ cfg_if::cfg_if! { use lazy_static::lazy_static; use num_bigint::BigUint; - use super::utils::{biguint_from_u64_digits, n_u64_digits_from_biguint}; + use crate::zisklib::fcalls_impl::utils::{biguint_from_u64_digits, n_u64_digits_from_biguint}; - lazy_static! { - pub static ref P: BigUint = BigUint::parse_bytes( - b"fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f", - 16 - ) - .unwrap(); - } + use super::P; pub fn fcall_secp256k1_fp_inv(params: &[u64], results: &mut [u64]) -> i64 { // Get the input diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1_fp_sqrt.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/fp_sqrt.rs similarity index 86% rename from ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1_fp_sqrt.rs rename to ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/fp_sqrt.rs index dae86a0ba..2db5f3ce7 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1_fp_sqrt.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/fp_sqrt.rs @@ -1,29 +1,9 @@ use lazy_static::lazy_static; use num_bigint::BigUint; -use super::utils::{biguint_from_u64_digits, n_u64_digits_from_biguint}; - -lazy_static! { - pub static ref P: BigUint = BigUint::parse_bytes( - b"fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f", - 16 - ) - .unwrap(); - - pub static ref P_HALF: BigUint = BigUint::parse_bytes( - b"7fffffffffffffffffffffffffffffffffffffffffffffffffffffff7ffffe17", - 16 - ) - .unwrap(); - - pub static ref P_DIV_4: BigUint = BigUint::parse_bytes( - b"3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c", - 16 - ) - .unwrap(); - - pub static ref NQR: BigUint = BigUint::from(3u64); // First non-quadratic residue in Fp -} +use crate::zisklib::fcalls_impl::utils::{biguint_from_u64_digits, n_u64_digits_from_biguint}; + +use super::{NQR, P, P_DIV_4}; pub fn fcall_secp256k1_fp_sqrt(params: &[u64], results: &mut [u64]) -> i64 { // Get the input @@ -36,7 +16,7 @@ pub fn fcall_secp256k1_fp_sqrt(params: &[u64], results: &mut [u64]) -> i64 { 5 } -fn secp256k1_fp_sqrt(a: &[u64; 4], parity: u64, results: &mut [u64]) { +pub fn secp256k1_fp_sqrt(a: &[u64; 4], parity: u64, results: &mut [u64]) { let a_big = biguint_from_u64_digits(a); // Attempt to compute the square root of a diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/mod.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/mod.rs new file mode 100644 index 000000000..0343b8ad6 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256k1/mod.rs @@ -0,0 +1,11 @@ +mod constants; +mod ecdsa; +mod fn_inv; +mod fp_inv; +mod fp_sqrt; + +use constants::*; +pub use ecdsa::*; +pub use fn_inv::*; +pub use fp_inv::*; +pub use fp_sqrt::*; diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256r1/constants.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256r1/constants.rs new file mode 100644 index 000000000..000521681 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256r1/constants.rs @@ -0,0 +1,31 @@ +use lazy_static::lazy_static; +use num_bigint::BigUint; + +lazy_static! { + pub static ref P: BigUint = BigUint::parse_bytes( + b"ffffffff00000001000000000000000000000000ffffffffffffffffffffffff", + 16 + ) + .unwrap(); + pub static ref N: BigUint = BigUint::parse_bytes( + b"ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551", + 16 + ) + .unwrap(); +} + +pub const E_A: [u64; 4] = + [0xFFFF_FFFF_FFFF_FFFC, 0x0000_0000_FFFF_FFFF, 0x0000_0000_0000_0000, 0xFFFF_FFFF_0000_0001]; + +pub const IDENTITY: [u64; 8] = [0u64; 8]; + +pub const G: [u64; 8] = [ + 0xF4A1_3945_D898_C296, + 0x7703_7D81_2DEB_33A0, + 0xF8BC_E6E5_63A4_40F2, + 0x6B17_D1F2_E12C_4247, + 0xCBB6_4068_37BF_51F5, + 0x2BCE_3357_6B31_5ECE, + 0x8EE7_EB4A_7C0F_9E16, + 0x4FE3_42E2_FE1A_7F9B, +]; diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256r1/ecdsa.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256r1/ecdsa.rs new file mode 100644 index 000000000..1cd2b3145 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256r1/ecdsa.rs @@ -0,0 +1,297 @@ +use num_traits::Zero; + +use crate::zisklib::fcalls_impl::utils::{ + biguint_from_u64, biguint_from_u64_digits, n_u64_digits_from_biguint, +}; + +use super::constants::{E_A, G, IDENTITY, N, P}; + +pub fn fcall_secp256r1_ecdsa_verify(params: &[u64], results: &mut [u64]) -> i64 { + // Get the input + let pk: &[u64; 8] = ¶ms[0..8].try_into().unwrap(); + let z: &[u64; 4] = ¶ms[8..12].try_into().unwrap(); + let r: &[u64; 4] = ¶ms[12..16].try_into().unwrap(); + let s: &[u64; 4] = ¶ms[16..20].try_into().unwrap(); + + // Get the curve point P + let p = secp256r1_ecdsa_verify(pk, z, r, s); + + // Store the result + results[0..8].copy_from_slice(&p); + + 8 +} + +fn secp256r1_ecdsa_verify(pk: &[u64; 8], z: &[u64; 4], r: &[u64; 4], s: &[u64; 4]) -> [u64; 8] { + // Given the public key pk and the signature (r, s) over the message hash z: + // 1. Computes s_inv = s⁻¹ mod n + // 2. Computes u1 = z·s_inv mod n + // 3. Computes u2 = r·s_inv mod n + // 4. Computes and returns the curve point p = u1·G + u2·PK + let s_inv = secp256r1_fn_inv(s); + let u1 = secp256r1_fn_mul(z, &s_inv); + let u2 = secp256r1_fn_mul(r, &s_inv); + secp256r1_curve_dbl_scalar_mul(&u1, &G, &u2, pk) +} + +fn secp256r1_fn_mul(a: &[u64; 4], b: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let b_big = biguint_from_u64_digits(b); + let product = (a_big * b_big) % &*N; + n_u64_digits_from_biguint(&product) +} + +fn secp256r1_fn_inv(a: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let inv = a_big.modinv(&N); + match inv { + Some(inverse) => n_u64_digits_from_biguint(&inverse), + None => panic!("Inverse does not exist"), + } +} + +fn secp256r1_fp_add(a: &[u64; 4], b: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let b_big = biguint_from_u64_digits(b); + let sum = (a_big + b_big) % &*P; + n_u64_digits_from_biguint(&sum) +} + +fn secp256r1_fp_sub(a: &[u64; 4], b: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let b_big = biguint_from_u64_digits(b); + let diff = if a_big >= b_big { a_big - b_big } else { (a_big + &*P) - b_big }; + n_u64_digits_from_biguint(&diff) +} + +fn secp256r1_fp_scalar_mul(a: &[u64; 4], scalar: u64) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let scalar_big = biguint_from_u64(scalar); + let product = (a_big * scalar_big) % &*P; + n_u64_digits_from_biguint(&product) +} + +fn secp256r1_fp_mul(a: &[u64; 4], b: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let b_big = biguint_from_u64_digits(b); + let product = (a_big * b_big) % &*P; + n_u64_digits_from_biguint(&product) +} + +fn secp256r1_fp_square(a: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let square = (a_big.clone() * a_big) % &*P; + n_u64_digits_from_biguint(&square) +} + +fn secp256r1_fp_inv(a: &[u64; 4]) -> [u64; 4] { + let a_big = biguint_from_u64_digits(a); + let inv = a_big.modinv(&P); + match inv { + Some(inverse) => n_u64_digits_from_biguint(&inverse), + None => panic!("Inverse does not exist"), + } +} + +fn secp256r1_curve_add(p: &[u64; 8], q: &[u64; 8]) -> [u64; 8] { + let x1: &[u64; 4] = &p[0..4].try_into().unwrap(); + let y1: &[u64; 4] = &p[4..8].try_into().unwrap(); + let x2: &[u64; 4] = &q[0..4].try_into().unwrap(); + let y2: &[u64; 4] = &q[4..8].try_into().unwrap(); + + if x1 == x2 { + if y1 == y2 { + return secp256r1_curve_dbl(p); + } else { + return IDENTITY; + } + } + + if p == &IDENTITY { + return *q; + } else if q == &IDENTITY { + return *p; + } + + let lambda = { + let y2_minus_y1 = secp256r1_fp_sub(y2, y1); + let x2_minus_x1 = secp256r1_fp_sub(x2, x1); + let x2_minus_x1_inv = secp256r1_fp_inv(&x2_minus_x1); + secp256r1_fp_mul(&y2_minus_y1, &x2_minus_x1_inv) + }; + + let x3 = { + let lambda_sq = secp256r1_fp_square(&lambda); + let x1_plus_x2 = secp256r1_fp_add(x1, x2); + secp256r1_fp_sub(&lambda_sq, &x1_plus_x2) + }; + + let y3 = { + let lambda_x1_minus_x3 = { + let x1_minus_x3 = secp256r1_fp_sub(x1, &x3); + secp256r1_fp_mul(&lambda, &x1_minus_x3) + }; + secp256r1_fp_sub(&lambda_x1_minus_x3, y1) + }; + + let mut result = [0u64; 8]; + result[0..4].copy_from_slice(&x3); + result[4..8].copy_from_slice(&y3); + result +} + +fn secp256r1_curve_dbl(p: &[u64; 8]) -> [u64; 8] { + if p == &IDENTITY { + return *p; + } + + let x: &[u64; 4] = &p[0..4].try_into().unwrap(); + let y: &[u64; 4] = &p[4..8].try_into().unwrap(); + + let lambda = { + let three_x1_sq = { + let x1_sq = secp256r1_fp_square(x); + secp256r1_fp_scalar_mul(&x1_sq, 3) + }; + let num = secp256r1_fp_add(&three_x1_sq, &E_A); + + let two_y1 = secp256r1_fp_scalar_mul(y, 2); + let den = secp256r1_fp_inv(&two_y1); + + secp256r1_fp_mul(&num, &den) + }; + + let x3 = { + let lambda_sq = secp256r1_fp_square(&lambda); + let two_x1 = secp256r1_fp_scalar_mul(x, 2); + secp256r1_fp_sub(&lambda_sq, &two_x1) + }; + + let y3 = { + let lambda_x1_minus_x3 = { + let x1_minus_x3 = secp256r1_fp_sub(x, &x3); + secp256r1_fp_mul(&lambda, &x1_minus_x3) + }; + secp256r1_fp_sub(&lambda_x1_minus_x3, y) + }; + + let mut result = [0u64; 8]; + result[0..4].copy_from_slice(&x3); + result[4..8].copy_from_slice(&y3); + result +} + +fn secp256r1_curve_dbl_scalar_mul( + k1: &[u64; 4], + p1: &[u64; 8], + k2: &[u64; 4], + p2: &[u64; 8], +) -> [u64; 8] { + let mut r = IDENTITY; + for i in (0..256).rev() { + r = secp256r1_curve_dbl(&r); + + let k1_bit = (k1[i / 64] >> (i % 64)) & 1; + let k2_bit = (k2[i / 64] >> (i % 64)) & 1; + + if k1_bit == 1 { + r = secp256r1_curve_add(&r, p1); + } + if k2_bit == 1 { + r = secp256r1_curve_add(&r, p2); + } + } + + r +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_dbl_scalar_mul() { + // 0 * IDENTITY + 0 * IDENTITY = IDENTITY + let k1 = [0u64; 4]; + let p1 = IDENTITY; + let k2 = [0u64; 4]; + let p2 = IDENTITY; + + let result = secp256r1_curve_dbl_scalar_mul(&k1, &p1, &k2, &p2); + assert_eq!(result, IDENTITY); + + // 1 * G + 0 * IDENTITY = G + let k1 = [1u64, 0, 0, 0]; + let p1 = G; + let k2 = [0u64; 4]; + let p2 = IDENTITY; + + let result = secp256r1_curve_dbl_scalar_mul(&k1, &p1, &k2, &p2); + assert_eq!(result, G); + + // 0 * IDENTITY + 1 * G = G + let k1 = [0u64; 4]; + let p1 = IDENTITY; + let k2 = [1u64, 0, 0, 0]; + let p2 = G; + + let result = secp256r1_curve_dbl_scalar_mul(&k1, &p1, &k2, &p2); + assert_eq!(result, G); + + // 2 * G + 3 * G = 5 * G + let k1 = [2u64, 0, 0, 0]; + let p1 = G; + let k2 = [3u64, 0, 0, 0]; + let p2 = G; + + let result = secp256r1_curve_dbl_scalar_mul(&k1, &p1, &k2, &p2); + let expected = [ + 0x21554a0dc3d033ed, + 0xef8c82fd1f5be524, + 0xd784c85608668fdf, + 0x51590b7a515140d2, + 0xd1d0bb44fda16da4, + 0xd012f00d4d80888, + 0x8ae1bf36bf8a7926, + 0xe0c17da8904a727d, + ]; + assert_eq!(result, expected); + + // Random test + let k1 = [0xc4bed2f1f47f9a54, 0x9cd109ce498a9b95, 0xd9d5232066758816, 0xf3b0020b50fafcfe]; + let p1 = [ + 0x3c86442bafe51c41, + 0xa709f983d1ad2017, + 0x503d3c4c7699e29f, + 0x51f730041a088667, + 0xb4c365119c4d3bfc, + 0x41f620cca7b9001f, + 0xeb5025341faef867, + 0xf55cbe6ac6ff94ce, + ]; + let k2 = [0xb652a5b177426eaa, 0xe44bcf080ef8aaf7, 0x3966826b0d4eb5f5, 0xe33606d47d23f70a]; + let p2 = [ + 0x8ba8cddeb162e15b, + 0xb33b65b9a6c8945c, + 0x7480c2cff5cea8e0, + 0x3393c7d67a51330d, + 0xf1d29bdb9ed24e90, + 0x5da65af891bf0b50, + 0x99cc7a2be908e44e, + 0x8de4594f14dc559d, + ]; + + let result = secp256r1_curve_dbl_scalar_mul(&k1, &p1, &k2, &p2); + let expected = [ + 0xa57ec301274eaa5c, + 0x7f2d4f49c426a01b, + 0x910612a4889b8c13, + 0x4436050010e76a1e, + 0x2cd45c4320036102, + 0xc2d5e53a2316da0a, + 0x76355a97180de3fe, + 0xd15d039ba7950631, + ]; + assert_eq!(result, expected); + } +} diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256r1/mod.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256r1/mod.rs new file mode 100644 index 000000000..4799a59ba --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/secp256r1/mod.rs @@ -0,0 +1,5 @@ +mod constants; +mod ecdsa; + +use constants::*; +pub use ecdsa::*; diff --git a/ziskos/entrypoint/src/zisklib/fcalls_impl/utils.rs b/ziskos/entrypoint/src/zisklib/fcalls_impl/utils.rs index a6254459d..2e650707f 100644 --- a/ziskos/entrypoint/src/zisklib/fcalls_impl/utils.rs +++ b/ziskos/entrypoint/src/zisklib/fcalls_impl/utils.rs @@ -1,6 +1,10 @@ use num_bigint::BigUint; use num_traits::Zero; +pub fn biguint_from_u64(value: u64) -> BigUint { + BigUint::from(value) +} + pub fn biguint_from_u64_digits(limbs: &[u64]) -> BigUint { limbs.iter().rev().fold(BigUint::zero(), |acc, &limb| (acc << 64) + BigUint::from(limb)) } diff --git a/ziskos/entrypoint/src/zisklib/lib/array_lib/add_agtb.rs b/ziskos/entrypoint/src/zisklib/lib/array_lib/add_agtb.rs index 6b4d354b6..778e0bb9a 100644 --- a/ziskos/entrypoint/src/zisklib/lib/array_lib/add_agtb.rs +++ b/ziskos/entrypoint/src/zisklib/lib/array_lib/add_agtb.rs @@ -11,7 +11,12 @@ use super::U256; /// /// # Returns /// The number of limbs in the result -pub fn add_agtb(a: &[U256], b: &[U256], out: &mut [U256]) -> usize { +pub fn add_agtb( + a: &[U256], + b: &[U256], + out: &mut [U256], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> usize { let len_a = a.len(); let len_b = b.len(); #[cfg(debug_assertions)] @@ -33,7 +38,11 @@ pub fn add_agtb(a: &[U256], b: &[U256], out: &mut [U256]) -> usize { cin: 0, c: out[0].as_limbs_mut(), }; - let mut carry = syscall_add256(&mut params); + let mut carry = syscall_add256( + &mut params, + #[cfg(feature = "hints")] + hints, + ); for i in 1..len_b { // Compute a[i] + b[i] + carry @@ -43,7 +52,11 @@ pub fn add_agtb(a: &[U256], b: &[U256], out: &mut [U256]) -> usize { cin: carry, c: out[i].as_limbs_mut(), }; - carry = syscall_add256(&mut params); + carry = syscall_add256( + &mut params, + #[cfg(feature = "hints")] + hints, + ); } for i in len_b..len_a { @@ -55,7 +68,11 @@ pub fn add_agtb(a: &[U256], b: &[U256], out: &mut [U256]) -> usize { cin: 1, c: out[i].as_limbs_mut(), }; - carry = syscall_add256(&mut params); + carry = syscall_add256( + &mut params, + #[cfg(feature = "hints")] + hints, + ); } else { // Directly copy a[i] to out[i] out[i] = a[i]; diff --git a/ziskos/entrypoint/src/zisklib/lib/array_lib/add_short.rs b/ziskos/entrypoint/src/zisklib/lib/array_lib/add_short.rs index 2bf2afd4a..3d7e5e00c 100644 --- a/ziskos/entrypoint/src/zisklib/lib/array_lib/add_short.rs +++ b/ziskos/entrypoint/src/zisklib/lib/array_lib/add_short.rs @@ -11,7 +11,12 @@ use super::U256; /// /// # Returns /// The number of limbs in the result -pub fn add_short(a: &[U256], b: &U256, out: &mut [U256]) -> usize { +pub fn add_short( + a: &[U256], + b: &U256, + out: &mut [U256], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> usize { let len_a = a.len(); #[cfg(debug_assertions)] { @@ -28,7 +33,11 @@ pub fn add_short(a: &[U256], b: &U256, out: &mut [U256]) -> usize { cin: 0, c: out[0].as_limbs_mut(), }; - let mut carry = syscall_add256(&mut params); + let mut carry = syscall_add256( + &mut params, + #[cfg(feature = "hints")] + hints, + ); for i in 1..len_a { if carry == 1 { @@ -39,7 +48,11 @@ pub fn add_short(a: &[U256], b: &U256, out: &mut [U256]) -> usize { cin: 1, c: out[i].as_limbs_mut(), }; - carry = syscall_add256(&mut params); + carry = syscall_add256( + &mut params, + #[cfg(feature = "hints")] + hints, + ); } else { // Directly copy a[i] to out[i] out[i] = a[i]; diff --git a/ziskos/entrypoint/src/zisklib/lib/array_lib/div_long.rs b/ziskos/entrypoint/src/zisklib/lib/array_lib/div_long.rs index b29b59b65..b414b5a46 100644 --- a/ziskos/entrypoint/src/zisklib/lib/array_lib/div_long.rs +++ b/ziskos/entrypoint/src/zisklib/lib/array_lib/div_long.rs @@ -16,7 +16,11 @@ use super::{add_agtb, mul_long, U256}; /// /// # Note /// Not optimal for `len(b) == 1`, use `div_short` instead -pub fn div_long(a: &[U256], b: &[U256]) -> (Vec, Vec) { +pub fn div_long( + a: &[U256], + b: &[U256], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> (Vec, Vec) { let len_a = a.len(); let len_b = b.len(); #[cfg(debug_assertions)] @@ -45,7 +49,14 @@ pub fn div_long(a: &[U256], b: &[U256]) -> (Vec, Vec) { // Hint the quotient and remainder let mut quo_flat = vec![0u64; len_a * 4]; let mut rem_flat = vec![0u64; len_b * 4]; - let (limbs_quo, limbs_rem) = fcall_division(a_flat, b_flat, &mut quo_flat, &mut rem_flat); + let (limbs_quo, limbs_rem) = fcall_division( + a_flat, + b_flat, + &mut quo_flat, + &mut rem_flat, + #[cfg(feature = "hints")] + hints, + ); let quo = U256::flat_to_slice(&quo_flat[..limbs_quo]); let rem = U256::flat_to_slice(&rem_flat[..limbs_rem]); @@ -66,8 +77,14 @@ pub fn div_long(a: &[U256], b: &[U256]) -> (Vec, Vec) { assert!(!quo[len_quo - 1].is_zero(), "Quotient must not have leading zeros"); // Multiply the quotient by b - let mut q_b = vec![U256::ZERO; len_a + 1]; // The +1 is because mul_long and add_agtb are a general purpose functions - let q_b_len = mul_long(quo, b, &mut q_b); + let mut q_b = vec![U256::ZERO; len_a + 1]; // The +1 is because mul_long is a general purpose function + let q_b_len = mul_long( + quo, + b, + &mut q_b, + #[cfg(feature = "hints")] + hints, + ); // Check 1 <= len(r) let len_rem = rem.len(); @@ -75,14 +92,20 @@ pub fn div_long(a: &[U256], b: &[U256]) -> (Vec, Vec) { if rem[len_rem - 1].is_zero() { // If the remainder is zero, then a must be equal to q·b - assert!(U256::eq_slices(a, &q_b), "Remainder is zero, but a != q·b"); + assert!(U256::eq_slices(a, &q_b[..q_b_len]), "Remainder is zero, but a != q·b"); } else { // If the remainder is non-zero, then we should check that a must be equal to q·b + r and r < b assert!(U256::lt_slices(rem, b), "Remainder must be less than divisor"); - let mut q_b_r = vec![U256::ZERO; len_a + 1]; // The +1 is because mul_long and add_agtb are a general purpose functions - let q_b_r_len = add_agtb(&q_b[..q_b_len], rem, &mut q_b_r); + let mut q_b_r = vec![U256::ZERO; len_a + 1]; // The +1 is because add_agtb is a general purpose function + let q_b_r_len = add_agtb( + &q_b[..q_b_len], + rem, + &mut q_b_r, + #[cfg(feature = "hints")] + hints, + ); assert!(U256::eq_slices(a, &q_b_r[..q_b_r_len]), "a != q·b + r"); } diff --git a/ziskos/entrypoint/src/zisklib/lib/array_lib/div_short.rs b/ziskos/entrypoint/src/zisklib/lib/array_lib/div_short.rs index 6080484c7..0b78a017a 100644 --- a/ziskos/entrypoint/src/zisklib/lib/array_lib/div_short.rs +++ b/ziskos/entrypoint/src/zisklib/lib/array_lib/div_short.rs @@ -11,7 +11,11 @@ use super::{add_short, mul_short, U256}; /// /// # Returns /// A tuple of (quotient, remainder) where a = q × b + r -pub fn div_short(a: &[U256], b: &U256) -> (Vec, U256) { +pub fn div_short( + a: &[U256], + b: &U256, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> (Vec, U256) { let len_a = a.len(); #[cfg(debug_assertions)] { @@ -39,7 +43,14 @@ pub fn div_short(a: &[U256], b: &U256) -> (Vec, U256) { // Hint the quotient and remainder let mut quo_flat = vec![0u64; len_a * 4]; let mut rem_flat = [0u64; 4]; - let (limbs_quo, _) = fcall_division(a_flat, b.as_limbs(), &mut quo_flat, &mut rem_flat); + let (limbs_quo, _) = fcall_division( + a_flat, + b.as_limbs(), + &mut quo_flat, + &mut rem_flat, + #[cfg(feature = "hints")] + hints, + ); let quo = U256::flat_to_slice(&quo_flat[..limbs_quo]); let rem = U256::from_u64s(&rem_flat); @@ -52,8 +63,14 @@ pub fn div_short(a: &[U256], b: &U256) -> (Vec, U256) { assert!(!quo[len_quo - 1].is_zero(), "Quotient must not have leading zeros"); // Multiply the quotient by b - let mut q_b = [U256::ZERO; 2]; - let q_b_len = mul_short(quo, b, &mut q_b); + let mut q_b = vec![U256::ZERO; len_a + 1]; // The +1 is because mul_short is a general purpose function + let q_b_len = mul_short( + quo, + b, + &mut q_b, + #[cfg(feature = "hints")] + hints, + ); if rem.is_zero() { // If the remainder is zero, then a must be equal to q·b @@ -62,8 +79,14 @@ pub fn div_short(a: &[U256], b: &U256) -> (Vec, U256) { // If the remainder is non-zero, then we should check that a must be equal to q·b + r and r < b assert!(rem.lt(b), "Remainder must be less than divisor"); - let mut q_b_r = [U256::ZERO; 2]; - let q_b_r_len = add_short(&q_b[..q_b_len], &rem, &mut q_b_r); + let mut q_b_r = vec![U256::ZERO; len_a + 1]; // The +1 is because add_short is a general purpose function + let q_b_r_len = add_short( + &q_b[..q_b_len], + &rem, + &mut q_b_r, + #[cfg(feature = "hints")] + hints, + ); assert!(U256::eq_slices(a, &q_b_r[..q_b_r_len]), "a != q·b + r"); } diff --git a/ziskos/entrypoint/src/zisklib/lib/array_lib/modexp.rs b/ziskos/entrypoint/src/zisklib/lib/array_lib/modexp.rs index 806247569..41cdf3e40 100644 --- a/ziskos/entrypoint/src/zisklib/lib/array_lib/modexp.rs +++ b/ziskos/entrypoint/src/zisklib/lib/array_lib/modexp.rs @@ -13,7 +13,12 @@ use super::{ /// Modular exponentiation of three large numbers /// /// It assumes that modulus > 0 and len(base),len(exp),len(modulus) > 0 -pub fn modexp(base: &[U256], exp: &[u64], modulus: &[U256]) -> Vec { +pub fn modexp( + base: &[U256], + exp: &[u64], + modulus: &[U256], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Vec { let len_b = base.len(); let len_e = exp.len(); let len_m = modulus.len(); @@ -36,6 +41,11 @@ pub fn modexp(base: &[U256], exp: &[u64], modulus: &[U256]) -> Vec { } } + // If modulus == 0, return zeros + if len_m == 1 && modulus[0].is_zero() { + return vec![U256::ZERO]; + } + // If modulus == 1, then base^exp (mod 1) is always 0 if len_m == 1 && modulus[0].is_one() { return vec![U256::ZERO]; @@ -59,169 +69,276 @@ pub fn modexp(base: &[U256], exp: &[u64], modulus: &[U256]) -> Vec { } // We can assume from now on that base,modulus > 1 and exp > 0 - - // There are two versions: - // - If len(modulus) == 1, we can use short reductions - // - If len(modulus) > 1, we must use long reductions if len_m == 1 { - let modulus = &modulus[0]; - - // Compute base = base (mod modulus) - let base = rem_short_init(base, modulus); - - // Hint exponent bits - let (len, bits) = fcall_bin_decomp(exp); - - // We should recompose the exponent from bits to verify correctness - let mut rec_exp = vec![0u64; len_e]; - - // Recompose the MSB - let bits_pos = len - 1; - let limb_idx = bits_pos / 64; - let bit_in_limb = bits_pos % 64; - rec_exp[limb_idx] = 1u64 << bit_in_limb; - - // Scratch space - let mut scratch = ShortScratch::new(); - - // Initialize out = base - let mut out = base; - for (bit_idx, &bit) in bits.iter().enumerate().skip(1) { - if out.is_zero() { - return vec![U256::ZERO]; - } - - // Compute out = out² (mod modulus) - out = square_and_reduce_short(&out, modulus, &mut scratch); - - if bit == 1 { - // Compute out = (out * base) (mod modulus); - out = mul_and_reduce_short(&out, &base, modulus, &mut scratch); - - // Recompose the exponent - let bits_pos = len - 1 - bit_idx; - let limb_idx = bits_pos / 64; - let bit_in_limb = bits_pos % 64; - rec_exp[limb_idx] |= 1u64 << bit_in_limb; - } - } - - assert_eq!(rec_exp[..], *exp, "Exponent decomposition mismatch"); - - vec![out] + modexp_short( + base, + exp, + &modulus[0], + #[cfg(feature = "hints")] + hints, + ) } else { - // Compute base = base (mod modulus) - let base = rem_long_init(base, modulus); - - // Hint exponent bits - let (len, bits) = fcall_bin_decomp(exp); - - // We should recompose the exponent from bits to verify correctness - let mut rec_exp = vec![0u64; len_e]; - - // Recompose the MSB - let bits_pos = len - 1; - let limb_idx = bits_pos / 64; - let bit_in_limb = bits_pos % 64; - rec_exp[limb_idx] = 1u64 << bit_in_limb; + modexp_long( + base, + exp, + modulus, + #[cfg(feature = "hints")] + hints, + ) + } +} - // Scratch space - let mut scratch = LongScratch::new(len_m); +/// Short modexp when modulus fits in a single U256 +fn modexp_short( + base: &[U256], + exp: &[u64], + modulus: &U256, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Vec { + let len_e = exp.len(); - // Initialize out = base - let mut out = base.clone(); - for (bit_idx, &bit) in bits.iter().enumerate().skip(1) { - if out.len() == 1 && out[0].is_zero() { - return vec![U256::ZERO]; - } + // Compute base = base (mod modulus) + let base = rem_short_init( + base, + modulus, + #[cfg(feature = "hints")] + hints, + ); + + // Hint exponent bits + let (len, bits) = fcall_bin_decomp( + exp, + #[cfg(feature = "hints")] + hints, + ); + + // We should recompose the exponent from bits to verify correctness + let mut rec_exp = vec![0u64; len_e]; + + // Recompose the MSB + let bits_pos = len - 1; + let limb_idx = bits_pos / 64; + let bit_in_limb = bits_pos % 64; + rec_exp[limb_idx] = 1u64 << bit_in_limb; + + // Scratch space + let mut scratch = ShortScratch::new(); + + // Initialize out = base + let mut out = base; + for (bit_idx, &bit) in bits.iter().enumerate().skip(1) { + if out.is_zero() { + return vec![U256::ZERO]; + } - // Compute out = out² (mod modulus) - out = square_and_reduce_long(&out, modulus, &mut scratch); - - if bit == 1 { - // Compute out = (out * base) (mod modulus); - out = mul_and_reduce_long(&out, &base, modulus, &mut scratch); - // Recompose the exponent - let bits_pos = len - 1 - bit_idx; - let limb_idx = bits_pos / 64; - let bit_in_limb = bits_pos % 64; - rec_exp[limb_idx] |= 1u64 << bit_in_limb; - } + // Compute out = out² (mod modulus) + out = square_and_reduce_short( + &out, + modulus, + &mut scratch, + #[cfg(feature = "hints")] + hints, + ); + + if bit == 1 { + // Compute out = (out * base) (mod modulus) + out = mul_and_reduce_short( + &out, + &base, + modulus, + &mut scratch, + #[cfg(feature = "hints")] + hints, + ); + + // Recompose the exponent + let bits_pos = len - 1 - bit_idx; + let limb_idx = bits_pos / 64; + let bit_in_limb = bits_pos % 64; + rec_exp[limb_idx] |= 1u64 << bit_in_limb; } + } - assert_eq!(rec_exp[..], *exp, "Exponent decomposition mismatch"); + assert_eq!(rec_exp[..], *exp, "Exponent decomposition mismatch"); - out - } + vec![out] } -pub fn modexp_u64(base: &[u64], exp: &[u64], modulus: &[u64]) -> Vec { - // Round up to multiple of 4 - let base_len = (base.len() + 3) & !3; - let modulus_len = (modulus.len() + 3) & !3; - - let mut base_padded = vec![0u64; base_len]; - let mut modulus_padded = vec![0u64; modulus_len]; +/// Long modexp when modulus requires multiple U256 limbs +fn modexp_long( + base: &[U256], + exp: &[u64], + modulus: &[U256], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Vec { + let len_e = exp.len(); + let len_m = modulus.len(); - base_padded[..base.len()].copy_from_slice(base); - modulus_padded[..modulus.len()].copy_from_slice(modulus); + // Compute base = base (mod modulus) + let base = rem_long_init( + base, + modulus, + #[cfg(feature = "hints")] + hints, + ); + + // Hint exponent bits + let (len, bits) = fcall_bin_decomp( + exp, + #[cfg(feature = "hints")] + hints, + ); + + // We should recompose the exponent from bits to verify correctness + let mut rec_exp = vec![0u64; len_e]; + + // Recompose the MSB + let bits_pos = len - 1; + let limb_idx = bits_pos / 64; + let bit_in_limb = bits_pos % 64; + rec_exp[limb_idx] = 1u64 << bit_in_limb; + + // Scratch space + let mut scratch = LongScratch::new(len_m); + + // Initialize out = base + let mut out = base.clone(); + for (bit_idx, &bit) in bits.iter().enumerate().skip(1) { + if out.len() == 1 && out[0].is_zero() { + return vec![U256::ZERO]; + } - // Convert u64 arrays to U256 chunks - let base_u256 = U256::flat_to_slice(&base_padded); - let modulus_u256 = U256::flat_to_slice(&modulus_padded); + // Compute out = out² (mod modulus) + out = square_and_reduce_long( + &out, + modulus, + &mut scratch, + #[cfg(feature = "hints")] + hints, + ); + + if bit == 1 { + // Compute out = (out * base) (mod modulus) + out = mul_and_reduce_long( + &out, + &base, + modulus, + &mut scratch, + #[cfg(feature = "hints")] + hints, + ); + + // Recompose the exponent + let bits_pos = len - 1 - bit_idx; + let limb_idx = bits_pos / 64; + let bit_in_limb = bits_pos % 64; + rec_exp[limb_idx] |= 1u64 << bit_in_limb; + } + } - // Call the main modexp function - let result_u256 = modexp(base_u256, exp, modulus_u256); + assert_eq!(rec_exp[..], *exp, "Exponent decomposition mismatch"); - // Convert result back to u64 array - U256::slice_to_flat(&result_u256).to_vec() + out } -/// Compute modular exponentiation of three large numbers +/// Compute modular exponentiation from big-endian byte arrays /// /// ### Safety /// /// The caller must ensure that: -/// - `base_ptr` points to an array of `base_len` u64 elements -/// - `exp_ptr` points to an array of `exp_len` u64 elements -/// - `modulus_ptr` points to an array of `modulus_len` u64 elements -/// - `result_ptr` points to an array of at least `modulus_len` u64 elements -#[no_mangle] -pub unsafe extern "C" fn modexp_u64_c( - base_ptr: *const u64, +/// - `base_ptr` points to an array of `base_len` bytes (big-endian) +/// - `exp_ptr` points to an array of `exp_len` bytes (big-endian) +/// - `modulus_ptr` points to an array of `modulus_len` bytes (big-endian) +/// - `result_ptr` points to an array of at least `modulus_len` bytes +/// +/// Returns the number of bytes written to `result_ptr` (always equals `modulus_len`, zero-padded) +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_modexp_bytes_c")] +pub unsafe extern "C" fn modexp_bytes_c( + base_ptr: *const u8, base_len: usize, - exp_ptr: *const u64, + exp_ptr: *const u8, exp_len: usize, - modulus_ptr: *const u64, + modulus_ptr: *const u8, modulus_len: usize, - result_ptr: *mut u64, + result_ptr: *mut u8, + #[cfg(feature = "hints")] hints: &mut Vec, ) -> usize { - let base = std::slice::from_raw_parts(base_ptr, base_len); - let exp = std::slice::from_raw_parts(exp_ptr, exp_len); - let modulus = std::slice::from_raw_parts(modulus_ptr, modulus_len); + let base_bytes = std::slice::from_raw_parts(base_ptr, base_len); + let exp_bytes = std::slice::from_raw_parts(exp_ptr, exp_len); + let modulus_bytes = std::slice::from_raw_parts(modulus_ptr, modulus_len); + + // Convert from big-endian bytes to little-endian u64/U256 arrays + let base_u256 = bytes_be_to_u256_le(base_bytes); + let exp_u64 = bytes_be_to_u64_le(exp_bytes); + let modulus_u256 = bytes_be_to_u256_le(modulus_bytes); + + let result_u256 = modexp( + &base_u256, + &exp_u64, + &modulus_u256, + #[cfg(feature = "hints")] + hints, + ); + + // Convert result back to big-endian bytes with proper length + let result = std::slice::from_raw_parts_mut(result_ptr, modulus_len); + u256_le_to_bytes_be(&result_u256, result); - // Round up to multiple of 4 - let base_len = (base.len() + 3) & !3; - let modulus_len = (modulus.len() + 3) & !3; + modulus_len +} - let mut base_padded = vec![0u64; base_len]; - let mut modulus_padded = vec![0u64; modulus_len]; +/// Convert big-endian bytes to little-endian u64 array +fn bytes_be_to_u64_le(bytes: &[u8]) -> Vec { + if bytes.is_empty() { + return vec![0]; + } - base_padded[..base.len()].copy_from_slice(base); - modulus_padded[..modulus.len()].copy_from_slice(modulus); + // Skip leading zeros but keep at least one limb + let first_nonzero = bytes.iter().position(|&b| b != 0).unwrap_or(bytes.len() - 1); + let bytes = &bytes[first_nonzero..]; - // Convert u64 arrays to U256 chunks - let base_u256 = U256::flat_to_slice(&base_padded); - let modulus_u256 = U256::flat_to_slice(&modulus_padded); + if bytes.is_empty() { + return vec![0]; + } - // Call the main modexp function - let result_u256 = modexp(base_u256, exp, modulus_u256); - let result_slice = U256::slice_to_flat(&result_u256); - let result_len = result_slice.len(); + // Process bytes into u64 limbs + let num_limbs = bytes.len().div_ceil(8); + let mut result = vec![0u64; num_limbs]; + for (i, &byte) in bytes.iter().rev().enumerate() { + let limb_idx = i / 8; + let byte_idx = i % 8; + result[limb_idx] |= (byte as u64) << (byte_idx * 8); + } - // Convert result back to u64 array - let result = std::slice::from_raw_parts_mut(result_ptr, modulus_len); - result[..result_len].copy_from_slice(result_slice); + result +} + +/// Convert big-endian bytes to little-endian U256 array +fn bytes_be_to_u256_le(bytes: &[u8]) -> Vec { + let u64_le = bytes_be_to_u64_le(bytes); - result_len + // Pad to multiple of 4 u64s + let padded_len = u64_le.len().next_multiple_of(4); + let mut padded = vec![0u64; padded_len]; + padded[..u64_le.len()].copy_from_slice(&u64_le); + + U256::flat_to_slice(&padded).to_vec() +} + +/// Convert little-endian U256 array to big-endian bytes +fn u256_le_to_bytes_be(limbs: &[U256], output: &mut [u8]) { + let flat = U256::slice_to_flat(limbs); + let out_len = output.len(); + output.fill(0); + + for (i, &limb) in flat.iter().enumerate() { + for j in 0..8 { + let byte_val = ((limb >> (j * 8)) & 0xFF) as u8; + let pos_from_end = i * 8 + j; + if pos_from_end < out_len { + output[out_len - 1 - pos_from_end] = byte_val; + } + } + } } diff --git a/ziskos/entrypoint/src/zisklib/lib/array_lib/mul_long.rs b/ziskos/entrypoint/src/zisklib/lib/array_lib/mul_long.rs index a04cbed35..4a7046328 100644 --- a/ziskos/entrypoint/src/zisklib/lib/array_lib/mul_long.rs +++ b/ziskos/entrypoint/src/zisklib/lib/array_lib/mul_long.rs @@ -16,7 +16,12 @@ use super::{mul_short, rem_long, LongScratch, U256}; /// /// # Note /// Not optimal for `len(b) == 1`, use `mul_short` instead -pub fn mul_long(a: &[U256], b: &[U256], out: &mut [U256]) -> usize { +pub fn mul_long( + a: &[U256], + b: &[U256], + out: &mut [U256], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> usize { let len_a = a.len(); let len_b = b.len(); #[cfg(debug_assertions)] @@ -39,7 +44,11 @@ pub fn mul_long(a: &[U256], b: &[U256], out: &mut [U256]) -> usize { dl: out[0].as_limbs_mut(), dh: &mut [0, 0, 0, 0], }; - syscall_arith256(&mut params); + syscall_arith256( + &mut params, + #[cfg(feature = "hints")] + hints, + ); // Propagate the carry out[1] = U256::from_u64s(params.dh); @@ -55,7 +64,11 @@ pub fn mul_long(a: &[U256], b: &[U256], out: &mut [U256]) -> usize { dl: out[j].as_limbs_mut(), dh: &mut [0, 0, 0, 0], }; - syscall_arith256(&mut params); + syscall_arith256( + &mut params, + #[cfg(feature = "hints")] + hints, + ); // Propagate the carry out[j + 1] = U256::from_u64s(params.dh); @@ -74,7 +87,11 @@ pub fn mul_long(a: &[U256], b: &[U256], out: &mut [U256]) -> usize { dl: &mut [0, 0, 0, 0], dh: &mut [0, 0, 0, 0], }; - syscall_arith256(&mut params_arith); + syscall_arith256( + &mut params_arith, + #[cfg(feature = "hints")] + hints, + ); // Set the result out[i + j] = U256::from_u64s(params_arith.dl); @@ -86,7 +103,11 @@ pub fn mul_long(a: &[U256], b: &[U256], out: &mut [U256]) -> usize { cin: 1, c: params_arith.dh, }; - let _carry = syscall_add256(&mut params_add); + let _carry = syscall_add256( + &mut params_add, + #[cfg(feature = "hints")] + hints, + ); debug_assert!(_carry == 0, "Unexpected carry in intermediate addition"); } @@ -99,7 +120,11 @@ pub fn mul_long(a: &[U256], b: &[U256], out: &mut [U256]) -> usize { cin: 0, c: out[i + j + 1].as_limbs_mut(), }; - carry_flag = syscall_add256(&mut params_add); + carry_flag = syscall_add256( + &mut params_add, + #[cfg(feature = "hints")] + hints, + ); } // Last chunk isolated @@ -113,7 +138,11 @@ pub fn mul_long(a: &[U256], b: &[U256], out: &mut [U256]) -> usize { dl: out[i + len_b - 1].as_limbs_mut(), dh: &mut [0, 0, 0, 0], }; - syscall_arith256(&mut params_arith); + syscall_arith256( + &mut params_arith, + #[cfg(feature = "hints")] + hints, + ); if carry_flag == 1 { let a_in = *params_arith.dh; @@ -123,7 +152,11 @@ pub fn mul_long(a: &[U256], b: &[U256], out: &mut [U256]) -> usize { cin: 1, c: params_arith.dh, }; - let _carry = syscall_add256(&mut params_add); + let _carry = syscall_add256( + &mut params_add, + #[cfg(feature = "hints")] + hints, + ); debug_assert!(_carry == 0, "Unexpected carry in intermediate addition"); } @@ -153,6 +186,7 @@ pub fn mul_and_reduce_long( b: &[U256], modulus: &[U256], scratch: &mut LongScratch, + #[cfg(feature = "hints")] hints: &mut Vec, ) -> Vec { #[cfg(debug_assertions)] { @@ -162,10 +196,28 @@ pub fn mul_and_reduce_long( } let mul_len = if b.len() == 1 { - mul_short(a, &b[0], &mut scratch.mul) + mul_short( + a, + &b[0], + &mut scratch.mul, + #[cfg(feature = "hints")] + hints, + ) } else { - mul_long(a, b, &mut scratch.mul) + mul_long( + a, + b, + &mut scratch.mul, + #[cfg(feature = "hints")] + hints, + ) }; - rem_long(&scratch.mul[..mul_len], modulus, &mut scratch.rem) + rem_long( + &scratch.mul[..mul_len], + modulus, + &mut scratch.rem, + #[cfg(feature = "hints")] + hints, + ) } diff --git a/ziskos/entrypoint/src/zisklib/lib/array_lib/mul_short.rs b/ziskos/entrypoint/src/zisklib/lib/array_lib/mul_short.rs index eb98c333e..8c6c3a49f 100644 --- a/ziskos/entrypoint/src/zisklib/lib/array_lib/mul_short.rs +++ b/ziskos/entrypoint/src/zisklib/lib/array_lib/mul_short.rs @@ -11,7 +11,12 @@ use super::{rem_short, ShortScratch, U256}; /// /// # Returns /// The number of limbs in the result -pub fn mul_short(a: &[U256], b: &U256, out: &mut [U256]) -> usize { +pub fn mul_short( + a: &[U256], + b: &U256, + out: &mut [U256], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> usize { let len_a = a.len(); #[cfg(debug_assertions)] { @@ -32,7 +37,11 @@ pub fn mul_short(a: &[U256], b: &U256, out: &mut [U256]) -> usize { dl: out[i].as_limbs_mut(), dh: carry.as_limbs_mut(), }; - syscall_arith256(&mut params); + syscall_arith256( + &mut params, + #[cfg(feature = "hints")] + hints, + ); } if carry.is_zero() { @@ -47,7 +56,11 @@ pub fn mul_short(a: &[U256], b: &U256, out: &mut [U256]) -> usize { /// /// # Returns /// A tuple of (result array, number of limbs used) -pub fn mul_short_one_limb(a: &U256, b: &U256) -> ([U256; 2], usize) { +pub fn mul_short_one_limb( + a: &U256, + b: &U256, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> ([U256; 2], usize) { let mut out = [U256::ZERO; 2]; // Compute a * b @@ -59,7 +72,11 @@ pub fn mul_short_one_limb(a: &U256, b: &U256) -> ([U256; 2], usize) { dl: out[0].as_limbs_mut(), dh: &mut dh, }; - syscall_arith256(&mut mul_params); + syscall_arith256( + &mut mul_params, + #[cfg(feature = "hints")] + hints, + ); let len = if dh == [0u64; 4] { 1 @@ -83,13 +100,25 @@ pub fn mul_and_reduce_short( b: &U256, modulus: &U256, scratch: &mut ShortScratch, + #[cfg(feature = "hints")] hints: &mut Vec, ) -> U256 { #[cfg(debug_assertions)] { assert!(!modulus.is_zero(), "Input 'modulus' must not be zero"); } - let (mul, len) = mul_short_one_limb(a, b); + let (mul, len) = mul_short_one_limb( + a, + b, + #[cfg(feature = "hints")] + hints, + ); - rem_short(&mul[..len], modulus, scratch) + rem_short( + &mul[..len], + modulus, + scratch, + #[cfg(feature = "hints")] + hints, + ) } diff --git a/ziskos/entrypoint/src/zisklib/lib/array_lib/rem_long.rs b/ziskos/entrypoint/src/zisklib/lib/array_lib/rem_long.rs index a1665755b..ba9641bc0 100644 --- a/ziskos/entrypoint/src/zisklib/lib/array_lib/rem_long.rs +++ b/ziskos/entrypoint/src/zisklib/lib/array_lib/rem_long.rs @@ -17,7 +17,11 @@ use super::{add_agtb, mul_long, RemLongScratch, U256}; /// # Note /// Use this for the first reduction when `a` can be arbitrarily large. /// For subsequent reductions in a loop, use `rem_long` with scratch space. -pub fn rem_long_init(a: &[U256], b: &[U256]) -> Vec { +pub fn rem_long_init( + a: &[U256], + b: &[U256], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Vec { let len_a = a.len(); let len_b = b.len(); #[cfg(debug_assertions)] @@ -46,14 +50,30 @@ pub fn rem_long_init(a: &[U256], b: &[U256]) -> Vec { // Hint the quotient and remainder let mut quo_flat = vec![0u64; len_a * 4]; let mut rem_flat = vec![0u64; len_b * 4]; - let (limbs_quo, limbs_rem) = fcall_division(a_flat, b_flat, &mut quo_flat, &mut rem_flat); + let (limbs_quo, limbs_rem) = fcall_division( + a_flat, + b_flat, + &mut quo_flat, + &mut rem_flat, + #[cfg(feature = "hints")] + hints, + ); let quo = U256::flat_to_slice(&quo_flat[..limbs_quo]); let rem = U256::flat_to_slice(&rem_flat[..limbs_rem]); // Verify the division let mut q_b = vec![U256::ZERO; len_a + 1]; // The +1 is because mul_long and add_agtb are a general purpose functions let mut q_b_r = vec![U256::ZERO; len_a + 1]; - verify_division(a, b, quo, rem, &mut q_b, &mut q_b_r); + verify_division( + a, + b, + quo, + rem, + &mut q_b, + &mut q_b_r, + #[cfg(feature = "hints")] + hints, + ); rem.to_vec() } @@ -70,7 +90,12 @@ pub fn rem_long_init(a: &[U256], b: &[U256]) -> Vec { /// /// # Note /// Not optimal for `len(b) == 1`, use `rem_short` instead -pub fn rem_long(a: &[U256], b: &[U256], scratch: &mut RemLongScratch) -> Vec { +pub fn rem_long( + a: &[U256], + b: &[U256], + scratch: &mut RemLongScratch, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Vec { #[cfg(debug_assertions)] { let len_a = a.len(); @@ -97,12 +122,28 @@ pub fn rem_long(a: &[U256], b: &[U256], scratch: &mut RemLongScratch) -> Vec, ) { let len_a = a.len(); let len_b = b.len(); @@ -136,7 +178,13 @@ fn verify_division( assert!(!quo[len_quo - 1].is_zero(), "Quotient must not have leading zeros"); // Multiply the quotient by b - let q_b_len = mul_long(quo, b, q_b); + let q_b_len = mul_long( + quo, + b, + q_b, + #[cfg(feature = "hints")] + hints, + ); // Check 1 <= len(r) assert!(len_rem > 0, "Remainder must have at least one limb"); @@ -149,7 +197,13 @@ fn verify_division( assert!(U256::lt_slices(rem, b), "Remainder must be less than divisor"); - let q_b_r_len = add_agtb(&q_b[..q_b_len], rem, q_b_r); + let q_b_r_len = add_agtb( + &q_b[..q_b_len], + rem, + q_b_r, + #[cfg(feature = "hints")] + hints, + ); assert!(U256::eq_slices(a, &q_b_r[..q_b_r_len]), "a != q·b + r"); } } diff --git a/ziskos/entrypoint/src/zisklib/lib/array_lib/rem_short.rs b/ziskos/entrypoint/src/zisklib/lib/array_lib/rem_short.rs index f25f041d7..37a7e03cf 100644 --- a/ziskos/entrypoint/src/zisklib/lib/array_lib/rem_short.rs +++ b/ziskos/entrypoint/src/zisklib/lib/array_lib/rem_short.rs @@ -15,7 +15,11 @@ use super::{add_short, mul_short, ShortScratch, U256}; /// # Note /// Use this for the first reduction when `a` can be arbitrarily large. /// For subsequent reductions in a loop, use `rem_short` with scratch space. -pub fn rem_short_init(a: &[U256], b: &U256) -> U256 { +pub fn rem_short_init( + a: &[U256], + b: &U256, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> U256 { let len_a = a.len(); #[cfg(debug_assertions)] { @@ -43,14 +47,30 @@ pub fn rem_short_init(a: &[U256], b: &U256) -> U256 { // Hint the quotient and remainder let mut quo_flat = vec![0u64; len_a * 4]; let mut rem_flat = [0u64; 4]; - let (limbs_quo, _) = fcall_division(a_flat, b.as_limbs(), &mut quo_flat, &mut rem_flat); + let (limbs_quo, _) = fcall_division( + a_flat, + b.as_limbs(), + &mut quo_flat, + &mut rem_flat, + #[cfg(feature = "hints")] + hints, + ); let quo = U256::flat_to_slice(&quo_flat[..limbs_quo]); let rem = U256::from_u64s(&rem_flat); // Verify the division let mut q_b = vec![U256::ZERO; len_a + 1]; // The +1 is because mul_long and add_agtb are a general purpose functions let mut q_b_r = vec![U256::ZERO; len_a + 1]; - verify_division(a, b, quo, &rem, &mut q_b, &mut q_b_r); + verify_division( + a, + b, + quo, + &rem, + &mut q_b, + &mut q_b_r, + #[cfg(feature = "hints")] + hints, + ); rem } @@ -64,7 +84,12 @@ pub fn rem_short_init(a: &[U256], b: &U256) -> U256 { /// /// # Returns /// The remainder: a mod b -pub fn rem_short(a: &[U256], b: &U256, scratch: &mut ShortScratch) -> U256 { +pub fn rem_short( + a: &[U256], + b: &U256, + scratch: &mut ShortScratch, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> U256 { let len_a = a.len(); #[cfg(debug_assertions)] { @@ -90,12 +115,28 @@ pub fn rem_short(a: &[U256], b: &U256, scratch: &mut ShortScratch) -> U256 { let a_flat = U256::slice_to_flat(a); // Hint the quotient and remainder - let (limbs_quo, _) = fcall_division(a_flat, b.as_limbs(), &mut scratch.quo, &mut scratch.rem); + let (limbs_quo, _) = fcall_division( + a_flat, + b.as_limbs(), + &mut scratch.quo, + &mut scratch.rem, + #[cfg(feature = "hints")] + hints, + ); let quo = U256::flat_to_slice(&scratch.quo[..limbs_quo]); let rem = U256::from_u64s(&scratch.rem); // Verify the division - verify_division(a, b, quo, &rem, &mut scratch.q_b, &mut scratch.q_b_r); + verify_division( + a, + b, + quo, + &rem, + &mut scratch.q_b, + &mut scratch.q_b_r, + #[cfg(feature = "hints")] + hints, + ); rem } @@ -109,6 +150,7 @@ fn verify_division( rem: &U256, q_b: &mut [U256], q_b_r: &mut [U256], + #[cfg(feature = "hints")] hints: &mut Vec, ) { let len_a = a.len(); let len_quo = quo.len(); @@ -119,7 +161,13 @@ fn verify_division( assert!(!quo[len_quo - 1].is_zero(), "Quotient must not have leading zeros"); // Multiply the quotient by b - let q_b_len = mul_short(quo, b, q_b); + let q_b_len = mul_short( + quo, + b, + q_b, + #[cfg(feature = "hints")] + hints, + ); if rem.is_zero() { // If the remainder is zero, then a must be equal to q·b @@ -128,7 +176,13 @@ fn verify_division( // If the remainder is non-zero, then we should check that a must be equal to q·b + r and r < b assert!(rem.lt(b), "Remainder must be less than divisor"); - let q_b_r_len = add_short(&q_b[..q_b_len], rem, q_b_r); + let q_b_r_len = add_short( + &q_b[..q_b_len], + rem, + q_b_r, + #[cfg(feature = "hints")] + hints, + ); assert!(U256::eq_slices(a, &q_b_r[..q_b_r_len]), "a != q·b + r"); } } diff --git a/ziskos/entrypoint/src/zisklib/lib/array_lib/square_long.rs b/ziskos/entrypoint/src/zisklib/lib/array_lib/square_long.rs index 7f560bdae..4dd9df42f 100644 --- a/ziskos/entrypoint/src/zisklib/lib/array_lib/square_long.rs +++ b/ziskos/entrypoint/src/zisklib/lib/array_lib/square_long.rs @@ -16,7 +16,11 @@ use super::{rem_long, LongScratch, U256}; /// /// # Note /// Not optimal for `len(a) == 1`, use `square_short` instead -pub fn square_long(a: &[U256], out: &mut [U256]) -> usize { +pub fn square_long( + a: &[U256], + out: &mut [U256], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> usize { // a3 a2 a1 a0 // * a3 a2 a1 a0 // ------------------------------------------------------- 0 @@ -51,7 +55,11 @@ pub fn square_long(a: &[U256], out: &mut [U256]) -> usize { dl: out[2 * i].as_limbs_mut(), dh: &mut [0, 0, 0, 0], }; - syscall_arith256(&mut ai_ai); + syscall_arith256( + &mut ai_ai, + #[cfg(feature = "hints")] + hints, + ); out[2 * i + 1] = U256::from_u64s(ai_ai.dh); } @@ -67,19 +75,31 @@ pub fn square_long(a: &[U256], out: &mut [U256]) -> usize { dl: &mut [0, 0, 0, 0], dh: &mut [0, 0, 0, 0], }; - syscall_arith256(&mut ai_aj); + syscall_arith256( + &mut ai_aj, + #[cfg(feature = "hints")] + hints, + ); // Double the result 2·a[i]·a[j] // Start by doubling the lower chunk: 2·l₁ = [1/0]·B + l₂ let mut dbl_low = SyscallAdd256Params { a: ai_aj.dl, b: ai_aj.dl, cin: 0, c: &mut [0, 0, 0, 0] }; - let dbl_low_carry = syscall_add256(&mut dbl_low); + let dbl_low_carry = syscall_add256( + &mut dbl_low, + #[cfg(feature = "hints")] + hints, + ); // Next, double the higher chunk: 2·h₁·B = [1/0]·B² + h₂·B let mut dbl_high = SyscallAdd256Params { a: ai_aj.dh, b: ai_aj.dh, cin: 0, c: &mut [0, 0, 0, 0] }; - let dbl_high_carry = syscall_add256(&mut dbl_high); + let dbl_high_carry = syscall_add256( + &mut dbl_high, + #[cfg(feature = "hints")] + hints, + ); // If there's a carry from doubling the low part, add it to the high part if dbl_low_carry != 0 { @@ -90,7 +110,11 @@ pub fn square_long(a: &[U256], out: &mut [U256]) -> usize { cin: 1, c: dbl_high.c, }; - let _carry = syscall_add256(&mut add); + let _carry = syscall_add256( + &mut add, + #[cfg(feature = "hints")] + hints, + ); debug_assert!(_carry == 0, "Unexpected carry in intermediate addition"); } @@ -106,7 +130,11 @@ pub fn square_long(a: &[U256], out: &mut [U256]) -> usize { cin: 0, c: &mut [0, 0, 0, 0], }; - let add_low_carry = syscall_add256(&mut add_low); + let add_low_carry = syscall_add256( + &mut add_low, + #[cfg(feature = "hints")] + hints, + ); out[i + j] = U256::from_u64s(add_low.c); if add_low_carry != 0 { @@ -117,7 +145,11 @@ pub fn square_long(a: &[U256], out: &mut [U256]) -> usize { cin: 1, c: out[i + j + 1].as_limbs_mut(), }; - let add_carry = syscall_add256(&mut add); + let add_carry = syscall_add256( + &mut add, + #[cfg(feature = "hints")] + hints, + ); if add_carry != 0 { let a_in = out[i + j + 2]; @@ -127,7 +159,11 @@ pub fn square_long(a: &[U256], out: &mut [U256]) -> usize { cin: 1, c: out[i + j + 2].as_limbs_mut(), }; - let _carry = syscall_add256(&mut add2); + let _carry = syscall_add256( + &mut add2, + #[cfg(feature = "hints")] + hints, + ); debug_assert!(_carry == 0, "Unexpected carry in intermediate addition"); } @@ -140,7 +176,11 @@ pub fn square_long(a: &[U256], out: &mut [U256]) -> usize { cin: 0, c: &mut [0, 0, 0, 0], }; - let add_mid_carry = syscall_add256(&mut add_mid); + let add_mid_carry = syscall_add256( + &mut add_mid, + #[cfg(feature = "hints")] + hints, + ); out[i + j + 1] = U256::from_u64s(add_mid.c); if add_mid_carry != 0 { @@ -151,7 +191,11 @@ pub fn square_long(a: &[U256], out: &mut [U256]) -> usize { cin: 1, c: out[i + j + 2].as_limbs_mut(), }; - let _carry = syscall_add256(&mut add); + let _carry = syscall_add256( + &mut add, + #[cfg(feature = "hints")] + hints, + ); debug_assert!(_carry == 0, "Unexpected carry in intermediate addition"); } @@ -165,7 +209,11 @@ pub fn square_long(a: &[U256], out: &mut [U256]) -> usize { cin: 1, c: out[i + j + 2].as_limbs_mut(), }; - let _carry = syscall_add256(&mut add); + let _carry = syscall_add256( + &mut add, + #[cfg(feature = "hints")] + hints, + ); debug_assert!(_carry == 0, "Unexpected carry in intermediate addition"); } @@ -192,6 +240,7 @@ pub fn square_and_reduce_long( a: &[U256], modulus: &[U256], scratch: &mut LongScratch, + #[cfg(feature = "hints")] hints: &mut Vec, ) -> Vec { #[cfg(debug_assertions)] { @@ -200,7 +249,18 @@ pub fn square_and_reduce_long( assert!(!modulus[len_m - 1].is_zero(), "Input 'modulus' must not have leading zeros"); } - let sq_len = square_long(a, &mut scratch.mul); + let sq_len = square_long( + a, + &mut scratch.mul, + #[cfg(feature = "hints")] + hints, + ); - rem_long(&scratch.mul[..sq_len], modulus, &mut scratch.rem) + rem_long( + &scratch.mul[..sq_len], + modulus, + &mut scratch.rem, + #[cfg(feature = "hints")] + hints, + ) } diff --git a/ziskos/entrypoint/src/zisklib/lib/array_lib/square_short.rs b/ziskos/entrypoint/src/zisklib/lib/array_lib/square_short.rs index 5315a2b3a..72d4ba290 100644 --- a/ziskos/entrypoint/src/zisklib/lib/array_lib/square_short.rs +++ b/ziskos/entrypoint/src/zisklib/lib/array_lib/square_short.rs @@ -9,7 +9,10 @@ use super::{rem_short, ShortScratch, U256}; /// /// # Returns /// A tuple of (result array, number of limbs used) -pub fn square_short(a: &U256) -> ([U256; 2], usize) { +pub fn square_short( + a: &U256, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> ([U256; 2], usize) { #[cfg(debug_assertions)] { assert!(!a.is_zero(), "Input 'a' must not have leading zeros"); @@ -26,7 +29,11 @@ pub fn square_short(a: &U256) -> ([U256; 2], usize) { dl: out[0].as_limbs_mut(), dh: &mut dh, }; - syscall_arith256(&mut sq_params); + syscall_arith256( + &mut sq_params, + #[cfg(feature = "hints")] + hints, + ); let len = if dh == [0u64; 4] { 1 @@ -45,13 +52,28 @@ pub fn square_short(a: &U256) -> ([U256; 2], usize) { /// /// # Returns /// The remainder: a² mod modulus -pub fn square_and_reduce_short(a: &U256, modulus: &U256, scratch: &mut ShortScratch) -> U256 { +pub fn square_and_reduce_short( + a: &U256, + modulus: &U256, + scratch: &mut ShortScratch, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> U256 { #[cfg(debug_assertions)] { assert!(!modulus.is_zero(), "Input 'modulus' must not be zero"); } - let (sq, len) = square_short(a); + let (sq, len) = square_short( + a, + #[cfg(feature = "hints")] + hints, + ); - rem_short(&sq[..len], modulus, scratch) + rem_short( + &sq[..len], + modulus, + scratch, + #[cfg(feature = "hints")] + hints, + ) } diff --git a/ziskos/entrypoint/src/zisklib/lib/bigint256.rs b/ziskos/entrypoint/src/zisklib/lib/bigint256.rs deleted file mode 100644 index 1088c48b4..000000000 --- a/ziskos/entrypoint/src/zisklib/lib/bigint256.rs +++ /dev/null @@ -1,346 +0,0 @@ -use crate::{ - syscalls::{ - syscall_arith256, syscall_arith256_mod, SyscallArith256ModParams, SyscallArith256Params, - }, - zisklib::{eq, fcall_bigint256_div, fcall_msb_pos_256, lt}, -}; - -pub fn mul256(a: &[u64; 4], b: &[u64; 4]) -> ([u64; 4], [u64; 4]) { - let mut params = - SyscallArith256Params { a, b, c: &[0u64; 4], dl: &mut [0u64; 4], dh: &mut [0u64; 4] }; - syscall_arith256(&mut params); - (*params.dl, *params.dh) -} - -pub fn wmul256(a: &[u64; 4], b: &[u64; 4]) -> [u64; 4] { - let mut params = - SyscallArith256Params { a, b, c: &[0u64; 4], dl: &mut [0u64; 4], dh: &mut [0u64; 4] }; - syscall_arith256(&mut params); - *params.dl -} - -pub fn divrem256(a: &[u64; 4], b: &[u64; 4]) -> ([u64; 4], [u64; 4]) { - // Check for division by zero - assert!(!eq(b, &[0u64; 4]), "Division by zero"); - - // Hint the result of the division - let (quotient, remainder) = fcall_bigint256_div(a, b); - - // Check that a = b * quotient + remainder and remainder < b - assert!(lt(&remainder, b), "Remainder is not less than divisor"); - let mut params = SyscallArith256Params { - a: b, - b: "ient, - c: &remainder, - dl: &mut [0u64; 4], - dh: &mut [0u64; 4], - }; - syscall_arith256(&mut params); - assert!(eq(params.dl, a), "Dividend does not equal divisor * quotient + remainder"); - - (quotient, remainder) -} - -/// Raises `x` to (2^power_log) modulo `module` using repeated squaring -pub fn exp_power_of_two(x: &[u64; 4], module: &[u64; 4], power_log: usize) -> [u64; 4] { - // x^1 = x - if power_log == 0 { - return *x; - } - - let mut result = *x; - let zero = [0u64; 4]; - for _ in 0..power_log { - let mut params = SyscallArith256ModParams { - a: &result, - b: &result, - c: &zero, - module, - d: &mut [0u64; 4], - }; - syscall_arith256_mod(&mut params); - result = *params.d; - } - - result -} - -/// Raises `x` to (2^power_log) modulo `module` using repeated squaring -pub fn exp_power_of_two_self(x: &mut [u64; 4], module: &[u64; 4], power_log: usize) { - if power_log == 0 { - return; - } - - let zero = [0u64; 4]; - for _ in 0..power_log { - let mut params = - SyscallArith256ModParams { a: x, b: x, c: &zero, module, d: &mut [0u64; 4] }; - syscall_arith256_mod(&mut params); - *x = *params.d; - } -} - -pub fn wpow256(a: &[u64; 4], exp: &[u64; 4]) -> [u64; 4] { - // 0^0 = 1 by convention - // 0^n = 0 for n > 0 - if eq(a, &[0u64; 4]) { - return if eq(exp, &[0u64; 4]) { [1, 0, 0, 0] } else { [0u64; 4] }; - } - - // Direct cases: exp = 0,1,2 - match exp { - [0, 0, 0, 0] => { - // Return a^0 = 1 - return [1, 0, 0, 0]; - } - [1, 0, 0, 0] => { - // Return a - return *a; - } - [2, 0, 0, 0] => { - // Return a^2 - let mut dl = [0u64; 4]; - let mut dh = [0u64; 4]; - let mut params = - SyscallArith256Params { a, b: a, c: &[0u64; 4], dl: &mut dl, dh: &mut dh }; - syscall_arith256(&mut params); - return dl; - } - _ => {} - } - - // We can assume exp > 2 from now on - // Hint the length the binary representations of exp - // We will verify the output by recomposing exp - let (max_limb, max_bit) = fcall_msb_pos_256(exp, &[0, 0, 0, 0]); - - // Perform the loop, based on the binary representation of exp - - // We do the first iteration separately - let _max_limb = max_limb as usize; - let exp_bit = (exp[_max_limb] >> max_bit) & 1; - assert_eq!(exp_bit, 1); // the first received bit should be 1 - - // Start at a - let mut result = *a; - let mut exp_rec = [0, 0, 0, 0]; - exp_rec[_max_limb] = 1 << max_bit; - - // Perform the rest of the loop - let _max_bit = max_bit as usize; - let mut dl = [0u64; 4]; - let mut dh = [0u64; 4]; - for i in (0..=_max_limb).rev() { - let bit_len = if i == _max_limb { _max_bit - 1 } else { 63 }; - for j in (0..=bit_len).rev() { - // Always square - let mut params = SyscallArith256Params { - a: &result, - b: &result, - c: &[0u64; 4], - dl: &mut dl, - dh: &mut dh, - }; - syscall_arith256(&mut params); - result = dl; - - // Get the next bit b of exp - // If b == 1, we multiply result by a, otherwise start the next iteration - if ((exp[i] >> j) & 1) == 1 { - let mut params = SyscallArith256Params { - a: &result, - b: a, - c: &[0u64; 4], - dl: &mut dl, - dh: &mut dh, - }; - syscall_arith256(&mut params); - result = dl; - - // Reconstruct exp - exp_rec[i] |= 1 << j; - } - } - } - - // Check that the reconstructed exp is equal to the input exp - assert_eq!(exp_rec, *exp); - - result -} - -// ========== Pointer-based API ========== - -/// Modular reduction of a 256-bit integer -/// -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes). -/// - `m` must point to a valid `[u64; 4]` (32 bytes). -/// - `result` must point to a valid `[u64; 4]` (32 bytes), used as output. -#[no_mangle] -pub unsafe extern "C" fn redmod256_c(a: *const u64, m: *const u64, result: *mut u64) { - let mut d = [0u64; 4]; - let mut params = SyscallArith256ModParams { - a: &*(a as *const [u64; 4]), - b: &[1, 0, 0, 0], - c: &[0u64; 4], - module: &*(m as *const [u64; 4]), - d: &mut d, - }; - syscall_arith256_mod(&mut params); - - core::ptr::copy_nonoverlapping(d.as_ptr(), result, 4); -} - -/// Modular addition of 256-bit integers -/// -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes). -/// - `b` must point to a valid `[u64; 4]` (32 bytes). -/// - `m` must point to a valid `[u64; 4]` (32 bytes). -/// - `result` must point to a valid `[u64; 4]` (32 bytes), used as output. -#[no_mangle] -pub unsafe extern "C" fn addmod256_c( - a: *const u64, - b: *const u64, - m: *const u64, - result: *mut u64, -) { - let mut d = [0u64; 4]; - let mut params = SyscallArith256ModParams { - a: &*(a as *const [u64; 4]), - b: &[1, 0, 0, 0], - c: &*(b as *const [u64; 4]), - module: &*(m as *const [u64; 4]), - d: &mut d, - }; - syscall_arith256_mod(&mut params); - - core::ptr::copy_nonoverlapping(d.as_ptr(), result, 4); -} - -/// Modular multiplication of 256-bit integers -/// -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes). -/// - `b` must point to a valid `[u64; 4]` (32 bytes). -/// - `m` must point to a valid `[u64; 4]` (32 bytes). -/// - `result` must point to a valid `[u64; 4]` (32 bytes), used as output. -#[no_mangle] -pub unsafe extern "C" fn mulmod256_c( - a: *const u64, - b: *const u64, - m: *const u64, - result: *mut u64, -) { - let mut d = [0u64; 4]; - let mut params = SyscallArith256ModParams { - a: &*(a as *const [u64; 4]), - b: &*(b as *const [u64; 4]), - c: &[0u64; 4], - module: &*(m as *const [u64; 4]), - d: &mut d, - }; - syscall_arith256_mod(&mut params); - - core::ptr::copy_nonoverlapping(d.as_ptr(), result, 4); -} - -/// Wrapping multiplication of 256-bit integers -/// -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes). -/// - `b` must point to a valid `[u64; 4]` (32 bytes). -/// - `result` must point to a valid `[u64; 4]` (32 bytes), used as output. -#[no_mangle] -pub unsafe extern "C" fn wmul256_c(a: *const u64, b: *const u64, result: *mut u64) { - let mut dl = [0u64; 4]; - let mut dh = [0u64; 4]; - let mut params = SyscallArith256Params { - a: &*(a as *const [u64; 4]), - b: &*(b as *const [u64; 4]), - c: &[0u64; 4], - dl: &mut dl, - dh: &mut dh, - }; - syscall_arith256(&mut params); - - core::ptr::copy_nonoverlapping(dl.as_ptr(), result, 4); -} - -/// Overflowing multiplication of 256-bit integers -/// -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes). -/// - `b` must point to a valid `[u64; 4]` (32 bytes). -/// - `result` must point to a valid `[u64; 4]` (32 bytes), used as output. -/// -/// Returns `true` if overflow occurred, `false` otherwise. -#[no_mangle] -pub unsafe extern "C" fn omul256_c(a: *const u64, b: *const u64, result: *mut u64) -> bool { - let mut dl = [0u64; 4]; - let mut dh = [0u64; 4]; - let mut params = SyscallArith256Params { - a: &*(a as *const [u64; 4]), - b: &*(b as *const [u64; 4]), - c: &[0u64; 4], - dl: &mut dl, - dh: &mut dh, - }; - syscall_arith256(&mut params); - - core::ptr::copy_nonoverlapping(dl.as_ptr(), result, 4); - - // If the high part is non-zero, we have an overflow - !eq(&dh, &[0u64; 4]) -} - -/// Division and remainder of 256-bit integers -/// -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes). -/// - `b` must point to a valid `[u64; 4]` (32 bytes), and must be non-zero. -/// - `q` must point to a valid `[u64; 4]` (32 bytes), used as quotient output. -/// - `r` must point to a valid `[u64; 4]` (32 bytes), used as remainder output. -/// -/// # Panics -/// Panics if `b` is zero. -#[no_mangle] -pub unsafe extern "C" fn divrem256_c(a: *const u64, b: *const u64, q: *mut u64, r: *mut u64) { - let a_ref = &*(a as *const [u64; 4]); - let b_ref = &*(b as *const [u64; 4]); - - // Check for division by zero - assert!(!eq(b_ref, &[0u64; 4]), "Division by zero"); - - // Hint the result of the division - let (quotient, remainder) = fcall_bigint256_div(a_ref, b_ref); - - // Check that a = b * quotient + remainder and remainder < b - let mut dl = [0u64; 4]; - let mut dh = [0u64; 4]; - let mut params = - SyscallArith256Params { a: b_ref, b: "ient, c: &remainder, dl: &mut dl, dh: &mut dh }; - syscall_arith256(&mut params); - assert!(eq(&dl, a_ref), "Dividend does not equal divisor * quotient + remainder"); - assert!(lt(&remainder, b_ref), "Remainder is not less than divisor"); - - core::ptr::copy_nonoverlapping(quotient.as_ptr(), q, 4); - core::ptr::copy_nonoverlapping(remainder.as_ptr(), r, 4); -} - -/// Wrapping exponentiation of 256-bit integers -/// -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes). -/// - `exp` must point to a valid `[u64; 4]` (32 bytes). -/// - `result` must point to a valid `[u64; 4]` (32 bytes), used as output. -#[no_mangle] -pub unsafe extern "C" fn wpow256_c(a: *const u64, exp: *const u64, result: *mut u64) { - let a_ref = &*(a as *const [u64; 4]); - let exp_ref = &*(exp as *const [u64; 4]); - - let res = wpow256(a_ref, exp_ref); - - core::ptr::copy_nonoverlapping(res.as_ptr(), result, 4); -} diff --git a/ziskos/entrypoint/src/zisklib/lib/blake2b.rs b/ziskos/entrypoint/src/zisklib/lib/blake2b.rs new file mode 100644 index 000000000..9d6b66074 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/blake2b.rs @@ -0,0 +1,101 @@ +use crate::syscalls::{syscall_blake2b_round, SyscallBlake2bRoundParams}; + +/// BLAKE2b initialization vectors +const IV: [u64; 8] = [ + 0x6A09E667F3BCC908, + 0xBB67AE8584CAA73B, + 0x3C6EF372FE94F82B, + 0xA54FF53A5F1D36F1, + 0x510E527FADE682D1, + 0x9B05688C2B3E6C1F, + 0x1F83D9ABFB41BD6B, + 0x5BE0CD19137E2179, +]; + +pub fn blake2b_compress( + rounds: u32, + h: &mut [u64; 8], + m: &[u64; 16], + t: &[u64; 2], + f: bool, + #[cfg(feature = "hints")] hints: &mut Vec, +) { + let mut v = [0u64; 16]; + + v[..8].copy_from_slice(h); + v[8..16].copy_from_slice(&IV); + + v[12] ^= t[0]; + v[13] ^= t[1]; + + if f { + v[14] = !v[14]; + } + + for r in 0..rounds { + blake2b_round( + &mut v, + m, + r, + #[cfg(feature = "hints")] + hints, + ); + } + + for i in 0..8 { + h[i] ^= v[i] ^ v[i + 8]; + } +} + +fn blake2b_round( + v: &mut [u64; 16], + m: &[u64; 16], + round: u32, + #[cfg(feature = "hints")] hints: &mut Vec, +) { + let mut params = SyscallBlake2bRoundParams { index: (round % 10) as u64, state: v, input: m }; + syscall_blake2b_round( + &mut params, + #[cfg(feature = "hints")] + hints, + ); +} + +/// C-compatible wrapper for full Blake2b compression function +/// +/// # Safety +/// - `state` must point to a writable buffer of at least 8 `u64`s +/// - `message` must point to at least 16 `u64`s +/// - `offset` must point to at least 2 `u64`s +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_blake2b_compress_c")] +pub unsafe extern "C" fn blake2b_compress_c( + rounds: u32, + state: *mut u64, + message: *const u64, + offset: *const u64, + final_block: u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) { + // Parse state + let state_slice = core::slice::from_raw_parts_mut(state, 8); + let state_array: &mut [u64; 8] = &mut *(state_slice.as_mut_ptr() as *mut [u64; 8]); + + // Parse message + let message_slice = core::slice::from_raw_parts(message, 16); + let message_array: &[u64; 16] = &*(message_slice.as_ptr() as *const [u64; 16]); + + // Parse offset + let offset_slice = core::slice::from_raw_parts(offset, 2); + let offset_array: &[u64; 2] = &*(offset_slice.as_ptr() as *const [u64; 2]); + + blake2b_compress( + rounds, + state_array, + message_array, + offset_array, + final_block != 0, + #[cfg(feature = "hints")] + hints, + ); +} diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/constants.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/constants.rs index eb1246dcb..0175754d2 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/constants.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/constants.rs @@ -13,27 +13,63 @@ pub const E_B: [u64; 6] = [0x4, 0, 0, 0, 0, 0]; pub const ETWISTED_B: [u64; 12] = [0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x0, 0x0]; /// Identity element in G1 -pub const IDENTITY_G1: [u64; 12] = { - let mut tmp = [0u64; 12]; - tmp[6] = 1; - tmp -}; +pub const G1_IDENTITY: [u64; 12] = [0; 12]; /// Identity element in G2 -pub const IDENTITY_G2: [u64; 24] = { - let mut tmp = [0u64; 24]; - tmp[12] = 1; - tmp -}; +pub const G2_IDENTITY: [u64; 24] = [0; 24]; + +/// G1 generator point for BLS12-381 +pub const G1_GENERATOR: [u64; 12] = [ + 0xFB3A_F00A_DB22_C6BB, + 0x6C55_E83F_F97A_1AEF, + 0xA14E_3A3F_171B_AC58, + 0xC368_8C4F_9774_B905, + 0x2695_638C_4FA9_AC0F, + 0x17F1_D3A7_3197_D794, + 0x0CAA_2329_46C5_E7E1, + 0xD03C_C744_A288_8AE4, + 0x00DB_18CB_2C04_B3ED, + 0xFCF5_E095_D5D0_0AF6, + 0xA09E_30ED_741D_8AE4, + 0x08B3_F481_E3AA_A0F1, +]; + +/// G2 generator point for BLS12-381 +pub const G2_GENERATOR: [u64; 24] = [ + 0xD480_56C8_C121_BDB8, + 0x0BAC_0326_A805_BBEF, + 0xB451_0B64_7AE3_D177, + 0xC6E4_7AD4_FA40_3B02, + 0x2608_0527_2DC5_1051, + 0x024A_A2B2_F08F_0A91, + 0xE5AC_7D05_5D04_2B7E, + 0x334C_F112_1394_5D57, + 0xB5DA_61BB_DC7F_5049, + 0x596B_D0D0_9920_B61A, + 0x7DAC_D3A0_8827_4F65, + 0x13E0_2B60_5271_9F60, + 0xE193_5486_08B8_2801, + 0x923A_C9CC_3BAC_A289, + 0x6D42_9A69_5160_D12C, + 0xADFD_9BAA_8CBD_D3A7, + 0x8CC9_CDC6_DA2E_351A, + 0x0CE5_D527_727D_6E11, + 0xAAA9_075F_F05F_79BE, + 0x3F37_0D27_5CEC_1DA1, + 0x2674_92AB_572E_99AB, + 0xCB3E_287E_85A7_63AF, + 0x32AC_D2B0_2BC2_8B99, + 0x0606_C4A0_2EA7_34CC, +]; /// Base field size pub const P: [u64; 6] = [ - 0xB9FEFFFFFFFFAAAB, - 0x1EABFFFEB153FFFF, - 0x6730D2A0F6B0F624, - 0x64774B84F38512BF, - 0x4B1BA7B6434BACD7, - 0x1A0111EA397FE69A, + 0xB9FE_FFFF_FFFF_AAAB, + 0x1EAB_FFFE_B153_FFFF, + 0x6730_D2A0_F6B0_F624, + 0x6477_4B84_F385_12BF, + 0x4B1B_A7B6_434B_ACD7, + 0x1A01_11EA_397F_E69A, ]; /// Base field size minus one @@ -41,13 +77,16 @@ pub const P_MINUS_ONE: [u64; 6] = [P[0] - 1, P[1], P[2], P[3], P[4], P[5]]; /// Scalar field size pub const R: [u64; 4] = - [0xFFFFFFFF00000001, 0x53BDA402FFFE5BFE, 0x3339D80809A1D805, 0x73EDA753299D7D48]; + [0xFFFF_FFFF_0000_0001, 0x53BD_A402_FFFE_5BFE, 0x3339_D808_09A1_D805, 0x73ED_A753_299D_7D48]; /// Scalar field size minus one pub const R_MINUS_ONE: [u64; 4] = [R[0] - 1, R[1], R[2], R[3]]; /// A known non-quadratic residue in Fp -pub const NQR: [u64; 6] = [2, 0, 0, 0, 0, 0]; +pub const NQR_FP: [u64; 6] = [2, 0, 0, 0, 0, 0]; + +/// A known non-quadratic residue in Fp2 +pub const NQR_FP2: [u64; 12] = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]; /// This is the the order-3 element of for the σ endomorphism pub const GAMMA: [u64; 6] = [ @@ -207,3 +246,779 @@ pub const FROBENIUS_GAMMA25: [u64; 6] = [ 0xEC02_4086_63D4_DE85, 0x1A01_11EA_397F_E699, ]; + +/// Trusted setup G2 point `[τ]₂ := τ·G2` from the Ethereum KZG ceremony (uncompressed format) +/// For reference, see: https://github.com/ethereum/kzg-ceremony +pub const TRUSTED_SETUP_TAU_G2: [u64; 24] = [ + 0xc98edada20c1def2, + 0x087041de621000ed, + 0xa36851477ba4c60b, + 0x3926c911cceceac9, + 0x734429b7b38608e2, + 0x185cbfee53492714, + 0xafaaab24f3499f72, + 0x2914e5870cb452d2, + 0x1009a2ce615ac53d, + 0x26187075cbfbefa8, + 0x843bc287230af389, + 0x15bfd7dd8cdeb128, + 0xee689bfbbb832a99, + 0x4ce26d105941f383, + 0xe82451a496a9c979, + 0x131569490e28de18, + 0xd7d5ee8599d1fca2, + 0x014353bdb96b626d, + 0x23048ef30d0a154f, + 0x9495346f3d7ac9cd, + 0xda5ed1ba9bfa0789, + 0xef79de09fc63671f, + 0x03432fcae0181b4b, + 0x1666c54b0a325295, +]; + +// ============================================================================ +// Constants for G1 mapping (11-isogenous curve E': y² = x³ + A'x + B') +// ============================================================================ + +/// A' coefficient of the isogenous curve E' for G1 +/// A' = 0x144698a3b8e9433d693a02c96d4982b0ea985383ee66a8d8e8981aefd881ac98936f8da0e0f97f5cf428082d584c1d +pub const ISO_A_G1: [u64; 6] = [ + 0x5CF4_2808_2D58_4C1D, + 0x9893_6F8D_A0E0_F97F, + 0xD8E8_981A_EFD8_81AC, + 0xB0EA_9853_83EE_66A8, + 0x3D69_3A02_C96D_4982, + 0x0014_4698_A3B8_E943, +]; + +/// B' coefficient of the isogenous curve E' for G1 +/// B' = 0x12e2908d11688030018b12e8753eee3b2016c1f0f24f4070a0b9c14fcef35ef55a23215a316ceaa5d1cc48e98e172be0 +pub const ISO_B_G1: [u64; 6] = [ + 0xD1CC_48E9_8E17_2BE0, + 0x5A23_215A_316C_EAA5, + 0xA0B9_C14F_CEF3_5EF5, + 0x2016_C1F0_F24F_4070, + 0x018B_12E8_753E_EE3B, + 0x12E2_908D_1168_8030, +]; + +/// Z constant for G1 SWU: Z = 11 +pub const SWU_Z_G1: [u64; 6] = [0x0B, 0, 0, 0, 0, 0]; +pub const SWU_Z2_G1: [u64; 6] = [0x79, 0, 0, 0, 0, 0]; // 0x0B^2 + +/// Cofactor for G1 +pub const COFACTOR_G1: [u64; 4] = [0xD201000000010001, 0x0, 0x0, 0x0]; + +// ============================================================================ +// G1 Isogeny Map Coefficients (11-isogeny from E' to E) +// ============================================================================ + +/// Isogeny map x-numerator coefficients for G1 +pub const ISO_X_NUM_G1: [[u64; 6]; 12] = [ + [ + 0xAEAC_1662_7346_49B7, + 0x5610_C2D5_F2E6_2D6E, + 0xF262_7B56_CDB4_E2C8, + 0x6B30_3E88_A2D7_005F, + 0xB809_101D_D998_1585, + 0x11A0_5F2B_1E83_3340, + ], + [ + 0xE834_EEF1_B3CB_83BB, + 0x4838_F2A6_F318_C356, + 0xF565_E33C_70D1_E86B, + 0x7C17_E75B_2F6A_8417, + 0x0588_BAB2_2147_A81C, + 0x1729_4ED3_E943_AB2F, + ], + [ + 0xE017_9F9D_AC9E_DCB0, + 0x958C_3E3D_2A09_729F, + 0x6878_E501_EC68_E25C, + 0xCE03_2473_2959_83E5, + 0x1D10_48C5_D10A_9A1B, + 0x0D54_005D_B976_78EC, + ], + [ + 0xC5B3_8864_1D9B_6861, + 0x5336_E25C_E310_7193, + 0xF1B3_3289_F1B3_3083, + 0xD7F5_E465_6A8D_BF25, + 0x4E06_09D3_07E5_5412, + 0x1778_E716_6FCC_6DB7, + ], + [ + 0x5115_4CE9_AC88_95D9, + 0x985A_286F_301E_77C4, + 0x086E_EB65_982F_AC18, + 0x99DB_995A_1257_FB3F, + 0x6642_B4B3_E411_8E54, + 0x0E99_726A_3199_F443, + ], + [ + 0xCD13_C1C6_6F65_2983, + 0xA087_0D2D_CAE7_3D19, + 0x9ED3_AB90_97E6_8F90, + 0xDB3C_B17D_D952_799B, + 0x01D1_201B_F7A7_4AB5, + 0x1630_C325_0D73_13FF, + ], + [ + 0xDDD7_F225_A139_ED84, + 0x8DA2_5128_C105_2ECA, + 0x9008_E218_F9C8_6B2A, + 0xB115_8626_4F0F_8CE1, + 0x6A37_26C3_8AE6_52BF, + 0x0D6E_D655_3FE4_4D29, + ], + [ + 0x9CCB_5618_E3F0_C88E, + 0x39B7_C8F8_C8F4_75AF, + 0xA682_C62E_F0F2_7533, + 0x356D_E5AB_275B_4DB1, + 0xE874_3884_D111_7E53, + 0x17B8_1E77_01AB_DBE2, + ], + [ + 0x6D71_986A_8497_E317, + 0x4FA2_95F2_96B7_4E95, + 0xA2C5_96C9_28C5_D1DE, + 0xC43B_756C_E79F_5574, + 0x7B90_B335_63BE_990D, + 0x080D_3CF1_F9A7_8FC4, + ], + [ + 0x7F24_1067_BE39_0C9E, + 0xA319_0B2E_DC03_2779, + 0x6763_14BA_F4BB_1B7F, + 0xDD2E_CB80_3A0C_5C99, + 0x2E0C_3751_5D13_8F22, + 0x169B_1F8E_1BCF_A7C4, + ], + [ + 0xCA67_DF3F_1605_FB7B, + 0xF69B_771F_8C28_5DEC, + 0xD50A_F360_03B1_4866, + 0xFA7D_CCDD_E678_7F96, + 0x72D8_EC09_D256_5B0D, + 0x1032_1DA0_79CE_07E2, + ], + [ + 0xA9C8_BA2E_8BA2_D229, + 0xC24B_1B80_B64D_391F, + 0x23C0_BF1B_C24C_6B68, + 0x31D7_9D7E_22C8_37BC, + 0xBD1E_9623_81ED_EE3D, + 0x06E0_8C24_8E26_0E70, + ], +]; + +/// Isogeny map x-denominator coefficients for G1 +pub const ISO_X_DEN_G1: [[u64; 6]; 11] = [ + [ + 0x993C_F9FA_40D2_1B1C, + 0xB558_D681_BE34_3DF8, + 0x9C95_8861_7FC8_AC62, + 0x01D5_EF4B_A35B_48BA, + 0x18B2_E62F_4BD3_FA6F, + 0x08CA_8D54_8CFF_19AE, + ], + [ + 0xE5C8_276E_C82B_3BFF, + 0x13DA_A884_6CB0_26E9, + 0x0126_C258_8C48_BF57, + 0x7041_E8CA_0CF0_800C, + 0x48B4_7112_98E5_3636, + 0x1256_1A5D_EB55_9C43, + ], + [ + 0xFCC2_39BA_5CB8_3E19, + 0xD6A3_D096_7C94_FEDC, + 0xFCA6_4E00_B11A_CEAC, + 0x6F89_416F_5A71_8CD1, + 0x8137_E629_BFF2_991F, + 0x0B29_62FE_57A3_225E, + ], + [ + 0x130D_E893_8DC6_2CD8, + 0x4976_D524_3EEC_F5C4, + 0x54CC_A8AB_C28D_6FD0, + 0x5B08_243F_16B1_6551, + 0xC83A_AFEF_7C40_EB54, + 0x0342_5581_A58A_E2FE, + ], + [ + 0x539D_395B_3532_A21E, + 0x9BD2_9BA8_1F35_781D, + 0x8D6B_44E8_33B3_06DA, + 0xFFDF_C759_A120_62BB, + 0x0A6F_1D5F_43E7_A07D, + 0x13A8_E162_0229_14A8, + ], + [ + 0xC02D_F9A2_9F63_04A5, + 0x7400_D24B_C422_8F11, + 0x0A43_BCEF_24B8_982F, + 0x3957_35E9_CE9C_AD4D, + 0x5539_0F7F_0506_C6E9, + 0x0E73_55F8_E4E6_67B9, + ], + [ + 0xEC25_7449_6EE8_4A3A, + 0xEA73_B353_8F0D_E06C, + 0x4E2E_0730_62AE_DE9C, + 0x570F_5799_AF53_A189, + 0x0F3E_0C63_E059_6721, + 0x0772_CAAC_F169_3619, + ], + [ + 0x11F7_D99B_BDCC_5A5E, + 0x0FA5_B948_9D11_E2D3, + 0x1996_E1CD_F982_2C58, + 0x6E7F_63C2_1BCA_68A8, + 0x30B3_F5B0_74CF_0199, + 0x14A7_AC2A_9D64_A8B2, + ], + [ + 0x4776_EC3A_79A1_D641, + 0x0382_6692_ABBA_4370, + 0x7410_0DA6_7F39_8835, + 0xE07F_8D1D_7161_366B, + 0x5E92_0B3D_AFC7_A3CC, + 0x0A10_ECF6_ADA5_4F82, + ], + [ + 0x2D63_84D1_68EC_DD0A, + 0x9317_4E4B_4B78_6500, + 0x76DF_5339_78F3_1C15, + 0xF682_B4EE_96F7_D037, + 0x476D_6E3E_B3A5_6680, + 0x095F_C13A_B9E9_2AD4, + ], + [0x1, 0x0, 0x0, 0x0, 0x0, 0x0], +]; + +/// Isogeny map y-numerator coefficients for G1 +pub const ISO_Y_NUM_G1: [[u64; 6]; 16] = [ + [ + 0xBE98_4571_9707_BB33, + 0xCD0C_7AEE_9B3B_A3C2, + 0x2B52_AF6C_9565_43D3, + 0x11AD_138E_48A8_6952, + 0x259D_1F09_4980_DCFA, + 0x090D_97C8_1BA2_4EE0, + ], + [ + 0xE097_E75A_2E41_C696, + 0xD6C5_6711_962F_A8BF, + 0x0F90_6343_EB67_AD34, + 0x1223_E96C_254F_383D, + 0xD510_36D7_76FB_4683, + 0x1349_96A1_04EE_5811, + ], + [ + 0xB8DF_E240_C72D_E1F6, + 0xD26D_5216_28B0_0523, + 0xC344_BE4B_9140_0DA7, + 0x2552_E2D6_58A3_1CE2, + 0xF4A3_84C8_6A3B_4994, + 0x00CC_786B_AA96_6E66, + ], + [ + 0xA635_5C77_B0E5_F4CB, + 0xDE40_5ABA_9EC6_1DEC, + 0x09E4_A3EC_0325_1CF9, + 0xD42A_A7B9_0EEB_791C, + 0x7898_751A_D874_6757, + 0x01F8_6376_E898_1C21, + ], + [ + 0x41B6_DAEC_F2E8_FEDB, + 0x2EE7_F8DC_0990_40A8, + 0x7983_3FD2_2135_1ADC, + 0x1955_36FB_E3CE_50B8, + 0x5CAF_4FE2_A215_29C4, + 0x08CC_03FD_EFE0_FF13, + ], + [ + 0x99B2_3AB1_3633_A5F0, + 0x203F_6326_C95A_8072, + 0x7650_5C3D_3AD5_544E, + 0x74A7_D0D4_AFAD_B7BD, + 0x2211_E11D_B8F0_A6A0, + 0x1660_3FCA_4063_4B6A, + ], + [ + 0xC961_F885_5FE9_D6F2, + 0x47A8_7AC2_460F_415E, + 0x5231_413C_4D63_4F37, + 0xE75B_B8CA_2BE1_84CB, + 0xB2C9_77D0_2779_6B3C, + 0x04AB_0B9B_CFAC_1BBC, + ], + [ + 0xA15E_4CA3_1870_FB29, + 0x42F6_4550_FEDF_E935, + 0xFD03_8DA6_C26C_8426, + 0x170A_05BF_E3BD_D81F, + 0xDE99_26BD_2CA6_C674, + 0x0987_C8D5_333A_B86F, + ], + [ + 0x6037_0E57_7BDB_A587, + 0x69D6_5201_C786_07A3, + 0x1E8B_6E6A_1F20_CABE, + 0x8F3A_BD16_679D_C26C, + 0xE88C_9E22_1E4D_A1BB, + 0x09FC_4018_BD96_684B, + ], + [ + 0x2BAF_AAEB_CA73_1C30, + 0x9B3F_7055_DD4E_BA6F, + 0x0698_5E7E_D1E4_D43B, + 0xC42A_0CA7_915A_F6FE, + 0x223A_BDE7_ADA1_4A23, + 0x0E1B_BA7A_1186_BDB5, + ], + [ + 0xE813_711A_D011_C132, + 0x31BF_3A5C_CE3F_BAFC, + 0xD118_3E41_6389_E610, + 0xCD2F_CBCB_6CAF_493F, + 0x0DFD_0B8F_1D43_FB93, + 0x1971_3E47_937C_D1BE, + ], + [ + 0xCE07_C8A4_D007_4D8E, + 0x49D9_CDF4_1B44_D606, + 0x2E6B_FE7F_911F_6432, + 0x5235_59B8_AAF0_C246, + 0xB918_C143_FED2_EDCC, + 0x18B4_6A90_8F36_F6DE, + ], + [ + 0x0D4C_04F0_0B97_1EF8, + 0x06C8_51C1_9192_11F2, + 0xC027_10E8_07B4_633F, + 0x7AA7_B12A_3426_B08E, + 0xD155_0960_04F5_3F44, + 0x0B18_2CAC_101B_9399, + ], + [ + 0x42D9_D3F5_DB98_0133, + 0xC6CF_90AD_1C23_2A64, + 0x13E6_632D_3C40_659C, + 0x757B_3B08_0D4C_1580, + 0x72FC_00AE_7BE3_15DC, + 0x0245_A394_AD1E_CA9B, + ], + [ + 0x866B_1E71_5475_224B, + 0x6BA1_049B_6579_AFB7, + 0xD9AB_0F5D_396A_7CE4, + 0x5E67_3D81_D7E8_6568, + 0x02A1_59F7_48C4_A3FC, + 0x05C1_2964_5E44_CF11, + ], + [ + 0x04B4_56BE_69C8_B604, + 0xB665_027E_FEC0_1C77, + 0x57AD_D4FA_95AF_01B2, + 0xCB18_1D8F_8496_5A39, + 0x4EA5_0B3B_42DF_2EB5, + 0x15E6_BE4E_990F_03CE, + ], +]; + +/// Isogeny map y-denominator coefficients for G1 +pub const ISO_Y_DEN_G1: [[u64; 6]; 16] = [ + [ + 0x0147_9253_B036_63C1, + 0x07F3_688E_F60C_206D, + 0xEEC3_232B_5BE7_2E7A, + 0x601A_6DE5_7898_0BE6, + 0x5218_1140_FAD0_EAE9, + 0x1611_2C4C_3A9C_98B2, + ], + [ + 0x32F6_102C_2E49_A03D, + 0x78A4_2607_6352_9E35, + 0xA4A1_0356_F453_E01F, + 0x85C8_4FF7_31C4_D59C, + 0x1A0C_BD6C_43C3_48B8, + 0x1962_D75C_2381_201E, + ], + [ + 0x1E25_38B5_3DBF_67F2, + 0xA675_7CD6_36F9_6F89, + 0x0C35_A5DD_279C_D2EC, + 0x78C4_8555_51AE_7F31, + 0x6FAA_AE7D_6E8E_B157, + 0x058D_F330_6640_DA27, + ], + [ + 0xA8D2_6D98_445F_5416, + 0x7273_64F2_C282_97AD, + 0x123D_A489_E726_AF41, + 0xD115_C5DB_DDBC_D30E, + 0xF20D_23BF_89ED_B4D1, + 0x16B7_D288_798E_5395, + ], + [ + 0xDA39_1423_11A5_001D, + 0xA20B_15DC_0FD2_EDED, + 0x542E_DA0F_C9DE_C916, + 0xC6D1_9C9F_0F69_BBB0, + 0xB00C_C912_F822_8DDC, + 0x0BE0_E079_545F_43E4, + ], + [ + 0x02C6_477F_AAF9_B7AC, + 0x49F3_8DB9_DFA9_CCE2, + 0xC5EC_D87B_6F0F_5A64, + 0xB701_52C6_5550_D881, + 0x9FB2_66EA_AC78_3182, + 0x08D9_E529_7186_DB2D, + ], + [ + 0x3D1A_1399_126A_775C, + 0xD5FA_9C01_A58B_1FB9, + 0x5DD3_65BC_400A_0051, + 0x5EEC_FDFA_8D0C_F8EF, + 0xC3BA_8734_ACE9_824B, + 0x1660_07C0_8A99_DB2F, + ], + [ + 0x60EE_415A_1581_2ED9, + 0xB920_F5B0_0801_DEE4, + 0xFEB3_4FD2_0635_7132, + 0xE5A4_375E_FA1F_4FD7, + 0x03BC_DDFA_BBA6_FF6E, + 0x16A3_EF08_BE3E_A7EA, + ], + [ + 0x6B23_3D9D_5553_5D4A, + 0x52CF_E2F7_BB92_4883, + 0xABC5_750C_4BF3_9B48, + 0xF9FB_0CE4_C6AF_5920, + 0x1A1B_E54F_D1D7_4CC4, + 0x1866_C8ED_336C_6123, + ], + [ + 0x346E_F48B_B891_3F55, + 0xC738_5EA3_D529_B35E, + 0x5308_592E_7EA7_D4FB, + 0x3216_F763_E13D_87BB, + 0xEA82_0597_D94A_8490, + 0x167A_55CD_A70A_6E1C, + ], + [ + 0x00F8_B49C_BA8F_6AA8, + 0x71A5_C29F_4F83_0604, + 0x0E59_1B36_E636_A5C8, + 0x9C6D_D039_BB61_A629, + 0x48F0_10A0_1AD2_911D, + 0x04D2_F259_EEA4_05BD, + ], + [ + 0x9684_B529_E256_1092, + 0x16F9_6898_6F7E_BBEA, + 0x8C0F_9A88_CEA7_9135, + 0x7F94_FF8A_EFCE_42D2, + 0xF585_2C1E_48C5_0C47, + 0x0ACC_BB67_481D_033F, + ], + [ + 0x1E99_B138_5733_45CC, + 0x9300_0763_E3B9_0AC1, + 0x7D5C_EEF9_A00D_9B86, + 0x5433_46D9_8ADF_0226, + 0xC361_3144_B45F_1496, + 0x0AD6_B951_4C76_7FE3, + ], + [ + 0xD1FA_DC13_26ED_06F7, + 0x4205_17BD_8714_CC80, + 0xCB74_8DF2_7942_480E, + 0xBF56_5B94_E729_27C1, + 0x628B_DD0D_53CD_76F2, + 0x0266_0400_EB2E_4F3B, + ], + [ + 0x4415_473A_1D63_4B8F, + 0x5CA2_F570_F134_9780, + 0x324E_FCD6_356C_AA20, + 0x71C4_0F65_E273_B853, + 0x6B24_255E_0D78_19C1, + 0x0E0F_A1D8_16DD_C03E, + ], + [0x1, 0x0, 0x0, 0x0, 0x0, 0x0], +]; + +// ============================================================================ +// Constants for G2 mapping (3-isogenous curve E': y² = x³ + A'x + B') +// ============================================================================ + +/// A' coefficient of the isogenous curve E' for G2 +/// A' = 0xF0 * I +pub const ISO_A_G2: [u64; 12] = + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00]; + +/// B' coefficient of the isogenous curve E' for G2 +/// B' = 0x03F4 * (1 + I) +pub const ISO_B_G2: [u64; 12] = [ + 0x03F4, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03F4, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +]; + +/// Z constant for G2 SWU: Z = -(2 + I) +pub const SWU_Z_G2: [u64; 12] = + [P[0] - 2, P[1], P[2], P[3], P[4], P[5], P[0] - 1, P[1], P[2], P[3], P[4], P[5]]; + +// ============================================================================ +// G2 Isogeny Map Coefficients (3-isogeny from E' to E) +// ============================================================================ + +/// Isogeny map x-numerator coefficients for G2 +pub const ISO_X_NUM_G2: [[u64; 12]; 4] = [ + [ + 0x6238_AAAA_AAAA_97D6, + 0x5C26_38E3_43D9_C71C, + 0x88B5_8423_C50A_E15D, + 0x32C5_2D39_FD3A_042A, + 0xBB5B_7A9A_47D7_ED85, + 0x05C7_5950_7E8E_333E, + 0x6238_AAAA_AAAA_97D6, + 0x5C26_38E3_43D9_C71C, + 0x88B5_8423_C50A_E15D, + 0x32C5_2D39_FD3A_042A, + 0xBB5B_7A9A_47D7_ED85, + 0x05C7_5950_7E8E_333E, + ], + [ + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x26A9_FFFF_FFFF_C71A, + 0x1472_AAA9_CB8D_5555, + 0x9A20_8C6B_4F20_A418, + 0x984F_87AD_F7AE_0C7F, + 0x3212_6FCE_D787_C88F, + 0x1156_0BF1_7BAA_99BC, + ], + [ + 0x26A9_FFFF_FFFF_C71E, + 0x1472_AAA9_CB8D_5555, + 0x9A20_8C6B_4F20_A418, + 0x984F_87AD_F7AE_0C7F, + 0x3212_6FCE_D787_C88F, + 0x1156_0BF1_7BAA_99BC, + 0x9354_FFFF_FFFF_E38D, + 0x0A39_5554_E5C6_AAAA, + 0xCD10_4635_A790_520C, + 0xCC27_C3D6_FBD7_063F, + 0x1909_37E7_6BC3_E447, + 0x08AB_05F8_BDD5_4CDE, + ], + [ + 0x88E2_AAAA_AAAA_5ED1, + 0x7098_E38D_0F67_1C71, + 0x22D6_108F_142B_8575, + 0xCB14_B4E7_F4E8_10AA, + 0xED6D_EA69_1F5F_B614, + 0x171D_6541_FA38_CCFA, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + ], +]; + +/// Isogeny map x-denominator coefficients for G2 +pub const ISO_X_DEN_G2: [[u64; 12]; 3] = [ + [ + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0xB9FE_FFFF_FFFF_AA63, + 0x1EAB_FFFE_B153_FFFF, + 0x6730_D2A0_F6B0_F624, + 0x6477_4B84_F385_12BF, + 0x4B1B_A7B6_434B_ACD7, + 0x1A01_11EA_397F_E69A, + ], + [ + 0x0000_0000_0000_000C, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0xB9FE_FFFF_FFFF_AA9F, + 0x1EAB_FFFE_B153_FFFF, + 0x6730_D2A0_F6B0_F624, + 0x6477_4B84_F385_12BF, + 0x4B1B_A7B6_434B_ACD7, + 0x1A01_11EA_397F_E69A, + ], + [0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0], +]; + +/// Isogeny map y-numerator coefficients for G2 +pub const ISO_Y_NUM_G2: [[u64; 12]; 4] = [ + [ + 0x12CF_C71C_71C6_D706, + 0xFC8C_25EB_F8C9_2F68, + 0xF544_39D8_7D27_E500, + 0x0F7D_A5D4_A07F_649B, + 0x59A4_C18B_076D_1193, + 0x1530_477C_7AB4_113B, + 0x12CF_C71C_71C6_D706, + 0xFC8C_25EB_F8C9_2F68, + 0xF544_39D8_7D27_E500, + 0x0F7D_A5D4_A07F_649B, + 0x59A4_C18B_076D_1193, + 0x1530_477C_7AB4_113B, + ], + [ + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x6238_AAAA_AAAA_97BE, + 0x5C26_38E3_43D9_C71C, + 0x88B5_8423_C50A_E15D, + 0x32C5_2D39_FD3A_042A, + 0xBB5B_7A9A_47D7_ED85, + 0x05C7_5950_7E8E_333E, + ], + [ + 0x26A9_FFFF_FFFF_C71C, + 0x1472_AAA9_CB8D_5555, + 0x9A20_8C6B_4F20_A418, + 0x984F_87AD_F7AE_0C7F, + 0x3212_6FCE_D787_C88F, + 0x1156_0BF1_7BAA_99BC, + 0x9354_FFFF_FFFF_E38F, + 0x0A39_5554_E5C6_AAAA, + 0xCD10_4635_A790_520C, + 0xCC27_C3D6_FBD7_063F, + 0x1909_37E7_6BC3_E447, + 0x08AB_05F8_BDD5_4CDE, + ], + [ + 0xE1B3_71C7_1C71_8B10, + 0x4E79_097A_56DC_4BD9, + 0xB0E9_77C6_9AA2_7452, + 0x761B_0F37_A1E2_6286, + 0xFBF7_043D_E381_1AD0, + 0x124C_9AD4_3B6C_F79B, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + ], +]; + +/// Isogeny map y-denominator coefficients for G2 +pub const ISO_Y_DEN_G2: [[u64; 12]; 4] = [ + [ + 0xB9FE_FFFF_FFFF_A8FB, + 0x1EAB_FFFE_B153_FFFF, + 0x6730_D2A0_F6B0_F624, + 0x6477_4B84_F385_12BF, + 0x4B1B_A7B6_434B_ACD7, + 0x1A01_11EA_397F_E69A, + 0xB9FE_FFFF_FFFF_A8FB, + 0x1EAB_FFFE_B153_FFFF, + 0x6730_D2A0_F6B0_F624, + 0x6477_4B84_F385_12BF, + 0x4B1B_A7B6_434B_ACD7, + 0x1A01_11EA_397F_E69A, + ], + [ + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0xB9FE_FFFF_FFFF_A9D3, + 0x1EAB_FFFE_B153_FFFF, + 0x6730_D2A0_F6B0_F624, + 0x6477_4B84_F385_12BF, + 0x4B1B_A7B6_434B_ACD7, + 0x1A01_11EA_397F_E69A, + ], + [ + 0x0000_0000_0000_0012, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0xB9FE_FFFF_FFFF_AA99, + 0x1EAB_FFFE_B153_FFFF, + 0x6730_D2A0_F6B0_F624, + 0x6477_4B84_F385_12BF, + 0x4B1B_A7B6_434B_ACD7, + 0x1A01_11EA_397F_E69A, + ], + [0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0], +]; + +pub const PSI_C1: [u64; 12] = [ + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x8BFD_0000_0000_AAAD, + 0x4094_27EB_4F49_FFFD, + 0x897D_2965_0FB8_5F9B, + 0xAA0D_857D_8975_9AD4, + 0xEC02_4086_63D4_DE85, + 0x1A01_11EA_397F_E699, +]; + +pub const PSI_C2: [u64; 12] = [ + 0xF1EE_7B04_121B_DEA2, + 0x3044_66CF_3E67_FA0A, + 0xEF39_6489_F61E_B45E, + 0x1C3D_EDD9_30B1_CF60, + 0xE2E9_C448_D77A_2CD9, + 0x1352_03E6_0180_A68E, + 0xC810_84FB_EDE3_CC09, + 0xEE67_992F_72EC_05F4, + 0x77F7_6E17_0092_41C5, + 0x4839_5DAB_C2D3_435E, + 0x6831_E36D_6BD1_7FFE, + 0x06AF_0E04_37FF_400B, +]; + +pub const PSI2_C1: [u64; 12] = [ + 0x8BFD_0000_0000_AAAC, + 0x4094_27EB_4F49_FFFD, + 0x897D_2965_0FB8_5F9B, + 0xAA0D_857D_8975_9AD4, + 0xEC02_4086_63D4_DE85, + 0x1A01_11EA_397F_E699, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, + 0x0000_0000_0000_0000, +]; diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/curve.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/curve.rs index 39e696f83..137c71343 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/curve.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/curve.rs @@ -5,48 +5,217 @@ use crate::{ syscall_bls12_381_curve_add, syscall_bls12_381_curve_dbl, SyscallBls12_381CurveAddParams, SyscallPoint384, }, - zisklib::{eq, fcall_msb_pos_384}, + zisklib::{eq, fcall_msb_pos_256, lt}, }; use super::{ - constants::{E_B, GAMMA, IDENTITY_G1}, - fp::{add_fp_bls12_381, mul_fp_bls12_381, neg_fp_bls12_381, square_fp_bls12_381}, + constants::{E_B, G1_IDENTITY, GAMMA, P}, + fp::{ + add_fp_bls12_381, mul_fp_bls12_381, neg_fp_bls12_381, sqrt_fp_bls12_381, + square_fp_bls12_381, + }, + fr::{reduce_fr_bls12_381, scalar_bytes_be_to_u64_le_bls12_381}, }; +// TODO: Check what happens if scalar or ecc coordinates are bigger than the field size + +/// G1 add result codes +const G1_ADD_SUCCESS: u8 = 0; +const G1_ADD_SUCCESS_INFINITY: u8 = 1; +const G1_ADD_ERR_NOT_ON_CURVE: u8 = 2; + +/// G1 MSM result codes +const G1_MSM_SUCCESS: u8 = 0; +const G1_MSM_SUCCESS_INFINITY: u8 = 1; +const G1_MSM_ERR_NOT_ON_CURVE: u8 = 2; +const G1_MSM_ERR_NOT_IN_SUBGROUP: u8 = 3; + +/// Decompresses a point on the BLS12-381 curve from 48 bytes +/// +/// Format: Big-endian x-coordinate with flag bits in the top 3 bits of the first byte: +/// - Bit 7 (0x80): Compression flag (must be 1 for compressed) +/// - Bit 6 (0x40): Infinity flag (1 = point at infinity) +/// - Bit 5 (0x20): Sign flag (1 = y is lexicographically largest) +pub fn decompress_bls12_381( + input: &[u8; 48], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Result<[u64; 12], &'static str> { + let flags = input[0]; + + // Check compression bit + if (flags & 0x80) == 0 { + return Err("Expected compressed point"); + } + + // Check infinity bit + if (flags & 0x40) != 0 { + // Verify rest is zero + if (flags & 0x3f) != 0 { + return Err("Invalid infinity encoding"); + } + for input in input.iter().skip(1) { + if *input != 0 { + return Err("Invalid infinity encoding"); + } + } + return Ok(G1_IDENTITY); + } + + // Extract sign bit + let y_sign = (flags & 0x20) != 0; + + // Extract x-coordinate (big-endian), masking off flag bits + let mut x = [0u64; 6]; + let mut bytes = [0u8; 48]; + bytes.copy_from_slice(input); + bytes[0] &= 0x1f; // Clear flag bits + + // Convert from big-endian bytes to little-endian u64 limbs + for i in 0..6 { + for j in 0..8 { + x[5 - i] |= (bytes[i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + // Verify x < p + if !lt(&x, &P) { + return Err("x coordinate >= field modulus"); + } + + // Calculate the y-coordinate of the point: y = sqrt(x³ + 4) + let x_sq = square_fp_bls12_381( + &x, + #[cfg(feature = "hints")] + hints, + ); + let x_cb = mul_fp_bls12_381( + &x_sq, + &x, + #[cfg(feature = "hints")] + hints, + ); + let y_sq = add_fp_bls12_381( + &x_cb, + &E_B, + #[cfg(feature = "hints")] + hints, + ); + + let (y, has_sqrt) = sqrt_fp_bls12_381( + &y_sq, + #[cfg(feature = "hints")] + hints, + ); + if !has_sqrt { + return Err("No square root exists - point not on curve"); + } + + // Determine the sign of y, which is (lexicographically) done by checking if y > -y + let y_neg = neg_fp_bls12_381( + &y, + #[cfg(feature = "hints")] + hints, + ); + let y_is_larger = lt(&y_neg, &y); + + // Select the correct y based on sign bit + let final_y = if y_is_larger == y_sign { y } else { y_neg }; + + // Return the point (x, final_y) + let mut result = [0u64; 12]; + result[0..6].copy_from_slice(&x); + result[6..12].copy_from_slice(&final_y); + Ok(result) +} + /// Check if a non-zero point `p` is on the BLS12-381 curve -pub fn is_on_curve_bls12_381(p: &[u64; 12]) -> bool { +pub fn is_on_curve_bls12_381( + p: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { let x: [u64; 6] = p[0..6].try_into().unwrap(); let y: [u64; 6] = p[6..12].try_into().unwrap(); // p in E iff y² == x³ + 4 - let lhs = square_fp_bls12_381(&y); - let mut rhs = square_fp_bls12_381(&x); - rhs = mul_fp_bls12_381(&rhs, &x); - rhs = add_fp_bls12_381(&rhs, &E_B); + let lhs = square_fp_bls12_381( + &y, + #[cfg(feature = "hints")] + hints, + ); + let mut rhs = square_fp_bls12_381( + &x, + #[cfg(feature = "hints")] + hints, + ); + rhs = mul_fp_bls12_381( + &rhs, + &x, + #[cfg(feature = "hints")] + hints, + ); + rhs = add_fp_bls12_381( + &rhs, + &E_B, + #[cfg(feature = "hints")] + hints, + ); eq(&lhs, &rhs) } /// Check if a non-zero point `p` is on the BLS12-381 subgroup -pub fn is_on_subgroup_bls12_381(p: &[u64; 12]) -> bool { +pub fn is_on_subgroup_bls12_381( + p: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { // p in subgroup iff: // ((x²-1)/3)(2·σ(P) - P - σ²(P)) == σ²(P) // where σ(x,y) = (ɣ·x,y) // Compute σ(P), σ²(P) - let sigma1 = sigma_endomorphism_bls12_381(p); - let rhs = sigma_endomorphism_bls12_381(&sigma1); + let sigma1 = sigma_endomorphism_bls12_381( + p, + #[cfg(feature = "hints")] + hints, + ); + let rhs = sigma_endomorphism_bls12_381( + &sigma1, + #[cfg(feature = "hints")] + hints, + ); // Compute lhs = ((x²-1)/3)(2·σ(P) - P - σ²(P)) - let mut lhs = dbl_bls12_381(&sigma1); - lhs = sub_bls12_381(&lhs, p); - lhs = sub_bls12_381(&lhs, &rhs); - lhs = scalar_mul_by_x2div3_bls12_381(&lhs); + let mut lhs = dbl_bls12_381( + &sigma1, + #[cfg(feature = "hints")] + hints, + ); + lhs = sub_bls12_381( + &lhs, + p, + #[cfg(feature = "hints")] + hints, + ); + lhs = sub_bls12_381( + &lhs, + &rhs, + #[cfg(feature = "hints")] + hints, + ); + lhs = scalar_mul_by_x2div3_bls12_381( + &lhs, + #[cfg(feature = "hints")] + hints, + ); eq(&lhs, &rhs) } /// Adds two non-zero points `p1` and `p2` on the BLS12-381 curve -pub fn add_bls12_381(p1: &[u64; 12], p2: &[u64; 12]) -> [u64; 12] { +pub fn add_bls12_381( + p1: &[u64; 12], + p2: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { let x1: [u64; 6] = p1[0..6].try_into().unwrap(); let y1: [u64; 6] = p1[6..12].try_into().unwrap(); let x2: [u64; 6] = p2[0..6].try_into().unwrap(); @@ -57,10 +226,14 @@ pub fn add_bls12_381(p1: &[u64; 12], p2: &[u64; 12]) -> [u64; 12] { // Is y1 == y2? if eq(&y1, &y2) { // Compute the doubling - return dbl_bls12_381(p1); + return dbl_bls12_381( + p1, + #[cfg(feature = "hints")] + hints, + ); } else { // Return 𝒪 - return IDENTITY_G1; + return G1_IDENTITY; } } @@ -68,7 +241,11 @@ pub fn add_bls12_381(p1: &[u64; 12], p2: &[u64; 12]) -> [u64; 12] { let mut p1 = SyscallPoint384 { x: x1, y: y1 }; let p2 = SyscallPoint384 { x: x2, y: y2 }; let mut params = SyscallBls12_381CurveAddParams { p1: &mut p1, p2: &p2 }; - syscall_bls12_381_curve_add(&mut params); + syscall_bls12_381_curve_add( + &mut params, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 12]; result[0..6].copy_from_slice(&p1.x); @@ -76,13 +253,76 @@ pub fn add_bls12_381(p1: &[u64; 12], p2: &[u64; 12]) -> [u64; 12] { result } +/// Adds two points `p1` and `p2` on the BLS12-381 curve +pub fn add_complete_bls12_381( + p1: &[u64; 12], + p2: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Result<[u64; 12], u8> { + let p1_is_inf = *p1 == G1_IDENTITY; + let p2_is_inf = *p2 == G1_IDENTITY; + + // Handle identity cases + if p1_is_inf && p2_is_inf { + return Ok(G1_IDENTITY); + } + if p1_is_inf { + if !is_on_curve_bls12_381( + p2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G1_ADD_ERR_NOT_ON_CURVE); + } + return Ok(*p2); + } + + if p2_is_inf { + if !is_on_curve_bls12_381( + p1, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G1_ADD_ERR_NOT_ON_CURVE); + } + return Ok(*p1); + } + + // Both points are non-identity, validate both are on curve + if !is_on_curve_bls12_381( + p1, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G1_ADD_ERR_NOT_ON_CURVE); + } + if !is_on_curve_bls12_381( + p2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G1_ADD_ERR_NOT_ON_CURVE); + } + + // Otherwise, perform regular addition + Ok(add_bls12_381( + p1, + p2, + #[cfg(feature = "hints")] + hints, + )) +} + /// Negation of a non-zero point `p` on the BLS12-381 curve -pub fn neg_bls12_381(p: &[u64; 12]) -> [u64; 12] { +pub fn neg_bls12_381(p: &[u64; 12], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 12] { let x: [u64; 6] = p[0..6].try_into().unwrap(); let y: [u64; 6] = p[6..12].try_into().unwrap(); - let y_neg = neg_fp_bls12_381(&y); - + let y_neg = neg_fp_bls12_381( + &y, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 12]; result[0..6].copy_from_slice(&x); result[6..12].copy_from_slice(&y_neg); @@ -90,9 +330,13 @@ pub fn neg_bls12_381(p: &[u64; 12]) -> [u64; 12] { } /// Doubling of a non-zero point `p` on the BLS12-381 curve -pub fn dbl_bls12_381(p: &[u64; 12]) -> [u64; 12] { +pub fn dbl_bls12_381(p: &[u64; 12], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 12] { let mut p = SyscallPoint384 { x: p[0..6].try_into().unwrap(), y: p[6..12].try_into().unwrap() }; - syscall_bls12_381_curve_dbl(&mut p); + syscall_bls12_381_curve_dbl( + &mut p, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 12]; result[0..6].copy_from_slice(&p.x); @@ -101,35 +345,90 @@ pub fn dbl_bls12_381(p: &[u64; 12]) -> [u64; 12] { } /// Subtraction of two non-zero points `p1` and `p2` on the BLS12-381 curve -pub fn sub_bls12_381(p1: &[u64; 12], p2: &[u64; 12]) -> [u64; 12] { +pub fn sub_bls12_381( + p1: &[u64; 12], + p2: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { let x2: [u64; 6] = p2[0..6].try_into().unwrap(); let y2: [u64; 6] = p2[6..12].try_into().unwrap(); // P1 - P2 = P1 + (-P2) - let y2_neg = neg_fp_bls12_381(&y2); + let y2_neg = neg_fp_bls12_381( + &y2, + #[cfg(feature = "hints")] + hints, + ); let mut p2_neg = [0u64; 12]; p2_neg[0..6].copy_from_slice(&x2); p2_neg[6..12].copy_from_slice(&y2_neg); - add_bls12_381(p1, &p2_neg) + add_bls12_381( + p1, + &p2_neg, + #[cfg(feature = "hints")] + hints, + ) +} + +/// Subtraction of two points `p1` and `p2` on the BLS12-381 curve +pub fn sub_complete_bls12_381( + p1: &[u64; 12], + p2: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { + let p1_is_inf = *p1 == G1_IDENTITY; + let p2_is_inf = *p2 == G1_IDENTITY; + + // Handle identity cases + if p1_is_inf && p2_is_inf { + // O - O = O + return G1_IDENTITY; + } else if p1_is_inf { + // O - P2 = -P2 + return neg_bls12_381( + p2, + #[cfg(feature = "hints")] + hints, + ); + } else if p2_is_inf { + // P1 - O = P1 + return *p1; + } + + // Perform regular subtraction: P1 - P2 = P1 + (-P2) + sub_bls12_381( + p1, + p2, + #[cfg(feature = "hints")] + hints, + ) } /// Multiplies a non-zero point `p` on the BLS12-381 curve by a scalar `k` on the BLS12-381 scalar field -pub fn scalar_mul_bls12_381(p: &[u64; 12], k: &[u64; 6]) -> [u64; 12] { +pub fn scalar_mul_bls12_381( + p: &[u64; 12], + k: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { // Direct cases: k = 0, k = 1, k = 2 match k { - [0, 0, 0, 0, 0, 0] => { + [0, 0, 0, 0] => { // Return 𝒪 - return IDENTITY_G1; + return G1_IDENTITY; } - [1, 0, 0, 0, 0, 0] => { + [1, 0, 0, 0] => { // Return p return *p; } - [2, 0, 0, 0, 0, 0] => { + [2, 0, 0, 0] => { // Return 2p - return dbl_bls12_381(p); + return dbl_bls12_381( + p, + #[cfg(feature = "hints")] + hints, + ); } _ => {} } @@ -138,7 +437,12 @@ pub fn scalar_mul_bls12_381(p: &[u64; 12], k: &[u64; 6]) -> [u64; 12] { // Hint the length the binary representations of k // We will verify the output by recomposing k // Moreover, we should check that the first received bit is 1 - let (max_limb, max_bit) = fcall_msb_pos_384(k, &[0, 0, 0, 0, 0, 0]); + let (max_limb, max_bit) = fcall_msb_pos_256( + k, + &[0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); // Perform the loop, based on the binary representation of k @@ -153,7 +457,7 @@ pub fn scalar_mul_bls12_381(p: &[u64; 12], k: &[u64; 6]) -> [u64; 12] { let x1: [u64; 6] = p[0..6].try_into().unwrap(); let y1: [u64; 6] = p[6..12].try_into().unwrap(); let mut q = SyscallPoint384 { x: x1, y: y1 }; - let mut k_rec = [0u64; 6]; + let mut k_rec = [0u64; 4]; k_rec[max_limb] |= 1 << max_bit; // Determine starting limb/bit for the loop @@ -171,13 +475,21 @@ pub fn scalar_mul_bls12_381(p: &[u64; 12], k: &[u64; 6]) -> [u64; 12] { for i in (0..=limb).rev() { for j in (0..=bit).rev() { // Always double - syscall_bls12_381_curve_dbl(&mut q); + syscall_bls12_381_curve_dbl( + &mut q, + #[cfg(feature = "hints")] + hints, + ); // Get the next bit b of k. // If b == 1, we should add P to Q, otherwise start the next iteration if ((k[i] >> j) & 1) == 1 { let mut params = SyscallBls12_381CurveAddParams { p1: &mut q, p2: &p }; - syscall_bls12_381_curve_add(&mut params); + syscall_bls12_381_curve_add( + &mut params, + #[cfg(feature = "hints")] + hints, + ); // Reconstruct k k_rec[i] |= 1 << j; @@ -197,17 +509,29 @@ pub fn scalar_mul_bls12_381(p: &[u64; 12], k: &[u64; 6]) -> [u64; 12] { } /// Scalar multiplication of a non-zero point `p` by a binary scalar `k` -pub fn scalar_mul_bin_bls12_381(p: &[u64; 12], k: &[u8]) -> [u64; 12] { +pub fn scalar_mul_bin_bls12_381( + p: &[u64; 12], + k: &[u8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { let x1: [u64; 6] = p[0..6].try_into().unwrap(); let y1: [u64; 6] = p[6..12].try_into().unwrap(); let p = SyscallPoint384 { x: x1, y: y1 }; let mut r = SyscallPoint384 { x: x1, y: y1 }; for &bit in k.iter().skip(1) { - syscall_bls12_381_curve_dbl(&mut r); + syscall_bls12_381_curve_dbl( + &mut r, + #[cfg(feature = "hints")] + hints, + ); if bit == 1 { let mut params = SyscallBls12_381CurveAddParams { p1: &mut r, p2: &p }; - syscall_bls12_381_curve_add(&mut params); + syscall_bls12_381_curve_add( + &mut params, + #[cfg(feature = "hints")] + hints, + ); } } @@ -218,7 +542,10 @@ pub fn scalar_mul_bin_bls12_381(p: &[u64; 12], k: &[u8]) -> [u64; 12] { } /// Scalar multiplication of a non-zero point by (x²-1)/3 -pub fn scalar_mul_by_x2div3_bls12_381(p: &[u64; 12]) -> [u64; 12] { +pub fn scalar_mul_by_x2div3_bls12_381( + p: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { /// Family parameter (X²-1)/3 const X2DIV3_BIN_BE: [u8; 126] = [ 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -228,54 +555,259 @@ pub fn scalar_mul_by_x2div3_bls12_381(p: &[u64; 12]) -> [u64; 12] { 0, 1, 0, 1, 0, 1, ]; - scalar_mul_bin_bls12_381(p, &X2DIV3_BIN_BE) + scalar_mul_bin_bls12_381( + p, + &X2DIV3_BIN_BE, + #[cfg(feature = "hints")] + hints, + ) +} + +/// Multi-Scalar Multiplication (MSM) for BLS12-381 G1 points +/// It computes k1·P1 + k2·P2 + ... + kn·Pn +// TODO: This is a naive implementation, one can improve it by using, e.g., a windowed strategies! +pub fn msm_complete_bls12_381( + points: &[[u64; 12]], + scalars: &[[u64; 4]], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Result<[u64; 12], u8> { + assert_eq!(points.len(), scalars.len()); + + let mut acc = G1_IDENTITY; + let mut acc_is_inf = true; + for (point, scalar) in points.iter().zip(scalars.iter()) { + // Skip infinity points + if *point == G1_IDENTITY { + continue; + } + + // Skip zero scalars + if reduce_fr_bls12_381( + scalar, + #[cfg(feature = "hints")] + hints, + ) == [0, 0, 0, 0] + { + continue; + } + + // Verify point is on curve + if !is_on_curve_bls12_381( + point, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G1_MSM_ERR_NOT_ON_CURVE); + } + + // Verify point is in subgroup + if !is_on_subgroup_bls12_381( + point, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G1_MSM_ERR_NOT_IN_SUBGROUP); + } + + // Compute P * k + let product = scalar_mul_bls12_381( + point, + scalar, + #[cfg(feature = "hints")] + hints, + ); + + // Skip if product is infinity + if product == G1_IDENTITY { + continue; + } + + // Add to accumulator + if acc_is_inf { + acc = product; + acc_is_inf = false; + } else { + acc = add_bls12_381( + &acc, + &product, + #[cfg(feature = "hints")] + hints, + ); + acc_is_inf = acc == G1_IDENTITY; + } + } + + Ok(acc) } /// Compute the sigma endomorphism σ of a non-zero point `p`, defined as: /// σ : E(Fp) -> E(Fp) /// (x,y) |-> (ɣ·x,y) -pub fn sigma_endomorphism_bls12_381(p: &[u64; 12]) -> [u64; 12] { +pub fn sigma_endomorphism_bls12_381( + p: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { let mut x: [u64; 6] = p[0..6].try_into().unwrap(); let y: [u64; 6] = p[6..12].try_into().unwrap(); - x = mul_fp_bls12_381(&x, &GAMMA); - + x = mul_fp_bls12_381( + &x, + &GAMMA, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 12]; result[0..6].copy_from_slice(&x); result[6..12].copy_from_slice(&y); result } -// ========== Pointer-based API ========== +/// G1 point addition for uncompressed 96-byte points +/// +/// Input format: 96 bytes per point = 48 bytes x-coordinate + 48 bytes y-coordinate (big-endian) +/// Output format: Same as input +/// +/// ### Safety +/// - `a` must point to a valid `[u8; 96]` for the first input point +/// - `b` must point to a valid `[u8; 96]` for the second input point +/// - `ret` must point to a valid `[u8; 96]` for the output +/// +/// Returns: +/// - 0 = success (regular point) +/// - 1 = success (point at infinity) +/// - 2 = error (point not on curve) +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_bls12_381_g1_add_c")] +pub unsafe extern "C" fn bls12_381_g1_add_c( + ret: *mut u8, + a: *const u8, + b: *const u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u8 { + let a_bytes: &[u8; 96] = &*(a as *const [u8; 96]); + let b_bytes: &[u8; 96] = &*(b as *const [u8; 96]); + let ret_bytes: &mut [u8; 96] = &mut *(ret as *mut [u8; 96]); + + // Parse points + let a_u64 = g1_bytes_be_to_u64_le_bls12_381(a_bytes); + let b_u64 = g1_bytes_be_to_u64_le_bls12_381(b_bytes); + + // Perform addition + let result = match add_complete_bls12_381( + &a_u64, + &b_u64, + #[cfg(feature = "hints")] + hints, + ) { + Ok(r) => r, + Err(code) => return code, + }; + + // Encode result + if result == G1_IDENTITY { + G1_ADD_SUCCESS_INFINITY + } else { + g1_u64_le_to_bytes_be_bls12_381(&result, ret_bytes); + G1_ADD_SUCCESS + } +} -/// # Safety -/// - `p1` must point to a valid `[u64; 12]` (96 bytes), used as both input and output. -/// - `p2` must point to a valid `[u64; 12]` (96 bytes). -/// - Points must be non-zero and distinct. -#[no_mangle] -pub unsafe extern "C" fn add_bls12_381_c(p1: *mut u64, p2: *const u64) { - let mut p1_point = - SyscallPoint384 { x: *(p1 as *const [u64; 6]), y: *(p1.add(6) as *const [u64; 6]) }; - let p2_point = - SyscallPoint384 { x: *(p2 as *const [u64; 6]), y: *(p2.add(6) as *const [u64; 6]) }; +/// G1 Multi-Scalar Multiplication (MSM) for uncompressed points +/// +/// Input format per pair: 128 bytes = 96 bytes G1 point (x || y big-endian) + 32 bytes scalar (big-endian) +/// Output format: 96 bytes G1 point (x || y big-endian) +/// +/// ### Safety +/// - `pairs` must point to an array of `num_pairs * 128` bytes +/// - `ret` must point to a valid `[u8; 96]` for the output +/// +/// Returns: +/// - 0 = success (regular point) +/// - 1 = success (point at infinity) +/// - 2 = error (point not on curve) +/// - 3 = error (point not in subgroup) +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_bls12_381_g1_msm_c")] +pub unsafe extern "C" fn bls12_381_g1_msm_c( + ret: *mut u8, + pairs: *const u8, + num_pairs: usize, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u8 { + let ret_bytes: &mut [u8; 96] = &mut *(ret as *mut [u8; 96]); + + // Parse all pairs + let mut points = Vec::with_capacity(num_pairs); + let mut scalars = Vec::with_capacity(num_pairs); + for i in 0..num_pairs { + let pair_ptr = pairs.add(i * 128); + let point_bytes: &[u8; 96] = &*(pair_ptr as *const [u8; 96]); + let scalar_bytes: &[u8; 32] = &*(pair_ptr.add(96) as *const [u8; 32]); + + // Parse point and scalar + let point_u64 = g1_bytes_be_to_u64_le_bls12_381(point_bytes); + let scalar_u64 = scalar_bytes_be_to_u64_le_bls12_381(scalar_bytes); + + points.push(point_u64); + scalars.push(scalar_u64); + } - let mut params = SyscallBls12_381CurveAddParams { p1: &mut p1_point, p2: &p2_point }; - syscall_bls12_381_curve_add(&mut params); + // Perform MSM with validation + let result = match msm_complete_bls12_381( + &points, + &scalars, + #[cfg(feature = "hints")] + hints, + ) { + Ok(r) => r, + Err(code) => return code, + }; - *(p1 as *mut [u64; 6]) = p1_point.x; - *(p1.add(6) as *mut [u64; 6]) = p1_point.y; + // Encode result + if result == G1_IDENTITY { + G1_MSM_SUCCESS_INFINITY + } else { + g1_u64_le_to_bytes_be_bls12_381(&result, ret_bytes); + G1_MSM_SUCCESS + } } -/// # Safety -/// - `p` must point to a valid `[u64; 12]` (96 bytes), used as both input and output. -/// - Point must be non-zero. -#[no_mangle] -pub unsafe extern "C" fn dbl_bls12_381_c(p: *mut u64) { - let mut p_point = - SyscallPoint384 { x: *(p as *const [u64; 6]), y: *(p.add(6) as *const [u64; 6]) }; +/// Convert 96-byte big-endian G1 point to [u64; 12] little-endian +pub fn g1_bytes_be_to_u64_le_bls12_381(bytes: &[u8; 96]) -> [u64; 12] { + let mut result = [0u64; 12]; + + // x-coordinate (first 48 bytes) + for i in 0..6 { + for j in 0..8 { + result[5 - i] |= (bytes[i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + // y-coordinate (next 48 bytes) + for i in 0..6 { + for j in 0..8 { + result[11 - i] |= (bytes[48 + i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + result +} - syscall_bls12_381_curve_dbl(&mut p_point); +/// Convert [u64; 12] little-endian G1 point to 96-byte big-endian +pub fn g1_u64_le_to_bytes_be_bls12_381(limbs: &[u64; 12], bytes: &mut [u8; 96]) { + // x-coordinate (first 48 bytes) + for i in 0..6 { + let limb = limbs[5 - i]; + for j in 0..8 { + bytes[i * 8 + j] = ((limb >> (8 * (7 - j))) & 0xFF) as u8; + } + } - *(p as *mut [u64; 6]) = p_point.x; - *(p.add(6) as *mut [u64; 6]) = p_point.y; + // y-coordinate (next 48 bytes) + for i in 0..6 { + let limb = limbs[11 - i]; + for j in 0..8 { + bytes[48 + i * 8 + j] = ((limb >> (8 * (7 - j))) & 0xFF) as u8; + } + } } diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/cyclotomic.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/cyclotomic.rs index 845a669fc..3da18af5a 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/cyclotomic.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/cyclotomic.rs @@ -48,7 +48,10 @@ pub fn compress_cyclo_bls12_381(a: &[u64; 72]) -> [u64; 48] { /// **NOTE**: If the input is not of the form C(a), where a ∈ GΦ6(p²), then the compression-decompression /// technique is not well defined. This means that D(C(a)) != a. #[inline] -pub fn decompress_cyclo_bls12_381(a: &[u64; 48]) -> [u64; 72] { +pub fn decompress_cyclo_bls12_381( + a: &[u64; 48], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { let a2: &[u64; 12] = &a[0..12].try_into().unwrap(); let a3: &[u64; 12] = &a[12..24].try_into().unwrap(); let a4: &[u64; 12] = &a[24..36].try_into().unwrap(); @@ -56,40 +59,179 @@ pub fn decompress_cyclo_bls12_381(a: &[u64; 48]) -> [u64; 72] { let (a0, a1) = if eq(a2, &[0; 12]) { // a1 = (2·a4·a5)/a3 - let a3_inv = inv_fp2_bls12_381(a3); - let mut a1 = mul_fp2_bls12_381(a4, a5); - a1 = dbl_fp2_bls12_381(&a1); - a1 = mul_fp2_bls12_381(&a1, &a3_inv); + let a3_inv = inv_fp2_bls12_381( + a3, + #[cfg(feature = "hints")] + hints, + ); + let mut a1 = mul_fp2_bls12_381( + a4, + a5, + #[cfg(feature = "hints")] + hints, + ); + a1 = dbl_fp2_bls12_381( + &a1, + #[cfg(feature = "hints")] + hints, + ); + a1 = mul_fp2_bls12_381( + &a1, + &a3_inv, + #[cfg(feature = "hints")] + hints, + ); // a0 = (2·a1² - 3·a3·a4)(1+u) + 1 - let a3a4 = mul_fp2_bls12_381(a3, a4); - let mut a0 = square_fp2_bls12_381(&a1); - a0 = dbl_fp2_bls12_381(&a0); - a0 = sub_fp2_bls12_381(&a0, &scalar_mul_fp2_bls12_381(&a3a4, &[3, 0, 0, 0, 0, 0])); - a0 = mul_fp2_bls12_381(&a0, &EXT_U); - a0 = add_fp2_bls12_381(&a0, &[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let a3a4 = mul_fp2_bls12_381( + a3, + a4, + #[cfg(feature = "hints")] + hints, + ); + let mut a0 = square_fp2_bls12_381( + &a1, + #[cfg(feature = "hints")] + hints, + ); + a0 = dbl_fp2_bls12_381( + &a0, + #[cfg(feature = "hints")] + hints, + ); + a0 = sub_fp2_bls12_381( + &a0, + &scalar_mul_fp2_bls12_381( + &a3a4, + &[3, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + a0 = mul_fp2_bls12_381( + &a0, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + a0 = add_fp2_bls12_381( + &a0, + &[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); (a0, a1) } else { // a1 = (a5²·(1+u) + 3·a4² - 2·a3)/(4·a2) - let a2_inv = inv_fp2_bls12_381(&scalar_mul_fp2_bls12_381(a2, &[4, 0, 0, 0, 0, 0])); - let mut a4_sq = square_fp2_bls12_381(a4); - a4_sq = scalar_mul_fp2_bls12_381(&a4_sq, &[3, 0, 0, 0, 0, 0]); - let mut a1 = square_fp2_bls12_381(a5); - a1 = mul_fp2_bls12_381(&a1, &EXT_U); - a1 = add_fp2_bls12_381(&a1, &a4_sq); - a1 = sub_fp2_bls12_381(&a1, &dbl_fp2_bls12_381(a3)); - a1 = mul_fp2_bls12_381(&a1, &a2_inv); - + let a2_inv = inv_fp2_bls12_381( + &scalar_mul_fp2_bls12_381( + a2, + &[4, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + let mut a4_sq = square_fp2_bls12_381( + a4, + #[cfg(feature = "hints")] + hints, + ); + a4_sq = scalar_mul_fp2_bls12_381( + &a4_sq, + &[3, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + let mut a1 = square_fp2_bls12_381( + a5, + #[cfg(feature = "hints")] + hints, + ); + a1 = mul_fp2_bls12_381( + &a1, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + a1 = add_fp2_bls12_381( + &a1, + &a4_sq, + #[cfg(feature = "hints")] + hints, + ); + a1 = sub_fp2_bls12_381( + &a1, + &dbl_fp2_bls12_381( + a3, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + a1 = mul_fp2_bls12_381( + &a1, + &a2_inv, + #[cfg(feature = "hints")] + hints, + ); // a0 = (2·a1² + a2·a5 - 3·a3·a4)(1+u) + 1 - let a3a4 = mul_fp2_bls12_381(a3, a4); - let a2a5 = mul_fp2_bls12_381(a2, a5); - let mut a0 = square_fp2_bls12_381(&a1); - a0 = dbl_fp2_bls12_381(&a0); - a0 = add_fp2_bls12_381(&a0, &a2a5); - a0 = sub_fp2_bls12_381(&a0, &scalar_mul_fp2_bls12_381(&a3a4, &[3, 0, 0, 0, 0, 0])); - a0 = mul_fp2_bls12_381(&a0, &EXT_U); - a0 = add_fp2_bls12_381(&a0, &[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let a3a4 = mul_fp2_bls12_381( + a3, + a4, + #[cfg(feature = "hints")] + hints, + ); + let a2a5 = mul_fp2_bls12_381( + a2, + a5, + #[cfg(feature = "hints")] + hints, + ); + let mut a0 = square_fp2_bls12_381( + &a1, + #[cfg(feature = "hints")] + hints, + ); + a0 = dbl_fp2_bls12_381( + &a0, + #[cfg(feature = "hints")] + hints, + ); + a0 = add_fp2_bls12_381( + &a0, + &a2a5, + #[cfg(feature = "hints")] + hints, + ); + a0 = sub_fp2_bls12_381( + &a0, + &scalar_mul_fp2_bls12_381( + &a3a4, + &[3, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + a0 = mul_fp2_bls12_381( + &a0, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + a0 = add_fp2_bls12_381( + &a0, + &[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); (a0, a1) }; @@ -118,46 +260,180 @@ pub fn decompress_cyclo_bls12_381(a: &[u64; 48]) -> [u64; 72] { // - B45 = a4·a5 // /// **NOTE**: The output is not guaranteed to be in GΦ6(p²), if the input isn't. -pub fn square_cyclo_bls12_381(a: &[u64; 48]) -> [u64; 48] { +pub fn square_cyclo_bls12_381( + a: &[u64; 48], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { let a2: &[u64; 12] = &a[0..12].try_into().unwrap(); let a3: &[u64; 12] = &a[12..24].try_into().unwrap(); let a4: &[u64; 12] = &a[24..36].try_into().unwrap(); let a5: &[u64; 12] = &a[36..48].try_into().unwrap(); // B23 = a2·a3, B45 = a4·a5 - let b23 = mul_fp2_bls12_381(a2, a3); - let b45 = mul_fp2_bls12_381(a4, a5); + let b23 = mul_fp2_bls12_381( + a2, + a3, + #[cfg(feature = "hints")] + hints, + ); + let b45 = mul_fp2_bls12_381( + a4, + a5, + #[cfg(feature = "hints")] + hints, + ); // A23 = (a2 + a3)·(a2 + (1+u)·a3) - let a3xi = mul_fp2_bls12_381(a3, &EXT_U); - let a23 = mul_fp2_bls12_381(&add_fp2_bls12_381(a2, a3), &add_fp2_bls12_381(a2, &a3xi)); + let a3xi = mul_fp2_bls12_381( + a3, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + let a23 = mul_fp2_bls12_381( + &add_fp2_bls12_381( + a2, + a3, + #[cfg(feature = "hints")] + hints, + ), + &add_fp2_bls12_381( + a2, + &a3xi, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // A45 = (a4 + a5)·(a4 + (1+u)·a5) - let a5xi = mul_fp2_bls12_381(a5, &EXT_U); - let a45 = mul_fp2_bls12_381(&add_fp2_bls12_381(a4, a5), &add_fp2_bls12_381(a4, &a5xi)); + let a5xi = mul_fp2_bls12_381( + a5, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + let a45 = mul_fp2_bls12_381( + &add_fp2_bls12_381( + a4, + a5, + #[cfg(feature = "hints")] + hints, + ), + &add_fp2_bls12_381( + a4, + &a5xi, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // b2 = 2(a2 + 3·(1+u)·B45) - let mut b2 = mul_fp2_bls12_381(&b45, &EXT_U); - b2 = scalar_mul_fp2_bls12_381(&b2, &[3, 0, 0, 0, 0, 0]); - b2 = add_fp2_bls12_381(a2, &b2); - b2 = dbl_fp2_bls12_381(&b2); + let mut b2 = mul_fp2_bls12_381( + &b45, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + b2 = scalar_mul_fp2_bls12_381( + &b2, + &[3, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b2 = add_fp2_bls12_381( + a2, + &b2, + #[cfg(feature = "hints")] + hints, + ); + b2 = dbl_fp2_bls12_381( + &b2, + #[cfg(feature = "hints")] + hints, + ); // b3 = 3·(A45 - (2+u)·B45) - 2·a3 - let mut b3 = mul_fp2_bls12_381(&b45, &[2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]); - b3 = sub_fp2_bls12_381(&a45, &b3); - b3 = scalar_mul_fp2_bls12_381(&b3, &[3, 0, 0, 0, 0, 0]); - b3 = sub_fp2_bls12_381(&b3, &dbl_fp2_bls12_381(a3)); + let mut b3 = mul_fp2_bls12_381( + &b45, + &[2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b3 = sub_fp2_bls12_381( + &a45, + &b3, + #[cfg(feature = "hints")] + hints, + ); + b3 = scalar_mul_fp2_bls12_381( + &b3, + &[3, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b3 = sub_fp2_bls12_381( + &b3, + &dbl_fp2_bls12_381( + a3, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // b4 = 3·(A23 - (2+u)·B23) - 2·a4 - let mut b4 = mul_fp2_bls12_381(&b23, &[2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]); - b4 = sub_fp2_bls12_381(&a23, &b4); - b4 = scalar_mul_fp2_bls12_381(&b4, &[3, 0, 0, 0, 0, 0]); - b4 = sub_fp2_bls12_381(&b4, &dbl_fp2_bls12_381(a4)); + let mut b4 = mul_fp2_bls12_381( + &b23, + &[2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b4 = sub_fp2_bls12_381( + &a23, + &b4, + #[cfg(feature = "hints")] + hints, + ); + b4 = scalar_mul_fp2_bls12_381( + &b4, + &[3, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b4 = sub_fp2_bls12_381( + &b4, + &dbl_fp2_bls12_381( + a4, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // b5 = 2·(a5 + 3·B23) - let mut b5 = scalar_mul_fp2_bls12_381(&b23, &[3, 0, 0, 0, 0, 0]); - b5 = add_fp2_bls12_381(a5, &b5); - b5 = dbl_fp2_bls12_381(&b5); + let mut b5 = scalar_mul_fp2_bls12_381( + &b23, + &[3, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b5 = add_fp2_bls12_381( + a5, + &b5, + #[cfg(feature = "hints")] + hints, + ); + b5 = dbl_fp2_bls12_381( + &b5, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 48]; result[0..12].copy_from_slice(&b2); @@ -173,7 +449,11 @@ pub fn square_cyclo_bls12_381(a: &[u64; 48]) -> [u64; 48] { // out: a^x = (a0 + a4·v + a3·v²) + (a2 + a1·v + a5·v²)·w ∈ ∈ GΦ6(p²) // /// **NOTE**: The output is not guaranteed to be in GΦ6(p²), if the input isn't. -pub fn exp_cyclo_bls12_381(a: &[u64; 72], x: &[u8]) -> [u64; 72] { +pub fn exp_cyclo_bls12_381( + a: &[u64; 72], + x: &[u8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { if eq(a, &[0; 72]) { return [0; 72]; } @@ -190,19 +470,35 @@ pub fn exp_cyclo_bls12_381(a: &[u64; 72], x: &[u8]) -> [u64; 72] { for &bit in x.iter() { if bit == 1 { // decompress and multiply - let decomp = decompress_cyclo_bls12_381(&comp); - result = mul_fp12_bls12_381(&result, &decomp); + let decomp = decompress_cyclo_bls12_381( + &comp, + #[cfg(feature = "hints")] + hints, + ); + result = mul_fp12_bls12_381( + &result, + &decomp, + #[cfg(feature = "hints")] + hints, + ); } // We always square (in compressed form): C(c²) - comp = square_cyclo_bls12_381(&comp); + comp = square_cyclo_bls12_381( + &comp, + #[cfg(feature = "hints")] + hints, + ); } result } /// Exponentiation in the cyclotomic subgroup GΦ6(p²) by x = 15132376222941642752 -pub fn exp_by_x_cyclo_bls12_381(a: &[u64; 72]) -> [u64; 72] { +pub fn exp_by_x_cyclo_bls12_381( + a: &[u64; 72], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { /// Family parameter X const X_ABS_BIN_LE: [u8; 64] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -210,11 +506,19 @@ pub fn exp_by_x_cyclo_bls12_381(a: &[u64; 72]) -> [u64; 72] { 1, 0, 1, 1, ]; - exp_cyclo_bls12_381(a, &X_ABS_BIN_LE) + exp_cyclo_bls12_381( + a, + &X_ABS_BIN_LE, + #[cfg(feature = "hints")] + hints, + ) } /// Exponentiation in the cyclotomic subgroup GΦ6(p²) by x+1 = 15132376222941642753 -pub fn exp_by_xone_cyclo_bls12_381(a: &[u64; 72]) -> [u64; 72] { +pub fn exp_by_xone_cyclo_bls12_381( + a: &[u64; 72], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { /// Family parameter X+1 const XONE_ABS_BIN_LE: [u8; 64] = [ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -222,11 +526,19 @@ pub fn exp_by_xone_cyclo_bls12_381(a: &[u64; 72]) -> [u64; 72] { 1, 0, 1, 1, ]; - exp_cyclo_bls12_381(a, &XONE_ABS_BIN_LE) + exp_cyclo_bls12_381( + a, + &XONE_ABS_BIN_LE, + #[cfg(feature = "hints")] + hints, + ) } /// Exponentiation in the cyclotomic subgroup GΦ6(p²) by (x+1)/3 = 5044125407647214251 -pub fn exp_by_xdiv3_cyclo_bls12_381(a: &[u64; 72]) -> [u64; 72] { +pub fn exp_by_xdiv3_cyclo_bls12_381( + a: &[u64; 72], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { /// Family parameter (X+1)/3 const XDIV3_ABS_BIN_LE: [u8; 63] = [ 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, @@ -234,5 +546,10 @@ pub fn exp_by_xdiv3_cyclo_bls12_381(a: &[u64; 72]) -> [u64; 72] { 0, 0, 1, ]; - exp_cyclo_bls12_381(a, &XDIV3_ABS_BIN_LE) + exp_cyclo_bls12_381( + a, + &XDIV3_ABS_BIN_LE, + #[cfg(feature = "hints")] + hints, + ) } diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/final_exp.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/final_exp.rs index fc9d8f727..ff649b1b3 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/final_exp.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/final_exp.rs @@ -14,46 +14,129 @@ use super::{ // However, I dont think its a good idea in general to optimize verification "at all costs". /// Given f ∈ Fp12*, computes f^((p¹²-1)/r) ∈ Fp12* -pub fn final_exp_bls12_381(f: &[u64; 72]) -> [u64; 72] { +/// +/// Note: Unoptimized for the case f == 1 +pub fn final_exp_bls12_381( + f: &[u64; 72], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { ////////////////// // The easy part: exp by (p^6-1)(p^2+1) ////////////////// // f^(p^6-1) = f̅·f⁻¹ - let f_conj = conjugate_fp12_bls12_381(f); - let f_inv = inv_fp12_bls12_381(f); - let easy1 = mul_fp12_bls12_381(&f_conj, &f_inv); + let f_conj = conjugate_fp12_bls12_381( + f, + #[cfg(feature = "hints")] + hints, + ); + let f_inv = inv_fp12_bls12_381( + f, + #[cfg(feature = "hints")] + hints, + ); + let easy1 = mul_fp12_bls12_381( + &f_conj, + &f_inv, + #[cfg(feature = "hints")] + hints, + ); // easy1^(p²-1) = easy1^p²·easy1 - let mut m = frobenius2_fp12_bls12_381(&easy1); - m = mul_fp12_bls12_381(&m, &easy1); + let mut m = frobenius2_fp12_bls12_381( + &easy1, + #[cfg(feature = "hints")] + hints, + ); + m = mul_fp12_bls12_381( + &m, + &easy1, + #[cfg(feature = "hints")] + hints, + ); ////////////////// // The hard part: exp by (p⁴-p²+1)/r ////////////////// // f = m^{(x+1)/3} - let mut f = exp_by_xdiv3_cyclo_bls12_381(&m); + let mut f = exp_by_xdiv3_cyclo_bls12_381( + &m, + #[cfg(feature = "hints")] + hints, + ); // f = f^(x+1) - f = exp_by_xone_cyclo_bls12_381(&f); + f = exp_by_xone_cyclo_bls12_381( + &f, + #[cfg(feature = "hints")] + hints, + ); // f1 = f^p, f2 = f̅^x - let f1 = frobenius1_fp12_bls12_381(&f); - let f2 = exp_by_x_cyclo_bls12_381(&conjugate_fp12_bls12_381(&f)); + let f1 = frobenius1_fp12_bls12_381( + &f, + #[cfg(feature = "hints")] + hints, + ); + let f2 = exp_by_x_cyclo_bls12_381( + &conjugate_fp12_bls12_381( + &f, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // f = f1*f2 - let f = mul_fp12_bls12_381(&f1, &f2); + let f = mul_fp12_bls12_381( + &f1, + &f2, + #[cfg(feature = "hints")] + hints, + ); // f1 = (f^x)^x, f2 = f^p², f3 = f̅ - let f1 = exp_by_x_cyclo_bls12_381(&exp_by_x_cyclo_bls12_381(&f)); - let f2 = frobenius2_fp12_bls12_381(&f); - let f3 = conjugate_fp12_bls12_381(&f); + let f1 = exp_by_x_cyclo_bls12_381( + &exp_by_x_cyclo_bls12_381( + &f, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + let f2 = frobenius2_fp12_bls12_381( + &f, + #[cfg(feature = "hints")] + hints, + ); + let f3 = conjugate_fp12_bls12_381( + &f, + #[cfg(feature = "hints")] + hints, + ); // f = f1*f2*f3*m - let mut f = mul_fp12_bls12_381(&f1, &f2); - f = mul_fp12_bls12_381(&f, &f3); - f = mul_fp12_bls12_381(&f, &m); + let mut f = mul_fp12_bls12_381( + &f1, + &f2, + #[cfg(feature = "hints")] + hints, + ); + f = mul_fp12_bls12_381( + &f, + &f3, + #[cfg(feature = "hints")] + hints, + ); + f = mul_fp12_bls12_381( + &f, + &m, + #[cfg(feature = "hints")] + hints, + ); f } diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp.rs index 3298fa314..9654dcc0e 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp.rs @@ -5,11 +5,21 @@ use crate::{ zisklib::{eq, fcall_bls12_381_fp_inv, fcall_bls12_381_fp_sqrt}, }; -use super::constants::{NQR, P, P_MINUS_ONE}; +use super::constants::{NQR_FP, P, P_MINUS_ONE}; + +/// Sign function in Fp +#[inline] +pub fn sgn0_fp_bls12_381(x: &[u64; 6]) -> u64 { + x[0] & 1 +} /// Addition in Fp #[inline] -pub fn add_fp_bls12_381(x: &[u64; 6], y: &[u64; 6]) -> [u64; 6] { +pub fn add_fp_bls12_381( + x: &[u64; 6], + y: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 6] { // x·1 + y let mut params = SyscallArith384ModParams { a: x, @@ -18,13 +28,17 @@ pub fn add_fp_bls12_381(x: &[u64; 6], y: &[u64; 6]) -> [u64; 6] { module: &P, d: &mut [0, 0, 0, 0, 0, 0], }; - syscall_arith384_mod(&mut params); + syscall_arith384_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Doubling in Fp #[inline] -pub fn dbl_fp_bls12_381(x: &[u64; 6]) -> [u64; 6] { +pub fn dbl_fp_bls12_381(x: &[u64; 6], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 6] { // 2·x + 0 or x·1 + x let mut params = SyscallArith384ModParams { a: x, @@ -33,13 +47,21 @@ pub fn dbl_fp_bls12_381(x: &[u64; 6]) -> [u64; 6] { module: &P, d: &mut [0, 0, 0, 0, 0, 0], }; - syscall_arith384_mod(&mut params); + syscall_arith384_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Subtraction in Fp #[inline] -pub fn sub_fp_bls12_381(x: &[u64; 6], y: &[u64; 6]) -> [u64; 6] { +pub fn sub_fp_bls12_381( + x: &[u64; 6], + y: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 6] { // y·(-1) + x let mut params = SyscallArith384ModParams { a: y, @@ -48,13 +70,17 @@ pub fn sub_fp_bls12_381(x: &[u64; 6], y: &[u64; 6]) -> [u64; 6] { module: &P, d: &mut [0, 0, 0, 0, 0, 0], }; - syscall_arith384_mod(&mut params); + syscall_arith384_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Negation in Fp #[inline] -pub fn neg_fp_bls12_381(x: &[u64; 6]) -> [u64; 6] { +pub fn neg_fp_bls12_381(x: &[u64; 6], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 6] { // x·(-1) + 0 let mut params = SyscallArith384ModParams { a: x, @@ -63,13 +89,21 @@ pub fn neg_fp_bls12_381(x: &[u64; 6]) -> [u64; 6] { module: &P, d: &mut [0, 0, 0, 0, 0, 0], }; - syscall_arith384_mod(&mut params); + syscall_arith384_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Multiplication in Fp #[inline] -pub fn mul_fp_bls12_381(x: &[u64; 6], y: &[u64; 6]) -> [u64; 6] { +pub fn mul_fp_bls12_381( + x: &[u64; 6], + y: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 6] { // x·y + 0 let mut params = SyscallArith384ModParams { a: x, @@ -78,13 +112,20 @@ pub fn mul_fp_bls12_381(x: &[u64; 6], y: &[u64; 6]) -> [u64; 6] { module: &P, d: &mut [0, 0, 0, 0, 0, 0], }; - syscall_arith384_mod(&mut params); + syscall_arith384_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Squaring in Fp #[inline] -pub fn square_fp_bls12_381(x: &[u64; 6]) -> [u64; 6] { +pub fn square_fp_bls12_381( + x: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 6] { // x·x + 0 let mut params = SyscallArith384ModParams { a: x, @@ -93,15 +134,26 @@ pub fn square_fp_bls12_381(x: &[u64; 6]) -> [u64; 6] { module: &P, d: &mut [0, 0, 0, 0, 0, 0], }; - syscall_arith384_mod(&mut params); + syscall_arith384_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Square root in Fp #[inline] -pub fn sqrt_fp_bls12_381(x: &[u64; 6]) -> ([u64; 6], bool) { +pub fn sqrt_fp_bls12_381( + x: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> ([u64; 6], bool) { // Hint the sqrt - let hint = fcall_bls12_381_fp_sqrt(x); + let hint = fcall_bls12_381_fp_sqrt( + x, + #[cfg(feature = "hints")] + hints, + ); let is_qr = hint[0] == 1; let sqrt = hint[1..7].try_into().unwrap(); @@ -113,7 +165,11 @@ pub fn sqrt_fp_bls12_381(x: &[u64; 6]) -> ([u64; 6], bool) { module: &P, d: &mut [0, 0, 0, 0, 0, 0], }; - syscall_arith384_mod(&mut params); + syscall_arith384_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); if is_qr { // Check that sqrt * sqrt == x @@ -121,7 +177,12 @@ pub fn sqrt_fp_bls12_381(x: &[u64; 6]) -> ([u64; 6], bool) { (sqrt, true) } else { // Check that sqrt * sqrt == x * NQR - let nqr = mul_fp_bls12_381(x, &NQR); + let nqr = mul_fp_bls12_381( + x, + &NQR_FP, + #[cfg(feature = "hints")] + hints, + ); assert_eq!(*params.d, nqr); (sqrt, false) } @@ -129,7 +190,7 @@ pub fn sqrt_fp_bls12_381(x: &[u64; 6]) -> ([u64; 6], bool) { /// Inversion of a non-zero element in Fp #[inline] -pub fn inv_fp_bls12_381(x: &[u64; 6]) -> [u64; 6] { +pub fn inv_fp_bls12_381(x: &[u64; 6], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 6] { // if x == 0, return 0 if eq(x, &[0; 6]) { return *x; @@ -139,7 +200,11 @@ pub fn inv_fp_bls12_381(x: &[u64; 6]) -> [u64; 6] { // Remember that an element y ∈ Fp is the inverse of x ∈ Fp if and only if x·y = 1 in Fp // We will therefore hint the inverse y and check the product with x is 1 - let inv = fcall_bls12_381_fp_inv(x); + let inv = fcall_bls12_381_fp_inv( + x, + #[cfg(feature = "hints")] + hints, + ); // x·y + 0 let mut params = SyscallArith384ModParams { @@ -149,144 +214,23 @@ pub fn inv_fp_bls12_381(x: &[u64; 6]) -> [u64; 6] { module: &P, d: &mut [0, 0, 0, 0, 0, 0], }; - syscall_arith384_mod(&mut params); + syscall_arith384_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); assert_eq!(*params.d, [1, 0, 0, 0, 0, 0]); inv } -// ========== Pointer-based API ========== - -/// # Safety -/// - `a` must point to a valid `[u64; 6]` (48 bytes). -/// - `b` must point to a valid `[u64; 6]` (48 bytes). -#[no_mangle] -pub unsafe extern "C" fn add_fp_bls12_381_c(a: *mut u64, b: *const u64) { - let a_ref = &*(a as *const [u64; 6]); - let b_ref = &*(b as *const [u64; 6]); - - let mut params = SyscallArith384ModParams { - a: a_ref, - b: &[1, 0, 0, 0, 0, 0], - c: b_ref, - module: &P, - d: &mut [0, 0, 0, 0, 0, 0], - }; - syscall_arith384_mod(&mut params); - - core::ptr::copy_nonoverlapping(params.d.as_ptr(), a, 6); -} - -/// # Safety -/// - `a` must point to a valid `[u64; 6]` (48 bytes), used as both input and output. -#[no_mangle] -pub unsafe extern "C" fn dbl_fp_bls12_381_c(a: *mut u64) { - let a_ref = &*(a as *const [u64; 6]); - - let mut params = SyscallArith384ModParams { - a: a_ref, - b: &[2, 0, 0, 0, 0, 0], - c: &[0, 0, 0, 0, 0, 0], - module: &P, - d: &mut [0, 0, 0, 0, 0, 0], - }; - syscall_arith384_mod(&mut params); - - core::ptr::copy_nonoverlapping(params.d.as_ptr(), a, 6); -} - -/// # Safety -/// - `a` must point to a valid `[u64; 6]` (48 bytes), used as both input and output. -/// - `b` must point to a valid `[u64; 6]` (48 bytes). -#[no_mangle] -pub unsafe extern "C" fn sub_fp_bls12_381_c(a: *mut u64, b: *const u64) { - let a_ref = &*(a as *const [u64; 6]); - let b_ref = &*(b as *const [u64; 6]); - - let mut params = SyscallArith384ModParams { - a: b_ref, - b: &P_MINUS_ONE, - c: a_ref, - module: &P, - d: &mut [0, 0, 0, 0, 0, 0], - }; - syscall_arith384_mod(&mut params); - - core::ptr::copy_nonoverlapping(params.d.as_ptr(), a, 6); -} - -/// # Safety -/// - `a` must point to a valid `[u64; 6]` (48 bytes), used as both input and output. -#[no_mangle] -pub unsafe extern "C" fn neg_fp_bls12_381_c(a: *mut u64) { - let a_ref = &*(a as *const [u64; 6]); - - let mut params = SyscallArith384ModParams { - a: a_ref, - b: &P_MINUS_ONE, - c: &[0, 0, 0, 0, 0, 0], - module: &P, - d: &mut [0, 0, 0, 0, 0, 0], - }; - syscall_arith384_mod(&mut params); - - core::ptr::copy_nonoverlapping(params.d.as_ptr(), a, 6); -} - -/// # Safety -/// - `a` must point to a valid `[u64; 6]` (48 bytes), used as both input and output. -/// - `b` must point to a valid `[u64; 6]` (48 bytes). -#[no_mangle] -pub unsafe extern "C" fn mul_fp_bls12_381_c(a: *mut u64, b: *const u64) { - let a_ref = &*(a as *const [u64; 6]); - let b_ref = &*(b as *const [u64; 6]); - - let mut params = SyscallArith384ModParams { - a: a_ref, - b: b_ref, - c: &[0, 0, 0, 0, 0, 0], - module: &P, - d: &mut [0, 0, 0, 0, 0, 0], - }; - syscall_arith384_mod(&mut params); - - core::ptr::copy_nonoverlapping(params.d.as_ptr(), a, 6); -} - -/// # Safety -/// - `a` must point to a valid `[u64; 6]` (48 bytes), used as both input and output. -#[no_mangle] -pub unsafe extern "C" fn square_fp_bls12_381_c(a: *mut u64) { - let a_ref = &*(a as *const [u64; 6]); - - let mut params = SyscallArith384ModParams { - a: a_ref, - b: a_ref, - c: &[0, 0, 0, 0, 0, 0], - module: &P, - d: &mut [0, 0, 0, 0, 0, 0], - }; - syscall_arith384_mod(&mut params); - - core::ptr::copy_nonoverlapping(params.d.as_ptr(), a, 6); -} - -/// # Safety -/// - `a` must point to a valid `[u64; 6]` (48 bytes), used as both input and output. -/// - `is_qr` must point to a valid `u8`. -#[no_mangle] -pub unsafe extern "C" fn sqrt_fp_bls12_381_c(a: *mut u64) -> bool { - let a_ref = &*(a as *const [u64; 6]); - let (result, qr) = sqrt_fp_bls12_381(a_ref); - *(a as *mut [u64; 6]) = result; - qr -} - -/// # Safety -/// - `a` must point to a valid `[u64; 6]` (48 bytes), used as both input and output. -#[no_mangle] -pub unsafe extern "C" fn inv_fp_bls12_381_c(a: *mut u64) { - let a_ref = &*(a as *const [u64; 6]); - let result = inv_fp_bls12_381(a_ref); - *(a as *mut [u64; 6]) = result; +/// Convert 48-byte big-endian field element to [u64; 6] little-endian +pub fn bytes_be_to_u64_le_fp_bls12_381(bytes: &[u8; 48]) -> [u64; 6] { + let mut result = [0u64; 6]; + for i in 0..6 { + for j in 0..8 { + result[5 - i] |= (bytes[i * 8 + j] as u64) << (8 * (7 - j)); + } + } + result } diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp12.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp12.rs index e6f214dd3..5a2e36e50 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp12.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp12.rs @@ -23,26 +23,75 @@ use super::{ // - c1 = a1·b1 + a2·b2·v // - c2 = (a1+a2)·(b1+b2) - a1·b1 - a2·b2 #[inline] -pub fn mul_fp12_bls12_381(a: &[u64; 72], b: &[u64; 72]) -> [u64; 72] { +pub fn mul_fp12_bls12_381( + a: &[u64; 72], + b: &[u64; 72], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { let a1 = &a[0..36].try_into().unwrap(); let a2 = &a[36..72].try_into().unwrap(); let b1 = &b[0..36].try_into().unwrap(); let b2 = &b[36..72].try_into().unwrap(); // a1·b1, a2·b2 - let a1_b1 = mul_fp6_bls12_381(a1, b1); - let a2_b2 = mul_fp6_bls12_381(a2, b2); + let a1_b1 = mul_fp6_bls12_381( + a1, + b1, + #[cfg(feature = "hints")] + hints, + ); + let a2_b2 = mul_fp6_bls12_381( + a2, + b2, + #[cfg(feature = "hints")] + hints, + ); // c1 = a1·b1 + a2·b2·v - let mut c1 = sparse_mula_fp6_bls12_381(&a2_b2, &EXT_V); - c1 = add_fp6_bls12_381(&c1, &a1_b1); + let mut c1 = sparse_mula_fp6_bls12_381( + &a2_b2, + &EXT_V, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp6_bls12_381( + &c1, + &a1_b1, + #[cfg(feature = "hints")] + hints, + ); // c2 = (a1+a2)·(b1+b2) - a1·b1 - a2·b2 - let a1_plus_a2 = add_fp6_bls12_381(a1, a2); - let b1_plus_b2 = add_fp6_bls12_381(b1, b2); - let mut c2 = mul_fp6_bls12_381(&a1_plus_a2, &b1_plus_b2); - c2 = sub_fp6_bls12_381(&c2, &a1_b1); - c2 = sub_fp6_bls12_381(&c2, &a2_b2); + let a1_plus_a2 = add_fp6_bls12_381( + a1, + a2, + #[cfg(feature = "hints")] + hints, + ); + let b1_plus_b2 = add_fp6_bls12_381( + b1, + b2, + #[cfg(feature = "hints")] + hints, + ); + let mut c2 = mul_fp6_bls12_381( + &a1_plus_a2, + &b1_plus_b2, + #[cfg(feature = "hints")] + hints, + ); + c2 = sub_fp6_bls12_381( + &c2, + &a1_b1, + #[cfg(feature = "hints")] + hints, + ); + c2 = sub_fp6_bls12_381( + &c2, + &a2_b2, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 72]; result[0..36].copy_from_slice(&c1); @@ -57,26 +106,55 @@ pub fn mul_fp12_bls12_381(a: &[u64; 72], b: &[u64; 72]) -> [u64; 72] { // - c1 = a1 + a2·(b23·(1+u) + b22·v²) // - c2 = a2 + a1·(b22·v + b23·v²) #[inline] -pub fn sparse_mul_fp12_bls12_381(a: &[u64; 72], b: &[u64; 24]) -> [u64; 72] { +pub fn sparse_mul_fp12_bls12_381( + a: &[u64; 72], + b: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { let a1 = &a[0..36].try_into().unwrap(); let a2 = &a[36..72].try_into().unwrap(); let b22: &[u64; 12] = &b[0..12].try_into().unwrap(); let b23 = &b[12..24].try_into().unwrap(); // c1 = a1 + a2·(b23·(1+u) + b22·v²) - let b23u = mul_fp2_bls12_381(&EXT_U, b23); + let b23u = mul_fp2_bls12_381( + &EXT_U, + b23, + #[cfg(feature = "hints")] + hints, + ); let mut sparse_c1 = [0u64; 24]; sparse_c1[0..12].copy_from_slice(&b23u); sparse_c1[12..24].copy_from_slice(b22); - let mut c1 = sparse_mulc_fp6_bls12_381(a2, &sparse_c1); - c1 = add_fp6_bls12_381(&c1, a1); + let mut c1 = sparse_mulc_fp6_bls12_381( + a2, + &sparse_c1, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp6_bls12_381( + &c1, + a1, + #[cfg(feature = "hints")] + hints, + ); // c2 = a2 + a1·(b22·v + b23·v²) let mut sparse_c2 = [0u64; 24]; sparse_c2[0..12].copy_from_slice(b22); sparse_c2[12..24].copy_from_slice(b23); - let mut c2 = sparse_mulb_fp6_bls12_381(a1, &sparse_c2); - c2 = add_fp6_bls12_381(&c2, a2); + let mut c2 = sparse_mulb_fp6_bls12_381( + a1, + &sparse_c2, + #[cfg(feature = "hints")] + hints, + ); + c2 = add_fp6_bls12_381( + &c2, + a2, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 72]; result[0..36].copy_from_slice(&c1); @@ -91,24 +169,71 @@ pub fn sparse_mul_fp12_bls12_381(a: &[u64; 72], b: &[u64; 24]) -> [u64; 72] { // - c1 = (a1-a2)·(a1-a2·v) + a1·a2 + a1·a2·v // - c2 = 2·a1·a2 #[inline] -pub fn square_fp12_bls12_381(a: &[u64; 72]) -> [u64; 72] { +pub fn square_fp12_bls12_381( + a: &[u64; 72], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { let a1 = &a[0..36].try_into().unwrap(); let a2 = &a[36..72].try_into().unwrap(); // a1·a2, a2·v, a1·a2·v - let a1_a2 = mul_fp6_bls12_381(a1, a2); - let a2_v = sparse_mula_fp6_bls12_381(a2, &EXT_V); - let a1_a2_v = sparse_mula_fp6_bls12_381(&a1_a2, &EXT_V); + let a1_a2 = mul_fp6_bls12_381( + a1, + a2, + #[cfg(feature = "hints")] + hints, + ); + let a2_v = sparse_mula_fp6_bls12_381( + a2, + &EXT_V, + #[cfg(feature = "hints")] + hints, + ); + let a1_a2_v = sparse_mula_fp6_bls12_381( + &a1_a2, + &EXT_V, + #[cfg(feature = "hints")] + hints, + ); // c2 = 2·a1·a2 - let c2 = dbl_fp6_bls12_381(&a1_a2); + let c2 = dbl_fp6_bls12_381( + &a1_a2, + #[cfg(feature = "hints")] + hints, + ); // c1 = (a1-a2)·(a1-a2·v) + a1·a2 + a1·a2·v - let a1_minus_a2 = sub_fp6_bls12_381(a1, a2); - let a1_minus_a2v = sub_fp6_bls12_381(a1, &a2_v); - let mut c1 = mul_fp6_bls12_381(&a1_minus_a2, &a1_minus_a2v); - c1 = add_fp6_bls12_381(&c1, &a1_a2); - c1 = add_fp6_bls12_381(&c1, &a1_a2_v); + let a1_minus_a2 = sub_fp6_bls12_381( + a1, + a2, + #[cfg(feature = "hints")] + hints, + ); + let a1_minus_a2v = sub_fp6_bls12_381( + a1, + &a2_v, + #[cfg(feature = "hints")] + hints, + ); + let mut c1 = mul_fp6_bls12_381( + &a1_minus_a2, + &a1_minus_a2v, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp6_bls12_381( + &c1, + &a1_a2, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp6_bls12_381( + &c1, + &a1_a2_v, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 72]; result[0..36].copy_from_slice(&c1); @@ -123,22 +248,61 @@ pub fn square_fp12_bls12_381(a: &[u64; 72]) -> [u64; 72] { // - c1 = a1·(a1² - a2²·v)⁻¹ // - c2 = -a2·(a1² - a2²·v)⁻¹ #[inline] -pub fn inv_fp12_bls12_381(a: &[u64; 72]) -> [u64; 72] { +pub fn inv_fp12_bls12_381( + a: &[u64; 72], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { let a1 = &a[0..36].try_into().unwrap(); let a2 = &a[36..72].try_into().unwrap(); // a1², a2², a2²·v - let a1_square = square_fp6_bls12_381(a1); - let a2_square = square_fp6_bls12_381(a2); - let a2_square_v = sparse_mula_fp6_bls12_381(&a2_square, &EXT_V); + let a1_square = square_fp6_bls12_381( + a1, + #[cfg(feature = "hints")] + hints, + ); + let a2_square = square_fp6_bls12_381( + a2, + #[cfg(feature = "hints")] + hints, + ); + let a2_square_v = sparse_mula_fp6_bls12_381( + &a2_square, + &EXT_V, + #[cfg(feature = "hints")] + hints, + ); // (a1² - a2²·v)⁻¹ - let mut denom = sub_fp6_bls12_381(&a1_square, &a2_square_v); - denom = inv_fp6_bls12_381(&denom); + let mut denom = sub_fp6_bls12_381( + &a1_square, + &a2_square_v, + #[cfg(feature = "hints")] + hints, + ); + denom = inv_fp6_bls12_381( + &denom, + #[cfg(feature = "hints")] + hints, + ); // c1 = a1·(a1² - a2²·v)⁻¹, c2 = -a2·(a1² - a2²·v)⁻¹ - let c1 = mul_fp6_bls12_381(a1, &denom); - let c2 = neg_fp6_bls12_381(&mul_fp6_bls12_381(a2, &denom)); + let c1 = mul_fp6_bls12_381( + a1, + &denom, + #[cfg(feature = "hints")] + hints, + ); + let c2 = neg_fp6_bls12_381( + &mul_fp6_bls12_381( + a2, + &denom, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 72]; result[0..36].copy_from_slice(&c1); @@ -148,10 +312,17 @@ pub fn inv_fp12_bls12_381(a: &[u64; 72]) -> [u64; 72] { /// Conjugation in Fp12 #[inline] -pub fn conjugate_fp12_bls12_381(a: &[u64; 72]) -> [u64; 72] { +pub fn conjugate_fp12_bls12_381( + a: &[u64; 72], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { let mut result = [0; 72]; result[0..36].copy_from_slice(&a[0..36]); - result[36..72].copy_from_slice(&neg_fp6_bls12_381(&a[36..72].try_into().unwrap())); + result[36..72].copy_from_slice(&neg_fp6_bls12_381( + &a[36..72].try_into().unwrap(), + #[cfg(feature = "hints")] + hints, + )); result } @@ -162,7 +333,10 @@ pub fn conjugate_fp12_bls12_381(a: &[u64; 72]) -> [u64; 72] { // - c1 = a̅11 + a̅12·γ12·v + a̅13·γ14·v² // - c2 = a̅21·γ11 + a̅22·γ13·v + a̅23·γ15·v² #[inline] -pub fn frobenius1_fp12_bls12_381(a: &[u64; 72]) -> [u64; 72] { +pub fn frobenius1_fp12_bls12_381( + a: &[u64; 72], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { let a11 = &a[0..12].try_into().unwrap(); let a12 = &a[12..24].try_into().unwrap(); let a13 = &a[24..36].try_into().unwrap(); @@ -173,19 +347,68 @@ pub fn frobenius1_fp12_bls12_381(a: &[u64; 72]) -> [u64; 72] { let mut result = [0; 72]; // c1 = a̅11 + a̅12·γ12·v + a̅13·γ14·v² - result[0..12].copy_from_slice(&conjugate_fp2_bls12_381(a11)); - let mut tmp = conjugate_fp2_bls12_381(a12); - result[12..24].copy_from_slice(&mul_fp2_bls12_381(&tmp, &FROBENIUS_GAMMA12)); - tmp = conjugate_fp2_bls12_381(a13); - result[24..36].copy_from_slice(&scalar_mul_fp2_bls12_381(&tmp, &FROBENIUS_GAMMA14)); + result[0..12].copy_from_slice(&conjugate_fp2_bls12_381( + a11, + #[cfg(feature = "hints")] + hints, + )); + let mut tmp = conjugate_fp2_bls12_381( + a12, + #[cfg(feature = "hints")] + hints, + ); + result[12..24].copy_from_slice(&mul_fp2_bls12_381( + &tmp, + &FROBENIUS_GAMMA12, + #[cfg(feature = "hints")] + hints, + )); + tmp = conjugate_fp2_bls12_381( + a13, + #[cfg(feature = "hints")] + hints, + ); + result[24..36].copy_from_slice(&scalar_mul_fp2_bls12_381( + &tmp, + &FROBENIUS_GAMMA14, + #[cfg(feature = "hints")] + hints, + )); // c2 = a̅21·γ11 + a̅22·γ13·v + a̅23·γ15·v² - tmp = conjugate_fp2_bls12_381(a21); - result[36..48].copy_from_slice(&mul_fp2_bls12_381(&tmp, &FROBENIUS_GAMMA11)); - tmp = conjugate_fp2_bls12_381(a22); - result[48..60].copy_from_slice(&mul_fp2_bls12_381(&tmp, &FROBENIUS_GAMMA13)); - tmp = conjugate_fp2_bls12_381(a23); - result[60..72].copy_from_slice(&mul_fp2_bls12_381(&tmp, &FROBENIUS_GAMMA15)); + tmp = conjugate_fp2_bls12_381( + a21, + #[cfg(feature = "hints")] + hints, + ); + result[36..48].copy_from_slice(&mul_fp2_bls12_381( + &tmp, + &FROBENIUS_GAMMA11, + #[cfg(feature = "hints")] + hints, + )); + tmp = conjugate_fp2_bls12_381( + a22, + #[cfg(feature = "hints")] + hints, + ); + result[48..60].copy_from_slice(&mul_fp2_bls12_381( + &tmp, + &FROBENIUS_GAMMA13, + #[cfg(feature = "hints")] + hints, + )); + tmp = conjugate_fp2_bls12_381( + a23, + #[cfg(feature = "hints")] + hints, + ); + result[60..72].copy_from_slice(&mul_fp2_bls12_381( + &tmp, + &FROBENIUS_GAMMA15, + #[cfg(feature = "hints")] + hints, + )); result } @@ -197,7 +420,10 @@ pub fn frobenius1_fp12_bls12_381(a: &[u64; 72]) -> [u64; 72] { // - c1 = a11 + a12·γ22·v + a13·γ24·v² // - c2 = a21·γ21 + a22·γ23·v + a23·γ25·v² #[inline] -pub fn frobenius2_fp12_bls12_381(a: &[u64; 72]) -> [u64; 72] { +pub fn frobenius2_fp12_bls12_381( + a: &[u64; 72], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { let a11: &[u64; 12] = &a[0..12].try_into().unwrap(); let a12 = &a[12..24].try_into().unwrap(); let a13 = &a[24..36].try_into().unwrap(); @@ -209,13 +435,38 @@ pub fn frobenius2_fp12_bls12_381(a: &[u64; 72]) -> [u64; 72] { // c1 = a11 + a12·γ22·v + a13·γ24·v² result[0..12].copy_from_slice(a11); - result[12..24].copy_from_slice(&scalar_mul_fp2_bls12_381(a12, &FROBENIUS_GAMMA22)); - result[24..36].copy_from_slice(&scalar_mul_fp2_bls12_381(a13, &FROBENIUS_GAMMA24)); + result[12..24].copy_from_slice(&scalar_mul_fp2_bls12_381( + a12, + &FROBENIUS_GAMMA22, + #[cfg(feature = "hints")] + hints, + )); + result[24..36].copy_from_slice(&scalar_mul_fp2_bls12_381( + a13, + &FROBENIUS_GAMMA24, + #[cfg(feature = "hints")] + hints, + )); // c2 = a21·γ21 + a22·γ23·v + a23·γ25·v² - result[36..48].copy_from_slice(&scalar_mul_fp2_bls12_381(a21, &FROBENIUS_GAMMA21)); - result[48..60].copy_from_slice(&scalar_mul_fp2_bls12_381(a22, &FROBENIUS_GAMMA23)); - result[60..72].copy_from_slice(&scalar_mul_fp2_bls12_381(a23, &FROBENIUS_GAMMA25)); + result[36..48].copy_from_slice(&scalar_mul_fp2_bls12_381( + a21, + &FROBENIUS_GAMMA21, + #[cfg(feature = "hints")] + hints, + )); + result[48..60].copy_from_slice(&scalar_mul_fp2_bls12_381( + a22, + &FROBENIUS_GAMMA23, + #[cfg(feature = "hints")] + hints, + )); + result[60..72].copy_from_slice(&scalar_mul_fp2_bls12_381( + a23, + &FROBENIUS_GAMMA25, + #[cfg(feature = "hints")] + hints, + )); result } @@ -225,7 +476,11 @@ pub fn frobenius2_fp12_bls12_381(a: &[u64; 72]) -> [u64; 72] { // in: e, (a1 + a2·w) ∈ Fp12, where e ∈ [0,p¹²-2] ai ∈ Fp6 // out: (c1 + c2·w) = (a1 + a2·w)^e ∈ Fp12 #[inline] -pub fn exp_fp12_bls12_381(e: u64, a: &[u64; 72]) -> [u64; 72] { +pub fn exp_fp12_bls12_381( + e: u64, + a: &[u64; 72], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { let one = { let mut tmp = [0; 72]; tmp[0] = 1; @@ -244,7 +499,12 @@ pub fn exp_fp12_bls12_381(e: u64, a: &[u64; 72]) -> [u64; 72] { return *a; } - let (_, max_bit) = fcall_msb_pos_384(&[e, 0, 0, 0, 0, 0], &[0, 0, 0, 0, 0, 0]); + let (_, max_bit) = fcall_msb_pos_384( + &[e, 0, 0, 0, 0, 0], + &[0, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); // Perform the loop, based on the binary representation of e @@ -260,12 +520,21 @@ pub fn exp_fp12_bls12_381(e: u64, a: &[u64; 72]) -> [u64; 72] { let _max_bit = max_bit as usize; for i in (0.._max_bit).rev() { // Always square - result = square_fp12_bls12_381(&result); + result = square_fp12_bls12_381( + &result, + #[cfg(feature = "hints")] + hints, + ); // Get the next bit b of e // If b == 1, we should multiply it by a, otherwise start the next iteration if ((e >> i) & 1) == 1 { - result = mul_fp12_bls12_381(&result, a); + result = mul_fp12_bls12_381( + &result, + a, + #[cfg(feature = "hints")] + hints, + ); // Reconstruct e e_rec |= 1 << i; diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp2.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp2.rs index 0e4f96a82..93fc3a25e 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp2.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp2.rs @@ -6,10 +6,10 @@ use crate::{ syscall_bls12_381_complex_sub, SyscallBls12_381ComplexAddParams, SyscallBls12_381ComplexMulParams, SyscallBls12_381ComplexSubParams, SyscallComplex384, }, - zisklib::{eq, fcall_bls12_381_fp2_inv}, + zisklib::{eq, fcall_bls12_381_fp2_inv, fcall_bls12_381_fp2_sqrt, is_zero}, }; -use super::constants::P_MINUS_ONE; +use super::constants::{NQR_FP2, P_MINUS_ONE}; /// Helper to convert from array representation to syscall representation #[inline] @@ -31,81 +31,185 @@ fn from_syscall_complex(complex: &SyscallComplex384) -> [u64; 12] { result } +/// Sign function in Fp2 +pub fn sgn0_fp2_bls12_381(x: &[u64; 12]) -> u64 { + let sign_0 = x[0] & 1; + let zero_0 = is_zero(&x[0..6]) as u64; + let sign_1 = x[6] & 1; + sign_0 | (zero_0 & sign_1) +} + /// Addition in Fp2 #[inline] -pub fn add_fp2_bls12_381(a: &[u64; 12], b: &[u64; 12]) -> [u64; 12] { +pub fn add_fp2_bls12_381( + a: &[u64; 12], + b: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { let mut f1 = to_syscall_complex(a); let f2 = to_syscall_complex(b); let mut params = SyscallBls12_381ComplexAddParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_add(&mut params); + syscall_bls12_381_complex_add( + &mut params, + #[cfg(feature = "hints")] + hints, + ); from_syscall_complex(&f1) } /// Doubling in Fp2 #[inline] -pub fn dbl_fp2_bls12_381(a: &[u64; 12]) -> [u64; 12] { +pub fn dbl_fp2_bls12_381( + a: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { let mut f1 = to_syscall_complex(a); let f2 = to_syscall_complex(a); let mut params = SyscallBls12_381ComplexAddParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_add(&mut params); + syscall_bls12_381_complex_add( + &mut params, + #[cfg(feature = "hints")] + hints, + ); from_syscall_complex(&f1) } /// Negation in Fp2 #[inline] -pub fn neg_fp2_bls12_381(a: &[u64; 12]) -> [u64; 12] { +pub fn neg_fp2_bls12_381( + a: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { let mut f1 = to_syscall_complex(a); let f2 = to_syscall_complex_x(&P_MINUS_ONE); let mut params = SyscallBls12_381ComplexMulParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_mul(&mut params); + syscall_bls12_381_complex_mul( + &mut params, + #[cfg(feature = "hints")] + hints, + ); from_syscall_complex(&f1) } /// Subtraction in Fp2 #[inline] -pub fn sub_fp2_bls12_381(a: &[u64; 12], b: &[u64; 12]) -> [u64; 12] { +pub fn sub_fp2_bls12_381( + a: &[u64; 12], + b: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { let mut f1 = to_syscall_complex(a); let f2 = to_syscall_complex(b); let mut params = SyscallBls12_381ComplexSubParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_sub(&mut params); + syscall_bls12_381_complex_sub( + &mut params, + #[cfg(feature = "hints")] + hints, + ); from_syscall_complex(&f1) } /// Multiplication in Fp2 #[inline] -pub fn mul_fp2_bls12_381(a: &[u64; 12], b: &[u64; 12]) -> [u64; 12] { +pub fn mul_fp2_bls12_381( + a: &[u64; 12], + b: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { let mut f1 = to_syscall_complex(a); let f2 = to_syscall_complex(b); let mut params = SyscallBls12_381ComplexMulParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_mul(&mut params); + syscall_bls12_381_complex_mul( + &mut params, + #[cfg(feature = "hints")] + hints, + ); from_syscall_complex(&f1) } /// Scalar multiplication in Fp2 #[inline] -pub fn scalar_mul_fp2_bls12_381(a: &[u64; 12], b: &[u64; 6]) -> [u64; 12] { +pub fn scalar_mul_fp2_bls12_381( + a: &[u64; 12], + b: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { let mut f1 = SyscallComplex384 { x: a[0..6].try_into().unwrap(), y: a[6..12].try_into().unwrap() }; let f2 = SyscallComplex384 { x: b[0..6].try_into().unwrap(), y: [0, 0, 0, 0, 0, 0] }; let mut params = SyscallBls12_381ComplexMulParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_mul(&mut params); + syscall_bls12_381_complex_mul( + &mut params, + #[cfg(feature = "hints")] + hints, + ); from_syscall_complex(&f1) } /// Squaring in Fp2 #[inline] -pub fn square_fp2_bls12_381(a: &[u64; 12]) -> [u64; 12] { +pub fn square_fp2_bls12_381( + a: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { let mut f1 = to_syscall_complex(a); let f2 = to_syscall_complex(a); let mut params = SyscallBls12_381ComplexMulParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_mul(&mut params); + syscall_bls12_381_complex_mul( + &mut params, + #[cfg(feature = "hints")] + hints, + ); from_syscall_complex(&f1) } +/// Square root in Fp2 +#[inline] +pub fn sqrt_fp2_bls12_381( + x: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> ([u64; 12], bool) { + // Hint the sqrt + let hint = fcall_bls12_381_fp2_sqrt( + x, + #[cfg(feature = "hints")] + hints, + ); + let is_qr = hint[0] == 1; + let sqrt = hint[1..13].try_into().unwrap(); + + // Compute sqrt * sqrt + let mul = mul_fp2_bls12_381( + &sqrt, + &sqrt, + #[cfg(feature = "hints")] + hints, + ); + + if is_qr { + // Check that sqrt * sqrt == x + assert!(eq(&mul, x)); + (sqrt, true) + } else { + // Check that sqrt * sqrt == x * NQR + let nqr = mul_fp2_bls12_381( + x, + &NQR_FP2, + #[cfg(feature = "hints")] + hints, + ); + assert!(eq(&mul, &nqr)); + (sqrt, false) + } +} + /// Inversion in Fp2: returns a⁻¹ #[inline] -pub fn inv_fp2_bls12_381(a: &[u64; 12]) -> [u64; 12] { +pub fn inv_fp2_bls12_381( + a: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { // if a == 0, return 0 if eq(a, &[0; 12]) { return *a; @@ -115,9 +219,18 @@ pub fn inv_fp2_bls12_381(a: &[u64; 12]) -> [u64; 12] { // Remember that an element b ∈ Fp2 is the inverse of a ∈ Fp2 if and only if a·b = 1 in Fp2 // We will therefore hint the inverse b and check the product with a is 1 - let inv = fcall_bls12_381_fp2_inv(a); - - let product = mul_fp2_bls12_381(a, &inv); + let inv = fcall_bls12_381_fp2_inv( + a, + #[cfg(feature = "hints")] + hints, + ); + + let product = mul_fp2_bls12_381( + a, + &inv, + #[cfg(feature = "hints")] + hints, + ); assert_eq!(&product[0..6], &[1, 0, 0, 0, 0, 0]); assert_eq!(&product[6..12], &[0, 0, 0, 0, 0, 0]); @@ -126,116 +239,41 @@ pub fn inv_fp2_bls12_381(a: &[u64; 12]) -> [u64; 12] { /// Conjugation in Fp2 #[inline] -pub fn conjugate_fp2_bls12_381(a: &[u64; 12]) -> [u64; 12] { +pub fn conjugate_fp2_bls12_381( + a: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { let mut f1 = SyscallComplex384 { x: a[0..6].try_into().unwrap(), y: [0, 0, 0, 0, 0, 0] }; let f2 = SyscallComplex384 { x: [0, 0, 0, 0, 0, 0], y: a[6..12].try_into().unwrap() }; let mut params = SyscallBls12_381ComplexSubParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_sub(&mut params); + syscall_bls12_381_complex_sub( + &mut params, + #[cfg(feature = "hints")] + hints, + ); from_syscall_complex(&f1) } -// ========== Pointer-based API ========== - -/// # Safety -/// - `a` must point to a valid `[u64; 12]` (96 bytes), used as both input and output. -/// - `b` must point to a valid `[u64; 12]` (96 bytes). -#[no_mangle] -pub unsafe extern "C" fn add_fp2_bls12_381_c(a: *mut u64, b: *const u64) { - let mut f1 = - SyscallComplex384 { x: *(a as *const [u64; 6]), y: *(a.add(6) as *const [u64; 6]) }; - let f2 = SyscallComplex384 { x: *(b as *const [u64; 6]), y: *(b.add(6) as *const [u64; 6]) }; - - let mut params = SyscallBls12_381ComplexAddParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_add(&mut params); - - *(a as *mut [u64; 6]) = f1.x; - *(a.add(6) as *mut [u64; 6]) = f1.y; -} - -/// # Safety -/// - `a` must point to a valid `[u64; 12]` (96 bytes), used as both input and output. -#[no_mangle] -pub unsafe extern "C" fn dbl_fp2_bls12_381_c(a: *mut u64) { - let mut f1 = - SyscallComplex384 { x: *(a as *const [u64; 6]), y: *(a.add(6) as *const [u64; 6]) }; - let f2 = SyscallComplex384 { x: f1.x, y: f1.y }; - - let mut params = SyscallBls12_381ComplexAddParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_add(&mut params); - - *(a as *mut [u64; 6]) = f1.x; - *(a.add(6) as *mut [u64; 6]) = f1.y; -} - -/// # Safety -/// - `a` must point to a valid `[u64; 12]` (96 bytes), used as both input and output. -#[no_mangle] -pub unsafe extern "C" fn neg_fp2_bls12_381_c(a: *mut u64) { - let mut f1 = - SyscallComplex384 { x: *(a as *const [u64; 6]), y: *(a.add(6) as *const [u64; 6]) }; - let f2 = SyscallComplex384 { x: P_MINUS_ONE, y: [0, 0, 0, 0, 0, 0] }; - - let mut params = SyscallBls12_381ComplexMulParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_mul(&mut params); - - *(a as *mut [u64; 6]) = f1.x; - *(a.add(6) as *mut [u64; 6]) = f1.y; -} - -/// # Safety -/// - `a` must point to a valid `[u64; 12]` (96 bytes), used as both input and output. -/// - `b` must point to a valid `[u64; 12]` (96 bytes). -#[no_mangle] -pub unsafe extern "C" fn sub_fp2_bls12_381_c(a: *mut u64, b: *const u64) { - let mut f1 = - SyscallComplex384 { x: *(a as *const [u64; 6]), y: *(a.add(6) as *const [u64; 6]) }; - let f2 = SyscallComplex384 { x: *(b as *const [u64; 6]), y: *(b.add(6) as *const [u64; 6]) }; - - let mut params = SyscallBls12_381ComplexSubParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_sub(&mut params); - - *(a as *mut [u64; 6]) = f1.x; - *(a.add(6) as *mut [u64; 6]) = f1.y; -} - -/// # Safety -/// - `a` must point to a valid `[u64; 12]` (96 bytes), used as both input and output. -/// - `b` must point to a valid `[u64; 12]` (96 bytes). -#[no_mangle] -pub unsafe extern "C" fn mul_fp2_bls12_381_c(a: *mut u64, b: *const u64) { - let mut f1 = - SyscallComplex384 { x: *(a as *const [u64; 6]), y: *(a.add(6) as *const [u64; 6]) }; - let f2 = SyscallComplex384 { x: *(b as *const [u64; 6]), y: *(b.add(6) as *const [u64; 6]) }; - - let mut params = SyscallBls12_381ComplexMulParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_mul(&mut params); - - *(a as *mut [u64; 6]) = f1.x; - *(a.add(6) as *mut [u64; 6]) = f1.y; -} - -/// # Safety -/// - `a` must point to a valid `[u64; 12]` (96 bytes), used as both input and output. -#[no_mangle] -pub unsafe extern "C" fn square_fp2_bls12_381_c(a: *mut u64) { - let mut f1 = - SyscallComplex384 { x: *(a as *const [u64; 6]), y: *(a.add(6) as *const [u64; 6]) }; - let f2 = SyscallComplex384 { x: f1.x, y: f1.y }; +/// Convert 96-byte big-endian Fp2 element to [u64; 12] little-endian +/// Format: fp2 = (c0, c1) where c0 is real, c1 is imaginary +/// Bytes: c0 (48 bytes) || c1 (48 bytes) +pub fn bytes_be_to_u64_le_fp2_bls12_381(bytes: &[u8; 96]) -> [u64; 12] { + let mut result = [0u64; 12]; - let mut params = SyscallBls12_381ComplexMulParams { f1: &mut f1, f2: &f2 }; - syscall_bls12_381_complex_mul(&mut params); + // c0 (real part, bytes 0-47) -> result[0..6] + for i in 0..6 { + for j in 0..8 { + result[5 - i] |= (bytes[i * 8 + j] as u64) << (8 * (7 - j)); + } + } - *(a as *mut [u64; 6]) = f1.x; - *(a.add(6) as *mut [u64; 6]) = f1.y; -} + // c1 (imaginary part, bytes 48-95) -> result[6..12] + for i in 0..6 { + for j in 0..8 { + result[11 - i] |= (bytes[48 + i * 8 + j] as u64) << (8 * (7 - j)); + } + } -/// # Safety -/// - `a` must point to a valid `[u64; 12]` (96 bytes), used as both input and output. -/// - Element must be non-zero. -#[no_mangle] -pub unsafe extern "C" fn inv_fp2_bls12_381_c(a: *mut u64) { - let a_ref = &*(a as *const [u64; 12]); - let result = inv_fp2_bls12_381(a_ref); - *(a as *mut [u64; 12]) = result; + result } diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp6.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp6.rs index 31c3d4ed1..742a5df18 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp6.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/fp6.rs @@ -10,12 +10,21 @@ use super::{ /// Addition in Fp6 #[inline] -pub fn add_fp6_bls12_381(a: &[u64; 36], b: &[u64; 36]) -> [u64; 36] { +pub fn add_fp6_bls12_381( + a: &[u64; 36], + b: &[u64; 36], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 36] { let mut result = [0; 36]; for i in 0..3 { let a_i = &a[i * 12..(i + 1) * 12].try_into().unwrap(); let b_i = &b[i * 12..(i + 1) * 12].try_into().unwrap(); - let c_i = add_fp2_bls12_381(a_i, b_i); + let c_i = add_fp2_bls12_381( + a_i, + b_i, + #[cfg(feature = "hints")] + hints, + ); result[i * 12..(i + 1) * 12].copy_from_slice(&c_i); } result @@ -23,11 +32,18 @@ pub fn add_fp6_bls12_381(a: &[u64; 36], b: &[u64; 36]) -> [u64; 36] { /// Doubling in Fp6 #[inline] -pub fn dbl_fp6_bls12_381(a: &[u64; 36]) -> [u64; 36] { +pub fn dbl_fp6_bls12_381( + a: &[u64; 36], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 36] { let mut result = [0; 36]; for i in 0..3 { let a_i = &a[i * 12..(i + 1) * 12].try_into().unwrap(); - let c_i = dbl_fp2_bls12_381(a_i); + let c_i = dbl_fp2_bls12_381( + a_i, + #[cfg(feature = "hints")] + hints, + ); result[i * 12..(i + 1) * 12].copy_from_slice(&c_i); } result @@ -35,11 +51,18 @@ pub fn dbl_fp6_bls12_381(a: &[u64; 36]) -> [u64; 36] { /// Negation in Fp6 #[inline] -pub fn neg_fp6_bls12_381(a: &[u64; 36]) -> [u64; 36] { +pub fn neg_fp6_bls12_381( + a: &[u64; 36], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 36] { let mut result = [0; 36]; for i in 0..3 { let a_i = &a[i * 12..(i + 1) * 12].try_into().unwrap(); - let c_i = neg_fp2_bls12_381(a_i); + let c_i = neg_fp2_bls12_381( + a_i, + #[cfg(feature = "hints")] + hints, + ); result[i * 12..(i + 1) * 12].copy_from_slice(&c_i); } result @@ -47,12 +70,21 @@ pub fn neg_fp6_bls12_381(a: &[u64; 36]) -> [u64; 36] { /// Subtraction in Fp6 #[inline] -pub fn sub_fp6_bls12_381(a: &[u64; 36], b: &[u64; 36]) -> [u64; 36] { +pub fn sub_fp6_bls12_381( + a: &[u64; 36], + b: &[u64; 36], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 36] { let mut result = [0; 36]; for i in 0..3 { let a_i = &a[i * 12..(i + 1) * 12].try_into().unwrap(); let b_i = &b[i * 12..(i + 1) * 12].try_into().unwrap(); - let c_i = sub_fp2_bls12_381(a_i, b_i); + let c_i = sub_fp2_bls12_381( + a_i, + b_i, + #[cfg(feature = "hints")] + hints, + ); result[i * 12..(i + 1) * 12].copy_from_slice(&c_i); } result @@ -65,7 +97,11 @@ pub fn sub_fp6_bls12_381(a: &[u64; 36], b: &[u64; 36]) -> [u64; 36] { // - c2 = a1·b2 + a2·b1 + (a3·b3)·(1+u) // - c3 = a1·b3 + a2·b2 + a3·b1 #[inline] -pub fn mul_fp6_bls12_381(a: &[u64; 36], b: &[u64; 36]) -> [u64; 36] { +pub fn mul_fp6_bls12_381( + a: &[u64; 36], + b: &[u64; 36], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 36] { let a1 = &a[0..12].try_into().unwrap(); let a2 = &a[12..24].try_into().unwrap(); let a3 = &a[24..36].try_into().unwrap(); @@ -74,21 +110,106 @@ pub fn mul_fp6_bls12_381(a: &[u64; 36], b: &[u64; 36]) -> [u64; 36] { let b3 = &b[24..36].try_into().unwrap(); // c1 = a1·b1 + [a2·b3 + a3·b2]·(1+u) - let mut c1 = mul_fp2_bls12_381(a2, b3); - c1 = add_fp2_bls12_381(&c1, &mul_fp2_bls12_381(a3, b2)); - c1 = mul_fp2_bls12_381(&c1, &EXT_U); - c1 = add_fp2_bls12_381(&c1, &mul_fp2_bls12_381(a1, b1)); + let mut c1 = mul_fp2_bls12_381( + a2, + b3, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp2_bls12_381( + &c1, + &mul_fp2_bls12_381( + a3, + b2, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + c1 = mul_fp2_bls12_381( + &c1, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp2_bls12_381( + &c1, + &mul_fp2_bls12_381( + a1, + b1, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // c2 = a1·b2 + a2·b1 + (a3·b3)·(1+u) - let mut c2 = mul_fp2_bls12_381(a3, b3); - c2 = mul_fp2_bls12_381(&c2, &EXT_U); - c2 = add_fp2_bls12_381(&c2, &mul_fp2_bls12_381(a1, b2)); - c2 = add_fp2_bls12_381(&c2, &mul_fp2_bls12_381(a2, b1)); + let mut c2 = mul_fp2_bls12_381( + a3, + b3, + #[cfg(feature = "hints")] + hints, + ); + c2 = mul_fp2_bls12_381( + &c2, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + c2 = add_fp2_bls12_381( + &c2, + &mul_fp2_bls12_381( + a1, + b2, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + c2 = add_fp2_bls12_381( + &c2, + &mul_fp2_bls12_381( + a2, + b1, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // c3 = a1·b3 + a2·b2 + a3·b1 - let mut c3 = mul_fp2_bls12_381(a1, b3); - c3 = add_fp2_bls12_381(&c3, &mul_fp2_bls12_381(a2, b2)); - c3 = add_fp2_bls12_381(&c3, &mul_fp2_bls12_381(a3, b1)); + let mut c3 = mul_fp2_bls12_381( + a1, + b3, + #[cfg(feature = "hints")] + hints, + ); + c3 = add_fp2_bls12_381( + &c3, + &mul_fp2_bls12_381( + a2, + b2, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + c3 = add_fp2_bls12_381( + &c3, + &mul_fp2_bls12_381( + a3, + b1, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 36]; result[0..12].copy_from_slice(&c1); @@ -104,20 +225,43 @@ pub fn mul_fp6_bls12_381(a: &[u64; 36], b: &[u64; 36]) -> [u64; 36] { // - c2 = a1·b2 // - c3 = a2·b2 #[inline] -pub fn sparse_mula_fp6_bls12_381(a: &[u64; 36], b2: &[u64; 12]) -> [u64; 36] { +pub fn sparse_mula_fp6_bls12_381( + a: &[u64; 36], + b2: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 36] { let a1 = &a[0..12].try_into().unwrap(); let a2 = &a[12..24].try_into().unwrap(); let a3 = &a[24..36].try_into().unwrap(); // c1 = a3·b2·(1+u) - let mut c1 = mul_fp2_bls12_381(a3, b2); - c1 = mul_fp2_bls12_381(&c1, &EXT_U); + let mut c1 = mul_fp2_bls12_381( + a3, + b2, + #[cfg(feature = "hints")] + hints, + ); + c1 = mul_fp2_bls12_381( + &c1, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); // c2 = a1·b2 - let c2 = mul_fp2_bls12_381(a1, b2); - + let c2 = mul_fp2_bls12_381( + a1, + b2, + #[cfg(feature = "hints")] + hints, + ); // c3 = a2·b2 - let c3 = mul_fp2_bls12_381(a2, b2); + let c3 = mul_fp2_bls12_381( + a2, + b2, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 36]; result[0..12].copy_from_slice(&c1); @@ -134,7 +278,11 @@ pub fn sparse_mula_fp6_bls12_381(a: &[u64; 36], b2: &[u64; 12]) -> [u64; 36] { // - c2 = a1·b2 + a3·b3·(1+u) // - c3 = a1·b3 + a2·b2 #[inline] -pub fn sparse_mulb_fp6_bls12_381(a: &[u64; 36], b: &[u64; 24]) -> [u64; 36] { +pub fn sparse_mulb_fp6_bls12_381( + a: &[u64; 36], + b: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 36] { let a1 = &a[0..12].try_into().unwrap(); let a2 = &a[12..24].try_into().unwrap(); let a3 = &a[24..36].try_into().unwrap(); @@ -142,19 +290,73 @@ pub fn sparse_mulb_fp6_bls12_381(a: &[u64; 36], b: &[u64; 24]) -> [u64; 36] { let b3 = &b[12..24].try_into().unwrap(); // c1 = (a2·b3 + a3·b2)·(1+u) - let mut c1 = mul_fp2_bls12_381(a2, b3); - c1 = add_fp2_bls12_381(&c1, &mul_fp2_bls12_381(a3, b2)); - c1 = mul_fp2_bls12_381(&c1, &EXT_U); + let mut c1 = mul_fp2_bls12_381( + a2, + b3, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp2_bls12_381( + &c1, + &mul_fp2_bls12_381( + a3, + b2, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + c1 = mul_fp2_bls12_381( + &c1, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); // c2 = a1·b2 + a3·b3·(1+u) - let mut c2 = mul_fp2_bls12_381(a3, b3); - c2 = mul_fp2_bls12_381(&c2, &EXT_U); - c2 = add_fp2_bls12_381(&c2, &mul_fp2_bls12_381(a1, b2)); + let mut c2 = mul_fp2_bls12_381( + a3, + b3, + #[cfg(feature = "hints")] + hints, + ); + c2 = mul_fp2_bls12_381( + &c2, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + c2 = add_fp2_bls12_381( + &c2, + &mul_fp2_bls12_381( + a1, + b2, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // c3 = a1·b3 + a2·b2 - let mut c3 = mul_fp2_bls12_381(a1, b3); - c3 = add_fp2_bls12_381(&c3, &mul_fp2_bls12_381(a2, b2)); - + let mut c3 = mul_fp2_bls12_381( + a1, + b3, + #[cfg(feature = "hints")] + hints, + ); + c3 = add_fp2_bls12_381( + &c3, + &mul_fp2_bls12_381( + a2, + b2, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 36]; result[0..12].copy_from_slice(&c1); result[12..24].copy_from_slice(&c2); @@ -170,7 +372,11 @@ pub fn sparse_mulb_fp6_bls12_381(a: &[u64; 36], b: &[u64; 24]) -> [u64; 36] { // - c2 = a2·b1 + a3·b3·(1+u) // - c3 = a1·b3 + a3·b1 #[inline] -pub fn sparse_mulc_fp6_bls12_381(a: &[u64; 36], b: &[u64; 24]) -> [u64; 36] { +pub fn sparse_mulc_fp6_bls12_381( + a: &[u64; 36], + b: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 36] { let a1 = &a[0..12].try_into().unwrap(); let a2 = &a[12..24].try_into().unwrap(); let a3 = &a[24..36].try_into().unwrap(); @@ -178,19 +384,73 @@ pub fn sparse_mulc_fp6_bls12_381(a: &[u64; 36], b: &[u64; 24]) -> [u64; 36] { let b3 = &b[12..24].try_into().unwrap(); // c1 = a1·b1 + a2·b3·(1+u) - let mut c1 = mul_fp2_bls12_381(a2, b3); - c1 = mul_fp2_bls12_381(&c1, &EXT_U); - c1 = add_fp2_bls12_381(&c1, &mul_fp2_bls12_381(a1, b1)); + let mut c1 = mul_fp2_bls12_381( + a2, + b3, + #[cfg(feature = "hints")] + hints, + ); + c1 = mul_fp2_bls12_381( + &c1, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp2_bls12_381( + &c1, + &mul_fp2_bls12_381( + a1, + b1, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // c2 = a2·b1 + a3·b3·(1+u) - let mut c2 = mul_fp2_bls12_381(a3, b3); - c2 = mul_fp2_bls12_381(&c2, &EXT_U); - c2 = add_fp2_bls12_381(&c2, &mul_fp2_bls12_381(a2, b1)); + let mut c2 = mul_fp2_bls12_381( + a3, + b3, + #[cfg(feature = "hints")] + hints, + ); + c2 = mul_fp2_bls12_381( + &c2, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + c2 = add_fp2_bls12_381( + &c2, + &mul_fp2_bls12_381( + a2, + b1, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // c3 = a1·b3 + a3·b1 - let mut c3 = mul_fp2_bls12_381(a1, b3); - c3 = add_fp2_bls12_381(&c3, &mul_fp2_bls12_381(a3, b1)); - + let mut c3 = mul_fp2_bls12_381( + a1, + b3, + #[cfg(feature = "hints")] + hints, + ); + c3 = add_fp2_bls12_381( + &c3, + &mul_fp2_bls12_381( + a3, + b1, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 36]; result[0..12].copy_from_slice(&c1); result[12..24].copy_from_slice(&c2); @@ -206,25 +466,92 @@ pub fn sparse_mulc_fp6_bls12_381(a: &[u64; 36], b: &[u64; 24]) -> [u64; 36] { // - c2 = a3²·(1+u) + 2·a1·a2 // - c3 = a2² + 2·a1·a3 #[inline] -pub fn square_fp6_bls12_381(a: &[u64; 36]) -> [u64; 36] { +pub fn square_fp6_bls12_381( + a: &[u64; 36], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 36] { let a1 = &a[0..12].try_into().unwrap(); let a2 = &a[12..24].try_into().unwrap(); let a3 = &a[24..36].try_into().unwrap(); // c1 = a1² + 2·a2·a3·(1+u) - let mut c1 = mul_fp2_bls12_381(a2, a3); - c1 = dbl_fp2_bls12_381(&c1); - c1 = mul_fp2_bls12_381(&c1, &EXT_U); - c1 = add_fp2_bls12_381(&c1, &square_fp2_bls12_381(a1)); + let mut c1 = mul_fp2_bls12_381( + a2, + a3, + #[cfg(feature = "hints")] + hints, + ); + c1 = dbl_fp2_bls12_381( + &c1, + #[cfg(feature = "hints")] + hints, + ); + c1 = mul_fp2_bls12_381( + &c1, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp2_bls12_381( + &c1, + &square_fp2_bls12_381( + a1, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // c2 = a3²·(1+u) + 2·a1·a2 - let mut c2 = square_fp2_bls12_381(a3); - c2 = mul_fp2_bls12_381(&c2, &EXT_U); - c2 = add_fp2_bls12_381(&c2, &dbl_fp2_bls12_381(&mul_fp2_bls12_381(a1, a2))); + let mut c2 = square_fp2_bls12_381( + a3, + #[cfg(feature = "hints")] + hints, + ); + c2 = mul_fp2_bls12_381( + &c2, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + c2 = add_fp2_bls12_381( + &c2, + &dbl_fp2_bls12_381( + &mul_fp2_bls12_381( + a1, + a2, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // c3 = a2² + 2·a1·a3 - let mut c3 = square_fp2_bls12_381(a2); - c3 = add_fp2_bls12_381(&c3, &dbl_fp2_bls12_381(&mul_fp2_bls12_381(a1, a3))); + let mut c3 = square_fp2_bls12_381( + a2, + #[cfg(feature = "hints")] + hints, + ); + c3 = add_fp2_bls12_381( + &c3, + &dbl_fp2_bls12_381( + &mul_fp2_bls12_381( + a1, + a3, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 36]; result[0..12].copy_from_slice(&c1); @@ -245,43 +572,146 @@ pub fn square_fp6_bls12_381(a: &[u64; 36]) -> [u64; 36] { // * c2mid = (1 + u)·a3² - (a1·a2) // * c3mid = a2² - (a1·a3) #[inline] -pub fn inv_fp6_bls12_381(a: &[u64; 36]) -> [u64; 36] { +pub fn inv_fp6_bls12_381( + a: &[u64; 36], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 36] { let a1 = &a[0..12].try_into().unwrap(); let a2 = &a[12..24].try_into().unwrap(); let a3 = &a[24..36].try_into().unwrap(); // a1², a2², a3² - let a1_squared = square_fp2_bls12_381(a1); - let a2_squared = square_fp2_bls12_381(a2); - let a3_squared = square_fp2_bls12_381(a3); + let a1_squared = square_fp2_bls12_381( + a1, + #[cfg(feature = "hints")] + hints, + ); + let a2_squared = square_fp2_bls12_381( + a2, + #[cfg(feature = "hints")] + hints, + ); + let a3_squared = square_fp2_bls12_381( + a3, + #[cfg(feature = "hints")] + hints, + ); // a1·a2, a1·a3, a2·a3 - let a1_a2 = mul_fp2_bls12_381(a1, a2); - let a1_a3 = mul_fp2_bls12_381(a1, a3); - let a2_a3 = mul_fp2_bls12_381(a2, a3); + let a1_a2 = mul_fp2_bls12_381( + a1, + a2, + #[cfg(feature = "hints")] + hints, + ); + let a1_a3 = mul_fp2_bls12_381( + a1, + a3, + #[cfg(feature = "hints")] + hints, + ); + let a2_a3 = mul_fp2_bls12_381( + a2, + a3, + #[cfg(feature = "hints")] + hints, + ); // c1mid = a1² - (1 + u)·(a2·a3) - let mut c1mid = mul_fp2_bls12_381(&a2_a3, &EXT_U); - c1mid = sub_fp2_bls12_381(&a1_squared, &c1mid); + let mut c1mid = mul_fp2_bls12_381( + &a2_a3, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + c1mid = sub_fp2_bls12_381( + &a1_squared, + &c1mid, + #[cfg(feature = "hints")] + hints, + ); // c2mid = (1 + u)·a3² - (a1·a2) - let mut c2mid = mul_fp2_bls12_381(&a3_squared, &EXT_U); - c2mid = sub_fp2_bls12_381(&c2mid, &a1_a2); - + let mut c2mid = mul_fp2_bls12_381( + &a3_squared, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + c2mid = sub_fp2_bls12_381( + &c2mid, + &a1_a2, + #[cfg(feature = "hints")] + hints, + ); // c3mid = a2² - (a1·a3) - let c3mid = sub_fp2_bls12_381(&a2_squared, &a1_a3); + let c3mid = sub_fp2_bls12_381( + &a2_squared, + &a1_a3, + #[cfg(feature = "hints")] + hints, + ); // (a1·c1mid + (1 + u)·(a3·c2mid + a2·c3mid))⁻¹ - let mut last = mul_fp2_bls12_381(a3, &c2mid); - last = add_fp2_bls12_381(&last, &mul_fp2_bls12_381(a2, &c3mid)); - last = mul_fp2_bls12_381(&last, &EXT_U); - last = add_fp2_bls12_381(&last, &mul_fp2_bls12_381(a1, &c1mid)); - let last_inv = inv_fp2_bls12_381(&last); + let mut last = mul_fp2_bls12_381( + a3, + &c2mid, + #[cfg(feature = "hints")] + hints, + ); + last = add_fp2_bls12_381( + &last, + &mul_fp2_bls12_381( + a2, + &c3mid, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + last = mul_fp2_bls12_381( + &last, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + last = add_fp2_bls12_381( + &last, + &mul_fp2_bls12_381( + a1, + &c1mid, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + let last_inv = inv_fp2_bls12_381( + &last, + #[cfg(feature = "hints")] + hints, + ); // c1 = c1mid·last_inv, c2 = c2mid·last_inv, c3 = c3mid·last_inv - let c1 = mul_fp2_bls12_381(&c1mid, &last_inv); - let c2 = mul_fp2_bls12_381(&c2mid, &last_inv); - let c3 = mul_fp2_bls12_381(&c3mid, &last_inv); + let c1 = mul_fp2_bls12_381( + &c1mid, + &last_inv, + #[cfg(feature = "hints")] + hints, + ); + let c2 = mul_fp2_bls12_381( + &c2mid, + &last_inv, + #[cfg(feature = "hints")] + hints, + ); + let c3 = mul_fp2_bls12_381( + &c3mid, + &last_inv, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 36]; result[0..12].copy_from_slice(&c1); diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/fr.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/fr.rs index b62eac9cf..9bed00563 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/fr.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/fr.rs @@ -1,22 +1,58 @@ //! Finite field Fr operations for BLS12-381 -use crate::syscalls::{syscall_arith256_mod, SyscallArith256ModParams}; +use crate::{ + syscalls::{syscall_arith256_mod, SyscallArith256ModParams}, + zisklib::lt, +}; use super::constants::{R, R_MINUS_ONE}; +pub fn reduce_fr_bls12_381( + x: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { + if lt(x, &R) { + return *x; + } + + // x·1 + 0 + let mut params = SyscallArith256ModParams { + a: x, + b: &[1, 0, 0, 0], + c: &[0, 0, 0, 0], + module: &R, + d: &mut [0, 0, 0, 0], + }; + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); + + *params.d +} + /// Addition in Fr #[inline] -pub fn add_fr_bls12_381(x: &[u64; 4], y: &[u64; 4]) -> [u64; 4] { +pub fn add_fr_bls12_381( + x: &[u64; 4], + y: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { // x·1 + y let mut params = SyscallArith256ModParams { a: x, b: &[1, 0, 0, 0], c: y, module: &R, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Doubling in Fr #[inline] -pub fn dbl_fr_bls12_381(x: &[u64; 4]) -> [u64; 4] { +pub fn dbl_fr_bls12_381(x: &[u64; 4], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 4] { // 2·x + 0 or x·1 + x let mut params = SyscallArith256ModParams { a: x, @@ -25,23 +61,35 @@ pub fn dbl_fr_bls12_381(x: &[u64; 4]) -> [u64; 4] { module: &R, d: &mut [0, 0, 0, 0], }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Subtraction in Fr #[inline] -pub fn sub_fr_bls12_381(x: &[u64; 4], y: &[u64; 4]) -> [u64; 4] { +pub fn sub_fr_bls12_381( + x: &[u64; 4], + y: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { // y·(-1) + x let mut params = SyscallArith256ModParams { a: y, b: &R_MINUS_ONE, c: x, module: &R, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Negation in Fr #[inline] -pub fn neg_fr_bls12_381(x: &[u64; 4]) -> [u64; 4] { +pub fn neg_fr_bls12_381(x: &[u64; 4], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 4] { // x·(-1) + 0 let mut params = SyscallArith256ModParams { a: x, @@ -50,142 +98,57 @@ pub fn neg_fr_bls12_381(x: &[u64; 4]) -> [u64; 4] { module: &R, d: &mut [0, 0, 0, 0], }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Multiplication in Fr #[inline] -pub fn mul_fr_bls12_381(x: &[u64; 4], y: &[u64; 4]) -> [u64; 4] { +pub fn mul_fr_bls12_381( + x: &[u64; 4], + y: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { // x·y + 0 let mut params = SyscallArith256ModParams { a: x, b: y, c: &[0, 0, 0, 0], module: &R, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Squaring in Fr #[inline] -pub fn square_fr_bls12_381(x: &[u64; 4]) -> [u64; 4] { +pub fn square_fr_bls12_381( + x: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { // x·x + 0 let mut params = SyscallArith256ModParams { a: x, b: x, c: &[0, 0, 0, 0], module: &R, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } -// ========== Pointer-based API ========== - -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes), used as both input and output. -/// - `b` must point to a valid `[u64; 4]` (32 bytes). -#[no_mangle] -pub unsafe extern "C" fn add_fr_bls12_381_c(a: *mut u64, b: *const u64) { - let a_ref = &*(a as *const [u64; 4]); - let b_ref = &*(b as *const [u64; 4]); - - let mut params = SyscallArith256ModParams { - a: a_ref, - b: &[1, 0, 0, 0], - c: b_ref, - module: &R, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - - core::ptr::copy_nonoverlapping(params.d.as_ptr(), a, 4); -} - -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes), used as both input and output. -#[no_mangle] -pub unsafe extern "C" fn dbl_fr_bls12_381_c(a: *mut u64) { - let a_ref = &*(a as *const [u64; 4]); - - let mut params = SyscallArith256ModParams { - a: a_ref, - b: &[2, 0, 0, 0], - c: &[0, 0, 0, 0], - module: &R, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - - core::ptr::copy_nonoverlapping(params.d.as_ptr(), a, 4); -} - -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes), used as both input and output. -/// - `b` must point to a valid `[u64; 4]` (32 bytes). -#[no_mangle] -pub unsafe extern "C" fn sub_fr_bls12_381_c(a: *mut u64, b: *const u64) { - let a_ref = &*(a as *const [u64; 4]); - let b_ref = &*(b as *const [u64; 4]); - - let mut params = SyscallArith256ModParams { - a: b_ref, - b: &R_MINUS_ONE, - c: a_ref, - module: &R, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - - core::ptr::copy_nonoverlapping(params.d.as_ptr(), a, 4); -} - -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes), used as both input and output. -#[no_mangle] -pub unsafe extern "C" fn neg_fr_bls12_381_c(a: *mut u64) { - let a_ref = &*(a as *const [u64; 4]); - - let mut params = SyscallArith256ModParams { - a: a_ref, - b: &R_MINUS_ONE, - c: &[0, 0, 0, 0], - module: &R, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - - core::ptr::copy_nonoverlapping(params.d.as_ptr(), a, 4); -} - -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes), used as both input and output. -/// - `b` must point to a valid `[u64; 4]` (32 bytes). -#[no_mangle] -pub unsafe extern "C" fn mul_fr_bls12_381_c(a: *mut u64, b: *const u64) { - let a_ref = &*(a as *const [u64; 4]); - let b_ref = &*(b as *const [u64; 4]); - - let mut params = SyscallArith256ModParams { - a: a_ref, - b: b_ref, - c: &[0, 0, 0, 0], - module: &R, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - - core::ptr::copy_nonoverlapping(params.d.as_ptr(), a, 4); -} - -/// # Safety -/// - `a` must point to a valid `[u64; 4]` (32 bytes), used as both input and output. -#[no_mangle] -pub unsafe extern "C" fn square_fr_bls12_381_c(a: *mut u64) { - let a_ref = &*(a as *const [u64; 4]); - - let mut params = SyscallArith256ModParams { - a: a_ref, - b: a_ref, - c: &[0, 0, 0, 0], - module: &R, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); +/// Convert 32-byte big-endian scalar to [u64; 4] little-endian +pub fn scalar_bytes_be_to_u64_le_bls12_381(bytes: &[u8; 32]) -> [u64; 4] { + let mut result = [0u64; 4]; + for i in 0..4 { + for j in 0..8 { + result[3 - i] |= (bytes[i * 8 + j] as u64) << (8 * (7 - j)); + } + } - core::ptr::copy_nonoverlapping(params.d.as_ptr(), a, 4); + result } diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/kzg.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/kzg.rs new file mode 100644 index 000000000..348a5d2d6 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/kzg.rs @@ -0,0 +1,210 @@ +use crate::zisklib::{ + is_on_subgroup_bls12_381, + lib::utils::{eq, is_one, lt}, +}; + +use super::{ + constants::{G1_GENERATOR, G1_IDENTITY, G2_GENERATOR, G2_IDENTITY, R, TRUSTED_SETUP_TAU_G2}, + curve::{decompress_bls12_381, scalar_mul_bls12_381, sub_bls12_381, sub_complete_bls12_381}, + pairing::pairing_batch_bls12_381, + twist::{ + decompress_twist_bls12_381, neg_twist_bls12_381, scalar_mul_twist_bls12_381, + sub_complete_twist_bls12_381, sub_twist_bls12_381, + }, +}; + +/// Verify KZG proof using BLS12-381 implementation. +pub fn verify_kzg_proof( + z_bytes: &[u8; 32], + y_bytes: &[u8; 32], + commitment_bytes: &[u8; 48], + proof_bytes: &[u8; 48], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { + // Parse the commitment + let commitment = match decompress_bls12_381( + commitment_bytes, + #[cfg(feature = "hints")] + hints, + ) { + Ok(result) => result, + Err(_) => return false, + }; + if !eq(&commitment, &G1_IDENTITY) + && !is_on_subgroup_bls12_381( + &commitment, + #[cfg(feature = "hints")] + hints, + ) + { + return false; + } + + // Parse the proof + let proof = match decompress_bls12_381( + proof_bytes, + #[cfg(feature = "hints")] + hints, + ) { + Ok(result) => result, + Err(_) => return false, + }; + if !eq(&proof, &G1_IDENTITY) + && !is_on_subgroup_bls12_381( + &proof, + #[cfg(feature = "hints")] + hints, + ) + { + return false; + } + + // Parse z and y as scalar field elements + let z = match scalar_bytes_be_to_u64_le_canonical(z_bytes) { + Some(s) => s, + None => return false, + }; + + let y = match scalar_bytes_be_to_u64_le_canonical(y_bytes) { + Some(s) => s, + None => return false, + }; + + // The verification equation is: + // e(C - [y]G₁, G₂) = e(π, [τ]₂ - [z]G₂) + + // Get the trusted setup G2 point [τ]₂ + let tau_g2 = TRUSTED_SETUP_TAU_G2; + + // Get generators + let g1 = G1_GENERATOR; + let g2 = G2_GENERATOR; + + // Compute c_minus_y = C - [y]G₁ + let y_g1 = scalar_mul_bls12_381( + &g1, + &y, + #[cfg(feature = "hints")] + hints, + ); + let c_minus_y = sub_complete_bls12_381( + &commitment, + &y_g1, + #[cfg(feature = "hints")] + hints, + ); + + // Compute t_minus_z = [τ]₂ - [z]G₂ + let z_g2 = scalar_mul_twist_bls12_381( + &g2, + &z, + #[cfg(feature = "hints")] + hints, + ); + let t_minus_z = sub_complete_twist_bls12_381( + &tau_g2, + &z_g2, + #[cfg(feature = "hints")] + hints, + ); + + // LHS: e(C - [y]G₁, G₂) - G₂ is never infinity + // RHS: e(π, [τ]₂ - [z]G₂) + let c_minus_y_is_inf = eq(&c_minus_y, &G1_IDENTITY); + let proof_is_inf = eq(&proof, &G1_IDENTITY); + let t_minus_z_is_inf = eq(&t_minus_z, &G2_IDENTITY); + + // If c_minus_y = O: LHS = e(O, G₂) = 1 + // => RHS must equal 1, i.e., e(π, [τ]₂ - [z]G₂) = 1 + // => π = O or [τ]₂ - [z]G₂ = O + if c_minus_y_is_inf { + return proof_is_inf || t_minus_z_is_inf; + } + + // If π = O or [τ]₂ - [z]G₂ = O: RHS = 1 + // => LHS must equal 1, i.e., e(C - [y]G₁, G₂) = 1 + // => C - [y]G₁ = O (but we already handled that above) + // => This means c_minus_y ≠ O but RHS = 1, so verification fails + if proof_is_inf || t_minus_z_is_inf { + return false; + } + + // General case: no infinities, proceed with pairing check + // The check is equivalent to: + // e(C - [y]G₁, -G₂) · e(π, [τ]₂ - [z]G₂) = 1 + let neg_g2 = neg_twist_bls12_381( + &g2, + #[cfg(feature = "hints")] + hints, + ); + + // Batch pairing check + let g1_points = [c_minus_y, proof]; + let g2_points = [neg_g2, t_minus_z]; + + // Check if the pairing result equals 1 + is_one(&pairing_batch_bls12_381( + &g1_points, + &g2_points, + #[cfg(feature = "hints")] + hints, + )) +} + +/// Verify KZG proof using BLS12-381 implementation. +/// +/// # Arguments +/// * `z` - 32 bytes big-endian scalar (evaluation point) +/// * `y` - 32 bytes big-endian scalar (claimed evaluation) +/// * `commitment` - 48 bytes compressed G1 point (polynomial commitment) +/// * `proof` - 48 bytes compressed G1 point (KZG proof) +/// +/// # Safety +/// All pointers must be valid and properly aligned. +/// +/// # Returns +/// * 1 if the proof is valid +/// * 0 if the proof is invalid +/// * 2 if there was a parsing error (invalid input) +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_verify_kzg_proof_c")] +pub unsafe extern "C" fn verify_kzg_proof_c( + z: *const u8, + y: *const u8, + commitment: *const u8, + proof: *const u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { + let z_bytes: &[u8; 32] = &*(z as *const [u8; 32]); + let y_bytes: &[u8; 32] = &*(y as *const [u8; 32]); + let commitment_bytes: &[u8; 48] = &*(commitment as *const [u8; 48]); + let proof_bytes: &[u8; 48] = &*(proof as *const [u8; 48]); + + verify_kzg_proof( + z_bytes, + y_bytes, + commitment_bytes, + proof_bytes, + #[cfg(feature = "hints")] + hints, + ) +} + +/// Convert 32-byte big-endian scalar to [u64; 4] little-endian, checking canonicity +/// Returns None if the scalar is not canonical (>= R) +fn scalar_bytes_be_to_u64_le_canonical(bytes: &[u8; 32]) -> Option<[u64; 4]> { + // Convert big-endian bytes to little-endian u64 limbs + let mut scalar = [0u64; 4]; + for i in 0..4 { + for j in 0..8 { + scalar[3 - i] |= (bytes[i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + // Check if scalar < R + if !lt(&scalar, &R) { + return None; + } + + Some(scalar) +} diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/map_to_curve.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/map_to_curve.rs new file mode 100644 index 000000000..ee3c8d7f5 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/map_to_curve.rs @@ -0,0 +1,711 @@ +use crate::zisklib::{eq, is_zero, lt}; + +use super::{ + constants::{ + COFACTOR_G1, ISO_A_G1, ISO_A_G2, ISO_B_G1, ISO_B_G2, ISO_X_DEN_G1, ISO_X_DEN_G2, + ISO_X_NUM_G1, ISO_X_NUM_G2, ISO_Y_DEN_G1, ISO_Y_DEN_G2, ISO_Y_NUM_G1, ISO_Y_NUM_G2, + SWU_Z2_G1, SWU_Z_G1, SWU_Z_G2, + }, + curve::{g1_u64_le_to_bytes_be_bls12_381, scalar_mul_bls12_381}, + fp::{ + add_fp_bls12_381, bytes_be_to_u64_le_fp_bls12_381, inv_fp_bls12_381, mul_fp_bls12_381, + neg_fp_bls12_381, sgn0_fp_bls12_381, sqrt_fp_bls12_381, square_fp_bls12_381, + }, + fp2::{ + add_fp2_bls12_381, bytes_be_to_u64_le_fp2_bls12_381, inv_fp2_bls12_381, mul_fp2_bls12_381, + neg_fp2_bls12_381, sgn0_fp2_bls12_381, sqrt_fp2_bls12_381, square_fp2_bls12_381, + }, + twist::{ + clear_cofactor_twist_bls12_381, g2_u64_le_to_bytes_be_bls12_381, scalar_mul_twist_bls12_381, + }, +}; + +/// Maps a field element to a point on the BLS12-381 G1 curve +pub fn map_to_curve_g1_bls12_381( + u: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { + // Step 1: Map to isogenous curve E' using simplified SWU + let p_prime = map_to_curve_simple_swu_g1_bls12_381( + u, + #[cfg(feature = "hints")] + hints, + ); + + // Step 2: Apply isogeny map from E' to E + let p = isogeny_map_g1_bls12_381( + &p_prime, + #[cfg(feature = "hints")] + hints, + ); + + // Step 3: Clear cofactor + scalar_mul_bls12_381( + &p, + &COFACTOR_G1, + #[cfg(feature = "hints")] + hints, + ) +} + +/// Maps a field element in Fp2 to a point on the BLS12-381 G2 curve +pub fn map_to_curve_g2_bls12_381( + u: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { + // Step 1: Map to isogenous curve E' using simplified SWU + let p_prime = map_to_curve_simple_swu_g2_bls12_381( + u, + #[cfg(feature = "hints")] + hints, + ); + + // Step 2: Apply isogeny map from E' to E + let p = isogeny_map_g2_bls12_381( + &p_prime, + #[cfg(feature = "hints")] + hints, + ); + + // Step 3: Clear cofactor + clear_cofactor_twist_bls12_381( + &p, + #[cfg(feature = "hints")] + hints, + ) +} + +/// Maps a field element u ∈ Fp to a point on the isogenous curve E' +/// using the simplified Shallue-van de Woestijne-Ulas (SWU) method for AB != 0 +fn map_to_curve_simple_swu_g1_bls12_381( + u: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { + // 1. tv1 = inv0(Z^2 * u^4 + Z * u^2) + let u2 = square_fp_bls12_381( + u, + #[cfg(feature = "hints")] + hints, + ); + let u4 = square_fp_bls12_381( + &u2, + #[cfg(feature = "hints")] + hints, + ); + let z_u2 = mul_fp_bls12_381( + &SWU_Z_G1, + &u2, + #[cfg(feature = "hints")] + hints, + ); + let z2_u4 = mul_fp_bls12_381( + &SWU_Z2_G1, + &u4, + #[cfg(feature = "hints")] + hints, + ); + let tv1_denom = add_fp_bls12_381( + &z2_u4, + &z_u2, + #[cfg(feature = "hints")] + hints, + ); + let tv1 = inv_fp_bls12_381( + &tv1_denom, + #[cfg(feature = "hints")] + hints, + ); + + // 2. x1 = (-B / A) * (1 + tv1) + let neg_b = neg_fp_bls12_381( + &ISO_B_G1, + #[cfg(feature = "hints")] + hints, + ); + let a_inv = inv_fp_bls12_381( + &ISO_A_G1, + #[cfg(feature = "hints")] + hints, + ); + let neg_b_over_a = mul_fp_bls12_381( + &neg_b, + &a_inv, + #[cfg(feature = "hints")] + hints, + ); + let one = [1u64, 0, 0, 0, 0, 0]; + let one_plus_tv1 = add_fp_bls12_381( + &one, + &tv1, + #[cfg(feature = "hints")] + hints, + ); + let mut x1 = mul_fp_bls12_381( + &neg_b_over_a, + &one_plus_tv1, + #[cfg(feature = "hints")] + hints, + ); + + // 3. If tv1 == 0, set x1 = B / (Z * A) + if is_zero(&tv1) { + let z_a = mul_fp_bls12_381( + &SWU_Z_G1, + &ISO_A_G1, + #[cfg(feature = "hints")] + hints, + ); + let z_a_inv = inv_fp_bls12_381( + &z_a, + #[cfg(feature = "hints")] + hints, + ); + x1 = mul_fp_bls12_381( + &ISO_B_G1, + &z_a_inv, + #[cfg(feature = "hints")] + hints, + ); + } + + // 4. gx1 = x1^3 + A * x1 + B + let gx1 = compute_y2_iso_g1_bls12_381( + &x1, + #[cfg(feature = "hints")] + hints, + ); + + // 5. x2 = Z * u^2 * x1 (computed lazily below if needed) + + // 6. gx2 = x2^3 + A * x2 + B (computed lazily below if needed) + + // 7-8. Select x and y based on whether gx1 is square + let (y1, gx1_is_qr) = sqrt_fp_bls12_381( + &gx1, + #[cfg(feature = "hints")] + hints, + ); + let (x, mut y) = if gx1_is_qr { + (x1, y1) + } else { + let x2 = mul_fp_bls12_381( + &z_u2, + &x1, + #[cfg(feature = "hints")] + hints, + ); + let gx2 = compute_y2_iso_g1_bls12_381( + &x2, + #[cfg(feature = "hints")] + hints, + ); + let (y2, _) = sqrt_fp_bls12_381( + &gx2, + #[cfg(feature = "hints")] + hints, + ); + (x2, y2) + }; + + // 9. If sgn0(u) != sgn0(y), set y = -y + if sgn0_fp_bls12_381(u) != sgn0_fp_bls12_381(&y) { + y = neg_fp_bls12_381( + &y, + #[cfg(feature = "hints")] + hints, + ); + } + + // Return point (x, y) on E' + let mut point = [0u64; 12]; + point[0..6].copy_from_slice(&x); + point[6..12].copy_from_slice(&y); + point +} + +/// Maps a field element u ∈ Fp2 to a point on the isogenous curve E' +/// using the simplified Shallue-van de Woestijne-Ulas (SWU) method for AB != 0 +fn map_to_curve_simple_swu_g2_bls12_381( + u: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { + // 1. tv1 = inv0(Z^2 * u^4 + Z * u^2) + let u2 = square_fp2_bls12_381( + u, + #[cfg(feature = "hints")] + hints, + ); + let u4 = square_fp2_bls12_381( + &u2, + #[cfg(feature = "hints")] + hints, + ); + let z_u2 = mul_fp2_bls12_381( + &SWU_Z_G2, + &u2, + #[cfg(feature = "hints")] + hints, + ); + let z2 = square_fp2_bls12_381( + &SWU_Z_G2, + #[cfg(feature = "hints")] + hints, + ); + let z2_u4 = mul_fp2_bls12_381( + &z2, + &u4, + #[cfg(feature = "hints")] + hints, + ); + let tv1_denom = add_fp2_bls12_381( + &z2_u4, + &z_u2, + #[cfg(feature = "hints")] + hints, + ); + let tv1 = inv_fp2_bls12_381( + &tv1_denom, + #[cfg(feature = "hints")] + hints, + ); + + // 2. x1 = (-B / A) * (1 + tv1) + let neg_b = neg_fp2_bls12_381( + &ISO_B_G2, + #[cfg(feature = "hints")] + hints, + ); + let a_inv = inv_fp2_bls12_381( + &ISO_A_G2, + #[cfg(feature = "hints")] + hints, + ); + let neg_b_over_a = mul_fp2_bls12_381( + &neg_b, + &a_inv, + #[cfg(feature = "hints")] + hints, + ); + let one: [u64; 12] = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let one_plus_tv1 = add_fp2_bls12_381( + &one, + &tv1, + #[cfg(feature = "hints")] + hints, + ); + let mut x1 = mul_fp2_bls12_381( + &neg_b_over_a, + &one_plus_tv1, + #[cfg(feature = "hints")] + hints, + ); + + // 3. If tv1 == 0, set x1 = B / (Z * A) + if is_zero(&tv1) { + let z_a = mul_fp2_bls12_381( + &SWU_Z_G2, + &ISO_A_G2, + #[cfg(feature = "hints")] + hints, + ); + let z_a_inv = inv_fp2_bls12_381( + &z_a, + #[cfg(feature = "hints")] + hints, + ); + x1 = mul_fp2_bls12_381( + &ISO_B_G2, + &z_a_inv, + #[cfg(feature = "hints")] + hints, + ); + } + + // 4. gx1 = x1^3 + A * x1 + B + let gx1 = compute_y2_iso_g2_bls12_381( + &x1, + #[cfg(feature = "hints")] + hints, + ); + + // 7-8. Select x and y based on whether gx1 is square + let (y1, gx1_is_qr) = sqrt_fp2_bls12_381( + &gx1, + #[cfg(feature = "hints")] + hints, + ); + let (x, mut y) = if gx1_is_qr { + (x1, y1) + } else { + // 5. x2 = Z * u^2 * x1 + let x2 = mul_fp2_bls12_381( + &z_u2, + &x1, + #[cfg(feature = "hints")] + hints, + ); + // 6. gx2 = x2^3 + A * x2 + B + let gx2 = compute_y2_iso_g2_bls12_381( + &x2, + #[cfg(feature = "hints")] + hints, + ); + let (y2, _) = sqrt_fp2_bls12_381( + &gx2, + #[cfg(feature = "hints")] + hints, + ); + (x2, y2) + }; + + // 9. If sgn0(u) != sgn0(y), set y = -y + if sgn0_fp2_bls12_381(u) != sgn0_fp2_bls12_381(&y) { + y = neg_fp2_bls12_381( + &y, + #[cfg(feature = "hints")] + hints, + ); + } + + // Return point (x, y) on E' + let mut point = [0u64; 24]; + point[0..12].copy_from_slice(&x); + point[12..24].copy_from_slice(&y); + point +} + +/// Compute y² = x³ + A'x + B' for the isogenous curve E' (G1) +fn compute_y2_iso_g1_bls12_381( + x: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 6] { + let x2 = square_fp_bls12_381( + x, + #[cfg(feature = "hints")] + hints, + ); + let x3 = mul_fp_bls12_381( + &x2, + x, + #[cfg(feature = "hints")] + hints, + ); + let ax = mul_fp_bls12_381( + &ISO_A_G1, + x, + #[cfg(feature = "hints")] + hints, + ); + let x3_ax = add_fp_bls12_381( + &x3, + &ax, + #[cfg(feature = "hints")] + hints, + ); + add_fp_bls12_381( + &x3_ax, + &ISO_B_G1, + #[cfg(feature = "hints")] + hints, + ) +} + +/// Compute y² = x³ + A'x + B' for the isogenous curve E' (G2) +fn compute_y2_iso_g2_bls12_381( + x: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { + let x2 = square_fp2_bls12_381( + x, + #[cfg(feature = "hints")] + hints, + ); + let x3 = mul_fp2_bls12_381( + &x2, + x, + #[cfg(feature = "hints")] + hints, + ); + let ax = mul_fp2_bls12_381( + &ISO_A_G2, + x, + #[cfg(feature = "hints")] + hints, + ); + let x3_ax = add_fp2_bls12_381( + &x3, + &ax, + #[cfg(feature = "hints")] + hints, + ); + add_fp2_bls12_381( + &x3_ax, + &ISO_B_G2, + #[cfg(feature = "hints")] + hints, + ) +} + +/// Apply the 11-isogeny map from E' to E for G1 +fn isogeny_map_g1_bls12_381( + p: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { + let x: [u64; 6] = p[0..6].try_into().unwrap(); + let y: [u64; 6] = p[6..12].try_into().unwrap(); + + // Compute x-coordinate: x_num / x_den + let x_num = eval_poly_fp( + &ISO_X_NUM_G1, + &x, + #[cfg(feature = "hints")] + hints, + ); + let x_den = eval_poly_fp( + &ISO_X_DEN_G1, + &x, + #[cfg(feature = "hints")] + hints, + ); + let x_den_inv = inv_fp_bls12_381( + &x_den, + #[cfg(feature = "hints")] + hints, + ); + let x_out = mul_fp_bls12_381( + &x_num, + &x_den_inv, + #[cfg(feature = "hints")] + hints, + ); + + // Compute y-coordinate: y' * y_num / y_den + let y_num = eval_poly_fp( + &ISO_Y_NUM_G1, + &x, + #[cfg(feature = "hints")] + hints, + ); + let y_den = eval_poly_fp( + &ISO_Y_DEN_G1, + &x, + #[cfg(feature = "hints")] + hints, + ); + let y_den_inv = inv_fp_bls12_381( + &y_den, + #[cfg(feature = "hints")] + hints, + ); + let y_frac = mul_fp_bls12_381( + &y_num, + &y_den_inv, + #[cfg(feature = "hints")] + hints, + ); + let y_out = mul_fp_bls12_381( + &y, + &y_frac, + #[cfg(feature = "hints")] + hints, + ); + + let mut result = [0u64; 12]; + result[0..6].copy_from_slice(&x_out); + result[6..12].copy_from_slice(&y_out); + result +} + +/// Apply the 3-isogeny map from E' to E for G2 +fn isogeny_map_g2_bls12_381( + p: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { + let x: [u64; 12] = p[0..12].try_into().unwrap(); + let y: [u64; 12] = p[12..24].try_into().unwrap(); + + // Compute x-coordinate: x_num / x_den + let x_num = eval_poly_fp2( + &ISO_X_NUM_G2, + &x, + #[cfg(feature = "hints")] + hints, + ); + let x_den = eval_poly_fp2( + &ISO_X_DEN_G2, + &x, + #[cfg(feature = "hints")] + hints, + ); + let x_den_inv = inv_fp2_bls12_381( + &x_den, + #[cfg(feature = "hints")] + hints, + ); + let x_out = mul_fp2_bls12_381( + &x_num, + &x_den_inv, + #[cfg(feature = "hints")] + hints, + ); + + // Compute y-coordinate: y' * y_num / y_den + let y_num = eval_poly_fp2( + &ISO_Y_NUM_G2, + &x, + #[cfg(feature = "hints")] + hints, + ); + let y_den = eval_poly_fp2( + &ISO_Y_DEN_G2, + &x, + #[cfg(feature = "hints")] + hints, + ); + let y_den_inv = inv_fp2_bls12_381( + &y_den, + #[cfg(feature = "hints")] + hints, + ); + let y_frac = mul_fp2_bls12_381( + &y_num, + &y_den_inv, + #[cfg(feature = "hints")] + hints, + ); + let y_out = mul_fp2_bls12_381( + &y, + &y_frac, + #[cfg(feature = "hints")] + hints, + ); + + let mut result = [0u64; 24]; + result[0..12].copy_from_slice(&x_out); + result[12..24].copy_from_slice(&y_out); + result +} + +/// Evaluate a polynomial at x +fn eval_poly_fp( + coeffs: &[[u64; 6]; N], + x: &[u64; 6], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 6] { + // Use Horner's method + let mut result = coeffs[N - 1]; + for i in (0..N - 1).rev() { + result = mul_fp_bls12_381( + &result, + x, + #[cfg(feature = "hints")] + hints, + ); + result = add_fp_bls12_381( + &result, + &coeffs[i], + #[cfg(feature = "hints")] + hints, + ); + } + result +} + +/// Evaluate a polynomial at x over Fp2 +fn eval_poly_fp2( + coeffs: &[[u64; 12]; N], + x: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 12] { + // Use Horner's method + let mut result = coeffs[N - 1]; + for i in (0..N - 1).rev() { + result = mul_fp2_bls12_381( + &result, + x, + #[cfg(feature = "hints")] + hints, + ); + result = add_fp2_bls12_381( + &result, + &coeffs[i], + #[cfg(feature = "hints")] + hints, + ); + } + result +} + +/// BLS12-381 map Fp field element to G1 point +/// +/// Input format: 48 bytes field element (big-endian) +/// Output format: 96 bytes G1 point (x || y big-endian) +/// +/// ### Safety +/// - `fp` must point to a valid `[u8; 48]` +/// - `ret` must point to a valid `[u8; 96]` for the output +/// +/// Returns: +/// - 0 = success +/// - 1 = error (input not in field) +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_bls12_381_fp_to_g1_c")] +pub unsafe extern "C" fn bls12_381_fp_to_g1_c( + ret: *mut u8, + fp: *const u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u8 { + let fp_bytes: &[u8; 48] = &*(fp as *const [u8; 48]); + let ret_bytes: &mut [u8; 96] = &mut *(ret as *mut [u8; 96]); + + // Parse field element + let u = bytes_be_to_u64_le_fp_bls12_381(fp_bytes); + + // Map to curve + let result = map_to_curve_g1_bls12_381( + &u, + #[cfg(feature = "hints")] + hints, + ); + + // Encode result + g1_u64_le_to_bytes_be_bls12_381(&result, ret_bytes); + 0 +} + +/// BLS12-381 map Fp2 field element to G2 point +/// +/// Input format: 96 bytes Fp2 element (c0 || c1, each 48 bytes big-endian) +/// Output format: 192 bytes G2 point (x_r || x_i || y_r || y_i, each 48 bytes big-endian) +/// +/// ### Safety +/// - `fp2` must point to a valid `[u8; 96]` +/// - `ret` must point to a valid `[u8; 192]` for the output +/// +/// Returns: +/// - 0 = success +/// - 1 = error (input not in field) +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_bls12_381_fp2_to_g2_c")] +pub unsafe extern "C" fn bls12_381_fp2_to_g2_c( + ret: *mut u8, + fp2: *const u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u8 { + let fp2_bytes: &[u8; 96] = &*(fp2 as *const [u8; 96]); + let ret_bytes: &mut [u8; 192] = &mut *(ret as *mut [u8; 192]); + + // Parse Fp2 element + let u = bytes_be_to_u64_le_fp2_bls12_381(fp2_bytes); + + // Map to curve + let result = map_to_curve_g2_bls12_381( + &u, + #[cfg(feature = "hints")] + hints, + ); + + // Encode result + g2_u64_le_to_bytes_be_bls12_381(&result, ret_bytes); + 0 +} diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/miller_loop.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/miller_loop.rs index 9f46f2b26..0c9f6bdb5 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/miller_loop.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/miller_loop.rs @@ -1,6 +1,8 @@ //! Miller loop for BLS12-381 -use crate::zisklib::{eq, fcall_bls12_381_add_line_coeffs, fcall_bls12_381_dbl_line_coeffs}; +use crate::zisklib::{ + eq, fcall_bls12_381_twist_add_line_coeffs, fcall_bls12_381_twist_dbl_line_coeffs, +}; use super::{ constants::{EXT_U_INV, X_ABS_BIN_BE}, @@ -13,16 +15,45 @@ use super::{ }; /// Computes the Miller loop of a non-zero point `p` in G1 and a non-zero point `q` in G2 -pub fn miller_loop_bls12_381(p: &[u64; 12], q: &[u64; 24]) -> [u64; 72] { +/// +/// Note: It is not optimized for the case where either `p` or `q` is the point at infinity. +pub fn miller_loop_bls12_381( + p: &[u64; 12], + q: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { // Before the loop starts, compute xp' = (-xp/yp)·1/(1+u) and yp' = (1/yp)·1/(1+u) let mut xp: [u64; 6] = p[0..6].try_into().unwrap(); let mut yp: [u64; 6] = p[6..12].try_into().unwrap(); - yp = inv_fp_bls12_381(&yp); - xp = neg_fp_bls12_381(&xp); - xp = mul_fp_bls12_381(&xp, &yp); - - let xp_prime: [u64; 12] = scalar_mul_fp2_bls12_381(&EXT_U_INV, &xp); - let yp_prime: [u64; 12] = scalar_mul_fp2_bls12_381(&EXT_U_INV, &yp); + yp = inv_fp_bls12_381( + &yp, + #[cfg(feature = "hints")] + hints, + ); + xp = neg_fp_bls12_381( + &xp, + #[cfg(feature = "hints")] + hints, + ); + xp = mul_fp_bls12_381( + &xp, + &yp, + #[cfg(feature = "hints")] + hints, + ); + + let xp_prime: [u64; 12] = scalar_mul_fp2_bls12_381( + &EXT_U_INV, + &xp, + #[cfg(feature = "hints")] + hints, + ); + let yp_prime: [u64; 12] = scalar_mul_fp2_bls12_381( + &EXT_U_INV, + &yp, + #[cfg(feature = "hints")] + hints, + ); // Initialize the Miller loop with r = q and f = 1 let mut r: [u64; 24] = q[0..24].try_into().unwrap(); @@ -33,41 +64,112 @@ pub fn miller_loop_bls12_381(p: &[u64; 12], q: &[u64; 24]) -> [u64; 72] { }; for &bit in X_ABS_BIN_BE.iter().skip(1) { // Hint the coefficients (𝜆,𝜇) of the line l_{twist(r),twist(r)} - let (lambda, mu) = fcall_bls12_381_dbl_line_coeffs(&r); + let (lambda, mu) = fcall_bls12_381_twist_dbl_line_coeffs( + &r, + #[cfg(feature = "hints")] + hints, + ); // Check that the line is correct - assert!(is_tangent_twist_bls12_381(&r, &lambda, &mu)); + assert!(is_tangent_twist_bls12_381( + &r, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + )); // Compute f = f² · line_{twist(r),twist(r)}(p) - f = square_fp12_bls12_381(&f); - let l = line_eval_twist_bls12_381(&lambda, &mu, &xp_prime, &yp_prime); - f = sparse_mul_fp12_bls12_381(&f, &l); + f = square_fp12_bls12_381( + &f, + #[cfg(feature = "hints")] + hints, + ); + let l = line_eval_twist_bls12_381( + &lambda, + &mu, + &xp_prime, + &yp_prime, + #[cfg(feature = "hints")] + hints, + ); + f = sparse_mul_fp12_bls12_381( + &f, + &l, + #[cfg(feature = "hints")] + hints, + ); // Double r - r = dbl_twist_with_hints_bls12_381(&r, &lambda, &mu); + r = dbl_twist_with_hints_bls12_381( + &r, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + ); if bit == 1 { // Hint the coefficients (𝜆,𝜇) of the line l_{twist(r),twist(q)} - let (lambda, mu) = fcall_bls12_381_add_line_coeffs(&r, q); + let (lambda, mu) = fcall_bls12_381_twist_add_line_coeffs( + &r, + q, + #[cfg(feature = "hints")] + hints, + ); // Check that the line is correct - assert!(is_line_twist_bls12_381(&r, q, &lambda, &mu)); + assert!(is_line_twist_bls12_381( + &r, + q, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + )); // Compute f = f · line_{twist(r),twist(q)} - let l = line_eval_twist_bls12_381(&lambda, &mu, &xp_prime, &yp_prime); - f = sparse_mul_fp12_bls12_381(&f, &l); + let l = line_eval_twist_bls12_381( + &lambda, + &mu, + &xp_prime, + &yp_prime, + #[cfg(feature = "hints")] + hints, + ); + f = sparse_mul_fp12_bls12_381( + &f, + &l, + #[cfg(feature = "hints")] + hints, + ); // Add r and q - r = add_twist_with_hints_bls12_381(&r, q, &lambda, &mu); + r = add_twist_with_hints_bls12_381( + &r, + q, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + ); } } // Finally, compute f̅ - conjugate_fp12_bls12_381(&f) + conjugate_fp12_bls12_381( + &f, + #[cfg(feature = "hints")] + hints, + ) } /// Computes the Miller loop for the BN254 curve for a batch of non-zero points `p_i` in G1 and non-zero points `q_i` in G2 -pub fn miller_loop_batch_bls12_381(g1_points: &[[u64; 12]], g2_points: &[[u64; 24]]) -> [u64; 72] { +pub fn miller_loop_batch_bls12_381( + g1_points: &[[u64; 12]], + g2_points: &[[u64; 24]], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { // Before the loop starts, compute xp' = (-xp/yp)·1/(1+u) and yp' = (1/yp)·1/(1+u) let n = g1_points.len(); let mut xp_primes: Vec<[u64; 12]> = Vec::with_capacity(n); @@ -75,12 +177,35 @@ pub fn miller_loop_batch_bls12_381(g1_points: &[[u64; 12]], g2_points: &[[u64; 2 for p in g1_points.iter() { let mut xp: [u64; 6] = p[0..6].try_into().unwrap(); let mut yp: [u64; 6] = p[6..12].try_into().unwrap(); - yp = inv_fp_bls12_381(&yp); - xp = neg_fp_bls12_381(&xp); - xp = mul_fp_bls12_381(&xp, &yp); - - let xp_prime: [u64; 12] = scalar_mul_fp2_bls12_381(&EXT_U_INV, &xp); - let yp_prime: [u64; 12] = scalar_mul_fp2_bls12_381(&EXT_U_INV, &yp); + yp = inv_fp_bls12_381( + &yp, + #[cfg(feature = "hints")] + hints, + ); + xp = neg_fp_bls12_381( + &xp, + #[cfg(feature = "hints")] + hints, + ); + xp = mul_fp_bls12_381( + &xp, + &yp, + #[cfg(feature = "hints")] + hints, + ); + + let xp_prime: [u64; 12] = scalar_mul_fp2_bls12_381( + &EXT_U_INV, + &xp, + #[cfg(feature = "hints")] + hints, + ); + let yp_prime: [u64; 12] = scalar_mul_fp2_bls12_381( + &EXT_U_INV, + &yp, + #[cfg(feature = "hints")] + hints, + ); xp_primes.push(xp_prime); yp_primes.push(yp_prime); } @@ -91,46 +216,113 @@ pub fn miller_loop_batch_bls12_381(g1_points: &[[u64; 12]], g2_points: &[[u64; 2 f[0] = 1; for &bit in X_ABS_BIN_BE.iter().skip(1) { // Compute f = f² · line_{twist(r),twist(r)}(p) - f = square_fp12_bls12_381(&f); + f = square_fp12_bls12_381( + &f, + #[cfg(feature = "hints")] + hints, + ); for i in 0..n { let r = &mut r[i]; // Hint the coefficients (𝜆,𝜇) of the line l_{twist(r),twist(r)} - let (lambda, mu) = fcall_bls12_381_dbl_line_coeffs(r); + let (lambda, mu) = fcall_bls12_381_twist_dbl_line_coeffs( + r, + #[cfg(feature = "hints")] + hints, + ); // Check that the line is correct - assert!(is_tangent_twist_bls12_381(r, &lambda, &mu)); + assert!(is_tangent_twist_bls12_381( + r, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + )); let xp_prime = &xp_primes[i]; let yp_prime = &yp_primes[i]; - let l = line_eval_twist_bls12_381(&lambda, &mu, xp_prime, yp_prime); - f = sparse_mul_fp12_bls12_381(&f, &l); + let l = line_eval_twist_bls12_381( + &lambda, + &mu, + xp_prime, + yp_prime, + #[cfg(feature = "hints")] + hints, + ); + f = sparse_mul_fp12_bls12_381( + &f, + &l, + #[cfg(feature = "hints")] + hints, + ); // Double r - *r = dbl_twist_with_hints_bls12_381(r, &lambda, &mu); + *r = dbl_twist_with_hints_bls12_381( + r, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + ); if bit == 1 { let q = &g2_points[i]; // Hint the coefficients (𝜆,𝜇) of the line l_{twist(r),twist(q')} - let (lambda, mu) = fcall_bls12_381_add_line_coeffs(r, q); + let (lambda, mu) = fcall_bls12_381_twist_add_line_coeffs( + r, + q, + #[cfg(feature = "hints")] + hints, + ); // Check that the line is correct - assert!(is_line_twist_bls12_381(r, q, &lambda, &mu)); + assert!(is_line_twist_bls12_381( + r, + q, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + )); // Compute f = f · line_{twist(r),twist(q')} - let l = line_eval_twist_bls12_381(&lambda, &mu, xp_prime, yp_prime); - f = sparse_mul_fp12_bls12_381(&f, &l); + let l = line_eval_twist_bls12_381( + &lambda, + &mu, + xp_prime, + yp_prime, + #[cfg(feature = "hints")] + hints, + ); + f = sparse_mul_fp12_bls12_381( + &f, + &l, + #[cfg(feature = "hints")] + hints, + ); // Add r and q - *r = add_twist_with_hints_bls12_381(r, q, &lambda, &mu); + *r = add_twist_with_hints_bls12_381( + r, + q, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + ); } } } // Finally, compute f̅ - conjugate_fp12_bls12_381(&f) + conjugate_fp12_bls12_381( + &f, + #[cfg(feature = "hints")] + hints, + ) } // We follow https://eprint.iacr.org/2024/640.pdf for the line computations. @@ -150,24 +342,66 @@ fn is_line_twist_bls12_381( q2: &[u64; 24], lambda: &[u64; 12], mu: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, ) -> bool { - line_check_twist_bls12_381(q1, lambda, mu) && line_check_twist_bls12_381(q2, lambda, mu) + line_check_twist_bls12_381( + q1, + lambda, + mu, + #[cfg(feature = "hints")] + hints, + ) && line_check_twist_bls12_381( + q2, + lambda, + mu, + #[cfg(feature = "hints")] + hints, + ) } /// Checks if the line defined by (𝜆,𝜇) is tangent to the curve at non-zero point `q` in G2 #[inline] -fn is_tangent_twist_bls12_381(q: &[u64; 24], lambda: &[u64; 12], mu: &[u64; 12]) -> bool { +fn is_tangent_twist_bls12_381( + q: &[u64; 24], + lambda: &[u64; 12], + mu: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { // Check the line passes through q - let curve_check = line_check_twist_bls12_381(q, lambda, mu); + let curve_check = line_check_twist_bls12_381( + q, + lambda, + mu, + #[cfg(feature = "hints")] + hints, + ); // Check the line is tangent at q by checking that 2𝜆y = 3x² let x: &[u64; 12] = q[0..12].try_into().unwrap(); let y: &[u64; 12] = q[12..24].try_into().unwrap(); - let mut lhs = mul_fp2_bls12_381(lambda, y); - lhs = dbl_fp2_bls12_381(&lhs); - - let mut rhs = square_fp2_bls12_381(x); - rhs = scalar_mul_fp2_bls12_381(&rhs, &[3, 0, 0, 0, 0, 0]); + let mut lhs = mul_fp2_bls12_381( + lambda, + y, + #[cfg(feature = "hints")] + hints, + ); + lhs = dbl_fp2_bls12_381( + &lhs, + #[cfg(feature = "hints")] + hints, + ); + + let mut rhs = square_fp2_bls12_381( + x, + #[cfg(feature = "hints")] + hints, + ); + rhs = scalar_mul_fp2_bls12_381( + &rhs, + &[3, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); let tangent_check = eq(&lhs, &rhs); curve_check && tangent_check @@ -175,13 +409,28 @@ fn is_tangent_twist_bls12_381(q: &[u64; 24], lambda: &[u64; 12], mu: &[u64; 12]) /// Check if the line defined by (𝜆,𝜇) passes through non-zero point `q` in G2 #[inline] -fn line_check_twist_bls12_381(q: &[u64; 24], lambda: &[u64; 12], mu: &[u64; 12]) -> bool { +fn line_check_twist_bls12_381( + q: &[u64; 24], + lambda: &[u64; 12], + mu: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { let x: &[u64; 12] = q[0..12].try_into().unwrap(); let y: &[u64; 12] = q[12..24].try_into().unwrap(); // Check if y = λx + μ - let mut rhs = mul_fp2_bls12_381(lambda, x); - rhs = add_fp2_bls12_381(&rhs, mu); + let mut rhs = mul_fp2_bls12_381( + lambda, + x, + #[cfg(feature = "hints")] + hints, + ); + rhs = add_fp2_bls12_381( + &rhs, + mu, + #[cfg(feature = "hints")] + hints, + ); eq(&rhs, y) } @@ -192,9 +441,24 @@ fn line_eval_twist_bls12_381( mu: &[u64; 12], x: &[u64; 12], y: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, ) -> [u64; 24] { - let coeff1 = mul_fp2_bls12_381(mu, &neg_fp2_bls12_381(y)); - let coeff2 = mul_fp2_bls12_381(lambda, x); + let coeff1 = mul_fp2_bls12_381( + mu, + &neg_fp2_bls12_381( + y, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + let coeff2 = mul_fp2_bls12_381( + lambda, + x, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 24]; result[0..12].copy_from_slice(&coeff1); @@ -210,19 +474,48 @@ fn add_twist_with_hints_bls12_381( q2: &[u64; 24], lambda: &[u64; 12], mu: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, ) -> [u64; 24] { let x1: &[u64; 12] = q1[0..12].try_into().unwrap(); let x2: &[u64; 12] = q2[0..12].try_into().unwrap(); // Compute x3 = λ² - x1 - x2 - let mut x3 = square_fp2_bls12_381(lambda); - x3 = sub_fp2_bls12_381(&x3, x1); - x3 = sub_fp2_bls12_381(&x3, x2); + let mut x3 = square_fp2_bls12_381( + lambda, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bls12_381( + &x3, + x1, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bls12_381( + &x3, + x2, + #[cfg(feature = "hints")] + hints, + ); // Compute y3 = -λx3 - μ - let mut y3 = mul_fp2_bls12_381(lambda, &x3); - y3 = add_fp2_bls12_381(mu, &y3); - y3 = neg_fp2_bls12_381(&y3); + let mut y3 = mul_fp2_bls12_381( + lambda, + &x3, + #[cfg(feature = "hints")] + hints, + ); + y3 = add_fp2_bls12_381( + mu, + &y3, + #[cfg(feature = "hints")] + hints, + ); + y3 = neg_fp2_bls12_381( + &y3, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 24]; result[0..12].copy_from_slice(&x3); @@ -232,17 +525,49 @@ fn add_twist_with_hints_bls12_381( /// Doubling of a non-zero point `q` in G2 with hinted line coefficients (𝜆,𝜇) #[inline] -fn dbl_twist_with_hints_bls12_381(q: &[u64; 24], lambda: &[u64; 12], mu: &[u64; 12]) -> [u64; 24] { +fn dbl_twist_with_hints_bls12_381( + q: &[u64; 24], + lambda: &[u64; 12], + mu: &[u64; 12], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let x: &[u64; 12] = q[0..12].try_into().unwrap(); // Compute x3 = λ² - 2x - let mut x3 = square_fp2_bls12_381(lambda); - x3 = sub_fp2_bls12_381(&x3, &dbl_fp2_bls12_381(x)); + let mut x3 = square_fp2_bls12_381( + lambda, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bls12_381( + &x3, + &dbl_fp2_bls12_381( + x, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // Compute y3 = -λx3 - μ - let mut y3 = mul_fp2_bls12_381(lambda, &x3); - y3 = add_fp2_bls12_381(mu, &y3); - y3 = neg_fp2_bls12_381(&y3); + let mut y3 = mul_fp2_bls12_381( + lambda, + &x3, + #[cfg(feature = "hints")] + hints, + ); + y3 = add_fp2_bls12_381( + mu, + &y3, + #[cfg(feature = "hints")] + hints, + ); + y3 = neg_fp2_bls12_381( + &y3, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 24]; result[0..12].copy_from_slice(&x3); diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/mod.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/mod.rs index f7cf0a85b..42a80c231 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/mod.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/mod.rs @@ -7,6 +7,8 @@ mod fp12; mod fp2; mod fp6; mod fr; +mod kzg; +mod map_to_curve; mod miller_loop; mod pairing; mod twist; @@ -19,5 +21,7 @@ pub use fp12::*; pub use fp2::*; pub use fp6::*; pub use fr::*; +pub use kzg::*; +pub use map_to_curve::*; pub use pairing::*; pub use twist::*; diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/pairing.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/pairing.rs index 5b09d3281..876c1a08f 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/pairing.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/pairing.rs @@ -1,40 +1,73 @@ //! Pairing over BLS12-381 curve -use crate::zisklib::lib::utils::gt; +use crate::zisklib::lib::utils::{eq, gt, is_one}; use super::{ - constants::{IDENTITY_G1, IDENTITY_G2, P_MINUS_ONE}, - curve::{is_on_curve_bls12_381, is_on_subgroup_bls12_381, neg_bls12_381}, + constants::{G1_IDENTITY, G2_IDENTITY, P_MINUS_ONE}, + curve::{ + g1_bytes_be_to_u64_le_bls12_381, is_on_curve_bls12_381, is_on_subgroup_bls12_381, + neg_bls12_381, + }, final_exp::final_exp_bls12_381, miller_loop::{miller_loop_batch_bls12_381, miller_loop_bls12_381}, - twist::{is_on_curve_twist_bls12_381, is_on_subgroup_twist_bls12_381}, + twist::{ + g2_bytes_be_to_u64_le_bls12_381, is_on_curve_twist_bls12_381, + is_on_subgroup_twist_bls12_381, + }, }; +/// Pairing check result codes +const PAIRING_CHECK_SUCCESS: u8 = 0; +const PAIRING_CHECK_FAILED: u8 = 1; +const PAIRING_CHECK_ERR_G1_NOT_ON_CURVE: u8 = 2; +const PAIRING_CHECK_ERR_G1_NOT_IN_SUBGROUP: u8 = 3; +const PAIRING_CHECK_ERR_G2_NOT_ON_CURVE: u8 = 4; +const PAIRING_CHECK_ERR_G2_NOT_IN_SUBGROUP: u8 = 5; + /// Optimal Ate Pairing e: G1 x G2 -> GT over the BLS12-381 curve /// where G1 = E(Fp)[r] = E(Fp), G2 = E'(Fp2)[r] and GT = μ_r (the r-th roots of unity over Fp12*) /// the involved curves are E/Fp: y² = x³ + 4 and E'/Fp2: y² = x³ + 4·(1+u) /// pairingBLS12-381: /// input: P ∈ G1 and Q ∈ G2 /// output: e(P,Q) ∈ GT -pub fn pairing_bls12_381(p: &[u64; 12], q: &[u64; 24]) -> [u64; 72] { +pub fn pairing_bls12_381( + p: &[u64; 12], + q: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { // e(P, 𝒪) = e(𝒪, Q) = 1; - if *p == IDENTITY_G1 || *q == IDENTITY_G2 { + if *p == G1_IDENTITY || *q == G2_IDENTITY { let mut one = [0; 72]; one[0] = 1; return one; } // Miller loop - let miller_loop = miller_loop_bls12_381(p, q); + let miller_loop = miller_loop_bls12_381( + p, + q, + #[cfg(feature = "hints")] + hints, + ); // Final exponentiation - final_exp_bls12_381(&miller_loop) + final_exp_bls12_381( + &miller_loop, + #[cfg(feature = "hints")] + hints, + ) } -/// Computes the optimal Ate pairing for a batch of G1 and G2 points over the BN254 curve -/// and multiplies the results together, i.e.: +/// Computes the optimal Ate pairing for a batch of G1 and G2 points over the BLS12-381 curve +/// and multiplies the results together: /// e(P₁, Q₁) · e(P₂, Q₂) · ... · e(Pₙ, Qₙ) ∈ GT -pub fn pairing_batch_bls12_381(g1_points: &[[u64; 12]], g2_points: &[[u64; 24]]) -> [u64; 72] { +/// +/// Assumes all points are non-infinity and already validated (on curve and in subgroup). +pub fn pairing_batch_bls12_381( + g1_points: &[[u64; 12]], + g2_points: &[[u64; 24]], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 72] { // Since each e(Pi, Qi) := FinalExp(MillerLoop(Pi, Qi)) // We have: // e(P₁, Q₁) · e(P₂, Q₂) · ... · e(Pₙ, Qₙ) = FinalExp(MillerLoop(P₁, Q₁) · MillerLoop(P₂, Q₂) · ... · MillerLoop(Pₙ, Qₙ)) @@ -44,68 +77,188 @@ pub fn pairing_batch_bls12_381(g1_points: &[[u64; 12]], g2_points: &[[u64; 24]]) let n = g1_points.len(); assert_eq!(n, g2_points.len(), "Number of G1 and G2 points must be equal"); - // Miller loop and multiplication - let mut g1_points_ml = Vec::with_capacity(n); - let mut g2_points_ml = Vec::with_capacity(n); - for (p, q) in g1_points.iter().zip(g2_points.iter()) { - // If p = 𝒪 or q = 𝒪 => MillerLoop(P, 𝒪) = MillerLoop(𝒪, Q) = 1; we can skip - if *p != IDENTITY_G1 && *q != IDENTITY_G2 { - g1_points_ml.push(*p); - g2_points_ml.push(*q); - } - } - - if g1_points_ml.is_empty() { - // If all pairing computations were skipped, return 1 + if n == 0 { + // Empty input returns 1 let mut one = [0; 72]; one[0] = 1; return one; } - // Miller loop - let miller_loop = miller_loop_batch_bls12_381(&g1_points_ml, &g2_points_ml); + let miller_loop = miller_loop_batch_bls12_381( + g1_points, + g2_points, + #[cfg(feature = "hints")] + hints, + ); - // Final exponentiation - final_exp_bls12_381(&miller_loop) + final_exp_bls12_381( + &miller_loop, + #[cfg(feature = "hints")] + hints, + ) } -/// C-compatible wrapper for pairing_verify_bls12_381 +/// BLS12-381 pairing check with validation. /// -/// # Safety -/// - All pointers must be valid and properly aligned -/// - `p1` and `p2` must point to at least 12 u64s each -/// - `q1` and `q2` must point to at least 24 u64s each +/// Validates all points are on curve and in subgroup. +/// +/// # Arguments +/// * `g1_points` - Slice of G1 points as [u64; 12] +/// * `g2_points` - Slice of G2 points as [u64; 24] /// -/// Returns 1 if e(P₁, Q₁) == e(P₂, Q₂), 0 otherwise -#[no_mangle] -pub unsafe extern "C" fn pairing_verify_bls12_381_c( - p1_ptr: *const u64, - q1_ptr: *const u64, - p2_ptr: *const u64, - q2_ptr: *const u64, -) -> bool { - let p1: &[u64; 12] = &*(p1_ptr as *const [u64; 12]); - let q1: &[u64; 24] = &*(q1_ptr as *const [u64; 24]); - let p2: &[u64; 12] = &*(p2_ptr as *const [u64; 12]); - let q2: &[u64; 24] = &*(q2_ptr as *const [u64; 24]); - - // Treat P₁,Q₁,P₂,Q₂ == 𝒪 at first, as this is a common case - // e(P₁, 𝒪) == e(P₂, Q₂) <--> P₂ == 𝒪 || Q₂ == 𝒪 - // e(𝒪, Q₁) == e(P₂, Q₂) <--> P₂ == 𝒪 || Q₂ == 𝒪 - if *p1 == IDENTITY_G1 || *q1 == IDENTITY_G2 { - return *p2 == IDENTITY_G1 || *q2 == IDENTITY_G2; - } else if *p2 == IDENTITY_G1 || *q2 == IDENTITY_G2 { - return false; +/// # Returns +/// * `Ok(true)` - Pairing check passed +/// * `Ok(false)` - Pairing check failed +/// * `Err(PAIRING_CHECK_ERR_G1_NOT_ON_CURVE)` - G1 point not on curve +/// * `Err(PAIRING_CHECK_ERR_G1_NOT_IN_SUBGROUP)` - G1 point not in subgroup +/// * `Err(PAIRING_CHECK_ERR_G2_NOT_ON_CURVE)` - G2 point not on curve +/// * `Err(PAIRING_CHECK_ERR_G2_NOT_IN_SUBGROUP)` - G2 point not in subgroup +pub fn pairing_check_bls12_381( + g1_points: &[[u64; 12]], + g2_points: &[[u64; 24]], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Result { + assert_eq!(g1_points.len(), g2_points.len(), "Number of G1 and G2 points must be equal"); + + // Collect valid pairs + let mut valid_g1: Vec<[u64; 12]> = Vec::with_capacity(g1_points.len()); + let mut valid_g2: Vec<[u64; 24]> = Vec::with_capacity(g2_points.len()); + for (g1, g2) in g1_points.iter().zip(g2_points.iter()) { + let g1_is_inf = eq(g1, &G1_IDENTITY); + let g2_is_inf = eq(g2, &G2_IDENTITY); + + // If p = 𝒪 or q = 𝒪 => MillerLoop(P, 𝒪) = MillerLoop(𝒪, Q) = 1; we can skip + if g2_is_inf { + if !g1_is_inf { + if !is_on_curve_bls12_381( + g1, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G1_NOT_ON_CURVE); + } + if !is_on_subgroup_bls12_381( + g1, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G1_NOT_IN_SUBGROUP); + } + } + continue; + } + + if g1_is_inf { + if !is_on_curve_twist_bls12_381( + g2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G2_NOT_ON_CURVE); + } + if !is_on_subgroup_twist_bls12_381( + g2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G2_NOT_IN_SUBGROUP); + } + continue; + } + + if !is_on_curve_bls12_381( + g1, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G1_NOT_ON_CURVE); + } + if !is_on_subgroup_bls12_381( + g1, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G1_NOT_IN_SUBGROUP); + } + + if !is_on_curve_twist_bls12_381( + g2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G2_NOT_ON_CURVE); + } + if !is_on_subgroup_twist_bls12_381( + g2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G2_NOT_IN_SUBGROUP); + } + + valid_g1.push(*g1); + valid_g2.push(*g2); } - // Checking e(P1, Q1) == e(P2, Q2) is equivalent to checking e(P1, Q1) * e(-P2, Q2) == 1 - let p2_neg = neg_bls12_381(p2); - let pairing_result = pairing_batch_bls12_381(&[*p1, p2_neg], &[*q1, *q2]); + // If all pairs were skipped, result is 1 + if valid_g1.is_empty() { + return Ok(true); + } - let one = { - let mut one = [0; 72]; - one[0] = 1; - one - }; - pairing_result == one + // Compute batch pairing and check if result is 1 + Ok(is_one(&pairing_batch_bls12_381( + &valid_g1, + &valid_g2, + #[cfg(feature = "hints")] + hints, + ))) +} + +/// BLS12-381 pairing check for big-endian byte format. +/// +/// # Input format +/// Per pair: 288 bytes = 96 bytes G1 point + 192 bytes G2 point (big-endian) +/// - G1 point: 48 bytes x + 48 bytes y +/// - G2 point: 48 bytes x_i + 48 bytes x_r + 48 bytes y_i + 48 bytes y_r +/// +/// # Safety +/// `pairs` must point to an array of `num_pairs * 288` bytes +/// +/// # Returns +/// - 0 = pairing check passed +/// - 1 = pairing check failed +/// - 2 = error: G1 point not on curve +/// - 3 = error: G1 point not in subgroup +/// - 4 = error: G2 point not on curve +/// - 5 = error: G2 point not in subgroup +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_bls12_381_pairing_check_c")] +pub unsafe extern "C" fn bls12_381_pairing_check_c( + pairs: *const u8, + num_pairs: usize, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u8 { + // Parse all pairs + let mut g1_points: Vec<[u64; 12]> = Vec::with_capacity(num_pairs); + let mut g2_points: Vec<[u64; 24]> = Vec::with_capacity(num_pairs); + for i in 0..num_pairs { + let pair_ptr = pairs.add(i * 288); + + let g1_bytes: &[u8; 96] = &*(pair_ptr as *const [u8; 96]); + let g2_bytes: &[u8; 192] = &*(pair_ptr.add(96) as *const [u8; 192]); + + g1_points.push(g1_bytes_be_to_u64_le_bls12_381(g1_bytes)); + g2_points.push(g2_bytes_be_to_u64_le_bls12_381(g2_bytes)); + } + + match pairing_check_bls12_381( + &g1_points, + &g2_points, + #[cfg(feature = "hints")] + hints, + ) { + Ok(true) => PAIRING_CHECK_SUCCESS, + Ok(false) => PAIRING_CHECK_FAILED, + Err(code) => code, + } } diff --git a/ziskos/entrypoint/src/zisklib/lib/bls12_381/twist.rs b/ziskos/entrypoint/src/zisklib/lib/bls12_381/twist.rs index e31308356..f4a62b8de 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bls12_381/twist.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bls12_381/twist.rs @@ -1,52 +1,380 @@ //! Operations on the twist E': y² = x³ + 4·(1+u) of the BLS12-381 curve -use crate::zisklib::{eq, fcall_msb_pos_384}; +use crate::zisklib::{eq, fcall_msb_pos_256, lt}; use super::{ constants::{ - ETWISTED_B, EXT_U, EXT_U_INV, FROBENIUS_GAMMA13, FROBENIUS_GAMMA14, IDENTITY_G2, - X_ABS_BIN_BE, + ETWISTED_B, EXT_U, EXT_U_INV, FROBENIUS_GAMMA13, FROBENIUS_GAMMA14, G2_IDENTITY, P, + PSI2_C1, PSI_C1, PSI_C2, X_ABS_BIN_BE, }, fp2::{ add_fp2_bls12_381, conjugate_fp2_bls12_381, dbl_fp2_bls12_381, inv_fp2_bls12_381, - mul_fp2_bls12_381, neg_fp2_bls12_381, scalar_mul_fp2_bls12_381, square_fp2_bls12_381, - sub_fp2_bls12_381, + mul_fp2_bls12_381, neg_fp2_bls12_381, scalar_mul_fp2_bls12_381, sqrt_fp2_bls12_381, + square_fp2_bls12_381, sub_fp2_bls12_381, }, + fr::{reduce_fr_bls12_381, scalar_bytes_be_to_u64_le_bls12_381}, }; +/// G2 add result codes +pub const G2_ADD_SUCCESS: u8 = 0; +pub const G2_ADD_SUCCESS_INFINITY: u8 = 1; +pub const G2_ADD_ERR_NOT_ON_CURVE: u8 = 2; + +/// G2 MSM result codes +pub const G2_MSM_SUCCESS: u8 = 0; +pub const G2_MSM_SUCCESS_INFINITY: u8 = 1; +pub const G2_MSM_ERR_NOT_ON_CURVE: u8 = 2; +pub const G2_MSM_ERR_NOT_IN_SUBGROUP: u8 = 3; + +/// Decompresses a G2 point on the BLS12-381 twist from 96 bytes (compressed format). +/// +/// Format: Big-endian x-coordinate (in Fp2) with flag bits in the top 3 bits of the first byte: +/// - Bit 7 (0x80): Compression flag (must be 1 for compressed) +/// - Bit 6 (0x40): Infinity flag (1 = point at infinity) +/// - Bit 5 (0x20): Sign flag (1 = y is lexicographically largest) +pub fn decompress_twist_bls12_381( + input: &[u8; 96], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Result<([u64; 24], bool), &'static str> { + let flags = input[0]; + + // Check compression bit + if (flags & 0x80) == 0 { + return Err("decompress_twist_bls12_381: Expected compressed point (0x80 flag not set)"); + } + + // Check infinity bit + if (flags & 0x40) != 0 { + // Verify rest is zero + if (flags & 0x3f) != 0 { + return Err("Invalid infinity encoding"); + } + for item in input.iter().skip(1) { + if *item != 0 { + return Err("Invalid infinity encoding"); + } + } + return Ok((G2_IDENTITY, true)); + } + + // Extract sign bit + let y_sign = (flags & 0x20) != 0; + + // Extract x-coordinate from big-endian bytes + // Format: first 48 bytes = x_i (imaginary), next 48 bytes = x_r (real) + let mut x_i = [0u64; 6]; + let mut x_r = [0u64; 6]; + + // Parse x_i (first 48 bytes, masking flag bits in first byte) + let mut bytes_i = [0u8; 48]; + bytes_i.copy_from_slice(&input[0..48]); + bytes_i[0] &= 0x1f; // Clear flag bits + + for i in 0..6 { + for j in 0..8 { + x_i[5 - i] |= (bytes_i[i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + // Parse x_r (next 48 bytes) + for i in 0..6 { + for j in 0..8 { + x_r[5 - i] |= (input[48 + i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + // Verify x_r < p and x_i < p + if !lt(&x_r, &P) { + return Err("x_r coordinate >= field modulus"); + } + if !lt(&x_i, &P) { + return Err("x_i coordinate >= field modulus"); + } + + // Build x = x_r + x_i * u as [u64; 12] + let mut x = [0u64; 12]; + x[0..6].copy_from_slice(&x_r); + x[6..12].copy_from_slice(&x_i); + + // Calculate y² = x³ + 4(1+u) + let x_sq = square_fp2_bls12_381( + &x, + #[cfg(feature = "hints")] + hints, + ); + let x_cb = mul_fp2_bls12_381( + &x_sq, + &x, + #[cfg(feature = "hints")] + hints, + ); + let y_sq = add_fp2_bls12_381( + &x_cb, + &ETWISTED_B, + #[cfg(feature = "hints")] + hints, + ); + + // Compute sqrt + let (y, has_sqrt) = sqrt_fp2_bls12_381( + &y_sq, + #[cfg(feature = "hints")] + hints, + ); + if !has_sqrt { + return Err("No square root exists - point not on curve"); + } + + // Determine sign of y using lexicographic ordering on Fp2 + // y = y_r + y_i * u is "larger" if: + // - y_i > -y_i, OR + // - y_i == -y_i (i.e., y_i == 0) AND y_r > -y_r + let y_neg = neg_fp2_bls12_381( + &y, + #[cfg(feature = "hints")] + hints, + ); + let y_r: [u64; 6] = y[0..6].try_into().unwrap(); + let y_i: [u64; 6] = y[6..12].try_into().unwrap(); + let y_neg_r: [u64; 6] = y_neg[0..6].try_into().unwrap(); + let y_neg_i: [u64; 6] = y_neg[6..12].try_into().unwrap(); + + let y_is_larger = if !eq(&y_i, &y_neg_i) { + // Compare i components + lt(&y_neg_i, &y_i) + } else { + // i components equal, compare r + lt(&y_neg_r, &y_r) + }; + + // Select the correct y based on sign bit + let final_y = if y_is_larger == y_sign { y } else { y_neg }; + + // Return the point (x, final_y) + let mut result = [0u64; 24]; + result[0..12].copy_from_slice(&x); + result[12..24].copy_from_slice(&final_y); + Ok((result, false)) +} + /// Check if a non-zero point `p` is on the BLS12-381 twist -pub fn is_on_curve_twist_bls12_381(p: &[u64; 24]) -> bool { +pub fn is_on_curve_twist_bls12_381( + p: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { // q in E' iff y² == x³ + 4·(1+u) let x: [u64; 12] = p[0..12].try_into().unwrap(); let y: [u64; 12] = p[12..24].try_into().unwrap(); - let x_sq = square_fp2_bls12_381(&x); - let x_cubed = mul_fp2_bls12_381(&x_sq, &x); - let x_cubed_plus_b = add_fp2_bls12_381(&x_cubed, &ETWISTED_B); - let y_sq = square_fp2_bls12_381(&y); + let x_sq = square_fp2_bls12_381( + &x, + #[cfg(feature = "hints")] + hints, + ); + let x_cubed = mul_fp2_bls12_381( + &x_sq, + &x, + #[cfg(feature = "hints")] + hints, + ); + let x_cubed_plus_b = add_fp2_bls12_381( + &x_cubed, + &ETWISTED_B, + #[cfg(feature = "hints")] + hints, + ); + let y_sq = square_fp2_bls12_381( + &y, + #[cfg(feature = "hints")] + hints, + ); eq(&x_cubed_plus_b, &y_sq) } /// Check if a non-zero point `p` is on the BLS12-381 twist subgroup -pub fn is_on_subgroup_twist_bls12_381(p: &[u64; 24]) -> bool { +pub fn is_on_subgroup_twist_bls12_381( + p: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { // p in subgroup iff: // x·𝜓³(P) + P == 𝜓²(P) // where ψ := 𝜑⁻¹𝜋ₚ𝜑 is the untwist-Frobenius-twist endomorphism // Compute ψ²(P), ψ³(P) - let utf1 = utf_endomorphism_twist_bls12_381(p); - let rhs = utf_endomorphism_twist_bls12_381(&utf1); - let utf3 = utf_endomorphism_twist_bls12_381(&rhs); + let utf1 = utf_endomorphism_twist_bls12_381( + p, + #[cfg(feature = "hints")] + hints, + ); + let rhs = utf_endomorphism_twist_bls12_381( + &utf1, + #[cfg(feature = "hints")] + hints, + ); + let utf3 = utf_endomorphism_twist_bls12_381( + &rhs, + #[cfg(feature = "hints")] + hints, + ); // Compute [x]ψ³(P) + P (since x is negative, we compute -[|x|]ψ³(P)) - let xutf3: [u64; 24] = scalar_mul_by_abs_x_twist_bls12_381(&utf3); - let mut lhs = neg_twist_bls12_381(&xutf3); - lhs = add_twist_bls12_381(&lhs, p); + let xutf3: [u64; 24] = scalar_mul_by_abs_x_twist_bls12_381( + &utf3, + #[cfg(feature = "hints")] + hints, + ); + let mut lhs = neg_twist_bls12_381( + &xutf3, + #[cfg(feature = "hints")] + hints, + ); + lhs = add_twist_bls12_381( + &lhs, + p, + #[cfg(feature = "hints")] + hints, + ); eq(&lhs, &rhs) } +fn psi_twist_bls12_381(p: &[u64; 24], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 24] { + let x: [u64; 12] = p[0..12].try_into().unwrap(); + let y: [u64; 12] = p[12..24].try_into().unwrap(); + + let mut frobx = conjugate_fp2_bls12_381( + &x, + #[cfg(feature = "hints")] + hints, + ); + frobx = mul_fp2_bls12_381( + &frobx, + &PSI_C1, + #[cfg(feature = "hints")] + hints, + ); + + let mut froby = conjugate_fp2_bls12_381( + &y, + #[cfg(feature = "hints")] + hints, + ); + froby = mul_fp2_bls12_381( + &froby, + &PSI_C2, + #[cfg(feature = "hints")] + hints, + ); + + let mut result = [0u64; 24]; + result[0..12].copy_from_slice(&frobx); + result[12..24].copy_from_slice(&froby); + result +} + +fn psi2_twist_bls12_381( + p: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { + let x: [u64; 12] = p[0..12].try_into().unwrap(); + let y: [u64; 12] = p[12..24].try_into().unwrap(); + + let xa = mul_fp2_bls12_381( + &x, + &PSI2_C1, + #[cfg(feature = "hints")] + hints, + ); + let ya = neg_fp2_bls12_381( + &y, + #[cfg(feature = "hints")] + hints, + ); + + let mut result = [0u64; 24]; + result[0..12].copy_from_slice(&xa); + result[12..24].copy_from_slice(&ya); + result +} + +/// Efficient cofactor clearing for G2 using endomorphisms +/// Implements: h_eff * P where h_eff is the effective cofactor +pub fn clear_cofactor_twist_bls12_381( + p: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { + let mut t1 = scalar_mul_by_abs_x_twist_bls12_381( + p, + #[cfg(feature = "hints")] + hints, + ); + t1 = neg_twist_bls12_381( + &t1, + #[cfg(feature = "hints")] + hints, + ); + let mut t2 = psi_twist_bls12_381( + p, + #[cfg(feature = "hints")] + hints, + ); + let mut t3 = dbl_twist_bls12_381( + p, + #[cfg(feature = "hints")] + hints, + ); + t3 = psi2_twist_bls12_381( + &t3, + #[cfg(feature = "hints")] + hints, + ); + t3 = sub_twist_bls12_381( + &t3, + &t2, + #[cfg(feature = "hints")] + hints, + ); + t2 = add_twist_bls12_381( + &t1, + &t2, + #[cfg(feature = "hints")] + hints, + ); + t2 = scalar_mul_by_abs_x_twist_bls12_381( + &t2, + #[cfg(feature = "hints")] + hints, + ); + t2 = neg_twist_bls12_381( + &t2, + #[cfg(feature = "hints")] + hints, + ); + t3 = add_twist_bls12_381( + &t3, + &t2, + #[cfg(feature = "hints")] + hints, + ); + t3 = sub_twist_bls12_381( + &t3, + &t1, + #[cfg(feature = "hints")] + hints, + ); + sub_twist_bls12_381( + &t3, + p, + #[cfg(feature = "hints")] + hints, + ) +} + /// Addition of two non-zero points -pub fn add_twist_bls12_381(p1: &[u64; 24], p2: &[u64; 24]) -> [u64; 24] { +pub fn add_twist_bls12_381( + p1: &[u64; 24], + p2: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let x1: [u64; 12] = p1[0..12].try_into().unwrap(); let y1: [u64; 12] = p1[12..24].try_into().unwrap(); let x2: [u64; 12] = p2[0..12].try_into().unwrap(); @@ -57,26 +385,77 @@ pub fn add_twist_bls12_381(p1: &[u64; 24], p2: &[u64; 24]) -> [u64; 24] { // Is y1 == y2? if eq(&y1, &y2) { // Compute the doubling - return dbl_twist_bls12_381(p1); + return dbl_twist_bls12_381( + p1, + #[cfg(feature = "hints")] + hints, + ); } else { // Points are the inverse of each other, return the point at infinity - return IDENTITY_G2; + return G2_IDENTITY; } } // Compute the addition - let mut den = sub_fp2_bls12_381(&x2, &x1); - den = inv_fp2_bls12_381(&den); - let mut lambda = sub_fp2_bls12_381(&y2, &y1); - lambda = mul_fp2_bls12_381(&lambda, &den); - - let mut x3 = square_fp2_bls12_381(&lambda); - x3 = sub_fp2_bls12_381(&x3, &x1); - x3 = sub_fp2_bls12_381(&x3, &x2); + let mut den = sub_fp2_bls12_381( + &x2, + &x1, + #[cfg(feature = "hints")] + hints, + ); + den = inv_fp2_bls12_381( + &den, + #[cfg(feature = "hints")] + hints, + ); + let mut lambda = sub_fp2_bls12_381( + &y2, + &y1, + #[cfg(feature = "hints")] + hints, + ); + lambda = mul_fp2_bls12_381( + &lambda, + &den, + #[cfg(feature = "hints")] + hints, + ); - let mut y3 = sub_fp2_bls12_381(&x1, &x3); - y3 = mul_fp2_bls12_381(&lambda, &y3); - y3 = sub_fp2_bls12_381(&y3, &y1); + let mut x3 = square_fp2_bls12_381( + &lambda, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bls12_381( + &x3, + &x1, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bls12_381( + &x3, + &x2, + #[cfg(feature = "hints")] + hints, + ); + let mut y3 = sub_fp2_bls12_381( + &x1, + &x3, + #[cfg(feature = "hints")] + hints, + ); + y3 = mul_fp2_bls12_381( + &lambda, + &y3, + #[cfg(feature = "hints")] + hints, + ); + y3 = sub_fp2_bls12_381( + &y3, + &y1, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 24]; result[0..12].copy_from_slice(&x3); @@ -84,25 +463,143 @@ pub fn add_twist_bls12_381(p1: &[u64; 24], p2: &[u64; 24]) -> [u64; 24] { result } +/// Addition of two points +pub fn add_complete_twist_bls12_381( + p1: &[u64; 24], + p2: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Result<[u64; 24], u8> { + let p1_is_inf = eq(p1, &G2_IDENTITY); + let p2_is_inf = eq(p2, &G2_IDENTITY); + + // Handle identity cases + if p1_is_inf && p2_is_inf { + return Ok(G2_IDENTITY); + } + + if p1_is_inf { + // Validate p2 is on curve + if !is_on_curve_twist_bls12_381( + p2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G2_ADD_ERR_NOT_ON_CURVE); + } + return Ok(*p2); + } + + if p2_is_inf { + // Validate p1 is on curve + if !is_on_curve_twist_bls12_381( + p1, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G2_ADD_ERR_NOT_ON_CURVE); + } + return Ok(*p1); + } + + // Both points are non-identity, validate both are on curve + if !is_on_curve_twist_bls12_381( + p1, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G2_ADD_ERR_NOT_ON_CURVE); + } + if !is_on_curve_twist_bls12_381( + p2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G2_ADD_ERR_NOT_ON_CURVE); + } + + // Perform addition + Ok(add_twist_bls12_381( + p1, + p2, + #[cfg(feature = "hints")] + hints, + )) +} + /// Doubling of a non-zero point -pub fn dbl_twist_bls12_381(p: &[u64; 24]) -> [u64; 24] { +pub fn dbl_twist_bls12_381( + p: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let x: [u64; 12] = p[0..12].try_into().unwrap(); let y: [u64; 12] = p[12..24].try_into().unwrap(); // Compute the doubling - let mut lambda = dbl_fp2_bls12_381(&y); - lambda = inv_fp2_bls12_381(&lambda); - lambda = scalar_mul_fp2_bls12_381(&lambda, &[0x3, 0, 0, 0, 0, 0]); - lambda = mul_fp2_bls12_381(&lambda, &x); - lambda = mul_fp2_bls12_381(&lambda, &x); + let mut lambda = dbl_fp2_bls12_381( + &y, + #[cfg(feature = "hints")] + hints, + ); + lambda = inv_fp2_bls12_381( + &lambda, + #[cfg(feature = "hints")] + hints, + ); + lambda = scalar_mul_fp2_bls12_381( + &lambda, + &[0x3, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + lambda = mul_fp2_bls12_381( + &lambda, + &x, + #[cfg(feature = "hints")] + hints, + ); + lambda = mul_fp2_bls12_381( + &lambda, + &x, + #[cfg(feature = "hints")] + hints, + ); - let mut x3 = square_fp2_bls12_381(&lambda); - x3 = sub_fp2_bls12_381(&x3, &x); - x3 = sub_fp2_bls12_381(&x3, &x); + let mut x3 = square_fp2_bls12_381( + &lambda, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bls12_381( + &x3, + &x, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bls12_381( + &x3, + &x, + #[cfg(feature = "hints")] + hints, + ); - let mut y3 = sub_fp2_bls12_381(&x, &x3); - y3 = mul_fp2_bls12_381(&lambda, &y3); - y3 = sub_fp2_bls12_381(&y3, &y); + let mut y3 = sub_fp2_bls12_381( + &x, + &x3, + #[cfg(feature = "hints")] + hints, + ); + y3 = mul_fp2_bls12_381( + &lambda, + &y3, + #[cfg(feature = "hints")] + hints, + ); + y3 = sub_fp2_bls12_381( + &y3, + &y, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 24]; result[0..12].copy_from_slice(&x3); @@ -110,13 +607,82 @@ pub fn dbl_twist_bls12_381(p: &[u64; 24]) -> [u64; 24] { result } +/// Subtraction of two non-zero points `p1` and `p2` on the BLS12-381 curve +pub fn sub_twist_bls12_381( + p1: &[u64; 24], + p2: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { + let x2: [u64; 12] = p2[0..12].try_into().unwrap(); + let y2: [u64; 12] = p2[12..24].try_into().unwrap(); + + // P1 - P2 = P1 + (-P2) + let y2_neg = neg_fp2_bls12_381( + &y2, + #[cfg(feature = "hints")] + hints, + ); + + let mut p2_neg = [0u64; 24]; + p2_neg[0..12].copy_from_slice(&x2); + p2_neg[12..24].copy_from_slice(&y2_neg); + + add_twist_bls12_381( + p1, + &p2_neg, + #[cfg(feature = "hints")] + hints, + ) +} + +/// Subtraction of two points `p1` and `p2` on the BLS12-381 curve +pub fn sub_complete_twist_bls12_381( + p1: &[u64; 24], + p2: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { + let p1_is_inf = *p1 == G2_IDENTITY; + let p2_is_inf = *p2 == G2_IDENTITY; + + // Handle identity cases + if p1_is_inf && p2_is_inf { + // O - O = O + return G2_IDENTITY; + } else if p1_is_inf { + // O - P2 = -P2 + return neg_twist_bls12_381( + p2, + #[cfg(feature = "hints")] + hints, + ); + } else if p2_is_inf { + // P1 - O = P1 + return *p1; + } + + // Perform regular subtraction: P1 - P2 = P1 + (-P2) + sub_twist_bls12_381( + p1, + p2, + #[cfg(feature = "hints")] + hints, + ) +} + /// Negation of a point -pub fn neg_twist_bls12_381(p: &[u64; 24]) -> [u64; 24] { +pub fn neg_twist_bls12_381( + p: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let x: [u64; 12] = p[0..12].try_into().unwrap(); let y: [u64; 12] = p[12..24].try_into().unwrap(); // Compute the negation - let y_neg = neg_fp2_bls12_381(&y); + let y_neg = neg_fp2_bls12_381( + &y, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 24]; result[0..12].copy_from_slice(&x); @@ -125,20 +691,28 @@ pub fn neg_twist_bls12_381(p: &[u64; 24]) -> [u64; 24] { } /// Multiplies a non-zero point `p` on the BLS12-381 curve by a scalar `k` on the BLS12-381 scalar field -pub fn scalar_mul_twist_bls12_381(p: &[u64; 24], k: &[u64; 6]) -> [u64; 24] { +pub fn scalar_mul_twist_bls12_381( + p: &[u64; 24], + k: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { // Direct cases: k = 0, k = 1, k = 2 match k { - [0, 0, 0, 0, 0, 0] => { + [0, 0, 0, 0] => { // Return 𝒪 - return IDENTITY_G2; + return G2_IDENTITY; } - [1, 0, 0, 0, 0, 0] => { + [1, 0, 0, 0] => { // Return p return *p; } - [2, 0, 0, 0, 0, 0] => { + [2, 0, 0, 0] => { // Return 2p - return dbl_twist_bls12_381(p); + return dbl_twist_bls12_381( + p, + #[cfg(feature = "hints")] + hints, + ); } _ => {} } @@ -147,7 +721,12 @@ pub fn scalar_mul_twist_bls12_381(p: &[u64; 24], k: &[u64; 6]) -> [u64; 24] { // Hint the length the binary representations of k // We will verify the output by recomposing k // Moreover, we should check that the first received bit is 1 - let (max_limb, max_bit) = fcall_msb_pos_384(k, &[0, 0, 0, 0, 0, 0]); + let (max_limb, max_bit) = fcall_msb_pos_256( + k, + &[0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); // Perform the loop, based on the binary representation of k @@ -160,7 +739,7 @@ pub fn scalar_mul_twist_bls12_381(p: &[u64; 24], k: &[u64; 6]) -> [u64; 24] { // Start at P let mut q = *p; - let mut k_rec = [0u64; 6]; + let mut k_rec = [0u64; 4]; k_rec[max_limb] |= 1 << max_bit; // Determine starting limb/bit for the loop @@ -177,12 +756,21 @@ pub fn scalar_mul_twist_bls12_381(p: &[u64; 24], k: &[u64; 6]) -> [u64; 24] { for i in (0..=limb).rev() { for j in (0..=bit).rev() { // Always double - q = dbl_twist_bls12_381(&q); + q = dbl_twist_bls12_381( + &q, + #[cfg(feature = "hints")] + hints, + ); // Get the next bit b of k. // If b == 1, we should add P to Q, otherwise start the next iteration if ((k[i] >> j) & 1) == 1 { - q = add_twist_bls12_381(&q, p); + q = add_twist_bls12_381( + &q, + p, + #[cfg(feature = "hints")] + hints, + ); // Reconstruct k k_rec[i] |= 1 << j; @@ -199,49 +787,370 @@ pub fn scalar_mul_twist_bls12_381(p: &[u64; 24], k: &[u64; 6]) -> [u64; 24] { } /// Scalar multiplication of a non-zero point `p` by a binary scalar `k` -pub fn scalar_mul_bin_twist_bls12_381(p: &[u64; 24], k: &[u8]) -> [u64; 24] { +pub fn scalar_mul_bin_twist_bls12_381( + p: &[u64; 24], + k: &[u8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let mut r = *p; for &bit in k.iter().skip(1) { - r = dbl_twist_bls12_381(&r); + r = dbl_twist_bls12_381( + &r, + #[cfg(feature = "hints")] + hints, + ); if bit == 1 { - r = add_twist_bls12_381(&r, p); + r = add_twist_bls12_381( + &r, + p, + #[cfg(feature = "hints")] + hints, + ); } } r } /// Scalar multiplication of a non-zero point by x -pub fn scalar_mul_by_abs_x_twist_bls12_381(p: &[u64; 24]) -> [u64; 24] { - scalar_mul_bin_twist_bls12_381(p, &X_ABS_BIN_BE) +pub fn scalar_mul_by_abs_x_twist_bls12_381( + p: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { + scalar_mul_bin_twist_bls12_381( + p, + &X_ABS_BIN_BE, + #[cfg(feature = "hints")] + hints, + ) +} + +/// Multi-Scalar Multiplication (MSM) for BLS12-381 G2 points +/// It computes k1·P1 + k2·P2 + ... + kn·Pn +// TODO: This is a naive implementation, one can improve it by using, e.g., a windowed strategies! +pub fn msm_complete_twist_bls12_381( + points: &[[u64; 24]], + scalars: &[[u64; 4]], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Result<[u64; 24], u8> { + debug_assert_eq!(points.len(), scalars.len()); + + let mut acc = G2_IDENTITY; + let mut acc_is_inf = true; + + for (point, scalar) in points.iter().zip(scalars.iter()) { + // Skip infinity points + if *point == G2_IDENTITY { + continue; + } + + // Skip zero scalars + if reduce_fr_bls12_381( + scalar, + #[cfg(feature = "hints")] + hints, + ) == [0, 0, 0, 0] + { + continue; + } + + // Verify point is on curve + if !is_on_curve_twist_bls12_381( + point, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G2_MSM_ERR_NOT_ON_CURVE); + } + + // Verify point is in subgroup (required for MSM per EIP-2537) + if !is_on_subgroup_twist_bls12_381( + point, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G2_MSM_ERR_NOT_IN_SUBGROUP); + } + + // Compute P * k + let product = scalar_mul_twist_bls12_381( + point, + scalar, + #[cfg(feature = "hints")] + hints, + ); + + // Skip if product is infinity + if product == G2_IDENTITY { + continue; + } + + // Add to accumulator + if acc_is_inf { + acc = product; + acc_is_inf = false; + } else { + acc = add_twist_bls12_381( + &acc, + &product, + #[cfg(feature = "hints")] + hints, + ); + acc_is_inf = acc == G2_IDENTITY; + } + } + + Ok(acc) } /// Compute the untwist-frobenius-twist (utf) endomorphism ψ := 𝜑⁻¹𝜋ₚ𝜑 of a non-zero point `p`, where: /// 𝜑 : E'(Fp2) -> E(Fp12) defined by 𝜑(x,y) = (x/ω²,y/ω³) is the untwist map /// 𝜋ₚ : E(Fp12) -> E(Fp12) defined by 𝜋ₚ(x,y) = (xᵖ,yᵖ) is the Frobenius map /// 𝜑⁻¹ : E(Fp12) -> E'(Fp2) defined by 𝜑⁻¹(x,y) = (x·ω²,y·ω³) is the twist map -pub fn utf_endomorphism_twist_bls12_381(p: &[u64; 24]) -> [u64; 24] { +pub fn utf_endomorphism_twist_bls12_381( + p: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let mut x: [u64; 12] = p[0..12].try_into().unwrap(); let mut y: [u64; 12] = p[12..24].try_into().unwrap(); // 1] Compute 𝜑(x,y) = (x/ω²,y/ω³) = (x·(%W_INV_X + %W_INV_Y·u)·ω⁴,y·(%W_INV_X + %W_INV_Y·u)·ω³) ∈ E(Fp12) - x = mul_fp2_bls12_381(&x, &EXT_U_INV); - y = mul_fp2_bls12_381(&y, &EXT_U_INV); + x = mul_fp2_bls12_381( + &x, + &EXT_U_INV, + #[cfg(feature = "hints")] + hints, + ); + y = mul_fp2_bls12_381( + &y, + &EXT_U_INV, + #[cfg(feature = "hints")] + hints, + ); // 2] Compute 𝜋ₚ(a,b) = (aᵖ,bᵖ), i.e., apply the frobenius operator // Since the previous result has only one non-zero coefficient, we can apply a specialized frobenius directly // (a·ω⁴)ᵖ = a̅·γ14·ω⁴, (b·ω³)ᵖ = b̅·γ13·ω³ - x = conjugate_fp2_bls12_381(&x); - x = scalar_mul_fp2_bls12_381(&x, &FROBENIUS_GAMMA14); - y = conjugate_fp2_bls12_381(&y); - y = mul_fp2_bls12_381(&y, &FROBENIUS_GAMMA13); + x = conjugate_fp2_bls12_381( + &x, + #[cfg(feature = "hints")] + hints, + ); + x = scalar_mul_fp2_bls12_381( + &x, + &FROBENIUS_GAMMA14, + #[cfg(feature = "hints")] + hints, + ); + y = conjugate_fp2_bls12_381( + &y, + #[cfg(feature = "hints")] + hints, + ); + y = mul_fp2_bls12_381( + &y, + &FROBENIUS_GAMMA13, + #[cfg(feature = "hints")] + hints, + ); // 3] Compute 𝜑⁻¹(a,b) = (a·ω²,b·ω³) ∈ E'(Fp2). In our particular case, we have: // 𝜑⁻¹((a̅·γ14·ω⁴)·ω²,(b̅·γ13·ω³)·ω³) = (a̅·γ14·(1+u), b̅·γ13·(1+u)) - x = mul_fp2_bls12_381(&x, &EXT_U); - y = mul_fp2_bls12_381(&y, &EXT_U); + x = mul_fp2_bls12_381( + &x, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); + y = mul_fp2_bls12_381( + &y, + &EXT_U, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0u64; 24]; result[0..12].copy_from_slice(&x); result[12..24].copy_from_slice(&y); result } + +/// G2 point addition for uncompressed 192-byte points (big-endian format) +/// +/// Input format: 192 bytes per point = 96 bytes x-coordinate (Fp2) + 96 bytes y-coordinate (Fp2) +/// Each Fp2 element: 48 bytes imaginary + 48 bytes real (big-endian) +/// Output format: Same as input +/// +/// ### Safety +/// - `a` must point to a valid `[u8; 192]` for the first input point +/// - `b` must point to a valid `[u8; 192]` for the second input point +/// - `ret` must point to a valid `[u8; 192]` for the output +/// +/// Returns: +/// - 0 = success (regular point) +/// - 1 = success (point at infinity) +/// - 2 = error (point not on curve) +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_bls12_381_g2_add_c")] +pub unsafe extern "C" fn bls12_381_g2_add_c( + ret: *mut u8, + a: *const u8, + b: *const u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u8 { + let a_bytes: &[u8; 192] = &*(a as *const [u8; 192]); + let b_bytes: &[u8; 192] = &*(b as *const [u8; 192]); + let ret_bytes: &mut [u8; 192] = &mut *(ret as *mut [u8; 192]); + + // Parse points + let a_u64 = g2_bytes_be_to_u64_le_bls12_381(a_bytes); + let b_u64 = g2_bytes_be_to_u64_le_bls12_381(b_bytes); + + // Perform addition + let result = match add_complete_twist_bls12_381( + &a_u64, + &b_u64, + #[cfg(feature = "hints")] + hints, + ) { + Ok(r) => r, + Err(code) => return code, + }; + + // Encode result + if result == G2_IDENTITY { + G2_ADD_SUCCESS_INFINITY + } else { + g2_u64_le_to_bytes_be_bls12_381(&result, ret_bytes); + G2_ADD_SUCCESS + } +} + +/// G2 Multi-Scalar Multiplication (MSM) for uncompressed points (big-endian format) +/// +/// Input format per pair: 224 bytes = 192 bytes G2 point + 32 bytes scalar (big-endian) +/// Output format: 192 bytes G2 point +/// +/// ### Safety +/// - `pairs` must point to an array of `num_pairs * 224` bytes +/// - `ret` must point to a valid `[u8; 192]` for the output +/// +/// Returns: +/// - 0 = success (regular point) +/// - 1 = success (point at infinity) +/// - 2 = error (point not on curve) +/// - 3 = error (point not in subgroup) +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_bls12_381_g2_msm_c")] +pub unsafe extern "C" fn bls12_381_g2_msm_c( + ret: *mut u8, + pairs: *const u8, + num_pairs: usize, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u8 { + let ret_bytes: &mut [u8; 192] = &mut *(ret as *mut [u8; 192]); + + // Parse all pairs + let mut points = Vec::with_capacity(num_pairs); + let mut scalars = Vec::with_capacity(num_pairs); + for i in 0..num_pairs { + let pair_ptr = pairs.add(i * 224); + let point_bytes: &[u8; 192] = &*(pair_ptr as *const [u8; 192]); + let scalar_bytes: &[u8; 32] = &*(pair_ptr.add(192) as *const [u8; 32]); + + // Parse point and scalar + let point_u64 = g2_bytes_be_to_u64_le_bls12_381(point_bytes); + let scalar_u64 = scalar_bytes_be_to_u64_le_bls12_381(scalar_bytes); + + points.push(point_u64); + scalars.push(scalar_u64); + } + + // Perform MSM with validation + let result = match msm_complete_twist_bls12_381( + &points, + &scalars, + #[cfg(feature = "hints")] + hints, + ) { + Ok(r) => r, + Err(code) => return code, + }; + + // Encode result + if result == G2_IDENTITY { + G2_MSM_SUCCESS_INFINITY + } else { + g2_u64_le_to_bytes_be_bls12_381(&result, ret_bytes); + G2_MSM_SUCCESS + } +} + +/// Convert 192-byte big-endian G2 point to [u64; 24] little-endian +pub fn g2_bytes_be_to_u64_le_bls12_381(bytes: &[u8; 192]) -> [u64; 24] { + let mut result = [0u64; 24]; + + // x_r (bytes 0-47) -> result[0..6] + for i in 0..6 { + for j in 0..8 { + result[5 - i] |= (bytes[i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + // x_i (bytes 48-95) -> result[6..12] + for i in 0..6 { + for j in 0..8 { + result[11 - i] |= (bytes[48 + i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + // y_r (bytes 96-143) -> result[12..18] + for i in 0..6 { + for j in 0..8 { + result[17 - i] |= (bytes[96 + i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + // y_i (bytes 144-191) -> result[18..24] + for i in 0..6 { + for j in 0..8 { + result[23 - i] |= (bytes[144 + i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + result +} + +/// Convert [u64; 24] little-endian G2 point to 192-byte big-endian +pub fn g2_u64_le_to_bytes_be_bls12_381(limbs: &[u64; 24], bytes: &mut [u8; 192]) { + // x_r (limbs[0..6]) -> bytes 0-47 + for i in 0..6 { + let limb = limbs[5 - i]; + for j in 0..8 { + bytes[i * 8 + j] = ((limb >> (8 * (7 - j))) & 0xFF) as u8; + } + } + + // x_i (limbs[6..12]) -> bytes 48-95 + for i in 0..6 { + let limb = limbs[11 - i]; + for j in 0..8 { + bytes[48 + i * 8 + j] = ((limb >> (8 * (7 - j))) & 0xFF) as u8; + } + } + + // y_r (limbs[12..18]) -> bytes 96-143 + for i in 0..6 { + let limb = limbs[17 - i]; + for j in 0..8 { + bytes[96 + i * 8 + j] = ((limb >> (8 * (7 - j))) & 0xFF) as u8; + } + } + + // y_i (limbs[18..24]) -> bytes 144-191 + for i in 0..6 { + let limb = limbs[23 - i]; + for j in 0..8 { + bytes[144 + i * 8 + j] = ((limb >> (8 * (7 - j))) & 0xFF) as u8; + } + } +} diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/constants.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/constants.rs index 10ffae89f..fe2c092e4 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bn254/constants.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/constants.rs @@ -16,18 +16,10 @@ pub const ETWISTED_B: [u64; 8] = [ ]; /// Identity element in G1 -pub const IDENTITY_G1: [u64; 8] = { - let mut tmp = [0u64; 8]; - tmp[4] = 1; - tmp -}; +pub const G1_IDENTITY: [u64; 8] = [0u64; 8]; /// Identity element in G2 -pub const IDENTITY_G2: [u64; 16] = { - let mut tmp = [0u64; 16]; - tmp[8] = 1; - tmp -}; +pub const G2_IDENTITY: [u64; 16] = [0u64; 16]; /// Base field size pub const P: [u64; 4] = @@ -36,6 +28,10 @@ pub const P: [u64; 4] = /// Base field size minus one pub const P_MINUS_ONE: [u64; 4] = [P[0] - 1, P[1], P[2], P[3]]; +/// Scalar field size +pub const R: [u64; 4] = + [0x43E1F593F0000001, 0x2833E84879B97091, 0xB85045B68181585D, 0x30644E72E131A029]; + /// Frobenius operator constant 𝛾₁₁ := (9 + u)^((p-1)/6) pub const FROBENIUS_GAMMA11: [u64; 8] = [ 0xD60B35DADCC9E470, diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/curve.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/curve.rs index b710bb790..36166a0f9 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bn254/curve.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/curve.rs @@ -1,62 +1,70 @@ //! Operations on the BN254 curve E: y² = x³ + 3 +use num_traits::ops::bytes; + use crate::{ syscalls::{ syscall_bn254_curve_add, syscall_bn254_curve_dbl, SyscallBn254CurveAddParams, SyscallPoint256, }, - zisklib::{eq, fcall_msb_pos_256}, + zisklib::{eq, fcall_msb_pos_256, is_zero, lt}, }; use super::{ - constants::{E_B, IDENTITY_G1}, - fp::{add_fp_bn254, inv_fp_bn254, mul_fp_bn254, square_fp_bn254}, + constants::{E_B, G1_IDENTITY, P}, + fp::{add_fp_bn254, inv_fp_bn254, mul_fp_bn254, neg_fp_bn254, square_fp_bn254}, + fr::{reduce_fr_bn254, scalar_bytes_be_to_u64_le_bn254}, }; +/// G1 add result codes +const G1_ADD_SUCCESS: u8 = 0; +const G1_ADD_SUCCESS_INFINITY: u8 = 1; +const G1_ADD_ERR_INVALID: u8 = 2; +const G1_ADD_ERR_NOT_ON_CURVE: u8 = 3; + +/// G1 mul result codes +const G1_MUL_SUCCESS: u8 = 0; +const G1_MUL_SUCCESS_INFINITY: u8 = 1; +const G1_MUL_ERR_NOT_IN_FIELD: u8 = 2; +const G1_MUL_ERR_NOT_ON_CURVE: u8 = 3; + /// Check if a non-zero point `p` is on the BN254 curve -pub fn is_on_curve_bn254(p: &[u64; 8]) -> bool { +pub fn is_on_curve_bn254(p: &[u64; 8], #[cfg(feature = "hints")] hints: &mut Vec) -> bool { let x: [u64; 4] = p[0..4].try_into().unwrap(); let y: [u64; 4] = p[4..8].try_into().unwrap(); // p in E iff y² == x³ + 3 - let lhs = square_fp_bn254(&y); - let mut rhs = square_fp_bn254(&x); - rhs = mul_fp_bn254(&rhs, &x); - rhs = add_fp_bn254(&rhs, &E_B); + let lhs = square_fp_bn254( + &y, + #[cfg(feature = "hints")] + hints, + ); + let mut rhs = square_fp_bn254( + &x, + #[cfg(feature = "hints")] + hints, + ); + rhs = mul_fp_bn254( + &rhs, + &x, + #[cfg(feature = "hints")] + hints, + ); + rhs = add_fp_bn254( + &rhs, + &E_B, + #[cfg(feature = "hints")] + hints, + ); eq(&lhs, &rhs) } -/// Converts a point `p` on the BN254 curve from Jacobian coordinates to affine coordinates -pub fn to_affine_bn254(p: &[u64; 12]) -> [u64; 8] { - let z: [u64; 4] = p[8..12].try_into().unwrap(); - - if z == [0u64; 4] { - return IDENTITY_G1; - } else if z == [1u64, 0, 0, 0] { - return [p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]]; - } - - let x: [u64; 4] = p[0..4].try_into().unwrap(); - let y: [u64; 4] = p[4..8].try_into().unwrap(); - - let zinv = inv_fp_bn254(&z); - let zinv_sq = square_fp_bn254(&zinv); - - let x_res = mul_fp_bn254(&x, &zinv_sq); - let mut y_res = mul_fp_bn254(&y, &zinv_sq); - y_res = mul_fp_bn254(&y_res, &zinv); - - [x_res[0], x_res[1], x_res[2], x_res[3], y_res[0], y_res[1], y_res[2], y_res[3]] -} - -/// Adds two points `p1` and `p2` on the BN254 curve -pub fn add_bn254(p1: &[u64; 8], p2: &[u64; 8]) -> [u64; 8] { - if *p1 == IDENTITY_G1 { - return *p2; - } else if *p2 == IDENTITY_G1 { - return *p1; - } - +/// Adds two non-zero points `p1` and `p2` on the BN254 curve +pub fn add_bn254( + p1: &[u64; 8], + p2: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 8] { let x1: [u64; 4] = p1[0..4].try_into().unwrap(); let y1: [u64; 4] = p1[4..8].try_into().unwrap(); let x2: [u64; 4] = p2[0..4].try_into().unwrap(); @@ -67,10 +75,14 @@ pub fn add_bn254(p1: &[u64; 8], p2: &[u64; 8]) -> [u64; 8] { // Is y1 == y2? if eq(&y1, &y2) { // Compute the doubling - return dbl_bn254(p1); + return dbl_bn254( + p1, + #[cfg(feature = "hints")] + hints, + ); } else { // Return 𝒪 - return IDENTITY_G1; + return G1_IDENTITY; } } @@ -82,7 +94,11 @@ pub fn add_bn254(p1: &[u64; 8], p2: &[u64; 8]) -> [u64; 8] { // Call the syscall to add the two points let mut params = SyscallBn254CurveAddParams { p1: &mut p1, p2: &p2 }; - syscall_bn254_curve_add(&mut params); + syscall_bn254_curve_add( + &mut params, + #[cfg(feature = "hints")] + hints, + ); // Convert the result back to a single array let x3 = params.p1.x; @@ -90,23 +106,126 @@ pub fn add_bn254(p1: &[u64; 8], p2: &[u64; 8]) -> [u64; 8] { [x3[0], x3[1], x3[2], x3[3], y3[0], y3[1], y3[2], y3[3]] } -pub fn dbl_bn254(p: &[u64; 8]) -> [u64; 8] { +/// Addition of two points with validation and identity handling +pub fn add_complete_bn254( + p1: &[u64; 8], + p2: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Result<[u64; 8], u8> { + let p1_is_inf = eq(p1, &G1_IDENTITY); + let p2_is_inf = eq(p2, &G1_IDENTITY); + + // Handle identity cases + if p1_is_inf && p2_is_inf { + return Ok(G1_IDENTITY); + } + + if p1_is_inf { + // Validate p2 field elements and curve membership + let x2: [u64; 4] = p2[0..4].try_into().unwrap(); + let y2: [u64; 4] = p2[4..8].try_into().unwrap(); + if !lt(&x2, &P) || !lt(&y2, &P) { + return Err(G1_ADD_ERR_INVALID); + } + if !is_on_curve_bn254( + p2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G1_ADD_ERR_NOT_ON_CURVE); + } + return Ok(*p2); + } + + if p2_is_inf { + // Validate p1 field elements and curve membership + let x1: [u64; 4] = p1[0..4].try_into().unwrap(); + let y1: [u64; 4] = p1[4..8].try_into().unwrap(); + if !lt(&x1, &P) || !lt(&y1, &P) { + return Err(G1_ADD_ERR_INVALID); + } + if !is_on_curve_bn254( + p1, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G1_ADD_ERR_NOT_ON_CURVE); + } + return Ok(*p1); + } + + // Both points are non-identity, validate both + let x1: [u64; 4] = p1[0..4].try_into().unwrap(); + let y1: [u64; 4] = p1[4..8].try_into().unwrap(); + if !lt(&x1, &P) || !lt(&y1, &P) { + return Err(G1_ADD_ERR_INVALID); + } + if !is_on_curve_bn254( + p1, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G1_ADD_ERR_NOT_ON_CURVE); + } + + let x2: [u64; 4] = p2[0..4].try_into().unwrap(); + let y2: [u64; 4] = p2[4..8].try_into().unwrap(); + if !lt(&x2, &P) || !lt(&y2, &P) { + return Err(G1_ADD_ERR_INVALID); + } + if !is_on_curve_bn254( + p2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G1_ADD_ERR_NOT_ON_CURVE); + } + + // Perform addition + Ok(add_bn254( + p1, + p2, + #[cfg(feature = "hints")] + hints, + )) +} + +/// Doubles a non-zero point `p` on the BN254 curve +pub fn dbl_bn254(p: &[u64; 8], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 8] { let mut p1 = SyscallPoint256 { x: p[0..4].try_into().unwrap(), y: p[4..8].try_into().unwrap() }; - syscall_bn254_curve_dbl(&mut p1); + syscall_bn254_curve_dbl( + &mut p1, + #[cfg(feature = "hints")] + hints, + ); [p1.x[0], p1.x[1], p1.x[2], p1.x[3], p1.y[0], p1.y[1], p1.y[2], p1.y[3]] } -/// Multiplies a point `p` on the BN254 curve by a scalar `k` on the BN254 scalar field -pub fn mul_bn254(p: &[u64; 8], k: &[u64; 4]) -> [u64; 8] { - if *p == IDENTITY_G1 { - return IDENTITY_G1; - } +/// Negation of a point +pub fn neg_bn254(p: &[u64; 8], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 8] { + let x: [u64; 4] = p[0..4].try_into().unwrap(); + let y: [u64; 4] = p[4..8].try_into().unwrap(); + // Compute the negation + let y_neg = neg_fp_bn254( + &y, + #[cfg(feature = "hints")] + hints, + ); + [x[0], x[1], x[2], x[3], y_neg[0], y_neg[1], y_neg[2], y_neg[3]] +} + +/// Multiplies a non-zero point `p` on the BN254 curve by a scalar `k` on the BN254 scalar field +pub fn scalar_mul_bn254( + p: &[u64; 8], + k: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 8] { // Direct cases: k = 0, k = 1, k = 2 match k { [0, 0, 0, 0] => { // Return 𝒪 - return IDENTITY_G1; + return G1_IDENTITY; } [1, 0, 0, 0] => { // Return p @@ -114,7 +233,11 @@ pub fn mul_bn254(p: &[u64; 8], k: &[u64; 4]) -> [u64; 8] { } [2, 0, 0, 0] => { // Return 2p - return dbl_bn254(p); + return dbl_bn254( + p, + #[cfg(feature = "hints")] + hints, + ); } _ => {} } @@ -123,7 +246,12 @@ pub fn mul_bn254(p: &[u64; 8], k: &[u64; 4]) -> [u64; 8] { // Hint the length the binary representations of k // We will verify the output by recomposing k // Moreover, we should check that the first received bit is 1 - let (max_limb, max_bit) = fcall_msb_pos_256(k, &[0, 0, 0, 0]); + let (max_limb, max_bit) = fcall_msb_pos_256( + k, + &[0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); // Perform the loop, based on the binary representation of k @@ -156,13 +284,21 @@ pub fn mul_bn254(p: &[u64; 8], k: &[u64; 4]) -> [u64; 8] { for i in (0..=limb).rev() { for j in (0..=bit).rev() { // Always double - syscall_bn254_curve_dbl(&mut q); + syscall_bn254_curve_dbl( + &mut q, + #[cfg(feature = "hints")] + hints, + ); // Get the next bit b of k. // If b == 1, we should add P to Q, otherwise start the next iteration if ((k[i] >> j) & 1) == 1 { let mut params = SyscallBn254CurveAddParams { p1: &mut q, p2: &p }; - syscall_bn254_curve_add(&mut params); + syscall_bn254_curve_add( + &mut params, + #[cfg(feature = "hints")] + hints, + ); // Reconstruct k k_rec[i] |= 1 << j; @@ -180,82 +316,178 @@ pub fn mul_bn254(p: &[u64; 8], k: &[u64; 4]) -> [u64; 8] { [x3[0], x3[1], x3[2], x3[3], y3[0], y3[1], y3[2], y3[3]] } -/// # Safety -/// `p` must point to a valid `[u64; 8]` (64 bytes, affine G1 point). -#[no_mangle] -pub unsafe extern "C" fn is_on_curve_bn254_c(p_ptr: *const u64) -> bool { - let p = unsafe { &*(p_ptr as *const [u64; 8]) }; - is_on_curve_bn254(p) +/// Scalar multiplication with validation and identity handling +pub fn mul_complete_bn254( + p: &[u64; 8], + k: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Result<[u64; 8], u8> { + // If point is infinity, result is infinity + if eq(p, &G1_IDENTITY) { + return Ok(G1_IDENTITY); + } + + // Point is not infinity, validate field elements and curve membership + let x: [u64; 4] = p[0..4].try_into().unwrap(); + let y: [u64; 4] = p[4..8].try_into().unwrap(); + + if !lt(&x, &P) || !lt(&y, &P) { + return Err(G1_MUL_ERR_NOT_IN_FIELD); + } + + if !is_on_curve_bn254( + p, + #[cfg(feature = "hints")] + hints, + ) { + return Err(G1_MUL_ERR_NOT_ON_CURVE); + } + + // Reduce the scalar + let k = reduce_fr_bn254( + k, + #[cfg(feature = "hints")] + hints, + ); + + // Perform scalar multiplication + Ok(scalar_mul_bn254( + p, + &k, + #[cfg(feature = "hints")] + hints, + )) } +/// BN254 G1 point addition with big-endian byte format +/// /// # Safety -/// - `p` must point to a valid `[u64; 12]` (96 bytes, Jacobian G1 point). -/// - `out` must point to a valid `[u64; 8]` (64 bytes) writable buffer. -#[no_mangle] -pub unsafe extern "C" fn to_affine_bn254_c(p_ptr: *const u64, out_ptr: *mut u64) -> bool { - let p = unsafe { &*(p_ptr as *const [u64; 12]) }; - let result = to_affine_bn254(p); - - *out_ptr.add(0) = result[0]; - *out_ptr.add(1) = result[1]; - *out_ptr.add(2) = result[2]; - *out_ptr.add(3) = result[3]; - *out_ptr.add(4) = result[4]; - *out_ptr.add(5) = result[5]; - *out_ptr.add(6) = result[6]; - *out_ptr.add(7) = result[7]; - - result == IDENTITY_G1 +/// - `p1` must point to at least 64 bytes +/// - `p2` must point to at least 64 bytes +/// - `result` must point to a writable buffer of at least 64 bytes +/// +/// # Returns +/// - 0 if the operation succeeded +/// - 1 if p1 is invalid (not on curve or invalid field element) +/// - 2 if p2 is invalid (not on curve or invalid field element) +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_bn254_g1_add_c")] +pub unsafe extern "C" fn bn254_g1_add_c( + p1: *const u8, + p2: *const u8, + ret: *mut u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u8 { + let p1_bytes: &[u8; 64] = &*(p1 as *const [u8; 64]); + let p2_bytes: &[u8; 64] = &*(p2 as *const [u8; 64]); + let ret_bytes: &mut [u8; 64] = &mut *(ret as *mut [u8; 64]); + + // Convert to internal format + let p1_u64 = g1_bytes_be_to_u64_le_bn254(p1_bytes); + let p2_u64 = g1_bytes_be_to_u64_le_bn254(p2_bytes); + + // Perform addition with validation + let result = match add_complete_bn254( + &p1_u64, + &p2_u64, + #[cfg(feature = "hints")] + hints, + ) { + Ok(r) => r, + Err(code) => return code, + }; + + // Encode result + if result == G1_IDENTITY { + G1_ADD_SUCCESS_INFINITY + } else { + g1_u64_le_to_bytes_be_bn254(&result, ret_bytes); + G1_ADD_SUCCESS + } } +/// BN254 G1 scalar multiplication with big-endian byte format +/// /// # Safety -/// - `p1_ptr` must point to a valid `[u64; 8]` (64 bytes, affine G1 point). -/// - `p2_ptr` must point to a valid `[u64; 8]` (64 bytes, affine G1 point). -/// - `out_ptr` must point to a valid `[u64; 8]` (64 bytes) writable buffer. -#[no_mangle] -pub unsafe extern "C" fn add_bn254_c( - p1_ptr: *const u64, - p2_ptr: *const u64, - out_ptr: *mut u64, -) -> bool { - let p1 = unsafe { &*(p1_ptr as *const [u64; 8]) }; - let p2 = unsafe { &*(p2_ptr as *const [u64; 8]) }; - let result = add_bn254(p1, p2); - - *out_ptr.add(0) = result[0]; - *out_ptr.add(1) = result[1]; - *out_ptr.add(2) = result[2]; - *out_ptr.add(3) = result[3]; - *out_ptr.add(4) = result[4]; - *out_ptr.add(5) = result[5]; - *out_ptr.add(6) = result[6]; - *out_ptr.add(7) = result[7]; - - result == IDENTITY_G1 +/// - `point` must point to at least 64 bytes +/// - `scalar` must point to at least 32 bytes +/// - `result` must point to a writable buffer of at least 64 bytes +/// +/// # Returns +/// - 0 if the operation succeeded +/// - 1 if point is invalid (not on curve or invalid field element) +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_bn254_g1_mul_c")] +pub unsafe extern "C" fn bn254_g1_mul_c( + point: *const u8, + scalar: *const u8, + ret: *mut u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u8 { + let point_bytes: &[u8; 64] = &*(point as *const [u8; 64]); + let scalar_bytes: &[u8; 32] = &*(scalar as *const [u8; 32]); + let ret_bytes: &mut [u8; 64] = &mut *(ret as *mut [u8; 64]); + + // Convert to internal format + let point_u64 = g1_bytes_be_to_u64_le_bn254(point_bytes); + let scalar_u64 = scalar_bytes_be_to_u64_le_bn254(scalar_bytes); + + // Perform scalar multiplication with validation + let product = match mul_complete_bn254( + &point_u64, + &scalar_u64, + #[cfg(feature = "hints")] + hints, + ) { + Ok(r) => r, + Err(code) => return code, + }; + + // Encode result + if product == G1_IDENTITY { + G1_MUL_SUCCESS_INFINITY + } else { + g1_u64_le_to_bytes_be_bn254(&product, ret_bytes); + G1_MUL_SUCCESS + } } -/// # Safety -/// - `p_ptr` must point to a valid `[u64; 8]` (64 bytes, affine G1 point). -/// - `k_ptr` must point to a valid `[u64; 4]` (32 bytes, scalar). -/// - `out_ptr` must point to a valid `[u64; 8]` (64 bytes) writable buffer. -#[no_mangle] -pub unsafe extern "C" fn mul_bn254_c( - p_ptr: *const u64, - k_ptr: *const u64, - out_ptr: *mut u64, -) -> bool { - let p = unsafe { &*(p_ptr as *const [u64; 8]) }; - let k = unsafe { &*(k_ptr as *const [u64; 4]) }; - let result = mul_bn254(p, k); - - *out_ptr.add(0) = result[0]; - *out_ptr.add(1) = result[1]; - *out_ptr.add(2) = result[2]; - *out_ptr.add(3) = result[3]; - *out_ptr.add(4) = result[4]; - *out_ptr.add(5) = result[5]; - *out_ptr.add(6) = result[6]; - *out_ptr.add(7) = result[7]; - - result == IDENTITY_G1 +/// Convert 64-byte big-endian G1 point to [u64; 8] little-endian +pub fn g1_bytes_be_to_u64_le_bn254(bytes: &[u8; 64]) -> [u64; 8] { + let mut result = [0u64; 8]; + + // x-coordinate (first 32 bytes) + for i in 0..4 { + for j in 0..8 { + result[3 - i] |= (bytes[i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + // y-coordinate (next 32 bytes) + for i in 0..4 { + for j in 0..8 { + result[7 - i] |= (bytes[32 + i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + result +} + +/// Convert little-endian u64 limbs to big-endian bytes for a G1 point ([u64; 8] -> 64 bytes) +fn g1_u64_le_to_bytes_be_bn254(limbs: &[u64; 8], bytes: &mut [u8; 64]) { + // Encode x coordinate (first 32 bytes, big-endian) + for i in 0..4 { + let limb = limbs[3 - i]; + for j in 0..8 { + bytes[i * 8 + j] = ((limb >> (8 * (7 - j))) & 0xff) as u8; + } + } + + // Encode y coordinate (next 32 bytes, big-endian) + for i in 0..4 { + let limb = limbs[7 - i]; + for j in 0..8 { + bytes[32 + i * 8 + j] = ((limb >> (8 * (7 - j))) & 0xff) as u8; + } + } } diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/cyclotomic.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/cyclotomic.rs index b54e3a55c..771a94379 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bn254/cyclotomic.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/cyclotomic.rs @@ -47,7 +47,10 @@ pub fn compress_cyclo_bn254(a: &[u64; 48]) -> [u64; 32] { /// **NOTE**: If the input is not of the form C(a), where a ∈ GΦ6(p²), then the compression-decompression /// technique is not well defined. This means that D(C(a)) != a. #[inline] -pub fn decompress_cyclo_bn254(a: &[u64; 32]) -> [u64; 48] { +pub fn decompress_cyclo_bn254( + a: &[u64; 32], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { let a2: &[u64; 8] = &a[0..8].try_into().unwrap(); let a3: &[u64; 8] = &a[8..16].try_into().unwrap(); let a4: &[u64; 8] = &a[16..24].try_into().unwrap(); @@ -55,39 +58,179 @@ pub fn decompress_cyclo_bn254(a: &[u64; 32]) -> [u64; 48] { let (a0, a1) = if eq(a2, &[0, 0, 0, 0, 0, 0, 0, 0]) { // a1 = (2·a4·a5)/a3 - let a3_inv = inv_fp2_bn254(a3); - let mut a1 = mul_fp2_bn254(a4, a5); - a1 = dbl_fp2_bn254(&a1); - a1 = mul_fp2_bn254(&a1, &a3_inv); + let a3_inv = inv_fp2_bn254( + a3, + #[cfg(feature = "hints")] + hints, + ); + let mut a1 = mul_fp2_bn254( + a4, + a5, + #[cfg(feature = "hints")] + hints, + ); + a1 = dbl_fp2_bn254( + &a1, + #[cfg(feature = "hints")] + hints, + ); + a1 = mul_fp2_bn254( + &a1, + &a3_inv, + #[cfg(feature = "hints")] + hints, + ); // a0 = (2·a1² - 3·a3·a4)(9+u) + 1 - let a3a4 = mul_fp2_bn254(a3, a4); - let mut a0 = square_fp2_bn254(&a1); - a0 = dbl_fp2_bn254(&a0); - a0 = sub_fp2_bn254(&a0, &scalar_mul_fp2_bn254(&a3a4, &[3, 0, 0, 0])); - a0 = mul_fp2_bn254(&a0, &[9, 0, 0, 0, 1, 0, 0, 0]); - a0 = add_fp2_bn254(&a0, &[1, 0, 0, 0, 0, 0, 0, 0]); + let a3a4 = mul_fp2_bn254( + a3, + a4, + #[cfg(feature = "hints")] + hints, + ); + let mut a0 = square_fp2_bn254( + &a1, + #[cfg(feature = "hints")] + hints, + ); + a0 = dbl_fp2_bn254( + &a0, + #[cfg(feature = "hints")] + hints, + ); + a0 = sub_fp2_bn254( + &a0, + &scalar_mul_fp2_bn254( + &a3a4, + &[3, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + a0 = mul_fp2_bn254( + &a0, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + a0 = add_fp2_bn254( + &a0, + &[1, 0, 0, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); (a0, a1) } else { // a1 = (a5²·(9+u) + 3·a4² - 2·a3)/(4·a2) - let a2_inv = inv_fp2_bn254(&scalar_mul_fp2_bn254(a2, &[4, 0, 0, 0])); - let a4_sq = square_fp2_bn254(a4); - let mut a1 = square_fp2_bn254(a5); - a1 = mul_fp2_bn254(&a1, &[9, 0, 0, 0, 1, 0, 0, 0]); - a1 = add_fp2_bn254(&a1, &scalar_mul_fp2_bn254(&a4_sq, &[3, 0, 0, 0])); - a1 = sub_fp2_bn254(&a1, &dbl_fp2_bn254(a3)); - a1 = mul_fp2_bn254(&a1, &a2_inv); + let a2_inv = inv_fp2_bn254( + &scalar_mul_fp2_bn254( + a2, + &[4, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + let a4_sq = square_fp2_bn254( + a4, + #[cfg(feature = "hints")] + hints, + ); + let mut a1 = square_fp2_bn254( + a5, + #[cfg(feature = "hints")] + hints, + ); + a1 = mul_fp2_bn254( + &a1, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + a1 = add_fp2_bn254( + &a1, + &scalar_mul_fp2_bn254( + &a4_sq, + &[3, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + a1 = sub_fp2_bn254( + &a1, + &dbl_fp2_bn254( + a3, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + a1 = mul_fp2_bn254( + &a1, + &a2_inv, + #[cfg(feature = "hints")] + hints, + ); // a0 = (2·a1² + a2·a5 - 3·a3·a4)(9+u) + 1 - let a3a4 = mul_fp2_bn254(a3, a4); - let a2a5 = mul_fp2_bn254(a2, a5); - let mut a0 = square_fp2_bn254(&a1); - a0 = dbl_fp2_bn254(&a0); - a0 = add_fp2_bn254(&a0, &a2a5); - a0 = sub_fp2_bn254(&a0, &scalar_mul_fp2_bn254(&a3a4, &[3, 0, 0, 0])); - a0 = mul_fp2_bn254(&a0, &[9, 0, 0, 0, 1, 0, 0, 0]); - a0 = add_fp2_bn254(&a0, &[1, 0, 0, 0, 0, 0, 0, 0]); + let a3a4 = mul_fp2_bn254( + a3, + a4, + #[cfg(feature = "hints")] + hints, + ); + let a2a5 = mul_fp2_bn254( + a2, + a5, + #[cfg(feature = "hints")] + hints, + ); + let mut a0 = square_fp2_bn254( + &a1, + #[cfg(feature = "hints")] + hints, + ); + a0 = dbl_fp2_bn254( + &a0, + #[cfg(feature = "hints")] + hints, + ); + a0 = add_fp2_bn254( + &a0, + &a2a5, + #[cfg(feature = "hints")] + hints, + ); + a0 = sub_fp2_bn254( + &a0, + &scalar_mul_fp2_bn254( + &a3a4, + &[3, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + a0 = mul_fp2_bn254( + &a0, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + a0 = add_fp2_bn254( + &a0, + &[1, 0, 0, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); (a0, a1) }; @@ -117,46 +260,180 @@ pub fn decompress_cyclo_bn254(a: &[u64; 32]) -> [u64; 48] { // - B45 = a4·a5 // /// **NOTE**: The output is not guaranteed to be in GΦ6(p²), if the input isn't. -pub fn square_cyclo_bn254(a: &[u64; 32]) -> [u64; 32] { +pub fn square_cyclo_bn254( + a: &[u64; 32], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 32] { let a2: &[u64; 8] = &a[0..8].try_into().unwrap(); let a3: &[u64; 8] = &a[8..16].try_into().unwrap(); let a4: &[u64; 8] = &a[16..24].try_into().unwrap(); let a5: &[u64; 8] = &a[24..32].try_into().unwrap(); // B23 = a2·a3, B45 = a4·a5 - let b23 = mul_fp2_bn254(a2, a3); - let b45 = mul_fp2_bn254(a4, a5); + let b23 = mul_fp2_bn254( + a2, + a3, + #[cfg(feature = "hints")] + hints, + ); + let b45 = mul_fp2_bn254( + a4, + a5, + #[cfg(feature = "hints")] + hints, + ); // A23 = (a2 + a3)·(a2 + (9+u)·a3) - let a3xi = mul_fp2_bn254(a3, &[9, 0, 0, 0, 1, 0, 0, 0]); - let a23 = mul_fp2_bn254(&add_fp2_bn254(a2, a3), &add_fp2_bn254(a2, &a3xi)); + let a3xi = mul_fp2_bn254( + a3, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + let a23 = mul_fp2_bn254( + &add_fp2_bn254( + a2, + a3, + #[cfg(feature = "hints")] + hints, + ), + &add_fp2_bn254( + a2, + &a3xi, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // A45 = (a4 + a5)·(a4 + (9+u)·a5) - let a5xi = mul_fp2_bn254(a5, &[9, 0, 0, 0, 1, 0, 0, 0]); - let a45 = mul_fp2_bn254(&add_fp2_bn254(a4, a5), &add_fp2_bn254(a4, &a5xi)); + let a5xi = mul_fp2_bn254( + a5, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + let a45 = mul_fp2_bn254( + &add_fp2_bn254( + a4, + a5, + #[cfg(feature = "hints")] + hints, + ), + &add_fp2_bn254( + a4, + &a5xi, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // b2 = 2(a2 + 3·(9+u)·B45) - let mut b2 = mul_fp2_bn254(&b45, &[9, 0, 0, 0, 1, 0, 0, 0]); - b2 = scalar_mul_fp2_bn254(&b2, &[3, 0, 0, 0]); - b2 = add_fp2_bn254(a2, &b2); - b2 = dbl_fp2_bn254(&b2); + let mut b2 = mul_fp2_bn254( + &b45, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b2 = scalar_mul_fp2_bn254( + &b2, + &[3, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b2 = add_fp2_bn254( + a2, + &b2, + #[cfg(feature = "hints")] + hints, + ); + b2 = dbl_fp2_bn254( + &b2, + #[cfg(feature = "hints")] + hints, + ); // b3 = 3·(A45 - (10+u)·B45) - 2·a3 - let mut b3 = mul_fp2_bn254(&b45, &[10, 0, 0, 0, 1, 0, 0, 0]); - b3 = sub_fp2_bn254(&a45, &b3); - b3 = scalar_mul_fp2_bn254(&b3, &[3, 0, 0, 0]); - b3 = sub_fp2_bn254(&b3, &dbl_fp2_bn254(a3)); + let mut b3 = mul_fp2_bn254( + &b45, + &[10, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b3 = sub_fp2_bn254( + &a45, + &b3, + #[cfg(feature = "hints")] + hints, + ); + b3 = scalar_mul_fp2_bn254( + &b3, + &[3, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b3 = sub_fp2_bn254( + &b3, + &dbl_fp2_bn254( + a3, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // b4 = 3·(A23 - (10+u)·B23) - 2·a4 - let mut b4 = mul_fp2_bn254(&b23, &[10, 0, 0, 0, 1, 0, 0, 0]); - b4 = sub_fp2_bn254(&a23, &b4); - b4 = scalar_mul_fp2_bn254(&b4, &[3, 0, 0, 0]); - b4 = sub_fp2_bn254(&b4, &dbl_fp2_bn254(a4)); + let mut b4 = mul_fp2_bn254( + &b23, + &[10, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b4 = sub_fp2_bn254( + &a23, + &b4, + #[cfg(feature = "hints")] + hints, + ); + b4 = scalar_mul_fp2_bn254( + &b4, + &[3, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b4 = sub_fp2_bn254( + &b4, + &dbl_fp2_bn254( + a4, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // b5 = 2·(a5 + 3·B23) - let mut b5 = scalar_mul_fp2_bn254(&b23, &[3, 0, 0, 0]); - b5 = add_fp2_bn254(a5, &b5); - b5 = dbl_fp2_bn254(&b5); + let mut b5 = scalar_mul_fp2_bn254( + &b23, + &[3, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + b5 = add_fp2_bn254( + a5, + &b5, + #[cfg(feature = "hints")] + hints, + ); + b5 = dbl_fp2_bn254( + &b5, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0; 32]; result[0..8].copy_from_slice(&b2); @@ -173,7 +450,10 @@ pub fn square_cyclo_bn254(a: &[u64; 32]) -> [u64; 32] { // out: a^x = (a0 + a4·v + a3·v²) + (a2 + a1·v + a5·v²)·w ∈ ∈ GΦ6(p²) // /// **NOTE**: The output is not guaranteed to be in GΦ6(p²), if the input isn't. -pub fn exp_by_x_cyclo_bn254(a: &[u64; 48]) -> [u64; 48] { +pub fn exp_by_x_cyclo_bn254( + a: &[u64; 48], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { // Binary representation of the exponent x = 4965661367192848881 in big-endian format const X_BIN_LE: [u8; 63] = [ 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, @@ -188,12 +468,25 @@ pub fn exp_by_x_cyclo_bn254(a: &[u64; 48]) -> [u64; 48] { let mut comp = compress_cyclo_bn254(a); for &bit in X_BIN_LE.iter().skip(1) { // We always square (in compressed form): C(c²) - comp = square_cyclo_bn254(&comp); + comp = square_cyclo_bn254( + &comp, + #[cfg(feature = "hints")] + hints, + ); if bit == 1 { // decompress and multiply - let decomp = decompress_cyclo_bn254(&comp); - result = mul_fp12_bn254(&result, &decomp); + let decomp = decompress_cyclo_bn254( + &comp, + #[cfg(feature = "hints")] + hints, + ); + result = mul_fp12_bn254( + &result, + &decomp, + #[cfg(feature = "hints")] + hints, + ); } } diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/final_exp.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/final_exp.rs index 8a6237807..451fee49a 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bn254/final_exp.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/final_exp.rs @@ -12,90 +12,249 @@ use super::{ // However, I dont think its a good idea in general to optimize verification "at all costs". /// Given f ∈ Fp12*, computes f^((p¹²-1)/r) ∈ Fp12* -pub fn final_exp_bn254(f: &[u64; 48]) -> [u64; 48] { +pub fn final_exp_bn254(f: &[u64; 48], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 48] { ////////////////// // The easy part: exp by (p^6-1)(p^2+1) ////////////////// // f^(p^6-1) = f̅·f⁻¹ - let f_conj = conjugate_fp12_bn254(f); - let f_inv = inv_fp12_bn254(f); - let easy1 = mul_fp12_bn254(&f_conj, &f_inv); + let f_conj = conjugate_fp12_bn254( + f, + #[cfg(feature = "hints")] + hints, + ); + let f_inv = inv_fp12_bn254( + f, + #[cfg(feature = "hints")] + hints, + ); + let easy1 = mul_fp12_bn254( + &f_conj, + &f_inv, + #[cfg(feature = "hints")] + hints, + ); // easy1^(p²-1) = easy1^p²·easy1 - let mut m = frobenius2_fp12_bn254(&easy1); - m = mul_fp12_bn254(&m, &easy1); + let mut m = frobenius2_fp12_bn254( + &easy1, + #[cfg(feature = "hints")] + hints, + ); + m = mul_fp12_bn254( + &m, + &easy1, + #[cfg(feature = "hints")] + hints, + ); ////////////////// // The hard part: exp by (p⁴-p²+1)/r ////////////////// // m^x, (m^x)^x, (m^{x²})^x - let mx = exp_by_x_cyclo_bn254(&m); - let mxx = exp_by_x_cyclo_bn254(&mx); - let mxxx = exp_by_x_cyclo_bn254(&mxx); + let mx = exp_by_x_cyclo_bn254( + &m, + #[cfg(feature = "hints")] + hints, + ); + let mxx = exp_by_x_cyclo_bn254( + &mx, + #[cfg(feature = "hints")] + hints, + ); + let mxxx = exp_by_x_cyclo_bn254( + &mxx, + #[cfg(feature = "hints")] + hints, + ); // m^p, m^p², m^p³, (m^x)^p, (m^x²)^p, (m^x³)^p, (m^x²)^p² - let mp = frobenius1_fp12_bn254(&m); - let mpp = frobenius2_fp12_bn254(&m); - let mppp = frobenius3_fp12_bn254(&m); - let mxp = frobenius1_fp12_bn254(&mx); - let mxxp = frobenius1_fp12_bn254(&mxx); - let mxxxp = frobenius1_fp12_bn254(&mxxx); - let mxxpp = frobenius2_fp12_bn254(&mxx); + let mp = frobenius1_fp12_bn254( + &m, + #[cfg(feature = "hints")] + hints, + ); + let mpp = frobenius2_fp12_bn254( + &m, + #[cfg(feature = "hints")] + hints, + ); + let mppp = frobenius3_fp12_bn254( + &m, + #[cfg(feature = "hints")] + hints, + ); + let mxp = frobenius1_fp12_bn254( + &mx, + #[cfg(feature = "hints")] + hints, + ); + let mxxp = frobenius1_fp12_bn254( + &mxx, + #[cfg(feature = "hints")] + hints, + ); + let mxxxp = frobenius1_fp12_bn254( + &mxxx, + #[cfg(feature = "hints")] + hints, + ); + let mxxpp = frobenius2_fp12_bn254( + &mxx, + #[cfg(feature = "hints")] + hints, + ); // y1 = m^p·m^p²·m^p³ - let mut y1 = mul_fp12_bn254(&mp, &mpp); - y1 = mul_fp12_bn254(&y1, &mppp); + let mut y1 = mul_fp12_bn254( + &mp, + &mpp, + #[cfg(feature = "hints")] + hints, + ); + y1 = mul_fp12_bn254( + &y1, + &mppp, + #[cfg(feature = "hints")] + hints, + ); // y2 = m̅ - let y2 = conjugate_fp12_bn254(&m); + let y2 = conjugate_fp12_bn254( + &m, + #[cfg(feature = "hints")] + hints, + ); // y3 = (m^x²)^p² (already done) // y4 = \bar{(m^x)^p} - let y4 = conjugate_fp12_bn254(&mxp); + let y4 = conjugate_fp12_bn254( + &mxp, + #[cfg(feature = "hints")] + hints, + ); // y5 = \bar{m^x·(m^x²)^p} - let mut y5 = mul_fp12_bn254(&mx, &mxxp); - y5 = conjugate_fp12_bn254(&y5); - + let mut y5 = mul_fp12_bn254( + &mx, + &mxxp, + #[cfg(feature = "hints")] + hints, + ); + y5 = conjugate_fp12_bn254( + &y5, + #[cfg(feature = "hints")] + hints, + ); // y6 = \bar{m^x²} - let y6 = conjugate_fp12_bn254(&mxx); + let y6 = conjugate_fp12_bn254( + &mxx, + #[cfg(feature = "hints")] + hints, + ); // y7 = \bar{m^x³·(m^x³)^p} - let mut y7 = mul_fp12_bn254(&mxxx, &mxxxp); - y7 = conjugate_fp12_bn254(&y7); - + let mut y7 = mul_fp12_bn254( + &mxxx, + &mxxxp, + #[cfg(feature = "hints")] + hints, + ); + y7 = conjugate_fp12_bn254( + &y7, + #[cfg(feature = "hints")] + hints, + ); // Compute y1·y2²·y3⁶·y4¹²·y5¹⁸·y6³⁰·y7³⁶ as follows // T11 = y7²·y5·y6 - let mut t11 = square_fp12_bn254(&y7); - t11 = mul_fp12_bn254(&t11, &y5); - t11 = mul_fp12_bn254(&t11, &y6); + let mut t11 = square_fp12_bn254( + &y7, + #[cfg(feature = "hints")] + hints, + ); + t11 = mul_fp12_bn254( + &t11, + &y5, + #[cfg(feature = "hints")] + hints, + ); + t11 = mul_fp12_bn254( + &t11, + &y6, + #[cfg(feature = "hints")] + hints, + ); // T21 = T11·y4·y6 - let mut t21 = mul_fp12_bn254(&t11, &y4); - t21 = mul_fp12_bn254(&t21, &y6); + let mut t21 = mul_fp12_bn254( + &t11, + &y4, + #[cfg(feature = "hints")] + hints, + ); + t21 = mul_fp12_bn254( + &t21, + &y6, + #[cfg(feature = "hints")] + hints, + ); // T12 = T11·y3 - let t12 = mul_fp12_bn254(&t11, &mxxpp); - + let t12 = mul_fp12_bn254( + &t11, + &mxxpp, + #[cfg(feature = "hints")] + hints, + ); // T22 = T21²·T12 - let mut t22 = square_fp12_bn254(&t21); - t22 = mul_fp12_bn254(&t22, &t12); + let mut t22 = square_fp12_bn254( + &t21, + #[cfg(feature = "hints")] + hints, + ); + t22 = mul_fp12_bn254( + &t22, + &t12, + #[cfg(feature = "hints")] + hints, + ); // T23 = T22² - let t23 = square_fp12_bn254(&t22); + let t23 = square_fp12_bn254( + &t22, + #[cfg(feature = "hints")] + hints, + ); // T24 = T23·y1 - let t24 = mul_fp12_bn254(&t23, &y1); + let t24 = mul_fp12_bn254( + &t23, + &y1, + #[cfg(feature = "hints")] + hints, + ); // T13 = T23·y2 - let t13 = mul_fp12_bn254(&t23, &y2); + let t13 = mul_fp12_bn254( + &t23, + &y2, + #[cfg(feature = "hints")] + hints, + ); // T14 = T13²·T24 - let mut t14 = square_fp12_bn254(&t13); - t14 = mul_fp12_bn254(&t14, &t24); - + let mut t14 = square_fp12_bn254( + &t13, + #[cfg(feature = "hints")] + hints, + ); + t14 = mul_fp12_bn254( + &t14, + &t24, + #[cfg(feature = "hints")] + hints, + ); t14 } diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/fp.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/fp.rs index c7323346b..8073bac77 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bn254/fp.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/fp.rs @@ -2,24 +2,32 @@ use crate::{ syscalls::{syscall_arith256_mod, SyscallArith256ModParams}, - zisklib::{eq, fcall_bn254_fp_inv}, + zisklib::{eq, fcall_bn254_fp_inv, lt}, }; use super::constants::{P, P_MINUS_ONE}; /// Addition in the base field of the BN254 curve #[inline] -pub fn add_fp_bn254(x: &[u64; 4], y: &[u64; 4]) -> [u64; 4] { +pub fn add_fp_bn254( + x: &[u64; 4], + y: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { // x·1 + y let mut params = SyscallArith256ModParams { a: x, b: &[1, 0, 0, 0], c: y, module: &P, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Negation in the base field of the BN254 curve #[inline] -pub fn neg_fp_bn254(x: &[u64; 4]) -> [u64; 4] { +pub fn neg_fp_bn254(x: &[u64; 4], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 4] { // x·(-1) + 0 let mut params = SyscallArith256ModParams { a: x, @@ -28,33 +36,49 @@ pub fn neg_fp_bn254(x: &[u64; 4]) -> [u64; 4] { module: &P, d: &mut [0, 0, 0, 0], }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Multiplication in the base field of the BN254 curve #[inline] -pub fn mul_fp_bn254(x: &[u64; 4], y: &[u64; 4]) -> [u64; 4] { +pub fn mul_fp_bn254( + x: &[u64; 4], + y: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { // x·y + 0 let mut params = SyscallArith256ModParams { a: x, b: y, c: &[0, 0, 0, 0], module: &P, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Squaring in the base field of the BN254 curve #[inline] -pub fn square_fp_bn254(x: &[u64; 4]) -> [u64; 4] { +pub fn square_fp_bn254(x: &[u64; 4], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 4] { // x·x + 0 let mut params = SyscallArith256ModParams { a: x, b: x, c: &[0, 0, 0, 0], module: &P, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } /// Inversion in the base field of the BN254 curve #[inline] -pub fn inv_fp_bn254(x: &[u64; 4]) -> [u64; 4] { +pub fn inv_fp_bn254(x: &[u64; 4], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 4] { // if x == 0, return 0 if eq(x, &[0, 0, 0, 0]) { return [0, 0, 0, 0]; @@ -64,7 +88,11 @@ pub fn inv_fp_bn254(x: &[u64; 4]) -> [u64; 4] { // Remember that an element y ∈ Fp is the inverse of x ∈ Fp if and only if x·y = 1 in Fp // We will therefore hint the inverse y and check the product with x is 1 - let inv = fcall_bn254_fp_inv(x); + let inv = fcall_bn254_fp_inv( + x, + #[cfg(feature = "hints")] + hints, + ); // x·y + 0 let mut params = SyscallArith256ModParams { @@ -74,7 +102,11 @@ pub fn inv_fp_bn254(x: &[u64; 4]) -> [u64; 4] { module: &P, d: &mut [0, 0, 0, 0], }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); assert_eq!(*params.d, [1, 0, 0, 0]); inv diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/fp12.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/fp12.rs index 8fea04eb7..b4b76bb9f 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bn254/fp12.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/fp12.rs @@ -24,23 +24,71 @@ use super::{ // - c1 = a1·b1 + a2·b2·v // - c2 = (a1+a2)·(b1+b2) - a1·b1 - a2·b2 #[inline] -pub fn mul_fp12_bn254(a: &[u64; 48], b: &[u64; 48]) -> [u64; 48] { +pub fn mul_fp12_bn254( + a: &[u64; 48], + b: &[u64; 48], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { let a1 = &a[0..24].try_into().unwrap(); let a2 = &a[24..48].try_into().unwrap(); let b1 = &b[0..24].try_into().unwrap(); let b2 = &b[24..48].try_into().unwrap(); - let a1b1 = mul_fp6_bn254(a1, b1); - let a2b2 = mul_fp6_bn254(a2, b2); - - let a2b2v = sparse_mula_fp6_bn254(&a2b2, &[1, 0, 0, 0, 0, 0, 0, 0]); - let c1 = add_fp6_bn254(&a1b1, &a2b2v); - - let a1_plus_a2 = add_fp6_bn254(a1, a2); - let b1_plus_b2 = add_fp6_bn254(b1, b2); - let mut c2 = mul_fp6_bn254(&a1_plus_a2, &b1_plus_b2); - c2 = sub_fp6_bn254(&c2, &a1b1); - c2 = sub_fp6_bn254(&c2, &a2b2); + let a1b1 = mul_fp6_bn254( + a1, + b1, + #[cfg(feature = "hints")] + hints, + ); + let a2b2 = mul_fp6_bn254( + a2, + b2, + #[cfg(feature = "hints")] + hints, + ); + + let a2b2v = sparse_mula_fp6_bn254( + &a2b2, + &[1, 0, 0, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + let c1 = add_fp6_bn254( + &a1b1, + &a2b2v, + #[cfg(feature = "hints")] + hints, + ); + let a1_plus_a2 = add_fp6_bn254( + a1, + a2, + #[cfg(feature = "hints")] + hints, + ); + let b1_plus_b2 = add_fp6_bn254( + b1, + b2, + #[cfg(feature = "hints")] + hints, + ); + let mut c2 = mul_fp6_bn254( + &a1_plus_a2, + &b1_plus_b2, + #[cfg(feature = "hints")] + hints, + ); + c2 = sub_fp6_bn254( + &c2, + &a1b1, + #[cfg(feature = "hints")] + hints, + ); + c2 = sub_fp6_bn254( + &c2, + &a2b2, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0; 48]; result[0..24].copy_from_slice(&c1); @@ -55,15 +103,39 @@ pub fn mul_fp12_bn254(a: &[u64; 48], b: &[u64; 48]) -> [u64; 48] { // - c1 = a1 + a2·(b21·v + b22·v²) // - c2 = a2 + a1·(b21 + b22·v) #[inline] -pub fn sparse_mul_fp12_bn254(a: &[u64; 48], b: &[u64; 16]) -> [u64; 48] { +pub fn sparse_mul_fp12_bn254( + a: &[u64; 48], + b: &[u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { let a1 = &a[0..24].try_into().unwrap(); let a2 = &a[24..48].try_into().unwrap(); - let mut c1 = sparse_mulc_fp6_bn254(a2, b); - c1 = add_fp6_bn254(&c1, a1); - - let mut c2 = sparse_mulb_fp6_bn254(a1, b); - c2 = add_fp6_bn254(&c2, a2); + let mut c1 = sparse_mulc_fp6_bn254( + a2, + b, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp6_bn254( + &c1, + a1, + #[cfg(feature = "hints")] + hints, + ); + + let mut c2 = sparse_mulb_fp6_bn254( + a1, + b, + #[cfg(feature = "hints")] + hints, + ); + c2 = add_fp6_bn254( + &c2, + a2, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0; 48]; result[0..24].copy_from_slice(&c1); @@ -78,24 +150,70 @@ pub fn sparse_mul_fp12_bn254(a: &[u64; 48], b: &[u64; 16]) -> [u64; 48] { // - c1 = (a1-a2)·(a1-a2·v) + a1·a2 + a1·a2·v // - c2 = 2·a1·a2 #[inline] -pub fn square_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { +pub fn square_fp12_bn254( + a: &[u64; 48], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { let a1 = &a[0..24].try_into().unwrap(); let a2 = &a[24..48].try_into().unwrap(); // a1·a2, a2·v, a1·a2·v - let a1a2 = mul_fp6_bn254(a1, a2); - let a2v = sparse_mula_fp6_bn254(a2, &[1, 0, 0, 0, 0, 0, 0, 0]); - let a1a2v = sparse_mula_fp6_bn254(&a1a2, &[1, 0, 0, 0, 0, 0, 0, 0]); + let a1a2 = mul_fp6_bn254( + a1, + a2, + #[cfg(feature = "hints")] + hints, + ); + let a2v = sparse_mula_fp6_bn254( + a2, + &[1, 0, 0, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + let a1a2v = sparse_mula_fp6_bn254( + &a1a2, + &[1, 0, 0, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); // c1 - let a1_minus_a2 = sub_fp6_bn254(a1, a2); - let a1_minus_a2v = sub_fp6_bn254(a1, &a2v); - let mut c1 = mul_fp6_bn254(&a1_minus_a2, &a1_minus_a2v); - c1 = add_fp6_bn254(&c1, &a1a2); - c1 = add_fp6_bn254(&c1, &a1a2v); - + let a1_minus_a2 = sub_fp6_bn254( + a1, + a2, + #[cfg(feature = "hints")] + hints, + ); + let a1_minus_a2v = sub_fp6_bn254( + a1, + &a2v, + #[cfg(feature = "hints")] + hints, + ); + let mut c1 = mul_fp6_bn254( + &a1_minus_a2, + &a1_minus_a2v, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp6_bn254( + &c1, + &a1a2, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp6_bn254( + &c1, + &a1a2v, + #[cfg(feature = "hints")] + hints, + ); // c2 - let c2 = dbl_fp6_bn254(&a1a2); + let c2 = dbl_fp6_bn254( + &a1a2, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0; 48]; result[0..24].copy_from_slice(&c1); @@ -110,19 +228,55 @@ pub fn square_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { // - c1 = a1·(a1² - a2²·v)⁻¹ // - c2 = -a2·(a1² - a2²·v)⁻¹ #[inline] -pub fn inv_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { +pub fn inv_fp12_bn254(a: &[u64; 48], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 48] { let a1 = &a[0..24].try_into().unwrap(); let a2 = &a[24..48].try_into().unwrap(); - let a1_sq = square_fp6_bn254(a1); - let a2_sq = square_fp6_bn254(a2); - - let a2_sqv = sparse_mula_fp6_bn254(&a2_sq, &[1, 0, 0, 0, 0, 0, 0, 0]); - let a1_sq_minus_a2_sqv = sub_fp6_bn254(&a1_sq, &a2_sqv); - let inv = inv_fp6_bn254(&a1_sq_minus_a2_sqv); - - let c1 = mul_fp6_bn254(a1, &inv); - let c2 = neg_fp6_bn254(&mul_fp6_bn254(a2, &inv)); + let a1_sq = square_fp6_bn254( + a1, + #[cfg(feature = "hints")] + hints, + ); + let a2_sq = square_fp6_bn254( + a2, + #[cfg(feature = "hints")] + hints, + ); + + let a2_sqv = sparse_mula_fp6_bn254( + &a2_sq, + &[1, 0, 0, 0, 0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + let a1_sq_minus_a2_sqv = sub_fp6_bn254( + &a1_sq, + &a2_sqv, + #[cfg(feature = "hints")] + hints, + ); + let inv = inv_fp6_bn254( + &a1_sq_minus_a2_sqv, + #[cfg(feature = "hints")] + hints, + ); + + let c1 = mul_fp6_bn254( + a1, + &inv, + #[cfg(feature = "hints")] + hints, + ); + let c2 = neg_fp6_bn254( + &mul_fp6_bn254( + a2, + &inv, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); let mut result = [0; 48]; result[0..24].copy_from_slice(&c1); @@ -132,10 +286,17 @@ pub fn inv_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { /// Conjugation in the degree 12 extension of the BN254 curve #[inline] -pub fn conjugate_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { +pub fn conjugate_fp12_bn254( + a: &[u64; 48], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { let mut result = [0; 48]; result[0..24].copy_from_slice(&a[0..24]); - result[24..48].copy_from_slice(&neg_fp6_bn254(&a[24..48].try_into().unwrap())); + result[24..48].copy_from_slice(&neg_fp6_bn254( + &a[24..48].try_into().unwrap(), + #[cfg(feature = "hints")] + hints, + )); result } @@ -146,7 +307,10 @@ pub fn conjugate_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { // - c1 = a̅11 + a̅12·γ12·v + a̅13·γ14·v² // - c2 = a̅21·γ11 + a̅22·γ13·v + a̅23·γ15·v² #[inline] -pub fn frobenius1_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { +pub fn frobenius1_fp12_bn254( + a: &[u64; 48], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { let a11 = &a[0..8].try_into().unwrap(); let a12 = &a[8..16].try_into().unwrap(); let a13 = &a[16..24].try_into().unwrap(); @@ -157,19 +321,68 @@ pub fn frobenius1_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { let mut result = [0; 48]; // c1 = a̅11 + a̅12·γ12·v + a̅13·γ14·v² - result[0..8].copy_from_slice(&conjugate_fp2_bn254(a11)); - let mut tmp = conjugate_fp2_bn254(a12); - result[8..16].copy_from_slice(&mul_fp2_bn254(&tmp, &FROBENIUS_GAMMA12)); - tmp = conjugate_fp2_bn254(a13); - result[16..24].copy_from_slice(&mul_fp2_bn254(&tmp, &FROBENIUS_GAMMA14)); + result[0..8].copy_from_slice(&conjugate_fp2_bn254( + a11, + #[cfg(feature = "hints")] + hints, + )); + let mut tmp = conjugate_fp2_bn254( + a12, + #[cfg(feature = "hints")] + hints, + ); + result[8..16].copy_from_slice(&mul_fp2_bn254( + &tmp, + &FROBENIUS_GAMMA12, + #[cfg(feature = "hints")] + hints, + )); + tmp = conjugate_fp2_bn254( + a13, + #[cfg(feature = "hints")] + hints, + ); + result[16..24].copy_from_slice(&mul_fp2_bn254( + &tmp, + &FROBENIUS_GAMMA14, + #[cfg(feature = "hints")] + hints, + )); // c2 = a̅21·γ11 + a̅22·γ13·v + a̅23·γ15·v² - tmp = conjugate_fp2_bn254(a21); - result[24..32].copy_from_slice(&mul_fp2_bn254(&tmp, &FROBENIUS_GAMMA11)); - tmp = conjugate_fp2_bn254(a22); - result[32..40].copy_from_slice(&mul_fp2_bn254(&tmp, &FROBENIUS_GAMMA13)); - tmp = conjugate_fp2_bn254(a23); - result[40..48].copy_from_slice(&mul_fp2_bn254(&tmp, &FROBENIUS_GAMMA15)); + tmp = conjugate_fp2_bn254( + a21, + #[cfg(feature = "hints")] + hints, + ); + result[24..32].copy_from_slice(&mul_fp2_bn254( + &tmp, + &FROBENIUS_GAMMA11, + #[cfg(feature = "hints")] + hints, + )); + tmp = conjugate_fp2_bn254( + a22, + #[cfg(feature = "hints")] + hints, + ); + result[32..40].copy_from_slice(&mul_fp2_bn254( + &tmp, + &FROBENIUS_GAMMA13, + #[cfg(feature = "hints")] + hints, + )); + tmp = conjugate_fp2_bn254( + a23, + #[cfg(feature = "hints")] + hints, + ); + result[40..48].copy_from_slice(&mul_fp2_bn254( + &tmp, + &FROBENIUS_GAMMA15, + #[cfg(feature = "hints")] + hints, + )); result } @@ -181,7 +394,10 @@ pub fn frobenius1_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { // - c1 = a11 + a12·γ22·v + a13·γ24·v² // - c2 = a21·γ21 + a22·γ23·v + a23·γ25·v² #[inline] -pub fn frobenius2_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { +pub fn frobenius2_fp12_bn254( + a: &[u64; 48], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { let a11: &[u64; 8] = &a[0..8].try_into().unwrap(); let a12 = &a[8..16].try_into().unwrap(); let a13 = &a[16..24].try_into().unwrap(); @@ -193,13 +409,38 @@ pub fn frobenius2_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { // c1 = a11 + a12·γ22·v + a13·γ24·v² result[0..8].copy_from_slice(a11); - result[8..16].copy_from_slice(&scalar_mul_fp2_bn254(a12, &FROBENIUS_GAMMA22)); - result[16..24].copy_from_slice(&scalar_mul_fp2_bn254(a13, &FROBENIUS_GAMMA24)); + result[8..16].copy_from_slice(&scalar_mul_fp2_bn254( + a12, + &FROBENIUS_GAMMA22, + #[cfg(feature = "hints")] + hints, + )); + result[16..24].copy_from_slice(&scalar_mul_fp2_bn254( + a13, + &FROBENIUS_GAMMA24, + #[cfg(feature = "hints")] + hints, + )); // c2 = a21·γ21 + a22·γ23·v + a23·γ25·v² - result[24..32].copy_from_slice(&scalar_mul_fp2_bn254(a21, &FROBENIUS_GAMMA21)); - result[32..40].copy_from_slice(&scalar_mul_fp2_bn254(a22, &FROBENIUS_GAMMA23)); - result[40..48].copy_from_slice(&scalar_mul_fp2_bn254(a23, &FROBENIUS_GAMMA25)); + result[24..32].copy_from_slice(&scalar_mul_fp2_bn254( + a21, + &FROBENIUS_GAMMA21, + #[cfg(feature = "hints")] + hints, + )); + result[32..40].copy_from_slice(&scalar_mul_fp2_bn254( + a22, + &FROBENIUS_GAMMA23, + #[cfg(feature = "hints")] + hints, + )); + result[40..48].copy_from_slice(&scalar_mul_fp2_bn254( + a23, + &FROBENIUS_GAMMA25, + #[cfg(feature = "hints")] + hints, + )); result } @@ -211,7 +452,10 @@ pub fn frobenius2_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { // - c1 = a̅11 + a̅12·γ32·v + a̅13·γ34·v² // - c2 = a̅21·γ31 + a̅22·γ33·v + a̅23·γ35·v² #[inline] -pub fn frobenius3_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { +pub fn frobenius3_fp12_bn254( + a: &[u64; 48], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { let a11 = &a[0..8].try_into().unwrap(); let a12 = &a[8..16].try_into().unwrap(); let a13 = &a[16..24].try_into().unwrap(); @@ -222,19 +466,68 @@ pub fn frobenius3_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { let mut result = [0; 48]; // c1 = a̅11 + a̅12·γ32·v + a̅13·γ34·v² - result[0..8].copy_from_slice(&conjugate_fp2_bn254(a11)); - let mut tmp = conjugate_fp2_bn254(a12); - result[8..16].copy_from_slice(&mul_fp2_bn254(&tmp, &FROBENIUS_GAMMA32)); - tmp = conjugate_fp2_bn254(a13); - result[16..24].copy_from_slice(&mul_fp2_bn254(&tmp, &FROBENIUS_GAMMA34)); + result[0..8].copy_from_slice(&conjugate_fp2_bn254( + a11, + #[cfg(feature = "hints")] + hints, + )); + let mut tmp = conjugate_fp2_bn254( + a12, + #[cfg(feature = "hints")] + hints, + ); + result[8..16].copy_from_slice(&mul_fp2_bn254( + &tmp, + &FROBENIUS_GAMMA32, + #[cfg(feature = "hints")] + hints, + )); + tmp = conjugate_fp2_bn254( + a13, + #[cfg(feature = "hints")] + hints, + ); + result[16..24].copy_from_slice(&mul_fp2_bn254( + &tmp, + &FROBENIUS_GAMMA34, + #[cfg(feature = "hints")] + hints, + )); // c2 = a̅21·γ31 + a̅22·γ33·v + a̅23·γ35·v² - tmp = conjugate_fp2_bn254(a21); - result[24..32].copy_from_slice(&mul_fp2_bn254(&tmp, &FROBENIUS_GAMMA31)); - tmp = conjugate_fp2_bn254(a22); - result[32..40].copy_from_slice(&mul_fp2_bn254(&tmp, &FROBENIUS_GAMMA33)); - tmp = conjugate_fp2_bn254(a23); - result[40..48].copy_from_slice(&mul_fp2_bn254(&tmp, &FROBENIUS_GAMMA35)); + tmp = conjugate_fp2_bn254( + a21, + #[cfg(feature = "hints")] + hints, + ); + result[24..32].copy_from_slice(&mul_fp2_bn254( + &tmp, + &FROBENIUS_GAMMA31, + #[cfg(feature = "hints")] + hints, + )); + tmp = conjugate_fp2_bn254( + a22, + #[cfg(feature = "hints")] + hints, + ); + result[32..40].copy_from_slice(&mul_fp2_bn254( + &tmp, + &FROBENIUS_GAMMA33, + #[cfg(feature = "hints")] + hints, + )); + tmp = conjugate_fp2_bn254( + a23, + #[cfg(feature = "hints")] + hints, + ); + result[40..48].copy_from_slice(&mul_fp2_bn254( + &tmp, + &FROBENIUS_GAMMA35, + #[cfg(feature = "hints")] + hints, + )); result } @@ -244,7 +537,11 @@ pub fn frobenius3_fp12_bn254(a: &[u64; 48]) -> [u64; 48] { // in: e, (a1 + a2·w) ∈ Fp12, where e ∈ [0,p¹²-2] ai ∈ Fp6 // out: (c1 + c2·w) = (a1 + a2·w)^e ∈ Fp12 #[inline] -pub fn exp_fp12_bn254(e: u64, a: &[u64; 48]) -> [u64; 48] { +pub fn exp_fp12_bn254( + e: u64, + a: &[u64; 48], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { let mut one = [0; 48]; one[0] = 1; if eq(a, &[0; 48]) { @@ -259,7 +556,12 @@ pub fn exp_fp12_bn254(e: u64, a: &[u64; 48]) -> [u64; 48] { return *a; } - let (_, max_bit) = fcall_msb_pos_256(&[e, 0, 0, 0], &[0, 0, 0, 0]); + let (_, max_bit) = fcall_msb_pos_256( + &[e, 0, 0, 0], + &[0, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); // Perform the loop, based on the binary representation of e @@ -275,12 +577,21 @@ pub fn exp_fp12_bn254(e: u64, a: &[u64; 48]) -> [u64; 48] { let _max_bit = max_bit as usize; for i in (0.._max_bit).rev() { // Always square - result = square_fp12_bn254(&result); + result = square_fp12_bn254( + &result, + #[cfg(feature = "hints")] + hints, + ); // Get the next bit b of e // If b == 1, we should multiply it by a, otherwise start the next iteration if ((e >> i) & 1) == 1 { - result = mul_fp12_bn254(&result, a); + result = mul_fp12_bn254( + &result, + a, + #[cfg(feature = "hints")] + hints, + ); // Reconstruct e e_rec |= 1 << i; diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/fp2.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/fp2.rs index b5e74bafd..d740ff46b 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bn254/fp2.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/fp2.rs @@ -13,13 +13,21 @@ use super::constants::P_MINUS_ONE; /// Addition in the degree 2 extension of the BN254 curve #[inline] -pub fn add_fp2_bn254(a: &[u64; 8], b: &[u64; 8]) -> [u64; 8] { +pub fn add_fp2_bn254( + a: &[u64; 8], + b: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 8] { let mut f1 = SyscallComplex256 { x: a[0..4].try_into().unwrap(), y: a[4..8].try_into().unwrap() }; let f2 = SyscallComplex256 { x: b[0..4].try_into().unwrap(), y: b[4..8].try_into().unwrap() }; let mut params = SyscallBn254ComplexAddParams { f1: &mut f1, f2: &f2 }; - syscall_bn254_complex_add(&mut params); + syscall_bn254_complex_add( + &mut params, + #[cfg(feature = "hints")] + hints, + ); let res_x = params.f1.x; let res_y = params.f1.y; [res_x[0], res_x[1], res_x[2], res_x[3], res_y[0], res_y[1], res_y[2], res_y[3]] @@ -27,13 +35,17 @@ pub fn add_fp2_bn254(a: &[u64; 8], b: &[u64; 8]) -> [u64; 8] { /// Doubling in the degree 2 extension of the BN254 curve #[inline] -pub fn dbl_fp2_bn254(a: &[u64; 8]) -> [u64; 8] { +pub fn dbl_fp2_bn254(a: &[u64; 8], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 8] { let mut f1 = SyscallComplex256 { x: a[0..4].try_into().unwrap(), y: a[4..8].try_into().unwrap() }; let f2 = SyscallComplex256 { x: f1.x, y: f1.y }; let mut params = SyscallBn254ComplexAddParams { f1: &mut f1, f2: &f2 }; - syscall_bn254_complex_add(&mut params); + syscall_bn254_complex_add( + &mut params, + #[cfg(feature = "hints")] + hints, + ); let res_x = params.f1.x; let res_y = params.f1.y; [res_x[0], res_x[1], res_x[2], res_x[3], res_y[0], res_y[1], res_y[2], res_y[3]] @@ -41,13 +53,17 @@ pub fn dbl_fp2_bn254(a: &[u64; 8]) -> [u64; 8] { /// Negation in the degree 2 extension of the BN254 curve #[inline] -pub fn neg_fp2_bn254(a: &[u64; 8]) -> [u64; 8] { +pub fn neg_fp2_bn254(a: &[u64; 8], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 8] { let mut f1 = SyscallComplex256 { x: a[0..4].try_into().unwrap(), y: a[4..8].try_into().unwrap() }; let f2 = SyscallComplex256 { x: P_MINUS_ONE, y: [0u64; 4] }; let mut params = SyscallBn254ComplexMulParams { f1: &mut f1, f2: &f2 }; - syscall_bn254_complex_mul(&mut params); + syscall_bn254_complex_mul( + &mut params, + #[cfg(feature = "hints")] + hints, + ); let res_x = params.f1.x; let res_y = params.f1.y; [res_x[0], res_x[1], res_x[2], res_x[3], res_y[0], res_y[1], res_y[2], res_y[3]] @@ -55,13 +71,21 @@ pub fn neg_fp2_bn254(a: &[u64; 8]) -> [u64; 8] { /// Subtraction in the degree 2 extension of the BN254 curve #[inline] -pub fn sub_fp2_bn254(a: &[u64; 8], b: &[u64; 8]) -> [u64; 8] { +pub fn sub_fp2_bn254( + a: &[u64; 8], + b: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 8] { let mut f1 = SyscallComplex256 { x: a[0..4].try_into().unwrap(), y: a[4..8].try_into().unwrap() }; let f2 = SyscallComplex256 { x: b[0..4].try_into().unwrap(), y: b[4..8].try_into().unwrap() }; let mut params = SyscallBn254ComplexSubParams { f1: &mut f1, f2: &f2 }; - syscall_bn254_complex_sub(&mut params); + syscall_bn254_complex_sub( + &mut params, + #[cfg(feature = "hints")] + hints, + ); let res_x = params.f1.x; let res_y = params.f1.y; [res_x[0], res_x[1], res_x[2], res_x[3], res_y[0], res_y[1], res_y[2], res_y[3]] @@ -69,13 +93,21 @@ pub fn sub_fp2_bn254(a: &[u64; 8], b: &[u64; 8]) -> [u64; 8] { /// Multiplication in the degree 2 extension of the BN254 curve #[inline] -pub fn mul_fp2_bn254(a: &[u64; 8], b: &[u64; 8]) -> [u64; 8] { +pub fn mul_fp2_bn254( + a: &[u64; 8], + b: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 8] { let mut f1 = SyscallComplex256 { x: a[0..4].try_into().unwrap(), y: a[4..8].try_into().unwrap() }; let f2 = SyscallComplex256 { x: b[0..4].try_into().unwrap(), y: b[4..8].try_into().unwrap() }; let mut params = SyscallBn254ComplexMulParams { f1: &mut f1, f2: &f2 }; - syscall_bn254_complex_mul(&mut params); + syscall_bn254_complex_mul( + &mut params, + #[cfg(feature = "hints")] + hints, + ); let res_x = params.f1.x; let res_y = params.f1.y; [res_x[0], res_x[1], res_x[2], res_x[3], res_y[0], res_y[1], res_y[2], res_y[3]] @@ -83,13 +115,21 @@ pub fn mul_fp2_bn254(a: &[u64; 8], b: &[u64; 8]) -> [u64; 8] { /// Scalar multiplication in the degree 2 extension of the BN254 curve #[inline] -pub fn scalar_mul_fp2_bn254(a: &[u64; 8], b: &[u64; 4]) -> [u64; 8] { +pub fn scalar_mul_fp2_bn254( + a: &[u64; 8], + b: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 8] { let mut f1 = SyscallComplex256 { x: a[0..4].try_into().unwrap(), y: a[4..8].try_into().unwrap() }; let f2 = SyscallComplex256 { x: b[0..4].try_into().unwrap(), y: [0, 0, 0, 0] }; let mut params = SyscallBn254ComplexMulParams { f1: &mut f1, f2: &f2 }; - syscall_bn254_complex_mul(&mut params); + syscall_bn254_complex_mul( + &mut params, + #[cfg(feature = "hints")] + hints, + ); let res_x = params.f1.x; let res_y = params.f1.y; [res_x[0], res_x[1], res_x[2], res_x[3], res_y[0], res_y[1], res_y[2], res_y[3]] @@ -97,13 +137,17 @@ pub fn scalar_mul_fp2_bn254(a: &[u64; 8], b: &[u64; 4]) -> [u64; 8] { /// Squaring in the degree 2 extension of the BN254 curve #[inline] -pub fn square_fp2_bn254(a: &[u64; 8]) -> [u64; 8] { +pub fn square_fp2_bn254(a: &[u64; 8], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 8] { let mut f1 = SyscallComplex256 { x: a[0..4].try_into().unwrap(), y: a[4..8].try_into().unwrap() }; let f2 = SyscallComplex256 { x: f1.x, y: f1.y }; let mut params = SyscallBn254ComplexMulParams { f1: &mut f1, f2: &f2 }; - syscall_bn254_complex_mul(&mut params); + syscall_bn254_complex_mul( + &mut params, + #[cfg(feature = "hints")] + hints, + ); let res_x = params.f1.x; let res_y = params.f1.y; [res_x[0], res_x[1], res_x[2], res_x[3], res_y[0], res_y[1], res_y[2], res_y[3]] @@ -111,7 +155,7 @@ pub fn square_fp2_bn254(a: &[u64; 8]) -> [u64; 8] { /// Inversion in the degree 2 extension of the BN254 curve #[inline] -pub fn inv_fp2_bn254(a: &[u64; 8]) -> [u64; 8] { +pub fn inv_fp2_bn254(a: &[u64; 8], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 8] { // if a == 0, return 0 if eq(a, &[0, 0, 0, 0, 0, 0, 0, 0]) { return [0, 0, 0, 0, 0, 0, 0, 0]; @@ -121,14 +165,22 @@ pub fn inv_fp2_bn254(a: &[u64; 8]) -> [u64; 8] { // Remember that an element b ∈ Fp2 is the inverse of a ∈ Fp2 if and only if a·b = 1 in Fp2 // We will therefore hint the inverse b and check the product with a is 1 - let inv = fcall_bn254_fp2_inv(a); + let inv = fcall_bn254_fp2_inv( + a, + #[cfg(feature = "hints")] + hints, + ); let mut f1 = SyscallComplex256 { x: a[0..4].try_into().unwrap(), y: a[4..8].try_into().unwrap() }; let f2 = SyscallComplex256 { x: inv[0..4].try_into().unwrap(), y: inv[4..8].try_into().unwrap() }; let mut params = SyscallBn254ComplexMulParams { f1: &mut f1, f2: &f2 }; - syscall_bn254_complex_mul(&mut params); + syscall_bn254_complex_mul( + &mut params, + #[cfg(feature = "hints")] + hints, + ); assert_eq!(params.f1.x, [1, 0, 0, 0]); assert_eq!(params.f1.y, [0, 0, 0, 0]); @@ -137,12 +189,19 @@ pub fn inv_fp2_bn254(a: &[u64; 8]) -> [u64; 8] { /// Conjugation in the degree 2 extension of the BN254 curve #[inline] -pub fn conjugate_fp2_bn254(a: &[u64; 8]) -> [u64; 8] { +pub fn conjugate_fp2_bn254( + a: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 8] { let mut f1 = SyscallComplex256 { x: a[0..4].try_into().unwrap(), y: [0, 0, 0, 0] }; let f2 = SyscallComplex256 { x: [0, 0, 0, 0], y: a[4..8].try_into().unwrap() }; let mut params = SyscallBn254ComplexSubParams { f1: &mut f1, f2: &f2 }; - syscall_bn254_complex_sub(&mut params); + syscall_bn254_complex_sub( + &mut params, + #[cfg(feature = "hints")] + hints, + ); let res_x = params.f1.x; let res_y = params.f1.y; [res_x[0], res_x[1], res_x[2], res_x[3], res_y[0], res_y[1], res_y[2], res_y[3]] diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/fp6.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/fp6.rs index 15761e5b0..fde424a79 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bn254/fp6.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/fp6.rs @@ -7,12 +7,21 @@ use super::fp2::{ /// Addition in the degree 6 extension of the BN254 curve #[inline] -pub fn add_fp6_bn254(a: &[u64; 24], b: &[u64; 24]) -> [u64; 24] { +pub fn add_fp6_bn254( + a: &[u64; 24], + b: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let mut result = [0; 24]; for i in 0..3 { let a_i = &a[i * 8..(i + 1) * 8].try_into().unwrap(); let b_i = &b[i * 8..(i + 1) * 8].try_into().unwrap(); - let c_i = add_fp2_bn254(a_i, b_i); + let c_i = add_fp2_bn254( + a_i, + b_i, + #[cfg(feature = "hints")] + hints, + ); result[i * 8..(i + 1) * 8].copy_from_slice(&c_i); } result @@ -20,11 +29,15 @@ pub fn add_fp6_bn254(a: &[u64; 24], b: &[u64; 24]) -> [u64; 24] { /// Doubling in the degree 6 extension of the BN254 curve #[inline] -pub fn dbl_fp6_bn254(a: &[u64; 24]) -> [u64; 24] { +pub fn dbl_fp6_bn254(a: &[u64; 24], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 24] { let mut result = [0; 24]; for i in 0..3 { let a_i = &a[i * 8..(i + 1) * 8].try_into().unwrap(); - let c_i = dbl_fp2_bn254(a_i); + let c_i = dbl_fp2_bn254( + a_i, + #[cfg(feature = "hints")] + hints, + ); result[i * 8..(i + 1) * 8].copy_from_slice(&c_i); } result @@ -32,11 +45,15 @@ pub fn dbl_fp6_bn254(a: &[u64; 24]) -> [u64; 24] { /// Negation in the degree 6 extension of the BN254 curve #[inline] -pub fn neg_fp6_bn254(a: &[u64; 24]) -> [u64; 24] { +pub fn neg_fp6_bn254(a: &[u64; 24], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 24] { let mut result = [0; 24]; for i in 0..3 { let a_i = &a[i * 8..(i + 1) * 8].try_into().unwrap(); - let c_i = neg_fp2_bn254(a_i); + let c_i = neg_fp2_bn254( + a_i, + #[cfg(feature = "hints")] + hints, + ); result[i * 8..(i + 1) * 8].copy_from_slice(&c_i); } result @@ -44,12 +61,21 @@ pub fn neg_fp6_bn254(a: &[u64; 24]) -> [u64; 24] { /// Subtraction in the degree 6 extension of the BN254 curve #[inline] -pub fn sub_fp6_bn254(a: &[u64; 24], b: &[u64; 24]) -> [u64; 24] { +pub fn sub_fp6_bn254( + a: &[u64; 24], + b: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let mut result = [0; 24]; for i in 0..3 { let a_i = &a[i * 8..(i + 1) * 8].try_into().unwrap(); let b_i = &b[i * 8..(i + 1) * 8].try_into().unwrap(); - let c_i = sub_fp2_bn254(a_i, b_i); + let c_i = sub_fp2_bn254( + a_i, + b_i, + #[cfg(feature = "hints")] + hints, + ); result[i * 8..(i + 1) * 8].copy_from_slice(&c_i); } result @@ -63,7 +89,11 @@ pub fn sub_fp6_bn254(a: &[u64; 24], b: &[u64; 24]) -> [u64; 24] { // - c2 = (a1+a2)·(b1+b2) - a1·b1 - a2·b2 + a3·b3·(9+u) // - c3 = (a1+a3)·(b1+b3) - a1·b1 + a2·b2 - a3·b3 #[inline] -pub fn mul_fp6_bn254(a: &[u64; 24], b: &[u64; 24]) -> [u64; 24] { +pub fn mul_fp6_bn254( + a: &[u64; 24], + b: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let a1 = &a[0..8].try_into().unwrap(); let a2 = &a[8..16].try_into().unwrap(); let a3 = &a[16..24].try_into().unwrap(); @@ -72,37 +102,151 @@ pub fn mul_fp6_bn254(a: &[u64; 24], b: &[u64; 24]) -> [u64; 24] { let b3 = &b[16..24].try_into().unwrap(); // a1·b1, a2·b2, a3·b3, a3·b3·(9+u) - let a1b1 = mul_fp2_bn254(a1, b1); - let a2b2 = mul_fp2_bn254(a2, b2); - let a3b3 = mul_fp2_bn254(a3, b3); - let a3b3xi = mul_fp2_bn254(&a3b3, &[9, 0, 0, 0, 1, 0, 0, 0]); + let a1b1 = mul_fp2_bn254( + a1, + b1, + #[cfg(feature = "hints")] + hints, + ); + let a2b2 = mul_fp2_bn254( + a2, + b2, + #[cfg(feature = "hints")] + hints, + ); + let a3b3 = mul_fp2_bn254( + a3, + b3, + #[cfg(feature = "hints")] + hints, + ); + let a3b3xi = mul_fp2_bn254( + &a3b3, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); // a2+a3, b2+b3, a1+a2, b1+b2, a1+a3, b1+b3 - let a2_plus_a3 = add_fp2_bn254(a2, a3); - let b2_plus_b3 = add_fp2_bn254(b2, b3); - let a1_plus_a2 = add_fp2_bn254(a1, a2); - let b1_plus_b2 = add_fp2_bn254(b1, b2); - let a1_plus_a3 = add_fp2_bn254(a1, a3); - let b1_plus_b3 = add_fp2_bn254(b1, b3); + let a2_plus_a3 = add_fp2_bn254( + a2, + a3, + #[cfg(feature = "hints")] + hints, + ); + let b2_plus_b3 = add_fp2_bn254( + b2, + b3, + #[cfg(feature = "hints")] + hints, + ); + let a1_plus_a2 = add_fp2_bn254( + a1, + a2, + #[cfg(feature = "hints")] + hints, + ); + let b1_plus_b2 = add_fp2_bn254( + b1, + b2, + #[cfg(feature = "hints")] + hints, + ); + let a1_plus_a3 = add_fp2_bn254( + a1, + a3, + #[cfg(feature = "hints")] + hints, + ); + let b1_plus_b3 = add_fp2_bn254( + b1, + b3, + #[cfg(feature = "hints")] + hints, + ); // c1 = [(a2+a3)·(b2+b3) - a2·b2 - a3·b3]·(9+u) + a1·b1 - let mut c1 = mul_fp2_bn254(&a2_plus_a3, &b2_plus_b3); - c1 = sub_fp2_bn254(&c1, &a2b2); - c1 = sub_fp2_bn254(&c1, &a3b3); - c1 = mul_fp2_bn254(&c1, &[9, 0, 0, 0, 1, 0, 0, 0]); - c1 = add_fp2_bn254(&c1, &a1b1); - + let mut c1 = mul_fp2_bn254( + &a2_plus_a3, + &b2_plus_b3, + #[cfg(feature = "hints")] + hints, + ); + c1 = sub_fp2_bn254( + &c1, + &a2b2, + #[cfg(feature = "hints")] + hints, + ); + c1 = sub_fp2_bn254( + &c1, + &a3b3, + #[cfg(feature = "hints")] + hints, + ); + c1 = mul_fp2_bn254( + &c1, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp2_bn254( + &c1, + &a1b1, + #[cfg(feature = "hints")] + hints, + ); // c2 = (a1+a2)·(b1+b2) - a1·b1 - a2·b2 + a3·b3·(9+u) - let mut c2 = mul_fp2_bn254(&a1_plus_a2, &b1_plus_b2); - c2 = sub_fp2_bn254(&c2, &a1b1); - c2 = sub_fp2_bn254(&c2, &a2b2); - c2 = add_fp2_bn254(&c2, &a3b3xi); + let mut c2 = mul_fp2_bn254( + &a1_plus_a2, + &b1_plus_b2, + #[cfg(feature = "hints")] + hints, + ); + c2 = sub_fp2_bn254( + &c2, + &a1b1, + #[cfg(feature = "hints")] + hints, + ); + c2 = sub_fp2_bn254( + &c2, + &a2b2, + #[cfg(feature = "hints")] + hints, + ); + c2 = add_fp2_bn254( + &c2, + &a3b3xi, + #[cfg(feature = "hints")] + hints, + ); // c3 = (a1+a3)·(b1+b3) - a1·b1 + a2·b2 - a3·b3 - let mut c3 = mul_fp2_bn254(&a1_plus_a3, &b1_plus_b3); - c3 = sub_fp2_bn254(&c3, &a1b1); - c3 = add_fp2_bn254(&c3, &a2b2); - c3 = sub_fp2_bn254(&c3, &a3b3); + let mut c3 = mul_fp2_bn254( + &a1_plus_a3, + &b1_plus_b3, + #[cfg(feature = "hints")] + hints, + ); + c3 = sub_fp2_bn254( + &c3, + &a1b1, + #[cfg(feature = "hints")] + hints, + ); + c3 = add_fp2_bn254( + &c3, + &a2b2, + #[cfg(feature = "hints")] + hints, + ); + c3 = sub_fp2_bn254( + &c3, + &a3b3, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0; 24]; result[0..8].copy_from_slice(&c1); @@ -119,20 +263,43 @@ pub fn mul_fp6_bn254(a: &[u64; 24], b: &[u64; 24]) -> [u64; 24] { // - c2 = b2·a1 // - c3 = b2·a2 #[inline] -pub fn sparse_mula_fp6_bn254(a: &[u64; 24], b2: &[u64; 8]) -> [u64; 24] { +pub fn sparse_mula_fp6_bn254( + a: &[u64; 24], + b2: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let a1 = &a[0..8].try_into().unwrap(); let a2 = &a[8..16].try_into().unwrap(); let a3 = &a[16..24].try_into().unwrap(); // c1 = b2·a3·(9+u) - let mut c1 = mul_fp2_bn254(b2, a3); - c1 = mul_fp2_bn254(&c1, &[9, 0, 0, 0, 1, 0, 0, 0]); + let mut c1 = mul_fp2_bn254( + b2, + a3, + #[cfg(feature = "hints")] + hints, + ); + c1 = mul_fp2_bn254( + &c1, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); // c2 = b2·a1 - let c2 = mul_fp2_bn254(b2, a1); - + let c2 = mul_fp2_bn254( + b2, + a1, + #[cfg(feature = "hints")] + hints, + ); // c3 = b2·a2 - let c3 = mul_fp2_bn254(b2, a2); + let c3 = mul_fp2_bn254( + b2, + a2, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0; 24]; result[0..8].copy_from_slice(&c1); @@ -149,7 +316,11 @@ pub fn sparse_mula_fp6_bn254(a: &[u64; 24], b2: &[u64; 8]) -> [u64; 24] { // - c2 = a1·b2 + a2·b1 // - c3 = a2·b2 + a3·b1 #[inline] -pub fn sparse_mulb_fp6_bn254(a: &[u64; 24], b: &[u64; 16]) -> [u64; 24] { +pub fn sparse_mulb_fp6_bn254( + a: &[u64; 24], + b: &[u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let a1 = &a[0..8].try_into().unwrap(); let a2 = &a[8..16].try_into().unwrap(); let a3 = &a[16..24].try_into().unwrap(); @@ -157,16 +328,66 @@ pub fn sparse_mulb_fp6_bn254(a: &[u64; 24], b: &[u64; 16]) -> [u64; 24] { let b2 = &b[8..16].try_into().unwrap(); // c1 = a1·b1 + a3·b2·(9+u) - let mut c1 = mul_fp2_bn254(a1, b1); - c1 = add_fp2_bn254(&c1, &mul_fp2_bn254(a3, &mul_fp2_bn254(b2, &[9, 0, 0, 0, 1, 0, 0, 0]))); + let mut c1 = mul_fp2_bn254( + a1, + b1, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp2_bn254( + &c1, + &mul_fp2_bn254( + a3, + &mul_fp2_bn254( + b2, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // c2 = a1·b2 + a2·b1 - let mut c2 = mul_fp2_bn254(a1, b2); - c2 = add_fp2_bn254(&c2, &mul_fp2_bn254(a2, b1)); + let mut c2 = mul_fp2_bn254( + a1, + b2, + #[cfg(feature = "hints")] + hints, + ); + c2 = add_fp2_bn254( + &c2, + &mul_fp2_bn254( + a2, + b1, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // c3 = a2·b2 + a3·b1 - let mut c3 = mul_fp2_bn254(a2, b2); - c3 = add_fp2_bn254(&c3, &mul_fp2_bn254(a3, b1)); + let mut c3 = mul_fp2_bn254( + a2, + b2, + #[cfg(feature = "hints")] + hints, + ); + c3 = add_fp2_bn254( + &c3, + &mul_fp2_bn254( + a3, + b1, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); let mut result = [0; 24]; result[0..8].copy_from_slice(&c1); @@ -183,7 +404,11 @@ pub fn sparse_mulb_fp6_bn254(a: &[u64; 24], b: &[u64; 16]) -> [u64; 24] { // - c2 = a1·b2 + a3·b3·(9+u) // - c3 = a1·b3 + a2·b2 #[inline] -pub fn sparse_mulc_fp6_bn254(a: &[u64; 24], b: &[u64; 16]) -> [u64; 24] { +pub fn sparse_mulc_fp6_bn254( + a: &[u64; 24], + b: &[u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let a1 = &a[0..8].try_into().unwrap(); let a2 = &a[8..16].try_into().unwrap(); let a3 = &a[16..24].try_into().unwrap(); @@ -191,18 +416,73 @@ pub fn sparse_mulc_fp6_bn254(a: &[u64; 24], b: &[u64; 16]) -> [u64; 24] { let b3 = &b[8..16].try_into().unwrap(); // c1 = (a2·b3 + a3·b2)·(9+u) - let mut c1 = mul_fp2_bn254(a2, b3); - c1 = add_fp2_bn254(&c1, &mul_fp2_bn254(a3, b2)); - c1 = mul_fp2_bn254(&c1, &[9, 0, 0, 0, 1, 0, 0, 0]); + let mut c1 = mul_fp2_bn254( + a2, + b3, + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp2_bn254( + &c1, + &mul_fp2_bn254( + a3, + b2, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + c1 = mul_fp2_bn254( + &c1, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); // c2 = a1·b2 + a3·b3·(9+u) - let mut c2 = mul_fp2_bn254(a3, b3); - c2 = mul_fp2_bn254(&c2, &[9, 0, 0, 0, 1, 0, 0, 0]); - c2 = add_fp2_bn254(&c2, &mul_fp2_bn254(a1, b2)); + let mut c2 = mul_fp2_bn254( + a3, + b3, + #[cfg(feature = "hints")] + hints, + ); + c2 = mul_fp2_bn254( + &c2, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + c2 = add_fp2_bn254( + &c2, + &mul_fp2_bn254( + a1, + b2, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // c3 = a2·b3 + a2·b2 - let mut c3 = mul_fp2_bn254(a1, b3); - c3 = add_fp2_bn254(&c3, &mul_fp2_bn254(a2, b2)); + let mut c3 = mul_fp2_bn254( + a1, + b3, + #[cfg(feature = "hints")] + hints, + ); + c3 = add_fp2_bn254( + &c3, + &mul_fp2_bn254( + a2, + b2, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); let mut result = [0; 24]; result[0..8].copy_from_slice(&c1); @@ -219,37 +499,119 @@ pub fn sparse_mulc_fp6_bn254(a: &[u64; 24], b: &[u64; 16]) -> [u64; 24] { // - c2 = a3²·(9 + u) + 2·a1·a2 // - c3 = 2·a1·a2 - a3² + (a1 - a2 + a3)² + 2·a2·a3 - a1² #[inline] -pub fn square_fp6_bn254(a: &[u64; 24]) -> [u64; 24] { +pub fn square_fp6_bn254( + a: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 24] { let a1 = &a[0..8].try_into().unwrap(); let a2 = &a[8..16].try_into().unwrap(); let a3 = &a[16..24].try_into().unwrap(); - let mut two_a1a2 = mul_fp2_bn254(a1, a2); - two_a1a2 = dbl_fp2_bn254(&two_a1a2); - - let a3_squared = square_fp2_bn254(a3); + let mut two_a1a2 = mul_fp2_bn254( + a1, + a2, + #[cfg(feature = "hints")] + hints, + ); + two_a1a2 = dbl_fp2_bn254( + &two_a1a2, + #[cfg(feature = "hints")] + hints, + ); + + let a3_squared = square_fp2_bn254( + a3, + #[cfg(feature = "hints")] + hints, + ); // c2 = a3²·(9 + u) + 2·a1·a2 - let mut c2 = mul_fp2_bn254(&a3_squared, &[9, 0, 0, 0, 1, 0, 0, 0]); - c2 = add_fp2_bn254(&c2, &two_a1a2); + let mut c2 = mul_fp2_bn254( + &a3_squared, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + c2 = add_fp2_bn254( + &c2, + &two_a1a2, + #[cfg(feature = "hints")] + hints, + ); // a1², (a1 - a2 + a3)², 2·a2·a3 - let a1_squared = square_fp2_bn254(a1); - let mut a1a2a3 = sub_fp2_bn254(a1, a2); - a1a2a3 = add_fp2_bn254(&a1a2a3, a3); - a1a2a3 = square_fp2_bn254(&a1a2a3); - let mut two_a2a3 = mul_fp2_bn254(a2, a3); - two_a2a3 = dbl_fp2_bn254(&two_a2a3); + let a1_squared = square_fp2_bn254( + a1, + #[cfg(feature = "hints")] + hints, + ); + let mut a1a2a3 = sub_fp2_bn254( + a1, + a2, + #[cfg(feature = "hints")] + hints, + ); + a1a2a3 = add_fp2_bn254( + &a1a2a3, + a3, + #[cfg(feature = "hints")] + hints, + ); + a1a2a3 = square_fp2_bn254( + &a1a2a3, + #[cfg(feature = "hints")] + hints, + ); + let mut two_a2a3 = mul_fp2_bn254( + a2, + a3, + #[cfg(feature = "hints")] + hints, + ); + two_a2a3 = dbl_fp2_bn254( + &two_a2a3, + #[cfg(feature = "hints")] + hints, + ); // c1 = 2·a2·a3·(9 + u) + a1² - let mut c1 = mul_fp2_bn254(&two_a2a3, &[9, 0, 0, 0, 1, 0, 0, 0]); - c1 = add_fp2_bn254(&c1, &a1_squared); - + let mut c1 = mul_fp2_bn254( + &two_a2a3, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + c1 = add_fp2_bn254( + &c1, + &a1_squared, + #[cfg(feature = "hints")] + hints, + ); // c3 = 2·a1·a2 - a3² + (a1 - a2 + a3)² + 2·a2·a3 - a1² - let mut c3 = sub_fp2_bn254(&two_a1a2, &a3_squared); - c3 = add_fp2_bn254(&c3, &a1a2a3); - c3 = add_fp2_bn254(&c3, &two_a2a3); - c3 = sub_fp2_bn254(&c3, &a1_squared); + let mut c3 = sub_fp2_bn254( + &two_a1a2, + &a3_squared, + #[cfg(feature = "hints")] + hints, + ); + c3 = add_fp2_bn254( + &c3, + &a1a2a3, + #[cfg(feature = "hints")] + hints, + ); + c3 = add_fp2_bn254( + &c3, + &two_a2a3, + #[cfg(feature = "hints")] + hints, + ); + c3 = sub_fp2_bn254( + &c3, + &a1_squared, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0; 24]; result[0..8].copy_from_slice(&c1); @@ -270,44 +632,144 @@ pub fn square_fp6_bn254(a: &[u64; 24]) -> [u64; 24] { // * c2mid = (9 + u)·a3² - (a1·a2) // * c3mid = a2² - (a1·a3) #[inline] -pub fn inv_fp6_bn254(a: &[u64; 24]) -> [u64; 24] { +pub fn inv_fp6_bn254(a: &[u64; 24], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 24] { let a1 = &a[0..8].try_into().unwrap(); let a2 = &a[8..16].try_into().unwrap(); let a3 = &a[16..24].try_into().unwrap(); - let a1_squared = square_fp2_bn254(a1); - let a2_squared = square_fp2_bn254(a2); - let a3_squared = square_fp2_bn254(a3); - - let a1a2 = mul_fp2_bn254(a1, a2); - let a1a3 = mul_fp2_bn254(a1, a3); - let a2a3 = mul_fp2_bn254(a2, a3); + let a1_squared = square_fp2_bn254( + a1, + #[cfg(feature = "hints")] + hints, + ); + let a2_squared = square_fp2_bn254( + a2, + #[cfg(feature = "hints")] + hints, + ); + let a3_squared = square_fp2_bn254( + a3, + #[cfg(feature = "hints")] + hints, + ); + + let a1a2 = mul_fp2_bn254( + a1, + a2, + #[cfg(feature = "hints")] + hints, + ); + let a1a3 = mul_fp2_bn254( + a1, + a3, + #[cfg(feature = "hints")] + hints, + ); + let a2a3 = mul_fp2_bn254( + a2, + a3, + #[cfg(feature = "hints")] + hints, + ); // c1mid = a1² - (9 + u)·(a2·a3) - let mut c1mid = mul_fp2_bn254(&a2a3, &[9, 0, 0, 0, 1, 0, 0, 0]); - c1mid = sub_fp2_bn254(&a1_squared, &c1mid); + let mut c1mid = mul_fp2_bn254( + &a2a3, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + c1mid = sub_fp2_bn254( + &a1_squared, + &c1mid, + #[cfg(feature = "hints")] + hints, + ); // c2mid = (9 + u)·a3² - (a1·a2) - let mut c2mid = mul_fp2_bn254(&a3_squared, &[9, 0, 0, 0, 1, 0, 0, 0]); - c2mid = sub_fp2_bn254(&c2mid, &a1a2); - + let mut c2mid = mul_fp2_bn254( + &a3_squared, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + c2mid = sub_fp2_bn254( + &c2mid, + &a1a2, + #[cfg(feature = "hints")] + hints, + ); // c3mid = a2² - (a1·a3) - let c3mid = sub_fp2_bn254(&a2_squared, &a1a3); + let c3mid = sub_fp2_bn254( + &a2_squared, + &a1a3, + #[cfg(feature = "hints")] + hints, + ); // im = a1·c1mid - let im = mul_fp2_bn254(a1, &c1mid); + let im = mul_fp2_bn254( + a1, + &c1mid, + #[cfg(feature = "hints")] + hints, + ); // last = (im + (9 + u)·(a3·c2mid + a2·c3mid))⁻¹ - let mut last = mul_fp2_bn254(a3, &c2mid); - last = add_fp2_bn254(&last, &mul_fp2_bn254(a2, &c3mid)); - last = mul_fp2_bn254(&last, &[9, 0, 0, 0, 1, 0, 0, 0]); - last = add_fp2_bn254(&last, &im); - last = inv_fp2_bn254(&last); + let mut last = mul_fp2_bn254( + a3, + &c2mid, + #[cfg(feature = "hints")] + hints, + ); + last = add_fp2_bn254( + &last, + &mul_fp2_bn254( + a2, + &c3mid, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + last = mul_fp2_bn254( + &last, + &[9, 0, 0, 0, 1, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); + last = add_fp2_bn254( + &last, + &im, + #[cfg(feature = "hints")] + hints, + ); + last = inv_fp2_bn254( + &last, + #[cfg(feature = "hints")] + hints, + ); // c1 = c1mid·last, c2 = c2mid·last, c3 = c3mid·last - let c1 = mul_fp2_bn254(&c1mid, &last); - let c2 = mul_fp2_bn254(&c2mid, &last); - let c3 = mul_fp2_bn254(&c3mid, &last); + let c1 = mul_fp2_bn254( + &c1mid, + &last, + #[cfg(feature = "hints")] + hints, + ); + let c2 = mul_fp2_bn254( + &c2mid, + &last, + #[cfg(feature = "hints")] + hints, + ); + let c3 = mul_fp2_bn254( + &c3mid, + &last, + #[cfg(feature = "hints")] + hints, + ); let mut result = [0; 24]; result[0..8].copy_from_slice(&c1); diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/fr.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/fr.rs new file mode 100644 index 000000000..6791350ae --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/fr.rs @@ -0,0 +1,41 @@ +use crate::{ + syscalls::{syscall_arith256_mod, SyscallArith256ModParams}, + zisklib::{eq, lt}, +}; + +use super::constants::R; + +pub fn reduce_fr_bn254(x: &[u64; 4], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 4] { + if lt(x, &R) { + return *x; + } + + // x·1 + 0 + let mut params = SyscallArith256ModParams { + a: x, + b: &[1, 0, 0, 0], + c: &[0, 0, 0, 0], + module: &R, + d: &mut [0, 0, 0, 0], + }; + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); + + *params.d +} + +/// Convert big-endian bytes to little-endian u64 limbs for a scalar (32 bytes -> [u64; 4]) +pub fn scalar_bytes_be_to_u64_le_bn254(bytes: &[u8; 32]) -> [u64; 4] { + let mut result = [0u64; 4]; + + for i in 0..4 { + for j in 0..8 { + result[3 - i] |= (bytes[i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + result +} diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/miller_loop.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/miller_loop.rs index 601c850fc..e04fd310e 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bn254/miller_loop.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/miller_loop.rs @@ -1,6 +1,6 @@ //! Miller Loop for the pairings over BN254 -use crate::zisklib::{eq, fcall_bn254_add_line_coeffs, fcall_bn254_dbl_line_coeffs}; +use crate::zisklib::{eq, fcall_bn254_twist_add_line_coeffs, fcall_bn254_twist_dbl_line_coeffs}; use super::{ fp::{inv_fp_bn254, mul_fp_bn254, neg_fp_bn254}, @@ -21,13 +21,30 @@ const LOOP_LENGTH: [i8; 65] = [ ]; /// Computes the Miller loop of a non-zero point `p` in G1 and a non-zero point `q` in G2 -pub fn miller_loop_bn254(p: &[u64; 8], q: &[u64; 16]) -> [u64; 48] { +pub fn miller_loop_bn254( + p: &[u64; 8], + q: &[u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { // Before the loop starts, compute xp' = -xp/yp and yp' = 1/yp let mut xp_prime: [u64; 4] = p[0..4].try_into().unwrap(); let mut yp_prime: [u64; 4] = p[4..8].try_into().unwrap(); - yp_prime = inv_fp_bn254(&yp_prime); - xp_prime = neg_fp_bn254(&xp_prime); - xp_prime = mul_fp_bn254(&xp_prime, &yp_prime); + yp_prime = inv_fp_bn254( + &yp_prime, + #[cfg(feature = "hints")] + hints, + ); + xp_prime = neg_fp_bn254( + &xp_prime, + #[cfg(feature = "hints")] + hints, + ); + xp_prime = mul_fp_bn254( + &xp_prime, + &yp_prime, + #[cfg(feature = "hints")] + hints, + ); // Initialize the Miller loop with r = q and f = 1 let mut r: [u64; 16] = q[0..16].try_into().unwrap(); @@ -35,76 +52,231 @@ pub fn miller_loop_bn254(p: &[u64; 8], q: &[u64; 16]) -> [u64; 48] { f[0] = 1; for &bit in LOOP_LENGTH.iter().skip(1) { // Hint the coefficients (𝜆,𝜇) of the line l_{twist(r),twist(r)} - let (lambda, mu) = fcall_bn254_dbl_line_coeffs(&r); + let (lambda, mu) = fcall_bn254_twist_dbl_line_coeffs( + &r, + #[cfg(feature = "hints")] + hints, + ); // Check that the line is correct - assert!(is_tangent_twist_bn254(&r, &lambda, &mu)); + assert!(is_tangent_twist_bn254( + &r, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + )); // Compute f = f² · line_{twist(r),twist(r)}(p) - f = square_fp12_bn254(&f); - let l = line_eval_twist_bn254(&lambda, &mu, &xp_prime, &yp_prime); - f = sparse_mul_fp12_bn254(&f, &l); + f = square_fp12_bn254( + &f, + #[cfg(feature = "hints")] + hints, + ); + let l = line_eval_twist_bn254( + &lambda, + &mu, + &xp_prime, + &yp_prime, + #[cfg(feature = "hints")] + hints, + ); + f = sparse_mul_fp12_bn254( + &f, + &l, + #[cfg(feature = "hints")] + hints, + ); // Double r - r = dbl_twist_with_hints_bn254(&r, &lambda, &mu); + r = dbl_twist_with_hints_bn254( + &r, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + ); if bit * bit == 1 { - let q_prime = if bit == 1 { q } else { &neg_twist_bn254(q) }; + let q_prime = if bit == 1 { + q + } else { + &neg_twist_bn254( + q, + #[cfg(feature = "hints")] + hints, + ) + }; // Hint the coefficients (𝜆,𝜇) of the line l_{twist(r),twist(q')} - let (lambda, mu) = fcall_bn254_add_line_coeffs(&r, q_prime); + let (lambda, mu) = fcall_bn254_twist_add_line_coeffs( + &r, + q_prime, + #[cfg(feature = "hints")] + hints, + ); // Check that the line is correct - assert!(is_line_twist_bn254(&r, q_prime, &lambda, &mu)); + assert!(is_line_twist_bn254( + &r, + q_prime, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + )); // Compute f = f · line_{twist(r),twist(q')} - let l = line_eval_twist_bn254(&lambda, &mu, &xp_prime, &yp_prime); - f = sparse_mul_fp12_bn254(&f, &l); + let l = line_eval_twist_bn254( + &lambda, + &mu, + &xp_prime, + &yp_prime, + #[cfg(feature = "hints")] + hints, + ); + f = sparse_mul_fp12_bn254( + &f, + &l, + #[cfg(feature = "hints")] + hints, + ); // Add r and q' - r = add_twist_with_hints_bn254(&r, q_prime, &lambda, &mu); + r = add_twist_with_hints_bn254( + &r, + q_prime, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + ); } } // Compute the last two lines // f = f · line_{twist(r),twist(utf(q))}(p) - let q_frob = utf_endomorphism_twist_bn254(q); + let q_frob = utf_endomorphism_twist_bn254( + q, + #[cfg(feature = "hints")] + hints, + ); // Hint the coefficients (𝜆,𝜇) of the line l_{twist(r),twist(utf(q))} - let (lambda, mu) = fcall_bn254_add_line_coeffs(&r, &q_frob); - assert!(is_line_twist_bn254(&r, &q_frob, &lambda, &mu)); - - let l = line_eval_twist_bn254(&lambda, &mu, &xp_prime, &yp_prime); - f = sparse_mul_fp12_bn254(&f, &l); + let (lambda, mu) = fcall_bn254_twist_add_line_coeffs( + &r, + &q_frob, + #[cfg(feature = "hints")] + hints, + ); + assert!(is_line_twist_bn254( + &r, + &q_frob, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + )); + + let l = line_eval_twist_bn254( + &lambda, + &mu, + &xp_prime, + &yp_prime, + #[cfg(feature = "hints")] + hints, + ); + f = sparse_mul_fp12_bn254( + &f, + &l, + #[cfg(feature = "hints")] + hints, + ); // Update r by r + utf(q) - r = add_twist_with_hints_bn254(&r, &q_frob, &lambda, &mu); + r = add_twist_with_hints_bn254( + &r, + &q_frob, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + ); // f = f · line_{twist(r),twist(-utf(utf(q)))}(p) - let q_frob2 = neg_twist_bn254(&utf_endomorphism_twist_bn254(&q_frob)); + let q_frob2 = neg_twist_bn254( + &utf_endomorphism_twist_bn254( + &q_frob, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // Hint the coefficients (𝜆,𝜇) of the line l_{twist(r),twist(-utf(utf(q)))} - let (lambda, mu) = fcall_bn254_add_line_coeffs(&r, &q_frob2); - assert!(is_line_twist_bn254(&r, &q_frob2, &lambda, &mu)); - - let l = line_eval_twist_bn254(&lambda, &mu, &xp_prime, &yp_prime); - f = sparse_mul_fp12_bn254(&f, &l); + let (lambda, mu) = fcall_bn254_twist_add_line_coeffs( + &r, + &q_frob2, + #[cfg(feature = "hints")] + hints, + ); + assert!(is_line_twist_bn254( + &r, + &q_frob2, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints + )); + + let l = line_eval_twist_bn254( + &lambda, + &mu, + &xp_prime, + &yp_prime, + #[cfg(feature = "hints")] + hints, + ); + f = sparse_mul_fp12_bn254( + &f, + &l, + #[cfg(feature = "hints")] + hints, + ); f } /// Computes the Miller loop for the BN254 curve for a batch of non-zero points `p_i` in G1 and non-zero points `q_i` in G2 -pub fn miller_loop_batch_bn254(g1_points: &[[u64; 8]], g2_points: &[[u64; 16]]) -> [u64; 48] { +pub fn miller_loop_batch_bn254( + g1_points: &[[u64; 8]], + g2_points: &[[u64; 16]], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { // Before the loop starts, compute xp' = -xp/yp and yp' = 1/yp for each point p let mut xp_primes: Vec<[u64; 4]> = Vec::with_capacity(g1_points.len()); let mut yp_primes: Vec<[u64; 4]> = Vec::with_capacity(g1_points.len()); for p in g1_points.iter() { let mut xp_prime: [u64; 4] = p[0..4].try_into().unwrap(); let mut yp_prime: [u64; 4] = p[4..8].try_into().unwrap(); - yp_prime = inv_fp_bn254(&yp_prime); - xp_prime = neg_fp_bn254(&xp_prime); - xp_prime = mul_fp_bn254(&xp_prime, &yp_prime); + yp_prime = inv_fp_bn254( + &yp_prime, + #[cfg(feature = "hints")] + hints, + ); + xp_prime = neg_fp_bn254( + &xp_prime, + #[cfg(feature = "hints")] + hints, + ); + xp_prime = mul_fp_bn254( + &xp_prime, + &yp_prime, + #[cfg(feature = "hints")] + hints, + ); xp_primes.push(xp_prime); yp_primes.push(yp_prime); @@ -117,41 +289,112 @@ pub fn miller_loop_batch_bn254(g1_points: &[[u64; 8]], g2_points: &[[u64; 16]]) let n = g1_points.len(); for &bit in LOOP_LENGTH.iter().skip(1) { // Compute f = f² · line_{twist(r),twist(r)}(p) - f = square_fp12_bn254(&f); + f = square_fp12_bn254( + &f, + #[cfg(feature = "hints")] + hints, + ); for i in 0..n { let r = &mut r[i]; // Hint the coefficients (𝜆,𝜇) of the line l_{twist(r),twist(r)} - let (lambda, mu) = fcall_bn254_dbl_line_coeffs(r); + let (lambda, mu) = fcall_bn254_twist_dbl_line_coeffs( + r, + #[cfg(feature = "hints")] + hints, + ); // Check that the line is correct - assert!(is_tangent_twist_bn254(r, &lambda, &mu)); + assert!(is_tangent_twist_bn254( + r, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + )); let xp_prime = &xp_primes[i]; let yp_prime = &yp_primes[i]; - let l = line_eval_twist_bn254(&lambda, &mu, xp_prime, yp_prime); - f = sparse_mul_fp12_bn254(&f, &l); + let l = line_eval_twist_bn254( + &lambda, + &mu, + xp_prime, + yp_prime, + #[cfg(feature = "hints")] + hints, + ); + f = sparse_mul_fp12_bn254( + &f, + &l, + #[cfg(feature = "hints")] + hints, + ); // Double r - *r = dbl_twist_with_hints_bn254(r, &lambda, &mu); + *r = dbl_twist_with_hints_bn254( + r, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + ); if bit * bit == 1 { let q = &g2_points[i]; - let q_prime = if bit == 1 { q } else { &neg_twist_bn254(q) }; + let q_prime = if bit == 1 { + q + } else { + &neg_twist_bn254( + q, + #[cfg(feature = "hints")] + hints, + ) + }; // Hint the coefficients (𝜆,𝜇) of the line l_{twist(r),twist(q')} - let (lambda, mu) = fcall_bn254_add_line_coeffs(r, q_prime); + let (lambda, mu) = fcall_bn254_twist_add_line_coeffs( + r, + q_prime, + #[cfg(feature = "hints")] + hints, + ); // Check that the line is correct - assert!(is_line_twist_bn254(r, q_prime, &lambda, &mu)); + assert!(is_line_twist_bn254( + r, + q_prime, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + )); // Compute f = f · line_{twist(r),twist(q')} - let l = line_eval_twist_bn254(&lambda, &mu, xp_prime, yp_prime); - f = sparse_mul_fp12_bn254(&f, &l); + let l = line_eval_twist_bn254( + &lambda, + &mu, + xp_prime, + yp_prime, + #[cfg(feature = "hints")] + hints, + ); + f = sparse_mul_fp12_bn254( + &f, + &l, + #[cfg(feature = "hints")] + hints, + ); // Add r and q' - *r = add_twist_with_hints_bn254(r, q_prime, &lambda, &mu); + *r = add_twist_with_hints_bn254( + r, + q_prime, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + ); } } } @@ -164,27 +407,93 @@ pub fn miller_loop_batch_bn254(g1_points: &[[u64; 8]], g2_points: &[[u64; 16]]) let yp_prime = &yp_primes[i]; // f = f · line_{twist(r),twist(utf(q))}(p) - let q_frob = utf_endomorphism_twist_bn254(q); + let q_frob = utf_endomorphism_twist_bn254( + q, + #[cfg(feature = "hints")] + hints, + ); // Hint the coefficients (𝜆,𝜇) of the line l_{twist(r),twist(utf(q))} - let (lambda, mu) = fcall_bn254_add_line_coeffs(r, &q_frob); - assert!(is_line_twist_bn254(r, &q_frob, &lambda, &mu)); - - let l = line_eval_twist_bn254(&lambda, &mu, xp_prime, yp_prime); - f = sparse_mul_fp12_bn254(&f, &l); + let (lambda, mu) = fcall_bn254_twist_add_line_coeffs( + r, + &q_frob, + #[cfg(feature = "hints")] + hints, + ); + assert!(is_line_twist_bn254( + r, + &q_frob, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + )); + + let l = line_eval_twist_bn254( + &lambda, + &mu, + xp_prime, + yp_prime, + #[cfg(feature = "hints")] + hints, + ); + f = sparse_mul_fp12_bn254( + &f, + &l, + #[cfg(feature = "hints")] + hints, + ); // Update r by r + utf(q) - *r = add_twist_with_hints_bn254(r, &q_frob, &lambda, &mu); - + *r = add_twist_with_hints_bn254( + r, + &q_frob, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + ); // f = f · line_{twist(r),twist(-utf(utf(q)))}(p) - let q_frob2 = neg_twist_bn254(&utf_endomorphism_twist_bn254(&q_frob)); + let q_frob2 = neg_twist_bn254( + &utf_endomorphism_twist_bn254( + &q_frob, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // Hint the coefficients (𝜆,𝜇) of the line l_{twist(r),twist(-utf(utf(q)))} - let (lambda, mu) = fcall_bn254_add_line_coeffs(r, &q_frob2); - assert!(is_line_twist_bn254(r, &q_frob2, &lambda, &mu)); - - let l = line_eval_twist_bn254(&lambda, &mu, xp_prime, yp_prime); - f = sparse_mul_fp12_bn254(&f, &l); + let (lambda, mu) = fcall_bn254_twist_add_line_coeffs( + r, + &q_frob2, + #[cfg(feature = "hints")] + hints, + ); + assert!(is_line_twist_bn254( + r, + &q_frob2, + &lambda, + &mu, + #[cfg(feature = "hints")] + hints, + )); + + let l = line_eval_twist_bn254( + &lambda, + &mu, + xp_prime, + yp_prime, + #[cfg(feature = "hints")] + hints, + ); + f = sparse_mul_fp12_bn254( + &f, + &l, + #[cfg(feature = "hints")] + hints, + ); } f @@ -202,44 +511,103 @@ pub fn miller_loop_batch_bn254(g1_points: &[[u64; 8]], g2_points: &[[u64; 16]]) /// Checks if the line defined by (𝜆,𝜇) passes through non-zero points `q1,q2` in G2 #[inline] -fn is_line_twist_bn254(q1: &[u64; 16], q2: &[u64; 16], lambda: &[u64; 8], mu: &[u64; 8]) -> bool { +fn is_line_twist_bn254( + q1: &[u64; 16], + q2: &[u64; 16], + lambda: &[u64; 8], + mu: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { // Check if the line passes through q1 - let check_q1 = line_check_twist_bn254(q1, lambda, mu); + let check_q1 = line_check_twist_bn254( + q1, + lambda, + mu, + #[cfg(feature = "hints")] + hints, + ); // Check if the line passes through q2 - let check_q2 = line_check_twist_bn254(q2, lambda, mu); - + let check_q2 = line_check_twist_bn254( + q2, + lambda, + mu, + #[cfg(feature = "hints")] + hints, + ); check_q1 && check_q2 } /// Checks if the line defined by (𝜆,𝜇) is tangent to the curve at non-zero point `q` in G2 #[inline] -fn is_tangent_twist_bn254(q: &[u64; 16], lambda: &[u64; 8], mu: &[u64; 8]) -> bool { +fn is_tangent_twist_bn254( + q: &[u64; 16], + lambda: &[u64; 8], + mu: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { // Check if the line is tangent to the curve at q // Check if the line passes through q - let check_q = line_check_twist_bn254(q, lambda, mu); - + let check_q = line_check_twist_bn254( + q, + lambda, + mu, + #[cfg(feature = "hints")] + hints, + ); // Check that 2𝜆y = 3x² let x: &[u64; 8] = q[0..8].try_into().unwrap(); let y: &[u64; 8] = q[8..16].try_into().unwrap(); - let mut lhs = mul_fp2_bn254(lambda, y); - lhs = dbl_fp2_bn254(&lhs); - - let mut rhs = square_fp2_bn254(x); - rhs = scalar_mul_fp2_bn254(&rhs, &[3, 0, 0, 0]); - + let mut lhs = mul_fp2_bn254( + lambda, + y, + #[cfg(feature = "hints")] + hints, + ); + lhs = dbl_fp2_bn254( + &lhs, + #[cfg(feature = "hints")] + hints, + ); + + let mut rhs = square_fp2_bn254( + x, + #[cfg(feature = "hints")] + hints, + ); + rhs = scalar_mul_fp2_bn254( + &rhs, + &[3, 0, 0, 0], + #[cfg(feature = "hints")] + hints, + ); check_q && eq(&lhs, &rhs) } /// Check if the line defined by (𝜆,𝜇) passes through non-zero point `q` in G2 #[inline] -fn line_check_twist_bn254(q: &[u64; 16], lambda: &[u64; 8], mu: &[u64; 8]) -> bool { +fn line_check_twist_bn254( + q: &[u64; 16], + lambda: &[u64; 8], + mu: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { let x: &[u64; 8] = q[0..8].try_into().unwrap(); let y: &[u64; 8] = q[8..16].try_into().unwrap(); // Check if y = λx + μ - let mut rhs = mul_fp2_bn254(lambda, x); - rhs = add_fp2_bn254(&rhs, mu); + let mut rhs = mul_fp2_bn254( + lambda, + x, + #[cfg(feature = "hints")] + hints, + ); + rhs = add_fp2_bn254( + &rhs, + mu, + #[cfg(feature = "hints")] + hints, + ); eq(&rhs, y) } @@ -250,9 +618,24 @@ fn line_eval_twist_bn254( mu: &[u64; 8], x: &[u64; 4], y: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, ) -> [u64; 16] { - let coeff1 = scalar_mul_fp2_bn254(lambda, x); - let coeff2 = scalar_mul_fp2_bn254(mu, &neg_fp_bn254(y)); + let coeff1 = scalar_mul_fp2_bn254( + lambda, + x, + #[cfg(feature = "hints")] + hints, + ); + let coeff2 = scalar_mul_fp2_bn254( + mu, + &neg_fp_bn254( + y, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); let mut result = [0; 16]; result[0..8].copy_from_slice(&coeff1); @@ -268,19 +651,48 @@ fn add_twist_with_hints_bn254( q2: &[u64; 16], lambda: &[u64; 8], mu: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, ) -> [u64; 16] { let x1: &[u64; 8] = q1[0..8].try_into().unwrap(); let x2: &[u64; 8] = q2[0..8].try_into().unwrap(); // Compute x3 = λ² - x1 - x2 - let mut x3 = square_fp2_bn254(lambda); - x3 = sub_fp2_bn254(&x3, x1); - x3 = sub_fp2_bn254(&x3, x2); + let mut x3 = square_fp2_bn254( + lambda, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bn254( + &x3, + x1, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bn254( + &x3, + x2, + #[cfg(feature = "hints")] + hints, + ); // Compute y3 = -λx3 - μ - let mut y3 = mul_fp2_bn254(lambda, &x3); - y3 = add_fp2_bn254(mu, &y3); - y3 = neg_fp2_bn254(&y3); + let mut y3 = mul_fp2_bn254( + lambda, + &x3, + #[cfg(feature = "hints")] + hints, + ); + y3 = add_fp2_bn254( + mu, + &y3, + #[cfg(feature = "hints")] + hints, + ); + y3 = neg_fp2_bn254( + &y3, + #[cfg(feature = "hints")] + hints, + ); [ x3[0], x3[1], x3[2], x3[3], x3[4], x3[5], x3[6], x3[7], y3[0], y3[1], y3[2], y3[3], y3[4], @@ -290,17 +702,49 @@ fn add_twist_with_hints_bn254( /// Doubling of a non-zero point `q` in G2 with hinted line coefficients (𝜆,𝜇) #[inline] -fn dbl_twist_with_hints_bn254(q: &[u64; 16], lambda: &[u64; 8], mu: &[u64; 8]) -> [u64; 16] { +fn dbl_twist_with_hints_bn254( + q: &[u64; 16], + lambda: &[u64; 8], + mu: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 16] { let x: &[u64; 8] = q[0..8].try_into().unwrap(); // Compute x3 = λ² - 2x - let mut x3 = square_fp2_bn254(lambda); - x3 = sub_fp2_bn254(&x3, &dbl_fp2_bn254(x)); + let mut x3 = square_fp2_bn254( + lambda, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bn254( + &x3, + &dbl_fp2_bn254( + x, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); // Compute y3 = -λx3 - μ - let mut y3 = mul_fp2_bn254(lambda, &x3); - y3 = add_fp2_bn254(mu, &y3); - y3 = neg_fp2_bn254(&y3); + let mut y3 = mul_fp2_bn254( + lambda, + &x3, + #[cfg(feature = "hints")] + hints, + ); + y3 = add_fp2_bn254( + mu, + &y3, + #[cfg(feature = "hints")] + hints, + ); + y3 = neg_fp2_bn254( + &y3, + #[cfg(feature = "hints")] + hints, + ); [ x3[0], x3[1], x3[2], x3[3], x3[4], x3[5], x3[6], x3[7], y3[0], y3[1], y3[2], y3[3], y3[4], diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/mod.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/mod.rs index f180eb90b..f7cf0a85b 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bn254/mod.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/mod.rs @@ -6,6 +6,7 @@ mod fp; mod fp12; mod fp2; mod fp6; +mod fr; mod miller_loop; mod pairing; mod twist; @@ -17,5 +18,6 @@ pub use fp::*; pub use fp12::*; pub use fp2::*; pub use fp6::*; +pub use fr::*; pub use pairing::*; pub use twist::*; diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/pairing.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/pairing.rs index ddbe3bbd9..646755e73 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bn254/pairing.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/pairing.rs @@ -1,15 +1,24 @@ //! Pairing over BN254 -use crate::zisklib::lib::utils::gt; +use crate::zisklib::lib::utils::{eq, is_one, lt}; use super::{ - constants::{IDENTITY_G1, IDENTITY_G2, P_MINUS_ONE}, - curve::is_on_curve_bn254, + constants::{G1_IDENTITY, G2_IDENTITY, P}, + curve::{g1_bytes_be_to_u64_le_bn254, is_on_curve_bn254}, final_exp::final_exp_bn254, miller_loop::{miller_loop_batch_bn254, miller_loop_bn254}, - twist::{is_on_curve_twist_bn254, is_on_subgroup_twist_bn254}, + twist::{g2_bytes_be_to_u64_le_bn254, is_on_curve_twist_bn254, is_on_subgroup_twist_bn254}, }; +/// Pairing check result codes +const PAIRING_CHECK_SUCCESS: u8 = 0; +const PAIRING_CHECK_FAILED: u8 = 1; +const PAIRING_CHECK_ERR_G1_INVALID: u8 = 2; +const PAIRING_CHECK_ERR_G1_NOT_ON_CURVE: u8 = 3; +const PAIRING_CHECK_ERR_G2_INVALID: u8 = 4; +const PAIRING_CHECK_ERR_G2_NOT_ON_CURVE: u8 = 5; +const PAIRING_CHECK_ERR_G2_NOT_IN_SUBGROUP: u8 = 6; + /// Optimal Ate Pairing e: G1 x G2 -> GT over the BN254 curve /// where G1 = E(Fp)[r] = E(Fp), G2 = E'(Fp2)[r] and GT = μ_r (the r-th roots of unity over Fp12* /// the involved curves are E/Fp: y² = x³ + 3 and E'/Fp2: y² = x³ + 3/(9+u) @@ -17,9 +26,13 @@ use super::{ /// input: P ∈ G1 and Q ∈ G2 /// output: e(P,Q) ∈ GT /// -pub fn pairing_bn254(p: &[u64; 8], q: &[u64; 16]) -> [u64; 48] { +pub fn pairing_bn254( + p: &[u64; 8], + q: &[u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { // Is p = 𝒪? - if *p == IDENTITY_G1 || *q == IDENTITY_G2 { + if *p == G1_IDENTITY || *q == G2_IDENTITY { // e(P, 𝒪) = e(𝒪, Q) = 1; let mut one = [0; 48]; one[0] = 1; @@ -27,16 +40,29 @@ pub fn pairing_bn254(p: &[u64; 8], q: &[u64; 16]) -> [u64; 48] { } // Miller loop - let miller_loop = miller_loop_bn254(p, q); + let miller_loop = miller_loop_bn254( + p, + q, + #[cfg(feature = "hints")] + hints, + ); // Final exponentiation - final_exp_bn254(&miller_loop) + final_exp_bn254( + &miller_loop, + #[cfg(feature = "hints")] + hints, + ) } /// Computes the optimal Ate pairing for a batch of G1 and G2 points over the BN254 curve /// and multiplies the results together, i.e.: /// e(P₁, Q₁) · e(P₂, Q₂) · ... · e(Pₙ, Qₙ) ∈ GT -pub fn pairing_batch_bn254(g1_points: &[[u64; 8]], g2_points: &[[u64; 16]]) -> [u64; 48] { +pub fn pairing_batch_bn254( + g1_points: &[[u64; 8]], + g2_points: &[[u64; 16]], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 48] { // Since each e(Pi, Qi) := FinalExp(MillerLoop(Pi, Qi)) // We have: // e(P₁, Q₁) · e(P₂, Q₂) · ... · e(Pₙ, Qₙ) = FinalExp(MillerLoop(P₁, Q₁) · MillerLoop(P₂, Q₂) · ... · MillerLoop(Pₙ, Qₙ)) @@ -51,7 +77,7 @@ pub fn pairing_batch_bn254(g1_points: &[[u64; 8]], g2_points: &[[u64; 16]]) -> [ let mut g2_points_ml = Vec::with_capacity(num_points); for (p, q) in g1_points.iter().zip(g2_points.iter()) { // Is p = 𝒪 or q = 𝒪? - if *p == IDENTITY_G1 || *q == IDENTITY_G2 { + if *p == G1_IDENTITY || *q == G2_IDENTITY { // MillerLoop(P, 𝒪) = MillerLoop(𝒪, Q) = 1; we can skip continue; } @@ -68,29 +94,188 @@ pub fn pairing_batch_bn254(g1_points: &[[u64; 8]], g2_points: &[[u64; 16]]) -> [ } // Compute the Miller loop for the batch - let miller_loop = miller_loop_batch_bn254(&g1_points_ml, &g2_points_ml); + let miller_loop = miller_loop_batch_bn254( + &g1_points_ml, + &g2_points_ml, + #[cfg(feature = "hints")] + hints, + ); // Final exponentiation - final_exp_bn254(&miller_loop) + final_exp_bn254( + &miller_loop, + #[cfg(feature = "hints")] + hints, + ) +} + +/// BN254 pairing check with validation. +/// +/// Validates all points have canonical field elements, are on curve, and G2 points are in subgroup. +/// +/// # Arguments +/// * `g1_points` - Slice of G1 points as [u64; 8] +/// * `g2_points` - Slice of G2 points as [u64; 16] +/// +/// # Returns +/// * `Ok(true)` - Pairing check passed (product of pairings == 1) +/// * `Ok(false)` - Pairing check failed (product of pairings != 1) +/// * `Err(PAIRING_CHECK_ERR_G1_INVALID)` - G1 field element not canonical (>= P) +/// * `Err(PAIRING_CHECK_ERR_G1_NOT_ON_CURVE)` - G1 point not on curve +/// * `Err(PAIRING_CHECK_ERR_G2_INVALID)` - G2 field element not canonical (>= P) +/// * `Err(PAIRING_CHECK_ERR_G2_NOT_ON_CURVE)` - G2 point not on twist curve +/// * `Err(PAIRING_CHECK_ERR_G2_NOT_IN_SUBGROUP)` - G2 point not in subgroup +pub fn pairing_check_bn254( + g1_points: &[[u64; 8]], + g2_points: &[[u64; 16]], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Result { + assert_eq!(g1_points.len(), g2_points.len(), "Number of G1 and G2 points must be equal"); + + // Collect valid pairs + let mut g1_valid = Vec::with_capacity(g1_points.len()); + let mut g2_valid = Vec::with_capacity(g2_points.len()); + for (g1, g2) in g1_points.iter().zip(g2_points.iter()) { + let g1_is_inf = eq(g1, &G1_IDENTITY); + let g2_is_inf = eq(g2, &G2_IDENTITY); + + // If p = 𝒪 or q = 𝒪 => MillerLoop(P, 𝒪) = MillerLoop(𝒪, Q) = 1; we can skip + if g2_is_inf { + if !g1_is_inf + && !is_on_curve_bn254( + g1, + #[cfg(feature = "hints")] + hints, + ) + { + return Err(PAIRING_CHECK_ERR_G1_NOT_ON_CURVE); + } + continue; + } + + if g1_is_inf { + if !is_on_curve_twist_bn254( + g2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G2_NOT_ON_CURVE); + } + if !is_on_subgroup_twist_bn254( + g2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G2_NOT_IN_SUBGROUP); + } + continue; + } + + // Validate G1 point field elements + let x1: [u64; 4] = g1[0..4].try_into().unwrap(); + let y1: [u64; 4] = g1[4..8].try_into().unwrap(); + if !lt(&x1, &P) || !lt(&y1, &P) { + return Err(PAIRING_CHECK_ERR_G1_INVALID); + } + + // Verify G1 point is on curve + if !is_on_curve_bn254( + g1, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G1_NOT_ON_CURVE); + } + + // Validate G2 point field elements + let x2_r: [u64; 4] = g2[0..4].try_into().unwrap(); + let x2_i: [u64; 4] = g2[4..8].try_into().unwrap(); + let y2_r: [u64; 4] = g2[8..12].try_into().unwrap(); + let y2_i: [u64; 4] = g2[12..16].try_into().unwrap(); + if !lt(&x2_r, &P) || !lt(&x2_i, &P) || !lt(&y2_r, &P) || !lt(&y2_i, &P) { + return Err(PAIRING_CHECK_ERR_G2_INVALID); + } + + // Verify G2 point is on twist curve + if !is_on_curve_twist_bn254( + g2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G2_NOT_ON_CURVE); + } + + // Verify G2 point is in subgroup + if !is_on_subgroup_twist_bn254( + g2, + #[cfg(feature = "hints")] + hints, + ) { + return Err(PAIRING_CHECK_ERR_G2_NOT_IN_SUBGROUP); + } + + g1_valid.push(*g1); + g2_valid.push(*g2); + } + + // If all pairs were skipped, result is 1 + if g1_valid.is_empty() { + return Ok(true); + } + + // Compute batch pairing and check if result is 1 + Ok(is_one(&pairing_batch_bn254( + &g1_valid, + &g2_valid, + #[cfg(feature = "hints")] + hints, + ))) } +/// BN254 pairing check with big-endian byte format +/// /// # Safety -/// - `g1_ptr` must point to a contiguous array of `num_points` G1 affine points, -/// each being `[u64; 8]` (64 bytes per point). -/// - `g2_ptr` must point to a contiguous array of `num_points` G2 twist affine points, -/// each being `[u64; 16]` (128 bytes per point). -/// - `out_ptr` must point to a valid `[u64; 48]` (384 bytes) writable buffer for the GT result. -/// - `num_points` must correctly reflect the number of points in both arrays. -#[no_mangle] -pub unsafe extern "C" fn pairing_batch_bn254_c( - g1_ptr: *const u64, - g2_ptr: *const u64, - num_points: usize, - out_ptr: *mut u64, -) { - let g1_slice = core::slice::from_raw_parts(g1_ptr as *const [u64; 8], num_points); - let g2_slice = core::slice::from_raw_parts(g2_ptr as *const [u64; 16], num_points); - let result = pairing_batch_bn254(g1_slice, g2_slice); - - out_ptr.copy_from_nonoverlapping(result.as_ptr(), 48); +/// - `pairs` must point to an array of `num_pairs * 192` bytes +/// Each pair is: 64 bytes G1 point + 128 bytes G2 point +/// +/// # Returns +/// - 0 = pairing check passed +/// - 1 = pairing check failed +/// - 2 = G1 field element invalid +/// - 3 = G1 point not on curve +/// - 4 = G2 field element invalid +/// - 5 = G2 point not on curve +/// - 6 = G2 point not in subgroup +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_bn254_pairing_check_c")] +pub unsafe extern "C" fn bn254_pairing_check_c( + pairs: *const u8, + num_pairs: usize, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u8 { + // Parse all pairs + let mut g1_points: Vec<[u64; 8]> = Vec::with_capacity(num_pairs); + let mut g2_points: Vec<[u64; 16]> = Vec::with_capacity(num_pairs); + + for i in 0..num_pairs { + let pair_ptr = pairs.add(i * 192); + + let g1_bytes: &[u8; 64] = &*(pair_ptr as *const [u8; 64]); + let g2_bytes: &[u8; 128] = &*(pair_ptr.add(64) as *const [u8; 128]); + + g1_points.push(g1_bytes_be_to_u64_le_bn254(g1_bytes)); + g2_points.push(g2_bytes_be_to_u64_le_bn254(g2_bytes)); + } + + // Perform pairing check with validation + match pairing_check_bn254( + &g1_points, + &g2_points, + #[cfg(feature = "hints")] + hints, + ) { + Ok(true) => PAIRING_CHECK_SUCCESS, + Ok(false) => PAIRING_CHECK_FAILED, + Err(code) => code, + } } diff --git a/ziskos/entrypoint/src/zisklib/lib/bn254/twist.rs b/ziskos/entrypoint/src/zisklib/lib/bn254/twist.rs index a0f30fbe3..ef619e571 100644 --- a/ziskos/entrypoint/src/zisklib/lib/bn254/twist.rs +++ b/ziskos/entrypoint/src/zisklib/lib/bn254/twist.rs @@ -3,7 +3,7 @@ use crate::zisklib::lib::utils::eq; use super::{ - constants::{ETWISTED_B, E_B, FROBENIUS_GAMMA12, FROBENIUS_GAMMA13, IDENTITY_G2}, + constants::{ETWISTED_B, E_B, FROBENIUS_GAMMA12, FROBENIUS_GAMMA13, G2_IDENTITY}, fp2::{ add_fp2_bn254, conjugate_fp2_bn254, dbl_fp2_bn254, inv_fp2_bn254, mul_fp2_bn254, neg_fp2_bn254, scalar_mul_fp2_bn254, square_fp2_bn254, sub_fp2_bn254, @@ -11,43 +11,113 @@ use super::{ }; /// Check if a non-zero point `p` is on the BN254 twist -pub fn is_on_curve_twist_bn254(p: &[u64; 16]) -> bool { +pub fn is_on_curve_twist_bn254( + p: &[u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { // q in E' iff y² == x³ + 3 / (9 + u) let x: [u64; 8] = p[0..8].try_into().unwrap(); let y: [u64; 8] = p[8..16].try_into().unwrap(); - let x_sq = square_fp2_bn254(&x); - let x_cubed = mul_fp2_bn254(&x_sq, &x); - let x_cubed_plus_b = add_fp2_bn254(&x_cubed, &ETWISTED_B); - let y_sq = square_fp2_bn254(&y); + let x_sq = square_fp2_bn254( + &x, + #[cfg(feature = "hints")] + hints, + ); + let x_cubed = mul_fp2_bn254( + &x_sq, + &x, + #[cfg(feature = "hints")] + hints, + ); + let x_cubed_plus_b = add_fp2_bn254( + &x_cubed, + &ETWISTED_B, + #[cfg(feature = "hints")] + hints, + ); + let y_sq = square_fp2_bn254( + &y, + #[cfg(feature = "hints")] + hints, + ); eq(&x_cubed_plus_b, &y_sq) } /// Check if a non-zero point `p` is on the BN254 twist subgroup -pub fn is_on_subgroup_twist_bn254(p: &[u64; 16]) -> bool { +pub fn is_on_subgroup_twist_bn254( + p: &[u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { // p in subgroup iff: // (x+1)·Q + 𝜓(x·Q) + 𝜓²(x·Q) == 𝜓³((2x)·Q) // where 𝜓 is the Frobenius endomorphism // as described in https://eprint.iacr.org/2022/348.pdf - let xp: [u64; 16] = scalar_mul_by_x_twist_bn254(p); - let x1p = add_twist_bn254(p, &xp); - let psi_one = utf_endomorphism_twist_bn254(&xp); - let psi_two = utf_endomorphism_twist_bn254(&psi_one); - let mut lhs = add_twist_bn254(&x1p, &psi_one); - lhs = add_twist_bn254(&lhs, &psi_two); - - let mut rhs = dbl_twist_bn254(&xp); - rhs = utf_endomorphism_twist_bn254(&rhs); - rhs = utf_endomorphism_twist_bn254(&rhs); - rhs = utf_endomorphism_twist_bn254(&rhs); + let xp: [u64; 16] = scalar_mul_by_x_twist_bn254( + p, + #[cfg(feature = "hints")] + hints, + ); + let x1p = add_twist_bn254( + p, + &xp, + #[cfg(feature = "hints")] + hints, + ); + let psi_one = utf_endomorphism_twist_bn254( + &xp, + #[cfg(feature = "hints")] + hints, + ); + let psi_two = utf_endomorphism_twist_bn254( + &psi_one, + #[cfg(feature = "hints")] + hints, + ); + let mut lhs = add_twist_bn254( + &x1p, + &psi_one, + #[cfg(feature = "hints")] + hints, + ); + lhs = add_twist_bn254( + &lhs, + &psi_two, + #[cfg(feature = "hints")] + hints, + ); + + let mut rhs = dbl_twist_bn254( + &xp, + #[cfg(feature = "hints")] + hints, + ); + rhs = utf_endomorphism_twist_bn254( + &rhs, + #[cfg(feature = "hints")] + hints, + ); + rhs = utf_endomorphism_twist_bn254( + &rhs, + #[cfg(feature = "hints")] + hints, + ); + rhs = utf_endomorphism_twist_bn254( + &rhs, + #[cfg(feature = "hints")] + hints, + ); eq(&lhs, &rhs) } /// Converts a point `p` on the BN254 curve from Jacobian coordinates to affine coordinates -pub fn to_affine_twist_bn254(p: &[u64; 24]) -> [u64; 16] { +pub fn to_affine_twist_bn254( + p: &[u64; 24], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 16] { let z: [u64; 8] = p[16..24].try_into().unwrap(); if z == [0u64; 8] { - return IDENTITY_G2; + return G2_IDENTITY; } else if z == [1u64, 0, 0, 0, 0, 0, 0, 0] { return [ p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], @@ -58,13 +128,35 @@ pub fn to_affine_twist_bn254(p: &[u64; 24]) -> [u64; 16] { let x: [u64; 8] = p[0..8].try_into().unwrap(); let y: [u64; 8] = p[8..16].try_into().unwrap(); - let zinv = inv_fp2_bn254(&z); - let zinv_sq = square_fp2_bn254(&zinv); - - let x_res = mul_fp2_bn254(&x, &zinv_sq); - let mut y_res = mul_fp2_bn254(&y, &zinv_sq); - y_res = mul_fp2_bn254(&y_res, &zinv); - + let zinv = inv_fp2_bn254( + &z, + #[cfg(feature = "hints")] + hints, + ); + let zinv_sq = square_fp2_bn254( + &zinv, + #[cfg(feature = "hints")] + hints, + ); + + let x_res = mul_fp2_bn254( + &x, + &zinv_sq, + #[cfg(feature = "hints")] + hints, + ); + let mut y_res = mul_fp2_bn254( + &y, + &zinv_sq, + #[cfg(feature = "hints")] + hints, + ); + y_res = mul_fp2_bn254( + &y_res, + &zinv, + #[cfg(feature = "hints")] + hints, + ); [ x_res[0], x_res[1], x_res[2], x_res[3], x_res[4], x_res[5], x_res[6], x_res[7], y_res[0], y_res[1], y_res[2], y_res[3], y_res[4], y_res[5], y_res[6], y_res[7], @@ -72,7 +164,11 @@ pub fn to_affine_twist_bn254(p: &[u64; 24]) -> [u64; 16] { } /// Addition of two non-zero points -pub fn add_twist_bn254(p1: &[u64; 16], p2: &[u64; 16]) -> [u64; 16] { +pub fn add_twist_bn254( + p1: &[u64; 16], + p2: &[u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 16] { let x1: [u64; 8] = p1[0..8].try_into().unwrap(); let y1: [u64; 8] = p1[8..16].try_into().unwrap(); let x2: [u64; 8] = p2[0..8].try_into().unwrap(); @@ -83,27 +179,78 @@ pub fn add_twist_bn254(p1: &[u64; 16], p2: &[u64; 16]) -> [u64; 16] { // Is y1 == y2? if eq(&y1, &y2) { // Compute the doubling - return dbl_twist_bn254(p1); + return dbl_twist_bn254( + p1, + #[cfg(feature = "hints")] + hints, + ); } else { // Points are the inverse of each other, return the point at infinity - return IDENTITY_G2; + return G2_IDENTITY; } } // Compute the addition - let mut den = sub_fp2_bn254(&x2, &x1); - den = inv_fp2_bn254(&den); - let mut lambda = sub_fp2_bn254(&y2, &y1); - lambda = mul_fp2_bn254(&lambda, &den); - - let mut x3 = square_fp2_bn254(&lambda); - x3 = sub_fp2_bn254(&x3, &x1); - x3 = sub_fp2_bn254(&x3, &x2); - - let mut y3 = sub_fp2_bn254(&x1, &x3); - y3 = mul_fp2_bn254(&lambda, &y3); - y3 = sub_fp2_bn254(&y3, &y1); - + let mut den = sub_fp2_bn254( + &x2, + &x1, + #[cfg(feature = "hints")] + hints, + ); + den = inv_fp2_bn254( + &den, + #[cfg(feature = "hints")] + hints, + ); + let mut lambda = sub_fp2_bn254( + &y2, + &y1, + #[cfg(feature = "hints")] + hints, + ); + lambda = mul_fp2_bn254( + &lambda, + &den, + #[cfg(feature = "hints")] + hints, + ); + + let mut x3 = square_fp2_bn254( + &lambda, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bn254( + &x3, + &x1, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bn254( + &x3, + &x2, + #[cfg(feature = "hints")] + hints, + ); + + let mut y3 = sub_fp2_bn254( + &x1, + &x3, + #[cfg(feature = "hints")] + hints, + ); + y3 = mul_fp2_bn254( + &lambda, + &y3, + #[cfg(feature = "hints")] + hints, + ); + y3 = sub_fp2_bn254( + &y3, + &y1, + #[cfg(feature = "hints")] + hints, + ); [ x3[0], x3[1], x3[2], x3[3], x3[4], x3[5], x3[6], x3[7], y3[0], y3[1], y3[2], y3[3], y3[4], y3[5], y3[6], y3[7], @@ -111,24 +258,76 @@ pub fn add_twist_bn254(p1: &[u64; 16], p2: &[u64; 16]) -> [u64; 16] { } /// Doubling of a non-zero point -pub fn dbl_twist_bn254(p: &[u64; 16]) -> [u64; 16] { +pub fn dbl_twist_bn254(p: &[u64; 16], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 16] { let x: [u64; 8] = p[0..8].try_into().unwrap(); let y: [u64; 8] = p[8..16].try_into().unwrap(); // Compute the doubling - let mut lambda = dbl_fp2_bn254(&y); - lambda = inv_fp2_bn254(&lambda); - lambda = scalar_mul_fp2_bn254(&lambda, &E_B); - lambda = mul_fp2_bn254(&lambda, &x); - lambda = mul_fp2_bn254(&lambda, &x); - - let mut x3 = square_fp2_bn254(&lambda); - x3 = sub_fp2_bn254(&x3, &x); - x3 = sub_fp2_bn254(&x3, &x); - - let mut y3 = sub_fp2_bn254(&x, &x3); - y3 = mul_fp2_bn254(&lambda, &y3); - y3 = sub_fp2_bn254(&y3, &y); + let mut lambda = dbl_fp2_bn254( + &y, + #[cfg(feature = "hints")] + hints, + ); + lambda = inv_fp2_bn254( + &lambda, + #[cfg(feature = "hints")] + hints, + ); + lambda = scalar_mul_fp2_bn254( + &lambda, + &E_B, + #[cfg(feature = "hints")] + hints, + ); + lambda = mul_fp2_bn254( + &lambda, + &x, + #[cfg(feature = "hints")] + hints, + ); + lambda = mul_fp2_bn254( + &lambda, + &x, + #[cfg(feature = "hints")] + hints, + ); + + let mut x3 = square_fp2_bn254( + &lambda, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bn254( + &x3, + &x, + #[cfg(feature = "hints")] + hints, + ); + x3 = sub_fp2_bn254( + &x3, + &x, + #[cfg(feature = "hints")] + hints, + ); + + let mut y3 = sub_fp2_bn254( + &x, + &x3, + #[cfg(feature = "hints")] + hints, + ); + y3 = mul_fp2_bn254( + &lambda, + &y3, + #[cfg(feature = "hints")] + hints, + ); + y3 = sub_fp2_bn254( + &y3, + &y, + #[cfg(feature = "hints")] + hints, + ); [ x3[0], x3[1], x3[2], x3[3], x3[4], x3[5], x3[6], x3[7], y3[0], y3[1], y3[2], y3[3], y3[4], @@ -137,12 +336,16 @@ pub fn dbl_twist_bn254(p: &[u64; 16]) -> [u64; 16] { } /// Negation of a point -pub fn neg_twist_bn254(p: &[u64; 16]) -> [u64; 16] { +pub fn neg_twist_bn254(p: &[u64; 16], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 16] { let x: [u64; 8] = p[0..8].try_into().unwrap(); let y: [u64; 8] = p[8..16].try_into().unwrap(); // Compute the negation - let y_neg = neg_fp2_bn254(&y); + let y_neg = neg_fp2_bn254( + &y, + #[cfg(feature = "hints")] + hints, + ); [ x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], y_neg[0], y_neg[1], y_neg[2], y_neg[3], y_neg[4], y_neg[5], y_neg[6], y_neg[7], @@ -150,7 +353,10 @@ pub fn neg_twist_bn254(p: &[u64; 16]) -> [u64; 16] { } /// Scalar multiplication of a non-zero point by x -pub fn scalar_mul_by_x_twist_bn254(p: &[u64; 16]) -> [u64; 16] { +pub fn scalar_mul_by_x_twist_bn254( + p: &[u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 16] { // Binary representation of the exponent x = 4965661367192848881 in big-endian format const X_BIN_BE: [u8; 63] = [ 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, @@ -160,26 +366,56 @@ pub fn scalar_mul_by_x_twist_bn254(p: &[u64; 16]) -> [u64; 16] { let mut q = *p; for &bit in X_BIN_BE.iter().skip(1) { - q = dbl_twist_bn254(&q); + q = dbl_twist_bn254( + &q, + #[cfg(feature = "hints")] + hints, + ); if bit == 1 { - q = add_twist_bn254(&q, p); + q = add_twist_bn254( + &q, + p, + #[cfg(feature = "hints")] + hints, + ); } } q } /// Compute the untwist-frobenius-twist (utf) endomorphism 𝜓: (x,y) = (𝛾₁₂·x̄,𝛾₁₃·ȳ) -pub fn utf_endomorphism_twist_bn254(p: &[u64; 16]) -> [u64; 16] { +pub fn utf_endomorphism_twist_bn254( + p: &[u64; 16], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 16] { let mut x: [u64; 8] = p[0..8].try_into().unwrap(); let mut y: [u64; 8] = p[8..16].try_into().unwrap(); // Compute the conjugate of x and y - x = conjugate_fp2_bn254(&x); - y = conjugate_fp2_bn254(&y); + x = conjugate_fp2_bn254( + &x, + #[cfg(feature = "hints")] + hints, + ); + y = conjugate_fp2_bn254( + &y, + #[cfg(feature = "hints")] + hints, + ); // Compute the multiplication - let qx = mul_fp2_bn254(&FROBENIUS_GAMMA12, &x); - let qy = mul_fp2_bn254(&FROBENIUS_GAMMA13, &y); + let qx = mul_fp2_bn254( + &FROBENIUS_GAMMA12, + &x, + #[cfg(feature = "hints")] + hints, + ); + let qy = mul_fp2_bn254( + &FROBENIUS_GAMMA13, + &y, + #[cfg(feature = "hints")] + hints, + ); [ qx[0], qx[1], qx[2], qx[3], qx[4], qx[5], qx[6], qx[7], qy[0], qy[1], qy[2], qy[3], qy[4], @@ -187,44 +423,37 @@ pub fn utf_endomorphism_twist_bn254(p: &[u64; 16]) -> [u64; 16] { ] } -/// # Safety -/// `p_ptr` must point to a valid `[u64; 16]` (128 bytes, affine G2 twist point). -#[no_mangle] -pub unsafe extern "C" fn is_on_curve_twist_bn254_c(p_ptr: *const u64) -> bool { - let p = unsafe { &*(p_ptr as *const [u64; 16]) }; - is_on_curve_twist_bn254(p) -} +/// Convert 128-byte big-endian G2 point to [u64; 16] little-endian +pub fn g2_bytes_be_to_u64_le_bn254(bytes: &[u8; 128]) -> [u64; 16] { + let mut result = [0u64; 16]; -/// # Safety -/// `p_ptr` must point to a valid `[u64; 16]` (128 bytes, affine G2 twist point). -#[no_mangle] -pub unsafe extern "C" fn is_on_subgroup_twist_bn254_c(p_ptr: *const u64) -> bool { - let p = unsafe { &*(p_ptr as *const [u64; 16]) }; - is_on_subgroup_twist_bn254(p) -} + // x_i (bytes 0-31) -> result[4..8] + for i in 0..4 { + for j in 0..8 { + result[7 - i] |= (bytes[i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + // x_r (bytes 32-63) -> result[0..4] + for i in 0..4 { + for j in 0..8 { + result[3 - i] |= (bytes[32 + i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + // y_i (bytes 64-95) -> result[12..16] + for i in 0..4 { + for j in 0..8 { + result[15 - i] |= (bytes[64 + i * 8 + j] as u64) << (8 * (7 - j)); + } + } + + // y_r (bytes 96-127) -> result[8..12] + for i in 0..4 { + for j in 0..8 { + result[11 - i] |= (bytes[96 + i * 8 + j] as u64) << (8 * (7 - j)); + } + } -/// # Safety -/// - `p_ptr` must point to a valid `[u64; 24]` (192 bytes, Jacobian G2 twist point). -/// - `out_ptr` must point to a valid `[u64; 16]` (128 bytes) writable buffer. -#[no_mangle] -pub unsafe extern "C" fn to_affine_twist_bn254_c(p_ptr: *const u64, out_ptr: *mut u64) { - let p = unsafe { &*(p_ptr as *const [u64; 24]) }; - let result = to_affine_twist_bn254(p); - - *out_ptr.add(0) = result[0]; - *out_ptr.add(1) = result[1]; - *out_ptr.add(2) = result[2]; - *out_ptr.add(3) = result[3]; - *out_ptr.add(4) = result[4]; - *out_ptr.add(5) = result[5]; - *out_ptr.add(6) = result[6]; - *out_ptr.add(7) = result[7]; - *out_ptr.add(8) = result[8]; - *out_ptr.add(9) = result[9]; - *out_ptr.add(10) = result[10]; - *out_ptr.add(11) = result[11]; - *out_ptr.add(12) = result[12]; - *out_ptr.add(13) = result[13]; - *out_ptr.add(14) = result[14]; - *out_ptr.add(15) = result[15]; + result } diff --git a/ziskos/entrypoint/src/zisklib/lib/constants.rs b/ziskos/entrypoint/src/zisklib/lib/constants.rs new file mode 100644 index 000000000..eaeae37b0 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/constants.rs @@ -0,0 +1,8 @@ +/// Zero in 256-bit representation +pub const ZERO_256: [u64; 4] = [0, 0, 0, 0]; + +/// One in 256-bit representation +pub const ONE_256: [u64; 4] = [1, 0, 0, 0]; + +/// Two in 256-bit representation +pub const TWO_256: [u64; 4] = [2, 0, 0, 0]; diff --git a/ziskos/entrypoint/src/zisklib/lib/keccak256.rs b/ziskos/entrypoint/src/zisklib/lib/keccak256.rs new file mode 100644 index 000000000..2ea327fa8 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/keccak256.rs @@ -0,0 +1,168 @@ +use crate::syscalls::syscall_keccak_f; + +#[cfg(zisk_hints_debug)] +use std::os::raw::c_char; + +#[cfg(all(not(all(target_os = "zkvm", target_vendor = "zisk")), zisk_hints))] +extern "C" { + fn hint_keccak256(input_ptr: *const u8, input_len: usize); +} + +#[cfg(all(not(all(target_os = "zkvm", target_vendor = "zisk")), zisk_hints_debug))] +extern "C" { + fn hint_log_c(msg: *const c_char); +} + +#[cfg(zisk_hints_debug)] +pub fn hint_log>(msg: S) { + // On native we call external C function to log hints, since it controls if hints are paused or not + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + use std::ffi::CString; + + if let Ok(c) = CString::new(msg.as_ref()) { + unsafe { hint_log_c(c.as_ptr()) }; + } + } + // On zkvm/zisk, we can just print directly + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + println!("{}", msg.as_ref()); + } +} + +/// Keccak-256 rate in bytes (1600 - 2*256) / 8 = 136 bytes +const KECCAK256_RATE: usize = 136; + +/// Keccak-256 hash function. For reference: https://keccak.team/keccak_specs_summary.html +pub fn keccak256(input: &[u8], #[cfg(feature = "hints")] hints: &mut Vec) -> [u8; 32] { + let mut state = [0u64; 25]; + let input_len = input.len(); + + // Absorb phase: process complete rate-sized blocks + let mut offset = 0; + while offset + KECCAK256_RATE <= input_len { + // XOR block into state + xor_block_into_state(&mut state, &input[offset..offset + KECCAK256_RATE]); + // Apply Keccak-f permutation + unsafe { + syscall_keccak_f( + &mut state, + #[cfg(feature = "hints")] + hints, + ); + } + offset += KECCAK256_RATE; + } + + // Handle final block with padding + let remaining = input_len - offset; + let mut final_block = [0u8; KECCAK256_RATE]; + + // Copy remaining bytes + final_block[..remaining].copy_from_slice(&input[offset..]); + + // Keccak padding: append 0x01, then zeros, then 0x80 at the end of the rate + // For Keccak-256: domain separator is 0x01 + final_block[remaining] = 0x01; + final_block[KECCAK256_RATE - 1] |= 0x80; + + // XOR final padded block into state + xor_block_into_state(&mut state, &final_block); + + // Final permutation + unsafe { + syscall_keccak_f( + &mut state, + #[cfg(feature = "hints")] + hints, + ); + } + + // Squeeze phase: extract first 32 bytes (256 bits) from state + let mut result = [0u8; 32]; + let state_bytes: &[u8; 200] = unsafe { &*(&state as *const [u64; 25] as *const [u8; 200]) }; + result.copy_from_slice(&state_bytes[..32]); + + result +} + +/// XOR a rate-sized block into the state (first 136 bytes = 17 u64 words) +#[inline] +fn xor_block_into_state(state: &mut [u64; 25], block: &[u8]) { + // XOR block bytes into state, interpreting as little-endian u64s + for i in 0..KECCAK256_RATE / 8 { + let word = u64::from_le_bytes(block[i * 8..(i + 1) * 8].try_into().unwrap()); + state[i] ^= word; + } +} + +/// C-compatible wrapper for Keccak-256 hash +/// +/// This is the function that `alloy-primitives` will call when the `native-keccak` feature is enabled. +/// +/// # Safety +/// - `input` must point to at least `input_len` bytes +/// - `output` must point to a writable buffer of at least 32 bytes +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_keccak256_c")] +pub unsafe extern "C" fn keccak256_c( + input: *const u8, + input_len: usize, + output: *mut u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) { + let input_slice = core::slice::from_raw_parts(input, input_len); + let hash = keccak256( + input_slice, + #[cfg(feature = "hints")] + hints, + ); + let output_slice = core::slice::from_raw_parts_mut(output, 32); + output_slice.copy_from_slice(&hash); +} + +/// Native keccak256 implementation for external callers +/// +/// # Safety +/// - `bytes` must point to at least `len` bytes +/// - `output` must point to a writable buffer of at least 32 bytes +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_native_keccak256_c")] +pub unsafe extern "C" fn native_keccak256(bytes: *const u8, len: usize, output: *mut u8) { + #[cfg(zisk_hints)] + hint_keccak256(bytes, len); + + #[cfg(zisk_hints_debug)] + { + let input_bytes = unsafe { core::slice::from_raw_parts(bytes, len) }; + hint_log(format!("hint_keccak256 (bytes: {:?}, len: {})", input_bytes, len)); + } + + #[cfg(all(target_os = "zkvm", target_vendor = "zisk"))] + { + keccak256_c( + bytes, + len, + output, + #[cfg(feature = "hints")] + hints, + ); + } + + #[cfg(not(all(target_os = "zkvm", target_vendor = "zisk")))] + { + use tiny_keccak::{Hasher, Keccak}; + const OUT_LEN: usize = 32; + + let (input_bytes, out) = unsafe { + let input_bytes = core::slice::from_raw_parts(bytes, len); + let out = core::slice::from_raw_parts_mut(output, OUT_LEN); + (input_bytes, out) + }; + + let mut hasher = Keccak::v256(); + hasher.update(input_bytes); + hasher.finalize(out); + } +} diff --git a/ziskos/entrypoint/src/zisklib/lib/mod.rs b/ziskos/entrypoint/src/zisklib/lib/mod.rs index 0e70a9a4a..ed0a23168 100644 --- a/ziskos/entrypoint/src/zisklib/lib/mod.rs +++ b/ziskos/entrypoint/src/zisklib/lib/mod.rs @@ -1,16 +1,22 @@ mod array_lib; -mod bigint256; +mod blake2b; mod bls12_381; mod bn254; +mod constants; +mod keccak256; mod secp256k1; -mod sha256f_compress; +mod secp256r1; +mod sha256; mod utils; // For public consumption pub use array_lib::*; -pub use bigint256::*; +pub use blake2b::*; pub use bls12_381::*; pub use bn254::*; +pub use constants::*; +pub use keccak256::*; pub use secp256k1::*; -pub use sha256f_compress::*; +pub use secp256r1::*; +pub use sha256::*; pub use utils::*; diff --git a/ziskos/entrypoint/src/zisklib/lib/secp256k1/constants.rs b/ziskos/entrypoint/src/zisklib/lib/secp256k1/constants.rs index 8cc082e21..99520707f 100644 --- a/ziskos/entrypoint/src/zisklib/lib/secp256k1/constants.rs +++ b/ziskos/entrypoint/src/zisklib/lib/secp256k1/constants.rs @@ -6,7 +6,6 @@ pub const E_B: [u64; 4] = [0x7, 0, 0, 0]; /// Secp256k1 base field size pub const P: [u64; 4] = [0xFFFFFFFEFFFFFC2F, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF]; -pub const P_MINUS_ONE: [u64; 4] = [P[0] - 1, P[1], P[2], P[3]]; /// A known non-quadratic residue in Fp pub const NQR: [u64; 4] = [3, 0, 0, 0]; @@ -16,8 +15,13 @@ pub const N: [u64; 4] = [0xBFD25E8CD0364141, 0xBAAEDCE6AF48A03B, 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF]; pub const N_MINUS_ONE: [u64; 4] = [N[0] - 1, N[1], N[2], N[3]]; +/// Secp256k1 group identity point +pub const IDENTITY_X: [u64; 4] = [0; 4]; +pub const IDENTITY_Y: [u64; 4] = [0; 4]; + /// Secp256k1 group of points generator pub const G_X: [u64; 4] = [0x59F2815B16F81798, 0x029BFCDB2DCE28D9, 0x55A06295CE870B07, 0x79BE667EF9DCBBAC]; pub const G_Y: [u64; 4] = [0x9C47D08FFB10D4B8, 0xFD17B448A6855419, 0x5DA4FBFC0E1108A8, 0x483ADA7726A3C465]; +pub const G: [u64; 8] = [G_X[0], G_X[1], G_X[2], G_X[3], G_Y[0], G_Y[1], G_Y[2], G_Y[3]]; diff --git a/ziskos/entrypoint/src/zisklib/lib/secp256k1/curve.rs b/ziskos/entrypoint/src/zisklib/lib/secp256k1/curve.rs index 43c600604..05ddae115 100644 --- a/ziskos/entrypoint/src/zisklib/lib/secp256k1/curve.rs +++ b/ziskos/entrypoint/src/zisklib/lib/secp256k1/curve.rs @@ -2,120 +2,217 @@ use crate::{ syscalls::{ syscall_secp256k1_add, syscall_secp256k1_dbl, SyscallPoint256, SyscallSecp256k1AddParams, }, - zisklib::{eq, fcall_msb_pos_256}, + zisklib::{eq, fcall_msb_pos_256, fcall_msb_pos_256_3, is_one, ONE_256, TWO_256, ZERO_256}, }; use super::{ - constants::{E_B, G_X, G_Y}, - field::{ - secp256k1_fp_add, secp256k1_fp_inv, secp256k1_fp_mul, secp256k1_fp_sqrt, - secp256k1_fp_square, - }, - scalar::{secp256k1_fn_inv, secp256k1_fn_mul, secp256k1_fn_reduce}, + constants::{E_B, G, G_X, G_Y, IDENTITY_X, IDENTITY_Y}, + field::{secp256k1_fp_add, secp256k1_fp_mul, secp256k1_fp_sqrt, secp256k1_fp_square}, + scalar::secp256k1_fn_sub, }; -/// Converts a non-zero point `p` on the Secp256k1 curve from projective coordinates to affine coordinates -pub fn secp256k1_to_affine(p: &[u64; 12]) -> [u64; 8] { - let z: [u64; 4] = p[8..12].try_into().unwrap(); - - // Point at infinity cannot be converted to affine - debug_assert!(z != [0u64; 4], "Cannot convert point at infinity to affine"); - - let zinv = secp256k1_fp_inv(&z); - let zinv_sq = secp256k1_fp_square(&zinv); - - let x: [u64; 4] = p[0..4].try_into().unwrap(); - let y: [u64; 4] = p[4..8].try_into().unwrap(); - - let x_res = secp256k1_fp_mul(&x, &zinv_sq); - let mut y_res = secp256k1_fp_mul(&y, &zinv_sq); - y_res = secp256k1_fp_mul(&y_res, &zinv); - - [x_res[0], x_res[1], x_res[2], x_res[3], y_res[0], y_res[1], y_res[2], y_res[3]] -} - -/// Checks if two points `p1` and `p2` on the Secp256k1 curve in projective coordinates are equal -pub fn secp256k1_eq_projective(p1: &[u64; 12], p2: &[u64; 12]) -> bool { - // In essence given two points in projective form p1 = (x₁z₁,y₁z₁,z₁) and p2 = (x₂z₂,y₂z₂,z₂) - // We can simply multiply p1 by z2 and p2 by z1 to get tuples: - // p1 = (x₁z₁z₂,y₁z₁z₂,z₁z₂) and p2 = (x₂z₂z₁,y₂z₂z₁,z₂z₁) - // So we can compare the two points by checking if (x₁z₁)z₂ == (x₂z₁)z₂ and (y₁z₂)z₁ == (y₂z₂)z₁ - let x1 = p1[0..4].try_into().unwrap(); - let y1 = p1[4..8].try_into().unwrap(); - let z1 = p1[8..12].try_into().unwrap(); - let x2 = p2[0..4].try_into().unwrap(); - let y2 = p2[4..8].try_into().unwrap(); - let z2 = p2[8..12].try_into().unwrap(); - - let lhs_x = secp256k1_fp_mul(x1, z2); - let rhs_x = secp256k1_fp_mul(x2, z1); - if !eq(&lhs_x, &rhs_x) { - return false; - } +const IDENTITY_POINT256: SyscallPoint256 = SyscallPoint256 { x: IDENTITY_X, y: IDENTITY_Y }; - let lhs_y = secp256k1_fp_mul(y1, z2); - let rhs_y = secp256k1_fp_mul(y2, z1); - if !eq(&lhs_y, &rhs_y) { - return false; - } - - true -} - -/// Given a x-coordinate `x_bytes` and a parity `y_is_odd`, -/// this function decompresses the point on the secp256k1 curve. -pub fn secp256k1_decompress(x_bytes: &[u8; 32], y_is_odd: bool) -> (([u64; 4], [u64; 4]), bool) { - // Convert the x-coordinate from BEu8 to LEu64 - let mut x = [0u64; 4]; - for i in 0..32 { - x[3 - i / 8] |= (x_bytes[i] as u64) << (8 * (7 - (i % 8))); - } +const G_POINT256: SyscallPoint256 = SyscallPoint256 { x: G_X, y: G_Y }; +/// Given a x-coordinate and a parity bit, returns the corresponding point (x, y) on the curve if it exists +pub fn secp256k1_lift_x( + x: &[u64; 4], + y_is_odd: bool, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Result<[u64; 8], bool> { // Calculate the y-coordinate of the point: y = sqrt(x³ + 7) - let x_sq = secp256k1_fp_square(&x); - let x_cb = secp256k1_fp_mul(&x_sq, &x); - let y_sq = secp256k1_fp_add(&x_cb, &E_B); - let (y, has_sqrt) = secp256k1_fp_sqrt(&y_sq, y_is_odd as u64); + let x_sq = secp256k1_fp_square( + x, + #[cfg(feature = "hints")] + hints, + ); + let x_cb = secp256k1_fp_mul( + &x_sq, + x, + #[cfg(feature = "hints")] + hints, + ); + let y_sq = secp256k1_fp_add( + &x_cb, + &E_B, + #[cfg(feature = "hints")] + hints, + ); + let (y, has_sqrt) = secp256k1_fp_sqrt( + &y_sq, + y_is_odd as u64, + #[cfg(feature = "hints")] + hints, + ); + if !has_sqrt { - return (([0u64; 4], [0u64; 4]), false); + return Err(false); } // Check the received parity of the y-coordinate is correct let parity = (y[0] & 1) != 0; assert_eq!(parity, y_is_odd); - ((x, y), true) + Ok([x[0], x[1], x[2], x[3], y[0], y[1], y[2], y[3]]) } -/// Given points `p1` and `p2`, performs the point addition `p1 + p2` and assigns the result to `p1`. -/// It assumes that `p1` and `p2` are from the Secp256k1 curve, that `p1,p2 != 𝒪` and that `p2 != p1,-p1` -fn add_points_assign(p1: &mut SyscallPoint256, p2: &SyscallPoint256) { - let mut params = SyscallSecp256k1AddParams { p1, p2 }; - syscall_secp256k1_add(&mut params); -} +/// Checks whether the given point `p` is on the Secp256k1 curve. +/// It assumes that `p` is not the point at infinity. +pub fn secp256k1_is_on_curve(p: &[u64; 8], #[cfg(feature = "hints")] hints: &mut Vec) -> bool { + let x: [u64; 4] = p[0..4].try_into().unwrap(); + let y: [u64; 4] = p[4..8].try_into().unwrap(); -/// Given a point `p1`, performs the point doubling `2·p1` and assigns the result to `p1`. -/// It assumes that `p1` is from the Secp256k1 curve and that `p1 != 𝒪` -/// -/// Note: We don't need to assume that 2·p1 != 𝒪 because there are not points of order 2 on the Secp256k1 curve -fn double_point_assign(p1: &mut SyscallPoint256) { - syscall_secp256k1_dbl(p1); + // p in E iff y² == x³ + 7 + let lhs = secp256k1_fp_square( + &y, + #[cfg(feature = "hints")] + hints, + ); + let mut rhs = secp256k1_fp_square( + &x, + #[cfg(feature = "hints")] + hints, + ); + rhs = secp256k1_fp_mul( + &rhs, + &x, + #[cfg(feature = "hints")] + hints, + ); + rhs = secp256k1_fp_add( + &rhs, + &E_B, + #[cfg(feature = "hints")] + hints, + ); + eq(&lhs, &rhs) } /// Given points `p1` and `p2`, performs the point addition `p1 + p2` and assigns the result to `p1`. -/// It assumes that `p1` and `p2` are from the Secp256k1 curve, that `p2 != 𝒪` -fn add_points_complete_assign( +/// It assumes that `p1` and `p2` are from the Secp256k1 curve, that `p1,p2 != 𝒪` +/// Returns true if the result is the point at infinity. +#[inline] +fn secp256k1_add_non_infinity_points( p1: &mut SyscallPoint256, - p1_is_infinity: &mut bool, p2: &SyscallPoint256, -) { + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { if p1.x != p2.x { - add_points_assign(p1, p2); + let mut params = SyscallSecp256k1AddParams { p1, p2 }; + syscall_secp256k1_add( + &mut params, + #[cfg(feature = "hints")] + hints, + ); + false } else if p1.y == p2.y { - double_point_assign(p1); + syscall_secp256k1_dbl( + p1, + #[cfg(feature = "hints")] + hints, + ); + false } else { - *p1_is_infinity = true; + // p1 + (-p1) = 𝒪 + true + } +} + +/// Given a non-infinity point `p` and a scalar `k`, computes the scalar multiplication `k·p` +/// +/// Note: There are no (non-infinity) points of order 2 in Secp256k1. +/// All (non-infinity) points are of prime order N. +pub fn secp256k1_scalar_mul( + k: &[u64; 4], + p: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Option<[u64; 8]> { + // Direct cases: k = 0, k = 1, k = 2 + if eq(k, &ZERO_256) { + return None; + } else if eq(k, &ONE_256) { + return Some(*p); + } else if eq(k, &TWO_256) { + let mut res = SyscallPoint256 { x: [p[0], p[1], p[2], p[3]], y: [p[4], p[5], p[6], p[7]] }; + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + return Some([ + res.x[0], res.x[1], res.x[2], res.x[3], res.y[0], res.y[1], res.y[2], res.y[3], + ]); + } + // We can assume k > 2 from now on + + // Hint the length the binary representations of k + // We will verify the output by recomposing k + // Moreover, we should check that the first received bit is 1 + let (max_limb, max_bit) = fcall_msb_pos_256( + k, + &ZERO_256, + #[cfg(feature = "hints")] + hints, + ); + + // Perform the loop, based on the binary representation of k + + // We do the first iteration separately + let max_limb = max_limb as usize; + let max_bit = max_bit as usize; + + // The first received bit should be 1 + assert_eq!((k[max_limb] >> max_bit) & 1, 1); + + // Start at P + let mut res = SyscallPoint256 { x: [p[0], p[1], p[2], p[3]], y: [p[4], p[5], p[6], p[7]] }; + let mut k_rec = ZERO_256; + k_rec[max_limb] = 1 << max_bit; + + // Determine starting limb/bit for the loop + let mut limb = max_limb; + let mut bit = if max_bit == 0 { + // If max_bit is 0 then limb > 0; otherwise k = 1, which is excluded here + limb -= 1; + 63 + } else { + max_bit - 1 + }; + + // Perform the rest of the loop + let p = SyscallPoint256 { x: [p[0], p[1], p[2], p[3]], y: [p[4], p[5], p[6], p[7]] }; + for i in (0..=limb).rev() { + for j in (0..=bit).rev() { + // Always double + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + + // Get the next bit b of k. + // If b == 1, we should add P + if ((k[i] >> j) & 1) == 1 { + let mut params = SyscallSecp256k1AddParams { p1: &mut res, p2: &p }; + syscall_secp256k1_add( + &mut params, + #[cfg(feature = "hints")] + hints, + ); + + // Reconstruct k + k_rec[i] |= 1 << j; + } + } + bit = 63; } + + // Check that the reconstructed k is equal to the input k + assert!(eq(&k_rec, k)); + + // Convert the result back to a single array + Some([res.x[0], res.x[1], res.x[2], res.x[3], res.y[0], res.y[1], res.y[2], res.y[3]]) } /// Given a point `p` and scalars `k1` and `k2`, computes the double scalar multiplication `k1·G + k2·p` @@ -123,24 +220,51 @@ fn add_points_complete_assign( pub fn secp256k1_double_scalar_mul_with_g( k1: &[u64; 4], k2: &[u64; 4], - p: &SyscallPoint256, -) -> (bool, SyscallPoint256) { + p: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Option<[u64; 8]> { + let p = SyscallPoint256 { x: [p[0], p[1], p[2], p[3]], y: [p[4], p[5], p[6], p[7]] }; + // Start by precomputing g + p - let mut gp = SyscallPoint256 { x: G_X, y: G_Y }; - let mut gp_is_infinity = false; - add_points_complete_assign(&mut gp, &mut gp_is_infinity, p); - - let one = [1u64, 0, 0, 0]; - if *k1 == one && *k2 == one { - // Return G + p - return (gp_is_infinity, gp); + let mut gp = G_POINT256; + let gp_is_infinity = secp256k1_add_non_infinity_points( + &mut gp, + &p, + #[cfg(feature = "hints")] + hints, + ); + + // If G + P = 𝒪 => P = -G and therefore the operation is k1·G + (-k2)·G = (k1-k2)·G + // Fall back to scalar mul + if gp_is_infinity { + return secp256k1_scalar_mul( + &secp256k1_fn_sub( + k1, + k2, + #[cfg(feature = "hints")] + hints, + ), + &G, + #[cfg(feature = "hints")] + hints, + ); + } + + if is_one(k1) && is_one(k2) { + // Return g + p + return Some([gp.x[0], gp.x[1], gp.x[2], gp.x[3], gp.y[0], gp.y[1], gp.y[2], gp.y[3]]); } // From here on, at least one of k1 or k2 is greater than 1 // Hint the maximum length between the binary representations of k1 and k2 // We will verify the output by recomposing both k1 and k2 // Moreover, we should check that the first received bit (of either k1 or k2) is 1 - let (max_limb, max_bit) = fcall_msb_pos_256(k1, k2); + let (max_limb, max_bit) = fcall_msb_pos_256( + k1, + k2, + #[cfg(feature = "hints")] + hints, + ); // Perform the loop, based on the binary representation of k1 and k2 @@ -154,59 +278,44 @@ pub fn secp256k1_double_scalar_mul_with_g( assert!(k1_bit == 1 || k2_bit == 1); // Start at 𝒪 - let mut res = SyscallPoint256 { x: [0u64; 4], y: [0u64; 4] }; + let mut res = IDENTITY_POINT256; let mut res_is_infinity = true; - let mut k1_rec = [0u64; 4]; - let mut k2_rec = [0u64; 4]; - if (k1_bit == 0) && (k2_bit == 1) { - // If res is 𝒪, set res = p; otherwise, double res and add p - if res_is_infinity { + let mut k1_rec = ZERO_256; + let mut k2_rec = ZERO_256; + + // Three cases based on the bits of k1 and k2 + match (k1_bit, k2_bit) { + (0, 1) => { + // Set res = p res.x = p.x; res.y = p.y; res_is_infinity = false; - } else { - double_point_assign(&mut res); - add_points_complete_assign(&mut res, &mut res_is_infinity, p); - } - // Update k2_rec - k2_rec[max_limb] |= 1 << max_bit; - } else if (k1_bit == 1) && (k2_bit == 0) { - // If res is 𝒪, set res = g; otherwise, double res and add g - if res_is_infinity { - res.x = G_X; - res.y = G_Y; - res_is_infinity = false; - } else { - double_point_assign(&mut res); - add_points_complete_assign( - &mut res, - &mut res_is_infinity, - &SyscallPoint256 { x: G_X, y: G_Y }, - ); + // Update k2_rec + k2_rec[max_limb] = 1 << max_bit; } + (1, 0) => { + // Set res = g + res.x = G_POINT256.x; + res.y = G_POINT256.y; + res_is_infinity = false; - // Update k1_rec - k1_rec[max_limb] |= 1 << max_bit; - } else if (k1_bit == 1) && (k2_bit == 1) { - if res_is_infinity { - // If (g + p) is 𝒪, do nothing; otherwise set res = (g + p) + // Update k1_rec + k1_rec[max_limb] = 1 << max_bit; + } + (1, 1) => { + // Set res = g + p if not infinity if !gp_is_infinity { res.x = gp.x; res.y = gp.y; res_is_infinity = false; } - } else { - // If (g + p) is 𝒪, simply double res; otherwise double res and add (g + p) - double_point_assign(&mut res); - if !gp_is_infinity { - add_points_complete_assign(&mut res, &mut res_is_infinity, &gp); - } - } - // Update k1_rec and k2_rec - k1_rec[max_limb] |= 1 << max_bit; - k2_rec[max_limb] |= 1 << max_bit; + // Update k1_rec and k2_rec + k1_rec[max_limb] = 1 << max_bit; + k2_rec[max_limb] = 1 << max_bit; + } + _ => unreachable!(), } // Determine starting limb/bit for the loop @@ -225,199 +334,511 @@ pub fn secp256k1_double_scalar_mul_with_g( let k1_bit = (k1[i] >> j) & 1; let k2_bit = (k2[i] >> j) & 1; - if (k1_bit == 0) && (k2_bit == 0) { - // If res is 𝒪, do nothing; otherwise, double - if !res_is_infinity { - double_point_assign(&mut res); - } - } else if (k1_bit == 0) && (k2_bit == 1) { - // If res is 𝒪, set res = p; otherwise, double res and add p - if res_is_infinity { - res.x = p.x; - res.y = p.y; - res_is_infinity = false; - } else { - double_point_assign(&mut res); - add_points_complete_assign(&mut res, &mut res_is_infinity, p); + // Four cases based on the bits of k1 and k2 + match (k1_bit, k2_bit) { + (0, 0) => { + // If res is 𝒪, do nothing; otherwise, double + if !res_is_infinity { + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + } } + (0, 1) => { + // If res is 𝒪, set res = p; otherwise, double res and add p + if res_is_infinity { + res.x = p.x; + res.y = p.y; + res_is_infinity = false; + } else { + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + res_is_infinity = secp256k1_add_non_infinity_points( + &mut res, + &p, + #[cfg(feature = "hints")] + hints, + ); + } - // Update k2_rec - k2_rec[i] |= 1 << j; - } else if (k1_bit == 1) && (k2_bit == 0) { - // If res is 𝒪, set res = g; otherwise, double res and add g - if res_is_infinity { - res.x = G_X; - res.y = G_Y; - res_is_infinity = false; - } else { - double_point_assign(&mut res); - add_points_complete_assign( - &mut res, - &mut res_is_infinity, - &SyscallPoint256 { x: G_X, y: G_Y }, - ); + // Update k2_rec + k2_rec[i] |= 1 << j; } - - // Update k1_rec - k1_rec[i] |= 1 << j; - } else if (k1_bit == 1) && (k2_bit == 1) { - if res_is_infinity { - // If (g + p) is 𝒪, do nothing; otherwise set res = (g + p) - if !gp_is_infinity { - res.x = gp.x; - res.y = gp.y; + (1, 0) => { + // If res is 𝒪, set res = g; otherwise, double res and add g + if res_is_infinity { + res.x = G_POINT256.x; + res.y = G_POINT256.y; res_is_infinity = false; + } else { + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + res_is_infinity = secp256k1_add_non_infinity_points( + &mut res, + &G_POINT256, + #[cfg(feature = "hints")] + hints, + ); } - } else { - // If (g + p) is 𝒪, simply double res; otherwise double res and add (g + p) - double_point_assign(&mut res); - if !gp_is_infinity { - add_points_complete_assign(&mut res, &mut res_is_infinity, &gp); - } + + // Update k1_rec + k1_rec[i] |= 1 << j; } + (1, 1) => { + // If res is 𝒪, set res = g + p if not infinity; otherwise, double res and add (g + p) + if res_is_infinity { + if !gp_is_infinity { + res.x = gp.x; + res.y = gp.y; + res_is_infinity = false; + } + } else { + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + if !gp_is_infinity { + res_is_infinity = secp256k1_add_non_infinity_points( + &mut res, + &gp, + #[cfg(feature = "hints")] + hints, + ); + } + } - // Update k1_rec and k2_rec - k1_rec[i] |= 1 << j; - k2_rec[i] |= 1 << j; + // Update k1_rec and k2_rec + k1_rec[i] |= 1 << j; + k2_rec[i] |= 1 << j; + } + _ => unreachable!(), } } bit = 63; } // Check that the recomposed scalars are the same as the received scalars - assert_eq!(k1_rec, *k1); - assert_eq!(k2_rec, *k2); + assert!(eq(&k1_rec, k1)); + assert!(eq(&k2_rec, k2)); - (res_is_infinity, res) + if res_is_infinity { + None + } else { + Some([res.x[0], res.x[1], res.x[2], res.x[3], res.y[0], res.y[1], res.y[2], res.y[3]]) + } } -pub fn secp256k1_ecdsa_verify( - pk: &SyscallPoint256, - z: &[u64; 4], +/// Given two points `p` and `q` and scalars `r`, `s`, and `t`, computes the triple scalar multiplication `r·g + s·p + t·q` +/// It assumes that `r,s,t ∈ [1, N-1]` and that `p,q != 𝒪` +pub fn secp256k1_triple_scalar_mul_with_g( r: &[u64; 4], s: &[u64; 4], -) -> bool { - let s_inv = secp256k1_fn_inv(s); - - let u1 = secp256k1_fn_mul(z, &s_inv); - let u2 = secp256k1_fn_mul(r, &s_inv); + t: &[u64; 4], + p: &[u64; 8], + q: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Option<[u64; 8]> { + let p = SyscallPoint256 { x: [p[0], p[1], p[2], p[3]], y: [p[4], p[5], p[6], p[7]] }; + let q = SyscallPoint256 { x: [q[0], q[1], q[2], q[3]], y: [q[4], q[5], q[6], q[7]] }; + + // Precompute g + p, g + q, p + q, g + p + q + let mut gp = G_POINT256; + let gp_is_infinity = secp256k1_add_non_infinity_points( + &mut gp, + &p, + #[cfg(feature = "hints")] + hints, + ); + + let mut gq = G_POINT256; + let gq_is_infinity = secp256k1_add_non_infinity_points( + &mut gq, + &q, + #[cfg(feature = "hints")] + hints, + ); + + let mut pq = SyscallPoint256 { x: p.x, y: p.y }; + let pq_is_infinity = secp256k1_add_non_infinity_points( + &mut pq, + &q, + #[cfg(feature = "hints")] + hints, + ); + + let (gpq, gpq_is_infinity) = if gp_is_infinity { + // G + P = 𝒪, so G + P + Q = Q + (SyscallPoint256 { x: q.x, y: q.y }, false) + } else if pq_is_infinity { + // P + Q = 𝒪, so G + P + Q = G + (G_POINT256, false) + } else { + // Normal case: add Q to (G + P) + let mut gpq_temp = SyscallPoint256 { x: gp.x, y: gp.y }; + let is_inf = secp256k1_add_non_infinity_points( + &mut gpq_temp, + &q, + #[cfg(feature = "hints")] + hints, + ); + (gpq_temp, is_inf) + }; - let (is_infinity, res) = secp256k1_double_scalar_mul_with_g(&u1, &u2, pk); - if is_infinity { - return false; + if is_one(r) && is_one(s) && is_one(t) { + // Return g + p + q + if gpq_is_infinity { + return None; + } else { + return Some([ + gpq.x[0], gpq.x[1], gpq.x[2], gpq.x[3], gpq.y[0], gpq.y[1], gpq.y[2], gpq.y[3], + ]); + } } + // From here on, at least one of r,s,t is greater than 1 - eq(&secp256k1_fn_reduce(&res.x), r) -} + // Hint the maximum length between the binary representations of r,s and t + let (max_limb, max_bit) = fcall_msb_pos_256_3( + r, + s, + t, + #[cfg(feature = "hints")] + hints, + ); -/// # Safety -/// - `p_ptr` must point to 12 u64s (projective point: x[4], y[4], z[4]) -/// - `out_ptr` must point to at least 8 u64s (will write affine x[4], y[4]) -/// -/// Returns 1 on success, 0 if point is at infinity -#[no_mangle] -pub unsafe extern "C" fn secp256k1_to_affine_c(p_ptr: *const u64, out_ptr: *mut u64) { - let p: &[u64; 12] = &*(p_ptr as *const [u64; 12]); - let result = secp256k1_to_affine(p); - - *out_ptr.add(0) = result[0]; - *out_ptr.add(1) = result[1]; - *out_ptr.add(2) = result[2]; - *out_ptr.add(3) = result[3]; - *out_ptr.add(4) = result[4]; - *out_ptr.add(5) = result[5]; - *out_ptr.add(6) = result[6]; - *out_ptr.add(7) = result[7]; -} + // Perform the loop, based on the binary representation of r,s and t -/// # Safety -/// - `x_bytes_ptr` must point to 32 bytes (big-endian x-coordinate) -/// - `out_ptr` must point to at least 8 u64s (will write x[4] and y[4] in little-endian) -/// -/// Returns 1 on success, 0 if no valid point exists -#[no_mangle] -pub unsafe extern "C" fn secp256k1_decompress_c( - x_bytes_ptr: *const u8, - y_is_odd: u8, - out_ptr: *mut u64, -) -> u8 { - let x_bytes: &[u8; 32] = &*(x_bytes_ptr as *const [u8; 32]); - - let ((x, y), success) = secp256k1_decompress(x_bytes, y_is_odd != 0); - - if !success { - return 0; - } + // We do the first iteration separately + let max_limb = max_limb as usize; + let max_bit = max_bit as usize; - *out_ptr.add(0) = x[0]; - *out_ptr.add(1) = x[1]; - *out_ptr.add(2) = x[2]; - *out_ptr.add(3) = x[3]; - *out_ptr.add(4) = y[0]; - *out_ptr.add(5) = y[1]; - *out_ptr.add(6) = y[2]; - *out_ptr.add(7) = y[3]; + // At least one of the scalars should have the first received bit as 1 + let r_bit = (r[max_limb] >> max_bit) & 1; + let s_bit = (s[max_limb] >> max_bit) & 1; + let t_bit = (t[max_limb] >> max_bit) & 1; + assert!(r_bit == 1 || s_bit == 1 || t_bit == 1); - 1 -} + // Start at 𝒪 + let mut res = IDENTITY_POINT256; + let mut res_is_infinity = true; + let mut r_rec = ZERO_256; + let mut s_rec = ZERO_256; + let mut t_rec = ZERO_256; + + // Eight cases based on the bits of r,s and t + match (r_bit, s_bit, t_bit) { + (0, 0, 1) => { + // Set res = q + res.x = q.x; + res.y = q.y; + res_is_infinity = false; -/// # Safety -/// - `k1_ptr` must point to 4 u64s (scalar k1) -/// - `k2_ptr` must point to 4 u64s (scalar k2) -/// - `p_ptr` must point to 8 u64s (point P: x[4], y[4]) -/// - `out_ptr` must point to at least 8 u64s (will write result x[4], y[4]) -/// -/// Returns 1 if result is point at infinity, 0 otherwise -#[no_mangle] -pub unsafe extern "C" fn secp256k1_double_scalar_mul_with_g_c( - k1_ptr: *const u64, - k2_ptr: *const u64, - p_ptr: *const u64, - out_ptr: *mut u64, -) -> bool { - let k1: &[u64; 4] = &*(k1_ptr as *const [u64; 4]); - let k2: &[u64; 4] = &*(k2_ptr as *const [u64; 4]); + // Update t_rec + t_rec[max_limb] = 1 << max_bit; + } + (0, 1, 0) => { + // Set res = p + res.x = p.x; + res.y = p.y; + res_is_infinity = false; - let p = SyscallPoint256 { - x: [*p_ptr.add(0), *p_ptr.add(1), *p_ptr.add(2), *p_ptr.add(3)], - y: [*p_ptr.add(4), *p_ptr.add(5), *p_ptr.add(6), *p_ptr.add(7)], - }; + // Update s_rec + s_rec[max_limb] = 1 << max_bit; + } + (0, 1, 1) => { + // Set res = p + q if not infinity + if !pq_is_infinity { + res.x = pq.x; + res.y = pq.y; + res_is_infinity = false; + } - let (is_infinity, res) = secp256k1_double_scalar_mul_with_g(k1, k2, &p); + // Update s_rec and t_rec + s_rec[max_limb] = 1 << max_bit; + t_rec[max_limb] = 1 << max_bit; + } + (1, 0, 0) => { + // Set res = g + res.x = G_POINT256.x; + res.y = G_POINT256.y; + res_is_infinity = false; - *out_ptr.add(0) = res.x[0]; - *out_ptr.add(1) = res.x[1]; - *out_ptr.add(2) = res.x[2]; - *out_ptr.add(3) = res.x[3]; - *out_ptr.add(4) = res.y[0]; - *out_ptr.add(5) = res.y[1]; - *out_ptr.add(6) = res.y[2]; - *out_ptr.add(7) = res.y[3]; + // Update r_rec + r_rec[max_limb] = 1 << max_bit; + } + (1, 0, 1) => { + // Set res = g + q if not infinity + if !gq_is_infinity { + res.x = gq.x; + res.y = gq.y; + res_is_infinity = false; + } - is_infinity -} + // Update r_rec and t_rec + r_rec[max_limb] = 1 << max_bit; + t_rec[max_limb] = 1 << max_bit; + } + (1, 1, 0) => { + // Set res = g + p if not infinity + if !gp_is_infinity { + res.x = gp.x; + res.y = gp.y; + res_is_infinity = false; + } -/// # Safety -/// - `pk_ptr` must point to 8 u64s (public key: x[4], y[4]) -/// - `z_ptr` must point to 4 u64s (message hash) -/// - `r_ptr` must point to 4 u64s (signature r) -/// - `s_ptr` must point to 4 u64s (signature s) -/// -/// Returns 1 if signature is valid, 0 otherwise -#[no_mangle] -pub unsafe extern "C" fn secp256k1_ecdsa_verify_c( - pk_ptr: *const u64, - z_ptr: *const u64, - r_ptr: *const u64, - s_ptr: *const u64, -) -> bool { - let pk = SyscallPoint256 { - x: [*pk_ptr.add(0), *pk_ptr.add(1), *pk_ptr.add(2), *pk_ptr.add(3)], - y: [*pk_ptr.add(4), *pk_ptr.add(5), *pk_ptr.add(6), *pk_ptr.add(7)], + // Update r_rec and s_rec + r_rec[max_limb] = 1 << max_bit; + s_rec[max_limb] = 1 << max_bit; + } + (1, 1, 1) => { + // Set res = g + p + q if not infinity + if !gpq_is_infinity { + res.x = gpq.x; + res.y = gpq.y; + res_is_infinity = false; + } + + // Update r_rec, s_rec and t_rec + r_rec[max_limb] = 1 << max_bit; + s_rec[max_limb] = 1 << max_bit; + t_rec[max_limb] = 1 << max_bit; + } + _ => unreachable!(), + } + + // Determine starting limb/bit for the loop + let mut limb = max_limb; + let mut bit = if max_bit == 0 { + // If max_bit is 0 then limb > 0; otherwise r,s,t = 1, which is excluded here + limb -= 1; + 63 + } else { + max_bit - 1 }; - let z: &[u64; 4] = &*(z_ptr as *const [u64; 4]); - let r: &[u64; 4] = &*(r_ptr as *const [u64; 4]); - let s: &[u64; 4] = &*(s_ptr as *const [u64; 4]); - secp256k1_ecdsa_verify(&pk, z, r, s) + // Perform the rest of the loop + for i in (0..=limb).rev() { + for j in (0..=bit).rev() { + let r_bit = (r[i] >> j) & 1; + let s_bit = (s[i] >> j) & 1; + let t_bit = (t[i] >> j) & 1; + + // Eight cases based on the bits of r,s and t + match (r_bit, s_bit, t_bit) { + (0, 0, 0) => { + // If res is 𝒪, do nothing; otherwise, double + if !res_is_infinity { + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + } + } + (0, 0, 1) => { + // If res is 𝒪, set res = q; otherwise, double res and add q + if res_is_infinity { + res.x = q.x; + res.y = q.y; + res_is_infinity = false; + } else { + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + res_is_infinity = secp256k1_add_non_infinity_points( + &mut res, + &q, + #[cfg(feature = "hints")] + hints, + ); + } + + // Update t_rec + t_rec[i] |= 1 << j; + } + (0, 1, 0) => { + // If res is 𝒪, set res = p; otherwise, double res and add p + if res_is_infinity { + res.x = p.x; + res.y = p.y; + res_is_infinity = false; + } else { + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + res_is_infinity = secp256k1_add_non_infinity_points( + &mut res, + &p, + #[cfg(feature = "hints")] + hints, + ); + } + + // Update s_rec + s_rec[i] |= 1 << j; + } + (0, 1, 1) => { + // If res is 𝒪, set res = p + q if not infinity; otherwise, double res and add (p + q) + if res_is_infinity { + if !pq_is_infinity { + res.x = pq.x; + res.y = pq.y; + res_is_infinity = false; + } + } else { + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + if !pq_is_infinity { + res_is_infinity = secp256k1_add_non_infinity_points( + &mut res, + &pq, + #[cfg(feature = "hints")] + hints, + ); + } + } + + // Update s_rec and t_rec + s_rec[i] |= 1 << j; + t_rec[i] |= 1 << j; + } + (1, 0, 0) => { + // If res is 𝒪, set res = g; otherwise, double res and add g + if res_is_infinity { + res.x = G_POINT256.x; + res.y = G_POINT256.y; + res_is_infinity = false; + } else { + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + res_is_infinity = secp256k1_add_non_infinity_points( + &mut res, + &G_POINT256, + #[cfg(feature = "hints")] + hints, + ); + } + + // Update r_rec + r_rec[i] |= 1 << j; + } + (1, 0, 1) => { + // If res is 𝒪, set res = g + q if not infinity; otherwise, double res and add (g + q) + if res_is_infinity { + if !gq_is_infinity { + res.x = gq.x; + res.y = gq.y; + res_is_infinity = false; + } + } else { + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + if !gq_is_infinity { + res_is_infinity = secp256k1_add_non_infinity_points( + &mut res, + &gq, + #[cfg(feature = "hints")] + hints, + ); + } + } + + // Update r_rec and t_rec + r_rec[i] |= 1 << j; + t_rec[i] |= 1 << j; + } + (1, 1, 0) => { + // If res is 𝒪, set res = g + p if not infinity + if res_is_infinity { + if !gp_is_infinity { + res.x = gp.x; + res.y = gp.y; + res_is_infinity = false; + } + } else { + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + if !gp_is_infinity { + res_is_infinity = secp256k1_add_non_infinity_points( + &mut res, + &gp, + #[cfg(feature = "hints")] + hints, + ); + } + } + + // Update r_rec and s_rec + r_rec[i] |= 1 << j; + s_rec[i] |= 1 << j; + } + (1, 1, 1) => { + // If res is 𝒪, set res = g + p + q if not infinity; otherwise, double res and add (g + p + q) + if res_is_infinity { + if !gpq_is_infinity { + res.x = gpq.x; + res.y = gpq.y; + res_is_infinity = false; + } + } else { + syscall_secp256k1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + if !gpq_is_infinity { + res_is_infinity = secp256k1_add_non_infinity_points( + &mut res, + &gpq, + #[cfg(feature = "hints")] + hints, + ); + } + } + + // Update r_rec, s_rec and t_rec + r_rec[i] |= 1 << j; + s_rec[i] |= 1 << j; + t_rec[i] |= 1 << j; + } + _ => unreachable!(), + } + } + bit = 63; + } + + // Check that the recomposed scalars are the same as the received scalars + assert!(eq(&r_rec, r)); + assert!(eq(&s_rec, s)); + assert!(eq(&t_rec, t)); + + if res_is_infinity { + None + } else { + Some([res.x[0], res.x[1], res.x[2], res.x[3], res.y[0], res.y[1], res.y[2], res.y[3]]) + } } diff --git a/ziskos/entrypoint/src/zisklib/lib/secp256k1/ecdsa.rs b/ziskos/entrypoint/src/zisklib/lib/secp256k1/ecdsa.rs new file mode 100644 index 000000000..861600916 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/secp256k1/ecdsa.rs @@ -0,0 +1,372 @@ +use tiny_keccak::{Hasher, Keccak}; + +use crate::zisklib::{eq, fcall_secp256k1_ecdsa_verify, gt, ZERO_256}; + +use super::{ + constants::N_MINUS_ONE, + curve::{secp256k1_is_on_curve, secp256k1_lift_x, secp256k1_triple_scalar_mul_with_g}, + scalar::{secp256k1_fn_neg, secp256k1_fn_reduce}, +}; + +// ECDSA verify result codes +pub const ECDSA_VERIFY_SUCCESS: u8 = 0; +pub const ECDSA_VERIFY_ERROR: u8 = 1; + +/// ECDSA recover result codes +pub const ECDSA_RECOVER_SUCCESS: u8 = 0; +pub const ECDSA_RECOVER_ERR_INVALID_R: u8 = 1; +pub const ECDSA_RECOVER_ERR_INVALID_S: u8 = 2; +pub const ECDSA_RECOVER_ERR_INVALID_RECID: u8 = 3; +pub const ECDSA_RECOVER_ERR_POINT_NOT_ON_CURVE: u8 = 4; +pub const ECDSA_RECOVER_ERR_RECOVERY_FAILED: u8 = 5; + +/// Verifies the signature (r, s) over the message hash z using the public key pk +/// +/// # Returns +/// - 0 = valid signature +/// - 1 = public key not on curve +/// - 2 = invalid signature +pub fn secp256k1_ecdsa_verify( + pk: &[u64; 8], + z: &[u64; 4], + r: &[u64; 4], + s: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { + // pk must be on the curve + if !secp256k1_is_on_curve( + pk, + #[cfg(feature = "hints")] + hints, + ) { + return false; + } + + // Ecdsa verification computes (x, y) = [z·s⁻¹ (mod n)]G + [r·s⁻¹ (mod n)]PK + // and checks that x ≡ r (mod n) + // We can equivalently hint (x,y), verify that + // [z]G + [r]PK + [-s](x,y) == 𝒪, + // and ensure that x ≡ r (mod n), saving us from expensive fn arithmetic + + // Hint the result + let point = fcall_secp256k1_ecdsa_verify( + pk, + z, + r, + s, + #[cfg(feature = "hints")] + hints, + ); + + // Check the recovered point is valid + // Note: Identity point would be raised here + if !secp256k1_is_on_curve( + &point, + #[cfg(feature = "hints")] + hints, + ) { + return false; + } + + // Check that [z]G + [r]PK + [-s](x,y) == 𝒪 + let neg_s = secp256k1_fn_neg( + s, + #[cfg(feature = "hints")] + hints, + ); + if secp256k1_triple_scalar_mul_with_g( + z, + r, + &neg_s, + pk, + &point, + #[cfg(feature = "hints")] + hints, + ) + .is_some() + { + return false; + } + + // Check that x ≡ r (mod n) + let point_x: [u64; 4] = [point[0], point[1], point[2], point[3]]; + eq( + &secp256k1_fn_reduce( + &point_x, + #[cfg(feature = "hints")] + hints, + ), + r, + ) +} + +/// Recover the public key point from an ECDSA signature (r, s) over the message hash z and recovery id recid +/// +/// # Returns +/// - 0 = success +/// - 1 = invalid r (not in [1, N)) +/// - 2 = invalid s (not in [1, N)) +/// - 3 = invalid recid (not 0 or 1) +/// - 4 = point not on curve +/// - 5 = recovery failed +pub fn secp256k1_ecdsa_recover( + r: &[u64; 4], + s: &[u64; 4], + z: &[u64; 4], + recid: u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Result<[u64; 8], u8> { + // Validate r + if *r == ZERO_256 || gt(r, &N_MINUS_ONE) { + return Err(ECDSA_RECOVER_ERR_INVALID_R); + } + + // Validate s + if *s == ZERO_256 || gt(s, &N_MINUS_ONE) { + return Err(ECDSA_RECOVER_ERR_INVALID_S); + } + + // Validate recid + if recid > 1 { + return Err(ECDSA_RECOVER_ERR_INVALID_RECID); + } + + // Ecdsa recovery computes R = (x,y) and + // (xQ, yQ) = [-z·r⁻¹ (mod n)]G + [s·r⁻¹ (mod n)]R + // We can equivalently compute R, hint (xQ,yQ) and verify that + // [z]G + [-s]R + [r](xQ,yQ) == 𝒪, + // saving us from expensive fn arithmetic + + // Determine the x-coordinate of R + let x = *r; + + // Compute the y-coordinate from x and the parity bit + let y_is_odd = (recid & 1) == 1; + let r_point = secp256k1_lift_x( + &x, + y_is_odd, + #[cfg(feature = "hints")] + hints, + ) + .map_err(|_| ECDSA_RECOVER_ERR_POINT_NOT_ON_CURVE)?; + + // Check that [z]G + [-s]R + [r](xQ,yQ) == 𝒪 + + // Hint the result + // The following functions hints (x,y) satisfying + // (x, y) == [s⁻¹·z (mod n)]G + [s⁻¹·r (mod n)]R iff [z]G + [r]R + [-s](x, y) == 𝒪 + // We can use it by flipping the signs of r and s and its order + let neg_s = secp256k1_fn_neg( + s, + #[cfg(feature = "hints")] + hints, + ); + let neg_r = secp256k1_fn_neg( + r, + #[cfg(feature = "hints")] + hints, + ); + let point = fcall_secp256k1_ecdsa_verify( + &r_point, + z, + &neg_s, + &neg_r, + #[cfg(feature = "hints")] + hints, + ); + + // Check the recovered point is valid + // Note: Identity point would be raised here + if !secp256k1_is_on_curve( + &point, + #[cfg(feature = "hints")] + hints, + ) { + return Err(ECDSA_RECOVER_ERR_RECOVERY_FAILED); + } + + // Check that [z]G + [-s]R + [r](xQ,yQ) == 𝒪 + if secp256k1_triple_scalar_mul_with_g( + z, + &neg_s, + r, + &r_point, + &point, + #[cfg(feature = "hints")] + hints, + ) + .is_some() + { + return Err(ECDSA_RECOVER_ERR_RECOVERY_FAILED); + } + + // Return the recovered public key + Ok(point) +} + +// ==================== C FFI Functions ==================== + +/// C-compatible wrapper for secp256k1_ecdsa_verify and address recovery +/// +/// # Safety +/// - `sig` must point to at least 64 bytes (r || s, big-endian) +/// - `msg` must point to at least 32 bytes (message hash, big-endian) +/// - `pk` must point to at least 64 bytes (x || y, big-endian) +/// - `output` must point to a writable buffer of at least 32 bytes +/// +/// # Arguments +/// - `sig` - 64 bytes: r (32 bytes) || s (32 bytes), big-endian +/// - `msg` - 32 bytes message hash, big-endian +/// - `pk` - 64 bytes: x (32 bytes) || y (32 bytes), big-endian +/// - `output` - Output buffer for the recovered address (32 bytes) +/// +/// # Returns +/// - `Ok([u8; 32])` - Recovered address if signature is valid +/// - `Err(u8)` - Error code +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_secp256k1_ecdsa_verify_and_address_recover_c")] +pub unsafe extern "C" fn secp256k1_ecdsa_verify_and_address_recover_c( + sig: *const u8, + msg: *const u8, + pk: *const u8, + output: *mut u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u8 { + let sig_bytes: &[u8; 64] = &*(sig as *const [u8; 64]); + let msg_bytes: &[u8; 32] = &*(msg as *const [u8; 32]); + let pk_bytes: &[u8; 64] = &*(pk as *const [u8; 64]); + let output_bytes: &mut [u8; 32] = &mut *(output as *mut [u8; 32]); + + // Parse r, s from big-endian bytes + let r_bytes: [u8; 32] = sig_bytes[0..32].try_into().unwrap(); + let s_bytes: [u8; 32] = sig_bytes[32..64].try_into().unwrap(); + + // Parse pk_x, pk_y from big-endian bytes + let pk_x_bytes: [u8; 32] = pk_bytes[0..32].try_into().unwrap(); + let pk_y_bytes: [u8; 32] = pk_bytes[32..64].try_into().unwrap(); + + // Convert to little-endian u64 limbs + let r = bytes_be_to_u64_le(&r_bytes); + let s = bytes_be_to_u64_le(&s_bytes); + let z = bytes_be_to_u64_le(msg_bytes); + let pk_x = bytes_be_to_u64_le(&pk_x_bytes); + let pk_y = bytes_be_to_u64_le(&pk_y_bytes); + + let pk_arr: [u64; 8] = [pk_x[0], pk_x[1], pk_x[2], pk_x[3], pk_y[0], pk_y[1], pk_y[2], pk_y[3]]; + if secp256k1_ecdsa_verify( + &pk_arr, + &z, + &r, + &s, + #[cfg(feature = "hints")] + hints, + ) { + // Signature is valid - compute and return the address from the public key + let address = pubkey_to_address(&pk_arr); + output_bytes.copy_from_slice(&address); + ECDSA_VERIFY_SUCCESS + } else { + ECDSA_VERIFY_ERROR + } +} + +/// C-compatible wrapper for secp256k1_ecdsa_recover +/// +/// # Safety +/// - `sig` must point to at least 64 bytes (r || s, big-endian) +/// - `msg` must point to at least 32 bytes (message hash, big-endian) +/// - `output` must point to a writable buffer of at least 32 bytes +/// +/// # Arguments +/// - `sig` - 64 bytes: r (32 bytes) || s (32 bytes), big-endian +/// - `recid` - Recovery ID (0 or 1) +/// - `msg` - 32 bytes message hash, big-endian +/// - `output` - Output buffer for the recovered address (32 bytes) +/// +/// # Returns +/// - `Ok([u32; 8])` - Recovered address if recovery is successful +/// - `Err(u8)` - Error code +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_secp256k1_ecdsa_address_recover_c")] +pub unsafe extern "C" fn secp256k1_ecdsa_address_recover_c( + sig: *const u8, + recid: u8, + msg: *const u8, + output: *mut u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> u8 { + let sig_bytes: &[u8; 64] = &*(sig as *const [u8; 64]); + let msg_bytes: &[u8; 32] = &*(msg as *const [u8; 32]); + let output_bytes: &mut [u8; 32] = &mut *(output as *mut [u8; 32]); + + // Parse r, s, z from big-endian bytes + let r_bytes: [u8; 32] = sig_bytes[0..32].try_into().unwrap(); + let s_bytes: [u8; 32] = sig_bytes[32..64].try_into().unwrap(); + + let r = bytes_be_to_u64_le(&r_bytes); + let s = bytes_be_to_u64_le(&s_bytes); + let z = bytes_be_to_u64_le(msg_bytes); + + // Perform ecrecover + match secp256k1_ecdsa_recover( + &r, + &s, + &z, + recid, + #[cfg(feature = "hints")] + hints, + ) { + Ok(pk) => { + let result = pubkey_to_address(&pk); + output_bytes.copy_from_slice(&result); + ECDSA_RECOVER_SUCCESS + } + Err(code) => code, + } +} + +/// Convert big-endian bytes to little-endian u64 limbs (32 bytes -> [u64; 4]) +fn bytes_be_to_u64_le(bytes: &[u8; 32]) -> [u64; 4] { + let mut result = [0u64; 4]; + for i in 0..4 { + for j in 0..8 { + result[3 - i] |= (bytes[i * 8 + j] as u64) << (8 * (7 - j)); + } + } + result +} + +fn u64_le_to_bytes_be(limbs: &[u64; 4]) -> [u8; 32] { + let mut result = [0u8; 32]; + for i in 0..4 { + for j in 0..8 { + result[i * 8 + j] = ((limbs[3 - i] >> (8 * (7 - j))) & 0xff) as u8; + } + } + result +} + +fn pubkey_to_address(pk: &[u64; 8]) -> [u8; 32] { + let x = [pk[0], pk[1], pk[2], pk[3]]; + let y = [pk[4], pk[5], pk[6], pk[7]]; + + let x_bytes = u64_le_to_bytes_be(&x); + let y_bytes = u64_le_to_bytes_be(&y); + + // Concatenate x and y + let mut pk_bytes = [0u8; 64]; + pk_bytes[0..32].copy_from_slice(&x_bytes); + pk_bytes[32..64].copy_from_slice(&y_bytes); + + // Hash with keccak256 + let mut hasher = Keccak::v256(); + hasher.update(&pk_bytes); + let mut hash = [0u8; 32]; + hasher.finalize(&mut hash); + + // Ethereum address is last 20 bytes + let mut result = [0u8; 32]; + result[12..32].copy_from_slice(&hash[12..32]); + + result +} diff --git a/ziskos/entrypoint/src/zisklib/lib/secp256k1/field.rs b/ziskos/entrypoint/src/zisklib/lib/secp256k1/field.rs index 04e4380ce..9681bcdbe 100644 --- a/ziskos/entrypoint/src/zisklib/lib/secp256k1/field.rs +++ b/ziskos/entrypoint/src/zisklib/lib/secp256k1/field.rs @@ -1,103 +1,69 @@ use crate::{ syscalls::{syscall_arith256_mod, SyscallArith256ModParams}, - zisklib::{fcall_secp256k1_fp_inv, fcall_secp256k1_fp_sqrt, lt}, + zisklib::fcall_secp256k1_fp_sqrt, }; -use super::constants::{NQR, P, P_MINUS_ONE}; +use super::constants::{NQR, P}; -pub fn secp256k1_fp_reduce(x: &[u64; 4]) -> [u64; 4] { - if lt(x, &P) { - return *x; - } - - // x·1 + 0 - let mut params = SyscallArith256ModParams { - a: x, - b: &[1, 0, 0, 0], - c: &[0, 0, 0, 0], - module: &P, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - *params.d -} - -pub fn secp256k1_fp_add(x: &[u64; 4], y: &[u64; 4]) -> [u64; 4] { +pub fn secp256k1_fp_add( + x: &[u64; 4], + y: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { // x·1 + y let mut params = SyscallArith256ModParams { a: x, b: &[1, 0, 0, 0], c: y, module: &P, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } -pub fn secp256k1_fp_negate(x: &[u64; 4]) -> [u64; 4] { - // x·(-1) + 0 - let mut params = SyscallArith256ModParams { - a: x, - b: &P_MINUS_ONE, - c: &[0, 0, 0, 0], - module: &P, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - - *params.d -} - -pub fn secp256k1_fp_mul(x: &[u64; 4], y: &[u64; 4]) -> [u64; 4] { +pub fn secp256k1_fp_mul( + x: &[u64; 4], + y: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { // x·y + 0 let mut params = SyscallArith256ModParams { a: x, b: y, c: &[0, 0, 0, 0], module: &P, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); - + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } -pub fn secp256k1_fp_mul_scalar(x: &[u64; 4], scalar: u64) -> [u64; 4] { - // x·scalar + 0 - let mut params = SyscallArith256ModParams { - a: x, - b: &[scalar, 0, 0, 0], - c: &[0, 0, 0, 0], - module: &P, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - - *params.d -} - -pub fn secp256k1_fp_square(x: &[u64; 4]) -> [u64; 4] { +pub fn secp256k1_fp_square( + x: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { // x·x + 0 let mut params = SyscallArith256ModParams { a: x, b: x, c: &[0, 0, 0, 0], module: &P, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); - + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } -/// Inverts a non-zero element `x` -pub fn secp256k1_fp_inv(x: &[u64; 4]) -> [u64; 4] { - // Hint the inverse - let x_inv = fcall_secp256k1_fp_inv(x); - - // Check that x·x_inv = 1 (P) - let mut params = SyscallArith256ModParams { - a: x, - b: &x_inv, - c: &[0, 0, 0, 0], - module: &P, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - assert_eq!(*params.d, [0x1, 0x0, 0x0, 0x0]); - - x_inv -} - -pub fn secp256k1_fp_sqrt(x: &[u64; 4], parity: u64) -> ([u64; 4], bool) { +pub fn secp256k1_fp_sqrt( + x: &[u64; 4], + parity: u64, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> ([u64; 4], bool) { // Hint the sqrt - let hint = fcall_secp256k1_fp_sqrt(x, parity); + let hint = fcall_secp256k1_fp_sqrt( + x, + parity, + #[cfg(feature = "hints")] + hints, + ); let is_qr = hint[0] == 1; let sqrt = hint[1..5].try_into().unwrap(); @@ -109,7 +75,11 @@ pub fn secp256k1_fp_sqrt(x: &[u64; 4], parity: u64) -> ([u64; 4], bool) { module: &P, d: &mut [0, 0, 0, 0], }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); if is_qr { // Check that sqrt * sqrt == x @@ -117,133 +87,13 @@ pub fn secp256k1_fp_sqrt(x: &[u64; 4], parity: u64) -> ([u64; 4], bool) { (sqrt, true) } else { // Check that sqrt * sqrt == x * NQR - let nqr = secp256k1_fp_mul(x, &NQR); + let nqr = secp256k1_fp_mul( + x, + &NQR, + #[cfg(feature = "hints")] + hints, + ); assert_eq!(*params.d, nqr); (sqrt, false) } } - -/// # Safety -/// - `x_ptr` must point to 4 u64s -/// - `out_ptr` must point to at least 4 u64s -#[no_mangle] -pub unsafe extern "C" fn secp256k1_fp_reduce_c(x_ptr: *const u64, out_ptr: *mut u64) { - let x: &[u64; 4] = &*(x_ptr as *const [u64; 4]); - - if lt(x, &P) { - *out_ptr.add(0) = x[0]; - *out_ptr.add(1) = x[1]; - *out_ptr.add(2) = x[2]; - *out_ptr.add(3) = x[3]; - return; - } - - let mut params = SyscallArith256ModParams { - a: x, - b: &[1, 0, 0, 0], - c: &[0, 0, 0, 0], - module: &P, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - - *out_ptr.add(0) = params.d[0]; - *out_ptr.add(1) = params.d[1]; - *out_ptr.add(2) = params.d[2]; - *out_ptr.add(3) = params.d[3]; -} - -/// # Safety -/// - `x_ptr` must point to 4 u64s -/// - `y_ptr` must point to 4 u64s -/// - `out_ptr` must point to at least 4 u64s -#[no_mangle] -pub unsafe extern "C" fn secp256k1_fp_add_c( - x_ptr: *const u64, - y_ptr: *const u64, - out_ptr: *mut u64, -) { - let x: &[u64; 4] = &*(x_ptr as *const [u64; 4]); - let y: &[u64; 4] = &*(y_ptr as *const [u64; 4]); - - let mut params = - SyscallArith256ModParams { a: x, b: &[1, 0, 0, 0], c: y, module: &P, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); - - *out_ptr.add(0) = params.d[0]; - *out_ptr.add(1) = params.d[1]; - *out_ptr.add(2) = params.d[2]; - *out_ptr.add(3) = params.d[3]; -} - -/// # Safety -/// - `x_ptr` must point to 4 u64s -/// - `out_ptr` must point to at least 4 u64s -#[no_mangle] -pub unsafe extern "C" fn secp256k1_fp_negate_c(x_ptr: *const u64, out_ptr: *mut u64) { - let x: &[u64; 4] = &*(x_ptr as *const [u64; 4]); - - let mut params = SyscallArith256ModParams { - a: x, - b: &P_MINUS_ONE, - c: &[0, 0, 0, 0], - module: &P, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - - *out_ptr.add(0) = params.d[0]; - *out_ptr.add(1) = params.d[1]; - *out_ptr.add(2) = params.d[2]; - *out_ptr.add(3) = params.d[3]; -} - -/// # Safety -/// - `x_ptr` must point to 4 u64s -/// - `y_ptr` must point to 4 u64s -/// - `out_ptr` must point to at least 4 u64s -#[no_mangle] -pub unsafe extern "C" fn secp256k1_fp_mul_c( - x_ptr: *const u64, - y_ptr: *const u64, - out_ptr: *mut u64, -) { - let x: &[u64; 4] = &*(x_ptr as *const [u64; 4]); - let y: &[u64; 4] = &*(y_ptr as *const [u64; 4]); - - let mut params = - SyscallArith256ModParams { a: x, b: y, c: &[0, 0, 0, 0], module: &P, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); - - *out_ptr.add(0) = params.d[0]; - *out_ptr.add(1) = params.d[1]; - *out_ptr.add(2) = params.d[2]; - *out_ptr.add(3) = params.d[3]; -} - -/// # Safety -/// - `x_ptr` must point to 4 u64s -/// - `scalar` is a single u64 value -/// - `out_ptr` must point to at least 4 u64s -#[no_mangle] -pub unsafe extern "C" fn secp256k1_fp_mul_scalar_c( - x_ptr: *const u64, - scalar: u64, - out_ptr: *mut u64, -) { - let x: &[u64; 4] = &*(x_ptr as *const [u64; 4]); - - let mut params = SyscallArith256ModParams { - a: x, - b: &[scalar, 0, 0, 0], - c: &[0, 0, 0, 0], - module: &P, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - - *out_ptr.add(0) = params.d[0]; - *out_ptr.add(1) = params.d[1]; - *out_ptr.add(2) = params.d[2]; - *out_ptr.add(3) = params.d[3]; -} diff --git a/ziskos/entrypoint/src/zisklib/lib/secp256k1/mod.rs b/ziskos/entrypoint/src/zisklib/lib/secp256k1/mod.rs index 30c4afb6b..794d231fd 100644 --- a/ziskos/entrypoint/src/zisklib/lib/secp256k1/mod.rs +++ b/ziskos/entrypoint/src/zisklib/lib/secp256k1/mod.rs @@ -1,8 +1,10 @@ mod constants; mod curve; +mod ecdsa; mod field; mod scalar; pub use curve::*; +pub use ecdsa::*; pub use field::*; pub use scalar::*; diff --git a/ziskos/entrypoint/src/zisklib/lib/secp256k1/scalar.rs b/ziskos/entrypoint/src/zisklib/lib/secp256k1/scalar.rs index 8ee7f6303..0c93c694d 100644 --- a/ziskos/entrypoint/src/zisklib/lib/secp256k1/scalar.rs +++ b/ziskos/entrypoint/src/zisklib/lib/secp256k1/scalar.rs @@ -1,11 +1,14 @@ use crate::{ syscalls::{syscall_arith256_mod, SyscallArith256ModParams}, - zisklib::{fcall_secp256k1_fn_inv, lt}, + zisklib::lt, }; use super::constants::{N, N_MINUS_ONE}; -pub fn secp256k1_fn_reduce(x: &[u64; 4]) -> [u64; 4] { +pub fn secp256k1_fn_reduce( + x: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { if lt(x, &N) { return *x; } @@ -18,21 +21,16 @@ pub fn secp256k1_fn_reduce(x: &[u64; 4]) -> [u64; 4] { module: &N, d: &mut [0, 0, 0, 0], }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } -pub fn secp256k1_fn_add(x: &[u64; 4], y: &[u64; 4]) -> [u64; 4] { - // x·1 + y - let mut params = - SyscallArith256ModParams { a: x, b: &[1, 0, 0, 0], c: y, module: &N, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); - - *params.d -} - -pub fn secp256k1_fn_neg(x: &[u64; 4]) -> [u64; 4] { +pub fn secp256k1_fn_neg(x: &[u64; 4], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 4] { // x·(-1) + 0 let mut params = SyscallArith256ModParams { a: x, @@ -41,192 +39,28 @@ pub fn secp256k1_fn_neg(x: &[u64; 4]) -> [u64; 4] { module: &N, d: &mut [0, 0, 0, 0], }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } -pub fn secp256k1_fn_sub(x: &[u64; 4], y: &[u64; 4]) -> [u64; 4] { +pub fn secp256k1_fn_sub( + x: &[u64; 4], + y: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { // y·(-1) + x let mut params = SyscallArith256ModParams { a: y, b: &N_MINUS_ONE, c: x, module: &N, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); - - *params.d -} - -pub fn secp256k1_fn_mul(x: &[u64; 4], y: &[u64; 4]) -> [u64; 4] { - // x·y + 0 - let mut params = - SyscallArith256ModParams { a: x, b: y, c: &[0, 0, 0, 0], module: &N, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); *params.d } - -/// Inverts a non-zero element `x` -pub fn secp256k1_fn_inv(x: &[u64; 4]) -> [u64; 4] { - // Hint the inverse - let x_inv = fcall_secp256k1_fn_inv(x); - - // Check that x·x_inv = 1 (N) - let mut params = SyscallArith256ModParams { - a: x, - b: &x_inv, - c: &[0, 0, 0, 0], - module: &N, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - assert_eq!(*params.d, [0x1, 0x0, 0x0, 0x0]); - - x_inv -} - -/// # Safety -/// - `x_ptr` must point to 4 u64s -/// - `out_ptr` must point to at least 4 u64s -#[no_mangle] -pub unsafe extern "C" fn secp256k1_fn_reduce_c(x_ptr: *const u64, out_ptr: *mut u64) { - let x: &[u64; 4] = &*(x_ptr as *const [u64; 4]); - - if lt(x, &N) { - *out_ptr.add(0) = x[0]; - *out_ptr.add(1) = x[1]; - *out_ptr.add(2) = x[2]; - *out_ptr.add(3) = x[3]; - return; - } - - let mut params = SyscallArith256ModParams { - a: x, - b: &[1, 0, 0, 0], - c: &[0, 0, 0, 0], - module: &N, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - - *out_ptr.add(0) = params.d[0]; - *out_ptr.add(1) = params.d[1]; - *out_ptr.add(2) = params.d[2]; - *out_ptr.add(3) = params.d[3]; -} - -/// # Safety -/// - `x_ptr` must point to 4 u64s -/// - `y_ptr` must point to 4 u64s -/// - `out_ptr` must point to at least 4 u64s -#[no_mangle] -pub unsafe extern "C" fn secp256k1_fn_add_c( - x_ptr: *const u64, - y_ptr: *const u64, - out_ptr: *mut u64, -) { - let x: &[u64; 4] = &*(x_ptr as *const [u64; 4]); - let y: &[u64; 4] = &*(y_ptr as *const [u64; 4]); - - let mut params = - SyscallArith256ModParams { a: x, b: &[1, 0, 0, 0], c: y, module: &N, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); - - *out_ptr.add(0) = params.d[0]; - *out_ptr.add(1) = params.d[1]; - *out_ptr.add(2) = params.d[2]; - *out_ptr.add(3) = params.d[3]; -} - -/// # Safety -/// - `x_ptr` must point to 4 u64s -/// - `out_ptr` must point to at least 4 u64s -#[no_mangle] -pub unsafe extern "C" fn secp256k1_fn_neg_c(x_ptr: *const u64, out_ptr: *mut u64) { - let x: &[u64; 4] = &*(x_ptr as *const [u64; 4]); - - let mut params = SyscallArith256ModParams { - a: x, - b: &N_MINUS_ONE, - c: &[0, 0, 0, 0], - module: &N, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - - *out_ptr.add(0) = params.d[0]; - *out_ptr.add(1) = params.d[1]; - *out_ptr.add(2) = params.d[2]; - *out_ptr.add(3) = params.d[3]; -} - -/// # Safety -/// - `x_ptr` must point to 4 u64s -/// - `y_ptr` must point to 4 u64s -/// - `out_ptr` must point to at least 4 u64s -#[no_mangle] -pub unsafe extern "C" fn secp256k1_fn_sub_c( - x_ptr: *const u64, - y_ptr: *const u64, - out_ptr: *mut u64, -) { - let x: &[u64; 4] = &*(x_ptr as *const [u64; 4]); - let y: &[u64; 4] = &*(y_ptr as *const [u64; 4]); - - let mut params = - SyscallArith256ModParams { a: y, b: &N_MINUS_ONE, c: x, module: &N, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); - - *out_ptr.add(0) = params.d[0]; - *out_ptr.add(1) = params.d[1]; - *out_ptr.add(2) = params.d[2]; - *out_ptr.add(3) = params.d[3]; -} - -/// # Safety -/// - `x_ptr` must point to 4 u64s -/// - `y_ptr` must point to 4 u64s -/// - `out_ptr` must point to at least 4 u64s -#[no_mangle] -pub unsafe extern "C" fn secp256k1_fn_mul_c( - x_ptr: *const u64, - y_ptr: *const u64, - out_ptr: *mut u64, -) { - let x: &[u64; 4] = &*(x_ptr as *const [u64; 4]); - let y: &[u64; 4] = &*(y_ptr as *const [u64; 4]); - - let mut params = - SyscallArith256ModParams { a: x, b: y, c: &[0, 0, 0, 0], module: &N, d: &mut [0, 0, 0, 0] }; - syscall_arith256_mod(&mut params); - - *out_ptr.add(0) = params.d[0]; - *out_ptr.add(1) = params.d[1]; - *out_ptr.add(2) = params.d[2]; - *out_ptr.add(3) = params.d[3]; -} - -/// # Safety -/// - `x_ptr` must point to 4 u64s (non-zero element) -/// - `out_ptr` must point to at least 4 u64s -#[no_mangle] -pub unsafe extern "C" fn secp256k1_fn_inv_c(x_ptr: *const u64, out_ptr: *mut u64) { - let x: &[u64; 4] = &*(x_ptr as *const [u64; 4]); - - // Hint the inverse - let x_inv = fcall_secp256k1_fn_inv(x); - - // Check that x·x_inv = 1 (N) - let mut params = SyscallArith256ModParams { - a: x, - b: &x_inv, - c: &[0, 0, 0, 0], - module: &N, - d: &mut [0, 0, 0, 0], - }; - syscall_arith256_mod(&mut params); - assert_eq!(*params.d, [0x1, 0x0, 0x0, 0x0]); - - *out_ptr.add(0) = x_inv[0]; - *out_ptr.add(1) = x_inv[1]; - *out_ptr.add(2) = x_inv[2]; - *out_ptr.add(3) = x_inv[3]; -} diff --git a/ziskos/entrypoint/src/zisklib/lib/secp256r1/constants.rs b/ziskos/entrypoint/src/zisklib/lib/secp256r1/constants.rs new file mode 100644 index 000000000..c30b324cc --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/secp256r1/constants.rs @@ -0,0 +1,37 @@ +//! Constants for the [Secp256r1](https://csrc.nist.gov/pubs/sp/800/186/final) elliptic curve + +/// B parameter of the curve E: y² = x³ + a·x + b +pub const E_A: [u64; 4] = + [0xFFFF_FFFF_FFFF_FFFC, 0x0000_0000_FFFF_FFFF, 0x0000_0000_0000_0000, 0xFFFF_FFFF_0000_0001]; +pub const E_B: [u64; 4] = + [0x3BCE_3C3E_27D2_604B, 0x651D_06B0_CC53_B0F6, 0xB3EB_BD55_7698_86BC, 0x5AC6_35D8_AA3A_93E7]; + +/// Secp256r1 base field size +pub const P: [u64; 4] = + [0xFFFF_FFFF_FFFF_FFFF, 0x0000_0000_FFFF_FFFF, 0x0000_0000_0000_0000, 0xFFFF_FFFF_0000_0001]; +pub const P_MINUS_ONE: [u64; 4] = [P[0] - 1, P[1], P[2], P[3]]; + +/// Secp256r1 scalar field size +pub const N: [u64; 4] = + [0xF3B9_CAC2_FC63_2551, 0xBCE6_FAAD_A717_9E84, 0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF_0000_0000]; +pub const N_MINUS_ONE: [u64; 4] = [N[0] - 1, N[1], N[2], N[3]]; + +/// Secp256r1 group identity point +pub const IDENTITY_X: [u64; 4] = [0; 4]; +pub const IDENTITY_Y: [u64; 4] = [0; 4]; +pub const IDENTITY: [u64; 8] = [ + IDENTITY_X[0], + IDENTITY_X[1], + IDENTITY_X[2], + IDENTITY_X[3], + IDENTITY_Y[0], + IDENTITY_Y[1], + IDENTITY_Y[2], + IDENTITY_Y[3], +]; + +/// Secp256r1 group of points generator +pub const G_X: [u64; 4] = + [0xF4A1_3945_D898_C296, 0x7703_7D81_2DEB_33A0, 0xF8BC_E6E5_63A4_40F2, 0x6B17_D1F2_E12C_4247]; +pub const G_Y: [u64; 4] = + [0xCBB6_4068_37BF_51F5, 0x2BCE_3357_6B31_5ECE, 0x8EE7_EB4A_7C0F_9E16, 0x4FE3_42E2_FE1A_7F9B]; diff --git a/ziskos/entrypoint/src/zisklib/lib/secp256r1/curve.rs b/ziskos/entrypoint/src/zisklib/lib/secp256r1/curve.rs new file mode 100644 index 000000000..ceb5665cf --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/secp256r1/curve.rs @@ -0,0 +1,494 @@ +use crate::{ + syscalls::{ + syscall_secp256r1_add, syscall_secp256r1_dbl, SyscallPoint256, SyscallSecp256r1AddParams, + }, + zisklib::{eq, fcall_msb_pos_256, fcall_msb_pos_256_3, is_one, ONE_256, TWO_256, ZERO_256}, +}; + +use super::{ + constants::{E_A, E_B, G_X, G_Y, IDENTITY_X, IDENTITY_Y}, + field::{secp256r1_fp_add, secp256r1_fp_mul, secp256r1_fp_square}, +}; + +const IDENTITY_POINT256: SyscallPoint256 = SyscallPoint256 { x: IDENTITY_X, y: IDENTITY_Y }; + +const G_POINT256: SyscallPoint256 = SyscallPoint256 { x: G_X, y: G_Y }; + +/// Given points `p1` and `p2`, performs the point addition `p1 + p2` and assigns the result to `p1`. +/// It assumes that `p1` and `p2` are from the Secp256r1 curve, that `p1,p2 != 𝒪` +/// Returns true if the result is the point at infinity. +#[inline] +fn secp256r1_add_non_infinity_points( + p1: &mut SyscallPoint256, + p2: &SyscallPoint256, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { + if p1.x != p2.x { + let mut params = SyscallSecp256r1AddParams { p1, p2 }; + syscall_secp256r1_add( + &mut params, + #[cfg(feature = "hints")] + hints, + ); + false + } else if p1.y == p2.y { + syscall_secp256r1_dbl( + p1, + #[cfg(feature = "hints")] + hints, + ); + false + } else { + // p1 + (-p1) = 𝒪 + true + } +} + +/// Checks whether the given point `p` is on the Secp256r1 curve. +/// It assumes that `p` is not the point at infinity. +pub fn secp256r1_is_on_curve(p: &[u64; 8], #[cfg(feature = "hints")] hints: &mut Vec) -> bool { + let x: [u64; 4] = p[0..4].try_into().unwrap(); + let y: [u64; 4] = p[4..8].try_into().unwrap(); + + // p in E iff y² == x³ + a·x + b + let lhs = secp256r1_fp_square( + &y, + #[cfg(feature = "hints")] + hints, + ); + let mut rhs = secp256r1_fp_square( + &x, + #[cfg(feature = "hints")] + hints, + ); + rhs = secp256r1_fp_mul( + &rhs, + &x, + #[cfg(feature = "hints")] + hints, + ); + rhs = secp256r1_fp_add( + &rhs, + &secp256r1_fp_mul( + &x, + &E_A, + #[cfg(feature = "hints")] + hints, + ), + #[cfg(feature = "hints")] + hints, + ); + rhs = secp256r1_fp_add( + &rhs, + &E_B, + #[cfg(feature = "hints")] + hints, + ); + eq(&lhs, &rhs) +} + +/// Given two points `p` and `q` and scalars `r`, `s`, and `t`, computes the triple scalar multiplication `r·g + s·p + t·q` +/// It assumes that `r,s,t ∈ [1, N-1]` and that `p,q != 𝒪` +pub fn secp256r1_triple_scalar_mul_with_g( + r: &[u64; 4], + s: &[u64; 4], + t: &[u64; 4], + p: &[u64; 8], + q: &[u64; 8], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> Option<[u64; 8]> { + let p = SyscallPoint256 { x: [p[0], p[1], p[2], p[3]], y: [p[4], p[5], p[6], p[7]] }; + let q = SyscallPoint256 { x: [q[0], q[1], q[2], q[3]], y: [q[4], q[5], q[6], q[7]] }; + + // Precompute g + p, g + q, p + q, g + p + q + let mut gp = G_POINT256; + let gp_is_infinity = secp256r1_add_non_infinity_points( + &mut gp, + &p, + #[cfg(feature = "hints")] + hints, + ); + + let mut gq = G_POINT256; + let gq_is_infinity = secp256r1_add_non_infinity_points( + &mut gq, + &q, + #[cfg(feature = "hints")] + hints, + ); + + let mut pq = SyscallPoint256 { x: p.x, y: p.y }; + let pq_is_infinity = secp256r1_add_non_infinity_points( + &mut pq, + &q, + #[cfg(feature = "hints")] + hints, + ); + + let (gpq, gpq_is_infinity) = if gp_is_infinity { + // G + P = 𝒪, so G + P + Q = Q + (SyscallPoint256 { x: q.x, y: q.y }, false) + } else if pq_is_infinity { + // P + Q = 𝒪, so G + P + Q = G + (G_POINT256, false) + } else { + // Normal case: add Q to (G + P) + let mut gpq_temp = SyscallPoint256 { x: gp.x, y: gp.y }; + let is_inf = secp256r1_add_non_infinity_points( + &mut gpq_temp, + &q, + #[cfg(feature = "hints")] + hints, + ); + (gpq_temp, is_inf) + }; + + if is_one(r) && is_one(s) && is_one(t) { + // Return g + p + q + if gpq_is_infinity { + return None; + } else { + return Some([ + gpq.x[0], gpq.x[1], gpq.x[2], gpq.x[3], gpq.y[0], gpq.y[1], gpq.y[2], gpq.y[3], + ]); + } + } + // From here on, at least one of r,s,t is greater than 1 + + // Hint the maximum length between the binary representations of r,s and t + let (max_limb, max_bit) = fcall_msb_pos_256_3( + r, + s, + t, + #[cfg(feature = "hints")] + hints, + ); + + // Perform the loop, based on the binary representation of r,s and t + + // We do the first iteration separately + let max_limb = max_limb as usize; + let max_bit = max_bit as usize; + + // At least one of the scalars should have the first received bit as 1 + let r_bit = (r[max_limb] >> max_bit) & 1; + let s_bit = (s[max_limb] >> max_bit) & 1; + let t_bit = (t[max_limb] >> max_bit) & 1; + assert!(r_bit == 1 || s_bit == 1 || t_bit == 1); + + // Start at 𝒪 + let mut res = IDENTITY_POINT256; + let mut res_is_infinity = true; + let mut r_rec = ZERO_256; + let mut s_rec = ZERO_256; + let mut t_rec = ZERO_256; + + // Eight cases based on the bits of r,s and t + match (r_bit, s_bit, t_bit) { + (0, 0, 1) => { + // Set res = q + res.x = q.x; + res.y = q.y; + res_is_infinity = false; + + // Update t_rec + t_rec[max_limb] = 1 << max_bit; + } + (0, 1, 0) => { + // Set res = p + res.x = p.x; + res.y = p.y; + res_is_infinity = false; + + // Update s_rec + s_rec[max_limb] = 1 << max_bit; + } + (0, 1, 1) => { + // Set res = p + q if not infinity + if !pq_is_infinity { + res.x = pq.x; + res.y = pq.y; + res_is_infinity = false; + } + + // Update s_rec and t_rec + s_rec[max_limb] = 1 << max_bit; + t_rec[max_limb] = 1 << max_bit; + } + (1, 0, 0) => { + // Set res = g + res.x = G_POINT256.x; + res.y = G_POINT256.y; + res_is_infinity = false; + + // Update r_rec + r_rec[max_limb] = 1 << max_bit; + } + (1, 0, 1) => { + // Set res = g + q if not infinity + if !gq_is_infinity { + res.x = gq.x; + res.y = gq.y; + res_is_infinity = false; + } + + // Update r_rec and t_rec + r_rec[max_limb] = 1 << max_bit; + t_rec[max_limb] = 1 << max_bit; + } + (1, 1, 0) => { + // Set res = g + p if not infinity + if !gp_is_infinity { + res.x = gp.x; + res.y = gp.y; + res_is_infinity = false; + } + + // Update r_rec and s_rec + r_rec[max_limb] = 1 << max_bit; + s_rec[max_limb] = 1 << max_bit; + } + (1, 1, 1) => { + // Set res = g + p + q if not infinity + if !gpq_is_infinity { + res.x = gpq.x; + res.y = gpq.y; + res_is_infinity = false; + } + + // Update r_rec, s_rec and t_rec + r_rec[max_limb] = 1 << max_bit; + s_rec[max_limb] = 1 << max_bit; + t_rec[max_limb] = 1 << max_bit; + } + _ => unreachable!(), + } + + // Determine starting limb/bit for the loop + let mut limb = max_limb; + let mut bit = if max_bit == 0 { + // If max_bit is 0 then limb > 0; otherwise r,s,t = 1, which is excluded here + limb -= 1; + 63 + } else { + max_bit - 1 + }; + + // Perform the rest of the loop + for i in (0..=limb).rev() { + for j in (0..=bit).rev() { + let r_bit = (r[i] >> j) & 1; + let s_bit = (s[i] >> j) & 1; + let t_bit = (t[i] >> j) & 1; + + // Eight cases based on the bits of r,s and t + match (r_bit, s_bit, t_bit) { + (0, 0, 0) => { + // If res is 𝒪, do nothing; otherwise, double + if !res_is_infinity { + syscall_secp256r1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + } + } + (0, 0, 1) => { + // If res is 𝒪, set res = q; otherwise, double res and add q + if res_is_infinity { + res.x = q.x; + res.y = q.y; + res_is_infinity = false; + } else { + syscall_secp256r1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + res_is_infinity = secp256r1_add_non_infinity_points( + &mut res, + &q, + #[cfg(feature = "hints")] + hints, + ); + } + + // Update t_rec + t_rec[i] |= 1 << j; + } + (0, 1, 0) => { + // If res is 𝒪, set res = p; otherwise, double res and add p + if res_is_infinity { + res.x = p.x; + res.y = p.y; + res_is_infinity = false; + } else { + syscall_secp256r1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + res_is_infinity = secp256r1_add_non_infinity_points( + &mut res, + &p, + #[cfg(feature = "hints")] + hints, + ); + } + + // Update s_rec + s_rec[i] |= 1 << j; + } + (0, 1, 1) => { + // If res is 𝒪, set res = p + q if not infinity; otherwise, double res and add (p + q) + if res_is_infinity { + if !pq_is_infinity { + res.x = pq.x; + res.y = pq.y; + res_is_infinity = false; + } + } else { + syscall_secp256r1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + if !pq_is_infinity { + res_is_infinity = secp256r1_add_non_infinity_points( + &mut res, + &pq, + #[cfg(feature = "hints")] + hints, + ); + } + } + + // Update s_rec and t_rec + s_rec[i] |= 1 << j; + t_rec[i] |= 1 << j; + } + (1, 0, 0) => { + // If res is 𝒪, set res = g; otherwise, double res and add g + if res_is_infinity { + res.x = G_POINT256.x; + res.y = G_POINT256.y; + res_is_infinity = false; + } else { + syscall_secp256r1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + res_is_infinity = secp256r1_add_non_infinity_points( + &mut res, + &G_POINT256, + #[cfg(feature = "hints")] + hints, + ); + } + + // Update r_rec + r_rec[i] |= 1 << j; + } + (1, 0, 1) => { + // If res is 𝒪, set res = g + q if not infinity; otherwise, double res and add (g + q) + if res_is_infinity { + if !gq_is_infinity { + res.x = gq.x; + res.y = gq.y; + res_is_infinity = false; + } + } else { + syscall_secp256r1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + if !gq_is_infinity { + res_is_infinity = secp256r1_add_non_infinity_points( + &mut res, + &gq, + #[cfg(feature = "hints")] + hints, + ); + } + } + + // Update r_rec and t_rec + r_rec[i] |= 1 << j; + t_rec[i] |= 1 << j; + } + (1, 1, 0) => { + // If res is 𝒪, set res = g + p if not infinity + if res_is_infinity { + if !gp_is_infinity { + res.x = gp.x; + res.y = gp.y; + res_is_infinity = false; + } + } else { + syscall_secp256r1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + if !gp_is_infinity { + res_is_infinity = secp256r1_add_non_infinity_points( + &mut res, + &gp, + #[cfg(feature = "hints")] + hints, + ); + } + } + + // Update r_rec and s_rec + r_rec[i] |= 1 << j; + s_rec[i] |= 1 << j; + } + (1, 1, 1) => { + // If res is 𝒪, set res = g + p + q if not infinity; otherwise, double res and add (g + p + q) + if res_is_infinity { + if !gpq_is_infinity { + res.x = gpq.x; + res.y = gpq.y; + res_is_infinity = false; + } + } else { + syscall_secp256r1_dbl( + &mut res, + #[cfg(feature = "hints")] + hints, + ); + if !gpq_is_infinity { + res_is_infinity = secp256r1_add_non_infinity_points( + &mut res, + &gpq, + #[cfg(feature = "hints")] + hints, + ); + } + } + + // Update r_rec, s_rec and t_rec + r_rec[i] |= 1 << j; + s_rec[i] |= 1 << j; + t_rec[i] |= 1 << j; + } + _ => unreachable!(), + } + } + bit = 63; + } + + // Check that the recomposed scalars are the same as the received scalars + assert!(eq(&r_rec, r)); + assert!(eq(&s_rec, s)); + assert!(eq(&t_rec, t)); + + if res_is_infinity { + None + } else { + Some([res.x[0], res.x[1], res.x[2], res.x[3], res.y[0], res.y[1], res.y[2], res.y[3]]) + } +} diff --git a/ziskos/entrypoint/src/zisklib/lib/secp256r1/ecdsa.rs b/ziskos/entrypoint/src/zisklib/lib/secp256r1/ecdsa.rs new file mode 100644 index 000000000..f1995d1d5 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/secp256r1/ecdsa.rs @@ -0,0 +1,158 @@ +use crate::zisklib::{eq, fcall_secp256r1_ecdsa_verify, gt, is_zero}; + +use super::{ + constants::{IDENTITY, N_MINUS_ONE, P_MINUS_ONE}, + curve::{secp256r1_is_on_curve, secp256r1_triple_scalar_mul_with_g}, + scalar::{secp256r1_fn_neg, secp256r1_fn_reduce}, +}; + +/// Verifies the signature (r, s) over the message hash z using the public key pk +/// Returns true if the signature is valid, false otherwise +pub fn secp256r1_ecdsa_verify( + pk: &[u64; 8], + z: &[u64; 4], + r: &[u64; 4], + s: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { + // r and s must be in the range [1, n-1] + if is_zero(r) || gt(r, &N_MINUS_ONE) { + return false; + } + if is_zero(s) || gt(s, &N_MINUS_ONE) { + return false; + } + + // pk must not be the identity point + if eq(pk, &IDENTITY) { + return false; + } + + // pk must be a valid curve point + let pk_x: [u64; 4] = [pk[0], pk[1], pk[2], pk[3]]; + let pk_y: [u64; 4] = [pk[4], pk[5], pk[6], pk[7]]; + if gt(&pk_x, &P_MINUS_ONE) || gt(&pk_y, &P_MINUS_ONE) { + return false; + } + if !secp256r1_is_on_curve( + pk, + #[cfg(feature = "hints")] + hints, + ) { + return false; + } + + // Ecdsa verification computes (x, y) = [s⁻¹·z (mod n)]G + [s⁻¹·r (mod n)]PK + // and checks that x ≡ r (mod n) + // We can equivalently hint (x,y), verify that + // [z]G + [r]PK + [-s](x,y) == 𝒪, + // and ensure that x ≡ r (mod n), saving us from expensive fn arithmetic + + // Hint the result + let point = fcall_secp256r1_ecdsa_verify( + pk, + z, + r, + s, + #[cfg(feature = "hints")] + hints, + ); + + // Check the recovered point is valid + // Note: Identity point would be raised here + if !secp256r1_is_on_curve( + &point, + #[cfg(feature = "hints")] + hints, + ) { + return false; + } + + // Check that [z]G + [r]PK + [-s](x,y) == 𝒪 + let neg_s = secp256r1_fn_neg( + s, + #[cfg(feature = "hints")] + hints, + ); + if secp256r1_triple_scalar_mul_with_g( + z, + r, + &neg_s, + pk, + &point, + #[cfg(feature = "hints")] + hints, + ) + .is_some() + { + return false; + } + + // Check that x ≡ r (mod n) + let point_x: [u64; 4] = [point[0], point[1], point[2], point[3]]; + eq( + &secp256r1_fn_reduce( + &point_x, + #[cfg(feature = "hints")] + hints, + ), + r, + ) +} + +// ==================== C FFI Functions ==================== + +/// # Safety +/// - `msg_ptr` must point to 4 u64s +/// - `sig_ptr` must point to 8 u64s +/// - `pk_ptr` must point to 8 u64s +/// +/// Returns true if signature is valid +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_secp256r1_ecdsa_verify_c")] +pub unsafe extern "C" fn secp256r1_ecdsa_verify_c( + msg: *const u8, + sig: *const u8, + pk: *const u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) -> bool { + let msg_bytes: &[u8; 32] = &*(msg as *const [u8; 32]); + let sig_bytes: &[u8; 64] = &*(sig as *const [u8; 64]); + let pk_bytes: &[u8; 64] = &*(pk as *const [u8; 64]); + + // Parse r, s from big-endian bytes + let r_bytes: [u8; 32] = sig_bytes[0..32].try_into().unwrap(); + let s_bytes: [u8; 32] = sig_bytes[32..64].try_into().unwrap(); + + // Parse pk_x, pk_y from big-endian bytes + let pk_x_bytes: [u8; 32] = pk_bytes[0..32].try_into().unwrap(); + let pk_y_bytes: [u8; 32] = pk_bytes[32..64].try_into().unwrap(); + + // Convert to little-endian u64 limbs + let z = bytes_be_to_u64_le(msg_bytes); + let r = bytes_be_to_u64_le(&r_bytes); + let s = bytes_be_to_u64_le(&s_bytes); + let pk_x = bytes_be_to_u64_le(&pk_x_bytes); + let pk_y = bytes_be_to_u64_le(&pk_y_bytes); + + let pk: [u64; 8] = [pk_x[0], pk_x[1], pk_x[2], pk_x[3], pk_y[0], pk_y[1], pk_y[2], pk_y[3]]; + secp256r1_ecdsa_verify( + &pk, + &z, + &r, + &s, + #[cfg(feature = "hints")] + hints, + ) +} + +/// Convert big-endian bytes to little-endian u64 limbs (32 bytes -> [u64; 4]) +fn bytes_be_to_u64_le(bytes: &[u8; 32]) -> [u64; 4] { + let mut result = [0u64; 4]; + for i in 0..4 { + for j in 0..8 { + result[3 - i] |= (bytes[i * 8 + j] as u64) << (8 * (7 - j)); + } + } + result +} diff --git a/ziskos/entrypoint/src/zisklib/lib/secp256r1/field.rs b/ziskos/entrypoint/src/zisklib/lib/secp256r1/field.rs new file mode 100644 index 000000000..934be0b84 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/secp256r1/field.rs @@ -0,0 +1,50 @@ +use crate::syscalls::{syscall_arith256_mod, SyscallArith256ModParams}; + +use super::constants::P; + +pub fn secp256r1_fp_add( + x: &[u64; 4], + y: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { + // x·1 + y + let mut params = + SyscallArith256ModParams { a: x, b: &[1, 0, 0, 0], c: y, module: &P, d: &mut [0, 0, 0, 0] }; + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); + *params.d +} + +pub fn secp256r1_fp_mul( + x: &[u64; 4], + y: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { + // x·y + 0 + let mut params = + SyscallArith256ModParams { a: x, b: y, c: &[0, 0, 0, 0], module: &P, d: &mut [0, 0, 0, 0] }; + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); + *params.d +} + +pub fn secp256r1_fp_square( + x: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { + // x·x + 0 + let mut params = + SyscallArith256ModParams { a: x, b: x, c: &[0, 0, 0, 0], module: &P, d: &mut [0, 0, 0, 0] }; + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); + *params.d +} diff --git a/ziskos/entrypoint/src/zisklib/lib/secp256r1/mod.rs b/ziskos/entrypoint/src/zisklib/lib/secp256r1/mod.rs new file mode 100644 index 000000000..794d231fd --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/secp256r1/mod.rs @@ -0,0 +1,10 @@ +mod constants; +mod curve; +mod ecdsa; +mod field; +mod scalar; + +pub use curve::*; +pub use ecdsa::*; +pub use field::*; +pub use scalar::*; diff --git a/ziskos/entrypoint/src/zisklib/lib/secp256r1/scalar.rs b/ziskos/entrypoint/src/zisklib/lib/secp256r1/scalar.rs new file mode 100644 index 000000000..f5b73ca34 --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/secp256r1/scalar.rs @@ -0,0 +1,49 @@ +use crate::{ + syscalls::{syscall_arith256_mod, SyscallArith256ModParams}, + zisklib::lt, +}; + +use super::constants::{N, N_MINUS_ONE}; + +pub fn secp256r1_fn_reduce( + x: &[u64; 4], + #[cfg(feature = "hints")] hints: &mut Vec, +) -> [u64; 4] { + if lt(x, &N) { + return *x; + } + + // x·1 + 0 + let mut params = SyscallArith256ModParams { + a: x, + b: &[1, 0, 0, 0], + c: &[0, 0, 0, 0], + module: &N, + d: &mut [0, 0, 0, 0], + }; + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); + + *params.d +} + +pub fn secp256r1_fn_neg(x: &[u64; 4], #[cfg(feature = "hints")] hints: &mut Vec) -> [u64; 4] { + // x·(-1) + 0 + let mut params = SyscallArith256ModParams { + a: x, + b: &N_MINUS_ONE, + c: &[0, 0, 0, 0], + module: &N, + d: &mut [0, 0, 0, 0], + }; + syscall_arith256_mod( + &mut params, + #[cfg(feature = "hints")] + hints, + ); + + *params.d +} diff --git a/ziskos/entrypoint/src/zisklib/lib/sha256.rs b/ziskos/entrypoint/src/zisklib/lib/sha256.rs new file mode 100644 index 000000000..925d29cba --- /dev/null +++ b/ziskos/entrypoint/src/zisklib/lib/sha256.rs @@ -0,0 +1,134 @@ +use crate::syscalls::{syscall_sha256_f, SyscallSha256Params}; + +use super::is_aligned_8; + +/// SHA-256 initial hash values +const SHA256_INIT: [u32; 8] = [ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, +]; + +/// SHA-256 hash function. For reference: https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf +pub fn sha256(input: &[u8], #[cfg(feature = "hints")] hints: &mut Vec) -> [u8; 32] { + let mut state = SHA256_INIT; + let input_len = input.len(); + + // Process complete 64-byte blocks + let mut offset = 0; + if is_aligned_8(input.as_ptr()) { + // Fast path: input is aligned, use directly + while offset + 64 <= input_len { + let block: &[u8; 64] = input[offset..offset + 64].try_into().unwrap(); + compress_block( + &mut state, + block, + #[cfg(feature = "hints")] + hints, + ); + offset += 64; + } + } else { + // Slow path: input is unaligned, copy each block + let mut aligned_block = [0u8; 64]; + while offset + 64 <= input_len { + aligned_block.copy_from_slice(&input[offset..offset + 64]); + compress_block( + &mut state, + &aligned_block, + #[cfg(feature = "hints")] + hints, + ); + offset += 64; + } + } + + // Handle final block(s) with padding + let remaining = input_len - offset; + let bit_len = (input_len as u64) * 8; + + // We need: remaining bytes + 1 (0x80) + padding + 8 (length) + let mut final_block = [0u8; 64]; + + // Copy remaining bytes + final_block[..remaining].copy_from_slice(&input[offset..]); + + // Append 0x80 + final_block[remaining] = 0x80; + + // If remaining + 9 > 64, we need 2 blocks + if remaining + 9 > 64 { + // First block + compress_block( + &mut state, + &final_block, + #[cfg(feature = "hints")] + hints, + ); + + // Second block + final_block = [0u8; 64]; + final_block[56..64].copy_from_slice(&bit_len.to_be_bytes()); + compress_block( + &mut state, + &final_block, + #[cfg(feature = "hints")] + hints, + ); + } else { + // Single final block + final_block[56..64].copy_from_slice(&bit_len.to_be_bytes()); + compress_block( + &mut state, + &final_block, + #[cfg(feature = "hints")] + hints, + ); + } + + // Convert state to big-endian bytes + let mut result = [0u8; 32]; + for (i, &word) in state.iter().enumerate() { + result[i * 4..(i + 1) * 4].copy_from_slice(&word.to_be_bytes()); + } + + result +} + +/// Compress a single 64-byte block into the state +#[inline] +fn compress_block( + state: &mut [u32; 8], + block: &[u8; 64], + #[cfg(feature = "hints")] hints: &mut Vec, +) { + let state_64: &mut [u64; 4] = unsafe { &mut *(state.as_mut_ptr() as *mut [u64; 4]) }; + let input_u64: &[u64; 8] = unsafe { &*(block.as_ptr() as *const [u64; 8]) }; + let mut sha256_params = SyscallSha256Params { state: state_64, input: input_u64 }; + syscall_sha256_f( + &mut sha256_params, + #[cfg(feature = "hints")] + hints, + ); +} + +/// C-compatible wrapper for full SHA-256 hash +/// +/// # Safety +/// - `input` must point to at least `input_len` bytes +/// - `output` must point to a writable buffer of at least 32 bytes +#[cfg_attr(not(feature = "hints"), no_mangle)] +#[cfg_attr(feature = "hints", export_name = "hints_sha256_c")] +pub unsafe extern "C" fn sha256_c( + input: *const u8, + input_len: usize, + output: *mut u8, + #[cfg(feature = "hints")] hints: &mut Vec, +) { + let input_slice = core::slice::from_raw_parts(input, input_len); + let hash = sha256( + input_slice, + #[cfg(feature = "hints")] + hints, + ); + let output_slice = core::slice::from_raw_parts_mut(output, 32); + output_slice.copy_from_slice(&hash); +} diff --git a/ziskos/entrypoint/src/zisklib/lib/sha256f_compress.rs b/ziskos/entrypoint/src/zisklib/lib/sha256f_compress.rs deleted file mode 100644 index 81c6e57ac..000000000 --- a/ziskos/entrypoint/src/zisklib/lib/sha256f_compress.rs +++ /dev/null @@ -1,77 +0,0 @@ -use crate::syscalls::{syscall_sha256_f, SyscallSha256Params}; - -pub fn sha256f_compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { - let mut state_64 = convert_u32_to_u64(state); - - for block in blocks { - let input_u64 = convert_bytes_to_u64(block); - - let mut sha256_params = SyscallSha256Params { state: &mut state_64, input: &input_u64 }; - syscall_sha256_f(&mut sha256_params); - } - - *state = convert_u64_to_u32(&state_64); -} - -/// C-compatible wrapper for sha256f_compress -/// -/// # Safety -/// - `state_ptr` must point to at least 8 u32s (will be read and written) -/// - `blocks_ptr` must point to at least `num_blocks * 64` bytes -#[no_mangle] -pub unsafe extern "C" fn sha256f_compress_c( - state_ptr: *mut u32, - blocks_ptr: *const u8, - num_blocks: usize, -) { - let state: &mut [u32; 8] = &mut *(state_ptr as *mut [u32; 8]); - let mut state_64 = convert_u32_to_u64(state); - - for i in 0..num_blocks { - let block: &[u8; 64] = &*(blocks_ptr.add(i * 64) as *const [u8; 64]); - let input_u64 = convert_bytes_to_u64(block); - - let mut sha256_params = SyscallSha256Params { state: &mut state_64, input: &input_u64 }; - syscall_sha256_f(&mut sha256_params); - } - - *state = convert_u64_to_u32(&state_64); -} - -#[inline(always)] -fn convert_u32_to_u64(words: &[u32; 8]) -> [u64; 4] { - [ - ((words[0] as u64) << 32) | (words[1] as u64), - ((words[2] as u64) << 32) | (words[3] as u64), - ((words[4] as u64) << 32) | (words[5] as u64), - ((words[6] as u64) << 32) | (words[7] as u64), - ] -} - -#[inline(always)] -fn convert_u64_to_u32(input: &[u64; 4]) -> [u32; 8] { - [ - (input[0] >> 32) as u32, - input[0] as u32, - (input[1] >> 32) as u32, - input[1] as u32, - (input[2] >> 32) as u32, - input[2] as u32, - (input[3] >> 32) as u32, - input[3] as u32, - ] -} - -#[inline(always)] -fn convert_bytes_to_u64(input: &[u8; 64]) -> [u64; 8] { - [ - u64::from_be_bytes(input[0..8].try_into().unwrap()), - u64::from_be_bytes(input[8..16].try_into().unwrap()), - u64::from_be_bytes(input[16..24].try_into().unwrap()), - u64::from_be_bytes(input[24..32].try_into().unwrap()), - u64::from_be_bytes(input[32..40].try_into().unwrap()), - u64::from_be_bytes(input[40..48].try_into().unwrap()), - u64::from_be_bytes(input[48..56].try_into().unwrap()), - u64::from_be_bytes(input[56..64].try_into().unwrap()), - ] -} diff --git a/ziskos/entrypoint/src/zisklib/lib/utils.rs b/ziskos/entrypoint/src/zisklib/lib/utils.rs index 8927ebe98..b16a7cd13 100644 --- a/ziskos/entrypoint/src/zisklib/lib/utils.rs +++ b/ziskos/entrypoint/src/zisklib/lib/utils.rs @@ -1,3 +1,9 @@ +/// Check if a pointer is 8-byte aligned. +#[inline(always)] +pub fn is_aligned_8(ptr: *const u8) -> bool { + (ptr as usize) & 0x7 == 0 +} + /// Given two n-bit number `x` and `y`, compares them and returns true if `x > y`; otherwise, false. pub fn gt(x: &[u64], y: &[u64]) -> bool { debug_assert_eq!(x.len(), y.len(), "x and y must have the same length"); @@ -37,3 +43,26 @@ pub fn eq(x: &[u64], y: &[u64]) -> bool { } true } + +/// Returns true if x == 0 +pub fn is_zero(x: &[u64]) -> bool { + for &word in x { + if word != 0 { + return false; + } + } + true +} + +/// Returns true if x == 1 +pub fn is_one(x: &[u64]) -> bool { + if x[0] != 1 { + return false; + } + for &word in &x[1..] { + if word != 0 { + return false; + } + } + true +} diff --git a/ziskos/entrypoint/src/zisklib/mod.rs b/ziskos/entrypoint/src/zisklib/mod.rs index ddcd0d5ff..ce247341d 100644 --- a/ziskos/entrypoint/src/zisklib/mod.rs +++ b/ziskos/entrypoint/src/zisklib/mod.rs @@ -1,6 +1,6 @@ mod fcalls; mod fcalls_impl; -mod lib; +pub mod lib; pub use fcalls::*; pub use fcalls_impl::*; diff --git a/ziskos/entrypoint/src/ziskos_definitions.rs b/ziskos/entrypoint/src/ziskos_definitions.rs index a606e1651..87fd20688 100644 --- a/ziskos/entrypoint/src/ziskos_definitions.rs +++ b/ziskos/entrypoint/src/ziskos_definitions.rs @@ -3,7 +3,7 @@ pub mod ziskos_config { pub const QEMU_EXIT_ADDR: u64 = 0x100000; pub const QEMU_EXIT_CODE: u64 = 0x5555; - pub const INPUT_ADDR: u64 = 0x9000_0000; + pub const INPUT_ADDR: u64 = 0x4000_0000; pub const OUTPUT_ADDR: u64 = 0xa001_0000; pub const UART_ADDR: u64 = 0xa000_0200; pub const ARCH_ID_ZISK: u64 = 0xFFFEEEE; // TEMPORARY // TODO register one diff --git a/ziskup/ziskup b/ziskup/ziskup index 07844b47a..bf32467c9 100755 --- a/ziskup/ziskup +++ b/ziskup/ziskup @@ -16,6 +16,12 @@ mkdir -p "${ZISK_BIN_DIR}" main() { need_cmd curl + # Check if this is a setup_snark command + if [[ "$1" == "setup_snark" ]]; then + setup_snark_command + exit 0 + fi + while [[ -n $1 ]]; do case $1 in --) @@ -265,6 +271,9 @@ OPTIONS: --provingkey Install the proving key --verifykey Install the verify key --nokey No proving/verify key installation + +COMMANDS: + setup_snark Install SNARK proving key (provingKeySnark) EOF } @@ -365,6 +374,72 @@ install_setup() { fi } +# Install the SNARK proving key +install_setup_snark() { + local KEY_FILE="zisk-provingkey-plonk-inputs-${SETUP_VERSION}.tar.gz" + + step "Downloading SNARK proving key version ${SETUP_VERSION}. This may take a while..." + ensure download "${BUCKET_URL}/${KEY_FILE}" "${KEY_FILE}" + ensure download "${BUCKET_URL}/${KEY_FILE}.md5" "${KEY_FILE}.md5" + + # Verify the md5 checksum + ensure md5sum -c "${KEY_FILE}.md5" + + # Delete old provingKey, verifyKey and cache folders + rm -rf "${HOME}/.zisk/provingKeySnark" + + # Extract the key + step "Installing SNARK proving key version ${SETUP_VERSION}..." + if [ "${PLATFORM}" = "linux" ]; then + ensure tar --overwrite -xf "${KEY_FILE}" -C "${HOME}/.zisk" + else + ensure tar -xf "${KEY_FILE}" -C "${HOME}/.zisk" + fi + + rm -f "${KEY_FILE}" + rm -f "${KEY_FILE}.md5" + say "Installed SNARK proving key version ${SETUP_VERSION}" +} + +# Setup SNARK command handler +setup_snark_command() { + # Print banner + banner + + # Detect platform + uname_s=$(uname -s) + PLATFORM=$(tolower "${uname_s}") + + case "${PLATFORM}" in + linux) ;; + darwin | mac*) ;; + *) + err "unsupported platform ${PLATFORM}" + exit 1 + ;; + esac + + # Determine version from cargo-zisk + if [[ ! -f "${CARGO_ZISK}" ]]; then + err "cargo-zisk not found. Please install ZisK first with 'ziskup'" + exit 1 + fi + + ZISK_VERSION=$("${CARGO_ZISK}" --version | awk '{print $2}') + + # Parse version to get setup version + IFS='.' read -r major minor patch <<< "${ZISK_VERSION}" + SETUP_VERSION="${major}.${minor}.0" + + step "Installing SNARK proving key for ZisK version ${ZISK_VERSION}..." + + # Install the SNARK proving key + install_setup_snark + + step "Done! SNARK proving key installation completed." + echo +} + # Banner Function for ZisK banner() { printf "