diff --git a/Cargo.lock b/Cargo.lock index 4e5a4599..3511ba66 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -22,19 +22,11 @@ dependencies = [ "p3-matrix", "p3-uni-stark", "p3-util", - "packed_pcs", "rand", "tracing", "utils", - "whir-p3", ] -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - [[package]] name = "ansi_term" version = "0.12.1" @@ -44,12 +36,56 @@ dependencies = [ "winapi", ] +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +dependencies = [ + "windows-sys 0.60.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.60.2", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -59,10 +95,10 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "backend" version = "0.3.0" -source = "git+https://github.com/leanEthereum/multilinear-toolkit.git#1d5b5259518a3d74a4cb7d13a9caf65316893a6a" +source = "git+https://github.com/leanEthereum/multilinear-toolkit.git#ed923559dcb49da4c83cfd6ccd5e00ffb8119aa8" dependencies = [ "fiat-shamir", - "itertools 0.14.0", + "itertools", "p3-field", "p3-util", "rand", @@ -79,45 +115,12 @@ dependencies = [ "generic-array", ] -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" -[[package]] -name = "ciborium" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" - -[[package]] -name = "ciborium-ll" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" -dependencies = [ - "ciborium-io", - "half", -] - [[package]] name = "clap" version = 
"4.5.50" @@ -125,6 +128,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] @@ -133,8 +137,22 @@ version = "4.5.50" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" dependencies = [ + "anstream", "anstyle", "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -143,6 +161,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "colored" version = "3.0.0" @@ -155,7 +179,7 @@ dependencies = [ [[package]] name = "constraints-folder" version = "0.3.0" -source = "git+https://github.com/leanEthereum/multilinear-toolkit.git#1d5b5259518a3d74a4cb7d13a9caf65316893a6a" +source = "git+https://github.com/leanEthereum/multilinear-toolkit.git#ed923559dcb49da4c83cfd6ccd5e00ffb8119aa8" dependencies = [ "fiat-shamir", "p3-air", @@ -181,37 +205,6 @@ dependencies = [ "libc", ] -[[package]] -name = "criterion" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" -dependencies = [ - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "itertools 0.13.0", - "num-traits", - "oorandom", - "regex", - "serde", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" -dependencies = [ - "cast", - "itertools 0.13.0", -] - [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -237,12 +230,6 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - [[package]] name = "crypto-common" version = "0.1.6" @@ -294,7 +281,7 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "fiat-shamir" version = "0.1.0" -source = "git+https://github.com/leanEthereum/fiat-shamir.git#40d6492b109af9e22a9370a6e674df6f7e4db84e" +source = "git+https://github.com/leanEthereum/fiat-shamir.git#211b12c35c9742c3d2ec0477381954208f97986c" dependencies = [ "p3-challenger", "p3-field", @@ -324,24 +311,16 @@ dependencies = [ ] [[package]] -name = "half" -version = "2.7.1" +name = "heck" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" -dependencies = [ - "cfg-if", - "crunchy", - "zerocopy", -] +checksum = 
"2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] -name = "itertools" -version = "0.13.0" +name = "is_terminal_polyfill" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itertools" @@ -368,23 +347,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" name = "lean-multisig" version = "0.1.0" dependencies = [ - "air", - "criterion", - "multilinear-toolkit", - "p3-air", - "p3-challenger", - "p3-field", - "p3-koala-bear", - "p3-matrix", - "p3-poseidon2", - "p3-poseidon2-air", - "p3-uni-stark", - "p3-util", - "packed_pcs", - "rand", + "clap", + "poseidon_circuit", "rec_aggregation", - "utils", - "whir-p3", ] [[package]] @@ -436,6 +401,7 @@ dependencies = [ "packed_pcs", "pest", "pest_derive", + "poseidon_circuit", "rand", "rayon", "tracing", @@ -522,7 +488,7 @@ checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "multilinear-toolkit" version = "0.3.0" -source = "git+https://github.com/leanEthereum/multilinear-toolkit.git#1d5b5259518a3d74a4cb7d13a9caf65316893a6a" +source = "git+https://github.com/leanEthereum/multilinear-toolkit.git#ed923559dcb49da4c83cfd6ccd5e00ffb8119aa8" dependencies = [ "backend", "constraints-folder", @@ -577,15 +543,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] -name = "oorandom" -version = "11.1.5" +name = "once_cell_polyfill" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "p3-air" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ "p3-field", "p3-matrix", @@ -594,7 +560,7 @@ dependencies = [ [[package]] name = "p3-baby-bear" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ "p3-field", "p3-mds", @@ -607,7 +573,7 @@ dependencies = [ [[package]] name = "p3-challenger" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ "p3-field", "p3-maybe-rayon", @@ -619,9 +585,9 @@ dependencies = [ [[package]] name = "p3-commit" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ - "itertools 0.14.0", + "itertools", "p3-challenger", "p3-dft", "p3-field", @@ -633,9 +599,9 @@ dependencies = [ [[package]] name = "p3-dft" version 
= "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ - "itertools 0.14.0", + "itertools", "p3-field", "p3-matrix", "p3-maybe-rayon", @@ -646,9 +612,9 @@ dependencies = [ [[package]] name = "p3-field" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ - "itertools 0.14.0", + "itertools", "num-bigint", "p3-maybe-rayon", "p3-util", @@ -661,7 +627,7 @@ dependencies = [ [[package]] name = "p3-interpolation" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ "p3-field", "p3-matrix", @@ -672,9 +638,9 @@ dependencies = [ [[package]] name = "p3-koala-bear" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ - "itertools 0.14.0", + "itertools", "num-bigint", "p3-field", "p3-monty-31", @@ -688,9 +654,9 @@ dependencies = [ [[package]] name = "p3-matrix" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ - "itertools 0.14.0", + "itertools", "p3-field", "p3-maybe-rayon", "p3-util", @@ -703,7 +669,7 @@ dependencies = [ [[package]] name = "p3-maybe-rayon" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ "rayon", ] @@ -711,7 +677,7 @@ dependencies = [ [[package]] name = "p3-mds" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ "p3-dft", "p3-field", @@ -723,9 +689,9 @@ dependencies = [ [[package]] name = "p3-merkle-tree" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ - "itertools 0.14.0", + "itertools", "p3-commit", "p3-field", "p3-matrix", @@ -740,9 +706,9 @@ dependencies = [ [[package]] name = "p3-monty-31" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ - "itertools 0.14.0", + "itertools", 
"num-bigint", "p3-dft", "p3-field", @@ -762,7 +728,7 @@ dependencies = [ [[package]] name = "p3-poseidon2" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ "p3-field", "p3-mds", @@ -774,7 +740,7 @@ dependencies = [ [[package]] name = "p3-poseidon2-air" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ "p3-air", "p3-field", @@ -788,9 +754,9 @@ dependencies = [ [[package]] name = "p3-symmetric" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ - "itertools 0.14.0", + "itertools", "p3-field", "serde", ] @@ -798,9 +764,9 @@ dependencies = [ [[package]] name = "p3-uni-stark" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ - "itertools 0.14.0", + "itertools", "p3-air", "p3-challenger", "p3-commit", @@ -816,7 +782,7 @@ dependencies = [ [[package]] name = "p3-util" version = "0.3.0" -source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#1b3305de665b9d13a9cedfb88ab8dca94ebad116" +source = "git+https://github.com/TomWambsgans/Plonky3.git?branch=lean-multisig#59a799ce00907553aeef3d7cf38f616023e9ad5f" dependencies = [ "rayon", "serde", @@ -892,6 +858,22 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +[[package]] +name = "poseidon_circuit" +version = "0.1.0" +dependencies = [ + "multilinear-toolkit", + "p3-field", + "p3-koala-bear", + "p3-monty-31", + "p3-poseidon2", + "packed_pcs", + "rand", + "tracing", + "utils", + "whir-p3", +] + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -903,9 +885,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.102" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e0f6df8eaa422d97d72edcd152e1451618fed47fabbdbd5a8864167b1d4aff7" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] @@ -1003,18 +985,6 @@ dependencies = [ "xmss", ] -[[package]] -name = "regex" -version = "1.12.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - [[package]] name = "regex-automata" version = "0.4.13" @@ -1038,15 +1008,6 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - [[package]] name = "serde" version = "1.0.228" @@ -1122,10 +1083,16 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "sumcheck" version = "0.3.0" -source = "git+https://github.com/leanEthereum/multilinear-toolkit.git#1d5b5259518a3d74a4cb7d13a9caf65316893a6a" +source = "git+https://github.com/leanEthereum/multilinear-toolkit.git#ed923559dcb49da4c83cfd6ccd5e00ffb8119aa8" dependencies = [ "backend", "constraints-folder", @@ -1177,16 +1144,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "tracing" version = "0.1.41" @@ -1301,6 +1258,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "utils" version = "0.1.0" @@ -1364,16 +1327,6 @@ dependencies = [ "xmss", ] -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" @@ -1386,9 +1339,9 @@ dependencies = [ [[package]] name = "whir-p3" version = "0.1.0" -source = "git+https://github.com/TomWambsgans/whir-p3?branch=lean-multisig#b2812e409a6465491129db457e3693b0c0127800" +source = "git+https://github.com/TomWambsgans/whir-p3?branch=lean-multisig#57c8d12015260a6aca706a021364225e5ebdd636" dependencies = [ - "itertools 0.14.0", + "itertools", "multilinear-toolkit", "p3-baby-bear", "p3-challenger", @@ -1426,15 +1379,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -[[package]] -name = "winapi-util" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" -dependencies = [ - "windows-sys 0.61.2", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -1453,7 +1397,16 @@ version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", ] [[package]] @@ -1471,14 +1424,31 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -1487,48 +1457,96 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = 
"windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "wit-bindgen" version = "0.46.0" @@ -1550,6 +1568,7 @@ dependencies = [ "p3-field", "p3-koala-bear", "p3-matrix", + "p3-monty-31", "p3-poseidon2", "p3-poseidon2-air", "p3-symmetric", @@ -1557,6 +1576,7 @@ dependencies = [ "packed_pcs", "pest", "pest_derive", + "poseidon_circuit", "rand", "rayon", "tracing", diff --git a/Cargo.toml b/Cargo.toml index f5785a71..eae85faa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,9 +51,11 @@ lean_prover = { path = "crates/lean_prover" } rec_aggregation = { path = "crates/rec_aggregation" } witness_generation = { path = "crates/lean_prover/witness_generation" } vm_air = { path = "crates/lean_prover/vm_air" } +poseidon_circuit = { path = "crates/poseidon_circuit" } # External thiserror = "2.0" +clap = { version = "4.3.10", features = ["derive"] } rand = "0.9.2" sha3 = "0.10.8" rayon = "1.5.1" @@ -78,26 +80,15 @@ p3-poseidon2-air = { git = "https://github.com/TomWambsgans/Plonky3.git", branch p3-goldilocks = { git = "https://github.com/TomWambsgans/Plonky3.git", branch = "lean-multisig" } p3-challenger = { git = "https://github.com/TomWambsgans/Plonky3.git", branch = "lean-multisig" } p3-util = { git = "https://github.com/TomWambsgans/Plonky3.git", branch = "lean-multisig" } +p3-monty-31 = { git = "https://github.com/TomWambsgans/Plonky3.git", branch = "lean-multisig" } whir-p3 = { git = "https://github.com/TomWambsgans/whir-p3", branch = "lean-multisig" } multilinear-toolkit = { git = "https://github.com/leanEthereum/multilinear-toolkit.git" } [dependencies] -air.workspace = true -p3-field.workspace = true -p3-koala-bear.workspace = true -p3-poseidon2.workspace = true -rand.workspace = true -p3-poseidon2-air.workspace = true -p3-matrix.workspace = true -p3-challenger.workspace = true -whir-p3.workspace = true -p3-uni-stark.workspace = true -utils.workspace = true -p3-util.workspace = true -packed_pcs.workspace = true -p3-air.workspace = true -multilinear-toolkit.workspace = true +clap.workspace = true +rec_aggregation.workspace = true +poseidon_circuit.workspace = true # [patch."https://github.com/TomWambsgans/Plonky3.git"] # p3-koala-bear = { path = "../zk/Plonky3/koala-bear" } @@ -110,6 +101,7 @@ multilinear-toolkit.workspace = true # p3-poseidon2-air = { path = "../zk/Plonky3/poseidon2-air" } # p3-dft = { path = "../zk/Plonky3/dft" } # p3-challenger = { path = "../zk/Plonky3/challenger" } +# p3-monty-31 = { path = "../zk/Plonky3/monty-31" } # [patch."https://github.com/TomWambsgans/whir-p3.git"] # whir-p3 = { path = "../zk/whir/fork-whir-p3" } @@ -117,21 +109,5 @@ multilinear-toolkit.workspace = true # [patch."https://github.com/leanEthereum/multilinear-toolkit.git"] # multilinear-toolkit = { path = "../zk/multilinear-toolkit" } 
-[dev-dependencies] -criterion = { version = "0.7", default-features = false, features = ["cargo_bench_support"] } -rec_aggregation.workspace = true - [profile.release] lto = "thin" - -[[bench]] name = "poseidon2" harness = false - -[[bench]] name = "recursion" harness = false - -[[bench]] name = "xmss" harness = false diff --git a/README.md b/README.md index a6831e46..9980faeb 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ XMSS + minimal [zkVM](minimal_zkVM.pdf) = lightweight PQ signatures, with unboun ## Proving System -- AIR tables committed via multilinear polynomial, using [WHIR](https://eprint.iacr.org/2024/1586.pdf) +- [WHIR](https://eprint.iacr.org/2024/1586.pdf) - [SuperSpartan](https://eprint.iacr.org/2023/552.pdf), with AIR-specific optimizations developed by W. Borgeaud in [A simple multivariate AIR argument inspired by SuperSpartan](https://solvable.group/posts/super-air/#fnref:1) - [Univariate Skip](https://eprint.iacr.org/2024/108.pdf) - [Logup*](https://eprint.iacr.org/2025/946.pdf) @@ -14,48 +14,46 @@ XMSS + minimal [zkVM](minimal_zkVM.pdf) = lightweight PQ signatures, with unboun The VM design is inspired by the famous [Cairo paper](https://eprint.iacr.org/2021/1063.pdf). -Details on how to prove AIR constraints in the multilinear settings are described in [Whirlaway.pdf](Whirlaway.pdf). - -[Deep-wiki](https://deepwiki.com/leanEthereum/leanMultisig/1-overview) (thanks [adust09](https://github.com/adust09)) - - ## Benchmarks -cpu: i9-12900H, ram: 32 gb - -> TLDR: Slow, **but there is hope** (cf [TODO](TODO.md)) +Benchmarks are performed on two laptops: +- i9-12900H, 32 GB of RAM +- Mac M4 Max -target ≈ 128 bits of security, currently using conjecture: 4.12 of [WHIR](https://eprint.iacr.org/2024/1586.pdf), "up to capacity" (TODO: a version without any conjecture, requires an extension of koala-bear of degree > 5) +target ≈ 128 bits of security, currently using conjecture 4.12 of [WHIR](https://eprint.iacr.org/2024/1586.pdf), "up to capacity" (TODO: provable security) ### Poseidon2 +Poseidon2 over 16 KoalaBear field elements. + ```console -RUSTFLAGS='-C target-cpu=native' cargo run --release +RUSTFLAGS='-C target-cpu=native' cargo run --release -- poseidon --log-n-perms 20 ``` -50 % over 16 field elements, 50 % over 24 field elements. rate = 1/2 - ![Alt text](docs/benchmark_graphs/graphs/raw_poseidons.svg) ### Recursion -```console -RUSTFLAGS='-C target-cpu=native' cargo test --release --package rec_aggregation --lib -- recursion::test_whir_recursion --nocapture -``` The full recursion program is not finished yet. Instead, we prove validity of a WHIR opening, with 25 variables, and rate = 1/4. +```console +RUSTFLAGS='-C target-cpu=native' cargo run --release -- recursion +``` ![Alt text](docs/benchmark_graphs/graphs/recursive_whir_opening.svg) ### XMSS aggregation ```console -RUSTFLAGS='-C target-cpu=native' NUM_XMSS_AGGREGATED='500' cargo test --release --package rec_aggregation --lib -- xmss_aggregate::test_xmss_aggregate --nocapture +RUSTFLAGS='-C target-cpu=native' cargo run --release -- xmss --n-signatures 800 ``` -500 XMSS aggregated. "Trivial encoding" (for now). +[Trivial encoding](docs/XMSS_trivial_encoding.pdf) (for now).
+ + +![Alt text](docs/benchmark_graphs/graphs/xmss_aggregated.svg) -![Alt text](docs/benchmark_graphs/graphs/xmss_aggregated_time.svg) ![Alt text](docs/benchmark_graphs/graphs/xmss_aggregated_overhead.svg) ### Proof size @@ -65,7 +63,7 @@ With conjecture "up to capacity", current proofs with rate = 1/2 are about ≈ 4 - The remaining 100 - 200 KiB will be significantly reduced in the future (this part has not been optimized at all). - WHIR proof size will also be reduced, thanks to merkle pruning (TODO). -Reasonable target: 256 KiB for fast proof, 128 KiB for slower proofs (rate = 1/4 or 1/8). +Target: 256 KiB for fast proofs, 128 KiB for slower proofs (rate = 1/4 or 1/8). ## Credits diff --git a/benches/poseidon2.rs b/benches/poseidon2.rs deleted file mode 100644 index 7d2d9e55..00000000 --- a/benches/poseidon2.rs +++ /dev/null @@ -1,42 +0,0 @@ -use std::{hint::black_box, time::Duration}; - -use air::examples::prove_poseidon2::{Poseidon2Config, prove_poseidon2}; - use criterion::{Criterion, Throughput, criterion_group, criterion_main}; -use whir_p3::{FoldingFactor, SecurityAssumption}; - -fn bench_poseidon2(c: &mut Criterion) { - const L16: usize = 17; - const L24: usize = 17; - - let mut group = c.benchmark_group("poseidon2"); - group.sample_size(10); - group.measurement_time(Duration::from_secs(60)); - group.warm_up_time(Duration::from_secs(10)); - group.throughput(Throughput::Elements((1u64 << L16) + (1u64 << L24))); - - let config = Poseidon2Config { - log_n_poseidons_16: L16, - log_n_poseidons_24: L24, - univariate_skips: 4, - folding_factor: FoldingFactor::new(7, 4), - log_inv_rate: 1, - soundness_type: SecurityAssumption::CapacityBound, - pow_bits: 16, - security_level: 128, - rs_domain_initial_reduction_factor: 5, - max_num_variables_to_send_coeffs: 3, - display_logs: false, - }; - - group.bench_function("poseidon2", |b| { - b.iter(|| { - let result = prove_poseidon2(black_box(&config)); - black_box(result.prover_time); - }); - }); - - group.finish(); -} - -criterion_group!(benches, bench_poseidon2); -criterion_main!(benches); diff --git a/benches/recursion.rs b/benches/recursion.rs deleted file mode 100644 index ff086a25..00000000 --- a/benches/recursion.rs +++ /dev/null @@ -1,23 +0,0 @@ -use std::{hint::black_box, time::Duration}; - -use criterion::{Criterion, criterion_group, criterion_main}; -use rec_aggregation::bench_recursion; - -fn bench_recursion_benchmark(c: &mut Criterion) { - let mut group = c.benchmark_group("recursion"); - group.sample_size(10); - group.measurement_time(Duration::from_secs(60)); - group.warm_up_time(Duration::from_secs(10)); - - group.bench_function("recursion", |b| { - b.iter(|| { - let duration = bench_recursion(); - black_box(duration); - }); - }); - - group.finish(); -} - -criterion_group!(benches, bench_recursion_benchmark); -criterion_main!(benches); diff --git a/benches/xmss.rs b/benches/xmss.rs deleted file mode 100644 index 611a23d9..00000000 --- a/benches/xmss.rs +++ /dev/null @@ -1,26 +0,0 @@ -use std::{hint::black_box, time::Duration}; - -use criterion::{Criterion, Throughput, criterion_group, criterion_main}; -use rec_aggregation::run_xmss_benchmark; - -fn bench_xmss_benchmark(c: &mut Criterion) { - const N: usize = 500; - - let mut group = c.benchmark_group("xmss"); - group.sample_size(10); - group.measurement_time(Duration::from_secs(60)); - group.warm_up_time(Duration::from_secs(10)); - group.throughput(Throughput::Elements(N as u64)); - - group.bench_function("xmss", |b| { - b.iter(|| { - let duration = 
run_xmss_benchmark(N).proving_time; - black_box(duration); - }); - }); - - group.finish(); -} - -criterion_group!(benches, bench_xmss_benchmark); -criterion_main!(benches); diff --git a/crates/air/Cargo.toml b/crates/air/Cargo.toml index 82cc89d5..04d287b5 100644 --- a/crates/air/Cargo.toml +++ b/crates/air/Cargo.toml @@ -15,13 +15,8 @@ p3-uni-stark.workspace = true p3-matrix.workspace = true p3-util.workspace = true multilinear-toolkit.workspace = true -p3-koala-bear.workspace = true -rand.workspace = true -whir-p3.workspace = true -packed_pcs.workspace = true - [dev-dependencies] p3-koala-bear.workspace = true p3-matrix.workspace = true -rand.workspace = true +rand.workspace = true \ No newline at end of file diff --git a/crates/air/src/examples/mod.rs b/crates/air/src/examples/mod.rs deleted file mode 100644 index fb5c0672..00000000 --- a/crates/air/src/examples/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg(test)] -mod simple_air; - -pub mod prove_poseidon2; diff --git a/crates/air/src/examples/prove_poseidon2.rs b/crates/air/src/examples/prove_poseidon2.rs deleted file mode 100644 index 18ef7c6f..00000000 --- a/crates/air/src/examples/prove_poseidon2.rs +++ /dev/null @@ -1,358 +0,0 @@ -use crate::table::AirTable; -use multilinear_toolkit::prelude::*; -use p3_air::BaseAir; -use p3_field::PrimeField64; -use p3_koala_bear::{KoalaBear, QuinticExtensionFieldKB}; -use packed_pcs::{ - ColDims, packed_pcs_commit, packed_pcs_global_statements_for_prover, - packed_pcs_global_statements_for_verifier, packed_pcs_parse_commitment, -}; -use rand::{Rng, SeedableRng, rngs::StdRng}; -use std::collections::BTreeMap; -use std::fmt; -use std::time::{Duration, Instant}; -use utils::{ - MyChallenger, Poseidon16Air, Poseidon24Air, build_poseidon_16_air, - build_poseidon_16_air_packed, build_poseidon_24_air, build_poseidon_24_air_packed, - build_prover_state, build_verifier_state, generate_trace_poseidon_16, - generate_trace_poseidon_24, init_tracing, poseidon16_permute, poseidon24_permute, -}; -use whir_p3::{ - FoldingFactor, SecurityAssumption, WhirConfig, WhirConfigBuilder, precompute_dft_twiddles, -}; - -type F = KoalaBear; -type EF = QuinticExtensionFieldKB; - -#[derive(Clone, Debug)] -pub struct Poseidon2Benchmark { - pub log_n_poseidons_16: usize, - pub log_n_poseidons_24: usize, - pub prover_time: Duration, - pub verifier_time: Duration, - pub proof_size: f64, // in bytes -} - -impl fmt::Display for Poseidon2Benchmark { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!( - f, - "Proved {} poseidon2-16 and {} poseidon2-24 in {:.3} s ({} / s)", - 1 << self.log_n_poseidons_16, - 1 << self.log_n_poseidons_24, - self.prover_time.as_millis() as f64 / 1000.0, - (f64::from((1 << self.log_n_poseidons_16) + (1 << self.log_n_poseidons_24)) - / self.prover_time.as_secs_f64()) - .round() as usize - )?; - writeln!( - f, - "Proof size: {:.1} KiB (not optimized)", - self.proof_size / 1024.0 - )?; - writeln!(f, "Verification: {} ms", self.verifier_time.as_millis()) - } -} - -#[derive(Clone, Debug)] -pub struct Poseidon2Config { - pub log_n_poseidons_16: usize, - pub log_n_poseidons_24: usize, - pub univariate_skips: usize, - pub folding_factor: FoldingFactor, - pub log_inv_rate: usize, - pub soundness_type: SecurityAssumption, - pub pow_bits: usize, - pub security_level: usize, - pub rs_domain_initial_reduction_factor: usize, - pub max_num_variables_to_send_coeffs: usize, - pub display_logs: bool, -} - -struct PoseidonSetup { - n_columns_16: usize, - n_columns_24: usize, - witness_columns_16: Vec>, 
- witness_columns_24: Vec>, - table_16: AirTable, Poseidon16Air>>, - table_24: AirTable, Poseidon24Air>>, -} - -struct ProverArtifacts { - prover_time: Duration, - whir_config_builder: WhirConfigBuilder, - whir_config: WhirConfig, - dims: Vec>, -} - -fn prepare_poseidon(config: &Poseidon2Config) -> PoseidonSetup { - let n_poseidons_16 = 1 << config.log_n_poseidons_16; - let n_poseidons_24 = 1 << config.log_n_poseidons_24; - - let poseidon_air_16 = build_poseidon_16_air(); - let poseidon_air_16_packed = build_poseidon_16_air_packed(); - let poseidon_air_24 = build_poseidon_24_air(); - let poseidon_air_24_packed = build_poseidon_24_air_packed(); - - let n_columns_16 = poseidon_air_16.width(); - let n_columns_24 = poseidon_air_24.width(); - - let mut rng = StdRng::seed_from_u64(0); - let inputs_16: Vec<[F; 16]> = (0..n_poseidons_16).map(|_| Default::default()).collect(); - let inputs_24: Vec<[F; 24]> = (0..n_poseidons_24) - .map(|_| std::array::from_fn(|_| rng.random())) - .collect(); - let compress = vec![true; n_poseidons_16]; - let index_res: Vec = vec![0; n_poseidons_16]; // unused - - let witness_matrix_16 = generate_trace_poseidon_16(&inputs_16, &compress, &index_res); - let witness_matrix_24 = generate_trace_poseidon_24(&inputs_24); - - assert_eq!( - &witness_matrix_16.values[n_columns_16 - 16..n_columns_16 - 8], - &poseidon16_permute(witness_matrix_16.values[0..16].try_into().unwrap())[..8] - ); - assert_eq!( - &witness_matrix_24.values[n_columns_24 - 8..n_columns_24], - &poseidon24_permute(witness_matrix_24.values[0..24].try_into().unwrap())[16..] - ); - - let witness_matrix_16_transposed = witness_matrix_16.transpose(); - let witness_matrix_24_transposed = witness_matrix_24.transpose(); - - let witness_columns_16 = (0..n_columns_16) - .map(|row| { - witness_matrix_16_transposed.values[row * n_poseidons_16..(row + 1) * n_poseidons_16] - .to_vec() - }) - .collect::>(); - let witness_columns_24 = (0..n_columns_24) - .map(|row| { - witness_matrix_24_transposed.values[row * n_poseidons_24..(row + 1) * n_poseidons_24] - .to_vec() - }) - .collect::>(); - - let table_16: AirTable, Poseidon16Air>> = - AirTable::new(poseidon_air_16, poseidon_air_16_packed); - let table_24: AirTable, Poseidon24Air>> = - AirTable::new(poseidon_air_24, poseidon_air_24_packed); - - PoseidonSetup { - n_columns_16, - n_columns_24, - witness_columns_16, - witness_columns_24, - table_16, - table_24, - } -} - -fn run_prover_phase( - config: &Poseidon2Config, - setup: &PoseidonSetup, - witness_16: &[&[F]], - witness_24: &[&[F]], - prover_state: &mut FSProver, -) -> ProverArtifacts { - let start = Instant::now(); - - let whir_config_builder = WhirConfigBuilder { - folding_factor: config.folding_factor, - soundness_type: config.soundness_type, - pow_bits: config.pow_bits, - max_num_variables_to_send_coeffs: config.max_num_variables_to_send_coeffs, - rs_domain_initial_reduction_factor: config.rs_domain_initial_reduction_factor, - security_level: config.security_level, - starting_log_inv_rate: config.log_inv_rate, - }; - - precompute_dft_twiddles::(1 << 24); - - let dims = [ - vec![ColDims::full(config.log_n_poseidons_16); setup.n_columns_16], - vec![ColDims::full(config.log_n_poseidons_24); setup.n_columns_24], - ] - .concat(); - let log_smallest_decomposition_chunk = 0; - let commited_slices = setup - .witness_columns_16 - .iter() - .chain(setup.witness_columns_24.iter()) - .map(Vec::as_slice) - .collect::>(); - - let commitment_witness = packed_pcs_commit( - &whir_config_builder, - &commited_slices, - &dims, - 
prover_state, - log_smallest_decomposition_chunk, - ); - - let (p16_point, evaluations_remaining_to_prove_16) = - setup - .table_16 - .prove_base(prover_state, config.univariate_skips, witness_16); - let (p24_point, evaluations_remaining_to_prove_24) = - setup - .table_24 - .prove_base(prover_state, config.univariate_skips, witness_24); - - let global_statements_to_prove = packed_pcs_global_statements_for_prover( - &commited_slices, - &dims, - log_smallest_decomposition_chunk, - &evaluations_remaining_to_prove_16 - .into_iter() - .map(|v| vec![Evaluation::new(p16_point.clone(), v)]) - .chain( - evaluations_remaining_to_prove_24 - .into_iter() - .map(|v| vec![Evaluation::new(p24_point.clone(), v)]), - ) - .collect::>(), - prover_state, - ); - let whir_config = WhirConfig::new( - whir_config_builder.clone(), - commitment_witness.packed_polynomial.by_ref().n_vars(), - ); - whir_config.prove( - prover_state, - global_statements_to_prove, - commitment_witness.inner_witness, - &commitment_witness.packed_polynomial.by_ref(), - ); - - ProverArtifacts { - prover_time: start.elapsed(), - whir_config_builder, - whir_config, - dims, - } -} - -fn run_verifier_phase( - config: &Poseidon2Config, - setup: &PoseidonSetup, - artifacts: &ProverArtifacts, - prover_state: &FSProver, -) -> Duration { - let start = Instant::now(); - let mut verifier_state = build_verifier_state(prover_state); - let log_smallest_decomposition_chunk = 0; // unused (everything is power of two) - - let packed_parsed_commitment = packed_pcs_parse_commitment( - &artifacts.whir_config_builder, - &mut verifier_state, - &artifacts.dims, - log_smallest_decomposition_chunk, - ) - .unwrap(); - - let (p16_point, evaluations_remaining_to_verify_16) = setup - .table_16 - .verify( - &mut verifier_state, - config.univariate_skips, - config.log_n_poseidons_16, - ) - .unwrap(); - let (p24_point, evaluations_remaining_to_verify_24) = setup - .table_24 - .verify( - &mut verifier_state, - config.univariate_skips, - config.log_n_poseidons_24, - ) - .unwrap(); - - let global_statements_to_verify = packed_pcs_global_statements_for_verifier( - &artifacts.dims, - log_smallest_decomposition_chunk, - &evaluations_remaining_to_verify_16 - .into_iter() - .map(|v| vec![Evaluation::new(p16_point.clone(), v)]) - .chain( - evaluations_remaining_to_verify_24 - .into_iter() - .map(|v| vec![Evaluation::new(p24_point.clone(), v)]), - ) - .collect::>(), - &mut verifier_state, - &BTreeMap::default(), - ) - .unwrap(); - artifacts - .whir_config - .verify( - &mut verifier_state, - &packed_parsed_commitment, - global_statements_to_verify, - ) - .unwrap(); - - start.elapsed() -} - -pub fn prove_poseidon2(config: &Poseidon2Config) -> Poseidon2Benchmark { - if config.display_logs { - init_tracing(); - } - - let setup = prepare_poseidon(config); - - let mut prover_state = build_prover_state(); - let artifacts = run_prover_phase( - config, - &setup, - &setup - .witness_columns_16 - .iter() - .map(Vec::as_slice) - .collect::>(), - &setup - .witness_columns_24 - .iter() - .map(Vec::as_slice) - .collect::>(), - &mut prover_state, - ); - let verifier_time = run_verifier_phase(config, &setup, &artifacts, &prover_state); - - let proof_size = prover_state.proof_data().len() as f64 * (F::ORDER_U64 as f64).log2() / 8.0; - - Poseidon2Benchmark { - log_n_poseidons_16: config.log_n_poseidons_16, - log_n_poseidons_24: config.log_n_poseidons_24, - prover_time: artifacts.prover_time, - verifier_time, - proof_size, - } -} - -#[cfg(test)] -mod tests { - - use super::*; - - #[test] - fn 
test_prove_poseidon2() { - let config = Poseidon2Config { - log_n_poseidons_16: 13, - log_n_poseidons_24: 12, - univariate_skips: 4, - folding_factor: FoldingFactor::new(5, 3), - log_inv_rate: 2, - soundness_type: SecurityAssumption::CapacityBound, - pow_bits: 13, - security_level: 128, - rs_domain_initial_reduction_factor: 1, - max_num_variables_to_send_coeffs: 7, - display_logs: false, - }; - let benchmark = prove_poseidon2(&config); - println!("\n{benchmark}"); - } -} diff --git a/crates/air/src/lib.rs b/crates/air/src/lib.rs index 191d066f..fb02e7cd 100644 --- a/crates/air/src/lib.rs +++ b/crates/air/src/lib.rs @@ -12,38 +12,29 @@ mod uni_skip_utils; mod utils; mod verify; -pub mod examples; +#[cfg(test)] +pub mod tests; -pub trait NormalAir>>: +pub trait MyAir>>: Air>> + for<'a> Air, EF>> + for<'a> Air> + for<'a> Air, EF>> + for<'a> Air> -{ -} - -pub trait PackedAir>>: - for<'a> Air> + + for<'a> Air> + for<'a> Air> { } -impl NormalAir for A +impl MyAir for A where EF: ExtensionField>, A: Air>> + for<'a> Air, EF>> + for<'a> Air> + for<'a> Air, EF>> - + for<'a> Air>, -{ -} - -impl PackedAir for A -where - EF: ExtensionField>, - A: for<'a> Air> + + for<'a> Air> + + for<'a> Air> + for<'a> Air>, { } diff --git a/crates/air/src/prove.rs b/crates/air/src/prove.rs index b0650ea8..db1f18c3 100644 --- a/crates/air/src/prove.rs +++ b/crates/air/src/prove.rs @@ -9,7 +9,7 @@ use utils::{ FSProver, add_multilinears_inplace, fold_multilinear_chunks, multilinears_linear_combination, }; -use crate::{NormalAir, PackedAir}; +use crate::MyAir; use crate::{ uni_skip_utils::{matrix_down_folded, matrix_up_folded}, utils::{column_down, column_up, columns_up_and_down}, @@ -28,12 +28,11 @@ fn prove_air< 'a, WF: ExtensionField>, // witness field EF: ExtensionField> + ExtensionField, - A: NormalAir + 'static, - AP: PackedAir, + A: MyAir + 'static, >( prover_state: &mut FSProver>, univariate_skips: usize, - table: &AirTable, + table: &AirTable, witness: &[&'a [WF]], ) -> (MultilinearPoint, Vec) { let n_rows = witness[0].len(); @@ -76,11 +75,10 @@ fn prove_air< let columns_for_zero_check_packed = columns_for_zero_check.by_ref().pack(); let (outer_sumcheck_challenge, inner_sums, _) = info_span!("zerocheck").in_scope(|| { - sumcheck_prove::<_, _, _, _>( + sumcheck_prove( univariate_skips, columns_for_zero_check_packed, &table.air, - &table.air_packed, &constraints_batching_scalars, Some((zerocheck_challenges, None)), true, @@ -100,18 +98,11 @@ fn prove_air< &outer_sumcheck_challenge, ) } else { - open_unstructured_columns( - prover_state, - univariate_skips, - witness, - &outer_sumcheck_challenge, - ) + unreachable!() } } -impl>, A: NormalAir + 'static, AP: PackedAir> - AirTable -{ +impl>, A: MyAir + 'static> AirTable { #[instrument(name = "air: prove in base", skip_all)] pub fn prove_base( &self, @@ -119,7 +110,7 @@ impl>, A: NormalAir + 'static, AP: PackedAir> univariate_skips: usize, witness: &[&[PF]], ) -> (MultilinearPoint, Vec) { - prove_air::, EF, A, AP>(prover_state, univariate_skips, self, witness) + prove_air::, EF, A>(prover_state, univariate_skips, self, witness) } #[instrument(name = "air: prove in extension", skip_all)] @@ -129,55 +120,10 @@ impl>, A: NormalAir + 'static, AP: PackedAir> univariate_skips: usize, witness: &[&[EF]], ) -> (MultilinearPoint, Vec) { - prove_air::(prover_state, univariate_skips, self, witness) + prove_air::(prover_state, univariate_skips, self, witness) } } -#[instrument(skip_all)] -fn open_unstructured_columns< - WF: ExtensionField>, - EF: ExtensionField> + ExtensionField, 
->( - prover_state: &mut FSProver>, - univariate_skips: usize, - witness: &[&[WF]], - outer_sumcheck_challenge: &[EF], -) -> (MultilinearPoint, Vec) { - let log_n_rows = log2_strict_usize(witness[0].len()); - - let columns_batching_scalars = prover_state.sample_vec(log2_ceil_usize(witness.len())); - - let batched_column = multilinears_linear_combination( - witness, - &eval_eq(&columns_batching_scalars)[..witness.len()], - ); - - // TODO opti - let sub_evals = fold_multilinear_chunks( - &batched_column, - &MultilinearPoint(outer_sumcheck_challenge[1..log_n_rows - univariate_skips + 1].to_vec()), - ); - - prover_state.add_extension_scalars(&sub_evals); - - let epsilons = MultilinearPoint(prover_state.sample_vec(univariate_skips)); - let common_point = MultilinearPoint( - [ - epsilons.0.clone(), - outer_sumcheck_challenge[1..log_n_rows - univariate_skips + 1].to_vec(), - ] - .concat(), - ); - - let evaluations_remaining_to_prove = witness - .iter() - .map(|col| col.evaluate(&common_point)) - .collect::>(); - prover_state.add_extension_scalars(&evaluations_remaining_to_prove); - - (common_point, evaluations_remaining_to_prove) -} - #[instrument(skip_all)] fn open_structured_columns> + ExtensionField, IF: Field>( prover_state: &mut FSProver>, @@ -238,11 +184,10 @@ fn open_structured_columns> + ExtensionField, IF: }); let (inner_challenges, _, _) = info_span!("structured columns sumcheck").in_scope(|| { - sumcheck_prove::( + sumcheck_prove::( 1, inner_mle, &ProductComputation, - &ProductComputation, &[], None, false, diff --git a/crates/air/src/table.rs b/crates/air/src/table.rs index c62b7da9..ed706929 100644 --- a/crates/air/src/table.rs +++ b/crates/air/src/table.rs @@ -9,19 +9,18 @@ use p3_uni_stark::get_symbolic_constraints; use tracing::instrument; use utils::ConstraintChecker; -use crate::{NormalAir, PackedAir}; +use crate::MyAir; #[derive(Debug)] -pub struct AirTable { +pub struct AirTable { pub air: A, - pub air_packed: AP, pub n_constraints: usize, _phantom: std::marker::PhantomData, } -impl>, A: NormalAir, AP: PackedAir> AirTable { - pub fn new(air: A, air_packed: AP) -> Self { +impl>, A: MyAir> AirTable { + pub fn new(air: A) -> Self { let symbolic_constraints = get_symbolic_constraints(&air, 0, 0); let n_constraints = symbolic_constraints.len(); let constraint_degree = @@ -29,7 +28,6 @@ impl>, A: NormalAir, AP: PackedAir> AirTable>>::degree(&air)); Self { air, - air_packed, n_constraints, _phantom: std::marker::PhantomData, } diff --git a/crates/air/src/examples/simple_air.rs b/crates/air/src/tests.rs similarity index 50% rename from crates/air/src/examples/simple_air.rs rename to crates/air/src/tests.rs index d6cec491..0d931270 100644 --- a/crates/air/src/examples/simple_air.rs +++ b/crates/air/src/tests.rs @@ -57,44 +57,6 @@ impl; - -impl BaseAir - for ExampleUnstructuredAir -{ - fn width(&self) -> usize { - N_COLUMNS - } - fn structured(&self) -> bool { - false - } - fn degree(&self) -> usize { - N_PREPROCESSED_COLUMNS - } -} - -impl Air - for ExampleUnstructuredAir -{ - #[inline] - fn eval(&self, builder: &mut AB) { - let main = builder.main(); - let up = main.row_slice(0).expect("The matrix is empty?"); - let up: &[AB::Var] = (*up).borrow(); - assert_eq!(up.len(), N_COLUMNS); - - for j in N_PREPROCESSED_COLUMNS..N_COLUMNS { - builder.assert_eq( - up[j].clone(), - (0..N_PREPROCESSED_COLUMNS) - .map(|k| AB::Expr::from(up[k].clone())) - .product::() - + AB::F::from_usize(j), - ); - } - } -} - fn generate_structured_trace( log_length: usize, ) -> Vec> { @@ -125,47 +87,6 @@ fn 
generate_structured_trace( - log_length: usize, -) -> Vec> { - let n_rows = 1 << log_length; - let mut trace = vec![]; - let mut rng = StdRng::seed_from_u64(0); - for _ in 0..N_PREPROCESSED_COLUMNS { - trace.push((0..n_rows).map(|_| rng.random()).collect::>()); - } - let mut witness_cols = vec![vec![]; N_COLUMNS - N_PREPROCESSED_COLUMNS]; - let mut column_iters = trace[..N_PREPROCESSED_COLUMNS] - .iter() - .map(|col| col.iter()) - .collect::>(); - if column_iters.is_empty() { - trace.extend(witness_cols); - return trace; - } - loop { - let mut row_product = F::ONE; - let mut progressed = true; - for iter in &mut column_iters { - match iter.next() { - Some(value) => row_product *= *value, - None => { - progressed = false; - break; - } - } - } - if !progressed { - break; - } - for (j, witness_col) in witness_cols.iter_mut().enumerate() { - witness_col.push(F::from_usize(j + N_PREPROCESSED_COLUMNS) + row_product); - } - } - trace.extend(witness_cols); - trace -} - #[test] fn test_structured_air() { const N_COLUMNS: usize = 17; @@ -176,44 +97,7 @@ fn test_structured_air() { let columns = generate_structured_trace::(log_n_rows); let columns_ref = columns.iter().map(|col| col.as_slice()).collect::>(); - let table = AirTable::::new( - ExampleStructuredAir::, - ExampleStructuredAir::, - ); - table.check_trace_validity(&columns_ref).unwrap(); - let (point_prover, evaluations_remaining_to_prove) = - table.prove_base(&mut prover_state, UNIVARIATE_SKIPS, &columns_ref); - let mut verifier_state = build_verifier_state(&prover_state); - let (point_verifier, evaluations_remaining_to_verify) = table - .verify(&mut verifier_state, UNIVARIATE_SKIPS, log_n_rows) - .unwrap(); - assert_eq!(point_prover, point_verifier); - assert_eq!( - &evaluations_remaining_to_prove, - &evaluations_remaining_to_verify - ); - for i in 0..N_COLUMNS { - assert_eq!( - columns[i].evaluate(&point_prover), - evaluations_remaining_to_verify[i] - ); - } -} - -#[test] -fn test_unstructured_air() { - const N_COLUMNS: usize = 18; - const N_PREPROCESSED_COLUMNS: usize = 5; - let log_n_rows = 12; - let mut prover_state = build_prover_state::(); - - let columns = generate_unstructured_trace::(log_n_rows); - let columns_ref = columns.iter().map(|col| col.as_slice()).collect::>(); - - let table = AirTable::::new( - ExampleUnstructuredAir::, - ExampleUnstructuredAir::, - ); + let table = AirTable::::new(ExampleStructuredAir::); table.check_trace_validity(&columns_ref).unwrap(); let (point_prover, evaluations_remaining_to_prove) = table.prove_base(&mut prover_state, UNIVARIATE_SKIPS, &columns_ref); diff --git a/crates/air/src/verify.rs b/crates/air/src/verify.rs index 054d9654..5b2b2ef0 100644 --- a/crates/air/src/verify.rs +++ b/crates/air/src/verify.rs @@ -3,14 +3,16 @@ use p3_air::BaseAir; use p3_field::{ExtensionField, cyclic_subgroup_known_order, dot_product}; use p3_util::log2_ceil_usize; -use crate::utils::{matrix_down_lde, matrix_up_lde}; -use crate::{NormalAir, PackedAir}; +use crate::{ + MyAir, + utils::{matrix_down_lde, matrix_up_lde}, +}; use super::table::AirTable; -fn verify_air>, A: NormalAir, AP: PackedAir>( +fn verify_air>, A: MyAir>( verifier_state: &mut FSVerifier>, - table: &AirTable, + table: &AirTable, univariate_skips: usize, log_n_rows: usize, ) -> Result<(MultilinearPoint, Vec), ProofError> { @@ -85,14 +87,14 @@ fn verify_air>, A: NormalAir, AP: PackedAir>( } } -impl>, A: NormalAir, AP: PackedAir> AirTable { +impl>, A: MyAir> AirTable { pub fn verify( &self, verifier_state: &mut FSVerifier>, univariate_skips: usize, 
        log_n_rows: usize,
    ) -> Result<(MultilinearPoint<EF>, Vec<EF>), ProofError> {
-        verify_air::<EF, A, AP>(verifier_state, self, univariate_skips, log_n_rows)
+        verify_air::<EF, A>(verifier_state, self, univariate_skips, log_n_rows)
     }
 }
diff --git a/crates/lean_compiler/src/lib.rs b/crates/lean_compiler/src/lib.rs
index d2dd2584..8f85618b 100644
--- a/crates/lean_compiler/src/lib.rs
+++ b/crates/lean_compiler/src/lib.rs
@@ -37,13 +37,15 @@ pub fn compile_and_run(
     profiler: bool,
 ) {
     let bytecode = compile_program(program);
-    execute_bytecode(
+    let summary = execute_bytecode(
         &bytecode,
         (public_input, private_input),
         no_vec_runtime_memory,
-        (profiler, true),
+        profiler,
         (&vec![], &vec![]),
-    );
+    )
+    .summary;
+    println!("{summary}");
 }
 
 #[derive(Debug, Clone, Default)]
diff --git a/crates/lean_prover/Cargo.toml b/crates/lean_prover/Cargo.toml
index 88554996..95ddc5e6 100644
--- a/crates/lean_prover/Cargo.toml
+++ b/crates/lean_prover/Cargo.toml
@@ -32,6 +32,7 @@ lean_compiler.workspace = true
 witness_generation.workspace = true
 vm_air.workspace = true
 multilinear-toolkit.workspace = true
+poseidon_circuit.workspace = true
 
 [dev-dependencies]
 xmss.workspace = true
\ No newline at end of file
diff --git a/crates/lean_prover/src/common.rs b/crates/lean_prover/src/common.rs
index 59ba303e..26141ccf 100644
--- a/crates/lean_prover/src/common.rs
+++ b/crates/lean_prover/src/common.rs
@@ -1,12 +1,17 @@
 use multilinear_toolkit::prelude::*;
 use p3_field::{Algebra, BasedVectorSpace};
 use p3_field::{ExtensionField, PrimeCharacteristicRing};
+use p3_koala_bear::{KOALABEAR_RC16_INTERNAL, KOALABEAR_RC24_INTERNAL};
 use p3_util::log2_ceil_usize;
 use packed_pcs::ColDims;
+use poseidon_circuit::{PoseidonGKRLayers, default_cube_layers};
 
 use crate::*;
 use lean_vm::*;
 
+pub(crate) const N_COMMITED_CUBES_P16: usize = KOALABEAR_RC16_INTERNAL.len() - 1;
+pub(crate) const N_COMMITED_CUBES_P24: usize = KOALABEAR_RC24_INTERNAL.len() - 1;
+
 pub fn get_base_dims(
     n_cycles: usize,
     log_public_memory: usize,
@@ -14,11 +19,14 @@ pub fn get_base_dims(
     bytecode_ending_pc: usize,
     poseidon_counts: (usize, usize),
     n_rows_table_dot_products: usize,
+    (p16_gkr_layers, p24_gkr_layers): (
+        &PoseidonGKRLayers<16, N_COMMITED_CUBES_P16>,
+        &PoseidonGKRLayers<24, N_COMMITED_CUBES_P24>,
+    ),
 ) -> Vec<ColDims<F>> {
     let (n_poseidons_16, n_poseidons_24) = poseidon_counts;
-    let default_p16_row =
-        default_poseidon16_air_row(POSEIDON_16_DEFAULT_COMPRESSION, POSEIDON_16_NULL_HASH_PTR);
-    let default_p24_row = default_poseidon24_air_row();
+    let p16_default_cubes = default_cube_layers::<…>(p16_gkr_layers);
+    let p24_default_cubes = default_cube_layers::<…>(p24_gkr_layers);
 
     [
         vec![
@@ -30,16 +38,19 @@ pub fn get_base_dims(
             ColDims::padded(n_cycles, F::ZERO), // mem_addr_c
             ColDims::padded(n_poseidons_16, F::from_usize(ZERO_VEC_PTR)), // poseidon16 index a
             ColDims::padded(n_poseidons_16, F::from_usize(ZERO_VEC_PTR)), // poseidon16 index b
+            ColDims::padded(n_poseidons_16, F::from_usize(POSEIDON_16_NULL_HASH_PTR)), // poseidon16 index res
             ColDims::padded(n_poseidons_24, F::from_usize(ZERO_VEC_PTR)), // poseidon24 index a
             ColDims::padded(n_poseidons_24, F::from_usize(ZERO_VEC_PTR)), // poseidon24 index b
             ColDims::padded(n_poseidons_24, F::from_usize(POSEIDON_24_NULL_HASH_PTR)), // poseidon24 index res
         ],
-        (0..POSEIDON16_AIR_N_COLS - 16 * 2)
-            .map(|i| ColDims::padded(n_poseidons_16, default_p16_row[16 + i]))
-            .collect::<Vec<_>>(), // rest of poseidon16 table
-        (0..POSEIDON24_AIR_N_COLS - 24 - 8)
-            .map(|i| ColDims::padded(n_poseidons_24, default_p24_row[24 + i]))
-            .collect::<Vec<_>>(), // rest of poseidon24 table
+        p16_default_cubes
+            .iter()
+            .map(|&c| ColDims::padded(n_poseidons_16, c))
+            .collect::<Vec<_>>(), // commited cubes for poseidon16
+        p24_default_cubes
+            .iter()
+            .map(|&c| ColDims::padded(n_poseidons_24, c))
+            .collect::<Vec<_>>(), // commited cubes for poseidon24
         vec![
             ColDims::padded(n_rows_table_dot_products, F::ONE), // dot product: (start) flag
             ColDims::padded(n_rows_table_dot_products, F::ONE), // dot product: length
@@ -296,24 +307,22 @@ impl SumcheckComputationPacked for DotProductFootprint {
 }
 
 pub fn get_poseidon_lookup_statements(
-    (p16_air_width, p24_air_width): (usize, usize),
     (log_n_p16, log_n_p24): (usize, usize),
-    (p16_evals, p24_evals): (&[EF], &[EF]),
-    (p16_air_point, p24_air_point): (&MultilinearPoint<EF>, &MultilinearPoint<EF>),
+    (p16_input_point, p16_input_evals): &(MultilinearPoint<EF>, Vec<EF>),
+    (p16_output_point, p16_output_evals): &(MultilinearPoint<EF>, Vec<EF>),
+    (p24_input_point, p24_input_evals): &(MultilinearPoint<EF>, Vec<EF>),
+    (p24_output_point, p24_output_evals): &(MultilinearPoint<EF>, Vec<EF>),
     memory_folding_challenges: &MultilinearPoint<EF>,
 ) -> Vec<…> {
-    let p16_folded_eval_addr_a = (&p16_evals[..8]).evaluate(memory_folding_challenges);
-    let p16_folded_eval_addr_b = (&p16_evals[8..16]).evaluate(memory_folding_challenges);
-    let p16_folded_eval_addr_res_a =
-        (&p16_evals[p16_air_width - 16..p16_air_width - 8]).evaluate(memory_folding_challenges);
-    let p16_folded_eval_addr_res_b =
-        (&p16_evals[p16_air_width - 8..]).evaluate(memory_folding_challenges);
+    let p16_folded_eval_addr_a = (&p16_input_evals[..8]).evaluate(memory_folding_challenges);
+    let p16_folded_eval_addr_b = (&p16_input_evals[8..16]).evaluate(memory_folding_challenges);
+    let p16_folded_eval_addr_res_a = (&p16_output_evals[..8]).evaluate(memory_folding_challenges);
+    let p16_folded_eval_addr_res_b = (&p16_output_evals[8..16]).evaluate(memory_folding_challenges);
 
-    let p24_folded_eval_addr_a = (&p24_evals[..8]).evaluate(memory_folding_challenges);
-    let p24_folded_eval_addr_b = (&p24_evals[8..16]).evaluate(memory_folding_challenges);
-    let p24_folded_eval_addr_c = (&p24_evals[16..24]).evaluate(memory_folding_challenges);
-    let p24_folded_eval_addr_res =
-        (&p24_evals[p24_air_width - 8..]).evaluate(memory_folding_challenges);
+    let p24_folded_eval_addr_a = (&p24_input_evals[..8]).evaluate(memory_folding_challenges);
+    let p24_folded_eval_addr_b = (&p24_input_evals[8..16]).evaluate(memory_folding_challenges);
+    let p24_folded_eval_addr_c = (&p24_input_evals[16..24]).evaluate(memory_folding_challenges);
+    let p24_folded_eval_addr_res = (&p24_output_evals[16..24]).evaluate(memory_folding_challenges);
 
     let padding_p16 = EF::zero_vec(log_n_p16.max(log_n_p24) - log_n_p16);
     let padding_p24 = EF::zero_vec(log_n_p16.max(log_n_p24) - log_n_p24);
@@ -323,7 +332,7 @@ pub fn get_poseidon_lookup_statements(
             [
                 vec![EF::ZERO; 3],
                 padding_p16.clone(),
-                p16_air_point.0.clone(),
+                p16_input_point.0.clone(),
             ]
             .concat(),
             p16_folded_eval_addr_a,
@@ -332,7 +341,7 @@ pub fn get_poseidon_lookup_statements(
             [
                 vec![EF::ZERO, EF::ZERO, EF::ONE],
                 padding_p16.clone(),
-                p16_air_point.0.clone(),
+                p16_input_point.0.clone(),
             ]
             .concat(),
             p16_folded_eval_addr_b,
@@ -341,7 +350,7 @@ pub fn get_poseidon_lookup_statements(
             [
                 vec![EF::ZERO, EF::ONE, EF::ZERO],
                 padding_p16.clone(),
-                p16_air_point.0.clone(),
+                p16_output_point.0.clone(),
             ]
             .concat(),
             p16_folded_eval_addr_res_a,
@@ -350,7 +359,7 @@ pub fn get_poseidon_lookup_statements(
             [
                 vec![EF::ZERO, EF::ONE, EF::ONE],
                 padding_p16.clone(),
-                p16_air_point.0.clone(),
+                p16_output_point.0.clone(),
             ]
            .concat(),
            p16_folded_eval_addr_res_b,
@@ -359,7 +368,7 @@ pub fn get_poseidon_lookup_statements(
             [
                 vec![EF::ONE, EF::ZERO, EF::ZERO],
                 padding_p24.clone(),
-                p24_air_point.0.clone(),
+                p24_input_point.0.clone(),
             ]
             .concat(),
             p24_folded_eval_addr_a,
@@ -368,7 +377,7 @@ pub fn get_poseidon_lookup_statements(
             [
                 vec![EF::ONE, EF::ZERO, EF::ONE],
                 padding_p24.clone(),
-                p24_air_point.0.clone(),
+                p24_input_point.0.clone(),
             ]
             .concat(),
             p24_folded_eval_addr_b,
@@ -377,7 +386,7 @@ pub fn get_poseidon_lookup_statements(
             [
                 vec![EF::ONE, EF::ONE, EF::ZERO],
                 padding_p24.clone(),
-                p24_air_point.0.clone(),
+                p24_input_point.0.clone(),
             ]
             .concat(),
             p24_folded_eval_addr_c,
@@ -386,7 +395,7 @@ pub fn get_poseidon_lookup_statements(
             [
                 vec![EF::ONE, EF::ONE, EF::ONE],
                 padding_p24.clone(),
-                p24_air_point.0.clone(),
+                p24_output_point.0.clone(),
             ]
             .concat(),
             p24_folded_eval_addr_res,
@@ -415,10 +424,10 @@ pub fn add_poseidon_lookup_statements_on_indexes(
     log_n_p24: usize,
     index_lookup_point: &MultilinearPoint<EF>,
     inner_values: &[EF],
-    p16_index_statements: [&mut Vec<Evaluation<EF>>; 4], // a, b, res_1, res_2
-    p24_index_statements: [&mut Vec<Evaluation<EF>>; 3], // a, b, res
+    p16_index_statements: [&mut Vec<Evaluation<EF>>; 3], // input_a, input_b, res_a
+    p24_index_statements: [&mut Vec<Evaluation<EF>>; 3], // input_a, input_b, res
 ) {
-    assert_eq!(inner_values.len(), 7);
+    assert_eq!(inner_values.len(), 6);
     let mut idx_point_right_p16 = MultilinearPoint(index_lookup_point[3..].to_vec());
     let mut idx_point_right_p24 =
         MultilinearPoint(index_lookup_point[3 + log_n_p16.abs_diff(log_n_p24)..].to_vec());
@@ -434,7 +443,7 @@ pub fn add_poseidon_lookup_statements_on_indexes(
     for (i, stmt) in p24_index_statements.into_iter().enumerate() {
         stmt.push(Evaluation::new(
             idx_point_right_p24.clone(),
-            inner_values[i + 4],
+            inner_values[i + 3],
         ));
     }
 }
diff --git a/crates/lean_prover/src/prove_execution.rs b/crates/lean_prover/src/prove_execution.rs
index 0b70ecbe..54582e6a 100644
--- a/crates/lean_prover/src/prove_execution.rs
+++ b/crates/lean_prover/src/prove_execution.rs
@@ -5,20 +5,17 @@ use lean_vm::*;
 use lookup::prove_gkr_product;
 use lookup::{compute_pushforward, prove_logup_star};
 use multilinear_toolkit::prelude::*;
-use p3_air::BaseAir;
 use p3_field::ExtensionField;
 use p3_field::Field;
 use p3_field::PrimeCharacteristicRing;
 use p3_util::{log2_ceil_usize, log2_strict_usize};
 use packed_pcs::*;
+use poseidon_circuit::{PoseidonGKRLayers, prove_poseidon_gkr};
 use tracing::info_span;
 use utils::ToUsize;
 use utils::dot_product_with_base;
 use utils::field_slice_as_base;
-use utils::{
-    build_poseidon_16_air, build_poseidon_24_air, build_prover_state,
-    padd_with_zero_to_next_power_of_two,
-};
+use utils::{build_prover_state, padd_with_zero_to_next_power_of_two};
 use vm_air::*;
 use whir_p3::{
     WhirConfig, WhirConfigBuilder, precompute_dft_twiddles, second_batched_whir_config_builder,
@@ -32,28 +29,31 @@ pub fn prove_execution(
     no_vec_runtime_memory: usize, // size of the "non-vectorized" runtime memory
     vm_profiler: bool,
     (poseidons_16_precomputed, poseidons_24_precomputed): (&Poseidon16History, &Poseidon24History),
-) -> (Vec<…>, usize) {
+) -> (Vec<…>, usize, String) {
+    let mut exec_summary = String::new();
     let ExecutionTrace {
         full_trace,
         n_poseidons_16,
         n_poseidons_24,
-        poseidons_16, // padded with empty poseidons
-        poseidons_24, // padded with empty poseidons
+        poseidons_16,      // padded with empty poseidons
+        poseidons_24,      // padded with empty poseidons
+        n_compressions_16, // included the padding (that are compressions of zeros)
         dot_products,
         multilinear_evals: vm_multilinear_evals,
        public_memory_size,
        non_zero_memory_size,
        memory, // padded with zeros to next power of two
    } = info_span!("Witness generation").in_scope(|| {
-        let execution_result = info_span!("Executing bytecode").in_scope(|| {
+        let mut execution_result = info_span!("Executing bytecode").in_scope(|| {
             execute_bytecode(
                 bytecode,
                 (public_input, private_input),
                 no_vec_runtime_memory,
-                (vm_profiler, true),
+                vm_profiler,
                 (poseidons_16_precomputed, poseidons_24_precomputed),
             )
         });
+        exec_summary = std::mem::take(&mut execution_result.summary);
         info_span!("Building execution trace")
             .in_scope(|| get_execution_trace(bytecode, execution_result))
     });
@@ -82,27 +82,18 @@ pub fn prove_execution(
             .map(Vec::as_slice)
             .collect::<Vec<_>>(),
     );
-    let exec_table = AirTable::<…>::new(VMAir, VMAir);
+    let exec_table = AirTable::<…>::new(VMAir);
 
     let _validity_proof_span = info_span!("Validity proof generation").entered();
 
-    let p16_air = build_poseidon_16_air();
-    let p24_air = build_poseidon_24_air();
-    let p16_air_packed = build_poseidon_16_air_packed();
-    let p24_air_packed = build_poseidon_24_air_packed();
-    let p16_table = AirTable::<…>::new(p16_air.clone(), p16_air_packed);
-    let p24_table = AirTable::<…>::new(p24_air.clone(), p24_air_packed);
+    let p16_gkr_layers = PoseidonGKRLayers::<16, N_COMMITED_CUBES_P16>::build(Some(VECTOR_LEN));
+    let p24_gkr_layers = PoseidonGKRLayers::<24, N_COMMITED_CUBES_P24>::build(None);
 
-    let dot_product_table = AirTable::<…>::new(DotProductAir, DotProductAir);
+    let p16_witness =
+        generate_poseidon_witness_helper(&p16_gkr_layers, &poseidons_16, Some(n_compressions_16));
+    let p24_witness = generate_poseidon_witness_helper(&p24_gkr_layers, &poseidons_24, None);
 
-    let p16_columns = build_poseidon_16_columns(
-        &poseidons_16[..n_poseidons_16],
-        poseidons_16.len() - n_poseidons_16,
-    );
-    let p24_columns = build_poseidon_24_columns(
-        &poseidons_24[..n_poseidons_24],
-        poseidons_24.len() - n_poseidons_24,
-    );
+    let dot_product_table = AirTable::<…>::new(DotProductAir);
 
     let (dot_product_columns, dot_product_padding_len) = build_dot_product_columns(&dot_products);
@@ -121,6 +112,7 @@ pub fn prove_execution(
         &[
             log_n_cycles,
             n_poseidons_16,
+            n_compressions_16,
             n_poseidons_24,
             dot_products.len(),
             n_rows_table_dot_products,
@@ -154,11 +146,7 @@ pub fn prove_execution(
         )
         .unwrap();
     }
-    let p16_indexes_input = all_poseidon_16_indexes_input(&poseidons_16);
-    // 0..16: input, 16: compress, 17: res_index_1, 18: res_index_2
-    let p16_compression_col = &p16_columns[16];
-    let p16_index_out_1_col = &p16_columns[17];
-
+    let p16_indexes = all_poseidon_16_indexes(&poseidons_16);
     let p24_indexes = all_poseidon_24_indexes(&poseidons_24);
 
     let base_dims = get_base_dims(
@@ -168,6 +156,7 @@ pub fn prove_execution(
         bytecode.ending_pc,
         (n_poseidons_16, n_poseidons_24),
         n_rows_table_dot_products,
+        (&p16_gkr_layers, &p24_gkr_layers),
     );
 
     let dot_product_col_index_a = field_slice_as_base(&dot_product_columns[2]).unwrap();
@@ -183,18 +172,17 @@ pub fn prove_execution(
             full_trace[COL_INDEX_MEM_ADDRESS_B].as_slice(),
             full_trace[COL_INDEX_MEM_ADDRESS_C].as_slice(),
         ],
-        p16_indexes_input
-            .iter()
-            .map(Vec::as_slice)
-            .collect::<Vec<_>>(),
+        p16_indexes.iter().map(Vec::as_slice).collect::<Vec<_>>(),
         p24_indexes.iter().map(Vec::as_slice).collect::<Vec<_>>(),
-        p16_columns[16..p16_air.width() - 16]
+        p16_witness
+            .committed_cubes
             .iter()
-            .map(Vec::as_slice)
+            .map(|s| FPacking::<F>::unpack_slice(s))
             .collect::<Vec<_>>(),
-        p24_columns[24..p24_air.width() - 8]
+        p24_witness
+            .committed_cubes
             .iter()
-            .map(Vec::as_slice)
+            .map(|s| FPacking::<F>::unpack_slice(s))
             .collect::<Vec<_>>(),
         vec![
             dot_product_flags.as_slice(),
@@ -362,18 +350,16 @@ pub fn prove_execution(
     );
 
     let p16_grand_product_evals_on_indexes_a =
-        p16_indexes_input[0].evaluate(&grand_product_p16_statement.point);
+        p16_indexes[0].evaluate(&grand_product_p16_statement.point);
     let p16_grand_product_evals_on_indexes_b =
-        p16_indexes_input[1].evaluate(&grand_product_p16_statement.point);
+        p16_indexes[1].evaluate(&grand_product_p16_statement.point);
     let p16_grand_product_evals_on_indexes_res =
-        p16_index_out_1_col.evaluate(&grand_product_p16_statement.point);
-    let p16_grand_product_evals_on_compression =
-        p16_compression_col.evaluate(&grand_product_p16_statement.point);
+        p16_indexes[2].evaluate(&grand_product_p16_statement.point);
+
     prover_state.add_extension_scalars(&[
         p16_grand_product_evals_on_indexes_a,
         p16_grand_product_evals_on_indexes_b,
         p16_grand_product_evals_on_indexes_res,
-        p16_grand_product_evals_on_compression,
     ]);
 
     let mut p16_indexes_a_statements = vec![Evaluation::new(
@@ -384,6 +370,10 @@ pub fn prove_execution(
         grand_product_p16_statement.point.clone(),
         p16_grand_product_evals_on_indexes_b,
     )];
+    let mut p16_indexes_res_statements = vec![Evaluation::new(
+        grand_product_p16_statement.point.clone(),
+        p16_grand_product_evals_on_indexes_res,
+    )];
 
     let p24_grand_product_evals_on_indexes_a =
         p24_indexes[0].evaluate(&grand_product_p24_statement.point);
@@ -429,7 +419,6 @@ pub fn prove_execution(
                 .collect::<Vec<_>>(),
         ), // we do not use packing here because it's slower in practice (this sumcheck is small)
         &dot_product_footprint_computation,
-        &dot_product_footprint_computation,
         &[],
         Some((grand_product_dot_product_statement.point.0.clone(), None)),
         false,
@@ -486,7 +475,6 @@ pub fn prove_execution(
         )
         .pack(),
         &precompile_foot_print_computation,
-        &precompile_foot_print_computation,
         &[],
         Some((grand_product_exec_statement.point.0.clone(), None)),
         false,
@@ -539,40 +527,56 @@ pub fn prove_execution(
         dot_product_table.prove_extension(&mut prover_state, 1, &dot_product_columns_ref)
     });
 
-    let p16_columns_ref = p16_columns.iter().map(Vec::as_slice).collect::<Vec<_>>();
-    let (p16_air_point, p16_evals_to_prove) = info_span!("Poseidon-16 AIR proof")
-        .in_scope(|| p16_table.prove_base(&mut prover_state, UNIVARIATE_SKIPS, &p16_columns_ref));
-    let mut p16_statements = p16_evals_to_prove[16..p16_air.width() - 16]
+    let random_point_p16 = MultilinearPoint(prover_state.sample_vec(log_n_p16));
+    let p16_gkr = prove_poseidon_gkr(
+        &mut prover_state,
+        &p16_witness,
+        random_point_p16.0.clone(),
+        UNIVARIATE_SKIPS,
+        &p16_gkr_layers,
+    );
+    let p16_cubes_statements = p16_gkr
+        .cubes_statements
+        .1
         .iter()
-        .map(|&e| vec![Evaluation::new(p16_air_point.clone(), e)])
+        .map(|&e| {
+            vec![Evaluation {
+                point: p16_gkr.cubes_statements.0.clone(),
+                value: e,
+            }]
+        })
         .collect::<Vec<_>>();
-    p16_statements[0].push(Evaluation::new(
-        grand_product_p16_statement.point.clone(),
-        p16_grand_product_evals_on_compression,
-    ));
-    p16_statements[1].push(Evaluation::new(
-        grand_product_p16_statement.point.clone(),
-        p16_grand_product_evals_on_indexes_res,
-    ));
-
-    let p24_columns_ref = p24_columns.iter().map(Vec::as_slice).collect::<Vec<_>>();
-    let (p24_air_point, p24_evals_to_prove) = info_span!("Poseidon-24 AIR proof")
-        .in_scope(|| p24_table.prove_base(&mut prover_state, UNIVARIATE_SKIPS, &p24_columns_ref));
-    let p24_statements = p24_evals_to_prove[24..p24_air.width() - 8]
+    let random_point_p24 = MultilinearPoint(prover_state.sample_vec(log_n_p24));
+    let p24_gkr = prove_poseidon_gkr(
+        &mut prover_state,
+        &p24_witness,
+        random_point_p24.0.clone(),
+        UNIVARIATE_SKIPS,
+        &p24_gkr_layers,
+    );
+    let p24_cubes_statements = p24_gkr
+        .cubes_statements
+        .1
         .iter()
-        .map(|&e| vec![Evaluation::new(p24_air_point.clone(), e)])
-        .collect();
+        .map(|&e| {
+            vec![Evaluation {
+                point: p24_gkr.cubes_statements.0.clone(),
+                value: e,
+            }]
+        })
+        .collect::<Vec<_>>();
 
     // Poseidons 16/24 memory addresses lookup
     let poseidon_logup_star_alpha = prover_state.sample();
     let memory_folding_challenges = MultilinearPoint(prover_state.sample_vec(LOG_VECTOR_LEN));
 
     let poseidon_lookup_statements = get_poseidon_lookup_statements(
-        (p16_air.width(), p24_air.width()),
         (log_n_p16, log_n_p24),
-        (&p16_evals_to_prove, &p24_evals_to_prove),
-        (&p16_air_point, &p24_air_point),
+        &p16_gkr.input_statements,
+        &(random_point_p16.clone(), p16_gkr.output_values),
+        &p24_gkr.input_statements,
+        &(random_point_p24.clone(), p24_gkr.output_values),
        &memory_folding_challenges,
    );
@@ -906,22 +910,51 @@ pub fn prove_execution(
         &all_poseidon_indexes,
         &MultilinearPoint(poseidon_logup_star_statements.on_indexes.point[3..].to_vec()),
     );
+
     let inner_values = [
         poseidon_index_evals[0] / correcting_factor_p16,
         poseidon_index_evals[1] / correcting_factor_p16,
         poseidon_index_evals[2] / correcting_factor_p16,
-        poseidon_index_evals[3] / correcting_factor_p16,
+        // skip 3 (16_output_b, proved via sumcheck)
         poseidon_index_evals[4] / correcting_factor_p24,
-        // skip 5
+        // skip 5 (24_input_b)
         poseidon_index_evals[6] / correcting_factor_p24,
         poseidon_index_evals[7] / correcting_factor_p24,
     ];
     prover_state.add_extension_scalars(&inner_values);
 
-    let (left, right) = p16_statements.split_at_mut(2);
-    let p16_statements_res_1 = &mut left[1];
-    let p16_statements_res_2 = &mut right[0];
+    let p16_value_index_res_b = poseidon_index_evals[3] / correcting_factor_p16;
+    // prove this value via sumcheck: index_res_b = (index_res_a + 1) * (1 - compression)
+    let p16_one_minus_compression = p16_witness
+        .compression
+        .as_ref()
+        .unwrap()
+        .1
+        .par_iter()
+        .map(|c| FPacking::<F>::ONE - *c)
+        .collect::<Vec<_>>();
+    let p16_index_res_a_plus_one = FPacking::<F>::pack_slice(&p16_indexes[2])
+        .par_iter()
+        .map(|c| *c + F::ONE)
+        .collect::<Vec<_>>();
+
+    // TODO there is a big inneficiency in impl SumcheckComputationPacked for ProductComputation
+    let (sc_point, sc_values, _) = sumcheck_prove(
+        1, // TODO univariate skip
+        MleGroupRef::BasePacked(vec![&p16_one_minus_compression, &p16_index_res_a_plus_one]),
+        &ProductComputation,
+        &[],
+        Some((
+            poseidon_logup_star_statements.on_indexes.point[3..].to_vec(),
+            None,
+        )),
+        false,
+        &mut prover_state,
+        p16_value_index_res_b,
+        None,
+    );
+    prover_state.add_extension_scalar(sc_values[1]);
+    p16_indexes_res_statements.push(Evaluation::new(sc_point, sc_values[1] - EF::ONE));
 
     add_poseidon_lookup_statements_on_indexes(
         log_n_p16,
@@ -931,8 +964,7 @@ pub fn prove_execution(
         [
             &mut p16_indexes_a_statements,
             &mut p16_indexes_b_statements,
-            p16_statements_res_1,
-            p16_statements_res_2,
+            &mut p16_indexes_res_statements,
         ],
         [
             &mut p24_indexes_a_statements,
@@ -1060,12 +1092,13 @@ pub fn prove_execution(
             ], // exec memory address C
             p16_indexes_a_statements,
             p16_indexes_b_statements,
+            p16_indexes_res_statements,
             p24_indexes_a_statements,
             p24_indexes_b_statements,
             p24_indexes_res_statements,
         ],
-        p16_statements,
-        p24_statements,
+        p16_cubes_statements,
+        p24_cubes_statements,
         vec![
             vec![
                 dot_product_air_statement(0),
@@ -1132,5 +1165,6 @@ pub fn prove_execution(
     (
         prover_state.proof_data().to_vec(),
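        // Illustrative note (not part of the patch): the sumcheck above lets the
        // prover avoid committing to the poseidon16 `index_res_b` column entirely,
        // using the row-wise identity
        //     index_res_b = (index_res_a + 1) * (1 - compression).
        // Worked example under that identity: on a permutation row
        // (compression = 0) with index_res_a = 41, the second output pointer is
        // 42, the vector slot right after res_a; on a compression row
        // (compression = 1) the product vanishes, so index_res_b = 0 and no second
        // output address exists. This is also why `sc_values[1] - EF::ONE` is
        // pushed as the statement on `p16_indexes[2]`: `sc_values[1]` is the
        // evaluation of `index_res_a + 1`, so subtracting one recovers the claimed
        // evaluation of `index_res_a` itself.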
        prover_state.proof_size(),
+        exec_summary,
    )
 }
diff --git a/crates/lean_prover/src/verify_execution.rs b/crates/lean_prover/src/verify_execution.rs
index e92e6ba3..53668a7f 100644
--- a/crates/lean_prover/src/verify_execution.rs
+++ b/crates/lean_prover/src/verify_execution.rs
@@ -5,13 +5,14 @@ use lean_vm::*;
 use lookup::verify_gkr_product;
 use lookup::verify_logup_star;
 use multilinear_toolkit::prelude::*;
-use p3_air::BaseAir;
 use p3_field::PrimeCharacteristicRing;
 use p3_field::dot_product;
 use p3_util::{log2_ceil_usize, log2_strict_usize};
 use packed_pcs::*;
+use poseidon_circuit::PoseidonGKRLayers;
+use poseidon_circuit::verify_poseidon_gkr;
+use utils::ToUsize;
 use utils::dot_product_with_base;
-use utils::{ToUsize, build_poseidon_16_air, build_poseidon_24_air};
 use utils::{build_challenger, padd_with_zero_to_next_power_of_two};
 use vm_air::*;
 use whir_p3::WhirConfig;
@@ -26,25 +27,23 @@ pub fn verify_execution(
 ) -> Result<(), ProofError> {
     let mut verifier_state = VerifierState::new(proof_data, build_challenger());
 
-    let exec_table = AirTable::<…>::new(VMAir, VMAir);
-    let p16_air = build_poseidon_16_air();
-    let p24_air = build_poseidon_24_air();
-    let p16_air_packed = build_poseidon_16_air_packed();
-    let p24_air_packed = build_poseidon_24_air_packed();
-    let p16_table = AirTable::<…>::new(p16_air.clone(), p16_air_packed);
-    let p24_table = AirTable::<…>::new(p24_air.clone(), p24_air_packed);
-    let dot_product_table = AirTable::<…>::new(DotProductAir, DotProductAir);
+    let exec_table = AirTable::<…>::new(VMAir);
+    let p16_gkr_layers = PoseidonGKRLayers::<16, N_COMMITED_CUBES_P16>::build(Some(VECTOR_LEN));
+    let p24_gkr_layers = PoseidonGKRLayers::<24, N_COMMITED_CUBES_P24>::build(None);
+
+    let dot_product_table = AirTable::<…>::new(DotProductAir);
 
     let [
         log_n_cycles,
         n_poseidons_16,
+        n_compressions_16,
         n_poseidons_24,
         n_dot_products,
         n_rows_table_dot_products,
         private_memory_len,
         n_vm_multilinear_evals,
     ] = verifier_state
-        .next_base_scalars_const::<7>()?
+        .next_base_scalars_const::<8>()?
         .into_iter()
         .map(|x| x.to_usize())
         .collect::<Vec<_>>()
@@ -53,6 +52,7 @@ pub fn verify_execution(
     if log_n_cycles > 32
         || n_poseidons_16 > 1 << 32
+        || n_compressions_16 > n_poseidons_16.next_power_of_two()
         || n_poseidons_24 > 1 << 32
         || n_dot_products > 1 << 32
         || n_rows_table_dot_products > 1 << 32
@@ -112,6 +112,7 @@ pub fn verify_execution(
         bytecode.ending_pc,
         (n_poseidons_16, n_poseidons_24),
         n_rows_table_dot_products,
+        (&p16_gkr_layers, &p24_gkr_layers),
     );
 
     let parsed_commitment_base = packed_pcs_parse_commitment(
@@ -189,8 +190,12 @@ pub fn verify_execution(
         p16_grand_product_evals_on_indexes_a,
         p16_grand_product_evals_on_indexes_b,
         p16_grand_product_evals_on_indexes_res,
-        p16_grand_product_evals_on_compression,
-    ] = verifier_state.next_extension_scalars_const()?;
+    ] = verifier_state.next_extension_scalars_const().unwrap();
+    let p16_grand_product_evals_on_compression = mle_of_zeros_then_ones(
+        n_poseidons_16.next_power_of_two() - n_compressions_16,
+        &grand_product_p16_statement.point,
+    );
+
     if grand_product_challenge_global
         + finger_print(
             &[
@@ -214,6 +219,10 @@ pub fn verify_execution(
         grand_product_p16_statement.point.clone(),
         p16_grand_product_evals_on_indexes_b,
     )];
+    let mut p16_indexes_res_statements = vec![Evaluation::new(
+        grand_product_p16_statement.point.clone(),
+        p16_grand_product_evals_on_indexes_res,
+    )];
 
     let [
         p24_grand_product_evals_on_indexes_a,
@@ -340,29 +349,47 @@ pub fn verify_execution(
     let (dot_product_air_point, dot_product_evals_to_verify) =
         dot_product_table.verify(&mut verifier_state, 1, table_dot_products_log_n_rows)?;
 
-    let (p16_air_point, p16_evals_to_verify) =
-        p16_table.verify(&mut verifier_state, UNIVARIATE_SKIPS, log_n_p16)?;
-    let (p24_air_point, p24_evals_to_verify) =
-        p24_table.verify(&mut verifier_state, UNIVARIATE_SKIPS, log_n_p24)?;
-
-    let mut p16_statements = p16_evals_to_verify[16..p16_air.width() - 16]
+    let random_point_p16 = MultilinearPoint(verifier_state.sample_vec(log_n_p16));
+    let gkr_16 = verify_poseidon_gkr(
+        &mut verifier_state,
+        log_n_p16,
+        &random_point_p16,
+        &p16_gkr_layers,
+        UNIVARIATE_SKIPS,
+        Some(n_compressions_16),
+    );
+    let p16_cubes_statements = gkr_16
+        .cubes_statements
+        .1
         .iter()
-        .map(|&e| vec![Evaluation::new(p16_air_point.clone(), e)])
+        .map(|&e| {
+            vec![Evaluation {
+                point: gkr_16.cubes_statements.0.clone(),
+                value: e,
+            }]
+        })
         .collect::<Vec<_>>();
-    p16_statements[0].push(Evaluation::new(
-        grand_product_p16_statement.point.clone(),
-        p16_grand_product_evals_on_compression,
-    ));
-    p16_statements[1].push(Evaluation::new(
-        grand_product_p16_statement.point.clone(),
-        p16_grand_product_evals_on_indexes_res,
-    ));
-
-    let p24_statements = p24_evals_to_verify[24..p24_air.width() - 8]
+    let random_point_p24 = MultilinearPoint(verifier_state.sample_vec(log_n_p24));
+    let gkr_24 = verify_poseidon_gkr(
+        &mut verifier_state,
+        log_n_p24,
+        &random_point_p24,
+        &p24_gkr_layers,
+        UNIVARIATE_SKIPS,
+        None,
+    );
+    let p24_cubes_statements = gkr_24
+        .cubes_statements
+        .1
         .iter()
-        .map(|&e| vec![Evaluation::new(p24_air_point.clone(), e)])
-        .collect();
+        .map(|&e| {
+            vec![Evaluation {
+                point: gkr_24.cubes_statements.0.clone(),
+                value: e,
+            }]
+        })
+        .collect::<Vec<_>>();
 
     let poseidon_logup_star_alpha = verifier_state.sample();
     let memory_folding_challenges = MultilinearPoint(verifier_state.sample_vec(LOG_VECTOR_LEN));
@@ -512,10 +539,11 @@ pub fn verify_execution(
         .unwrap();
 
     let poseidon_lookup_statements = get_poseidon_lookup_statements(
-        (p16_air.width(), p24_air.width()),
         (log_n_p16, log_n_p24),
-        (&p16_evals_to_verify, &p24_evals_to_verify),
-        (&p16_air_point, &p24_air_point),
+        &gkr_16.input_statements,
+        &(random_point_p16.clone(), gkr_16.output_values),
+        &gkr_24.input_statements,
+        &(random_point_p24.clone(), gkr_24.output_values),
         &memory_folding_challenges,
     );
@@ -567,11 +595,30 @@ pub fn verify_execution(
         &poseidon_logup_star_statements.on_indexes.point,
     );
 
-    let mut inner_values = verifier_state.next_extension_scalars_vec(7)?;
+    let mut inner_values = verifier_state.next_extension_scalars_vec(6)?;
 
-    let (left, right) = p16_statements.split_at_mut(2);
-    let p16_statements_res_1 = &mut left[1];
-    let p16_statements_res_2 = &mut right[0];
+    let (p16_value_index_res_b, sc_eval) = sumcheck_verify_with_univariate_skip(
+        &mut verifier_state,
+        3,
+        log_n_p16,
+        1, // TODO univariate skip
+    )?;
+    let sc_res_index_value = verifier_state.next_extension_scalar()?;
+    p16_indexes_res_statements.push(Evaluation::new(
+        sc_eval.point.clone(),
+        sc_res_index_value - EF::ONE,
+    ));
+
+    if sc_res_index_value
+        * (EF::ONE
+            - mle_of_zeros_then_ones((1 << log_n_p16) - n_compressions_16, &sc_eval.point))
+        * sc_eval.point.eq_poly_outside(&MultilinearPoint(
+            poseidon_logup_star_statements.on_indexes.point[3..].to_vec(),
+        ))
+        != sc_eval.value
+    {
+        return Err(ProofError::InvalidProof);
+    }
 
     add_poseidon_lookup_statements_on_indexes(
         log_n_p16,
@@ -581,8 +628,7 @@ pub fn verify_execution(
         [
             &mut p16_indexes_a_statements,
             &mut p16_indexes_b_statements,
-            p16_statements_res_1,
-            p16_statements_res_2,
+            &mut p16_indexes_res_statements,
         ],
         [
             &mut p24_indexes_a_statements,
@@ -591,6 +637,7 @@ pub fn verify_execution(
         ],
     );
 
+    inner_values.insert(3, p16_value_index_res_b);
     inner_values.insert(5, inner_values[4] + EF::ONE);
 
     for v in &mut inner_values[..4] {
@@ -760,12 +807,13 @@ pub fn verify_execution(
             ], // exec memory address C
             p16_indexes_a_statements,
             p16_indexes_b_statements,
+            p16_indexes_res_statements,
             p24_indexes_a_statements,
             p24_indexes_b_statements,
             p24_indexes_res_statements,
         ],
-        p16_statements,
-        p24_statements,
+        p16_cubes_statements,
+        p24_cubes_statements,
         vec![
             vec![
                 dot_product_air_statement(0),
diff --git a/crates/lean_prover/tests/hash_chain.rs b/crates/lean_prover/tests/hash_chain.rs
index 1e325369..d4a9f874 100644
--- a/crates/lean_prover/tests/hash_chain.rs
+++ b/crates/lean_prover/tests/hash_chain.rs
@@ -1,13 +1,11 @@
 use std::time::Instant;
 
-use air::examples::prove_poseidon2::{Poseidon2Config, prove_poseidon2};
 use lean_compiler::*;
 use lean_prover::{
     prove_execution::prove_execution, verify_execution::verify_execution, whir_config_builder,
 };
 use lean_vm::{F, execute_bytecode};
 use p3_field::PrimeCharacteristicRing;
-use whir_p3::{FoldingFactor, SecurityAssumption};
 use xmss::iterate_hash;
 
 #[test]
@@ -69,7 +67,7 @@ fn benchmark_poseidon_chain() {
         &bytecode,
         (&public_input, &private_input),
         1 << (3 + LOG_CHAIN_LENGTH),
-        (false, false),
+        false,
         (&vec![], &vec![]),
     )
     .no_vec_runtime_memory;
@@ -87,25 +85,5 @@ fn benchmark_poseidon_chain() {
     let vm_time = time.elapsed();
 
     verify_execution(&bytecode, &public_input, proof_data, whir_config_builder()).unwrap();
 
-    let raw_proof = prove_poseidon2(&Poseidon2Config {
-        log_n_poseidons_16: LOG_CHAIN_LENGTH,
-        log_n_poseidons_24: 10, // (almost invisible cost)
-        univariate_skips: 4,
-        folding_factor: FoldingFactor::new(7, 4),
-        log_inv_rate: 1,
-        soundness_type: SecurityAssumption::CapacityBound,
-        pow_bits: 16,
-        security_level: 128,
-        rs_domain_initial_reduction_factor: 5,
-        max_num_variables_to_send_coeffs: 7,
-        display_logs: true,
-    });
 
     println!("VM proof time: {vm_time:?}");
-    println!("Raw Poseidon proof time: {:?}", raw_proof.prover_time);
-
-    println!(
-        "VM overhead: {:.2}x",
-        vm_time.as_secs_f64() / raw_proof.prover_time.as_secs_f64()
-    );
 }
diff --git a/crates/lean_prover/witness_generation/Cargo.toml b/crates/lean_prover/witness_generation/Cargo.toml
index 63186fe6..b57d4dca 100644
--- a/crates/lean_prover/witness_generation/Cargo.toml
+++ b/crates/lean_prover/witness_generation/Cargo.toml
@@ -31,3 +31,5 @@ lean_vm.workspace = true
 lean_compiler.workspace = true
 derive_more.workspace = true
 multilinear-toolkit.workspace = true
+poseidon_circuit.workspace = true
+p3-monty-31.workspace = true
diff --git a/crates/lean_prover/witness_generation/src/execution_trace.rs b/crates/lean_prover/witness_generation/src/execution_trace.rs
index 2125a642..0a7f3134 100644
--- a/crates/lean_prover/witness_generation/src/execution_trace.rs
+++ b/crates/lean_prover/witness_generation/src/execution_trace.rs
@@ -17,6 +17,7 @@ pub struct ExecutionTrace {
     pub full_trace: [Vec<F>; N_TOTAL_COLUMNS],
     pub n_poseidons_16: usize,
     pub n_poseidons_24: usize,
+    pub n_compressions_16: usize,
     pub poseidons_16: Vec<WitnessPoseidon16>, // padded with empty poseidons
     pub poseidons_24: Vec<WitnessPoseidon24>, // padded with empty poseidons
     pub dot_products: Vec<…>,
@@ -116,10 +117,14 @@ pub fn get_execution_trace(
         n_poseidons_24.next_power_of_two() - n_poseidons_24
     ]);
 
+    let n_compressions_16;
+    (poseidons_16, n_compressions_16) = put_poseidon16_compressions_at_the_end(&poseidons_16); // TODO avoid reallocation
+
     ExecutionTrace {
         full_trace: trace,
         n_poseidons_16,
         n_poseidons_24,
+        n_compressions_16,
         poseidons_16,
         poseidons_24,
         dot_products,
@@ -129,3 +134,20 @@ pub fn get_execution_trace(
         memory: memory_padded,
     }
 }
+
+fn put_poseidon16_compressions_at_the_end(
+    poseidons_16: &[WitnessPoseidon16],
+) -> (Vec<WitnessPoseidon16>, usize) {
+    let no_compression = poseidons_16
+        .par_iter()
+        .filter(|p| !p.is_compression)
+        .cloned()
+        .collect::<Vec<_>>();
+    let compression = poseidons_16
+        .par_iter()
+        .filter(|p| p.is_compression)
+        .cloned()
+        .collect::<Vec<_>>();
+    let n_compressions = compression.len();
+    ([no_compression, compression].concat(), n_compressions)
+}
diff --git a/crates/lean_prover/witness_generation/src/poseidon_tables.rs b/crates/lean_prover/witness_generation/src/poseidon_tables.rs
index 48513cb2..245ee343 100644
--- a/crates/lean_prover/witness_generation/src/poseidon_tables.rs
+++ b/crates/lean_prover/witness_generation/src/poseidon_tables.rs
@@ -1,63 +1,14 @@
-use lean_vm::{
-    F, POSEIDON_16_DEFAULT_COMPRESSION, POSEIDON_16_NULL_HASH_PTR, WitnessPoseidon16,
-    WitnessPoseidon24,
-};
-use p3_field::PrimeCharacteristicRing;
-use rayon::prelude::*;
-use tracing::instrument;
-use utils::{
-    POSEIDON16_AIR_N_COLS, POSEIDON24_AIR_N_COLS, default_poseidon16_air_row,
-    default_poseidon24_air_row, generate_trace_poseidon_16, generate_trace_poseidon_24,
-    padd_with_zero_to_next_power_of_two,
-};
-
-#[instrument(skip_all)]
-pub fn build_poseidon_24_columns(
-    poseidons_24: &[WitnessPoseidon24],
-    padding: usize,
-) -> Vec<Vec<F>> {
-    let inputs = poseidons_24.par_iter().map(|w| w.input).collect::<Vec<_>>();
-    let matrix = generate_trace_poseidon_24(&inputs);
-    let mut res = utils::transpose(&matrix.values, POSEIDON24_AIR_N_COLS, padding);
-    let default_p24_row = default_poseidon24_air_row();
-    assert_eq!(default_p24_row.len(), res.len());
-    res.par_iter_mut()
-        .zip(default_p24_row.par_iter())
-        .for_each(|(col, default_value)| {
-            col.resize(col.len() + padding, *default_value);
-        });
-    res
-}
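// Illustrative aside (not part of the patch): `put_poseidon16_compressions_at_the_end`
// above is a stable two-way partition that keeps all compression rows contiguous at
// the end of the table, so the verifier can evaluate the compression-indicator
// column in closed form via `mle_of_zeros_then_ones` instead of receiving it from
// the prover. A minimal sketch of the same reordering, with a hypothetical generic
// helper name:
fn stable_partition_to_end<T: Clone>(items: &[T], pred: impl Fn(&T) -> bool) -> (Vec<T>, usize) {
    // Collect the two classes, each in its original relative order.
    let kept: Vec<T> = items.iter().filter(|x| !pred(x)).cloned().collect();
    let moved: Vec<T> = items.iter().filter(|x| pred(x)).cloned().collect();
    let n_moved = moved.len();
    // Zeros-then-ones layout: every row matching `pred` sits at the end.
    ([kept, moved].concat(), n_moved)
}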
-#[instrument(skip_all)]
-pub fn build_poseidon_16_columns(
-    poseidons_16: &[WitnessPoseidon16],
-    padding: usize,
-) -> Vec<Vec<F>> {
-    let inputs = poseidons_16.par_iter().map(|w| w.input).collect::<Vec<_>>();
-    let compresses = poseidons_16
-        .par_iter()
-        .map(|w| w.is_compression)
-        .collect::<Vec<_>>();
-    let index_res = poseidons_16
-        .par_iter()
-        .map(|w| w.addr_output)
-        .collect::<Vec<_>>();
-    let matrix = generate_trace_poseidon_16(&inputs, &compresses, &index_res);
-    let mut res = utils::transpose(&matrix.values, POSEIDON16_AIR_N_COLS, padding);
+use std::array;
 
-    let default_p16_row =
-        default_poseidon16_air_row(POSEIDON_16_DEFAULT_COMPRESSION, POSEIDON_16_NULL_HASH_PTR);
-    assert_eq!(default_p16_row.len(), res.len());
-    res.par_iter_mut()
-        .zip(default_p16_row.par_iter())
-        .for_each(|(col, default_value)| {
-            col.resize(col.len() + padding, *default_value);
-        });
-    res
-}
+use lean_vm::{F, PoseidonWitnessTrait, WitnessPoseidon16, WitnessPoseidon24};
+use multilinear_toolkit::prelude::*;
+use p3_field::PrimeCharacteristicRing;
+use p3_koala_bear::{KoalaBearInternalLayerParameters, KoalaBearParameters};
+use p3_monty_31::InternalLayerBaseParameters;
+use poseidon_circuit::{PoseidonGKRLayers, PoseidonWitness, generate_poseidon_witness};
+use utils::{padd_with_zero_to_next_power_of_two, transposed_par_iter_mut};
 
-pub fn all_poseidon_16_indexes_input(poseidons_16: &[WitnessPoseidon16]) -> [Vec<F>; 2] {
+pub fn all_poseidon_16_indexes(poseidons_16: &[WitnessPoseidon16]) -> [Vec<F>; 3] {
     [
         poseidons_16
             .par_iter()
@@ -67,6 +18,10 @@ pub fn all_poseidon_16_indexes(
             .par_iter()
             .map(|p| F::from_usize(p.addr_input_b))
             .collect::<Vec<_>>(),
+        poseidons_16
+            .par_iter()
+            .map(|p| F::from_usize(p.addr_output))
+            .collect::<Vec<_>>(),
     ]
 }
 
@@ -128,3 +83,47 @@ pub fn full_poseidon_indexes_poly(
 
     all_poseidon_indexes
 }
+
+pub fn generate_poseidon_witness_helper<
+    const WIDTH: usize,
+    const N_COMMITED_CUBES: usize,
+    W: PoseidonWitnessTrait<WIDTH> + Send + Sync,
+>(
+    layers: &PoseidonGKRLayers<WIDTH, N_COMMITED_CUBES>,
+    inputs: &[W],
+    n_compressions: Option<usize>,
+) -> PoseidonWitness<…, WIDTH, N_COMMITED_CUBES>
+where
+    KoalaBearInternalLayerParameters: InternalLayerBaseParameters<KoalaBearParameters, WIDTH>,
+{
+    let n_poseidons = inputs.len();
+    assert!(n_poseidons.is_power_of_two());
+    let mut inputs_transposed: [_; WIDTH] =
+        array::from_fn(|_| unsafe { uninitialized_vec(n_poseidons) });
+    transposed_par_iter_mut(&mut inputs_transposed)
+        .enumerate()
+        .for_each(|(i, row)| {
+            for (j, p) in row.into_iter().enumerate() {
+                *p = inputs[i].inputs()[j];
+            }
+        });
+    let inputs_transposed_packed: [_; WIDTH] =
+        array::from_fn(|i| PFPacking::<F>::pack_slice(&inputs_transposed[i]).to_vec()); // TODO avoid cloning
+    generate_poseidon_witness::<PFPacking<F>, WIDTH, N_COMMITED_CUBES>(
+        inputs_transposed_packed,
+        layers,
+        n_compressions.map(|n_compressions| {
+            (
+                n_compressions,
+                PFPacking::<F>::pack_slice(
+                    &[
+                        vec![F::ZERO; n_poseidons - n_compressions],
+                        vec![F::ONE; n_compressions],
+                    ]
+                    .concat(),
+                )
+                .to_vec(),
+            )
+        }),
+    )
+}
diff --git a/crates/lean_vm/src/diagnostics/error.rs b/crates/lean_vm/src/diagnostics/error.rs
index 2ad9fac1..0a225733 100644
--- a/crates/lean_vm/src/diagnostics/error.rs
+++ b/crates/lean_vm/src/diagnostics/error.rs
@@ -45,4 +45,5 @@ pub struct ExecutionResult {
     pub poseidons_24: Vec<WitnessPoseidon24>,
     pub dot_products: Vec<…>,
     pub multilinear_evals: Vec<…>,
+    pub summary: String,
 }
diff --git a/crates/lean_vm/src/diagnostics/profiler.rs b/crates/lean_vm/src/diagnostics/profiler.rs
index 6fe43005..182ee016 100644
--- a/crates/lean_vm/src/diagnostics/profiler.rs
+++ b/crates/lean_vm/src/diagnostics/profiler.rs
@@ -57,8 +57,9 @@ pub(crate) fn profiling_report(
 
     let mut report = String::new();
 
-    report
-        .push_str("╔═════════════════════════════════════════════════════════════════════════╗\n");
+    report.push_str(
+        "\n╔═════════════════════════════════════════════════════════════════════════╗\n",
+    );
     report
         .push_str("║                             PROFILING REPORT                             ║\n");
     report.push_str(
diff --git a/crates/lean_vm/src/execution/runner.rs b/crates/lean_vm/src/execution/runner.rs
index ea6b03c6..df7da46a 100644
--- a/crates/lean_vm/src/execution/runner.rs
+++ b/crates/lean_vm/src/execution/runner.rs
@@ -65,7 +65,7 @@ pub fn execute_bytecode(
     bytecode: &Bytecode,
     (public_input, private_input): (&[F], &[F]),
     no_vec_runtime_memory: usize, // size of the "non-vectorized" runtime memory
-    (profiler, display_std_out_and_stats): (bool, bool),
+    profiler: bool,
     (poseidons_16_precomputed, poseidons_24_precomputed): (&Poseidon16History, &Poseidon24History),
 ) -> ExecutionResult {
     let mut std_out = String::new();
@@ -76,7 +76,7 @@ pub fn execute_bytecode(
         &mut std_out,
         &mut instruction_history,
         no_vec_runtime_memory,
-        (profiler, display_std_out_and_stats),
+        profiler,
         (poseidons_16_precomputed, poseidons_24_precomputed),
     )
     .unwrap_or_else(|err| {
@@ -151,7 +151,7 @@ fn execute_bytecode_helper(
     std_out: &mut String,
     instruction_history: &mut ExecutionHistory,
     no_vec_runtime_memory: usize,
-    (profiler, display_std_out_and_stats): (bool, bool),
+    profiler: bool,
     (poseidons_16_precomputed, poseidons_24_precomputed): (&Poseidon16History, &Poseidon24History),
 ) -> Result<…> {
     // set public memory
@@ -284,92 +284,110 @@ fn execute_bytecode_helper(
 
     let no_vec_runtime_memory = ap - initial_ap;
 
+    let mut summary = String::new();
+
     if profiler {
         let report =
             crate::diagnostics::profiling_report(instruction_history, &bytecode.function_locations);
-        println!("\n{report}");
+        summary.push_str(&report);
     }
 
-    if display_std_out_and_stats {
-        if !std_out.is_empty() {
-            println!("╔═════════════════════════════════════════════════════════════════════════╗");
-            println!("║                                  STD-OUT                                  ║");
-            println!("╚═════════════════════════════════════════════════════════════════════════╝");
-            print!("\n{std_out}");
-            println!(
-                "──────────────────────────────────────────────────────────────────────────\n"
-            );
-        }
-
-        println!("╔═════════════════════════════════════════════════════════════════════════╗");
-        println!("║                                   STATS                                   ║");
-        println!("╚═════════════════════════════════════════════════════════════════════════╝\n");
-
-        println!("CYCLES: {}", pretty_integer(cpu_cycles));
-        println!("MEMORY: {}", pretty_integer(memory.0.len()));
-        println!();
-        let runtime_memory_size = memory.0.len() - (PUBLIC_INPUT_START + public_input.len());
-        println!(
-            "Bytecode size: {}",
-            pretty_integer(bytecode.instructions.len())
+    if !std_out.is_empty() {
+        summary.push_str(
+            "╔═════════════════════════════════════════════════════════════════════════╗\n",
        );
-        println!("Public input size: {}", pretty_integer(public_input.len()));
-        println!(
-            "Private input size: {}",
-            pretty_integer(private_input.len())
+        summary.push_str(
+            "║                                  STD-OUT                                  ║\n",
        );
-        println!(
-            "Runtime memory: {} ({:.2}% vec) (no vec mem: {})",
-            pretty_integer(runtime_memory_size),
-            (VECTOR_LEN * (ap_vec - initial_ap_vec)) as f64 / runtime_memory_size as f64 * 100.0,
-            no_vec_runtime_memory
+        summary.push_str(
+            "╚═════════════════════════════════════════════════════════════════════════╝\n",
        );
-        let used_memory_cells = memory
-            .0
-            .iter()
-            .skip(PUBLIC_INPUT_START + public_input.len())
-            .filter(|&&x| x.is_some())
-            .count();
-        println!(
-            "Memory usage: {:.1}%",
-            used_memory_cells as f64 / runtime_memory_size as f64 * 100.0
+        summary.push_str(&format!("\n{std_out}"));
+        summary.push_str(
+            "──────────────────────────────────────────────────────────────────────────\n\n",
         );
+    }
 
-        println!();
-
-        if poseidons_16.len() + poseidons_24.len() > 0 {
-            println!(
-                "Poseidon2_16 calls: {}, Poseidon2_24 calls: {}, (1 poseidon per {} instructions)",
-                pretty_integer(poseidons_16.len()),
-                pretty_integer(poseidons_24.len()),
-                cpu_cycles / (poseidons_16.len() + poseidons_24.len())
-            );
-        }
-        if !dot_products.is_empty() {
-            println!("DotProduct calls: {}", pretty_integer(dot_products.len()));
-        }
-        if !multilinear_evals.is_empty() {
-            println!(
-                "MultilinearEval calls: {}",
-                pretty_integer(multilinear_evals.len())
-            );
-        }
+    summary
+        .push_str("╔═════════════════════════════════════════════════════════════════════════╗\n");
+    summary
+        .push_str("║                                   STATS                                   ║\n");
+    summary.push_str(
+        "╚═════════════════════════════════════════════════════════════════════════╝\n\n",
+    );
 
-        if false {
-            println!("Low level instruction counts:");
-            println!(
-                "COMPUTE: {} ({} ADD, {} MUL)",
-                add_counts + mul_counts,
-                add_counts,
-                mul_counts
-            );
-            println!("DEREF: {deref_counts}");
-            println!("JUMP: {jump_counts}");
-        }
+    summary.push_str(&format!("CYCLES: {}\n", pretty_integer(cpu_cycles)));
+    summary.push_str(&format!("MEMORY: {}\n", pretty_integer(memory.0.len())));
+    summary.push('\n');
+
+    let runtime_memory_size = memory.0.len() - (PUBLIC_INPUT_START + public_input.len());
+    summary.push_str(&format!(
+        "Bytecode size: {}\n",
+        pretty_integer(bytecode.instructions.len())
+    ));
+    summary.push_str(&format!(
+        "Public input size: {}\n",
+        pretty_integer(public_input.len())
+    ));
+    summary.push_str(&format!(
+        "Private input size: {}\n",
+        pretty_integer(private_input.len())
+    ));
+    summary.push_str(&format!(
+        "Runtime memory: {} ({:.2}% vec) (no vec mem: {})\n",
+        pretty_integer(runtime_memory_size),
+        (VECTOR_LEN * (ap_vec - initial_ap_vec)) as f64 / runtime_memory_size as f64 * 100.0,
+        no_vec_runtime_memory
+    ));
+    let used_memory_cells = memory
+        .0
+        .iter()
+        .skip(PUBLIC_INPUT_START + public_input.len())
+        .filter(|&&x| x.is_some())
+        .count();
+    summary.push_str(&format!(
+        "Memory usage: {:.1}%\n",
+        used_memory_cells as f64 / runtime_memory_size as f64 * 100.0
+    ));
+
+    summary.push('\n');
+
+    if poseidons_16.len() + poseidons_24.len() > 0 {
+        summary.push_str(&format!(
+            "Poseidon2_16 calls: {}, Poseidon2_24 calls: {}, (1 poseidon per {} instructions)\n",
+            pretty_integer(poseidons_16.len()),
+            pretty_integer(poseidons_24.len()),
+            cpu_cycles / (poseidons_16.len() + poseidons_24.len())
+        ));
+    }
+    if !dot_products.is_empty() {
+        summary.push_str(&format!(
+            "DotProduct calls: {}\n",
+            pretty_integer(dot_products.len())
+        ));
+    }
+    if !multilinear_evals.is_empty() {
+        summary.push_str(&format!(
+            "MultilinearEval calls: {}\n",
+            pretty_integer(multilinear_evals.len())
+        ));
+    }
 
-        println!("──────────────────────────────────────────────────────────────────────────\n");
+    if false {
+        summary.push_str("Low level instruction counts:\n");
+        summary.push_str(&format!(
+            "COMPUTE: {} ({} ADD, {} MUL)\n",
+            add_counts + mul_counts,
+            add_counts,
+            mul_counts
+        ));
+        summary.push_str(&format!("DEREF: {deref_counts}\n"));
+        summary.push_str(&format!("JUMP: {jump_counts}\n"));
     }
 
+    summary
+        .push_str("──────────────────────────────────────────────────────────────────────────\n");
+
     Ok(ExecutionResult {
         no_vec_runtime_memory,
         public_memory_size,
@@ -380,5 +398,6 @@ fn execute_bytecode_helper(
         poseidons_24,
         dot_products,
         multilinear_evals,
+        summary,
     })
 }
diff --git a/crates/lean_vm/src/witness/mod.rs b/crates/lean_vm/src/witness/mod.rs
index 9af192ec..d0d9d694 100644
--- a/crates/lean_vm/src/witness/mod.rs
+++ b/crates/lean_vm/src/witness/mod.rs
@@ -9,3 +9,9 @@ pub use dot_product::*;
 pub use multilinear_eval::*;
 pub use poseidon16::*;
 pub use poseidon24::*;
+
+use crate::F;
+
+pub trait PoseidonWitnessTrait<const WIDTH: usize> {
+    fn inputs(&self) -> &[F; WIDTH];
+}
diff --git a/crates/lean_vm/src/witness/poseidon16.rs b/crates/lean_vm/src/witness/poseidon16.rs
index 62fe9d34..d223995d 100644
--- a/crates/lean_vm/src/witness/poseidon16.rs
+++ b/crates/lean_vm/src/witness/poseidon16.rs
@@ -1,6 +1,9 @@
 //! Poseidon2 hash witness for 16-element input
 
-use crate::core::{F, POSEIDON_16_NULL_HASH_PTR, ZERO_VEC_PTR};
+use crate::{
+    PoseidonWitnessTrait,
+    core::{F, POSEIDON_16_NULL_HASH_PTR, ZERO_VEC_PTR},
+};
 use p3_field::PrimeCharacteristicRing;
 
 pub const POSEIDON_16_DEFAULT_COMPRESSION: bool = true;
@@ -22,6 +25,13 @@ pub struct WitnessPoseidon16 {
     pub is_compression: bool,
 }
 
+impl PoseidonWitnessTrait<16> for WitnessPoseidon16 {
+    #[inline(always)]
+    fn inputs(&self) -> &[F; 16] {
+        &self.input
+    }
+}
+
 impl WitnessPoseidon16 {
     /// Create a precomputed Poseidon16 witness for all-zero input
     ///
diff --git a/crates/lean_vm/src/witness/poseidon24.rs b/crates/lean_vm/src/witness/poseidon24.rs
index d77c8a2b..4912f311 100644
--- a/crates/lean_vm/src/witness/poseidon24.rs
+++ b/crates/lean_vm/src/witness/poseidon24.rs
@@ -1,6 +1,9 @@
 //! Poseidon2 hash witness for 24-element input
 
-use crate::core::{F, POSEIDON_24_NULL_HASH_PTR, ZERO_VEC_PTR};
+use crate::{
+    PoseidonWitnessTrait,
+    core::{F, POSEIDON_24_NULL_HASH_PTR, ZERO_VEC_PTR},
+};
 use p3_field::PrimeCharacteristicRing;
 
 /// Witness data for Poseidon2 over 24 field elements
@@ -18,6 +21,13 @@ pub struct WitnessPoseidon24 {
     pub input: [F; 24],
 }
 
+impl PoseidonWitnessTrait<24> for WitnessPoseidon24 {
+    #[inline(always)]
+    fn inputs(&self) -> &[F; 24] {
+        &self.input
+    }
+}
+
 impl WitnessPoseidon24 {
     /// Create a new Poseidon24 witness with all hash data
     pub const fn new(
diff --git a/crates/lean_vm/tests/test_lean_vm.rs b/crates/lean_vm/tests/test_lean_vm.rs
index 26428023..a6bc6e54 100644
--- a/crates/lean_vm/tests/test_lean_vm.rs
+++ b/crates/lean_vm/tests/test_lean_vm.rs
@@ -208,9 +208,10 @@ fn run_program() -> (Bytecode, ExecutionResult) {
         &bytecode,
         (&public_input, &[]),
         1 << 20,
-        (false, true),
+        false,
         (&vec![], &vec![]),
     );
+    println!("{}", result.summary);
     (bytecode, result)
 }
diff --git a/crates/lookup/src/product_gkr.rs b/crates/lookup/src/product_gkr.rs
index d31960df..04294797 100644
--- a/crates/lookup/src/product_gkr.rs
+++ b/crates/lookup/src/product_gkr.rs
@@ -119,11 +119,10 @@ where
     EF: ExtensionField<PF>,
     PF: PrimeField64,
 {
-    let (sc_point, inner_evals, _) = sumcheck_prove::<…>(
+    let (sc_point, inner_evals, _) = sumcheck_prove::<…>(
         1,
         up_layer,
         &ProductComputation,
-        &ProductComputation,
         &[],
         Some((claim.point.0.clone(), None)),
         false,
diff --git a/crates/lookup/src/quotient_gkr.rs b/crates/lookup/src/quotient_gkr.rs
index 83fe6008..6979150c 100644
--- a/crates/lookup/src/quotient_gkr.rs
+++ b/crates/lookup/src/quotient_gkr.rs
@@ -166,11 +166,10 @@ where
             vec![u0_folded[0], u1_folded[0], u2_folded[0], u3_folded[0]],
         )
     } else {
-        let (mut sc_point, inner_evals, _) = sumcheck_prove::<…>(
+        let (mut sc_point, inner_evals, _) = sumcheck_prove::<…>(
             1,
             MleGroupRef::Extension(vec![u0_folded, u1_folded, u2_folded, u3_folded]),
             &GKRQuotientComputation { u4_const, u5_const },
-            &GKRQuotientComputation { u4_const, u5_const },
             &[],
             Some((claim.point.0[1..].to_vec(), None)),
             false,
@@ -409,7 +408,7 @@ where
         .map(|(&l, &r)| c_packed - l + sumcheck_challenge_2_packed * (l - r))
         .collect();
 
-    let (mut sc_point, quarter_evals, _) = sumcheck_fold_and_prove::<…>(
+    let (mut sc_point, quarter_evals, _) = sumcheck_fold_and_prove::<…>(
         1,
         MleGroupOwned::ExtensionPacked(vec![
             u0_folded_packed,
             u1_folded_packed,
             u2_folded_packed,
             u3_folded_packed,
         ]),
         None,
         &GKRQuotientComputation { u4_const, u5_const },
-        &GKRQuotientComputation { u4_const, u5_const },
         &[],
         Some((
             claim.point.0[2..].to_vec(),
@@ -620,7 +618,7 @@ where
         + claim.point[1] * sumcheck_challenge_2)
         / (EF::ONE - claim.point.get(2).copied().unwrap_or_default());
 
-    let (mut sc_point, quarter_evals, _) = sumcheck_fold_and_prove::<…>(
+    let (mut sc_point, quarter_evals, _) = sumcheck_fold_and_prove::<…>(
         1,
         MleGroupRef::ExtensionPacked(vec![
             u0_folded_packed,
             u1_folded_packed,
             u2_folded_packed,
             u3_folded_packed,
         ]),
         Some(vec![EF::ONE - sumcheck_challenge_2, sumcheck_challenge_2]),
         &GKRQuotientComputation { u4_const, u5_const },
-        &GKRQuotientComputation { u4_const, u5_const },
         &[],
         Some((
             claim.point.0[2..].to_vec(),
diff --git a/crates/poseidon_circuit/Cargo.toml b/crates/poseidon_circuit/Cargo.toml
new file mode 100644
index 00000000..a995a9d9
--- /dev/null
+++ b/crates/poseidon_circuit/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "poseidon_circuit"
+version.workspace = true
+edition.workspace = true
+
+[lints]
+workspace = true
+
+[dependencies]
+p3-field.workspace = true
+tracing.workspace = true
+utils.workspace = true
+# p3-util.workspace = true
+multilinear-toolkit.workspace = true
+p3-koala-bear.workspace = true
+p3-poseidon2.workspace = true
+p3-monty-31.workspace = true
+rand.workspace = true
+whir-p3.workspace = true
+packed_pcs.workspace = true
+
diff --git a/crates/poseidon_circuit/src/gkr_layers/batch_partial_rounds.rs b/crates/poseidon_circuit/src/gkr_layers/batch_partial_rounds.rs
new file mode 100644
index 00000000..f806eed9
--- /dev/null
+++ b/crates/poseidon_circuit/src/gkr_layers/batch_partial_rounds.rs
@@ -0,0 +1,101 @@
+use std::array;
+
+use multilinear_toolkit::prelude::*;
+use p3_field::ExtensionField;
+use p3_koala_bear::{
+    GenericPoseidon2LinearLayersKoalaBear, KoalaBearInternalLayerParameters, KoalaBearParameters,
+};
+use p3_monty_31::InternalLayerBaseParameters;
+use p3_poseidon2::GenericPoseidon2LinearLayers;
+
+use crate::{EF, F};
+
+#[derive(Debug)]
+pub struct BatchPartialRounds<const N_COMMITED_CUBES: usize> {
+    pub constants: [F; N_COMMITED_CUBES],
+    pub last_constant: F,
+}
+
+impl<NF: ExtensionField<F>, const WIDTH: usize, const N_COMMITED_CUBES: usize>
+    SumcheckComputation<…> for BatchPartialRounds<N_COMMITED_CUBES>
+where
+    KoalaBearInternalLayerParameters: InternalLayerBaseParameters<KoalaBearParameters, WIDTH>,
+    EF: ExtensionField<NF>,
+{
+    fn degree(&self) -> usize {
+        3
+    }
+
+    fn eval(&self, point: &[NF], alpha_powers: &[EF]) -> EF {
+        debug_assert_eq!(point.len(), WIDTH + N_COMMITED_CUBES);
+        debug_assert_eq!(alpha_powers.len(), WIDTH + N_COMMITED_CUBES);
+
+        let mut res = EF::ZERO;
+        let mut buff: [NF; WIDTH] = array::from_fn(|j| point[j]);
+        for (i, &constant) in self.constants.iter().enumerate() {
+            let computed_cube = (buff[0] + constant).cube();
+            res += alpha_powers[WIDTH + i] * computed_cube;
+            buff[0] = point[WIDTH + i]; // commited cube
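            // Illustrative note (not from the patch): committing the S-box output
            // of each partial round is what keeps this constraint at degree 3.
            // Composing k partial rounds directly would raise the sumcheck degree
            // to 3^k (e.g. 3^5 = 243 after only five rounds); with one committed
            // cube per round, every round instead contributes a fresh degree-1
            // input `point[WIDTH + i]` plus a single degree-3 check tying that
            // committed value to `(buff[0] + constant)^3` above.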
+            GenericPoseidon2LinearLayersKoalaBear::internal_linear_layer(&mut buff);
+        }
+
+        buff[0] = (buff[0] + self.last_constant).cube();
+        GenericPoseidon2LinearLayersKoalaBear::internal_linear_layer(&mut buff);
+        for i in 0..WIDTH {
+            res += alpha_powers[i] * buff[i];
+        }
+        res
+    }
+}
+
+impl<const WIDTH: usize, const N_COMMITED_CUBES: usize> SumcheckComputationPacked<…>
+    for BatchPartialRounds<N_COMMITED_CUBES>
+where
+    KoalaBearInternalLayerParameters: InternalLayerBaseParameters<KoalaBearParameters, WIDTH>,
+{
+    fn degree(&self) -> usize {
+        3
+    }
+
+    fn eval_packed_base(&self, point: &[FPacking<F>], alpha_powers: &[EF]) -> EFPacking<EF> {
+        debug_assert_eq!(point.len(), WIDTH + N_COMMITED_CUBES);
+        debug_assert_eq!(alpha_powers.len(), WIDTH + N_COMMITED_CUBES);
+
+        let mut res = EFPacking::<EF>::ZERO;
+        let mut buff: [FPacking<F>; WIDTH] = array::from_fn(|j| point[j]);
+        for (i, &constant) in self.constants.iter().enumerate() {
+            let computed_cube = (buff[0] + constant).cube();
+            res += EFPacking::<EF>::from(alpha_powers[WIDTH + i]) * computed_cube;
+            buff[0] = point[WIDTH + i]; // commited cube
+            GenericPoseidon2LinearLayersKoalaBear::internal_linear_layer(&mut buff);
+        }
+
+        buff[0] = (buff[0] + self.last_constant).cube();
+        GenericPoseidon2LinearLayersKoalaBear::internal_linear_layer(&mut buff);
+        for i in 0..WIDTH {
+            res += EFPacking::<EF>::from(alpha_powers[i]) * buff[i];
+        }
+        res
+    }
+
+    fn eval_packed_extension(&self, point: &[EFPacking<EF>], alpha_powers: &[EF]) -> EFPacking<EF> {
+        debug_assert_eq!(point.len(), WIDTH + N_COMMITED_CUBES);
+        debug_assert_eq!(alpha_powers.len(), WIDTH + N_COMMITED_CUBES);
+
+        let mut res = EFPacking::<EF>::ZERO;
+        let mut buff: [EFPacking<EF>; WIDTH] = array::from_fn(|j| point[j]);
+        for (i, &constant) in self.constants.iter().enumerate() {
+            let computed_cube = (buff[0] + PFPacking::<F>::from(constant)).cube();
+            res += computed_cube * alpha_powers[WIDTH + i];
+            buff[0] = point[WIDTH + i]; // commited cube
+            GenericPoseidon2LinearLayersKoalaBear::internal_linear_layer(&mut buff);
+        }
+
+        buff[0] = (buff[0] + PFPacking::<F>::from(self.last_constant)).cube();
+        GenericPoseidon2LinearLayersKoalaBear::internal_linear_layer(&mut buff);
+        for i in 0..WIDTH {
+            res += buff[i] * alpha_powers[i];
+        }
+        res
+    }
+}
diff --git a/crates/poseidon_circuit/src/gkr_layers/full_round.rs b/crates/poseidon_circuit/src/gkr_layers/full_round.rs
new file mode 100644
index 00000000..fd1214b3
--- /dev/null
+++ b/crates/poseidon_circuit/src/gkr_layers/full_round.rs
@@ -0,0 +1,145 @@
+use std::array;
+
+use multilinear_toolkit::prelude::*;
+use p3_field::ExtensionField;
+use p3_koala_bear::{
+    GenericPoseidon2LinearLayersKoalaBear, KoalaBearInternalLayerParameters, KoalaBearParameters,
+};
+use p3_monty_31::InternalLayerBaseParameters;
+use p3_poseidon2::GenericPoseidon2LinearLayers;
+
+use crate::{EF, F};
+
+#[derive(Debug)]
+pub struct FullRoundComputation<const WIDTH: usize, const FIRST: bool> {
+    pub constants: [F; WIDTH],
+    pub compressed_output: Option<usize>,
+}
+
+impl<NF: ExtensionField<F>, const WIDTH: usize, const FIRST: bool> SumcheckComputation<…>
+    for FullRoundComputation<WIDTH, FIRST>
+where
+    KoalaBearInternalLayerParameters: InternalLayerBaseParameters<KoalaBearParameters, WIDTH>,
+    EF: ExtensionField<NF>,
+{
+    fn degree(&self) -> usize {
+        3 + self.compressed_output.is_some() as usize
+    }
+
+    fn eval(&self, point: &[NF], alpha_powers: &[EF]) -> EF {
+        debug_assert_eq!(
+            point.len(),
+            WIDTH
+                + if self.compressed_output.is_some() {
+                    1
+                } else {
+                    0
+                }
+        );
+        let mut buff: [NF; WIDTH] = array::from_fn(|j| point[j]);
+        if FIRST {
+            GenericPoseidon2LinearLayersKoalaBear::external_linear_layer(&mut buff);
+        }
+        buff.iter_mut().enumerate().for_each(|(j, val)| {
+            *val = (*val + self.constants[j]).cube();
+        });
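        // Illustrative note (not from the patch): one full Poseidon2 round is
        // `state := M_ext((state + round_constants)^3)`, cubing every limb, which
        // is why `degree()` reports 3 in the plain case. When `compressed_output`
        // is Some(w), only the first w output limbs are exposed unconditionally;
        // the remaining limbs are multiplied by `(1 - compressed)` below, gating
        // them out on compression rows and raising the constraint degree from 3
        // to 4, matching `3 + self.compressed_output.is_some() as usize` above.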
+        GenericPoseidon2LinearLayersKoalaBear::external_linear_layer(&mut buff);
+        let mut res = EF::ZERO;
+        if let Some(compression_output_width) = self.compressed_output {
+            let compressed = point[WIDTH];
+            for i in 0..compression_output_width {
+                res += alpha_powers[i] * buff[i];
+            }
+            for i in compression_output_width..WIDTH {
+                res += alpha_powers[i] * buff[i] * (EF::ONE - compressed);
+            }
+        } else {
+            for i in 0..WIDTH {
+                res += alpha_powers[i] * buff[i];
+            }
+        }
+        res
+    }
+}
+
+impl<const WIDTH: usize, const FIRST: bool> SumcheckComputationPacked<…>
+    for FullRoundComputation<WIDTH, FIRST>
+where
+    KoalaBearInternalLayerParameters: InternalLayerBaseParameters<KoalaBearParameters, WIDTH>,
+{
+    fn degree(&self) -> usize {
+        3 + self.compressed_output.is_some() as usize
+    }
+
+    fn eval_packed_base(&self, point: &[PFPacking<F>], alpha_powers: &[EF]) -> EFPacking<EF> {
+        debug_assert_eq!(
+            point.len(),
+            WIDTH
+                + if self.compressed_output.is_some() {
+                    1
+                } else {
+                    0
+                }
+        );
+        let mut buff: [PFPacking<F>; WIDTH] = array::from_fn(|j| point[j]);
+        if FIRST {
+            GenericPoseidon2LinearLayersKoalaBear::external_linear_layer(&mut buff);
+        }
+        buff.iter_mut().enumerate().for_each(|(j, val)| {
+            *val = (*val + self.constants[j]).cube();
+        });
+        GenericPoseidon2LinearLayersKoalaBear::external_linear_layer(&mut buff);
+        let mut res = EFPacking::<EF>::ZERO;
+        if let Some(compression_output_width) = self.compressed_output {
+            let compressed = point[WIDTH];
+            for i in 0..compression_output_width {
+                res += EFPacking::<EF>::from(alpha_powers[i]) * buff[i];
+            }
+            for i in compression_output_width..WIDTH {
+                res += EFPacking::<EF>::from(alpha_powers[i])
+                    * buff[i]
+                    * (PFPacking::<F>::ONE - compressed);
+            }
+        } else {
+            for j in 0..WIDTH {
+                res += EFPacking::<EF>::from(alpha_powers[j]) * buff[j];
+            }
+        }
+        res
+    }
+
+    fn eval_packed_extension(&self, point: &[EFPacking<EF>], alpha_powers: &[EF]) -> EFPacking<EF> {
+        debug_assert_eq!(
+            point.len(),
+            WIDTH
+                + if self.compressed_output.is_some() {
+                    1
+                } else {
+                    0
+                }
+        );
+        let mut buff: [EFPacking<EF>; WIDTH] = array::from_fn(|j| point[j]);
+        if FIRST {
+            GenericPoseidon2LinearLayersKoalaBear::external_linear_layer(&mut buff);
+        }
+        buff.iter_mut().enumerate().for_each(|(j, val)| {
+            *val = (*val + PFPacking::<F>::from(self.constants[j])).cube();
+        });
+        GenericPoseidon2LinearLayersKoalaBear::external_linear_layer(&mut buff);
+        let mut res = EFPacking::<EF>::ZERO;
+        if let Some(compression_output_width) = self.compressed_output {
+            let compressed = point[WIDTH];
+            for i in 0..compression_output_width {
+                res += buff[i] * alpha_powers[i];
+            }
+            for i in compression_output_width..WIDTH {
+                res += buff[i] * (EFPacking::<EF>::ONE - compressed) * alpha_powers[i];
+            }
+        } else {
+            for j in 0..WIDTH {
+                res += buff[j] * alpha_powers[j];
+            }
+        }
+        res
+    }
+}
diff --git a/crates/poseidon_circuit/src/gkr_layers/mod.rs b/crates/poseidon_circuit/src/gkr_layers/mod.rs
new file mode 100644
index 00000000..a3e453b6
--- /dev/null
+++ b/crates/poseidon_circuit/src/gkr_layers/mod.rs
@@ -0,0 +1,96 @@
+mod full_round;
+pub use full_round::*;
+
+mod partial_round;
+pub use partial_round::*;
+
+mod batch_partial_rounds;
+pub use batch_partial_rounds::*;
+
+use p3_koala_bear::{
+    KOALABEAR_RC16_EXTERNAL_FINAL, KOALABEAR_RC16_EXTERNAL_INITIAL, KOALABEAR_RC16_INTERNAL,
+    KOALABEAR_RC24_EXTERNAL_FINAL, KOALABEAR_RC24_EXTERNAL_INITIAL, KOALABEAR_RC24_INTERNAL,
+};
+
+use crate::F;
+
+#[derive(Debug)]
+pub struct PoseidonGKRLayers<const WIDTH: usize, const N_COMMITED_CUBES: usize> {
+    pub initial_full_round: FullRoundComputation<…>,
+    pub initial_full_rounds_remaining: Vec<FullRoundComputation<…>>,
+    pub batch_partial_rounds: BatchPartialRounds<N_COMMITED_CUBES>,
+    pub partial_rounds_remaining: Vec<PartialRoundComputation<…>>,
+    pub final_full_rounds: Vec<FullRoundComputation<…>>,
+}
+
+impl<const WIDTH: usize, const N_COMMITED_CUBES: usize> PoseidonGKRLayers<WIDTH, N_COMMITED_CUBES> {
+    pub fn build(compressed_output: Option<usize>) -> Self {
+        match WIDTH {
+            16 => unsafe {
+                Self::build_generic(
+                    &*(&KOALABEAR_RC16_EXTERNAL_INITIAL as *const [[F; 16]]
+                        as *const [[F; WIDTH]]),
+                    &KOALABEAR_RC16_INTERNAL,
+                    &*(&KOALABEAR_RC16_EXTERNAL_FINAL as *const [[F; 16]] as *const [[F; WIDTH]]),
+                    compressed_output,
+                )
+            },
+            24 => unsafe {
+                Self::build_generic(
+                    &*(&KOALABEAR_RC24_EXTERNAL_INITIAL as *const [[F; 24]]
+                        as *const [[F; WIDTH]]),
+                    &KOALABEAR_RC24_INTERNAL,
+                    &*(&KOALABEAR_RC24_EXTERNAL_FINAL as *const [[F; 24]] as *const [[F; WIDTH]]),
+                    compressed_output,
+                )
+            },
+            _ => panic!("Only Poseidon 16 and 24 are supported currently"),
+        }
+    }
+
+    fn build_generic(
+        initial_constants: &[[F; WIDTH]],
+        internal_constants: &[F],
+        final_constants: &[[F; WIDTH]],
+        compressed_output: Option<usize>,
+    ) -> Self {
+        let initial_full_round = FullRoundComputation {
+            constants: initial_constants[0],
+            compressed_output: None,
+        };
+        let initial_full_rounds_remaining = initial_constants[1..]
+            .iter()
+            .map(|&constants| FullRoundComputation {
+                constants,
+                compressed_output: None,
+            })
+            .collect::<Vec<_>>();
+        let batch_partial_rounds = BatchPartialRounds {
+            constants: internal_constants[..N_COMMITED_CUBES].try_into().unwrap(),
+            last_constant: internal_constants[N_COMMITED_CUBES],
+        };
+        let partial_rounds_remaining = internal_constants[N_COMMITED_CUBES + 1..]
+            .iter()
+            .map(|&constant| PartialRoundComputation { constant })
+            .collect::<Vec<_>>();
+        let final_full_rounds = final_constants
+            .iter()
+            .enumerate()
+            .map(|(i, &constants)| FullRoundComputation {
+                constants,
+                compressed_output: if i == final_constants.len() - 1 {
+                    compressed_output
+                } else {
+                    None
+                },
+            })
+            .collect::<Vec<_>>();
+        Self {
+            initial_full_round,
+            initial_full_rounds_remaining,
+            batch_partial_rounds,
+            partial_rounds_remaining,
+            final_full_rounds,
+        }
+    }
+}
diff --git a/crates/poseidon_circuit/src/gkr_layers/partial_round.rs b/crates/poseidon_circuit/src/gkr_layers/partial_round.rs
new file mode 100644
index 00000000..aed313af
--- /dev/null
+++ b/crates/poseidon_circuit/src/gkr_layers/partial_round.rs
@@ -0,0 +1,76 @@
+use multilinear_toolkit::prelude::*;
+use p3_field::ExtensionField;
+use p3_koala_bear::{
+    GenericPoseidon2LinearLayersKoalaBear, KoalaBearInternalLayerParameters, KoalaBearParameters,
+};
+use p3_monty_31::InternalLayerBaseParameters;
+use p3_poseidon2::GenericPoseidon2LinearLayers;
+
+use crate::{EF, F};
+
+#[derive(Debug)]
+pub struct PartialRoundComputation {
+    pub constant: F,
+}
+
+impl<NF: ExtensionField<F>, const WIDTH: usize> SumcheckComputation<…>
+    for PartialRoundComputation
+where
+    KoalaBearInternalLayerParameters: InternalLayerBaseParameters<KoalaBearParameters, WIDTH>,
+    EF: ExtensionField<NF>,
+{
+    fn degree(&self) -> usize {
+        3
+    }
+
+    fn eval(&self, point: &[NF], alpha_powers: &[EF]) -> EF {
+        debug_assert_eq!(point.len(), WIDTH);
+        let first_cubed = (point[0] + self.constant).cube();
+        let mut buff = [NF::ZERO; WIDTH];
+        buff[0] = first_cubed;
+        buff[1..WIDTH].copy_from_slice(&point[1..WIDTH]);
+        GenericPoseidon2LinearLayersKoalaBear::internal_linear_layer(&mut buff);
+        let mut res = EF::ZERO;
+        for i in 0..WIDTH {
+            res += alpha_powers[i] * buff[i];
+        }
+        res
+    }
+}
+
+impl<const WIDTH: usize> SumcheckComputationPacked<…> for PartialRoundComputation
+where
+    KoalaBearInternalLayerParameters: InternalLayerBaseParameters<KoalaBearParameters, WIDTH>,
+{
+    fn degree(&self) -> usize {
+        3
+    }
+
+    fn eval_packed_base(&self, point: &[FPacking<F>], alpha_powers: &[EF]) -> EFPacking<EF> {
+        debug_assert_eq!(point.len(), WIDTH);
+        let first_cubed = (point[0] + self.constant).cube();
+        let mut buff = [PFPacking::<F>::ZERO; WIDTH];
+        buff[0] = first_cubed;
+        buff[1..WIDTH].copy_from_slice(&point[1..WIDTH]);
+        GenericPoseidon2LinearLayersKoalaBear::internal_linear_layer(&mut buff);
+        let mut res = EFPacking::<EF>::ZERO;
+        for j in 0..WIDTH {
+            res += EFPacking::<EF>::from(alpha_powers[j]) * buff[j];
+        }
+        res
+    }
+
+    fn eval_packed_extension(&self, point: &[EFPacking<EF>], alpha_powers: &[EF]) -> EFPacking<EF> {
+        debug_assert_eq!(point.len(), WIDTH);
+        let first_cubed = (point[0] + PFPacking::<F>::from(self.constant)).cube();
+        let mut buff = [EFPacking::<EF>::ZERO; WIDTH];
+        buff[0] = first_cubed;
+        buff[1..WIDTH].copy_from_slice(&point[1..WIDTH]);
+        GenericPoseidon2LinearLayersKoalaBear::internal_linear_layer(&mut buff);
+        let mut res = EFPacking::<EF>::ZERO;
+        for j in 0..WIDTH {
+            res += buff[j] * alpha_powers[j];
+        }
+        res
+    }
+}
diff --git a/crates/poseidon_circuit/src/lib.rs b/crates/poseidon_circuit/src/lib.rs
new file mode 100644
index 00000000..91eff4a2
--- /dev/null
+++ b/crates/poseidon_circuit/src/lib.rs
@@ -0,0 +1,28 @@
+#![cfg_attr(not(test), warn(unused_crate_dependencies))]
+
+use multilinear_toolkit::prelude::MultilinearPoint;
+use p3_koala_bear::{KoalaBear, QuinticExtensionFieldKB};
+
+mod prove;
+pub use prove::*;
+
+mod verify;
+pub use verify::*;
+
+mod witness_gen;
+pub use witness_gen::*;
+
+pub mod tests;
+
+pub mod gkr_layers;
+pub use gkr_layers::*;
+
+pub(crate) type F = KoalaBear;
+pub(crate) type EF = QuinticExtensionFieldKB;
+
+#[derive(Debug, Clone)]
+pub struct GKRPoseidonResult {
+    pub output_values: Vec<EF>, // of length width
+    pub input_statements: (MultilinearPoint<EF>, Vec<EF>), // of length width, remain to be proven
+    pub cubes_statements: (MultilinearPoint<EF>, Vec<EF>), // of length n_committed_cubes, remain to be proven
+}
diff --git a/crates/poseidon_circuit/src/prove.rs b/crates/poseidon_circuit/src/prove.rs
new file mode 100644
index 00000000..b17cbbdd
--- /dev/null
+++ b/crates/poseidon_circuit/src/prove.rs
@@ -0,0 +1,308 @@
+use crate::GKRPoseidonResult;
+use crate::{
+    EF, F, PoseidonWitness,
+    gkr_layers::{BatchPartialRounds, PoseidonGKRLayers},
+};
+use multilinear_toolkit::prelude::*;
+use p3_koala_bear::{KoalaBearInternalLayerParameters, KoalaBearParameters};
+use p3_monty_31::InternalLayerBaseParameters;
+use tracing::{info_span, instrument};
+
+#[instrument(skip_all)]
+pub fn prove_poseidon_gkr<const WIDTH: usize, const N_COMMITED_CUBES: usize>(
+    prover_state: &mut FSProver<…>,
+    witness: &PoseidonWitness<…, WIDTH, N_COMMITED_CUBES>,
+    mut claim_point: Vec<EF>,
+    univariate_skips: usize,
+    layers: &PoseidonGKRLayers<WIDTH, N_COMMITED_CUBES>,
+) -> GKRPoseidonResult
+where
+    KoalaBearInternalLayerParameters: InternalLayerBaseParameters<KoalaBearParameters, WIDTH>,
+{
+    let selectors = univariate_selectors::<…>(univariate_skips);
+
+    assert_eq!(claim_point.len(), log2_strict_usize(witness.n_poseidons()));
+
+    let (output_claims, mut claims) = info_span!("computing output claims").in_scope(|| {
+        let eq_poly = eval_eq(&claim_point[univariate_skips..]);
+        let inner_evals = witness
+            .output_layer
+            .par_iter()
+            .map(|poly| {
+                FPacking::<F>::unpack_slice(poly)
+                    .chunks_exact(eq_poly.len())
+                    .map(|chunk| dot_product(eq_poly.iter().copied(), chunk.iter().copied()))
+                    .collect::<Vec<_>>()
+            })
+            .collect::<Vec<_>>();
+        for evals in &inner_evals {
+            prover_state.add_extension_scalars(evals);
+        }
+        let alpha = prover_state.sample();
+        let selectors_at_alpha = selectors
+            .iter()
+            .map(|selector| selector.evaluate(alpha))
+            .collect::<Vec<_>>();
+
+        let mut output_claims = vec![];
+        let mut claims = vec![];
+        for evals in inner_evals {
+            output_claims
+                .push(evals.evaluate(&MultilinearPoint(claim_point[..univariate_skips].to_vec())));
+            claims.push(dot_product(
+                selectors_at_alpha.iter().copied(),
+                evals.into_iter(),
+            ))
+        }
+        claim_point = [vec![alpha], claim_point[univariate_skips..].to_vec()].concat();
+        (output_claims, claims)
+    });
+
+    for (i, (input_layers, full_round)) in witness
+        .final_full_round_inputs
+        .iter()
+        .zip(&layers.final_full_rounds)
+        .rev()
+        .enumerate()
+    {
+        let mut input_layers = input_layers.iter().map(Vec::as_slice).collect::<Vec<_>>();
+        if i == 0
+            && let Some((_, compression_indicator)) = &witness.compression
+        {
+            input_layers.push(compression_indicator);
+        }
+        (claim_point, claims) = prove_gkr_round(
+            prover_state,
+            full_round,
+            &input_layers,
+            &claim_point,
+            &claims,
+            univariate_skips,
+        );
+
+        if i == 0 && witness.compression.is_some() {
+            let _ = claims.pop().unwrap(); // the claim on the compression indicator columns can be evaluated by the verifier directly
+        }
+    }
+
+    for (input_layers, partial_round) in witness
+        .remaining_partial_round_inputs
+        .iter()
+        .zip(&layers.partial_rounds_remaining)
+        .rev()
+    {
+        (claim_point, claims) = prove_gkr_round(
+            prover_state,
+            partial_round,
+            input_layers,
+            &claim_point,
+            &claims,
+            univariate_skips,
+        );
+    }
+
+    (claim_point, claims) = prove_batch_internal_round(
+        prover_state,
+        &witness.batch_partial_round_input,
+        &witness.committed_cubes,
+        &layers.batch_partial_rounds,
+        &claim_point,
+        &claims,
+        &selectors,
+    );
+
+    let pcs_point_for_cubes = claim_point.clone();
+
+    claims = claims[..WIDTH].to_vec();
+
+    for (input_layers, full_round) in witness
+        .remaining_initial_full_round_inputs
+        .iter()
+        .zip(&layers.initial_full_rounds_remaining)
+        .rev()
+    {
+        (claim_point, claims) = prove_gkr_round(
+            prover_state,
+            full_round,
+            input_layers,
+            &claim_point,
+            &claims,
+            univariate_skips,
+        );
+    }
+    (claim_point, _) = prove_gkr_round(
+        prover_state,
+        &layers.initial_full_round,
+        &witness.input_layer,
+        &claim_point,
+        &claims,
+        univariate_skips,
+    );
+    let pcs_point_for_inputs = claim_point.clone();
+
+    let input_statements = inner_evals_on_commited_columns(
+        prover_state,
+        &pcs_point_for_inputs,
+        univariate_skips,
+        &witness.input_layer,
+    );
+    let cubes_statements = inner_evals_on_commited_columns(
+        prover_state,
+        &pcs_point_for_cubes,
+        univariate_skips,
+        &witness.committed_cubes,
+    );
+
+    GKRPoseidonResult {
+        output_values: output_claims,
+        input_statements,
+        cubes_statements,
+    }
+}
+
+#[instrument(skip_all)]
+fn prove_gkr_round<
+    SC: SumcheckComputation<…>
+        + SumcheckComputation<…>
+        + SumcheckComputationPacked<…>
+        + 'static,
+>(
+    prover_state: &mut FSProver<…>,
+    computation: &SC,
+    input_layers: &[impl AsRef<[PFPacking<F>]>],
+    claim_point: &[EF],
+    output_claims: &[EF],
+    univariate_skips: usize,
+) -> (Vec<EF>, Vec<EF>) {
+    let batching_scalar = prover_state.sample();
+    let batching_scalars_powers = batching_scalar.powers().collect_n(output_claims.len());
+    let batched_claim: EF = dot_product(
+        output_claims.iter().copied(),
+        batching_scalars_powers.iter().copied(),
+    );
+
+    let (sumcheck_point, sumcheck_inner_evals, sumcheck_final_sum) = sumcheck_prove(
+        univariate_skips,
+        MleGroupRef::BasePacked(input_layers.iter().map(|l| l.as_ref()).collect()),
+        computation,
+        &batching_scalars_powers,
+        Some((claim_point.to_vec(), None)),
+        false,
+        prover_state,
+        batched_claim,
+        None,
+    );
+
+    // sanity check
+    debug_assert_eq!(
+        computation.eval(&sumcheck_inner_evals, &batching_scalars_powers)
+            * eq_poly_with_skip(&sumcheck_point,
claim_point, univariate_skips), + sumcheck_final_sum + ); + + prover_state.add_extension_scalars(&sumcheck_inner_evals); + + (sumcheck_point.0, sumcheck_inner_evals) +} + +#[instrument(skip_all)] +fn prove_batch_internal_round( + prover_state: &mut FSProver>, + input_layers: &[Vec>], + committed_cubes: &[Vec>], + computation: &BatchPartialRounds, + claim_point: &[EF], + output_claims: &[EF], + selectors: &[DensePolynomial], +) -> (Vec, Vec) +where + KoalaBearInternalLayerParameters: InternalLayerBaseParameters, +{ + assert_eq!(input_layers.len(), WIDTH); + assert_eq!(committed_cubes.len(), N_COMMITED_CUBES); + let univariate_skips = log2_strict_usize(selectors.len()); + + let cubes_evals = info_span!("computing cube evals").in_scope(|| { + batch_evaluate_univariate_multilinear( + &committed_cubes + .iter() + .map(|l| PFPacking::::unpack_slice(l)) + .collect::>(), + claim_point, + selectors, + ) + }); + + prover_state.add_extension_scalars(&cubes_evals); + + let batching_scalar = prover_state.sample(); + let batched_claim: EF = dot_product( + output_claims.iter().chain(&cubes_evals).copied(), + batching_scalar.powers(), + ); + let batching_scalars_powers = batching_scalar.powers().collect_n(WIDTH + N_COMMITED_CUBES); + + let (sumcheck_point, sumcheck_inner_evals, sumcheck_final_sum) = sumcheck_prove( + univariate_skips, + MleGroupRef::BasePacked( + input_layers + .iter() + .chain(committed_cubes.iter()) + .map(Vec::as_slice) + .collect(), + ), + computation, + &batching_scalars_powers, + Some((claim_point.to_vec(), None)), + false, + prover_state, + batched_claim, + None, + ); + + // sanity check + debug_assert_eq!( + computation.eval(&sumcheck_inner_evals, &batching_scalars_powers) + * eq_poly_with_skip(&sumcheck_point, claim_point, univariate_skips), + sumcheck_final_sum + ); + + prover_state.add_extension_scalars(&sumcheck_inner_evals); + + (sumcheck_point.0, sumcheck_inner_evals) +} + +fn inner_evals_on_commited_columns( + prover_state: &mut FSProver>, + point: &[EF], + univariate_skips: usize, + columns: &[Vec>], +) -> (MultilinearPoint, Vec) { + let eq_mle = eval_eq_packed(&point[1..]); + let inner_evals = columns + .par_iter() + .map(|col| { + col.chunks_exact(eq_mle.len()) + .map(|chunk| { + let ef_sum = dot_product::, _, _>( + eq_mle.iter().copied(), + chunk.iter().copied(), + ); + as PackedFieldExtension>::to_ext_iter([ef_sum]) + .sum::() + }) + .collect::>() + }) + .flatten() + .collect::>(); + prover_state.add_extension_scalars(&inner_evals); + let mut values_to_prove = vec![]; + let pcs_batching_scalars_inputs = prover_state.sample_vec(univariate_skips); + let point_to_prove = + MultilinearPoint([pcs_batching_scalars_inputs.clone(), point[1..].to_vec()].concat()); + for col_inner_evals in inner_evals.chunks_exact(1 << univariate_skips) { + values_to_prove + .push(col_inner_evals.evaluate(&MultilinearPoint(pcs_batching_scalars_inputs.clone()))); + } + (point_to_prove, values_to_prove) +} diff --git a/crates/poseidon_circuit/src/tests.rs b/crates/poseidon_circuit/src/tests.rs new file mode 100644 index 00000000..8f9ddc42 --- /dev/null +++ b/crates/poseidon_circuit/src/tests.rs @@ -0,0 +1,315 @@ +use multilinear_toolkit::prelude::*; +use p3_koala_bear::{KoalaBear, QuinticExtensionFieldKB}; +use packed_pcs::{ + ColDims, packed_pcs_commit, packed_pcs_global_statements_for_prover, + packed_pcs_global_statements_for_verifier, packed_pcs_parse_commitment, +}; +use rand::{Rng, SeedableRng, rngs::StdRng}; +use std::{array, time::Instant}; +use utils::{ + build_prover_state, 
build_verifier_state, init_tracing, poseidon16_permute_mut, + poseidon24_permute_mut, transposed_par_iter_mut, +}; +use whir_p3::{ + FoldingFactor, SecurityAssumption, WhirConfig, WhirConfigBuilder, precompute_dft_twiddles, +}; + +use crate::{ + GKRPoseidonResult, default_cube_layers, generate_poseidon_witness, + gkr_layers::PoseidonGKRLayers, prove_poseidon_gkr, verify_poseidon_gkr, +}; + +type F = KoalaBear; +type EF = QuinticExtensionFieldKB; + +const WIDTH: usize = 16; + +const UNIVARIATE_SKIPS: usize = 3; +const N_COMMITED_CUBES: usize = 16; +const COMPRESSION_OUTPUT_WIDTH: usize = 8; + +#[test] +fn test_poseidon_benchmark() { + run_poseidon_benchmark(15, true); + run_poseidon_benchmark(15, false); +} + +pub fn run_poseidon_benchmark(log_n_poseidons: usize, compress: bool) { + init_tracing(); + precompute_dft_twiddles::(1 << 24); + + let whir_config_builder = WhirConfigBuilder { + folding_factor: FoldingFactor::new(7, 4), + soundness_type: SecurityAssumption::CapacityBound, + pow_bits: 16, + max_num_variables_to_send_coeffs: 6, + rs_domain_initial_reduction_factor: 5, + security_level: 128, + starting_log_inv_rate: 1, + }; + let whir_n_vars = log_n_poseidons + log2_ceil_usize(WIDTH + N_COMMITED_CUBES); + let whir_config = WhirConfig::new(whir_config_builder.clone(), whir_n_vars); + + let mut rng = StdRng::seed_from_u64(0); + let n_poseidons = 1 << log_n_poseidons; + let n_compressions = if compress { n_poseidons / 3 } else { 0 }; + + let perm_inputs = (0..n_poseidons) + .map(|_| rng.random()) + .collect::>(); + let input: [_; WIDTH] = + array::from_fn(|i| perm_inputs.par_iter().map(|x| x[i]).collect::>()); + let input_packed: [_; WIDTH] = + array::from_fn(|i| PFPacking::::pack_slice(&input[i]).to_vec()); + + let layers = PoseidonGKRLayers::::build( + compress.then_some(COMPRESSION_OUTPUT_WIDTH), + ); + + let default_cubes = default_cube_layers::(&layers); + let input_col_dims = vec![ColDims::padded(n_poseidons, F::ZERO); WIDTH]; + let cubes_col_dims = default_cubes + .iter() + .map(|&v| ColDims::padded(n_poseidons, v)) + .collect::>(); + let committed_col_dims = [input_col_dims, cubes_col_dims].concat(); + + let log_smallest_decomposition_chunk = 0; // unused because everything is a power of 2 + + let ( + mut verifier_state, + proof_size, + output_layer, + prover_duration, + output_values_prover, + claim_point, + ) = { + // ---------------------------------------------------- PROVER ---------------------------------------------------- + + let prover_time = Instant::now(); + + let witness = generate_poseidon_witness::, WIDTH, N_COMMITED_CUBES>( + input_packed, + &layers, + if compress { + Some(( + n_compressions, + PFPacking::::pack_slice( + &[ + vec![F::ZERO; n_poseidons - n_compressions], + vec![F::ONE; n_compressions], + ] + .concat(), + ) + .to_vec(), + )) + } else { + None + }, + ); + + let mut prover_state = build_prover_state::(); + + let committed_polys = witness + .input_layer + .iter() + .chain(&witness.committed_cubes) + .map(|s| PFPacking::::unpack_slice(s)) + .collect::>(); + + let pcs_commitment_witness = packed_pcs_commit( + &whir_config_builder, + &committed_polys, + &committed_col_dims, + &mut prover_state, + log_smallest_decomposition_chunk, + ); + + let claim_point = prover_state.sample_vec(log_n_poseidons); + + let GKRPoseidonResult { + output_values, + input_statements, + cubes_statements, + } = prove_poseidon_gkr( + &mut prover_state, + &witness, + claim_point.clone(), + UNIVARIATE_SKIPS, + &layers, + ); + + // PCS opening + let mut pcs_statements = vec![]; + for 
(point_to_prove, evals_to_prove) in [input_statements, cubes_statements] { + for v in evals_to_prove { + pcs_statements.push(vec![Evaluation { + point: point_to_prove.clone(), + value: v, + }]); + } + } + + let global_statements = packed_pcs_global_statements_for_prover( + &committed_polys, + &committed_col_dims, + log_smallest_decomposition_chunk, + &pcs_statements, + &mut prover_state, + ); + whir_config.prove( + &mut prover_state, + global_statements, + pcs_commitment_witness.inner_witness, + &pcs_commitment_witness.packed_polynomial.by_ref(), + ); + + let prover_duration = prover_time.elapsed(); + + ( + build_verifier_state(&prover_state), + prover_state.proof_size(), + witness.output_layer, + prover_duration, + output_values, + claim_point, + ) + }; + + let verifier_time = Instant::now(); + + let output_values_verifier = { + // ---------------------------------------------------- VERIFIER ---------------------------------------------------- + + let parsed_pcs_commitment = packed_pcs_parse_commitment( + &whir_config_builder, + &mut verifier_state, + &committed_col_dims, + log_smallest_decomposition_chunk, + ) + .unwrap(); + + let output_claim_point = verifier_state.sample_vec(log_n_poseidons); + + let GKRPoseidonResult { + output_values, + input_statements, + cubes_statements, + } = verify_poseidon_gkr( + &mut verifier_state, + log_n_poseidons, + &output_claim_point, + &layers, + UNIVARIATE_SKIPS, + if compress { Some(n_compressions) } else { None }, + ); + + // PCS verification + let mut pcs_statements = vec![]; + for (point_to_verif, evals_to_verif) in [input_statements, cubes_statements] { + for v in evals_to_verif { + pcs_statements.push(vec![Evaluation { + point: point_to_verif.clone(), + value: v, + }]); + } + } + + let global_statements = packed_pcs_global_statements_for_verifier( + &committed_col_dims, + log_smallest_decomposition_chunk, + &pcs_statements, + &mut verifier_state, + &Default::default(), + ) + .unwrap(); + + whir_config + .verify::( + &mut verifier_state, + &parsed_pcs_commitment, + global_statements, + ) + .unwrap(); + output_values + }; + let verifier_duration = verifier_time.elapsed(); + + let mut data_to_hash = input.clone(); + let plaintext_time = Instant::now(); + transposed_par_iter_mut(&mut data_to_hash).for_each(|row| { + if WIDTH == 16 { + let mut buff = array::from_fn(|j| *row[j]); + poseidon16_permute_mut(&mut buff); + for j in 0..WIDTH { + *row[j] = buff[j]; + } + } else if WIDTH == 24 { + let mut buff = array::from_fn(|j| *row[j]); + poseidon24_permute_mut(&mut buff); + for j in 0..WIDTH { + *row[j] = buff[j]; + } + } else { + panic!("Unsupported WIDTH"); + } + }); + let plaintext_duration = plaintext_time.elapsed(); + + // sanity check: ensure the plaintext poseidons matches the last GKR layer: + if compress { + output_layer + .iter() + .enumerate() + .take(COMPRESSION_OUTPUT_WIDTH) + .for_each(|(i, layer)| { + assert_eq!(PFPacking::::unpack_slice(layer), data_to_hash[i]); + }); + output_layer + .iter() + .enumerate() + .skip(COMPRESSION_OUTPUT_WIDTH) + .for_each(|(i, layer)| { + assert_eq!( + &PFPacking::::unpack_slice(layer)[..n_poseidons - n_compressions], + &data_to_hash[i][..n_poseidons - n_compressions] + ); + assert!( + PFPacking::::unpack_slice(layer)[n_poseidons - n_compressions..] 
+ .iter() + .all(|&x| x.is_zero()) + ); + }); + } else { + output_layer.iter().enumerate().for_each(|(i, layer)| { + assert_eq!(PFPacking::::unpack_slice(layer), data_to_hash[i]); + }); + } + assert_eq!(output_values_verifier, output_values_prover); + assert_eq!( + output_values_verifier.as_slice(), + &output_layer + .iter() + .map(|layer| PFPacking::::unpack_slice(layer) + .evaluate(&MultilinearPoint(claim_point.clone()))) + .collect::>() + ); + + println!("2^{} Poseidon2", log_n_poseidons); + println!( + "Plaintext (no proof) time: {:.3}s ({:.2}M Poseidons / s)", + plaintext_duration.as_secs_f64(), + n_poseidons as f64 / (plaintext_duration.as_secs_f64() * 1e6) + ); + println!( + "Prover time: {:.3}s ({:.2}M Poseidons / s, {:.1}x slower than plaintext)", + prover_duration.as_secs_f64(), + n_poseidons as f64 / (prover_duration.as_secs_f64() * 1e6), + prover_duration.as_secs_f64() / plaintext_duration.as_secs_f64() + ); + println!( + "Proof size: {:.1} KiB (without merkle pruning)", + (proof_size * F::bits()) as f64 / (8.0 * 1024.0) + ); + println!("Verifier time: {}ms", verifier_duration.as_millis()); +} diff --git a/crates/poseidon_circuit/src/verify.rs b/crates/poseidon_circuit/src/verify.rs new file mode 100644 index 00000000..f21a45a3 --- /dev/null +++ b/crates/poseidon_circuit/src/verify.rs @@ -0,0 +1,222 @@ +use multilinear_toolkit::prelude::*; +use p3_koala_bear::{KoalaBearInternalLayerParameters, KoalaBearParameters}; +use p3_monty_31::InternalLayerBaseParameters; + +use crate::{EF, F, GKRPoseidonResult, gkr_layers::PoseidonGKRLayers}; + +pub fn verify_poseidon_gkr( + verifier_state: &mut FSVerifier>, + log_n_poseidons: usize, + output_claim_point: &[EF], + layers: &PoseidonGKRLayers, + univariate_skips: usize, + n_compressions: Option, +) -> GKRPoseidonResult +where + KoalaBearInternalLayerParameters: InternalLayerBaseParameters, +{ + let selectors = univariate_selectors::(univariate_skips); + + let mut output_claims = vec![]; + let mut claims = vec![]; + + let mut claim_point = { + let inner_evals = (0..WIDTH) + .map(|_| { + verifier_state + .next_extension_scalars_vec(1 << univariate_skips) + .unwrap() + }) + .collect::>(); + let alpha = verifier_state.sample(); + let selectors_at_alpha = selectors + .iter() + .map(|selector| selector.evaluate(alpha)) + .collect::>(); + for evals in inner_evals { + output_claims.push(evals.evaluate(&MultilinearPoint( + output_claim_point[..univariate_skips].to_vec(), + ))); + claims.push(dot_product( + selectors_at_alpha.iter().copied(), + evals.into_iter(), + )) + } + [vec![alpha], output_claim_point[univariate_skips..].to_vec()].concat() + }; + + for (i, full_round) in layers.final_full_rounds.iter().rev().enumerate() { + let n_inputs = if i == 0 && n_compressions.is_some() { + WIDTH + 1 + } else { + WIDTH + }; + (claim_point, claims) = verify_gkr_round( + verifier_state, + full_round, + log_n_poseidons, + &claim_point, + &claims, + univariate_skips, + n_inputs, + ); + if i == 0 + && let Some(n_compressions) = n_compressions + { + assert!(n_compressions <= 1 << log_n_poseidons); + let compression_eval = claims.pop().unwrap(); + assert_eq!( + compression_eval, + skipped_mle_of_zeros_then_ones( + (1 << log_n_poseidons) - n_compressions, + &claim_point, + &selectors + ) + ); + } + } + + for partial_round in layers.partial_rounds_remaining.iter().rev() { + (claim_point, claims) = verify_gkr_round( + verifier_state, + partial_round, + log_n_poseidons, + &claim_point, + &claims, + univariate_skips, + WIDTH, + ); + } + let claimed_cubes_evals = 
verifier_state + .next_extension_scalars_vec(N_COMMITED_CUBES) + .unwrap(); + + (claim_point, claims) = verify_gkr_round( + verifier_state, + &layers.batch_partial_rounds, + log_n_poseidons, + &claim_point, + &[claims, claimed_cubes_evals.clone()].concat(), + univariate_skips, + WIDTH + N_COMMITED_CUBES, + ); + + let pcs_point_for_cubes = claim_point.clone(); + let pcs_evals_for_cubes = claims[WIDTH..].to_vec(); + + claims = claims[..WIDTH].to_vec(); + + for full_round in layers.initial_full_rounds_remaining.iter().rev() { + (claim_point, claims) = verify_gkr_round( + verifier_state, + full_round, + log_n_poseidons, + &claim_point, + &claims, + univariate_skips, + WIDTH, + ); + } + (claim_point, claims) = verify_gkr_round( + verifier_state, + &layers.initial_full_round, + log_n_poseidons, + &claim_point, + &claims, + univariate_skips, + WIDTH, + ); + + let pcs_point_for_inputs = claim_point.clone(); + let pcs_evals_for_inputs = claims; + + let input_statements = verify_inner_evals_on_commited_columns( + verifier_state, + &pcs_point_for_inputs, + &pcs_evals_for_inputs, + &selectors, + ); + + let cubes_statements = verify_inner_evals_on_commited_columns( + verifier_state, + &pcs_point_for_cubes, + &pcs_evals_for_cubes, + &selectors, + ); + + GKRPoseidonResult { + output_values: output_claims, + input_statements, + cubes_statements, + } +} + +fn verify_gkr_round>( + verifier_state: &mut FSVerifier>, + computation: &SC, + log_n_poseidons: usize, + claim_point: &[EF], + output_claims: &[EF], + univariate_skips: usize, + n_inputs: usize, +) -> (Vec, Vec) { + let batching_scalar = verifier_state.sample(); + let batching_scalars_powers = batching_scalar.powers().collect_n(output_claims.len()); + let batched_claim: EF = dot_product(output_claims.iter().copied(), batching_scalar.powers()); + + let (retrieved_batched_claim, sumcheck_postponed_claim) = sumcheck_verify_with_univariate_skip( + verifier_state, + computation.degree() + 1, + log_n_poseidons, + univariate_skips, + ) + .unwrap(); + + assert_eq!(retrieved_batched_claim, batched_claim); + + let sumcheck_inner_evals = verifier_state.next_extension_scalars_vec(n_inputs).unwrap(); + assert_eq!( + computation.eval(&sumcheck_inner_evals, &batching_scalars_powers) + * eq_poly_with_skip( + &sumcheck_postponed_claim.point, + claim_point, + univariate_skips + ), + sumcheck_postponed_claim.value + ); + + (sumcheck_postponed_claim.point.0, sumcheck_inner_evals) +} + +fn verify_inner_evals_on_commited_columns( + verifier_state: &mut FSVerifier>, + point: &[EF], + claimed_evals: &[EF], + selectors: &[DensePolynomial], +) -> (MultilinearPoint, Vec) { + let univariate_skips = log2_strict_usize(selectors.len()); + let inner_evals_inputs = verifier_state + .next_extension_scalars_vec(claimed_evals.len() << univariate_skips) + .unwrap(); + let pcs_batching_scalars_inputs = verifier_state.sample_vec(univariate_skips); + let mut values_to_verif = vec![]; + let point_to_verif = + MultilinearPoint([pcs_batching_scalars_inputs.clone(), point[1..].to_vec()].concat()); + for (&eval, col_inner_evals) in claimed_evals + .iter() + .zip(inner_evals_inputs.chunks_exact(1 << univariate_skips)) + { + assert_eq!( + eval, + evaluate_univariate_multilinear::<_, _, _, false>( + col_inner_evals, + &point[..1], + selectors, + None + ) + ); + values_to_verif + .push(col_inner_evals.evaluate(&MultilinearPoint(pcs_batching_scalars_inputs.clone()))); + } + (point_to_verif, values_to_verif) +} diff --git a/crates/poseidon_circuit/src/witness_gen.rs 
b/crates/poseidon_circuit/src/witness_gen.rs new file mode 100644 index 00000000..fa090e53 --- /dev/null +++ b/crates/poseidon_circuit/src/witness_gen.rs @@ -0,0 +1,221 @@ +use std::array; + +use multilinear_toolkit::prelude::*; +use p3_koala_bear::GenericPoseidon2LinearLayersKoalaBear; +use p3_koala_bear::KoalaBearInternalLayerParameters; +use p3_koala_bear::KoalaBearParameters; +use p3_monty_31::InternalLayerBaseParameters; +use p3_poseidon2::GenericPoseidon2LinearLayers; +use utils::transposed_par_iter_mut; + +use crate::gkr_layers::BatchPartialRounds; +use crate::gkr_layers::PartialRoundComputation; +use crate::gkr_layers::PoseidonGKRLayers; +use crate::{F, gkr_layers::FullRoundComputation}; + +#[derive(Debug)] +pub struct PoseidonWitness { + pub input_layer: [Vec; WIDTH], // input of the permutation + pub remaining_initial_full_round_inputs: Vec<[Vec; WIDTH]>, // the remaining input of each initial full round + pub batch_partial_round_input: [Vec; WIDTH], // again, the input of the batch (partial) round + pub committed_cubes: [Vec; N_COMMITED_CUBES], // the cubes commited in the batch (partial) rounds + pub remaining_partial_round_inputs: Vec<[Vec; WIDTH]>, // the input of each remaining partial round + pub final_full_round_inputs: Vec<[Vec; WIDTH]>, // the input of each final full round + pub output_layer: [Vec; WIDTH], // output of the permutation + pub compression: Option<(usize, Vec)>, // num compressions, compression indicator column +} + +impl + PoseidonWitness, WIDTH, N_COMMITED_CUBES> +{ + pub fn n_poseidons(&self) -> usize { + self.input_layer[0].len() * packing_width::() + } +} + +pub fn generate_poseidon_witness( + input: [Vec; WIDTH], + layers: &PoseidonGKRLayers, + compression: Option<(usize, Vec)>, +) -> PoseidonWitness +where + A: Algebra + Copy + Send + Sync, + KoalaBearInternalLayerParameters: InternalLayerBaseParameters, +{ + let mut remaining_initial_full_layers = vec![apply_full_round::<_, _, true>( + &input, + &layers.initial_full_round, + None, + )]; + for round in &layers.initial_full_rounds_remaining { + remaining_initial_full_layers.push(apply_full_round::<_, _, false>( + remaining_initial_full_layers.last().unwrap(), + round, + None, + )); + } + + let batch_partial_round_layer = remaining_initial_full_layers.pop().unwrap(); + let (next_layer, committed_cubes) = + apply_batch_partial_rounds(&batch_partial_round_layer, &layers.batch_partial_rounds); + + let mut remaining_partial_inputs = vec![next_layer]; + for constant in &layers.partial_rounds_remaining { + remaining_partial_inputs.push(apply_partial_round( + remaining_partial_inputs.last().unwrap(), + constant, + )); + } + + let mut final_full_layer_inputs = vec![remaining_partial_inputs.pop().unwrap()]; + for (i, round) in layers.final_full_rounds.iter().enumerate() { + final_full_layer_inputs.push(apply_full_round::<_, _, false>( + final_full_layer_inputs.last().unwrap(), + round, + if i == layers.final_full_rounds.len() - 1 { + compression.as_ref().map(|(_, v)| v.as_slice()) + } else { + None + }, + )); + } + + let output_layer = final_full_layer_inputs.pop().unwrap(); + + PoseidonWitness { + input_layer: input, + remaining_initial_full_round_inputs: remaining_initial_full_layers, + batch_partial_round_input: batch_partial_round_layer, + committed_cubes, + remaining_partial_round_inputs: remaining_partial_inputs, + final_full_round_inputs: final_full_layer_inputs, + output_layer, + compression, + } +} + +// #[instrument(skip_all)] +fn apply_full_round( + input_layers: &[Vec; WIDTH], + full_round: 
&FullRoundComputation, + compression_indicator: Option<&[A]>, +) -> [Vec; WIDTH] +where + A: Algebra + Copy + Send + Sync, + KoalaBearInternalLayerParameters: InternalLayerBaseParameters, +{ + let mut output_layers: [_; WIDTH] = array::from_fn(|_| A::zero_vec(input_layers[0].len())); + transposed_par_iter_mut(&mut output_layers) + .enumerate() + .for_each(|(row_index, output_row)| { + let mut buff: [A; WIDTH] = array::from_fn(|j| input_layers[j][row_index]); + if FIRST { + GenericPoseidon2LinearLayersKoalaBear::external_linear_layer(&mut buff); + } + buff.iter_mut().enumerate().for_each(|(j, val)| { + *val = (*val + full_round.constants[j]).cube(); + }); + GenericPoseidon2LinearLayersKoalaBear::external_linear_layer(&mut buff); + if let Some(compression_output_width) = full_round.compressed_output { + let compressed = compression_indicator.unwrap()[row_index]; + for i in 0..compression_output_width { + *output_row[i] = buff[i]; + } + for i in compression_output_width..WIDTH { + *output_row[i] = buff[i] * (A::ONE - compressed); + } + } else { + for j in 0..WIDTH { + *output_row[j] = buff[j]; + } + } + }); + output_layers +} + +// #[instrument(skip_all)] +fn apply_partial_round( + input_layers: &[Vec], + partial_round: &PartialRoundComputation, +) -> [Vec; WIDTH] +where + A: Algebra + Copy + Send + Sync, + KoalaBearInternalLayerParameters: InternalLayerBaseParameters, +{ + let mut output_layers: [_; WIDTH] = array::from_fn(|_| A::zero_vec(input_layers[0].len())); + transposed_par_iter_mut(&mut output_layers) + .enumerate() + .for_each(|(row_index, output_row)| { + let first_cubed = (input_layers[0][row_index] + partial_round.constant).cube(); + let mut buff = [A::ZERO; WIDTH]; + buff[0] = first_cubed; + for j in 1..WIDTH { + buff[j] = input_layers[j][row_index]; + } + GenericPoseidon2LinearLayersKoalaBear::internal_linear_layer(&mut buff); + for j in 0..WIDTH { + *output_row[j] = buff[j]; + } + }); + output_layers +} + +// #[instrument(skip_all)] +fn apply_batch_partial_rounds( + input_layers: &[Vec], + rounds: &BatchPartialRounds, +) -> ([Vec; WIDTH], [Vec; N_COMMITED_CUBES]) +where + A: Algebra + Copy + Send + Sync, + KoalaBearInternalLayerParameters: InternalLayerBaseParameters, +{ + let mut output_layers: [_; WIDTH] = array::from_fn(|_| A::zero_vec(input_layers[0].len())); + let mut cubes: [_; N_COMMITED_CUBES] = array::from_fn(|_| A::zero_vec(input_layers[0].len())); + transposed_par_iter_mut(&mut output_layers) + .zip(transposed_par_iter_mut(&mut cubes)) + .enumerate() + .for_each(|(row_index, (output_row, cubes))| { + let mut buff: [A; WIDTH] = array::from_fn(|j| input_layers[j][row_index]); + for (i, &constant) in rounds.constants.iter().enumerate() { + *cubes[i] = (buff[0] + constant).cube(); + buff[0] = *cubes[i]; + GenericPoseidon2LinearLayersKoalaBear::internal_linear_layer(&mut buff); + } + buff[0] = (buff[0] + rounds.last_constant).cube(); + GenericPoseidon2LinearLayersKoalaBear::internal_linear_layer(&mut buff); + for j in 0..WIDTH { + *output_row[j] = buff[j]; + } + }); + (output_layers, cubes) +} + +pub fn default_cube_layers( + layers: &PoseidonGKRLayers, +) -> [A; N_COMMITED_CUBES] +where + A: Algebra + Copy + Send + Sync, + KoalaBearInternalLayerParameters: InternalLayerBaseParameters, +{ + generate_poseidon_witness::( + array::from_fn(|_| vec![A::ZERO]), + layers, + if layers + .final_full_rounds + .last() + .unwrap() + .compressed_output + .is_some() + { + Some((0, vec![A::ZERO])) + } else { + None + }, + ) + .committed_cubes + .iter() + .map(|v| v[0]) + .collect::>() + 
.try_into() + .unwrap() +} diff --git a/crates/rec_aggregation/src/lib.rs b/crates/rec_aggregation/src/lib.rs index 58fff34e..1f3d901e 100644 --- a/crates/rec_aggregation/src/lib.rs +++ b/crates/rec_aggregation/src/lib.rs @@ -2,6 +2,3 @@ pub mod recursion; pub mod xmss_aggregate; - -pub use recursion::bench_recursion; -pub use xmss_aggregate::run_xmss_benchmark; diff --git a/crates/rec_aggregation/src/recursion.rs b/crates/rec_aggregation/src/recursion.rs index 7a609cb4..9066e467 100644 --- a/crates/rec_aggregation/src/recursion.rs +++ b/crates/rec_aggregation/src/recursion.rs @@ -1,5 +1,5 @@ use std::path::Path; -use std::time::{Duration, Instant}; +use std::time::Instant; use lean_compiler::compile_program; use lean_prover::prove_execution::prove_execution; @@ -20,12 +20,7 @@ use whir_p3::{ const NUM_VARIABLES: usize = 25; -struct RecursionBenchStats { - proving_time: Duration, - proof_size: usize, -} - -fn run_recursion_benchmark() -> RecursionBenchStats { +pub fn run_whir_recursion_benchmark() { let src_file = Path::new(env!("CARGO_MANIFEST_DIR")).join("recursion_program.lean_lang"); let mut program_str = std::fs::read_to_string(src_file).unwrap(); let recursion_config_builder = WhirConfigBuilder { @@ -189,14 +184,14 @@ fn run_recursion_benchmark() -> RecursionBenchStats { &bytecode, (&public_input, &[]), 1 << 20, - (false, false), + false, (&vec![], &vec![]), ) .no_vec_runtime_memory; let time = Instant::now(); - let (proof_data, proof_size) = prove_execution( + let (proof_data, proof_size, summary) = prove_execution( &bytecode, (&public_input, &[]), whir_config_builder(), @@ -206,23 +201,16 @@ fn run_recursion_benchmark() -> RecursionBenchStats { ); let proving_time = time.elapsed(); verify_execution(&bytecode, &public_input, proof_data, whir_config_builder()).unwrap(); - RecursionBenchStats { - proving_time, - proof_size, - } -} -pub fn bench_recursion() -> Duration { - run_recursion_benchmark().proving_time + println!("{}", summary); + println!( + "WHIR recursion, proving time: {} ms, proof size: {} KiB (not optimized)", + proving_time.as_millis(), + proof_size * F::bits() / (8 * 1024) + ); } #[test] fn test_whir_recursion() { - use p3_field::Field; - let stats = run_recursion_benchmark(); - println!( - "\nWHIR recursion, proving time: {:?}, proof size: {} KiB (not optimized)", - stats.proving_time, - stats.proof_size * F::bits() / (8 * 1024) - ); + run_whir_recursion_benchmark(); } diff --git a/crates/rec_aggregation/src/xmss_aggregate.rs b/crates/rec_aggregation/src/xmss_aggregate.rs index 15752a0a..c017c04c 100644 --- a/crates/rec_aggregation/src/xmss_aggregate.rs +++ b/crates/rec_aggregation/src/xmss_aggregate.rs @@ -1,9 +1,10 @@ -use std::time::{Duration, Instant}; +use std::time::Instant; use lean_compiler::*; use lean_prover::whir_config_builder; use lean_prover::{prove_execution::prove_execution, verify_execution::verify_execution}; use lean_vm::*; +use p3_field::Field; use p3_field::PrimeCharacteristicRing; use rand::{Rng, SeedableRng, rngs::StdRng}; use rayon::prelude::*; @@ -14,14 +15,7 @@ use xmss::{ const LOG_LIFETIME: usize = 32; -#[derive(Default, Debug)] -pub struct XmssBenchStats { - pub proving_time: Duration, - pub proof_size: usize, - pub n_xmss: usize, -} - -pub fn run_xmss_benchmark(n_xmss: usize) -> XmssBenchStats { +pub fn run_xmss_benchmark(n_xmss: usize) { // Public input: message_hash | all_public_keys | bitield // Private input: signatures = (randomness | chain_tips | merkle_path) let mut program_str = r#" @@ -267,7 +261,7 @@ pub fn 
run_xmss_benchmark(n_xmss: usize) -> XmssBenchStats { &bytecode, (&public_input, &private_input), 1 << 21, - (false, false), + false, (&vec![], &vec![]), ) .no_vec_runtime_memory; @@ -278,7 +272,7 @@ pub fn run_xmss_benchmark(n_xmss: usize) -> XmssBenchStats { let (poseidons_16_precomputed, poseidons_24_precomputed) = precompute_poseidons(&all_public_keys, &all_signatures, &message_hash); - let (proof_data, proof_size) = prove_execution( + let (proof_data, proof_size, summary) = prove_execution( &bytecode, (&public_input, &private_input), whir_config_builder(), @@ -288,11 +282,14 @@ pub fn run_xmss_benchmark(n_xmss: usize) -> XmssBenchStats { ); let proving_time = time.elapsed(); verify_execution(&bytecode, &public_input, proof_data, whir_config_builder()).unwrap(); - XmssBenchStats { - proving_time, - proof_size, - n_xmss, - } + + println!("{}", summary); + println!( + "XMSS aggregation, proving time: {:.3} s ({:.1} XMSS/s), proof size: {} KiB (not optimized)", + proving_time.as_secs_f64(), + n_xmss as f64 / proving_time.as_secs_f64(), + proof_size * F::bits() / (8 * 1024) + ); } #[instrument(skip_all)] @@ -319,20 +316,9 @@ fn precompute_poseidons( #[test] fn test_xmss_aggregate() { - use p3_field::Field; - let n_public_keys: usize = std::env::var("NUM_XMSS_AGGREGATED") + let n_xmss: usize = std::env::var("NUM_XMSS_AGGREGATED") .unwrap_or("100".to_string()) .parse() .unwrap(); - let stats = run_xmss_benchmark(n_public_keys); - println!( - "\nXMSS aggregation (n_signatures = {}, lifetime = 2^{})", - stats.n_xmss, 32 - ); - println!( - "Proving time: {:?} ({:.1} XMSS/s), proof size: {} KiB (not optimized)", - stats.proving_time, - stats.n_xmss as f64 / stats.proving_time.as_secs_f64(), - stats.proof_size * F::bits() / (8 * 1024) - ); + run_xmss_benchmark(n_xmss); } diff --git a/crates/utils/src/poseidon2.rs b/crates/utils/src/poseidon2.rs index d2946005..4214c8e8 100644 --- a/crates/utils/src/poseidon2.rs +++ b/crates/utils/src/poseidon2.rs @@ -1,7 +1,5 @@ use std::sync::OnceLock; -use multilinear_toolkit::prelude::*; -use p3_koala_bear::GenericPoseidon2LinearLayersKoalaBear; use p3_koala_bear::KOALABEAR_RC16_EXTERNAL_FINAL; use p3_koala_bear::KOALABEAR_RC16_EXTERNAL_INITIAL; use p3_koala_bear::KOALABEAR_RC16_INTERNAL; @@ -10,15 +8,9 @@ use p3_koala_bear::KOALABEAR_RC24_EXTERNAL_INITIAL; use p3_koala_bear::KOALABEAR_RC24_INTERNAL; use p3_koala_bear::KoalaBear; use p3_koala_bear::Poseidon2KoalaBear; -use p3_matrix::dense::RowMajorMatrix; use p3_poseidon2::ExternalLayerConstants; -use p3_poseidon2_air::p16::Poseidon2Air16; use p3_poseidon2_air::p16::RoundConstants16; -use p3_poseidon2_air::p16::generate_trace_rows_16; -use p3_poseidon2_air::p24::Poseidon2Air24; use p3_poseidon2_air::p24::RoundConstants24; -use p3_poseidon2_air::p24::generate_trace_rows_24; -use p3_poseidon2_air::{p16, p24}; use p3_symmetric::Permutation; pub type Poseidon16 = Poseidon2KoalaBear<16>; @@ -32,51 +24,6 @@ pub const QUARTER_FULL_ROUNDS_24: usize = 2; pub const HALF_FULL_ROUNDS_24: usize = 4; pub const PARTIAL_ROUNDS_24: usize = 23; -pub const SBOX_DEGREE: u64 = 3; -pub const SBOX_REGISTERS: usize = 0; - -pub const POSEIDON16_AIR_N_COLS: usize = p16::num_cols::< - 16, - SBOX_DEGREE, - SBOX_REGISTERS, - QUARTER_FULL_ROUNDS_16, - HALF_FULL_ROUNDS_16, - PARTIAL_ROUNDS_16, ->(); - -pub const POSEIDON24_AIR_N_COLS: usize = p24::num_cols::< - 24, - SBOX_DEGREE, - SBOX_REGISTERS, - QUARTER_FULL_ROUNDS_24, - HALF_FULL_ROUNDS_24, - PARTIAL_ROUNDS_24, ->(); - -pub type MyLinearLayers = GenericPoseidon2LinearLayersKoalaBear; 
- -pub type Poseidon16Air = Poseidon2Air16< - F, - MyLinearLayers, - 16, - SBOX_DEGREE, - SBOX_REGISTERS, - QUARTER_FULL_ROUNDS_16, - HALF_FULL_ROUNDS_16, - PARTIAL_ROUNDS_16, ->; - -pub type Poseidon24Air = Poseidon2Air24< - F, - MyLinearLayers, - 24, - SBOX_DEGREE, - SBOX_REGISTERS, - QUARTER_FULL_ROUNDS_24, - HALF_FULL_ROUNDS_24, - PARTIAL_ROUNDS_24, ->; - pub type MyRoundConstants16 = RoundConstants16; pub type MyRoundConstants24 = RoundConstants24; @@ -102,6 +49,21 @@ pub fn poseidon16_permute(input: [KoalaBear; 16]) -> [KoalaBear; 16] { get_poseidon16().permute(input) } +#[inline(always)] +pub fn poseidon16_permute_mut(input: &mut [KoalaBear; 16]) { + get_poseidon16().permute_mut(input); +} + +#[inline(always)] +pub fn poseidon24_permute(input: [KoalaBear; 24]) -> [KoalaBear; 24] { + get_poseidon24().permute(input) +} + +#[inline(always)] +pub fn poseidon24_permute_mut(input: &mut [KoalaBear; 24]) { + get_poseidon24().permute_mut(input); +} + static POSEIDON24_INSTANCE: OnceLock = OnceLock::new(); #[inline(always)] @@ -119,27 +81,6 @@ pub(crate) fn get_poseidon24() -> &'static Poseidon24 { }) } -#[inline(always)] -pub fn poseidon24_permute(input: [KoalaBear; 24]) -> [KoalaBear; 24] { - get_poseidon24().permute(input) -} - -pub fn build_poseidon_16_air() -> Poseidon16Air { - Poseidon16Air::new(build_poseidon16_constants()) -} - -pub fn build_poseidon_24_air() -> Poseidon24Air { - Poseidon24Air::new(build_poseidon24_constants()) -} - -pub fn build_poseidon_16_air_packed() -> Poseidon16Air> { - Poseidon16Air::new(build_poseidon16_constants_packed()) -} - -pub fn build_poseidon_24_air_packed() -> Poseidon24Air> { - Poseidon24Air::new(build_poseidon24_constants_packed()) -} - pub fn build_poseidon16_constants() -> MyRoundConstants16 { RoundConstants16 { beginning_full_round_constants: KOALABEAR_RC16_EXTERNAL_INITIAL, @@ -155,67 +96,3 @@ pub fn build_poseidon24_constants() -> MyRoundConstants24 { ending_full_round_constants: KOALABEAR_RC24_EXTERNAL_FINAL, } } - -fn build_poseidon16_constants_packed() -> MyRoundConstants16> { - let normal_constants = build_poseidon16_constants(); - RoundConstants16 { - beginning_full_round_constants: normal_constants - .beginning_full_round_constants - .map(|arr| arr.map(Into::into)), - partial_round_constants: normal_constants.partial_round_constants.map(Into::into), - ending_full_round_constants: normal_constants - .ending_full_round_constants - .map(|arr| arr.map(Into::into)), - } -} - -fn build_poseidon24_constants_packed() -> MyRoundConstants24> { - let normal_constants = build_poseidon24_constants(); - MyRoundConstants24 { - beginning_full_round_constants: normal_constants - .beginning_full_round_constants - .map(|arr| arr.map(Into::into)), - partial_round_constants: normal_constants.partial_round_constants.map(Into::into), - ending_full_round_constants: normal_constants - .ending_full_round_constants - .map(|arr| arr.map(Into::into)), - } -} - -pub fn generate_trace_poseidon_16( - inputs: &[[KoalaBear; 16]], - compress: &[bool], - index_res: &[usize], -) -> RowMajorMatrix { - generate_trace_rows_16::< - KoalaBear, - MyLinearLayers, - 16, - SBOX_DEGREE, - SBOX_REGISTERS, - QUARTER_FULL_ROUNDS_16, - HALF_FULL_ROUNDS_16, - PARTIAL_ROUNDS_16, - >(inputs, compress, index_res, &build_poseidon16_constants()) -} - -pub fn generate_trace_poseidon_24(inputs: &[[KoalaBear; 24]]) -> RowMajorMatrix { - generate_trace_rows_24::< - KoalaBear, - MyLinearLayers, - 24, - SBOX_DEGREE, - SBOX_REGISTERS, - QUARTER_FULL_ROUNDS_24, - HALF_FULL_ROUNDS_24, - 
PARTIAL_ROUNDS_24, - >(inputs, &build_poseidon24_constants()) } - -pub fn default_poseidon16_air_row(compress: bool, index_res: usize) -> Vec { - generate_trace_poseidon_16(&[Default::default()], &[compress], &[index_res]).values -} - -pub fn default_poseidon24_air_row() -> Vec { - generate_trace_poseidon_24(&[Default::default()]).values -} diff --git a/docs/Proving_Poseidons_with_GKR.pdf b/docs/Proving_Poseidons_with_GKR.pdf new file mode 100644 index 00000000..442e021b Binary files /dev/null and b/docs/Proving_Poseidons_with_GKR.pdf differ diff --git a/docs/Sumcheck_with_low_memory_throughput.pdf b/docs/Sumcheck_with_low_memory_throughput.pdf deleted file mode 100644 index 0ca015a9..00000000 Binary files a/docs/Sumcheck_with_low_memory_throughput.pdf and /dev/null differ diff --git a/Whirlaway.pdf b/docs/Whirlaway.pdf similarity index 100% rename from Whirlaway.pdf rename to docs/Whirlaway.pdf diff --git a/crates/xmss/XMSS_trivial_encoding.pdf b/docs/XMSS_trivial_encoding.pdf similarity index 100% rename from crates/xmss/XMSS_trivial_encoding.pdf rename to docs/XMSS_trivial_encoding.pdf
diff --git a/docs/benchmark_graphs/graphs/raw_poseidons.svg b/docs/benchmark_graphs/graphs/raw_poseidons.svg index cd7714ca..20e44048 100644
[SVG path data omitted: regenerated matplotlib figure (raw Poseidon2 throughput plot); only the embedded render timestamp (2025-10-18T16:27:08 -> 2025-10-27T17:01:34) and the plotted curves/axes changed.]
diff --git a/docs/benchmark_graphs/graphs/recursive_whir_opening.svg b/docs/benchmark_graphs/graphs/recursive_whir_opening.svg index 6f39171c..17ad03cc 100644
[SVG path data omitted: regenerated matplotlib figure (recursive WHIR opening proving-time plot); same render-timestamp and curve/axis changes as above.]
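The poseidon2.rs diff above trims the utils crate down to the permutation helpers (the AIR/trace-generation plumbing is removed) and adds in-place variants next to the existing owning ones. A minimal usage sketch, not part of this diff: it assumes the workspace's utils crate together with the p3_field and p3_koala_bear dependencies, and the driver function itself is hypothetical.

use p3_field::PrimeCharacteristicRing;
use p3_koala_bear::KoalaBear;
use utils::{poseidon16_permute, poseidon16_permute_mut};

fn main() {
    // Owning variant (pre-existing): takes the state by value and returns the permuted state.
    let state = [KoalaBear::ONE; 16];
    let out = poseidon16_permute(state);

    // In-place variant (added in this diff): mutates the buffer, avoiding the output copy.
    let mut buf = state;
    poseidon16_permute_mut(&mut buf);
    assert_eq!(out, buf);
}

poseidon24_permute and poseidon24_permute_mut follow the same pattern for the width-24 permutation.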
diff --git a/docs/benchmark_graphs/graphs/xmss_aggregated_time.svg b/docs/benchmark_graphs/graphs/xmss_aggregated.svg similarity index 56% rename from docs/benchmark_graphs/graphs/xmss_aggregated_time.svg rename to docs/benchmark_graphs/graphs/xmss_aggregated.svg index 019440aa..eb247f2f 100644
[SVG path data omitted: figure renamed and regenerated (per main.py below, the plot now tracks XMSS aggregated per second rather than proving time); embedded render timestamp 2025-10-18T16:27:09 -> 2025-10-27T17:01:34.]
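The xmss_aggregated graph is produced from the throughput printed by run_xmss_benchmark, and the recursive_whir_opening graph from the proving time printed by run_whir_recursion_benchmark; this diff makes both entry points public functions but drops their re-exports from the rec_aggregation crate root. A minimal driver sketch, not part of this diff: it assumes the workspace's rec_aggregation crate, and the count of 100 is illustrative (it mirrors the NUM_XMSS_AGGREGATED default used in the test).

use rec_aggregation::recursion::run_whir_recursion_benchmark;
use rec_aggregation::xmss_aggregate::run_xmss_benchmark;

fn main() {
    // Prints the execution summary, proving time and proof size of one recursive WHIR opening.
    run_whir_recursion_benchmark();
    // Prints the execution summary, proving time (with XMSS aggregated per second) and proof size.
    run_xmss_benchmark(100);
}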
diff --git a/docs/benchmark_graphs/graphs/xmss_aggregated_overhead.svg b/docs/benchmark_graphs/graphs/xmss_aggregated_overhead.svg index ce5a048b..35758e13 100644
[SVG path data omitted: regenerated matplotlib figure (zkVM overhead vs raw Poseidons plot); only the embedded render timestamp (2025-10-18T16:27:09 -> 2025-10-27T17:01:34) and the plotted curves/axes changed.]
diff --git a/docs/benchmark_graphs/main.py b/docs/benchmark_graphs/main.py index ac4d23ac..b5e51645 100644 --- a/docs/benchmark_graphs/main.py +++ b/docs/benchmark_graphs/main.py @@ -1,24 +1,64 @@ import matplotlib.pyplot as plt import matplotlib.dates as mdates +from matplotlib.ticker import ScalarFormatter, LogLocator from datetime import datetime, timedelta # uv run python main.py -N_DAYS_SHOWN = 60 +N_DAYS_SHOWN = 70 + +plt.rcParams.update({ + 'font.size': 12, # Base font size + 'axes.titlesize': 14, # Title font size + 'axes.labelsize': 12, # X and Y label font size + 'xtick.labelsize': 12, # X tick label font size + 'ytick.labelsize': 12, # Y tick label font size + 'legend.fontsize': 12, # Legend font size +}) -def create_duration_graph(data, target, target_label, title, y_legend, file): - dates = [] - values = [] - for day, duration in data: - dates.append(datetime.strptime(day, '%Y-%m-%d')) - values.append(duration) +def create_duration_graph(data, target=None, target_label=None, title="", y_legend="", file="", label1="Series 1", label2=None, log_scale=False): + dates = [] + values1 = [] + values2 = [] + + # Check if data contains triplets or pairs + has_second_curve = len(data[0]) == 3 if data else False + + for item in data: + if has_second_curve: + day, perf1, perf2 = item + dates.append(datetime.strptime(day, '%Y-%m-%d')) + values1.append(perf1) + values2.append(perf2) + else: + day, perf1 = item + dates.append(datetime.strptime(day, '%Y-%m-%d')) + values1.append(perf1) color = '#2E86AB' + color2 = '#A23B72' # Different color for second curve _, ax = plt.subplots(figsize=(10, 6)) - ax.plot(dates, values, marker='o', linewidth=2, - markersize=7, color=color) + + # Filter out None values for first curve + dates1_filtered = [d for d, v in zip(dates, values1) if v is not None] + values1_filtered = [v for v in values1 if v is not None] + + ax.plot(dates1_filtered, values1_filtered, marker='o', linewidth=2, + markersize=7, color=color, label=label1) + + # Plot second curve if it exists + if has_second_curve and label2 is not None: + # Filter out None values for second curve + dates2_filtered = [d for d, v in zip(dates, values2) if v is not None] + values2_filtered = [v for v in values2 if v is not None] + + ax.plot(dates2_filtered, values2_filtered, marker='s', linewidth=2, + markersize=7, color=color2, label=label2) + all_values = values1_filtered + values2_filtered + else: + all_values = values1_filtered min_date = min(dates) max_date = max(dates) @@ -32,15 +72,38 @@ def create_duration_graph(data, target, target_label, title, y_legend, file): plt.setp(ax.xaxis.get_majorticklabels(), rotation=50, ha='right') - ax.axhline(y=target, color=color, linestyle='--', - linewidth=2, label=target_label) + if target is not None and target_label is not None: + ax.axhline(y=target, color=color, linestyle='--', + linewidth=2, label=target_label) ax.set_ylabel(y_legend, fontsize=12) ax.set_title(title, fontsize=16, pad=15) - ax.grid(True, alpha=0.3) + ax.grid(True, alpha=0.3, which='both') # Grid for both major and minor ticks ax.legend() - ax.set_ylim(0, max(max(values), target) * 1.1) + # Set log scale if requested + if log_scale: + ax.set_yscale('log') + + # Set locators for major and minor ticks + ax.yaxis.set_major_locator(LogLocator(base=10.0, numticks=15)) + ax.yaxis.set_minor_locator(LogLocator(base=10.0, subs=range(1, 10), numticks=100)) + + # Format
labels + ax.yaxis.set_major_formatter(ScalarFormatter()) + ax.yaxis.set_minor_formatter(ScalarFormatter()) + ax.yaxis.get_major_formatter().set_scientific(False) + ax.yaxis.get_minor_formatter().set_scientific(False) + + # Show minor tick labels + ax.tick_params(axis='y', which='minor', labelsize=10) + else: + # Adjust y-limit to accommodate both curves (only for linear scale) + if all_values: + max_value = max(all_values) + if target is not None: + max_value = max(max_value, target) + ax.set_ylim(0, max_value * 1.1) plt.tight_layout() plt.savefig(f'graphs/{file}.svg', format='svg', bbox_inches='tight') @@ -48,60 +111,102 @@ def create_duration_graph(data, target, target_label, title, y_legend, file): if __name__ == "__main__": - create_duration_graph(data=[ - ('2025-08-27', 85000), - ('2025-08-30', 95000), - ('2025-09-09', 108000), - ('2025-09-14', 108000), - ('2025-09-28', 125000), - ('2025-10-01', 185000), - ('2025-10-12', 195000), - ('2025-10-13', 205000), - ('2025-10-18', 210000), - ], target=300_000, target_label="Target (300.000 Poseidon2 / s)", title="Raw Poseidon2", y_legend="Poseidons proven / s", file="raw_poseidons") - - create_duration_graph(data=[ - ('2025-08-27', 2.7), - ('2025-09-07', 1.4), - ('2025-09-09', 1.32), - ('2025-09-10', 0.970), - ('2025-09-14', 0.825), - ('2025-09-28', 0.725), - ('2025-10-01', 0.685), - ('2025-10-03', 0.647), - ('2025-10-12', 0.569), - ('2025-10-13', 0.521), - ('2025-10-18', 0.411), - ], target=0.125, target_label="Target (0.125 s)", title="Recursive WHIR opening", y_legend="Proving time (s)", file="recursive_whir_opening") - - create_duration_graph(data=[ - ('2025-08-27', 14.2), - ('2025-09-02', 13.5), - ('2025-09-03', 9.4), - ('2025-09-09', 8.02), - ('2025-09-10', 6.53), - ('2025-09-14', 4.65), - ('2025-09-28', 3.63), - ('2025-10-01', 2.9), - ('2025-10-03', 2.81), - ('2025-10-07', 2.59), - ('2025-10-12', 2.33), - ('2025-10-13', 2.13), - ('2025-10-18', 1.96), - ], target=0.5, target_label="Target (0.5 s)", title="500 XMSS aggregated: proving time", y_legend="Proving time (s)", file="xmss_aggregated_time") - - create_duration_graph(data=[ - ('2025-08-27', 14.2 / 0.92), - ('2025-09-02', 13.5 / 0.82), - ('2025-09-03', 9.4 / 0.82), - ('2025-09-09', 8.02 / 0.72), - ('2025-09-10', 6.53 / 0.72), - ('2025-09-14', 4.65 / 0.72), - ('2025-09-28', 3.63 / 0.63), - ('2025-10-01', 2.9 / 0.42), - ('2025-10-03', 2.81 / 0.42), - ('2025-10-07', 2.59 / 0.42), - ('2025-10-12', 2.33 / 0.40), - ('2025-10-13', 2.13 / 0.38), - ('2025-10-18', 1.96 / 0.37), - ], target=2.0, target_label="Target (2x)", title="500 XMSS aggregated: zkVM overhead vs raw Poseidons", y_legend="", file="xmss_aggregated_overhead") + create_duration_graph( + data=[ + ('2025-08-27', 85000, None), + ('2025-08-30', 95000, None), + ('2025-09-09', 108000, None), + ('2025-09-14', 108000, None), + ('2025-09-28', 125000, None), + ('2025-10-01', 185000, None), + ('2025-10-12', 195000, None), + ('2025-10-13', 205000, None), + ('2025-10-18', 210000, 620_000), + ('2025-10-27', 610_000, 1_250_000), + ], + target=1_500_000, + target_label="Target (1.5M Poseidon2 / s)", + title="Raw Poseidon2", + y_legend="Poseidons proven / s", + file="raw_poseidons", + label1="i9-12900H", + label2="mac m4 max", + log_scale=False + ) + + create_duration_graph( + data=[ + ('2025-08-27', 2.7, None), + ('2025-09-07', 1.4, None), + ('2025-09-09', 1.32, None), + ('2025-09-10', 0.970, None), + ('2025-09-14', 0.825, None), + ('2025-09-28', 0.725, None), + ('2025-10-01', 0.685, None), + ('2025-10-03', 0.647, None), + ('2025-10-12', 
0.569, None), + ('2025-10-13', 0.521, None), + ('2025-10-18', 0.411, 0.320), + ('2025-10-27', 0.425, 0.330), + ], + target=0.1, + target_label="Target (0.1 s)", + title="Recursive WHIR opening (log scale)", + y_legend="Proving time (s)", + file="recursive_whir_opening", + label1="i9-12900H", + label2="mac m4 max", + log_scale=True + ) + + create_duration_graph( + data=[ + ('2025-08-27', 35, None), + ('2025-09-02', 37, None), + ('2025-09-03', 53, None), + ('2025-09-09', 62, None), + ('2025-09-10', 76, None), + ('2025-09-14', 107, None), + ('2025-09-28', 137, None), + ('2025-10-01', 172, None), + ('2025-10-03', 177, None), + ('2025-10-07', 193, None), + ('2025-10-12', 214, None), + ('2025-10-13', 234, None), + ('2025-10-18', 255, 465), + ('2025-10-27', 314, 555), + ], + target=1000, + target_label="Target (1000 XMSS/s)", + title="number of XMSS aggregated / s", + y_legend="", + file="xmss_aggregated", + label1="i9-12900H", + label2="mac m4 max" + ) + + create_duration_graph( + data=[ + ('2025-08-27', 14.2 / 0.92, None), + ('2025-09-02', 13.5 / 0.82, None), + ('2025-09-03', 9.4 / 0.82, None), + ('2025-09-09', 8.02 / 0.72, None), + ('2025-09-10', 6.53 / 0.72, None), + ('2025-09-14', 4.65 / 0.72, None), + ('2025-09-28', 3.63 / 0.63, None), + ('2025-10-01', 2.9 / 0.42, None), + ('2025-10-03', 2.81 / 0.42, None), + ('2025-10-07', 2.59 / 0.42, None), + ('2025-10-12', 2.33 / 0.40, None), + ('2025-10-13', 2.13 / 0.38, None), + ('2025-10-18', 1.96 / 0.37, 1.07 / 0.12), + ('2025-10-27', (610_000 / 157) / 314, (1_250_000 / 157) / 555), + ], + target=2, + target_label="Target (2x)", + title="XMSS aggregated: zkVM overhead vs raw Poseidons", + y_legend="", + file="xmss_aggregated_overhead", + label1="i9-12900H", + label2="mac m4 max" + ) \ No newline at end of file diff --git a/crates/lookup/cost_of_logup_star.pdf b/docs/cost_of_logup_star.pdf similarity index 100% rename from crates/lookup/cost_of_logup_star.pdf rename to docs/cost_of_logup_star.pdf diff --git a/docs/whirlaway_pdf/bibliography.bib b/docs/whirlaway_pdf/bibliography.bib deleted file mode 100644 index 555b434b..00000000 --- a/docs/whirlaway_pdf/bibliography.bib +++ /dev/null @@ -1,41 +0,0 @@ -@article{whir, - author = {Gal Arnon and Alessandro Chiesa and Giacomo Fenzi and Eylon Yogev}, - title = {{WHIR}: Reed–Solomon Proximity Testing with Super-Fast Verification}, - howpublished = {Cryptology {ePrint} Archive, Paper 2024/1586}, - year = {2024}, - url = {https://eprint.iacr.org/2024/1586} -} -@article{fri_binius, - author = {Benjamin E. 
Diamond and Jim Posen}, - title = {Polylogarithmic Proofs for Multilinears over Binary Towers}, - howpublished = {Cryptology {ePrint} Archive, Paper 2024/504}, - year = {2024}, - url = {https://eprint.iacr.org/2024/504} -} -@article{ccs, - author = {Srinath Setty and Justin Thaler and Riad Wahby}, - title = {Customizable constraint systems for succinct arguments}, - howpublished = {Cryptology {ePrint} Archive, Paper 2023/552}, - year = {2023}, - url = {https://eprint.iacr.org/2023/552} -} -@article{simple_multivariate_AIR, - author = {William Borgeaud}, - title = {A simple multivariate AIR argument inspired by SuperSpartan}, - year = {2023}, - url = {https://solvable.group/posts/super-air/} -} -@article{hyperplonk, - author = {Binyi Chen and Benedikt Bünz and Dan Boneh and Zhenfei Zhang}, - title = {{HyperPlonk}: Plonk with Linear-Time Prover and High-Degree Custom Gates}, - howpublished = {Cryptology {ePrint} Archive, Paper 2022/1355}, - year = {2022}, - url = {https://eprint.iacr.org/2022/1355} -} -@article{univariate_skip, - author = {Angus Gruen}, - title = {Some Improvements for the {PIOP} for {ZeroCheck}}, - howpublished = {Cryptology {ePrint} Archive, Paper 2024/108}, - year = {2024}, - url = {https://eprint.iacr.org/2024/108} -} \ No newline at end of file diff --git a/docs/whirlaway_pdf/main.tex b/docs/whirlaway_pdf/main.tex deleted file mode 100644 index ce1f30b5..00000000 --- a/docs/whirlaway_pdf/main.tex +++ /dev/null @@ -1,729 +0,0 @@ -\documentclass{article} - -\usepackage[english]{babel} - -\usepackage[letterpaper,top=2cm,bottom=2cm,left=3cm,right=3cm,marginparwidth=1.75cm]{geometry} - -% Useful packages -\usepackage{amsmath} -\usepackage{amssymb} -\usepackage{graphicx} -\usepackage{mathtools} -\usepackage{tikz} -\usepackage[colorlinks=true, allcolors=blue]{hyperref} -\usepackage{xcolor} -\usepackage{colortbl} -\usepackage{booktabs} -\usepackage{tikz} -\usepackage{tcolorbox} -\usetikzlibrary{positioning, arrows.meta} - -\newcommand{\Fp}{\mathbb F_p} -\newcommand{\Fq}{\mathbb F_q} -\newcommand{\Pol}{T} - -\title{Whirlaway: multilinear PIOP for AIR} -\author{Thomas Coratger, Tom Wambsgans} -\date{} -\begin{document} -\maketitle - -\section{Introduction} - -AIR (Algebraic Intermediate Representation) is a common arithmetization standard in the context of (ZK-)SNARKs (Succinct Non-Interactive Arguments of Knowledge, potentially Zero-Knowledge). Traditionally, AIR constraints were proven using a univariate PIOP (Polynomial Interactive Oracle Proof), as explained in the EthStark paper \cite{eth_stark}. -Whirlaway is a simple multilinear PIOP for proving AIR constraints, with a focus on lightweight proofs. - -The multilinear framework has two main advantages compared to classical approaches. First, it enables encoding and committing the entire AIR execution trace as a single multilinear polynomial, instead of encoding each table column as a separate low-degree univariate polynomial. In many modern proof systems, the correctness of a computation is expressed through multiple tables, each capturing specific CPU instructions (in the context of zkVMs) or constraints. To enable efficient verification, the prover encodes the columns of these tables as low-degree polynomials and commits to them using a polynomial commitment scheme. However, this design introduces a caveat: the prover must commit to and provide evaluation proofs for each polynomial separately, and the verifier must process and check each of these commitments individually. 
As the number of columns grows, the verifier’s workload and the overall proof size scale linearly, which can significantly impact efficiency. - -Second, the multilinear approach allows proving the validity of AIR constraints using the sumcheck protocol, which removes the need for a quotient polynomial. In the classical univariate approach, AIR constraints are bundled into a single vanishing condition, and the prover must construct and commit to a quotient polynomial that encodes the division of the constraint polynomial by the vanishing polynomial of the domain. The opening proof for the committed quotient polynomial again increases the proof size. From the prover's perspective, using a quotient polynomial coupled with a FRI-based PCS (superlinear time) may also be slower than the sumcheck (linear time) algorithm. - - -Paired with the recent WHIR Polynomial Commitment Scheme \cite{whir}, we obtain a hash-based SNARK, plausibly post-quantum, with small proof size (on the order of 128 KiB) and promising proving speed (an experimental implementation reached one million Poseidon2 permutations proven per second, on the KoalaBear field, with 128 bits of security, on an RTX 4090). - -An initial implementation can be found \href{https://github.com/TomWambsgans/Whirlaway}{here}. - -\section{Definitions and notations} - -\subsection{Multilinear framework} - -\subsubsection{Multilinear polynomial} - -A multilinear polynomial over variables $X_1, \dots, X_k$ is a multivariate polynomial where each variable appears with degree at most one. Formally, it can be written as: -$$ - P(X_1, \dots, X_k) = \sum_{S \subseteq [k]} c_S \prod_{i \in S} X_i, -$$ -where each $c_S \in \Fp$ is a coefficient, and the sum ranges over all subsets $S$ of $\{1, \dots, k\}$. For example, over two variables $X, Y$, the polynomial $3 + 2X + 5Y + 7XY$ is multilinear, but $X^2$ or $Y^3$ are not. - -A multilinear polynomial is uniquely identified by its evaluations on the boolean hypercube: - -$$ - P(X_1, \dots, X_k) = \sum_{b \in \{0, 1\}^k} eq((X_1, \dots, X_k), b) \cdot P(b) -$$ - - -\subsubsection{Multilinear extension} - - - -Given a function $f: \{0,1\}^n \to \Fp$, its multilinear extension $\widehat{f}: \Fp^n \to \Fp$ is the unique multilinear polynomial over $n$ variables that agrees with $f$ on all Boolean inputs. Explicitly, it can be written as: - -$$ -\widehat{f}(x) = \sum_{b \in \{0,1\}^n} f(b) \cdot eq(b, x) -$$ - -This extension allows evaluating the function not just at Boolean points, but at any point $x \in \Fp^n$. - - -\subsection{Notations} - -\begin{itemize} - \item $\log$ is always in base 2 - \item $[i]_2$: big-endian bit decomposition of an integer $i$ - \item $eq(x, y) := \prod_{i = 1}^{n} (x_i y_i + (1 - x_i) (1 - y_i))$, for $x$ and $y$ in $\mathbb F^n$. This "equality multilinear polynomial" satisfies: $eq(x, y) = 1$ if $x = y$, $0$ otherwise, for $x$ and $y$ both sampled on the hypercube $\{0, 1\}^n$. - \item $\Fp$: base field, typically KoalaBear ($p = 2^{31} - 2^{24} + 1$), or BabyBear ($p = 2^{31} - 2^{27} + 1$) - \item $\Fq$: extension field ($q = p^\kappa$) - \item $M$ $(\text{resp. } M')$: number of columns (resp. non-preprocessed columns) in the AIR table - \item $m$ $(\text{resp. } m')$: smallest integer such that $2^m \geq M$ (resp. 
$2^{m'} \geq M'$) - \item $N = 2^n$: number of rows in the AIR table - \item $h_0, \dots, h_{u-1}$: transition constraints - \item $H$: batched constraint ($H := \sum_{i=0}^{u-1} h_i \alpha^i $) - \item $\Pol$: multilinear polynomial in $\Fp$ encoding all the (non-preprocessed) columns, with $n + m'$ variables -\end{itemize} - -\section{AIR Arithmetization} - -\subsection{Description} - -In the AIR arithmetization, the witness consists of a list of $M$ columns $c_0, \dots, c_{M-1}$ (forming the AIR table). Each column contains $N = 2^n$ elements in $\Fp$ (without loss of generality, we use a power-of-two domain). The goal of the prover is to convince the verifier that the table respects a set of $u$ transition constraints $h_0, \dots, h_{u-1}$. Each constraint $h$ is a polynomial in $2 M$ variables, which is respected if for all rows $r \in \{0, \dots, N-2\}$: - -$$h(c_0[r], \dots, c_{M-1}[r], c_0[r+1], \dots, c_{M-1}[r+1]) = 0$$ - -\subsection{Preprocessed columns} - -Traditional AIR systems allow the verifier to fix certain cells in the table (see "boundary conditions" \href{https://aszepieniec.github.io/stark-anatomy/stark}{here}). For technical reasons, we use a slightly different approach: we allow the verifier to fix certain columns, potentially sparse (called "preprocessed columns"). The work of the verifier associated with each preprocessed column is proportional to its number of nonzero rows. We denote by $c_0, \dots, c_{M'-1}$ the non-preprocessed columns and $c_{M'}, \dots, c_{M-1}$ the preprocessed ones. - -\subsection{Example: Fibonacci sequence} - - -In general, each constraint $h_i$ is a polynomial relation over the current and next rows of the table. We use the notation $X_j^{\text{up}}$ to refer to the value of column $c_j$ at row $r$ (the “upper” row), and $X_j^{\text{down}}$ to refer to its value at row $r + 1$ (the “lower” row). These variables appear as inputs to the $h_i$ constraints, which together enforce the correct behavior across the table. Each constraint has the form: -$$ -h_i(\underbrace{X_0^{\text{up}}, \dots, X_{M-1}^{\text{up}}}_{\text{Upper row columns}}, \underbrace{X_0^{\text{down}}, \dots, X_{M-1}^{\text{down}}}_{\text{Lower row columns}}) = \text{some constraint} -$$ -where: -\begin{itemize} - \item $X_j^{\text{up}} = c_j[r]$ is the value of column $c_j$ at row $r$ - \item $X_j^{\text{down}} = c_j[r + 1]$ is the value of column $c_j$ at row $r + 1$ -\end{itemize} - -Let's say the prover wants to convince the verifier that the $N$-th value of the Fibonacci sequence equals $F_N$. We use $M = 4$ columns, as illustrated in Figure~\ref{fig:fibonacci_air}: - -\begin{itemize} - \item The first $M' = 2$ columns $c_0$ and $c_1$ contain the values of the Fibonacci sequence, which is guaranteed by the constraints: - -\begin{itemize} - \item The $h_0$ constraint ensures that the next $c_1$ value equals the sum of the current $c_0$ and $c_1$ values, i.e., $F_{r+2} = F_r + F_{r+1}$: - - $$h_0(X_0^{\text{up}}, X_1^{\text{up}}, -, -, -, X_1^{\text{down}}, -, -) = X_1^{\text{down}} - (X_0^{\text{up}} + X_1^{\text{up}})$$ - \item The $h_1$ constraint shifts the sequence forward, ensuring that the next $c_0$ value equals the current $c_1$ value: - - $$h_1(-, X_1^{\text{up}}, -, -, X_0^{\text{down}}, -, -, -) = X_0^{\text{down}} - X_1^{\text{up}}$$ -\end{itemize} - - \item The last two columns $c_2$ and $c_3$ are "preprocessed": their content is enforced by the verifier. 
In our case we set $c_2 = [1, 0, \dots, 0]$ to act as a selector for the initial row and $c_3 = [0, \dots, 0, 1]$ as a selector for the final row. We finally use the following constraints to ensure that the two initial values of the sequence are correct ($0$ and $1$) and that the final value equals $F_N$: - - \begin{itemize} - \item $h_2$ ensures that $c_0=0$ when $c_2 \neq 0$ (which only occurs at the first row): - $$h_2(X_0^{\text{up}}, -, X_2^{\text{up}}, -, -, -, -, -) = X_2^{\text{up}} \cdot X_0^{\text{up}}$$ - - \item $h_3$ ensures $c_1 = 1$ when $c_2 \neq 0$ (only at the first row). - - $$h_3(-, X_1^{\text{up}}, X_2^{\text{up}}, -, -, -, -, -) = X_2^{\text{up}} \cdot (X_1^{\text{up}} - 1) $$ - -% (When the selector $c_2 \neq 0$ (which turns out to be the case at the initial row), we necessarily have $c_0 = 0$ and $c_1 = 1$) - - \item $h_4$ ensures $c_0 = F_N$ when $c_3 \neq 0$ (only at the last row). - - $$h_4(X_0^{\text{up}}, -, -, X_3^{\text{up}}, -, -, -, -) = X_3^{\text{up}} \cdot (X_0^{\text{up}} - F_N)$$ - -% (When the selector $c_3 \neq 0$ (which turns out to be the case at the final row), we necessarily have $c_0 = F_N$) - -\end{itemize} - -Note that $c_2$ and $c_3$ are sparse: each contains only one non-zero entry. As a consequence, they have a negligible impact on the verification time. - - \end{itemize} - - -\begin{figure}[h!] -\centering -\begin{tabular}{ccccc} -\toprule -Row & $c_0$ & $c_1$ & $c_2$ (preproc.) & $c_3$ (preproc.) \\ -\midrule -0 & \cellcolor{blue!10}0 & \cellcolor{blue!10}1 & \cellcolor{orange!10}1 & \cellcolor{orange!10}0 \\ -1 & \cellcolor{blue!10}1 & \cellcolor{blue!10}1 & \cellcolor{orange!10}0 & \cellcolor{orange!10}0 \\ -2 & \cellcolor{blue!10}1 & \cellcolor{blue!10}2 & \cellcolor{orange!10}0 & \cellcolor{orange!10}0 \\ -$\vdots$ & $\vdots$ & $\vdots$ & $\vdots$ & $\vdots$ \\ -N-1 & \cellcolor{blue!10}$F_N$ & \cellcolor{blue!10}$F_{N+1}$ & \cellcolor{orange!10}0 & \cellcolor{orange!10}1 \\ -\bottomrule -\end{tabular} - -\vspace{1em} -\caption{Fibonacci sequence AIR table layout with preprocessed selectors} -\label{fig:fibonacci_air} -\end{figure} - -\section{Proving system} - -\subsection{Commitment} - -Contrary to most STARK systems, which use a univariate Polynomial Commitment Scheme (PCS) such as FRI or KZG, we instead use a multilinear one. - - - -The entire AIR table is encoded and committed as a single multilinear polynomial $\Pol$ (except for the preprocessed columns, which are not committed). $\Pol$ has $n + m'$ variables, where $n = \log N = \log \text{(number of rows)}$ and $m' = \left\lceil \log M' \right\rceil = \left\lceil \log \text{(number of non-preprocessed columns)} \right\rceil$. $\Pol$ is defined by its evaluations over the boolean hypercube: for every (non-preprocessed) column index $i$ ($0 \leq i < M'$) and for every row index $r$ ($0 \leq r < N$): - -$$\Pol([i]_2 [r]_2) := c_{i}[r],$$ - -where: -\begin{itemize} - \item $[i]_2$ is the big-endian bit decomposition of $i$ into $m'$ bits (where $m' = \lceil \log M' \rceil$) - \item $[r]_2$ is the big-endian bit decomposition of $r$ into $n$ bits (where $n = \log N$) - \item $[i]_2 [r]_2$ represents concatenated $m' + n$ bits -\end{itemize} - -For example, if $M' = 20$, $N = 128$, $i = 3$, and $r = 33$, we have: -$$ -[i]_2 = 00011, \quad [r]_2 = 0100001, \quad [i]_2 [r]_2 = 00011 \, | \, 0100001. 
-$$ - -This means that, at the point corresponding to these bits, $\Pol$ evaluates to: -$$ -\Pol(00011 \, | \, 0100001) = c_3[33] -$$ - -% Where $[i]_2$ and $[r]_2$ are the corresponding bit decomposition (big-endian) of $i$ and $r$ (e.g. $M' = 20, N = 128, i = 3, r = 33, [i]_2[r]_2 = (00011 | 0100001)$). - -The undefined evaluations (for indices $M' \leq i < 2^{m'}$) are irrelevant and can be set to zero. - -% Note that the coefficients of $\Pol$ are in the base field $\Fp$. The random evaluation point at which $\Pol$ will be queried later by the verifier is in the extension field $\Fq$ (for soundness). - -Note that the coefficients of $\Pol$ are in the base field $\Fp$, while the verifier will later query $\Pol$ at a random evaluation point drawn from the extension field $\Fq$. Querying over the larger extension field is important for soundness because it reduces the prover's chance of successfully cheating: the Schwartz-Zippel lemma guarantees that a nonzero polynomial will evaluate to zero at a random point with probability at most $\deg / |\Fq|$, and using a larger field strengthens this bound. - -One solution is to embed $\Pol$ into the extension field $\Fq$ before committing to it, but this creates overhead. -Section~\ref{embedding_overhead} describes a simple and efficient approach to avoid this "embedding overhead" with WHIR. - -\subsection{Batching the constraints} - -After receiving the commitment to $\Pol$, the verifier sends a random scalar $\alpha \in \Fq$ to the prover. Up to a small soundness error, we can replace the $u$ transition constraints by a single one: - -$$H := \sum_{i=0}^{u-1} h_i \alpha^i $$ - -The batched constraint $H$ is a single combined polynomial that aggregates all the individual transition constraints $h_0, \dots, h_{u-1}$. Instead of checking each $h_i$ separately, we check $H$ once, which compresses the verification work while preserving soundness. - -\subsection{Zerocheck} \label{zerocheck} - -The main argument comes from \cite{ccs} (see also \cite{simple_multivariate_AIR}). For each column $c$, we define two multilinear polynomials over $n$ variables: -\begin{itemize} - \item The shifted copy $c^{\text{up}}$, - \item The forward-shifted copy $c^{\text{down}}$. -\end{itemize} - -Specifically, as illustrated in Figure~\ref{fig:zerocheck_mappings_n4}: - -\begin{equation} -\begin{aligned} -c^{\text{up}}([r]_2) &= -\begin{cases} -c[r] & \text{if } r \in \{0, \dots, N - 2\} \\ -c[N - 2] & \text{if } r = N - 1 -\end{cases} \\ -c^{\text{down}}([r]_2) &= -\begin{cases} -c[r + 1] & \text{if } r \in \{0, \dots, N - 2\} \\ -c[N - 1] & \text{if } r = N - 1 -\end{cases} -\end{aligned} -\end{equation} - - -\begin{figure}[h!] 
-\centering -\begin{tikzpicture}[ - node distance=0.2cm and 1.6cm, % Further reduced y-distance to 0.2cm - cnode/.style={draw, rectangle, minimum height=0.6cm, minimum width=1.3cm, anchor=center, thick}, % Reduced height - rnode/.style={minimum height=0.6cm, anchor=center, font=\ttfamily}, % Reduced height - arrowstyle/.style={-{Stealth[length=2.5mm, width=2mm]}, thick}, - labelstyle/.style={font=\small\itshape} -] - -\node[cnode] (c0) {$c[0]$}; -\node[cnode] (c1) [below=of c0] {$c[1]$}; -\node[cnode] (c2) [below=of c1] {$c[2]$}; -\node[cnode] (c3) [below=of c2] {$c[3]$}; -\node[labelstyle, above=0.15cm of c0] {Column $c$}; % Slightly reduced space - -% Inputs r for c^{up} (left) -\node[rnode] (r0_up) [left=of c0] {$0$}; -\node[rnode] (r1_up) [left=of c1] {$1$}; -\node[rnode] (r2_up) [left=of c2] {$2$}; -\node[rnode] (r3_up) [left=of c3] {$3$}; -\node[labelstyle, text width=2cm, align=center, above=0.15cm of r0_up] {$c^{\text{up}}$}; % Slightly reduced space - -\draw[arrowstyle] (r0_up.east) -- (c0.west); -\draw[arrowstyle] (r1_up.east) -- (c1.west); -\draw[arrowstyle] (r2_up.east) -- (c2.west); -\draw[arrowstyle] (r3_up.east) -- (c2.west); - -% Inputs r for c^{down} (right) -\node[rnode] (r0_down) [right=of c0] {$0$}; -\node[rnode] (r1_down) [right=of c1] {$1$}; -\node[rnode] (r2_down) [right=of c2] {$2$}; -\node[rnode] (r3_down) [right=of c3] {$3$}; -\node[labelstyle, text width=2cm, align=center, above=0.15cm of r0_down] {$c^{\text{down}}$}; % Slightly reduced space - -\draw[arrowstyle] (r0_down.west) -- (c1.east); -\draw[arrowstyle] (r1_down.west) -- (c2.east); -\draw[arrowstyle] (r2_down.west) -- (c3.east); -\draw[arrowstyle] (r3_down.west) -- (c3.east); - -\end{tikzpicture} -\caption{Visualization of $c^{\text{up}}$ and $c^{\text{down}}$ for $N=4$.} -\label{fig:zerocheck_mappings_n4} -\end{figure} - -The batched constraint $H$ is respected on the table if and only if: - -$$\begin{gathered} -\forall r \in \{0, \dots, N-2\}, \hspace{2mm} H(c_0[r], \dots, c_{M-1}[r], c_0[r+1], \dots, c_{M-1}[r+1]) = 0 \\ -\Leftrightarrow\\ -\forall r \in \{0, \dots, N-1\}, \hspace{2mm} H(c_0^{\text{up}}([r]_2), \dots, c_{M-1}^{\text{up}}([r]_2), c_0^{\text{down}}([r]_2), \dots, c_{M-1}^{\text{down}}([r]_2)) = 0 -\end{gathered}$$ - -The last equality can be proven using a zerocheck (see \cite{hyperplonk}), assuming the verifier has oracle access to $c_0^{\text{up}}, \dots, c_{M-1}^{\text{up}}$ and $ c_0^{\text{down}}, \dots, c_{M-1}^{\text{down}}$, which will be addressed in Section~\ref{shifted_mle}. The zerocheck is performed as follows: - -\begin{enumerate} - \item The verifier sends a random vector $r \in \Fq^n$, - \item Prover and verifier run the sumcheck protocol to prove that: - $$ \sum_{b \in \{0, 1\}^n} eq(b, r) \cdot H(c_0^{\text{up}}(b), \dots, c_{M-1}^{\text{up}}(b), c_0^{\text{down}}(b), \dots, c_{M-1}^{\text{down}}(b)) = 0 $$ - \item At each round $i$ of the sumcheck, the verifier sends a random challenge $\beta_i \in \Fq$. At the end of the final round, the verifier needs to evaluate the expression inside the sum above for $b \xleftarrow{} \beta = (\beta_1, \dots, \beta_n)$. - \begin{itemize} - \item The factor $eq(\beta, r)$ can be computed directly by the verifier. 
- \item For the remaining part, the prover provides the claimed evaluations (how the verifier checks the correctness of these values is detailed in Section~\ref{shifted_mle}): - \begin{equation*} - c_0^{\text{up}}(\beta), \dots, c_{M-1}^{\text{up}}(\beta), \quad c_0^{\text{down}}(\beta), \dots, c_{M-1}^{\text{down}}(\beta) - \end{equation*} - Given these $2M$ values, the verifier can finally evaluate $H$, which concludes the zerocheck. - \end{itemize} -\end{enumerate} - -\subsection{Oracle access to \texorpdfstring{$\textbf{\textit{c}}^{\text{up}}$}{} and \texorpdfstring{$\textbf{\textit{c}}^{\text{down}}$}{}}\label{shifted_mle} - -In Section~\ref{zerocheck}, for each column $c_i$, the prover has sent two values: $\text{claim}^\text{up}_i$ and $\text{claim}^\text{down}_i$ respectively equal to $c_i^{\text{up}}(\beta)$ and $c_i^{\text{down}}(\beta)$ in the honest case. It is now time to prove the correctness of these $2M$ evaluations. - -First, the verifier sends a random challenge $\gamma \in \Fq$. Except with small soundness error, the $2M$ claims can be reduced to the following: - -\begin{equation}\label{eq1} - \sum_{i = 0}^{M-1} (\gamma^i \cdot \text{claim}^\text{up}_i + \gamma^{i+M} \cdot \text{claim}^\text{down}_i) \stackrel{?}{=} \sum_{i = 0}^{M-1} (\gamma^i \cdot c_i^{\text{up}}(\beta) + \gamma^{i+M} \cdot c_i^{\text{down}}(\beta)) -\end{equation} - -\begin{itemize} - \item Left-hand side: the verifier can compute it directly using the claimed values. - \item Right-hand side: we need explicit formulas for $c_i^{\text{up}}$ and $c_i^{\text{down}}$ in terms of the multilinear extension of the corresponding column $c_i$: -\end{itemize} - - -\subsubsection{\texorpdfstring{Expression of $\textbf{\textit{c}}^{\text{up}}$}{}} \label{shifted_mle_up} - -For every column $c$, for every $r \in \Fq^n$, we have: -\begin{equation*} - \begin{aligned} - c^{\text{up}}(r) &= \sum_{b \in \{0, 1\}^n} \text{shift}^{\text{up}}(r, b) \cdot \widehat{c}(b) \\ - &= \sum_{b \in \{0, 1\}^n} \left[\overbrace{eq(b, r) \cdot (1 - eq(r, (\underbrace{1, \dots, 1}_{n \text{ times}})))}^{\text{picks $c[r]$ when $r \neq N - 1$ (i.e. before the last row)}} + \overbrace{eq((r, b), (\underbrace{1, \dots, 1}_{2n - 1 \text{ times}}, 0))}^{\text{picks $c[N - 2]$ when $r=N-1$}}\right] \cdot \widehat{c}(b), - \end{aligned} -\end{equation*} - -where: -\begin{itemize} - \item $\widehat{c}$ represents the multilinear extension (MLE) of $c$, - \item $\text{shift}^{\text{up}}(r, b)$ is a selector polynomial that determines how each $b$ contributes to the sum. -\end{itemize} - - -\subsubsection{\texorpdfstring{Expression of $\textbf{\textit{c}}^{\text{down}}$}{}} - - -For any column $c$, we define its “down-shifted” version $c^{\text{down}}$. 
For any point $r \in \Fq^n$, the expression is: -\begin{equation*} -\begin{aligned} -c^{\text{down}}(r) &= \sum_{b \in \{0, 1\}^n} \text{shift}^{\text{down}}(r, b) \cdot \widehat{c}(b) \\ -&= \sum_{b \in \{0, 1\}^n} \left[ \overbrace{\text{next}(r, b)}^{\text{picks $c[r + 1]$ when $r \neq N - 1$}} + \overbrace{eq((r, b), (\underbrace{1, \dots, 1}_{2n \text{ times}}))}^{\text{picks $c[N - 1]$ when $r = N - 1$ }} \right] \cdot \widehat{c}(b), -\end{aligned} -\end{equation*} - - - -where: -\begin{itemize} - \item $\widehat{c}$ represents the multilinear extension (MLE) of $c$, - \item $\text{shift}^{\text{down}}(r, b)$ is a selector polynomial that determines how each $b$ contributes to the sum, - \item "next" is the multilinear polynomial in $2n$ variables defined on the hypercube by: - $$\text{next}([x]_2 [y]_2) = \begin{cases} - 1 & \text{if } y = x + 1\\ - 0 & \text{otherwise} - \end{cases} \text{ for every pair of n-bit integers } (x, y)$$ - - See Section 5.1 of \cite{ccs} for more details. -\end{itemize} - - - - -\subsubsection{Final sumcheck} - -The right side of (\ref{eq1}) can thus be expressed as: - -$$\sum_{b \in \{0, 1\}^n} \underbrace{\sum_{i = 0}^{M-1} [\gamma^i \cdot \text{shift}^{\text{up}}(\beta, b) + \gamma^{i+M} \cdot \text{shift}^{\text{down}}(\beta, b) ] \cdot \widehat{c}_i(b)}_{\text{expr}(\beta, b)}$$ - -A second sumcheck (with respect to $b$) is used to compute this sum. Let $\delta \in \Fq^n$ be the corresponding vector of challenges. The verifier must finally evaluate $\text{expr}(\beta, \delta)$. Both $\text{shift}^{\text{up}}$ and $\text{shift}^{\text{down}}$ can be succinctly computed. It remains to evaluate $(\widehat{c}_i(\delta))_{0 \leq i < M}$, that is, the values of the columns’ multilinear extensions at the common point $\delta$. - -\subsection{PCS opening} - -At the end of the protocol, the verifier needs to check that the prover’s claimed evaluations of the multilinear extensions $\widehat{c}_i$ at the point $\delta$ are correct. This is done as follows: - -\begin{enumerate} -\item For the preprocessed columns ($M' \leq i < M$), the verifier can directly compute the values $\widehat{c}_i(\delta)$, since the verifier already knows these columns. - -\item For the non-preprocessed columns ($0 \leq i < M'$), the prover sends the claimed values $v_i$, which should equal $\widehat{c}_i(\delta)$ if the prover is honest. - -\item To efficiently verify all these claims in one go, the verifier samples a random vector $z \in \Fq^{m'}$, where $m' = \lceil \log M' \rceil$. This vector selects a random linear combination over the non-preprocessed columns. - -% The verifier samples a random vector \( z \in \Fq^{m'} \), where \( m' = \lceil \log M' \rceil \). - -\item Using this $z$, the verifier computes the combined evaluation: -$$ -\sum_{i \in \{0, 1\}^{m'}} eq(i, z) \cdot v_i, -$$ -which collapses all the prover’s claims into a single value. Here, $eq(i, z)$ acts as a selector that matches the random point $z$. -% and requests a PCS opening at the point \( \Pol((z, \delta)) \). - -% \item Finally, the verifier checks that the two evaluations match. - -\item Finally, the verifier requests the prover to open the committed multilinear polynomial $\Pol$ at the combined point $(z, \delta)$ and checks that the opening matches the combined evaluation computed above. -\end{enumerate} - -This process reduces many separate checks into just one, saving both prover and verifier time, while preserving soundness through the use of random linear combinations. 
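To make the packing and combined-opening identity above concrete, here is a minimal numeric sketch (in Python, over a toy 97-element field; the dimensions, the toy prime, and the helper names are illustrative assumptions, not the Whirlaway implementation). It packs $M' = 4$ columns of $N = 8$ rows into a single multilinear $\Pol$ and checks that one opening of $\Pol$ at $(z, \delta)$ equals the $eq$-weighted combination of the per-column claims $\widehat{c}_i(\delta)$:

\begin{verbatim}
import random

P = 97  # toy prime field for illustration (the real system uses KoalaBear)

def eq(x, y):
    # eq(x, y) = prod_i (x_i*y_i + (1 - x_i)*(1 - y_i)) mod P
    out = 1
    for xi, yi in zip(x, y):
        out = out * (xi * yi + (1 - xi) * (1 - yi)) % P
    return out

def bits(i, n):
    # big-endian n-bit decomposition [i]_2
    return [(i >> (n - 1 - j)) & 1 for j in range(n)]

def mle(values, point):
    # evaluate the MLE of `values` at `point`: sum_b eq(b, point) * values[b]
    n = len(point)
    return sum(eq(bits(i, n), point) * v for i, v in enumerate(values)) % P

random.seed(0)
m, n = 2, 3                                  # M' = 2^m columns, N = 2^n rows
cols = [[random.randrange(P) for _ in range(2 ** n)] for _ in range(2 ** m)]
T = [c[r] for c in cols for r in range(2 ** n)]    # T([i]_2 [r]_2) = c_i[r]

z = [random.randrange(P) for _ in range(m)]        # random column selector
delta = [random.randrange(P) for _ in range(n)]    # random row point

lhs = mle(T, z + delta)                            # single opening of T
rhs = sum(eq(bits(i, m), z) * mle(cols[i], delta)
          for i in range(2 ** m)) % P              # eq-weighted column claims
assert lhs == rhs
\end{verbatim}

The identity holds because $eq$ factors across coordinates, $eq((z, \delta), ([i]_2, [r]_2)) = eq(z, [i]_2) \cdot eq(\delta, [r]_2)$, which is exactly why a single opening of $\Pol$ suffices in the last step.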
- - - -\section{Univariate skip} - -\subsection{The traditional sumcheck protocol} - -Let's consider the case of the zerocheck (see Section~\ref{zerocheck}), which is the most sumcheck-intensive part of the protocol. At this stage, the prover wants to convince the verifier that: - -$$ \sum_{b \in \{0, 1\}^n} eq(b, r) \cdot H(c_0^{\text{up}}(b), \dots, c_{M-1}^{\text{up}}(b), c_0^{\text{down}}(b), \dots, c_{M-1}^{\text{down}}(b)) = 0$$ - -The traditional sumcheck protocol operates across $n$ rounds, one per variable: - -\begin{itemize} - \item \textbf{Round 1:} - - The prover starts by sending $P_1(X)$, supposedly equal to: - - $$ \sum_{b \in \{0, 1\}^{n-1}} eq((X, b), r) \cdot H(c_0^{\text{up}}(X, b), \dots, c_{M-1}^{\text{down}}(X, b)), $$ - - where, for the sake of brevity, $H(c_0^{\text{up}}(X, b), \dots, c_{M-1}^{\text{down}}(X, b))$ denotes $H(c_0^{\text{up}}(X, b), \dots, c_{M-1}^{\text{up}}(X, b), c_0^{\text{down}}(X, b), \dots, c_{M-1}^{\text{down}}(X, b))$. After receiving $P_1$, the verifier checks that $P_1(0) + P_1(1) = 0$ and responds with a random challenge $\beta_1 \in \Fq$. - \item \textbf{Round 2:} - - The prover then sends $P_2(X)$, supposedly equal to - - $$ \sum_{b \in \{0, 1\}^{n-2}} eq((\beta_1, X, b), r) \cdot H(c_0^{\text{up}}(\beta_1, X, b), \dots, c_{M-1}^{\text{down}}(\beta_1, X, b)) $$ - - After receiving $P_2$, the verifier checks that $P_1(\beta_1) = P_2(0) + P_2(1)$ and responds with a random challenge $\beta_2 \in \Fq$. - \item Continue for $n$ rounds, until all variables are fixed to random points. - \item At the end of the protocol, the verifier must check (with $\beta = (\beta_1, \dots, \beta_n)$): - - $$P_n(\beta_n) = eq(\beta, r) \cdot H(c_0^{\text{up}}(\beta), \dots, c_{M-1}^{\text{down}}(\beta))$$ - -\end{itemize} - -\subsection{Where the overhead comes from} - -For soundness reasons, if the base field $\Fp$ is too small, the random challenges $\beta$ must be sampled in an extension field $\Fq$. While the first round of the sumcheck happens entirely inside the cheaper base field $\Fp$, the moment the verifier sends $\beta_1 \in \Fq$, the prover is forced to fold the multilinear polynomials over $\Fq$, meaning every subsequent round involves more expensive operations in the extension field. - - -Indeed, after receiving the first challenge $\beta_1 \in \Fq$, the prover must compute the "folded" multilinear polynomials $c_0^{\text{up}}(\beta_1, \cdot), \dots, c_{M-1}^{\text{down}}(\beta_1, \cdot)$, whose coefficients are now in the extension field $\Fq$. -This may significantly slow down the subsequent rounds, compared to the first one. - -In summary, from the prover's perspective, the first round is computationally less expensive than the next few ones. The \emph{univariate skip} optimization~\cite{univariate_skip} leverages this asymmetry: by reorganizing the domain, we can perform the first $k$ rounds of sumcheck all at once, entirely within the base field $\Fp$. This leads to major efficiency improvements. - - -\subsection{Changing the evaluation domain} - -The univariate skip optimization (see \cite{univariate_skip}) improves prover efficiency by cleverly restructuring the evaluation domain. Traditionally, the sumcheck protocol works over the Boolean hypercube $\{0,1\}^n$, handling one variable per round. To skip $k$ variables all at once, we reshape the domain as: -\begin{equation} - D \times \{0,1\}^{n - k} -\end{equation} -where $D \subset \Fp$ is an arbitrary subset of size $2^k$, for example $D = \{0, \dots, 2^k - 1\}$. 
This effectively replaces the first $k$ boolean variables with a single univariate variable over $D$, while keeping the remaining $n - k$ variables in the usual boolean space. - -\vspace{\baselineskip} - -With this new perspective, we define, for $i \in \{0, \dots, M-1\}$, $\widetilde{c_i}^{\text{up}}(X_1, \dots, X_{n + 1 - k})$ as the polynomial, with degree less than $2^k$ in $X_1$, and multilinear in the remaining variables, such that: - -$$\widetilde{c_i}^{\text{up}}(x, b_1, \dots , b_{n - k}) = c_i^{\text{up}}([x]_2, b_1, \dots , b_{n - k}),$$ -for all $x \in D$ and $(b_1, \dots, b_{n - k}) \in \{0, 1\}^{n - k}$, where $[x]_2$ represents the big-endian decomposition of $x$ in $k$ bits. - -\vspace{\baselineskip} - -We define $\widetilde{c_i}^{\text{down}}$ similarly. - - -\vspace{\baselineskip} - -Finally, we define $\widetilde{eq}(X_1, \dots, X_{n + 1 - k}, Y_1, \dots, Y_{n + 1 - k})$ as the polynomial, with degree $< 2^k$ in $X_1$ and $Y_1$, and multilinear in the remaining variables, such that: -$$ -\widetilde{eq}(x, y) = -\begin{cases} -1 & \text{if } x = y \\ -0 & \text{otherwise} -\end{cases} -$$ - -for all $(x, y) \in (D \times \{0,1\}^{n - k})^2$. - - -\subsection{Zerocheck in the new domain} - -To apply the univariate skip, we need to slightly adapt the zerocheck protocol. Previously, the random challenge $r$ for the zerocheck was drawn from $\Fq^n$, matching the $n$ variables of the hypercube $\{0,1\}^n$. Now, after restructuring the domain to $D \times \{0,1\}^{n - k}$, the challenge must be drawn from the new space: -$$ -r \in \Fq^{n + 1 - k}, -$$ -where the first coordinate corresponds to the univariate component $x \in D$, and the remaining coordinates correspond to the $n - k$ boolean variables. With this setup, the zerocheck equation becomes: -$$ -\sum_{x \in D,\, b \in \{0,1\}^{n - k}} \widetilde{eq}((x, b), r) \cdot H\big(\widetilde{c_0}^{\text{up}}(x, b), \dots, \widetilde{c}_{M-1}^{\text{up}}(x, b),\, \widetilde{c_0}^{\text{down}}(x, b), \dots, \widetilde{c}_{M-1}^{\text{down}}(x, b)\big) \stackrel{?}{=} 0, -$$ - -where: -\begin{itemize} - \item $\widetilde{eq}((x, b), r)$ is the extended equality polynomial matching the combined domain. - \item $H$ is the batched constraint polynomial, evaluating the sum of all constraints at the selected point. -\end{itemize} - - -There is a key difference in the sumcheck protocol. In the traditional hypercube case, after the prover sends the first-round polynomial $P_1$, the verifier checks: -$$ -P_1(0) + P_1(1) = 0, -$$ -but in the new setting, the verifier instead checks: -$$ -\sum_{x \in D} P_1(x) = 0, -$$ -where $D$ has size $2^k$. This adjustment reflects the fact that, thanks to univariate skip, we are treating the first $k$ Boolean variables as one combined univariate block. - -Importantly, when $k = 1$, this construction reduces back to the traditional sumcheck on the hypercube, meaning the univariate skip is a strict generalization of the classic protocol. - - -In summary, by reorganizing the domain and adapting the sumcheck’s first step, the univariate skip allows the prover to efficiently collapse multiple rounds into a single, larger but cheaper univariate sum, all while staying in the base field. - -% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -% We need to adapt the protocol: the random zerocheck challenge $r$ is now sampled in $\Fq^{n + 1 - k}$ (instead of $\Fq^n)$. 
The zerocheck becomes: - -% $$ \sum_{x \in D, b \in \{0, 1\}^{n - k}} \widetilde{eq}((x, b), r) \cdot H(\widetilde{c_0}^{\text{up}}(x, b), \dots, \widetilde{c}_{M-1}^{\text{ up}}(x, b), \widetilde{c_0}^{\text{down}}(x, b), \dots, \widetilde{c}_{M-1}^{\text{ down}}(x, b)) \stackrel{?}{=} 0$$ - -% This equality can be proven using the sumcheck protocol, as before. Only difference: after receiving $P_1$, the verifier checks $\sum_{x \in D} P_1(x) = 0$ (instead of $P_1(0) + P_1(1) = 0$). - -% This is no more, no less, the univariate skip. - -% Note that $k = 1$ corresponds to the traditional sumcheck on the hypercube. - -\subsection{Cost analysis} - -Let’s break down the cost for the prover when using univariate skip. First, consider the degree of the first-round polynomial $P_1$. Because we collapse $k$ variables at once over the set $D$ (with $|D| = 2^k$), -the degree bound becomes: - -$$\text{deg}(P_1) = (\text{deg}(H) + 1) (2^k - 1),$$ -where $\deg(H)$ is the degree of the batched constraint polynomial. - - -% Note that $\forall x \in D, P_1(x) = 0 $, which gives $2^k$ "free" evaluations to the prover when interpolating $P_1$. - - -An important observation is that for all $x \in D$, we already know that $P_1(x) = 0$. These points correspond to the valid domain, so they provide $2^k$ "free" evaluations that the prover doesn’t need to compute explicitly when interpolating $P_1$. - -However, to fully interpolate $P_1$, the prover still needs to compute the remaining evaluations. For each such point, the prover sums over $2^{n - k}$ combinations (from the $n - k$ residual variables). Thus, the total number of prover operations to interpolate $P_1$ is roughly: - -$$[(\text{deg}(H) + 1) (2^k - 1) - 2^k + 1] \cdot 2^{n - k} = \text{deg}(H) (2^k - 1) \cdot 2^{n - k}$$ - -% This turns out to be equal to $\sum_{i = 1}^k {\text{deg}(H) \cdot 2^{n - i}}$, the number of corresponding operations to perform the first $k$ rounds of the traditional sumcheck, but in which case, the rounds $2, 3, \dots, k$ operate over the extension field, and are thus slower. - - -Interestingly, this matches the cost of performing the first $k$ rounds of the traditional sumcheck: -$$ -\sum_{i = 1}^k \deg(H) \cdot 2^{n - i}. -$$ - -But with one key difference. In the traditional protocol, rounds $2, 3, \dots, k$ involve folded polynomials -with coefficients in the extension field $\Fq$, which makes them significantly more expensive to compute. In contrast, univariate skip pushes all this work into the first round, but keeps it entirely within the base field $\Fp$, delivering major efficiency gains in practice. - -\subsection{Rest of the protocol} - -After finishing the zerocheck, the verifier still needs to verify the claimed values of the shifted polynomials. Specifically, he wants to check the evaluations of: - -$$ -\widetilde{c_0}^{\text{up}}, \, \widetilde{c_1}^{\text{up}}, \, \dots, \, \widetilde{c}_{M-1}^{\text{up}}, \quad \widetilde{c_0}^{\text{down}}, \, \widetilde{c_1}^{\text{down}}, \, \dots, \, \widetilde{c}_{M-1}^{\text{down}}, -$$ - -at a random point: - -$$ -\beta = (\beta_1, \beta_2, \dots, \beta_{n + 1 - k}) \in \Fq^{n + 1 - k}. -$$ - -Let’s break down how these are computed. For each column $c$, we have: -$$ -\widetilde{c}^{\text{up}}(\beta) = \sum_{x \in D} \widetilde{eq}(x, \beta_1) \cdot c^{\text{up}}([x]_2, \beta_2, \dots, \beta_{n + 1 - k}). 
-$$ - -This expression essentially says: -\begin{itemize} - \item For the first coordinate $\beta_1$, we match it against $x \in D$ using the equality polynomial $\widetilde{eq}$. - \item For each $x$, we map it back to its $k$-bit decomposition $[x]_2$ and evaluate the original $c^{\text{up}}$. -\end{itemize} - -We can further rewrite it as: - -$$ -\widetilde{c}^{\text{up}}(\beta) = \sum_{b \in \{0,1\}^k} \Gamma(b, \beta_1) \cdot c^{\text{up}}(b, \beta_2, \dots, \beta_{n + 1 - k}), -$$ -where: -$$ -\Gamma(b, \beta_1) := \sum_{x \in D} eq(b, [x]_2) \cdot \widetilde{eq}(x, \beta_1). -$$ - -The key point is that $\Gamma$ can be computed efficiently by the verifier. To prove the correctness of these evaluations, we could: -\begin{itemize} - \item Either run another sumcheck to reduce the claims about $\widetilde{c}^{\text{up}}$ back to the original $c^{\text{up}}$ (handled as described in Section~\ref{shifted_mle}), - \item Or more directly, expand $c^{\text{up}}$ itself (see Section~\ref{shifted_mle_up}) in terms of the base column $c$, allowing us to run a single sumcheck over $n + k$ variables. -\end{itemize} - -The same approach applies symmetrically for all the $\widetilde{c}^{\text{down}}$ terms. Finally, all these evaluation checks (for the up- and down-shifted polynomials) can be batched together using a random challenge, so that the prover and verifier only need to perform one combined sumcheck over $n + k$ variables. - - - -% Every column $\widetilde{c}^{\text{ up}}$ is defined by: - -% \begin{align} -% \widetilde{c}^{\text{up}}(\beta) &= \sum_{x \in D} \widetilde{eq}(x, \beta_1) \cdot c^{\text{up}}([x]_2, \beta_2, \dots, \beta_{n + 1 - k}) \\ -% &= \sum_{b \in \{0,1\}^k} \underbrace{\sum_{x \in D} eq(b, [x]_2) \cdot \widetilde{eq}(x, \beta_1)}_{\Gamma(b, \alpha_1)} c^{\text{up}}(b, \beta_2, \dots, \beta_{n + 1 - k}) -% \end{align} - -% $\Gamma$ can be efficiently computed by the verifier. - -% As a consequence, we could, again, use the sumcheck protocol to reduce an evaluation claim about $\widetilde{c}^{\text{ up}}$ to one about $c^{\text{up}}$, which is handled by \ref{shifted_mle}. In a more direct manner, we prefer to expand the expression of $c^{\text{up}}$ (see \ref{shifted_mle_up}) in terms of $c$, to get a single sumcheck on $n + k$ variables. - -% The same naturally applies for all the $\widetilde{c}^{\text{ down}}$. - -% And all these sumcheck above can be batched (using a random challenge) into a single one, in $n + k$ variables. - -\section{Avoiding the "embedding overhead" in WHIR}\label{embedding_overhead} - -In its simplest form, a polynomial commitment scheme (PCS) operates over a single finite field: the prover commits to a polynomial with coefficients in that field and later opens it at a chosen evaluation point, all within the same arithmetic setting. However, in many modern constructions, the situation is more nuanced. It’s common for the committed polynomial to have coefficients in a small base field $\Fp$, while the verifier requests openings at points lying in a larger extension field $\Fq$. This shift introduces what’s known as the embedding overhead — the prover must effectively “lift” or embed the polynomial’s coefficients into the larger field before opening, which can add significant computational cost. - -To address this, prior work introduced the ring-switching protocol (see Section 3 of \cite{fri_binius}), which provides an elegant way to sidestep the embedding cost by switching the representation of the polynomial directly into the extension field. 
While this technique is especially well-suited for binary tower fields, it comes with two main limitations: -\begin{enumerate} - \item The extension degree $[\Fq:\Fp]$ must be a power of two, which restricts field choices. - \item It adds complexity to the protocol, both conceptually and in terms of implementation effort. -\end{enumerate} - -In this section, we propose a simpler alternative to avoid the embedding overhead specifically for the WHIR PCS construction introduced in \cite{whir}. Our approach achieves the same efficiency benefits as ring-switching, but without its rigidity or added protocol complexity — offering a more streamlined and flexible design. - - -\subsection{Ring-Switching costs} - -% Let's first analyze the prover costs when using "ring-switching" on a multilinear polynomial $P$ with $v$ variables in the base field $\Fp$. -% $P$ will be reinterpreted as a multilinear polynomial $P'$ with $v-d$ variablesin $\Fq$, with $[\Fq:\Fp] = 2^d$. - - -To understand the efficiency tradeoffs, let’s first break down the prover’s costs when using the ring-switching approach on a multilinear polynomial $P$ with $v$ variables over the base field $\Fp$. Under ring-switching, the polynomial $P$ is reinterpreted as a multilinear polynomial $P'$ over the larger extension field $\Fq$, where the number of variables is reduced to $v - d$ (with the extension degree $[\Fq:\Fp] = 2^d$). - - -We’ll also introduce: -\begin{itemize} - \item $\rho = 1 / 2^r$, the initial code rate of the Reed–Solomon (RS) code, - \item $f_1, f_2, \dots$, the folding factors applied at each round (these match the $k$ values defined in Section 2.1.3 of \cite{whir}). -\end{itemize} - -% Let's denote by $\rho = 1 / 2^r$ the initial rate of the Reed Solomon (RS) code, and by $f_1, f_2, \dots$ the folding factors of each round (corresponding to values of $k$ in section 2.1.3 of \cite{whir}). - - -In the first round of the protocol, the prover’s dominant cost comes from performing $2^{f_1}$ Fast Fourier Transforms (FFTs), each over a domain of size: -$$ -2^{v - d + r - f_1} -$$ -in the extension field $\Fq$. - -In the second round, the RS domain size is effectively halved. The prover’s work is now dominated by $2^{f_2}$ FFTs, each over a (smaller) domain of size: -$$ -2^{v - d + r - 1 - f_2} -$$ -again over $\Fq$. - - -This pattern continues across subsequent rounds: as the folding progresses, the RS domain keeps shrinking, and the prover’s cost scales with both the number of FFTs and the size of the domains on which they operate. - - - -% During the first round, the prover cost is dominated by $2^{f_1}$ FFTs (Fast Fourrier Transforms), each over a domain of size $2^{v - d + r - f_1}$ (over $\Fq$). For the second round, the size of the RS domain is halved, and the prover costs are dominated by $2^{f_2}$ FFTs, each over a domain of size $2^{v - d + r - 1 - f_2}$. And so on \dots. - -\subsection{Embedding costs} - -Without applying the ring-switching protocol, the prover’s workload in the first round remains quite similar to the ring-switching case. Specifically, the prover must perform $2^{f_1}$ FFTs, each over a domain of size $2^{v + r - f_1}$, but crucially, these computations take place over the smaller base field $\Fp$. While the FFT domain is $2^d$ times larger than in the ring-switching setup, the field itself is $2^d$ times smaller, making the overall computational effort comparable in this initial round. - -However, the situation changes significantly in the subsequent rounds. 
After the first round, the polynomial has been folded, and the prover is now forced to operate over the larger extension field $\Fq$. In the second round, for example, the prover must carry out $2^{f_2}$ FFTs, each over a domain of size $2^{v + r - 1 - f_2}$. Importantly, the Reed–Solomon domain in this setting is $2^d$ times larger compared to what it would be under ring-switching, since the base-to-extension embedding was avoided. - -As a result, although the first-round costs are effectively balanced, the subsequent rounds become notably more expensive. This slowdown arises from having to perform FFTs over significantly larger domains in the extension field, eroding the overall efficiency that ring-switching was designed to protect. Over multiple rounds, this cumulative cost becomes a meaningful disadvantage, motivating the search for alternative strategies to bypass the embedding overhead. - - -% Without the use of the "ring-switching" protocol, the prover costs for the first round are fairly similar: $2^{f_1}$ FFTs, each over a domain of size $2^{v + r - f_1}$ (over $\Fp$). The domain of the FFTs is $2^d$ times larger, but the field is $2^d$ times smaller. Unfortunately, the next rounds are slower. The second one requires $2^{f_2}$ FFTs, each over a domain of size $2^{v + r - 1 - f_2}$, but now over the extension field $\Fq$ (the polynomial has been folded in the first round). The Reed Solomon domain size is $2^d$ larger compared to "ring-switching". As a result, all except the first rounds are slower, due to FFTs over a larger domain, on the same extension field. - -\subsection{A simple solution} - -% To tackle this overhead, we suggest the following approach: -% \begin{enumerate} -% \item In the first round, increase the folding factor to $f_1' = f_1 + d$. The consecutive folding factors are unaffected: $f_2' = f_2, \dots$ -% \item When transitioning from the first to the second round, instead of dividing by 2 the size of the Reed Solomon domain, divide it by $2^{d + 1}$. -% \end{enumerate} - -% The prover cost for the first round are similar compared to "ring-switching": $2^d$ times more FFTs, but on a field $2^d$ times smaller. -% Starting from the second round, costs become identical: same number of FFTs, over the same domain, with the same field - -% So from the prover perspective, this approach is as efficient as using ring-switching. - -% It turns out it is also the case in terms of verification costs, and proof size: - -% \begin{itemize} -% \item The code rate at each round is the same: Initially $1/2^r$, then $1/2^{r + f_1 - 1}$, $1/2^{r + f_1 + f_2 - 2}$ \dots. -% \item The size of each Merkle tree leaf is the same. The main difference is at the first round, it consists in $2^{f_1}$ elements of $\Fq$ with ring-switching, while it is $2^{f_1 + d}$ elements of $\Fp$ with our approach (both types require the same of bytes to be represented). -% \end{itemize} - -To avoid the embedding overhead without resorting to ring-switching, we propose a straightforward and effective adjustment to the protocol. Specifically, we suggest increasing the folding factor in the first round to $f_1' = f_1 + d$, where $d$ is the base-2 logarithm of the extension degree (i.e., $[\Fq : \Fp] = 2^d$). The folding factors for all subsequent rounds remain unchanged, so $f_2' = f_2$, $f_3' = f_3$, and so on. 
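As an illustrative numeric example (the concrete parameters here are assumptions chosen for readability, not values fixed by the protocol): over KoalaBear with $[\Fq : \Fp] = 2^3$, i.e. $d = 3$, a first-round folding factor of $f_1 = 4$ becomes
$$
f_1' = f_1 + d = 4 + 3 = 7,
$$
so the prover folds $2^7 = 128$ evaluations at a time in the first round, entirely over $\Fp$, and then continues with $f_2' = f_2$ as usual.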
- -Additionally, when transitioning from the first to the second round, we propose adjusting the Reed–Solomon domain reduction: rather than simply halving the domain size as in the traditional approach, the domain should be divided by $2^{d + 1}$. This adjustment compensates for the lack of ring-switching and aligns the domain size with that of the equivalent protocol using ring-switching. - -With this modification, the prover’s costs in the first round remain comparable to those under ring-switching. While the prover now performs $2^d$ times more FFTs, these are carried out over a field that is $2^d$ times smaller, maintaining a balanced computational load. -$$ -\text{Prover cost (first round)} \sim 2^d \times \text{FFTs over } \Fp \quad \text{vs.} \quad \text{FFTs over } \Fq. -$$ - -From the second round onward, the prover’s workload becomes essentially identical to that of the ring-switching setup: the number of FFTs, the domain sizes, and the field arithmetic all align. - -Importantly, this adjustment not only preserves prover efficiency but also ensures that verification costs and proof size remain on par with the ring-switching protocol. At each round, the code rate progression is preserved: it begins at $1/2^r$, then advances to $1/2^{r + f_1 - 1}$, then to $1/2^{r + f_1 + f_2 - 2}$, and so forth. Furthermore, the size of each Merkle tree leaf remains equivalent. The only notable difference is that, in the first round, ring-switching commits to $2^{f_1}$ elements in the extension field $\Fq$, while our adjusted approach commits to $2^{f_1 + d}$ elements in the base field $\Fp$. Crucially, both representations occupy the same number of bytes, preserving the compactness of the proof. - - - -\bibliographystyle{IEEEtran} -\bibliography{bibliography} - -\end{document} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 98152abe..a0c1db73 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,22 +1,41 @@ -#![cfg_attr(not(test), allow(unused_crate_dependencies))] +use clap::Parser; +use poseidon_circuit::tests::run_poseidon_benchmark; +use rec_aggregation::{ + recursion::run_whir_recursion_benchmark, xmss_aggregate::run_xmss_benchmark, +}; -use air::examples::prove_poseidon2::{Poseidon2Config, prove_poseidon2}; -use whir_p3::{FoldingFactor, SecurityAssumption}; +#[derive(Parser)] +enum Cli { + #[command(about = "Aggregate XMSS signature")] + Xmss { + #[arg(long)] + n_signatures: usize, + }, + #[command(about = "Run 1 WHIR recursive proof")] + Recursion, + #[command(about = "Prove validity of Poseidon2 permutations over 16 field elements")] + Poseidon { + #[arg(long, help = "log2(number of Poseidons)")] + log_n_perms: usize, + }, +} fn main() { - let config = Poseidon2Config { - log_n_poseidons_16: 17, - log_n_poseidons_24: 17, - univariate_skips: 4, - folding_factor: FoldingFactor::new(7, 4), - log_inv_rate: 1, - soundness_type: SecurityAssumption::CapacityBound, - pow_bits: 16, - security_level: 128, - rs_domain_initial_reduction_factor: 5, - max_num_variables_to_send_coeffs: 7, - display_logs: true, - }; - let benchmark = prove_poseidon2(&config); - println!("\n{benchmark}"); + let cli = Cli::parse(); + + match cli { + Cli::Xmss { + n_signatures: count, + } => { + run_xmss_benchmark(count); + } + Cli::Recursion => { + run_whir_recursion_benchmark(); + } + Cli::Poseidon { + log_n_perms: log_count, + } => { + run_poseidon_benchmark(log_count, false); + } + } }