diff --git a/common/address.go b/common/address.go index b5bca6a5bcb..d73823ffe41 100644 --- a/common/address.go +++ b/common/address.go @@ -23,8 +23,7 @@ import ( "fmt" "math/big" - "golang.org/x/crypto/sha3" - + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/hexutil" "github.com/erigontech/erigon/common/length" ) @@ -77,7 +76,7 @@ func (a *Address) checksumHex() []byte { buf := a.hex() // compute checksum - sha := sha3.NewLegacyKeccak256() + sha := keccak.NewLegacyKeccak256() //nolint:errcheck sha.Write(buf[2:]) hash := sha.Sum(nil) diff --git a/common/crypto/crypto.go b/common/crypto/crypto.go index c79b3751d51..625a75e42d9 100644 --- a/common/crypto/crypto.go +++ b/common/crypto/crypto.go @@ -34,9 +34,9 @@ import ( "sync" "github.com/holiman/uint256" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/hexutil" "github.com/erigontech/erigon/common/math" ) @@ -111,7 +111,7 @@ func Keccak256Hash(data ...[]byte) (h common.Hash) { // Keccak512 calculates and returns the Keccak512 hash of the input data. func Keccak512(data ...[]byte) []byte { - d := sha3.NewLegacyKeccak512() + d := keccak.NewLegacyKeccak512() for _, b := range data { d.Write(b) } @@ -325,7 +325,7 @@ func PubkeyToAddress(p ecdsa.PublicKey) common.Address { // hasherPool holds LegacyKeccak hashers. var hasherPool = sync.Pool{ New: func() any { - return sha3.NewLegacyKeccak256() + return keccak.NewLegacyKeccak256() }, } diff --git a/common/crypto/keccak/LICENSE b/common/crypto/keccak/LICENSE new file mode 100644 index 00000000000..2a7cf70da6e --- /dev/null +++ b/common/crypto/keccak/LICENSE @@ -0,0 +1,27 @@ +Copyright 2009 The Go Authors. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google LLC nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/common/crypto/keccak/README.md b/common/crypto/keccak/README.md new file mode 100644 index 00000000000..295a5b958c7 --- /dev/null +++ b/common/crypto/keccak/README.md @@ -0,0 +1,6 @@ +This is a vendored and modified copy of golang.org/x/crypto/sha3, with an assembly +implementation of keccak256. We wish to retain the assembly implementation, +which was removed in v0.44.0. + +Ethereum uses a 'legacy' variant of Keccak, which was defined before it became SHA3. As +such, we cannot use the standard library crypto/sha3 package. 
diff --git a/common/crypto/keccak/hashes.go b/common/crypto/keccak/hashes.go
new file mode 100644
index 00000000000..c78c5fe9920
--- /dev/null
+++ b/common/crypto/keccak/hashes.go
@@ -0,0 +1,44 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package keccak
+
+// This file provides the constructors for the legacy Keccak-256 and
+// Keccak-512 hash functions, together with the dsbyte and rate
+// constants declared below.
+
+import (
+	"hash"
+)
+
+const (
+	dsbyteSHA3   = 0b00000110
+	dsbyteKeccak = 0b00000001
+	dsbyteShake  = 0b00011111
+	dsbyteCShake = 0b00000100
+
+	// rateK[c] is the rate in bytes for Keccak[c] where c is the capacity in
+	// bits. Given the sponge size is 1600 bits, the rate is 1600 - c bits.
+	rateK256  = (1600 - 256) / 8
+	rateK448  = (1600 - 448) / 8
+	rateK512  = (1600 - 512) / 8
+	rateK768  = (1600 - 768) / 8
+	rateK1024 = (1600 - 1024) / 8
+)
+
+// NewLegacyKeccak256 creates a new Keccak-256 hash.
+//
+// Only use this function if you require compatibility with an existing cryptosystem
+// that uses non-standard padding. All other users should use crypto/sha3's New256 instead.
+func NewLegacyKeccak256() hash.Hash {
+	return &state{rate: rateK512, outputLen: 32, dsbyte: dsbyteKeccak}
+}
+
+// NewLegacyKeccak512 creates a new Keccak-512 hash.
+//
+// Only use this function if you require compatibility with an existing cryptosystem
+// that uses non-standard padding. All other users should use crypto/sha3's New512 instead.
+func NewLegacyKeccak512() hash.Hash {
+	return &state{rate: rateK1024, outputLen: 64, dsbyte: dsbyteKeccak}
+}
diff --git a/common/crypto/keccak/keccakf.go b/common/crypto/keccak/keccakf.go
new file mode 100644
index 00000000000..b66d7cfb9b5
--- /dev/null
+++ b/common/crypto/keccak/keccakf.go
@@ -0,0 +1,414 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (!amd64 && !arm64) || purego || !gc
+
+package keccak
+
+import "math/bits"
+
+// rc stores the 24 round constants for use in the ι step, one per round.
+var rc = [24]uint64{
+	0x0000000000000001,
+	0x0000000000008082,
+	0x800000000000808A,
+	0x8000000080008000,
+	0x000000000000808B,
+	0x0000000080000001,
+	0x8000000080008081,
+	0x8000000000008009,
+	0x000000000000008A,
+	0x0000000000000088,
+	0x0000000080008009,
+	0x000000008000000A,
+	0x000000008000808B,
+	0x800000000000008B,
+	0x8000000000008089,
+	0x8000000000008003,
+	0x8000000000008002,
+	0x8000000000000080,
+	0x000000000000800A,
+	0x800000008000000A,
+	0x8000000080008081,
+	0x8000000000008080,
+	0x0000000080000001,
+	0x8000000080008008,
+}
+
+// keccakF1600 applies the Keccak permutation, in place, to a 1600-bit
+// state held in the 25-element uint64 array that a points to.
+func keccakF1600(a *[25]uint64) {
+	// Implementation translated from Keccak-inplace.c
+	// in the keccak reference code.
+	var t, bc0, bc1, bc2, bc3, bc4, d0, d1, d2, d3, d4 uint64
+
+	for i := 0; i < 24; i += 4 {
+		// Combines the 5 steps in each round into 2 steps.
+		// Unrolls 4 rounds per loop (6 iterations, 24 rounds) and spreads some steps across rounds.
+
+		// Round 1: uses round constant rc[i].
+		bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
+		bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
+		bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
+		bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
+		bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
+		d0 = bc4 ^ (bc1<<1 | bc1>>63)
+		d1 = bc0 ^ (bc2<<1 | bc2>>63)
+		d2 = bc1 ^ (bc3<<1 | bc3>>63)
+		d3 = bc2 ^ (bc4<<1 | bc4>>63)
+		d4 = bc3 ^ (bc0<<1 | bc0>>63)
+
+		bc0 = a[0] ^ d0
+		t = a[6] ^ d1
+		bc1 = bits.RotateLeft64(t, 44)
+		t = a[12] ^ d2
+		bc2 = bits.RotateLeft64(t, 43)
+		t = a[18] ^ d3
+		bc3 = bits.RotateLeft64(t, 21)
+		t = a[24] ^ d4
+		bc4 = bits.RotateLeft64(t, 14)
+		a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i]
+		a[6] = bc1 ^ (bc3 &^ bc2)
+		a[12] = bc2 ^ (bc4 &^ bc3)
+		a[18] = bc3 ^ (bc0 &^ bc4)
+		a[24] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[10] ^ d0
+		bc2 = bits.RotateLeft64(t, 3)
+		t = a[16] ^ d1
+		bc3 = bits.RotateLeft64(t, 45)
+		t = a[22] ^ d2
+		bc4 = bits.RotateLeft64(t, 61)
+		t = a[3] ^ d3
+		bc0 = bits.RotateLeft64(t, 28)
+		t = a[9] ^ d4
+		bc1 = bits.RotateLeft64(t, 20)
+		a[10] = bc0 ^ (bc2 &^ bc1)
+		a[16] = bc1 ^ (bc3 &^ bc2)
+		a[22] = bc2 ^ (bc4 &^ bc3)
+		a[3] = bc3 ^ (bc0 &^ bc4)
+		a[9] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[20] ^ d0
+		bc4 = bits.RotateLeft64(t, 18)
+		t = a[1] ^ d1
+		bc0 = bits.RotateLeft64(t, 1)
+		t = a[7] ^ d2
+		bc1 = bits.RotateLeft64(t, 6)
+		t = a[13] ^ d3
+		bc2 = bits.RotateLeft64(t, 25)
+		t = a[19] ^ d4
+		bc3 = bits.RotateLeft64(t, 8)
+		a[20] = bc0 ^ (bc2 &^ bc1)
+		a[1] = bc1 ^ (bc3 &^ bc2)
+		a[7] = bc2 ^ (bc4 &^ bc3)
+		a[13] = bc3 ^ (bc0 &^ bc4)
+		a[19] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[5] ^ d0
+		bc1 = bits.RotateLeft64(t, 36)
+		t = a[11] ^ d1
+		bc2 = bits.RotateLeft64(t, 10)
+		t = a[17] ^ d2
+		bc3 = bits.RotateLeft64(t, 15)
+		t = a[23] ^ d3
+		bc4 = bits.RotateLeft64(t, 56)
+		t = a[4] ^ d4
+		bc0 = bits.RotateLeft64(t, 27)
+		a[5] = bc0 ^ (bc2 &^ bc1)
+		a[11] = bc1 ^ (bc3 &^ bc2)
+		a[17] = bc2 ^ (bc4 &^ bc3)
+		a[23] = bc3 ^ (bc0 &^ bc4)
+		a[4] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[15] ^ d0
+		bc3 = bits.RotateLeft64(t, 41)
+		t = a[21] ^ d1
+		bc4 = bits.RotateLeft64(t, 2)
+		t = a[2] ^ d2
+		bc0 = bits.RotateLeft64(t, 62)
+		t = a[8] ^ d3
+		bc1 = bits.RotateLeft64(t, 55)
+		t = a[14] ^ d4
+		bc2 = bits.RotateLeft64(t, 39)
+		a[15] = bc0 ^ (bc2 &^ bc1)
+		a[21] = bc1 ^ (bc3 &^ bc2)
+		a[2] = bc2 ^ (bc4 &^ bc3)
+		a[8] = bc3 ^ (bc0 &^ bc4)
+		a[14] = bc4 ^ (bc1 &^ bc0)
+
+		// Round 2: uses round constant rc[i+1].
+		bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
+		bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
+		bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
+		bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
+		bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
+		d0 = bc4 ^ (bc1<<1 | bc1>>63)
+		d1 = bc0 ^ (bc2<<1 | bc2>>63)
+		d2 = bc1 ^ (bc3<<1 | bc3>>63)
+		d3 = bc2 ^ (bc4<<1 | bc4>>63)
+		d4 = bc3 ^ (bc0<<1 | bc0>>63)
+
+		bc0 = a[0] ^ d0
+		t = a[16] ^ d1
+		bc1 = bits.RotateLeft64(t, 44)
+		t = a[7] ^ d2
+		bc2 = bits.RotateLeft64(t, 43)
+		t = a[23] ^ d3
+		bc3 = bits.RotateLeft64(t, 21)
+		t = a[14] ^ d4
+		bc4 = bits.RotateLeft64(t, 14)
+		a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+1]
+		a[16] = bc1 ^ (bc3 &^ bc2)
+		a[7] = bc2 ^ (bc4 &^ bc3)
+		a[23] = bc3 ^ (bc0 &^ bc4)
+		a[14] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[20] ^ d0
+		bc2 = bits.RotateLeft64(t, 3)
+		t = a[11] ^ d1
+		bc3 = bits.RotateLeft64(t, 45)
+		t = a[2] ^ d2
+		bc4 = bits.RotateLeft64(t, 61)
+		t = a[18] ^ d3
+		bc0 = bits.RotateLeft64(t, 28)
+		t = a[9] ^ d4
+		bc1 = bits.RotateLeft64(t, 20)
+		a[20] = bc0 ^ (bc2 &^ bc1)
+		a[11] = bc1 ^ (bc3 &^ bc2)
+		a[2] = bc2 ^ (bc4 &^ bc3)
+		a[18] = bc3 ^ (bc0 &^ bc4)
+		a[9] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[15] ^ d0
+		bc4 = bits.RotateLeft64(t, 18)
+		t = a[6] ^ d1
+		bc0 = bits.RotateLeft64(t, 1)
+		t = a[22] ^ d2
+		bc1 = bits.RotateLeft64(t, 6)
+		t = a[13] ^ d3
+		bc2 = bits.RotateLeft64(t, 25)
+		t = a[4] ^ d4
+		bc3 = bits.RotateLeft64(t, 8)
+		a[15] = bc0 ^ (bc2 &^ bc1)
+		a[6] = bc1 ^ (bc3 &^ bc2)
+		a[22] = bc2 ^ (bc4 &^ bc3)
+		a[13] = bc3 ^ (bc0 &^ bc4)
+		a[4] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[10] ^ d0
+		bc1 = bits.RotateLeft64(t, 36)
+		t = a[1] ^ d1
+		bc2 = bits.RotateLeft64(t, 10)
+		t = a[17] ^ d2
+		bc3 = bits.RotateLeft64(t, 15)
+		t = a[8] ^ d3
+		bc4 = bits.RotateLeft64(t, 56)
+		t = a[24] ^ d4
+		bc0 = bits.RotateLeft64(t, 27)
+		a[10] = bc0 ^ (bc2 &^ bc1)
+		a[1] = bc1 ^ (bc3 &^ bc2)
+		a[17] = bc2 ^ (bc4 &^ bc3)
+		a[8] = bc3 ^ (bc0 &^ bc4)
+		a[24] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[5] ^ d0
+		bc3 = bits.RotateLeft64(t, 41)
+		t = a[21] ^ d1
+		bc4 = bits.RotateLeft64(t, 2)
+		t = a[12] ^ d2
+		bc0 = bits.RotateLeft64(t, 62)
+		t = a[3] ^ d3
+		bc1 = bits.RotateLeft64(t, 55)
+		t = a[19] ^ d4
+		bc2 = bits.RotateLeft64(t, 39)
+		a[5] = bc0 ^ (bc2 &^ bc1)
+		a[21] = bc1 ^ (bc3 &^ bc2)
+		a[12] = bc2 ^ (bc4 &^ bc3)
+		a[3] = bc3 ^ (bc0 &^ bc4)
+		a[19] = bc4 ^ (bc1 &^ bc0)
+
+		// Round 3: uses round constant rc[i+2].
+		bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
+		bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
+		bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
+		bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
+		bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
+		d0 = bc4 ^ (bc1<<1 | bc1>>63)
+		d1 = bc0 ^ (bc2<<1 | bc2>>63)
+		d2 = bc1 ^ (bc3<<1 | bc3>>63)
+		d3 = bc2 ^ (bc4<<1 | bc4>>63)
+		d4 = bc3 ^ (bc0<<1 | bc0>>63)
+
+		bc0 = a[0] ^ d0
+		t = a[11] ^ d1
+		bc1 = bits.RotateLeft64(t, 44)
+		t = a[22] ^ d2
+		bc2 = bits.RotateLeft64(t, 43)
+		t = a[8] ^ d3
+		bc3 = bits.RotateLeft64(t, 21)
+		t = a[19] ^ d4
+		bc4 = bits.RotateLeft64(t, 14)
+		a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+2]
+		a[11] = bc1 ^ (bc3 &^ bc2)
+		a[22] = bc2 ^ (bc4 &^ bc3)
+		a[8] = bc3 ^ (bc0 &^ bc4)
+		a[19] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[15] ^ d0
+		bc2 = bits.RotateLeft64(t, 3)
+		t = a[1] ^ d1
+		bc3 = bits.RotateLeft64(t, 45)
+		t = a[12] ^ d2
+		bc4 = bits.RotateLeft64(t, 61)
+		t = a[23] ^ d3
+		bc0 = bits.RotateLeft64(t, 28)
+		t = a[9] ^ d4
+		bc1 = bits.RotateLeft64(t, 20)
+		a[15] = bc0 ^ (bc2 &^ bc1)
+		a[1] = bc1 ^ (bc3 &^ bc2)
+		a[12] = bc2 ^ (bc4 &^ bc3)
+		a[23] = bc3 ^ (bc0 &^ bc4)
+		a[9] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[5] ^ d0
+		bc4 = bits.RotateLeft64(t, 18)
+		t = a[16] ^ d1
+		bc0 = bits.RotateLeft64(t, 1)
+		t = a[2] ^ d2
+		bc1 = bits.RotateLeft64(t, 6)
+		t = a[13] ^ d3
+		bc2 = bits.RotateLeft64(t, 25)
+		t = a[24] ^ d4
+		bc3 = bits.RotateLeft64(t, 8)
+		a[5] = bc0 ^ (bc2 &^ bc1)
+		a[16] = bc1 ^ (bc3 &^ bc2)
+		a[2] = bc2 ^ (bc4 &^ bc3)
+		a[13] = bc3 ^ (bc0 &^ bc4)
+		a[24] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[20] ^ d0
+		bc1 = bits.RotateLeft64(t, 36)
+		t = a[6] ^ d1
+		bc2 = bits.RotateLeft64(t, 10)
+		t = a[17] ^ d2
+		bc3 = bits.RotateLeft64(t, 15)
+		t = a[3] ^ d3
+		bc4 = bits.RotateLeft64(t, 56)
+		t = a[14] ^ d4
+		bc0 = bits.RotateLeft64(t, 27)
+		a[20] = bc0 ^ (bc2 &^ bc1)
+		a[6] = bc1 ^ (bc3 &^ bc2)
+		a[17] = bc2 ^ (bc4 &^ bc3)
+		a[3] = bc3 ^ (bc0 &^ bc4)
+		a[14] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[10] ^ d0
+		bc3 = bits.RotateLeft64(t, 41)
+		t = a[21] ^ d1
+		bc4 = bits.RotateLeft64(t, 2)
+		t = a[7] ^ d2
+		bc0 = bits.RotateLeft64(t, 62)
+		t = a[18] ^ d3
+		bc1 = bits.RotateLeft64(t, 55)
+		t = a[4] ^ d4
+		bc2 = bits.RotateLeft64(t, 39)
+		a[10] = bc0 ^ (bc2 &^ bc1)
+		a[21] = bc1 ^ (bc3 &^ bc2)
+		a[7] = bc2 ^ (bc4 &^ bc3)
+		a[18] = bc3 ^ (bc0 &^ bc4)
+		a[4] = bc4 ^ (bc1 &^ bc0)
+
+		// Round 4: uses round constant rc[i+3].
+		bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
+		bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
+		bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
+		bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
+		bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
+		d0 = bc4 ^ (bc1<<1 | bc1>>63)
+		d1 = bc0 ^ (bc2<<1 | bc2>>63)
+		d2 = bc1 ^ (bc3<<1 | bc3>>63)
+		d3 = bc2 ^ (bc4<<1 | bc4>>63)
+		d4 = bc3 ^ (bc0<<1 | bc0>>63)
+
+		bc0 = a[0] ^ d0
+		t = a[1] ^ d1
+		bc1 = bits.RotateLeft64(t, 44)
+		t = a[2] ^ d2
+		bc2 = bits.RotateLeft64(t, 43)
+		t = a[3] ^ d3
+		bc3 = bits.RotateLeft64(t, 21)
+		t = a[4] ^ d4
+		bc4 = bits.RotateLeft64(t, 14)
+		a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+3]
+		a[1] = bc1 ^ (bc3 &^ bc2)
+		a[2] = bc2 ^ (bc4 &^ bc3)
+		a[3] = bc3 ^ (bc0 &^ bc4)
+		a[4] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[5] ^ d0
+		bc2 = bits.RotateLeft64(t, 3)
+		t = a[6] ^ d1
+		bc3 = bits.RotateLeft64(t, 45)
+		t = a[7] ^ d2
+		bc4 = bits.RotateLeft64(t, 61)
+		t = a[8] ^ d3
+		bc0 = bits.RotateLeft64(t, 28)
+		t = a[9] ^ d4
+		bc1 = bits.RotateLeft64(t, 20)
+		a[5] = bc0 ^ (bc2 &^ bc1)
+		a[6] = bc1 ^ (bc3 &^ bc2)
+		a[7] = bc2 ^ (bc4 &^ bc3)
+		a[8] = bc3 ^ (bc0 &^ bc4)
+		a[9] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[10] ^ d0
+		bc4 = bits.RotateLeft64(t, 18)
+		t = a[11] ^ d1
+		bc0 = bits.RotateLeft64(t, 1)
+		t = a[12] ^ d2
+		bc1 = bits.RotateLeft64(t, 6)
+		t = a[13] ^ d3
+		bc2 = bits.RotateLeft64(t, 25)
+		t = a[14] ^ d4
+		bc3 = bits.RotateLeft64(t, 8)
+		a[10] = bc0 ^ (bc2 &^ bc1)
+		a[11] = bc1 ^ (bc3 &^ bc2)
+		a[12] = bc2 ^ (bc4 &^ bc3)
+		a[13] = bc3 ^ (bc0 &^ bc4)
+		a[14] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[15] ^ d0
+		bc1 = bits.RotateLeft64(t, 36)
+		t = a[16] ^ d1
+		bc2 = bits.RotateLeft64(t, 10)
+		t = a[17] ^ d2
+		bc3 = bits.RotateLeft64(t, 15)
+		t = a[18] ^ d3
+		bc4 = bits.RotateLeft64(t, 56)
+		t = a[19] ^ d4
+		bc0 = bits.RotateLeft64(t, 27)
+		a[15] = bc0 ^ (bc2 &^ bc1)
+		a[16] = bc1 ^ (bc3 &^ bc2)
+		a[17] = bc2 ^ (bc4 &^ bc3)
+		a[18] = bc3 ^ (bc0 &^ bc4)
+		a[19] = bc4 ^ (bc1 &^ bc0)
+
+		t = a[20] ^ d0
+		bc3 = bits.RotateLeft64(t, 41)
+		t = a[21] ^ d1
+		bc4 = bits.RotateLeft64(t, 2)
+		t = a[22] ^ d2
+		bc0 = bits.RotateLeft64(t, 62)
+		t = a[23] ^ d3
+		bc1 = bits.RotateLeft64(t, 55)
+		t = a[24] ^ d4
+		bc2 = bits.RotateLeft64(t, 39)
+		a[20] = bc0 ^ (bc2 &^ bc1)
+		a[21] = bc1 ^ (bc3 &^ bc2)
+		a[22] = bc2 ^ (bc4 &^ bc3)
+		a[23] = bc3 ^ (bc0 &^ bc4)
+		a[24] = bc4 ^ (bc1 &^ bc0)
+	}
+}
diff --git a/common/crypto/keccak/keccakf_amd64.go b/common/crypto/keccak/keccakf_amd64.go
new file mode 100644
index 00000000000..cb6eca44c3b
--- /dev/null
+++ b/common/crypto/keccak/keccakf_amd64.go
@@ -0,0 +1,13 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 && !purego && gc
+
+package keccak
+
+// This function is implemented in keccakf_amd64.s.
+ +//go:noescape + +func keccakF1600(a *[25]uint64) diff --git a/common/crypto/keccak/keccakf_amd64.s b/common/crypto/keccak/keccakf_amd64.s new file mode 100644 index 00000000000..99e2f16e971 --- /dev/null +++ b/common/crypto/keccak/keccakf_amd64.s @@ -0,0 +1,5419 @@ +// Code generated by command: go run keccakf_amd64_asm.go -out ../keccakf_amd64.s -pkg sha3. DO NOT EDIT. + +//go:build amd64 && !purego && gc + +// func keccakF1600(a *[25]uint64) +TEXT ·keccakF1600(SB), $200-8 + MOVQ a+0(FP), DI + + // Convert the user state into an internal state + NOTQ 8(DI) + NOTQ 16(DI) + NOTQ 64(DI) + NOTQ 96(DI) + NOTQ 136(DI) + NOTQ 160(DI) + + // Execute the KeccakF permutation + MOVQ (DI), SI + MOVQ 8(DI), BP + MOVQ 32(DI), R15 + XORQ 40(DI), SI + XORQ 48(DI), BP + XORQ 72(DI), R15 + XORQ 80(DI), SI + XORQ 88(DI), BP + XORQ 112(DI), R15 + XORQ 120(DI), SI + XORQ 128(DI), BP + XORQ 152(DI), R15 + XORQ 160(DI), SI + XORQ 168(DI), BP + MOVQ 176(DI), DX + MOVQ 184(DI), R8 + XORQ 192(DI), R15 + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000000000001, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + 
MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 
112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000000008082, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ 
R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, 
CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x800000000000808a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + 
XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ 
SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000080008000, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 
104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ 
$0x000000000000808b, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX 
+ ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000080000001, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // 
Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, 
R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000080008081, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ 
$0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, 
DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008009, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + 
XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + 
MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000000000008a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + 
XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000000000088, AX + ORQ R12, SI + XORQ R10, AX + XORQ 
AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, 
BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000080008009, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 
+ ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + 
XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000008000000a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + 
MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ 
R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000008000808b, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 
80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, 
DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x800000000000008b, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ 
BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008089, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + 
MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, 
AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008003, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 
24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 
+ ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008002, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 
72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + 
+ // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000000080, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ 
R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, 
R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000000000800a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + 
MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x800000008000000a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 
32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ 
R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000080008081, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + 
MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ 
$0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008080, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ 
R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 
16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000080000001, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + 
XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + 
XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000080008008, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + NOP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + NOP + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + NOP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + NOP + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + NOP + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + NOP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + NOP + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 
+	ROLQ $0x1b, R10
+	XORQ R10, AX
+	MOVQ AX, 120(DI)
+	NOP
+	XORQ DX, R13
+	ROLQ $0x0f, R13
+	MOVQ R12, AX
+	ORQ R13, AX
+	XORQ R11, AX
+	MOVQ AX, 128(DI)
+	NOP
+	XORQ R8, R14
+	ROLQ $0x38, R14
+	NOTQ R13
+	MOVQ R13, AX
+	ORQ R14, AX
+	XORQ R12, AX
+	MOVQ AX, 136(DI)
+	ORQ R10, R11
+	XORQ R14, R11
+	MOVQ R11, 152(DI)
+	ANDQ R10, R14
+	XORQ R13, R14
+	MOVQ R14, 144(DI)
+	NOP
+
+	// Result s
+	MOVQ 16(SP), R10
+	MOVQ 64(SP), R11
+	MOVQ 112(SP), R12
+	XORQ DX, R10
+	MOVQ 120(SP), R13
+	ROLQ $0x3e, R10
+	XORQ R8, R11
+	MOVQ 168(SP), R14
+	ROLQ $0x37, R11
+	XORQ R9, R12
+	MOVQ R10, R9
+	XORQ CX, R14
+	ROLQ $0x02, R14
+	ANDQ R11, R9
+	XORQ R14, R9
+	MOVQ R9, 192(DI)
+	ROLQ $0x27, R12
+	NOP
+	NOTQ R11
+	XORQ BX, R13
+	MOVQ R11, BX
+	ANDQ R12, BX
+	XORQ R10, BX
+	MOVQ BX, 160(DI)
+	NOP
+	ROLQ $0x29, R13
+	MOVQ R12, CX
+	ORQ R13, CX
+	XORQ R11, CX
+	MOVQ CX, 168(DI)
+	NOP
+	MOVQ R13, DX
+	MOVQ R14, R8
+	ANDQ R14, DX
+	ORQ R10, R8
+	XORQ R12, DX
+	XORQ R13, R8
+	MOVQ DX, 176(DI)
+	MOVQ R8, 184(DI)
+
+	// Revert the internal state to the user state
+	NOTQ 8(DI)
+	NOTQ 16(DI)
+	NOTQ 64(DI)
+	NOTQ 96(DI)
+	NOTQ 136(DI)
+	NOTQ 160(DI)
+	RET
diff --git a/common/crypto/keccak/keccakf_arm64.go b/common/crypto/keccak/keccakf_arm64.go
new file mode 100644
index 00000000000..f00e15aa3c6
--- /dev/null
+++ b/common/crypto/keccak/keccakf_arm64.go
@@ -0,0 +1,13 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc && !purego && arm64
+
+package keccak
+
+// keccakF1600 applies the Keccak-f[1600] permutation to the 25-lane state a,
+// updating it in place. It is implemented in assembly in keccakf_arm64.s.
+//
+//go:noescape
+func keccakF1600(a *[25]uint64)
diff --git a/common/crypto/keccak/keccakf_arm64.s b/common/crypto/keccak/keccakf_arm64.s
new file mode 100644
index 00000000000..b800afcc4a7
--- /dev/null
+++ b/common/crypto/keccak/keccakf_arm64.s
@@ -0,0 +1,5178 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build gc && !purego && arm64 + +#include "textflag.h" + +// Code generated by go run gen_arm64_asm.go. DO NOT EDIT. + +// Fully unrolled keccakF1600 for ARM64 +// func keccakF1600(a *[25]uint64) +TEXT ·keccakF1600(SB), NOSPLIT, $0-8 + MOVD a+0(FP), R0 + + + // ========== ROUND 0 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 48(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 96(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 144(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 192(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x0000000000000001, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 
0(R0) + MOVD R19, 48(R0) + MOVD R20, 96(R0) + MOVD R21, 144(R0) + MOVD R22, 192(R0) + // Group 1 + MOVD 80(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 128(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 176(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 24(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 128(R0) + MOVD R20, 176(R0) + MOVD R21, 24(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 160(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 8(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 56(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 152(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 8(R0) + MOVD R20, 56(R0) + MOVD R21, 104(R0) + MOVD R22, 152(R0) + // Group 3 + MOVD 40(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 88(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 184(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 32(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 88(R0) + MOVD R20, 136(R0) + MOVD R21, 184(R0) + MOVD R22, 32(R0) + // Group 4 + MOVD 120(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, 
R15 + ROR $62, R15, R15 + MOVD 16(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 64(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 112(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 168(R0) + MOVD R20, 16(R0) + MOVD R21, 64(R0) + MOVD R22, 112(R0) + + // ========== ROUND 1 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 128(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 56(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 184(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 112(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x0000000000008082, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC 
R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 128(R0) + MOVD R20, 56(R0) + MOVD R21, 184(R0) + MOVD R22, 112(R0) + // Group 1 + MOVD 160(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 88(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 16(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 144(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 88(R0) + MOVD R20, 16(R0) + MOVD R21, 144(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 120(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 48(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 176(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 32(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 48(R0) + MOVD R20, 176(R0) + MOVD R21, 104(R0) + MOVD R22, 32(R0) + // Group 3 + MOVD 80(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 8(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 64(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 192(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 8(R0) + MOVD R20, 136(R0) + MOVD R21, 64(R0) + 
MOVD R22, 192(R0) + // Group 4 + MOVD 40(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 96(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 24(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 152(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 168(R0) + MOVD R20, 96(R0) + MOVD R21, 24(R0) + MOVD R22, 152(R0) + + // ========== ROUND 2 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 88(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 176(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 64(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 152(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, 
R17 + EOR R11, R17, R17 + MOVD $0x800000000000808a, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 88(R0) + MOVD R20, 176(R0) + MOVD R21, 64(R0) + MOVD R22, 152(R0) + // Group 1 + MOVD 120(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 8(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 96(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 184(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 8(R0) + MOVD R20, 96(R0) + MOVD R21, 184(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 40(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 128(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 16(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 192(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 128(R0) + MOVD R20, 16(R0) + MOVD R21, 104(R0) + MOVD R22, 192(R0) + // Group 3 + MOVD 160(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 48(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 24(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 112(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, 
R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 48(R0) + MOVD R20, 136(R0) + MOVD R21, 24(R0) + MOVD R22, 112(R0) + // Group 4 + MOVD 80(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 56(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 144(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 32(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 168(R0) + MOVD R20, 56(R0) + MOVD R21, 144(R0) + MOVD R22, 32(R0) + + // ========== ROUND 3 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 8(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 16(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 
24(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 32(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x8000000080008000, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 8(R0) + MOVD R20, 16(R0) + MOVD R21, 24(R0) + MOVD R22, 32(R0) + // Group 1 + MOVD 40(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 48(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 56(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 64(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 48(R0) + MOVD R20, 56(R0) + MOVD R21, 64(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 80(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 88(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 96(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 112(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 88(R0) + MOVD R20, 96(R0) + MOVD R21, 104(R0) + MOVD R22, 112(R0) + // Group 3 + MOVD 120(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 128(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 144(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 152(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, 
R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 128(R0) + MOVD R20, 136(R0) + MOVD R21, 144(R0) + MOVD R22, 152(R0) + // Group 4 + MOVD 160(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 176(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 184(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 192(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 168(R0) + MOVD R20, 176(R0) + MOVD R21, 184(R0) + MOVD R22, 192(R0) + + // ========== ROUND 4 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, 
R11 + MOVD 48(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 96(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 144(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 192(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x000000000000808b, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 48(R0) + MOVD R20, 96(R0) + MOVD R21, 144(R0) + MOVD R22, 192(R0) + // Group 1 + MOVD 80(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 128(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 176(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 24(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 128(R0) + MOVD R20, 176(R0) + MOVD R21, 24(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 160(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 8(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 56(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 152(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 8(R0) + MOVD R20, 56(R0) + MOVD R21, 104(R0) + MOVD R22, 152(R0) + // Group 3 + MOVD 40(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 88(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 184(R0), R15 + 
EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 32(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 88(R0) + MOVD R20, 136(R0) + MOVD R21, 184(R0) + MOVD R22, 32(R0) + // Group 4 + MOVD 120(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 16(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 64(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 112(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 168(R0) + MOVD R20, 16(R0) + MOVD R21, 64(R0) + MOVD R22, 112(R0) + + // ========== ROUND 5 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, 
R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 128(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 56(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 184(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 112(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x0000000080000001, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 128(R0) + MOVD R20, 56(R0) + MOVD R21, 184(R0) + MOVD R22, 112(R0) + // Group 1 + MOVD 160(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 88(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 16(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 144(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 88(R0) + MOVD R20, 16(R0) + MOVD R21, 144(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 120(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 48(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 176(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 32(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 48(R0) + MOVD R20, 176(R0) + MOVD R21, 104(R0) + MOVD R22, 32(R0) + // Group 3 + MOVD 80(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 
8(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 64(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 192(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 8(R0) + MOVD R20, 136(R0) + MOVD R21, 64(R0) + MOVD R22, 192(R0) + // Group 4 + MOVD 40(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 96(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 24(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 152(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 168(R0) + MOVD R20, 96(R0) + MOVD R21, 24(R0) + MOVD R22, 152(R0) + + // ========== ROUND 6 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR 
R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 88(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 176(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 64(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 152(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x8000000080008081, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 88(R0) + MOVD R20, 176(R0) + MOVD R21, 64(R0) + MOVD R22, 152(R0) + // Group 1 + MOVD 120(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 8(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 96(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 184(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 8(R0) + MOVD R20, 96(R0) + MOVD R21, 184(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 40(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 128(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 16(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 192(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 128(R0) + MOVD R20, 16(R0) 
+ MOVD R21, 104(R0) + MOVD R22, 192(R0) + // Group 3 + MOVD 160(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 48(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 24(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 112(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 48(R0) + MOVD R20, 136(R0) + MOVD R21, 24(R0) + MOVD R22, 112(R0) + // Group 4 + MOVD 80(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 56(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 144(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 32(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 168(R0) + MOVD R20, 56(R0) + MOVD R21, 144(R0) + MOVD R22, 32(R0) + + // ========== ROUND 7 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), 
R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 8(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 16(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 24(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 32(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x8000000000008009, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 8(R0) + MOVD R20, 16(R0) + MOVD R21, 24(R0) + MOVD R22, 32(R0) + // Group 1 + MOVD 40(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 48(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 56(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 64(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 48(R0) + MOVD R20, 56(R0) + MOVD R21, 64(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 80(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 88(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 96(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 112(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + 
EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 88(R0) + MOVD R20, 96(R0) + MOVD R21, 104(R0) + MOVD R22, 112(R0) + // Group 3 + MOVD 120(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 128(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 144(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 152(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 128(R0) + MOVD R20, 136(R0) + MOVD R21, 144(R0) + MOVD R22, 152(R0) + // Group 4 + MOVD 160(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 176(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 184(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 192(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 168(R0) + MOVD R20, 176(R0) + MOVD R21, 184(R0) + MOVD R22, 192(R0) + + // ========== ROUND 8 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + 
EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 48(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 96(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 144(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 192(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x000000000000008a, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 48(R0) + MOVD R20, 96(R0) + MOVD R21, 144(R0) + MOVD R22, 192(R0) + // Group 1 + MOVD 80(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 128(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 176(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 24(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 128(R0) + MOVD R20, 176(R0) + MOVD R21, 24(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 160(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 8(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 56(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 152(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC 
R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 8(R0) + MOVD R20, 56(R0) + MOVD R21, 104(R0) + MOVD R22, 152(R0) + // Group 3 + MOVD 40(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 88(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 184(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 32(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 88(R0) + MOVD R20, 136(R0) + MOVD R21, 184(R0) + MOVD R22, 32(R0) + // Group 4 + MOVD 120(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 16(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 64(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 112(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 168(R0) + MOVD R20, 16(R0) + MOVD R21, 64(R0) + MOVD R22, 112(R0) + + // ========== ROUND 9 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR 
R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 128(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 56(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 184(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 112(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x0000000000000088, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 128(R0) + MOVD R20, 56(R0) + MOVD R21, 184(R0) + MOVD R22, 112(R0) + // Group 1 + MOVD 160(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 88(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 16(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 144(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 88(R0) + MOVD R20, 16(R0) + MOVD R21, 144(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 120(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 48(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 176(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, 
R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 32(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 48(R0) + MOVD R20, 176(R0) + MOVD R21, 104(R0) + MOVD R22, 32(R0) + // Group 3 + MOVD 80(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 8(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 64(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 192(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 8(R0) + MOVD R20, 136(R0) + MOVD R21, 64(R0) + MOVD R22, 192(R0) + // Group 4 + MOVD 40(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 96(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 24(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 152(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 168(R0) + MOVD R20, 96(R0) + MOVD R21, 24(R0) + MOVD R22, 152(R0) + + // ========== ROUND 10 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, 
R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 88(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 176(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 64(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 152(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x0000000080008009, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 88(R0) + MOVD R20, 176(R0) + MOVD R21, 64(R0) + MOVD R22, 152(R0) + // Group 1 + MOVD 120(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 8(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 96(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 184(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 8(R0) + MOVD R20, 96(R0) + MOVD R21, 184(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 40(R0), R15 + EOR R6, R15, R15 + ROR $46, 
R15, R15 + MOVD 128(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 16(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 192(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 128(R0) + MOVD R20, 16(R0) + MOVD R21, 104(R0) + MOVD R22, 192(R0) + // Group 3 + MOVD 160(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 48(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 24(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 112(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 48(R0) + MOVD R20, 136(R0) + MOVD R21, 24(R0) + MOVD R22, 112(R0) + // Group 4 + MOVD 80(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 56(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 144(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 32(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 168(R0) + MOVD R20, 56(R0) + MOVD R21, 144(R0) + MOVD R22, 32(R0) + + // ========== ROUND 11 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 
+ EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 8(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 16(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 24(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 32(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x000000008000000a, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 8(R0) + MOVD R20, 16(R0) + MOVD R21, 24(R0) + MOVD R22, 32(R0) + // Group 1 + MOVD 40(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 48(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 56(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 64(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 48(R0) + 
MOVD R20, 56(R0) + MOVD R21, 64(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 80(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 88(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 96(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 112(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 88(R0) + MOVD R20, 96(R0) + MOVD R21, 104(R0) + MOVD R22, 112(R0) + // Group 3 + MOVD 120(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 128(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 144(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 152(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 128(R0) + MOVD R20, 136(R0) + MOVD R21, 144(R0) + MOVD R22, 152(R0) + // Group 4 + MOVD 160(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 176(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 184(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 192(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 168(R0) + MOVD R20, 176(R0) + MOVD R21, 184(R0) + MOVD R22, 192(R0) + + // ========== ROUND 12 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + 
EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 48(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 96(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 144(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 192(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x000000008000808b, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 48(R0) + MOVD R20, 96(R0) + MOVD R21, 144(R0) + MOVD R22, 192(R0) + // Group 1 + MOVD 80(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 128(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 176(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 24(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR 
R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 128(R0) + MOVD R20, 176(R0) + MOVD R21, 24(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 160(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 8(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 56(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 152(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 8(R0) + MOVD R20, 56(R0) + MOVD R21, 104(R0) + MOVD R22, 152(R0) + // Group 3 + MOVD 40(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 88(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 184(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 32(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 88(R0) + MOVD R20, 136(R0) + MOVD R21, 184(R0) + MOVD R22, 32(R0) + // Group 4 + MOVD 120(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 16(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 64(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 112(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 168(R0) + MOVD R20, 16(R0) + MOVD R21, 64(R0) + MOVD R22, 112(R0) 
+ + // ========== ROUND 13 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 128(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 56(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 184(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 112(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x800000000000008b, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 128(R0) + MOVD R20, 56(R0) + MOVD R21, 184(R0) + MOVD R22, 112(R0) + // Group 1 + MOVD 160(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 88(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 16(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 144(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 
+ ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 88(R0) + MOVD R20, 16(R0) + MOVD R21, 144(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 120(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 48(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 176(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 32(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 48(R0) + MOVD R20, 176(R0) + MOVD R21, 104(R0) + MOVD R22, 32(R0) + // Group 3 + MOVD 80(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 8(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 64(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 192(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 8(R0) + MOVD R20, 136(R0) + MOVD R21, 64(R0) + MOVD R22, 192(R0) + // Group 4 + MOVD 40(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 96(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 24(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 152(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, 
R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 168(R0) + MOVD R20, 96(R0) + MOVD R21, 24(R0) + MOVD R22, 152(R0) + + // ========== ROUND 14 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 88(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 176(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 64(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 152(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x8000000000008089, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 88(R0) + MOVD R20, 176(R0) + MOVD R21, 64(R0) + MOVD R22, 152(R0) + // Group 1 + MOVD 120(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 8(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 96(R0), R15 + EOR R8, 
R15, R15 + ROR $3, R15, R15 + MOVD 184(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 8(R0) + MOVD R20, 96(R0) + MOVD R21, 184(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 40(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 128(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 16(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 192(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 128(R0) + MOVD R20, 16(R0) + MOVD R21, 104(R0) + MOVD R22, 192(R0) + // Group 3 + MOVD 160(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 48(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 24(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 112(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 48(R0) + MOVD R20, 136(R0) + MOVD R21, 24(R0) + MOVD R22, 112(R0) + // Group 4 + MOVD 80(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 56(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 144(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 32(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + 
BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 168(R0) + MOVD R20, 56(R0) + MOVD R21, 144(R0) + MOVD R22, 32(R0) + + // ========== ROUND 15 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 8(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 16(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 24(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 32(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x8000000000008003, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 8(R0) + MOVD R20, 16(R0) + MOVD R21, 24(R0) + MOVD R22, 32(R0) + // Group 1 + MOVD 40(R0), R13 + EOR 
R6, R13, R13 + ROR $61, R13, R13 + MOVD 48(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 56(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 64(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 48(R0) + MOVD R20, 56(R0) + MOVD R21, 64(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 80(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 88(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 96(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 112(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 88(R0) + MOVD R20, 96(R0) + MOVD R21, 104(R0) + MOVD R22, 112(R0) + // Group 3 + MOVD 120(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 128(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 144(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 152(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 128(R0) + MOVD R20, 136(R0) + MOVD R21, 144(R0) + MOVD R22, 152(R0) + // Group 4 + MOVD 160(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 176(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 184(R0), R12 + EOR R9, R12, R12 
+ ROR $9, R12, R12 + MOVD 192(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 168(R0) + MOVD R20, 176(R0) + MOVD R21, 184(R0) + MOVD R22, 192(R0) + + // ========== ROUND 16 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 48(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 96(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 144(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 192(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x8000000000008002, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + 
MOVD R17, 0(R0) + MOVD R19, 48(R0) + MOVD R20, 96(R0) + MOVD R21, 144(R0) + MOVD R22, 192(R0) + // Group 1 + MOVD 80(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 128(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 176(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 24(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 128(R0) + MOVD R20, 176(R0) + MOVD R21, 24(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 160(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 8(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 56(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 152(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 8(R0) + MOVD R20, 56(R0) + MOVD R21, 104(R0) + MOVD R22, 152(R0) + // Group 3 + MOVD 40(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 88(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 184(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 32(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 88(R0) + MOVD R20, 136(R0) + MOVD R21, 184(R0) + MOVD R22, 32(R0) + // Group 4 + MOVD 120(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR 
R7, R15, R15 + ROR $62, R15, R15 + MOVD 16(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 64(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 112(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 168(R0) + MOVD R20, 16(R0) + MOVD R21, 64(R0) + MOVD R22, 112(R0) + + // ========== ROUND 17 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 128(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 56(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 184(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 112(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x8000000000000080, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, 
R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 128(R0) + MOVD R20, 56(R0) + MOVD R21, 184(R0) + MOVD R22, 112(R0) + // Group 1 + MOVD 160(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 88(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 16(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 144(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 88(R0) + MOVD R20, 16(R0) + MOVD R21, 144(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 120(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 48(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 176(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 32(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 48(R0) + MOVD R20, 176(R0) + MOVD R21, 104(R0) + MOVD R22, 32(R0) + // Group 3 + MOVD 80(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 8(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 64(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 192(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 8(R0) + MOVD R20, 136(R0) + MOVD 
R21, 64(R0) + MOVD R22, 192(R0) + // Group 4 + MOVD 40(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 96(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 24(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 152(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 168(R0) + MOVD R20, 96(R0) + MOVD R21, 24(R0) + MOVD R22, 152(R0) + + // ========== ROUND 18 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 88(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 176(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 64(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 152(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 
+ BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x000000000000800a, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 88(R0) + MOVD R20, 176(R0) + MOVD R21, 64(R0) + MOVD R22, 152(R0) + // Group 1 + MOVD 120(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 8(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 96(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 184(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 8(R0) + MOVD R20, 96(R0) + MOVD R21, 184(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 40(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 128(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 16(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 192(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 128(R0) + MOVD R20, 16(R0) + MOVD R21, 104(R0) + MOVD R22, 192(R0) + // Group 3 + MOVD 160(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 48(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 24(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 112(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + 
EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 48(R0) + MOVD R20, 136(R0) + MOVD R21, 24(R0) + MOVD R22, 112(R0) + // Group 4 + MOVD 80(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 56(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 144(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 32(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 168(R0) + MOVD R20, 56(R0) + MOVD R21, 144(R0) + MOVD R22, 32(R0) + + // ========== ROUND 19 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 8(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 16(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, 
R13 + MOVD 24(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 32(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x800000008000000a, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 8(R0) + MOVD R20, 16(R0) + MOVD R21, 24(R0) + MOVD R22, 32(R0) + // Group 1 + MOVD 40(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 48(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 56(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 64(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 48(R0) + MOVD R20, 56(R0) + MOVD R21, 64(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 80(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 88(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 96(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 112(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 88(R0) + MOVD R20, 96(R0) + MOVD R21, 104(R0) + MOVD R22, 112(R0) + // Group 3 + MOVD 120(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 128(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 144(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 152(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR 
R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 128(R0) + MOVD R20, 136(R0) + MOVD R21, 144(R0) + MOVD R22, 152(R0) + // Group 4 + MOVD 160(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 176(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 184(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 192(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 168(R0) + MOVD R20, 176(R0) + MOVD R21, 184(R0) + MOVD R22, 192(R0) + + // ========== ROUND 20 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR 
R6, R11, R11 + MOVD 48(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 96(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 144(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 192(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x8000000080008081, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 48(R0) + MOVD R20, 96(R0) + MOVD R21, 144(R0) + MOVD R22, 192(R0) + // Group 1 + MOVD 80(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 128(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 176(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 24(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 128(R0) + MOVD R20, 176(R0) + MOVD R21, 24(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 160(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 8(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 56(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 152(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 8(R0) + MOVD R20, 56(R0) + MOVD R21, 104(R0) + MOVD R22, 152(R0) + // Group 3 + MOVD 40(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 88(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 
184(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 32(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 88(R0) + MOVD R20, 136(R0) + MOVD R21, 184(R0) + MOVD R22, 32(R0) + // Group 4 + MOVD 120(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 16(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 64(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 112(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 168(R0) + MOVD R20, 16(R0) + MOVD R21, 64(R0) + MOVD R22, 112(R0) + + // ========== ROUND 21 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, 
R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 128(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 56(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 184(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 112(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x8000000000008080, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 128(R0) + MOVD R20, 56(R0) + MOVD R21, 184(R0) + MOVD R22, 112(R0) + // Group 1 + MOVD 160(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 88(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 16(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 144(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 88(R0) + MOVD R20, 16(R0) + MOVD R21, 144(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 120(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 48(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 176(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 32(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 48(R0) + MOVD R20, 176(R0) + MOVD R21, 104(R0) + MOVD R22, 32(R0) + // Group 3 + MOVD 80(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, 
R12 + MOVD 8(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 64(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 192(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 8(R0) + MOVD R20, 136(R0) + MOVD R21, 64(R0) + MOVD R22, 192(R0) + // Group 4 + MOVD 40(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 96(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 24(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 152(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 168(R0) + MOVD R20, 96(R0) + MOVD R21, 24(R0) + MOVD R22, 152(R0) + + // ========== ROUND 22 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 
192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 88(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 176(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 64(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 152(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x0000000080000001, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 88(R0) + MOVD R20, 176(R0) + MOVD R21, 64(R0) + MOVD R22, 152(R0) + // Group 1 + MOVD 120(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 8(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 96(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 184(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 8(R0) + MOVD R20, 96(R0) + MOVD R21, 184(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 40(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 128(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 16(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 192(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 128(R0) 
+ MOVD R20, 16(R0) + MOVD R21, 104(R0) + MOVD R22, 192(R0) + // Group 3 + MOVD 160(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 48(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 24(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 112(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 48(R0) + MOVD R20, 136(R0) + MOVD R21, 24(R0) + MOVD R22, 112(R0) + // Group 4 + MOVD 80(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 56(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 144(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 32(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 168(R0) + MOVD R20, 56(R0) + MOVD R21, 144(R0) + MOVD R22, 32(R0) + + // ========== ROUND 23 ========== + // Theta + MOVD 0(R0), R1 + MOVD 40(R0), R16 + EOR R16, R1, R1 + MOVD 80(R0), R16 + EOR R16, R1, R1 + MOVD 120(R0), R16 + EOR R16, R1, R1 + MOVD 160(R0), R16 + EOR R16, R1, R1 + MOVD 8(R0), R2 + MOVD 48(R0), R16 + EOR R16, R2, R2 + MOVD 88(R0), R16 + EOR R16, R2, R2 + MOVD 128(R0), R16 + EOR R16, R2, R2 + MOVD 168(R0), R16 + EOR R16, R2, R2 + MOVD 16(R0), R3 + MOVD 56(R0), R16 + EOR R16, R3, R3 + MOVD 96(R0), R16 + EOR R16, R3, R3 + MOVD 136(R0), R16 + EOR R16, R3, R3 + MOVD 176(R0), R16 + EOR R16, R3, R3 + MOVD 24(R0), R4 + MOVD 64(R0), R16 + EOR R16, R4, R4 + MOVD 104(R0), R16 + EOR R16, R4, R4 + MOVD 144(R0), R16 + EOR R16, R4, R4 + MOVD 184(R0), R16 + EOR R16, R4, R4 + MOVD 32(R0), 
R5 + MOVD 72(R0), R16 + EOR R16, R5, R5 + MOVD 112(R0), R16 + EOR R16, R5, R5 + MOVD 152(R0), R16 + EOR R16, R5, R5 + MOVD 192(R0), R16 + EOR R16, R5, R5 + // D values + ROR $63, R2, R6 + EOR R5, R6, R6 + ROR $63, R3, R7 + EOR R1, R7, R7 + ROR $63, R4, R8 + EOR R2, R8, R8 + ROR $63, R5, R9 + EOR R3, R9, R9 + ROR $63, R1, R10 + EOR R4, R10, R10 + // Group 0 + MOVD 0(R0), R11 + EOR R6, R11, R11 + MOVD 8(R0), R12 + EOR R7, R12, R12 + ROR $20, R12, R12 + MOVD 16(R0), R13 + EOR R8, R13, R13 + ROR $21, R13, R13 + MOVD 24(R0), R14 + EOR R9, R14, R14 + ROR $43, R14, R14 + MOVD 32(R0), R15 + EOR R10, R15, R15 + ROR $50, R15, R15 + BIC R12, R13, R17 + EOR R11, R17, R17 + MOVD $0x8000000080008008, R2 + EOR R2, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 0(R0) + MOVD R19, 8(R0) + MOVD R20, 16(R0) + MOVD R21, 24(R0) + MOVD R22, 32(R0) + // Group 1 + MOVD 40(R0), R13 + EOR R6, R13, R13 + ROR $61, R13, R13 + MOVD 48(R0), R14 + EOR R7, R14, R14 + ROR $19, R14, R14 + MOVD 56(R0), R15 + EOR R8, R15, R15 + ROR $3, R15, R15 + MOVD 64(R0), R11 + EOR R9, R11, R11 + ROR $36, R11, R11 + MOVD 72(R0), R12 + EOR R10, R12, R12 + ROR $44, R12, R12 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 40(R0) + MOVD R19, 48(R0) + MOVD R20, 56(R0) + MOVD R21, 64(R0) + MOVD R22, 72(R0) + // Group 2 + MOVD 80(R0), R15 + EOR R6, R15, R15 + ROR $46, R15, R15 + MOVD 88(R0), R11 + EOR R7, R11, R11 + ROR $63, R11, R11 + MOVD 96(R0), R12 + EOR R8, R12, R12 + ROR $58, R12, R12 + MOVD 104(R0), R13 + EOR R9, R13, R13 + ROR $39, R13, R13 + MOVD 112(R0), R14 + EOR R10, R14, R14 + ROR $56, R14, R14 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + 
BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 80(R0) + MOVD R19, 88(R0) + MOVD R20, 96(R0) + MOVD R21, 104(R0) + MOVD R22, 112(R0) + // Group 3 + MOVD 120(R0), R12 + EOR R6, R12, R12 + ROR $28, R12, R12 + MOVD 128(R0), R13 + EOR R7, R13, R13 + ROR $54, R13, R13 + MOVD 136(R0), R14 + EOR R8, R14, R14 + ROR $49, R14, R14 + MOVD 144(R0), R15 + EOR R9, R15, R15 + ROR $8, R15, R15 + MOVD 152(R0), R11 + EOR R10, R11, R11 + ROR $37, R11, R11 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 120(R0) + MOVD R19, 128(R0) + MOVD R20, 136(R0) + MOVD R21, 144(R0) + MOVD R22, 152(R0) + // Group 4 + MOVD 160(R0), R14 + EOR R6, R14, R14 + ROR $23, R14, R14 + MOVD 168(R0), R15 + EOR R7, R15, R15 + ROR $62, R15, R15 + MOVD 176(R0), R11 + EOR R8, R11, R11 + ROR $2, R11, R11 + MOVD 184(R0), R12 + EOR R9, R12, R12 + ROR $9, R12, R12 + MOVD 192(R0), R13 + EOR R10, R13, R13 + ROR $25, R13, R13 + BIC R12, R13, R17 + EOR R11, R17, R17 + BIC R13, R14, R19 + EOR R12, R19, R19 + BIC R14, R15, R20 + EOR R13, R20, R20 + BIC R15, R11, R21 + EOR R14, R21, R21 + BIC R11, R12, R22 + EOR R15, R22, R22 + MOVD R17, 160(R0) + MOVD R19, 168(R0) + MOVD R20, 176(R0) + MOVD R21, 184(R0) + MOVD R22, 192(R0) + + RET + \ No newline at end of file diff --git a/common/crypto/keccak/sha3.go b/common/crypto/keccak/sha3.go new file mode 100644 index 00000000000..a554323244b --- /dev/null +++ b/common/crypto/keccak/sha3.go @@ -0,0 +1,244 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package keccak + +import ( + "crypto/subtle" + "encoding/binary" + "errors" + "unsafe" + + "golang.org/x/sys/cpu" +) + +// spongeDirection indicates the direction bytes are flowing through the sponge. 
+type spongeDirection int + +const ( + // spongeAbsorbing indicates that the sponge is absorbing input. + spongeAbsorbing spongeDirection = iota + // spongeSqueezing indicates that the sponge is being squeezed. + spongeSqueezing +) + +type state struct { + a [1600 / 8]byte // main state of the hash + + // a[n:rate] is the buffer. If absorbing, it's the remaining space to XOR + // into before running the permutation. If squeezing, it's the remaining + // output to produce before running the permutation. + n, rate int + + // dsbyte contains the "domain separation" bits and the first bit of + // the padding. Sections 6.1 and 6.2 of [1] separate the outputs of the + // SHA-3 and SHAKE functions by appending bitstrings to the message. + // Using a little-endian bit-ordering convention, these are "01" for SHA-3 + // and "1111" for SHAKE, or 00000010b and 00001111b, respectively. Then the + // padding rule from section 5.1 is applied to pad the message to a multiple + // of the rate, which involves adding a "1" bit, zero or more "0" bits, and + // a final "1" bit. We merge the first "1" bit from the padding into dsbyte, + // giving 00000110b (0x06) and 00011111b (0x1f). + // [1] http://csrc.nist.gov/publications/drafts/fips-202/fips_202_draft.pdf + // "Draft FIPS 202: SHA-3 Standard: Permutation-Based Hash and + // Extendable-Output Functions (May 2014)" + dsbyte byte + + outputLen int // the default output size in bytes + state spongeDirection // whether the sponge is absorbing or squeezing +} + +// BlockSize returns the rate of sponge underlying this hash function. +func (d *state) BlockSize() int { return d.rate } + +// Size returns the output size of the hash function in bytes. +func (d *state) Size() int { return d.outputLen } + +// Reset clears the internal state by zeroing the sponge state and +// the buffer indexes, and setting Sponge.state to absorbing. +func (d *state) Reset() { + // Zero the permutation's state. 
+ for i := range d.a { + d.a[i] = 0 + } + d.state = spongeAbsorbing + d.n = 0 +} + +func (d *state) clone() *state { + ret := *d + return &ret +} + +// permute applies the KeccakF-1600 permutation. +func (d *state) permute() { + var a *[25]uint64 + if cpu.IsBigEndian { + a = new([25]uint64) + for i := range a { + a[i] = binary.LittleEndian.Uint64(d.a[i*8:]) + } + } else { + a = (*[25]uint64)(unsafe.Pointer(&d.a)) + } + + keccakF1600(a) + d.n = 0 + + if cpu.IsBigEndian { + for i := range a { + binary.LittleEndian.PutUint64(d.a[i*8:], a[i]) + } + } +} + +// pads appends the domain separation bits in dsbyte, applies +// the multi-bitrate 10..1 padding rule, and permutes the state. +func (d *state) padAndPermute() { + // Pad with this instance's domain-separator bits. We know that there's + // at least one byte of space in the sponge because, if it were full, + // permute would have been called to empty it. dsbyte also contains the + // first one bit for the padding. See the comment in the state struct. + d.a[d.n] ^= d.dsbyte + // This adds the final one bit for the padding. Because of the way that + // bits are numbered from the LSB upwards, the final bit is the MSB of + // the last byte. + d.a[d.rate-1] ^= 0x80 + // Apply the permutation + d.permute() + d.state = spongeSqueezing +} + +// Write absorbs more data into the hash's state. It panics if any +// output has already been read. +func (d *state) Write(p []byte) (n int, err error) { + if d.state != spongeAbsorbing { + panic("sha3: Write after Read") + } + + n = len(p) + + for len(p) > 0 { + x := subtle.XORBytes(d.a[d.n:d.rate], d.a[d.n:d.rate], p) + d.n += x + p = p[x:] + + // If the sponge is full, apply the permutation. + if d.n == d.rate { + d.permute() + } + } + + return +} + +// Read squeezes an arbitrary number of bytes from the sponge. +func (d *state) Read(out []byte) (n int, err error) { + // If we're still absorbing, pad and apply the permutation. 
+ if d.state == spongeAbsorbing { + d.padAndPermute() + } + + n = len(out) + + // Now, do the squeezing. + for len(out) > 0 { + // Apply the permutation if we've squeezed the sponge dry. + if d.n == d.rate { + d.permute() + } + + x := copy(out, d.a[d.n:d.rate]) + d.n += x + out = out[x:] + } + + return +} + +// Sum applies padding to the hash state and then squeezes out the desired +// number of output bytes. It panics if any output has already been read. +func (d *state) Sum(in []byte) []byte { + if d.state != spongeAbsorbing { + panic("sha3: Sum after Read") + } + + // Make a copy of the original hash so that caller can keep writing + // and summing. + dup := d.clone() + hash := make([]byte, dup.outputLen, 64) // explicit cap to allow stack allocation + dup.Read(hash) + return append(in, hash...) +} + +const ( + magicSHA3 = "sha\x08" + magicShake = "sha\x09" + magicCShake = "sha\x0a" + magicKeccak = "sha\x0b" + // magic || rate || main state || n || sponge direction + marshaledSize = len(magicSHA3) + 1 + 200 + 1 + 1 +) + +func (d *state) MarshalBinary() ([]byte, error) { + return d.AppendBinary(make([]byte, 0, marshaledSize)) +} + +func (d *state) AppendBinary(b []byte) ([]byte, error) { + switch d.dsbyte { + case dsbyteSHA3: + b = append(b, magicSHA3...) + case dsbyteShake: + b = append(b, magicShake...) + case dsbyteCShake: + b = append(b, magicCShake...) + case dsbyteKeccak: + b = append(b, magicKeccak...) + default: + panic("unknown dsbyte") + } + // rate is at most 168, and n is at most rate. + b = append(b, byte(d.rate)) + b = append(b, d.a[:]...) 
+ b = append(b, byte(d.n), byte(d.state)) + return b, nil +} + +func (d *state) UnmarshalBinary(b []byte) error { + if len(b) != marshaledSize { + return errors.New("sha3: invalid hash state") + } + + magic := string(b[:len(magicSHA3)]) + b = b[len(magicSHA3):] + switch { + case magic == magicSHA3 && d.dsbyte == dsbyteSHA3: + case magic == magicShake && d.dsbyte == dsbyteShake: + case magic == magicCShake && d.dsbyte == dsbyteCShake: + case magic == magicKeccak && d.dsbyte == dsbyteKeccak: + default: + return errors.New("sha3: invalid hash state identifier") + } + + rate := int(b[0]) + b = b[1:] + if rate != d.rate { + return errors.New("sha3: invalid hash state function") + } + + copy(d.a[:], b) + b = b[len(d.a):] + + n, state := int(b[0]), spongeDirection(b[1]) + if n > d.rate { + return errors.New("sha3: invalid hash state") + } + d.n = n + if state != spongeAbsorbing && state != spongeSqueezing { + return errors.New("sha3: invalid hash state") + } + d.state = state + + return nil +} diff --git a/common/crypto/keccak/sha3_test.go b/common/crypto/keccak/sha3_test.go new file mode 100644 index 00000000000..aec48ccfbaf --- /dev/null +++ b/common/crypto/keccak/sha3_test.go @@ -0,0 +1,210 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package keccak + +// Tests include all the ShortMsgKATs provided by the Keccak team at +// https://github.com/gvanas/KeccakCodePackage +// +// They only include the zero-bit case of the bitwise testvectors +// published by NIST in the draft of FIPS-202. 
+ +import ( + "bytes" + "compress/flate" + "encoding" + "encoding/hex" + "encoding/json" + "hash" + "math/rand" + "os" + "strings" + "testing" +) + +const ( + testString = "brekeccakkeccak koax koax" + katFilename = "testdata/keccakKats.json.deflate" +) + +// testDigests contains functions returning hash.Hash instances +// with output-length equal to the KAT length for SHA-3, Keccak +// and SHAKE instances. +var testDigests = map[string]func() hash.Hash{ + "Keccak-256": NewLegacyKeccak256, + "Keccak-512": NewLegacyKeccak512, +} + +// decodeHex converts a hex-encoded string into a raw byte string. +func decodeHex(s string) []byte { + b, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return b +} + +// structs used to marshal JSON test-cases. +type KeccakKats struct { + Kats map[string][]struct { + Digest string `json:"digest"` + Length int64 `json:"length"` + Message string `json:"message"` + + // Defined only for cSHAKE + N string `json:"N"` + S string `json:"S"` + } +} + +// TestKeccakKats tests the SHA-3 and Shake implementations against all the +// ShortMsgKATs from https://github.com/gvanas/KeccakCodePackage +// (The testvectors are stored in keccakKats.json.deflate due to their length.) +func TestKeccakKats(t *testing.T) { + // Read the KATs. 
+ deflated, err := os.Open(katFilename) + if err != nil { + t.Errorf("error opening %s: %s", katFilename, err) + } + file := flate.NewReader(deflated) + dec := json.NewDecoder(file) + var katSet KeccakKats + err = dec.Decode(&katSet) + if err != nil { + t.Errorf("error decoding KATs: %s", err) + } + + for algo, function := range testDigests { + d := function() + for _, kat := range katSet.Kats[algo] { + d.Reset() + in, err := hex.DecodeString(kat.Message) + if err != nil { + t.Errorf("error decoding KAT: %s", err) + } + d.Write(in[:kat.Length/8]) + got := strings.ToUpper(hex.EncodeToString(d.Sum(nil))) + if got != kat.Digest { + t.Errorf("function=%s, length=%d\nmessage:\n %s\ngot:\n %s\nwanted:\n %s", + algo, kat.Length, kat.Message, got, kat.Digest) + t.Logf("wanted %+v", kat) + t.FailNow() + } + continue + } + } +} + +// TestKeccak does a basic test of the non-standardized Keccak hash functions. +func TestKeccak(t *testing.T) { + tests := []struct { + fn func() hash.Hash + data []byte + want string + }{ + { + NewLegacyKeccak256, + []byte("abc"), + "4e03657aea45a94fc7d47ba826c8d667c0d1e6e33a64a036ec44f58fa12d6c45", + }, + { + NewLegacyKeccak512, + []byte("abc"), + "18587dc2ea106b9a1563e32b3312421ca164c7f1f07bc922a9c83d77cea3a1e5d0c69910739025372dc14ac9642629379540c17e2a65b19d77aa511a9d00bb96", + }, + } + + for _, u := range tests { + h := u.fn() + h.Write(u.data) + got := h.Sum(nil) + want := decodeHex(u.want) + if !bytes.Equal(got, want) { + t.Errorf("unexpected hash for size %d: got '%x' want '%s'", h.Size()*8, got, u.want) + } + } +} + +// TestUnalignedWrite tests that writing data in an arbitrary pattern with +// small input buffers. +func TestUnalignedWrite(t *testing.T) { + buf := sequentialBytes(0x10000) + for alg, df := range testDigests { + d := df() + d.Reset() + d.Write(buf) + want := d.Sum(nil) + d.Reset() + for i := 0; i < len(buf); { + // Cycle through offsets which make a 137 byte sequence. 
+ // Because 137 is prime this sequence should exercise all corner cases. + offsets := [17]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1} + for _, j := range offsets { + if v := len(buf) - i; v < j { + j = v + } + d.Write(buf[i : i+j]) + i += j + } + } + got := d.Sum(nil) + if !bytes.Equal(got, want) { + t.Errorf("Unaligned writes, alg=%s\ngot %q, want %q", alg, got, want) + } + } +} + +// sequentialBytes produces a buffer of size consecutive bytes 0x00, 0x01, ..., used for testing. +// +// The alignment of each slice is intentionally randomized to detect alignment +// issues in the implementation. See https://golang.org/issue/37644. +// Ideally, the compiler should fuzz the alignment itself. +// (See https://golang.org/issue/35128.) +func sequentialBytes(size int) []byte { + alignmentOffset := rand.Intn(8) + result := make([]byte, size+alignmentOffset)[alignmentOffset:] + for i := range result { + result[i] = byte(i) + } + return result +} + +func TestMarshalUnmarshal(t *testing.T) { + t.Run("Keccak-256", func(t *testing.T) { testMarshalUnmarshal(t, NewLegacyKeccak256()) }) + t.Run("Keccak-512", func(t *testing.T) { testMarshalUnmarshal(t, NewLegacyKeccak512()) }) +} + +// TODO(filippo): move this to crypto/internal/cryptotest. +func testMarshalUnmarshal(t *testing.T, h hash.Hash) { + buf := make([]byte, 200) + rand.Read(buf) + n := rand.Intn(200) + h.Write(buf) + want := h.Sum(nil) + h.Reset() + h.Write(buf[:n]) + b, err := h.(encoding.BinaryMarshaler).MarshalBinary() + if err != nil { + t.Errorf("MarshalBinary: %v", err) + } + h.Write(make([]byte, 200)) + if err := h.(encoding.BinaryUnmarshaler).UnmarshalBinary(b); err != nil { + t.Errorf("UnmarshalBinary: %v", err) + } + h.Write(buf[n:]) + got := h.Sum(nil) + if !bytes.Equal(got, want) { + t.Errorf("got %x, want %x", got, want) + } +} + +// BenchmarkPermutationFunction measures the speed of the permutation function +// with no input data. 
+func BenchmarkPermutationFunction(b *testing.B) { + b.SetBytes(int64(200)) + var lanes [25]uint64 + for i := 0; i < b.N; i++ { + keccakF1600(&lanes) + } +} diff --git a/common/crypto/keccak/testdata/keccakKats.json.deflate b/common/crypto/keccak/testdata/keccakKats.json.deflate new file mode 100644 index 00000000000..7a94c2f8bce Binary files /dev/null and b/common/crypto/keccak/testdata/keccakKats.json.deflate differ diff --git a/common/hasher.go b/common/hasher.go index d05f6429652..fc65d742cbe 100644 --- a/common/hasher.go +++ b/common/hasher.go @@ -20,7 +20,7 @@ import ( "hash" "sync" - "golang.org/x/crypto/sha3" + "github.com/erigontech/erigon/common/crypto/keccak" ) // keccakState wraps sha3.state. In addition to the usual hash methods, it also supports @@ -37,7 +37,7 @@ type Hasher struct { var hashersPool = sync.Pool{ New: func() any { - return &Hasher{Sha: sha3.NewLegacyKeccak256().(keccakState)} + return &Hasher{Sha: keccak.NewLegacyKeccak256().(keccakState)} }, } diff --git a/db/kv/kvcache/cache.go b/db/kv/kvcache/cache.go index f574369d0b9..af66b89e685 100644 --- a/db/kv/kvcache/cache.go +++ b/db/kv/kvcache/cache.go @@ -30,9 +30,9 @@ import ( "github.com/c2h5oh/datasize" btree2 "github.com/tidwall/btree" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/db/kv" "github.com/erigontech/erigon/diagnostics/metrics" "github.com/erigontech/erigon/node/gointerfaces" @@ -198,7 +198,7 @@ func New(cfg CoherentConfig) *Coherent { roots: map[uint64]*CoherentRoot{}, stateEvict: &ThreadSafeEvictionList{l: NewList()}, codeEvict: &ThreadSafeEvictionList{l: NewList()}, - hasher: sha3.NewLegacyKeccak256(), + hasher: keccak.NewLegacyKeccak256(), cfg: cfg, miss: metrics.GetOrCreateCounter(fmt.Sprintf(`cache_total{result="miss",name="%s"}`, cfg.MetricsLabel)), hits: metrics.GetOrCreateCounter(fmt.Sprintf(`cache_total{result="hit",name="%s"}`, cfg.MetricsLabel)), diff --git 
a/db/rawdb/accessors_chain_test.go b/db/rawdb/accessors_chain_test.go index e44c6de8587..9db7935e1ab 100644 --- a/db/rawdb/accessors_chain_test.go +++ b/db/rawdb/accessors_chain_test.go @@ -27,10 +27,10 @@ import ( "github.com/holiman/uint256" "github.com/stretchr/testify/require" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" "github.com/erigontech/erigon/common/crypto" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/empty" "github.com/erigontech/erigon/common/log/v3" "github.com/erigontech/erigon/common/u256" @@ -346,7 +346,7 @@ func TestHeaderStorage(t *testing.T) { if entry := rawdb.ReadHeaderRLP(tx, header.Hash(), header.Number.Uint64()); entry == nil { t.Fatalf("Stored header RLP not found") } else { - hasher := sha3.NewLegacyKeccak256() + hasher := keccak.NewLegacyKeccak256() hasher.Write(entry) if hash := common.BytesToHash(hasher.Sum(nil)); hash != header.Hash() { @@ -391,7 +391,7 @@ func TestBodyStorage(t *testing.T) { } // Create a test body to move around the database and make sure it's really new - hasher := sha3.NewLegacyKeccak256() + hasher := keccak.NewLegacyKeccak256() _ = rlp.Encode(hasher, body) hash := common.BytesToHash(hasher.Sum(nil)) header := &types.Header{Number: common.Big1} @@ -415,7 +415,7 @@ func TestBodyStorage(t *testing.T) { if err != nil { log.Error("ReadBodyRLP failed", "err", err) } - hasher := sha3.NewLegacyKeccak256() + hasher := keccak.NewLegacyKeccak256() hasher.Write(bodyRlp) if calc := common.BytesToHash(hasher.Sum(nil)); calc != hash { diff --git a/execution/commitment/commitment.go b/execution/commitment/commitment.go index 70d379c73d3..ff437c112f8 100644 --- a/execution/commitment/commitment.go +++ b/execution/commitment/commitment.go @@ -33,10 +33,10 @@ import ( "github.com/google/btree" "github.com/holiman/uint256" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" "github.com/erigontech/erigon/common/crypto" + 
"github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/empty" "github.com/erigontech/erigon/common/length" "github.com/erigontech/erigon/common/log/v3" @@ -1111,7 +1111,7 @@ func (branchData BranchData) Validate(branchKey []byte) error { if err = validateAfterMap(afterMap, row); err != nil { return err } - if err = validatePlainKeys(branchKey, row, sha3.NewLegacyKeccak256().(keccakState)); err != nil { + if err = validatePlainKeys(branchKey, row, keccak.NewLegacyKeccak256().(keccakState)); err != nil { return err } return nil diff --git a/execution/commitment/hex_patricia_hashed.go b/execution/commitment/hex_patricia_hashed.go index b7611711189..ab621114f9b 100644 --- a/execution/commitment/hex_patricia_hashed.go +++ b/execution/commitment/hex_patricia_hashed.go @@ -32,10 +32,9 @@ import ( "sync/atomic" "time" - "golang.org/x/crypto/sha3" - "github.com/erigontech/erigon/common" "github.com/erigontech/erigon/common/crypto" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/dbg" "github.com/erigontech/erigon/common/empty" "github.com/erigontech/erigon/common/length" @@ -132,8 +131,8 @@ func (hph *HexPatriciaHashed) SpawnSubTrie(ctx PatriciaContext, forNibble int) * func NewHexPatriciaHashed(accountKeyLen int16, ctx PatriciaContext) *HexPatriciaHashed { hph := &HexPatriciaHashed{ ctx: ctx, - keccak: sha3.NewLegacyKeccak256().(keccakState), - keccak2: sha3.NewLegacyKeccak256().(keccakState), + keccak: keccak.NewLegacyKeccak256().(keccakState), + keccak2: keccak.NewLegacyKeccak256().(keccakState), accountKeyLen: accountKeyLen, auxBuffer: bytes.NewBuffer(make([]byte, 8192)), hadToLoadL: make(map[uint64]skipStat), diff --git a/execution/commitment/patricia_state_mock_test.go b/execution/commitment/patricia_state_mock_test.go index 86af54b9d71..3d15c252fdc 100644 --- a/execution/commitment/patricia_state_mock_test.go +++ b/execution/commitment/patricia_state_mock_test.go @@ -27,11 +27,11 @@ import ( 
"testing" "github.com/holiman/uint256" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/db/kv" "github.com/erigontech/erigon/common" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/length" ) @@ -357,7 +357,7 @@ func (ub *UpdateBuilder) Build() (plainKeys [][]byte, updates []Update) { hashed := make([]string, 0, len(ub.keyset)+len(ub.keyset2)) preimages := make(map[string][]byte) preimages2 := make(map[string][]byte) - keccak := sha3.NewLegacyKeccak256() + keccak := keccak.NewLegacyKeccak256() for key := range ub.keyset { keccak.Reset() keccak.Write([]byte(key)) diff --git a/execution/commitment/trie/account_node_test.go b/execution/commitment/trie/account_node_test.go index 42137d5cd69..a5576acc16a 100644 --- a/execution/commitment/trie/account_node_test.go +++ b/execution/commitment/trie/account_node_test.go @@ -22,10 +22,10 @@ import ( "testing" "github.com/holiman/uint256" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" "github.com/erigontech/erigon/common/crypto" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/u256" "github.com/erigontech/erigon/db/kv/dbutils" "github.com/erigontech/erigon/execution/types/accounts" @@ -175,7 +175,7 @@ func generateAcc() (*ecdsa.PrivateKey, common.Address, common.Hash, error) { } func hashVal(v []byte) (common.Hash, error) { - sha := sha3.NewLegacyKeccak256().(keccakState) + sha := keccak.NewLegacyKeccak256().(keccakState) sha.Reset() _, err := sha.Write(v) if err != nil { diff --git a/execution/commitment/trie/hashbuilder.go b/execution/commitment/trie/hashbuilder.go index 4754093799f..f450218369b 100644 --- a/execution/commitment/trie/hashbuilder.go +++ b/execution/commitment/trie/hashbuilder.go @@ -23,9 +23,9 @@ import ( "math/bits" "github.com/holiman/uint256" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" + "github.com/erigontech/erigon/common/crypto/keccak" length2 
"github.com/erigontech/erigon/common/length" "github.com/erigontech/erigon/execution/rlp" "github.com/erigontech/erigon/execution/types/accounts" @@ -62,7 +62,7 @@ type HashBuilder struct { // NewHashBuilder creates a new HashBuilder func NewHashBuilder(trace bool) *HashBuilder { return &HashBuilder{ - sha: sha3.NewLegacyKeccak256().(keccakState), + sha: keccak.NewLegacyKeccak256().(keccakState), byteArrayWriter: &ByteArrayWriter{}, trace: trace, } diff --git a/execution/commitment/trie/hasher.go b/execution/commitment/trie/hasher.go index ed050bf739b..fa1255b8997 100644 --- a/execution/commitment/trie/hasher.go +++ b/execution/commitment/trie/hasher.go @@ -24,10 +24,9 @@ import ( "hash" "sync" - "golang.org/x/crypto/sha3" - "github.com/erigontech/erigon/common" "github.com/erigontech/erigon/common/crypto" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/length" "github.com/erigontech/erigon/execution/rlp" ) @@ -54,7 +53,7 @@ type keccakState interface { var hashersPool = sync.Pool{ New: func() any { return &hasher{ - sha: sha3.NewLegacyKeccak256().(crypto.KeccakState), + sha: keccak.NewLegacyKeccak256().(crypto.KeccakState), bw: &ByteArrayWriter{}, } }, diff --git a/execution/protocol/aa/validation_rules_tracer.go b/execution/protocol/aa/validation_rules_tracer.go index d4b4e6e6f6f..0e76e2491b8 100644 --- a/execution/protocol/aa/validation_rules_tracer.go +++ b/execution/protocol/aa/validation_rules_tracer.go @@ -6,9 +6,9 @@ import ( "fmt" "github.com/holiman/uint256" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/execution/tracing" "github.com/erigontech/erigon/execution/types/accounts" "github.com/erigontech/erigon/execution/vm" @@ -205,7 +205,7 @@ func (t *ValidationRulesTracer) isAssociatedStorage(slot accounts.StorageKey, ad buf := make([]byte, 52) copy(buf, addrValue[:]) - hash := sha3.NewLegacyKeccak256() + hash 
:= keccak.NewLegacyKeccak256() result := make([]byte, 32) for x := 0; x < 50; x++ { diff --git a/execution/protocol/block_exec.go b/execution/protocol/block_exec.go index 32e47a251fe..7d1353d44f3 100644 --- a/execution/protocol/block_exec.go +++ b/execution/protocol/block_exec.go @@ -25,9 +25,8 @@ import ( "slices" "time" - "golang.org/x/crypto/sha3" - "github.com/erigontech/erigon/common" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/dbg" "github.com/erigontech/erigon/common/log/v3" "github.com/erigontech/erigon/common/math" @@ -226,7 +225,7 @@ func ExecuteBlockEphemerally( } func rlpHash(x any) (h common.Hash) { - hw := sha3.NewLegacyKeccak256() + hw := keccak.NewLegacyKeccak256() rlp.Encode(hw, x) //nolint:errcheck hw.Sum(h[:0]) return h diff --git a/execution/protocol/rules/ethash/algorithm.go b/execution/protocol/rules/ethash/algorithm.go index 2610b36148f..35ed12e9cb0 100644 --- a/execution/protocol/rules/ethash/algorithm.go +++ b/execution/protocol/rules/ethash/algorithm.go @@ -31,10 +31,9 @@ import ( "time" "unsafe" - "golang.org/x/crypto/sha3" - "github.com/erigontech/erigon/common" "github.com/erigontech/erigon/common/crypto" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/dbg" "github.com/erigontech/erigon/common/length" "github.com/erigontech/erigon/common/log/v3" @@ -163,7 +162,7 @@ func seedHashOld(block uint64) []byte { if block < epochLength { return seed } - keccak256 := makeHasher(sha3.NewLegacyKeccak256()) + keccak256 := makeHasher(keccak.NewLegacyKeccak256()) for i := 0; i < int(block/epochLength); i++ { keccak256(seed, seed) } @@ -219,7 +218,7 @@ func generateCache(dest []uint32, epoch uint64, seed []byte) { } }() // Create a hasher to reuse between invocations - keccak512 := makeHasher(sha3.NewLegacyKeccak512()) + keccak512 := makeHasher(keccak.NewLegacyKeccak512()) // Sequentially produce the initial dataset keccak512(cache, seed) @@ -372,7 +371,7 @@ func 
generateDataset(dest []uint32, epoch uint64, cache []uint32) { defer pend.Done() // Create a hasher to reuse between invocations - keccak512 := makeHasher(sha3.NewLegacyKeccak512()) + keccak512 := makeHasher(keccak.NewLegacyKeccak512()) // Calculate the data segment this thread should generate batch := (size + hashBytes*uint64(threads) - 1) / (hashBytes * uint64(threads)) @@ -447,7 +446,7 @@ func hashimoto(hash []byte, nonce uint64, size uint64, lookup func(index uint32) // in-memory cache) in order to produce our final value for a particular header // hash and nonce. func hashimotoLight(size uint64, cache []uint32, hash []byte, nonce uint64) ([]byte, []byte) { - keccak512 := makeHasher(sha3.NewLegacyKeccak512()) + keccak512 := makeHasher(keccak.NewLegacyKeccak512()) lookup := func(index uint32) []uint32 { data := make([]uint32, 16) // 64/4 diff --git a/execution/protocol/rules/ethash/rules.go b/execution/protocol/rules/ethash/rules.go index 2fa3bedfee8..f61aad7337b 100644 --- a/execution/protocol/rules/ethash/rules.go +++ b/execution/protocol/rules/ethash/rules.go @@ -29,9 +29,9 @@ import ( mapset "github.com/deckarep/golang-set/v2" "github.com/holiman/uint256" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/empty" "github.com/erigontech/erigon/common/log/v3" "github.com/erigontech/erigon/common/math" @@ -614,7 +614,7 @@ func (ethash *Ethash) FinalizeAndAssemble(chainConfig *chain.Config, header *typ // SealHash returns the hash of a block prior to it being sealed. 
func (ethash *Ethash) SealHash(header *types.Header) (hash common.Hash) { - hasher := sha3.NewLegacyKeccak256() + hasher := keccak.NewLegacyKeccak256() enc := []any{ header.ParentHash, diff --git a/execution/rlp/rlp_test.go b/execution/rlp/rlp_test.go index 875d3eaa350..ca5bb8f2aff 100644 --- a/execution/rlp/rlp_test.go +++ b/execution/rlp/rlp_test.go @@ -25,10 +25,9 @@ import ( "math/big" "testing" - "golang.org/x/crypto/sha3" - "github.com/erigontech/erigon/common" "github.com/erigontech/erigon/common/crypto" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/log/v3" "github.com/erigontech/erigon/common/u256" "github.com/erigontech/erigon/execution/chain" @@ -160,7 +159,7 @@ func BenchmarkHashing(b *testing.B) { blockRlp, _ = rlp.EncodeToBytes(block) } var got common.Hash - var hasher = sha3.NewLegacyKeccak256() + var hasher = keccak.NewLegacyKeccak256() b.Run("iteratorhashing", func(b *testing.B) { b.ResetTimer() for b.Loop() { diff --git a/execution/tests/testutil/state_test_util.go b/execution/tests/testutil/state_test_util.go index 82be37aa680..a5b36037ac0 100644 --- a/execution/tests/testutil/state_test_util.go +++ b/execution/tests/testutil/state_test_util.go @@ -32,10 +32,10 @@ import ( "testing" "github.com/holiman/uint256" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" "github.com/erigontech/erigon/common/crypto" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/empty" "github.com/erigontech/erigon/common/hexutil" "github.com/erigontech/erigon/common/log/v3" @@ -365,7 +365,7 @@ func (t *StateTest) genesis(config *chain.Config) *types.Genesis { } func rlpHash(x any) (h common.Hash) { - hw := sha3.NewLegacyKeccak256() + hw := keccak.NewLegacyKeccak256() if err := rlp.Encode(hw, x); err != nil { panic(err) } diff --git a/execution/vm/instructions.go b/execution/vm/instructions.go index 34374ea3133..f356df58410 100644 --- a/execution/vm/instructions.go +++ 
b/execution/vm/instructions.go @@ -25,9 +25,9 @@ import ( "math" "github.com/holiman/uint256" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/log/v3" "github.com/erigontech/erigon/execution/protocol/misc" "github.com/erigontech/erigon/execution/protocol/params" @@ -362,7 +362,7 @@ func opKeccak256(pc uint64, evm *EVM, scope *CallContext) (uint64, []byte, error data := scope.Memory.GetPtr(offset.Uint64(), size.Uint64()) if evm.hasher == nil { - evm.hasher = sha3.NewLegacyKeccak256().(keccakState) + evm.hasher = keccak.NewLegacyKeccak256().(keccakState) } else { evm.hasher.Reset() } diff --git a/node/shards/state_cache_test.go b/node/shards/state_cache_test.go index df622db1e24..15bee50cef6 100644 --- a/node/shards/state_cache_test.go +++ b/node/shards/state_cache_test.go @@ -23,9 +23,9 @@ import ( "github.com/c2h5oh/datasize" "github.com/stretchr/testify/assert" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/execution/types/accounts" ) @@ -67,7 +67,7 @@ func TestCacheBtreeOrderAccountStorage(t *testing.T) { sc.SetStorageWrite(a1.Bytes(), 1, l2.Bytes(), nil) sc.SetStorageWrite(a2.Bytes(), 1, l3.Bytes(), nil) lastK = lastK[:0] - if err := sc.WalkStorage(common.BytesToHash(sha3.NewLegacyKeccak256().Sum(a1.Bytes())), 1, nil, func(locHash common.Hash, val []byte) error { + if err := sc.WalkStorage(common.BytesToHash(keccak.NewLegacyKeccak256().Sum(a1.Bytes())), 1, nil, func(locHash common.Hash, val []byte) error { curK = append(curK[:0], locHash.Bytes()...) assert.Negative(t, bytes.Compare(lastK, curK)) lastK = append(lastK[:0], curK...) 
diff --git a/p2p/dnsdisc/tree.go b/p2p/dnsdisc/tree.go index 87bc8cfa75d..1bbc0ce14ee 100644 --- a/p2p/dnsdisc/tree.go +++ b/p2p/dnsdisc/tree.go @@ -30,9 +30,8 @@ import ( "slices" "strings" - "golang.org/x/crypto/sha3" - "github.com/erigontech/erigon/common/crypto" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/execution/rlp" "github.com/erigontech/erigon/p2p/enode" "github.com/erigontech/erigon/p2p/enr" @@ -230,7 +229,7 @@ const ( ) func subdomain(e entry) string { - h := sha3.NewLegacyKeccak256() + h := keccak.NewLegacyKeccak256() io.WriteString(h, e.String()) return b32format.EncodeToString(h.Sum(nil)[:16]) } @@ -240,7 +239,7 @@ func (e *rootEntry) String() string { } func (e *rootEntry) sigHash() []byte { - h := sha3.NewLegacyKeccak256() + h := keccak.NewLegacyKeccak256() fmt.Fprintf(h, rootPrefix+" e=%s l=%s seq=%d", e.eroot, e.lroot, e.seq) return h.Sum(nil) } diff --git a/p2p/enode/idscheme.go b/p2p/enode/idscheme.go index 0b89c9ceb80..8ab1375446f 100644 --- a/p2p/enode/idscheme.go +++ b/p2p/enode/idscheme.go @@ -24,9 +24,8 @@ import ( "errors" "io" - "golang.org/x/crypto/sha3" - "github.com/erigontech/erigon/common/crypto" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/math" "github.com/erigontech/erigon/execution/rlp" "github.com/erigontech/erigon/p2p/enr" @@ -53,7 +52,7 @@ func SignV4(r *enr.Record, privkey *ecdsa.PrivateKey) error { cpy.Set(enr.ID("v4")) cpy.Set(Secp256k1(privkey.PublicKey)) - h := sha3.NewLegacyKeccak256() + h := keccak.NewLegacyKeccak256() rlp.Encode(h, cpy.AppendElements(nil)) sig, err := crypto.Sign(h.Sum(nil), privkey) if err != nil { @@ -74,7 +73,7 @@ func (V4ID) Verify(r *enr.Record, sig []byte) error { return errors.New("invalid public key") } - h := sha3.NewLegacyKeccak256() + h := keccak.NewLegacyKeccak256() rlp.Encode(h, r.AppendElements(nil)) if !crypto.VerifySignature(entry, h.Sum(nil), sig) { return enr.ErrInvalidSig diff --git 
a/p2p/rlpx/rlpx.go b/p2p/rlpx/rlpx.go index 7f7fab75018..651a0487b95 100644 --- a/p2p/rlpx/rlpx.go +++ b/p2p/rlpx/rlpx.go @@ -38,10 +38,10 @@ import ( "time" "github.com/golang/snappy" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common/crypto" "github.com/erigontech/erigon/common/crypto/ecies" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/execution/rlp" ) @@ -490,10 +490,10 @@ func (h *handshakeState) secrets(auth, authResp []byte) (Secrets, error) { } // setup sha3 instances for the MACs - mac1 := sha3.NewLegacyKeccak256() + mac1 := keccak.NewLegacyKeccak256() mac1.Write(xor(s.MAC, h.respNonce)) mac1.Write(auth) - mac2 := sha3.NewLegacyKeccak256() + mac2 := keccak.NewLegacyKeccak256() mac2.Write(xor(s.MAC, h.initNonce)) mac2.Write(authResp) if h.initiator { diff --git a/polygon/bor/bor.go b/polygon/bor/bor.go index 4a6cf1cb87a..302ba556a5e 100644 --- a/polygon/bor/bor.go +++ b/polygon/bor/bor.go @@ -34,10 +34,10 @@ import ( lru "github.com/hashicorp/golang-lru/arc/v2" "github.com/holiman/uint256" "github.com/xsleonard/go-merkle" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" "github.com/erigontech/erigon/common/crypto" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/dbg" "github.com/erigontech/erigon/common/empty" "github.com/erigontech/erigon/common/length" @@ -1206,7 +1206,7 @@ func ComputeHeadersRootHash(blockHeaders []*types.Header) ([]byte, error) { headers[i] = arr } tree := merkle.NewTreeWithOpts(merkle.TreeOptions{EnableHashSorting: false, DisableHashLeaves: true}) - if err := tree.Generate(Convert(headers), sha3.NewLegacyKeccak256()); err != nil { + if err := tree.Generate(Convert(headers), keccak.NewLegacyKeccak256()); err != nil { return nil, err } diff --git a/txnprovider/shutter/internal/crypto/hash.go b/txnprovider/shutter/internal/crypto/hash.go index cbd2f3440ae..540468cc846 100644 --- a/txnprovider/shutter/internal/crypto/hash.go +++ 
b/txnprovider/shutter/internal/crypto/hash.go @@ -19,14 +19,14 @@ package crypto import ( "math/big" + "github.com/erigontech/erigon/common/crypto/keccak" blst "github.com/supranational/blst/bindings/go" - "golang.org/x/crypto/sha3" ) const HashToG1DST = "SHUTTER_V01_BLS12381G1_XMD:SHA-256_SSWU_RO_" func keccak256(ds ...[]byte) []byte { - state := sha3.NewLegacyKeccak256() + state := keccak.NewLegacyKeccak256() for _, d := range ds { state.Write(d) } diff --git a/txnprovider/txpool/pool_txn_parser.go b/txnprovider/txpool/pool_txn_parser.go index 3fbef7a987d..1f423984751 100644 --- a/txnprovider/txpool/pool_txn_parser.go +++ b/txnprovider/txpool/pool_txn_parser.go @@ -28,10 +28,10 @@ import ( goethkzg "github.com/crate-crypto/go-eth-kzg" "github.com/erigontech/secp256k1" "github.com/holiman/uint256" - "golang.org/x/crypto/sha3" "github.com/erigontech/erigon/common" "github.com/erigontech/erigon/common/crypto" + "github.com/erigontech/erigon/common/crypto/keccak" "github.com/erigontech/erigon/common/dbg" "github.com/erigontech/erigon/common/length" "github.com/erigontech/erigon/common/u256" @@ -88,8 +88,8 @@ func NewTxnParseContext(chainID uint256.Int) *TxnParseContext { } ctx := &TxnParseContext{ withSender: true, - Keccak1: sha3.NewLegacyKeccak256(), - Keccak2: sha3.NewLegacyKeccak256(), + Keccak1: keccak.NewLegacyKeccak256(), + Keccak2: keccak.NewLegacyKeccak256(), } // behave as of London enabled