Skip to content

Commit 2b71e97

Browse files
authored
Introduce new builtin __BYTES() (#25)
* Introduce new builtin `__BYTES()` * New release
1 parent 0c1c4e5 commit 2b71e97

File tree

12 files changed

+224
-38
lines changed

12 files changed

+224
-38
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44

55
## [Unreleased]
66

7+
## [1.0.7] - 2025-01-29
8+
- Add a built-in macro that converts a string to bytes and pushes it onto the stack.
9+
- `__BYTES("hello")` -> `PUSH5 0x68656c6c6f`
10+
- This can also be used here: `__RIGHTPAD(__BYTES("hello"))`.
11+
712
## [1.0.6] - 2025-01-28
813
- Allow the use of `--debug` for reverting contracts.
914
- Refactored parsing for ABI and built-in arguments.

Cargo.lock

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ license = "MIT OR Apache-2.0"
1111
readme = "README.md"
1212
repository = "https://github.com/cakevm/huff-neo"
1313
rust-version = "1.84"
14-
version = "1.0.6"
14+
version = "1.0.7"
1515

1616
[workspace.dependencies]
1717
huff-neo-codegen = { path = "crates/codegen" }

crates/codegen/src/irgen/arg_calls.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ pub fn bubble_arg_call(
3131
tracing::info!(target: "codegen", "GOT \"{:?}\" ARG FROM MACRO INVOCATION", arg);
3232
match arg {
3333
MacroArg::Literal(l) => {
34-
tracing::info!(target: "codegen", "GOT LITERAL {} ARG FROM MACRO INVOCATION", bytes32_to_string(l, false));
34+
tracing::info!(target: "codegen", "GOT LITERAL {} ARG FROM MACRO INVOCATION", bytes32_to_hex_string(l, false));
3535

36-
let hex_literal: String = bytes32_to_string(l, false);
36+
let hex_literal: String = bytes32_to_hex_string(l, false);
3737
let push_bytes = format!("{:02x}{hex_literal}", 95 + hex_literal.len() / 2);
3838
let b = Bytes(push_bytes);
3939
*offset += b.0.len() / 2;
@@ -91,7 +91,7 @@ pub fn bubble_arg_call(
9191
tracing::info!(target: "codegen", "ARGCALL IS CONSTANT: {:?}", constant);
9292
let push_bytes = match &constant.value {
9393
ConstVal::Literal(l) => {
94-
let hex_literal: String = bytes32_to_string(l, false);
94+
let hex_literal: String = bytes32_to_hex_string(l, false);
9595
format!("{:02x}{hex_literal}", 95 + hex_literal.len() / 2)
9696
}
9797
ConstVal::FreeStoragePointer(fsp) => {

crates/codegen/src/irgen/builtin_function.rs

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::Codegen;
22
use alloy_primitives::{hex, keccak256};
33
use huff_neo_utils::bytecode::{BytecodeRes, Bytes, CircularCodeSizeIndices, Jump, Jumps};
4-
use huff_neo_utils::bytes_util::{bytes32_to_string, format_even_bytes, pad_n_bytes};
4+
use huff_neo_utils::bytes_util::{bytes32_to_hex_string, format_even_bytes, literal_gen, pad_n_bytes};
55
use huff_neo_utils::error::{CodegenError, CodegenErrorKind};
66
use huff_neo_utils::evm_version::EVMVersion;
77
use huff_neo_utils::opcodes::Opcode;
@@ -98,7 +98,7 @@ pub fn builtin_function_gen<'a>(
9898
bytes.push((starting_offset, Bytes(push_bytes)));
9999
}
100100
BuiltinFunctionKind::RightPad => {
101-
let push_bytes = right_pad(contract, bf)?;
101+
let push_bytes = right_pad(evm_version, contract, bf)?;
102102
*offset += push_bytes.len() / 2;
103103
bytes.push((starting_offset, Bytes(push_bytes)));
104104
}
@@ -218,6 +218,11 @@ pub fn builtin_function_gen<'a>(
218218

219219
bytes.push((starting_offset, Bytes(push_bytes)));
220220
}
221+
BuiltinFunctionKind::Bytes => {
222+
let push_bytes = builtin_bytes(evm_version, bf)?;
223+
*offset += push_bytes.len() / 2;
224+
bytes.push((starting_offset, Bytes(push_bytes)));
225+
}
221226
}
222227
Ok(())
223228
}
@@ -373,7 +378,7 @@ fn tablesize(contract: &Contract, bf: &BuiltinFunctionCall) -> Result<(TableDefi
373378
});
374379
};
375380

376-
let size = bytes32_to_string(&ir_table.size, false);
381+
let size = bytes32_to_hex_string(&ir_table.size, false);
377382
let push_bytes = format!("{:02x}{size}", 95 + size.len() / 2);
378383
Ok((ir_table, push_bytes))
379384
}
@@ -402,7 +407,7 @@ fn event_hash(contract: &Contract, bf: &BuiltinFunctionCall) -> Result<String, C
402407
});
403408
};
404409
let push_bytes = if let Some(event) = contract.events.iter().find(|e| first_arg.name.as_ref().unwrap().eq(&e.name)) {
405-
let hash = bytes32_to_string(&event.hash, false);
410+
let hash = bytes32_to_hex_string(&event.hash, false);
406411
format!("{}{hash}", Opcode::Push32)
407412
} else if let Some(s) = &first_arg.name {
408413
let event_selector = keccak256(s).0;
@@ -468,7 +473,7 @@ fn function_signature(contract: &Contract, bf: &BuiltinFunctionCall) -> Result<S
468473
Ok(push_bytes)
469474
}
470475

471-
fn right_pad(contract: &Contract, bf: &BuiltinFunctionCall) -> Result<String, CodegenError> {
476+
fn right_pad(evm_version: &EVMVersion, contract: &Contract, bf: &BuiltinFunctionCall) -> Result<String, CodegenError> {
472477
if bf.args.len() != 1 {
473478
tracing::error!(target = "codegen", "Incorrect number of arguments passed to __RIGHTPAD, should be 1: {}", bf.args.len());
474479
return Err(CodegenError {
@@ -488,6 +493,10 @@ fn right_pad(contract: &Contract, bf: &BuiltinFunctionCall) -> Result<String, Co
488493
let push_bytes = function_signature(contract, inner_call)?;
489494
push_bytes[2..].to_string() // remove opcode
490495
}
496+
BuiltinFunctionKind::Bytes => {
497+
let push_bytes = builtin_bytes(evm_version, inner_call)?;
498+
push_bytes[2..].to_string() // remove opcode
499+
}
491500
_ => {
492501
tracing::error!(target: "codegen", "Invalid argument type passed to __RIGHTPAD");
493502
return Err(CodegenError {
@@ -511,3 +520,50 @@ fn right_pad(contract: &Contract, bf: &BuiltinFunctionCall) -> Result<String, Co
511520
let push_bytes = format!("{}{hex}{}", Opcode::Push32, "0".repeat(64 - hex.len()));
512521
Ok(push_bytes)
513522
}
523+
524+
fn builtin_bytes(evm_version: &EVMVersion, bf: &BuiltinFunctionCall) -> Result<String, CodegenError> {
525+
if bf.args.len() != 1 {
526+
tracing::error!(target = "codegen", "Incorrect number of arguments passed to __BYTES, should be 1: {}", bf.args.len());
527+
return Err(CodegenError {
528+
kind: CodegenErrorKind::InvalidArguments(format!(
529+
"Incorrect number of arguments passed to __BYTES, should be 1: {}",
530+
bf.args.len()
531+
)),
532+
span: bf.span.clone(),
533+
token: None,
534+
});
535+
}
536+
let first_arg = match bf.args[0] {
537+
BuiltinFunctionArg::Argument(ref arg) => arg.name.clone().unwrap_or_default(),
538+
_ => {
539+
tracing::error!(target: "codegen", "Invalid argument type passed to __BYTES");
540+
return Err(CodegenError {
541+
kind: CodegenErrorKind::InvalidArguments(String::from("Invalid argument type passed to __BYTES")),
542+
span: bf.span.clone(),
543+
token: None,
544+
});
545+
}
546+
};
547+
548+
if first_arg.is_empty() {
549+
return Err(CodegenError {
550+
kind: CodegenErrorKind::InvalidArguments(String::from("Empty string passed to __BYTES")),
551+
span: bf.span.clone(),
552+
token: None,
553+
});
554+
}
555+
556+
let bytes = first_arg.as_bytes();
557+
if bytes.len() > 32 {
558+
return Err(CodegenError {
559+
kind: CodegenErrorKind::InvalidArguments(String::from("Encoded bytes length exceeds 32 bytes")),
560+
span: bf.span.clone(),
561+
token: None,
562+
});
563+
}
564+
let mut bytes_array = [0u8; 32];
565+
bytes_array[32 - bytes.len()..].copy_from_slice(bytes);
566+
567+
let push_bytes = literal_gen(evm_version, &bytes_array);
568+
Ok(push_bytes)
569+
}

crates/codegen/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ impl Codegen {
144144

145145
res.utilized_tables.iter().try_for_each(|jt| {
146146
table_offsets.insert(jt.name.to_string(), table_offset);
147-
let size = match bytes_util::hex_to_usize(bytes_util::bytes32_to_string(&jt.size, false).as_str()) {
147+
let size = match bytes_util::hex_to_usize(bytes_util::bytes32_to_hex_string(&jt.size, false).as_str()) {
148148
Ok(s) => s,
149149
Err(e) => {
150150
tracing::error!(target: "codegen", "Errored converting bytes32 to str. Bytes {:?} with error: {:?}", jt.size, e);

crates/core/tests/builtins.rs

Lines changed: 129 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,134 @@ fn test_builtin_rightpad_func_sig() {
659659

660660
// Have Codegen create the runtime bytecode
661661
let r_bytes = Codegen::generate_main_bytecode(&EVMVersion::default(), &contract, None).unwrap();
662-
// PUSH32 = 0x7f
663-
// transfer(address,uint256) signature = 0xa9059cbb
662+
// PUSH32 = 0x7f, transfer(address,uint256) signature = 0xa9059cbb
664663
assert_eq!(&r_bytes, "7fa9059cbb00000000000000000000000000000000000000000000000000000000");
665664
}
665+
666+
/// `__RIGHTPAD(__BYTES('hello'))` must produce a PUSH32 whose word starts with the
/// bytes of "hello" and is zero-filled on the right.
#[test]
fn test_builtin_rightpad_bytes() {
    let source: &str = r#"
        #define macro MAIN() = takes (0) returns (0) {
            __RIGHTPAD(__BYTES('hello'))
        }
    "#;

    // Lex the source into tokens
    let full_source = FullFileSource { source, file: None, spans: vec![] };
    let tokens: Vec<Token> = Lexer::new(full_source).into_iter().map(|token| token.unwrap()).collect();

    // Parse the tokens into a contract AST and derive its storage pointers
    let mut parser = Parser::new(tokens, None);
    let mut contract = parser.parse().unwrap();
    contract.derive_storage_pointers();

    // A freshly created Codegen must not hold an artifact
    let codegen = Codegen::new();
    assert!(codegen.artifact.is_none());

    // Generate the bytecode for MAIN
    let bytecode = Codegen::generate_main_bytecode(&EVMVersion::default(), &contract, None).unwrap();

    // 0x7f = PUSH32, 0x68656c6c6f = "hello", remainder is zero padding
    assert_eq!(&bytecode, "7f68656c6c6f000000000000000000000000000000000000000000000000000000");
}
697+
698+
/// `__BYTES` must emit the shortest PUSH for the UTF-8 bytes of each string,
/// including multi-byte characters.
#[test]
fn test_bytes_builtin() {
    let source: &str = r#"
        #define macro MAIN() = takes (0) returns (0) {
            __BYTES("hello")
            __BYTES("hellohello")
            __BYTES("🙂🙂") // Will be represended as UTF-8 4-byte
        }
    "#;

    // Lex the source into tokens
    let full_source = FullFileSource { source, file: None, spans: vec![] };
    let tokens: Vec<Token> = Lexer::new(full_source).into_iter().map(|token| token.unwrap()).collect();

    // Parse the tokens into a contract AST
    let mut parser = Parser::new(tokens, None);
    let contract = parser.parse().unwrap();

    // A freshly created Codegen must not hold an artifact
    let codegen = Codegen::new();
    assert!(codegen.artifact.is_none());

    // Generate the bytecode for MAIN
    let bytecode = Codegen::generate_main_bytecode(&EVMVersion::default(), &contract, None).unwrap();

    // Each push occupies 2 hex chars of opcode plus 2 hex chars per data byte.
    // 0x64 = PUSH5, "hello" = 0x68656c6c6f
    assert_eq!(&bytecode[0..12], "6468656c6c6f");
    // 0x69 = PUSH10, "hellohello" = 0x68656c6c6f68656c6c6f
    assert_eq!(&bytecode[12..34], "6968656c6c6f68656c6c6f");
    // 0x67 = PUSH8, "🙂🙂" = 0xf09f9982f09f9982 (4 UTF-8 bytes per emoji)
    assert_eq!(&bytecode[34..52], "67f09f9982f09f9982");

    // Nothing else may be emitted after the three pushes
    let expected_len = (2 + 5 * 2) + (2 + 2 * 10) + (2 + 8 * 2);
    assert_eq!(bytecode.len(), expected_len);
}
733+
734+
/// A `__BYTES` argument longer than 32 encoded bytes must be rejected by codegen.
#[test]
fn test_bytes_builtin_too_large_error() {
    let source: &str = r#"
        #define macro MAIN() = takes (0) returns (0) {
            __BYTES("hellohellohellohellohellohellohello") // 35 characters, codegen will fail
        }
    "#;

    // Lex the source into tokens
    let full_source = FullFileSource { source, file: None, spans: vec![] };
    let tokens: Vec<Token> = Lexer::new(full_source).into_iter().map(|token| token.unwrap()).collect();

    // Parsing succeeds; the length limit is only enforced during codegen
    let mut parser = Parser::new(tokens, None);
    let contract = parser.parse().unwrap();

    // A freshly created Codegen must not hold an artifact
    let codegen = Codegen::new();
    assert!(codegen.artifact.is_none());

    // Bytecode generation must fail with the length error
    let outcome = Codegen::generate_main_bytecode(&EVMVersion::default(), &contract, None);
    assert!(outcome.is_err());

    let expected_kind = CodegenErrorKind::InvalidArguments(String::from("Encoded bytes length exceeds 32 bytes"));
    assert_eq!(outcome.unwrap_err().kind, expected_kind);
}
763+
764+
/// An empty string passed to `__BYTES` must be rejected by codegen.
#[test]
fn test_bytes_builtin_empty_string_error() {
    let source: &str = r#"
        #define macro MAIN() = takes (0) returns (0) {
            __BYTES("")
        }
    "#;

    // Lex the source into tokens
    let full_source = FullFileSource { source, file: None, spans: vec![] };
    let tokens: Vec<Token> = Lexer::new(full_source).into_iter().map(|token| token.unwrap()).collect();

    // Parsing succeeds; the empty string is only rejected during codegen
    let mut parser = Parser::new(tokens, None);
    let contract = parser.parse().unwrap();

    // A freshly created Codegen must not hold an artifact
    let codegen = Codegen::new();
    assert!(codegen.artifact.is_none());

    // Bytecode generation must fail with the empty-string error
    let outcome = Codegen::generate_main_bytecode(&EVMVersion::default(), &contract, None);
    assert!(outcome.is_err());

    let expected_kind = CodegenErrorKind::InvalidArguments(String::from("Empty string passed to __BYTES"));
    assert_eq!(outcome.unwrap_err().kind, expected_kind);
}

0 commit comments

Comments
 (0)