Skip to content

Commit fb8689c

Browse files
committed
[mlir][Sol] Support packed and unpacked string storage layouts
This adds support for short strings (length < 32 bytes, packed encoding) and long strings (length >= 32 bytes, unpacked encoding); for the string operations push(), push(x), pop(), length, and indexing; and for conversions between the memory and storage representations.
1 parent ffa5ce2 commit fb8689c

File tree

7 files changed

+814
-131
lines changed

7 files changed

+814
-131
lines changed

mlir/include/mlir/Conversion/SolToStandard/EVMUtil.h

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -164,13 +164,21 @@ class Builder {
164164

165165
/// Generates {slot, offset} for packed storage array indexing.
166166
Value genPackedStorageAddr(Value baseSlot, Value idx, Type eltTy,
167+
bool isDataLeftAligned = false,
167168
std::optional<Location> locArg = std::nullopt);
168169

169170
/// Loads slot and punches hole: and(sload(slot), holeMask)
170171
/// where holeMask = not(ones(numBits) << shiftBits)
171172
Value genPunchHole(Value slot, Value shiftBits, unsigned numBits,
172173
std::optional<Location> locArg = std::nullopt);
173174

175+
/// Inserts an integer value (<= 32 bytes) into the slot value:
176+
/// or(and(slot, holeMask), shiftedVal), where
177+
/// holeMask = not(ones(numBits) << (offset * 8)),
178+
/// shiftedVal = (intVal << (offset * 8))
179+
/// \p offset is scaled by 8 in the formula above, i.e. it is a byte offset,
/// and \p numBits is the width of the hole that is cleared before the insert.
/// \p locArg defaults to std::nullopt like the other Builder helpers.
Value genInsertIntToSlot(Value slot, Value offset, Value intVal,
180+
unsigned numBits,
std::optional<Location> locArg = std::nullopt);
181+
174182
/// Generates a load from the low level integral type address.
175183
Value genLoad(Value addr, sol::DataLocation dataLoc,
176184
std::optional<Location> locArg = std::nullopt);
@@ -187,12 +195,48 @@ class Builder {
187195
void genStringStore(std::string const &str, Value addr,
188196
std::optional<Location> locArg = std::nullopt);
189197

190-
/// Generates a loop to copy the data. This works for low level integral type
191-
/// addresses.
192-
void genCopyLoop(Value srcAddr, Value dstAddr, Value sizeInWords, Type srcTy,
193-
Type dstTy, sol::DataLocation srcDataLoc,
194-
sol::DataLocation dstDataLoc,
195-
std::optional<Location> locArg = std::nullopt);
198+
/// Generates the length of a string.
199+
mlir::Value
200+
genStringLength(mlir::Value lengthSlot, mlir::sol::DataLocation dataLoc,
201+
std::optional<mlir::Location> locArg = std::nullopt);
202+
203+
/// Copies a string from storage to memory.
204+
void
205+
genCopyStringToMemory(mlir::Value srcDataAddr, mlir::Value lengthSlot,
206+
mlir::Value length, mlir::Value dstAddr,
207+
std::optional<mlir::Location> locArg = std::nullopt);
208+
209+
/// Copies a string to storage.
210+
void
211+
genCopyStringToStorage(mlir::Value srcDataAddr, mlir::Value lengthSlot,
212+
mlir::Value length, mlir::Value dstAddr,
213+
mlir::sol::DataLocation srcDataLoc,
214+
std::optional<mlir::Location> locArg = std::nullopt);
215+
216+
/// Copies an object of type \p ty from \p srcAddr to \p dstAddr.
217+
void genCopy(mlir::Type ty, mlir::Value srcAddr, mlir::Value dstAddr,
218+
mlir::sol::DataLocation srcDataLoc,
219+
mlir::sol::DataLocation dstDataLoc,
220+
std::optional<mlir::Location> locArg = std::nullopt);
221+
222+
/// Generates the 'push' of a value to a string.
223+
void genPushToString(mlir::Value srcAddr, mlir::Value value,
224+
std::optional<mlir::Location> locArg = std::nullopt);
225+
226+
/// Generates the 'push' of a default value to a string and
227+
/// returns a fat pointer to the newly added element.
228+
Value
229+
genPushVoidToString(Value srcAddr,
230+
std::optional<mlir::Location> locArg = std::nullopt);
231+
232+
/// Generates the 'pop' for a string.
233+
void genPopString(mlir::Value srcAddr, mlir::Value oldData,
234+
mlir::Value length,
235+
std::optional<mlir::Location> locArg = std::nullopt);
236+
237+
/// Generates {slot, offset} for string storage indexing.
238+
Value genStringItemAddress(mlir::Value srcAddr, mlir::Value idx,
239+
std::optional<Location> locArg = std::nullopt);
196240

197241
/// Generates an assertion that the tuple size should be less than `size`.
198242
void genABITupleSizeAssert(TypeRange tys, Value size,

mlir/include/mlir/Conversion/SolToStandard/Util.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,18 @@ class BuilderExt {
154154
loc, add, genI256Const(~(llvm::APInt(/*numBits=*/256, multiple - 1))));
155155
}
156156

157+
/// Generates ceil(\p val / multiple), where `multiple` is a compile-time
/// power of 2 (enforced by the static_assert in the body).
///
/// The result is computed as the unsigned division
/// (\p val + (multiple - 1)) / multiple, with both constants materialized
/// through genI256Const. Ops are created at \p locArg when it is set,
/// otherwise at `defLoc`.
158+
template <unsigned multiple>
159+
mlir::Value
160+
genCeilDivision(mlir::Value val,
161+
std::optional<mlir::Location> locArg = std::nullopt) {
162+
static_assert(llvm::isPowerOf2_32(multiple));
163+
mlir::Location loc = locArg ? *locArg : defLoc;
164+
// Bias the dividend by (multiple - 1) so the unsigned division rounds up.
// NOTE(review): this is a plain AddIOp; if `val` is within (multiple - 1) of
// the type's maximum the sum would wrap and the result would not be the true
// ceiling -- confirm callers keep `val` in range.
auto add =
165+
b.create<mlir::arith::AddIOp>(loc, val, genI256Const(multiple - 1));
166+
return b.create<mlir::arith::DivUIOp>(loc, add, genI256Const(multiple));
167+
}
168+
157169
/// Returns an existing or a new (if not found) FuncOp in the ModuleOp `mod`.
158170
sol::FuncOp getOrInsertFuncOp(StringRef name, FunctionType fnTy,
159171
LLVM::Linkage linkage, ModuleOp mod,

mlir/include/mlir/Dialect/Sol/SolOps.td

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,36 @@ def Sol_PushOp : Sol_Op<"push"> {
207207
let assemblyFormat = "$inp attr-dict `:` type($inp) `->` type($addr)";
208208
}
209209

210+
// String push operations are separate from Sol_PushOp because Solidity strings
211+
// use a dual storage encoding that requires special handling:
212+
//
213+
// - Short strings (< 32 bytes): data and length are packed into a single
214+
// storage slot ("in-place" encoding).
215+
// - Long strings (>= 32 bytes): the slot holds `length * 2 + 1` and the
216+
// byte data is stored starting at keccak256(slot) ("out-of-place"
217+
// encoding).
218+
//
219+
// Appending a byte may trigger a transition from in-place to out-of-place
220+
// encoding at the 31-byte boundary, which generic Sol_PushOp does not handle.
221+
//
222+
// The two ops correspond to Solidity's two push() overloads on dynamic byte
223+
// arrays:
224+
// - Sol_PushStringOp : str.push(x) -- appends a specific byte value.
225+
// - Sol_PushOp : str.push() -- appends a zero byte and returns a
226+
// storage reference ({slot, offset}) to the new element so the caller can
227+
// write to it later.
228+
// We have two separate operations because implementing push(x) as push() = x
229+
// would introduce significant gas overhead. Storage would need to be read and
230+
// written twice: first when updating the length of the packed string
231+
// (inside push()), and again when inserting the new value.
232+
//
233+
// Operands of Sol_PushStringOp:
//   $addr  - the string operand (Sol_StringType).
//   $value - the value to push; either a Sol_BytesType or a Sol_Int.
// The op declares no results.
def Sol_PushStringOp : Sol_Op<"push_string"> {
234+
let arguments = (ins Sol_StringType:$addr,
235+
AnyTypeOf<[Sol_BytesType, Sol_Int]>:$value);
236+
// Textual form: `$addr, $value attr-dict : type($addr), type($value)`.
let assemblyFormat =
237+
"$addr `,` $value attr-dict `:` type($addr) `,` type($value)";
238+
}
239+
210240
def Sol_PopOp : Sol_Op<"pop"> {
211241
let arguments = (ins AnyType:$inp);
212242
let assemblyFormat = "$inp attr-dict `:` type($inp)";

0 commit comments

Comments
 (0)