Skip to content

Commit 8fbfa4f

Browse files
committed
Make subassembly IDs based on fixed-size 64 bit uint
1 parent baef892 commit 8fbfa4f

29 files changed

+186
-122
lines changed

Diff for: Changelog.md

+1
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ Compiler Features:
77

88

99
Bugfixes:
10+
* Commandline Interface: Fix possible inconsistency in subassembly IDs between target architectures in `--asm-json` output.
1011

1112

1213
### 0.8.29 (2025-03-12)

Diff for: libevmasm/Assembly.cpp

+37-33
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@
3939
#include <fmt/format.h>
4040

4141
#include <range/v3/algorithm/any_of.hpp>
42+
#include <range/v3/algorithm/find_if.hpp>
4243
#include <range/v3/view/drop_exactly.hpp>
4344
#include <range/v3/view/enumerate.hpp>
4445
#include <range/v3/view/map.hpp>
@@ -667,17 +668,17 @@ std::pair<std::shared_ptr<Assembly>, std::vector<std::string>> Assembly::fromJSO
667668
return std::make_pair(result, _level == 0 ? parsedSourceList : std::vector<std::string>{});
668669
}
669670

670-
void Assembly::encodeAllPossibleSubPathsInAssemblyTree(std::vector<size_t> _pathFromRoot, std::vector<Assembly*> _assembliesOnPath)
671+
void Assembly::encodeAllPossibleSubPathsInAssemblyTree(std::vector<SubAssemblyID> _pathFromRoot, std::vector<Assembly*> _assembliesOnPath)
671672
{
672673
_assembliesOnPath.push_back(this);
673-
for (_pathFromRoot.push_back(0); _pathFromRoot.back() < m_subs.size(); ++_pathFromRoot.back())
674+
for (_pathFromRoot.push_back(SubAssemblyID{0}); _pathFromRoot.back().value < m_subs.size(); ++_pathFromRoot.back().value)
674675
{
675676
for (size_t distanceFromRoot = 0; distanceFromRoot < _assembliesOnPath.size(); ++distanceFromRoot)
676677
_assembliesOnPath[distanceFromRoot]->encodeSubPath(
677678
_pathFromRoot | ranges::views::drop_exactly(distanceFromRoot) | ranges::to<std::vector>
678679
);
679680

680-
m_subs[_pathFromRoot.back()]->encodeAllPossibleSubPathsInAssemblyTree(_pathFromRoot, _assembliesOnPath);
681+
m_subs[static_cast<size_t>(_pathFromRoot.back().value)]->encodeAllPossibleSubPathsInAssemblyTree(_pathFromRoot, _assembliesOnPath);
681682
}
682683
}
683684

@@ -798,20 +799,21 @@ std::map<u256, u256> const& Assembly::optimiseInternal(
798799

799800
// Run optimisation for sub-assemblies.
800801
// TODO: verify and double-check this for EOF.
801-
for (size_t subId = 0; subId < m_subs.size(); ++subId)
802+
for (size_t subIDIndex = 0; subIDIndex < m_subs.size(); ++subIDIndex)
802803
{
804+
SubAssemblyID const subID{subIDIndex};
803805
OptimiserSettings settings = _settings;
804-
Assembly& sub = *m_subs[subId];
806+
Assembly& sub = *m_subs[subIDIndex];
805807
std::set<size_t> referencedTags;
806808
for (auto& codeSection: m_codeSections)
807-
referencedTags += JumpdestRemover::referencedTags(codeSection.items, subId);
809+
referencedTags += JumpdestRemover::referencedTags(codeSection.items, subID);
808810
std::map<u256, u256> const& subTagReplacements = sub.optimiseInternal(
809811
settings,
810812
referencedTags
811813
);
812814
// Apply the replacements (can be empty).
813815
for (auto& codeSection: m_codeSections)
814-
BlockDeduplicator::applyTagReplacement(codeSection.items, subTagReplacements, subId);
816+
BlockDeduplicator::applyTagReplacement(codeSection.items, subTagReplacements, subID);
815817
}
816818

817819
std::map<u256, u256> tagReplacements;
@@ -1188,7 +1190,7 @@ LinkerObject const& Assembly::assemble() const
11881190
[[nodiscard]] bytes Assembly::assembleTag(AssemblyItem const& _item, size_t _pos, bool _addJumpDest) const
11891191
{
11901192
solRequire(_item.data() != 0, AssemblyException, "Invalid tag position.");
1191-
solRequire(_item.splitForeignPushTag().first == std::numeric_limits<size_t>::max(), AssemblyException, "Foreign tag.");
1193+
solRequire(_item.splitForeignPushTag().first.empty(), AssemblyException, "Foreign tag.");
11921194
solRequire(_pos < 0xffffffffL, AssemblyException, "Tag too large.");
11931195
size_t tagId = static_cast<size_t>(_item.data());
11941196
solRequire(m_tagPositionsInBytecode[tagId] == std::numeric_limits<size_t>::max(), AssemblyException, "Duplicate tag position.");
@@ -1259,10 +1261,10 @@ LinkerObject const& Assembly::assembleLegacy() const
12591261
if (item.type() == PushTag)
12601262
{
12611263
auto [subId, tagId] = item.splitForeignPushTag();
1262-
if (subId == std::numeric_limits<size_t>::max())
1264+
if (subId.empty())
12631265
continue;
1264-
assertThrow(subId < m_subs.size(), AssemblyException, "Invalid sub id");
1265-
auto subTagPosition = m_subs[subId]->m_tagPositionsInBytecode.at(tagId);
1266+
assertThrow(subId.value < m_subs.size(), AssemblyException, "Invalid sub id");
1267+
auto subTagPosition = m_subs[static_cast<size_t>(subId.value)]->m_tagPositionsInBytecode.at(tagId);
12661268
assertThrow(subTagPosition != std::numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position.");
12671269
bytesPerTag = std::max(bytesPerTag, numberEncodingSize(subTagPosition));
12681270
}
@@ -1332,17 +1334,20 @@ LinkerObject const& Assembly::assembleLegacy() const
13321334
break;
13331335
case PushSub:
13341336
assembleInstruction([&]() {
1335-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1337+
assertThrow(item.data() <= std::numeric_limits<SubAssemblyID::value_type>::max(), AssemblyException, "");
13361338
ret.bytecode.push_back(dataRefPush);
1337-
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1339+
subRefs.emplace(
1340+
SubAssemblyID{static_cast<SubAssemblyID::value_type>(item.data())},
1341+
ret.bytecode.size()
1342+
);
13381343
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13391344
});
13401345
break;
13411346
case PushSubSize:
13421347
{
13431348
assembleInstruction([&](){
1344-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1345-
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1349+
assertThrow(item.data() <= std::numeric_limits<SubAssemblyID::value_type>::max(), AssemblyException, "");
1350+
auto s = subAssemblyById({static_cast<SubAssemblyID::value_type>(item.data())})->assemble().bytecode.size();
13461351
item.setPushedValue(u256(s));
13471352
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
13481353
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
@@ -1478,14 +1483,14 @@ LinkerObject const& Assembly::assembleLegacy() const
14781483
}
14791484
for (auto const& i: tagRefs)
14801485
{
1481-
size_t subId;
1486+
SubAssemblyID subId;
14821487
size_t tagId;
14831488
std::tie(subId, tagId) = i.second;
1484-
assertThrow(subId == std::numeric_limits<size_t>::max() || subId < m_subs.size(), AssemblyException, "Invalid sub id");
1489+
assertThrow(subId.empty() || subId.value < m_subs.size(), AssemblyException, "Invalid sub id");
14851490
std::vector<size_t> const& tagPositions =
1486-
subId == std::numeric_limits<size_t>::max() ?
1491+
subId.empty() ?
14871492
m_tagPositionsInBytecode :
1488-
m_subs[subId]->m_tagPositionsInBytecode;
1493+
m_subs[static_cast<size_t>(subId.value)]->m_tagPositionsInBytecode;
14891494
assertThrow(tagId < tagPositions.size(), AssemblyException, "Reference to non-existing tag.");
14901495
size_t pos = tagPositions[tagId];
14911496
assertThrow(pos != std::numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position.");
@@ -1796,47 +1801,46 @@ LinkerObject const& Assembly::assembleEOF() const
17961801
return ret;
17971802
}
17981803

1799-
std::vector<size_t> Assembly::decodeSubPath(size_t _subObjectId) const
1804+
std::vector<SubAssemblyID> Assembly::decodeSubPath(SubAssemblyID _subObjectId) const
18001805
{
1801-
if (_subObjectId < m_subs.size())
1806+
if (_subObjectId.value < m_subs.size())
18021807
return {_subObjectId};
18031808

1804-
auto subIdPathIt = find_if(
1805-
m_subPaths.begin(),
1806-
m_subPaths.end(),
1809+
auto subIdPathIt = ranges::find_if(
1810+
m_subPaths,
18071811
[_subObjectId](auto const& subId) { return subId.second == _subObjectId; }
18081812
);
18091813

18101814
assertThrow(subIdPathIt != m_subPaths.end(), AssemblyException, "");
18111815
return subIdPathIt->first;
18121816
}
18131817

1814-
size_t Assembly::encodeSubPath(std::vector<size_t> const& _subPath)
1818+
SubAssemblyID Assembly::encodeSubPath(std::vector<SubAssemblyID> const& _subPath)
18151819
{
18161820
assertThrow(!_subPath.empty(), AssemblyException, "");
18171821
if (_subPath.size() == 1)
18181822
{
1819-
assertThrow(_subPath[0] < m_subs.size(), AssemblyException, "");
1823+
assertThrow(_subPath[0].value < m_subs.size(), AssemblyException, "");
18201824
return _subPath[0];
18211825
}
18221826

1823-
if (m_subPaths.find(_subPath) == m_subPaths.end())
1827+
if (!m_subPaths.contains(_subPath))
18241828
{
1825-
size_t objectId = std::numeric_limits<size_t>::max() - m_subPaths.size();
1826-
assertThrow(objectId >= m_subs.size(), AssemblyException, "");
1829+
SubAssemblyID const objectId{std::numeric_limits<SubAssemblyID::value_type>::max() - m_subPaths.size()};
1830+
assertThrow(objectId.value >= m_subs.size(), AssemblyException, "");
18271831
m_subPaths[_subPath] = objectId;
18281832
}
18291833

18301834
return m_subPaths[_subPath];
18311835
}
18321836

1833-
Assembly const* Assembly::subAssemblyById(size_t _subId) const
1837+
Assembly const* Assembly::subAssemblyById(SubAssemblyID const _subId) const
18341838
{
1835-
std::vector<size_t> subIds = decodeSubPath(_subId);
1839+
std::vector<SubAssemblyID> subIDs = decodeSubPath(_subId);
18361840
Assembly const* currentAssembly = this;
1837-
for (size_t currentSubId: subIds)
1841+
for (auto [subIDIndex]: subIDs)
18381842
{
1839-
currentAssembly = currentAssembly->m_subs.at(currentSubId).get();
1843+
currentAssembly = currentAssembly->m_subs.at(static_cast<size_t>(subIDIndex)).get();
18401844
assertThrow(currentAssembly, AssemblyException, "");
18411845
}
18421846

Diff for: libevmasm/Assembly.h

+20-11
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <libevmasm/AssemblyItem.h>
2424
#include <libevmasm/LinkerObject.h>
2525
#include <libevmasm/Exceptions.h>
26+
#include <libevmasm/SubAssemblyID.h>
2627

2728
#include <liblangutil/DebugInfoSelection.h>
2829
#include <liblangutil/EVMVersion.h>
@@ -47,9 +48,9 @@ using AssemblyPointer = std::shared_ptr<Assembly>;
4748

4849
class Assembly
4950
{
50-
using TagRefs = std::map<size_t, std::pair<size_t, size_t>>;
51+
using TagRefs = std::map<size_t, std::pair<SubAssemblyID, size_t>>;
5152
using DataRefs = std::multimap<util::h256, unsigned>;
52-
using SubAssemblyRefs = std::multimap<size_t, size_t>;
53+
using SubAssemblyRefs = std::multimap<SubAssemblyID, size_t>;
5354
using ProgramSizeRefs = std::vector<unsigned>;
5455
using LinkRef = std::pair<size_t, std::string>;
5556

@@ -81,8 +82,16 @@ class Assembly
8182
AssemblyItem newData(bytes const& _data) { util::h256 h(util::keccak256(util::asString(_data))); m_data[h] = _data; return AssemblyItem(PushData, h); }
8283
bytes const& data(util::h256 const& _i) const { return m_data.at(_i); }
8384
AssemblyItem newSub(AssemblyPointer const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); }
84-
Assembly const& sub(size_t _sub) const { return *m_subs.at(_sub); }
85-
Assembly& sub(size_t _sub) { return *m_subs.at(_sub); }
85+
Assembly const& sub(SubAssemblyID const _sub) const
86+
{
87+
solAssert(_sub.value <= std::numeric_limits<size_t>::max());
88+
return *m_subs.at(static_cast<size_t>(_sub.value));
89+
}
90+
Assembly& sub(SubAssemblyID const _sub)
91+
{
92+
solAssert(_sub.value <= std::numeric_limits<size_t>::max());
93+
return *m_subs.at(static_cast<size_t>(_sub.value));
94+
}
8695
size_t numSubs() const { return m_subs.size(); }
8796
AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); }
8897
AssemblyItem newPushLibraryAddress(std::string const& _identifier);
@@ -142,9 +151,9 @@ class Assembly
142151
/// Adds a subroutine to the code (in the data section) and pushes its size (via a tag)
143152
/// on the stack. @returns the pushsub assembly item.
144153
AssemblyItem appendSubroutine(AssemblyPointer const& _assembly) { auto sub = newSub(_assembly); append(newPushSubSize(size_t(sub.data()))); return sub; }
145-
void pushSubroutineSize(size_t _subRoutine) { append(newPushSubSize(_subRoutine)); }
154+
void pushSubroutineSize(SubAssemblyID _subRoutine) { append(newPushSubSize(_subRoutine.value)); }
146155
/// Pushes the offset of the subroutine.
147-
void pushSubroutineOffset(size_t _subRoutine) { append(AssemblyItem(PushSub, _subRoutine)); }
156+
void pushSubroutineOffset(SubAssemblyID _subRoutine) { append(AssemblyItem(PushSub, _subRoutine.value)); }
148157

149158
/// Appends @a _data literally to the very end of the bytecode.
150159
void appendToAuxiliaryData(bytes const& _data) { m_auxiliaryData += _data; }
@@ -216,8 +225,8 @@ class Assembly
216225
/// Mark this assembly as invalid. Calling ``assemble`` on it will throw.
217226
void markAsInvalid() { m_invalid = true; }
218227

219-
std::vector<size_t> decodeSubPath(size_t _subObjectId) const;
220-
size_t encodeSubPath(std::vector<size_t> const& _subPath);
228+
std::vector<SubAssemblyID> decodeSubPath(SubAssemblyID _subObjectId) const;
229+
SubAssemblyID encodeSubPath(std::vector<SubAssemblyID> const& _subPath);
221230

222231
bool isCreation() const { return m_creation; }
223232

@@ -265,9 +274,9 @@ class Assembly
265274
private:
266275
bool m_invalid = false;
267276

268-
Assembly const* subAssemblyById(size_t _subId) const;
277+
Assembly const* subAssemblyById(SubAssemblyID _subId) const;
269278

270-
void encodeAllPossibleSubPathsInAssemblyTree(std::vector<size_t> _pathFromRoot = {}, std::vector<Assembly*> _assembliesOnPath = {});
279+
void encodeAllPossibleSubPathsInAssemblyTree(std::vector<SubAssemblyID> _pathFromRoot = {}, std::vector<Assembly*> _assembliesOnPath = {});
271280

272281
std::shared_ptr<std::string const> sharedSourceName(std::string const& _name) const;
273282

@@ -315,7 +324,7 @@ class Assembly
315324

316325
/// Map from a vector representing a path to a particular sub assembly to sub assembly id.
317326
/// This map is used only for sub-assemblies which are not direct sub-assemblies (i.e. whose path has more than one element).
318-
std::map<std::vector<size_t>, size_t> m_subPaths;
327+
std::map<std::vector<SubAssemblyID>, SubAssemblyID> m_subPaths;
319328

320329
/// Contains the tag replacements relevant for super-assemblies.
321330
/// If set, it means the optimizer has run and we will not run it again.

Diff for: libevmasm/AssemblyItem.cpp

+16-16
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ std::string toStringInHex(u256 _value)
5050

5151
}
5252

53-
AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const
53+
AssemblyItem AssemblyItem::toSubAssemblyTag(SubAssemblyID _subId) const
5454
{
5555
assertThrow(data() < (u256(1) << 64), util::Exception, "Tag already has subassembly set.");
5656
assertThrow(m_type == PushTag || m_type == Tag, util::Exception, "");
@@ -61,11 +61,11 @@ AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const
6161
return r;
6262
}
6363

64-
std::pair<size_t, size_t> AssemblyItem::splitForeignPushTag() const
64+
std::pair<SubAssemblyID, size_t> AssemblyItem::splitForeignPushTag() const
6565
{
6666
solAssert(m_type == PushTag || m_type == Tag || m_type == RelativeJump || m_type == ConditionalRelativeJump);
6767
u256 combined = u256(data());
68-
size_t subId = static_cast<size_t>((combined >> 64) - 1);
68+
SubAssemblyID subId = {static_cast<SubAssemblyID::value_type>((combined >> 64) - 1)};
6969
size_t tag = static_cast<size_t>(combined & 0xffffffffffffffffULL);
7070
return std::make_pair(subId, tag);
7171
}
@@ -74,7 +74,7 @@ size_t AssemblyItem::relativeJumpTagID() const
7474
{
7575
solAssert(m_type == RelativeJump || m_type == ConditionalRelativeJump);
7676
auto const [subId, tagId] = splitForeignPushTag();
77-
solAssert(subId == std::numeric_limits<size_t>::max(), "Relative jump to sub");
77+
solAssert(subId.empty(), "Relative jump to sub");
7878
return tagId;
7979
}
8080

@@ -130,13 +130,13 @@ std::pair<std::string, std::string> AssemblyItem::nameAndData(langutil::EVMVersi
130130
util::unreachable();
131131
}
132132

133-
void AssemblyItem::setPushTagSubIdAndTag(size_t _subId, size_t _tag)
133+
void AssemblyItem::setPushTagSubIdAndTag(SubAssemblyID _subId, size_t _tag)
134134
{
135135
solAssert(m_type == PushTag || m_type == Tag || m_type == RelativeJump || m_type == ConditionalRelativeJump);
136-
solAssert(!(m_type == RelativeJump || m_type == ConditionalRelativeJump) || _subId == std::numeric_limits<size_t>::max());
136+
solAssert(!(m_type == RelativeJump || m_type == ConditionalRelativeJump) || _subId.empty());
137137
u256 data = _tag;
138-
if (_subId != std::numeric_limits<size_t>::max())
139-
data |= (u256(_subId) + 1) << 64;
138+
if (!_subId.empty())
139+
data |= (u256(_subId.value) + 1) << 64;
140140
setData(data);
141141
}
142142

@@ -352,13 +352,13 @@ std::string AssemblyItem::toAssemblyText(Assembly const& _assembly) const
352352
break;
353353
case PushTag:
354354
{
355-
size_t sub{0};
355+
SubAssemblyID sub;
356356
size_t tag{0};
357357
std::tie(sub, tag) = splitForeignPushTag();
358-
if (sub == std::numeric_limits<size_t>::max())
358+
if (sub.empty())
359359
text = std::string("tag_") + std::to_string(tag);
360360
else
361-
text = std::string("tag_") + std::to_string(sub) + "_" + std::to_string(tag);
361+
text = std::string("tag_") + std::to_string(sub.value) + "_" + std::to_string(tag);
362362
break;
363363
}
364364
case Tag:
@@ -372,8 +372,8 @@ std::string AssemblyItem::toAssemblyText(Assembly const& _assembly) const
372372
case PushSubSize:
373373
{
374374
std::vector<std::string> subPathComponents;
375-
for (size_t subPathComponentId: _assembly.decodeSubPath(static_cast<size_t>(data())))
376-
subPathComponents.emplace_back("sub_" + std::to_string(subPathComponentId));
375+
for (SubAssemblyID subPathComponentId: _assembly.decodeSubPath({static_cast<SubAssemblyID::value_type>(data())}))
376+
subPathComponents.emplace_back("sub_" + std::to_string(subPathComponentId.value));
377377
text =
378378
(type() == PushSub ? "dataOffset"s : "dataSize"s) +
379379
"(" +
@@ -469,11 +469,11 @@ std::ostream& solidity::evmasm::operator<<(std::ostream& _out, AssemblyItem cons
469469
break;
470470
case PushTag:
471471
{
472-
size_t subId = _item.splitForeignPushTag().first;
473-
if (subId == std::numeric_limits<size_t>::max())
472+
SubAssemblyID subId = _item.splitForeignPushTag().first;
473+
if (subId.empty())
474474
_out << " PushTag " << _item.splitForeignPushTag().second;
475475
else
476-
_out << " PushTag " << subId << ":" << _item.splitForeignPushTag().second;
476+
_out << " PushTag " << subId.value << ":" << _item.splitForeignPushTag().second;
477477
break;
478478
}
479479
case Tag:

Diff for: libevmasm/AssemblyItem.h

+4-3
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424

2525
#include <libevmasm/Instruction.h>
2626
#include <libevmasm/Exceptions.h>
27+
#include <libevmasm/SubAssemblyID.h>
2728
#include <liblangutil/DebugData.h>
2829
#include <liblangutil/Exceptions.h>
2930
#include <libsolutil/Common.h>
@@ -169,14 +170,14 @@ class AssemblyItem
169170
AssemblyItem pushTag() const { solAssert(m_type == PushTag || m_type == Tag || m_type == RelativeJump || m_type == ConditionalRelativeJump); return AssemblyItem(PushTag, data()); }
170171
/// Converts the tag to a subassembly tag. This has to be called in order to move a tag across assemblies.
171172
/// @param _subId the identifier of the subassembly the tag is taken from.
172-
AssemblyItem toSubAssemblyTag(size_t _subId) const;
173+
AssemblyItem toSubAssemblyTag(SubAssemblyID _subId) const;
173174
/// @returns the data of the push tag, split into the sub assembly id and the actual tag id.
174175
/// The sub assembly id of non-foreign push tags is empty (i.e. `SubAssemblyID::empty()` returns true for it).
175-
std::pair<size_t, size_t> splitForeignPushTag() const;
176+
std::pair<SubAssemblyID, size_t> splitForeignPushTag() const;
176177
/// @returns relative jump target tag ID. Asserts that it is not foreign tag.
177178
size_t relativeJumpTagID() const;
178179
/// Sets sub-assembly part and tag for a push tag.
179-
void setPushTagSubIdAndTag(size_t _subId, size_t _tag);
180+
void setPushTagSubIdAndTag(SubAssemblyID _subId, size_t _tag);
180181

181182
AssemblyItemType type() const { return m_type; }
182183
u256 const& data() const { solAssert(m_type != Operation && m_data != nullptr); return *m_data; }

0 commit comments

Comments
 (0)