Skip to content

Commit 0d158a4

Browse files
committed
Make subassembly IDs based on fixed-size 64 bit uint
1 parent 614646b commit 0d158a4

29 files changed

+198
-127
lines changed

Diff for: Changelog.md

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Compiler Features:
88

99

1010
Bugfixes:
11+
* Commandline Interface: Fix possible inconsistency in subassembly IDs between target architectures in `--asm-json` output.
1112
* SMTChecker: Fix incorrect analysis when only a subset of contracts is selected with `--model-checker-contracts`.
1213
* SMTChecker: Fix internal compiler error when string literal is used to initialize user-defined type based on fixed bytes.
1314

Diff for: libevmasm/Assembly.cpp

+34-35
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include <fmt/format.h>
4040

4141
#include <range/v3/algorithm/any_of.hpp>
42+
#include <range/v3/algorithm/find_if.hpp>
4243
#include <range/v3/view/drop_exactly.hpp>
4344
#include <range/v3/view/enumerate.hpp>
4445
#include <range/v3/view/map.hpp>
@@ -667,17 +668,17 @@ std::pair<std::shared_ptr<Assembly>, std::vector<std::string>> Assembly::fromJSO
667668
return std::make_pair(result, _level == 0 ? parsedSourceList : std::vector<std::string>{});
668669
}
669670

670-
void Assembly::encodeAllPossibleSubPathsInAssemblyTree(std::vector<size_t> _pathFromRoot, std::vector<Assembly*> _assembliesOnPath)
671+
void Assembly::encodeAllPossibleSubPathsInAssemblyTree(std::vector<SubAssemblyID> _pathFromRoot, std::vector<Assembly*> _assembliesOnPath)
671672
{
672673
_assembliesOnPath.push_back(this);
673-
for (_pathFromRoot.push_back(0); _pathFromRoot.back() < m_subs.size(); ++_pathFromRoot.back())
674+
for (_pathFromRoot.push_back(SubAssemblyID{0}); _pathFromRoot.back().value < m_subs.size(); ++_pathFromRoot.back().value)
674675
{
675676
for (size_t distanceFromRoot = 0; distanceFromRoot < _assembliesOnPath.size(); ++distanceFromRoot)
676677
_assembliesOnPath[distanceFromRoot]->encodeSubPath(
677678
_pathFromRoot | ranges::views::drop_exactly(distanceFromRoot) | ranges::to<std::vector>
678679
);
679680

680-
m_subs[_pathFromRoot.back()]->encodeAllPossibleSubPathsInAssemblyTree(_pathFromRoot, _assembliesOnPath);
681+
m_subs[_pathFromRoot.back().asIndex()]->encodeAllPossibleSubPathsInAssemblyTree(_pathFromRoot, _assembliesOnPath);
681682
}
682683
}
683684

@@ -798,20 +799,20 @@ std::map<u256, u256> const& Assembly::optimiseInternal(
798799

799800
// Run optimisation for sub-assemblies.
800801
// TODO: verify and double-check this for EOF.
801-
for (size_t subId = 0; subId < m_subs.size(); ++subId)
802+
for (SubAssemblyID subID {0}; subID.value < m_subs.size(); ++subID.value)
802803
{
803804
OptimiserSettings settings = _settings;
804-
Assembly& sub = *m_subs[subId];
805+
Assembly& sub = *m_subs[subID.asIndex()];
805806
std::set<size_t> referencedTags;
806807
for (auto& codeSection: m_codeSections)
807-
referencedTags += JumpdestRemover::referencedTags(codeSection.items, subId);
808+
referencedTags += JumpdestRemover::referencedTags(codeSection.items, subID);
808809
std::map<u256, u256> const& subTagReplacements = sub.optimiseInternal(
809810
settings,
810811
referencedTags
811812
);
812813
// Apply the replacements (can be empty).
813814
for (auto& codeSection: m_codeSections)
814-
BlockDeduplicator::applyTagReplacement(codeSection.items, subTagReplacements, subId);
815+
BlockDeduplicator::applyTagReplacement(codeSection.items, subTagReplacements, subID);
815816
}
816817

817818
std::map<u256, u256> tagReplacements;
@@ -1188,7 +1189,7 @@ LinkerObject const& Assembly::assemble() const
11881189
[[nodiscard]] bytes Assembly::assembleTag(AssemblyItem const& _item, size_t _pos, bool _addJumpDest) const
11891190
{
11901191
solRequire(_item.data() != 0, AssemblyException, "Invalid tag position.");
1191-
solRequire(_item.splitForeignPushTag().first == std::numeric_limits<size_t>::max(), AssemblyException, "Foreign tag.");
1192+
solRequire(_item.splitForeignPushTag().first.empty(), AssemblyException, "Foreign tag.");
11921193
solRequire(_pos < 0xffffffffL, AssemblyException, "Tag too large.");
11931194
size_t tagId = static_cast<size_t>(_item.data());
11941195
solRequire(m_tagPositionsInBytecode[tagId] == std::numeric_limits<size_t>::max(), AssemblyException, "Duplicate tag position.");
@@ -1259,10 +1260,10 @@ LinkerObject const& Assembly::assembleLegacy() const
12591260
if (item.type() == PushTag)
12601261
{
12611262
auto [subId, tagId] = item.splitForeignPushTag();
1262-
if (subId == std::numeric_limits<size_t>::max())
1263+
if (subId.empty())
12631264
continue;
1264-
assertThrow(subId < m_subs.size(), AssemblyException, "Invalid sub id");
1265-
auto subTagPosition = m_subs[subId]->m_tagPositionsInBytecode.at(tagId);
1265+
solRequire(subId.value < m_subs.size(), AssemblyException, "Invalid sub id");
1266+
auto subTagPosition = m_subs[subId.asIndex()]->m_tagPositionsInBytecode.at(tagId);
12661267
assertThrow(subTagPosition != std::numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position.");
12671268
bytesPerTag = std::max(bytesPerTag, numberEncodingSize(subTagPosition));
12681269
}
@@ -1332,17 +1333,18 @@ LinkerObject const& Assembly::assembleLegacy() const
13321333
break;
13331334
case PushSub:
13341335
assembleInstruction([&]() {
1335-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
13361336
ret.bytecode.push_back(dataRefPush);
1337-
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1337+
subRefs.emplace(
1338+
SubAssemblyID{item.data()},
1339+
ret.bytecode.size()
1340+
);
13381341
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13391342
});
13401343
break;
13411344
case PushSubSize:
13421345
{
13431346
assembleInstruction([&](){
1344-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1345-
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1347+
auto s = subAssemblyById(SubAssemblyID{item.data()})->assemble().bytecode.size();
13461348
item.setPushedValue(u256(s));
13471349
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
13481350
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
@@ -1478,14 +1480,12 @@ LinkerObject const& Assembly::assembleLegacy() const
14781480
}
14791481
for (auto const& i: tagRefs)
14801482
{
1481-
size_t subId;
1482-
size_t tagId;
1483-
std::tie(subId, tagId) = i.second;
1484-
assertThrow(subId == std::numeric_limits<size_t>::max() || subId < m_subs.size(), AssemblyException, "Invalid sub id");
1483+
auto [subId, tagId] = i.second;
1484+
solRequire(subId.empty() || subId.value < m_subs.size(), AssemblyException, "Invalid sub id");
14851485
std::vector<size_t> const& tagPositions =
1486-
subId == std::numeric_limits<size_t>::max() ?
1486+
subId.empty() ?
14871487
m_tagPositionsInBytecode :
1488-
m_subs[subId]->m_tagPositionsInBytecode;
1488+
m_subs[subId.asIndex()]->m_tagPositionsInBytecode;
14891489
assertThrow(tagId < tagPositions.size(), AssemblyException, "Reference to non-existing tag.");
14901490
size_t pos = tagPositions[tagId];
14911491
assertThrow(pos != std::numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position.");
@@ -1796,47 +1796,46 @@ LinkerObject const& Assembly::assembleEOF() const
17961796
return ret;
17971797
}
17981798

1799-
std::vector<size_t> Assembly::decodeSubPath(size_t _subObjectId) const
1799+
std::vector<SubAssemblyID> Assembly::decodeSubPath(SubAssemblyID _subObjectId) const
18001800
{
1801-
if (_subObjectId < m_subs.size())
1801+
if (_subObjectId.value < m_subs.size())
18021802
return {_subObjectId};
18031803

1804-
auto subIdPathIt = find_if(
1805-
m_subPaths.begin(),
1806-
m_subPaths.end(),
1804+
auto subIdPathIt = ranges::find_if(
1805+
m_subPaths,
18071806
[_subObjectId](auto const& subId) { return subId.second == _subObjectId; }
18081807
);
18091808

18101809
assertThrow(subIdPathIt != m_subPaths.end(), AssemblyException, "");
18111810
return subIdPathIt->first;
18121811
}
18131812

1814-
size_t Assembly::encodeSubPath(std::vector<size_t> const& _subPath)
1813+
SubAssemblyID Assembly::encodeSubPath(std::vector<SubAssemblyID> const& _subPath)
18151814
{
18161815
assertThrow(!_subPath.empty(), AssemblyException, "");
18171816
if (_subPath.size() == 1)
18181817
{
1819-
assertThrow(_subPath[0] < m_subs.size(), AssemblyException, "");
1818+
solAssert(_subPath[0].value < m_subs.size());
18201819
return _subPath[0];
18211820
}
18221821

1823-
if (m_subPaths.find(_subPath) == m_subPaths.end())
1822+
if (!m_subPaths.contains(_subPath))
18241823
{
1825-
size_t objectId = std::numeric_limits<size_t>::max() - m_subPaths.size();
1826-
assertThrow(objectId >= m_subs.size(), AssemblyException, "");
1824+
SubAssemblyID const objectId{std::numeric_limits<SubAssemblyID::value_type>::max() - m_subPaths.size()};
1825+
solAssert(objectId.value >= m_subs.size());
18271826
m_subPaths[_subPath] = objectId;
18281827
}
18291828

18301829
return m_subPaths[_subPath];
18311830
}
18321831

1833-
Assembly const* Assembly::subAssemblyById(size_t _subId) const
1832+
Assembly const* Assembly::subAssemblyById(SubAssemblyID const _subId) const
18341833
{
1835-
std::vector<size_t> subIds = decodeSubPath(_subId);
1834+
std::vector<SubAssemblyID> subIDs = decodeSubPath(_subId);
18361835
Assembly const* currentAssembly = this;
1837-
for (size_t currentSubId: subIds)
1836+
for (auto const& subID: subIDs)
18381837
{
1839-
currentAssembly = currentAssembly->m_subs.at(currentSubId).get();
1838+
currentAssembly = currentAssembly->m_subs.at(subID.asIndex()).get();
18401839
assertThrow(currentAssembly, AssemblyException, "");
18411840
}
18421841

Diff for: libevmasm/Assembly.h

+12-11
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <libevmasm/AssemblyItem.h>
2424
#include <libevmasm/LinkerObject.h>
2525
#include <libevmasm/Exceptions.h>
26+
#include <libevmasm/SubAssemblyID.h>
2627

2728
#include <liblangutil/DebugInfoSelection.h>
2829
#include <liblangutil/EVMVersion.h>
@@ -47,9 +48,9 @@ using AssemblyPointer = std::shared_ptr<Assembly>;
4748

4849
class Assembly
4950
{
50-
using TagRefs = std::map<size_t, std::pair<size_t, size_t>>;
51+
using TagRefs = std::map<size_t, std::pair<SubAssemblyID, size_t>>;
5152
using DataRefs = std::multimap<util::h256, unsigned>;
52-
using SubAssemblyRefs = std::multimap<size_t, size_t>;
53+
using SubAssemblyRefs = std::multimap<SubAssemblyID, size_t>;
5354
using ProgramSizeRefs = std::vector<unsigned>;
5455
using LinkRef = std::pair<size_t, std::string>;
5556

@@ -81,8 +82,8 @@ class Assembly
8182
AssemblyItem newData(bytes const& _data) { util::h256 h(util::keccak256(util::asString(_data))); m_data[h] = _data; return AssemblyItem(PushData, h); }
8283
bytes const& data(util::h256 const& _i) const { return m_data.at(_i); }
8384
AssemblyItem newSub(AssemblyPointer const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); }
84-
Assembly const& sub(size_t _sub) const { return *m_subs.at(_sub); }
85-
Assembly& sub(size_t _sub) { return *m_subs.at(_sub); }
85+
Assembly const& sub(SubAssemblyID const _sub) const { return *m_subs.at(_sub.asIndex()); }
86+
Assembly& sub(SubAssemblyID const _sub) { return *m_subs.at(_sub.asIndex()); }
8687
size_t numSubs() const { return m_subs.size(); }
8788
AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); }
8889
AssemblyItem newPushLibraryAddress(std::string const& _identifier);
@@ -142,9 +143,9 @@ class Assembly
142143
/// Adds a subroutine to the code (in the data section) and pushes its size (via a tag)
143144
/// on the stack. @returns the pushsub assembly item.
144145
AssemblyItem appendSubroutine(AssemblyPointer const& _assembly) { auto sub = newSub(_assembly); append(newPushSubSize(size_t(sub.data()))); return sub; }
145-
void pushSubroutineSize(size_t _subRoutine) { append(newPushSubSize(_subRoutine)); }
146+
void pushSubroutineSize(SubAssemblyID _subRoutine) { append(newPushSubSize(_subRoutine.value)); }
146147
/// Pushes the offset of the subroutine.
147-
void pushSubroutineOffset(size_t _subRoutine) { append(AssemblyItem(PushSub, _subRoutine)); }
148+
void pushSubroutineOffset(SubAssemblyID _subRoutine) { append(AssemblyItem(PushSub, _subRoutine.value)); }
148149

149150
/// Appends @a _data literally to the very end of the bytecode.
150151
void appendToAuxiliaryData(bytes const& _data) { m_auxiliaryData += _data; }
@@ -216,8 +217,8 @@ class Assembly
216217
/// Mark this assembly as invalid. Calling ``assemble`` on it will throw.
217218
void markAsInvalid() { m_invalid = true; }
218219

219-
std::vector<size_t> decodeSubPath(size_t _subObjectId) const;
220-
size_t encodeSubPath(std::vector<size_t> const& _subPath);
220+
std::vector<SubAssemblyID> decodeSubPath(SubAssemblyID _subObjectId) const;
221+
SubAssemblyID encodeSubPath(std::vector<SubAssemblyID> const& _subPath);
221222

222223
bool isCreation() const { return m_creation; }
223224

@@ -265,9 +266,9 @@ class Assembly
265266
private:
266267
bool m_invalid = false;
267268

268-
Assembly const* subAssemblyById(size_t _subId) const;
269+
Assembly const* subAssemblyById(SubAssemblyID _subId) const;
269270

270-
void encodeAllPossibleSubPathsInAssemblyTree(std::vector<size_t> _pathFromRoot = {}, std::vector<Assembly*> _assembliesOnPath = {});
271+
void encodeAllPossibleSubPathsInAssemblyTree(std::vector<SubAssemblyID> _pathFromRoot = {}, std::vector<Assembly*> _assembliesOnPath = {});
271272

272273
std::shared_ptr<std::string const> sharedSourceName(std::string const& _name) const;
273274

@@ -315,7 +316,7 @@ class Assembly
315316

316317
/// Map from a vector representing a path to a particular sub assembly to sub assembly id.
317318
/// This map is used only for sub-assemblies which are not direct sub-assemblies (i.e. where the path has more than one element).
318-
std::map<std::vector<size_t>, size_t> m_subPaths;
319+
std::map<std::vector<SubAssemblyID>, SubAssemblyID> m_subPaths;
319320

320321
/// Contains the tag replacements relevant for super-assemblies.
321322
/// If set, it means the optimizer has run and we will not run it again.

Diff for: libevmasm/AssemblyItem.cpp

+18-19
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ std::string toStringInHex(u256 _value)
5050

5151
}
5252

53-
AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const
53+
AssemblyItem AssemblyItem::toSubAssemblyTag(SubAssemblyID _subId) const
5454
{
5555
assertThrow(data() < (u256(1) << 64), util::Exception, "Tag already has subassembly set.");
5656
assertThrow(m_type == PushTag || m_type == Tag, util::Exception, "");
@@ -61,20 +61,21 @@ AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const
6161
return r;
6262
}
6363

64-
std::pair<size_t, size_t> AssemblyItem::splitForeignPushTag() const
64+
std::pair<SubAssemblyID, size_t> AssemblyItem::splitForeignPushTag() const
6565
{
6666
solAssert(m_type == PushTag || m_type == Tag || m_type == RelativeJump || m_type == ConditionalRelativeJump);
6767
u256 combined = u256(data());
68-
size_t subId = static_cast<size_t>((combined >> 64) - 1);
68+
// the combined u256 is 'dirty', so we can't use the conversion constructor of SubAssemblyID here
69+
SubAssemblyID subID {static_cast<SubAssemblyID::value_type>((combined >> 64) - 1)};
6970
size_t tag = static_cast<size_t>(combined & 0xffffffffffffffffULL);
70-
return std::make_pair(subId, tag);
71+
return std::make_pair(subID, tag);
7172
}
7273

7374
size_t AssemblyItem::relativeJumpTagID() const
7475
{
7576
solAssert(m_type == RelativeJump || m_type == ConditionalRelativeJump);
7677
auto const [subId, tagId] = splitForeignPushTag();
77-
solAssert(subId == std::numeric_limits<size_t>::max(), "Relative jump to sub");
78+
solAssert(subId.empty(), "Relative jump to sub");
7879
return tagId;
7980
}
8081

@@ -130,13 +131,13 @@ std::pair<std::string, std::string> AssemblyItem::nameAndData(langutil::EVMVersi
130131
util::unreachable();
131132
}
132133

133-
void AssemblyItem::setPushTagSubIdAndTag(size_t _subId, size_t _tag)
134+
void AssemblyItem::setPushTagSubIdAndTag(SubAssemblyID _subId, size_t _tag)
134135
{
135136
solAssert(m_type == PushTag || m_type == Tag || m_type == RelativeJump || m_type == ConditionalRelativeJump);
136-
solAssert(!(m_type == RelativeJump || m_type == ConditionalRelativeJump) || _subId == std::numeric_limits<size_t>::max());
137+
solAssert(!(m_type == RelativeJump || m_type == ConditionalRelativeJump) || _subId.empty());
137138
u256 data = _tag;
138-
if (_subId != std::numeric_limits<size_t>::max())
139-
data |= (u256(_subId) + 1) << 64;
139+
if (!_subId.empty())
140+
data |= (u256(_subId.value) + 1) << 64;
140141
setData(data);
141142
}
142143

@@ -352,13 +353,11 @@ std::string AssemblyItem::toAssemblyText(Assembly const& _assembly) const
352353
break;
353354
case PushTag:
354355
{
355-
size_t sub{0};
356-
size_t tag{0};
357-
std::tie(sub, tag) = splitForeignPushTag();
358-
if (sub == std::numeric_limits<size_t>::max())
356+
auto [sub, tag] = splitForeignPushTag();
357+
if (sub.empty())
359358
text = std::string("tag_") + std::to_string(tag);
360359
else
361-
text = std::string("tag_") + std::to_string(sub) + "_" + std::to_string(tag);
360+
text = std::string("tag_") + std::to_string(sub.value) + "_" + std::to_string(tag);
362361
break;
363362
}
364363
case Tag:
@@ -372,8 +371,8 @@ std::string AssemblyItem::toAssemblyText(Assembly const& _assembly) const
372371
case PushSubSize:
373372
{
374373
std::vector<std::string> subPathComponents;
375-
for (size_t subPathComponentId: _assembly.decodeSubPath(static_cast<size_t>(data())))
376-
subPathComponents.emplace_back("sub_" + std::to_string(subPathComponentId));
374+
for (SubAssemblyID subPathComponentId: _assembly.decodeSubPath(SubAssemblyID{data()}))
375+
subPathComponents.emplace_back("sub_" + std::to_string(subPathComponentId.value));
377376
text =
378377
(type() == PushSub ? "dataOffset"s : "dataSize"s) +
379378
"(" +
@@ -469,11 +468,11 @@ std::ostream& solidity::evmasm::operator<<(std::ostream& _out, AssemblyItem cons
469468
break;
470469
case PushTag:
471470
{
472-
size_t subId = _item.splitForeignPushTag().first;
473-
if (subId == std::numeric_limits<size_t>::max())
471+
SubAssemblyID subId = _item.splitForeignPushTag().first;
472+
if (subId.empty())
474473
_out << " PushTag " << _item.splitForeignPushTag().second;
475474
else
476-
_out << " PushTag " << subId << ":" << _item.splitForeignPushTag().second;
475+
_out << " PushTag " << subId.value << ":" << _item.splitForeignPushTag().second;
477476
break;
478477
}
479478
case Tag:

Diff for: libevmasm/AssemblyItem.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#include <libevmasm/Instruction.h>
2626
#include <libevmasm/Exceptions.h>
27+
#include <libevmasm/SubAssemblyID.h>
2728
#include <liblangutil/DebugData.h>
2829
#include <liblangutil/Exceptions.h>
2930
#include <libsolutil/Common.h>
@@ -168,14 +169,14 @@ class AssemblyItem
168169
AssemblyItem pushTag() const { solAssert(m_type == PushTag || m_type == Tag || m_type == RelativeJump || m_type == ConditionalRelativeJump); return AssemblyItem(PushTag, data()); }
169170
/// Converts the tag to a subassembly tag. This has to be called in order to move a tag across assemblies.
170171
/// @param _subId the identifier of the subassembly the tag is taken from.
171-
AssemblyItem toSubAssemblyTag(size_t _subId) const;
172+
AssemblyItem toSubAssemblyTag(SubAssemblyID _subId) const;
172173
/// @returns the data of the push tag, split into the sub assembly id and the actual tag id.
173174
/// The sub assembly id of non-foreign push tags is the empty id (its `empty()` returns true; the value is -1 wrapped to the unsigned id type).
174-
std::pair<size_t, size_t> splitForeignPushTag() const;
175+
std::pair<SubAssemblyID, size_t> splitForeignPushTag() const;
175176
/// @returns relative jump target tag ID. Asserts that it is not foreign tag.
176177
size_t relativeJumpTagID() const;
177178
/// Sets sub-assembly part and tag for a push tag.
178-
void setPushTagSubIdAndTag(size_t _subId, size_t _tag);
179+
void setPushTagSubIdAndTag(SubAssemblyID _subId, size_t _tag);
179180

180181
AssemblyItemType type() const { return m_type; }
181182
u256 const& data() const { solAssert(m_type != Operation && m_data != nullptr); return *m_data; }

0 commit comments

Comments
 (0)