ethereum · clonker · Jun 11, 2025 · Apr 12, 2025 · Apr 8, 2025 · Apr 11, 2025
diff --git a/Changelog.md b/Changelog.md
@@ -2,13 +2,11 @@
 
 Language Features:
 
-
 Compiler Features:
-
+* ethdebug: Experimental support for instructions and source locations under EOF.
 
 Bugfixes:
 
-
 ### 0.8.30 (2025-05-07)
 
 Compiler Features:

diff --git a/libevmasm/CMakeLists.txt b/libevmasm/CMakeLists.txt
@@ -6,6 +6,8 @@ set(sources
 	AssemblyItem.h
 	Ethdebug.cpp
 	Ethdebug.h
+	EthdebugSchema.cpp
+	EthdebugSchema.h
 	EVMAssemblyStack.cpp
 	EVMAssemblyStack.h
 	BlockDeduplicator.cpp

diff --git a/libevmasm/Ethdebug.cpp b/libevmasm/Ethdebug.cpp
@@ -18,79 +18,135 @@
 
 #include <libevmasm/Ethdebug.h>
 
+#include <libevmasm/EthdebugSchema.h>
+
+#include <range/v3/algorithm/any_of.hpp>
+
 using namespace solidity;
 using namespace solidity::evmasm;
 using namespace solidity::evmasm::ethdebug;
 
 namespace
 {
 
-Json programInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned _sourceId)
+schema::program::Instruction::Operation instructionOperation(Assembly const& _assembly, LinkerObject const& _linkerObject, size_t const _start, size_t const _end) // builds the operation (mnemonic plus optional argument bytes) for the bytecode range [_start, _end)
 {
-	solUnimplementedAssert(_assembly.eofVersion() == std::nullopt, "ethdebug does not yet support EOF.");
-	solUnimplementedAssert(_assembly.codeSections().size() == 1, "ethdebug does not yet support multiple code-sections.");
-	for (auto const& instruction: _assembly.codeSections()[0].items)
-		solUnimplementedAssert(instruction.type() != VerbatimBytecode, "Verbatim bytecode is currently not supported by ethdebug.");
-
-	solAssert(_linkerObject.codeSectionLocations.size() == 1);
-	solAssert(_linkerObject.codeSectionLocations[0].end <= _linkerObject.bytecode.size());
-	Json instructions = Json::array();
-	for (size_t i = 0; i < _linkerObject.codeSectionLocations[0].instructionLocations.size(); ++i)
+	solAssert(_end <= _linkerObject.bytecode.size()); // the range must lie within the linked bytecode
+	solAssert(_start < _end); // the range must contain at least the opcode byte
+	schema::program::Instruction::Operation operation;
+	operation.mnemonic = instructionInfo(static_cast<Instruction>(_linkerObject.bytecode[_start]), _assembly.evmVersion()).name; // the first byte of the range is interpreted as the opcode
+	static size_t constexpr instructionSize = 1; // the opcode itself occupies one byte
+	if (_start + instructionSize < _end) // any bytes following the opcode are its argument data
 	{
-		LinkerObject::InstructionLocation currentInstruction = _linkerObject.codeSectionLocations[0].instructionLocations[i];
-		size_t start = currentInstruction.start;
-		size_t end = currentInstruction.end;
-		size_t assemblyItemIndex = currentInstruction.assemblyItemIndex;
-		solAssert(end <= _linkerObject.bytecode.size());
-		solAssert(start < end);
-		solAssert(assemblyItemIndex < _assembly.codeSections().at(0).items.size());
-		Json operation = Json::object();
-		operation["mnemonic"] = instructionInfo(static_cast<Instruction>(_linkerObject.bytecode[start]), _assembly.evmVersion()).name;
-		static size_t constexpr instructionSize = 1;
-		if (start + instructionSize < end)
-		{
-			bytes const argumentData(
-				_linkerObject.bytecode.begin() + static_cast<std::ptrdiff_t>(start) + instructionSize,
-				_linkerObject.bytecode.begin() + static_cast<std::ptrdiff_t>(end)
-			);
-			solAssert(!argumentData.empty());
-			operation["arguments"] = Json::array({util::toHex(argumentData, util::HexPrefix::Add)});
-		}
-		langutil::SourceLocation const& location = _assembly.codeSections().at(0).items.at(assemblyItemIndex).location();
-		Json instruction = Json::object();
-		instruction["offset"] = start;
-		instruction["operation"] = operation;
-
-		instruction["context"] = Json::object();
-		instruction["context"]["code"] = Json::object();
-		instruction["context"]["code"]["source"] = Json::object();
-		instruction["context"]["code"]["source"]["id"] = static_cast<int>(_sourceId);
-
-		instruction["context"]["code"]["range"] = Json::object();
-		instruction["context"]["code"]["range"]["offset"] = location.start;
-		instruction["context"]["code"]["range"]["length"] = location.end - location.start;
-		instructions.emplace_back(instruction);
+		bytes const argumentData(
+			_linkerObject.bytecode.begin() + static_cast<std::ptrdiff_t>(_start) + instructionSize,
+			_linkerObject.bytecode.begin() + static_cast<std::ptrdiff_t>(_end)
+		);
+		solAssert(!argumentData.empty());
+		operation.arguments = {{schema::data::HexValue{argumentData}}}; // all argument bytes are stored as one single hex value
 	}
+	return operation;
+}
 
-	return instructions;
+schema::materials::SourceRange::Range locationRange(langutil::SourceLocation const& _location) // converts a source location into the schema's offset/length pair
+{
+	return {
+		.length = schema::data::Unsigned{_location.end - _location.start}, // NOTE(review): assumes end >= start; the visible caller only passes locations for which isValid() holds - confirm that this implies it
+		.offset = schema::data::Unsigned{_location.start}
+	};
 }
 
-} // anonymous namespace
+schema::materials::Reference sourceReference(unsigned _sourceID) // creates a reference that identifies a source by its numeric ID only
+{
+	return {
+		.id = schema::materials::ID{_sourceID},
+		.type = std::nullopt // the optional reference type is left unset
+	};
+}
 
-Json ethdebug::program(std::string_view _name, unsigned _sourceId, Assembly const* _assembly, LinkerObject const& _linkerObject)
+std::optional<schema::program::Context> instructionContext(Assembly::CodeSection const& _codeSection, size_t _assemblyItemIndex, unsigned _sourceID) // returns the source-range context of the given assembly item, or nullopt if the item has no valid source location
 {
-	Json result = Json::object();
-	result["contract"] = Json::object();
-	result["contract"]["name"] = _name;
-	result["contract"]["definition"] = Json::object();
-	result["contract"]["definition"]["source"] = Json::object();
-	result["contract"]["definition"]["source"]["id"] = _sourceId;
-	if (_assembly)
+	solAssert(_assemblyItemIndex < _codeSection.items.size()); // the index must refer to an existing item of this code section
+	langutil::SourceLocation const& location = _codeSection.items.at(_assemblyItemIndex).location();
+	if (!location.isValid())
+		return std::nullopt; // no context at all rather than one with an invalid range
+
+	return schema::program::Context{
+		schema::materials::SourceRange{
+			.source = sourceReference(_sourceID),
+			.range = locationRange(location)
+		},
+		std::nullopt, // NOTE(review): this and the next initializer are presumably the Context members other than the code range (variables/remark) - confirm the member order in EthdebugSchema.h
+		std::nullopt
+	};
+}
+
+std::vector<schema::program::Instruction> codeSectionInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned const _sourceID, size_t const _codeSectionIndex) // collects the ethdebug instructions of the code section with the given index
+{
+	solAssert(_codeSectionIndex < _linkerObject.codeSectionLocations.size()); // the index must be valid for the linker object...
+	solAssert(_codeSectionIndex < _assembly.codeSections().size()); // ...as well as for the assembly
+	auto const& locations = _linkerObject.codeSectionLocations[_codeSectionIndex];
+	auto const& codeSection = _assembly.codeSections().at(_codeSectionIndex);
+
+	std::vector<schema::program::Instruction> instructions;
+	instructions.reserve(codeSection.items.size()); // pre-allocates one slot per assembly item of the section
+
+	bool const codeSectionContainsVerbatim = ranges::any_of(
+		codeSection.items,
+		[](auto const& _instruction) { return _instruction.type() == VerbatimBytecode; }
+	);
+	solUnimplementedAssert(!codeSectionContainsVerbatim, "Verbatim bytecode is currently not supported by ethdebug."); // a single verbatim item rejects the whole section
+
+	for (auto const& currentInstruction: locations.instructionLocations)
 	{
-		result["environment"] = _assembly->isCreation() ? "create" : "call";
-		result["instructions"] = programInstructions(*_assembly, _linkerObject, _sourceId);
+		size_t const start = currentInstruction.start;
+		size_t const end = currentInstruction.end;
+
+		// some instructions do not contribute to the bytecode
+		if (start == end)
+			continue;
+
+		instructions.emplace_back(schema::program::Instruction{
+			.offset = schema::data::Unsigned{start}, // the start position from the linker object's instruction location
+			.operation = instructionOperation(_assembly, _linkerObject, start, end),
+			.context = instructionContext(codeSection, currentInstruction.assemblyItemIndex, _sourceID) // nullopt if the item has no valid source location
+		});
 	}
-	return result;
+
+	return instructions;
+}
+
+std::vector<schema::program::Instruction> programInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned const _sourceID) // concatenates the instructions of all code sections in section order
+{
+	auto const numCodeSections = _assembly.codeSections().size();
+	solAssert(numCodeSections == _linkerObject.codeSectionLocations.size()); // assembly and linker object must agree on the number of code sections
+
+	std::vector<schema::program::Instruction> instructionInfo;
+	for (size_t codeSectionIndex = 0; codeSectionIndex < numCodeSections; ++codeSectionIndex)
+		instructionInfo += codeSectionInstructions(_assembly, _linkerObject, _sourceID, codeSectionIndex); // NOTE(review): relies on a vector-appending operator+= that is not declared in this file (presumably from libsolutil) - confirm
+	return instructionInfo;
+}
+
+} // anonymous namespace
+
+Json ethdebug::program(std::string_view _name, unsigned _sourceID, Assembly const& _assembly, LinkerObject const& _linkerObject) // fills a schema::Program for the given contract; the struct is converted to Json on return
+{
+	return schema::Program{
+		.compilation = std::nullopt, // no compilation reference is provided
+		.contract = {
+			.name = std::string{_name},
+			.definition = {
+				.source = {
+					.id = {_sourceID},
+					.type = std::nullopt
+				},
+				.range = std::nullopt // the definition names only the source, not a range within it
+			}
+		},
+		.environment = _assembly.isCreation() ? schema::Program::Environment::CREATE : schema::Program::Environment::CALL, // creation assemblies run in the "create" environment, all others in "call"
+		.context = std::nullopt, // no program-level context is provided
+		.instructions = programInstructions(_assembly, _linkerObject, _sourceID)
+	};
 }
 
 Json ethdebug::resources(std::vector<std::string> const& _sources, std::string const& _version)

diff --git a/libevmasm/Ethdebug.h b/libevmasm/Ethdebug.h
@@ -27,7 +27,7 @@ namespace solidity::evmasm::ethdebug
 {
 
 // returns ethdebug/format/program.
-Json program(std::string_view _name, unsigned _sourceId, Assembly const* _assembly, LinkerObject const& _linkerObject);
+Json program(std::string_view _name, unsigned _sourceID, Assembly const& _assembly, LinkerObject const& _linkerObject); // _assembly is taken by reference, i.e. an assembly is always required
 
 // returns ethdebug/format/info/resources
 Json resources(std::vector<std::string> const& _sources, std::string const& _version);

diff --git a/libevmasm/EthdebugSchema.cpp b/libevmasm/EthdebugSchema.cpp
@@ -0,0 +1,143 @@
+/*
+	This file is part of solidity.
+
+	solidity is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	solidity is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with solidity.  If not, see <http://www.gnu.org/licenses/>.
+*/
+// SPDX-License-Identifier: GPL-3.0
+
+#include <libevmasm/EthdebugSchema.h>
+
+#include <libsolutil/Numeric.h>
+#include <libsolutil/Visitor.h>
+
+using namespace solidity;
+using namespace solidity::evmasm::ethdebug;
+
+void schema::data::to_json(Json& _json, HexValue const& _hexValue) // serializes the wrapped bytes as a hex string
+{
+	_json = util::toHex(_hexValue.value, util::HexPrefix::Add); // NOTE(review): HexPrefix::Add presumably prepends "0x" - confirm in libsolutil
+}
+
+void schema::data::to_json(Json& _json, Unsigned const& _unsigned) // serializes whichever alternative the value holds: a hex value or a 64-bit integer
+{
+	std::visit(util::GenericVisitor{
+		[&](HexValue const& _hexValue) { _json = _hexValue; }, // presumably dispatches to the HexValue serializer via the Json conversion mechanism
+		[&](std::uint64_t const _value) { _json = _value; } // stored as a plain JSON number
+	}, _unsigned.value);
+}
+
+void schema::materials::to_json(Json& _json, ID const& _id) // serializes the ID as either a JSON string or a JSON number
+{
+	std::visit(util::GenericVisitor{
+		[&](std::string const& _hexValue) { _json = _hexValue; }, // despite the parameter name, this alternative is a plain std::string
+		[&](std::uint64_t const _value) { _json = _value; }
+	}, _id.value);
+}
+
+void schema::materials::to_json(Json& _json, Reference const& _source) // writes "id" always and "type" only if it is set
+{
+	_json["id"] = _source.id;
+	if (_source.type)
+		_json["type"] = *_source.type == Reference::Type::Compilation ? "compilation" : "source"; // every type other than Compilation is written as "source"
+}
+
+void schema::materials::to_json(Json& _json, SourceRange::Range const& _range) // writes both "length" and "offset"; neither is optional
+{
+	_json["length"] = _range.length;
+	_json["offset"] = _range.offset;
+}
+
+
+void schema::materials::to_json(Json& _json, SourceRange const& _sourceRange) // writes "source" always and "range" only if it is set
+{
+	_json["source"] = _sourceRange.source;
+	if (_sourceRange.range)
+		_json["range"] = *_sourceRange.range;
+}
+
+void schema::to_json(Json& _json, Program::Contract const& _contract) // writes "definition" always and "name" only if it is set
+{
+	if (_contract.name)
+		_json["name"] = *_contract.name;
+	_json["definition"] = _contract.definition;
+}
+
+void schema::program::to_json(Json& _json, Context::Variable const& _contextVariable) // writes the properties that are set; throws EthdebugException if none is set or the identifier is empty
+{
+	auto const numProperties =
+		_contextVariable.identifier.has_value() +
+		_contextVariable.declaration.has_value(); // each has_value() contributes 0 or 1, so the sum counts the properties that are set
+	solRequire(numProperties >= 1, EthdebugException, "Context variable has no properties.");
+	if (_contextVariable.identifier)
+	{
+		solRequire(!_contextVariable.identifier->empty(), EthdebugException, "Variable identifier must not be empty."); // a present but empty identifier is rejected
+		_json["identifier"] = *_contextVariable.identifier;
+	}
+	if (_contextVariable.declaration)
+		_json["declaration"] = *_contextVariable.declaration;
+}
+
+void schema::program::to_json(Json& _json, Context const& _context) // writes whichever of "code", "variables" and "remark" are set; throws EthdebugException if none is set
+{
+	solRequire(_context.code.has_value() + _context.remark.has_value() + _context.variables.has_value() >= 1, EthdebugException, "Context needs >=1 properties."); // the sum counts the properties that are set
+	if (_context.code)
+		_json["code"] = *_context.code;
+	if (_context.variables)
+	{
+		solRequire(!_context.variables->empty(), EthdebugException, "Context variables must not be empty if provided."); // a present but empty variable list is rejected
+		_json["variables"] = *_context.variables;
+	}
+	if (_context.remark)
+		_json["remark"] = *_context.remark;
+}
+
+void schema::program::to_json(Json& _json, Instruction::Operation const& _operation) // writes "mnemonic" always and "arguments" only if there are any
+{
+	_json = { {"mnemonic", _operation.mnemonic} }; // replaces any previous content of _json with a fresh object
+	if (!_operation.arguments.empty())
+		_json["arguments"] = _operation.arguments; // an empty argument list is omitted rather than written as []
+}
+
+void schema::program::to_json(Json& _json, Instruction const& _instruction) // writes "offset" always; "operation" and "context" only if they are set
+{
+	_json["offset"] = _instruction.offset;
+	if (_instruction.operation)
+		_json["operation"] = *_instruction.operation;
+	if (_instruction.context)
+		_json["context"] = *_instruction.context;
+}
+
+void schema::to_json(Json& _json, Program const& _program) // writes "contract", "environment" and "instructions" always; "compilation" and "context" only if they are set
+{
+	if (_program.compilation)
+		_json["compilation"] = *_program.compilation;
+	_json["contract"] = _program.contract;
+	_json["environment"] = _program.environment;
+	if (_program.context)
+		_json["context"] = *_program.context;
+	_json["instructions"] = _program.instructions;
+}
+
+void schema::to_json(Json& _json, Program::Environment const& _environment) // maps the enumerator to its string form: "call" or "create"
+{
+	switch (_environment) // there is no default case: any other value leaves _json untouched
+	{
+	case Program::Environment::CALL:
+		_json = "call";
+		break;
+	case Program::Environment::CREATE:
+		_json = "create";
+		break;
+	}
+}