diff --git a/CMakeLists.txt b/CMakeLists.txt index b6a6630e2..984030c32 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,7 +121,7 @@ if (NOT WIN32) endif () # warnings and compiler settings -set(PROJECT_CXXWARNINGS "-Wall -Wextra -Wno-unused-parameter -Wno-c++98-compat -Wno-unreachable-code-return -Wno-nested-anon-types -Wno-extended-offsetof -Wno-gnu-anonymous-struct -Wno-gnu-designator -Wno-variadic-macros -Wno-gnu-zero-variadic-macro-arguments -Wno-gnu-statement-expression -Wno-return-type-c-linkage -Wno-c99-extensions -Wno-ignored-attributes -Wno-unused-local-typedef -Wno-unknown-pragmas") +set(PROJECT_CXXWARNINGS "-Wall -Wextra -Wno-unused-parameter -Wno-c++98-compat -Wno-unreachable-code-return -Wno-nested-anon-types -Wno-extended-offsetof -Wgnu-alignof-expression -Wno-gnu-anonymous-struct -Wno-gnu-designator -Wno-variadic-macros -Wno-gnu-zero-variadic-macro-arguments -Wno-gnu-statement-expression -Wno-return-type-c-linkage -Wno-c99-extensions -Wno-ignored-attributes -Wno-unused-local-typedef -Wno-unknown-pragmas") set(PROJECT_CXXFLAGS "${PROJECT_CXXFLAGS} -Wno-unknown-warning-option ${PROJECT_CXXWARNINGS} -fPIC -fno-omit-frame-pointer -fvisibility-inlines-hidden -fno-exceptions -fno-asynchronous-unwind-tables -fno-rtti") # default build type diff --git a/cmake/BCCompiler/CMakeBCInformation.cmake b/cmake/BCCompiler/CMakeBCInformation.cmake index 378146349..8a00e9bc2 100644 --- a/cmake/BCCompiler/CMakeBCInformation.cmake +++ b/cmake/BCCompiler/CMakeBCInformation.cmake @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-set(DEFAULT_BC_COMPILER_FLAGS "-std=gnu++11 -emit-llvm -Wno-unknown-warning-option -Wall -Wshadow -Wconversion -Wpadded -pedantic -Wshorten-64-to-32 -Wno-gnu-anonymous-struct -Wno-return-type-c-linkage -Wno-gnu-zero-variadic-macro-arguments -Wno-nested-anon-types -Wno-extended-offsetof -Wno-gnu-statement-expression -Wno-c99-extensions -Wno-ignored-attributes -mtune=generic -fno-vectorize -fno-slp-vectorize -ffreestanding -fno-common -fno-builtin -fno-exceptions -fno-rtti -fno-asynchronous-unwind-tables -Wno-unneeded-internal-declaration -Wno-unused-function ") +set(DEFAULT_BC_COMPILER_FLAGS "-std=gnu++11 -emit-llvm -Wno-unknown-warning-option -Wall -Wshadow -Wconversion -Wpadded -pedantic -Wshorten-64-to-32 -Wgnu-alignof-expression -Wno-gnu-anonymous-struct -Wno-return-type-c-linkage -Wno-gnu-zero-variadic-macro-arguments -Wno-nested-anon-types -Wno-extended-offsetof -Wno-gnu-statement-expression -Wno-c99-extensions -Wno-ignored-attributes -mtune=generic -fno-vectorize -fno-slp-vectorize -ffreestanding -fno-common -fno-builtin -fno-exceptions -fno-rtti -fno-asynchronous-unwind-tables -Wno-unneeded-internal-declaration -Wno-unused-function ") if (NOT CMAKE_BC_COMPILE_OBJECT) if (NOT DEFINED CMAKE_BC_COMPILER) diff --git a/remill/Arch/AArch64/Arch.cpp b/remill/Arch/AArch64/Arch.cpp index 9bf287040..107e61778 100644 --- a/remill/Arch/AArch64/Arch.cpp +++ b/remill/Arch/AArch64/Arch.cpp @@ -113,16 +113,16 @@ class AArch64Arch : public Arch { // Decode an instruction. bool DecodeInstruction( uint64_t address, const std::string &instr_bytes, - Instruction &inst) const override; + Instruction &inst) const final; // Maximum number of bytes in an instruction. 
- uint64_t MaxInstructionSize(void) const override; + uint64_t MaxInstructionSize(void) const final; - llvm::Triple Triple(void) const override; - llvm::DataLayout DataLayout(void) const override; + llvm::Triple Triple(void) const final; + llvm::DataLayout DataLayout(void) const final; // Default calling convention for this architecture. - llvm::CallingConv::ID DefaultCallingConv(void) const override; + llvm::CallingConv::ID DefaultCallingConv(void) const final; private: AArch64Arch(void) = delete; diff --git a/remill/Arch/Arch.cpp b/remill/Arch/Arch.cpp index 6b290a2b3..e67288318 100644 --- a/remill/Arch/Arch.cpp +++ b/remill/Arch/Arch.cpp @@ -88,6 +88,12 @@ Arch::Arch(OSName os_name_, ArchName arch_name_) Arch::~Arch(void) {} +bool Arch::LazyDecodeInstruction( + uint64_t address, const std::string &instr_bytes, + Instruction &inst) const { + return DecodeInstruction(address, instr_bytes, inst); +} + llvm::Triple Arch::BasicTriple(void) const { llvm::Triple triple; switch (os_name) { diff --git a/remill/Arch/Arch.h b/remill/Arch/Arch.h index 13377c0ff..ef8ef879b 100644 --- a/remill/Arch/Arch.h +++ b/remill/Arch/Arch.h @@ -60,6 +60,12 @@ class Arch { uint64_t address, const std::string &instr_bytes, Instruction &inst) const = 0; + // Fully decode any control-flow transfer instructions, but only partially + // decode other instructions. + virtual bool LazyDecodeInstruction( + uint64_t address, const std::string &instr_bytes, + Instruction &inst) const; + // Maximum number of bytes in an instruction for this particular architecture. 
virtual uint64_t MaxInstructionSize(void) const = 0; diff --git a/remill/Arch/Instruction.cpp b/remill/Arch/Instruction.cpp index f62faf4b7..6e2d19f51 100644 --- a/remill/Arch/Instruction.cpp +++ b/remill/Arch/Instruction.cpp @@ -249,6 +249,7 @@ Instruction::Instruction(void) branch_not_taken_pc(0), arch_name(kArchInvalid), operand_size(0), + arch_for_decode(nullptr), is_atomic_read_modify_write(false), category(Instruction::kCategoryInvalid) {} @@ -261,7 +262,22 @@ void Instruction::Reset(void) { operand_size = 0; is_atomic_read_modify_write = false; category = Instruction::kCategoryInvalid; + arch_for_decode = nullptr; operands.clear(); + function.clear(); + bytes.clear(); +} + +bool Instruction::FinalizeDecode(void) { + if (!IsValid()) { + return false; + } else if (!arch_for_decode) { + return true; + } else { + auto ret = arch_for_decode->DecodeInstruction(pc, bytes, *this); + arch_for_decode = nullptr; + return ret; + } } std::string Instruction::Serialize(void) const { diff --git a/remill/Arch/Instruction.h b/remill/Arch/Instruction.h index c64c109d8..581d790b9 100644 --- a/remill/Arch/Instruction.h +++ b/remill/Arch/Instruction.h @@ -140,6 +140,8 @@ class Instruction { void Reset(void); + bool FinalizeDecode(void); + // Name of semantics function that implements this instruction. std::string function; @@ -161,6 +163,10 @@ class Instruction { // The effective size of the operand, in bits. uint64_t operand_size; + // Pointer to the `remill::Arch` used to complete the decoding of this + // instruction. + const Arch *arch_for_decode; + // Does the instruction require the use of the `__remill_atomic_begin` and // `__remill_atomic_end`? bool is_atomic_read_modify_write; diff --git a/remill/Arch/X86/Arch.cpp b/remill/Arch/X86/Arch.cpp index 00d1fd257..1642aaa9e 100644 --- a/remill/Arch/X86/Arch.cpp +++ b/remill/Arch/X86/Arch.cpp @@ -751,18 +751,31 @@ class X86Arch : public Arch { // Decode an instuction. 
bool DecodeInstruction( uint64_t address, const std::string &inst_bytes, - Instruction &inst) const override; + Instruction &inst) const final; + + // Fully decode any control-flow transfer instructions, but only partially + // decode other instructions. To complete the decoding, call + // `Instruction::FinalizeDecode`. + bool LazyDecodeInstruction( + uint64_t address, const std::string &inst_bytes, + Instruction &inst) const final; // Maximum number of bytes in an instruction. - uint64_t MaxInstructionSize(void) const override; + uint64_t MaxInstructionSize(void) const final; - llvm::Triple Triple(void) const override; - llvm::DataLayout DataLayout(void) const override; + llvm::Triple Triple(void) const final; + llvm::DataLayout DataLayout(void) const final; // Default calling convention for this architecture. - llvm::CallingConv::ID DefaultCallingConv(void) const override; + llvm::CallingConv::ID DefaultCallingConv(void) const final; private: + + // Decode an instruction. + bool DecodeInstruction( + uint64_t address, const std::string &inst_bytes, + Instruction &inst, bool is_lazy) const; + X86Arch(void) = delete; }; @@ -906,7 +919,7 @@ llvm::DataLayout X86Arch::DataLayout(void) const { bool X86Arch::DecodeInstruction( uint64_t address, const std::string &inst_bytes, - Instruction &inst) const { + Instruction &inst, bool is_lazy) const { inst.pc = address; inst.arch_name = arch_name; @@ -921,12 +934,11 @@ bool X86Arch::DecodeInstruction( } inst.operand_size = xed_decoded_inst_get_operand_width(xedd); - inst.function = InstructionFunctionName(xedd); inst.bytes = inst_bytes.substr(0, xed_decoded_inst_get_length(xedd)); inst.category = CreateCategory(xedd); inst.next_pc = address + xed_decoded_inst_get_length(xedd); - // Wrap an instuction in atomic begin/end if it accesses memory with RMW + // Wrap an instruction in atomic begin/end if it accesses memory with RMW // semantics or with a LOCK prefix. 
if (xed_operand_values_get_atomic(xedd) || xed_operand_values_has_lock_prefix(xedd)) { @@ -937,45 +949,49 @@ bool X86Arch::DecodeInstruction( DecodeConditionalInterrupt(inst); } - // Lift the operands. This creates the arguments for us to call the - // instuction implementation. - auto xedi = xed_decoded_inst_inst(xedd); - auto num_operands = xed_decoded_inst_noperands(xedd); - for (auto i = 0U; i < num_operands; ++i) { - auto xedo = xed_inst_operand(xedi, i); - if (XED_OPVIS_SUPPRESSED != xed_operand_operand_visibility(xedo)) { - DecodeOperand(inst, xedd, xedo); + if (!is_lazy || inst.IsControlFlow()) { + inst.function = InstructionFunctionName(xedd); + + // Lift the operands. This creates the arguments for us to call the + // instruction implementation. + auto xedi = xed_decoded_inst_inst(xedd); + auto num_operands = xed_decoded_inst_noperands(xedd); + for (auto i = 0U; i < num_operands; ++i) { + auto xedo = xed_inst_operand(xedi, i); + if (XED_OPVIS_SUPPRESSED != xed_operand_operand_visibility(xedo)) { + DecodeOperand(inst, xedd, xedo); + } } - } - if (inst.IsFunctionCall()) { - DecodeFallThroughPC(inst, xedd); - } - - // All non-control FPU instructions update the last instruction pointer - // and opcode. 
- if (XED_ISA_SET_X87 == xed_decoded_inst_get_isa_set(xedd) || - XED_ISA_SET_FCMOV == xed_decoded_inst_get_isa_set(xedd) || - XED_CATEGORY_X87_ALU == xed_decoded_inst_get_category(xedd)) { - auto set_ip_dp = false; - const auto get_attr = xed_decoded_inst_get_attribute; - switch (xed_decoded_inst_get_iform_enum(xedd)) { - case XED_IFORM_FNOP: - case XED_IFORM_FINCSTP: - case XED_IFORM_FDECSTP: - set_ip_dp = true; - break; - default: - set_ip_dp = !get_attr(xedd, XED_ATTRIBUTE_X87_CONTROL) && - !get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_CW) && - !get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_R) && - !get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_W) && - !get_attr(xedd, XED_ATTRIBUTE_X87_NOWAIT); - break; + if (inst.IsFunctionCall()) { + DecodeFallThroughPC(inst, xedd); } - if (set_ip_dp) { - DecodeX87LastIpDp(inst); + // All non-control FPU instructions update the last instruction pointer + // and opcode. + if (XED_ISA_SET_X87 == xed_decoded_inst_get_isa_set(xedd) || + XED_ISA_SET_FCMOV == xed_decoded_inst_get_isa_set(xedd) || + XED_CATEGORY_X87_ALU == xed_decoded_inst_get_category(xedd)) { + auto set_ip_dp = false; + const auto get_attr = xed_decoded_inst_get_attribute; + switch (xed_decoded_inst_get_iform_enum(xedd)) { + case XED_IFORM_FNOP: + case XED_IFORM_FINCSTP: + case XED_IFORM_FDECSTP: + set_ip_dp = true; + break; + default: + set_ip_dp = !get_attr(xedd, XED_ATTRIBUTE_X87_CONTROL) && + !get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_CW) && + !get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_R) && + !get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_W) && + !get_attr(xedd, XED_ATTRIBUTE_X87_NOWAIT); + break; + } + + if (set_ip_dp) { + DecodeX87LastIpDp(inst); + } } } @@ -1006,6 +1022,30 @@ bool X86Arch::DecodeInstruction( return true; } +bool X86Arch::DecodeInstruction( + uint64_t address, + const std::string &inst_bytes, + Instruction &inst) const { + inst.arch_for_decode = nullptr; + return DecodeInstruction(address, inst_bytes, inst, false); +} + +// Fully decode any 
control-flow transfer instructions, but only partially +// decode other instructions. +bool X86Arch::LazyDecodeInstruction( + uint64_t address, const std::string &inst_bytes, + Instruction &inst) const { + inst.arch_for_decode = nullptr; + if (DecodeInstruction(address, inst_bytes, inst, true)) { + if (!inst.IsControlFlow()) { + inst.arch_for_decode = this; + } + return true; + } else { + return false; + } +} + } // namespace // TODO(pag): We pretend that these are singletons, but they aren't really!