Skip to content

Commit

Permalink
This adds some generic improvements, including the notion of lazy dec…
Browse files Browse the repository at this point in the history
…oding, which only fully decodes control-flow instructions. (#172)
  • Loading branch information
pgoodman authored Dec 4, 2017
1 parent 885e49d commit a9f796f
Show file tree
Hide file tree
Showing 8 changed files with 124 additions and 50 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ if (NOT WIN32)
endif ()

# warnings and compiler settings
set(PROJECT_CXXWARNINGS "-Wall -Wextra -Wno-unused-parameter -Wno-c++98-compat -Wno-unreachable-code-return -Wno-nested-anon-types -Wno-extended-offsetof -Wno-gnu-anonymous-struct -Wno-gnu-designator -Wno-variadic-macros -Wno-gnu-zero-variadic-macro-arguments -Wno-gnu-statement-expression -Wno-return-type-c-linkage -Wno-c99-extensions -Wno-ignored-attributes -Wno-unused-local-typedef -Wno-unknown-pragmas")
set(PROJECT_CXXWARNINGS "-Wall -Wextra -Wno-unused-parameter -Wno-c++98-compat -Wno-unreachable-code-return -Wno-nested-anon-types -Wno-extended-offsetof -Wgnu-alignof-expression -Wno-gnu-anonymous-struct -Wno-gnu-designator -Wno-variadic-macros -Wno-gnu-zero-variadic-macro-arguments -Wno-gnu-statement-expression -Wno-return-type-c-linkage -Wno-c99-extensions -Wno-ignored-attributes -Wno-unused-local-typedef -Wno-unknown-pragmas")
set(PROJECT_CXXFLAGS "${PROJECT_CXXFLAGS} -Wno-unknown-warning-option ${PROJECT_CXXWARNINGS} -fPIC -fno-omit-frame-pointer -fvisibility-inlines-hidden -fno-exceptions -fno-asynchronous-unwind-tables -fno-rtti")

# default build type
Expand Down
2 changes: 1 addition & 1 deletion cmake/BCCompiler/CMakeBCInformation.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

set(DEFAULT_BC_COMPILER_FLAGS "-std=gnu++11 -emit-llvm -Wno-unknown-warning-option -Wall -Wshadow -Wconversion -Wpadded -pedantic -Wshorten-64-to-32 -Wno-gnu-anonymous-struct -Wno-return-type-c-linkage -Wno-gnu-zero-variadic-macro-arguments -Wno-nested-anon-types -Wno-extended-offsetof -Wno-gnu-statement-expression -Wno-c99-extensions -Wno-ignored-attributes -mtune=generic -fno-vectorize -fno-slp-vectorize -ffreestanding -fno-common -fno-builtin -fno-exceptions -fno-rtti -fno-asynchronous-unwind-tables -Wno-unneeded-internal-declaration -Wno-unused-function ")
set(DEFAULT_BC_COMPILER_FLAGS "-std=gnu++11 -emit-llvm -Wno-unknown-warning-option -Wall -Wshadow -Wconversion -Wpadded -pedantic -Wshorten-64-to-32 -Wgnu-alignof-expression -Wno-gnu-anonymous-struct -Wno-return-type-c-linkage -Wno-gnu-zero-variadic-macro-arguments -Wno-nested-anon-types -Wno-extended-offsetof -Wno-gnu-statement-expression -Wno-c99-extensions -Wno-ignored-attributes -mtune=generic -fno-vectorize -fno-slp-vectorize -ffreestanding -fno-common -fno-builtin -fno-exceptions -fno-rtti -fno-asynchronous-unwind-tables -Wno-unneeded-internal-declaration -Wno-unused-function ")

if (NOT CMAKE_BC_COMPILE_OBJECT)
if (NOT DEFINED CMAKE_BC_COMPILER)
Expand Down
10 changes: 5 additions & 5 deletions remill/Arch/AArch64/Arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,16 +113,16 @@ class AArch64Arch : public Arch {
// Decode an instruction.
bool DecodeInstruction(
uint64_t address, const std::string &instr_bytes,
Instruction &inst) const override;
Instruction &inst) const final;

// Maximum number of bytes in an instruction.
uint64_t MaxInstructionSize(void) const override;
uint64_t MaxInstructionSize(void) const final;

llvm::Triple Triple(void) const override;
llvm::DataLayout DataLayout(void) const override;
llvm::Triple Triple(void) const final;
llvm::DataLayout DataLayout(void) const final;

// Default calling convention for this architecture.
llvm::CallingConv::ID DefaultCallingConv(void) const override;
llvm::CallingConv::ID DefaultCallingConv(void) const final;

private:
AArch64Arch(void) = delete;
Expand Down
6 changes: 6 additions & 0 deletions remill/Arch/Arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ Arch::Arch(OSName os_name_, ArchName arch_name_)

Arch::~Arch(void) {}

// Default implementation of lazy decoding: forwards to `DecodeInstruction`
// with the same arguments and returns its result unchanged.
bool Arch::LazyDecodeInstruction(
    uint64_t address, const std::string &instr_bytes,
    Instruction &inst) const {
  const bool decoded = DecodeInstruction(address, instr_bytes, inst);
  return decoded;
}

llvm::Triple Arch::BasicTriple(void) const {
llvm::Triple triple;
switch (os_name) {
Expand Down
6 changes: 6 additions & 0 deletions remill/Arch/Arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ class Arch {
uint64_t address, const std::string &instr_bytes,
Instruction &inst) const = 0;

// Fully decode any control-flow transfer instructions, but only partially
// decode other instructions.
virtual bool LazyDecodeInstruction(
uint64_t address, const std::string &instr_bytes,
Instruction &inst) const;

// Maximum number of bytes in an instruction for this particular architecture.
virtual uint64_t MaxInstructionSize(void) const = 0;

Expand Down
16 changes: 16 additions & 0 deletions remill/Arch/Instruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ Instruction::Instruction(void)
branch_not_taken_pc(0),
arch_name(kArchInvalid),
operand_size(0),
arch_for_decode(nullptr),
is_atomic_read_modify_write(false),
category(Instruction::kCategoryInvalid) {}

Expand All @@ -261,7 +262,22 @@ void Instruction::Reset(void) {
operand_size = 0;
is_atomic_read_modify_write = false;
category = Instruction::kCategoryInvalid;
arch_for_decode = nullptr;
operands.clear();
function.clear();
bytes.clear();
}

// Completes the decoding of this instruction.
//
// Returns `false` if the instruction is not valid. Returns `true` without
// doing any work if `arch_for_decode` is null. Otherwise, re-decodes this
// instruction in place using `arch_for_decode`, clears `arch_for_decode`,
// and returns the result of that decode.
bool Instruction::FinalizeDecode(void) {
  if (!IsValid()) {
    return false;
  }

  // No architecture recorded for a deferred decode; nothing left to do.
  const auto decoder = arch_for_decode;
  if (!decoder) {
    return true;
  }

  // NOTE(review): `bytes` is passed by reference while `*this` is the decode
  // target, so the decoder must read the input bytes before it overwrites
  // `inst.bytes` -- confirm for every `Arch` that sets `arch_for_decode`.
  const auto fully_decoded = decoder->DecodeInstruction(pc, bytes, *this);
  arch_for_decode = nullptr;
  return fully_decoded;
}

std::string Instruction::Serialize(void) const {
Expand Down
6 changes: 6 additions & 0 deletions remill/Arch/Instruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ class Instruction {

void Reset(void);

bool FinalizeDecode(void);

// Name of semantics function that implements this instruction.
std::string function;

Expand All @@ -161,6 +163,10 @@ class Instruction {
// The effective size of the operand, in bits.
uint64_t operand_size;

// Pointer to the `remill::Arch` used to complete the decoding of this
// instruction.
const Arch *arch_for_decode;

// Does the instruction require the use of the `__remill_atomic_begin` and
// `__remill_atomic_end`?
bool is_atomic_read_modify_write;
Expand Down
126 changes: 83 additions & 43 deletions remill/Arch/X86/Arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -751,18 +751,31 @@ class X86Arch : public Arch {
// Decode an instruction.
bool DecodeInstruction(
uint64_t address, const std::string &inst_bytes,
Instruction &inst) const override;
Instruction &inst) const final;

// Fully decode any control-flow transfer instructions, but only partially
// decode other instructions. To complete the decoding, call
// `Instruction::FinalizeDecode`.
bool LazyDecodeInstruction(
uint64_t address, const std::string &inst_bytes,
Instruction &inst) const final;

// Maximum number of bytes in an instruction.
uint64_t MaxInstructionSize(void) const override;
uint64_t MaxInstructionSize(void) const final;

llvm::Triple Triple(void) const override;
llvm::DataLayout DataLayout(void) const override;
llvm::Triple Triple(void) const final;
llvm::DataLayout DataLayout(void) const final;

// Default calling convention for this architecture.
llvm::CallingConv::ID DefaultCallingConv(void) const override;
llvm::CallingConv::ID DefaultCallingConv(void) const final;

private:

// Decode an instruction.
bool DecodeInstruction(
uint64_t address, const std::string &inst_bytes,
Instruction &inst, bool is_lazy) const;

X86Arch(void) = delete;
};

Expand Down Expand Up @@ -906,7 +919,7 @@ llvm::DataLayout X86Arch::DataLayout(void) const {
bool X86Arch::DecodeInstruction(
uint64_t address,
const std::string &inst_bytes,
Instruction &inst) const {
Instruction &inst, bool is_lazy) const {

inst.pc = address;
inst.arch_name = arch_name;
Expand All @@ -921,12 +934,11 @@ bool X86Arch::DecodeInstruction(
}

inst.operand_size = xed_decoded_inst_get_operand_width(xedd);
inst.function = InstructionFunctionName(xedd);
inst.bytes = inst_bytes.substr(0, xed_decoded_inst_get_length(xedd));
inst.category = CreateCategory(xedd);
inst.next_pc = address + xed_decoded_inst_get_length(xedd);

// Wrap an instuction in atomic begin/end if it accesses memory with RMW
// Wrap an instruction in atomic begin/end if it accesses memory with RMW
// semantics or with a LOCK prefix.
if (xed_operand_values_get_atomic(xedd) ||
xed_operand_values_has_lock_prefix(xedd)) {
Expand All @@ -937,45 +949,49 @@ bool X86Arch::DecodeInstruction(
DecodeConditionalInterrupt(inst);
}

// Lift the operands. This creates the arguments for us to call the
// instruction implementation.
auto xedi = xed_decoded_inst_inst(xedd);
auto num_operands = xed_decoded_inst_noperands(xedd);
for (auto i = 0U; i < num_operands; ++i) {
auto xedo = xed_inst_operand(xedi, i);
if (XED_OPVIS_SUPPRESSED != xed_operand_operand_visibility(xedo)) {
DecodeOperand(inst, xedd, xedo);
if (!is_lazy || inst.IsControlFlow()) {
inst.function = InstructionFunctionName(xedd);

// Lift the operands. This creates the arguments for us to call the
// instruction implementation.
auto xedi = xed_decoded_inst_inst(xedd);
auto num_operands = xed_decoded_inst_noperands(xedd);
for (auto i = 0U; i < num_operands; ++i) {
auto xedo = xed_inst_operand(xedi, i);
if (XED_OPVIS_SUPPRESSED != xed_operand_operand_visibility(xedo)) {
DecodeOperand(inst, xedd, xedo);
}
}
}

if (inst.IsFunctionCall()) {
DecodeFallThroughPC(inst, xedd);
}

// All non-control FPU instructions update the last instruction pointer
// and opcode.
if (XED_ISA_SET_X87 == xed_decoded_inst_get_isa_set(xedd) ||
XED_ISA_SET_FCMOV == xed_decoded_inst_get_isa_set(xedd) ||
XED_CATEGORY_X87_ALU == xed_decoded_inst_get_category(xedd)) {
auto set_ip_dp = false;
const auto get_attr = xed_decoded_inst_get_attribute;
switch (xed_decoded_inst_get_iform_enum(xedd)) {
case XED_IFORM_FNOP:
case XED_IFORM_FINCSTP:
case XED_IFORM_FDECSTP:
set_ip_dp = true;
break;
default:
set_ip_dp = !get_attr(xedd, XED_ATTRIBUTE_X87_CONTROL) &&
!get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_CW) &&
!get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_R) &&
!get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_W) &&
!get_attr(xedd, XED_ATTRIBUTE_X87_NOWAIT);
break;
if (inst.IsFunctionCall()) {
DecodeFallThroughPC(inst, xedd);
}

if (set_ip_dp) {
DecodeX87LastIpDp(inst);
// All non-control FPU instructions update the last instruction pointer
// and opcode.
if (XED_ISA_SET_X87 == xed_decoded_inst_get_isa_set(xedd) ||
XED_ISA_SET_FCMOV == xed_decoded_inst_get_isa_set(xedd) ||
XED_CATEGORY_X87_ALU == xed_decoded_inst_get_category(xedd)) {
auto set_ip_dp = false;
const auto get_attr = xed_decoded_inst_get_attribute;
switch (xed_decoded_inst_get_iform_enum(xedd)) {
case XED_IFORM_FNOP:
case XED_IFORM_FINCSTP:
case XED_IFORM_FDECSTP:
set_ip_dp = true;
break;
default:
set_ip_dp = !get_attr(xedd, XED_ATTRIBUTE_X87_CONTROL) &&
!get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_CW) &&
!get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_R) &&
!get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_W) &&
!get_attr(xedd, XED_ATTRIBUTE_X87_NOWAIT);
break;
}

if (set_ip_dp) {
DecodeX87LastIpDp(inst);
}
}
}

Expand Down Expand Up @@ -1006,6 +1022,30 @@ bool X86Arch::DecodeInstruction(
return true;
}

// Public entry point for decoding: clears `inst.arch_for_decode` and then
// invokes the private four-argument overload with lazy decoding disabled.
bool X86Arch::DecodeInstruction(
    uint64_t address,
    const std::string &inst_bytes,
    Instruction &inst) const {
  inst.arch_for_decode = nullptr;

  const bool is_lazy = false;
  return DecodeInstruction(address, inst_bytes, inst, is_lazy);
}

// Lazily decode an instruction: control-flow transfer instructions are fully
// decoded, while other instructions are only partially decoded.
//
// Returns `false` if the underlying decode fails. On success, a
// non-control-flow instruction has `inst.arch_for_decode` set to this
// architecture, which is what `Instruction::FinalizeDecode` uses to finish
// the decode later.
bool X86Arch::LazyDecodeInstruction(
    uint64_t address, const std::string &inst_bytes,
    Instruction &inst) const {
  inst.arch_for_decode = nullptr;

  const bool is_lazy = true;
  if (!DecodeInstruction(address, inst_bytes, inst, is_lazy)) {
    return false;
  }

  // Only instructions that were partially decoded need to remember the
  // architecture that can complete them.
  if (!inst.IsControlFlow()) {
    inst.arch_for_decode = this;
  }
  return true;
}

} // namespace

// TODO(pag): We pretend that these are singletons, but they aren't really!
Expand Down

0 comments on commit a9f796f

Please sign in to comment.