Skip to content

Commit

Permalink
Add Context Structure to Affect State Dependent Liftings (#617)
Browse files Browse the repository at this point in the history
* add empty contexts

* add include

* make function const

* add helper for uniform mappings

* expose cache clearing for operand lifter

* decoding context documentation:

* move virtual inheritance down

* remove unused var names

* add type alias

* remove underscores

* make sure we have poetry

* check version in CI

* try specify python3

* newer poetry install script

* fail fast

* try use pythons pip

* upgrade pip?

* install directly

* update in linux too
  • Loading branch information
2over12 authored Aug 17, 2022
1 parent 854c73e commit c0f90b9
Show file tree
Hide file tree
Showing 20 changed files with 221 additions and 61 deletions.
17 changes: 12 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ jobs:
with:
fetch-depth: 0
- uses: ./.github/actions/prepare_git_user
- name: Get Poetry
shell: bash
run: |
python3 -m pip install poetry
- name: Build with build script
shell: bash
run: |
Expand All @@ -49,7 +53,6 @@ jobs:
export VCPKG_ROOT=$(pwd)/../lifting-bits-downloads/vcpkg_${{ matrix.image.name }}-${{ matrix.image.tag }}_llvm-${{ matrix.llvm }}_amd64
export INSTALL_DIR=$(pwd)/remill-preset-install
./scripts/build-preset.sh release
- name: Install Python Test Deps
shell: bash
run: |
Expand Down Expand Up @@ -109,6 +112,14 @@ jobs:
with:
fetch-depth: 0
- uses: ./.github/actions/prepare_git_user
- name: Get Poetry
shell: bash
run: |
python3 -m pip install poetry
- name: Install Python Test Deps
shell: bash
run: |
python3 -m pip install --user ./scripts/diff_tester_export_insns
- name: Build with build script
shell: bash
run: |
Expand All @@ -121,10 +132,6 @@ jobs:
export VCPKG_ROOT=$(pwd)/../lifting-bits-downloads/vcpkg_${{ matrix.os}}_llvm-${{ matrix.llvm }}_xcode-13.0_amd64
export INSTALL_DIR=$(pwd)/remill-preset-install
./scripts/build-preset.sh release
- name: Install Python Test Deps
shell: bash
run: |
pip3 install --user ./scripts/diff_tester_export_insns
- name: Run tests
shell: bash
working-directory: remill-build
Expand Down
23 changes: 18 additions & 5 deletions include/remill/Arch/Arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <llvm/IR/IRBuilder.h>
#include <remill/BC/InstructionLifter.h>
#include <remill/BC/IntrinsicTable.h>
#include <remill/Arch/Context.h>

#pragma clang diagnostic pop

Expand Down Expand Up @@ -170,6 +171,9 @@ class Arch {

virtual ~Arch(void);


// Returns a `DecodingContext` for this architecture.
// NOTE(review): presumably this is the context clients pass to the first
// `DecodeInstruction` call; the default implementation returns an empty
// context -- confirm the intended contract for context-sensitive
// architectures.
virtual DecodingContext CreateInitialContext(void) const = 0;

// Factory method for loading the correct architecture class for a given
// operating system and architecture class.
static auto Get(llvm::LLVMContext &context, std::string_view os,
Expand Down Expand Up @@ -281,14 +285,23 @@ class Arch {
// walk up, one byte at a time, to `MaxInstructionSize(false)`
// bytes being passed to the decoder, until you successfully decode
// or ultimately fail.
virtual bool DecodeInstruction(uint64_t address, std::string_view instr_bytes,
Instruction &inst) const = 0;

// The decoder takes contextual information in the form of a `DecodingContext`,
// making a copy of it. On success it produces a `ContextMap`: a function that
// maps a successor address to a new context that updates the old context.

using DecodingResult = std::optional<DecodingContext::ContextMap>;

virtual DecodingResult
DecodeInstruction(uint64_t address, std::string_view instr_bytes,
Instruction &inst, DecodingContext context) const = 0;

// Decode an instruction that is within a delay slot.
bool DecodeDelayedInstruction(uint64_t address, std::string_view instr_bytes,
Instruction &inst) const {
DecodingResult
DecodeDelayedInstruction(uint64_t address, std::string_view instr_bytes,
Instruction &inst, DecodingContext context) const {
inst.in_delay_slot = true;
return this->DecodeInstruction(address, instr_bytes, inst);
return this->DecodeInstruction(address, instr_bytes, inst,
std::move(context));
}

// Minimum alignment of an instruction for this particular architecture.
Expand Down
36 changes: 25 additions & 11 deletions include/remill/Arch/ArchBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include <remill/Arch/Arch.h>
#include <remill/Arch/Context.h>

#include <memory>
#include <unordered_map>
Expand All @@ -31,13 +32,9 @@ namespace remill {

struct Register;


// Internal base architecture for all Remill-internal architectures.
class ArchBase : public remill::Arch {
protected:
virtual bool ArchDecodeInstruction(uint64_t address,
std::string_view instr_bytes,
Instruction &inst) const = 0;

public:
using ArchPtr = std::unique_ptr<const Arch>;

Expand Down Expand Up @@ -73,12 +70,6 @@ class ArchBase : public remill::Arch {

unsigned RegMdID(void) const final;

virtual bool DecodeInstruction(uint64_t address, std::string_view instr_bytes,
Instruction &inst) const override;

OperandLifter::OpLifterPtr
DefaultLifter(const remill::IntrinsicTable &intrinsics) const override;

// Get the state pointer and various other types from the `llvm::LLVMContext`
// associated with `module`.
//
Expand Down Expand Up @@ -114,4 +105,27 @@ class ArchBase : public remill::Arch {
mutable std::unique_ptr<IntrinsicTable> instrinsics{nullptr};
};

class DefaultContextAndLifter : virtual public remill::ArchBase {
public:
virtual DecodingContext CreateInitialContext(void) const override;

virtual std::optional<DecodingContext::ContextMap>
DecodeInstruction(uint64_t address, std::string_view instr_bytes,
Instruction &inst, DecodingContext context) const override;


OperandLifter::OpLifterPtr
DefaultLifter(const remill::IntrinsicTable &intrinsics) const override;


DefaultContextAndLifter(llvm::LLVMContext *context_, OSName os_name_,
ArchName arch_name_);

protected:
virtual bool ArchDecodeInstruction(uint64_t address,
std::string_view instr_bytes,
Instruction &inst) const = 0;
};


} // namespace remill
52 changes: 52 additions & 0 deletions include/remill/Arch/Context.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) 2022 Trail of Bits, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


#pragma once


#include <cstdint>
#include <functional>
#include <string>
#include <string_view>
#include <unordered_map>

namespace remill {

/// A decoding context is contextual information about the state of the
/// program that affects decoding, e.g. the Thumb mode register on ARM.
/// We allow clients to interpose on a context for resolution.
///
/// We return a function of successor -> DecodingContext. The decoder defines
/// a relation on the previous context and the successor address that produces
/// a new decoding context.
/// This definition of returned contexts allows us to cleanly handle
/// situations like indirect jumps on ARM.
class DecodingContext {
 public:
  /// Function from a successor address to the context to decode it with.
  using ContextMap = std::function<DecodingContext(uint64_t)>;

  /// Creates a context with no context-register bindings.
  DecodingContext() = default;

  /// Creates a context that holds the given name -> value bindings.
  DecodingContext(std::unordered_map<std::string, uint64_t> context_value);

  /// Returns the value bound to `context_reg`. A missing binding is reported
  /// through a fatal log message.
  uint64_t GetContextValue(const std::string &context_reg) const;

  /// Returns a copy of this context that additionally carries a binding for
  /// `creg`; this context itself is left unmodified.
  DecodingContext PutContextReg(std::string creg, uint64_t value) const;

  /// Returns a mapping that yields `cst` for every successor address.
  static ContextMap UniformContextMapping(DecodingContext cst);

 private:
  /// Context-register name -> value.
  std::unordered_map<std::string, uint64_t> context_value;
};

} // namespace remill
2 changes: 1 addition & 1 deletion include/remill/Arch/Instruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ class Instruction {
Operand &EmplaceOperand(const Operand::Address &op);


const InstructionLifter::LifterPtr &GetLifter();
const InstructionLifter::LifterPtr &GetLifter() const;

void SetLifter(InstructionLifter::LifterPtr lifter);

Expand Down
4 changes: 3 additions & 1 deletion include/remill/BC/InstructionLifter.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class OperandLifter {
std::string_view reg_name) const = 0;

virtual llvm::Type *GetMemoryType() = 0;

// Clear out the lifter's cache of currently loaded register
// values/addresses (see the `InstructionLifter` override).
virtual void ClearCache(void) const = 0;
};

// Wraps the process of lifting an instruction into a block. This resolves
Expand Down Expand Up @@ -108,7 +110,7 @@ class InstructionLifter : public OperandLifter {
std::string_view reg_name) const override final;

// Clear out the cache of the current register values/addresses loaded.
void ClearCache(void) const;
void ClearCache(void) const override;


virtual llvm::Type *GetMemoryType() override final;
Expand Down
3 changes: 2 additions & 1 deletion lib/Arch/AArch32/Arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ namespace remill {
AArch32Arch::AArch32Arch(llvm::LLVMContext *context_, OSName os_name_,
ArchName arch_name_)
: ArchBase(context_, os_name_, arch_name_),
AArch32ArchBase(context_, os_name_, arch_name_) {}
AArch32ArchBase(context_, os_name_, arch_name_),
DefaultContextAndLifter(context_, os_name_, arch_name_) {}

// Nothing to release beyond what the base classes already own.
AArch32Arch::~AArch32Arch(void) = default;

Expand Down
3 changes: 2 additions & 1 deletion lib/Arch/AArch32/Arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
#include <remill/Arch/AArch32/AArch32Base.h>

namespace remill {
class AArch32Arch final : public AArch32ArchBase {
class AArch32Arch final : public AArch32ArchBase,
public DefaultContextAndLifter {
public:
AArch32Arch(llvm::LLVMContext *context_, OSName os_name_,
ArchName arch_name_);
Expand Down
5 changes: 3 additions & 2 deletions lib/Arch/AArch64/Arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ Instruction::Category InstCategory(const aarch64::InstData &inst) {
}
}

class AArch64Arch final : public ArchBase {
class AArch64Arch final : public DefaultContextAndLifter {
public:
AArch64Arch(llvm::LLVMContext *context_, OSName os_name_,
ArchName arch_name_);
Expand Down Expand Up @@ -148,7 +148,8 @@ class AArch64Arch final : public ArchBase {

AArch64Arch::AArch64Arch(llvm::LLVMContext *context_, OSName os_name_,
ArchName arch_name_)
: ArchBase(context_, os_name_, arch_name_) {}
: ArchBase(context_, os_name_, arch_name_),
DefaultContextAndLifter(context_, os_name_, arch_name_) {}

// Nothing to release beyond what the base classes already own.
AArch64Arch::~AArch64Arch(void) = default;

Expand Down
30 changes: 24 additions & 6 deletions lib/Arch/Arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -850,16 +850,34 @@ const IntrinsicTable *ArchBase::GetInstrinsicTable(void) const {
return this->instrinsics.get();
}

OperandLifter::OpLifterPtr
ArchBase::DefaultLifter(const remill::IntrinsicTable &intrinsics) const {
return std::make_shared<InstructionLifter>(this, intrinsics);

// The default initial context is a default-constructed `DecodingContext`,
// i.e. one without any context-register bindings.
DecodingContext DefaultContextAndLifter::CreateInitialContext(void) const {
  DecodingContext empty_context;
  return empty_context;
}

bool ArchBase::DecodeInstruction(uint64_t address, std::string_view instr_bytes,
Instruction &inst) const {
Arch::DecodingResult DefaultContextAndLifter::DecodeInstruction(
uint64_t address, std::string_view instr_bytes, Instruction &inst,
DecodingContext context) const {
inst.SetLifter(std::make_unique<remill::InstructionLifter>(
this, this->GetInstrinsicTable()));
return this->ArchDecodeInstruction(address, instr_bytes, inst);
if (this->ArchDecodeInstruction(address, instr_bytes, inst)) {
return [](uint64_t) -> DecodingContext { return DecodingContext(); };
}

return std::nullopt;
}


// Builds the stock `InstructionLifter` for this architecture over the given
// intrinsic table and hands it back as a shared operand lifter.
OperandLifter::OpLifterPtr DefaultContextAndLifter::DefaultLifter(
    const remill::IntrinsicTable &intrinsics) const {
  OperandLifter::OpLifterPtr lifter =
      std::make_shared<InstructionLifter>(this, intrinsics);
  return lifter;
}


// Forwards the construction arguments to `ArchBase`. `ArchBase` is a virtual
// base of this class, so this initializer only takes effect when this class is
// the most-derived type; concrete architectures (e.g. `AArch64Arch`,
// `SPARC32Arch`) also initialize `ArchBase` directly in their own
// constructors.
DefaultContextAndLifter::DefaultContextAndLifter(llvm::LLVMContext *context_,
OSName os_name_,
ArchName arch_name_)
: ArchBase(context_, os_name_, arch_name_) {}


} // namespace remill
2 changes: 2 additions & 0 deletions lib/Arch/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ add_library(remill_arch STATIC
"${REMILL_INCLUDE_DIR}/remill/Arch/Instruction.h"
"${REMILL_INCLUDE_DIR}/remill/Arch/Name.h"
"${REMILL_INCLUDE_DIR}/remill/Arch/ArchBase.h"
"${REMILL_INCLUDE_DIR}/remill/Arch/Context.h"

Arch.cpp
BitManipulation.h
Instruction.cpp
Context.cpp
Name.cpp
)

Expand Down
36 changes: 36 additions & 0 deletions lib/Arch/Context.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

#include <glog/logging.h>
#include <remill/Arch/Context.h>

namespace remill {

DecodingContext::DecodingContext(
std::unordered_map<std::string, uint64_t> context_value)
: context_value(std::move(context_value)) {}


// Looks up the value bound to `context_reg`. A missing binding is reported
// via `LOG(FATAL)`.
uint64_t
DecodingContext::GetContextValue(const std::string &context_reg) const {
  const auto binding = this->context_value.find(context_reg);
  if (binding == this->context_value.end()) {
    LOG(FATAL) << "No context value for " << context_reg
               << " but it is required for decoding";
  }

  return binding->second;
}
// Returns a copy of this context in which `creg` is bound to `value`. An
// existing binding for `creg` is replaced; this context is left unmodified.
DecodingContext DecodingContext::PutContextReg(std::string creg,
                                               uint64_t value) const {
  std::unordered_map<std::string, uint64_t> new_value(this->context_value);

  // `emplace` is a no-op when the key already exists, which would silently
  // keep the stale value. `insert_or_assign` overwrites it, so updating a
  // context register that is already present actually takes effect.
  new_value.insert_or_assign(std::move(creg), value);
  return DecodingContext(std::move(new_value));
}

// Builds a mapping that ignores the successor address and always yields a
// copy of `cst`.
DecodingContext::ContextMap
DecodingContext::UniformContextMapping(DecodingContext cst) {
  return ContextMap(
      [fixed_context = std::move(cst)](uint64_t) -> DecodingContext {
        return fixed_context;
      });
}


} // namespace remill
2 changes: 1 addition & 1 deletion lib/Arch/Instruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ std::string Instruction::Serialize(void) const {
return ss.str();
}

const InstructionLifter::LifterPtr &Instruction::GetLifter() {
const InstructionLifter::LifterPtr &Instruction::GetLifter() const {
return this->lifter;
}

Expand Down
5 changes: 3 additions & 2 deletions lib/Arch/SPARC32/Arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,11 @@ void AddImmop(Instruction &inst, uint64_t imm, unsigned size, bool is_signed) {
}


class SPARC32Arch final : public ArchBase {
class SPARC32Arch final : public DefaultContextAndLifter {
public:
SPARC32Arch(llvm::LLVMContext *context_, OSName os_name_, ArchName arch_name_)
: ArchBase(context_, os_name_, arch_name_) {}
: ArchBase(context_, os_name_, arch_name_),
DefaultContextAndLifter(context_, os_name_, arch_name_) {}

virtual ~SPARC32Arch(void) = default;

Expand Down
Loading

0 comments on commit c0f90b9

Please sign in to comment.