From ec1f3987496b4df4e6addcdb52069d0b1370829a Mon Sep 17 00:00:00 2001 From: stuxnot <41650734+stuxnot@users.noreply.github.com> Date: Tue, 24 Sep 2024 17:10:45 +0200 Subject: [PATCH] Rewrite CLI to drop Boost dependency (#73) * Rewrite CLI to drop Boost dependency * Move `expected` into `vendor` subdirectory * Use `tl::expected` for option parsing; fix typos, round-tripping issues * Update README for new CLI * Add `vendor` folder to include paths for static analysis tools * Add vendor dir and update CI * Improve README to show user-defined label usage * Update bindings build script to include `vendor` * Fix byte output formatting for CLI Because the output stream was set to `std::left` and not reset back to the default `std::right`, instruction bytes smaller than `0x10` would be formatted incorrectly. * Add `--bytes-only` flag and basic CLI sanity test --------- Co-authored-by: Jon Palmisciano --- .github/workflows/cpp.yml | 5 +- CMakeLists.txt | 11 +- README.md | 94 ++--- bindings/python/nyxstone-cpp/vendor | 1 + bindings/python/setup.py | 2 +- bindings/rust/build.rs | 1 + bindings/rust/nyxstone/vendor | 1 + bindings/rust/src/nyxstone_ffi.cpp | 2 +- examples/nyxstone-cli.cpp | 288 ++++++++++------ include/nyxstone.h | 2 +- tool/format.sh | 2 +- tool/static-analysis-cppcheck.sh | 4 +- tool/static-analysis-tidy.sh | 2 +- tool/test-cli.sh | 25 ++ {include/tl => vendor}/.clang-tidy | 0 vendor/argh.h | 514 ++++++++++++++++++++++++++++ {include/tl => vendor}/expected.hpp | 0 17 files changed, 777 insertions(+), 177 deletions(-) create mode 120000 bindings/python/nyxstone-cpp/vendor create mode 120000 bindings/rust/nyxstone/vendor create mode 100755 tool/test-cli.sh rename {include/tl => vendor}/.clang-tidy (100%) create mode 100644 vendor/argh.h rename {include/tl => vendor}/expected.hpp (100%) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index de2ea34..9bd3eb0 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -29,7 +29,8 @@ 
jobs: run: make test working-directory: build/ - name: cli - run: ./nyxstone -A "mov rax, rbx" && - ./nyxstone -A "jmp label" --labels "label=0x1000" + run: | + ./nyxstone "mov rax, rbx" && + ./nyxstone "jmp label" --labels "label=0x1000" working-directory: build/ diff --git a/CMakeLists.txt b/CMakeLists.txt index ecf64b7..16b9509 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,7 @@ target_compile_features(nyxstone PUBLIC ) target_include_directories(nyxstone PUBLIC $ + $ ) target_link_libraries(nyxstone PUBLIC LLVM-Wrapper @@ -65,21 +66,12 @@ set_target_properties(nyxstone PROPERTIES ) if(NYXSTONE_BUILD_EXAMPLES) - # Use -DBOOST_ROOT=C:/boost_1_80_0 to specify the boost root directory - if(WIN32) - set(Boost_USE_STATIC_LIBS ON) # only find static libs - set(Boost_USE_MULTITHREADED ON) - set(Boost_USE_STATIC_RUNTIME OFF) - endif() - find_package(Boost 1.40 COMPONENTS program_options REQUIRED) - add_executable(nyxstone-bin examples/nyxstone-cli.cpp) set_target_properties(nyxstone-bin PROPERTIES OUTPUT_NAME nyxstone ) target_link_libraries(nyxstone-bin PRIVATE nyxstone::nyxstone - Boost::program_options ) add_executable(example examples/example.cpp) @@ -89,4 +81,5 @@ if(NYXSTONE_BUILD_EXAMPLES) include(CTest) add_test(NAME TestExample COMMAND $) + add_test(NAME TestCLI COMMAND "${CMAKE_CURRENT_LIST_DIR}/tool/test-cli.sh") endif() diff --git a/README.md b/README.md index 86bafe1..cce0298 100644 --- a/README.md +++ b/README.md @@ -88,12 +88,9 @@ Also make sure to install any system dependent libraries needed by your LLVM ver ### CLI Tool -Nyxstone comes with a handy [CLI tool](examples/nyxstone-cli.cpp) for quick assembly and disassembly tasks. Install boost with your distribution's package manager, checkout the Nyxstone repository, and build the tool with cmake: +Nyxstone comes with a handy [CLI tool](examples/nyxstone-cli.cpp) for quick assembly and disassembly tasks. 
Check out the Nyxstone repository and build the tool with CMake: ```bash -# install boost on Ubuntu/Debian -apt install boost - # clone directory git clone https://github.com/emproof-com/nyxstone cd nyxstone @@ -105,61 +102,68 @@ mkdir build && cd build && cmake .. && make Then, `nyxstone` can be used from the command line. Here's an output of its help menu: ``` -$ ./nyxstone --help -Allowed options: - --help Show this message - --arch arg (=x86_64) LLVM triple or architecture identifier of triple. - For the most common architectures, we recommend: - x86_32: `i686-linux-gnu` - x86_64: `x86_64-linux-gnu` - armv6m: `armv6m-none-eabi` - armv7m: `armv7m-none-eabi` - armv8m: `armv8m.main-none-eabi` - aarch64: `aarch64-linux-gnueabihf` - Using shorthand identifiers like `arm` can lead to - Nyxstone not being able to assemble certain - instructions. - --cpu arg LLVM cpu specifier, refer to `llc -mtriple=ARCH - -mcpu=help` for a comprehensive list - --features arg LLVM features to enable/disable, comma seperated - feature strings prepended by '+' or '-' to enable or - disable respectively. Refer to `llc -mtriple=ARCH - -mattr=help` for a comprehensive list - --address arg (=0) Address - -Assembling: - --labels arg Labels, for example "label0=0x10,label1=0x20" - -A [ --assemble ] arg Assembly - -Disassembling: - -D [ --disassemble ] arg Byte code in hex, for example: "0203" - +$ ./nyxstone -h +Usage: nyxstone [-t=] [-p=] [-d] + +Examples: + # Assemble an instruction with the default architecture ('x86_64'). + nyxstone 'push eax' + + # Disassemble the bytes 'ffc300d1' as AArch64 code. + nyxstone -t aarch64 -d ffc300d1 + +Options: + -t, --triple= LLVM target triple or alias, e.g. 'aarch64' + -c, --cpu= LLVM CPU specifier, e.g. 'cortex-a53' + -f, --features= LLVM architecture/CPU feature list, e.g. 
'+mte,-neon' + -p, --address= Initial address to assemble/disassemble relative to + -l, --labels= Label-to-address mappings (used when assembling only) + -d, --disassemble Treat as bytes to disassemble instead of assembly + -h, --help Show this help and usage message + +Notes: + The '--triple' parameter also supports aliases for common target triples: + + 'x86_32' -> 'i686-linux-gnu' + 'x86_64' -> 'x86_64-linux-gnu' + 'armv6m' -> 'armv6m-none-eabi' + 'armv7m' -> 'armv7m-none-eabi' + 'armv8m' -> 'armv8m.main-none-eabi' + 'aarch64' -> 'aarch64-linux-gnueabihf' + + The CPUs for a target can be found with 'llc -mtriple= -mcpu=help'. + The features for a target can be found with 'llc -mtriple= -mattr=help'. ``` Now, we can assemble an instruction for the x86_64 architecture: ``` -$ ./nyxstone --arch "x86_64" -A "mov rax, rbx" -Assembled: - 0x00000000: mov rax, rbx - [ 48 89 d8 ] +$ ./nyxstone -t x86_64 "mov rax, rbx" + 0x00000000: mov rax, rbx ; 48 89 d8 ``` We can also assemble a sequence of instructions. 
In the following, we make use of label-based addressing and assume the first instruction is mapped to address `0xdeadbeef`: ``` -$ ./nyxstone --arch "x86_64" --address 0xdeadbeef -A "cmp rax, rbx; jz .exit ; inc rax ; .exit: ret" - 0xdeadbeef: cmp rax, rbx - [ 48 39 d8 ] - 0xdeadbef2: je .exit - [ 74 03 ] - 0xdeadbef4: inc rax - [ 48 ff c0 ] - 0xdeadbef7: ret - [ c3 ] +$ ./nyxstone -t x86_64 -p 0xdeadbeef "cmp rax, rbx; jz .exit; inc rax; .exit: ret" + 0xdeadbeef: cmp rax, rbx ; 48 39 d8 + 0xdeadbef2: je .exit ; 74 03 + 0xdeadbef4: inc rax ; 48 ff c0 + 0xdeadbef7: ret ; c3 +``` + +Furthermore, we can disassemble instructions for different instruction sets, here the ARM32 thumb instruction set: + +``` +$ ./nyxstone -t thumbv8 -d "13 37" + 0x00000000: adds r7, #19 ; 13 37 ``` -We can also disassemble an instruction for the ARM32 thumb instruction set: +Using the support for user-defined labels, we can assemble this snippet, which does not define the label `.label`, by specifying its memory location ourselves. 
``` -$ ./nyxstone --arch "thumbv8" -D "13 37" -Disassembled: - 0x00000000: adds r7, #19 - [ 13 37 ] +$ ./nyxstone -p "0x1000" -l ".label=0x1238" "jmp .label" + 0x00001000: jmp .label ; e9 33 02 00 00 ``` ### C++ Library diff --git a/bindings/python/nyxstone-cpp/vendor b/bindings/python/nyxstone-cpp/vendor new file mode 120000 index 0000000..c73e2c9 --- /dev/null +++ b/bindings/python/nyxstone-cpp/vendor @@ -0,0 +1 @@ +../../../vendor \ No newline at end of file diff --git a/bindings/python/setup.py b/bindings/python/setup.py index ebd168b..34fd0ef 100644 --- a/bindings/python/setup.py +++ b/bindings/python/setup.py @@ -111,7 +111,7 @@ def get_llvm_include_dir(self) -> str: Pybind11Extension( name="nyxstone_cpp", sources=srcs, - include_dirs=["nyxstone-cpp/include/", "nyxstone-cpp/src/", llvm_inc_dir], + include_dirs=["nyxstone-cpp/include/", "nyxstone-cpp/vendor", "nyxstone-cpp/src/", llvm_inc_dir], libraries=llvm_libs, library_dirs=[llvm_lib_dir], extra_link_args=[ diff --git a/bindings/rust/build.rs b/bindings/rust/build.rs index e6be99a..ce24c9b 100644 --- a/bindings/rust/build.rs +++ b/bindings/rust/build.rs @@ -74,6 +74,7 @@ fn main() { cxx_build::bridge("src/lib.rs") .std("c++17") .include("nyxstone/include") + .include("nyxstone/vendor") .include(llvm_include_dir.trim()) // .include(cxxbridge_dir) .files(sources) diff --git a/bindings/rust/nyxstone/vendor b/bindings/rust/nyxstone/vendor new file mode 120000 index 0000000..c73e2c9 --- /dev/null +++ b/bindings/rust/nyxstone/vendor @@ -0,0 +1 @@ +../../../vendor \ No newline at end of file diff --git a/bindings/rust/src/nyxstone_ffi.cpp b/bindings/rust/src/nyxstone_ffi.cpp index affef5d..c65676f 100644 --- a/bindings/rust/src/nyxstone_ffi.cpp +++ b/bindings/rust/src/nyxstone_ffi.cpp @@ -1,6 +1,6 @@ #include "nyxstone_ffi.hpp" -#include "tl/expected.hpp" +#include using namespace nyxstone; diff --git a/examples/nyxstone-cli.cpp b/examples/nyxstone-cli.cpp index c414d09..3e4a110 100644 --- 
a/examples/nyxstone-cli.cpp +++ b/examples/nyxstone-cli.cpp @@ -1,164 +1,224 @@ -#include -#include -#include - -#include #include #include #include -#include "nyxstone.h" -#include "tl/expected.hpp" +#include +#include -namespace po = boost::program_options; +#include "nyxstone.h" using nyxstone::Nyxstone; using nyxstone::NyxstoneBuilder; +std::vector decode_instruction_bytes(std::string hex_string); void print_bytes(const std::vector& bytes); std::optional> parse_labels(std::string_view labelstr); void print_instructions(const std::vector& instructions); -int main(int argc, char** argv) +constexpr auto USAGE = R"(Usage: nyxstone [-t=] [-p=] [-d] + +Examples: + # Assemble an instruction with the default architecture ('x86_64'). + nyxstone 'push eax' + + # Disassemble the bytes 'ffc300d1' as AArch64 code. + nyxstone -t aarch64 -d ffc300d1 + +Options: + -t, --triple= LLVM target triple or alias, e.g. 'aarch64' + -c, --cpu= LLVM CPU specifier, e.g. 'cortex-a53' + -f, --features= LLVM architecture/CPU feature list, e.g. '+mte,-neon' + -p, --address= Initial address to assemble/disassemble relative to + -l, --labels= Label-to-address mappings (used when assembling only) + -d, --disassemble Treat as bytes to disassemble instead of assembly + -b, --bytes-only Only output assembled bytes + -h, --help Show this help and usage message + +Notes: + The '--triple' parameter also supports aliases for common target triples: + + 'x86_32' -> 'i686-linux-gnu' + 'x86_64' -> 'x86_64-linux-gnu' + 'armv6m' -> 'armv6m-none-eabi' + 'armv7m' -> 'armv7m-none-eabi' + 'armv8m' -> 'armv8m.main-none-eabi' + 'aarch64' -> 'aarch64-linux-gnueabihf' + + The CPUs for a target can be found with 'llc -mtriple= -mcpu=help'. + The features for a target can be found with 'llc -mtriple= -mattr=help'. +)"; + +/// Parsed program options. 
+struct Options { + std::string triple = "x86_64-linux-gnu"; + std::string cpu; + std::string features; + uint64_t address = 0; + std::vector labels; + bool disassemble = false; + bool show_help = false; + bool bytes_only = false; + + std::string input; + + static tl::expected parse(int argc, char const** argv); +}; + +tl::expected Options::parse(int argc, char const** argv) { - // clang-format off - po::options_description desc("Allowed options"); - desc.add_options() - ("help", "Show this message") - ("arch", po::value()->default_value("x86_64"), - "LLVM triple or architecture identifier of triple. For the most common architectures, we recommend:\n" - "x86_32: `i686-linux-gnu`\n" - "x86_64: `x86_64-linux-gnu`\n" - "armv6m: `armv6m-none-eabi`\n" - "armv7m: `armv7m-none-eabi`\n" - "armv8m: `armv8m.main-none-eabi`\n" - "aarch64: `aarch64-linux-gnueabihf`\n" - "Using shorthand identifiers like `arm` can lead to Nyxstone not being able to assemble certain instructions." - ) - ("cpu", po::value()->default_value(""), - "LLVM cpu specifier, refer to `llc -mtriple=ARCH -mcpu=help` for a comprehensive list") - ("features", po::value()->default_value(""), - "LLVM features to enable/disable, comma seperated feature strings prepended by '+' or '-' to " - "enable or disable respectively. 
Refer to `llc -mtriple=ARCH -mattr=help` for a comprehensive list") - ("address", po::value()->default_value("0"), "Address") - ; - - po::options_description desc_asm("Assembling"); - desc_asm.add_options() - ("labels", po::value(), "Labels, for example \"label0=0x10,label1=0x20\"") - ("assemble,A", po::value(), "Assembly") - ; - - po::options_description desc_disasm("Disassembling"); - desc_disasm.add_options() - ("disassemble,D", po::value(), "Byte code in hex, for example: \"0203\"") - ; - // clang-format on - - desc.add(desc_asm); - desc.add(desc_disasm); - - po::variables_map varmap; - try { - po::store(po::parse_command_line(argc, argv, desc), varmap); - } catch (const std::exception& e) { - std::cerr << "Error occured: " << e.what() << "\n"; - return 2; + Options options; + + argh::parser args({ "-t", "--target", "-c", "--cpu", "-f", "--features", "-p", "--address", "-l", "--labels", + "-b" + "--bytes-only" }); + args.parse(argc, argv); + + if (args[{ "-h", "--help" }]) { + options.show_help = true; + return options; } - po::notify(varmap); - if (varmap.empty() || varmap.count("help") != 0) { - std::cout << desc << "\n"; - return 0; + options.triple = args({ "-t", "--triple" }, /*default=*/"x86_64-linux-gnu").str(); + if (options.triple.empty()) { + return tl::unexpected("Target triple not specified"); } - const bool has_assemble = varmap.count("assemble") > 0; - const bool has_disassemble = varmap.count("disassemble") > 0; + // These can both be empty as default options, so no need for a default value like above. 
+ options.cpu = args({ "-c", "--cpu" }).str(); + options.features = args({ "-f", "--features" }).str(); - if (!has_assemble && !has_disassemble) { - std::cout << "Nothing to do\n"; - std::cout << desc << "\n"; - return 1; + std::string address_str = args({ "-p", "--address" }, /*default=*/"0").str(); + if (!address_str.empty()) { + try { + options.address = std::stoul(address_str, nullptr, 0); + } catch (const std::exception&) { + return tl::unexpected("Failed to parse address"); + } + } else { + return tl::unexpected("Address not specified"); } - if (has_assemble && has_disassemble) { - std::cout << "Choose one of assemble/disassemble\n"; - std::cout << desc << "\n"; - return 1; + auto labels_str = args({ "-l", "--labels" }).str(); + if (!labels_str.empty()) { + auto parse_result = parse_labels(labels_str); + if (!parse_result.has_value()) { + return tl::unexpected("Failed to parse labels"); + } + + options.labels = parse_result.value(); } - auto arch = varmap["arch"].as(); + options.disassemble = args[{ "-d", "--disassemble" }]; - auto has_labels = varmap.count("labels") > 0; - if (has_labels && has_disassemble) { - std::cout << "Invalid argument\n"; - std::cout << desc << "\n"; - return 1; - } + options.bytes_only = args[{ "-b", "--bytes-only" }]; - std::vector labels = {}; - if (has_labels && has_assemble) { - auto maybe_labels = parse_labels(varmap["labels"].as()); - if (!maybe_labels.has_value()) { - return 1; - } - labels = maybe_labels.value(); + if (args.pos_args().size() < 2) { + return tl::unexpected("Missing input"); } - uint64_t address = 0; - auto addr = varmap["address"].as(); - try { - address = std::stoul(addr, nullptr, 0); - } catch (const std::exception&) { - std::cerr << "Could not parse address\n"; - return 1; + // TODO: Support multiple positional arguments. 
+ options.input = args[1]; + if (options.input.empty()) { + return tl::unexpected("Input is empty"); } - auto cpu { varmap["cpu"].as() }; - auto features { varmap["features"].as() }; + return options; +} - auto nyxstone_result { std::move( - NyxstoneBuilder(std::move(arch)).with_cpu(std::move(cpu)).with_features(std::move(features)).build()) }; - if (!nyxstone_result) { - std::cerr << "Failure creating nyxstone instance (= " << nyxstone_result.error() << " )\n"; +int main(int argc, char const** argv) +{ + auto options_result = Options::parse(argc, argv); + if (!options_result.has_value()) { + std::cerr << "Error: " << options_result.error() << ".\n"; + std::cerr << "Hint: Try 'nyxstone -h' for help.\n"; return 1; } - auto nyxstone { std::move(nyxstone_result.value()) }; - if (has_assemble) { - const auto& assembly = varmap["assemble"].as(); - nyxstone->assemble_to_instructions(assembly, address, labels) - .map_error([&assembly](const auto& error) { - std::cerr << "Could not assemble " << assembly << " (= " << error << " )\n"; - exit(1); - }) - .map(print_instructions); + auto options = options_result.value(); + if (options.show_help) { + std::cout << USAGE; + return 0; } - if (has_disassemble) { - std::vector bytes {}; - auto byte_code = varmap["disassemble"].as(); - byte_code.erase(std::remove_if(byte_code.begin(), byte_code.end(), ::isspace), byte_code.end()); - boost::algorithm::unhex(byte_code.begin(), byte_code.end(), std::back_inserter(bytes)); + auto builder = NyxstoneBuilder(std::move(options.triple)) + .with_cpu(std::move(options.cpu)) + .with_features(std::move(options.features)); + auto build_result = std::move(builder.build()); + if (!build_result) { + std::cerr << "Error: Failed to create Nyxstone instance (" << build_result.error() << ")\n"; + return 1; + } + auto nyxstone = std::move(build_result.value()); + + if (options.disassemble) { + auto bytes = decode_instruction_bytes(options.input); + if (bytes.empty()) { + std::cerr << "Error: Failed to 
decode bytes as hex.\n"; + return 1; + } - nyxstone->disassemble_to_instructions(bytes, address, 0) + nyxstone->disassemble_to_instructions(bytes, options.address, 0) .map_error([&bytes](const auto& error) { std::cerr << "Could not disassemble "; print_bytes(bytes); - std::cerr << " (= " << error << " )\n"; + std::cerr << " (" << error << ")\n"; exit(1); }) .map(print_instructions); + } else { + const auto& assembly = options.input; + if (!options.bytes_only) { + nyxstone->assemble_to_instructions(assembly, options.address, options.labels) + .map_error([&assembly](const auto& error) { + std::cerr << "Could not assemble " << assembly << " (" << error << ")\n"; + exit(1); + }) + .map(print_instructions); + } else { + nyxstone->assemble(assembly, options.address, options.labels) + .map_error([&assembly](const auto& error) { + std::cerr << "Could not assemble " << assembly << " (" << error << ")\n"; + exit(1); + }) + .map(print_bytes); + } } +} + +std::vector decode_instruction_bytes(std::string hex_string) +{ + // Drop all spaces first to support round-tripping of Nyxstone output as input. + hex_string.erase(std::remove_if(hex_string.begin(), hex_string.end(), isspace), hex_string.end()); + + auto input_size = hex_string.size(); + if (input_size % 2 != 0) { + return {}; + } + + try { + std::vector result; + result.reserve(input_size / 2); + for (size_t i = 0; i < input_size; i += 2) { + result.emplace_back(std::stoul(hex_string.substr(i, 2), nullptr, 16)); + } + + return result; + } catch (...) 
{ + return {}; + } +} - return 0; +void print_address(uint64_t address) +{ + std::cout << "\t0x" << std::hex << std::setfill('0') << std::setw(8) << address; } void print_instructions(const std::vector& instructions) { for (const auto& instr : instructions) { - std::cout << "\t0x" << std::hex << std::setfill('0') << std::setw(8) << instr.address << ": " << instr.assembly - << " - "; + print_address(instr.address); + std::cout << ": " << std::setfill(' ') << std::left << std::setw(32) << instr.assembly << std::right; + std::cout << "; "; print_bytes(instr.bytes); std::cout << "\n"; } @@ -166,11 +226,11 @@ void print_instructions(const std::vector& instructions) void print_bytes(const std::vector& bytes) { - std::cout << std::hex << "[ "; + std::cout << std::hex; for (const auto& byte : bytes) { std::cout << std::setfill('0') << std::setw(2) << static_cast(byte) << " "; } - std::cout << std::dec << "]"; + std::cout << std::dec; } std::optional> parse_labels(std::string_view labelstr) diff --git a/include/nyxstone.h b/include/nyxstone.h index 76e132f..5317c82 100644 --- a/include/nyxstone.h +++ b/include/nyxstone.h @@ -1,6 +1,6 @@ #pragma once -#include "tl/expected.hpp" +#include #include #include diff --git a/tool/format.sh b/tool/format.sh index 2863475..da59ce4 100755 --- a/tool/format.sh +++ b/tool/format.sh @@ -14,7 +14,7 @@ fi cd "$(git rev-parse --show-toplevel)" # Ignore rust auto-generated c++ files and tl/expected.hpp -files=$(find . ! -wholename "*target*" ! -wholename "*build*" ! -wholename "*tl/expected.hpp" \( -iname "*.cpp" -o -iname "*.hpp" -o -iname "*.h" \)) +files=$(find . ! -wholename "*target*" ! -wholename "*build*" ! -wholename "*expected.hpp" ! -wholename "*argh.h" \( -iname "*.cpp" -o -iname "*.hpp" -o -iname "*.h" \)) if [[ "$1" == "check" ]]; then echo "$files" | xargs clang-format --dry-run -Werror elif [ ! 
-z "$1" ]; then diff --git a/tool/static-analysis-cppcheck.sh b/tool/static-analysis-cppcheck.sh index 6820d18..a0e47b5 100755 --- a/tool/static-analysis-cppcheck.sh +++ b/tool/static-analysis-cppcheck.sh @@ -17,6 +17,6 @@ cd "$(git rev-parse --show-toplevel)" # the llvm files in `Target` are ingnored, since they are # copied directly from llvm. cxx_files=$(find . -maxdepth 2 -iname "*.cpp" | xargs echo) -includes="-Iinclude -Isrc" +includes="-Iinclude -Ivendor -Isrc" -cppcheck --enable=all --suppress=*:include/tl/expected.hpp --suppress=*:src/Target/* --inline-suppr --error-exitcode=1 --language=c++ --suppress=missingIncludeSystem $cxx_files $cxx_ffi_files $includes +cppcheck --enable=all --suppress=*:vendor/* --suppress=*:src/Target/* --inline-suppr --error-exitcode=1 --language=c++ --suppress=missingIncludeSystem $cxx_files $cxx_ffi_files $includes diff --git a/tool/static-analysis-tidy.sh b/tool/static-analysis-tidy.sh index d9920fa..ea120ee 100755 --- a/tool/static-analysis-tidy.sh +++ b/tool/static-analysis-tidy.sh @@ -17,7 +17,7 @@ cd "$(git rev-parse --show-toplevel)" # to a depth of 2. cxx_files=$(find . -maxdepth 2 -iname "*.cpp" | xargs echo) # Include directories so that headers can be found. -includes="-Iinclude" +includes="-Iinclude -Ivendor" # Set standard to c++17. cxx_flags="-std=c++17" diff --git a/tool/test-cli.sh b/tool/test-cli.sh new file mode 100755 index 0000000..0bbed37 --- /dev/null +++ b/tool/test-cli.sh @@ -0,0 +1,25 @@ +#!/bin/sh + +set -eu + +# Sanity check that assembling and disassembling yields the same output +# NOTE: Currently we cannot test using labels, since we keep them when assembling. 
+assembly="cmp rax, rbx; inc rax; add rsp, 8; ret" +address="0xdeadbeef" + +assembled=$(./nyxstone -t "x86_64" -p "$address" "$assembly") +assembled_bytes=$(./nyxstone -t "x86_64" --bytes-only -p "$address" "$assembly") +# assembled_bytes="03 02" +disassembled=$(./nyxstone -t "x86_64" -p "$address" -d "$assembled_bytes") + +if [ "$assembled" = "$disassembled" ]; then + exit 0 +else + echo "Output Mismatch" + echo "---------------" + echo "Assembled:" + echo "$assembled" + echo "Disassembled:" + echo "$disassembled" + exit 1 +fi diff --git a/include/tl/.clang-tidy b/vendor/.clang-tidy similarity index 100% rename from include/tl/.clang-tidy rename to vendor/.clang-tidy diff --git a/vendor/argh.h b/vendor/argh.h new file mode 100644 index 0000000..423f35c --- /dev/null +++ b/vendor/argh.h @@ -0,0 +1,514 @@ +// "Argh!" library from . +// +// Copyright (c) 2016, Adi Shavit +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace argh +{ + // Terminology: + // A command line is composed of 2 types of args: + // 1. Positional args, i.e. free standing values + // 2. Options: args beginning with '-'. We identify two kinds: + // 2.1: Flags: boolean options => (exist ? true : false) + // 2.2: Parameters: a name followed by a non-option value + +#if !defined(__GNUC__) || (__GNUC__ >= 5) + using string_stream = std::istringstream; +#else + // Until GCC 5, istringstream did not have a move constructor. + // stringstream_proxy is used instead, as a workaround. + class stringstream_proxy + { + public: + stringstream_proxy() = default; + + // Construct with a value. + stringstream_proxy(std::string const& value) : + stream_(value) + {} + + // Copy constructor. + stringstream_proxy(const stringstream_proxy& other) : + stream_(other.stream_.str()) + { + stream_.setstate(other.stream_.rdstate()); + } + + void setstate(std::ios_base::iostate state) { stream_.setstate(state); } + + // Stream out the value of the parameter. + // If the conversion was not possible, the stream will enter the fail state, + // and operator bool will return false. + template + stringstream_proxy& operator >> (T& thing) + { + stream_ >> thing; + return *this; + } + + + // Get the string value. 
+ std::string str() const { return stream_.str(); } + + std::stringbuf* rdbuf() const { return stream_.rdbuf(); } + + // Check the state of the stream. + // False when the most recent stream operation failed + explicit operator bool() const { return !!stream_; } + + ~stringstream_proxy() = default; + private: + std::istringstream stream_; + }; + using string_stream = stringstream_proxy; +#endif + + class multimap_iteration_wrapper + { + public: + using container_t = std::multimap; + using iterator_t = container_t::const_iterator; + using difference_t = container_t::difference_type; + explicit multimap_iteration_wrapper(const iterator_t& lb, const iterator_t& ub) + : lb_(lb) + , ub_(ub) + {} + + iterator_t begin() const { return lb_; } + iterator_t end() const { return ub_; } + difference_t size() const { return std::distance(lb_, ub_); } + + private: + iterator_t lb_; + iterator_t ub_; + }; + + class parser + { + public: + enum Mode { PREFER_FLAG_FOR_UNREG_OPTION = 1 << 0, + PREFER_PARAM_FOR_UNREG_OPTION = 1 << 1, + NO_SPLIT_ON_EQUALSIGN = 1 << 2, + SINGLE_DASH_IS_MULTIFLAG = 1 << 3, + }; + + parser() = default; + + parser(std::initializer_list pre_reg_names) + { add_params(pre_reg_names); } + + parser(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION) + { parse(argv, mode); } + + parser(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION) + { parse(argc, argv, mode); } + + void add_param(std::string const& name); + void add_params(std::string const& name); + + void add_param(std::initializer_list init_list); + void add_params(std::initializer_list init_list); + + void parse(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION); + void parse(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION); + + std::multiset const& flags() const { return flags_; } + std::multimap const& params() const { return params_; } + multimap_iteration_wrapper params(std::string const& name) const; + std::vector 
const& pos_args() const { return pos_args_; } + + // begin() and end() for using range-for over positional args. + std::vector::const_iterator begin() const { return pos_args_.cbegin(); } + std::vector::const_iterator end() const { return pos_args_.cend(); } + size_t size() const { return pos_args_.size(); } + + ////////////////////////////////////////////////////////////////////////// + // Accessors + + // flag (boolean) accessors: return true if the flag appeared, otherwise false. + bool operator[](std::string const& name) const; + + // multiple flag (boolean) accessors: return true if at least one of the flag appeared, otherwise false. + bool operator[](std::initializer_list init_list) const; + + // returns positional arg string by order. Like argv[] but without the options + std::string const& operator[](size_t ind) const; + + // returns a std::istream that can be used to convert a positional arg to a typed value. + string_stream operator()(size_t ind) const; + + // same as above, but with a default value in case the arg is missing (index out of range). + template + string_stream operator()(size_t ind, T&& def_val) const; + + // parameter accessors, give a name get an std::istream that can be used to convert to a typed value. + // call .str() on result to get as string + string_stream operator()(std::string const& name) const; + + // accessor for a parameter with multiple names, give a list of names, get an std::istream that can be used to convert to a typed value. + // call .str() on result to get as string + // returns the first value in the list to be found. + string_stream operator()(std::initializer_list init_list) const; + + // same as above, but with a default value in case the param was missing. + // Non-string def_val types must have an operator<<() (output stream operator) + // If T only has an input stream operator, pass the string version of the type as in "3" instead of 3. 
+ template + string_stream operator()(std::string const& name, T&& def_val) const; + + // same as above but for a list of names. returns the first value to be found. + template + string_stream operator()(std::initializer_list init_list, T&& def_val) const; + + private: + string_stream bad_stream() const; + std::string trim_leading_dashes(std::string const& name) const; + bool is_number(std::string const& arg) const; + bool is_option(std::string const& arg) const; + bool got_flag(std::string const& name) const; + bool is_param(std::string const& name) const; + + private: + std::vector args_; + std::multimap params_; + std::vector pos_args_; + std::multiset flags_; + std::set registeredParams_; + std::string empty_; + }; + + + ////////////////////////////////////////////////////////////////////////// + + inline void parser::parse(const char * const argv[], int mode) + { + int argc = 0; + for (auto argvp = argv; *argvp; ++argc, ++argvp); + parse(argc, argv, mode); + } + + ////////////////////////////////////////////////////////////////////////// + + inline void parser::parse(int argc, const char* const argv[], int mode /*= PREFER_FLAG_FOR_UNREG_OPTION*/) + { + // clear out possible previous parsing remnants + flags_.clear(); + params_.clear(); + pos_args_.clear(); + + // convert to strings + args_.resize(static_cast(argc)); + std::transform(argv, argv + argc, args_.begin(), [](const char* const arg) { return arg; }); + + // parse line + for (auto i = 0u; i < args_.size(); ++i) + { + if (!is_option(args_[i])) + { + pos_args_.emplace_back(args_[i]); + continue; + } + + auto name = trim_leading_dashes(args_[i]); + + if (!(mode & NO_SPLIT_ON_EQUALSIGN)) + { + auto equalPos = name.find('='); + if (equalPos != std::string::npos) + { + params_.insert({ name.substr(0, equalPos), name.substr(equalPos + 1) }); + continue; + } + } + + // if the option is unregistered and should be a multi-flag + if (1 == (args_[i].size() - name.size()) && // single dash + 
argh::parser::SINGLE_DASH_IS_MULTIFLAG & mode && // multi-flag mode + !is_param(name)) // unregistered + { + std::string keep_param; + + if (!name.empty() && is_param(std::string(1ul, name.back()))) // last char is param + { + keep_param += name.back(); + name.resize(name.size() - 1); + } + + for (auto const& c : name) + { + flags_.emplace(std::string{ c }); + } + + if (!keep_param.empty()) + { + name = keep_param; + } + else + { + continue; // do not consider other options for this arg + } + } + + // any potential option will get as its value the next arg, unless that arg is an option too + // in that case it will be determined a flag. + if (i == args_.size() - 1 || is_option(args_[i + 1])) + { + flags_.emplace(name); + continue; + } + + // if 'name' is a pre-registered option, then the next arg cannot be a free parameter to it is skipped + // otherwise we have 2 modes: + // PREFER_FLAG_FOR_UNREG_OPTION: a non-registered 'name' is determined a flag. + // The following value (the next arg) will be a free parameter. + // + // PREFER_PARAM_FOR_UNREG_OPTION: a non-registered 'name' is determined a parameter, the next arg + // will be the value of that option. 
+ + assert(!(mode & argh::parser::PREFER_FLAG_FOR_UNREG_OPTION) + || !(mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION)); + + bool preferParam = mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION; + + if (is_param(name) || preferParam) + { + params_.insert({ name, args_[i + 1] }); + ++i; // skip next value, it is not a free parameter + continue; + } + else + { + flags_.emplace(name); + } + } + } + + ////////////////////////////////////////////////////////////////////////// + + inline string_stream parser::bad_stream() const + { + string_stream bad; + bad.setstate(std::ios_base::failbit); + return bad; + } + + ////////////////////////////////////////////////////////////////////////// + + inline bool parser::is_number(std::string const& arg) const + { + // inefficient but simple way to determine if a string is a number (which can start with a '-') + std::istringstream istr(arg); + double number; + istr >> number; + return !(istr.fail() || istr.bad()); + } + + ////////////////////////////////////////////////////////////////////////// + + inline bool parser::is_option(std::string const& arg) const + { + assert(0 != arg.size()); + if (is_number(arg)) + return false; + return '-' == arg[0]; + } + + ////////////////////////////////////////////////////////////////////////// + + inline std::string parser::trim_leading_dashes(std::string const& name) const + { + auto pos = name.find_first_not_of('-'); + return std::string::npos != pos ? 
name.substr(pos) : name; + } + + ////////////////////////////////////////////////////////////////////////// + + inline bool argh::parser::got_flag(std::string const& name) const + { + return flags_.end() != flags_.find(trim_leading_dashes(name)); + } + + ////////////////////////////////////////////////////////////////////////// + + inline bool argh::parser::is_param(std::string const& name) const + { + return registeredParams_.count(name); + } + + ////////////////////////////////////////////////////////////////////////// + + inline bool parser::operator[](std::string const& name) const + { + return got_flag(name); + } + + ////////////////////////////////////////////////////////////////////////// + + inline bool parser::operator[](std::initializer_list init_list) const + { + return std::any_of(init_list.begin(), init_list.end(), [&](char const* const name) { return got_flag(name); }); + } + + ////////////////////////////////////////////////////////////////////////// + + inline std::string const& parser::operator[](size_t ind) const + { + if (ind < pos_args_.size()) + return pos_args_[ind]; + return empty_; + } + + ////////////////////////////////////////////////////////////////////////// + + inline string_stream parser::operator()(std::string const& name) const + { + auto optIt = params_.find(trim_leading_dashes(name)); + if (params_.end() != optIt) + return string_stream(optIt->second); + return bad_stream(); + } + + ////////////////////////////////////////////////////////////////////////// + + inline string_stream parser::operator()(std::initializer_list init_list) const + { + for (auto& name : init_list) + { + auto optIt = params_.find(trim_leading_dashes(name)); + if (params_.end() != optIt) + return string_stream(optIt->second); + } + return bad_stream(); + } + + ////////////////////////////////////////////////////////////////////////// + + template + string_stream parser::operator()(std::string const& name, T&& def_val) const + { + auto optIt = 
params_.find(trim_leading_dashes(name)); + if (params_.end() != optIt) + return string_stream(optIt->second); + + std::ostringstream ostr; + ostr.precision(std::numeric_limits::max_digits10); + ostr << def_val; + return string_stream(ostr.str()); // use default + } + + ////////////////////////////////////////////////////////////////////////// + + // same as above but for a list of names. returns the first value to be found. + template + string_stream parser::operator()(std::initializer_list init_list, T&& def_val) const + { + for (auto& name : init_list) + { + auto optIt = params_.find(trim_leading_dashes(name)); + if (params_.end() != optIt) + return string_stream(optIt->second); + } + std::ostringstream ostr; + ostr.precision(std::numeric_limits::max_digits10); + ostr << def_val; + return string_stream(ostr.str()); // use default + } + + ////////////////////////////////////////////////////////////////////////// + + inline string_stream parser::operator()(size_t ind) const + { + if (pos_args_.size() <= ind) + return bad_stream(); + + return string_stream(pos_args_[ind]); + } + + ////////////////////////////////////////////////////////////////////////// + + template + string_stream parser::operator()(size_t ind, T&& def_val) const + { + if (pos_args_.size() <= ind) + { + std::ostringstream ostr; + ostr.precision(std::numeric_limits::max_digits10); + ostr << def_val; + return string_stream(ostr.str()); + } + + return string_stream(pos_args_[ind]); + } + + ////////////////////////////////////////////////////////////////////////// + + inline void parser::add_param(std::string const& name) + { + registeredParams_.insert(trim_leading_dashes(name)); + } + + ////////////////////////////////////////////////////////////////////////// + + inline void parser::add_param(std::initializer_list init_list) + { + parser::add_params(init_list); + } + + ////////////////////////////////////////////////////////////////////////// + + inline void parser::add_params(std::initializer_list 
init_list) + { + for (auto& name : init_list) + registeredParams_.insert(trim_leading_dashes(name)); + } + + ////////////////////////////////////////////////////////////////////////// + + inline void parser::add_params(const std::string &name) + { + parser::add_param(name); + } + + ////////////////////////////////////////////////////////////////////////// + + inline multimap_iteration_wrapper parser::params(std::string const& name) const + { + auto trimmed_name = trim_leading_dashes(name); + return multimap_iteration_wrapper(params_.lower_bound(trimmed_name), params_.upper_bound(trimmed_name)); + } +} diff --git a/include/tl/expected.hpp b/vendor/expected.hpp similarity index 100% rename from include/tl/expected.hpp rename to vendor/expected.hpp