diff --git a/.adacore-gitlab-ci.yml b/.adacore-gitlab-ci.yml new file mode 100644 index 00000000..9f70bcaf --- /dev/null +++ b/.adacore-gitlab-ci.yml @@ -0,0 +1,24 @@ +workflow: + rules: + - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' + +anod_build: + services: + - image:sandbox + - cpu:8 + - mem:16 + stage: build + script: + - export ANOD_DEFAULT_SANDBOX_DIR=/it/wave + + # Check out QSYM + - cd runtime/qsym_backend + - git clone -b symcc https://gitlab-ci-token:${CI_JOB_TOKEN}@${CI_SERVER_HOST}:${CI_SERVER_PORT}/eng/fuzz/qsym + + # Use our repositories + - anod vcs --add-repo symcc $CI_PROJECT_DIR + - anod vcs --add-repo qsym $CI_PROJECT_DIR/runtime/qsym_backend/qsym + + # Build SymCC + - anod source symcc + - anod build symcc diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..f007ce43 --- /dev/null +++ b/.clang-format @@ -0,0 +1,3 @@ +--- +BasedOnStyle: LLVM +... diff --git a/.github/workflows/check_style.yml b/.github/workflows/check_style.yml new file mode 100644 index 00000000..f3580388 --- /dev/null +++ b/.github/workflows/check_style.yml @@ -0,0 +1,19 @@ +name: Check coding style +on: [pull_request] +jobs: + coding_style: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Run clang-format + shell: bash + run: | + format_changes=$(git clang-format-14 --quiet --diff \ + ${{ github.event.pull_request.base.sha }} \ + ${{ github.event.pull_request.head.sha }} | wc -c) + if [[ $format_changes -ne 0 ]]; then + echo "Please format your changes with clang-format using the LLVM style, e.g., git clang-format --style LLVM before committing" + exit 1 + fi diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index fe2c0f82..e3666317 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -1,5 +1,5 @@ name: Compile and test SymCC -on: [push, pull_request] +on: [pull_request, workflow_dispatch] jobs: build_and_test_symcc: runs-on: 
ubuntu-20.04 @@ -15,3 +15,64 @@ jobs: run: docker build --target builder_qsym -t symcc . - name: Creation of the final SymCC docker image with Qsym backend and libcxx run: docker build -t symcc . + llvm_compatibility: + runs-on: ubuntu-22.04 + strategy: + matrix: + llvm_version: [11, 12, 13, 14, 15] + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + llvm-${{ matrix.llvm_version }}-dev \ + libz3-dev \ + python2 + - name: Build SymCC with the QSYM backend + run: | + mkdir build + cd build + cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DZ3_TRUST_SYSTEM_VERSION=ON \ + -DQSYM_BACKEND=ON \ + -DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm_version }}/cmake \ + .. + make + llvm_compatibility_latest_llvm: + runs-on: ubuntu-22.04 + strategy: + matrix: + llvm_version: [16, 17] + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - name: Add LLVM project deb repository + uses: myci-actions/add-deb-repo@11 + with: + repo: deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-${{ matrix.llvm_version }} main + repo-name: llvm + update: false + keys-asc: https://apt.llvm.org/llvm-snapshot.gpg.key + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + llvm-${{ matrix.llvm_version }}-dev \ + libz3-dev \ + python2 + - name: Build SymCC with the QSYM backend + run: | + mkdir build + cd build + cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DZ3_TRUST_SYSTEM_VERSION=ON \ + -DQSYM_BACKEND=ON \ + -DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm_version }}/cmake \ + .. 
+ make diff --git a/.gitignore b/.gitignore index 9aba1266..a5522f87 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,7 @@ TAGS # Clang tooling compile_commands.json .clangd +.cache + +# Build directories +build* diff --git a/CMakeLists.txt b/CMakeLists.txt index d9832256..5d98a9cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,8 @@ cmake_minimum_required(VERSION 3.5) project(SymbolicCompiler) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") + option(QSYM_BACKEND "Use the Qsym backend instead of our own" OFF) option(TARGET_32BIT "Make the compiler work correctly with -m32" OFF) @@ -33,10 +35,12 @@ set(SYM_RUNTIME_BUILD_ARGS -DCMAKE_CXX_FLAGS_INIT=${CMAKE_CXX_FLAGS_INIT} -DCMAKE_EXE_LINKER_FLAGS=${CMAKE_EXE_LINKER_FLAGS} -DCMAKE_EXE_LINKER_FLAGS_INIT=${CMAKE_EXE_LINKER_FLAGS_INIT} + -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} -DCMAKE_MODULE_LINKER_FLAGS=${CMAKE_MODULE_LINKER_FLAGS} -DCMAKE_MODULE_LINKER_FLAGS_INIT=${CMAKE_MODULE_LINKER_FLAGS_INIT} -DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS} -DCMAKE_SHARED_LINKER_FLAGS_INIT=${CMAKE_SHARED_LINKER_FLAGS_INIT} + -DCMAKE_MODULE_PATH=${CMAKE_MODULE_PATH} -DCMAKE_SYSROOT=${CMAKE_SYSROOT} -DQSYM_BACKEND=${QSYM_BACKEND} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} @@ -76,8 +80,8 @@ find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}") -if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 11) - message(WARNING "The software has been developed for LLVM 8 through 11; \ +if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 17) + message(WARNING "The software has been developed for LLVM 8 through 17; \ it is unlikely to work with other versions!") endif() @@ -87,7 +91,7 @@ include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 \ -Wredundant-decls -Wcast-align -Wmissing-include-dirs -Wswitch-default \ -Wextra -Wall -Winvalid-pch 
-Wredundant-decls -Wformat=2 \ --Wmissing-format-attribute -Wformat-nonliteral -Werror") +-Wmissing-format-attribute -Wformat-nonliteral -Werror -Wno-error=deprecated-declarations") # Mark nodelete to work around unload bug in upstream LLVM 5.0+ set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-z,nodelete") @@ -114,6 +118,12 @@ if (NOT CLANG_BINARY) message(FATAL_ERROR "Clang not found; please make sure that the version corresponding to your LLVM installation is available.") endif() +if (${LLVM_VERSION_MAJOR} LESS 13) + set(CLANG_LOAD_PASS "-Xclang -load -Xclang ") +else() + set(CLANG_LOAD_PASS "-fpass-plugin=") +endif() + configure_file("compiler/symcc.in" "symcc" @ONLY) configure_file("compiler/sym++.in" "sym++" @ONLY) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..be09db8d --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,17 @@ +# Contributing to SymCC + +We encourage everyone to contribute improvements and bug fixes to SymCC. Our +preferred way of accepting contributions is via GitHub pull requests. Please be +sure to run clang-format on any C/C++ code you change; an easy way to do so is +with `git clang-format --style LLVM` just before committing. (On Ubuntu, you can +get `git-clang-format` via `apt install clang-format`.) Ideally, also add a test +to your patch (see the +[docs](https://github.com/eurecom-s3/symcc/blob/master/docs/Testing.txt) for +details). Unfortunately, since the project is a bit short on developers at the +moment, we have to ask for your patience while we review your PR. + +Please note that any contributions you make are licensed under the same terms as +the code you're contributing to, as per the GitHub Terms of Service, [section +D.6](https://docs.github.com/en/site-policy/github-terms/github-terms-of-service#6-contributions-under-repository-license). +At the time of writing, this means LGPL (version 3 or later) for the SymCC +runtime, and GPL (version 3 or later) for the rest of SymCC. 
diff --git a/Dockerfile b/Dockerfile index 1d695ac0..5d93d01d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,19 +15,19 @@ # # The base image # -FROM ubuntu:20.04 AS builder +FROM ubuntu:22.04 AS builder # Install dependencies RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y \ cargo \ - clang-10 \ + clang-15 \ cmake \ g++ \ git \ libz3-dev \ - llvm-10-dev \ - llvm-10-tools \ + llvm-15-dev \ + llvm-15-tools \ ninja-build \ python2 \ python3-pip \ @@ -42,7 +42,7 @@ RUN git clone -b v2.56b https://github.com/google/AFL.git afl \ # Download the LLVM sources already so that we don't need to get them again when # SymCC changes -RUN git clone -b llvmorg-10.0.1 --depth 1 https://github.com/llvm/llvm-project.git /llvm_source +RUN git clone -b llvmorg-15.0.0 --depth 1 https://github.com/llvm/llvm-project.git /llvm_source # Build a version of SymCC with the simple backend to compile libc++ COPY . /symcc_source @@ -105,14 +105,14 @@ RUN cmake -G Ninja \ # # The final image # -FROM ubuntu:20.04 +FROM ubuntu:22.04 RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y \ build-essential \ - clang-10 \ + clang-15 \ g++ \ - libllvm10 \ + libllvm15 \ zlib1g \ sudo \ && rm -rf /var/lib/apt/lists/* \ @@ -127,8 +127,8 @@ COPY --from=builder_qsym /afl /afl ENV PATH /symcc_build:$PATH ENV AFL_PATH /afl -ENV AFL_CC clang-10 -ENV AFL_CXX clang++-10 +ENV AFL_CC clang-15 +ENV AFL_CXX clang++-15 ENV SYMCC_LIBCXX_PATH=/libcxx_symcc_install USER ubuntu diff --git a/LICENSE.lgpl b/LICENSE.lgpl new file mode 100644 index 00000000..0a041280 --- /dev/null +++ b/LICENSE.lgpl @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. 
+ + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/README.md b/README.md index 075a5b60..4e6cea09 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,32 @@ [![Compile and test SymCC](https://github.com/eurecom-s3/symcc/actions/workflows/run_tests.yml/badge.svg)](https://github.com/eurecom-s3/symcc/actions/workflows/run_tests.yml) +Note: The SymCC project is currently understaffed and therefore maintained on a +best-effort basis. We are hiring: if you are interested in joining +the [S3 group at Eurecom](https://www.s3.eurecom.fr/) to work on this (and other +projects in the group), please [contact us](mailto:aurelien.francillon@eurecom.fr). +We nevertheless appreciate PRs and apologize in advance for the slow processing +of PRs; we will try to merge them when possible. + # SymCC: efficient compiler-based symbolic execution -SymCC is a compiler wrapper which embeds symbolic execution into the program +SymCC is a compiler pass which embeds symbolic execution into the program during compilation, and an associated run-time support library. In essence, the compiler inserts code that computes symbolic expressions for each value in the program. The actual computation happens through calls to the support library at run time. -To build the pass and the support library, make sure that LLVM 8, 9, 10 or 11 -and Z3 version 4.5 or later, as well as a C++ compiler with support for C++17 -are installed. "lit" is also needed which is not always packaged with LLVM. +To build the pass and the support library, install LLVM (any version between 8 +and 17) and Z3 (version 4.5 or later), as well as a C++ compiler with support +for C++17. LLVM lit is only needed to run the tests; if it's not packaged with +your LLVM, you can get it with `pip install lit`. 
-Under Ubuntu groovy the following one liner should install all required +Under Ubuntu Groovy the following one-liner should install all required packages: ``` -sudo apt install -y git cargo clang-10 cmake g++ git libz3-dev llvm-10-dev llvm-10-tools ninja-build python2 python3-pip zlib1g-dev && sudo pip3 install lit +sudo apt install -y git cargo clang-14 cmake g++ git libz3-dev llvm-14-dev llvm-14-tools ninja-build python2 python3-pip zlib1g-dev && sudo pip3 install lit ``` + Alternatively, see below for using the provided Dockerfile, or the file `util/quicktest.sh` for exact steps to perform under Ubuntu (or use with the provided Vagrant file). @@ -173,6 +182,57 @@ every change to SymCC (which is, in principle the right thing to do), whereas in many cases it is sufficient to let the build system figure out what to rebuild (and recompile, e.g., libc++ only when necessary). +## FAQ / BUGS / TODOs + +### Why is SymCC only exploring one path and not all paths? + +SymCC is currently a concolic executor: it follows the concrete +path. In theory, it would be possible to make it a forking executor; +see [issue #14](https://github.com/eurecom-s3/symcc/issues/14). + +### Why does SymCC not generate some test cases? + +There are multiple possible reasons: + +#### QSym backend performs pruning + +When built with the QSym backend, SymCC's exploration (e.g., of loops) is +subject to path pruning. This is part of the optimizations that make +SymCC/QSym fast, but it isn't sound. This is not a problem for use in +hybrid fuzzing, but it may be a problem for other uses. See for +example [issue #88](https://github.com/eurecom-s3/symcc/issues/88). + +When building with the simple backend, the paths should be found. If +the paths are not found with the simple backend, this may be a bug (or +possibly a limitation of the simple backend). + +#### Incomplete symbolic handling of functions and system interactions + +The current symbolic understanding of libc is incomplete. 
So when an +unsupported libc function is called, SymCC can't trace the computations +that happen in the function. Possible remedies: + +1. Add the function to the [collection of wrapped libc + functions](https://github.com/eurecom-s3/symcc/blob/master/runtime/LibcWrappers.cpp) + and [register the + wrapper](https://github.com/eurecom-s3/symcc/blob/b29dc4db2803830ebf50798e72b336473a567655/compiler/Runtime.cpp#L159) + in the compiler. +2. Build a fully instrumented libc. +3. Cherry-pick individual libc functions from a libc implementation (e.g., musl). + +See [issue #23](https://github.com/eurecom-s3/symcc/issues/23) for more details. + + +### Rust support? + +It would be possible to support Rust; see [issue +#1](https://github.com/eurecom-s3/symcc/issues/1) for tracking this. + +### Bug reporting + +We appreciate bug reports with test cases and steps to reproduce, as well as PRs with +corresponding test cases. SymCC is currently understaffed; we hope to +catch up and get back to active development at some point. ## Contact @@ -210,17 +270,24 @@ SymCC is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. +As an exception from the above, you can redistribute and/or modify the SymCC +runtime under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at your +option) any later version. See #114 for the rationale. + SymCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along with -SymCC. If not, see <https://www.gnu.org/licenses/>. 
+You should have received a copy of the GNU General Public License and the GNU +Lesser General Public License along with SymCC. If not, see +<https://www.gnu.org/licenses/>. The following pieces of software have additional or alternate copyrights, licenses, and/or restrictions: -| Program | Directory | -| --- | --- | -| QSYM | `runtime/qsym_backend/qsym` | +| Program | Directory | +|---------------|-----------------------------| +| QSYM | `runtime/qsym_backend/qsym` | +| SymCC runtime | `runtime` | diff --git a/cmake/FindFilesystem.cmake b/cmake/FindFilesystem.cmake new file mode 100644 index 00000000..a152e522 --- /dev/null +++ b/cmake/FindFilesystem.cmake @@ -0,0 +1,247 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: + +FindFilesystem +############## + +This module supports the C++17 standard library's filesystem utilities. Use the +:imp-target:`std::filesystem` imported target to link against the filesystem library. + +Options +******* + +The ``COMPONENTS`` argument to this module supports the following values: + +.. find-component:: Experimental + :name: fs.Experimental + + Allows the module to find the "experimental" Filesystem TS version of the + Filesystem library. This is the library that should be used with the + ``std::experimental::filesystem`` namespace. + +.. find-component:: Final + :name: fs.Final + + Finds the final C++17 standard version of the filesystem library. + +If no components are provided, behaves as if the +:find-component:`fs.Final` component was specified. + +If both :find-component:`fs.Experimental` and :find-component:`fs.Final` are +provided, first looks for ``Final``, and falls back to ``Experimental`` in case +of failure. If ``Final`` is found, :imp-target:`std::filesystem` and all +:ref:`variables <fs.variables>` will refer to the ``Final`` version. + + +Imported Targets +**************** + +.. 
imp-target:: std::filesystem + + The ``std::filesystem`` imported target is defined when any requested + version of the C++ filesystem library has been found, whether it is + *Experimental* or *Final*. + + If no version of the filesystem library is available, this target will not + be defined. + + .. note:: + This target has ``cxx_std_17`` as an ``INTERFACE`` + :ref:`compile language standard feature `. Linking + to this target will automatically enable C++17 if no later standard + version is already required on the linking target. + + +.. _fs.variables: + +Variables +********* + +.. variable:: CXX_FILESYSTEM_IS_EXPERIMENTAL + + Set to ``TRUE`` when the :find-component:`fs.Experimental` version of C++ + filesystem library was found, otherwise ``FALSE``. + +.. variable:: CXX_FILESYSTEM_HAVE_FS + + Set to ``TRUE`` when a filesystem header was found. + +.. variable:: CXX_FILESYSTEM_HEADER + + Set to either ``filesystem`` or ``experimental/filesystem`` depending on + whether :find-component:`fs.Final` or :find-component:`fs.Experimental` was + found. + +.. variable:: CXX_FILESYSTEM_NAMESPACE + + Set to either ``std::filesystem`` or ``std::experimental::filesystem`` + depending on whether :find-component:`fs.Final` or + :find-component:`fs.Experimental` was found. + + +Examples +******** + +Using `find_package(Filesystem)` with no component arguments: + +.. code-block:: cmake + + find_package(Filesystem REQUIRED) + + add_executable(my-program main.cpp) + target_link_libraries(my-program PRIVATE std::filesystem) + + +#]=======================================================================] + + +if(TARGET std::filesystem) + # This module has already been processed. Don't do it again. + return() +endif() + +cmake_minimum_required(VERSION 3.10) + +include(CMakePushCheckState) +include(CheckIncludeFileCXX) + +# If we're not cross-compiling, try to run test executables. +# Otherwise, assume that compile + link is a sufficient check. 
+if(CMAKE_CROSSCOMPILING) + include(CheckCXXSourceCompiles) + macro(_cmcm_check_cxx_source code var) + check_cxx_source_compiles("${code}" ${var}) + endmacro() +else() + include(CheckCXXSourceRuns) + macro(_cmcm_check_cxx_source code var) + check_cxx_source_runs("${code}" ${var}) + endmacro() +endif() + +cmake_push_check_state() + +set(CMAKE_REQUIRED_QUIET ${Filesystem_FIND_QUIETLY}) + +# All of our tests required C++17 or later +set(CMAKE_CXX_STANDARD 17) + +# Normalize and check the component list we were given +set(want_components ${Filesystem_FIND_COMPONENTS}) +if(Filesystem_FIND_COMPONENTS STREQUAL "") + set(want_components Final) +endif() + +# Warn on any unrecognized components +set(extra_components ${want_components}) +list(REMOVE_ITEM extra_components Final Experimental) +foreach(component IN LISTS extra_components) + message(WARNING "Extraneous find_package component for Filesystem: ${component}") +endforeach() + +# Detect which of Experimental and Final we should look for +set(find_experimental TRUE) +set(find_final TRUE) +if(NOT "Final" IN_LIST want_components) + set(find_final FALSE) +endif() +if(NOT "Experimental" IN_LIST want_components) + set(find_experimental FALSE) +endif() + +if(find_final) + check_include_file_cxx("filesystem" _CXX_FILESYSTEM_HAVE_HEADER) + mark_as_advanced(_CXX_FILESYSTEM_HAVE_HEADER) + if(_CXX_FILESYSTEM_HAVE_HEADER) + # We found the non-experimental header. Don't bother looking for the + # experimental one. 
+ set(find_experimental FALSE) + endif() +else() + set(_CXX_FILESYSTEM_HAVE_HEADER FALSE) +endif() + +if(find_experimental) + check_include_file_cxx("experimental/filesystem" _CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) + mark_as_advanced(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) +else() + set(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER FALSE) +endif() + +if(_CXX_FILESYSTEM_HAVE_HEADER) + set(_have_fs TRUE) + set(_fs_header filesystem) + set(_fs_namespace std::filesystem) + set(_is_experimental FALSE) +elseif(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) + set(_have_fs TRUE) + set(_fs_header experimental/filesystem) + set(_fs_namespace std::experimental::filesystem) + set(_is_experimental TRUE) +else() + set(_have_fs FALSE) +endif() + +set(CXX_FILESYSTEM_HAVE_FS ${_have_fs} CACHE BOOL "TRUE if we have the C++ filesystem headers") +set(CXX_FILESYSTEM_HEADER ${_fs_header} CACHE STRING "The header that should be included to obtain the filesystem APIs") +set(CXX_FILESYSTEM_NAMESPACE ${_fs_namespace} CACHE STRING "The C++ namespace that contains the filesystem APIs") +set(CXX_FILESYSTEM_IS_EXPERIMENTAL ${_is_experimental} CACHE BOOL "TRUE if the C++ filesystem library is the experimental version") + +set(_found FALSE) + +if(CXX_FILESYSTEM_HAVE_FS) + # We have some filesystem library available. 
Do link checks + string(CONFIGURE [[ + #include + #include <@CXX_FILESYSTEM_HEADER@> + + int main() { + auto cwd = @CXX_FILESYSTEM_NAMESPACE@::current_path(); + printf("%s", cwd.c_str()); + return EXIT_SUCCESS; + } + ]] code @ONLY) + + # Check a simple filesystem program without any linker flags + _cmcm_check_cxx_source("${code}" CXX_FILESYSTEM_NO_LINK_NEEDED) + + set(can_link ${CXX_FILESYSTEM_NO_LINK_NEEDED}) + + if(NOT CXX_FILESYSTEM_NO_LINK_NEEDED) + set(prev_libraries ${CMAKE_REQUIRED_LIBRARIES}) + # Add the libstdc++ flag + set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lstdc++fs) + _cmcm_check_cxx_source("${code}" CXX_FILESYSTEM_STDCPPFS_NEEDED) + set(can_link ${CXX_FILESYSTEM_STDCPPFS_NEEDED}) + if(NOT CXX_FILESYSTEM_STDCPPFS_NEEDED) + # Try the libc++ flag + set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lc++fs) + _cmcm_check_cxx_source("${code}" CXX_FILESYSTEM_CPPFS_NEEDED) + set(can_link ${CXX_FILESYSTEM_CPPFS_NEEDED}) + endif() + endif() + + if(can_link) + add_library(std::filesystem INTERFACE IMPORTED) + set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_17) + set(_found TRUE) + + if(CXX_FILESYSTEM_NO_LINK_NEEDED) + # Nothing to add... + elseif(CXX_FILESYSTEM_STDCPPFS_NEEDED) + set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_LINK_LIBRARIES -lstdc++fs) + elseif(CXX_FILESYSTEM_CPPFS_NEEDED) + set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_LINK_LIBRARIES -lc++fs) + endif() + endif() +endif() + +cmake_pop_check_state() + +set(Filesystem_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::filesystem" FORCE) + +if(Filesystem_FIND_REQUIRED AND NOT Filesystem_FOUND) + message(FATAL_ERROR "Cannot run simple program using std::filesystem") +endif() diff --git a/compiler/Main.cpp b/compiler/Main.cpp index 9be71ff6..e4c7d4e7 100644 --- a/compiler/Main.cpp +++ b/compiler/Main.cpp @@ -13,19 +13,85 @@ // SymCC. If not, see . 
#include +#if LLVM_VERSION_MAJOR <= 15 #include +#endif +#include +#include + +#if LLVM_VERSION_MAJOR >= 13 +#include +#include + +#if LLVM_VERSION_MAJOR >= 14 +#include +#else +using OptimizationLevel = llvm::PassBuilder::OptimizationLevel; +#endif +#endif + +#if LLVM_VERSION_MAJOR >= 15 +#include +#else +#include +#endif #include "Pass.h" -void addSymbolizePass(const llvm::PassManagerBuilder & /* unused */, - llvm::legacy::PassManagerBase &PM) { - PM.add(new SymbolizePass()); +using namespace llvm; + +// +// Legacy pass registration (up to LLVM 13) +// + +#if LLVM_VERSION_MAJOR <= 15 + +void addSymbolizeLegacyPass(const PassManagerBuilder & /* unused */, + legacy::PassManagerBase &PM) { + PM.add(createScalarizerPass()); + PM.add(createLowerAtomicPass()); + PM.add(new SymbolizeLegacyPass()); } // Make the pass known to opt. -static llvm::RegisterPass X("symbolize", "Symbolization Pass"); +static RegisterPass X("symbolize", "Symbolization Pass"); // Tell frontends to run the pass automatically. -static struct llvm::RegisterStandardPasses - Y(llvm::PassManagerBuilder::EP_VectorizerStart, addSymbolizePass); -static struct llvm::RegisterStandardPasses - Z(llvm::PassManagerBuilder::EP_EnabledOnOptLevel0, addSymbolizePass); +static struct RegisterStandardPasses Y(PassManagerBuilder::EP_VectorizerStart, + addSymbolizeLegacyPass); +static struct RegisterStandardPasses + Z(PassManagerBuilder::EP_EnabledOnOptLevel0, addSymbolizeLegacyPass); + +#endif + +// +// New pass registration (LLVM 13 and above) +// + +#if LLVM_VERSION_MAJOR >= 13 + +PassPluginLibraryInfo getSymbolizePluginInfo() { + return {LLVM_PLUGIN_API_VERSION, "Symbolization Pass", LLVM_VERSION_STRING, + [](PassBuilder &PB) { + // We need to act on the entire module as well as on each function. + // Those actions are independent from each other, so we register a + // module pass at the start of the pipeline and a function pass just + // before the vectorizer. 
(There doesn't seem to be a way to run + // module passes at the start of the vectorizer, hence the split.) + PB.registerPipelineStartEPCallback( + [](ModulePassManager &PM, OptimizationLevel) { + PM.addPass(SymbolizePass()); + }); + PB.registerVectorizerStartEPCallback( + [](FunctionPassManager &PM, OptimizationLevel) { + PM.addPass(ScalarizerPass()); + PM.addPass(LowerAtomicPass()); + PM.addPass(SymbolizePass()); + }); + }}; +} + +extern "C" LLVM_ATTRIBUTE_WEAK PassPluginLibraryInfo llvmGetPassPluginInfo() { + return getSymbolizePluginInfo(); +} + +#endif diff --git a/compiler/Pass.cpp b/compiler/Pass.cpp index 122fd571..af0d88a8 100644 --- a/compiler/Pass.cpp +++ b/compiler/Pass.cpp @@ -15,11 +15,22 @@ #include "Pass.h" #include +#include +#include +#include #include #include #include +#include +#include #include +#if LLVM_VERSION_MAJOR < 14 +#include +#else +#include +#endif + #include "Runtime.h" #include "Symbolizer.h" @@ -34,10 +45,14 @@ using namespace llvm; #define DEBUG(X) ((void)0) #endif -char SymbolizePass::ID = 0; +char SymbolizeLegacyPass::ID = 0; -bool SymbolizePass::doInitialization(Module &M) { - DEBUG(errs() << "Symbolizer module init\n"); +namespace { + +static constexpr char kSymCtorName[] = "__sym_ctor"; + +bool instrumentModule(Module &M) { + DEBUG(errs() << "Symbolizer module instrumentation\n"); // Redirect calls to external functions to the corresponding wrappers and // rename internal functions. 
@@ -56,7 +71,95 @@ bool SymbolizePass::doInitialization(Module &M) { return true; } -bool SymbolizePass::runOnFunction(Function &F) { +bool canLower(const CallInst *CI) { + const Function *Callee = CI->getCalledFunction(); + if (!Callee) + return false; + + switch (Callee->getIntrinsicID()) { + case Intrinsic::expect: + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: + case Intrinsic::prefetch: + case Intrinsic::pcmarker: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_label: + case Intrinsic::eh_typeid_for: + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + case Intrinsic::assume: +#if LLVM_VERSION_MAJOR > 11 + case Intrinsic::experimental_noalias_scope_decl: +#endif + case Intrinsic::var_annotation: + case Intrinsic::sqrt: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::pow: + case Intrinsic::sin: + case Intrinsic::cos: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::round: +#if LLVM_VERSION_MAJOR > 10 + case Intrinsic::roundeven: +#endif + case Intrinsic::copysign: +#if LLVM_VERSION_MAJOR < 16 + case Intrinsic::flt_rounds: +#else + case Intrinsic::get_rounding: +#endif + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + return true; + default: + return false; + } + + llvm_unreachable("Control cannot reach here"); +} + +void liftInlineAssembly(CallInst *CI) { + // TODO When we don't have to worry about the old pass manager anymore, move + // the initialization to the pass constructor. (Currently there are two + // passes, but only if we're on a recent enough LLVM...) 
+ + Function *F = CI->getFunction(); + Module *M = F->getParent(); + auto triple = M->getTargetTriple(); + + std::string error; + auto target = TargetRegistry::lookupTarget(triple, error); + if (!target) { + errs() << "Warning: can't get target info to lift inline assembly\n"; + return; + } + + auto cpu = F->getFnAttribute("target-cpu").getValueAsString(); + auto features = F->getFnAttribute("target-features").getValueAsString(); + + std::unique_ptr TM( + target->createTargetMachine(triple, cpu, features, TargetOptions(), {})); + auto subTarget = TM->getSubtargetImpl(*F); + if (subTarget == nullptr) + return; + + auto targetLowering = subTarget->getTargetLowering(); + if (targetLowering == nullptr) + return; + + targetLowering->ExpandInlineAsm(CI); +} + +bool instrumentFunction(Function &F) { auto functionName = F.getName(); if (functionName == kSymCtorName) return false; @@ -69,6 +172,21 @@ bool SymbolizePass::runOnFunction(Function &F) { for (auto &I : instructions(F)) allInstructions.push_back(&I); + IntrinsicLowering IL(F.getParent()->getDataLayout()); + for (auto *I : allInstructions) { + if (auto *CI = dyn_cast(I)) { + if (canLower(CI)) { + IL.LowerIntrinsicCall(CI); + } else if (isa(CI->getCalledOperand())) { + liftInlineAssembly(CI); + } + } + } + + allInstructions.clear(); + for (auto &I : instructions(F)) + allInstructions.push_back(&I); + Symbolizer symbolizer(*F.getParent()); symbolizer.symbolizeFunctionArguments(F); @@ -87,3 +205,27 @@ bool SymbolizePass::runOnFunction(Function &F) { return true; } + +} // namespace + +bool SymbolizeLegacyPass::doInitialization(Module &M) { + return instrumentModule(M); +} + +bool SymbolizeLegacyPass::runOnFunction(Function &F) { + return instrumentFunction(F); +} + +#if LLVM_VERSION_MAJOR >= 13 + +PreservedAnalyses SymbolizePass::run(Function &F, FunctionAnalysisManager &) { + return instrumentFunction(F) ? 
PreservedAnalyses::none() + : PreservedAnalyses::all(); +} + +PreservedAnalyses SymbolizePass::run(Module &M, ModuleAnalysisManager &) { + return instrumentModule(M) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} + +#endif diff --git a/compiler/Pass.h b/compiler/Pass.h index 53764931..b06377dc 100644 --- a/compiler/Pass.h +++ b/compiler/Pass.h @@ -19,21 +19,31 @@ #include #include -class SymbolizePass : public llvm::FunctionPass { +#if LLVM_VERSION_MAJOR >= 13 +#include +#endif + +class SymbolizeLegacyPass : public llvm::FunctionPass { public: static char ID; - SymbolizePass() : FunctionPass(ID) {} + SymbolizeLegacyPass() : FunctionPass(ID) {} - bool doInitialization(llvm::Module &M) override; - bool runOnFunction(llvm::Function &F) override; + virtual bool doInitialization(llvm::Module &M) override; + virtual bool runOnFunction(llvm::Function &F) override; +}; -private: - static constexpr char kSymCtorName[] = "__sym_ctor"; +#if LLVM_VERSION_MAJOR >= 13 - /// Mapping from global variables to their corresponding symbolic expressions. 
- llvm::ValueMap - globalExpressions; +class SymbolizePass : public llvm::PassInfoMixin { +public: + llvm::PreservedAnalyses run(llvm::Function &F, + llvm::FunctionAnalysisManager &); + llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &); + + static bool isRequired() { return true; } }; #endif + +#endif diff --git a/compiler/Runtime.cpp b/compiler/Runtime.cpp index 81768841..a97b30f6 100644 --- a/compiler/Runtime.cpp +++ b/compiler/Runtime.cpp @@ -39,27 +39,25 @@ Runtime::Runtime(Module &M) { auto *intPtrType = M.getDataLayout().getIntPtrType(M.getContext()); auto *ptrT = IRB.getInt8PtrTy(); auto *int8T = IRB.getInt8Ty(); + auto *int1T = IRB.getInt1Ty(); auto *voidT = IRB.getVoidTy(); buildInteger = import(M, "_sym_build_integer", ptrT, IRB.getInt64Ty(), int8T); buildInteger128 = import(M, "_sym_build_integer128", ptrT, IRB.getInt64Ty(), IRB.getInt64Ty()); - buildFloat = - import(M, "_sym_build_float", ptrT, IRB.getDoubleTy(), IRB.getInt1Ty()); + buildFloat = import(M, "_sym_build_float", ptrT, IRB.getDoubleTy(), int1T); buildNullPointer = import(M, "_sym_build_null_pointer", ptrT); buildTrue = import(M, "_sym_build_true", ptrT); buildFalse = import(M, "_sym_build_false", ptrT); - buildBool = import(M, "_sym_build_bool", ptrT, IRB.getInt1Ty()); + buildBool = import(M, "_sym_build_bool", ptrT, int1T); buildSExt = import(M, "_sym_build_sext", ptrT, ptrT, int8T); buildZExt = import(M, "_sym_build_zext", ptrT, ptrT, int8T); buildTrunc = import(M, "_sym_build_trunc", ptrT, ptrT, int8T); buildBswap = import(M, "_sym_build_bswap", ptrT, ptrT); - buildIntToFloat = import(M, "_sym_build_int_to_float", ptrT, ptrT, - IRB.getInt1Ty(), IRB.getInt1Ty()); - buildFloatToFloat = - import(M, "_sym_build_float_to_float", ptrT, ptrT, IRB.getInt1Ty()); - buildBitsToFloat = - import(M, "_sym_build_bits_to_float", ptrT, ptrT, IRB.getInt1Ty()); + buildIntToFloat = + import(M, "_sym_build_int_to_float", ptrT, ptrT, int1T, int1T); + buildFloatToFloat = import(M, 
"_sym_build_float_to_float", ptrT, ptrT, int1T); + buildBitsToFloat = import(M, "_sym_build_bits_to_float", ptrT, ptrT, int1T); buildFloatToBits = import(M, "_sym_build_float_to_bits", ptrT, ptrT); buildFloatToSignedInt = import(M, "_sym_build_float_to_signed_integer", ptrT, ptrT, int8T); @@ -69,9 +67,34 @@ Runtime::Runtime(Module &M) { buildBoolAnd = import(M, "_sym_build_bool_and", ptrT, ptrT, ptrT); buildBoolOr = import(M, "_sym_build_bool_or", ptrT, ptrT, ptrT); buildBoolXor = import(M, "_sym_build_bool_xor", ptrT, ptrT, ptrT); - buildBoolToBits = import(M, "_sym_build_bool_to_bits", ptrT, ptrT, int8T); - pushPathConstraint = import(M, "_sym_push_path_constraint", voidT, ptrT, - IRB.getInt1Ty(), intPtrType); + buildBoolToBit = import(M, "_sym_build_bool_to_bit", ptrT, ptrT); + buildBitToBool = import(M, "_sym_build_bit_to_bool", ptrT, ptrT); + buildConcat = + import(M, "_sym_concat_helper", ptrT, ptrT, + ptrT); // doesn't follow naming convention for historic reasons + pushPathConstraint = + import(M, "_sym_push_path_constraint", voidT, ptrT, int1T, intPtrType); + + // Overflow arithmetic + buildAddOverflow = + import(M, "_sym_build_add_overflow", ptrT, ptrT, ptrT, int1T, int1T); + buildSubOverflow = + import(M, "_sym_build_sub_overflow", ptrT, ptrT, ptrT, int1T, int1T); + buildMulOverflow = + import(M, "_sym_build_mul_overflow", ptrT, ptrT, ptrT, int1T, int1T); + + // Saturating arithmetic + buildSAddSat = import(M, "_sym_build_sadd_sat", ptrT, ptrT, ptrT); + buildUAddSat = import(M, "_sym_build_uadd_sat", ptrT, ptrT, ptrT); + buildSSubSat = import(M, "_sym_build_ssub_sat", ptrT, ptrT, ptrT); + buildUSubSat = import(M, "_sym_build_usub_sat", ptrT, ptrT, ptrT); + buildSShlSat = import(M, "_sym_build_sshl_sat", ptrT, ptrT, ptrT); + buildUShlSat = import(M, "_sym_build_ushl_sat", ptrT, ptrT, ptrT); + + buildFshl = import(M, "_sym_build_funnel_shift_left", ptrT, ptrT, ptrT, ptrT); + buildFshr = + import(M, "_sym_build_funnel_shift_right", ptrT, ptrT, ptrT, ptrT); 
+ buildAbs = import(M, "_sym_build_abs", ptrT, ptrT); setParameterExpression = import(M, "_sym_set_parameter_expression", voidT, int8T, ptrT); @@ -107,6 +130,14 @@ Runtime::Runtime(Module &M) { #undef LOAD_BINARY_OPERATOR_HANDLER +#define LOAD_UNARY_OPERATOR_HANDLER(constant, name) \ + unaryOperatorHandlers[Instruction::constant] = \ + import(M, "_sym_build_" #name, ptrT, ptrT); + + LOAD_UNARY_OPERATOR_HANDLER(FNeg, fp_neg) + +#undef LOAD_UNARY_OPERATOR_HANDLER + #define LOAD_COMPARISON_HANDLER(constant, name) \ comparisonHandlers[CmpInst::constant] = \ import(M, "_sym_build_" #name, ptrT, ptrT, ptrT); @@ -144,13 +175,14 @@ Runtime::Runtime(Module &M) { memset = import(M, "_sym_memset", voidT, ptrT, ptrT, intPtrType); memmove = import(M, "_sym_memmove", voidT, ptrT, ptrT, intPtrType); readMemory = - import(M, "_sym_read_memory", ptrT, intPtrType, intPtrType, int8T); + import(M, "_sym_read_memory", ptrT, intPtrType, intPtrType, int1T); writeMemory = import(M, "_sym_write_memory", voidT, intPtrType, intPtrType, - ptrT, int8T); + ptrT, int1T); + buildZeroBytes = import(M, "_sym_build_zero_bytes", ptrT, intPtrType); buildInsert = - import(M, "_sym_build_insert", ptrT, ptrT, ptrT, IRB.getInt64Ty(), int8T); + import(M, "_sym_build_insert", ptrT, ptrT, ptrT, IRB.getInt64Ty(), int1T); buildExtract = import(M, "_sym_build_extract", ptrT, ptrT, IRB.getInt64Ty(), - IRB.getInt64Ty(), int8T); + IRB.getInt64Ty(), int1T); notifyCall = import(M, "_sym_notify_call", voidT, intPtrType); notifyRet = import(M, "_sym_notify_ret", voidT, intPtrType); @@ -160,10 +192,11 @@ Runtime::Runtime(Module &M) { /// Decide whether a function is called symbolically. 
bool isInterceptedFunction(const Function &f) { static const StringSet<> kInterceptedFunctions = { - "malloc", "calloc", "mmap", "mmap64", "open", "read", "lseek", - "lseek64", "fopen", "fopen64", "fread", "fseek", "fseeko", "rewind", - "fseeko64", "getc", "ungetc", "memcpy", "memset", "strncpy", "strchr", - "memcmp", "memmove", "ntohl", "fgets", "fgetc", "getchar"}; + "malloc", "calloc", "mmap", "mmap64", "open", "read", + "lseek", "lseek64", "fopen", "fopen64", "fread", "fseek", + "fseeko", "rewind", "fseeko64", "getc", "ungetc", "memcpy", + "memset", "strncpy", "strchr", "memcmp", "memmove", "ntohl", + "fgets", "fgetc", "getchar", "bcopy", "bcmp", "bzero"}; return (kInterceptedFunctions.count(f.getName()) > 0); } diff --git a/compiler/Runtime.h b/compiler/Runtime.h index 7bf4a769..3f26c76d 100644 --- a/compiler/Runtime.h +++ b/compiler/Runtime.h @@ -19,9 +19,9 @@ #include #if LLVM_VERSION_MAJOR >= 9 && LLVM_VERSION_MAJOR < 11 - using SymFnT = llvm::Value *; +using SymFnT = llvm::Value *; #else - using SymFnT = llvm::FunctionCallee; +using SymFnT = llvm::FunctionCallee; #endif /// Runtime functions @@ -49,7 +49,21 @@ struct Runtime { SymFnT buildBoolAnd{}; SymFnT buildBoolOr{}; SymFnT buildBoolXor{}; - SymFnT buildBoolToBits{}; + SymFnT buildBoolToBit{}; + SymFnT buildBitToBool{}; + SymFnT buildAddOverflow{}; + SymFnT buildSubOverflow{}; + SymFnT buildMulOverflow{}; + SymFnT buildSAddSat{}; + SymFnT buildUAddSat{}; + SymFnT buildSSubSat{}; + SymFnT buildUSubSat{}; + SymFnT buildSShlSat{}; + SymFnT buildUShlSat{}; + SymFnT buildFshl{}; + SymFnT buildFshr{}; + SymFnT buildAbs{}; + SymFnT buildConcat{}; SymFnT pushPathConstraint{}; SymFnT getParameterExpression{}; SymFnT setParameterExpression{}; @@ -60,6 +74,7 @@ struct Runtime { SymFnT memmove{}; SymFnT readMemory{}; SymFnT writeMemory{}; + SymFnT buildZeroBytes{}; SymFnT buildInsert{}; SymFnT buildExtract{}; SymFnT notifyCall{}; @@ -68,13 +83,15 @@ struct Runtime { /// Mapping from icmp predicates to the 
functions that build the corresponding /// symbolic expressions. - std::array - comparisonHandlers{}; + std::array comparisonHandlers{}; /// Mapping from binary operators to the functions that build the /// corresponding symbolic expressions. - std::array - binaryOperatorHandlers{}; + std::array binaryOperatorHandlers{}; + + /// Mapping from unary operators to the functions that build the + /// corresponding symbolic expressions. + std::array unaryOperatorHandlers{}; }; bool isInterceptedFunction(const llvm::Function &f); diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index a6206823..d111c52d 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -180,15 +180,9 @@ void Symbolizer::handleIntrinsicCall(CallBase &I) { auto *callee = I.getCalledFunction(); switch (callee->getIntrinsicID()) { - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::is_constant: case Intrinsic::trap: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::assume: // These are safe to ignore. break; case Intrinsic::memcpy: { @@ -258,22 +252,13 @@ void Symbolizer::handleIntrinsicCall(CallBase &I) { registerSymbolicComputation(abs, &I); break; } - case Intrinsic::cttz: - case Intrinsic::ctpop: - case Intrinsic::ctlz: { - // Various bit-count operations. Expressing these symbolically is - // difficult, so for now we just concretize. - - errs() << "Warning: losing track of symbolic expressions at bit-count " - "operation " - << I << "\n"; - break; - } - case Intrinsic::returnaddress: { + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + case Intrinsic::addressofreturnaddress: { // Obtain the return address of the current function or one of its parents // on the stack. We just concretize. 
- errs() << "Warning: using concrete value for return address\n"; + errs() << "Warning: using concrete value for return/frame address\n"; break; } case Intrinsic::bswap: { @@ -284,6 +269,74 @@ void Symbolizer::handleIntrinsicCall(CallBase &I) { registerSymbolicComputation(swapped, &I); break; } + +// Overflow arithmetic +#define DEF_OVF_ARITH_BUILDER(intrinsic_op, runtime_name) \ + case Intrinsic::s##intrinsic_op##_with_overflow: \ + case Intrinsic::u##intrinsic_op##_with_overflow: { \ + IRBuilder<> IRB(&I); \ + \ + bool isSigned = \ + I.getIntrinsicID() == Intrinsic::s##intrinsic_op##_with_overflow; \ + auto overflow = buildRuntimeCall( \ + IRB, runtime.build##runtime_name, \ + {{I.getOperand(0), true}, \ + {I.getOperand(1), true}, \ + {IRB.getInt1(isSigned), false}, \ + {IRB.getInt1(dataLayout.isLittleEndian() ? 1 : 0), false}}); \ + registerSymbolicComputation(overflow, &I); \ + \ + break; \ + } + + DEF_OVF_ARITH_BUILDER(add, AddOverflow) + DEF_OVF_ARITH_BUILDER(sub, SubOverflow) + DEF_OVF_ARITH_BUILDER(mul, MulOverflow) + +#undef DEF_OVF_ARITH_BUILDER + +// Saturating arithmetic +#define DEF_SAT_ARITH_BUILDER(intrinsic_op, runtime_name) \ + case Intrinsic::intrinsic_op##_sat: { \ + IRBuilder<> IRB(&I); \ + auto result = buildRuntimeCall(IRB, runtime.build##runtime_name, \ + {I.getOperand(0), I.getOperand(1)}); \ + registerSymbolicComputation(result, &I); \ + break; \ + } + + DEF_SAT_ARITH_BUILDER(sadd, SAddSat) + DEF_SAT_ARITH_BUILDER(uadd, UAddSat) + DEF_SAT_ARITH_BUILDER(ssub, SSubSat) + DEF_SAT_ARITH_BUILDER(usub, USubSat) +#if LLVM_VERSION_MAJOR > 11 + DEF_SAT_ARITH_BUILDER(sshl, SShlSat) + DEF_SAT_ARITH_BUILDER(ushl, UShlSat) +#endif + +#undef DEF_SAT_ARITH_BUILDER + + case Intrinsic::fshl: + case Intrinsic::fshr: { + IRBuilder<> IRB(&I); + auto funnelShift = buildRuntimeCall( + IRB, + I.getIntrinsicID() == Intrinsic::fshl ? 
runtime.buildFshl + : runtime.buildFshr, + {I.getOperand(0), I.getOperand(1), I.getOperand(2)}); + registerSymbolicComputation(funnelShift, &I); + break; + } +#if LLVM_VERSION_MAJOR > 11 + case Intrinsic::abs: { + // Integer absolute value + + IRBuilder<> IRB(&I); + auto abs = buildRuntimeCall(IRB, runtime.buildAbs, I.getOperand(0)); + registerSymbolicComputation(abs, &I); + break; + } +#endif default: errs() << "Warning: unhandled LLVM intrinsic " << callee->getName() << "; the result will be concretized\n"; @@ -369,6 +422,15 @@ void Symbolizer::visitBinaryOperator(BinaryOperator &I) { registerSymbolicComputation(runtimeCall, &I); } +void Symbolizer::visitUnaryOperator(UnaryOperator &I) { + IRBuilder<> IRB(&I); + SymFnT handler = runtime.unaryOperatorHandlers.at(I.getOpcode()); + + assert(handler && "Unable to handle unary operator"); + auto runtimeCall = buildRuntimeCall(IRB, handler, I.getOperand(0)); + registerSymbolicComputation(runtimeCall, &I); +} + void Symbolizer::visitSelectInst(SelectInst &I) { // Select is like the ternary operator ("?:") in C. We push the (potentially // negated) condition to the path constraints and copy the symbolic @@ -380,6 +442,13 @@ void Symbolizer::visitSelectInst(SelectInst &I) { {I.getCondition(), false}, {getTargetPreferredInt(&I), false}}); registerSymbolicComputation(runtimeCall); + if (getSymbolicExpression(I.getTrueValue()) || + getSymbolicExpression(I.getFalseValue())) { + auto *data = IRB.CreateSelect( + I.getCondition(), getSymbolicExpressionOrNull(I.getTrueValue()), + getSymbolicExpressionOrNull(I.getFalseValue())); + symbolicExpressions[&I] = data; + } } void Symbolizer::visitCmpInst(CmpInst &I) { @@ -465,14 +534,9 @@ void Symbolizer::visitLoadInst(LoadInst &I) { runtime.readMemory, {IRB.CreatePtrToInt(addr, intPtrType), ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(dataType)), - ConstantInt::get(IRB.getInt8Ty(), isLittleEndian(dataType) ? 
1 : 0)}); - - if (dataType->isFloatingPointTy()) { - data = IRB.CreateCall(runtime.buildBitsToFloat, - {data, IRB.getInt1(dataType->isDoubleTy())}); - } + IRB.getInt1(isLittleEndian(dataType) ? 1 : 0)}); - symbolicExpressions[&I] = data; + symbolicExpressions[&I] = convertBitVectorExprForType(IRB, data, dataType); } void Symbolizer::visitStoreInst(StoreInst &I) { @@ -480,18 +544,22 @@ void Symbolizer::visitStoreInst(StoreInst &I) { tryAlternative(IRB, I.getPointerOperand()); - auto *data = getSymbolicExpressionOrNull(I.getValueOperand()); - auto *dataType = I.getValueOperand()->getType(); - if (dataType->isFloatingPointTy()) { - data = IRB.CreateCall(runtime.buildFloatToBits, data); - } + // Make sure that the expression corresponding to the stored value is of + // bit-vector kind. Shortcutting the runtime calls that we emit here (e.g., + // for floating-point values) is tricky, so instead we make sure that any + // runtime function we call can handle null expressions. + + auto V = I.getValueOperand(); + auto maybeConversion = + convertExprForTypeToBitVectorExpr(IRB, V, getSymbolicExpression(V)); IRB.CreateCall( runtime.writeMemory, {IRB.CreatePtrToInt(I.getPointerOperand(), intPtrType), - ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(dataType)), - data, - ConstantInt::get(IRB.getInt8Ty(), dataLayout.isLittleEndian() ? 1 : 0)}); + ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(V->getType())), + maybeConversion ? maybeConversion->lastInstruction + : getSymbolicExpressionOrNull(V), + IRB.getInt1(isLittleEndian(V->getType()) ? 
1 : 0)}); } void Symbolizer::visitGetElementPtrInst(GetElementPtrInst &I) { @@ -612,11 +680,25 @@ void Symbolizer::visitBitCastInst(BitCastInst &I) { void Symbolizer::visitTruncInst(TruncInst &I) { IRBuilder<> IRB(&I); - auto trunc = buildRuntimeCall( + + if (getSymbolicExpression(I.getOperand(0)) == nullptr) + return; + + SymbolicComputation symbolicComputation; + symbolicComputation.merge(forceBuildRuntimeCall( IRB, runtime.buildTrunc, {{I.getOperand(0), true}, - {IRB.getInt8(I.getDestTy()->getIntegerBitWidth()), false}}); - registerSymbolicComputation(trunc, &I); + {IRB.getInt8(I.getDestTy()->getIntegerBitWidth()), false}})); + + if (I.getDestTy()->isIntegerTy() && + I.getDestTy()->getIntegerBitWidth() == 1) { + // convert from byte back to a bool (i1) + symbolicComputation.merge( + forceBuildRuntimeCall(IRB, runtime.buildBitToBool, + {{symbolicComputation.lastInstruction, false}})); + } + + registerSymbolicComputation(symbolicComputation, &I); } void Symbolizer::visitIntToPtrInst(IntToPtrInst &I) { @@ -696,30 +778,36 @@ void Symbolizer::visitCastInst(CastInst &I) { IRBuilder<> IRB(&I); + SymFnT target; + + switch (I.getOpcode()) { + case Instruction::SExt: + target = runtime.buildSExt; + break; + case Instruction::ZExt: + target = runtime.buildZExt; + break; + default: + llvm_unreachable("Unknown cast opcode"); + } + // LLVM bitcode represents Boolean values as i1. In Z3, those are a not a // bit-vector sort, so trying to cast one into a bit vector of any length // raises an error. The run-time library provides a dedicated conversion // function for this case. 
if (I.getSrcTy()->getIntegerBitWidth() == 1) { - auto boolToBitConversion = buildRuntimeCall( - IRB, runtime.buildBoolToBits, - {{I.getOperand(0), true}, - {IRB.getInt8(I.getDestTy()->getIntegerBitWidth()), false}}); - registerSymbolicComputation(boolToBitConversion, &I); - } else { - SymFnT target; - switch (I.getOpcode()) { - case Instruction::SExt: - target = runtime.buildSExt; - break; - case Instruction::ZExt: - target = runtime.buildZExt; - break; - default: - llvm_unreachable("Unknown cast opcode"); - } + SymbolicComputation symbolicComputation; + symbolicComputation.merge(forceBuildRuntimeCall(IRB, runtime.buildBoolToBit, + {{I.getOperand(0), true}})); + symbolicComputation.merge(forceBuildRuntimeCall( + IRB, target, + {{symbolicComputation.lastInstruction, false}, + {IRB.getInt8(I.getDestTy()->getIntegerBitWidth() - 1), false}})); + + registerSymbolicComputation(symbolicComputation, &I); + } else { auto symbolicCast = buildRuntimeCall(IRB, target, {{I.getOperand(0), true}, @@ -751,28 +839,61 @@ void Symbolizer::visitPHINode(PHINode &I) { void Symbolizer::visitInsertValueInst(InsertValueInst &I) { IRBuilder<> IRB(&I); - auto insert = buildRuntimeCall( - IRB, runtime.buildInsert, - {{I.getAggregateOperand(), true}, - {I.getInsertedValueOperand(), true}, - {IRB.getInt64(aggregateMemberOffset(I.getAggregateOperand()->getType(), - I.getIndices())), - false}, - {IRB.getInt8(isLittleEndian(I.getInsertedValueOperand()->getType()) ? 1 : 0), false}}); - registerSymbolicComputation(insert, &I); + auto target = I.getAggregateOperand(); + auto insertedValue = I.getInsertedValueOperand(); + + if (getSymbolicExpression(target) == nullptr && + getSymbolicExpression(insertedValue) == nullptr) + return; + + // We may have to convert the expression to bit-vector kind... 
+ auto maybeConversion = convertExprForTypeToBitVectorExpr( + IRB, insertedValue, getSymbolicExpressionOrNull(insertedValue)); + + auto insert = IRB.CreateCall( + runtime.buildInsert, + {getSymbolicExpressionOrNull(target), + // If we had to convert the expression, use the result of the conversion. + maybeConversion ? maybeConversion->lastInstruction + : getSymbolicExpressionOrNull(insertedValue), + IRB.getInt64(aggregateMemberOffset(target->getType(), I.getIndices())), + IRB.getInt1(isLittleEndian(insertedValue->getType()) ? 1 : 0)}); + auto insertComputation = + SymbolicComputation(insert, insert, {Input(target, 0, insert)}); + + if (!maybeConversion) { + // If we didn't have to convert, then the inserted value is first used in + // the insertion. + insertComputation.inputs.push_back(Input(insertedValue, 1, insert)); + } else { + // Otherwise, the full computation consists of the conversion followed by + // the insertion. + maybeConversion->merge(insertComputation); + } + + registerSymbolicComputation(maybeConversion.value_or(insertComputation), &I); } void Symbolizer::visitExtractValueInst(ExtractValueInst &I) { IRBuilder<> IRB(&I); - auto extract = buildRuntimeCall( - IRB, runtime.buildExtract, - {{I.getAggregateOperand(), true}, - {IRB.getInt64(aggregateMemberOffset(I.getAggregateOperand()->getType(), - I.getIndices())), - false}, - {IRB.getInt64(dataLayout.getTypeStoreSize(I.getType())), false}, - {IRB.getInt8(isLittleEndian(I.getType()) ? 1 : 0), false}}); - registerSymbolicComputation(extract, &I); + auto target = I.getAggregateOperand(); + auto targetExpr = getSymbolicExpression(target); + auto resultType = I.getType(); + + if (targetExpr == nullptr) + return; + + auto extractedBits = IRB.CreateCall( + runtime.buildExtract, + {targetExpr, + IRB.getInt64(aggregateMemberOffset(target->getType(), I.getIndices())), + IRB.getInt64(dataLayout.getTypeStoreSize(resultType)), + IRB.getInt1(isLittleEndian(resultType) ? 
1 : 0)}); + + Instruction *result = + convertBitVectorExprForType(IRB, extractedBits, resultType); + registerSymbolicComputation( + {extractedBits, result, {{target, 0, extractedBits}}}, &I); } void Symbolizer::visitSwitchInst(SwitchInst &I) { @@ -818,7 +939,7 @@ void Symbolizer::visitInstruction(Instruction &I) { << "; the result will be concretized\n"; } -CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { +Instruction *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { auto *valueType = V->getType(); if (isa(V)) { @@ -860,12 +981,12 @@ CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { {IRB.CreatePtrToInt(V, IRB.getInt64Ty()), IRB.getInt8(ptrBits)}); } - if (valueType->isStructTy()) { + if (auto structType = dyn_cast(valueType)) { // In unoptimized code we may see structures in SSA registers. What we // want is a single bit-vector expression describing their contents, but - // unfortunately we can't take the address of a register. We fix the - // problem with a hack: we write the register to memory and initialize the - // expression from there. + // unfortunately we can't take the address of a register. What we do instead + // is to build the expression recursively by iterating over the elements of + // the structure. // // An alternative would be to change the representation of structures in // SSA registers to "shadow structures" that contain one expression per @@ -873,22 +994,80 @@ CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { // cast instructions, because expressions would have to be converted // between different representations according to the type. 
- auto *memory = IRB.CreateAlloca(V->getType()); - IRB.CreateStore(V, memory); - return IRB.CreateCall( - runtime.readMemory, - {IRB.CreatePtrToInt(memory, intPtrType), - ConstantInt::get(intPtrType, - dataLayout.getTypeStoreSize(V->getType())), - IRB.getInt8(0)}); + if (isa(V)) { + // This is just an optimization for completely undefined structs; we + // create an all-zeros expression without iterating over the elements. + return IRB.CreateCall( + runtime.buildZeroBytes, + {ConstantInt::get(intPtrType, + dataLayout.getTypeStoreSize(valueType))}); + } else { + // Iterate over the elements of the struct and concatenate the + // corresponding expressions (along with any padding that might be + // needed). + + auto structLayout = dataLayout.getStructLayout(structType); + auto constantStructValue = dyn_cast(V); + size_t offset = 0; // The end of the expressed portion in bytes. + Instruction *expr = nullptr; + auto append = [&](Instruction *newExpr) { + expr = expr ? IRB.CreateCall(runtime.buildConcat, {expr, newExpr}) + : newExpr; + }; + + for (size_t i = 0; i < structType->getNumElements(); i++) { + // Build an expression for any padding preceding the current element. + if (auto padding = structLayout->getElementOffset(i) - offset; + padding > 0) { + append(IRB.CreateCall(runtime.buildZeroBytes, + {ConstantInt::get(intPtrType, padding)})); + } + + // Build the expression for the current element. If the struct is not a + // constant, we need to read the element with extractvalue. + auto element = constantStructValue + ? constantStructValue->getAggregateElement(i) + : IRB.CreateExtractValue(V, i); + auto elementExpr = createValueExpression(element, IRB); + + // The expression may be of a different kind than bit vector; in this + // case, we need to convert it. 
+ if (auto conversion = + convertExprForTypeToBitVectorExpr(IRB, element, elementExpr)) { + elementExpr = conversion->lastInstruction; + } + + // If the element is represented in little-endian byte order in memory, + // swap the bytes. + auto elementType = structType->getElementType(i); + if (isLittleEndian(elementType) && + dataLayout.getTypeStoreSize(elementType) > 1) { + elementExpr = IRB.CreateCall(runtime.buildBswap, {elementExpr}); + } + + append(elementExpr); + + offset = structLayout->getElementOffset(i) + + dataLayout.getTypeStoreSize(structType->getElementType(i)); + } + + // Insert padding at the end, if any. + if (auto finalPadding = dataLayout.getTypeStoreSize(structType) - offset; + finalPadding > 0) { + append(IRB.CreateCall(runtime.buildZeroBytes, + {ConstantInt::get(intPtrType, finalPadding)})); + } + + return expr; + } } llvm_unreachable("Unhandled type for constant expression"); } -Symbolizer::SymbolicComputation -Symbolizer::forceBuildRuntimeCall(IRBuilder<> &IRB, SymFnT function, - ArrayRef> args) { +Symbolizer::SymbolicComputation Symbolizer::forceBuildRuntimeCall( + IRBuilder<> &IRB, SymFnT function, + ArrayRef> args) const { std::vector functionArgs; for (const auto &[arg, symbolic] : args) { functionArgs.push_back(symbolic ? 
getSymbolicExpressionOrNull(arg) : arg); @@ -899,7 +1078,7 @@ Symbolizer::forceBuildRuntimeCall(IRBuilder<> &IRB, SymFnT function, for (unsigned i = 0; i < args.size(); i++) { const auto &[arg, symbolic] = args[i]; if (symbolic) - inputs.push_back({arg, i, call}); + inputs.push_back(Input(arg, i, call)); } return SymbolicComputation(call, call, inputs); @@ -916,7 +1095,7 @@ void Symbolizer::tryAlternative(IRBuilder<> &IRB, Value *V) { runtime.pushPathConstraint, {destAssertion, IRB.getInt1(true), getTargetPreferredInt(V)}); registerSymbolicComputation(SymbolicComputation( - concreteDestExpr, pushAssertion, {{V, 0, destAssertion}})); + concreteDestExpr, pushAssertion, {Input(V, 0, destAssertion)})); } } @@ -942,3 +1121,41 @@ uint64_t Symbolizer::aggregateMemberOffset(Type *aggregateType, return offset; } + +Instruction *Symbolizer::convertBitVectorExprForType(llvm::IRBuilder<> &IRB, + Instruction *I, + Type *T) const { + Instruction *result = I; + + if (T->isFloatingPointTy()) { + result = IRB.CreateCall(runtime.buildBitsToFloat, + {I, IRB.getInt1(T->isDoubleTy())}); + } else if (T->isIntegerTy() && T->getIntegerBitWidth() == 1) { + result = IRB.CreateCall(runtime.buildTrunc, + {I, ConstantInt::get(IRB.getInt8Ty(), 1)}); + result = IRB.CreateCall(runtime.buildBitToBool, {result}); + } + + return result; +} + +std::optional +Symbolizer::convertExprForTypeToBitVectorExpr(IRBuilder<> &IRB, Value *V, + Value *Expr) const { + if (Expr == nullptr) + return {}; + + auto T = V->getType(); + + if (T->isFloatingPointTy()) { + auto floatBits = IRB.CreateCall(runtime.buildFloatToBits, {Expr}); + return SymbolicComputation(floatBits, floatBits, {Input(V, 0, floatBits)}); + } else if (T->isIntegerTy() && T->getIntegerBitWidth() == 1) { + auto bitExpr = IRB.CreateCall(runtime.buildBoolToBit, {Expr}); + auto bitVectorExpr = IRB.CreateCall(runtime.buildZExt, + {bitExpr, IRB.getInt8(7 /* 1 byte */)}); + return SymbolicComputation(bitExpr, bitVectorExpr, {Input(V, 0, bitExpr)}); + } 
else { + return {}; + } +} diff --git a/compiler/Symbolizer.h b/compiler/Symbolizer.h index 8ab440af..808712ee 100644 --- a/compiler/Symbolizer.h +++ b/compiler/Symbolizer.h @@ -103,6 +103,7 @@ class Symbolizer : public llvm::InstVisitor { // Implementation of InstVisitor // void visitBinaryOperator(llvm::BinaryOperator &I); + void visitUnaryOperator(llvm::UnaryOperator &I); void visitSelectInst(llvm::SelectInst &I); void visitCmpInst(llvm::CmpInst &I); void visitReturnInst(llvm::ReturnInst &I); @@ -142,6 +143,14 @@ class Symbolizer : public llvm::InstVisitor { unsigned operandIndex; llvm::Instruction *user; + Input() = default; + + Input(llvm::Value *concrete, unsigned idx, llvm::Instruction *user) + : concreteValue(concrete), operandIndex(idx), user(user) { + assert(getSymbolicOperand()->getType() == + llvm::Type::getInt8PtrTy(user->getContext())); + } + llvm::Value *getSymbolicOperand() const { return user->getOperand(operandIndex); } @@ -186,22 +195,23 @@ class Symbolizer : public llvm::InstVisitor { << "\n...ending at " << *computation.lastInstruction << "\n...with inputs:\n"; for (const auto &input : computation.inputs) { - out << '\t' << *input.concreteValue << '\n'; + out << '\t' << *input.concreteValue << " => " << *input.user << '\n'; } return out; } }; /// Create an expression that represents the concrete value. - llvm::CallInst *createValueExpression(llvm::Value *V, llvm::IRBuilder<> &IRB); + llvm::Instruction *createValueExpression(llvm::Value *V, + llvm::IRBuilder<> &IRB); /// Get the (already created) symbolic expression for a value. - llvm::Value *getSymbolicExpression(llvm::Value *V) { + llvm::Value *getSymbolicExpression(llvm::Value *V) const { auto exprIt = symbolicExpressions.find(V); return (exprIt != symbolicExpressions.end()) ? 
exprIt->second : nullptr; } - llvm::Value *getSymbolicExpressionOrNull(llvm::Value *V) { + llvm::Value *getSymbolicExpressionOrNull(llvm::Value *V) const { auto *expr = getSymbolicExpression(V); if (expr == nullptr) return llvm::ConstantPointerNull::get( @@ -214,9 +224,9 @@ class Symbolizer : public llvm::InstVisitor { } /// Like buildRuntimeCall, but the call is always generated. - SymbolicComputation - forceBuildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function, - llvm::ArrayRef> args); + SymbolicComputation forceBuildRuntimeCall( + llvm::IRBuilder<> &IRB, SymFnT function, + llvm::ArrayRef> args) const; /// Create a call to the specified function in the run-time library. /// @@ -229,7 +239,7 @@ class Symbolizer : public llvm::InstVisitor { /// instruction is emitted and the function returns null. std::optional buildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function, - llvm::ArrayRef> args) { + llvm::ArrayRef> args) const { if (std::all_of(args.begin(), args.end(), [this](std::pair arg) { return (getSymbolicExpression(arg.first) == nullptr); @@ -243,7 +253,7 @@ class Symbolizer : public llvm::InstVisitor { /// Convenience overload that treats all arguments as symbolic. std::optional buildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function, - llvm::ArrayRef symbolicArgs) { + llvm::ArrayRef symbolicArgs) const { std::vector> args; for (const auto &arg : symbolicArgs) { args.emplace_back(arg, true); @@ -298,6 +308,27 @@ class Symbolizer : public llvm::InstVisitor { uint64_t aggregateMemberOffset(llvm::Type *aggregateType, llvm::ArrayRef indices) const; + /// Emit code that converts the bit-vector expression represented by I to an + /// expression that is appropriate for T; return the instruction that computes + /// the result (which may be I if no conversion is needed). + /// + /// The solver doesn't represent all values as bit vectors. 
For example, + /// floating-point values and Booleans are of separate kinds, so we emit code + /// that changes the solver kind of the expression to whatever is needed. + llvm::Instruction *convertBitVectorExprForType(llvm::IRBuilder<> &IRB, + llvm::Instruction *I, + llvm::Type *T) const; + + /// Emit code that converts the expression Expr for V to a bit-vector + /// expression. Return the SymbolicComputation representing the conversion + /// (if a conversion is necessary); the last instruction computes the result. + /// + /// This is the inverse operation of convertBitVectorExprForType (see details + /// there). + std::optional + convertExprForTypeToBitVectorExpr(llvm::IRBuilder<> &IRB, llvm::Value *V, + llvm::Value *Expr) const; + const Runtime runtime; /// The data layout of the currently processed module. diff --git a/compiler/sym++.in b/compiler/sym++.in index 82221945..2b775739 100755 --- a/compiler/sym++.in +++ b/compiler/sym++.in @@ -55,7 +55,7 @@ if [ $# -eq 0 ]; then fi exec $compiler \ - -Xclang -load -Xclang "$pass" \ + @CLANG_LOAD_PASS@"$pass" \ $stdlib_cflags \ "$@" \ $stdlib_ldflags \ diff --git a/compiler/symcc.in b/compiler/symcc.in index a0694c06..4e0ad37e 100755 --- a/compiler/symcc.in +++ b/compiler/symcc.in @@ -39,7 +39,7 @@ if [ $# -eq 0 ]; then fi exec $compiler \ - -Xclang -load -Xclang "$pass" \ + @CLANG_LOAD_PASS@"$pass" \ "$@" \ -L"$runtime_dir" \ -lSymRuntime \ diff --git a/docs/32-bit.txt b/docs/32-bit.txt index f5f6dda8..eae0e9cb 100644 --- a/docs/32-bit.txt +++ b/docs/32-bit.txt @@ -26,12 +26,12 @@ install the 32-bit version of Z3 in your system. Once the dependencies for 32-bit SymCC are available (as well as the 64-bit dependencies mentioned in the main README), configure and build SymCC as usual but add "-DTARGET_32BIT=ON" to the CMake invocation. 
If the build system doesn't -find your 32-bit versions of LLVM and Z3, specify their locations with +find your 32-bit versions of LLVM and Z3, specify their locations with "-DLLVM_32BIT_DIR=/some/path" and "-DZ3_32BIT_DIR=/some/other/path", respectively - analogously to how you would hint at the 64-bit versions. The resulting "symcc" and "sym++" scripts work like regular SymCC, but they -additionally understand the "-m32" switch, which tells clang to build 32-bit +additionally understand the "-m32" switch, which tells Clang to build 32-bit artifacts. If you build anything with "-m32", SymCC will make sure that the 32-bit version of the symbolic backend is linked to it instead of the 64-bit variant that would normally be used. Note that, in order to compile C++ code diff --git a/docs/Backends.txt b/docs/Backends.txt index 358412b1..ec3e53ff 100644 --- a/docs/Backends.txt +++ b/docs/Backends.txt @@ -13,7 +13,7 @@ used. Also, we always link against "libSymRuntime.so", so the choice of backend is deferred until run time. From the target program's point of view, the only requirement on a backend is that it be a shared library with the expected name that implements the interface defined in runtime/RuntimeCommon.h (with type -"SymExpr" defined to be something of pointer width). +"SymExpr" defined to be something of pointer width). Depending on the build option QSYM_BACKEND we build either our own backend or parts of QSYM (which are pulled in via a git submodule) and a small translation diff --git a/docs/Concreteness.txt b/docs/Concreteness.txt index 91eba7a2..26d0e9a1 100644 --- a/docs/Concreteness.txt +++ b/docs/Concreteness.txt @@ -18,9 +18,9 @@ There are two stages at which data can be identified as concrete: If we detect in the compiler pass that a value is a compile-time constant (case 1 above), we do not emit code for symbolic handling at all.
However, for any -other type of data we need to generate code that handles the case of it being +other type of data, we need to generate code that handles the case of it being symbolic at run time. Concretely (no pun intended), we mark concrete values at -run time by setting its corresponding symbolic expression in shadow memory to +run time by setting their corresponding symbolic expression in shadow memory to null. This makes it very cheap to check concreteness during execution: just run a null check on the symbolic expression. diff --git a/docs/Configuration.txt b/docs/Configuration.txt index 123aec34..e1239151 100644 --- a/docs/Configuration.txt +++ b/docs/Configuration.txt @@ -9,7 +9,7 @@ SymCC is configured at two different stages: compilation time and the set of dependencies. This is done via arguments to CMake. -2. When you run programs that have been compiled with SymCC, environment +2. When you run programs that have been compiled with SymCC, the environment variables control various aspects of the execution and analysis. We list all available options for each stage in turn. @@ -56,11 +56,19 @@ environment variables. uninstrumented counterparts. - SYMCC_OUTPUT_DIR (default "/tmp/output"): This is the directory where SymCC - will store new inputs (QSYM backend only). + will store new inputs (QSYM backend only). If you prefer to handle them + programmatically, make your program call symcc_set_test_case_handler; the + handler will be called instead of the default handler each time the backend + generates a new input. - SYMCC_INPUT_FILE (default empty): When empty, SymCC treats data read from standard input as symbolic; when set to a file name, any data read from that - file is considered symbolic. + file is considered symbolic. Ignored if SYMCC_NO_SYMBOLIC_INPUT is set to 1. + +- SYMCC_MEMORY_INPUT=0/1 (default 0): When set to 1, expect the program under + test to communicate symbolic inputs with one or more calls to + symcc_make_symbolic. 
Can't be combined with SYMCC_INPUT_FILE. Ignored if + SYMCC_NO_SYMBOLIC_INPUT is set to 1. - SYMCC_LOG_FILE (default empty): When set to a file name, SymCC creates the file (or overwrites any existing file!) and uses it to log backend activity @@ -71,12 +79,15 @@ environment variables. repeatedly (QSYM backend only). See the QSYM paper for details; highly recommended for fuzzing and enabled automatically by the fuzzing helper. -- SYMCC_AFL_COVERAGE_MAP (default empty): When set to the file name of an AFL - coverage map, load the map before executing the target program and use it to - skip solver queries for paths that have already been covered (QSYM backend - only). The map is updated in place, so beware of races when running multiple - instances of SymCC! The fuzzing helper uses this to remember the state of - exploration across multiple executions of the target program. +- SYMCC_AFL_COVERAGE_MAP (default empty): When set to the file name of an + AFL-style coverage map, load the map before executing the target program and + use it to skip solver queries for paths that have already been covered (QSYM + backend only). The map is updated in place, so beware of races when running + multiple instances of SymCC! The fuzzing helper uses this to remember the + state of exploration across multiple executions of the target program. + Warning: This setting has a misleading name - while the format of the map + follows (classic) AFL, the variable isn't meant to point at a map file that + AFL uses too! (Most people should stop reading here.) @@ -88,7 +99,7 @@ There is actually a third category of options: when compiling with SymCC, you can specify the location of its various components via environment variables. 
This is not necessary in most cases because the build system makes sure that all components know about each other; however, in some advanced setups you may need -to move files around after building them, and in that case you can use the +to move files around after building them, and in that case, you can use the variables documented below to communicate the new locations: - SYMCC_RUNTIME_DIR and SYMCC_RUNTIME32_DIR: The directory that contains the diff --git a/docs/Experiments.txt b/docs/Experiments.txt index 81751a4a..001a06fc 100644 --- a/docs/Experiments.txt +++ b/docs/Experiments.txt @@ -8,6 +8,8 @@ Here we document how to reproduce the experiments that we show in the paper are available on our website [1], which also provides our raw results. Feel free to reach out to us if you encounter problems with reproducing the benchmarks. +The datasets are also archived on figshare [10]. + In the paper, we describe two sets of experiments: we first benchmark SymCC on the CGC programs, then we run it on real-world software. @@ -37,7 +39,7 @@ regular 32-bit binaries built by cb-multios. The analysis of real-world software always follows the same procedure. Assuming you have exported CC=symcc, CXX=sym++ and SYMCC_NO_SYMBOLIC_INPUT=1, first -download the code, then build it using its own build system, finally unset +download the code, then build it using its own build system, and finally unset SYMCC_NO_SYMBOLIC_INPUT and analyze the program in concert with AFL (which requires building a second time for AFL, see docs/Fuzzing.txt). We used AFL 2.56b and built the targets with AFL_USE_ASAN=1. Note that the fuzzing helper is @@ -50,7 +52,7 @@ OpenJPEG [4]: we used revision 1f1e9682, built with CMake as described in the files file1.jp2 and file8.jp2 [5]. 
libarchive [6]: we used revision 9ebb2484, built with CMake as described in the - poject's INSTALL (but adding "-DCMAKE_BUILD_TYPE=Release"), and analyzed + project's INSTALL (but adding "-DCMAKE_BUILD_TYPE=Release"), and analyzed "bin/bsdtar tf @@"; the corpus consisted of just a single dummy file containing the character "A". @@ -78,3 +80,4 @@ running it with AFL according to the QSYM authors' instructions [9]. [7] https://github.com/the-tcpdump-group/tcpdump.git [8] https://github.com/the-tcpdump-group/libpcap.git [9] https://github.com/sslab-gatech/qsym#run-hybrid-fuzzing-with-afl +[10] https://doi.org/10.6084/m9.figshare.24270709.v1 or https://figshare.com/articles/dataset/SymCC_evaluation_data/24270709 diff --git a/docs/Ideas.txt b/docs/Ideas.txt index a2720617..219236ae 100644 --- a/docs/Ideas.txt +++ b/docs/Ideas.txt @@ -35,7 +35,7 @@ because intermediate values are rarely computed without being used: typically, they end up being inputs to future computations, so we couldn't free the corresponding expressions anyway. A notable exception is the computation of values only for output - the expressions for such values could be freed after -the value is output, which would reduce memory consumption especially with +the value is output, which would reduce memory consumption, especially with output-heavy target programs. @@ -63,6 +63,6 @@ improvements they have made over AFL (e.g., AFL++ or Honggfuzz). Forking version Instead of working with a fuzzer, we could also implement forking and some -scheduling strategy ourselves. Georgia Tech has developed some OS-level +scheduling strategies ourselves. Georgia Tech has developed some OS-level primitives that could help to implement such a feature: https://github.com/sslab-gatech/perf-fuzz. diff --git a/docs/Libc.txt b/docs/Libc.txt index 7057ec0e..08ad920e 100644 --- a/docs/Libc.txt +++ b/docs/Libc.txt @@ -11,7 +11,7 @@ as the sanitizers and intercept calls to libc functions, wrapping them with symbolic handling. 
For example, the wrapper for "memset" obtains the symbolic expression for the value to be written in memory and pushes it to the shadow region of the destination memory. In the future, we may experiment with -compiling (parts of) libc to avoid the effort of manually defining wrappers. +compiling (parts of) the libc to avoid the effort of manually defining wrappers. Initially, we tried the interception mechanism that the LLVM sanitizers use, implemented in the compiler-rt library. The Linux version basically just defines @@ -25,11 +25,11 @@ replaces a given libc function, in the executable and in all libraries that it loads. However, our run-time support library is loaded into the same process and makes heavy use of libc, so we need the ability to use wrappers in one part of the program and concrete functions in another. This turned out to complicate the -compiler-rt based implementation so much that we eventually abandoned the +compiler-rt-based implementation so much that we eventually abandoned the approach. Function renaming provided a convenient alternative: we control all code that is -supposed to call wrappers rather the libc functions proper, so we just rename +supposed to call wrappers rather than the libc functions proper, so we just rename the targets of their calls. For example, a call to "memset" in the program under test is turned into a call to "memset_symbolized", which we can easily define as a regular function wrapping "memset". Calls from our run-time library, on the diff --git a/docs/Optimization.txt b/docs/Optimization.txt index 6b4f2c83..97efdbc2 100644 --- a/docs/Optimization.txt +++ b/docs/Optimization.txt @@ -7,7 +7,7 @@ A popular technique for experimenting with compiler passes is to produce bitcode with "clang -emit-llvm" and run the pass on the resulting bitcode with opt. Note that this approach does not mix well with optimization: simply running "opt -O3" on the instrumented bitcode yields inferior results. Why?
In principle, the -instrumentation that adds symbolic-execution capabilities does not interfere +instrumentation that adds symbolic execution capabilities does not interfere with the compiler's regular optimization. However, while "opt -O3" runs the same middle-end optimizations as clang does internally, "clang -O3" performs additional analysis before invoking the middle end. In particular, type-based diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 637fc1ce..07277b8f 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -1,16 +1,17 @@ -# This file is part of SymCC. +# This file is part of the SymCC runtime. # -# SymCC is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. +# The SymCC runtime is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your option) +# any later version. # -# SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# The SymCC runtime is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +# for more details. # -# You should have received a copy of the GNU General Public License along with -# SymCC. If not, see . +# You should have received a copy of the GNU Lesser General Public License along +# with the SymCC runtime. If not, see . 
cmake_minimum_required(VERSION 3.5) project(SymRuntime) diff --git a/runtime/Config.cpp b/runtime/Config.cpp index c7d45ee0..23a28df6 100644 --- a/runtime/Config.cpp +++ b/runtime/Config.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . 
#include "Config.h" @@ -19,6 +20,7 @@ #include #include #include +#include namespace { @@ -41,17 +43,26 @@ bool checkFlagString(std::string value) { Config g_config; void loadConfig() { - auto *fullyConcrete = getenv("SYMCC_NO_SYMBOLIC_INPUT"); - if (fullyConcrete != nullptr) - g_config.fullyConcrete = checkFlagString(fullyConcrete); - auto *outputDir = getenv("SYMCC_OUTPUT_DIR"); if (outputDir != nullptr) g_config.outputDir = outputDir; auto *inputFile = getenv("SYMCC_INPUT_FILE"); if (inputFile != nullptr) - g_config.inputFile = inputFile; + g_config.input = FileInput{inputFile}; + + auto *memoryInput = getenv("SYMCC_MEMORY_INPUT"); + if (memoryInput != nullptr && checkFlagString(memoryInput)) { + if (std::holds_alternative(g_config.input)) + throw std::runtime_error{ + "Can't enable file and memory input at the same time"}; + + g_config.input = MemoryInput{}; + } + + auto *fullyConcrete = getenv("SYMCC_NO_SYMBOLIC_INPUT"); + if (fullyConcrete != nullptr && checkFlagString(fullyConcrete)) + g_config.input = NoInput{}; auto *logFile = getenv("SYMCC_LOG_FILE"); if (logFile != nullptr) @@ -76,7 +87,8 @@ void loadConfig() { throw std::runtime_error(msg.str()); } catch (std::out_of_range &) { std::stringstream msg; - msg << "The GC threshold must be between 0 and " << std::numeric_limits::max(); + msg << "The GC threshold must be between 0 and " + << std::numeric_limits::max(); throw std::runtime_error(msg.str()); } } diff --git a/runtime/Config.h b/runtime/Config.h index 450344eb..a866821c 100644 --- a/runtime/Config.h +++ b/runtime/Config.h @@ -1,32 +1,48 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. 
+// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef CONFIG_H #define CONFIG_H #include +#include + +/// Marker struct for fully concrete execution. +struct NoInput {}; + +/// Marker struct for symbolic input from stdin. +struct StdinInput {}; + +/// Marker struct for symbolic input via _sym_make_symbolic. +struct MemoryInput {}; + +/// Configuration for symbolic input from a file. +struct FileInput { + /// The name of input file. + std::string fileName; +}; struct Config { - /// Should we allow symbolic data in the program? - bool fullyConcrete = false; + using InputConfig = std::variant; + + /// The configuration for our symbolic input. + InputConfig input = StdinInput{}; /// The directory for storing new outputs. std::string outputDir = "/tmp/output"; - /// The input file, if any. - std::string inputFile; - /// The file to log constraint solving information to. 
std::string logFile = ""; diff --git a/runtime/GarbageCollection.cpp b/runtime/GarbageCollection.cpp index 8c1edc37..8afdd327 100644 --- a/runtime/GarbageCollection.cpp +++ b/runtime/GarbageCollection.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . #include "GarbageCollection.h" diff --git a/runtime/GarbageCollection.h b/runtime/GarbageCollection.h index da77ff83..81b0b8c2 100644 --- a/runtime/GarbageCollection.h +++ b/runtime/GarbageCollection.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. 
// -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef GARBAGECOLLECTION_H #define GARBAGECOLLECTION_H diff --git a/runtime/LibcWrappers.cpp b/runtime/LibcWrappers.cpp index 319ae9f5..cfe55525 100644 --- a/runtime/LibcWrappers.cpp +++ b/runtime/LibcWrappers.cpp @@ -1,16 +1,33 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. 
+// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . + +// +// Libc wrappers +// +// This file contains the wrappers around libc functions which add symbolic +// computations; using the wrappers frees instrumented code from having to link +// against an instrumented libc. +// +// We define a wrapper for function X with SYM(X), which just changes the name +// "X" to something predictable and hopefully unique. It is then up to the +// compiler pass to replace calls of X with calls of SYM(X). +// +// In general, the wrappers ask the solver to generate alternative parameter +// values, then call the wrapped function, create and store symbolic expressions +// matching the libc function's semantics, and finally return the wrapped +// function's result. 
#include #include @@ -18,6 +35,7 @@ #include #include #include +#include #include #include @@ -56,13 +74,28 @@ template void tryAlternative(E *value, SymExpr valueExpr, F caller) { tryAlternative(reinterpret_cast(value), valueExpr, caller); } -} // namespace -void initLibcWrappers() { - if (g_config.fullyConcrete) +void maybeSetInputFile(const char *path, int fd) { + auto *fileInput = std::get_if(&g_config.input); + if (fileInput == nullptr) + return; + + if (strstr(path, fileInput->fileName.c_str()) == nullptr) return; - if (g_config.inputFile.empty()) { + if (inputFileDescriptor != -1) + std::cerr << "Warning: input file opened multiple times; this is not yet " + "supported" + << std::endl; + + inputFileDescriptor = fd; + inputOffset = 0; +} + +} // namespace + +void initLibcWrappers() { + if (std::holds_alternative(g_config.input)) { // Symbolic data comes from standard input. inputFileDescriptor = 0; } @@ -95,10 +128,31 @@ void *SYM(calloc)(size_t nmemb, size_t size) { void *SYM(mmap64)(void *addr, size_t len, int prot, int flags, int fildes, uint64_t off) { auto *result = mmap64(addr, len, prot, flags, fildes, off); + _sym_set_return_expression(nullptr); + + if (result == MAP_FAILED) // mmap failed + return result; + + if (fildes == inputFileDescriptor) { + /* we update the inputOffset only when mmap() is reading from input file + * HACK! update inputOffset with off parameter sometimes will be dangerous + * We don't know whether there is read() before/after mmap, + * if there is, we have to fix this tricky method :P + */ + inputOffset = off + len; + // Reading symbolic input. 
+ ReadWriteShadow shadow(result, len); + uint8_t *resultBytes = (uint8_t *)result; + std::generate(shadow.begin(), shadow.end(), [resultBytes, i = 0]() mutable { + return _sym_get_input_byte(inputOffset, resultBytes[i++]); + }); + } else if (!isConcrete(result, len)) { + ReadWriteShadow shadow(result, len); + std::fill(shadow.begin(), shadow.end(), nullptr); + } tryAlternative(len, _sym_get_parameter_expression(1), SYM(mmap64)); - _sym_set_return_expression(nullptr); return result; } @@ -111,15 +165,8 @@ int SYM(open)(const char *path, int oflag, mode_t mode) { auto result = open(path, oflag, mode); _sym_set_return_expression(nullptr); - if (result >= 0 && !g_config.fullyConcrete && !g_config.inputFile.empty() && - strstr(path, g_config.inputFile.c_str()) != nullptr) { - if (inputFileDescriptor != -1) - std::cerr << "Warning: input file opened multiple times; this is not yet " - "supported" - << std::endl; - inputFileDescriptor = result; - inputOffset = 0; - } + if (result >= 0) + maybeSetInputFile(path, result); return result; } @@ -136,9 +183,8 @@ ssize_t SYM(read)(int fildes, void *buf, size_t nbyte) { if (fildes == inputFileDescriptor) { // Reading symbolic input. 
- ReadWriteShadow shadow(buf, result); - std::generate(shadow.begin(), shadow.end(), - []() { return _sym_get_input_byte(inputOffset++); }); + _sym_make_symbolic(buf, result, inputOffset); + inputOffset += result; } else if (!isConcrete(buf, result)) { ReadWriteShadow shadow(buf, result); std::fill(shadow.begin(), shadow.end(), nullptr); @@ -193,16 +239,8 @@ FILE *SYM(fopen)(const char *pathname, const char *mode) { auto *result = fopen(pathname, mode); _sym_set_return_expression(nullptr); - if (result != nullptr && !g_config.fullyConcrete && - !g_config.inputFile.empty() && - strstr(pathname, g_config.inputFile.c_str()) != nullptr) { - if (inputFileDescriptor != -1) - std::cerr << "Warning: input file opened multiple times; this is not yet " - "supported" - << std::endl; - inputFileDescriptor = fileno(result); - inputOffset = 0; - } + if (result != nullptr) + maybeSetInputFile(pathname, fileno(result)); return result; } @@ -211,16 +249,8 @@ FILE *SYM(fopen64)(const char *pathname, const char *mode) { auto *result = fopen64(pathname, mode); _sym_set_return_expression(nullptr); - if (result != nullptr && !g_config.fullyConcrete && - !g_config.inputFile.empty() && - strstr(pathname, g_config.inputFile.c_str()) != nullptr) { - if (inputFileDescriptor != -1) - std::cerr << "Warning: input file opened multiple times; this is not yet " - "supported" - << std::endl; - inputFileDescriptor = fileno(result); - inputOffset = 0; - } + if (result != nullptr) + maybeSetInputFile(pathname, fileno(result)); return result; } @@ -235,9 +265,8 @@ size_t SYM(fread)(void *ptr, size_t size, size_t nmemb, FILE *stream) { if (fileno(stream) == inputFileDescriptor) { // Reading symbolic input. 
- ReadWriteShadow shadow(ptr, result * size); - std::generate(shadow.begin(), shadow.end(), - []() { return _sym_get_input_byte(inputOffset++); }); + _sym_make_symbolic(ptr, result * size, inputOffset); + inputOffset += result * size; } else if (!isConcrete(ptr, result * size)) { ReadWriteShadow shadow(ptr, result * size); std::fill(shadow.begin(), shadow.end(), nullptr); @@ -255,9 +284,9 @@ char *SYM(fgets)(char *str, int n, FILE *stream) { if (fileno(stream) == inputFileDescriptor) { // Reading symbolic input. - ReadWriteShadow shadow(str, sizeof(char) * strlen(str)); - std::generate(shadow.begin(), shadow.end(), - []() { return _sym_get_input_byte(inputOffset++); }); + const auto length = sizeof(char) * strlen(str); + _sym_make_symbolic(str, length, inputOffset); + inputOffset += length; } else if (!isConcrete(str, sizeof(char) * strlen(str))) { ReadWriteShadow shadow(str, sizeof(char) * strlen(str)); std::fill(shadow.begin(), shadow.end(), nullptr); @@ -338,7 +367,7 @@ int SYM(getc)(FILE *stream) { if (fileno(stream) == inputFileDescriptor) _sym_set_return_expression(_sym_build_zext( - _sym_get_input_byte(inputOffset++), sizeof(int) * 8 - 8)); + _sym_get_input_byte(inputOffset++, result), sizeof(int) * 8 - 8)); else _sym_set_return_expression(nullptr); @@ -354,16 +383,14 @@ int SYM(fgetc)(FILE *stream) { if (fileno(stream) == inputFileDescriptor) _sym_set_return_expression(_sym_build_zext( - _sym_get_input_byte(inputOffset++), sizeof(int) * 8 - 8)); + _sym_get_input_byte(inputOffset++, result), sizeof(int) * 8 - 8)); else _sym_set_return_expression(nullptr); return result; } -int SYM(getchar)(void) { - return SYM(getc)(stdin); -} +int SYM(getchar)(void) { return SYM(getc)(stdin); } int SYM(ungetc)(int c, FILE *stream) { auto result = ungetc(c, stream); @@ -399,6 +426,20 @@ void *SYM(memset)(void *s, int c, size_t n) { return result; } +void SYM(bzero)(void *s, size_t n) { + bzero(s, n); + + // No return value, hence no corresponding expression. 
+ _sym_set_return_expression(nullptr); + + tryAlternative(s, _sym_get_parameter_expression(0), SYM(bzero)); + tryAlternative(n, _sym_get_parameter_expression(1), SYM(bzero)); + + // Concretize the memory region, which now is all zeros. + ReadWriteShadow shadow(s, n); + std::fill(shadow.begin(), shadow.end(), nullptr); +} + void *SYM(memmove)(void *dest, const void *src, size_t n) { tryAlternative(dest, _sym_get_parameter_expression(0), SYM(memmove)); tryAlternative(src, _sym_get_parameter_expression(1), SYM(memmove)); @@ -412,6 +453,22 @@ void *SYM(memmove)(void *dest, const void *src, size_t n) { return result; } +void SYM(bcopy)(const void *src, void *dest, size_t n) { + tryAlternative(src, _sym_get_parameter_expression(0), SYM(bcopy)); + tryAlternative(dest, _sym_get_parameter_expression(1), SYM(bcopy)); + tryAlternative(n, _sym_get_parameter_expression(2), SYM(bcopy)); + + bcopy(src, dest, n); + + // bcopy is mostly equivalent to memmove, so we can use our symbolic version + // of memmove to copy any symbolic expressions over to the destination. + _sym_memmove(static_cast(dest), static_cast(src), + n); + + // void function, so there is no return value and hence no expression for it. + _sym_set_return_expression(nullptr); +} + char *SYM(strncpy)(char *dest, const char *src, size_t n) { tryAlternative(dest, _sym_get_parameter_expression(0), SYM(strncpy)); tryAlternative(src, _sym_get_parameter_expression(1), SYM(strncpy)); @@ -495,6 +552,43 @@ int SYM(memcmp)(const void *a, const void *b, size_t n) { return result; } +int SYM(bcmp)(const void *a, const void *b, size_t n) { + tryAlternative(a, _sym_get_parameter_expression(0), SYM(bcmp)); + tryAlternative(b, _sym_get_parameter_expression(1), SYM(bcmp)); + tryAlternative(n, _sym_get_parameter_expression(2), SYM(bcmp)); + + auto result = bcmp(a, b, n); + + // bcmp returns zero if the input regions are equal and an unspecified + // non-zero value otherwise. 
Instead of expressing this symbolically, we + // directly ask the solver for an alternative solution (assuming that the + // result is used for a conditional branch later), and return a concrete + // value. + _sym_set_return_expression(nullptr); + + // The result of the comparison depends on whether the input regions are equal + // byte by byte. Construct the corresponding expression, but only if there is + // at least one symbolic byte in either of the regions; otherwise, the result + // is concrete. + + if (isConcrete(a, n) && isConcrete(b, n)) + return result; + + auto aShadowIt = ReadOnlyShadow(a, n).begin_non_null(); + auto bShadowIt = ReadOnlyShadow(b, n).begin_non_null(); + auto *allEqual = _sym_build_equal(*aShadowIt, *bShadowIt); + for (size_t i = 1; i < n; i++) { + ++aShadowIt; + ++bShadowIt; + allEqual = + _sym_build_bool_and(allEqual, _sym_build_equal(*aShadowIt, *bShadowIt)); + } + + _sym_push_path_constraint(allEqual, result == 0, + reinterpret_cast(SYM(bcmp))); + return result; +} + uint32_t SYM(ntohl)(uint32_t netlong) { auto netlongExpr = _sym_get_parameter_expression(0); auto result = ntohl(netlong); diff --git a/runtime/LibcWrappers.h b/runtime/LibcWrappers.h index d84c1f47..2304a3a0 100644 --- a/runtime/LibcWrappers.h +++ b/runtime/LibcWrappers.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. 
// -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see <https://www.gnu.org/licenses/>. +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see <https://www.gnu.org/licenses/>. #ifndef LIBCWRAPPERS_H #define LIBCWRAPPERS_H diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index 32081e3e..127c81de 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -1,23 +1,29 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . #include +#include #include #include +#include #include +#include +#include +#include "Config.h" #include "GarbageCollection.h" #include "RuntimeCommon.h" #include "Shadow.h" @@ -31,6 +37,51 @@ SymExpr g_return_value; std::array g_function_arguments; // TODO make thread-local +SymExpr buildMinSignedInt(uint8_t bits) { + return _sym_build_integer((uint64_t)(1) << (bits - 1), bits); +} + +SymExpr buildMaxSignedInt(uint8_t bits) { + uint64_t mask = ((uint64_t)(1) << bits) - 1; + return _sym_build_integer(((uint64_t)(~0) & mask) >> 1, bits); +} + +SymExpr buildMaxUnsignedInt(uint8_t bits) { + uint64_t mask = ((uint64_t)(1) << bits) - 1; + return _sym_build_integer((uint64_t)(~0) & mask, bits); +} + +/// Construct an expression describing the in-memory representation of the +/// bitcode structure {iN, i1} returned by the intrinsics for arithmetic with +/// overflow (see +/// https://llvm.org/docs/LangRef.html#arithmetic-with-overflow-intrinsics). The +/// overflow parameter is expected to be a symbolic Boolean. +SymExpr buildOverflowResult(SymExpr result_expr, SymExpr overflow, + bool little_endian) { + auto result_bits = _sym_bits_helper(result_expr); + assert(result_bits % 8 == 0 && + "Arithmetic with overflow on integers of invalid length"); + + // When storing {iN, i1} in memory, the compiler would insert padding between + // the two elements, extending the Boolean to the same size as the integer. We + // simulate the same here, taking endianness into account. + + auto result_expr_mem = + little_endian ? _sym_build_bswap(result_expr) : result_expr; + auto overflow_byte = _sym_build_zext(_sym_build_bool_to_bit(overflow), 7); + + // There's no padding if the result is a single byte. 
+ if (result_bits == 8) { + return _sym_concat_helper(result_expr_mem, overflow_byte); + } + + auto padding = _sym_build_zero_bytes(result_bits / 8 - 1); + return _sym_concat_helper(result_expr_mem, + little_endian + ? _sym_concat_helper(overflow_byte, padding) + : _sym_concat_helper(padding, overflow_byte)); +} + } // namespace void _sym_set_return_expression(SymExpr expr) { g_return_value = expr; } @@ -68,6 +119,10 @@ void _sym_memset(uint8_t *memory, SymExpr value, size_t length) { } void _sym_memmove(uint8_t *dest, const uint8_t *src, size_t length) { + // Unless both the source and the destination are fully concrete memory + // regions, we need to copy the symbolic expressions over. (In the case where + // only the destination is symbolic, this means making it concrete.) + if (isConcrete(src, length) && isConcrete(dest, length)) return; @@ -171,7 +226,9 @@ SymExpr _sym_build_insert(SymExpr target, SymExpr to_insert, uint64_t offset, SymExpr beforeInsert = (offset == 0) ? nullptr : _sym_build_extract(target, 0, offset, false); - SymExpr newPiece = little_endian ? _sym_build_bswap(to_insert) : to_insert; + SymExpr newPiece = (little_endian && bitsToInsert > 8) + ? 
_sym_build_bswap(to_insert) + : to_insert; uint64_t afterLen = (_sym_bits_helper(target) / 8) - offset - (bitsToInsert / 8); SymExpr afterInsert = @@ -193,6 +250,239 @@ SymExpr _sym_build_insert(SymExpr target, SymExpr to_insert, uint64_t offset, return result; } +SymExpr _sym_build_zero_bytes(size_t length) { + auto zero_byte = _sym_build_integer(0, 8); + + auto result = zero_byte; + for (size_t i = 1; i < length; i++) { + result = _sym_concat_helper(result, zero_byte); + } + + return result; +} + +SymExpr _sym_build_sadd_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + SymExpr min = buildMinSignedInt(bits); + SymExpr max = buildMaxSignedInt(bits); + SymExpr add_sext = + _sym_build_add(_sym_build_sext(a, 1), _sym_build_sext(b, 1)); + + return _sym_build_ite( + // If the result is less than the min signed integer... + _sym_build_signed_less_equal(add_sext, _sym_build_sext(min, 1)), + // ... Return the min signed integer + min, + _sym_build_ite( + // Otherwise, if the result is greater than the max signed integer... + _sym_build_signed_greater_equal(add_sext, _sym_build_sext(max, 1)), + // ... Return the max signed integer + max, + // Otherwise, return the addition + _sym_build_add(a, b))); +} + +SymExpr _sym_build_uadd_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + SymExpr max = buildMaxUnsignedInt(bits); + SymExpr add_zext = + _sym_build_add(_sym_build_zext(a, 1), _sym_build_zext(b, 1)); + + return _sym_build_ite( + // If the top bit is set, an overflow has occurred and... + _sym_build_bit_to_bool(_sym_extract_helper(add_zext, bits, bits)), + // ... 
Return the max unsigned integer + max, + // Otherwise, return the addition + _sym_build_add(a, b)); +} + +SymExpr _sym_build_ssub_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + SymExpr min = buildMinSignedInt(bits); + SymExpr max = buildMaxSignedInt(bits); + + SymExpr sub_sext = + _sym_build_sub(_sym_build_sext(a, 1), _sym_build_sext(b, 1)); + + return _sym_build_ite( + // If the result is less than the min signed integer... + _sym_build_signed_less_equal(sub_sext, _sym_build_sext(min, 1)), + // ... Return the min signed integer + min, + _sym_build_ite( + // Otherwise, if the result is greater than the max signed integer... + _sym_build_signed_greater_equal(sub_sext, _sym_build_sext(max, 1)), + // ... Return the max signed integer + max, + // Otherwise, return the subtraction + _sym_build_sub(a, b))); +} + +SymExpr _sym_build_usub_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + + return _sym_build_ite( + // If `a >= b`, then no overflow occurs and... + _sym_build_unsigned_greater_equal(a, b), + // ... Return the subtraction + _sym_build_sub(a, b), + // Otherwise, saturate at zero + _sym_build_integer(0, bits)); +} + +static SymExpr _sym_build_shift_left_overflow(SymExpr a, SymExpr b) { + return _sym_build_not_equal( + _sym_build_arithmetic_shift_right(_sym_build_shift_left(a, b), b), a); +} + +SymExpr _sym_build_sshl_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + + return _sym_build_ite( + // If an overflow occurred... + _sym_build_shift_left_overflow(a, b), + _sym_build_ite( + // ... And the LHS is negative... + _sym_build_bit_to_bool(_sym_extract_helper(a, bits - 1, bits - 1)), + // ... Return the min signed integer... + buildMinSignedInt(bits), + // ... 
Otherwise, return the max signed integer + buildMaxSignedInt(bits)), + // Otherwise, return the left shift + _sym_build_shift_left(a, b)); +} + +SymExpr _sym_build_ushl_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + + return _sym_build_ite( + // If an overflow occurred... + _sym_build_shift_left_overflow(a, b), + // ... Return the max unsigned integer + buildMaxUnsignedInt(bits), + // Otherwise, return the left shift + _sym_build_shift_left(a, b)); +} + +SymExpr _sym_build_add_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian) { + size_t bits = _sym_bits_helper(a); + SymExpr overflow = [&]() { + if (is_signed) { + // Check if the additions are different + SymExpr add_sext = + _sym_build_add(_sym_build_sext(a, 1), _sym_build_sext(b, 1)); + return _sym_build_not_equal(add_sext, + _sym_build_sext(_sym_build_add(a, b), 1)); + } else { + // Check if the addition overflowed into the extra bit + SymExpr add_zext = + _sym_build_add(_sym_build_zext(a, 1), _sym_build_zext(b, 1)); + return _sym_build_equal(_sym_extract_helper(add_zext, bits, bits), + _sym_build_true()); + } + }(); + + return buildOverflowResult(_sym_build_add(a, b), overflow, little_endian); +} + +SymExpr _sym_build_sub_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian) { + size_t bits = _sym_bits_helper(a); + SymExpr overflow = [&]() { + if (is_signed) { + // Check if the subtractions are different + SymExpr sub_sext = + _sym_build_sub(_sym_build_sext(a, 1), _sym_build_sext(b, 1)); + return _sym_build_not_equal(sub_sext, + _sym_build_sext(_sym_build_sub(a, b), 1)); + } else { + // Check if the subtraction overflowed into the extra bit + SymExpr sub_zext = + _sym_build_sub(_sym_build_zext(a, 1), _sym_build_zext(b, 1)); + return _sym_build_equal(_sym_extract_helper(sub_zext, bits, bits), + _sym_build_true()); + } + }(); + + return buildOverflowResult(_sym_build_sub(a, b), overflow, little_endian); +} + +SymExpr _sym_build_mul_overflow(SymExpr a, SymExpr 
b, bool is_signed, + bool little_endian) { + size_t bits = _sym_bits_helper(a); + SymExpr overflow = [&]() { + if (is_signed) { + // Check if the multiplications are different + SymExpr mul_sext = + _sym_build_mul(_sym_build_sext(a, bits), _sym_build_sext(b, bits)); + return _sym_build_not_equal(mul_sext, + _sym_build_sext(_sym_build_mul(a, b), bits)); + } else { + // Check if the multiplication overflowed into the extra bit + SymExpr mul_zext = + _sym_build_mul(_sym_build_zext(a, bits), _sym_build_zext(b, bits)); + return _sym_build_equal( + _sym_extract_helper(mul_zext, 2 * bits - 1, 2 * bits - 1), + _sym_build_true()); + } + }(); + + return buildOverflowResult(_sym_build_mul(a, b), overflow, little_endian); +} + +SymExpr _sym_build_funnel_shift_left(SymExpr a, SymExpr b, SymExpr c) { + size_t bits = _sym_bits_helper(c); + SymExpr concat = _sym_concat_helper(a, b); + SymExpr shift = _sym_build_unsigned_rem(c, _sym_build_integer(bits, bits)); + + return _sym_extract_helper(_sym_build_shift_left(concat, shift), 0, bits); +} + +SymExpr _sym_build_funnel_shift_right(SymExpr a, SymExpr b, SymExpr c) { + size_t bits = _sym_bits_helper(c); + SymExpr concat = _sym_concat_helper(a, b); + SymExpr shift = _sym_build_unsigned_rem(c, _sym_build_integer(bits, bits)); + + return _sym_extract_helper(_sym_build_logical_shift_right(concat, shift), 0, + bits); +} + +SymExpr _sym_build_abs(SymExpr expr) { + size_t bits = _sym_bits_helper(expr); + return _sym_build_ite( + _sym_build_signed_greater_equal(expr, _sym_build_integer(0, bits)), expr, + _sym_build_sub(_sym_build_integer(0, bits), expr)); +} + void _sym_register_expression_region(SymExpr *start, size_t length) { registerExpressionRegion({start, length}); } + +void _sym_make_symbolic(const void *data, size_t byte_length, + size_t input_offset) { + ReadWriteShadow shadow(data, byte_length); + const uint8_t *data_bytes = reinterpret_cast(data); + std::generate(shadow.begin(), shadow.end(), [&, i = 0]() mutable { + return 
_sym_get_input_byte(input_offset++, data_bytes[i++]); + }); +} + +void symcc_make_symbolic(const void *start, size_t byte_length) { + if (!std::holds_alternative(g_config.input)) + throw std::runtime_error{"Calls to symcc_make_symbolic aren't allowed when " + "SYMCC_MEMORY_INPUT isn't set"}; + + static size_t inputOffset = 0; // track the offset across calls + _sym_make_symbolic(start, byte_length, inputOffset); + inputOffset += byte_length; +} + +SymExpr _sym_build_bit_to_bool(SymExpr expr) { + if (expr == nullptr) + return nullptr; + + return _sym_build_not_equal(expr, + _sym_build_integer(0, _sym_bits_helper(expr))); +} diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index f00176ea..4c05ceb0 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -3,32 +3,41 @@ // This header defines the interface of the run-time library. It is not actually // used anywhere because the compiler pass inserts calls to the library // functions at the level of LLVM bitcode, but it serves as documentation of the -// intended interface. +// intended interface. Unless documented otherwise, functions taking symbolic +// expressions can't handle null values (i.e., they shouldn't be called for +// concrete values); exceptions are made if it's too difficult to check for +// concreteness in bitcode. // // Whoever uses this file has to define the type "SymExpr" first; we use it to // keep this header independent of the back-end implementation. -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. 
+// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see <https://www.gnu.org/licenses/>. +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see <https://www.gnu.org/licenses/>. #ifndef RUNTIMECOMMON_H #define RUNTIMECOMMON_H +/* Marker for expression parameters which may be null.
*/ +#define nullable + #ifdef __cplusplus +#include #include extern "C" { #else +#include #include #endif @@ -42,6 +51,7 @@ void _sym_initialize(void); */ SymExpr _sym_build_integer(uint64_t value, uint8_t bits); SymExpr _sym_build_integer128(uint64_t high, uint64_t low); +SymExpr _sym_build_integer_from_buffer(void *buffer, unsigned num_bits); SymExpr _sym_build_float(double value, int is_double); SymExpr _sym_build_null_pointer(void); SymExpr _sym_build_true(void); @@ -49,7 +59,7 @@ SymExpr _sym_build_false(void); SymExpr _sym_build_bool(bool value); /* - * Arithmetic and shifts + * Integer arithmetic and shifts */ SymExpr _sym_build_neg(SymExpr expr); SymExpr _sym_build_add(SymExpr a, SymExpr b); @@ -62,13 +72,40 @@ SymExpr _sym_build_signed_rem(SymExpr a, SymExpr b); SymExpr _sym_build_shift_left(SymExpr a, SymExpr b); SymExpr _sym_build_logical_shift_right(SymExpr a, SymExpr b); SymExpr _sym_build_arithmetic_shift_right(SymExpr a, SymExpr b); +SymExpr _sym_build_funnel_shift_left(SymExpr a, SymExpr b, SymExpr c); +SymExpr _sym_build_funnel_shift_right(SymExpr a, SymExpr b, SymExpr c); +SymExpr _sym_build_abs(SymExpr expr); + +/* + * Arithmetic with overflow + */ +SymExpr _sym_build_add_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian); +SymExpr _sym_build_sub_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian); +SymExpr _sym_build_mul_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian); + +/* + * Saturating integer arithmetic and shifts + */ +SymExpr _sym_build_sadd_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_uadd_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_ssub_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_usub_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_sshl_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_ushl_sat(SymExpr a, SymExpr b); +/* + * Floating-point arithmetic and shifts + */ SymExpr _sym_build_fp_add(SymExpr a, SymExpr b); SymExpr _sym_build_fp_sub(SymExpr a, SymExpr b); SymExpr 
_sym_build_fp_mul(SymExpr a, SymExpr b); SymExpr _sym_build_fp_div(SymExpr a, SymExpr b); SymExpr _sym_build_fp_rem(SymExpr a, SymExpr b); SymExpr _sym_build_fp_abs(SymExpr a); +SymExpr _sym_build_fp_neg(SymExpr a); /* * Boolean operations @@ -90,6 +127,7 @@ SymExpr _sym_build_bool_or(SymExpr a, SymExpr b); SymExpr _sym_build_or(SymExpr a, SymExpr b); SymExpr _sym_build_bool_xor(SymExpr a, SymExpr b); SymExpr _sym_build_xor(SymExpr a, SymExpr b); +SymExpr _sym_build_ite(SymExpr cond, SymExpr a, SymExpr b); SymExpr _sym_build_float_ordered_greater_than(SymExpr a, SymExpr b); SymExpr _sym_build_float_ordered_greater_equal(SymExpr a, SymExpr b); @@ -109,9 +147,9 @@ SymExpr _sym_build_float_unordered_not_equal(SymExpr a, SymExpr b); /* * Casts */ -SymExpr _sym_build_sext(SymExpr expr, uint8_t bits); -SymExpr _sym_build_zext(SymExpr expr, uint8_t bits); -SymExpr _sym_build_trunc(SymExpr expr, uint8_t bits); +SymExpr _sym_build_sext(nullable SymExpr expr, uint8_t bits); +SymExpr _sym_build_zext(nullable SymExpr expr, uint8_t bits); +SymExpr _sym_build_trunc(nullable SymExpr expr, uint8_t bits); SymExpr _sym_build_bswap(SymExpr expr); SymExpr _sym_build_int_to_float(SymExpr value, int is_double, int is_signed); SymExpr _sym_build_float_to_float(SymExpr expr, int to_double); @@ -119,7 +157,8 @@ SymExpr _sym_build_bits_to_float(SymExpr expr, int to_double); SymExpr _sym_build_float_to_bits(SymExpr expr); SymExpr _sym_build_float_to_signed_integer(SymExpr expr, uint8_t bits); SymExpr _sym_build_float_to_unsigned_integer(SymExpr expr, uint8_t bits); -SymExpr _sym_build_bool_to_bits(SymExpr expr, uint8_t bits); +SymExpr _sym_build_bool_to_bit(nullable SymExpr expr); +SymExpr _sym_build_bit_to_bool(nullable SymExpr expr); /* * Bit-array helpers @@ -131,27 +170,30 @@ size_t _sym_bits_helper(SymExpr expr); /* * Function-call helpers */ -void _sym_set_parameter_expression(uint8_t index, SymExpr expr); +void _sym_set_parameter_expression(uint8_t index, nullable SymExpr expr); 
SymExpr _sym_get_parameter_expression(uint8_t index); -void _sym_set_return_expression(SymExpr expr); +void _sym_set_return_expression(nullable SymExpr expr); SymExpr _sym_get_return_expression(void); /* * Constraint handling */ -void _sym_push_path_constraint(SymExpr constraint, int taken, +void _sym_push_path_constraint(nullable SymExpr constraint, int taken, uintptr_t site_id); -SymExpr _sym_get_input_byte(size_t offset); +SymExpr _sym_get_input_byte(size_t offset, uint8_t concrete_value); +void _sym_make_symbolic(const void *data, size_t byte_length, + size_t input_offset); /* * Memory management */ SymExpr _sym_read_memory(uint8_t *addr, size_t length, bool little_endian); -void _sym_write_memory(uint8_t *addr, size_t length, SymExpr expr, +void _sym_write_memory(uint8_t *addr, size_t length, nullable SymExpr expr, bool little_endian); void _sym_memcpy(uint8_t *dest, const uint8_t *src, size_t length); void _sym_memset(uint8_t *memory, SymExpr value, size_t length); void _sym_memmove(uint8_t *dest, const uint8_t *src, size_t length); +SymExpr _sym_build_zero_bytes(size_t length); SymExpr _sym_build_insert(SymExpr target, SymExpr to_insert, uint64_t offset, bool little_endian); SymExpr _sym_build_extract(SymExpr expr, uint64_t offset, uint64_t length, @@ -176,8 +218,20 @@ bool _sym_feasible(SymExpr expr); void _sym_register_expression_region(SymExpr *start, size_t length); void _sym_collect_garbage(void); +/* + * User-facing functionality + * + * These are the only functions in the interface that we expect to be called by + * users (i.e., calls to it aren't auto-generated by our compiler pass). 
+ */ +void symcc_make_symbolic(const void *start, size_t byte_length); +typedef void (*TestCaseHandler)(const void *, size_t); +void symcc_set_test_case_handler(TestCaseHandler handler); + #ifdef __cplusplus } #endif +#undef nullable + #endif diff --git a/runtime/Shadow.cpp b/runtime/Shadow.cpp index 2b69a08a..2852fad1 100644 --- a/runtime/Shadow.cpp +++ b/runtime/Shadow.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . #include "Shadow.h" diff --git a/runtime/Shadow.h b/runtime/Shadow.h index 967e11ca..fe630bb4 100644 --- a/runtime/Shadow.h +++ b/runtime/Shadow.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. 
// -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef SHADOW_H #define SHADOW_H @@ -18,7 +19,6 @@ #include #include #include -#include #include #include @@ -58,12 +58,21 @@ extern std::map g_shadow_pages; /// An iterator that walks over the shadow bytes corresponding to a memory /// region. If there is no shadow for any given memory address, it just returns /// null. -class ReadShadowIterator - : public std::iterator { +class ReadShadowIterator { public: explicit ReadShadowIterator(uintptr_t address) - : std::iterator(), - address_(address), shadow_(getShadow(address)) {} + : address_(address), shadow_(getShadow(address)) {} + + // The STL requires iterator types to expose the following type definitions + // (see std::iterator_traits). 
Before C++17, it was possible to get them by + // deriving from std::iterator, which is just an empty template struct with + // five typedefs. However, std::iterator was deprecated in C++17 and hence its + // use causes a warning in recent compilers. + using iterator_category = std::bidirectional_iterator_tag; + using value_type = SymExpr; + using difference_type = ptrdiff_t; + using pointer = SymExpr *; + using reference = SymExpr &; ReadShadowIterator &operator++() { auto previousAddress = address_++; diff --git a/runtime/bindings/README b/runtime/bindings/README new file mode 100644 index 00000000..d36686f4 --- /dev/null +++ b/runtime/bindings/README @@ -0,0 +1,9 @@ + + + Runtime bindings + + +This directory contains bindings to the user-facing functionality of the runtime +(see runtime/RuntimeCommon.h). The bindings give target programs written in +different languages access to runtime features like in-memory input or custom +test-case handlers. diff --git a/runtime/bindings/ada/README b/runtime/bindings/ada/README new file mode 100644 index 00000000..312576ac --- /dev/null +++ b/runtime/bindings/ada/README @@ -0,0 +1,36 @@ + + + Ada bindings + + +This directory contains Ada bindings for the SymCC runtime. To use them in your +Ada code, you can either point gprbuild here directly (e.g., by setting +GPR_PROJECT_PATH appropriately), or you can install them in the system: + +$ gprbuild -Psymcc +$ gprinstall -Psymcc + +Either way, you'll then be able to include SymCC in your project definition +(i.e., the .gpr file for your project): + + with "symcc"; + +This will let you use the bindings in your Ada code, for example: + + with SymCC; use SymCC; + + -- ... + + -- Register a procedure that receives new program inputs. + Set_Test_Case_Handler (My_Handler'Access); + + -- Tell SymCC where to find the input in memory. Note that the variable needs + -- to be declared with the "aliased" keyword. 
+ Make_Symbolic (Input'Address, Input'Size / System.Storage_Unit); + + -- Run your code on the input; SymCC will follow the computations + -- symbolically and call My_Handler whenever it produces a new test input. + My_Code_Under_Test (Input); + +See the doc comments in symcc.ads for details, or generate HTML documentation +with "gnatdoc -Psymcc". diff --git a/runtime/bindings/ada/symcc.ads b/runtime/bindings/ada/symcc.ads new file mode 100644 index 00000000..bdda6c41 --- /dev/null +++ b/runtime/bindings/ada/symcc.ads @@ -0,0 +1,45 @@ +-- This file is part of the SymCC runtime. + +-- The SymCC runtime is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Lesser General Public License as published by +-- the Free Software Foundation, either version 3 of the License, or (at your +-- option) any later version. + +-- The SymCC runtime is distributed in the hope that it will be useful, but +-- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +-- License for more details. + +with Interfaces.C; +with System; + +-- @summary +-- Ada bindings to the SymCC runtime API. +-- +-- @description +-- This package provides thin bindings to the user-facing functionality of the +-- SymCC runtime (see RuntimeCommon.h). +package SymCC is + + procedure Make_Symbolic + (Address : System.Address; Size : Interfaces.C.size_t) with + Import => True, Convention => C, External_Name => "symcc_make_symbolic"; + -- Mark a memory region as symbolic program input. + -- @param Address The start of the region containing the input data. + -- @param Size The length in bytes of the region. + + type Test_Case_Handler_Callback_Type is + access procedure + (Data_Block : System.Address; Size : Interfaces.C.size_t) with + Convention => C; + -- Type of functions that the runtime can call when it generates new + -- program inputs (see Set_Test_Case_Handler). 
+ + procedure Set_Test_Case_Handler + (Callback : Test_Case_Handler_Callback_Type) with + Import => True, Convention => C, + External_Name => "symcc_set_test_case_handler"; + -- Define a custom handler for new program inputs. + -- @param Callback The procedure to be called for each new input. + +end SymCC; diff --git a/runtime/bindings/ada/symcc.gpr b/runtime/bindings/ada/symcc.gpr new file mode 100644 index 00000000..31305fc9 --- /dev/null +++ b/runtime/bindings/ada/symcc.gpr @@ -0,0 +1,21 @@ +-- This file is part of the SymCC runtime. + +-- The SymCC runtime is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Lesser General Public License as published by +-- the Free Software Foundation, either version 3 of the License, or (at your +-- option) any later version. + +-- The SymCC runtime is distributed in the hope that it will be useful, but +-- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +-- License for more details. + +library project SymCC is + + for Library_Name use "symcc"; + for Library_Dir use "lib"; + + for Languages use ("Ada"); + for Object_Dir use "obj"; + +end SymCC; diff --git a/runtime/qsym_backend/CMakeLists.txt b/runtime/qsym_backend/CMakeLists.txt index feaea43c..34c01320 100644 --- a/runtime/qsym_backend/CMakeLists.txt +++ b/runtime/qsym_backend/CMakeLists.txt @@ -1,16 +1,17 @@ -# This file is part of SymCC. +# This file is part of the SymCC runtime. # -# SymCC is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. 
+# The SymCC runtime is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your option) +# any later version. # -# SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# The SymCC runtime is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +# for more details. # -# You should have received a copy of the GNU General Public License along with -# SymCC. If not, see . +# You should have received a copy of the GNU Lesser General Public License along +# with SymCC. If not, see . # Build the parts of the Qsym backend that are relevant for us @@ -91,6 +92,5 @@ target_link_libraries(SymRuntime ${Z3_LIBRARIES} ${QSYM_LLVM_DEPS}) # some current LTS distributions ship a GCC that requires libstdc++fs for # std::filesystem - we catch this case in order to enable users of such systems # to build with the default compiler. -if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - target_link_libraries(SymRuntime stdc++fs) -endif() +find_package(Filesystem COMPONENTS Final Experimental) +target_link_libraries(SymRuntime std::filesystem) diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index d305821d..174e3101 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -1,19 +1,20 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. 
// -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . // -// Definitions that we need for the Qsym backend +// Definitions that we need for the QSYM backend // #include "Runtime.h" @@ -34,6 +35,7 @@ #include #include #include +#include #if HAVE_FILESYSTEM #include @@ -46,9 +48,10 @@ #endif // C +#include #include -// Qsym +// QSYM #include #include #include @@ -77,12 +80,7 @@ namespace { /// Indicate whether the runtime has been initialized. std::atomic_flag g_initialized = ATOMIC_FLAG_INIT; -/// The file that contains out input. 
-std::string inputFileName; - -void deleteInputFile() { std::remove(inputFileName.c_str()); } - -/// A mapping of all expressions that we have ever received from Qsym to the +/// A mapping of all expressions that we have ever received from QSYM to the /// corresponding shared pointers on the heap. /// /// We can't expect C clients to handle std::shared_ptr, so we maintain a single @@ -105,6 +103,55 @@ SymExpr registerExpression(const qsym::ExprRef &expr) { return rawExpr; } +/// The user-provided test case handler, if any. +/// +/// If the user doesn't register a handler, we use QSYM's default behavior of +/// writing the test case to a file in the output directory. +TestCaseHandler g_test_case_handler = nullptr; + +/// A QSYM solver that doesn't require the entire input on initialization. +class EnhancedQsymSolver : public qsym::Solver { + // Warning! + // + // Before we can override methods of qsym::Solver, we need to declare them + // virtual because the QSYM code refers to the solver with a pointer of type + // qsym::Solver*; for non-virtual methods, it will always choose the + // implementation of the base class. Beware that making a method virtual adds + // a small performance overhead and requires us to change QSYM code. + // + // Subclassing the QSYM solver is ugly but helps us to avoid making too many + // changes in the QSYM codebase. + +public: + EnhancedQsymSolver() + : qsym::Solver("/dev/null", g_config.outputDir, g_config.aflCoverageMap) { + } + + void pushInputByte(size_t offset, uint8_t value) { + if (inputs_.size() <= offset) + inputs_.resize(offset + 1); + + inputs_[offset] = value; + } + + void saveValues(const std::string &suffix) override { + if (auto handler = g_test_case_handler) { + auto values = getConcreteValues(); + // The test-case handler may be instrumented, so let's call it with + // argument expressions to meet instrumented code's expectations. 
+ // Otherwise, we might end up erroneously using whatever expression was + // last registered for a function parameter. + _sym_set_parameter_expression(0, nullptr); + _sym_set_parameter_expression(1, nullptr); + handler(values.data(), values.size()); + } else { + Solver::saveValues(suffix); + } + } +}; + +EnhancedQsymSolver *g_enhanced_solver; + } // namespace using namespace qsym; @@ -122,7 +169,7 @@ void _sym_initialize(void) { loadConfig(); initLibcWrappers(); std::cerr << "This is SymCC running with the QSYM backend" << std::endl; - if (g_config.fullyConcrete) { + if (std::holds_alternative(g_config.input)) { std::cerr << "Performing fully concrete execution (i.e., without symbolic input)" << std::endl; @@ -138,48 +185,15 @@ void _sym_initialize(void) { exit(-1); } - // Qsym requires the full input in a file - if (g_config.inputFile.empty()) { - std::cerr << "Reading program input until EOF (use Ctrl+D in a terminal)..." - << std::endl; - std::istreambuf_iterator in_begin(std::cin), in_end; - std::vector inputData(in_begin, in_end); - inputFileName = std::tmpnam(nullptr); - std::ofstream inputFile(inputFileName, std::ios::trunc); - std::copy(inputData.begin(), inputData.end(), - std::ostreambuf_iterator(inputFile)); - inputFile.close(); - -#ifdef DEBUG_RUNTIME - std::cerr << "Loaded input:" << std::endl; - std::copy(inputData.begin(), inputData.end(), - std::ostreambuf_iterator(std::cerr)); - std::cerr << std::endl; -#endif - - atexit(deleteInputFile); - - // Restore some semblance of standard input - auto *newStdin = freopen(inputFileName.c_str(), "r", stdin); - if (newStdin == nullptr) { - perror("Failed to reopen stdin"); - exit(-1); - } - } else { - inputFileName = g_config.inputFile; - std::cerr << "Making data read from " << inputFileName << " as symbolic" - << std::endl; - } - g_z3_context = new z3::context{}; - g_solver = - new Solver(inputFileName, g_config.outputDir, g_config.aflCoverageMap); + g_enhanced_solver = new EnhancedQsymSolver{}; + g_solver 
= g_enhanced_solver; // for QSYM-internal use g_expr_builder = g_config.pruning ? PruneExprBuilder::create() : SymbolicExprBuilder::create(); } SymExpr _sym_build_integer(uint64_t value, uint8_t bits) { - // Qsym's API takes uintptr_t, so we need to be careful when compiling for + // QSYM's API takes uintptr_t, so we need to be careful when compiling for // 32-bit systems: the compiler would helpfully truncate our uint64_t to fit // into 32 bits. if constexpr (sizeof(uint64_t) == sizeof(uintptr_t)) { @@ -201,6 +215,12 @@ SymExpr _sym_build_integer128(uint64_t high, uint64_t low) { return registerExpression(g_expr_builder->createConstant({128, words}, 128)); } +SymExpr _sym_build_integer_from_buffer(void *buffer, unsigned num_bits) { + assert(num_bits % 64 == 0); + return registerExpression(g_expr_builder->createConstant( + {num_bits, num_bits / 64, (uint64_t *)buffer}, num_bits)); +} + SymExpr _sym_build_null_pointer() { return registerExpression( g_expr_builder->createConstant(0, sizeof(uintptr_t) * 8)); @@ -266,17 +286,32 @@ SymExpr _sym_build_not(SymExpr expr) { g_expr_builder->createNot(allocatedExpressions.at(expr))); } +SymExpr _sym_build_ite(SymExpr cond, SymExpr a, SymExpr b) { + return registerExpression(g_expr_builder->createIte( + allocatedExpressions.at(cond), allocatedExpressions.at(a), + allocatedExpressions.at(b))); +} + SymExpr _sym_build_sext(SymExpr expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; + return registerExpression(g_expr_builder->createSExt( allocatedExpressions.at(expr), bits + expr->bits())); } SymExpr _sym_build_zext(SymExpr expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; + return registerExpression(g_expr_builder->createZExt( allocatedExpressions.at(expr), bits + expr->bits())); } SymExpr _sym_build_trunc(SymExpr expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; + return registerExpression( g_expr_builder->createTrunc(allocatedExpressions.at(expr), bits)); } @@ -289,7 +324,8 @@ void 
_sym_push_path_constraint(SymExpr constraint, int taken, g_solver->addJcc(allocatedExpressions.at(constraint), taken != 0, site_id); } -SymExpr _sym_get_input_byte(size_t offset) { +SymExpr _sym_get_input_byte(size_t offset, uint8_t value) { + g_enhanced_solver->pushInputByte(offset, value); return registerExpression(g_expr_builder->createRead(offset)); } @@ -305,25 +341,44 @@ SymExpr _sym_extract_helper(SymExpr expr, size_t first_bit, size_t last_bit) { size_t _sym_bits_helper(SymExpr expr) { return expr->bits(); } -SymExpr _sym_build_bool_to_bits(SymExpr expr, uint8_t bits) { +SymExpr _sym_build_bool_to_bit(SymExpr expr) { + if (expr == nullptr) + return nullptr; + return registerExpression( - g_expr_builder->boolToBit(allocatedExpressions.at(expr), bits)); + g_expr_builder->boolToBit(allocatedExpressions.at(expr), 1)); } // -// Floating-point operations (unsupported in Qsym) +// Floating-point operations (unsupported in QSYM) // +// Even if we don't generally support operations on floats in this backend, we +// need dummy implementations of a few functions to help the parts of the +// instrumentation that deal with structures; if structs contain floats, the +// instrumentation expects to be able to create bit-vector expressions for +// them. + +SymExpr _sym_build_float(double, int is_double) { + // We create an all-zeros bit vector, mainly to capture the length of the + // value. This is compatible with our dummy implementation of + // _sym_build_float_to_bits. + return registerExpression( + g_expr_builder->createConstant(0, is_double ? 
64 : 32)); +} + +SymExpr _sym_build_float_to_bits(SymExpr expr) { return expr; } + #define UNSUPPORTED(prototype) \ prototype { return nullptr; } -UNSUPPORTED(SymExpr _sym_build_float(double, int)) UNSUPPORTED(SymExpr _sym_build_fp_add(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_sub(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_mul(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_div(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_rem(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_abs(SymExpr)) +UNSUPPORTED(SymExpr _sym_build_fp_neg(SymExpr)) UNSUPPORTED(SymExpr _sym_build_float_ordered_greater_than(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_float_ordered_greater_equal(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_float_ordered_less_than(SymExpr, SymExpr)) @@ -341,7 +396,6 @@ UNSUPPORTED(SymExpr _sym_build_float_unordered_not_equal(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_int_to_float(SymExpr, int, int)) UNSUPPORTED(SymExpr _sym_build_float_to_float(SymExpr, int)) UNSUPPORTED(SymExpr _sym_build_bits_to_float(SymExpr, int)) -UNSUPPORTED(SymExpr _sym_build_float_to_bits(SymExpr)) UNSUPPORTED(SymExpr _sym_build_float_to_signed_integer(SymExpr, uint8_t)) UNSUPPORTED(SymExpr _sym_build_float_to_unsigned_integer(SymExpr, uint8_t)) @@ -424,3 +478,11 @@ void _sym_collect_garbage() { << " milliseconds)" << std::endl; #endif } + +// +// Test-case handling +// + +void symcc_set_test_case_handler(TestCaseHandler handler) { + g_test_case_handler = handler; +} diff --git a/runtime/qsym_backend/Runtime.h b/runtime/qsym_backend/Runtime.h index e0a23526..8f19d2af 100644 --- a/runtime/qsym_backend/Runtime.h +++ b/runtime/qsym_backend/Runtime.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. 
// -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef RUNTIME_H #define RUNTIME_H diff --git a/runtime/qsym_backend/pin.H b/runtime/qsym_backend/pin.H index 84e8f7c6..083d79a8 100644 --- a/runtime/qsym_backend/pin.H +++ b/runtime/qsym_backend/pin.H @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. 
+// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef PIN_H #define PIN_H diff --git a/runtime/qsym_backend/qsym b/runtime/qsym_backend/qsym index d17a39d4..fd5e08ea 160000 --- a/runtime/qsym_backend/qsym +++ b/runtime/qsym_backend/qsym @@ -1 +1 @@ -Subproject commit d17a39d40dc3ea1d17262dd52607f8a6527dde10 +Subproject commit fd5e08eaea764af9a2e2c8bfefead8b23c2a3c5f diff --git a/runtime/simple_backend/CMakeLists.txt b/runtime/simple_backend/CMakeLists.txt index a8fef903..64822050 100644 --- a/runtime/simple_backend/CMakeLists.txt +++ b/runtime/simple_backend/CMakeLists.txt @@ -1,16 +1,17 @@ -# This file is part of SymCC. +# This file is part of the SymCC runtime. # -# SymCC is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. 
+# The SymCC runtime is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your option) +# any later version. # -# SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# The SymCC runtime is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +# for more details. # -# You should have received a copy of the GNU General Public License along with -# SymCC. If not, see . +# You should have received a copy of the GNU Lesser General Public License along +# with SymCC. If not, see . find_package(Z3 4 CONFIG) if (NOT Z3_FOUND) @@ -39,4 +40,4 @@ target_include_directories(SymRuntime PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/.. ${Z3_C_INCLUDE_DIRS}) -set_target_properties(SymRuntime PROPERTIES COMPILE_FLAGS "-Werror") +set_target_properties(SymRuntime PROPERTIES COMPILE_FLAGS "-Werror -Wno-error=deprecated-declarations") diff --git a/runtime/simple_backend/Runtime.cpp b/runtime/simple_backend/Runtime.cpp index c329f019..43f409e9 100644 --- a/runtime/simple_backend/Runtime.cpp +++ b/runtime/simple_backend/Runtime.cpp @@ -1,22 +1,24 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. 
+// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #include #include #include #include +#include #include #include #include @@ -79,7 +81,7 @@ void handle_z3_error(Z3_context c [[maybe_unused]], Z3_error_code e) { } #endif -Z3_ast build_variable(const char *name, uint8_t bits) { +SymExpr build_variable(const char *name, uint8_t bits) { Z3_symbol sym = Z3_mk_string_symbol(g_context, name); auto *sort = Z3_mk_bv_sort(g_context, bits); Z3_inc_ref(g_context, (Z3_ast)sort); @@ -92,7 +94,7 @@ Z3_ast build_variable(const char *name, uint8_t bits) { /// The set of all expressions we have ever passed to client code. std::set allocatedExpressions; -SymExpr registerExpression(Z3_ast expr) { +SymExpr registerExpression(SymExpr expr) { if (allocatedExpressions.count(expr) == 0) { // We don't know this expression yet. Record it and increase the reference // counter. 
@@ -178,7 +180,7 @@ Z3_ast _sym_build_float(double value, int is_double) { return result; } -Z3_ast _sym_get_input_byte(size_t offset) { +Z3_ast _sym_get_input_byte(size_t offset, uint8_t) { static std::vector stdinBytes; if (offset < stdinBytes.size()) @@ -241,6 +243,10 @@ DEF_BINARY_EXPR_BUILDER(float_ordered_equal, fpa_eq) #undef DEF_BINARY_EXPR_BUILDER +Z3_ast _sym_build_ite(Z3_ast cond, Z3_ast a, Z3_ast b) { + return registerExpression(Z3_mk_ite(g_context, cond, a, b)); +} + Z3_ast _sym_build_fp_add(Z3_ast a, Z3_ast b) { return registerExpression(Z3_mk_fpa_add(g_context, g_rounding_mode, a, b)); } @@ -265,6 +271,10 @@ Z3_ast _sym_build_fp_abs(Z3_ast a) { return registerExpression(Z3_mk_fpa_abs(g_context, a)); } +Z3_ast _sym_build_fp_neg(Z3_ast a) { + return registerExpression(Z3_mk_fpa_neg(g_context, a)); +} + Z3_ast _sym_build_not(Z3_ast expr) { return registerExpression(Z3_mk_bvnot(g_context, expr)); } @@ -349,14 +359,21 @@ Z3_ast _sym_build_float_unordered_not_equal(Z3_ast a, Z3_ast b) { } Z3_ast _sym_build_sext(Z3_ast expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; return registerExpression(Z3_mk_sign_ext(g_context, bits, expr)); } Z3_ast _sym_build_zext(Z3_ast expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; return registerExpression(Z3_mk_zero_ext(g_context, bits, expr)); } Z3_ast _sym_build_trunc(Z3_ast expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; + return registerExpression(Z3_mk_extract(g_context, bits - 1, 0, expr)); } @@ -407,10 +424,11 @@ Z3_ast _sym_build_float_to_unsigned_integer(Z3_ast expr, uint8_t bits) { g_context, Z3_mk_fpa_round_toward_zero(g_context), expr, bits)); } -Z3_ast _sym_build_bool_to_bits(Z3_ast expr, uint8_t bits) { - return registerExpression(Z3_mk_ite(g_context, expr, - _sym_build_integer(1, bits), - _sym_build_integer(0, bits))); +Z3_ast _sym_build_bool_to_bit(Z3_ast expr) { + if (expr == nullptr) + return nullptr; + return _sym_build_ite(expr, _sym_build_integer(1, 1), + 
_sym_build_integer(0, 1)); } void _sym_push_path_constraint(Z3_ast constraint, int taken, @@ -425,13 +443,13 @@ void _sym_push_path_constraint(Z3_ast constraint, int taken, "true" or "false", there is no point in trying to solve the negation or * pushing the constraint to the solver... */ - if (Z3_is_eq_ast(g_context, constraint, Z3_mk_true(g_context))) { + if (Z3_is_eq_ast(g_context, constraint, g_true)) { assert(taken && "We have taken an impossible branch"); Z3_dec_ref(g_context, constraint); return; } - if (Z3_is_eq_ast(g_context, constraint, Z3_mk_false(g_context))) { + if (Z3_is_eq_ast(g_context, constraint, g_false)) { assert(!taken && "We have taken an impossible branch"); Z3_dec_ref(g_context, constraint); return; @@ -544,3 +562,13 @@ void _sym_collect_garbage() { << " milliseconds)" << std::endl; #endif } + +/* Test-case handling */ +void symcc_set_test_case_handler(TestCaseHandler) { + // The simple backend doesn't support test-case handlers. However, let's not + // make this a fatal error; otherwise, users would have to change their + // programs to make them work with the simple backend. + fprintf( + g_log, + "Warning: test-case handlers aren't supported in the simple backend\n"); +} diff --git a/runtime/simple_backend/Runtime.h b/runtime/simple_backend/Runtime.h index 953d7db0..66fe8b91 100644 --- a/runtime/simple_backend/Runtime.h +++ b/runtime/simple_backend/Runtime.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. 
+// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see <https://www.gnu.org/licenses/>. +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see <https://www.gnu.org/licenses/>. #ifndef RUNTIME_H #define RUNTIME_H diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c9613e57..c6b0d1b5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -18,6 +18,13 @@ else() set(SYM_TEST_FILECHECK_ARGS "--check-prefix=SIMPLE --check-prefix=ANY") endif() +if (${LLVM_VERSION_MAJOR} VERSION_GREATER_EQUAL 14) + # FileCheck used to be fine with unused prefixes when more than one prefix was + # defined. This changed in LLVM version 14, requiring the new option + # "--allow-unused-prefixes" (added in LLVM 13) to restore the old behavior. 
+ set(SYM_TEST_FILECHECK_ARGS "${SYM_TEST_FILECHECK_ARGS} --allow-unused-prefixes") +endif() + configure_file("lit.site.cfg.in" "lit.site.cfg") add_custom_target(check diff --git a/test/README b/test/README new file mode 120000 index 00000000..410eae0b --- /dev/null +++ b/test/README @@ -0,0 +1 @@ +../docs/Testing.txt \ No newline at end of file diff --git a/test/bcopy_bcmp_bzero.c b/test/bcopy_bcmp_bzero.c new file mode 100644 index 00000000..fff833f2 --- /dev/null +++ b/test/bcopy_bcmp_bzero.c @@ -0,0 +1,65 @@ +// This file is part of SymCC. +// +// SymCC is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// SymCC. If not, see <https://www.gnu.org/licenses/>. +// RUN: %symcc -O2 %s -o %t +// RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s +// +// Test symbolic handling of bcopy, bcmp, and bzero. We copy symbolic data with +// bcopy, then compare it with bcmp, expecting the solver to be triggered +// (indicating that the two functions are represented correctly); then we bzero +// the region and perform another comparison, which should not result in a +// solver query (indicating that bzero concretized as expected). 
+ +#include +#include +#include +#include + +void symcc_make_symbolic(const void *start, size_t byte_length); +typedef void (*TestCaseHandler)(const void *, size_t); +void symcc_set_test_case_handler(TestCaseHandler handler); + +int solved = 0; + +void handle_test_case(const void *data, size_t data_length) { + assert(data_length == 4); + assert(bcmp(data, "bar", 4) == 0); + solved = 1; +} + +int main(int argc, char *argv[]) { + symcc_set_test_case_handler(handle_test_case); + + const char input[] = "foo"; + symcc_make_symbolic(input, 4); + + // Make a copy and compare it in order to trigger the solver. + char copy[4]; + bcopy(input, copy, 4); + int bcmp_result = bcmp(copy, "bar", 4); + assert(bcmp_result != 0); + + // Zero out the symbolic data and compare again (which should not trigger the + // solver this time). + bzero(copy, 4); + bcmp_result = bcmp(copy, "abc", 4); + assert(bcmp_result != 0); + + // The simple backend doesn't support test-case handlers, so we only expect a + // solution with the QSYM backend. + printf("Solved: %d\n", solved); + // SIMPLE: Solved: 0 + // QSYM: Solved: 1 + return 0; +} diff --git a/test/bool_cast.c b/test/bool_cast.c new file mode 100644 index 00000000..0d3764a0 --- /dev/null +++ b/test/bool_cast.c @@ -0,0 +1,42 @@ +// This file is part of SymCC. +// +// SymCC is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// SymCC. If not, see . 
+ +// RUN: %symcc -O1 %s -o %t +// RUN: echo b | %t 2>&1 | %filecheck %s +// +// Check that bool cast is handled correctly (Issue #108) + +#include +#include +#include + +int bar(unsigned char a) { + if (a == 0xCA) return -1; + else return 0; +} + +int main() { + unsigned char input = 0; + read(0, &input, sizeof(input)); + int r = bar(input); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // SIMPLE: stdin0 -> #xca + // QSYM-COUNT-2: SMT + // QSYM: New testcase + if (r == -1) printf("Bingo!\n"); + else printf("Ok\n"); + // ANY: Ok + return r; +} diff --git a/test/concrete_structs.ll b/test/concrete_structs.ll new file mode 100644 index 00000000..e3d8748c --- /dev/null +++ b/test/concrete_structs.ll @@ -0,0 +1,107 @@ +; This file is part of SymCC. +; +; SymCC is free software: you can redistribute it and/or modify it under the +; terms of the GNU General Public License as published by the Free Software +; Foundation, either version 3 of the License, or (at your option) any later +; version. +; +; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +; A PARTICULAR PURPOSE. See the GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License along with +; SymCC. If not, see . + +; Verify that we create correct expressions from struct values. For each kind of +; value, we trigger expression creation by inserting a symbolic value into the +; struct. Compiling this code with SymCC and verifying that the resulting binary +; exits cleanly shows that SymCC's instrumentation doesn't break the execution +; of the program. Moreover, we store a struct value to memory, load one of its +; elements back into a register, and branch based on it in order to trigger the +; solver; by checking the generated test case we can verify that the expression +; was correct. 
+; +; This test reproduces a bug where creating expressions for some structs would +; lead to a program crash. +; +; Since the bitcode is written by hand, we first run llc on it because it +; performs a validity check, whereas Clang doesn't. + +; RUN: llc %s -o /dev/null +; RUN: %symcc %s -o %t +; RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s + +target triple = "x86_64-pc-linux-gnu" + +; The struct type which we'll create expressions for. Include a floating-point +; value and a Boolean because they're represented with non-bitvector solver +; variables (reproducing eurecom-s3/symcc#138). +%struct_type = type { i8, i32, i8, float, i1 } + +; Global variable to record whether we've found a solution. Since the simple +; backend doesn't support test-case handlers, we start with "true". +@solved = global i1 1 + +; Our test-case handler verifies that the new test case is a 32-bit integer +; with the value 42. +define void @test_case_handler(i8* %data, i64 %data_length) { + %correct_length = icmp eq i64 %data_length, 4 + br i1 %correct_length, label %check_data, label %failed + +check_data: + %value_pointer = bitcast i8* %data to i32* + %value = load i32, i32* %value_pointer + %correct_value = icmp eq i32 %value, 42 + br i1 %correct_value, label %all_good, label %failed + +all_good: + store i1 1, i1* @solved + ret void + +failed: + store i1 0, i1* @solved + ret void +} + +define i32 @main(i32 %argc, i8** %argv) { + ; Register our test-case handler. + call void @symcc_set_test_case_handler(void (i8*, i64)* @test_case_handler) + ; SIMPLE: Warning: test-case handlers + + ; Create a symbolic value that we can use to trigger the creation of struct + ; expressions. 
+ %symbolic_value_mem = alloca i32 + store i32 1, i32* %symbolic_value_mem + call void @symcc_make_symbolic(i32* %symbolic_value_mem, i64 4) + %symbolic_value = load i32, i32* %symbolic_value_mem + %symbolic_byte = trunc i32 %symbolic_value to i8 + + ; Undef struct + insertvalue %struct_type undef, i32 %symbolic_value, 1 + + ; Struct with concrete value + insertvalue %struct_type { i8 1, i32 undef, i8 2, float undef, i1 undef }, i32 %symbolic_value, 1 + + ; Write a struct to memory and load one of its elements back into a register. + ; It's important to also insert a symbolic value into the struct, so that we + ; generate an expression in the first place. + %struct_mem = alloca %struct_type + %struct_value = insertvalue %struct_type { i8 0, i32 42, i8 undef, float undef, i1 undef }, i8 %symbolic_byte, 2 + store %struct_type %struct_value, %struct_type* %struct_mem + %value_address = getelementptr %struct_type, %struct_type* %struct_mem, i32 0, i32 1 + %value_loaded = load i32, i32* %value_address + %is_forty_two = icmp eq i32 %value_loaded, %symbolic_value + br i1 %is_forty_two, label %never_executed, label %done + ; QSYM: SMT + +never_executed: + br label %done + +done: + %solved = load i1, i1* @solved + %result = select i1 %solved, i32 0, i32 1 + ret i32 %result +} + +declare void @symcc_make_symbolic(i32*, i64) +declare void @symcc_set_test_case_handler(void (i8*, i64)*) diff --git a/test/if.c b/test/if.c index 036c4402..dda1d13a 100644 --- a/test/if.c +++ b/test/if.c @@ -51,5 +51,6 @@ int main(int argc, char* argv[]) { } fprintf(stderr, "%d\n", x); fprintf(stderr, "%d\n", foo(x, 7)); + // ANY: 7 return 0; } diff --git a/test/load_store.ll b/test/load_store.ll new file mode 100644 index 00000000..6dc25689 --- /dev/null +++ b/test/load_store.ll @@ -0,0 +1,45 @@ +; This file is part of SymCC. 
+; +; SymCC is free software: you can redistribute it and/or modify it under the +; terms of the GNU General Public License as published by the Free Software +; Foundation, either version 3 of the License, or (at your option) any later +; version. +; +; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +; A PARTICULAR PURPOSE. See the GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License along with +; SymCC. If not, see . + +; Verify that loading and storing concrete values of various types works. For +; each type, we allocate space on the stack, then store a constant value into +; it, and finally load it back. Compiling this code with SymCC and verifying +; that the resulting binary exits cleanly shows that SymCC's instrumentation +; doesn't break the load/store operations. +; +; This test reproduces a bug where loading a concrete Boolean would lead to a +; program crash. +; +; Since the bitcode is written by hand, we first run llc on it because it +; performs a validity check, whereas Clang doesn't. +; +; RUN: llc %s -o /dev/null +; RUN: %symcc %s -o %t +; RUN: %t 2>&1 + +target triple = "x86_64-pc-linux-gnu" + +define i32 @main(i32 %argc, i8** %argv) { + ; Load and store a Boolean. + %stack_bool = alloca i1 + store i1 0, i1* %stack_bool + %copy_of_stack_bool = load i1, i1* %stack_bool + + ; Load and store a float. 
+ %stack_float = alloca float + store float 0.0, float* %stack_float + %copy_of_stack_float = load float, float* %stack_float + + ret i32 0 +} diff --git a/test/loop.c b/test/loop.c index d411e180..5d90eb9d 100644 --- a/test/loop.c +++ b/test/loop.c @@ -50,5 +50,6 @@ int main(int argc, char* argv[]) { } x = ntohl(x); fprintf(stderr, "%d\n", fac(x)); + // ANY: 120 return 0; } diff --git a/test/memory_input.c b/test/memory_input.c new file mode 100644 index 00000000..9e2282ab --- /dev/null +++ b/test/memory_input.c @@ -0,0 +1,51 @@ +// This file is part of SymCC. +// +// SymCC is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// SymCC. If not, see . + +// RUN: %symcc -O2 %s -o %t +// RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s +#include +#include +#include + +void symcc_make_symbolic(const void *start, size_t byte_length); + +uint64_t g_value = 0xaaaabbbbccccdddd; + +int main(int argc, char *argv[]) { + uint64_t x = 10; + uint8_t y = 0; + + symcc_make_symbolic(&x, sizeof(x)); + symcc_make_symbolic(&y, sizeof(y)); + + fprintf(stderr, "%s\n", (x == g_value) ? "yes" : "no"); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // SIMPLE-DAG: #xaa + // SIMPLE-DAG: #xbb + // SIMPLE-DAG: #xcc + // SIMPLE-DAG: #xdd + // QSYM-COUNT-2: SMT + // ANY: no + + fprintf(stderr, "%s\n", (y == 10) ? 
"yes" : "no"); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // y should be part of the input, just after x + // SIMPLE: stdin8 -> #x0a + // QSYM-COUNT-2: SMT + // ANY: no + + return 0; +} diff --git a/test/propagation_select.c b/test/propagation_select.c new file mode 100644 index 00000000..a04af9e0 --- /dev/null +++ b/test/propagation_select.c @@ -0,0 +1,47 @@ +// This file is part of SymCC. +// +// SymCC is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// SymCC. If not, see . + +// RUN: %symcc -O1 %s -o %t +// RUN: echo xxx | %t 2>&1 | %filecheck %s +// +// Check that select instruction is propagating the symbolic value (issue #109) + +#include +#include +#include + +char bar(char a, char b, char c) { return (a == 0xA) ? 
b : c; } + +int main() { + char input[3] = {0}; + read(0, &input, sizeof(input)); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // SIMPLE: stdin0 -> #x0a + // QSYM-COUNT-2: SMT + // QSYM: New testcase + char r = bar(input[0], input[1], input[2]); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // SIMPLE-DAG: stdin2 -> #x0b + // SIMPLE-DAG: stdin0 -> #x00 + // QSYM-COUNT-2: SMT + // QSYM: New testcase + // ANY: KO + if (r == 0xB) + printf("OK!\n"); + else + printf("KO\n"); + return 0; +} diff --git a/test/regression/cxa_vector.ll b/test/regression/cxa_vector.ll index 65a0c3be..40e7a546 100644 --- a/test/regression/cxa_vector.ll +++ b/test/regression/cxa_vector.ll @@ -3,11 +3,6 @@ ; This file exposed a bug in our handling of "invoke" instructions that would ; lead to invalid byte code. -; ModuleID = '/home/seba/work/compiler/llvm-project/libcxxabi/src/cxa_vector.cpp' -source_filename = "/home/seba/work/compiler/llvm-project/libcxxabi/src/cxa_vector.cpp" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-linux-gnu" - $__clang_call_terminate = comdat any ; Function Attrs: sspstrong uwtable diff --git a/test/symbolic_structs.ll b/test/symbolic_structs.ll new file mode 100644 index 00000000..93a112b4 --- /dev/null +++ b/test/symbolic_structs.ll @@ -0,0 +1,52 @@ +; This file is part of SymCC. +; +; SymCC is free software: you can redistribute it and/or modify it under the +; terms of the GNU General Public License as published by the Free Software +; Foundation, either version 3 of the License, or (at your option) any later +; version. +; +; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +; A PARTICULAR PURPOSE. See the GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License along with +; SymCC. If not, see . 
+ +; Verify that we correctly insert into symbolic struct values. We insert values +; of various types into a symbolic struct, thus triggering expression updates. +; Compiling this code with SymCC and verifying that the resulting binary exits +; cleanly shows that SymCC's instrumentation doesn't break the execution of the +; program. +; +; This test reproduces a bug where inserting a concrete floating-point value +; into a symbolic struct would lead to a program crash (eurecom-s3/symcc#138). +; +; Since the bitcode is written by hand, we first run llc on it because it +; performs a validity check, whereas Clang doesn't. + +; RUN: llc %s -o /dev/null +; RUN: %symcc %s -o %t +; RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 + +target triple = "x86_64-pc-linux-gnu" + +; The struct type of our symbolic value. Include a floating-point value and a +; Boolean because they're represented with non-bitvector solver variables +; (reproducing eurecom-s3/symcc#138). +%struct_type = type { i8, i32, i8, float, i1 } + +define i32 @main(i32 %argc, i8** %argv) { + ; Create a symbolic struct value that we can subsequently insert values into. + %struct_value_mem = alloca %struct_type + call void @symcc_make_symbolic(%struct_type* %struct_value_mem, i64 20) + %symbolic_struct = load %struct_type, %struct_type* %struct_value_mem + + ; Insert values of various types, triggering the creation of new expressions. + insertvalue %struct_type %symbolic_struct, i32 5, 1 + insertvalue %struct_type %symbolic_struct, float 42.0, 3 + insertvalue %struct_type %symbolic_struct, i1 1, 4 + + ret i32 0 +} + +declare void @symcc_make_symbolic(%struct_type*, i64) diff --git a/test/test_case_handler.c b/test/test_case_handler.c new file mode 100644 index 00000000..414c8843 --- /dev/null +++ b/test/test_case_handler.c @@ -0,0 +1,59 @@ +// This file is part of SymCC. 
+// +// SymCC is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// SymCC. If not, see . + +// RUN: %symcc -O2 %s -o %t +// RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s +#include +#include +#include + +#define MAGIC 0xab + +void symcc_make_symbolic(const void *start, size_t byte_length); +typedef void (*TestCaseHandler)(const void *, size_t); +void symcc_set_test_case_handler(TestCaseHandler handler); + +int solved = 0; +int num_test_cases = 0; + +void handle_test_case(const void *data, size_t data_length) { + num_test_cases++; + if (data_length == 1 && ((const uint8_t *)data)[0] == MAGIC) + solved = 1; +} + +int main(int argc, char *argv[]) { + symcc_set_test_case_handler(handle_test_case); + // SIMPLE: Warning: test-case handlers + + uint8_t input = 0; + symcc_make_symbolic(&input, sizeof(input)); + + fprintf(stderr, "%s\n", (input == MAGIC) ? 
"yes" : "no"); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // SIMPLE: stdin0 -> #xab + // QSYM: SMT + // ANY: no + + fprintf(stderr, "%d\n", solved); + // QSYM: 1 + // SIMPLE: 0 + + fprintf(stderr, "%d\n", num_test_cases); + // QSYM: 1 + // SIMPLE: 0 + + return 0; +} diff --git a/test/uadd_sat.ll b/test/uadd_sat.ll new file mode 100644 index 00000000..4248fa33 --- /dev/null +++ b/test/uadd_sat.ll @@ -0,0 +1,50 @@ +; RUN: %symcc -O2 %s -o %t +; RUN: echo -ne "\x05\x00\x00\x00" | %t 2>&1 | %filecheck %s + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@g_value = dso_local local_unnamed_addr global i16 40, align 2 +@stderr = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8 +@.str = private unnamed_addr constant [18 x i8] c"Failed to read x\0A\00", align 1 +@.str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"yes\00", align 1 +@.str.3 = private unnamed_addr constant [3 x i8] c"no\00", align 1 + +; Function Attrs: nofree nounwind uwtable +define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { +entry: + %x = alloca i16, align 2 + %0 = bitcast i16* %x to i8* + %call = call i64 @read(i32 0, i8* nonnull %0, i64 2) #5 + %cmp.not = icmp eq i64 %call, 2 + %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 + br i1 %cmp.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %2 = call i64 @fwrite(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str, i64 0, i64 0), i64 17, i64 1, %struct._IO_FILE* %1) #6 + br label %cleanup + +if.end: ; preds = %entry + %3 = load i16, i16* %x, align 2 + %4 = load i16, i16* @g_value, align 2 + %add = call i16 @llvm.uadd.sat.i16(i16 
%3, i16 %4) + %cmp = icmp eq i16 %add, 43981 + %cond = select i1 %cmp, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0) + ; SIMPLE: Trying to solve + ; SIMPLE: Found diverging input + ; SIMPLE-DAG: stdin0 -> #xa5 + ; SIMPLE-DAG: stdin1 -> #xab + ; ANY: no + %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i8* %cond) #6 + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ -1, %if.then ], [ 0, %if.end ] + ret i32 %retval.0 +} + +declare i64 @read(i32, i8* nocapture, i64) +declare i32 @fprintf(%struct._IO_FILE* nocapture , i8* nocapture readonly, ...) +declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) +declare i16 @llvm.uadd.sat.i16(i16, i16) diff --git a/test/uadd_sat.test32 b/test/uadd_sat.test32 new file mode 100644 index 00000000..647cc3c7 --- /dev/null +++ b/test/uadd_sat.test32 @@ -0,0 +1,2 @@ +RUN: %symcc -m32 -O2 %S/uadd_sat.ll -o %t_32 +RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t_32 2>&1 | %filecheck %s diff --git a/test/usub_sat.ll b/test/usub_sat.ll new file mode 100644 index 00000000..62fe200a --- /dev/null +++ b/test/usub_sat.ll @@ -0,0 +1,50 @@ +; RUN: %symcc -O2 %s -o %t +; RUN: echo -ne "\x05\x00\x00\x00" | %t 2>&1 | %filecheck %s + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@g_value = dso_local local_unnamed_addr global i16 40, align 2 +@stderr = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8 +@.str = private unnamed_addr constant [18 x i8] c"Failed to read x\0A\00", align 1 +@.str.1 = private unnamed_addr constant [4 x i8] 
c"%s\0A\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"yes\00", align 1 +@.str.3 = private unnamed_addr constant [3 x i8] c"no\00", align 1 + +; Function Attrs: nofree nounwind uwtable +define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { +entry: + %x = alloca i16, align 2 + %0 = bitcast i16* %x to i8* + %call = call i64 @read(i32 0, i8* nonnull %0, i64 2) #5 + %cmp.not = icmp eq i64 %call, 2 + %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 + br i1 %cmp.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %2 = call i64 @fwrite(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str, i64 0, i64 0), i64 17, i64 1, %struct._IO_FILE* %1) #6 + br label %cleanup + +if.end: ; preds = %entry + %3 = load i16, i16* %x, align 2 + %4 = load i16, i16* @g_value, align 2 + %add = call i16 @llvm.usub.sat.i16(i16 %3, i16 %4) + %cmp = icmp eq i16 %add, 43981 + %cond = select i1 %cmp, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0) + ; SIMPLE: Trying to solve + ; SIMPLE: Found diverging input + ; SIMPLE-DAG: stdin0 -> #xf5 + ; SIMPLE-DAG: stdin1 -> #xab + ; ANY: no + %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i8* %cond) #6 + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ -1, %if.then ], [ 0, %if.end ] + ret i32 %retval.0 +} + +declare i64 @read(i32, i8* nocapture, i64) +declare i32 @fprintf(%struct._IO_FILE* nocapture , i8* nocapture readonly, ...) 
+declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) +declare i16 @llvm.usub.sat.i16(i16, i16) diff --git a/test/usub_sat.test32 b/test/usub_sat.test32 new file mode 100644 index 00000000..16de7355 --- /dev/null +++ b/test/usub_sat.test32 @@ -0,0 +1,2 @@ +RUN: %symcc -m32 -O2 %S/usub_sat.ll -o %t_32 +RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t_32 2>&1 | %filecheck %s diff --git a/util/pure_concolic_execution.sh b/util/pure_concolic_execution.sh index 34ba7a89..0c6d61c9 100755 --- a/util/pure_concolic_execution.sh +++ b/util/pure_concolic_execution.sh @@ -3,19 +3,20 @@ set -u function usage() { - echo "Usage: $0 -i INPUT_DIR [-o OUTPUT_DIR] TARGET..." + echo "Usage: $0 -i INPUT_DIR [-o OUTPUT_DIR] [-f FAILED_DIR] TARGET..." echo echo "Run SymCC-instrumented TARGET in a loop, feeding newly generated inputs back " echo "into it. Initial inputs are expected in INPUT_DIR, and new inputs are " echo "continuously read from there. If OUTPUT_DIR is specified, a copy of the corpus " echo "and of each generated input is preserved there. TARGET may contain the special " echo "string \"@@\", which is replaced with the name of the current input file." + echo "If FAILED_DIR is specified, a copy of the failing test cases is preserved there." echo echo "Note that SymCC never changes the length of the input, so be sure that the " echo "initial inputs cover all required input lengths." } -while getopts "i:o:" opt; do +while getopts "i:o:f:" opt; do case "$opt" in i) in=$OPTARG @@ -23,6 +24,9 @@ while getopts "i:o:" opt; do o) out=$OPTARG ;; + f) + failed_dir=$OPTARG + ;; *) usage exit 1 @@ -30,7 +34,9 @@ while getopts "i:o:" opt; do esac done shift $((OPTIND-1)) -target=$@ +target=("$@") +target[0]=$(realpath "${target[0]}") +target="${target[@]}" timeout="timeout -k 5 90" if [[ ! 
-v in ]]; then @@ -46,13 +52,26 @@ touch $work_dir/analyzed_inputs if [[ -v out ]]; then mkdir -p $out fi +if [[ -v failed_dir ]]; then + mkdir -p "$failed_dir" +fi function cleanup() { - rm -rf $work_dir + rm -rf --preserve-root -- $work_dir } trap cleanup EXIT +# Copy one file to the destination directory, renaming it according to its hash. +function copy_file_with_unique_name() { + local file_name="$1" + local dest_dir="$2" + + local dest="$dest_dir/$(sha256sum "$file_name" | cut -d' ' -f1)" + cp "$file_name" "$dest" + +} + # Copy all files in the source directory to the destination directory, renaming # them according to their hash. function copy_with_unique_name() { @@ -62,8 +81,7 @@ function copy_with_unique_name() { if [ -n "$(ls -A $source_dir)" ]; then local f for f in $source_dir/*; do - local dest="$dest_dir/$(sha256sum $f | cut -d' ' -f1)" - cp "$f" "$dest" + copy_file_with_unique_name "$f" "$dest_dir" done fi } @@ -82,6 +100,17 @@ function maybe_export() { fi } +# Remove input files that have already been analysed. Used to prevent an infinite loop. +function remove_analysed() { + local source_dir="$1" + local f + for f in $source_dir/*; do + if grep -q "$(basename $f)" $work_dir/analyzed_inputs; then + rm $f + fi + done +} + # Copy those files from the input directory to the next generation that haven't # been analyzed yet. function maybe_import() { @@ -102,6 +131,15 @@ function maybe_import() { fi } +# If the input file caused a non-zero return code, then copy it to FAILED_DIR. +function save_failed() { + local ret_code=$1 + local input_file="$2" + if [ $ret_code -ne 0 ] && [[ -v failed_dir ]] ; then + copy_file_with_unique_name "$input_file" "$failed_dir" + fi +} + # Set up the shell environment export SYMCC_OUTPUT_DIR=$work_dir/symcc_out export SYMCC_ENABLE_LINEARIZATION=1 @@ -123,13 +161,17 @@ while true; do echo "Running on $f" if [[ "$target " =~ " @@ " ]]; then env SYMCC_INPUT_FILE=$f $timeout ${target[@]/@@/$f} >/dev/null 2>&1 + ret_code=$? 
else $timeout $target <$f >/dev/null 2>&1 + ret_code=$? fi # Make the new test cases part of the next generation add_to_next_generation $work_dir/symcc_out maybe_export $work_dir/symcc_out + remove_analysed $work_dir/next + save_failed $ret_code "$f" echo $(basename $f) >> $work_dir/analyzed_inputs rm -f $f done diff --git a/util/symcc_fuzzing_helper/Cargo.lock b/util/symcc_fuzzing_helper/Cargo.lock index 5fa913db..9f8b5069 100644 --- a/util/symcc_fuzzing_helper/Cargo.lock +++ b/util/symcc_fuzzing_helper/Cargo.lock @@ -1,459 +1,436 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "aho-corasick" version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743ad5a418686aad3b87fd14c43badd828cf26e214a00f92a384291cf22e1811" dependencies = [ - "memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "ansi_term" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr", ] [[package]] name = "anyhow" version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c" [[package]] name = "atty" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "hermit-abi", + "libc", + "winapi", ] +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + [[package]] name = "bitflags" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" [[package]] name = "c2-chacha" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" dependencies = [ - "ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "ppv-lite86", ] [[package]] name = "cfg-if" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" [[package]] name = "clap" -version = "2.33.0" +version = "3.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2dbdf4bdacb33466e854ce889eee8dfd5729abf7ccd7664d0a2d60cd384440b" +dependencies = [ + "atty", + "bitflags", + "clap_derive", + "clap_lex", + "indexmap", + "lazy_static", + "strsim", + "termcolor", + "textwrap", +] + +[[package]] +name = "clap_derive" +version = "3.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25320346e922cffe59c0bbc5410c8d8784509efb321488971081313cb1e1a33c" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a37c35f1112dad5e6e0b1adaff798507497a18fceeb30cceb3bae7d1427b9213" dependencies = [ - "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", - "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", - "textwrap 0.11.0 
(registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", + "os_str_bytes", ] [[package]] name = "env_logger" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" dependencies = [ - "atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", - "humantime 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "termcolor 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "atty", + "humantime", + "log", + "regex", + "termcolor", ] [[package]] name = "getrandom" version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" dependencies = [ - "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", - "wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", + "wasi", ] +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + [[package]] name = "heck" -version = "0.3.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" [[package]] name = "hermit-abi" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772" dependencies = [ - "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", ] [[package]] name = "humantime" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" +dependencies = [ + "quick-error", +] + +[[package]] +name = "indexmap" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6012d540c5baa3589337a98ce73408de9b5a25ec9fc2c6fd6be8f0d39e0ca5a" dependencies = [ - "quick-error 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg", + "hashbrown", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.66" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558" [[package]] name = "log" version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" dependencies = [ - "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", ] [[package]] name = "memchr" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3197e20c7edb283f87c071ddfc7a2cca8f8e0b888c242959846a6fce03c72223" + +[[package]] +name = "os_str_bytes" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa" [[package]] name = "ppv-lite86" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" 
[[package]] name = "proc-macro-error" -version = "0.4.5" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ - "proc-macro-error-attr 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", - "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rustversion 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", ] [[package]] name = "proc-macro-error-attr" -version = "0.4.5" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ - "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rustversion 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", - "syn-mid 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "version_check", ] [[package]] name = "proc-macro2" -version = "1.0.8" +version = "1.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" dependencies = [ - "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-ident", ] [[package]] name = "quick-error" version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quote" -version = "1.0.2" +version = "1.0.18" source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" dependencies = [ - "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", ] [[package]] name = "rand" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" dependencies = [ - "getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom", + "libc", + "rand_chacha", + "rand_core", + "rand_hc", ] [[package]] name = "rand_chacha" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" dependencies = [ - "c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "c2-chacha", + "rand_core", ] [[package]] name = "rand_core" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" dependencies = [ - "getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom", ] [[package]] name = "rand_hc" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" dependencies = [ - "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core", ] [[package]] name = "redox_syscall" version = "0.1.56" source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" [[package]] name = "regex" version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "322cf97724bea3ee221b78fe25ac9c46114ebb51747ad5babd51a2fc6a8235a8" dependencies = [ - "aho-corasick 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", ] [[package]] name = "regex-syntax" version = "0.6.14" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b28dfe3fe9badec5dbf0a79a9cccad2cfc2ab5484bdb3e44cbd1ae8b3ba2be06" [[package]] name = "remove_dir_all" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" dependencies = [ - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rustversion" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi", ] [[package]] name = "strsim" -version = "0.8.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "structopt" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.4.0 
(registry+https://github.com/rust-lang/crates.io-index)", - "structopt-derive 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "structopt-derive" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "proc-macro-error 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", - "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "symcc_fuzzing_helper" version = "0.1.0" dependencies = [ - "anyhow 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)", - "env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "structopt 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", - "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "anyhow", + "clap", + "env_logger", + "log", + "regex", + "tempfile", ] [[package]] name = "syn" -version = "1.0.14" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" dependencies = [ - "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "syn-mid" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "proc-macro2 1.0.8 
(registry+https://github.com/rust-lang/crates.io-index)", - "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "unicode-ident", ] [[package]] name = "tempfile" version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" dependencies = [ - "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)", - "remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", + "rand", + "redox_syscall", + "remove_dir_all", + "winapi", ] [[package]] name = "termcolor" -version = "1.1.0" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" dependencies = [ - "winapi-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util", ] [[package]] name = "textwrap" -version = "0.11.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" [[package]] name = "thread_local" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" dependencies = [ - "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static", ] [[package]] -name = 
"unicode-segmentation" -version = "1.6.0" +name = "unicode-ident" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" [[package]] -name = "unicode-width" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "unicode-xid" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "vec_map" -version = "0.8.1" +name = "version_check" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasi" version = "0.9.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" [[package]] name = "winapi" version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" dependencies = [ - "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ccfbf554c6ad11084fb7517daca16cfdcaccbdadba4fc336f032a8b12c2ad80" dependencies = [ - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi", ] [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" - -[metadata] -"checksum aho-corasick 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)" = "743ad5a418686aad3b87fd14c43badd828cf26e214a00f92a384291cf22e1811" -"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -"checksum anyhow 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c" -"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" -"checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" -"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" -"checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" -"checksum env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" -"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" -"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" -"checksum hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772" -"checksum humantime 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" -"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" -"checksum libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)" = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558" -"checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" -"checksum memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3197e20c7edb283f87c071ddfc7a2cca8f8e0b888c242959846a6fce03c72223" -"checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" -"checksum proc-macro-error 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "1b79a464461615532fcc8a6ed8296fa66cc12350c18460ab3f4594a6cee0fcb6" -"checksum proc-macro-error-attr 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "23832e5eae6bac56bbac190500eef1aaede63776b5cd131eaa4ee7fe120cd892" -"checksum proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "3acb317c6ff86a4e579dfa00fc5e6cca91ecbb4e7eb2df0468805b674eb88548" -"checksum quick-error 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" -"checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" -"checksum rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" -"checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" -"checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" -"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -"checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" -"checksum regex 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "322cf97724bea3ee221b78fe25ac9c46114ebb51747ad5babd51a2fc6a8235a8" -"checksum regex-syntax 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)" = "b28dfe3fe9badec5dbf0a79a9cccad2cfc2ab5484bdb3e44cbd1ae8b3ba2be06" -"checksum remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" -"checksum rustversion 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b3bba175698996010c4f6dce5e7f173b6eb781fce25d2cfc45e27091ce0b79f6" -"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" -"checksum structopt 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "df136b42d76b1fbea72e2ab3057343977b04b4a2e00836c3c7c0673829572713" -"checksum structopt-derive 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd50a87d2f7b8958055f3e73a963d78feaccca3836767a9069844e34b5b03c0a" -"checksum syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)" = "af6f3550d8dff9ef7dc34d384ac6f107e5d31c8f57d9f28e0081503f547ac8f5" -"checksum syn-mid 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9fd3937748a7eccff61ba5b90af1a20dbf610858923a9192ea0ecb0cb77db1d0" -"checksum tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" -"checksum termcolor 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f" -"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -"checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" -"checksum unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" -"checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" -"checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" -"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" -"checksum wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)" = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" -"checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" -"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -"checksum winapi-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "4ccfbf554c6ad11084fb7517daca16cfdcaccbdadba4fc336f032a8b12c2ad80" -"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/util/symcc_fuzzing_helper/Cargo.toml b/util/symcc_fuzzing_helper/Cargo.toml index 
d09d8ae3..d884dce1 100644 --- a/util/symcc_fuzzing_helper/Cargo.toml +++ b/util/symcc_fuzzing_helper/Cargo.toml @@ -20,7 +20,7 @@ edition = "2018" license = "GPL-3.0-or-later" [dependencies] -structopt = "0.3" +clap = { version = "3", features = ["derive"] } tempfile = "3.1" anyhow = "1.0" log = "0.4.0" diff --git a/util/symcc_fuzzing_helper/src/main.rs b/util/symcc_fuzzing_helper/src/main.rs index 6d284e1e..378e79d1 100644 --- a/util/symcc_fuzzing_helper/src/main.rs +++ b/util/symcc_fuzzing_helper/src/main.rs @@ -15,6 +15,7 @@ mod symcc; use anyhow::{Context, Result}; +use clap::{self, StructOpt}; use std::collections::HashSet; use std::fs; use std::fs::File; @@ -22,7 +23,6 @@ use std::io::Write; use std::path::{Path, PathBuf}; use std::thread; use std::time::{Duration, Instant}; -use structopt::StructOpt; use symcc::{AflConfig, AflMap, AflShowmapResult, SymCC, TestcaseDir}; use tempfile::tempdir; @@ -32,22 +32,22 @@ const STATS_INTERVAL_SEC: u64 = 60; // inputs. #[derive(Debug, StructOpt)] -#[structopt(about = "Make SymCC collaborate with AFL.", no_version)] +#[clap(about = "Make SymCC collaborate with AFL.")] struct CLI { /// The name of the fuzzer to work with - #[structopt(short = "a")] + #[clap(short = 'a')] fuzzer_name: String, /// The AFL output directory - #[structopt(short = "o")] + #[clap(short = 'o')] output_dir: PathBuf, /// Name to use for SymCC - #[structopt(short = "n")] + #[clap(short = 'n')] name: String, /// Enable verbose logging - #[structopt(short = "v")] + #[clap(short = 'v')] verbose: bool, /// Program under test @@ -264,7 +264,7 @@ impl State { } fn main() -> Result<()> { - let options = CLI::from_args(); + let options = CLI::parse(); env_logger::builder() .filter_level(if options.verbose { log::LevelFilter::Debug