Skip to content

Commit

Permalink
#Centipede Update ExportCorpusFromLocalDir and SaveCorpusToLocalDir t…
Browse files Browse the repository at this point in the history
…o support remote files.

The names no longer make sense since the directories are no longer local. These
functions are also associated with similarly named flags. I've renamed
everything for consistency.

PiperOrigin-RevId: 577933102
  • Loading branch information
Markus Kusano authored and copybara-github committed Oct 30, 2023
1 parent a6a1710 commit b6dbff0
Show file tree
Hide file tree
Showing 11 changed files with 45 additions and 42 deletions.
1 change: 1 addition & 0 deletions centipede/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ cc_library(
":defs",
":feature",
":logging",
":remote_file",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
Expand Down
34 changes: 15 additions & 19 deletions centipede/centipede.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <filesystem> // NOLINT
#include <filesystem>
#include <functional>
#include <iostream>
#include <memory>
Expand Down Expand Up @@ -129,9 +129,8 @@ Centipede::Centipede(const Environment &env, CentipedeCallbacks &user_callbacks,
input_filter_cmd_.StartForkServer(TemporaryLocalDirPath(), "input_filter");
}

void Centipede::SaveCorpusToLocalDir(
const Environment &env, std::string_view save_corpus_to_local_dir) {
const WorkDir wd{env};
void Centipede::CorpusToFiles(const Environment &env, std::string_view dir) {
WorkDir wd{env};
for (size_t shard = 0; shard < env.total_shards; shard++) {
auto reader = DefaultBlobFileReaderFactory();
auto corpus_path = wd.CorpusPath(shard);
Expand All @@ -140,26 +139,23 @@ void Centipede::SaveCorpusToLocalDir(
size_t num_read = 0;
while (reader->Read(blob).ok()) {
++num_read;
WriteToLocalHashedFileInDir(save_corpus_to_local_dir, blob);
WriteToRemoteHashedFileInDir(dir, blob);
}
LOG(INFO) << "Read " << num_read << " from " << corpus_path;
}
}

void Centipede::ExportCorpusFromLocalDir(const Environment &env,
std::string_view local_dir) {
const WorkDir wd{env};
// Shard the file paths in `local_dir` based on hashes of filenames.
void Centipede::CorpusFromFiles(const Environment &env, std::string_view dir) {
WorkDir wd{env};
// Shard the file paths in `dir` based on hashes of the listed file paths.
// Such partition is stable: a given file always goes to a specific shard.
std::vector<std::vector<std::string>> sharded_paths(env.total_shards);
std::vector<std::string> paths;
size_t total_paths = 0;
for (const auto &entry :
std::filesystem::recursive_directory_iterator(local_dir)) {
if (entry.is_regular_file()) {
size_t filename_hash = std::hash<std::string>{}(entry.path().filename());
sharded_paths[filename_hash % env.total_shards].push_back(entry.path());
++total_paths;
}
for (const std::string &path : RemoteListFilesRecursively(dir)) {
size_t filename_hash = std::hash<std::string>{}(path);
sharded_paths[filename_hash % env.total_shards].push_back(path);
++total_paths;
}
// Iterate over all shards.
size_t inputs_added = 0;
Expand All @@ -184,13 +180,13 @@ void Centipede::ExportCorpusFromLocalDir(const Environment &env,
<< "Failed to open corpus file: " << corpus_path;
ByteArray shard_data;
for (const auto &path : sharded_paths[shard]) {
ByteArray input;
ReadFromLocalFile(path, input);
std::string input;
RemoteFileGetContents(path, input);
if (input.empty() || existing_hashes.contains(Hash(input))) {
++inputs_ignored;
continue;
}
CHECK_OK(appender->Write(input));
CHECK_OK(appender->Write(ByteArray{input.begin(), input.end()}));
++inputs_added;
}
LOG(INFO) << VV(shard) << VV(inputs_added) << VV(inputs_ignored)
Expand Down
6 changes: 2 additions & 4 deletions centipede/centipede.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,12 @@ class Centipede {
void FuzzingLoop();

// Saves the sharded corpus into `dir`, one file per input.
static void SaveCorpusToLocalDir(const Environment &env,
std::string_view dir);
static void CorpusToFiles(const Environment &env, std::string_view dir);
// Exports the corpus from `dir` (one file per input) into the sharded corpus.
// Reads `dir` recursively.
// Ignores inputs that already exist in the shard they need to be added to.
// Sharding is stable and depends only on env.total_shards and the file name.
static void ExportCorpusFromLocalDir(const Environment &env,
std::string_view dir);
static void CorpusFromFiles(const Environment &env, std::string_view dir);

private:
// Executes inputs from `input_vec`.
Expand Down
10 changes: 5 additions & 5 deletions centipede/centipede_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,8 @@ int CentipedeMain(const Environment &env,
CentipedeCallbacksFactory &callbacks_factory) {
SetSignalHandlers(env.stop_at);

if (!env.save_corpus_to_local_dir.empty()) {
Centipede::SaveCorpusToLocalDir(env, env.save_corpus_to_local_dir);
if (!env.corpus_to_files.empty()) {
Centipede::CorpusToFiles(env, env.corpus_to_files);
return EXIT_SUCCESS;
}

Expand All @@ -201,15 +201,15 @@ int CentipedeMain(const Environment &env,
}

// Just export the corpus from a (possibly remote) dir and exit.
if (!env.export_corpus_from_local_dir.empty()) {
Centipede::ExportCorpusFromLocalDir(env, env.export_corpus_from_local_dir);
if (!env.corpus_from_files.empty()) {
Centipede::CorpusFromFiles(env, env.corpus_from_files);
return EXIT_SUCCESS;
}

// Export the corpus from each (possibly remote) corpus dir and then fuzz.
if (!env.corpus_dir.empty()) {
for (const auto &corpus_dir : env.corpus_dir) {
Centipede::ExportCorpusFromLocalDir(env, corpus_dir);
Centipede::CorpusFromFiles(env, corpus_dir);
}
}

Expand Down
4 changes: 2 additions & 2 deletions centipede/environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ struct Environment {
bool distill = false;
size_t log_features_shards = 0;
std::string knobs_file;
std::string save_corpus_to_local_dir;
std::string export_corpus_from_local_dir;
std::string corpus_to_files;
std::string corpus_from_files;
std::vector<std::string> corpus_dir;
std::string symbolizer_path = "llvm-symbolizer";
std::string objdump_path = "objdump";
Expand Down
11 changes: 4 additions & 7 deletions centipede/environment_flags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -263,12 +263,10 @@ ABSL_FLAG(bool, print_runner_log, default_env->print_runner_log,
ABSL_FLAG(std::string, knobs_file, default_env->knobs_file,
"If not empty, knobs will be read from this (possibly remote) file."
" The feature is experimental, not yet fully functional.");
ABSL_FLAG(std::string, save_corpus_to_local_dir,
default_env->save_corpus_to_local_dir,
// Destination directory for dumping the sharded corpus as individual files.
// Consumed by `Centipede::CorpusToFiles()`.
ABSL_FLAG(std::string, corpus_to_files, default_env->corpus_to_files,
          "Save the remote corpus from workdir to the given (possibly remote) "
          "directory, one file per input.");
ABSL_FLAG(std::string, export_corpus_from_local_dir,
default_env->export_corpus_from_local_dir,
// Source directory whose files are imported into the sharded corpus.
// Consumed by `Centipede::CorpusFromFiles()`, which lists the directory
// recursively.
ABSL_FLAG(std::string, corpus_from_files, default_env->corpus_from_files,
          "Export a corpus from a (possibly remote) directory with one file "
          "per input into the sharded remote corpus in workdir. The directory "
          "is read recursively.");
ABSL_FLAG(std::vector<std::string>, corpus_dir, default_env->corpus_dir,
Expand Down Expand Up @@ -470,9 +468,8 @@ Environment CreateEnvironmentFromFlags(const std::vector<std::string> &argv) {
.distill = absl::GetFlag(FLAGS_distill),
.log_features_shards = absl::GetFlag(FLAGS_log_features_shards),
.knobs_file = absl::GetFlag(FLAGS_knobs_file),
.save_corpus_to_local_dir = absl::GetFlag(FLAGS_save_corpus_to_local_dir),
.export_corpus_from_local_dir =
absl::GetFlag(FLAGS_export_corpus_from_local_dir),
.corpus_to_files = absl::GetFlag(FLAGS_corpus_to_files),
.corpus_from_files = absl::GetFlag(FLAGS_corpus_from_files),
.corpus_dir = absl::GetFlag(FLAGS_corpus_dir),
.symbolizer_path = absl::GetFlag(FLAGS_symbolizer_path),
.objdump_path = absl::GetFlag(FLAGS_objdump_path),
Expand Down
2 changes: 1 addition & 1 deletion centipede/test_fuzzing_util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ function centipede::test_crashing_target() {
# Create a corpus with one crasher and one other input.
echo -n "${crash_input}" >"${TMPCORPUS}/${crash_input}" # induces abort in the target.
echo -n "${nice_input}" >"${TMPCORPUS}/${nice_input}" # just some input.
${target} --workdir="${WD}" --export_corpus_from_local_dir="${TMPCORPUS}"
${target} --workdir="${WD}" --corpus_from_files="${TMPCORPUS}"

# Run fuzzing with num_runs=0, i.e. only run the inputs from the corpus.
# Expecting a crash to be observed and reported.
Expand Down
6 changes: 3 additions & 3 deletions centipede/testing/centipede_main_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ test_debug_symbols() {
centipede::assert_regex_in_file "EDGE: LLVMFuzzerTestOneInput .*testing/test_fuzz_target.cc" "${LOG}"

echo "============ ${FUNC}: add func1/func2-A inputs to the corpus."
test_fuzz --workdir="${WD}" --export_corpus_from_local_dir="${TMPCORPUS}"
test_fuzz --workdir="${WD}" --corpus_from_files="${TMPCORPUS}"

echo "============ ${FUNC}: run again, append to the same LOG file."
# TODO(b/282845630): Passing `--num_runs=1` only to trigger telemetry dumping.
Expand Down Expand Up @@ -129,7 +129,7 @@ test_dictionary() {
echo "foo" >"${TMPCORPUS}"/foo
echo "bat" >"${TMPCORPUS}"/binary
centipede::ensure_empty_dir "${WD}"
test_fuzz --workdir="${WD}" --export_corpus_from_local_dir "${TMPCORPUS}"
test_fuzz --workdir="${WD}" --corpus_from_files "${TMPCORPUS}"
cp "${WD}/corpus.000000" "${DICT}"

echo "============ ${FUNC}: testing binary dictionary file"
Expand All @@ -150,7 +150,7 @@ test_for_each_blob() {
echo "FoO" >"${TMPCORPUS}"/a
echo "bAr" >"${TMPCORPUS}"/b

test_fuzz --workdir="${WD}" --export_corpus_from_local_dir "${TMPCORPUS}"
test_fuzz --workdir="${WD}" --corpus_from_files "${TMPCORPUS}"
echo "============ ${FUNC}: test for_each_blob"
test_fuzz --for_each_blob="cat %P" "${WD}"/corpus.000000 | tee "${LOG}"
centipede::assert_regex_in_file "Running 'cat %P' on ${WD}/corpus.000000" "${LOG}"
Expand Down
2 changes: 1 addition & 1 deletion centipede/testing/clusterfuzz_format_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ test_crashing_target() {
# Create a corpus with one crasher and one other input.
cp "$1" "${TMPCORPUS}" # Triggers an error.
echo -n "foo" >"${TMPCORPUS}/foo" # Just some input.
abort_test_fuzz --export_corpus_from_local_dir="${TMPCORPUS}"
abort_test_fuzz --corpus_from_files="${TMPCORPUS}"

# Run fuzzing with num_runs=0, i.e. only run the inputs from the corpus.
# Expecting a crash to be observed and reported.
Expand Down
8 changes: 8 additions & 0 deletions centipede/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#include "./centipede/defs.h"
#include "./centipede/feature.h"
#include "./centipede/logging.h"
#include "./centipede/remote_file.h"

namespace centipede {

Expand Down Expand Up @@ -142,6 +143,13 @@ void WriteToLocalHashedFileInDir(std::string_view dir_path,
WriteToLocalFile(file_path, data);
}

// Writes `data` to the file `dir_path`/Hash(`data`) using
// `RemoteFileSetContents()`. Does nothing if `dir_path` is empty.
void WriteToRemoteHashedFileInDir(std::string_view dir_path,
                                  absl::Span<const uint8_t> data) {
  if (!dir_path.empty()) {
    // The file is named after the hash of its contents.
    const std::string hashed_file_path =
        (std::filesystem::path(dir_path) / Hash(data)).string();
    const std::string contents(data.begin(), data.end());
    RemoteFileSetContents(hashed_file_path, contents);
  }
}

std::string HashOfFileContents(std::string_view file_path) {
ByteArray ba;
ReadFromLocalFile(file_path, ba);
Expand Down
3 changes: 3 additions & 0 deletions centipede/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ void WriteToLocalFile(std::string_view file_path, const FeatureVec &data);
// Writes `data` to `dir_path`/Hash(`data`). Does nothing if `dir_path.empty()`.
void WriteToLocalHashedFileInDir(std::string_view dir_path,
absl::Span<const uint8_t> data);
// Same as `WriteToLocalHashedFileInDir`, except that the file is written with
// `RemoteFileSetContents()`, so `dir_path` may refer to a remote directory.
// Does nothing if `dir_path.empty()`.
void WriteToRemoteHashedFileInDir(std::string_view dir_path,
absl::Span<const uint8_t> data);
// Returns a path string suitable to create a temporary local directory.
// Will return the same value every time it is called within one thread,
// but different values for different threads and difference processes.
Expand Down

0 comments on commit b6dbff0

Please sign in to comment.