diff --git a/centipede/BUILD b/centipede/BUILD index 4fbd803d4..d2cf1b5ce 100644 --- a/centipede/BUILD +++ b/centipede/BUILD @@ -351,13 +351,13 @@ cc_library( ":defs", ":feature", ":logging", + ":pc_info", ":remote_file", ":shard_reader", ":workdir", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", - "@com_google_absl//absl/strings", ], ) diff --git a/centipede/analyze_corpora.cc b/centipede/analyze_corpora.cc index 5f9132429..549c26619 100644 --- a/centipede/analyze_corpora.cc +++ b/centipede/analyze_corpora.cc @@ -15,6 +15,7 @@ #include "./centipede/analyze_corpora.h" #include +#include #include #include #include @@ -23,7 +24,6 @@ #include "absl/container/flat_hash_set.h" #include "absl/log/check.h" #include "absl/log/log.h" -#include "absl/strings/str_cat.h" #include "./centipede/binary_info.h" #include "./centipede/control_flow.h" #include "./centipede/corpus.h" @@ -31,6 +31,7 @@ #include "./centipede/defs.h" #include "./centipede/feature.h" #include "./centipede/logging.h" +#include "./centipede/pc_info.h" #include "./centipede/remote_file.h" #include "./centipede/shard_reader.h" #include "./centipede/workdir.h" @@ -45,10 +46,9 @@ std::vector ReadCorpora(std::string_view binary_name, WorkDir workdir(std::string(workdir_path), std::string(binary_name), std::string(binary_hash), /*my_shard_index=*/0); std::vector corpus_paths; - RemoteGlobMatch(absl::StrCat(workdir.CorpusPathPrefix(), "*"), corpus_paths); + RemoteGlobMatch(workdir.CorpusPath().Glob(), corpus_paths); std::vector features_paths; - RemoteGlobMatch(absl::StrCat(workdir.FeaturesPathPrefix(), "*"), - features_paths); + RemoteGlobMatch(workdir.FeaturesPath().Glob(), features_paths); CHECK_EQ(corpus_paths.size(), features_paths.size()); std::vector corpus; diff --git a/centipede/centipede.cc b/centipede/centipede.cc index aa71120c8..81f935b9e 100644 --- a/centipede/centipede.cc +++ b/centipede/centipede.cc @@ -131,10 +131,10 @@ 
Centipede::Centipede(const Environment &env, CentipedeCallbacks &user_callbacks, void Centipede::SaveCorpusToLocalDir( const Environment &env, std::string_view save_corpus_to_local_dir) { - const WorkDir wd{env}; + const auto corpus_path_info = WorkDir{env}.CorpusPath(); for (size_t shard = 0; shard < env.total_shards; shard++) { auto reader = DefaultBlobFileReaderFactory(); - auto corpus_path = wd.CorpusPath(shard); + auto corpus_path = corpus_path_info.Shard(shard); reader->Open(corpus_path).IgnoreError(); // may not exist. absl::Span blob; size_t num_read = 0; @@ -148,7 +148,6 @@ void Centipede::SaveCorpusToLocalDir( void Centipede::ExportCorpusFromLocalDir(const Environment &env, std::string_view local_dir) { - const WorkDir wd{env}; // Shard the file paths in `local_dir` based on hashes of filenames. // Such partition is stable: a given file always goes to a specific shard. std::vector> sharded_paths(env.total_shards); @@ -164,8 +163,9 @@ void Centipede::ExportCorpusFromLocalDir(const Environment &env, // Iterate over all shards. size_t inputs_added = 0; size_t inputs_ignored = 0; + const auto corpus_path_info = WorkDir{env}.CorpusPath(); for (size_t shard = 0; shard < env.total_shards; shard++) { - const std::string corpus_path = wd.CorpusPath(shard); + const std::string corpus_path = corpus_path_info.Shard(shard); size_t num_shard_bytes = 0; // Read the shard (if it exists), collect input hashes from it. absl::flat_hash_set existing_hashes; @@ -401,8 +401,8 @@ void Centipede::LoadShard(const Environment &load_env, size_t shard_index, // See serialize_shard_loads on why we may want to serialize shard loads. // TODO(kcc): remove serialize_shard_loads when LoadShards() uses less RAM. 
const WorkDir wd{load_env}; - const std::string corpus_path = wd.CorpusPath(shard_index); - const std::string features_path = wd.FeaturesPath(shard_index); + const std::string corpus_path = wd.CorpusPath().Shard(shard_index); + const std::string features_path = wd.FeaturesPath().Shard(shard_index); if (env_.serialize_shard_loads) { ABSL_CONST_INIT static absl::Mutex load_shard_mu{absl::kConstInit}; absl::MutexLock lock(&load_shard_mu); @@ -436,7 +436,7 @@ void Centipede::LoadAllShardsInRandomOrder(const Environment &load_env, void Centipede::Rerun(std::vector &to_rerun) { if (to_rerun.empty()) return; - auto features_file_path = wd_.FeaturesPath(env_.my_shard_index); + auto features_file_path = wd_.FeaturesPath().Shard(env_.my_shard_index); auto features_file = DefaultBlobFileWriterFactory(); CHECK_OK(features_file->Open(features_file_path, "a")); @@ -585,7 +585,7 @@ void Centipede::MergeFromOtherCorpus(std::string_view merge_from_dir, CHECK_GE(new_corpus_size, initial_corpus_size); // Corpus can't shrink here. if (new_corpus_size > initial_corpus_size) { auto appender = DefaultBlobFileWriterFactory(); - CHECK_OK(appender->Open(wd_.CorpusPath(env_.my_shard_index), "a")); + CHECK_OK(appender->Open(wd_.CorpusPath().Shard(env_.my_shard_index), "a")); for (size_t idx = initial_corpus_size; idx < new_corpus_size; ++idx) { CHECK_OK(appender->Write(corpus_.Get(idx))); } @@ -604,7 +604,7 @@ void Centipede::ReloadAllShardsAndWriteDistilledCorpus() { // Save the distilled corpus to a file in workdir and possibly to a hashed // file in the first corpus dir passed in `--corpus_dir`. 
- const auto distill_to_path = wd_.DistilledCorpusPath(); + const auto distill_to_path = wd_.DistilledCorpusPath().MyShard(); LOG(INFO) << "Distilling: shard: " << env_.my_shard_index << " output: " << distill_to_path << " " << " distilled size: " << corpus_.NumActive(); @@ -666,8 +666,9 @@ void Centipede::FuzzingLoop() { auto corpus_file = DefaultBlobFileWriterFactory(); auto features_file = DefaultBlobFileWriterFactory(); - CHECK_OK(corpus_file->Open(wd_.CorpusPath(env_.my_shard_index), "a")); - CHECK_OK(features_file->Open(wd_.FeaturesPath(env_.my_shard_index), "a")); + CHECK_OK(corpus_file->Open(wd_.CorpusPath().Shard(env_.my_shard_index), "a")); + CHECK_OK( + features_file->Open(wd_.FeaturesPath().Shard(env_.my_shard_index), "a")); // Load seed corpus when there is no external corpus loaded. if (corpus_.NumTotal() == 0) LoadSeedInputs(); diff --git a/centipede/distill.cc b/centipede/distill.cc index 23a6bef01..8277bd951 100644 --- a/centipede/distill.cc +++ b/centipede/distill.cc @@ -42,8 +42,8 @@ void DistillTask(const Environment &env, const std::vector &shard_indices) { const WorkDir wd{env}; std::string log_line = absl::StrCat("DISTILL[S.", env.my_shard_index, "]: "); - const auto corpus_path = wd.DistilledCorpusPath(); - const auto features_path = wd.DistilledFeaturesPath(); + const auto corpus_path = wd.DistilledCorpusPath().MyShard(); + const auto features_path = wd.DistilledFeaturesPath().MyShard(); LOG(INFO) << log_line << VV(env.total_shards) << VV(corpus_path) << VV(features_path); @@ -59,9 +59,11 @@ void DistillTask(const Environment &env, const size_t num_total_shards = shard_indices.size(); size_t num_shards_read = 0; size_t num_distilled_corpus_elements = 0; + const auto corpus_path_info = wd.CorpusPath(); + const auto features_path_info = wd.FeaturesPath(); for (size_t shard_idx : shard_indices) { - const std::string corpus_path = wd.CorpusPath(shard_idx); - const std::string features_path = wd.FeaturesPath(shard_idx); + const std::string 
corpus_path = corpus_path_info.Shard(shard_idx); + const std::string features_path = features_path_info.Shard(shard_idx); VLOG(2) << log_line << "reading shard " << shard_idx << " from:\n" << VV(corpus_path) << "\n" << VV(features_path); diff --git a/centipede/distill_test.cc b/centipede/distill_test.cc index c3e10614b..995b1eafe 100644 --- a/centipede/distill_test.cc +++ b/centipede/distill_test.cc @@ -24,7 +24,6 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/flags/declare.h" -#include "absl/flags/flag.h" #include "absl/flags/reflection.h" #include "absl/log/check.h" #include "./centipede/blob_file.h" @@ -69,8 +68,8 @@ using InputVec = std::vector; void WriteToShard(const Environment &env, const TestCorpusRecord &record, size_t shard_index) { const WorkDir wd{env}; - auto corpus_path = wd.CorpusPath(shard_index); - auto features_path = wd.FeaturesPath(shard_index); + const auto corpus_path = wd.CorpusPath().Shard(shard_index); + const auto features_path = wd.FeaturesPath().Shard(shard_index); const auto corpus_appender = DefaultBlobFileWriterFactory(); const auto features_appender = DefaultBlobFileWriterFactory(); CHECK_OK(corpus_appender->Open(corpus_path, "a")); @@ -83,15 +82,15 @@ void WriteToShard(const Environment &env, const TestCorpusRecord &record, // Reads and returns the distilled corpus record from // `wd.DistilledCorpusPath()` and `wd.DistilledFeaturesPath()`. 
std::vector ReadFromDistilled(const WorkDir &wd) { - auto distilled_corpus_path = wd.DistilledCorpusPath(); - auto distilled_features_path = wd.DistilledFeaturesPath(); + const auto distilled_corpus_path = wd.DistilledCorpusPath().MyShard(); + const auto distilled_features_path = wd.DistilledFeaturesPath().MyShard(); std::vector result; auto shard_reader_callback = [&result](const ByteArray &input, FeatureVec &features) { result.push_back({input, features}); }; - ReadShard(wd.DistilledCorpusPath(), wd.DistilledFeaturesPath(), + ReadShard(distilled_corpus_path, distilled_features_path, shard_reader_callback); return result; } diff --git a/centipede/workdir.cc b/centipede/workdir.cc index dc996776a..4584a8b68 100644 --- a/centipede/workdir.cc +++ b/centipede/workdir.cc @@ -45,6 +45,29 @@ std::string NormalizeAnnotation(std::string_view annotation) { } // namespace +//------------------------------------------------------------------------------ +// WorkDir::PathInfo + +WorkDir::PathInfo::PathInfo(std::string base_dir, std::string rel_prefix, + size_t my_shard_index) + : prefix_{std::filesystem::path(base_dir) / rel_prefix}, + my_shard_index_{my_shard_index} {} + +std::string WorkDir::PathInfo::Shard(size_t shard_index) const { + return absl::StrFormat("%s%0*d", prefix_, kDigitsInShardIndex, shard_index); +} + +std::string WorkDir::PathInfo::MyShard() const { + return Shard(my_shard_index_); +} + +std::string WorkDir::PathInfo::Glob() const { + return absl::StrCat(prefix_, "*"); +} + +//------------------------------------------------------------------------------ +// WorkDir + WorkDir::WorkDir( // std::string workdir, // std::string binary_name, // @@ -78,36 +101,23 @@ std::string WorkDir::BinaryInfoDirPath() const { return std::filesystem::path(CoverageDirPath()) / "binary-info"; } -std::string WorkDir::CorpusPathPrefix() const { - return std::filesystem::path(workdir_) / "corpus."; -} - -std::string WorkDir::CorpusPath(size_t shard_index) const { - return 
absl::StrCat( - CorpusPathPrefix(), - absl::StrFormat("%0*d", kDigitsInShardIndex, shard_index)); +WorkDir::PathInfo WorkDir::CorpusPath() const { + return {workdir_, "corpus.", my_shard_index_}; } -std::string WorkDir::FeaturesPathPrefix() const { - return std::filesystem::path(CoverageDirPath()) / "features."; +WorkDir::PathInfo WorkDir::DistilledCorpusPath() const { + return {workdir_, absl::StrCat("distilled-", binary_name_, "."), + my_shard_index_}; } -std::string WorkDir::FeaturesPath(size_t shard_index) const { - return absl::StrCat( - FeaturesPathPrefix(), - absl::StrFormat("%0*d", kDigitsInShardIndex, shard_index)); -} - -std::string WorkDir::DistilledCorpusPath() const { - return std::filesystem::path(workdir_) / - absl::StrFormat("distilled-%s.%0*d", binary_name_, kDigitsInShardIndex, - my_shard_index_); +WorkDir::PathInfo WorkDir::FeaturesPath() const { + return {CoverageDirPath(), "features.", my_shard_index_}; } -std::string WorkDir::DistilledFeaturesPath() const { - return std::filesystem::path(CoverageDirPath()) - .append(absl::StrFormat("distilled-features-%s.%0*d", binary_name_, - kDigitsInShardIndex, my_shard_index_)); +WorkDir::PathInfo WorkDir::DistilledFeaturesPath() const { + return {CoverageDirPath(), + absl::StrCat("distilled-features-", binary_name_, "."), + my_shard_index_}; } std::string WorkDir::CoverageReportPath(std::string_view annotation) const { diff --git a/centipede/workdir.h b/centipede/workdir.h index abdb36d51..c9f2517a0 100644 --- a/centipede/workdir.h +++ b/centipede/workdir.h @@ -31,6 +31,24 @@ class WorkDir { // pad indices with 0's in output file names so the names are sorted by index. static constexpr int kDigitsInShardIndex = 6; + class PathInfo { + public: + // Returns the path of the file for `shard_index`. + std::string Shard(size_t shard_index) const; + // Returns the path of the file for `my_shard_index_`. + std::string MyShard() const; + // Returns a glob matching all the shards of this file kind. 
+ std::string Glob() const; + + private: + friend class WorkDir; + PathInfo(std::string base_dir, std::string rel_prefix, + size_t my_shard_index); + + const std::string prefix_; + const size_t my_shard_index_; + }; + // Constructs an object from directly provided field values. WorkDir( // std::string workdir, // @@ -57,21 +75,14 @@ class WorkDir { // Returns the path where the BinaryInfo will be serialized within workdir. std::string BinaryInfoDirPath() const; - // Returns the path for a corpus file by its shard_index. - std::string CorpusPath(size_t shard_index) const; - std::string CorpusPath() const { return CorpusPath(my_shard_index_); } - // Returns the prefix of all corpus shards - std::string CorpusPathPrefix() const; - // Returns the path for the distilled corpus file for my_shard_index. - std::string DistilledCorpusPath() const; - - // Returns the path for a features file by its shard_index. - std::string FeaturesPath(size_t shard_index) const; - std::string FeaturesPath() const { return FeaturesPath(my_shard_index_); } - // Returns the prefix of all feature shards - std::string FeaturesPathPrefix() const; - // Returns the path for the distilled features file for my_shard_index. - std::string DistilledFeaturesPath() const; + // Returns the path info for the corpus files. + PathInfo CorpusPath() const; + // Returns the path info for the distilled corpus files. + PathInfo DistilledCorpusPath() const; + // Returns the path info for the features files. + PathInfo FeaturesPath() const; + // Returns the path info for the distilled features files. + PathInfo DistilledFeaturesPath() const; // Returns the path for the coverage report file for my_shard_index. // Non-default `annotation` becomes a part of the returned filename. 
@@ -102,6 +113,11 @@ class WorkDir { std::string RUsageReportPath(std::string_view annotation = "") const; private: + std::string CorpusPrefix() const; + std::string DistilledCorpusPrefix() const; + std::string FeaturesPrefix() const; + std::string DistilledFeaturesPrefix() const; + // Internal value holders for when the object is constructed from direct // values rather than an `Environment` object. std::string workdir_holder_; diff --git a/centipede/workdir_test.cc b/centipede/workdir_test.cc index 6486d83f3..ded7efd4b 100644 --- a/centipede/workdir_test.cc +++ b/centipede/workdir_test.cc @@ -43,14 +43,22 @@ TEST(WorkDirTest, Main) { EXPECT_EQ(wd.CrashReproducerDirPath(), "/dir/crashes"); EXPECT_EQ(wd.BinaryInfoDirPath(), "/dir/bin-hash/binary-info"); - EXPECT_EQ(wd.CorpusPath(), "/dir/corpus.000003"); - EXPECT_EQ(wd.CorpusPath(7), "/dir/corpus.000007"); - EXPECT_EQ(wd.DistilledCorpusPath(), "/dir/distilled-bin.000003"); + EXPECT_EQ(wd.CorpusPath().MyShard(), "/dir/corpus.000003"); + EXPECT_EQ(wd.CorpusPath().Shard(7), "/dir/corpus.000007"); + EXPECT_EQ(wd.CorpusPath().Glob(), "/dir/corpus.*"); + EXPECT_EQ(wd.DistilledCorpusPath().MyShard(), "/dir/distilled-bin.000003"); + EXPECT_EQ(wd.DistilledCorpusPath().Shard(7), "/dir/distilled-bin.000007"); + EXPECT_EQ(wd.DistilledCorpusPath().Glob(), "/dir/distilled-bin.*"); - EXPECT_EQ(wd.FeaturesPath(), "/dir/bin-hash/features.000003"); - EXPECT_EQ(wd.FeaturesPath(7), "/dir/bin-hash/features.000007"); - EXPECT_EQ(wd.DistilledFeaturesPath(), + EXPECT_EQ(wd.FeaturesPath().MyShard(), "/dir/bin-hash/features.000003"); + EXPECT_EQ(wd.FeaturesPath().Shard(7), "/dir/bin-hash/features.000007"); + EXPECT_EQ(wd.FeaturesPath().Glob(), "/dir/bin-hash/features.*"); + EXPECT_EQ(wd.DistilledFeaturesPath().MyShard(), "/dir/bin-hash/distilled-features-bin.000003"); + EXPECT_EQ(wd.DistilledFeaturesPath().Shard(7), + "/dir/bin-hash/distilled-features-bin.000007"); + EXPECT_EQ(wd.DistilledFeaturesPath().Glob(), + 
"/dir/bin-hash/distilled-features-bin.*"); EXPECT_EQ(wd.CoverageReportPath(), // "/dir/coverage-report-bin.000003.txt");