Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Filestore] Implement a tool which replays filestore-vhost's profile log - Tool for removing sensitive data (#2284) #2424

Merged
merged 3 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ target_sources(analytics-profile_tool-lib PRIVATE
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/dump_events.cpp
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/factory.cpp
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/find_bytes_access.cpp
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/mask.cpp
)
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ target_sources(analytics-profile_tool-lib PRIVATE
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/dump_events.cpp
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/factory.cpp
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/find_bytes_access.cpp
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/mask.cpp
)
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ target_sources(analytics-profile_tool-lib PRIVATE
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/dump_events.cpp
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/factory.cpp
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/find_bytes_access.cpp
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/mask.cpp
)
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ target_sources(analytics-profile_tool-lib PRIVATE
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/dump_events.cpp
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/factory.cpp
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/find_bytes_access.cpp
${CMAKE_SOURCE_DIR}/cloud/filestore/tools/analytics/profile_tool/lib/mask.cpp
)
6 changes: 4 additions & 2 deletions cloud/filestore/tools/analytics/profile_tool/lib/factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,17 @@ namespace NCloud::NFileStore::NProfileTool {

TCommandPtr NewDumpEventsCommand();
TCommandPtr NewFindBytesAccessCommand();
TCommandPtr NewMaskSensitiveData();

////////////////////////////////////////////////////////////////////////////////

using TFactoryFunc = std::function<TCommandPtr()>;
using TFactoryMap = TMap<TString, TFactoryFunc>;

static const TFactoryMap Commands = {
{ "dumpevents", NewDumpEventsCommand },
{ "findbytesaccess", NewFindBytesAccessCommand },
{"dumpevents", NewDumpEventsCommand},
{"findbytesaccess", NewFindBytesAccessCommand},
{"masksensitivedata", NewMaskSensitiveData},
};

////////////////////////////////////////////////////////////////////////////////
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@ Y_UNIT_TEST_SUITE(TFactory)
{
const auto names = GetCommandNames();

UNIT_ASSERT_VALUES_EQUAL(2, names.size());
UNIT_ASSERT_VALUES_EQUAL(3, names.size());

UNIT_ASSERT_VALUES_EQUAL("dumpevents", names[0]);
UNIT_ASSERT_VALUES_EQUAL("findbytesaccess", names[1]);
UNIT_ASSERT_VALUES_EQUAL("masksensitivedata", names[2]);
}
}

Expand Down
158 changes: 158 additions & 0 deletions cloud/filestore/tools/analytics/profile_tool/lib/mask.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#include "mask.h"

#include "public.h"

#include "command.h"

#include <cloud/filestore/libs/storage/tablet/profile_log_events.h>
#include <cloud/filestore/tools/analytics/libs/event-log/dump.h>
#include <cloud/filestore/tools/analytics/libs/event-log/request_filter.h>
#include <cloud/filestore/tools/analytics/profile_tool/lib/common_filter_params.h>

#include <library/cpp/digest/md5/md5.h>
#include <library/cpp/getopt/last_getopt.h>

#include <util/generic/guid.h>

namespace NCloud::NFileStore::NProfileTool {

constexpr TStringBuf OutProfileLogLabel = "out-profile-log";

namespace {

////////////////////////////////////////////////////////////////////////////////

class TMaskCommand final: public TCommand
{
private:
TString PathToOutProfileLog;
TMaskSensitiveData::EMode Mode{};

public:
TMaskCommand()
{
Opts.AddLongOption(
OutProfileLogLabel.Data(),
"Path to output profile log")
.Required()
.RequiredArgument("STR")
.StoreResult(&PathToOutProfileLog);

Opts.AddLongOption("mode", "Transform mode")
.RequiredArgument("STR")
.Choices({"empty", "hash", "nodeid"})
.DefaultValue("nodeid");
}

bool Init(NLastGetopt::TOptsParseResultException& parseResult) override
{
TString modeOpt = parseResult.Get("mode");
if (modeOpt == "nodeid") {
Mode = TMaskSensitiveData::EMode::NodeId;
return true;
}

if (modeOpt == "hash") {
Mode = TMaskSensitiveData::EMode::Hash;
return true;
}

if (modeOpt == "empty") {
Mode = TMaskSensitiveData::EMode::Empty;
return true;
}

return false;
}

int Execute() override
{
TMaskSensitiveData mask{Mode};
mask.MaskSensitiveData(PathToProfileLog, PathToOutProfileLog);
return 0;
}
};

} // namespace

// Remembers the masking mode that Transform() will apply to node names.
TMaskSensitiveData::TMaskSensitiveData(const EMode mode)
    : Mode(mode)
{
}

// Moves to the next event in the input log that carries a
// NProto::TProfileLogRecord, skipping events of any other type.
//
// On success MessagePtr points at the record and EventMessageNumber holds the
// number of requests in it; returns true.
// Returns false when the iterator is exhausted or has not been created yet.
// In that case MessagePtr/EventMessageNumber are reset, because EventPtr no
// longer holds the event the previous MessagePtr was obtained from.
bool TMaskSensitiveData::Advance()
{
    // Advance() is public and may be called before MaskSensitiveData() has
    // created the iterator; do not dereference an empty holder.
    if (!CurrentEvent) {
        return false;
    }

    while ((EventPtr = CurrentEvent->Next())) {
        MessagePtr = dynamic_cast<const NProto::TProfileLogRecord*>(
            EventPtr->GetProto());

        if (!MessagePtr) {
            // Not a profile log record - ignore it.
            continue;
        }

        EventMessageNumber = MessagePtr->GetRequests().size();
        return true;
    }

    // End of log: EventPtr is now empty, so drop the stale record state
    // instead of leaving a pointer into an event we no longer hold.
    MessagePtr = nullptr;
    EventMessageNumber = 0;
    return false;
}

// Produces the masked replacement for a node name according to Mode.
//
// str    - the original (sensitive) name
// nodeId - node id used to build the replacement in NodeId mode
//
// Empty  -> ""
// NodeId -> "nodeid-" followed by the decimal node id
// Hash   -> MD5 of Seed concatenated with the original name
//
// If Mode holds a value outside the enumerators above, an empty string is
// returned so that the original name is never leaked.
TString TMaskSensitiveData::Transform(const TString& str, const ui64 nodeId)
{
    switch (Mode) {
        case EMode::Empty: {
            return "";
        }
        case EMode::NodeId: {
            return "nodeid-" + ToString(nodeId);
        }
        case EMode::Hash: {
            // Seed salts the digest, so hashes are comparable only within
            // one run of MaskSensitiveData().
            return MD5::Data(Seed + str);
        }
    }

    // Reached only for an out-of-range Mode. Without this return the function
    // would flow off its end, which is undefined behaviour for a
    // value-returning function. Fail closed: emit nothing.
    return {};
}

void TMaskSensitiveData::MaskSensitiveData(
const TString& in,
const TString& out)
{
Seed = CreateGuidAsString();

NEventLog::TOptions options;
options.FileName = in;

// Sort eventlog items by timestamp
options.SetForceStrongOrdering(true);
CurrentEvent = CreateIterator(options);

TEventLog eventLog(out, 0);
TSelfFlushLogFrame logFrame(eventLog);
while (Advance()) {
NProto::TProfileLogRecord recordOut;
recordOut.SetFileSystemId(MessagePtr->GetFileSystemId());

while (EventMessageNumber > 0) {
auto request = MessagePtr->GetRequests()[--EventMessageNumber];

if (request.GetNodeInfo().HasNodeName()) {
request.MutableNodeInfo()->SetNodeName(Transform(
request.GetNodeInfo().GetNodeName(),
request.GetNodeInfo().GetNodeId()));
}
if (request.GetNodeInfo().HasNewNodeName()) {
request.MutableNodeInfo()->SetNewNodeName(Transform(
request.GetNodeInfo().GetNewNodeName(),
request.GetNodeInfo().GetNodeId()));
}
*recordOut.AddRequests() = std::move(request);
}
logFrame.LogEvent(recordOut);
}
}

////////////////////////////////////////////////////////////////////////////////

// Factory entry point: creates the command that masks sensitive data in a
// profile log.
TCommandPtr NewMaskSensitiveData()
{
    TCommandPtr command = std::make_shared<TMaskCommand>();
    return command;
}

} // namespace NCloud::NFileStore::NProfileTool
40 changes: 40 additions & 0 deletions cloud/filestore/tools/analytics/profile_tool/lib/mask.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#pragma once

#include <cloud/filestore/libs/diagnostics/events/profile_events.ev.pb.h>

#include <library/cpp/eventlog/eventlog.h>
#include <library/cpp/eventlog/iterator.h>

namespace NCloud::NFileStore::NProfileTool {

////////////////////////////////////////////////////////////////////////////////

// Copies a profile log while replacing node names in every request.
class TMaskSensitiveData
{
public:
    // How a node name is replaced.
    enum class EMode
    {
        // "nodeid-" followed by the node id
        NodeId,
        // MD5 of the per-run seed concatenated with the original name
        Hash,
        // empty string
        Empty,
    };

    explicit TMaskSensitiveData(const EMode mode);

    // Steps to the next profile log record of the input; false when none left.
    bool Advance();

    // Returns the masked replacement for `str` according to the mode.
    TString Transform(const TString& str, const ui64 nodeId);

    // Reads the log at `in` and writes its masked copy to `out`.
    void MaskSensitiveData(const TString& in, const TString& out);

private:
    // Iterator over the input event log.
    THolder<NEventLog::IIterator> CurrentEvent;
    // Event most recently returned by the iterator.
    TConstEventPtr EventPtr;
    // Number of requests of the current record that are still unprocessed.
    int EventMessageNumber = 0;
    // Profile log record of the current event, if any.
    const NProto::TProfileLogRecord* MessagePtr = nullptr;

    // Some random string but stable in one session
    TString Seed;

    EMode Mode;
};

} // namespace NCloud::NFileStore::NProfileTool
1 change: 1 addition & 0 deletions cloud/filestore/tools/analytics/profile_tool/lib/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ SRCS(
dump_events.cpp
factory.cpp
find_bytes_access.cpp
mask.cpp
)

PEERDIR(
Expand Down
21 changes: 10 additions & 11 deletions library/cpp/getopt/last_getopt_demo/demo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,16 @@ class TMain: public TMainClassArgs {
.Help("specify HTTP method")
.CompletionArgHelp("http method")
.StoreResult(&ExplicitMethod_)
.Completer(
NLastGetopt::NComp::Choice(
{{"GET", "request representation of the specified resource"},
{"HEAD", "request response identical to that of GET, but without response body"},
{"POST", "submit an entry to the specified resource"},
{"PUT", "replace representation of the specified resource with the request body"},
{"DELETE", "delete the specified resource"},
{"CONNECT", "establish a tunnel to the server identified by the target resource"},
{"OPTIONS", "describe the communication options for the target resource"},
{"TRACE", "perform a message loop-back test"},
{"PATCH", "apply partial modifications to the specified resource"}}));
.ChoicesWithCompletion({
{"GET", "request representation of the specified resource"},
{"HEAD", "request response identical to that of GET, but without response body"},
{"POST", "submit an entry to the specified resource"},
{"PUT", "replace representation of the specified resource with the request body"},
{"DELETE", "delete the specified resource"},
{"CONNECT", "establish a tunnel to the server identified by the target resource"},
{"OPTIONS", "describe the communication options for the target resource"},
{"TRACE", "perform a message loop-back test"},
{"PATCH", "apply partial modifications to the specified resource"}});

opts.AddLongOption('U', "user-agent")
.RequiredArgument("agent-string")
Expand Down
42 changes: 42 additions & 0 deletions library/cpp/getopt/small/last_getopt_opt.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
#include "last_getopt_handlers.h"

#include <util/string/split.h>
#include <util/generic/hash_set.h>
#include <util/generic/ptr.h>
#include <util/generic/string.h>
#include <util/generic/maybe.h>
#include <util/generic/vector.h>
#include <util/string/cast.h>
#include <util/string/join.h>

#include <optional>
#include <stdarg.h>
Expand Down Expand Up @@ -80,6 +82,7 @@ namespace NLastGetopt {
TdOptVal OptionalValue_;
TdOptVal DefaultValue_;
TOptHandlers Handlers_;
THashSet<TString> Choices_;

public:
/**
Expand Down Expand Up @@ -398,6 +401,10 @@ namespace NLastGetopt {
return Help_;
}

/**
 * @return allowed values of this option joined with ", ";
 *         the values are listed in the iteration order of the underlying hash set.
 */
TString GetChoicesHelp() const {
    TString joinedChoices = JoinSeq(", ", Choices_);
    return joinedChoices;
}

/**
* Set help string that appears when argument completer lists available options.
*
Expand Down Expand Up @@ -728,6 +735,41 @@ namespace NLastGetopt {
TOpt& KVHandler(TpFunc func, const char kvdelim = '=') {
return Handler(new NLastGetopt::TOptKVHandler<TpFunc>(func, kvdelim));
}

/**
 * Restrict the option to the values in the range [begin, end).
 * The range is collected into a hash set and passed to the set-based overload.
 */
template <typename TIterator>
TOpt& Choices(TIterator begin, TIterator end) {
    using TChoice = typename TIterator::value_type;
    THashSet<TChoice> allowed{begin, end};
    return Choices(std::move(allowed));
}

/**
 * Restrict the option to the given set of values.
 *
 * Stores the set in Choices_ (also used by GetChoicesHelp()) and registers a
 * handler that throws TUsageException when the supplied value is not in the set.
 *
 * NOTE(review): Choices_ is declared as THashSet<TString>, so this presumably
 * only works for TValue assignable to that type - confirm.
 * NOTE(review): the handler captures `this`; if a TOpt is copied after this call,
 * the copied handler would presumably still refer to the original object - verify
 * how handlers are stored and copied.
 */
template <typename TValue>
TOpt& Choices(THashSet<TValue> choices) {
Choices_ = std::move(choices);
return Handler1T<TValue>(
[this] (const TValue& arg) {
// Reject any value that was not listed as a choice.
if (!Choices_.contains(arg)) {
throw TUsageException() << " value '" << arg
<< "' is not allowed for option '" << GetName() << "'";
}
});
}

/**
 * Restrict the option to the given list of values.
 * The strings are moved out of the vector into a hash set, which is then
 * passed to the set-based overload.
 */
TOpt& Choices(TVector<TString> choices) {
    auto first = std::make_move_iterator(choices.begin());
    auto last = std::make_move_iterator(choices.end());
    THashSet<TString> allowed{first, last};
    return Choices(std::move(allowed));
}

/**
 * Restrict the option to the given choices and register them with the
 * argument completer as well.
 */
TOpt& ChoicesWithCompletion(TVector<NComp::TChoice> choices) {
    // The completer receives the full choice entries.
    Completer(NComp::Choice(choices));

    // Validation only needs the choice strings themselves.
    THashSet<TString> allowed;
    allowed.reserve(choices.size());
    for (auto it = choices.begin(); it != choices.end(); ++it) {
        allowed.insert(it->Choice);
    }

    return Choices(std::move(allowed));
}
};

/**
Expand Down
8 changes: 8 additions & 0 deletions library/cpp/getopt/small/last_getopt_opts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,14 @@ namespace NLastGetopt {
os << Wrap(Wrap_, help, SPad + leftPadding + " ", &lastLineLength, &helpHasParagraphs);
}

auto choicesHelp = opt->GetChoicesHelp();
if (!choicesHelp.empty()) {
if (help) {
os << Endl << SPad << leftPadding << " ";
}
os << "(values: " << choicesHelp << ")";
}

if (opt->HasDefaultValue()) {
auto quotedDef = QuoteForHelp(opt->GetDefaultValue());
if (helpHasParagraphs) {
Expand Down
Loading