From 45a1854ca9a22ec4d94d27c2f9f6bd851e7b4148 Mon Sep 17 00:00:00 2001 From: Aleksey Skvortsov <46933547+Rattysed@users.noreply.github.com> Date: Tue, 3 Sep 2024 00:43:45 +0300 Subject: [PATCH] [Disk Manager] better YDB client errors metric (#1853) --- cloud/tasks/persistence/ydb.go | 62 ++++++++++++++++++++++---- cloud/tasks/persistence/ydb_metrics.go | 26 ++++++++++- cloud/tasks/persistence/ydb_test.go | 56 +++++++++++++++++++---- 3 files changed, 127 insertions(+), 17 deletions(-) diff --git a/cloud/tasks/persistence/ydb.go b/cloud/tasks/persistence/ydb.go index f53ef1e3e8a..90f4dd07a2e 100644 --- a/cloud/tasks/persistence/ydb.go +++ b/cloud/tasks/persistence/ydb.go @@ -484,6 +484,48 @@ func (s *Session) execute( return tx, Result{res: ydbRes}, nil } +func (s *Session) CreateOrAlterTable( + ctx context.Context, + fullPath string, + description CreateTableDescription, + dropUnusedColumns bool, +) (err error) { + + ctx, cancel := context.WithTimeout(ctx, s.callTimeout) + defer cancel() + + defer s.metrics.StatCall( + ctx, + "session/CreateOrAlterTable", + fmt.Sprintf("At path: %q", fullPath), + )(&err) + + return createOrAlterTable( + ctx, + s.session, + fullPath, + description, + dropUnusedColumns, + ) +} + +func (s *Session) DropTable( + ctx context.Context, + fullPath string, +) (err error) { + + ctx, cancel := context.WithTimeout(ctx, s.callTimeout) + defer cancel() + + defer s.metrics.StatCall( + ctx, + "session/DropTable", + fmt.Sprintf("At path: %q", fullPath), + )(&err) + + return dropTable(ctx, s.session, fullPath) +} + //////////////////////////////////////////////////////////////////////////////// type YDBClient struct { @@ -515,15 +557,13 @@ func (c *YDBClient) CreateOrAlterTable( return c.Execute( ctx, - func(ctx context.Context, s *Session) error { - err := c.makeDirs(ctx, folderFullPath) + func(ctx context.Context, s *Session) (err error) { + err = c.makeDirs(ctx, folderFullPath) if err != nil { return err } - return createOrAlterTable( - ctx, - s.session, + return s.CreateOrAlterTable(ctx, fullPath, description, dropUnusedColumns, @@ -544,7 +584,7 @@ func (c *YDBClient) DropTable( return c.Execute( ctx, func(ctx context.Context, s *Session) error { - return dropTable(ctx, s.session, fullPath) + return s.DropTable(ctx, fullPath) }, ) } @@ -607,7 +647,13 @@ func (c *YDBClient) ExecuteRW( //////////////////////////////////////////////////////////////////////////////// -func (c *YDBClient) makeDirs(ctx context.Context, absolutePath string) error { +func (c *YDBClient) makeDirs(ctx context.Context, absolutePath string) (err error) { + defer c.metrics.StatCall( + ctx, + "client/makeDirs", + fmt.Sprintf("At path: %q", absolutePath), + )(&err) + if !strings.HasPrefix(absolutePath, c.database) { return errors.NewNonRetriableErrorf( "'%v' is expected to be rooted at '%v'", @@ -626,7 +672,7 @@ func (c *YDBClient) makeDirs(ctx context.Context, absolutePath string) error { } dirPath = path.Join(dirPath, part) - err := c.db.Scheme().MakeDirectory(ctx, dirPath) + err = c.db.Scheme().MakeDirectory(ctx, dirPath) if err != nil { return errors.NewNonRetriableErrorf( "cannot make directory %v: %w", diff --git a/cloud/tasks/persistence/ydb_metrics.go b/cloud/tasks/persistence/ydb_metrics.go index e606f1bb00a..548ee2072c2 100644 --- a/cloud/tasks/persistence/ydb_metrics.go +++ b/cloud/tasks/persistence/ydb_metrics.go @@ -35,7 +35,6 @@ func (m *ydbMetrics) StatCall( // Should initialize all counters before using them, to avoid 'no data'. hangingCounter := subRegistry.Counter("hanging") - errorCounter := subRegistry.Counter("errors") timeoutCounter := subRegistry.Counter("timeout") successCounter := subRegistry.Counter("success") @@ -45,6 +44,31 @@ func (m *ydbMetrics) StatCall( } if *err != nil { + var errorType string + + switch { + case ydb.IsOperationErrorTransactionLocksInvalidated(*err): + errorType = "TLI" + case ydb.IsOperationErrorSchemeError(*err): + errorType = "scheme" + case ydb.IsTransportError(*err): + errorType = "transport" + case ydb.IsOperationErrorOverloaded(*err): + errorType = "overloaded" + case ydb.IsOperationErrorUnavailable(*err): + errorType = "unavailable" + case ydb.IsRatelimiterAcquireError(*err): + errorType = "ratelimiterAcquire" + default: + errorType = "unknown" + } + + errorRegistry := subRegistry.WithTags(map[string]string{ + "type": errorType, + }) + + // Should initialize all counters before using them, to avoid 'no data'. + errorCounter := errorRegistry.Counter("errors") errorCounter.Inc() if errors.Is(*err, context.DeadlineExceeded) { diff --git a/cloud/tasks/persistence/ydb_test.go b/cloud/tasks/persistence/ydb_test.go index c1f684ee47c..1f7fc35ec28 100644 --- a/cloud/tasks/persistence/ydb_test.go +++ b/cloud/tasks/persistence/ydb_test.go @@ -4,13 +4,17 @@ import ( "context" "fmt" "os" + "strings" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/ydb-platform/nbs/cloud/tasks/logging" + "github.com/ydb-platform/nbs/cloud/tasks/metrics" "github.com/ydb-platform/nbs/cloud/tasks/metrics/empty" + "github.com/ydb-platform/nbs/cloud/tasks/metrics/mocks" persistence_config "github.com/ydb-platform/nbs/cloud/tasks/persistence/config" + "github.com/ydb-platform/ydb-go-sdk/v3" ) //////////////////////////////////////////////////////////////////////////////// @@ -22,7 +26,11 @@ func newContext() context.Context { ) } -func newYDB(ctx context.Context) (*YDBClient, error) { +func newYDB( + ctx context.Context, + metricsRegistry metrics.Registry, +) (*YDBClient, error) { + endpoint := os.Getenv("YDB_ENDPOINT") database := os.Getenv("YDB_DATABASE") rootPath := "disk_manager" @@ -34,7 +42,7 @@ func newYDB(ctx context.Context) (*YDBClient, error) { Database: &database, RootPath: &rootPath, }, - empty.NewRegistry(), + metricsRegistry, ) } @@ -231,7 +239,7 @@ func TestYDBCreateTableV1(t *testing.T) { ctx, cancel := context.WithCancel(newContext()) defer cancel() - db, err := newYDB(ctx) + db, err := newYDB(ctx, empty.NewRegistry()) require.NoError(t, err) defer db.Close(ctx) @@ -265,7 +273,7 @@ func TestYDBCreateTableV1Twice(t *testing.T) { ctx, cancel := context.WithCancel(newContext()) defer cancel() - db, err := newYDB(ctx) + db, err := newYDB(ctx, empty.NewRegistry()) require.NoError(t, err) defer db.Close(ctx) @@ -316,7 +324,7 @@ func TestYDBMigrateTableV1ToTableV2(t *testing.T) { ctx, cancel := context.WithCancel(newContext()) defer cancel() - db, err := newYDB(ctx) + db, err := newYDB(ctx, empty.NewRegistry()) require.NoError(t, err) defer db.Close(ctx) @@ -374,7 +382,7 @@ func TestYDBUseV1InV2(t *testing.T) { ctx, cancel := context.WithCancel(newContext()) defer cancel() - db, err := newYDB(ctx) + db, err := newYDB(ctx, empty.NewRegistry()) require.NoError(t, err) defer db.Close(ctx) @@ -432,7 +440,7 @@ func TestYDBFailMigrationChangingType(t *testing.T) { ctx, cancel := context.WithCancel(newContext()) defer cancel() - db, err := newYDB(ctx) + db, err := newYDB(ctx, empty.NewRegistry()) require.NoError(t, err) defer db.Close(ctx) @@ -466,7 +474,7 @@ func TestYDBFailMigrationChangingPrimaryKey(t *testing.T) { ctx, cancel := context.WithCancel(newContext()) defer cancel() - db, err := newYDB(ctx) + db, err := newYDB(ctx, empty.NewRegistry()) require.NoError(t, err) defer db.Close(ctx) @@ -495,3 +503,35 @@ func TestYDBFailMigrationChangingPrimaryKey(t *testing.T) { ) assert.Error(t, err) } + +func TestYDBShouldSendSchemeErrorMetric(t *testing.T) { + ctx, cancel := context.WithCancel(newContext()) + defer cancel() + + metricsRegistry := mocks.NewRegistryMock() + + db, err := newYDB(ctx, metricsRegistry) + require.NoError(t, err) + defer db.Close(ctx) + + metricsRegistry.GetCounter( + "errors", + map[string]string{"call": "client/makeDirs", "type": "scheme"}, + ).On("Inc").Once() + + // YDB has limited length of object name. Current limit is 255. + extraLargeString := strings.Repeat("x", 100500) + folder := fmt.Sprintf("ydb_test/%s/%s", extraLargeString, t.Name()) + table := "table" + + err = db.CreateOrAlterTable( + ctx, + folder, + table, + tableV1TableDescription(), + false, // dropUnusedColumns + ) + require.True(t, ydb.IsOperationErrorSchemeError(err)) + + metricsRegistry.AssertAllExpectations(t) +}