Skip to content

Commit

Permalink
Revert "Remove fs-error label"
Browse files Browse the repository at this point in the history
This reverts commit 848d75a.
  • Loading branch information
kislaykishore committed Aug 22, 2024
1 parent 848d75a commit a3eb99c
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 8 deletions.
23 changes: 16 additions & 7 deletions internal/fs/wrappers/monitoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func init() {
Measure: opsErrorCount,
Description: "The cumulative number of errors generated by file system operations",
Aggregation: view.Sum(),
TagKeys: []tag.Key{tags.FSOp, tags.FSErrCategory},
TagKeys: []tag.Key{tags.FSOp, tags.FSError, tags.FSErrCategory},
},
&view.View{
Name: "fs/ops_latency",
Expand All @@ -75,17 +75,24 @@ func init() {
}
}

// categorize maps an error to an error-category.
// This helps reduce the cardinality of the labels to less than 30.
// This lower number of errors allows the various errors to get piped to Cloud metrics without getting dropped.
func categorize(err error) string {
// errStrAndCategory maps an error to an error string and an error category.
// Uncommon errors are bucketed into categories to reduce the cardinality of the
// error label so that the metric is not rejected by Cloud Monarch.
func errStrAndCategory(err error) (str string, category string) {
if err == nil {
return ""
return "", ""
}
var errno syscall.Errno
if !errors.As(err, &errno) {
errno = DefaultFSError
}
return errno.Error(), errCategory(errno)
}

// errCategory maps an errno to an error category.
// Grouping errors this way keeps the number of distinct label values below 30.
// With that few values, the error metrics can be exported to Cloud metrics without being dropped.
func errCategory(errno syscall.Errno) string {
switch errno {
case syscall.ELNRNG,
syscall.ENODEV,
Expand Down Expand Up @@ -254,6 +261,7 @@ func categorize(err error) string {

// recordOp records the file system operation count, the failed operation count, and the operation latency.
func recordOp(ctx context.Context, method string, start time.Time, fsErr error) {

// Recording opCount.
if err := stats.RecordWithTags(
ctx,
Expand All @@ -268,11 +276,12 @@ func recordOp(ctx context.Context, method string, start time.Time, fsErr error)

// Recording opErrorCount.
if fsErr != nil {
errCategory := categorize(fsErr)
errStr, errCategory := errStrAndCategory(fsErr)
if err := stats.RecordWithTags(
ctx,
[]tag.Mutator{
tag.Upsert(tags.FSOp, method),
tag.Upsert(tags.FSError, errStr),
tag.Upsert(tags.FSErrCategory, errCategory),
},
opsErrorCount.M(1),
Expand Down
21 changes: 20 additions & 1 deletion internal/fs/wrappers/monitoring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,66 +33,82 @@ func TestFsErrStrAndCategory(t *testing.T) {
t.Parallel()
tests := []struct {
fsErr error
expectedStr string
expectedCategory string
}{
{
fsErr: fmt.Errorf("some random error"),
expectedStr: "input/output error",
expectedCategory: "input/output error",
},
{
fsErr: syscall.ENOTEMPTY,
expectedStr: "directory not empty",
expectedCategory: "directory not empty",
},
{
fsErr: syscall.EEXIST,
expectedStr: "file exists",
expectedCategory: "file exists",
},
{
fsErr: syscall.EINVAL,
expectedStr: "invalid argument",
expectedCategory: "invalid argument",
},
{
fsErr: syscall.EINTR,
expectedStr: "interrupted system call",
expectedCategory: "interrupt errors",
},
{
fsErr: syscall.ENOSYS,
expectedStr: "function not implemented",
expectedCategory: "function not implemented",
},
{
fsErr: syscall.ENOSPC,
expectedStr: "no space left on device",
expectedCategory: "process/resource management errors",
},
{
fsErr: syscall.E2BIG,
expectedStr: "argument list too long",
expectedCategory: "invalid operation",
},
{
fsErr: syscall.EHOSTDOWN,
expectedStr: "host is down",
expectedCategory: "network errors",
},
{
fsErr: syscall.ENODATA,
expectedStr: "no data available",
expectedCategory: "miscellaneous errors",
},
{
fsErr: syscall.ENODEV,
expectedStr: "no such device",
expectedCategory: "device errors",
},
{
fsErr: syscall.EISDIR,
expectedStr: "is a directory",
expectedCategory: "file/directory errors",
},
{
fsErr: syscall.ENOSYS,
expectedStr: "function not implemented",
expectedCategory: "function not implemented",
},
{
fsErr: syscall.ENFILE,
expectedStr: "too many open files in system",
expectedCategory: "too many open files",
},
{
fsErr: syscall.EPERM,
expectedStr: "operation not permitted",
expectedCategory: "permission errors",
},
}
Expand All @@ -101,7 +117,10 @@ func TestFsErrStrAndCategory(t *testing.T) {
t.Run(fmt.Sprintf("fsErrStrAndCategor_case_%d", idx), func(t *testing.T) {
t.Parallel()

assert.Equal(t, tc.expectedCategory, categorize(tc.fsErr))
actualErrStr, actualErrGrp := errStrAndCategory(tc.fsErr)

assert.Equal(t, tc.expectedStr, actualErrStr)
assert.Equal(t, tc.expectedCategory, actualErrGrp)
})
}
}
Expand Down
3 changes: 3 additions & 0 deletions internal/monitor/tags/tags.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ var (
// FSOp annotates the file system op processed.
FSOp = tag.MustNewKey("fs_op")

// FSError annotates failed file system operations with the error type.
FSError = tag.MustNewKey("fs_error")

// FSErrCategory reduces the cardinality of FSError by grouping errors together.
FSErrCategory = tag.MustNewKey("fs_error_category")

Expand Down

0 comments on commit a3eb99c

Please sign in to comment.