diff --git a/internal/cli/cmd/auth/login.go b/internal/cli/cmd/auth/login.go index 984500fc7..6aed72415 100644 --- a/internal/cli/cmd/auth/login.go +++ b/internal/cli/cmd/auth/login.go @@ -82,10 +82,8 @@ type tenant struct { } func completeLogin(ctx context.Context, id, kind string) (tenant, error) { - ephemeralCliId := fnapi.TelemetryOn(ctx).GetClientID() - if kind == "tenant" { - res, err := fnapi.CompleteTenantLogin(ctx, id, ephemeralCliId) + res, err := fnapi.CompleteTenantLogin(ctx, id) if err != nil { return tenant{}, err } @@ -97,7 +95,7 @@ func completeLogin(ctx context.Context, id, kind string) (tenant, error) { } // TODO remove old login path - userAuth, err := getUserAuth(ctx, id, kind, ephemeralCliId) + userAuth, err := getUserAuth(ctx, id, kind) if err != nil { return tenant{}, err } @@ -119,13 +117,13 @@ func completeLogin(ctx context.Context, id, kind string) (tenant, error) { return tenant{token: tt.TenantToken}, nil } -func getUserAuth(ctx context.Context, id, kind, ephemeralCliId string) (*auth.UserAuth, error) { +func getUserAuth(ctx context.Context, id, kind string) (*auth.UserAuth, error) { switch kind { case "tenant": return nil, fnerrors.InternalError("tenant login cannot produce user auth") case "clerk": - t, err := fnapi.CompleteClerkLogin(ctx, id, ephemeralCliId) + t, err := fnapi.CompleteClerkLogin(ctx, id) if err != nil { return nil, err } @@ -141,6 +139,6 @@ func getUserAuth(ctx context.Context, id, kind, ephemeralCliId string) (*auth.Us }, nil default: - return fnapi.CompleteLogin(ctx, id, ephemeralCliId) + return fnapi.CompleteLogin(ctx, id) } } diff --git a/internal/cli/fncobra/main.go b/internal/cli/fncobra/main.go index 0acf7903a..94cc1b8d1 100644 --- a/internal/cli/fncobra/main.go +++ b/internal/cli/fncobra/main.go @@ -30,7 +30,6 @@ import ( "namespacelabs.dev/foundation/internal/console/colors" "namespacelabs.dev/foundation/internal/console/consolesink" "namespacelabs.dev/foundation/internal/console/termios" - "namespacelabs.dev/foundation/internal/environment" "namespacelabs.dev/foundation/internal/fnapi" "namespacelabs.dev/foundation/internal/fnerrors" "namespacelabs.dev/foundation/internal/fnerrors/format" @@ -118,19 +117,6 @@ func doMain(opts MainOpts) (colors.Style, error) { DeferCheckVersion(ctx, opts.Name) } - tel := fnapi.TelemetryOn(ctx) - - // XXX move id management out of telemetry, it's used for other purposes too. - if tel.IsFirstRun() && !environment.IsRunningInCI() { - // First NS run - print a welcome message. - welcome.PrintWelcome(ctx, true /* firstRun */, opts.Name) - } - - // Now that "useTelemetry" flag is parsed, we can conditionally enable telemetry. - if useTelemetry && tel != nil { - tel.Enable() - } - if viper.GetBool("enable_pprof") { go ListenPProf(console.Debug(cmd.Context())) } @@ -140,9 +126,6 @@ func doMain(opts MainOpts) (colors.Style, error) { // Setting up container registry logging, which is unfortunately global. crlogs.Warn = log.New(console.TypedOutput(cmd.Context(), "cr-warn", idtypes.CatOutputTool), "", log.LstdFlags|log.Lmicroseconds) - // Telemetry. - tel.RecordInvocation(ctx, cmd, args) - out := logrus.New() out.SetOutput(console.NamedDebug(ctx, "containerd")) // Because we can have concurrent builds producing the same output; the @@ -208,16 +191,6 @@ func doMain(opts MainOpts) (colors.Style, error) { cleanupTracer() } - if err != nil && !errors.Is(err, context.Canceled) { - if tel := fnapi.TelemetryOn(rootCtx); tel != nil { - // Record errors only after the user sees them to hide potential latency implications. - // We pass the original ctx without sink since logs have already been flushed. - tel.RecordError(rootCtx, err) - } - - return style, err - } - return style, err } @@ -406,5 +379,5 @@ func setupContext(ctx context.Context, inhibitReport bool, rendered consolesink. flushLogs = func() {} } - return fnapi.WithTelemetry(ctx), style, flushLogs + return ctx, style, flushLogs } diff --git a/internal/fnapi/fnapi.go b/internal/fnapi/fnapi.go index 374aae67b..65c14ff50 100644 --- a/internal/fnapi/fnapi.go +++ b/internal/fnapi/fnapi.go @@ -78,10 +78,6 @@ func DecodeJSONResponse(resp any) func(io.Reader) error { } func AddNamespaceHeaders(ctx context.Context, headers *http.Header) { - if tel := TelemetryOn(ctx); tel != nil && tel.IsTelemetryEnabled() { - headers.Add("NS-Client-ID", tel.GetClientID()) - } - headers.Add("NS-Internal-Version", fmt.Sprintf("%d", versions.Builtin().APIVersion)) if AdminMode { diff --git a/internal/fnapi/signin.go b/internal/fnapi/signin.go index 5556687c2..6f9b04ff8 100644 --- a/internal/fnapi/signin.go +++ b/internal/fnapi/signin.go @@ -24,8 +24,7 @@ type StartLoginResponse struct { } type CompleteLoginRequest struct { - LoginId string `json:"login_id"` - EphemeralCliId string `json:"ephemeral_cli_id"` + LoginId string `json:"login_id"` } // Returns the URL which the user should open. @@ -48,10 +47,9 @@ func StartLogin(ctx context.Context, kind, tenantId string) (*StartLoginResponse return &resp, nil } -func CompleteLogin(ctx context.Context, id, ephemeralCliId string) (*auth.UserAuth, error) { +func CompleteLogin(ctx context.Context, id string) (*auth.UserAuth, error) { req := CompleteLoginRequest{ - LoginId: id, - EphemeralCliId: ephemeralCliId, + LoginId: id, } method := "nsl.signin.SigninService/CompleteLogin" @@ -73,10 +71,9 @@ type CompleteClerkLoginResponse struct { Ticket string `json:"ticket,omitempty"` } -func CompleteClerkLogin(ctx context.Context, id, ephemeralCliId string) (*CompleteClerkLoginResponse, error) { +func CompleteClerkLogin(ctx context.Context, id string) (*CompleteClerkLoginResponse, error) { req := CompleteLoginRequest{ - LoginId: id, - EphemeralCliId: ephemeralCliId, + LoginId: id, } method := "nsl.signin.SigninService/CompleteClerkLogin" @@ -99,10 +96,9 @@ type CompleteTenantLoginResponse struct { TenantName string `json:"tenant_name,omitempty"` } -func CompleteTenantLogin(ctx context.Context, id, ephemeralCliId string) (*CompleteTenantLoginResponse, error) { +func CompleteTenantLogin(ctx context.Context, id string) (*CompleteTenantLoginResponse, error) { req := CompleteLoginRequest{ - LoginId: id, - EphemeralCliId: ephemeralCliId, + LoginId: id, } method := "nsl.signin.SigninService/CompleteTenantLogin" diff --git a/internal/fnapi/telemetry.go b/internal/fnapi/telemetry.go deleted file mode 100644 index a5d7227cf..000000000 --- a/internal/fnapi/telemetry.go +++ /dev/null @@ -1,336 +0,0 @@ -// Copyright 2022 Namespace Labs Inc; All rights reserved. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. - -package fnapi - -import ( - "context" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "fmt" - "os" - "path/filepath" - "runtime" - "strings" - "time" - - "github.com/spf13/cobra" - "github.com/spf13/pflag" - "github.com/spf13/viper" - "go.uber.org/atomic" - spb "google.golang.org/genproto/googleapis/rpc/status" - "google.golang.org/grpc/status" - "namespacelabs.dev/foundation/internal/cli/version" - "namespacelabs.dev/foundation/internal/console" - "namespacelabs.dev/foundation/internal/environment" - "namespacelabs.dev/foundation/internal/fnerrors" - "namespacelabs.dev/foundation/internal/fnerrors/format" - "namespacelabs.dev/foundation/internal/workspace/dirs" - "namespacelabs.dev/go-ids" -) - -// TODO #93 compute this when we can move the service definition into the foundation repo. -const telemetryServiceName = "telemetry.TelemetryService" -const postTimeout = 1 * time.Second - -type Telemetry struct { - enabled bool - errorLogging bool // For testing and debugging. - - backendAddress string - recID *atomic.String // Never nil, set after an invocation is recorded. - id ephemeralCliID - created bool // True if this the first invocation with a new ID. -} - -func NewTelemetry(ctx context.Context) *Telemetry { - return InternalNewTelemetry(ctx, getOrGenerateEphemeralCliID) -} - -func InternalNewTelemetry(ctx context.Context, makeID func(context.Context) (ephemeralCliID, bool)) *Telemetry { - id, created := makeID(ctx) - - return &Telemetry{ - errorLogging: false, - backendAddress: EndpointAddress, - recID: atomic.NewString(""), - id: id, - created: created, - } -} - -type contextKey string - -var ( - _telemetryKey = contextKey("fn.telemetry") -) - -func TelemetryOn(ctx context.Context) *Telemetry { - v := ctx.Value(_telemetryKey) - if v == nil { - return nil - } - return v.(*Telemetry) -} - -func WithTelemetry(ctx context.Context) context.Context { - if isTelemetryDisabled() { - return ctx - } - - return context.WithValue(ctx, _telemetryKey, NewTelemetry(ctx)) -} - -// Telemetry needs to be excplicitly enabled by calling this function. -// IsTelemetryEnabled() may still be false if telemetry is disabled through DO_NOT_TRACK, etc. -func (tel *Telemetry) Enable() { - tel.enabled = true -} - -func isTelemetryDisabled() bool { - doNotTrack := os.Getenv("DO_NOT_TRACK") - enableTelemetry := viper.GetBool("telemetry") - return environment.IsRunningInCI() || doNotTrack != "" || !enableTelemetry -} - -func (tel *Telemetry) IsTelemetryEnabled() bool { - return !isTelemetryDisabled() && tel != nil && tel.enabled -} - -func (tel *Telemetry) logError(ctx context.Context, err error) { - if tel.errorLogging { - format.Format(console.Stderr(ctx), err) - } -} - -func combinedHash(ins ...string) string { - h := sha256.New() - for _, in := range ins { - h.Write([]byte(in)) - } - return hex.EncodeToString(h.Sum(nil)) -} - -// TODO #93 remove structs when we can move the service definition into the foundation repo. -type flag struct { - Name string `json:"name,omitempty"` - Hash string `json:"hash,omitempty"` - Plaintext string `json:"plaintext,omitempty"` -} - -type arg struct { - Hash string `json:"hash,omitempty"` - Plaintext string `json:"plaintext,omitempty"` -} - -type binaryVersion struct { - Version string `json:"version,omitempty"` - BuildTime string `json:"build_time,omitempty"` - Modified bool `json:"modified,omitempty"` -} - -type recordInvocationRequest struct { - ID string `json:"id,omitempty"` - Command string `json:"command,omitempty"` - Arg []arg `json:"arg"` - Flag []flag `json:"flag"` - UserId string `json:"user_id,omitempty"` - Os string `json:"os,omitempty"` - Arch string `json:"arch,omitempty"` - NumCpu int `json:"num_cpu"` - Version *binaryVersion `json:"version"` -} - -type recordErrorRequest struct { - ID string `json:"id,omitempty"` - Message string `json:"message,omitempty"` - Status *spb.Status `json:"status,omitempty"` -} - -type ephemeralCliID struct { - ID string `json:"id"` - Salt string `json:"salt"` -} - -func newRandID() string { - return ids.NewRandomBase62ID(16) -} - -func getOrGenerateEphemeralCliID(ctx context.Context) (ephemeralCliID, bool) { - configDir, err := dirs.Config() - if err != nil { - panic(err) // XXX Config() should not return an error. - } - - idfile := filepath.Join(configDir, "clientid.json") - idcontents, err := os.ReadFile(idfile) - if err == nil { - var clientID ephemeralCliID - if err := json.Unmarshal(idcontents, &clientID); err == nil { - if clientID.ID != "" && clientID.Salt != "" { - return clientID, false - } - } - } - - newClientID := ephemeralCliID{newRandID(), newRandID()} - if err := writeJSON(idfile, newClientID); err != nil { - fmt.Fprintln(console.Warnings(ctx), "failed to persist user-id", err) - } - - return newClientID, os.IsNotExist(err) -} - -func writeJSON(path string, msg interface{}) error { - data, err := json.Marshal(msg) - if err != nil { - return err - } - return os.WriteFile(path, data, 0644) -} - -func fullCommand(cmd *cobra.Command) string { - commands := []string{cmd.Use} - for cmd.HasParent() { - cmd = cmd.Parent() - commands = append([]string{cmd.Use}, commands...) - } - return strings.Join(commands, " ") -} - -// Extracts command name and set flags from cmd. Reports args and flag values in hashed form. -func buildRecordInvocationRequest(ctx context.Context, cmd *cobra.Command, c ephemeralCliID, reqID string, args []string) *recordInvocationRequest { - req := recordInvocationRequest{ - ID: reqID, - Command: fullCommand(cmd), - UserId: c.ID, - Os: runtime.GOOS, - Arch: runtime.GOARCH, - NumCpu: runtime.NumCPU(), - } - - if v, err := version.Current(); err == nil { - if v.Modified { - // don't upload version information on modified binaries - req.Version = &binaryVersion{ - Modified: true, - } - } else { - req.Version = &binaryVersion{ - Version: v.GitCommit, - BuildTime: v.BuildTimeStr, - Modified: false, - } - } - } - - cmd.Flags().Visit(func(pflag *pflag.Flag) { - req.Flag = append(req.Flag, flag{ - Name: pflag.Name, - Hash: combinedHash(pflag.Value.String(), pflag.Name, c.Salt), - }) - }) - - for _, a := range args { - req.Arg = append(req.Arg, arg{Hash: combinedHash(a, c.Salt)}) - } - - return &req -} - -func (tel *Telemetry) postRecordInvocationRequest(ctx context.Context, req *recordInvocationRequest) error { - ctx, cancel := context.WithTimeout(ctx, postTimeout) - defer cancel() - - return AnonymousCall(ctx, tel.backendAddress, fmt.Sprintf("%s/RecordInvocation", telemetryServiceName), *req, nil) -} - -func (tel *Telemetry) recordInvocation(ctx context.Context, cmd *cobra.Command, reqID string, args []string) { - if !tel.IsTelemetryEnabled() { - return - } - - req := buildRecordInvocationRequest(ctx, cmd, tel.id, reqID, args) - - if err := tel.postRecordInvocationRequest(ctx, req); err != nil { - tel.logError(ctx, err) - return - } - - // Only store request id if recoding the invocation succeeded. - tel.recID.Store(req.ID) -} - -func (tel *Telemetry) RecordInvocation(ctx context.Context, cmd *cobra.Command, args []string) string { - reqID := ids.NewRandomBase62ID(16) - - // Telemetry should be best effort and not block the user. - go tel.recordInvocation(ctx, cmd, reqID, args) - - return reqID -} - -func (tel *Telemetry) postRecordErrorRequest(ctx context.Context, req recordErrorRequest) error { - ctx, cancel := context.WithTimeout(ctx, postTimeout) - defer cancel() - - return AnonymousCall(ctx, tel.backendAddress, fmt.Sprintf("%s/RecordError", telemetryServiceName), req, nil) -} - -func (tel *Telemetry) RecordError(ctx context.Context, err error) { - if !tel.IsTelemetryEnabled() || err == nil { - return - } - - tel.recordError(ctx, tel.recID.Load(), err) -} - -func (tel *Telemetry) recordError(ctx context.Context, recID string, err error) { - if err == nil { - return - } - - if _, isExpected := fnerrors.IsExpected(err); isExpected { - // We are only interested in unexpected errors. - return - } - - // If we never saw a recorded ID, bail out. - if recID == "" { - tel.logError(ctx, fmt.Errorf("didn't receive telemetry record id")) - return - } - - req := recordErrorRequest{ID: recID} - - st, _ := status.FromError(err) - - req.Message = err.Error() - req.Status = st.Proto() - - if err := tel.postRecordErrorRequest(ctx, req); err != nil { - tel.logError(ctx, err) - } -} - -func (tel *Telemetry) IsFirstRun() bool { - if tel == nil { - return false - } - - return tel.created -} - -func (tel *Telemetry) GetClientID() string { - if tel == nil { - return "" - } - - if !tel.IsTelemetryEnabled() { - return "" - } - - return tel.id.ID -} diff --git a/internal/fnapi/telemetry_test.go b/internal/fnapi/telemetry_test.go deleted file mode 100644 index 01a032905..000000000 --- a/internal/fnapi/telemetry_test.go +++ /dev/null @@ -1,229 +0,0 @@ -// Copyright 2022 Namespace Labs Inc; All rights reserved. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. - -package fnapi - -import ( - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "net/http/httptest" - "strings" - "testing" - "time" - - "github.com/spf13/cobra" - "github.com/spf13/viper" - "go.uber.org/atomic" - "gotest.tools/assert" -) - -func TestTelemetryDisabled(t *testing.T) { - reset := setupEnv(t) - defer reset() - - tel := InternalNewTelemetry(context.Background(), generateTestIDs) - tel.enabled = false - tel.errorLogging = true - - cmd := &cobra.Command{ - Use: "fake-command", - Run: func(cmd *cobra.Command, args []string) { - tel.RecordInvocation(context.Background(), cmd, args) - }} - - svr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - t.Errorf("Calls to TelemetryService are fobidden when telemetry is disabled.") - })) - defer svr.Close() - - tel.backendAddress = svr.URL - - _ = cmd.Execute() - tel.RecordError(context.Background(), fmt.Errorf("foo error")) - - // Due to the async http server nature it may not have time to handle the request. - time.Sleep(time.Millisecond * 100) -} - -func generateTestIDs(ctx context.Context) (ephemeralCliID, bool) { - return ephemeralCliID{newRandID(), newRandID()}, true -} - -func TestTelemetryDisabledViaEnv(t *testing.T) { - reset := setupEnv(t) - defer reset() - - tel := InternalNewTelemetry(context.Background(), generateTestIDs) - tel.enabled = true - tel.errorLogging = true - - t.Setenv("DO_NOT_TRACK", "1") - - cmd := &cobra.Command{ - Use: "fake-command", - Run: func(cmd *cobra.Command, args []string) { - tel.RecordInvocation(context.Background(), cmd, args) - }} - - svr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if strings.HasPrefix(r.URL.EscapedPath(), "/telemetry.TelemetryService") { - t.Errorf("Calls to TelemetryService are fobidden when telemetry is disabled.") - } - - })) - defer svr.Close() - - tel.backendAddress = svr.URL - - _ = cmd.Execute() - tel.RecordError(context.Background(), fmt.Errorf("foo error")) - - // Due to the async http server nature it may not have time to handle the request. - time.Sleep(time.Millisecond * 100) -} - -func TestTelemetryDisabledViaViper(t *testing.T) { - reset := setupEnv(t) - defer reset() - - viper.Set("telemetry", false) - - tel := InternalNewTelemetry(context.Background(), generateTestIDs) - tel.enabled = true - tel.errorLogging = true - - cmd := &cobra.Command{ - Use: "fake-command", - Run: func(cmd *cobra.Command, args []string) { - tel.RecordInvocation(context.Background(), cmd, args) - }} - - svr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if strings.HasPrefix(r.URL.EscapedPath(), "/telemetry.TelemetryService") { - t.Errorf("Calls to TelemetryService are fobidden when telemetry is disabled.") - } - - })) - defer svr.Close() - - tel.backendAddress = svr.URL - - _ = cmd.Execute() - tel.RecordError(context.Background(), fmt.Errorf("foo error")) - - // Due to the async http server nature it may not have time to handle the request. - time.Sleep(time.Millisecond * 100) -} - -func TestTelemetryRecordInvocationAnon(t *testing.T) { - reset := setupEnv(t) - defer reset() - - tel := InternalNewTelemetry(context.Background(), generateTestIDs) - tel.enabled = true - tel.errorLogging = true - - sentID := make(chan string, 1) - cmd := &cobra.Command{ - Use: "fake-command", - Run: func(cmd *cobra.Command, args []string) { - defer close(sentID) - sentID <- tel.RecordInvocation(context.Background(), cmd, args) - }} - cmd.PersistentFlags().Bool("dummy_flag", false, "") - - fakeArg := "fake/arg/value" - fakeArgs := []string{"--dummy_flag", fakeArg} - cmd.SetArgs(fakeArgs) - - var req recordInvocationRequest - - receivedID := make(chan string, 1) - svr := httptest.NewServer(assertGrpcInvocation(t, "/telemetry.TelemetryService/RecordInvocation", &req, func(w http.ResponseWriter) { - defer close(receivedID) - - assert.Equal(t, req.Command, cmd.Use, req) - - // Assert that we don't transmit user data in plain text. - assert.Equal(t, len(req.Arg), 1, req) - assert.Assert(t, req.Arg[0].Hash != fakeArg, req) - assert.Equal(t, req.Arg[0].Plaintext, "", req) - assert.Equal(t, len(req.Flag), 1, req) - assert.Equal(t, req.Flag[0].Name, "dummy_flag", req) - assert.Assert(t, req.Flag[0].Hash != "true", req) - assert.Equal(t, req.Flag[0].Plaintext, "", req) - - receivedID <- req.ID - })) - - defer svr.Close() - - tel.backendAddress = svr.URL - - err := cmd.Execute() - assert.NilError(t, err) - - assert.Equal(t, <-receivedID, <-sentID) // Make sure we validated the request. -} - -func TestTelemetryRecordErrorPlaintext(t *testing.T) { - reset := setupEnv(t) - defer reset() - - tel := InternalNewTelemetry(context.Background(), generateTestIDs) - tel.enabled = true - tel.errorLogging = true - tel.recID = atomic.NewString("fake-id") - - var req recordErrorRequest - receivedID := make(chan string, 1) - svr := httptest.NewServer(assertGrpcInvocation(t, "/telemetry.TelemetryService/RecordError", &req, func(_ http.ResponseWriter) { - defer close(receivedID) - - assert.Assert(t, req.Message != "", req) - - receivedID <- req.ID - })) - defer svr.Close() - - tel.backendAddress = svr.URL - - tel.RecordError(context.Background(), fmt.Errorf("foo error")) - - // Assert on intercepted request outside the HandlerFunc to ensure the handler is called - assert.Equal(t, <-receivedID, tel.recID.Load()) -} - -func assertGrpcInvocation(t *testing.T, method string, request interface{}, handle func(http.ResponseWriter)) http.HandlerFunc { - return func(rw http.ResponseWriter, r *http.Request) { - defer r.Body.Close() - - if r.Method != "POST" { - t.Errorf("expected method=POST, got method=%v", r.Method) - } - - bodyBytes, err := io.ReadAll(r.Body) - assert.NilError(t, err) - - if r.URL.EscapedPath() == method { - err := json.Unmarshal(bodyBytes, request) - assert.NilError(t, err) - handle(rw) - } else { - t.Errorf("expected url=%q, got url=%q", method, r.URL.EscapedPath()) - } - } -} - -func setupEnv(t *testing.T) func() { - t.Setenv("DO_NOT_TRACK", "") - t.Setenv("CI", "") - - viper.Set("telemetry", true) - - return func() { viper.Reset() } -}