diff --git a/core/go.mod b/core/go.mod index 47f5faf..b5631e5 100644 --- a/core/go.mod +++ b/core/go.mod @@ -11,14 +11,14 @@ require ( github.com/go-faster/jx v1.1.0 github.com/jmoiron/sqlx v1.4.0 github.com/marcboeker/go-duckdb v1.7.1 - github.com/medama-io/go-referrer-parser v0.0.0-20240706151617-0106555291e7 - github.com/medama-io/go-timezone-country v0.0.0-20240125021558-8a6127efd8f7 - github.com/medama-io/go-useragent v0.0.0-20240707203018-4bd80a87eb23 - github.com/ncruces/go-sqlite3 v0.18.0 + github.com/medama-io/go-referrer-parser v0.0.0-20240903120234-0a63376371c3 + github.com/medama-io/go-timezone-country v0.0.0-20240903121643-db228bdc5dc1 + github.com/medama-io/go-useragent v0.0.0-20240903122205-1e1d5231c715 + github.com/ncruces/go-sqlite3 v0.18.1 github.com/ogen-go/ogen v1.3.0 - github.com/rs/cors v1.11.0 + github.com/rs/cors v1.11.1 github.com/rs/zerolog v1.33.0 - github.com/shirou/gopsutil/v4 v4.24.7 + github.com/shirou/gopsutil/v4 v4.24.8 github.com/stretchr/testify v1.9.0 go.jetify.com/typeid v1.3.0 go.uber.org/multierr v1.11.0 @@ -74,7 +74,7 @@ require ( golang.org/x/sync v0.8.0 // indirect golang.org/x/sys v0.24.0 // indirect golang.org/x/tools v0.24.0 // indirect - golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect + golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/core/go.sum b/core/go.sum index 84ea82a..485315b 100644 --- a/core/go.sum +++ b/core/go.sum @@ -74,16 +74,16 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/medama-io/go-referrer-parser v0.0.0-20240706151617-0106555291e7 h1:r/vA05on+hrhPcTSY9TVFvXUHWB5ZNtynC8CPX1OGS4= -github.com/medama-io/go-referrer-parser v0.0.0-20240706151617-0106555291e7/go.mod h1:y/Y+TQijcFNVXWiZ7YhiThXVRbORFdhcY0osQZXQw8Q= -github.com/medama-io/go-timezone-country v0.0.0-20240125021558-8a6127efd8f7 h1:mydNOo0Zm10bC/RX4h9iwe18hGS0KB5cTqo/y/WpbLQ= -github.com/medama-io/go-timezone-country v0.0.0-20240125021558-8a6127efd8f7/go.mod h1:Wq7lg5D0ZdQ3bHnzOTKsb1YGlxm/l82OVA4aIbAA5w4= -github.com/medama-io/go-useragent v0.0.0-20240707203018-4bd80a87eb23 h1:myjtzE9EGr2zS0d9jguGbZGCgj2117X82L9ZAK1AeYo= -github.com/medama-io/go-useragent v0.0.0-20240707203018-4bd80a87eb23/go.mod h1:H9GYWth4IN8vAFZh5LeARza7VwM4jK9uk7Tb9huVzLw= +github.com/medama-io/go-referrer-parser v0.0.0-20240903120234-0a63376371c3 h1:6/WegW654ZlIovpvD9TpTxNzqxiVZrYyWmqDJDMZa00= +github.com/medama-io/go-referrer-parser v0.0.0-20240903120234-0a63376371c3/go.mod h1:Zng9ySjx7KXIpvVqT/mZbYfKE39CkyS/aQR4kXdJuG0= +github.com/medama-io/go-timezone-country v0.0.0-20240903121643-db228bdc5dc1 h1:/Q1ZWbdGSRpExJRlQZybxwxXa6u4lYH7K/OLD9t/d8M= +github.com/medama-io/go-timezone-country v0.0.0-20240903121643-db228bdc5dc1/go.mod h1:Wq7lg5D0ZdQ3bHnzOTKsb1YGlxm/l82OVA4aIbAA5w4= +github.com/medama-io/go-useragent v0.0.0-20240903122205-1e1d5231c715 h1:reA8qNAKTC8jfKxOtMdZU+JqyI3qhJ23upLk/8dhgEQ= +github.com/medama-io/go-useragent v0.0.0-20240903122205-1e1d5231c715/go.mod h1:H9GYWth4IN8vAFZh5LeARza7VwM4jK9uk7Tb9huVzLw= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/ncruces/go-sqlite3 v0.18.0 h1:aH7WGzOC0CYpUPG1LdFg7JApybiuXgYUE2itzLBwhPM= -github.com/ncruces/go-sqlite3 v0.18.0/go.mod h1:eEOyZnW1dGTJ+zDpMuzfYamEUBtdFz5zeYhqLBtHxvM= +github.com/ncruces/go-sqlite3 v0.18.1 h1:iN8IMZV5EMxpH88NUac9vId23eTKNFUhP7jgY0EBbNc= +github.com/ncruces/go-sqlite3 v0.18.1/go.mod h1:eEOyZnW1dGTJ+zDpMuzfYamEUBtdFz5zeYhqLBtHxvM= github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M= github.com/ncruces/julianday v1.0.0/go.mod h1:Dusn2KvZrrovOMJuOt0TNXL6tB7U2E8kvza5fFc9G7g= github.com/ogen-go/ogen v1.3.0 h1:c0+CvdbwvKmaHQUqbPpRKflvkiJ/NAsEw3L3HhofDso= @@ -100,15 +100,15 @@ github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:Om github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= -github.com/rs/cors v1.11.0 h1:0B9GE/r9Bc2UxRMMtymBkHTenPkHDv0CW4Y98GBY+po= -github.com/rs/cors v1.11.0/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= +github.com/rs/cors v1.11.1 h1:eU3gRzXLRK57F5rKMGMZURNdIG4EoAmX8k94r9wXWHA= +github.com/rs/cors v1.11.1/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8= github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys= github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs= -github.com/shirou/gopsutil/v4 v4.24.7 h1:V9UGTK4gQ8HvcnPKf6Zt3XHyQq/peaekfxpJ2HSocJk= -github.com/shirou/gopsutil/v4 v4.24.7/go.mod h1:0uW/073rP7FYLOkvxolUQM5rMOLTNmRXnFKafpb71rw= +github.com/shirou/gopsutil/v4 v4.24.8 h1:pVQjIenQkIhqO81mwTaXjTzOMT7d3TZkf43PlVFHENI= +github.com/shirou/gopsutil/v4 v4.24.8/go.mod h1:wE0OrJtj4dG+hYkxqDH3QiBICdKSf04/npcvLLc/oRg= github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM= github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ= github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= @@ -205,8 +205,8 @@ golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 h1:LLhsEBxRTBLuKlQxFBYUOU8xyFgXv6cOTp2HASDlsDk= -golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/core/services/event.go b/core/services/event.go index c7af536..8c94212 100644 --- a/core/services/event.go +++ b/core/services/event.go @@ -89,9 +89,22 @@ func (h *Handler) GetEventPing(_ctx context.Context, params api.GetEventPingPara }, nil } +const ( + // IsBotThreshold is the threshold of unknown metrics for determining if a + // user agent is a bot. + IsBotThreshold = 2 +) + func (h *Handler) PostEventHit(ctx context.Context, req api.EventHit, _params api.PostEventHitParams) (api.PostEventHitRes, error) { log := logger.Get() + // If this counter exceeds 2, we want to return early as the event is likely + // a bot. + // + // Ensure all functions that increment this counter occur at the beginning + // rather than the end of the function. + unknownCounter := 0 + switch req.Type { case api.EventLoadEventHit: hostname := req.EventLoad.U.Hostname() @@ -129,11 +142,13 @@ func (h *Handler) PostEventHit(ctx context.Context, req api.EventHit, _params ap uaBrowser := ua.Browser if uaBrowser == "" { uaBrowser = Unknown + unknownCounter++ } uaOS := ua.OS if uaOS == "" { uaOS = Unknown + unknownCounter++ } uaDevice := Unknown @@ -146,57 +161,30 @@ func (h *Handler) PostEventHit(ctx context.Context, req api.EventHit, _params ap uaDevice = "Tablet" case ua.TV: uaDevice = "TV" + default: + unknownCounter++ } - if ua.Browser == "" || ua.OS == "" || uaDevice == Unknown { + if uaBrowser == Unknown || uaOS == Unknown || uaDevice == Unknown { log.Debug().Str("user_agent", rawUserAgent).Msg("hit: unknown user agent") - } - - if ua.Browser == "" && ua.OS == "" && uaDevice == Unknown { - // Do not log the event if every element of the user agent is unknown. - return &api.PostEventHitNoContent{}, nil - } - - // Parse referrer URL and remove any query parameters or self-referencing - // hostnames. - referrerHost := "" - if req.EventLoad.R.Value != "" { - referrer, err := url.Parse(req.EventLoad.R.Value) - if err != nil { - log.Warn().Err(err).Msg("hit: failed to parse referrer URL") - return ErrBadRequest(err), nil - } - - // If the referrer hostname is the same as the current hostname, we - // want to remove it. - referrerHost = referrer.Hostname() - if referrerHost == hostname { - referrerHost = "" + if unknownCounter >= IsBotThreshold { + return &api.PostEventHitNoContent{}, nil } } - referrerGroup := "" - if referrerHost != "" { - // Get the referrer group from the referrer URL. - referrerGroup = h.referrer.Parse(referrerHost) - } - // Get country code from user's timezone. This is used as a best effort // to determine the country of the user's location without compromising // their privacy using IP addresses. - var countryName string - countryCode, err := h.timezoneMap.GetCode(req.EventLoad.T.Value) + countryName, err := h.timezoneCountryMap.GetCountry(req.EventLoad.T.Value) if err != nil { - log.Debug().Err(err).Msg("hit: failed to get country code from timezone") - countryCode = "" + log.Debug().Err(err).Msg("hit: failed to get country name from timezone") countryName = Unknown } - if countryCode != "" { - countryName, err = h.codeCountryMap.GetCountry(countryCode) - if err != nil { - log.Debug().Err(err).Msg("hit: failed to get country name from country code") - countryName = Unknown + if countryName == "" { + unknownCounter++ + if unknownCounter >= IsBotThreshold { + return &api.PostEventHitNoContent{}, nil } } @@ -216,6 +204,37 @@ func (h *Handler) PostEventHit(ctx context.Context, req api.EventHit, _params ap languageDialect = display.English.Tags().Name(languages[0]) } + if languageBase == Unknown { + unknownCounter++ + if unknownCounter >= IsBotThreshold { + return &api.PostEventHitNoContent{}, nil + } + } + + // Parse referrer URL and remove any query parameters or self-referencing + // hostnames. + referrerHost := "" + if req.EventLoad.R.Value != "" { + referrer, err := url.Parse(req.EventLoad.R.Value) + if err != nil { + log.Warn().Err(err).Msg("hit: failed to parse referrer URL") + return ErrBadRequest(err), nil + } + + // If the referrer hostname is the same as the current hostname, we + // want to remove it. + referrerHost = referrer.Hostname() + if referrerHost == hostname { + referrerHost = "" + } + } + + referrerGroup := "" + if referrerHost != "" { + // Get the referrer group from the referrer URL. + referrerGroup = h.referrer.Parse(referrerHost) + } + // Get utm source, medium, and campaigm from URL query parameters. queries := req.EventLoad.U.Query() utmSource := queries.Get("utm_source") diff --git a/core/services/oas.go b/core/services/oas.go index d5c446c..2a8f3df 100644 --- a/core/services/oas.go +++ b/core/services/oas.go @@ -33,10 +33,9 @@ type Handler struct { analyticsDB *duckdb.Client // Parsing libraries - useragent *useragent.Parser - referrer *referrer.Parser - timezoneMap *tz.TimezoneCodeMap - codeCountryMap *tz.CodeCountryMap + useragent *useragent.Parser + referrer *referrer.Parser + timezoneCountryMap *tz.TimezoneCountryMap // Cache store for hostnames hostnames *util.CacheStore @@ -48,12 +47,7 @@ type Handler struct { // NewService returns a new instance of the ogen service handler. func NewService(ctx context.Context, auth *util.AuthService, sqlite *sqlite.Client, duckdb *duckdb.Client, commit string) (*Handler, error) { // Load timezone and country maps - tzMap, err := tz.NewTimezoneCodeMap() - if err != nil { - return nil, errors.Wrap(err, "services init") - } - - codeCountryMap, err := tz.NewCodeCountryMap() + tzMap, err := tz.NewTimezoneCountryMap() if err != nil { return nil, errors.Wrap(err, "services init") } @@ -78,15 +72,14 @@ func NewService(ctx context.Context, auth *util.AuthService, sqlite *sqlite.Clie } return &Handler{ - auth: auth, - db: sqlite, - analyticsDB: duckdb, - useragent: useragent.NewParser(), - referrer: referrerParser, - timezoneMap: &tzMap, - codeCountryMap: &codeCountryMap, - hostnames: &hostnameCache, - RuntimeConfig: &runtimeConfig, + auth: auth, + db: sqlite, + analyticsDB: duckdb, + useragent: useragent.NewParser(), + referrer: referrerParser, + timezoneCountryMap: &tzMap, + hostnames: &hostnameCache, + RuntimeConfig: &runtimeConfig, }, nil }