Skip to content

Commit

Permalink
fix(core): reduce spam using a threshold system (#138)
Browse files Browse the repository at this point in the history
  • Loading branch information
ayuhito authored Sep 3, 2024
1 parent b035067 commit b0be574
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 78 deletions.
14 changes: 7 additions & 7 deletions core/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ require (
github.com/go-faster/jx v1.1.0
github.com/jmoiron/sqlx v1.4.0
github.com/marcboeker/go-duckdb v1.7.1
github.com/medama-io/go-referrer-parser v0.0.0-20240706151617-0106555291e7
github.com/medama-io/go-timezone-country v0.0.0-20240125021558-8a6127efd8f7
github.com/medama-io/go-useragent v0.0.0-20240707203018-4bd80a87eb23
github.com/ncruces/go-sqlite3 v0.18.0
github.com/medama-io/go-referrer-parser v0.0.0-20240903120234-0a63376371c3
github.com/medama-io/go-timezone-country v0.0.0-20240903121643-db228bdc5dc1
github.com/medama-io/go-useragent v0.0.0-20240903122205-1e1d5231c715
github.com/ncruces/go-sqlite3 v0.18.1
github.com/ogen-go/ogen v1.3.0
github.com/rs/cors v1.11.0
github.com/rs/cors v1.11.1
github.com/rs/zerolog v1.33.0
github.com/shirou/gopsutil/v4 v4.24.7
github.com/shirou/gopsutil/v4 v4.24.8
github.com/stretchr/testify v1.9.0
go.jetify.com/typeid v1.3.0
go.uber.org/multierr v1.11.0
Expand Down Expand Up @@ -74,7 +74,7 @@ require (
golang.org/x/sync v0.8.0 // indirect
golang.org/x/sys v0.24.0 // indirect
golang.org/x/tools v0.24.0 // indirect
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
28 changes: 14 additions & 14 deletions core/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,16 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/medama-io/go-referrer-parser v0.0.0-20240706151617-0106555291e7 h1:r/vA05on+hrhPcTSY9TVFvXUHWB5ZNtynC8CPX1OGS4=
github.com/medama-io/go-referrer-parser v0.0.0-20240706151617-0106555291e7/go.mod h1:y/Y+TQijcFNVXWiZ7YhiThXVRbORFdhcY0osQZXQw8Q=
github.com/medama-io/go-timezone-country v0.0.0-20240125021558-8a6127efd8f7 h1:mydNOo0Zm10bC/RX4h9iwe18hGS0KB5cTqo/y/WpbLQ=
github.com/medama-io/go-timezone-country v0.0.0-20240125021558-8a6127efd8f7/go.mod h1:Wq7lg5D0ZdQ3bHnzOTKsb1YGlxm/l82OVA4aIbAA5w4=
github.com/medama-io/go-useragent v0.0.0-20240707203018-4bd80a87eb23 h1:myjtzE9EGr2zS0d9jguGbZGCgj2117X82L9ZAK1AeYo=
github.com/medama-io/go-useragent v0.0.0-20240707203018-4bd80a87eb23/go.mod h1:H9GYWth4IN8vAFZh5LeARza7VwM4jK9uk7Tb9huVzLw=
github.com/medama-io/go-referrer-parser v0.0.0-20240903120234-0a63376371c3 h1:6/WegW654ZlIovpvD9TpTxNzqxiVZrYyWmqDJDMZa00=
github.com/medama-io/go-referrer-parser v0.0.0-20240903120234-0a63376371c3/go.mod h1:Zng9ySjx7KXIpvVqT/mZbYfKE39CkyS/aQR4kXdJuG0=
github.com/medama-io/go-timezone-country v0.0.0-20240903121643-db228bdc5dc1 h1:/Q1ZWbdGSRpExJRlQZybxwxXa6u4lYH7K/OLD9t/d8M=
github.com/medama-io/go-timezone-country v0.0.0-20240903121643-db228bdc5dc1/go.mod h1:Wq7lg5D0ZdQ3bHnzOTKsb1YGlxm/l82OVA4aIbAA5w4=
github.com/medama-io/go-useragent v0.0.0-20240903122205-1e1d5231c715 h1:reA8qNAKTC8jfKxOtMdZU+JqyI3qhJ23upLk/8dhgEQ=
github.com/medama-io/go-useragent v0.0.0-20240903122205-1e1d5231c715/go.mod h1:H9GYWth4IN8vAFZh5LeARza7VwM4jK9uk7Tb9huVzLw=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/ncruces/go-sqlite3 v0.18.0 h1:aH7WGzOC0CYpUPG1LdFg7JApybiuXgYUE2itzLBwhPM=
github.com/ncruces/go-sqlite3 v0.18.0/go.mod h1:eEOyZnW1dGTJ+zDpMuzfYamEUBtdFz5zeYhqLBtHxvM=
github.com/ncruces/go-sqlite3 v0.18.1 h1:iN8IMZV5EMxpH88NUac9vId23eTKNFUhP7jgY0EBbNc=
github.com/ncruces/go-sqlite3 v0.18.1/go.mod h1:eEOyZnW1dGTJ+zDpMuzfYamEUBtdFz5zeYhqLBtHxvM=
github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M=
github.com/ncruces/julianday v1.0.0/go.mod h1:Dusn2KvZrrovOMJuOt0TNXL6tB7U2E8kvza5fFc9G7g=
github.com/ogen-go/ogen v1.3.0 h1:c0+CvdbwvKmaHQUqbPpRKflvkiJ/NAsEw3L3HhofDso=
Expand All @@ -100,15 +100,15 @@ github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:Om
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
github.com/rs/cors v1.11.0 h1:0B9GE/r9Bc2UxRMMtymBkHTenPkHDv0CW4Y98GBY+po=
github.com/rs/cors v1.11.0/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU=
github.com/rs/cors v1.11.1 h1:eU3gRzXLRK57F5rKMGMZURNdIG4EoAmX8k94r9wXWHA=
github.com/rs/cors v1.11.1/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU=
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys=
github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
github.com/shirou/gopsutil/v4 v4.24.7 h1:V9UGTK4gQ8HvcnPKf6Zt3XHyQq/peaekfxpJ2HSocJk=
github.com/shirou/gopsutil/v4 v4.24.7/go.mod h1:0uW/073rP7FYLOkvxolUQM5rMOLTNmRXnFKafpb71rw=
github.com/shirou/gopsutil/v4 v4.24.8 h1:pVQjIenQkIhqO81mwTaXjTzOMT7d3TZkf43PlVFHENI=
github.com/shirou/gopsutil/v4 v4.24.8/go.mod h1:wE0OrJtj4dG+hYkxqDH3QiBICdKSf04/npcvLLc/oRg=
github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM=
github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ=
github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
Expand Down Expand Up @@ -205,8 +205,8 @@ golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 h1:LLhsEBxRTBLuKlQxFBYUOU8xyFgXv6cOTp2HASDlsDk=
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ=
gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
Expand Down
95 changes: 57 additions & 38 deletions core/services/event.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,22 @@ func (h *Handler) GetEventPing(_ctx context.Context, params api.GetEventPingPara
}, nil
}

// IsBotThreshold is the number of unknown metrics (for example an
// unrecognised browser, OS or device) at which a hit is considered to come
// from a bot. PostEventHit compares its running count of unknown metrics
// against this value with >=, so reaching the threshold is enough to return
// early.
const IsBotThreshold = 2

func (h *Handler) PostEventHit(ctx context.Context, req api.EventHit, _params api.PostEventHitParams) (api.PostEventHitRes, error) {
log := logger.Get()

// If this counter reaches IsBotThreshold, we want to return early as the
// event is likely a bot.
//
// Ensure all checks that increment this counter occur at the beginning
// rather than the end of the function.
unknownCounter := 0

switch req.Type {
case api.EventLoadEventHit:
hostname := req.EventLoad.U.Hostname()
Expand Down Expand Up @@ -161,11 +174,13 @@ func (h *Handler) PostEventHit(ctx context.Context, req api.EventHit, _params ap
uaBrowser := ua.Browser
if uaBrowser == "" {
uaBrowser = Unknown
unknownCounter++
}

uaOS := ua.OS
if uaOS == "" {
uaOS = Unknown
unknownCounter++
}

uaDevice := Unknown
Expand All @@ -178,57 +193,30 @@ func (h *Handler) PostEventHit(ctx context.Context, req api.EventHit, _params ap
uaDevice = "Tablet"
case ua.TV:
uaDevice = "TV"
default:
unknownCounter++
}

if ua.Browser == "" || ua.OS == "" || uaDevice == Unknown {
if uaBrowser == Unknown || uaOS == Unknown || uaDevice == Unknown {
log.Debug().Str("user_agent", rawUserAgent).Msg("hit: unknown user agent")
}

if ua.Browser == "" && ua.OS == "" && uaDevice == Unknown {
// Do not log the event if every element of the user agent is unknown.
return &api.PostEventHitNoContent{}, nil
}

// Parse referrer URL and remove any query parameters or self-referencing
// hostnames.
referrerHost := ""
if req.EventLoad.R.Value != "" {
referrer, err := url.Parse(req.EventLoad.R.Value)
if err != nil {
log.Warn().Err(err).Msg("hit: failed to parse referrer URL")
return ErrBadRequest(err), nil
}

// If the referrer hostname is the same as the current hostname, we
// want to remove it.
referrerHost = referrer.Hostname()
if referrerHost == hostname {
referrerHost = ""
if unknownCounter >= IsBotThreshold {
return &api.PostEventHitNoContent{}, nil
}
}

referrerGroup := ""
if referrerHost != "" {
// Get the referrer group from the referrer URL.
referrerGroup = h.referrer.Parse(referrerHost)
}

// Get country code from user's timezone. This is used as a best effort
// to determine the country of the user's location without compromising
// their privacy using IP addresses.
var countryName string
countryCode, err := h.timezoneMap.GetCode(req.EventLoad.T.Value)
countryName, err := h.timezoneCountryMap.GetCountry(req.EventLoad.T.Value)
if err != nil {
log.Debug().Err(err).Msg("hit: failed to get country code from timezone")
countryCode = ""
log.Debug().Err(err).Msg("hit: failed to get country name from timezone")
countryName = Unknown
}

if countryCode != "" {
countryName, err = h.codeCountryMap.GetCountry(countryCode)
if err != nil {
log.Debug().Err(err).Msg("hit: failed to get country name from country code")
countryName = Unknown
if countryName == "" {
unknownCounter++
if unknownCounter >= IsBotThreshold {
return &api.PostEventHitNoContent{}, nil
}
}

Expand All @@ -248,6 +236,37 @@ func (h *Handler) PostEventHit(ctx context.Context, req api.EventHit, _params ap
languageDialect = display.English.Tags().Name(languages[0])
}

if languageBase == Unknown {
unknownCounter++
if unknownCounter >= IsBotThreshold {
return &api.PostEventHitNoContent{}, nil
}
}

// Parse referrer URL and remove any query parameters or self-referencing
// hostnames.
referrerHost := ""
if req.EventLoad.R.Value != "" {
referrer, err := url.Parse(req.EventLoad.R.Value)
if err != nil {
log.Warn().Err(err).Msg("hit: failed to parse referrer URL")
return ErrBadRequest(err), nil
}

// If the referrer hostname is the same as the current hostname, we
// want to remove it.
referrerHost = referrer.Hostname()
if referrerHost == hostname {
referrerHost = ""
}
}

referrerGroup := ""
if referrerHost != "" {
// Get the referrer group from the referrer URL.
referrerGroup = h.referrer.Parse(referrerHost)
}

// Get utm source, medium, and campaign from URL query parameters.
queries := req.EventLoad.U.Query()
utmSource := queries.Get("utm_source")
Expand Down
31 changes: 12 additions & 19 deletions core/services/oas.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@ type Handler struct {
analyticsDB *duckdb.Client

// Parsing libraries
useragent *useragent.Parser
referrer *referrer.Parser
timezoneMap *tz.TimezoneCodeMap
codeCountryMap *tz.CodeCountryMap
useragent *useragent.Parser
referrer *referrer.Parser
timezoneCountryMap *tz.TimezoneCountryMap

// Cache store for hostnames
hostnames *util.CacheStore
Expand All @@ -48,12 +47,7 @@ type Handler struct {
// NewService returns a new instance of the ogen service handler.
func NewService(ctx context.Context, auth *util.AuthService, sqlite *sqlite.Client, duckdb *duckdb.Client, commit string) (*Handler, error) {
// Load the timezone-to-country map
tzMap, err := tz.NewTimezoneCodeMap()
if err != nil {
return nil, errors.Wrap(err, "services init")
}

codeCountryMap, err := tz.NewCodeCountryMap()
tzMap, err := tz.NewTimezoneCountryMap()
if err != nil {
return nil, errors.Wrap(err, "services init")
}
Expand All @@ -78,15 +72,14 @@ func NewService(ctx context.Context, auth *util.AuthService, sqlite *sqlite.Clie
}

return &Handler{
auth: auth,
db: sqlite,
analyticsDB: duckdb,
useragent: useragent.NewParser(),
referrer: referrerParser,
timezoneMap: &tzMap,
codeCountryMap: &codeCountryMap,
hostnames: &hostnameCache,
RuntimeConfig: &runtimeConfig,
auth: auth,
db: sqlite,
analyticsDB: duckdb,
useragent: useragent.NewParser(),
referrer: referrerParser,
timezoneCountryMap: &tzMap,
hostnames: &hostnameCache,
RuntimeConfig: &runtimeConfig,
}, nil
}

Expand Down

0 comments on commit b0be574

Please sign in to comment.