Skip to content

Commit

Permalink
[bugfix] Fix clone event caching due to missing pod info
Browse files Browse the repository at this point in the history
The eventcache API provides 2 handlers.

These are:
RetryInternal -> called to set up process information
Retry -> called to set up pod information

In the case of clone events, we used to have an empty implementation of
the Retry handler. This resulted in an issue with missing pod information,
which is described in detail here: #2902

This patch provides a proper Retry implementation that also handles those
cases.

FIXES: #2902

Signed-off-by: Anastasios Papagiannis <[email protected]>
  • Loading branch information
tpapagian authored and kkourt committed Sep 10, 2024
1 parent 02c4fdd commit 20bba35
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 8 deletions.
23 changes: 19 additions & 4 deletions pkg/grpc/exec/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,17 +300,32 @@ func (msg *MsgCloneEventUnix) Notify() bool {
}

// RetryInternal is the eventcache handler that sets up process information
// for a clone event. It does so by calling process.AddCloneEvent on the
// wrapped MsgCloneEvent; both of its parameters are ignored.
//
// NOTE(review): this span is a rendered diff, so the two return statements
// below appear to be the removed and the added line of one change
// (AddCloneEvent going from returning error to returning
// (*ProcessInternal, error)) — only one of them belongs in the real file.
func (msg *MsgCloneEventUnix) RetryInternal(_ notify.Event, _ uint64) (*process.ProcessInternal, error) {
return nil, process.AddCloneEvent(&msg.MsgCloneEvent)
return process.AddCloneEvent(&msg.MsgCloneEvent)
}

// Retry is the eventcache handler that sets up pod information for a clone
// event. When Kubernetes support is enabled and the process has a container
// ID but no pod attached yet, it looks the pod up and attaches it to the
// cached process.
//
// It returns eventcache.ErrFailedToGetPodInfo when the lookup yields nothing,
// and nil otherwise (including when no lookup was needed).
//
// NOTE(review): this span is a rendered diff; the first signature line below
// (with the unnamed *process.ProcessInternal parameter) appears to be the
// pre-patch line that the second one replaces.
func (msg *MsgCloneEventUnix) Retry(_ *process.ProcessInternal, _ notify.Event) error {
func (msg *MsgCloneEventUnix) Retry(internal *process.ProcessInternal, _ notify.Event) error {
proc := internal.UnsafeGetProcess()
// Only attempt a lookup when k8s is enabled, the process has a Docker ID,
// and pod info has not been attached already.
if option.Config.EnableK8s && proc.Docker != "" && proc.Pod == nil {
podInfo := process.GetPodInfo(internal.GetCgID(), proc.Docker, proc.Binary, proc.Arguments, msg.NSPID)
if podInfo == nil {
// Count the failed attempt and report the error to the caller.
eventcache.CacheRetries(eventcache.PodInfo).Inc()
return eventcache.ErrFailedToGetPodInfo
}
internal.AddPodInfo(podInfo)
}
return nil
}

func (msg *MsgCloneEventUnix) HandleMessage() *tetragon.GetEventsResponse {
if err := process.AddCloneEvent(&msg.MsgCloneEvent); err != nil {
ec := eventcache.Get()
ec := eventcache.Get()
if internal, err := process.AddCloneEvent(&msg.MsgCloneEvent); err == nil {
if ec != nil && ec.Needed(internal.UnsafeGetProcess()) {
// adding to the cache due to missing pod info
ec.Add(internal, nil, msg.MsgCloneEvent.Common.Ktime, msg.MsgCloneEvent.Ktime, msg)
}
} else {
if ec != nil {
// adding to the cache due to missing parent
ec.Add(nil, nil, msg.MsgCloneEvent.Common.Ktime, msg.MsgCloneEvent.Ktime, msg)
}
}
Expand Down
12 changes: 8 additions & 4 deletions pkg/process/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ func (pi *ProcessInternal) UnsafeGetProcess() *tetragon.Process {
return pi.process
}

// GetCgID returns the cgID value stored in this ProcessInternal
// (presumably the process's cgroup ID — confirm against where cgID is set).
func (pi *ProcessInternal) GetCgID() uint64 {
return pi.cgID
}

// UpdateExecOutsideCache() checks if we must augment the ProcessExec.Process
// with more fields without propagating again those fields into the process
// cache. This means that those added fields will only show up for the
Expand Down Expand Up @@ -502,7 +506,7 @@ func AddExecEvent(event *tetragonAPI.MsgExecveEventUnix) *ProcessInternal {
}

// AddCloneEvent adds a new process into the cache from a CloneEvent
func AddCloneEvent(event *tetragonAPI.MsgCloneEvent) error {
func AddCloneEvent(event *tetragonAPI.MsgCloneEvent) (*ProcessInternal, error) {
parentExecId := GetProcessID(event.Parent.Pid, event.Parent.Ktime)
parent, err := Get(parentExecId)
if err != nil {
Expand All @@ -511,17 +515,17 @@ func AddCloneEvent(event *tetragonAPI.MsgCloneEvent) error {
"event.parent.pid": event.Parent.Pid,
"event.parent.exec_id": parentExecId,
}).WithError(err).Debug("CloneEvent: parent process not found in cache")
return err
return nil, err
}

proc, err := initProcessInternalClone(event, parent, parentExecId)
if err != nil {
return err
return nil, err
}

parent.RefInc("parent")
procCache.add(proc)
return nil
return proc, nil
}

func Get(execId string) (*ProcessInternal, error) {
Expand Down

0 comments on commit 20bba35

Please sign in to comment.