Revert "drm/i915: Fix premature release of request's reusable memory"
This reverts commit 930f3f1.
This is needed to resolve an i915 memory leak problem.

Signed-off-by: Fei Jiang <[email protected]>
feijiang1 committed Dec 27, 2023
1 parent b0f94b1 commit 80bb41f
Showing 1 changed file with 29 additions and 70 deletions.
99 changes: 29 additions & 70 deletions drivers/gpu/drm/i915/i915_active.c
@@ -448,11 +448,8 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
                 }
         } while (unlikely(is_barrier(active)));
 
-        fence = __i915_active_fence_set(active, fence);
-        if (!fence)
+        if (!__i915_active_fence_set(active, fence))
                 __i915_active_acquire(ref);
-        else
-                dma_fence_put(fence);
 
 out:
         i915_active_release(ref);
@@ -471,9 +468,13 @@ __i915_active_set_fence(struct i915_active *ref,
                 return NULL;
         }
 
+        rcu_read_lock();
         prev = __i915_active_fence_set(active, fence);
-        if (!prev)
+        if (prev)
+                prev = dma_fence_get_rcu(prev);
+        else
                 __i915_active_acquire(ref);
+        rcu_read_unlock();
 
         return prev;
 }
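The two hunks above restore the pre-fix calling convention: __i915_active_fence_set() hands back a raw, RCU-protected pointer rather than a reference, so a caller that wants to keep the previous fence must pin it before leaving the read-side critical section. A minimal sketch of that pattern, assuming a hypothetical 'slot' argument in place of the tracker's fence pointer:

#include <linux/dma-fence.h>
#include <linux/rcupdate.h>

/*
 * Sketch only: 'slot' stands in for whatever publishes the fence
 * pointer (here, the slot written by __i915_active_fence_set()).
 */
static struct dma_fence *pin_prev_fence_sketch(struct dma_fence __rcu **slot)
{
        struct dma_fence *prev;

        rcu_read_lock();
        prev = rcu_dereference(*slot);
        /*
         * dma_fence_get_rcu() uses kref_get_unless_zero() internally, so
         * it returns NULL if the fence is already on its way to being freed.
         */
        if (prev)
                prev = dma_fence_get_rcu(prev);
        rcu_read_unlock();

        return prev;        /* a full reference, or NULL */
}

This mirrors what the restored __i915_active_set_fence() does inline; note that it deliberately does not re-check the slot after pinning, which is exactly the property the reverted commit had tightened.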
@@ -1017,11 +1018,10 @@ void i915_request_add_active_barriers(struct i915_request *rq)
  *
  * Records the new @fence as the last active fence along its timeline in
  * this active tracker, moving the tracking callbacks from the previous
- * fence onto this one. Gets and returns a reference to the previous fence
- * (if not already completed), which the caller must put after making sure
- * that it is executed before the new fence. To ensure that the order of
- * fences within the timeline of the i915_active_fence is understood, it
- * should be locked by the caller.
+ * fence onto this one. Returns the previous fence (if not already completed),
+ * which the caller must ensure is executed before the new fence. To ensure
+ * that the order of fences within the timeline of the i915_active_fence is
+ * understood, it should be locked by the caller.
  */
 struct dma_fence *
 __i915_active_fence_set(struct i915_active_fence *active,
@@ -1030,23 +1030,7 @@ __i915_active_fence_set(struct i915_active_fence *active,
         struct dma_fence *prev;
         unsigned long flags;
 
-        /*
-         * In case of fences embedded in i915_requests, their memory is
-         * SLAB_TYPESAFE_BY_RCU, then it can be reused right after release
-         * by new requests. Then, there is a risk of passing back a pointer
-         * to a new, completely unrelated fence that reuses the same memory
-         * while tracked under a different active tracker. Combined with i915
-         * perf open/close operations that build await dependencies between
-         * engine kernel context requests and user requests from different
-         * timelines, this can lead to dependency loops and infinite waits.
-         *
-         * As a countermeasure, we try to get a reference to the active->fence
-         * first, so if we succeed and pass it back to our user then it is not
-         * released and potentially reused by an unrelated request before the
-         * user has a chance to set up an await dependency on it.
-         */
-        prev = i915_active_fence_get(active);
-        if (fence == prev)
+        if (fence == rcu_access_pointer(active->fence))
                 return fence;
 
         GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
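For contrast, the countermeasure being removed here relied on i915_active_fence_get(), which wraps dma_fence_get_rcu_safe(). A hedged sketch of that re-check loop (not part of this patch; 'slot' again stands in for &active->fence), showing why a reference is safe to hand out even when SLAB_TYPESAFE_BY_RCU lets the memory be recycled into an unrelated fence:

/* Assumes rcu_read_lock() is held by the caller. */
static struct dma_fence *get_rcu_safe_sketch(struct dma_fence __rcu **slot)
{
        do {
                struct dma_fence *fence;

                fence = rcu_dereference(*slot);
                if (!fence)
                        return NULL;

                /* refcount already zero: the fence is being freed */
                if (!dma_fence_get_rcu(fence))
                        continue;

                /* still the published fence, not a recycled impostor? */
                if (fence == rcu_access_pointer(*slot))
                        return fence;

                dma_fence_put(fence);        /* lost the race, retry */
        } while (1);
}

The bare rcu_access_pointer() comparison restored above skips both the pinning and the re-check; that is the trade-off this revert makes in exchange for avoiding the reported leak.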
@@ -1055,56 +1039,27 @@
          * Consider that we have two threads arriving (A and B), with
          * C already resident as the active->fence.
          *
-         * Both A and B have got a reference to C or NULL, depending on the
-         * timing of the interrupt handler. Let's assume that if A has got C
-         * then it has locked C first (before B).
+         * A does the xchg first, and so it sees C or NULL depending
+         * on the timing of the interrupt handler. If it is NULL, the
+         * previous fence must have been signaled and we know that
+         * we are first on the timeline. If it is still present,
+         * we acquire the lock on that fence and serialise with the interrupt
+         * handler, in the process removing it from any future interrupt
+         * callback. A will then wait on C before executing (if present).
+         *
+         * As B is second, it sees A as the previous fence and so waits for
+         * it to complete its transition and takes over the occupancy for
+         * itself -- remembering that it needs to wait on A before executing.
          *
          * Note the strong ordering of the timeline also provides consistent
          * nesting rules for the fence->lock; the inner lock is always the
          * older lock.
          */
         spin_lock_irqsave(fence->lock, flags);
-        if (prev)
-                spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
-
-        /*
-         * A does the cmpxchg first, and so it sees C or NULL, as before, or
-         * something else, depending on the timing of other threads and/or
-         * interrupt handler. If not the same as before then A unlocks C if
-         * applicable and retries, starting from an attempt to get a new
-         * active->fence. Meanwhile, B follows the same path as A.
-         * Once A succeeds with cmpxchg, B fails again, retries, gets A from
-         * active->fence, locks it as soon as A completes, and possibly
-         * succeeds with cmpxchg.
-         */
-        while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
-                if (prev) {
-                        spin_unlock(prev->lock);
-                        dma_fence_put(prev);
-                }
-                spin_unlock_irqrestore(fence->lock, flags);
-
-                prev = i915_active_fence_get(active);
-                GEM_BUG_ON(prev == fence);
-
-                spin_lock_irqsave(fence->lock, flags);
-                if (prev)
-                        spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
-        }
-
-        /*
-         * If prev is NULL then the previous fence must have been signaled
-         * and we know that we are first on the timeline. If it is still
-         * present then, having the lock on that fence already acquired, we
-         * serialise with the interrupt handler, in the process of removing it
-         * from any future interrupt callback. A will then wait on C before
-         * executing (if present).
-         *
-         * As B is second, it sees A as the previous fence and so waits for
-         * it to complete its transition and takes over the occupancy for
-         * itself -- remembering that it needs to wait on A before executing.
-         */
+        prev = xchg(__active_fence_slot(active), fence);
         if (prev) {
+                GEM_BUG_ON(prev == fence);
+                spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
                 __list_del_entry(&active->cb.node);
                 spin_unlock(prev->lock); /* serialise with prev->cb_list */
         }
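The restored A/B/C commentary can be condensed into a short sketch of the handover it describes. This is a simplified illustration, not the full function (the real code goes on to re-arm active->cb on the new fence); 'slot' and 'cb_node' are stand-ins for __active_fence_slot(active) and &active->cb.node:

#include <linux/atomic.h>
#include <linux/dma-fence.h>
#include <linux/list.h>
#include <linux/spinlock.h>

static struct dma_fence *
takeover_sketch(struct dma_fence **slot, struct list_head *cb_node,
                struct dma_fence *fence)
{
        struct dma_fence *prev;
        unsigned long flags;

        spin_lock_irqsave(fence->lock, flags);
        prev = xchg(slot, fence);       /* atomically become the newest fence */
        if (prev) {
                /*
                 * Per the nesting rule above, the inner lock is always
                 * the older fence's lock, so the depth stays bounded.
                 */
                spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
                __list_del_entry(cb_node);      /* steal the callback from prev */
                spin_unlock(prev->lock);
        }
        spin_unlock_irqrestore(fence->lock, flags);

        return prev;    /* raw pointer: pin under RCU before use */
}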
@@ -1121,7 +1076,11 @@ int i915_active_fence_set(struct i915_active_fence *active,
         int err = 0;
 
         /* Must maintain timeline ordering wrt previous active requests */
+        rcu_read_lock();
         fence = __i915_active_fence_set(active, &rq->fence);
+        if (fence) /* but the previous fence may not belong to that timeline! */
+                fence = dma_fence_get_rcu(fence);
+        rcu_read_unlock();
         if (fence) {
                 err = i915_request_await_dma_fence(rq, fence);
                 dma_fence_put(fence);
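Putting the hunks together, the restored contract for __i915_active_fence_set() callers reads as a four-step recipe, consolidated here as a hedged sketch that mirrors the body of i915_active_fence_set() above:

static int set_and_await_sketch(struct i915_active_fence *active,
                                struct i915_request *rq)
{
        struct dma_fence *prev;
        int err = 0;

        /* 1. the caller holds the timeline lock, per the kerneldoc */
        rcu_read_lock();
        /* 2. publish the new fence; the return value is a raw pointer */
        prev = __i915_active_fence_set(active, &rq->fence);
        /* 3. pin it before leaving the RCU read section */
        if (prev)
                prev = dma_fence_get_rcu(prev);
        rcu_read_unlock();

        /* 4. await it (it may belong to another timeline), drop the ref */
        if (prev) {
                err = i915_request_await_dma_fence(rq, prev);
                dma_fence_put(prev);
        }
        return err;
}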
