Commit

Merge pull request #862 from hodgesds/layered-dispatch-refactor
scx_layered: Refactor dispatch
hodgesds authored Oct 30, 2024
2 parents 3dcaefc + a8d245b commit d56650c
Showing 1 changed file with 100 additions and 57 deletions.
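
In short, this commit splits the topology-aware dispatch path into three helpers — consume_preempting(), consume_non_open() and consume_open_no_preempt() — each returning 0 when it consumed a task, -ENOENT when it found nothing, and -EINVAL on an invalid cost context or layer index. layered_dispatch() then tries them in priority order, with the hi/lo fallback DSQs in between. A condensed sketch of the resulting flow (all names are taken from the diff below):

	/* condensed view of the refactored layered_dispatch() */
	if (consume_preempting(costc, my_llc_id) == 0)
		return;					/* a preempting layer had a task */
	if (scx_bpf_consume(cpu_hi_fallback_dsq_id(cpu)))
		return;					/* per-CPU hi fallback DSQ */
	if (consume_non_open(costc, cpu, my_llc_id) == 0)
		return;					/* confined (!open) layers */
	if (consume_open_no_preempt(costc, my_llc_id) == 0)
		return;					/* open, non-preempting layers */
	scx_bpf_consume(LO_FALLBACK_DSQ);		/* lo fallback DSQ */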
157 changes: 100 additions & 57 deletions scheds/rust/scx_layered/src/bpf/main.bpf.c
@@ -1306,81 +1306,52 @@ void layered_dispatch_no_topo(s32 cpu, struct task_struct *prev)
 	scx_bpf_consume(LO_FALLBACK_DSQ);
 }
 
-void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)
+int consume_preempting(struct cost *costc, u32 my_llc_id)
 {
-	if (disable_topology)
-		return layered_dispatch_no_topo(cpu, prev);
-
-	struct cpu_ctx *cctx, *sib_cctx;
 	struct layer *layer;
-	struct cost *cost;
 	u64 dsq_id;
 	u32 idx, llc_idx, layer_idx;
-	s32 sib = sibling_cpu(cpu);
 
-	if (!(cctx = lookup_cpu_ctx(-1)) ||
-	    !(cost = lookup_cpu_cost(cpu)))
-		return;
-
-	/*
-	 * if @prev was on SCX and is still runnable, we are here because @prev
-	 * has exhausted its slice. We may want to keep running it on this CPU
-	 * rather than giving this CPU to another task and then try to schedule
-	 * @prev somewhere else.
-	 *
-	 * Let's not dispatch any task if we want to keep running @prev. This
-	 * will trigger the automatic local enq behavior which will put @prev on
-	 * @cpu's local DSQ. A more straightforward way to implement this would
-	 * be extending slice from ops.tick() but that's not available in older
-	 * kernels, so let's make do with this for now.
-	 */
-	if (prev && keep_running(cctx, prev))
-		return;
-
-	/*
-	 * If the sibling CPU is running an exclusive task, keep this CPU idle.
-	 * This test is a racy test but should be good enough for best-effort
-	 * optimization.
-	 */
-	if (sib >= 0 && (sib_cctx = lookup_cpu_ctx(sib)) &&
-	    sib_cctx->current_exclusive) {
-		gstat_inc(GSTAT_EXCL_IDLE, cctx);
-		return;
-	}
-
-	u32 my_llc_id = cpu_to_llc_id(cpu);
+	if (!costc)
+		return -EINVAL;
 
 	/* consume preempting layers first */
 	bpf_for(idx, 0, nr_layers) {
-		layer_idx = rotate_layer_id(cost->pref_layer, idx);
+		layer_idx = rotate_layer_id(costc->pref_layer, idx);
 		if (layer_idx >= nr_layers) {
 			scx_bpf_error("can't happen");
-			return;
+			return -EINVAL;
 		}
 		layer = MEMBER_VPTR(layers, [layer_idx]);
-		if (has_budget(cost, layer) == 0)
+		if (has_budget(costc, layer) == 0)
 			continue;
 		bpf_for(llc_idx, 0, nr_llcs) {
 			u32 llc_id = rotate_llc_id(my_llc_id, llc_idx);
 			dsq_id = layer_dsq_id(layer_idx, llc_id);
 			if (layer->preempt && scx_bpf_consume(dsq_id))
-				return;
+				return 0;
 		}
 	}
 
-	dsq_id = cpu_hi_fallback_dsq_id(cpu);
-	if (scx_bpf_consume(dsq_id))
-		return;
+	return -ENOENT;
+}
+
+int consume_non_open(struct cost *costc, s32 cpu, u32 my_llc_id)
+{
+	struct layer *layer;
+	u64 dsq_id;
+	u32 idx, llc_idx, layer_idx;
+
+	if (!costc)
+		return -EINVAL;
 
 	/* consume !open layers second */
 	bpf_for(idx, 0, nr_layers) {
-		layer_idx = rotate_layer_id(cost->pref_layer, idx);
+		layer_idx = rotate_layer_id(costc->pref_layer, idx);
 		if (layer_idx >= nr_layers) {
 			scx_bpf_error("can't happen");
-			return;
+			return -EINVAL;
 		}
 		layer = MEMBER_VPTR(layers, [layer_idx]);
-		if (has_budget(cost, layer) == 0)
+		if (has_budget(costc, layer) == 0)
 			continue;
 		bpf_for(llc_idx, 0, nr_llcs) {
 			u32 llc_id = rotate_llc_id(my_llc_id, llc_idx);
@@ -1389,36 +1360,108 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)
 
 			/* consume matching layers */
 			if (!(layer_cpumask = lookup_layer_cpumask(layer_idx)))
-				return;
+				return 0;
 
 			if (bpf_cpumask_test_cpu(cpu, layer_cpumask) ||
 			    (cpu <= nr_possible_cpus && cpu == fallback_cpu &&
 			     layer->nr_cpus == 0)) {
 				if (scx_bpf_consume(dsq_id))
-					return;
+					return 0;
 			}
 		}
 	}
 
-	/* consume !preempting open layers */
+	return -ENOENT;
+}
+
+int consume_open_no_preempt(struct cost *costc, u32 my_llc_id)
+{
+	struct layer *layer;
+	u64 dsq_id;
+	u32 idx, llc_idx, layer_idx;
+
+	if (!costc)
+		return -EINVAL;
+
 	bpf_for(idx, 0, nr_layers) {
-		layer_idx = rotate_layer_id(cost->pref_layer, idx);
+		layer_idx = rotate_layer_id(costc->pref_layer, idx);
 		if (layer_idx >= nr_layers) {
 			scx_bpf_error("can't happen");
-			return;
+			return -EINVAL;
 		}
 		layer = MEMBER_VPTR(layers, [layer_idx]);
-		if (has_budget(cost, layer) == 0)
+		if (has_budget(costc, layer) == 0)
 			continue;
 		bpf_for(llc_idx, 0, nr_llcs) {
 			u32 llc_id = rotate_llc_id(my_llc_id, llc_idx);
 			dsq_id = layer_dsq_id(layer_idx, llc_id);
 
 			if (!layer->preempt && layer->open && scx_bpf_consume(dsq_id))
-				return;
+				return 0;
 		}
 	}
 
+	return -ENOENT;
+}
+
+void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)
+{
+	if (disable_topology)
+		return layered_dispatch_no_topo(cpu, prev);
+
+	struct cpu_ctx *cctx, *sib_cctx;
+	struct cost *costc;
+	u64 dsq_id;
+	s32 sib = sibling_cpu(cpu);
+
+	if (!(cctx = lookup_cpu_ctx(-1)) ||
+	    !(costc = lookup_cpu_cost(cpu)))
+		return;
+
+	/*
+	 * if @prev was on SCX and is still runnable, we are here because @prev
+	 * has exhausted its slice. We may want to keep running it on this CPU
+	 * rather than giving this CPU to another task and then try to schedule
+	 * @prev somewhere else.
+	 *
+	 * Let's not dispatch any task if we want to keep running @prev. This
+	 * will trigger the automatic local enq behavior which will put @prev on
+	 * @cpu's local DSQ. A more straightforward way to implement this would
+	 * be extending slice from ops.tick() but that's not available in older
+	 * kernels, so let's make do with this for now.
+	 */
+	if (prev && keep_running(cctx, prev))
+		return;
+
+	/*
+	 * If the sibling CPU is running an exclusive task, keep this CPU idle.
+	 * This test is a racy test but should be good enough for best-effort
+	 * optimization.
+	 */
+	if (sib >= 0 && (sib_cctx = lookup_cpu_ctx(sib)) &&
+	    sib_cctx->current_exclusive) {
+		gstat_inc(GSTAT_EXCL_IDLE, cctx);
+		return;
+	}
+
+	u32 my_llc_id = cpu_to_llc_id(cpu);
+
+	/* consume preempting layers first */
+	if (consume_preempting(costc, my_llc_id) == 0)
+		return;
+
+	dsq_id = cpu_hi_fallback_dsq_id(cpu);
+	if (scx_bpf_consume(dsq_id))
+		return;
+
+	/* consume !open layers second */
+	if (consume_non_open(costc, cpu, my_llc_id) == 0)
+		return;
+
+	/* consume !preempting open layers */
+	if (consume_open_no_preempt(costc, my_llc_id) == 0)
+		return;
+
 	scx_bpf_consume(LO_FALLBACK_DSQ);
 }
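
The comment in layered_dispatch() above notes that extending the slice from ops.tick() would be a more direct way to keep @prev running, but that this isn't available on older kernels. Purely as an illustration (this callback is not part of this commit; the op name and slice value are hypothetical), such a tick handler could look roughly like:

	/* Hypothetical sketch, not in this commit: refresh the slice from
	 * ops.tick() instead of relying on the automatic local enqueue. */
	void BPF_STRUCT_OPS(layered_tick, struct task_struct *p)
	{
		struct cpu_ctx *cctx;

		if (!(cctx = lookup_cpu_ctx(-1)))
			return;

		/* keep_running() decides whether @p should stay on this CPU */
		if (keep_running(cctx, p))
			p->scx.slice = SCX_SLICE_DFL;	/* illustrative slice value */
	}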
