diff --git a/scheds/rust/scx_layered/src/bpf/main.bpf.c b/scheds/rust/scx_layered/src/bpf/main.bpf.c
index 51fb1551..1c3fb558 100644
--- a/scheds/rust/scx_layered/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c
@@ -1306,81 +1306,52 @@ void layered_dispatch_no_topo(s32 cpu, struct task_struct *prev)
 	scx_bpf_consume(LO_FALLBACK_DSQ);
 }
 
-void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)
+int consume_preempting(struct cost *costc, u32 my_llc_id)
 {
-	if (disable_topology)
-		return layered_dispatch_no_topo(cpu, prev);
-
-	struct cpu_ctx *cctx, *sib_cctx;
 	struct layer *layer;
-	struct cost *cost;
 	u64 dsq_id;
 	u32 idx, llc_idx, layer_idx;
-	s32 sib = sibling_cpu(cpu);
-
-	if (!(cctx = lookup_cpu_ctx(-1)) ||
-	    !(cost = lookup_cpu_cost(cpu)))
-		return;
-
-	/*
-	 * if @prev was on SCX and is still runnable, we are here because @prev
-	 * has exhausted its slice. We may want to keep running it on this CPU
-	 * rather than giving this CPU to another task and then try to schedule
-	 * @prev somewhere else.
-	 *
-	 * Let's not dispatch any task if we want to keep running @prev. This
-	 * will trigger the automatic local enq behavior which will put @prev on
-	 * @cpu's local DSQ. A more straightforward way to implement this would
-	 * be extending slice from ops.tick() but that's not available in older
-	 * kernels, so let's make do with this for now.
-	 */
-	if (prev && keep_running(cctx, prev))
-		return;
 
-	/*
-	 * If the sibling CPU is running an exclusive task, keep this CPU idle.
-	 * This test is a racy test but should be good enough for best-effort
-	 * optimization.
-	 */
-	if (sib >= 0 && (sib_cctx = lookup_cpu_ctx(sib)) &&
-	    sib_cctx->current_exclusive) {
-		gstat_inc(GSTAT_EXCL_IDLE, cctx);
-		return;
-	}
-
-	u32 my_llc_id = cpu_to_llc_id(cpu);
+	if (!costc)
+		return -EINVAL;
 
-	/* consume preempting layers first */
 	bpf_for(idx, 0, nr_layers) {
-		layer_idx = rotate_layer_id(cost->pref_layer, idx);
+		layer_idx = rotate_layer_id(costc->pref_layer, idx);
 		if (layer_idx >= nr_layers) {
 			scx_bpf_error("can't happen");
-			return;
+			return -EINVAL;
 		}
 		layer = MEMBER_VPTR(layers, [layer_idx]);
-		if (has_budget(cost, layer) == 0)
+		if (has_budget(costc, layer) == 0)
 			continue;
 		bpf_for(llc_idx, 0, nr_llcs) {
 			u32 llc_id = rotate_llc_id(my_llc_id, llc_idx);
 			dsq_id = layer_dsq_id(layer_idx, llc_id);
 			if (layer->preempt && scx_bpf_consume(dsq_id))
-				return;
+				return 0;
 		}
 	}
 
-	dsq_id = cpu_hi_fallback_dsq_id(cpu);
-	if (scx_bpf_consume(dsq_id))
-		return;
+	return -ENOENT;
+}
+
+int consume_non_open(struct cost *costc, s32 cpu, u32 my_llc_id)
+{
+	struct layer *layer;
+	u64 dsq_id;
+	u32 idx, llc_idx, layer_idx;
+
+	if (!costc)
+		return -EINVAL;
 
-	/* consume !open layers second */
 	bpf_for(idx, 0, nr_layers) {
-		layer_idx = rotate_layer_id(cost->pref_layer, idx);
+		layer_idx = rotate_layer_id(costc->pref_layer, idx);
 		if (layer_idx >= nr_layers) {
 			scx_bpf_error("can't happen");
-			return;
+			return -EINVAL;
 		}
 		layer = MEMBER_VPTR(layers, [layer_idx]);
-		if (has_budget(cost, layer) == 0)
+		if (has_budget(costc, layer) == 0)
 			continue;
 		bpf_for(llc_idx, 0, nr_llcs) {
 			u32 llc_id = rotate_llc_id(my_llc_id, llc_idx);
@@ -1389,36 +1360,108 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)
 
 			/* consume matching layers */
 			if (!(layer_cpumask = lookup_layer_cpumask(layer_idx)))
-				return;
+				return 0;
 
 			if (bpf_cpumask_test_cpu(cpu, layer_cpumask) ||
 			    (cpu <= nr_possible_cpus && cpu == fallback_cpu &&
 			     layer->nr_cpus == 0)) {
 				if (scx_bpf_consume(dsq_id))
-					return;
+					return 0;
 			}
 		}
 	}
 
-	/* consume !preempting open layers */
+	return -ENOENT;
+}
+
+int consume_open_no_preempt(struct cost *costc, u32 my_llc_id)
+{
+	struct layer *layer;
+	u64 dsq_id;
+	u32 idx, llc_idx, layer_idx;
+
+	if (!costc)
+		return -EINVAL;
+
 	bpf_for(idx, 0, nr_layers) {
-		layer_idx = rotate_layer_id(cost->pref_layer, idx);
+		layer_idx = rotate_layer_id(costc->pref_layer, idx);
 		if (layer_idx >= nr_layers) {
 			scx_bpf_error("can't happen");
-			return;
+			return -EINVAL;
 		}
 		layer = MEMBER_VPTR(layers, [layer_idx]);
-		if (has_budget(cost, layer) == 0)
+		if (has_budget(costc, layer) == 0)
 			continue;
 		bpf_for(llc_idx, 0, nr_llcs) {
 			u32 llc_id = rotate_llc_id(my_llc_id, llc_idx);
 			dsq_id = layer_dsq_id(layer_idx, llc_id);
 
 			if (!layer->preempt && layer->open &&
 			    scx_bpf_consume(dsq_id))
-				return;
+				return 0;
 		}
 	}
 
+	return -ENOENT;
+}
+
+void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)
+{
+	if (disable_topology)
+		return layered_dispatch_no_topo(cpu, prev);
+
+	struct cpu_ctx *cctx, *sib_cctx;
+	struct cost *costc;
+	u64 dsq_id;
+	s32 sib = sibling_cpu(cpu);
+
+	if (!(cctx = lookup_cpu_ctx(-1)) ||
+	    !(costc = lookup_cpu_cost(cpu)))
+		return;
+
+	/*
+	 * if @prev was on SCX and is still runnable, we are here because @prev
+	 * has exhausted its slice. We may want to keep running it on this CPU
+	 * rather than giving this CPU to another task and then try to schedule
+	 * @prev somewhere else.
+	 *
+	 * Let's not dispatch any task if we want to keep running @prev. This
+	 * will trigger the automatic local enq behavior which will put @prev on
+	 * @cpu's local DSQ. A more straightforward way to implement this would
+	 * be extending slice from ops.tick() but that's not available in older
+	 * kernels, so let's make do with this for now.
+	 */
+	if (prev && keep_running(cctx, prev))
+		return;
+
+	/*
+	 * If the sibling CPU is running an exclusive task, keep this CPU idle.
+	 * This test is a racy test but should be good enough for best-effort
+	 * optimization.
+	 */
+	if (sib >= 0 && (sib_cctx = lookup_cpu_ctx(sib)) &&
+	    sib_cctx->current_exclusive) {
+		gstat_inc(GSTAT_EXCL_IDLE, cctx);
+		return;
+	}
+
+	u32 my_llc_id = cpu_to_llc_id(cpu);
+
+	/* consume preempting layers first */
+	if (consume_preempting(costc, my_llc_id) == 0)
+		return;
+
+	dsq_id = cpu_hi_fallback_dsq_id(cpu);
+	if (scx_bpf_consume(dsq_id))
+		return;
+
+	/* consume !open layers second */
+	if (consume_non_open(costc, cpu, my_llc_id) == 0)
+		return;
+
+	/* consume !preempting open layers */
+	if (consume_open_no_preempt(costc, my_llc_id) == 0)
+		return;
+
 	scx_bpf_consume(LO_FALLBACK_DSQ);
 }
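
Note on the refactor: the dispatch priority order is unchanged; the patch only lifts the three consume loops out of layered_dispatch() into helpers that report their outcome through a return code (0 when a task was consumed, -ENOENT when the tier had nothing runnable, -EINVAL when a lookup such as the costc check fails), so the caller can fall through the tiers in sequence. Below is a minimal standalone userspace sketch of that convention; consume_tier() and the tier names are illustrative stand-ins, not functions from scx_layered or the BPF code above.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Hypothetical stand-in for the consume_*() helpers: 0 means a task was
 * dispatched from this tier, -ENOENT means the tier was empty, -EINVAL
 * mirrors the "!costc" lookup-failure checks in the patch.
 */
static int consume_tier(const char *name, bool has_work)
{
	if (!name)
		return -EINVAL;
	if (has_work) {
		printf("dispatched from %s\n", name);
		return 0;
	}
	return -ENOENT;
}

int main(void)
{
	/*
	 * Same priority order as the refactored layered_dispatch():
	 * preempting layers, hi fallback DSQ, !open layers, then open
	 * !preempting layers, and finally the lo fallback DSQ.
	 */
	if (consume_tier("preempting layers", false) == 0)
		return 0;
	if (consume_tier("hi fallback dsq", false) == 0)
		return 0;
	if (consume_tier("non-open layers", true) == 0)
		return 0;
	if (consume_tier("open !preempting layers", false) == 0)
		return 0;
	printf("falling back to LO_FALLBACK_DSQ\n");
	return 0;
}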