diff --git a/rust/scx_utils/src/bpf_builder.rs b/rust/scx_utils/src/bpf_builder.rs index 6830ce236..7e8a13c9d 100644 --- a/rust/scx_utils/src/bpf_builder.rs +++ b/rust/scx_utils/src/bpf_builder.rs @@ -362,7 +362,7 @@ impl BpfBuilder { } /// Return `(VER, SHA1)` from which the bulit-in `vmlinux.h` is generated. - pub fn vmlinux_h_ver_sha1() -> (String, String) { + pub fn vmlinux_h_ver_sha1() -> String { let mut ar = tar::Archive::new(Self::BPF_H_TAR); for file in ar.entries().unwrap() { @@ -378,7 +378,7 @@ impl BpfBuilder { .to_string_lossy() .to_string(); - return sscanf!(name, "vmlinux-v{String}-g{String}.h").unwrap(); + return sscanf!(name, "vmlinux-{String}.h").unwrap(); } panic!("vmlinux/vmlinux.h not found"); @@ -586,15 +586,10 @@ mod tests { #[test] fn test_vmlinux_h_ver_sha1() { - let (ver, sha1) = super::BpfBuilder::vmlinux_h_ver_sha1(); + let ver = super::BpfBuilder::vmlinux_h_ver_sha1(); - println!("vmlinux.h: ver={:?} sha1={:?}", &ver, &sha1,); + println!("vmlinux.h: ver={:?}", &ver); - assert!(regex::Regex::new(r"^([1-9][0-9]*\.[1-9][0-9][a-z0-9-]*)$") - .unwrap() - .is_match(&ver)); - assert!(regex::Regex::new(r"^[0-9a-z]{12}$") - .unwrap() - .is_match(&sha1)); + assert!(regex::Regex::new(r"^[a-f0-9]{7}$").unwrap().is_match(&ver)); } } diff --git a/rust/scx_utils/src/compat.rs b/rust/scx_utils/src/compat.rs index c2d977b0e..977457d02 100644 --- a/rust/scx_utils/src/compat.rs +++ b/rust/scx_utils/src/compat.rs @@ -164,9 +164,9 @@ macro_rules! unwrap_or_break { pub fn check_min_requirements() -> Result<()> { // ec7e3b0463e1 ("implement-ops") in https://github.com/sched-ext/sched_ext // is the current minimum required kernel version. - if let Ok(false) | Err(_) = struct_has_field("sched_ext_ops", "dump") { - bail!("sched_ext_ops.dump() missing, kernel too old?"); - } + // if let Ok(false) | Err(_) = struct_has_field("sched_ext_ops", "dump") { + // bail!("sched_ext_ops.dump() missing, kernel too old?"); + // } Ok(()) } @@ -187,21 +187,21 @@ macro_rules! scx_ops_open { }; let ops = skel.struct_ops.[<$ops _mut>](); - let path = std::path::Path::new("/sys/kernel/sched_ext/hotplug_seq"); - - let val = match std::fs::read_to_string(&path) { - Ok(val) => val, - Err(_) => { - break 'block Err(anyhow::anyhow!("Failed to open or read file {:?}", path)); - } - }; - - ops.hotplug_seq = match val.trim().parse::() { - Ok(parsed) => parsed, - Err(_) => { - break 'block Err(anyhow::anyhow!("Failed to parse hotplug seq {}", val)); - } - }; + // let path = std::path::Path::new("/sys/kernel/sched_ext/hotplug_seq"); + + // let val = match std::fs::read_to_string(&path) { + // Ok(val) => val, + // Err(_) => { + // break 'block Err(anyhow::anyhow!("Failed to open or read file {:?}", path)); + // } + // }; + + // ops.hotplug_seq = match val.trim().parse::() { + // Ok(parsed) => parsed, + // Err(_) => { + // break 'block Err(anyhow::anyhow!("Failed to parse hotplug seq {}", val)); + // } + // }; let result : Result, anyhow::Error> = Ok(skel); result @@ -218,7 +218,7 @@ macro_rules! scx_ops_open { macro_rules! scx_ops_load { ($skel: expr, $ops: ident, $uei: ident) => { 'block: { scx_utils::paste! 
{ - scx_utils::uei_set_size!($skel, $ops, $uei); + //scx_utils::uei_set_size!($skel, $ops, $uei); $skel.load().context("Failed to load BPF program") } }}; diff --git a/rust/scx_utils/src/lib.rs b/rust/scx_utils/src/lib.rs index 3aa0ccd3d..265507899 100644 --- a/rust/scx_utils/src/lib.rs +++ b/rust/scx_utils/src/lib.rs @@ -41,14 +41,14 @@ pub use bpf_builder::BpfBuilder; mod builder; pub use builder::Builder; -mod user_exit_info; -pub use user_exit_info::ScxConsts; -pub use user_exit_info::ScxExitKind; -pub use user_exit_info::UeiDumpPtr; -pub use user_exit_info::UserExitInfo; -pub use user_exit_info::SCX_ECODE_ACT_RESTART; -pub use user_exit_info::SCX_ECODE_RSN_HOTPLUG; -pub use user_exit_info::UEI_DUMP_PTR_MUTEX; +// mod user_exit_info; +// // pub use user_exit_info::ScxConsts; +// // pub use user_exit_info::ScxExitKind; +// pub use user_exit_info::UeiDumpPtr; +// pub use user_exit_info::UserExitInfo; +// pub use user_exit_info::SCX_ECODE_ACT_RESTART; +// pub use user_exit_info::SCX_ECODE_RSN_HOTPLUG; +// pub use user_exit_info::UEI_DUMP_PTR_MUTEX; pub mod build_id; pub mod compat; diff --git a/scheds/include/scx/compat.bpf.h b/scheds/include/scx/compat.bpf.h index e5afe9efd..21548815b 100644 --- a/scheds/include/scx/compat.bpf.h +++ b/scheds/include/scx/compat.bpf.h @@ -34,6 +34,97 @@ (bpf_ksym_exists(scx_bpf_dispatch_vtime_from_dsq) ? \ scx_bpf_dispatch_vtime_from_dsq((it), (p), (dsq_id), (enq_flags)) : false) +/* + * %SCX_KICK_IDLE is a later addition. To support both before and after, use + * %__COMPAT_SCX_KICK_IDLE which becomes 0 on kernels which don't support it. + * Users can use %SCX_KICK_IDLE directly in the future. + */ +#define __COMPAT_SCX_KICK_IDLE \ + __COMPAT_ENUM_OR_ZERO(enum scx_kick_flags, SCX_KICK_IDLE) + +/* + * scx_switch_all() was replaced by %SCX_OPS_SWITCH_PARTIAL. See + * %__COMPAT_SCX_OPS_SWITCH_PARTIAL in compat.h. This can be dropped in the + * future. + */ +void scx_bpf_switch_all(void) __ksym __weak; + +static inline void __COMPAT_scx_bpf_switch_all(void) +{ + scx_bpf_switch_all(); +} + +/* + * scx_bpf_exit() is a new addition. Fall back to scx_bpf_error() if + * unavailable. Users can use scx_bpf_exit() directly in the future. + */ +#define __COMPAT_scx_bpf_exit(code, fmt, args...) \ +({ \ + if (bpf_ksym_exists(scx_bpf_exit_bstr)) \ + scx_bpf_exit((code), fmt, ##args); \ + else \ + scx_bpf_error(fmt, ##args); \ +}) + +/* + * scx_bpf_dump() is a new addition. Ignore if unavailable. Users can use + * scx_bpf_dump() directly in the future. + */ +#define __COMPAT_scx_bpf_dump(fmt, args...) \ +({ \ + if (bpf_ksym_exists(scx_bpf_dump_bstr)) \ + scx_bpf_dump(fmt, ##args); \ +}) + +/* + * scx_bpf_nr_cpu_ids(), scx_bpf_get_possible/online_cpumask() are new. No good + * way to noop these kfuncs. Provide a test macro. Users can assume existence in + * the future. + */ +#define __COMPAT_HAS_CPUMASKS \ + bpf_ksym_exists(scx_bpf_nr_cpu_ids) + +/* + * cpuperf is new. The followings become noop on older kernels. Callers can be + * updated to call cpuperf kfuncs directly in the future. 
+ */ +static inline u32 __COMPAT_scx_bpf_cpuperf_cap(s32 cpu) +{ + if (bpf_ksym_exists(scx_bpf_cpuperf_cap)) + return scx_bpf_cpuperf_cap(cpu); + else + return 1024; +} + +static inline u32 __COMPAT_scx_bpf_cpuperf_cur(s32 cpu) +{ + if (bpf_ksym_exists(scx_bpf_cpuperf_cur)) + return scx_bpf_cpuperf_cur(cpu); + else + return 1024; +} + +static inline void __COMPAT_scx_bpf_cpuperf_set(s32 cpu, u32 perf) +{ + if (bpf_ksym_exists(scx_bpf_cpuperf_set)) + return scx_bpf_cpuperf_set(cpu, perf); +} + +/* + * Iteration and scx_bpf_consume_task() are new. The following become noop on + * older kernels. The users can switch to bpf_for_each(scx_dsq) and directly + * call scx_bpf_consume_task() in the future. + */ +#define __COMPAT_DSQ_FOR_EACH(p, dsq_id, flags) \ + if (bpf_ksym_exists(bpf_iter_scx_dsq_new)) \ + bpf_for_each(scx_dsq, (p), (dsq_id), (flags)) + +static inline bool __COMPAT_scx_bpf_consume_task(struct bpf_iter_scx_dsq *it, + struct task_struct *p) +{ + return false; +} + /* * Define sched_ext_ops. This may be expanded to define multiple variants for * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH(). @@ -45,3 +136,4 @@ }; #endif /* __SCX_COMPAT_BPF_H */ + diff --git a/scheds/include/scx/compat.h b/scheds/include/scx/compat.h index cc56ff9aa..70021b999 100644 --- a/scheds/include/scx/compat.h +++ b/scheds/include/scx/compat.h @@ -143,8 +143,8 @@ static inline long scx_hotplug_seq(void) #define SCX_OPS_OPEN(__ops_name, __scx_name) ({ \ struct __scx_name *__skel; \ \ - SCX_BUG_ON(!__COMPAT_struct_has_field("sched_ext_ops", "dump"), \ - "sched_ext_ops.dump() missing, kernel too old?"); \ + /* SCX_BUG_ON(!__COMPAT_struct_has_field("sched_ext_ops", "dump"), */ \ + /* "sched_ext_ops.dump() missing, kernel too old?"); */ \ \ __skel = __scx_name##__open(); \ SCX_BUG_ON(!__skel, "Could not open " #__scx_name); \ diff --git a/scheds/include/scx/user_exit_info.h b/scheds/include/scx/user_exit_info.h index 8ce273440..4856760b4 100644 --- a/scheds/include/scx/user_exit_info.h +++ b/scheds/include/scx/user_exit_info.h @@ -18,7 +18,7 @@ enum uei_sizes { struct user_exit_info { int kind; - s64 exit_code; + // s64 exit_code; char reason[UEI_REASON_LEN]; char msg[UEI_MSG_LEN]; }; @@ -32,6 +32,15 @@ struct user_exit_info { #endif #include +static inline void uei_record(struct user_exit_info *uei, + const struct scx_exit_info *ei) +{ + bpf_probe_read_kernel_str(uei->reason, sizeof(uei->reason), ei->reason); + bpf_probe_read_kernel_str(uei->msg, sizeof(uei->msg), ei->msg); + /* use __sync to force memory barrier */ + __sync_val_compare_and_swap(&uei->kind, uei->kind, ei->type); +} + #define UEI_DEFINE(__name) \ char RESIZABLE_ARRAY(data, __name##_dump); \ const volatile u32 __name##_dump_len; \ @@ -42,13 +51,13 @@ struct user_exit_info { sizeof(__uei_name.reason), (__ei)->reason); \ bpf_probe_read_kernel_str(__uei_name.msg, \ sizeof(__uei_name.msg), (__ei)->msg); \ - bpf_probe_read_kernel_str(__uei_name##_dump, \ - __uei_name##_dump_len, (__ei)->dump); \ - if (bpf_core_field_exists((__ei)->exit_code)) \ - __uei_name.exit_code = (__ei)->exit_code; \ + /* bpf_probe_read_kernel_str(__uei_name##_dump, */ \ + /* __uei_name##_dump_len, (__ei)->dump); */ \ + /* if (bpf_core_field_exists((__ei)->exit_code)) */ \ + /* __uei_name.exit_code = (__ei)->exit_code; */ \ /* use __sync to force memory barrier */ \ - __sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind, \ - (__ei)->kind); \ + __sync_val_compare_and_swap(&__uei_name.type, __uei_name.type, \ + (__ei)->type); \ }) #else /* !__bpf__ */ @@ -57,11 
+66,11 @@ struct user_exit_info { #include /* no need to call the following explicitly if SCX_OPS_LOAD() is used */ -#define UEI_SET_SIZE(__skel, __ops_name, __uei_name) ({ \ - u32 __len = (__skel)->struct_ops.__ops_name->exit_dump_len ?: UEI_DUMP_DFL_LEN; \ - (__skel)->rodata->__uei_name##_dump_len = __len; \ - RESIZE_ARRAY((__skel), data, __uei_name##_dump, __len); \ -}) +// #define UEI_SET_SIZE(__skel, __ops_name, __uei_name) ({ \ +// u32 __len = (__skel)->struct_ops.__ops_name->exit_dump_len ?: UEI_DUMP_DFL_LEN; \ +// (__skel)->rodata->__uei_name##_dump_len = __len; \ +// RESIZE_ARRAY((__skel), data, __uei_name##_dump, __len); \ +// }) #define UEI_EXITED(__skel, __uei_name) ({ \ /* use __sync to force memory barrier */ \ @@ -70,18 +79,18 @@ struct user_exit_info { #define UEI_REPORT(__skel, __uei_name) ({ \ struct user_exit_info *__uei = &(__skel)->data->__uei_name; \ - char *__uei_dump = (__skel)->data_##__uei_name##_dump->__uei_name##_dump; \ - if (__uei_dump[0] != '\0') { \ - fputs("\nDEBUG DUMP\n", stderr); \ - fputs("================================================================================\n\n", stderr); \ - fputs(__uei_dump, stderr); \ - fputs("\n================================================================================\n\n", stderr); \ - } \ + /* char *__uei_dump = (__skel)->data_##__uei_name##_dump->__uei_name##_dump; *\ \ + /* if (__uei_dump[0] != '\0') { *\ \ + /* fputs("\nDEBUG DUMP\n", stderr); *\ \ + /* fputs("================================================================================\n\n", stderr); *\ \ + /* fputs(__uei_dump, stderr); *\ \ + /* fputs("\n================================================================================\n\n", stderr); *\ \ + /* } *\ \ fprintf(stderr, "EXIT: %s", __uei->reason); \ if (__uei->msg[0] != '\0') \ fprintf(stderr, " (%s)", __uei->msg); \ fputs("\n", stderr); \ - __uei->exit_code; \ + /* __uei->exit_code; */ \ }) /* diff --git a/scheds/rust/scx_layered/src/bpf/main.bpf.c b/scheds/rust/scx_layered/src/bpf/main.bpf.c index 59fb749f7..adb7dd1cc 100644 --- a/scheds/rust/scx_layered/src/bpf/main.bpf.c +++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c @@ -47,9 +47,14 @@ static u32 preempt_cursor; #define dbg(fmt, args...) do { if (debug) bpf_printk(fmt, ##args); } while (0) #define trace(fmt, args...) do { if (debug > 1) bpf_printk(fmt, ##args); } while (0) -#include "util.bpf.c" +#include "util.bpf.h" -UEI_DEFINE(uei); +#define __COMPAT_scx_bpf_error(fmt, args...) 
\ + do { \ + bpf_printk(fmt, ##args); \ + } while (0) + +struct user_exit_info uei; static inline bool vtime_before(u64 a, u64 b) { @@ -113,7 +118,7 @@ static u32 dsq_iter_rr_cpu_ctx(u32 layer_idx, int idx) offset -= nr_layers; if (offset > MAX_LAYERS) { - scx_bpf_error("invalid layer id %u", layer_idx); + __COMPAT_scx_bpf_error("invalid layer id %u", layer_idx); return 0; } return offset; @@ -136,7 +141,7 @@ static __noinline u32 iter_layer_dsq_ctx(int idx, u32 layer_idx) u32 ret; ret = dsq_iter_weight_ctx(idx); if (ret >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return ret; } return ret; @@ -147,7 +152,7 @@ static __noinline u32 iter_layer_dsq_ctx(int idx, u32 layer_idx) return dsq_iter_weight_ctx(idx); } default: - scx_bpf_error("unknown dsq iter algo"); + __COMPAT_scx_bpf_error("unknown dsq iter algo"); return 0; } } @@ -164,7 +169,7 @@ static __noinline u32 cpu_to_llc_id(s32 cpu_id) llc_ptr = MEMBER_VPTR(cpu_llc_id_map, [cpu_id]); if (!llc_ptr) { - scx_bpf_error("Couldn't look up llc ID for cpu %d", cpu_id); + __COMPAT_scx_bpf_error("Couldn't look up llc ID for cpu %d", cpu_id); return 0; } return *llc_ptr; @@ -176,7 +181,7 @@ u32 llc_node_id(u32 llc_id) llc_ptr = MEMBER_VPTR(llc_numa_id_map, [llc_id]); if (!llc_ptr) { - scx_bpf_error("Couldn't look up llc ID for %d", llc_id); + __COMPAT_scx_bpf_error("Couldn't look up llc ID for %d", llc_id); return 0; } return *llc_ptr; @@ -232,13 +237,28 @@ static struct cpu_ctx *lookup_cpu_ctx(int cpu) cctx = bpf_map_lookup_percpu_elem(&cpu_ctxs, &zero, cpu); if (!cctx) { - scx_bpf_error("no cpu_ctx for cpu %d", cpu); + __COMPAT_scx_bpf_error("no cpu_ctx for cpu %d", cpu); return NULL; } return cctx; } +static u32 cpu_to_llc_id(s32 cpu_id) +{ + const volatile u32 *llc_ptr; + + llc_ptr = MEMBER_VPTR(cpu_llc_id_map, [cpu_id]); + if (!llc_ptr) { + __COMPAT_scx_bpf_error("Couldn't look up llc ID for cpu %d", cpu_id); + return 0; + } + return *llc_ptr; +} + +/* + * Numa node context + */ struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, u32); @@ -274,7 +294,7 @@ static struct cache_ctx *lookup_cache_ctx(u32 cache_idx) static void gstat_inc(enum global_stat_idx idx, struct cpu_ctx *cctx) { if (idx < 0 || idx >= NR_GSTATS) { - scx_bpf_error("invalid global stat idx %d", idx); + __COMPAT_scx_bpf_error("invalid global stat idx %d", idx); return; } @@ -289,7 +309,7 @@ static void lstat_add(enum layer_stat_idx idx, struct layer *layer, if ((vptr = MEMBER_VPTR(*cctx, .lstats[layer->idx][idx]))) (*vptr) += delta; else - scx_bpf_error("invalid layer or stat idxs: %d, %d", idx, layer->idx); + __COMPAT_scx_bpf_error("invalid layer or stat idxs: %d, %d", idx, layer->idx); } static void lstat_inc(enum layer_stat_idx idx, struct layer *layer, @@ -319,7 +339,7 @@ static void adj_load(u32 layer_idx, s64 adj, u64 now) lockw = bpf_map_lookup_elem(&layer_load_locks, &layer_idx); if (!layer || !lockw) { - scx_bpf_error("Can't access layer%d or its load_lock", layer_idx); + __COMPAT_scx_bpf_error("Can't access layer%d or its load_lock", layer_idx); return; } @@ -329,7 +349,7 @@ static void adj_load(u32 layer_idx, s64 adj, u64 now) bpf_spin_unlock(&lockw->lock); if (debug && adj < 0 && (s64)layer->load < 0) - scx_bpf_error("cpu%d layer%d load underflow (load=%lld adj=%lld)", + __COMPAT_scx_bpf_error("cpu%d layer%d load underflow (load=%lld adj=%lld)", bpf_get_smp_processor_id(), layer_idx, layer->load, adj); } @@ -352,7 +372,7 @@ static struct cpumask *lookup_layer_cpumask(int idx) if ((cpumaskw = 
bpf_map_lookup_elem(&layer_cpumasks, &idx))) { return (struct cpumask *)cpumaskw->cpumask; } else { - scx_bpf_error("no layer_cpumask"); + __COMPAT_scx_bpf_error("no layer_cpumask"); return NULL; } } @@ -373,7 +393,7 @@ static void refresh_cpumasks(int idx) u8 *u8_ptr; if (!(cctx = lookup_cpu_ctx(cpu))) { - scx_bpf_error("unknown cpu"); + __COMPAT_scx_bpf_error("unknown cpu"); return; } @@ -385,7 +405,7 @@ static void refresh_cpumasks(int idx) */ barrier_var(cpumaskw); if (!cpumaskw || !cpumaskw->cpumask) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } @@ -396,14 +416,14 @@ static void refresh_cpumasks(int idx) bpf_cpumask_clear_cpu(cpu, cpumaskw->cpumask); } } else { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); } } // XXX - shouldn't be necessary layer = MEMBER_VPTR(layers, [idx]); if (!layer) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } @@ -412,6 +432,18 @@ static void refresh_cpumasks(int idx) trace("LAYER[%d] now has %d cpus, seq=%llu", idx, layer->nr_cpus, layer->cpus_seq); } +u32 llc_node_id(u32 llc_id) +{ + const volatile u32 *llc_ptr; + + llc_ptr = MEMBER_VPTR(llc_numa_id_map, [llc_id]); + if (!llc_ptr) { + __COMPAT_scx_bpf_error("Couldn't look up llc ID for %d", llc_id); + return 0; + } + return *llc_ptr; +} + SEC("fentry") int BPF_PROG(sched_tick_fentry) { @@ -433,6 +465,7 @@ struct task_ctx { struct bpf_cpumask __kptr *layered_cpumask; bool all_cpus_allowed; + bool dispatch_local; u64 runnable_at; u64 running_at; }; @@ -454,7 +487,7 @@ static struct task_ctx *lookup_task_ctx(struct task_struct *p) struct task_ctx *tctx = lookup_task_ctx_may_fail(p); if (!tctx) - scx_bpf_error("task_ctx lookup failed"); + __COMPAT_scx_bpf_error("task_ctx lookup failed"); return tctx; } @@ -462,7 +495,7 @@ static struct task_ctx *lookup_task_ctx(struct task_struct *p) static struct layer *lookup_layer(int idx) { if (idx < 0 || idx >= nr_layers) { - scx_bpf_error("invalid layer %d", idx); + __COMPAT_scx_bpf_error("invalid layer %d", idx); return NULL; } return &layers[idx]; @@ -495,7 +528,7 @@ int BPF_PROG(tp_cgroup_attach_task, struct cgroup *cgrp, const char *cgrp_path, thread_head = &leader->signal->thread_head; if (!(next = bpf_task_acquire(leader))) { - scx_bpf_error("failed to acquire leader"); + __COMPAT_scx_bpf_error("failed to acquire leader"); return 0; } @@ -808,9 +841,8 @@ s32 BPF_STRUCT_OPS(layered_select_cpu, struct task_struct *p, s32 prev_cpu, u64 cpu = pick_idle_cpu(p, prev_cpu, cctx, tctx, layer, true); if (cpu >= 0) { - lstat_inc(LSTAT_SEL_LOCAL, layer, cctx); u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns; - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, layer_slice_ns, 0); + tctx->dispatch_local = true; return cpu; } else { return prev_cpu; @@ -828,7 +860,7 @@ bool pick_idle_cpu_and_kick(struct task_struct *p, s32 task_cpu, if (cpu >= 0) { lstat_inc(LSTAT_KICK, layer, cctx); - scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE); + scx_bpf_kick_cpu(cpu, 0 /*SCX_KICK_IDLE*/); return true; } else { return false; @@ -1052,6 +1084,13 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags) !(layer = lookup_layer(tctx->layer))) return; + if (tctx->dispatch_local) { + tctx->dispatch_local = false; + lstat_inc(LSTAT_SEL_LOCAL, layer, cctx); + scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, enq_flags); + return; + } + try_preempt_first = cctx->try_preempt_first; cctx->try_preempt_first = false; u64 layer_slice_ns = layer->slice_ns > 0 ? 
layer->slice_ns : slice_ns; @@ -1272,7 +1311,7 @@ void layered_dispatch_no_topo(s32 cpu, struct task_struct *prev) bpf_for(idx, 0, nr_layers) { layer_idx = iter_layer_dsq_ctx(idx, cctx->layer_idx); if (layer_idx >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } layer = MEMBER_VPTR(layers, [layer_idx]); @@ -1288,7 +1327,7 @@ void layered_dispatch_no_topo(s32 cpu, struct task_struct *prev) bpf_for(idx, 0, nr_layers) { layer_idx = iter_layer_dsq_ctx(idx, cctx->layer_idx); if (layer_idx >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } layer = MEMBER_VPTR(layers, [layer_idx]); @@ -1309,7 +1348,7 @@ void layered_dispatch_no_topo(s32 cpu, struct task_struct *prev) bpf_for(idx, 0, nr_layers) { layer_idx = iter_layer_dsq_ctx(idx, cctx->layer_idx); if (layer_idx >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } layer = MEMBER_VPTR(layers, [layer_idx]); @@ -1372,7 +1411,7 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev) bpf_for(idx, 0, nr_layers) { layer_idx = iter_layer_dsq_ctx(idx, cctx->layer_idx); if (layer_idx >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } layer = MEMBER_VPTR(layers, [layer_idx]); @@ -1392,7 +1431,7 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev) bpf_for(idx, 0, nr_layers) { layer_idx = iter_layer_dsq_ctx(idx, cctx->layer_idx); if (layer_idx >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } layer = MEMBER_VPTR(layers, [layer_idx]); @@ -1418,7 +1457,7 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev) bpf_for(idx, 0, nr_layers) { layer_idx = iter_layer_dsq_ctx(idx, cctx->layer_idx); if (layer_idx >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } layer = MEMBER_VPTR(layers, [layer_idx]); @@ -1482,7 +1521,7 @@ static __noinline bool match_one(struct layer_match *match, case MATCH_TGID_EQUALS: return p->tgid == match->tgid; default: - scx_bpf_error("invalid match kind %d", match->kind); + __COMPAT_scx_bpf_error("invalid match kind %d", match->kind); return result; } } @@ -1591,7 +1630,7 @@ static void maybe_refresh_layer(struct task_struct *p, struct task_ctx *tctx) */ p->scx.dsq_vtime = layer->vtime_now; } else { - scx_bpf_error("[%s]%d didn't match any layer", p->comm, p->pid); + __COMPAT_scx_bpf_error("[%s]%d didn't match any layer", p->comm, p->pid); } if (tctx->layer < nr_layers - 1) @@ -1605,13 +1644,13 @@ static s32 create_save_cpumask(struct bpf_cpumask **kptr) cpumask = bpf_cpumask_create(); if (!cpumask) { - scx_bpf_error("Failed to create cpumask"); + __COMPAT_scx_bpf_error("Failed to create cpumask"); return -ENOMEM; } cpumask = bpf_kptr_xchg(kptr, cpumask); if (cpumask) { - scx_bpf_error("kptr already had cpumask"); + __COMPAT_scx_bpf_error("kptr already had cpumask"); bpf_cpumask_release(cpumask); } @@ -1629,7 +1668,7 @@ static s32 create_node(u32 node_id) nodec = bpf_map_lookup_elem(&node_data, &node_id); if (!nodec) { /* Should never happen, it's created statically at load time. 
*/ - scx_bpf_error("No node%u", node_id); + __COMPAT_scx_bpf_error("No node%u", node_id); return -ENOENT; } nodec->id = node_id; @@ -1642,7 +1681,7 @@ static s32 create_node(u32 node_id) cpumask = nodec->cpumask; if (!cpumask) { bpf_rcu_read_unlock(); - scx_bpf_error("Failed to lookup node cpumask"); + __COMPAT_scx_bpf_error("Failed to lookup node cpumask"); return -ENOENT; } @@ -1651,7 +1690,7 @@ static s32 create_node(u32 node_id) nmask = MEMBER_VPTR(numa_cpumasks, [node_id][cpu / 64]); if (!nmask) { - scx_bpf_error("array index error"); + __COMPAT_scx_bpf_error("array index error"); ret = -ENOENT; break; } @@ -1659,7 +1698,7 @@ static s32 create_node(u32 node_id) if (*nmask & (1LLU << (cpu % 64))) { bpf_cpumask_set_cpu(cpu, cpumask); if (!(cctx = lookup_cpu_ctx(cpu))) { - scx_bpf_error("cpu ctx error"); + __COMPAT_scx_bpf_error("cpu ctx error"); ret = -ENOENT; break; } @@ -1685,7 +1724,7 @@ static s32 create_cache(u32 cache_id) cachec = bpf_map_lookup_elem(&cache_data, &cache_id); if (!cachec) { - scx_bpf_error("No cache%u", cache_id); + __COMPAT_scx_bpf_error("No cache%u", cache_id); return -ENOENT; } cachec->id = cache_id; @@ -1698,14 +1737,14 @@ static s32 create_cache(u32 cache_id) cpumask = cachec->cpumask; if (!cpumask) { bpf_rcu_read_unlock(); - scx_bpf_error("Failed to lookup node cpumask"); + __COMPAT_scx_bpf_error("Failed to lookup node cpumask"); return -ENOENT; } bpf_for(cpu, 0, nr_possible_cpus) { if (!(cctx = lookup_cpu_ctx(cpu))) { bpf_rcu_read_unlock(); - scx_bpf_error("cpu ctx error"); + __COMPAT_scx_bpf_error("cpu ctx error"); return -ENOENT; } @@ -1823,7 +1862,7 @@ void BPF_STRUCT_OPS(layered_running, struct task_struct *p) } if (layer->perf > 0) - scx_bpf_cpuperf_set(task_cpu, layer->perf); + __COMPAT_scx_bpf_cpuperf_set(task_cpu, layer->perf); cctx->maybe_idle = false; } @@ -1918,7 +1957,7 @@ void BPF_STRUCT_OPS(layered_set_cpumask, struct task_struct *p, return; if (!all_cpumask) { - scx_bpf_error("NULL all_cpumask"); + __COMPAT_scx_bpf_error("NULL all_cpumask"); return; } @@ -1932,8 +1971,7 @@ void BPF_STRUCT_OPS(layered_cpu_release, s32 cpu, scx_bpf_reenqueue_local(); } -s32 BPF_STRUCT_OPS(layered_init_task, struct task_struct *p, - struct scx_init_task_args *args) +s32 BPF_STRUCT_OPS(layered_prep_enable, struct task_struct *p, struct scx_enable_args *args) { struct task_ctx *tctx; struct bpf_cpumask *cpumask; @@ -1946,7 +1984,7 @@ s32 BPF_STRUCT_OPS(layered_init_task, struct task_struct *p, tctx = bpf_task_storage_get(&task_ctxs, p, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!tctx) { - scx_bpf_error("task_ctx allocation failure"); + __COMPAT_scx_bpf_error("task_ctx allocation failure"); return -ENOMEM; } @@ -1970,7 +2008,7 @@ s32 BPF_STRUCT_OPS(layered_init_task, struct task_struct *p, tctx->all_cpus_allowed = bpf_cpumask_subset((const struct cpumask *)all_cpumask, p->cpus_ptr); else - scx_bpf_error("missing all_cpumask"); + __COMPAT_scx_bpf_error("missing all_cpumask"); /* * We are matching cgroup hierarchy path directly rather than the CPU @@ -1982,8 +2020,7 @@ s32 BPF_STRUCT_OPS(layered_init_task, struct task_struct *p, return 0; } -void BPF_STRUCT_OPS(layered_exit_task, struct task_struct *p, - struct scx_exit_task_args *args) +void BPF_STRUCT_OPS(layered_disable, struct task_struct *p) { struct cpu_ctx *cctx; struct task_ctx *tctx; @@ -1995,49 +2032,50 @@ void BPF_STRUCT_OPS(layered_exit_task, struct task_struct *p, __sync_fetch_and_add(&layers[tctx->layer].nr_tasks, -1); } -static u64 dsq_first_runnable_for_ms(u64 dsq_id, u64 now) -{ - struct task_struct *p; 
- - if (dsq_id > LO_FALLBACK_DSQ) - return 0; +// static u64 dsq_first_runnable_for_ms(u64 dsq_id, u64 now) +// { +// struct task_struct *p; +// +// if (dsq_id > LO_FALLBACK_DSQ) +// return 0; +// +// bpf_for_each(scx_dsq, p, dsq_id, 0) { +// struct task_ctx *tctx; +// +// if ((tctx = lookup_task_ctx(p))) +// return (now - tctx->runnable_at) / 1000000; +// } +// +// return 0; +// } - bpf_for_each(scx_dsq, p, dsq_id, 0) { - struct task_ctx *tctx; - - if ((tctx = lookup_task_ctx(p))) - return (now - tctx->runnable_at) / 1000000; - } - - return 0; -} static void dump_layer_cpumask(int idx) { - struct cpumask *layer_cpumask; - s32 cpu; - char buf[128] = "", *p; - - if (!(layer_cpumask = lookup_layer_cpumask(idx))) - return; - - bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) { - if (!(p = MEMBER_VPTR(buf, [idx++]))) - break; - if (bpf_cpumask_test_cpu(cpu, layer_cpumask)) - *p++ = '0' + cpu % 10; - else - *p++ = '.'; - - if ((cpu & 7) == 7) { - if (!(p = MEMBER_VPTR(buf, [idx++]))) - break; - *p++ = '|'; - } - } - buf[sizeof(buf) - 1] = '\0'; - - scx_bpf_dump("%s", buf); + /* struct cpumask *layer_cpumask; */ + /* s32 cpu; */ + /* char buf[128] = "", *p; */ + /**/ + /* if (!(layer_cpumask = lookup_layer_cpumask(idx))) */ + /* return; */ + /**/ + /* bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) { */ + /* if (!(p = MEMBER_VPTR(buf, [idx++]))) */ + /* break; */ + /* if (bpf_cpumask_test_cpu(cpu, layer_cpumask)) */ + /* *p++ = '0' + cpu % 10; */ + /* else */ + /* *p++ = '.'; */ + /**/ + /* if ((cpu & 7) == 7) { */ + /* if (!(p = MEMBER_VPTR(buf, [idx++]))) */ + /* break; */ + /* *p++ = '|'; */ + /* } */ + /* } */ + /* buf[sizeof(buf) - 1] = '\0'; */ + /**/ + /* scx_bpf_dump("%s", buf); */ } void BPF_STRUCT_OPS(layered_dump, struct scx_dump_ctx *dctx) @@ -2047,50 +2085,185 @@ void BPF_STRUCT_OPS(layered_dump, struct scx_dump_ctx *dctx) int i, j, idx; struct layer *layer; - bpf_for(i, 0, nr_layers) { - layer = lookup_layer(i); - if (!layer) { - scx_bpf_error("unabled to lookup layer %d", i); - continue; - } - - if (disable_topology) { - scx_bpf_dump("LAYER[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", - i, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(i), - dsq_first_runnable_for_ms(i, now)); - } else { - bpf_for(j, 0, nr_llcs) { - if (!(layer->cache_mask & (1 << j))) - continue; - - idx = layer_dsq_id(layer->idx, j); - scx_bpf_dump("LAYER[%d]DSQ[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", - i, idx, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(idx), - dsq_first_runnable_for_ms(idx, now)); - scx_bpf_dump("\n"); - } - } - dump_layer_cpumask(i); - scx_bpf_dump("\n"); - } - bpf_for(i, 0, nr_llcs) { - dsq_id = llc_hi_fallback_dsq_id(i); - scx_bpf_dump("HI_FALLBACK[%llu] nr_queued=%d -%llums\n", - dsq_id, scx_bpf_dsq_nr_queued(dsq_id), - dsq_first_runnable_for_ms(dsq_id, now)); - } - scx_bpf_dump("LO_FALLBACK nr_queued=%d -%llums\n", - scx_bpf_dsq_nr_queued(LO_FALLBACK_DSQ), - dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now)); + /* bpf_for(i, 0, nr_layers) { */ + /* layer = lookup_layer(i); */ + /* if (!layer) { */ + /* __COMPAT_scx_bpf_error("unabled to lookup layer %d", i); */ + /* continue; */ + /* } */ + /**/ + /* if (disable_topology) { */ + /* scx_bpf_dump("LAYER[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", */ + /* i, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(i), */ + /* dsq_first_runnable_for_ms(i, now)); */ + /* } else { */ + /* bpf_for(j, 0, nr_llcs) { */ + /* if (!(layer->cache_mask & (1 << j))) */ + /* continue; */ + /**/ + /* idx = layer_dsq_id(layer->idx, j); */ + /* scx_bpf_dump("LAYER[%d]DSQ[%d] nr_cpus=%u nr_queued=%d 
-%llums cpus=", */ + /* i, idx, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(idx), */ + /* dsq_first_runnable_for_ms(idx, now)); */ + /* scx_bpf_dump("\n"); */ + /* } */ + /* } */ + /* dump_layer_cpumask(i); */ + /* scx_bpf_dump("\n"); */ + /* } */ + /* bpf_for(i, 0, nr_llcs) { */ + /* dsq_id = llc_hi_fallback_dsq_id(i); */ + /* scx_bpf_dump("HI_FALLBACK[%llu] nr_queued=%d -%llums\n", */ + /* dsq_id, scx_bpf_dsq_nr_queued(dsq_id), */ + /* dsq_first_runnable_for_ms(dsq_id, now)); */ + /* } */ + /* scx_bpf_dump("LO_FALLBACK nr_queued=%d -%llums\n", */ + /* scx_bpf_dsq_nr_queued(LO_FALLBACK_DSQ), */ + /* dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now)); */ } +static void dump_layer_cpumask(int idx) +{ + /* struct cpumask *layer_cpumask; */ + /* s32 cpu; */ + /* char buf[128] = "", *p; */ + /**/ + /* if (!(layer_cpumask = lookup_layer_cpumask(idx))) */ + /* return; */ + /**/ + /* bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) { */ + /* if (!(p = MEMBER_VPTR(buf, [idx++]))) */ + /* break; */ + /* if (bpf_cpumask_test_cpu(cpu, layer_cpumask)) */ + /* *p++ = '0' + cpu % 10; */ + /* else */ + /* *p++ = '.'; */ + /**/ + /* if ((cpu & 7) == 7) { */ + /* if (!(p = MEMBER_VPTR(buf, [idx++]))) */ + /* break; */ + /* *p++ = '|'; */ + /* } */ + /* } */ + /* buf[sizeof(buf) - 1] = '\0'; */ + /**/ + /* scx_bpf_dump("%s", buf); */ +} + +/* void BPF_STRUCT_OPS(layered_dump, struct scx_dump_ctx *dctx) */ +/* { */ +/* u64 now = bpf_ktime_get_ns(); */ +/* int i, j, idx; */ +/* struct layer *layer; */ +/**/ +/* bpf_for(i, 0, nr_layers) { */ +/* layer = lookup_layer(i); */ +/* if (!layer) { */ +/* __COMPAT_scx_bpf_error("unabled to lookup layer %d", i); */ +/* continue; */ +/* } */ +/**/ +/* if (disable_topology) { */ +/* scx_bpf_dump("LAYER[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", */ +/* i, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(i), */ +/* dsq_first_runnable_for_ms(i, now)); */ +/* } else { */ +/* bpf_for(j, 0, nr_llcs) { */ +/* if (!(layer->cache_mask & (1 << j))) */ +/* continue; */ +/**/ +/* idx = layer_dsq_id(layer->idx, j); */ +/* scx_bpf_dump("LAYER[%d]DSQ[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", */ +/* i, idx, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(idx), */ +/* dsq_first_runnable_for_ms(idx, now)); */ +/* } */ +/* } */ +/* dump_layer_cpumask(i); */ +/* scx_bpf_dump("\n"); */ +/* } */ +/**/ +/* scx_bpf_dump("HI_FALLBACK nr_queued=%d -%llums\n", */ +/* scx_bpf_dsq_nr_queued(HI_FALLBACK_DSQ), */ +/* dsq_first_runnable_for_ms(HI_FALLBACK_DSQ, now)); */ +/* scx_bpf_dump("LO_FALLBACK nr_queued=%d -%llums\n", */ +/* scx_bpf_dsq_nr_queued(LO_FALLBACK_DSQ), */ +/* dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now)); */ +/* } */ +// static void dump_layer_cpumask(int idx) +// { +// struct cpumask *layer_cpumask; +// s32 cpu; +// char buf[128] = "", *p; +// +// if (!(layer_cpumask = lookup_layer_cpumask(idx))) +// return; +// +// bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) { +// if (!(p = MEMBER_VPTR(buf, [idx++]))) +// break; +// if (bpf_cpumask_test_cpu(cpu, layer_cpumask)) +// *p++ = '0' + cpu % 10; +// else +// *p++ = '.'; +// +// if ((cpu & 7) == 7) { +// if (!(p = MEMBER_VPTR(buf, [idx++]))) +// break; +// *p++ = '|'; +// } +// } +// buf[sizeof(buf) - 1] = '\0'; +// +// scx_bpf_dump("%s", buf); +// } +// +// void BPF_STRUCT_OPS(layered_dump, struct scx_dump_ctx *dctx) +// { +// u64 now = bpf_ktime_get_ns(); +// int i, j, idx; +// struct layer *layer; +// +// bpf_for(i, 0, nr_layers) { +// layer = lookup_layer(i); +// if (!layer) { +// __COMPAT_scx_bpf_error("unabled to lookup layer %d", i); +// continue; +// } +// +// if 
(disable_topology) { +// scx_bpf_dump("LAYER[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", +// i, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(i), +// dsq_first_runnable_for_ms(i, now)); +// } else { +// bpf_for(j, 0, nr_llcs) { +// if (!(layer->cache_mask & (1 << j))) +// continue; +// +// idx = layer_dsq_id(layer->idx, j); +// scx_bpf_dump("LAYER[%d]DSQ[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", +// i, idx, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(idx), +// dsq_first_runnable_for_ms(idx, now)); +// } +// } +// dump_layer_cpumask(i); +// scx_bpf_dump("\n"); +// } +// +// scx_bpf_dump("HI_FALLBACK nr_queued=%d -%llums\n", +// scx_bpf_dsq_nr_queued(HI_FALLBACK_DSQ), +// dsq_first_runnable_for_ms(HI_FALLBACK_DSQ, now)); +// scx_bpf_dump("LO_FALLBACK nr_queued=%d -%llums\n", +// scx_bpf_dsq_nr_queued(LO_FALLBACK_DSQ), +// dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now)); +// } + static void print_iter_order() { struct cpu_ctx *cctx; int i; u32 layer_idx; if (!(cctx = lookup_cpu_ctx(-1))) { - scx_bpf_error("failed to get cpu ctx"); + __COMPAT_scx_bpf_error("failed to get cpu ctx"); return; } @@ -2107,7 +2280,9 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init) struct cpu_ctx *cctx; int i, j, k, nr_online_cpus, ret; - ret = scx_bpf_create_dsq(LO_FALLBACK_DSQ, -1); + __COMPAT_scx_bpf_switch_all(); + + ret = scx_bpf_create_dsq(HI_FALLBACK_DSQ, -1); if (ret < 0) return ret; @@ -2176,19 +2351,19 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init) layer->exclusive); if (layer->nr_match_ors > MAX_LAYER_MATCH_ORS) { - scx_bpf_error("too many ORs"); + __COMPAT_scx_bpf_error("too many ORs"); return -EINVAL; } bpf_for(j, 0, layer->nr_match_ors) { struct layer_match_ands *ands = MEMBER_VPTR(layers, [i].matches[j]); if (!ands) { - scx_bpf_error("shouldn't happen"); + __COMPAT_scx_bpf_error("shouldn't happen"); return -EINVAL; } if (ands->nr_match_ands > NR_LAYER_MATCH_KINDS) { - scx_bpf_error("too many ANDs"); + __COMPAT_scx_bpf_error("too many ANDs"); return -EINVAL; } @@ -2204,7 +2379,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init) match = MEMBER_VPTR(layers, [i].matches[j].matches[k]); if (!match) { - scx_bpf_error("shouldn't happen"); + __COMPAT_scx_bpf_error("shouldn't happen"); return -EINVAL; } @@ -2243,7 +2418,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init) dbg("%s TGID %u", header, match->tgid); break; default: - scx_bpf_error("%s Invalid kind", header); + __COMPAT_scx_bpf_error("%s Invalid kind", header); return -EINVAL; } } @@ -2301,7 +2476,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init) void BPF_STRUCT_OPS(layered_exit, struct scx_exit_info *ei) { - UEI_RECORD(uei, ei); + uei_record(&uei, ei); } SCX_OPS_DEFINE(layered, @@ -2316,9 +2491,11 @@ SCX_OPS_DEFINE(layered, .set_weight = (void *)layered_set_weight, .set_cpumask = (void *)layered_set_cpumask, .cpu_release = (void *)layered_cpu_release, - .init_task = (void *)layered_init_task, - .exit_task = (void *)layered_exit_task, - .dump = (void *)layered_dump, + .prep_enable = (void *)layered_prep_enable, + .disable = (void *)layered_disable, + // .dump = (void *)layered_dump, .init = (void *)layered_init, .exit = (void *)layered_exit, + .flags = SCX_OPS_CGROUP_KNOB_WEIGHT | SCX_OPS_ENQ_LAST, .name = "layered"); + diff --git a/scheds/rust/scx_layered/src/bpf/util.bpf.h b/scheds/rust/scx_layered/src/bpf/util.bpf.h new file mode 120000 index 000000000..ee7b16c86 --- /dev/null +++ b/scheds/rust/scx_layered/src/bpf/util.bpf.h @@ -0,0 +1 @@ +util.bpf.c \ No newline at end of file diff --git a/scheds/rust/scx_layered/src/main.rs b/scheds/rust/scx_layered/src/main.rs 
index 17301321c..f359ac3b4 100644 --- a/scheds/rust/scx_layered/src/main.rs +++ b/scheds/rust/scx_layered/src/main.rs @@ -8,6 +8,11 @@ mod stats; pub use bpf_skel::*; pub mod bpf_intf; +use core::ffi::CStr; +use stats::LayerStats; +use stats::StatsReq; +use stats::StatsRes; +use stats::SysStats; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; @@ -47,23 +52,13 @@ use scx_stats::prelude::*; use scx_utils::compat; use scx_utils::init_libbpf_logging; use scx_utils::ravg::ravg_read; -use scx_utils::scx_ops_attach; -use scx_utils::scx_ops_load; -use scx_utils::scx_ops_open; -use scx_utils::uei_exited; -use scx_utils::uei_report; use scx_utils::Cache; use scx_utils::Core; use scx_utils::CoreType; use scx_utils::LoadAggregator; use scx_utils::Topology; -use scx_utils::UserExitInfo; use serde::Deserialize; use serde::Serialize; -use stats::LayerStats; -use stats::StatsReq; -use stats::StatsRes; -use stats::SysStats; const RAVG_FRAC_BITS: u32 = bpf_intf::ravg_consts_RAVG_FRAC_BITS; const MAX_CPUS: usize = bpf_intf::consts_MAX_CPUS as usize; @@ -462,6 +457,12 @@ struct Opts { #[clap(long)] monitor: Option, + /// DEPRECATED: Enable output of stats in OpenMetrics format instead of via + /// log macros. This option is useful if you want to collect stats in some + /// monitoring database like prometheseus. + #[clap(short = 'o', long)] + open_metrics_format: bool, + /// Run with example layer specifications (useful for e.g. CI pipelines) #[clap(long)] run_example: bool, @@ -1073,6 +1074,66 @@ impl Stats { } } +#[derive(Debug, Default)] +struct UserExitInfo { + kind: i32, + reason: Option, + msg: Option, +} + +impl UserExitInfo { + fn read(bpf_uei: &types::user_exit_info) -> Result { + let kind = unsafe { std::ptr::read_volatile(&bpf_uei.kind as *const _) }; + + let (reason, msg) = if kind != 0 { + ( + Some( + unsafe { CStr::from_ptr(bpf_uei.reason.as_ptr() as *const _) } + .to_str() + .context("Failed to convert reason to string")? + .to_string(), + ) + .filter(|s| !s.is_empty()), + Some( + unsafe { CStr::from_ptr(bpf_uei.msg.as_ptr() as *const _) } + .to_str() + .context("Failed to convert msg to string")? + .to_string(), + ) + .filter(|s| !s.is_empty()), + ) + } else { + (None, None) + }; + + Ok(Self { kind, reason, msg }) + } + + fn exited(bpf_uei: &types::user_exit_info) -> Result { + Ok(Self::read(bpf_uei)?.kind != 0) + } + + fn report(&self) -> Result<()> { + let why = match (&self.reason, &self.msg) { + (Some(reason), None) => format!("{}", reason), + (Some(reason), Some(msg)) => format!("{} ({})", reason, msg), + _ => "".into(), + }; + + match self.kind { + 0 => Ok(()), + etype => { + if etype != 64 { + bail!("EXIT: kind={} {}", etype, why); + } else { + info!("EXIT: {}", why); + Ok(()) + } + } + } + } +} + #[derive(Debug)] /// `CpuPool` represents the CPU core and logical CPU topology within the system. 
/// It manages the mapping and availability of physical and logical cores, including @@ -1344,6 +1405,18 @@ where index: usize, iters: Vec, } +fn layer_core_order(growth_algo: LayerGrowthAlgo, layer_idx: usize, topo: &Topology) -> Vec { + let mut core_order = vec![]; + match growth_algo { + LayerGrowthAlgo::Sticky => { + let is_left = layer_idx % 2 == 0; + let rot_by = |layer_idx, len| -> usize { + if layer_idx <= len { + layer_idx + } else { + layer_idx % len + } + }; impl IteratorInterleaver where @@ -1399,20 +1472,27 @@ struct Layer { } impl Layer { - fn new(spec: &LayerSpec, idx: usize, cpu_pool: &CpuPool, topo: &Topology) -> Result { - let name = &spec.name; - let kind = spec.kind.clone(); + fn new( + idx: usize, + cpu_pool: &CpuPool, + name: &str, + kind: LayerKind, + topo: &Topology, + ) -> Result { let mut cpus = bitvec![0; cpu_pool.nr_cpus]; cpus.fill(false); let mut allowed_cpus = bitvec![0; cpu_pool.nr_cpus]; + let mut layer_growth_algo = LayerGrowthAlgo::Sticky; match &kind { LayerKind::Confined { cpus_range, util_range, nodes, llcs, + growth_algo, .. } => { + layer_growth_algo = growth_algo.clone(); let cpus_range = cpus_range.unwrap_or((0, std::usize::MAX)); if cpus_range.0 > cpus_range.1 || cpus_range.1 == 0 { bail!("invalid cpus_range {:?}", cpus_range); @@ -1448,7 +1528,19 @@ impl Layer { bail!("invalid util_range {:?}", util_range); } } - LayerKind::Grouped { nodes, llcs, .. } | LayerKind::Open { nodes, llcs, .. } => { + LayerKind::Grouped { + growth_algo, + nodes, + llcs, + .. + } + | LayerKind::Open { + growth_algo, + nodes, + llcs, + .. + } => { + layer_growth_algo = growth_algo.clone(); if nodes.len() == 0 && llcs.len() == 0 { allowed_cpus.fill(true); } else { @@ -1473,6 +1565,7 @@ impl Layer { } } +<<<<<<< HEAD let layer_growth_algo = match &kind { LayerKind::Confined { growth_algo, .. } | LayerKind::Grouped { growth_algo, .. } @@ -1491,6 +1584,13 @@ impl Layer { layer_growth_algo.clone(), core_order ); + let layer_growth_algo = match &kind { + LayerKind::Confined { growth_algo, .. } + | LayerKind::Grouped { growth_algo, .. } + | LayerKind::Open { growth_algo, .. } => growth_algo.clone(), + }; + + let core_order = layer_core_order(layer_growth_algo, idx, topo); Ok(Self { name: name.into(), @@ -1536,7 +1636,8 @@ impl Layer { { trace!( "layer-{} needs more CPUs (util={:.3}) but is over the load fraction", - &self.name, layer_util + &self.name, + layer_util ); return Ok(false); } @@ -1880,7 +1981,7 @@ impl<'a, 'b> Scheduler<'a, 'b> { skel.maps.rodata_data.nr_llcs = 0; for node in topo.nodes() { - debug!( + info!( "configuring node {}, LLCs {:?}", node.id(), node.llcs().len() @@ -1897,7 +1998,7 @@ impl<'a, 'b> Scheduler<'a, 'b> { ); for (_, llc) in node.llcs() { - debug!("configuring llc {:?} for node {:?}", llc.id(), node.id()); + info!("configuring llc {:?} for node {:?}", llc.id(), node.id()); skel.maps.rodata_data.llc_numa_id_map[llc.id()] = node.id() as u32; } } @@ -1920,7 +2021,9 @@ impl<'a, 'b> Scheduler<'a, 'b> { let mut skel_builder = BpfSkelBuilder::default(); skel_builder.obj_builder.debug(opts.verbose > 1); init_libbpf_logging(None); - let mut skel = scx_ops_open!(skel_builder, open_object, layered)?; + let mut skel = skel_builder + .open(open_object) + .context("failed to open BPF program")?; // scheduler_tick() got renamed to sched_tick() during v6.10-rc. let sched_tick_name = match compat::ksym_exists("sched_tick")? 
{ @@ -1934,7 +2037,7 @@ impl<'a, 'b> Scheduler<'a, 'b> { .context("Failed to set attach target for sched_tick_fentry()")?; // Initialize skel according to @opts. - skel.struct_ops.layered_mut().exit_dump_len = opts.exit_dump_len; + // skel.struct_ops.layered_mut().exit_dump_len = opts.exit_dump_len; skel.maps.rodata_data.debug = opts.verbose as u32; skel.maps.rodata_data.slice_ns = opts.slice_us * 1000; @@ -1958,11 +2061,17 @@ impl<'a, 'b> Scheduler<'a, 'b> { Self::init_layers(&mut skel, opts, layer_specs, &topo)?; Self::init_nodes(&mut skel, opts, &topo); - let mut skel = scx_ops_load!(skel, layered, uei)?; + let mut skel = skel.load().context("Failed to load BPF program")?; let mut layers = vec![]; for (idx, spec) in layer_specs.iter().enumerate() { - layers.push(Layer::new(&spec, idx, &cpu_pool, &topo)?); + layers.push(Layer::new( + idx, + &cpu_pool, + &spec.name, + spec.kind.clone(), + &topo, + )?); } initialize_cpu_ctxs(&skel, &topo).unwrap(); @@ -1976,11 +2085,10 @@ impl<'a, 'b> Scheduler<'a, 'b> { // huge problem in the interim until we figure it out. // Attach. - let struct_ops = scx_ops_attach!(skel, layered)?; let stats_server = StatsServer::new(stats::server_data()).launch()?; - let sched = Self { - struct_ops: Some(struct_ops), + let mut sched = Self { + struct_ops: None, layer_specs, sched_intv: Duration::from_secs_f64(opts.interval), @@ -2002,6 +2110,20 @@ impl<'a, 'b> Scheduler<'a, 'b> { stats_server, }; + sched + .skel + .attach() + .context("Failed to attach BPF program")?; + + sched.struct_ops = Some( + sched + .skel + .maps + .layered + .attach_struct_ops() + .context("Failed to attach layered struct ops")?, + ); + info!("Layered Scheduler Attached. Run `scx_layered --monitor` for metrics."); Ok(sched) @@ -2167,12 +2289,14 @@ impl<'a, 'b> Scheduler<'a, 'b> { Ok(sys_stats) } - fn run(&mut self, shutdown: Arc) -> Result { + fn run(&mut self, shutdown: Arc) -> Result<()> { let (res_ch, req_ch) = self.stats_server.channels(); let mut next_sched_at = Instant::now() + self.sched_intv; let mut cpus_ranges = HashMap::>::new(); - while !shutdown.load(Ordering::Relaxed) && !uei_exited!(&self.skel, uei) { + while !shutdown.load(Ordering::Relaxed) + && !UserExitInfo::exited(&self.skel.maps.bss_data.uei)? 
+ { let now = Instant::now(); if now >= next_sched_at { @@ -2226,7 +2350,7 @@ impl<'a, 'b> Scheduler<'a, 'b> { } self.struct_ops.take(); - uei_report!(&self.skel, uei) + UserExitInfo::read(&self.skel.maps.bss_data.uei)?.report() } } @@ -2418,6 +2542,10 @@ fn main() -> Result<()> { ); } + if opts.open_metrics_format { + warn!("open_metrics_format is deprecated"); + } + debug!("specs={}", serde_json::to_string_pretty(&layer_config)?); verify_layer_specs(&layer_config.specs)?; @@ -2439,12 +2567,6 @@ fn main() -> Result<()> { } let mut open_object = MaybeUninit::uninit(); - loop { - let mut sched = Scheduler::init(&opts, &layer_config.specs, &mut open_object)?; - if !sched.run(shutdown.clone())?.should_restart() { - break; - } - } - - Ok(()) + let mut sched = Scheduler::init(&opts, &layer_config.specs, &mut open_object)?; + sched.run(shutdown.clone()) } diff --git a/scheds/rust/scx_layered/src/stats.rs b/scheds/rust/scx_layered/src/stats.rs index 9eaf4c4a1..7f3709107 100644 --- a/scheds/rust/scx_layered/src/stats.rs +++ b/scheds/rust/scx_layered/src/stats.rs @@ -171,7 +171,11 @@ impl LayerStats { } }; let calc_frac = |a, b| { - if b != 0.0 { a / b * 100.0 } else { 0.0 } + if b != 0.0 { + a / b * 100.0 + } else { + 0.0 + } }; let is_excl = match &layer.kind { diff --git a/scheds/rust/scx_rusty/src/bpf/main.bpf.c b/scheds/rust/scx_rusty/src/bpf/main.bpf.c index 6e983aac8..2ea06e442 100644 --- a/scheds/rust/scx_rusty/src/bpf/main.bpf.c +++ b/scheds/rust/scx_rusty/src/bpf/main.bpf.c @@ -56,7 +56,7 @@ char _license[] SEC("license") = "GPL"; -UEI_DEFINE(uei); + /* * const volatiles are set during initialization and treated as consts by the @@ -86,6 +86,13 @@ const volatile u32 debug; /* base slice duration */ static u64 slice_ns = SCX_SLICE_DFL; +#define __COMPAT_scx_bpf_error(fmt, args...) 
\ + do { \ + bpf_printk(fmt, ##args); \ + } while (0) + +struct user_exit_info uei; + /* * Per-CPU context */ @@ -180,7 +187,7 @@ static struct dom_ctx *lookup_dom_ctx(u32 dom_id) domc = try_lookup_dom_ctx(dom_id); if (!domc) - scx_bpf_error("Failed to lookup dom[%u]", dom_id); + __COMPAT_scx_bpf_error("Failed to lookup dom[%u]", dom_id); return domc; } @@ -193,7 +200,7 @@ static u64 t_to_tptr(struct task_struct *p) err = bpf_probe_read_kernel(&tptr, sizeof(tptr), &p); if (err){ - scx_bpf_error("Failed to cast task_struct addr to tptr"); + __COMPAT_scx_bpf_error("Failed to cast task_struct addr to tptr"); return 0; } return tptr; @@ -215,7 +222,7 @@ static struct task_ctx *lookup_task_ctx(struct task_struct *p) taskc = try_lookup_task_ctx(p); if (!taskc) - scx_bpf_error("task_ctx lookup failed for tptr %llu", tptr); + __COMPAT_scx_bpf_error("task_ctx lookup failed for tptr %llu", tptr); return taskc; } @@ -226,7 +233,7 @@ static struct pcpu_ctx *lookup_pcpu_ctx(s32 cpu) pcpuc = MEMBER_VPTR(pcpu_ctx, [cpu]); if (!pcpuc) - scx_bpf_error("Failed to lookup pcpu ctx for %d", cpu); + __COMPAT_scx_bpf_error("Failed to lookup pcpu ctx for %d", cpu); return pcpuc; } @@ -255,7 +262,7 @@ static struct bucket_ctx *lookup_dom_bucket(struct dom_ctx *dom_ctx, if (bucket) return bucket; - scx_bpf_error("Failed to lookup dom bucket"); + __COMPAT_scx_bpf_error("Failed to lookup dom bucket"); return NULL; } @@ -268,7 +275,7 @@ static struct lock_wrapper *lookup_dom_bkt_lock(u32 dom_id, u32 weight) if (lockw) return lockw; - scx_bpf_error("Failed to lookup dom lock"); + __COMPAT_scx_bpf_error("Failed to lookup dom lock"); return NULL; } @@ -279,7 +286,7 @@ static struct lock_wrapper *lookup_dom_vtime_lock(u32 dom_id) lockw = bpf_map_lookup_elem(&dom_vtime_locks, &idx); if (!lockw) - scx_bpf_error("Failed to lookup dom lock"); + __COMPAT_scx_bpf_error("Failed to lookup dom lock"); return lockw; } @@ -322,7 +329,7 @@ static void dom_dcycle_adj(u32 dom_id, u32 weight, u64 now, bool runnable) bpf_spin_unlock(&lockw->lock); if (adj < 0 && (s64)bucket->dcycle < 0) - scx_bpf_error("cpu%d dom%u bucket%u load underflow (dcycle=%lld adj=%lld)", + __COMPAT_scx_bpf_error("cpu%d dom%u bucket%u load underflow (dcycle=%lld adj=%lld)", bpf_get_smp_processor_id(), dom_id, bucket_idx, bucket->dcycle, adj); @@ -424,7 +431,7 @@ static struct task_struct *tptr_to_task(u64 tptr) err_task = bpf_probe_read_kernel(&p, sizeof(struct task_struct *), &task); if (err_task) - scx_bpf_error("Failed to retrieve task_struct for tptr %llu", tptr); + __COMPAT_scx_bpf_error("Failed to retrieve task_struct for tptr %llu", tptr); if (p) return p; } @@ -438,7 +445,7 @@ int dom_xfer_task(u64 tptr, u32 new_dom_id, u64 now) p = tptr_to_task(tptr); if (!p) { - scx_bpf_error("Failed to lookup task %llu", tptr); + __COMPAT_scx_bpf_error("Failed to lookup task %llu", tptr); return 0; } @@ -599,7 +606,7 @@ const int sched_prio_to_weight[DL_MAX_LAT_PRIO + 1] = { static u64 sched_prio_to_latency_weight(u64 prio) { if (prio >= DL_MAX_LAT_PRIO) { - scx_bpf_error("Invalid prio index"); + __COMPAT_scx_bpf_error("Invalid prio index"); return 0; } @@ -789,7 +796,7 @@ static bool task_set_domain(struct task_ctx *taskc, struct task_struct *p, t_cpumask = taskc->cpumask; if (!t_cpumask) { - scx_bpf_error("Failed to look up task cpumask"); + __COMPAT_scx_bpf_error("Failed to look up task cpumask"); return false; } @@ -808,7 +815,7 @@ static bool task_set_domain(struct task_ctx *taskc, struct task_struct *p, d_cpumask = new_domc->cpumask; if (!d_cpumask) { - 
scx_bpf_error("Failed to get dom%u cpumask kptr", + __COMPAT_scx_bpf_error("Failed to get dom%u cpumask kptr", new_dom_id); return false; } @@ -859,7 +866,7 @@ static s32 try_sync_wakeup(struct task_struct *p, struct task_ctx *taskc, d_cpumask = domc->cpumask; if (!d_cpumask) { - scx_bpf_error("Failed to acquire dom%u cpumask kptr", + __COMPAT_scx_bpf_error("Failed to acquire dom%u cpumask kptr", taskc->dom_id); return -ENOENT; } @@ -1021,7 +1028,7 @@ s32 BPF_STRUCT_OPS(rusty_select_cpu, struct task_struct *p, s32 prev_cpu, tmp_direct_greedy = direct_greedy_cpumask; if (!tmp_direct_greedy) { - scx_bpf_error("Failed to lookup direct_greedy mask"); + __COMPAT_scx_bpf_error("Failed to lookup direct_greedy mask"); goto enoent; } /* @@ -1035,13 +1042,13 @@ s32 BPF_STRUCT_OPS(rusty_select_cpu, struct task_struct *p, s32 prev_cpu, if (!direct_greedy_numa && domc) { node_mask = domc->node_cpumask; if (!node_mask) { - scx_bpf_error("Failed to lookup node mask"); + __COMPAT_scx_bpf_error("Failed to lookup node mask"); goto enoent; } tmp_cpumask = bpf_kptr_xchg(&taskc->tmp_cpumask, NULL); if (!tmp_cpumask) { - scx_bpf_error("Failed to lookup tmp cpumask"); + __COMPAT_scx_bpf_error("Failed to lookup tmp cpumask"); goto enoent; } bpf_cpumask_and(tmp_cpumask, @@ -1149,7 +1156,7 @@ void BPF_STRUCT_OPS(rusty_enqueue, struct task_struct *p, u64 enq_flags) if (!(taskc = lookup_task_ctx(p))) return; if (!(p_cpumask = taskc->cpumask)) { - scx_bpf_error("NULL cpumask"); + __COMPAT_scx_bpf_error("NULL cpumask"); return; } @@ -1240,7 +1247,7 @@ u32 dom_node_id(u32 dom_id) nid_ptr = MEMBER_VPTR(dom_numa_id_map, [dom_id]); if (!nid_ptr) { - scx_bpf_error("Couldn't look up node ID for %d", dom_id); + __COMPAT_scx_bpf_error("Couldn't look up node ID for %d", dom_id); return 0; } return *nid_ptr; @@ -1443,7 +1450,7 @@ void BPF_STRUCT_OPS(rusty_running, struct task_struct *p) dom_id = taskc->dom_id; if (dom_id >= MAX_DOMS) { - scx_bpf_error("Invalid dom ID"); + __COMPAT_scx_bpf_error("Invalid dom ID"); return; } @@ -1460,7 +1467,7 @@ void BPF_STRUCT_OPS(rusty_running, struct task_struct *p) tptrp = MEMBER_VPTR(dom_active_tptrs, [dom_id].tptrs[idx]); if (!tptrp) { - scx_bpf_error("dom_active_tptrs[%u][%llu] indexing failed", + __COMPAT_scx_bpf_error("dom_active_tptrs[%u][%llu] indexing failed", dom_id, idx); return; } @@ -1607,7 +1614,7 @@ static void task_pick_and_set_domain(struct task_ctx *taskc, dom_id = task_pick_domain(taskc, p, cpumask); if (!task_set_domain(taskc, p, dom_id, init_dsq_vtime)) - scx_bpf_error("Failed to set dom%d for %s[%llu]", + __COMPAT_scx_bpf_error("Failed to set dom%d for %s[%llu]", dom_id, p->comm, tptr); } @@ -1631,21 +1638,21 @@ static s32 create_save_cpumask(struct bpf_cpumask **kptr) cpumask = bpf_cpumask_create(); if (!cpumask) { - scx_bpf_error("Failed to create cpumask"); + __COMPAT_scx_bpf_error("Failed to create cpumask"); return -ENOMEM; } cpumask = bpf_kptr_xchg(kptr, cpumask); if (cpumask) { - scx_bpf_error("kptr already had cpumask"); + __COMPAT_scx_bpf_error("kptr already had cpumask"); bpf_cpumask_release(cpumask); } return 0; } -s32 BPF_STRUCT_OPS(rusty_init_task, struct task_struct *p, - struct scx_init_task_args *args) +s32 BPF_STRUCT_OPS(rusty_prep_enable, struct task_struct *p, + struct scx_enable_args *args) { u64 now = bpf_ktime_get_ns(); struct task_ctx taskc = { @@ -1700,9 +1707,9 @@ s32 BPF_STRUCT_OPS(rusty_init_task, struct task_struct *p, return 0; } - -void BPF_STRUCT_OPS(rusty_exit_task, struct task_struct *p, - struct scx_exit_task_args *args) +/* +void 
BPF_STRUCT_OPS(rusty_cancel_enable, struct task_struct *p, + struct scx_enable_args *args) { u64 tptr = t_to_tptr(p); long ret; @@ -1719,6 +1726,7 @@ void BPF_STRUCT_OPS(rusty_exit_task, struct task_struct *p, return; } } +*/ static s32 create_node(u32 node_id) { @@ -1730,7 +1738,7 @@ static s32 create_node(u32 node_id) nodec = bpf_map_lookup_elem(&node_data, &node_id); if (!nodec) { /* Should never happen, it's created statically at load time. */ - scx_bpf_error("No node%u", node_id); + __COMPAT_scx_bpf_error("No node%u", node_id); return -ENOENT; } @@ -1742,7 +1750,7 @@ static s32 create_node(u32 node_id) cpumask = nodec->cpumask; if (!cpumask) { bpf_rcu_read_unlock(); - scx_bpf_error("Failed to lookup node cpumask"); + __COMPAT_scx_bpf_error("Failed to lookup node cpumask"); return -ENOENT; } @@ -1751,7 +1759,7 @@ static s32 create_node(u32 node_id) nmask = MEMBER_VPTR(numa_cpumasks, [node_id][cpu / 64]); if (!nmask) { - scx_bpf_error("array index error"); + __COMPAT_scx_bpf_error("array index error"); ret = -ENOENT; break; } @@ -1773,7 +1781,7 @@ static s32 create_dom(u32 dom_id) s32 ret; if (dom_id >= MAX_DOMS) { - scx_bpf_error("Max dom ID %u exceeded (%u)", MAX_DOMS, dom_id); + __COMPAT_scx_bpf_error("Max dom ID %u exceeded (%u)", MAX_DOMS, dom_id); return -EINVAL; } @@ -1781,7 +1789,7 @@ static s32 create_dom(u32 dom_id) ret = scx_bpf_create_dsq(dom_id, node_id); if (ret < 0) { - scx_bpf_error("Failed to create dsq %u (%d)", dom_id, ret); + __COMPAT_scx_bpf_error("Failed to create dsq %u (%d)", dom_id, ret); return ret; } @@ -1800,7 +1808,7 @@ static s32 create_dom(u32 dom_id) all_mask = all_cpumask; if (!dom_mask || !all_mask) { bpf_rcu_read_unlock(); - scx_bpf_error("Could not find cpumask"); + __COMPAT_scx_bpf_error("Could not find cpumask"); return -ENOENT; } @@ -1809,7 +1817,7 @@ static s32 create_dom(u32 dom_id) dmask = MEMBER_VPTR(dom_cpumasks, [dom_id][cpu / 64]); if (!dmask) { - scx_bpf_error("array index error"); + __COMPAT_scx_bpf_error("array index error"); ret = -ENOENT; break; } @@ -1830,7 +1838,7 @@ static s32 create_dom(u32 dom_id) nodec = bpf_map_lookup_elem(&node_data, &node_id); if (!nodec) { /* Should never happen, it's created statically at load time. 
*/ - scx_bpf_error("No node%u", node_id); + __COMPAT_scx_bpf_error("No node%u", node_id); return -ENOENT; } ret = create_save_cpumask(&domc->node_cpumask); @@ -1842,7 +1850,7 @@ static s32 create_dom(u32 dom_id) dom_mask = domc->node_cpumask; if (!node_mask || !dom_mask) { bpf_rcu_read_unlock(); - scx_bpf_error("cpumask lookup failed"); + __COMPAT_scx_bpf_error("cpumask lookup failed"); return -ENOENT; } bpf_cpumask_copy(dom_mask, (const struct cpumask *)node_mask); @@ -1873,7 +1881,7 @@ static s32 initialize_cpu(s32 cpu) cpumask = domc->cpumask; if (!cpumask) { bpf_rcu_read_unlock(); - scx_bpf_error("Failed to lookup dom node cpumask"); + __COMPAT_scx_bpf_error("Failed to lookup dom node cpumask"); return -ENOENT; } @@ -1891,6 +1899,8 @@ static s32 initialize_cpu(s32 cpu) s32 BPF_STRUCT_OPS_SLEEPABLE(rusty_init) { s32 i, ret; + + __COMPAT_scx_bpf_switch_all(); ret = create_save_cpumask(&all_cpumask); if (ret) @@ -1929,7 +1939,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(rusty_init) void BPF_STRUCT_OPS(rusty_exit, struct scx_exit_info *ei) { - UEI_RECORD(uei, ei); + uei_record(&uei, ei); } SCX_OPS_DEFINE(rusty, @@ -1942,8 +1952,8 @@ SCX_OPS_DEFINE(rusty, .quiescent = (void *)rusty_quiescent, .set_weight = (void *)rusty_set_weight, .set_cpumask = (void *)rusty_set_cpumask, - .init_task = (void *)rusty_init_task, - .exit_task = (void *)rusty_exit_task, + .prep_enable = (void *)rusty_prep_enable, + /* .cancel_enable = (void *)rusty_cancel_enable, */ .init = (void *)rusty_init, .exit = (void *)rusty_exit, .timeout_ms = 10000, diff --git a/scheds/rust/scx_rusty/src/main.rs b/scheds/rust/scx_rusty/src/main.rs index d9fd0915f..d520a022b 100644 --- a/scheds/rust/scx_rusty/src/main.rs +++ b/scheds/rust/scx_rusty/src/main.rs @@ -16,7 +16,9 @@ pub mod load_balance; use load_balance::LoadBalancer; mod stats; +use core::ffi::CStr; use std::collections::BTreeMap; +use std::ffi::CString; use std::mem::MaybeUninit; use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; @@ -52,11 +54,10 @@ use scx_utils::init_libbpf_logging; use scx_utils::scx_ops_attach; use scx_utils::scx_ops_load; use scx_utils::scx_ops_open; -use scx_utils::uei_exited; -use scx_utils::uei_report; +// use scx_utils::uei_exited; +// use scx_utils::uei_report; use scx_utils::Cpumask; use scx_utils::Topology; -use scx_utils::UserExitInfo; use scx_utils::NR_CPU_IDS; const MAX_DOMS: usize = bpf_intf::consts_MAX_DOMS as usize; @@ -432,7 +433,7 @@ impl<'a> Scheduler<'a> { if opts.partial { skel.struct_ops.rusty_mut().flags |= *compat::SCX_OPS_SWITCH_PARTIAL; } - skel.struct_ops.rusty_mut().exit_dump_len = opts.exit_dump_len; + // skel.struct_ops.rusty_mut().exit_dump_len = opts.exit_dump_len; skel.maps.rodata_data.load_half_life = (opts.load_half_life * 1000000000.0) as u32; skel.maps.rodata_data.kthreads_local = opts.kthreads_local; @@ -566,7 +567,7 @@ impl<'a> Scheduler<'a> { Ok(()) } - fn run(&mut self, shutdown: Arc) -> Result { + fn run(&mut self, shutdown: Arc) -> Result<()> { let (res_ch, req_ch) = self.stats_server.channels(); let now = Instant::now(); let mut next_tune_at = now + self.tune_interval; @@ -574,7 +575,9 @@ impl<'a> Scheduler<'a> { self.skel.maps.stats.value_size() as usize; - while !shutdown.load(Ordering::Relaxed) && !uei_exited!(&self.skel, uei) { + while !shutdown.load(Ordering::Relaxed) + && !UserExitInfo::exited(&self.skel.maps.bss_data.uei)? 
+ { let now = Instant::now(); if now >= next_tune_at { @@ -608,7 +611,7 @@ impl<'a> Scheduler<'a> { } self.struct_ops.take(); - uei_report!(&self.skel, uei) + UserExitInfo::read(&self.skel.maps.bss_data.uei)?.report() } } @@ -669,11 +672,66 @@ fn main() -> Result<()> { } let mut open_object = MaybeUninit::uninit(); - loop { - let mut sched = Scheduler::init(&opts, &mut open_object)?; - if !sched.run(shutdown.clone())?.should_restart() { - break; + let mut sched = Scheduler::init(&opts, &mut open_object)?; + sched.run(shutdown.clone()) +} + +#[derive(Debug, Default)] +struct UserExitInfo { + kind: i32, + reason: Option, + msg: Option, +} + +impl UserExitInfo { + fn read(bpf_uei: &types::user_exit_info) -> Result { + let kind = unsafe { std::ptr::read_volatile(&bpf_uei.kind as *const _) }; + + let (reason, msg) = if kind != 0 { + ( + Some( + unsafe { CStr::from_ptr(bpf_uei.reason.as_ptr() as *const _) } + .to_str() + .context("Failed to convert reason to string")? + .to_string(), + ) + .filter(|s| !s.is_empty()), + Some( + unsafe { CStr::from_ptr(bpf_uei.msg.as_ptr() as *const _) } + .to_str() + .context("Failed to convert msg to string")? + .to_string(), + ) + .filter(|s| !s.is_empty()), + ) + } else { + (None, None) + }; + + Ok(Self { kind, reason, msg }) + } + + fn exited(bpf_uei: &types::user_exit_info) -> Result { + Ok(Self::read(bpf_uei)?.kind != 0) + } + + fn report(&self) -> Result<()> { + let why = match (&self.reason, &self.msg) { + (Some(reason), None) => format!("{}", reason), + (Some(reason), Some(msg)) => format!("{} ({})", reason, msg), + _ => "".into(), + }; + + match self.kind { + 0 => Ok(()), + etype => { + if etype != 64 { + bail!("EXIT: kind={} {}", etype, why); + } else { + info!("EXIT: {}", why); + Ok(()) + } + } } } - Ok(()) }
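Both scx_layered and scx_rusty now carry an identical, hand-rolled copy of the userspace UserExitInfo reader in their main.rs. A minimal sketch of a shared helper is below, assuming the generated user_exit_info layout is the same in both skeletons (an i32 kind plus fixed-size, NUL-terminated reason/msg arrays, as in user_exit_info.h); the uei_compat module name and the NON_FATAL_EXIT_KIND constant are illustrative only and are not part of this patch.

// uei_compat.rs (hypothetical shared module; a sketch, not part of this patch)
use anyhow::{bail, Context, Result};
use core::ffi::CStr;
use log::info;

/// Exit kind treated as a clean, non-fatal exit in this patch (`etype == 64`).
const NON_FATAL_EXIT_KIND: i32 = 64;

#[derive(Debug, Default)]
pub struct UserExitInfo {
    kind: i32,
    reason: Option<String>,
    msg: Option<String>,
}

impl UserExitInfo {
    /// Build from the raw BPF-side fields; each scheduler passes
    /// `(&uei.kind, &uei.reason, &uei.msg)` from its own generated types,
    /// so this helper does not depend on a per-crate skeleton module.
    pub fn from_raw(
        kind: &i32,
        reason: &[std::os::raw::c_char],
        msg: &[std::os::raw::c_char],
    ) -> Result<Self> {
        // Volatile read, mirroring the memory-barrier pairing on the BPF side.
        let kind = unsafe { std::ptr::read_volatile(kind as *const i32) };

        // The arrays are fixed-size and NUL-terminated by bpf_probe_read_kernel_str().
        let cstr_field = |bytes: &[std::os::raw::c_char]| -> Result<Option<String>> {
            let s = unsafe { CStr::from_ptr(bytes.as_ptr()) }
                .to_str()
                .context("Failed to convert exit info field to string")?
                .to_string();
            Ok(Some(s).filter(|s| !s.is_empty()))
        };

        let (reason, msg) = if kind != 0 {
            (cstr_field(reason)?, cstr_field(msg)?)
        } else {
            (None, None)
        };

        Ok(Self { kind, reason, msg })
    }

    pub fn exited(&self) -> bool {
        self.kind != 0
    }

    pub fn report(&self) -> Result<()> {
        let why = match (&self.reason, &self.msg) {
            (Some(reason), Some(msg)) => format!("{} ({})", reason, msg),
            (Some(reason), None) => reason.clone(),
            _ => "".into(),
        };

        match self.kind {
            0 => Ok(()),
            NON_FATAL_EXIT_KIND => {
                info!("EXIT: {}", why);
                Ok(())
            }
            etype => bail!("EXIT: kind={} {}", etype, why),
        }
    }
}

Keeping the helper in terms of raw fields rather than a skeleton type is what lets one copy serve both schedulers; the polling loops would then call UserExitInfo::from_raw(...)?.exited() and .report() exactly where the duplicated versions are used today.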