diff --git a/rust/scx_utils/src/bpf_builder.rs b/rust/scx_utils/src/bpf_builder.rs index 6830ce236..7e8a13c9d 100644 --- a/rust/scx_utils/src/bpf_builder.rs +++ b/rust/scx_utils/src/bpf_builder.rs @@ -362,7 +362,7 @@ impl BpfBuilder { } /// Return `(VER, SHA1)` from which the bulit-in `vmlinux.h` is generated. - pub fn vmlinux_h_ver_sha1() -> (String, String) { + pub fn vmlinux_h_ver_sha1() -> String { let mut ar = tar::Archive::new(Self::BPF_H_TAR); for file in ar.entries().unwrap() { @@ -378,7 +378,7 @@ impl BpfBuilder { .to_string_lossy() .to_string(); - return sscanf!(name, "vmlinux-v{String}-g{String}.h").unwrap(); + return sscanf!(name, "vmlinux-{String}.h").unwrap(); } panic!("vmlinux/vmlinux.h not found"); @@ -586,15 +586,10 @@ mod tests { #[test] fn test_vmlinux_h_ver_sha1() { - let (ver, sha1) = super::BpfBuilder::vmlinux_h_ver_sha1(); + let ver = super::BpfBuilder::vmlinux_h_ver_sha1(); - println!("vmlinux.h: ver={:?} sha1={:?}", &ver, &sha1,); + println!("vmlinux.h: ver={:?}", &ver); - assert!(regex::Regex::new(r"^([1-9][0-9]*\.[1-9][0-9][a-z0-9-]*)$") - .unwrap() - .is_match(&ver)); - assert!(regex::Regex::new(r"^[0-9a-z]{12}$") - .unwrap() - .is_match(&sha1)); + assert!(regex::Regex::new(r"^[a-f0-9]{7}$").unwrap().is_match(&ver)); } } diff --git a/rust/scx_utils/src/compat.rs b/rust/scx_utils/src/compat.rs index c2d977b0e..977457d02 100644 --- a/rust/scx_utils/src/compat.rs +++ b/rust/scx_utils/src/compat.rs @@ -164,9 +164,9 @@ macro_rules! unwrap_or_break { pub fn check_min_requirements() -> Result<()> { // ec7e3b0463e1 ("implement-ops") in https://github.com/sched-ext/sched_ext // is the current minimum required kernel version. - if let Ok(false) | Err(_) = struct_has_field("sched_ext_ops", "dump") { - bail!("sched_ext_ops.dump() missing, kernel too old?"); - } + // if let Ok(false) | Err(_) = struct_has_field("sched_ext_ops", "dump") { + // bail!("sched_ext_ops.dump() missing, kernel too old?"); + // } Ok(()) } @@ -187,21 +187,21 @@ macro_rules! scx_ops_open { }; let ops = skel.struct_ops.[<$ops _mut>](); - let path = std::path::Path::new("/sys/kernel/sched_ext/hotplug_seq"); - - let val = match std::fs::read_to_string(&path) { - Ok(val) => val, - Err(_) => { - break 'block Err(anyhow::anyhow!("Failed to open or read file {:?}", path)); - } - }; - - ops.hotplug_seq = match val.trim().parse::() { - Ok(parsed) => parsed, - Err(_) => { - break 'block Err(anyhow::anyhow!("Failed to parse hotplug seq {}", val)); - } - }; + // let path = std::path::Path::new("/sys/kernel/sched_ext/hotplug_seq"); + + // let val = match std::fs::read_to_string(&path) { + // Ok(val) => val, + // Err(_) => { + // break 'block Err(anyhow::anyhow!("Failed to open or read file {:?}", path)); + // } + // }; + + // ops.hotplug_seq = match val.trim().parse::() { + // Ok(parsed) => parsed, + // Err(_) => { + // break 'block Err(anyhow::anyhow!("Failed to parse hotplug seq {}", val)); + // } + // }; let result : Result, anyhow::Error> = Ok(skel); result @@ -218,7 +218,7 @@ macro_rules! scx_ops_open { macro_rules! scx_ops_load { ($skel: expr, $ops: ident, $uei: ident) => { 'block: { scx_utils::paste! 
{ - scx_utils::uei_set_size!($skel, $ops, $uei); + //scx_utils::uei_set_size!($skel, $ops, $uei); $skel.load().context("Failed to load BPF program") } }}; diff --git a/rust/scx_utils/src/lib.rs b/rust/scx_utils/src/lib.rs index 3aa0ccd3d..265507899 100644 --- a/rust/scx_utils/src/lib.rs +++ b/rust/scx_utils/src/lib.rs @@ -41,14 +41,14 @@ pub use bpf_builder::BpfBuilder; mod builder; pub use builder::Builder; -mod user_exit_info; -pub use user_exit_info::ScxConsts; -pub use user_exit_info::ScxExitKind; -pub use user_exit_info::UeiDumpPtr; -pub use user_exit_info::UserExitInfo; -pub use user_exit_info::SCX_ECODE_ACT_RESTART; -pub use user_exit_info::SCX_ECODE_RSN_HOTPLUG; -pub use user_exit_info::UEI_DUMP_PTR_MUTEX; +// mod user_exit_info; +// // pub use user_exit_info::ScxConsts; +// // pub use user_exit_info::ScxExitKind; +// pub use user_exit_info::UeiDumpPtr; +// pub use user_exit_info::UserExitInfo; +// pub use user_exit_info::SCX_ECODE_ACT_RESTART; +// pub use user_exit_info::SCX_ECODE_RSN_HOTPLUG; +// pub use user_exit_info::UEI_DUMP_PTR_MUTEX; pub mod build_id; pub mod compat; diff --git a/scheds/include/scx/compat.bpf.h b/scheds/include/scx/compat.bpf.h index e5afe9efd..21548815b 100644 --- a/scheds/include/scx/compat.bpf.h +++ b/scheds/include/scx/compat.bpf.h @@ -34,6 +34,97 @@ (bpf_ksym_exists(scx_bpf_dispatch_vtime_from_dsq) ? \ scx_bpf_dispatch_vtime_from_dsq((it), (p), (dsq_id), (enq_flags)) : false) +/* + * %SCX_KICK_IDLE is a later addition. To support both before and after, use + * %__COMPAT_SCX_KICK_IDLE which becomes 0 on kernels which don't support it. + * Users can use %SCX_KICK_IDLE directly in the future. + */ +#define __COMPAT_SCX_KICK_IDLE \ + __COMPAT_ENUM_OR_ZERO(enum scx_kick_flags, SCX_KICK_IDLE) + +/* + * scx_switch_all() was replaced by %SCX_OPS_SWITCH_PARTIAL. See + * %__COMPAT_SCX_OPS_SWITCH_PARTIAL in compat.h. This can be dropped in the + * future. + */ +void scx_bpf_switch_all(void) __ksym __weak; + +static inline void __COMPAT_scx_bpf_switch_all(void) +{ + scx_bpf_switch_all(); +} + +/* + * scx_bpf_exit() is a new addition. Fall back to scx_bpf_error() if + * unavailable. Users can use scx_bpf_exit() directly in the future. + */ +#define __COMPAT_scx_bpf_exit(code, fmt, args...) \ +({ \ + if (bpf_ksym_exists(scx_bpf_exit_bstr)) \ + scx_bpf_exit((code), fmt, ##args); \ + else \ + scx_bpf_error(fmt, ##args); \ +}) + +/* + * scx_bpf_dump() is a new addition. Ignore if unavailable. Users can use + * scx_bpf_dump() directly in the future. + */ +#define __COMPAT_scx_bpf_dump(fmt, args...) \ +({ \ + if (bpf_ksym_exists(scx_bpf_dump_bstr)) \ + scx_bpf_dump(fmt, ##args); \ +}) + +/* + * scx_bpf_nr_cpu_ids(), scx_bpf_get_possible/online_cpumask() are new. No good + * way to noop these kfuncs. Provide a test macro. Users can assume existence in + * the future. + */ +#define __COMPAT_HAS_CPUMASKS \ + bpf_ksym_exists(scx_bpf_nr_cpu_ids) + +/* + * cpuperf is new. The followings become noop on older kernels. Callers can be + * updated to call cpuperf kfuncs directly in the future. 
+ */ +static inline u32 __COMPAT_scx_bpf_cpuperf_cap(s32 cpu) +{ + if (bpf_ksym_exists(scx_bpf_cpuperf_cap)) + return scx_bpf_cpuperf_cap(cpu); + else + return 1024; +} + +static inline u32 __COMPAT_scx_bpf_cpuperf_cur(s32 cpu) +{ + if (bpf_ksym_exists(scx_bpf_cpuperf_cur)) + return scx_bpf_cpuperf_cur(cpu); + else + return 1024; +} + +static inline void __COMPAT_scx_bpf_cpuperf_set(s32 cpu, u32 perf) +{ + if (bpf_ksym_exists(scx_bpf_cpuperf_set)) + return scx_bpf_cpuperf_set(cpu, perf); +} + +/* + * Iteration and scx_bpf_consume_task() are new. The following become noop on + * older kernels. The users can switch to bpf_for_each(scx_dsq) and directly + * call scx_bpf_consume_task() in the future. + */ +#define __COMPAT_DSQ_FOR_EACH(p, dsq_id, flags) \ + if (bpf_ksym_exists(bpf_iter_scx_dsq_new)) \ + bpf_for_each(scx_dsq, (p), (dsq_id), (flags)) + +static inline bool __COMPAT_scx_bpf_consume_task(struct bpf_iter_scx_dsq *it, + struct task_struct *p) +{ + return false; +} + /* * Define sched_ext_ops. This may be expanded to define multiple variants for * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH(). @@ -45,3 +136,4 @@ }; #endif /* __SCX_COMPAT_BPF_H */ + diff --git a/scheds/include/scx/compat.h b/scheds/include/scx/compat.h index cc56ff9aa..70021b999 100644 --- a/scheds/include/scx/compat.h +++ b/scheds/include/scx/compat.h @@ -143,8 +143,8 @@ static inline long scx_hotplug_seq(void) #define SCX_OPS_OPEN(__ops_name, __scx_name) ({ \ struct __scx_name *__skel; \ \ - SCX_BUG_ON(!__COMPAT_struct_has_field("sched_ext_ops", "dump"), \ - "sched_ext_ops.dump() missing, kernel too old?"); \ + /* SCX_BUG_ON(!__COMPAT_struct_has_field("sched_ext_ops", "dump"), */ \ + /* "sched_ext_ops.dump() missing, kernel too old?"); */ \ \ __skel = __scx_name##__open(); \ SCX_BUG_ON(!__skel, "Could not open " #__scx_name); \ diff --git a/scheds/include/scx/user_exit_info.h b/scheds/include/scx/user_exit_info.h index 8ce273440..4856760b4 100644 --- a/scheds/include/scx/user_exit_info.h +++ b/scheds/include/scx/user_exit_info.h @@ -18,7 +18,7 @@ enum uei_sizes { struct user_exit_info { int kind; - s64 exit_code; + // s64 exit_code; char reason[UEI_REASON_LEN]; char msg[UEI_MSG_LEN]; }; @@ -32,6 +32,15 @@ struct user_exit_info { #endif #include +static inline void uei_record(struct user_exit_info *uei, + const struct scx_exit_info *ei) +{ + bpf_probe_read_kernel_str(uei->reason, sizeof(uei->reason), ei->reason); + bpf_probe_read_kernel_str(uei->msg, sizeof(uei->msg), ei->msg); + /* use __sync to force memory barrier */ + __sync_val_compare_and_swap(&uei->kind, uei->kind, ei->type); +} + #define UEI_DEFINE(__name) \ char RESIZABLE_ARRAY(data, __name##_dump); \ const volatile u32 __name##_dump_len; \ @@ -42,13 +51,13 @@ struct user_exit_info { sizeof(__uei_name.reason), (__ei)->reason); \ bpf_probe_read_kernel_str(__uei_name.msg, \ sizeof(__uei_name.msg), (__ei)->msg); \ - bpf_probe_read_kernel_str(__uei_name##_dump, \ - __uei_name##_dump_len, (__ei)->dump); \ - if (bpf_core_field_exists((__ei)->exit_code)) \ - __uei_name.exit_code = (__ei)->exit_code; \ + /* bpf_probe_read_kernel_str(__uei_name##_dump, */ \ + /* __uei_name##_dump_len, (__ei)->dump); */ \ + /* if (bpf_core_field_exists((__ei)->exit_code)) */ \ + /* __uei_name.exit_code = (__ei)->exit_code; */ \ /* use __sync to force memory barrier */ \ - __sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind, \ - (__ei)->kind); \ + __sync_val_compare_and_swap(&__uei_name.type, __uei_name.type, \ + (__ei)->type); \ }) #else /* !__bpf__ */ @@ -57,11 
+66,11 @@ struct user_exit_info { #include /* no need to call the following explicitly if SCX_OPS_LOAD() is used */ -#define UEI_SET_SIZE(__skel, __ops_name, __uei_name) ({ \ - u32 __len = (__skel)->struct_ops.__ops_name->exit_dump_len ?: UEI_DUMP_DFL_LEN; \ - (__skel)->rodata->__uei_name##_dump_len = __len; \ - RESIZE_ARRAY((__skel), data, __uei_name##_dump, __len); \ -}) +// #define UEI_SET_SIZE(__skel, __ops_name, __uei_name) ({ \ +// u32 __len = (__skel)->struct_ops.__ops_name->exit_dump_len ?: UEI_DUMP_DFL_LEN; \ +// (__skel)->rodata->__uei_name##_dump_len = __len; \ +// RESIZE_ARRAY((__skel), data, __uei_name##_dump, __len); \ +// }) #define UEI_EXITED(__skel, __uei_name) ({ \ /* use __sync to force memory barrier */ \ @@ -70,18 +79,18 @@ struct user_exit_info { #define UEI_REPORT(__skel, __uei_name) ({ \ struct user_exit_info *__uei = &(__skel)->data->__uei_name; \ - char *__uei_dump = (__skel)->data_##__uei_name##_dump->__uei_name##_dump; \ - if (__uei_dump[0] != '\0') { \ - fputs("\nDEBUG DUMP\n", stderr); \ - fputs("================================================================================\n\n", stderr); \ - fputs(__uei_dump, stderr); \ - fputs("\n================================================================================\n\n", stderr); \ - } \ + /* char *__uei_dump = (__skel)->data_##__uei_name##_dump->__uei_name##_dump; *\ \ + /* if (__uei_dump[0] != '\0') { *\ \ + /* fputs("\nDEBUG DUMP\n", stderr); *\ \ + /* fputs("================================================================================\n\n", stderr); *\ \ + /* fputs(__uei_dump, stderr); *\ \ + /* fputs("\n================================================================================\n\n", stderr); *\ \ + /* } *\ \ fprintf(stderr, "EXIT: %s", __uei->reason); \ if (__uei->msg[0] != '\0') \ fprintf(stderr, " (%s)", __uei->msg); \ fputs("\n", stderr); \ - __uei->exit_code; \ + /* __uei->exit_code; */ \ }) /* diff --git a/scheds/rust/scx_layered/src/bpf/main.bpf.c b/scheds/rust/scx_layered/src/bpf/main.bpf.c index 59fb749f7..adb7dd1cc 100644 --- a/scheds/rust/scx_layered/src/bpf/main.bpf.c +++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c @@ -47,9 +47,14 @@ static u32 preempt_cursor; #define dbg(fmt, args...) do { if (debug) bpf_printk(fmt, ##args); } while (0) #define trace(fmt, args...) do { if (debug > 1) bpf_printk(fmt, ##args); } while (0) -#include "util.bpf.c" +#include "util.bpf.h" -UEI_DEFINE(uei); +#define __COMPAT_scx_bpf_error(fmt, args...) 
\ + do { \ + bpf_printk(fmt, ##args); \ + } while (0) + +struct user_exit_info uei; static inline bool vtime_before(u64 a, u64 b) { @@ -113,7 +118,7 @@ static u32 dsq_iter_rr_cpu_ctx(u32 layer_idx, int idx) offset -= nr_layers; if (offset > MAX_LAYERS) { - scx_bpf_error("invalid layer id %u", layer_idx); + __COMPAT_scx_bpf_error("invalid layer id %u", layer_idx); return 0; } return offset; @@ -136,7 +141,7 @@ static __noinline u32 iter_layer_dsq_ctx(int idx, u32 layer_idx) u32 ret; ret = dsq_iter_weight_ctx(idx); if (ret >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return ret; } return ret; @@ -147,7 +152,7 @@ static __noinline u32 iter_layer_dsq_ctx(int idx, u32 layer_idx) return dsq_iter_weight_ctx(idx); } default: - scx_bpf_error("unknown dsq iter algo"); + __COMPAT_scx_bpf_error("unknown dsq iter algo"); return 0; } } @@ -164,7 +169,7 @@ static __noinline u32 cpu_to_llc_id(s32 cpu_id) llc_ptr = MEMBER_VPTR(cpu_llc_id_map, [cpu_id]); if (!llc_ptr) { - scx_bpf_error("Couldn't look up llc ID for cpu %d", cpu_id); + __COMPAT_scx_bpf_error("Couldn't look up llc ID for cpu %d", cpu_id); return 0; } return *llc_ptr; @@ -176,7 +181,7 @@ u32 llc_node_id(u32 llc_id) llc_ptr = MEMBER_VPTR(llc_numa_id_map, [llc_id]); if (!llc_ptr) { - scx_bpf_error("Couldn't look up llc ID for %d", llc_id); + __COMPAT_scx_bpf_error("Couldn't look up llc ID for %d", llc_id); return 0; } return *llc_ptr; @@ -232,13 +237,28 @@ static struct cpu_ctx *lookup_cpu_ctx(int cpu) cctx = bpf_map_lookup_percpu_elem(&cpu_ctxs, &zero, cpu); if (!cctx) { - scx_bpf_error("no cpu_ctx for cpu %d", cpu); + __COMPAT_scx_bpf_error("no cpu_ctx for cpu %d", cpu); return NULL; } return cctx; } +static u32 cpu_to_llc_id(s32 cpu_id) +{ + const volatile u32 *llc_ptr; + + llc_ptr = MEMBER_VPTR(cpu_llc_id_map, [cpu_id]); + if (!llc_ptr) { + __COMPAT_scx_bpf_error("Couldn't look up llc ID for cpu %d", cpu_id); + return 0; + } + return *llc_ptr; +} + +/* + * Numa node context + */ struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, u32); @@ -274,7 +294,7 @@ static struct cache_ctx *lookup_cache_ctx(u32 cache_idx) static void gstat_inc(enum global_stat_idx idx, struct cpu_ctx *cctx) { if (idx < 0 || idx >= NR_GSTATS) { - scx_bpf_error("invalid global stat idx %d", idx); + __COMPAT_scx_bpf_error("invalid global stat idx %d", idx); return; } @@ -289,7 +309,7 @@ static void lstat_add(enum layer_stat_idx idx, struct layer *layer, if ((vptr = MEMBER_VPTR(*cctx, .lstats[layer->idx][idx]))) (*vptr) += delta; else - scx_bpf_error("invalid layer or stat idxs: %d, %d", idx, layer->idx); + __COMPAT_scx_bpf_error("invalid layer or stat idxs: %d, %d", idx, layer->idx); } static void lstat_inc(enum layer_stat_idx idx, struct layer *layer, @@ -319,7 +339,7 @@ static void adj_load(u32 layer_idx, s64 adj, u64 now) lockw = bpf_map_lookup_elem(&layer_load_locks, &layer_idx); if (!layer || !lockw) { - scx_bpf_error("Can't access layer%d or its load_lock", layer_idx); + __COMPAT_scx_bpf_error("Can't access layer%d or its load_lock", layer_idx); return; } @@ -329,7 +349,7 @@ static void adj_load(u32 layer_idx, s64 adj, u64 now) bpf_spin_unlock(&lockw->lock); if (debug && adj < 0 && (s64)layer->load < 0) - scx_bpf_error("cpu%d layer%d load underflow (load=%lld adj=%lld)", + __COMPAT_scx_bpf_error("cpu%d layer%d load underflow (load=%lld adj=%lld)", bpf_get_smp_processor_id(), layer_idx, layer->load, adj); } @@ -352,7 +372,7 @@ static struct cpumask *lookup_layer_cpumask(int idx) if ((cpumaskw = 
bpf_map_lookup_elem(&layer_cpumasks, &idx))) { return (struct cpumask *)cpumaskw->cpumask; } else { - scx_bpf_error("no layer_cpumask"); + __COMPAT_scx_bpf_error("no layer_cpumask"); return NULL; } } @@ -373,7 +393,7 @@ static void refresh_cpumasks(int idx) u8 *u8_ptr; if (!(cctx = lookup_cpu_ctx(cpu))) { - scx_bpf_error("unknown cpu"); + __COMPAT_scx_bpf_error("unknown cpu"); return; } @@ -385,7 +405,7 @@ static void refresh_cpumasks(int idx) */ barrier_var(cpumaskw); if (!cpumaskw || !cpumaskw->cpumask) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } @@ -396,14 +416,14 @@ static void refresh_cpumasks(int idx) bpf_cpumask_clear_cpu(cpu, cpumaskw->cpumask); } } else { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); } } // XXX - shouldn't be necessary layer = MEMBER_VPTR(layers, [idx]); if (!layer) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } @@ -412,6 +432,18 @@ static void refresh_cpumasks(int idx) trace("LAYER[%d] now has %d cpus, seq=%llu", idx, layer->nr_cpus, layer->cpus_seq); } +u32 llc_node_id(u32 llc_id) +{ + const volatile u32 *llc_ptr; + + llc_ptr = MEMBER_VPTR(llc_numa_id_map, [llc_id]); + if (!llc_ptr) { + __COMPAT_scx_bpf_error("Couldn't look up llc ID for %d", llc_id); + return 0; + } + return *llc_ptr; +} + SEC("fentry") int BPF_PROG(sched_tick_fentry) { @@ -433,6 +465,7 @@ struct task_ctx { struct bpf_cpumask __kptr *layered_cpumask; bool all_cpus_allowed; + bool dispatch_local; u64 runnable_at; u64 running_at; }; @@ -454,7 +487,7 @@ static struct task_ctx *lookup_task_ctx(struct task_struct *p) struct task_ctx *tctx = lookup_task_ctx_may_fail(p); if (!tctx) - scx_bpf_error("task_ctx lookup failed"); + __COMPAT_scx_bpf_error("task_ctx lookup failed"); return tctx; } @@ -462,7 +495,7 @@ static struct task_ctx *lookup_task_ctx(struct task_struct *p) static struct layer *lookup_layer(int idx) { if (idx < 0 || idx >= nr_layers) { - scx_bpf_error("invalid layer %d", idx); + __COMPAT_scx_bpf_error("invalid layer %d", idx); return NULL; } return &layers[idx]; @@ -495,7 +528,7 @@ int BPF_PROG(tp_cgroup_attach_task, struct cgroup *cgrp, const char *cgrp_path, thread_head = &leader->signal->thread_head; if (!(next = bpf_task_acquire(leader))) { - scx_bpf_error("failed to acquire leader"); + __COMPAT_scx_bpf_error("failed to acquire leader"); return 0; } @@ -808,9 +841,8 @@ s32 BPF_STRUCT_OPS(layered_select_cpu, struct task_struct *p, s32 prev_cpu, u64 cpu = pick_idle_cpu(p, prev_cpu, cctx, tctx, layer, true); if (cpu >= 0) { - lstat_inc(LSTAT_SEL_LOCAL, layer, cctx); u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns; - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, layer_slice_ns, 0); + tctx->dispatch_local = true; return cpu; } else { return prev_cpu; @@ -828,7 +860,7 @@ bool pick_idle_cpu_and_kick(struct task_struct *p, s32 task_cpu, if (cpu >= 0) { lstat_inc(LSTAT_KICK, layer, cctx); - scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE); + scx_bpf_kick_cpu(cpu, 0 /*SCX_KICK_IDLE*/); return true; } else { return false; @@ -1052,6 +1084,13 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags) !(layer = lookup_layer(tctx->layer))) return; + if (tctx->dispatch_local) { + tctx->dispatch_local = false; + lstat_inc(LSTAT_SEL_LOCAL, layer, cctx); + scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, enq_flags); + return; + } + try_preempt_first = cctx->try_preempt_first; cctx->try_preempt_first = false; u64 layer_slice_ns = layer->slice_ns > 0 ? 
layer->slice_ns : slice_ns; @@ -1272,7 +1311,7 @@ void layered_dispatch_no_topo(s32 cpu, struct task_struct *prev) bpf_for(idx, 0, nr_layers) { layer_idx = iter_layer_dsq_ctx(idx, cctx->layer_idx); if (layer_idx >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } layer = MEMBER_VPTR(layers, [layer_idx]); @@ -1288,7 +1327,7 @@ void layered_dispatch_no_topo(s32 cpu, struct task_struct *prev) bpf_for(idx, 0, nr_layers) { layer_idx = iter_layer_dsq_ctx(idx, cctx->layer_idx); if (layer_idx >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } layer = MEMBER_VPTR(layers, [layer_idx]); @@ -1309,7 +1348,7 @@ void layered_dispatch_no_topo(s32 cpu, struct task_struct *prev) bpf_for(idx, 0, nr_layers) { layer_idx = iter_layer_dsq_ctx(idx, cctx->layer_idx); if (layer_idx >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } layer = MEMBER_VPTR(layers, [layer_idx]); @@ -1372,7 +1411,7 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev) bpf_for(idx, 0, nr_layers) { layer_idx = iter_layer_dsq_ctx(idx, cctx->layer_idx); if (layer_idx >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } layer = MEMBER_VPTR(layers, [layer_idx]); @@ -1392,7 +1431,7 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev) bpf_for(idx, 0, nr_layers) { layer_idx = iter_layer_dsq_ctx(idx, cctx->layer_idx); if (layer_idx >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } layer = MEMBER_VPTR(layers, [layer_idx]); @@ -1418,7 +1457,7 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev) bpf_for(idx, 0, nr_layers) { layer_idx = iter_layer_dsq_ctx(idx, cctx->layer_idx); if (layer_idx >= nr_layers) { - scx_bpf_error("can't happen"); + __COMPAT_scx_bpf_error("can't happen"); return; } layer = MEMBER_VPTR(layers, [layer_idx]); @@ -1482,7 +1521,7 @@ static __noinline bool match_one(struct layer_match *match, case MATCH_TGID_EQUALS: return p->tgid == match->tgid; default: - scx_bpf_error("invalid match kind %d", match->kind); + __COMPAT_scx_bpf_error("invalid match kind %d", match->kind); return result; } } @@ -1591,7 +1630,7 @@ static void maybe_refresh_layer(struct task_struct *p, struct task_ctx *tctx) */ p->scx.dsq_vtime = layer->vtime_now; } else { - scx_bpf_error("[%s]%d didn't match any layer", p->comm, p->pid); + __COMPAT_scx_bpf_error("[%s]%d didn't match any layer", p->comm, p->pid); } if (tctx->layer < nr_layers - 1) @@ -1605,13 +1644,13 @@ static s32 create_save_cpumask(struct bpf_cpumask **kptr) cpumask = bpf_cpumask_create(); if (!cpumask) { - scx_bpf_error("Failed to create cpumask"); + __COMPAT_scx_bpf_error("Failed to create cpumask"); return -ENOMEM; } cpumask = bpf_kptr_xchg(kptr, cpumask); if (cpumask) { - scx_bpf_error("kptr already had cpumask"); + __COMPAT_scx_bpf_error("kptr already had cpumask"); bpf_cpumask_release(cpumask); } @@ -1629,7 +1668,7 @@ static s32 create_node(u32 node_id) nodec = bpf_map_lookup_elem(&node_data, &node_id); if (!nodec) { /* Should never happen, it's created statically at load time. 
*/ - scx_bpf_error("No node%u", node_id); + __COMPAT_scx_bpf_error("No node%u", node_id); return -ENOENT; } nodec->id = node_id; @@ -1642,7 +1681,7 @@ static s32 create_node(u32 node_id) cpumask = nodec->cpumask; if (!cpumask) { bpf_rcu_read_unlock(); - scx_bpf_error("Failed to lookup node cpumask"); + __COMPAT_scx_bpf_error("Failed to lookup node cpumask"); return -ENOENT; } @@ -1651,7 +1690,7 @@ static s32 create_node(u32 node_id) nmask = MEMBER_VPTR(numa_cpumasks, [node_id][cpu / 64]); if (!nmask) { - scx_bpf_error("array index error"); + __COMPAT_scx_bpf_error("array index error"); ret = -ENOENT; break; } @@ -1659,7 +1698,7 @@ static s32 create_node(u32 node_id) if (*nmask & (1LLU << (cpu % 64))) { bpf_cpumask_set_cpu(cpu, cpumask); if (!(cctx = lookup_cpu_ctx(cpu))) { - scx_bpf_error("cpu ctx error"); + __COMPAT_scx_bpf_error("cpu ctx error"); ret = -ENOENT; break; } @@ -1685,7 +1724,7 @@ static s32 create_cache(u32 cache_id) cachec = bpf_map_lookup_elem(&cache_data, &cache_id); if (!cachec) { - scx_bpf_error("No cache%u", cache_id); + __COMPAT_scx_bpf_error("No cache%u", cache_id); return -ENOENT; } cachec->id = cache_id; @@ -1698,14 +1737,14 @@ static s32 create_cache(u32 cache_id) cpumask = cachec->cpumask; if (!cpumask) { bpf_rcu_read_unlock(); - scx_bpf_error("Failed to lookup node cpumask"); + __COMPAT_scx_bpf_error("Failed to lookup node cpumask"); return -ENOENT; } bpf_for(cpu, 0, nr_possible_cpus) { if (!(cctx = lookup_cpu_ctx(cpu))) { bpf_rcu_read_unlock(); - scx_bpf_error("cpu ctx error"); + __COMPAT_scx_bpf_error("cpu ctx error"); return -ENOENT; } @@ -1823,7 +1862,7 @@ void BPF_STRUCT_OPS(layered_running, struct task_struct *p) } if (layer->perf > 0) - scx_bpf_cpuperf_set(task_cpu, layer->perf); + __COMPAT_scx_bpf_cpuperf_set(task_cpu, layer->perf); cctx->maybe_idle = false; } @@ -1918,7 +1957,7 @@ void BPF_STRUCT_OPS(layered_set_cpumask, struct task_struct *p, return; if (!all_cpumask) { - scx_bpf_error("NULL all_cpumask"); + __COMPAT_scx_bpf_error("NULL all_cpumask"); return; } @@ -1932,8 +1971,7 @@ void BPF_STRUCT_OPS(layered_cpu_release, s32 cpu, scx_bpf_reenqueue_local(); } -s32 BPF_STRUCT_OPS(layered_init_task, struct task_struct *p, - struct scx_init_task_args *args) +s32 BPF_STRUCT_OPS(layered_prep_enable, struct task_struct *p, struct scx_enable_args *args) { struct task_ctx *tctx; struct bpf_cpumask *cpumask; @@ -1946,7 +1984,7 @@ s32 BPF_STRUCT_OPS(layered_init_task, struct task_struct *p, tctx = bpf_task_storage_get(&task_ctxs, p, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!tctx) { - scx_bpf_error("task_ctx allocation failure"); + __COMPAT_scx_bpf_error("task_ctx allocation failure"); return -ENOMEM; } @@ -1970,7 +2008,7 @@ s32 BPF_STRUCT_OPS(layered_init_task, struct task_struct *p, tctx->all_cpus_allowed = bpf_cpumask_subset((const struct cpumask *)all_cpumask, p->cpus_ptr); else - scx_bpf_error("missing all_cpumask"); + __COMPAT_scx_bpf_error("missing all_cpumask"); /* * We are matching cgroup hierarchy path directly rather than the CPU @@ -1982,8 +2020,7 @@ s32 BPF_STRUCT_OPS(layered_init_task, struct task_struct *p, return 0; } -void BPF_STRUCT_OPS(layered_exit_task, struct task_struct *p, - struct scx_exit_task_args *args) +void BPF_STRUCT_OPS(layered_disable, struct task_struct *p) { struct cpu_ctx *cctx; struct task_ctx *tctx; @@ -1995,49 +2032,50 @@ void BPF_STRUCT_OPS(layered_exit_task, struct task_struct *p, __sync_fetch_and_add(&layers[tctx->layer].nr_tasks, -1); } -static u64 dsq_first_runnable_for_ms(u64 dsq_id, u64 now) -{ - struct task_struct *p; 
- - if (dsq_id > LO_FALLBACK_DSQ) - return 0; +// static u64 dsq_first_runnable_for_ms(u64 dsq_id, u64 now) +// { +// struct task_struct *p; +// +// if (dsq_id > LO_FALLBACK_DSQ) +// return 0; +// +// bpf_for_each(scx_dsq, p, dsq_id, 0) { +// struct task_ctx *tctx; +// +// if ((tctx = lookup_task_ctx(p))) +// return (now - tctx->runnable_at) / 1000000; +// } +// +// return 0; +// } - bpf_for_each(scx_dsq, p, dsq_id, 0) { - struct task_ctx *tctx; - - if ((tctx = lookup_task_ctx(p))) - return (now - tctx->runnable_at) / 1000000; - } - - return 0; -} static void dump_layer_cpumask(int idx) { - struct cpumask *layer_cpumask; - s32 cpu; - char buf[128] = "", *p; - - if (!(layer_cpumask = lookup_layer_cpumask(idx))) - return; - - bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) { - if (!(p = MEMBER_VPTR(buf, [idx++]))) - break; - if (bpf_cpumask_test_cpu(cpu, layer_cpumask)) - *p++ = '0' + cpu % 10; - else - *p++ = '.'; - - if ((cpu & 7) == 7) { - if (!(p = MEMBER_VPTR(buf, [idx++]))) - break; - *p++ = '|'; - } - } - buf[sizeof(buf) - 1] = '\0'; - - scx_bpf_dump("%s", buf); + /* struct cpumask *layer_cpumask; */ + /* s32 cpu; */ + /* char buf[128] = "", *p; */ + /**/ + /* if (!(layer_cpumask = lookup_layer_cpumask(idx))) */ + /* return; */ + /**/ + /* bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) { */ + /* if (!(p = MEMBER_VPTR(buf, [idx++]))) */ + /* break; */ + /* if (bpf_cpumask_test_cpu(cpu, layer_cpumask)) */ + /* *p++ = '0' + cpu % 10; */ + /* else */ + /* *p++ = '.'; */ + /**/ + /* if ((cpu & 7) == 7) { */ + /* if (!(p = MEMBER_VPTR(buf, [idx++]))) */ + /* break; */ + /* *p++ = '|'; */ + /* } */ + /* } */ + /* buf[sizeof(buf) - 1] = '\0'; */ + /**/ + /* scx_bpf_dump("%s", buf); */ } void BPF_STRUCT_OPS(layered_dump, struct scx_dump_ctx *dctx) @@ -2047,50 +2085,185 @@ void BPF_STRUCT_OPS(layered_dump, struct scx_dump_ctx *dctx) int i, j, idx; struct layer *layer; - bpf_for(i, 0, nr_layers) { - layer = lookup_layer(i); - if (!layer) { - scx_bpf_error("unabled to lookup layer %d", i); - continue; - } - - if (disable_topology) { - scx_bpf_dump("LAYER[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", - i, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(i), - dsq_first_runnable_for_ms(i, now)); - } else { - bpf_for(j, 0, nr_llcs) { - if (!(layer->cache_mask & (1 << j))) - continue; - - idx = layer_dsq_id(layer->idx, j); - scx_bpf_dump("LAYER[%d]DSQ[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", - i, idx, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(idx), - dsq_first_runnable_for_ms(idx, now)); - scx_bpf_dump("\n"); - } - } - dump_layer_cpumask(i); - scx_bpf_dump("\n"); - } - bpf_for(i, 0, nr_llcs) { - dsq_id = llc_hi_fallback_dsq_id(i); - scx_bpf_dump("HI_FALLBACK[%llu] nr_queued=%d -%llums\n", - dsq_id, scx_bpf_dsq_nr_queued(dsq_id), - dsq_first_runnable_for_ms(dsq_id, now)); - } - scx_bpf_dump("LO_FALLBACK nr_queued=%d -%llums\n", - scx_bpf_dsq_nr_queued(LO_FALLBACK_DSQ), - dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now)); + /* bpf_for(i, 0, nr_layers) { */ + /* layer = lookup_layer(i); */ + /* if (!layer) { */ + /* __COMPAT_scx_bpf_error("unabled to lookup layer %d", i); */ + /* continue; */ + /* } */ + /**/ + /* if (disable_topology) { */ + /* scx_bpf_dump("LAYER[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", */ + /* i, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(i), */ + /* dsq_first_runnable_for_ms(i, now)); */ + /* } else { */ + /* bpf_for(j, 0, nr_llcs) { */ + /* if (!(layer->cache_mask & (1 << j))) */ + /* continue; */ + /**/ + /* idx = layer_dsq_id(layer->idx, j); */ + /* scx_bpf_dump("LAYER[%d]DSQ[%d] nr_cpus=%u nr_queued=%d 
-%llums cpus=", */ + /* i, idx, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(idx), */ + /* dsq_first_runnable_for_ms(idx, now)); */ + /* scx_bpf_dump("\n"); */ + /* } */ + /* } */ + /* dump_layer_cpumask(i); */ + /* scx_bpf_dump("\n"); */ + /* } */ + /* bpf_for(i, 0, nr_llcs) { */ + /* dsq_id = llc_hi_fallback_dsq_id(i); */ + /* scx_bpf_dump("HI_FALLBACK[%llu] nr_queued=%d -%llums\n", */ + /* dsq_id, scx_bpf_dsq_nr_queued(dsq_id), */ + /* dsq_first_runnable_for_ms(dsq_id, now)); */ + /* } */ + /* scx_bpf_dump("LO_FALLBACK nr_queued=%d -%llums\n", */ + /* scx_bpf_dsq_nr_queued(LO_FALLBACK_DSQ), */ + /* dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now)); */ } +static void dump_layer_cpumask(int idx) +{ + /* struct cpumask *layer_cpumask; */ + /* s32 cpu; */ + /* char buf[128] = "", *p; */ + /**/ + /* if (!(layer_cpumask = lookup_layer_cpumask(idx))) */ + /* return; */ + /**/ + /* bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) { */ + /* if (!(p = MEMBER_VPTR(buf, [idx++]))) */ + /* break; */ + /* if (bpf_cpumask_test_cpu(cpu, layer_cpumask)) */ + /* *p++ = '0' + cpu % 10; */ + /* else */ + /* *p++ = '.'; */ + /**/ + /* if ((cpu & 7) == 7) { */ + /* if (!(p = MEMBER_VPTR(buf, [idx++]))) */ + /* break; */ + /* *p++ = '|'; */ + /* } */ + /* } */ + /* buf[sizeof(buf) - 1] = '\0'; */ + /**/ + /* scx_bpf_dump("%s", buf); */ +} + +/* void BPF_STRUCT_OPS(layered_dump, struct scx_dump_ctx *dctx) */ +/* { */ +/* u64 now = bpf_ktime_get_ns(); */ +/* int i, j, idx; */ +/* struct layer *layer; */ +/**/ +/* bpf_for(i, 0, nr_layers) { */ +/* layer = lookup_layer(i); */ +/* if (!layer) { */ +/* __COMPAT_scx_bpf_error("unabled to lookup layer %d", i); */ +/* continue; */ +/* } */ +/**/ +/* if (disable_topology) { */ +/* scx_bpf_dump("LAYER[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", */ +/* i, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(i), */ +/* dsq_first_runnable_for_ms(i, now)); */ +/* } else { */ +/* bpf_for(j, 0, nr_llcs) { */ +/* if (!(layer->cache_mask & (1 << j))) */ +/* continue; */ +/**/ +/* idx = layer_dsq_id(layer->idx, j); */ +/* scx_bpf_dump("LAYER[%d]DSQ[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", */ +/* i, idx, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(idx), */ +/* dsq_first_runnable_for_ms(idx, now)); */ +/* } */ +/* } */ +/* dump_layer_cpumask(i); */ +/* scx_bpf_dump("\n"); */ +/* } */ +/**/ +/* scx_bpf_dump("HI_FALLBACK nr_queued=%d -%llums\n", */ +/* scx_bpf_dsq_nr_queued(HI_FALLBACK_DSQ), */ +/* dsq_first_runnable_for_ms(HI_FALLBACK_DSQ, now)); */ +/* scx_bpf_dump("LO_FALLBACK nr_queued=%d -%llums\n", */ +/* scx_bpf_dsq_nr_queued(LO_FALLBACK_DSQ), */ +/* dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now)); */ +/* } */ +// static void dump_layer_cpumask(int idx) +// { +// struct cpumask *layer_cpumask; +// s32 cpu; +// char buf[128] = "", *p; +// +// if (!(layer_cpumask = lookup_layer_cpumask(idx))) +// return; +// +// bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) { +// if (!(p = MEMBER_VPTR(buf, [idx++]))) +// break; +// if (bpf_cpumask_test_cpu(cpu, layer_cpumask)) +// *p++ = '0' + cpu % 10; +// else +// *p++ = '.'; +// +// if ((cpu & 7) == 7) { +// if (!(p = MEMBER_VPTR(buf, [idx++]))) +// break; +// *p++ = '|'; +// } +// } +// buf[sizeof(buf) - 1] = '\0'; +// +// scx_bpf_dump("%s", buf); +// } +// +// void BPF_STRUCT_OPS(layered_dump, struct scx_dump_ctx *dctx) +// { +// u64 now = bpf_ktime_get_ns(); +// int i, j, idx; +// struct layer *layer; +// +// bpf_for(i, 0, nr_layers) { +// layer = lookup_layer(i); +// if (!layer) { +// __COMPAT_scx_bpf_error("unabled to lookup layer %d", i); +// continue; +// } +// +// if 
(disable_topology) { +// scx_bpf_dump("LAYER[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", +// i, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(i), +// dsq_first_runnable_for_ms(i, now)); +// } else { +// bpf_for(j, 0, nr_llcs) { +// if (!(layer->cache_mask & (1 << j))) +// continue; +// +// idx = layer_dsq_id(layer->idx, j); +// scx_bpf_dump("LAYER[%d]DSQ[%d] nr_cpus=%u nr_queued=%d -%llums cpus=", +// i, idx, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(idx), +// dsq_first_runnable_for_ms(idx, now)); +// } +// } +// dump_layer_cpumask(i); +// scx_bpf_dump("\n"); +// } +// +// scx_bpf_dump("HI_FALLBACK nr_queued=%d -%llums\n", +// scx_bpf_dsq_nr_queued(HI_FALLBACK_DSQ), +// dsq_first_runnable_for_ms(HI_FALLBACK_DSQ, now)); +// scx_bpf_dump("LO_FALLBACK nr_queued=%d -%llums\n", +// scx_bpf_dsq_nr_queued(LO_FALLBACK_DSQ), +// dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now)); +// } + static void print_iter_order() { struct cpu_ctx *cctx; int i; u32 layer_idx; if (!(cctx = lookup_cpu_ctx(-1))) { - scx_bpf_error("failed to get cpu ctx"); + __COMPAT_scx_bpf_error("failed to get cpu ctx"); return; } @@ -2107,7 +2280,9 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init) struct cpu_ctx *cctx; int i, j, k, nr_online_cpus, ret; - ret = scx_bpf_create_dsq(LO_FALLBACK_DSQ, -1); + __COMPAT_scx_bpf_switch_all(); + + ret = scx_bpf_create_dsq(HI_FALLBACK_DSQ, -1); if (ret < 0) return ret; @@ -2176,19 +2351,19 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init) layer->exclusive); if (layer->nr_match_ors > MAX_LAYER_MATCH_ORS) { - scx_bpf_error("too many ORs"); + __COMPAT_scx_bpf_error("too many ORs"); return -EINVAL; } bpf_for(j, 0, layer->nr_match_ors) { struct layer_match_ands *ands = MEMBER_VPTR(layers, [i].matches[j]); if (!ands) { - scx_bpf_error("shouldn't happen"); + __COMPAT_scx_bpf_error("shouldn't happen"); return -EINVAL; } if (ands->nr_match_ands > NR_LAYER_MATCH_KINDS) { - scx_bpf_error("too many ANDs"); + __COMPAT_scx_bpf_error("too many ANDs"); return -EINVAL; } @@ -2204,7 +2379,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init) match = MEMBER_VPTR(layers, [i].matches[j].matches[k]); if (!match) { - scx_bpf_error("shouldn't happen"); + __COMPAT_scx_bpf_error("shouldn't happen"); return -EINVAL; } @@ -2243,7 +2418,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init) dbg("%s TGID %u", header, match->tgid); break; default: - scx_bpf_error("%s Invalid kind", header); + __COMPAT_scx_bpf_error("%s Invalid kind", header); return -EINVAL; } } @@ -2301,7 +2476,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init) void BPF_STRUCT_OPS(layered_exit, struct scx_exit_info *ei) { - UEI_RECORD(uei, ei); + uei_record(&uei, ei); } SCX_OPS_DEFINE(layered, @@ -2316,9 +2491,11 @@ SCX_OPS_DEFINE(layered, .set_weight = (void *)layered_set_weight, .set_cpumask = (void *)layered_set_cpumask, .cpu_release = (void *)layered_cpu_release, - .init_task = (void *)layered_init_task, - .exit_task = (void *)layered_exit_task, - .dump = (void *)layered_dump, + .prep_enable = (void *)layered_prep_enable, + .disable = (void *)layered_disable, + // .dump = (void *)layered_dump, .init = (void *)layered_init, .exit = (void *)layered_exit, + .flags = SCX_OPS_CGROUP_KNOB_WEIGHT | SCX_OPS_ENQ_LAST, .name = "layered"); + diff --git a/scheds/rust/scx_layered/src/bpf/util.bpf.h b/scheds/rust/scx_layered/src/bpf/util.bpf.h new file mode 120000 index 000000000..ee7b16c86 --- /dev/null +++ b/scheds/rust/scx_layered/src/bpf/util.bpf.h @@ -0,0 +1 @@ +util.bpf.c \ No newline at end of file diff --git a/scheds/rust/scx_layered/src/main.rs b/scheds/rust/scx_layered/src/main.rs 
index 17301321c..f359ac3b4 100644 --- a/scheds/rust/scx_layered/src/main.rs +++ b/scheds/rust/scx_layered/src/main.rs @@ -8,6 +8,11 @@ mod stats; pub use bpf_skel::*; pub mod bpf_intf; +use core::ffi::CStr; +use stats::LayerStats; +use stats::StatsReq; +use stats::StatsRes; +use stats::SysStats; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; @@ -47,23 +52,13 @@ use scx_stats::prelude::*; use scx_utils::compat; use scx_utils::init_libbpf_logging; use scx_utils::ravg::ravg_read; -use scx_utils::scx_ops_attach; -use scx_utils::scx_ops_load; -use scx_utils::scx_ops_open; -use scx_utils::uei_exited; -use scx_utils::uei_report; use scx_utils::Cache; use scx_utils::Core; use scx_utils::CoreType; use scx_utils::LoadAggregator; use scx_utils::Topology; -use scx_utils::UserExitInfo; use serde::Deserialize; use serde::Serialize; -use stats::LayerStats; -use stats::StatsReq; -use stats::StatsRes; -use stats::SysStats; const RAVG_FRAC_BITS: u32 = bpf_intf::ravg_consts_RAVG_FRAC_BITS; const MAX_CPUS: usize = bpf_intf::consts_MAX_CPUS as usize; @@ -462,6 +457,12 @@ struct Opts { #[clap(long)] monitor: Option, + /// DEPRECATED: Enable output of stats in OpenMetrics format instead of via + /// log macros. This option is useful if you want to collect stats in some + /// monitoring database like prometheseus. + #[clap(short = 'o', long)] + open_metrics_format: bool, + /// Run with example layer specifications (useful for e.g. CI pipelines) #[clap(long)] run_example: bool, @@ -1073,6 +1074,66 @@ impl Stats { } } +#[derive(Debug, Default)] +struct UserExitInfo { + kind: i32, + reason: Option, + msg: Option, +} + +impl UserExitInfo { + fn read(bpf_uei: &types::user_exit_info) -> Result { + let kind = unsafe { std::ptr::read_volatile(&bpf_uei.kind as *const _) }; + + let (reason, msg) = if kind != 0 { + ( + Some( + unsafe { CStr::from_ptr(bpf_uei.reason.as_ptr() as *const _) } + .to_str() + .context("Failed to convert reason to string")? + .to_string(), + ) + .filter(|s| !s.is_empty()), + Some( + unsafe { CStr::from_ptr(bpf_uei.msg.as_ptr() as *const _) } + .to_str() + .context("Failed to convert msg to string")? + .to_string(), + ) + .filter(|s| !s.is_empty()), + ) + } else { + (None, None) + }; + + Ok(Self { kind, reason, msg }) + } + + fn exited(bpf_uei: &types::user_exit_info) -> Result { + Ok(Self::read(bpf_uei)?.kind != 0) + } + + fn report(&self) -> Result<()> { + let why = match (&self.reason, &self.msg) { + (Some(reason), None) => format!("{}", reason), + (Some(reason), Some(msg)) => format!("{} ({})", reason, msg), + _ => "".into(), + }; + + match self.kind { + 0 => Ok(()), + etype => { + if etype != 64 { + bail!("EXIT: kind={} {}", etype, why); + } else { + info!("EXIT: {}", why); + Ok(()) + } + } + } + } +} + #[derive(Debug)] /// `CpuPool` represents the CPU core and logical CPU topology within the system. 
/// It manages the mapping and availability of physical and logical cores, including @@ -1344,6 +1405,18 @@ where index: usize, iters: Vec, } +fn layer_core_order(growth_algo: LayerGrowthAlgo, layer_idx: usize, topo: &Topology) -> Vec { + let mut core_order = vec![]; + match growth_algo { + LayerGrowthAlgo::Sticky => { + let is_left = layer_idx % 2 == 0; + let rot_by = |layer_idx, len| -> usize { + if layer_idx <= len { + layer_idx + } else { + layer_idx % len + } + }; impl IteratorInterleaver where @@ -1399,20 +1472,27 @@ struct Layer { } impl Layer { - fn new(spec: &LayerSpec, idx: usize, cpu_pool: &CpuPool, topo: &Topology) -> Result { - let name = &spec.name; - let kind = spec.kind.clone(); + fn new( + idx: usize, + cpu_pool: &CpuPool, + name: &str, + kind: LayerKind, + topo: &Topology, + ) -> Result { let mut cpus = bitvec![0; cpu_pool.nr_cpus]; cpus.fill(false); let mut allowed_cpus = bitvec![0; cpu_pool.nr_cpus]; + let mut layer_growth_algo = LayerGrowthAlgo::Sticky; match &kind { LayerKind::Confined { cpus_range, util_range, nodes, llcs, + growth_algo, .. } => { + layer_growth_algo = growth_algo.clone(); let cpus_range = cpus_range.unwrap_or((0, std::usize::MAX)); if cpus_range.0 > cpus_range.1 || cpus_range.1 == 0 { bail!("invalid cpus_range {:?}", cpus_range); @@ -1448,7 +1528,19 @@ impl Layer { bail!("invalid util_range {:?}", util_range); } } - LayerKind::Grouped { nodes, llcs, .. } | LayerKind::Open { nodes, llcs, .. } => { + LayerKind::Grouped { + growth_algo, + nodes, + llcs, + .. + } + | LayerKind::Open { + growth_algo, + nodes, + llcs, + .. + } => { + layer_growth_algo = growth_algo.clone(); if nodes.len() == 0 && llcs.len() == 0 { allowed_cpus.fill(true); } else { @@ -1473,6 +1565,7 @@ impl Layer { } } +<<<<<<< HEAD let layer_growth_algo = match &kind { LayerKind::Confined { growth_algo, .. } | LayerKind::Grouped { growth_algo, .. } @@ -1491,6 +1584,13 @@ impl Layer { layer_growth_algo.clone(), core_order ); + let layer_growth_algo = match &kind { + LayerKind::Confined { growth_algo, .. } + | LayerKind::Grouped { growth_algo, .. } + | LayerKind::Open { growth_algo, .. } => growth_algo.clone(), + }; + + let core_order = layer_core_order(layer_growth_algo, idx, topo); Ok(Self { name: name.into(), @@ -1536,7 +1636,8 @@ impl Layer { { trace!( "layer-{} needs more CPUs (util={:.3}) but is over the load fraction", - &self.name, layer_util + &self.name, + layer_util ); return Ok(false); } @@ -1880,7 +1981,7 @@ impl<'a, 'b> Scheduler<'a, 'b> { skel.maps.rodata_data.nr_llcs = 0; for node in topo.nodes() { - debug!( + info!( "configuring node {}, LLCs {:?}", node.id(), node.llcs().len() @@ -1897,7 +1998,7 @@ impl<'a, 'b> Scheduler<'a, 'b> { ); for (_, llc) in node.llcs() { - debug!("configuring llc {:?} for node {:?}", llc.id(), node.id()); + info!("configuring llc {:?} for node {:?}", llc.id(), node.id()); skel.maps.rodata_data.llc_numa_id_map[llc.id()] = node.id() as u32; } } @@ -1920,7 +2021,9 @@ impl<'a, 'b> Scheduler<'a, 'b> { let mut skel_builder = BpfSkelBuilder::default(); skel_builder.obj_builder.debug(opts.verbose > 1); init_libbpf_logging(None); - let mut skel = scx_ops_open!(skel_builder, open_object, layered)?; + let mut skel = skel_builder + .open(open_object) + .context("failed to open BPF program")?; // scheduler_tick() got renamed to sched_tick() during v6.10-rc. let sched_tick_name = match compat::ksym_exists("sched_tick")? 
{ @@ -1934,7 +2037,7 @@ impl<'a, 'b> Scheduler<'a, 'b> { .context("Failed to set attach target for sched_tick_fentry()")?; // Initialize skel according to @opts. - skel.struct_ops.layered_mut().exit_dump_len = opts.exit_dump_len; + // skel.struct_ops.layered_mut().exit_dump_len = opts.exit_dump_len; skel.maps.rodata_data.debug = opts.verbose as u32; skel.maps.rodata_data.slice_ns = opts.slice_us * 1000; @@ -1958,11 +2061,17 @@ impl<'a, 'b> Scheduler<'a, 'b> { Self::init_layers(&mut skel, opts, layer_specs, &topo)?; Self::init_nodes(&mut skel, opts, &topo); - let mut skel = scx_ops_load!(skel, layered, uei)?; + let mut skel = skel.load().context("Failed to load BPF program")?; let mut layers = vec![]; for (idx, spec) in layer_specs.iter().enumerate() { - layers.push(Layer::new(&spec, idx, &cpu_pool, &topo)?); + layers.push(Layer::new( + idx, + &cpu_pool, + &spec.name, + spec.kind.clone(), + &topo, + )?); } initialize_cpu_ctxs(&skel, &topo).unwrap(); @@ -1976,11 +2085,10 @@ impl<'a, 'b> Scheduler<'a, 'b> { // huge problem in the interim until we figure it out. // Attach. - let struct_ops = scx_ops_attach!(skel, layered)?; let stats_server = StatsServer::new(stats::server_data()).launch()?; - let sched = Self { - struct_ops: Some(struct_ops), + let mut sched = Self { + struct_ops: None, layer_specs, sched_intv: Duration::from_secs_f64(opts.interval), @@ -2002,6 +2110,20 @@ impl<'a, 'b> Scheduler<'a, 'b> { stats_server, }; + sched + .skel + .attach() + .context("Failed to attach BPF program")?; + + sched.struct_ops = Some( + sched + .skel + .maps + .layered + .attach_struct_ops() + .context("Failed to attach layered struct ops")?, + ); + info!("Layered Scheduler Attached. Run `scx_layered --monitor` for metrics."); Ok(sched) @@ -2167,12 +2289,14 @@ impl<'a, 'b> Scheduler<'a, 'b> { Ok(sys_stats) } - fn run(&mut self, shutdown: Arc) -> Result { + fn run(&mut self, shutdown: Arc) -> Result<()> { let (res_ch, req_ch) = self.stats_server.channels(); let mut next_sched_at = Instant::now() + self.sched_intv; let mut cpus_ranges = HashMap::>::new(); - while !shutdown.load(Ordering::Relaxed) && !uei_exited!(&self.skel, uei) { + while !shutdown.load(Ordering::Relaxed) + && !UserExitInfo::exited(&self.skel.maps.bss_data.uei)? 
+ { let now = Instant::now(); if now >= next_sched_at { @@ -2226,7 +2350,7 @@ impl<'a, 'b> Scheduler<'a, 'b> { } self.struct_ops.take(); - uei_report!(&self.skel, uei) + UserExitInfo::read(&self.skel.maps.bss_data.uei)?.report() } } @@ -2418,6 +2542,10 @@ fn main() -> Result<()> { ); } + if opts.open_metrics_format { + warn!("open_metrics_format is deprecated"); + } + debug!("specs={}", serde_json::to_string_pretty(&layer_config)?); verify_layer_specs(&layer_config.specs)?; @@ -2439,12 +2567,6 @@ fn main() -> Result<()> { } let mut open_object = MaybeUninit::uninit(); - loop { - let mut sched = Scheduler::init(&opts, &layer_config.specs, &mut open_object)?; - if !sched.run(shutdown.clone())?.should_restart() { - break; - } - } - - Ok(()) + let mut sched = Scheduler::init(&opts, &layer_config.specs, &mut open_object)?; + sched.run(shutdown.clone()) } diff --git a/scheds/rust/scx_layered/src/stats.rs b/scheds/rust/scx_layered/src/stats.rs index 9eaf4c4a1..7f3709107 100644 --- a/scheds/rust/scx_layered/src/stats.rs +++ b/scheds/rust/scx_layered/src/stats.rs @@ -171,7 +171,11 @@ impl LayerStats { } }; let calc_frac = |a, b| { - if b != 0.0 { a / b * 100.0 } else { 0.0 } + if b != 0.0 { + a / b * 100.0 + } else { + 0.0 + } }; let is_excl = match &layer.kind { diff --git a/scheds/rust/scx_rusty/src/bpf/main.bpf.c b/scheds/rust/scx_rusty/src/bpf/main.bpf.c index 6e983aac8..2ea06e442 100644 --- a/scheds/rust/scx_rusty/src/bpf/main.bpf.c +++ b/scheds/rust/scx_rusty/src/bpf/main.bpf.c @@ -56,7 +56,7 @@ char _license[] SEC("license") = "GPL"; -UEI_DEFINE(uei); + /* * const volatiles are set during initialization and treated as consts by the @@ -86,6 +86,13 @@ const volatile u32 debug; /* base slice duration */ static u64 slice_ns = SCX_SLICE_DFL; +#define __COMPAT_scx_bpf_error(fmt, args...) 
\ + do { \ + bpf_printk(fmt, ##args); \ + } while (0) + +struct user_exit_info uei; + /* * Per-CPU context */ @@ -180,7 +187,7 @@ static struct dom_ctx *lookup_dom_ctx(u32 dom_id) domc = try_lookup_dom_ctx(dom_id); if (!domc) - scx_bpf_error("Failed to lookup dom[%u]", dom_id); + __COMPAT_scx_bpf_error("Failed to lookup dom[%u]", dom_id); return domc; } @@ -193,7 +200,7 @@ static u64 t_to_tptr(struct task_struct *p) err = bpf_probe_read_kernel(&tptr, sizeof(tptr), &p); if (err){ - scx_bpf_error("Failed to cast task_struct addr to tptr"); + __COMPAT_scx_bpf_error("Failed to cast task_struct addr to tptr"); return 0; } return tptr; @@ -215,7 +222,7 @@ static struct task_ctx *lookup_task_ctx(struct task_struct *p) taskc = try_lookup_task_ctx(p); if (!taskc) - scx_bpf_error("task_ctx lookup failed for tptr %llu", tptr); + __COMPAT_scx_bpf_error("task_ctx lookup failed for tptr %llu", tptr); return taskc; } @@ -226,7 +233,7 @@ static struct pcpu_ctx *lookup_pcpu_ctx(s32 cpu) pcpuc = MEMBER_VPTR(pcpu_ctx, [cpu]); if (!pcpuc) - scx_bpf_error("Failed to lookup pcpu ctx for %d", cpu); + __COMPAT_scx_bpf_error("Failed to lookup pcpu ctx for %d", cpu); return pcpuc; } @@ -255,7 +262,7 @@ static struct bucket_ctx *lookup_dom_bucket(struct dom_ctx *dom_ctx, if (bucket) return bucket; - scx_bpf_error("Failed to lookup dom bucket"); + __COMPAT_scx_bpf_error("Failed to lookup dom bucket"); return NULL; } @@ -268,7 +275,7 @@ static struct lock_wrapper *lookup_dom_bkt_lock(u32 dom_id, u32 weight) if (lockw) return lockw; - scx_bpf_error("Failed to lookup dom lock"); + __COMPAT_scx_bpf_error("Failed to lookup dom lock"); return NULL; } @@ -279,7 +286,7 @@ static struct lock_wrapper *lookup_dom_vtime_lock(u32 dom_id) lockw = bpf_map_lookup_elem(&dom_vtime_locks, &idx); if (!lockw) - scx_bpf_error("Failed to lookup dom lock"); + __COMPAT_scx_bpf_error("Failed to lookup dom lock"); return lockw; } @@ -322,7 +329,7 @@ static void dom_dcycle_adj(u32 dom_id, u32 weight, u64 now, bool runnable) bpf_spin_unlock(&lockw->lock); if (adj < 0 && (s64)bucket->dcycle < 0) - scx_bpf_error("cpu%d dom%u bucket%u load underflow (dcycle=%lld adj=%lld)", + __COMPAT_scx_bpf_error("cpu%d dom%u bucket%u load underflow (dcycle=%lld adj=%lld)", bpf_get_smp_processor_id(), dom_id, bucket_idx, bucket->dcycle, adj); @@ -424,7 +431,7 @@ static struct task_struct *tptr_to_task(u64 tptr) err_task = bpf_probe_read_kernel(&p, sizeof(struct task_struct *), &task); if (err_task) - scx_bpf_error("Failed to retrieve task_struct for tptr %llu", tptr); + __COMPAT_scx_bpf_error("Failed to retrieve task_struct for tptr %llu", tptr); if (p) return p; } @@ -438,7 +445,7 @@ int dom_xfer_task(u64 tptr, u32 new_dom_id, u64 now) p = tptr_to_task(tptr); if (!p) { - scx_bpf_error("Failed to lookup task %llu", tptr); + __COMPAT_scx_bpf_error("Failed to lookup task %llu", tptr); return 0; } @@ -599,7 +606,7 @@ const int sched_prio_to_weight[DL_MAX_LAT_PRIO + 1] = { static u64 sched_prio_to_latency_weight(u64 prio) { if (prio >= DL_MAX_LAT_PRIO) { - scx_bpf_error("Invalid prio index"); + __COMPAT_scx_bpf_error("Invalid prio index"); return 0; } @@ -789,7 +796,7 @@ static bool task_set_domain(struct task_ctx *taskc, struct task_struct *p, t_cpumask = taskc->cpumask; if (!t_cpumask) { - scx_bpf_error("Failed to look up task cpumask"); + __COMPAT_scx_bpf_error("Failed to look up task cpumask"); return false; } @@ -808,7 +815,7 @@ static bool task_set_domain(struct task_ctx *taskc, struct task_struct *p, d_cpumask = new_domc->cpumask; if (!d_cpumask) { - 
scx_bpf_error("Failed to get dom%u cpumask kptr", + __COMPAT_scx_bpf_error("Failed to get dom%u cpumask kptr", new_dom_id); return false; } @@ -859,7 +866,7 @@ static s32 try_sync_wakeup(struct task_struct *p, struct task_ctx *taskc, d_cpumask = domc->cpumask; if (!d_cpumask) { - scx_bpf_error("Failed to acquire dom%u cpumask kptr", + __COMPAT_scx_bpf_error("Failed to acquire dom%u cpumask kptr", taskc->dom_id); return -ENOENT; } @@ -1021,7 +1028,7 @@ s32 BPF_STRUCT_OPS(rusty_select_cpu, struct task_struct *p, s32 prev_cpu, tmp_direct_greedy = direct_greedy_cpumask; if (!tmp_direct_greedy) { - scx_bpf_error("Failed to lookup direct_greedy mask"); + __COMPAT_scx_bpf_error("Failed to lookup direct_greedy mask"); goto enoent; } /* @@ -1035,13 +1042,13 @@ s32 BPF_STRUCT_OPS(rusty_select_cpu, struct task_struct *p, s32 prev_cpu, if (!direct_greedy_numa && domc) { node_mask = domc->node_cpumask; if (!node_mask) { - scx_bpf_error("Failed to lookup node mask"); + __COMPAT_scx_bpf_error("Failed to lookup node mask"); goto enoent; } tmp_cpumask = bpf_kptr_xchg(&taskc->tmp_cpumask, NULL); if (!tmp_cpumask) { - scx_bpf_error("Failed to lookup tmp cpumask"); + __COMPAT_scx_bpf_error("Failed to lookup tmp cpumask"); goto enoent; } bpf_cpumask_and(tmp_cpumask, @@ -1149,7 +1156,7 @@ void BPF_STRUCT_OPS(rusty_enqueue, struct task_struct *p, u64 enq_flags) if (!(taskc = lookup_task_ctx(p))) return; if (!(p_cpumask = taskc->cpumask)) { - scx_bpf_error("NULL cpumask"); + __COMPAT_scx_bpf_error("NULL cpumask"); return; } @@ -1240,7 +1247,7 @@ u32 dom_node_id(u32 dom_id) nid_ptr = MEMBER_VPTR(dom_numa_id_map, [dom_id]); if (!nid_ptr) { - scx_bpf_error("Couldn't look up node ID for %d", dom_id); + __COMPAT_scx_bpf_error("Couldn't look up node ID for %d", dom_id); return 0; } return *nid_ptr; @@ -1443,7 +1450,7 @@ void BPF_STRUCT_OPS(rusty_running, struct task_struct *p) dom_id = taskc->dom_id; if (dom_id >= MAX_DOMS) { - scx_bpf_error("Invalid dom ID"); + __COMPAT_scx_bpf_error("Invalid dom ID"); return; } @@ -1460,7 +1467,7 @@ void BPF_STRUCT_OPS(rusty_running, struct task_struct *p) tptrp = MEMBER_VPTR(dom_active_tptrs, [dom_id].tptrs[idx]); if (!tptrp) { - scx_bpf_error("dom_active_tptrs[%u][%llu] indexing failed", + __COMPAT_scx_bpf_error("dom_active_tptrs[%u][%llu] indexing failed", dom_id, idx); return; } @@ -1607,7 +1614,7 @@ static void task_pick_and_set_domain(struct task_ctx *taskc, dom_id = task_pick_domain(taskc, p, cpumask); if (!task_set_domain(taskc, p, dom_id, init_dsq_vtime)) - scx_bpf_error("Failed to set dom%d for %s[%llu]", + __COMPAT_scx_bpf_error("Failed to set dom%d for %s[%llu]", dom_id, p->comm, tptr); } @@ -1631,21 +1638,21 @@ static s32 create_save_cpumask(struct bpf_cpumask **kptr) cpumask = bpf_cpumask_create(); if (!cpumask) { - scx_bpf_error("Failed to create cpumask"); + __COMPAT_scx_bpf_error("Failed to create cpumask"); return -ENOMEM; } cpumask = bpf_kptr_xchg(kptr, cpumask); if (cpumask) { - scx_bpf_error("kptr already had cpumask"); + __COMPAT_scx_bpf_error("kptr already had cpumask"); bpf_cpumask_release(cpumask); } return 0; } -s32 BPF_STRUCT_OPS(rusty_init_task, struct task_struct *p, - struct scx_init_task_args *args) +s32 BPF_STRUCT_OPS(rusty_prep_enable, struct task_struct *p, + struct scx_enable_args *args) { u64 now = bpf_ktime_get_ns(); struct task_ctx taskc = { @@ -1700,9 +1707,9 @@ s32 BPF_STRUCT_OPS(rusty_init_task, struct task_struct *p, return 0; } - -void BPF_STRUCT_OPS(rusty_exit_task, struct task_struct *p, - struct scx_exit_task_args *args) +/* +void 
BPF_STRUCT_OPS(rusty_cancel_enable, struct task_struct *p, + struct scx_enable_args *args) { u64 tptr = t_to_tptr(p); long ret; @@ -1719,6 +1726,7 @@ void BPF_STRUCT_OPS(rusty_exit_task, struct task_struct *p, return; } } +*/ static s32 create_node(u32 node_id) { @@ -1730,7 +1738,7 @@ static s32 create_node(u32 node_id) nodec = bpf_map_lookup_elem(&node_data, &node_id); if (!nodec) { /* Should never happen, it's created statically at load time. */ - scx_bpf_error("No node%u", node_id); + __COMPAT_scx_bpf_error("No node%u", node_id); return -ENOENT; } @@ -1742,7 +1750,7 @@ static s32 create_node(u32 node_id) cpumask = nodec->cpumask; if (!cpumask) { bpf_rcu_read_unlock(); - scx_bpf_error("Failed to lookup node cpumask"); + __COMPAT_scx_bpf_error("Failed to lookup node cpumask"); return -ENOENT; } @@ -1751,7 +1759,7 @@ static s32 create_node(u32 node_id) nmask = MEMBER_VPTR(numa_cpumasks, [node_id][cpu / 64]); if (!nmask) { - scx_bpf_error("array index error"); + __COMPAT_scx_bpf_error("array index error"); ret = -ENOENT; break; } @@ -1773,7 +1781,7 @@ static s32 create_dom(u32 dom_id) s32 ret; if (dom_id >= MAX_DOMS) { - scx_bpf_error("Max dom ID %u exceeded (%u)", MAX_DOMS, dom_id); + __COMPAT_scx_bpf_error("Max dom ID %u exceeded (%u)", MAX_DOMS, dom_id); return -EINVAL; } @@ -1781,7 +1789,7 @@ static s32 create_dom(u32 dom_id) ret = scx_bpf_create_dsq(dom_id, node_id); if (ret < 0) { - scx_bpf_error("Failed to create dsq %u (%d)", dom_id, ret); + __COMPAT_scx_bpf_error("Failed to create dsq %u (%d)", dom_id, ret); return ret; } @@ -1800,7 +1808,7 @@ static s32 create_dom(u32 dom_id) all_mask = all_cpumask; if (!dom_mask || !all_mask) { bpf_rcu_read_unlock(); - scx_bpf_error("Could not find cpumask"); + __COMPAT_scx_bpf_error("Could not find cpumask"); return -ENOENT; } @@ -1809,7 +1817,7 @@ static s32 create_dom(u32 dom_id) dmask = MEMBER_VPTR(dom_cpumasks, [dom_id][cpu / 64]); if (!dmask) { - scx_bpf_error("array index error"); + __COMPAT_scx_bpf_error("array index error"); ret = -ENOENT; break; } @@ -1830,7 +1838,7 @@ static s32 create_dom(u32 dom_id) nodec = bpf_map_lookup_elem(&node_data, &node_id); if (!nodec) { /* Should never happen, it's created statically at load time. 
*/ - scx_bpf_error("No node%u", node_id); + __COMPAT_scx_bpf_error("No node%u", node_id); return -ENOENT; } ret = create_save_cpumask(&domc->node_cpumask); @@ -1842,7 +1850,7 @@ static s32 create_dom(u32 dom_id) dom_mask = domc->node_cpumask; if (!node_mask || !dom_mask) { bpf_rcu_read_unlock(); - scx_bpf_error("cpumask lookup failed"); + __COMPAT_scx_bpf_error("cpumask lookup failed"); return -ENOENT; } bpf_cpumask_copy(dom_mask, (const struct cpumask *)node_mask); @@ -1873,7 +1881,7 @@ static s32 initialize_cpu(s32 cpu) cpumask = domc->cpumask; if (!cpumask) { bpf_rcu_read_unlock(); - scx_bpf_error("Failed to lookup dom node cpumask"); + __COMPAT_scx_bpf_error("Failed to lookup dom node cpumask"); return -ENOENT; } @@ -1891,6 +1899,8 @@ static s32 initialize_cpu(s32 cpu) s32 BPF_STRUCT_OPS_SLEEPABLE(rusty_init) { s32 i, ret; + + __COMPAT_scx_bpf_switch_all(); ret = create_save_cpumask(&all_cpumask); if (ret) @@ -1929,7 +1939,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(rusty_init) void BPF_STRUCT_OPS(rusty_exit, struct scx_exit_info *ei) { - UEI_RECORD(uei, ei); + uei_record(&uei, ei); } SCX_OPS_DEFINE(rusty, @@ -1942,8 +1952,8 @@ SCX_OPS_DEFINE(rusty, .quiescent = (void *)rusty_quiescent, .set_weight = (void *)rusty_set_weight, .set_cpumask = (void *)rusty_set_cpumask, - .init_task = (void *)rusty_init_task, - .exit_task = (void *)rusty_exit_task, + .prep_enable = (void *)rusty_prep_enable, + /* .cancel_enable = (void *)rusty_cancel_enable, */ .init = (void *)rusty_init, .exit = (void *)rusty_exit, .timeout_ms = 10000, diff --git a/scheds/rust/scx_rusty/src/main.rs b/scheds/rust/scx_rusty/src/main.rs index d9fd0915f..d520a022b 100644 --- a/scheds/rust/scx_rusty/src/main.rs +++ b/scheds/rust/scx_rusty/src/main.rs @@ -16,7 +16,9 @@ pub mod load_balance; use load_balance::LoadBalancer; mod stats; +use core::ffi::CStr; use std::collections::BTreeMap; +use std::ffi::CString; use std::mem::MaybeUninit; use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; @@ -52,11 +54,10 @@ use scx_utils::init_libbpf_logging; use scx_utils::scx_ops_attach; use scx_utils::scx_ops_load; use scx_utils::scx_ops_open; -use scx_utils::uei_exited; -use scx_utils::uei_report; +// use scx_utils::uei_exited; +// use scx_utils::uei_report; use scx_utils::Cpumask; use scx_utils::Topology; -use scx_utils::UserExitInfo; use scx_utils::NR_CPU_IDS; const MAX_DOMS: usize = bpf_intf::consts_MAX_DOMS as usize; @@ -432,7 +433,7 @@ impl<'a> Scheduler<'a> { if opts.partial { skel.struct_ops.rusty_mut().flags |= *compat::SCX_OPS_SWITCH_PARTIAL; } - skel.struct_ops.rusty_mut().exit_dump_len = opts.exit_dump_len; + // skel.struct_ops.rusty_mut().exit_dump_len = opts.exit_dump_len; skel.maps.rodata_data.load_half_life = (opts.load_half_life * 1000000000.0) as u32; skel.maps.rodata_data.kthreads_local = opts.kthreads_local; @@ -566,7 +567,7 @@ impl<'a> Scheduler<'a> { Ok(()) } - fn run(&mut self, shutdown: Arc) -> Result { + fn run(&mut self, shutdown: Arc) -> Result<()> { let (res_ch, req_ch) = self.stats_server.channels(); let now = Instant::now(); let mut next_tune_at = now + self.tune_interval; @@ -574,7 +575,9 @@ impl<'a> Scheduler<'a> { self.skel.maps.stats.value_size() as usize; - while !shutdown.load(Ordering::Relaxed) && !uei_exited!(&self.skel, uei) { + while !shutdown.load(Ordering::Relaxed) + && !UserExitInfo::exited(&self.skel.maps.bss_data.uei)? 
+ { let now = Instant::now(); if now >= next_tune_at { @@ -608,7 +611,7 @@ impl<'a> Scheduler<'a> { } self.struct_ops.take(); - uei_report!(&self.skel, uei) + UserExitInfo::read(&self.skel.maps.bss_data.uei)?.report() } } @@ -669,11 +672,66 @@ fn main() -> Result<()> { } let mut open_object = MaybeUninit::uninit(); - loop { - let mut sched = Scheduler::init(&opts, &mut open_object)?; - if !sched.run(shutdown.clone())?.should_restart() { - break; + let mut sched = Scheduler::init(&opts, &mut open_object)?; + sched.run(shutdown.clone()) +} + +#[derive(Debug, Default)] +struct UserExitInfo { + kind: i32, + reason: Option, + msg: Option, +} + +impl UserExitInfo { + fn read(bpf_uei: &types::user_exit_info) -> Result { + let kind = unsafe { std::ptr::read_volatile(&bpf_uei.kind as *const _) }; + + let (reason, msg) = if kind != 0 { + ( + Some( + unsafe { CStr::from_ptr(bpf_uei.reason.as_ptr() as *const _) } + .to_str() + .context("Failed to convert reason to string")? + .to_string(), + ) + .filter(|s| !s.is_empty()), + Some( + unsafe { CStr::from_ptr(bpf_uei.msg.as_ptr() as *const _) } + .to_str() + .context("Failed to convert msg to string")? + .to_string(), + ) + .filter(|s| !s.is_empty()), + ) + } else { + (None, None) + }; + + Ok(Self { kind, reason, msg }) + } + + fn exited(bpf_uei: &types::user_exit_info) -> Result { + Ok(Self::read(bpf_uei)?.kind != 0) + } + + fn report(&self) -> Result<()> { + let why = match (&self.reason, &self.msg) { + (Some(reason), None) => format!("{}", reason), + (Some(reason), Some(msg)) => format!("{} ({})", reason, msg), + _ => "".into(), + }; + + match self.kind { + 0 => Ok(()), + etype => { + if etype != 64 { + bail!("EXIT: kind={} {}", etype, why); + } else { + info!("EXIT: {}", why); + Ok(()) + } + } } } - Ok(()) }
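Both scx_layered and scx_rusty now carry an identical, hand-rolled copy of the userspace UserExitInfo reader in their main.rs. A minimal sketch of a shared helper is below, assuming the generated user_exit_info layout is the same in both skeletons (an i32 kind plus fixed-size, NUL-terminated reason/msg arrays, as in user_exit_info.h); the uei_compat module name and the NON_FATAL_EXIT_KIND constant are illustrative only and are not part of this patch.

// uei_compat.rs (hypothetical shared module; a sketch, not part of this patch)
use anyhow::{bail, Context, Result};
use core::ffi::CStr;
use log::info;

/// Exit kind treated as a clean, non-fatal exit in this patch (`etype == 64`).
const NON_FATAL_EXIT_KIND: i32 = 64;

#[derive(Debug, Default)]
pub struct UserExitInfo {
    kind: i32,
    reason: Option<String>,
    msg: Option<String>,
}

impl UserExitInfo {
    /// Build from the raw BPF-side fields; each scheduler passes
    /// `(&uei.kind, &uei.reason, &uei.msg)` from its own generated types,
    /// so this helper does not depend on a per-crate skeleton module.
    pub fn from_raw(
        kind: &i32,
        reason: &[std::os::raw::c_char],
        msg: &[std::os::raw::c_char],
    ) -> Result<Self> {
        // Volatile read, mirroring the memory-barrier pairing on the BPF side.
        let kind = unsafe { std::ptr::read_volatile(kind as *const i32) };

        // The arrays are fixed-size and NUL-terminated by bpf_probe_read_kernel_str().
        let cstr_field = |bytes: &[std::os::raw::c_char]| -> Result<Option<String>> {
            let s = unsafe { CStr::from_ptr(bytes.as_ptr()) }
                .to_str()
                .context("Failed to convert exit info field to string")?
                .to_string();
            Ok(Some(s).filter(|s| !s.is_empty()))
        };

        let (reason, msg) = if kind != 0 {
            (cstr_field(reason)?, cstr_field(msg)?)
        } else {
            (None, None)
        };

        Ok(Self { kind, reason, msg })
    }

    pub fn exited(&self) -> bool {
        self.kind != 0
    }

    pub fn report(&self) -> Result<()> {
        let why = match (&self.reason, &self.msg) {
            (Some(reason), Some(msg)) => format!("{} ({})", reason, msg),
            (Some(reason), None) => reason.clone(),
            _ => "".into(),
        };

        match self.kind {
            0 => Ok(()),
            NON_FATAL_EXIT_KIND => {
                info!("EXIT: {}", why);
                Ok(())
            }
            etype => bail!("EXIT: kind={} {}", etype, why),
        }
    }
}

Keeping the helper in terms of raw fields rather than a skeleton type is what lets one copy serve both schedulers; the polling loops would then call UserExitInfo::from_raw(...)?.exited() and .report() exactly where the duplicated versions are used today.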