Commit

Merge branch 'sched-ext:main' into main
hodgesds authored Sep 12, 2024
2 parents d35b596 + 632fcfe commit a5ab930
Showing 5 changed files with 193 additions and 7 deletions.
4 changes: 4 additions & 0 deletions rust/scx_utils/src/topology.rs
@@ -166,6 +166,8 @@ impl Cpu {
#[derive(Debug, Clone)]
pub struct Core {
id: usize,
pub node_id: usize,
pub llc_id: usize,
cpus: BTreeMap<usize, Cpu>,
span: Cpumask,
pub core_type: CoreType,
@@ -525,6 +527,8 @@ fn create_insert_cpu(

let core = cache.cores.entry(core_id).or_insert(Core {
id: core_id,
llc_id: llc_id,
node_id: node.id,
cpus: BTreeMap::new(),
span: Cpumask::new()?,
core_type: core_type.clone(),
4 changes: 4 additions & 0 deletions scheds/rust/scx_layered/src/bpf/intf.h
@@ -71,6 +71,8 @@ enum layer_stat_idx {
LSTAT_YIELD,
LSTAT_YIELD_IGNORE,
LSTAT_MIGRATION,
LSTAT_XNUMA_MIGRATION,
LSTAT_XLLC_MIGRATION,
NR_LSTATS,
};

@@ -86,6 +88,8 @@ struct cpu_ctx {
u64 lstats[MAX_LAYERS][NR_LSTATS];
u64 ran_current_for;
u32 layer_idx;
u32 node_idx;
u32 cache_idx;
};

struct cache_ctx {
100 changes: 95 additions & 5 deletions scheds/rust/scx_layered/src/bpf/main.bpf.c
@@ -136,9 +136,6 @@ static u32 cpu_to_llc_id(s32 cpu_id)
return *llc_ptr;
}

/*
* Numa node context
*/
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, u32);
@@ -147,6 +144,30 @@ struct {
__uint(map_flags, 0);
} node_data SEC(".maps");

static struct node_ctx *lookup_node_ctx(u32 node)
{
struct node_ctx *nodec;

nodec = bpf_map_lookup_elem(&node_data, &node);
return nodec;
}

struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, u32);
__type(value, struct cache_ctx);
__uint(max_entries, MAX_DOMS);
__uint(map_flags, 0);
} cache_data SEC(".maps");

static struct cache_ctx *lookup_cache_ctx(u32 cache_idx)
{
struct cache_ctx *cachec;

cachec = bpf_map_lookup_elem(&cache_data, &cache_idx);
return cachec;
}

static void gstat_inc(enum global_stat_idx idx, struct cpu_ctx *cctx)
{
if (idx < 0 || idx >= NR_GSTATS) {
@@ -1166,6 +1187,7 @@ static s32 create_node(u32 node_id)
u32 cpu;
struct bpf_cpumask *cpumask;
struct node_ctx *nodec;
struct cpu_ctx *cctx;
s32 ret;

nodec = bpf_map_lookup_elem(&node_data, &node_id);
@@ -1198,8 +1220,58 @@
break;
}

if (*nmask & (1LLU << (cpu % 64)))
if (*nmask & (1LLU << (cpu % 64))) {
bpf_cpumask_set_cpu(cpu, cpumask);
if (!(cctx = lookup_cpu_ctx(-1))) {
scx_bpf_error("cpu ctx error");
ret = -ENOENT;
break;
}
cctx->node_idx = node_id;
}
}

bpf_rcu_read_unlock();
return ret;
}

static s32 create_cache(u32 cache_id)
{
u32 cpu, llc_id;
struct bpf_cpumask *cpumask;
struct cache_ctx *cachec;
struct cpu_ctx *cctx;
s32 ret;

cachec = bpf_map_lookup_elem(&cache_data, &cache_id);
if (!cachec) {
scx_bpf_error("No cache%u", cache_id);
return -ENOENT;
}
cachec->id = cache_id;

ret = create_save_cpumask(&cachec->cpumask);
if (ret)
return ret;

bpf_rcu_read_lock();
cpumask = cachec->cpumask;
if (!cpumask) {
bpf_rcu_read_unlock();
scx_bpf_error("Failed to lookup node cpumask");
return -ENOENT;
}

bpf_for(cpu, 0, MAX_CPUS) {
llc_id = cpu_to_llc_id(cpu);
if (llc_id != cache_id)
continue;

bpf_cpumask_set_cpu(cpu, cpumask);
if (!(cctx = lookup_cpu_ctx(-1))) {
scx_bpf_error("cpu ctx error"); ret = -ENOENT; break;
}
cctx->cache_idx = cache_id;
}

bpf_rcu_read_unlock();
Expand All @@ -1225,14 +1297,27 @@ void BPF_STRUCT_OPS(layered_running, struct task_struct *p)
struct cpu_ctx *cctx;
struct task_ctx *tctx;
struct layer *layer;
struct node_ctx *nodec;
struct cache_ctx *cachec;
s32 task_cpu = scx_bpf_task_cpu(p);

if (!(cctx = lookup_cpu_ctx(-1)) || !(tctx = lookup_task_ctx(p)) ||
!(layer = lookup_layer(tctx->layer)))
return;

if (tctx->last_cpu >= 0 && tctx->last_cpu != task_cpu)
if (tctx->last_cpu >= 0 && tctx->last_cpu != task_cpu) {
lstat_inc(LSTAT_MIGRATION, layer, cctx);
if (!(nodec = lookup_node_ctx(cctx->node_idx)))
return;
if (nodec->cpumask &&
!bpf_cpumask_test_cpu(tctx->last_cpu, nodec->cpumask))
lstat_inc(LSTAT_XNUMA_MIGRATION, layer, cctx);
if (!(cachec = lookup_cache_ctx(cctx->cache_idx)))
return;
if (cachec->cpumask &&
!bpf_cpumask_test_cpu(tctx->last_cpu, cachec->cpumask))
lstat_inc(LSTAT_XLLC_MIGRATION, layer, cctx);
}
tctx->last_cpu = task_cpu;

if (vtime_before(layer->vtime_now, p->scx.dsq_vtime))
@@ -1560,6 +1645,11 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
if (ret)
return ret;
}
bpf_for(i, 0, nr_llcs) {
ret = create_cache(i);
if (ret)
return ret;
}

dbg("CFG: Dumping configuration, nr_online_cpus=%d smt_enabled=%d",
nr_online_cpus, smt_enabled);
82 changes: 81 additions & 1 deletion scheds/rust/scx_layered/src/main.rs
@@ -268,6 +268,11 @@ lazy_static::lazy_static! {
///
/// - slice_us: Scheduling slice duration in microseconds.
///
/// - growth_algo: When a layer is allocated new CPUs, different algorithms can
/// be used to determine which CPU should be allocated next. The default is a
/// "sticky" algorithm that attempts to spread layers evenly across cores.
///
/// - perf: CPU performance target. 0 means no configuration. A value
/// between 1 and 1024 indicates the performance level that CPUs running
/// tasks in this layer are configured to via scx_bpf_cpuperf_set().
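
For context, a hedged sketch (not part of this commit) of how a per-layer perf target can be applied on the BPF side. It assumes scx_layered's usual BPF includes (vmlinux.h plus the scx common helpers) are in scope, and example_perf is a made-up illustrative value, not a field from this patch.

/*
 * Hedged sketch, not scx_layered's implementation: push a perf target
 * (1-1024; 0 means leave the CPU's performance level alone) to the CPU a
 * task starts running on.
 */
static const u32 example_perf = 512;	/* illustrative only */

void BPF_STRUCT_OPS(example_running, struct task_struct *p)
{
	s32 cpu = scx_bpf_task_cpu(p);

	if (example_perf)
		scx_bpf_cpuperf_set(cpu, example_perf);
}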
@@ -450,9 +455,18 @@ enum LayerMatch {
#[derive(Clone, Debug, Parser, Serialize, Deserialize)]
#[clap(rename_all = "snake_case")]
enum LayerGrowthAlgo {
/// Sticky attempts to place layers evenly spaced across cores.
Sticky,
/// Linear starts with the lowest-numbered CPU and grows towards the total
/// number of CPUs.
Linear,
/// Random core selection order.
Random,
/// Topo uses the order of the nodes/llcs in the layer config to determine
/// the order in which CPUs are selected when growing a layer. It starts
/// from the llcs configuration and then falls back to the NUMA node
/// configuration for any CPUs not covered.
Topo,
}
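
A standalone sketch (not part of this commit; the core/LLC/node IDs are made up) of the ordering Topo aims for: cores on the layer's preferred LLCs come first, then cores on its preferred NUMA nodes, with duplicates skipped, mirroring the layer_core_order() changes further down.

// Simplified sketch, not scx_layered code: derive a Topo-style core growth
// order from per-core (llc_id, node_id) pairs and a layer's preferred
// llcs/nodes. LLC preferences are honored first, then node preferences.
fn topo_core_order(
    cores: &[(usize, usize)], // (llc_id, node_id), indexed by core id
    spec_llcs: &[usize],
    spec_nodes: &[usize],
) -> Vec<usize> {
    let mut order = Vec::new();
    for llc in spec_llcs {
        for (core_id, (llc_id, _)) in cores.iter().enumerate() {
            if llc_id == llc && !order.contains(&core_id) {
                order.push(core_id);
            }
        }
    }
    for node in spec_nodes {
        for (core_id, (_, node_id)) in cores.iter().enumerate() {
            if node_id == node && !order.contains(&core_id) {
                order.push(core_id);
            }
        }
    }
    order
}

fn main() {
    // Two cores per LLC; LLCs 0-1 sit on node 0 and LLC 2 on node 1.
    let cores = [(0, 0), (0, 0), (1, 0), (1, 0), (2, 1), (2, 1)];
    // A layer that prefers LLC 1 first and then anything on node 1.
    assert_eq!(topo_core_order(&cores, &[1], &[1]), vec![2, 3, 4, 5]);
}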

impl Default for LayerGrowthAlgo {
@@ -559,6 +573,20 @@ impl LayerSpec {
};
Ok(config.specs)
}
fn nodes(&self) -> Vec<usize> {
match &self.kind {
LayerKind::Confined { nodes, .. }
| LayerKind::Open { nodes, .. }
| LayerKind::Grouped { nodes, .. } => nodes.clone(),
}
}
fn llcs(&self) -> Vec<usize> {
match &self.kind {
LayerKind::Confined { llcs, .. }
| LayerKind::Open { llcs, .. }
| LayerKind::Grouped { llcs, .. } => llcs.clone(),
}
}
}

#[derive(Clone, Debug, Serialize, Deserialize)]
@@ -1093,6 +1121,7 @@ impl CpuPool {
}

fn layer_core_order(
cpu_pool: &CpuPool,
spec: &LayerSpec,
growth_algo: LayerGrowthAlgo,
layer_idx: usize,
@@ -1141,6 +1170,48 @@ fn layer_core_order(
fastrand::seed(layer_idx.try_into().unwrap());
fastrand::shuffle(&mut core_order);
}
LayerGrowthAlgo::Topo => {
let spec_nodes = spec.nodes();
let spec_llcs = spec.llcs();
let topo_nodes = topo.nodes();

if spec_nodes.len() + spec_llcs.len() == 0 {
// XXX: fallback to something more sane (round robin when it exists)
linear();
} else {
let mut core_id = 0;
spec_llcs.iter().for_each(|spec_llc| {
core_id = 0;
topo_nodes.iter().for_each(|topo_node| {
topo_node.cores().values().for_each(|core| {
if core.llc_id != *spec_llc {
core_id += 1;
return;
}
if !core_order.contains(&core_id) {
core_order.push(core_id);
}
core_id += 1;
});
});
});
spec_nodes.iter().for_each(|spec_node| {
core_id = 0;
topo_nodes.iter().for_each(|topo_node| {
if topo_node.id() != *spec_node {
core_id += topo_node.cores().len();
return;
}
topo_node.cores().values().for_each(|_core| {
if !core_order.contains(&core_id) {
core_order.push(core_id);
}
core_id += 1;
});
});
});
}
}
}
core_order
}
@@ -1237,7 +1308,7 @@ impl Layer {
| LayerKind::Open { growth_algo, .. } => growth_algo.clone(),
};

let core_order = layer_core_order(spec, layer_growth_algo.clone(), idx, topo);
let core_order = layer_core_order(cpu_pool, spec, layer_growth_algo.clone(), idx, topo);
debug!(
"layer: {} algo: {:?} core order: {:?}",
name,
@@ -1614,6 +1685,15 @@ impl<'a, 'b> Scheduler<'a, 'b> {
node.llcs().len()
);
skel.maps.rodata_data.nr_llcs += node.llcs().len() as u32;
let raw_numa_slice = node.span().as_raw_slice();
let node_cpumask_slice = &mut skel.maps.rodata_data.numa_cpumasks[node.id()];
let (left, _) = node_cpumask_slice.split_at_mut(raw_numa_slice.len());
left.clone_from_slice(raw_numa_slice);
debug!(
"node {} mask: {:?}",
node.id(),
skel.maps.rodata_data.numa_cpumasks[node.id()]
);

for (_, llc) in node.llcs() {
debug!("configuring llc {:?} for node {:?}", llc.id(), node.id());
10 changes: 9 additions & 1 deletion scheds/rust/scx_layered/src/stats.rs
@@ -103,6 +103,10 @@ pub struct LayerStats {
pub yield_ignore: u64,
#[stat(desc = "% migrated across CPUs")]
pub migration: f64,
#[stat(desc = "% migrated across NUMA nodes")]
pub xnuma_migration: f64,
#[stat(desc = "% migrated across LLCs")]
pub xllc_migration: f64,
#[stat(desc = "mask of allocated CPUs", _om_skip)]
pub cpus: Vec<u32>,
#[stat(desc = "# of CPUs assigned")]
@@ -188,6 +192,8 @@ impl LayerStats {
yielded: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_YIELD),
yield_ignore: lstat(bpf_intf::layer_stat_idx_LSTAT_YIELD_IGNORE) as u64,
migration: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_MIGRATION),
xnuma_migration: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_XNUMA_MIGRATION),
xllc_migration: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_XLLC_MIGRATION),
cpus: Self::bitvec_to_u32s(&layer.cpus),
cur_nr_cpus: layer.cpus.count_ones() as u32,
min_nr_cpus: nr_cpus_range.0 as u32,
@@ -235,10 +241,12 @@

writeln!(
w,
" {:<width$} open_idle={} mig={} affn_viol={}",
" {:<width$} open_idle={} mig={} xnuma_mig={} xllc_mig={} affn_viol={}",
"",
fmt_pct(self.open_idle),
fmt_pct(self.migration),
fmt_pct(self.xnuma_migration),
fmt_pct(self.xllc_migration),
fmt_pct(self.affn_viol),
width = header_width,
)?;
