Skip to content

Commit

Permalink
Merge pull request #621 from multics69/lavd-greedy-fix
Browse files Browse the repository at this point in the history
scx_lavd: improve greedy ratio calculation and more
  • Loading branch information
multics69 authored Sep 7, 2024
2 parents 6f8917c + 36df970 commit 17e0e08
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 54 deletions.
65 changes: 29 additions & 36 deletions scheds/rust/scx_lavd/src/bpf/main.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,12 @@ static bool is_greedy(struct task_ctx *taskc)
return taskc->greedy_ratio > 1000;
}

static bool is_eligible(struct task_ctx *taskc)
{
return !is_greedy(taskc);
}


static __always_inline
int submit_task_ctx(struct task_struct *p, struct task_ctx *taskc, u32 cpu_id)
{
Expand Down Expand Up @@ -764,6 +770,7 @@ static void calc_sys_stat(struct sys_stat_ctx *c)
static void update_sys_stat_next(struct sys_stat_ctx *c)
{
static int cnt = 0;
u64 avg_svc_time = 0;

/*
* Update the CPU utilization to the next version.
Expand All @@ -788,8 +795,10 @@ static void update_sys_stat_next(struct sys_stat_ctx *c)
stat_next->nr_violation =
calc_avg32(stat_cur->nr_violation, c->nr_violation);

stat_next->avg_svc_time = (c->nr_sched == 0) ? 0 :
c->tot_svc_time / c->nr_sched;
if (c->nr_sched > 0)
avg_svc_time = c->tot_svc_time / c->nr_sched;
stat_next->avg_svc_time =
calc_avg(stat_cur->avg_svc_time, avg_svc_time);

stat_next->nr_queued_task =
calc_avg(stat_cur->nr_queued_task, c->nr_queued_task);
Expand Down Expand Up @@ -1168,11 +1177,6 @@ static u64 calc_freq_factor(u64 freq)
return ft + 1;
}

static bool is_eligible(struct task_ctx *taskc)
{
return taskc->greedy_ratio <= 1000;
}

static s64 calc_static_prio_factor(struct task_struct *p)
{
/*
Expand Down Expand Up @@ -1386,12 +1390,6 @@ static void update_stat_for_running(struct task_struct *p,
*/
advance_cur_logical_clk(taskc);

/*
* Update the current service time if necessary.
*/
if (cur_svc_time < taskc->svc_time)
WRITE_ONCE(cur_svc_time, taskc->svc_time);

/*
* Since this is the start of a new schedule for @p, we update run
* frequency in a second using an exponential weighted moving average.
Expand Down Expand Up @@ -1460,34 +1458,22 @@ static void update_stat_for_running(struct task_struct *p,
if (taskc->victim_cpu >= 0)
cpuc->nr_preemption++;

if (is_lat_cri(taskc, stat_cur)) {
if (is_lat_cri(taskc, stat_cur))
cpuc->nr_lat_cri++;
// debugln("------------------------ lc = %llu", cpuc->nr__cri);
}

if (is_perf_cri(taskc, stat_cur)) {
if (is_perf_cri(taskc, stat_cur))
cpuc->nr_perf_cri++;
// debugln("------------------------ pc = %llu", cpuc->nr_perf_cri);
}

if (is_greedy(taskc))
cpuc->nr_greedy++;
}

static u64 calc_svc_time(struct task_struct *p, struct task_ctx *taskc)
{
/*
* Scale the execution time by the inverse of the weight and charge.
*/
return (taskc->last_stopping_clk - taskc->last_running_clk) / p->scx.weight;
}

static void update_stat_for_stopping(struct task_struct *p,
struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
u64 now = bpf_ktime_get_ns();
u64 old_run_time_ns, suspended_duration, task_svc_time;
u64 old_run_time_ns, suspended_duration, task_run_time;

/*
* Update task's run_time. When a task is scheduled consecutively
Expand All @@ -1500,13 +1486,12 @@ static void update_stat_for_stopping(struct task_struct *p,
*/
old_run_time_ns = taskc->run_time_ns;
suspended_duration = get_suspended_duration_and_reset(cpuc);
taskc->acc_run_time_ns += now - taskc->last_running_clk -
suspended_duration;
taskc->run_time_ns = calc_avg(taskc->run_time_ns,
taskc->acc_run_time_ns);
task_run_time = now - taskc->last_running_clk - suspended_duration;
taskc->acc_run_time_ns += task_run_time;
taskc->run_time_ns = calc_avg(taskc->run_time_ns, taskc->acc_run_time_ns);
taskc->last_stopping_clk = now;
task_svc_time = calc_svc_time(p, taskc);
taskc->svc_time += task_svc_time;

taskc->svc_time += task_run_time / p->scx.weight;
taskc->victim_cpu = (s32)LAVD_CPU_ID_NONE;

/*
Expand All @@ -1519,7 +1504,13 @@ static void update_stat_for_stopping(struct task_struct *p,
/*
* Increase total service time of this CPU.
*/
cpuc->tot_svc_time += task_svc_time;
cpuc->tot_svc_time += taskc->svc_time;

/*
* Update the current service time if necessary.
*/
if (READ_ONCE(cur_svc_time) < taskc->svc_time)
WRITE_ONCE(cur_svc_time, taskc->svc_time);
}

static void update_stat_for_quiescent(struct task_struct *p,
Expand Down Expand Up @@ -3248,8 +3239,10 @@ static s32 init_per_cpu_ctx(u64 now)
}

cpuc->turbo_core = cpuc->capacity == turbo_cap;
if (cpuc->turbo_core)
if (cpuc->turbo_core) {
bpf_cpumask_set_cpu(cpu, turbo);
debugln("CPU %d is a turbo core.", cpu);
}
}

/*
Expand Down
52 changes: 38 additions & 14 deletions scheds/rust/scx_lavd/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ use rlimit::{getrlimit, setrlimit, Resource};
#[derive(Debug, Parser)]
struct Opts {
/// Automatically decide the scheduler's power mode based on system load.
/// This is a recommended mode if you don't understand the following options:
/// This is a default mode if you don't specify the following options:
#[clap(long = "autopilot", action = clap::ArgAction::SetTrue)]
autopilot: bool,

Expand Down Expand Up @@ -151,22 +151,41 @@ struct Opts {
}

impl Opts {
fn nothing_specified(&self) -> bool {
return self.autopilot == false &&
self.autopower == false &&
self.performance == false &&
self.powersave == false &&
self.balanced == false &&
self.no_core_compaction == false &&
self.prefer_smt_core == false &&
self.prefer_little_core == false &&
self.no_prefer_turbo_core == false &&
self.no_freq_scaling == false &&
self.monitor == None &&
self.monitor_sched_samples == None;
}

fn proc(&mut self) -> Option<&mut Self> {
if self.nothing_specified() {
self.autopilot = true;
info!("Autopilot mode is enabled by default.");
return Some(self);
}

if self.performance {
self.no_core_compaction = true;
self.prefer_smt_core = false;
self.prefer_little_core = false;
self.no_prefer_turbo_core = false;
self.no_freq_scaling = true;
}
if self.powersave {
} else if self.powersave {
self.no_core_compaction = false;
self.prefer_smt_core = true;
self.prefer_little_core = true;
self.no_prefer_turbo_core = true;
self.no_freq_scaling = false;
}
if self.balanced {
} else if self.balanced {
self.no_core_compaction = false;
self.prefer_smt_core = false;
self.prefer_little_core = false;
Expand Down Expand Up @@ -757,7 +776,7 @@ impl<'a> Scheduler<'a> {
uei_exited!(&self.skel, uei)
}

fn set_power_profile(&mut self, mode: i32) -> Result<(), u32> {
fn set_power_profile(&mut self, mode: u32) -> Result<(), u32> {
let prog = &mut self.skel.progs.set_power_profile;
let mut args = power_arg {
power_mode: mode as c_int,
Expand Down Expand Up @@ -792,10 +811,6 @@ impl<'a> Scheduler<'a> {
}

fn update_power_profile(&mut self, prev_profile: String) -> (bool, String) {
const LAVD_PM_PERFORMANCE: s32 = 0;
const LAVD_PM_BALANCED: s32 = 1;
const LAVD_PM_POWERSAVE: s32 = 2;

let profile = Self::read_energy_profile();
if profile == prev_profile {
// If the profile is the same, skip updating the profile for BPF.
Expand All @@ -820,11 +835,19 @@ impl<'a> Scheduler<'a> {
(true, profile)
}

fn run(&mut self, autopower: bool, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
fn run(&mut self, opts: &Opts, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
let (res_ch, req_ch) = self.stats_server.channels();
let mut autopower = autopower;
let mut autopower = opts.autopower;
let mut profile = "unknown".to_string();

if opts.performance {
let _ = self.set_power_profile(LAVD_PM_PERFORMANCE);
} else if opts.powersave {
let _ = self.set_power_profile(LAVD_PM_POWERSAVE);
} else {
let _ = self.set_power_profile(LAVD_PM_BALANCED);
}

while !shutdown.load(Ordering::Relaxed) && !self.exited() {
if autopower {
(autopower, profile) = self.update_power_profile(profile);
Expand Down Expand Up @@ -877,7 +900,6 @@ fn init_log(opts: &Opts) {

fn main() -> Result<()> {
let mut opts = Opts::parse();
opts.proc().unwrap();

if opts.version {
println!("scx_lavd {}", *build_id::SCX_FULL_VERSION);
Expand All @@ -890,6 +912,8 @@ fn main() -> Result<()> {
}

init_log(&opts);

opts.proc().unwrap();
debug!("{:#?}", opts);

let shutdown = Arc::new(AtomicBool::new(false));
Expand Down Expand Up @@ -925,7 +949,7 @@ fn main() -> Result<()> {
*build_id::SCX_FULL_VERSION
);
info!("scx_lavd scheduler starts running.");
if !sched.run(opts.autopower, shutdown.clone())?.should_restart() {
if !sched.run(&opts, shutdown.clone())?.should_restart() {
break;
}
}
Expand Down
7 changes: 3 additions & 4 deletions scheds/rust/scx_lavd/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ impl SysStats {
pub fn format_header<W: Write>(w: &mut W) -> Result<()> {
writeln!(
w,
"\x1b[93m| {:8} | {:9} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |\x1b[0m",
"\x1b[93m| {:8} | {:13} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |\x1b[0m",
"MSEQ",
"SVC_TIME",
"# Q TASK",
Expand Down Expand Up @@ -101,7 +101,7 @@ impl SysStats {

writeln!(
w,
"| {:8} | {:9} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |",
"| {:8} | {:13} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |",
self.mseq,
self.avg_svc_time,
self.nr_queued_task,
Expand Down Expand Up @@ -406,6 +406,5 @@ pub fn monitor(intv: Duration, shutdown: Arc<AtomicBool>) -> Result<()> {
intv,
|| shutdown.load(Ordering::Relaxed),
|sysstats| sysstats.format(&mut std::io::stdout()),
);
Ok(())
)
}

0 comments on commit 17e0e08

Please sign in to comment.