Compilable rack-scale code.
Signed-off-by: Gerd Zellweger <[email protected]>
gz committed Sep 25, 2023
1 parent 2b02343 commit ad53903
Showing 5 changed files with 116 additions and 102 deletions.
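
The common thread across the diffs below: the hand-rolled node-replication Log/Replica pairs (NR_LOG, PROCESS_LOGS) are replaced by nr2's NodeReplicated<T> handle, which owns the log and all replicas behind a single Arc (KERNEL_NODE_INSTANCE, PROCESS_TABLE). As orientation, here is a minimal sketch of the Dispatch contract such a T implements. It is modeled on the published node-replication crate, so nr2's exact trait signatures may differ, and Counter/CounterOp are illustrative names, not part of this commit:

// Hedged sketch of a Dispatch implementation; nr2's real trait may
// differ (e.g. lifetimes on ReadOperation).
use nr2::nr::Dispatch;

#[derive(Default)]
pub struct Counter {
    value: u64,
}

#[derive(Clone, Debug, PartialEq)]
pub enum CounterOp {
    Add(u64),
}

impl Dispatch for Counter {
    type ReadOperation = ();         // reads are served from a local replica
    type WriteOperation = CounterOp; // writes are serialized through the shared log
    type Response = u64;

    fn dispatch(&self, _op: Self::ReadOperation) -> Self::Response {
        self.value
    }

    fn dispatch_mut(&mut self, op: Self::WriteOperation) -> Self::Response {
        match op {
            CounterOp::Add(n) => {
                self.value += n;
                self.value
            }
        }
    }
}

With that shape in mind, the per-file changes below collapse log creation, replica construction, and thread registration into NodeReplicated::new plus register.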
39 changes: 20 additions & 19 deletions kernel/src/arch/x86_64/mod.rs
@@ -21,26 +21,21 @@

use alloc::sync::Arc;
use core::mem::transmute;
use core::num::NonZeroUsize;
use core::sync::atomic::AtomicBool;
use core::sync::atomic::Ordering;

#[cfg(feature = "rackscale")]
use crate::nr::NR_LOG;
pub use bootloader_shared::*;
use cnr::Replica as MlnrReplica;
use fallible_collections::TryClone;
use klogger::sprint;
use log::{debug, error, info};
use nr2::nr::{AffinityChange, NodeReplicated};
use x86::{controlregs, cpuid};

use crate::cmdline::CommandLineArguments;
use crate::fs::cnrfs::MlnrKernelNode;
use crate::memory::global::GlobalMemory;
use crate::memory::mcache;
use crate::memory::per_core::PerCoreMemory;
use crate::nr::KernelNode;
use crate::ExitReason;

use coreboot::AppCoreArgs;
@@ -450,12 +445,16 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
// Set-up interrupt routing drivers (I/O APIC controllers)
irq::ioapic_initialize();

// Let's go with one replica per NUMA node for now:
let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node");

#[cfg(not(feature = "rackscale"))]
let kernel_node = {
use core::num::NonZeroUsize;
use crate::nr::KernelNode;
use nr2::nr::{AffinityChange, NodeReplicated};

// Let's go with one replica per NUMA node for now:
let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node");

// Create the global operation log and first replica and store it (needs
// TLS)
let kernel_node: Arc<NodeReplicated<KernelNode>> = Arc::try_new(
@@ -517,7 +516,12 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {

// Initialize processes
#[cfg(feature = "rackscale")]
lazy_static::initialize(&process::PROCESS_LOGS);
if crate::CMDLINE
.get()
.map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
{
lazy_static::initialize(&process::PROCESS_TABLE);
}

#[cfg(not(feature = "rackscale"))]
{
@@ -526,7 +530,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
}

#[cfg(feature = "rackscale")]
let (log, bsp_replica) = {
let kernel_node = {
if crate::CMDLINE
.get()
.map_or(false, |c| c.mode == crate::cmdline::Mode::Client)
@@ -535,15 +539,12 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
crate::nrproc::register_thread_with_process_replicas();
}

// this calls an RPC on the client, which is why we do this later in initialization than in the non-rackscale case
lazy_static::initialize(&NR_LOG);
lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE);
let kernel_node = crate::nr::KERNEL_NODE_INSTANCE.clone();

// For rackscale, only the controller is going to create the base log.
// All clients will use this to create replicas.
let bsp_replica = Replica::<KernelNode>::new(&NR_LOG);
let local_ridx = bsp_replica.register().unwrap();
crate::nr::NR_REPLICA.call_once(|| (bsp_replica.clone(), local_ridx));
(&NR_LOG.clone(), bsp_replica)
let local_ridx = kernel_node.register(0).unwrap();
crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx));
kernel_node
};

#[cfg(feature = "gdb")]
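
The rackscale branch above now mirrors the non-rackscale one: initialize the shared NodeReplicated instance once, register the calling thread against replica 0, and stash the (instance, token) pair in NR_REPLICA. A hedged usage sketch of that sequence, assuming nr2's register/execute_mut behave like the published node-replication API (the op value is illustrative):

// Sketch only, not part of the diff. Assumes register(replica_id) hands
// back a thread token and execute_mut(op, token) appends to the log.
let kernel_node = crate::nr::KERNEL_NODE_INSTANCE.clone();
let local_ridx = kernel_node.register(0).expect("replica 0 accepts registrations");
crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx));
// Later, kernel-state mutations go through the token:
// let resp = kernel_node.execute_mut(op, local_ridx);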
105 changes: 57 additions & 48 deletions kernel/src/arch/x86_64/process.rs
@@ -19,9 +19,9 @@ use kpi::arch::SaveArea;
use kpi::process::{FrameId, ELF_OFFSET, EXECUTOR_OFFSET};
use lazy_static::lazy_static;
use log::{debug, info, trace, warn};
#[cfg(feature = "rackscale")]
use node_replication::{Dispatch, Log, Replica};
use nr2::nr::NodeReplicated;
use crate::arch::kcb::{self, per_core_mem};
use core::num::NonZeroUsize;
use nr2::nr::{NodeReplicated, AffinityChange};
use x86::bits64::paging::*;
use x86::bits64::rflags;
use x86::{controlregs, Ring};
@@ -72,25 +72,16 @@ pub(crate) fn current_pid() -> KResult<Pid> {

#[cfg(feature = "rackscale")]
lazy_static! {
pub(crate) static ref PROCESS_LOGS: Box<
ArrayVec<
Arc<Log::<'static, <NrProcess<Ring3Process> as Dispatch>::WriteOperation>>,
MAX_PROCESSES,
>,
> = {

pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>>>, MAX_PROCESSES> = {
use crate::memory::shmem_affinity::mid_to_shmem_affinity;

if crate::CMDLINE
if !crate::CMDLINE
.get()
.map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
{
// We want to allocate the logs in controller shared memory
use crate::memory::shmem_affinity::local_shmem_affinity;
let pcm = per_core_mem();
pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity");
} else {
// Get location of the logs from the controller, who will have created them in shared memory
use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure};

/*use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure};
let mut log_ptrs = [0u64; MAX_PROCESSES];
rpc_get_shmem_structure(ShmemStructure::NrProcLogs, &mut log_ptrs[..]).expect("Failed to get process log pointers");
@@ -103,12 +94,54 @@
};
process_logs.push(local_log_arc);
}
return process_logs;
return process_logs;*/
unimplemented!("Need to get NodeReplicated from controller")
}

// TODO(dynrep): here we create the Log on the controller for sending it
// to the data-kernels; this would probably need to create a
// NodeReplicated<DataKernel> / NodeReplicated<Process> instance
// We want to allocate the logs in controller shared memory
use crate::memory::shmem_affinity::local_shmem_affinity;
let pcm = per_core_mem();
pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity");

// Want at least one replica...
let num_replicas =
NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap();
let mut processes = ArrayVec::new();

for _pid in 0..MAX_PROCESSES {
debug_assert_eq!(
*crate::environment::NODE_ID,
0,
"Expect initialization to happen on node 0."
);

let process: Arc<NodeReplicated<NrProcess<Ring3Process>>> = Arc::try_new(
NodeReplicated::new(num_replicas, |afc: AffinityChange| {
let pcm = kcb::per_core_mem();
match afc {
AffinityChange::Replica(r) => {
pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity");
}
AffinityChange::Revert(_orig) => {
pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity")
}
}
return 0; // TODO(dynrep): Return error code
})
.expect("Not enough memory to initialize system"),
)
.expect("Not enough memory to initialize system");

processes.push(process)
}


// Reset mem allocator to use per core memory again
let pcm = per_core_mem();
pcm.set_mem_affinity(0 as atopology::NodeId).expect("Can't change affinity");

processes


// NodeReplicated::new(#data-kernels) ->
// - for data_kernel in 0..#data-kernels {
@@ -143,42 +176,17 @@
- The closure when set on controller probably won't work in data-kernel (diff symbol addresses?)
- The binary might be fine because it's identical!
*/
let process_logs = {
let mut process_logs = Box::try_new(ArrayVec::new()).expect("Can't initialize process log vector.");
for _pid in 0..MAX_PROCESSES {
let log = Arc::try_new(
Log::<<NrProcess<Ring3Process> as Dispatch>::WriteOperation>::new(LARGE_PAGE_SIZE),
)
.expect("Can't initialize process logs, out of memory.");
process_logs.push(log);
}
process_logs
};

if crate::CMDLINE
.get()
.map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
{
// Reset mem allocator to use per core memory again
let pcm = per_core_mem();
pcm.set_mem_affinity(0 as atopology::NodeId).expect("Can't change affinity");
}

process_logs
};
}

#[cfg(not(feature = "rackscale"))]
lazy_static! {
pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>>>, MAX_PROCESSES> =
create_process_table();
}

#[cfg(not(feature = "rackscale"))]
fn create_process_table() -> ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>>>, MAX_PROCESSES> {
use crate::arch::kcb;
use core::num::NonZeroUsize;
use nr2::nr::AffinityChange;

// Want at least one replica...
let num_replicas =
NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap();
@@ -214,6 +222,7 @@ fn create_process_table() -> ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>
processes
}

/*
#[cfg(feature = "rackscale")]
fn create_process_table(
) -> ArrayVec<ArrayVec<Arc<Replica<'static, NrProcess<Ring3Process>>>, MAX_PROCESSES>, MAX_NUMA_NODES>
@@ -283,7 +292,7 @@ fn create_process_table(
numa_cache
}

*/
pub(crate) struct ArchProcessManagement;

impl crate::nrproc::ProcessManager for ArchProcessManagement {
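
Worth spelling out from the PROCESS_TABLE initializer above: the closure handed to NodeReplicated::new is nr2's affinity hook. It fires around replica allocations so that memory for replica r comes from that machine's shared-memory region (AffinityChange::Replica(r) redirects the per-core allocator, AffinityChange::Revert restores it). Consuming the table would then look roughly like the sketch below, assuming register/execute_mut match the published node-replication API; ProcessOpMut is a stand-in for the kernel's real operation enum:

// Illustrative only: fetch a pid's replicated state and run an op on it.
let pid = 0;
let proc_node = crate::arch::process::PROCESS_TABLE[pid].clone();
let token = proc_node
    .register(*crate::environment::NODE_ID)
    .expect("register thread with a local replica");
// let resp = proc_node.execute_mut(ProcessOpMut::SomeUpdate, token);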
17 changes: 7 additions & 10 deletions kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
@@ -9,20 +9,20 @@ use core2::io::Write;

use atopology::NodeId;
use crossbeam_queue::ArrayQueue;
use nr2::nr::{Dispatch, Log};
use nr2::nr::{Dispatch, Log, NodeReplicated};
use rpc::rpc::*;
use rpc::RPCClient;

use super::client_state::CLIENT_STATE;
use super::kernelrpc::*;
use crate::arch::kcb::per_core_mem;
use crate::arch::process::{Ring3Process, PROCESS_LOGS};
use crate::arch::process::{Ring3Process, PROCESS_TABLE};
use crate::arch::tlb::{Shootdown, RACKSCALE_CLIENT_WORKQUEUES};
use crate::error::{KError, KResult};
use crate::memory::shmem_affinity::local_shmem_affinity;
use crate::memory::vspace::TlbFlushHandle;
use crate::memory::{kernel_vaddr_to_paddr, paddr_to_kernel_vaddr, PAddr, VAddr};
use crate::nr::{Op, NR_LOG};
use crate::nr::{Op, KERNEL_NODE_INSTANCE};
use crate::nrproc::NrProcess;
use crate::process::MAX_PROCESSES;

@@ -128,20 +128,17 @@ pub(crate) fn handle_get_shmem_structure(
ShmemStructure::NrProcLogs => {
let mut logs = [0u64; MAX_PROCESSES];

for i in 0..PROCESS_LOGS.len() {
for i in 0..PROCESS_TABLE.len() {
// Create a clone in shared memory, and get the raw representation of it
// The clone increments the strong counter, and the into_raw consumes this clone of the arc.
let client_clone = Arc::into_raw(Arc::clone(&PROCESS_LOGS[i]));
let client_clone = Arc::into_raw(Arc::clone(&PROCESS_TABLE[i]));

// Send the raw pointer to the client clone address. To do this, we'll convert the kernel address
// to a physical address, and then change it to a shmem offset by subtracting the shmem base.
// TODO(rackscale): try to simplify this, and below?
let arc_log_paddr = kernel_vaddr_to_paddr(VAddr::from_u64(
(*&client_clone
as *const Log<
'static,
<NrProcess<Ring3Process> as Dispatch>::WriteOperation,
>) as u64,
as *const NodeReplicated<NrProcess<Ring3Process>>) as u64,
));
logs[i] = arc_log_paddr.as_u64();
}
@@ -151,7 +148,7 @@ pub(crate) fn handle_get_shmem_structure(
hdr.msg_len = core::mem::size_of::<[u64; MAX_PROCESSES]>() as u64;
}
ShmemStructure::NrLog => {
let log_clone = Arc::into_raw(Arc::clone(&NR_LOG));
let log_clone = Arc::into_raw(Arc::clone(&KERNEL_NODE_INSTANCE));
let log_paddr =
kernel_vaddr_to_paddr(VAddr::from_u64((*&log_clone as *const Log<Op>) as u64))
.as_u64();
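
The NrProcLogs arm above hands a controller-side Arc across the machine boundary: it leaks a clone with Arc::into_raw, then rewrites the pointer from a kernel virtual address into a shared-memory physical address. The client has to reverse the trip before calling Arc::from_raw. A sketch modeled on the commented-out client code in process.rs; the shmem_offset/SHMEM_BASE names and the exact address arithmetic are assumptions, not taken from this diff:

// Hedged client-side sketch: rebuild the Arc the controller leaked.
// Safety: sound only because the controller's Arc::into_raw left a strong
// reference alive for this clone.
let vaddr = paddr_to_kernel_vaddr(PAddr::from(SHMEM_BASE + shmem_offset));
let proc_node: Arc<NodeReplicated<NrProcess<Ring3Process>>> =
    unsafe { Arc::from_raw(vaddr.as_u64() as *const NodeReplicated<NrProcess<Ring3Process>>) };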
1 change: 1 addition & 0 deletions kernel/src/memory/shmemalloc.rs
@@ -20,6 +20,7 @@ pub(crate) struct ShmemAlloc {
}

impl ShmemAlloc {
#[allow(dead_code)]
pub(crate) fn new(affinity: NodeId) -> ShmemAlloc {
assert!(
is_shmem_affinity(affinity)
