From ad53903d7a2960b518a319d9c155ad37e3027e2c Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Mon, 25 Sep 2023 01:48:54 -0700 Subject: [PATCH] Compilable rack-scale code. Signed-off-by: Gerd Zellweger --- kernel/src/arch/x86_64/mod.rs | 39 +++---- kernel/src/arch/x86_64/process.rs | 105 ++++++++++-------- .../x86_64/rackscale/get_shmem_structure.rs | 17 ++- kernel/src/memory/shmemalloc.rs | 1 + kernel/src/nr.rs | 56 +++++----- 5 files changed, 116 insertions(+), 102 deletions(-) diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs index 7fd51bc20..fddb63e84 100644 --- a/kernel/src/arch/x86_64/mod.rs +++ b/kernel/src/arch/x86_64/mod.rs @@ -21,18 +21,14 @@ use alloc::sync::Arc; use core::mem::transmute; -use core::num::NonZeroUsize; use core::sync::atomic::AtomicBool; use core::sync::atomic::Ordering; -#[cfg(feature = "rackscale")] -use crate::nr::NR_LOG; pub use bootloader_shared::*; use cnr::Replica as MlnrReplica; use fallible_collections::TryClone; use klogger::sprint; use log::{debug, error, info}; -use nr2::nr::{AffinityChange, NodeReplicated}; use x86::{controlregs, cpuid}; use crate::cmdline::CommandLineArguments; @@ -40,7 +36,6 @@ use crate::fs::cnrfs::MlnrKernelNode; use crate::memory::global::GlobalMemory; use crate::memory::mcache; use crate::memory::per_core::PerCoreMemory; -use crate::nr::KernelNode; use crate::ExitReason; use coreboot::AppCoreArgs; @@ -450,12 +445,16 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { // Set-up interrupt routing drivers (I/O APIC controllers) irq::ioapic_initialize(); - // Let's go with one replica per NUMA node for now: - let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); - let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node"); - #[cfg(not(feature = "rackscale"))] let kernel_node = { + use core::num::NonZeroUsize; + use crate::nr::KernelNode; + use nr2::nr::{AffinityChange, NodeReplicated}; + + // Let's go with one replica per NUMA node for now: + let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); + let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node"); + // Create the global operation log and first replica and store it (needs // TLS) let kernel_node: Arc> = Arc::try_new( @@ -517,7 +516,12 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { // Initialize processes #[cfg(feature = "rackscale")] - lazy_static::initialize(&process::PROCESS_LOGS); + if crate::CMDLINE + .get() + .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) + { + lazy_static::initialize(&process::PROCESS_TABLE); + } #[cfg(not(feature = "rackscale"))] { @@ -526,7 +530,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { } #[cfg(feature = "rackscale")] - let (log, bsp_replica) = { + let kernel_node = { if crate::CMDLINE .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Client) @@ -535,15 +539,12 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { crate::nrproc::register_thread_with_process_replicas(); } - // this calls an RPC on the client, which is why we do this later in initialization than in non-rackscale - lazy_static::initialize(&NR_LOG); + lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE); + let kernel_node = crate::nr::KERNEL_NODE_INSTANCE.clone(); - // For rackscale, only the controller is going to create the base log. - // All clients will use this to create replicas. - let bsp_replica = Replica::::new(&NR_LOG); - let local_ridx = bsp_replica.register().unwrap(); - crate::nr::NR_REPLICA.call_once(|| (bsp_replica.clone(), local_ridx)); - (&NR_LOG.clone(), bsp_replica) + let local_ridx = kernel_node.register(0).unwrap(); + crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx)); + kernel_node }; #[cfg(feature = "gdb")] diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index b1d523280..818102c5b 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -19,9 +19,9 @@ use kpi::arch::SaveArea; use kpi::process::{FrameId, ELF_OFFSET, EXECUTOR_OFFSET}; use lazy_static::lazy_static; use log::{debug, info, trace, warn}; -#[cfg(feature = "rackscale")] -use node_replication::{Dispatch, Log, Replica}; -use nr2::nr::NodeReplicated; +use crate::arch::kcb::{self, per_core_mem}; +use core::num::NonZeroUsize; +use nr2::nr::{NodeReplicated, AffinityChange}; use x86::bits64::paging::*; use x86::bits64::rflags; use x86::{controlregs, Ring}; @@ -72,25 +72,16 @@ pub(crate) fn current_pid() -> KResult { #[cfg(feature = "rackscale")] lazy_static! { - pub(crate) static ref PROCESS_LOGS: Box< - ArrayVec< - Arc as Dispatch>::WriteOperation>>, - MAX_PROCESSES, - >, - > = { - + pub(crate) static ref PROCESS_TABLE: ArrayVec>>, MAX_PROCESSES> = { + use crate::memory::shmem_affinity::mid_to_shmem_affinity; - if crate::CMDLINE + if !crate::CMDLINE .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) { - // We want to allocate the logs in controller shared memory - use crate::memory::shmem_affinity::local_shmem_affinity; - let pcm = per_core_mem(); - pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity"); - } else { // Get location of the logs from the controller, who will have created them in shared memory - use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure}; + + /*use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure}; let mut log_ptrs = [0u64; MAX_PROCESSES]; rpc_get_shmem_structure(ShmemStructure::NrProcLogs, &mut log_ptrs[..]).expect("Failed to get process log pointers"); @@ -103,12 +94,54 @@ lazy_static! { }; process_logs.push(local_log_arc); } - return process_logs; + return process_logs;*/ + unimplemented!("Need to get NodeReplicated from controller") } - // TODO(dynrep): here we create the Log on the controller for sending it - // to the data-kernels this would probably need to create a - // NodeReplicated NodeReplicated instance + // We want to allocate the logs in controller shared memory + use crate::memory::shmem_affinity::local_shmem_affinity; + let pcm = per_core_mem(); + pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity"); + + // Want at least one replica... + let num_replicas = + NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap(); + let mut processes = ArrayVec::new(); + + for _pid in 0..MAX_PROCESSES { + debug_assert_eq!( + *crate::environment::NODE_ID, + 0, + "Expect initialization to happen on node 0." + ); + + let process: Arc>> = Arc::try_new( + NodeReplicated::new(num_replicas, |afc: AffinityChange| { + let pcm = kcb::per_core_mem(); + match afc { + AffinityChange::Replica(r) => { + pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity"); + } + AffinityChange::Revert(_orig) => { + pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity") + } + } + return 0; // TODO(dynrep): Return error code + }) + .expect("Not enough memory to initialize system"), + ) + .expect("Not enough memory to initialize system"); + + processes.push(process) + } + + + // Reset mem allocator to use per core memory again + let pcm = per_core_mem(); + pcm.set_mem_affinity(0 as atopology::NodeId).expect("Can't change affinity"); + + processes + // NodeReplicated::new(#data-kernels) -> // - for data_kernel in 0..#data-kernels { @@ -143,31 +176,10 @@ lazy_static! { - The closure when set on controller probably won't work in data-kernel (diff symbol addresses?) - The binary might be fine because it's identical! */ - let process_logs = { - let mut process_logs = Box::try_new(ArrayVec::new()).expect("Can't initialize process log vector."); - for _pid in 0..MAX_PROCESSES { - let log = Arc::try_new( - Log::< as Dispatch>::WriteOperation>::new(LARGE_PAGE_SIZE), - ) - .expect("Can't initialize process logs, out of memory."); - process_logs.push(log); - } - process_logs - }; - - if crate::CMDLINE - .get() - .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) - { - // Reset mem allocator to use per core memory again - let pcm = per_core_mem(); - pcm.set_mem_affinity(0 as atopology::NodeId).expect("Can't change affinity"); - } - - process_logs }; } +#[cfg(not(feature = "rackscale"))] lazy_static! { pub(crate) static ref PROCESS_TABLE: ArrayVec>>, MAX_PROCESSES> = create_process_table(); @@ -175,10 +187,6 @@ lazy_static! { #[cfg(not(feature = "rackscale"))] fn create_process_table() -> ArrayVec>>, MAX_PROCESSES> { - use crate::arch::kcb; - use core::num::NonZeroUsize; - use nr2::nr::AffinityChange; - // Want at least one replica... let num_replicas = NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap(); @@ -214,6 +222,7 @@ fn create_process_table() -> ArrayVec processes } +/* #[cfg(feature = "rackscale")] fn create_process_table( ) -> ArrayVec>>, MAX_PROCESSES>, MAX_NUMA_NODES> @@ -283,7 +292,7 @@ fn create_process_table( numa_cache } - + */ pub(crate) struct ArchProcessManagement; impl crate::nrproc::ProcessManager for ArchProcessManagement { diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs index e7a059845..c1a6bf312 100644 --- a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs +++ b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs @@ -9,20 +9,20 @@ use core2::io::Write; use atopology::NodeId; use crossbeam_queue::ArrayQueue; -use nr2::nr::{Dispatch, Log}; +use nr2::nr::{Dispatch, Log, NodeReplicated}; use rpc::rpc::*; use rpc::RPCClient; use super::client_state::CLIENT_STATE; use super::kernelrpc::*; use crate::arch::kcb::per_core_mem; -use crate::arch::process::{Ring3Process, PROCESS_LOGS}; +use crate::arch::process::{Ring3Process, PROCESS_TABLE}; use crate::arch::tlb::{Shootdown, RACKSCALE_CLIENT_WORKQUEUES}; use crate::error::{KError, KResult}; use crate::memory::shmem_affinity::local_shmem_affinity; use crate::memory::vspace::TlbFlushHandle; use crate::memory::{kernel_vaddr_to_paddr, paddr_to_kernel_vaddr, PAddr, VAddr}; -use crate::nr::{Op, NR_LOG}; +use crate::nr::{Op, KERNEL_NODE_INSTANCE}; use crate::nrproc::NrProcess; use crate::process::MAX_PROCESSES; @@ -128,20 +128,17 @@ pub(crate) fn handle_get_shmem_structure( ShmemStructure::NrProcLogs => { let mut logs = [0u64; MAX_PROCESSES]; - for i in 0..PROCESS_LOGS.len() { + for i in 0..PROCESS_TABLE.len() { // Create a clone in shared memory, and get the raw representation of it // The clone increments the strong counter, and the into_raw consumes this clone of the arc. - let client_clone = Arc::into_raw(Arc::clone(&PROCESS_LOGS[i])); + let client_clone = Arc::into_raw(Arc::clone(&PROCESS_TABLE[i])); // Send the raw pointer to the client clone address. To do this, we'll convert the kernel address // to a physical address, and then change it to a shmem offset by subtracting the shmem base. // TODO(rackscale): try to simplify this, and below? let arc_log_paddr = kernel_vaddr_to_paddr(VAddr::from_u64( (*&client_clone - as *const Log< - 'static, - as Dispatch>::WriteOperation, - >) as u64, + as *const NodeReplicated>) as u64, )); logs[i] = arc_log_paddr.as_u64(); } @@ -151,7 +148,7 @@ pub(crate) fn handle_get_shmem_structure( hdr.msg_len = core::mem::size_of::<[u64; MAX_PROCESSES]>() as u64; } ShmemStructure::NrLog => { - let log_clone = Arc::into_raw(Arc::clone(&NR_LOG)); + let log_clone = Arc::into_raw(Arc::clone(&KERNEL_NODE_INSTANCE)); let log_paddr = kernel_vaddr_to_paddr(VAddr::from_u64((*&log_clone as *const Log) as u64)) .as_u64(); diff --git a/kernel/src/memory/shmemalloc.rs b/kernel/src/memory/shmemalloc.rs index e1f39d842..2b2c13b5d 100644 --- a/kernel/src/memory/shmemalloc.rs +++ b/kernel/src/memory/shmemalloc.rs @@ -20,6 +20,7 @@ pub(crate) struct ShmemAlloc { } impl ShmemAlloc { + #[allow(dead_code)] pub(crate) fn new(affinity: NodeId) -> ShmemAlloc { assert!( is_shmem_affinity(affinity) diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs index 0bf9cc279..181b49358 100644 --- a/kernel/src/nr.rs +++ b/kernel/src/nr.rs @@ -3,12 +3,14 @@ use crate::prelude::*; use core::fmt::Debug; +use core::num::NonZeroUsize; use alloc::sync::Arc; use hashbrown::HashMap; use log::{error, trace}; -use nr2::nr::{Dispatch, NodeReplicated, ThreadToken}; +use nr2::nr::{Dispatch, NodeReplicated, ThreadToken, AffinityChange}; use spin::Once; +use crate::arch::kcb; #[cfg(feature = "rackscale")] use lazy_static::lazy_static; @@ -25,39 +27,43 @@ pub(crate) static NR_REPLICA: Once<(Arc>, ThreadToken // clones to client so they can create replicas of their own. #[cfg(feature = "rackscale")] lazy_static! { - pub(crate) static ref NR_LOG: Arc> = { + pub(crate) static ref KERNEL_NODE_INSTANCE: Arc> = { + use crate::memory::shmem_affinity::mid_to_shmem_affinity; + use crate::memory::shmem_affinity::local_shmem_affinity; + if crate::CMDLINE .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) { - use nr2::nr::Log; - use crate::arch::kcb::per_core_mem; - use crate::memory::{LARGE_PAGE_SIZE, shmem_affinity::local_shmem_affinity}; - - let pcm = per_core_mem(); - pcm.set_mem_affinity(local_shmem_affinity()) - .expect("Can't change affinity"); - - let log = Arc::try_new(Log::::new(LARGE_PAGE_SIZE)).expect("Not enough memory to initialize system"); - - // Reset mem allocator to use per core memory again - let pcm = per_core_mem(); - pcm.set_mem_affinity(0 as atopology::NodeId) - .expect("Can't change affinity"); - - log + // Want at least one replica... + let num_replicas = + NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap(); + Arc::try_new( + NodeReplicated::new(num_replicas, |afc: AffinityChange| { + let pcm = kcb::per_core_mem(); + match afc { + AffinityChange::Replica(r) => { + pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity"); + } + AffinityChange::Revert(_orig) => { + pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity") + } + } + return 0; // TODO(dynrep): Return error code + }) + .expect("Not enough memory to initialize system"), + ) + .expect("Not enough memory to initialize system") } else { - use nr2::nr::Log; use crate::memory::{paddr_to_kernel_vaddr, PAddr}; - use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure}; // Get location of the nr log from the controller, who will created them in shared memory - let mut log_ptrs = [0u64; 1]; - rpc_get_shmem_structure(ShmemStructure::NrLog, &mut log_ptrs).expect("Failed to get nr log from controller"); - let log_ptr = paddr_to_kernel_vaddr(PAddr::from(log_ptrs[0])); - let local_log_arc = unsafe { Arc::from_raw(log_ptr.as_u64() as *const Log<'static, Op>) }; - local_log_arc + let mut node_replicated_ptrs = [0u64; 1]; + rpc_get_shmem_structure(ShmemStructure::NrLog, &mut node_replicated_ptrs).expect("Failed to get nr log from controller"); + let nr_ptr = paddr_to_kernel_vaddr(PAddr::from(node_replicated_ptrs[0])); + let nr_instance = unsafe { Arc::from_raw(nr_ptr.as_u64() as *const NodeReplicated) }; + nr_instance } }; }