Skip to content

Commit

Permalink
feat: add RDMA listener to Mayastor Nvmf target
Browse files Browse the repository at this point in the history
This adds the capability for the Mayastor NVMf target to listen for RDMA
connections, if the RDMA feature is enabled during installation. Any NVMf
subsystem facing the host, i.e. the nexus NVMf subsystem, will now be able
to support both TCP and RDMA.

Signed-off-by: Diwakar Sharma <[email protected]>
  • Loading branch information
dsharma-dc committed Aug 13, 2024
1 parent d09082c commit 7e27a13
Show file tree
Hide file tree
Showing 9 changed files with 299 additions and 36 deletions.
2 changes: 1 addition & 1 deletion io-engine/src/bin/io-engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ fn start_tokio_runtime(args: &MayastorCliArgs) {

if args.rdma {
env::set_var("ENABLE_RDMA", "true");
warn!("RDMA is enabled for Mayastor NVMEoF target");
warn!("RDMA is requested to be enabled for Mayastor NVMEoF target");
}

unsafe {
Expand Down
3 changes: 2 additions & 1 deletion io-engine/src/core/bdev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ where
) -> Result<Self::Output, Self::Error> {
let me = unsafe { self.get_unchecked_mut() };
let props = NvmfShareProps::from(props);
let is_lvol = me.driver() == "lvol";

let ptpl = props.ptpl().as_ref().map(|ptpl| ptpl.path());

Expand All @@ -232,7 +233,7 @@ where
.await
.context(ShareNvmf {})?;

subsystem.start().await.context(ShareNvmf {})
subsystem.start(!is_lvol).await.context(ShareNvmf {})
}

fn create_ptpl(&self) -> Result<Option<PtplProps>, Self::Error> {
Expand Down
5 changes: 5 additions & 0 deletions io-engine/src/core/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -834,6 +834,11 @@ impl MayastorEnvironment {
.map(|s| s.clone())
}

/// Reports whether RDMA needs to be enabled for the Mayastor target.
///
/// Returns the current value of the `rdma` field of this environment.
pub fn rdma(&self) -> bool {
self.rdma
}

/// Detects IP address for NVMF target by the interface specified in CLI
/// arguments.
fn detect_nvmf_tgt_iface_ip(iface: &str) -> Result<String, String> {
Expand Down
106 changes: 103 additions & 3 deletions io-engine/src/subsys/config/opts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use spdk_rs::{

use std::{
convert::TryFrom,
fmt::{Debug, Display},
fmt::{Debug, Display, Formatter},
mem::zeroed,
ptr::null_mut,
str::FromStr,
Expand Down Expand Up @@ -84,6 +84,26 @@ impl GetOpts for NexusOpts {
/// Must be equal to the size of `spdk_nvmf_target_opts.crdt`.
pub const TARGET_CRDT_LEN: usize = 3;

/// Transport type used by an NVMf target listener.
///
/// The enum carries no data, so it is `Copy` and `Eq` in addition to the
/// other derived traits; values can be passed and compared freely.
/// `Tcp` is the default variant.
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum NvmfTgtTransport {
    /// RDMA transport.
    Rdma,
    /// TCP transport (default).
    #[default]
    Tcp,
}

impl Display for NvmfTgtTransport {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}",
match self {
NvmfTgtTransport::Rdma => "rdma",
NvmfTgtTransport::Tcp => "tcp",
}
)
}
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct NvmfTgtConfig {
Expand All @@ -94,11 +114,13 @@ pub struct NvmfTgtConfig {
/// NVMF target Command Retry Delay in x100 ms.
pub crdt: [u16; TARGET_CRDT_LEN],
/// TCP transport options
pub opts: NvmfTcpTransportOpts,
pub opts_tcp: NvmfTcpTransportOpts,
/// NVMF target interface (ip, mac, name or subnet).
pub interface: Option<String>,
/// Enable RDMA for NVMF target or not
pub rdma: Option<bool>,
/// RDMA transport options
pub opts_rdma: NvmfRdmaTransportOpts,
}

impl From<NvmfTgtConfig> for Box<spdk_nvmf_target_opts> {
Expand Down Expand Up @@ -126,9 +148,10 @@ impl Default for NvmfTgtConfig {
name: "mayastor_target".to_string(),
max_namespaces: 2048,
crdt: args.nvmf_tgt_crdt,
opts: NvmfTcpTransportOpts::default(),
opts_tcp: NvmfTcpTransportOpts::default(),
interface: None,
rdma: None,
opts_rdma: NvmfRdmaTransportOpts::default(),
}
}
}
Expand Down Expand Up @@ -173,6 +196,36 @@ pub struct NvmfTcpTransportOpts {
data_wr_pool_size: u32,
}

/// Settings for the RDMA transport.
///
/// These values are copied into `spdk_nvmf_transport_opts` by the matching
/// `From` implementation. With `#[serde(default, deny_unknown_fields)]`,
/// fields missing from the input are taken from `Default`, and unknown
/// fields are rejected during deserialization.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct NvmfRdmaTransportOpts {
/// Maximum queue depth.
max_queue_depth: u16,
/// Maximum number of qpairs per controller.
max_qpairs_per_ctrl: u16,
/// In-capsule data size.
in_capsule_data_size: u32,
/// Maximum IO size.
max_io_size: u32,
/// IO unit size.
io_unit_size: u32,
/// Maximum admin queue depth per admin queue.
max_aq_depth: u32,
/// Number of shared buffers.
num_shared_buf: u32,
/// Buffer cache size.
buf_cache_size: u32,
/// DIF insert-or-strip setting, passed to the transport as
/// `dif_insert_or_strip`.
dif_insert_or_strip: bool,
/// Abort execution timeout (seconds, going by the field name).
abort_timeout_sec: u32,
/// Acceptor poll rate, in microseconds.
acceptor_poll_rate: u32,
/// Use zero-copy operations if the underlying bdev supports them.
zcopy: bool,
}

/// try to read an env variable or returns the default when not found
pub(crate) fn try_from_env<T>(name: &str, default: T) -> T
where
Expand Down Expand Up @@ -288,6 +341,29 @@ impl Default for NvmfTcpTransportOpts {
}
}

// todo: Tune the defaults by experiments or recommendations, if required.
impl Default for NvmfRdmaTransportOpts {
    /// Builds the default RDMA transport options.
    ///
    /// Settings obtained through `try_from_env` take their value from the
    /// named environment variable when it is available and otherwise use the
    /// literal given alongside it; the remaining settings are fixed.
    fn default() -> Self {
        // Resolve the environment-backed settings up front, one lookup each.
        let max_queue_depth: u16 =
            try_from_env("NVMF_RDMA_MAX_QUEUE_DEPTH", 128);
        let max_qpairs_per_ctrl: u16 =
            try_from_env("NVMF_RDMA_MAX_QPAIRS_PER_CTRL", 32);
        let num_shared_buf: u32 =
            try_from_env("NVMF_RDMA_NUM_SHARED_BUF", 2047);
        let buf_cache_size: u32 =
            try_from_env("NVMF_RDMA_BUF_CACHE_SIZE", 64);
        let acceptor_poll_rate: u32 =
            try_from_env("NVMF_ACCEPTOR_POLL_RATE", 10_000);
        // Zero-copy is on when the variable resolves to 1.
        let zcopy = try_from_env("NVMF_ZCOPY", 1) == 1;

        Self {
            max_queue_depth,
            max_qpairs_per_ctrl,
            in_capsule_data_size: 8192,
            max_io_size: 131_072,
            io_unit_size: 8192,
            max_aq_depth: 32,
            num_shared_buf,
            buf_cache_size,
            dif_insert_or_strip: false,
            abort_timeout_sec: 1,
            acceptor_poll_rate,
            zcopy,
        }
    }
}

/// we cannot add derives for YAML to these structs directly, so we need to
/// copy them. The upside though, is that if the FFI structures change, we will
/// know about it during compile time.
Expand Down Expand Up @@ -319,6 +395,30 @@ impl From<NvmfTcpTransportOpts> for spdk_nvmf_transport_opts {
}
}

impl From<NvmfRdmaTransportOpts> for spdk_nvmf_transport_opts {
fn from(o: NvmfRdmaTransportOpts) -> Self {
Self {
max_queue_depth: o.max_queue_depth,
max_qpairs_per_ctrlr: o.max_qpairs_per_ctrl,
in_capsule_data_size: o.in_capsule_data_size,
max_io_size: o.max_io_size,
io_unit_size: o.io_unit_size,
max_aq_depth: o.max_aq_depth,
num_shared_buffers: o.num_shared_buf,
buf_cache_size: o.buf_cache_size,
dif_insert_or_strip: o.dif_insert_or_strip,
reserved29: Default::default(),
abort_timeout_sec: o.abort_timeout_sec,
association_timeout: 120000,
transport_specific: std::ptr::null(),
opts_size: std::mem::size_of::<spdk_nvmf_transport_opts>() as u64,
acceptor_poll_rate: o.acceptor_poll_rate,
zcopy: o.zcopy,
reserved61: Default::default(),
}
}
}

/// generic settings for the NVMe bdev (all our replicas)
#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(default, deny_unknown_fields)]
Expand Down
32 changes: 26 additions & 6 deletions io-engine/src/subsys/nvmf/subsystem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ use crate::{
ffihelper::{cb_arg, done_cb, AsStr, FfiResult, IntoCString},
lvs::Lvol,
subsys::{
config::opts::NvmfTgtTransport,
make_subsystem_serial,
nvmf::{transport::TransportId, Error, NVMF_TGT},
Config,
Expand Down Expand Up @@ -798,7 +799,7 @@ impl NvmfSubsystem {
}

// we currently allow all listeners to the subsystem
async fn add_listener(&self) -> Result<(), Error> {
async fn add_listener(&self, xprt: NvmfTgtTransport) -> Result<(), Error> {
extern "C" fn listen_cb(arg: *mut c_void, status: i32) {
let s = unsafe { Box::from_raw(arg as *mut oneshot::Sender<i32>) };
s.send(status).unwrap();
Expand All @@ -807,8 +808,11 @@ impl NvmfSubsystem {
let cfg = Config::get();

// dont yet enable both ports, IOW just add one transportID now

let trid_replica = TransportId::new(cfg.nexus_opts.nvmf_replica_port);
let trid_replica = if xprt == NvmfTgtTransport::Tcp {
TransportId::new(cfg.nexus_opts.nvmf_replica_port)
} else {
TransportId::new_with_rdma(cfg.nexus_opts.nvmf_replica_port)
};

let (s, r) = oneshot::channel::<i32>();
unsafe {
Expand Down Expand Up @@ -907,8 +911,21 @@ impl NvmfSubsystem {
/// start the subsystem previously created -- note that we destroy it on
/// failure to ensure the state is not in limbo and to avoid leaking
/// resources
pub async fn start(self) -> Result<String, Error> {
self.add_listener().await?;
pub async fn start(self, need_rdma: bool) -> Result<String, Error> {
self.add_listener(NvmfTgtTransport::Tcp).await?;
if need_rdma {
let _ =
self.add_listener(NvmfTgtTransport::Rdma)
.await
.map_err(|e| {
warn!(
"NvmfSubsystem RDMA listener add failed {}. \
Subsystem will be accessible over TCP only.\
{:?}",
e, self
);
});
}

if let Err(e) = self
.change_state("start", |ss, cb, arg| unsafe {
Expand Down Expand Up @@ -1118,7 +1135,10 @@ impl NvmfSubsystem {
pub fn uri_endpoints(&self) -> Option<Vec<String>> {
if let Some(v) = self.listeners_to_vec() {
let nqn = self.get_nqn();
Some(v.iter().map(|t| format!("{t}/{nqn}")).collect::<Vec<_>>())
let retvec = Some(
v.iter().map(|t| format!("{t}/{nqn}")).collect::<Vec<_>>(),
);
retvec
} else {
None
}
Expand Down
Loading

0 comments on commit 7e27a13

Please sign in to comment.