Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mpid: add MPIR_Data and MPID_{Send,Recv}_data (WIP) #7096

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions doc/wiki/developer_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,7 @@ MPI_Aint MPIDI_[NM|SHM]_am_eager_limit(void)
MPI_Aint MPIDI_[NM|SHM]_am_eager_buf_limit(void)

/* return true/false if pt2pt message can be sent eagerly */
bool MPIDI_[NM|SHM]_am_check_eager(MPI_Aint am_hdr_sz, MPI_Aint data_sz,
const void *data, MPI_Aint count,
MPI_Datatype datatype, MPIR_Request * sreq)
bool MPIDI_[NM|SHM]_am_check_eager(MPI_Aint am_hdr_sz, MPI_Aint data_sz, MPIR_Request * sreq)

/****************** Callback APIs ******************/

Expand Down
4 changes: 2 additions & 2 deletions src/include/mpir_gpu_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ MPL_STATIC_INLINE_PREFIX void MPIR_gpu_host_swap_gpu(const void *buf, MPI_Aint c
MPL_pointer_attr_t attr, void *host_buf)
{
if (host_buf) {
MPIR_Localcopy_gpu(buf, count, datatype, 0, &attr, host_buf, count, datatype, 0, NULL,
MPIR_Localcopy_gpu(buf, count, datatype, 0, -1, &attr, host_buf, count, datatype, 0, NULL,
MPL_GPU_COPY_DIRECTION_NONE, MPL_GPU_ENGINE_TYPE_COPY_HIGH_BANDWIDTH,
true);
}
Expand All @@ -99,7 +99,7 @@ MPL_STATIC_INLINE_PREFIX void MPIR_gpu_swap_back(void *host_buf, void *gpu_buf,
MPL_STATIC_INLINE_PREFIX void MPIR_gpu_swap_back_gpu(void *host_buf, void *gpu_buf, MPI_Aint count,
MPI_Datatype datatype, MPL_pointer_attr_t attr)
{
MPIR_Localcopy_gpu(host_buf, count, datatype, 0, NULL, gpu_buf, count, datatype, 0, &attr,
MPIR_Localcopy_gpu(host_buf, count, datatype, 0, -1, NULL, gpu_buf, count, datatype, 0, &attr,
MPL_GPU_COPY_DIRECTION_NONE, MPL_GPU_ENGINE_TYPE_COPY_HIGH_BANDWIDTH, true);
}

Expand Down
30 changes: 21 additions & 9 deletions src/include/mpir_misc.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@
#define MPIR_FINALIZE_CALLBACK_DEFAULT_PRIO 0
#define MPIR_FINALIZE_CALLBACK_MAX_PRIO 10

/* Misc. declarations that need to be included before e.g. mpidpre.h */

/* Descriptor that groups a (buf, count, datatype) triple together with an
 * offset and a length in one object, so that it can be passed around as a
 * single argument. */
typedef struct MPIR_Data {
void *buf;                      /* start of the user buffer */
MPI_Aint count;                 /* number of datatype elements in buf */
MPI_Datatype datatype;          /* datatype of the elements in buf */
MPI_Aint offset;                /* NOTE(review): presumably a byte offset into the data for partial
                                 * transfers, like sendoffset in the localcopy routines -- confirm */
MPI_Aint length;                /* NOTE(review): presumably the number of bytes to transfer, with -1
                                 * meaning "entire data" as for sendlength in the localcopy
                                 * routines -- confirm */
} MPIR_Data;

/* Define a typedef for the errflag value used by many internal
* functions. If an error needs to be returned, these values can be
* used to signal such. More details can be found further down in the
Expand Down Expand Up @@ -74,20 +84,22 @@ typedef struct {
int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype);
int MPIR_Ilocalcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype,
MPI_Aint sendoffset, MPI_Aint sendlength,
void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, MPI_Aint recvoffset,
MPIR_Typerep_req * typerep_req);
int MPIR_Localcopy_stream(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, void *stream);
int MPIR_Localcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
MPI_Aint sendoffset, MPL_pointer_attr_t * sendattr, void *recvbuf,
MPI_Aint recvcount, MPI_Datatype recvtype, MPI_Aint recvoffset,
MPL_pointer_attr_t * recvattr, MPL_gpu_copy_direction_t dir,
MPL_gpu_engine_type_t enginetype, bool commit);
MPI_Aint sendoffset, MPI_Aint sendlength, MPL_pointer_attr_t * sendattr,
void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype,
MPI_Aint recvoffset, MPL_pointer_attr_t * recvattr,
MPL_gpu_copy_direction_t dir, MPL_gpu_engine_type_t enginetype, bool commit);
int MPIR_Ilocalcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
MPI_Aint sendoffset, MPL_pointer_attr_t * sendattr, void *recvbuf,
MPI_Aint recvcount, MPI_Datatype recvtype, MPI_Aint recvoffset,
MPL_pointer_attr_t * recvattr, MPL_gpu_copy_direction_t dir,
MPL_gpu_engine_type_t enginetype, bool commit, MPIR_gpu_req * req);
MPI_Aint sendoffset, MPI_Aint sendlength, MPL_pointer_attr_t * sendattr,
void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype,
MPI_Aint recvoffset, MPL_pointer_attr_t * recvattr,
MPL_gpu_copy_direction_t dir, MPL_gpu_engine_type_t enginetype,
bool commit, MPIR_gpu_req * req);

/* Contiguous datatype calculates buffer address with `(char *) buf + dt_true_lb`.
* However, dt_true_lb is treated as ptrdiff_t (signed), and when buf is MPI_BOTTOM
Expand Down
71 changes: 42 additions & 29 deletions src/mpi/misc/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ enum {
LOCALCOPY_STREAM,
};

/* sendoffset, recvoffset, and sendlength enable partial data chunk copy. Use offset 0 and sendlength -1
* to copy the entire data. */
static int do_localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
MPI_Aint sendoffset, void *recvbuf, MPI_Aint recvcount,
MPI_Aint sendoffset, MPI_Aint sendlength, void *recvbuf, MPI_Aint recvcount,
MPI_Datatype recvtype, MPI_Aint recvoffset, int localcopy_kind,
void *extra_param)
{
Expand All @@ -41,9 +43,13 @@ static int do_localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype se
if (!sdata_sz || !rdata_sz)
goto fn_exit;

copy_sz = sdata_sz;
if (copy_sz > rdata_sz)
copy_sz = rdata_sz;
if (sendlength == -1) {
copy_sz = sdata_sz;
if (copy_sz > rdata_sz)
copy_sz = rdata_sz;
} else {
copy_sz = sendlength;
}

/* Builtin types are the common case; optimize for them */
MPIR_Datatype_is_contig(sendtype, &sendtype_iscontig);
Expand Down Expand Up @@ -185,7 +191,8 @@ static int do_localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype se

#ifdef MPL_HAVE_GPU
static int do_localcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
MPI_Aint sendoffset, MPL_pointer_attr_t * send_attr, void *recvbuf,
MPI_Aint sendoffset, MPI_Aint sendlength,
MPL_pointer_attr_t * send_attr, void *recvbuf,
MPI_Aint recvcount, MPI_Datatype recvtype, MPI_Aint recvoffset,
MPL_pointer_attr_t * recv_attr, MPL_gpu_copy_direction_t dir,
MPL_gpu_engine_type_t enginetype, bool commit, MPIR_gpu_req * gpu_req)
Expand Down Expand Up @@ -213,9 +220,13 @@ static int do_localcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatyp
if (!sdata_sz || !rdata_sz)
goto fn_exit;

copy_sz = sdata_sz;
if (copy_sz > rdata_sz)
copy_sz = rdata_sz;
if (sendlength == -1) {
copy_sz = sdata_sz;
if (copy_sz > rdata_sz)
copy_sz = rdata_sz;
} else {
copy_sz = sendlength;
}

/* This case is specific for contig datatypes */
MPIR_Datatype_is_contig(sendtype, &sendtype_iscontig);
Expand Down Expand Up @@ -327,9 +338,8 @@ int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtyp

MPIR_FUNC_ENTER;

mpi_errno =
do_localcopy(sendbuf, sendcount, sendtype, 0, recvbuf, recvcount, recvtype, 0,
LOCALCOPY_BLOCKING, NULL);
mpi_errno = do_localcopy(sendbuf, sendcount, sendtype, 0, -1, recvbuf, recvcount, recvtype, 0,
LOCALCOPY_BLOCKING, NULL);
MPIR_ERR_CHECK(mpi_errno);

fn_exit:
Expand All @@ -340,15 +350,17 @@ int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtyp
}

int MPIR_Ilocalcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype,
MPI_Aint sendoffset, MPI_Aint sendlength,
void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, MPI_Aint recvoffset,
MPIR_Typerep_req * typerep_req)
{
int mpi_errno = MPI_SUCCESS;

MPIR_FUNC_ENTER;

mpi_errno = do_localcopy(sendbuf, sendcount, sendtype, 0, recvbuf, recvcount, recvtype,
0, LOCALCOPY_NONBLOCKING, typerep_req);
mpi_errno = do_localcopy(sendbuf, sendcount, sendtype, sendoffset, sendlength,
recvbuf, recvcount, recvtype, recvoffset,
LOCALCOPY_NONBLOCKING, typerep_req);
MPIR_ERR_CHECK(mpi_errno);

fn_exit:
Expand All @@ -370,7 +382,7 @@ int MPIR_Localcopy_stream(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype

MPIR_FUNC_ENTER;

mpi_errno = do_localcopy(sendbuf, sendcount, sendtype, 0, recvbuf, recvcount,
mpi_errno = do_localcopy(sendbuf, sendcount, sendtype, 0, -1, recvbuf, recvcount,
recvtype, 0, LOCALCOPY_STREAM, stream);
MPIR_ERR_CHECK(mpi_errno);

Expand All @@ -382,7 +394,8 @@ int MPIR_Localcopy_stream(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype
}

int MPIR_Localcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
MPI_Aint sendoffset, MPL_pointer_attr_t * sendattr, void *recvbuf,
MPI_Aint sendoffset, MPI_Aint sendlength,
MPL_pointer_attr_t * sendattr, void *recvbuf,
MPI_Aint recvcount, MPI_Datatype recvtype, MPI_Aint recvoffset,
MPL_pointer_attr_t * recvattr, MPL_gpu_copy_direction_t dir,
MPL_gpu_engine_type_t enginetype, bool commit)
Expand All @@ -392,14 +405,13 @@ int MPIR_Localcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sen
MPIR_FUNC_ENTER;

#ifdef MPL_HAVE_GPU
mpi_errno =
do_localcopy_gpu(sendbuf, sendcount, sendtype, sendoffset, sendattr, recvbuf, recvcount,
recvtype, recvoffset, recvattr, dir, enginetype, commit, NULL);
mpi_errno = do_localcopy_gpu(sendbuf, sendcount, sendtype, sendoffset, sendlength, sendattr,
recvbuf, recvcount, recvtype, recvoffset, recvattr,
dir, enginetype, commit, NULL);
MPIR_ERR_CHECK(mpi_errno);
#else
mpi_errno =
do_localcopy(sendbuf, sendcount, sendtype, sendoffset, recvbuf, recvcount, recvtype,
recvoffset, LOCALCOPY_BLOCKING, NULL);
mpi_errno = do_localcopy(sendbuf, sendcount, sendtype, sendoffset, sendlength,
recvbuf, recvcount, recvtype, recvoffset, LOCALCOPY_BLOCKING, NULL);
MPIR_ERR_CHECK(mpi_errno);
#endif

Expand All @@ -411,7 +423,8 @@ int MPIR_Localcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sen
}

int MPIR_Ilocalcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
MPI_Aint sendoffset, MPL_pointer_attr_t * sendattr, void *recvbuf,
MPI_Aint sendoffset, MPI_Aint sendlength,
MPL_pointer_attr_t * sendattr, void *recvbuf,
MPI_Aint recvcount, MPI_Datatype recvtype, MPI_Aint recvoffset,
MPL_pointer_attr_t * recvattr, MPL_gpu_copy_direction_t dir,
MPL_gpu_engine_type_t enginetype, bool commit, MPIR_gpu_req * req)
Expand All @@ -421,14 +434,14 @@ int MPIR_Ilocalcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype se
MPIR_FUNC_ENTER;

#ifdef MPL_HAVE_GPU
mpi_errno =
do_localcopy_gpu(sendbuf, sendcount, sendtype, sendoffset, sendattr, recvbuf, recvcount,
recvtype, recvoffset, recvattr, dir, enginetype, commit, req);
mpi_errno = do_localcopy_gpu(sendbuf, sendcount, sendtype, sendoffset, sendlength, sendattr,
recvbuf, recvcount, recvtype, recvoffset, recvattr,
dir, enginetype, commit, req);
MPIR_ERR_CHECK(mpi_errno);
#else
mpi_errno =
do_localcopy(sendbuf, sendcount, sendtype, sendoffset, recvbuf, recvcount, recvtype,
recvoffset, LOCALCOPY_NONBLOCKING, &req->u.y_req);
mpi_errno = do_localcopy(sendbuf, sendcount, sendtype, sendoffset, sendlength,
recvbuf, recvcount, recvtype, recvoffset,
LOCALCOPY_NONBLOCKING, &req->u.y_req);
MPIR_ERR_CHECK(mpi_errno);
req->type = MPIR_TYPEREP_REQUEST;
#endif
Expand Down
11 changes: 9 additions & 2 deletions src/mpid/ch4/ch4_api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ Non Native API:
NM*: void
SHM*: void
am_check_eager: bool
NM*: am_hdr_sz, data_sz, data, count, datatype, sreq
SHM*: am_hdr_sz, data_sz, data, count, datatype, sreq
NM*: am_hdr_sz, data_sz, sreq
SHM*: am_hdr_sz, data_sz, sreq
comm_get_gpid : int
NM*: comm_ptr, idx, gpid_ptr, is_remote
get_local_upids : int
Expand Down Expand Up @@ -138,12 +138,18 @@ Non Native API:
SHM*: req

Native API:
send_data : int
NM*: data-2, rank, tag, comm, attr-2, addr, req_p
SHM*: data-2, rank, tag, comm, attr-2, addr, req_p
mpi_isend : int
NM*: buf, count, datatype, rank, tag, comm, attr-2, addr, req_p
SHM*: buf, count, datatype, rank, tag, comm, attr-2, addr, req_p
mpi_cancel_send : int
NM*: sreq
SHM*: sreq
recv_data : int
NM*: data-2, rank, tag, comm, attr-2, addr, req_p, partner
SHM*: data-2, rank, tag, comm, attr-2, req_p
mpi_irecv : int
NM*: buf-2, count, datatype, rank, tag, comm, attr-2, addr, req_p, partner
SHM*: buf-2, count, datatype, rank, tag, comm, attr-2, req_p
Expand Down Expand Up @@ -446,6 +452,7 @@ PARAM:
context_id: MPIR_Context_id_t
count: MPI_Aint
data: const void *
data-2: MPIR_Data *
data_sz: MPI_Aint
datatype: MPI_Datatype
datatype_p: MPIR_Datatype *
Expand Down
4 changes: 4 additions & 0 deletions src/mpid/ch4/include/mpidch4.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,16 @@ int MPID_Progress_activate(int id);
int MPID_Progress_deactivate(int id);
MPL_STATIC_INLINE_PREFIX int MPID_Recv(void *, MPI_Aint, MPI_Datatype, int, int, MPIR_Comm *, int,
MPI_Status *, MPIR_Request **) MPL_STATIC_INLINE_SUFFIX;
/* Takes an MPIR_Data pointer in place of the separate
 * (void *, MPI_Aint, MPI_Datatype) buffer arguments of MPID_Recv, and has no
 * MPI_Status parameter. */
MPL_STATIC_INLINE_PREFIX int MPID_Recv_data(MPIR_Data * data, int source, int tag, MPIR_Comm * comm,
int attr, MPIR_Request ** req) MPL_STATIC_INLINE_SUFFIX;
int MPID_Recv_init(void *, MPI_Aint, MPI_Datatype, int, int, MPIR_Comm *, int, MPIR_Request **);
MPL_STATIC_INLINE_PREFIX void MPID_Request_set_completed(MPIR_Request *) MPL_STATIC_INLINE_SUFFIX;
MPL_STATIC_INLINE_PREFIX int MPID_Request_complete(MPIR_Request *) MPL_STATIC_INLINE_SUFFIX;
MPL_STATIC_INLINE_PREFIX int MPID_Request_is_anysource(MPIR_Request *) MPL_STATIC_INLINE_SUFFIX;
MPL_STATIC_INLINE_PREFIX int MPID_Send(const void *, MPI_Aint, MPI_Datatype, int, int, MPIR_Comm *,
int, MPIR_Request **) MPL_STATIC_INLINE_SUFFIX;
/* Takes an MPIR_Data pointer in place of the separate
 * (const void *, MPI_Aint, MPI_Datatype) buffer arguments of MPID_Send.
 * All parameters are named, matching the MPID_Recv_data prototype. */
MPL_STATIC_INLINE_PREFIX int MPID_Send_data(MPIR_Data * data, int dest, int tag, MPIR_Comm * comm,
                                            int attr, MPIR_Request ** req) MPL_STATIC_INLINE_SUFFIX;
MPL_STATIC_INLINE_PREFIX int MPID_Ssend(const void *, MPI_Aint, MPI_Datatype, int, int, MPIR_Comm *,
int, MPIR_Request **) MPL_STATIC_INLINE_SUFFIX;
MPL_STATIC_INLINE_PREFIX int MPID_Rsend(const void *, MPI_Aint, MPI_Datatype, int, int, MPIR_Comm *,
Expand Down
4 changes: 1 addition & 3 deletions src/mpid/ch4/include/mpidpre.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,7 @@ typedef struct MPIDI_part_request {
/* message queue within "self"-comms, i.e. MPI_COMM_SELF and all communicators with size of 1. */

typedef struct {
void *buf;
MPI_Aint count;
MPI_Datatype datatype;
struct MPIR_Data data;
int tag;
int context_id;
MPIR_Request *match_req; /* for mrecv */
Expand Down
3 changes: 1 addition & 2 deletions src/mpid/ch4/netmod/ofi/ofi_am.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_NM_am_send_hdr_reply(MPIR_Comm * comm,
}

MPL_STATIC_INLINE_PREFIX bool MPIDI_NM_am_check_eager(MPI_Aint am_hdr_sz, MPI_Aint data_sz,
const void *data, MPI_Aint count,
MPI_Datatype datatype, MPIR_Request * sreq)
MPIR_Request * sreq)
{
MPIDI_OFI_AMREQUEST(sreq, data_sz) = data_sz;
if ((am_hdr_sz + data_sz)
Expand Down
7 changes: 4 additions & 3 deletions src/mpid/ch4/netmod/ofi/ofi_events.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,9 @@ static int pipeline_recv_event(struct fi_cq_tagged_entry *wc, MPIR_Request * r,
MPI_Aint actual_unpack_bytes;
MPIR_gpu_req yreq;
mpi_errno =
MPIR_Ilocalcopy_gpu(wc_buf, wc->len, MPI_BYTE, 0, NULL, recv_buf, recv_count,
datatype, 0, NULL, MPL_GPU_COPY_H2D, engine_type, 1, &yreq);
MPIR_Ilocalcopy_gpu(wc_buf, wc->len, MPI_BYTE, 0, -1, NULL, recv_buf,
recv_count, datatype, 0, NULL, MPL_GPU_COPY_H2D,
engine_type, 1, &yreq);
MPIR_ERR_CHECK(mpi_errno);
actual_unpack_bytes = wc->len;
task =
Expand Down Expand Up @@ -216,7 +217,7 @@ static int pipeline_recv_event(struct fi_cq_tagged_entry *wc, MPIR_Request * r,
MPI_Aint actual_unpack_bytes;
MPIR_gpu_req yreq;
mpi_errno =
MPIR_Ilocalcopy_gpu(wc_buf, (MPI_Aint) wc->len, MPI_BYTE, 0, NULL,
MPIR_Ilocalcopy_gpu(wc_buf, (MPI_Aint) wc->len, MPI_BYTE, 0, -1, NULL,
(char *) recv_buf, (MPI_Aint) recv_count, datatype,
MPIDI_OFI_REQUEST(rreq, pipeline_info.offset), NULL,
MPL_GPU_COPY_H2D, engine_type, 1, &yreq);
Expand Down
2 changes: 1 addition & 1 deletion src/mpid/ch4/netmod/ofi/ofi_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_recv_event(int vci, struct fi_cq_tagged_e
actual_unpack_bytes = wc->len;
mpi_errno =
MPIR_Localcopy_gpu(MPIDI_OFI_REQUEST(rreq, noncontig.pack.pack_buffer), count,
MPI_BYTE, 0, NULL, recv_buf, count, MPI_BYTE, 0, &attr,
MPI_BYTE, 0, -1, NULL, recv_buf, count, MPI_BYTE, 0, &attr,
MPL_GPU_COPY_DIRECTION_NONE, engine, true);
MPIR_ERR_CHECK(mpi_errno);
} else {
Expand Down
4 changes: 2 additions & 2 deletions src/mpid/ch4/netmod/ofi/ofi_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -931,8 +931,8 @@ static int MPIDI_OFI_gpu_progress_send(void)
commit = 1;
mpi_errno =
MPIR_Ilocalcopy_gpu((char *) send_task->send_buf, send_task->count, datatype,
send_task->offset, &send_task->attr, host_buf, chunk_sz,
MPI_BYTE, 0, NULL, MPL_GPU_COPY_D2H, engine_type,
send_task->offset, chunk_sz, &send_task->attr, host_buf,
chunk_sz, MPI_BYTE, 0, NULL, MPL_GPU_COPY_D2H, engine_type,
commit, &yreq);
MPIR_ERR_CHECK(mpi_errno);
actual_pack_bytes = chunk_sz;
Expand Down
Loading