From e588e9b3ef3d914346a28947a751586242d545cf Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Mon, 1 Apr 2024 15:57:42 -0600 Subject: [PATCH 1/4] sycl - fix vec memory copy --- backends/sycl-ref/ceed-sycl-vector.sycl.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backends/sycl-ref/ceed-sycl-vector.sycl.cpp b/backends/sycl-ref/ceed-sycl-vector.sycl.cpp index 1922758997..1bbc8a69fa 100644 --- a/backends/sycl-ref/ceed-sycl-vector.sycl.cpp +++ b/backends/sycl-ref/ceed-sycl-vector.sycl.cpp @@ -212,10 +212,11 @@ static int CeedVectorSetArrayDevice_Sycl(const CeedVector vec, const CeedCopyMod switch (copy_mode) { case CEED_COPY_VALUES: { - if (!impl->d_array_owned) + if (!impl->d_array_owned) { CeedCallSycl(ceed, impl->d_array_owned = sycl::malloc_device(length, data->sycl_device, data->sycl_context)); + } if (array) { - sycl::event copy_event = data->sycl_queue.copy(array, impl->d_array, length, {e}); + sycl::event copy_event = data->sycl_queue.copy(array, impl->d_array_owned, length, {e}); // Wait for copy to finish and handle exceptions. CeedCallSycl(ceed, copy_event.wait_and_throw()); } From 6078361e13d267a256f9ecdaa6d422dec7e654b0 Mon Sep 17 00:00:00 2001 From: James Wright Date: Mon, 1 Apr 2024 16:39:30 -0600 Subject: [PATCH 2/4] fix(sycl): Use `CEED_VECTOR_NONE` for weight evals --- backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp b/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp index cb00bf95da..3572814d26 100644 --- a/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp +++ b/backends/sycl-ref/ceed-sycl-ref-operator.sycl.cpp @@ -191,7 +191,7 @@ static int CeedOperatorSetupFields_Sycl(CeedQFunction qf, CeedOperator op, bool CeedCallBackend(CeedOperatorFieldGetBasis(op_fields[i], &basis)); q_size = (CeedSize)num_elem * Q; CeedCallBackend(CeedVectorCreate(ceed, q_size, &q_vecs[i])); - CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, NULL, q_vecs[i])); + CeedCallBackend(CeedBasisApply(basis, num_elem, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i])); break; case CEED_EVAL_DIV: break; // TODO: Not implemented From d07cdbe5b933b2ee94e4361e6ff18d258e72d53d Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 2 Apr 2024 10:02:14 -0600 Subject: [PATCH 3/4] sycl - clearer template parameters --- .../sycl-ref/ceed-sycl-ref-basis.sycl.cpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/backends/sycl-ref/ceed-sycl-ref-basis.sycl.cpp b/backends/sycl-ref/ceed-sycl-ref-basis.sycl.cpp index a19029e7cf..4d26b1d1a1 100644 --- a/backends/sycl-ref/ceed-sycl-ref-basis.sycl.cpp +++ b/backends/sycl-ref/ceed-sycl-ref-basis.sycl.cpp @@ -290,23 +290,23 @@ static int CeedBasisApply_Sycl(CeedBasis basis, const CeedInt num_elem, CeedTran // Basis action switch (eval_mode) { - case CEED_EVAL_INTERP: { + case CEED_EVAL_INTERP: if (is_transpose) { - CeedCallBackend(CeedBasisApplyInterp_Sycl(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); + CeedCallBackend(CeedBasisApplyInterp_Sycl(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); } else { - CeedCallBackend(CeedBasisApplyInterp_Sycl(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); + CeedCallBackend(CeedBasisApplyInterp_Sycl(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); } - } break; - case CEED_EVAL_GRAD: { + break; + case CEED_EVAL_GRAD: if (is_transpose) { - CeedCallBackend(CeedBasisApplyGrad_Sycl<1>(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); + CeedCallBackend(CeedBasisApplyGrad_Sycl(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); } else { - CeedCallBackend(CeedBasisApplyGrad_Sycl<0>(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); + CeedCallBackend(CeedBasisApplyGrad_Sycl(data->sycl_queue, *impl->sycl_module, num_elem, impl, d_u, d_v)); } - } break; - case CEED_EVAL_WEIGHT: { + break; + case CEED_EVAL_WEIGHT: CeedCallBackend(CeedBasisApplyWeight_Sycl(data->sycl_queue, num_elem, impl, d_v)); - } break; + break; case CEED_EVAL_NONE: /* handled separately below */ break; // LCOV_EXCL_START @@ -467,15 +467,15 @@ static int CeedBasisApplyNonTensor_Sycl(CeedBasis basis, const CeedInt num_elem, // Apply basis operation switch (eval_mode) { - case CEED_EVAL_INTERP: { + case CEED_EVAL_INTERP: CeedCallBackend(CeedBasisApplyNonTensorInterp_Sycl(data->sycl_queue, num_elem, is_transpose, impl, d_u, d_v)); - } break; - case CEED_EVAL_GRAD: { + break; + case CEED_EVAL_GRAD: CeedCallBackend(CeedBasisApplyNonTensorGrad_Sycl(data->sycl_queue, num_elem, is_transpose, impl, d_u, d_v)); - } break; - case CEED_EVAL_WEIGHT: { + break; + case CEED_EVAL_WEIGHT: CeedCallBackend(CeedBasisApplyNonTensorWeight_Sycl(data->sycl_queue, num_elem, impl, d_v)); - } break; + break; case CEED_EVAL_NONE: /* handled separately below */ break; // LCOV_EXCL_START From 8330fa8358e0538fd6980df480dcd5567c9ca368 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Tue, 2 Apr 2024 10:20:16 -0600 Subject: [PATCH 4/4] test - clarify output of t506 --- tests/t506-operator.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/t506-operator.c b/tests/t506-operator.c index fb74656a25..a17b6cbb0a 100644 --- a/tests/t506-operator.c +++ b/tests/t506-operator.c @@ -15,7 +15,7 @@ int main(int argc, char **argv) { CeedQFunction qf_setup, qf_mass; CeedOperator op_setup_small, op_mass_small, op_setup_large, op_mass_large; CeedVector q_data_small, q_data_large, x, u, v; - CeedInt num_elem = 15, p = 5, q = 8, scale = 3; + CeedInt num_elem = 15, p = 5, q = 8, scale = 3, num_comp = 2; CeedInt num_nodes_x = num_elem + 1, num_nodes_u = num_elem * (p - 1) + 1; CeedInt ind_x[num_elem * 2], ind_u[num_elem * p]; @@ -28,8 +28,8 @@ int main(int argc, char **argv) { for (CeedInt i = 0; i < num_nodes_x; i++) x_array[i] = (CeedScalar)i / (num_nodes_x - 1); CeedVectorSetArray(x, CEED_MEM_HOST, CEED_COPY_VALUES, x_array); } - CeedVectorCreate(ceed, 2 * num_nodes_u, &u); - CeedVectorCreate(ceed, 2 * num_nodes_u, &v); + CeedVectorCreate(ceed, num_comp * num_nodes_u, &u); + CeedVectorCreate(ceed, num_comp * num_nodes_u, &v); CeedVectorCreate(ceed, num_elem * q, &q_data_small); CeedVectorCreate(ceed, num_elem * q * scale, &q_data_large); @@ -38,14 +38,14 @@ int main(int argc, char **argv) { ind_x[2 * i + 0] = i; ind_x[2 * i + 1] = i + 1; } - CeedElemRestrictionCreate(ceed, num_elem, 2, 1, 1, num_nodes_x, CEED_MEM_HOST, CEED_USE_POINTER, ind_x, &elem_restriction_x); + CeedElemRestrictionCreate(ceed, num_elem, num_comp, 1, 1, num_nodes_x, CEED_MEM_HOST, CEED_USE_POINTER, ind_x, &elem_restriction_x); for (CeedInt i = 0; i < num_elem; i++) { for (CeedInt j = 0; j < p; j++) { - ind_u[p * i + j] = 2 * (i * (p - 1) + j); + ind_u[p * i + j] = num_comp * (i * (p - 1) + j); } } - CeedElemRestrictionCreate(ceed, num_elem, p, 2, 1, 2 * num_nodes_u, CEED_MEM_HOST, CEED_USE_POINTER, ind_u, &elem_restriction_u); + CeedElemRestrictionCreate(ceed, num_elem, p, num_comp, 1, num_comp * num_nodes_u, CEED_MEM_HOST, CEED_USE_POINTER, ind_u, &elem_restriction_u); CeedInt strides_q_data_small[3] = {1, q, q}; CeedElemRestrictionCreateStrided(ceed, num_elem, q, 1, q * num_elem, strides_q_data_small, &elem_restriction_q_data_small); @@ -55,9 +55,9 @@ int main(int argc, char **argv) { // Bases CeedBasisCreateTensorH1Lagrange(ceed, 1, 1, 2, q, CEED_GAUSS, &basis_x_small); - CeedBasisCreateTensorH1Lagrange(ceed, 1, 2, p, q, CEED_GAUSS, &basis_u_small); + CeedBasisCreateTensorH1Lagrange(ceed, 1, num_comp, p, q, CEED_GAUSS, &basis_u_small); CeedBasisCreateTensorH1Lagrange(ceed, 1, 1, 2, q * scale, CEED_GAUSS, &basis_x_large); - CeedBasisCreateTensorH1Lagrange(ceed, 1, 2, p, q * scale, CEED_GAUSS, &basis_u_large); + CeedBasisCreateTensorH1Lagrange(ceed, 1, num_comp, p, q * scale, CEED_GAUSS, &basis_u_large); // QFunctions CeedQFunctionCreateInterior(ceed, 1, setup, setup_loc, &qf_setup); @@ -67,8 +67,8 @@ int main(int argc, char **argv) { CeedQFunctionCreateInterior(ceed, 1, mass, mass_loc, &qf_mass); CeedQFunctionAddInput(qf_mass, "rho", 1, CEED_EVAL_NONE); - CeedQFunctionAddInput(qf_mass, "u", 2, CEED_EVAL_INTERP); - CeedQFunctionAddOutput(qf_mass, "v", 2, CEED_EVAL_INTERP); + CeedQFunctionAddInput(qf_mass, "u", num_comp, CEED_EVAL_INTERP); + CeedQFunctionAddOutput(qf_mass, "v", num_comp, CEED_EVAL_INTERP); // 'Small' Operators CeedOperatorCreate(ceed, qf_setup, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_setup_small); @@ -101,8 +101,8 @@ int main(int argc, char **argv) { CeedVectorGetArrayWrite(u, CEED_MEM_HOST, &u_array); for (int i = 0; i < num_nodes_u; i++) { - u_array[2 * i] = 1.0; - u_array[2 * i + 1] = 2.0; + u_array[num_comp * i] = 1.0; + u_array[num_comp * i + 1] = 2.0; } CeedVectorRestoreArray(u, &u_array); } @@ -117,12 +117,12 @@ int main(int argc, char **argv) { CeedVectorGetArrayRead(v, CEED_MEM_HOST, &v_array); for (CeedInt i = 0; i < num_nodes_u; i++) { - sum_1 += v_array[2 * i]; - sum_2 += v_array[2 * i + 1]; + sum_1 += v_array[num_comp * i]; + sum_2 += v_array[num_comp * i + 1]; } CeedVectorRestoreArrayRead(v, &v_array); - if (fabs(sum_1 - 1.) > 1000. * CEED_EPSILON) printf("Computed Area: %f != True Area: 1.0\n", sum_1); - if (fabs(sum_2 - 2.) > 1000. * CEED_EPSILON) printf("Computed Area: %f != True Area: 2.0\n", sum_2); + if (fabs(sum_1 - 1.) > 1000. * CEED_EPSILON) printf("Small Problem, Component 1: Computed Area %f != True Area 1.0\n", sum_1); + if (fabs(sum_2 - 2.) > 1000. * CEED_EPSILON) printf("Small Problem, Component 2: Computed Area %f != True Area 2.0\n", sum_2); } // 'Large' operator @@ -135,13 +135,13 @@ int main(int argc, char **argv) { CeedVectorGetArrayRead(v, CEED_MEM_HOST, &v_array); for (CeedInt i = 0; i < num_nodes_u; i++) { - sum_1 += v_array[2 * i]; - sum_2 += v_array[2 * i + 1]; + sum_1 += v_array[num_comp * i]; + sum_2 += v_array[num_comp * i + 1]; } CeedVectorRestoreArrayRead(v, &v_array); - if (fabs(sum_1 - 1.) > 1000. * CEED_EPSILON) printf("Computed Area: %f != True Area: 1.0\n", sum_1); - if (fabs(sum_2 - 2.) > 1000. * CEED_EPSILON) printf("Computed Area: %f != True Area: 2.0\n", sum_2); + if (fabs(sum_1 - 1.) > 1000. * CEED_EPSILON) printf("Large Problem, Component 1: Computed Area %f != True Area 1.0\n", sum_1); + if (fabs(sum_2 - 2.) > 1000. * CEED_EPSILON) printf("Large Problem, Component 2: Computed Area %f != True Area 2.0\n", sum_2); } CeedVectorDestroy(&x);