From 722e99810cd78fe631ea881f412e59071f191ba0 Mon Sep 17 00:00:00 2001
From: George Bisbas
Date: Sat, 5 Aug 2023 20:48:58 +0300
Subject: [PATCH] bench: compress saved data

---
 fast/setup_wave3d.py |   4 +++-
 fast/temp1           | 201 --------------------------------------------------
 fast/temp2           | 212 ---------------------------------------------------
 fast/wave3d_b.py     |   3 +--
 4 files changed, 4 insertions(+), 416 deletions(-)
 delete mode 100644 fast/temp1
 delete mode 100644 fast/temp2

diff --git a/fast/setup_wave3d.py b/fast/setup_wave3d.py
index 795c96b284..b5c27b229d 100644
--- a/fast/setup_wave3d.py
+++ b/fast/setup_wave3d.py
@@ -112,4 +112,6 @@
 shape_str = '_'.join(str(item) for item in shape)
 np.save("so%s_critical_dt%s.npy" % (so, shape_str), model.critical_dt, allow_pickle=True)
 np.save("so%s_wave_dat%s.npy" % (so, shape_str), u.data[:], allow_pickle=True)
-np.save("so%s_grid_extent%s.npy" % (so, shape_str), model.grid.extent, allow_pickle=True)
+
+np.savez_compressed("so%s_grid_extent%s" % (so, shape_str), model.grid.extent,
+                    allow_pickle=True)
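The change above swaps np.save for np.savez_compressed, trading the raw .npy file for a zipped .npz archive. Two details are easy to miss: NumPy appends the .npz extension itself (hence the extension dropped from the format string), and savez_compressed has no allow_pickle parameter; extra keyword arguments are treated as named arrays to store, so the allow_pickle=True above is saved as an array called "allow_pickle" rather than acting as an option (pickle handling is decided at np.load time instead). A minimal round-trip sketch, with an illustrative filename standing in for the benchmark's so/shape_str values:

    import numpy as np

    # Illustrative extent; the benchmark saves model.grid.extent here.
    extent = np.asarray((2590.0, 2590.0, 2590.0))

    # savez_compressed appends ".npz" itself and zips the payload.
    np.savez_compressed("so4_grid_extent260_260_260", extent)

    # Arrays passed positionally land under the default keys arr_0, arr_1, ...
    with np.load("so4_grid_extent260_260_260.npz") as data:
        assert np.array_equal(data["arr_0"], extent)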
diff --git a/fast/temp1 b/fast/temp1
deleted file mode 100644
index 46f207a419..0000000000
--- a/fast/temp1
+++ /dev/null
@@ -1,201 +0,0 @@
-module {
-  func.func @apply_kernel(%arg0: memref<260x260xf32>, %arg1: memref<260x260xf32>) -> memref<260x260xf32> attributes {param_names = ["u_vec_0", "u_vec_1"]} {
-    %c28_i64 = arith.constant 28 : i64
-    %c12_i64 = arith.constant 12 : i64
-    %c24_i64 = arith.constant 24 : i64
-    %c8_i64 = arith.constant 8 : i64
-    %c20_i64 = arith.constant 20 : i64
-    %c16_i64 = arith.constant 16 : i64
-    %c257 = arith.constant 257 : index
-    %cst = arith.constant 1.000000e-01 : f32
-    %cst_0 = arith.constant -2.000000e+00 : f32
-    %c-2_i64 = arith.constant -2 : i64
-    %cst_1 = arith.constant 0.00392156886 : f32
-    %c-1_i64 = arith.constant -1 : i64
-    %cst_2 = arith.constant 3.075740e-05 : f32
-    %cst_3 = arith.constant 0.00999999977 : f32
-    %c-1 = arith.constant -1 : index
-    %c64 = arith.constant 64 : index
-    %c738197504_i32 = arith.constant 738197504 : i32
-    %c4_i64 = arith.constant 4 : i64
-    %c-1_i32 = arith.constant -1 : i32
-    %c4_i32 = arith.constant 4 : i32
-    %c1_i32 = arith.constant 1 : i32
-    %c0_i32 = arith.constant 0 : i32
-    %c1275069450_i32 = arith.constant 1275069450 : i32
-    %c66_i32 = arith.constant 66 : i32
-    %c1_i64 = arith.constant 1 : i64
-    %c1140850688_i32 = arith.constant 1140850688 : i32
-    %c8_i32 = arith.constant 8 : i32
-    %c0 = arith.constant 0 : index
-    %c1 = arith.constant 1 : index
-    %0 = llvm.alloca %c8_i32 x i32 {alignment = 32 : i64} : (i32) -> !llvm.ptr
-    %1 = llvm.alloca %c1_i64 x i32 {alignment = 32 : i64} : (i64) -> !llvm.ptr
-    %2 = call @MPI_Comm_rank(%c1140850688_i32, %1) : (i32, !llvm.ptr) -> i32
-    %3 = llvm.load %1 : !llvm.ptr
-    %alloc = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr = memref.extract_aligned_pointer_as_index %alloc : memref<66xf32> -> index
-    %4 = arith.index_cast %intptr : index to i64
-    %5 = llvm.inttoptr %4 : i64 to !llvm.ptr
-    %alloc_4 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_5 = memref.extract_aligned_pointer_as_index %alloc_4 : memref<66xf32> -> index
-    %6 = arith.index_cast %intptr_5 : index to i64
-    %7 = llvm.inttoptr %6 : i64 to !llvm.ptr
-    %alloc_6 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_7 = memref.extract_aligned_pointer_as_index %alloc_6 : memref<66xf32> -> index
-    %8 = arith.index_cast %intptr_7 : index to i64
-    %9 = llvm.inttoptr %8 : i64 to !llvm.ptr
-    %alloc_8 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_9 = memref.extract_aligned_pointer_as_index %alloc_8 : memref<66xf32> -> index
-    %10 = arith.index_cast %intptr_9 : index to i64
-    %11 = llvm.inttoptr %10 : i64 to !llvm.ptr
-    %alloc_10 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_11 = memref.extract_aligned_pointer_as_index %alloc_10 : memref<66xf32> -> index
-    %12 = arith.index_cast %intptr_11 : index to i64
-    %13 = llvm.inttoptr %12 : i64 to !llvm.ptr
-    %alloc_12 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_13 = memref.extract_aligned_pointer_as_index %alloc_12 : memref<66xf32> -> index
-    %14 = arith.index_cast %intptr_13 : index to i64
-    %15 = llvm.inttoptr %14 : i64 to !llvm.ptr
-    %alloc_14 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_15 = memref.extract_aligned_pointer_as_index %alloc_14 : memref<66xf32> -> index
-    %16 = arith.index_cast %intptr_15 : index to i64
-    %17 = llvm.inttoptr %16 : i64 to !llvm.ptr
-    %alloc_16 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_17 = memref.extract_aligned_pointer_as_index %alloc_16 : memref<66xf32> -> index
-    %18 = arith.index_cast %intptr_17 : index to i64
-    %19 = llvm.inttoptr %18 : i64 to !llvm.ptr
-    %20 = arith.remui %3, %c4_i32 : i32
-    %21 = arith.divui %3, %c4_i32 : i32
-    %22 = arith.remui %21, %c4_i32 : i32
-    %23 = arith.addi %22, %c-1_i32 : i32
-    %24 = arith.cmpi sge, %23, %c0_i32 : i32
-    %25 = arith.muli %23, %c4_i32 : i32
-    %26 = arith.addi %20, %25 : i32
-    %27 = llvm.ptrtoint %0 : !llvm.ptr to i64
-    %28 = llvm.inttoptr %27 : i64 to !llvm.ptr
-    %29 = arith.addi %27, %c16_i64 : i64
-    %30 = llvm.inttoptr %29 : i64 to !llvm.ptr
-    %31 = arith.addi %22, %c1_i32 : i32
-    %32 = arith.cmpi slt, %31, %c4_i32 : i32
-    %33 = arith.muli %31, %c4_i32 : i32
-    %34 = arith.addi %20, %33 : i32
-    %35 = arith.addi %27, %c4_i64 : i64
-    %36 = llvm.inttoptr %35 : i64 to !llvm.ptr
-    %37 = arith.addi %27, %c20_i64 : i64
-    %38 = llvm.inttoptr %37 : i64 to !llvm.ptr
-    %39 = arith.addi %20, %c-1_i32 : i32
-    %40 = arith.cmpi sge, %39, %c0_i32 : i32
-    %41 = arith.muli %22, %c4_i32 : i32
-    %42 = arith.addi %39, %41 : i32
-    %43 = arith.addi %27, %c8_i64 : i64
-    %44 = llvm.inttoptr %43 : i64 to !llvm.ptr
-    %45 = arith.addi %27, %c24_i64 : i64
-    %46 = llvm.inttoptr %45 : i64 to !llvm.ptr
-    %47 = arith.addi %20, %c1_i32 : i32
-    %48 = arith.cmpi slt, %47, %c4_i32 : i32
-    %49 = arith.addi %47, %41 : i32
-    %50 = arith.addi %27, %c12_i64 : i64
-    %51 = llvm.inttoptr %50 : i64 to !llvm.ptr
-    %52 = arith.addi %27, %c28_i64 : i64
-    %53 = llvm.inttoptr %52 : i64 to !llvm.ptr
-    %54 = llvm.inttoptr %c1_i64 : i64 to !llvm.ptr
-    %55 = math.fpowi %cst_2, %c-1_i64 : f32, i64
-    %56 = math.fpowi %cst_1, %c-2_i64 : f32, i64
-    %57 = arith.mulf %56, %cst_0 : f32
-    %58:2 = scf.for %arg2 = %c0 to %c257 step %c1 iter_args(%arg3 = %arg0, %arg4 = %arg1) -> (memref<260x260xf32>, memref<260x260xf32>) {
-      %subview = memref.subview %arg4[2, 2] [64, 64] [1, 1] : memref<260x260xf32> to memref<64x64xf32, strided<[260, 1], offset: 522>>
-      %subview_18 = memref.subview %arg3[2, 2] [66, 66] [1, 1] : memref<260x260xf32> to memref<66x66xf32, strided<[260, 1], offset: 522>>
-      scf.if %24 {
-        %subview_19 = memref.subview %subview_18[-1, 0] [66, 1] [1, 1] : memref<66x66xf32, strided<[260, 1], offset: 522>> to memref<66xf32, strided<[260], offset: 262>>
-        memref.copy %subview_19, %alloc : memref<66xf32, strided<[260], offset: 262>> to memref<66xf32>
-        %60 = func.call @MPI_Isend(%5, %c66_i32, %c1275069450_i32, %26, %c0_i32, %c1140850688_i32, %28) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-        %61 = func.call @MPI_Irecv(%7, %c66_i32, %c1275069450_i32, %26, %c0_i32, %c1140850688_i32, %30) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-      } else {
-        llvm.store %c738197504_i32, %28 : !llvm.ptr
-        llvm.store %c738197504_i32, %30 : !llvm.ptr
-      }
-      scf.if %32 {
-        %subview_19 = memref.subview %subview_18[-1, 63] [66, 1] [1, 1] : memref<66x66xf32, strided<[260, 1], offset: 522>> to memref<66xf32, strided<[260], offset: 325>>
-        memref.copy %subview_19, %alloc_6 : memref<66xf32, strided<[260], offset: 325>> to memref<66xf32>
-        %60 = func.call @MPI_Isend(%9, %c66_i32, %c1275069450_i32, %34, %c0_i32, %c1140850688_i32, %36) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-        %61 = func.call @MPI_Irecv(%11, %c66_i32, %c1275069450_i32, %34, %c0_i32, %c1140850688_i32, %38) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-      } else {
-        llvm.store %c738197504_i32, %36 : !llvm.ptr
-        llvm.store %c738197504_i32, %38 : !llvm.ptr
-      }
-      scf.if %40 {
-        %subview_19 = memref.subview %subview_18[0, -1] [1, 66] [1, 1] : memref<66x66xf32, strided<[260, 1], offset: 522>> to memref<66xf32, strided<[1], offset: 521>>
-        memref.copy %subview_19, %alloc_10 : memref<66xf32, strided<[1], offset: 521>> to memref<66xf32>
-        %60 = func.call @MPI_Isend(%13, %c66_i32, %c1275069450_i32, %42, %c0_i32, %c1140850688_i32, %44) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-        %61 = func.call @MPI_Irecv(%15, %c66_i32, %c1275069450_i32, %42, %c0_i32, %c1140850688_i32, %46) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-      } else {
-        llvm.store %c738197504_i32, %44 : !llvm.ptr
-        llvm.store %c738197504_i32, %46 : !llvm.ptr
-      }
-      scf.if %48 {
-        %subview_19 = memref.subview %subview_18[63, -1] [1, 66] [1, 1] : memref<66x66xf32, strided<[260, 1], offset: 522>> to memref<66xf32, strided<[1], offset: 16901>>
-        memref.copy %subview_19, %alloc_14 : memref<66xf32, strided<[1], offset: 16901>> to memref<66xf32>
-        %60 = func.call @MPI_Isend(%17, %c66_i32, %c1275069450_i32, %49, %c0_i32, %c1140850688_i32, %51) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-        %61 = func.call @MPI_Irecv(%19, %c66_i32, %c1275069450_i32, %49, %c0_i32, %c1140850688_i32, %53) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-      } else {
-        llvm.store %c738197504_i32, %51 : !llvm.ptr
-        llvm.store %c738197504_i32, %53 : !llvm.ptr
-      }
-      %59 = func.call @MPI_Waitall(%c8_i32, %0, %54) : (i32, !llvm.ptr, !llvm.ptr) -> i32
-      scf.if %24 {
-        %subview_19 = memref.subview %subview_18[-1, -1] [66, 1] [1, 1] : memref<66x66xf32, strided<[260, 1], offset: 522>> to memref<66xf32, strided<[260], offset: 261>>
-        memref.copy %subview_19, %alloc_4 : memref<66xf32, strided<[260], offset: 261>> to memref<66xf32>
-      }
-      scf.if %32 {
-        %subview_19 = memref.subview %subview_18[-1, 64] [66, 1] [1, 1] : memref<66x66xf32, strided<[260, 1], offset: 522>> to memref<66xf32, strided<[260], offset: 326>>
-        memref.copy %subview_19, %alloc_8 : memref<66xf32, strided<[260], offset: 326>> to memref<66xf32>
-      }
-      scf.if %40 {
-        %subview_19 = memref.subview %subview_18[-1, -1] [1, 66] [1, 1] : memref<66x66xf32, strided<[260, 1], offset: 522>> to memref<66xf32, strided<[1], offset: 261>>
-        memref.copy %subview_19, %alloc_12 : memref<66xf32, strided<[1], offset: 261>> to memref<66xf32>
-      }
-      scf.if %48 {
-        %subview_19 = memref.subview %subview_18[64, -1] [1, 66] [1, 1] : memref<66x66xf32, strided<[260, 1], offset: 522>> to memref<66xf32, strided<[1], offset: 17161>>
-        memref.copy %subview_19, %alloc_16 : memref<66xf32, strided<[1], offset: 17161>> to memref<66xf32>
-      }
-      scf.parallel (%arg5) = (%c0) to (%c64) step (%c1) {
-        %60 = arith.addi %arg5, %c-1 : index
-        %61 = arith.addi %arg5, %c1 : index
-        scf.for %arg6 = %c0 to %c64 step %c1 {
-          %62 = memref.load %subview_18[%arg5, %arg6] : memref<66x66xf32, strided<[260, 1], offset: 522>>
-          %63 = memref.load %subview_18[%60, %arg6] : memref<66x66xf32, strided<[260, 1], offset: 522>>
-          %64 = memref.load %subview_18[%61, %arg6] : memref<66x66xf32, strided<[260, 1], offset: 522>>
-          %65 = arith.addi %arg6, %c-1 : index
-          %66 = memref.load %subview_18[%arg5, %65] : memref<66x66xf32, strided<[260, 1], offset: 522>>
-          %67 = arith.addi %arg6, %c1 : index
-          %68 = memref.load %subview_18[%arg5, %67] : memref<66x66xf32, strided<[260, 1], offset: 522>>
-          %69 = arith.mulf %55, %62 : f32
-          %70 = arith.mulf %56, %63 : f32
-          %71 = arith.mulf %56, %64 : f32
-          %72 = arith.mulf %57, %62 : f32
-          %73 = arith.addf %70, %71 : f32
-          %74 = arith.addf %73, %72 : f32
-          %75 = arith.mulf %56, %66 : f32
-          %76 = arith.mulf %56, %68 : f32
-          %77 = arith.addf %75, %76 : f32
-          %78 = arith.addf %77, %72 : f32
-          %79 = arith.addf %74, %78 : f32
-          %80 = arith.mulf %79, %cst : f32
-          %81 = arith.addf %69, %cst_3 : f32
-          %82 = arith.addf %81, %80 : f32
-          %83 = arith.mulf %82, %cst_2 : f32
-          memref.store %83, %subview[%arg5, %arg6] : memref<64x64xf32, strided<[260, 1], offset: 522>>
-        }
-        scf.yield
-      }
-      scf.yield %arg4, %arg3 : memref<260x260xf32>, memref<260x260xf32>
-    }
-    return %58#0 : memref<260x260xf32>
-  }
-  func.func private @MPI_Comm_rank(i32, !llvm.ptr) -> i32
-  func.func private @MPI_Isend(!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-  func.func private @MPI_Irecv(!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-  func.func private @MPI_Waitall(i32, !llvm.ptr, !llvm.ptr) -> i32
-}
-
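For orientation, the inner loop nest of the deleted dump above is a plain 5-point star update over the 64x64 interior of a halo-padded tile. Reading the IR constants as a time step dt (%cst_2), a grid spacing h (%cst_1), a diffusivity a (%cst) and a constant source f (%cst_3), each iteration computes u_new = u + dt * (a * lap(u) + f). A rough NumPy rendering of that update (the variable names are mine, not the generated kernel's):

    import numpy as np

    # One explicit step of u_t = a*lap(u) + f on the 64x64 interior of a
    # 66x66 tile; dt, h, a, f mirror %cst_2, %cst_1, %cst, %cst_3 above.
    def step(u, dt=3.07574e-05, h=0.00392156886, a=0.1, f=0.01):
        lap = (u[:-2, 1:-1] + u[2:, 1:-1] + u[1:-1, :-2] + u[1:-1, 2:]
               - 4.0 * u[1:-1, 1:-1]) / h**2
        return u[1:-1, 1:-1] + dt * (a * lap + f)

    u = np.random.rand(66, 66).astype(np.float32)
    u[1:-1, 1:-1] = step(u)   # the IR double-buffers instead of writing in place

The enclosing scf.for swaps %arg3 and %arg4 every one of its 257 iterations, which is the usual double-buffered time loop.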
diff --git a/fast/temp2 b/fast/temp2
deleted file mode 100644
index 40783a347a..0000000000
--- a/fast/temp2
+++ /dev/null
@@ -1,212 +0,0 @@
-#map = affine_map<()[s0] -> (s0 + 2)>
-module {
-  func.func @apply_kernel(%arg0: memref<260x260xf32>, %arg1: memref<260x260xf32>) -> memref<260x260xf32> attributes {param_names = ["u_vec_0", "u_vec_1"]} {
-    %c28_i64 = arith.constant 28 : i64
-    %c12_i64 = arith.constant 12 : i64
-    %c24_i64 = arith.constant 24 : i64
-    %c8_i64 = arith.constant 8 : i64
-    %c20_i64 = arith.constant 20 : i64
-    %c16_i64 = arith.constant 16 : i64
-    %c257 = arith.constant 257 : index
-    %cst = arith.constant 1.000000e-01 : f32
-    %cst_0 = arith.constant -2.000000e+00 : f32
-    %c-2_i64 = arith.constant -2 : i64
-    %cst_1 = arith.constant 0.00392156886 : f32
-    %c-1_i64 = arith.constant -1 : i64
-    %cst_2 = arith.constant 3.075740e-05 : f32
-    %cst_3 = arith.constant 0.00999999977 : f32
-    %c-1 = arith.constant -1 : index
-    %c64 = arith.constant 64 : index
-    %c738197504_i32 = arith.constant 738197504 : i32
-    %c4_i64 = arith.constant 4 : i64
-    %c-1_i32 = arith.constant -1 : i32
-    %c4_i32 = arith.constant 4 : i32
-    %c1_i32 = arith.constant 1 : i32
-    %c0_i32 = arith.constant 0 : i32
-    %c1275069450_i32 = arith.constant 1275069450 : i32
-    %c66_i32 = arith.constant 66 : i32
-    %c1_i64 = arith.constant 1 : i64
-    %c1140850688_i32 = arith.constant 1140850688 : i32
-    %c8_i32 = arith.constant 8 : i32
-    %c0 = arith.constant 0 : index
-    %c1 = arith.constant 1 : index
-    %0 = llvm.alloca %c8_i32 x i32 {alignment = 32 : i64} : (i32) -> !llvm.ptr
-    %1 = llvm.alloca %c1_i64 x i32 {alignment = 32 : i64} : (i64) -> !llvm.ptr
-    %2 = call @MPI_Comm_rank(%c1140850688_i32, %1) : (i32, !llvm.ptr) -> i32
-    %3 = llvm.load %1 : !llvm.ptr
-    %alloc = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr = memref.extract_aligned_pointer_as_index %alloc : memref<66xf32> -> index
-    %4 = arith.index_cast %intptr : index to i64
-    %5 = llvm.inttoptr %4 : i64 to !llvm.ptr
-    %alloc_4 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_5 = memref.extract_aligned_pointer_as_index %alloc_4 : memref<66xf32> -> index
-    %6 = arith.index_cast %intptr_5 : index to i64
-    %7 = llvm.inttoptr %6 : i64 to !llvm.ptr
-    %alloc_6 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_7 = memref.extract_aligned_pointer_as_index %alloc_6 : memref<66xf32> -> index
-    %8 = arith.index_cast %intptr_7 : index to i64
-    %9 = llvm.inttoptr %8 : i64 to !llvm.ptr
-    %alloc_8 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_9 = memref.extract_aligned_pointer_as_index %alloc_8 : memref<66xf32> -> index
-    %10 = arith.index_cast %intptr_9 : index to i64
-    %11 = llvm.inttoptr %10 : i64 to !llvm.ptr
-    %alloc_10 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_11 = memref.extract_aligned_pointer_as_index %alloc_10 : memref<66xf32> -> index
-    %12 = arith.index_cast %intptr_11 : index to i64
-    %13 = llvm.inttoptr %12 : i64 to !llvm.ptr
-    %alloc_12 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_13 = memref.extract_aligned_pointer_as_index %alloc_12 : memref<66xf32> -> index
-    %14 = arith.index_cast %intptr_13 : index to i64
-    %15 = llvm.inttoptr %14 : i64 to !llvm.ptr
-    %alloc_14 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_15 = memref.extract_aligned_pointer_as_index %alloc_14 : memref<66xf32> -> index
-    %16 = arith.index_cast %intptr_15 : index to i64
-    %17 = llvm.inttoptr %16 : i64 to !llvm.ptr
-    %alloc_16 = memref.alloc() {alignment = 64 : i64} : memref<66xf32>
-    %intptr_17 = memref.extract_aligned_pointer_as_index %alloc_16 : memref<66xf32> -> index
-    %18 = arith.index_cast %intptr_17 : index to i64
-    %19 = llvm.inttoptr %18 : i64 to !llvm.ptr
-    %20 = arith.remui %3, %c4_i32 : i32
-    %21 = arith.divui %3, %c4_i32 : i32
-    %22 = arith.remui %21, %c4_i32 : i32
-    %23 = arith.addi %22, %c-1_i32 : i32
-    %24 = arith.cmpi sge, %23, %c0_i32 : i32
-    %25 = arith.muli %23, %c4_i32 : i32
-    %26 = arith.addi %20, %25 : i32
-    %27 = llvm.ptrtoint %0 : !llvm.ptr to i64
-    %28 = llvm.inttoptr %27 : i64 to !llvm.ptr
-    %29 = arith.addi %27, %c16_i64 : i64
-    %30 = llvm.inttoptr %29 : i64 to !llvm.ptr
-    %31 = arith.addi %22, %c1_i32 : i32
-    %32 = arith.cmpi slt, %31, %c4_i32 : i32
-    %33 = arith.muli %31, %c4_i32 : i32
-    %34 = arith.addi %20, %33 : i32
-    %35 = arith.addi %27, %c4_i64 : i64
-    %36 = llvm.inttoptr %35 : i64 to !llvm.ptr
-    %37 = arith.addi %27, %c20_i64 : i64
-    %38 = llvm.inttoptr %37 : i64 to !llvm.ptr
-    %39 = arith.addi %20, %c-1_i32 : i32
-    %40 = arith.cmpi sge, %39, %c0_i32 : i32
-    %41 = arith.muli %22, %c4_i32 : i32
-    %42 = arith.addi %39, %41 : i32
-    %43 = arith.addi %27, %c8_i64 : i64
-    %44 = llvm.inttoptr %43 : i64 to !llvm.ptr
-    %45 = arith.addi %27, %c24_i64 : i64
-    %46 = llvm.inttoptr %45 : i64 to !llvm.ptr
-    %47 = arith.addi %20, %c1_i32 : i32
-    %48 = arith.cmpi slt, %47, %c4_i32 : i32
-    %49 = arith.addi %47, %41 : i32
-    %50 = arith.addi %27, %c12_i64 : i64
-    %51 = llvm.inttoptr %50 : i64 to !llvm.ptr
-    %52 = arith.addi %27, %c28_i64 : i64
-    %53 = llvm.inttoptr %52 : i64 to !llvm.ptr
-    %54 = llvm.inttoptr %c1_i64 : i64 to !llvm.ptr
-    %55 = math.fpowi %cst_2, %c-1_i64 : f32, i64
-    %56 = math.fpowi %cst_1, %c-2_i64 : f32, i64
-    %57 = arith.mulf %56, %cst_0 : f32
-    %58:2 = scf.for %arg2 = %c0 to %c257 step %c1 iter_args(%arg3 = %arg0, %arg4 = %arg1) -> (memref<260x260xf32>, memref<260x260xf32>) {
-      scf.if %24 {
-        %subview = memref.subview %arg3[1, 2] [66, 1] [1, 1] : memref<260x260xf32> to memref<66xf32, strided<[260], offset: 262>>
-        memref.copy %subview, %alloc : memref<66xf32, strided<[260], offset: 262>> to memref<66xf32>
-        %60 = func.call @MPI_Isend(%5, %c66_i32, %c1275069450_i32, %26, %c0_i32, %c1140850688_i32, %28) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-        %61 = func.call @MPI_Irecv(%7, %c66_i32, %c1275069450_i32, %26, %c0_i32, %c1140850688_i32, %30) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-      } else {
-        llvm.store %c738197504_i32, %28 : !llvm.ptr
-        llvm.store %c738197504_i32, %30 : !llvm.ptr
-      }
-      scf.if %32 {
-        %subview = memref.subview %arg3[1, 65] [66, 1] [1, 1] : memref<260x260xf32> to memref<66xf32, strided<[260], offset: 325>>
-        memref.copy %subview, %alloc_6 : memref<66xf32, strided<[260], offset: 325>> to memref<66xf32>
-        %60 = func.call @MPI_Isend(%9, %c66_i32, %c1275069450_i32, %34, %c0_i32, %c1140850688_i32, %36) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-        %61 = func.call @MPI_Irecv(%11, %c66_i32, %c1275069450_i32, %34, %c0_i32, %c1140850688_i32, %38) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-      } else {
-        llvm.store %c738197504_i32, %36 : !llvm.ptr
-        llvm.store %c738197504_i32, %38 : !llvm.ptr
-      }
-      scf.if %40 {
-        %subview = memref.subview %arg3[2, 1] [1, 66] [1, 1] : memref<260x260xf32> to memref<66xf32, strided<[1], offset: 521>>
-        memref.copy %subview, %alloc_10 : memref<66xf32, strided<[1], offset: 521>> to memref<66xf32>
-        %60 = func.call @MPI_Isend(%13, %c66_i32, %c1275069450_i32, %42, %c0_i32, %c1140850688_i32, %44) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-        %61 = func.call @MPI_Irecv(%15, %c66_i32, %c1275069450_i32, %42, %c0_i32, %c1140850688_i32, %46) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-      } else {
-        llvm.store %c738197504_i32, %44 : !llvm.ptr
-        llvm.store %c738197504_i32, %46 : !llvm.ptr
-      }
-      scf.if %48 {
-        %subview = memref.subview %arg3[65, 1] [1, 66] [1, 1] : memref<260x260xf32> to memref<66xf32, strided<[1], offset: 16901>>
-        memref.copy %subview, %alloc_14 : memref<66xf32, strided<[1], offset: 16901>> to memref<66xf32>
-        %60 = func.call @MPI_Isend(%17, %c66_i32, %c1275069450_i32, %49, %c0_i32, %c1140850688_i32, %51) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-        %61 = func.call @MPI_Irecv(%19, %c66_i32, %c1275069450_i32, %49, %c0_i32, %c1140850688_i32, %53) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-      } else {
-        llvm.store %c738197504_i32, %51 : !llvm.ptr
-        llvm.store %c738197504_i32, %53 : !llvm.ptr
-      }
-      %59 = func.call @MPI_Waitall(%c8_i32, %0, %54) : (i32, !llvm.ptr, !llvm.ptr) -> i32
-      scf.if %24 {
-        %subview = memref.subview %arg3[1, 1] [66, 1] [1, 1] : memref<260x260xf32> to memref<66xf32, strided<[260], offset: 261>>
-        memref.copy %subview, %alloc_4 : memref<66xf32, strided<[260], offset: 261>> to memref<66xf32>
-      }
-      scf.if %32 {
-        %subview = memref.subview %arg3[1, 66] [66, 1] [1, 1] : memref<260x260xf32> to memref<66xf32, strided<[260], offset: 326>>
-        memref.copy %subview, %alloc_8 : memref<66xf32, strided<[260], offset: 326>> to memref<66xf32>
-      }
-      scf.if %40 {
-        %subview = memref.subview %arg3[1, 1] [1, 66] [1, 1] : memref<260x260xf32> to memref<66xf32, strided<[1], offset: 261>>
-        memref.copy %subview, %alloc_12 : memref<66xf32, strided<[1], offset: 261>> to memref<66xf32>
-      }
-      scf.if %48 {
-        %subview = memref.subview %arg3[66, 1] [1, 66] [1, 1] : memref<260x260xf32> to memref<66xf32, strided<[1], offset: 17161>>
-        memref.copy %subview, %alloc_16 : memref<66xf32, strided<[1], offset: 17161>> to memref<66xf32>
-      }
-      scf.parallel (%arg5) = (%c0) to (%c64) step (%c1) {
-        %60 = arith.addi %arg5, %c-1 : index
-        %61 = arith.addi %arg5, %c1 : index
-        scf.for %arg6 = %c0 to %c64 step %c1 {
-          %62 = affine.apply #map()[%arg5]
-          %63 = affine.apply #map()[%arg6]
-          %64 = memref.load %arg3[%62, %63] : memref<260x260xf32>
-          %65 = affine.apply #map()[%60]
-          %66 = affine.apply #map()[%arg6]
-          %67 = memref.load %arg3[%65, %66] : memref<260x260xf32>
-          %68 = affine.apply #map()[%61]
-          %69 = affine.apply #map()[%arg6]
-          %70 = memref.load %arg3[%68, %69] : memref<260x260xf32>
-          %71 = arith.addi %arg6, %c-1 : index
-          %72 = affine.apply #map()[%arg5]
-          %73 = affine.apply #map()[%71]
-          %74 = memref.load %arg3[%72, %73] : memref<260x260xf32>
-          %75 = arith.addi %arg6, %c1 : index
-          %76 = affine.apply #map()[%arg5]
-          %77 = affine.apply #map()[%75]
-          %78 = memref.load %arg3[%76, %77] : memref<260x260xf32>
-          %79 = arith.mulf %55, %64 : f32
-          %80 = arith.mulf %56, %67 : f32
-          %81 = arith.mulf %56, %70 : f32
-          %82 = arith.mulf %57, %64 : f32
-          %83 = arith.addf %80, %81 : f32
-          %84 = arith.addf %83, %82 : f32
-          %85 = arith.mulf %56, %74 : f32
-          %86 = arith.mulf %56, %78 : f32
-          %87 = arith.addf %85, %86 : f32
-          %88 = arith.addf %87, %82 : f32
-          %89 = arith.addf %84, %88 : f32
-          %90 = arith.mulf %89, %cst : f32
-          %91 = arith.addf %79, %cst_3 : f32
-          %92 = arith.addf %91, %90 : f32
-          %93 = arith.mulf %92, %cst_2 : f32
-          %94 = affine.apply #map()[%arg5]
-          %95 = affine.apply #map()[%arg6]
-          memref.store %93, %arg4[%94, %95] : memref<260x260xf32>
-        }
-        scf.yield
-      }
-      scf.yield %arg4, %arg3 : memref<260x260xf32>, memref<260x260xf32>
-    }
-    return %58#0 : memref<260x260xf32>
-  }
-  func.func private @MPI_Comm_rank(i32, !llvm.ptr) -> i32
-  func.func private @MPI_Isend(!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-  func.func private @MPI_Irecv(!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32
-  func.func private @MPI_Waitall(i32, !llvm.ptr, !llvm.ptr) -> i32
-}
-
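Both deleted dumps open each time iteration with the same communication pattern: on a 4x4 rank grid, every rank packs its four 66-element boundary lines into scratch buffers (the %alloc_* memrefs), posts MPI_Isend/MPI_Irecv pairs only for the neighbours that exist, and blocks in a single MPI_Waitall on all eight request slots before running the stencil. A rough mpi4py sketch of that pattern, assuming mpi4py is available and with axis orientation chosen for readability rather than matching the IR exactly (an illustration, not the lowered code):

    from mpi4py import MPI
    import numpy as np

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    P = 4                                       # 4x4 rank grid (%c4_i32)
    x, y = rank % P, rank // P                  # rank -> grid, as in %20/%21

    u = np.zeros((66, 66), dtype=np.float32)    # 64x64 tile plus 1-cell halo
    edges = {(-1, 0): u[:, 1], (1, 0): u[:, -2],
             (0, -1): u[1, :], (0, 1): u[-2, :]}
    reqs, packed, halo = [], [], {}
    for (dx, dy), edge in edges.items():
        nx, ny = x + dx, y + dy
        if 0 <= nx < P and 0 <= ny < P:         # neighbour exists (cf. %24/%32/%40/%48)
            nbr = ny * P + nx
            buf = np.ascontiguousarray(edge)    # pack, like memref.copy into %alloc_*
            packed.append(buf)                  # keep send buffers alive until Waitall
            halo[(dx, dy)] = np.empty_like(buf)
            reqs += [comm.Isend(buf, dest=nbr),
                     comm.Irecv(halo[(dx, dy)], source=nbr)]
    MPI.Request.Waitall(reqs)                   # cf. @MPI_Waitall before the stencil
    # halo[...] now holds the neighbours' edges, ready to copy into u's boundary.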
diff --git a/fast/wave3d_b.py b/fast/wave3d_b.py
index 9e0aa32190..a2a412e8ef 100644
--- a/fast/wave3d_b.py
+++ b/fast/wave3d_b.py
@@ -44,8 +44,7 @@
 spacing = as_tuple(10.0 for _ in range(len(shape)))  # Grid spacing in m. The domain size is now 1km by 1km
 origin = as_tuple(0.0 for _ in range(len(shape)))  # What is the location of the top left corner.
 domain_size = tuple((d-1) * s for d, s in zip(shape, spacing))
-extent = np.load("so%s_grid_extent%s.npy" % (so, shape_str), allow_pickle=True)
-
+extent = np.load("so%s_grid_extent%s.npz" % (so, shape_str), allow_pickle=True)['arr_0']
 grid = Grid(shape=shape, extent=as_tuple(extent))
 
 # With the velocity and model size defined, we can create the seismic model that
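On the read side, np.load of an .npz returns a lazy NpzFile archive rather than an array, which is why the extent is now pulled out by key: arrays passed positionally to savez_compressed land under 'arr_0', 'arr_1', and so on. A small sketch with an illustrative filename (allow_pickle only matters if object arrays were stored):

    import numpy as np

    # NpzFile indexes its member arrays by key; 'arr_0' is the first
    # positional array written by savez_compressed.
    with np.load("so4_grid_extent260_260_260.npz", allow_pickle=True) as data:
        extent = tuple(data["arr_0"])

    # `extent` is now a plain tuple, ready for Grid(shape=..., extent=extent).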