Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/shift : copy the border cells when shifting M #221

Closed
wants to merge 30 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8183e01
Merge pull request #1 from mumax/master
jsampaio Sep 21, 2016
5109559
Merge pull request #2 from mumax/master
jsampaio Jan 11, 2018
32a1531
Update relax.go
jsampaio Jan 11, 2018
7f9d162
Update relax.go
jsampaio Jan 11, 2018
e1c9d8d
Update relax.go
jsampaio Jan 11, 2018
de8c4c9
Update relax.go
jsampaio Jan 11, 2018
4bd8b24
keep old behaviour by default.
jsampaio Jan 17, 2018
9f27a3b
Update relax.go
jsampaio Jan 18, 2018
21a0d49
adding ext_centerWallInRegion(R,c)
jsampaio Feb 6, 2018
162d9cc
Merge pull request #3 from mumax/master
jsampaio Feb 6, 2018
cbabbdb
server page: added collapsible job lists
jsampaio Apr 17, 2018
4f37133
Uniformed style of collapsible elements
jsampaio Apr 18, 2018
97deec2
Merge pull request #5 from mumax/master
jsampaio Apr 23, 2018
e486f60
collapsible job lists that are collapsed on page load
jsampaio Sep 21, 2018
b9eede6
Merge pull request #4 from jsampaio/jsampaio-server-collapsibleJobs
jsampaio Sep 21, 2018
46ff500
Merge pull request #6 from mumax/master
jsampaio Sep 21, 2018
40e9645
Merge pull request #7 from mumax/master
jsampaio Oct 9, 2018
8e6c458
mumax-server preserves last opened job list opened
jsampaio Oct 17, 2018
4342b28
Merge pull request #8 from jsampaio/jsampaio-better-collapsible-jobs
jsampaio Oct 17, 2018
d705c65
Merge pull request #9 from mumax/master
jsampaio Jan 24, 2019
d21a93d
add loupe to see details in the image on mumax result page
jsampaio Jan 26, 2019
2b91cdf
Merge pull request #10 from jsampaio/jsampaio-loupe
jsampaio Jan 26, 2019
6c31398
bug in the update image
jsampaio Jan 26, 2019
73954f2
Merge pull request #11 from jsampaio/jsampaio-loupe
jsampaio Jan 26, 2019
217f2e0
resetting to current mumax/3 version
jsampaio Mar 8, 2019
59450b7
Merge pull request #12 from mumax/master
jsampaio Mar 8, 2019
994df82
copy the border cells when shifting M
jsampaio Mar 10, 2019
c1cc6d8
copy the border cells when shifting M
jsampaio Mar 10, 2019
c010af5
copy the border cells when shifting M
jsampaio Mar 10, 2019
0f51bf1
ext_centerwall back to the version on mumax/3
jsampaio Mar 10, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions cuda/shift.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@ func ShiftX(dst, src *data.Slice, shiftX int, clampL, clampR float32) {
k_shiftx_async(dst.DevPtr(0), src.DevPtr(0), N[X], N[Y], N[Z], shiftX, clampL, clampR, cfg)
}

// ShiftFudgeX writes src into dst, displaced by shiftX cells (positive or
// negative) along the X-axis. Cells whose source position falls outside the
// grid receive the value of src's nearest border cell on that row.
// Both slices must be single-component and of equal length.
func ShiftFudgeX(dst, src *data.Slice, shiftX int) {
	util.Argument(dst.NComp() == 1 && src.NComp() == 1)
	util.Assert(dst.Len() == src.Len())

	size := dst.Size()
	launch := make3DConf(size)
	dstPtr, srcPtr := dst.DevPtr(0), src.DevPtr(0)
	k_shiftfudgex_async(dstPtr, srcPtr, size[X], size[Y], size[Z], shiftX, launch)
}

func ShiftY(dst, src *data.Slice, shiftY int, clampL, clampR float32) {
util.Argument(dst.NComp() == 1 && src.NComp() == 1)
util.Assert(dst.Len() == src.Len())
Expand All @@ -23,6 +33,14 @@ func ShiftY(dst, src *data.Slice, shiftY int, clampL, clampR float32) {
k_shifty_async(dst.DevPtr(0), src.DevPtr(0), N[X], N[Y], N[Z], shiftY, clampL, clampR, cfg)
}

// ShiftFudgeY launches the shiftfudgey kernel to write src into dst, displaced
// by shiftY cells along the Y-axis. Both slices must be single-component and
// of equal length.
// NOTE(review): presumably cells shifted in from outside the grid take src's
// border value, as in ShiftFudgeX — confirm against the shiftfudgey kernel.
func ShiftFudgeY(dst, src *data.Slice, shiftY int) {
	util.Argument(dst.NComp() == 1 && src.NComp() == 1)
	util.Assert(dst.Len() == src.Len())

	size := dst.Size()
	launch := make3DConf(size)
	dstPtr, srcPtr := dst.DevPtr(0), src.DevPtr(0)
	k_shiftfudgey_async(dstPtr, srcPtr, size[X], size[Y], size[Z], shiftY, launch)
}

func ShiftZ(dst, src *data.Slice, shiftZ int, clampL, clampR float32) {
util.Argument(dst.NComp() == 1 && src.NComp() == 1)
util.Assert(dst.Len() == src.Len())
Expand Down
25 changes: 25 additions & 0 deletions cuda/shiftfudgex.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#include "stencil.h"

// Copies src into dst displaced by shx cells (positive or negative) along X.
// Each thread handles one cell (ix, iy, iz). When the source position ix-shx
// falls outside [0, Nx-1], the value of the nearest border cell of src on the
// same (iy, iz) row is used instead.
// idx(...) is not defined in this file; presumably it comes from stencil.h.
extern "C" __global__ void
shiftfudgex(float* __restrict__ dst, float* __restrict__ src,
            int Nx, int Ny, int Nz, int shx) {

    const int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const int iy = blockIdx.y * blockDim.y + threadIdx.y;
    const int iz = blockIdx.z * blockDim.z + threadIdx.z;

    // Threads that fall outside the grid have nothing to do.
    if (ix >= Nx || iy >= Ny || iz >= Nz) {
        return;
    }

    // Clamp the source X index to the valid range so border cells are replicated.
    const int from = min(max(ix - shx, 0), Nx - 1);
    dst[idx(ix, iy, iz)] = src[idx(from, iy, iz)];
}
80 changes: 80 additions & 0 deletions cuda/shiftfudgex_30.ptx
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-24817639
// Cuda compilation tools, release 10.0, V10.0.130
// Based on LLVM 3.4svn
//

.version 6.3
.target sm_30
.address_size 64

// .globl shiftfudgex

// Entry point of the shiftfudgex kernel (sm_30 build).
// Parameters follow the CUDA signature shiftfudgex(dst, src, Nx, Ny, Nz, shx):
//   param_0 = dst, param_1 = src, param_2 = Nx, param_3 = Ny, param_4 = Nz, param_5 = shx.
.visible .entry shiftfudgex(
.param .u64 shiftfudgex_param_0,
.param .u64 shiftfudgex_param_1,
.param .u32 shiftfudgex_param_2,
.param .u32 shiftfudgex_param_3,
.param .u32 shiftfudgex_param_4,
.param .u32 shiftfudgex_param_5
)
{
.reg .pred %p<8>;
.reg .f32 %f<2>;
.reg .b32 %r<25>;
.reg .b64 %rd<9>;


// Load arguments: %rd1 = dst, %rd2 = src, %r4 = Nx, %r5 = Ny, %r7 = Nz, %r6 = shx.
ld.param.u64 %rd1, [shiftfudgex_param_0];
ld.param.u64 %rd2, [shiftfudgex_param_1];
ld.param.u32 %r4, [shiftfudgex_param_2];
ld.param.u32 %r5, [shiftfudgex_param_3];
ld.param.u32 %r7, [shiftfudgex_param_4];
ld.param.u32 %r6, [shiftfudgex_param_5];
// %r1 = ix = ntid.x * ctaid.x + tid.x
mov.u32 %r8, %ctaid.x;
mov.u32 %r9, %ntid.x;
mov.u32 %r10, %tid.x;
mad.lo.s32 %r1, %r9, %r8, %r10;
// %r2 = iy = ntid.y * ctaid.y + tid.y
mov.u32 %r11, %ntid.y;
mov.u32 %r12, %ctaid.y;
mov.u32 %r13, %tid.y;
mad.lo.s32 %r2, %r11, %r12, %r13;
// %r3 = iz = ntid.z * ctaid.z + tid.z
mov.u32 %r14, %ntid.z;
mov.u32 %r15, %ctaid.z;
mov.u32 %r16, %tid.z;
mad.lo.s32 %r3, %r14, %r15, %r16;
// %p5 = (ix < Nx) && (iy < Ny) && (iz < Nz); threads failing the test jump to the return.
setp.lt.s32 %p1, %r1, %r4;
setp.lt.s32 %p2, %r2, %r5;
and.pred %p3, %p1, %p2;
setp.lt.s32 %p4, %r3, %r7;
and.pred %p5, %p3, %p4;
@!%p5 bra BB0_2;
bra.uni BB0_1;

BB0_1:
cvta.to.global.u64 %rd3, %rd2;
// %r17 = ix2 = ix - shx; %p6 = (ix2 < 0)
sub.s32 %r17, %r1, %r6;
setp.lt.s32 %p6, %r17, 0;
// %r19 = (iz * Ny + iy) * Nx: linear offset of the first cell of row (iy, iz)
mad.lo.s32 %r18, %r3, %r5, %r2;
mul.lo.s32 %r19, %r18, %r4;
// Clamp ix2 to [0, Nx-1]: %r21 = (ix2 < Nx) ? ix2 : Nx-1, then %r22 = (ix2 < 0) ? 0 : %r21
setp.lt.s32 %p7, %r17, %r4;
add.s32 %r20, %r4, -1;
selp.b32 %r21, %r17, %r20, %p7;
selp.b32 %r22, 0, %r21, %p6;
// %f1 = src[row offset + clamped ix2]; element index is scaled by 4 bytes per float
add.s32 %r23, %r19, %r22;
mul.wide.s32 %rd4, %r23, 4;
add.s64 %rd5, %rd3, %rd4;
ld.global.f32 %f1, [%rd5];
// dst[row offset + ix] = %f1
add.s32 %r24, %r19, %r1;
cvta.to.global.u64 %rd6, %rd1;
mul.wide.s32 %rd7, %r24, 4;
add.s64 %rd8, %rd6, %rd7;
st.global.f32 [%rd8], %f1;

BB0_2:
ret;
}


80 changes: 80 additions & 0 deletions cuda/shiftfudgex_35.ptx
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-24817639
// Cuda compilation tools, release 10.0, V10.0.130
// Based on LLVM 3.4svn
//

.version 6.3
.target sm_35
.address_size 64

// .globl shiftfudgex

// Entry point of the shiftfudgex kernel (sm_35 build).
// Parameters follow the CUDA signature shiftfudgex(dst, src, Nx, Ny, Nz, shx):
//   param_0 = dst, param_1 = src, param_2 = Nx, param_3 = Ny, param_4 = Nz, param_5 = shx.
.visible .entry shiftfudgex(
.param .u64 shiftfudgex_param_0,
.param .u64 shiftfudgex_param_1,
.param .u32 shiftfudgex_param_2,
.param .u32 shiftfudgex_param_3,
.param .u32 shiftfudgex_param_4,
.param .u32 shiftfudgex_param_5
)
{
.reg .pred %p<8>;
.reg .f32 %f<2>;
.reg .b32 %r<25>;
.reg .b64 %rd<9>;


// Load arguments: %rd1 = dst, %rd2 = src, %r4 = Nx, %r5 = Ny, %r7 = Nz, %r6 = shx.
ld.param.u64 %rd1, [shiftfudgex_param_0];
ld.param.u64 %rd2, [shiftfudgex_param_1];
ld.param.u32 %r4, [shiftfudgex_param_2];
ld.param.u32 %r5, [shiftfudgex_param_3];
ld.param.u32 %r7, [shiftfudgex_param_4];
ld.param.u32 %r6, [shiftfudgex_param_5];
// %r1 = ix = ntid.x * ctaid.x + tid.x
mov.u32 %r8, %ctaid.x;
mov.u32 %r9, %ntid.x;
mov.u32 %r10, %tid.x;
mad.lo.s32 %r1, %r9, %r8, %r10;
// %r2 = iy = ntid.y * ctaid.y + tid.y
mov.u32 %r11, %ntid.y;
mov.u32 %r12, %ctaid.y;
mov.u32 %r13, %tid.y;
mad.lo.s32 %r2, %r11, %r12, %r13;
// %r3 = iz = ntid.z * ctaid.z + tid.z
mov.u32 %r14, %ntid.z;
mov.u32 %r15, %ctaid.z;
mov.u32 %r16, %tid.z;
mad.lo.s32 %r3, %r14, %r15, %r16;
// %p5 = (ix < Nx) && (iy < Ny) && (iz < Nz); threads failing the test jump to the return.
setp.lt.s32 %p1, %r1, %r4;
setp.lt.s32 %p2, %r2, %r5;
and.pred %p3, %p1, %p2;
setp.lt.s32 %p4, %r3, %r7;
and.pred %p5, %p3, %p4;
@!%p5 bra BB0_2;
bra.uni BB0_1;

BB0_1:
cvta.to.global.u64 %rd3, %rd2;
// %r17 = ix2 = ix - shx; %p6 = (ix2 < 0)
sub.s32 %r17, %r1, %r6;
setp.lt.s32 %p6, %r17, 0;
// %r19 = (iz * Ny + iy) * Nx: linear offset of the first cell of row (iy, iz)
mad.lo.s32 %r18, %r3, %r5, %r2;
mul.lo.s32 %r19, %r18, %r4;
// Clamp ix2 to [0, Nx-1]: %r21 = (ix2 < Nx) ? ix2 : Nx-1, then %r22 = (ix2 < 0) ? 0 : %r21
setp.lt.s32 %p7, %r17, %r4;
add.s32 %r20, %r4, -1;
selp.b32 %r21, %r17, %r20, %p7;
selp.b32 %r22, 0, %r21, %p6;
// %f1 = src[row offset + clamped ix2], read with a non-coherent (.nc) global load;
// element index is scaled by 4 bytes per float
add.s32 %r23, %r19, %r22;
mul.wide.s32 %rd4, %r23, 4;
add.s64 %rd5, %rd3, %rd4;
ld.global.nc.f32 %f1, [%rd5];
// dst[row offset + ix] = %f1
add.s32 %r24, %r19, %r1;
cvta.to.global.u64 %rd6, %rd1;
mul.wide.s32 %rd7, %r24, 4;
add.s64 %rd8, %rd6, %rd7;
st.global.f32 [%rd8], %f1;

BB0_2:
ret;
}


80 changes: 80 additions & 0 deletions cuda/shiftfudgex_37.ptx
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-24817639
// Cuda compilation tools, release 10.0, V10.0.130
// Based on LLVM 3.4svn
//

.version 6.3
.target sm_37
.address_size 64

// .globl shiftfudgex

// Entry point of the shiftfudgex kernel (sm_37 build).
// Parameters follow the CUDA signature shiftfudgex(dst, src, Nx, Ny, Nz, shx):
//   param_0 = dst, param_1 = src, param_2 = Nx, param_3 = Ny, param_4 = Nz, param_5 = shx.
.visible .entry shiftfudgex(
.param .u64 shiftfudgex_param_0,
.param .u64 shiftfudgex_param_1,
.param .u32 shiftfudgex_param_2,
.param .u32 shiftfudgex_param_3,
.param .u32 shiftfudgex_param_4,
.param .u32 shiftfudgex_param_5
)
{
.reg .pred %p<8>;
.reg .f32 %f<2>;
.reg .b32 %r<25>;
.reg .b64 %rd<9>;


// Load arguments: %rd1 = dst, %rd2 = src, %r4 = Nx, %r5 = Ny, %r7 = Nz, %r6 = shx.
ld.param.u64 %rd1, [shiftfudgex_param_0];
ld.param.u64 %rd2, [shiftfudgex_param_1];
ld.param.u32 %r4, [shiftfudgex_param_2];
ld.param.u32 %r5, [shiftfudgex_param_3];
ld.param.u32 %r7, [shiftfudgex_param_4];
ld.param.u32 %r6, [shiftfudgex_param_5];
// %r1 = ix = ntid.x * ctaid.x + tid.x
mov.u32 %r8, %ctaid.x;
mov.u32 %r9, %ntid.x;
mov.u32 %r10, %tid.x;
mad.lo.s32 %r1, %r9, %r8, %r10;
// %r2 = iy = ntid.y * ctaid.y + tid.y
mov.u32 %r11, %ntid.y;
mov.u32 %r12, %ctaid.y;
mov.u32 %r13, %tid.y;
mad.lo.s32 %r2, %r11, %r12, %r13;
// %r3 = iz = ntid.z * ctaid.z + tid.z
mov.u32 %r14, %ntid.z;
mov.u32 %r15, %ctaid.z;
mov.u32 %r16, %tid.z;
mad.lo.s32 %r3, %r14, %r15, %r16;
// %p5 = (ix < Nx) && (iy < Ny) && (iz < Nz); threads failing the test jump to the return.
setp.lt.s32 %p1, %r1, %r4;
setp.lt.s32 %p2, %r2, %r5;
and.pred %p3, %p1, %p2;
setp.lt.s32 %p4, %r3, %r7;
and.pred %p5, %p3, %p4;
@!%p5 bra BB0_2;
bra.uni BB0_1;

BB0_1:
cvta.to.global.u64 %rd3, %rd2;
// %r17 = ix2 = ix - shx; %p6 = (ix2 < 0)
sub.s32 %r17, %r1, %r6;
setp.lt.s32 %p6, %r17, 0;
// %r19 = (iz * Ny + iy) * Nx: linear offset of the first cell of row (iy, iz)
mad.lo.s32 %r18, %r3, %r5, %r2;
mul.lo.s32 %r19, %r18, %r4;
// Clamp ix2 to [0, Nx-1]: %r21 = (ix2 < Nx) ? ix2 : Nx-1, then %r22 = (ix2 < 0) ? 0 : %r21
setp.lt.s32 %p7, %r17, %r4;
add.s32 %r20, %r4, -1;
selp.b32 %r21, %r17, %r20, %p7;
selp.b32 %r22, 0, %r21, %p6;
// %f1 = src[row offset + clamped ix2], read with a non-coherent (.nc) global load;
// element index is scaled by 4 bytes per float
add.s32 %r23, %r19, %r22;
mul.wide.s32 %rd4, %r23, 4;
add.s64 %rd5, %rd3, %rd4;
ld.global.nc.f32 %f1, [%rd5];
// dst[row offset + ix] = %f1
add.s32 %r24, %r19, %r1;
cvta.to.global.u64 %rd6, %rd1;
mul.wide.s32 %rd7, %r24, 4;
add.s64 %rd8, %rd6, %rd7;
st.global.f32 [%rd8], %f1;

BB0_2:
ret;
}


80 changes: 80 additions & 0 deletions cuda/shiftfudgex_50.ptx
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-24817639
// Cuda compilation tools, release 10.0, V10.0.130
// Based on LLVM 3.4svn
//

.version 6.3
.target sm_50
.address_size 64

// .globl shiftfudgex

// Entry point of the shiftfudgex kernel (sm_50 build).
// Parameters follow the CUDA signature shiftfudgex(dst, src, Nx, Ny, Nz, shx):
//   param_0 = dst, param_1 = src, param_2 = Nx, param_3 = Ny, param_4 = Nz, param_5 = shx.
.visible .entry shiftfudgex(
.param .u64 shiftfudgex_param_0,
.param .u64 shiftfudgex_param_1,
.param .u32 shiftfudgex_param_2,
.param .u32 shiftfudgex_param_3,
.param .u32 shiftfudgex_param_4,
.param .u32 shiftfudgex_param_5
)
{
.reg .pred %p<8>;
.reg .f32 %f<2>;
.reg .b32 %r<25>;
.reg .b64 %rd<9>;


// Load arguments: %rd1 = dst, %rd2 = src, %r4 = Nx, %r5 = Ny, %r7 = Nz, %r6 = shx.
ld.param.u64 %rd1, [shiftfudgex_param_0];
ld.param.u64 %rd2, [shiftfudgex_param_1];
ld.param.u32 %r4, [shiftfudgex_param_2];
ld.param.u32 %r5, [shiftfudgex_param_3];
ld.param.u32 %r7, [shiftfudgex_param_4];
ld.param.u32 %r6, [shiftfudgex_param_5];
// %r1 = ix = ntid.x * ctaid.x + tid.x
mov.u32 %r8, %ctaid.x;
mov.u32 %r9, %ntid.x;
mov.u32 %r10, %tid.x;
mad.lo.s32 %r1, %r9, %r8, %r10;
// %r2 = iy = ntid.y * ctaid.y + tid.y
mov.u32 %r11, %ntid.y;
mov.u32 %r12, %ctaid.y;
mov.u32 %r13, %tid.y;
mad.lo.s32 %r2, %r11, %r12, %r13;
// %r3 = iz = ntid.z * ctaid.z + tid.z
mov.u32 %r14, %ntid.z;
mov.u32 %r15, %ctaid.z;
mov.u32 %r16, %tid.z;
mad.lo.s32 %r3, %r14, %r15, %r16;
// %p5 = (ix < Nx) && (iy < Ny) && (iz < Nz); threads failing the test jump to the return.
setp.lt.s32 %p1, %r1, %r4;
setp.lt.s32 %p2, %r2, %r5;
and.pred %p3, %p1, %p2;
setp.lt.s32 %p4, %r3, %r7;
and.pred %p5, %p3, %p4;
@!%p5 bra BB0_2;
bra.uni BB0_1;

BB0_1:
cvta.to.global.u64 %rd3, %rd2;
// %r17 = ix2 = ix - shx; %p6 = (ix2 < 0)
sub.s32 %r17, %r1, %r6;
setp.lt.s32 %p6, %r17, 0;
// %r19 = (iz * Ny + iy) * Nx: linear offset of the first cell of row (iy, iz)
mad.lo.s32 %r18, %r3, %r5, %r2;
mul.lo.s32 %r19, %r18, %r4;
// Clamp ix2 to [0, Nx-1]: %r21 = (ix2 < Nx) ? ix2 : Nx-1, then %r22 = (ix2 < 0) ? 0 : %r21
setp.lt.s32 %p7, %r17, %r4;
add.s32 %r20, %r4, -1;
selp.b32 %r21, %r17, %r20, %p7;
selp.b32 %r22, 0, %r21, %p6;
// %f1 = src[row offset + clamped ix2], read with a non-coherent (.nc) global load;
// element index is scaled by 4 bytes per float
add.s32 %r23, %r19, %r22;
mul.wide.s32 %rd4, %r23, 4;
add.s64 %rd5, %rd3, %rd4;
ld.global.nc.f32 %f1, [%rd5];
// dst[row offset + ix] = %f1
add.s32 %r24, %r19, %r1;
cvta.to.global.u64 %rd6, %rd1;
mul.wide.s32 %rd7, %r24, 4;
add.s64 %rd8, %rd6, %rd7;
st.global.f32 [%rd8], %f1;

BB0_2:
ret;
}


Loading