Skip to content

Commit

Permalink
ext: fix generated gmp x86_64 linux assembly sources
Browse files Browse the repository at this point in the history
  • Loading branch information
ripperi committed Oct 10, 2024
1 parent 526287c commit 43c95d6
Show file tree
Hide file tree
Showing 49 changed files with 5,334 additions and 5,566 deletions.
2 changes: 1 addition & 1 deletion ext/gmp/build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,7 @@ const x86_64_linux_asm_sources = [_][]const u8{
"gen/x86_64-linux/mpn/mul_2.s",
"gen/x86_64-linux/mpn/mul_basecase.s",
"gen/x86_64-linux/mpn/mullo_basecase.s",
"gen/x86_64-linux/mpn/mulmid_basecase.s",
"gen/x86_64-linux/mpn/nand_n.s",
"gen/x86_64-linux/mpn/nior_n.s",
"gen/x86_64-linux/mpn/popcount.s",
Expand All @@ -703,7 +704,6 @@ const x86_64_linux_asm_sources = [_][]const u8{
"gen/x86_64-linux/mpn/sub_err3_n.s",
"gen/x86_64-linux/mpn/sub_n.s",
"gen/x86_64-linux/mpn/sublsh1_n.s",
"gen/x86_64-linux/mpn/sublsh2_n.s",
"gen/x86_64-linux/mpn/submul_1.s",
"gen/x86_64-linux/mpn/xnor_n.s",
"gen/x86_64-linux/mpn/xor_n.s",
Expand Down
14 changes: 7 additions & 7 deletions ext/gmp/gen/x86_64-linux/mpn/add_err1_n.s
Original file line number Diff line number Diff line change
Expand Up @@ -189,20 +189,20 @@ __gmpn_add_err1_n:

.align 32, 0x90
.Lloop:
mov (%rsi,%r9,8), %r14
shr $1, %al
mov -8(%r8), %r10
mov $0, %r13d
mov (%rsi,%r9,8), %r14
mov 8(%rsi,%r9,8), %r15
adc (%rdx,%r9,8), %r14
cmovnc %r13, %r10
mov %r14, (%rdi,%r9,8)
mov 8(%rsi,%r9,8), %r15
mov 16(%rsi,%r9,8), %r14
adc 8(%rdx,%r9,8), %r15
mov -16(%r8), %r11
mov %r14, (%rdi,%r9,8)
mov 16(%rsi,%r9,8), %r14
mov %r15, 8(%rdi,%r9,8)
cmovnc %r13, %r11
mov -24(%r8), %r12
mov %r15, 8(%rdi,%r9,8)
adc 16(%rdx,%r9,8), %r14
cmovnc %r13, %r12
mov 24(%rsi,%r9,8), %r15
Expand All @@ -215,12 +215,12 @@ __gmpn_add_err1_n:
adc $0, %rbp
add %r12, %rbx
adc $0, %rbp
lea -32(%r8), %r8
mov %r14, 16(%rdi,%r9,8)
add %r13, %rbx
lea -32(%r8), %r8
adc $0, %rbp
mov %r15, 24(%rdi,%r9,8)
add $4, %r9
mov %r15, -8(%rdi,%r9,8)
jnz .Lloop

.Lend:
Expand Down
207 changes: 64 additions & 143 deletions ext/gmp/gen/x86_64-linux/mpn/add_n.s
Original file line number Diff line number Diff line change
Expand Up @@ -94,180 +94,101 @@ __gmpn_add_nc:




mov %ecx, %eax
shr $3, %rcx
and $7, %eax

lea .Ltab(%rip), %r9
neg %r8
shr $2, %rcx
and $3, %eax
bt $0, %r8
jrcxz .Llt4

movslq (%r9,%rax,4), %rax
lea (%r9,%rax), %rax
jmp *%rax
mov (%rsi), %r8
mov 8(%rsi), %r9
dec %rcx
jmp .Lmid

.size __gmpn_add_nc,.-__gmpn_add_nc

.align 16, 0x90
.globl __gmpn_add_n
.type __gmpn_add_n,@function

__gmpn_add_n:



mov %ecx, %eax
shr $3, %rcx
and $7, %eax

lea .Ltab(%rip), %r9
shr $2, %rcx
and $3, %eax
jrcxz .Llt4

movslq (%r9,%rax,4), %rax
lea (%r9,%rax), %rax
jmp *%rax


.L0: mov (%rsi), %r8
mov (%rsi), %r8
mov 8(%rsi), %r9
dec %rcx
jmp .Lmid

.Llt4: dec %eax
mov (%rsi), %r8
jnz .L2
adc (%rdx), %r8
jmp .Le0
mov %r8, (%rdi)
adc %eax, %eax

ret

.L4: mov (%rsi), %r8
.L2: dec %eax
mov 8(%rsi), %r9
jnz .L3
adc (%rdx), %r8
lea -32(%rsi), %rsi
lea -32(%rdx), %rdx
lea -32(%rdi), %rdi
inc %rcx
jmp .Le4

.L5: mov (%rsi), %r11
mov 8(%rsi), %r8
mov 16(%rsi), %r9
adc (%rdx), %r11
lea -24(%rsi), %rsi
lea -24(%rdx), %rdx
lea -24(%rdi), %rdi
inc %rcx
jmp .Le5

.L6: mov (%rsi), %r10
adc (%rdx), %r10
mov 8(%rsi), %r11
lea -16(%rsi), %rsi
lea -16(%rdx), %rdx
lea -16(%rdi), %rdi
inc %rcx
jmp .Le6

.L7: mov (%rsi), %r9
mov 8(%rsi), %r10
adc (%rdx), %r9
adc 8(%rdx), %r10
lea -8(%rsi), %rsi
lea -8(%rdx), %rdx
lea -8(%rdi), %rdi
inc %rcx
jmp .Le7
adc 8(%rdx), %r9
mov %r8, (%rdi)
mov %r9, 8(%rdi)
adc %eax, %eax

ret

.align 16, 0x90
.Ltop:
.Le3: mov %r9, 40(%rdi)
.Le2: mov %r10, 48(%rdi)
.Le1: mov (%rsi), %r8
mov 8(%rsi), %r9
.L3: mov 16(%rsi), %r10
adc (%rdx), %r8
mov %r11, 56(%rdi)
lea 64(%rdi), %rdi
.Le0: mov 16(%rsi), %r10
adc 8(%rdx), %r9
adc 16(%rdx), %r10
mov %r8, (%rdi)
.Le7: mov 24(%rsi), %r11
mov %r9, 8(%rdi)
.Le6: mov 32(%rsi), %r8
mov 40(%rsi), %r9
adc 24(%rdx), %r11
mov %r10, 16(%rdi)
.Le5: adc 32(%rdx), %r8
mov %r11, 24(%rdi)
.Le4: mov 48(%rsi), %r10
mov 56(%rsi), %r11
mov %r8, 32(%rdi)
lea 64(%rsi), %rsi
adc 40(%rdx), %r9
adc 48(%rdx), %r10
adc 56(%rdx), %r11
lea 64(%rdx), %rdx
dec %rcx
jnz .Ltop

.Lend: mov %r9, 40(%rdi)
mov %r10, 48(%rdi)
mov %r11, 56(%rdi)
mov %ecx, %eax
adc %ecx, %eax
setc %al

ret

.align 16, 0x90
.L3: mov (%rsi), %r9
mov 8(%rsi), %r10
mov 16(%rsi), %r11
adc (%rdx), %r9
adc 8(%rdx), %r10
adc 16(%rdx), %r11
jrcxz .Lx3
lea 24(%rsi), %rsi
lea 24(%rdx), %rdx
lea -40(%rdi), %rdi
jmp .Le3
.Lx3: mov %r9, (%rdi)
mov %r10, 8(%rdi)
mov %r11, 16(%rdi)
mov %ecx, %eax
adc %ecx, %eax

ret
.Ltop: adc (%rdx), %r8
adc 8(%rdx), %r9
adc 16(%rdx), %r10
adc 24(%rdx), %r11
mov %r8, (%rdi)
lea 32(%rsi), %rsi
mov %r9, 8(%rdi)
mov %r10, 16(%rdi)
dec %rcx
mov %r11, 24(%rdi)
lea 32(%rdx), %rdx
mov (%rsi), %r8
mov 8(%rsi), %r9
lea 32(%rdi), %rdi
.Lmid: mov 16(%rsi), %r10
mov 24(%rsi), %r11
jnz .Ltop

.align 16, 0x90
.L1: mov (%rsi), %r11
adc (%rdx), %r11
jrcxz .Lx1
lea 8(%rsi), %rsi
lea 8(%rdx), %rdx
lea -56(%rdi), %rdi
jmp .Le1
.Lx1: mov %r11, (%rdi)
mov %ecx, %eax
adc %ecx, %eax

ret
.Lend: lea 32(%rsi), %rsi
adc (%rdx), %r8
adc 8(%rdx), %r9
adc 16(%rdx), %r10
adc 24(%rdx), %r11
lea 32(%rdx), %rdx
mov %r8, (%rdi)
mov %r9, 8(%rdi)
mov %r10, 16(%rdi)
mov %r11, 24(%rdi)
lea 32(%rdi), %rdi

.align 16, 0x90
.L2: mov (%rsi), %r10
mov 8(%rsi), %r11
adc (%rdx), %r10
adc 8(%rdx), %r11
jrcxz .Lx2
lea 16(%rsi), %rsi
lea 16(%rdx), %rdx
lea -48(%rdi), %rdi
jmp .Le2
.Lx2: mov %r10, (%rdi)
mov %r11, 8(%rdi)
mov %ecx, %eax
adc %ecx, %eax
inc %eax
dec %eax
jnz .Llt4
adc %eax, %eax

ret
.size __gmpn_add_n,.-__gmpn_add_n
.section .data.rel.ro.local,"a",@progbits
.align 8, 0x90
.Ltab: .long .L0-.Ltab
.long .L1-.Ltab
.long .L2-.Ltab
.long .L3-.Ltab
.long .L4-.Ltab
.long .L5-.Ltab
.long .L6-.Ltab
.long .L7-.Ltab
Loading

0 comments on commit 43c95d6

Please sign in to comment.