diff --git a/circom/src/execution_user.rs b/circom/src/execution_user.rs index 40ad6981..15c744c8 100644 --- a/circom/src/execution_user.rs +++ b/circom/src/execution_user.rs @@ -4,6 +4,7 @@ use constraint_writers::debug_writer::DebugWriter; use constraint_writers::ConstraintExporter; use program_structure::program_archive::ProgramArchive; + pub struct ExecutionConfig { pub r1cs: String, pub sym: String, @@ -18,6 +19,7 @@ pub struct ExecutionConfig { pub r1cs_flag: bool, pub json_substitution_flag: bool, pub json_constraint_flag: bool, + pub prime: String, } pub fn execute_project( @@ -34,6 +36,7 @@ pub fn execute_project( flag_p: config.flag_p, flag_verbose: config.flag_verbose, inspect_constraints: config.inspect_constraints_flag, + prime : config.prime, }; let (exporter, vcp) = build_circuit(program_archive, build_config)?; if config.r1cs_flag { diff --git a/circom/src/input_user.rs b/circom/src/input_user.rs index 205729e0..8fbb9472 100644 --- a/circom/src/input_user.rs +++ b/circom/src/input_user.rs @@ -13,7 +13,7 @@ pub struct Input { pub out_c_code: PathBuf, pub out_c_dat: PathBuf, pub out_sym: PathBuf, - pub field: &'static str, + //pub field: &'static str, pub c_flag: bool, pub wasm_flag: bool, pub wat_flag: bool, @@ -29,10 +29,10 @@ pub struct Input { pub inspect_constraints_flag: bool, pub no_rounds: usize, pub flag_verbose: bool, + pub prime: String, } -const P_0: &'static str = - "21888242871839275222246405745257275088548364400416034343698204186575808495617"; + const R1CS: &'static str = "r1cs"; const WAT: &'static str = "wat"; const WASM: &'static str = "wasm"; @@ -42,6 +42,7 @@ const DAT: &'static str = "dat"; const SYM: &'static str = "sym"; const JSON: &'static str = "json"; + impl Input { pub fn new() -> Result { use input_processing::SimplificationStyle; @@ -53,15 +54,15 @@ impl Input { let output_js_path = Input::build_folder(&output_path, &file_name, JS); let o_style = input_processing::get_simplification_style(&matches)?; Result::Ok(Input { - 
field: P_0, + //field: P_BN128, input_program: input, out_r1cs: Input::build_output(&output_path, &file_name, R1CS), out_wat_code: Input::build_output(&output_js_path, &file_name, WAT), out_wasm_code: Input::build_output(&output_js_path, &file_name, WASM), - out_js_folder: output_js_path.clone(), - out_wasm_name: file_name.clone(), - out_c_folder: output_c_path.clone(), - out_c_run_name: file_name.clone(), + out_js_folder: output_js_path.clone(), + out_wasm_name: file_name.clone(), + out_c_folder: output_c_path.clone(), + out_c_run_name: file_name.clone(), out_c_code: Input::build_output(&output_c_path, &file_name, CPP), out_c_dat: Input::build_output(&output_c_path, &file_name, DAT), out_sym: Input::build_output(&output_path, &file_name, SYM), @@ -84,15 +85,16 @@ impl Input { reduced_simplification_flag: o_style == SimplificationStyle::O1, parallel_simplification_flag: input_processing::get_parallel_simplification(&matches), inspect_constraints_flag: input_processing::get_inspect_constraints(&matches), - flag_verbose: input_processing::get_flag_verbose(&matches) + flag_verbose: input_processing::get_flag_verbose(&matches), + prime: input_processing::get_prime(&matches)?, }) } fn build_folder(output_path: &PathBuf, filename: &str, ext: &str) -> PathBuf { let mut file = output_path.clone(); - let folder_name = format!("{}_{}",filename,ext); - file.push(folder_name); - file + let folder_name = format!("{}_{}",filename,ext); + file.push(folder_name); + file } fn build_output(output_path: &PathBuf, filename: &str, ext: &str) -> PathBuf { @@ -184,6 +186,9 @@ impl Input { pub fn no_rounds(&self) -> usize { self.no_rounds } + pub fn prime(&self) -> String{ + self.prime.clone() + } } mod input_processing { use ansi_term::Colour; @@ -279,6 +284,26 @@ mod input_processing { matches.is_present("flag_verbose") } + pub fn get_prime(matches: &ArgMatches) -> Result { + + match matches.is_present("prime"){ + true => + { + let prime_value = matches.value_of("prime").unwrap(); + if 
prime_value == "bn128" + || prime_value == "bls12381" + || prime_value == "goldilocks"{ + Ok(String::from(matches.value_of("prime").unwrap())) + } + else{ + Result::Err(eprintln!("{}", Colour::Red.paint("invalid prime number"))) + } + } + + false => Ok(String::from("bn128")), + } + } + pub fn view() -> ArgMatches<'static> { App::new("circom compiler") .version(VERSION) @@ -397,6 +422,14 @@ mod input_processing { .takes_value(false) .help("Shows logs during compilation"), ) + .arg ( + Arg::with_name("prime") + .short("prime") + .long("prime") + .takes_value(true) + .default_value("bn128") + .help("To choose the prime number to use to generate the circuit. Receives the name of the curve (bn128, bls12381, goldilocks)"), + ) .get_matches() } } diff --git a/circom/src/main.rs b/circom/src/main.rs index 115c6a59..7b0b19d0 100644 --- a/circom/src/main.rs +++ b/circom/src/main.rs @@ -39,6 +39,7 @@ fn start() -> Result<(), ()> { sym: user_input.sym_file().to_string(), r1cs: user_input.r1cs_file().to_string(), json_constraints: user_input.json_constraints_file().to_string(), + prime: user_input.prime(), }; let circuit = execution_user::execute_project(program_archive, config)?; let compilation_config = CompilerConfig { diff --git a/code_producers/src/c_elements/calcwit.cpp b/code_producers/src/c_elements/bls12381/calcwit.cpp similarity index 100% rename from code_producers/src/c_elements/calcwit.cpp rename to code_producers/src/c_elements/bls12381/calcwit.cpp diff --git a/code_producers/src/c_elements/calcwit.hpp b/code_producers/src/c_elements/bls12381/calcwit.hpp similarity index 100% rename from code_producers/src/c_elements/calcwit.hpp rename to code_producers/src/c_elements/bls12381/calcwit.hpp diff --git a/code_producers/src/c_elements/circom.hpp b/code_producers/src/c_elements/bls12381/circom.hpp similarity index 100% rename from code_producers/src/c_elements/circom.hpp rename to code_producers/src/c_elements/bls12381/circom.hpp diff --git 
a/code_producers/src/c_elements/bls12381/fr.asm b/code_producers/src/c_elements/bls12381/fr.asm new file mode 100644 index 00000000..6cf5886f --- /dev/null +++ b/code_producers/src/c_elements/bls12381/fr.asm @@ -0,0 +1,8793 @@ + + + global Fr_copy + global Fr_copyn + global Fr_add + global Fr_sub + global Fr_neg + global Fr_mul + global Fr_square + global Fr_band + global Fr_bor + global Fr_bxor + global Fr_bnot + global Fr_shl + global Fr_shr + global Fr_eq + global Fr_neq + global Fr_lt + global Fr_gt + global Fr_leq + global Fr_geq + global Fr_land + global Fr_lor + global Fr_lnot + global Fr_toNormal + global Fr_toLongNormal + global Fr_toMontgomery + global Fr_toInt + global Fr_isTrue + global Fr_q + global Fr_R3 + + global Fr_rawCopy + global Fr_rawZero + global Fr_rawSwap + global Fr_rawAdd + global Fr_rawSub + global Fr_rawNeg + global Fr_rawMMul + global Fr_rawMSquare + global Fr_rawToMontgomery + global Fr_rawFromMontgomery + global Fr_rawIsEq + global Fr_rawIsZero + global Fr_rawq + global Fr_rawR3 + + extern Fr_fail + DEFAULT REL + + section .text + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; copy +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; rdi <= the dest +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_copy: + + mov rax, [rsi + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + mov [rdi + 24], rax + + mov rax, [rsi + 32] + mov [rdi + 32], rax + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawCopy +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; rdi <= the dest +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_rawCopy: + + mov rax, [rsi + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + mov [rdi + 24], rax + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawZero +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= 
the src (stale template text: Fr_rawZero never reads rsi; it clears the element at rdi)
+;
+; Modified registers:
+;    rax
+;;;;;;;;;;;;;;;;;;;;;;;
+Fr_rawZero:
+        xor     rax, rax
+
+        mov     [rdi + 0], rax
+
+        mov     [rdi + 8], rax
+
+        mov     [rdi + 16], rax
+
+        mov     [rdi + 24], rax
+
+        ret
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawSwap
+;;;;;;;;;;;;;;;;;;;;;;
+; Swaps the four 64-bit limbs of two raw values in place
+; Params:
+;   rdi <= Pointer to the first value
+;   rsi <= Pointer to the second value
+;
+; Modified registers:
+;    rax, rcx
+;;;;;;;;;;;;;;;;;;;;;;;
+Fr_rawSwap:
+
+        mov     rax, [rsi + 0]
+        mov     rcx, [rdi + 0]
+        mov     [rdi + 0], rax
+        mov     [rsi + 0], rcx          ; was rbx, which is never loaded here
+
+        mov     rax, [rsi + 8]
+        mov     rcx, [rdi + 8]
+        mov     [rdi + 8], rax
+        mov     [rsi + 8], rcx
+
+        mov     rax, [rsi + 16]
+        mov     rcx, [rdi + 16]
+        mov     [rdi + 16], rax
+        mov     [rsi + 16], rcx
+
+        mov     rax, [rsi + 24]
+        mov     rcx, [rdi + 24]
+        mov     [rdi + 24], rax
+        mov     [rsi + 24], rcx
+
+        ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; copy an array of integers
+;;;;;;;;;;;;;;;;;;;;;;
+; Copies rdx elements of 5 qwords each (5 * rdx qwords in total)
+; Params:
+;   rsi <= the src
+;   rdi <= the dest
+;   rdx <= number of integers to copy
+;
+; Modified registers:
+;    rax, rcx, rdx, r8, r9 (rsi and rdi are restored before returning)
+;;;;;;;;;;;;;;;;;;;;;;;
+Fr_copyn:
+Fr_copyn_loop:
+        mov     r8, rsi
+        mov     r9, rdi
+        mov     rax, 5
+        mul     rdx                     ; rdx:rax = 5 * count
+        mov     rcx, rax
+        cld
+        rep     movsq
+        mov     rsi, r8
+        mov     rdi, r9
+        ret
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawCopyS2L
+;;;;;;;;;;;;;;;;;;;;;;
+; Convert a signed 64 bit integer to a long format field element
+; Params:
+;   rsi <= the integer
+;   rdi <= Pointer to the overwritten element
+;
+; Modified registers:
+;    rax, and rsi when the integer is negative
+;;;;;;;;;;;;;;;;;;;;;;;
+
+rawCopyS2L:
+        mov     al, 0x80
+        shl     rax, 56
+        mov     [rdi], rax    ; set the result to LONG normal
+
+        cmp     rsi, 0
+        js      u64toLong_adjust_neg
+
+        mov     [rdi + 8], rsi
+        xor     rax, rax
+
+        mov     [rdi + 16], rax
+
+        mov     [rdi + 24], rax
+
+        mov     [rdi + 32], rax
+
+        ret
+
+u64toLong_adjust_neg:
+        add     rsi, [q]         ; Set the first digit
+        mov     [rdi + 8], rsi   ;
+
+        mov     rsi, -1          ; all ones
+
+        mov     rax, rsi                       ; Add to q
+        adc     rax, [q + 8 ]
+        mov     [rdi + 16], rax
+
+        mov     rax, rsi                       ; Add to q
+        adc     rax, [q + 16 ]
+        mov     [rdi + 24], rax
+
+        mov     rax, rsi                       ; Add to q
+        adc     rax, [q + 24 ]
+        mov     [rdi + 32], rax
+
+        ret
+
+;;;;;;;;;;;;;;;;;;;;;;
+; toInt
+;;;;;;;;;;;;;;;;;;;;;;
+; Convert a field element to a native integer
+; Params:
+;   rdi <= Pointer to the element
+; Returns:
+;   rax <= The value
+;
+; A short element (bit 63 of its first qword clear) yields its low 32 bits,
+; sign-extended. A long element is returned as is when it is below 2^31;
+; otherwise q is subtracted and the low limb of the difference is returned
+; if it is a negative 32-bit value. In every other case Fr_fail is called.
+; A long Montgomery element (bit 62 set) is first converted to normal form
+; in a scratch buffer on the stack.
+;;;;;;;;;;;;;;;;;;;;;;;
+Fr_toInt:
+        mov     rax, [rdi]
+        bt      rax, 63
+        jc      Fr_long
+        movsx   rax, eax
+        ret
+
+Fr_long:
+        push    rbp
+        push    rsi
+        push    rdx
+        push    rdi                     ; the Montgomery path repoints rdi
+        mov     rbp, rsp
+        bt      rax, 62
+        jnc     Fr_longNormal
+Fr_longMontgomery:
+
+        ; Fr_toNormal reads its source from rsi and writes to rdi. The
+        ; element arrives in rdi, so hand it over in rsi and convert into
+        ; a 40-byte scratch element on the stack.
+        mov     rsi, rdi
+        sub     rsp, 40
+        mov     rdi, rsp
+        call    Fr_toNormal
+        ; rdi still addresses the scratch copy; the checks below read it.
+
+
+Fr_longNormal:
+        mov     rax, [rdi + 8]
+        mov     rcx, rax
+        shr     rcx, 31
+        jnz     Fr_longNeg
+
+        mov     rcx, [rdi + 16]
+        test    rcx, rcx
+        jnz     Fr_longNeg
+
+        mov     rcx, [rdi + 24]
+        test    rcx, rcx
+        jnz     Fr_longNeg
+
+        mov     rcx, [rdi + 32]
+        test    rcx, rcx
+        jnz     Fr_longNeg
+
+        mov     rsp, rbp
+        pop     rdi
+        pop     rdx
+        pop     rsi
+        pop     rbp
+        ret
+
+Fr_longNeg:
+        ; NOTE(review): limbs 1..3 of (value - q) are only checked for a
+        ; borrow, not for being all ones -- confirm that is sufficient.
+        mov     rax, [rdi + 8]
+        sub     rax, [q]
+        jnc     Fr_longErr
+
+        mov     rcx, [rdi + 16]
+        sbb     rcx, [q + 8]
+        jnc     Fr_longErr
+
+        mov     rcx, [rdi + 24]
+        sbb     rcx, [q + 16]
+        jnc     Fr_longErr
+
+        mov     rcx, [rdi + 32]
+        sbb     rcx, [q + 24]
+        jnc     Fr_longErr
+
+        mov     rcx, rax
+        sar     rcx, 31                 ; bits 63..31 must all be ones
+        add     rcx, 1
+        jnz     Fr_longErr
+        mov     rsp, rbp
+        pop     rdi
+        pop     rdx
+        pop     rsi
+        pop     rbp
+        ret
+
+Fr_longErr:
+        push    rdi
+        mov     rdi, 0
+        call    Fr_fail
+        pop     rdi
+        mov     rsp, rbp
+        pop     rdi
+        pop     rdx
+        pop     rsi
+        pop     rbp
+        ret
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawMMul
+;;;;;;;;;;;;;;;;;;;;;;
+; Multiplies two 4-limb raw values, interleaving each partial product
+; ("FirstLoop") with a reduction round that uses np and q ("SecondLoop").
+; Presumably the Montgomery product of the operands -- TODO confirm.
+; Params:
+;   rdi <= Pointer to the limbs of the result
+;   rsi <= Pointer to the limbs of the first factor
+;   rdx <= Pointer to the limbs of the second factor (moved to rcx)
+; Uses mulx/adcx/adox, so the CPU must support BMI2 and ADX.
+;;;;;;;;;;;;;;;;;;;;;;
+Fr_rawMMul:
+    push r15
+    push r14
+    push r13
+    push r12
+    mov rcx,rdx
+    mov r9,[ np ]
+    xor r10,r10
+
+; FirstLoop
+    mov rdx,[rsi + 0]
+    mulx rax,r11,[rcx]
+    mulx r8,r12,[rcx +8]
+    adcx r12,rax
+    mulx rax,r13,[rcx +16]
+    adcx r13,r8
+    mulx r8,r14,[rcx +24]
+    adcx r14,rax
+    mov r15,r10
+    adcx r15,r8
+; SecondLoop
+    mov rdx,r9
+    mulx rax,rdx,r11
+    mulx r8,rax,[q]
+    adcx rax,r11
+    mulx rax,r11,[q +8]
+    adcx r11,r8
+    adox r11,r12
+    mulx r8,r12,[q +16]
+    adcx r12,rax
+    adox r12,r13
+    mulx rax,r13,[q +24]
+    adcx r13,r8
+    adox r13,r14
+    mov r14,r10
+    adcx r14,rax
+    adox r14,r15
+
+; FirstLoop
+    mov rdx,[rsi + 8]
+    mov r15,r10
+    mulx r8,rax,[rcx +0]
+    adcx r11,rax
+    adox r12,r8
+    mulx r8,rax,[rcx +8]
+
adcx r12,rax + adox r13,r8 + mulx r8,rax,[rcx +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rcx +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 16] + mov r15,r10 + mulx r8,rax,[rcx +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rcx +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rcx +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rcx +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 24] + mov r15,r10 + mulx r8,rax,[rcx +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rcx +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rcx +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rcx +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +;comparison + cmp r14,[q + 24] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq + cmp r13,[q + 16] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq + cmp r12,[q + 8] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq + cmp r11,[q + 0] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq +Fr_rawMMul_sq: + sub r11,[q +0] + sbb r12,[q +8] + sbb r13,[q +16] + sbb r14,[q +24] +Fr_rawMMul_done: + mov [rdi + 0],r11 + mov [rdi + 8],r12 + mov [rdi + 16],r13 + mov [rdi + 24],r14 + pop r12 + pop r13 + pop 
r14 + pop r15 + ret +Fr_rawMSquare: + push r15 + push r14 + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov rdx,[rsi + 0] + mulx rax,r11,rdx + mulx r8,r12,[rsi +8] + adcx r12,rax + mulx rax,r13,[rsi +16] + adcx r13,r8 + mulx r8,r14,[rsi +24] + adcx r14,rax + mov r15,r10 + adcx r15,r8 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 8] + mov r15,r10 + mulx r8,rax,[rsi +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rsi +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rsi +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rsi +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 16] + mov r15,r10 + mulx r8,rax,[rsi +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rsi +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rsi +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rsi +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 24] + mov r15,r10 + mulx r8,rax,[rsi +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rsi +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rsi +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rsi +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov 
rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +;comparison + cmp r14,[q + 24] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq + cmp r13,[q + 16] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq + cmp r12,[q + 8] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq + cmp r11,[q + 0] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq +Fr_rawMSquare_sq: + sub r11,[q +0] + sbb r12,[q +8] + sbb r13,[q +16] + sbb r14,[q +24] +Fr_rawMSquare_done: + mov [rdi + 0],r11 + mov [rdi + 8],r12 + mov [rdi + 16],r13 + mov [rdi + 24],r14 + pop r12 + pop r13 + pop r14 + pop r15 + ret +Fr_rawMMul1: + push r15 + push r14 + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov rdx,rcx + mulx rax,r11,[rsi] + mulx r8,r12,[rsi +8] + adcx r12,rax + mulx rax,r13,[rsi +16] + adcx r13,r8 + mulx r8,r14,[rsi +24] + adcx r14,rax + mov r15,r10 + adcx r15,r8 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + 
mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +;comparison + cmp r14,[q + 24] + jc Fr_rawMMul1_done + jnz Fr_rawMMul1_sq + cmp r13,[q + 16] + jc Fr_rawMMul1_done + jnz Fr_rawMMul1_sq + cmp r12,[q + 8] + jc Fr_rawMMul1_done + jnz Fr_rawMMul1_sq + cmp r11,[q + 0] + jc Fr_rawMMul1_done + jnz Fr_rawMMul1_sq +Fr_rawMMul1_sq: + sub r11,[q +0] + sbb r12,[q +8] + sbb r13,[q +16] + sbb r14,[q +24] +Fr_rawMMul1_done: + mov [rdi + 0],r11 + mov [rdi + 8],r12 + mov [rdi + 16],r13 + mov [rdi + 24],r14 + pop r12 + pop r13 + pop r14 + pop r15 + ret +Fr_rawFromMontgomery: + push r15 + push r14 + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov r11,[rsi +0] + mov r12,[rsi +8] + mov r13,[rsi +16] + mov r14,[rsi +24] + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox 
r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +;comparison + cmp r14,[q + 24] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq + cmp r13,[q + 16] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq + cmp r12,[q + 8] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq + cmp r11,[q + 0] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq +Fr_rawFromMontgomery_sq: + sub r11,[q +0] + sbb r12,[q +8] + sbb r13,[q +16] + sbb r14,[q +24] +Fr_rawFromMontgomery_done: + mov [rdi + 0],r11 + mov [rdi + 8],r12 + mov [rdi + 16],r13 + mov [rdi + 24],r14 + pop r12 + pop r13 + pop r14 + pop r15 + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawToMontgomery +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to Montgomery +; rdi <= Pointer destination element +; rsi <= Pointer to src element +;;;;;;;;;;;;;;;;;;;; +Fr_rawToMontgomery: + push rdx + lea rdx, [R2] + call Fr_rawMMul + pop rdx + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toMontgomery +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to Montgomery +; rdi <= Destination +; rdi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toMontgomery: + mov rax, [rsi] + bt rax, 62 ; check if montgomery + jc toMontgomery_doNothing + bt rax, 63 + jc toMontgomeryLong + +toMontgomeryShort: + movsx rdx, eax + mov [rdi], rdx + add rdi, 8 + lea rsi, [R2] + cmp rdx, 0 + js negMontgomeryShort +posMontgomeryShort: + call Fr_rawMMul1 + sub rdi, 8 + mov r11b, 0x40 + shl r11d, 24 + mov [rdi+4], r11d + ret + +negMontgomeryShort: + neg rdx ; Do the multiplication positive and then negate the result. 
+ call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + mov r11b, 0x40 + shl r11d, 24 + mov [rdi+4], r11d + ret + + +toMontgomeryLong: + mov [rdi], rax + add rdi, 8 + add rsi, 8 + lea rdx, [R2] + call Fr_rawMMul + sub rsi, 8 + sub rdi, 8 + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + ret + + +toMontgomery_doNothing: + call Fr_copy + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toNormal +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number from Montgomery +; rdi <= Destination +; rsi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toNormal: + mov rax, [rsi] + bt rax, 62 ; check if montgomery + jnc toNormal_doNothing + bt rax, 63 ; if short, it means it's converted + jnc toNormal_doNothing + +toNormalLong: + add rdi, 8 + add rsi, 8 + call Fr_rawFromMontgomery + sub rsi, 8 + sub rdi, 8 + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + ret + +toNormal_doNothing: + call Fr_copy + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toLongNormal +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to long normal +; rdi <= Destination +; rsi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toLongNormal: + mov rax, [rsi] + bt rax, 63 ; check if long + jnc toLongNormal_fromShort + bt rax, 62 ; check if montgomery + jc toLongNormal_fromMontgomery + call Fr_copy ; It is already long + ret + +toLongNormal_fromMontgomery: + add rdi, 8 + add rsi, 8 + call Fr_rawFromMontgomery + sub rsi, 8 + sub rdi, 8 + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + ret + +toLongNormal_fromShort: + mov r8, rsi ; save rsi + movsx rsi, eax + call rawCopyS2L + mov rsi, r8 ; recover rsi + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + ret + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; add +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx 
+;;;;;;;;;;;;;;;;;;;;;; +Fr_add: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc add_l1 + bt rcx, 63 ; Check if is short second operand + jc add_s1l2 + +add_s1s2: ; Both operands are short + + xor rdx, rdx + mov edx, eax + add edx, ecx + jo add_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +add_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rsi, eax + movsx rdx, ecx + add rsi, rdx + call rawCopyS2L + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +add_l1: + bt rcx, 63 ; Check if is short second operand + jc add_l1l2 + +;;;;;;;; +add_l1s2: + bt rax, 62 ; check if montgomery first + jc add_l1ms2 +add_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rsi, 8 + movsx rdx, ecx + add rdi, 8 + cmp rdx, 0 + + jns tmp_1 + neg rdx + call rawSubLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +tmp_1: + call rawAddLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +add_l1ms2: + bt rcx, 62 ; check if montgomery second + jc add_l1ms2m +add_l1ms2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1ms2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;; +add_s1l2: + bt rcx, 62 ; check if montgomery second + jc add_s1l2m +add_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + lea rsi, [rdx + 8] + movsx rdx, eax + add 
rdi, 8 + cmp rdx, 0 + + jns tmp_2 + neg rdx + call rawSubLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +tmp_2: + call rawAddLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_s1l2m: + bt rax, 62 ; check if montgomery first + jc add_s1ml2m +add_s1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_s1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +;;;; +add_l1l2: + bt rax, 62 ; check if montgomery first + jc add_l1ml2 +add_l1nl2: + bt rcx, 62 ; check if montgomery second + jc add_l1nl2m +add_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1ml2: + bt rcx, 62 ; check if montgomery seconf + jc add_l1ml2m +add_l1ml2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + 
+ add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawAddLL +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of type long +; Params: +; rsi <= Pointer to the long data of element 1 +; rdx <= Pointer to the long data of element 2 +; rdi <= Pointer to the long data of result +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawAddLL: +Fr_rawAdd: + ; Add component by component with carry + + mov rax, [rsi + 0] + add rax, [rdx + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + adc rax, [rdx + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + adc rax, [rdx + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + adc rax, [rdx + 24] + mov [rdi + 24], rax + + jc rawAddLL_sq ; if overflow, substract q + + ; Compare with q + + + cmp rax, [q + 24] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + + mov rax, [rdi + 16] + + cmp rax, [q + 16] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + + mov rax, [rdi + 8] + + cmp rax, [q + 8] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + + mov rax, [rdi + 0] + + cmp rax, [q + 0] + jc rawAddLL_done ; q is bigget so done. 
+ jnz rawAddLL_sq ; q is lower + + ; If equal substract q +rawAddLL_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +rawAddLL_done: + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawAddLS +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of type long +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long data of element 1 +; rdx <= Value to be added +;;;;;;;;;;;;;;;;;;;;;; +rawAddLS: + ; Add component by component with carry + + add rdx, [rsi] + mov [rdi] ,rdx + + mov rdx, 0 + adc rdx, [rsi + 8] + mov [rdi + 8], rdx + + mov rdx, 0 + adc rdx, [rsi + 16] + mov [rdi + 16], rdx + + mov rdx, 0 + adc rdx, [rsi + 24] + mov [rdi + 24], rdx + + jc rawAddLS_sq ; if overflow, substract q + + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc rawAddLS_done ; q is bigget so done. + jnz rawAddLS_sq ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc rawAddLS_done ; q is bigget so done. + jnz rawAddLS_sq ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc rawAddLS_done ; q is bigget so done. + jnz rawAddLS_sq ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc rawAddLS_done ; q is bigget so done. 
+ jnz rawAddLS_sq ; q is lower + + ; If equal substract q +rawAddLS_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +rawAddLS_done: + ret + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; sub +;;;;;;;;;;;;;;;;;;;;;; +; Substracts two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_sub: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is long first operand + jc sub_l1 + bt rcx, 63 ; Check if is long second operand + jc sub_s1l2 + +sub_s1s2: ; Both operands are short + + xor rdx, rdx + mov edx, eax + sub edx, ecx + jo sub_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +sub_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rsi, eax + movsx rdx, ecx + sub rsi, rdx + call rawCopyS2L + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +sub_l1: + bt rcx, 63 ; Check if is short second operand + jc sub_l1l2 + +;;;;;;;; +sub_l1s2: + bt rax, 62 ; check if montgomery first + jc sub_l1ms2 +sub_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rsi, 8 + movsx rdx, ecx + add rdi, 8 + cmp rdx, 0 + + jns tmp_3 + neg rdx + call rawAddLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +tmp_3: + call rawSubLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ms2: + bt rcx, 62 ; check if montgomery second + jc sub_l1ms2m +sub_l1ms2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + 
pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ms2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;; +sub_s1l2: + bt rcx, 62 ; check if montgomery second + jc sub_s1l2m +sub_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + cmp eax, 0 + + js tmp_4 + + ; First operand is not negative + push rsi + add rdi, 8 + movsx rsi, eax + add rdx, 8 + call rawSubSL + sub rdi, 8 + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_4: ; First operand is negative + push rsi + lea rsi, [rdx + 8] + movsx rdx, eax + add rdi, 8 + neg rdx + call rawNegLS + sub rdi, 8 + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_s1l2m: + bt rax, 62 ; check if montgomery first + jc sub_s1ml2m +sub_s1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_s1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +;;;; +sub_l1l2: + bt rax, 62 ; check if montgomery first + jc sub_l1ml2 +sub_l1nl2: + bt rcx, 62 ; check if montgomery second + jc sub_l1nl2m +sub_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call
Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ml2: + bt rcx, 62 ; check if montgomery second + jc sub_l1ml2m +sub_l1ml2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawSubLS +;;;;;;;;;;;;;;;;;;;;;; +; Subtracts a short element from the long element +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long data of element 1, the value subtracted from +; rdx <= Value to be subtracted +; [rdi] = [rsi] - rdx +; Modified Registers: +; rax, rdx +;;;;;;;;;;;;;;;;;;;;;; +rawSubLS: + ; Subtract first digit + + mov rax, [rsi] + sub rax, rdx + mov [rdi] ,rax + mov rdx, 0 + + mov rax, [rsi + 8] + sbb rax, rdx + mov [rdi + 8], rax + + mov rax, [rsi + 16] + sbb rax, rdx + mov [rdi + 16], rax + + mov rax, [rsi + 24] + sbb rax, rdx + mov [rdi + 24], rax + + jnc rawSubLS_done ; if the subtraction borrowed, add q + + ; Add q +rawSubLS_aq: + + mov rax, [q + 0] + add [rdi + 0], rax + + mov rax, [q + 8] + adc [rdi + 8], rax + + mov rax, [q + 16] + adc [rdi + 16], rax + + mov rax, [q + 24] + adc [rdi + 24], rax + +rawSubLS_done: + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawSubSL +;;;;;;;;;;;;;;;;;;;;;; +; Subtracts a long element from a short element +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Value from which the long element will be subtracted +; rdx <= Pointer to long of the value to be subtracted +; +; [rdi] =
rsi - [rdx] +; Modified Registers: +; rax, rsi +;;;;;;;;;;;;;;;;;;;;;; +rawSubSL: + ; Subtract first digit + sub rsi, [rdx] + mov [rdi] ,rsi + + + mov rax, 0 + sbb rax, [rdx + 8] + mov [rdi + 8], rax + + mov rax, 0 + sbb rax, [rdx + 16] + mov [rdi + 16], rax + + mov rax, 0 + sbb rax, [rdx + 24] + mov [rdi + 24], rax + + jnc rawSubSL_done ; if the subtraction borrowed, add q + + ; Add q +rawSubSL_aq: + + mov rax, [q + 0] + add [rdi + 0], rax + + mov rax, [q + 8] + adc [rdi + 8], rax + + mov rax, [q + 16] + adc [rdi + 16], rax + + mov rax, [q + 24] + adc [rdi + 24], rax + +rawSubSL_done: + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawSubLL +;;;;;;;;;;;;;;;;;;;;;; +; Subtracts a long element from a long element +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to long of the value subtracted from +; rdx <= Pointer to long of the value to be subtracted +; +; [rdi] = [rsi] - [rdx] +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawSubLL: +Fr_rawSub: + ; Subtract first digit + + mov rax, [rsi + 0] + sub rax, [rdx + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + sbb rax, [rdx + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + sbb rax, [rdx + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + sbb rax, [rdx + 24] + mov [rdi + 24], rax + + jnc rawSubLL_done ; if the subtraction borrowed, add q + + ; Add q +rawSubLL_aq: + + mov rax, [q + 0] + add [rdi + 0], rax + + mov rax, [q + 8] + adc [rdi + 8], rax + + mov rax, [q + 16] + adc [rdi + 16], rax + + mov rax, [q + 24] + adc [rdi + 24], rax + +rawSubLL_done: + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawNegLS +;;;;;;;;;;;;;;;;;;;;;; +; Subtracts a long element and a short element from 0 +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to long of the value to be subtracted +; rdx <= short value to be subtracted too +; +; [rdi] = -[rsi] - rdx +; Modified Registers: +; rax, rdx +;;;;;;;;;;;;;;;;;;;;;; +rawNegLS: + mov rax, [q] + sub rax, rdx + mov [rdi], rax + + mov rax, [q + 8 ] + sbb rax, 0 + mov [rdi + 8], rax + + mov rax, [q + 16 ]
+ sbb rax, 0 + mov [rdi + 16], rax + + mov rax, [q + 24 ] + sbb rax, 0 + mov [rdi + 24], rax + + setc dl ; dl = 1 if q - rdx borrowed. + + + mov rax, [rdi + 0 ] + sub rax, [rsi + 0] + mov [rdi + 0], rax + + mov rax, [rdi + 8 ] + sbb rax, [rsi + 8] + mov [rdi + 8], rax + + mov rax, [rdi + 16 ] + sbb rax, [rsi + 16] + mov [rdi + 16], rax + + mov rax, [rdi + 24 ] + sbb rax, [rsi + 24] + mov [rdi + 24], rax + + + setc dh ; dh = 1 if subtracting [rsi] borrowed. + or dl, dh ; zero only when neither subtraction borrowed. + jz rawNegSL_done + + ; it is a negative value, so add q + + mov rax, [q + 0] + add [rdi + 0], rax + + mov rax, [q + 8] + adc [rdi + 8], rax + + mov rax, [q + 16] + adc [rdi + 16], rax + + mov rax, [q + 24] + adc [rdi + 24], rax + + +rawNegSL_done: + ret + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; neg +;;;;;;;;;;;;;;;;;;;;;; +; Negates an element of any kind +; Params: +; rsi <= Pointer to element to be negated +; rdi <= Pointer to result +; [rdi] = -[rsi] +;;;;;;;;;;;;;;;;;;;;;; +Fr_neg: + mov rax, [rsi] + bt rax, 63 ; Check if the operand is long + jc neg_l + +neg_s: ; Operand is short + + neg eax + jo neg_manageOverflow ; Check if overflow.
(0x80000000 is the only case) + + mov [rdi], rax ; not necessary to adjust so just save and return + ret + +neg_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rsi, eax + neg rsi + call rawCopyS2L + pop rsi + ret + + + +neg_l: + mov [rdi], rax ; Copy the type + + add rdi, 8 + add rsi, 8 + call rawNegL + sub rdi, 8 + sub rsi, 8 + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawNeg +;;;;;;;;;;;;;;;;;;;;;; +; Negates a value +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long data of element 1 +; +; [rdi] = - [rsi] +;;;;;;;;;;;;;;;;;;;;;; +rawNegL: +Fr_rawNeg: + ; Check whether the value is zero + + xor rax, rax + + cmp [rsi + 0], rax + jnz doNegate + + cmp [rsi + 8], rax + jnz doNegate + + cmp [rsi + 16], rax + jnz doNegate + + cmp [rsi + 24], rax + jnz doNegate + + ; it's zero so just set to zero + + mov [rdi + 0], rax + + mov [rdi + 8], rax + + mov [rdi + 16], rax + + mov [rdi + 24], rax + + ret +doNegate: + + mov rax, [q + 0] + sub rax, [rsi + 0] + mov [rdi + 0], rax + + mov rax, [q + 8] + sbb rax, [rsi + 8] + mov [rdi + 8], rax + + mov rax, [q + 16] + sbb rax, [rsi + 16] + mov [rdi + 16], rax + + mov rax, [q + 24] + sbb rax, [rsi + 24] + mov [rdi + 24], rax + + ret + + + + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; square +;;;;;;;;;;;;;;;;;;;;;; +; Squares a field element +; Params: +; rsi <= Pointer to element 1 +; rdi <= Pointer to result +; [rdi] = [rsi] * [rsi] +; Modified Registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_square: + mov r8, [rsi] + bt r8, 63 ; Check if the operand is long + jc square_l1 + +square_s1: ; Operand is short + + xor rax, rax + mov eax, r8d + imul eax + jo square_manageOverflow ; product does not fit in 32 bits + + mov [rdi], rax ; no overflow so save the short result (NOTE(review): no ret follows, execution falls through into square_manageOverflow - confirm intended) + +square_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rax, r8d + imul rax + mov rsi, rax + call rawCopyS2L + pop rsi + + ret + +square_l1: + bt r8, 62 ; check if
montgomery first + jc square_l1m +square_l1n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + call Fr_rawMSquare + sub rdi, 8 + sub rsi, 8 + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + +square_l1m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + call Fr_rawMSquare + sub rdi, 8 + sub rsi, 8 + + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; mul +;;;;;;;;;;;;;;;;;;;;;; +; Multiplies two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; [rdi] = [rsi] * [rdx] +; Modified Registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_mul: + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is long first operand + jc mul_l1 + bt r9, 63 ; Check if is long second operand + jc mul_s1l2 + +mul_s1s2: ; Both operands are short + + xor rax, rax + mov eax, r8d + imul r9d + jo mul_manageOverflow ; product does not fit in 32 bits + + mov [rdi], rax ; no overflow so save the short result (NOTE(review): no ret follows, execution falls through into mul_manageOverflow - confirm intended) + +mul_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rax, r8d + movsx rcx, r9d + imul rcx + mov rsi, rax + call rawCopyS2L + pop rsi + + ret + +mul_l1: + bt r9, 63 ; Check if is long second operand + jc mul_l1l2 + +;;;;;;;; +mul_l1s2: + bt r8, 62 ; check if montgomery first + jc mul_l1ms2 +mul_l1ns2: + bt r9, 62 ; check if montgomery second + jc mul_l1ns2m +mul_l1ns2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + add rsi, 8 + movsx rdx, r9d + add rdi, 8 + cmp rdx, 0 + + jns tmp_5 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_6 +tmp_5: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_6: + + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + + +mul_l1ns2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add
rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + + +mul_l1ms2: + bt r9, 62 ; check if montgomery second + jc mul_l1ms2m +mul_l1ms2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + add rsi, 8 + movsx rdx, r9d + add rdi, 8 + cmp rdx, 0 + + jns tmp_7 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_8 +tmp_7: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_8: + + + ret + +mul_l1ms2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + + +;;;;;;;; +mul_s1l2: + bt r8, 62 ; check if montgomery first + jc mul_s1ml2 +mul_s1nl2: + bt r9, 62 ; check if montgomery second + jc mul_s1nl2m +mul_s1nl2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + lea rsi, [rdx + 8] + movsx rdx, r8d + add rdi, 8 + cmp rdx, 0 + + jns tmp_9 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_10 +tmp_9: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_10: + + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + +mul_s1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + lea rsi, [rdx + 8] + movsx rdx, r8d + add rdi, 8 + cmp rdx, 0 + + jns tmp_11 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_12 +tmp_11: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_12: + + + ret + +mul_s1ml2: + bt r9, 62 ; check if montgomery second + jc mul_s1ml2m +mul_s1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_s1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +;;;; +mul_l1l2: + bt r8, 62 ; check if montgomery first + jc mul_l1ml2 +mul_l1nl2: + bt r9, 62 ; check if
montgomery second + jc mul_l1nl2m +mul_l1nl2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + +mul_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_l1ml2: + bt r9, 62 ; check if montgomery second + jc mul_l1ml2m +mul_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_l1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + + + + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; band +;;;;;;;;;;;;;;;;;;;;;; +; Bitwise AND of two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_band: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is long first operand + jc and_l1 + bt rcx, 63 ; Check if is long second operand + jc and_s1l2 + +and_s1s2: + + cmp eax, 0 + + js tmp_13 + + cmp ecx, 0 + js tmp_13 + xor rdx, rdx ; both ops are non-negative so do the op and return + mov edx, eax + and edx, ecx + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_13: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + and
rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + ; If equal substract q +tmp_14: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_15: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + + +and_l1: + bt rcx, 63 ; Check if is short second operand + jc and_l1l2 + + +and_l1s2: + bt rax, 62 ; check if montgomery first + jc and_l1ms2 +and_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_16 + movsx rax, ecx + and rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + and rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + and rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_18 ; q is bigget so done. 
+ jnz tmp_17 ; q is lower + + ; If equal substract q +tmp_17: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_18: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_16: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_20 ; q is bigget so done. 
+ jnz tmp_19 ; q is lower + + ; If equal substract q +tmp_19: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_20: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +and_l1ms2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_21 + movsx rax, ecx + and rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + and rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + and rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_23 ; q is bigget so done. 
+ jnz tmp_22 ; q is lower + + ; If equal substract q +tmp_22: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_23: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_21: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_25 ; q is bigget so done. 
+ jnz tmp_24 ; q is lower + + ; If equal substract q +tmp_24: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_25: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +and_s1l2: + bt rcx, 62 ; check if montgomery first + jc and_s1l2m +and_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov eax, [rsi] + cmp eax, 0 + + js tmp_26 + and rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_28 ; q is bigget so done. 
+ jnz tmp_27 ; q is lower + + ; If equal substract q +tmp_27: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_28: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_26: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_30 ; q is bigget so done. + jnz tmp_29 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_30 ; q is bigget so done. + jnz tmp_29 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_30 ; q is bigget so done. + jnz tmp_29 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_30 ; q is bigget so done. 
+ jnz tmp_29 ; q is lower + + ; If equal substract q +tmp_29: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_30: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +and_s1l2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov eax, [rsi] + cmp eax, 0 + + js tmp_31 + and rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_33 ; q is bigget so done. 
+ jnz tmp_32 ; q is lower + + ; If equal substract q +tmp_32: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_33: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_31: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_35 ; q is bigget so done. 
+ jnz tmp_34 ; q is lower + + ; If equal substract q +tmp_34: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_35: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +and_l1l2: + bt rax, 62 ; check if montgomery first + jc and_l1ml2 + bt rcx, 62 ; check if montgomery first + jc and_l1nl2m +and_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_37 ; q is bigget so done. 
+ jnz tmp_36 ; q is lower + + ; If equal substract q +tmp_36: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_37: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +and_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_39 ; q is bigget so done. 
+ jnz tmp_38 ; q is lower + + ; If equal substract q +tmp_38: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_39: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +and_l1ml2: + bt rcx, 62 ; check if montgomery first + jc and_l1ml2m +and_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_41 ; q is bigget so done. 
+ jnz tmp_40 ; q is lower + + ; If equal substract q +tmp_40: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_41: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +and_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_43 ; q is bigget so done. + jnz tmp_42 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_43 ; q is bigget so done. + jnz tmp_42 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_43 ; q is bigget so done. + jnz tmp_42 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_43 ; q is bigget so done. 
+ jnz tmp_42 ; q is lower + + ; If equal subtract q +tmp_42: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_43: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; bor +;;;;;;;;;;;;;;;;;;;;;; +; Bitwise OR of two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bor: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is long first operand + jc or_l1 + bt rcx, 63 ; Check if is long second operand + jc or_s1l2 + +or_s1s2: + + cmp eax, 0 + + js tmp_44 + + cmp ecx, 0 + js tmp_44 + xor rdx, rdx ; both ops are non-negative so do the op and return + mov edx, eax + or edx, ecx + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_44: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q, most significant limb first + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_46 ; q is bigger so done. + jnz tmp_45 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_46 ; q is bigger so done.
+ jnz tmp_45 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_46 ; q is bigget so done. + jnz tmp_45 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_46 ; q is bigget so done. + jnz tmp_45 ; q is lower + + ; If equal substract q +tmp_45: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_46: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + + +or_l1: + bt rcx, 63 ; Check if is short second operand + jc or_l1l2 + + +or_l1s2: + bt rax, 62 ; check if montgomery first + jc or_l1ms2 +or_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_47 + movsx rax, ecx + or rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + or rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + or rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_49 ; q is bigget so done. 
+ jnz tmp_48 ; q is lower + + ; If equal substract q +tmp_48: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_49: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_47: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_51 ; q is bigget so done. 
+ jnz tmp_50 ; q is lower + + ; If equal substract q +tmp_50: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_51: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +or_l1ms2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_52 + movsx rax, ecx + or rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + or rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + or rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_54 ; q is bigget so done. + jnz tmp_53 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_54 ; q is bigget so done. + jnz tmp_53 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_54 ; q is bigget so done. + jnz tmp_53 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_54 ; q is bigget so done. 
+ jnz tmp_53 ; q is lower + + ; If equal substract q +tmp_53: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_54: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_52: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_56 ; q is bigget so done. 
+ jnz tmp_55 ; q is lower + + ; If equal substract q +tmp_55: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_56: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +or_s1l2: + bt rcx, 62 ; check if montgomery first + jc or_s1l2m +or_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov eax, [rsi] + cmp eax, 0 + + js tmp_57 + or rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_59 ; q is bigget so done. 
+ jnz tmp_58 ; q is lower + + ; If equal substract q +tmp_58: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_59: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_57: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_61 ; q is bigget so done. 
+ jnz tmp_60 ; q is lower + + ; If equal substract q +tmp_60: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_61: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +or_s1l2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov eax, [rsi] + cmp eax, 0 + + js tmp_62 + or rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_64 ; q is bigget so done. 
+ jnz tmp_63 ; q is lower + + ; If equal substract q +tmp_63: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_64: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_62: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_66 ; q is bigget so done. 
+ jnz tmp_65 ; q is lower + + ; If equal substract q +tmp_65: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_66: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +or_l1l2: + bt rax, 62 ; check if montgomery first + jc or_l1ml2 + bt rcx, 62 ; check if montgomery first + jc or_l1nl2m +or_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_68 ; q is bigget so done. + jnz tmp_67 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_68 ; q is bigget so done. + jnz tmp_67 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_68 ; q is bigget so done. + jnz tmp_67 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_68 ; q is bigget so done. 
+ jnz tmp_67 ; q is lower + + ; If equal substract q +tmp_67: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_68: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +or_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_70 ; q is bigget so done. 
+ jnz tmp_69 ; q is lower + + ; If equal substract q +tmp_69: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_70: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +or_l1ml2: + bt rcx, 62 ; check if montgomery first + jc or_l1ml2m +or_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_72 ; q is bigget so done. 
+ jnz tmp_71 ; q is lower + + ; If equal substract q +tmp_71: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_72: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +or_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_74 ; q is bigget so done. 
+ jnz tmp_73 ; q is lower + + ; If equal substract q +tmp_73: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_74: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; bxor +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bxor: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc xor_l1 + bt rcx, 63 ; Check if is short second operand + jc xor_s1l2 + +xor_s1s2: + + cmp eax, 0 + + js tmp_75 + + cmp ecx, 0 + js tmp_75 + xor rdx, rdx ; both ops are positive so do the op and return + mov edx, eax + xor edx, ecx + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_75: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_77 ; q is bigget so done. + jnz tmp_76 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_77 ; q is bigget so done. 
+ jnz tmp_76 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_77 ; q is bigget so done. + jnz tmp_76 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_77 ; q is bigget so done. + jnz tmp_76 ; q is lower + + ; If equal substract q +tmp_76: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_77: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + + +xor_l1: + bt rcx, 63 ; Check if is short second operand + jc xor_l1l2 + + +xor_l1s2: + bt rax, 62 ; check if montgomery first + jc xor_l1ms2 +xor_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_78 + movsx rax, ecx + xor rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + xor rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + xor rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_80 ; q is bigget so done. 
+ jnz tmp_79 ; q is lower + + ; If equal substract q +tmp_79: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_80: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_78: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_82 ; q is bigget so done. 
+ jnz tmp_81 ; q is lower + + ; If equal substract q +tmp_81: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_82: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +xor_l1ms2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_83 + movsx rax, ecx + xor rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + xor rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + xor rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_85 ; q is bigget so done. 
+ jnz tmp_84 ; q is lower + + ; If equal substract q +tmp_84: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_85: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_83: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_87 ; q is bigget so done. 
+ jnz tmp_86 ; q is lower + + ; If equal substract q +tmp_86: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_87: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +xor_s1l2: + bt rcx, 62 ; check if montgomery first + jc xor_s1l2m +xor_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov eax, [rsi] + cmp eax, 0 + + js tmp_88 + xor rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_90 ; q is bigget so done. 
+ jnz tmp_89 ; q is lower + + ; If equal substract q +tmp_89: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_90: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_88: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_92 ; q is bigget so done. + jnz tmp_91 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_92 ; q is bigget so done. + jnz tmp_91 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_92 ; q is bigget so done. + jnz tmp_91 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_92 ; q is bigget so done. 
+ jnz tmp_91 ; q is lower + + ; If equal substract q +tmp_91: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_92: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +xor_s1l2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov eax, [rsi] + cmp eax, 0 + + js tmp_93 + xor rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_95 ; q is bigget so done. 
+ jnz tmp_94 ; q is lower + + ; If equal substract q +tmp_94: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_95: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_93: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_97 ; q is bigget so done. 
+ jnz tmp_96 ; q is lower + + ; If equal substract q +tmp_96: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_97: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +xor_l1l2: + bt rax, 62 ; check if montgomery first + jc xor_l1ml2 + bt rcx, 62 ; check if montgomery first + jc xor_l1nl2m +xor_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_99 ; q is bigget so done. 
+ jnz tmp_98 ; q is lower + + ; If equal substract q +tmp_98: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_99: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +xor_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_101 ; q is bigget so done. 
+ jnz tmp_100 ; q is lower + + ; If equal substract q +tmp_100: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_101: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +xor_l1ml2: + bt rcx, 62 ; check if montgomery first + jc xor_l1ml2m +xor_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_103 ; q is bigget so done. 
+ jnz tmp_102 ; q is lower + + ; If equal substract q +tmp_102: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_103: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +xor_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_105 ; q is bigget so done. + jnz tmp_104 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_105 ; q is bigget so done. + jnz tmp_104 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_105 ; q is bigget so done. + jnz tmp_104 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_105 ; q is bigget so done. 
+ jnz tmp_104 ; q is lower + + ; If equal substract q +tmp_104: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_105: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; bnot +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bnot: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rax, [rsi] + bt rax, 63 ; Check if is long operand + jc bnot_l1 +bnot_s: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp bnot_l1n + +bnot_l1: + bt rax, 62 ; check if montgomery first + jnc bnot_l1n + +bnot_l1m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + +bnot_l1n: + + mov rax, [rsi + 8] + not rax + + mov [rdi + 8], rax + + mov rax, [rsi + 16] + not rax + + mov [rdi + 16], rax + + mov rax, [rsi + 24] + not rax + + mov [rdi + 24], rax + + mov rax, [rsi + 32] + not rax + + and rax, [lboMask] + + mov [rdi + 32], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_107 ; q is bigget so done. + jnz tmp_106 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_107 ; q is bigget so done. + jnz tmp_106 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_107 ; q is bigget so done. + jnz tmp_106 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_107 ; q is bigget so done. 
+ jnz tmp_106 ; q is lower + + ; If equal substract q +tmp_106: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_107: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawShr +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= how much is shifted +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +rawShr: + cmp rdx, 0 + je Fr_rawCopy + + cmp rdx, 255 + jae Fr_rawZero + +rawShr_nz: + mov r8, rdx + shr r8,6 + mov rcx, rdx + and rcx, 0x3F + jz rawShr_aligned + mov ch, 64 + sub ch, cl + + mov r9, 1 + rol cx, 8 + shl r9, cl + rol cx, 8 + sub r9, 1 + mov r10, r9 + not r10 + + + cmp r8, 3 + jae rawShr_if2_0 + + mov rax, [rsi + r8*8 + 0 ] + shr rax, cl + and rax, r9 + mov r11, [rsi + r8*8 + 8 ] + rol cx, 8 + shl r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + mov [rdi + 0], rax + + jmp rawShr_endif_0 +rawShr_if2_0: + jne rawShr_else_0 + + mov rax, [rsi + r8*8 + 0 ] + shr rax, cl + and rax, r9 + mov [rdi + 0], rax + + jmp rawShr_endif_0 +rawShr_else_0: + xor rax, rax + mov [rdi + 0], rax +rawShr_endif_0: + + cmp r8, 2 + jae rawShr_if2_1 + + mov rax, [rsi + r8*8 + 8 ] + shr rax, cl + and rax, r9 + mov r11, [rsi + r8*8 + 16 ] + rol cx, 8 + shl r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + mov [rdi + 8], rax + + jmp rawShr_endif_1 +rawShr_if2_1: + jne rawShr_else_1 + + mov rax, [rsi + r8*8 + 8 ] + shr rax, cl + and rax, r9 + mov [rdi + 8], rax + + jmp rawShr_endif_1 +rawShr_else_1: + xor rax, rax + mov [rdi + 8], rax +rawShr_endif_1: + + cmp r8, 1 + jae rawShr_if2_2 + + mov rax, [rsi + r8*8 + 16 ] + shr rax, cl + and rax, r9 + mov r11, [rsi + r8*8 + 24 ] + rol cx, 8 + shl r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + mov [rdi + 16], rax + + jmp rawShr_endif_2 +rawShr_if2_2: + jne 
rawShr_else_2 + + mov rax, [rsi + r8*8 + 16 ] + shr rax, cl + and rax, r9 + mov [rdi + 16], rax + + jmp rawShr_endif_2 +rawShr_else_2: + xor rax, rax + mov [rdi + 16], rax +rawShr_endif_2: + + cmp r8, 0 + jae rawShr_if2_3 + + mov rax, [rsi + r8*8 + 24 ] + shr rax, cl + and rax, r9 + mov r11, [rsi + r8*8 + 32 ] + rol cx, 8 + shl r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + mov [rdi + 24], rax + + jmp rawShr_endif_3 +rawShr_if2_3: + jne rawShr_else_3 + + mov rax, [rsi + r8*8 + 24 ] + shr rax, cl + and rax, r9 + mov [rdi + 24], rax + + jmp rawShr_endif_3 +rawShr_else_3: + xor rax, rax + mov [rdi + 24], rax +rawShr_endif_3: + + + ret + +rawShr_aligned: + + cmp r8, 3 + ja rawShr_if3_0 + mov rax, [rsi + r8*8 + 0 ] + mov [rdi + 0], rax + jmp rawShr_endif3_0 +rawShr_if3_0: + xor rax, rax + mov [rdi + 0], rax +rawShr_endif3_0: + + cmp r8, 2 + ja rawShr_if3_1 + mov rax, [rsi + r8*8 + 8 ] + mov [rdi + 8], rax + jmp rawShr_endif3_1 +rawShr_if3_1: + xor rax, rax + mov [rdi + 8], rax +rawShr_endif3_1: + + cmp r8, 1 + ja rawShr_if3_2 + mov rax, [rsi + r8*8 + 16 ] + mov [rdi + 16], rax + jmp rawShr_endif3_2 +rawShr_if3_2: + xor rax, rax + mov [rdi + 16], rax +rawShr_endif3_2: + + cmp r8, 0 + ja rawShr_if3_3 + mov rax, [rsi + r8*8 + 24 ] + mov [rdi + 24], rax + jmp rawShr_endif3_3 +rawShr_if3_3: + xor rax, rax + mov [rdi + 24], rax +rawShr_endif3_3: + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawShl +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= how much is shifted +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +rawShl: + cmp rdx, 0 + je Fr_rawCopy + + cmp rdx, 255 + jae Fr_rawZero + + mov r8, rdx + shr r8,6 + mov rcx, rdx + and rcx, 0x3F + jz rawShl_aligned + mov ch, 64 + sub ch, cl + + + mov r10, 1 + shl r10, cl + sub r10, 1 + mov r9, r10 + not r9 + + mov rdx, rsi + mov rax, r8 + shl rax, 3 + sub rdx, rax + + + cmp r8, 3 + jae rawShl_if2_3 + + mov rax, [rdx + 24 
] + shl rax, cl + and rax, r9 + mov r11, [rdx + 16 ] + rol cx, 8 + shr r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + + and rax, [lboMask] + + + mov [rdi + 24], rax + + jmp rawShl_endif_3 +rawShl_if2_3: + jne rawShl_else_3 + + mov rax, [rdx + 24 ] + shl rax, cl + and rax, r9 + + and rax, [lboMask] + + + mov [rdi + 24], rax + + jmp rawShl_endif_3 +rawShl_else_3: + xor rax, rax + mov [rdi + 24], rax +rawShl_endif_3: + + cmp r8, 2 + jae rawShl_if2_2 + + mov rax, [rdx + 16 ] + shl rax, cl + and rax, r9 + mov r11, [rdx + 8 ] + rol cx, 8 + shr r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + + + mov [rdi + 16], rax + + jmp rawShl_endif_2 +rawShl_if2_2: + jne rawShl_else_2 + + mov rax, [rdx + 16 ] + shl rax, cl + and rax, r9 + + + mov [rdi + 16], rax + + jmp rawShl_endif_2 +rawShl_else_2: + xor rax, rax + mov [rdi + 16], rax +rawShl_endif_2: + + cmp r8, 1 + jae rawShl_if2_1 + + mov rax, [rdx + 8 ] + shl rax, cl + and rax, r9 + mov r11, [rdx + 0 ] + rol cx, 8 + shr r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + + + mov [rdi + 8], rax + + jmp rawShl_endif_1 +rawShl_if2_1: + jne rawShl_else_1 + + mov rax, [rdx + 8 ] + shl rax, cl + and rax, r9 + + + mov [rdi + 8], rax + + jmp rawShl_endif_1 +rawShl_else_1: + xor rax, rax + mov [rdi + 8], rax +rawShl_endif_1: + + cmp r8, 0 + jae rawShl_if2_0 + + mov rax, [rdx + 0 ] + shl rax, cl + and rax, r9 + mov r11, [rdx + -8 ] + rol cx, 8 + shr r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + + + mov [rdi + 0], rax + + jmp rawShl_endif_0 +rawShl_if2_0: + jne rawShl_else_0 + + mov rax, [rdx + 0 ] + shl rax, cl + and rax, r9 + + + mov [rdi + 0], rax + + jmp rawShl_endif_0 +rawShl_else_0: + xor rax, rax + mov [rdi + 0], rax +rawShl_endif_0: + + + + + + + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc tmp_109 ; q is bigget so done. + jnz tmp_108 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc tmp_109 ; q is bigget so done. 
+ jnz tmp_108 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc tmp_109 ; q is bigget so done. + jnz tmp_108 ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc tmp_109 ; q is bigget so done. + jnz tmp_108 ; q is lower + + ; If equal substract q +tmp_108: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +tmp_109: + + ret; + +rawShl_aligned: + mov rdx, rsi + mov rax, r8 + shl rax, 3 + sub rdx, rax + + + cmp r8, 3 + ja rawShl_if3_3 + mov rax, [rdx + 24 ] + + and rax, [lboMask] + + mov [rdi + 24], rax + jmp rawShl_endif3_3 +rawShl_if3_3: + xor rax, rax + mov [rdi + 24], rax +rawShl_endif3_3: + + cmp r8, 2 + ja rawShl_if3_2 + mov rax, [rdx + 16 ] + + mov [rdi + 16], rax + jmp rawShl_endif3_2 +rawShl_if3_2: + xor rax, rax + mov [rdi + 16], rax +rawShl_endif3_2: + + cmp r8, 1 + ja rawShl_if3_1 + mov rax, [rdx + 8 ] + + mov [rdi + 8], rax + jmp rawShl_endif3_1 +rawShl_if3_1: + xor rax, rax + mov [rdi + 8], rax +rawShl_endif3_1: + + cmp r8, 0 + ja rawShl_if3_0 + mov rax, [rdx + 0 ] + + mov [rdi + 0], rax + jmp rawShl_endif3_0 +rawShl_if3_0: + xor rax, rax + mov [rdi + 0], rax +rawShl_endif3_0: + + + + + + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc tmp_111 ; q is bigget so done. + jnz tmp_110 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc tmp_111 ; q is bigget so done. + jnz tmp_110 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc tmp_111 ; q is bigget so done. + jnz tmp_110 ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc tmp_111 ; q is bigget so done. 
+ jnz tmp_110 ; q is lower + + ; If equal substract q +tmp_110: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +tmp_111: + + ret + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; shr +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_shr: + push rbp + push rsi + push rdi + push rdx + mov rbp, rsp + + + + + + + mov rcx, [rdx] + bt rcx, 63 ; Check if is short second operand + jnc tmp_112 + + ; long 2 + bt rcx, 62 ; Check if is montgomery second operand + jnc tmp_113 + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + +tmp_113: + mov rcx, [rdx + 8] + cmp rcx, 255 + jae tmp_114 + xor rax, rax + + cmp [rdx + 16], rax + jnz tmp_114 + + cmp [rdx + 24], rax + jnz tmp_114 + + cmp [rdx + 32], rax + jnz tmp_114 + + mov rdx, rcx + jmp do_shr + +tmp_114: + mov rcx, [q] + sub rcx, [rdx+8] + cmp rcx, 255 + jae setzero + mov rax, [q] + sub rax, [rdx+8] + + mov rax, [q+ 8] + sbb rax, [rdx + 16] + jnz setzero + + mov rax, [q+ 16] + sbb rax, [rdx + 24] + jnz setzero + + mov rax, [q+ 24] + sbb rax, [rdx + 32] + jnz setzero + + mov rdx, rcx + jmp do_shl + +tmp_112: + cmp ecx, 0 + jl tmp_115 + cmp ecx, 255 + jae setzero + movsx rdx, ecx + jmp do_shr +tmp_115: + neg ecx + cmp ecx, 255 + jae setzero + movsx rdx, ecx + jmp do_shl + + + + +;;;;;;;;;;;;;;;;;;;;;; +; shl +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_shl: + push rbp + push rsi + push rdi + push rdx + mov rbp, rsp + + + + + + mov rcx, [rdx] + bt rcx, 63 ; 
Check if is short second operand + jnc tmp_116 + + ; long 2 + bt rcx, 62 ; Check if is montgomery second operand + jnc tmp_117 + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + +tmp_117: + mov rcx, [rdx + 8] + cmp rcx, 255 + jae tmp_118 + xor rax, rax + + cmp [rdx + 16], rax + jnz tmp_118 + + cmp [rdx + 24], rax + jnz tmp_118 + + cmp [rdx + 32], rax + jnz tmp_118 + + mov rdx, rcx + jmp do_shl + +tmp_118: + mov rcx, [q] + sub rcx, [rdx+8] + cmp rcx, 255 + jae setzero + mov rax, [q] + sub rax, [rdx+8] + + mov rax, [q+ 8] + sbb rax, [rdx + 16] + jnz setzero + + mov rax, [q+ 16] + sbb rax, [rdx + 24] + jnz setzero + + mov rax, [q+ 24] + sbb rax, [rdx + 32] + jnz setzero + + mov rdx, rcx + jmp do_shr + +tmp_116: + cmp ecx, 0 + jl tmp_119 + cmp ecx, 255 + jae setzero + movsx rdx, ecx + jmp do_shl +tmp_119: + neg ecx + cmp ecx, 255 + jae setzero + movsx rdx, ecx + jmp do_shr + + + +;;;;;;;;;; +;;; doShl +;;;;;;;;;; +do_shl: + mov rcx, [rsi] + bt rcx, 63 ; Check if is short second operand + jc do_shll +do_shls: + + movsx rax, ecx + cmp rax, 0 + jz setzero; + jl do_shlcl + + cmp rdx, 31 + jae do_shlcl + + mov cl, dl + shl rax, cl + mov rcx, rax + shr rcx, 31 + jnz do_shlcl + mov [rdi], rax + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + +do_shlcl: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp do_shlln + +do_shll: + bt rcx, 62 ; Check if is short second operand + jnc do_shlln + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + +do_shlln: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + add rdi, 8 + add rsi, 8 + call rawShl + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + + +;;;;;;;;;; +;;; doShr +;;;;;;;;;; +do_shr: + mov rcx, [rsi] + bt rcx, 63 ; Check if is short second operand + jc do_shrl +do_shrs: 
+ movsx rax, ecx + cmp rax, 0 + jz setzero; + jl do_shrcl + + cmp rdx, 31 + jae setzero + + mov cl, dl + shr rax, cl + mov [rdi], rax + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + +do_shrcl: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + +do_shrl: + bt rcx, 62 ; Check if is short second operand + jnc do_shrln + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + +do_shrln: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + add rdi, 8 + add rsi, 8 + call rawShr + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + +setzero: + xor rax, rax + mov [rdi], rax + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rgt - Raw Greater Than +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi > *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rgt: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc rgt_l1 + bt r9, 63 ; Check if is short second operand + jc rgt_s1l2 + +rgt_s1s2: ; Both operands are short + cmp r8d, r9d + jg rgt_ret1 + jmp rgt_ret0 + + +rgt_l1: + bt r9, 63 ; Check if is short second operand + jc rgt_l1l2 + +;;;;;;;; +rgt_l1s2: + bt r8, 62 ; check if montgomery first + jc rgt_l1ms2 +rgt_l1ns2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + +rgt_l1ms2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rgtL1L2 + + +;;;;;;;; 
+rgt_s1l2: + bt r9, 62 ; check if montgomery second + jc rgt_s1l2m +rgt_s1l2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rgtL1L2 + +rgt_s1l2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + +;;;; +rgt_l1l2: + bt r8, 62 ; check if montgomery first + jc rgt_l1ml2 +rgt_l1nl2: + bt r9, 62 ; check if montgomery second + jc rgt_l1nl2m +rgt_l1nl2n: + jmp rgtL1L2 + +rgt_l1nl2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + +rgt_l1ml2: + bt r9, 62 ; check if montgomery second + jc rgt_l1ml2m +rgt_l1ml2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rgtL1L2 + +rgt_l1ml2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + + +;;;;;; +; rgtL1L2 +;;;;;; + +rgtL1L2: + + + mov rax, [rsi + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc 
rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jmp rgtl1l2_p1 + + + +rgtl1l2_p1: + + + mov rax, [rdx + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jmp rgtRawL1L2 + + + + +rgtl1l2_n1: + + + mov rax, [rdx + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jmp rgt_ret0 + + + + + +rgtRawL1L2: + + mov rax, [rsi + 32] + cmp [rdx + 32], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + jnz rgt_ret0 + + + mov rax, [rsi + 24] + cmp [rdx + 24], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + jnz rgt_ret0 + + + mov rax, [rsi + 16] + cmp [rdx + 16], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + jnz rgt_ret0 + + + mov rax, [rsi + 8] + cmp [rdx + 8], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + + +rgt_ret0: + xor rax, rax + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +rgt_ret1: + mov rax, 1 + mov rsp, 
rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rlt - Raw Less Than +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi > *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rlt: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc rlt_l1 + bt r9, 63 ; Check if is short second operand + jc rlt_s1l2 + +rlt_s1s2: ; Both operands are short + cmp r8d, r9d + jl rlt_ret1 + jmp rlt_ret0 + + +rlt_l1: + bt r9, 63 ; Check if is short second operand + jc rlt_l1l2 + +;;;;;;;; +rlt_l1s2: + bt r8, 62 ; check if montgomery first + jc rlt_l1ms2 +rlt_l1ns2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + +rlt_l1ms2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rltL1L2 + + +;;;;;;;; +rlt_s1l2: + bt r9, 62 ; check if montgomery second + jc rlt_s1l2m +rlt_s1l2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rltL1L2 + +rlt_s1l2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + +;;;; +rlt_l1l2: + bt r8, 62 ; check if montgomery first + jc rlt_l1ml2 +rlt_l1nl2: + bt r9, 62 ; check if montgomery second + jc rlt_l1nl2m +rlt_l1nl2n: + jmp rltL1L2 + +rlt_l1nl2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push 
r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + +rlt_l1ml2: + bt r9, 62 ; check if montgomery second + jc rlt_l1ml2m +rlt_l1ml2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rltL1L2 + +rlt_l1ml2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + + +;;;;;; +; rltL1L2 +;;;;;; + +rltL1L2: + + + mov rax, [rsi + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rltl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rltl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rltl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jmp rltl1l2_p1 + + + +rltl1l2_p1: + + + mov rax, [rdx + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jnz rltRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jnz rltRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jnz rltRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jmp rltRawL1L2 + + + + +rltl1l2_n1: + + + mov rax, [rdx 
+ 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rlt_ret1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rlt_ret1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rlt_ret1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jmp rlt_ret1 + + + + + +rltRawL1L2: + + mov rax, [rsi + 32] + cmp [rdx + 32], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + mov rax, [rsi + 24] + cmp [rdx + 24], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + mov rax, [rsi + 16] + cmp [rdx + 16], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + mov rax, [rsi + 8] + cmp [rdx + 8], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + +rlt_ret0: + xor rax, rax + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +rlt_ret1: + mov rax, 1 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; req - Raw Eq +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi == *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_req: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc req_l1 + bt r9, 63 ; Check if is short second operand + jc req_s1l2 + +req_s1s2: ; Both operands are short + cmp r8d, r9d + je req_ret1 + jmp req_ret0 + + +req_l1: + bt r9, 63 ; Check if is short second operand + jc req_l1l2 + +;;;;;;;; +req_l1s2: + bt r8, 62 ; check if montgomery first + jc req_l1ms2 +req_l1ns2: + + mov r8, rdi + 
sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp reqL1L2 + +req_l1ms2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + jmp reqL1L2 + + +;;;;;;;; +req_s1l2: + bt r9, 62 ; check if montgomery second + jc req_s1l2m +req_s1l2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp reqL1L2 + +req_s1l2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + jmp reqL1L2 + +;;;; +req_l1l2: + bt r8, 62 ; check if montgomery first + jc req_l1ml2 +req_l1nl2: + bt r9, 62 ; check if montgomery second + jc req_l1nl2m +req_l1nl2n: + jmp reqL1L2 + +req_l1nl2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + jmp reqL1L2 + +req_l1ml2: + bt r9, 62 ; check if montgomery second + jc req_l1ml2m +req_l1ml2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + jmp reqL1L2 + +req_l1ml2m: + jmp reqL1L2 + + +;;;;;; +; eqL1L2 +;;;;;; + +reqL1L2: + + mov rax, [rsi + 8] + cmp [rdx + 8], rax + jne req_ret0 ; rsi 1st > 2nd + + mov rax, [rsi + 16] + cmp [rdx + 16], rax + jne req_ret0 ; rsi 1st > 2nd + + mov rax, [rsi + 24] + cmp [rdx + 24], rax + jne req_ret0 ; rsi 1st > 2nd + + mov rax, [rsi + 32] + cmp [rdx + 32], rax + jne req_ret0 ; rsi 1st > 2nd + + +req_ret1: + mov rax, 1 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +req_ret0: + xor rax, rax + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; gt +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero 
or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_gt: + call Fr_rgt + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; lt +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_lt: + call Fr_rlt + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; eq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_eq: + call Fr_req + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; neq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_neq: + call Fr_req + xor rax, 1 + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; geq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_geq: + call Fr_rlt + xor rax, 1 + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; leq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. 
+; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_leq: + call Fr_rgt + xor rax, 1 + mov [rdi], rax + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawIsEq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rdi <= Pointer to element 1 +; rsi <= Pointer to element 2 +; Returns +; ax <= 1 if are equal 0, otherwise +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rawIsEq: + + mov rax, [rsi + 0] + cmp [rdi + 0], rax + jne rawIsEq_ret0 + + mov rax, [rsi + 8] + cmp [rdi + 8], rax + jne rawIsEq_ret0 + + mov rax, [rsi + 16] + cmp [rdi + 16], rax + jne rawIsEq_ret0 + + mov rax, [rsi + 24] + cmp [rdi + 24], rax + jne rawIsEq_ret0 + +rawIsEq_ret1: + mov rax, 1 + ret + +rawIsEq_ret0: + xor rax, rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawIsZero +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rdi <= Pointer to element 1 +; Returns +; ax <= 1 if is 0, otherwise +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rawIsZero: + + cmp qword [rdi + 0], $0 + jne rawIsZero_ret0 + + cmp qword [rdi + 8], $0 + jne rawIsZero_ret0 + + cmp qword [rdi + 16], $0 + jne rawIsZero_ret0 + + cmp qword [rdi + 24], $0 + jne rawIsZero_ret0 + + +rawIsZero_ret1: + mov rax, 1 + ret + +rawIsZero_ret0: + xor rax, rax + ret + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; land +;;;;;;;;;;;;;;;;;;;;;; +; Logical and between two elements +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result zero or one +; Modified Registers: +; rax, rcx, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_land: + + + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_120 + + test eax, eax + jz retZero_122 + jmp retOne_121 + +tmp_120: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_121 + + mov rax, [rsi + 16] + test rax, rax + jnz retOne_121 + + mov rax, [rsi + 24] + test rax, rax + jnz retOne_121 + + mov rax, [rsi + 32] + test rax, rax + jnz retOne_121 + + +retZero_122: + mov qword r8, 0 + jmp done_123 + +retOne_121: + mov qword r8, 1 + 
+done_123: + + + + + + + + mov rax, [rdx] + bt rax, 63 + jc tmp_124 + + test eax, eax + jz retZero_126 + jmp retOne_125 + +tmp_124: + + mov rax, [rdx + 8] + test rax, rax + jnz retOne_125 + + mov rax, [rdx + 16] + test rax, rax + jnz retOne_125 + + mov rax, [rdx + 24] + test rax, rax + jnz retOne_125 + + mov rax, [rdx + 32] + test rax, rax + jnz retOne_125 + + +retZero_126: + mov qword rcx, 0 + jmp done_127 + +retOne_125: + mov qword rcx, 1 + +done_127: + + and rcx, r8 + mov [rdi], rcx + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; lor +;;;;;;;;;;;;;;;;;;;;;; +; Logical or between two elements +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result zero or one +; Modified Registers: +; rax, rcx, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_lor: + + + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_128 + + test eax, eax + jz retZero_130 + jmp retOne_129 + +tmp_128: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_129 + + mov rax, [rsi + 16] + test rax, rax + jnz retOne_129 + + mov rax, [rsi + 24] + test rax, rax + jnz retOne_129 + + mov rax, [rsi + 32] + test rax, rax + jnz retOne_129 + + +retZero_130: + mov qword r8, 0 + jmp done_131 + +retOne_129: + mov qword r8, 1 + +done_131: + + + + + + + + mov rax, [rdx] + bt rax, 63 + jc tmp_132 + + test eax, eax + jz retZero_134 + jmp retOne_133 + +tmp_132: + + mov rax, [rdx + 8] + test rax, rax + jnz retOne_133 + + mov rax, [rdx + 16] + test rax, rax + jnz retOne_133 + + mov rax, [rdx + 24] + test rax, rax + jnz retOne_133 + + mov rax, [rdx + 32] + test rax, rax + jnz retOne_133 + + +retZero_134: + mov qword rcx, 0 + jmp done_135 + +retOne_133: + mov qword rcx, 1 + +done_135: + + or rcx, r8 + mov [rdi], rcx + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; lnot +;;;;;;;;;;;;;;;;;;;;;; +; Do the logical not of an element +; Params: +; rsi <= Pointer to element to be tested +; rdi <= Pointer to result one if element1 is zero and zero otherwise +; Modified Registers: +; rax, rax, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_lnot: + 
+ + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_136 + + test eax, eax + jz retZero_138 + jmp retOne_137 + +tmp_136: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_137 + + mov rax, [rsi + 16] + test rax, rax + jnz retOne_137 + + mov rax, [rsi + 24] + test rax, rax + jnz retOne_137 + + mov rax, [rsi + 32] + test rax, rax + jnz retOne_137 + + +retZero_138: + mov qword rcx, 0 + jmp done_139 + +retOne_137: + mov qword rcx, 1 + +done_139: + + test rcx, rcx + + jz lnot_retOne +lnot_retZero: + mov qword [rdi], 0 + ret +lnot_retOne: + mov qword [rdi], 1 + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; isTrue +;;;;;;;;;;;;;;;;;;;;;; +; Convert a 64 bit integer to a long format field element +; Params: +; rsi <= Pointer to the element +; Returs: +; rax <= 1 if true 0 if false +;;;;;;;;;;;;;;;;;;;;;;; +Fr_isTrue: + + + + + + + mov rax, [rdi] + bt rax, 63 + jc tmp_140 + + test eax, eax + jz retZero_142 + jmp retOne_141 + +tmp_140: + + mov rax, [rdi + 8] + test rax, rax + jnz retOne_141 + + mov rax, [rdi + 16] + test rax, rax + jnz retOne_141 + + mov rax, [rdi + 24] + test rax, rax + jnz retOne_141 + + mov rax, [rdi + 32] + test rax, rax + jnz retOne_141 + + +retZero_142: + mov qword rax, 0 + jmp done_143 + +retOne_141: + mov qword rax, 1 + +done_143: + + ret + + + + + + section .data +Fr_q: + dd 0 + dd 0x80000000 +Fr_rawq: +q dq 0xffffffff00000001,0x53bda402fffe5bfe,0x3339d80809a1d805,0x73eda753299d7d48 +half dq 0x7fffffff80000000,0xa9ded2017fff2dff,0x199cec0404d0ec02,0x39f6d3a994cebea4 +R2 dq 0xc999e990f3f29c6d,0x2b6cedcb87925c23,0x05d314967254398f,0x0748d9d99f59ff11 +Fr_R3: + dd 0 + dd 0x80000000 +Fr_rawR3: +R3 dq 0xc62c1807439b73af,0x1b3e0d188cf06990,0x73d13c71c7b5f418,0x6e2a5bb9c8db33e9 +lboMask dq 0x7fffffffffffffff +np dq 0xfffffffeffffffff + diff --git a/code_producers/src/c_elements/bls12381/fr.cpp b/code_producers/src/c_elements/bls12381/fr.cpp new file mode 100644 index 00000000..39f5257d --- /dev/null +++ b/code_producers/src/c_elements/bls12381/fr.cpp @@ -0,0 +1,321 @@ 
+#include "fr.hpp" +#include +#include +#include +#include +#include + + +static mpz_t q; +static mpz_t zero; +static mpz_t one; +static mpz_t mask; +static size_t nBits; +static bool initialized = false; + + +void Fr_toMpz(mpz_t r, PFrElement pE) { + FrElement tmp; + Fr_toNormal(&tmp, pE); + if (!(tmp.type & Fr_LONG)) { + mpz_set_si(r, tmp.shortVal); + if (tmp.shortVal<0) { + mpz_add(r, r, q); + } + } else { + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)tmp.longVal); + } +} + +void Fr_fromMpz(PFrElement pE, mpz_t v) { + if (mpz_fits_sint_p(v)) { + pE->type = Fr_SHORT; + pE->shortVal = mpz_get_si(v); + } else { + pE->type = Fr_LONG; + for (int i=0; ilongVal[i] = 0; + mpz_export((void *)(pE->longVal), NULL, -1, 8, -1, 0, v); + } +} + + +bool Fr_init() { + if (initialized) return false; + initialized = true; + mpz_init(q); + mpz_import(q, Fr_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal); + mpz_init_set_ui(zero, 0); + mpz_init_set_ui(one, 1); + nBits = mpz_sizeinbase (q, 2); + mpz_init(mask); + mpz_mul_2exp(mask, one, nBits); + mpz_sub(mask, mask, one); + return true; +} + +void Fr_str2element(PFrElement pE, char const *s) { + mpz_t mr; + mpz_init_set_str(mr, s, 10); + mpz_fdiv_r(mr, mr, q); + Fr_fromMpz(pE, mr); + mpz_clear(mr); +} + +char *Fr_element2str(PFrElement pE) { + FrElement tmp; + mpz_t r; + if (!(pE->type & Fr_LONG)) { + if (pE->shortVal>=0) { + char *r = new char[32]; + sprintf(r, "%d", pE->shortVal); + return r; + } else { + mpz_init_set_si(r, pE->shortVal); + mpz_add(r, r, q); + } + } else { + Fr_toNormal(&tmp, pE); + mpz_init(r); + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)tmp.longVal); + } + char *res = mpz_get_str (0, 10, r); + mpz_clear(r); + return res; +} + +void Fr_idiv(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + // char *s1 = mpz_get_str (0, 10, ma); + // printf("s1 %s\n", s1); + Fr_toMpz(mb, b); + // char *s2 = mpz_get_str 
(0, 10, mb); + // printf("s2 %s\n", s2); + mpz_fdiv_q(mr, ma, mb); + // char *sr = mpz_get_str (0, 10, mr); + // printf("r %s\n", sr); + Fr_fromMpz(r, mr); + + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); +} + +void Fr_mod(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + Fr_toMpz(mb, b); + mpz_fdiv_r(mr, ma, mb); + Fr_fromMpz(r, mr); + + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); +} + +void Fr_pow(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + Fr_toMpz(mb, b); + mpz_powm(mr, ma, mb, q); + Fr_fromMpz(r, mr); + + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); +} + +void Fr_inv(PFrElement r, PFrElement a) { + mpz_t ma; + mpz_t mr; + mpz_init(ma); + mpz_init(mr); + + Fr_toMpz(ma, a); + mpz_invert(mr, ma, q); + Fr_fromMpz(r, mr); + mpz_clear(ma); + mpz_clear(mr); +} + +void Fr_div(PFrElement r, PFrElement a, PFrElement b) { + FrElement tmp; + Fr_inv(&tmp, b); + Fr_mul(r, a, &tmp); +} + +void Fr_fail() { + assert(false); +} + + +RawFr::RawFr() { + Fr_init(); + set(fZero, 0); + set(fOne, 1); + neg(fNegOne, fOne); +} + +RawFr::~RawFr() { +} + +void RawFr::fromString(Element &r, const std::string &s, uint32_t radix) { + mpz_t mr; + mpz_init_set_str(mr, s.c_str(), radix); + mpz_fdiv_r(mr, mr, q); + for (int i=0; i>3] & (1 << (p & 0x7))) +void RawFr::exp(Element &r, const Element &base, uint8_t* scalar, unsigned int scalarSize) { + bool oneFound = false; + Element copyBase; + copy(copyBase, base); + for (int i=scalarSize*8-1; i>=0; i--) { + if (!oneFound) { + if ( !BIT_IS_SET(scalar, i) ) continue; + copy(r, copyBase); + oneFound = true; + continue; + } + square(r, r); + if ( BIT_IS_SET(scalar, i) ) { + mul(r, r, copyBase); + } + } + if (!oneFound) { + copy(r, fOne); + } +} + +void RawFr::toMpz(mpz_t r, const Element &a) { + Element tmp; + 
Fr_rawFromMontgomery(tmp.v, a.v); + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)tmp.v); +} + +void RawFr::fromMpz(Element &r, const mpz_t a) { + for (int i=0; i +#include +#include + +#define Fr_N64 4 +#define Fr_SHORT 0x00000000 +#define Fr_LONG 0x80000000 +#define Fr_LONGMONTGOMERY 0xC0000000 +typedef uint64_t FrRawElement[Fr_N64]; +typedef struct __attribute__((__packed__)) { + int32_t shortVal; + uint32_t type; + FrRawElement longVal; +} FrElement; +typedef FrElement *PFrElement; +extern FrElement Fr_q; +extern FrElement Fr_R3; +extern FrRawElement Fr_rawq; +extern FrRawElement Fr_rawR3; + +extern "C" void Fr_copy(PFrElement r, PFrElement a); +extern "C" void Fr_copyn(PFrElement r, PFrElement a, int n); +extern "C" void Fr_add(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_sub(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_neg(PFrElement r, PFrElement a); +extern "C" void Fr_mul(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_square(PFrElement r, PFrElement a); +extern "C" void Fr_band(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bxor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bnot(PFrElement r, PFrElement a); +extern "C" void Fr_shl(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_shr(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_eq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_neq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lt(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_gt(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_leq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_geq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_land(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lnot(PFrElement r, 
PFrElement a); +extern "C" void Fr_toNormal(PFrElement r, PFrElement a); +extern "C" void Fr_toLongNormal(PFrElement r, PFrElement a); +extern "C" void Fr_toMontgomery(PFrElement r, PFrElement a); + +extern "C" int Fr_isTrue(PFrElement pE); +extern "C" int Fr_toInt(PFrElement pE); + +extern "C" void Fr_rawCopy(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawSwap(FrRawElement pRawResult, FrRawElement pRawA); +extern "C" void Fr_rawAdd(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawSub(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawNeg(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawMMul(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawMSquare(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawMMul1(FrRawElement pRawResult, const FrRawElement pRawA, uint64_t pRawB); +extern "C" void Fr_rawToMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA); +extern "C" void Fr_rawFromMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA); +extern "C" int Fr_rawIsEq(const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" int Fr_rawIsZero(const FrRawElement pRawB); + +extern "C" void Fr_fail(); + + +// Pending functions to convert + +void Fr_str2element(PFrElement pE, char const*s); +char *Fr_element2str(PFrElement pE); +void Fr_idiv(PFrElement r, PFrElement a, PFrElement b); +void Fr_mod(PFrElement r, PFrElement a, PFrElement b); +void Fr_inv(PFrElement r, PFrElement a); +void Fr_div(PFrElement r, PFrElement a, PFrElement b); +void Fr_pow(PFrElement r, PFrElement a, PFrElement b); + +class RawFr { + +public: + const static int N64 = Fr_N64; + const static int MaxBits = 255; + + + struct Element { + FrRawElement v; + }; + +private: + Element fZero; + Element fOne; + Element fNegOne; + +public: + + RawFr(); + ~RawFr(); + + 
const Element &zero() { return fZero; }; + const Element &one() { return fOne; }; + const Element &negOne() { return fNegOne; }; + Element set(int value); + void set(Element &r, int value); + + void fromString(Element &r, const std::string &n, uint32_t radix = 10); + std::string toString(const Element &a, uint32_t radix = 10); + + void inline copy(Element &r, const Element &a) { Fr_rawCopy(r.v, a.v); }; + void inline swap(Element &a, Element &b) { Fr_rawSwap(a.v, b.v); }; + void inline add(Element &r, const Element &a, const Element &b) { Fr_rawAdd(r.v, a.v, b.v); }; + void inline sub(Element &r, const Element &a, const Element &b) { Fr_rawSub(r.v, a.v, b.v); }; + void inline mul(Element &r, const Element &a, const Element &b) { Fr_rawMMul(r.v, a.v, b.v); }; + + Element inline add(const Element &a, const Element &b) { Element r; Fr_rawAdd(r.v, a.v, b.v); return r;}; + Element inline sub(const Element &a, const Element &b) { Element r; Fr_rawSub(r.v, a.v, b.v); return r;}; + Element inline mul(const Element &a, const Element &b) { Element r; Fr_rawMMul(r.v, a.v, b.v); return r;}; + + Element inline neg(const Element &a) { Element r; Fr_rawNeg(r.v, a.v); return r; }; + Element inline square(const Element &a) { Element r; Fr_rawMSquare(r.v, a.v); return r; }; + + Element inline add(int a, const Element &b) { return add(set(a), b);}; + Element inline sub(int a, const Element &b) { return sub(set(a), b);}; + Element inline mul(int a, const Element &b) { return mul(set(a), b);}; + + Element inline add(const Element &a, int b) { return add(a, set(b));}; + Element inline sub(const Element &a, int b) { return sub(a, set(b));}; + Element inline mul(const Element &a, int b) { return mul(a, set(b));}; + + void inline mul1(Element &r, const Element &a, uint64_t b) { Fr_rawMMul1(r.v, a.v, b); }; + void inline neg(Element &r, const Element &a) { Fr_rawNeg(r.v, a.v); }; + void inline square(Element &r, const Element &a) { Fr_rawMSquare(r.v, a.v); }; + void inv(Element &r, const 
Element &a); + void div(Element &r, const Element &a, const Element &b); + void exp(Element &r, const Element &base, uint8_t* scalar, unsigned int scalarSize); + + void inline toMontgomery(Element &r, const Element &a) { Fr_rawToMontgomery(r.v, a.v); }; + void inline fromMontgomery(Element &r, const Element &a) { Fr_rawFromMontgomery(r.v, a.v); }; + int inline eq(const Element &a, const Element &b) { return Fr_rawIsEq(a.v, b.v); }; + int inline isZero(const Element &a) { return Fr_rawIsZero(a.v); }; + + void toMpz(mpz_t r, const Element &a); + void fromMpz(Element &a, const mpz_t r); + + int toRprBE(const Element &element, uint8_t *data, int bytes); + int fromRprBE(Element &element, const uint8_t *data, int bytes); + + int bytes ( void ) { return Fr_N64 * 8; }; + + void fromUI(Element &r, unsigned long int v); + + static RawFr field; + +}; + + +#endif // __FR_H + + + diff --git a/code_producers/src/c_elements/main.cpp b/code_producers/src/c_elements/bls12381/main.cpp similarity index 100% rename from code_producers/src/c_elements/main.cpp rename to code_producers/src/c_elements/bls12381/main.cpp diff --git a/code_producers/src/c_elements/makefile b/code_producers/src/c_elements/bls12381/makefile similarity index 100% rename from code_producers/src/c_elements/makefile rename to code_producers/src/c_elements/bls12381/makefile diff --git a/code_producers/src/c_elements/bn128/calcwit.cpp b/code_producers/src/c_elements/bn128/calcwit.cpp new file mode 100644 index 00000000..fc7ea033 --- /dev/null +++ b/code_producers/src/c_elements/bn128/calcwit.cpp @@ -0,0 +1,122 @@ +#include +#include +#include +#include "calcwit.hpp" + +extern void run(Circom_CalcWit* ctx); + +std::string int_to_hex( u64 i ) +{ + std::stringstream stream; + stream << "0x" + << std::setfill ('0') << std::setw(16) + << std::hex << i; + return stream.str(); +} + +u64 fnv1a(std::string s) { + u64 hash = 0xCBF29CE484222325LL; + for(char& c : s) { + hash ^= u64(c); + hash *= 0x100000001B3LL; + } + return 
hash; +} + +Circom_CalcWit::Circom_CalcWit (Circom_Circuit *aCircuit, uint maxTh) { + circuit = aCircuit; + inputSignalAssignedCounter = get_main_input_signal_no(); + inputSignalAssigned = new bool[inputSignalAssignedCounter]; + for (int i = 0; i< inputSignalAssignedCounter; i++) { + inputSignalAssigned[i] = false; + } + signalValues = new FrElement[get_total_signal_no()]; + Fr_str2element(&signalValues[0], "1"); + componentMemory = new Circom_Component[get_number_of_components()]; + circuitConstants = circuit ->circuitConstants; + templateInsId2IOSignalInfo = circuit -> templateInsId2IOSignalInfo; + + maxThread = maxTh; + + // parallelism + numThread = 0; + +} + +Circom_CalcWit::~Circom_CalcWit() { + // ... +} + +uint Circom_CalcWit::getInputSignalHashPosition(u64 h) { + uint n = get_size_of_input_hashmap(); + uint pos = (uint)(h % (u64)n); + if (circuit->InputHashMap[pos].hash!=h){ + uint inipos = pos; + pos++; + while (pos != inipos) { + if (circuit->InputHashMap[pos].hash==h) return pos; + if (circuit->InputHashMap[pos].hash==0) { + fprintf(stderr, "Signal not found\n"); + assert(false); + } + pos = (pos+1)%n; + } + fprintf(stderr, "Signals not found\n"); + assert(false); + } + return pos; +} + +void Circom_CalcWit::setInputSignal(u64 h, uint i, FrElement & val){ + if (inputSignalAssignedCounter == 0) { + fprintf(stderr, "No more signals to be assigned\n"); + assert(false); + } + uint pos = getInputSignalHashPosition(h); + if (i >= circuit->InputHashMap[pos].signalsize) { + fprintf(stderr, "Input signal array access exceeds the size\n"); + assert(false); + } + + uint si = circuit->InputHashMap[pos].signalid+i; + if (inputSignalAssigned[si-get_main_input_signal_start()]) { + fprintf(stderr, "Signal assigned twice: %d\n", si); + assert(false); + } + signalValues[si] = val; + inputSignalAssigned[si-get_main_input_signal_start()] = true; + inputSignalAssignedCounter--; + if (inputSignalAssignedCounter == 0) { + run(this); + } +} + +u64 
Circom_CalcWit::getInputSignalSize(u64 h) { + uint pos = getInputSignalHashPosition(h); + return circuit->InputHashMap[pos].signalsize; +} + +std::string Circom_CalcWit::getTrace(u64 id_cmp){ + if (id_cmp == 0) return componentMemory[id_cmp].componentName; + else{ + u64 id_father = componentMemory[id_cmp].idFather; + std::string my_name = componentMemory[id_cmp].componentName; + + return Circom_CalcWit::getTrace(id_father) + "." + my_name; + } + + +} + +std::string Circom_CalcWit::generate_position_array(uint* dimensions, uint size_dimensions, uint index){ + std::string positions = ""; + + for (uint i = 0 ; i < size_dimensions; i++){ + uint last_pos = index % dimensions[size_dimensions -1 - i]; + index = index / dimensions[size_dimensions -1 - i]; + std::string new_pos = "[" + std::to_string(last_pos) + "]"; + positions = new_pos + positions; + } + return positions; +} + diff --git a/code_producers/src/c_elements/bn128/calcwit.hpp b/code_producers/src/c_elements/bn128/calcwit.hpp new file mode 100644 index 00000000..8df8e98e --- /dev/null +++ b/code_producers/src/c_elements/bn128/calcwit.hpp @@ -0,0 +1,68 @@ +#ifndef CIRCOM_CALCWIT_H +#define CIRCOM_CALCWIT_H + +#include +#include +#include +#include +#include + +#include "circom.hpp" +#include "fr.hpp" + +#define NMUTEXES 12 //512 + +u64 fnv1a(std::string s); + +class Circom_CalcWit { + + bool *inputSignalAssigned; + uint inputSignalAssignedCounter; + + Circom_Circuit *circuit; + +public: + + FrElement *signalValues; + Circom_Component* componentMemory; + FrElement* circuitConstants; + std::map templateInsId2IOSignalInfo; + std::string* listOfTemplateMessages; + + // parallelism + std::mutex numThreadMutex; + std::condition_variable ntcvs; + uint numThread; + + uint maxThread; + + // Functions called by the circuit + Circom_CalcWit(Circom_Circuit *aCircuit, uint numTh = NMUTEXES); + ~Circom_CalcWit(); + + // Public functions + void setInputSignal(u64 h, uint i, FrElement &val); + + u64 getInputSignalSize(u64 h); + 
+ inline uint getRemaingInputsToBeSet() { + return inputSignalAssignedCounter; + } + + inline void getWitness(uint idx, PFrElement val) { + Fr_copy(val, &signalValues[circuit->witness2SignalList[idx]]); + } + + std::string getTrace(u64 id_cmp); + + std::string generate_position_array(uint* dimensions, uint size_dimensions, uint index); + +private: + + uint getInputSignalHashPosition(u64 h); + +}; + +typedef void (*Circom_TemplateFunction)(uint __cIdx, Circom_CalcWit* __ctx); + +#endif // CIRCOM_CALCWIT_H diff --git a/code_producers/src/c_elements/bn128/circom.hpp b/code_producers/src/c_elements/bn128/circom.hpp new file mode 100644 index 00000000..f5a9cef5 --- /dev/null +++ b/code_producers/src/c_elements/bn128/circom.hpp @@ -0,0 +1,84 @@ +#ifndef __CIRCOM_H +#define __CIRCOM_H + +#include +#include +#include +#include +#include + +#include "fr.hpp" + +typedef unsigned long long u64; +typedef uint32_t u32; +typedef uint8_t u8; + +//only for the main inputs +struct __attribute__((__packed__)) HashSignalInfo { + u64 hash; + u64 signalid; + u64 signalsize; +}; + +struct IODef { + u32 offset; + u32 len; + u32 *lengths; +}; + +struct IODefPair { + u32 len; + IODef* defs; +}; + +struct Circom_Circuit { + // const char *P; + HashSignalInfo* InputHashMap; + u64* witness2SignalList; + FrElement* circuitConstants; + std::map templateInsId2IOSignalInfo; +}; + + +struct Circom_Component { + u32 templateId; + u64 signalStart; + u32 inputCounter; + std::string templateName; + std::string componentName; + u64 idFather; + u32* subcomponents; + bool *outputIsSet; //one for each output + std::mutex *mutexes; //one for each output + std::condition_variable *cvs; + std::thread *sbct; //subcomponent threads +}; + +/* +For every template instantiation create two functions: +- name_create +- name_run + +//PFrElement: pointer to FrElement + +Every name_run or circom_function has: +===================================== + +//array of PFrElements for auxiliars in expression computation 
(known size); +PFrElements expaux[]; + +//array of PFrElements for local vars (known size) +PFrElements lvar[]; + +*/ + +uint get_main_input_signal_start(); +uint get_main_input_signal_no(); +uint get_total_signal_no(); +uint get_number_of_components(); +uint get_size_of_input_hashmap(); +uint get_size_of_witness(); +uint get_size_of_constants(); +uint get_size_of_io_map(); + +#endif // __CIRCOM_H diff --git a/code_producers/src/c_elements/fr.asm b/code_producers/src/c_elements/bn128/fr.asm similarity index 100% rename from code_producers/src/c_elements/fr.asm rename to code_producers/src/c_elements/bn128/fr.asm diff --git a/code_producers/src/c_elements/fr.cpp b/code_producers/src/c_elements/bn128/fr.cpp similarity index 100% rename from code_producers/src/c_elements/fr.cpp rename to code_producers/src/c_elements/bn128/fr.cpp diff --git a/code_producers/src/c_elements/fr.hpp b/code_producers/src/c_elements/bn128/fr.hpp similarity index 100% rename from code_producers/src/c_elements/fr.hpp rename to code_producers/src/c_elements/bn128/fr.hpp diff --git a/code_producers/src/c_elements/bn128/main.cpp b/code_producers/src/c_elements/bn128/main.cpp new file mode 100644 index 00000000..92b25d47 --- /dev/null +++ b/code_producers/src/c_elements/bn128/main.cpp @@ -0,0 +1,244 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using json = nlohmann::json; + +#include "calcwit.hpp" +#include "circom.hpp" + + +#define handle_error(msg) \ + do { perror(msg); exit(EXIT_FAILURE); } while (0) + +Circom_Circuit* loadCircuit(std::string const &datFileName) { + Circom_Circuit *circuit = new Circom_Circuit; + + int fd; + struct stat sb; + + fd = open(datFileName.c_str(), O_RDONLY); + if (fd == -1) { + std::cout << ".dat file not found: " << datFileName << "\n"; + throw std::system_error(errno, std::generic_category(), "open"); + } + + if (fstat(fd, &sb) == -1) { /* To obtain file size */ + throw std::system_error(errno, 
std::generic_category(), "fstat"); + } + + u8* bdata = (u8*)mmap(NULL, sb.st_size, PROT_READ , MAP_PRIVATE, fd, 0); + close(fd); + + circuit->InputHashMap = new HashSignalInfo[get_size_of_input_hashmap()]; + uint dsize = get_size_of_input_hashmap()*sizeof(HashSignalInfo); + memcpy((void *)(circuit->InputHashMap), (void *)bdata, dsize); + + circuit->witness2SignalList = new u64[get_size_of_witness()]; + uint inisize = dsize; + dsize = get_size_of_witness()*sizeof(u64); + memcpy((void *)(circuit->witness2SignalList), (void *)(bdata+inisize), dsize); + + circuit->circuitConstants = new FrElement[get_size_of_constants()]; + if (get_size_of_constants()>0) { + inisize += dsize; + dsize = get_size_of_constants()*sizeof(FrElement); + memcpy((void *)(circuit->circuitConstants), (void *)(bdata+inisize), dsize); + } + + std::map templateInsId2IOSignalInfo1; + if (get_size_of_io_map()>0) { + u32 index[get_size_of_io_map()]; + inisize += dsize; + dsize = get_size_of_io_map()*sizeof(u32); + memcpy((void *)index, (void *)(bdata+inisize), dsize); + inisize += dsize; + assert(inisize % sizeof(u32) == 0); + assert(sb.st_size % sizeof(u32) == 0); + u32 dataiomap[(sb.st_size-inisize)/sizeof(u32)]; + memcpy((void *)dataiomap, (void *)(bdata+inisize), sb.st_size-inisize); + u32* pu32 = dataiomap; + + for (int i = 0; i < get_size_of_io_map(); i++) { + u32 n = *pu32; + IODefPair p; + p.len = n; + IODef defs[n]; + pu32 += 1; + for (u32 j = 0; j templateInsId2IOSignalInfo = move(templateInsId2IOSignalInfo1); + + munmap(bdata, sb.st_size); + + return circuit; +} + +void json2FrElements (json val, std::vector & vval){ + if (!val.is_array()) { + FrElement v; + std::string s; + if (val.is_string()) { + s = val.get(); + } else if (val.is_number()) { + double vd = val.get(); + std::stringstream stream; + stream << std::fixed << std::setprecision(0) << vd; + s = stream.str(); + } else { + throw new std::runtime_error("Invalid JSON type"); + } + Fr_str2element (&v, s.c_str()); + vval.push_back(v); 
+ } else { + for (uint i = 0; i < val.size(); i++) { + json2FrElements (val[i], vval); + } + } +} + + +void loadJson(Circom_CalcWit *ctx, std::string filename) { + std::ifstream inStream(filename); + json j; + inStream >> j; + + u64 nItems = j.size(); + // printf("Items : %llu\n",nItems); + for (json::iterator it = j.begin(); it != j.end(); ++it) { + // std::cout << it.key() << " => " << it.value() << '\n'; + u64 h = fnv1a(it.key()); + std::vector v; + json2FrElements(it.value(),v); + uint signalSize = ctx->getInputSignalSize(h); + if (v.size() < signalSize) { + std::ostringstream errStrStream; + errStrStream << "Error loading signal " << it.key() << ": Not enough values\n"; + throw std::runtime_error(errStrStream.str() ); + } + if (v.size() > signalSize) { + std::ostringstream errStrStream; + errStrStream << "Error loading signal " << it.key() << ": Too many values\n"; + throw std::runtime_error(errStrStream.str() ); + } + for (uint i = 0; i " << Fr_element2str(&(v[i])) << '\n'; + ctx->setInputSignal(h,i,v[i]); + } catch (std::runtime_error e) { + std::ostringstream errStrStream; + errStrStream << "Error setting signal: " << it.key() << "\n" << e.what(); + throw std::runtime_error(errStrStream.str() ); + } + } + } +} + +void writeBinWitness(Circom_CalcWit *ctx, std::string wtnsFileName) { + FILE *write_ptr; + + write_ptr = fopen(wtnsFileName.c_str(),"wb"); + + fwrite("wtns", 4, 1, write_ptr); + + u32 version = 2; + fwrite(&version, 4, 1, write_ptr); + + u32 nSections = 2; + fwrite(&nSections, 4, 1, write_ptr); + + // Header + u32 idSection1 = 1; + fwrite(&idSection1, 4, 1, write_ptr); + + u32 n8 = Fr_N64*8; + + u64 idSection1length = 8 + n8; + fwrite(&idSection1length, 8, 1, write_ptr); + + fwrite(&n8, 4, 1, write_ptr); + + fwrite(Fr_q.longVal, Fr_N64*8, 1, write_ptr); + + uint Nwtns = get_size_of_witness(); + + u32 nVars = (u32)Nwtns; + fwrite(&nVars, 4, 1, write_ptr); + + // Data + u32 idSection2 = 2; + fwrite(&idSection2, 4, 1, write_ptr); + + u64 
idSection2length = (u64)n8*(u64)Nwtns; + fwrite(&idSection2length, 8, 1, write_ptr); + + FrElement v; + + for (int i=0;igetWitness(i, &v); + Fr_toLongNormal(&v, &v); + fwrite(v.longVal, Fr_N64*8, 1, write_ptr); + } + fclose(write_ptr); +} + +int main (int argc, char *argv[]) { + std::string cl(argv[0]); + if (argc!=3) { + std::cout << "Usage: " << cl << " \n"; + } else { + std::string datfile = cl + ".dat"; + std::string jsonfile(argv[1]); + std::string wtnsfile(argv[2]); + + // auto t_start = std::chrono::high_resolution_clock::now(); + + Circom_Circuit *circuit = loadCircuit(datfile); + + Circom_CalcWit *ctx = new Circom_CalcWit(circuit); + + loadJson(ctx, jsonfile); + if (ctx->getRemaingInputsToBeSet()!=0) { + std::cerr << "Not all inputs have been set. Only " << get_main_input_signal_no()-ctx->getRemaingInputsToBeSet() << " out of " << get_main_input_signal_no() << std::endl; + assert(false); + } + /* + for (uint i = 0; igetWitness(i, &x); + std::cout << i << ": " << Fr_element2str(&x) << std::endl; + } + */ + + //auto t_mid = std::chrono::high_resolution_clock::now(); + //std::cout << std::chrono::duration(t_mid-t_start).count()<(t_end-t_mid).count()< std::io::Result<()> { +pub fn generate_main_cpp_file(c_folder: &PathBuf, prime: &String) -> std::io::Result<()> { use std::io::BufWriter; let mut file_path = c_folder.clone(); file_path.push("main"); @@ -683,7 +683,12 @@ pub fn generate_main_cpp_file(c_folder: &PathBuf) -> std::io::Result<()> { let file_name = file_path.to_str().unwrap(); let mut c_file = BufWriter::new(File::create(file_name).unwrap()); let mut code = "".to_string(); - let file = include_str!("main.cpp"); + let file = match prime.as_ref(){ + "bn128" => include_str!("bn128/main.cpp"), + "bls12381" => include_str!("bls12381/main.cpp"), + "goldilocks" => include_str!("goldilocks/main.cpp"), + _ => unreachable!(), + }; for line in file.lines() { code = format!("{}{}\n", code, line); } @@ -692,7 +697,7 @@ pub fn generate_main_cpp_file(c_folder: 
&PathBuf) -> std::io::Result<()> { Ok(()) } -pub fn generate_circom_hpp_file(c_folder: &PathBuf) -> std::io::Result<()> { +pub fn generate_circom_hpp_file(c_folder: &PathBuf, prime: &String) -> std::io::Result<()> { use std::io::BufWriter; let mut file_path = c_folder.clone(); file_path.push("circom"); @@ -700,7 +705,12 @@ pub fn generate_circom_hpp_file(c_folder: &PathBuf) -> std::io::Result<()> { let file_name = file_path.to_str().unwrap(); let mut c_file = BufWriter::new(File::create(file_name).unwrap()); let mut code = "".to_string(); - let file = include_str!("circom.hpp"); + let file = match prime.as_ref(){ + "bn128" => include_str!("bn128/circom.hpp"), + "bls12381" => include_str!("bls12381/circom.hpp"), + "goldilocks" => include_str!("goldilocks/circom.hpp"), + _ => unreachable!(), + }; for line in file.lines() { code = format!("{}{}\n", code, line); } @@ -709,7 +719,7 @@ pub fn generate_circom_hpp_file(c_folder: &PathBuf) -> std::io::Result<()> { Ok(()) } -pub fn generate_fr_hpp_file(c_folder: &PathBuf) -> std::io::Result<()> { +pub fn generate_fr_hpp_file(c_folder: &PathBuf, prime: &String) -> std::io::Result<()> { use std::io::BufWriter; let mut file_path = c_folder.clone(); file_path.push("fr"); @@ -717,7 +727,12 @@ pub fn generate_fr_hpp_file(c_folder: &PathBuf) -> std::io::Result<()> { let file_name = file_path.to_str().unwrap(); let mut c_file = BufWriter::new(File::create(file_name).unwrap()); let mut code = "".to_string(); - let file = include_str!("fr.hpp"); + let file = match prime.as_ref(){ + "bn128" => include_str!("bn128/fr.hpp"), + "bls12381" => include_str!("bls12381/fr.hpp"), + "goldilocks" => include_str!("goldilocks/fr.hpp"), + _ => unreachable!(), + }; for line in file.lines() { code = format!("{}{}\n", code, line); } @@ -726,7 +741,7 @@ pub fn generate_fr_hpp_file(c_folder: &PathBuf) -> std::io::Result<()> { Ok(()) } -pub fn generate_calcwit_hpp_file(c_folder: &PathBuf) -> std::io::Result<()> { +pub fn 
generate_calcwit_hpp_file(c_folder: &PathBuf, prime: &String) -> std::io::Result<()> { use std::io::BufWriter; let mut file_path = c_folder.clone(); file_path.push("calcwit"); @@ -734,7 +749,12 @@ pub fn generate_calcwit_hpp_file(c_folder: &PathBuf) -> std::io::Result<()> { let file_name = file_path.to_str().unwrap(); let mut c_file = BufWriter::new(File::create(file_name).unwrap()); let mut code = "".to_string(); - let file = include_str!("calcwit.hpp"); + let file = match prime.as_ref(){ + "bn128" => include_str!("bn128/calcwit.hpp"), + "bls12381" => include_str!("bls12381/calcwit.hpp"), + "goldilocks" => include_str!("goldilocks/calcwit.hpp"), + _ => unreachable!(), + }; for line in file.lines() { code = format!("{}{}\n", code, line); } @@ -743,7 +763,7 @@ pub fn generate_calcwit_hpp_file(c_folder: &PathBuf) -> std::io::Result<()> { Ok(()) } -pub fn generate_fr_cpp_file(c_folder: &PathBuf) -> std::io::Result<()> { +pub fn generate_fr_cpp_file(c_folder: &PathBuf, prime: &String) -> std::io::Result<()> { use std::io::BufWriter; let mut file_path = c_folder.clone(); file_path.push("fr"); @@ -751,7 +771,12 @@ pub fn generate_fr_cpp_file(c_folder: &PathBuf) -> std::io::Result<()> { let file_name = file_path.to_str().unwrap(); let mut c_file = BufWriter::new(File::create(file_name).unwrap()); let mut code = "".to_string(); - let file = include_str!("fr.cpp"); + let file = match prime.as_ref(){ + "bn128" => include_str!("bn128/fr.cpp"), + "bls12381" => include_str!("bls12381/fr.cpp"), + "goldilocks" => include_str!("goldilocks/fr.cpp"), + _ => unreachable!(), + }; for line in file.lines() { code = format!("{}{}\n", code, line); } @@ -760,7 +785,7 @@ pub fn generate_fr_cpp_file(c_folder: &PathBuf) -> std::io::Result<()> { Ok(()) } -pub fn generate_calcwit_cpp_file(c_folder: &PathBuf) -> std::io::Result<()> { +pub fn generate_calcwit_cpp_file(c_folder: &PathBuf, prime: &String) -> std::io::Result<()> { use std::io::BufWriter; let mut file_path = c_folder.clone(); 
file_path.push("calcwit"); @@ -768,7 +793,12 @@ pub fn generate_calcwit_cpp_file(c_folder: &PathBuf) -> std::io::Result<()> { let file_name = file_path.to_str().unwrap(); let mut c_file = BufWriter::new(File::create(file_name).unwrap()); let mut code = "".to_string(); - let file = include_str!("calcwit.cpp"); + let file = match prime.as_ref(){ + "bn128" => include_str!("bn128/calcwit.cpp"), + "bls12381" => include_str!("bls12381/calcwit.cpp"), + "goldilocks" => include_str!("goldilocks/calcwit.cpp"), + _ => unreachable!(), + }; for line in file.lines() { code = format!("{}{}\n", code, line); } @@ -777,7 +807,7 @@ pub fn generate_calcwit_cpp_file(c_folder: &PathBuf) -> std::io::Result<()> { Ok(()) } -pub fn generate_fr_asm_file(c_folder: &PathBuf) -> std::io::Result<()> { +pub fn generate_fr_asm_file(c_folder: &PathBuf, prime: &String) -> std::io::Result<()> { use std::io::BufWriter; let mut file_path = c_folder.clone(); file_path.push("fr"); @@ -785,7 +815,12 @@ pub fn generate_fr_asm_file(c_folder: &PathBuf) -> std::io::Result<()> { let file_name = file_path.to_str().unwrap(); let mut c_file = BufWriter::new(File::create(file_name).unwrap()); let mut code = "".to_string(); - let file = include_str!("fr.asm"); + let file = match prime.as_ref(){ + "bn128" => include_str!("bn128/fr.asm"), + "bls12381" => include_str!("bls12381/fr.asm"), + "goldilocks" => include_str!("goldilocks/fr.asm"), + _ => unreachable!(), + }; for line in file.lines() { code = format!("{}{}\n", code, line); } @@ -798,15 +833,21 @@ pub fn generate_make_file( c_folder: &PathBuf, run_name: &str, producer: &CProducer, + prime: &String, ) -> std::io::Result<()> { use std::io::BufWriter; - const MAKEFILE_TEMPLATE: &str = include_str!("./makefile"); + let makefile_template: &str = match prime.as_ref(){ + "bn128" => include_str!("bn128/makefile"), + "bls12381" => include_str!("bls12381/makefile"), + "goldilocks" => include_str!("goldilocks/makefile"), + _ => unreachable!(), + }; let template = 
handlebars::Handlebars::new(); let code = template .render_template( - MAKEFILE_TEMPLATE, + makefile_template, &json!({ "run_name": run_name, "has_parallelism": producer.has_parallelism, diff --git a/code_producers/src/c_elements/goldilocks/calcwit.cpp b/code_producers/src/c_elements/goldilocks/calcwit.cpp new file mode 100644 index 00000000..fc7ea033 --- /dev/null +++ b/code_producers/src/c_elements/goldilocks/calcwit.cpp @@ -0,0 +1,122 @@ +#include +#include +#include +#include "calcwit.hpp" + +extern void run(Circom_CalcWit* ctx); + +std::string int_to_hex( u64 i ) +{ + std::stringstream stream; + stream << "0x" + << std::setfill ('0') << std::setw(16) + << std::hex << i; + return stream.str(); +} + +u64 fnv1a(std::string s) { + u64 hash = 0xCBF29CE484222325LL; + for(char& c : s) { + hash ^= u64(c); + hash *= 0x100000001B3LL; + } + return hash; +} + +Circom_CalcWit::Circom_CalcWit (Circom_Circuit *aCircuit, uint maxTh) { + circuit = aCircuit; + inputSignalAssignedCounter = get_main_input_signal_no(); + inputSignalAssigned = new bool[inputSignalAssignedCounter]; + for (int i = 0; i< inputSignalAssignedCounter; i++) { + inputSignalAssigned[i] = false; + } + signalValues = new FrElement[get_total_signal_no()]; + Fr_str2element(&signalValues[0], "1"); + componentMemory = new Circom_Component[get_number_of_components()]; + circuitConstants = circuit ->circuitConstants; + templateInsId2IOSignalInfo = circuit -> templateInsId2IOSignalInfo; + + maxThread = maxTh; + + // parallelism + numThread = 0; + +} + +Circom_CalcWit::~Circom_CalcWit() { + // ... 
+} + +uint Circom_CalcWit::getInputSignalHashPosition(u64 h) { + uint n = get_size_of_input_hashmap(); + uint pos = (uint)(h % (u64)n); + if (circuit->InputHashMap[pos].hash!=h){ + uint inipos = pos; + pos++; + while (pos != inipos) { + if (circuit->InputHashMap[pos].hash==h) return pos; + if (circuit->InputHashMap[pos].hash==0) { + fprintf(stderr, "Signal not found\n"); + assert(false); + } + pos = (pos+1)%n; + } + fprintf(stderr, "Signals not found\n"); + assert(false); + } + return pos; +} + +void Circom_CalcWit::setInputSignal(u64 h, uint i, FrElement & val){ + if (inputSignalAssignedCounter == 0) { + fprintf(stderr, "No more signals to be assigned\n"); + assert(false); + } + uint pos = getInputSignalHashPosition(h); + if (i >= circuit->InputHashMap[pos].signalsize) { + fprintf(stderr, "Input signal array access exceeds the size\n"); + assert(false); + } + + uint si = circuit->InputHashMap[pos].signalid+i; + if (inputSignalAssigned[si-get_main_input_signal_start()]) { + fprintf(stderr, "Signal assigned twice: %d\n", si); + assert(false); + } + signalValues[si] = val; + inputSignalAssigned[si-get_main_input_signal_start()] = true; + inputSignalAssignedCounter--; + if (inputSignalAssignedCounter == 0) { + run(this); + } +} + +u64 Circom_CalcWit::getInputSignalSize(u64 h) { + uint pos = getInputSignalHashPosition(h); + return circuit->InputHashMap[pos].signalsize; +} + +std::string Circom_CalcWit::getTrace(u64 id_cmp){ + if (id_cmp == 0) return componentMemory[id_cmp].componentName; + else{ + u64 id_father = componentMemory[id_cmp].idFather; + std::string my_name = componentMemory[id_cmp].componentName; + + return Circom_CalcWit::getTrace(id_father) + "." 
+ my_name; + } + + +} + +std::string Circom_CalcWit::generate_position_array(uint* dimensions, uint size_dimensions, uint index){ + std::string positions = ""; + + for (uint i = 0 ; i < size_dimensions; i++){ + uint last_pos = index % dimensions[size_dimensions -1 - i]; + index = index / dimensions[size_dimensions -1 - i]; + std::string new_pos = "[" + std::to_string(last_pos) + "]"; + positions = new_pos + positions; + } + return positions; +} + diff --git a/code_producers/src/c_elements/goldilocks/calcwit.hpp b/code_producers/src/c_elements/goldilocks/calcwit.hpp new file mode 100644 index 00000000..8df8e98e --- /dev/null +++ b/code_producers/src/c_elements/goldilocks/calcwit.hpp @@ -0,0 +1,68 @@ +#ifndef CIRCOM_CALCWIT_H +#define CIRCOM_CALCWIT_H + +#include +#include +#include +#include +#include + +#include "circom.hpp" +#include "fr.hpp" + +#define NMUTEXES 12 //512 + +u64 fnv1a(std::string s); + +class Circom_CalcWit { + + bool *inputSignalAssigned; + uint inputSignalAssignedCounter; + + Circom_Circuit *circuit; + +public: + + FrElement *signalValues; + Circom_Component* componentMemory; + FrElement* circuitConstants; + std::map templateInsId2IOSignalInfo; + std::string* listOfTemplateMessages; + + // parallelism + std::mutex numThreadMutex; + std::condition_variable ntcvs; + uint numThread; + + uint maxThread; + + // Functions called by the circuit + Circom_CalcWit(Circom_Circuit *aCircuit, uint numTh = NMUTEXES); + ~Circom_CalcWit(); + + // Public functions + void setInputSignal(u64 h, uint i, FrElement &val); + + u64 getInputSignalSize(u64 h); + + inline uint getRemaingInputsToBeSet() { + return inputSignalAssignedCounter; + } + + inline void getWitness(uint idx, PFrElement val) { + Fr_copy(val, &signalValues[circuit->witness2SignalList[idx]]); + } + + std::string getTrace(u64 id_cmp); + + std::string generate_position_array(uint* dimensions, uint size_dimensions, uint index); + +private: + + uint getInputSignalHashPosition(u64 h); + +}; + +typedef void 
(*Circom_TemplateFunction)(uint __cIdx, Circom_CalcWit* __ctx); + +#endif // CIRCOM_CALCWIT_H diff --git a/code_producers/src/c_elements/goldilocks/circom.hpp b/code_producers/src/c_elements/goldilocks/circom.hpp new file mode 100644 index 00000000..f5a9cef5 --- /dev/null +++ b/code_producers/src/c_elements/goldilocks/circom.hpp @@ -0,0 +1,84 @@ +#ifndef __CIRCOM_H +#define __CIRCOM_H + +#include +#include +#include +#include +#include + +#include "fr.hpp" + +typedef unsigned long long u64; +typedef uint32_t u32; +typedef uint8_t u8; + +//only for the main inputs +struct __attribute__((__packed__)) HashSignalInfo { + u64 hash; + u64 signalid; + u64 signalsize; +}; + +struct IODef { + u32 offset; + u32 len; + u32 *lengths; +}; + +struct IODefPair { + u32 len; + IODef* defs; +}; + +struct Circom_Circuit { + // const char *P; + HashSignalInfo* InputHashMap; + u64* witness2SignalList; + FrElement* circuitConstants; + std::map templateInsId2IOSignalInfo; +}; + + +struct Circom_Component { + u32 templateId; + u64 signalStart; + u32 inputCounter; + std::string templateName; + std::string componentName; + u64 idFather; + u32* subcomponents; + bool *outputIsSet; //one for each output + std::mutex *mutexes; //one for each output + std::condition_variable *cvs; + std::thread *sbct; //subcomponent threads +}; + +/* +For every template instantiation create two functions: +- name_create +- name_run + +//PFrElement: pointer to FrElement + +Every name_run or circom_function has: +===================================== + +//array of PFrElements for auxiliars in expression computation (known size); +PFrElements expaux[]; + +//array of PFrElements for local vars (known size) +PFrElements lvar[]; + +*/ + +uint get_main_input_signal_start(); +uint get_main_input_signal_no(); +uint get_total_signal_no(); +uint get_number_of_components(); +uint get_size_of_input_hashmap(); +uint get_size_of_witness(); +uint get_size_of_constants(); +uint get_size_of_io_map(); + +#endif // __CIRCOM_H diff 
--git a/code_producers/src/c_elements/goldilocks/fr.asm b/code_producers/src/c_elements/goldilocks/fr.asm new file mode 100644 index 00000000..3de5e241 --- /dev/null +++ b/code_producers/src/c_elements/goldilocks/fr.asm @@ -0,0 +1,5897 @@ + + + global Fr_copy + global Fr_copyn + global Fr_add + global Fr_sub + global Fr_neg + global Fr_mul + global Fr_square + global Fr_band + global Fr_bor + global Fr_bxor + global Fr_bnot + global Fr_shl + global Fr_shr + global Fr_eq + global Fr_neq + global Fr_lt + global Fr_gt + global Fr_leq + global Fr_geq + global Fr_land + global Fr_lor + global Fr_lnot + global Fr_toNormal + global Fr_toLongNormal + global Fr_toMontgomery + global Fr_toInt + global Fr_isTrue + global Fr_q + global Fr_R3 + + global Fr_rawCopy + global Fr_rawZero + global Fr_rawSwap + global Fr_rawAdd + global Fr_rawSub + global Fr_rawNeg + global Fr_rawMMul + global Fr_rawMSquare + global Fr_rawToMontgomery + global Fr_rawFromMontgomery + global Fr_rawIsEq + global Fr_rawIsZero + global Fr_rawq + global Fr_rawR3 + + extern Fr_fail + DEFAULT REL + + section .text + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; copy +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; rdi <= the dest +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_copy: + + mov rax, [rsi + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + mov [rdi + 8], rax + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawCopy +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; rdi <= the dest +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_rawCopy: + + mov rax, [rsi + 0] + mov [rdi + 0], rax + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawZero +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_rawZero: + xor rax, rax + + mov [rdi + 0], rax + + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawSwap +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rdi <= a +; rsi <= p +; +; Nidified registers: +; rax 
+;;;;;;;;;;;;;;;;;;;;;;; +Fr_rawSwap: + + mov rax, [rsi + 0] + mov rcx, [rdi + 0] + mov [rdi + 0], rax + mov [rsi + 0], rbx + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; copy an array of integers +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; rdi <= the dest +; rdx <= number of integers to copy +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_copyn: +Fr_copyn_loop: + mov r8, rsi + mov r9, rdi + mov rax, 2 + mul rdx + mov rcx, rax + cld + rep movsq + mov rsi, r8 + mov rdi, r9 + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawCopyS2L +;;;;;;;;;;;;;;;;;;;;;; +; Convert a 64 bit integer to a long format field element +; Params: +; rsi <= the integer +; rdi <= Pointer to the overwritted element +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; + +rawCopyS2L: + mov al, 0x80 + shl rax, 56 + mov [rdi], rax ; set the result to LONG normal + + cmp rsi, 0 + js u64toLong_adjust_neg + + mov [rdi + 8], rsi + xor rax, rax + + ret + +u64toLong_adjust_neg: + add rsi, [q] ; Set the first digit + mov [rdi + 8], rsi ; + + mov rsi, -1 ; all ones + + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toInt +;;;;;;;;;;;;;;;;;;;;;; +; Convert a 64 bit integer to a long format field element +; Params: +; rsi <= Pointer to the element +; Returs: +; rax <= The value +;;;;;;;;;;;;;;;;;;;;;;; +Fr_toInt: + mov rax, [rdi] + bt rax, 63 + jc Fr_long + movsx rax, eax + ret + +Fr_long: + push rbp + push rsi + push rdx + mov rbp, rsp + bt rax, 62 + jnc Fr_longNormal +Fr_longMontgomery: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + +Fr_longNormal: + mov rax, [rdi + 8] + mov rcx, rax + shr rcx, 31 + jnz Fr_longNeg + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +Fr_longNeg: + mov rax, [rdi + 8] + sub rax, [q] + jnc Fr_longErr + + mov rcx, rax + sar rcx, 31 + add rcx, 1 + jnz Fr_longErr + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +Fr_longErr: + push rdi + mov rdi, 0 + call Fr_fail + pop rdi + mov rsp, rbp + 
pop rdx + pop rsi + pop rbp + ret + + + + + +Fr_rawMMul: + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov rdx,[rsi + 0] + mulx rax,r11,[rcx] + mov r12,r10 + adcx r12,rax + mov r13,r10 + adcx r13,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mov r11,r10 + adcx r11,r8 + adox r11,r12 + mov r12,r10 + adcx r12,r10 + adox r12,r13 + +;comparison + test r12,r12 +jnz Fr_rawMMul_sq + cmp r11,[q + 0] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq +Fr_rawMMul_sq: + sub r11,[q +0] +Fr_rawMMul_done: + mov [rdi + 0],r11 + pop r12 + pop r13 + ret +Fr_rawMSquare: + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov rdx,[rsi + 0] + mulx rax,r11,rdx + mov r12,r10 + adcx r12,rax + mov r13,r10 + adcx r13,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mov r11,r10 + adcx r11,r8 + adox r11,r12 + mov r12,r10 + adcx r12,r10 + adox r12,r13 + +;comparison + test r12,r12 +jnz Fr_rawMSquare_sq + cmp r11,[q + 0] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq +Fr_rawMSquare_sq: + sub r11,[q +0] +Fr_rawMSquare_done: + mov [rdi + 0],r11 + pop r12 + pop r13 + ret +Fr_rawMMul1: + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov rdx,rcx + mulx rax,r11,[rsi] + mov r12,r10 + adcx r12,rax + mov r13,r10 + adcx r13,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mov r11,r10 + adcx r11,r8 + adox r11,r12 + mov r12,r10 + adcx r12,r10 + adox r12,r13 + +;comparison + test r12,r12 +jnz Fr_rawMMul1_sq + cmp r11,[q + 0] + jc Fr_rawMMul1_done + jnz Fr_rawMMul1_sq +Fr_rawMMul1_sq: + sub r11,[q +0] +Fr_rawMMul1_done: + mov [rdi + 0],r11 + pop r12 + pop r13 + ret +Fr_rawFromMontgomery: + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov r11,[rsi +0] + mov r12,r10 + mov r13,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mov r11,r10 + 
adcx r11,r8 + adox r11,r12 + mov r12,r10 + adcx r12,r10 + adox r12,r13 + +;comparison + test r12,r12 +jnz Fr_rawFromMontgomery_sq + cmp r11,[q + 0] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq +Fr_rawFromMontgomery_sq: + sub r11,[q +0] +Fr_rawFromMontgomery_done: + mov [rdi + 0],r11 + pop r12 + pop r13 + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawToMontgomery +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to Montgomery +; rdi <= Pointer destination element +; rsi <= Pointer to src element +;;;;;;;;;;;;;;;;;;;; +Fr_rawToMontgomery: + push rdx + lea rdx, [R2] + call Fr_rawMMul + pop rdx + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toMontgomery +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to Montgomery +; rdi <= Destination +; rdi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toMontgomery: + mov rax, [rsi] + bt rax, 62 ; check if montgomery + jc toMontgomery_doNothing + bt rax, 63 + jc toMontgomeryLong + +toMontgomeryShort: + movsx rdx, eax + mov [rdi], rdx + add rdi, 8 + lea rsi, [R2] + cmp rdx, 0 + js negMontgomeryShort +posMontgomeryShort: + call Fr_rawMMul1 + sub rdi, 8 + mov r11b, 0x40 + shl r11d, 24 + mov [rdi+4], r11d + ret + +negMontgomeryShort: + neg rdx ; Do the multiplication positive and then negate the result. 
+ call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + mov r11b, 0x40 + shl r11d, 24 + mov [rdi+4], r11d + ret + + +toMontgomeryLong: + mov [rdi], rax + add rdi, 8 + add rsi, 8 + lea rdx, [R2] + call Fr_rawMMul + sub rsi, 8 + sub rdi, 8 + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + ret + + +toMontgomery_doNothing: + call Fr_copy + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toNormal +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number from Montgomery +; rdi <= Destination +; rsi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toNormal: + mov rax, [rsi] + bt rax, 62 ; check if montgomery + jnc toNormal_doNothing + bt rax, 63 ; if short, it means it's converted + jnc toNormal_doNothing + +toNormalLong: + add rdi, 8 + add rsi, 8 + call Fr_rawFromMontgomery + sub rsi, 8 + sub rdi, 8 + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + ret + +toNormal_doNothing: + call Fr_copy + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toLongNormal +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to long normal +; rdi <= Destination +; rsi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toLongNormal: + mov rax, [rsi] + bt rax, 63 ; check if long + jnc toLongNormal_fromShort + bt rax, 62 ; check if montgomery + jc toLongNormal_fromMontgomery + call Fr_copy ; It is already long + ret + +toLongNormal_fromMontgomery: + add rdi, 8 + add rsi, 8 + call Fr_rawFromMontgomery + sub rsi, 8 + sub rdi, 8 + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + ret + +toLongNormal_fromShort: + mov r8, rsi ; save rsi + movsx rsi, eax + call rawCopyS2L + mov rsi, r8 ; recover rsi + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + ret + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; add +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx 
+;;;;;;;;;;;;;;;;;;;;;; +Fr_add: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc add_l1 + bt rcx, 63 ; Check if is short second operand + jc add_s1l2 + +add_s1s2: ; Both operands are short + + xor rdx, rdx + mov edx, eax + add edx, ecx + jo add_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +add_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rsi, eax + movsx rdx, ecx + add rsi, rdx + call rawCopyS2L + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +add_l1: + bt rcx, 63 ; Check if is short second operand + jc add_l1l2 + +;;;;;;;; +add_l1s2: + bt rax, 62 ; check if montgomery first + jc add_l1ms2 +add_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rsi, 8 + movsx rdx, ecx + add rdi, 8 + cmp rdx, 0 + + jns tmp_1 + neg rdx + call rawSubLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +tmp_1: + call rawAddLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +add_l1ms2: + bt rcx, 62 ; check if montgomery second + jc add_l1ms2m +add_l1ms2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1ms2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;; +add_s1l2: + bt rcx, 62 ; check if montgomery second + jc add_s1l2m +add_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + lea rsi, [rdx + 8] + movsx rdx, eax + add 
rdi, 8 + cmp rdx, 0 + + jns tmp_2 + neg rdx + call rawSubLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +tmp_2: + call rawAddLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_s1l2m: + bt rax, 62 ; check if montgomery first + jc add_s1ml2m +add_s1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_s1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +;;;; +add_l1l2: + bt rax, 62 ; check if montgomery first + jc add_l1ml2 +add_l1nl2: + bt rcx, 62 ; check if montgomery second + jc add_l1nl2m +add_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1ml2: + bt rcx, 62 ; check if montgomery seconf + jc add_l1ml2m +add_l1ml2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + 
+ add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawAddLL +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of type long +; Params: +; rsi <= Pointer to the long data of element 1 +; rdx <= Pointer to the long data of element 2 +; rdi <= Pointer to the long data of result +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawAddLL: +Fr_rawAdd: + ; Add component by component with carry + + mov rax, [rsi + 0] + add rax, [rdx + 0] + mov [rdi + 0], rax + + jc rawAddLL_sq ; if overflow, substract q + + ; Compare with q + + + cmp rax, [q + 0] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + ; If equal substract q +rawAddLL_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + +rawAddLL_done: + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawAddLS +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of type long +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long data of element 1 +; rdx <= Value to be added +;;;;;;;;;;;;;;;;;;;;;; +rawAddLS: + ; Add component by component with carry + + add rdx, [rsi] + mov [rdi] ,rdx + + jc rawAddLS_sq ; if overflow, substract q + + ; Compare with q + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc rawAddLS_done ; q is bigget so done. 
+ jnz rawAddLS_sq ; q is lower + + ; If equal substract q +rawAddLS_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + +rawAddLS_done: + ret + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; sub +;;;;;;;;;;;;;;;;;;;;;; +; Substracts two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_sub: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is long first operand + jc sub_l1 + bt rcx, 63 ; Check if is long second operand + jc sub_s1l2 + +sub_s1s2: ; Both operands are short + + xor rdx, rdx + mov edx, eax + sub edx, ecx + jo sub_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +sub_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rsi, eax + movsx rdx, ecx + sub rsi, rdx + call rawCopyS2L + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +sub_l1: + bt rcx, 63 ; Check if is short second operand + jc sub_l1l2 + +;;;;;;;; +sub_l1s2: + bt rax, 62 ; check if montgomery first + jc sub_l1ms2 +sub_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rsi, 8 + movsx rdx, ecx + add rdi, 8 + cmp rdx, 0 + + jns tmp_3 + neg rdx + call rawAddLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +tmp_3: + call rawSubLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ms2: + bt rcx, 62 ; check if montgomery second + jc sub_l1ms2m +sub_l1ms2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + 
pop rbp + ret + + +sub_l1ms2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;; +sub_s1l2: + bt rcx, 62 ; check if montgomery first + jc sub_s1l2m +sub_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + cmp eax, 0 + + js tmp_4 + + ; First Operand is positive + push rsi + add rdi, 8 + movsx rsi, eax + add rdx, 8 + call rawSubSL + sub rdi, 8 + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_4: ; First operand is negative + push rsi + lea rsi, [rdx + 8] + movsx rdx, eax + add rdi, 8 + neg rdx + call rawNegLS + sub rdi, 8 + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_s1l2m: + bt rax, 62 ; check if montgomery second + jc sub_s1ml2m +sub_s1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_s1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +;;;; +sub_l1l2: + bt rax, 62 ; check if montgomery first + jc sub_l1ml2 +sub_l1nl2: + bt rcx, 62 ; check if montgomery second + jc sub_l1nl2m +sub_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov 
rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ml2: + bt rcx, 62 ; check if montgomery seconf + jc sub_l1ml2m +sub_l1ml2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawSubLS +;;;;;;;;;;;;;;;;;;;;;; +; Substracts a short element from the long element +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long data of element 1 where will be substracted +; rdx <= Value to be substracted +; [rdi] = [rsi] - rdx +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawSubLS: + ; Substract first digit + + mov rax, [rsi] + sub rax, rdx + mov [rdi] ,rax + mov rdx, 0 + + jnc rawSubLS_done ; if overflow, add q + + ; Add q +rawSubLS_aq: + + mov rax, [q + 0] + add [rdi + 0], rax + +rawSubLS_done: + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawSubSL +;;;;;;;;;;;;;;;;;;;;;; +; Substracts a long element from a short element +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Value from where will bo substracted +; rdx <= Pointer to long of the value to be substracted +; +; [rdi] = rsi - [rdx] +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawSubSL: + ; Substract first digit + sub rsi, [rdx] + mov [rdi] ,rsi + + + jnc rawSubSL_done ; if overflow, add q + + ; Add q +rawSubSL_aq: + + mov rax, [q + 0] + add [rdi + 0], rax + +rawSubSL_done: + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawSubLL +;;;;;;;;;;;;;;;;;;;;;; +; Substracts a long element from a short element +; Params: +; rdi <= Pointer to the long data of result +; rsi <= 
Pointer to long from where substracted +; rdx <= Pointer to long of the value to be substracted +; +; [rdi] = [rsi] - [rdx] +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawSubLL: +Fr_rawSub: + ; Substract first digit + + mov rax, [rsi + 0] + sub rax, [rdx + 0] + mov [rdi + 0], rax + + jnc rawSubLL_done ; if overflow, add q + + ; Add q +rawSubLL_aq: + + mov rax, [q + 0] + add [rdi + 0], rax + +rawSubLL_done: + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawNegLS +;;;;;;;;;;;;;;;;;;;;;; +; Substracts a long element and a short element form 0 +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to long from where substracted +; rdx <= short value to be substracted too +; +; [rdi] = -[rsi] - rdx +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawNegLS: + mov rax, [q] + sub rax, rdx + mov [rdi], rax + + setc dl + + + mov rax, [rdi + 0 ] + sub rax, [rsi + 0] + mov [rdi + 0], rax + + + setc dh + or dl, dh + jz rawNegSL_done + + ; it is a negative value, so add q + + mov rax, [q + 0] + add [rdi + 0], rax + + +rawNegSL_done: + ret + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; neg +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element to be negated +; rdi <= Pointer to result +; [rdi] = -[rsi] +;;;;;;;;;;;;;;;;;;;;;; +Fr_neg: + mov rax, [rsi] + bt rax, 63 ; Check if is short first operand + jc neg_l + +neg_s: ; Operand is short + + neg eax + jo neg_manageOverflow ; Check if overflow. 
(0x80000000 is the only case) + + mov [rdi], rax ; not necessary to adjust so just save and return + ret + +neg_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rsi, eax + neg rsi + call rawCopyS2L + pop rsi + ret + + + +neg_l: + mov [rdi], rax ; Copy the type + + add rdi, 8 + add rsi, 8 + call rawNegL + sub rdi, 8 + sub rsi, 8 + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawNeg +;;;;;;;;;;;;;;;;;;;;;; +; Negates a value +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long data of element 1 +; +; [rdi] = - [rsi] +;;;;;;;;;;;;;;;;;;;;;; +rawNegL: +Fr_rawNeg: + ; Compare is zero + + xor rax, rax + + cmp [rsi + 0], rax + jnz doNegate + + ; it's zero so just set to zero + + mov [rdi + 0], rax + + ret +doNegate: + + mov rax, [q + 0] + sub rax, [rsi + 0] + mov [rdi + 0], rax + + ret + + + + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; square +;;;;;;;;;;;;;;;;;;;;;; +; Squares a field element +; Params: +; rsi <= Pointer to element 1 +; rdi <= Pointer to result +; [rdi] = [rsi] * [rsi] +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_square: + mov r8, [rsi] + bt r8, 63 ; Check if is short first operand + jc square_l1 + +square_s1: ; Both operands are short + + xor rax, rax + mov eax, r8d + imul eax + jo square_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rax ; not necessary to adjust so just save and return + +square_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rax, r8d + imul rax + mov rsi, rax + call rawCopyS2L + pop rsi + + ret + +square_l1: + bt r8, 62 ; check if montgomery first + jc square_l1m +square_l1n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + call Fr_rawMSquare + sub rdi, 8 + sub rsi, 8 + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + +square_l1m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + call 
Fr_rawMSquare + sub rdi, 8 + sub rsi, 8 + + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; mul +;;;;;;;;;;;;;;;;;;;;;; +; Multiplies two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; [rdi] = [rsi] * [rdi] +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_mul: + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc mul_l1 + bt r9, 63 ; Check if is short second operand + jc mul_s1l2 + +mul_s1s2: ; Both operands are short + + xor rax, rax + mov eax, r8d + imul r9d + jo mul_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rax ; not necessary to adjust so just save and return + +mul_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rax, r8d + movsx rcx, r9d + imul rcx + mov rsi, rax + call rawCopyS2L + pop rsi + + ret + +mul_l1: + bt r9, 63 ; Check if is short second operand + jc mul_l1l2 + +;;;;;;;; +mul_l1s2: + bt r8, 62 ; check if montgomery first + jc mul_l1ms2 +mul_l1ns2: + bt r9, 62 ; check if montgomery first + jc mul_l1ns2m +mul_l1ns2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + add rsi, 8 + movsx rdx, r9d + add rdi, 8 + cmp rdx, 0 + + jns tmp_5 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_6 +tmp_5: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_6: + + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + + +mul_l1ns2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + + +mul_l1ms2: + bt r9, 62 ; check if montgomery second + jc mul_l1ms2m +mul_l1ms2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + add rsi, 8 + movsx rdx, r9d + add rdi, 8 + cmp rdx, 0 + + jns tmp_7 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_8 +tmp_7: + 
call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_8: + + + ret + +mul_l1ms2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + + +;;;;;;;; +mul_s1l2: + bt r8, 62 ; check if montgomery first + jc mul_s1ml2 +mul_s1nl2: + bt r9, 62 ; check if montgomery first + jc mul_s1nl2m +mul_s1nl2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + lea rsi, [rdx + 8] + movsx rdx, r8d + add rdi, 8 + cmp rdx, 0 + + jns tmp_9 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_10 +tmp_9: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_10: + + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + +mul_s1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + lea rsi, [rdx + 8] + movsx rdx, r8d + add rdi, 8 + cmp rdx, 0 + + jns tmp_11 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_12 +tmp_11: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_12: + + + ret + +mul_s1ml2: + bt r9, 62 ; check if montgomery first + jc mul_s1ml2m +mul_s1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_s1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +;;;; +mul_l1l2: + bt r8, 62 ; check if montgomery first + jc mul_l1ml2 +mul_l1nl2: + bt r9, 62 ; check if montgomery second + jc mul_l1nl2m +mul_l1nl2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + +mul_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + 
add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_l1ml2: + bt r9, 62 ; check if montgomery seconf + jc mul_l1ml2m +mul_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_l1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + + + + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; band +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_band: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc and_l1 + bt rcx, 63 ; Check if is short second operand + jc and_s1l2 + +and_s1s2: + + cmp eax, 0 + + js tmp_13 + + cmp ecx, 0 + js tmp_13 + xor rdx, rdx ; both ops are positive so do the op and return + mov edx, eax + and edx, ecx + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_13: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_15 ; q is bigget so done. 
+ jnz tmp_14 ; q is lower + + ; If equal substract q +tmp_14: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_15: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + + +and_l1: + bt rcx, 63 ; Check if is short second operand + jc and_l1l2 + + +and_l1s2: + bt rax, 62 ; check if montgomery first + jc and_l1ms2 +and_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_16 + movsx rax, ecx + and rax, [rsi +8] + mov [rdi+8], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + ; If equal substract q +tmp_17: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_18: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_16: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + ; If equal substract q +tmp_19: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_20: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +and_l1ms2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_21 + movsx rax, ecx + and rax, [rsi +8] + mov [rdi+8], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_23 ; q is bigget so done. 
+ jnz tmp_22 ; q is lower + + ; If equal substract q +tmp_22: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_23: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_21: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + ; If equal substract q +tmp_24: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_25: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +and_s1l2: + bt rcx, 62 ; check if montgomery first + jc and_s1l2m +and_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov eax, [rsi] + cmp eax, 0 + + js tmp_26 + and rax, [rdx +8] + mov [rdi+8], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + ; If equal substract q +tmp_27: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_28: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_26: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_30 ; q is bigget so done. 
+ jnz tmp_29 ; q is lower + + ; If equal substract q +tmp_29: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_30: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +and_s1l2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov eax, [rsi] + cmp eax, 0 + + js tmp_31 + and rax, [rdx +8] + mov [rdi+8], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + ; If equal substract q +tmp_32: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_33: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_31: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + ; If equal substract q +tmp_34: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_35: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +and_l1l2: + bt rax, 62 ; check if montgomery first + jc and_l1ml2 + bt rcx, 62 ; check if montgomery first + jc and_l1nl2m +and_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_37 ; q is bigget so done. 
+ jnz tmp_36 ; q is lower + + ; If equal substract q +tmp_36: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_37: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +and_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + ; If equal substract q +tmp_38: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_39: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +and_l1ml2: + bt rcx, 62 ; check if montgomery first + jc and_l1ml2m +and_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + ; If equal substract q +tmp_40: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_41: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +and_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_43 ; q is bigget so done. 
+ jnz tmp_42 ; q is lower + + ; If equal substract q +tmp_42: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_43: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; bor +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bor: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc or_l1 + bt rcx, 63 ; Check if is short second operand + jc or_s1l2 + +or_s1s2: + + cmp eax, 0 + + js tmp_44 + + cmp ecx, 0 + js tmp_44 + xor rdx, rdx ; both ops are positive so do the op and return + mov edx, eax + or edx, ecx + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_44: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_46 ; q is bigget so done. 
+ jnz tmp_45 ; q is lower + + ; If equal substract q +tmp_45: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_46: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + + +or_l1: + bt rcx, 63 ; Check if is short second operand + jc or_l1l2 + + +or_l1s2: + bt rax, 62 ; check if montgomery first + jc or_l1ms2 +or_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_47 + movsx rax, ecx + or rax, [rsi +8] + mov [rdi+8], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + ; If equal substract q +tmp_48: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_49: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_47: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + ; If equal substract q +tmp_50: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_51: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +or_l1ms2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_52 + movsx rax, ecx + or rax, [rsi +8] + mov [rdi+8], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_54 ; q is bigget so done. 
+ jnz tmp_53 ; q is lower + + ; If equal substract q +tmp_53: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_54: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_52: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + ; If equal substract q +tmp_55: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_56: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +or_s1l2: + bt rcx, 62 ; check if montgomery first + jc or_s1l2m +or_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov eax, [rsi] + cmp eax, 0 + + js tmp_57 + or rax, [rdx +8] + mov [rdi+8], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + ; If equal substract q +tmp_58: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_59: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_57: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_61 ; q is bigget so done. 
+ jnz tmp_60 ; q is lower + + ; If equal substract q +tmp_60: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_61: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +or_s1l2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov eax, [rsi] + cmp eax, 0 + + js tmp_62 + or rax, [rdx +8] + mov [rdi+8], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + ; If equal substract q +tmp_63: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_64: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_62: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + ; If equal substract q +tmp_65: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_66: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +or_l1l2: + bt rax, 62 ; check if montgomery first + jc or_l1ml2 + bt rcx, 62 ; check if montgomery first + jc or_l1nl2m +or_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_68 ; q is bigget so done. 
+ jnz tmp_67 ; q is lower + + ; If equal substract q +tmp_67: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_68: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +or_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + ; If equal substract q +tmp_69: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_70: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +or_l1ml2: + bt rcx, 62 ; check if montgomery first + jc or_l1ml2m +or_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + ; If equal substract q +tmp_71: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_72: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +or_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_74 ; q is bigget so done. 
+ jnz tmp_73 ; q is lower + + ; If equal substract q +tmp_73: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_74: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; bxor +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bxor: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc xor_l1 + bt rcx, 63 ; Check if is short second operand + jc xor_s1l2 + +xor_s1s2: + + cmp eax, 0 + + js tmp_75 + + cmp ecx, 0 + js tmp_75 + xor rdx, rdx ; both ops are positive so do the op and return + mov edx, eax + xor edx, ecx + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_75: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_77 ; q is bigget so done. 
+ jnz tmp_76 ; q is lower + + ; If equal substract q +tmp_76: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_77: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + + +xor_l1: + bt rcx, 63 ; Check if is short second operand + jc xor_l1l2 + + +xor_l1s2: + bt rax, 62 ; check if montgomery first + jc xor_l1ms2 +xor_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_78 + movsx rax, ecx + xor rax, [rsi +8] + mov [rdi+8], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + ; If equal substract q +tmp_79: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_80: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_78: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + ; If equal substract q +tmp_81: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_82: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +xor_l1ms2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_83 + movsx rax, ecx + xor rax, [rsi +8] + mov [rdi+8], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_85 ; q is bigget so done. 
+ jnz tmp_84 ; q is lower + + ; If equal substract q +tmp_84: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_85: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_83: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + ; If equal substract q +tmp_86: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_87: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +xor_s1l2: + bt rcx, 62 ; check if montgomery first + jc xor_s1l2m +xor_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov eax, [rsi] + cmp eax, 0 + + js tmp_88 + xor rax, [rdx +8] + mov [rdi+8], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + ; If equal substract q +tmp_89: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_90: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_88: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_92 ; q is bigget so done. 
+ jnz tmp_91 ; q is lower + + ; If equal substract q +tmp_91: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_92: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +xor_s1l2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov eax, [rsi] + cmp eax, 0 + + js tmp_93 + xor rax, [rdx +8] + mov [rdi+8], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + ; If equal substract q +tmp_94: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_95: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_93: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + ; If equal substract q +tmp_96: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_97: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +xor_l1l2: + bt rax, 62 ; check if montgomery first + jc xor_l1ml2 + bt rcx, 62 ; check if montgomery first + jc xor_l1nl2m +xor_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_99 ; q is bigget so done. 
+ jnz tmp_98 ; q is lower + + ; If equal substract q +tmp_98: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_99: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +xor_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + ; If equal substract q +tmp_100: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_101: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +xor_l1ml2: + bt rcx, 62 ; check if montgomery first + jc xor_l1ml2m +xor_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + ; If equal substract q +tmp_102: + + mov rax, [q + 0] + sub [rdi + 8], rax + +tmp_103: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +xor_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + and rax, [lboMask] + + mov [rdi + 8 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_105 ; q is bigget so done. 
+        jnz     tmp_104         ; value is above q (the equal case falls into the same code)
+
+        ; Equal to q or above it: subtract q once
+tmp_104:
+
+        mov     rax, [q + 0]
+        sub     [rdi + 8], rax
+
+tmp_105:
+
+        mov     rsp, rbp        ; release the stack temporaries reserved with sub rsp, 16
+        pop     rdx
+        pop     rsi
+        pop     rbp
+        ret
+
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; bnot
+;;;;;;;;;;;;;;;;;;;;;;
+; Bitwise NOT of one element of any kind; the result is written as a long normal element
+; Params:
+;   rsi <= Pointer to element 1
+;   rdi <= Pointer to result
+; Modified Registers:
+;    r8, r9, r10, r11, rax, rcx
+;;;;;;;;;;;;;;;;;;;;;;
+Fr_bnot:
+        push    rbp
+        push    rsi
+        push    rdx
+        mov     rbp, rsp        ; remember the stack top so the epilogue can drop temporaries
+        mov     r11b, 0x80
+        shl     r11d, 24        ; r11d = 0x80000000
+        mov     [rdi+4], r11d   ; result type dword: bit 63 only (long, normal)
+
+        mov     rax, [rsi]
+        bt      rax, 63         ; bit 63 set => operand is long
+        jc      bnot_l1
+bnot_s:
+
+        mov     r8, rdi         ; keep the result pointer while rdi is borrowed
+        sub     rsp, 16         ; 16-byte stack temporary for the converted operand
+        mov     rdi, rsp
+        push    rdx
+        push    r8
+        call    Fr_toLongNormal ; defined elsewhere; called with rsi = operand, rdi = temporary
+        mov     rsi, rdi        ; continue with the converted copy
+        pop     rdi
+        pop     rdx
+
+        jmp     bnot_l1n
+
+bnot_l1:
+        bt      rax, 62         ; bit 62 set => operand is Montgomery
+        jnc     bnot_l1n
+
+bnot_l1m:
+
+        mov     r8, rdi         ; keep the result pointer while rdi is borrowed
+        sub     rsp, 16         ; 16-byte stack temporary for the converted operand
+        mov     rdi, rsp
+        push    rdx
+        push    r8
+        call    Fr_toNormal     ; defined elsewhere; called with rsi = operand, rdi = temporary
+        mov     rsi, rdi        ; continue with the converted copy
+        pop     rdi
+        pop     rdx
+
+
+bnot_l1n:
+
+        mov     rax, [rsi + 8]  ; raw 64-bit value of the operand
+        not     rax
+
+        and     rax, [lboMask]  ; lboMask is defined elsewhere; presumably clears bits above the field width
+
+        mov     [rdi + 8], rax
+
+
+
+
+
+        ; Compare with q
+
+        mov     rax, [rdi + 8]
+        cmp     rax, [q + 0]
+        jc      tmp_107         ; value is below q, so done.
+        jnz     tmp_106         ; value is above q (the equal case falls into the same code)
+
+        ; Equal to q or above it: subtract q once
+tmp_106:
+
+        mov     rax, [q + 0]
+        sub     [rdi + 8], rax
+
+tmp_107:
+
+        mov     rsp, rbp        ; release the stack temporaries reserved with sub rsp, 16
+        pop     rdx
+        pop     rsi
+        pop     rbp
+        ret
+
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawShr
+;;;;;;;;;;;;;;;;;;;;;;
+; Logical right shift of one raw 64-bit value; 0 jumps to Fr_rawCopy, 64 or more jumps to Fr_rawZero
+; Params:
+;   rsi <= Pointer to the raw value to shift
+;   rdx <= how much is shifted
+;   rdi <= Pointer to the raw result
+; Modified Registers:
+;    r8, r9, r10, r11, rax, rcx
+;;;;;;;;;;;;;;;;;;;;;;
+rawShr:
+        cmp     rdx, 0
+        je      Fr_rawCopy              ; tail jump; Fr_rawCopy is defined elsewhere
+
+        cmp     rdx, 64
+        jae     Fr_rawZero              ; tail jump; Fr_rawZero is defined elsewhere
+
+rawShr_nz:
+        mov     r8, rdx
+        shr     r8,6                    ; r8 = whole 64-bit words to skip; 0 here because rdx < 64
+        mov     rcx, rdx
+        and     rcx, 0x3F               ; cl = bit offset inside a word
+        jz      rawShr_aligned
+        mov     ch, 64
+        sub     ch, cl                  ; ch = 64 - cl
+
+        mov     r9, 1
+        rol     cx, 8                   ; swap cl and ch so the next shift uses 64 - offset
+        shl     r9, cl
+        rol     cx, 8                   ; swap back
+        sub     r9, 1                   ; r9 = mask of the low (64 - offset) bits
+        mov     r10, r9
+        not     r10                     ; r10 = complement of r9
+
+
+        cmp     r8, 0
+        jae     rawShr_if2_0            ; unsigned >= 0 is always true, so the two-word code below is skipped
+
+        mov     rax, [rsi + r8*8 + 0 ]
+        shr     rax, cl
+        and     rax, r9
+        mov     r11, [rsi + r8*8 + 8 ]
+        rol     cx, 8
+        shl     r11, cl
+        rol     cx, 8
+        and     r11, r10
+        or      rax, r11
+        mov     [rdi + 0], rax
+
+        jmp     rawShr_endif_0
+rawShr_if2_0:
+        jne     rawShr_else_0           ; still the flags of cmp r8, 0
+
+        mov     rax, [rsi + r8*8 + 0 ]
+        shr     rax, cl
+        and     rax, r9
+        mov     [rdi + 0], rax
+
+        jmp     rawShr_endif_0
+rawShr_else_0:
+        xor     rax, rax
+        mov     [rdi + 0], rax
+rawShr_endif_0:
+
+
+        ret
+
+rawShr_aligned:
+
+        cmp     r8, 0
+        ja      rawShr_if3_0            ; word offset beyond the only limb => result is zero
+        mov     rax, [rsi + r8*8 + 0 ]
+        mov     [rdi + 0], rax
+        jmp     rawShr_endif3_0
+rawShr_if3_0:
+        xor     rax, rax
+        mov     [rdi + 0], rax
+rawShr_endif3_0:
+
+        ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawShl
+;;;;;;;;;;;;;;;;;;;;;;
+; Left shift of one raw 64-bit value, masked with lboMask and reduced by one conditional subtraction of q
+; Params:
+;   rsi <= Pointer to the raw value to shift
+;   rdx <= how much is shifted
+;   rdi <= Pointer to the raw result
+; Modified Registers:
+;    r8, r9, r10, r11, rax, rcx, rdx
+;;;;;;;;;;;;;;;;;;;;;;
+rawShl:
+        cmp     rdx, 0
+        je      Fr_rawCopy              ; tail jump; Fr_rawCopy is defined elsewhere
+
+        cmp     rdx, 64
+        jae     Fr_rawZero              ; tail jump; Fr_rawZero is defined elsewhere
+
+        mov     r8, rdx
+        shr     r8,6                    ; r8 = whole 64-bit words to skip; 0 here because rdx < 64
+        mov     rcx, rdx
+        and     rcx, 0x3F               ; cl = bit offset inside a word
+        jz      rawShl_aligned
+        mov     ch, 64
+        sub     ch, cl                  ; ch = 64 - cl
+
+
+        mov     r10, 1
+        shl     r10, cl
+        sub     r10, 1                  ; r10 = mask of the low cl bits
+        mov     r9, r10
+        not     r9                      ; r9 = complement of r10
+
+        mov     rdx, rsi
+        mov     rax, r8
+        shl     rax, 3
+        sub     rdx, rax                ; rdx = rsi - 8 * r8 (source word address)
+
+
+        cmp     r8, 0
+        jae     rawShl_if2_0            ; unsigned >= 0 is always true, so the two-word code below is skipped
+
+        mov     rax, [rdx + 0
]
+        shl     rax, cl
+        and     rax, r9
+        mov     r11, [rdx + -8 ]
+        rol     cx, 8                   ; swap cl and ch so the next shift uses 64 - offset
+        shr     r11, cl
+        rol     cx, 8                   ; swap back
+        and     r11, r10
+        or      rax, r11
+
+        and     rax, [lboMask]          ; lboMask is defined elsewhere; presumably clears bits above the field width
+
+
+        mov     [rdi + 0], rax
+
+        jmp     rawShl_endif_0
+rawShl_if2_0:
+        jne     rawShl_else_0           ; still the flags of cmp r8, 0
+
+        mov     rax, [rdx + 0 ]
+        shl     rax, cl
+        and     rax, r9                 ; keep only the bits above the shifted-in zeros
+
+        and     rax, [lboMask]          ; lboMask is defined elsewhere; presumably clears bits above the field width
+
+
+        mov     [rdi + 0], rax
+
+        jmp     rawShl_endif_0
+rawShl_else_0:
+        xor     rax, rax
+        mov     [rdi + 0], rax
+rawShl_endif_0:
+
+
+
+
+
+
+        ; Compare with q
+
+        mov     rax, [rdi + 0]
+        cmp     rax, [q + 0]
+        jc      tmp_109                 ; value is below q, so done.
+        jnz     tmp_108                 ; value is above q (the equal case falls into the same code)
+
+        ; Equal to q or above it: subtract q once
+tmp_108:
+
+        mov     rax, [q + 0]
+        sub     [rdi + 0], rax
+
+tmp_109:
+
+        ret;
+
+rawShl_aligned:
+        mov     rdx, rsi
+        mov     rax, r8
+        shl     rax, 3
+        sub     rdx, rax                ; rdx = rsi - 8 * r8 (source word address)
+
+
+        cmp     r8, 0
+        ja      rawShl_if3_0            ; word offset beyond the only limb => result is zero
+        mov     rax, [rdx + 0 ]
+
+        and     rax, [lboMask]          ; lboMask is defined elsewhere; presumably clears bits above the field width
+
+        mov     [rdi + 0], rax
+        jmp     rawShl_endif3_0
+rawShl_if3_0:
+        xor     rax, rax
+        mov     [rdi + 0], rax
+rawShl_endif3_0:
+
+
+
+
+
+        ; Compare with q
+
+        mov     rax, [rdi + 0]
+        cmp     rax, [q + 0]
+        jc      tmp_111                 ; value is below q, so done.
+        jnz     tmp_110                 ; value is above q (the equal case falls into the same code)
+
+        ; Equal to q or above it: subtract q once
+tmp_110:
+
+        mov     rax, [q + 0]
+        sub     [rdi + 0], rax
+
+tmp_111:
+
+        ret
+
+
+
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; shr
+;;;;;;;;;;;;;;;;;;;;;;
+; Shifts element 1 right by the amount in element 2 (negative short or long amount b with q - b < 64: shifts left)
+; Params:
+;   rsi <= Pointer to element 1
+;   rdx <= Pointer to element 2
+;   rdi <= Pointer to result
+; Modified Registers:
+;    r8, r9, r10, r11, rax, rcx
+;;;;;;;;;;;;;;;;;;;;;;
+Fr_shr:
+        push    rbp
+        push    rsi
+        push    rdi
+        push    rdx
+        mov     rbp, rsp        ; do_shl / do_shr restore rsp from rbp and pop these four registers
+
+
+
+
+
+
+        mov     rcx, [rdx]
+        bt      rcx, 63         ; bit 63 set => shift amount is a long element
+        jnc     tmp_112
+
+        ; long 2
+        bt      rcx, 62         ; bit 62 set => shift amount is Montgomery
+        jnc     tmp_113
+
+        mov     r8, rdi         ; keep the result pointer while rdi is borrowed
+        sub     rsp, 16         ; 16-byte stack temporary for the converted amount
+        mov     rdi, rsp
+        push    rsi
+        mov     rsi, rdx
+        push    r8
+        call    Fr_toNormal     ; defined elsewhere; called with rsi = element 2, rdi = temporary
+        mov     rdx, rdi        ; continue with the converted copy
+        pop     rdi
+        pop     rsi
+
+tmp_113:
+        mov     rcx, [rdx + 8]  ; raw value of the shift amount
+        cmp     rcx, 64
+        jae     tmp_114         ; 64 or more: try the amount as a negative number
+        xor     rax, rax
+
+        mov     rdx, rcx        ; rdx = shift count for do_shr
+        jmp     do_shr
+
+tmp_114:
+        mov     rcx, [q]
+        sub     rcx, [rdx+8]    ; rcx = q - amount
+        cmp     rcx, 64
+        jae     setzero         ; neither the amount nor q - amount is below 64
+        mov     rax, [q]
+        sub     rax, [rdx+8]
+
+        mov     rdx, rcx        ; rdx = shift count for do_shl
+        jmp     do_shl
+
+tmp_112:
+        cmp     ecx, 0          ; short amount: signed 32-bit value in ecx
+        jl      tmp_115
+        cmp     ecx, 64
+        jae     setzero
+        movsx   rdx, ecx
+        jmp     do_shr
+tmp_115:
+        neg     ecx             ; negative amount: shift the other way by its absolute value
+        cmp     ecx, 64
+        jae     setzero
+        movsx   rdx, ecx
+        jmp     do_shl
+
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; shl
+;;;;;;;;;;;;;;;;;;;;;;
+; Shifts element 1 left by the amount in element 2 (negative short or long amount b with q - b < 64: shifts right)
+; Params:
+;   rsi <= Pointer to element 1
+;   rdx <= Pointer to element 2
+;   rdi <= Pointer to result
+; Modified Registers:
+;    r8, r9, r10, r11, rax, rcx
+;;;;;;;;;;;;;;;;;;;;;;
+Fr_shl:
+        push    rbp
+        push    rsi
+        push    rdi
+        push    rdx
+        mov     rbp, rsp        ; do_shl / do_shr restore rsp from rbp and pop these four registers
+
+
+
+
+
+        mov     rcx, [rdx]
+        bt      rcx, 63         ; bit 63 set => shift amount is a long element
+        jnc     tmp_116
+
+        ; long 2
+        bt      rcx, 62         ; bit 62 set => shift amount is Montgomery
+        jnc     tmp_117
+
+        mov     r8, rdi         ; keep the result pointer while rdi is borrowed
+        sub     rsp, 16         ; 16-byte stack temporary for the converted amount
+        mov     rdi, rsp
+        push    rsi
+        mov     rsi, rdx
+        push    r8
+        call    Fr_toNormal     ; defined elsewhere; called with rsi = element 2, rdi = temporary
+        mov     rdx, rdi        ; continue with the converted copy
+        pop     rdi
+        pop     rsi
+
+tmp_117:
+        mov     rcx, [rdx + 8]  ; raw value of the shift amount
+        cmp     rcx, 64
+        jae     tmp_118         ; 64 or more: try the amount as a negative number
+        xor     rax, rax
+
+        mov     rdx, rcx        ; rdx = shift count for do_shl
+        jmp     do_shl
+
+tmp_118:
+        mov     rcx, [q]
+        sub     rcx, [rdx+8]    ; rcx = q - amount
+        cmp
rcx, 64 + jae setzero + mov rax, [q] + sub rax, [rdx+8] + + mov rdx, rcx + jmp do_shr + +tmp_116: + cmp ecx, 0 + jl tmp_119 + cmp ecx, 64 + jae setzero + movsx rdx, ecx + jmp do_shl +tmp_119: + neg ecx + cmp ecx, 64 + jae setzero + movsx rdx, ecx + jmp do_shr + + + +;;;;;;;;;; +;;; doShl +;;;;;;;;;; +do_shl: + mov rcx, [rsi] + bt rcx, 63 ; Check if is short second operand + jc do_shll +do_shls: + + movsx rax, ecx + cmp rax, 0 + jz setzero; + jl do_shlcl + + cmp rdx, 31 + jae do_shlcl + + mov cl, dl + shl rax, cl + mov rcx, rax + shr rcx, 31 + jnz do_shlcl + mov [rdi], rax + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + +do_shlcl: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp do_shlln + +do_shll: + bt rcx, 62 ; Check if is short second operand + jnc do_shlln + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + +do_shlln: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + add rdi, 8 + add rsi, 8 + call rawShl + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + + +;;;;;;;;;; +;;; doShr +;;;;;;;;;; +do_shr: + mov rcx, [rsi] + bt rcx, 63 ; Check if is short second operand + jc do_shrl +do_shrs: + movsx rax, ecx + cmp rax, 0 + jz setzero; + jl do_shrcl + + cmp rdx, 31 + jae setzero + + mov cl, dl + shr rax, cl + mov [rdi], rax + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + +do_shrcl: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + +do_shrl: + bt rcx, 62 ; Check if is short second operand + jnc do_shrln + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + +do_shrln: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + add rdi, 8 + add rsi, 8 + call rawShr + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + +setzero: + 
xor rax, rax + mov [rdi], rax + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rgt - Raw Greater Than +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi > *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rgt: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc rgt_l1 + bt r9, 63 ; Check if is short second operand + jc rgt_s1l2 + +rgt_s1s2: ; Both operands are short + cmp r8d, r9d + jg rgt_ret1 + jmp rgt_ret0 + + +rgt_l1: + bt r9, 63 ; Check if is short second operand + jc rgt_l1l2 + +;;;;;;;; +rgt_l1s2: + bt r8, 62 ; check if montgomery first + jc rgt_l1ms2 +rgt_l1ns2: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + +rgt_l1ms2: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rgtL1L2 + + +;;;;;;;; +rgt_s1l2: + bt r9, 62 ; check if montgomery second + jc rgt_s1l2m +rgt_s1l2n: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rgtL1L2 + +rgt_s1l2m: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + +;;;; +rgt_l1l2: + bt r8, 62 ; check if montgomery first + jc rgt_l1ml2 +rgt_l1nl2: + bt r9, 62 ; check if montgomery second + jc rgt_l1nl2m +rgt_l1nl2n: + jmp rgtL1L2 + +rgt_l1nl2m: + + mov r8, rdi + sub 
rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + +rgt_l1ml2: + bt r9, 62 ; check if montgomery second + jc rgt_l1ml2m +rgt_l1ml2n: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rgtL1L2 + +rgt_l1ml2m: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + + +;;;;;; +; rgtL1L2 +;;;;;; + +rgtL1L2: + + + mov rax, [rsi + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jmp rgtl1l2_p1 + + + +rgtl1l2_p1: + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jmp rgtRawL1L2 + + + + +rgtl1l2_n1: + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jmp rgt_ret0 + + + + + +rgtRawL1L2: + + mov rax, [rsi + 8] + cmp [rdx + 8], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + + +rgt_ret0: + xor rax, rax + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +rgt_ret1: + mov rax, 1 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rlt - Raw Less Than +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi > *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rlt: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc rlt_l1 + bt r9, 63 ; Check if is short second operand + jc rlt_s1l2 + +rlt_s1s2: ; Both operands are short + cmp r8d, r9d + jl rlt_ret1 + jmp rlt_ret0 + + +rlt_l1: + bt r9, 63 ; Check if is 
short second operand + jc rlt_l1l2 + +;;;;;;;; +rlt_l1s2: + bt r8, 62 ; check if montgomery first + jc rlt_l1ms2 +rlt_l1ns2: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + +rlt_l1ms2: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rltL1L2 + + +;;;;;;;; +rlt_s1l2: + bt r9, 62 ; check if montgomery second + jc rlt_s1l2m +rlt_s1l2n: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rltL1L2 + +rlt_s1l2m: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + +;;;; +rlt_l1l2: + bt r8, 62 ; check if montgomery first + jc rlt_l1ml2 +rlt_l1nl2: + bt r9, 62 ; check if montgomery second + jc rlt_l1nl2m +rlt_l1nl2n: + jmp rltL1L2 + +rlt_l1nl2m: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + +rlt_l1ml2: + bt r9, 62 ; check if montgomery second + jc rlt_l1ml2m +rlt_l1ml2n: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rltL1L2 + +rlt_l1ml2m: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + + +;;;;;; +; rltL1L2 +;;;;;; + +rltL1L2: + + + mov rax, 
[rsi + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jmp rltl1l2_p1 + + + +rltl1l2_p1: + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jmp rltRawL1L2 + + + + +rltl1l2_n1: + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jmp rlt_ret1 + + + + + +rltRawL1L2: + + mov rax, [rsi + 8] + cmp [rdx + 8], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + +rlt_ret0: + xor rax, rax + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +rlt_ret1: + mov rax, 1 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; req - Raw Eq +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi == *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_req: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc req_l1 + bt r9, 63 ; Check if is short second operand + jc req_s1l2 + +req_s1s2: ; Both operands are short + cmp r8d, r9d + je req_ret1 + jmp req_ret0 + + +req_l1: + bt r9, 63 ; Check if is short second operand + jc req_l1l2 + +;;;;;;;; +req_l1s2: + bt r8, 62 ; check if montgomery first + jc req_l1ms2 +req_l1ns2: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp reqL1L2 + +req_l1ms2: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + jmp reqL1L2 + + +;;;;;;;; +req_s1l2: + bt r9, 62 ; check if montgomery second + jc req_s1l2m +req_s1l2n: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp reqL1L2 + 
+req_s1l2m: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + jmp reqL1L2 + +;;;; +req_l1l2: + bt r8, 62 ; check if montgomery first + jc req_l1ml2 +req_l1nl2: + bt r9, 62 ; check if montgomery second + jc req_l1nl2m +req_l1nl2n: + jmp reqL1L2 + +req_l1nl2m: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + jmp reqL1L2 + +req_l1ml2: + bt r9, 62 ; check if montgomery second + jc req_l1ml2m +req_l1ml2n: + + mov r8, rdi + sub rsp, 16 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + jmp reqL1L2 + +req_l1ml2m: + jmp reqL1L2 + + +;;;;;; +; eqL1L2 +;;;;;; + +reqL1L2: + + mov rax, [rsi + 8] + cmp [rdx + 8], rax + jne req_ret0 ; rsi 1st > 2nd + + +req_ret1: + mov rax, 1 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +req_ret0: + xor rax, rax + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; gt +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_gt: + call Fr_rgt + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; lt +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_lt: + call Fr_rlt + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; eq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. 
+; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_eq: + call Fr_req + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; neq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_neq: + call Fr_req + xor rax, 1 + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; geq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_geq: + call Fr_rlt + xor rax, 1 + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; leq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_leq: + call Fr_rgt + xor rax, 1 + mov [rdi], rax + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawIsEq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rdi <= Pointer to element 1 +; rsi <= Pointer to element 2 +; Returns +; ax <= 1 if are equal 0, otherwise +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rawIsEq: + + mov rax, [rsi + 0] + cmp [rdi + 0], rax + jne rawIsEq_ret0 + +rawIsEq_ret1: + mov rax, 1 + ret + +rawIsEq_ret0: + xor rax, rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawIsZero +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rdi <= Pointer to element 1 +; Returns +; ax <= 1 if is 0, otherwise +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rawIsZero: + + cmp qword [rdi + 0], $0 + jne rawIsZero_ret0 + + +rawIsZero_ret1: + mov rax, 1 + ret + +rawIsZero_ret0: + xor rax, rax + ret + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; land +;;;;;;;;;;;;;;;;;;;;;; +; Logical 
and between two elements +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result zero or one +; Modified Registers: +; rax, rcx, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_land: + + + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_120 + + test eax, eax + jz retZero_122 + jmp retOne_121 + +tmp_120: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_121 + + +retZero_122: + mov qword r8, 0 + jmp done_123 + +retOne_121: + mov qword r8, 1 + +done_123: + + + + + + + + mov rax, [rdx] + bt rax, 63 + jc tmp_124 + + test eax, eax + jz retZero_126 + jmp retOne_125 + +tmp_124: + + mov rax, [rdx + 8] + test rax, rax + jnz retOne_125 + + +retZero_126: + mov qword rcx, 0 + jmp done_127 + +retOne_125: + mov qword rcx, 1 + +done_127: + + and rcx, r8 + mov [rdi], rcx + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; lor +;;;;;;;;;;;;;;;;;;;;;; +; Logical or between two elements +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result zero or one +; Modified Registers: +; rax, rcx, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_lor: + + + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_128 + + test eax, eax + jz retZero_130 + jmp retOne_129 + +tmp_128: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_129 + + +retZero_130: + mov qword r8, 0 + jmp done_131 + +retOne_129: + mov qword r8, 1 + +done_131: + + + + + + + + mov rax, [rdx] + bt rax, 63 + jc tmp_132 + + test eax, eax + jz retZero_134 + jmp retOne_133 + +tmp_132: + + mov rax, [rdx + 8] + test rax, rax + jnz retOne_133 + + +retZero_134: + mov qword rcx, 0 + jmp done_135 + +retOne_133: + mov qword rcx, 1 + +done_135: + + or rcx, r8 + mov [rdi], rcx + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; lnot +;;;;;;;;;;;;;;;;;;;;;; +; Do the logical not of an element +; Params: +; rsi <= Pointer to element to be tested +; rdi <= Pointer to result one if element1 is zero and zero otherwise +; Modified Registers: +; rax, rax, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_lnot: + + + + + + + mov rax, [rsi] + bt rax, 63 + 
jc tmp_136 + + test eax, eax + jz retZero_138 + jmp retOne_137 + +tmp_136: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_137 + + +retZero_138: + mov qword rcx, 0 + jmp done_139 + +retOne_137: + mov qword rcx, 1 + +done_139: + + test rcx, rcx + + jz lnot_retOne +lnot_retZero: + mov qword [rdi], 0 + ret +lnot_retOne: + mov qword [rdi], 1 + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; isTrue +;;;;;;;;;;;;;;;;;;;;;; +; Convert a 64 bit integer to a long format field element +; Params: +; rsi <= Pointer to the element +; Returs: +; rax <= 1 if true 0 if false +;;;;;;;;;;;;;;;;;;;;;;; +Fr_isTrue: + + + + + + + mov rax, [rdi] + bt rax, 63 + jc tmp_140 + + test eax, eax + jz retZero_142 + jmp retOne_141 + +tmp_140: + + mov rax, [rdi + 8] + test rax, rax + jnz retOne_141 + + +retZero_142: + mov qword rax, 0 + jmp done_143 + +retOne_141: + mov qword rax, 1 + +done_143: + + ret + + + + + + section .data +Fr_q: + dd 0 + dd 0x80000000 +Fr_rawq: +q dq 0xffffffff00000001 +half dq 0x7fffffff80000000 +R2 dq 0xfffffffe00000001 +Fr_R3: + dd 0 + dd 0x80000000 +Fr_rawR3: +R3 dq 0x0000000000000001 +lboMask dq 0xffffffffffffffff +np dq 0xfffffffeffffffff + diff --git a/code_producers/src/c_elements/goldilocks/fr.cpp b/code_producers/src/c_elements/goldilocks/fr.cpp new file mode 100644 index 00000000..39f5257d --- /dev/null +++ b/code_producers/src/c_elements/goldilocks/fr.cpp @@ -0,0 +1,321 @@ +#include "fr.hpp" +#include +#include +#include +#include +#include + + +static mpz_t q; +static mpz_t zero; +static mpz_t one; +static mpz_t mask; +static size_t nBits; +static bool initialized = false; + + +void Fr_toMpz(mpz_t r, PFrElement pE) { + FrElement tmp; + Fr_toNormal(&tmp, pE); + if (!(tmp.type & Fr_LONG)) { + mpz_set_si(r, tmp.shortVal); + if (tmp.shortVal<0) { + mpz_add(r, r, q); + } + } else { + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)tmp.longVal); + } +} + +void Fr_fromMpz(PFrElement pE, mpz_t v) { + if (mpz_fits_sint_p(v)) { + pE->type = Fr_SHORT; + pE->shortVal = 
mpz_get_si(v); + } else { + pE->type = Fr_LONG; + for (int i=0; ilongVal[i] = 0; + mpz_export((void *)(pE->longVal), NULL, -1, 8, -1, 0, v); + } +} + + +bool Fr_init() { + if (initialized) return false; + initialized = true; + mpz_init(q); + mpz_import(q, Fr_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal); + mpz_init_set_ui(zero, 0); + mpz_init_set_ui(one, 1); + nBits = mpz_sizeinbase (q, 2); + mpz_init(mask); + mpz_mul_2exp(mask, one, nBits); + mpz_sub(mask, mask, one); + return true; +} + +void Fr_str2element(PFrElement pE, char const *s) { + mpz_t mr; + mpz_init_set_str(mr, s, 10); + mpz_fdiv_r(mr, mr, q); + Fr_fromMpz(pE, mr); + mpz_clear(mr); +} + +char *Fr_element2str(PFrElement pE) { + FrElement tmp; + mpz_t r; + if (!(pE->type & Fr_LONG)) { + if (pE->shortVal>=0) { + char *r = new char[32]; + sprintf(r, "%d", pE->shortVal); + return r; + } else { + mpz_init_set_si(r, pE->shortVal); + mpz_add(r, r, q); + } + } else { + Fr_toNormal(&tmp, pE); + mpz_init(r); + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)tmp.longVal); + } + char *res = mpz_get_str (0, 10, r); + mpz_clear(r); + return res; +} + +void Fr_idiv(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + // char *s1 = mpz_get_str (0, 10, ma); + // printf("s1 %s\n", s1); + Fr_toMpz(mb, b); + // char *s2 = mpz_get_str (0, 10, mb); + // printf("s2 %s\n", s2); + mpz_fdiv_q(mr, ma, mb); + // char *sr = mpz_get_str (0, 10, mr); + // printf("r %s\n", sr); + Fr_fromMpz(r, mr); + + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); +} + +void Fr_mod(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + Fr_toMpz(mb, b); + mpz_fdiv_r(mr, ma, mb); + Fr_fromMpz(r, mr); + + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); +} + +void Fr_pow(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + 
mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + Fr_toMpz(mb, b); + mpz_powm(mr, ma, mb, q); + Fr_fromMpz(r, mr); + + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); +} + +void Fr_inv(PFrElement r, PFrElement a) { + mpz_t ma; + mpz_t mr; + mpz_init(ma); + mpz_init(mr); + + Fr_toMpz(ma, a); + mpz_invert(mr, ma, q); + Fr_fromMpz(r, mr); + mpz_clear(ma); + mpz_clear(mr); +} + +void Fr_div(PFrElement r, PFrElement a, PFrElement b) { + FrElement tmp; + Fr_inv(&tmp, b); + Fr_mul(r, a, &tmp); +} + +void Fr_fail() { + assert(false); +} + + +RawFr::RawFr() { + Fr_init(); + set(fZero, 0); + set(fOne, 1); + neg(fNegOne, fOne); +} + +RawFr::~RawFr() { +} + +void RawFr::fromString(Element &r, const std::string &s, uint32_t radix) { + mpz_t mr; + mpz_init_set_str(mr, s.c_str(), radix); + mpz_fdiv_r(mr, mr, q); + for (int i=0; i>3] & (1 << (p & 0x7))) +void RawFr::exp(Element &r, const Element &base, uint8_t* scalar, unsigned int scalarSize) { + bool oneFound = false; + Element copyBase; + copy(copyBase, base); + for (int i=scalarSize*8-1; i>=0; i--) { + if (!oneFound) { + if ( !BIT_IS_SET(scalar, i) ) continue; + copy(r, copyBase); + oneFound = true; + continue; + } + square(r, r); + if ( BIT_IS_SET(scalar, i) ) { + mul(r, r, copyBase); + } + } + if (!oneFound) { + copy(r, fOne); + } +} + +void RawFr::toMpz(mpz_t r, const Element &a) { + Element tmp; + Fr_rawFromMontgomery(tmp.v, a.v); + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)tmp.v); +} + +void RawFr::fromMpz(Element &r, const mpz_t a) { + for (int i=0; i +#include +#include + +#define Fr_N64 1 +#define Fr_SHORT 0x00000000 +#define Fr_LONG 0x80000000 +#define Fr_LONGMONTGOMERY 0xC0000000 +typedef uint64_t FrRawElement[Fr_N64]; +typedef struct __attribute__((__packed__)) { + int32_t shortVal; + uint32_t type; + FrRawElement longVal; +} FrElement; +typedef FrElement *PFrElement; +extern FrElement Fr_q; +extern FrElement Fr_R3; +extern FrRawElement Fr_rawq; +extern FrRawElement Fr_rawR3; + +extern 
"C" void Fr_copy(PFrElement r, PFrElement a); +extern "C" void Fr_copyn(PFrElement r, PFrElement a, int n); +extern "C" void Fr_add(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_sub(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_neg(PFrElement r, PFrElement a); +extern "C" void Fr_mul(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_square(PFrElement r, PFrElement a); +extern "C" void Fr_band(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bxor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bnot(PFrElement r, PFrElement a); +extern "C" void Fr_shl(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_shr(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_eq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_neq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lt(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_gt(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_leq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_geq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_land(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lnot(PFrElement r, PFrElement a); +extern "C" void Fr_toNormal(PFrElement r, PFrElement a); +extern "C" void Fr_toLongNormal(PFrElement r, PFrElement a); +extern "C" void Fr_toMontgomery(PFrElement r, PFrElement a); + +extern "C" int Fr_isTrue(PFrElement pE); +extern "C" int Fr_toInt(PFrElement pE); + +extern "C" void Fr_rawCopy(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawSwap(FrRawElement pRawResult, FrRawElement pRawA); +extern "C" void Fr_rawAdd(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawSub(FrRawElement pRawResult, const FrRawElement 
pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawNeg(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawMMul(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawMSquare(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawMMul1(FrRawElement pRawResult, const FrRawElement pRawA, uint64_t pRawB); +extern "C" void Fr_rawToMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA); +extern "C" void Fr_rawFromMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA); +extern "C" int Fr_rawIsEq(const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" int Fr_rawIsZero(const FrRawElement pRawB); + +extern "C" void Fr_fail(); + + +// Pending functions to convert + +void Fr_str2element(PFrElement pE, char const*s); +char *Fr_element2str(PFrElement pE); +void Fr_idiv(PFrElement r, PFrElement a, PFrElement b); +void Fr_mod(PFrElement r, PFrElement a, PFrElement b); +void Fr_inv(PFrElement r, PFrElement a); +void Fr_div(PFrElement r, PFrElement a, PFrElement b); +void Fr_pow(PFrElement r, PFrElement a, PFrElement b); + +class RawFr { + +public: + const static int N64 = Fr_N64; + const static int MaxBits = 64; + + + struct Element { + FrRawElement v; + }; + +private: + Element fZero; + Element fOne; + Element fNegOne; + +public: + + RawFr(); + ~RawFr(); + + const Element &zero() { return fZero; }; + const Element &one() { return fOne; }; + const Element &negOne() { return fNegOne; }; + Element set(int value); + void set(Element &r, int value); + + void fromString(Element &r, const std::string &n, uint32_t radix = 10); + std::string toString(const Element &a, uint32_t radix = 10); + + void inline copy(Element &r, const Element &a) { Fr_rawCopy(r.v, a.v); }; + void inline swap(Element &a, Element &b) { Fr_rawSwap(a.v, b.v); }; + void inline add(Element &r, const Element &a, const Element &b) { Fr_rawAdd(r.v, a.v, b.v); }; + void inline sub(Element &r, const 
Element &a, const Element &b) { Fr_rawSub(r.v, a.v, b.v); }; + void inline mul(Element &r, const Element &a, const Element &b) { Fr_rawMMul(r.v, a.v, b.v); }; + + Element inline add(const Element &a, const Element &b) { Element r; Fr_rawAdd(r.v, a.v, b.v); return r;}; + Element inline sub(const Element &a, const Element &b) { Element r; Fr_rawSub(r.v, a.v, b.v); return r;}; + Element inline mul(const Element &a, const Element &b) { Element r; Fr_rawMMul(r.v, a.v, b.v); return r;}; + + Element inline neg(const Element &a) { Element r; Fr_rawNeg(r.v, a.v); return r; }; + Element inline square(const Element &a) { Element r; Fr_rawMSquare(r.v, a.v); return r; }; + + Element inline add(int a, const Element &b) { return add(set(a), b);}; + Element inline sub(int a, const Element &b) { return sub(set(a), b);}; + Element inline mul(int a, const Element &b) { return mul(set(a), b);}; + + Element inline add(const Element &a, int b) { return add(a, set(b));}; + Element inline sub(const Element &a, int b) { return sub(a, set(b));}; + Element inline mul(const Element &a, int b) { return mul(a, set(b));}; + + void inline mul1(Element &r, const Element &a, uint64_t b) { Fr_rawMMul1(r.v, a.v, b); }; + void inline neg(Element &r, const Element &a) { Fr_rawNeg(r.v, a.v); }; + void inline square(Element &r, const Element &a) { Fr_rawMSquare(r.v, a.v); }; + void inv(Element &r, const Element &a); + void div(Element &r, const Element &a, const Element &b); + void exp(Element &r, const Element &base, uint8_t* scalar, unsigned int scalarSize); + + void inline toMontgomery(Element &r, const Element &a) { Fr_rawToMontgomery(r.v, a.v); }; + void inline fromMontgomery(Element &r, const Element &a) { Fr_rawFromMontgomery(r.v, a.v); }; + int inline eq(const Element &a, const Element &b) { return Fr_rawIsEq(a.v, b.v); }; + int inline isZero(const Element &a) { return Fr_rawIsZero(a.v); }; + + void toMpz(mpz_t r, const Element &a); + void fromMpz(Element &a, const mpz_t r); + + int toRprBE(const 
Element &element, uint8_t *data, int bytes); + int fromRprBE(Element &element, const uint8_t *data, int bytes); + + int bytes ( void ) { return Fr_N64 * 8; }; + + void fromUI(Element &r, unsigned long int v); + + static RawFr field; + +}; + + +#endif // __FR_H + + + diff --git a/code_producers/src/c_elements/goldilocks/main.cpp b/code_producers/src/c_elements/goldilocks/main.cpp new file mode 100644 index 00000000..92b25d47 --- /dev/null +++ b/code_producers/src/c_elements/goldilocks/main.cpp @@ -0,0 +1,244 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using json = nlohmann::json; + +#include "calcwit.hpp" +#include "circom.hpp" + + +#define handle_error(msg) \ + do { perror(msg); exit(EXIT_FAILURE); } while (0) + +Circom_Circuit* loadCircuit(std::string const &datFileName) { + Circom_Circuit *circuit = new Circom_Circuit; + + int fd; + struct stat sb; + + fd = open(datFileName.c_str(), O_RDONLY); + if (fd == -1) { + std::cout << ".dat file not found: " << datFileName << "\n"; + throw std::system_error(errno, std::generic_category(), "open"); + } + + if (fstat(fd, &sb) == -1) { /* To obtain file size */ + throw std::system_error(errno, std::generic_category(), "fstat"); + } + + u8* bdata = (u8*)mmap(NULL, sb.st_size, PROT_READ , MAP_PRIVATE, fd, 0); + close(fd); + + circuit->InputHashMap = new HashSignalInfo[get_size_of_input_hashmap()]; + uint dsize = get_size_of_input_hashmap()*sizeof(HashSignalInfo); + memcpy((void *)(circuit->InputHashMap), (void *)bdata, dsize); + + circuit->witness2SignalList = new u64[get_size_of_witness()]; + uint inisize = dsize; + dsize = get_size_of_witness()*sizeof(u64); + memcpy((void *)(circuit->witness2SignalList), (void *)(bdata+inisize), dsize); + + circuit->circuitConstants = new FrElement[get_size_of_constants()]; + if (get_size_of_constants()>0) { + inisize += dsize; + dsize = get_size_of_constants()*sizeof(FrElement); + memcpy((void 
*)(circuit->circuitConstants), (void *)(bdata+inisize), dsize); + } + + std::map templateInsId2IOSignalInfo1; + if (get_size_of_io_map()>0) { + u32 index[get_size_of_io_map()]; + inisize += dsize; + dsize = get_size_of_io_map()*sizeof(u32); + memcpy((void *)index, (void *)(bdata+inisize), dsize); + inisize += dsize; + assert(inisize % sizeof(u32) == 0); + assert(sb.st_size % sizeof(u32) == 0); + u32 dataiomap[(sb.st_size-inisize)/sizeof(u32)]; + memcpy((void *)dataiomap, (void *)(bdata+inisize), sb.st_size-inisize); + u32* pu32 = dataiomap; + + for (int i = 0; i < get_size_of_io_map(); i++) { + u32 n = *pu32; + IODefPair p; + p.len = n; + IODef defs[n]; + pu32 += 1; + for (u32 j = 0; j templateInsId2IOSignalInfo = move(templateInsId2IOSignalInfo1); + + munmap(bdata, sb.st_size); + + return circuit; +} + +void json2FrElements (json val, std::vector & vval){ + if (!val.is_array()) { + FrElement v; + std::string s; + if (val.is_string()) { + s = val.get(); + } else if (val.is_number()) { + double vd = val.get(); + std::stringstream stream; + stream << std::fixed << std::setprecision(0) << vd; + s = stream.str(); + } else { + throw new std::runtime_error("Invalid JSON type"); + } + Fr_str2element (&v, s.c_str()); + vval.push_back(v); + } else { + for (uint i = 0; i < val.size(); i++) { + json2FrElements (val[i], vval); + } + } +} + + +void loadJson(Circom_CalcWit *ctx, std::string filename) { + std::ifstream inStream(filename); + json j; + inStream >> j; + + u64 nItems = j.size(); + // printf("Items : %llu\n",nItems); + for (json::iterator it = j.begin(); it != j.end(); ++it) { + // std::cout << it.key() << " => " << it.value() << '\n'; + u64 h = fnv1a(it.key()); + std::vector v; + json2FrElements(it.value(),v); + uint signalSize = ctx->getInputSignalSize(h); + if (v.size() < signalSize) { + std::ostringstream errStrStream; + errStrStream << "Error loading signal " << it.key() << ": Not enough values\n"; + throw std::runtime_error(errStrStream.str() ); + } + if 
(v.size() > signalSize) { + std::ostringstream errStrStream; + errStrStream << "Error loading signal " << it.key() << ": Too many values\n"; + throw std::runtime_error(errStrStream.str() ); + } + for (uint i = 0; i " << Fr_element2str(&(v[i])) << '\n'; + ctx->setInputSignal(h,i,v[i]); + } catch (std::runtime_error e) { + std::ostringstream errStrStream; + errStrStream << "Error setting signal: " << it.key() << "\n" << e.what(); + throw std::runtime_error(errStrStream.str() ); + } + } + } +} + +void writeBinWitness(Circom_CalcWit *ctx, std::string wtnsFileName) { + FILE *write_ptr; + + write_ptr = fopen(wtnsFileName.c_str(),"wb"); + + fwrite("wtns", 4, 1, write_ptr); + + u32 version = 2; + fwrite(&version, 4, 1, write_ptr); + + u32 nSections = 2; + fwrite(&nSections, 4, 1, write_ptr); + + // Header + u32 idSection1 = 1; + fwrite(&idSection1, 4, 1, write_ptr); + + u32 n8 = Fr_N64*8; + + u64 idSection1length = 8 + n8; + fwrite(&idSection1length, 8, 1, write_ptr); + + fwrite(&n8, 4, 1, write_ptr); + + fwrite(Fr_q.longVal, Fr_N64*8, 1, write_ptr); + + uint Nwtns = get_size_of_witness(); + + u32 nVars = (u32)Nwtns; + fwrite(&nVars, 4, 1, write_ptr); + + // Data + u32 idSection2 = 2; + fwrite(&idSection2, 4, 1, write_ptr); + + u64 idSection2length = (u64)n8*(u64)Nwtns; + fwrite(&idSection2length, 8, 1, write_ptr); + + FrElement v; + + for (int i=0;igetWitness(i, &v); + Fr_toLongNormal(&v, &v); + fwrite(v.longVal, Fr_N64*8, 1, write_ptr); + } + fclose(write_ptr); +} + +int main (int argc, char *argv[]) { + std::string cl(argv[0]); + if (argc!=3) { + std::cout << "Usage: " << cl << " \n"; + } else { + std::string datfile = cl + ".dat"; + std::string jsonfile(argv[1]); + std::string wtnsfile(argv[2]); + + // auto t_start = std::chrono::high_resolution_clock::now(); + + Circom_Circuit *circuit = loadCircuit(datfile); + + Circom_CalcWit *ctx = new Circom_CalcWit(circuit); + + loadJson(ctx, jsonfile); + if (ctx->getRemaingInputsToBeSet()!=0) { + std::cerr << "Not all inputs 
have been set. Only " << get_main_input_signal_no()-ctx->getRemaingInputsToBeSet() << " out of " << get_main_input_signal_no() << std::endl; + assert(false); + } + /* + for (uint i = 0; igetWitness(i, &x); + std::cout << i << ": " << Fr_element2str(&x) << std::endl; + } + */ + + //auto t_mid = std::chrono::high_resolution_clock::now(); + //std::cout << std::chrono::duration(t_mid-t_start).count()<(t_end-t_mid).count()<\1b\18\f4\b5\c7q<\d1s\e93\db\c8\b9[*n") +(data (i32.const 736) "\fe\ff\ff\ff\01\00\00\00\02H\03\00\fa\b7\84X\f5O\bc\ec\efO\8c\99o\05\c5\acY\b1$\18") +(data (i32.const 768) "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00") +(data (i32.const 800) "\00\00\00\80\ff\ff\ff\7f\ff-\ff\7f\01\d2\de\a9\02\ec\d0\04\04\ec\9c\19\a4\be\ce\94\a9\d3\f69") +(data (i32.const 832) "\01\00\00\80\ff\ff\ff\7f\ff-\ff\7f\01\d2\de\a9\02\ec\d0\04\04\ec\9c\19\a4\be\ce\94\a9\d3\f69") +(data (i32.const 864) "\f5\ff\ff\ff\0a\00\00\00\0b\0c\12\00\df\f3\d9f\c5\b7\0b\96\a7\b7\83\cc\e5\9d;6m\cf\c9\04") +(data (i32.const 896) "\ff\ff\ff\ff\fe[\fe\ff\02\a4\bdS\05\d8\a1\09\08\d893H}\9d)S\a7\eds\00\00\00\00") +(data (i32.const 928) "|\f4\17\0c\5cm\ab\9c\e5qK\fd=\e9\e1\1c\05\d5\1dG0\b2m\0dj;:t\90\e9\0e?") +(data (i32.const 960) "\00\00\00\80\ff-\ff\7f\01\d2\de\a9\02\ec\d0\04\04\ec\9c\19\a4\be\ce\94\a9\d3\f69\00\00\00\00") +(data (i32.const 1920) "\00\00\00\80\ff\ff\ff\7f\ff-\ff\7f\01\d2\de\a9\02\ec\d0\04\04\ec\9c\19\a4\be\ce\94\a9\d3\f69") diff --git a/code_producers/src/wasm_elements/fr-types.wat b/code_producers/src/wasm_elements/bls12381/fr-types.wat similarity index 100% rename from code_producers/src/wasm_elements/fr-types.wat rename to code_producers/src/wasm_elements/bls12381/fr-types.wat diff --git a/code_producers/src/wasm_elements/generate_witness.js b/code_producers/src/wasm_elements/bls12381/generate_witness.js similarity index 100% rename from code_producers/src/wasm_elements/generate_witness.js rename to 
code_producers/src/wasm_elements/bls12381/generate_witness.js diff --git a/code_producers/src/wasm_elements/utils.js b/code_producers/src/wasm_elements/bls12381/utils.js similarity index 100% rename from code_producers/src/wasm_elements/utils.js rename to code_producers/src/wasm_elements/bls12381/utils.js diff --git a/code_producers/src/wasm_elements/witness_calculator.js b/code_producers/src/wasm_elements/bls12381/witness_calculator.js similarity index 97% rename from code_producers/src/wasm_elements/witness_calculator.js rename to code_producers/src/wasm_elements/bls12381/witness_calculator.js index 9f8d1ae0..9bae8839 100755 --- a/code_producers/src/wasm_elements/witness_calculator.js +++ b/code_producers/src/wasm_elements/bls12381/witness_calculator.js @@ -88,7 +88,7 @@ class WitnessCalculator { this.n32 = this.instance.exports.getFieldNumLen32(); this.instance.exports.getRawPrime(); - const arr = new Array(this.n32); + const arr = new Uint32Array(this.n32); for (let i=0; i "); +} else { + const input = JSON.parse(readFileSync(process.argv[3], "utf8")); + + const buffer = readFileSync(process.argv[2]); + wc(buffer).then(async witnessCalculator => { + // const w= await witnessCalculator.calculateWitness(input,0); + // for (let i=0; i< w.length; i++){ + // console.log(w[i]); + // } + const buff= await witnessCalculator.calculateWTNSBin(input,0); + writeFile(process.argv[4], buff, function(err) { + if (err) throw err; + }); + }); +} diff --git a/code_producers/src/wasm_elements/bn128/utils.js b/code_producers/src/wasm_elements/bn128/utils.js new file mode 100755 index 00000000..5d3b5d28 --- /dev/null +++ b/code_producers/src/wasm_elements/bn128/utils.js @@ -0,0 +1,61 @@ +module.exports.fnvHash = fnvHash; +module.exports.toArray32 = toArray32; +module.exports.fromArray32 = fromArray32; +module.exports.flatArray = flatArray; + +function toArray32(s,size) { + const res = []; //new Uint32Array(size); //has no unshift + let rem = BigInt(s); + const radix = 
BigInt(0x100000000); + while (rem) { + res.unshift( Number(rem % radix)); + rem = rem / radix; + } + if (size) { + var i = size - res.length; + while (i>0) { + res.unshift(0); + i--; + } + } + return res; +} + +function fromArray32(arr) { //returns a BigInt + var res = BigInt(0); + const radix = BigInt(0x100000000); + for (let i = 0; i { + const h = fnvHash(k); + const hMSB = parseInt(h.slice(0,8), 16); + const hLSB = parseInt(h.slice(8,16), 16); + const fArr = flatArray(input[k]); + let signalSize = this.instance.exports.getInputSignalSize(hMSB, hLSB); + if (signalSize < 0){ + throw new Error(`Signal ${k} not found\n`); + } + if (fArr.length < signalSize) { + throw new Error(`Not enough values for input signal ${k}\n`); + } + if (fArr.length > signalSize) { + throw new Error(`Too many values for input signal ${k}\n`); + } + for (let i=0; i0) { + res.unshift(0); + i--; + } + } + return res; +} + +function fromArray32(arr) { //returns a BigInt + var res = BigInt(0); + const radix = BigInt(0x100000000); + for (let i = 0; i "); +} else { + const input = JSON.parse(readFileSync(process.argv[3], "utf8")); + + const buffer = readFileSync(process.argv[2]); + wc(buffer).then(async witnessCalculator => { + // const w= await witnessCalculator.calculateWitness(input,0); + // for (let i=0; i< w.length; i++){ + // console.log(w[i]); + // } + const buff= await witnessCalculator.calculateWTNSBin(input,0); + writeFile(process.argv[4], buff, function(err) { + if (err) throw err; + }); + }); +} diff --git a/code_producers/src/wasm_elements/goldilocks/utils.js b/code_producers/src/wasm_elements/goldilocks/utils.js new file mode 100755 index 00000000..5d3b5d28 --- /dev/null +++ b/code_producers/src/wasm_elements/goldilocks/utils.js @@ -0,0 +1,61 @@ +module.exports.fnvHash = fnvHash; +module.exports.toArray32 = toArray32; +module.exports.fromArray32 = fromArray32; +module.exports.flatArray = flatArray; + +function toArray32(s,size) { + const res = []; //new Uint32Array(size); //has no 
unshift + let rem = BigInt(s); + const radix = BigInt(0x100000000); + while (rem) { + res.unshift( Number(rem % radix)); + rem = rem / radix; + } + if (size) { + var i = size - res.length; + while (i>0) { + res.unshift(0); + i--; + } + } + return res; +} + +function fromArray32(arr) { //returns a BigInt + var res = BigInt(0); + const radix = BigInt(0x100000000); + for (let i = 0; i { + const h = fnvHash(k); + const hMSB = parseInt(h.slice(0,8), 16); + const hLSB = parseInt(h.slice(8,16), 16); + const fArr = flatArray(input[k]); + let signalSize = this.instance.exports.getInputSignalSize(hMSB, hLSB); + if (signalSize < 0){ + throw new Error(`Signal ${k} not found\n`); + } + if (fArr.length < signalSize) { + throw new Error(`Not enough values for input signal ${k}\n`); + } + if (fArr.length > signalSize) { + throw new Error(`Too many values for input signal ${k}\n`); + } + for (let i=0; i0) { + res.unshift(0); + i--; + } + } + return res; +} + +function fromArray32(arr) { //returns a BigInt + var res = BigInt(0); + const radix = BigInt(0x100000000); + for (let i = 0; i Vec { wdata.push(format!( "(data (i32.const {}) \"{}\")", producer.get_raw_prime_start(), - wasm_hexa(32, &p) + wasm_hexa(producer.get_size_32_bit()*4, &p) )); wdata.push(format!( "(data (i32.const {}) \"{}\")", @@ -569,7 +569,7 @@ pub fn generate_data_list(producer: &WASMProducer) -> Vec { producer.get_witness_signal_id_list_start(), s )); - wdata.push(format!("(data (i32.const {}) \"{}\")",producer.get_signal_memory_start(),"\\00\\00\\00\\00\\00\\00\\00\\80\\01\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00\\00")); //setting 'one' as long normal 1 + wdata.push(format!("(data (i32.const {}) \"{}{}\")",producer.get_signal_memory_start(),"\\00\\00\\00\\00\\00\\00\\00\\80",wasm_hexa(producer.get_size_32_bit()*4, &BigInt::from(1)))); //setting 'one' as long normal 1 wdata.push(format!( "(data (i32.const {}) \"{}\")", 
producer.get_template_instance_to_io_signal_start(), @@ -1071,15 +1071,12 @@ pub fn copy_32_in_shared_rw_memory_generator(producer: &WASMProducer) -> Vec Vec { instructions.push(shl32()); instructions.push(add32()); // address of the witness in the witness list instructions.push(load32(None)); // number of the signal in the signal Memory - instructions.push(set_constant("40")); + instructions.push(set_constant(&format!("{}",producer.get_size_32_bit()*4+8)));//40 instructions.push(mul32()); instructions.push(set_constant(&producer.get_signal_memory_start().to_string())); instructions.push(add32()); // address of the signal in the signal Memory @@ -1443,26 +1440,41 @@ fn get_file_instructions(name: &str) -> Vec { instructions } -pub fn fr_types() -> Vec { +pub fn fr_types(prime: &String) -> Vec { let mut instructions = vec![]; - let file = include_str!("fr-types.wat"); + let file = match prime.as_ref(){ + "bn128" => include_str!("bn128/fr-types.wat"), + "bls12381" => include_str!("bls12381/fr-types.wat"), + "goldilocks" => include_str!("goldilocks/fr-types.wat"), + _ => unreachable!(), + }; for line in file.lines() { instructions.push(line.to_string()); } instructions } -pub fn fr_data() -> Vec { +pub fn fr_data(prime: &String) -> Vec { let mut instructions = vec![]; - let file = include_str!("fr-data.wat"); + let file = match prime.as_ref(){ + "bn128" => include_str!("bn128/fr-data.wat"), + "bls12381" => include_str!("bls12381/fr-data.wat"), + "goldilocks" => include_str!("goldilocks/fr-data.wat"), + _ => unreachable!(), + }; for line in file.lines() { instructions.push(line.to_string()); } instructions } -pub fn fr_code() -> Vec { +pub fn fr_code(prime: &String) -> Vec { let mut instructions = vec![]; - let file = include_str!("fr-code.wat"); + let file = match prime.as_ref(){ + "bn128" => include_str!("bn128/fr-code.wat"), + "bls12381" => include_str!("bls12381/fr-code.wat"), + "goldilocks" => include_str!("goldilocks/fr-code.wat"), + _ => unreachable!(), + }; for 
line in file.lines() { instructions.push(line.to_string()); } @@ -1488,7 +1500,7 @@ pub fn generate_utils_js_file(js_folder: &PathBuf) -> std::io::Result<()> { } */ -pub fn generate_generate_witness_js_file(js_folder: &PathBuf) -> std::io::Result<()> { +pub fn generate_generate_witness_js_file(js_folder: &PathBuf, prime: &String) -> std::io::Result<()> { use std::io::BufWriter; let mut file_path = js_folder.clone(); file_path.push("generate_witness"); @@ -1496,7 +1508,12 @@ pub fn generate_generate_witness_js_file(js_folder: &PathBuf) -> std::io::Result let file_name = file_path.to_str().unwrap(); let mut js_file = BufWriter::new(File::create(file_name).unwrap()); let mut code = "".to_string(); - let file = include_str!("generate_witness.js"); + let file = match prime.as_ref(){ + "bn128" => include_str!("bn128/generate_witness.js"), + "bls12381" => include_str!("bls12381/generate_witness.js"), + "goldilocks" => include_str!("goldilocks/generate_witness.js"), + _ => unreachable!(), + }; for line in file.lines() { code = format!("{}{}\n", code, line); } @@ -1505,7 +1522,7 @@ pub fn generate_generate_witness_js_file(js_folder: &PathBuf) -> std::io::Result Ok(()) } -pub fn generate_witness_calculator_js_file(js_folder: &PathBuf) -> std::io::Result<()> { +pub fn generate_witness_calculator_js_file(js_folder: &PathBuf, prime: &String) -> std::io::Result<()> { use std::io::BufWriter; let mut file_path = js_folder.clone(); file_path.push("witness_calculator"); @@ -1513,7 +1530,12 @@ pub fn generate_witness_calculator_js_file(js_folder: &PathBuf) -> std::io::Resu let file_name = file_path.to_str().unwrap(); let mut js_file = BufWriter::new(File::create(file_name).unwrap()); let mut code = "".to_string(); - let file = include_str!("witness_calculator.js"); + let file = match prime.as_ref(){ + "bn128" => include_str!("bn128/witness_calculator.js"), + "bls12381" => include_str!("bls12381/witness_calculator.js"), + "goldilocks" => 
include_str!("goldilocks/witness_calculator.js"), + _ => unreachable!(), + }; for line in file.lines() { code = format!("{}{}\n", code, line); } diff --git a/compiler/src/circuit_design/build.rs b/compiler/src/circuit_design/build.rs index f491b7c5..a331bfc2 100644 --- a/compiler/src/circuit_design/build.rs +++ b/compiler/src/circuit_design/build.rs @@ -148,12 +148,20 @@ fn build_function_instances( fn initialize_wasm_producer(vcp: &VCP, database: &TemplateDB, wat_flag:bool) -> WASMProducer { use program_structure::utils::constants::UsefulConstants; let initial_node = vcp.get_main_id(); - let prime = UsefulConstants::new().get_p().clone(); + let prime = UsefulConstants::new(&vcp.prime).get_p().clone(); let mut producer = WASMProducer::default(); let stats = vcp.get_stats(); producer.main_header = vcp.get_main_instance().unwrap().template_header.clone(); producer.main_signal_offset = 1; producer.prime = prime.to_str_radix(10); + producer.prime_str = vcp.prime.clone(); + producer.fr_memory_size = match vcp.prime.as_str(){ + "goldilocks" => 412, + "bn128" => 1948, + "bls12381" => 1948, + _ => unreachable!() + }; + //producer.fr_memory_size = 412 if goldilocks and 1948 for bn128 and bls12381 // for each created component we store three u32, for each son we store a u32 in its father producer.size_of_component_tree = stats.all_created_components * 3 + stats.all_needed_subcomponents_indexes; producer.total_number_of_signals = stats.all_signals + 1; @@ -181,12 +189,13 @@ fn initialize_wasm_producer(vcp: &VCP, database: &TemplateDB, wat_flag:bool) -> fn initialize_c_producer(vcp: &VCP, database: &TemplateDB) -> CProducer { use program_structure::utils::constants::UsefulConstants; let initial_node = vcp.get_main_id(); - let prime = UsefulConstants::new().get_p().clone(); + let prime = UsefulConstants::new(&vcp.prime).get_p().clone(); let mut producer = CProducer::default(); let stats = vcp.get_stats(); producer.main_header = 
vcp.get_main_instance().unwrap().template_header.clone(); producer.main_signal_offset = 1; producer.prime = prime.to_str_radix(10); + producer.prime_str = vcp.prime.clone(); producer.size_of_component_tree = stats.all_created_components * 3 + stats.all_needed_subcomponents_indexes; producer.total_number_of_signals = stats.all_signals + 1; producer.size_32_bit = prime.bits() / 32 + if prime.bits() % 32 != 0 { 1 } else { 0 }; diff --git a/compiler/src/circuit_design/circuit.rs b/compiler/src/circuit_design/circuit.rs index 2d231c0b..396a3a5a 100644 --- a/compiler/src/circuit_design/circuit.rs +++ b/compiler/src/circuit_design/circuit.rs @@ -40,7 +40,7 @@ impl WriteWasm for Circuit { code_aux = generate_memory_def_list(&producer); code.append(&mut code_aux); - code_aux = fr_types(); + code_aux = fr_types(&producer.prime_str); code.append(&mut code_aux); code_aux = generate_types_list(); @@ -48,7 +48,7 @@ impl WriteWasm for Circuit { code_aux = generate_exports_list(); code.append(&mut code_aux); - code_aux = fr_code(); + code_aux = fr_code(&producer.prime_str); code.append(&mut code_aux); code_aux = desp_io_subcomponent_generator(&producer); @@ -118,7 +118,7 @@ impl WriteWasm for Circuit { code_aux = generate_table_of_template_runs(&producer); code.append(&mut code_aux); - code_aux = fr_data(); + code_aux = fr_data(&producer.prime_str); code.append(&mut code_aux); code_aux = generate_data_list(&producer); @@ -143,7 +143,7 @@ impl WriteWasm for Circuit { writer.write_all(code.as_bytes()).map_err(|_| {})?; writer.flush().map_err(|_| {})?; - code_aux = fr_types(); + code_aux = fr_types(&producer.prime_str); code = merge_code(code_aux); writer.write_all(code.as_bytes()).map_err(|_| {})?; writer.flush().map_err(|_| {})?; @@ -158,7 +158,7 @@ impl WriteWasm for Circuit { writer.write_all(code.as_bytes()).map_err(|_| {})?; writer.flush().map_err(|_| {})?; - code_aux = fr_code(); + code_aux = fr_code(&producer.prime_str); code = merge_code(code_aux); 
writer.write_all(code.as_bytes()).map_err(|_| {})?; writer.flush().map_err(|_| {})?; @@ -270,7 +270,7 @@ impl WriteWasm for Circuit { writer.write_all(code.as_bytes()).map_err(|_| {})?; writer.flush().map_err(|_| {})?; - code_aux = fr_data(); + code_aux = fr_data(&producer.prime_str); code = merge_code(code_aux); writer.write_all(code.as_bytes()).map_err(|_| {})?; writer.flush().map_err(|_| {})?; @@ -415,22 +415,22 @@ impl Circuit { pub fn produce_c(&self, c_folder: &str, run_name: &str, c_circuit: &mut W, c_dat: &mut W) -> Result<(), ()> { use std::path::Path; let c_folder_path = Path::new(c_folder.clone()).to_path_buf(); - c_code_generator::generate_main_cpp_file(&c_folder_path).map_err(|_err| {})?; - c_code_generator::generate_circom_hpp_file(&c_folder_path).map_err(|_err| {})?; - c_code_generator::generate_fr_hpp_file(&c_folder_path).map_err(|_err| {})?; - c_code_generator::generate_calcwit_hpp_file(&c_folder_path).map_err(|_err| {})?; - c_code_generator::generate_fr_cpp_file(&c_folder_path).map_err(|_err| {})?; - c_code_generator::generate_calcwit_cpp_file(&c_folder_path).map_err(|_err| {})?; - c_code_generator::generate_fr_asm_file(&c_folder_path).map_err(|_err| {})?; - c_code_generator::generate_make_file(&c_folder_path,run_name,&self.c_producer).map_err(|_err| {})?; + c_code_generator::generate_main_cpp_file(&c_folder_path, &self.c_producer.prime_str).map_err(|_err| {})?; + c_code_generator::generate_circom_hpp_file(&c_folder_path, &self.c_producer.prime_str).map_err(|_err| {})?; + c_code_generator::generate_fr_hpp_file(&c_folder_path, &self.c_producer.prime_str).map_err(|_err| {})?; + c_code_generator::generate_calcwit_hpp_file(&c_folder_path, &self.c_producer.prime_str).map_err(|_err| {})?; + c_code_generator::generate_fr_cpp_file(&c_folder_path, &self.c_producer.prime_str).map_err(|_err| {})?; + c_code_generator::generate_calcwit_cpp_file(&c_folder_path, &self.c_producer.prime_str).map_err(|_err| {})?; + 
c_code_generator::generate_fr_asm_file(&c_folder_path, &self.c_producer.prime_str).map_err(|_err| {})?; + c_code_generator::generate_make_file(&c_folder_path,run_name,&self.c_producer, &self.c_producer.prime_str).map_err(|_err| {})?; c_code_generator::generate_dat_file(c_dat, &self.c_producer).map_err(|_err| {})?; self.write_c(c_circuit, &self.c_producer) } pub fn produce_wasm(&self, js_folder: &str, _wasm_name: &str, writer: &mut W) -> Result<(), ()> { use std::path::Path; let js_folder_path = Path::new(js_folder.clone()).to_path_buf(); - wasm_code_generator::generate_generate_witness_js_file(&js_folder_path).map_err(|_err| {})?; - wasm_code_generator::generate_witness_calculator_js_file(&js_folder_path).map_err(|_err| {})?; + wasm_code_generator::generate_generate_witness_js_file(&js_folder_path, &self.wasm_producer.prime_str).map_err(|_err| {})?; + wasm_code_generator::generate_witness_calculator_js_file(&js_folder_path, &self.wasm_producer.prime_str).map_err(|_err| {})?; self.write_wasm(writer, &self.wasm_producer) } } diff --git a/compiler/src/hir/very_concrete_program.rs b/compiler/src/hir/very_concrete_program.rs index 857ca2db..84ae72aa 100644 --- a/compiler/src/hir/very_concrete_program.rs +++ b/compiler/src/hir/very_concrete_program.rs @@ -177,6 +177,7 @@ pub struct VCPConfig { pub templates: Vec, pub templates_in_mixed: Vec, pub program: ProgramArchive, + pub prime: String, } #[derive(Clone)] @@ -189,6 +190,7 @@ pub struct VCP { pub templates: Vec, pub quick_knowledge: HashMap, pub templates_in_mixed: Vec, + pub prime: String, } impl VCP { pub fn new(config: VCPConfig) -> VCP { @@ -201,6 +203,7 @@ impl VCP { templates_in_mixed: config.templates_in_mixed, functions: vec![], quick_knowledge: HashMap::new(), + prime: config.prime, }; super::merger::run_preprocessing(&mut vcp, config.program); vcp diff --git a/constraint_generation/src/compute_constants.rs b/constraint_generation/src/compute_constants.rs index 727c1380..472189c5 100644 --- 
a/constraint_generation/src/compute_constants.rs +++ b/constraint_generation/src/compute_constants.rs @@ -16,7 +16,7 @@ struct Context<'a> { program_archive: &'a ProgramArchive, } -pub fn manage_functions(program_archive: &mut ProgramArchive, flag_verbose: bool) -> CCResult { +pub fn manage_functions(program_archive: &mut ProgramArchive, flag_verbose: bool, prime: &String) -> CCResult { let mut reports = vec![]; let mut processed = HashMap::new(); for (name, data) in program_archive.get_functions() { @@ -24,7 +24,7 @@ pub fn manage_functions(program_archive: &mut ProgramArchive, flag_verbose: bool let environment = EE::new(); let context = Context { program_archive, inside_template: false, environment: &environment }; - treat_statement(&mut code, &context, &mut reports, flag_verbose); + treat_statement(&mut code, &context, &mut reports, flag_verbose, prime); processed.insert(name.clone(), code); } for (k, v) in processed { @@ -41,12 +41,13 @@ pub fn compute_vct( instances: &mut Vec, program_archive: &ProgramArchive, flag_verbose: bool, + prime: &String ) -> CCResult { let mut reports = vec![]; for instance in instances { let environment = transform_header_into_environment(&instance.header); let context = Context { program_archive, inside_template: true, environment: &environment }; - treat_statement(&mut instance.code, &context, &mut reports, flag_verbose); + treat_statement(&mut instance.code, &context, &mut reports, flag_verbose, prime); } if reports.is_empty() { Result::Ok(()) @@ -73,25 +74,25 @@ fn argument_into_slice(argument: &Argument) -> AExpressionSlice { AExpressionSlice::new_array(dimensions, arithmetic_expressions) } -fn treat_statement(stmt: &mut Statement, context: &Context, reports: &mut ReportCollection, flag_verbose: bool) { +fn treat_statement(stmt: &mut Statement, context: &Context, reports: &mut ReportCollection, flag_verbose: bool, prime: &String) { if stmt.is_initialization_block() { - treat_init_block(stmt, context, reports, flag_verbose) + 
treat_init_block(stmt, context, reports, flag_verbose, prime) } else if stmt.is_block() { - treat_block(stmt, context, reports, flag_verbose) + treat_block(stmt, context, reports, flag_verbose, prime) } else if stmt.is_if_then_else() { - treat_conditional(stmt, context, reports, flag_verbose) + treat_conditional(stmt, context, reports, flag_verbose, prime) } else if stmt.is_while() { - treat_while(stmt, context, reports, flag_verbose) + treat_while(stmt, context, reports, flag_verbose, prime) } else { } } -fn treat_init_block(stmt: &mut Statement, context: &Context, reports: &mut ReportCollection, flag_verbose: bool) { +fn treat_init_block(stmt: &mut Statement, context: &Context, reports: &mut ReportCollection, flag_verbose: bool, prime: &String) { use Statement::InitializationBlock; if let InitializationBlock { initializations, .. } = stmt { for init in initializations { if init.is_declaration() { - treat_declaration(init, context, reports, flag_verbose) + treat_declaration(init, context, reports, flag_verbose, prime) } } } else { @@ -99,44 +100,44 @@ fn treat_init_block(stmt: &mut Statement, context: &Context, reports: &mut Repor } } -fn treat_block(stmt: &mut Statement, context: &Context, reports: &mut ReportCollection, flag_verbose: bool) { +fn treat_block(stmt: &mut Statement, context: &Context, reports: &mut ReportCollection, flag_verbose: bool, prime: &String) { use Statement::Block; if let Block { stmts, .. } = stmt { for s in stmts { - treat_statement(s, context, reports, flag_verbose); + treat_statement(s, context, reports, flag_verbose, prime); } } else { unreachable!() } } -fn treat_while(stmt: &mut Statement, context: &Context, reports: &mut ReportCollection, flag_verbose: bool) { +fn treat_while(stmt: &mut Statement, context: &Context, reports: &mut ReportCollection, flag_verbose: bool, prime: &String) { use Statement::While; if let While { stmt, .. 
} = stmt { - treat_statement(stmt, context, reports, flag_verbose); + treat_statement(stmt, context, reports, flag_verbose, prime); } else { unreachable!() } } -fn treat_conditional(stmt: &mut Statement, context: &Context, reports: &mut ReportCollection, flag_verbose: bool) { +fn treat_conditional(stmt: &mut Statement, context: &Context, reports: &mut ReportCollection, flag_verbose: bool, prime: &String) { use Statement::IfThenElse; if let IfThenElse { if_case, else_case, .. } = stmt { - treat_statement(if_case, context, reports, flag_verbose); + treat_statement(if_case, context, reports, flag_verbose, prime); if let Option::Some(s) = else_case { - treat_statement(s, context, reports, flag_verbose); + treat_statement(s, context, reports, flag_verbose, prime); } } else { unreachable!() } } -fn treat_declaration(stmt: &mut Statement, context: &Context, reports: &mut ReportCollection, flag_verbose: bool) { +fn treat_declaration(stmt: &mut Statement, context: &Context, reports: &mut ReportCollection, flag_verbose: bool, prime: &String) { use Statement::Declaration; if let Declaration { meta, dimensions, .. 
} = stmt { let mut concrete_dimensions = vec![]; for d in dimensions.iter_mut() { - let execution_response = treat_dimension(d, context, reports, flag_verbose); + let execution_response = treat_dimension(d, context, reports, flag_verbose, prime); if let Option::Some(v) = execution_response { concrete_dimensions.push(v); } else { @@ -153,7 +154,8 @@ fn treat_dimension( dim: &Expression, context: &Context, reports: &mut ReportCollection, - flag_verbose: bool + flag_verbose: bool, + prime: &String, ) -> Option { use crate::execute::execute_constant_expression; if context.inside_template && !dim.is_number() { @@ -163,7 +165,7 @@ fn treat_dimension( } else { let program = context.program_archive; let env = context.environment; - let execution_result = execute_constant_expression(dim, program, env.clone(), flag_verbose); + let execution_result = execute_constant_expression(dim, program, env.clone(), flag_verbose, prime); match execution_result { Result::Err(mut r) => { reports.append(&mut r); diff --git a/constraint_generation/src/execute.rs b/constraint_generation/src/execute.rs index 82d44757..e8407ccf 100644 --- a/constraint_generation/src/execute.rs +++ b/constraint_generation/src/execute.rs @@ -9,14 +9,16 @@ use super::environment_utils::{ ComponentSlice, MemoryError, MemorySlice, SignalSlice, SliceCapacity, }, }; + +use program_structure::constants::UsefulConstants; + use super::execution_data::analysis::Analysis; use super::execution_data::{ExecutedProgram, ExecutedTemplate, NodePointer}; use super::{ - ast::*, ArithmeticError, FileID, ProgramArchive, Report, ReportCode, ReportCollection, - UsefulConstants, + ast::*, ArithmeticError, FileID, ProgramArchive, Report, ReportCode, ReportCollection }; use circom_algebra::num_bigint::BigInt; -use std::collections::HashMap; +use std::collections::BTreeMap; type AExpr = ArithmeticExpressionGen; #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] @@ -37,17 +39,17 @@ struct RuntimeInformation { pub exec_program: 
ExecutedProgram, } impl RuntimeInformation { - pub fn new(current_file: FileID, id_max: usize) -> RuntimeInformation { + pub fn new(current_file: FileID, id_max: usize, prime: &String) -> RuntimeInformation { RuntimeInformation { current_file, block_type: BlockType::Known, analysis: Analysis::new(id_max), public_inputs: vec![], - constants: UsefulConstants::new(), + constants: UsefulConstants::new(prime), call_trace: Vec::new(), runtime_errors: ReportCollection::new(), environment: ExecutionEnvironment::new(), - exec_program: ExecutedProgram::new(), + exec_program: ExecutedProgram::new(prime), } } } @@ -78,10 +80,11 @@ enum ExecutionError { pub fn constraint_execution( program_archive: &ProgramArchive, - flag_verbose: bool + flag_verbose: bool, + prime: &String, ) -> Result { let main_file_id = program_archive.get_file_id_main(); - let mut runtime_information = RuntimeInformation::new(*main_file_id, program_archive.id_max); + let mut runtime_information = RuntimeInformation::new(*main_file_id, program_archive.id_max, prime); runtime_information.public_inputs = program_archive.get_public_inputs_main_component().clone(); let folded_value_result = execute_expression( program_archive.get_main_expression(), @@ -102,10 +105,11 @@ pub fn execute_constant_expression( expression: &Expression, program_archive: &ProgramArchive, environment: ExecutionEnvironment, - flag_verbose: bool + flag_verbose: bool, + prime: &String, ) -> Result { let current_file = expression.get_meta().get_file_id(); - let mut runtime_information = RuntimeInformation::new(current_file, program_archive.id_max); + let mut runtime_information = RuntimeInformation::new(current_file, program_archive.id_max, prime); runtime_information.environment = environment; let folded_value_result = execute_expression(expression, program_archive, &mut runtime_information, flag_verbose); @@ -986,7 +990,7 @@ fn execute_template_call( let is_parallel = program_archive.get_template_data(id).is_parallel(); let args_names = 
program_archive.get_template_data(id).get_name_of_params(); let template_body = program_archive.get_template_data(id).get_body_as_vec(); - let mut args_to_values = HashMap::new(); + let mut args_to_values = BTreeMap::new(); debug_assert_eq!(args_names.len(), parameter_values.len()); let mut instantiation_name = format!("{}(", id); for (name, value) in args_names.iter().zip(parameter_values) { diff --git a/constraint_generation/src/execution_data/executed_program.rs b/constraint_generation/src/execution_data/executed_program.rs index 56e7eedc..16031913 100644 --- a/constraint_generation/src/execution_data/executed_program.rs +++ b/constraint_generation/src/execution_data/executed_program.rs @@ -13,11 +13,16 @@ pub type ExportResult = Result<(DAG, VCP, ReportCollection), ReportCollection>; pub struct ExecutedProgram { pub model: Vec, pub template_to_nodes: HashMap>, + pub prime: String, } impl ExecutedProgram { - pub fn new() -> ExecutedProgram { - ExecutedProgram::default() + pub fn new(prime: &String) -> ExecutedProgram { + ExecutedProgram{ + model: Vec::new(), + template_to_nodes: HashMap::new(), + prime: prime.clone(), + } } pub fn identify_node(&self, name: &str, context: &ParameterContext) -> Option { if !self.template_to_nodes.contains_key(name) { @@ -49,7 +54,7 @@ impl ExecutedProgram { ) -> NodePointer { use super::filters::*; // Clean code - apply_unused(&mut node.code, &analysis); + apply_unused(&mut node.code, &analysis, &self.prime); apply_computed(&mut node.code, &analysis); // Insert template let possible_index = self.identify_node(node.template_name(), node.parameter_instances()); @@ -77,7 +82,7 @@ impl ExecutedProgram { } let mut warnings = vec![]; - let mut dag = DAG::new(); + let mut dag = DAG::new(&self.prime); let mut temp_instances = Vec::with_capacity(self.model.len()); let mut mixed_instances = vec![false; self.model.len()]; @@ -98,8 +103,8 @@ impl ExecutedProgram { warnings.append(&mut w); let dag_stats = produce_dags_stats(&dag); - 
crate::compute_constants::manage_functions(&mut program, flag_verbose)?; - crate::compute_constants::compute_vct(&mut temp_instances, &program, flag_verbose)?; + crate::compute_constants::manage_functions(&mut program, flag_verbose, &self.prime)?; + crate::compute_constants::compute_vct(&mut temp_instances, &program, flag_verbose, &self.prime)?; let mut mixed = vec![]; let mut index = 0; for in_mixed in mixed_instances { @@ -115,6 +120,7 @@ impl ExecutedProgram { templates: temp_instances, templates_in_mixed: mixed, program, + prime: self.prime, }; let vcp = VCP::new(config); Result::Ok((dag, vcp, warnings)) diff --git a/constraint_generation/src/execution_data/filters.rs b/constraint_generation/src/execution_data/filters.rs index 6618fe21..bd7f0837 100644 --- a/constraint_generation/src/execution_data/filters.rs +++ b/constraint_generation/src/execution_data/filters.rs @@ -1,21 +1,21 @@ use super::analysis::Analysis; use program_structure::ast::*; -pub fn apply_unused(stmt: &mut Statement, analysis: &Analysis) { - clean_dead_code(stmt, analysis); +pub fn apply_unused(stmt: &mut Statement, analysis: &Analysis, prime: &String) { + clean_dead_code(stmt, analysis, prime); } -fn clean_dead_code(stmt: &mut Statement, analysis: &Analysis) -> bool { +fn clean_dead_code(stmt: &mut Statement, analysis: &Analysis, prime: &String) -> bool { use circom_algebra::modular_arithmetic::as_bool; use Statement::*; match stmt { - While { stmt, .. } => clean_dead_code(stmt, analysis), + While { stmt, .. 
} => clean_dead_code(stmt, analysis, prime), IfThenElse { if_case, else_case, cond, meta } => { - let field = program_structure::constants::UsefulConstants::new().get_p().clone(); + let field = program_structure::constants::UsefulConstants::new(prime).get_p().clone(); let empty_block = Box::new(Block { meta: meta.clone(), stmts: vec![] }); - let if_case_empty = clean_dead_code(if_case, analysis); + let if_case_empty = clean_dead_code(if_case, analysis, prime); let else_case_empty = - if let Some(case) = else_case { clean_dead_code(case, analysis) } else { true }; + if let Some(case) = else_case { clean_dead_code(case, analysis, prime) } else { true }; if else_case_empty { *else_case = None; } @@ -34,7 +34,7 @@ fn clean_dead_code(stmt: &mut Statement, analysis: &Analysis) -> bool { for mut w in work { let id = w.get_meta().elem_id; if Analysis::is_reached(analysis, id) { - let empty = clean_dead_code(&mut w, analysis); + let empty = clean_dead_code(&mut w, analysis, prime); if !empty { stmts.push(w) } diff --git a/constraint_generation/src/execution_data/type_definitions.rs b/constraint_generation/src/execution_data/type_definitions.rs index e3372062..60cffcf4 100644 --- a/constraint_generation/src/execution_data/type_definitions.rs +++ b/constraint_generation/src/execution_data/type_definitions.rs @@ -1,10 +1,10 @@ use super::AExpressionSlice; use super::Constraint as ConstraintGen; -use std::collections::HashMap; +use std::collections::BTreeMap; pub type NodePointer = usize; pub type Constraint = ConstraintGen; -pub type ParameterContext = HashMap; +pub type ParameterContext = BTreeMap; pub type SignalCollector = Vec<(String, Vec)>; pub type ComponentCollector = Vec<(String, Vec)>; pub struct SubComponentData { diff --git a/constraint_generation/src/lib.rs b/constraint_generation/src/lib.rs index 45ed8655..0ce8da26 100644 --- a/constraint_generation/src/lib.rs +++ b/constraint_generation/src/lib.rs @@ -15,7 +15,6 @@ use dag::DAG; use 
execution_data::executed_program::ExportResult; use execution_data::ExecutedProgram; use program_structure::ast::{self}; -use program_structure::constants::UsefulConstants; use program_structure::error_code::ReportCode; use program_structure::error_definition::{Report, ReportCollection}; use program_structure::file_definition::FileID; @@ -30,13 +29,14 @@ pub struct BuildConfig { pub flag_p: bool, pub flag_verbose: bool, pub inspect_constraints: bool, + pub prime: String, } pub type ConstraintWriter = Box; type BuildResponse = Result<(ConstraintWriter, VCP), ()>; pub fn build_circuit(program: ProgramArchive, config: BuildConfig) -> BuildResponse { let files = program.file_library.clone(); - let exe = instantiation(&program, config.flag_verbose).map_err(|r| { + let exe = instantiation(&program, config.flag_verbose, &config.prime).map_err(|r| { Report::print_reports(&r, &files); })?; let (mut dag, mut vcp, warnings) = export(exe, program, config.flag_verbose).map_err(|r| { @@ -55,8 +55,8 @@ pub fn build_circuit(program: ProgramArchive, config: BuildConfig) -> BuildRespo } type InstantiationResponse = Result; -fn instantiation(program: &ProgramArchive, flag_verbose: bool) -> InstantiationResponse { - let execution_result = execute::constraint_execution(&program, flag_verbose); +fn instantiation(program: &ProgramArchive, flag_verbose: bool, prime: &String) -> InstantiationResponse { + let execution_result = execute::constraint_execution(&program, flag_verbose, prime); match execution_result { Ok(program_exe) => { let no_nodes = program_exe.number_of_nodes(); @@ -86,6 +86,7 @@ fn simplification_process(vcp: &mut VCP, dag: DAG, config: &BuildConfig) -> Cons parallel_flag: config.flag_p, port_substitution: config.flag_json_sub, no_rounds: config.no_rounds, + prime : config.prime.clone(), }; let list = DAG::map_to_list(dag, flags); VCP::add_witness_list(vcp, Rc::new(list.get_witness_as_vec())); diff --git a/dag/src/lib.rs b/dag/src/lib.rs index a04becd5..666cc64c 100644 --- 
a/dag/src/lib.rs +++ b/dag/src/lib.rs @@ -11,7 +11,6 @@ use constraint_writers::ConstraintExporter; use program_structure::constants::UsefulConstants; use program_structure::error_definition::ReportCollection; use std::collections::{HashMap, HashSet}; - type Signal = usize; type Constraint = circom_algebra::algebra::Constraint; type Substitution = circom_algebra::algebra::Substitution; @@ -33,7 +32,7 @@ pub struct Tree<'a> { impl<'a> Tree<'a> { pub fn new(dag: &DAG) -> Tree { - let constants = UsefulConstants::new(); + let constants = UsefulConstants::new(&dag.prime); let field = constants.get_p().clone(); let root = dag.get_main().unwrap(); let node_id = dag.main_id(); @@ -55,8 +54,7 @@ impl<'a> Tree<'a> { } pub fn go_to_subtree(current: &'a Tree, edge: &Edge) -> Tree<'a> { - let constants = UsefulConstants::new(); - let field = constants.get_p().clone(); + let field = current.field.clone(); let dag = current.dag; let node_id = edge.goes_to; let node = ¤t.dag.nodes[node_id]; @@ -259,11 +257,11 @@ impl Node { } } -#[derive(Default)] pub struct DAG { pub one_signal: usize, pub nodes: Vec, pub adjacency: Vec>, + pub prime: String, } impl ConstraintExporter for DAG { @@ -281,8 +279,13 @@ impl ConstraintExporter for DAG { } impl DAG { - pub fn new() -> DAG { - DAG::default() + pub fn new(prime: &String) -> DAG { + DAG{ + prime : prime.clone(), + one_signal: 0, + nodes: Vec::new(), + adjacency: Vec::new(), + } } pub fn add_edge(&mut self, to: usize, label: &str) -> Option<&Edge> { @@ -478,4 +481,5 @@ pub struct SimplificationFlags { pub flag_s: bool, pub parallel_flag: bool, pub port_substitution: bool, + pub prime : String, } diff --git a/dag/src/map_to_constraint_list.rs b/dag/src/map_to_constraint_list.rs index 10140b1b..0098a69b 100644 --- a/dag/src/map_to_constraint_list.rs +++ b/dag/src/map_to_constraint_list.rs @@ -88,7 +88,7 @@ pub fn map(dag: DAG, flags: SimplificationFlags) -> ConstraintList { use std::time::SystemTime; // println!("Start of dag to list 
mapping"); let now = SystemTime::now(); - let constants = UsefulConstants::new(); + let constants = UsefulConstants::new(&dag.prime); let field = constants.get_p().clone(); let init_id = dag.main_id(); let no_public_inputs = dag.public_inputs(); diff --git a/program_structure/src/utils/constants.rs b/program_structure/src/utils/constants.rs index 2a74d2be..ec715291 100644 --- a/program_structure/src/utils/constants.rs +++ b/program_structure/src/utils/constants.rs @@ -1,6 +1,12 @@ use num_bigint::BigInt; -const P_STR: &str = "21888242871839275222246405745257275088548364400416034343698204186575808495617"; +const P_BN128: &str = + "21888242871839275222246405745257275088548364400416034343698204186575808495617"; +const P_BLS12381: &str = + "52435875175126190479447740508185965837690552500527637822603658699938581184513"; +const P_GOLDILOCKS: &str = + "18446744069414584321"; +//const P_STR: &str = "21888242871839275222246405745257275088548364400416034343698204186575808495617"; pub struct UsefulConstants { p: BigInt, @@ -11,16 +17,29 @@ impl Clone for UsefulConstants { UsefulConstants { p: self.p.clone() } } } -impl Default for UsefulConstants { - fn default() -> Self { - UsefulConstants { p: BigInt::parse_bytes(P_STR.as_bytes(), 10).expect("can not parse p") } - } -} + + + +// impl Default for UsefulConstants { +// fn default() -> Self { +// let possible_prime : String = String::from("bn128"); +// let prime_to_use = if possible_prime.eq("bn128") {P_BN128} +// else if possible_prime.eq("bls12381") { P_BLS12381} +// else {P_GOLDILOCKS}; + +// UsefulConstants { p: BigInt::parse_bytes(prime_to_use.as_bytes(), 10).expect("can not parse p") } +// } +// } impl UsefulConstants { - pub fn new() -> UsefulConstants { - UsefulConstants::default() + pub fn new(possible_prime: &String) -> UsefulConstants { + let prime_to_use = if possible_prime.eq("bn128") {P_BN128} + else if possible_prime.eq("bls12381") { P_BLS12381} + else {P_GOLDILOCKS}; + + UsefulConstants { p: 
BigInt::parse_bytes(prime_to_use.as_bytes(), 10).expect("can not parse p") } } + pub fn get_p(&self) -> &BigInt { &self.p }