Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Builtin SHA256 hashing #6977

Open
wants to merge 28 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e19e816
Adds files and first function
Jun 26, 2024
32e1e3d
zig functions
MatthewJohnHeath Jul 1, 2024
5c6348d
Adds crypt to main.zig
MatthewJohnHeath Jul 2, 2024
6e0f94d
Converts to pointers
MatthewJohnHeath Jul 4, 2024
385e0ec
Zig functions export
MatthewJohnHeath Jul 5, 2024
88acae8
WIP broken. trying to do plumbing
MatthewJohnHeath Jul 6, 2024
f7ed577
WIP filling in missing match cases
MatthewJohnHeath Jul 6, 2024
8ecf644
Filling in missing matches before rebase
MatthewJohnHeath Jul 7, 2024
ee8640e
Fixed rebase
MatthewJohnHeath Jul 7, 2024
6845d47
Adds case staements for lowlevel to compile
MatthewJohnHeath Jul 7, 2024
053d3b5
switch out bad pointer rep
MatthewJohnHeath Aug 9, 2024
7a1dc3c
Fix Crypt builtin
smores56 Aug 30, 2024
1e20be7
Fix formatting
smores56 Aug 30, 2024
4e10dbc
Adding digest inspectors
MatthewJohnHeath Sep 2, 2024
907a8ff
Revert "Adding digest inspectors"
MatthewJohnHeath Sep 3, 2024
1cc2751
Adds functions to access digest
MatthewJohnHeath Sep 4, 2024
4248bac
add missing module import
MatthewJohnHeath Sep 4, 2024
01cda95
Fixes structured binding in digest256ToBytes
MatthewJohnHeath Sep 9, 2024
e1e2a5e
Name changes
MatthewJohnHeath Sep 10, 2024
e29ad5b
Tidy formatting
MatthewJohnHeath Sep 10, 2024
87c44e7
Attempt to fix formatting
MatthewJohnHeath Sep 10, 2024
de025dd
Ran fomatter
MatthewJohnHeath Sep 10, 2024
d61a7ee
tests
MatthewJohnHeath Sep 12, 2024
2bfad6f
Docs for exposed functions and types
MatthewJohnHeath Sep 12, 2024
aada47b
Fix spelling and zig fmt
MatthewJohnHeath Sep 12, 2024
6717da6
Response to review on comment
MatthewJohnHeath Sep 13, 2024
154ec64
Fixes typo in test
MatthewJohnHeath Sep 13, 2024
208fc98
Merge branch 'main' into main
MatthewJohnHeath Sep 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions crates/compiler/builtins/bitcode/src/crypt.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
const std = @import("std");
const crypto = std.crypto;
const sha2 = crypto.hash.sha2;
const list = @import("list.zig");
const utils = @import("utils.zig");

/// Handle to a SHA-256 hasher state, carried as a raw byte pointer so it can
/// be passed through `callconv(.C)` functions.
const Sha256 = extern struct {
    location: [*]u8,

    /// Reinterprets `location` as a pointer to the `sha2.Sha256` state it addresses.
    fn pointer(self: Sha256) *sha2.Sha256 {
        const state: *sha2.Sha256 = @alignCast(@ptrCast(self.location));
        return state;
    }
};

/// Creates a fresh hasher state with no data added.
///
/// Storage is obtained from `utils.allocateWithRefcount` (sized and aligned
/// for `sha2.Sha256`) and initialised with `sha2.Sha256.init`.
pub fn emptySha256() callconv(.C) Sha256 {
    const raw = utils.allocateWithRefcount(@sizeOf(sha2.Sha256), @alignOf(sha2.Sha256), false);
    const state: *sha2.Sha256 = @alignCast(@ptrCast(raw));
    state.* = sha2.Sha256.init(.{});
    return .{ .location = @ptrCast(state) };
}

/// Returns a new hasher state equal to `sha` with the bytes of `data` added.
///
/// The input state is copied into a newly created one, so `sha` itself is
/// not modified. A list whose `bytes` pointer is null contributes nothing.
pub fn sha256AddBytes(sha: Sha256, data: list.RocList) callconv(.C) Sha256 {
    const result = emptySha256();
    const state = result.pointer();
    // Start from a copy of the caller's state rather than updating it in place.
    state.* = sha.pointer().*;
    if (data.bytes) |bytes| {
        state.update(bytes[0..data.length]);
    }
    return result;
}

/// A 256-bit digest carried as two 128-bit halves so it can be returned from
/// a `callconv(.C)` function.
pub const Digest256 = extern struct {
/// First 128 bits of the digest.
firstHalf: u128,
/// Remaining 128 bits of the digest.
secondHalf: u128,
};

/// Returns the digest of all data added to `sha` so far.
///
/// The 32 digest bytes are packed big-endian into the two halves: digest
/// byte 0 is the most significant byte of `firstHalf`, and digest byte 31 is
/// the least significant byte of `secondHalf`. Building the integers
/// explicitly (instead of `@bitCast`) keeps the result independent of the
/// target's endianness and matches consumers that read each half
/// most-significant byte first.
pub fn sha256Digest(sha: Sha256) callconv(.C) Digest256 {
    // `peek` works on a copy of the state, so more data can still be added to `sha`.
    const digest = sha.pointer().*.peek();

    var first_half: u128 = 0;
    var second_half: u128 = 0;
    for (digest[0..16], digest[16..32]) |high, low| {
        first_half = (first_half << 8) | high;
        second_half = (second_half << 8) | low;
    }

    return Digest256{
        .firstHalf = first_half,
        .secondHalf = second_half,
    };
}
11 changes: 11 additions & 0 deletions crates/compiler/builtins/bitcode/src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ const ROC_BUILTINS = "roc_builtins";
const NUM = "num";
const STR = "str";

// Crypt Module
const crypt = @import("crypt.zig");
// Export each Crypt builtin at compile time under the name passed to `exportCryptFn`.
comptime {
exportCryptFn(crypt.emptySha256, "emptySha256");
exportCryptFn(crypt.sha256AddBytes, "sha256AddBytes");
exportCryptFn(crypt.sha256Digest, "sha256Digest");
}

// Dec Module
const dec = @import("dec.zig");

Expand Down Expand Up @@ -387,6 +395,9 @@ fn exportListFn(comptime func: anytype, comptime func_name: []const u8) void {
/// Exports `func` through `exportBuiltinFn`, prefixing `func_name` with "dec.".
fn exportDecFn(comptime func: anytype, comptime func_name: []const u8) void {
exportBuiltinFn(func, "dec." ++ func_name);
}
/// Exports `func` through `exportBuiltinFn`, prefixing `func_name` with "crypt.".
fn exportCryptFn(comptime func: anytype, comptime func_name: []const u8) void {
    const qualified_name = "crypt." ++ func_name;
    exportBuiltinFn(func, qualified_name);
}

fn exportUtilsFn(comptime func: anytype, comptime func_name: []const u8) void {
exportBuiltinFn(func, "utils." ++ func_name);
Expand Down
123 changes: 123 additions & 0 deletions crates/compiler/builtins/roc/Crypt.roc
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
module [
emptySha256,
sha256AddBytes,
sha256Digest,
hashSha256,
digest256ToBytes,
Sha256,
Digest256,
]

import Bool exposing [Eq]
import List
import Num exposing [U8, U64, U128]
import Result
import Str

## Represents the state of a SHA256 cryptographic hashing function, after some (or no) data has been added to the hash.
Copy link
Sponsor Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick: it being an opaque type is implied, I don't think we need to explicitly mention that here.

Sha256 := { location : U64 }

## Represents the digest of some data produced by the SHA256 cryptographic hashing function as an opaque type.
##
## `Digest256` implements the `Eq` ability.
Copy link
Sponsor Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick: Digest256(space)implements ...

also, consider adding an empty doc comment between these two doc comments, that'll follow the "one-line summary on top, context beneath" structure for docs.

Digest256 := { firstHalf : U128, secondHalf : U128 } implements [Eq]

## Returns a `Sha256` to which no data has been added.
emptySha256 : {} -> Sha256
Copy link
Sponsor Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd recommend "An empty SHA-256 digest."

And it's "to which no data has been added." https://www.thesaurus.com/e/grammar/data-is-or-data-are/

Copy link
Author

@MatthewJohnHeath MatthewJohnHeath Sep 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The word shouldn't be "digest", I think. We should save that for the Digest256 returned after "finalize" has been called on the zig object.
I am struggling to find a good term to describe what the Sha256 object does. My mental model of it is as the state of the algorithm, but that doesn't seem useful wording here. Something like "An empty SHA-256 hasher", maybe?

I agree that data (EDIT)is in this context; I've had too much time in settings where I get corrected the other way.


## Adds bytes of data to be hashed in the `Sha256`.
sha256AddBytes : Sha256, List U8 -> Sha256

## Returns the digest of the cryptographic hashing function represented by a `Sha256`.
sha256Digest : Sha256 -> Digest256

## Applies the SHA256 cryptographic hashing function to some bytes.
hashSha256 : List U8 -> Digest256
hashSha256 = \bytes ->
    emptySha256 {}
    |> sha256AddBytes bytes
    |> sha256Digest

# Splits a U128 into its 16 bytes, most significant byte first.
u128Bytes : U128 -> List U8
u128Bytes = \number ->
    # Peel off the lowest byte each step and prepend it, so the byte taken
    # last (the most significant) ends up at the front of the list.
    go = \remaining, acc, count ->
        if count < 16 then
            lowByte = remaining |> Num.bitwiseAnd 0xff |> Num.toU8
            go (Num.shiftRightBy remaining 8) (List.prepend acc lowByte) (count + 1)
        else
            acc

    go number [] 0

## Returns the bytes of a `Digest256` as a list.
digest256ToBytes : Digest256 -> List U8
digest256ToBytes = \@Digest256 { firstHalf, secondHalf } ->
    leadingBytes = u128Bytes firstHalf
    trailingBytes = u128Bytes secondHalf
    List.concat leadingBytes trailingBytes

# test data taken from https://ziglang.org/documentation/0.11.0/std/src/std/crypto/sha2.zig.html#L434
# Expected digest bytes when no data is hashed.
digestBytesOfEmpty : List U8
digestBytesOfEmpty = fromHexString "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

# Expected digest bytes for the three bytes "abc".
digestBytesOfAbc : List U8
digestBytesOfAbc = fromHexString "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"

# Expected digest bytes for the long multi-block string used in the tests below.
digestBytesOfLong : List U8
digestBytesOfLong = fromHexString "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1"

# One-shot hashing of an empty list.
expect
data : List U8
data = []
want = digestBytesOfEmpty
got = data |> hashSha256 |> digest256ToBytes
want == got

# One-shot hashing of "abc" given as character literals.
expect
data = ['a', 'b', 'c']
want = digestBytesOfAbc
got = data |> hashSha256 |> digest256ToBytes
want == got

# One-shot hashing of the long test string.
expect
data = Str.toUtf8 "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"
want = digestBytesOfLong
got = data |> hashSha256 |> digest256ToBytes
want == got

# Taking the digest of an empty `Sha256` directly.
expect
want = digestBytesOfEmpty
got = emptySha256 {} |> sha256Digest |> digest256ToBytes
want == got

# Adding all of "abc" in a single `sha256AddBytes` call.
expect
data = Str.toUtf8 "abc"
want = digestBytesOfAbc
got =
emptySha256 {}
|> sha256AddBytes data
|> sha256Digest
|> digest256ToBytes
want == got

# Adding "abc" one byte at a time must give the same digest as adding it at once.
expect
want = digestBytesOfAbc
got =
emptySha256 {}
|> sha256AddBytes ['a']
|> sha256AddBytes ['b']
|> sha256AddBytes ['c']
|> sha256Digest
|> digest256ToBytes
want == got

# Decodes a string of lowercase hexadecimal digits into the bytes it spells,
# two digits per byte, most significant digit first.
# Only the characters '0'-'9' and 'a'-'f' are handled.
fromHexString : Str -> List U8
fromHexString = \hex ->
    # Value of a single ASCII hex digit: '0'-'9' -> 0-9, 'a'-'f' -> 10-15.
    fromHexDigit = \smallNumber ->
        if smallNumber <= '9' then
            smallNumber - '0'
        else
            # Letter digits start at ten, not zero.
            smallNumber - 'a' + 10

    # Combines a two-character chunk into one byte.
    fromHexDigits = \pair ->
        first = pair |> List.first |> Result.withDefault 0
        second = pair |> List.get 1 |> Result.withDefault 0
        16 * (fromHexDigit first) + (fromHexDigit second)

    hex
    |> Str.toUtf8
    |> List.chunksOf 2
    |> List.map fromHexDigits
1 change: 1 addition & 0 deletions crates/compiler/builtins/roc/main.roc
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ package [
Box,
Inspect,
Task,
Crypt,
] {}
4 changes: 4 additions & 0 deletions crates/compiler/builtins/src/bitcode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,10 @@ pub const DEC_ROUND: IntrinsicName = int_intrinsic!("roc_builtins.dec.round");
pub const DEC_FLOOR: IntrinsicName = int_intrinsic!("roc_builtins.dec.floor");
pub const DEC_CEILING: IntrinsicName = int_intrinsic!("roc_builtins.dec.ceiling");

// Names of the Zig builtins backing the `Crypt` module. Each string must match
// the symbol exported from the bitcode ("roc_builtins." + "crypt." + function name).
pub const CRYPT_EMPTY_SHA256: &str = "roc_builtins.crypt.emptySha256";
pub const CRYPT_SHA256_ADD_BYTES: &str = "roc_builtins.crypt.sha256AddBytes";
pub const CRYPT_SHA256_DIGEST: &str = "roc_builtins.crypt.sha256Digest";

pub const UTILS_DBG_IMPL: &str = "roc_builtins.utils.dbg_impl";
pub const UTILS_TEST_PANIC: &str = "roc_builtins.utils.test_panic";
pub const UTILS_ALLOCATE_WITH_REFCOUNT: &str = "roc_builtins.utils.allocate_with_refcount";
Expand Down
2 changes: 2 additions & 0 deletions crates/compiler/builtins/src/roc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub fn module_source(module_id: ModuleId) -> &'static str {
ModuleId::HASH => HASH,
ModuleId::INSPECT => INSPECT,
ModuleId::TASK => TASK,
ModuleId::CRYPT => CRYPT,
_ => internal_error!(
"ModuleId {:?} is not part of the standard library",
module_id
Expand All @@ -37,3 +38,4 @@ const DECODE: &str = include_str!("../roc/Decode.roc");
const HASH: &str = include_str!("../roc/Hash.roc");
const INSPECT: &str = include_str!("../roc/Inspect.roc");
const TASK: &str = include_str!("../roc/Task.roc");
const CRYPT: &str = include_str!("../roc/Crypt.roc");
4 changes: 4 additions & 0 deletions crates/compiler/can/src/builtins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ map_symbol_to_lowlevel_and_arity! {
NumF32FromParts; NUM_F32_FROM_PARTS; 1,
NumF64FromParts; NUM_F64_FROM_PARTS; 1,

CryptEmptySha256; CRYPT_EMPTY_SHA_256; 1,
CryptSha256AddBytes; CRYPT_SHA256_ADD_BYTES; 2,
CryptSha256Digest; CRYPT_SHA256_DIGEST; 1,

Eq; BOOL_STRUCTURAL_EQ; 2,
NotEq; BOOL_STRUCTURAL_NOT_EQ; 2,
And; BOOL_AND; 2,
Expand Down
15 changes: 15 additions & 0 deletions crates/compiler/gen_dev/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2236,6 +2236,21 @@ trait Backend<'a> {
self.build_fn_call(sym, intrinsic, args, arg_layouts, ret_layout)
}

LowLevel::CryptEmptySha256 => {
let intrinsic = bitcode::CRYPT_EMPTY_SHA256.to_string();
self.build_fn_call(sym, intrinsic, args, arg_layouts, ret_layout);
}

LowLevel::CryptSha256AddBytes => {
let intrinsic = bitcode::CRYPT_SHA256_ADD_BYTES.to_string();
self.build_fn_call(sym, intrinsic, args, arg_layouts, ret_layout);
}

LowLevel::CryptSha256Digest => {
let intrinsic = bitcode::CRYPT_SHA256_DIGEST.to_string();
self.build_fn_call(sym, intrinsic, args, arg_layouts, ret_layout);
}

x => todo!("low level, {:?}", x),
}
}
Expand Down
9 changes: 9 additions & 0 deletions crates/compiler/gen_llvm/src/llvm/lowlevel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1409,6 +1409,15 @@ pub(crate) fn run_low_level<'a, 'ctx>(

call_bitcode_fn(env, &[], bitcode::UTILS_DICT_PSEUDO_SEED)
}
CryptEmptySha256 => call_bitcode_fn(env, &[], bitcode::CRYPT_EMPTY_SHA256),
CryptSha256AddBytes => {
arguments!(sha, data);
call_bitcode_fn(env, &[sha, data], bitcode::CRYPT_SHA256_ADD_BYTES)
}
CryptSha256Digest => {
arguments!(sha);
call_bitcode_fn(env, &[sha], bitcode::CRYPT_SHA256_DIGEST)
}

ListIncref | ListDecref | SetJmp | LongJmp | SetLongJmpBuffer => {
unreachable!("only inserted in dev backend codegen")
Expand Down
7 changes: 7 additions & 0 deletions crates/compiler/gen_wasm/src/low_level.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2176,6 +2176,13 @@ impl<'a> LowLevelCall<'a> {
NumF64ToParts => self.load_args_and_call_zig(backend, bitcode::NUM_F64_TO_PARTS),
NumF32FromParts => self.load_args_and_call_zig(backend, bitcode::NUM_F32_FROM_PARTS),
NumF64FromParts => self.load_args_and_call_zig(backend, bitcode::NUM_F64_FROM_PARTS),
// Crypt
CryptEmptySha256 => self.load_args_and_call_zig(backend, bitcode::CRYPT_EMPTY_SHA256),
CryptSha256AddBytes => {
self.load_args_and_call_zig(backend, bitcode::CRYPT_SHA256_ADD_BYTES)
}
CryptSha256Digest => self.load_args_and_call_zig(backend, bitcode::CRYPT_SHA256_DIGEST),

And => {
self.load_args(backend);
backend.code_builder.i32_and();
Expand Down
1 change: 1 addition & 0 deletions crates/compiler/load/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const MODULES: &[(ModuleId, &str)] = &[
(ModuleId::HASH, "Hash.roc"),
(ModuleId::INSPECT, "Inspect.roc"),
(ModuleId::TASK, "Task.roc"),
(ModuleId::CRYPT, "Crypt.roc"),
];

fn main() {
Expand Down
3 changes: 3 additions & 0 deletions crates/compiler/load/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ fn read_cached_types() -> MutMap<ModuleId, TypeState> {
let mod_hash = include_bytes_align_as!(u128, concat!(env!("OUT_DIR"), "/Hash.dat"));
let mod_inspect = include_bytes_align_as!(u128, concat!(env!("OUT_DIR"), "/Inspect.dat"));
let mod_task = include_bytes_align_as!(u128, concat!(env!("OUT_DIR"), "/Task.dat"));
let mod_crypt = include_bytes_align_as!(u128, concat!(env!("OUT_DIR"), "/Crypt.dat"));

let mut output = MutMap::default();

Expand All @@ -282,6 +283,8 @@ fn read_cached_types() -> MutMap<ModuleId, TypeState> {
output.insert(ModuleId::INSPECT, deserialize_help(mod_inspect));

output.insert(ModuleId::TASK, deserialize_help(mod_task));

output.insert(ModuleId::CRYPT, deserialize_help(mod_crypt));
}

output
Expand Down
3 changes: 3 additions & 0 deletions crates/compiler/load_internal/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2330,6 +2330,7 @@ fn update<'a>(
extend_module_with_builtin_import(parsed, ModuleId::HASH);
extend_module_with_builtin_import(parsed, ModuleId::INSPECT);
extend_module_with_builtin_import(parsed, ModuleId::TASK);
extend_module_with_builtin_import(parsed, ModuleId::CRYPT);
}

state
Expand Down Expand Up @@ -3652,6 +3653,7 @@ fn load_module<'a>(
"Hash", ModuleId::HASH
"Inspect", ModuleId::INSPECT
"Task", ModuleId::TASK
"Crypt", ModuleId::CRYPT
}

let (filename, opt_shorthand) = module_name_to_path(src_dir, &module_name, arc_shorthands);
Expand Down Expand Up @@ -5197,6 +5199,7 @@ fn canonicalize_and_constrain<'a>(
| ModuleId::HASH
| ModuleId::INSPECT
| ModuleId::TASK
| ModuleId::CRYPT
);

if !name.is_builtin() || should_include_builtin {
Expand Down
1 change: 1 addition & 0 deletions crates/compiler/load_internal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,5 @@ pub const BUILTIN_MODULES: &[(ModuleId, &str)] = &[
(ModuleId::HASH, "Hash"),
(ModuleId::INSPECT, "Inspect"),
(ModuleId::TASK, "Task"),
(ModuleId::CRYPT, "Crypt"),
];
1 change: 1 addition & 0 deletions crates/compiler/load_internal/src/module_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ impl Default for ModuleCache<'_> {
HASH,
INSPECT,
TASK,
CRYPT,
}

Self {
Expand Down
1 change: 1 addition & 0 deletions crates/compiler/module/src/ident.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ impl ModuleName {
pub const HASH: &'static str = "Hash";
pub const INSPECT: &'static str = "Inspect";
pub const TASK: &'static str = "Task";
pub const CRYPT: &'static str = "Crypt";

pub fn as_str(&self) -> &str {
self.0.as_str()
Expand Down
6 changes: 6 additions & 0 deletions crates/compiler/module/src/low_level.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ pub enum LowLevel {
SetJmp,
LongJmp,
SetLongJmpBuffer,
CryptEmptySha256,
CryptSha256AddBytes,
CryptSha256Digest,
}

macro_rules! higher_order {
Expand Down Expand Up @@ -348,4 +351,7 @@ map_symbol_to_lowlevel! {
Not <= BOOL_NOT;
Unreachable <= LIST_UNREACHABLE;
DictPseudoSeed <= DICT_PSEUDO_SEED;
CryptEmptySha256 <= CRYPT_EMPTY_SHA_256;
CryptSha256AddBytes <= CRYPT_SHA256_ADD_BYTES;
CryptSha256Digest <= CRYPT_SHA256_DIGEST;
}
10 changes: 9 additions & 1 deletion crates/compiler/module/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1697,6 +1697,14 @@ define_builtins! {
13 TASK_FOR_EACH: "forEach"
14 TASK_RESULT: "result"
}
16 CRYPT: "Crypt" =>{
0 CRYPT_SHA_256: "Sha256" exposed_type=true
1 CRYPT_DIGEST_256: "Digest256" exposed_type=true
2 CRYPT_EMPTY_SHA_256: "emptySha256"
3 CRYPT_SHA256_ADD_BYTES: "sha256AddBytes"
4 CRYPT_SHA256_DIGEST: "sha256Digest"
5 CRYPT_HASH_SHA_256: "hashSha256"
}

num_modules: 16 // Keep this count up to date by hand! (TODO: see the mut_map! macro for how we could determine this count correctly in the macro)
num_modules: 17 // Keep this count up to date by hand! (TODO: see the mut_map! macro for how we could determine this count correctly in the macro)
}
1 change: 1 addition & 0 deletions crates/compiler/mono/src/drop_specialization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1629,6 +1629,7 @@ fn low_level_no_rc(lowlevel: &LowLevel) -> RC {

ListIsUnique => RC::Rc,
ListClone => RC::Rc,
CryptEmptySha256 | CryptSha256AddBytes | CryptSha256Digest => RC::NoRc,

BoxExpr | UnboxExpr => {
unreachable!("These lowlevel operations are turned into mono Expr's")
Expand Down
Loading
Loading