Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

isa-l_crypto slower than openssl/sha for hashing sha256 #111

Open
tsv1991 opened this issue Jan 31, 2023 · 2 comments
Open

isa-l_crypto slower than openssl/sha for hashing sha256 #111

tsv1991 opened this issue Jan 31, 2023 · 2 comments
Labels

Comments

@tsv1991
Copy link

tsv1991 commented Jan 31, 2023

I created a file with 1GB size.
fallocate -l 1G bench.dat

And made 3 test programs.
Python with hashlib:

time python3 ./bench.py
49bc20df15e412a64472421e13fe86ff1c5165e18b2afccf160d4dc19fe68a14

real	0m2.699s
user	0m2.313s
sys	0m0.379s

C with openssl/sha.h:

time ./sha256 ../bench.dat
File('../bench.dat'): 49bc20df15e412a64472421e13fe86ff1c5165e18b2afccf160d4dc19fe68a14

real	0m2.476s
user	0m2.310s
sys	0m0.153s

And C with sha256_mb.h (isa-l_crypto):

time ./a.out ../bench.dat
49bc20df15e412a64472421e13fe86ff1c5165e18b2afccf160d4dc19fe68a14

real	0m3.357s
user	0m3.171s
sys	0m0.175s

I was very surprised to see that hashing with isa-l_crypto took longer. What could be the problem?


I was looking at this code https://github.com/01org/isa-l_crypto/blob/master/sha256_mb/sha256_mb_test.c and #3

#include "sha256_mb.h"
#include <stdlib.h>
#include <stdio.h>
/* Size in bytes of the read buffer (64 MiB); used for both malloc and fread. */
#define BUFF_SIZE (1024*1024*64)

/* Number of hash contexts in the pool; only ctxpool[0] is used by main. */
#define NUM_JOBS 1

/*
 * Hash one file with the ISA-L multi-buffer SHA-256 API using a single
 * context, then print the number of bytes read followed by the digest words.
 *
 * Differences from the original snippet:
 *  - the results of posix_memalign() and malloc() are checked;
 *  - the file is opened in binary mode and read errors are detected;
 *  - the read loop stops at the first empty read instead of submitting one
 *    extra zero-length HASH_UPDATE;
 *  - the file, buffer and manager are released on every exit path;
 *  - any failure yields a non-zero exit status.
 *
 * NOTE(review): every submit is still followed immediately by a flush, so
 * only one job is ever in flight. As pointed out in the discussion, the
 * multi-buffer API only pays off when several independent jobs are
 * submitted before flushing.
 *
 * Returns EXIT_SUCCESS when the digest was printed, EXIT_FAILURE otherwise.
 */
int main(void) {
    SHA256_HASH_CTX_MGR *mgr = NULL;
    SHA256_HASH_CTX ctxpool[NUM_JOBS];
    FILE *f = NULL;
    void *buff = NULL;
    size_t count = 0;
    size_t read_in;
    int rc = EXIT_FAILURE;
    int j;

    /* The manager is allocated with 16-byte alignment, as in the original. */
    if (posix_memalign((void **) &mgr, 16, sizeof *mgr) != 0) {
        mgr = NULL; /* contents are unspecified on failure; keep free() safe */
        fprintf(stderr, "can not allocate hash manager\n");
        goto out;
    }
    sha256_ctx_mgr_init(mgr);
    hash_ctx_init(&ctxpool[0]);

    /* Binary mode: the digest must cover the raw bytes of the file. */
    f = fopen("path_to_input_file", "rb");
    if (f == NULL) {
        printf("can not open file");
        goto out;
    }

    buff = malloc(BUFF_SIZE);
    if (buff == NULL) {
        fprintf(stderr, "can not allocate read buffer\n");
        goto out;
    }

    /* Open the hash with an empty first segment. */
    sha256_ctx_mgr_submit(mgr, &ctxpool[0], buff, 0, HASH_FIRST);
    sha256_ctx_mgr_flush(mgr);

    /* Feed the file chunk by chunk; a zero-length read ends the loop. */
    while ((read_in = fread(buff, 1, BUFF_SIZE, f)) > 0) {
        sha256_ctx_mgr_submit(mgr, &ctxpool[0], buff, (uint32_t) read_in, HASH_UPDATE);
        sha256_ctx_mgr_flush(mgr);
        count += read_in;
    }
    if (ferror(f)) {
        fprintf(stderr, "error while reading file\n");
        goto out;
    }

    /* Close the hash with an empty last segment. */
    sha256_ctx_mgr_submit(mgr, &ctxpool[0], buff, 0, HASH_LAST);
    sha256_ctx_mgr_flush(mgr);
    printf("file size:%zu\n", count);

    for (j = 0; j < SHA256_DIGEST_NWORDS; j++) {
        printf("%08x ", ctxpool[0].job.result_digest[j]);
    }
    rc = EXIT_SUCCESS;

out:
    free(buff);
    if (f != NULL) {
        fclose(f);
    }
    free(mgr);
    return rc;
}

I also tried this example and got the same hashing time:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include <isa-l_crypto.h>
#include <endian.h>
#include <unistd.h>
#include <sys/time.h>

/*
 * Generic HASH_* / ctx_mgr_* aliases used by the code below; here each one is
 * bound to its SHA-256 multi-buffer counterpart.
 */
#define HASH_CTX_MGR    SHA256_HASH_CTX_MGR
#define HASH_CTX        SHA256_HASH_CTX
#define BLOCK_SIZE      SHA256_BLOCK_SIZE
#define ctx_mgr_init    sha256_ctx_mgr_init
#define ctx_mgr_submit  sha256_ctx_mgr_submit
#define ctx_mgr_flush   sha256_ctx_mgr_flush

/* Element count of `a`; correct only when `a` is a real array, not a pointer. */
#define ARRAY_SIZE(a)   (sizeof(a) / sizeof(*(a)))

/*
 * Print every word of job->job.result_digest as zero-padded hexadecimal,
 * with no separators, followed by a newline.
 *
 * job    context whose result_digest array is printed.
 * htobe  when true, each word is passed through htobe32()/htobe64() before
 *        printing; when false, the word is printed as stored.
 *
 * The word width is taken from sizeof on the digest element, so the function
 * handles 4-byte and 8-byte digest words; any other width trips assert(0).
 *
 * Fixes: the original read the digest through a `const int *`, which is the
 * wrong type for the array and would read 8-byte words incorrectly, and it
 * passed an int to "%016lx". Words are now read with their own type and the
 * arguments match the printf conversions.
 */
static void print_hash(HASH_CTX *job, bool htobe) {
    const size_t word_size = sizeof job->job.result_digest[0];

    for (size_t i = 0; i < ARRAY_SIZE(job->job.result_digest); i++) {
        if (word_size == 4) {
            uint32_t word = (uint32_t) job->job.result_digest[i];
            if (htobe) {
                word = htobe32(word);
            }
            printf("%08x", (unsigned int) word);
        } else if (word_size == 8) {
            uint64_t word = (uint64_t) job->job.result_digest[i];
            if (htobe) {
                word = htobe64(word);
            }
            printf("%016llx", (unsigned long long) word);
        } else {
            assert(0);
        }
    }

    putchar('\n');
}

/*
 * Submit one segment for `ctx` and, if the submit call returned NULL while
 * the context is still marked as processing, flush the manager once.
 *
 * Returns whatever ctx_mgr_submit() returned when that is non-NULL or when no
 * flush was needed. After a flush, returns the flushed context only when
 * `flags` contains HASH_LAST; otherwise returns NULL.
 *
 * A context returned by the flush is asserted to be `ctx` itself and to carry
 * no error.
 */
static HASH_CTX *
submit_flush(HASH_CTX_MGR *mgr, HASH_CTX *ctx, const void *buffer, uint32_t len, HASH_CTX_FLAG flags) {
    HASH_CTX *submitted = ctx_mgr_submit(mgr, ctx, buffer, len, flags);

    /* Nothing to flush: submit already handed a context back, or ctx is idle. */
    if (submitted != NULL || !hash_ctx_processing(ctx)) {
        return submitted;
    }

    HASH_CTX *flushed = ctx_mgr_flush(mgr);
    if (flushed == NULL) {
        return NULL;
    }

    assert(flushed == ctx);
    assert(flushed->error == HASH_CTX_ERROR_NONE);

    /* Only the final segment reports the finished context to the caller. */
    return (flags & HASH_LAST) ? flushed : NULL;
}

static void calc_hash(const char *filename) {
    HASH_CTX_MGR *mgr = NULL;

    int e = posix_memalign((void **) &mgr, 16, sizeof *mgr);
    assert(e == 0);
    assert(mgr != NULL);
    ctx_mgr_init(mgr);

    HASH_CTX ctx;
    hash_ctx_init(&ctx);

    FILE *fp = fopen(filename, "r");
    //char buffer[BLOCK_SIZE * 2];
    char buffer[4 * 1024 * 1024];
    int i = 0;
    size_t nRead;
    HASH_CTX_FLAG flag;
    while (!feof(fp)) {
        //size_t x = ((unsigned long) random()) % sizeof buffer;
        nRead = fread(&buffer, 1, sizeof buffer, fp);
        e = ferror(fp);
        assert(e == 0);
        flag = i != 0 ? HASH_UPDATE : HASH_FIRST;
        submit_flush(mgr, &ctx, buffer, nRead, flag);
        i = 1;
    }
    (void) fclose(fp);

    HASH_CTX *ctx2 = submit_flush(mgr, &ctx, NULL, 0, HASH_LAST);
    assert(ctx2 != NULL);
    assert(ctx2 == &ctx);
    assert(ctx2->error == HASH_CTX_ERROR_NONE);

    print_hash(&ctx, false);

    free(mgr);
}

/*
 * Seed the random() generator from the process id and the current wall-clock
 * time (seconds and microseconds from gettimeofday()).
 *
 * Fix: the original shifted the signed value of getpid() left by 16 bits,
 * which overflows (undefined behaviour) for large pids. All mixing is now
 * done in unsigned arithmetic; the seed is the same wherever the original
 * was well defined.
 */
static void rand_init(void) {
    struct timeval tv;
    gettimeofday(&tv, NULL);

    unsigned int seed = (unsigned int) getpid() << 16;
    seed ^= (unsigned int) tv.tv_sec;
    seed ^= (unsigned int) tv.tv_usec;
    srandom(seed);
}

/*
 * Program entry point.
 *
 * With no file arguments, prints the ISA-L crypto version and exits.
 * Otherwise seeds the random generator and calls calc_hash() on each
 * argument in order. Always returns 0.
 */
int main(int argc, const char *argv[]) {
    const int file_count = argc - 1;

    if (file_count < 1) {
        printf("ISA-L crypto version: %u.%u.%u\n",
               ISAL_CRYPTO_MAJOR_VERSION, ISAL_CRYPTO_MINOR_VERSION, ISAL_CRYPTO_PATCH_VERSION);
        return 0;
    }

    rand_init();

    /* argv[0] is the program name; the files start at argv[1]. */
    const char **path = argv + 1;
    const char **const end = argv + argc;
    while (path != end) {
        calc_hash(*path);
        ++path;
    }

    return 0;
}
@gbtucker
Copy link
Contributor

gbtucker commented Feb 1, 2023

@tsv1991, you are not using the best part of multi-buffer hashing: hashing multiple independent jobs at one time. Disregarding file I/O time, you might be able to do 10-12 hashes in the time a single-buffer hash can do one. By just doing submit() followed immediately by flush() you lose any advantage of doing more than one job at a time. You do have to have independent hash jobs around, as there is no way to split up a single hash block into independently calculated chunks.

@tsv1991
Copy link
Author

tsv1991 commented Feb 1, 2023

@gbtucker should the hash calculated using multi-buffer hashing and the classic approach be the same?
I couldn't get a hash to be the same.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

2 participants