-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
vcomp/cmpto_j2k: run GPU conv as prepreprocess
Set the pixfmt conversion CUDA kernel as cmpto_j2k_enc preprocessor, not run directly. This also eliminates to need to have the conversion kernel if conversion is needed - CPU conversion will be sufficient. Currently not effective, only R12L is converted for which there is the kernel. refer to GH-406
- Loading branch information
1 parent
caacebc
commit 92565ca
Showing
3 changed files
with
84 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
/** | ||
* @file cuda_wrapper/kernels.cu | ||
* @author Martin Pulec <[email protected]> | ||
* | ||
* This file hosts various CUDA kernels. Currently there are only kernels | ||
* for cmpto_j2k compression and decompression. | ||
*/ | ||
/* | ||
* Copyright (c) 2024 CESNET | ||
|
@@ -298,7 +301,7 @@ int postprocess_rg48_to_r12l( | |
|
||
MEASURE_KERNEL_DURATION_STOP(stream) | ||
|
||
return 0; | ||
return cudaGetLastError(); | ||
} | ||
|
||
// ___ ___ ___ __ __ ___ _____ ____ ___ | ||
|
@@ -446,28 +449,42 @@ r12l_to_rg48_compute_blk(const uint8_t *in, uint8_t *out) | |
} | ||
} | ||
|
||
void | ||
preprocess_r12l_to_rg48(int width, int height, void *src, void *dst) | ||
int | ||
preprocess_r12l_to_rg48(void *preprocessor, void *img_custom_data, | ||
size_t img_custom_data_size, int size_x, int size_y, | ||
struct cmpto_j2k_enc_comp_format *comp_formats, | ||
int comp_count, void *input_samples, | ||
size_t input_samples_size, void *output_samples, | ||
size_t output_samples_size, void *vstream) | ||
{ | ||
(void) width, (void) height, (void) src, (void) dst; | ||
(void) preprocessor, (void) img_custom_data, | ||
(void) img_custom_data_size, (void) comp_formats, (void) comp_count, | ||
(void) input_samples_size, (void) output_samples_size; | ||
|
||
cudaStream_t stream = (cudaStream_t) vstream; | ||
dim3 threads_per_block(256); | ||
dim3 blocks((((width + 7) / 8) + 255) / 256, height); | ||
dim3 blocks((((size_x+ 7) / 8) + 255) / 256, size_y); | ||
|
||
MEASURE_KERNEL_DURATION_START(0) | ||
if (width % 2 == 0) { | ||
kernel_r12l_to_rg48<uint32_t><<<blocks, threads_per_block>>>( | ||
(uint8_t *) src, (uint8_t *) dst, width); | ||
MEASURE_KERNEL_DURATION_START(stream) | ||
if (size_x % 2 == 0) { | ||
kernel_r12l_to_rg48<uint32_t> | ||
<<<blocks, threads_per_block, 0, stream>>>( | ||
(uint8_t *) input_samples, (uint8_t *) output_samples, | ||
size_x); | ||
} else { | ||
thread_local bool warn_print; | ||
if (!warn_print) { | ||
fprintf(stderr, | ||
"%s: Odd width %d px will use slower kernel!\n", | ||
__func__, width); | ||
__func__, size_x); | ||
warn_print = true; | ||
} | ||
kernel_r12l_to_rg48<uint16_t><<<blocks, threads_per_block>>>( | ||
(uint8_t *) src, (uint8_t *) dst, width); | ||
kernel_r12l_to_rg48<uint16_t> | ||
<<<blocks, threads_per_block, 0, stream>>>( | ||
(uint8_t *) input_samples, (uint8_t *) output_samples, | ||
size_x); | ||
} | ||
MEASURE_KERNEL_DURATION_STOP(0) | ||
} | ||
MEASURE_KERNEL_DURATION_STOP(stream) | ||
|
||
return cudaGetLastError(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters