diff --git a/.gitignore b/.gitignore index 613ade66..dc358c34 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,5 @@ test-driver Debug Release +# VSCode +.vscode/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 654870d7..92ab0d44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,15 @@ cmake_minimum_required(VERSION 3.8.0 FATAL_ERROR) # change version also in configure.ac -project(gpujpeg VERSION 0.21.0 LANGUAGES C CUDA) +project(gpujpeg VERSION 0.21.0 LANGUAGES C) +include(CheckLanguage) + +check_language(CUDA) +if(CMAKE_CUDA_COMPILER) + enable_language(CUDA) + add_definitions(-DGPUJPEG_USE_CUDA) +else() + message(STATUS "No CUDA support") +endif() # options set(BUILD_OPENGL AUTO CACHE STRING "Build with OpenGL support, options are: AUTO ON OFF") @@ -56,6 +65,8 @@ set(NEEDED_COMPILER_FEATURES c_std_11) set(COMPILED_OPTIONS) +set(CMAKE_VERBOSE_MAKEFILE ON) + # allow passing _ROOT to find_package() if(POLICY CMP0074) cmake_policy(SET CMP0074 NEW) @@ -65,6 +76,17 @@ endif() if(POLICY CMP0092) cmake_policy(SET CMP0092 NEW) endif() + +# Fix behavior of CMAKE_CXX_STANDARD when targeting macOS. 
+if (POLICY CMP0025) + cmake_policy(SET CMP0025 NEW) +endif () + +# Find OpenGL, GLEW, GLUT and GLFW +if(POLICY CMP0072) + cmake_policy(SET CMP0072 NEW) +endif() + if (MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /W4") @@ -73,10 +95,6 @@ else() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall -Xcompiler -Wextra") endif() -# Find OpenGL, GLEW, GLUT and GLFW -if(POLICY CMP0072) - cmake_policy(SET CMP0072 NEW) -endif() find_package(OpenGL) find_package(GLEW) find_package(GLUT) @@ -99,15 +117,17 @@ if(NOT BUILD_OPENGL STREQUAL "OFF" AND NOT OPENGL_ERROR) # Build GPUJPEG library with OpenGL support add_definitions("-DGPUJPEG_USE_OPENGL") + add_definitions("-DGL_SILENCE_DEPRECATION") + set(GPUJPEG_OPENGL_LIBRARIES) include_directories(${OPENGL_INCLUDE_DIR} ${GLEW_INCLUDE_DIRS}) - list(APPEND GPUJPEG_OPENGL_LIBRARIES ${GLEW_LIBRARIES}) + list(APPEND GPUJPEG_OPENGL_LIBRARIES GLEW::GLEW) list(APPEND GPUJPEG_OPENGL_LIBRARIES ${OPENGL_LIBRARIES}) if(GLUT_FOUND) include_directories(${GLUT_INCLUDE_DIR}) list(APPEND GPUJPEG_OPENGL_LIBRARIES ${GLUT_glut_LIBRARY}) endif() - if(X11_FOUND AND (OPENGL_CONTEXT STREQUAL "AUTO" OR OPENGL_CONTEXT STREQUAL "GLX")) + if(NOT APPLE AND X11_FOUND AND (OPENGL_CONTEXT STREQUAL "AUTO" OR OPENGL_CONTEXT STREQUAL "GLX")) include_directories(${X11_INCLUDE_DIR}) list(APPEND GPUJPEG_OPENGL_LIBRARIES ${X11_LIBRARIES}) add_definitions("-DGPUJPEG_USE_GLX") @@ -182,7 +202,7 @@ if(GPUJPEG_OPENGL_ENABLED AND GLUT_FOUND) add_executable(decoder_gltex ${FILES}) target_compile_features(decoder_gltex PRIVATE ${NEEDED_COMPILER_FEATURES}) set_property(TARGET decoder_gltex PROPERTY C_STANDARD 99) - target_link_libraries(decoder_gltex ${PROJECT_NAME} -lglut) + target_link_libraries(decoder_gltex ${PROJECT_NAME} ${GPUJPEG_OPENGL_LIBRARIES}) # OpenGL interoperability example (currently not working) #file(GLOB FILES test/opengl_interop/*.c test/opengl_interop/*.h test/opengl_interop/*.cu) diff --git 
a/examples/decode_to_cuda_pnm.c b/examples/decode_to_cuda_pnm.c index 36db6a0c..5af953a6 100644 --- a/examples/decode_to_cuda_pnm.c +++ b/examples/decode_to_cuda_pnm.c @@ -3,7 +3,9 @@ * then copied back to RAM and written to a PNM file. */ -#include +#ifdef GPUJPEG_USE_CUDA + #include +#endif #include #include #include diff --git a/src/gpujpeg_common.c b/src/gpujpeg_common.c index b797196d..20e96794 100644 --- a/src/gpujpeg_common.c +++ b/src/gpujpeg_common.c @@ -59,7 +59,9 @@ #elif defined(GPUJPEG_USE_GLX) #include #endif - #include + #ifdef GPUJPEG_USE_CUDA + #include + #endif #endif #if _STDC_VERSION__ >= 201112L @@ -128,7 +130,7 @@ struct gpujpeg_devices_info gpujpeg_get_devices_info(void) { struct gpujpeg_devices_info devices_info = { 0 }; - +#ifdef GPUJPEG_USE_CUDA cudaGetDeviceCount(&devices_info.device_count); gpujpeg_cuda_check_error("Cannot get number of CUDA devices", return devices_info); @@ -157,7 +159,9 @@ gpujpeg_get_devices_info(void) device_info->multiprocessor_count = device_properties.multiProcessorCount; #endif } - +#else +// TODO: NEED IMPLEMENTATION +#endif return devices_info; } @@ -192,6 +196,7 @@ gpujpeg_print_devices_info(void) int gpujpeg_init_device(int device_id, int flags) { +#ifdef GPUJPEG_USE_CUDA int dev_count; cudaGetDeviceCount(&dev_count); gpujpeg_cuda_check_error("Cannot get number of CUDA devices", return -1); @@ -255,7 +260,9 @@ gpujpeg_init_device(int device_id, int flags) fprintf(stderr, "[GPUJPEG] [Info] OpenGL interoperability is used, is OpenGL context available?\n"); return -1; } - +#else +// TODO: NEED IMPLEMENTATION +#endif return 0; } @@ -429,7 +436,11 @@ gpujpeg_image_get_file_format(const char* filename) void gpujpeg_set_device(int index) { +#ifdef GPUJPEG_USE_CUDA cudaSetDevice(index); +#else + // TODO: NEED IMPLEMENTATION +#endif } /* Documented at declaration */ @@ -438,6 +449,7 @@ gpujpeg_component_print8(struct gpujpeg_component* component, uint8_t* d_data) { int data_size = component->data_width * 
component->data_height; uint8_t* data = NULL; +#ifdef GPUJPEG_USE_CUDA cudaMallocHost((void**)&data, data_size * sizeof(uint8_t)); cudaMemcpy(data, d_data, data_size * sizeof(uint8_t), cudaMemcpyDeviceToHost); @@ -449,6 +461,9 @@ gpujpeg_component_print8(struct gpujpeg_component* component, uint8_t* d_data) printf("\n"); } cudaFreeHost(data); +#else + // TODO: NEED IMPLEMENTATION +#endif } /* Documented at declaration */ @@ -457,6 +472,7 @@ gpujpeg_component_print16(struct gpujpeg_component* component, int16_t* d_data) { int data_size = component->data_width * component->data_height; int16_t* data = NULL; +#ifdef GPUJPEG_USE_CUDA cudaMallocHost((void**)&data, data_size * sizeof(int16_t)); cudaMemcpy(data, d_data, data_size * sizeof(int16_t), cudaMemcpyDeviceToHost); @@ -468,12 +484,16 @@ gpujpeg_component_print16(struct gpujpeg_component* component, int16_t* d_data) printf("\n"); } cudaFreeHost(data); +#else + // TODO: NEED IMPLEMENTATION +#endif } /* Documented at declaration */ int gpujpeg_coder_init(struct gpujpeg_coder * coder) { +#ifdef GPUJPEG_USE_CUDA // Get info about the device struct cudaDeviceProp device_properties; int device_idx; @@ -486,7 +506,9 @@ gpujpeg_coder_init(struct gpujpeg_coder * coder) fprintf(stderr, "GPUJPEG coder is currently broken on cards with cc < 2.0\n"); return -1; } - +#else + // TODO: NEED IMPLEMENTATION +#endif // Initialize coder for no image coder->param.quality = -1; coder->param.restart_interval = -1; @@ -530,8 +552,14 @@ gpujpeg_coder_init(struct gpujpeg_coder * coder) return 0; } +#ifdef GPUJPEG_USE_CUDA size_t gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_parameters * param, const struct gpujpeg_image_parameters * param_image, cudaStream_t stream) +#else +// TODO: NEED TO BE IMPLEMENTED +size_t +gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_parameters * param, const struct gpujpeg_image_parameters * param_image, void* stream) +#endif { if 
(gpujpeg_parameters_equals(&coder->param, param) && gpujpeg_image_parameters_equals(&coder->param_image, param_image)) { coder->param.verbose = param->verbose; @@ -548,7 +576,7 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para // Allocate color components if (param_image->comp_count > coder->component_allocated_size) { coder->component_allocated_size = 0; - +#ifdef GPUJPEG_USE_CUDA // (Re)allocate color components in host memory if (coder->component != NULL) { cudaFreeHost(coder->component); @@ -564,9 +592,12 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para } cudaMalloc((void**)&coder->d_component, param_image->comp_count * sizeof(struct gpujpeg_component)); gpujpeg_cuda_check_error("Coder color component device allocation", return 0); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->component_allocated_size = param_image->comp_count; } + allocated_gpu_memory_size += coder->component_allocated_size * sizeof(struct gpujpeg_component); // Calculate raw data size @@ -689,7 +720,7 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para // Allocate segments if (coder->segment_count > coder->segment_allocated_size) { coder->segment_allocated_size = 0; - +#ifdef GPUJPEG_USE_CUDA // (Re)allocate segments in host memory if (coder->segment != NULL) { cudaFreeHost(coder->segment); @@ -705,7 +736,9 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para } cudaMalloc((void**)&coder->d_segment, coder->segment_count * sizeof(struct gpujpeg_segment)); gpujpeg_cuda_check_error("Coder segment device allocation", return 0); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->segment_allocated_size = coder->segment_count; } allocated_gpu_memory_size += coder->segment_allocated_size * sizeof(struct gpujpeg_segment); @@ -811,7 +844,7 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para * GPUJPEG_BLOCK_SIZE * 
coder->component[0].data_width; if (coder->data_size + idct_overhead > coder->data_allocated_size) { coder->data_allocated_size = 0; - +#ifdef GPUJPEG_USE_CUDA // (Re)allocate preprocessor data in device memory if (coder->d_data != NULL) { cudaFree(coder->d_data); @@ -835,15 +868,21 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para } cudaMalloc((void**)&coder->d_data_quantized, (coder->data_size + idct_overhead) * sizeof(int16_t)); gpujpeg_cuda_check_error("Coder quantized data device allocation", return 0); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->data_allocated_size = coder->data_size + idct_overhead; } allocated_gpu_memory_size += coder->data_allocated_size * sizeof(uint8_t); allocated_gpu_memory_size += coder->data_allocated_size * sizeof(int16_t); if (coder->encoder) { // clear the buffer for preprocessor when the image size is not divisible by 8x8 +#ifdef GPUJPEG_USE_CUDA cudaMemset(coder->d_data, 0, coder->data_size * sizeof(uint8_t)); gpujpeg_cuda_check_error("d_data memset failed", return 0); +#else + // TODO: NEED IMPLEMENTATION +#endif } // Set data buffer to color components @@ -869,7 +908,7 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para //max_compressed_data_size *= 2; if (max_compressed_data_size > coder->data_compressed_allocated_size) { coder->data_compressed_allocated_size = 0; - +#ifdef GPUJPEG_USE_CUDA // (Re)allocate huffman coder data in host memory if (coder->data_compressed != NULL) { cudaFreeHost(coder->data_compressed); @@ -893,7 +932,9 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para } cudaMalloc((void**)&coder->d_temp_huffman, max_compressed_data_size * sizeof(uint8_t)); gpujpeg_cuda_check_error("Huffman temp buffer device allocation", return 0); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->data_compressed_allocated_size = max_compressed_data_size; } allocated_gpu_memory_size += 
coder->data_compressed_allocated_size * sizeof(uint8_t); @@ -906,7 +947,7 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para } if (coder->block_count > coder->block_allocated_size) { coder->block_allocated_size = 0; - +#ifdef GPUJPEG_USE_CUDA // (Re)allocate list of block indices in host memory if (coder->block_list != NULL) { cudaFreeHost(coder->block_list); @@ -922,7 +963,9 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para } cudaMalloc((void**)&coder->d_block_list, coder->block_count * sizeof(*coder->d_block_list)); gpujpeg_cuda_check_error("Coder block list device allocation", return 0); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->block_allocated_size = coder->block_count; } allocated_gpu_memory_size += coder->block_allocated_size * sizeof(*coder->d_block_list); @@ -985,7 +1028,7 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para segment->block_count = block_idx - segment->block_index_list_begin; } assert(block_idx == coder->block_count); - +#ifdef GPUJPEG_USE_CUDA // Copy components to device memory cudaMemcpyAsync(coder->d_component, coder->component, coder->param_image.comp_count * sizeof(struct gpujpeg_component), cudaMemcpyHostToDevice, stream); gpujpeg_cuda_check_error("Coder component copy", return 0); @@ -997,7 +1040,9 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para // Copy segments to device memory cudaMemcpyAsync(coder->d_segment, coder->segment, coder->segment_count * sizeof(struct gpujpeg_segment), cudaMemcpyHostToDevice, stream); gpujpeg_cuda_check_error("Coder segment copy", return 0); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->allocated_gpu_memory_size = allocated_gpu_memory_size; return allocated_gpu_memory_size; @@ -1023,6 +1068,7 @@ gpujpeg_coder_get_stats(struct gpujpeg_coder *coder, struct gpujpeg_duration_sta int gpujpeg_coder_deinit(struct gpujpeg_coder* coder) { +#ifdef 
GPUJPEG_USE_CUDA if (coder->component != NULL) cudaFreeHost(coder->component); if (coder->d_component != NULL) @@ -1051,7 +1097,9 @@ gpujpeg_coder_deinit(struct gpujpeg_coder* coder) cudaFreeHost(coder->block_list); if ( coder->d_block_list != NULL ) cudaFree(coder->d_block_list); - +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_DESTROY(coder->duration_memory_to, return -1); GPUJPEG_CUSTOM_TIMER_DESTROY(coder->duration_memory_from, return -1); GPUJPEG_CUSTOM_TIMER_DESTROY(coder->duration_memory_map, return -1); @@ -1091,7 +1139,11 @@ gpujpeg_image_calculate_size(struct gpujpeg_image_parameters* param) static void *gpujpeg_cuda_malloc_host(size_t size) { void *ptr; +#ifdef GPUJPEG_USE_CUDA GPUJPEG_CHECK_EX(cudaMallocHost(&ptr, size), "Could not alloc host pointer", return NULL); +#else + ptr = NULL; // TODO: NEED IMPLEMENTATION +#endif return ptr; } @@ -1119,12 +1171,16 @@ gpujpeg_image_load_from_file(const char* filename, uint8_t** image, size_t* imag } uint8_t* data = NULL; +#ifdef GPUJPEG_USE_CUDA cudaMallocHost((void**)&data, *image_size * sizeof(uint8_t)); gpujpeg_cuda_check_error("Initialize CUDA host buffer", return -1); if ( *image_size != fread(data, sizeof(uint8_t), *image_size, file) ) { fprintf(stderr, "[GPUJPEG] [Error] Failed to load image data [%zd bytes] from file %s!\n", *image_size, filename); return -1; } +#else + // TODO: NEED IMPLEMENTATION +#endif fclose(file); *image = data; @@ -1206,8 +1262,11 @@ gpujpeg_image_get_properties(const char *filename, struct gpujpeg_image_paramete int gpujpeg_image_destroy(uint8_t* image) { +#ifdef GPUJPEG_USE_CUDA cudaFreeHost(image); - +#else + // TODO: NEED IMPLEMENTATION +#endif return 0; } @@ -1569,6 +1628,7 @@ gpujpeg_opengl_texture_destroy(int texture_id) struct gpujpeg_opengl_texture* gpujpeg_opengl_texture_register(int texture_id, enum gpujpeg_opengl_texture_type texture_type) { +#ifdef GPUJPEG_USE_CUDA struct gpujpeg_opengl_texture* texture = NULL; cudaMallocHost((void**)&texture, sizeof(struct 
gpujpeg_opengl_texture)); assert(texture != NULL); @@ -1622,6 +1682,9 @@ gpujpeg_opengl_texture_register(int texture_id, enum gpujpeg_opengl_texture_type #else GPUJPEG_MISSING_OPENGL(return NULL); #endif +#else + return NULL; // TODO: NEED IMPLEMENTATION +#endif } /* Documented at declaration */ @@ -1634,10 +1697,14 @@ gpujpeg_opengl_texture_unregister(struct gpujpeg_opengl_texture* texture) if ( texture->texture_pbo_id != 0 ) { glDeleteBuffers(1, (GLuint*)&texture->texture_pbo_id); } +#ifdef GPUJPEG_USE_CUDA if ( texture->texture_pbo_resource != NULL ) { cudaGraphicsUnregisterResource(texture->texture_pbo_resource); } cudaFreeHost(texture); +#else + // TODO: NEED TO BE IMPLEMENTED +#endif #else (void) texture; GPUJPEG_MISSING_OPENGL(return); @@ -1672,6 +1739,7 @@ gpujpeg_opengl_texture_map(struct gpujpeg_opengl_texture* texture, size_t* data_ } // Map pixel buffer object to cuda +#ifdef GPUJPEG_USE_CUDA cudaGraphicsMapResources(1, &texture->texture_pbo_resource, 0); gpujpeg_cuda_check_error("Encoder map texture PBO resource", return NULL); @@ -1681,7 +1749,11 @@ gpujpeg_opengl_texture_map(struct gpujpeg_opengl_texture* texture, size_t* data_ gpujpeg_cuda_check_error("Encoder get device pointer for texture PBO resource", return NULL); if ( data_size != NULL ) *data_size = d_data_size; - +#else + // TODO: NEED TO BE IMPLEMENTED + (void) data_size; + GPUJPEG_MISSING_OPENGL(return NULL); +#endif return d_data; #else (void) data_size; @@ -1694,6 +1766,7 @@ void gpujpeg_opengl_texture_unmap(struct gpujpeg_opengl_texture* texture) { // Unmap pbo +#ifdef GPUJPEG_USE_CUDA cudaGraphicsUnmapResources(1, &texture->texture_pbo_resource, 0); gpujpeg_cuda_check_error("Encoder unmap texture PBO resource", {}); @@ -1717,6 +1790,9 @@ gpujpeg_opengl_texture_unmap(struct gpujpeg_opengl_texture* texture) #else GPUJPEG_MISSING_OPENGL(return); #endif +#else + // TODO: NEED IMPLEMENTATION +#endif } int gpujpeg_version(void) @@ -1890,6 +1966,7 @@ int gpujpeg_pixel_format_get_subsampling(enum 
gpujpeg_pixel_format pixel_format) return -1; } +#ifdef GPUJPEG_USE_CUDA float gpujpeg_custom_timer_get_duration(cudaEvent_t start, cudaEvent_t stop) { float elapsedTime = NAN; cudaError_t err = cudaEventSynchronize(stop); @@ -1912,5 +1989,8 @@ float gpujpeg_custom_timer_get_duration(cudaEvent_t start, cudaEvent_t stop) { } return elapsedTime; } +#else +// TODO: NEED IMPLEMENTATION +#endif /* vi: set expandtab sw=4 : */ diff --git a/src/gpujpeg_common_internal.h b/src/gpujpeg_common_internal.h index 46b45ce0..c67ebdf2 100644 --- a/src/gpujpeg_common_internal.h +++ b/src/gpujpeg_common_internal.h @@ -35,7 +35,9 @@ #ifndef GPUJPEG_COMMON_INTERNAL_H #define GPUJPEG_COMMON_INTERNAL_H -#include +#ifdef GPUJPEG_USE_CUDA + #include +#endif #include // NAN #include #include @@ -70,10 +72,13 @@ struct gpujpeg_timer { int started; +#ifdef GPUJPEG_USE_CUDA cudaEvent_t start; cudaEvent_t stop; +#endif }; +#ifdef GPUJPEG_USE_CUDA #define GPUJPEG_CUSTOM_TIMER_CREATE(name, err_action) \ do { \ GPUJPEG_CHECK(cudaEventCreate(&(name).start), err_action); \ @@ -118,6 +123,32 @@ struct gpujpeg_timer { */ #define GPUJPEG_CUSTOM_TIMER_DURATION(name) \ (name).started == 1 ? gpujpeg_custom_timer_get_duration((name).start, (name).stop) : (name).started == 0 ? 
0 : ( fprintf(stderr, "Debug timer disabled!\n"), 0) +#else +#define GPUJPEG_CUSTOM_TIMER_CREATE(name, err_action) +#define GPUJPEG_CUSTOM_TIMER_DESTROY(name, err_action) + +/** + * Start timer + * + * @param name + * @todo stream + */ +#define GPUJPEG_CUSTOM_TIMER_START(name, record_perf, stream, err_action) + +/** + * Stop timer + * + * @param name + */ +#define GPUJPEG_CUSTOM_TIMER_STOP(name, record_perf, stream, err_action) + +/** + * Get duration for timer + * + * @param name + */ +#define GPUJPEG_CUSTOM_TIMER_DURATION(name) 0 +#endif #ifdef __cplusplus extern "C" { @@ -317,8 +348,12 @@ struct gpujpeg_coder /// Allocated size size_t data_compressed_allocated_size; +#ifdef GPUJPEG_USE_CUDA int cuda_cc_major; ///< CUDA Compute capability (major version) int cuda_cc_minor; ///< CUDA Compute capability (minor version) +#else + // TODO: NEED IMPLEMENTATION +#endif // Operation durations struct gpujpeg_timer duration_memory_to; @@ -354,8 +389,13 @@ gpujpeg_coder_init(struct gpujpeg_coder* coder); * @param stream CUDA stream * @return size of allocated device memory in bytes if succeeds, otherwise 0 */ +#ifdef GPUJPEG_USE_CUDA size_t gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_parameters * param, const struct gpujpeg_image_parameters * param_image, cudaStream_t stream); +#else +size_t +gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_parameters * param, const struct gpujpeg_image_parameters * param_image, void* stream); +#endif /** * Returns duration statistics for last coded image @@ -420,9 +460,10 @@ gpujpeg_image_parameters_equals(const struct gpujpeg_image_parameters *p1 , cons * * @returns duration in ms, 0.0F in case of error */ +#ifdef GPUJPEG_USE_CUDA float gpujpeg_custom_timer_get_duration(cudaEvent_t start, cudaEvent_t stop); - +#endif #ifdef __cplusplus } // extern "C" diff --git a/src/gpujpeg_dct_cpu.c b/src/gpujpeg_dct_cpu.c index bf6d9124..8e7bfb7a 100644 --- a/src/gpujpeg_dct_cpu.c +++ 
b/src/gpujpeg_dct_cpu.c @@ -213,7 +213,13 @@ gpujpeg_idct_cpu(struct gpujpeg_decoder* decoder) struct gpujpeg_component* component = &coder->component[comp]; // Copy data to host +#ifdef GPUJPEG_USE_CUDA cudaMemcpy(component->data_quantized, component->d_data_quantized, component->data_size * sizeof(uint16_t), cudaMemcpyDeviceToHost); +#else + // TODO: NEED IMPLEMENTATION +#endif + + // Perform IDCT on CPU int width = component->data_width / GPUJPEG_BLOCK_SIZE; @@ -230,6 +236,7 @@ gpujpeg_idct_cpu(struct gpujpeg_decoder* decoder) // Copy results to device uint8_t* data = NULL; +#ifdef GPUJPEG_USE_CUDA assert(cudaMallocHost((void**)&data, component->data_size * sizeof(uint8_t)) == cudaSuccess); for ( int y = 0; y < height; y++ ) { for ( int x = 0; x < width; x++ ) { @@ -248,5 +255,8 @@ gpujpeg_idct_cpu(struct gpujpeg_decoder* decoder) } cudaMemcpy(component->d_data, data, component->data_size * sizeof(uint8_t), cudaMemcpyHostToDevice); cudaFreeHost(data); +#else + // TODO: NEED IMPLEMENTATION +#endif } } diff --git a/src/gpujpeg_decoder.c b/src/gpujpeg_decoder.c index d73485e4..fa41a15b 100644 --- a/src/gpujpeg_decoder.c +++ b/src/gpujpeg_decoder.c @@ -30,13 +30,16 @@ #include "../libgpujpeg/gpujpeg_decoder.h" #include "gpujpeg_dct_cpu.h" -#include "gpujpeg_dct_gpu.h" #include "gpujpeg_decoder_internal.h" #include "gpujpeg_huffman_cpu_decoder.h" -#include "gpujpeg_huffman_gpu_decoder.h" -#include "gpujpeg_postprocessor.h" #include "gpujpeg_util.h" +#ifdef GPUJPEG_USE_CUDA + #include "gpujpeg_dct_gpu.h" + #include "gpujpeg_huffman_gpu_decoder.h" + #include "gpujpeg_postprocessor.h" +#endif + /* Documented at declaration */ void gpujpeg_decoder_output_set_default(struct gpujpeg_decoder_output* output) @@ -119,6 +122,7 @@ gpujpeg_decoder_create(cudaStream_t stream) if ( decoder->reader == NULL ) result = 0; +#ifdef GPUJPEG_USE_CUDA // Allocate quantization tables in device memory for ( int comp_type = 0; comp_type < GPUJPEG_MAX_COMPONENT_COUNT; comp_type++ ) { if ( 
cudaSuccess != cudaMalloc((void**)&decoder->table_quantization[comp_type].d_table, 64 * sizeof(uint16_t)) ) @@ -142,6 +146,10 @@ gpujpeg_decoder_create(cudaStream_t stream) if ((decoder->huffman_gpu_decoder = gpujpeg_huffman_gpu_decoder_init()) == NULL) { result = 0; } +#else + // TODO: NEED IMPLEMENTATION + result = 0; +#endif // Stream decoder->stream = stream; @@ -193,10 +201,14 @@ gpujpeg_decoder_init(struct gpujpeg_decoder* decoder, const struct gpujpeg_param } // Init postprocessor +#ifdef GPUJPEG_USE_CUDA if ( gpujpeg_preprocessor_decoder_init(&decoder->coder) != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Failed to init postprocessor!\n"); return -1; } +#else + // TODO: NOT YET IMPLEMENTED +#endif return 0; } @@ -235,6 +247,7 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i } // Perform huffman decoding on CPU (when there are not enough segments to saturate GPU) +#ifdef GPUJPEG_USE_CUDA if (coder->segment_count < 32 || unsupp_gpu_huffman_params) { GPUJPEG_CUSTOM_TIMER_START(coder->duration_huffman_coder, coder->param.perf_stats, decoder->stream, return -1); if (0 != gpujpeg_huffman_cpu_decoder_decode(decoder)) { @@ -283,7 +296,6 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_dct_quantization, coder->param.perf_stats, decoder->stream, return -1); - // Create buffers if not already created if (coder->data_raw == NULL) { if (cudaSuccess != cudaMallocHost((void**)&coder->data_raw, coder->data_raw_size * sizeof(uint8_t))) { return -1; @@ -294,6 +306,9 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i return -1; } } +#else + // TODO: NEED IMPLEMENTATION +#endif // Select CUDA output buffer if (output->type == GPUJPEG_DECODER_OUTPUT_CUSTOM_CUDA_BUFFER) { @@ -317,6 +332,7 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i } // Preprocessing +#ifdef GPUJPEG_USE_CUDA 
GPUJPEG_CUSTOM_TIMER_START(coder->duration_preprocessor, coder->param.perf_stats, decoder->stream, return -1); rc = gpujpeg_preprocessor_decode(&decoder->coder, decoder->stream); if (rc != GPUJPEG_NOERR) { @@ -326,6 +342,9 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i // Wait for async operations before copying from the device cudaStreamSynchronize(decoder->stream); +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_in_gpu, coder->param.perf_stats, decoder->stream, return -1); @@ -335,12 +354,16 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i output->pixel_format = decoder->coder.param_image.pixel_format; // Set decompressed image + if (output->type == GPUJPEG_DECODER_OUTPUT_INTERNAL_BUFFER) { GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_from, coder->param.perf_stats, decoder->stream, return -1); // Copy decompressed image to host memory +#ifdef GPUJPEG_USE_CUDA cudaMemcpy(coder->data_raw, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost); - +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_from, coder->param.perf_stats, decoder->stream, return -1); // Set output to internal buffer @@ -352,8 +375,11 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i assert(output->data != NULL); // Copy decompressed image to host memory +#ifdef GPUJPEG_USE_CUDA cudaMemcpy(output->data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost); - +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_from, coder->param.perf_stats, decoder->stream, return -1); } else if (output->type == GPUJPEG_DECODER_OUTPUT_OPENGL_TEXTURE) { @@ -369,10 +395,12 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_map, 
coder->param.perf_stats, decoder->stream, return -1); GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_from, coder->param.perf_stats, decoder->stream, return -1); - // Copy decompressed image to texture pixel buffer object device data +#ifdef GPUJPEG_USE_CUDA cudaMemcpy(d_data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToDevice); - +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_from, coder->param.perf_stats, decoder->stream, return -1); } @@ -423,7 +451,7 @@ gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder) if (0 != gpujpeg_coder_deinit(&decoder->coder)) { return -1; } - +#ifdef GPUJPEG_USE_CUDA for (int comp_type = 0; comp_type < GPUJPEG_MAX_COMPONENT_COUNT; comp_type++) { if (decoder->table_quantization[comp_type].d_table != NULL) { cudaFree(decoder->table_quantization[comp_type].d_table); @@ -443,6 +471,9 @@ gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder) if (decoder->huffman_gpu_decoder != NULL) { gpujpeg_huffman_gpu_decoder_destroy(decoder->huffman_gpu_decoder); } +#else + // TODO: NEED IMPLEMENTATION +#endif free(decoder); diff --git a/src/gpujpeg_decoder_internal.h b/src/gpujpeg_decoder_internal.h index 753e8b1e..22f27968 100644 --- a/src/gpujpeg_decoder_internal.h +++ b/src/gpujpeg_decoder_internal.h @@ -67,8 +67,12 @@ struct gpujpeg_decoder /// Current data compressed size for decoded image size_t data_compressed_size; +#ifdef GPUJPEG_USE_CUDA // Stream cudaStream_t stream; +#else + void* stream; +#endif }; #endif // GPUJPEG_DECODER_INTERNAL_H diff --git a/src/gpujpeg_encoder.c b/src/gpujpeg_encoder.c index 3843d07e..bd9758db 100644 --- a/src/gpujpeg_encoder.c +++ b/src/gpujpeg_encoder.c @@ -31,14 +31,17 @@ #include #include #include "../libgpujpeg/gpujpeg_encoder.h" -#include "gpujpeg_preprocessor.h" #include "gpujpeg_dct_cpu.h" -#include "gpujpeg_dct_gpu.h" #include "gpujpeg_huffman_cpu_encoder.h" -#include "gpujpeg_huffman_gpu_encoder.h" #include 
"gpujpeg_marker.h" #include "gpujpeg_util.h" +#ifdef GPUJPEG_USE_CUDA + #include "gpujpeg_dct_gpu.h" + #include "gpujpeg_huffman_gpu_encoder.h" + #include "gpujpeg_preprocessor.h" +#endif + /* Documented at declaration */ void gpujpeg_encoder_input_set_image(struct gpujpeg_encoder_input* input, uint8_t* image) @@ -94,6 +97,7 @@ gpujpeg_encoder_create(cudaStream_t stream) coder->encoder = 1; // Allocate quantization tables in device memory +#ifdef GPUJPEG_USE_CUDA for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) { if ( cudaSuccess != cudaMalloc((void**)&encoder->table_quantization[comp_type].d_table, 64 * sizeof(uint16_t)) ) { result = 0; @@ -103,6 +107,9 @@ gpujpeg_encoder_create(cudaStream_t stream) } } gpujpeg_cuda_check_error("Encoder table allocation", return NULL); +#else + // TODO: NEED IMPLEMENTATION +#endif // Init huffman tables for encoder for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) { @@ -111,13 +118,21 @@ gpujpeg_encoder_create(cudaStream_t stream) result = 0; } } +#ifdef GPUJPEG_USE_CUDA gpujpeg_cuda_check_error("Encoder table init", return NULL); +#else + // TODO: NEED IMPLEMENTATION +#endif // Init huffman encoder +#ifdef GPUJPEG_USE_CUDA encoder->huffman_gpu_encoder = gpujpeg_huffman_gpu_encoder_create(encoder); if (encoder->huffman_gpu_encoder == NULL) { result = 0; } +#else + // TODO: NEED IMPLEMENTATION +#endif if ( result == 0 ) { gpujpeg_encoder_destroy(encoder); @@ -146,10 +161,17 @@ size_t gpujpeg_encoder_max_pixels(struct gpujpeg_parameters * param, struct gpuj param_image->width = (int) sqrt((float) pixels); param_image->height = (pixels + param_image->width - 1) / param_image->width; //printf("\nIteration #%d (pixels: %d, size: %dx%d)\n", iteration++, pixels, param_image->width, param_image->height); + +#ifdef GPUJPEG_USE_CUDA size_t image_memory_size = gpujpeg_coder_init_image(&coder, param, param_image, cudaStreamDefault); +#else + // TODO: NEED IMPLEMENTATION + size_t 
image_memory_size = 0; +#endif if (image_memory_size == 0) { break; } + size_t allocated_memory_size = 0; allocated_memory_size += encoder_memory_size; allocated_memory_size += image_memory_size; @@ -199,8 +221,14 @@ size_t gpujpeg_encoder_max_memory(struct gpujpeg_parameters * param, struct gpuj param_image->width = (int) sqrt((float) max_pixels); param_image->height = (max_pixels + param_image->width - 1) / param_image->width; - + +#ifdef GPUJPEG_USE_CUDA size_t image_memory_size = gpujpeg_coder_init_image(&coder, param, param_image, cudaStreamDefault); +#else + // TODO: NEED IMPLEMENTATION + size_t image_memory_size = 0; +#endif + if (image_memory_size == 0) { return 0; } @@ -237,6 +265,7 @@ int gpujpeg_encoder_allocate(struct gpujpeg_encoder * encoder, const struct gpuj if (coder->data_raw_size > coder->data_raw_allocated_size) { coder->data_raw_allocated_size = 0; +#ifdef GPUJPEG_USE_CUDA // (Re)allocate raw data in device memory if (coder->d_data_raw_allocated != NULL) { cudaFree(coder->d_data_raw_allocated); @@ -244,7 +273,9 @@ int gpujpeg_encoder_allocate(struct gpujpeg_encoder * encoder, const struct gpuj } cudaMalloc((void**)&coder->d_data_raw_allocated, coder->data_raw_size); gpujpeg_cuda_check_error("Encoder raw data allocation", return -1); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->data_raw_allocated_size = coder->data_raw_size; } } @@ -303,8 +334,13 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter return -1; } } +#ifdef GPUJPEG_USE_CUDA gpujpeg_cuda_check_error("Quantization init", return -1); +#else + // TODO: NEED IMPLEMENTATION +#endif } + if (0 == gpujpeg_coder_init_image(coder, param, param_image, encoder->stream)) { fprintf(stderr, "[GPUJPEG] [Error] Failed to init image encoding!\n"); return -1; @@ -317,16 +353,21 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter } // (Re)initialize preprocessor +#ifdef GPUJPEG_USE_CUDA if 
(gpujpeg_preprocessor_encoder_init(&encoder->coder) != 0) { fprintf(stderr, "[GPUJPEG] [Error] Failed to init preprocessor!\n"); return -1; } +#else + // TODO: NEED IMPLEMENTATION +#endif // Load input image if ( input->type == GPUJPEG_ENCODER_INPUT_IMAGE ) { GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_to, coder->param.perf_stats, encoder->stream, return -1); // Allocate raw data internal buffer +#ifdef GPUJPEG_USE_CUDA if (coder->data_raw_size > coder->data_raw_allocated_size) { coder->data_raw_allocated_size = 0; @@ -346,7 +387,9 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter // Copy image to device memory cudaMemcpyAsync(coder->d_data_raw, input->image, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyHostToDevice, encoder->stream); gpujpeg_cuda_check_error("Encoder raw data copy", return -1); - +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_to, coder->param.perf_stats, encoder->stream, return -1); } else if (input->type == GPUJPEG_ENCODER_INPUT_GPU_IMAGE) { @@ -358,6 +401,7 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_map, coder->param.perf_stats, encoder->stream, return -1); // Create buffers if not already created +#ifdef GPUJPEG_USE_CUDA if (coder->data_raw_size > coder->data_raw_allocated_size) { coder->data_raw_allocated_size = 0; @@ -371,6 +415,9 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter coder->data_raw_allocated_size = coder->data_raw_size; } +#else + // TODO: NEED IMPLEMENTATION +#endif coder->d_data_raw = coder->d_data_raw_allocated; // Map texture to CUDA @@ -382,7 +429,11 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_to, coder->param.perf_stats, encoder->stream, return -1); // Copy image data from texture pixel buffer object to device data 
+#ifdef GPUJPEG_USE_CUDA cudaMemcpyAsync(coder->d_data_raw, d_data, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToDevice, encoder->stream); +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_to, coder->param.perf_stats, encoder->stream, return -1); GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_unmap, coder->param.perf_stats, encoder->stream, return -1); @@ -399,7 +450,7 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter //gpujpeg_table_print(encoder->table[JPEG_COMPONENT_LUMINANCE]); //gpujpeg_table_print(encoder->table[JPEG_COMPONENT_CHROMINANCE]); - +#ifdef GPUJPEG_USE_CUDA GPUJPEG_CUSTOM_TIMER_START(coder->duration_in_gpu, coder->param.perf_stats, encoder->stream, return -1); GPUJPEG_CUSTOM_TIMER_START(coder->duration_preprocessor, coder->param.perf_stats, encoder->stream, return -1); @@ -416,6 +467,9 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter if (0 != gpujpeg_dct_gpu(encoder)) { return -1; } +#else + // TODO: NEED IMPLEMENTATION +#endif // If restart interval is 0 then the GPU processing is in the end (even huffman coder will be performed on CPU) if (coder->param.restart_interval == 0) { @@ -434,10 +488,14 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter if ( coder->param.restart_interval == 0 ) { GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_from, coder->param.perf_stats, encoder->stream, return -1); // Copy quantized data from device memory to cpu memory +#ifdef GPUJPEG_USE_CUDA cudaMemcpyAsync(coder->data_quantized, coder->d_data_quantized, coder->data_size * sizeof(int16_t), cudaMemcpyDeviceToHost, encoder->stream); // Wait for async operations before the coding cudaStreamSynchronize(encoder->stream); +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_from, coder->param.perf_stats, encoder->stream, return -1); 
GPUJPEG_CUSTOM_TIMER_START(coder->duration_huffman_coder, coder->param.perf_stats, encoder->stream, return -1); @@ -453,15 +511,20 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter GPUJPEG_CUSTOM_TIMER_START(coder->duration_huffman_coder, coder->param.perf_stats, encoder->stream, return -1); // Perform huffman coding unsigned int output_size; +#ifdef GPUJPEG_USE_CUDA if ( gpujpeg_huffman_gpu_encoder_encode(encoder, encoder->huffman_gpu_encoder, &output_size) != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Huffman encoder on GPU failed!\n"); return -1; } - +#else + // TODO: NEED IMPLEMENTATION + return -1; +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_huffman_coder, coder->param.perf_stats, encoder->stream, return -1); GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_in_gpu, coder->param.perf_stats, encoder->stream, return -1); GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_from, coder->param.perf_stats, encoder->stream, return -1); +#ifdef GPUJPEG_USE_CUDA // Copy compressed data from device memory to cpu memory if ( cudaSuccess != cudaMemcpyAsync(coder->data_compressed, coder->d_data_compressed, output_size, cudaMemcpyDeviceToHost, encoder->stream) ) { return -1; @@ -473,6 +536,9 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter // Wait for async operations before formatting cudaStreamSynchronize(encoder->stream); +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_from, coder->param.perf_stats, encoder->stream, return -1); GPUJPEG_CUSTOM_TIMER_START(coder->duration_stream, coder->param.perf_stats, encoder->stream, return -1); @@ -562,18 +628,26 @@ gpujpeg_encoder_destroy(struct gpujpeg_encoder* encoder) assert(encoder != NULL); if (encoder->huffman_gpu_encoder != NULL) { +#ifdef GPUJPEG_USE_CUDA gpujpeg_huffman_gpu_encoder_destroy(encoder->huffman_gpu_encoder); +#else + // TODO: NEED TO BE IMPLEMENTED +#endif } if (gpujpeg_coder_deinit(&encoder->coder) != 0) 
{ return -1; } for (int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++) { +#ifdef GPUJPEG_USE_CUDA if (encoder->table_quantization[comp_type].d_table != NULL) { cudaFree(encoder->table_quantization[comp_type].d_table); } if (encoder->table_quantization[comp_type].d_table_forward != NULL) { cudaFree(encoder->table_quantization[comp_type].d_table_forward); } +#else + // TODO: NEED TO BE IMPLEMENTED +#endif } if (encoder->writer != NULL) { gpujpeg_writer_destroy(encoder->writer); diff --git a/src/gpujpeg_encoder_internal.h b/src/gpujpeg_encoder_internal.h index fc5dadfb..32dea1a4 100644 --- a/src/gpujpeg_encoder_internal.h +++ b/src/gpujpeg_encoder_internal.h @@ -63,8 +63,12 @@ struct gpujpeg_encoder /// JPEG header to be emitted enum gpujpeg_header_type header_type; +#ifdef GPUJPEG_USE_CUDA // Stream cudaStream_t stream; +#else + void* stream; +#endif }; #ifdef __cplusplus diff --git a/src/gpujpeg_reader.c b/src/gpujpeg_reader.c index 6aaa45c2..9c089cb2 100644 --- a/src/gpujpeg_reader.c +++ b/src/gpujpeg_reader.c @@ -871,8 +871,12 @@ gpujpeg_reader_read_dht(struct gpujpeg_decoder* decoder, uint8_t** image, const gpujpeg_table_huffman_decoder_compute(table); // Copy table to device memory +#ifdef GPUJPEG_USE_CUDA cudaMemcpyAsync(d_table, table, sizeof(struct gpujpeg_table_huffman_decoder), cudaMemcpyHostToDevice, decoder->stream); gpujpeg_cuda_check_error("Decoder copy huffman table ", return -1); +#else + // TODO: NEED TO BE IMPLEMENTED +#endif } return 0; } diff --git a/src/gpujpeg_table.c b/src/gpujpeg_table.c index 8ec05d51..5b016adb 100644 --- a/src/gpujpeg_table.c +++ b/src/gpujpeg_table.c @@ -83,8 +83,8 @@ gpujpeg_table_quantization_set_default(uint8_t* table_raw, enum gpujpeg_componen void gpujpeg_table_quantization_apply_quality(uint8_t* table_raw, int quality) { - if (quality <= 0) quality = 1; - if (quality > 100) quality = 100; + if (quality <= 0) quality = 1; + if (quality > 100) quality = 100; int s = (quality < 50) ? 
(5000 / quality) : (200 - (2 * quality)); for ( int i = 0; i < 64; i++ ) { int value = (s * (int)table_raw[i] + 50) / 100; @@ -120,9 +120,13 @@ gpujpeg_table_quantization_encoder_init(struct gpujpeg_table_quantization* table } // Copy quantization table to constant memory +#ifdef GPUJPEG_USE_CUDA if ( cudaSuccess != cudaMemcpy(table->d_table_forward, h_quantization_table, 64 * sizeof(float), cudaMemcpyHostToDevice) ) return -1; gpujpeg_cuda_check_error("Copy DCT quantization table to device memory", return -1); +#else + // TODO: NEED TO BE IMPLEMENTED +#endif // DCT loads the table into GPU memory itself, after premultiplying coefficients with DCT normalization constants. return 0; @@ -144,8 +148,13 @@ gpujpeg_table_quantization_decoder_init(struct gpujpeg_table_quantization* table } // Copy tables to device memory +#ifdef GPUJPEG_USE_CUDA if ( cudaSuccess != cudaMemcpy(table->d_table, table->table, 64 * sizeof(uint16_t), cudaMemcpyHostToDevice) ) return -1; +#else + // TODO: NEED TO BE IMPLEMENTED + return -1; +#endif return 0; } @@ -159,9 +168,14 @@ gpujpeg_table_quantization_decoder_compute(struct gpujpeg_table_quantization* ta } // Copy tables to device memory +#ifdef GPUJPEG_USE_CUDA if ( cudaSuccess != cudaMemcpy(table->d_table, table->table, 64 * sizeof(uint16_t), cudaMemcpyHostToDevice) ) return -1; - +#else + // TODO: NEED TO BE IMPLEMENTED + return -1; +#endif + return 0; } @@ -311,30 +325,30 @@ gpujpeg_table_huffman_encoder_init(struct gpujpeg_table_huffman_encoder* table, { assert(comp_type == GPUJPEG_COMPONENT_LUMINANCE || comp_type == GPUJPEG_COMPONENT_CHROMINANCE); assert(huff_type == GPUJPEG_HUFFMAN_DC || huff_type == GPUJPEG_HUFFMAN_AC); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_dc_bits), "table buffer too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_dc_value), "table buffer too small"); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_ac_bits), "table buffer 
too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_ac_value), "table buffer too small"); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_dc_bits), "table buffer too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_dc_value), "table buffer too small"); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_ac_bits), "table buffer too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_ac_value), "table buffer too small"); - + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_dc_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_dc_value), "table buffer too small"); + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_ac_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_ac_value), "table buffer too small"); + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_dc_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_dc_value), "table buffer too small"); + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_ac_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_ac_value), "table buffer too small"); + if ( comp_type == GPUJPEG_COMPONENT_LUMINANCE ) { if ( huff_type == GPUJPEG_HUFFMAN_DC ) { - memcpy(table->bits, gpujpeg_table_huffman_y_dc_bits, sizeof(gpujpeg_table_huffman_y_dc_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_y_dc_value, sizeof(gpujpeg_table_huffman_y_dc_value)); + memcpy(table->bits, gpujpeg_table_huffman_y_dc_bits, sizeof(gpujpeg_table_huffman_y_dc_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_y_dc_value, sizeof(gpujpeg_table_huffman_y_dc_value)); } else { - memcpy(table->bits, 
gpujpeg_table_huffman_y_ac_bits, sizeof(gpujpeg_table_huffman_y_ac_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_y_ac_value, sizeof(gpujpeg_table_huffman_y_ac_value)); + memcpy(table->bits, gpujpeg_table_huffman_y_ac_bits, sizeof(gpujpeg_table_huffman_y_ac_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_y_ac_value, sizeof(gpujpeg_table_huffman_y_ac_value)); } } else if ( comp_type == GPUJPEG_COMPONENT_CHROMINANCE ) { if ( huff_type == GPUJPEG_HUFFMAN_DC ) { - memcpy(table->bits, gpujpeg_table_huffman_cbcr_dc_bits, sizeof(gpujpeg_table_huffman_cbcr_dc_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_cbcr_dc_value, sizeof(gpujpeg_table_huffman_cbcr_dc_value)); + memcpy(table->bits, gpujpeg_table_huffman_cbcr_dc_bits, sizeof(gpujpeg_table_huffman_cbcr_dc_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_cbcr_dc_value, sizeof(gpujpeg_table_huffman_cbcr_dc_value)); } else { - memcpy(table->bits, gpujpeg_table_huffman_cbcr_ac_bits, sizeof(gpujpeg_table_huffman_cbcr_ac_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_cbcr_ac_value, sizeof(gpujpeg_table_huffman_cbcr_ac_value)); + memcpy(table->bits, gpujpeg_table_huffman_cbcr_ac_bits, sizeof(gpujpeg_table_huffman_cbcr_ac_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_cbcr_ac_value, sizeof(gpujpeg_table_huffman_cbcr_ac_value)); } } gpujpeg_table_huffman_encoder_compute(table); @@ -348,30 +362,30 @@ gpujpeg_table_huffman_decoder_init(struct gpujpeg_table_huffman_decoder* table, { assert(comp_type == GPUJPEG_COMPONENT_LUMINANCE || comp_type == GPUJPEG_COMPONENT_CHROMINANCE); assert(huff_type == GPUJPEG_HUFFMAN_DC || huff_type == GPUJPEG_HUFFMAN_AC); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_dc_bits), "table buffer too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_dc_value), "table buffer too small"); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_ac_bits), "table buffer too small"); - 
_Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_ac_value), "table buffer too small"); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_dc_bits), "table buffer too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_dc_value), "table buffer too small"); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_ac_bits), "table buffer too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_ac_value), "table buffer too small"); - + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_dc_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_dc_value), "table buffer too small"); + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_ac_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_ac_value), "table buffer too small"); + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_dc_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_dc_value), "table buffer too small"); + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_ac_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_ac_value), "table buffer too small"); + if ( comp_type == GPUJPEG_COMPONENT_LUMINANCE ) { if ( huff_type == GPUJPEG_HUFFMAN_DC ) { - memcpy(table->bits, gpujpeg_table_huffman_y_dc_bits, sizeof(gpujpeg_table_huffman_y_dc_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_y_dc_value, sizeof(gpujpeg_table_huffman_y_dc_value)); + memcpy(table->bits, gpujpeg_table_huffman_y_dc_bits, sizeof(gpujpeg_table_huffman_y_dc_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_y_dc_value, sizeof(gpujpeg_table_huffman_y_dc_value)); } else { - memcpy(table->bits, 
gpujpeg_table_huffman_y_ac_bits, sizeof(gpujpeg_table_huffman_y_ac_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_y_ac_value, sizeof(gpujpeg_table_huffman_y_ac_value)); + memcpy(table->bits, gpujpeg_table_huffman_y_ac_bits, sizeof(gpujpeg_table_huffman_y_ac_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_y_ac_value, sizeof(gpujpeg_table_huffman_y_ac_value)); } } else if ( comp_type == GPUJPEG_COMPONENT_CHROMINANCE ) { if ( huff_type == GPUJPEG_HUFFMAN_DC ) { - memcpy(table->bits, gpujpeg_table_huffman_cbcr_dc_bits, sizeof(gpujpeg_table_huffman_cbcr_dc_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_cbcr_dc_value, sizeof(gpujpeg_table_huffman_cbcr_dc_value)); + memcpy(table->bits, gpujpeg_table_huffman_cbcr_dc_bits, sizeof(gpujpeg_table_huffman_cbcr_dc_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_cbcr_dc_value, sizeof(gpujpeg_table_huffman_cbcr_dc_value)); } else { - memcpy(table->bits, gpujpeg_table_huffman_cbcr_ac_bits, sizeof(gpujpeg_table_huffman_cbcr_ac_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_cbcr_ac_value, sizeof(gpujpeg_table_huffman_cbcr_ac_value)); + memcpy(table->bits, gpujpeg_table_huffman_cbcr_ac_bits, sizeof(gpujpeg_table_huffman_cbcr_ac_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_cbcr_ac_value, sizeof(gpujpeg_table_huffman_cbcr_ac_value)); } } gpujpeg_table_huffman_decoder_compute(table); diff --git a/src/gpujpeg_util.h b/src/gpujpeg_util.h index 4c031ba9..2953315f 100644 --- a/src/gpujpeg_util.h +++ b/src/gpujpeg_util.h @@ -36,14 +36,17 @@ #include #include #include -#include +#ifdef GPUJPEG_USE_CUDA + #include +#endif #ifdef __cplusplus extern "C" { #endif #define GPUJPEG_CLAMP(x, low, high) (((x) > (high)) ? (high) : (((x) < (low)) ? 
(low) : (x))) - + +#ifdef GPUJPEG_USE_CUDA // CUDA check error #define gpujpeg_cuda_check_error(msg, action) \ { \ @@ -59,8 +62,12 @@ extern "C" { cmd;\ gpujpeg_cuda_check_error(msg, action)\ } while(0) +#else +#define GPUJPEG_CHECK_EX(cmd, msg, action) +#endif + #define GPUJPEG_CHECK(cmd, action) GPUJPEG_CHECK_EX(cmd, #cmd, action) - + // Divide and round up #define gpujpeg_div_and_round_up(value, div) \ ((((value) % (div)) != 0) ? ((value) / (div) + 1) : ((value) / (div))) diff --git a/test/decoder_gltex/main.c b/test/decoder_gltex/main.c index e3d9e36e..12cc4a69 100644 --- a/test/decoder_gltex/main.c +++ b/test/decoder_gltex/main.c @@ -1,8 +1,13 @@ #include #include +#include #include "gpujpeg_reformat.h" #include +#ifdef __APPLE__ +#include +#else #include +#endif int g_texture_id; int g_width; @@ -122,7 +127,7 @@ int main(int argc, char *argv[]) // Get data from OpenGL texture uint8_t* data = NULL; size_t data_size = 0; - data = malloc(param_image.width * param_image.height * param_image.comp_count); + data = (uint8_t*)malloc(param_image.width * param_image.height * param_image.comp_count); gpujpeg_opengl_texture_get_data(texture->texture_id, data, &data_size); // Save image diff --git a/test/misc/mt_encode.c b/test/misc/mt_encode.c index dc0932af..df94cc19 100644 --- a/test/misc/mt_encode.c +++ b/test/misc/mt_encode.c @@ -1,4 +1,6 @@ -#include +#ifdef GPUJPEG_USE_CUDA + #include +#endif #include #include #include diff --git a/test/opengl_interop/util.h b/test/opengl_interop/util.h index 77fa5abd..ee52e6e2 100644 --- a/test/opengl_interop/util.h +++ b/test/opengl_interop/util.h @@ -36,12 +36,19 @@ #include #include #include -#include -#include +#ifdef GPUJPEG_USE_CUDA + #include + #include +#endif #include +#ifdef __APPLE__ +#include +#include +#else #include #include #include +#endif /** * Check CUDA error diff --git a/test/unit/run_tests.c b/test/unit/run_tests.c index 25869123..5e7a928a 100644 --- a/test/unit/run_tests.c +++ b/test/unit/run_tests.c @@ -1,5 
+1,7 @@ #include +#ifdef GPUJPEG_USE_CUDA #include +#endif #include #include #include "../../src/gpujpeg_common_internal.h" @@ -53,10 +55,15 @@ static void encode_gpu_mem_as_cpu() { uint8_t *image = NULL; size_t len = param_image.width * param_image.height * 3 / 2; +#ifdef GPUJPEG_USE_CUDA if (cudaSuccess != cudaMalloc((void**) &image, len)) { abort(); } cudaMemset(image, 0, len); +#else + // TODO: NEED TO BE IMPLEMENTED + abort(); +#endif struct gpujpeg_encoder_input encoder_input; gpujpeg_encoder_input_set_image(&encoder_input, image); @@ -68,7 +75,11 @@ static void encode_gpu_mem_as_cpu() { abort(); } +#ifdef GPUJPEG_USE_CUDA cudaFree(image); +#else + abort(); +#endif gpujpeg_encoder_destroy(encoder); }