diff --git a/.gitignore b/.gitignore index 613ade66..dc358c34 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,5 @@ test-driver Debug Release +# VSCode +.vscode/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 654870d7..92ab0d44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,15 @@ cmake_minimum_required(VERSION 3.8.0 FATAL_ERROR) # change version also in configure.ac -project(gpujpeg VERSION 0.21.0 LANGUAGES C CUDA) +project(gpujpeg VERSION 0.21.0 LANGUAGES C) +include(CheckLanguage) + +check_language(CUDA) +if(CMAKE_CUDA_COMPILER) + enable_language(CUDA) + add_definitions(-DGPUJPEG_USE_CUDA) +else() + message(STATUS "No CUDA support") +endif() # options set(BUILD_OPENGL AUTO CACHE STRING "Build with OpenGL support, options are: AUTO ON OFF") @@ -56,6 +65,8 @@ set(NEEDED_COMPILER_FEATURES c_std_11) set(COMPILED_OPTIONS) +set(CMAKE_VERBOSE_MAKEFILE ON) + # allow passing _ROOT to find_package() if(POLICY CMP0074) cmake_policy(SET CMP0074 NEW) @@ -65,6 +76,17 @@ endif() if(POLICY CMP0092) cmake_policy(SET CMP0092 NEW) endif() + +# Fix behavior of CMAKE_CXX_STANDARD when targeting macOS. 
+if (POLICY CMP0025) + cmake_policy(SET CMP0025 NEW) +endif () + +# Find OpenGL, GLEW, GLUT and GLFW +if(POLICY CMP0072) + cmake_policy(SET CMP0072 NEW) +endif() + if (MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /W4") @@ -73,10 +95,6 @@ else() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall -Xcompiler -Wextra") endif() -# Find OpenGL, GLEW, GLUT and GLFW -if(POLICY CMP0072) - cmake_policy(SET CMP0072 NEW) -endif() find_package(OpenGL) find_package(GLEW) find_package(GLUT) @@ -99,15 +117,17 @@ if(NOT BUILD_OPENGL STREQUAL "OFF" AND NOT OPENGL_ERROR) # Build GPUJPEG library with OpenGL support add_definitions("-DGPUJPEG_USE_OPENGL") + add_definitions("-DGL_SILENCE_DEPRECATION") + set(GPUJPEG_OPENGL_LIBRARIES) include_directories(${OPENGL_INCLUDE_DIR} ${GLEW_INCLUDE_DIRS}) - list(APPEND GPUJPEG_OPENGL_LIBRARIES ${GLEW_LIBRARIES}) + list(APPEND GPUJPEG_OPENGL_LIBRARIES GLEW::GLEW) list(APPEND GPUJPEG_OPENGL_LIBRARIES ${OPENGL_LIBRARIES}) if(GLUT_FOUND) include_directories(${GLUT_INCLUDE_DIR}) list(APPEND GPUJPEG_OPENGL_LIBRARIES ${GLUT_glut_LIBRARY}) endif() - if(X11_FOUND AND (OPENGL_CONTEXT STREQUAL "AUTO" OR OPENGL_CONTEXT STREQUAL "GLX")) + if(NOT APPLE AND X11_FOUND AND (OPENGL_CONTEXT STREQUAL "AUTO" OR OPENGL_CONTEXT STREQUAL "GLX")) include_directories(${X11_INCLUDE_DIR}) list(APPEND GPUJPEG_OPENGL_LIBRARIES ${X11_LIBRARIES}) add_definitions("-DGPUJPEG_USE_GLX") @@ -182,7 +202,7 @@ if(GPUJPEG_OPENGL_ENABLED AND GLUT_FOUND) add_executable(decoder_gltex ${FILES}) target_compile_features(decoder_gltex PRIVATE ${NEEDED_COMPILER_FEATURES}) set_property(TARGET decoder_gltex PROPERTY C_STANDARD 99) - target_link_libraries(decoder_gltex ${PROJECT_NAME} -lglut) + target_link_libraries(decoder_gltex ${PROJECT_NAME} ${GPUJPEG_OPENGL_LIBRARIES}) # OpenGL interoperability example (currently not working) #file(GLOB FILES test/opengl_interop/*.c test/opengl_interop/*.h test/opengl_interop/*.cu) diff --git 
a/examples/decode_to_cuda_pnm.c b/examples/decode_to_cuda_pnm.c index 36db6a0c..5af953a6 100644 --- a/examples/decode_to_cuda_pnm.c +++ b/examples/decode_to_cuda_pnm.c @@ -3,7 +3,9 @@ * then copied back to RAM and written to a PNM file. */ -#include +#ifdef GPUJPEG_USE_CUDA + #include +#endif #include #include #include diff --git a/src/gpujpeg_common.c b/src/gpujpeg_common.c index b797196d..20e96794 100644 --- a/src/gpujpeg_common.c +++ b/src/gpujpeg_common.c @@ -59,7 +59,9 @@ #elif defined(GPUJPEG_USE_GLX) #include #endif - #include + #ifdef GPUJPEG_USE_CUDA + #include + #endif #endif #if _STDC_VERSION__ >= 201112L @@ -128,7 +130,7 @@ struct gpujpeg_devices_info gpujpeg_get_devices_info(void) { struct gpujpeg_devices_info devices_info = { 0 }; - +#ifdef GPUJPEG_USE_CUDA cudaGetDeviceCount(&devices_info.device_count); gpujpeg_cuda_check_error("Cannot get number of CUDA devices", return devices_info); @@ -157,7 +159,9 @@ gpujpeg_get_devices_info(void) device_info->multiprocessor_count = device_properties.multiProcessorCount; #endif } - +#else +// TODO: NEED IMPLEMENTATION +#endif return devices_info; } @@ -192,6 +196,7 @@ gpujpeg_print_devices_info(void) int gpujpeg_init_device(int device_id, int flags) { +#ifdef GPUJPEG_USE_CUDA int dev_count; cudaGetDeviceCount(&dev_count); gpujpeg_cuda_check_error("Cannot get number of CUDA devices", return -1); @@ -255,7 +260,9 @@ gpujpeg_init_device(int device_id, int flags) fprintf(stderr, "[GPUJPEG] [Info] OpenGL interoperability is used, is OpenGL context available?\n"); return -1; } - +#else +// TODO: NEED IMPLEMENTATION +#endif return 0; } @@ -429,7 +436,11 @@ gpujpeg_image_get_file_format(const char* filename) void gpujpeg_set_device(int index) { +#ifdef GPUJPEG_USE_CUDA cudaSetDevice(index); +#else + // TODO: NEED IMPLEMENTATION +#endif } /* Documented at declaration */ @@ -438,6 +449,7 @@ gpujpeg_component_print8(struct gpujpeg_component* component, uint8_t* d_data) { int data_size = component->data_width * 
component->data_height; uint8_t* data = NULL; +#ifdef GPUJPEG_USE_CUDA cudaMallocHost((void**)&data, data_size * sizeof(uint8_t)); cudaMemcpy(data, d_data, data_size * sizeof(uint8_t), cudaMemcpyDeviceToHost); @@ -449,6 +461,9 @@ gpujpeg_component_print8(struct gpujpeg_component* component, uint8_t* d_data) printf("\n"); } cudaFreeHost(data); +#else + // TODO: NEED IMPLEMENTATION +#endif } /* Documented at declaration */ @@ -457,6 +472,7 @@ gpujpeg_component_print16(struct gpujpeg_component* component, int16_t* d_data) { int data_size = component->data_width * component->data_height; int16_t* data = NULL; +#ifdef GPUJPEG_USE_CUDA cudaMallocHost((void**)&data, data_size * sizeof(int16_t)); cudaMemcpy(data, d_data, data_size * sizeof(int16_t), cudaMemcpyDeviceToHost); @@ -468,12 +484,16 @@ gpujpeg_component_print16(struct gpujpeg_component* component, int16_t* d_data) printf("\n"); } cudaFreeHost(data); +#else + // TODO: NEED IMPLEMENTATION +#endif } /* Documented at declaration */ int gpujpeg_coder_init(struct gpujpeg_coder * coder) { +#ifdef GPUJPEG_USE_CUDA // Get info about the device struct cudaDeviceProp device_properties; int device_idx; @@ -486,7 +506,9 @@ gpujpeg_coder_init(struct gpujpeg_coder * coder) fprintf(stderr, "GPUJPEG coder is currently broken on cards with cc < 2.0\n"); return -1; } - +#else + // TODO: NEED IMPLEMENTATION +#endif // Initialize coder for no image coder->param.quality = -1; coder->param.restart_interval = -1; @@ -530,8 +552,14 @@ gpujpeg_coder_init(struct gpujpeg_coder * coder) return 0; } +#ifdef GPUJPEG_USE_CUDA size_t gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_parameters * param, const struct gpujpeg_image_parameters * param_image, cudaStream_t stream) +#else +// TODO: NEED TO BE IMPLEMENTED +size_t +gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_parameters * param, const struct gpujpeg_image_parameters * param_image, void* stream) +#endif { if 
(gpujpeg_parameters_equals(&coder->param, param) && gpujpeg_image_parameters_equals(&coder->param_image, param_image)) { coder->param.verbose = param->verbose; @@ -548,7 +576,7 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para // Allocate color components if (param_image->comp_count > coder->component_allocated_size) { coder->component_allocated_size = 0; - +#ifdef GPUJPEG_USE_CUDA // (Re)allocate color components in host memory if (coder->component != NULL) { cudaFreeHost(coder->component); @@ -564,9 +592,12 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para } cudaMalloc((void**)&coder->d_component, param_image->comp_count * sizeof(struct gpujpeg_component)); gpujpeg_cuda_check_error("Coder color component device allocation", return 0); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->component_allocated_size = param_image->comp_count; } + allocated_gpu_memory_size += coder->component_allocated_size * sizeof(struct gpujpeg_component); // Calculate raw data size @@ -689,7 +720,7 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para // Allocate segments if (coder->segment_count > coder->segment_allocated_size) { coder->segment_allocated_size = 0; - +#ifdef GPUJPEG_USE_CUDA // (Re)allocate segments in host memory if (coder->segment != NULL) { cudaFreeHost(coder->segment); @@ -705,7 +736,9 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para } cudaMalloc((void**)&coder->d_segment, coder->segment_count * sizeof(struct gpujpeg_segment)); gpujpeg_cuda_check_error("Coder segment device allocation", return 0); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->segment_allocated_size = coder->segment_count; } allocated_gpu_memory_size += coder->segment_allocated_size * sizeof(struct gpujpeg_segment); @@ -811,7 +844,7 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para * GPUJPEG_BLOCK_SIZE * 
coder->component[0].data_width; if (coder->data_size + idct_overhead > coder->data_allocated_size) { coder->data_allocated_size = 0; - +#ifdef GPUJPEG_USE_CUDA // (Re)allocate preprocessor data in device memory if (coder->d_data != NULL) { cudaFree(coder->d_data); @@ -835,15 +868,21 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para } cudaMalloc((void**)&coder->d_data_quantized, (coder->data_size + idct_overhead) * sizeof(int16_t)); gpujpeg_cuda_check_error("Coder quantized data device allocation", return 0); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->data_allocated_size = coder->data_size + idct_overhead; } allocated_gpu_memory_size += coder->data_allocated_size * sizeof(uint8_t); allocated_gpu_memory_size += coder->data_allocated_size * sizeof(int16_t); if (coder->encoder) { // clear the buffer for preprocessor when the image size is not divisible by 8x8 +#ifdef GPUJPEG_USE_CUDA cudaMemset(coder->d_data, 0, coder->data_size * sizeof(uint8_t)); gpujpeg_cuda_check_error("d_data memset failed", return 0); +#else + // TODO: NEED IMPLEMENTATION +#endif } // Set data buffer to color components @@ -869,7 +908,7 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para //max_compressed_data_size *= 2; if (max_compressed_data_size > coder->data_compressed_allocated_size) { coder->data_compressed_allocated_size = 0; - +#ifdef GPUJPEG_USE_CUDA // (Re)allocate huffman coder data in host memory if (coder->data_compressed != NULL) { cudaFreeHost(coder->data_compressed); @@ -893,7 +932,9 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para } cudaMalloc((void**)&coder->d_temp_huffman, max_compressed_data_size * sizeof(uint8_t)); gpujpeg_cuda_check_error("Huffman temp buffer device allocation", return 0); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->data_compressed_allocated_size = max_compressed_data_size; } allocated_gpu_memory_size += 
coder->data_compressed_allocated_size * sizeof(uint8_t); @@ -906,7 +947,7 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para } if (coder->block_count > coder->block_allocated_size) { coder->block_allocated_size = 0; - +#ifdef GPUJPEG_USE_CUDA // (Re)allocate list of block indices in host memory if (coder->block_list != NULL) { cudaFreeHost(coder->block_list); @@ -922,7 +963,9 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para } cudaMalloc((void**)&coder->d_block_list, coder->block_count * sizeof(*coder->d_block_list)); gpujpeg_cuda_check_error("Coder block list device allocation", return 0); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->block_allocated_size = coder->block_count; } allocated_gpu_memory_size += coder->block_allocated_size * sizeof(*coder->d_block_list); @@ -985,7 +1028,7 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para segment->block_count = block_idx - segment->block_index_list_begin; } assert(block_idx == coder->block_count); - +#ifdef GPUJPEG_USE_CUDA // Copy components to device memory cudaMemcpyAsync(coder->d_component, coder->component, coder->param_image.comp_count * sizeof(struct gpujpeg_component), cudaMemcpyHostToDevice, stream); gpujpeg_cuda_check_error("Coder component copy", return 0); @@ -997,7 +1040,9 @@ gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_para // Copy segments to device memory cudaMemcpyAsync(coder->d_segment, coder->segment, coder->segment_count * sizeof(struct gpujpeg_segment), cudaMemcpyHostToDevice, stream); gpujpeg_cuda_check_error("Coder segment copy", return 0); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->allocated_gpu_memory_size = allocated_gpu_memory_size; return allocated_gpu_memory_size; @@ -1023,6 +1068,7 @@ gpujpeg_coder_get_stats(struct gpujpeg_coder *coder, struct gpujpeg_duration_sta int gpujpeg_coder_deinit(struct gpujpeg_coder* coder) { +#ifdef 
GPUJPEG_USE_CUDA if (coder->component != NULL) cudaFreeHost(coder->component); if (coder->d_component != NULL) @@ -1051,7 +1097,9 @@ gpujpeg_coder_deinit(struct gpujpeg_coder* coder) cudaFreeHost(coder->block_list); if ( coder->d_block_list != NULL ) cudaFree(coder->d_block_list); - +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_DESTROY(coder->duration_memory_to, return -1); GPUJPEG_CUSTOM_TIMER_DESTROY(coder->duration_memory_from, return -1); GPUJPEG_CUSTOM_TIMER_DESTROY(coder->duration_memory_map, return -1); @@ -1091,7 +1139,11 @@ gpujpeg_image_calculate_size(struct gpujpeg_image_parameters* param) static void *gpujpeg_cuda_malloc_host(size_t size) { void *ptr; +#ifdef GPUJPEG_USE_CUDA GPUJPEG_CHECK_EX(cudaMallocHost(&ptr, size), "Could not alloc host pointer", return NULL); +#else + ptr = NULL; // TODO: NEED IMPLEMENTATION +#endif return ptr; } @@ -1119,12 +1171,16 @@ gpujpeg_image_load_from_file(const char* filename, uint8_t** image, size_t* imag } uint8_t* data = NULL; +#ifdef GPUJPEG_USE_CUDA cudaMallocHost((void**)&data, *image_size * sizeof(uint8_t)); gpujpeg_cuda_check_error("Initialize CUDA host buffer", return -1); if ( *image_size != fread(data, sizeof(uint8_t), *image_size, file) ) { fprintf(stderr, "[GPUJPEG] [Error] Failed to load image data [%zd bytes] from file %s!\n", *image_size, filename); return -1; } +#else + // TODO: NEED IMPLEMENTATION +#endif fclose(file); *image = data; @@ -1206,8 +1262,11 @@ gpujpeg_image_get_properties(const char *filename, struct gpujpeg_image_paramete int gpujpeg_image_destroy(uint8_t* image) { +#ifdef GPUJPEG_USE_CUDA cudaFreeHost(image); - +#else + // TODO: NEED IMPLEMENTATION +#endif return 0; } @@ -1569,6 +1628,7 @@ gpujpeg_opengl_texture_destroy(int texture_id) struct gpujpeg_opengl_texture* gpujpeg_opengl_texture_register(int texture_id, enum gpujpeg_opengl_texture_type texture_type) { +#ifdef GPUJPEG_USE_CUDA struct gpujpeg_opengl_texture* texture = NULL; cudaMallocHost((void**)&texture, sizeof(struct 
gpujpeg_opengl_texture)); assert(texture != NULL); @@ -1622,6 +1682,9 @@ gpujpeg_opengl_texture_register(int texture_id, enum gpujpeg_opengl_texture_type #else GPUJPEG_MISSING_OPENGL(return NULL); #endif +#else + return NULL; // TODO: NEED IMPLEMENTATION +#endif } /* Documented at declaration */ @@ -1634,10 +1697,14 @@ gpujpeg_opengl_texture_unregister(struct gpujpeg_opengl_texture* texture) if ( texture->texture_pbo_id != 0 ) { glDeleteBuffers(1, (GLuint*)&texture->texture_pbo_id); } +#ifdef GPUJPEG_USE_CUDA if ( texture->texture_pbo_resource != NULL ) { cudaGraphicsUnregisterResource(texture->texture_pbo_resource); } cudaFreeHost(texture); +#else + // TODO: NEED TO BE IMPLEMENTED +#endif #else (void) texture; GPUJPEG_MISSING_OPENGL(return); @@ -1672,6 +1739,7 @@ gpujpeg_opengl_texture_map(struct gpujpeg_opengl_texture* texture, size_t* data_ } // Map pixel buffer object to cuda +#ifdef GPUJPEG_USE_CUDA cudaGraphicsMapResources(1, &texture->texture_pbo_resource, 0); gpujpeg_cuda_check_error("Encoder map texture PBO resource", return NULL); @@ -1681,7 +1749,11 @@ gpujpeg_opengl_texture_map(struct gpujpeg_opengl_texture* texture, size_t* data_ gpujpeg_cuda_check_error("Encoder get device pointer for texture PBO resource", return NULL); if ( data_size != NULL ) *data_size = d_data_size; - +#else + // TODO: NEED TO BE IMPLEMENTED + (void) data_size; + GPUJPEG_MISSING_OPENGL(return NULL); +#endif return d_data; #else (void) data_size; @@ -1694,6 +1766,7 @@ void gpujpeg_opengl_texture_unmap(struct gpujpeg_opengl_texture* texture) { // Unmap pbo +#ifdef GPUJPEG_USE_CUDA cudaGraphicsUnmapResources(1, &texture->texture_pbo_resource, 0); gpujpeg_cuda_check_error("Encoder unmap texture PBO resource", {}); @@ -1717,6 +1790,9 @@ gpujpeg_opengl_texture_unmap(struct gpujpeg_opengl_texture* texture) #else GPUJPEG_MISSING_OPENGL(return); #endif +#else + // TODO: NEED IMPLEMENTATION +#endif } int gpujpeg_version(void) @@ -1890,6 +1966,7 @@ int gpujpeg_pixel_format_get_subsampling(enum 
gpujpeg_pixel_format pixel_format) return -1; } +#ifdef GPUJPEG_USE_CUDA float gpujpeg_custom_timer_get_duration(cudaEvent_t start, cudaEvent_t stop) { float elapsedTime = NAN; cudaError_t err = cudaEventSynchronize(stop); @@ -1912,5 +1989,8 @@ float gpujpeg_custom_timer_get_duration(cudaEvent_t start, cudaEvent_t stop) { } return elapsedTime; } +#else +// TODO: NEED IMPLEMENTATION +#endif /* vi: set expandtab sw=4 : */ diff --git a/src/gpujpeg_common_internal.h b/src/gpujpeg_common_internal.h index 46b45ce0..c67ebdf2 100644 --- a/src/gpujpeg_common_internal.h +++ b/src/gpujpeg_common_internal.h @@ -35,7 +35,9 @@ #ifndef GPUJPEG_COMMON_INTERNAL_H #define GPUJPEG_COMMON_INTERNAL_H -#include +#ifdef GPUJPEG_USE_CUDA + #include +#endif #include // NAN #include #include @@ -70,10 +72,13 @@ struct gpujpeg_timer { int started; +#ifdef GPUJPEG_USE_CUDA cudaEvent_t start; cudaEvent_t stop; +#endif }; +#ifdef GPUJPEG_USE_CUDA #define GPUJPEG_CUSTOM_TIMER_CREATE(name, err_action) \ do { \ GPUJPEG_CHECK(cudaEventCreate(&(name).start), err_action); \ @@ -118,6 +123,32 @@ struct gpujpeg_timer { */ #define GPUJPEG_CUSTOM_TIMER_DURATION(name) \ (name).started == 1 ? gpujpeg_custom_timer_get_duration((name).start, (name).stop) : (name).started == 0 ? 
0 : ( fprintf(stderr, "Debug timer disabled!\n"), 0) +#else +#define GPUJPEG_CUSTOM_TIMER_CREATE(name, err_action) +#define GPUJPEG_CUSTOM_TIMER_DESTROY(name, err_action) + +/** + * Start timer + * + * @param name + * @todo stream + */ +#define GPUJPEG_CUSTOM_TIMER_START(name, record_perf, stream, err_action) + +/** + * Stop timer + * + * @param name + */ +#define GPUJPEG_CUSTOM_TIMER_STOP(name, record_perf, stream, err_action) + +/** + * Get duration for timer + * + * @param name + */ +#define GPUJPEG_CUSTOM_TIMER_DURATION(name) 0 +#endif #ifdef __cplusplus extern "C" { @@ -317,8 +348,12 @@ struct gpujpeg_coder /// Allocated size size_t data_compressed_allocated_size; +#ifdef GPUJPEG_USE_CUDA int cuda_cc_major; ///< CUDA Compute capability (major version) int cuda_cc_minor; ///< CUDA Compute capability (minor version) +#else + // TODO: NEED IMPLEMENTATION +#endif // Operation durations struct gpujpeg_timer duration_memory_to; @@ -354,8 +389,13 @@ gpujpeg_coder_init(struct gpujpeg_coder* coder); * @param stream CUDA stream * @return size of allocated device memory in bytes if succeeds, otherwise 0 */ +#ifdef GPUJPEG_USE_CUDA size_t gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_parameters * param, const struct gpujpeg_image_parameters * param_image, cudaStream_t stream); +#else +size_t +gpujpeg_coder_init_image(struct gpujpeg_coder * coder, const struct gpujpeg_parameters * param, const struct gpujpeg_image_parameters * param_image, void* stream); +#endif /** * Returns duration statistics for last coded image @@ -420,9 +460,10 @@ gpujpeg_image_parameters_equals(const struct gpujpeg_image_parameters *p1 , cons * * @returns duration in ms, 0.0F in case of error */ +#ifdef GPUJPEG_USE_CUDA float gpujpeg_custom_timer_get_duration(cudaEvent_t start, cudaEvent_t stop); - +#endif #ifdef __cplusplus } // extern "C" diff --git a/src/gpujpeg_dct_cpu.c b/src/gpujpeg_dct_cpu.c index bf6d9124..8e7bfb7a 100644 --- a/src/gpujpeg_dct_cpu.c +++ 
b/src/gpujpeg_dct_cpu.c @@ -213,7 +213,13 @@ gpujpeg_idct_cpu(struct gpujpeg_decoder* decoder) struct gpujpeg_component* component = &coder->component[comp]; // Copy data to host +#ifdef GPUJPEG_USE_CUDA cudaMemcpy(component->data_quantized, component->d_data_quantized, component->data_size * sizeof(uint16_t), cudaMemcpyDeviceToHost); +#else + // TODO: NEED IMPLEMENTATION +#endif + + // Perform IDCT on CPU int width = component->data_width / GPUJPEG_BLOCK_SIZE; @@ -230,6 +236,7 @@ gpujpeg_idct_cpu(struct gpujpeg_decoder* decoder) // Copy results to device uint8_t* data = NULL; +#ifdef GPUJPEG_USE_CUDA assert(cudaMallocHost((void**)&data, component->data_size * sizeof(uint8_t)) == cudaSuccess); for ( int y = 0; y < height; y++ ) { for ( int x = 0; x < width; x++ ) { @@ -248,5 +255,8 @@ gpujpeg_idct_cpu(struct gpujpeg_decoder* decoder) } cudaMemcpy(component->d_data, data, component->data_size * sizeof(uint8_t), cudaMemcpyHostToDevice); cudaFreeHost(data); +#else + // TODO: NEED IMPLEMENTATION +#endif } } diff --git a/src/gpujpeg_decoder.c b/src/gpujpeg_decoder.c index d73485e4..fa41a15b 100644 --- a/src/gpujpeg_decoder.c +++ b/src/gpujpeg_decoder.c @@ -30,13 +30,16 @@ #include "../libgpujpeg/gpujpeg_decoder.h" #include "gpujpeg_dct_cpu.h" -#include "gpujpeg_dct_gpu.h" #include "gpujpeg_decoder_internal.h" #include "gpujpeg_huffman_cpu_decoder.h" -#include "gpujpeg_huffman_gpu_decoder.h" -#include "gpujpeg_postprocessor.h" #include "gpujpeg_util.h" +#ifdef GPUJPEG_USE_CUDA + #include "gpujpeg_dct_gpu.h" + #include "gpujpeg_huffman_gpu_decoder.h" + #include "gpujpeg_postprocessor.h" +#endif + /* Documented at declaration */ void gpujpeg_decoder_output_set_default(struct gpujpeg_decoder_output* output) @@ -119,6 +122,7 @@ gpujpeg_decoder_create(cudaStream_t stream) if ( decoder->reader == NULL ) result = 0; +#ifdef GPUJPEG_USE_CUDA // Allocate quantization tables in device memory for ( int comp_type = 0; comp_type < GPUJPEG_MAX_COMPONENT_COUNT; comp_type++ ) { if ( 
cudaSuccess != cudaMalloc((void**)&decoder->table_quantization[comp_type].d_table, 64 * sizeof(uint16_t)) ) @@ -142,6 +146,10 @@ gpujpeg_decoder_create(cudaStream_t stream) if ((decoder->huffman_gpu_decoder = gpujpeg_huffman_gpu_decoder_init()) == NULL) { result = 0; } +#else + // TODO: NEED IMPLEMENTATION + result = 0; +#endif // Stream decoder->stream = stream; @@ -193,10 +201,14 @@ gpujpeg_decoder_init(struct gpujpeg_decoder* decoder, const struct gpujpeg_param } // Init postprocessor +#ifdef GPUJPEG_USE_CUDA if ( gpujpeg_preprocessor_decoder_init(&decoder->coder) != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Failed to init postprocessor!\n"); return -1; } +#else + // TODO: NOT YET IMPLEMENTED +#endif return 0; } @@ -235,6 +247,7 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i } // Perform huffman decoding on CPU (when there are not enough segments to saturate GPU) +#ifdef GPUJPEG_USE_CUDA if (coder->segment_count < 32 || unsupp_gpu_huffman_params) { GPUJPEG_CUSTOM_TIMER_START(coder->duration_huffman_coder, coder->param.perf_stats, decoder->stream, return -1); if (0 != gpujpeg_huffman_cpu_decoder_decode(decoder)) { @@ -283,7 +296,6 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_dct_quantization, coder->param.perf_stats, decoder->stream, return -1); - // Create buffers if not already created if (coder->data_raw == NULL) { if (cudaSuccess != cudaMallocHost((void**)&coder->data_raw, coder->data_raw_size * sizeof(uint8_t))) { return -1; @@ -294,6 +306,9 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i return -1; } } +#else + // TODO: NEED IMPLEMENTATION +#endif // Select CUDA output buffer if (output->type == GPUJPEG_DECODER_OUTPUT_CUSTOM_CUDA_BUFFER) { @@ -317,6 +332,7 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i } // Preprocessing +#ifdef GPUJPEG_USE_CUDA 
GPUJPEG_CUSTOM_TIMER_START(coder->duration_preprocessor, coder->param.perf_stats, decoder->stream, return -1); rc = gpujpeg_preprocessor_decode(&decoder->coder, decoder->stream); if (rc != GPUJPEG_NOERR) { @@ -326,6 +342,9 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i // Wait for async operations before copying from the device cudaStreamSynchronize(decoder->stream); +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_in_gpu, coder->param.perf_stats, decoder->stream, return -1); @@ -335,12 +354,16 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i output->pixel_format = decoder->coder.param_image.pixel_format; // Set decompressed image + if (output->type == GPUJPEG_DECODER_OUTPUT_INTERNAL_BUFFER) { GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_from, coder->param.perf_stats, decoder->stream, return -1); // Copy decompressed image to host memory +#ifdef GPUJPEG_USE_CUDA cudaMemcpy(coder->data_raw, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost); - +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_from, coder->param.perf_stats, decoder->stream, return -1); // Set output to internal buffer @@ -352,8 +375,11 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i assert(output->data != NULL); // Copy decompressed image to host memory +#ifdef GPUJPEG_USE_CUDA cudaMemcpy(output->data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost); - +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_from, coder->param.perf_stats, decoder->stream, return -1); } else if (output->type == GPUJPEG_DECODER_OUTPUT_OPENGL_TEXTURE) { @@ -369,10 +395,12 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_map, 
coder->param.perf_stats, decoder->stream, return -1); GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_from, coder->param.perf_stats, decoder->stream, return -1); - // Copy decompressed image to texture pixel buffer object device data +#ifdef GPUJPEG_USE_CUDA cudaMemcpy(d_data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToDevice); - +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_from, coder->param.perf_stats, decoder->stream, return -1); } @@ -423,7 +451,7 @@ gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder) if (0 != gpujpeg_coder_deinit(&decoder->coder)) { return -1; } - +#ifdef GPUJPEG_USE_CUDA for (int comp_type = 0; comp_type < GPUJPEG_MAX_COMPONENT_COUNT; comp_type++) { if (decoder->table_quantization[comp_type].d_table != NULL) { cudaFree(decoder->table_quantization[comp_type].d_table); @@ -443,6 +471,9 @@ gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder) if (decoder->huffman_gpu_decoder != NULL) { gpujpeg_huffman_gpu_decoder_destroy(decoder->huffman_gpu_decoder); } +#else + // TODO: NEED IMPLEMENTATION +#endif free(decoder); diff --git a/src/gpujpeg_decoder_internal.h b/src/gpujpeg_decoder_internal.h index 753e8b1e..22f27968 100644 --- a/src/gpujpeg_decoder_internal.h +++ b/src/gpujpeg_decoder_internal.h @@ -67,8 +67,12 @@ struct gpujpeg_decoder /// Current data compressed size for decoded image size_t data_compressed_size; +#ifdef GPUJPEG_USE_CUDA // Stream cudaStream_t stream; +#else + void* stream; +#endif }; #endif // GPUJPEG_DECODER_INTERNAL_H diff --git a/src/gpujpeg_encoder.c b/src/gpujpeg_encoder.c index 3843d07e..bd9758db 100644 --- a/src/gpujpeg_encoder.c +++ b/src/gpujpeg_encoder.c @@ -31,14 +31,17 @@ #include #include #include "../libgpujpeg/gpujpeg_encoder.h" -#include "gpujpeg_preprocessor.h" #include "gpujpeg_dct_cpu.h" -#include "gpujpeg_dct_gpu.h" #include "gpujpeg_huffman_cpu_encoder.h" -#include "gpujpeg_huffman_gpu_encoder.h" #include 
"gpujpeg_marker.h" #include "gpujpeg_util.h" +#ifdef GPUJPEG_USE_CUDA + #include "gpujpeg_dct_gpu.h" + #include "gpujpeg_huffman_gpu_encoder.h" + #include "gpujpeg_preprocessor.h" +#endif + /* Documented at declaration */ void gpujpeg_encoder_input_set_image(struct gpujpeg_encoder_input* input, uint8_t* image) @@ -94,6 +97,7 @@ gpujpeg_encoder_create(cudaStream_t stream) coder->encoder = 1; // Allocate quantization tables in device memory +#ifdef GPUJPEG_USE_CUDA for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) { if ( cudaSuccess != cudaMalloc((void**)&encoder->table_quantization[comp_type].d_table, 64 * sizeof(uint16_t)) ) { result = 0; @@ -103,6 +107,9 @@ gpujpeg_encoder_create(cudaStream_t stream) } } gpujpeg_cuda_check_error("Encoder table allocation", return NULL); +#else + // TODO: NEED IMPLEMENTATION +#endif // Init huffman tables for encoder for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) { @@ -111,13 +118,21 @@ gpujpeg_encoder_create(cudaStream_t stream) result = 0; } } +#ifdef GPUJPEG_USE_CUDA gpujpeg_cuda_check_error("Encoder table init", return NULL); +#else + // TODO: NEED IMPLEMENTATION +#endif // Init huffman encoder +#ifdef GPUJPEG_USE_CUDA encoder->huffman_gpu_encoder = gpujpeg_huffman_gpu_encoder_create(encoder); if (encoder->huffman_gpu_encoder == NULL) { result = 0; } +#else + // TODO: NEED IMPLEMENTATION +#endif if ( result == 0 ) { gpujpeg_encoder_destroy(encoder); @@ -146,10 +161,17 @@ size_t gpujpeg_encoder_max_pixels(struct gpujpeg_parameters * param, struct gpuj param_image->width = (int) sqrt((float) pixels); param_image->height = (pixels + param_image->width - 1) / param_image->width; //printf("\nIteration #%d (pixels: %d, size: %dx%d)\n", iteration++, pixels, param_image->width, param_image->height); + +#ifdef GPUJPEG_USE_CUDA size_t image_memory_size = gpujpeg_coder_init_image(&coder, param, param_image, cudaStreamDefault); +#else + // TODO: NEED IMPLEMENTATION + size_t 
image_memory_size = 0; +#endif if (image_memory_size == 0) { break; } + size_t allocated_memory_size = 0; allocated_memory_size += encoder_memory_size; allocated_memory_size += image_memory_size; @@ -199,8 +221,14 @@ size_t gpujpeg_encoder_max_memory(struct gpujpeg_parameters * param, struct gpuj param_image->width = (int) sqrt((float) max_pixels); param_image->height = (max_pixels + param_image->width - 1) / param_image->width; - + +#ifdef GPUJPEG_USE_CUDA size_t image_memory_size = gpujpeg_coder_init_image(&coder, param, param_image, cudaStreamDefault); +#else + // TODO: NEED IMPLEMENTATION + size_t image_memory_size = 0; +#endif + if (image_memory_size == 0) { return 0; } @@ -237,6 +265,7 @@ int gpujpeg_encoder_allocate(struct gpujpeg_encoder * encoder, const struct gpuj if (coder->data_raw_size > coder->data_raw_allocated_size) { coder->data_raw_allocated_size = 0; +#ifdef GPUJPEG_USE_CUDA // (Re)allocate raw data in device memory if (coder->d_data_raw_allocated != NULL) { cudaFree(coder->d_data_raw_allocated); @@ -244,7 +273,9 @@ int gpujpeg_encoder_allocate(struct gpujpeg_encoder * encoder, const struct gpuj } cudaMalloc((void**)&coder->d_data_raw_allocated, coder->data_raw_size); gpujpeg_cuda_check_error("Encoder raw data allocation", return -1); - +#else + // TODO: NEED IMPLEMENTATION +#endif coder->data_raw_allocated_size = coder->data_raw_size; } } @@ -303,8 +334,13 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter return -1; } } +#ifdef GPUJPEG_USE_CUDA gpujpeg_cuda_check_error("Quantization init", return -1); +#else + // TODO: NEED IMPLEMENTATION +#endif } + if (0 == gpujpeg_coder_init_image(coder, param, param_image, encoder->stream)) { fprintf(stderr, "[GPUJPEG] [Error] Failed to init image encoding!\n"); return -1; @@ -317,16 +353,21 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter } // (Re)initialize preprocessor +#ifdef GPUJPEG_USE_CUDA if 
(gpujpeg_preprocessor_encoder_init(&encoder->coder) != 0) { fprintf(stderr, "[GPUJPEG] [Error] Failed to init preprocessor!\n"); return -1; } +#else + // TODO: NEED IMPLEMENTATION +#endif // Load input image if ( input->type == GPUJPEG_ENCODER_INPUT_IMAGE ) { GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_to, coder->param.perf_stats, encoder->stream, return -1); // Allocate raw data internal buffer +#ifdef GPUJPEG_USE_CUDA if (coder->data_raw_size > coder->data_raw_allocated_size) { coder->data_raw_allocated_size = 0; @@ -346,7 +387,9 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter // Copy image to device memory cudaMemcpyAsync(coder->d_data_raw, input->image, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyHostToDevice, encoder->stream); gpujpeg_cuda_check_error("Encoder raw data copy", return -1); - +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_to, coder->param.perf_stats, encoder->stream, return -1); } else if (input->type == GPUJPEG_ENCODER_INPUT_GPU_IMAGE) { @@ -358,6 +401,7 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_map, coder->param.perf_stats, encoder->stream, return -1); // Create buffers if not already created +#ifdef GPUJPEG_USE_CUDA if (coder->data_raw_size > coder->data_raw_allocated_size) { coder->data_raw_allocated_size = 0; @@ -371,6 +415,9 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter coder->data_raw_allocated_size = coder->data_raw_size; } +#else + // TODO: NEED IMPLEMENTATION +#endif coder->d_data_raw = coder->d_data_raw_allocated; // Map texture to CUDA @@ -382,7 +429,11 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_to, coder->param.perf_stats, encoder->stream, return -1); // Copy image data from texture pixel buffer object to device data 
+#ifdef GPUJPEG_USE_CUDA cudaMemcpyAsync(coder->d_data_raw, d_data, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToDevice, encoder->stream); +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_to, coder->param.perf_stats, encoder->stream, return -1); GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_unmap, coder->param.perf_stats, encoder->stream, return -1); @@ -399,7 +450,7 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter //gpujpeg_table_print(encoder->table[JPEG_COMPONENT_LUMINANCE]); //gpujpeg_table_print(encoder->table[JPEG_COMPONENT_CHROMINANCE]); - +#ifdef GPUJPEG_USE_CUDA GPUJPEG_CUSTOM_TIMER_START(coder->duration_in_gpu, coder->param.perf_stats, encoder->stream, return -1); GPUJPEG_CUSTOM_TIMER_START(coder->duration_preprocessor, coder->param.perf_stats, encoder->stream, return -1); @@ -416,6 +467,9 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter if (0 != gpujpeg_dct_gpu(encoder)) { return -1; } +#else + // TODO: NEED IMPLEMENTATION +#endif // If restart interval is 0 then the GPU processing is in the end (even huffman coder will be performed on CPU) if (coder->param.restart_interval == 0) { @@ -434,10 +488,14 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter if ( coder->param.restart_interval == 0 ) { GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_from, coder->param.perf_stats, encoder->stream, return -1); // Copy quantized data from device memory to cpu memory +#ifdef GPUJPEG_USE_CUDA cudaMemcpyAsync(coder->data_quantized, coder->d_data_quantized, coder->data_size * sizeof(int16_t), cudaMemcpyDeviceToHost, encoder->stream); // Wait for async operations before the coding cudaStreamSynchronize(encoder->stream); +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_from, coder->param.perf_stats, encoder->stream, return -1); 
GPUJPEG_CUSTOM_TIMER_START(coder->duration_huffman_coder, coder->param.perf_stats, encoder->stream, return -1); @@ -453,15 +511,20 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter GPUJPEG_CUSTOM_TIMER_START(coder->duration_huffman_coder, coder->param.perf_stats, encoder->stream, return -1); // Perform huffman coding unsigned int output_size; +#ifdef GPUJPEG_USE_CUDA if ( gpujpeg_huffman_gpu_encoder_encode(encoder, encoder->huffman_gpu_encoder, &output_size) != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Huffman encoder on GPU failed!\n"); return -1; } - +#else + // TODO: NEED IMPLEMENTATION + return -1; +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_huffman_coder, coder->param.perf_stats, encoder->stream, return -1); GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_in_gpu, coder->param.perf_stats, encoder->stream, return -1); GPUJPEG_CUSTOM_TIMER_START(coder->duration_memory_from, coder->param.perf_stats, encoder->stream, return -1); +#ifdef GPUJPEG_USE_CUDA // Copy compressed data from device memory to cpu memory if ( cudaSuccess != cudaMemcpyAsync(coder->data_compressed, coder->d_data_compressed, output_size, cudaMemcpyDeviceToHost, encoder->stream) ) { return -1; @@ -473,6 +536,9 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_parameter // Wait for async operations before formatting cudaStreamSynchronize(encoder->stream); +#else + // TODO: NEED IMPLEMENTATION +#endif GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_memory_from, coder->param.perf_stats, encoder->stream, return -1); GPUJPEG_CUSTOM_TIMER_START(coder->duration_stream, coder->param.perf_stats, encoder->stream, return -1); @@ -562,18 +628,26 @@ gpujpeg_encoder_destroy(struct gpujpeg_encoder* encoder) assert(encoder != NULL); if (encoder->huffman_gpu_encoder != NULL) { +#ifdef GPUJPEG_USE_CUDA gpujpeg_huffman_gpu_encoder_destroy(encoder->huffman_gpu_encoder); +#else + // TODO: NEED TO BE IMPLEMENTED +#endif } if (gpujpeg_coder_deinit(&encoder->coder) != 0) 
{ return -1; } for (int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++) { +#ifdef GPUJPEG_USE_CUDA if (encoder->table_quantization[comp_type].d_table != NULL) { cudaFree(encoder->table_quantization[comp_type].d_table); } if (encoder->table_quantization[comp_type].d_table_forward != NULL) { cudaFree(encoder->table_quantization[comp_type].d_table_forward); } +#else + // TODO: NEED TO BE IMPLEMENTED +#endif } if (encoder->writer != NULL) { gpujpeg_writer_destroy(encoder->writer); diff --git a/src/gpujpeg_encoder_internal.h b/src/gpujpeg_encoder_internal.h index fc5dadfb..32dea1a4 100644 --- a/src/gpujpeg_encoder_internal.h +++ b/src/gpujpeg_encoder_internal.h @@ -63,8 +63,12 @@ struct gpujpeg_encoder /// JPEG header to be emitted enum gpujpeg_header_type header_type; +#ifdef GPUJPEG_USE_CUDA // Stream cudaStream_t stream; +#else + void* stream; +#endif }; #ifdef __cplusplus diff --git a/src/gpujpeg_reader.c b/src/gpujpeg_reader.c index 6aaa45c2..9c089cb2 100644 --- a/src/gpujpeg_reader.c +++ b/src/gpujpeg_reader.c @@ -871,8 +871,12 @@ gpujpeg_reader_read_dht(struct gpujpeg_decoder* decoder, uint8_t** image, const gpujpeg_table_huffman_decoder_compute(table); // Copy table to device memory +#ifdef GPUJPEG_USE_CUDA cudaMemcpyAsync(d_table, table, sizeof(struct gpujpeg_table_huffman_decoder), cudaMemcpyHostToDevice, decoder->stream); gpujpeg_cuda_check_error("Decoder copy huffman table ", return -1); +#else + // TODO: NEED TO BE IMPLEMENTED +#endif } return 0; } diff --git a/src/gpujpeg_table.c b/src/gpujpeg_table.c index 8ec05d51..5b016adb 100644 --- a/src/gpujpeg_table.c +++ b/src/gpujpeg_table.c @@ -83,8 +83,8 @@ gpujpeg_table_quantization_set_default(uint8_t* table_raw, enum gpujpeg_componen void gpujpeg_table_quantization_apply_quality(uint8_t* table_raw, int quality) { - if (quality <= 0) quality = 1; - if (quality > 100) quality = 100; + if (quality <= 0) quality = 1; + if (quality > 100) quality = 100; int s = (quality < 50) ? 
(5000 / quality) : (200 - (2 * quality)); for ( int i = 0; i < 64; i++ ) { int value = (s * (int)table_raw[i] + 50) / 100; @@ -120,9 +120,13 @@ gpujpeg_table_quantization_encoder_init(struct gpujpeg_table_quantization* table } // Copy quantization table to constant memory +#ifdef GPUJPEG_USE_CUDA if ( cudaSuccess != cudaMemcpy(table->d_table_forward, h_quantization_table, 64 * sizeof(float), cudaMemcpyHostToDevice) ) return -1; gpujpeg_cuda_check_error("Copy DCT quantization table to device memory", return -1); +#else + // TODO: NEED TO BE IMPLEMENTED +#endif // DCT loads the table into GPU memory itself, after premultiplying coefficients with DCT normalization constants. return 0; @@ -144,8 +148,13 @@ gpujpeg_table_quantization_decoder_init(struct gpujpeg_table_quantization* table } // Copy tables to device memory +#ifdef GPUJPEG_USE_CUDA if ( cudaSuccess != cudaMemcpy(table->d_table, table->table, 64 * sizeof(uint16_t), cudaMemcpyHostToDevice) ) return -1; +#else + // TODO: NEED TO BE IMPLEMENTED + return -1; +#endif return 0; } @@ -159,9 +168,14 @@ gpujpeg_table_quantization_decoder_compute(struct gpujpeg_table_quantization* ta } // Copy tables to device memory +#ifdef GPUJPEG_USE_CUDA if ( cudaSuccess != cudaMemcpy(table->d_table, table->table, 64 * sizeof(uint16_t), cudaMemcpyHostToDevice) ) return -1; - +#else + // TODO: NEED TO BE IMPLEMENTED + return -1; +#endif + return 0; } @@ -311,30 +325,30 @@ gpujpeg_table_huffman_encoder_init(struct gpujpeg_table_huffman_encoder* table, { assert(comp_type == GPUJPEG_COMPONENT_LUMINANCE || comp_type == GPUJPEG_COMPONENT_CHROMINANCE); assert(huff_type == GPUJPEG_HUFFMAN_DC || huff_type == GPUJPEG_HUFFMAN_AC); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_dc_bits), "table buffer too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_dc_value), "table buffer too small"); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_ac_bits), "table buffer 
too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_ac_value), "table buffer too small"); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_dc_bits), "table buffer too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_dc_value), "table buffer too small"); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_ac_bits), "table buffer too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_ac_value), "table buffer too small"); - + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_dc_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_dc_value), "table buffer too small"); + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_ac_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_ac_value), "table buffer too small"); + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_dc_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_dc_value), "table buffer too small"); + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_ac_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_ac_value), "table buffer too small"); + if ( comp_type == GPUJPEG_COMPONENT_LUMINANCE ) { if ( huff_type == GPUJPEG_HUFFMAN_DC ) { - memcpy(table->bits, gpujpeg_table_huffman_y_dc_bits, sizeof(gpujpeg_table_huffman_y_dc_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_y_dc_value, sizeof(gpujpeg_table_huffman_y_dc_value)); + memcpy(table->bits, gpujpeg_table_huffman_y_dc_bits, sizeof(gpujpeg_table_huffman_y_dc_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_y_dc_value, sizeof(gpujpeg_table_huffman_y_dc_value)); } else { - memcpy(table->bits, 
gpujpeg_table_huffman_y_ac_bits, sizeof(gpujpeg_table_huffman_y_ac_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_y_ac_value, sizeof(gpujpeg_table_huffman_y_ac_value)); + memcpy(table->bits, gpujpeg_table_huffman_y_ac_bits, sizeof(gpujpeg_table_huffman_y_ac_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_y_ac_value, sizeof(gpujpeg_table_huffman_y_ac_value)); } } else if ( comp_type == GPUJPEG_COMPONENT_CHROMINANCE ) { if ( huff_type == GPUJPEG_HUFFMAN_DC ) { - memcpy(table->bits, gpujpeg_table_huffman_cbcr_dc_bits, sizeof(gpujpeg_table_huffman_cbcr_dc_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_cbcr_dc_value, sizeof(gpujpeg_table_huffman_cbcr_dc_value)); + memcpy(table->bits, gpujpeg_table_huffman_cbcr_dc_bits, sizeof(gpujpeg_table_huffman_cbcr_dc_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_cbcr_dc_value, sizeof(gpujpeg_table_huffman_cbcr_dc_value)); } else { - memcpy(table->bits, gpujpeg_table_huffman_cbcr_ac_bits, sizeof(gpujpeg_table_huffman_cbcr_ac_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_cbcr_ac_value, sizeof(gpujpeg_table_huffman_cbcr_ac_value)); + memcpy(table->bits, gpujpeg_table_huffman_cbcr_ac_bits, sizeof(gpujpeg_table_huffman_cbcr_ac_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_cbcr_ac_value, sizeof(gpujpeg_table_huffman_cbcr_ac_value)); } } gpujpeg_table_huffman_encoder_compute(table); @@ -348,30 +362,30 @@ gpujpeg_table_huffman_decoder_init(struct gpujpeg_table_huffman_decoder* table, { assert(comp_type == GPUJPEG_COMPONENT_LUMINANCE || comp_type == GPUJPEG_COMPONENT_CHROMINANCE); assert(huff_type == GPUJPEG_HUFFMAN_DC || huff_type == GPUJPEG_HUFFMAN_AC); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_dc_bits), "table buffer too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_dc_value), "table buffer too small"); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_ac_bits), "table buffer too small"); - 
_Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_ac_value), "table buffer too small"); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_dc_bits), "table buffer too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_dc_value), "table buffer too small"); - _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_ac_bits), "table buffer too small"); - _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_ac_value), "table buffer too small"); - + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_dc_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_dc_value), "table buffer too small"); + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_y_ac_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_y_ac_value), "table buffer too small"); + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_dc_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_dc_value), "table buffer too small"); + _Static_assert(sizeof(table->bits) >= sizeof(gpujpeg_table_huffman_cbcr_ac_bits), "table buffer too small"); + _Static_assert(sizeof(table->huffval) >= sizeof(gpujpeg_table_huffman_cbcr_ac_value), "table buffer too small"); + if ( comp_type == GPUJPEG_COMPONENT_LUMINANCE ) { if ( huff_type == GPUJPEG_HUFFMAN_DC ) { - memcpy(table->bits, gpujpeg_table_huffman_y_dc_bits, sizeof(gpujpeg_table_huffman_y_dc_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_y_dc_value, sizeof(gpujpeg_table_huffman_y_dc_value)); + memcpy(table->bits, gpujpeg_table_huffman_y_dc_bits, sizeof(gpujpeg_table_huffman_y_dc_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_y_dc_value, sizeof(gpujpeg_table_huffman_y_dc_value)); } else { - memcpy(table->bits, 
gpujpeg_table_huffman_y_ac_bits, sizeof(gpujpeg_table_huffman_y_ac_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_y_ac_value, sizeof(gpujpeg_table_huffman_y_ac_value)); + memcpy(table->bits, gpujpeg_table_huffman_y_ac_bits, sizeof(gpujpeg_table_huffman_y_ac_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_y_ac_value, sizeof(gpujpeg_table_huffman_y_ac_value)); } } else if ( comp_type == GPUJPEG_COMPONENT_CHROMINANCE ) { if ( huff_type == GPUJPEG_HUFFMAN_DC ) { - memcpy(table->bits, gpujpeg_table_huffman_cbcr_dc_bits, sizeof(gpujpeg_table_huffman_cbcr_dc_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_cbcr_dc_value, sizeof(gpujpeg_table_huffman_cbcr_dc_value)); + memcpy(table->bits, gpujpeg_table_huffman_cbcr_dc_bits, sizeof(gpujpeg_table_huffman_cbcr_dc_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_cbcr_dc_value, sizeof(gpujpeg_table_huffman_cbcr_dc_value)); } else { - memcpy(table->bits, gpujpeg_table_huffman_cbcr_ac_bits, sizeof(gpujpeg_table_huffman_cbcr_ac_bits)); - memcpy(table->huffval, gpujpeg_table_huffman_cbcr_ac_value, sizeof(gpujpeg_table_huffman_cbcr_ac_value)); + memcpy(table->bits, gpujpeg_table_huffman_cbcr_ac_bits, sizeof(gpujpeg_table_huffman_cbcr_ac_bits)); + memcpy(table->huffval, gpujpeg_table_huffman_cbcr_ac_value, sizeof(gpujpeg_table_huffman_cbcr_ac_value)); } } gpujpeg_table_huffman_decoder_compute(table); diff --git a/src/gpujpeg_util.h b/src/gpujpeg_util.h index 4c031ba9..2953315f 100644 --- a/src/gpujpeg_util.h +++ b/src/gpujpeg_util.h @@ -36,14 +36,17 @@ #include #include #include -#include +#ifdef GPUJPEG_USE_CUDA + #include +#endif #ifdef __cplusplus extern "C" { #endif #define GPUJPEG_CLAMP(x, low, high) (((x) > (high)) ? (high) : (((x) < (low)) ? 
(low) : (x))) - + +#ifdef GPUJPEG_USE_CUDA // CUDA check error #define gpujpeg_cuda_check_error(msg, action) \ { \ @@ -59,8 +62,12 @@ extern "C" { cmd;\ gpujpeg_cuda_check_error(msg, action)\ } while(0) +#else +#define GPUJPEG_CHECK_EX(cmd, msg, action) +#endif + #define GPUJPEG_CHECK(cmd, action) GPUJPEG_CHECK_EX(cmd, #cmd, action) - + // Divide and round up #define gpujpeg_div_and_round_up(value, div) \ ((((value) % (div)) != 0) ? ((value) / (div) + 1) : ((value) / (div))) diff --git a/test/decoder_gltex/main.c b/test/decoder_gltex/main.c index e3d9e36e..12cc4a69 100644 --- a/test/decoder_gltex/main.c +++ b/test/decoder_gltex/main.c @@ -1,8 +1,13 @@ #include #include +#include #include "gpujpeg_reformat.h" #include +#ifdef __APPLE__ +#include +#else #include +#endif int g_texture_id; int g_width; @@ -122,7 +127,7 @@ int main(int argc, char *argv[]) // Get data from OpenGL texture uint8_t* data = NULL; size_t data_size = 0; - data = malloc(param_image.width * param_image.height * param_image.comp_count); + data = (uint8_t*)malloc(param_image.width * param_image.height * param_image.comp_count); gpujpeg_opengl_texture_get_data(texture->texture_id, data, &data_size); // Save image diff --git a/test/misc/mt_encode.c b/test/misc/mt_encode.c index dc0932af..df94cc19 100644 --- a/test/misc/mt_encode.c +++ b/test/misc/mt_encode.c @@ -1,4 +1,6 @@ -#include +#ifdef GPUJPEG_USE_CUDA + #include +#endif #include #include #include diff --git a/test/opengl_interop/util.h b/test/opengl_interop/util.h index 77fa5abd..ee52e6e2 100644 --- a/test/opengl_interop/util.h +++ b/test/opengl_interop/util.h @@ -36,12 +36,19 @@ #include #include #include -#include -#include +#ifdef GPUJPEG_USE_CUDA + #include + #include +#endif #include +#ifdef __APPLE__ +#include +#include +#else #include #include #include +#endif /** * Check CUDA error diff --git a/test/unit/run_tests.c b/test/unit/run_tests.c index 25869123..5e7a928a 100644 --- a/test/unit/run_tests.c +++ b/test/unit/run_tests.c @@ -1,5 
+1,7 @@ #include +#ifdef GPUJPEG_USE_CUDA #include +#endif #include #include #include "../../src/gpujpeg_common_internal.h" @@ -53,10 +55,15 @@ static void encode_gpu_mem_as_cpu() { uint8_t *image = NULL; size_t len = param_image.width * param_image.height * 3 / 2; +#ifdef GPUJPEG_USE_CUDA if (cudaSuccess != cudaMalloc((void**) &image, len)) { abort(); } cudaMemset(image, 0, len); +#else + // TODO: NEED TO BE IMPLEMENTED + abort(); +#endif struct gpujpeg_encoder_input encoder_input; gpujpeg_encoder_input_set_image(&encoder_input, image); @@ -68,7 +75,11 @@ static void encode_gpu_mem_as_cpu() { abort(); } +#ifdef GPUJPEG_USE_CUDA cudaFree(image); +#else + abort(); +#endif gpujpeg_encoder_destroy(encoder); }