Skip to content

Commit

Permalink
OpenCL support for late D65 correction
Browse files Browse the repository at this point in the history
Adding the correction coefficients; no measurable performance penalty.
  • Loading branch information
jenshannoschwalm committed Oct 29, 2023
1 parent 4cbfa5b commit 36a586e
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 23 deletions.
24 changes: 20 additions & 4 deletions data/kernels/basic.cl
Original file line number Diff line number Diff line change
Expand Up @@ -1364,20 +1364,35 @@ lerp_lookup_unbounded0(read_only image2d_t lut, const float x, global const floa
else return x;
}

/* colorin plugin kernel, plain correction: each pixel read from `in` is multiplied
   component-wise by the four coefficients corr[0..3] and written to `out`. */
kernel void
colorin_correct (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
                 global const float *corr)
{
  const int col = get_global_id(0);
  const int row = get_global_id(1);

  // work items lying outside the width x height area have nothing to do
  if(col >= width || row >= height) return;

  const int2 pos = (int2)(col, row);
  const float4 scale = (float4)(corr[0], corr[1], corr[2], corr[3]);
  const float4 src = read_imagef(in, sampleri, pos);

  write_imagef(out, pos, scale * src);
}

/* kernel for the plugin colorin: unbound processing */
kernel void
colorin_unbound (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
global float *cmat, global float *lmat,
read_only image2d_t lutr, read_only image2d_t lutg, read_only image2d_t lutb,
const int blue_mapping, global const float (*const a)[3])
const int blue_mapping, global const float (*const a)[3], global const float *corr)
{
const int x = get_global_id(0);
const int y = get_global_id(1);

if(x >= width || y >= height) return;

float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
const float4 corval = (const float4)(corr[0], corr[1], corr[2], corr[3]);
float4 pixel = corval * read_imagef(in, sampleri, (int2)(x, y));

float cam[3], XYZ[3];
cam[0] = lerp_lookup_unbounded0(lutr, pixel.x, a[0]);
Expand Down Expand Up @@ -1420,14 +1435,15 @@ kernel void
colorin_clipping (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
global float *cmat, global float *lmat,
read_only image2d_t lutr, read_only image2d_t lutg, read_only image2d_t lutb,
const int blue_mapping, global const float (*const a)[3])
const int blue_mapping, global const float (*const a)[3], global const float *corr)
{
const int x = get_global_id(0);
const int y = get_global_id(1);

if(x >= width || y >= height) return;

float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
const float4 corval = (const float4)(corr[0], corr[1], corr[2], corr[3]);
float4 pixel = corval * read_imagef(in, sampleri, (int2)(x, y));

float cam[3], RGB[3], XYZ[3];
cam[0] = lerp_lookup_unbounded0(lutr, pixel.x, a[0]);
Expand Down
66 changes: 47 additions & 19 deletions src/iop/colorin.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ typedef struct dt_iop_colorin_global_data_t
{
int kernel_colorin_unbound;
int kernel_colorin_clipping;
int kernel_colorin_correction;
} dt_iop_colorin_global_data_t;

typedef struct dt_iop_colorin_data_t
Expand Down Expand Up @@ -491,13 +492,15 @@ void init_global(dt_iop_module_so_t *module)
module->data = gd;
gd->kernel_colorin_unbound = dt_opencl_create_kernel(program, "colorin_unbound");
gd->kernel_colorin_clipping = dt_opencl_create_kernel(program, "colorin_clipping");
gd->kernel_colorin_correction = dt_opencl_create_kernel(program, "colorin_correct");
}

/* Free every OpenCL kernel stored in the module's global data, then release
   the global data itself and clear the module's pointer to it. */
void cleanup_global(dt_iop_module_so_t *module)
{
  dt_iop_colorin_global_data_t *const global = (dt_iop_colorin_global_data_t *)module->data;

  // same release order as before: unbound, clipping, correction
  const int kernels[] = { global->kernel_colorin_unbound,
                          global->kernel_colorin_clipping,
                          global->kernel_colorin_correction };
  for(size_t i = 0; i < sizeof(kernels) / sizeof(kernels[0]); i++)
    dt_opencl_free_kernel(kernels[i]);

  free(module->data);
  module->data = NULL;
}
Expand Down Expand Up @@ -655,12 +658,32 @@ int process_cl(struct dt_iop_module_t *self,
{
dt_iop_colorin_data_t *d = (dt_iop_colorin_data_t *)piece->data;
dt_iop_colorin_global_data_t *gd = (dt_iop_colorin_global_data_t *)self->global_data;

const dt_dev_chroma_t *chr = &self->dev->chroma;
const gboolean corrected = dt_dev_is_D65_chroma(self->dev) && chr->late_correction;
dt_aligned_pixel_t coeffs = { corrected ? chr->D65coeffs[0] / chr->as_shot[0] : 1.0f,
corrected ? chr->D65coeffs[1] / chr->as_shot[1] : 1.0f,
corrected ? chr->D65coeffs[2] / chr->as_shot[2] : 1.0f,
corrected ? chr->D65coeffs[3] / chr->as_shot[3] : 1.0f };
if(corrected)
{
for_four_channels(k)
{
piece->pipe->dsc.temperature.coeffs[k] *= coeffs[k];
piece->pipe->dsc.processed_maximum[k] *= coeffs[k];
}
}

cl_mem dev_m = NULL, dev_l = NULL, dev_r = NULL;
cl_mem dev_g = NULL, dev_b = NULL, dev_coeffs = NULL;
cl_mem dev_corr = NULL;

dt_print_pipe(DT_DEBUG_PARAMS,
"matrix conversion on GPU", piece->pipe, self,
roi_in, roi_out, "`%s'\n", dt_colorspaces_get_name(d->type, NULL));
"matrix conversion OpenCL",
piece->pipe, self, roi_in, roi_out, "`%s', %s: %.3f %.3f %.3f\n",
dt_colorspaces_get_name(d->type, NULL),
corrected ? "corrected by" : "",
coeffs[0], coeffs[1], coeffs[2]);
int kernel;
float cmat[9], lmat[9];

Expand All @@ -684,11 +707,23 @@ int process_cl(struct dt_iop_module_t *self,
const int width = roi_in->width;
const int height = roi_in->height;

dev_corr = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * 4, coeffs);
if(dev_corr == NULL) goto error;

if(d->type == DT_COLORSPACE_LAB)
{
size_t origin[] = { 0, 0, 0 };
size_t region[] = { roi_in->width, roi_in->height, 1 };
err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, origin, origin, region);
if(corrected)
{
err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_colorin_correction, width, height,
CLARG(dev_in), CLARG(dev_out),
CLARG(width), CLARG(height), CLARG(dev_corr));
}
else
{
size_t origin[] = { 0, 0, 0 };
size_t region[] = { roi_in->width, roi_in->height, 1 };
err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, origin, origin, region);
}
if(err != CL_SUCCESS) goto error;
return CL_SUCCESS;
}
Expand All @@ -712,14 +747,15 @@ int process_cl(struct dt_iop_module_t *self,
CLARG(width), CLARG(height),
CLARG(dev_m), CLARG(dev_l), CLARG(dev_r),
CLARG(dev_g), CLARG(dev_b),
CLARG(blue_mapping), CLARG(dev_coeffs));
CLARG(blue_mapping), CLARG(dev_coeffs), CLARG(dev_corr));
error:
dt_opencl_release_mem_object(dev_m);
dt_opencl_release_mem_object(dev_l);
dt_opencl_release_mem_object(dev_r);
dt_opencl_release_mem_object(dev_g);
dt_opencl_release_mem_object(dev_b);
dt_opencl_release_mem_object(dev_coeffs);
dt_opencl_release_mem_object(dev_corr);
return err;
}
#endif
Expand Down Expand Up @@ -1217,10 +1253,6 @@ void process(struct dt_iop_module_t *self,
ivoid, ovoid, roi_in, roi_out))
return;

/* a) not tested for best compiler performance
b) algo might want some love for lower mem footprint
like integration in called functions for performance
*/
const dt_dev_chroma_t *chr = &self->dev->chroma;
const gboolean corrected = dt_dev_is_D65_chroma(self->dev) && chr->late_correction;
const dt_aligned_pixel_t coeffs = { corrected ? chr->D65coeffs[0] / chr->as_shot[0] : 1.0f,
Expand All @@ -1229,10 +1261,6 @@ void process(struct dt_iop_module_t *self,
corrected ? chr->D65coeffs[3] / chr->as_shot[3] : 1.0f };
if(corrected)
{
dt_print_pipe(DT_DEBUG_PARAMS,
"correct D65 on CPU", piece->pipe, self, roi_in, roi_out,
"%.3f %.3f %.3f\n", coeffs[0], coeffs[1], coeffs[2]);

for_four_channels(k)
{
piece->pipe->dsc.temperature.coeffs[k] *= coeffs[k];
Expand All @@ -1245,8 +1273,11 @@ void process(struct dt_iop_module_t *self,
d->blue_mapping && dt_image_is_matrix_correction_supported(&piece->pipe->image);

dt_print_pipe(DT_DEBUG_PARAMS,
"matrix conversion on CPU", piece->pipe, self,
roi_in, roi_out, "`%s'\n", dt_colorspaces_get_name(d->type, NULL));
"matrix conversion on CPU",
piece->pipe, self, roi_in, roi_out, "`%s', %s: %.3f %.3f %.3f\n",
dt_colorspaces_get_name(d->type, NULL),
corrected ? "corrected by" : "",
coeffs[0], coeffs[1], coeffs[2]);

if(d->type == DT_COLORSPACE_LAB)
{
Expand Down Expand Up @@ -1362,9 +1393,6 @@ void commit_params(struct dt_iop_module_t *self,
d->nonlinearlut = FALSE;
piece->process_cl_ready = TRUE;

// FIXME: preliminary until we have OpenCL supporting this
piece->process_cl_ready = FALSE;

dt_colorspaces_color_profile_type_t type = p->type;
if(type == DT_COLORSPACE_LAB)
{
Expand Down

0 comments on commit 36a586e

Please sign in to comment.