From 2195828d46e621f10f937247f57f674fa3c096f7 Mon Sep 17 00:00:00 2001 From: Aleksandr Karpinskii Date: Tue, 24 Sep 2024 15:39:39 +0400 Subject: [PATCH 1/9] Optimize python part --- src/blurhash/__init__.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/blurhash/__init__.py b/src/blurhash/__init__.py index 8bf74b7..1200a30 100644 --- a/src/blurhash/__init__.py +++ b/src/blurhash/__init__.py @@ -1,18 +1,13 @@ -from __future__ import absolute_import -from itertools import chain - +from enum import Enum from PIL import Image -from six.moves import zip -from enum import Enum - from ._functions import ffi as _ffi, lib as _lib from ._version import version as __version__ -__all__ = 'encode', 'decode', 'is_valid_blurhash', 'PixelMode', \ - 'BlurhashDecodeError', '__version__' +__all__ = ('encode', 'decode', 'is_valid_blurhash', 'PixelMode', + 'BlurhashDecodeError', '__version__') class PixelMode(Enum): @@ -34,10 +29,7 @@ def encode(image, x_components, y_components): image = Image.open(image) if image.mode != 'RGB': image = image.convert('RGB') - red_band = image.getdata(band=0) - green_band = image.getdata(band=1) - blue_band = image.getdata(band=2) - rgb_data = list(chain.from_iterable(zip(red_band, green_band, blue_band))) + rgb_data = image.tobytes() width, height = image.size image.close() From cbe5fbf1184716ebd7d47ffe7eb613c0e43aa428 Mon Sep 17 00:00:00 2001 From: Aleksandr Karpinskii Date: Tue, 24 Sep 2024 15:39:49 +0400 Subject: [PATCH 2/9] Optimize C code --- src/common.h | 10 +++++----- src/encode.c | 35 +++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/common.h b/src/common.h index ce58144..3cea89b 100644 --- a/src/common.h +++ b/src/common.h @@ -1,7 +1,7 @@ #ifndef __BLURHASH_COMMON_H__ #define __BLURHASH_COMMON_H__ -#include +#include #ifndef M_PI #define M_PI 3.14159265358979323846 @@ -13,10 +13,10 @@ static inline int linearTosRGB(float value) { else return (1.055 * powf(v, 1 / 2.4) - 0.055) * 255 + 0.5; } -static inline float sRGBToLinear(int value) { - float v = (float)value / 255; - if(v <= 0.04045) return v / 12.92; - else return powf((v + 0.055) / 1.055, 2.4); +static inline float sRGBToLinear(uint8_t value) { + float v = value * (1 / 255.0); + if(v <= 0.04045) return v * (1 / 12.92); + else return powf((v + 0.055) * (1 /1.055), 2.4); } static inline float signPow(float value, float exp) { diff --git a/src/encode.c b/src/encode.c index aa2a990..4628dea 100644 --- a/src/encode.c +++ b/src/encode.c @@ -1,13 +1,9 @@ #include #include #include -#include #include "common.h" -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif struct RGB { float r; @@ -21,6 +17,18 @@ static char *encode_int(int value, int length, char *destination); static int encodeDC(float r, float g, float b); static int encodeAC(float r, float g, float b, float maximumValue); +float *sRGBToLinear_cache = NULL; + +static void init_sRGBToLinear_cache() { + if (sRGBToLinear_cache != NULL) { + return; + } + sRGBToLinear_cache = (float *)malloc(sizeof(float) * 256); + for (int x = 0; x < 256; x++) { + sRGBToLinear_cache[x] = sRGBToLinear(x); + } +} + const char *blurHashForPixels(int xComponents, int yComponents, int width, int height, uint8_t *rgb, size_t bytesPerRow, char *destination) { if(xComponents < 1 || xComponents > 9) return NULL; if(yComponents < 1 || yComponents > 9) return NULL; @@ -32,6 +40,8 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h #endif memset(factors, 0, sizeof(factors)); + init_sRGBToLinear_cache(); + for(int y = 0; y < yComponents; y++) { for(int x = 0; x < xComponents; x++) { struct RGB factor = multiplyBasisFunction(x, y, width, height, rgb, bytesPerRow); @@ -78,16 +88,25 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h static struct RGB multiplyBasisFunction(int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow) { struct RGB result = { 0, 0, 0 }; float normalisation = (xComponent == 0 && yComponent == 0) ? 1 : 2; + float *cosx = (float *)malloc(sizeof(float) * width); + + for(int x = 0; x < width; x++) { + cosx[x] = cosf(M_PI * xComponent * x / width); + } for(int y = 0; y < height; y++) { + float cosy = cosf(M_PI * yComponent * y / height); + uint8_t *src = rgb + y * bytesPerRow; for(int x = 0; x < width; x++) { - float basis = cosf(M_PI * xComponent * x / width) * cosf(M_PI * yComponent * y / height); - result.r += basis * sRGBToLinear(rgb[3 * x + 0 + y * bytesPerRow]); - result.g += basis * sRGBToLinear(rgb[3 * x + 1 + y * bytesPerRow]); - result.b += basis * sRGBToLinear(rgb[3 * x + 2 + y * bytesPerRow]); + float basis = cosy * cosx[x]; + result.r += basis * sRGBToLinear_cache[src[3 * x + 0]]; + result.g += basis * sRGBToLinear_cache[src[3 * x + 1]]; + result.b += basis * sRGBToLinear_cache[src[3 * x + 2]]; } } + free(cosx); + float scale = normalisation / (width * height); result.r *= scale; From 6abedbafaf74d0e3d4b1b263b3ae0fc9ecefa882 Mon Sep 17 00:00:00 2001 From: Aleksandr Karpinskii Date: Tue, 24 Sep 2024 16:47:27 +0400 Subject: [PATCH 3/9] Allocate cosx array in safe function --- src/encode.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/encode.c b/src/encode.c index 4628dea..81d96fc 100644 --- a/src/encode.c +++ b/src/encode.c @@ -11,7 +11,7 @@ struct RGB { float b; }; -static struct RGB multiplyBasisFunction(int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow); +static struct RGB multiplyBasisFunction(int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow, float *cosx); static char *encode_int(int value, int length, char *destination); static int encodeDC(float r, float g, float b); @@ -42,14 +42,17 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h init_sRGBToLinear_cache(); + float *cosx = (float *)malloc(sizeof(float) * width); + if (! cosx) return NULL; for(int y = 0; y < yComponents; y++) { for(int x = 0; x < xComponents; x++) { - struct RGB factor = multiplyBasisFunction(x, y, width, height, rgb, bytesPerRow); + struct RGB factor = multiplyBasisFunction(x, y, width, height, rgb, bytesPerRow, cosx); factors[y * xComponents + x][0] = factor.r; factors[y * xComponents + x][1] = factor.g; factors[y * xComponents + x][2] = factor.b; } } + free(cosx); float *dc = factors[0]; float *ac = dc + 3; @@ -85,10 +88,9 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h return destination; } -static struct RGB multiplyBasisFunction(int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow) { +static struct RGB multiplyBasisFunction(int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow, float *cosx) { struct RGB result = { 0, 0, 0 }; float normalisation = (xComponent == 0 && yComponent == 0) ? 1 : 2; - float *cosx = (float *)malloc(sizeof(float) * width); for(int x = 0; x < width; x++) { cosx[x] = cosf(M_PI * xComponent * x / width); @@ -105,8 +107,6 @@ static struct RGB multiplyBasisFunction(int xComponent, int yComponent, int widt } } - free(cosx); - float scale = normalisation / (width * height); result.r *= scale; From ab47f4ce4a6f752a92163e647c6f2eeed2503a5c Mon Sep 17 00:00:00 2001 From: Aleksandr Karpinskii Date: Tue, 24 Sep 2024 16:53:17 +0400 Subject: [PATCH 4/9] Remove extra initialization --- src/encode.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/encode.c b/src/encode.c index 81d96fc..7058367 100644 --- a/src/encode.c +++ b/src/encode.c @@ -33,12 +33,7 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h if(xComponents < 1 || xComponents > 9) return NULL; if(yComponents < 1 || yComponents > 9) return NULL; -#ifndef _MSC_VER - float factors[yComponents * xComponents][3]; -#else float factors[9 * 9][3]; -#endif - memset(factors, 0, sizeof(factors)); init_sRGBToLinear_cache(); From 9e744cf678d51a4f3d88bcff6320c32cd806bb12 Mon Sep 17 00:00:00 2001 From: Aleksandr Karpinskii Date: Tue, 24 Sep 2024 17:00:41 +0400 Subject: [PATCH 5/9] Release Pillow image only if open it --- src/blurhash/__init__.py | 16 ++++++++++------ tests/test_encode.py | 10 +++++++--- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/blurhash/__init__.py b/src/blurhash/__init__.py index 1200a30..d0988e0 100644 --- a/src/blurhash/__init__.py +++ b/src/blurhash/__init__.py @@ -1,3 +1,4 @@ +from contextlib import nullcontext from enum import Enum from PIL import Image @@ -25,13 +26,16 @@ def __str__(self): def encode(image, x_components, y_components): - if not isinstance(image, Image.Image): + if isinstance(image, Image.Image): + image_context = nullcontext() + else: image = Image.open(image) - if image.mode != 'RGB': - image = image.convert('RGB') - rgb_data = image.tobytes() - width, height = image.size - image.close() + image_context = image + with image_context: + if image.mode != 'RGB': + image = image.convert('RGB') + rgb_data = image.tobytes() + width, height = image.size rgb = _ffi.new('uint8_t[]', rgb_data) bytes_per_row = _ffi.cast('size_t', width * 3) diff --git a/tests/test_encode.py b/tests/test_encode.py index 5d7cb27..7c7b931 100644 --- a/tests/test_encode.py +++ b/tests/test_encode.py @@ -13,11 +13,15 @@ def test_encode_file(): assert result == 'LlMF%n00%#MwS|WCWEM{R*bbWBbH' -def test_encode_pil_image(): +def test_encode_pil_image_twise(): with Image.open('tests/pic2.png') as image: - result = encode(image, 4, 3) + image = image.convert('RGB') + result1 = encode(image, 4, 3) + # Should not raise second time + result2 = encode(image, 4, 3) - assert result == 'LlMF%n00%#MwS|WCWEM{R*bbWBbH' + assert result1 == result2 + assert result1 == 'LlMF%n00%#MwS|WCWEM{R*bbWBbH' def test_encode_with_filename(): From f61730133a72a6870ddaf2e5b6fb6f9f2b8abe9e Mon Sep 17 00:00:00 2001 From: Aleksandr Karpinskii Date: Tue, 24 Sep 2024 17:08:58 +0400 Subject: [PATCH 6/9] Remove extra requirements --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index a39252b..d6a94ce 100755 --- a/setup.py +++ b/setup.py @@ -26,7 +26,6 @@ install_requires=[ 'cffi', 'Pillow', - 'six', ], setup_requires=[ 'cffi', From f4a1ed0f713cc92a4feaddb052e855e10afa6d49 Mon Sep 17 00:00:00 2001 From: Aleksandr Karpinskii Date: Tue, 24 Sep 2024 18:31:37 +0400 Subject: [PATCH 7/9] Calculate cosx and cosy once --- src/encode.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/encode.c b/src/encode.c index 7058367..3ede7df 100644 --- a/src/encode.c +++ b/src/encode.c @@ -11,7 +11,7 @@ struct RGB { float b; }; -static struct RGB multiplyBasisFunction(int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow, float *cosx); +static struct RGB multiplyBasisFunction(int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow, float *cosx, float *cosy); static char *encode_int(int value, int length, char *destination); static int encodeDC(float r, float g, float b); @@ -37,17 +37,32 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h init_sRGBToLinear_cache(); - float *cosx = (float *)malloc(sizeof(float) * width); + float *cosx = (float *)malloc(sizeof(float) * width * xComponents); if (! cosx) return NULL; + float *cosy = (float *)malloc(sizeof(float) * height); + if (! cosy) { + free(cosx); + return NULL; + } + for(int x = 0; x < xComponents; x++) { + for(int i = 0; i < width; i++) { + cosx[x * width + i] = cosf(M_PI * x * i / width); + } + } for(int y = 0; y < yComponents; y++) { + for(int i = 0; i < height; i++) { + cosy[i] = cosf(M_PI * y * i / height); + } for(int x = 0; x < xComponents; x++) { - struct RGB factor = multiplyBasisFunction(x, y, width, height, rgb, bytesPerRow, cosx); + struct RGB factor = multiplyBasisFunction(x, y, width, height, rgb, bytesPerRow, + cosx + x * width, cosy); factors[y * xComponents + x][0] = factor.r; factors[y * xComponents + x][1] = factor.g; factors[y * xComponents + x][2] = factor.b; } } free(cosx); + free(cosy); float *dc = factors[0]; float *ac = dc + 3; @@ -83,19 +98,17 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h return destination; } -static struct RGB multiplyBasisFunction(int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow, float *cosx) { +static struct RGB multiplyBasisFunction( + int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow, + float *cosx, float *cosy +) { struct RGB result = { 0, 0, 0 }; float normalisation = (xComponent == 0 && yComponent == 0) ? 1 : 2; - for(int x = 0; x < width; x++) { - cosx[x] = cosf(M_PI * xComponent * x / width); - } - for(int y = 0; y < height; y++) { - float cosy = cosf(M_PI * yComponent * y / height); uint8_t *src = rgb + y * bytesPerRow; for(int x = 0; x < width; x++) { - float basis = cosy * cosx[x]; + float basis = cosy[y] * cosx[x]; result.r += basis * sRGBToLinear_cache[src[3 * x + 0]]; result.g += basis * sRGBToLinear_cache[src[3 * x + 1]]; result.b += basis * sRGBToLinear_cache[src[3 * x + 2]]; From ae86c2dc439d21dfc9e200b6f6315ab8fb41913d Mon Sep 17 00:00:00 2001 From: Aleksandr Karpinskii Date: Tue, 24 Sep 2024 21:59:32 +0400 Subject: [PATCH 8/9] Calculate factors in one pass --- src/encode.c | 90 ++++++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/src/encode.c b/src/encode.c index 3ede7df..b218fb5 100644 --- a/src/encode.c +++ b/src/encode.c @@ -11,7 +11,9 @@ struct RGB { float b; }; -static struct RGB multiplyBasisFunction(int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow, float *cosx, float *cosy); +static void multiplyBasisFunction( + struct RGB *factors, int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow, + float *cosX, float *cosY); static char *encode_int(int value, int length, char *destination); static int encodeDC(float r, float g, float b); @@ -33,40 +35,41 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h if(xComponents < 1 || xComponents > 9) return NULL; if(yComponents < 1 || yComponents > 9) return NULL; - float factors[9 * 9][3]; + struct RGB factors[9 * 9] = {0}; + int factorsCount = xComponents * yComponents; init_sRGBToLinear_cache(); - float *cosx = (float *)malloc(sizeof(float) * width * xComponents); - if (! cosx) return NULL; - float *cosy = (float *)malloc(sizeof(float) * height); - if (! cosy) { - free(cosx); + float *cosX = (float *)malloc(sizeof(float) * width * factorsCount); + if (! cosX) return NULL; + float *cosY = (float *)malloc(sizeof(float) * height * factorsCount); + if (! cosY) { + free(cosX); return NULL; } - for(int x = 0; x < xComponents; x++) { - for(int i = 0; i < width; i++) { - cosx[x * width + i] = cosf(M_PI * x * i / width); + for(int i = 0; i < width; i++) { + for(int x = 0; x < xComponents; x++) { + float weight = cosf(M_PI * x * i / width); + for(int y = 0; y < yComponents; y++) { + cosX[i * factorsCount + y * xComponents + x] = weight; + } } } - for(int y = 0; y < yComponents; y++) { - for(int i = 0; i < height; i++) { - cosy[i] = cosf(M_PI * y * i / height); - } - for(int x = 0; x < xComponents; x++) { - struct RGB factor = multiplyBasisFunction(x, y, width, height, rgb, bytesPerRow, - cosx + x * width, cosy); - factors[y * xComponents + x][0] = factor.r; - factors[y * xComponents + x][1] = factor.g; - factors[y * xComponents + x][2] = factor.b; + for(int i = 0; i < height; i++) { + for(int y = 0; y < yComponents; y++) { + float weight = cosf(M_PI * y * i / height); + for(int x = 0; x < xComponents; x++) { + cosY[i * factorsCount + y * xComponents + x] = weight; + } } } - free(cosx); - free(cosy); + multiplyBasisFunction(factors, factorsCount, width, height, rgb, bytesPerRow, cosX, cosY); + free(cosX); + free(cosY); - float *dc = factors[0]; + float *dc = (float *)factors; float *ac = dc + 3; - int acCount = xComponents * yComponents - 1; + int acCount = factorsCount - 1; char *ptr = destination; int sizeFlag = (xComponents - 1) + (yComponents - 1) * 9; @@ -98,30 +101,35 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h return destination; } -static struct RGB multiplyBasisFunction( - int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow, - float *cosx, float *cosy +static void multiplyBasisFunction( + struct RGB *factors, int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow, + float *cosX, float *cosY ) { - struct RGB result = { 0, 0, 0 }; - float normalisation = (xComponent == 0 && yComponent == 0) ? 1 : 2; - for(int y = 0; y < height; y++) { uint8_t *src = rgb + y * bytesPerRow; + float *cosYLocal = cosY + y * factorsCount; for(int x = 0; x < width; x++) { - float basis = cosy[y] * cosx[x]; - result.r += basis * sRGBToLinear_cache[src[3 * x + 0]]; - result.g += basis * sRGBToLinear_cache[src[3 * x + 1]]; - result.b += basis * sRGBToLinear_cache[src[3 * x + 2]]; + float pixel[3]; + float *cosXLocal = cosX + x * factorsCount; + pixel[0] = sRGBToLinear_cache[src[3 * x + 0]]; + pixel[1] = sRGBToLinear_cache[src[3 * x + 1]]; + pixel[2] = sRGBToLinear_cache[src[3 * x + 2]]; + for (int i = 0; i < factorsCount; i++) { + float basis = cosYLocal[i] * cosXLocal[i]; + factors[i].r += basis * pixel[0]; + factors[i].g += basis * pixel[1]; + factors[i].b += basis * pixel[2]; + } } } - float scale = normalisation / (width * height); - - result.r *= scale; - result.g *= scale; - result.b *= scale; - - return result; + for (int i = 0; i < factorsCount; i++) { + float normalisation = (i == 0) ? 1 : 2; + float scale = normalisation / (width * height); + factors[i].r *= scale; + factors[i].g *= scale; + factors[i].b *= scale; + } } static int encodeDC(float r, float g, float b) { From f077928a520d1861ba8f5afc857802e2052bfe49 Mon Sep 17 00:00:00 2001 From: Aleksandr Karpinskii Date: Fri, 11 Oct 2024 19:58:18 +0400 Subject: [PATCH 9/9] Update to lates versions from optimization branch --- src/encode.c | 54 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/src/encode.c b/src/encode.c index b218fb5..f329ba3 100644 --- a/src/encode.c +++ b/src/encode.c @@ -5,14 +5,8 @@ #include "common.h" -struct RGB { - float r; - float g; - float b; -}; - static void multiplyBasisFunction( - struct RGB *factors, int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow, + float factors[][4], int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow, float *cosX, float *cosY); static char *encode_int(int value, int length, char *destination); @@ -35,8 +29,9 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h if(xComponents < 1 || xComponents > 9) return NULL; if(yComponents < 1 || yComponents > 9) return NULL; - struct RGB factors[9 * 9] = {0}; + float factors[yComponents * xComponents][4]; int factorsCount = xComponents * yComponents; + memset(factors, 0, sizeof(factors)); init_sRGBToLinear_cache(); @@ -67,8 +62,8 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h free(cosX); free(cosY); - float *dc = (float *)factors; - float *ac = dc + 3; + float *dc = factors[0]; + float *ac = dc + 4; int acCount = factorsCount - 1; char *ptr = destination; @@ -78,7 +73,7 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h float maximumValue; if(acCount > 0) { float actualMaximumValue = 0; - for(int i = 0; i < acCount * 3; i++) { + for(int i = 0; i < acCount * 4; i++) { actualMaximumValue = fmaxf(fabsf(ac[i]), actualMaximumValue); } @@ -93,7 +88,7 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h ptr = encode_int(encodeDC(dc[0], dc[1], dc[2]), 4, ptr); for(int i = 0; i < acCount; i++) { - ptr = encode_int(encodeAC(ac[i * 3 + 0], ac[i * 3 + 1], ac[i * 3 + 2], maximumValue), 2, ptr); + ptr = encode_int(encodeAC(ac[i * 4 + 0], ac[i * 4 + 1], ac[i * 4 + 2], maximumValue), 2, ptr); } *ptr = 0; @@ -102,23 +97,40 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h } static void multiplyBasisFunction( - struct RGB *factors, int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow, + float factors[][4], int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow, float *cosX, float *cosY ) { for(int y = 0; y < height; y++) { uint8_t *src = rgb + y * bytesPerRow; float *cosYLocal = cosY + y * factorsCount; - for(int x = 0; x < width; x++) { - float pixel[3]; + int x = 0; + for(; x < width - 3; x += 4) { + float *cosXLocal = cosX + x * factorsCount; + float pixel0[4] = {sRGBToLinear_cache[src[3 * (x+0) + 0]], sRGBToLinear_cache[src[3 * (x+0) + 1]], sRGBToLinear_cache[src[3 * (x+0) + 2]]}; + float pixel1[4] = {sRGBToLinear_cache[src[3 * (x+1) + 0]], sRGBToLinear_cache[src[3 * (x+1) + 1]], sRGBToLinear_cache[src[3 * (x+1) + 2]]}; + float pixel2[4] = {sRGBToLinear_cache[src[3 * (x+2) + 0]], sRGBToLinear_cache[src[3 * (x+2) + 1]], sRGBToLinear_cache[src[3 * (x+2) + 2]]}; + float pixel3[4] = {sRGBToLinear_cache[src[3 * (x+3) + 0]], sRGBToLinear_cache[src[3 * (x+3) + 1]], sRGBToLinear_cache[src[3 * (x+3) + 2]]}; + for (int i = 0; i < factorsCount; i++) { + float basis0 = cosYLocal[i] * cosXLocal[i + 0 * factorsCount]; + float basis1 = cosYLocal[i] * cosXLocal[i + 1 * factorsCount]; + float basis2 = cosYLocal[i] * cosXLocal[i + 2 * factorsCount]; + float basis3 = cosYLocal[i] * cosXLocal[i + 3 * factorsCount]; + factors[i][0] += basis0 * pixel0[0] + basis1 * pixel1[0] + basis2 * pixel2[0] + basis3 * pixel3[0]; + factors[i][1] += basis0 * pixel0[1] + basis1 * pixel1[1] + basis2 * pixel2[1] + basis3 * pixel3[1]; + factors[i][2] += basis0 * pixel0[2] + basis1 * pixel1[2] + basis2 * pixel2[2] + basis3 * pixel3[2]; + } + } + for(; x < width; x++) { + float pixel[4]; float *cosXLocal = cosX + x * factorsCount; pixel[0] = sRGBToLinear_cache[src[3 * x + 0]]; pixel[1] = sRGBToLinear_cache[src[3 * x + 1]]; pixel[2] = sRGBToLinear_cache[src[3 * x + 2]]; for (int i = 0; i < factorsCount; i++) { float basis = cosYLocal[i] * cosXLocal[i]; - factors[i].r += basis * pixel[0]; - factors[i].g += basis * pixel[1]; - factors[i].b += basis * pixel[2]; + factors[i][0] += basis * pixel[0]; + factors[i][1] += basis * pixel[1]; + factors[i][2] += basis * pixel[2]; } } } @@ -126,9 +138,9 @@ static void multiplyBasisFunction( for (int i = 0; i < factorsCount; i++) { float normalisation = (i == 0) ? 1 : 2; float scale = normalisation / (width * height); - factors[i].r *= scale; - factors[i].g *= scale; - factors[i].b *= scale; + factors[i][0] *= scale; + factors[i][1] *= scale; + factors[i][2] *= scale; } }