Skip to content

Commit ef808a5

Browse files
authored
More inplace (#45)
* Added inplace option to cv2.add * more inplace * Updated pre-commit
1 parent f5eb6c6 commit ef808a5

File tree

113 files changed

+959
-305
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

113 files changed

+959
-305
lines changed

.pre-commit-config.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,13 @@ repos:
5353
# hooks:
5454
# - id: markdownlint
5555
- repo: https://github.com/tox-dev/pyproject-fmt
56-
rev: "v2.4.3"
56+
rev: "v2.5.0"
5757
hooks:
5858
- id: pyproject-fmt
5959
additional_dependencies: ["tomli"]
6060
- repo: https://github.com/astral-sh/ruff-pre-commit
6161
# Ruff version.
62-
rev: v0.7.1
62+
rev: v0.7.2
6363
hooks:
6464
# Run the linter.
6565
- id: ruff

albucore/functions.py

+26-18
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def add_weighted_simsimd(img1: np.ndarray, weight1: float, img2: np.ndarray, wei
3232
original_dtype = img1.dtype
3333

3434
if img2.dtype != original_dtype:
35-
img2 = clip(img2.astype(original_dtype), original_dtype)
35+
img2 = clip(img2.astype(original_dtype), original_dtype, inplace=True)
3636

3737
return np.frombuffer(
3838
ss.wsum(img1.reshape(-1), img2.astype(original_dtype).reshape(-1), alpha=weight1, beta=weight2),
@@ -96,7 +96,7 @@ def apply_lut(
9696

9797
num_channels = img.shape[-1]
9898
luts = create_lut_array(dtype, value, operation)
99-
return cv2.merge([sz_lut(img[:, :, i], clip(luts[i], dtype), inplace) for i in range(num_channels)])
99+
return cv2.merge([sz_lut(img[:, :, i], clip(luts[i], dtype, inplace=False), inplace) for i in range(num_channels)])
100100

101101

102102
def prepare_value_opencv(
@@ -212,7 +212,7 @@ def multiply(img: np.ndarray, value: ValueType, inplace: bool = False) -> np.nda
212212

213213

214214
@preserve_channel_dim
215-
def add_opencv(img: np.ndarray, value: np.ndarray | float) -> np.ndarray:
215+
def add_opencv(img: np.ndarray, value: np.ndarray | float, inplace: bool = False) -> np.ndarray:
216216
value = prepare_value_opencv(img, value, "add")
217217

218218
# Convert to float32 if:
@@ -225,7 +225,9 @@ def add_opencv(img: np.ndarray, value: np.ndarray | float) -> np.ndarray:
225225
if needs_float:
226226
return cv2.add(img.astype(np.float32), value if isinstance(value, (int, float)) else value.astype(np.float32))
227227

228-
return cv2.add(img, value)
228+
# Use img as the destination array if inplace=True
229+
dst = img if inplace else None
230+
return cv2.add(img, value, dst=dst)
229231

230232

231233
def add_numpy(img: np.ndarray, value: float | np.ndarray) -> np.ndarray:
@@ -237,20 +239,20 @@ def add_lut(img: np.ndarray, value: np.ndarray | float, inplace: bool) -> np.nda
237239

238240

239241
@clipped
240-
def add_constant(img: np.ndarray, value: float) -> np.ndarray:
241-
return add_opencv(img, value)
242+
def add_constant(img: np.ndarray, value: float, inplace: bool = False) -> np.ndarray:
243+
return add_opencv(img, value, inplace)
242244

243245

244246
@clipped
245247
def add_vector(img: np.ndarray, value: np.ndarray, inplace: bool) -> np.ndarray:
246248
if img.dtype == np.uint8:
247249
return add_lut(img, value, inplace)
248-
return add_opencv(img, value)
250+
return add_opencv(img, value, inplace)
249251

250252

251253
@clipped
252-
def add_array(img: np.ndarray, value: np.ndarray) -> np.ndarray:
253-
return add_opencv(img, value)
254+
def add_array(img: np.ndarray, value: np.ndarray, inplace: bool = False) -> np.ndarray:
255+
return add_opencv(img, value, inplace)
254256

255257

256258
def add(img: np.ndarray, value: ValueType, inplace: bool = False) -> np.ndarray:
@@ -264,9 +266,9 @@ def add(img: np.ndarray, value: ValueType, inplace: bool = False) -> np.ndarray:
264266
if img.dtype == np.uint8:
265267
value = int(value)
266268

267-
return add_constant(img, value)
269+
return add_constant(img, value, inplace)
268270

269-
return add_vector(img, value, inplace) if value.ndim == 1 else add_array(img, value)
271+
return add_vector(img, value, inplace) if value.ndim == 1 else add_array(img, value, inplace)
270272

271273

272274
def normalize_numpy(img: np.ndarray, mean: float | np.ndarray, denominator: float | np.ndarray) -> np.ndarray:
@@ -371,11 +373,17 @@ def add_weighted_numpy(img1: np.ndarray, weight1: float, img2: np.ndarray, weigh
371373

372374
@preserve_channel_dim
373375
def add_weighted_opencv(img1: np.ndarray, weight1: float, img2: np.ndarray, weight2: float) -> np.ndarray:
374-
return cv2.addWeighted(img1.astype(np.float32), weight1, img2.astype(np.float32), weight2, 0)
376+
return cv2.addWeighted(img1, weight1, img2, weight2, 0)
375377

376378

377379
@preserve_channel_dim
378-
def add_weighted_lut(img1: np.ndarray, weight1: float, img2: np.ndarray, weight2: float) -> np.ndarray:
380+
def add_weighted_lut(
381+
img1: np.ndarray,
382+
weight1: float,
383+
img2: np.ndarray,
384+
weight2: float,
385+
inplace: bool = False,
386+
) -> np.ndarray:
379387
dtype = img1.dtype
380388
max_value = MAX_VALUES_BY_DTYPE[dtype]
381389

@@ -389,15 +397,15 @@ def add_weighted_lut(img1: np.ndarray, weight1: float, img2: np.ndarray, weight2
389397
return np.zeros_like(img1)
390398

391399
if weight1 == 1 and weight2 == 1:
392-
return add_array(img1, img2)
400+
return add_array(img1, img2, inplace)
393401

394402
lut1 = np.arange(0, max_value + 1, dtype=np.float32) * weight1
395403
result1 = cv2.LUT(img1, lut1)
396404

397405
lut2 = np.arange(0, max_value + 1, dtype=np.float32) * weight2
398406
result2 = cv2.LUT(img2, lut2)
399407

400-
return add_opencv(result1, result2)
408+
return add_opencv(result1, result2, inplace)
401409

402410

403411
@clipped
@@ -437,7 +445,7 @@ def multiply_add_lut(img: np.ndarray, factor: ValueType, value: ValueType, inpla
437445
num_channels = get_num_channels(img)
438446

439447
if isinstance(factor, (float, int)) and isinstance(value, (float, int)):
440-
lut = clip(np.arange(0, max_value + 1, dtype=np.float32) * factor + value, dtype)
448+
lut = clip(np.arange(0, max_value + 1, dtype=np.float32) * factor + value, dtype, inplace=False)
441449
return sz_lut(img, lut, inplace)
442450

443451
if isinstance(factor, np.ndarray) and factor.shape != ():
@@ -446,7 +454,7 @@ def multiply_add_lut(img: np.ndarray, factor: ValueType, value: ValueType, inpla
446454
if isinstance(value, np.ndarray) and value.shape != ():
447455
value = value.reshape(-1, 1)
448456

449-
luts = clip(np.arange(0, max_value + 1, dtype=np.float32) * factor + value, dtype)
457+
luts = clip(np.arange(0, max_value + 1, dtype=np.float32) * factor + value, dtype, inplace=True)
450458

451459
return cv2.merge([sz_lut(img[:, :, i], luts[i], inplace) for i in range(num_channels)])
452460

@@ -641,7 +649,7 @@ def to_float(img: np.ndarray, max_value: float | None = None) -> np.ndarray:
641649
def from_float_numpy(img: np.ndarray, target_dtype: np.dtype, max_value: float | None = None) -> np.ndarray:
642650
if max_value is None:
643651
max_value = get_max_value(target_dtype)
644-
return clip(np.rint(img * max_value), target_dtype)
652+
return clip(np.rint(img * max_value), target_dtype, inplace=True)
645653

646654

647655
@preserve_channel_dim

albucore/utils.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,10 @@ def __process_fn(img: np.ndarray, *process_args: P.args, **process_kwargs: P.kwa
101101
return __process_fn
102102

103103

104-
def clip(img: np.ndarray, dtype: Any) -> np.ndarray:
104+
def clip(img: np.ndarray, dtype: Any, inplace: bool = False) -> np.ndarray:
105105
max_value = MAX_VALUES_BY_DTYPE[dtype]
106+
if inplace:
107+
return np.clip(img, 0, max_value, out=img)
106108
return np.clip(img, 0, max_value).astype(dtype)
107109

108110

benchmark/albucore_benchmark/results/float32_1/AddArray.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ Number of images: 500
1313

1414
| Python | albucore | opencv-python-headless | numpy | torchvision |
1515
|:--------------------------------------|:-----------|:-------------------------|:--------|:--------------|
16-
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.19 | 4.10.0.84 | 2.0.2 | 0.19.1 |
16+
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.20 | 4.10.0.84 | 2.0.2 | 0.19.1 |
1717
| [Clang 14.0.6 ] | | | | |
1818

1919
## Performance (images/second)
2020

21-
| | albucore | lut | opencv | numpy | simsimd |
22-
|:---------|:-----------------|:------|:-----------------|:-----------------|:-----------------|
23-
| AddArray | 1854.77 ± 415.86 | N/A | 1852.88 ± 121.24 | 1459.54 ± 375.42 | 1654.07 ± 126.99 |
21+
| | albucore | lut | opencv | numpy | simsimd |
22+
|:---------|:----------------|:------|:----------------|:-----------------|:-----------------|
23+
| AddArray | 1884.89 ± 67.19 | N/A | 1860.28 ± 59.29 | 1442.33 ± 176.91 | 1310.62 ± 268.11 |

benchmark/albucore_benchmark/results/float32_1/AddConstant.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ Number of images: 500
1313

1414
| Python | albucore | opencv-python-headless | numpy | torchvision |
1515
|:--------------------------------------|:-----------|:-------------------------|:--------|:--------------|
16-
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.19 | 4.10.0.84 | 2.0.2 | 0.19.1 |
16+
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.20 | 4.10.0.84 | 2.0.2 | 0.19.1 |
1717
| [Clang 14.0.6 ] | | | | |
1818

1919
## Performance (images/second)
2020

21-
| | albucore | lut | opencv | numpy | simsimd |
22-
|:------------|:-----------------|:------|:----------------|:----------------|:-----------------|
23-
| AddConstant | 1888.37 ± 106.49 | N/A | 1954.72 ± 42.99 | 1861.73 ± 74.98 | 1219.05 ± 189.20 |
21+
| | albucore | lut | opencv | numpy | simsimd |
22+
|:------------|:----------------|:------|:-----------------|:-----------------|:-----------------|
23+
| AddConstant | 1965.69 ± 83.21 | N/A | 1632.53 ± 240.98 | 1754.53 ± 219.01 | 1248.80 ± 210.45 |

benchmark/albucore_benchmark/results/float32_1/AddVector.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ Number of images: 500
1313

1414
| Python | albucore | opencv-python-headless | numpy | torchvision |
1515
|:--------------------------------------|:-----------|:-------------------------|:--------|:--------------|
16-
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.19 | 4.10.0.84 | 2.0.2 | 0.19.1 |
16+
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.20 | 4.10.0.84 | 2.0.2 | 0.19.1 |
1717
| [Clang 14.0.6 ] | | | | |
1818

1919
## Performance (images/second)
2020

21-
| | albucore | lut | opencv | numpy | simsimd |
22-
|:----------|:-----------------|:------|:----------------|:----------------|:----------|
23-
| AddVector | 1933.94 ± 176.48 | N/A | 1802.99 ± 47.72 | 1876.27 ± 73.40 | N/A |
21+
| | albucore | lut | opencv | numpy | simsimd |
22+
|:----------|:-----------------|:------|:-----------------|:-----------------|:----------|
23+
| AddVector | 1948.10 ± 195.87 | N/A | 1612.23 ± 308.33 | 1896.24 ± 112.59 | N/A |

benchmark/albucore_benchmark/results/float32_1/AddWeighted.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ Number of images: 500
1313

1414
| Python | albucore | opencv-python-headless | numpy | torchvision |
1515
|:--------------------------------------|:-----------|:-------------------------|:--------|:--------------|
16-
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.19 | 4.10.0.84 | 2.0.2 | 0.19.1 |
16+
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.20 | 4.10.0.84 | 2.0.2 | 0.19.1 |
1717
| [Clang 14.0.6 ] | | | | |
1818

1919
## Performance (images/second)
2020

21-
| | albucore | lut | opencv | numpy | simsimd |
22-
|:------------|:----------------|:------|:----------------|:-----------------|:----------------|
23-
| AddWeighted | 1522.93 ± 45.94 | N/A | 1378.03 ± 37.98 | 1134.65 ± 176.15 | 1555.68 ± 37.54 |
21+
| | albucore | lut | opencv | numpy | simsimd |
22+
|:------------|:----------------|:------|:-----------------|:----------------|:----------------|
23+
| AddWeighted | 1481.93 ± 91.72 | N/A | 1796.92 ± 292.15 | 967.24 ± 216.82 | 988.38 ± 287.66 |

benchmark/albucore_benchmark/results/float32_1/FromFloat.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ Number of images: 500
1313

1414
| Python | albucore | opencv-python-headless | numpy | torchvision |
1515
|:--------------------------------------|:-----------|:-------------------------|:--------|:--------------|
16-
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.19 | 4.10.0.84 | 2.0.2 | 0.19.1 |
16+
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.20 | 4.10.0.84 | 2.0.2 | 0.19.1 |
1717
| [Clang 14.0.6 ] | | | | |
1818

1919
## Performance (images/second)
2020

21-
| | albucore | lut | opencv | numpy | simsimd |
22-
|:----------|:----------------|:------|:---------------|:---------------|:----------|
23-
| FromFloat | 1260.47 ± 41.56 | N/A | 712.18 ± 88.37 | 734.41 ± 58.55 | N/A |
21+
| | albucore | lut | opencv | numpy | simsimd |
22+
|:----------|:-----------------|:------|:---------------|:-----------------|:----------|
23+
| FromFloat | 1027.60 ± 247.12 | N/A | 686.23 ± 40.08 | 1148.59 ± 221.74 | N/A |

benchmark/albucore_benchmark/results/float32_1/HorizontalFlip.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ Number of images: 500
1313

1414
| Python | albucore | opencv-python-headless | numpy | torchvision |
1515
|:--------------------------------------|:-----------|:-------------------------|:--------|:--------------|
16-
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.19 | 4.10.0.84 | 2.0.2 | 0.19.1 |
16+
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.20 | 4.10.0.84 | 2.0.2 | 0.19.1 |
1717
| [Clang 14.0.6 ] | | | | |
1818

1919
## Performance (images/second)
2020

21-
| | albucore | lut | opencv | numpy | simsimd |
22-
|:---------------|:------------------|:------|:-----------------|:-----------------|:----------|
23-
| HorizontalFlip | 4655.65 ± 1173.34 | N/A | 1665.88 ± 304.01 | 1802.95 ± 195.86 | N/A |
21+
| | albucore | lut | opencv | numpy | simsimd |
22+
|:---------------|:-----------------|:------|:-----------------|:----------------|:----------|
23+
| HorizontalFlip | 6997.24 ± 616.52 | N/A | 2097.85 ± 121.23 | 1952.43 ± 83.57 | N/A |

benchmark/albucore_benchmark/results/float32_1/MultiplyAdd.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ Number of images: 500
1313

1414
| Python | albucore | opencv-python-headless | numpy | torchvision |
1515
|:--------------------------------------|:-----------|:-------------------------|:--------|:--------------|
16-
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.19 | 4.10.0.84 | 2.0.2 | 0.19.1 |
16+
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.20 | 4.10.0.84 | 2.0.2 | 0.19.1 |
1717
| [Clang 14.0.6 ] | | | | |
1818

1919
## Performance (images/second)
2020

21-
| | albucore | lut | opencv | numpy | simsimd |
22-
|:------------|:---------------|:------|:----------------|:-----------------|:----------|
23-
| MultiplyAdd | 629.51 ± 81.55 | N/A | 445.55 ± 178.66 | 1733.97 ± 110.31 | N/A |
21+
| | albucore | lut | opencv | numpy | simsimd |
22+
|:------------|:---------------|:------|:---------------|:-----------------|:----------|
23+
| MultiplyAdd | 758.16 ± 22.12 | N/A | 728.14 ± 50.02 | 1856.18 ± 159.34 | N/A |

benchmark/albucore_benchmark/results/float32_1/MultiplyArray.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ Number of images: 500
1313

1414
| Python | albucore | opencv-python-headless | numpy | torchvision |
1515
|:--------------------------------------|:-----------|:-------------------------|:--------|:--------------|
16-
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.19 | 4.10.0.84 | 2.0.2 | 0.19.1 |
16+
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.20 | 4.10.0.84 | 2.0.2 | 0.19.1 |
1717
| [Clang 14.0.6 ] | | | | |
1818

1919
## Performance (images/second)
2020

21-
| | albucore | lut | opencv | numpy | simsimd |
22-
|:--------------|:---------------|:------|:--------------|:---------------|:----------|
23-
| MultiplyArray | 485.21 ± 13.04 | N/A | 511.79 ± 3.88 | 446.52 ± 32.21 | N/A |
21+
| | albucore | lut | opencv | numpy | simsimd |
22+
|:--------------|:---------------|:------|:---------------|:---------------|:----------|
23+
| MultiplyArray | 471.85 ± 77.52 | N/A | 456.21 ± 49.29 | 309.56 ± 87.87 | N/A |

benchmark/albucore_benchmark/results/float32_1/MultiplyConstant.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ Number of images: 500
1313

1414
| Python | albucore | opencv-python-headless | numpy | torchvision |
1515
|:--------------------------------------|:-----------|:-------------------------|:--------|:--------------|
16-
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.19 | 4.10.0.84 | 2.0.2 | 0.19.1 |
16+
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.20 | 4.10.0.84 | 2.0.2 | 0.19.1 |
1717
| [Clang 14.0.6 ] | | | | |
1818

1919
## Performance (images/second)
2020

21-
| | albucore | lut | opencv | numpy | simsimd |
22-
|:-----------------|:----------------|:------|:----------------|:----------------|:-----------------|
23-
| MultiplyConstant | 1878.75 ± 49.01 | N/A | 1416.15 ± 79.34 | 1840.54 ± 78.93 | 1525.54 ± 242.47 |
21+
| | albucore | lut | opencv | numpy | simsimd |
22+
|:-----------------|:-----------------|:------|:-----------------|:-----------------|:-----------------|
23+
| MultiplyConstant | 1917.92 ± 120.49 | N/A | 1176.76 ± 307.71 | 1983.32 ± 154.74 | 1813.73 ± 226.46 |

benchmark/albucore_benchmark/results/float32_1/MultiplyVector.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ Number of images: 500
1313

1414
| Python | albucore | opencv-python-headless | numpy | torchvision |
1515
|:--------------------------------------|:-----------|:-------------------------|:--------|:--------------|
16-
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.19 | 4.10.0.84 | 2.0.2 | 0.19.1 |
16+
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.20 | 4.10.0.84 | 2.0.2 | 0.19.1 |
1717
| [Clang 14.0.6 ] | | | | |
1818

1919
## Performance (images/second)
2020

21-
| | albucore | lut | opencv | numpy | simsimd |
22-
|:---------------|:----------------|:------|:----------------|:----------------|:----------|
23-
| MultiplyVector | 1815.01 ± 79.45 | N/A | 1719.36 ± 94.23 | 1882.30 ± 56.89 | N/A |
21+
| | albucore | lut | opencv | numpy | simsimd |
22+
|:---------------|:-----------------|:------|:-----------------|:-----------------|:----------|
23+
| MultiplyVector | 1797.16 ± 344.07 | N/A | 1872.64 ± 210.21 | 1897.21 ± 132.37 | N/A |

benchmark/albucore_benchmark/results/float32_1/Normalize.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ Number of images: 500
1313

1414
| Python | albucore | opencv-python-headless | numpy | torchvision |
1515
|:--------------------------------------|:-----------|:-------------------------|:--------|:--------------|
16-
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.19 | 4.10.0.84 | 2.0.2 | 0.19.1 |
16+
| 3.9.20 (main, Oct 3 2024, 02:24:59) | 0.0.20 | 4.10.0.84 | 2.0.2 | 0.19.1 |
1717
| [Clang 14.0.6 ] | | | | |
1818

1919
## Performance (images/second)
2020

21-
| | albucore | lut | opencv | numpy | simsimd |
22-
|:----------|:-----------------|:------|:---------------|:----------------|:----------|
23-
| Normalize | 1368.75 ± 159.20 | N/A | 490.46 ± 60.55 | 801.62 ± 112.00 | N/A |
21+
| | albucore | lut | opencv | numpy | simsimd |
22+
|:----------|:-----------------|:------|:----------------|:---------------|:----------|
23+
| Normalize | 1512.83 ± 151.87 | N/A | 443.25 ± 100.17 | 816.24 ± 71.20 | N/A |

0 commit comments

Comments
 (0)