Skip to content

Commit 030d62c

Browse files
committed
Get transform compiling on SDL3
1 parent 4b9ade9 commit 030d62c

File tree

6 files changed

+125
-64
lines changed

6 files changed

+125
-64
lines changed

src_c/_pygame.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@
8484
#define PG_SurfaceHasRLE SDL_SurfaceHasRLE
8585

8686
#define PG_SoftStretchNearest(src, srcrect, dst, dstrect) \
87-
SDL_SoftStretch(src, srcrect, dst, dstrect, SDL_SCALEMODE_NEAREST)
87+
SDL_StretchSurface(src, srcrect, dst, dstrect, SDL_SCALEMODE_NEAREST)
8888

8989
/* Emulating SDL2 SDL_LockMutex API. In SDL3, it returns void. */
9090
static inline int

src_c/meson.build

-3
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,6 @@ image = py.extension_module(
182182
subdir: pg,
183183
)
184184

185-
# TODO: support SDL3
186-
if sdl_api != 3
187185
simd_transform_avx2 = static_library(
188186
'simd_transform_avx2',
189187
'simd_transform_avx2.c',
@@ -224,7 +222,6 @@ transform = py.extension_module(
224222
install: true,
225223
subdir: pg,
226224
)
227-
endif
228225

229226
mask = py.extension_module(
230227
'mask',

src_c/simd_transform.h

+24-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,24 @@
11
#define NO_PYGAME_C_API
22
#include "_surface.h"
33

4+
/* TODO: This compat code should probably go in some place like simd_shared.h
5+
* That header file however is inconsistently used at the moment and not
6+
* included wherever it should be.
7+
* This block will be needed by simd_blitters and simd_fill. */
8+
9+
#if PG_SDL3
10+
// SDL3 no longer includes intrinsics by default, so we need to do it explicitly
11+
#include <SDL3/SDL_intrin.h>
12+
13+
/* If SDL_AVX2_INTRINSICS is defined by SDL3, we need to set macros that our
14+
* code checks for AVX2 build-time support. */
15+
#ifdef SDL_AVX2_INTRINSICS
16+
#ifndef HAVE_IMMINTRIN_H
17+
#define HAVE_IMMINTRIN_H 1
18+
#endif /* HAVE_IMMINTRIN_H*/
19+
#endif /* SDL_AVX2_INTRINSICS*/
20+
#endif /* PG_SDL3 */
21+
422
/**
523
* MACRO borrowed from SSE2NEON - useful for making the shuffling family of
624
* intrinsics easier to understand by indicating clearly what will go where.
@@ -26,7 +44,8 @@
2644
#if defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)
2745

2846
void
29-
grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf);
47+
grayscale_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt,
48+
SDL_Surface *newsurf);
3049
// smoothscale filters
3150
void
3251
filter_shrink_X_SSE2(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch,
@@ -41,12 +60,13 @@ void
4160
filter_expand_Y_SSE2(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch,
4261
int dstpitch, int srcheight, int dstheight);
4362
void
44-
invert_sse2(SDL_Surface *src, SDL_Surface *newsurf);
63+
invert_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf);
4564

4665
#endif /* (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)) */
4766

4867
// AVX2 functions
4968
void
50-
grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf);
69+
grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt,
70+
SDL_Surface *newsurf);
5171
void
52-
invert_avx2(SDL_Surface *src, SDL_Surface *newsurf);
72+
invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf);

src_c/simd_transform_avx2.c

+8-9
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ pg_avx2_at_runtime_but_uncompiled()
4646
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
4747
!defined(SDL_DISABLE_IMMINTRIN_H)
4848
void
49-
grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
49+
grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
5050
{
5151
/* See the SSE2 code for a simpler overview of this algorithm
5252
* Current AVX2 process
@@ -85,12 +85,11 @@ grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
8585
Uint32 *srcp = (Uint32 *)src->pixels;
8686
Uint32 *dstp = (Uint32 *)newsurf->pixels;
8787

88-
Uint32 amask = src->format->Amask;
88+
Uint32 amask = src_fmt->Amask;
8989
Uint32 rgbmask = ~amask;
9090

91-
int rgb_weights =
92-
((0x4C << src->format->Rshift) | (0x96 << src->format->Gshift) |
93-
(0x1D << src->format->Bshift));
91+
int rgb_weights = ((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
92+
(0x1D << src_fmt->Bshift));
9493

9594
__m256i *srcp256 = (__m256i *)src->pixels;
9695
__m256i *dstp256 = (__m256i *)newsurf->pixels;
@@ -216,7 +215,7 @@ grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
216215
}
217216

218217
void
219-
invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
218+
invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
220219
{
221220
int s_row_skip = (src->pitch - src->w * 4) / 4;
222221

@@ -237,7 +236,7 @@ invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
237236
Uint32 *srcp = (Uint32 *)src->pixels;
238237
Uint32 *dstp = (Uint32 *)newsurf->pixels;
239238

240-
Uint32 amask = src->format->Amask;
239+
Uint32 amask = src_fmt->Amask;
241240
Uint32 rgbmask = ~amask;
242241

243242
__m256i *srcp256 = (__m256i *)src->pixels;
@@ -300,12 +299,12 @@ invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
300299
}
301300
#else
302301
void
303-
grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
302+
grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
304303
{
305304
BAD_AVX2_FUNCTION_CALL;
306305
}
307306
void
308-
invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
307+
invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
309308
{
310309
BAD_AVX2_FUNCTION_CALL;
311310
}

src_c/simd_transform_sse2.c

+9-10
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ filter_expand_Y_SSE2(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch,
423423
}
424424

425425
void
426-
grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
426+
grayscale_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
427427
{
428428
/* For the SSE2 SIMD version of grayscale we do one pixel at a time
429429
* Thus we can calculate the number of loops (and pixels) by multiplying
@@ -459,16 +459,15 @@ grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
459459
Uint32 *srcp = (Uint32 *)src->pixels;
460460
Uint32 *dstp = (Uint32 *)newsurf->pixels;
461461

462-
Uint64 amask64 = ((Uint64)src->format->Amask) | src->format->Amask;
462+
Uint64 amask64 = ((Uint64)src_fmt->Amask) | src_fmt->Amask;
463463
Uint64 rgbmask64 = ~amask64;
464464

465465
Uint64 rgb_weights =
466-
((Uint64)((0x4C << src->format->Rshift) |
467-
(0x96 << src->format->Gshift) |
468-
(0x1D << src->format->Bshift))
466+
((Uint64)((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
467+
(0x1D << src_fmt->Bshift))
469468
<< 32) |
470-
((0x4C << src->format->Rshift) | (0x96 << src->format->Gshift) |
471-
(0x1D << src->format->Bshift));
469+
((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
470+
(0x1D << src_fmt->Bshift));
472471

473472
Uint64 *srcp64 = (Uint64 *)src->pixels;
474473
Uint64 *dstp64 = (Uint64 *)newsurf->pixels;
@@ -612,7 +611,7 @@ grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
612611
}
613612

614613
void
615-
invert_sse2(SDL_Surface *src, SDL_Surface *newsurf)
614+
invert_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
616615
{
617616
int s_row_skip = (src->pitch - src->w * 4) / 4;
618617

@@ -637,8 +636,8 @@ invert_sse2(SDL_Surface *src, SDL_Surface *newsurf)
637636
__m128i *srcp128 = (__m128i *)src->pixels;
638637
__m128i *dstp128 = (__m128i *)newsurf->pixels;
639638

640-
mm_rgb_invert_mask = _mm_set1_epi32(~src->format->Amask);
641-
mm_alpha_mask = _mm_set1_epi32(src->format->Amask);
639+
mm_rgb_invert_mask = _mm_set1_epi32(~src_fmt->Amask);
640+
mm_alpha_mask = _mm_set1_epi32(src_fmt->Amask);
642641

643642
while (num_batches--) {
644643
perfect_4_pixels_batch_counter = perfect_4_pixels;

0 commit comments

Comments
 (0)