Get transform compiling on SDL3

ankith26 · ankith26 · commit 030d62cfdaa5 · 2025-02-26T12:23:22.000+05:30
diff --git a/src_c/_pygame.h b/src_c/_pygame.h
@@ -84,7 +84,7 @@
 #define PG_SurfaceHasRLE SDL_SurfaceHasRLE
 
 #define PG_SoftStretchNearest(src, srcrect, dst, dstrect) \
-    SDL_SoftStretch(src, srcrect, dst, dstrect, SDL_SCALEMODE_NEAREST)
+    SDL_StretchSurface(src, srcrect, dst, dstrect, SDL_SCALEMODE_NEAREST)
 
 /* Emulating SDL2 SDL_LockMutex API. In SDL3, it returns void. */
 static inline int
diff --git a/src_c/meson.build b/src_c/meson.build
@@ -182,8 +182,6 @@ image = py.extension_module(
     subdir: pg,
 )
 
-# TODO: support SDL3
-if sdl_api != 3
 simd_transform_avx2 = static_library(
     'simd_transform_avx2',
     'simd_transform_avx2.c',
@@ -224,7 +222,6 @@ transform = py.extension_module(
     install: true,
     subdir: pg,
 )
-endif
 
 mask = py.extension_module(
     'mask',
diff --git a/src_c/simd_transform.h b/src_c/simd_transform.h
@@ -1,6 +1,24 @@
 #define NO_PYGAME_C_API
 #include "_surface.h"
 
+/* TODO: This compat code should probably go in some place like simd_shared.h.
+ * That header file, however, is inconsistently used at the moment and not
+ * included wherever it should be.
+ * This block will be needed by simd_blitters and simd_fill. */
+
+#if PG_SDL3
+// SDL3 no longer includes intrinsics by default; include them explicitly
+#include <SDL3/SDL_intrin.h>
+
+/* If SDL_AVX2_INTRINSICS is defined by SDL3, we need to define the macros
+ * that our code checks for AVX2 build-time support. */
+#ifdef SDL_AVX2_INTRINSICS
+#ifndef HAVE_IMMINTRIN_H
+#define HAVE_IMMINTRIN_H 1
+#endif /* HAVE_IMMINTRIN_H */
+#endif /* SDL_AVX2_INTRINSICS */
+#endif /* PG_SDL3 */
+
 /**
  * MACRO borrowed from SSE2NEON - useful for making the shuffling family of
  * intrinsics easier to understand by indicating clearly what will go where.
@@ -26,7 +44,8 @@
 #if defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)
 
 void
-grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf);
+grayscale_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt,
+               SDL_Surface *newsurf);
 // smoothscale filters
 void
 filter_shrink_X_SSE2(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch,
@@ -41,12 +60,13 @@ void
 filter_expand_Y_SSE2(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch,
                      int dstpitch, int srcheight, int dstheight);
 void
-invert_sse2(SDL_Surface *src, SDL_Surface *newsurf);
+invert_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf);
 
 #endif /* (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)) */
 
 // AVX2 functions
 void
-grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf);
+grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt,
+               SDL_Surface *newsurf);
 void
-invert_avx2(SDL_Surface *src, SDL_Surface *newsurf);
+invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf);
diff --git a/src_c/simd_transform_avx2.c b/src_c/simd_transform_avx2.c
@@ -46,7 +46,7 @@ pg_avx2_at_runtime_but_uncompiled()
 #if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
     !defined(SDL_DISABLE_IMMINTRIN_H)
 void
-grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
+grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
 {
     /* See the SSE2 code for a simpler overview of this algorithm
      * Current AVX2 process
@@ -85,12 +85,11 @@ grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
     Uint32 *srcp = (Uint32 *)src->pixels;
     Uint32 *dstp = (Uint32 *)newsurf->pixels;
 
-    Uint32 amask = src->format->Amask;
+    Uint32 amask = src_fmt->Amask;
     Uint32 rgbmask = ~amask;
 
-    int rgb_weights =
-        ((0x4C << src->format->Rshift) | (0x96 << src->format->Gshift) |
-         (0x1D << src->format->Bshift));
+    int rgb_weights = ((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
+                       (0x1D << src_fmt->Bshift));
 
     __m256i *srcp256 = (__m256i *)src->pixels;
     __m256i *dstp256 = (__m256i *)newsurf->pixels;
@@ -216,7 +215,7 @@ grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
 }
 
 void
-invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
+invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
 {
     int s_row_skip = (src->pitch - src->w * 4) / 4;
 
@@ -237,7 +236,7 @@ invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
     Uint32 *srcp = (Uint32 *)src->pixels;
     Uint32 *dstp = (Uint32 *)newsurf->pixels;
 
-    Uint32 amask = src->format->Amask;
+    Uint32 amask = src_fmt->Amask;
     Uint32 rgbmask = ~amask;
 
     __m256i *srcp256 = (__m256i *)src->pixels;
@@ -300,12 +299,12 @@ invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
 }
 #else
 void
-grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
+grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
 {
     BAD_AVX2_FUNCTION_CALL;
 }
 void
-invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
+invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
 {
     BAD_AVX2_FUNCTION_CALL;
 }
diff --git a/src_c/simd_transform_sse2.c b/src_c/simd_transform_sse2.c
@@ -423,7 +423,7 @@ filter_expand_Y_SSE2(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch,
 }
 
 void
-grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
+grayscale_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
 {
     /* For the SSE2 SIMD version of grayscale we do one pixel at a time
      * Thus we can calculate the number of loops (and pixels) by multiplying
@@ -459,16 +459,15 @@ grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
     Uint32 *srcp = (Uint32 *)src->pixels;
     Uint32 *dstp = (Uint32 *)newsurf->pixels;
 
-    Uint64 amask64 = ((Uint64)src->format->Amask) | src->format->Amask;
+    Uint64 amask64 = ((Uint64)src_fmt->Amask) | src_fmt->Amask;
     Uint64 rgbmask64 = ~amask64;
 
     Uint64 rgb_weights =
-        ((Uint64)((0x4C << src->format->Rshift) |
-                  (0x96 << src->format->Gshift) |
-                  (0x1D << src->format->Bshift))
+        ((Uint64)((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
+                  (0x1D << src_fmt->Bshift))
          << 32) |
-        ((0x4C << src->format->Rshift) | (0x96 << src->format->Gshift) |
-         (0x1D << src->format->Bshift));
+        ((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
+         (0x1D << src_fmt->Bshift));
 
     Uint64 *srcp64 = (Uint64 *)src->pixels;
     Uint64 *dstp64 = (Uint64 *)newsurf->pixels;
@@ -612,7 +611,7 @@ grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
 }
 
 void
-invert_sse2(SDL_Surface *src, SDL_Surface *newsurf)
+invert_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
 {
     int s_row_skip = (src->pitch - src->w * 4) / 4;
 
@@ -637,8 +636,8 @@ invert_sse2(SDL_Surface *src, SDL_Surface *newsurf)
     __m128i *srcp128 = (__m128i *)src->pixels;
     __m128i *dstp128 = (__m128i *)newsurf->pixels;
 
-    mm_rgb_invert_mask = _mm_set1_epi32(~src->format->Amask);
-    mm_alpha_mask = _mm_set1_epi32(src->format->Amask);
+    mm_rgb_invert_mask = _mm_set1_epi32(~src_fmt->Amask);
+    mm_alpha_mask = _mm_set1_epi32(src_fmt->Amask);
 
     while (num_batches--) {
         perfect_4_pixels_batch_counter = perfect_4_pixels;
diff --git a/src_c/transform.c b/src_c/transform.c

Original file line number	Diff line number	Diff line change
`@@ -182,8 +182,6 @@ image = py.extension_module(`
`182`	`182`	`subdir: pg,`
`183`	`183`	`)`
`184`	`184`
`185`		`-# TODO: support SDL3`
`186`		`-if sdl_api != 3`
`187`	`185`	`simd_transform_avx2 = static_library(`
`188`	`186`	`'simd_transform_avx2',`
`189`	`187`	`'simd_transform_avx2.c',`
`@@ -224,7 +222,6 @@ transform = py.extension_module(`
`224`	`222`	`install: true,`
`225`	`223`	`subdir: pg,`
`226`	`224`	`)`
`227`		`-endif`
`228`	`225`
`229`	`226`	`mask = py.extension_module(`
`230`	`227`	`'mask',`