diff --git a/CMakeLists.txt b/CMakeLists.txt
index c518d85e..1740fae5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -279,10 +279,10 @@ else()
     add_compile_options(-march=armv8-a)
     add_compile_options(-fno-lax-vector-conversions)
   elseif(ARCH STREQUAL "riscv64")
-    add_compile_options(-march=rv64gc)
+    add_compile_options(-march=rv64gcv)
     add_compile_options(-mabi=lp64d)
   elseif(ARCH STREQUAL "riscv32")
-    add_compile_options(-march=rv32gc)
+    add_compile_options(-march=rv32gcv)
     add_compile_options(-mabi=ilp32d)
   elseif(ARCH STREQUAL "loong64")
     add_compile_options(-march=loongarch64)
@@ -553,6 +553,14 @@ if(UHDR_ENABLE_INTRINSICS)
     file(GLOB UHDR_CORE_NEON_SRCS_LIST "${SOURCE_DIR}/src/dsp/arm/*.cpp")
     list(APPEND UHDR_CORE_SRCS_LIST ${UHDR_CORE_NEON_SRCS_LIST})
   endif()
+  if(ARCH STREQUAL "riscv64")
+    file(GLOB UHDR_CORE_RVV_SRCS_LIST "${SOURCE_DIR}/src/dsp/riscv/*.cpp")
+    list(APPEND UHDR_CORE_SRCS_LIST ${UHDR_CORE_RVV_SRCS_LIST})
+  endif()
+  if(ARCH STREQUAL "riscv32")
+    file(GLOB UHDR_CORE_RVV_SRCS_LIST "${SOURCE_DIR}/src/dsp/riscv/*.cpp")
+    list(APPEND UHDR_CORE_SRCS_LIST ${UHDR_CORE_RVV_SRCS_LIST})
+  endif()
 endif()
 if(UHDR_ENABLE_GLES)
   file(GLOB UHDR_CORE_GLES_SRCS_LIST "${SOURCE_DIR}/src/gpu/*.cpp")
diff --git a/lib/include/ultrahdr/gainmapmath.h b/lib/include/ultrahdr/gainmapmath.h
index d604ad2b..f88a9a30 100644
--- a/lib/include/ultrahdr/gainmapmath.h
+++ b/lib/include/ultrahdr/gainmapmath.h
@@ -414,14 +414,16 @@ extern const std::array<float, 9> kYuvBt601ToBt2100;
 extern const std::array<float, 9> kYuvBt2100ToBt709;
 extern const std::array<float, 9> kYuvBt2100ToBt601;
 
-#if (defined(UHDR_ENABLE_INTRINSICS) && (defined(__ARM_NEON__) || defined(__ARM_NEON)))
+#ifdef UHDR_ENABLE_INTRINSICS
+
+extern const int16_t kYuv709To601_coeffs_simd[8];
+extern const int16_t kYuv709To2100_coeffs_simd[8];
+extern const int16_t kYuv601To709_coeffs_simd[8];
+extern const int16_t kYuv601To2100_coeffs_simd[8];
+extern const int16_t kYuv2100To709_coeffs_simd[8];
+extern const int16_t kYuv2100To601_coeffs_simd[8];
 
-extern const int16_t kYuv709To601_coeffs_neon[8];
-extern const int16_t kYuv709To2100_coeffs_neon[8];
-extern const int16_t kYuv601To709_coeffs_neon[8];
-extern const int16_t kYuv601To2100_coeffs_neon[8];
-extern const int16_t kYuv2100To709_coeffs_neon[8];
-extern const int16_t kYuv2100To601_coeffs_neon[8];
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON))
 
 /*
  * The Y values are provided at half the width of U & V values to allow use of the widening
@@ -435,6 +437,15 @@ void transformYuv444_neon(uhdr_raw_image_t* image, const int16_t* coeffs_ptr);
 
 uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t src_encoding,
                                   uhdr_color_gamut_t dst_encoding);
+
+#elif defined(__riscv_v_intrinsic)
+
+void transformYuv420_rvv(uhdr_raw_image_t* image, const int16_t* coeffs_ptr);
+
+uhdr_error_info_t convertYuv_rvv(uhdr_raw_image_t* image, uhdr_color_gamut_t src_encoding,
+                                  uhdr_color_gamut_t dst_encoding);
+
+#endif
 #endif
 
 // Performs a color gamut transformation on an yuv image.
diff --git a/lib/src/dsp/arm/gainmapmath_neon.cpp b/lib/src/dsp/arm/gainmapmath_neon.cpp
index 306a971a..40b07b39 100644
--- a/lib/src/dsp/arm/gainmapmath_neon.cpp
+++ b/lib/src/dsp/arm/gainmapmath_neon.cpp
@@ -27,55 +27,6 @@
 
 namespace ultrahdr {
 
-// Scale all coefficients by 2^14 to avoid needing floating-point arithmetic. This can cause an off
-// by one error compared to the scalar floating-point implementation.
-
-// Removing conversion coefficients 1 and 0 from the group for each standard leaves 6 coefficients.
-// Pack them into a single 128-bit vector as follows, zeroing the remaining elements:
-// {Y1, Y2, U1, U2, V1, V2, 0, 0}
-
-// Yuv Bt709 -> Yuv Bt601
-// Y' = (1.0f * Y) + ( 0.101579f * U) + ( 0.196076f * V)
-// U' = (0.0f * Y) + ( 0.989854f * U) + (-0.110653f * V)
-// V' = (0.0f * Y) + (-0.072453f * U) + ( 0.983398f * V)
-ALIGNED(16)
-const int16_t kYuv709To601_coeffs_neon[8] = {1664, 3213, 16218, -1813, -1187, 16112, 0, 0};
-
-// Yuv Bt709 -> Yuv Bt2100
-// Y' = (1.0f * Y) + (-0.016969f * U) + ( 0.096312f * V)
-// U' = (0.0f * Y) + ( 0.995306f * U) + (-0.051192f * V)
-// V' = (0.0f * Y) + ( 0.011507f * U) + ( 1.002637f * V)
-ALIGNED(16)
-const int16_t kYuv709To2100_coeffs_neon[8] = {-278, 1578, 16307, -839, 189, 16427, 0, 0};
-
-// Yuv Bt601 -> Yuv Bt709
-// Y' = (1.0f * Y) + (-0.118188f * U) + (-0.212685f * V),
-// U' = (0.0f * Y) + ( 1.018640f * U) + ( 0.114618f * V),
-// V' = (0.0f * Y) + ( 0.075049f * U) + ( 1.025327f * V);
-ALIGNED(16)
-const int16_t kYuv601To709_coeffs_neon[8] = {-1936, -3485, 16689, 1878, 1230, 16799, 0, 0};
-
-// Yuv Bt601 -> Yuv Bt2100
-// Y' = (1.0f * Y) + (-0.128245f * U) + (-0.115879f * V)
-// U' = (0.0f * Y) + ( 1.010016f * U) + ( 0.061592f * V)
-// V' = (0.0f * Y) + ( 0.086969f * U) + ( 1.029350f * V)
-ALIGNED(16)
-const int16_t kYuv601To2100_coeffs_neon[8] = {-2101, -1899, 16548, 1009, 1425, 16865, 0, 0};
-
-// Yuv Bt2100 -> Yuv Bt709
-// Y' = (1.0f * Y) + ( 0.018149f * U) + (-0.095132f * V)
-// U' = (0.0f * Y) + ( 1.004123f * U) + ( 0.051267f * V)
-// V' = (0.0f * Y) + (-0.011524f * U) + ( 0.996782f * V)
-ALIGNED(16)
-const int16_t kYuv2100To709_coeffs_neon[8] = {297, -1559, 16452, 840, -189, 16331, 0, 0};
-
-// Yuv Bt2100 -> Yuv Bt601
-// Y' = (1.0f * Y) + ( 0.117887f * U) + ( 0.105521f * V)
-// U' = (0.0f * Y) + ( 0.995211f * U) + (-0.059549f * V)
-// V' = (0.0f * Y) + (-0.084085f * U) + ( 0.976518f * V)
-ALIGNED(16)
-const int16_t kYuv2100To601_coeffs_neon[8] = {1931, 1729, 16306, -976, -1378, 15999, 0, 0};
-
 static inline int16x8_t yConversion_neon(uint8x8_t y, int16x8_t u, int16x8_t v, int16x8_t coeffs) {
   int32x4_t lo = vmull_lane_s16(vget_low_s16(u), vget_low_s16(coeffs), 0);
   int32x4_t hi = vmull_lane_s16(vget_high_s16(u), vget_low_s16(coeffs), 0);
@@ -244,10 +195,10 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
         case UHDR_CG_BT_709:
           return status;
         case UHDR_CG_DISPLAY_P3:
-          coeffs = kYuv709To601_coeffs_neon;
+          coeffs = kYuv709To601_coeffs_simd;
           break;
         case UHDR_CG_BT_2100:
-          coeffs = kYuv709To2100_coeffs_neon;
+          coeffs = kYuv709To2100_coeffs_simd;
           break;
         default:
           status.error_code = UHDR_CODEC_INVALID_PARAM;
@@ -260,12 +211,12 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
     case UHDR_CG_DISPLAY_P3:
       switch (dst_encoding) {
         case UHDR_CG_BT_709:
-          coeffs = kYuv601To709_coeffs_neon;
+          coeffs = kYuv601To709_coeffs_simd;
           break;
         case UHDR_CG_DISPLAY_P3:
           return status;
         case UHDR_CG_BT_2100:
-          coeffs = kYuv601To2100_coeffs_neon;
+          coeffs = kYuv601To2100_coeffs_simd;
           break;
         default:
           status.error_code = UHDR_CODEC_INVALID_PARAM;
@@ -278,10 +229,10 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
     case UHDR_CG_BT_2100:
       switch (dst_encoding) {
         case UHDR_CG_BT_709:
-          coeffs = kYuv2100To709_coeffs_neon;
+          coeffs = kYuv2100To709_coeffs_simd;
           break;
         case UHDR_CG_DISPLAY_P3:
-          coeffs = kYuv2100To601_coeffs_neon;
+          coeffs = kYuv2100To601_coeffs_simd;
           break;
         case UHDR_CG_BT_2100:
           return status;
@@ -328,21 +279,21 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
 // U = -0.114592135 * R + -0.385407865 * G + 0.5 * B
 // V = 0.5 * R + -0.454155718 * G + -0.045844282 * B
 ALIGNED(16)
-const uint16_t kRgb709ToYuv_coeffs_neon[8] = {3484, 11717, 1183, 1877, 6315, 8192, 7441, 751};
+const uint16_t kRgb709ToYuv_coeffs_simd[8] = {3484, 11717, 1183, 1877, 6315, 8192, 7441, 751};
 
 // RGB Display P3 -> Yuv Display P3
 // Y = 0.2289746 * R + 0.6917385 * G + 0.0792869 * B
 // U = -0.124346335 * R + -0.375653665 * G + 0.5 * B
 // V = 0.5 * R + -0.448583471 * G + -0.051416529 * B
 ALIGNED(16)
-const uint16_t kRgbDispP3ToYuv_coeffs_neon[8] = {3752, 11333, 1299, 2037, 6155, 8192, 7350, 842};
+const uint16_t kRgbDispP3ToYuv_coeffs_simd[8] = {3752, 11333, 1299, 2037, 6155, 8192, 7350, 842};
 
 // RGB Bt2100 -> Yuv Bt2100
 // Y = 0.2627 * R + 0.677998 * G + 0.059302 * B
 // U = -0.13963036 * R + -0.36036964 * G + 0.5 * B
 // V = 0.5 * R + -0.459784348 * G + -0.040215652 * B
 ALIGNED(16)
-const uint16_t kRgb2100ToYuv_coeffs_neon[8] = {4304, 11108, 972, 2288, 5904, 8192, 7533, 659};
+const uint16_t kRgb2100ToYuv_coeffs_simd[8] = {4304, 11108, 972, 2288, 5904, 8192, 7533, 659};
 
 // The core logic is taken from jsimd_rgb_ycc_convert_neon implementation in jccolext-neon.c of
 // libjpeg-turbo
@@ -460,11 +411,11 @@ std::unique_ptr<uhdr_raw_image_ext_t> convert_raw_input_to_ycbcr_neon(uhdr_raw_i
     const uint16_t* coeffs_ptr = nullptr;
 
     if (src->cg == UHDR_CG_BT_709) {
-      coeffs_ptr = kRgb709ToYuv_coeffs_neon;
+      coeffs_ptr = kRgb709ToYuv_coeffs_simd;
     } else if (src->cg == UHDR_CG_BT_2100) {
-      coeffs_ptr = kRgbDispP3ToYuv_coeffs_neon;
+      coeffs_ptr = kRgbDispP3ToYuv_coeffs_simd;
     } else if (src->cg == UHDR_CG_DISPLAY_P3) {
-      coeffs_ptr = kRgb2100ToYuv_coeffs_neon;
+      coeffs_ptr = kRgb2100ToYuv_coeffs_simd;
     } else {
       return dst;
     }
diff --git a/lib/src/dsp/riscv/gainmapmath_rvv.cpp b/lib/src/dsp/riscv/gainmapmath_rvv.cpp
new file mode 100644
index 00000000..8875c4d3
--- /dev/null
+++ b/lib/src/dsp/riscv/gainmapmath_rvv.cpp
@@ -0,0 +1,273 @@
+/*
+ * Copyright 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ultrahdr/gainmapmath.h"
+#include <riscv_vector.h>
+#include <cassert>
+
+namespace ultrahdr {
+
+static inline vuint16m8_t zip_self(vuint16m4_t a, size_t vl) {
+  vuint32m8_t a_wide = __riscv_vzext_vf2_u32m8(a, vl / 2);
+  vuint16m8_t a_zero = __riscv_vreinterpret_v_u32m8_u16m8(a_wide);
+  vuint16m8_t a_zero_slide = __riscv_vslide1up_vx_u16m8(a_zero, 0, vl);
+  vuint16m8_t a_zip = __riscv_vadd_vv_u16m8(a_zero, a_zero_slide, vl);
+  return a_zip;
+}
+
+static inline vint16m4_t vqrshrn_n_s32(vint32m8_t a, const int b, size_t vl) {
+  return __riscv_vnclip_wx_i16m4(a, b, vl);
+}
+
+static inline vuint8m4_t vget_low_u8(vuint8m8_t u) { return __riscv_vget_v_u8m8_u8m4(u, 0); }
+
+static inline vuint8m4_t vget_high_u8(vuint8m8_t u, size_t vl) {
+  return __riscv_vget_v_u8m8_u8m4(__riscv_vslidedown_vx_u8m8(u, vl / 2, vl), 0);
+}
+
+static inline vint16m4_t vget_low_s16(vint16m8_t u) { return __riscv_vget_v_i16m8_i16m4(u, 0); }
+
+static inline vint16m4_t vget_high_s16(vint16m8_t u, size_t vl) {
+  return __riscv_vget_v_i16m8_i16m4(__riscv_vslidedown_vx_i16m8(u, vl / 2, vl), 0);
+}
+
+static inline vuint16m4_t vget_low_u16(vuint16m8_t u) { return __riscv_vget_v_u16m8_u16m4(u, 0); }
+
+static inline vuint16m4_t vget_high_u16(vuint16m8_t u, size_t vl) {
+  return __riscv_vget_v_u16m8_u16m4(__riscv_vslidedown_vx_u16m8(u, vl / 2, vl), 0);
+}
+
+static inline vint16m8_t vcombine_s16(vint16m4_t a, vint16m4_t b, size_t vl) {
+  vint16m8_t a_wide = __riscv_vlmul_ext_v_i16m4_i16m8(a);
+  vint16m8_t b_wide = __riscv_vlmul_ext_v_i16m4_i16m8(b);
+  return __riscv_vslideup_vx_i16m8(a_wide, b_wide, vl / 2, vl);
+}
+
+static inline vuint8m8_t vcombine_u8(vuint8m4_t a, vuint8m4_t b, size_t vl) {
+  vuint8m8_t a_wide = __riscv_vlmul_ext_v_u8m4_u8m8(a);
+  vuint8m8_t b_wide = __riscv_vlmul_ext_v_u8m4_u8m8(b);
+  return __riscv_vslideup_vx_u8m8(a_wide, b_wide, vl / 2, vl);
+}
+
+static inline vuint8m4_t vqmovun_s16(vint16m8_t a, size_t vl) {
+  vuint16m8_t a_non_neg = __riscv_vreinterpret_v_i16m8_u16m8(__riscv_vmax_vx_i16m8(a, 0, vl));
+  return __riscv_vnclipu_wx_u8m4(a_non_neg, 0, vl);
+}
+
+static inline vint16m8_t yConversion_rvv(vuint8m4_t y, vint16m8_t u, vint16m8_t v,
+                                         const int16_t* coeffs, size_t vl) {
+  vint32m8_t u_lo = __riscv_vwmul_vx_i32m8(vget_low_s16(u), coeffs[0], vl / 2);
+  vint32m8_t u_hi = __riscv_vwmul_vx_i32m8(vget_high_s16(u, vl), coeffs[0], vl / 2);
+
+  vint32m8_t v_lo = __riscv_vwmul_vx_i32m8(vget_low_s16(v), coeffs[1], vl / 2);
+  vint32m8_t v_hi = __riscv_vwmul_vx_i32m8(vget_high_s16(v, vl), coeffs[1], vl / 2);
+
+  vint32m8_t lo = __riscv_vadd_vv_i32m8(u_lo, v_lo, vl / 2);
+  vint32m8_t hi = __riscv_vadd_vv_i32m8(u_hi, v_hi, vl / 2);
+
+  vint16m4_t lo_shr = vqrshrn_n_s32(lo, 14, vl / 2);
+  vint16m4_t hi_shr = vqrshrn_n_s32(hi, 14, vl / 2);
+
+  vint16m8_t y_output = vcombine_s16(lo_shr, hi_shr, vl);
+  vuint16m8_t y_u16 = __riscv_vreinterpret_v_i16m8_u16m8(y_output);
+  vuint16m8_t y_ret = __riscv_vwaddu_wv_u16m8(y_u16, y, vl);
+  return __riscv_vreinterpret_v_u16m8_i16m8(y_ret);
+}
+
+static inline vint16m8_t uConversion_rvv(vint16m8_t u, vint16m8_t v, const int16_t* coeffs,
+                                         size_t vl) {
+  vint32m8_t u_lo = __riscv_vwmul_vx_i32m8(vget_low_s16(u), coeffs[2], vl / 2);
+  vint32m8_t u_hi = __riscv_vwmul_vx_i32m8(vget_high_s16(u, vl), coeffs[2], vl / 2);
+
+  vint32m8_t v_lo = __riscv_vwmul_vx_i32m8(vget_low_s16(v), coeffs[3], vl / 2);
+  vint32m8_t v_hi = __riscv_vwmul_vx_i32m8(vget_high_s16(v, vl), coeffs[3], vl / 2);
+
+  vint32m8_t lo = __riscv_vadd_vv_i32m8(u_lo, v_lo, vl / 2);
+  vint32m8_t hi = __riscv_vadd_vv_i32m8(u_hi, v_hi, vl / 2);
+
+  vint16m4_t lo_shr = vqrshrn_n_s32(lo, 14, vl / 2);
+  vint16m4_t hi_shr = vqrshrn_n_s32(hi, 14, vl / 2);
+
+  vint16m8_t u_output = vcombine_s16(lo_shr, hi_shr, vl);
+  return u_output;
+}
+
+static inline vint16m8_t vConversion_rvv(vint16m8_t u, vint16m8_t v, const int16_t* coeffs,
+                                         size_t vl) {
+  vint32m8_t u_lo = __riscv_vwmul_vx_i32m8(vget_low_s16(u), coeffs[4], vl / 2);
+  vint32m8_t u_hi = __riscv_vwmul_vx_i32m8(vget_high_s16(u, vl), coeffs[4], vl / 2);
+
+  vint32m8_t v_lo = __riscv_vwmul_vx_i32m8(vget_low_s16(v), coeffs[5], vl / 2);
+  vint32m8_t v_hi = __riscv_vwmul_vx_i32m8(vget_high_s16(v, vl), coeffs[5], vl / 2);
+
+  vint32m8_t lo = __riscv_vadd_vv_i32m8(u_lo, v_lo, vl / 2);
+  vint32m8_t hi = __riscv_vadd_vv_i32m8(u_hi, v_hi, vl / 2);
+
+  vint16m4_t lo_shr = vqrshrn_n_s32(lo, 14, vl / 2);
+  vint16m4_t hi_shr = vqrshrn_n_s32(hi, 14, vl / 2);
+
+  vint16m8_t v_output = vcombine_s16(lo_shr, hi_shr, vl);
+  return v_output;
+}
+
+void transformYuv420_rvv(uhdr_raw_image_t* image, const int16_t* coeffs_ptr) {
+  assert(image->w % 16 == 0);
+  uint8_t* y0_ptr = static_cast<uint8_t*>(image->planes[UHDR_PLANE_Y]);
+  uint8_t* y1_ptr = y0_ptr + image->stride[UHDR_PLANE_Y];
+  uint8_t* u_ptr = static_cast<uint8_t*>(image->planes[UHDR_PLANE_U]);
+  uint8_t* v_ptr = static_cast<uint8_t*>(image->planes[UHDR_PLANE_V]);
+  size_t vl;
+  size_t h = 0;
+  do {
+    size_t w = 0;
+    do {
+      vl = __riscv_vsetvl_e8m8((image->w) - w);
+      assert((vl % 4) == 0 && vl >= 4);
+
+      vuint8m8_t y0 = __riscv_vle8_v_u8m8(y0_ptr + w * 2, vl);
+      vuint8m8_t y1 = __riscv_vle8_v_u8m8(y1_ptr + w * 2, vl);
+
+      vuint8m4_t u8 = __riscv_vle8_v_u8m4(u_ptr + w, vl / 2);
+      vuint8m4_t v8 = __riscv_vle8_v_u8m4(v_ptr + w, vl / 2);
+
+      vuint16m8_t u16_wide = __riscv_vwsubu_vx_u16m8(u8, 128, vl / 2);
+      vuint16m8_t v16_wide = __riscv_vwsubu_vx_u16m8(v8, 128, vl / 2);
+
+      vuint16m8_t uu_wide_lo = zip_self(__riscv_vget_v_u16m8_u16m4(u16_wide, 0), vl / 2);
+      vuint16m8_t uu_wide_hi = zip_self(vget_high_u16(u16_wide, vl / 2), vl / 2);
+      vuint16m8_t uv_wide_lo = zip_self(__riscv_vget_v_u16m8_u16m4(v16_wide, 0), vl / 2);
+      vuint16m8_t uv_wide_hi = zip_self(vget_high_u16(v16_wide, vl / 2), vl / 2);
+
+      vint16m8_t u_wide_lo = __riscv_vreinterpret_v_u16m8_i16m8(uu_wide_lo);
+      vint16m8_t v_wide_lo = __riscv_vreinterpret_v_u16m8_i16m8(uv_wide_lo);
+      vint16m8_t u_wide_hi = __riscv_vreinterpret_v_u16m8_i16m8(uu_wide_hi);
+      vint16m8_t v_wide_hi = __riscv_vreinterpret_v_u16m8_i16m8(uv_wide_hi);
+
+      vint16m8_t y0_lo = yConversion_rvv(vget_low_u8(y0), u_wide_lo, v_wide_lo, coeffs_ptr, vl / 2);
+      vint16m8_t y1_lo = yConversion_rvv(vget_low_u8(y1), u_wide_lo, v_wide_lo, coeffs_ptr, vl / 2);
+      vint16m8_t y0_hi =
+          yConversion_rvv(vget_high_u8(y0, vl / 2), u_wide_hi, v_wide_hi, coeffs_ptr, vl / 2);
+      vint16m8_t y1_hi =
+          yConversion_rvv(vget_high_u8(y1, vl / 2), u_wide_hi, v_wide_hi, coeffs_ptr, vl / 2);
+
+      vint16m8_t u_wide_s16 = __riscv_vreinterpret_v_u16m8_i16m8(u16_wide);
+      vint16m8_t v_wide_s16 = __riscv_vreinterpret_v_u16m8_i16m8(v16_wide);
+      vint16m8_t new_u = uConversion_rvv(u_wide_s16, v_wide_s16, coeffs_ptr, vl / 2);
+      vint16m8_t new_v = vConversion_rvv(u_wide_s16, v_wide_s16, coeffs_ptr, vl / 2);
+
+      vuint8m8_t y0_output =
+          vcombine_u8(vqmovun_s16(y0_lo, vl / 2), vqmovun_s16(y0_hi, vl / 2), vl);
+      vuint8m8_t y1_output =
+          vcombine_u8(vqmovun_s16(y1_lo, vl / 2), vqmovun_s16(y1_hi, vl / 2), vl);
+      vuint8m4_t u_output = vqmovun_s16(__riscv_vadd_vx_i16m8(new_u, 128, vl / 2), vl / 2);
+      vuint8m4_t v_output = vqmovun_s16(__riscv_vadd_vx_i16m8(new_v, 128, vl / 2), vl / 2);
+
+      __riscv_vse8_v_u8m8(y0_ptr + w * 2, y0_output, vl);
+      __riscv_vse8_v_u8m8(y1_ptr + w * 2, y1_output, vl);
+      __riscv_vse8_v_u8m4(u_ptr + w, u_output, vl / 2);
+      __riscv_vse8_v_u8m4(v_ptr + w, v_output, vl / 2);
+
+      w += (vl / 2);
+    } while (w < image->w / 2);
+    y0_ptr += image->stride[UHDR_PLANE_Y] * 2;
+    y1_ptr += image->stride[UHDR_PLANE_Y] * 2;
+    u_ptr += image->stride[UHDR_PLANE_U];
+    v_ptr += image->stride[UHDR_PLANE_V];
+  } while (++h < image->h / 2);
+}
+
+uhdr_error_info_t convertYuv_rvv(uhdr_raw_image_t* image, uhdr_color_gamut_t src_encoding,
+                                 uhdr_color_gamut_t dst_encoding) {
+  uhdr_error_info_t status = g_no_error;
+  const int16_t* coeffs = nullptr;
+
+  switch (src_encoding) {
+    case UHDR_CG_BT_709:
+      switch (dst_encoding) {
+        case UHDR_CG_BT_709:
+          return status;
+        case UHDR_CG_DISPLAY_P3:
+          coeffs = kYuv709To601_coeffs_simd;
+          break;
+        case UHDR_CG_BT_2100:
+          coeffs = kYuv709To2100_coeffs_simd;
+          break;
+        default:
+          status.error_code = UHDR_CODEC_INVALID_PARAM;
+          status.has_detail = 1;
+          snprintf(status.detail, sizeof status.detail, "Unrecognized dest color gamut %d",
+                   dst_encoding);
+          return status;
+      }
+      break;
+    case UHDR_CG_DISPLAY_P3:
+      switch (dst_encoding) {
+        case UHDR_CG_BT_709:
+          coeffs = kYuv601To709_coeffs_simd;
+          break;
+        case UHDR_CG_DISPLAY_P3:
+          return status;
+        case UHDR_CG_BT_2100:
+          coeffs = kYuv601To2100_coeffs_simd;
+          break;
+        default:
+          status.error_code = UHDR_CODEC_INVALID_PARAM;
+          status.has_detail = 1;
+          snprintf(status.detail, sizeof status.detail, "Unrecognized dest color gamut %d",
+                   dst_encoding);
+          return status;
+      }
+      break;
+    case UHDR_CG_BT_2100:
+      switch (dst_encoding) {
+        case UHDR_CG_BT_709:
+          coeffs = kYuv2100To709_coeffs_simd;
+          break;
+        case UHDR_CG_DISPLAY_P3:
+          coeffs = kYuv2100To601_coeffs_simd;
+          break;
+        case UHDR_CG_BT_2100:
+          return status;
+        default:
+          status.error_code = UHDR_CODEC_INVALID_PARAM;
+          status.has_detail = 1;
+          snprintf(status.detail, sizeof status.detail, "Unrecognized dest color gamut %d",
+                   dst_encoding);
+          return status;
+      }
+      break;
+    default:
+      status.error_code = UHDR_CODEC_INVALID_PARAM;
+      status.has_detail = 1;
+      snprintf(status.detail, sizeof status.detail, "Unrecognized src color gamut %d",
+               src_encoding);
+      return status;
+  }
+
+  if (image->fmt == UHDR_IMG_FMT_12bppYCbCr420) {
+    transformYuv420_rvv(image, coeffs);
+  } else {
+    status.error_code = UHDR_CODEC_UNSUPPORTED_FEATURE;
+    status.has_detail = 1;
+    snprintf(status.detail, sizeof status.detail,
+             "No implementation available for performing gamut conversion for color format %d",
+             image->fmt);
+    return status;
+  }
+
+  return status;
+}
+}  // namespace ultrahdr
diff --git a/lib/src/gainmapmath.cpp b/lib/src/gainmapmath.cpp
index fa56c3e8..2bae4fb3 100644
--- a/lib/src/gainmapmath.cpp
+++ b/lib/src/gainmapmath.cpp
@@ -684,6 +684,63 @@ const std::array<float, 9> kYuvBt2100ToBt709 = {
 const std::array<float, 9> kYuvBt2100ToBt601 = {
     1.0f, 0.117887f, 0.105521f, 0.0f, 0.995211f, -0.059549f, 0.0f, -0.084085f, 0.976518f};
 
+#ifdef UHDR_ENABLE_INTRINSICS
+
+#ifdef _MSC_VER
+#define ALIGNED(x) __declspec(align(x))
+#else
+#define ALIGNED(x) __attribute__((aligned(x)))
+#endif
+// Scale all coefficients by 2^14 to avoid needing floating-point arithmetic. This can cause an off
+// by one error compared to the scalar floating-point implementation.
+
+// Removing conversion coefficients 1 and 0 from the group for each standard leaves 6 coefficients.
+// Pack them into a single 128-bit vector as follows, zeroing the remaining elements:
+// {Y1, Y2, U1, U2, V1, V2, 0, 0}
+
+// Yuv Bt709 -> Yuv Bt601
+// Y' = (1.0f * Y) + ( 0.101579f * U) + ( 0.196076f * V)
+// U' = (0.0f * Y) + ( 0.989854f * U) + (-0.110653f * V)
+// V' = (0.0f * Y) + (-0.072453f * U) + ( 0.983398f * V)
+ALIGNED(16)
+const int16_t kYuv709To601_coeffs_simd[8] = {1664, 3213, 16218, -1813, -1187, 16112, 0, 0};
+
+// Yuv Bt709 -> Yuv Bt2100
+// Y' = (1.0f * Y) + (-0.016969f * U) + ( 0.096312f * V)
+// U' = (0.0f * Y) + ( 0.995306f * U) + (-0.051192f * V)
+// V' = (0.0f * Y) + ( 0.011507f * U) + ( 1.002637f * V)
+ALIGNED(16)
+const int16_t kYuv709To2100_coeffs_simd[8] = {-278, 1578, 16307, -839, 189, 16427, 0, 0};
+
+// Yuv Bt601 -> Yuv Bt709
+// Y' = (1.0f * Y) + (-0.118188f * U) + (-0.212685f * V),
+// U' = (0.0f * Y) + ( 1.018640f * U) + ( 0.114618f * V),
+// V' = (0.0f * Y) + ( 0.075049f * U) + ( 1.025327f * V);
+ALIGNED(16)
+const int16_t kYuv601To709_coeffs_simd[8] = {-1936, -3485, 16689, 1878, 1230, 16799, 0, 0};
+
+// Yuv Bt601 -> Yuv Bt2100
+// Y' = (1.0f * Y) + (-0.128245f * U) + (-0.115879f * V)
+// U' = (0.0f * Y) + ( 1.010016f * U) + ( 0.061592f * V)
+// V' = (0.0f * Y) + ( 0.086969f * U) + ( 1.029350f * V)
+ALIGNED(16)
+const int16_t kYuv601To2100_coeffs_simd[8] = {-2101, -1899, 16548, 1009, 1425, 16865, 0, 0};
+
+// Yuv Bt2100 -> Yuv Bt709
+// Y' = (1.0f * Y) + ( 0.018149f * U) + (-0.095132f * V)
+// U' = (0.0f * Y) + ( 1.004123f * U) + ( 0.051267f * V)
+// V' = (0.0f * Y) + (-0.011524f * U) + ( 0.996782f * V)
+ALIGNED(16)
+const int16_t kYuv2100To709_coeffs_simd[8] = {297, -1559, 16452, 840, -189, 16331, 0, 0};
+
+// Yuv Bt2100 -> Yuv Bt601
+// Y' = (1.0f * Y) + ( 0.117887f * U) + ( 0.105521f * V)
+// U' = (0.0f * Y) + ( 0.995211f * U) + (-0.059549f * V)
+// V' = (0.0f * Y) + (-0.084085f * U) + ( 0.976518f * V)
+ALIGNED(16)
+const int16_t kYuv2100To601_coeffs_simd[8] = {1931, 1729, 16306, -976, -1378, 15999, 0, 0};
+#endif
+
 Color yuvColorGamutConversion(Color e_gamma, const std::array<float, 9>& coeffs) {
   const float y = e_gamma.y * std::get<0>(coeffs) + e_gamma.u * std::get<1>(coeffs) +
                   e_gamma.v * std::get<2>(coeffs);
diff --git a/lib/src/jpegr.cpp b/lib/src/jpegr.cpp
index 1f83b34d..23c5bb2f 100644
--- a/lib/src/jpegr.cpp
+++ b/lib/src/jpegr.cpp
@@ -264,6 +264,8 @@ uhdr_error_info_t JpegR::encodeJPEGR(uhdr_raw_image_t* hdr_intent, uhdr_raw_imag
   // convert to bt601 YUV encoding for JPEG encode
 #if (defined(UHDR_ENABLE_INTRINSICS) && (defined(__ARM_NEON__) || defined(__ARM_NEON)))
   UHDR_ERR_CHECK(convertYuv_neon(sdr_intent_yuv, sdr_intent_yuv->cg, UHDR_CG_DISPLAY_P3));
+#elif (defined(UHDR_ENABLE_INTRINSICS) && defined(__riscv_v_intrinsic))
+  UHDR_ERR_CHECK(convertYuv_rvv(sdr_intent_yuv, sdr_intent_yuv->cg, UHDR_CG_DISPLAY_P3));
 #else
   UHDR_ERR_CHECK(convertYuv(sdr_intent_yuv, sdr_intent_yuv->cg, UHDR_CG_DISPLAY_P3));
 #endif
diff --git a/tests/gainmapmath_test.cpp b/tests/gainmapmath_test.cpp
index 91d942a8..240b667c 100644
--- a/tests/gainmapmath_test.cpp
+++ b/tests/gainmapmath_test.cpp
@@ -788,12 +788,12 @@ TEST_F(GainMapMathTest, YuvConversionNeon) {
   const std::array<
       std::tuple<const int16_t*, const std::array<Pixel, 5>, const std::array<Pixel, 5>>, 6>
       coeffs_setup_correct{{
-          {kYuv709To601_coeffs_neon, SrgbYuvColors, P3YuvColors},
-          {kYuv709To2100_coeffs_neon, SrgbYuvColors, Bt2100YuvColors},
-          {kYuv601To709_coeffs_neon, P3YuvColors, SrgbYuvColors},
-          {kYuv601To2100_coeffs_neon, P3YuvColors, Bt2100YuvColors},
-          {kYuv2100To709_coeffs_neon, Bt2100YuvColors, SrgbYuvColors},
-          {kYuv2100To601_coeffs_neon, Bt2100YuvColors, P3YuvColors},
+          {kYuv709To601_coeffs_simd, SrgbYuvColors, P3YuvColors},
+          {kYuv709To2100_coeffs_simd, SrgbYuvColors, Bt2100YuvColors},
+          {kYuv601To709_coeffs_simd, P3YuvColors, SrgbYuvColors},
+          {kYuv601To2100_coeffs_simd, P3YuvColors, Bt2100YuvColors},
+          {kYuv2100To709_coeffs_simd, Bt2100YuvColors, SrgbYuvColors},
+          {kYuv2100To601_coeffs_simd, Bt2100YuvColors, P3YuvColors},
       }};
 
   for (const auto& [coeff_ptr, input, expected] : coeffs_setup_correct) {
@@ -954,16 +954,15 @@ TEST_F(GainMapMathTest, TransformYuv420) {
     }
   }
 }
-
-#if (defined(UHDR_ENABLE_INTRINSICS) && (defined(__ARM_NEON__) || defined(__ARM_NEON)))
-TEST_F(GainMapMathTest, TransformYuv420Neon) {
+#ifdef UHDR_ENABLE_INTRINSICS
+TEST_F(GainMapMathTest, TransformYuv420SIMD) {
   const std::array<std::pair<const int16_t*, const std::array<float, 9>>, 6> fixed_floating_coeffs{
-      {{kYuv709To601_coeffs_neon, kYuvBt709ToBt601},
-       {kYuv709To2100_coeffs_neon, kYuvBt709ToBt2100},
-       {kYuv601To709_coeffs_neon, kYuvBt601ToBt709},
-       {kYuv601To2100_coeffs_neon, kYuvBt601ToBt2100},
-       {kYuv2100To709_coeffs_neon, kYuvBt2100ToBt709},
-       {kYuv2100To601_coeffs_neon, kYuvBt2100ToBt601}}};
+      {{kYuv709To601_coeffs_simd, kYuvBt709ToBt601},
+       {kYuv709To2100_coeffs_simd, kYuvBt709ToBt2100},
+       {kYuv601To709_coeffs_simd, kYuvBt601ToBt709},
+       {kYuv601To2100_coeffs_simd, kYuvBt601ToBt2100},
+       {kYuv2100To709_coeffs_simd, kYuvBt2100ToBt709},
+       {kYuv2100To601_coeffs_simd, kYuvBt2100ToBt601}}};
 
   for (const auto& [neon_coeffs_ptr, floating_point_coeffs] : fixed_floating_coeffs) {
     uhdr_raw_image_t input = Yuv420Image32x4();
@@ -980,8 +979,14 @@ TEST_F(GainMapMathTest, TransformYuv420Neon) {
     output.planes[UHDR_PLANE_Y] = luma;
     output.planes[UHDR_PLANE_U] = cb;
     output.planes[UHDR_PLANE_V] = cr;
-
+    
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON))
     transformYuv420_neon(&output, neon_coeffs_ptr);
+#elif defined(__riscv_v_intrinsic)
+    transformYuv420_rvv(&output, neon_coeffs_ptr);
+#else
+    return;
+#endif
 
     for (size_t y = 0; y < input.h / 2; ++y) {
       for (size_t x = 0; x < input.w / 2; ++x) {
@@ -1014,11 +1019,17 @@ TEST_F(GainMapMathTest, TransformYuv420Neon) {
 
         // Due to the Neon version using a fixed-point approximation, this can result in an off by
         // one error compared with the standard floating-point version.
+#if defined(__riscv_v_intrinsic)
+        EXPECT_NEAR(expect_y1, out1.y, 2);
+        EXPECT_NEAR(expect_y2, out2.y, 2);
+        EXPECT_NEAR(expect_y3, out3.y, 2);
+        EXPECT_NEAR(expect_y4, out4.y, 2);
+#else
         EXPECT_NEAR(expect_y1, out1.y, 1);
         EXPECT_NEAR(expect_y2, out2.y, 1);
         EXPECT_NEAR(expect_y3, out3.y, 1);
         EXPECT_NEAR(expect_y4, out4.y, 1);
-
+#endif
         EXPECT_NEAR(expect_u, out1.u, 1);
         EXPECT_NEAR(expect_u, out2.u, 1);
         EXPECT_NEAR(expect_u, out3.u, 1);
@@ -1678,5 +1689,4 @@ TEST_F(GainMapMathTest, ApplyMap) {
   EXPECT_RGB_EQ(Recover(YuvWhite(), 0.25f, &metadata), RgbWhite());
   EXPECT_RGB_EQ(Recover(YuvWhite(), 0.0f, &metadata), RgbWhite() / 2.0f);
 }
-
 }  // namespace ultrahdr