@@ -35,47 +35,68 @@ namespace ultrahdr {
3535// {Y1, Y2, U1, U2, V1, V2, 0, 0}
3636
3737// Yuv Bt709 -> Yuv Bt601
38- // Y' = (1.0f * Y) + ( 0.101579f * U) + ( 0.196076f * V)
39- // U' = (0.0f * Y) + ( 0.989854f * U) + (-0.110653f * V)
40- // V' = (0.0f * Y) + (-0.072453f * U) + ( 0.983398f * V)
38+ // Y' = (1.0 * Y) + ( 0.101579 * U) + ( 0.196076 * V)
39+ // U' = (0.0 * Y) + ( 0.989854 * U) + (-0.110653 * V)
40+ // V' = (0.0 * Y) + (-0.072453 * U) + ( 0.983398 * V)
4141ALIGNED (16 )
4242const int16_t kYuv709To601_coeffs_neon [8 ] = {1664 , 3213 , 16218 , -1813 , -1187 , 16112 , 0 , 0 }; // Q14 (coefficient * 2^14, rounded): {Y'<-U, Y'<-V, U'<-U, U'<-V, V'<-U, V'<-V, 0, 0}; the Y column is not stored
4343
44+ // Yuv Bt709 -> Yuv Display P3
45+ // Y' = (1.0 * Y) + ( 0.017545 * U) + ( 0.03677 * V)
46+ // U' = (0.0 * Y) + ( 0.998169 * U) + (-0.019968 * V)
47+ // V' = (0.0 * Y) + (-0.011378 * U) + ( 0.997393 * V)
48+ ALIGNED (16 )
49+ const int16_t kYuv709ToP3_coeffs_neon [8 ] = {287 , 602 , 16354 , -327 , -186 , 16341 , 0 , 0 }; // Q14 (coefficient * 2^14, rounded): {Y'<-U, Y'<-V, U'<-U, U'<-V, V'<-U, V'<-V, 0, 0}; the Y column is not stored
50+
4451// Yuv Bt709 -> Yuv Bt2100
4552// Y' = (1.0 * Y) + (-0.016969 * U) + ( 0.096312 * V)
4653// U' = (0.0 * Y) + ( 0.995306 * U) + (-0.051192 * V)
4754// V' = (0.0 * Y) + ( 0.011507 * U) + ( 1.002637 * V)
4855ALIGNED (16 )
4956const int16_t kYuv709To2100_coeffs_neon [8 ] = {-278 , 1578 , 16307 , -839 , 189 , 16427 , 0 , 0 }; // Q14 (coefficient * 2^14, rounded): {Y'<-U, Y'<-V, U'<-U, U'<-V, V'<-U, V'<-V, 0, 0}; the Y column is not stored
5057
51- // Yuv Bt601 -> Yuv Bt709
52- // Y' = (1.0f * Y) + (-0.118188f * U) + (-0.212685f * V),
53- // U' = (0.0f * Y) + ( 1.018640f * U) + ( 0.114618f * V),
54- // V' = (0.0f * Y) + ( 0.075049f * U) + ( 1.025327f * V);
58+ // Yuv Display P3 -> Yuv Bt601
59+ // Y' = (1.0 * Y) + ( 0.086028 * U) + ( 0.161445 * V)
60+ // U' = (0.0 * Y) + ( 0.990631 * U) + (-0.091109 * V)
61+ // V' = (0.0 * Y) + (-0.061361 * U) + ( 0.98474 * V)
5562ALIGNED (16 )
56- const int16_t kYuv601To709_coeffs_neon [8 ] = {- 1936 , - 3485 , 16689 , 1878 , 1230 , 16799 , 0 , 0 };
63+ const int16_t kYuvP3To601_coeffs_neon [8 ] = {1409 , 2645 , 16230 , - 1493 , - 1005 , 16134 , 0 , 0 }; // Q14 (coefficient * 2^14, rounded): {Y'<-U, Y'<-V, U'<-U, U'<-V, V'<-U, V'<-V, 0, 0}; the Y column is not stored
5764
58- // Yuv Bt601 -> Yuv Bt2100
59- // Y' = (1.0f * Y) + (-0.128245f * U) + (-0.115879f * V)
60- // U' = (0.0f * Y) + ( 1.010016f * U) + ( 0.061592f * V)
61- // V' = (0.0f * Y) + ( 0.086969f * U) + ( 1.029350f * V)
65+ // Yuv Display P3 -> Yuv Bt709
66+ // Y' = (1.0 * Y) + (-0.018002 * U) + (-0.037226 * V)
67+ // U' = (0.0 * Y) + ( 1.002063 * U) + ( 0.020061 * V)
68+ // V' = (0.0 * Y) + ( 0.011431 * U) + ( 1.002843 * V)
6269ALIGNED (16 )
63- const int16_t kYuv601To2100_coeffs_neon [8 ] = {-2101 , -1899 , 16548 , 1009 , 1425 , 16865 , 0 , 0 };
70+ const int16_t kYuvP3To709_coeffs_neon [8 ] = {-295 , -610 , 16418 , 329 , 187 , 16431 , 0 , 0 }; // Q14 (coefficient * 2^14, rounded): {Y'<-U, Y'<-V, U'<-U, U'<-V, V'<-U, V'<-V, 0, 0}; the Y column is not stored
6471
65- // Yuv Bt2100 -> Yuv Bt709
66- // Y' = (1.0f * Y) + ( 0.018149f * U) + (-0.095132f * V)
67- // U' = (0.0f * Y) + ( 1.004123f * U) + ( 0.051267f * V)
68- // V' = (0.0f * Y) + (-0.011524f * U) + ( 0.996782f * V)
72+ // Yuv Display P3 -> Yuv Bt2100
73+ // Y' = (1.0 * Y) + (-0.033905 * U) + ( 0.059019 * V)
74+ // U' = (0.0 * Y) + ( 0.996774 * U) + (-0.03137 * V)
75+ // V' = (0.0 * Y) + ( 0.022992 * U) + ( 1.005718 * V)
6976ALIGNED (16 )
70- const int16_t kYuv2100To709_coeffs_neon [8 ] = {297 , - 1559 , 16452 , 840 , -189 , 16331 , 0 , 0 };
77+ const int16_t kYuvP3To2100_coeffs_neon [8 ] = {- 555 , 967 , 16331 , -514 , 377 , 16478 , 0 , 0 }; // Q14 (coefficient * 2^14, rounded): {Y'<-U, Y'<-V, U'<-U, U'<-V, V'<-U, V'<-V, 0, 0}; the Y column is not stored
7178
7279// Yuv Bt2100 -> Yuv Bt601
73- // Y' = (1.0f * Y) + ( 0.117887f * U) + ( 0.105521f * V)
74- // U' = (0.0f * Y) + ( 0.995211f * U) + (-0.059549f * V)
75- // V' = (0.0f * Y) + (-0.084085f * U) + ( 0.976518f * V)
80+ // Y' = (1.0 * Y) + ( 0.117887 * U) + ( 0.105521 * V)
81+ // U' = (0.0 * Y) + ( 0.995211 * U) + (-0.059549 * V)
82+ // V' = (0.0 * Y) + (-0.084085 * U) + ( 0.976518 * V)
7683ALIGNED (16 )
7784const int16_t kYuv2100To601_coeffs_neon [8 ] = {1931 , 1729 , 16306 , -976 , -1378 , 15999 , 0 , 0 }; // Q14 (coefficient * 2^14, rounded): {Y'<-U, Y'<-V, U'<-U, U'<-V, V'<-U, V'<-V, 0, 0}; the Y column is not stored
7885
86+ // Yuv Bt2100 -> Yuv Bt709
87+ // Y' = (1.0 * Y) + ( 0.018149 * U) + (-0.095132 * V)
88+ // U' = (0.0 * Y) + ( 1.004123 * U) + ( 0.051267 * V)
89+ // V' = (0.0 * Y) + (-0.011524 * U) + ( 0.996782 * V)
90+ ALIGNED (16 )
91+ const int16_t kYuv2100To709_coeffs_neon [8 ] = {297 , -1559 , 16452 , 840 , -189 , 16331 , 0 , 0 }; // Q14 (coefficient * 2^14, rounded): {Y'<-U, Y'<-V, U'<-U, U'<-V, V'<-U, V'<-V, 0, 0}; the Y column is not stored
92+
93+ // Yuv Bt2100 -> Yuv Display P3
94+ // Y' = (1.0 * Y) + ( 0.035343 * U) + (-0.057581 * V)
95+ // U' = (0.0 * Y) + ( 1.002515 * U) + ( 0.03127 * V)
96+ // V' = (0.0 * Y) + (-0.022919 * U) + ( 0.9936 * V)
97+ ALIGNED (16 )
98+ const int16_t kYuv2100ToP3_coeffs_neon [8 ] = {579 , -943 , 16425 , 512 , -376 , 16279 , 0 , 0 }; // Q14 (coefficient * 2^14, rounded): {Y'<-U, Y'<-V, U'<-U, U'<-V, V'<-U, V'<-V, 0, 0}; the Y column is not stored
99+
79100static inline int16x8_t yConversion_neon (uint8x8_t y, int16x8_t u, int16x8_t v, int16x8_t coeffs) {
80101 int32x4_t lo = vmull_lane_s16 (vget_low_s16 (u), vget_low_s16 (coeffs), 0 );
81102 int32x4_t hi = vmull_lane_s16 (vget_high_s16 (u), vget_low_s16 (coeffs), 0 );
@@ -240,11 +261,14 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
240261
241262 switch (src_encoding) {
242263 case UHDR_CG_BT_709:
243- switch (dst_encoding) {
264+ switch ((int )dst_encoding) {
265+ case UHDR_CG_BT_601:
266+ coeffs = kYuv709To601_coeffs_neon ;
267+ break ;
244268 case UHDR_CG_BT_709:
245269 return status;
246270 case UHDR_CG_DISPLAY_P3:
247- coeffs = kYuv709To601_coeffs_neon ;
271+ coeffs = kYuv709ToP3_coeffs_neon ;
248272 break ;
249273 case UHDR_CG_BT_2100:
250274 coeffs = kYuv709To2100_coeffs_neon ;
@@ -258,14 +282,17 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
258282 }
259283 break ;
260284 case UHDR_CG_DISPLAY_P3:
261- switch (dst_encoding) {
285+ switch ((int )dst_encoding) {
286+ case UHDR_CG_BT_601:
287+ coeffs = kYuvP3To601_coeffs_neon ;
288+ break ;
262289 case UHDR_CG_BT_709:
263- coeffs = kYuv601To709_coeffs_neon ;
290+ coeffs = kYuvP3To709_coeffs_neon ;
264291 break ;
265292 case UHDR_CG_DISPLAY_P3:
266293 return status;
267294 case UHDR_CG_BT_2100:
268- coeffs = kYuv601To2100_coeffs_neon ;
295+ coeffs = kYuvP3To2100_coeffs_neon ;
269296 break ;
270297 default :
271298 status.error_code = UHDR_CODEC_INVALID_PARAM;
@@ -276,12 +303,15 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
276303 }
277304 break ;
278305 case UHDR_CG_BT_2100:
279- switch (dst_encoding) {
306+ switch ((int )dst_encoding) {
307+ case UHDR_CG_BT_601:
308+ coeffs = kYuv2100To601_coeffs_neon ;
309+ break ;
280310 case UHDR_CG_BT_709:
281311 coeffs = kYuv2100To709_coeffs_neon ;
282312 break ;
283313 case UHDR_CG_DISPLAY_P3:
284- coeffs = kYuv2100To601_coeffs_neon ;
314+ coeffs = kYuv2100ToP3_coeffs_neon ;
285315 break ;
286316 case UHDR_CG_BT_2100:
287317 return status;
@@ -323,6 +353,13 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
323353// In the 3x3 conversion matrix, 0.5 appears twice, but it is stored only once in the lut,
324354// which leaves an array of 8 elements.
325355
356+ // RGB Bt601 -> Yuv Bt601
357+ // Y = 0.299 * R + 0.587 * G + 0.114 * B
358+ // U = -0.168735892 * R + -0.331264108 * G + 0.5 * B
359+ // V = 0.5 * R + -0.418687589 * G + -0.081312411 * B
360+ ALIGNED (16 )
361+ const uint16_t kRgb601ToYuv_coeffs_neon [8 ] = {4899 , 9617 , 1868 , 2765 , 5427 , 8192 , 6860 , 1332 }; // unsigned magnitudes, |coefficient| * 2^14 rounded: {Y<-R, Y<-G, Y<-B, |U<-R|, |U<-G|, 0.5, |V<-G|, |V<-B|}; signs are not stored in the table
362+
326363// RGB Bt709 -> Yuv Bt709
327364// Y = 0.212639 * R + 0.715169 * G + 0.072192 * B
328365// U = -0.114592135 * R + -0.385407865 * G + 0.5 * B
@@ -454,12 +491,15 @@ static void ConvertRgba8888ToYuv444_neon(uhdr_raw_image_t* src, uhdr_raw_image_t
454491 } while (++h < src->h );
455492}
456493
457- std::unique_ptr<uhdr_raw_image_ext_t > convert_raw_input_to_ycbcr_neon (uhdr_raw_image_t * src) {
494+ std::unique_ptr<uhdr_raw_image_ext_t > convert_raw_input_to_ycbcr_neon (uhdr_raw_image_t * src,
495+ bool use_bt601) {
458496 if (src->fmt == UHDR_IMG_FMT_32bppRGBA8888) {
459497 std::unique_ptr<uhdr_raw_image_ext_t > dst = nullptr ;
460498 const uint16_t * coeffs_ptr = nullptr ;
461499
462- if (src->cg == UHDR_CG_BT_709) {
500+ if (use_bt601) {
501+ coeffs_ptr = kRgb601ToYuv_coeffs_neon ;
502+ } else if (src->cg == UHDR_CG_BT_709) {
463503 coeffs_ptr = kRgb709ToYuv_coeffs_neon ;
464504 } else if (src->cg == UHDR_CG_BT_2100) {
465505 coeffs_ptr = kRgbDispP3ToYuv_coeffs_neon ;
0 commit comments