From 2ebe80e19861407b0f7bdb91a9e6cc6d8a7be10d Mon Sep 17 00:00:00 2001
From: tobozo <tobozo@users.noreply.github.com>
Date: Thu, 29 Dec 2022 12:48:30 +0100
Subject: [PATCH 01/12] Adding esp32-arduino core 2.0.6

---
 .github/workflows/ArduinoBuild.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ArduinoBuild.yml b/.github/workflows/ArduinoBuild.yml
index 78a85bd1..3e3f488d 100644
--- a/.github/workflows/ArduinoBuild.yml
+++ b/.github/workflows/ArduinoBuild.yml
@@ -44,6 +44,7 @@ jobs:
           - 2.0.3
           - 2.0.4
           - 2.0.5
+          - 2.0.6
 
         include:
           # 3D matrix doesn't apply to these:

From 92175994888130db07dec23317eaad67f942f679 Mon Sep 17 00:00:00 2001
From: lovyan03 <42724151+lovyan03@users.noreply.github.com>
Date: Fri, 30 Dec 2022 08:42:44 +0900
Subject: [PATCH 02/12] fix comment

---
 src/lgfx_user/LGFX_ESP8266_sample.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/lgfx_user/LGFX_ESP8266_sample.hpp b/src/lgfx_user/LGFX_ESP8266_sample.hpp
index 9ab67809..141dbe7b 100644
--- a/src/lgfx_user/LGFX_ESP8266_sample.hpp
+++ b/src/lgfx_user/LGFX_ESP8266_sample.hpp
@@ -4,7 +4,7 @@
 
 #include <LovyanGFX.hpp>
 
-// ESP32でLovyanGFXを独自設定で利用する場合の設定例
+// ESP8266でLovyanGFXを独自設定で利用する場合の設定例
 
 /*
 このファイルを複製し、新しい名前を付けて、環境に合わせて設定内容を変更してください。
@@ -27,8 +27,8 @@ class LGFX : public lgfx::LGFX_Device
  ※ クラス名を変更する場合はコンストラクタの名前も併せて同じ名前に変更が必要です。
 
  名前の付け方は自由に決めて構いませんが、設定が増えた場合を想定し、
- 例えばESP32 DevKit-CでSPI接続のILI9341の設定を行った場合、
-  LGFX_DevKitC_SPI_ILI9341
+ 例えばESP8266でSPI接続のILI9341の設定を行った場合、
+  LGFX_ESP8266_SPI_ILI9341
  のような名前にし、ファイル名とクラス名を一致させておくことで、利用時に迷いにくくなります。
 //*/
 

From ea6debc6d297e517361d651f5795e7a5e2913b4f Mon Sep 17 00:00:00 2001
From: lovyan03 <42724151+lovyan03@users.noreply.github.com>
Date: Sun, 1 Jan 2023 21:22:24 +0900
Subject: [PATCH 03/12] add support RGB565 for Panel_CVBS

---
 src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp | 744 +++++++++++++++------
 src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp |   2 +-
 2 files changed, 522 insertions(+), 224 deletions(-)

diff --git a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
index 557e9343..dc7bf164 100644
--- a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
+++ b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
@@ -171,7 +171,7 @@ namespace lgfx
     uint8_t** lines = nullptr;        // フレームバッファ配列ポインタ;
     uint16_t* allocated_list = nullptr;  // フレームバッファのalloc割当対象のインデクス番号(free時に使用);
     uint32_t* palette = nullptr;   // RGB332から波形に変換するためのテーブル;
-    void (*fp_blit)(uint32_t*, const uint32_t*, const uint32_t*, const uint32_t*, bool, int, int);
+    void (*fp_blit)(uint32_t*, const uint8_t*, size_t, const uint32_t*, bool, int, int);
     uint32_t burst_wave[2];       // カラーバースト信号の波形データ(EVENとODDで２通り)
     intr_handle_t isr_handle = nullptr;
     lldesc_t dma_desc[2];
@@ -189,6 +189,7 @@ namespace lgfx
     uint16_t WHITE_LEVEL;
     uint8_t burst_shift = 0;        // カラーバースト信号の反転・位相ずらし処理状態保持用;
     uint8_t use_psram = 0;          // フレームバッファ PSRAM使用モード 0=不使用 / 1=半分PSRAM / 2=全部PSRAM
+    uint8_t pixel_per_bytes = 1;
     static constexpr uint8_t SYNC_LEVEL = 0;
   };
 
@@ -196,17 +197,71 @@ namespace lgfx
   static internal_t internal;
 
 
-  static void setup_palette_ntsc(uint32_t* palette, uint_fast16_t white_level, uint_fast16_t black_level, uint_fast8_t chroma_level)
+  static uint32_t setup_palette_ntsc_inner(uint32_t rgb, uint32_t diff_level, uint32_t base_level, float satuation_base, float chroma_scale)
   {
-    uint8_t buf[4];
-
 // NTSCの I・Q信号は基準位相から-147度ずれている。;
 // 加えて、このライブラリのburst_waveの位相基準は-45度となっている。;
 // この両者を合わせて 147+45=192 を引いた値が基準位相となる。;
 // つまり 360-192 = 168度を基準とする。;
     static constexpr float BASE_RAD = (M_PI * 168) / 180; // 2.932153;
+    uint8_t buf[4];
+
+    uint32_t r = rgb >> 16;
+    uint32_t g = (rgb >> 8) & 0xFF;
+    uint32_t b = rgb & 0xFF;
+
+    float y = r * 0.299f + g * 0.587f + b * 0.114f;
+    float i = (b - y) * -0.2680f + (r - y) * 0.7358f;
+    float q = (b - y) *  0.4127f + (r - y) * 0.4778f;
+    y = y * diff_level / 256 + base_level;
+
+    float phase_offset = atan2f(i, q) + BASE_RAD;
+    float saturation = sqrtf(i * i + q * q) * chroma_scale;
+    saturation = saturation * satuation_base;
+    for (int j = 0; j < 4; j++)
+    {
+      int tmp = ((int)(128.5f + y + sinf(phase_offset + (float)M_PI / 2 * j) * saturation)) >> 8;
+      buf[j] = tmp < 0 ? 0 : tmp > 255 ? 255 : tmp;
+    }
+    // I2Sに渡す際に処理負荷を軽減できるよう、予めバイトスワップ等を行ったテーブルを作成しておく;
+    return buf[0] << 24
+          | buf[1] <<  8
+          | buf[2] << 16
+          | buf[3] <<  0
+          ;
+  }
+
+  static void setup_palette_ntsc_565(uint32_t* palette, uint_fast16_t white_level, uint_fast16_t black_level, uint_fast8_t chroma_level) // Builds the NTSC lookup tables for RGB565: palette[0..255] is indexed by a pixel's upper byte, palette[256..511] by its lower byte.
+  {
+    float chroma_scale = chroma_level / 7168.0f;
+    float satuation_base = black_level / 2; // NOTE(review): integer division, truncates when black_level is odd -- confirm this is intended
+    uint32_t diff_level = white_level - black_level; // white-to-black span; used to scale luma in setup_palette_ntsc_inner
 
+    uint32_t base_level = black_level / 2; // each table carries half of the black offset; the *_565 blit routines add the upper-byte and lower-byte entries (p0h + p0l)
+    for (int idx = 0; idx < 256; ++idx)
+    {
+      { // Table for the upper byte of an RGB565 pixel (5 bits of red + upper 3 bits of green)
+        int r = (idx >> 3);
+        int g = (idx & 7) << 3;
+        r = (r * 0x21) >> 2; // expand 5-bit red to 0..255
+        g = (g * 0x41) >> 4; // upper green bits contribute 0..227
+        palette[idx] = setup_palette_ntsc_inner(r<<16|g<<8, diff_level, base_level, satuation_base, chroma_scale);
+      }
+      { // Table for the lower byte of an RGB565 pixel (lower 3 bits of green + 5 bits of blue)
+        int g = idx >> 5;
+        int b = idx & 0x1F;
+        b = (b * 0x21) >> 2; // expand 5-bit blue to 0..255
+        g = (g * 0x41) >> 4; // lower green bits contribute 0..28
+        palette[idx + 256] = setup_palette_ntsc_inner(g<<8|b, diff_level, base_level, satuation_base, chroma_scale);
+      }
+    }
+  }
+
+  static void setup_palette_ntsc_332(uint32_t* palette, uint_fast16_t white_level, uint_fast16_t black_level, uint_fast8_t chroma_level)
+  {
     float chroma_scale = chroma_level / 7168.0f;
+    float satuation_base = black_level / 2;
+    uint32_t diff_level = white_level - black_level;
 
     for (int rgb332 = 0; rgb332 < 256; ++rgb332)
     {
@@ -214,40 +269,85 @@ namespace lgfx
       int g = (((rgb332 >> 2) & 0x07) * 0x49) >> 1;
       int b = (( rgb332       & 0x03) * 0x55);
 
-      float y = r * 0.299f + g * 0.587f + b * 0.114f;
-      float i = (b - y) * -0.2680f + (r - y) * 0.7358f;
-      float q = (b - y) *  0.4127f + (r - y) * 0.4778f;
-      y = y / 255 * (white_level - black_level) + black_level;
+      palette[rgb332] = setup_palette_ntsc_inner(r<<16|g<<8|b, diff_level, black_level, satuation_base, chroma_scale);
+    }
+  }
 
-      {
-        float phase_offset = atan2f(i, q) + BASE_RAD;
-        float saturation = sqrtf(i * i + q * q) * chroma_scale;
-        saturation = saturation * black_level / 2;
-        for (int j = 0; j < 4; j++)
-        {
-          int tmp = ((int)roundf(y + sinf(phase_offset + (float)M_PI / 2 * j) * saturation)) >> 8;
-          buf[j] = tmp < 0 ? 0 : tmp > 255 ? 255 : tmp;
-        }
-        // I2Sに渡す際に処理負荷を軽減できるよう、予めバイトスワップ等を行ったテーブルを作成しておく;
-        palette[rgb332] = buf[0] << 24
-                        | buf[1] <<  8
-                        | buf[2] << 16
-                        | buf[3] <<  0
-                        ;
+  static void setup_palette_pal_inner(uint8_t *result, uint32_t rgb, int diff_level, float base_level, float chroma_scale) // Converts one 0xRRGGBB value into two 4-sample waveforms and writes them to result[0..3] and result[4..7].
+  {
+    static constexpr const int8_t sin_tbl[5] = { 0, -1, 0, 1, 0 }; // 5 entries so that sin_tbl[j + 1] is valid for j = 0..3
+
+    // To reduce the processing load when handing data to I2S, the table is built already byte-swapped, so the sample index order is permuted here.
+    static constexpr const int8_t idx_tbl[4] = { 3, 1, 2, 0 };
+    uint32_t r = rgb >> 16;
+    uint32_t g = (rgb >> 8) & 0xFF;
+    uint32_t b = rgb & 0xFF;
+
+    float y = r * 0.299f + g * 0.587f + b * 0.114f;
+    float u = -0.147407 * r - 0.289391 * g + 0.436798 * b;
+    float v =  0.614777 * r - 0.514799 * g - 0.099978 * b;
+    y = y * diff_level / 256 + base_level; // scale luma by diff_level/256, then add the base_level offset
+    u *= chroma_scale;
+    v *= chroma_scale;
+
+    for (int j = 0; j < 4; j++)
+    {
+      float s = u * sin_tbl[j    ];
+      float c = v * sin_tbl[j + 1]; // cosine term: the same table read one entry ahead
+      int tmp = ((int)(128.5f + y + s + c)) >> 8; // +128.5f biases the value so the >> 8 rounds instead of truncating
+      int i = idx_tbl[j];
+      result[i  ] = tmp < 0 ? 0 : tmp > 255 ? 255 : tmp; // clamp to one byte
+      tmp = ((int)(128.5f + y + s - c)) >> 8; // same sample with the v term negated (presumably PAL's line-alternating V phase -- confirm)
+      result[i+4] = tmp < 0 ? 0 : tmp > 255 ? 255 : tmp; // second waveform is stored 4 bytes after the first
+    }
+  }
+
+  static void setup_palette_pal_565(uint32_t* palette, uint_fast16_t white_level, uint_fast16_t black_level, uint_fast8_t chroma_level) // Builds the PAL lookup tables for RGB565: 1024 entries in total, palette[0..511] and palette[512..1023].
+  {
+    auto e = palette; // first table set: [0..255] upper byte, [256..511] lower byte
+    auto o = &palette[512]; // second table set, same layout, built from the v-negated waveform
+
+    uint32_t result_buf[2]; // filled as 8 bytes by setup_palette_pal_inner: [0] goes to e, [1] goes to o
+    float chroma_scale = black_level * chroma_level / 14336.0f;
+
+    int32_t diff_level = white_level - black_level;
+    float base_level = (float)black_level / 2; // each byte table carries half of the black offset; the *_565 blit routines add the upper-byte and lower-byte entries
+    for (int idx = 0; idx < 256; ++idx)
+    {
+      { // Table for the upper byte of an RGB565 pixel (5 bits of red + upper 3 bits of green)
+        int r = (idx >> 3);
+        int g = (idx & 7) << 3;
+        r = (r * 0x21) >> 2; // expand 5-bit red to 0..255
+        g = (g * 0x41) >> 4; // upper green bits contribute 0..227
+
+        setup_palette_pal_inner((uint8_t*)result_buf, r<<16|g<<8, diff_level, base_level, chroma_scale);
+        e[idx] = result_buf[0];
+        o[idx] = result_buf[1];
+      }
+      { // Table for the lower byte of an RGB565 pixel (lower 3 bits of green + 5 bits of blue)
+        int g = idx >> 5;
+        int b = idx & 0x1F;
+        b = (b * 0x21) >> 2; // expand 5-bit blue to 0..255
+        g = (g * 0x41) >> 4; // lower green bits contribute 0..28
+
+        setup_palette_pal_inner((uint8_t*)result_buf, g<<8|b, diff_level, base_level, chroma_scale);
+        e[idx + 256] = result_buf[0];
+        o[idx + 256] = result_buf[1];
       }
     }
+
   }
 
-  static void setup_palette_pal(uint32_t* palette, uint_fast16_t white_level, uint_fast16_t black_level, uint_fast8_t chroma_level)
+  static void setup_palette_pal_332(uint32_t* palette, uint_fast16_t white_level, uint_fast16_t black_level, uint_fast8_t chroma_level)
   {
     auto e = palette;
     auto o = &palette[256];
 
-    uint8_t e_buf[4];
-    uint8_t o_buf[4];
+    uint32_t result_buf[2];
     float chroma_scale = black_level * chroma_level / 14336.0f;
 
-    static constexpr const int8_t sin_tbl[5] = { 0, -1, 0, 1, 0 };
+    int32_t diff_level = white_level - black_level;
+    float base_level = (float)black_level;
 
     for (int rgb332 = 0; rgb332 < 256; ++rgb332)
     {
@@ -255,33 +355,10 @@ namespace lgfx
       int g = (((rgb332 >> 2) & 0x07) * 0x49) >> 1;
       int b = (( rgb332       & 0x03) * 0x55);
 
-      float y = r * 0.299f + g * 0.587f + b * 0.114f;
-      float u = -0.147407 * r - 0.289391 * g + 0.436798 * b;
-      float v =  0.614777 * r - 0.514799 * g - 0.099978 * b;
-      y = (y / 255 * (white_level - black_level) + black_level);
-      u *= chroma_scale;
-      v *= chroma_scale;
+      setup_palette_pal_inner((uint8_t*)result_buf, r<<16|g<<8|b, diff_level, base_level, chroma_scale);
 
-      for (int j = 0; j < 4; j++)
-      {
-        float s = u * sin_tbl[j    ];
-        float c = v * sin_tbl[j + 1]; // cos
-        int tmp = ((int)roundf(y + s + c)) >> 8;
-        e_buf[j] = tmp < 0 ? 0 : tmp > 255 ? 255 : tmp;
-        tmp = ((int)roundf(y + s - c)) >> 8;
-        o_buf[j] = tmp < 0 ? 0 : tmp > 255 ? 255 : tmp;
-      }
-      // I2Sに渡す際に処理負荷を軽減できるよう、予めバイトスワップ等を行ったテーブルを作成しておく;
-      e[rgb332] = e_buf[0] << 24
-                | e_buf[1] <<  8
-                | e_buf[2] << 16
-                | e_buf[3] <<  0
-                ;
-      o[rgb332] = o_buf[0] << 24
-                | o_buf[1] <<  8
-                | o_buf[2] << 16
-                | o_buf[3] <<  0
-                ;
+      e[rgb332] = result_buf[0];
+      o[rgb332] = result_buf[1];
     }
   }
 
@@ -395,7 +472,8 @@ namespace lgfx
 
   struct signal_setup_info_t
   {
-    void (*setup_palette)(uint32_t*, uint_fast16_t, uint_fast16_t, uint_fast8_t); // パレット生成関数のポインタ;
+    void (*setup_palette_332)(uint32_t*, uint_fast16_t, uint_fast16_t, uint_fast8_t); // RGB332用パレット生成関数のポインタ;
+    void (*setup_palette_565)(uint32_t*, uint_fast16_t, uint_fast16_t, uint_fast8_t); // RGB565用パレット生成関数のポインタ;
     uint32_t apll_sdm;            // apllのクロック設定;
     uint16_t blanking_mv;         // SYNCレベルとBLANKINGレベルの電圧差 mV
     uint16_t black_mv;            // SYNCレベルと黒レベルの電圧差 mV
@@ -405,7 +483,8 @@ namespace lgfx
 
   static constexpr const signal_setup_info_t signal_setup_info_list[]
   { // NTSC
-    { setup_palette_ntsc
+    { setup_palette_ntsc_332
+    , setup_palette_ntsc_565
     , 0x049748    // 14.318237 // 映像に縞模様ノイズが出にくい;  ( 0x049746 = 14.318181 // 要求仕様に近い )
     , 286         // 286mV = 0IRE
     , 340         // 340mV = 7.5IRE  米国仕様では黒レベルは 7.5IRE
@@ -413,7 +492,8 @@ namespace lgfx
     , 1           // パレット数は256
     }
   , // NTSC_J
-    { setup_palette_ntsc
+    { setup_palette_ntsc_332
+    , setup_palette_ntsc_565
     , 0x049748    // 14.318237 // 映像に縞模様ノイズが出にくい;  ( 0x049746 = 14.318181 // 要求仕様に近い )
     , 286         // 286mV = 0IRE
     , 286         // 286mV = 0IRE  日本仕様では黒レベルは 0IRE
@@ -421,7 +501,8 @@ namespace lgfx
     , 1           // パレット数は256
     }
   , // PAL
-    { setup_palette_pal
+    { setup_palette_pal_332
+    , setup_palette_pal_565
     , 0x06A404    // 17.734476mhz ~4x
     , 300
     , 300
@@ -429,7 +510,8 @@ namespace lgfx
     , 2           // パレット数は512
     }
   , // PAL_M
-    { setup_palette_pal
+    { setup_palette_pal_332
+    , setup_palette_pal_565
     , 0x0494DA
     , 300
     , 300
@@ -437,7 +519,8 @@ namespace lgfx
     , 2           // パレット数は512
     }
   , // PAL_N
-    { setup_palette_pal
+    { setup_palette_pal_332
+    , setup_palette_pal_565
     , 0x498D1    // 17.734476mhz ~4x
     , 300
     , 300
@@ -447,264 +530,434 @@ namespace lgfx
   };
 
   // x5 ~ x6
-  void IRAM_ATTR blit_x50_x60(uint32_t* __restrict d, const uint32_t* s, const uint32_t* s_end, const uint32_t* p, bool odd, int ratio_5, int ratio_6)
+  void IRAM_ATTR blit_x50_x60_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_5, int ratio_6)
   {
-    --d;
+    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift1 = shift0 ^ 8;
+    int diff = (ratio_6 + ratio_5) >> 1;
 
+    src_length = (src_length + 1) >> 1;
+    while (src_length--)
+    {
+      uint32_t p1l = s[3];
+      uint32_t p1h = s[2];
+      uint32_t p0l = s[1];
+      uint32_t p0h = s[0];
+      p1l += 256;
+      p0l += 256;
+      p0h = p[p0h];
+      p0l = p[p0l];
+      p1h = p[p1h];
+      p1l = p[p1l];
+      s += 4;
+      uint32_t color0 = p0h + p0l;
+      uint32_t color1 = p1h + p1l;
+
+      if (diff < 0)
+      {
+        diff += ratio_5;
+        d[1] = color0 << shift1;
+        d[3] = color1 << shift1;
+        d[0] = color0 <<= shift0;
+        d[4] = color1 <<= shift0;
+        d[2] = (color0 & 0xFFFF0000) + (color1 & 0xFFFF);
+        d += 5;
+        std::swap(shift0, shift1);
+      }
+      else
+      {
+        diff += ratio_6;
+        d[1] = color0 << shift1;
+        d[3] = color1 << shift1;
+        d[5] = color1 << shift1;
+        d[4] = color1 << shift0;
+        d[0] = color0 << shift0;
+        d[2] = color0 << shift0;
+        d += 6;
+      }
+    }
+  }
+
+  // x5 ~ x6
+  void IRAM_ATTR blit_x50_x60(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_5, int ratio_6)
+  {
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio_6 + ratio_5) >> 1;
-    do
+
+    src_length = (src_length + 1) >> 1;
+    while (src_length--)
     {
-      uint32_t c = *s;
-      for (int i = 0; i < 2; ++i)
+      auto p0 = s[0];
+      auto p1 = s[1];
+      s += 2;
+      uint32_t color0 = p[p0];
+      uint32_t color1 = p[p1];
+      if (diff < 0)
       {
-        uint32_t color0 = p[c & 0xFF]; c >>= 8;
-        uint32_t color1 = p[c & 0xFF]; c >>= 8;
-        uint32_t c00 = color0 << shift0;
-        uint32_t c01 = color0 << shift1;
-        uint32_t c10 = color1 << shift0;
-        uint32_t c11 = color1 << shift1;
-        *++d = c00;
-        *++d = c01;
-        if (diff < 0)
-        {
-          diff += ratio_6;
-          *++d = (c00 & 0xFFFF0000) + (c10 & 0xFFFF);
-          *++d = c11;
-          *++d = c10;
-          std::swap(shift0, shift1);
-        }
-        else
-        {
-          diff += ratio_5;
-          *++d = c00;
-          *++d = c11;
-          *++d = c10;
-          *++d = c11;
-        }
+        diff += ratio_5;
+        d[1] = color0 << shift1;
+        d[3] = color1 << shift1;
+        d[0] = color0 <<= shift0;
+        d[4] = color1 <<= shift0;
+        d[2] = (color0 & 0xFFFF0000) + (color1 & 0xFFFF);
+        d += 5;
+        std::swap(shift0, shift1);
+      }
+      else
+      {
+        diff += ratio_6;
+        d[1] = color0 << shift1;
+        d[3] = color1 << shift1;
+        d[5] = color1 << shift1;
+        d[4] = color1 << shift0;
+        d[0] = color0 << shift0;
+        d[2] = color0 << shift0;
+        d += 6;
       }
-    } while (++s < s_end);
+    }
   }
 
   // x4 ~ x5
-  void IRAM_ATTR blit_x40_x50(uint32_t* __restrict d, const uint32_t* s, const uint32_t* s_end, const uint32_t* p, bool odd, int ratio_4, int ratio_5)
+  void IRAM_ATTR blit_x40_x50_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_4, int ratio_5)
   {
-    --d;
+    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift1 = shift0 ^ 8;
+    int diff = (ratio_5 + ratio_4) >> 1;
+
+    src_length = (src_length + 1) >> 1;
+    while (src_length--)
+    {
+      uint32_t p1l = s[3];
+      uint32_t p1h = s[2];
+      uint32_t p0l = s[1];
+      uint32_t p0h = s[0];
+      p1l += 256;
+      p0l += 256;
+      p0h = p[p0h];
+      p0l = p[p0l];
+      p1h = p[p1h];
+      p1l = p[p1l];
+      s += 4;
+      uint32_t color0 = p0h + p0l;
+      uint32_t color1 = p1h + p1l;
 
+      if (diff < 0)
+      {
+        diff += ratio_4;
+        d[1] = color0 << shift1;
+        d[3] = color1 << shift1;
+        d[0] = color0 << shift0;
+        d[2] = color1 << shift0;
+        d += 4;
+      }
+      else
+      {
+        diff += ratio_5;
+        d[1] = color0 << shift1;
+        d[3] = color1 << shift1;
+        color0 <<= shift0;
+        color1 <<= shift0;
+        d[0] = color0;
+        d[4] = color1;
+        d[2] = (color0 & 0xFFFF0000) + (color1 & 0xFFFF);
+        std::swap(shift0, shift1);
+        d += 5;
+      }
+    }
+  }
+
+  // x4 ~ x5
+  void IRAM_ATTR blit_x40_x50(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_4, int ratio_5)
+  {
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio_5 + ratio_4) >> 1;
-    do
+
+    src_length = (src_length + 1) >> 1;
+    while (src_length--)
     {
-      uint32_t c = *s;
-      for (int i = 0; i < 2; ++i)
+      auto p0 = s[0];
+      auto p1 = s[1];
+      s += 2;
+      uint32_t color0 = p[p0];
+      uint32_t color1 = p[p1];
+      if (diff < 0)
       {
-        uint32_t color0 = p[c & 0xFF]; c >>= 8;
-        uint32_t color1 = p[c & 0xFF]; c >>= 8;
-        uint32_t c00 = color0 << shift0;
-        uint32_t c01 = color0 << shift1;
-        uint32_t c10 = color1 << shift0;
-        uint32_t c11 = color1 << shift1;
-        *++d = c00;
-        *++d = c01;
-        if (diff < 0)
-        {
-          diff += ratio_5;
-          *++d = c10;
-          *++d = c11;
-        }
-        else
-        {
-          diff += ratio_4;
-          *++d = (c00 & 0xFFFF0000) + (c10 & 0xFFFF);
-          *++d = c11;
-          *++d = c10;
-          std::swap(shift0, shift1);
-        }
+        diff += ratio_4;
+        d[1] = color0 << shift1;
+        d[3] = color1 << shift1;
+        d[0] = color0 << shift0;
+        d[2] = color1 << shift0;
+        d += 4;
+      }
+      else
+      {
+        diff += ratio_5;
+        d[1] = color0 << shift1;
+        d[3] = color1 << shift1;
+        color0 <<= shift0;
+        color1 <<= shift0;
+        d[0] = color0;
+        d[4] = color1;
+        d[2] = (color0 & 0xFFFF0000) + (color1 & 0xFFFF);
+        std::swap(shift0, shift1);
+        d += 5;
       }
-    } while (++s < s_end);
+    }
   }
 
   // x3 ~ x4
-  void IRAM_ATTR blit_x30_x40(uint32_t* __restrict d, const uint32_t* s, const uint32_t* s_end, const uint32_t* p, bool odd, int ratio_3, int ratio_4)
+  void IRAM_ATTR blit_x30_x40_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_3, int ratio_4)
   {
-    --d;
-
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio_4 + ratio_3) >> 1;
-    do
+
+    src_length = (src_length + 1) >> 1;
+    while (src_length--)
     {
-      uint32_t c = *s;
-      uint32_t color0 = p[c & 0xFF]; c >>= 8;
-      uint32_t color1 = p[c & 0xFF]; c >>= 8;
+      uint32_t p1l = s[3];
+      uint32_t p1h = s[2];
+      uint32_t p0l = s[1];
+      uint32_t p0h = s[0];
+      p1l += 256;
+      p0l += 256;
+      p0h = p[p0h];
+      p0l = p[p0l];
+      p1h = p[p1h];
+      p1l = p[p1l];
+      s += 4;
+      uint32_t color0 = p0h + p0l;
+      uint32_t color1 = p1h + p1l;
+
       if (diff < 0)
       {
-        diff += ratio_4;
-        *++d = color0 << shift0;
-        *++d = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF)) << shift1;
-        *++d = color1 << shift0;
+        diff += ratio_3;
+        d[0] = color0 << shift0;
+        d[2] = color1 << shift0;
+        color0 = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF));
+        d[1] = color0 << shift1;
         std::swap(shift0, shift1);
+        d += 3;
       }
       else
       {
-        diff += ratio_3;
-        *++d = color0 << shift0;
-        *++d = color0 << shift1;
-        *++d = color1 << shift0;
-        *++d = color1 << shift1;
+        diff += ratio_4;
+        d[0] = color0 << shift0;
+        d[2] = color1 << shift0;
+        d[1] = color0 << shift1;
+        d[3] = color1 << shift1;
+        d += 4;
       }
-      color0 = p[c & 0xFF]; c >>= 8;
-      color1 = p[c       ];
+    }
+  }
+
+  // x3 ~ x4
+  void IRAM_ATTR blit_x30_x40(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_3, int ratio_4)
+  {
+    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift1 = shift0 ^ 8;
+    int diff = (ratio_4 + ratio_3) >> 1;
+
+    src_length = (src_length + 1) >> 1;
+    while (src_length--)
+    {
+      auto p0 = s[0];
+      auto p1 = s[1];
+      s += 2;
+      uint32_t color0 = p[p0];
+      uint32_t color1 = p[p1];
       if (diff < 0)
       {
-        diff += ratio_4;
-        *++d = color0 << shift0;
-        *++d = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF)) << shift1;
-        *++d = color1 << shift0;
+        diff += ratio_3;
+        d[0] = color0 << shift0;
+        d[2] = color1 << shift0;
+        color0 = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF));
+        d[1] = color0 << shift1;
         std::swap(shift0, shift1);
+        d += 3;
       }
       else
       {
-        diff += ratio_3;
-        *++d = color0 << shift0;
-        *++d = color0 << shift1;
-        *++d = color1 << shift0;
-        *++d = color1 << shift1;
+        diff += ratio_4;
+        d[0] = color0 << shift0;
+        d[2] = color1 << shift0;
+        d[1] = color0 << shift1;
+        d[3] = color1 << shift1;
+        d += 4;
       }
-    } while (++s < s_end);
+    }
   }
 
   // x2 ~ x3
-  void IRAM_ATTR blit_x20_x30(uint32_t* __restrict d, const uint32_t* s, const uint32_t* s_end, const uint32_t* p, bool odd, int ratio_2, int ratio_3)
+  void IRAM_ATTR blit_x20_x30_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_2, int ratio_3)
   {
-    --d;
-
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio_3 + ratio_2) >> 1;
-    do
+
+    src_length = (src_length + 1) >> 1;
+    while (src_length--)
     {
-      uint32_t c = *s;
-      uint32_t color0 = p[c & 0xFF]; c >>= 8;
-      uint32_t color1 = p[c & 0xFF]; c >>= 8;
+      uint32_t p1l = s[3];
+      uint32_t p1h = s[2];
+      uint32_t p0l = s[1];
+      uint32_t p0h = s[0];
+      p1l += 256;
+      p0l += 256;
+      p0h = p[p0h];
+      p0l = p[p0l];
+      p1h = p[p1h];
+      p1l = p[p1l];
+      s += 4;
+      uint32_t color0 = p0h + p0l;
+      uint32_t color1 = p1h + p1l;
+
       if (diff < 0)
       {
-        diff += ratio_3;
+        diff += ratio_2;
         color0 <<= shift0;
         color1 <<= shift1;
-        *++d = color0;
-        *++d = color1;
+        d[0] = color0;
+        d[1] = color1;
+        d += 2;
       }
       else
       {
-        diff += ratio_2;
-        *++d = color0 << shift0;
-        *++d = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF)) << shift1;
-        *++d = color1 << shift0;
+        diff += ratio_3;
+        d[0] = color0 << shift0;
+        d[2] = color1 << shift0;
+        d[1] = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF)) << shift1;
+        d += 3;
         std::swap(shift0, shift1);
       }
-      color0 = p[c & 0xFF]; c >>= 8;
-      color1 = p[c       ];
+    }
+  }
+
+  // x2 ~ x3
+  void IRAM_ATTR blit_x20_x30(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_2, int ratio_3)
+  {
+    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift1 = shift0 ^ 8;
+    int diff = (ratio_3 + ratio_2) >> 1;
+
+    src_length = (src_length + 1) >> 1;
+    while (src_length--)
+    {
+      auto p0 = s[0];
+      auto p1 = s[1];
+      s += 2;
+      uint32_t color0 = p[p0];
+      uint32_t color1 = p[p1];
       if (diff < 0)
       {
-        diff += ratio_3;
+        diff += ratio_2;
         color0 <<= shift0;
         color1 <<= shift1;
-        *++d = color0;
-        *++d = color1;
+        d[0] = color0;
+        d[1] = color1;
+        d += 2;
       }
       else
       {
-        diff += ratio_2;
-        *++d = color0 << shift0;
-        *++d = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF)) << shift1;
-        *++d = color1 << shift0;
+        diff += ratio_3;
+        d[0] = color0 << shift0;
+        d[2] = color1 << shift0;
+        d[1] = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF)) << shift1;
+        d += 3;
         std::swap(shift0, shift1);
       }
-    } while (++s < s_end);
+    }
   }
 
   // x1.5~x2.0
-  void IRAM_ATTR blit_x15_x20(uint32_t* __restrict d, const uint32_t* s, const uint32_t* s_end, const uint32_t* p, bool odd, int ratio_15, int ratio_20)
+  void IRAM_ATTR blit_x15_x20(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_15, int ratio_20)
   {
-    --d;
-
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio_20 + ratio_15) >> 1;
-    do
+
+    src_length = (src_length + 3) >> 2;
+    while (src_length--)
     {
-      uint32_t c = *s;
-      uint32_t color0 = p[c & 0xFF]; c >>= 8;
-      uint32_t color1 = p[c & 0xFF]; c >>= 8;
-      uint32_t color2 = p[c & 0xFF]; c >>= 8;
-      uint32_t color3 = p[c       ];
+      uint32_t color0 = s[0];
+      uint32_t color1 = s[1];
+      uint32_t color2 = s[2];
+      uint32_t color3 = s[3];
+      color0 = p[color0];
+      color1 = p[color1];
+      color2 = p[color2];
+      color3 = p[color3];
+      s += 4;
       if (diff < 0)
       {
-        color0 = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF));
+        diff += ratio_15;
         color1 = ((color1 & 0xFFFF0000) + (color2 & 0xFFFF));
-        diff += ratio_20;
-        *++d = color0 << shift0;
-        *++d = color1 << shift1;
-        *++d = color3 << shift0;
+        color2 = ((color2 & 0xFFFF0000) + (color3 & 0xFFFF));
+        d[0] = color0 << shift0;
+        d[2] = color2 << shift0;
+        d[1] = color1 << shift1;
+        d += 3;
         std::swap(shift0, shift1);
       }
       else
       {
-        color0 <<= shift0;
-        color1 <<= shift1;
-        color2 <<= shift0;
-        color3 <<= shift1;
-        diff += ratio_15;
-        *++d = color0;
-        *++d = color1;
-        *++d = color2;
-        *++d = color3;
+        diff += ratio_20;
+        d[0] = color0 << shift0;
+        d[2] = color2 << shift0;
+        d[1] = color1 << shift1;
+        d[3] = color3 << shift1;
+        d += 4;
       }
-    } while (++s < s_end);
+    }
   }
 
   // x1.0~x1.5
-  void IRAM_ATTR blit_x10_x15(uint32_t* __restrict d, const uint32_t* s, const uint32_t* s_end, const uint32_t* p, bool odd, int ratio_10, int ratio_15)
+  void IRAM_ATTR blit_x10_x15(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_10, int ratio_15)
   {
-    --d;
-
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio_15 + ratio_10) >> 1;
-    do
+
+    src_length = (src_length + 3) >> 2;
+    while (src_length--)
     {
-      uint32_t c = *s;
-      uint32_t color0 = p[c & 0xFF]; c >>= 8;
-      uint32_t color1 = p[c & 0xFF]; c >>= 8;
-      uint32_t color2 = p[c & 0xFF]; c >>= 8;
-      uint32_t color3 = p[c       ];
+      uint32_t color0 = s[0];
+      uint32_t color1 = s[1];
+      uint32_t color2 = s[2];
+      uint32_t color3 = s[3];
+      color0 = p[color0];
+      color1 = p[color1];
+      color2 = p[color2];
+      color3 = p[color3];
+      s += 4;
       if (diff < 0)
       {
+        diff += ratio_10;
         color0 &= 0xFFFF0000;
         color2 &= 0xFFFF0000;
         color1 &= 0xFFFF;
         color3 &= 0xFFFF;
         color0 = (color0 + color1) << shift0;
         color2 = (color2 + color3) << shift1;
-        diff += ratio_15;
-        *++d = color0;
-        *++d = color2;
+        d[0] = color0;
+        d[1] = color2;
+        d += 2;
       }
       else
       {
+        diff += ratio_15;
         color0 <<= shift0;
         color3 <<= shift0;
         color1 = ((color1 & 0xFFFF0000) + (color2 & 0xFFFF)) << shift1;
-        diff += ratio_10;
+        d[0] = color0;
+        d[1] = color1;
+        d[2] = color3;
         std::swap(shift0, shift1);
-        *++d = color0;
-        *++d = color1;
-        *++d = color3;
+        d += 3;
       }
-    } while (++s < s_end);
+    }
   }
 
   /// 引数のポインタアドレスがSRAMかどうか判定する  true=SRAM / false=not SRAM (e.g. PSRAM FlashROM) ;
@@ -763,14 +1016,19 @@ namespace lgfx
         {
           src = _scanline_cache.get(src);
         }
+        int pidx = 0;
+        if (internal.burst_shift & 1)
+        {
+          pidx = internal.pixel_per_bytes << 8;
+        }
 
-        internal.fp_blit( (      uint32_t*)(&buf[internal.leftside_index]),
-                          (const uint32_t*) src,
-                          (const uint32_t*)(&src[internal.panel_width]),
-                          &internal.palette[(1 & internal.burst_shift) << 8],
+        internal.fp_blit( (uint32_t*)(&buf[internal.leftside_index]),
+                          src,
+                          internal.panel_width,
+                          &internal.palette[pidx],
                           internal.burst_shift & 2,
-                          internal.blit_ratio_h,
-                          internal.blit_ratio_l );
+                          internal.blit_ratio_l,
+                          internal.blit_ratio_h );
       }
     }
     else
@@ -953,6 +1211,9 @@ namespace lgfx
     const signal_spec_info_t& spec_info = signal_spec_info_list[_config_detail.signal_type];
     _signal_spec_info = spec_info;
 
+    uint32_t pixelPerBytes = (getWriteDepth() & color_depth_t::bit_mask) >> 3;
+    internal.pixel_per_bytes = pixelPerBytes;
+
 // 幅方向の解像度に関する準備 ;
     {
       uint16_t output_width = std::min(_cfg.memory_width, spec_info.display_width);
@@ -965,7 +1226,7 @@ namespace lgfx
       scale_index = (scale_index < 2 ? 2 : scale_index > 10 ? 10 : scale_index) - 2;
 
       /// 表示倍率に応じて出力データ生成関数を変更する;
-      static constexpr void (*fp_tbl[])(uint32_t*, const uint32_t*, const uint32_t*, const uint32_t*, bool, int, int) =
+      static constexpr void (*fp_tbl_332[])(uint32_t*, const uint8_t*, size_t, const uint32_t*, bool, int, int) =
       {
         blit_x10_x15,
         blit_x15_x20,
@@ -977,7 +1238,20 @@ namespace lgfx
         blit_x40_x50,
         blit_x50_x60
       };
-      internal.fp_blit = fp_tbl[scale_index];
+      static constexpr void (*fp_tbl_565[])(uint32_t*, const uint8_t*, size_t, const uint32_t*, bool, int, int) =
+      {
+        blit_x10_x15,
+        blit_x15_x20,
+        blit_x20_x30_565,
+        blit_x20_x30_565,
+        blit_x30_x40_565,
+        blit_x30_x40_565,
+        blit_x40_x50_565,
+        blit_x40_x50_565,
+        blit_x50_x60_565
+      };
+
+      internal.fp_blit = (pixelPerBytes == 1 ? fp_tbl_332 : fp_tbl_565)[scale_index];
 
       /// 描画時の引き延ばし倍率テーブル (例:2=等倍  3=1.5倍  4=2倍)  上位4bitと下位4bitで２種類の倍率を指定する;
       /// この２種類の倍率をデータ生成時に切り替えて任意サイズの出力倍率を実現する;
@@ -995,7 +1269,7 @@ namespace lgfx
 
       internal.leftside_index = (spec_info.active_start + scale_offset) & ~3u;
 
-// printf("scale_l:%d scale_h:%d swl:%d swh:%d  ratio a:%d b:%d left:%d  \n", scale_l, scale_h, scale_width_l, scale_width_h, internal.blit_ratio_h, internal.blit_ratio_l, internal.leftside_index);
+// printf("scale_l:%d scale_h:%d swl:%d swh:%d  ratio:a:%d b:%d left:%d  \n", scale_l, scale_h, output_width * scale_l, output_width * scale_h, internal.blit_ratio_h, internal.blit_ratio_l, internal.leftside_index);
     }
 
     {
@@ -1010,18 +1284,18 @@ namespace lgfx
     setRotation(getRotation());
 
     const signal_setup_info_t& setup_info = signal_setup_info_list[_config_detail.signal_type];
-    internal.palette = (uint32_t*)heap_alloc(setup_info.palette_num_256 * 256 * sizeof(uint32_t));
+    internal.palette = (uint32_t*)heap_alloc(setup_info.palette_num_256 * pixelPerBytes * 256 * sizeof(uint32_t));
 // printf("internal.palette: %08x alloc\n", internal.palette);
     if (!internal.palette) { return false; }
 
     uint_fast8_t use_psram = _config_detail.use_psram;
-    if (!initFrameBuffer(internal.panel_width, internal.panel_height, use_psram)) { return false; }
+    if (!initFrameBuffer(internal.panel_width * pixelPerBytes, internal.panel_height, use_psram)) { return false; }
 
     use_psram = isSRAM(_lines_buffer[0]) ? 0 : use_psram;
     internal.use_psram = use_psram;
     if (use_psram)
     {
-      _scanline_cache.begin(( internal.panel_width + 4 ) & ~3);
+      _scanline_cache.begin(( internal.panel_width * pixelPerBytes + 4 ) & ~3);
     }
 
     size_t n = spec_info.scanline_width << 1;  // n=DMA 1回分のデータ量  最大値は4092;
@@ -1133,6 +1407,26 @@ namespace lgfx
     }
   }
 
+  color_depth_t Panel_CVBS::setColorDepth(color_depth_t depth)
+  {
+    depth = ((depth & color_depth_t::bit_mask) > 8) ? rgb565_2Byte : rgb332_1Byte;
+    if (depth != _write_depth)
+    {
+      bool flg_started = _started;
+      if (flg_started) // panel was already started: shut it down before the depth fields change
+      {
+        deinit();
+      }
+      _write_depth = depth;
+      _read_depth = depth; // read depth is kept identical to write depth
+      if (flg_started) // restart only when the panel was started on entry
+      {
+        init(false); // NOTE(review): the result of init() is not checked here
+      }
+    }
+    return depth; // the normalized depth (rgb565_2Byte or rgb332_1Byte), returned even when nothing changed
+  } // setColorDepth: maps any request wider than 8 bits to rgb565_2Byte and everything else to rgb332_1Byte, applying the change around a deinit()/init() pair when the panel is started
+
   void Panel_CVBS::setResolution(uint16_t width, uint16_t height, config_detail_t::signal_type_t type, int output_width, int output_height, int offset_x, int offset_y)
   {
     bool flg_started = _started;
@@ -1203,7 +1497,11 @@ namespace lgfx
     if (internal.palette)
     {
       const signal_setup_info_t& setup_info_ = signal_setup_info_list[_config_detail.signal_type];
-      setup_info_.setup_palette(internal.palette, internal.WHITE_LEVEL, internal.BLACK_LEVEL, _config_detail.chroma_level);
+      if (internal.pixel_per_bytes == 1) {
+        setup_info_.setup_palette_332(internal.palette, internal.WHITE_LEVEL, internal.BLACK_LEVEL, _config_detail.chroma_level);
+      } else {
+        setup_info_.setup_palette_565(internal.palette, internal.WHITE_LEVEL, internal.BLACK_LEVEL, _config_detail.chroma_level);
+      }
     }
   }
 
diff --git a/src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp b/src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp
index 608e3d4b..3b6b4de6 100644
--- a/src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp
+++ b/src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp
@@ -59,7 +59,7 @@ namespace lgfx
       uint8_t use_psram = 0;
     };
 
-    color_depth_t setColorDepth(color_depth_t) override { return _write_depth; }
+    color_depth_t setColorDepth(color_depth_t) override;
     void setResolution(uint16_t width, uint16_t height, config_detail_t::signal_type_t type = config_detail_t::signal_type_max, int output_width = -1, int output_height = -1, int offset_x = -1, int offset_y = -1);
     void setOutputLevel(uint8_t output_level);
     void setChromaLevel(uint8_t chroma);

From 0e3e0ef373029cd4e6b4260af220db7584b7aece Mon Sep 17 00:00:00 2001
From: lovyan03 <42724151+lovyan03@users.noreply.github.com>
Date: Sun, 1 Jan 2023 22:54:35 +0900
Subject: [PATCH 04/12] tweak RGB565 for Panel_CVBS

---
 src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp | 204 ++++++++++++++++-----
 1 file changed, 159 insertions(+), 45 deletions(-)

diff --git a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
index dc7bf164..24f3eb59 100644
--- a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
+++ b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
@@ -535,6 +535,7 @@ namespace lgfx
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio_6 + ratio_5) >> 1;
+    auto pl = p + 256;
 
     src_length = (src_length + 1) >> 1;
     while (src_length--)
@@ -543,15 +544,13 @@ namespace lgfx
       uint32_t p1h = s[2];
       uint32_t p0l = s[1];
       uint32_t p0h = s[0];
-      p1l += 256;
-      p0l += 256;
-      p0h = p[p0h];
-      p0l = p[p0l];
-      p1h = p[p1h];
-      p1l = p[p1l];
+      p1l = pl[p1l];
+      p1h = p [p1h];
+      p0l = pl[p0l];
+      p0h = p [p0h];
       s += 4;
-      uint32_t color0 = p0h + p0l;
       uint32_t color1 = p1h + p1l;
+      uint32_t color0 = p0h + p0l;
 
       if (diff < 0)
       {
@@ -579,7 +578,7 @@ namespace lgfx
   }
 
   // x5 ~ x6
-  void IRAM_ATTR blit_x50_x60(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_5, int ratio_6)
+  void IRAM_ATTR blit_x50_x60_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_5, int ratio_6)
   {
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
@@ -624,6 +623,7 @@ namespace lgfx
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio_5 + ratio_4) >> 1;
+    auto pl = p + 256;
 
     src_length = (src_length + 1) >> 1;
     while (src_length--)
@@ -632,15 +632,13 @@ namespace lgfx
       uint32_t p1h = s[2];
       uint32_t p0l = s[1];
       uint32_t p0h = s[0];
-      p1l += 256;
-      p0l += 256;
-      p0h = p[p0h];
-      p0l = p[p0l];
-      p1h = p[p1h];
-      p1l = p[p1l];
+      p1l = pl[p1l];
+      p1h = p [p1h];
+      p0l = pl[p0l];
+      p0h = p [p0h];
       s += 4;
-      uint32_t color0 = p0h + p0l;
       uint32_t color1 = p1h + p1l;
+      uint32_t color0 = p0h + p0l;
 
       if (diff < 0)
       {
@@ -668,7 +666,7 @@ namespace lgfx
   }
 
   // x4 ~ x5
-  void IRAM_ATTR blit_x40_x50(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_4, int ratio_5)
+  void IRAM_ATTR blit_x40_x50_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_4, int ratio_5)
   {
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
@@ -713,6 +711,7 @@ namespace lgfx
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio_4 + ratio_3) >> 1;
+    auto pl = p + 256;
 
     src_length = (src_length + 1) >> 1;
     while (src_length--)
@@ -721,15 +720,13 @@ namespace lgfx
       uint32_t p1h = s[2];
       uint32_t p0l = s[1];
       uint32_t p0h = s[0];
-      p1l += 256;
-      p0l += 256;
-      p0h = p[p0h];
-      p0l = p[p0l];
-      p1h = p[p1h];
-      p1l = p[p1l];
+      p1l = pl[p1l];
+      p1h = p [p1h];
+      p0l = pl[p0l];
+      p0h = p [p0h];
       s += 4;
-      uint32_t color0 = p0h + p0l;
       uint32_t color1 = p1h + p1l;
+      uint32_t color0 = p0h + p0l;
 
       if (diff < 0)
       {
@@ -754,7 +751,7 @@ namespace lgfx
   }
 
   // x3 ~ x4
-  void IRAM_ATTR blit_x30_x40(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_3, int ratio_4)
+  void IRAM_ATTR blit_x30_x40_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_3, int ratio_4)
   {
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
@@ -796,6 +793,7 @@ namespace lgfx
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio_3 + ratio_2) >> 1;
+    auto pl = p + 256;
 
     src_length = (src_length + 1) >> 1;
     while (src_length--)
@@ -804,15 +802,13 @@ namespace lgfx
       uint32_t p1h = s[2];
       uint32_t p0l = s[1];
       uint32_t p0h = s[0];
-      p1l += 256;
-      p0l += 256;
-      p0h = p[p0h];
-      p0l = p[p0l];
-      p1h = p[p1h];
-      p1l = p[p1l];
+      p1l = pl[p1l];
+      p1h = p [p1h];
+      p0l = pl[p0l];
+      p0h = p [p0h];
       s += 4;
-      uint32_t color0 = p0h + p0l;
       uint32_t color1 = p1h + p1l;
+      uint32_t color0 = p0h + p0l;
 
       if (diff < 0)
       {
@@ -836,7 +832,7 @@ namespace lgfx
   }
 
   // x2 ~ x3
-  void IRAM_ATTR blit_x20_x30(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_2, int ratio_3)
+  void IRAM_ATTR blit_x20_x30_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_2, int ratio_3)
   {
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
@@ -872,7 +868,64 @@ namespace lgfx
   }
 
   // x1.5~x2.0
-  void IRAM_ATTR blit_x15_x20(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_15, int ratio_20)
+  void IRAM_ATTR blit_x15_x20_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_15, int ratio_20)
+  { // 2-bytes-per-pixel blit: every loop pass reads 4 source pixels (8 bytes) from s and writes either 3 or 4 words to d
+    uint_fast8_t shift0 = odd << 3; // 0 or 8, chosen by `odd`
+    uint_fast8_t shift1 = shift0 ^ 8; // whichever of 0 / 8 shift0 is not
+    int diff = (ratio_20 + ratio_15) >> 1; // running selector: its sign picks the 3-word or the 4-word branch below
+    auto pl = p + 256; // second 256-entry table, indexed by the second byte of each pixel
+
+    src_length = (src_length + 3) >> 2; // pixel count -> number of 4-pixel groups, rounded up (a partial last group is processed as a whole group)
+    while (src_length--)
+    {
+      uint32_t p3l = s[7]; // pixel 3, second byte
+      uint32_t p3h = s[6]; // pixel 3, first byte
+      uint32_t p2l = s[5]; // pixel 2, second byte
+      uint32_t p2h = s[4]; // pixel 2, first byte
+      p3l = pl[p3l]; // second bytes are looked up in pl, first bytes in p
+      p3h = p [p3h];
+      p2l = pl[p2l];
+      p2h = p [p2h];
+      uint32_t color3 = p3h + p3l; // a pixel's output word is the sum of its two table entries
+
+      uint32_t p1l = s[3]; // pixel 1, second byte
+      uint32_t p1h = s[2]; // pixel 1, first byte
+      uint32_t color2 = p2h + p2l;
+      uint32_t p0h = s[0]; // pixel 0, first byte
+      uint32_t p0l = s[1]; // pixel 0, second byte
+      p1l = pl[p1l];
+      p1h = p [p1h];
+      p0l = pl[p0l];
+      p0h = p [p0h];
+      s += 8; // advance past the 4 pixels just read
+      uint32_t color1 = p1h + p1l;
+      uint32_t color0 = p0h + p0l;
+
+      if (diff < 0)
+      {
+        diff += ratio_15;
+        color1 = ((color1 & 0xFFFF0000) + (color2 & 0xFFFF)); // upper 16 bits of pixel 1 joined with lower 16 bits of pixel 2
+        color2 = ((color2 & 0xFFFF0000) + (color3 & 0xFFFF)); // upper 16 bits of pixel 2 joined with lower 16 bits of pixel 3
+        d[0] = color0 << shift0;
+        d[2] = color2 << shift0;
+        d[1] = color1 << shift1;
+        d += 3; // 3 words out for 4 pixels in (presumably the x1.5 case - TODO confirm the samples-per-word layout)
+        std::swap(shift0, shift1); // an odd number of words was written, so the two shifts trade places for the next group
+      }
+      else
+      {
+        diff += ratio_20;
+        d[0] = color0 << shift0; // even-indexed words use shift0, odd-indexed words use shift1
+        d[2] = color2 << shift0;
+        d[1] = color1 << shift1;
+        d[3] = color3 << shift1;
+        d += 4; // 4 words out for 4 pixels in (presumably the x2.0 case - TODO confirm)
+      }
+    }
+  }
+
+  // x1.5~x2.0
+  void IRAM_ATTR blit_x15_x20_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_15, int ratio_20)
   {
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
@@ -914,7 +967,68 @@ namespace lgfx
   }
 
   // x1.0~x1.5
-  void IRAM_ATTR blit_x10_x15(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_10, int ratio_15)
+  void IRAM_ATTR blit_x10_x15_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_10, int ratio_15)
+  { // 2-bytes-per-pixel blit: every loop pass reads 4 source pixels (8 bytes) from s and writes either 2 or 3 words to d
+    uint_fast8_t shift0 = odd << 3; // 0 or 8, chosen by `odd`
+    uint_fast8_t shift1 = shift0 ^ 8; // whichever of 0 / 8 shift0 is not
+    int diff = (ratio_15 + ratio_10) >> 1; // running selector: its sign picks the 2-word or the 3-word branch below
+    auto pl = p + 256; // second 256-entry table, indexed by the second byte of each pixel
+
+    src_length = (src_length + 3) >> 2; // pixel count -> number of 4-pixel groups, rounded up (a partial last group is processed as a whole group)
+    while (src_length--)
+    {
+      uint32_t p3l = s[7]; // pixel 3, second byte
+      uint32_t p3h = s[6]; // pixel 3, first byte
+      uint32_t p2l = s[5]; // pixel 2, second byte
+      uint32_t p2h = s[4]; // pixel 2, first byte
+      p3l = pl[p3l]; // second bytes are looked up in pl, first bytes in p
+      p3h = p [p3h];
+      p2l = pl[p2l];
+      p2h = p [p2h];
+      uint32_t color3 = p3h + p3l; // a pixel's output word is the sum of its two table entries
+
+      uint32_t p1l = s[3]; // pixel 1, second byte
+      uint32_t p1h = s[2]; // pixel 1, first byte
+      uint32_t color2 = p2h + p2l;
+      uint32_t p0h = s[0]; // pixel 0, first byte
+      uint32_t p0l = s[1]; // pixel 0, second byte
+      p1l = pl[p1l];
+      p1h = p [p1h];
+      p0l = pl[p0l];
+      p0h = p [p0h];
+      s += 8; // advance past the 4 pixels just read
+      uint32_t color1 = p1h + p1l;
+      uint32_t color0 = p0h + p0l;
+
+      if (diff < 0)
+      {
+        diff += ratio_10;
+        color0 &= 0xFFFF0000; // keep only the upper 16 bits of pixels 0 and 2
+        color2 &= 0xFFFF0000;
+        color1 &= 0xFFFF; // keep only the lower 16 bits of pixels 1 and 3
+        color3 &= 0xFFFF;
+        color0 = (color0 + color1) << shift0; // word 0: upper half from pixel 0, lower half from pixel 1
+        color2 = (color2 + color3) << shift1; // word 1: upper half from pixel 2, lower half from pixel 3
+        d[0] = color0;
+        d[1] = color2;
+        d += 2; // 2 words out for 4 pixels in (presumably the x1.0 case - TODO confirm the samples-per-word layout)
+      }
+      else
+      {
+        diff += ratio_15;
+        color1 = ((color1 & 0xFFFF0000) + (color2 & 0xFFFF)); // upper 16 bits of pixel 1 joined with lower 16 bits of pixel 2
+        color2 = ((color2 & 0xFFFF0000) + (color3 & 0xFFFF)); // upper 16 bits of pixel 2 joined with lower 16 bits of pixel 3
+        d[0] = color0 << shift0;
+        d[2] = color2 << shift0;
+        d[1] = color1 << shift1;
+        d += 3; // 3 words out for 4 pixels in (presumably the x1.5 case - TODO confirm)
+        std::swap(shift0, shift1); // an odd number of words was written, so the two shifts trade places for the next group
+      }
+    }
+  }
+
+  // x1.0~x1.5
+  void IRAM_ATTR blit_x10_x15_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_10, int ratio_15)
   {
     uint_fast8_t shift0 = odd << 3;
     uint_fast8_t shift1 = shift0 ^ 8;
@@ -1228,20 +1342,20 @@ namespace lgfx
       /// 表示倍率に応じて出力データ生成関数を変更する;
       static constexpr void (*fp_tbl_332[])(uint32_t*, const uint8_t*, size_t, const uint32_t*, bool, int, int) =
       {
-        blit_x10_x15,
-        blit_x15_x20,
-        blit_x20_x30,
-        blit_x20_x30,
-        blit_x30_x40,
-        blit_x30_x40,
-        blit_x40_x50,
-        blit_x40_x50,
-        blit_x50_x60
+        blit_x10_x15_332,
+        blit_x15_x20_332,
+        blit_x20_x30_332,
+        blit_x20_x30_332,
+        blit_x30_x40_332,
+        blit_x30_x40_332,
+        blit_x40_x50_332,
+        blit_x40_x50_332,
+        blit_x50_x60_332
       };
       static constexpr void (*fp_tbl_565[])(uint32_t*, const uint8_t*, size_t, const uint32_t*, bool, int, int) =
       {
-        blit_x10_x15,
-        blit_x15_x20,
+        blit_x10_x15_565,
+        blit_x15_x20_565,
         blit_x20_x30_565,
         blit_x20_x30_565,
         blit_x30_x40_565,

From 6c57cbe68e44ae4f802ad99384617d759714f4ea Mon Sep 17 00:00:00 2001
From: lovyan03 <42724151+lovyan03@users.noreply.github.com>
Date: Mon, 2 Jan 2023 16:50:38 +0900
Subject: [PATCH 05/12] fix compiler warning.

---
 .../LGFX_AutoDetect_ESP32_all.hpp             | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/lgfx/v1_autodetect/LGFX_AutoDetect_ESP32_all.hpp b/src/lgfx/v1_autodetect/LGFX_AutoDetect_ESP32_all.hpp
index 6c546737..2d0a60be 100644
--- a/src/lgfx/v1_autodetect/LGFX_AutoDetect_ESP32_all.hpp
+++ b/src/lgfx/v1_autodetect/LGFX_AutoDetect_ESP32_all.hpp
@@ -909,8 +909,8 @@ namespace lgfx
 
         _pin_level(pin_cs, true);
 
-        Bus_Parallel16 bus;
-        auto cfg = bus.config();
+        Bus_Parallel16 bus_tmp;
+        auto cfg = bus_tmp.config();
         for (size_t i = 0; i < 16; ++i)
         {
           cfg.pin_data[i] = pin_data[i];
@@ -922,13 +922,13 @@ namespace lgfx
         // cfg.freq_read = 5000000;
         cfg.port   = port;
 
-        bus.config(cfg);
-        bus.init();
+        bus_tmp.config(cfg);
+        bus_tmp.init();
         _pin_reset(pin_rst, use_reset); // LCD RST
 
-        bool hit = judgement(&bus, pin_cs);
+        bool hit = judgement(&bus_tmp, pin_cs);
 
-        bus.release();
+        bus_tmp.release();
 
         if (hit)
         {
@@ -945,10 +945,10 @@ namespace lgfx
           auto p = result->panel;
           p->bus(bus);
           {
-            auto cfg = p->config();
-            if (pin_cs  >= 0) { cfg.pin_cs  = pin_cs;  }
-            if (pin_rst >= 0) { cfg.pin_rst = pin_rst; }
-            p->config(cfg);
+            auto cfg_panel = p->config();
+            if (pin_cs  >= 0) { cfg_panel.pin_cs  = pin_cs;  }
+            if (pin_rst >= 0) { cfg_panel.pin_rst = pin_rst; }
+            p->config(cfg_panel);
           }
           return true;
         }

From cefcfb0da9c8a297ba832a83077d16b5a34eed85 Mon Sep 17 00:00:00 2001
From: lovyan03 <42724151+lovyan03@users.noreply.github.com>
Date: Mon, 2 Jan 2023 16:51:48 +0900
Subject: [PATCH 06/12] add support grayscale for Panel_CVBS

---
 doc/Panel_CVBS.md                          | 20 ++++++++
 src/lgfx/v1/LGFXBase.hpp                   | 14 ++++--
 src/lgfx/v1/misc/colortype.hpp             | 32 ++++++++-----
 src/lgfx/v1/misc/pixelcopy.cpp             |  5 +-
 src/lgfx/v1/misc/pixelcopy.hpp             |  3 ++
 src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp | 54 +++++++++++++++++++---
 6 files changed, 104 insertions(+), 24 deletions(-)

diff --git a/doc/Panel_CVBS.md b/doc/Panel_CVBS.md
index e1900a65..cba7bdd3 100644
--- a/doc/Panel_CVBS.md
+++ b/doc/Panel_CVBS.md
@@ -172,7 +172,27 @@ LGFX gfx;
 
 void setup(void)
 {
+// 色数の指定 (省略時は rgb332_1Byte)
+//gfx.setColorDepth( 8);        // RGB332 256色
+//gfx.setColorDepth(16);        // RGB565 65536色
+//gfx.setColorDepth(lgfx::color_depth_t::rgb332_1Byte);   // RGB332 256色
+//gfx.setColorDepth(lgfx::color_depth_t::rgb565_2Byte);   // RGB565 65536色
+//gfx.setColorDepth(lgfx::color_depth_t::grayscale_8bit); // モノクロ 256階調
+
+//※ 実行中に setColorDepth で色数を変更することも可能ですが、
+//   メモリの再割当を実行するため描画内容は無効になります。
+
   gfx.init();
+
+  for (int x = 0; x < gfx.width(); ++x)
+  {
+    int v = x * 256 / gfx.width();
+    gfx.fillRect(x, 0 * gfx.height() >> 3, 7, gfx.height() >> 3, gfx.color888(v, v, v));
+    gfx.fillRect(x, 1 * gfx.height() >> 3, 7, gfx.height() >> 3, gfx.color888(v, 0 ,0));
+    gfx.fillRect(x, 2 * gfx.height() >> 3, 7, gfx.height() >> 3, gfx.color888(0, v, 0));
+    gfx.fillRect(x, 3 * gfx.height() >> 3, 7, gfx.height() >> 3, gfx.color888(0, 0, v));
+  }
+  delay(1000);
 }
 
 void loop(void)
diff --git a/src/lgfx/v1/LGFXBase.hpp b/src/lgfx/v1/LGFXBase.hpp
index e5a81983..7471bdbb 100644
--- a/src/lgfx/v1/LGFXBase.hpp
+++ b/src/lgfx/v1/LGFXBase.hpp
@@ -859,8 +859,9 @@ namespace lgfx
       }
       else
       {
-        if (dst_depth == rgb565_2Byte) { pc.fp_copy = pixelcopy_t::copy_rgb_fast<swap565_t, T>; }
-        else                           { pc.fp_copy = pixelcopy_t::copy_rgb_fast<rgb332_t, T>; }
+        if (     dst_depth == rgb565_2Byte) { pc.fp_copy = pixelcopy_t::copy_rgb_fast<swap565_t, T>; }
+        else if (dst_depth == rgb332_1Byte) { pc.fp_copy = pixelcopy_t::copy_rgb_fast<rgb332_t, T>; }
+        else                                { pc.fp_copy = pixelcopy_t::copy_rgb_fast<grayscale_t, T>; }
       }
       return pc;
     }
@@ -919,8 +920,9 @@ namespace lgfx
         }
         else
         {
-          if (dst_depth == rgb565_2Byte) { pc.fp_copy = pixelcopy_t::copy_palette_fast<swap565_t, T>; }
-          else                           { pc.fp_copy = pixelcopy_t::copy_palette_fast<rgb332_t, T>; }
+          if (     dst_depth == rgb565_2Byte) { pc.fp_copy = pixelcopy_t::copy_palette_fast<swap565_t, T>; }
+          else if (dst_depth == rgb332_1Byte) { pc.fp_copy = pixelcopy_t::copy_palette_fast<rgb332_t, T>; }
+          else                                { pc.fp_copy = pixelcopy_t::copy_palette_fast<grayscale_t, T>; }
         }
       }
       return pc;
@@ -1060,8 +1062,10 @@ namespace lgfx
       {
         if (depth == rgb565_2Byte) {
           pc.fp_copy = pixelcopy_t::copy_rgb_antialias<swap565_t>;
-        } else {
+        } else if (depth == rgb332_1Byte) {
           pc.fp_copy = pixelcopy_t::copy_rgb_antialias<rgb332_t>;
+        } else {
+          pc.fp_copy = pixelcopy_t::copy_rgb_antialias<grayscale_t>;
         }
       }
       return pc;
diff --git a/src/lgfx/v1/misc/colortype.hpp b/src/lgfx/v1/misc/colortype.hpp
index 0e973cc7..a8bc27e6 100644
--- a/src/lgfx/v1/misc/colortype.hpp
+++ b/src/lgfx/v1/misc/colortype.hpp
@@ -586,13 +586,15 @@ namespace lgfx
   template<> LGFX_INLINE uint32_t color_convert<bgra8888_t , bgr666_t   >(uint32_t c) { c<<=2; return (c << 8) + ((c & 0xC0C0C0) << 2) + 0xFF; }
   template<> LGFX_INLINE uint32_t color_convert<bgra8888_t , bgr888_t   >(uint32_t c) { return (c << 8) + 0xFF; }
   template<> LGFX_INLINE uint32_t color_convert<bgra8888_t , grayscale_t>(uint32_t c) { return (((c << 8) + c) << 16) + (c << 8) + 0xFF; }
-  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, rgb332_t   >(uint32_t c) { uint_fast16_t t = ((c>>5)*0x49); t += ((c>>2)&7)*0x92; t += (c&3) * 0xAA; return t >> 3; }
-  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, rgb565_t   >(uint32_t c) { uint_fast16_t g = ( c & 0x07E0); g |= g >> 6; uint_fast16_t rb = ((c>>11)+(c&0x1F))*0x21; return (rb+g)>>4; }
-  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, rgb888_t   >(uint32_t c) { uint_fast16_t g = ( c >> 8) & 0xFF; g = (g << 1) + (g >> 7); return (g + ((c>>16)&0xFF)+(c&0xFF))>>2; }
-  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, argb8888_t >(uint32_t c) { uint_fast16_t g = ( c >> 8) & 0xFF; g = (g << 1) + (g >> 7); return (g + ((c>>16)&0xFF)+(c&0xFF))>>2; }
-  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, swap565_t  >(uint32_t c) { uint_fast16_t rb = ((((c>>3)&0x1F) + ((c>>8)&0x1F)) * 0x21) >> 2; uint_fast16_t g = (c & 7); g = (((g << 3) + (c >> 13)) << 3) + g; return (rb+g) >> 2; }
-  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, bgr666_t   >(uint32_t c) { uint_fast16_t g = ((c >> 8) & 0x3F)*0x82; uint_fast16_t rb = ((c>>16) + (c&0x3F))*0x41; return (rb+g)>>6; }
-  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, bgr888_t   >(uint32_t c) { uint_fast16_t g = ( c >> 8) & 0xFF; g = (g << 1) + (g >> 7); return (g + ((c>>16)&0xFF)+(c&0xFF))>>2; }
+
+// ITU-R BT.601 RGB to Y convert  R 0.299 + G 0.587 + B 0.114
+  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, rgb332_t   >(uint32_t c) { return (((c >>  5) & 0x07) *  43 + ((c >>  2) & 0x07) *  86 +  (c        & 0x03) *  39) >> 2; }
+  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, rgb565_t   >(uint32_t c) { return (((c >> 11) & 0x1F) *  79 + ((c >>  5) & 0x3F) *  76 +  (c        & 0x1F) *  30) >> 5; }
+  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, rgb888_t   >(uint32_t c) { return (((c >> 16) & 0xFF) *  77 + ((c >>  8) & 0xFF) * 151 +  (c        & 0xFF) *  29) >> 8; }
+  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, argb8888_t >(uint32_t c) { return (((c >> 24) & 0xFF) *  77 + ((c >> 16) & 0xFF) * 151 + ((c >>  8) & 0xFF) *  29) >> 8; } // NOTE(review): the specialization this replaces read its three channels from bits 16/8/0; this one reads them from bits 24/16/8 - confirm against argb8888_t's raw layout that the top byte is not alpha
+  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, swap565_t  >(uint32_t c) { return (((c >>  3) & 0x1F) *  79+(((c<<3)+(c>>13))&0x3F)*76 + ((c >>  8) & 0x1F) *  30) >> 5; }
+  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, bgr666_t   >(uint32_t c) { return (( c        & 0x3F) *  39 + ((c >>  8) & 0x3F) *  76 + ((c >> 16) & 0x3F) *  15) >> 5; }
+  template<> LGFX_INLINE uint32_t color_convert<grayscale_t, bgr888_t   >(uint32_t c) { return (( c        & 0xFF) *  77 + ((c >>  8) & 0xFF) * 151 + ((c >> 16) & 0xFF) *  29) >> 8; }
   template<> LGFX_INLINE uint32_t color_convert<grayscale_t, bgra8888_t >(uint32_t c) { return color_convert<grayscale_t, bgr888_t>(c>>8); }
 
   LGFX_INLINE rgb332_t&    rgb332_t   ::operator=(const rgb565_t&    c) { set(color_convert<rgb332_t   , rgb565_t   >(c.get())); return *this; }
@@ -708,6 +710,7 @@ namespace lgfx
       case rgb666_3Byte  : return color_convert<bgr666_t  , rgb332_t>;
       case rgb565_2Byte  : return color_convert<swap565_t , rgb332_t>;
       case rgb332_1Byte  : return no_convert;
+      case grayscale_8bit: return color_convert<grayscale_t, rgb332_t>;
       default: break;
       }
     } else if (std::is_same<TSrc, rgb888_t>::value || std::is_same<TSrc, uint32_t>::value) {
@@ -717,6 +720,7 @@ namespace lgfx
       case rgb666_3Byte  : return color_convert<bgr666_t  , rgb888_t>;
       case rgb565_2Byte  : return color_convert<swap565_t , rgb888_t>;
       case rgb332_1Byte  : return color_convert<rgb332_t  , rgb888_t>;
+      case grayscale_8bit: return color_convert<grayscale_t,rgb888_t>;
       default: break;
       }
     } else if (std::is_same<TSrc, argb8888_t>::value) {
@@ -726,6 +730,7 @@ namespace lgfx
       case rgb666_3Byte  : return color_convert<bgr666_t , rgb888_t>;
       case rgb565_2Byte  : return color_convert<swap565_t, rgb888_t>;
       case rgb332_1Byte  : return color_convert<rgb332_t , rgb888_t>;
+      case grayscale_8bit: return color_convert<grayscale_t,rgb888_t>;
       default: break;
       }
     } else if (std::is_same<TSrc, bgr888_t>::value) {
@@ -735,6 +740,7 @@ namespace lgfx
       case rgb666_3Byte  : return color_convert<bgr666_t  , bgr888_t>;
       case rgb565_2Byte  : return color_convert<swap565_t , bgr888_t>;
       case rgb332_1Byte  : return color_convert<rgb332_t  , bgr888_t>;
+      case grayscale_8bit: return color_convert<grayscale_t,bgr888_t>;
       default: break;
       }
     } else { // if (std::is_same<TSrc, rgb565_t>::value || std::is_same<TSrc, uint16_t>::value || std::is_same<TSrc, int>::value)
@@ -744,6 +750,7 @@ namespace lgfx
       case rgb666_3Byte  : return color_convert<bgr666_t  , rgb565_t>;
       case rgb565_2Byte  : return getSwap16;
       case rgb332_1Byte  : return color_convert<rgb332_t  , rgb565_t>;
+      case grayscale_8bit: return color_convert<grayscale_t,rgb565_t>;
       default: break;
       }
     }
@@ -822,11 +829,12 @@ namespace lgfx
       convert_bgr888   = get_fp_convert_src<bgr888_t  >(depth_);
 
       switch (depth_) {
-      case argb8888_4Byte: revert_rgb888 = color_convert<rgb888_t, bgra8888_t>; break;
-      case rgb888_3Byte:   revert_rgb888 = color_convert<rgb888_t, bgr888_t  >; break;
-      case rgb666_3Byte:   revert_rgb888 = color_convert<rgb888_t, bgr666_t  >; break;
-      case rgb565_2Byte:   revert_rgb888 = color_convert<rgb888_t, swap565_t >; break;
-      case rgb332_1Byte:   revert_rgb888 = color_convert<rgb888_t, rgb332_t  >; break;
+      case argb8888_4Byte: revert_rgb888 = color_convert<rgb888_t, bgra8888_t >; break;
+      case rgb888_3Byte:   revert_rgb888 = color_convert<rgb888_t, bgr888_t   >; break;
+      case rgb666_3Byte:   revert_rgb888 = color_convert<rgb888_t, bgr666_t   >; break;
+      case rgb565_2Byte:   revert_rgb888 = color_convert<rgb888_t, swap565_t  >; break;
+      case rgb332_1Byte:   revert_rgb888 = color_convert<rgb888_t, rgb332_t   >; break;
+      case grayscale_8bit: revert_rgb888 = color_convert<rgb888_t, grayscale_t>; break;
       default:             revert_rgb888 = no_convert;
       }
     }
diff --git a/src/lgfx/v1/misc/pixelcopy.cpp b/src/lgfx/v1/misc/pixelcopy.cpp
index 092fe460..c65fc5fb 100644
--- a/src/lgfx/v1/misc/pixelcopy.cpp
+++ b/src/lgfx/v1/misc/pixelcopy.cpp
@@ -64,9 +64,12 @@ namespace lgfx
           if (src_depth == rgb565_2Byte) {
             fp_copy = pixelcopy_t::get_fp_copy_rgb_affine<swap565_t>(dst_depth);
             fp_skip = pixelcopy_t::skip_rgb_affine<swap565_t>;
-          } else { // src_depth == rgb332_1Byte:
+          } else if (src_depth == rgb332_1Byte) {
             fp_copy = pixelcopy_t::get_fp_copy_rgb_affine<rgb332_t >(dst_depth);
             fp_skip = pixelcopy_t::skip_rgb_affine<rgb332_t>;
+          } else { // src_depth == grayscale_8bit:
+            fp_copy = pixelcopy_t::get_fp_copy_rgb_affine<grayscale_t >(dst_depth);
+            fp_skip = pixelcopy_t::skip_rgb_affine<grayscale_t>;
           }
         }
       }
diff --git a/src/lgfx/v1/misc/pixelcopy.hpp b/src/lgfx/v1/misc/pixelcopy.hpp
index 9a7d497d..a047fb2d 100644
--- a/src/lgfx/v1/misc/pixelcopy.hpp
+++ b/src/lgfx/v1/misc/pixelcopy.hpp
@@ -111,6 +111,7 @@ namespace lgfx
            : (dst_depth == rgb666_3Byte) ? (std::is_same<bgr666_t, TSrc>::value
                                            ? copy_rgb_affine<bgr888_t, bgr888_t>
                                            : copy_rgb_affine<bgr666_t, TSrc>)
+           : (dst_depth == grayscale_8bit) ? copy_rgb_affine<grayscale_t, TSrc>
            : nullptr;
     }
 
@@ -119,6 +120,7 @@ namespace lgfx
     {
       return (src_depth == rgb565_2Byte) ? copy_rgb_affine<TDst, swap565_t>
            : (src_depth == rgb332_1Byte) ? copy_rgb_affine<TDst, rgb332_t >
+           : (src_depth == grayscale_8bit) ? copy_rgb_affine<TDst, grayscale_t>
            : (src_depth == rgb888_3Byte) ? copy_rgb_affine<TDst, bgr888_t >
                                          : (std::is_same<bgr666_t, TDst>::value)
                                            ? copy_rgb_affine<bgr888_t, bgr888_t>
@@ -132,6 +134,7 @@ namespace lgfx
            : (dst_depth == rgb332_1Byte) ? copy_palette_affine<rgb332_t , TPalette>
            : (dst_depth == rgb888_3Byte) ? copy_palette_affine<bgr888_t , TPalette>
            : (dst_depth == rgb666_3Byte) ? copy_palette_affine<bgr666_t , TPalette>
+           : (dst_depth == grayscale_8bit) ? copy_palette_affine<grayscale_t, TPalette>
            : nullptr;
     }
 
diff --git a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
index 24f3eb59..a7c285bd 100644
--- a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
+++ b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
@@ -273,6 +273,18 @@ namespace lgfx
     }
   }
 
+  static void setup_palette_ntsc_gray(uint32_t* palette, uint_fast16_t white_level, uint_fast16_t black_level, uint_fast8_t chroma_level)
+  { // fills palette[0..255], one entry per 8-bit gray level, using setup_palette_ntsc_inner for each entry
+    float chroma_scale = chroma_level / 7168.0f;
+    float satuation_base = black_level / 2; // integer division first, then converted to float
+    uint32_t diff_level = white_level - black_level; // span between the white and black levels
+
+    for (int idx = 0; idx < 256; ++idx)
+    {
+      palette[idx] = setup_palette_ntsc_inner(idx<<16|idx<<8|idx, diff_level, black_level, satuation_base, chroma_scale); // first argument repeats idx in all three low bytes (presumably R = G = B = idx as RGB888 - TODO confirm against setup_palette_ntsc_inner)
+    }
+  }
+
   static void setup_palette_pal_inner(uint8_t *result, uint32_t rgb, int diff_level, float base_level, float chroma_scale)
   {
     static constexpr const int8_t sin_tbl[5] = { 0, -1, 0, 1, 0 };
@@ -335,7 +347,6 @@ namespace lgfx
         o[idx + 256] = result_buf[1];
       }
     }
-
   }
 
   static void setup_palette_pal_332(uint32_t* palette, uint_fast16_t white_level, uint_fast16_t black_level, uint_fast8_t chroma_level)
@@ -362,6 +373,26 @@ namespace lgfx
     }
   }
 
+  static void setup_palette_pal_gray(uint32_t* palette, uint_fast16_t white_level, uint_fast16_t black_level, uint_fast8_t chroma_level)
+  { // fills two 256-entry tables, palette[0..255] and palette[256..511], one pair of entries per 8-bit gray level
+    auto e = palette; // first table (presumably the even-line entries - TODO confirm)
+    auto o = &palette[256]; // second table (presumably the odd-line entries - TODO confirm)
+
+    uint32_t result_buf[2]; // handed to setup_palette_pal_inner as a byte buffer, then read back as two words
+    float chroma_scale = black_level * chroma_level / 14336.0f;
+
+    int32_t diff_level = white_level - black_level; // span between the white and black levels
+    float base_level = (float)black_level;
+
+    for (int idx = 0; idx < 256; ++idx)
+    {
+      setup_palette_pal_inner((uint8_t*)result_buf, idx<<16|idx<<8|idx, diff_level, base_level, chroma_scale); // rgb argument repeats idx in all three low bytes, i.e. a gray value
+
+      e[idx] = result_buf[0]; // first result word goes to the first table
+      o[idx] = result_buf[1]; // second result word goes to the second table
+    }
+  }
+
   struct signal_spec_info_t
   {
     uint16_t total_scanlines;     // 走査線数(２フィールド、１フレーム);
@@ -474,6 +505,7 @@ namespace lgfx
   {
     void (*setup_palette_332)(uint32_t*, uint_fast16_t, uint_fast16_t, uint_fast8_t); // RGB332用パレット生成関数のポインタ;
     void (*setup_palette_565)(uint32_t*, uint_fast16_t, uint_fast16_t, uint_fast8_t); // RGB565用パレット生成関数のポインタ;
+    void (*setup_palette_gray)(uint32_t*, uint_fast16_t, uint_fast16_t, uint_fast8_t); // グレースケール用パレット生成関数のポインタ;
     uint32_t apll_sdm;            // apllのクロック設定;
     uint16_t blanking_mv;         // SYNCレベルとBLANKINGレベルの電圧差 mV
     uint16_t black_mv;            // SYNCレベルと黒レベルの電圧差 mV
@@ -485,6 +517,7 @@ namespace lgfx
   { // NTSC
     { setup_palette_ntsc_332
     , setup_palette_ntsc_565
+    , setup_palette_ntsc_gray
     , 0x049748    // 14.318237 // 映像に縞模様ノイズが出にくい;  ( 0x049746 = 14.318181 // 要求仕様に近い )
     , 286         // 286mV = 0IRE
     , 340         // 340mV = 7.5IRE  米国仕様では黒レベルは 7.5IRE
@@ -494,6 +527,7 @@ namespace lgfx
   , // NTSC_J
     { setup_palette_ntsc_332
     , setup_palette_ntsc_565
+    , setup_palette_ntsc_gray
     , 0x049748    // 14.318237 // 映像に縞模様ノイズが出にくい;  ( 0x049746 = 14.318181 // 要求仕様に近い )
     , 286         // 286mV = 0IRE
     , 286         // 286mV = 0IRE  日本仕様では黒レベルは 0IRE
@@ -503,6 +537,7 @@ namespace lgfx
   , // PAL
     { setup_palette_pal_332
     , setup_palette_pal_565
+    , setup_palette_pal_gray
     , 0x06A404    // 17.734476mhz ~4x
     , 300
     , 300
@@ -512,6 +547,7 @@ namespace lgfx
   , // PAL_M
     { setup_palette_pal_332
     , setup_palette_pal_565
+    , setup_palette_pal_gray
     , 0x0494DA
     , 300
     , 300
@@ -521,6 +557,7 @@ namespace lgfx
   , // PAL_N
     { setup_palette_pal_332
     , setup_palette_pal_565
+    , setup_palette_pal_gray
     , 0x498D1    // 17.734476mhz ~4x
     , 300
     , 300
@@ -1523,7 +1560,9 @@ namespace lgfx
 
   color_depth_t Panel_CVBS::setColorDepth(color_depth_t depth)
   {
-    depth = ((depth & color_depth_t::bit_mask) > 8) ? rgb565_2Byte : rgb332_1Byte;
+    if (depth != color_depth_t::grayscale_8bit) {
+      depth = ((depth & color_depth_t::bit_mask) > 8) ? rgb565_2Byte : rgb332_1Byte;
+    }
     if (depth != _write_depth)
     {
       bool flg_started = _started;
@@ -1611,11 +1650,14 @@ namespace lgfx
     if (internal.palette)
     {
       const signal_setup_info_t& setup_info_ = signal_setup_info_list[_config_detail.signal_type];
-      if (internal.pixel_per_bytes == 1) {
-        setup_info_.setup_palette_332(internal.palette, internal.WHITE_LEVEL, internal.BLACK_LEVEL, _config_detail.chroma_level);
-      } else {
-        setup_info_.setup_palette_565(internal.palette, internal.WHITE_LEVEL, internal.BLACK_LEVEL, _config_detail.chroma_level);
+      auto fp_setup_palette = internal.pixel_per_bytes == 1
+                            ? setup_info_.setup_palette_332
+                            : setup_info_.setup_palette_565
+                            ;
+      if (getWriteDepth() == grayscale_8bit) {
+        fp_setup_palette = setup_info_.setup_palette_gray;
       }
+      fp_setup_palette(internal.palette, internal.WHITE_LEVEL, internal.BLACK_LEVEL, _config_detail.chroma_level);
     }
   }
 

From 51a640a56a2692d045c2344d12448630517d90c9 Mon Sep 17 00:00:00 2001
From: lovyan03 <42724151+lovyan03@users.noreply.github.com>
Date: Tue, 3 Jan 2023 12:53:20 +0900
Subject: [PATCH 07/12] improve speed for Panel_CVBS

---
 src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp | 805 ++++++++++++++++++---
 1 file changed, 688 insertions(+), 117 deletions(-)

diff --git a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
index a7c285bd..74dd4b9c 100644
--- a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
+++ b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
@@ -15,7 +15,7 @@ Original Source:
  [mongonta0716](https://github.com/mongonta0716)
  [tobozo](https://github.com/tobozo)
 
-Inherited Sources:
+Inspiration Sources:
  [Roger Cheng](https://github.com/Roger-random/ESP_8_BIT_composite)
  [rossum](https://github.com/rossumur/esp_8_bit)
 /----------------------------------------------------------------------------*/
@@ -58,7 +58,7 @@ namespace lgfx
 //----------------------------------------------------------------------------
 
   static constexpr const char *TAG = "Panel_CVBS";
-
+//debug
   #define ISR_BEGIN()
   #define ISR_END()
   #define MEMCPY_BEGIN()
@@ -171,12 +171,11 @@ namespace lgfx
     uint8_t** lines = nullptr;        // フレームバッファ配列ポインタ;
     uint16_t* allocated_list = nullptr;  // フレームバッファのalloc割当対象のインデクス番号(free時に使用);
     uint32_t* palette = nullptr;   // RGB332から波形に変換するためのテーブル;
-    void (*fp_blit)(uint32_t*, const uint8_t*, size_t, const uint32_t*, bool, int, int);
+    void (*fp_blit)(uint32_t*, const uint8_t*, size_t, const uint32_t*, int, int);
     uint32_t burst_wave[2];       // カラーバースト信号の波形データ(EVENとODDで２通り)
     intr_handle_t isr_handle = nullptr;
     lldesc_t dma_desc[2];
-    int16_t blit_ratio_h = 0;
-    int16_t blit_ratio_l = 0;
+    int32_t mul_ratio = 0;
     int16_t offset_y;
     uint16_t memory_height;
     uint16_t panel_height;
@@ -245,14 +244,14 @@ namespace lgfx
         int g = (idx & 7) << 3;
         r = (r * 0x21) >> 2;
         g = (g * 0x41) >> 4;
-        palette[idx] = setup_palette_ntsc_inner(r<<16|g<<8, diff_level, base_level, satuation_base, chroma_scale);
+        palette[idx << 1] = setup_palette_ntsc_inner(r<<16|g<<8, diff_level, base_level, satuation_base, chroma_scale);
       }
       { // RGB565の下位1Byteに対するテーブル
         int g = idx >> 5;
         int b = idx & 0x1F;
         b = (b * 0x21) >> 2;
         g = (g * 0x41) >> 4;
-        palette[idx + 256] = setup_palette_ntsc_inner(g<<8|b, diff_level, base_level, satuation_base, chroma_scale);
+        palette[(idx << 1) + 1] = setup_palette_ntsc_inner(g<<8|b, diff_level, base_level, satuation_base, chroma_scale);
       }
     }
   }
@@ -333,8 +332,8 @@ namespace lgfx
         g = (g * 0x41) >> 4;
 
         setup_palette_pal_inner((uint8_t*)result_buf, r<<16|g<<8, diff_level, base_level, chroma_scale);
-        e[idx] = result_buf[0];
-        o[idx] = result_buf[1];
+        e[idx << 1] = result_buf[0];
+        o[idx << 1] = result_buf[1];
       }
       { // RGB565の下位1Byteに対するテーブル
         int g = idx >> 5;
@@ -343,8 +342,8 @@ namespace lgfx
         g = (g * 0x41) >> 4;
 
         setup_palette_pal_inner((uint8_t*)result_buf, g<<8|b, diff_level, base_level, chroma_scale);
-        e[idx + 256] = result_buf[0];
-        o[idx + 256] = result_buf[1];
+        e[(idx << 1) + 1] = result_buf[0];
+        o[(idx << 1) + 1] = result_buf[1];
       }
     }
   }
@@ -566,13 +565,581 @@ namespace lgfx
     }
   };
 
+#if 1
+
+// a4 = loop count, becomes (src_length + 1) >> 1
+// a6 = alternate shift amount; exchanged with the SAR register (xsr) to switch the shift amount between 8 and 0
+// a7 = ratio
+// a8 = -ratio - 32768
+// a9 = diff accumulator, starts at (ratio >> 1) - 16384
+// (the original note reserved a15 = constant 8 for the odd xor; a15 is not referenced by the macros below)
+// Setup used by blit_x10_x15 / x20_x30 / x40_x50. NOTE(review): addmi normally takes a multiple of 256 - confirm the assembler accepts the immediate 1 below.
+#define ASM_INIT_BLIT_NEGATIVE \
+    "ssl        a6                      \n" \
+    "addi       a6, a6, 24              \n" \
+    "srai       a9, a7, 1               \n" \
+    "addmi      a9, a9, -16384          \n" \
+    "addmi      a4, a4, 1               \n" \
+    "srai       a4, a4, 1               \n" \
+    "neg        a8, a7                  \n" \
+    "addmi      a8, a8, -32768          \n"
+
+// Setup used by blit_x15_x20 / x30_x40 / x50_x60: SAR from a6 (odd), a6 += 24, a9 = (ratio >> 1) - 16384, a4 = (a4 + 1) >> 1, a7 = ratio + 32768 (a8 is not set).
+#define ASM_INIT_BLIT_POSITIVE \
+    "ssl        a6                      \n" \
+    "addi       a6, a6, 24              \n" \
+    "srai       a9, a7, 1               \n" \
+    "addmi      a9, a9, -16384          \n" \
+    "addmi      a4, a4, 1               \n" \
+    "srai       a4, a4, 1               \n" \
+    "addmi      a7, a7, 16384           \n" \
+    "addmi      a7, a7, 16384           \n"
+// Reads 2 source bytes at a3 and looks each up as a 32-bit entry of the table at a5: a10 = p[s[0]], a11 = p[s[1]]; a3 += 2.
+#define ASM_READ_RGB332_2PIXEL \
+    "l8ui       a10,a3, 0               \n" \
+    "l8ui       a11,a3, 1               \n" \
+    "addi       a3, a3, 2               \n" \
+    "addx4      a10,a10,a5              \n" \
+    "l32i       a10,a10,0               \n" \
+    "addx4      a11,a11,a5              \n" \
+    "l32i       a11,a11,0               \n"
+// Reads 2 pixels (4 bytes) at a3 using 8-byte table rows at a5: a10 = p[2*s[0]] + p[2*s[1]+1], a11 = p[2*s[2]] + p[2*s[3]+1]; a3 += 4. Uses a4 and a14 as scratch.
+#define ASM_READ_RGB565_2PIXEL \
+    "l8ui       a14,a3, 1               \n" \
+    "l8ui       a4, a3, 3               \n" \
+    "l8ui       a10,a3, 0               \n" \
+    "l8ui       a11,a3, 2               \n" \
+    "addx8      a14,a14,a5              \n" \
+    "addx8      a4, a4, a5              \n" \
+    "addx8      a10,a10,a5              \n" \
+    "addx8      a11,a11,a5              \n" \
+    "l32i       a14,a14,4               \n" \
+    "l32i       a10,a10,0               \n" \
+    "l32i       a4, a4, 4               \n" \
+    "l32i       a11,a11,0               \n" \
+    "add        a10,a10,a14             \n" \
+    "addi       a3, a3, 4               \n" \
+    "add        a11,a11,a4              \n"
+// Reads 4 source bytes at a3 and looks each up as a 32-bit entry of the table at a5: a10..a13 = p[s[0]]..p[s[3]]; a3 += 4.
+#define ASM_READ_RGB332_4PIXEL \
+    "l8ui       a10,a3, 0               \n" \
+    "l8ui       a11,a3, 1               \n" \
+    "l8ui       a12,a3, 2               \n" \
+    "l8ui       a13,a3, 3               \n" \
+    "addx4      a10,a10,a5              \n" \
+    "l32i       a10,a10,0               \n" \
+    "addx4      a11,a11,a5              \n" \
+    "l32i       a11,a11,0               \n" \
+    "addx4      a12,a12,a5              \n" \
+    "l32i       a12,a12,0               \n" \
+    "addx4      a13,a13,a5              \n" \
+    "l32i       a13,a13,0               \n" \
+    "addi       a3, a3, 4               \n"
+// Reads 4 pixels (8 bytes) at a3 using 8-byte table rows at a5: for pixel k, a(10+k) = p[2*s[2k]] + p[2*s[2k+1]+1]; a3 += 8. Uses a4 and a14 as scratch.
+#define ASM_READ_RGB565_4PIXEL \
+    "l8ui       a14,a3, 1               \n" \
+    "l8ui       a4, a3, 3               \n" \
+    "l8ui       a10,a3, 0               \n" \
+    "l8ui       a11,a3, 2               \n" \
+    "addx8      a14,a14,a5              \n" \
+    "addx8      a4, a4, a5              \n" \
+    "addx8      a10,a10,a5              \n" \
+    "addx8      a11,a11,a5              \n" \
+    "l32i       a14,a14,4               \n" \
+    "l32i       a10,a10,0               \n" \
+    "l32i       a4, a4, 4               \n" \
+    "l32i       a11,a11,0               \n" \
+    "add        a10,a10,a14             \n" \
+    "l8ui       a14,a3, 5               \n" \
+    "add        a11,a11,a4              \n" \
+    "l8ui       a4, a3, 7               \n" \
+    "l8ui       a12,a3, 4               \n" \
+    "l8ui       a13,a3, 6               \n" \
+    "addx8      a14,a14,a5              \n" \
+    "addx8      a4, a4, a5              \n" \
+    "addx8      a12,a12,a5              \n" \
+    "addx8      a13,a13,a5              \n" \
+    "l32i       a14,a14,4               \n" \
+    "l32i       a12,a12,0               \n" \
+    "l32i       a4, a4, 4               \n" \
+    "l32i       a13,a13,0               \n" \
+    "add        a12,a12,a14             \n" \
+    "addi       a3, a3, 8               \n" \
+    "add        a13,a13,a4              \n"
+
+
+
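+/* Register values right after a blit_ function is called
+    a0 : return address       (not used)
+    a1 : stack pointer        (must not be changed)
+    a2 : uint32_t* d          (advanced while being used inside the loop)
+    a3 : const uint8_t* s     (advanced while being used inside the loop)
+    a4 : size_t src_length    (set up as the loop count, then reused for other purposes)
+    a5 : const uint32_t* p    (used as is, never changed)
+    a6 : int32_t odd          (loaded into SAR, then offset by 24 and exchanged with SAR)
+    a7 : int32_t ratio        (ASM_INIT_BLIT_POSITIVE adds 32768; otherwise unchanged)
+//
+    a8 : - ratio - 32768      (only set by ASM_INIT_BLIT_NEGATIVE)
+    a9 : diff                 used to decide which scaling ratio to apply
+*/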
+
+  // x5 ~ x6 : RGB565 source, 2 pixels per loop iteration; the output pointer advances by 5 or 6 uint32_t words per iteration depending on the sign of diff (a9).
+  void IRAM_ATTR blit_x50_x60_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  {
+    __asm__ (
+    ASM_INIT_BLIT_POSITIVE
+    "loop       a4, LOOP_x50_x60_565    \n" // start of loop
+    ASM_READ_RGB565_2PIXEL
+
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 4               \n" // store 2,3
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 12              \n" // store 6,7
+    "s32i       a4, a2, 20              \n" // store 10,11
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 0               \n" // store 0,1
+    "s32i       a4, a2, 8               \n" // store 4,5
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 16              \n" // store 8,9
+    "bgez       a9, BGEZ_x50_x60_565    \n"
+
+// when diff is negative: x5.0
+    "s16i       a4, a2, 8               \n" //   store 5
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "add        a9, a9, a7              \n" // diff += a7 (ratio + 32768 after ASM_INIT_BLIT_POSITIVE; nets to +ratio with the -32768 below)
+    "addi       a2, a2, -1*4            \n" // dest -= 1 word (the code below adds 6, so the net advance is +5)
+
+"BGEZ_x50_x60_565:                  \n"
+// when diff is non-negative: x6.0 (the negative path also falls through here)
+    "addi       a2, a2, 6*4             \n" // a2 += 6 * sizeof(uint32_t)
+    "addmi      a9, a9, -32768          \n" // diff -= 32768
+"LOOP_x50_x60_565:                  \n"
+    );
+  }
+
+  // x5 ~ x6 : RGB332 source, 2 pixels per loop iteration; the output pointer advances by 5 or 6 uint32_t words per iteration depending on the sign of diff (a9).
+  void IRAM_ATTR blit_x50_x60_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  {
+    __asm__ (
+    ASM_INIT_BLIT_POSITIVE
+    "loop       a4, LOOP_x50_x60_332    \n" // start of loop
+    ASM_READ_RGB332_2PIXEL
+
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 4               \n" // store 2,3
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 12              \n" // store 6,7
+    "s32i       a4, a2, 20              \n" // store 10,11
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 0               \n" // store 0,1
+    "s32i       a4, a2, 8               \n" // store 4,5
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 16              \n" // store 8,9
+    "bgez       a9, BGEZ_x50_x60_332    \n"
+
+// when diff is negative: x5.0
+    "s16i       a4, a2, 8               \n" //   store 5
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "add        a9, a9, a7              \n" // diff += a7 (ratio + 32768 after ASM_INIT_BLIT_POSITIVE; nets to +ratio with the -32768 below)
+    "addi       a2, a2, -1*4            \n" // dest -= 1 word (the code below adds 6, so the net advance is +5)
+
+"BGEZ_x50_x60_332:                  \n"
+// when diff is non-negative: x6.0 (the negative path also falls through here)
+    "addi       a2, a2, 6*4             \n" // a2 += 6 * sizeof(uint32_t)
+    "addmi      a9, a9, -32768          \n" // diff -= 32768
+"LOOP_x50_x60_332:                  \n"
+    );
+  }
+
+  // x4 ~ x5 : RGB565 source, 2 pixels per loop iteration; the output pointer advances by 4 or 5 uint32_t words per iteration depending on the sign of diff (a9).
+  void IRAM_ATTR blit_x40_x50_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  {
+    __asm__ (
+    ASM_INIT_BLIT_NEGATIVE
+    "loop       a4, LOOP_x40_x50_565    \n" // start of loop
+    ASM_READ_RGB565_2PIXEL
+
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 4               \n" // store 2,3
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 12              \n" // store 6,7
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a10,a10                 \n"
+    "s32i       a10,a2, 0               \n" // store 0,1
+    "sll        a11,a11                 \n"
+    "s32i       a11,a2, 8               \n" // store 4,5
+    "bltz       a9, BLTZ_x40_x50_565    \n"
+
+// when diff is non-negative: x5.0
+    "srli       a10,a10,16              \n"
+    "s16i       a10,a2, 10              \n" // store 4
+    "s32i       a11,a2, 16              \n" // store 8,9
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "add        a9, a9, a8              \n" // diff += - ratio - 32768
+    "addi       a2, a2, 4               \n" // dest += 1 word (the code below adds 4, so the net advance is +5)
+
+"BLTZ_x40_x50_565:                  \n"
+// when diff is negative: x4.0 (the non-negative path also falls through here)
+    "add        a9, a9, a7              \n" // diff += ratio
+    "addi       a2, a2, 4*4             \n" // a2 += 4 * sizeof(uint32_t)
+"LOOP_x40_x50_565:                  \n"
+    );
+  }
+
+  // x4 ~ x5 : RGB332 source, 2 pixels per loop iteration; the output pointer advances by 4 or 5 uint32_t words per iteration depending on the sign of diff (a9).
+  void IRAM_ATTR blit_x40_x50_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  {
+    __asm__ (
+    ASM_INIT_BLIT_NEGATIVE
+    "loop       a4, LOOP_x40_x50_332    \n" // start of loop
+    ASM_READ_RGB332_2PIXEL
+
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 4               \n" // store 2,3
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 12              \n" // store 6,7
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a10,a10                 \n"
+    "s32i       a10,a2, 0               \n" // store 0,1
+    "sll        a11,a11                 \n"
+    "s32i       a11,a2, 8               \n" // store 4,5
+    "bltz       a9, BLTZ_x40_x50_332    \n"
+
+// when diff is non-negative: x5.0
+    "srli       a10,a10,16              \n"
+    "s16i       a10,a2, 10              \n" // store 4
+    "s32i       a11,a2, 16              \n" // store 8,9
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "add        a9, a9, a8              \n" // diff += - ratio - 32768
+    "addi       a2, a2, 4               \n" // dest += 1 word (the code below adds 4, so the net advance is +5)
+
+"BLTZ_x40_x50_332:                  \n"
+// when diff is negative: x4.0 (the non-negative path also falls through here)
+    "add        a9, a9, a7              \n" // diff += ratio
+    "addi       a2, a2, 4*4             \n" // a2 += 4 * sizeof(uint32_t)
+"LOOP_x40_x50_332:                  \n"
+    );
+  }
+
+  // x3 ~ x4 : RGB565 source, 2 pixels per loop iteration; the output pointer advances by 3 or 4 uint32_t words per iteration depending on the sign of diff (a9).
+  void IRAM_ATTR blit_x30_x40_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  {
+    __asm__ (
+    ASM_INIT_BLIT_POSITIVE
+    "loop       a4, LOOP_x30_x40_565    \n"  // start of loop
+    ASM_READ_RGB565_2PIXEL
+
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 0               \n" // store 0,1
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 8               \n" // store 4,5
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 4               \n" // store 2,3
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 12              \n" // store 6,7
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "bgez       a9, BGEZ_x30_x40_565    \n"
+
+// when diff is negative: x3.0
+    "s16i       a4, a2, 4               \n" //   store 3
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "add        a9, a9, a7              \n" // diff += a7 (ratio + 32768 after ASM_INIT_BLIT_POSITIVE; nets to +ratio with the -32768 below)
+    "addi       a2, a2, -1*4            \n" // dest -= 1 word (the code below adds 4, so the net advance is +3)
+
+"BGEZ_x30_x40_565:                  \n"
+// when diff is non-negative: x4.0 (the negative path also falls through here)
+    "addi       a2, a2, 4*4             \n" // a2 += 4 * sizeof(uint32_t)
+    "addmi      a9, a9, -32768          \n" // diff -= 32768
+"LOOP_x30_x40_565:                  \n"
+    );
+  }
+
+  // x3 ~ x4 : RGB332 source, 2 pixels per loop iteration; the output pointer advances by 3 or 4 uint32_t words per iteration depending on the sign of diff (a9).
+  void IRAM_ATTR blit_x30_x40_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  {
+    __asm__ (
+    ASM_INIT_BLIT_POSITIVE
+
+    "loop       a4, LOOP_x30_x40_332    \n"  // start of loop
+    ASM_READ_RGB332_2PIXEL
+
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 0               \n" // store 0,1
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 8               \n" // store 4,5
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 4               \n" // store 2,3
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 12              \n" // store 6,7
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "bgez       a9, BGEZ_x30_x40_332    \n"
+
+// when diff is negative: x3.0
+    "s16i       a4, a2, 4               \n" //   store 3
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "add        a9, a9, a7              \n" // diff += a7 (ratio + 32768 after ASM_INIT_BLIT_POSITIVE; nets to +ratio with the -32768 below)
+    "addi       a2, a2, -1*4            \n" // dest -= 1 word (the code below adds 4, so the net advance is +3)
+
+"BGEZ_x30_x40_332:                  \n"
+// when diff is non-negative: x4.0 (the negative path also falls through here)
+    "addi       a2, a2, 4*4             \n" // a2 += 4 * sizeof(uint32_t)
+    "addmi      a9, a9, -32768          \n" // diff -= 32768
+"LOOP_x30_x40_332:                  \n"
+    );
+  }
+
+  // x2 ~ x3 : RGB565 source, 2 pixels per loop iteration; the output pointer advances by 2 or 3 uint32_t words per iteration depending on the sign of diff (a9).
+  void IRAM_ATTR blit_x20_x30_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  {
+    __asm__ (
+    ASM_INIT_BLIT_NEGATIVE
+    "loop       a4, LOOP_x20_x30_565    \n"  // start of loop
+    ASM_READ_RGB565_2PIXEL
+
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 0               \n" // store 0,1
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 4               \n" // store 2,3
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "bltz       a9, BLTZ_x20_x30_565    \n"
+
+// when diff is non-negative: x3.0
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 8               \n" // store 4,5
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "srli       a4, a10,16              \n" // a4 = a10 >> 16
+    "sll        a4, a4                  \n" // a4 = !odd a10
+    "s16i       a4, a2, 6               \n" // store 2
+    "addi       a2, a2, 4               \n"
+    "add        a9, a9, a8              \n" // diff += - ratio - 32768
+
+"BLTZ_x20_x30_565:                  \n"
+// when diff is negative: x2.0 (the non-negative path also falls through here)
+    "add        a9, a9, a7              \n" // diff += ratio
+    "addi       a2, a2, 2*4             \n" // a2 += 2 * sizeof(uint32_t)
+"LOOP_x20_x30_565:                  \n"
+    );
+  }
+
+  // x2 ~ x3 : RGB332 source, 2 pixels per loop iteration; the output pointer advances by 2 or 3 uint32_t words per iteration depending on the sign of diff (a9).
+  void IRAM_ATTR blit_x20_x30_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  {
+    __asm__ (
+    ASM_INIT_BLIT_NEGATIVE
+    "loop       a4, LOOP_x20_x30_332    \n"  // start of loop
+    ASM_READ_RGB332_2PIXEL
+
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 0               \n" // store 0,1
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 4               \n" // store 2,3
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "bltz       a9, BLTZ_x20_x30_332    \n"
+
+// when diff is non-negative: x3.0
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 8               \n" // store 4,5
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "srli       a4, a10,16              \n" // a4 = a10 >> 16
+    "sll        a4, a4                  \n" // a4 = !odd a10
+    "s16i       a4, a2, 6               \n" // store 2
+    "addi       a2, a2, 4               \n"
+    "add        a9, a9, a8              \n" // diff += - ratio - 32768
+
+"BLTZ_x20_x30_332:                  \n"
+// when diff is negative: x2.0 (the non-negative path also falls through here)
+    "add        a9, a9, a7              \n" // diff += ratio
+    "addi       a2, a2, 2*4             \n" // a2 += 2 * sizeof(uint32_t)
+"LOOP_x20_x30_332:                  \n"
+    );
+  }
+
+  // x1.5~x2.0 : RGB565 source, 4 pixels per loop iteration; the output pointer advances by 3 or 4 uint32_t words per iteration depending on the sign of diff (a9).
+  void IRAM_ATTR blit_x15_x20_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  {
+    __asm__ (
+    ASM_INIT_BLIT_POSITIVE
+    "addi       a4, a4, 1               \n" // halve the loop count once more (rounding up):
+    "srli       a4, a4, 1               \n" // each iteration consumes 4 pixels instead of 2
+    "loop       a4, LOOP_x15_x20_565    \n"  // start of loop
+    ASM_READ_RGB565_4PIXEL
+
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 0               \n" // store 0,1
+    "sll        a4, a12                 \n"
+    "s32i       a4, a2, 8               \n" // store 4,5
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 4               \n" // store 2,3
+    "sll        a4, a13                 \n"
+    "s32i       a4, a2, 12              \n" // store 6,7
+    "xsr        a6, SAR                 \n" // toggle shift amount
+
+    "bgez       a9, BGEZ_x15_x20_565    \n"
+// when diff is negative: x1.5
+    "sll        a4, a13                 \n"
+    "s16i       a4, a2, 8               \n" //   store 5
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a12                 \n"
+    "s16i       a4, a2, 4               \n" //   store 3
+    "add        a9, a9, a7              \n" // diff += a7 (ratio + 32768 after ASM_INIT_BLIT_POSITIVE; nets to +ratio with the -32768 below)
+    "addi       a2, a2, -1*4            \n" // dest -= 1 word (the code below adds 4, so the net advance is +3)
+
+"BGEZ_x15_x20_565:                  \n"
+// when diff is non-negative: x2.0 (the negative path also falls through here)
+    "addi       a2, a2, 4*4             \n" // a2 += 4 * sizeof(uint32_t)
+    "addmi      a9, a9, -32768          \n" // diff -= 32768
+"LOOP_x15_x20_565:                  \n"
+    );
+  }
+
+  // x1.5~x2.0 : RGB332 source, 4 pixels per loop iteration; the output pointer advances by 3 or 4 uint32_t words per iteration depending on the sign of diff (a9).
+  void IRAM_ATTR blit_x15_x20_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  {
+    __asm__ (
+    ASM_INIT_BLIT_POSITIVE
+    "addi       a4, a4, 1               \n" // halve the loop count once more (rounding up):
+    "srli       a4, a4, 1               \n" // each iteration consumes 4 pixels instead of 2
+    "loop       a4, LOOP_x15_x20_332    \n"  // start of loop
+    ASM_READ_RGB332_4PIXEL
+
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 0               \n" // store 0,1
+    "sll        a4, a12                 \n"
+    "s32i       a4, a2, 8               \n" // store 4,5
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 4               \n" // store 2,3
+    "sll        a4, a13                 \n"
+    "s32i       a4, a2, 12              \n" // store 6,7
+    "xsr        a6, SAR                 \n" // toggle shift amount
+
+    "bgez       a9, BGEZ_x15_x20_332    \n"
+// when diff is negative: x1.5
+    "sll        a4, a13                 \n"
+    "s16i       a4, a2, 8               \n" //   store 5
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a12                 \n"
+    "s16i       a4, a2, 4               \n" //   store 3
+    "add        a9, a9, a7              \n" // diff += a7 (ratio + 32768 after ASM_INIT_BLIT_POSITIVE; nets to +ratio with the -32768 below)
+    "addi       a2, a2, -1*4            \n" // dest -= 1 word (the code below adds 4, so the net advance is +3)
+
+"BGEZ_x15_x20_332:                  \n"
+// when diff is non-negative: x2.0 (the negative path also falls through here)
+    "addi       a2, a2, 4*4             \n" // a2 += 4 * sizeof(uint32_t)
+    "addmi      a9, a9, -32768          \n" // diff -= 32768
+"LOOP_x15_x20_332:                  \n"
+    );
+  }
+
+  // x1.0~x1.5 : RGB565 source, 4 pixels per loop iteration; the output pointer advances by 2 or 3 uint32_t words per iteration depending on the sign of diff (a9).
+  void IRAM_ATTR blit_x10_x15_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  {
+    __asm__ (
+    ASM_INIT_BLIT_NEGATIVE
+    "addi       a4, a4, 1               \n" // halve the loop count once more (rounding up):
+    "srli       a4, a4, 1               \n" // each iteration consumes 4 pixels instead of 2
+    "loop       a4, LOOP_x10_x15_565    \n"  // start of loop
+    ASM_READ_RGB565_4PIXEL
+
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 0               \n" // store 0,1
+    "bltz       a9, BLTZ_x10_x15_565    \n"
+// when diff is non-negative: x1.5
+
+    "sll        a4, a13                 \n"
+    "s32i       a4, a2, 8               \n" // store 4,5
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 4               \n" // store 2
+    "sll        a4, a12                 \n"
+    "s16i       a4, a2, 4               \n" //   store 3
+    "addi       a2, a2, 3*4             \n"
+    "add        a9, a9, a8              \n" // diff += - ratio - 32768
+    "j          ENDIF_x10_x15_565       \n"
+
+"BLTZ_x10_x15_565:                  \n"
+// when diff is negative: x1.0
+
+    "sll        a4, a11                 \n"
+    "s16i       a4, a2, 0               \n" //   store 1
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a12                 \n"
+    "s32i       a4, a2, 4               \n" // store 2
+    "sll        a4, a13                 \n"
+    "s16i       a4, a2, 4               \n" //   store 3
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "addi       a2, a2, 2*4             \n" // a2 += 2 * sizeof(uint32_t)
+
+"ENDIF_x10_x15_565:                 \n"
+    "add        a9, a9, a7              \n" // diff += ratio
+"LOOP_x10_x15_565:                  \n"
+    );
+  }
+
+  // x1.0~x1.5 : RGB332 source, 4 pixels per loop iteration; the output pointer advances by 2 or 3 uint32_t words per iteration depending on the sign of diff (a9).
+  void IRAM_ATTR blit_x10_x15_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  {
+    __asm__ (
+    ASM_INIT_BLIT_NEGATIVE
+    "addi       a4, a4, 1               \n" // halve the loop count once more (rounding up):
+    "srli       a4, a4, 1               \n" // each iteration consumes 4 pixels instead of 2
+    "loop       a4, LOOP_x10_x15_332    \n"  // start of loop
+    ASM_READ_RGB332_4PIXEL
+
+    "sll        a4, a10                 \n"
+    "s32i       a4, a2, 0               \n" // store 0,1
+    "bltz       a9, BLTZ_x10_x15_332    \n"
+// when diff is non-negative: x1.5
+
+    "sll        a4, a13                 \n"
+    "s32i       a4, a2, 8               \n" // store 4,5
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a11                 \n"
+    "s32i       a4, a2, 4               \n" // store 2
+    "sll        a4, a12                 \n"
+    "s16i       a4, a2, 4               \n" //   store 3
+    "addi       a2, a2, 3*4             \n"
+    "add        a9, a9, a8              \n" // diff += - ratio - 32768
+    "j          ENDIF_x10_x15_332       \n"
+
+"BLTZ_x10_x15_332:                  \n"
+// when diff is negative: x1.0
+
+    "sll        a4, a11                 \n"
+    "s16i       a4, a2, 0               \n" //   store 1
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "sll        a4, a12                 \n"
+    "s32i       a4, a2, 4               \n" // store 2
+    "sll        a4, a13                 \n"
+    "s16i       a4, a2, 4               \n" //   store 3
+    "xsr        a6, SAR                 \n" // toggle shift amount
+    "addi       a2, a2, 2*4             \n" // a2 += 2 * sizeof(uint32_t)
+
+"ENDIF_x10_x15_332:                 \n"
+    "add        a9, a9, a7              \n" // diff += ratio
+"LOOP_x10_x15_332:                  \n"
+    );
+  }
+
+#else
+
   // x5 ~ x6
-  void IRAM_ATTR blit_x50_x60_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_5, int ratio_6)
+  void IRAM_ATTR blit_x50_x60_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
   {
-    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
     uint_fast8_t shift1 = shift0 ^ 8;
-    int diff = (ratio_6 + ratio_5) >> 1;
-    auto pl = p + 256;
+    int diff = (ratio - 32768) >> 1;
 
     src_length = (src_length + 1) >> 1;
     while (src_length--)
@@ -581,17 +1148,17 @@ namespace lgfx
       uint32_t p1h = s[2];
       uint32_t p0l = s[1];
       uint32_t p0h = s[0];
-      p1l = pl[p1l];
-      p1h = p [p1h];
-      p0l = pl[p0l];
-      p0h = p [p0h];
+      p1l = p[(p1l<<1)+1];
+      p1h = p[(p1h<<1)  ];
+      p0l = p[(p0l<<1)+1];
+      p0h = p[(p0h<<1)  ];
       s += 4;
       uint32_t color1 = p1h + p1l;
       uint32_t color0 = p0h + p0l;
 
       if (diff < 0)
       {
-        diff += ratio_5;
+        diff += ratio;
         d[1] = color0 << shift1;
         d[3] = color1 << shift1;
         d[0] = color0 <<= shift0;
@@ -602,7 +1169,7 @@ namespace lgfx
       }
       else
       {
-        diff += ratio_6;
+        diff -= 32768;
         d[1] = color0 << shift1;
         d[3] = color1 << shift1;
         d[5] = color1 << shift1;
@@ -615,11 +1182,11 @@ namespace lgfx
   }
 
   // x5 ~ x6
-  void IRAM_ATTR blit_x50_x60_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_5, int ratio_6)
+  void IRAM_ATTR blit_x50_x60_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
   {
-    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
     uint_fast8_t shift1 = shift0 ^ 8;
-    int diff = (ratio_6 + ratio_5) >> 1;
+    int diff = (ratio - 32768) >> 1;
 
     src_length = (src_length + 1) >> 1;
     while (src_length--)
@@ -631,7 +1198,7 @@ namespace lgfx
       uint32_t color1 = p[p1];
       if (diff < 0)
       {
-        diff += ratio_5;
+        diff += ratio;
         d[1] = color0 << shift1;
         d[3] = color1 << shift1;
         d[0] = color0 <<= shift0;
@@ -642,7 +1209,7 @@ namespace lgfx
       }
       else
       {
-        diff += ratio_6;
+        diff -= 32768;
         d[1] = color0 << shift1;
         d[3] = color1 << shift1;
         d[5] = color1 << shift1;
@@ -655,12 +1222,11 @@ namespace lgfx
   }
 
   // x4 ~ x5
-  void IRAM_ATTR blit_x40_x50_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_4, int ratio_5)
+  void IRAM_ATTR blit_x40_x50_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
   {
-    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
     uint_fast8_t shift1 = shift0 ^ 8;
-    int diff = (ratio_5 + ratio_4) >> 1;
-    auto pl = p + 256;
+    int diff = (ratio - 32768) >> 1;
 
     src_length = (src_length + 1) >> 1;
     while (src_length--)
@@ -669,17 +1235,17 @@ namespace lgfx
       uint32_t p1h = s[2];
       uint32_t p0l = s[1];
       uint32_t p0h = s[0];
-      p1l = pl[p1l];
-      p1h = p [p1h];
-      p0l = pl[p0l];
-      p0h = p [p0h];
+      p1l = p[(p1l<<1)+1];
+      p1h = p[(p1h<<1)  ];
+      p0l = p[(p0l<<1)+1];
+      p0h = p[(p0h<<1)  ];
       s += 4;
       uint32_t color1 = p1h + p1l;
       uint32_t color0 = p0h + p0l;
 
       if (diff < 0)
       {
-        diff += ratio_4;
+        diff += ratio;
         d[1] = color0 << shift1;
         d[3] = color1 << shift1;
         d[0] = color0 << shift0;
@@ -688,7 +1254,7 @@ namespace lgfx
       }
       else
       {
-        diff += ratio_5;
+        diff -= 32768;
         d[1] = color0 << shift1;
         d[3] = color1 << shift1;
         color0 <<= shift0;
@@ -703,11 +1269,11 @@ namespace lgfx
   }
 
   // x4 ~ x5
-  void IRAM_ATTR blit_x40_x50_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_4, int ratio_5)
+  void IRAM_ATTR blit_x40_x50_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
   {
-    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
     uint_fast8_t shift1 = shift0 ^ 8;
-    int diff = (ratio_5 + ratio_4) >> 1;
+    int diff = (ratio - 32768) >> 1;
 
     src_length = (src_length + 1) >> 1;
     while (src_length--)
@@ -719,7 +1285,7 @@ namespace lgfx
       uint32_t color1 = p[p1];
       if (diff < 0)
       {
-        diff += ratio_4;
+        diff += ratio;
         d[1] = color0 << shift1;
         d[3] = color1 << shift1;
         d[0] = color0 << shift0;
@@ -728,7 +1294,7 @@ namespace lgfx
       }
       else
       {
-        diff += ratio_5;
+        diff -= 32768;
         d[1] = color0 << shift1;
         d[3] = color1 << shift1;
         color0 <<= shift0;
@@ -743,12 +1309,11 @@ namespace lgfx
   }
 
   // x3 ~ x4
-  void IRAM_ATTR blit_x30_x40_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_3, int ratio_4)
+  void IRAM_ATTR blit_x30_x40_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
   {
-    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
     uint_fast8_t shift1 = shift0 ^ 8;
-    int diff = (ratio_4 + ratio_3) >> 1;
-    auto pl = p + 256;
+    int diff = (ratio - 32768) >> 1;
 
     src_length = (src_length + 1) >> 1;
     while (src_length--)
@@ -757,17 +1322,17 @@ namespace lgfx
       uint32_t p1h = s[2];
       uint32_t p0l = s[1];
       uint32_t p0h = s[0];
-      p1l = pl[p1l];
-      p1h = p [p1h];
-      p0l = pl[p0l];
-      p0h = p [p0h];
+      p1l = p[(p1l<<1)+1];
+      p1h = p[(p1h<<1)  ];
+      p0l = p[(p0l<<1)+1];
+      p0h = p[(p0h<<1)  ];
       s += 4;
       uint32_t color1 = p1h + p1l;
       uint32_t color0 = p0h + p0l;
 
       if (diff < 0)
       {
-        diff += ratio_3;
+        diff += ratio;
         d[0] = color0 << shift0;
         d[2] = color1 << shift0;
         color0 = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF));
@@ -777,7 +1342,7 @@ namespace lgfx
       }
       else
       {
-        diff += ratio_4;
+        diff -= 32768;
         d[0] = color0 << shift0;
         d[2] = color1 << shift0;
         d[1] = color0 << shift1;
@@ -788,11 +1353,11 @@ namespace lgfx
   }
 
   // x3 ~ x4
-  void IRAM_ATTR blit_x30_x40_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_3, int ratio_4)
+  void IRAM_ATTR blit_x30_x40_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
   {
-    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
     uint_fast8_t shift1 = shift0 ^ 8;
-    int diff = (ratio_4 + ratio_3) >> 1;
+    int diff = (ratio - 32768) >> 1;
 
     src_length = (src_length + 1) >> 1;
     while (src_length--)
@@ -804,7 +1369,7 @@ namespace lgfx
       uint32_t color1 = p[p1];
       if (diff < 0)
       {
-        diff += ratio_3;
+        diff += ratio;
         d[0] = color0 << shift0;
         d[2] = color1 << shift0;
         color0 = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF));
@@ -814,7 +1379,7 @@ namespace lgfx
       }
       else
       {
-        diff += ratio_4;
+        diff -= 32768;
         d[0] = color0 << shift0;
         d[2] = color1 << shift0;
         d[1] = color0 << shift1;
@@ -825,12 +1390,11 @@ namespace lgfx
   }
 
   // x2 ~ x3
-  void IRAM_ATTR blit_x20_x30_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_2, int ratio_3)
+  void IRAM_ATTR blit_x20_x30_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
   {
-    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
     uint_fast8_t shift1 = shift0 ^ 8;
-    int diff = (ratio_3 + ratio_2) >> 1;
-    auto pl = p + 256;
+    int diff = (ratio - 32768) >> 1;
 
     src_length = (src_length + 1) >> 1;
     while (src_length--)
@@ -839,17 +1403,17 @@ namespace lgfx
       uint32_t p1h = s[2];
       uint32_t p0l = s[1];
       uint32_t p0h = s[0];
-      p1l = pl[p1l];
-      p1h = p [p1h];
-      p0l = pl[p0l];
-      p0h = p [p0h];
+      p1l = p[(p1l<<1)+1];
+      p1h = p[(p1h<<1)  ];
+      p0l = p[(p0l<<1)+1];
+      p0h = p[(p0h<<1)  ];
       s += 4;
       uint32_t color1 = p1h + p1l;
       uint32_t color0 = p0h + p0l;
 
       if (diff < 0)
       {
-        diff += ratio_2;
+        diff += ratio;
         color0 <<= shift0;
         color1 <<= shift1;
         d[0] = color0;
@@ -858,7 +1422,7 @@ namespace lgfx
       }
       else
       {
-        diff += ratio_3;
+        diff -= 32768;
         d[0] = color0 << shift0;
         d[2] = color1 << shift0;
         d[1] = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF)) << shift1;
@@ -869,11 +1433,11 @@ namespace lgfx
   }
 
   // x2 ~ x3
-  void IRAM_ATTR blit_x20_x30_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_2, int ratio_3)
+  void IRAM_ATTR blit_x20_x30_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
   {
-    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
     uint_fast8_t shift1 = shift0 ^ 8;
-    int diff = (ratio_3 + ratio_2) >> 1;
+    int diff = (ratio - 32768) >> 1;
 
     src_length = (src_length + 1) >> 1;
     while (src_length--)
@@ -885,7 +1449,7 @@ namespace lgfx
       uint32_t color1 = p[p1];
       if (diff < 0)
       {
-        diff += ratio_2;
+        diff += ratio;
         color0 <<= shift0;
         color1 <<= shift1;
         d[0] = color0;
@@ -894,7 +1458,8 @@ namespace lgfx
       }
       else
       {
-        diff += ratio_3;
+        // diff += ratio_3;
+        diff -= 32768;
         d[0] = color0 << shift0;
         d[2] = color1 << shift0;
         d[1] = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF)) << shift1;
@@ -905,12 +1470,11 @@ namespace lgfx
   }
 
   // x1.5~x2.0
-  void IRAM_ATTR blit_x15_x20_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_15, int ratio_20)
+  void IRAM_ATTR blit_x15_x20_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
   {
-    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
     uint_fast8_t shift1 = shift0 ^ 8;
-    int diff = (ratio_20 + ratio_15) >> 1;
-    auto pl = p + 256;
+    int diff = (ratio - 32768) >> 1;
 
     src_length = (src_length + 3) >> 2;
     while (src_length--)
@@ -919,10 +1483,10 @@ namespace lgfx
       uint32_t p3h = s[6];
       uint32_t p2l = s[5];
       uint32_t p2h = s[4];
-      p3l = pl[p3l];
-      p3h = p [p3h];
-      p2l = pl[p2l];
-      p2h = p [p2h];
+      p3l = p[(p3l<<1)+1];
+      p3h = p[(p3h<<1)  ];
+      p2l = p[(p2l<<1)+1];
+      p2h = p[(p2h<<1)  ];
       uint32_t color3 = p3h + p3l;
 
       uint32_t p1l = s[3];
@@ -930,17 +1494,17 @@ namespace lgfx
       uint32_t color2 = p2h + p2l;
       uint32_t p0h = s[0];
       uint32_t p0l = s[1];
-      p1l = pl[p1l];
-      p1h = p [p1h];
-      p0l = pl[p0l];
-      p0h = p [p0h];
+      p1l = p[(p1l<<1)+1];
+      p1h = p[(p1h<<1)  ];
+      p0l = p[(p0l<<1)+1];
+      p0h = p[(p0h<<1)  ];
       s += 8;
       uint32_t color1 = p1h + p1l;
       uint32_t color0 = p0h + p0l;
 
       if (diff < 0)
       {
-        diff += ratio_15;
+        diff += ratio;
         color1 = ((color1 & 0xFFFF0000) + (color2 & 0xFFFF));
         color2 = ((color2 & 0xFFFF0000) + (color3 & 0xFFFF));
         d[0] = color0 << shift0;
@@ -951,7 +1515,7 @@ namespace lgfx
       }
       else
       {
-        diff += ratio_20;
+        diff -= 32768;
         d[0] = color0 << shift0;
         d[2] = color2 << shift0;
         d[1] = color1 << shift1;
@@ -962,11 +1526,11 @@ namespace lgfx
   }
 
   // x1.5~x2.0
-  void IRAM_ATTR blit_x15_x20_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_15, int ratio_20)
+  void IRAM_ATTR blit_x15_x20_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
   {
-    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
     uint_fast8_t shift1 = shift0 ^ 8;
-    int diff = (ratio_20 + ratio_15) >> 1;
+    int diff = (ratio - 32768) >> 1;
 
     src_length = (src_length + 3) >> 2;
     while (src_length--)
@@ -982,7 +1546,7 @@ namespace lgfx
       s += 4;
       if (diff < 0)
       {
-        diff += ratio_15;
+        diff += ratio;
         color1 = ((color1 & 0xFFFF0000) + (color2 & 0xFFFF));
         color2 = ((color2 & 0xFFFF0000) + (color3 & 0xFFFF));
         d[0] = color0 << shift0;
@@ -993,7 +1557,7 @@ namespace lgfx
       }
       else
       {
-        diff += ratio_20;
+        diff -= 32768;
         d[0] = color0 << shift0;
         d[2] = color2 << shift0;
         d[1] = color1 << shift1;
@@ -1004,12 +1568,11 @@ namespace lgfx
   }
 
   // x1.0~x1.5
-  void IRAM_ATTR blit_x10_x15_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_10, int ratio_15)
+  void IRAM_ATTR blit_x10_x15_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
   {
-    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
     uint_fast8_t shift1 = shift0 ^ 8;
-    int diff = (ratio_15 + ratio_10) >> 1;
-    auto pl = p + 256;
+    int diff = (ratio - 32768) >> 1;
 
     src_length = (src_length + 3) >> 2;
     while (src_length--)
@@ -1018,10 +1581,10 @@ namespace lgfx
       uint32_t p3h = s[6];
       uint32_t p2l = s[5];
       uint32_t p2h = s[4];
-      p3l = pl[p3l];
-      p3h = p [p3h];
-      p2l = pl[p2l];
-      p2h = p [p2h];
+      p3l = p[(p3l<<1)+1];
+      p3h = p[(p3h<<1)  ];
+      p2l = p[(p2l<<1)+1];
+      p2h = p[(p2h<<1)  ];
       uint32_t color3 = p3h + p3l;
 
       uint32_t p1l = s[3];
@@ -1029,17 +1592,17 @@ namespace lgfx
       uint32_t color2 = p2h + p2l;
       uint32_t p0h = s[0];
       uint32_t p0l = s[1];
-      p1l = pl[p1l];
-      p1h = p [p1h];
-      p0l = pl[p0l];
-      p0h = p [p0h];
+      p1l = p[(p1l<<1)+1];
+      p1h = p[(p1h<<1)  ];
+      p0l = p[(p0l<<1)+1];
+      p0h = p[(p0h<<1)  ];
       s += 8;
       uint32_t color1 = p1h + p1l;
       uint32_t color0 = p0h + p0l;
 
       if (diff < 0)
       {
-        diff += ratio_10;
+        diff += ratio;
         color0 &= 0xFFFF0000;
         color2 &= 0xFFFF0000;
         color1 &= 0xFFFF;
@@ -1052,7 +1615,7 @@ namespace lgfx
       }
       else
       {
-        diff += ratio_15;
+        diff -= 32768;
         color1 = ((color1 & 0xFFFF0000) + (color2 & 0xFFFF));
         color2 = ((color2 & 0xFFFF0000) + (color3 & 0xFFFF));
         d[0] = color0 << shift0;
@@ -1065,11 +1628,11 @@ namespace lgfx
   }
 
   // x1.0~x1.5
-  void IRAM_ATTR blit_x10_x15_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, bool odd, int ratio_10, int ratio_15)
+  void IRAM_ATTR blit_x10_x15_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
   {
-    uint_fast8_t shift0 = odd << 3;
+    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
     uint_fast8_t shift1 = shift0 ^ 8;
-    int diff = (ratio_15 + ratio_10) >> 1;
+    int diff = (ratio - 32768) >> 1;
 
     src_length = (src_length + 3) >> 2;
     while (src_length--)
@@ -1085,7 +1648,7 @@ namespace lgfx
       s += 4;
       if (diff < 0)
       {
-        diff += ratio_10;
+        diff += ratio;
         color0 &= 0xFFFF0000;
         color2 &= 0xFFFF0000;
         color1 &= 0xFFFF;
@@ -1098,7 +1661,7 @@ namespace lgfx
       }
       else
       {
-        diff += ratio_15;
+        diff -= 32768;
         color0 <<= shift0;
         color3 <<= shift0;
         color1 = ((color1 & 0xFFFF0000) + (color2 & 0xFFFF)) << shift1;
@@ -1111,6 +1674,8 @@ namespace lgfx
     }
   }
 
+#endif
+
   /// 引数のポインタアドレスがSRAMかどうか判定する  true=SRAM / false=not SRAM (e.g. PSRAM FlashROM) ;
   static inline bool IRAM_ATTR isSRAM(const void* ptr)
   {
@@ -1177,9 +1742,8 @@ namespace lgfx
                           src,
                           internal.panel_width,
                           &internal.palette[pidx],
-                          internal.burst_shift & 2,
-                          internal.blit_ratio_l,
-                          internal.blit_ratio_h );
+                          (internal.burst_shift & 2) << 2,  // burst_shift ? 8 : 0
+                          internal.mul_ratio );
       }
     }
     else
@@ -1377,7 +1941,7 @@ namespace lgfx
       scale_index = (scale_index < 2 ? 2 : scale_index > 10 ? 10 : scale_index) - 2;
 
       /// 表示倍率に応じて出力データ生成関数を変更する;
-      static constexpr void (*fp_tbl_332[])(uint32_t*, const uint8_t*, size_t, const uint32_t*, bool, int, int) =
+      static constexpr void (*fp_tbl_332[])(uint32_t*, const uint8_t*, size_t, const uint32_t*, int, int) =
       {
         blit_x10_x15_332,
         blit_x15_x20_332,
@@ -1389,7 +1953,7 @@ namespace lgfx
         blit_x40_x50_332,
         blit_x50_x60_332
       };
-      static constexpr void (*fp_tbl_565[])(uint32_t*, const uint8_t*, size_t, const uint32_t*, bool, int, int) =
+      static constexpr void (*fp_tbl_565[])(uint32_t*, const uint8_t*, size_t, const uint32_t*, int, int) =
       {
         blit_x10_x15_565,
         blit_x15_x20_565,
@@ -1412,15 +1976,22 @@ namespace lgfx
       scale_h &= 0x0F;
 
       /// 表示倍率の比率を求める;
-      internal.blit_ratio_h = spec_info.display_width - (output_width * scale_h / 2);
-      internal.blit_ratio_l = spec_info.display_width - (output_width * scale_l / 2);
+      int32_t mul_ratio_h = spec_info.display_width - (output_width * scale_h / 2);
+      int32_t mul_ratio_l = spec_info.display_width - (output_width * scale_l / 2);
+      int32_t mul_ratio = INT32_MAX;
+      if (mul_ratio_h < 0) {
+        mul_ratio_h = -mul_ratio_h;
+        mul_ratio = ((mul_ratio_l << 15) + (mul_ratio_h >> 1)) / mul_ratio_h;
+      }
+      internal.mul_ratio = mul_ratio;
+
 
       // Xオフセットに表示倍率を掛けたものを描画開始位置情報に加える
       int scale_offset = (offset_x * spec_info.display_width + output_width-1) / output_width;
 
       internal.leftside_index = (spec_info.active_start + scale_offset) & ~3u;
 
-// printf("scale_l:%d scale_h:%d swl:%d swh:%d  ratio:a:%d b:%d left:%d  \n", scale_l, scale_h, output_width * scale_l, output_width * scale_h, internal.blit_ratio_h, internal.blit_ratio_l, internal.leftside_index);
+// printf("scale_l:%d scale_h:%d swl:%d swh:%d  ratio:%d  a:%d b:%d left:%d  \n", scale_l, scale_h, output_width * scale_l, output_width * scale_h, mul_ratio, mul_ratio_h, mul_ratio_l, internal.leftside_index);
     }
 
     {

From 66098ca4acf77511bc2835613a18d3ffdc5a6424 Mon Sep 17 00:00:00 2001
From: lovyan03 <42724151+lovyan03@users.noreply.github.com>
Date: Tue, 3 Jan 2023 21:45:10 +0900
Subject: [PATCH 08/12] improve speed for Panel_CVBS

---
 doc/Panel_CVBS.md                          |    8 +-
 src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp | 1312 ++++++++++----------
 2 files changed, 658 insertions(+), 662 deletions(-)

diff --git a/doc/Panel_CVBS.md b/doc/Panel_CVBS.md
index cba7bdd3..e5948846 100644
--- a/doc/Panel_CVBS.md
+++ b/doc/Panel_CVBS.md
@@ -23,8 +23,8 @@ NTSCを使用した際に黒が僅かに白浮きしていると感じる場合
 ----------------
 
  - 出力できる最大解像度は信号タイプによって差があります。
-   - 720 x 480  (NTSC,NTSC-J)
-   - 864 x 576  (PAL,PAL-M)
+   - 720 x 480  (PAL-M,NTSC,NTSC-J)
+   - 864 x 576  (PAL)
    - 720 x 576  (PAL-N)
  - 最大解像度以下であれば、任意の解像度を設定可能です。
  - 最大解像度を整数で約分した解像度の指定を推奨します。
@@ -34,9 +34,9 @@ NTSCを使用した際に黒が僅かに白浮きしていると感じる場合
 <TABLE>
  <TR>
   <TH></TH>
-  <TH> NTSC <BR> NTSC-J </TH>
+  <TH> PAL-M <BR> NTSC <BR> NTSC-J </TH>
   <TH> PAL-N </TH>
-  <TH> PAL <BR> PAL-M </TH>
+  <TH> PAL </TH>
  </TR>
  <TR align="center">
   <TH> max width </TH>
diff --git a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
index 74dd4b9c..96be886b 100644
--- a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
+++ b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
@@ -168,13 +168,14 @@ namespace lgfx
 
   struct internal_t
   {
+    static constexpr const uint8_t dma_desc_count = 2;
     uint8_t** lines = nullptr;        // フレームバッファ配列ポインタ;
     uint16_t* allocated_list = nullptr;  // フレームバッファのalloc割当対象のインデクス番号(free時に使用);
     uint32_t* palette = nullptr;   // RGB332から波形に変換するためのテーブル;
-    void (*fp_blit)(uint32_t*, const uint8_t*, size_t, const uint32_t*, int, int);
+    void (*fp_blit)(uint32_t*, const uint8_t*, const uint8_t*, const uint32_t*, int, int);
     uint32_t burst_wave[2];       // カラーバースト信号の波形データ(EVENとODDで２通り)
     intr_handle_t isr_handle = nullptr;
-    lldesc_t dma_desc[2];
+    lldesc_t dma_desc[dma_desc_count];
     int32_t mul_ratio = 0;
     int16_t offset_y;
     uint16_t memory_height;
@@ -394,6 +395,7 @@ namespace lgfx
 
   struct signal_spec_info_t
   {
+    static constexpr const size_t sync_proc_count = 12;
     uint16_t total_scanlines;     // 走査線数(２フィールド、１フレーム);
     uint16_t scanline_width;      // 走査線内のサンプル数 (カラークロック数 x4);
     uint8_t hsync_serration;      // 切り込みパルス幅;
@@ -405,7 +407,7 @@ namespace lgfx
     uint8_t burst_shift_mask;
     uint16_t display_width;       // X方向 表示可能ピクセル数;
     uint16_t display_height;      // Y方向 表示可能ピクセル数;
-    uint8_t sync_proc[2][12];     // 垂直同期期間の処理内容テーブル 偶数行・奇数行で2要素,各要素12ライン分;
+    uint8_t sync_proc[2][sync_proc_count];     // 垂直同期期間の処理内容テーブル 偶数行・奇数行で2要素,各要素12ライン分;
     uint8_t vsync_lines;          // 垂直同期期間(表示期間外)の走査線数(単フィールド分)
   };
 
@@ -443,6 +445,7 @@ namespace lgfx
     , 480         // height max 480
     , { { 0x55, 0x55, 0x00, 0x22, 0x22, 0x00, 0x55, 0x55, 0x00, 0xB0, 0xB0, 0x00 } // NTSC EVEN
       , { 0x05, 0x55, 0x50, 0x02, 0x22, 0x20, 0x05, 0x55, 0x50, 0x04, 0xB0, 0xB0 } // NTSC ODD
+
       }
     , 22
     }
@@ -454,7 +457,6 @@ namespace lgfx
     , 484         // hsync_long 484 sample
     , 98          // burst start = 98 sample (5.6us)
     , 10          // burst cycle = 10 cycle
-    // , 220         // active_start = 220 sample (12.0us)
     , 216         // active_start = 216 sample (12.0us)
     , 1           // burst_shift_mask パレットインデクス変更動作;
     , 864         // max width 864
@@ -488,10 +490,10 @@ namespace lgfx
     , 66
     , 380
     , 80
-    , 9           // burst cycle = 10 cycle
+    , 9           // burst cycle = 9 cycle
     , 156
     , 1           // burst_shift_mask パレットインデクス変更動作;
-    , 720         // max width 768
+    , 720         // max width 720
     , 576         // max height 576
     , { { 0x05, 0x55, 0x50, 0x22, 0x22, 0x05, 0x55, 0x50, 0x34, 0xB0, 0xB0, 0x00 } // PAL EVEN
       , { 0x00, 0x55, 0x55, 0x02, 0x22, 0x20, 0x55, 0x55, 0x04, 0xB0, 0xB0, 0x00 } // PAL ODD
@@ -565,35 +567,15 @@ namespace lgfx
     }
   };
 
-#if 1
+#if 1  // 1:asm / 0:cpp   switch
 
-// a4 = ループ回数
 // a6 = シフト量反転 SARレジスタと入替、シフト量を 8 or 0 で変化させる
-// a7 = ratio
-// a8 = -ratio - 32768
 // a9 = ratio diff
-// a15 = 8固定  odd xor用
-// 準備 x3~x4 , x5~x6
-#define ASM_INIT_BLIT_NEGATIVE \
-    "ssl        a6                      \n" \
-    "addi       a6, a6, 24              \n" \
-    "srai       a9, a7, 1               \n" \
-    "addmi      a9, a9, -16384          \n" \
-    "addmi      a4, a4, 1               \n" \
-    "srai       a4, a4, 1               \n" \
-    "neg        a8, a7                  \n" \
-    "addmi      a8, a8, -32768          \n"
-
-// 準備 x2~x3 , x4~x5
-#define ASM_INIT_BLIT_POSITIVE \
+#define ASM_INIT_BLIT \
     "ssl        a6                      \n" \
     "addi       a6, a6, 24              \n" \
     "srai       a9, a7, 1               \n" \
-    "addmi      a9, a9, -16384          \n" \
-    "addmi      a4, a4, 1               \n" \
-    "srai       a4, a4, 1               \n" \
-    "addmi      a7, a7, 16384           \n" \
-    "addmi      a7, a7, 16384           \n"
+    "addmi      a9, a9, -16384          \n"
 
 #define ASM_READ_RGB332_2PIXEL \
     "l8ui       a10,a3, 0               \n" \
@@ -605,21 +587,21 @@ namespace lgfx
     "l32i       a11,a11,0               \n"
 
 #define ASM_READ_RGB565_2PIXEL \
-    "l8ui       a14,a3, 1               \n" \
-    "l8ui       a4, a3, 3               \n" \
+    "l8ui       a12,a3, 1               \n" \
+    "l8ui       a13,a3, 3               \n" \
     "l8ui       a10,a3, 0               \n" \
     "l8ui       a11,a3, 2               \n" \
-    "addx8      a14,a14,a5              \n" \
-    "addx8      a4, a4, a5              \n" \
+    "addx8      a12,a12,a5              \n" \
+    "addx8      a13,a13,a5              \n" \
     "addx8      a10,a10,a5              \n" \
     "addx8      a11,a11,a5              \n" \
-    "l32i       a14,a14,4               \n" \
+    "l32i       a12,a12,4               \n" \
     "l32i       a10,a10,0               \n" \
-    "l32i       a4, a4, 4               \n" \
+    "l32i       a13,a13,4               \n" \
     "l32i       a11,a11,0               \n" \
-    "add        a10,a10,a14             \n" \
+    "add        a10,a10,a12             \n" \
     "addi       a3, a3, 4               \n" \
-    "add        a11,a11,a4              \n"
+    "add        a11,a11,a13             \n"
 
 #define ASM_READ_RGB332_4PIXEL \
     "l8ui       a10,a3, 0               \n" \
@@ -638,34 +620,34 @@ namespace lgfx
 
 #define ASM_READ_RGB565_4PIXEL \
     "l8ui       a14,a3, 1               \n" \
-    "l8ui       a4, a3, 3               \n" \
+    "l8ui       a15,a3, 3               \n" \
     "l8ui       a10,a3, 0               \n" \
     "l8ui       a11,a3, 2               \n" \
     "addx8      a14,a14,a5              \n" \
-    "addx8      a4, a4, a5              \n" \
+    "addx8      a15,a15,a5              \n" \
     "addx8      a10,a10,a5              \n" \
     "addx8      a11,a11,a5              \n" \
     "l32i       a14,a14,4               \n" \
     "l32i       a10,a10,0               \n" \
-    "l32i       a4, a4, 4               \n" \
+    "l32i       a15,a15,4               \n" \
     "l32i       a11,a11,0               \n" \
     "add        a10,a10,a14             \n" \
     "l8ui       a14,a3, 5               \n" \
-    "add        a11,a11,a4              \n" \
-    "l8ui       a4, a3, 7               \n" \
+    "add        a11,a11,a15             \n" \
+    "l8ui       a15,a3, 7               \n" \
     "l8ui       a12,a3, 4               \n" \
     "l8ui       a13,a3, 6               \n" \
     "addx8      a14,a14,a5              \n" \
-    "addx8      a4, a4, a5              \n" \
+    "addx8      a15,a15,a5              \n" \
     "addx8      a12,a12,a5              \n" \
     "addx8      a13,a13,a5              \n" \
     "l32i       a14,a14,4               \n" \
     "l32i       a12,a12,0               \n" \
-    "l32i       a4, a4, 4               \n" \
+    "l32i       a15,a15,4               \n" \
     "l32i       a13,a13,0               \n" \
     "add        a12,a12,a14             \n" \
     "addi       a3, a3, 8               \n" \
-    "add        a13,a13,a4              \n"
+    "add        a13,a13,a15             \n"
 
 
 
@@ -684,992 +666,1003 @@ namespace lgfx
 */
 
   // x5 ~ x6
-  void IRAM_ATTR blit_x50_x60_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x50_x60_565(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int odd, int ratio)
   {
     __asm__ (
-    ASM_INIT_BLIT_POSITIVE
-    "loop       a4, LOOP_x50_x60_565    \n" // ループ開始
+    ASM_INIT_BLIT
+"LOOP_x50_x60_565:                  \n"
     ASM_READ_RGB565_2PIXEL
 
+    "sll        a12,a10                 \n"
+    "sll        a13,a11                 \n"
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 4               \n" // 2,3 保存
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 12              \n" // 6,7 保存
-    "s32i       a4, a2, 20              \n" // 10,11 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 0               \n" // 0,1 保存
-    "s32i       a4, a2, 8               \n" // 4,5 保存
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 16              \n" // 8,9 保存
+    "sll        a14,a10                 \n"
+    "sll        a15,a11                 \n"
+    "s32i       a12,a2, 0               \n" // 0,1 保存
+    "s32i       a14,a2, 4               \n" // 2,3 保存
+    "s32i       a12,a2, 8               \n" // 4,5 保存
+    "s32i       a15,a2, 12              \n" // 6,7 保存
+    "s32i       a13,a2, 16              \n" // 8,9 保存
     "bgez       a9, BGEZ_x50_x60_565    \n"
-
 // diffがマイナスの時の処理 x5.0
-    "s16i       a4, a2, 8               \n" //   5 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
+    "s16i       a13,a2, 8               \n" //   5 保存
     "add        a9, a9, a7              \n" // diff += ratio
-    "addi       a2, a2, -1*4            \n" // 出力先 -= 1  (後の処理で+=6されるので、トータルで +5になる)
+    "addi       a2, a2, 5*4             \n" // 出力先 += 5 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x50_x60_565\n"
+    "retw                               \n"
 
 "BGEZ_x50_x60_565:                  \n"
 // diffがプラスの時の処理 x6.0
-    "addi       a2, a2, 6*4             \n" // a2 += 6 * sizeof(uint32_t)
+    "s32i       a15,a2, 20              \n" // 10,11 保存
+    "xsr        a6, SAR                 \n" // シフト量スイッチ
+
     "addmi      a9, a9, -32768          \n" // diff -= 32768
-"LOOP_x50_x60_565:                  \n"
+    "addi       a2, a2, 6*4             \n" // 出力先 += 6 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x50_x60_565\n"
     );
   }
 
   // x5 ~ x6
-  void IRAM_ATTR blit_x50_x60_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x50_x60_332(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int odd, int ratio)
   {
     __asm__ (
-    ASM_INIT_BLIT_POSITIVE
-    "loop       a4, LOOP_x50_x60_332    \n" // ループ開始
+    ASM_INIT_BLIT
+"LOOP_x50_x60_332:                  \n"
     ASM_READ_RGB332_2PIXEL
 
+    "sll        a12,a10                 \n"
+    "sll        a13,a11                 \n"
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 4               \n" // 2,3 保存
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 12              \n" // 6,7 保存
-    "s32i       a4, a2, 20              \n" // 10,11 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 0               \n" // 0,1 保存
-    "s32i       a4, a2, 8               \n" // 4,5 保存
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 16              \n" // 8,9 保存
+    "sll        a14,a10                 \n"
+    "sll        a15,a11                 \n"
+    "s32i       a12,a2, 0               \n" // 0,1 保存
+    "s32i       a14,a2, 4               \n" // 2,3 保存
+    "s32i       a12,a2, 8               \n" // 4,5 保存
+    "s32i       a15,a2, 12              \n" // 6,7 保存
+    "s32i       a13,a2, 16              \n" // 8,9 保存
     "bgez       a9, BGEZ_x50_x60_332    \n"
-
 // diffがマイナスの時の処理 x5.0
-    "s16i       a4, a2, 8               \n" //   5 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
+    "s16i       a13,a2, 8               \n" //   5 保存
     "add        a9, a9, a7              \n" // diff += ratio
-    "addi       a2, a2, -1*4            \n" // 出力先 -= 1  (後の処理で+=6されるので、トータルで +5になる)
+    "addi       a2, a2, 5*4             \n" // 出力先 += 5 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x50_x60_332\n"
+    "retw                               \n"
 
 "BGEZ_x50_x60_332:                  \n"
 // diffがプラスの時の処理 x6.0
-    "addi       a2, a2, 6*4             \n" // a2 += 6 * sizeof(uint32_t)
+    "s32i       a15,a2, 20              \n" // 10,11 保存
+    "xsr        a6, SAR                 \n" // シフト量スイッチ
+
     "addmi      a9, a9, -32768          \n" // diff -= 32768
-"LOOP_x50_x60_332:                  \n"
+    "addi       a2, a2, 6*4             \n" // 出力先 += 6 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x50_x60_332\n"
     );
   }
 
   // x4 ~ x5
-  void IRAM_ATTR blit_x40_x50_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x40_x50_565(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int odd, int ratio)
   {
     __asm__ (
-    ASM_INIT_BLIT_NEGATIVE
-    "loop       a4, LOOP_x40_x50_565    \n" // ループ開始
+    ASM_INIT_BLIT
+"LOOP_x40_x50_565:                  \n"
     ASM_READ_RGB565_2PIXEL
 
+    "sll        a12,a10                 \n"
+    "sll        a13,a11                 \n"
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 4               \n" // 2,3 保存
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 12              \n" // 6,7 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a10,a10                 \n"
-    "s32i       a10,a2, 0               \n" // 0,1 保存
-    "sll        a11,a11                 \n"
-    "s32i       a11,a2, 8               \n" // 4,5 保存
-    "bltz       a9, BLTZ_x40_x50_565    \n"
-
-// diffがプラスの時の処理 x5.0
-    "srli       a10,a10,16              \n"
-    "s16i       a10,a2, 10              \n" // 4   保存
-    "s32i       a11,a2, 16              \n" // 8,9 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "add        a9, a9, a8              \n" // diff += - ratio - 32768
-    "addi       a2, a2, 4               \n" // 出力先 += 1  (後の処理で+=4されるので、トータルで +5になる)
+    "sll        a14,a10                 \n"
+    "sll        a15,a11                 \n"
+    "s32i       a12,a2, 0               \n" // 0,1 保存
+    "s32i       a14,a2, 4               \n" // 2,3 保存
+    "s32i       a15,a2, 12              \n" // 6,7 保存
 
-"BLTZ_x40_x50_565:                  \n"
+    "bgez       a9, BGEZ_x40_x50_565    \n"
 // diffがマイナスの時の処理 x4.0
+    "s32i       a13,a2, 8               \n" // 4,5 保存
+    "xsr        a6, SAR                 \n" // シフト量スイッチ
     "add        a9, a9, a7              \n" // diff += ratio
-    "addi       a2, a2, 4*4             \n" // a2 += 4 * sizeof(uint32_t)
-"LOOP_x40_x50_565:                  \n"
+    "addi       a2, a2, 4*4             \n" // 出力先 += 4 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x40_x50_565\n"
+    "retw                               \n"
+
+"BGEZ_x40_x50_565:                  \n"
+// diffがプラスの時の処理 x5.0
+    "s32i       a12,a2, 8               \n" // 4,5 保存
+    "s32i       a13,a2, 16              \n" // 8,9 保存
+    "s16i       a13,a2, 8               \n" //   5 保存
+    "addmi      a9, a9, -32768          \n" // diff -= 32768
+    "addi       a2, a2, 5*4             \n" // 出力先 += 5 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x40_x50_565\n"
     );
   }
 
   // x4 ~ x5
-  void IRAM_ATTR blit_x40_x50_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x40_x50_332(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int odd, int ratio)
   {
     __asm__ (
-    ASM_INIT_BLIT_NEGATIVE
-    "loop       a4, LOOP_x40_x50_332    \n" // ループ開始
+    ASM_INIT_BLIT
+"LOOP_x40_x50_332:                  \n"
     ASM_READ_RGB332_2PIXEL
 
+    "sll        a12,a10                 \n"
+    "sll        a13,a11                 \n"
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 4               \n" // 2,3 保存
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 12              \n" // 6,7 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a10,a10                 \n"
-    "s32i       a10,a2, 0               \n" // 0,1 保存
-    "sll        a11,a11                 \n"
-    "s32i       a11,a2, 8               \n" // 4,5 保存
-    "bltz       a9, BLTZ_x40_x50_332    \n"
+    "sll        a14,a10                 \n"
+    "sll        a15,a11                 \n"
+    "s32i       a12,a2, 0               \n" // 0,1 保存
+    "s32i       a14,a2, 4               \n" // 2,3 保存
+    "s32i       a15,a2, 12              \n" // 6,7 保存
 
-// diffがプラスの時の処理 x5.0
-    "srli       a10,a10,16              \n"
-    "s16i       a10,a2, 10              \n" // 4   保存
-    "s32i       a11,a2, 16              \n" // 8,9 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "add        a9, a9, a8              \n" // diff += - ratio - 32768
-    "addi       a2, a2, 4               \n" // 出力先 += 1  (後の処理で+=4されるので、トータルで +5になる)
-
-"BLTZ_x40_x50_332:                  \n"
+    "bgez       a9, BGEZ_x40_x50_332    \n"
 // diffがマイナスの時の処理 x4.0
+    "s32i       a13,a2, 8               \n" // 4,5 保存
+    "xsr        a6, SAR                 \n" // シフト量スイッチ
     "add        a9, a9, a7              \n" // diff += ratio
-    "addi       a2, a2, 4*4             \n" // a2 += 4 * sizeof(uint32_t)
-"LOOP_x40_x50_332:                  \n"
+    "addi       a2, a2, 4*4             \n" // 出力先 += 4 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x40_x50_332\n"
+    "retw                               \n"
+
+"BGEZ_x40_x50_332:                  \n"
+// diffがプラスの時の処理 x5.0
+    "s32i       a12,a2, 8               \n" // 4,5 保存
+    "s32i       a13,a2, 16              \n" // 8,9 保存
+    "s16i       a13,a2, 8               \n" //   5 保存
+    "addmi      a9, a9, -32768          \n" // diff -= 32768
+    "addi       a2, a2, 5*4             \n" // 出力先 += 5 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x40_x50_332\n"
     );
   }
 
   // x3 ~ x4
-  void IRAM_ATTR blit_x30_x40_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x30_x40_565(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int odd, int ratio)
   {
     __asm__ (
-    ASM_INIT_BLIT_POSITIVE
-    "loop       a4, LOOP_x30_x40_565    \n"  // ループ開始
+    ASM_INIT_BLIT
+"LOOP_x30_x40_565:                  \n"
     ASM_READ_RGB565_2PIXEL
 
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 0               \n" // 0,1 保存
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 8               \n" // 4,5 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 4               \n" // 2,3 保存
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 12              \n" // 6,7 保存
+    "sll        a14,a10                 \n"
+    "s32i       a14,a2, 0               \n" // 0,1 保存
+    "sll        a14,a11                 \n"
+    "s32i       a14,a2, 8               \n" // 4,5 保存
     "xsr        a6, SAR                 \n" // シフト量スイッチ
+    "sll        a14,a10                 \n"
+    "s32i       a14,a2, 4               \n" // 2,3 保存
+    "sll        a14,a11                 \n"
     "bgez       a9, BGEZ_x30_x40_565    \n"
-
 // diffがマイナスの時の処理 x3.0
-    "s16i       a4, a2, 4               \n" //   3 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
+    "s16i       a14,a2, 4               \n" //   3 保存
     "add        a9, a9, a7              \n" // diff += ratio
-    "addi       a2, a2, -1*4            \n" // 出力先 -= 1  (後の処理で+=4されるので、トータルで +3になる)
+    "addi       a2, a2, 3*4             \n" // 出力先 += 3 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x30_x40_565\n"
+    "retw                               \n"
 
 "BGEZ_x30_x40_565:                  \n"
 // diffがプラスの時の処理 x4.0
-    "addi       a2, a2, 4*4             \n" // a2 += 4 * sizeof(uint32_t)
+    "s32i       a14,a2, 12              \n" // 6,7 保存
+    "xsr        a6, SAR                 \n" // シフト量スイッチ
+
     "addmi      a9, a9, -32768          \n" // diff -= 32768
-"LOOP_x30_x40_565:                  \n"
+    "addi       a2, a2, 4*4             \n" // 出力先 += 4 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x30_x40_565\n"
     );
   }
 
   // x3 ~ x4
-  void IRAM_ATTR blit_x30_x40_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x30_x40_332(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int odd, int ratio)
   {
     __asm__ (
-    ASM_INIT_BLIT_POSITIVE
-
-    "loop       a4, LOOP_x30_x40_332    \n"  // ループ開始
+    ASM_INIT_BLIT
+"LOOP_x30_x40_332:                  \n"
     ASM_READ_RGB332_2PIXEL
 
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 0               \n" // 0,1 保存
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 8               \n" // 4,5 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 4               \n" // 2,3 保存
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 12              \n" // 6,7 保存
+    "sll        a14,a10                 \n"
+    "s32i       a14,a2, 0               \n" // 0,1 保存
+    "sll        a14,a11                 \n"
+    "s32i       a14,a2, 8               \n" // 4,5 保存
     "xsr        a6, SAR                 \n" // シフト量スイッチ
+    "sll        a14,a10                 \n"
+    "s32i       a14,a2, 4               \n" // 2,3 保存
+    "sll        a14,a11                 \n"
     "bgez       a9, BGEZ_x30_x40_332    \n"
-
 // diffがマイナスの時の処理 x3.0
-    "s16i       a4, a2, 4               \n" //   3 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
+    "s16i       a14,a2, 4               \n" //   3 保存
     "add        a9, a9, a7              \n" // diff += ratio
-    "addi       a2, a2, -1*4            \n" // 出力先 -= 1  (後の処理で+=4されるので、トータルで +3になる)
+    "addi       a2, a2, 3*4             \n" // 出力先 += 3 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x30_x40_332\n"
+    "retw                               \n"
 
 "BGEZ_x30_x40_332:                  \n"
 // diffがプラスの時の処理 x4.0
-    "addi       a2, a2, 4*4             \n" // a2 += 4 * sizeof(uint32_t)
+    "s32i       a14,a2, 12              \n" // 6,7 保存
+    "xsr        a6, SAR                 \n" // シフト量スイッチ
+
     "addmi      a9, a9, -32768          \n" // diff -= 32768
-"LOOP_x30_x40_332:                  \n"
+    "addi       a2, a2, 4*4             \n" // 出力先 += 4 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x30_x40_332\n"
     );
   }
 
   // x2 ~ x3
-  void IRAM_ATTR blit_x20_x30_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x20_x30_565(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int odd, int ratio)
   {
     __asm__ (
-    ASM_INIT_BLIT_NEGATIVE
-    "loop       a4, LOOP_x20_x30_565    \n"  // ループ開始
+    ASM_INIT_BLIT
+"LOOP_x20_x30_565:                  \n"
     ASM_READ_RGB565_2PIXEL
 
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 0               \n" // 0,1 保存
+    "sll        a14,a10                 \n"
+    "s32i       a14,a2, 0               \n" // 0,1 保存
+    "bgez       a9, BGEZ_x20_x30_565    \n"
+// diffがマイナスの時の処理 x2.0
+
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 4               \n" // 2,3 保存
+    "sll        a14,a11                 \n"
+    "s32i       a14,a2, 4               \n" // 2,3 保存
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "bltz       a9, BLTZ_x20_x30_565    \n"
 
+    "add        a9, a9, a7              \n" // diff += ratio
+    "addi       a2, a2, 2*4             \n" // 出力先 += 2 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x20_x30_565\n"
+    "retw                               \n"
+
+"BGEZ_x20_x30_565:                  \n"
 // diffがプラスの時の処理 x3.0
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 8               \n" // 4,5 保存
+    "sll        a14,a11                 \n"
+    "s32i       a14,a2, 8               \n" // 4,5 保存
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "srli       a4, a10,16              \n" // a4 = a10 >> 16
-    "sll        a4, a4                  \n" // a4 = !odd a10
-    "s16i       a4, a2, 6               \n" // 2   保存
-    "addi       a2, a2, 4               \n"
-    "add        a9, a9, a8              \n" // diff += - ratio - 32768
+    "sll        a14,a10                 \n"
+    "s32i       a14,a2, 4               \n" // 2,3 保存
+    "sll        a14,a11                 \n" // a14 = a11 shifted by the swapped SAR amount
+    "s16i       a14,a2, 4               \n" //   3 保存
 
-"BLTZ_x20_x30_565:                  \n"
-// diffがマイナスの時の処理 x2.0
-    "add        a9, a9, a7              \n" // diff += ratio
-    "addi       a2, a2, 2*4             \n" // a15 += 2 * sizeof(uint32_t)
-"LOOP_x20_x30_565:                  \n"
+    "addmi      a9, a9, -32768          \n" // diff -= 32768
+    "addi       a2, a2, 3*4             \n" // 出力先 += 3 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x20_x30_565\n"
     );
   }
 
   // x2 ~ x3
-  void IRAM_ATTR blit_x20_x30_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x20_x30_332(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int odd, int ratio)
   {
     __asm__ (
-    ASM_INIT_BLIT_NEGATIVE
-    "loop       a4, LOOP_x20_x30_332    \n"  // ループ開始
+    ASM_INIT_BLIT
+"LOOP_x20_x30_332:                  \n"
     ASM_READ_RGB332_2PIXEL
 
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 0               \n" // 0,1 保存
+    "sll        a14,a10                 \n"
+    "s32i       a14,a2, 0               \n" // 0,1 保存
+    "bgez       a9, BGEZ_x20_x30_332    \n"
+// diffがマイナスの時の処理 x2.0
+
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 4               \n" // 2,3 保存
+    "sll        a14,a11                 \n"
+    "s32i       a14,a2, 4               \n" // 2,3 保存
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "bltz       a9, BLTZ_x20_x30_332    \n"
 
+    "add        a9, a9, a7              \n" // diff += ratio
+    "addi       a2, a2, 2*4             \n" // 出力先 += 2 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x20_x30_332\n"
+    "retw                               \n"
+
+"BGEZ_x20_x30_332:                  \n"
 // diffがプラスの時の処理 x3.0
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 8               \n" // 4,5 保存
+    "sll        a14,a11                 \n"
+    "s32i       a14,a2, 8               \n" // 4,5 保存
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "srli       a4, a10,16              \n" // a4 = a10 >> 16
-    "sll        a4, a4                  \n" // a4 = !odd a10
-    "s16i       a4, a2, 6               \n" // 2   保存
-    "addi       a2, a2, 4               \n"
-    "add        a9, a9, a8              \n" // diff += - ratio - 32768
+    "sll        a14,a10                 \n"
+    "s32i       a14,a2, 4               \n" // 2,3 保存
+    "sll        a14,a11                 \n" // a14 = a11 shifted by the swapped SAR amount
+    "s16i       a14,a2, 4               \n" //   3 保存
 
-"BLTZ_x20_x30_332:                  \n"
-// diffがマイナスの時の処理 x2.0
-    "add        a9, a9, a7              \n" // diff += ratio
-    "addi       a2, a2, 2*4             \n" // a15 += 2 * sizeof(uint32_t)
-"LOOP_x20_x30_332:                  \n"
+    "addmi      a9, a9, -32768          \n" // diff -= 32768
+    "addi       a2, a2, 3*4             \n" // 出力先 += 3 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x20_x30_332\n"
     );
   }
 
   // x1.5~x2.0
-  void IRAM_ATTR blit_x15_x20_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x15_x20_565(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int odd, int ratio)
   {
     __asm__ (
-    ASM_INIT_BLIT_POSITIVE
-    "addi       a4, a4, 1               \n"
-    "srli       a4, a4, 1               \n"
-    "loop       a4, LOOP_x15_x20_565    \n"  // ループ開始
+    ASM_INIT_BLIT
+"LOOP_x15_x20_565:                  \n"
     ASM_READ_RGB565_4PIXEL
 
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 0               \n" // 0,1 保存
-    "sll        a4, a12                 \n"
-    "s32i       a4, a2, 8               \n" // 4,5 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 4               \n" // 2,3 保存
-    "sll        a4, a13                 \n"
-    "s32i       a4, a2, 12              \n" // 6,7 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
+    "sll        a14,a10                 \n"
+    "s32i       a14,a2, 0               \n" // 0,1 保存
+    "sll        a14,a12                 \n"
+    "s32i       a14,a2, 8               \n" // 4,5 保存
 
     "bgez       a9, BGEZ_x15_x20_565    \n"
 // diffがマイナスの時の処理 x1.5
-    "sll        a4, a13                 \n"
-    "s16i       a4, a2, 8               \n" //   5 保存
+    "sll        a14,a13                 \n"
+    "s16i       a14,a2, 8               \n" //   5 保存
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a12                 \n"
-    "s16i       a4, a2, 4               \n" //   3 保存
+    "sll        a14,a11                 \n"
+    "s32i       a14,a2, 4               \n" // 2,3 保存
+    "sll        a14,a12                 \n"
+    "s16i       a14,a2, 4               \n" //   3 保存
+
     "add        a9, a9, a7              \n" // diff += ratio
-    "addi       a2, a2, -1*4            \n" // 出力先 -= 1  (後の処理で+=4されるので、トータルで +3になる)
+    "addi       a2, a2, 3*4             \n" // 出力先 += 3 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x15_x20_565\n"
+    "retw                               \n"
 
 "BGEZ_x15_x20_565:                  \n"
 // diffがプラスの時の処理 x2.0
-    "addi       a2, a2, 4*4             \n" // a15 += 4 * sizeof(uint32_t)
+    "xsr        a6, SAR                 \n" // シフト量スイッチ
+    "sll        a14,a11                 \n"
+    "s32i       a14,a2, 4               \n" // 2,3 保存
+    "sll        a14,a13                 \n"
+    "s32i       a14,a2, 12              \n" // 6,7 保存
+    "xsr        a6, SAR                 \n" // シフト量スイッチ
+
     "addmi      a9, a9, -32768          \n" // diff -= 32768
-"LOOP_x15_x20_565:                  \n"
+    "addi       a2, a2, 4*4             \n" // 出力先 += 4 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x15_x20_565\n"
     );
   }
 
   // x1.5~x2.0
-  void IRAM_ATTR blit_x15_x20_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x15_x20_332(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int odd, int ratio)
   {
     __asm__ (
-    ASM_INIT_BLIT_POSITIVE
-    "addi       a4, a4, 1               \n"
-    "srli       a4, a4, 1               \n"
-    "loop       a4, LOOP_x15_x20_332    \n"  // ループ開始
+    ASM_INIT_BLIT
+"LOOP_x15_x20_332:                  \n"
     ASM_READ_RGB332_4PIXEL
 
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 0               \n" // 0,1 保存
-    "sll        a4, a12                 \n"
-    "s32i       a4, a2, 8               \n" // 4,5 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 4               \n" // 2,3 保存
-    "sll        a4, a13                 \n"
-    "s32i       a4, a2, 12              \n" // 6,7 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
+    "sll        a14,a10                 \n"
+    "s32i       a14,a2, 0               \n" // 0,1 保存
+    "sll        a14,a12                 \n"
+    "s32i       a14,a2, 8               \n" // 4,5 保存
 
     "bgez       a9, BGEZ_x15_x20_332    \n"
 // diffがマイナスの時の処理 x1.5
-    "sll        a4, a13                 \n"
-    "s16i       a4, a2, 8               \n" //   5 保存
+    "sll        a14,a13                 \n"
+    "s16i       a14,a2, 8               \n" //   5 保存
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a12                 \n"
-    "s16i       a4, a2, 4               \n" //   3 保存
+    "sll        a14,a11                 \n"
+    "s32i       a14,a2, 4               \n" // 2,3 保存
+    "sll        a14,a12                 \n"
+    "s16i       a14,a2, 4               \n" //   3 保存
+
     "add        a9, a9, a7              \n" // diff += ratio
-    "addi       a2, a2, -1*4            \n" // 出力先 -= 1  (後の処理で+=4されるので、トータルで +3になる)
+    "addi       a2, a2, 3*4             \n" // 出力先 += 3 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x15_x20_332\n"
+    "retw                               \n"
 
 "BGEZ_x15_x20_332:                  \n"
 // diffがプラスの時の処理 x2.0
-    "addi       a2, a2, 4*4             \n" // a15 += 4 * sizeof(uint32_t)
+    "xsr        a6, SAR                 \n" // シフト量スイッチ
+    "sll        a14,a11                 \n"
+    "s32i       a14,a2, 4               \n" // 2,3 保存
+    "sll        a14,a13                 \n"
+    "s32i       a14,a2, 12              \n" // 6,7 保存
+    "xsr        a6, SAR                 \n" // シフト量スイッチ
+
     "addmi      a9, a9, -32768          \n" // diff -= 32768
-"LOOP_x15_x20_332:                  \n"
+    "addi       a2, a2, 4*4             \n" // 出力先 += 4 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x15_x20_332\n"
     );
   }
 
   // x1.0~x1.5
-  void IRAM_ATTR blit_x10_x15_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x10_x15_565(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int odd, int ratio)
   {
     __asm__ (
-    ASM_INIT_BLIT_NEGATIVE
-    "addi       a4, a4, 1               \n"
-    "srli       a4, a4, 1               \n"
-    "loop       a4, LOOP_x10_x15_565    \n"  // ループ開始
+    ASM_INIT_BLIT
+"LOOP_x10_x15_565:                  \n"
     ASM_READ_RGB565_4PIXEL
 
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 0               \n" // 0,1 保存
-    "bltz       a9, BLTZ_x10_x15_565    \n"
-// diffがプラスの時の処理 x1.5
-
-    "sll        a4, a13                 \n"
-    "s32i       a4, a2, 8               \n" // 4,5 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 4               \n" // 2   保存
-    "sll        a4, a12                 \n"
-    "s16i       a4, a2, 4               \n" //   3 保存
-    "addi       a2, a2, 3*4             \n"
-    "add        a9, a9, a8              \n" // diff += - ratio - 32768
-    "j          ENDIF_x10_x15_565       \n"
-
-"BLTZ_x10_x15_565:                  \n"
+    "sll        a14,a10                 \n"
+    "s32i       a14,a2, 0               \n" // 0,1 保存
+    "bgez       a9, BGEZ_x10_x15_565    \n"
 // diffがマイナスの時の処理 x1.0
 
-    "sll        a4, a11                 \n"
-    "s16i       a4, a2, 0               \n" //   1 保存
+    "sll        a14,a11                 \n"
+    "s16i       a14,a2, 0               \n" //   1 保存
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a12                 \n"
-    "s32i       a4, a2, 4               \n" // 2   保存
-    "sll        a4, a13                 \n"
-    "s16i       a4, a2, 4               \n" //   3 保存
+    "sll        a14,a12                 \n"
+    "s32i       a14,a2, 4               \n" // 2   保存
+    "sll        a14,a13                 \n"
+    "s16i       a14,a2, 4               \n" //   3 保存
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "addi       a2, a2, 2*4             \n" // a15 += 2 * sizeof(uint32_t)
 
-"ENDIF_x10_x15_565:                 \n"
     "add        a9, a9, a7              \n" // diff += ratio
-"LOOP_x10_x15_565:                  \n"
+    "addi       a2, a2, 2*4             \n" // 出力先 += 2 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x10_x15_565\n"
+    "retw                               \n"
+
+"BGEZ_x10_x15_565:                  \n"
+// diffがプラスの時の処理 x1.5
+    "sll        a14,a13                 \n"
+    "s32i       a14,a2, 8               \n" // 4,5 保存
+    "xsr        a6, SAR                 \n" // シフト量スイッチ
+    "sll        a14,a11                 \n"
+    "s32i       a14,a2, 4               \n" // 2   保存
+    "sll        a14,a12                 \n"
+    "s16i       a14,a2, 4               \n" //   3 保存
+
+    "addmi      a9, a9, -32768          \n" // diff -= 32768
+    "addi       a2, a2, 3*4             \n" // 出力先 += 3 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x10_x15_565\n"
     );
   }
 
   // x1.0~x1.5
-  void IRAM_ATTR blit_x10_x15_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x10_x15_332(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int odd, int ratio)  // inline-asm blitter: the 3rd argument is now an end pointer instead of a length
   {
     __asm__ (
-    ASM_INIT_BLIT_NEGATIVE
-    "addi       a4, a4, 1               \n"
-    "srli       a4, a4, 1               \n"
-    "loop       a4, LOOP_x10_x15_332    \n"  // ループ開始
+    ASM_INIT_BLIT  // register setup macro defined elsewhere in the file
+"LOOP_x10_x15_332:                  \n"  // loop head: branch target of both bltu instructions below
     ASM_READ_RGB332_4PIXEL
 
-    "sll        a4, a10                 \n"
-    "s32i       a4, a2, 0               \n" // 0,1 保存
-    "bltz       a9, BLTZ_x10_x15_332    \n"
-// diffがプラスの時の処理 x1.5
-
-    "sll        a4, a13                 \n"
-    "s32i       a4, a2, 8               \n" // 4,5 保存
-    "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a11                 \n"
-    "s32i       a4, a2, 4               \n" // 2   保存
-    "sll        a4, a12                 \n"
-    "s16i       a4, a2, 4               \n" //   3 保存
-    "addi       a2, a2, 3*4             \n"
-    "add        a9, a9, a8              \n" // diff += - ratio - 32768
-    "j          ENDIF_x10_x15_332       \n"
-
-"BLTZ_x10_x15_332:                  \n"
+    "sll        a14,a10                 \n"  // a14 is the scratch register (a4 is no longer used as scratch)
+    "s32i       a14,a2, 0               \n" // store 0,1
+    "bgez       a9, BGEZ_x10_x15_332    \n"  // diff >= 0 -> x1.5 path; otherwise fall through to x1.0
 // diffがマイナスの時の処理 x1.0
 
-    "sll        a4, a11                 \n"
-    "s16i       a4, a2, 0               \n" //   1 保存
+    "sll        a14,a11                 \n"
+    "s16i       a14,a2, 0               \n" //   store 1
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "sll        a4, a12                 \n"
-    "s32i       a4, a2, 4               \n" // 2   保存
-    "sll        a4, a13                 \n"
-    "s16i       a4, a2, 4               \n" //   3 保存
+    "sll        a14,a12                 \n"
+    "s32i       a14,a2, 4               \n" // store 2
+    "sll        a14,a13                 \n"
+    "s16i       a14,a2, 4               \n" //   store 3
     "xsr        a6, SAR                 \n" // シフト量スイッチ
-    "addi       a2, a2, 2*4             \n" // a15 += 2 * sizeof(uint32_t)
 
-"ENDIF_x10_x15_332:                 \n"
     "add        a9, a9, a7              \n" // diff += ratio
-"LOOP_x10_x15_332:                  \n"
+    "addi       a2, a2, 2*4             \n" // destination += 2 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x10_x15_332\n"  // repeat while a3 < a4 (presumably s < s_end -- confirm against ASM_INIT_BLIT)
+    "retw                               \n"  // return straight out of the asm block once the x1.0 path finishes
+
+"BGEZ_x10_x15_332:                  \n"
+// diff is non-negative: x1.5 path
+    "sll        a14,a13                 \n"
+    "s32i       a14,a2, 8               \n" // store 4,5
+    "xsr        a6, SAR                 \n" // swap the shift amount (a6 <-> SAR)
+    "sll        a14,a11                 \n"
+    "s32i       a14,a2, 4               \n" // store 2
+    "sll        a14,a12                 \n"
+    "s16i       a14,a2, 4               \n" //   store 3
+
+    "addmi      a9, a9, -32768          \n" // diff -= 32768
+    "addi       a2, a2, 3*4             \n" // destination += 3 * sizeof(uint32_t)
+    "bltu       a3, a4, LOOP_x10_x15_332\n"  // repeat while a3 < a4; otherwise fall out of the asm block
     );
   }
 
 #else
 
   // x5 ~ x6
-  void IRAM_ATTR blit_x50_x60_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x50_x60_565(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int shift, int ratio)  // per pass: reads 2 two-byte pixels, writes 5 or 6 output words, stops once s >= s_end
   {
-    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
-    uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio - 32768) >> 1;
-
-    src_length = (src_length + 1) >> 1;
-    while (src_length--)
+    for (;;)  // runs at least once; the only exits are the s >= s_end checks at the end of each branch
     {
-      uint32_t p1l = s[3];
-      uint32_t p1h = s[2];
-      uint32_t p0l = s[1];
-      uint32_t p0h = s[0];
-      p1l = p[(p1l<<1)+1];
-      p1h = p[(p1h<<1)  ];
-      p0l = p[(p0l<<1)+1];
-      p0h = p[(p0h<<1)  ];
+      uint32_t s0h = s[0];  // pixel 0: first byte
+      uint32_t s0l = s[1];  // pixel 0: second byte
+      uint32_t s1h = s[2];  // pixel 1: first byte
+      uint32_t s1l = s[3];  // pixel 1: second byte
+      s0h = p[(s0h << 1)  ];  // first byte selects an even table entry
+      s0l = p[(s0l << 1)+1];  // second byte selects an odd table entry
+      s1h = p[(s1h << 1)  ];
+      s1l = p[(s1l << 1)+1];
       s += 4;
-      uint32_t color1 = p1h + p1l;
-      uint32_t color0 = p0h + p0l;
-
+      uint32_t s0 = s0h + s0l;  // the two partial lookups are summed into one 32-bit value
+      uint32_t s1 = s1h + s1l;
+
+      uint32_t s0even = s0 << shift;  // values for words written with the current shift
+      uint32_t s1even = s1 << shift;
+      shift ^= 8;  // shift alternates between its two values (caller passes 0 or 8)
+      uint32_t s0odd = s0 << shift;  // values for words written with the alternate shift
+      uint32_t s1odd = s1 << shift;
+      d[0] = s0even;  // words 0..4 are written speculatively; the branches patch or extend them
+      d[1] = s0odd;
+      d[2] = s0even;
+      d[3] = s1odd;
+      d[4] = s1even;
       if (diff < 0)
       {
         diff += ratio;
-        d[1] = color0 << shift1;
-        d[3] = color1 << shift1;
-        d[0] = color0 <<= shift0;
-        d[4] = color1 <<= shift0;
-        d[2] = (color0 & 0xFFFF0000) + (color1 & 0xFFFF);
+        *((uint16_t*)&d[2]) = s1even;  // overwrite the first 16 bits (in memory) of d[2] with s1even's low half
         d += 5;
-        std::swap(shift0, shift1);
+        if (s >= s_end) { return; }  // 5 words emitted: shift stays toggled for the next pass
       }
       else
       {
         diff -= 32768;
-        d[1] = color0 << shift1;
-        d[3] = color1 << shift1;
-        d[5] = color1 << shift1;
-        d[4] = color1 << shift0;
-        d[0] = color0 << shift0;
-        d[2] = color0 << shift0;
+        d[5] = s1odd;  // sixth word
+        shift ^= 8;  // 6 words emitted: restore shift to its value at the top of the pass
         d += 6;
+        if (s >= s_end) { return; }
       }
     }
   }
 
   // x5 ~ x6
-  void IRAM_ATTR blit_x50_x60_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x50_x60_332(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int shift, int ratio)  // per pass: reads 2 one-byte pixels, writes 5 or 6 output words, stops once s >= s_end
   {
-    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
-    uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio - 32768) >> 1;
-
-    src_length = (src_length + 1) >> 1;
-    while (src_length--)
+    for (;;)  // runs at least once; the only exits are the s >= s_end checks at the end of each branch
     {
-      auto p0 = s[0];
-      auto p1 = s[1];
+      uint32_t s0 = s[0];  // must be 32-bit: `auto` would deduce uint8_t and truncate the table word assigned below
+      uint32_t s1 = s[1];
       s += 2;
-      uint32_t color0 = p[p0];
-      uint32_t color1 = p[p1];
+      s0 = p[s0];  // source byte indexes the table directly; the full 32-bit entry is kept
+      s1 = p[s1];
+
+      uint32_t s0even = s0 << shift;  // values for words written with the current shift
+      uint32_t s1even = s1 << shift;
+      shift ^= 8;  // shift alternates between its two values (caller passes 0 or 8)
+      uint32_t s0odd = s0 << shift;  // values for words written with the alternate shift
+      uint32_t s1odd = s1 << shift;
+      d[0] = s0even;  // words 0..4 are written speculatively; the branches patch or extend them
+      d[1] = s0odd;
+      d[2] = s0even;
+      d[3] = s1odd;
+      d[4] = s1even;
       if (diff < 0)
       {
         diff += ratio;
-        d[1] = color0 << shift1;
-        d[3] = color1 << shift1;
-        d[0] = color0 <<= shift0;
-        d[4] = color1 <<= shift0;
-        d[2] = (color0 & 0xFFFF0000) + (color1 & 0xFFFF);
+        *((uint16_t*)&d[2]) = s1even;  // overwrite the first 16 bits (in memory) of d[2] with s1even's low half
         d += 5;
-        std::swap(shift0, shift1);
+        if (s >= s_end) { return; }  // 5 words emitted: shift stays toggled for the next pass
       }
       else
       {
         diff -= 32768;
-        d[1] = color0 << shift1;
-        d[3] = color1 << shift1;
-        d[5] = color1 << shift1;
-        d[4] = color1 << shift0;
-        d[0] = color0 << shift0;
-        d[2] = color0 << shift0;
+        d[5] = s1odd;  // sixth word
+        shift ^= 8;  // 6 words emitted: restore shift to its value at the top of the pass
         d += 6;
+        if (s >= s_end) { return; }
       }
     }
   }
 
   // x4 ~ x5
-  void IRAM_ATTR blit_x40_x50_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x40_x50_565(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int shift, int ratio)  // per pass: reads 2 two-byte pixels, writes 4 or 5 output words, stops once s >= s_end
   {
-    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
-    uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio - 32768) >> 1;
-
-    src_length = (src_length + 1) >> 1;
-    while (src_length--)
+    for (;;)  // runs at least once; the only exits are the s >= s_end checks at the end of each branch
     {
-      uint32_t p1l = s[3];
-      uint32_t p1h = s[2];
-      uint32_t p0l = s[1];
-      uint32_t p0h = s[0];
-      p1l = p[(p1l<<1)+1];
-      p1h = p[(p1h<<1)  ];
-      p0l = p[(p0l<<1)+1];
-      p0h = p[(p0h<<1)  ];
+      uint32_t s0h = s[0];  // pixel 0: first byte
+      uint32_t s0l = s[1];  // pixel 0: second byte
+      uint32_t s1h = s[2];  // pixel 1: first byte
+      uint32_t s1l = s[3];  // pixel 1: second byte
+      s0h = p[(s0h << 1)  ];  // first byte selects an even table entry
+      s0l = p[(s0l << 1)+1];  // second byte selects an odd table entry
+      s1h = p[(s1h << 1)  ];
+      s1l = p[(s1l << 1)+1];
       s += 4;
-      uint32_t color1 = p1h + p1l;
-      uint32_t color0 = p0h + p0l;
-
+      uint32_t s0 = s0h + s0l;  // the two partial lookups are summed into one 32-bit value
+      uint32_t s1 = s1h + s1l;
+
+      uint32_t s0even = s0 << shift;  // values for words written with the current shift
+      uint32_t s1even = s1 << shift;
+      shift ^= 8;  // shift alternates between its two values (caller passes 0 or 8)
+      uint32_t s0odd = s0 << shift;  // values for words written with the alternate shift
+      uint32_t s1odd = s1 << shift;
+      d[0] = s0even;  // words 0, 1 and 3 are common to both branches
+      d[1] = s0odd;
+      d[3] = s1odd;
       if (diff < 0)
       {
         diff += ratio;
-        d[1] = color0 << shift1;
-        d[3] = color1 << shift1;
-        d[0] = color0 << shift0;
-        d[2] = color1 << shift0;
+        d[2] = s1even;
+        shift ^= 8;  // 4 words emitted: restore shift to its value at the top of the pass
         d += 4;
+        if (s >= s_end) { return; }
       }
       else
       {
         diff -= 32768;
-        d[1] = color0 << shift1;
-        d[3] = color1 << shift1;
-        color0 <<= shift0;
-        color1 <<= shift0;
-        d[0] = color0;
-        d[4] = color1;
-        d[2] = (color0 & 0xFFFF0000) + (color1 & 0xFFFF);
-        std::swap(shift0, shift1);
+        d[4] = s1even;  // fifth word
+        d[2] = s0even;
+        *((uint16_t*)&d[2]) = s1even;  // then overwrite the first 16 bits (in memory) of d[2] with s1even's low half
         d += 5;
+        if (s >= s_end) { return; }  // 5 words emitted: shift stays toggled for the next pass
       }
     }
   }
 
   // x4 ~ x5
-  void IRAM_ATTR blit_x40_x50_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x40_x50_332(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int shift, int ratio)  // per pass: reads 2 one-byte pixels, writes 4 or 5 output words, stops once s >= s_end
   {
-    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
-    uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio - 32768) >> 1;
-
-    src_length = (src_length + 1) >> 1;
-    while (src_length--)
+    for (;;)  // runs at least once; the only exits are the s >= s_end checks at the end of each branch
     {
-      auto p0 = s[0];
-      auto p1 = s[1];
+      uint32_t s0 = s[0];  // must be 32-bit: `auto` would deduce uint8_t and truncate the table word assigned below
+      uint32_t s1 = s[1];
       s += 2;
-      uint32_t color0 = p[p0];
-      uint32_t color1 = p[p1];
+      s0 = p[s0];  // source byte indexes the table directly; the full 32-bit entry is kept
+      s1 = p[s1];
+
+      uint32_t s0even = s0 << shift;  // values for words written with the current shift
+      uint32_t s1even = s1 << shift;
+      shift ^= 8;  // shift alternates between its two values (caller passes 0 or 8)
+      uint32_t s0odd = s0 << shift;  // values for words written with the alternate shift
+      uint32_t s1odd = s1 << shift;
+      d[0] = s0even;  // words 0, 1 and 3 are common to both branches
+      d[1] = s0odd;
+      d[3] = s1odd;
       if (diff < 0)
       {
         diff += ratio;
-        d[1] = color0 << shift1;
-        d[3] = color1 << shift1;
-        d[0] = color0 << shift0;
-        d[2] = color1 << shift0;
+        d[2] = s1even;
+        shift ^= 8;  // 4 words emitted: restore shift to its value at the top of the pass
         d += 4;
+        if (s >= s_end) { return; }
       }
       else
       {
         diff -= 32768;
-        d[1] = color0 << shift1;
-        d[3] = color1 << shift1;
-        color0 <<= shift0;
-        color1 <<= shift0;
-        d[0] = color0;
-        d[4] = color1;
-        d[2] = (color0 & 0xFFFF0000) + (color1 & 0xFFFF);
-        std::swap(shift0, shift1);
+        d[4] = s1even;  // fifth word
+        d[2] = s0even;
+        *((uint16_t*)&d[2]) = s1even;  // then overwrite the first 16 bits (in memory) of d[2] with s1even's low half
         d += 5;
+        if (s >= s_end) { return; }  // 5 words emitted: shift stays toggled for the next pass
       }
     }
   }
 
   // x3 ~ x4
-  void IRAM_ATTR blit_x30_x40_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x30_x40_565(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int shift, int ratio)  // per pass: reads 2 two-byte pixels, writes 3 or 4 output words, stops once s >= s_end
   {
-    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
-    uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio - 32768) >> 1;
-
-    src_length = (src_length + 1) >> 1;
-    while (src_length--)
+    for (;;)  // runs at least once; the only exits are the s >= s_end checks at the end of each branch
     {
-      uint32_t p1l = s[3];
-      uint32_t p1h = s[2];
-      uint32_t p0l = s[1];
-      uint32_t p0h = s[0];
-      p1l = p[(p1l<<1)+1];
-      p1h = p[(p1h<<1)  ];
-      p0l = p[(p0l<<1)+1];
-      p0h = p[(p0h<<1)  ];
+      uint32_t s0h = s[0];  // pixel 0: first byte
+      uint32_t s0l = s[1];  // pixel 0: second byte
+      uint32_t s1h = s[2];  // pixel 1: first byte
+      uint32_t s1l = s[3];  // pixel 1: second byte
+      s0h = p[(s0h << 1)  ];  // first byte selects an even table entry
+      s0l = p[(s0l << 1)+1];  // second byte selects an odd table entry
+      s1h = p[(s1h << 1)  ];
+      s1l = p[(s1l << 1)+1];
       s += 4;
-      uint32_t color1 = p1h + p1l;
-      uint32_t color0 = p0h + p0l;
-
+      uint32_t s0 = s0h + s0l;  // the two partial lookups are summed into one 32-bit value
+      uint32_t s1 = s1h + s1l;
+
+      uint32_t s0even = s0 << shift;  // values for words written with the current shift
+      uint32_t s1even = s1 << shift;
+      shift ^= 8;  // shift alternates between its two values (caller passes 0 or 8)
+      uint32_t s0odd = s0 << shift;  // values for words written with the alternate shift
+      uint32_t s1odd = s1 << shift;
+      d[0] = s0even;  // words 0..2 are written speculatively; the branches patch or extend them
+      d[1] = s0odd;
+      d[2] = s1even;
       if (diff < 0)
       {
         diff += ratio;
-        d[0] = color0 << shift0;
-        d[2] = color1 << shift0;
-        color0 = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF));
-        d[1] = color0 << shift1;
-        std::swap(shift0, shift1);
+        *((uint16_t*)&d[1]) = s1odd;  // overwrite the first 16 bits (in memory) of d[1] with s1odd's low half
         d += 3;
+        if (s >= s_end) { return; }  // 3 words emitted: shift stays toggled for the next pass
       }
       else
       {
         diff -= 32768;
-        d[0] = color0 << shift0;
-        d[2] = color1 << shift0;
-        d[1] = color0 << shift1;
-        d[3] = color1 << shift1;
+        d[3] = s1odd;  // fourth word
+        shift ^= 8;  // 4 words emitted: restore shift to its value at the top of the pass
         d += 4;
+        if (s >= s_end) { return; }
       }
     }
   }
 
   // x3 ~ x4
-  void IRAM_ATTR blit_x30_x40_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x30_x40_332(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int shift, int ratio)  // per pass: reads 2 one-byte pixels, writes 3 or 4 output words, stops once s >= s_end
   {
-    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
-    uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio - 32768) >> 1;
-
-    src_length = (src_length + 1) >> 1;
-    while (src_length--)
+    for (;;)  // runs at least once; the only exits are the s >= s_end checks at the end of each branch
     {
-      auto p0 = s[0];
-      auto p1 = s[1];
+      uint32_t s0 = s[0];  // must be 32-bit: `auto` would deduce uint8_t and truncate the table word assigned below
+      uint32_t s1 = s[1];
       s += 2;
-      uint32_t color0 = p[p0];
-      uint32_t color1 = p[p1];
+      s0 = p[s0];  // source byte indexes the table directly; the full 32-bit entry is kept
+      s1 = p[s1];
+
+      uint32_t s0even = s0 << shift;  // values for words written with the current shift
+      uint32_t s1even = s1 << shift;
+      shift ^= 8;  // shift alternates between its two values (caller passes 0 or 8)
+      uint32_t s0odd = s0 << shift;  // values for words written with the alternate shift
+      uint32_t s1odd = s1 << shift;
+      d[0] = s0even;  // words 0..2 are written speculatively; the branches patch or extend them
+      d[1] = s0odd;
+      d[2] = s1even;
       if (diff < 0)
       {
         diff += ratio;
-        d[0] = color0 << shift0;
-        d[2] = color1 << shift0;
-        color0 = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF));
-        d[1] = color0 << shift1;
-        std::swap(shift0, shift1);
+        *((uint16_t*)&d[1]) = s1odd;  // overwrite the first 16 bits (in memory) of d[1] with s1odd's low half
         d += 3;
+        if (s >= s_end) { return; }  // 3 words emitted: shift stays toggled for the next pass
       }
       else
       {
         diff -= 32768;
-        d[0] = color0 << shift0;
-        d[2] = color1 << shift0;
-        d[1] = color0 << shift1;
-        d[3] = color1 << shift1;
+        d[3] = s1odd;  // fourth word
+        shift ^= 8;  // 4 words emitted: restore shift to its value at the top of the pass
         d += 4;
+        if (s >= s_end) { return; }
       }
     }
   }
 
   // x2 ~ x3
-  void IRAM_ATTR blit_x20_x30_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x20_x30_565(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int shift, int ratio)  // per pass: reads 2 two-byte pixels, writes 2 or 3 output words, stops once s >= s_end
   {
-    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
-    uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio - 32768) >> 1;
-
-    src_length = (src_length + 1) >> 1;
-    while (src_length--)
+    for (;;)  // runs at least once; the only exits are the s >= s_end checks at the end of each branch
     {
-      uint32_t p1l = s[3];
-      uint32_t p1h = s[2];
-      uint32_t p0l = s[1];
-      uint32_t p0h = s[0];
-      p1l = p[(p1l<<1)+1];
-      p1h = p[(p1h<<1)  ];
-      p0l = p[(p0l<<1)+1];
-      p0h = p[(p0h<<1)  ];
+      uint32_t s0h = s[0];  // pixel 0: first byte
+      uint32_t s0l = s[1];  // pixel 0: second byte
+      uint32_t s1h = s[2];  // pixel 1: first byte
+      uint32_t s1l = s[3];  // pixel 1: second byte
+      s0h = p[(s0h << 1)  ];  // first byte selects an even table entry
+      s0l = p[(s0l << 1)+1];  // second byte selects an odd table entry
+      s1h = p[(s1h << 1)  ];
+      s1l = p[(s1l << 1)+1];
       s += 4;
-      uint32_t color1 = p1h + p1l;
-      uint32_t color0 = p0h + p0l;
+      uint32_t s0 = s0h + s0l;  // the two partial lookups are summed into one 32-bit value
+      uint32_t s1 = s1h + s1l;
 
+      uint32_t s0even = s0 << shift;
+      d[0] = s0even;  // word 0 is common to both branches
       if (diff < 0)
       {
         diff += ratio;
-        color0 <<= shift0;
-        color1 <<= shift1;
-        d[0] = color0;
-        d[1] = color1;
+        shift ^= 8;  // word 1 uses the alternate shift
+        uint32_t s1odd = s1 << shift;
+        d[1] = s1odd;
+        shift ^= 8;  // 2 words emitted: restore shift to its value at the top of the pass
         d += 2;
+        if (s >= s_end) { return; }
       }
       else
       {
         diff -= 32768;
-        d[0] = color0 << shift0;
-        d[2] = color1 << shift0;
-        d[1] = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF)) << shift1;
+        uint32_t s1even = s1 << shift;
+        shift ^= 8;  // 3 words emitted: shift stays toggled for the next pass
+        uint32_t s0odd = s0 << shift;
+        uint32_t s1odd = s1 << shift;
+        d[1] = s0odd;
+        d[2] = s1even;
+        *((uint16_t*)&d[1]) = s1odd;  // overwrite the first 16 bits (in memory) of d[1] with s1odd's low half
         d += 3;
-        std::swap(shift0, shift1);
+        if (s >= s_end) { return; }
       }
     }
   }
 
   // x2 ~ x3
-  void IRAM_ATTR blit_x20_x30_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x20_x30_332(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int shift, int ratio)  // per pass: reads 2 one-byte pixels, writes 2 or 3 output words, stops once s >= s_end
   {
-    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
-    uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio - 32768) >> 1;
-
-    src_length = (src_length + 1) >> 1;
-    while (src_length--)
+    for (;;)  // runs at least once; the only exits are the s >= s_end checks at the end of each branch
     {
-      auto p0 = s[0];
-      auto p1 = s[1];
+      uint32_t s0 = s[0];  // must be 32-bit: `auto` would deduce uint8_t and truncate the table word assigned below
+      uint32_t s1 = s[1];
       s += 2;
-      uint32_t color0 = p[p0];
-      uint32_t color1 = p[p1];
+      s0 = p[s0];  // source byte indexes the table directly; the full 32-bit entry is kept
+      s1 = p[s1];
+
+      uint32_t s0even = s0 << shift;
+      d[0] = s0even;  // word 0 is common to both branches
       if (diff < 0)
       {
         diff += ratio;
-        color0 <<= shift0;
-        color1 <<= shift1;
-        d[0] = color0;
-        d[1] = color1;
+        shift ^= 8;  // word 1 uses the alternate shift
+        uint32_t s1odd = s1 << shift;
+        d[1] = s1odd;
+        shift ^= 8;  // 2 words emitted: restore shift to its value at the top of the pass
         d += 2;
+        if (s >= s_end) { return; }
       }
       else
       {
-        // diff += ratio_3;
         diff -= 32768;
-        d[0] = color0 << shift0;
-        d[2] = color1 << shift0;
-        d[1] = ((color0 & 0xFFFF0000) + (color1 & 0xFFFF)) << shift1;
+        uint32_t s1even = s1 << shift;
+        shift ^= 8;  // 3 words emitted: shift stays toggled for the next pass
+        uint32_t s0odd = s0 << shift;
+        uint32_t s1odd = s1 << shift;
+        d[1] = s0odd;
+        d[2] = s1even;
+        *((uint16_t*)&d[1]) = s1odd;  // overwrite the first 16 bits (in memory) of d[1] with s1odd's low half
         d += 3;
-        std::swap(shift0, shift1);
+        if (s >= s_end) { return; }
       }
     }
   }
 
   // x1.5~x2.0
-  void IRAM_ATTR blit_x15_x20_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x15_x20_565(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int shift, int ratio)  // per pass: reads 4 two-byte pixels, writes 3 or 4 output words, stops once s >= s_end
   {
-    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
-    uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio - 32768) >> 1;
-
-    src_length = (src_length + 3) >> 2;
-    while (src_length--)
+    for (;;)  // runs at least once; the only exits are the s >= s_end checks at the end of each branch
     {
-      uint32_t p3l = s[7];
-      uint32_t p3h = s[6];
-      uint32_t p2l = s[5];
-      uint32_t p2h = s[4];
-      p3l = p[(p3l<<1)+1];
-      p3h = p[(p3h<<1)  ];
-      p2l = p[(p2l<<1)+1];
-      p2h = p[(p2h<<1)  ];
-      uint32_t color3 = p3h + p3l;
-
-      uint32_t p1l = s[3];
-      uint32_t p1h = s[2];
-      uint32_t color2 = p2h + p2l;
-      uint32_t p0h = s[0];
-      uint32_t p0l = s[1];
-      p1l = p[(p1l<<1)+1];
-      p1h = p[(p1h<<1)  ];
-      p0l = p[(p0l<<1)+1];
-      p0h = p[(p0h<<1)  ];
+      uint32_t s0h = s[0];  // pixels 0 and 1: two bytes each
+      uint32_t s0l = s[1];
+      uint32_t s1h = s[2];
+      uint32_t s1l = s[3];
+      s0h = p[(s0h << 1)  ];  // first byte selects an even table entry
+      s0l = p[(s0l << 1)+1];  // second byte selects an odd table entry
+      s1h = p[(s1h << 1)  ];
+      s1l = p[(s1l << 1)+1];
+      uint32_t s0 = s0h + s0l;  // the two partial lookups are summed into one 32-bit value
+      uint32_t s1 = s1h + s1l;
+
+      uint32_t s2h = s[4];  // pixels 2 and 3: same scheme
+      uint32_t s2l = s[5];
+      uint32_t s3h = s[6];
+      uint32_t s3l = s[7];
+      s2h = p[(s2h << 1)  ];
+      s2l = p[(s2l << 1)+1];
+      s3h = p[(s3h << 1)  ];
+      s3l = p[(s3l << 1)+1];
       s += 8;
-      uint32_t color1 = p1h + p1l;
-      uint32_t color0 = p0h + p0l;
+      uint32_t s2 = s2h + s2l;
+      uint32_t s3 = s3h + s3l;
+
+      d[0] = s0 << shift;  // words 0 and 2 use the current shift in both branches
+      d[2] = s2 << shift;
 
       if (diff < 0)
       {
         diff += ratio;
-        color1 = ((color1 & 0xFFFF0000) + (color2 & 0xFFFF));
-        color2 = ((color2 & 0xFFFF0000) + (color3 & 0xFFFF));
-        d[0] = color0 << shift0;
-        d[2] = color2 << shift0;
-        d[1] = color1 << shift1;
+        *((uint16_t*)&d[2]) = s3 << shift;  // overwrite the first 16 bits (in memory) of d[2] with pixel 3's low half
+        shift ^= 8;  // 3 words emitted: shift stays toggled for the next pass
+        d[1] = s1 << shift;
+        *((uint16_t*)&d[1]) = s2 << shift;  // overwrite the first 16 bits (in memory) of d[1] with pixel 2's low half
         d += 3;
-        std::swap(shift0, shift1);
+        if (s >= s_end) { return; }
       }
       else
       {
         diff -= 32768;
-        d[0] = color0 << shift0;
-        d[2] = color2 << shift0;
-        d[1] = color1 << shift1;
-        d[3] = color3 << shift1;
+        shift ^= 8;  // words 1 and 3 use the alternate shift
+        d[1] = s1 << shift;
+        d[3] = s3 << shift;
+        shift ^= 8;  // 4 words emitted: restore shift to its value at the top of the pass
         d += 4;
+        if (s >= s_end) { return; }
       }
     }
   }
 
   // x1.5~x2.0
-  void IRAM_ATTR blit_x15_x20_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x15_x20_332(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int shift, int ratio)  // per pass: reads 4 one-byte pixels, writes 3 or 4 output words, stops once s >= s_end
   {
-    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
-    uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio - 32768) >> 1;
-
-    src_length = (src_length + 3) >> 2;
-    while (src_length--)
+    for (;;)  // runs at least once; the only exits are the s >= s_end checks at the end of each branch
     {
-      uint32_t color0 = s[0];
-      uint32_t color1 = s[1];
-      uint32_t color2 = s[2];
-      uint32_t color3 = s[3];
-      color0 = p[color0];
-      color1 = p[color1];
-      color2 = p[color2];
-      color3 = p[color3];
+      uint32_t s0 = s[0];  // 32-bit locals: each is reused to hold the looked-up table word
+      uint32_t s1 = s[1];
+      uint32_t s2 = s[2];
+      uint32_t s3 = s[3];
+      s0 = p[s0];  // source byte indexes the table directly
+      s1 = p[s1];
+      s2 = p[s2];
+      s3 = p[s3];
       s += 4;
+
+      d[0] = s0 << shift;  // words 0 and 2 use the current shift in both branches
+      d[2] = s2 << shift;
+
       if (diff < 0)
       {
         diff += ratio;
-        color1 = ((color1 & 0xFFFF0000) + (color2 & 0xFFFF));
-        color2 = ((color2 & 0xFFFF0000) + (color3 & 0xFFFF));
-        d[0] = color0 << shift0;
-        d[2] = color2 << shift0;
-        d[1] = color1 << shift1;
+        *((uint16_t*)&d[2]) = s3 << shift;  // overwrite the first 16 bits (in memory) of d[2] with pixel 3's low half
+        shift ^= 8;  // 3 words emitted: shift stays toggled for the next pass
+        d[1] = s1 << shift;
+        *((uint16_t*)&d[1]) = s2 << shift;  // overwrite the first 16 bits (in memory) of d[1] with pixel 2's low half
         d += 3;
-        std::swap(shift0, shift1);
+        if (s >= s_end) { return; }
       }
       else
       {
         diff -= 32768;
-        d[0] = color0 << shift0;
-        d[2] = color2 << shift0;
-        d[1] = color1 << shift1;
-        d[3] = color3 << shift1;
+        shift ^= 8;  // words 1 and 3 use the alternate shift
+        d[1] = s1 << shift;
+        d[3] = s3 << shift;
+        shift ^= 8;  // 4 words emitted: restore shift to its value at the top of the pass
         d += 4;
+        if (s >= s_end) { return; }
       }
     }
   }
 
   // x1.0~x1.5
-  void IRAM_ATTR blit_x10_x15_565(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x10_x15_565(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int shift, int ratio)  // per pass: reads 4 two-byte pixels, writes 2 or 3 output words, stops once s >= s_end
   {
-    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
-    uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio - 32768) >> 1;
-
-    src_length = (src_length + 3) >> 2;
-    while (src_length--)
+    for (;;)  // runs at least once; the only exits are the s >= s_end checks at the end of each branch
     {
-      uint32_t p3l = s[7];
-      uint32_t p3h = s[6];
-      uint32_t p2l = s[5];
-      uint32_t p2h = s[4];
-      p3l = p[(p3l<<1)+1];
-      p3h = p[(p3h<<1)  ];
-      p2l = p[(p2l<<1)+1];
-      p2h = p[(p2h<<1)  ];
-      uint32_t color3 = p3h + p3l;
-
-      uint32_t p1l = s[3];
-      uint32_t p1h = s[2];
-      uint32_t color2 = p2h + p2l;
-      uint32_t p0h = s[0];
-      uint32_t p0l = s[1];
-      p1l = p[(p1l<<1)+1];
-      p1h = p[(p1h<<1)  ];
-      p0l = p[(p0l<<1)+1];
-      p0h = p[(p0h<<1)  ];
+      uint32_t s0h = s[0];  // pixels 0 and 1: two bytes each
+      uint32_t s0l = s[1];
+      uint32_t s1h = s[2];
+      uint32_t s1l = s[3];
+      s0h = p[(s0h << 1)  ];  // first byte selects an even table entry
+      s0l = p[(s0l << 1)+1];  // second byte selects an odd table entry
+      s1h = p[(s1h << 1)  ];
+      s1l = p[(s1l << 1)+1];
+      uint32_t s0 = s0h + s0l;  // the two partial lookups are summed into one 32-bit value
+      uint32_t s1 = s1h + s1l;
+
+      uint32_t s2h = s[4];  // pixels 2 and 3: same scheme
+      uint32_t s2l = s[5];
+      uint32_t s3h = s[6];
+      uint32_t s3l = s[7];
+      s2h = p[(s2h << 1)  ];
+      s2l = p[(s2l << 1)+1];
+      s3h = p[(s3h << 1)  ];
+      s3l = p[(s3l << 1)+1];
       s += 8;
-      uint32_t color1 = p1h + p1l;
-      uint32_t color0 = p0h + p0l;
+      uint32_t s2 = s2h + s2l;
+      uint32_t s3 = s3h + s3l;
 
+      d[0] = s0 << shift;  // word 0 starts as pixel 0 in both branches
       if (diff < 0)
       {
         diff += ratio;
-        color0 &= 0xFFFF0000;
-        color2 &= 0xFFFF0000;
-        color1 &= 0xFFFF;
-        color3 &= 0xFFFF;
-        color0 = (color0 + color1) << shift0;
-        color2 = (color2 + color3) << shift1;
-        d[0] = color0;
-        d[1] = color2;
+        *((uint16_t*)&d[0]) = s1 << shift;  // overwrite the first 16 bits (in memory) of d[0] with pixel 1's low half
+        shift ^= 8;  // word 1 uses the alternate shift
+        d[1] = s2 << shift;
+        *((uint16_t*)&d[1]) = s3 << shift;  // overwrite the first 16 bits (in memory) of d[1] with pixel 3's low half
+        shift ^= 8;  // 2 words emitted: restore shift to its value at the top of the pass
         d += 2;
+        if (s >= s_end) { return; }
       }
       else
       {
         diff -= 32768;
-        color1 = ((color1 & 0xFFFF0000) + (color2 & 0xFFFF));
-        color2 = ((color2 & 0xFFFF0000) + (color3 & 0xFFFF));
-        d[0] = color0 << shift0;
-        d[2] = color2 << shift0;
-        d[1] = color1 << shift1;
+        d[2] = s3 << shift;  // third word is pixel 3 with the current shift
+        shift ^= 8;  // 3 words emitted: shift stays toggled for the next pass
+        d[1] = s1 << shift;
+        *((uint16_t*)&d[1]) = s2 << shift;  // overwrite the first 16 bits (in memory) of d[1] with pixel 2's low half
         d += 3;
-        std::swap(shift0, shift1);
+        if (s >= s_end) { return; }
       }
     }
   }
 
   // x1.0~x1.5
-  void IRAM_ATTR blit_x10_x15_332(uint32_t* __restrict d, const uint8_t* s, size_t src_length, const uint32_t* p, int odd, int ratio)
+  void IRAM_ATTR blit_x10_x15_332(uint32_t* __restrict d, const uint8_t* s, const uint8_t* s_end, const uint32_t* p, int shift, int ratio)  // per pass: reads 4 one-byte pixels, writes 2 or 3 output words, stops once s >= s_end
   {
-    uint_fast8_t shift0 = odd < 0 ? 0 : 8;
-    uint_fast8_t shift1 = shift0 ^ 8;
     int diff = (ratio - 32768) >> 1;
-
-    src_length = (src_length + 3) >> 2;
-    while (src_length--)
+    for (;;)  // runs at least once; the only exits are the s >= s_end checks at the end of each branch
     {
-      uint32_t color0 = s[0];
-      uint32_t color1 = s[1];
-      uint32_t color2 = s[2];
-      uint32_t color3 = s[3];
-      color0 = p[color0];
-      color1 = p[color1];
-      color2 = p[color2];
-      color3 = p[color3];
+      uint32_t s0 = s[0];  // 32-bit locals: each is reused to hold the looked-up table word
+      uint32_t s1 = s[1];
+      uint32_t s2 = s[2];
+      uint32_t s3 = s[3];
+      s0 = p[s0];  // source byte indexes the table directly
+      s1 = p[s1];
+      s2 = p[s2];
+      s3 = p[s3];
       s += 4;
+
+      d[0] = s0 << shift;  // word 0 starts as pixel 0 in both branches
       if (diff < 0)
       {
         diff += ratio;
-        color0 &= 0xFFFF0000;
-        color2 &= 0xFFFF0000;
-        color1 &= 0xFFFF;
-        color3 &= 0xFFFF;
-        color0 = (color0 + color1) << shift0;
-        color2 = (color2 + color3) << shift1;
-        d[0] = color0;
-        d[1] = color2;
+        *((uint16_t*)&d[0]) = s1 << shift;  // overwrite the first 16 bits (in memory) of d[0] with pixel 1's low half
+        shift ^= 8;  // word 1 uses the alternate shift
+        d[1] = s2 << shift;
+        *((uint16_t*)&d[1]) = s3 << shift;  // overwrite the first 16 bits (in memory) of d[1] with pixel 3's low half
+        shift ^= 8;  // 2 words emitted: restore shift to its value at the top of the pass
         d += 2;
+        if (s >= s_end) { return; }
       }
       else
       {
         diff -= 32768;
-        color0 <<= shift0;
-        color3 <<= shift0;
-        color1 = ((color1 & 0xFFFF0000) + (color2 & 0xFFFF)) << shift1;
-        d[0] = color0;
-        d[1] = color1;
-        d[2] = color3;
-        std::swap(shift0, shift1);
+        d[2] = s3 << shift;  // third word is pixel 3 with the current shift
+        shift ^= 8;  // 3 words emitted: shift stays toggled for the next pass
+        d[1] = s1 << shift;
+        *((uint16_t*)&d[1]) = s2 << shift;  // overwrite the first 16 bits (in memory) of d[1] with pixel 2's low half
         d += 3;
+        if (s >= s_end) { return; }
       }
     }
   }
@@ -1719,7 +1712,7 @@ namespace lgfx
       size_t idx = ScanLineToY(i, odd_field);
       if (idx >= internal.panel_height)
       {
-        if (idx - internal.panel_height < 4)
+        if (idx - internal.panel_height < (internal.dma_desc_count << 1))
         {
           memset(&buf[_signal_spec_info.active_start], internal.BLACK_LEVEL >> 8, (_signal_spec_info.scanline_width - 22 - _signal_spec_info.active_start) << 1);
           // memset(&buf[_signal_spec_info.scanline_width - 22], internal.BLANKING_LEVEL >> 8, 22 << 1);
@@ -1737,18 +1730,19 @@ namespace lgfx
         {
           pidx = internal.pixel_per_bytes << 8;
         }
-
-        internal.fp_blit( (uint32_t*)(&buf[internal.leftside_index]),
-                          src,
-                          internal.panel_width,
-                          &internal.palette[pidx],
-                          (internal.burst_shift & 2) << 2,  // burst_shift ? 8 : 0
-                          internal.mul_ratio );
+        if (src) {
+          internal.fp_blit( (uint32_t*)(&buf[internal.leftside_index]),
+                            src,
+                            &src[internal.panel_width * internal.pixel_per_bytes],
+                            &internal.palette[pidx],
+                            (internal.burst_shift & 2) << 2,  // burst_shift ? 8 : 0
+                            internal.mul_ratio );
+        }
       }
     }
     else
     {
-      if (i < 12)
+      if (i < _signal_spec_info.sync_proc_count)
       {
         auto sync_proc = _signal_spec_info.sync_proc[odd_field][i];
         size_t half_index = (_signal_spec_info.scanline_width >> 1);
@@ -1846,8 +1840,10 @@ namespace lgfx
         prevcurrent_scanline = tmp;
       }
       esp_intr_disable(internal.isr_handle);
-      internal.dma_desc[0].empty = 0;
-      internal.dma_desc[1].empty = 0;
+      for (int i = 0; i < internal.dma_desc_count; ++i)
+      {
+        internal.dma_desc[i].empty = 0;
+      }
       esp_intr_free(internal.isr_handle);
       internal.isr_handle = nullptr;
 
@@ -1887,7 +1883,7 @@ namespace lgfx
       _scanline_cache.end();
     }
 
-    for (int i = 0; i < 2; i++) {
+    for (int i = 0; i < internal.dma_desc_count; i++) {
       internal.dma_desc[i].buf = nullptr;
     }
     internal.palette = nullptr;
@@ -1941,7 +1937,7 @@ namespace lgfx
       scale_index = (scale_index < 2 ? 2 : scale_index > 10 ? 10 : scale_index) - 2;
 
       /// 表示倍率に応じて出力データ生成関数を変更する;
-      static constexpr void (*fp_tbl_332[])(uint32_t*, const uint8_t*, size_t, const uint32_t*, int, int) =
+      static constexpr void (*fp_tbl_332[])(uint32_t*, const uint8_t*, const uint8_t*, const uint32_t*, int, int) =
       {
         blit_x10_x15_332,
         blit_x15_x20_332,
@@ -1953,7 +1949,7 @@ namespace lgfx
         blit_x40_x50_332,
         blit_x50_x60_332
       };
-      static constexpr void (*fp_tbl_565[])(uint32_t*, const uint8_t*, size_t, const uint32_t*, int, int) =
+      static constexpr void (*fp_tbl_565[])(uint32_t*, const uint8_t*, const uint8_t*, const uint32_t*, int, int) =
       {
         blit_x10_x15_565,
         blit_x15_x20_565,
@@ -1966,8 +1962,6 @@ namespace lgfx
         blit_x50_x60_565
       };
 
-      internal.fp_blit = (pixelPerBytes == 1 ? fp_tbl_332 : fp_tbl_565)[scale_index];
-
       /// 描画時の引き延ばし倍率テーブル (例:2=等倍  3=1.5倍  4=2倍)  上位4bitと下位4bitで２種類の倍率を指定する;
       /// この２種類の倍率をデータ生成時に切り替えて任意サイズの出力倍率を実現する;
       static constexpr const uint8_t scale_tbl[] = { 0x23, 0x34, 0x46, 0x46, 0x68, 0x68, 0x8A, 0x8A, 0xAC };
@@ -1975,6 +1969,8 @@ namespace lgfx
       uint8_t scale_l = scale_h >> 4;
       scale_h &= 0x0F;
 
+      internal.fp_blit = (pixelPerBytes == 1 ? fp_tbl_332 : fp_tbl_565)[scale_index];
+
       /// 表示倍率の比率を求める;
       int32_t mul_ratio_h = spec_info.display_width - (output_width * scale_h / 2);
       int32_t mul_ratio_l = spec_info.display_width - (output_width * scale_l / 2);
@@ -1985,7 +1981,6 @@ namespace lgfx
       }
       internal.mul_ratio = mul_ratio;
 
-
       // Xオフセットに表示倍率を掛けたものを描画開始位置情報に加える
       int scale_offset = (offset_x * spec_info.display_width + output_width-1) / output_width;
 
@@ -2023,20 +2018,21 @@ namespace lgfx
     size_t n = spec_info.scanline_width << 1;  // n=DMA 1回分のデータ量  最大値は4092;
     size_t len = (n + 3) & ~3u;
 
-    uint8_t* dmabuf = (uint8_t*)heap_alloc_dma(len * 2);    // 2ライン纏めて確保しておく;
+    uint8_t* dmabuf = (uint8_t*)heap_alloc_dma(len * internal.dma_desc_count);    // dma_descの個数分を纏めて確保しておく;
 // printf("dmabuf: %08x alloc\n", dmabuf);
     if (dmabuf == nullptr)
     {
       return false;
     }
-    memset(dmabuf, 0, len*2);
-    for (int i = 0; i < 2; i++) {
+    memset(dmabuf, 0, len * internal.dma_desc_count);
+    for (int i = 0; i < internal.dma_desc_count; ++i)
+    {
       internal.dma_desc[i].buf = &dmabuf[i * len];
       internal.dma_desc[i].owner = 1;
       internal.dma_desc[i].eof = 1;
       internal.dma_desc[i].length = len;
       internal.dma_desc[i].size = n;
-      internal.dma_desc[i].empty = (uint32_t)(&internal.dma_desc[1 - i]);
+      internal.dma_desc[i].empty = (uint32_t)(&internal.dma_desc[(i + 1) & (internal.dma_desc_count - 1)]);
     }
 
     internal.lines = _lines_buffer;

From 493441a681eef15d402865bdec14ac20421e1075 Mon Sep 17 00:00:00 2001
From: lovyan03 <42724151+lovyan03@users.noreply.github.com>
Date: Sun, 8 Jan 2023 21:35:53 +0900
Subject: [PATCH 09/12] tweak pin setting for M5UnitLCD/M5UnitOLED with ATOMS3.

---
 src/lgfx_user/M5UnitLCD.hpp  | 4 ++++
 src/lgfx_user/M5UnitOLED.hpp | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/src/lgfx_user/M5UnitLCD.hpp b/src/lgfx_user/M5UnitLCD.hpp
index fee2a156..2e1dff28 100644
--- a/src/lgfx_user/M5UnitLCD.hpp
+++ b/src/lgfx_user/M5UnitLCD.hpp
@@ -13,6 +13,8 @@
 #ifndef M5UNITLCD_SDA
  #if defined ( ARDUINO )
   #define M5UNITLCD_SDA SDA
+ #elif defined (CONFIG_IDF_TARGET_ESP32S3)
+  #define M5UNITLCD_SDA 2
  #elif defined (CONFIG_IDF_TARGET_ESP32C3)
   #define M5UNITLCD_SDA 1
  #else
@@ -23,6 +25,8 @@
 #ifndef M5UNITLCD_SCL
  #if defined ( ARDUINO )
   #define M5UNITLCD_SCL SCL
+ #elif defined (CONFIG_IDF_TARGET_ESP32S3)
+  #define M5UNITLCD_SCL 1
  #elif defined (CONFIG_IDF_TARGET_ESP32C3)
   #define M5UNITLCD_SCL 0
  #else
diff --git a/src/lgfx_user/M5UnitOLED.hpp b/src/lgfx_user/M5UnitOLED.hpp
index d047b151..e2810c8d 100644
--- a/src/lgfx_user/M5UnitOLED.hpp
+++ b/src/lgfx_user/M5UnitOLED.hpp
@@ -13,6 +13,8 @@
 #ifndef M5UNITOLED_SDA
  #if defined ( ARDUINO )
   #define M5UNITOLED_SDA SDA
+ #elif defined (CONFIG_IDF_TARGET_ESP32S3)
+  #define M5UNITOLED_SDA 2
  #elif defined (CONFIG_IDF_TARGET_ESP32C3)
   #define M5UNITOLED_SDA 1
  #else
@@ -23,6 +25,8 @@
 #ifndef M5UNITOLED_SCL
  #if defined ( ARDUINO )
   #define M5UNITOLED_SCL SCL
+ #elif defined (CONFIG_IDF_TARGET_ESP32S3)
+  #define M5UNITOLED_SCL 1
  #elif defined (CONFIG_IDF_TARGET_ESP32C3)
   #define M5UNITOLED_SCL 0
  #else

From 25cc0b2867fbb100bea7d7d4eecea07f2392b15f Mon Sep 17 00:00:00 2001
From: lovyan03 <42724151+lovyan03@users.noreply.github.com>
Date: Sun, 8 Jan 2023 22:07:22 +0900
Subject: [PATCH 10/12] =?UTF-8?q?Added=20PSRAM=20read=20task=20priority=20?=
 =?UTF-8?q?and=20Core=20settings=20to=20Panel=5FCVBS=E3=80=80(=20#295=20)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 doc/Panel_CVBS.md                          |   6 ++
 src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp | 103 +++++++++++----------
 src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp |   6 ++
 3 files changed, 68 insertions(+), 47 deletions(-)

diff --git a/doc/Panel_CVBS.md b/doc/Panel_CVBS.md
index e5948846..9b302011 100644
--- a/doc/Panel_CVBS.md
+++ b/doc/Panel_CVBS.md
@@ -160,6 +160,12 @@ public:
       cfg.chroma_level = 128; // 初期値128
       // 数値を下げると彩度が下がり、0で白黒になります。数値を上げると彩度が上がります。;
 
+      // バックグラウンドでPSRAMの読出しを行うタスクの優先度を設定;
+      // cfg.task_priority = 25;
+
+      // バックグラウンドでPSRAMの読出しを行うタスクを実行するCPUを選択 (APP_CPU_NUM or PRO_CPU_NUM);
+      // cfg.task_pinned_core = PRO_CPU_NUM;
+
       _panel_instance.config_detail(cfg);
     }
 
diff --git a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
index 96be886b..68186cf0 100644
--- a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
+++ b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
@@ -74,7 +74,7 @@ namespace lgfx
 
     typedef void(*tasktype)(void*);
 
-    bool begin(size_t line_width)
+    bool begin(size_t line_width, UBaseType_t task_priority, BaseType_t task_pinned_core)
     {
       _datasize = line_width;
       _buffer = (uint8_t*)heap_alloc_dma((line_width * cache_num + 3) & ~3u);
@@ -84,7 +84,14 @@ namespace lgfx
       _push_idx = 0;
       _using_idx = cache_num - 1;
       prev_index = 0;
-      xTaskCreatePinnedToCore(task_memcpy, "task_memcpy", 2048, this, 25, &_task_handle, PRO_CPU_NUM);
+      if ((uint32_t)task_pinned_core < portNUM_PROCESSORS)
+      {
+        xTaskCreatePinnedToCore(task_memcpy, "task_memcpy", 2048, this, task_priority, &_task_handle, task_pinned_core);
+      }
+      else
+      {
+        xTaskCreate(task_memcpy, "task_memcpy", 2048, this, task_priority, &_task_handle);
+      }
       return true;
     }
 
@@ -519,7 +526,7 @@ namespace lgfx
     { setup_palette_ntsc_332
     , setup_palette_ntsc_565
     , setup_palette_ntsc_gray
-    , 0x049748    // 14.318237 // 映像に縞模様ノイズが出にくい;  ( 0x049746 = 14.318181 // 要求仕様に近い )
+    , 0x049748    // 14.318237 // 映像に縞模様ノイズが出にくい;  ( 0x049746 = 14.318181 = 3.579545 x4 // 要求仕様に近い )
     , 286         // 286mV = 0IRE
     , 340         // 340mV = 7.5IRE  米国仕様では黒レベルは 7.5IRE
     , 960         // 960mV  黄色の振幅の最大値が100IRE付近になるよう、白レベルは100IREよりも低く調整しておく;
@@ -529,7 +536,7 @@ namespace lgfx
     { setup_palette_ntsc_332
     , setup_palette_ntsc_565
     , setup_palette_ntsc_gray
-    , 0x049748    // 14.318237 // 映像に縞模様ノイズが出にくい;  ( 0x049746 = 14.318181 // 要求仕様に近い )
+    , 0x049748    // 14.318237 // 映像に縞模様ノイズが出にくい;  ( 0x049746 = 14.318181 = 3.579545 x4  // 要求仕様に近い )
     , 286         // 286mV = 0IRE
     , 286         // 286mV = 0IRE  日本仕様では黒レベルは 0IRE
     , 960
@@ -539,7 +546,7 @@ namespace lgfx
     { setup_palette_pal_332
     , setup_palette_pal_565
     , setup_palette_pal_gray
-    , 0x06A404    // 17.734476mhz ~4x
+    , 0x06A404    // 17.734476mhz ~4x   4.43361875 x4
     , 300
     , 300
     , 960
@@ -559,7 +566,7 @@ namespace lgfx
     { setup_palette_pal_332
     , setup_palette_pal_565
     , setup_palette_pal_gray
-    , 0x498D1    // 17.734476mhz ~4x
+    , 0x0498D1    // 3.58205625 x4
     , 300
     , 300
     , 960
@@ -587,20 +594,20 @@ namespace lgfx
     "l32i       a11,a11,0               \n"
 
 #define ASM_READ_RGB565_2PIXEL \
-    "l8ui       a12,a3, 1               \n" \
-    "l8ui       a13,a3, 3               \n" \
     "l8ui       a10,a3, 0               \n" \
+    "l8ui       a12,a3, 1               \n" \
     "l8ui       a11,a3, 2               \n" \
-    "addx8      a12,a12,a5              \n" \
-    "addx8      a13,a13,a5              \n" \
+    "l8ui       a13,a3, 3               \n" \
     "addx8      a10,a10,a5              \n" \
-    "addx8      a11,a11,a5              \n" \
-    "l32i       a12,a12,4               \n" \
     "l32i       a10,a10,0               \n" \
-    "l32i       a13,a13,4               \n" \
+    "addx8      a12,a12,a5              \n" \
+    "l32i       a12,a12,4               \n" \
+    "addx8      a11,a11,a5              \n" \
     "l32i       a11,a11,0               \n" \
-    "add        a10,a10,a12             \n" \
+    "addx8      a13,a13,a5              \n" \
+    "l32i       a13,a13,4               \n" \
     "addi       a3, a3, 4               \n" \
+    "add        a10,a10,a12             \n" \
     "add        a11,a11,a13             \n"
 
 #define ASM_READ_RGB332_4PIXEL \
@@ -619,34 +626,34 @@ namespace lgfx
     "addi       a3, a3, 4               \n"
 
 #define ASM_READ_RGB565_4PIXEL \
-    "l8ui       a14,a3, 1               \n" \
-    "l8ui       a15,a3, 3               \n" \
+    "l8ui       a12,a3, 1               \n" \
     "l8ui       a10,a3, 0               \n" \
+    "l8ui       a13,a3, 3               \n" \
     "l8ui       a11,a3, 2               \n" \
-    "addx8      a14,a14,a5              \n" \
-    "addx8      a15,a15,a5              \n" \
+    "addx8      a12,a12,a5              \n" \
+    "l32i       a12,a12,4               \n" \
     "addx8      a10,a10,a5              \n" \
-    "addx8      a11,a11,a5              \n" \
-    "l32i       a14,a14,4               \n" \
     "l32i       a10,a10,0               \n" \
-    "l32i       a15,a15,4               \n" \
+    "addx8      a13,a13,a5              \n" \
+    "l32i       a13,a13,4               \n" \
+    "addx8      a11,a11,a5              \n" \
     "l32i       a11,a11,0               \n" \
-    "add        a10,a10,a14             \n" \
     "l8ui       a14,a3, 5               \n" \
-    "add        a11,a11,a15             \n" \
-    "l8ui       a15,a3, 7               \n" \
+    "add        a10,a10,a12             \n" \
     "l8ui       a12,a3, 4               \n" \
+    "add        a11,a11,a13             \n" \
+    "l8ui       a15,a3, 7               \n" \
     "l8ui       a13,a3, 6               \n" \
     "addx8      a14,a14,a5              \n" \
-    "addx8      a15,a15,a5              \n" \
-    "addx8      a12,a12,a5              \n" \
-    "addx8      a13,a13,a5              \n" \
     "l32i       a14,a14,4               \n" \
+    "addx8      a12,a12,a5              \n" \
     "l32i       a12,a12,0               \n" \
+    "addx8      a15,a15,a5              \n" \
     "l32i       a15,a15,4               \n" \
+    "addx8      a13,a13,a5              \n" \
     "l32i       a13,a13,0               \n" \
-    "add        a12,a12,a14             \n" \
     "addi       a3, a3, 8               \n" \
+    "add        a12,a12,a14             \n" \
     "add        a13,a13,a15             \n"
 
 
@@ -674,15 +681,15 @@ namespace lgfx
     ASM_READ_RGB565_2PIXEL
 
     "sll        a12,a10                 \n"
+    "s32i       a12,a2, 0               \n" // 0,1 保存
+    "s32i       a12,a2, 8               \n" // 4,5 保存
     "sll        a13,a11                 \n"
+    "s32i       a13,a2, 16              \n" // 8,9 保存
     "xsr        a6, SAR                 \n" // シフト量スイッチ
     "sll        a14,a10                 \n"
-    "sll        a15,a11                 \n"
-    "s32i       a12,a2, 0               \n" // 0,1 保存
     "s32i       a14,a2, 4               \n" // 2,3 保存
-    "s32i       a12,a2, 8               \n" // 4,5 保存
+    "sll        a15,a11                 \n"
     "s32i       a15,a2, 12              \n" // 6,7 保存
-    "s32i       a13,a2, 16              \n" // 8,9 保存
     "bgez       a9, BGEZ_x50_x60_565    \n"
 // diffがマイナスの時の処理 x5.0
     "s16i       a13,a2, 8               \n" //   5 保存
@@ -711,15 +718,15 @@ namespace lgfx
     ASM_READ_RGB332_2PIXEL
 
     "sll        a12,a10                 \n"
+    "s32i       a12,a2, 0               \n" // 0,1 保存
+    "s32i       a12,a2, 8               \n" // 4,5 保存
     "sll        a13,a11                 \n"
+    "s32i       a13,a2, 16              \n" // 8,9 保存
     "xsr        a6, SAR                 \n" // シフト量スイッチ
     "sll        a14,a10                 \n"
-    "sll        a15,a11                 \n"
-    "s32i       a12,a2, 0               \n" // 0,1 保存
     "s32i       a14,a2, 4               \n" // 2,3 保存
-    "s32i       a12,a2, 8               \n" // 4,5 保存
+    "sll        a15,a11                 \n"
     "s32i       a15,a2, 12              \n" // 6,7 保存
-    "s32i       a13,a2, 16              \n" // 8,9 保存
     "bgez       a9, BGEZ_x50_x60_332    \n"
 // diffがマイナスの時の処理 x5.0
     "s16i       a13,a2, 8               \n" //   5 保存
@@ -748,12 +755,12 @@ namespace lgfx
     ASM_READ_RGB565_2PIXEL
 
     "sll        a12,a10                 \n"
+    "s32i       a12,a2, 0               \n" // 0,1 保存
     "sll        a13,a11                 \n"
     "xsr        a6, SAR                 \n" // シフト量スイッチ
     "sll        a14,a10                 \n"
-    "sll        a15,a11                 \n"
-    "s32i       a12,a2, 0               \n" // 0,1 保存
     "s32i       a14,a2, 4               \n" // 2,3 保存
+    "sll        a15,a11                 \n"
     "s32i       a15,a2, 12              \n" // 6,7 保存
 
     "bgez       a9, BGEZ_x40_x50_565    \n"
@@ -785,12 +792,12 @@ namespace lgfx
     ASM_READ_RGB332_2PIXEL
 
     "sll        a12,a10                 \n"
+    "s32i       a12,a2, 0               \n" // 0,1 保存
     "sll        a13,a11                 \n"
     "xsr        a6, SAR                 \n" // シフト量スイッチ
     "sll        a14,a10                 \n"
-    "sll        a15,a11                 \n"
-    "s32i       a12,a2, 0               \n" // 0,1 保存
     "s32i       a14,a2, 4               \n" // 2,3 保存
+    "sll        a15,a11                 \n"
     "s32i       a15,a2, 12              \n" // 6,7 保存
 
     "bgez       a9, BGEZ_x40_x50_332    \n"
@@ -1797,15 +1804,17 @@ namespace lgfx
 
     if (internal.use_psram)
     {
-      i = (i == - 8) ? 0 : _scanline_cache.prev_index;
-      for (;; ++i)
+      int32_t j = (i == - _scanline_cache.cache_num) ? 0 : _scanline_cache.prev_index;
+      i += _scanline_cache.cache_num << 1;
+      for (;j < i; ++j)
       {
-        int idx = ScanLineToY(i, odd_field);
+        int idx = ScanLineToY(j, odd_field);
         if (idx >= internal.panel_height) { break; }
-        if (idx < 0 || isSRAM(internal.lines[idx])) { continue; }
-        if (!_scanline_cache.prepare(internal.lines[idx])) { break; }
+        if (idx < 0 || isSRAM(internal.lines[idx])) { continue; }  // test idx first: never index lines[] with a negative idx
+        auto ptr = internal.lines[idx];
+        if (!_scanline_cache.prepare(ptr)) { break; }
       }
-      _scanline_cache.prev_index = i;
+      _scanline_cache.prev_index = j;
     }
 
     ISR_END();
@@ -2012,7 +2021,7 @@ namespace lgfx
     internal.use_psram = use_psram;
     if (use_psram)
     {
-      _scanline_cache.begin(( internal.panel_width * pixelPerBytes + 4 ) & ~3);
+      _scanline_cache.begin(( internal.panel_width * pixelPerBytes + 4 ) & ~3, _config_detail.task_priority, _config_detail.task_pinned_core);
     }
 
     size_t n = spec_info.scanline_width << 1;  // n=DMA 1回分のデータ量  最大値は4092;
diff --git a/src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp b/src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp
index 3b6b4de6..d385e520 100644
--- a/src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp
+++ b/src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp
@@ -57,6 +57,12 @@ namespace lgfx
 
       // 0=SRAM only (no use PSRAM) / 1=both(half PSRAM and half SRAM) / 2=PSRAM only (no use SRAM)
       uint8_t use_psram = 0;
+
+      /// background PSRAM read task priority
+      UBaseType_t task_priority = 25;
+
+      /// background PSRAM read task pinned core. (APP_CPU_NUM or PRO_CPU_NUM)
+      BaseType_t task_pinned_core = PRO_CPU_NUM;
     };
 
     color_depth_t setColorDepth(color_depth_t) override;

From caac1e280eb9cc0fd92f9432f8d8f903b1cf78fb Mon Sep 17 00:00:00 2001
From: lovyan03 <42724151+lovyan03@users.noreply.github.com>
Date: Mon, 9 Jan 2023 10:46:47 +0900
Subject: [PATCH 11/12] Adjusted the selection process of CPU cores to execute
 the PSRAM copy task in Panel_CVBS.

---
 src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp | 9 +++------
 src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp | 2 +-
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
index 68186cf0..61d5f629 100644
--- a/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
+++ b/src/lgfx/v1/platforms/esp32/Panel_CVBS.cpp
@@ -84,14 +84,11 @@ namespace lgfx
       _push_idx = 0;
       _using_idx = cache_num - 1;
       prev_index = 0;
-      if ((uint32_t)task_pinned_core < portNUM_PROCESSORS)
+      if ((uint32_t)task_pinned_core >= portNUM_PROCESSORS)
       {
-        xTaskCreatePinnedToCore(task_memcpy, "task_memcpy", 2048, this, task_priority, &_task_handle, task_pinned_core);
-      }
-      else
-      {
-        xTaskCreate(task_memcpy, "task_memcpy", 2048, this, task_priority, &_task_handle);
+        task_pinned_core = (xPortGetCoreID() + 1) % portNUM_PROCESSORS;
       }
+      xTaskCreatePinnedToCore(task_memcpy, "task_memcpy", 2048, this, task_priority, &_task_handle, task_pinned_core);
       return true;
     }
 
diff --git a/src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp b/src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp
index d385e520..a5a35ef4 100644
--- a/src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp
+++ b/src/lgfx/v1/platforms/esp32/Panel_CVBS.hpp
@@ -62,7 +62,7 @@ namespace lgfx
       UBaseType_t task_priority = 25;
 
       /// background PSRAM read task pinned core. (APP_CPU_NUM or PRO_CPU_NUM)
-      BaseType_t task_pinned_core = PRO_CPU_NUM;
+      BaseType_t task_pinned_core = -1;
     };
 
     color_depth_t setColorDepth(color_depth_t) override;

From 80eb0e84683163f280a0ddc1d8fde07ca35f17c8 Mon Sep 17 00:00:00 2001
From: lovyan03 <42724151+lovyan03@users.noreply.github.com>
Date: Mon, 9 Jan 2023 13:41:35 +0900
Subject: [PATCH 12/12] raising version 1.1.2

---
 README.md                   | 25 ++++++++++---------
 library.json                |  2 +-
 library.properties          |  2 +-
 src/LovyanGFX.hpp           | 50 ++++++++++++++++---------------------
 src/lgfx/v1/gitTagVersion.h |  2 +-
 5 files changed, 38 insertions(+), 43 deletions(-)

diff --git a/README.md b/README.md
index aa489559..693026c3 100644
--- a/README.md
+++ b/README.md
@@ -44,16 +44,16 @@ This library has the following advantages.
   - Composite video signal (NTSC, PAL) output (only ESP32)
 
 
-|        | SPI | I2C | 8bit Para |16bit Para | CVBS  |
-|:------:|:---:|:---:|:---------:|:---------:|:-----:|
-|ESP32   | HW  | HW  | HW (I2S)  | ---       |HW(I2S)|
-|ESP32-S2| HW  | HW  | HW (I2S)  | HW (I2S)  | ---   |
-|ESP32-S3| HW  | HW  |HW(LCD/CAM)|HW(LCD/CAM)| ---   |
-|ESP32-C3| HW  | HW  | SW        | ---       | ---   |
-|ESP8266 | HW  | SW  | ---       | ---       | ---   |
-|SAMD51  | HW  | HW  | ---       | ---       | ---   |
-|SAMD21  | HW  | HW  | ---       | ---       | ---   |
-|RP2040  | HW  | --- | ---       | ---       | ---   |
+|        | SPI | I2C | 8bit Para |16bit Para | RGB       | CVBS     |
+|:------:|:---:|:---:|:---------:|:---------:|:---------:|:--------:|
+|ESP32   | HW  | HW  | HW (I2S)  | ---       | ---       |HW(I2SDAC)|
+|ESP32-S2| HW  | HW  | HW (I2S)  | HW (I2S)  | ---       | ---      |
+|ESP32-S3| HW  | HW  |HW(LCD/CAM)|HW(LCD/CAM)|HW(LCD/CAM)| ---      |
+|ESP32-C3| HW  | HW  | SW        | ---       | ---       | ---      |
+|ESP8266 | HW  | SW  | ---       | ---       | ---       | ---      |
+|SAMD51  | HW  | HW  | ---       | ---       | ---       | ---      |
+|SAMD21  | HW  | HW  | ---       | ---       | ---       | ---      |
+|RP2040  | HW  | --- | ---       | ---       | ---       | ---      |
 
 ※ HW = HardWare Peripheral / SW = SoftWare implementation
 
@@ -79,6 +79,7 @@ This library has the following advantages.
     - ILI9486
     - ILI9488 (Makerfabs Touch with Camera)
     - IT8951 (M5Paper)
+    - NT35510/OTM8009A
     - R61529
     - RA8875
     - RM68120
@@ -544,7 +545,7 @@ TomThumb font : [3-clause BSD](src/lgfx/Fonts/GFXFF/TomThumb.h) Brian J. Swetlan
 実装予定 Unimplemented request
 ----------------
   - ディスプレイ Displays
-    - OTM8009A / NT35510
     - SEPS525
-
+    - LT7680A / LT7685
+    - RA8873 / RA8876
 
diff --git a/library.json b/library.json
index 10839db2..67cac662 100644
--- a/library.json
+++ b/library.json
@@ -11,7 +11,7 @@
     "type": "git",
     "url": "https://github.com/lovyan03/LovyanGFX.git"
   },
-  "version": "0.5.0",
+  "version": "1.1.2",
   "frameworks": ["arduino", "espidf"],
   "platforms": ["espressif32", "espressif8266", "atmelsam"],
   "headers": "LovyanGFX.hpp",
diff --git a/library.properties b/library.properties
index 473f705c..a2ee0225 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=LovyanGFX
-version=0.5.0
+version=1.1.2
 author=lovyan03
 maintainer=lovyan03
 sentence=TFT LCD Graphics driver with touch for ESP32, ESP8266, SAMD21, SAMD51, RP2040
diff --git a/src/LovyanGFX.hpp b/src/LovyanGFX.hpp
index f1402f69..d6a04897 100644
--- a/src/LovyanGFX.hpp
+++ b/src/LovyanGFX.hpp
@@ -1,23 +1,19 @@
 /*----------------------------------------------------------------------------/
   Lovyan GFX library - LCD graphics library .
-  
-  support platform:
-    ESP32 (SPI/I2S) with Arduino/ESP-IDF
-    ATSAMD51 (SPI) with Arduino
-  
-Original Source:  
- https://github.com/lovyan03/LovyanGFX/  
-
-Licence:  
- [BSD](https://github.com/lovyan03/LovyanGFX/blob/master/license.txt)  
-
-Author:  
- [lovyan03](https://twitter.com/lovyan03)  
-
-Contributors:  
- [ciniml](https://github.com/ciniml)  
- [mongonta0716](https://github.com/mongonta0716)  
- [tobozo](https://github.com/tobozo)  
+
+Original Source:
+ https://github.com/lovyan03/LovyanGFX/
+
+Licence:
+ [BSD](https://github.com/lovyan03/LovyanGFX/blob/master/license.txt)
+
+Author:
+ [lovyan03](https://twitter.com/lovyan03)
+
+Contributors:
+ [ciniml](https://github.com/ciniml)
+ [mongonta0716](https://github.com/mongonta0716)
+ [tobozo](https://github.com/tobozo)
 /----------------------------------------------------------------------------*/
 #ifndef LOVYANGFX_HPP_
 #define LOVYANGFX_HPP_
@@ -26,22 +22,20 @@ Original Source:
 #undef setFont
 #endif
 
-#if __has_include("lgfx/v1_init.hpp") && ( defined ( LGFX_USE_V1 ) || !__has_include("lgfx/v0_init.hpp") )
+ #if defined ( LGFX_USE_V0 ) && __has_include("lgfx/v0_init.hpp")
+
+  #include "lgfx/v0_init.hpp"
 
- #include "lgfx/v1_init.hpp"
+ #else
 
- #if defined ( LGFX_AUTODETECT )
+  #include "lgfx/v1_init.hpp"
 
-  #include "LGFX_AUTODETECT.hpp"
+  #if defined ( LGFX_AUTODETECT )
 
- #endif
+   #include "LGFX_AUTODETECT.hpp"
 
-#else  // if defined ( LGFX_USE_V0 )
+  #endif
 
- #if __has_include("lgfx/v0_init.hpp")
-  #include "lgfx/v0_init.hpp"
  #endif
 
 #endif
-
-#endif
diff --git a/src/lgfx/v1/gitTagVersion.h b/src/lgfx/v1/gitTagVersion.h
index 6f9b7af2..ad7a4467 100644
--- a/src/lgfx/v1/gitTagVersion.h
+++ b/src/lgfx/v1/gitTagVersion.h
@@ -1,4 +1,4 @@
 #define LGFX_VERSION_MAJOR 1
 #define LGFX_VERSION_MINOR 1
-#define LGFX_VERSION_PATCH 0
+#define LGFX_VERSION_PATCH 2
 #define LOVYANGFX_VERSION F( LGFX_VERSION_MAJOR "." LGFX_VERSION_MINOR "." LGFX_VERSION_PATCH )