From 5af2972665a8cbbd77f914832866eb206151b8c6 Mon Sep 17 00:00:00 2001
From: sunlight798 <3281498087@qq.com>
Date: Sat, 7 Dec 2024 22:33:39 +0800
Subject: [PATCH 1/6] BUG: Fix multi-index on columns with bool level values
 does not roundtrip through parquet

---
 doc/source/whatsnew/v3.0.0.rst  |  1 +
 pandas/core/dtypes/astype.py    |  7 +++++++
 pandas/tests/io/test_parquet.py | 21 +++++++++++++++++++++
 3 files changed, 29 insertions(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index ab5746eca1b18..0ace6f681f3a7 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -709,6 +709,7 @@ I/O
 - Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
 - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`)
 - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`)
+- Bug in :meth:`read_parquet` raising ``ValueError`` if the multi-index contains a level with bools and if that multi-index is on the columns, then while the parquet can be written with the ``pyarrow`` engine, it cannot be read back in using ``pyarrow``.   (:issue:`60508`)
 
 Period
 ^^^^^^
diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index 086f7d2da6640..9f9298b4d4a79 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -125,6 +125,13 @@ def _astype_nansafe(
         )
         raise ValueError(msg)
 
+    if arr.dtype == object and dtype == bool:
+        # If the dtype is bool and the array is object, we need to replace the False and True of the object type in the ndarray with the bool type
+        # to ensure that the type conversion is correct
+        arr[arr == "False"] = np.False_
+        arr[arr == "True"] = np.True_
+        return arr.astype(dtype, copy=copy)
+
     if copy or arr.dtype == object or dtype == object:
         # Explicit copy, or required since NumPy can't view from / to object.
         return arr.astype(dtype, copy=True)
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 7919bb956dc7a..3354f9353a309 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1468,3 +1468,24 @@ def test_invalid_dtype_backend(self, engine):
             df.to_parquet(path)
             with pytest.raises(ValueError, match=msg):
                 read_parquet(path, dtype_backend="numpy")
+
+    def test_bool_multiIndex(self, tmp_path, pa):
+        # GH 60508
+        df = pd.DataFrame(
+            [
+                [1, 2],
+                [4, 5],
+            ],
+            columns=pd.MultiIndex.from_tuples(
+                [
+                    (True, 'B'),
+                    (False, 'C'),
+                ]
+            )
+        )
+        df.to_parquet(
+            path=tmp_path,
+            engine=pa,
+        )
+        result = pd.read_parquet(tmp_path, engine=pa)
+        tm.assert_frame_equal(result, df)

From 4b1b9b8f8998a50beca8c8c42bdb8bf7dae3ce03 Mon Sep 17 00:00:00 2001
From: sunlight798 <3281498087@qq.com>
Date: Sat, 7 Dec 2024 23:06:32 +0800
Subject: [PATCH 2/6] BUG: Modify the comment format.

---
 pandas/core/dtypes/astype.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
index 9f9298b4d4a79..2a1b3a16c9d67 100644
--- a/pandas/core/dtypes/astype.py
+++ b/pandas/core/dtypes/astype.py
@@ -126,8 +126,9 @@ def _astype_nansafe(
         raise ValueError(msg)
 
     if arr.dtype == object and dtype == bool:
-        # If the dtype is bool and the array is object, we need to replace the False and True of the object type in the ndarray with the bool type
-        # to ensure that the type conversion is correct
+        # Object arrays can hold the strings "True"/"False" instead of real
+        # booleans; swap them for np.True_/np.False_ before casting.
+        # NOTE: the masked assignments below write into ``arr`` itself.
         arr[arr == "False"] = np.False_
         arr[arr == "True"] = np.True_
         return arr.astype(dtype, copy=copy)

From 81596e7790f494e9b23d3ad15ac10f8d73c47449 Mon Sep 17 00:00:00 2001
From: sunlight798 <3281498087@qq.com>
Date: Sat, 7 Dec 2024 23:22:09 +0800
Subject: [PATCH 3/6] BUG: Modify the test function.

---
 pandas/tests/io/test_parquet.py | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 3354f9353a309..bbcd733d48bb3 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1472,20 +1472,12 @@ def test_invalid_dtype_backend(self, engine):
     def test_bool_multiIndex(self, tmp_path, pa):
         # GH 60508
         df = pd.DataFrame(
-            [
-                [1, 2],
-                [4, 5],
-            ],
-            columns=pd.MultiIndex.from_tuples(
-                [
-                    (True, 'B'),
-                    (False, 'C'),
-                ]
-            )
+            [[1, 2], [4, 5]],
+            columns=pd.MultiIndex.from_tuples([(True, 'B'), (False, 'C')]),
         )
         df.to_parquet(
             path=tmp_path,
             engine=pa,
         )
-        result = pd.read_parquet(tmp_path, engine=pa)
+        result = read_parquet(tmp_path, engine=pa)
         tm.assert_frame_equal(result, df)

From 912dca66424aeab48bf2b247633f586614b0aeb2 Mon Sep 17 00:00:00 2001
From: sunlight798 <3281498087@qq.com>
Date: Sat, 7 Dec 2024 23:27:27 +0800
Subject: [PATCH 4/6] BUG: Modify the v3.0.0.rst file.

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 0ace6f681f3a7..9427d410e700a 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -709,7 +709,7 @@ I/O
 - Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
 - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`)
 - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`)
-- Bug in :meth:`read_parquet` raising ``ValueError`` if the multi-index contains a level with bools and if that multi-index is on the columns, then while the parquet can be written with the ``pyarrow`` engine, it cannot be read back in using ``pyarrow``.   (:issue:`60508`)
+- Bug in :meth:`read_parquet` raising ``ValueError`` when reading a file written with the ``pyarrow`` engine from a :class:`DataFrame` whose :class:`MultiIndex` columns contain a level with bool values (:issue:`60508`)
 
 Period
 ^^^^^^

From a22e7b9229bf5f3f45cd050bb6e038d85ebb15df Mon Sep 17 00:00:00 2001
From: sunlight798 <3281498087@qq.com>
Date: Sun, 8 Dec 2024 08:57:15 +0800
Subject: [PATCH 5/6] BUG: Modify the
 test_bool_multiIndex_roundtrip_through_parquet function

---
 pandas/tests/io/test_parquet.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index bbcd733d48bb3..21852169fea4a 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1469,15 +1469,14 @@ def test_invalid_dtype_backend(self, engine):
             with pytest.raises(ValueError, match=msg):
                 read_parquet(path, dtype_backend="numpy")
 
-    def test_bool_multiIndex(self, tmp_path, pa):
+    def test_bool_multiIndex_roundtrip_through_parquet(self, pa):
         # GH 60508
         df = pd.DataFrame(
             [[1, 2], [4, 5]],
             columns=pd.MultiIndex.from_tuples([(True, 'B'), (False, 'C')]),
         )
-        df.to_parquet(
-            path=tmp_path,
-            engine=pa,
-        )
-        result = read_parquet(tmp_path, engine=pa)
+        with tm.ensure_clean("test.parquet") as path:
+            df.to_parquet(f, engine=pa)
+
+            result = read_parquet(path, engine=pa)
         tm.assert_frame_equal(result, df)

From b9b3455ee75b3d2a4569fdcb7ecb5a6eee0249d1 Mon Sep 17 00:00:00 2001
From: sunlight798 <3281498087@qq.com>
Date: Sun, 8 Dec 2024 09:14:16 +0800
Subject: [PATCH 6/6] BUG: Modify the
 test_bool_multiIndex_roundtrip_through_parquet function

---
 pandas/tests/io/test_parquet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 21852169fea4a..04e9b9906c204 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1476,7 +1476,7 @@ def test_bool_multiIndex_roundtrip_through_parquet(self, pa):
             columns=pd.MultiIndex.from_tuples([(True, 'B'), (False, 'C')]),
         )
         with tm.ensure_clean("test.parquet") as path:
-            df.to_parquet(f, engine=pa)
+            df.to_parquet(path, engine=pa)
 
             result = read_parquet(path, engine=pa)
         tm.assert_frame_equal(result, df)