11
11
cast ,
12
12
overload ,
13
13
)
14
+ import warnings
14
15
15
16
import numpy as np
16
17
23
24
)
24
25
from pandas ._libs .arrays import NDArrayBacked
25
26
from pandas .compat .numpy import function as nv
27
+ from pandas .errors import Pandas4Warning
28
+ from pandas .util ._exceptions import find_stack_level
26
29
from pandas .util ._validators import validate_bool_kwarg
27
30
28
31
from pandas .core .dtypes .cast import (
@@ -476,7 +479,11 @@ def __init__(
476
479
elif isinstance (values .dtype , CategoricalDtype ):
477
480
old_codes = extract_array (values )._codes
478
481
codes = recode_for_categories (
479
- old_codes , values .dtype .categories , dtype .categories , copy = copy
482
+ old_codes ,
483
+ values .dtype .categories ,
484
+ dtype .categories ,
485
+ copy = copy ,
486
+ warn = True ,
480
487
)
481
488
482
489
else :
@@ -528,7 +535,12 @@ def _from_sequence(
528
535
529
536
def _cast_pointwise_result (self , values ) -> ArrayLike :
530
537
res = super ()._cast_pointwise_result (values )
531
- cat = type (self )._from_sequence (res , dtype = self .dtype )
538
+ with warnings .catch_warnings ():
539
+ warnings .filterwarnings (
540
+ "ignore" ,
541
+ "Constructing a Categorical with a dtype and values containing" ,
542
+ )
543
+ cat = type (self )._from_sequence (res , dtype = self .dtype )
532
544
if (cat .isna () == isna (res )).all ():
533
545
# i.e. the conversion was non-lossy
534
546
return cat
@@ -565,6 +577,15 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
565
577
dtype = self .dtype .update_dtype (dtype )
566
578
self = self .copy () if copy else self
567
579
result = self ._set_dtype (dtype , copy = False )
580
+ wrong = result .isna () & ~ self .isna ()
581
+ if wrong .any ():
582
+ warnings .warn (
583
+ "Constructing a Categorical with a dtype and values containing "
584
+ "non-null entries not in that dtype's categories is deprecated "
585
+ "and will raise in a future version." ,
586
+ Pandas4Warning ,
587
+ stacklevel = find_stack_level (),
588
+ )
568
589
569
590
elif isinstance (dtype , ExtensionDtype ):
570
591
return super ().astype (dtype , copy = copy )
@@ -659,14 +680,16 @@ def _from_inferred_categories(
659
680
if known_categories :
660
681
# Recode from observation order to dtype.categories order.
661
682
categories = dtype .categories
662
- codes = recode_for_categories (inferred_codes , cats , categories , copy = False )
683
+ codes = recode_for_categories (
684
+ inferred_codes , cats , categories , copy = False , warn = True
685
+ )
663
686
elif not cats .is_monotonic_increasing :
664
687
# Sort categories and recode for unknown categories.
665
688
unsorted = cats .copy ()
666
689
categories = cats .sort_values ()
667
690
668
691
codes = recode_for_categories (
669
- inferred_codes , unsorted , categories , copy = False
692
+ inferred_codes , unsorted , categories , copy = False , warn = True
670
693
)
671
694
dtype = CategoricalDtype (categories , ordered = False )
672
695
else :
@@ -787,7 +810,7 @@ def categories(self) -> Index:
787
810
>>> ser.cat.categories
788
811
Index(['a', 'b', 'c'], dtype='str')
789
812
790
- >>> raw_cat = pd.Categorical(["a" , "b", "c", "a" ], categories=["b", "c", "d"])
813
+ >>> raw_cat = pd.Categorical([None , "b", "c", None ], categories=["b", "c", "d"])
791
814
>>> ser = pd.Series(raw_cat)
792
815
>>> ser.cat.categories
793
816
Index(['b', 'c', 'd'], dtype='str')
@@ -1095,7 +1118,7 @@ def set_categories(
1095
1118
For :class:`pandas.Series`:
1096
1119
1097
1120
>>> raw_cat = pd.Categorical(
1098
- ... ["a", "b", "c", "A" ], categories=["a", "b", "c"], ordered=True
1121
+ ... ["a", "b", "c", None ], categories=["a", "b", "c"], ordered=True
1099
1122
... )
1100
1123
>>> ser = pd.Series(raw_cat)
1101
1124
>>> ser
@@ -1117,7 +1140,7 @@ def set_categories(
1117
1140
For :class:`pandas.CategoricalIndex`:
1118
1141
1119
1142
>>> ci = pd.CategoricalIndex(
1120
- ... ["a", "b", "c", "A" ], categories=["a", "b", "c"], ordered=True
1143
+ ... ["a", "b", "c", None ], categories=["a", "b", "c"], ordered=True
1121
1144
... )
1122
1145
>>> ci
1123
1146
CategoricalIndex(['a', 'b', 'c', nan], categories=['a', 'b', 'c'],
@@ -1145,7 +1168,7 @@ def set_categories(
1145
1168
codes = cat ._codes
1146
1169
else :
1147
1170
codes = recode_for_categories (
1148
- cat .codes , cat .categories , new_dtype .categories , copy = False
1171
+ cat .codes , cat .categories , new_dtype .categories , copy = False , warn = False
1149
1172
)
1150
1173
NDArrayBacked .__init__ (cat , codes , new_dtype )
1151
1174
return cat
@@ -2956,7 +2979,7 @@ def codes(self) -> Series:
2956
2979
2957
2980
Examples
2958
2981
--------
2959
- >>> raw_cate = pd.Categorical(["a", "b", "c" , "a"], categories=["a", "b"])
2982
+ >>> raw_cate = pd.Categorical(["a", "b", None , "a"], categories=["a", "b"])
2960
2983
>>> ser = pd.Series(raw_cate)
2961
2984
>>> ser.cat.codes
2962
2985
0 0
@@ -2991,11 +3014,25 @@ def _get_codes_for_values(
2991
3014
If `values` is known to be a Categorical, use recode_for_categories instead.
2992
3015
"""
2993
3016
codes = categories .get_indexer_for (values )
3017
+ wrong = (codes == - 1 ) & ~ isna (values )
3018
+ if wrong .any ():
3019
+ warnings .warn (
3020
+ "Constructing a Categorical with a dtype and values containing "
3021
+ "non-null entries not in that dtype's categories is deprecated "
3022
+ "and will raise in a future version." ,
3023
+ Pandas4Warning ,
3024
+ stacklevel = find_stack_level (),
3025
+ )
2994
3026
return coerce_indexer_dtype (codes , categories )
2995
3027
2996
3028
2997
3029
def recode_for_categories (
2998
- codes : np .ndarray , old_categories , new_categories , * , copy : bool
3030
+ codes : np .ndarray ,
3031
+ old_categories ,
3032
+ new_categories ,
3033
+ * ,
3034
+ copy : bool = True ,
3035
+ warn : bool = False ,
2999
3036
) -> np .ndarray :
3000
3037
"""
3001
3038
Convert a set of codes to a new set of categories
@@ -3006,6 +3043,8 @@ def recode_for_categories(
3006
3043
old_categories, new_categories : Index
3007
3044
copy : bool, default True
3008
3045
Whether to copy if the codes are unchanged.
3046
+ warn : bool, default False
3047
+ Whether to warn when an old category is missing from new_categories, so that its codes are silently mapped to NA (-1).
3009
3048
3010
3049
Returns
3011
3050
-------
@@ -3030,9 +3069,18 @@ def recode_for_categories(
3030
3069
return codes .copy ()
3031
3070
return codes
3032
3071
3033
- indexer = coerce_indexer_dtype (
3034
- new_categories .get_indexer_for (old_categories ), new_categories
3035
- )
3072
+ codes_in_old_cats = new_categories .get_indexer_for (old_categories )
3073
+ if warn :
3074
+ wrong = codes_in_old_cats == - 1
3075
+ if wrong .any ():
3076
+ warnings .warn (
3077
+ "Constructing a Categorical with a dtype and values containing "
3078
+ "non-null entries not in that dtype's categories is deprecated "
3079
+ "and will raise in a future version." ,
3080
+ Pandas4Warning ,
3081
+ stacklevel = find_stack_level (),
3082
+ )
3083
+ indexer = coerce_indexer_dtype (codes_in_old_cats , new_categories )
3036
3084
new_codes = take_nd (indexer , codes , fill_value = - 1 )
3037
3085
return new_codes
3038
3086
0 commit comments