@@ -201,7 +201,7 @@ def resolve_feature_engine(
201201    feature_engine : FeatureEngine ,
202202) ->  FeatureEngineConcrete :  # noqa 
203203
204-     if  feature_engine  in  ["none" , "pandas" , DIRTY_CAT , "torch" , CUDA_CAT ]:
204+     if  feature_engine  in  ["none" , "pandas" , "dirty_cat" , "torch" , "cu_cat" ]:
205205        return  feature_engine   # type: ignore 
206206    if  feature_engine  ==  "auto" :
207207        has_dependancy_text_ , _ , _  =  lazy_import_has_dependancy_text ()
@@ -967,19 +967,19 @@ def process_dirty_dataframes(
967967            the data encoder, and the label encoder. 
968968    """ 
969969
970-     if  feature_engine  ==  CUDA_CAT :
970+     if  feature_engine  ==  "cu_cat" :
971971        assert_imported_cucat ()
972-         from  cu_cat  import  SuperVectorizer , GapEncoder    # , SimilarityEncoder 
972+         from  cu_cat  import  SuperVectorizer , GapEncoder 
973973        from  cuml .preprocessing  import  FunctionTransformer 
974- 
975-     else :   # if feature_engine == "dirty_cat":  # DIRTY_CAT 
976-         from  dirty_cat  import  SuperVectorizer , GapEncoder    # , SimilarityEncoder 
974+      
975+     else :
976+         from  dirty_cat  import  SuperVectorizer , GapEncoder 
977977        from  sklearn .preprocessing  import  FunctionTransformer 
978978
979979    t  =  time ()
980980
981981    if  not  is_dataframe_all_numeric (ndf ):
982-         if  feature_engine  ==  CUDA_CAT :
982+         if  feature_engine  ==  "cu_cat" :
983983            data_encoder  =  SuperVectorizer (
984984                auto_cast = True ,
985985                cardinality_threshold = cardinality_threshold_target ,
@@ -1010,9 +1010,9 @@ def process_dirty_dataframes(
10101010            features_transformed  =  data_encoder .get_feature_names_out ()
10111011
10121012        all_transformers  =  data_encoder .transformers 
1013-         if  feature_engine  ==  CUDA_CAT :
1013+         if  feature_engine  ==  "cu_cat" :
10141014            logger .info (f"-Shape of [[cu_cat fit]] data { X_enc .shape }  " )
1015-         elif   feature_engine   ==   DIRTY_CAT :
1015+         else :
10161016            logger .info (f"-Shape of [[dirty_cat fit]] data { X_enc .shape }  " )
10171017        logger .debug (f"-Transformers: \n { all_transformers } \n " )
10181018        logger .debug (
@@ -1058,7 +1058,7 @@ def process_dirty_dataframes(
10581058        t2  =  time ()
10591059        logger .debug ("-Fitting Targets --\n %s" , y .columns )
10601060
1061-         if  feature_engine  ==  CUDA_CAT :
1061+         if  feature_engine  ==  "cu_cat" :
10621062            label_encoder  =  SuperVectorizer (
10631063                auto_cast = True ,
10641064                cardinality_threshold = cardinality_threshold_target ,
@@ -1486,10 +1486,17 @@ def process_edge_dataframes(
14861486            other_df , y 
14871487        )
14881488        # add the two datasets together 
1489-         if  feature_engine  ==  'pandas' :
1490-             X_enc  =  pd .concat ([T , X_enc ], axis = 1 )
1491-         elif  feature_engine  ==  'cudf' :
1489+         has_dependancy_cudf_ , import_exn , cudf  =  lazy_import_has_dependancy_cudf ()
1490+         T_type  =  str (getmodule (T ))
1491+         X_type  =  str (getmodule (X_enc ))
1492+         if  'cudf'  in  T_type  and  'cudf'  in  X_type :
14921493            X_enc  =  cudf .concat ([T , X_enc ], axis = 1 )
1494+         elif  'pd'  in  T_type  and  'pd'  in  X_type :
1495+             X_enc  =  pd .concat ([T , X_enc ], axis = 1 )
1496+         elif  'cudf'  in  T_type  and  'pd'  in  X_type :
1497+             X_enc  =  cudf .concat ([cudf .from_pandas (T ), X_enc ], axis = 1 )
1498+         elif  'pd'  in  T_type  and  'cudf'  in  X_type :
1499+             X_enc  =  cudf .concat ([T , cudf .from_pandas (X_enc )], axis = 1 )
14931500        # then scale them 
14941501        X_encs , y_encs , scaling_pipeline , scaling_pipeline_target  =  smart_scaler (  # noqa 
14951502            X_enc ,
@@ -1556,21 +1563,17 @@ def process_edge_dataframes(
15561563    if  not  X_enc .empty  and  not  T .empty :
15571564        logger .debug ("-"  *  60 )
15581565        logger .debug ("<= Found Edges and Dirty_cat encoding =>" )
1566+         has_dependancy_cudf_ , import_exn , cudf  =  lazy_import_has_dependancy_cudf ()
15591567        T_type  =  str (getmodule (T ))
15601568        X_type  =  str (getmodule (X_enc ))
15611569        if  'cudf'  in  T_type  and  'cudf'  in  X_type :
15621570            X_enc  =  cudf .concat ([T , X_enc ], axis = 1 )
15631571        elif  'pd'  in  T_type  and  'pd'  in  X_type :
15641572            X_enc  =  pd .concat ([T , X_enc ], axis = 1 )
1565-         else :
1566-             try :
1567-                 X_enc  =  cudf .concat ([cudf .from_pandas (T ), X_enc ], axis = 1 )
1568-             except :
1569-                 pass 
1570-             try :
1571-                 X_enc  =  cudf .concat ([T , cudf .from_pandas (X_enc )], axis = 1 )
1572-             except :
1573-                 pass 
1573+         elif  'cudf'  in  T_type  and  'pd'  in  X_type :
1574+             X_enc  =  cudf .concat ([cudf .from_pandas (T ), X_enc ], axis = 1 )
1575+         elif  'pd'  in  T_type  and  'cudf'  in  X_type :
1576+             X_enc  =  cudf .concat ([T , cudf .from_pandas (X_enc )], axis = 1 )
15741577    elif  not  T .empty  and  X_enc .empty :
15751578        logger .debug ("-"  *  60 )
15761579        logger .debug ("<= Found only Edges =>" )
@@ -1750,7 +1753,18 @@ def transform(
17501753
17511754    # concat text to dirty_cat, with text in front. 
17521755    if  not  tX .empty  and  not  X .empty :
1753-         X  =  pd .concat ([tX , X ], axis = 1 )
1756+         has_dependancy_cudf_ , import_exn , cudf  =  lazy_import_has_dependancy_cudf ()
1757+         T_type  =  str (getmodule (tX ))
1758+         X_type  =  str (getmodule (X ))
1759+         if  'cudf'  in  T_type  and  'cudf'  in  X_type :
1760+             X  =  cudf .concat ([tX , X ], axis = 1 )
1761+         elif  'pd'  in  T_type  and  'pd'  in  X_type :
1762+             X  =  pd .concat ([tX , X ], axis = 1 )
1763+         elif  'cudf'  in  T_type  and  'pd'  in  X_type :
1764+             X  =  cudf .concat ([cudf .from_pandas (tX ), X ], axis = 1 )
1765+         elif  'pd'  in  T_type  and  'cudf'  in  X_type :
1766+             X  =  cudf .concat ([tX , cudf .from_pandas (X )], axis = 1 )
1767+         # X = pd.concat([tX, X], axis=1) 
17541768        logger .info ("--Combining both Textual and Numeric/Dirty_Cat" )
17551769    elif  not  tX .empty  and  X .empty :
17561770        X  =  tX   # textual 
@@ -1765,7 +1779,18 @@ def transform(
17651779
17661780    # now if edges, add T at front 
17671781    if  kind  ==  "edges" :
1768-         X  =  pd .concat ([T , X ], axis = 1 )  # edges, text, dirty_cat 
1782+         # X = pd.concat([T, X], axis=1)  # edges, text, dirty_cat 
1783+         has_dependancy_cudf_ , import_exn , cudf  =  lazy_import_has_dependancy_cudf ()
1784+         T_type  =  str (getmodule (T ))
1785+         X_type  =  str (getmodule (X ))
1786+         if  'cudf'  in  T_type  and  'cudf'  in  X_type :
1787+             X  =  cudf .concat ([T , X ], axis = 1 )
1788+         elif  'pd'  in  T_type  and  'pd'  in  X_type :
1789+             X  =  pd .concat ([T , X ], axis = 1 )
1790+         elif  'cudf'  in  T_type  and  'pd'  in  X_type :
1791+             X  =  cudf .concat ([cudf .from_pandas (T ), X ], axis = 1 )
1792+         elif  'pd'  in  T_type  and  'cudf'  in  X_type :
1793+             X  =  cudf .concat ([T , cudf .from_pandas (X )], axis = 1 )
17691794        logger .info ("-Combining MultiLabelBinarizer with previous features" )
17701795
17711796    logger .info ("-"  *  40 )
@@ -2656,10 +2681,11 @@ def featurize(
26562681        """ 
26572682        feature_engine  =  resolve_feature_engine (feature_engine )
26582683
2659-         if  feature_engine  ==  'dirty_cat' :
2660-             assert_imported_min ()
2661-         elif  feature_engine  ==  'cu_cat' :
2684+         
2685+         if  feature_engine  ==  "cu_cat" :
26622686            assert_imported_cucat ()
2687+         else :
2688+             assert_imported_min ()
26632689
26642690        if  inplace :
26652691            res  =  self 
0 commit comments