9
9
from data_flow .lib import FileType
10
10
from data_flow .lib .data_columns import data_get_columns , data_delete_columns , data_rename_columns , data_select_columns
11
11
from data_flow .lib .data_from import (
12
- df_from_tmp_filename ,
13
12
from_csv_2_file ,
14
13
from_feather_2_file ,
15
14
from_parquet_2_file ,
23
22
to_json_from_file ,
24
23
to_hdf_from_file ,
25
24
)
25
+ from data_flow .lib .fireducks import from_fireducks_2_file , to_fireducks_from_file
26
+ from data_flow .lib .pandas import from_pandas_2_file
27
+ from data_flow .lib .polars import from_polars_2_file , to_polars_from_file
26
28
from data_flow .lib .tools import generate_temporary_filename , delete_file
27
29
28
30
@@ -45,25 +47,44 @@ def __del__(self):
45
47
if not self .__in_memory :
46
48
delete_file (self .__filename )
47
49
50
+ def from_fireducks (self , df : fd .DataFrame ):
51
+ if self .__in_memory :
52
+ self .__data = df
53
+ else :
54
+ from_fireducks_2_file (df = df , tmp_filename = self .__filename , file_type = self .__file_type )
55
+ return self
56
+
48
57
def to_fireducks (self ) -> fd .DataFrame :
49
58
if self .__in_memory :
50
59
return self .__data
51
60
else :
52
- return df_from_tmp_filename (tmp_filename = self .__filename , file_type = self .__file_type )
61
+ return to_fireducks_from_file (tmp_filename = self .__filename , file_type = self .__file_type )
62
+
63
+ def from_pandas (self , df : pd .DataFrame ):
64
+ if self .__in_memory :
65
+ self .__data = fd .from_pandas (df )
66
+ else :
67
+ from_pandas_2_file (df = df , tmp_filename = self .__filename , file_type = self .__file_type )
68
+ return self
53
69
54
70
def to_pandas (self ) -> pd .DataFrame :
55
71
if self .__in_memory :
56
72
return self .__data .to_pandas ()
57
73
else :
58
- return df_from_tmp_filename (tmp_filename = self .__filename , file_type = self .__file_type ).to_pandas ()
74
+ return to_fireducks_from_file (tmp_filename = self .__filename , file_type = self .__file_type ).to_pandas ()
75
+
76
+ def from_polars (self , df : pl .DataFrame ):
77
+ if self .__in_memory :
78
+ self .__data = fd .from_pandas (df .to_pandas ())
79
+ else :
80
+ from_polars_2_file (df = df , tmp_filename = self .__filename , file_type = self .__file_type )
81
+ return self
59
82
60
83
def to_polars (self ) -> pl .DataFrame :
61
84
if self .__in_memory :
62
85
return pl .from_pandas (self .__data .to_pandas ())
63
86
else :
64
- return pl .from_pandas (
65
- df_from_tmp_filename (tmp_filename = self .__filename , file_type = self .__file_type ).to_pandas ()
66
- )
87
+ return to_polars_from_file (tmp_filename = self .__filename , file_type = self .__file_type )
67
88
68
89
def from_csv (self , filename : str ):
69
90
if self .__in_memory :
@@ -139,14 +160,14 @@ def head(self):
139
160
if self .__in_memory :
140
161
print (self .__data .head ())
141
162
else :
142
- print (df_from_tmp_filename (tmp_filename = self .__filename , file_type = self .__file_type ).head ())
163
+ print (to_fireducks_from_file (tmp_filename = self .__filename , file_type = self .__file_type ).head ())
143
164
return self
144
165
145
166
def stats (self ):
146
167
if self .__in_memory :
147
168
data = self .__data
148
169
else :
149
- data = df_from_tmp_filename (tmp_filename = self .__filename , file_type = self .__file_type )
170
+ data = to_fireducks_from_file (tmp_filename = self .__filename , file_type = self .__file_type )
150
171
151
172
print ("***** Data stats *****" )
152
173
print (f"Columns names : { data .columns .to_list ()} " )
0 commit comments