Revert "FGBoost python support and some test files (#4158)" (#4179)
Litchilitchy authored Mar 8, 2022
1 parent 17dd415 commit a4ae25e
Showing 16 changed files with 58 additions and 428 deletions.
50 changes: 9 additions & 41 deletions python/ppml/example/fgboost_regression/README.md
@@ -1,47 +1,15 @@
# FGBoost Regression Python Example
# FGBoost Regression Example
This example shows how to build a 2-client FGBoost Regression application.

### Walkthrough
We provide the full code below, which you can run directly once the BigDL PPML [installation](#link) is done.
```python
import numpy as np
from bigdl.ppml import FLServer
from bigdl.ppml.algorithms.fgboost_regression import FGBoostRegression
from bigdl.ppml.utils import init_fl_context
### Data
This example uses the [House Price]() dataset from Kaggle. We provide a copy of the dataset [here](), and split it into two parts with the [split script]().

fl_server = FLServer()
fl_server.build()
fl_server.start()
init_fl_context()
The data files used here are [vfl-house-price-1.csv]() and [vfl-house-price-2.csv](), which represent the data held by the two parties.

x, y = np.ones([2, 3]), np.ones([2])
fgboost_regression = FGBoostRegression()
fgboost_regression.fit(x, y)
result = fgboost_regression.predict(x)
result

fl_server.close()
```
Now we dive into the code.
### Start FLServer
To start a BigDL PPML application, first start an FLServer:
```python
fl_server = FLServer()
fl_server.build()
fl_server.start()
```
### Initialize FLContext
The client that interacts with the FLServer lives inside the FLContext. To use it, initialize the FLContext:
```python
init_fl_context()
```


### Run Algorithm
Then create an `FGBoostRegression` instance to apply the Federated Gradient Boosting Regression algorithm, and call fit and predict on dummy data.
```python
fgboost_regression = FGBoostRegression()
fgboost_regression.fit(x, y)
result = fgboost_regression.predict(x)
result
```
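Beyond the dummy arrays, the same calls apply to the House Price data from the Data section. A minimal sketch, assuming one party's split CSV has been downloaded locally (the file path and column names below are placeholders, not part of the original example):
```python
import numpy as np
import pandas as pd

# Sketch only: path and column names are placeholders for one party's split
# of the House Price data.
df = pd.read_csv("vfl-house-price-1.csv")
x = df[["LotArea", "YearBuilt"]].to_numpy(dtype="float32")
y = df["SalePrice"].to_numpy(dtype="float32")

fgboost_regression = FGBoostRegression(learning_rate=1e-3, max_depth=6, min_child_size=1)
fgboost_regression.fit(x, y, num_round=5)
result = fgboost_regression.predict(x)
```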

### Model Save and Load
//TODO

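This section is still a TODO in the example. Purely as a sketch of the intended flow, assuming the trained model exposes save/load helpers (the `save_model` and `load_model` names below are hypothetical and not taken from this repository):
```python
# Hypothetical method names, shown only to illustrate the intended save/load flow.
fgboost_regression.save_model("/tmp/fgboost_regression_model")
loaded = FGBoostRegression.load_model("/tmp/fgboost_regression_model")
loaded.predict(x)
```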
26 changes: 13 additions & 13 deletions python/ppml/src/bigdl/ppml/algorithms/fgboost_regression.py
@@ -17,22 +17,22 @@
from bigdl.dllib.utils.common import JavaValue
from bigdl.ppml.data_utils import *

from bigdl.ppml import *


class FGBoostRegression(JavaValue):
def __init__(self, jvalue=None, learning_rate:float=1e-3, max_depth=6, min_child_size=1):
def __init__(self, jvalue, *args):
self.bigdl_type = "float"
super().__init__(jvalue, self.bigdl_type, learning_rate, max_depth, min_child_size)
super(JavaValue, self).__init__(jvalue, bigdl_type, *args)

def fit(self, x, y, num_round=5, **kargs):
x, y = convert_to_jtensor(x, y, **kargs)
return callBigDlFunc(self.bigdl_type, "fgBoostFit", self.value, x, y, num_round)
def fit(self, x, y, num_round, feature_cols="features", label_cols="label"):
x = convert_to_numpy(x, feature_cols)
y = convert_to_numpy(y, label_cols)
callBigDlFunc(self.bigdl_type, "fgBoostFit", x, y, num_round)

def evaluate(self, x, y, **kargs):
x, y = convert_to_jtensor(x, y, **kargs)
return callBigDlFunc(self.bigdl_type, "fgBoostEvaluate", self.value, x, y)
def evaluate(self, x, y):
x = convert_to_numpy(x)
y = convert_to_numpy(y)
callBigDlFunc(self.bigdl_type, "fgBoostEvaluate", x, y)

def predict(self, x, **kargs):
x, _ = convert_to_jtensor(x, **kargs)
return callBigDlFunc(self.bigdl_type, "fgBoostPredict", self.value, x).to_ndarray()
def predict(self, x):
x = convert_to_numpy(x)
callBigDlFunc(self.bigdl_type, "fgBoostPredict", x)
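For reference, the removed implementation above (the variant built on `convert_to_jtensor`) returns its results to the caller. A minimal sketch of a fit/evaluate/predict round trip with NumPy input, reusing the dummy arrays from the README walkthrough:
```python
import numpy as np

# Sketch against the removed API: ndarray input skips the DataFrame column arguments.
x, y = np.ones([2, 3]), np.ones([2])
model = FGBoostRegression(learning_rate=1e-3, max_depth=6)
model.fit(x, y, num_round=5)
metrics = model.evaluate(x, y)   # forwarded to fgBoostEvaluate on the JVM side
preds = model.predict(x)         # converted back to a numpy ndarray via to_ndarray()
```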
17 changes: 7 additions & 10 deletions python/ppml/src/bigdl/ppml/algorithms/psi.py
@@ -14,22 +14,19 @@
# limitations under the License.
#


from bigdl.ppml import *

from bigdl.dllib.utils.common import JavaValue


class PSI(JavaValue):
def __init__(self, jvalue=None, *args):
self.bigdl_type = "float"
super().__init__(jvalue, self.bigdl_type, *args)
def __init__(self, jvalue, *args):
bigdl_type = "float"
super(JavaValue, self).__init__(jvalue, bigdl_type, *args)

def get_salt(self, secure_code=""):
return callBigDlFunc(self.bigdl_type, "psiGetSalt", self.value, secure_code)
def get_salt(self):
pass

def upload_set(self, ids, salt):
callBigDlFunc(self.bigdl_type, "psiUploadSet", self.value, ids, salt)
pass

def download_intersection(self, max_try=100, retry=3):
return callBigDlFunc(self.bigdl_type, "psiDownloadIntersection", self.value, max_try, retry)
pass
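For context, the removed PSI client above (the variant that calls into the JVM) is driven as a salt/upload/download sequence. A minimal sketch, with the id list below as a placeholder:
```python
# Sketch of the removed PSI flow; the ids are placeholder values.
psi = PSI()
salt = psi.get_salt()                        # fetch the hashing salt from the FLServer
psi.upload_set(["user-1", "user-2"], salt)   # upload this party's ids together with the salt
intersection = psi.download_intersection()   # retry until the intersection is available
```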
36 changes: 14 additions & 22 deletions python/ppml/src/bigdl/ppml/data_utils.py
@@ -13,34 +13,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pandas as pd
import numpy as np
from bigdl.ppml import *


def get_input_type(x, y=None):
def convert_to_numpy(x, dataframe_columns=None):
"""
:param x: The input to convert
:param dataframe_columns: applicable if x is pandas.DataFrame, the column to convert
:return: the converted numpy.ndarray
"""
if isinstance(x, pd.DataFrame):
if y is not None and not isinstance(y, pd.DataFrame):
raise ValueError(f"Feature is DataFrame, label should be DataFrame, but got {type(y)}")
return "DataFrame"
return [x[col] for col in dataframe_columns]
elif isinstance(x, np.ndarray):
if y is not None and not isinstance(y, np.ndarray):
raise ValueError(
f"Feature is Numpy NdArray, label should be Numpy NdArray, but got {type(y)}")
return "NdArray"
return [x]
elif isinstance(x, list):
for e in x:
if not isinstance(x, np.ndarray):
raise Exception("only NdArray type is supported for list input")
return x
else:
raise ValueError(f"Supported argument types: DataFrame, NdArray, but got {type(x)}")

def convert_to_jtensor(x, y=None, feature_columns=None, label_columns=None):
arg_type = get_input_type(x, y)
if arg_type == "DataFrame":
if feature_columns is None or (y is not None and label_columns is None):
raise ValueError("Input DataFrame type must have feature_columns and label_columns")
x = x.to_numpy()
y = y.to_numpy() if y else None
return JTensor.from_ndarray(x), JTensor.from_ndarray(y)


raise Exception("Input could be Pandas DataFrame or Numpy NDArray or list of NDArray, but got", type(x))
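A short sketch of how the removed `convert_to_jtensor` helper above is called before tensors are handed to the JVM: ndarray inputs pass straight through, while DataFrame inputs additionally require `feature_columns` (and `label_columns` when a label is given).
```python
import numpy as np
from bigdl.ppml.data_utils import convert_to_jtensor

# Sketch against the removed helper: ndarray input needs no column arguments.
x, y = np.ones([2, 3]), np.ones([2])
jx, jy = convert_to_jtensor(x, y)
```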


44 changes: 0 additions & 44 deletions python/ppml/test/bigdl/ppml/algorithms/test_fgboost_regression.py

This file was deleted.

54 changes: 0 additions & 54 deletions python/ppml/test/bigdl/ppml/algorithms/test_psi.py

This file was deleted.

