Revert "FGBoost python support and some test files (#4158)" (#4179)
Litchilitchy authored Mar 8, 2022
1 parent 17dd415 commit a4ae25e
Showing 16 changed files with 58 additions and 428 deletions.
50 changes: 9 additions & 41 deletions python/ppml/example/fgboost_regression/README.md
@@ -1,47 +1,15 @@
# FGBoost Regression Python Example
# FGBoost Regression Example
This example shows how to build a 2-client FGBoost Regression application.

### Walkthrough
We provide the full code below, which you can run directly once the BigDL PPML [installation](#link) is done.
```python
import numpy as np
from bigdl.ppml import FLServer
from bigdl.ppml.algorithms.fgboost_regression import FGBoostRegression
from bigdl.ppml.utils import init_fl_context
### Data
This example uses the [House Price]() dataset from Kaggle. We provide a copy of the dataset [here](), and split it into two parts with the [split script]().

fl_server = FLServer()
fl_server.build()
fl_server.start()
init_fl_context()
The data files used here are [vfl-house-price-1.csv]() and [vfl-house-price-2.csv](), which represent the data held by the two parties.

x, y = np.ones([2, 3]), np.ones([2])
fgboost_regression = FGBoostRegression()
fgboost_regression.fit(x, y)
result = fgboost_regression.predict(x)
result

fl_server.close()
```
Now we dive into the code.
### Start FLServer
To start a BigDL PPML application, first start an FLServer:
```python
fl_server = FLServer()
fl_server.build()
fl_server.start()
```
### Initialize FLContext
The client that interacts with the FLServer lives inside the FLContext. To use it, initialize the FLContext:
```python
init_fl_context()
```


### Run Algorithm
Then create an `FGBoostRegression` instance to apply the Federated Gradient Boosting Regression algorithm, and call fit and predict on dummy data.
```python
fgboost_regression = FGBoostRegression()
fgboost_regression.fit(x, y)
result = fgboost_regression.predict(x)
result
```
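Beyond the dummy arrays, the same calls apply to the House Price data from the Data section. A minimal sketch, assuming one party's split CSV has been downloaded locally (the file path and column names below are placeholders, not part of the original example):
```python
import numpy as np
import pandas as pd

# Sketch only: path and column names are placeholders for one party's split
# of the House Price data.
df = pd.read_csv("vfl-house-price-1.csv")
x = df[["LotArea", "YearBuilt"]].to_numpy(dtype="float32")
y = df["SalePrice"].to_numpy(dtype="float32")

fgboost_regression = FGBoostRegression(learning_rate=1e-3, max_depth=6, min_child_size=1)
fgboost_regression.fit(x, y, num_round=5)
result = fgboost_regression.predict(x)
```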

### Model Save and Load
//TODO

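This section is still a TODO in the example. Purely as a sketch of the intended flow, assuming the trained model exposes save/load helpers (the `save_model` and `load_model` names below are hypothetical and not taken from this repository):
```python
# Hypothetical method names, shown only to illustrate the intended save/load flow.
fgboost_regression.save_model("/tmp/fgboost_regression_model")
loaded = FGBoostRegression.load_model("/tmp/fgboost_regression_model")
loaded.predict(x)
```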
26 changes: 13 additions & 13 deletions python/ppml/src/bigdl/ppml/algorithms/fgboost_regression.py
@@ -17,22 +17,22 @@
from bigdl.dllib.utils.common import JavaValue
from bigdl.ppml.data_utils import *

from bigdl.ppml import *


class FGBoostRegression(JavaValue):
def __init__(self, jvalue=None, learning_rate:float=1e-3, max_depth=6, min_child_size=1):
def __init__(self, jvalue, *args):
self.bigdl_type = "float"
super().__init__(jvalue, self.bigdl_type, learning_rate, max_depth, min_child_size)
super(JavaValue, self).__init__(jvalue, bigdl_type, *args)

def fit(self, x, y, num_round=5, **kargs):
x, y = convert_to_jtensor(x, y, **kargs)
return callBigDlFunc(self.bigdl_type, "fgBoostFit", self.value, x, y, num_round)
def fit(self, x, y, num_round, feature_cols="features", label_cols="label"):
x = convert_to_numpy(x, feature_cols)
y = convert_to_numpy(y, label_cols)
callBigDlFunc(self.bigdl_type, "fgBoostFit", x, y, num_round)

def evaluate(self, x, y, **kargs):
x, y = convert_to_jtensor(x, y, **kargs)
return callBigDlFunc(self.bigdl_type, "fgBoostEvaluate", self.value, x, y)
def evaluate(self, x, y):
x = convert_to_numpy(x)
y = convert_to_numpy(y)
callBigDlFunc(self.bigdl_type, "fgBoostEvaluate", x, y)

def predict(self, x, **kargs):
x, _ = convert_to_jtensor(x, **kargs)
return callBigDlFunc(self.bigdl_type, "fgBoostPredict", self.value, x).to_ndarray()
def predict(self, x):
x = convert_to_numpy(x)
callBigDlFunc(self.bigdl_type, "fgBoostPredict", x)
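For reference, the removed implementation above (the variant built on `convert_to_jtensor`) returns its results to the caller. A minimal sketch of a fit/evaluate/predict round trip with NumPy input, reusing the dummy arrays from the README walkthrough:
```python
import numpy as np

# Sketch against the removed API: ndarray input skips the DataFrame column arguments.
x, y = np.ones([2, 3]), np.ones([2])
model = FGBoostRegression(learning_rate=1e-3, max_depth=6)
model.fit(x, y, num_round=5)
metrics = model.evaluate(x, y)   # forwarded to fgBoostEvaluate on the JVM side
preds = model.predict(x)         # converted back to a numpy ndarray via to_ndarray()
```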
17 changes: 7 additions & 10 deletions python/ppml/src/bigdl/ppml/algorithms/psi.py
@@ -14,22 +14,19 @@
# limitations under the License.
#


from bigdl.ppml import *

from bigdl.dllib.utils.common import JavaValue


class PSI(JavaValue):
def __init__(self, jvalue=None, *args):
self.bigdl_type = "float"
super().__init__(jvalue, self.bigdl_type, *args)
def __init__(self, jvalue, *args):
bigdl_type = "float"
super(JavaValue, self).__init__(jvalue, bigdl_type, *args)

def get_salt(self, secure_code=""):
return callBigDlFunc(self.bigdl_type, "psiGetSalt", self.value, secure_code)
def get_salt(self):
pass

def upload_set(self, ids, salt):
callBigDlFunc(self.bigdl_type, "psiUploadSet", self.value, ids, salt)
pass

def download_intersection(self, max_try=100, retry=3):
return callBigDlFunc(self.bigdl_type, "psiDownloadIntersection", self.value, max_try, retry)
pass
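For context, the removed PSI client above (the variant that calls into the JVM) is driven as a salt/upload/download sequence. A minimal sketch, with the id list below as a placeholder:
```python
# Sketch of the removed PSI flow; the ids are placeholder values.
psi = PSI()
salt = psi.get_salt()                        # fetch the hashing salt from the FLServer
psi.upload_set(["user-1", "user-2"], salt)   # upload this party's ids together with the salt
intersection = psi.download_intersection()   # retry until the intersection is available
```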
36 changes: 14 additions & 22 deletions python/ppml/src/bigdl/ppml/data_utils.py
@@ -13,34 +13,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pandas as pd
import numpy as np
from bigdl.ppml import *


def get_input_type(x, y=None):
def convert_to_numpy(x, dataframe_columns=None):
"""
:param x: The input to convert
:param dataframe_columns: applicable if x is pandas.DataFrame, the column to convert
:return: the converted numpy.ndarray
"""
if isinstance(x, pd.DataFrame):
if y is not None and not isinstance(y, pd.DataFrame):
raise ValueError(f"Feature is DataFrame, label should be DataFrame, but got {type(y)}")
return "DataFrame"
return [x[col] for col in dataframe_columns]
elif isinstance(x, np.ndarray):
if y is not None and not isinstance(y, np.ndarray):
raise ValueError(
f"Feature is Numpy NdArray, label should be Numpy NdArray, but got {type(y)}")
return "NdArray"
return [x]
elif isinstance(x, list):
for e in x:
if not isinstance(x, np.ndarray):
raise Exception("only NdArray type is supported for list input")
return x
else:
raise ValueError(f"Supported argument types: DataFrame, NdArray, but got {type(x)}")

def convert_to_jtensor(x, y=None, feature_columns=None, label_columns=None):
arg_type = get_input_type(x, y)
if arg_type == "DataFrame":
if feature_columns is None or (y is not None and label_columns is None):
raise ValueError("Input DataFrame type must have feature_columns and label_columns")
x = x.to_numpy()
y = y.to_numpy() if y else None
return JTensor.from_ndarray(x), JTensor.from_ndarray(y)


raise Exception("Input could be Pandas DataFrame or Numpy NDArray or list of NDArray, but got", type(x))
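A short sketch of how the removed `convert_to_jtensor` helper above is called before tensors are handed to the JVM: ndarray inputs pass straight through, while DataFrame inputs additionally require `feature_columns` (and `label_columns` when a label is given).
```python
import numpy as np
from bigdl.ppml.data_utils import convert_to_jtensor

# Sketch against the removed helper: ndarray input needs no column arguments.
x, y = np.ones([2, 3]), np.ones([2])
jx, jy = convert_to_jtensor(x, y)
```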


44 changes: 0 additions & 44 deletions python/ppml/test/bigdl/ppml/algorithms/test_fgboost_regression.py

This file was deleted.

54 changes: 0 additions & 54 deletions python/ppml/test/bigdl/ppml/algorithms/test_psi.py

This file was deleted.

