From 0215a9a5a4f8cff351636dbfa22876af8db71063 Mon Sep 17 00:00:00 2001
From: zhongtianq <141391912+zhongtianq@users.noreply.github.com>
Date: Thu, 20 Jun 2024 19:32:40 +0800
Subject: [PATCH] repo-sync-2024-06-19T10:45:51+0800 (#72)

---
 docs/architecture/apps/index.rst       |   2 +
 docs/architecture/apps/lgbm_predict.md | 123 ++++++
 docs/architecture/apps/lgbm_train.md   | 141 +++++++
 docs/architecture/index.rst            |   2 +-
 docs/architecture/policy.md            |  75 ++--
 docs/development/index.rst             |  15 +
 docs/development/new_component.ipynb   | 559 +++++++++++++++++++++++++
 docs/index.rst                         |   4 +
 docs/quick_start/step3.ipynb           |  88 ++--
 9 files changed, 923 insertions(+), 86 deletions(-)
 create mode 100644 docs/architecture/apps/lgbm_predict.md
 create mode 100644 docs/architecture/apps/lgbm_train.md
 create mode 100644 docs/development/index.rst
 create mode 100644 docs/development/new_component.ipynb

diff --git a/docs/architecture/apps/index.rst b/docs/architecture/apps/index.rst
index 5fa23f1..f576345 100644
--- a/docs/architecture/apps/index.rst
+++ b/docs/architecture/apps/index.rst
@@ -31,6 +31,8 @@ TrustedFlow内置了多种可信APP，每一个可信APP在执行计算逻辑之
    lr_train
    xgb_predict
    lr_predict
+   lgbm_train
+   lgbm_predict
    binary_evaluation
    prediction_bias_eval
 
diff --git a/docs/architecture/apps/lgbm_predict.md b/docs/architecture/apps/lgbm_predict.md
new file mode 100644
index 0000000..5b2719f
--- /dev/null
+++ b/docs/architecture/apps/lgbm_predict.md
@@ -0,0 +1,123 @@
+# LightGBM预测
+
+使用给定的LightGBM模型对数据进行预测。
+
+## 组件定义
+
+1. 参数
+    (1) pred_name: 预测值的列名。
+    (2) save_label: 输出结果是否包含标签列，true表示保存。
+    (3) label_name: 标签列的名称，默认为“label”。
+    (4) save_id: 输出结果是否保存ID列，true表示保存。
+    (5) id_name: ID列的名称。
+    (6) col_names: 可选，输出指定的列到结果中，默认为空。
+2. 输入：待预测的数据以及LightGBM模型。
+3. 输出：预测结果。
+
+```json
+{
+    "domain": "ml.predict",
+    "name": "lgbm_predict",
+    "desc": "Predict using the lgbm model.",
+    "version": "0.0.1",
+    "attrs": [
+        {
+            "name": "pred_name",
+            "desc": "Column name for predictions.",
+            "type": "AT_STRING",
+            "atomic": {
+                "is_optional": true,
+                "default_value": {
+                    "s": "pred"
+                }
+            }
+        },
+        {
+            "name": "save_label",
+            "desc": "Whether or not to save real label column into output pred table. If true, input feature_dataset must contain label column.",
+            "type": "AT_BOOL",
+            "atomic": {
+                "is_optional": true,
+                "default_value": {}
+            }
+        },
+        {
+            "name": "label_name",
+            "desc": "Column name for label.",
+            "type": "AT_STRING",
+            "atomic": {
+                "is_optional": true,
+                "default_value": {
+                    "s": "label"
+                }
+            }
+        },
+        {
+            "name": "save_id",
+            "desc": "Whether to save id column into output pred table. If true, input feature_dataset must contain id column.",
+            "type": "AT_BOOL",
+            "atomic": {
+                "is_optional": true,
+                "default_value": {}
+            }
+        },
+        {
+            "name": "id_name",
+            "desc": "Column name for id.",
+            "type": "AT_STRING",
+            "atomic": {
+                "is_optional": true,
+                "default_value": {
+                    "s": "id"
+                }
+            }
+        },
+        {
+            "name": "col_names",
+            "desc": "Extra column names into output pred table.",
+            "type": "AT_STRINGS",
+            "atomic": {
+                "list_max_length_inclusive": "-1",
+                "is_optional": true
+            }
+        }
+    ],
+    "inputs": [
+        {
+            "name": "feature_dataset",
+            "desc": "Input feature dataset.",
+            "types": [
+                "sf.table.individual"
+            ],
+            "attrs": [
+                {
+                    "name": "ids",
+                    "desc": "Id columns.",
+                    "col_max_cnt_inclusive": "1"
+                },
+                {
+                    "name": "label",
+                    "desc": "Label column.",
+                    "col_max_cnt_inclusive": "1"
+                }
+            ]
+        },
+        {
+            "name": "model",
+            "desc": "Input model.",
+            "types": [
+                "sf.model.lgbm"
+            ]
+        }
+    ],
+    "outputs": [
+        {
+            "name": "pred",
+            "desc": "Output prediction.",
+            "types": [
+                "sf.table.individual"
+            ]
+        }
+    ]
+}
+```
\ No newline at end of file
diff --git a/docs/architecture/apps/lgbm_train.md b/docs/architecture/apps/lgbm_train.md
new file mode 100644
index 0000000..d8684e0
--- /dev/null
+++ b/docs/architecture/apps/lgbm_train.md
@@ -0,0 +1,141 @@
+# LightGBM训练
+
+使用LightGBM对数据集进行训练，得到LightGBM模型，支持二分类和回归。
+
+## 组件定义
+
+```json
+{
+  "domain": "ml.train",
+  "name": "lgbm_train",
+  "desc": "LightGBM train component for individual dataset.",
+  "version": "0.0.1",
+  "attrs": [
+      {
+          "name": "n_estimators",
+          "desc": "Number of boosted trees to fit.",
+          "type": "AT_INT",
+          "atomic": {
+              "is_optional": true,
+              "default_value": {
+                  "i64": "10"
+              },
+              "lower_bound_enabled": true,
+              "lower_bound": {
+                  "i64": "1"
+              },
+              "lower_bound_inclusive": true,
+              "upper_bound_enabled": true,
+              "upper_bound": {
+                  "i64": "1024"
+              },
+              "upper_bound_inclusive": true
+          }
+      },
+      {
+          "name": "objective",
+          "desc": "Specify the learning objective.",
+          "type": "AT_STRING",
+          "atomic": {
+              "is_optional": true,
+              "default_value": {
+                  "s": "binary"
+              },
+              "allowed_values": {
+                  "ss": [
+                      "regression",
+                      "binary"
+                  ]
+              }
+          }
+      },
+      {
+          "name": "boosting_type",
+          "desc": "Boosting type.",
+          "type": "AT_STRING",
+          "atomic": {
+              "is_optional": true,
+              "default_value": {
+                  "s": "gbdt"
+              },
+              "allowed_values": {
+                  "ss": [
+                      "gbdt",
+                      "rf",
+                      "dart"
+                  ]
+              }
+          }
+      },
+      {
+          "name": "learning_rate",
+          "desc": "Learning rate.",
+          "type": "AT_FLOAT",
+          "atomic": {
+              "is_optional": true,
+              "default_value": {
+                  "f": 0.1
+              },
+              "lower_bound_enabled": true,
+              "lower_bound": {},
+              "upper_bound_enabled": true,
+              "upper_bound": {
+                  "f": 1
+              },
+              "upper_bound_inclusive": true
+          }
+      },
+      {
+          "name": "num_leaves",
+          "desc": "Max number of leaves in one tree.",
+          "type": "AT_INT",
+          "atomic": {
+              "is_optional": true,
+              "default_value": {
+                  "i64": "31"
+              },
+              "lower_bound_enabled": true,
+              "lower_bound": {
+                  "i64": "2"
+              },
+              "lower_bound_inclusive": true,
+              "upper_bound_enabled": true,
+              "upper_bound": {
+                  "i64": "1024"
+              },
+              "upper_bound_inclusive": true
+          }
+      }
+  ],
+  "inputs": [
+      {
+          "name": "train_dataset",
+          "desc": "Input table.",
+          "types": [
+              "sf.table.individual"
+          ],
+          "attrs": [
+              {
+                  "name": "ids",
+                  "desc": "Id columns will not be trained."
+              },
+              {
+                  "name": "label",
+                  "desc": "Label column.",
+                  "col_min_cnt_inclusive": "1",
+                  "col_max_cnt_inclusive": "1"
+              }
+          ]
+      }
+  ],
+  "outputs": [
+      {
+          "name": "output_model",
+          "desc": "Output model.",
+          "types": [
+              "sf.model.lgbm"
+          ]
+      }
+  ]
+}
+```
\ No newline at end of file
diff --git a/docs/architecture/index.rst b/docs/architecture/index.rst
index 012aa17..d366782 100644
--- a/docs/architecture/index.rst
+++ b/docs/architecture/index.rst
@@ -1,4 +1,4 @@
-核心功能
+架构设计
 ========================
 想了解TrustedFlow原理和功能，欢迎阅读下列文章！
 
diff --git a/docs/architecture/policy.md b/docs/architecture/policy.md
index 523ea54..75eb64c 100644
--- a/docs/architecture/policy.md
+++ b/docs/architecture/policy.md
@@ -16,7 +16,15 @@ constraint本质上是描述“访问控制”这件事，比如允许对数据
 ### 可限制的元信息
 constraint支持对以下元信息进行限制。具体语法上，每一条constraint的元素都是以`r.`作为开头。（TrustedFlow采用了[casbin](https://github.com/casbin/casbin)作为底层的访问控制实现）
 
+#### platform
+在[global_constraints](#global_constraints)下设置。
+限制代码运行的TEE平台。目前可选`sim/sgx/tdx/csv`。
+```yaml
+r.env.tee.platform=="tee platform type"
+```
+
 #### mr_enclave
+在[global_constraints](#global_constraints)下设置。
 限制代码的MRENCLAVE，关于MRENCLAVE的说明参见 [Enclave](./tee/sgx.md#enclave) 。
 
 ```yaml
@@ -24,41 +32,26 @@ r.env.tee.sgx.mr_encalve=="mrenclave of the enclave"
 ```
 
 #### mr_signer
+在[global_constraints](#global_constraints)下设置。
 限制代码的MRSIGNER，关于MRSIGNER的说明参见 [Enclave](./tee/sgx.md#enclave) 。
 
 ```yaml
 r.env.tee.sgx.mr_signer=="mrsigner of the enclave"
 ```
 
-#### op
-限制可以使用哪些[可信APP](./apps/index.rst)进行计算，需要配合rule一齐生效（具体参见后面的rule说明）。
-目前可信APP对应的op名称为
-
-- [数据求交](./apps/intersect.md): `OP_PSI`
-- [数据随机切割](./apps/split.md): `OP_DATASET_SPLIT`
-- [特征过滤](./apps/feature_filter.md): `OP_DATASET_FILTER`
-- [全表统计](./apps/data_describe.md): `OP_TABLE_STATISTICS`
-- [WOE分箱](./apps/woe_binning.md): `OP_WOE_BINNING`
-- [WOE转换](./apps/woe_substitution.md): `OP_WOE_SUBSTITUTION`
-- [相关系数矩阵](./apps/corr.md): `OP_STATS_CORR`
-- [VIF](./apps/vif.md): `OP_LR`
-- [LR训练](./apps/lr_train.md): `OP_WOE_SUBSTITUTION`
-- [LR预测](./apps/lr_predict.md): `OP_PREDICT`
-- [XGBoost训练](./apps/xgb_train.md): `OP_XGB`
-- [XGBoost预测](./apps/xgb_train.md): `OP_PREDICT`
-- [二分类评估](./apps/binary_evaluation.md): `OP_BICLASSIFIER_EVALUATION`
-- [预测偏差评估](./apps/prediction_bias_eval.md): `OP_PREDICTION_BIAS_EVALUATION`
-
-示例写法如下。
+#### (暂不可用) execution_time
+在[global_constraints](#global_constraints)下设置。
+限制执行时间。
 ```yaml
-# 表示限制仅能对数据执行XGBoost训练。
-r.op=="OP_XGB"
+r.execution_time<="2023-10-01 23:59:59"
 ```
 
-#### （暂不可用）execution_time
-限制执行时间。
+#### (暂不可用) op参数
+在[op_constraints](#op_constraints)下设置。
+限制可信app的参数。具体参数名可以在[可信APP](./apps/index.rst)中找到对应的app查询。
+例如限制回归类型为逻辑回归：
 ```yaml
-r.execution_time<="2023-10-01 23:59:59"
+r.op.params.reg_type=="logistic"
 ```
 
 ### 元素之间支持的操作符
@@ -93,8 +86,10 @@ op_constraints表示作用于特定算法的约束，由一条或者多条op_con
 下列rule描述了以下限制
 1. 被授权方为bob和carol
 2. 允许使用数据列f1、f2和f3
-3. 限制XGB和LR的mrenclave
-4. 限制所有代码的mrsigner
+3. 允许xgb_train组件使用数据
+4. 允许lr_train组件进行逻辑回归时使用数据
+5. 限制组件运行平台为sgx
+6. 限制代码的mr_enclave为MRENCLAVE
 
 ```json
 {
@@ -110,20 +105,19 @@ op_constraints表示作用于特定算法的约束，由一条或者多条op_con
     ],
     "op_constraints":[
         {
-            "op_name":"OP_XGB",
-            "constraints":[
-                "r.op==\"OP_XGB\" && r.env.tee.sgx.mr_enclave==\"XGB_ENCLAVE\""
-            ]
+            "op_name": "xgb_train",
+            "constraints":[]
         },
         {
-            "op_name":"OP_LR",
+            "op_name": "lr_train",
             "constraints":[
-                "r.op==\"OP_LR\" && r.env.tee.sgx.mr_enclave==\"LR_ENCLAVE\""
+                "r.op.params.reg_type==\"logistic\""
             ]
         }
     ],
     "global_constraints":[
-        "r.env.tee.sgx.mr_signer==\"MRSIGNER\""
+        "r.env.tee.platform==\"sgx\"",
+        "r.env.tee.sgx.mr_enclave==\"MRENCLAVE\""
     ]
 }
 ```
@@ -160,20 +154,19 @@ op_constraints表示作用于特定算法的约束，由一条或者多条op_con
             ],
             "op_constraints":[
                 {
-                    "op_name":"OP_XGB",
-                    "constraints":[
-                        "r.op==\"OP_XGB\" && r.env.tee.sgx.mr_enclave==\"XGB_ENCLAVE\""
-                    ]
+                    "op_name": "xgb_train",
+                    "constraints":[]
                 },
                 {
-                    "op_name":"OP_LR",
+                    "op_name": "lr_train",
                     "constraints":[
-                        "r.op==\"OP_LR\" && r.env.tee.sgx.mr_enclave==\"LR_ENCLAVE\""
+                        "r.op.params.reg_type==\"logistic\""
                     ]
                 }
             ],
             "global_constraints":[
-                "r.env.tee.sgx.mr_signer==\"MRSIGNER\""
+                "r.env.tee.platform==\"sgx\"",
+                "r.env.tee.sgx.mr_enclave==\"MRENCLAVE\""
             ]
         }
     ]
diff --git a/docs/development/index.rst b/docs/development/index.rst
new file mode 100644
index 0000000..5d4b216
--- /dev/null
+++ b/docs/development/index.rst
@@ -0,0 +1,15 @@
+.. _development:
+
+开发者教程
+===============
+本文档面向想要基于TrustedFlow进行二次开发的工程人员。
+
+新组件开发
+---------------
+TrustedFlow已经提供了一些常用的组件。
+但有时候您可能想新增一个组件，或者想要修改已有组件的参数。这时您可以阅读 `新组件开发教程 <new_component>`_ 。
+
+.. toctree::
+   :maxdepth: 2
+
+   new_component
\ No newline at end of file
diff --git a/docs/development/new_component.ipynb b/docs/development/new_component.ipynb
new file mode 100644
index 0000000..0430a8f
--- /dev/null
+++ b/docs/development/new_component.ipynb
@@ -0,0 +1,559 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 新组件开发教程\n",
+    "TrustedFlow中组件定义和实现都在[Teeapps](https://github.com/secretflow/teeapps)中。\n",
+    "我们的组件通过secretflow的[component spec](https://github.com/secretflow/spec)来统一定义。这是隐语开放标准中用于定义组件的标准。用这套标准我们可以定义组件的名称、版本，参数的类型、取值范围、说明，定义输入输出的格式。建议您先阅读这套组件定义标准，便于理解接下来的开发流程。\n",
+    "\n",
+    "下面我们以新增LightGBM训练算法为例来说明新增组件的开发流程。\n",
+    "准备好代码：\n",
+    "```bash\n",
+    "git clone https://github.com/secretflow/teeapps.git\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 定义组件\n",
+    "\n",
+    "### 1. 新建文件\n",
+    "在`teeapps/component`目录下找到合适的组件分类，您也可以新建一个组件分类。\n",
+    "LightGBM训练算法属于机器学习领域，因此在`ml/train`分类下新建`lgbm_component.h`和`lgbm_component.cc`文件。\n",
+    "\n",
+    "### 2. 声明组件\n",
+    "LightGBM训练组件头文件`lgbm_component.h`示例如下：\n",
+    "```c++\n",
+    "#pragma once\n",
+    "\n",
+    "#include \"../../component.h\"\n",
+    "\n",
+    "namespace teeapps {\n",
+    "namespace component {\n",
+    "\n",
+    "class LgbmTrainComponent : public Component {\n",
+    " private:\n",
+    "  void Init();\n",
+    "\n",
+    "  explicit LgbmTrainComponent(\n",
+    "      const std::string& name = \"lgbm_train\",\n",
+    "      const std::string& domain = \"ml.train\",\n",
+    "      const std::string& version = \"0.0.1\",\n",
+    "      const std::string& desc =\n",
+    "          \"LightGBM train component for individual dataset.\")\n",
+    "      : Component(name, domain, version, desc) {\n",
+    "    Init();\n",
+    "  }\n",
+    "  ~LgbmTrainComponent() {}\n",
+    "  LgbmTrainComponent(const LgbmTrainComponent&) = delete;\n",
+    "  const LgbmTrainComponent& operator=(const LgbmTrainComponent&) = delete;\n",
+    "\n",
+    " public:\n",
+    "  static LgbmTrainComponent& GetInstance() {\n",
+    "    static LgbmTrainComponent instance;\n",
+    "    return instance;\n",
+    "  }\n",
+    "};\n",
+    "\n",
+    "}  // namespace component\n",
+    "}  // namespace teeapps\n",
+    "```\n",
+    "\n",
+    "这段代码声明了新组件的一些基本信息:\n",
+    "\n",
+    "- 组件名称：\"lgbm_train\"\n",
+    "- 组件所属领域：\"ml.train\"\n",
+    "- 组件版本号：\"0.0.1\"\n",
+    "- 组件描述：\"LightGBM train component for individual dataset.\"\n",
+    "\n",
+    "### 3. 定义组件参数\n",
+    "我们在`lgbm_component.cc`中来定义组件的详细参数，包含每个参数的名字、类型、取值范围、默认值、是否是可选参数等。\n",
+    "添加参数需要用到的`AddAttr`函数的具体定义如下：\n",
+    "\n",
+    "```c++\n",
+    "template <typename T>\n",
+    "void AddAttr(\n",
+    "      const std::string& name, const std::string& desc, bool is_list,\n",
+    "      bool is_optional,\n",
+    "      const std::optional<std::vector<T>>& default_values = std::nullopt,\n",
+    "      const std::optional<std::vector<T>>& allowed_values = std::nullopt,\n",
+    "      const std::optional<T>& lower_bound = std::nullopt,\n",
+    "      const std::optional<T>& upper_bound = std::nullopt,\n",
+    "      const std::optional<bool>& lower_bound_inclusive = std::nullopt,\n",
+    "      const std::optional<bool>& upper_bound_inclusive = std::nullopt,\n",
+    "      const std::optional<int>& list_min_length_inclusive = std::nullopt,\n",
+    "      const std::optional<int>& list_max_length_inclusive = std::nullopt);\n",
+    "```\n",
+    "- name：参数名称。\n",
+    "- desc：参数的详细描述。\n",
+    "- is_list：参数是否是一个列表。如果为false，则代表了我们允许用户输入一个T类型的值；如果是true，则代表了允许用户输入一个T类型的列表。\n",
+    "- is_optional：参数是否是optional的。如果为true，则代表了用户可以不填，此时会使用该参数的默认值；如果为false，则代表用户必须传递该值。\n",
+    "- default_values：参数的默认值，std::nullopt表示不设置默认值。当is_optional为true时必须定义该默认值。\n",
+    "- allowed_values：参数的允许值，std::nullopt表示不设置允许值。如果设置了该值，那么用户就必须在给出的allowed_values中选择输入。\n",
+    "- lower_bound：参数下限，std::nullopt表示不设置下限。\n",
+    "- lower_bound_inclusive：下限是否包含。如果为true，则代表了lower_bound也是一个合法的输入。\n",
+    "- upper_bound：参数上限，std::nullopt表示不设置上限。\n",
+    "- upper_bound_inclusive：上限是否包含。如果为true，则代表了upper_bound也是一个合法的输入。\n",
+    "- list_min_length_inclusive：列表类型参数的最小长度，std::nullopt表示不设置列表最小长度。该值仅在is_list为true的时候可选设置。\n",
+    "- list_max_length_inclusive：列表类型参数的最大长度，std::nullopt表示不设置列表最大长度。该值仅在is_list为true的时候可选设置。\n",
+    "\n",
+    "我们先添加一个训练轮数的参数:\n",
+    "```c++\n",
+    "#include \"lgbm_component.h\"\n",
+    "\n",
+    "namespace teeapps {\n",
+    "namespace component {\n",
+    "\n",
+    "void LgbmTrainComponent::Init() {\n",
+    "  AddAttr<int64_t>(\"n_estimators\", \"Number of boosted trees to fit.\", false,\n",
+    "                   true, std::vector<int64_t>{10}, std::nullopt, 1, 1024, true,\n",
+    "                   true);\n",
+    "}\n",
+    "\n",
+    "}  // namespace component\n",
+    "}  // namespace teeapps\n",
+    "```\n",
+    "这段代码使用`AddAttr`函数为`lgbm_train`这个组件添加了一个参数：\n",
+    "\n",
+    "- 参数名称：\"n_estimators\"\n",
+    "- 参数详细描述：\"Number of boosted trees to fit.\"\n",
+    "- 非列表类型参数，代表输入的是一个值而非一个列表\n",
+    "- 可选参数，代表用户如果不填该参数，就会使用默认值\n",
+    "- 参数的默认值为10\n",
+    "- 不设定特定的某几个可选值，但是设定了取值范围为[1, 1024]，包含下限1和上限1024。\n",
+    "\n",
+    "接下来，您可以在`lgbm_component.cc`中继续添加您需要的参数，下面为一个示例，您可以根据需要进行删改：\n",
+    "```c++\n",
+    "#include \"lgbm_component.h\"\n",
+    "\n",
+    "namespace teeapps {\n",
+    "namespace component {\n",
+    "\n",
+    "void LgbmTrainComponent::Init() {\n",
+    "  AddAttr<int64_t>(\"n_estimators\", \"Number of boosted trees to fit.\", false,\n",
+    "                   true, std::vector<int64_t>{10}, std::nullopt, 1, 1024, true,\n",
+    "                   true);\n",
+    "  AddAttr<std::string>(\"objective\", \"Specify the learning objective.\", false,\n",
+    "                       true, std::vector<std::string>{\"binary\"},\n",
+    "                       std::vector<std::string>{\"regression\", \"binary\"});\n",
+    "  AddAttr<std::string>(\"boosting_type\", \"Boosting type.\", false, true,\n",
+    "                       std::vector<std::string>{\"gbdt\"},\n",
+    "                       std::vector<std::string>{\"gbdt\", \"rf\", \"dart\"});\n",
+    "  AddAttr<float>(\"learning_rate\", \"Learning rate.\", false, true,\n",
+    "                 std::vector<float>{0.1}, std::nullopt, 0, 1, false, true);\n",
+    "  AddAttr<int64_t>(\"num_leaves\", \"Max number of leaves in one tree.\", false,\n",
+    "                   true, std::vector<int64_t>{31}, std::nullopt, 2, 1024, true,\n",
+    "                   true);\n",
+    "}\n",
+    "\n",
+    "}  // namespace component\n",
+    "}  // namespace teeapps\n",
+    "```\n",
+    "\n",
+    "### 4. 定义组件输入输出\n",
+    "我们继续在`lgbm_component.cc`中来定义组件的输入输出。\n",
+    "定义输入输出需要用到的`AddIo`函数定义如下：\n",
+    "```c++\n",
+    "void AddIo(const IoType io_type, const std::string& name,\n",
+    "             const std::string& desc, const std::vector<std::string>& types,\n",
+    "             const std::optional<std::vector<TableColParam>>& col_params =\n",
+    "                 std::nullopt);\n",
+    "```\n",
+    "- io_type：表示这是输入还是输出，可选IoType::INPUT和IoType::OUTPUT。\n",
+    "- name：io名称。\n",
+    "- desc：io的详细说明。\n",
+    "- types: io的类型，使用定义在teeapps/component/util.h的DistDataType中的字符串作为类型名称，如果需要添加新的类型，请在DistDataType中添加定义。\n",
+    "- col_params:  额外列参数。对于输入的数据表，如果需要额外指定一些列参数，可以在这一项进行设置。比如在psi组件中，可以用\"key\"指定哪一列用于求交；比如在woe组件中，可以用\"feature_selects\"指定对哪些列进行woe binning；再比如训练组件中可以用\"label\"指定哪一列作为训练标签。\n",
+    "\n",
+    "\n",
+    "定义输入输出：\n",
+    "```c++\n",
+    "#include \"lgbm_component.h\"\n",
+    "\n",
+    "namespace teeapps {\n",
+    "namespace component {\n",
+    "\n",
+    "void LgbmTrainComponent::Init() {\n",
+    "  //省略了前面定义的组件参数....\n",
+    "\n",
+    "  AddIo(IoType::INPUT, \"train_dataset\", \"Input table.\",\n",
+    "        {DistDataType::INDIVIDUAL_TABLE},\n",
+    "        std::vector<TableColParam>{\n",
+    "            TableColParam(\"ids\", \"Id columns will not be trained.\"),\n",
+    "            TableColParam(\"label\", \"Label column.\", 1, 1)});\n",
+    "  AddIo(IoType::OUTPUT, \"output_model\", \"Output model.\",\n",
+    "        {DistDataType::LGBM_MODEL});\n",
+    "}\n",
+    "\n",
+    "}  // namespace component\n",
+    "}  // namespace teeapps\n",
+    "```\n",
+    "\n",
+    "这段代码定义了`lgbm_train`组件的输入输出：\n",
+    "\n",
+    "- 输入名称为\"train_dataset\"\n",
+    "- 输入的类型为DistDataType::INDIVIDUAL_TABLE，单边表，通常就是一个csv表格\n",
+    "- 输入的额外列参数\"ids\"：指明了哪些列作为id列，没有设定数量的上下限，也就是可以不指定id列，也可以指定多列都作为id列，被作为id列的那些列不会被用于训练。\n",
+    "- 输入的额外列参数\"label\"：指明了哪一列作为训练的标签列，设定数量的上下限均为1，也就是有且仅有一列标签用于训练（不同于数据表的schema中可以有多列label，训练时必须指明一列作为训练标签）。\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 编写组件执行逻辑\n",
+    "\n",
+    "Teeapps框架会解析json化的[sf_node_eval_param](https://github.com/secretflow/spec/blob/main/secretflow/spec/v1/evaluation.proto)，检查参数范围，对默认值进行赋值等，然后生成一个执行时的配置文件。组件执行代码可以直接读取该配置文件中的参数值，配置文件格式示例如下：\n",
+    "```json\n",
+    "{\n",
+    "  \"component_name\": \"lgbm_train\",\n",
+    "  \"n_estimators\": 10,\n",
+    "  \"objective\": \"binary\",\n",
+    "  \"boosting_type\": \"gbdt\",\n",
+    "  \"num_leaves\": 15,\n",
+    "  \"learning_rate\": 0.1,\n",
+    "  \"inputs\": [\n",
+    "    {\n",
+    "      \"data_path\": \"teeapps/biz/testdata/breast_cancer/breast_cancer.csv\",\n",
+    "      \"schema\": {\n",
+    "        \"ids\": [\n",
+    "          \"id\"\n",
+    "        ],\n",
+    "        \"features\": [\n",
+    "          \"mean radius\",\n",
+    "          \"mean texture\",\n",
+    "          \"mean perimeter\",\n",
+    "          \"mean area\",\n",
+    "          \"mean smoothness\",\n",
+    "          \"mean compactness\",\n",
+    "          \"mean concavity\",\n",
+    "          \"mean concave points\",\n",
+    "          \"mean symmetry\",\n",
+    "          \"mean fractal dimension\"\n",
+    "        ],\n",
+    "        \"labels\": [\n",
+    "          \"target\"\n",
+    "        ],\n",
+    "        \"id_types\": [\n",
+    "          \"int\"\n",
+    "        ],\n",
+    "        \"feature_types\": [\n",
+    "          \"float\",\n",
+    "          \"float\",\n",
+    "          \"float\",\n",
+    "          \"float\",\n",
+    "          \"float\",\n",
+    "          \"float\",\n",
+    "          \"float\",\n",
+    "          \"float\",\n",
+    "          \"float\",\n",
+    "          \"float\"\n",
+    "        ],\n",
+    "        \"label_types\": [\n",
+    "          \"bool\"\n",
+    "        ]\n",
+    "      },\n",
+    "      \"ids\": [\"id\"],\n",
+    "      \"label\": [\"target\"]\n",
+    "    }\n",
+    "  ],\n",
+    "  \"outputs\": [\n",
+    "    {\n",
+    "      \"data_path\": \"lgbm_bin_class.model\"\n",
+    "    }\n",
+    "  ]\n",
+    "}\n",
+    "```\n",
+    "\n",
+    "在`teeapps/biz`目录下新建`lgbm/lgbm.py`实现组件执行逻辑，它将按照上述json中的配置执行相应算法：\n",
+    "```python\n",
+    "import json\n",
+    "import logging\n",
+    "import sys\n",
+    "\n",
+    "import joblib\n",
+    "import lightgbm as lgb\n",
+    "import pandas\n",
+    "\n",
+    "from teeapps.biz.common import common\n",
+    "\n",
+    "COMPONENT_NAME = \"lgbm_train\"\n",
+    "\n",
+    "IDS = \"ids\"\n",
+    "LABEL = \"label\"\n",
+    "\n",
+    "N_ESTIMATORS = \"n_estimators\"\n",
+    "OBJECTIVE = \"objective\"\n",
+    "BOOSTING_TYPE = \"boosting_type\"\n",
+    "LEARNING_RATE = \"learning_rate\"\n",
+    "NUM_LEAVES = \"num_leaves\"\n",
+    "\n",
+    "REGRESSION = \"regression\"\n",
+    "BINARY = \"binary\"\n",
+    "\n",
+    "\n",
+    "def run_lgbm(task_config: dict):\n",
+    "    logging.info(\"Running lgbm training...\")\n",
+    "\n",
+    "    assert (\n",
+    "        task_config[common.COMPONENT_NAME] == COMPONENT_NAME\n",
+    "    ), f\"Component name should be {COMPONENT_NAME}, but got {task_config[common.COMPONENT_NAME]}\"\n",
+    "\n",
+    "    inputs = task_config[common.INPUTS]\n",
+    "    outputs = task_config[common.OUTPUTS]\n",
+    "\n",
+    "    assert len(inputs) == 1, f\"{COMPONENT_NAME} should have only 1 input\"\n",
+    "    assert len(outputs) == 1, f\"{COMPONENT_NAME} should have only 1 output\"\n",
+    "\n",
+    "    # get train data\n",
+    "    logging.info(\"Loading training data...\")\n",
+    "    df = common.gen_data_frame(inputs[0])\n",
+    "\n",
+    "    # labels in schema can be multiple, but eval target label is unique(in params)\n",
+    "    ids = inputs[0][IDS]\n",
+    "    labels = inputs[0][LABEL]\n",
+    "    assert len(labels) == 1, f\"{COMPONENT_NAME} should have only 1 labels column\"\n",
+    "\n",
+    "    features = inputs[0][common.SCHEMA][common.FEATURES]\n",
+    "    features = [feature for feature in features if feature not in ids + labels]\n",
+    "\n",
+    "    X = df[features]\n",
+    "    Y = pandas.to_numeric(df[labels[0]], errors=\"coerce\")\n",
+    "\n",
+    "    param = dict()\n",
+    "    param_keys = [N_ESTIMATORS, OBJECTIVE, BOOSTING_TYPE, LEARNING_RATE, NUM_LEAVES]\n",
+    "\n",
+    "    for key in param_keys:\n",
+    "        param[key] = task_config[key]\n",
+    "\n",
+    "    if param[OBJECTIVE] == REGRESSION:\n",
+    "        model = lgb.LGBMRegressor(**param)\n",
+    "    elif param[OBJECTIVE] == BINARY:\n",
+    "        model = lgb.LGBMClassifier(**param)\n",
+    "    else:\n",
+    "        raise RuntimeError(f\"unsupported objective function: {param[OBJECTIVE]}\")\n",
+    "\n",
+    "    # train model\n",
+    "    model.fit(X, Y)\n",
+    "\n",
+    "    logging.info(\"Setting origin feature_name in model...\")\n",
+    "    model.origin_feature_name_ = features\n",
+    "\n",
+    "    # dump model\n",
+    "    logging.info(\"Dumping model...\")\n",
+    "    model_data_path = outputs[0][common.DATA_PATH]\n",
+    "    joblib.dump(model, model_data_path)\n",
+    "\n",
+    "\n",
+    "def main():\n",
+    "    assert len(sys.argv) == 2, f\"Wrong arguments number: {len(sys.argv)}\"\n",
+    "    # load task_config json\n",
+    "    task_config_path = sys.argv[1]\n",
+    "    logging.info(\"Reading task config file...\")\n",
+    "    with open(task_config_path, \"r\") as task_config_f:\n",
+    "        task_config = json.load(task_config_f)\n",
+    "        logging.debug(f\"Configurations: {task_config}\")\n",
+    "        run_lgbm(task_config)\n",
+    "\n",
+    "\n",
+    "\"\"\"\n",
+    "This app is expected to be launched by app framework via running a subprocess \n",
+    "`python3 lgbm.py config`. Before launching the subprocess, the app framework will \n",
+    "firstly generate a config file which is a json file containing all the required \n",
+    "parameters and is serialized from the task.proto. Currently we do not handle any \n",
+    "errors/exceptions in this file as the outer app framework will capture the stderr \n",
+    "and stdout.\n",
+    "\"\"\"\n",
+    "if __name__ == \"__main__\":\n",
+    "    # TODO set log level\n",
+    "    logging.basicConfig(\n",
+    "        stream=sys.stdout,\n",
+    "        level=logging.INFO,\n",
+    "        format=\"%(asctime)s - %(levelname)s - %(message)s\",\n",
+    "    )\n",
+    "    main()\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 注册组件\n",
+    "\n",
+    "### 1. 在`teeapps/component/component_list.h`中注册组件\n",
+    "\n",
+    "在ComponentDomain中新增Domain名，没有新增Domain则不需要添加。\n",
+    "\n",
+    "在ComponentName中新增组件名:\n",
+    "\n",
+    "```c++\n",
+    "struct ComponentName {\n",
+    "  ...\n",
+    "  static constexpr char kLgbmTrainComp[] = \"lgbm_train\";\n",
+    "  ...\n",
+    "};\n",
+    "```\n",
+    "\n",
+    "在ComponentPyFile中新增组件执行逻辑的python文件名:\n",
+    "\n",
+    "```c++\n",
+    "struct ComponentPyFile {\n",
+    "  ...\n",
+    "  static constexpr char kLgbmPy[] = \"lgbm.py\";\n",
+    "  ...\n",
+    "};\n",
+    "```\n",
+    "\n",
+    "在comp_py_map中新增组件名与组件执行python文件名的映射关系:\n",
+    "\n",
+    "```c++\n",
+    "const std::unordered_map<std::string, std::string> comp_py_map = {\n",
+    "    ...\n",
+    "    {ComponentName::kLgbmTrainComp, ComponentPyFile::kLgbmPy},\n",
+    "    ...\n",
+    "};\n",
+    "```\n",
+    "\n",
+    "在COMP_DEF_MAP中新增组件全名与组件定义的映射关系:\n",
+    "\n",
+    "```c++\n",
+    "const std::map<std::string, secretflow::spec::v1::ComponentDef> COMP_DEF_MAP = {\n",
+    "    ...\n",
+    "    {GenCompFullName(ComponentDomain::kMlTrainDomain,\n",
+    "                     ComponentName::kLgbmTrainComp, kCompVersion),\n",
+    "     secretflow::spec::v1::ComponentDef(\n",
+    "         *teeapps::component::LgbmTrainComponent::GetInstance().Definition())},\n",
+    "    ...\n",
+    "    };\n",
+    "```\n",
+    "\n",
+    "### 2. 增加翻译（可选）\n",
+    "在teeapps/component/all_translation_cn.json中增加组件名称和参数的翻译，例如：\n",
+    "```json\n",
+    "{\n",
+    "  ...\n",
+    "  \"ml.train/lgbm_train:0.0.1\": {\n",
+    "    \"ml.train\": \"模型训练\",\n",
+    "    \"lgbm_train\": \"LightGBM训练\",\n",
+    "    \"LightGBM train component for individual dataset.\": \"为独立数据集提供LightGBM训练能力的组件\",\n",
+    "    \"0.0.1\": \"0.0.1\",\n",
+    "    \"n_estimators\": \"训练轮数\",\n",
+    "    \"Number of boosted trees to fit.\": \"训练轮数\",\n",
+    "    \"objective\": \"学习目标\",\n",
+    "    \"Specify the learning objective.\": \"指定学习目标（二分类或回归）\",\n",
+    "    \"boosting_type\": \"基学习类型\",\n",
+    "    \"Boosting type.\": \"基学习类型\",\n",
+    "    \"learning_rate\": \"学习率\",\n",
+    "    \"Learning rate.\": \"学习率\",\n",
+    "    \"num_leaves\": \"叶子数\",\n",
+    "    \"Max number of leaves in one tree.\": \"一棵树中的最大叶子数量\",\n",
+    "    \"train_dataset\": \"训练数据集\",\n",
+    "    \"Input table.\": \"输入的训练数据集\",\n",
+    "    \"ids\": \"id列\",\n",
+    "    \"Id columns will not be trained.\": \"指定的id列不会作为训练的特征\",\n",
+    "    \"label\": \"标签列\",\n",
+    "    \"Label column.\": \"标签列\",\n",
+    "    \"output_model\": \"输出模型\",\n",
+    "    \"Output model.\": \"输出模型\"\n",
+    "  },\n",
+    "  ...\n",
+    "}\n",
+    "```\n",
+    "\n",
+    "### 3. 生成新的组件列表(可选)\n",
+    "进入开发容器\n",
+    "```bash\n",
+    "bash env.sh\n",
+    "bash env.sh enter\n",
+    "```\n",
+    "\n",
+    "编译component目录\n",
+    "```bash\n",
+    "bazel --output_base=target build //teeapps/component/...\n",
+    "```\n",
+    "\n",
+    "生成组件列表和翻译列表\n",
+    "```bash\n",
+    "./bazel-bin/teeapps/component/main\n",
+    "```\n",
+    "您将在`teeapps/component/comp_list.json`中看到新的组件列表，它对应secretpad中的[trustedflow组件定义](https://github.com/secretflow/secretpad/blob/main/config/components/trustedflow.json)；\n",
+    "在`teeapps/component/translation.json`中看到相关字段的翻译，它对应secretpad中的[trustedflow组件翻译](https://github.com/secretflow/secretpad/blob/main/config/i18n/trustedflow.json)。\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 构建Teeapps镜像\n",
+    "在主机上用`deployment`目录下的`build.sh`脚本来构建不同平台下的镜像。\n",
+    "\n",
+    "对于sgx平台，在运行脚本前还需要在`deployment/occlum/python.yaml`中添加`lgbm.py`，如下：\n",
+    "\n",
+    "```yaml\n",
+    "includes:\n",
+    "  - base.yaml\n",
+    "targets:\n",
+    "  - target: /bin\n",
+    "    createlinks:\n",
+    "      - src: /opt/python-occlum/bin/python3\n",
+    "        linkname: python3\n",
+    "  # python packages\n",
+    "  - target: /opt\n",
+    "    copy: \n",
+    "      - dirs:\n",
+    "          - /home/teeapp/python-occlum\n",
+    "  - target: /\n",
+    "    copy:\n",
+    "      - from: /home/teeapp/occlum/teeapps/biz\n",
+    "        dirs:\n",
+    "          - secretflow\n",
+    "          - teeapps\n",
+    "        files: \n",
+    "          - biclassification_eval.py\n",
+    "          - feature_filter.py\n",
+    "          - train_test_split.py\n",
+    "          - lr.py\n",
+    "          - predict.py\n",
+    "          - prediction_bias_eval.py\n",
+    "          - psi.py\n",
+    "          - pearsonr.py\n",
+    "          - vif.py\n",
+    "          - table_statistics.py\n",
+    "          - woe_binning.py\n",
+    "          - woe_substitution.py\n",
+    "          - xgb.py\n",
+    "          - lgbm.py\n",
+    "          - __init__.py\n",
+    "```\n",
+    "\n",
+    "`build.sh`镜像构建脚本执行方式如下:\n",
+    "\n",
+    "```bash\n",
+    "cd deployment\n",
+    "\n",
+    "bash build.sh -p sim -v ${VERSION}\n",
+    "\n",
+    "bash build.sh -p sgx -v ${VERSION}\n",
+    "\n",
+    "bash build.sh -p tdx -v ${VERSION}\n",
+    "\n",
+    "bash build.sh -p csv -v ${VERSION}\n",
+    "```"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/index.rst b/docs/index.rst
index 75d9021..5a7274b 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -47,6 +47,9 @@ TrustedFlow保护了数据使用中（data-in-use）、数据存储（data-at-re
 2. `授权策略 <architecture/policy>`_
 3. `可信应用 <architecture/apps/index>`_
 
+自定义可信组件
+---------------
+如需开发新的自定义可信组件，欢迎阅读 `新组件开发教程 <development/new_component>`_ 。
 
 高阶话题
 -----------
@@ -72,4 +75,5 @@ TrustedFlow功能列表和路线图，欢迎阅读 `路线图 <./advanced_topic/
 
    quick_start/index
    architecture/index
+   development/index
    advanced_topic/index
diff --git a/docs/quick_start/step3.ipynb b/docs/quick_start/step3.ipynb
index 1f4b974..5258e83 100644
--- a/docs/quick_start/step3.ipynb
+++ b/docs/quick_start/step3.ipynb
@@ -40,7 +40,7 @@
     "- `rule_id`：alice为它要授权的规则取了id号为alice_rule_id_1。如果后续有删除该条规则的需求，可以根据该id号来做删除。\n",
     "- `grantee_party_ids`: alice指定被他授权的人是carol，因为可以授权给多个人，所以是一个列表。\n",
     "- `columns`: alice允许carol使用数据的这些列：id、mean radius、mean texture、mean perimeter、mean area、mean smoothness。\n",
-    "- `op_constraints`: alice允许carol执行以下计算：数据求交（OP_PSI）、数据拆分（OP_DATASET_SPLIT）、XGB训练（OP_XGB）、XGB预测（OP_XGB_PREDICT）、二分类评估（OP_BICLASSIFIER_EVALUATION）。关于算子的更详细说明，可以阅读[可信应用](../architecture/apps/index.rst)。\n",
+    "- `op_constraints`: alice允许carol执行以下计算：数据求交（`psi`）、数据拆分（`train_test_split`）、XGB训练（`xgb_train`）、XGB预测（`xgb_predict`）、二分类评估（`biclassification_eval`）。关于算子的更详细说明，可以阅读[可信APP](../architecture/apps/index.rst)。\n",
     "\n",
     "\n",
     "下面的配置还需要您根据实际情况进行完善，包含：\n",
@@ -73,27 +73,27 @@
     "      op_constraints:\n",
     "        - \n",
     "          # (required) str\n",
-    "          op_name: OP_PSI\n",
+    "          op_name: psi\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_DATASET_SPLIT\n",
+    "          op_name: train_test_split\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_XGB\n",
+    "          op_name: xgb_train\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_PREDICT\n",
+    "          op_name: xgb_predict\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_BICLASSIFIER_EVALUATION\n",
+    "          op_name: biclassification_eval\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "```\n",
@@ -130,27 +130,27 @@
     "      op_constraints:\n",
     "        - \n",
     "          # (required) str\n",
-    "          op_name: OP_PSI\n",
+    "          op_name: psi\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_DATASET_SPLIT\n",
+    "          op_name: train_test_split\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_XGB\n",
+    "          op_name: xgb_train\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_PREDICT\n",
+    "          op_name: xgb_predict\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_BICLASSIFIER_EVALUATION\n",
+    "          op_name: biclassification_eval\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "```\n",
@@ -245,7 +245,7 @@
     "- `rule_id`：alice为它要授权的规则取了id号为alice_rule_id_1。如果后续有删除该条规则的需求，可以根据该id号来做删除。\n",
     "- `grantee_party_ids`: alice指定被他授权的人是carol，因为可以授权给多个人，所以是一个列表。\n",
     "- `columns`: alice允许carol使用数据的这些列：id、mean radius、mean texture、mean perimeter、mean area、mean smoothness。\n",
-    "- `op_constraints`: alice允许carol执行以下计算：数据求交（OP_PSI）、数据拆分（OP_DATASET_SPLIT）、XGB训练（OP_XGB）、XGB预测（OP_XGB_PREDICT）、二分类评估（OP_BICLASSIFIER_EVALUATION）。关于算子的更详细说明，可以阅读[可信应用](../architecture/apps/index.rst)。\n",
+    "- `op_constraints`: alice允许carol执行以下计算：数据求交（`psi`）、数据拆分（`train_test_split`）、XGB训练（`xgb_train`）、XGB预测（`xgb_predict`）、二分类评估（`biclassification_eval`）。关于算子的更详细说明，可以阅读[可信APP](../architecture/apps/index.rst)。\n",
     "\n",
     "下面的配置还需要您根据实际情况进行完善，包含：\n",
     "\n",
@@ -281,27 +281,27 @@
     "      op_constraints:\n",
     "        - \n",
     "          # (required) str\n",
-    "          op_name: OP_PSI\n",
+    "          op_name: psi\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_DATASET_SPLIT\n",
+    "          op_name: train_test_split\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_XGB\n",
+    "          op_name: xgb_train\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_PREDICT\n",
+    "          op_name: xgb_predict\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_BICLASSIFIER_EVALUATION\n",
+    "          op_name: biclassification_eval\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "```\n",
@@ -338,27 +338,27 @@
     "      op_constraints:\n",
     "        - \n",
     "          # (required) str\n",
-    "          op_name: OP_PSI\n",
+    "          op_name: psi\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_DATASET_SPLIT\n",
+    "          op_name: train_test_split\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_XGB\n",
+    "          op_name: xgb_train\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_PREDICT\n",
+    "          op_name: xgb_predict\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_BICLASSIFIER_EVALUATION\n",
+    "          op_name: biclassification_eval\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "```\n",
@@ -400,7 +400,7 @@
     "- `rule_id`：alice为它要授权的规则取了id号为alice_rule_id_1。如果后续有删除该条规则的需求，可以根据该id号来做删除。\n",
     "- `grantee_party_ids`: alice指定被他授权的人是carol，因为可以授权给多个人，所以是一个列表。\n",
     "- `columns`: alice允许carol使用数据的这些列：id、mean radius、mean texture、mean perimeter、mean area、mean smoothness。\n",
-    "- `op_constraints`: alice允许carol执行以下计算：数据求交（OP_PSI）、数据拆分（OP_DATASET_SPLIT）、XGB训练（OP_XGB）、XGB预测（OP_XGB_PREDICT）、二分类评估（OP_BICLASSIFIER_EVALUATION）。关于算子的更详细说明，可以阅读[可信应用](../architecture/apps/index.rst)。\n",
+    "- `op_constraints`: alice允许carol执行以下计算：数据求交（`psi`）、数据拆分（`train_test_split`）、XGB训练（`xgb_train`）、XGB预测（`xgb_predict`）、二分类评估（`biclassification_eval`）。关于算子的更详细说明，可以阅读[可信APP](../architecture/apps/index.rst)。\n",
     "\n",
     "下面的配置还需要您根据实际情况进行完善，包含：\n",
     "\n",
@@ -433,27 +433,27 @@
     "      op_constraints:\n",
     "        - \n",
     "          # (required) str\n",
-    "          op_name: OP_PSI\n",
+    "          op_name: psi\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_DATASET_SPLIT\n",
+    "          op_name: train_test_split\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_XGB\n",
+    "          op_name: xgb_train\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_PREDICT\n",
+    "          op_name: xgb_predict\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_BICLASSIFIER_EVALUATION\n",
+    "          op_name: biclassification_eval\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "```\n",
@@ -490,27 +490,27 @@
     "      op_constraints:\n",
     "        - \n",
     "          # (required) str\n",
-    "          op_name: OP_PSI\n",
+    "          op_name: psi\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_DATASET_SPLIT\n",
+    "          op_name: train_test_split\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_XGB\n",
+    "          op_name: xgb_train\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_PREDICT\n",
+    "          op_name: xgb_predict\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_BICLASSIFIER_EVALUATION\n",
+    "          op_name: biclassification_eval\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "```\n",
@@ -554,7 +554,7 @@
     "- `rule_id`：alice为它要授权的规则取了id号为alice_rule_id_1。如果后续有删除该条规则的需求，可以根据该id号来做删除。\n",
     "- `grantee_party_ids`: alice指定被他授权的人是carol，因为可以授权给多个人，所以是一个列表。\n",
     "- `columns`: alice允许carol使用数据的这些列：id、mean radius、mean texture、mean perimeter、mean area、mean smoothness。\n",
-    "- `op_constraints`: alice允许carol执行以下计算：数据求交（OP_PSI）、数据拆分（OP_DATASET_SPLIT）、XGB训练（OP_XGB）、XGB预测（OP_XGB_PREDICT）、二分类评估（OP_BICLASSIFIER_EVALUATION）。关于算子的更详细说明，可以阅读[可信应用](../architecture/apps/index.rst)。\n",
+    "- `op_constraints`: alice允许carol执行以下计算：数据求交（`psi`）、数据拆分（`train_test_split`）、XGB训练（`xgb_train`）、XGB预测（`xgb_predict`）、二分类评估（`biclassification_eval`）。关于算子的更详细说明，可以阅读[可信APP](../architecture/apps/index.rst)。\n",
     "\n",
     "下面的配置还需要您根据实际情况进行完善，包含：\n",
     "\n",
@@ -588,27 +588,27 @@
     "      op_constraints:\n",
     "        - \n",
     "          # (required) str\n",
-    "          op_name: OP_PSI\n",
+    "          op_name: psi\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_DATASET_SPLIT\n",
+    "          op_name: train_test_split\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_XGB\n",
+    "          op_name: xgb_train\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_PREDICT\n",
+    "          op_name: xgb_predict\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_BICLASSIFIER_EVALUATION\n",
+    "          op_name: biclassification_eval\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "```\n",
@@ -643,27 +643,27 @@
     "      op_constraints:\n",
     "        - \n",
     "          # (required) str\n",
-    "          op_name: OP_PSI\n",
+    "          op_name: psi\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_DATASET_SPLIT\n",
+    "          op_name: train_test_split\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_XGB\n",
+    "          op_name: xgb_train\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_PREDICT\n",
+    "          op_name: xgb_predict\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "        -\n",
     "          # (required) str\n",
-    "          op_name: OP_BICLASSIFIER_EVALUATION\n",
+    "          op_name: biclassification_eval\n",
     "          # (optional) List[str]\n",
     "          constraints:\n",
     "```\n",