From b29a61de70af0f95bc809ccfbc3fadcd62d15876 Mon Sep 17 00:00:00 2001
From: 4ever <PlayingLove00@gmail.com>
Date: Fri, 23 Jan 2026 16:03:12 +0800
Subject: [PATCH 1/3] Ernie 4.5 inference tasks

---
 llm/README.md                         |   1 +
 llm/ernie4_5/inference_ernie4_5.ipynb | 319 ++++++++++++++++++++++++++
 2 files changed, 320 insertions(+)
 create mode 100644 llm/ernie4_5/inference_ernie4_5.ipynb

diff --git a/llm/README.md b/llm/README.md
index d13b624..8612170 100644
--- a/llm/README.md
+++ b/llm/README.md
@@ -14,6 +14,7 @@ The following notebooks are actively maintained in sync with MindSpore and MindS
 | 2   | [distilgpt2](./distilgpt2/) | Includes notebooks for DistilGPT-2 finetuning and inference on causal language modeling (text generation) tasks. |
 | 3   | [bert](./bert/) | Includes notebooks for finetuning BERT on SWAG dataset for Multiple Choice tasks using MindSpore NLP |
 | 4   | [esm](./esmforproteinfolding/) | Includes notebooks for EsmForProteinFolding finetuning and inference tasks  |
+| 5   | [ernie4.5](./ernie4_5/) | Includes notebooks for ERNIE 4.5 inference tasks |
 
 ### Community-Driven / Legacy Applications
 
diff --git a/llm/ernie4_5/inference_ernie4_5.ipynb b/llm/ernie4_5/inference_ernie4_5.ipynb
new file mode 100644
index 0000000..aa129bf
--- /dev/null
+++ b/llm/ernie4_5/inference_ernie4_5.ipynb
@@ -0,0 +1,319 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "0c7132da",
+   "metadata": {},
+   "source": [
+    "# 基于 MindSpore NLP 的 ERNIE 4.5 模型推理与应用\n",
+    "\n",
+    "## 实验介绍\n",
+    "\n",
+    "本实验主要介绍如何基于 MindSpore 2.7.0 AI 框架和 MindSpore NLP 0.5.1 套件，在 Ascend 800I/T A2 硬件环境下，实现 ERNIE 4.5 大语言模型的加载、推理及应用开发。\n",
+    "\n",
+    "ERNIE 4.5 是百度开源的大规模模型系列，包含稠密（Dense）与混合专家（MoE）架构，在中文理解、多模态交互及长文本处理方面表现优异。本案例将演示如何利用 MindSpore NLP 的 `AutoClass` 接口（`AutoTokenizer`、`AutoModelForCausalLM`）快速加载模型权重，并构建一个基于该模型的对话应用。\n",
+    "\n",
+    "## 实验环境\n",
+    "\n",
+    "本案例基于 **Ascend 800I/T A2** 硬件环境，软件环境如下：\n",
+    "\n",
+    "| Python | MindSpore | MindSpore NLP |\n",
+    "| :----- | :-------- | :------------ |\n",
+    "| 3.10   | 2.7.0     | 0.5.1         |"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "20bb5f2e",
+   "metadata": {},
+   "source": [
+    "### 安装依赖\n",
+    "\n",
+    "首先，我们需要安装 MindSpore NLP（MindNLP）及相关依赖库。如果环境中尚未安装，请取消下方命令的注释后执行："
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3041a225",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 安装 MindSpore NLP\n",
+    "# !pip install mindnlp==0.5.1 -i https://pypi.tuna.tsinghua.edu.cn/simple\n",
+    "# 安装常用的文本处理库\n",
+    "# !pip install jieba\n",
+    "# !pip install sentencepiece"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "eca64203",
+   "metadata": {},
+   "source": [
+    "### 配置运行环境\n",
+    "\n",
+    "引入必要的库，并设置 MindSpore 的运行模式。针对大模型推理，我们使用 Ascend 作为计算后端。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6d59a07c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import time\n",
+    "import mindspore\n",
+    "from mindspore import context\n",
+    "import mindnlp\n",
+    "\n",
+    "# 设置使用 Ascend 设备\n",
+    "# 默认使用 PYNATIVE_MODE \n",
+    "context.set_context(device_target=\"Ascend\")\n",
+    "\n",
+    "print(f\"MindSpore version: {mindspore.__version__}\")\n",
+    "print(\"MindNLP version:\", mindnlp.__version__)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "188668d7",
+   "metadata": {},
+   "source": [
+    "## 数据准备\n",
+    "\n",
+    "对于大模型推理任务，我们通常不需要像 CV、NLP 等训练任务那样下载大规模训练数据集。但在实际应用开发中，我们可能需要准备一些特定的 Prompt（提示词）或测试用例。\n",
+    "\n",
+    "此处我们创建一个简单的测试数据集，模拟应用场景中的输入。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1471afc3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 模拟应用场景数据\n",
+    "test_cases = [\n",
+    "    \"请简要介绍一下什么是混合专家模型（MoE）？\",\n",
+    "    \"写一首关于秋天丰收的七言绝句。\",\n",
+    "    \"请分析以下句子的情感倾向：'这家餐厅的服务真是太糟糕了，我再也不会来了。'\",\n",
+    "    \"使用Python写一个冒泡排序算法。\"\n",
+    "]\n",
+    "\n",
+    "print(\"测试用例准备完成。\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "71617376",
+   "metadata": {},
+   "source": [
+    "## 模型构建与加载\n",
+    "\n",
+    "本章节将演示如何使用 MindSpore NLP 的 `mindnlp.transformers` 接口（`AutoTokenizer` 与 `AutoModelForCausalLM`）加载 ERNIE 4.5 模型。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f8f53408",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 加载分词器 (Tokenizer)\n",
+    "# 分词器负责将自然语言文本转换为模型可理解的 Token ID。\n",
+    "\n",
+    "from mindnlp.transformers import AutoTokenizer\n",
+    "from mindnlp.transformers import AutoModelForCausalLM\n",
+    "\n",
+    "MODEL_NAME = \"baidu/ERNIE-4.5-0.3B-Base-PT\"\n",
+    "\n",
+    "print(f\"正在加载分词器: {MODEL_NAME} ...\")\n",
+    "try:\n",
+    "    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)\n",
+    "    print(\"分词器加载成功。\")\n",
+    "except Exception as e:\n",
+    "    print(f\"分词器加载失败，请检查网络或模型名称。错误信息: {e}\")\n",
+    "    \n",
+    "# 加载模型 (Model)\n",
+    "# 在 Ascend 800I/T A2 上，为了节省显存并加速推理，我们推荐使用 float16 精度加载模型。\n",
+    "\n",
+    "print(f\"正在加载模型: {MODEL_NAME} ...\")\n",
+    "\n",
+    "# 加载模型权重\n",
+    "# mindspore_dtype=mindspore.float16 可以显著降低显存占用\n",
+    "try:\n",
+    "    model = AutoModelForCausalLM.from_pretrained(\n",
+    "        MODEL_NAME,\n",
+    "        mindspore_dtype=mindspore.float16\n",
+    "    )\n",
+    "    # 将模型设置为评估模式\n",
+    "    model.set_train(False)\n",
+    "    print(\"模型加载成功。\")\n",
+    "except Exception as e:\n",
+    "    print(f\"模型加载失败。错误信息: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b4775da6",
+   "metadata": {},
+   "source": [
+    "## 应用开发：构建对话生成函数\n",
+    "\n",
+    "为了方便进行特定任务推理，并为后续扩展多轮对话预留接口，我们将模型的生成过程封装为一个函数（当前实现为单轮推理，`history` 参数暂未参与 Prompt 构建）。这类似于图像分类案例（如 ResNet）中的“验证”或“推理”步骤。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd42b1ca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def chat_with_ernie(query, history=[], max_length=2048, temperature=0.7, top_p=0.9):\n",
+    "    \"\"\"\n",
+    "    基于 ERNIE 4.5 的对话生成函数\n",
+    "    \n",
+    "    Args:\n",
+    "        query (str): 用户输入的问题\n",
+    "        history (list): 对话历史\n",
+    "        max_length (int): 生成的最大长度\n",
+    "        temperature (float): 采样温度，控制生成的多样性\n",
+    "        top_p (float): 核采样阈值\n",
+    "    \n",
+    "    Returns:\n",
+    "        str: 模型生成的回答\n",
+    "    \"\"\"\n",
+    "    # 1. 构建 Prompt\n",
+    "    # 说明：此示例针对 ERNIE 4.5 的 Base 预训练模型，直接对原始 query 做 tokenize，不使用额外 Chat Template。\n",
+    "    # 若使用的是已对话微调的 ERNIE 4.5 Chat 类模型，请先根据其官方 Chat Template 将 history 和 query 拼接为 prompt，再送入 tokenizer。\n",
+    "    inputs = tokenizer(query, return_tensors=\"ms\")\n",
+    "    \n",
+    "    # 2. 生成配置\n",
+    "    # 注意：在 MindSpore 2.7 + MindSpore NLP 0.5.1 中，generate 接口用法与 Huggingface 类似\n",
+    "    outputs = model.generate(\n",
+    "        inputs[\"input_ids\"],\n",
+    "        max_length=max_length,\n",
+    "        do_sample=True,\n",
+    "        temperature=temperature,\n",
+    "        top_p=top_p,\n",
+    "        pad_token_id=tokenizer.pad_token_id,\n",
+    "        eos_token_id=tokenizer.eos_token_id\n",
+    "    )\n",
+    "    \n",
+    "    # 3. 解码输出：仅解码生成的部分，避免误删或截断输入内容\n",
+    "    generated_ids = outputs[0][inputs[\"input_ids\"].shape[-1]:]\n",
+    "    response = tokenizer.decode(generated_ids, skip_special_tokens=True)\n",
+    "    \n",
+    "    return response.strip()\n",
+    "\n",
+    "print(\"推理函数封装完成。\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "87b53bb4",
+   "metadata": {},
+   "source": [
+    "## 实验结果展示\n",
+    "\n",
+    "在本节中，我们将使用前文“数据准备”一节中准备的测试用例，对 ERNIE 4.5 模型进行实际的推理测试，展示其在不同领域的应用能力。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b65e8acc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 知识问答任务\n",
+    "# 测试模型对专业知识的理解能力。\n",
+    "\n",
+    "query_1 = test_cases[0] # 关于 MoE 的问题\n",
+    "print(f\"Q: {query_1}\")\n",
+    "\n",
+    "start_time = time.time()\n",
+    "response_1 = chat_with_ernie(query_1)\n",
+    "end_time = time.time()\n",
+    "\n",
+    "print(f\"A: {response_1}\")\n",
+    "print(f\"推理耗时: {end_time - start_time:.2f} s\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f36aedc7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 文学创作任务\n",
+    "# 测试模型的创意写作能力。\n",
+    "\n",
+    "query_2 = test_cases[1] # 写诗\n",
+    "print(f\"Q: {query_2}\")\n",
+    "response_2 = chat_with_ernie(query_2)\n",
+    "print(f\"A: \\n{response_2}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9972bcce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 情感分析任务\n",
+    "# 测试模型对自然语言的情绪理解能力。\n",
+    "\n",
+    "query_3 = test_cases[2] # 情感分析\n",
+    "print(f\"Q: {query_3}\")\n",
+    "response_3 = chat_with_ernie(query_3)\n",
+    "print(f\"A: \\n{response_3}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "33c0675a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 逻辑与代码生成任务\n",
+    "# 测试模型的逻辑推理与代码能力。\n",
+    "\n",
+    "query_4 = test_cases[3] # 写冒泡排序\n",
+    "print(f\"Q: {query_4}\")\n",
+    "response_4 = chat_with_ernie(query_4)\n",
+    "print(f\"A: \\n{response_4}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "mind",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 18afb6024cfdbfafe2fd1ad5268755ead46b7cd1 Mon Sep 17 00:00:00 2001
From: 4ever <PlayingLove00@gmail.com>
Date: Tue, 17 Mar 2026 14:00:32 +1030
Subject: [PATCH 2/3] Update Ascend device configuration for ERNIE 4.5
 inference

---
 llm/ernie4_5/inference_ernie4_5.ipynb | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llm/ernie4_5/inference_ernie4_5.ipynb b/llm/ernie4_5/inference_ernie4_5.ipynb
index aa129bf..b456bc8 100644
--- a/llm/ernie4_5/inference_ernie4_5.ipynb
+++ b/llm/ernie4_5/inference_ernie4_5.ipynb
@@ -69,9 +69,10 @@
     "from mindspore import context\n",
     "import mindnlp\n",
     "\n",
-    "# 设置使用 Ascend 设备\n",
-    "# 默认使用 PYNATIVE_MODE \n",
-    "context.set_context(device_target=\"Ascend\")\n",
+    "# 设置使用 Ascend 设备 (NPU)\n",
+    "# 确保 Ascend 驱动与 ACL 运行时已安装并配置好环境变量（如 ASCEND_DEVICE_ID）。\n",
+    "device_id = int(os.getenv('DEVICE_ID', os.getenv('ASCEND_DEVICE_ID', '0')))\n",
+    "context.set_context(device_target=\"Ascend\", mode=context.GRAPH_MODE, device_id=device_id)\n",
     "\n",
     "print(f\"MindSpore version: {mindspore.__version__}\")\n",
     "print(\"MindNLP version:\", mindnlp.__version__)"

From f29f393cfde248056224a3246e77c21998d5c3f2 Mon Sep 17 00:00:00 2001
From: 4ever <PlayingLove00@gmail.com>
Date: Tue, 17 Mar 2026 21:32:06 +1030
Subject: [PATCH 3/3] Refine ERNIE 4.5 inference notebook: update installation
 instructions, adjust model loading precision, and enhance comments for
 clarity

---
 llm/ernie4_5/inference_ernie4_5.ipynb | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/llm/ernie4_5/inference_ernie4_5.ipynb b/llm/ernie4_5/inference_ernie4_5.ipynb
index b456bc8..d1a611c 100644
--- a/llm/ernie4_5/inference_ernie4_5.ipynb
+++ b/llm/ernie4_5/inference_ernie4_5.ipynb
@@ -40,7 +40,7 @@
    "outputs": [],
    "source": [
     "# 安装 MindSpore NLP\n",
-    "# !pip install mindnlp==0.5.1 -i https://pypi.tuna.tsinghua.edu.cn/simple\n",
+    "# !pip install mindnlp==0.5.1\n",
     "# 安装常用的文本处理库\n",
     "# !pip install jieba\n",
     "# !pip install sentencepiece"
@@ -69,11 +69,6 @@
     "from mindspore import context\n",
     "import mindnlp\n",
     "\n",
-    "# 设置使用 Ascend 设备 (NPU)\n",
-    "# 确保 Ascend 驱动与 ACL 运行时已安装并配置好环境变量（如 ASCEND_DEVICE_ID）。\n",
-    "device_id = int(os.getenv('DEVICE_ID', os.getenv('ASCEND_DEVICE_ID', '0')))\n",
-    "context.set_context(device_target=\"Ascend\", mode=context.GRAPH_MODE, device_id=device_id)\n",
-    "\n",
     "print(f\"MindSpore version: {mindspore.__version__}\")\n",
     "print(\"MindNLP version:\", mindnlp.__version__)"
    ]
@@ -131,7 +126,7 @@
     "from mindnlp.transformers import AutoTokenizer\n",
     "from mindnlp.transformers import AutoModelForCausalLM\n",
     "\n",
-    "MODEL_NAME = \"baidu/ERNIE-4.5-0.3B-Base-PT\"\n",
+    "MODEL_NAME = \"baidu/ERNIE-4.5-0.3B-PT\"\n",
     "\n",
     "print(f\"正在加载分词器: {MODEL_NAME} ...\")\n",
     "try:\n",
@@ -141,20 +136,19 @@
     "    print(f\"分词器加载失败，请检查网络或模型名称。错误信息: {e}\")\n",
     "    \n",
     "# 加载模型 (Model)\n",
-    "# 在 Ascend 800I/T A2 上，为了节省显存并加速推理，我们推荐使用 float16 精度加载模型。\n",
     "\n",
     "print(f\"正在加载模型: {MODEL_NAME} ...\")\n",
     "\n",
     "# 加载模型权重\n",
-    "# mindspore_dtype=mindspore.float16 可以显著降低显存占用\n",
+    "# mindspore_dtype=mindspore.bfloat16 可在 NPU 上使用更高效的 bfloat16 精度\n",
     "try:\n",
     "    model = AutoModelForCausalLM.from_pretrained(\n",
     "        MODEL_NAME,\n",
-    "        mindspore_dtype=mindspore.float16\n",
-    "    )\n",
+    "        mindspore_dtype=mindspore.bfloat16\n",
+    "    ).to('npu')\n",
     "    # 将模型设置为评估模式\n",
     "    model.set_train(False)\n",
-    "    print(\"模型加载成功。\")\n",
+    "    print(\"模型加载成功，已加载到 NPU (bfloat16)。\")\n",
     "except Exception as e:\n",
     "    print(f\"模型加载失败。错误信息: {e}\")"
    ]
@@ -191,12 +185,14 @@
     "        str: 模型生成的回答\n",
     "    \"\"\"\n",
     "    # 1. 构建 Prompt\n",
-    "    # 说明：此示例针对 ERNIE 4.5 的 Base 预训练模型，直接对原始 query 做 tokenize，不使用额外 Chat Template。\n",
+    "    # 说明：此示例针对 ERNIE 4.5 的预训练模型，直接对原始 query 做 tokenize，不使用额外 Chat Template。\n",
     "    # 若使用的是已对话微调的 ERNIE 4.5 Chat 类模型，请先根据其官方 Chat Template 将 history 和 query 拼接为 prompt，再送入 tokenizer。\n",
     "    inputs = tokenizer(query, return_tensors=\"ms\")\n",
+    "    # 将输入张量迁移到 NPU 上，与模型设备保持一致\n",
+    "    inputs = {k: v.to('npu:0') for k, v in inputs.items()}\n",
     "    \n",
     "    # 2. 生成配置\n",
-    "    # 注意：在 MindSpore 2.7 + MindSpore NLP 0.5.1 中，generate 接口用法与 Huggingface 类似\n",
+    "    # 在 MindSpore 2.7 + MindSpore NLP 0.5.1 中，generate 接口用法与 Huggingface 类似\n",
     "    outputs = model.generate(\n",
     "        inputs[\"input_ids\"],\n",
     "        max_length=max_length,\n",
@@ -213,7 +209,7 @@
     "    \n",
     "    return response.strip()\n",
     "\n",
-    "print(\"推理函数封装完成。\")"
+    "print(\"推理函数封装完成。\")\n"
    ]
   },
   {