From b29a61de70af0f95bc809ccfbc3fadcd62d15876 Mon Sep 17 00:00:00 2001 From: 4ever Date: Fri, 23 Jan 2026 16:03:12 +0800 Subject: [PATCH 1/3] Ernie 4.5 inference tasks --- llm/README.md | 1 + llm/ernie4_5/inference_ernie4_5.ipynb | 319 ++++++++++++++++++++++++++ 2 files changed, 320 insertions(+) create mode 100644 llm/ernie4_5/inference_ernie4_5.ipynb diff --git a/llm/README.md b/llm/README.md index d13b624..8612170 100644 --- a/llm/README.md +++ b/llm/README.md @@ -14,6 +14,7 @@ The following notebooks are actively maintained in sync with MindSpore and MindS | 2 | [distilgpt2](./distilgpt2/) | Includes notebooks for DistilGPT-2 finetuning and inference on causal language modeling (text generation) tasks. | | 3 | [bert](./bert/) | Includes notebooks for finetuning BERT on SWAG dataset for Multiple Choice tasks using MindSpore NLP | | 4 | [esm](./esmforproteinfolding/) | Includes notebooks for EsmForProteinFolding finetuning and inference tasks | +| 5 | [ernie4.5](./ernie4_5/) | Includes notebooks for Ernie 4.5 inference tasks | ### Community-Driven / Legacy Applications diff --git a/llm/ernie4_5/inference_ernie4_5.ipynb b/llm/ernie4_5/inference_ernie4_5.ipynb new file mode 100644 index 0000000..aa129bf --- /dev/null +++ b/llm/ernie4_5/inference_ernie4_5.ipynb @@ -0,0 +1,319 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0c7132da", + "metadata": {}, + "source": [ + "# 基于 MindSpore NLP 的 ERNIE 4.5 模型推理与应用\n", + "\n", + "## 实验介绍\n", + "\n", + "本实验主要介绍如何基于 MindSpore 2.7.0 AI 框架和 MindSpore NLP 0.5.1 套件,在 Ascend 800I/T A2 硬件环境下,实现 ERNIE 4.5 大语言模型的加载、推理及应用开发。\n", + "\n", + "ERNIE 4.5 是百度开源的大规模模型系列,包含稠密(Dense)与混合专家(MoE)架构,在中文理解、多模态交互及长文本处理方面表现优异。本案例将演示如何利用 MindSpore NLP 的 `AutoClass` 接口快速加载模型权重,并构建一个基于该模型的对话应用。\n", + "\n", + "## 实验环境\n", + "\n", + "本案例基于 **Ascend 800I/T A2** 硬件环境,软件环境如下:\n", + "\n", + "| Python | MindSpore | MindSpore NLP |\n", + "| :----- | :-------- | :------------ |\n", + "| 3.10 | 2.7.0 | 0.5.1 |" + ] + }, + { + "cell_type": "markdown", + 
"id": "20bb5f2e", + "metadata": {}, + "source": [ + "### 安装依赖\n", + "\n", + "首先,我们需要安装 MindNLP 及相关依赖库。如果环境中未安装,请执行以下命令:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3041a225", + "metadata": {}, + "outputs": [], + "source": [ + "# 安装 MindSpore NLP\n", + "# !pip install mindnlp==0.5.1 -i https://pypi.tuna.tsinghua.edu.cn/simple\n", + "# 安装常用的文本处理库\n", + "# !pip install jieba\n", + "# !pip install sentencepiece" + ] + }, + { + "cell_type": "markdown", + "id": "eca64203", + "metadata": {}, + "source": [ + "### 配置运行环境\n", + "\n", + "引入必要的库,并设置 MindSpore 的运行模式。针对大模型推理,我们使用 Ascend 作为计算后端。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d59a07c", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "import mindspore\n", + "from mindspore import context\n", + "import mindnlp\n", + "\n", + "# 设置使用 Ascend 设备\n", + "# 默认使用 PYNATIVE_MODE \n", + "context.set_context(device_target=\"Ascend\")\n", + "\n", + "print(f\"MindSpore version: {mindspore.__version__}\")\n", + "print(\"MindNLP version:\", mindnlp.__version__)" + ] + }, + { + "cell_type": "markdown", + "id": "188668d7", + "metadata": {}, + "source": [ + "## 数据准备\n", + "\n", + "对于大模型推理任务,我们通常不需要像 CV NLP 等任务中那样下载大规模训练数据集。但在实际应用开发中,我们可能需要准备一些特定的 Prompt(提示词)或测试用例。\n", + "\n", + "此处我们创建一个简单的测试数据集,模拟应用场景中的输入。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1471afc3", + "metadata": {}, + "outputs": [], + "source": [ + "# 模拟应用场景数据\n", + "test_cases = [\n", + " \"请简要介绍一下什么是混合专家模型(MoE)?\",\n", + " \"写一首关于秋天丰收的七言绝句。\",\n", + " \"请分析以下句子的情感倾向:'这家餐厅的服务真是太糟糕了,我再也不会来了。'\",\n", + " \"使用Python写一个冒泡排序算法。\"\n", + "]\n", + "\n", + "print(\"测试用例准备完成。\")" + ] + }, + { + "cell_type": "markdown", + "id": "71617376", + "metadata": {}, + "source": [ + "## 模型构建与加载\n", + "\n", + "本章节将演示如何使用 MindSpore NLP 的 `Transformers` 接口加载 ERNIE 4.5 模型。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8f53408", + "metadata": {}, 
+ "outputs": [], + "source": [ + "# 加载分词器 (Tokenizer)\n", + "# 分词器负责将自然语言文本转换为模型可理解的 Token ID。\n", + "\n", + "from mindnlp.transformers import AutoTokenizer\n", + "from mindnlp.transformers import AutoModelForCausalLM\n", + "\n", + "MODEL_NAME = \"baidu/ERNIE-4.5-0.3B-Base-PT\"\n", + "\n", + "print(f\"正在加载分词器: {MODEL_NAME} ...\")\n", + "try:\n", + " tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)\n", + " print(\"分词器加载成功。\")\n", + "except Exception as e:\n", + " print(f\"分词器加载失败,请检查网络或模型名称。错误信息: {e}\")\n", + " \n", + "# 加载模型 (Model)\n", + "# 在 Ascend 800I/T A2 上,为了节省显存并加速推理,我们推荐使用 float16 精度加载模型。\n", + "\n", + "print(f\"正在加载模型: {MODEL_NAME} ...\")\n", + "\n", + "# 加载模型权重\n", + "# mindspore_dtype=mindspore.float16 可以显著降低显存占用\n", + "try:\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " MODEL_NAME,\n", + " mindspore_dtype=mindspore.float16\n", + " )\n", + " # 将模型设置为评估模式\n", + " model.set_train(False)\n", + " print(\"模型加载成功。\")\n", + "except Exception as e:\n", + " print(f\"模型加载失败。错误信息: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "b4775da6", + "metadata": {}, + "source": [ + "## 应用开发:构建对话生成函数\n", + "\n", + "为了方便进行多轮对话或特定任务推理,我们将模型的生成过程封装为一个函数。这类似于 ResNet 案例中的“验证”或“推理”步骤。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd42b1ca", + "metadata": {}, + "outputs": [], + "source": [ + "def chat_with_ernie(query, history=[], max_length=2048, temperature=0.7, top_p=0.9):\n", + " \"\"\"\n", + " 基于 ERNIE 4.5 的对话生成函数\n", + " \n", + " Args:\n", + " query (str): 用户输入的问题\n", + " history (list): 对话历史\n", + " max_length (int): 生成的最大长度\n", + " temperature (float): 采样温度,控制生成的多样性\n", + " top_p (float): 核采样阈值\n", + " \n", + " Returns:\n", + " str: 模型生成的回答\n", + " \"\"\"\n", + " # 1. 
构建 Prompt\n", + " # 说明:此示例针对 ERNIE 4.5 的 Base 预训练模型,直接对原始 query 做 tokenize,不使用额外 Chat Template。\n", + " # 若使用的是已对话微调的 ERNIE 4.5 Chat 类模型,请先根据其官方 Chat Template 将 history 和 query 拼接为 prompt,再送入 tokenizer。\n", + " inputs = tokenizer(query, return_tensors=\"ms\")\n", + " \n", + " # 2. 生成配置\n", + " # 注意:在 MindSpore 2.7 + MindSpore NLP 0.5.1 中,generate 接口用法与 Huggingface 类似\n", + " outputs = model.generate(\n", + " inputs[\"input_ids\"],\n", + " max_length=max_length,\n", + " do_sample=True,\n", + " temperature=temperature,\n", + " top_p=top_p,\n", + " pad_token_id=tokenizer.pad_token_id,\n", + " eos_token_id=tokenizer.eos_token_id\n", + " )\n", + " \n", + " # 3. 解码输出:仅解码生成的部分,避免误删或截断输入内容\n", + " generated_ids = outputs[0][inputs[\"input_ids\"].shape[-1]:]\n", + " response = tokenizer.decode(generated_ids, skip_special_tokens=True)\n", + " \n", + " return response.strip()\n", + "\n", + "print(\"推理函数封装完成。\")" + ] + }, + { + "cell_type": "markdown", + "id": "87b53bb4", + "metadata": {}, + "source": [ + "## 实验结果展示\n", + "\n", + "在本节中,我们将使用“数据准备”一节准备的测试用例,对 ERNIE 4.5 模型进行实际的推理测试,展示其在不同领域的应用能力。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b65e8acc", + "metadata": {}, + "outputs": [], + "source": [ + "# 知识问答任务\n", + "# 测试模型对专业知识的理解能力。\n", + "\n", + "query_1 = test_cases[0] # 关于 MoE 的问题\n", + "print(f\"Q: {query_1}\")\n", + "\n", + "start_time = time.time()\n", + "response_1 = chat_with_ernie(query_1)\n", + "end_time = time.time()\n", + "\n", + "print(f\"A: {response_1}\")\n", + "print(f\"推理耗时: {end_time - start_time:.2f} s\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f36aedc7", + "metadata": {}, + "outputs": [], + "source": [ + "# 文学创作任务\n", + "# 测试模型的创意写作能力。\n", + "\n", + "query_2 = test_cases[1] # 写诗\n", + "print(f\"Q: {query_2}\")\n", + "response_2 = chat_with_ernie(query_2)\n", + "print(f\"A: \\n{response_2}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9972bcce", + "metadata": {}, + 
"outputs": [], + "source": [ + "# 情感分析任务\n", + "# 测试模型对自然语言的情绪理解能力。\n", + "\n", + "query_3 = test_cases[2] # 情感分析\n", + "print(f\"Q: {query_3}\")\n", + "response_3 = chat_with_ernie(query_3)\n", + "print(f\"A: \\n{response_3}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33c0675a", + "metadata": {}, + "outputs": [], + "source": [ + "# 逻辑与代码生成任务\n", + "# 测试模型的逻辑推理与代码能力。\n", + "\n", + "query_4 = test_cases[3] # 写冒泡排序\n", + "print(f\"Q: {query_4}\")\n", + "response_4 = chat_with_ernie(query_4)\n", + "print(f\"A: \\n{response_4}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mind", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 18afb6024cfdbfafe2fd1ad5268755ead46b7cd1 Mon Sep 17 00:00:00 2001 From: 4ever Date: Tue, 17 Mar 2026 14:00:32 +1030 Subject: [PATCH 2/3] Update Ascend device configuration for ERNIE 4.5 inference --- llm/ernie4_5/inference_ernie4_5.ipynb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llm/ernie4_5/inference_ernie4_5.ipynb b/llm/ernie4_5/inference_ernie4_5.ipynb index aa129bf..b456bc8 100644 --- a/llm/ernie4_5/inference_ernie4_5.ipynb +++ b/llm/ernie4_5/inference_ernie4_5.ipynb @@ -69,9 +69,10 @@ "from mindspore import context\n", "import mindnlp\n", "\n", - "# 设置使用 Ascend 设备\n", - "# 默认使用 PYNATIVE_MODE \n", - "context.set_context(device_target=\"Ascend\")\n", + "# 设置使用 Ascend 设备 (NPU)\n", + "# 确保 Ascend 驱动与 ACL 运行时已安装并配置好环境变量(如 ASCEND_DEVICE_ID)。\n", + "device_id = int(os.getenv('DEVICE_ID', os.getenv('ASCEND_DEVICE_ID', '0')))\n", + "context.set_context(device_target=\"Ascend\", mode=context.GRAPH_MODE, device_id=device_id)\n", "\n", "print(f\"MindSpore version: 
{mindspore.__version__}\")\n", "print(\"MindNLP version:\", mindnlp.__version__)" From f29f393cfde248056224a3246e77c21998d5c3f2 Mon Sep 17 00:00:00 2001 From: 4ever Date: Tue, 17 Mar 2026 21:32:06 +1030 Subject: [PATCH 3/3] Refine ERNIE 4.5 inference notebook: update installation instructions, adjust model loading precision, and enhance comments for clarity --- llm/ernie4_5/inference_ernie4_5.ipynb | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/llm/ernie4_5/inference_ernie4_5.ipynb b/llm/ernie4_5/inference_ernie4_5.ipynb index b456bc8..d1a611c 100644 --- a/llm/ernie4_5/inference_ernie4_5.ipynb +++ b/llm/ernie4_5/inference_ernie4_5.ipynb @@ -40,7 +40,7 @@ "outputs": [], "source": [ "# 安装 MindSpore NLP\n", - "# !pip install mindnlp==0.5.1 -i https://pypi.tuna.tsinghua.edu.cn/simple\n", + "# !pip install mindnlp==0.5.1\n", "# 安装常用的文本处理库\n", "# !pip install jieba\n", "# !pip install sentencepiece" @@ -69,11 +69,6 @@ "from mindspore import context\n", "import mindnlp\n", "\n", - "# 设置使用 Ascend 设备 (NPU)\n", - "# 确保 Ascend 驱动与 ACL 运行时已安装并配置好环境变量(如 ASCEND_DEVICE_ID)。\n", - "device_id = int(os.getenv('DEVICE_ID', os.getenv('ASCEND_DEVICE_ID', '0')))\n", - "context.set_context(device_target=\"Ascend\", mode=context.GRAPH_MODE, device_id=device_id)\n", - "\n", "print(f\"MindSpore version: {mindspore.__version__}\")\n", "print(\"MindNLP version:\", mindnlp.__version__)" ] @@ -131,7 +126,7 @@ "from mindnlp.transformers import AutoTokenizer\n", "from mindnlp.transformers import AutoModelForCausalLM\n", "\n", - "MODEL_NAME = \"baidu/ERNIE-4.5-0.3B-Base-PT\"\n", + "MODEL_NAME = \"baidu/ERNIE-4.5-0.3B-PT\"\n", "\n", "print(f\"正在加载分词器: {MODEL_NAME} ...\")\n", "try:\n", @@ -141,20 +136,19 @@ " print(f\"分词器加载失败,请检查网络或模型名称。错误信息: {e}\")\n", " \n", "# 加载模型 (Model)\n", - "# 在 Ascend 800I/T A2 上,为了节省显存并加速推理,我们推荐使用 float16 精度加载模型。\n", "\n", "print(f\"正在加载模型: {MODEL_NAME} ...\")\n", "\n", "# 加载模型权重\n", - "# mindspore_dtype=mindspore.float16 
可以显著降低显存占用\n", + "# mindspore_dtype=mindspore.bfloat16 可在 NPU 上使用更高效的 bfloat16 精度\n", "try:\n", " model = AutoModelForCausalLM.from_pretrained(\n", " MODEL_NAME,\n", - " mindspore_dtype=mindspore.float16\n", - " )\n", + " mindspore_dtype=mindspore.bfloat16\n", + " ).to('npu')\n", " # 将模型设置为评估模式\n", " model.set_train(False)\n", - " print(\"模型加载成功。\")\n", + " print(\"模型加载成功,已加载到 NPU (bfloat16)。\")\n", "except Exception as e:\n", " print(f\"模型加载失败。错误信息: {e}\")" ] @@ -191,12 +185,14 @@ " str: 模型生成的回答\n", " \"\"\"\n", " # 1. 构建 Prompt\n", - " # 说明:此示例针对 ERNIE 4.5 的 Base 预训练模型,直接对原始 query 做 tokenize,不使用额外 Chat Template。\n", + " # 说明:此示例针对 ERNIE 4.5 的预训练模型,直接对原始 query 做 tokenize,不使用额外 Chat Template。\n", " # 若使用的是已对话微调的 ERNIE 4.5 Chat 类模型,请先根据其官方 Chat Template 将 history 和 query 拼接为 prompt,再送入 tokenizer。\n", " inputs = tokenizer(query, return_tensors=\"ms\")\n", + " # 将输入张量迁移到 NPU 上,与模型设备保持一致\n", + " inputs = {k: v.to('npu:0') for k, v in inputs.items()}\n", " \n", " # 2. 生成配置\n", - " # 注意:在 MindSpore 2.7 + MindSpore NLP 0.5.1 中,generate 接口用法与 Huggingface 类似\n", + " # 在 MindSpore 2.7 + MindSpore NLP 0.5.1 中,generate 接口用法与 Huggingface 类似\n", " outputs = model.generate(\n", " inputs[\"input_ids\"],\n", " max_length=max_length,\n", @@ -213,7 +209,7 @@ " \n", " return response.strip()\n", "\n", - "print(\"推理函数封装完成。\")" + "print(\"推理函数封装完成。\")\n" ] }, {