From 948b91c3751c36d62ad27754861955c35cf65ce4 Mon Sep 17 00:00:00 2001 From: Libres-coder <2597242922@qq.com> Date: Tue, 28 Oct 2025 20:58:47 +0800 Subject: [PATCH 1/4] feat: Add Security and Risk Control Capabilities --- README.md | 6 +- README_CN.md | 6 +- .../core/executor/SecurityCallback.java | 277 +++++++++ .../core/tools/security/DataMaskingTool.java | 234 ++++++++ .../security/SensitiveWordFilterTool.java | 246 ++++++++ .../agentic/example/SecurityAgentTest.java | 341 ++++++++++++ .../security_demo/security_example.py | 316 +++++++++++ .../core/tool/security_tools.py | 317 +++++++++++ docs/Security-Guide.md | 286 ++++++++++ docs/Security-Guide_CN.md | 524 ++++++++++++++++++ 10 files changed, 2551 insertions(+), 2 deletions(-) create mode 100644 ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/executor/SecurityCallback.java create mode 100644 ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/tools/security/DataMaskingTool.java create mode 100644 ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/tools/security/SensitiveWordFilterTool.java create mode 100644 ali-agentic-adk-java/ali-agentic-adk-extension/ali-agentic-example/src/test/java/com/alibaba/agentic/example/SecurityAgentTest.java create mode 100644 ali-agentic-adk-python/examples/security_demo/security_example.py create mode 100644 ali-agentic-adk-python/src/ali_agentic_adk_python/core/tool/security_tools.py create mode 100644 docs/Security-Guide.md create mode 100644 docs/Security-Guide_CN.md diff --git a/README.md b/README.md index 2297946e..67afaf8c 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,11 @@ Agentic ADK is an Agent application development framework launched by Alibaba In * Offers hundreds of API tools and introduces the MCP integration gateway. * **DeepResearch/RAG, ComputerUse, BrowserUse, Sandbox**, and other best practices for Agentic AI. 
* Implementation of context extension for agent conversations, including Session, Memory, Artifact, and more, with built-in short and long-term memory plugins. -* Provides prompt automation tuning and security risk control-related agent examples. +* **Security and Risk Control Capabilities**: + * **Sensitive Word Filtering**: High-performance sensitive word detection and filtering based on DFA algorithm, supporting custom word libraries and multiple replacement strategies + * **Data Masking Protection**: Automatically identify and mask PII (phone numbers, ID cards, emails, bank cards, etc.) to protect user privacy + * **Security Callback Mechanism**: Automatically perform security checks before and after Agent execution, record security events + * [View Security Guide](docs/Security-Guide.md) ![Architecture Diagram](https://zos-oss-ol.oss-cn-hangzhou.aliyuncs.com/data/be03cd4383682bd6e8095ebf8472a0d1.png) diff --git a/README_CN.md b/README_CN.md index 2ab6d66a..43e98188 100644 --- a/README_CN.md +++ b/README_CN.md @@ -30,7 +30,11 @@ Agentic ADK 是阿里国际AI Business推出基于 [Google-ADK](https://google.g * 提供**上百个API工具**,并推出MCP集成网关。 * **DeepResearch/RAG、ComputerUse、BrowserUse、Sandbox**等Agentic AI最佳实践。 * 智能体会话的上下文扩展实现,包括Session、Memory、Artifact等等,内置长短记忆插件。 -* 提供Prompt自动化调优、安全风控相关代理样例。 +* 提供Prompt自动化调优、**安全风控能力**: + * **敏感词/黑词过滤**:基于DFA算法的高性能敏感词检测和过滤,支持自定义词库和多种替换策略 + * **数据脱敏保护**:自动识别和脱敏PII信息(手机号、身份证、邮箱、银行卡等),保护用户隐私 + * **简单易用**:工具可独立使用,也可通过安全回调实现自动化检查 + * [查看安全能力指南](docs/Security-Guide_CN.md) | [快速开始示例](#安全能力示例) ![架构图](https://zos-oss-ol.oss-cn-hangzhou.aliyuncs.com/data/be03cd4383682bd6e8095ebf8472a0d1.png) diff --git a/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/executor/SecurityCallback.java b/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/executor/SecurityCallback.java new file mode 100644 index 00000000..dc99b72f --- /dev/null +++ 
b/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/executor/SecurityCallback.java @@ -0,0 +1,277 @@ +/** + * Copyright (C) 2024 AIDC-AI + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.alibaba.agentic.core.executor; + +import com.alibaba.agentic.core.tools.security.DataMaskingTool; +import com.alibaba.agentic.core.tools.security.SensitiveWordFilterTool; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +/** + * Security callback for automated security checks. It scans the request parameter before the rest of the chain executes and the result data when a result is received, using the sensitive-word filter and the data-masking tool, and records a SecurityEvent whenever either check finds something. 
+ * + * @author Libres-coder + * @date 2025/10/27 + */ +public class SecurityCallback implements Callback { + + private static final Logger logger = LoggerFactory.getLogger(SecurityCallback.class); + + private boolean enableSensitiveWordFilter = true; + private boolean enableDataMasking = true; + private boolean blockOnSensitiveWord = false; /* when true, checkSecurity throws SecurityException on a sensitive-word hit */ + private boolean maskLogs = true; /* when true, the masked form of PII-bearing content is written to the debug log */ + + private final SensitiveWordFilterTool sensitiveWordFilter; + private final DataMaskingTool dataMaskingTool; + + private final List securityEvents = new ArrayList<>(); /* accumulates across calls until clearSecurityEvents(); plain ArrayList with no synchronization - NOTE(review): confirm one instance is never used from several threads */ + + public SecurityCallback() { + this.sensitiveWordFilter = new SensitiveWordFilterTool(); + this.dataMaskingTool = new DataMaskingTool(); + } + + public SecurityCallback(Set customSensitiveWords) { + this.sensitiveWordFilter = new SensitiveWordFilterTool(customSensitiveWords); + this.dataMaskingTool = new DataMaskingTool(); + } + + @Override + public void execute(SystemContext systemContext, Request request, Result result, CallbackChain chain) { + logger.debug("[SecurityCallback] Executing security checks before agent execution"); + + try { + if (request != null && request.getParam() != null) { + checkSecurity(request.getParam(), "REQUEST", systemContext); /* the request is checked before the chain continues */ + } + + chain.execute(systemContext, request, result); + + } catch (SecurityException e) { + logger.error("[SecurityCallback] Security violation detected: {}", e.getMessage()); + throw e; + } + } + + @Override + public void receive(SystemContext systemContext, Request request, Result result, CallbackChain chain) { + logger.debug("[SecurityCallback] Executing security checks after agent execution"); + + try { + if (result != null && result.getData() != null) { + checkSecurity(result.getData(), "RESPONSE", systemContext); /* the result data is checked before the chain continues */ + } + + chain.receive(systemContext, request, result); + + } catch (SecurityException e) { + logger.error("[SecurityCallback] Security violation in response: {}", e.getMessage()); + throw e; + } + } + + private void checkSecurity(Object payload, String 
stage, SystemContext systemContext) { /* Extracts text from the payload and runs each enabled check on it; stage is the label ("REQUEST" or "RESPONSE") supplied by execute/receive. The payload itself is never modified here. */ + if (payload == null) { + return; + } + + String content = extractTextContent(payload); + if (content == null || content.isEmpty()) { + return; + } + + if (enableSensitiveWordFilter) { + Map filterArgs = new HashMap<>(); + filterArgs.put("text", content); + filterArgs.put("strategy", "DETECT_ONLY"); /* detection only: no replacement text is requested */ + + try { + Map filterResult = sensitiveWordFilter.run(filterArgs, systemContext) + .blockingFirst(); + + Boolean hasSensitiveWords = (Boolean) filterResult.get("has_sensitive_words"); + if (Boolean.TRUE.equals(hasSensitiveWords)) { + @SuppressWarnings("unchecked") + List> detectedWords = + (List>) filterResult.get("detected_words"); + + SecurityEvent event = new SecurityEvent( + SecurityEventType.SENSITIVE_WORD_DETECTED, + stage, + "Detected " + detectedWords.size() + " sensitive word(s)", + detectedWords + ); + securityEvents.add(event); + + logger.warn("[SecurityCallback] {} - Sensitive words detected: {}", + stage, detectedWords.size()); + + if (blockOnSensitiveWord) { + throw new SecurityException( + "Sensitive word detected in " + stage + ". 
Request blocked."); + } + } + } catch (Exception e) { /* NOTE(review): the SecurityException above is thrown inside this try. If it is java.lang.SecurityException (no other SecurityException import is visible) it is an Exception, so it is caught here and only logged; it would then never reach the catch blocks in execute()/receive(), and blockOnSensitiveWord would not actually block. Confirm, and rethrow SecurityException if blocking is intended. */ + logger.error("[SecurityCallback] Error during sensitive word filter: {}", e.getMessage()); + } + } + + if (enableDataMasking) { + Map maskArgs = new HashMap<>(); + maskArgs.put("text", content); + maskArgs.put("types", Arrays.asList("all")); + + try { + Map maskResult = dataMaskingTool.run(maskArgs, systemContext) + .blockingFirst(); + + Boolean hasPII = (Boolean) maskResult.get("has_pii"); + if (Boolean.TRUE.equals(hasPII)) { + @SuppressWarnings("unchecked") + List> detectedPII = + (List>) maskResult.get("detected_pii"); + + SecurityEvent event = new SecurityEvent( + SecurityEventType.PII_DETECTED, + stage, + "Detected " + detectedPII.size() + " PII item(s)", + detectedPII + ); + securityEvents.add(event); + + logger.warn("[SecurityCallback] {} - PII detected: {}", stage, detectedPII.size()); + + if (maskLogs) { + String maskedText = (String) maskResult.get("masked_text"); + logger.debug("[SecurityCallback] Masked content: {}", maskedText); + } + } + } catch (Exception e) { /* a failing masking run is logged (message only) and otherwise ignored */ + logger.error("[SecurityCallback] Error during data masking: {}", e.getMessage()); + } + } + } + + private String extractTextContent(Object payload) { /* A String is used as-is; for a Map the first String value found under "text", "content", "message", "prompt", "response" (in that order) is used, otherwise map.toString(); any other object falls back to toString(). */ + if (payload instanceof String) { + return (String) payload; + } else if (payload instanceof Map) { + @SuppressWarnings("unchecked") + Map map = (Map) payload; + + for (String key : Arrays.asList("text", "content", "message", "prompt", "response")) { + Object value = map.get(key); + if (value instanceof String) { + return (String) value; + } + } + + return map.toString(); + } else { + return payload.toString(); + } + } + + public SecurityCallback enableSensitiveWordFilter(boolean enable) { + this.enableSensitiveWordFilter = enable; + return this; + } + + public SecurityCallback enableDataMasking(boolean enable) { + this.enableDataMasking = enable; + return this; + } + + public SecurityCallback setBlockOnSensitiveWord(boolean block) { + this.blockOnSensitiveWord = block; + return this; + } 
+ + public SecurityCallback setMaskLogs(boolean mask) { + this.maskLogs = mask; + return this; + } + + public SecurityCallback addSensitiveWord(String word) { + this.sensitiveWordFilter.addSensitiveWord(word); + return this; + } + + public SecurityCallback addSensitiveWords(Set words) { + this.sensitiveWordFilter.addSensitiveWords(words); + return this; + } + + public List getSecurityEvents() { + return new ArrayList<>(securityEvents); /* returns a copy, so callers cannot alter the internal list */ + } + + public void clearSecurityEvents() { + securityEvents.clear(); + } + + public enum SecurityEventType { + SENSITIVE_WORD_DETECTED, + PII_DETECTED, + SECURITY_VIOLATION /* declared but not used anywhere in this class */ + } + + public static class SecurityEvent { + private final SecurityEventType type; + private final String stage; + private final String message; + private final List> details; + private final long timestamp; + + public SecurityEvent(SecurityEventType type, String stage, String message, + List> details) { + this.type = type; + this.stage = stage; + this.message = message; + this.details = details; /* stored by reference, not copied */ + this.timestamp = System.currentTimeMillis(); /* creation time in epoch milliseconds */ + } + + public SecurityEventType getType() { + return type; + } + + public String getStage() { + return stage; + } + + public String getMessage() { + return message; + } + + public List> getDetails() { + return details; + } + + public long getTimestamp() { + return timestamp; + } + + @Override + public String toString() { + return String.format("[%s] %s - %s: %s", + new Date(timestamp), type, stage, message); + } + } +} diff --git a/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/tools/security/DataMaskingTool.java b/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/tools/security/DataMaskingTool.java new file mode 100644 index 00000000..ed957cdc --- /dev/null +++ b/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/tools/security/DataMaskingTool.java @@ -0,0 +1,234 @@ +/** + * Copyright (C) 2024 AIDC-AI + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.alibaba.agentic.core.tools.security; + +import com.alibaba.agentic.core.executor.SystemContext; +import com.alibaba.agentic.core.tools.BaseTool; +import io.reactivex.rxjava3.core.Flowable; + +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** Tool that finds PII in a text with regular expressions and returns a masked copy of the text together with the list of detections. + * @author Libres-coder + * @date 2025/10/27 + */ +public class DataMaskingTool implements BaseTool { + + public enum PIIType { /* each constant carries: type name, regex, number of leading characters kept, number of trailing characters kept; -1 is a sentinel interpreted by maskString */ + PHONE("phone", "1[3-9]\\d{9}", 3, 4), + ID_CARD("id_card", "[1-9]\\d{5}(18|19|20)\\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])\\d{3}[0-9Xx]", 6, 4), + EMAIL("email", "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", 2, -1), + BANK_CARD("bank_card", "\\d{13,19}", 4, 4), /* NOTE(review): matches any run of 13-19 digits, which includes an all-digit 18-character ID card number */ + IP_ADDRESS("ip_address", "\\b(?:\\d{1,3}\\.){3}\\d{1,3}\\b", -1, -1), + PASSWORD("password", "(?i)(password|passwd|pwd)[\"']?\\s*[:=]\\s*[\"']?([^\"'\\s,}]+)", -1, -1); + + private final String type; + private final String pattern; + private final int prefixKeep; + private final int suffixKeep; + + PIIType(String type, String pattern, int prefixKeep, int suffixKeep) { + this.type = type; + this.pattern = pattern; + this.prefixKeep = prefixKeep; + this.suffixKeep = suffixKeep; + } + + public String getType() { + return type; + } + + public Pattern getPattern() { + return Pattern.compile(pattern); /* compiles a new Pattern on every call */ + } + + public int getPrefixKeep() { + return prefixKeep; + } + + public int getSuffixKeep() { + return suffixKeep; + } + } + + @Override + public 
String name() { + return "data_masking"; + } + + @Override + public Flowable> run(Map args, SystemContext systemContext) { + return Flowable.fromCallable(() -> { /* args read below: "text" (input), "types" (list of type names, default ["all"]), "mask_char" (default "*") */ + String text = (String) args.get("text"); + @SuppressWarnings("unchecked") + List enabledTypes = (List) args.getOrDefault("types", Arrays.asList("all")); + String maskChar = (String) args.getOrDefault("mask_char", "*"); + + if (text == null || text.isEmpty()) { + return createResult(text, text, new ArrayList<>()); + } + + Set typesToCheck = determineTypes(enabledTypes); + MaskingResult result = maskText(text, typesToCheck, maskChar); + + return createResult(text, result.maskedText, result.detectedPII); + }); + } + + private Set determineTypes(List enabledTypes) { + Set types = new HashSet<>(); /* NOTE(review): HashSet iteration order is unspecified, and maskText applies the types one after another to the already-masked text, so overlapping patterns (e.g. ID_CARD and BANK_CARD) may be masked differently depending on that order */ + + if (enabledTypes.contains("all")) { + types.addAll(Arrays.asList(PIIType.values())); + } else { + for (String typeStr : enabledTypes) { + for (PIIType type : PIIType.values()) { + if (type.getType().equalsIgnoreCase(typeStr)) { + types.add(type); + break; + } + } + } + } + + return types; + } + + private MaskingResult maskText(String text, Set types, String maskChar) { + String maskedText = text; + List detectedPII = new ArrayList<>(); + + for (PIIType type : types) { + Pattern pattern = type.getPattern(); + Matcher matcher = pattern.matcher(maskedText); /* matches against the text as masked by the previous types, so the start/end offsets recorded below refer to that intermediate text, not to the original input */ + + StringBuffer sb = new StringBuffer(); + int offset = 0; /* never read */ + + while (matcher.find()) { + String original = matcher.group(); + String masked; + + if (type == PIIType.PASSWORD && matcher.groupCount() >= 2) { + original = matcher.group(2); + masked = matcher.group(1) + matcher.group(0).substring(matcher.group(1).length()).replaceAll("[^\"':=\\s]", maskChar); /* keeps the keyword and masks every following character except quotes, ':', '=' and whitespace; NOTE(review): maskChar is passed as a regex replacement string, so a '$' or backslash in it is interpreted by replaceAll */ + } else { + masked = maskString(original, type.getPrefixKeep(), type.getSuffixKeep(), maskChar); + } + + PIIDetection detection = new PIIDetection( + type.getType(), + original, + masked, + matcher.start(), + matcher.end() + ); + detectedPII.add(detection); + + matcher.appendReplacement(sb, 
Matcher.quoteReplacement(masked)); + } + matcher.appendTail(sb); + maskedText = sb.toString(); + } + + return new MaskingResult(maskedText, detectedPII); + } + + private String maskString(String str, int prefixKeep, int suffixKeep, String maskChar) { + if (str == null || str.isEmpty()) { + return str; + } + + int length = str.length(); + + if (suffixKeep == -1 && str.contains("@")) { /* e-mail form: keep at most prefixKeep characters (and never more than half) of the local part, mask the rest with at least 3 mask characters, leave the domain untouched */ + String[] parts = str.split("@"); + if (parts.length == 2) { + String localPart = parts[0]; + int keepLength = Math.min(prefixKeep, localPart.length() / 2); + String maskedLocal = localPart.substring(0, keepLength) + + maskChar.repeat(Math.max(localPart.length() - keepLength, 3)); + return maskedLocal + "@" + parts[1]; + } + } + + if (prefixKeep == -1 && suffixKeep == -1) { + return maskChar.repeat(Math.min(length, 10)); /* fully masked, capped at 10 mask characters */ + } + + if (length <= prefixKeep + suffixKeep) { + return maskChar.repeat(length); /* too short to keep both ends: mask everything */ + } + + String prefix = str.substring(0, prefixKeep); + String suffix = str.substring(length - suffixKeep); + int maskLength = length - prefixKeep - suffixKeep; + + return prefix + maskChar.repeat(maskLength) + suffix; + } + + private Map createResult(String originalText, String maskedText, + List detectedPII) { + Map result = new HashMap<>(); + result.put("original_text", originalText); /* NOTE(review): the unmasked input is part of the result map */ + result.put("masked_text", maskedText); + result.put("has_pii", !detectedPII.isEmpty()); + result.put("pii_count", detectedPII.size()); + + List> piiList = new ArrayList<>(); + for (PIIDetection pii : detectedPII) { + Map piiInfo = new HashMap<>(); + piiInfo.put("type", pii.type); + piiInfo.put("original_value", pii.originalValue); /* NOTE(review): the unmasked matched value is part of each detection entry */ + piiInfo.put("masked_value", pii.maskedValue); + piiInfo.put("start_index", pii.startIndex); + piiInfo.put("end_index", pii.endIndex); + piiList.add(piiInfo); + } + result.put("detected_pii", piiList); + + return result; + } + + private static class MaskingResult { + String maskedText; + List detectedPII; + + MaskingResult(String maskedText, List detectedPII) { + 
this.maskedText = maskedText; + this.detectedPII = detectedPII; + } + } + + private static class PIIDetection { /* one match: type name, matched value, its masked form, and the match offsets */ + String type; + String originalValue; + String maskedValue; + int startIndex; + int endIndex; + + PIIDetection(String type, String originalValue, String maskedValue, + int startIndex, int endIndex) { + this.type = type; + this.originalValue = originalValue; + this.maskedValue = maskedValue; + this.startIndex = startIndex; + this.endIndex = endIndex; + } + } +} diff --git a/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/tools/security/SensitiveWordFilterTool.java b/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/tools/security/SensitiveWordFilterTool.java new file mode 100644 index 00000000..7814571d --- /dev/null +++ b/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/tools/security/SensitiveWordFilterTool.java @@ -0,0 +1,246 @@ +/** + * Copyright (C) 2024 AIDC-AI + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.alibaba.agentic.core.tools.security; + +import com.alibaba.agentic.core.executor.SystemContext; +import com.alibaba.agentic.core.tools.BaseTool; +import io.reactivex.rxjava3.core.Flowable; + +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; + +/** Tool that detects sensitive words in a text and optionally replaces them; the word list is held as nested maps keyed by single characters. + * @author Libres-coder + * @date 2025/10/27 + */ +public class SensitiveWordFilterTool implements BaseTool { + + private final Map sensitiveWordMap; + private static final String END_FLAG = "END"; /* marker key stored in the node where a word ends; character keys are one-character strings, so this three-character key cannot clash with them */ + + public enum ReplaceStrategy { + ASTERISK, + DELETE, + CUSTOM, + DETECT_ONLY + } + + public SensitiveWordFilterTool() { + this(getDefaultSensitiveWords()); + } + + public SensitiveWordFilterTool(Set sensitiveWords) { + this.sensitiveWordMap = new ConcurrentHashMap<>(); + initSensitiveWordMap(sensitiveWords); + } + + private void initSensitiveWordMap(Set sensitiveWords) { + if (sensitiveWords == null || sensitiveWords.isEmpty()) { + return; + } + + for (String word : sensitiveWords) { + if (word == null || word.trim().isEmpty()) { /* blank entries are skipped; note that a non-blank word is inserted untrimmed */ + continue; + } + + Map currentMap = sensitiveWordMap; + for (int i = 0; i < word.length(); i++) { + String key = String.valueOf(word.charAt(i)); + + @SuppressWarnings("unchecked") + Map nextMap = (Map) currentMap.get(key); + + if (nextMap == null) { + nextMap = new ConcurrentHashMap<>(); + currentMap.put(key, nextMap); + } + + currentMap = nextMap; + + if (i == word.length() - 1) { + currentMap.put(END_FLAG, word); /* mark the end of the word in the node of its last character */ + } + } + } + } + + @Override + public String name() { + return "sensitive_word_filter"; + } + + @Override + public Flowable> run(Map args, SystemContext systemContext) { + return Flowable.fromCallable(() -> { /* args read below: "text" (input), "strategy" (ReplaceStrategy name, upper-cased before lookup, default ASTERISK), "custom_replace" (replacement used by CUSTOM) */ + String text = (String) args.get("text"); + String strategyStr = (String) args.getOrDefault("strategy", "ASTERISK"); + String customReplace = (String) args.getOrDefault("custom_replace", "[已屏蔽]"); + + if (text == null || text.isEmpty()) { + return createResult(text, new ArrayList<>(), text, false); + } + + ReplaceStrategy strategy; + try { + 
strategy = ReplaceStrategy.valueOf(strategyStr.toUpperCase()); + } catch (IllegalArgumentException e) { + strategy = ReplaceStrategy.ASTERISK; /* unknown strategy names fall back to ASTERISK */ + } + + List detectedWords = detectSensitiveWords(text); + String filteredText = filterText(text, detectedWords, strategy, customReplace); + + return createResult(text, detectedWords, filteredText, !detectedWords.isEmpty()); + }); + } + + private List detectSensitiveWords(String text) { + List results = new ArrayList<>(); + + for (int i = 0; i < text.length(); i++) { + int length = checkSensitiveWord(text, i); + if (length > 0) { + String word = text.substring(i, i + length); + results.add(new SensitiveWordResult(word, i, i + length)); + i += length - 1; /* resume scanning after the matched word, so reported matches never overlap */ + } + } + + return results; + } + + private int checkSensitiveWord(String text, int startIndex) { + int matchLength = 0; + Map currentMap = sensitiveWordMap; + + for (int i = startIndex; i < text.length(); i++) { + String key = String.valueOf(text.charAt(i)); + + @SuppressWarnings("unchecked") + Map nextMap = (Map) currentMap.get(key); + + if (nextMap == null) { + break; + } + + matchLength++; + + if (nextMap.containsKey(END_FLAG)) { + return matchLength; /* returns at the first word end reached, i.e. the shortest listed word that starts at startIndex */ + } + + currentMap = nextMap; + } + + return 0; + } + + private String filterText(String text, List detectedWords, + ReplaceStrategy strategy, String customReplace) { + if (strategy == ReplaceStrategy.DETECT_ONLY || detectedWords.isEmpty()) { + return text; + } + + StringBuilder result = new StringBuilder(text); + int offset = 0; /* running length difference between the edited buffer and the original text */ + + for (SensitiveWordResult wordResult : detectedWords) { + int start = wordResult.startIndex + offset; + int end = wordResult.endIndex + offset; + String replacement; + + switch (strategy) { + case ASTERISK: + replacement = "*".repeat(wordResult.word.length()); + break; + case DELETE: + replacement = ""; + break; + case CUSTOM: + replacement = customReplace; + break; + default: + replacement = wordResult.word; + } + + result.replace(start, end, replacement); + offset += replacement.length() - 
wordResult.word.length(); + } + + return result.toString(); + } + + private Map createResult(String originalText, + List detectedWords, + String filteredText, + boolean hasSensitiveWords) { + Map result = new HashMap<>(); + result.put("original_text", originalText); + result.put("filtered_text", filteredText); + result.put("has_sensitive_words", hasSensitiveWords); + result.put("detected_words_count", detectedWords.size()); + + List> wordsList = new ArrayList<>(); + for (SensitiveWordResult wordResult : detectedWords) { + Map wordInfo = new HashMap<>(); + wordInfo.put("word", wordResult.word); + wordInfo.put("start_index", wordResult.startIndex); + wordInfo.put("end_index", wordResult.endIndex); /* exclusive end offset in the original text */ + wordsList.add(wordInfo); + } + result.put("detected_words", wordsList); + + return result; + } + + public void addSensitiveWord(String word) { + if (word != null && !word.trim().isEmpty()) { + initSensitiveWordMap(Collections.singleton(word)); /* adds to the existing word map; nothing is removed */ + } + } + + public void addSensitiveWords(Set words) { + initSensitiveWordMap(words); + } + + private static Set getDefaultSensitiveWords() { /* built-in word list used by the no-argument constructor */ + Set words = new HashSet<>(); + words.add("赌博"); + words.add("诈骗"); + words.add("色情"); + words.add("暴力"); + words.add("非法"); + words.add("枪支"); + words.add("毒品"); + words.add("恐怖"); + words.add("反动"); + words.add("邪教"); + return words; + } + + private static class SensitiveWordResult { + String word; + int startIndex; + int endIndex; + + SensitiveWordResult(String word, int startIndex, int endIndex) { + this.word = word; + this.startIndex = startIndex; + this.endIndex = endIndex; + } + } +} diff --git a/ali-agentic-adk-java/ali-agentic-adk-extension/ali-agentic-example/src/test/java/com/alibaba/agentic/example/SecurityAgentTest.java b/ali-agentic-adk-java/ali-agentic-adk-extension/ali-agentic-example/src/test/java/com/alibaba/agentic/example/SecurityAgentTest.java new file mode 100644 index 00000000..bc994703 --- /dev/null +++ 
b/ali-agentic-adk-java/ali-agentic-adk-extension/ali-agentic-example/src/test/java/com/alibaba/agentic/example/SecurityAgentTest.java @@ -0,0 +1,341 @@ +package com.alibaba.agentic.example; + +import com.alibaba.agentic.core.executor.*; +import com.alibaba.agentic.core.tools.security.DataMaskingTool; +import com.alibaba.agentic.core.tools.security.SensitiveWordFilterTool; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.junit4.SpringRunner; + +import java.util.*; + +/** + * Security features demonstration and testing. + * + * @author Libres-coder + * @date 2025/10/27 + */ +@RunWith(SpringRunner.class) +@SpringBootTest(classes = { Application.class }) +@ActiveProfiles("testing") +public class SecurityAgentTest { + + @Test + public void testSensitiveWordFilter() { + System.out.println("\n" + "=".repeat(60)); + System.out.println("测试1: 敏感词过滤工具"); + System.out.println("=".repeat(60)); + + SensitiveWordFilterTool filterTool = new SensitiveWordFilterTool(); + SystemContext context = new SystemContext(); + + // 测试用例1: 检测敏感词 + Map args1 = new HashMap<>(); + args1.put("text", "这是一段包含赌博和诈骗的文本内容"); + args1.put("strategy", "DETECT_ONLY"); + + Map result1 = filterTool.run(args1, context).blockingFirst(); + System.out.println("\n[检测模式]"); + System.out.println("原始文本: " + result1.get("original_text")); + System.out.println("检测到敏感词: " + result1.get("has_sensitive_words")); + System.out.println("敏感词数量: " + result1.get("detected_words_count")); + System.out.println("敏感词列表: " + result1.get("detected_words")); + + // 测试用例2: 星号替换 + Map args2 = new HashMap<>(); + args2.put("text", "我想要赌博赚快钱,有人教我诈骗"); + args2.put("strategy", "ASTERISK"); + + Map result2 = filterTool.run(args2, context).blockingFirst(); + System.out.println("\n[星号替换模式]"); + System.out.println("原始文本: " + result2.get("original_text")); + 
System.out.println("过滤后文本: " + result2.get("filtered_text")); + + // 测试用例3: 自定义替换 + Map args3 = new HashMap<>(); + args3.put("text", "非法内容和暴力内容不应该出现"); + args3.put("strategy", "CUSTOM"); + args3.put("custom_replace", "[已屏蔽]"); + + Map result3 = filterTool.run(args3, context).blockingFirst(); + System.out.println("\n[自定义替换模式]"); + System.out.println("原始文本: " + result3.get("original_text")); + System.out.println("过滤后文本: " + result3.get("filtered_text")); + + // 测试用例4: 自定义敏感词库 + Set customWords = new HashSet<>(); + customWords.add("测试敏感词"); + customWords.add("自定义词汇"); + + SensitiveWordFilterTool customFilter = new SensitiveWordFilterTool(customWords); + + Map args4 = new HashMap<>(); + args4.put("text", "这包含测试敏感词和自定义词汇"); + args4.put("strategy", "ASTERISK"); + + Map result4 = customFilter.run(args4, context).blockingFirst(); + System.out.println("\n[自定义词库]"); + System.out.println("原始文本: " + result4.get("original_text")); + System.out.println("过滤后文本: " + result4.get("filtered_text")); + + System.out.println("\n" + "=".repeat(60)); + } + + @Test + public void testDataMasking() { + System.out.println("\n" + "=".repeat(60)); + System.out.println("测试2: 数据脱敏工具"); + System.out.println("=".repeat(60)); + + DataMaskingTool maskingTool = new DataMaskingTool(); + SystemContext context = new SystemContext(); + + // 测试用例1: 手机号脱敏 + Map args1 = new HashMap<>(); + args1.put("text", "我的手机号是13812345678,联系我吧"); + args1.put("types", Arrays.asList("phone")); + + Map result1 = maskingTool.run(args1, context).blockingFirst(); + System.out.println("\n[手机号脱敏]"); + System.out.println("原始文本: " + result1.get("original_text")); + System.out.println("脱敏后文本: " + result1.get("masked_text")); + System.out.println("检测到的PII: " + result1.get("detected_pii")); + + // 测试用例2: 身份证号脱敏 + Map args2 = new HashMap<>(); + args2.put("text", "我的身份证号是110101199001011234"); + args2.put("types", Arrays.asList("id_card")); + + Map result2 = maskingTool.run(args2, context).blockingFirst(); + 
System.out.println("\n[身份证号脱敏]"); + System.out.println("原始文本: " + result2.get("original_text")); + System.out.println("脱敏后文本: " + result2.get("masked_text")); + + // 测试用例3: 邮箱脱敏 + Map args3 = new HashMap<>(); + args3.put("text", "联系邮箱: user@example.com"); + args3.put("types", Arrays.asList("email")); + + Map result3 = maskingTool.run(args3, context).blockingFirst(); + System.out.println("\n[邮箱脱敏]"); + System.out.println("原始文本: " + result3.get("original_text")); + System.out.println("脱敏后文本: " + result3.get("masked_text")); + + // 测试用例4: 银行卡号脱敏 + Map args4 = new HashMap<>(); + args4.put("text", "银行卡号: 6222021234567890123"); + args4.put("types", Arrays.asList("bank_card")); + + Map result4 = maskingTool.run(args4, context).blockingFirst(); + System.out.println("\n[银行卡号脱敏]"); + System.out.println("原始文本: " + result4.get("original_text")); + System.out.println("脱敏后文本: " + result4.get("masked_text")); + + // 测试用例5: 综合测试(所有类型) + Map args5 = new HashMap<>(); + args5.put("text", "用户信息: 手机13812345678,邮箱user@test.com,身份证110101199001011234"); + args5.put("types", Arrays.asList("all")); + + Map result5 = maskingTool.run(args5, context).blockingFirst(); + System.out.println("\n[综合测试 - 所有PII类型]"); + System.out.println("原始文本: " + result5.get("original_text")); + System.out.println("脱敏后文本: " + result5.get("masked_text")); + System.out.println("PII数量: " + result5.get("pii_count")); + System.out.println("检测到的PII: " + result5.get("detected_pii")); + + System.out.println("\n" + "=".repeat(60)); + } + + @Test + public void testSecurityCallback() { + System.out.println("\n" + "=".repeat(60)); + System.out.println("测试3: 安全回调处理器"); + System.out.println("=".repeat(60)); + + SecurityCallback securityCallback = new SecurityCallback() + .enableSensitiveWordFilter(true) + .enableDataMasking(true) + .setBlockOnSensitiveWord(false) + .setMaskLogs(true); + + securityCallback.addSensitiveWord("机密信息"); + + SystemContext context = new SystemContext(); + + // 模拟请求 + Map requestPayload = new 
HashMap<>(); + requestPayload.put("message", "我想分享一些赌博技巧和机密信息,我的手机是13812345678"); + + Request request = new Request() + .setInvokeMode(InvokeMode.SYNC) + .setParam(requestPayload); + Result result = new Result(); + + // 创建简单的回调链 + CallbackChain chain = new CallbackChain() { + @Override + public void execute(SystemContext systemContext, Request request, Result result) { + System.out.println("\n[回调链] execute方法被调用"); + System.out.println("请求已通过安全检查,继续执行..."); + } + + @Override + public void receive(SystemContext systemContext, Request request, Result result) { + System.out.println("\n[回调链] receive方法被调用"); + System.out.println("响应已通过安全检查"); + } + }; + + // 执行安全回调 + System.out.println("\n[执行阶段安全检查]"); + try { + securityCallback.execute(context, request, result, chain); + } catch (SecurityException e) { + System.out.println("安全异常: " + e.getMessage()); + } + + // 模拟响应 + Map responsePayload = new HashMap<>(); + responsePayload.put("message", "这是包含用户邮箱user@example.com的响应"); + result.setData(responsePayload); + + System.out.println("\n[接收阶段安全检查]"); + try { + securityCallback.receive(context, request, result, chain); + } catch (SecurityException e) { + System.out.println("安全异常: " + e.getMessage()); + } + + // 查看安全事件 + System.out.println("\n[安全事件记录]"); + List events = securityCallback.getSecurityEvents(); + System.out.println("记录的安全事件数量: " + events.size()); + for (SecurityCallback.SecurityEvent event : events) { + System.out.println(" - " + event); + } + + System.out.println("\n" + "=".repeat(60)); + } + + @Test + public void testSecurityCallbackWithBlock() { + System.out.println("\n" + "=".repeat(60)); + System.out.println("测试4: 安全回调 - 阻断模式"); + System.out.println("=".repeat(60)); + + // 创建启用阻断的安全回调 + SecurityCallback securityCallback = new SecurityCallback() + .enableSensitiveWordFilter(true) + .setBlockOnSensitiveWord(true); // 启用阻断 + + SystemContext context = new SystemContext(); + + // 模拟包含敏感词的请求 + Map requestPayload = new HashMap<>(); + requestPayload.put("message", 
"我要学习诈骗技术"); + + Request request = new Request() + .setInvokeMode(InvokeMode.SYNC) + .setParam(requestPayload); + Result result = new Result(); + + CallbackChain chain = new CallbackChain() { + @Override + public void execute(SystemContext systemContext, Request request, Result result) { + System.out.println("这行不应该被打印(请求应该被阻断)"); + } + + @Override + public void receive(SystemContext systemContext, Request request, Result result) { + System.out.println("响应处理"); + } + }; + + // 执行安全回调 - 应该抛出异常 + System.out.println("\n[测试阻断功能]"); + System.out.println("尝试发送包含敏感词的请求..."); + try { + securityCallback.execute(context, request, result, chain); + System.out.println("错误: 请求没有被阻断!"); + } catch (SecurityException e) { + System.out.println("成功: 请求被阻断!"); + System.out.println("原因: " + e.getMessage()); + } + + System.out.println("\n" + "=".repeat(60)); + } + + @Test + public void testSecureCustomerServiceAgent() { + System.out.println("\n" + "=".repeat(60)); + System.out.println("测试5: 综合示例 - 安全的客服Agent"); + System.out.println("=".repeat(60)); + + // 1. 创建安全工具 + SensitiveWordFilterTool filterTool = new SensitiveWordFilterTool(); + DataMaskingTool maskingTool = new DataMaskingTool(); + + // 2. 创建安全回调 + SecurityCallback securityCallback = new SecurityCallback() + .enableSensitiveWordFilter(true) + .enableDataMasking(true) + .setBlockOnSensitiveWord(false) + .setMaskLogs(true); + + SystemContext context = new SystemContext(); + + // 3. 模拟客户查询 + System.out.println("\n[场景: 客户咨询订单问题]"); + String customerQuery = "您好,我的订单号是ORDER-123,手机号13812345678,想查询订单状态"; + + // 3.1 先进行敏感词检查 + Map filterArgs = new HashMap<>(); + filterArgs.put("text", customerQuery); + filterArgs.put("strategy", "DETECT_ONLY"); + + Map filterResult = filterTool.run(filterArgs, context).blockingFirst(); + System.out.println("客户输入: " + customerQuery); + System.out.println("敏感词检测: " + (Boolean.TRUE.equals(filterResult.get("has_sensitive_words")) ? 
"❌ 发现敏感词" : "✅ 通过")); + + // 3.2 脱敏日志记录 + Map maskArgs = new HashMap<>(); + maskArgs.put("text", customerQuery); + maskArgs.put("types", Arrays.asList("all")); + + Map maskResult = maskingTool.run(maskArgs, context).blockingFirst(); + System.out.println("日志记录(脱敏): " + maskResult.get("masked_text")); + + // 4. 模拟Agent响应 + System.out.println("\n[Agent处理并响应]"); + String agentResponse = "您好!您的订单ORDER-123状态为已发货。" + + "稍后会发送短信到手机 13812345678。" + + "如有问题请联系客服邮箱 support@company.com"; + + // 4.1 响应脱敏 + Map responseArgs = new HashMap<>(); + responseArgs.put("text", agentResponse); + responseArgs.put("types", Arrays.asList("all")); + + Map responseMask = maskingTool.run(responseArgs, context).blockingFirst(); + System.out.println("原始响应: " + agentResponse); + System.out.println("日志记录(脱敏): " + responseMask.get("masked_text")); + System.out.println("检测到PII数量: " + responseMask.get("pii_count")); + + // 5. 查看安全统计 + System.out.println("\n[安全统计]"); + System.out.println("所有敏感数据已被正确处理"); + System.out.println("PII信息已被脱敏保护"); + System.out.println("可以安全地记录到日志系统"); + + System.out.println("\n" + "=".repeat(60)); + System.out.println("综合示例完成!"); + System.out.println("安全措施:"); + System.out.println("1. 输入过滤 - 检测用户输入中的敏感词"); + System.out.println("2. PII保护 - 自动识别和脱敏个人信息"); + System.out.println("3. 日志安全 - 记录脱敏后的数据"); + System.out.println("4. 
合规性 - 满足数据隐私保护要求"); + System.out.println("=".repeat(60)); + } +} diff --git a/ali-agentic-adk-python/examples/security_demo/security_example.py b/ali-agentic-adk-python/examples/security_demo/security_example.py new file mode 100644 index 00000000..4cd2390b --- /dev/null +++ b/ali-agentic-adk-python/examples/security_demo/security_example.py @@ -0,0 +1,316 @@ +# Copyright (C) 2025 AIDC-AI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +""" +Security Features Example - 安全能力示例 + +Demonstrates how to use security tools to protect Agent applications. 
+演示如何使用安全工具保护Agent应用。 +""" + +from ali_agentic_adk_python.core.tool.security_tools import ( + SensitiveWordFilterTool, + DataMaskingTool, + ReplaceStrategy +) + + +def demo_sensitive_word_filter(): + """演示敏感词过滤工具""" + print("\n" + "="*60) + print("示例1: 敏感词过滤工具") + print("="*60) + + filter_tool = SensitiveWordFilterTool() + + print("\n[检测模式]") + result1 = filter_tool.run( + text="这是一段包含赌博和诈骗的文本内容", + strategy="detect_only" + ) + print(f"原始文本: {result1['original_text']}") + print(f"检测到敏感词: {result1['has_sensitive_words']}") + print(f"敏感词数量: {result1['detected_words_count']}") + print(f"敏感词列表: {result1['detected_words']}") + + print("\n[星号替换模式]") + result2 = filter_tool.run( + text="我想要赌博赚快钱,有人教我诈骗", + strategy="asterisk" + ) + print(f"原始文本: {result2['original_text']}") + print(f"过滤后文本: {result2['filtered_text']}") + + print("\n[自定义替换模式]") + result3 = filter_tool.run( + text="非法内容和暴力内容不应该出现", + strategy="custom", + custom_replace="[已屏蔽]" + ) + print(f"原始文本: {result3['original_text']}") + print(f"过滤后文本: {result3['filtered_text']}") + + print("\n[删除模式]") + result4 = filter_tool.run( + text="这里有黄色和色情内容", + strategy="delete" + ) + print(f"原始文本: {result4['original_text']}") + print(f"过滤后文本: {result4['filtered_text']}") + + print("\n[自定义词库]") + custom_filter = SensitiveWordFilterTool({"测试敏感词", "自定义词汇"}) + result5 = custom_filter.run( + text="这包含测试敏感词和自定义词汇", + strategy="asterisk" + ) + print(f"原始文本: {result5['original_text']}") + print(f"过滤后文本: {result5['filtered_text']}") + + +def demo_data_masking(): + """演示数据脱敏工具""" + print("\n" + "="*60) + print("示例2: 数据脱敏工具") + print("="*60) + + masking_tool = DataMaskingTool() + + print("\n[手机号脱敏]") + result1 = masking_tool.run( + text="我的手机号是13812345678,联系我吧", + types=["phone"] + ) + print(f"原始文本: {result1['original_text']}") + print(f"脱敏后文本: {result1['masked_text']}") + print(f"检测到的PII: {result1['detected_pii']}") + + print("\n[身份证号脱敏]") + result2 = masking_tool.run( + text="我的身份证号是110101199001011234", + types=["id_card"] + ) + 
print(f"原始文本: {result2['original_text']}") + print(f"脱敏后文本: {result2['masked_text']}") + + print("\n[邮箱脱敏]") + result3 = masking_tool.run( + text="联系邮箱: user@example.com", + types=["email"] + ) + print(f"原始文本: {result3['original_text']}") + print(f"脱敏后文本: {result3['masked_text']}") + + print("\n[银行卡号脱敏]") + result4 = masking_tool.run( + text="银行卡号: 6222021234567890123", + types=["bank_card"] + ) + print(f"原始文本: {result4['original_text']}") + print(f"脱敏后文本: {result4['masked_text']}") + + print("\n[IP地址脱敏]") + result5 = masking_tool.run( + text="服务器IP是192.168.1.100", + types=["ip_address"] + ) + print(f"原始文本: {result5['original_text']}") + print(f"脱敏后文本: {result5['masked_text']}") + + print("\n[综合测试 - 所有PII类型]") + result6 = masking_tool.run( + text="用户信息: 手机13812345678,邮箱user@test.com,身份证110101199001011234,IP地址192.168.1.1", + types=["all"] + ) + print(f"原始文本: {result6['original_text']}") + print(f"脱敏后文本: {result6['masked_text']}") + print(f"PII数量: {result6['pii_count']}") + print(f"检测到的PII: {result6['detected_pii']}") + + +def demo_secure_logging(): + """演示安全日志记录""" + print("\n" + "="*60) + print("示例3: 安全日志记录") + print("="*60) + + masking_tool = DataMaskingTool() + + log_messages = [ + "用户登录: username=zhangsan, phone=13812345678", + "订单创建: order_id=ORD123, email=customer@example.com, amount=999", + "支付成功: card_number=6222021234567890, amount=500", + "错误日志: IP 192.168.1.100 访问被拒绝", + ] + + print("\n[原始日志 vs 脱敏日志]") + for i, log_msg in enumerate(log_messages, 1): + result = masking_tool.run(text=log_msg, types=["all"]) + print(f"\n日志{i}:") + print(f" 原始: {log_msg}") + print(f" 脱敏: {result['masked_text']}") + if result['has_pii']: + print(f" ⚠️ 检测到 {result['pii_count']} 个PII") + + +def demo_secure_customer_service(): + """演示安全的客服Agent""" + print("\n" + "="*60) + print("示例4: 安全的客服Agent") + print("="*60) + + filter_tool = SensitiveWordFilterTool() + masking_tool = DataMaskingTool() + + print("\n[场景: 客户咨询订单问题]") + customer_query = 
"您好,我的订单号是ORDER-123,手机号13812345678,想查询订单状态" + + filter_result = filter_tool.run( + text=customer_query, + strategy="detect_only" + ) + print(f"客户输入: {customer_query}") + print(f"敏感词检测: {'发现敏感词' if filter_result['has_sensitive_words'] else '✅ 通过'}") + + mask_result = masking_tool.run( + text=customer_query, + types=["all"] + ) + print(f"日志记录(脱敏): {mask_result['masked_text']}") + + print("\n[Agent处理并响应]") + agent_response = ( + "您好!您的订单ORDER-123状态为已发货。" + "稍后会发送短信到手机 13812345678。" + "如有问题请联系客服邮箱 support@company.com" + ) + + response_mask = masking_tool.run( + text=agent_response, + types=["all"] + ) + print(f"原始响应: {agent_response}") + print(f"日志记录(脱敏): {response_mask['masked_text']}") + print(f"检测到PII数量: {response_mask['pii_count']}") + + print("\n[安全统计]") + print("所有敏感数据已被正确处理") + print("PII信息已被脱敏保护") + print("可以安全地记录到日志系统") + + +def demo_content_moderation(): + """演示内容审核场景""" + print("\n" + "="*60) + print("示例5: 内容审核场景") + print("="*60) + + filter_tool = SensitiveWordFilterTool() + + user_contents = [ + "这是一条正常的评论", + "这个产品真不错,值得推荐!", + "有人想一起赌博吗?", + "我知道诈骗的方法,私信我", + "非法内容,包含暴力和色情", + ] + + print("\n[内容审核结果]") + approved_count = 0 + rejected_count = 0 + + for i, content in enumerate(user_contents, 1): + result = filter_tool.run( + text=content, + strategy="detect_only" + ) + + if result['has_sensitive_words']: + status = "拒绝" + rejected_count += 1 + details = f"(发现 {result['detected_words_count']} 个敏感词)" + else: + status = "通过" + approved_count += 1 + details = "" + + print(f"\n内容{i}: {content}") + print(f" 状态: {status} {details}") + + print(f"\n[统计]") + print(f"通过: {approved_count}, 拒绝: {rejected_count}") + + +def demo_performance(): + """演示性能测试""" + print("\n" + "="*60) + print("示例6: 性能测试") + print("="*60) + + import time + + filter_tool = SensitiveWordFilterTool() + masking_tool = DataMaskingTool() + + test_text = "这是一段测试文本,包含手机号13812345678,邮箱test@example.com,以及一些赌博诈骗等敏感词汇。" * 10 + + print("\n[敏感词过滤性能]") + start_time = time.time() + iterations = 100 + for _ 
in range(iterations): + filter_tool.run(text=test_text, strategy="detect_only") + end_time = time.time() + print(f"执行 {iterations} 次,耗时: {(end_time - start_time):.3f} 秒") + print(f"平均每次: {((end_time - start_time) / iterations * 1000):.2f} 毫秒") + + print("\n[数据脱敏性能]") + start_time = time.time() + for _ in range(iterations): + masking_tool.run(text=test_text, types=["all"]) + end_time = time.time() + print(f"执行 {iterations} 次,耗时: {(end_time - start_time):.3f} 秒") + print(f"平均每次: {((end_time - start_time) / iterations * 1000):.2f} 毫秒") + + +def main(): + """运行所有示例""" + print("\n" + "="*60) + print("安全能力示例 - Ali Agentic ADK Python") + print("="*60) + + demo_sensitive_word_filter() + demo_data_masking() + demo_secure_logging() + demo_secure_customer_service() + demo_content_moderation() + demo_performance() + + print("\n" + "="*60) + print("所有示例完成!") + print("\n安全措施总结:") + print("1. 敏感词过滤 - 保护应用免受不当内容影响") + print("2. 数据脱敏 - 保护用户隐私信息") + print("3. 安全日志 - 合规的日志记录方式") + print("4. 内容审核 - 自动化的内容安全检查") + print("="*60 + "\n") + + +if __name__ == "__main__": + main() diff --git a/ali-agentic-adk-python/src/ali_agentic_adk_python/core/tool/security_tools.py b/ali-agentic-adk-python/src/ali_agentic_adk_python/core/tool/security_tools.py new file mode 100644 index 00000000..70b6473f --- /dev/null +++ b/ali-agentic-adk-python/src/ali_agentic_adk_python/core/tool/security_tools.py @@ -0,0 +1,317 @@ +# Copyright (C) 2025 AIDC-AI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial 
# portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
Security Tools.

Provides sensitive word filtering and data masking capabilities.
"""

from typing import Dict, List, Set, Tuple, Any, Optional, Union
from enum import Enum
import re
from dataclasses import dataclass


class ReplaceStrategy(Enum):
    """Replace strategy for sensitive words."""
    ASTERISK = "asterisk"
    DELETE = "delete"
    CUSTOM = "custom"
    DETECT_ONLY = "detect_only"


class PIIType(Enum):
    """PII categories recognised by :class:`DataMaskingTool`.

    Each member value is ``(type_name, regex, prefix_keep, suffix_keep)``.
    ``prefix_keep`` / ``suffix_keep`` are the number of leading / trailing
    characters left readable; ``-1`` selects the special handling implemented
    in ``DataMaskingTool._mask_string`` (e-mail local part, or full masking).

    The purely numeric patterns are wrapped in ``(?<!\\d)`` / ``(?!\\d)`` so
    they only match a complete run of digits. Without these guards the phone
    pattern fires inside longer numbers such as ID card numbers.
    """
    PHONE = ("phone", r"(?<!\d)1[3-9]\d{9}(?!\d)", 3, 4)
    ID_CARD = ("id_card", r"(?<!\d)[1-9]\d{5}(18|19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[0-9Xx](?!\d)", 6, 4)
    EMAIL = ("email", r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", 2, -1)
    BANK_CARD = ("bank_card", r"(?<!\d)\d{13,19}(?!\d)", 4, 4)
    IP_ADDRESS = ("ip_address", r"\b(?:\d{1,3}\.){3}\d{1,3}\b", -1, -1)
    PASSWORD = ("password", r"(?i)(password|passwd|pwd)[\"']?\s*[:=]\s*[\"']?([^\"'\s,}]+)", -1, -1)

    def __init__(self, type_name: str, pattern: str, prefix_keep: int, suffix_keep: int):
        self.type_name = type_name
        self.pattern = re.compile(pattern)
        self.prefix_keep = prefix_keep
        self.suffix_keep = suffix_keep


@dataclass
class SensitiveWordResult:
    """One sensitive-word hit; ``end_index`` is exclusive."""
    word: str
    start_index: int
    end_index: int


@dataclass
class PIIDetection:
    """One masked PII occurrence.

    ``start_index`` / ``end_index`` are the match span in the text as it stood
    when this PII type was scanned (earlier types may already have been masked).
    """
    pii_type: str
    original_value: str
    masked_value: str
    start_index: int
    end_index: int


class SensitiveWordFilterTool:
    """Sensitive word filter backed by a character trie (DFA-style matching)."""

    # Marker key stored in a trie node that terminates a complete word.
    # Trie edges are single characters, so this key cannot collide with one.
    _END_KEY = "END"

    def __init__(self, sensitive_words: Optional[Set[str]] = None):
        """Build the trie from ``sensitive_words``.

        A ``None`` or empty collection falls back to the built-in default list.
        """
        self.sensitive_word_map = {}
        words = sensitive_words if sensitive_words else self._get_default_sensitive_words()
        self._init_sensitive_word_map(words)

    def _init_sensitive_word_map(self, sensitive_words: Set[str]):
        """Insert every non-blank word into the trie (existing entries are kept)."""
        if not sensitive_words:
            return

        for word in sensitive_words:
            if not word or not word.strip():
                continue

            node = self.sensitive_word_map
            last = len(word) - 1
            for position, char in enumerate(word):
                child = node.setdefault(char, {})
                if position == last:
                    child[self._END_KEY] = word
                node = child

    @staticmethod
    def _resolve_strategy(strategy: Union[str, ReplaceStrategy, None]) -> ReplaceStrategy:
        """Map a strategy name (any case) or enum member to ``ReplaceStrategy``.

        Unknown or missing values fall back to ``ASTERISK``.
        """
        if isinstance(strategy, ReplaceStrategy):
            return strategy
        try:
            return ReplaceStrategy(str(strategy).lower())
        except ValueError:
            return ReplaceStrategy.ASTERISK

    def run(self, text: str, strategy: Union[str, ReplaceStrategy] = "asterisk",
            custom_replace: str = "[已屏蔽]") -> Dict[str, Any]:
        """Detect sensitive words in ``text`` and optionally replace them.

        Args:
            text: Input text; empty or ``None`` yields an empty result.
            strategy: ``"asterisk"``, ``"delete"``, ``"custom"`` or
                ``"detect_only"`` (case-insensitive), or a ``ReplaceStrategy``.
            custom_replace: Replacement used by the ``custom`` strategy.

        Returns:
            Dict with ``original_text``, ``filtered_text``,
            ``has_sensitive_words``, ``detected_words_count`` and
            ``detected_words`` (list of ``word`` / ``start_index`` / ``end_index``).
        """
        if not text:
            return self._create_result(text, [], text, False)

        replace_strategy = self._resolve_strategy(strategy)
        detected_words = self._detect_sensitive_words(text)
        filtered_text = self._filter_text(text, detected_words, replace_strategy, custom_replace)

        return self._create_result(text, detected_words, filtered_text, len(detected_words) > 0)

    def _detect_sensitive_words(self, text: str) -> List[SensitiveWordResult]:
        """Scan left to right and return non-overlapping hits in text order."""
        results = []
        cursor = 0
        text_length = len(text)

        while cursor < text_length:
            length = self._check_sensitive_word(text, cursor)
            if length > 0:
                results.append(SensitiveWordResult(text[cursor:cursor + length], cursor, cursor + length))
                cursor += length  # continue after the hit so hits never overlap
            else:
                cursor += 1

        return results

    def _check_sensitive_word(self, text: str, start_index: int) -> int:
        """Return the length of the first (shortest) word starting at ``start_index``, else 0."""
        node = self.sensitive_word_map

        for offset, char in enumerate(text[start_index:], start=1):
            child = node.get(char)
            if child is None:
                break
            if self._END_KEY in child:
                return offset  # reached the end of a complete sensitive word
            node = child

        return 0

    def _filter_text(self, text: str, detected_words: List[SensitiveWordResult],
                     strategy: ReplaceStrategy, custom_replace: str) -> str:
        """Rebuild ``text`` with every detected span replaced per ``strategy``."""
        if not detected_words or strategy == ReplaceStrategy.DETECT_ONLY:
            return text

        pieces = []
        cursor = 0
        for hit in detected_words:
            if strategy == ReplaceStrategy.ASTERISK:
                replacement = "*" * len(hit.word)
            elif strategy == ReplaceStrategy.DELETE:
                replacement = ""
            elif strategy == ReplaceStrategy.CUSTOM:
                replacement = custom_replace
            else:
                replacement = hit.word

            pieces.append(text[cursor:hit.start_index])
            pieces.append(replacement)
            cursor = hit.end_index

        pieces.append(text[cursor:])
        return "".join(pieces)

    def _create_result(self, original_text: str, detected_words: List[SensitiveWordResult],
                       filtered_text: str, has_sensitive_words: bool) -> Dict[str, Any]:
        """Assemble the result dictionary returned by :meth:`run`."""
        return {
            "original_text": original_text,
            "filtered_text": filtered_text,
            "has_sensitive_words": has_sensitive_words,
            "detected_words_count": len(detected_words),
            "detected_words": [
                {
                    "word": hit.word,
                    "start_index": hit.start_index,
                    "end_index": hit.end_index
                }
                for hit in detected_words
            ]
        }

    def add_sensitive_word(self, word: str):
        """Add a single word to the trie."""
        self._init_sensitive_word_map({word})

    def add_sensitive_words(self, words: Set[str]):
        """Add several words to the trie."""
        self._init_sensitive_word_map(words)

    @staticmethod
    def _get_default_sensitive_words() -> Set[str]:
        """Built-in word list used when no custom list is supplied."""
        return {
            "政治敏感", "反动", "暴力",
            "色情", "黄色", "裸体",
            "赌博", "赌场", "赌钱",
            "毒品", "枪支", "爆炸物",
            "诈骗", "骗钱", "传销",
            "恐怖主义", "邪教", "非法",
        }


class DataMaskingTool:
    """Data masking tool for PII protection."""

    def run(self, text: str, types: Optional[List[str]] = None,
            mask_char: str = "*") -> Dict[str, Any]:
        """Mask PII found in ``text``.

        Args:
            text: Input text; empty or ``None`` yields an empty result.
            types: ``PIIType.type_name`` values to mask, or ``["all"]``
                (the default when ``None``). Unknown names are ignored.
            mask_char: Character used for masked positions.

        Returns:
            Dict with ``original_text``, ``masked_text``, ``has_pii``,
            ``pii_count`` and ``detected_pii``.
        """
        if not text:
            return self._create_result(text, text, [])

        if types is None:
            types = ["all"]

        types_to_check = self._determine_types(types)
        masked_text, detected_pii = self._mask_text(text, types_to_check, mask_char)

        return self._create_result(text, masked_text, detected_pii)

    def _determine_types(self, enabled_types: List[str]) -> List[PIIType]:
        """Translate type names into ``PIIType`` members, in request order, without duplicates."""
        requested = [name.lower() for name in enabled_types]
        if "all" in requested:
            return list(PIIType)

        by_name = {pii_type.type_name: pii_type for pii_type in PIIType}
        selected = []
        for name in requested:
            pii_type = by_name.get(name)
            if pii_type is not None and pii_type not in selected:
                selected.append(pii_type)

        return selected

    def _mask_text(self, text: str, types: List[PIIType],
                   mask_char: str) -> Tuple[str, List[PIIDetection]]:
        """Apply each PII type in turn to the progressively masked text.

        Replacement is done with ``pattern.sub`` and a callback, so the result
        stays correct when a masked value is longer or shorter than the
        original (e-mail and IP masks change length).
        """
        masked_text = text
        detected_pii = []

        for pii_type in types:
            def _replace(match, pii_type=pii_type):
                if pii_type is PIIType.PASSWORD:
                    # Keep the keyword/separator/quote, mask the whole secret
                    # (group 2), including any ':' or '=' it contains.
                    original = match.group(2)
                    head = match.group(0)[:match.start(2) - match.start()]
                    masked = head + mask_char * len(original)
                else:
                    original = match.group()
                    masked = self._mask_string(original, pii_type.prefix_keep,
                                               pii_type.suffix_keep, mask_char)

                start, end = match.span()
                detected_pii.append(PIIDetection(
                    pii_type=pii_type.type_name,
                    original_value=original,
                    masked_value=masked,
                    start_index=start,
                    end_index=end
                ))
                return masked

            masked_text = pii_type.pattern.sub(_replace, masked_text)

        return masked_text, detected_pii

    def _mask_string(self, text: str, prefix_keep: int, suffix_keep: int,
                     mask_char: str) -> str:
        """Mask ``text`` keeping ``prefix_keep`` leading and ``suffix_keep`` trailing chars.

        Special cases: with ``suffix_keep == -1`` and a single ``@`` the value is
        treated as an e-mail (only the local part is masked, at least three mask
        characters); with both keeps ``-1`` the value is replaced by at most ten
        mask characters; values no longer than the kept parts are fully masked.
        """
        if not text:
            return text

        length = len(text)

        if suffix_keep == -1 and "@" in text:
            parts = text.split("@")
            if len(parts) == 2:
                local_part, domain = parts
                keep_length = min(prefix_keep, len(local_part) // 2)
                hidden = max(len(local_part) - keep_length, 3)
                return local_part[:keep_length] + mask_char * hidden + "@" + domain

        if prefix_keep == -1 and suffix_keep == -1:
            return mask_char * min(length, 10)

        if length <= prefix_keep + suffix_keep:
            return mask_char * length

        # Clamp so a keep of 0 (or a stray negative) keeps nothing; note that
        # text[-0:] would return the whole string.
        head_len = max(prefix_keep, 0)
        tail_len = max(suffix_keep, 0)
        return text[:head_len] + mask_char * (length - head_len - tail_len) + text[length - tail_len:]

    def _create_result(self, original_text: str, masked_text: str,
                       detected_pii: List[PIIDetection]) -> Dict[str, Any]:
        """Assemble the result dictionary returned by :meth:`run`."""
        return {
            "original_text": original_text,
            "masked_text": masked_text,
            "has_pii": len(detected_pii) > 0,
            "pii_count": len(detected_pii),
            "detected_pii": [
                {
                    "type": pii.pii_type,
                    "original_value": pii.original_value,
                    "masked_value": pii.masked_value,
                    "start_index": pii.start_index,
                    "end_index": pii.end_index
                }
                for pii in detected_pii
            ]
        }
List[PIIDetection]) -> Dict[str, Any]: + return { + "original_text": original_text, + "masked_text": masked_text, + "has_pii": len(detected_pii) > 0, + "pii_count": len(detected_pii), + "detected_pii": [ + { + "type": pii.pii_type, + "original_value": pii.original_value, + "masked_value": pii.masked_value, + "start_index": pii.start_index, + "end_index": pii.end_index + } + for pii in detected_pii + ] + } diff --git a/docs/Security-Guide.md b/docs/Security-Guide.md new file mode 100644 index 00000000..8a1e88b2 --- /dev/null +++ b/docs/Security-Guide.md @@ -0,0 +1,286 @@ +# Agentic ADK Security Guide + +## Overview + +Agentic ADK provides a comprehensive set of security capabilities to help developers build secure and compliant AI Agent applications. This guide introduces how to use these security features to protect your application from security threats and ensure user privacy data is properly protected. + +## Security Capabilities + +### 1. Sensitive Word / Blacklist Filtering + +Automatically detect and filter sensitive words in text, including but not limited to: +- Political sensitive words +- Pornographic content +- Gambling related +- Prohibited items information +- Fraud related +- Other inappropriate content + +**Features:** +- Support custom sensitive word library +- High-performance matching using DFA algorithm +- Multiple replacement strategies (asterisk, delete, custom) +- Detect-only mode (without modifying original text) + +### 2. Data Masking (PII Protection) + +Automatically identify and mask Personally Identifiable Information (PII) to protect user privacy: +- Chinese mainland phone numbers (11 digits) +- ID card numbers (18 digits) +- Email addresses +- Bank card numbers (13-19 digits) +- IP addresses +- Password fields + +**Features:** +- Automatically identify multiple PII types +- Retain prefix and suffix for tracking +- Selectively enable/disable specific types +- Maintain data format integrity + +### 3. 
Security Callback (Java) + +Automatically perform security checks before and after Agent execution: +- Request phase security check +- Response phase security check +- Security event logging +- Configurable blocking strategy + +## Quick Start + +### Java Version + +#### 1. Sensitive Word Filtering + +```java +import com.alibaba.agentic.core.tools.security.SensitiveWordFilterTool; +import com.alibaba.agentic.core.executor.SystemContext; + +// Create filter tool +SensitiveWordFilterTool filterTool = new SensitiveWordFilterTool(); +SystemContext context = new SystemContext(); + +// Detect sensitive words +Map args = new HashMap<>(); +args.put("text", "This text contains gambling and fraud"); +args.put("strategy", "DETECT_ONLY"); + +Map result = filterTool.run(args, context).blockingFirst(); +System.out.println("Has sensitive words: " + result.get("has_sensitive_words")); +System.out.println("Word count: " + result.get("detected_words_count")); + +// Filter sensitive words (asterisk replacement) +args.put("strategy", "ASTERISK"); +result = filterTool.run(args, context).blockingFirst(); +System.out.println("Filtered: " + result.get("filtered_text")); +``` + +#### 2. Data Masking + +```java +import com.alibaba.agentic.core.tools.security.DataMaskingTool; + +// Create masking tool +DataMaskingTool maskingTool = new DataMaskingTool(); + +// Mask phone number +Map args = new HashMap<>(); +args.put("text", "My phone is 13812345678"); +args.put("types", Arrays.asList("phone")); + +Map result = maskingTool.run(args, context).blockingFirst(); +System.out.println("Masked: " + result.get("masked_text")); +// Output: My phone is 138****5678 +``` + +#### 3. 
Security Callback + +```java +import com.alibaba.agentic.core.executor.SecurityCallback; + +// Create security callback +SecurityCallback securityCallback = new SecurityCallback() + .enableSensitiveWordFilter(true) + .enableDataMasking(true) + .setBlockOnSensitiveWord(false) + .setMaskLogs(true); + +// Add custom sensitive word +securityCallback.addSensitiveWord("custom_word"); + +// Use in Agent execution chain +// securityCallback.execute(systemContext, request, result, chain); +``` + +### Python Version + +#### 1. Sensitive Word Filtering + +```python +from ali_agentic_adk_python.core.tool.security_tools import SensitiveWordFilterTool + +# Create filter tool +filter_tool = SensitiveWordFilterTool() + +# Detect sensitive words +result = filter_tool.run( + text="This text contains gambling and fraud", + strategy="detect_only" +) +print(f"Has sensitive words: {result['has_sensitive_words']}") +print(f"Word count: {result['detected_words_count']}") + +# Filter sensitive words (asterisk replacement) +result = filter_tool.run( + text="This text contains gambling and fraud", + strategy="asterisk" +) +print(f"Filtered: {result['filtered_text']}") +``` + +#### 2. Data Masking + +```python +from ali_agentic_adk_python.core.tool.security_tools import DataMaskingTool + +# Create masking tool +masking_tool = DataMaskingTool() + +# Mask phone number +result = masking_tool.run( + text="My phone is 13812345678", + types=["phone"] +) +print(f"Masked: {result['masked_text']}") +# Output: My phone is 138****5678 + +# Mask all PII types +result = masking_tool.run( + text="Contact: phone 13812345678, email user@example.com", + types=["all"] +) +print(f"Masked: {result['masked_text']}") +``` + +## Use Cases + +### Case 1: Customer Service Agent + +Protect user privacy data in customer service systems: + +```python +# Customer query +customer_query = "My order is ORDER-123, phone 13812345678" + +# 1. 
Check sensitive words +filter_result = filter_tool.run(text=customer_query, strategy="detect_only") +if filter_result['has_sensitive_words']: + print("⚠️ Warning: Sensitive words detected") + +# 2. Log with masking +mask_result = masking_tool.run(text=customer_query, types=["all"]) +logger.info(f"Customer query: {mask_result['masked_text']}") +# Log: Customer query: My order is ORDER-123, phone 138****5678 +``` + +### Case 2: Content Moderation + +Review user-generated content (UGC): + +```python +# User comment +user_comment = "This product is great!" + +# Detect sensitive words +result = filter_tool.run(text=user_comment, strategy="detect_only") + +if result['has_sensitive_words']: + print("❌ Comment rejected: Contains sensitive words") + logger.warning(f"Violation: {result['detected_words']}") +else: + print("✅ Comment approved") +``` + +### Case 3: Log Masking + +Ensure logs don't contain sensitive information: + +```python +import logging + +# Configure masking logger +masking_tool = DataMaskingTool() + +def log_with_masking(message: str): + """Log with masking""" + result = masking_tool.run(text=message, types=["all"]) + logging.info(result['masked_text']) + +# Usage +log_with_masking("User login: phone=13812345678, email=user@example.com") +# Actual log: User login: phone=138****5678, email=us***@example.com +``` + +## Best Practices + +### 1. Layered Protection + +```java +// Layer 1: Input check +Map inputCheck = filterTool.run(userInput, context).blockingFirst(); +if ((Boolean) inputCheck.get("has_sensitive_words")) { + throw new SecurityException("Input contains sensitive words"); +} + +// Layer 2: Business processing (with security callback) +// ... Agent execution ... + +// Layer 3: Output masking +Map outputMask = maskingTool.run(response, context).blockingFirst(); +String safeResponse = (String) outputMask.get("masked_text"); +``` + +### 2. 
Secure Logging + +```python +class SecureLogger: + """Secure logger""" + def __init__(self): + self.masking_tool = DataMaskingTool() + + def log(self, message: str, level: str = "info"): + """Log with masking""" + result = self.masking_tool.run(text=message, types=["all"]) + masked_message = result['masked_text'] + + if level == "info": + logging.info(masked_message) + elif level == "warning": + logging.warning(masked_message) +``` + +## Example Code + +For complete examples, please refer to: + +**Java:** +- [SecurityAgentTest.java](../ali-agentic-adk-java/ali-agentic-adk-extension/ali-agentic-example/src/test/java/com/alibaba/agentic/example/SecurityAgentTest.java) + +**Python:** +- [security_example.py](../ali-agentic-adk-python/examples/security_demo/security_example.py) + +## Technical Support + +For questions or suggestions: +- Submit Issue: [GitHub Issues](https://github.com/AIDC-AI/Agentic-ADK/issues) +- Documentation: [Project Documentation](../README.md) + +## Changelog + +### v1.0.0 (2025-10-27) +- ✅ Initial release +- ✅ Sensitive word filtering +- ✅ Data masking +- ✅ Security callback mechanism (Java) +- ✅ Complete examples and documentation + diff --git a/docs/Security-Guide_CN.md b/docs/Security-Guide_CN.md new file mode 100644 index 00000000..5c6795d9 --- /dev/null +++ b/docs/Security-Guide_CN.md @@ -0,0 +1,524 @@ +# Agentic ADK 安全能力指南 + +## 概述 + +Agentic ADK 提供了一套完整的安全能力,帮助开发者构建安全、合规的AI Agent应用。本指南介绍如何使用这些安全功能来保护您的应用免受安全威胁,并确保用户隐私数据得到妥善保护。 + +## 安全能力 + +### 1. 敏感词/黑词过滤 + +自动检测和过滤文本中的敏感词,包括但不限于: +- 政治敏感词 +- 色情内容 +- 赌博相关 +- 违禁品信息 +- 诈骗相关 +- 其他不当内容 + +**特性:** +- 支持自定义敏感词库 +- 使用DFA算法实现高性能匹配 +- 多种替换策略(星号、删除、自定义) +- 仅检测模式(不修改原文) + +### 2. 数据脱敏(PII保护) + +自动识别和脱敏个人可识别信息(PII),保护用户隐私: +- 中国大陆手机号(11位) +- 身份证号(18位) +- 邮箱地址 +- 银行卡号(13-19位) +- IP地址 +- 密码字段 + +**特性:** +- 自动识别多种PII类型 +- 保留前缀和后缀便于追踪 +- 可选择性启用/禁用特定类型 +- 保持数据格式完整性 + +### 3. 
安全回调(Java - 可选的高级特性) + +**注意**: 这是一个可选的高级特性。如果你不熟悉Callback机制,建议直接使用上述工具类进行手动安全检查。 + +在Agent执行前后自动进行安全检查: +- 请求阶段安全检查 +- 响应阶段安全检查 +- 安全事件记录 +- 可配置的阻断策略 + +## 快速开始 + +### Java 版本 + +#### 1. 敏感词过滤 + +```java +import com.alibaba.agentic.core.tools.security.SensitiveWordFilterTool; +import com.alibaba.agentic.core.executor.SystemContext; + +// 创建过滤工具 +SensitiveWordFilterTool filterTool = new SensitiveWordFilterTool(); +SystemContext context = new SystemContext(); + +// 检测敏感词 +Map args = new HashMap<>(); +args.put("text", "这是一段包含赌博和诈骗的文本"); +args.put("strategy", "DETECT_ONLY"); + +Map result = filterTool.run(args, context).blockingFirst(); +System.out.println("检测到敏感词: " + result.get("has_sensitive_words")); +System.out.println("敏感词数量: " + result.get("detected_words_count")); + +// 过滤敏感词(星号替换) +args.put("strategy", "ASTERISK"); +result = filterTool.run(args, context).blockingFirst(); +System.out.println("过滤后: " + result.get("filtered_text")); +``` + +#### 2. 数据脱敏 + +```java +import com.alibaba.agentic.core.tools.security.DataMaskingTool; + +// 创建脱敏工具 +DataMaskingTool maskingTool = new DataMaskingTool(); + +// 脱敏手机号 +Map args = new HashMap<>(); +args.put("text", "我的手机号是13812345678"); +args.put("types", Arrays.asList("phone")); + +Map result = maskingTool.run(args, context).blockingFirst(); +System.out.println("脱敏后: " + result.get("masked_text")); +// 输出: 我的手机号是138****5678 +``` + +#### 3. 安全回调 + +```java +import com.alibaba.agentic.core.executor.SecurityCallback; + +// 创建安全回调 +SecurityCallback securityCallback = new SecurityCallback() + .enableSensitiveWordFilter(true) + .enableDataMasking(true) + .setBlockOnSensitiveWord(false) + .setMaskLogs(true); + +// 添加自定义敏感词 +securityCallback.addSensitiveWord("自定义敏感词"); + +// 在Agent执行链中使用 +// securityCallback.execute(systemContext, request, result, chain); +``` + +### Python 版本 + +#### 1. 
敏感词过滤 + +```python +from ali_agentic_adk_python.core.tool.security_tools import SensitiveWordFilterTool + +# 创建过滤工具 +filter_tool = SensitiveWordFilterTool() + +# 检测敏感词 +result = filter_tool.run( + text="这是一段包含赌博和诈骗的文本", + strategy="detect_only" +) +print(f"检测到敏感词: {result['has_sensitive_words']}") +print(f"敏感词数量: {result['detected_words_count']}") + +# 过滤敏感词(星号替换) +result = filter_tool.run( + text="这是一段包含赌博和诈骗的文本", + strategy="asterisk" +) +print(f"过滤后: {result['filtered_text']}") +``` + +#### 2. 数据脱敏 + +```python +from ali_agentic_adk_python.core.tool.security_tools import DataMaskingTool + +# 创建脱敏工具 +masking_tool = DataMaskingTool() + +# 脱敏手机号 +result = masking_tool.run( + text="我的手机号是13812345678", + types=["phone"] +) +print(f"脱敏后: {result['masked_text']}") +# 输出: 我的手机号是138****5678 + +# 脱敏所有PII类型 +result = masking_tool.run( + text="联系方式:手机13812345678,邮箱user@example.com", + types=["all"] +) +print(f"脱敏后: {result['masked_text']}") +``` + +#### 3. 自定义敏感词库 + +```python +# 使用自定义敏感词库 +custom_words = {"自定义敏感词", "特殊词汇", "品牌名称"} +filter_tool = SensitiveWordFilterTool(custom_words) + +result = filter_tool.run( + text="这包含自定义敏感词", + strategy="asterisk" +) +print(f"过滤后: {result['filtered_text']}") +``` + +## 使用场景 + +### 场景1: 客服Agent + +保护客服系统中的用户隐私数据: + +```python +# 客户查询 +customer_query = "我的订单号是ORDER-123,手机号13812345678" + +# 1. 检查敏感词 +filter_result = filter_tool.run(text=customer_query, strategy="detect_only") +if filter_result['has_sensitive_words']: + print("⚠️ 警告:检测到敏感词") + +# 2. 脱敏后记录日志 +mask_result = masking_tool.run(text=customer_query, types=["all"]) +logger.info(f"客户查询: {mask_result['masked_text']}") +# 日志: 客户查询: 我的订单号是ORDER-123,手机号138****5678 +``` + +### 场景2: 内容审核 + +审核用户生成内容(UGC): + +```python +# 用户评论 +user_comment = "这个产品真不错!" 
+ +# 检测敏感词 +result = filter_tool.run(text=user_comment, strategy="detect_only") + +if result['has_sensitive_words']: + print("❌ 评论被拒绝:包含敏感词") + # 记录违规内容 + logger.warning(f"违规评论: {result['detected_words']}") +else: + print("✅ 评论通过审核") + # 发布评论 +``` + +### 场景3: 日志脱敏 + +确保日志系统中不包含敏感信息: + +```python +import logging + +# 配置脱敏日志处理器 +masking_tool = DataMaskingTool() + +def log_with_masking(message: str): + """脱敏后记录日志""" + result = masking_tool.run(text=message, types=["all"]) + logging.info(result['masked_text']) + +# 使用 +log_with_masking("用户登录: phone=13812345678, email=user@example.com") +# 实际记录: 用户登录: phone=138****5678, email=us***@example.com +``` + +## 配置选项 + +### 敏感词过滤配置 + +**替换策略:** +- `ASTERISK` / `asterisk`: 替换为星号(默认) +- `DELETE` / `delete`: 删除敏感词 +- `CUSTOM` / `custom`: 替换为自定义文本 +- `DETECT_ONLY` / `detect_only`: 仅检测不替换 + +**Java示例:** +```java +args.put("strategy", "CUSTOM"); +args.put("custom_replace", "[已屏蔽]"); +``` + +**Python示例:** +```python +result = filter_tool.run( + text="...", + strategy="custom", + custom_replace="[已屏蔽]" +) +``` + +### 数据脱敏配置 + +**PII类型:** +- `phone`: 手机号 +- `id_card`: 身份证号 +- `email`: 邮箱地址 +- `bank_card`: 银行卡号 +- `ip_address`: IP地址 +- `password`: 密码字段 +- `all`: 所有类型(默认) + +**脱敏字符:** +- 默认使用 `*` 作为脱敏字符 +- 可自定义脱敏字符 + +**Java示例:** +```java +args.put("types", Arrays.asList("phone", "email")); +args.put("mask_char", "#"); +``` + +**Python示例:** +```python +result = masking_tool.run( + text="...", + types=["phone", "email"], + mask_char="#" +) +``` + +### 安全回调配置(Java) + +```java +SecurityCallback securityCallback = new SecurityCallback() + // 启用敏感词过滤 + .enableSensitiveWordFilter(true) + // 启用数据脱敏 + .enableDataMasking(true) + // 检测到敏感词时是否阻断请求 + .setBlockOnSensitiveWord(false) + // 是否脱敏日志内容 + .setMaskLogs(true); + +// 添加自定义敏感词 +securityCallback.addSensitiveWord("特殊敏感词"); +securityCallback.addSensitiveWords(customWordSet); + +// 获取安全事件记录 +List events = securityCallback.getSecurityEvents(); +``` + +## 性能优化 + +### 敏感词过滤性能 + +- 使用 
**DFA(Deterministic Finite Automaton)算法** +- 时间复杂度:O(n),n为文本长度 +- 空间复杂度:O(m),m为敏感词总字符数 +- 适合处理大量文本和大型词库 + +**性能基准(参考):** +- 检测10KB文本:< 5ms +- 1000个敏感词词库加载:< 50ms +- 适用于实时处理场景 + +### 数据脱敏性能 + +- 使用正则表达式匹配 +- 时间复杂度:O(n),n为文本长度 +- 支持并发处理 + +**性能基准(参考):** +- 脱敏10KB文本(所有PII类型):< 10ms +- 适用于高并发场景 + +## 最佳实践 + +### 1. 分层防护 + +```java +// 第1层:输入检查 +Map inputCheck = filterTool.run(userInput, context).blockingFirst(); +if ((Boolean) inputCheck.get("has_sensitive_words")) { + throw new SecurityException("输入包含敏感词"); +} + +// 第2层:业务处理(使用安全回调) +// ... Agent执行 ... + +// 第3层:输出脱敏 +Map outputMask = maskingTool.run(response, context).blockingFirst(); +String safeResponse = (String) outputMask.get("masked_text"); +``` + +### 2. 敏感词库管理 + +```java +// 定期更新敏感词库 +public class SensitiveWordManager { + private SensitiveWordFilterTool filterTool; + + public void updateWordList(Set newWords) { + filterTool.addSensitiveWords(newWords); + logger.info("敏感词库已更新,新增 {} 个词", newWords.size()); + } + + // 从配置中心加载 + public void loadFromConfig() { + Set words = configService.getSensitiveWords(); + filterTool = new SensitiveWordFilterTool(words); + } +} +``` + +### 3. 日志安全 + +```python +class SecureLogger: + """安全的日志记录器""" + def __init__(self): + self.masking_tool = DataMaskingTool() + + def log(self, message: str, level: str = "info"): + """记录脱敏后的日志""" + result = self.masking_tool.run(text=message, types=["all"]) + masked_message = result['masked_text'] + + if level == "info": + logging.info(masked_message) + elif level == "warning": + logging.warning(masked_message) + elif level == "error": + logging.error(masked_message) +``` + +### 4. 
性能监控 + +```java +// 监控安全检查性能 +public class SecurityMetrics { + private final Meter filterMeter = registry.meter("security.filter"); + private final Timer filterTimer = registry.timer("security.filter.time"); + + public Map filterWithMetrics(String text) { + filterMeter.mark(); + return filterTimer.time(() -> { + return filterTool.run(createArgs(text), context).blockingFirst(); + }); + } +} +``` + +## 合规性说明 + +### 数据保护法规 + +本安全能力帮助您的应用符合以下法规要求: + +- **GDPR(欧盟通用数据保护条例)**:数据脱敏保护用户隐私 +- **CCPA(加州消费者隐私法案)**:PII识别和保护 +- **中国网络安全法**:敏感信息保护 +- **个人信息保护法**:个人信息处理规范 + +### 审计和合规 + +```java +// 记录安全事件用于审计 +List events = securityCallback.getSecurityEvents(); +for (SecurityCallback.SecurityEvent event : events) { + auditLogger.log(event.getType(), event.getStage(), event.getMessage()); +} +``` + +## 示例代码 + +完整示例请参考: + +**Java:** +- [SecurityAgentTest.java](../ali-agentic-adk-java/ali-agentic-adk-extension/ali-agentic-example/src/test/java/com/alibaba/agentic/example/SecurityAgentTest.java) + +**Python:** +- [security_example.py](../ali-agentic-adk-python/examples/security_demo/security_example.py) + +## 常见问题 + +### Q1: 如何添加自定义敏感词? + +**Java:** +```java +// 单个添加 +filterTool.addSensitiveWord("自定义词"); + +// 批量添加 +Set words = new HashSet<>(Arrays.asList("词1", "词2", "词3")); +filterTool.addSensitiveWords(words); +``` + +**Python:** +```python +# 单个添加 +filter_tool.add_sensitive_word("自定义词") + +# 批量添加 +filter_tool.add_sensitive_words({"词1", "词2", "词3"}) +``` + +### Q2: 如何只脱敏特定类型的PII? + +```python +# 只脱敏手机号和邮箱 +result = masking_tool.run( + text="...", + types=["phone", "email"] +) +``` + +### Q3: 敏感词过滤会影响性能吗? + +不会显著影响性能。使用DFA算法,即使是千级别的敏感词库,检测性能也在毫秒级别。建议: +- 合理控制词库大小(< 10000词) +- 定期清理无效敏感词 +- 对超大文本可以分块处理 + +### Q4: 脱敏后的数据还能还原吗? + +不能。脱敏是单向操作,原始数据无法从脱敏结果还原。如果需要还原,请: +- 在脱敏前保存原始数据 +- 或使用加密而非脱敏 + +### Q5: 如何处理多语言敏感词? 
+ +工具支持Unicode字符,可以添加任何语言的敏感词: + +```java +Set multiLangWords = new HashSet<>(Arrays.asList( + "English word", + "中文敏感词", + "日本語の単語", + "한국어 단어" +)); +SensitiveWordFilterTool filter = new SensitiveWordFilterTool(multiLangWords); +``` + +## 技术支持 + +如有问题或建议,请: +- 提交Issue: [GitHub Issues](https://github.com/AIDC-AI/Agentic-ADK/issues) +- 查看文档: [项目文档](../README_CN.md) + +## 更新日志 + +### v1.0.0 (2025-10-27) +- ✅ 初始版本发布 +- ✅ 敏感词过滤功能 +- ✅ 数据脱敏功能 +- ✅ 安全回调机制(Java) +- ✅ 完整示例和文档 + From 21ba272d497f40a31bd1944c9562b83e1f906ce5 Mon Sep 17 00:00:00 2001 From: Libres-coder <2597242922@qq.com> Date: Tue, 28 Oct 2025 22:33:52 +0800 Subject: [PATCH 2/4] modify --- .../ali-agentic-adk-core/pom.xml | 48 +++++++++++++++++++ .../core/executor/SecurityCallback.java | 6 ++- .../ali-agentic-example/pom.xml | 8 ++++ .../examples/security_demo/__init__.py | 29 +++++++++++ .../core/tool/security_tools.py | 2 +- 5 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 ali-agentic-adk-python/examples/security_demo/__init__.py diff --git a/ali-agentic-adk-java/ali-agentic-adk-core/pom.xml b/ali-agentic-adk-java/ali-agentic-adk-core/pom.xml index 2e0147b0..647e4da9 100644 --- a/ali-agentic-adk-java/ali-agentic-adk-core/pom.xml +++ b/ali-agentic-adk-java/ali-agentic-adk-core/pom.xml @@ -134,6 +134,54 @@ + + + com.alibaba + ali-langengine-core + 1.2.6-202508111516 + + + + + org.springframework + spring-context + ${springframework.version} + + + org.springframework + spring-beans + ${springframework.version} + + + org.springframework.boot + spring-boot-autoconfigure + ${spring-boot.version} + + + + + org.apache.poi + poi + 5.2.3 + + + org.apache.poi + poi-ooxml + 5.2.3 + + + org.apache.poi + poi-scratchpad + 5.2.3 + + + + + commons-io + commons-io + 2.11.0 + + com.alibaba ali-langengine-jsonrepair diff --git a/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/executor/SecurityCallback.java 
b/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/executor/SecurityCallback.java index dc99b72f..7363865f 100644 --- a/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/executor/SecurityCallback.java +++ b/ali-agentic-adk-java/ali-agentic-adk-core/src/main/java/com/alibaba/agentic/core/executor/SecurityCallback.java @@ -123,10 +123,14 @@ private void checkSecurity(Object payload, String stage, SystemContext systemCon stage, detectedWords.size()); if (blockOnSensitiveWord) { - throw new SecurityException( + SecurityException secEx = new SecurityException( "Sensitive word detected in " + stage + ". Request blocked."); + logger.error("[SecurityCallback] {}", secEx.getMessage()); + throw secEx; } } + } catch (SecurityException e) { + throw e; } catch (Exception e) { logger.error("[SecurityCallback] Error during sensitive word filter: {}", e.getMessage()); } diff --git a/ali-agentic-adk-java/ali-agentic-adk-extension/ali-agentic-example/pom.xml b/ali-agentic-adk-java/ali-agentic-adk-extension/ali-agentic-example/pom.xml index 31473e4f..fd5ed8e7 100644 --- a/ali-agentic-adk-java/ali-agentic-adk-extension/ali-agentic-example/pom.xml +++ b/ali-agentic-adk-java/ali-agentic-adk-extension/ali-agentic-example/pom.xml @@ -117,6 +117,14 @@ + + + org.codehaus.groovy + groovy + 3.0.19 + test + + diff --git a/ali-agentic-adk-python/examples/security_demo/__init__.py b/ali-agentic-adk-python/examples/security_demo/__init__.py new file mode 100644 index 00000000..c8968140 --- /dev/null +++ b/ali-agentic-adk-python/examples/security_demo/__init__.py @@ -0,0 +1,29 @@ +# Copyright (C) 2025 AIDC-AI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies 
of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +""" +Security Demo - 安全能力示例 + +本模块展示如何使用安全工具保护Agent应用: +1. 敏感词/黑词过滤 +2. 数据脱敏(PII保护) +3. 安全的日志记录 +""" + diff --git a/ali-agentic-adk-python/src/ali_agentic_adk_python/core/tool/security_tools.py b/ali-agentic-adk-python/src/ali_agentic_adk_python/core/tool/security_tools.py index 70b6473f..8dbce593 100644 --- a/ali-agentic-adk-python/src/ali_agentic_adk_python/core/tool/security_tools.py +++ b/ali-agentic-adk-python/src/ali_agentic_adk_python/core/tool/security_tools.py @@ -45,7 +45,7 @@ class PIIType(Enum): ID_CARD = ("id_card", r"[1-9]\d{5}(18|19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[0-9Xx]", 6, 4) EMAIL = ("email", r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", 2, -1) BANK_CARD = ("bank_card", r"\d{13,19}", 4, 4) - IP_ADDRESS = ("ip_address", r"\b(?:\d{1,3}\.){3}\d{1,3}\b", -1, -1) + IP_ADDRESS = ("ip_address", r"(? 
Date: Sat, 8 Nov 2025 10:07:00 +0800 Subject: [PATCH 3/4] modify --- ali-agentic-adk-python/examples/security_demo/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ali-agentic-adk-python/examples/security_demo/__init__.py b/ali-agentic-adk-python/examples/security_demo/__init__.py index c8968140..562e7594 100644 --- a/ali-agentic-adk-python/examples/security_demo/__init__.py +++ b/ali-agentic-adk-python/examples/security_demo/__init__.py @@ -27,3 +27,4 @@ 3. 安全的日志记录 """ + From 7fae4434f09e4dac5805e19c06392b0d54986ab8 Mon Sep 17 00:00:00 2001 From: Libres-coder <2597242922@qq.com> Date: Sat, 8 Nov 2025 10:10:11 +0800 Subject: [PATCH 4/4] delete --- docs/Security-Guide.md | 286 --------------------- docs/Security-Guide_CN.md | 524 -------------------------------------- 2 files changed, 810 deletions(-) delete mode 100644 docs/Security-Guide.md delete mode 100644 docs/Security-Guide_CN.md diff --git a/docs/Security-Guide.md b/docs/Security-Guide.md deleted file mode 100644 index 8a1e88b2..00000000 --- a/docs/Security-Guide.md +++ /dev/null @@ -1,286 +0,0 @@ -# Agentic ADK Security Guide - -## Overview - -Agentic ADK provides a comprehensive set of security capabilities to help developers build secure and compliant AI Agent applications. This guide introduces how to use these security features to protect your application from security threats and ensure user privacy data is properly protected. - -## Security Capabilities - -### 1. Sensitive Word / Blacklist Filtering - -Automatically detect and filter sensitive words in text, including but not limited to: -- Political sensitive words -- Pornographic content -- Gambling related -- Prohibited items information -- Fraud related -- Other inappropriate content - -**Features:** -- Support custom sensitive word library -- High-performance matching using DFA algorithm -- Multiple replacement strategies (asterisk, delete, custom) -- Detect-only mode (without modifying original text) - -### 2. 
Data Masking (PII Protection) - -Automatically identify and mask Personally Identifiable Information (PII) to protect user privacy: -- Chinese mainland phone numbers (11 digits) -- ID card numbers (18 digits) -- Email addresses -- Bank card numbers (13-19 digits) -- IP addresses -- Password fields - -**Features:** -- Automatically identify multiple PII types -- Retain prefix and suffix for tracking -- Selectively enable/disable specific types -- Maintain data format integrity - -### 3. Security Callback (Java) - -Automatically perform security checks before and after Agent execution: -- Request phase security check -- Response phase security check -- Security event logging -- Configurable blocking strategy - -## Quick Start - -### Java Version - -#### 1. Sensitive Word Filtering - -```java -import com.alibaba.agentic.core.tools.security.SensitiveWordFilterTool; -import com.alibaba.agentic.core.executor.SystemContext; - -// Create filter tool -SensitiveWordFilterTool filterTool = new SensitiveWordFilterTool(); -SystemContext context = new SystemContext(); - -// Detect sensitive words -Map args = new HashMap<>(); -args.put("text", "This text contains gambling and fraud"); -args.put("strategy", "DETECT_ONLY"); - -Map result = filterTool.run(args, context).blockingFirst(); -System.out.println("Has sensitive words: " + result.get("has_sensitive_words")); -System.out.println("Word count: " + result.get("detected_words_count")); - -// Filter sensitive words (asterisk replacement) -args.put("strategy", "ASTERISK"); -result = filterTool.run(args, context).blockingFirst(); -System.out.println("Filtered: " + result.get("filtered_text")); -``` - -#### 2. 
Data Masking - -```java -import com.alibaba.agentic.core.tools.security.DataMaskingTool; - -// Create masking tool -DataMaskingTool maskingTool = new DataMaskingTool(); - -// Mask phone number -Map args = new HashMap<>(); -args.put("text", "My phone is 13812345678"); -args.put("types", Arrays.asList("phone")); - -Map result = maskingTool.run(args, context).blockingFirst(); -System.out.println("Masked: " + result.get("masked_text")); -// Output: My phone is 138****5678 -``` - -#### 3. Security Callback - -```java -import com.alibaba.agentic.core.executor.SecurityCallback; - -// Create security callback -SecurityCallback securityCallback = new SecurityCallback() - .enableSensitiveWordFilter(true) - .enableDataMasking(true) - .setBlockOnSensitiveWord(false) - .setMaskLogs(true); - -// Add custom sensitive word -securityCallback.addSensitiveWord("custom_word"); - -// Use in Agent execution chain -// securityCallback.execute(systemContext, request, result, chain); -``` - -### Python Version - -#### 1. Sensitive Word Filtering - -```python -from ali_agentic_adk_python.core.tool.security_tools import SensitiveWordFilterTool - -# Create filter tool -filter_tool = SensitiveWordFilterTool() - -# Detect sensitive words -result = filter_tool.run( - text="This text contains gambling and fraud", - strategy="detect_only" -) -print(f"Has sensitive words: {result['has_sensitive_words']}") -print(f"Word count: {result['detected_words_count']}") - -# Filter sensitive words (asterisk replacement) -result = filter_tool.run( - text="This text contains gambling and fraud", - strategy="asterisk" -) -print(f"Filtered: {result['filtered_text']}") -``` - -#### 2. 
Data Masking - -```python -from ali_agentic_adk_python.core.tool.security_tools import DataMaskingTool - -# Create masking tool -masking_tool = DataMaskingTool() - -# Mask phone number -result = masking_tool.run( - text="My phone is 13812345678", - types=["phone"] -) -print(f"Masked: {result['masked_text']}") -# Output: My phone is 138****5678 - -# Mask all PII types -result = masking_tool.run( - text="Contact: phone 13812345678, email user@example.com", - types=["all"] -) -print(f"Masked: {result['masked_text']}") -``` - -## Use Cases - -### Case 1: Customer Service Agent - -Protect user privacy data in customer service systems: - -```python -# Customer query -customer_query = "My order is ORDER-123, phone 13812345678" - -# 1. Check sensitive words -filter_result = filter_tool.run(text=customer_query, strategy="detect_only") -if filter_result['has_sensitive_words']: - print("⚠️ Warning: Sensitive words detected") - -# 2. Log with masking -mask_result = masking_tool.run(text=customer_query, types=["all"]) -logger.info(f"Customer query: {mask_result['masked_text']}") -# Log: Customer query: My order is ORDER-123, phone 138****5678 -``` - -### Case 2: Content Moderation - -Review user-generated content (UGC): - -```python -# User comment -user_comment = "This product is great!" 
- -# Detect sensitive words -result = filter_tool.run(text=user_comment, strategy="detect_only") - -if result['has_sensitive_words']: - print("❌ Comment rejected: Contains sensitive words") - logger.warning(f"Violation: {result['detected_words']}") -else: - print("✅ Comment approved") -``` - -### Case 3: Log Masking - -Ensure logs don't contain sensitive information: - -```python -import logging - -# Configure masking logger -masking_tool = DataMaskingTool() - -def log_with_masking(message: str): - """Log with masking""" - result = masking_tool.run(text=message, types=["all"]) - logging.info(result['masked_text']) - -# Usage -log_with_masking("User login: phone=13812345678, email=user@example.com") -# Actual log: User login: phone=138****5678, email=us***@example.com -``` - -## Best Practices - -### 1. Layered Protection - -```java -// Layer 1: Input check -Map inputCheck = filterTool.run(userInput, context).blockingFirst(); -if ((Boolean) inputCheck.get("has_sensitive_words")) { - throw new SecurityException("Input contains sensitive words"); -} - -// Layer 2: Business processing (with security callback) -// ... Agent execution ... - -// Layer 3: Output masking -Map outputMask = maskingTool.run(response, context).blockingFirst(); -String safeResponse = (String) outputMask.get("masked_text"); -``` - -### 2. 
Secure Logging - -```python -class SecureLogger: - """Secure logger""" - def __init__(self): - self.masking_tool = DataMaskingTool() - - def log(self, message: str, level: str = "info"): - """Log with masking""" - result = self.masking_tool.run(text=message, types=["all"]) - masked_message = result['masked_text'] - - if level == "info": - logging.info(masked_message) - elif level == "warning": - logging.warning(masked_message) -``` - -## Example Code - -For complete examples, please refer to: - -**Java:** -- [SecurityAgentTest.java](../ali-agentic-adk-java/ali-agentic-adk-extension/ali-agentic-example/src/test/java/com/alibaba/agentic/example/SecurityAgentTest.java) - -**Python:** -- [security_example.py](../ali-agentic-adk-python/examples/security_demo/security_example.py) - -## Technical Support - -For questions or suggestions: -- Submit Issue: [GitHub Issues](https://github.com/AIDC-AI/Agentic-ADK/issues) -- Documentation: [Project Documentation](../README.md) - -## Changelog - -### v1.0.0 (2025-10-27) -- ✅ Initial release -- ✅ Sensitive word filtering -- ✅ Data masking -- ✅ Security callback mechanism (Java) -- ✅ Complete examples and documentation - diff --git a/docs/Security-Guide_CN.md b/docs/Security-Guide_CN.md deleted file mode 100644 index 5c6795d9..00000000 --- a/docs/Security-Guide_CN.md +++ /dev/null @@ -1,524 +0,0 @@ -# Agentic ADK 安全能力指南 - -## 概述 - -Agentic ADK 提供了一套完整的安全能力,帮助开发者构建安全、合规的AI Agent应用。本指南介绍如何使用这些安全功能来保护您的应用免受安全威胁,并确保用户隐私数据得到妥善保护。 - -## 安全能力 - -### 1. 敏感词/黑词过滤 - -自动检测和过滤文本中的敏感词,包括但不限于: -- 政治敏感词 -- 色情内容 -- 赌博相关 -- 违禁品信息 -- 诈骗相关 -- 其他不当内容 - -**特性:** -- 支持自定义敏感词库 -- 使用DFA算法实现高性能匹配 -- 多种替换策略(星号、删除、自定义) -- 仅检测模式(不修改原文) - -### 2. 数据脱敏(PII保护) - -自动识别和脱敏个人可识别信息(PII),保护用户隐私: -- 中国大陆手机号(11位) -- 身份证号(18位) -- 邮箱地址 -- 银行卡号(13-19位) -- IP地址 -- 密码字段 - -**特性:** -- 自动识别多种PII类型 -- 保留前缀和后缀便于追踪 -- 可选择性启用/禁用特定类型 -- 保持数据格式完整性 - -### 3. 
安全回调(Java - 可选的高级特性) - -**注意**: 这是一个可选的高级特性。如果你不熟悉Callback机制,建议直接使用上述工具类进行手动安全检查。 - -在Agent执行前后自动进行安全检查: -- 请求阶段安全检查 -- 响应阶段安全检查 -- 安全事件记录 -- 可配置的阻断策略 - -## 快速开始 - -### Java 版本 - -#### 1. 敏感词过滤 - -```java -import com.alibaba.agentic.core.tools.security.SensitiveWordFilterTool; -import com.alibaba.agentic.core.executor.SystemContext; - -// 创建过滤工具 -SensitiveWordFilterTool filterTool = new SensitiveWordFilterTool(); -SystemContext context = new SystemContext(); - -// 检测敏感词 -Map args = new HashMap<>(); -args.put("text", "这是一段包含赌博和诈骗的文本"); -args.put("strategy", "DETECT_ONLY"); - -Map result = filterTool.run(args, context).blockingFirst(); -System.out.println("检测到敏感词: " + result.get("has_sensitive_words")); -System.out.println("敏感词数量: " + result.get("detected_words_count")); - -// 过滤敏感词(星号替换) -args.put("strategy", "ASTERISK"); -result = filterTool.run(args, context).blockingFirst(); -System.out.println("过滤后: " + result.get("filtered_text")); -``` - -#### 2. 数据脱敏 - -```java -import com.alibaba.agentic.core.tools.security.DataMaskingTool; - -// 创建脱敏工具 -DataMaskingTool maskingTool = new DataMaskingTool(); - -// 脱敏手机号 -Map args = new HashMap<>(); -args.put("text", "我的手机号是13812345678"); -args.put("types", Arrays.asList("phone")); - -Map result = maskingTool.run(args, context).blockingFirst(); -System.out.println("脱敏后: " + result.get("masked_text")); -// 输出: 我的手机号是138****5678 -``` - -#### 3. 安全回调 - -```java -import com.alibaba.agentic.core.executor.SecurityCallback; - -// 创建安全回调 -SecurityCallback securityCallback = new SecurityCallback() - .enableSensitiveWordFilter(true) - .enableDataMasking(true) - .setBlockOnSensitiveWord(false) - .setMaskLogs(true); - -// 添加自定义敏感词 -securityCallback.addSensitiveWord("自定义敏感词"); - -// 在Agent执行链中使用 -// securityCallback.execute(systemContext, request, result, chain); -``` - -### Python 版本 - -#### 1. 
敏感词过滤 - -```python -from ali_agentic_adk_python.core.tool.security_tools import SensitiveWordFilterTool - -# 创建过滤工具 -filter_tool = SensitiveWordFilterTool() - -# 检测敏感词 -result = filter_tool.run( - text="这是一段包含赌博和诈骗的文本", - strategy="detect_only" -) -print(f"检测到敏感词: {result['has_sensitive_words']}") -print(f"敏感词数量: {result['detected_words_count']}") - -# 过滤敏感词(星号替换) -result = filter_tool.run( - text="这是一段包含赌博和诈骗的文本", - strategy="asterisk" -) -print(f"过滤后: {result['filtered_text']}") -``` - -#### 2. 数据脱敏 - -```python -from ali_agentic_adk_python.core.tool.security_tools import DataMaskingTool - -# 创建脱敏工具 -masking_tool = DataMaskingTool() - -# 脱敏手机号 -result = masking_tool.run( - text="我的手机号是13812345678", - types=["phone"] -) -print(f"脱敏后: {result['masked_text']}") -# 输出: 我的手机号是138****5678 - -# 脱敏所有PII类型 -result = masking_tool.run( - text="联系方式:手机13812345678,邮箱user@example.com", - types=["all"] -) -print(f"脱敏后: {result['masked_text']}") -``` - -#### 3. 自定义敏感词库 - -```python -# 使用自定义敏感词库 -custom_words = {"自定义敏感词", "特殊词汇", "品牌名称"} -filter_tool = SensitiveWordFilterTool(custom_words) - -result = filter_tool.run( - text="这包含自定义敏感词", - strategy="asterisk" -) -print(f"过滤后: {result['filtered_text']}") -``` - -## 使用场景 - -### 场景1: 客服Agent - -保护客服系统中的用户隐私数据: - -```python -# 客户查询 -customer_query = "我的订单号是ORDER-123,手机号13812345678" - -# 1. 检查敏感词 -filter_result = filter_tool.run(text=customer_query, strategy="detect_only") -if filter_result['has_sensitive_words']: - print("⚠️ 警告:检测到敏感词") - -# 2. 脱敏后记录日志 -mask_result = masking_tool.run(text=customer_query, types=["all"]) -logger.info(f"客户查询: {mask_result['masked_text']}") -# 日志: 客户查询: 我的订单号是ORDER-123,手机号138****5678 -``` - -### 场景2: 内容审核 - -审核用户生成内容(UGC): - -```python -# 用户评论 -user_comment = "这个产品真不错!" 
- -# 检测敏感词 -result = filter_tool.run(text=user_comment, strategy="detect_only") - -if result['has_sensitive_words']: - print("❌ 评论被拒绝:包含敏感词") - # 记录违规内容 - logger.warning(f"违规评论: {result['detected_words']}") -else: - print("✅ 评论通过审核") - # 发布评论 -``` - -### 场景3: 日志脱敏 - -确保日志系统中不包含敏感信息: - -```python -import logging - -# 配置脱敏日志处理器 -masking_tool = DataMaskingTool() - -def log_with_masking(message: str): - """脱敏后记录日志""" - result = masking_tool.run(text=message, types=["all"]) - logging.info(result['masked_text']) - -# 使用 -log_with_masking("用户登录: phone=13812345678, email=user@example.com") -# 实际记录: 用户登录: phone=138****5678, email=us***@example.com -``` - -## 配置选项 - -### 敏感词过滤配置 - -**替换策略:** -- `ASTERISK` / `asterisk`: 替换为星号(默认) -- `DELETE` / `delete`: 删除敏感词 -- `CUSTOM` / `custom`: 替换为自定义文本 -- `DETECT_ONLY` / `detect_only`: 仅检测不替换 - -**Java示例:** -```java -args.put("strategy", "CUSTOM"); -args.put("custom_replace", "[已屏蔽]"); -``` - -**Python示例:** -```python -result = filter_tool.run( - text="...", - strategy="custom", - custom_replace="[已屏蔽]" -) -``` - -### 数据脱敏配置 - -**PII类型:** -- `phone`: 手机号 -- `id_card`: 身份证号 -- `email`: 邮箱地址 -- `bank_card`: 银行卡号 -- `ip_address`: IP地址 -- `password`: 密码字段 -- `all`: 所有类型(默认) - -**脱敏字符:** -- 默认使用 `*` 作为脱敏字符 -- 可自定义脱敏字符 - -**Java示例:** -```java -args.put("types", Arrays.asList("phone", "email")); -args.put("mask_char", "#"); -``` - -**Python示例:** -```python -result = masking_tool.run( - text="...", - types=["phone", "email"], - mask_char="#" -) -``` - -### 安全回调配置(Java) - -```java -SecurityCallback securityCallback = new SecurityCallback() - // 启用敏感词过滤 - .enableSensitiveWordFilter(true) - // 启用数据脱敏 - .enableDataMasking(true) - // 检测到敏感词时是否阻断请求 - .setBlockOnSensitiveWord(false) - // 是否脱敏日志内容 - .setMaskLogs(true); - -// 添加自定义敏感词 -securityCallback.addSensitiveWord("特殊敏感词"); -securityCallback.addSensitiveWords(customWordSet); - -// 获取安全事件记录 -List events = securityCallback.getSecurityEvents(); -``` - -## 性能优化 - -### 敏感词过滤性能 - -- 使用 
**DFA(Deterministic Finite Automaton)算法** -- 时间复杂度:O(n),n为文本长度 -- 空间复杂度:O(m),m为敏感词总字符数 -- 适合处理大量文本和大型词库 - -**性能基准(参考):** -- 检测10KB文本:< 5ms -- 1000个敏感词词库加载:< 50ms -- 适用于实时处理场景 - -### 数据脱敏性能 - -- 使用正则表达式匹配 -- 时间复杂度:O(n),n为文本长度 -- 支持并发处理 - -**性能基准(参考):** -- 脱敏10KB文本(所有PII类型):< 10ms -- 适用于高并发场景 - -## 最佳实践 - -### 1. 分层防护 - -```java -// 第1层:输入检查 -Map inputCheck = filterTool.run(userInput, context).blockingFirst(); -if ((Boolean) inputCheck.get("has_sensitive_words")) { - throw new SecurityException("输入包含敏感词"); -} - -// 第2层:业务处理(使用安全回调) -// ... Agent执行 ... - -// 第3层:输出脱敏 -Map outputMask = maskingTool.run(response, context).blockingFirst(); -String safeResponse = (String) outputMask.get("masked_text"); -``` - -### 2. 敏感词库管理 - -```java -// 定期更新敏感词库 -public class SensitiveWordManager { - private SensitiveWordFilterTool filterTool; - - public void updateWordList(Set newWords) { - filterTool.addSensitiveWords(newWords); - logger.info("敏感词库已更新,新增 {} 个词", newWords.size()); - } - - // 从配置中心加载 - public void loadFromConfig() { - Set words = configService.getSensitiveWords(); - filterTool = new SensitiveWordFilterTool(words); - } -} -``` - -### 3. 日志安全 - -```python -class SecureLogger: - """安全的日志记录器""" - def __init__(self): - self.masking_tool = DataMaskingTool() - - def log(self, message: str, level: str = "info"): - """记录脱敏后的日志""" - result = self.masking_tool.run(text=message, types=["all"]) - masked_message = result['masked_text'] - - if level == "info": - logging.info(masked_message) - elif level == "warning": - logging.warning(masked_message) - elif level == "error": - logging.error(masked_message) -``` - -### 4. 
性能监控 - -```java -// 监控安全检查性能 -public class SecurityMetrics { - private final Meter filterMeter = registry.meter("security.filter"); - private final Timer filterTimer = registry.timer("security.filter.time"); - - public Map filterWithMetrics(String text) { - filterMeter.mark(); - return filterTimer.time(() -> { - return filterTool.run(createArgs(text), context).blockingFirst(); - }); - } -} -``` - -## 合规性说明 - -### 数据保护法规 - -本安全能力帮助您的应用符合以下法规要求: - -- **GDPR(欧盟通用数据保护条例)**:数据脱敏保护用户隐私 -- **CCPA(加州消费者隐私法案)**:PII识别和保护 -- **中国网络安全法**:敏感信息保护 -- **个人信息保护法**:个人信息处理规范 - -### 审计和合规 - -```java -// 记录安全事件用于审计 -List events = securityCallback.getSecurityEvents(); -for (SecurityCallback.SecurityEvent event : events) { - auditLogger.log(event.getType(), event.getStage(), event.getMessage()); -} -``` - -## 示例代码 - -完整示例请参考: - -**Java:** -- [SecurityAgentTest.java](../ali-agentic-adk-java/ali-agentic-adk-extension/ali-agentic-example/src/test/java/com/alibaba/agentic/example/SecurityAgentTest.java) - -**Python:** -- [security_example.py](../ali-agentic-adk-python/examples/security_demo/security_example.py) - -## 常见问题 - -### Q1: 如何添加自定义敏感词? - -**Java:** -```java -// 单个添加 -filterTool.addSensitiveWord("自定义词"); - -// 批量添加 -Set words = new HashSet<>(Arrays.asList("词1", "词2", "词3")); -filterTool.addSensitiveWords(words); -``` - -**Python:** -```python -# 单个添加 -filter_tool.add_sensitive_word("自定义词") - -# 批量添加 -filter_tool.add_sensitive_words({"词1", "词2", "词3"}) -``` - -### Q2: 如何只脱敏特定类型的PII? - -```python -# 只脱敏手机号和邮箱 -result = masking_tool.run( - text="...", - types=["phone", "email"] -) -``` - -### Q3: 敏感词过滤会影响性能吗? - -不会显著影响性能。使用DFA算法,即使是千级别的敏感词库,检测性能也在毫秒级别。建议: -- 合理控制词库大小(< 10000词) -- 定期清理无效敏感词 -- 对超大文本可以分块处理 - -### Q4: 脱敏后的数据还能还原吗? - -不能。脱敏是单向操作,原始数据无法从脱敏结果还原。如果需要还原,请: -- 在脱敏前保存原始数据 -- 或使用加密而非脱敏 - -### Q5: 如何处理多语言敏感词? 
- -工具支持Unicode字符,可以添加任何语言的敏感词: - -```java -Set multiLangWords = new HashSet<>(Arrays.asList( - "English word", - "中文敏感词", - "日本語の単語", - "한국어 단어" -)); -SensitiveWordFilterTool filter = new SensitiveWordFilterTool(multiLangWords); -``` - -## 技术支持 - -如有问题或建议,请: -- 提交Issue: [GitHub Issues](https://github.com/AIDC-AI/Agentic-ADK/issues) -- 查看文档: [项目文档](../README_CN.md) - -## 更新日志 - -### v1.0.0 (2025-10-27) -- ✅ 初始版本发布 -- ✅ 敏感词过滤功能 -- ✅ 数据脱敏功能 -- ✅ 安全回调机制(Java) -- ✅ 完整示例和文档 -