|
6 | 6 | CodeTabs,
|
7 | 7 | PythonBlock,
|
8 | 8 | TypeScriptBlock,
|
| 9 | + python, |
| 10 | + typescript, |
9 | 11 | } from "@site/src/components/InstructionsWithCode";
|
10 | 12 |
|
11 | 13 | # Prevent logging of sensitive data in traces
|
@@ -84,18 +86,79 @@ await openaiClient.chat.completions.create({
|
84 | 86 | groupId="client-language"
|
85 | 87 | />
|
86 | 88 |
|
87 |
| -## Example masking UUIDs and emails in inputs and outputs |
| 89 | +## Rule-based masking of inputs and outputs |
88 | 90 |
|
89 |
| -You can also customize the `hide_inputs` and `hide_outputs` functions to mask specific data in the inputs and outputs. For example, you can mask UUIDs and emails in the inputs and outputs. |
| 91 | +:::info |
| 92 | +This feature is available in the following LangSmith SDK versions: |
| 93 | + |
| 94 | +- Python: 0.1.81 and above |
| 95 | +- TypeScript: 0.1.33 and above |
| 96 | + |
| 97 | +::: |
| 98 | + |
| 99 | +To mask specific data in inputs and outputs, you can use the `create_anonymizer` / `createAnonymizer` function and pass the newly created anonymizer when instantiating the client. The anonymizer can be constructed either from a list of regex patterns and their replacement values or from a function that accepts and returns a string value. |
| 100 | + |
| 101 | +The anonymizer will be skipped for inputs if `LANGCHAIN_HIDE_INPUTS = true`. The same applies to outputs if `LANGCHAIN_HIDE_OUTPUTS = true`. |
| 102 | + |
| 103 | +However, if inputs or outputs are to be sent to the client, the `anonymizer` method will take precedence over the functions found in `hide_inputs` and `hide_outputs`. By default, `create_anonymizer` will only look at a maximum of 10 nesting levels deep, which can be configured via the `max_depth` parameter. |
| 104 | + |
| 105 | +<CodeTabs |
| 106 | + tabs={[ |
| 107 | + python` |
| 108 | + from langsmith.anonymizer import create_anonymizer |
| 109 | + from langsmith import Client, traceable |
| 110 | + |
| 111 | + # create anonymizer from list of regex patterns and replacement values |
| 112 | + anonymizer = create_anonymizer([ |
| 113 | + { "pattern": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", "replace": "<email>" }, |
| 114 | + { "pattern": r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}", "replace": "<uuid>" } |
| 115 | + ]) |
| 116 | + |
| 117 | + # or create anonymizer from a function |
| 118 | + anonymizer = create_anonymizer(lambda text: r"...".sub("[value]", text)) |
| 119 | + |
| 120 | + client = Client(anonymizer=anonymizer) |
| 121 | + |
| 122 | + @traceable(client=client) |
| 123 | + def main(inputs: dict) -> dict: |
| 124 | + ... |
| 125 | + `, |
| 126 | + typescript` |
| 127 | + import { createAnonymizer } from "langsmith/anonymizer" |
| 128 | + import { traceable } from "langsmith/traceable" |
| 129 | + import { Client } from "langsmith" |
| 130 | + |
| 131 | + // create anonymizer from list of regex patterns and replacement values |
| 132 | + const anonymizer = createAnonymizer([ |
| 133 | + { pattern: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g, replace: "<email>" }, |
| 134 | + { pattern: /[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}/g, replace: "<uuid>" } |
| 135 | + ]) |
| 136 | + |
| 137 | + // or create anonymizer from a function |
| 138 | + const anonymizer = createAnonymizer((value) => value.replace("...", "<value>")) |
| 139 | + |
| 140 | + const client = new Client({ anonymizer }) |
| 141 | + |
| 142 | + const main = traceable(async (inputs: any) => { |
| 143 | + // ... |
| 144 | + }, { client }) |
| 145 | + `, |
| 146 | + ]} |
| 147 | + groupId="client-language" |
| 148 | +/> |
| 149 | + |
| 150 | +Please note that using the anonymizer might incur a performance hit with complex regular expressions or large payloads, as the anonymizer serializes the payload to JSON before processing. |
90 | 151 |
|
91 | 152 | :::note
|
92 | 153 |
|
93 |
| -We are in the process of adding more built-in filters for common data types. If you have a specific use case that you would like us to support, please contact [email protected]. |
| 154 | +Improving the performance of the `anonymizer` API is on our roadmap! If you are encountering performance issues, please contact us at [email protected]. |
94 | 155 |
|
95 | 156 | :::
|
96 | 157 |
|
97 | 158 | 
|
98 | 159 |
|
| 160 | +Older versions of the LangSmith SDKs can use the `hide_inputs` and `hide_outputs` parameters to achieve the same effect. You can also use these parameters to process the inputs and outputs more efficiently. |
| 161 | + |
99 | 162 | <CodeTabs
|
100 | 163 | tabs={[
|
101 | 164 | PythonBlock(`import re
|
|
0 commit comments