add guides on tracing with API and fetching experiment performance metrics #328

Merged · 3 commits · Jun 21, 2024
@@ -35,7 +35,7 @@ You can do this from any 'run' details page by clicking the 'Add to Dataset' but

:::tip
An extremely powerful technique for building datasets is to drill down into the most interesting traces, such as those tagged with poor user feedback, and add them to a dataset.
For tips on how to filter traces, see the [filtering traces] guide.
For tips on how to filter traces, see the [filtering traces](../monitoring/filter_traces_in_application) guide.
:::

:::tip automations
@@ -0,0 +1,162 @@
---
sidebar_position: 10
---

import {
CodeTabs,
PythonBlock,
TypeScriptBlock,
} from "@site/src/components/InstructionsWithCode";

# Fetch performance metrics for an experiment

:::tip Experiments, Projects, and Sessions

Tracing projects and experiments use the same underlying data structure in our backend, which is called a "session."

You might see these terms used interchangeably in our documentation, but they all refer to the same underlying data structure.

We are working on unifying the terminology across our documentation and APIs.
:::

When you run an experiment using `evaluate` with the Python or TypeScript SDK, you can fetch the performance metrics for the experiment using the `read_project`/`readProject` methods.

The payload for experiment details includes the following values:

```json
{
  "start_time": "2024-06-06T01:02:51.299960",
  "end_time": "2024-06-06T01:03:04.557530+00:00",
  "extra": {
    "metadata": {
      "git": {
        "tags": null,
        "dirty": true,
        "branch": "ankush/agent-eval",
        "commit": "...",
        "repo_name": "...",
        "remote_url": "...",
        "author_name": "Ankush Gola",
        "commit_time": "...",
        "author_email": "..."
      },
      "revision_id": null,
      "dataset_splits": ["base"],
      "dataset_version": "2024-06-05T04:57:01.535578+00:00",
      "num_repetitions": 3
    }
  },
  "name": "SQL Database Agent-ae9ad229",
  "description": null,
  "default_dataset_id": null,
  "reference_dataset_id": "...",
  "id": "...",
  "run_count": 9,
  "latency_p50": 7.896,
  "latency_p99": 13.09332,
  "first_token_p50": null,
  "first_token_p99": null,
  "total_tokens": 35573,
  "prompt_tokens": 32711,
  "completion_tokens": 2862,
  "total_cost": 0.206485,
  "prompt_cost": 0.163555,
  "completion_cost": 0.04293,
  "tenant_id": "...",
  "last_run_start_time": "2024-06-06T01:02:51.366397",
  "last_run_start_time_live": null,
  "feedback_stats": {
    "cot contextual accuracy": {
      "n": 9,
      "avg": 0.6666666666666666,
      "values": {
        "CORRECT": 6,
        "INCORRECT": 3
      }
    }
  },
  "session_feedback_stats": {},
  "run_facets": [],
  "error_rate": 0,
  "streaming_rate": 0,
  "test_run_number": 11
}
```
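
For reference, a payload like the one above is returned by a single `read_project`/`readProject` call with stats included. Here is a minimal Python sketch, using the experiment name from the example payload; a full end-to-end example follows later in this guide.

```python
from langsmith import Client

client = Client()

# "SQL Database Agent-ae9ad229" is the experiment name from the example payload above
project = client.read_project(project_name="SQL Database Agent-ae9ad229", include_stats=True)
print(project.json(indent=2))
```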

From here, you can extract performance metrics such as:

- `latency_p50`: The 50th percentile latency in seconds.
- `latency_p99`: The 99th percentile latency in seconds.
- `total_tokens`: The total number of tokens used.
- `prompt_tokens`: The number of prompt tokens used.
- `completion_tokens`: The number of completion tokens used.
- `total_cost`: The total cost of the experiment.
- `prompt_cost`: The cost of the prompt tokens.
- `completion_cost`: The cost of the completion tokens.
- `feedback_stats`: The feedback statistics for the experiment.
- `error_rate`: The error rate for the experiment.
- `first_token_p50`: The 50th percentile latency for the time to generate the first token (if using streaming).
- `first_token_p99`: The 99th percentile latency for the time to generate the first token (if using streaming).

Here is an example of how you can fetch the performance metrics for an experiment using the Python and TypeScript SDKs.

First, as a prerequisite, we will create a trivial dataset. Here, we only demonstrate this in Python, but you can do the same in TypeScript.
Please view the [how-to guide](./evaluate_llm_application) on evaluation for more details.

```python
from langsmith import Client

client = Client()

# Create a dataset
examples = [
    ("Harrison", "Hello Harrison"),
    ("Ankush", "Hello Ankush"),
]

dataset_name = "HelloDataset"
dataset = client.create_dataset(dataset_name=dataset_name)
inputs, outputs = zip(
    *[({"input": text}, {"expected": result}) for text, result in examples]
)
client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
```

Next, we will create an experiment, retrieve the experiment name from the result of `evaluate`, then fetch the performance metrics for the experiment.

<CodeTabs
  groupId="client-language"
  tabs={[
    PythonBlock(`from langsmith import Client
from langsmith.evaluation import evaluate
from langsmith.schemas import Example, Run\n
client = Client()
dataset_name = "HelloDataset"\n
def foo_label(root_run: Run, example: Example) -> dict:
    return {"score": 1, "key": "foo"}\n
results = evaluate(
    lambda inputs: "Hello " + inputs["input"],
    data=dataset_name,
    evaluators=[foo_label],
    experiment_prefix="Hello",
)\n
resp = client.read_project(project_name=results.experiment_name, include_stats=True)\n
print(resp.json(indent=2))`),
    TypeScriptBlock(`import { Client } from "langsmith";
import { evaluate } from "langsmith/evaluation";
import type { EvaluationResult } from "langsmith/evaluation";
import type { Run, Example } from "langsmith/schemas";\n
// Row-level evaluator
function fooLabel(rootRun: Run, example: Example): EvaluationResult {
  return { score: 1, key: "foo" };
}\n
const client = new Client();\n
const results = await evaluate((inputs) => {
  return { output: "Hello " + inputs.input };
}, {
  data: "HelloDataset",
  experimentPrefix: "Hello",
  evaluators: [fooLabel],
});\n
const resp = await client.readProject({ projectName: results.experimentName, includeStats: true });
console.log(JSON.stringify(resp, null, 2));`),
  ]}
/>
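
If you only need a handful of these values, you can read them off the same response. The following is a minimal Python sketch (an illustration, not part of the example above) that continues from the Python tab, where `client` and `results` are already defined; it parses the serialized response and indexes the fields shown in the example payload, so adjust the keys if your SDK version returns a slightly different shape.

```python
import json

# Continues from the Python example above: `client` and `results` are already defined
resp = client.read_project(project_name=results.experiment_name, include_stats=True)

# Serialize to a plain dict so we can index the fields shown in the payload above
stats = json.loads(resp.json())

print("p50 latency (s):", stats["latency_p50"])
print("p99 latency (s):", stats["latency_p99"])
print("total tokens:", stats["total_tokens"])
print("total cost:", stats["total_cost"])

# Feedback stats are keyed by feedback key, e.g. the "foo" evaluator defined above
for key, value in (stats.get("feedback_stats") or {}).items():
    print(f"feedback '{key}': n={value['n']}, avg={value['avg']}")
```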
2 changes: 2 additions & 0 deletions versioned_docs/version-2.0/how_to_guides/index.md
@@ -71,6 +71,7 @@ Get started with LangSmith's tracing features to start adding observability to y
- [Trace without setting environment variables](./how_to_guides/tracing/trace_with_langchain#trace-without-setting-environment-variables)
- [Trace with `Instructor` (Python only)](./how_to_guides/tracing/trace_with_instructor)
- [Trace without setting environment variables](./how_to_guides/tracing/trace_without_env_vars)
- [Trace using the LangSmith REST API](./how_to_guides/tracing/trace_with_api)

## Datasets

@@ -138,6 +139,7 @@ Evaluate your LLM applications to measure their performance over time.
- [In the comparison view](./how_to_guides/evaluation/audit_evaluator_scores#in-the-comparison-view)
- [In the runs table](./how_to_guides/evaluation/audit_evaluator_scores#in-the-runs-table)
- [In the SDK](./how_to_guides/evaluation/audit_evaluator_scores#in-the-sdk)
- [Fetch performance metrics for an experiment](./how_to_guides/evaluation/fetch_perf_metrics_experiment)

## Human feedback

@@ -0,0 +1,83 @@
---
sidebar_position: 17
---

# Trace using the LangSmith REST API

We HIGHLY recommend using our Python or TypeScript SDKs to send traces to LangSmith.
These SDKs include several optimizations, such as batching and backgrounding, so that sending traces does not impact your application's performance.
However, if you are unable to use our SDKs, you can send traces with the LangSmith REST API. Performance may be impacted if you send traces synchronously from your application.
This guide shows how to trace a request using the LangSmith REST API. See our [API documentation](https://api.smith.langchain.com/redoc) for a full list of endpoints and request/response schemas.

:::note
When using the LangSmith REST API, you will need to provide your API key in the request headers as `"x-api-key"`.

Additionally, do not set the `dotted_order` and `trace_id` fields in the request body; these fields are generated automatically by the system.
:::

The following example shows how you might leverage our API directly in Python. The same principles apply to other languages.

```python
import openai
import os
import requests
from datetime import datetime
from uuid import uuid4

def post_run(run_id, name, run_type, inputs, parent_id=None):
    """Function to post a new run to the API."""
    data = {
        "id": run_id.hex,
        "name": name,
        "run_type": run_type,
        "inputs": inputs,
        "start_time": datetime.utcnow().isoformat(),
    }
    if parent_id:
        data["parent_run_id"] = parent_id.hex
    requests.post(
        "https://api.smith.langchain.com/runs",
        json=data,
        headers=headers
    )

def patch_run(run_id, outputs):
    """Function to patch a run with outputs."""
    requests.patch(
        f"https://api.smith.langchain.com/runs/{run_id}",
        json={
            "outputs": outputs,
            "end_time": datetime.utcnow().isoformat(),
        },
        headers=headers,
    )

# Send your API Key in the request headers
headers = {"x-api-key": os.environ["LANGCHAIN_API_KEY"]}

# This can be a user input to your app
question = "Can you summarize this morning's meetings?"

# This can be retrieved in a retrieval step
context = "During this morning's meeting, we solved all world conflict."
messages = [
    {"role": "system", "content": "You are a helpful assistant. Please respond to the user's request only based on the given context."},
    {"role": "user", "content": f"Question: {question}\nContext: {context}"}
]

# Create parent run
parent_run_id = uuid4()
post_run(parent_run_id, "Chat Pipeline", "chain", {"question": question})

# Create child run
child_run_id = uuid4()
post_run(child_run_id, "OpenAI Call", "llm", {"messages": messages}, parent_run_id)

# Generate a completion
client = openai.Client()
chat_completion = client.chat.completions.create(model="gpt-3.5-turbo", messages=messages)

# End runs
patch_run(child_run_id, chat_completion.dict())
patch_run(parent_run_id, {"answer": chat_completion.choices[0].message.content})
```
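
Because these requests are sent synchronously, it can be helpful to confirm they succeeded rather than failing silently. Below is a small optional sketch, an addition for illustration rather than part of the example above, that reuses the `requests`, `uuid4`, `datetime`, and `headers` definitions from that snippet and raises if the LangSmith API returns a non-2xx response.

```python
def _request(method, url, **kwargs):
    """Thin wrapper around `requests` that surfaces LangSmith API errors."""
    resp = requests.request(method, url, headers=headers, **kwargs)
    if not resp.ok:
        # Include the status code and response body to make debugging easier
        raise RuntimeError(f"{method} {url} failed: {resp.status_code} {resp.text}")
    return resp

# Example: post a minimal run, then patch it with outputs, checking each response
run_id = uuid4()
_request(
    "POST",
    "https://api.smith.langchain.com/runs",
    json={
        "id": run_id.hex,
        "name": "Health Check",  # hypothetical run name for illustration
        "run_type": "chain",
        "inputs": {"ping": "pong"},
        "start_time": datetime.utcnow().isoformat(),
    },
)
_request(
    "PATCH",
    f"https://api.smith.langchain.com/runs/{run_id}",
    json={"outputs": {"ok": True}, "end_time": datetime.utcnow().isoformat()},
)
```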