
Commit 50fa72d

Merge pull request #27 from elizaos-plugins/feat/impl-deterministic-ids

feat: deterministic ids to prevent duplicates

2 parents 2cad175 + 363fbf3

9 files changed

Lines changed: 394 additions & 209 deletions


package.json

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 {
   "name": "@elizaos/plugin-knowledge",
   "description": "Plugin for Knowledge",
-  "version": "1.0.8",
+  "version": "1.0.9",
   "type": "module",
   "main": "dist/index.js",
   "module": "dist/index.js",

src/ctx-embeddings.ts

Lines changed: 0 additions & 8 deletions

@@ -630,13 +630,5 @@ export function getChunkWithContext(
     return chunkContent;
   }
 
-  // Verify that the generated context contains the original chunk
-  if (!generatedContext.includes(chunkContent)) {
-    console.warn(
-      "Generated context does not contain the original chunk. Appending original to ensure data integrity."
-    );
-    return `${generatedContext.trim()}\n\n${chunkContent}`;
-  }
-
   return generatedContext.trim();
 }

src/docs-loader.ts

Lines changed: 6 additions & 6 deletions

@@ -1,8 +1,8 @@
-import { logger, UUID, createUniqueUuid } from "@elizaos/core";
-import * as fs from "fs";
-import * as path from "path";
-import { KnowledgeService } from "./service.ts";
-import { AddKnowledgeOptions } from "./types.ts";
+import { logger, UUID } from '@elizaos/core';
+import * as fs from 'fs';
+import * as path from 'path';
+import { KnowledgeService } from './service.ts';
+import { AddKnowledgeOptions } from './types.ts';
 import { isBinaryContentType } from './utils.ts';
 
 /**
@@ -97,7 +97,7 @@ export async function loadDocsFromPath(
 
   // Create knowledge options
   const knowledgeOptions: AddKnowledgeOptions = {
-    clientDocumentId: createUniqueUuid(agentId, `docs-${fileName}-${Date.now()}`) as UUID,
+    clientDocumentId: '' as UUID, // Will be generated by the service based on content
     contentType,
     originalFilename: fileName,
     worldId: worldId || agentId,
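
This is the core of the fix: the old seed included Date.now(), so re-uploading the same file minted a new UUID every time and produced duplicate knowledge entries. The ID is now derived from document content inside the service (per the inline comment); that code is not shown in this excerpt. A minimal sketch of content-derived IDs, assuming Node's crypto; the helper name and hashing scheme are illustrative, not the plugin's actual code:

// Illustrative sketch only; the real logic lives in KnowledgeService.
import { createHash } from 'node:crypto';

function deterministicDocumentId(agentId: string, content: string): string {
  // Same agent + same content always hashes to the same ID, so re-adding a
  // document is idempotent instead of creating a duplicate. The previous
  // seed, `docs-${fileName}-${Date.now()}`, changed on every upload.
  const hex = createHash('sha256').update(`${agentId}:${content}`).digest('hex');
  // Format the first 32 hex digits as a UUID-shaped string.
  return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20, 32)}`;
}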

src/document-processor.ts

Lines changed: 21 additions & 22 deletions

@@ -31,17 +31,17 @@ const ctxKnowledgeEnabled =
  * Check if custom LLM should be used based on environment variables
  * Custom LLM is enabled when all three key variables are set:
  * - TEXT_PROVIDER
- * - TEXT_MODEL
+ * - TEXT_MODEL
  * - OPENROUTER_API_KEY (or provider-specific API key)
  */
 function shouldUseCustomLLM(): boolean {
   const textProvider = process.env.TEXT_PROVIDER;
   const textModel = process.env.TEXT_MODEL;
-
+
   if (!textProvider || !textModel) {
     return false;
   }
-
+
   // Check for provider-specific API keys
   switch (textProvider.toLowerCase()) {
     case 'openrouter':
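
For illustration, an environment that satisfies all three conditions on the openrouter branch of the switch above; every value below is a placeholder, not a recommendation:

// Hypothetical configuration; with all three variables set,
// shouldUseCustomLLM() returns true and the custom LLM path is taken.
process.env.TEXT_PROVIDER = 'openrouter';
process.env.TEXT_MODEL = 'anthropic/claude-3.5-sonnet';
process.env.OPENROUTER_API_KEY = 'sk-or-...';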
@@ -63,7 +63,9 @@ const useCustomLLM = shouldUseCustomLLM();
 if (ctxKnowledgeEnabled) {
   logger.info(`Document processor starting with Contextual Knowledge ENABLED`);
   if (useCustomLLM) {
-    logger.info(`Using Custom LLM with provider: ${process.env.TEXT_PROVIDER}, model: ${process.env.TEXT_MODEL}`);
+    logger.info(
+      `Using Custom LLM with provider: ${process.env.TEXT_PROVIDER}, model: ${process.env.TEXT_MODEL}`
+    );
   } else {
     logger.info(`Using ElizaOS Runtime LLM (default behavior)`);
   }
@@ -431,11 +433,11 @@ async function generateEmbeddingsForChunks(
   rateLimiter: () => Promise<void>
 ): Promise<Array<any>> {
   // Filter out failed chunks
-  const validChunks = contextualizedChunks.filter(chunk => chunk.success);
-  const failedChunks = contextualizedChunks.filter(chunk => !chunk.success);
+  const validChunks = contextualizedChunks.filter((chunk) => chunk.success);
+  const failedChunks = contextualizedChunks.filter((chunk) => !chunk.success);
 
   if (validChunks.length === 0) {
-    return failedChunks.map(chunk => ({
+    return failedChunks.map((chunk) => ({
       success: false,
       index: chunk.index,
       error: new Error('Chunk processing failed'),
@@ -544,6 +546,8 @@ async function generateContextsInBatch(
   contentType?: string,
   batchIndices?: number[]
 ): Promise<Array<{ contextualizedText: string; success: boolean; index: number }>> {
+  console.log('####### generateContextsInBatch FULLL DOCUMENT', fullDocumentText);
+  console.log('####### generateContextsInBatch CHUNKS', chunks);
   if (!chunks || chunks.length === 0) {
     return [];
   }
@@ -559,10 +563,9 @@ async function generateContextsInBatch(
     (config.TEXT_MODEL?.toLowerCase().includes('claude') ||
       config.TEXT_MODEL?.toLowerCase().includes('gemini'));
 
-  // For now custom TEXT_PROVIDER is not supported.
-  // logger.info(
-  //   `Using provider: ${config.TEXT_PROVIDER}, model: ${config.TEXT_MODEL}, caching capability: ${isUsingCacheCapableModel}`
-  // );
+  logger.info(
+    `Using provider: ${config.TEXT_PROVIDER}, model: ${config.TEXT_MODEL}, caching capability: ${isUsingCacheCapableModel}`
+  );
 
   // Prepare prompts or system messages in parallel
   const promptConfigs = prepareContextPrompts(
@@ -595,15 +598,11 @@ async function generateContextsInBatch(
         // Use custom LLM with caching support
         if (item.usesCaching) {
           // Use the newer caching approach with separate document
-          return await generateText(
-            item.promptText!,
-            item.systemPrompt,
-            {
-              cacheDocument: item.fullDocumentTextForContext,
-              cacheOptions: { type: 'ephemeral' },
-              autoCacheContextualRetrieval: true,
-            }
-          );
+          return await generateText(item.promptText!, item.systemPrompt, {
+            cacheDocument: item.fullDocumentTextForContext,
+            cacheOptions: { type: 'ephemeral' },
+            autoCacheContextualRetrieval: true,
+          });
         } else {
           // Original approach - document embedded in prompt
           return await generateText(item.prompt!);
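
The collapsed call keeps the same behavior: when item.usesCaching is true, the full document is passed once as an ephemeral cacheDocument so cache-capable models (the Claude/Gemini check earlier in this function) can reuse it across every per-chunk context call instead of re-reading it in each prompt. The options shape below is a hypothetical reconstruction inferred from this call site, not a published API:

// Field names come from the call above; the interface itself is assumed.
interface GenerateTextCacheOptions {
  cacheDocument?: string; // full document text, cached once per batch
  cacheOptions?: { type: 'ephemeral' }; // short-lived provider-side cache
  autoCacheContextualRetrieval?: boolean; // opt in to contextual-retrieval caching
}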
@@ -631,7 +630,7 @@ async function generateContextsInBatch(
           `context generation for chunk ${item.originalIndex}`
         );
 
-        const generatedContext = llmResponse.text;
+        const generatedContext = typeof llmResponse === 'string' ? llmResponse : llmResponse.text;
         const contextualizedText = getChunkWithContext(item.chunkText, generatedContext);
 
         logger.debug(
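
The one-line change guards against the two shapes a response can take at this point: the custom LLM path resolves to a plain string, while the runtime model path resolves to an object carrying a text field. The same normalization as a standalone helper; the union type is an assumption inferred from the ternary, not one the plugin exports:

// Assumed response union; inferred from the diff above.
type LLMResponse = string | { text: string };

function extractGeneratedContext(llmResponse: LLMResponse): string {
  return typeof llmResponse === 'string' ? llmResponse : llmResponse.text;
}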
@@ -763,7 +762,7 @@ async function generateEmbeddingWithValidation(
       const embeddingResult = await runtime.useModel(ModelType.TEXT_EMBEDDING, {
         text,
       });
-
+
       // Handle different embedding result formats consistently
       const embedding = Array.isArray(embeddingResult)
         ? embeddingResult
