Skip to content

Commit 02036cf

Browse files
a-klosgithub-actions[bot]Copilot
authored
feat: add sitemap loader to admin frontend (#21)
* feat: add Confluence integration with configurable parameters for document loading * chore: update submodules to latest main * feat: enable MinIO feature in Helm chart configuration and remove default max pages limit in document upload * feat: add pytest configuration and update testing setup across multiple components * Update frontend/libs/i18n/admin/en.json Co-authored-by: Copilot <[email protected]> * feat: enhance Confluence upload form with additional input fields and labels * feat: add sitemap upload functionality with configuration options and error handling * feat: add header template support for sitemap upload with JSON validation * chore: update .gitignore to include todo files, remove unused Confluence configuration from DocumentUploadContainer, and update subproject reference in rag-core-library * chore: update subproject commit reference in rag-core-library * chore: update subproject commit reference in rag-core-library * chore: update subproject commit reference in rag-core-library * chore: update submodules to latest main * feat: update dependencies and add new packages - Added `fake-useragent` version 2.2.0 to `document-extractor/poetry.lock`. - Introduced `googleapis-common-protos` version 1.70.0 and updated dependencies in `rag-backend/poetry.lock`. - Added `importlib-metadata` version 8.7.0 with dependencies in `rag-backend/poetry.lock`. - Updated `langfuse` to version 3.0.0 in `rag-backend/poetry.lock` and adjusted its dependencies. - Added OpenTelemetry packages (`opentelemetry-api`, `opentelemetry-exporter-otlp`, `opentelemetry-sdk`, etc.) with version 1.34.1 in `rag-backend/poetry.lock`. - Downgraded `protobuf` to version 5.29.5 in `rag-backend/poetry.lock`. - Added `zipp` version 3.23.0 in `rag-backend/poetry.lock`. - Updated submodules for `rag-core-library` and `rag-infrastructure`. 
* chore: update submodules to latest main * Update subproject commits for rag-core-library and rag-infrastructure * chore: update submodules to latest main * chore: update subproject commit for rag-core-library * Update frontend/libs/admin-app/feature-document/DocumentUploadContainer.vue Co-authored-by: Copilot <[email protected]> * Update frontend/libs/admin-app/data-access/+state/documents.store.ts Co-authored-by: Copilot <[email protected]> * Update DocumentUploadContainer.vue * chore: update submodule --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Copilot <[email protected]>
1 parent fac5836 commit 02036cf

File tree

9 files changed

+1693
-1047
lines changed

9 files changed

+1693
-1047
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ auth
1111
# Node Modules
1212
node_modules/
1313

14-
14+
**/.notebooks
15+
**/todo*.md
1516

1617
# Byte-compiled / optimized / DLL files
1718
__pycache__/

admin-backend/poetry.lock

Lines changed: 284 additions & 19 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

document-extractor/poetry.lock

Lines changed: 1053 additions & 1004 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

frontend/libs/admin-app/data-access/+state/documents.store.ts

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@ import { ref } from 'vue';
33
import { DocumentModel } from "../../models/document.model.ts";
44
import { ErrorType } from "../../models/error-type";
55
import { UploadedDocument, mapToUploadDocument } from "../../models/uploaded-document.model";
6-
import { DocumentAPI, ConfluenceConfig } from "../document.api";
6+
import { ConfluenceConfig, DocumentAPI, SitemapConfig } from "../document.api";
77

88
export const useDocumentsStore = defineStore('chat', () => {
99
const uploadedDocuments = ref<UploadedDocument[]>([]);
1010
const allDocuments = ref<DocumentModel[]>();
1111
const error = ref<ErrorType | null>(null);
1212
const isLoadingConfluence = ref(false);
13+
const isLoadingSitemap = ref(false);
1314

1415
function updateUploadedDocumentData(documentId: string, data: Partial<UploadedDocument>) {
1516
const document = uploadedDocuments.value.find((d: UploadedDocument) => d.id === documentId);
@@ -75,6 +76,29 @@ export const useDocumentsStore = defineStore('chat', () => {
7576
}
7677
};
7778

79+
/**
 * Asks the backend to load a sitemap and then reloads the document list.
 *
 * `isLoadingSitemap` is true while the request is in flight. Failures are
 * recorded in `error` rather than rethrown:
 * - HTTP 501 sets "sitemap_not_configured"
 * - HTTP 423 sets "sitemap_locked"
 * - anything else sets "sitemap"
 *
 * @param config sitemap settings supplied by the frontend
 */
const loadSitemap = async (config: SitemapConfig) => {
    isLoadingSitemap.value = true;
    error.value = null;
    try {
        // provide sitemap configuration from frontend
        await DocumentAPI.loadSitemap(config);
        // Refresh the document list after the sitemap request completed.
        await loadDocuments();
    } catch (err: unknown) {
        // The caught value is untyped: only read `response.status` once we know
        // it is a non-null object, so a non-object rejection falls through to
        // the generic error instead of raising a second exception here.
        const response = typeof err === 'object' && err !== null && 'response' in err
            ? (err as { response?: { status?: number } | null }).response
            : undefined;

        switch (response?.status) {
            case 501:
                error.value = "sitemap_not_configured";
                console.error("Sitemap loader is not configured.");
                break;
            case 423:
                error.value = "sitemap_locked";
                console.error("Sitemap loader returned a warning.");
                break;
            default:
                error.value = "sitemap";
                console.error(err);
        }
    } finally {
        isLoadingSitemap.value = false;
    }
};
101+
78102
const uploadDocuments = async (files: File[]) => {
79103
try {
80104
const uploads = files.map(uploadDocument);
@@ -103,5 +127,5 @@ export const useDocumentsStore = defineStore('chat', () => {
103127
uploadedDocuments.value = uploadedDocuments.value.filter(o => o.id !== documentId);
104128
};
105129

106-
return {removeUploadedDocument, uploadDocuments, loadDocuments, deleteDocument, loadConfluence, allDocuments, uploadedDocuments, error};
130+
return {removeUploadedDocument, uploadDocuments, loadDocuments, deleteDocument, loadConfluence, loadSitemap, allDocuments, uploadedDocuments, error, isLoadingSitemap};
107131
});

frontend/libs/admin-app/data-access/document.api.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,14 @@ export interface ConfluenceConfig {
1616
name: string;
1717
}
1818

19+
/**
 * Settings for loading documents from a sitemap.
 *
 * Only `webPath` and `name` are always sent to the backend; the other fields
 * are skipped when they are missing or blank.
 */
export interface SitemapConfig {
    /** Sitemap location; sent to the backend as `web_path`. */
    webPath: string;
    /** Optional filter patterns, one per line; blank lines are ignored. */
    filterUrls?: string;
    /** Optional request headers, given as a JSON string. */
    headerTemplate?: string;
    /** Value of the `name` query parameter of the upload request. */
    name: string;
}
26+
1927
export class DocumentAPI {
2028
static async loadDocuments(): Promise<DocumentModel[]> {
2129
try {
@@ -62,6 +70,46 @@ export class DocumentAPI {
6270
}
6371
}
6472

73+
/**
 * Submits a sitemap source to the backend.
 *
 * Converts `config` into a list of key/value items and POSTs it to
 * `/upload_source` with `source_type=sitemap` and the configured name as
 * query parameters. `filter_urls` and `header_template` are only included
 * when they carry content. Every failure, including a rejected header
 * template, is handed to `handleError`.
 *
 * @param config sitemap settings
 */
static async loadSitemap(config: SitemapConfig): Promise<void> {
    try {
        // convert config to list of key/value items for backend
        const payload = [
            { key: 'web_path', value: config.webPath }
        ];

        // One pattern per line; whitespace-only lines are dropped, and the
        // item is omitted entirely when nothing remains.
        const filterUrlsArray = (config.filterUrls ?? '')
            .split('\n')
            .map(url => url.trim())
            .filter(url => url.length > 0);
        if (filterUrlsArray.length > 0) {
            payload.push({ key: 'filter_urls', value: JSON.stringify(filterUrlsArray) });
        }

        // add header_template only if provided
        if (config.headerTemplate && config.headerTemplate.trim()) {
            let parsedHeaders: unknown;
            try {
                parsedHeaders = JSON.parse(config.headerTemplate);
            } catch {
                throw new Error('Header template must be valid JSON format');
            }
            // Headers are name/value pairs, so JSON scalars, arrays and null
            // are rejected even though they parse successfully.
            if (typeof parsedHeaders !== 'object' || parsedHeaders === null || Array.isArray(parsedHeaders)) {
                throw new Error('Header template must be a JSON object');
            }
            payload.push({ key: 'header_template', value: config.headerTemplate });
        }

        // include required query parameters
        await axios.post<void>('/upload_source', payload, {
            params: { source_type: 'sitemap', name: config.name }
        });
    } catch (error) {
        this.handleError(error);
    }
}
112+
65113
static async deleteDocument(documentId: string): Promise<void> {
66114
try {
67115
await axios.delete<void>(`/delete_document/${documentId}`);

frontend/libs/admin-app/feature-document/DocumentUploadContainer.vue

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ const fileInputRef = ref<HTMLInputElement>();
1313
const uploadedDocuments = computed((): UploadedDocument[] => store.uploadedDocuments);
1414
const isInvalidFileType = ref(false);
1515
const allowedFileTypes = ['application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'application/vnd.openxmlformats-officedocument.presentationml.presentation', 'text/xml'];
16-
const uploadMethod = ref<'file' | 'confluence'>('file');
16+
const uploadMethod = ref<'file' | 'confluence' | 'sitemap'>('file');
1717
1818
1919
// confluence configuration refs
@@ -23,6 +23,12 @@ const confluenceToken = ref('');
2323
const confluenceUrl = ref('');
2424
const maxPages = ref<number>();
2525
26+
// sitemap configuration refs
27+
const sitemapName = ref('');
28+
const sitemapWebPath = ref('');
29+
const sitemapFilterUrls = ref('');
30+
const sitemapHeaderTemplate = ref('');
31+
2632
const error = computed(() => store.error);
2733
2834
const uploadDocuments = (files: File[]) => {
@@ -73,6 +79,16 @@ const handleConfluenceUpload = () => {
7379
});
7480
}
7581
82+
// Snapshot the current sitemap form values and pass them to the store,
// which performs the backend request.
const handleSitemapUpload = () => {
    const sitemapConfig = {
        name: sitemapName.value,
        webPath: sitemapWebPath.value,
        filterUrls: sitemapFilterUrls.value,
        headerTemplate: sitemapHeaderTemplate.value,
    };
    store.loadSitemap(sitemapConfig);
}
91+
7692
const clearError = () => {
7793
store.error = null
7894
}
@@ -91,6 +107,12 @@ const getErrorMessage = (errorType: string) => {
91107
return t('documents.confluenceNotConfigured');
92108
case 'confluence_locked':
93109
return t('documents.confluenceLocked');
110+
case 'sitemap':
111+
return t('documents.sitemapError');
112+
case 'sitemap_not_configured':
113+
return t('documents.sitemapNotConfigured');
114+
case 'sitemap_locked':
115+
return t('documents.sitemapLocked');
94116
default:
95117
return t('documents.unknownError');
96118
}
@@ -124,6 +146,10 @@ const getErrorMessage = (errorType: string) => {
124146
@click="uploadMethod = 'confluence'">
125147
{{ t('documents.confluenceUpload') }}
126148
</a>
149+
<a class="tab" :class="{'tab-active': uploadMethod === 'sitemap'}"
150+
@click="uploadMethod = 'sitemap'">
151+
{{ t('documents.sitemapUpload') }}
152+
</a>
127153
</div>
128154

129155
<!-- File upload area -->
@@ -144,7 +170,7 @@ const getErrorMessage = (errorType: string) => {
144170
</div>
145171

146172
<!-- Confluence load area -->
147-
<div v-else
173+
<div v-else-if="uploadMethod === 'confluence'"
148174
class="flex flex-col m-auto justify-center items-center w-full h-112 bg-base-100 rounded-box border border-base-300">
149175
<div class="flex flex-col justify-center items-center pt-5 pb-6">
150176
<GlobeAltIcon class="w-10 h-10 mb-4 text-accent-content" />
@@ -169,6 +195,30 @@ const getErrorMessage = (errorType: string) => {
169195
</div>
170196
</div>
171197

198+
<!-- Sitemap load area -->
199+
<div v-else-if="uploadMethod === 'sitemap'"
    class="flex flex-col m-auto justify-center items-center w-full h-112 bg-base-100 rounded-box border border-base-300">
    <div class="flex flex-col justify-center items-center pt-5 pb-6">
        <GlobeAltIcon class="w-10 h-10 mb-4 text-accent-content" />
        <p class="mb-1 font-bold">{{ t('documents.sitemapLoadTitle') }}</p>
        <!-- configuration inputs: every control carries the id its screen-reader label points to -->
        <div class="space-y-2 mb-4 w-full max-w-sm">
            <label for="sitemapName" class="sr-only">Sitemap Name</label>
            <input id="sitemapName" v-model="sitemapName" type="text" placeholder="Name" class="input input-bordered w-full" required/>
            <label for="sitemapWebPath" class="sr-only">Sitemap URL</label>
            <input id="sitemapWebPath" v-model="sitemapWebPath" type="url" placeholder="Sitemap URL (required)" class="input input-bordered w-full" required />
            <label for="sitemapFilterUrls" class="sr-only">Filter URLs</label>
            <textarea id="sitemapFilterUrls" v-model="sitemapFilterUrls" placeholder="Filter URLs (optional) - one regex pattern per line" class="textarea textarea-bordered w-full" rows="3"></textarea>
            <label for="sitemapHeaderTemplate" class="sr-only">Headers JSON</label>
            <textarea id="sitemapHeaderTemplate" v-model="sitemapHeaderTemplate" placeholder="Headers (optional) - JSON format: {&quot;Authorization&quot;: &quot;Bearer token&quot;}" class="textarea textarea-bordered w-full" rows="2"></textarea>
        </div>
        <p class="text-xs opacity-50 mb-4">{{ t('documents.sitemapLoadDescription') }}</p>
        <button class="btn btn-sm btn-accent" @click="handleSitemapUpload">
            {{ t('documents.loadSitemap') }}
        </button>
    </div>
</div>
221+
172222
<!-- Uploaded documents -->
173223
<div class="mx-auto mt-4 w-full">
174224
<div class="mb-4" v-for="uploadDocument in uploadedDocuments" :key="uploadDocument.id">

frontend/libs/i18n/admin/en.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
"confluenceLoadTitle": "Load all Confluence pages from a space",
1616
"confluenceLoadDescription": "Click the button below to load pages from Confluence",
1717
"loadConfluence": "Load Confluence",
18+
"sitemapUpload": "Sitemap",
19+
"sitemapLoadTitle": "Load content from a sitemap",
20+
"sitemapLoadDescription": "Enter a sitemap URL to extract and load content from all linked pages",
21+
"loadSitemap": "Load Sitemap",
1822
"select": "Select",
1923
"chat": "Start chat",
2024
"uploadDocumentFailed": "Upload failed",
@@ -25,6 +29,9 @@
2529
"confluenceError": "Failed to load from Confluence",
2630
"confluenceNotConfigured": "Confluence is not configured",
2731
"confluenceLocked": "Confluence Loader is busy. Please try again later",
32+
"sitemapError": "Failed to load from sitemap",
33+
"sitemapNotConfigured": "Sitemap is not configured",
34+
"sitemapLocked": "Sitemap Loader is busy. Please try again later",
2835
"unknownError": "An unknown error occurred"
2936
}
3037
}

0 commit comments

Comments
 (0)