Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ vastdb = ["requirements/connectors/vastdb.txt"]
vectara = ["requirements/connectors/vectara.txt"]
weaviate = ["requirements/connectors/weaviate.txt"]
wikipedia = ["requirements/connectors/wikipedia.txt"]
yugabytedb = ["requirements/connectors/yugabytedb.txt"]
zendesk = ["requirements/connectors/zendesk.txt"]

# Embedders
Expand Down
2 changes: 2 additions & 0 deletions requirements/connectors/yugabytedb.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pandas
psycopg2-yugabytedb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# YugabyteDB container used as a database fixture.
services:
  yugabytedb:
    image: yugabytedb/yugabyte:latest
    restart: always
    container_name: yugabytedb_dest
    # --initial_scripts_dir points at the directory the schema file is mounted into below.
    command: bin/yugabyted start --daemon=false --initial_scripts_dir=/home/yugabyte/init_scripts
    ports:
      # Container port 5433 is published on the same host port.
      - "5433:5433"
    volumes:
      # The schema script is mounted as init.sql inside the init scripts directory.
      - ./yugabytedb-schema.sql:/home/yugabyte/init_scripts/init.sql
    healthcheck:
      # The container is reported healthy once a trivial query succeeds through ysqlsh.
      test:
        - CMD-SHELL
        - bin/ysqlsh -h localhost -U yugabyte -d yugabyte -c 'SELECT 1'
      interval: 10s
      timeout: 60s
      retries: 10
      start_period: 20s

Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
-- Enable the pgvector extension so the vector column type used below is available.
-- IF NOT EXISTS keeps this statement safe to re-run.
CREATE EXTENSION IF NOT EXISTS vector;

-- Elements table keyed by a UUID; holds the element text, a 384-dimension
-- embedding, and metadata columns. Every column other than the primary key
-- is nullable.
-- IF NOT EXISTS matches the extension statement above, so re-running this
-- script against an already-initialized database does not fail here.
CREATE TABLE IF NOT EXISTS elements (
    id UUID PRIMARY KEY,
    record_id VARCHAR,
    element_id VARCHAR,
    text TEXT,
    -- Fixed-width pgvector column: stored vectors must have exactly 384 dimensions.
    embeddings vector(384),
    type VARCHAR,
    system VARCHAR,
    layout_width DECIMAL,
    layout_height DECIMAL,
    -- NOTE(review): points/links/permissions_data/record_locator/regex_metadata
    -- are plain TEXT; presumably serialized structures -- confirm against the writer.
    points TEXT,
    url TEXT,
    version VARCHAR,
    date_created TIMESTAMPTZ,
    date_modified TIMESTAMPTZ,
    date_processed TIMESTAMPTZ,
    permissions_data TEXT,
    record_locator TEXT,
    category_depth INTEGER,
    parent_id VARCHAR,
    attached_filename VARCHAR,
    filetype VARCHAR,
    last_modified TIMESTAMPTZ,
    file_directory VARCHAR,
    filename VARCHAR,
    -- Multi-valued fields are stored as VARCHAR arrays.
    languages VARCHAR [],
    page_number VARCHAR,
    links TEXT,
    page_name VARCHAR,
    link_urls VARCHAR [],
    link_texts VARCHAR [],
    sent_from VARCHAR [],
    sent_to VARCHAR [],
    subject VARCHAR,
    section VARCHAR,
    header_footer_type VARCHAR,
    emphasized_text_contents VARCHAR [],
    emphasized_text_tags VARCHAR [],
    text_as_html TEXT,
    regex_metadata TEXT,
    detection_class_prob DECIMAL
);


Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# YugabyteDB container used as a database fixture.
services:
  yugabytedb:
    image: yugabytedb/yugabyte:latest
    restart: always
    container_name: yugabytedb_src
    # --initial_scripts_dir points at the directory the schema file is mounted into below.
    command: bin/yugabyted start --daemon=false --initial_scripts_dir=/home/yugabyte/init_scripts
    ports:
      # Container port 5433 is published on the same host port.
      - "5433:5433"
    volumes:
      # The schema script is mounted as init.sql inside the init scripts directory.
      - ./yugabytedb-schema.sql:/home/yugabyte/init_scripts/init.sql
    healthcheck:
      # The container is reported healthy once a trivial query succeeds through ysqlsh.
      test:
        - CMD-SHELL
        - bin/ysqlsh -h localhost -U yugabyte -d yugabyte -c 'SELECT 1'
      interval: 10s
      timeout: 60s
      retries: 10
      start_period: 20s

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- cars table: auto-incrementing integer key plus a required brand and price.
CREATE TABLE cars (
    car_id SERIAL,
    brand TEXT NOT NULL,
    price INTEGER NOT NULL,
    PRIMARY KEY (car_id)
);


Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"directory_structure": [
"cars-1-5fb93ce5.csv",
"cars-10-5fb93ce5.csv",
"cars-2-5fb93ce5.csv",
"cars-3-5fb93ce5.csv",
"cars-4-5fb93ce5.csv",
"cars-5-5fb93ce5.csv",
"cars-6-5fb93ce5.csv",
"cars-7-5fb93ce5.csv",
"cars-8-5fb93ce5.csv",
"cars-9-5fb93ce5.csv"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
car_id,brand
1,brand_0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
car_id,brand
10,brand_9
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
car_id,brand
2,brand_1
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
car_id,brand
3,brand_2
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
car_id,brand
4,brand_3
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
car_id,brand
5,brand_4
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
car_id,brand
6,brand_5
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
car_id,brand
7,brand_6
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
car_id,brand
8,brand_7
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
car_id,brand
9,brand_8
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"identifier": "1c704234-e0da-5676-be32-a12cc6407b3d",
"connector_type": "yugabytedb",
"source_identifiers": null,
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1749467370.4465034",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": null,
"display_name": "cars-car_id-[8..7]",
"batch_items": [
{
"identifier": "10",
"version": null
},
{
"identifier": "7",
"version": null
},
{
"identifier": "8",
"version": null
},
{
"identifier": "9",
"version": null
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"identifier": "324354db-0465-56ca-a023-072481f5dd03",
"connector_type": "yugabytedb",
"source_identifiers": null,
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1749467370.4308963",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": null,
"display_name": "cars-car_id-[1..6]",
"batch_items": [
{
"identifier": "1",
"version": null
},
{
"identifier": "2",
"version": null
},
{
"identifier": "3",
"version": null
},
{
"identifier": "4",
"version": null
},
{
"identifier": "5",
"version": null
},
{
"identifier": "6",
"version": null
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"identifier": "cars-1-5fb93ce5",
"connector_type": "yugabytedb",
"source_identifiers": {
"filename": "cars-1-5fb93ce5.csv",
"fullpath": "cars-1-5fb93ce5.csv",
"rel_path": null
},
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1749467370.4308963",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/tmp/tmptf6_mjz9/cars-1-5fb93ce5.csv",
"display_name": "cars-car_id-[1..6]"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"identifier": "cars-10-5fb93ce5",
"connector_type": "yugabytedb",
"source_identifiers": {
"filename": "cars-10-5fb93ce5.csv",
"fullpath": "cars-10-5fb93ce5.csv",
"rel_path": null
},
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1749467370.4465034",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/tmp/tmptf6_mjz9/cars-10-5fb93ce5.csv",
"display_name": "cars-car_id-[8..7]"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"identifier": "cars-2-5fb93ce5",
"connector_type": "yugabytedb",
"source_identifiers": {
"filename": "cars-2-5fb93ce5.csv",
"fullpath": "cars-2-5fb93ce5.csv",
"rel_path": null
},
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1749467370.4308963",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/tmp/tmptf6_mjz9/cars-2-5fb93ce5.csv",
"display_name": "cars-car_id-[1..6]"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"identifier": "cars-3-5fb93ce5",
"connector_type": "yugabytedb",
"source_identifiers": {
"filename": "cars-3-5fb93ce5.csv",
"fullpath": "cars-3-5fb93ce5.csv",
"rel_path": null
},
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1749467370.4308963",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/tmp/tmptf6_mjz9/cars-3-5fb93ce5.csv",
"display_name": "cars-car_id-[1..6]"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"identifier": "cars-4-5fb93ce5",
"connector_type": "yugabytedb",
"source_identifiers": {
"filename": "cars-4-5fb93ce5.csv",
"fullpath": "cars-4-5fb93ce5.csv",
"rel_path": null
},
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1749467370.4308963",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/tmp/tmptf6_mjz9/cars-4-5fb93ce5.csv",
"display_name": "cars-car_id-[1..6]"
}
Loading