Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 207 additions & 0 deletions scripts/create_sample_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
#!/usr/bin/env python3
from datetime import datetime
from typing import List

import pyst_client
from pyst_client.models import (
ConceptCreate,
ConceptSchemeInput,
DateTime,
DateTimeType,
MultilingualString,
Node,
Status,
VersionString,
)

# Configuration
API_URL = "http://localhost:8000"
API_TOKEN = "missing"
Copy link

Copilot AI Apr 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using a hard-coded API token may lead to security issues; consider retrieving the token from an environment variable instead.

Suggested change
API_TOKEN = "missing"
import os
API_TOKEN = os.getenv("API_TOKEN", "missing")

Copilot uses AI. Check for mistakes.
LANGUAGES = ["en", "de", "es", "da", "fr", "pt", "it"]

def create_multilingual_string(text_base: str, index: int) -> List[MultilingualString]:
"""Create multilingual strings for each supported language."""
translations = {
"en": f"{text_base} {index}",
"de": f"{text_base} {index} (DE)",
"es": f"{text_base} {index} (ES)",
"da": f"{text_base} {index} (DA)",
"fr": f"{text_base} {index} (FR)",
"pt": f"{text_base} {index} (PT)",
"it": f"{text_base} {index} (IT)",
}
return [
MultilingualString(language=lang, value=translations[lang])
for lang in LANGUAGES
]

def create_concept_scheme(index: int) -> ConceptSchemeInput:
"""Create a concept scheme with the given index."""
return ConceptSchemeInput(
id=f"http://example.com/taxonomy/2024/scheme{index}",
type=["http://www.w3.org/2004/02/skos/core#ConceptScheme"],
http___www_w3_org_2004_02_skos_corepref_label=create_multilingual_string(
"Example Taxonomy", index
),
http___purl_org_ontology_bibo_status=[
Status(id="http://purl.org/ontology/bibo/status/accepted")
],
http___purl_org_dc_terms_created=[DateTime(
type=DateTimeType.HTTP_COLON_SLASH_SLASH_WWW_DOT_W3_DOT_ORG_SLASH_2001_SLASH_XML_SCHEMA_HASH_DATE_TIME,
value=datetime.now()
)],
http___purl_org_dc_terms_creator=[
Node(id="http://example.com/organization")
],
http___www_w3_org_2002_07_owlversion_info=[
VersionString(value=f"1.0.{index}")
],
http___www_w3_org_2004_02_skos_coredefinition=create_multilingual_string(
"A comprehensive taxonomy for organizing and classifying various concepts and categories, version",
index
),
http___www_w3_org_2004_02_skos_corenotation=[{
"@value": f"TAX-2024-{index:03d}",
"@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#PlainLiteral"
}]
)

def create_concept(scheme_id: str, index: int) -> ConceptCreate:
"""Create a concept with the given index for a specific scheme."""
concept_id = f"{scheme_id}/concept{index}"
broader = None
if index > 1:
broader = [Node(id=f"{scheme_id}/concept{index-1}")]

return ConceptCreate(
id=concept_id,
type=["http://www.w3.org/2004/02/skos/core#Concept"],
http___www_w3_org_2004_02_skos_corepref_label=create_multilingual_string(
"Example Concept", index
),
http___www_w3_org_2004_02_skos_corein_scheme=[Node(id=scheme_id)],
http___purl_org_ontology_bibo_status=[
Status(id="http://purl.org/ontology/bibo/status/accepted")
],
http___purl_org_dc_terms_created=[DateTime(
type=DateTimeType.HTTP_COLON_SLASH_SLASH_WWW_DOT_W3_DOT_ORG_SLASH_2001_SLASH_XML_SCHEMA_HASH_DATE_TIME,
value=datetime.now()
)],
http___purl_org_dc_terms_creator=[
Node(id="http://example.com/organization")
],
http___www_w3_org_2004_02_skos_coredefinition=create_multilingual_string(
"A concept that represents a specific category or idea, version",
index
),
http___www_w3_org_2004_02_skos_corenotation=[{
"@value": f"CON-{index:03d}",
"@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#PlainLiteral"
}],
http___www_w3_org_2004_02_skos_corebroader=broader
)

async def create_schemes():
"""Create 10 concept schemes."""
configuration = pyst_client.Configuration(
host=API_URL,
api_key={"X-PyST-Auth-Token": API_TOKEN}
)

async with pyst_client.ApiClient(configuration) as api_client:
concept_scheme_api = pyst_client.ConceptSchemeApi(api_client)

for i in range(1, 11):
try:
scheme = create_concept_scheme(i)
response = await concept_scheme_api.concept_scheme_create_concept_scheme_post(
concept_scheme_input=scheme,
x_pyst_auth_token=API_TOKEN
)
print(f"Created concept scheme {i} successfully")
except Exception as e:
print(f"Failed to create concept scheme {i}: {str(e)}")

async def create_concepts_for_scheme(scheme_id: str):
"""Create 5 concepts for a specific scheme."""
configuration = pyst_client.Configuration(
host=API_URL,
api_key={"X-PyST-Auth-Token": API_TOKEN}
)

async with pyst_client.ApiClient(configuration) as api_client:
concept_api = pyst_client.ConceptApi(api_client)

for j in range(1, 6):
try:
concept = create_concept(scheme_id, j)
await concept_api.concept_create_concept_post(
concept_create=concept,
x_pyst_auth_token=API_TOKEN
)
print(f"Created concept {j} for scheme {scheme_id} successfully")
except Exception as e:
print(f"Failed to create concept {j} for scheme {scheme_id}: {str(e)}")

async def create_all_data():
"""Create all sample data - schemes and concepts for all schemes."""
configuration = pyst_client.Configuration(
host=API_URL,
api_key={"X-PyST-Auth-Token": API_TOKEN}
)

async with pyst_client.ApiClient(configuration) as api_client:
concept_scheme_api = pyst_client.ConceptSchemeApi(api_client)
concept_api = pyst_client.ConceptApi(api_client)

# Create schemes
for i in range(1, 11):
try:
scheme = create_concept_scheme(i)
response = await concept_scheme_api.concept_scheme_create_concept_scheme_post(
concept_scheme_input=scheme,
x_pyst_auth_token=API_TOKEN
)
print(f"Created concept scheme {i} successfully")

# Create concepts for this scheme
for j in range(1, 6):
try:
concept = create_concept(scheme.id, j)
await concept_api.concept_create_concept_post(
concept_create=concept,
x_pyst_auth_token=API_TOKEN
)
print(f"Created concept {j} for scheme {i} successfully")
except Exception as e:
print(f"Failed to create concept {j} for scheme {i}: {str(e)}")
except Exception as e:
print(f"Failed to create concept scheme {i}: {str(e)}")

async def main():
"""Create sample data - either schemes, concepts, or both."""
import argparse

parser = argparse.ArgumentParser(description='Create sample data for the semantic taxonomy')
parser.add_argument('--schemes', action='store_true', help='Create concept schemes')
parser.add_argument('--concepts', action='store_true', help='Create concepts')
parser.add_argument('--scheme-id', type=str, help='Scheme ID to create concepts for (required if --concepts is used)')
parser.add_argument('--all', action='store_true', help='Create all sample data (schemes and concepts)')
args = parser.parse_args()

if args.all:
await create_all_data()
return

if args.schemes:
await create_schemes()

if args.concepts:
if not args.scheme_id:
Copy link

Copilot AI Apr 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When '--scheme-id' is missing while creating concepts, consider exiting with a non-zero status code (e.g., using sys.exit(1)) rather than only printing an error message, to properly signal the failure.

Copilot uses AI. Check for mistakes.
print("Error: --scheme-id is required when creating concepts")
return
await create_concepts_for_scheme(args.scheme_id)

if __name__ == "__main__":
import asyncio
asyncio.run(main())