Skip to content

Commit

Permalink
generate ontology from nested data
Browse files Browse the repository at this point in the history
  • Loading branch information
alkidbaci committed Feb 20, 2025
1 parent b96e8b6 commit 34e6d64
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 0 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/ai_review.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Runs the test suite with coverage, runs the Codacy static analysis CLI, and
# uploads the coverage report to Codacy on pushes and pull requests to main.
name: Codacy Analysis & Coverage
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
jobs:
  codacy-analysis-cli:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # NOTE(review): only pytest and pytest-cov are installed here; if the
      # tests import the package's own dependencies, install them first.
      - name: Run Tests and Generate Coverage Report
        run: |
          pip install pytest pytest-cov
          pytest --cov=owlapy --cov-report=xml # Generates coverage.xml

      # NOTE(review): this action is referenced by the moving `master` ref;
      # consider pinning it to a release tag or commit SHA.
      - name: Run Codacy Analysis CLI
        uses: codacy/codacy-analysis-cli-action@master
        env:
          CODACY_PROJECT_TOKEN: ${{ secrets.CODACY_PROJECT_TOKEN }}

      - name: Upload Coverage Report to Codacy
        uses: codacy/codacy-coverage-reporter-action@v1
        with:
          project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
          coverage-reports: coverage.xml
69 changes: 69 additions & 0 deletions owlapy/code_to_review_testing_ai_reviewer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
def generate_ontology(json_data_path: str = None, output_path: str = None, namespace: str = None):
    """Build an RDF graph from a JSON file of quadruples and write it as RDF/XML.

    The input JSON must have the shape
    ``{"graphs": [{"quadruples": [{"subject": s, "predicate": p, "object": o}, ...]}, ...]}``
    where ``s``, ``p`` and ``o`` are strings. Names are turned into IRIs by
    stripping surrounding whitespace, replacing spaces with underscores and
    prefixing ``namespace``.

    A predicate is declared an ``owl:ObjectProperty`` when at least one of its
    objects also occurs as a subject somewhere in the data; every object of
    such a predicate is emitted as an IRI. Any other predicate is declared an
    ``owl:DatatypeProperty`` and its objects are emitted as literals typed
    ``xsd:integer``, ``xsd:double`` or ``xsd:string``, depending on whether
    the text parses with ``int()`` or ``float()``.

    Args:
        json_data_path: Path of the JSON file to read.
        output_path: File to write; defaults to ``"output.ttl"``.
        namespace: IRI prefix for all generated resources; defaults to
            ``"http://example.org/"``.

    Raises:
        TypeError: If ``json_data_path`` is left as ``None`` (raised by ``open``).
        KeyError: If ``"graphs"``, ``"quadruples"``, ``"subject"``,
            ``"predicate"`` or ``"object"`` is missing from the input.
    """
    import json

    from rdflib import Graph, URIRef, Literal, RDFS, OWL, Namespace, RDF
    from rdflib.namespace import XSD

    with open(json_data_path, encoding="utf-8") as json_file:
        data = json.load(json_file)

    if namespace is None:
        namespace = "http://example.org/"
    if output_path is None:
        # NOTE(review): the default name has a Turtle extension, but the graph
        # is serialized as RDF/XML below. Kept as-is for backward compatibility.
        output_path = "output.ttl"

    def literal_datatype(text):
        """Pick the narrowest XSD datatype whose Python parser accepts *text*."""
        for parse, datatype in ((int, XSD.integer), (float, XSD.double)):
            try:
                parse(text)
            except ValueError:
                continue
            return datatype
        return XSD.string

    quads = [quad for graph in data["graphs"] for quad in graph["quadruples"]]

    # First pass: collect subjects and the objects seen per predicate. Keys and
    # members use the same normalized names the IRIs are built from, so the
    # lookups in the second pass cannot miss (the raw/normalized mismatch used
    # to raise KeyError for predicates containing spaces).
    subjects = set()
    objects_by_predicate = {}
    for quad in quads:
        subjects.add(_to_local_name(quad["subject"]))
        objects_by_predicate.setdefault(_to_local_name(quad["predicate"]), set()).add(
            _to_local_name(quad["object"])
        )

    # Decide once per predicate instead of intersecting sets for every quad.
    object_properties = {
        pred for pred, objs in objects_by_predicate.items() if not objs.isdisjoint(subjects)
    }

    g = Graph()
    g.bind("ex", Namespace(namespace))
    g.bind("rdf", RDF)
    g.bind("rdfs", RDFS)
    g.bind("owl", OWL)

    for pred in objects_by_predicate:
        if pred in object_properties:
            property_type = OWL.ObjectProperty
        else:  # no object of this predicate ever appears as a subject
            property_type = OWL.DatatypeProperty
        g.add((URIRef(namespace + pred), RDF.type, property_type))

    # Second pass: emit one triple per quad.
    for quad in quads:
        subject = URIRef(namespace + _to_local_name(quad["subject"]))
        pred = _to_local_name(quad["predicate"])
        predicate = URIRef(namespace + pred)
        if pred in object_properties:
            g.add((subject, predicate, URIRef(namespace + _to_local_name(quad["object"]))))
        else:
            raw_object = quad["object"]
            g.add((subject, predicate, Literal(raw_object, datatype=literal_datatype(raw_object))))

    g.serialize(destination=output_path, format="xml")


def _to_local_name(text: str) -> str:
    """Return *text* without surrounding whitespace and with spaces as underscores."""
    return text.strip().replace(" ", "_")

0 comments on commit 34e6d64

Please sign in to comment.