|
| 1 | +import requests |
| 2 | +from pdf_processor import process_pdf |
| 3 | +from config import HEADERS, DIGRAPH_EXAMPLE |
| 4 | + |
def generate_digraph(base64_images, timeout=120):
    """Ask the chat-completions API for one Graphviz digraph per page image.

    Args:
        base64_images: Iterable of base64-encoded page images. Each one is
            embedded in a ``data:image/jpeg;base64,...`` URL, so the payload
            is presumably JPEG data (confirm against ``process_pdf``).
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout ``requests`` can block forever on a stalled
            connection.

    Returns:
        A list of strings of the form ``"Page <n>: <model answer>"``, one
        per input image, numbered from 1 in input order.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If a request exceeds ``timeout``.
    """
    # The system prompt does not depend on the page, so build it once.
    system_prompt = (
        "You are an AI specialized in extracting structured information from documents. "
        "Your task is to analyze the provided image and generate a Graphviz digraph that "
        "represents the entities and their relationships found within. Focus on identifying "
        "key concepts, hierarchical structures, and relevant data points regardless of the "
        "document type. The digraph should be clear, well-organized, and follow the structure "
        "of the example provided. Ensure that all entities are properly connected, labeled, "
        "and reflect the content and relationships present in the document."
    )

    page_answers = []
    for page_num, base64_image in enumerate(base64_images, start=1):
        payload = {
            "model": "gpt-4o",
            "messages": [
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"Generate a digraph like the following for the meaningful entities in this image, following this example: {DIGRAPH_EXAMPLE} (Page {page_num})",
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                        },
                    ],
                },
            ],
        }

        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=HEADERS,
            json=payload,
            timeout=timeout,
        )
        # Fail loudly on HTTP errors instead of hitting a confusing KeyError
        # on 'choices' when the body is an error document.
        response.raise_for_status()
        answer = response.json()['choices'][0]['message']['content']
        page_answers.append(f"Page {page_num}: {answer}")
        print(f"Processed page {page_num}")

    return page_answers
| 31 | + |
def merge_digraphs(page_answers, timeout=120):
    """Ask the chat-completions API to merge per-page digraphs into one graph.

    Args:
        page_answers: Sequence of strings (the per-page answers) that are
            joined with blank lines and embedded in the user prompt.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The raw text content of the model's first choice. The prompts ask
        for code only, but nothing here enforces that, so the text may
        still be wrapped in a Markdown code fence.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    digraph_prompt = (
        "Merge the partial digraphs that I provide to you merging together all the detected entities, \n\n"
        + "\n\n".join(page_answers)
        + "\nYour answer digraph must be a tree and must contain only the code for a valid graphviz graph"
    )
    # This must be an f-string: as a plain string the model received the
    # literal text "{DIGRAPH_EXAMPLE}" instead of the reference graph.
    system_prompt = (
        f"You are an AI that generates only valid digraph code without any comments before or after the generated code. At the end, it always shows the generated viz with dot.render('ontology_graph', format='png'). You have to provide a graph that takes as reference the following graph: {DIGRAPH_EXAMPLE}"
    )
    digraph_payload = {
        "model": "gpt-4o",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": digraph_prompt},
        ],
    }

    digraph_response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=HEADERS,
        json=digraph_payload,
        timeout=timeout,
    )
    # Surface HTTP errors directly rather than as a KeyError on 'choices'.
    digraph_response.raise_for_status()
    return digraph_response.json()['choices'][0]['message']['content']
| 46 | + |
def _strip_code_fence(reply):
    """Return the code inside a Markdown code fence, or the stripped reply if unfenced."""
    text = reply.strip()
    start = text.find("```")
    if start == -1:
        return text
    # Skip the rest of the opening fence line (it may carry a language tag).
    newline = text.find("\n", start)
    body_start = newline + 1 if newline != -1 else start + 3
    end = text.rfind("```")
    if end < body_start:
        # Only an opening fence was found; keep everything after it.
        end = len(text)
    return text[body_start:end].strip()


def main():
    """Run the pipeline on ``./test.pdf``: per-page digraphs, merge, execute.

    The PDF is turned into images by ``process_pdf``; if that yields
    nothing truthy, the function returns without doing anything. Otherwise
    the merged model reply is printed and then executed as Python code.
    """
    pdf_path = './test.pdf'
    base64_images = process_pdf(pdf_path)

    if not base64_images:
        return

    page_answers = generate_digraph(base64_images)
    digraph_code = merge_digraphs(page_answers)

    # The old fixed slice [9:-3] only worked when the reply was wrapped in
    # exactly "```python" ... "```"; any other shape silently cut real code.
    source = _strip_code_fence(digraph_code)

    print("\nDigraph code for all pages:")
    print(source)
    print("digraph_code_execution----------------------------------")
    # SECURITY: this executes model-generated code with full interpreter
    # privileges. Only run on trusted documents, or replace with a sandbox
    # or a parser that accepts Graphviz source only.
    exec(source)
| 59 | + |
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
| 62 | + |
| 63 | + |
| 64 | + |
| 65 | + |
| 66 | + |
0 commit comments