# ---------------------------------------------------------------------------
# .gitignore (new file)
# ---------------------------------------------------------------------------
# .env
# __pycache__

# ---------------------------------------------------------------------------
# data_generation.py (new file)
# ---------------------------------------------------------------------------
from required_libaries import time, tqdm, csv
from gpt_processing import process_gpt
from file_management import save_patient, save_records

# Request used by generate_patients() when the caller supplies none:
# a list of diseases followed by the country of origin.
_DEFAULT_USER_REQUEST = """ Heart disease
Cancer
Chronic lower respiratory disease
Stroke
Alzheimer's disease
Diabetes
Influenza and pneumonia
Kidney disease
Suicide
Septicemia
Chronic liver disease and cirrhosis
Hypertension
Parkinson's disease
Intentional self-harm
Chronic obstructive pulmonary disease (COPD)
Liver cancer
Liver disease and cirrhosis
Falls
Leukemia
Breast cancer, Country of origion :USA"""

# Seconds to sleep after every model request / failed attempt.
_PAUSE_SECONDS = 2


def generate_patients(
    user_request: str = None,
    generate_time: int = 1,
    record_count: int = 10,
    output_file_name: str = "patient_data.csv",
):
    """Generate dummy patient profiles with the model and append them to a CSV.

    Args:
        user_request: Diseases and country of origin the patients are based
            on. When ``None`` the built-in disease list (USA) is used.
        generate_time: Number of generation rounds; each round is one model
            request followed by one ``save_patient`` call.
        record_count: Number of patients requested per round.
        output_file_name: CSV file the parsed patients are appended to.
    """
    if user_request is None:
        user_request = _DEFAULT_USER_REQUEST

    for round_index in tqdm(range(generate_time)):
        # Prompt 1 asks the model for patient names and profiles.
        gen_recs = process_gpt(str(user_request), record_count, 1)
        print("Generated time :", round_index + 1)
        print(gen_recs)

        save_patient(gen_recs, output_file_name)
        time.sleep(_PAUSE_SECONDS)


def read_patient_names(file_name: str = "patient_data.csv"):
    """Read the patient CSV and return one description string per patient.

    Each string is the first seven columns of a row (ID, name, age, gender,
    race, country, chief complaint) joined by single spaces; the disease
    column is left out.

    Args:
        file_name: Patient CSV to read, as written by ``save_patient``.

    Returns:
        A list of strings, one per patient row, in file order.
    """
    patients = []
    with open(file_name, mode="r", newline="", encoding="utf-8") as csv_file:
        for row in csv.reader(csv_file):
            if not row:
                continue
            # save_patient() appends with header=False, so the first row is a
            # real patient. Only drop a row that actually is a header.
            if row[0].strip() == "Patient ID":
                continue
            patients.append(" ".join(row[:7]))
    return patients


def generate_records(
    start_index: int = 0,
    max_attempts: int = 10,
    record_count: int = 10,
    output_file_name: str = "medical_data3.csv",
    patient_file_name: str = "patient_data.csv",
):
    """Generate a medical note for every patient and append it to a CSV.

    Patients are read with ``read_patient_names``; for each one the model is
    queried with prompt 2 and the answer is stored with ``save_records``.
    ``save_records`` only reads the first 20 ``|``-separated fields of the
    answer, so one note per patient is stored regardless of ``record_count``.

    A failed patient (model error or an answer ``save_records`` cannot parse)
    is retried in place instead of being skipped. Once ``max_attempts``
    failures have accumulated over the whole run, the function prints the
    index to resume from and returns.

    Args:
        start_index: Zero-based index of the first patient row to process.
        max_attempts: Total number of failed attempts tolerated per run.
        record_count: Number of notes requested from the model per patient.
        output_file_name: CSV file the notes are appended to.
        patient_file_name: Patient CSV to read the patients from.
    """
    patients = read_patient_names(patient_file_name)[start_index:]
    failures = 0

    for index, patient in tqdm(
        enumerate(patients, start=start_index), total=len(patients)
    ):
        print(patient)
        while True:
            try:
                gpt_generated = process_gpt(str(patient), record_count, 2)
                save_records(gpt_generated, output_file_name)
            except Exception as error:  # boundary: any failure means "retry"
                failures += 1
                print(f"Error: {error} occurred for patient {index + 1}.")
                time.sleep(_PAUSE_SECONDS)
                if failures >= max_attempts:
                    print(
                        f"Failed after {failures} attempts. Exiting program. "
                        f"Restart with start_index={index}"
                    )
                    return
                print(f"Retrying from patient {index}...")
                continue
            break

        print(f"patient {index + 1} passed")
        time.sleep(_PAUSE_SECONDS)


# ---------------------------------------------------------------------------
# file_management.py (new file)
# ---------------------------------------------------------------------------
from required_libaries import pd, time, os

# Column order of one medical note; it is also the order in which the model
# is asked to emit the '|'-separated fields.
_RECORD_COLUMNS = [
    'record_id',
    'date_of_visit',
    'patient_id',
    'patient_name',
    'age',
    'gender',
    'race',
    'country_of_origin',
    'chief_complaint',
    'history of present illness',
    'past medical and surgical history',
    'immunization history',
    'allergy history',
    'currently taking drugs',
    'social/personal history',
    'investigation records',
    'lab tests and results',
    'differential diagnosis',
    'treatment and management plan',
    'additional notes',
]

# Column order of one patient row in the patient CSV.
_PATIENT_COLUMNS = [
    'Patient ID',
    'Name',
    'Age',
    'Gender',
    'Race',
    'Country of Origin',
    'Chief Complaint',
    'Disease',
]


def check_csv(filename: str):
    """Return True if *filename* can be opened for reading, False if missing."""
    try:
        with open(filename, 'r'):
            return True
    except FileNotFoundError:
        return False


def _parse_medical_note(gen_recs: str) -> dict:
    """Split a ``'label: value'|'label: value'|...`` answer into a column dict.

    Raises:
        IndexError: If there are fewer fields than columns or a field has no
            ``label: value`` separator. IndexError is kept because that is
            what the original indexing raised for malformed answers.
    """
    fields = gen_recs.split('|')
    if len(fields) < len(_RECORD_COLUMNS):
        raise IndexError(
            f"expected {len(_RECORD_COLUMNS)} '|'-separated fields, "
            f"got {len(fields)}"
        )

    record = {}
    for column, raw_field in zip(_RECORD_COLUMNS, fields):
        text = raw_field.strip()
        # The prompt's example wraps every field in quotes; drop the wrapper
        # only when the field really is wrapped, so inner apostrophes survive.
        if len(text) >= 2 and text[0] == text[-1] and text[0] in "'\"":
            text = text[1:-1]
        # Split on the FIRST colon only: values may contain colons themselves.
        _label, colon, value = text.partition(":")
        if not colon:
            raise IndexError(f"field for {column!r} has no 'label: value' form")
        record[column] = value.strip()
    return record


def save_records(gen_recs: str, outputfilename: str):
    """Parse one model-generated medical note and append it as a CSV row.

    Args:
        gen_recs: Model answer with at least 20 ``|``-separated
            ``label: value`` fields; extra fields are ignored.
        outputfilename: CSV file to append to (no header row is written).

    Raises:
        IndexError: If the answer is malformed (see ``_parse_medical_note``).
    """
    medical_record = _parse_medical_note(gen_recs)
    df = pd.DataFrame([medical_record], columns=_RECORD_COLUMNS)
    df.to_csv(outputfilename, mode='a', header=False, index=False)
    print("Record saved successfully")


def _next_patient_number(outfile_name: str) -> int:
    """Return the number for the next patient ID (highest existing + 1)."""
    if not os.path.exists(outfile_name):
        return 1
    try:
        existing = pd.read_csv(outfile_name, header=None)
    except pd.errors.EmptyDataError:
        # The file exists but holds no rows: read_csv raises instead of
        # returning an empty frame.
        return 1
    if existing.empty:
        return 1
    highest = existing[0].str.replace('P', '', regex=False).astype(int).max()
    return int(highest) + 1


def _parse_patient_line(line: str) -> list:
    """Turn one ``name|age|gender|race|country|complaint|disease`` line into
    a list of the seven stripped values.

    A leading list index such as ``"1. "`` before the name is removed when
    present; a name without an index is accepted as is.

    Raises:
        ValueError: If the line has fewer than seven ``|``-separated cells.
    """
    cells = [cell.strip() for cell in line.split('|')]
    if len(cells) < 7:
        raise ValueError(f"expected 7 '|'-separated cells, got {len(cells)}")

    head, dot, rest = cells[0].partition('.')
    if dot and head.strip().isdigit():
        cells[0] = rest.strip()
    return cells[:7]


def save_patient(patient_names: str, outfile_name: str):
    """Parse model-generated patient lines and append them to the patient CSV.

    Every non-empty line of *patient_names* becomes one row with a new
    sequential ID of the form ``P000001``. Numbering continues after the
    highest ID already in *outfile_name*. Malformed lines are reported and
    skipped without consuming an ID.

    Args:
        patient_names: Model answer, one patient per line, cells separated
            by ``|``.
        outfile_name: CSV file to append to (no header row is written).
    """
    next_number = _next_patient_number(outfile_name)

    patient_data = []
    for line in patient_names.splitlines():
        if not line.strip():
            continue
        try:
            fields = _parse_patient_line(line)
        except ValueError as error:
            print("Error: ", error)
            print(f"Skipping malformed record: {line!r}")
            continue
        patient_id = 'P' + str(next_number).zfill(6)
        patient_data.append([patient_id, *fields])
        next_number += 1

    df = pd.DataFrame(patient_data, columns=_PATIENT_COLUMNS)
    print("pass")
    df.to_csv(outfile_name, mode='a', header=False, index=False)
    print(f"Successfully appended to file {outfile_name}")


# ---------------------------------------------------------------------------
# gpt_processing.py (new file)
# ---------------------------------------------------------------------------
from required_libaries import client

# System prompt 1: ask for a list of dummy patients.
_PATIENT_LIST_PROMPT = """Generate {record_count} dummy patient records.
I will provide the list of differential diseases and country of origin.
You will answer me with full names, ages, genders, race, country of origin , chief complaint(reason to visit the doctor) ,disease.
Your chief complaint must be complex with overlap symptoms and should be relevant to the country of origin. Separate each line by \n and separate each cell by |.
Do not write extra sentences. Do not index.Example format: 1. Sarah Fish |23|Female| White| USA| Fever, headache, joint pains, and vomiting|disease. My first request is
."""

# System prompt 2: ask for a medical note for one patient. Every field of the
# example is separated by "|" so it matches the stated format and the parser
# in save_records().
_MEDICAL_NOTE_PROMPT = """Generate {record_count} medical note for the given patient .I will provide you the patient_id,name,age,gender,race, country of origin and chief complaint. You will answer me these columns:
record_id (format : ddmmpatient_id)| date_of_visit(dd/mm/yyyy) |patient_id |patient_name |age|gender|race|country_of_origin| chief_complaint| history_of_present_illness| past_medical_and_surgical_history|immunization_history |allergy_history| currently_taking_drugs |social_personal_history |investigation_records| lab_tests_and_results| differential_diagnosis| treatment_and_management_plan| additional_notes. Each column must be separated by "|".
Each entry must be similar to a natural medical note can user + or - for present and absent. currently taking drugs must include name,dosage,form,frequency and duration.
Do not write extra sentences.
If more information is required to add , put in additional notes. If there is nothing to generate ,write Null.
The medical note must be as complete as possible and must be relevant to chief complaint. History must contain all relevant additional information related to patient chief complaint.
Example format is Example format: 'record_id: date_of_visit+patient_id'|'date_of_visit: dd/mm/yyyy'| 'patient_id: P000003'| 'patient_name: Aung Soe'| 'age: 55'|'gender: Male'| 'race: Asian'| 'country_of_origin: Myanmar'|'chief_complaint: Extreme thirst, frequent urination, numbness or tingling of feet.'|'history_of_present_illness: explain here'|'past_medical_and_surgical_history: explain here'| 'immunization_history: explain here'| 'allergy_history: explain here'| 'currently_taking_drugs: '| 'social_personal_history: explain here'| 'investigation_records: explain here'| 'lab_tests_and_results: explain here'| 'differential_diagnosis: explain here'| 'treatment_and_management_plan: explain here'| 'additional_notes: explain here.'
My first request is

"""


def process_gpt(
    user_request: str,
    record_count: int = 10,
    prompt_num: int = None,
    model: str = "gpt-3.5-turbo",
    max_tokens: int = 2000,
):
    """Send *user_request* to the chat model and return the generated text.

    Args:
        user_request: Content of the user message.
        record_count: Number of records the system prompt asks for.
        prompt_num: Which system prompt to use: 1 generates patient names
            and profiles, 2 generates a patient medical note, 3 sends an
            empty system prompt.
        model: Chat model name passed to the API.
        max_tokens: Completion token limit passed to the API.

    Returns:
        The message content of the first choice in the response.

    Raises:
        ValueError: If *prompt_num* is not 1, 2 or 3 (including the default
            ``None``), instead of failing later on an unbound variable.
    """
    if prompt_num == 1:
        system_prompt = _PATIENT_LIST_PROMPT.format(record_count=record_count)
    elif prompt_num == 2:
        system_prompt = _MEDICAL_NOTE_PROMPT.format(record_count=record_count)
    elif prompt_num == 3:
        system_prompt = ""
    else:
        raise ValueError(
            f"No prompt found for prompt_num={prompt_num!r}; expected 1, 2 or 3"
        )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_request},
    ]
    response = client.chat.completions.create(
        model=model, messages=messages, max_tokens=max_tokens
    )
    return response.choices[0].message.content


# ---------------------------------------------------------------------------
# main.py (new file)
# ---------------------------------------------------------------------------
from data_generation import generate_records, generate_patients


def main():
    """Generate the patient list, then one medical note per patient."""
    # Create patient names.
    generate_patients(generate_time=1, record_count=5, output_file_name="patient_data.csv")
    # Create the patients' records.
    generate_records(start_index=0, record_count=5, output_file_name="medical_data.csv")


# Guarded so importing this module does not trigger API calls and file writes.
if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# medical_data.csv (new file; its data rows follow)
# ---------------------------------------------------------------------------
+0102P000002',dd/mm/yyyy',P000002',Michael Rodriguez',60',Male',Hispanic',USA',"Memory loss, confusion, and difficulty performing daily tasks.'","Gradual onset of memory loss over the last 6 months, recently progressed to confusion and difficulty with daily tasks. No history of head trauma or seizures. No significant weight changes or appetite alterations.'","Hypertension, hyperlipidemia, type 2 diabetes. Appendectomy at age 30. No history of psychiatric disorders.'",Up to date with routine vaccinations.',No known drug allergies.',"Donepezil 10mg oral tablet once daily for dementia, Metformin 1000mg oral tablet twice daily for diabetes, Lisinopril 20mg oral tablet once daily for hypertension. All medications have been taken for the past 3 years.'","Lives with his wife who reports progressive memory decline. Former accountant, retired 5 years ago. Non-smoker, occasional alcohol consumption.'",Brain MRI scheduled for next week.',"Fasting blood glucose 158 mg/dL, HbA1c 7.3%, LDL cholesterol 110 mg/dL, HDL cholesterol 50 mg/dL, triglycerides 150 mg/dL.'","Alzheimer's disease, vascular dementia, side effect of medication, depression.'",Continue current medications. Initiate cognitive behavioral therapy sessions. Follow up in 2 weeks for MRI results.',Family history of Alzheimer's disease unknown.' +0103P000003',01/03/2023',P000003',Olivia Chen',35',Female',Asian',USA',"Shortness of breath, chest pain, and fatigue.'","Patient complains of sudden onset shortness of breath, described as worsening with activity. She also reports sharp chest pain that radiates to the left arm, associated with feeling of tightness. Patient mentions feeling unusually tired even with adequate rest.'",No significant past medical or surgical history.',Up to date with routine immunizations.',No known allergies.',-',"Non-smoker, occasional alcohol use. 
Denies illicit drug use.'","ECG, CBC, Chest X-ray requested.'",Pending',"Pulmonary embolism, myocardial infarction, pneumonia.'",Patient to be admitted for further evaluation and monitoring. Oxygen therapy initiated. Pain management provided. Anticoagulation therapy considered based on investigation results.',Monitor closely for any changes in symptoms or signs of complications.' +0104P000004',03/08/2023',P000004',Ethan Thompson',70',Male',Black',USA',"Sudden weakness or numbness, difficulty speaking, and severe headache.'","The patient presents with sudden onset of weakness and numbness, difficulty speaking, and complains of a severe headache. No history of trauma or recent illness reported.'","Hypertension, Diabetes Mellitus type 2, Coronary Artery Disease (CAD). Previous history of transient ischemic attack (TIA). Laparoscopic cholecystectomy in 2015.'",Up to date.,Penicillin - rash,"Amlodipine 5mg, Oral, Once daily, Indefinite; Metformin 1000mg, Oral, Twice daily, Indefinite; Aspirin 81mg, Oral, Once daily, Indefinite; Atorvastatin 40mg, Oral, Once daily, Indefinite.'","Former smoker, quit 10 years ago. Occasional alcohol use. No illicit drug use. Lives independently. Retired truck driver.'",Neurological examination is pending. ECG showed sinus rhythm. CT Head scheduled for today.',Lipid panel within normal limits. HbA1c at 7.2%. Normal renal function. Elevated LDL levels.',"Acute stroke, transient ischemic attack, intracranial hemorrhage, space-occupying lesion in brain.'",Admit for further evaluation. Ensure adequate hydration. Neurology consult. Monitor vitals closely. Initiate antiplatelet therapy. Stat CT Head. Consider thrombolytic therapy if indicated. Blood pressure control. Diabetic management.',Family contact information updated. Advance directives on file. Fall risk assessment completed. Patient requires assistance for activities of daily living.' 
+0105P000005',07/10/2023',P000005',Sophia Brown',55',Female',White',USA',"Excessive thirst, frequent urination, and unexplained weight loss.'","Patient reports increased thirst, frequent urination, and unintentional weight loss over the past few weeks. No significant relief with increased fluid intake.'","Hypertension, Type 2 Diabetes mellitus'",Up to date',None',"Metformin 500mg Tablet Oral once daily - for diabetes, Lisinopril 10mg Tablet Oral once daily - for hypertension'","Non-smoker, occasional alcohol use'","Blood glucose levels, HbA1c, Renal function tests'","Blood glucose 280mg/dL, HbA1c 9%, Creatinine within normal limits'","Diabetes mellitus, Diabetic ketoacidosis, Hyperthyroidism'","Start insulin therapy, monitor blood glucose levels, refer to endocrinologist for further evaluation'",Patient scheduled for diabetic education session next week.' +0806P000006',08/06/2023',P000006',Emily Anderson',45',Female',White',USA',"Fatigue, weight loss, and chest pain.'",Patient complains of persistent fatigue for the past 2 months associated with unintentional weight loss of 8 pounds. She also reports sharp chest pain on exertion and at rest.',Negative for any significant medical or surgical history.',Up to date',No known allergies',-',"Denies smoking, occasional alcohol use'","ECG - Sinus tachycardia, CBC - Mild anemia'",CBC,"Anemia, Cardiac etiology for chest pain, Underlying infection causing fatigue'","Iron supplementation for anemia, Cardiology referral for further evaluation of chest pain, Monitor closely for any signs of infection'",Consider further imaging studies if no improvement with initial management.' +150007,15/07/2023,P000007,Michael Chang,60,Male,Asian,USA,"Shortness of breath, chronic cough, and wheezing.","The patient presents with a two-month history of progressively worsening shortness of breath, associated with a chronic cough productive of yellowish sputum. He also reports wheezing episodes, especially at night, disturbing his sleep. 
No history of fever or chest pain. No relief with over-the-counter cough medications. +History of smoking 1 pack/day for the past 30 years. -No history of known lung diseases or environmental exposures.","Hypertension, well controlled on amlodipine 5mg daily. No surgical history.",Up to date with routine immunizations.,No known drug allergies.,"Amlodipine 5mg tablet, daily, oral, indefinite duration.",+History of smoking. -Denies alcohol or illicit drug use.,Chest X-ray ordered.,Pending chest X-ray results.,"Acute exacerbation of chronic obstructive pulmonary disease, asthma, pneumonia.",1. Await chest X-ray results. 2. Start short-acting bronchodilator inhaler for symptomatic relief. 3. Smoking cessation counseling. 4. Follow-up in 1 week for review.,Encourage lifestyle modifications such as smoking cessation and regular exercise.' +0109P000008',01/09/2023',P000008',Olivia Rodriguez',70',Female',Hispanic',USA',"Forgetfulness, confusion, and difficulty in performing daily tasks.'","The patient reports a progressive decline in memory and cognitive functions over the past few months, leading to difficulties in daily activities such as remembering appointments, completing tasks, and following conversations. She also experiences occasional disorientation and forgetfulness of recent events.'","Hypertension, type 2 diabetes, and arthritis. No history of cognitive impairment or neurological disorders.'",Up to date with routine vaccinations.',No known drug allergies.',"Donepezil 10mg Oral Tablet Once daily, ongoing.","Lives alone, independent in daily activities, retired teacher. Non-smoker, occasional alcohol use.'","Mini-Mental State Examination (MMSE) score of 20/30, suggestive of mild cognitive impairment. Blood tests for vitamin B12 and thyroid function within normal limits.'","Normal CBC, CMP, and lipid profile. 
MRI brain scheduled for next week to evaluate for underlying causes of cognitive decline.'","Mild cognitive impairment, early dementia, depression, medication side effects.'","Continued monitoring of cognitive function, initiation of cognitive training exercises, referral to neurology for further evaluation. Consideration of support services and caregiver assistance.'",Family history of Alzheimer's disease in mother. Close monitoring of medication adherence and cognitive symptoms recommended.' +0109P000009',01/09/2023',P000009',Tyrone Johnson',55',Male',Black',USA',"Weakness in one side of the body, slurred speech, and severe headache.'","Sudden onset weakness, slurred speech, and severe headache. No history of trauma or recent illnesses. No fever, seizures, or visual disturbances'","Hypertension, Type 2 Diabetes Mellitus'",Up to date with routine immunizations',None',"Amlodipine 5mg tablet, once daily, for hypertension, Metformin 1000mg tablet, twice daily, for diabetes, Aspirin 81mg tablet, once daily, for cardiovascular protection'","Non-smoker, occasional alcohol use'",CT scan of the brain scheduled',Pending',"Acute Stroke, Transient Ischemic Attack, Intracerebral Hemorrhage'","Immediate transfer to stroke center, initiate tissue plasminogen activator (tPA) if eligible, control blood pressure, monitor neurological status'",Close monitoring for any changes in neurological status post tPA administration.' +1208P000010',12/08/2023',P000010',Harper Smith',35',Female',White',USA',"Increased thirst, frequent urination, and unexplained weight loss.'","The patient reports a recent onset of increased thirst, frequent urination, and unexplained weight loss. Nocturia or excessive hunger is denied. No history of recent infections or travel.'",No significant past medical or surgical history reported.',Up to date with routine immunizations.',No known drug allergies.',-',"Non-smoker, occasional alcohol use. Works as a teacher. 
Denies any recent changes in diet or exercise routine.'","Physical examination performed. Blood tests ordered including fasting blood glucose, HbA1c, and urine analysis.'",Pending',"Diabetes mellitus, hyperthyroidism, diabetes insipidus, urinary tract infection.'",Educate on polyuria and polydipsia. Advise follow-up after blood test results. Consider referral to endocrinologist depending on findings.',Encourage increased water intake and monitoring of symptoms.' diff --git a/patient_data.csv b/patient_data.csv new file mode 100644 index 0000000..77acbe5 --- /dev/null +++ b/patient_data.csv @@ -0,0 +1,10 @@ +P000001,Emily Johnson,45,Female,White,USA,"Fatigue, unintentional weight loss, and persistent cough",Lung cancer +P000002,Michael Rodriguez,60,Male,Hispanic,USA,"Memory loss, confusion, and difficulty performing daily tasks",Alzheimer's disease +P000003,Olivia Chen,35,Female,Asian,USA,"Shortness of breath, chest pain, and fatigue",Chronic obstructive pulmonary disease (COPD) +P000004,Ethan Thompson,70,Male,Black,USA,"Sudden weakness or numbness, difficulty speaking, and severe headache",Stroke +P000005,Sophia Brown,55,Female,White,USA,"Excessive thirst, frequent urination, and unexplained weight loss",Diabetes +P000006,Emily Anderson,45,Female,White,USA,"Fatigue, weight loss, and chest pain",Cancer +P000007,Michael Chang,60,Male,Asian,USA,"Shortness of breath, chronic cough, and wheezing",Chronic lower respiratory disease +P000008,Olivia Rodriguez,70,Female,Hispanic,USA,"Forgetfulness, confusion, and difficulty in performing daily tasks",Alzheimer's disease +P000009,Tyrone Johnson,55,Male,Black,USA,"Weakness in one side of the body, slurred speech, and severe headache",Stroke +P000010,Harper Smith,35,Female,White,USA,"Increased thirst, frequent urination, and unexplained weight loss",Diabetes diff --git a/required_libaries.py b/required_libaries.py new file mode 100644 index 0000000..a6597b3 --- /dev/null +++ b/required_libaries.py @@ -0,0 +1,12 @@ +from openai 
import OpenAI +from tqdm import tqdm +import time +import pandas as pd +import os +import csv +from dotenv import load_dotenv +load_dotenv() + + +openaikey = os.getenv("OPENAI_KEY") +client = OpenAI(api_key=openaikey) \ No newline at end of file