-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_generation.py
100 lines (84 loc) · 3.42 KB
/
data_generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from required_libaries import time, tqdm,csv
from gpt_processing import process_gpt
from file_management import save_patient,save_records
# Data-generation helpers. Imports or functions specific to data generation
# that are not defined elsewhere in the project can be added here.
# Functions for generating patient data and medical records.
def generate_patients(
    user_request: str = None,
    generate_time: int = 1,
    record_count: int = 10,
    output_file_name: str = "patient_data.csv",
):
    """Generate batches of patient profiles with GPT and save every batch.

    Args:
        user_request: Prompt text handed to ``process_gpt``. When ``None``, a
            built-in list of conditions followed by a country of origin is
            used instead.
        generate_time: Number of generation rounds to run (default 1).
        record_count: Forwarded unchanged to ``process_gpt`` as its second
            argument.
        output_file_name: Forwarded to ``save_patient`` together with each
            generated batch.
    """
    prompt = user_request
    if prompt is None:
        # The continuation lines of this literal are intentionally flush-left:
        # any leading whitespace would become part of the prompt text.
        prompt = """ Heart disease
Cancer
Chronic lower respiratory disease
Stroke
Alzheimer's disease
Diabetes
Influenza and pneumonia
Kidney disease
Suicide
Septicemia
Chronic liver disease and cirrhosis
Hypertension
Parkinson's disease
Intentional self-harm
Chronic obstructive pulmonary disease (COPD)
Liver cancer
Liver disease and cirrhosis
Falls
Leukemia
Breast cancer, Country of origion :USA"""

    for round_number in tqdm(range(1, generate_time + 1)):
        # process_gpt yields the GPT-generated patient names and profiles.
        # NOTE(review): the trailing 1 presumably selects the "patient"
        # generation mode of process_gpt - confirm against gpt_processing.
        batch = process_gpt(str(prompt), record_count, 1)
        print("Generated time :", round_number)
        print(batch)
        # Persist this round's patients before starting the next one.
        save_patient(batch, output_file_name)
        # Pause between rounds.
        time.sleep(2)
##Read patient data
def read_patient_names(file_name: str = "patient_data.csv") -> list:
    """Read patient rows from a CSV file and return them as strings.

    The first row is treated as a header and skipped. Every remaining row is
    reduced to its first seven columns, joined with single spaces.

    Args:
        file_name: Path of the CSV file to read. Defaults to
            ``"patient_data.csv"``, which is also the default output file of
            ``generate_patients``.

    Returns:
        One string per data row, in file order. An empty list is returned
        when the file is empty or contains only the header row.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline="" lets the csv module do its own line-ending handling, so
    # quoted fields containing newlines are parsed correctly.
    with open(file_name, mode="r", newline="") as csv_file:
        csv_reader = csv.reader(csv_file)
        # Skip the header row; the None default keeps a completely empty file
        # from raising StopIteration.
        next(csv_reader, None)
        return [" ".join(row[:7]) for row in csv_reader]
def generate_records(
    start_index: int = 0,
    max_attempts: int = 10,
    record_count: int = 10,
    output_file_name: str = "medical_data3.csv",
):
    """Generate a medical record for every patient row of the patient CSV.

    Patient rows come from ``read_patient_names()``. Each row is passed to
    ``process_gpt`` and the result is handed to ``save_records`` together
    with ``output_file_name``.

    Failure handling:
      * An ``IndexError`` while handling a row aborts the current pass. The
        CSV is read again and processing resumes at that same row. This may
        happen at most ``max_attempts`` times over the whole run; after that
        the function prints the index to restart from and returns.
      * Any other exception retries the same row, up to ``max_attempts``
        tries for that row. A row that keeps failing is skipped and listed
        in a summary at the end, so one bad row does not stop the run and is
        not dropped silently.

    Args:
        start_index: 0-based index (header excluded) of the first patient row
            to process.
        max_attempts: Upper bound for both retry mechanisms described above.
        record_count: Forwarded unchanged to ``process_gpt`` as its second
            argument.
        output_file_name: Forwarded to ``save_records``.
    """
    attempts = 0
    # 0-based index of the first row that still needs a record. It is tracked
    # explicitly so the messages below never depend on a loop variable that
    # may not have been bound yet.
    resume_index = start_index
    # 0-based indices of rows abandoned after repeated non-IndexError failures.
    skipped = []

    while attempts < max_attempts:
        try:
            # Read the patient CSV again on every pass and drop the rows that
            # are already done.
            patients = read_patient_names()[resume_index:]
            for i, patient in tqdm(
                enumerate(patients, start=resume_index), total=len(patients)
            ):
                resume_index = i
                print(patient)
                for row_attempt in range(1, max_attempts + 1):
                    try:
                        # NOTE(review): the trailing 2 presumably selects the
                        # "medical record" mode of process_gpt - confirm.
                        gpt_generated = process_gpt(str(patient), record_count, 2)
                        # Save the generated patient note.
                        save_records(gpt_generated, output_file_name)
                    except IndexError:
                        print(f"IndexError: Please check the CSV file and create_record() function for patient {i+1}.")
                        time.sleep(2)
                        raise  # Handled by the outer loop, which restarts at this row.
                    except Exception as e:
                        # Deliberately broad: any failure of the GPT call or
                        # of saving should trigger another try for this row.
                        time.sleep(2)
                        print(
                            f"Error: {e} occurred for patient {i+1} "
                            f"(attempt {row_attempt}/{max_attempts})."
                        )
                    else:
                        print(f"patient {i+1} passed")
                        time.sleep(2)
                        break
                else:
                    # Every try for this row failed: record it and move on.
                    skipped.append(i)
                    print(f"Giving up on patient {i+1} after {max_attempts} attempts. Skipping.")
                resume_index = i + 1
            break  # Every remaining row was handled; leave the retry loop.
        except IndexError:
            print("IndexError: Please check the CSV file and create_record() function.")
            time.sleep(2)
            attempts += 1
            if attempts == max_attempts:
                print(
                    f"Failed after {attempts} attempts. Exiting program. "
                    f"Restart with start_index={resume_index}"
                )
            else:
                time.sleep(2)
                print(f"Retrying from patient {resume_index + 1}...")

    if skipped:
        print(f"Skipped patients (1-based): {[index + 1 for index in skipped]}")