1111
1212import sys
1313from pathlib import Path
14- from typing import Dict
1514
1615# Add src to path
1716sys .path .insert (0 , str (Path (__file__ ).parent / "src" ))
1817
19- from data import parse_vcf_file , VariantAnnotator
18+ from data import VariantAnnotator , parse_vcf_file
2019from models import NutrientPredictor
2120
2221
# Scores at or above this value are reported as HIGH and get recommendations.
_HIGH_RISK_THRESHOLD = 0.6


def _classify_risk(risk_score) -> tuple:
    """Map a risk score in [0, 1] to its ``(level, icon)`` display pair."""
    # The negated range test also rejects NaN, which fails every comparison.
    if not 0.0 <= risk_score <= 1.0:
        return "UNKNOWN", "[?]"
    if risk_score < 0.3:
        return "LOW", "[LOW]"
    if risk_score < _HIGH_RISK_THRESHOLD:
        return "MODERATE", "[MOD]"
    # Upper bound is inclusive so a score of exactly 1.0 is HIGH, not UNKNOWN.
    return "HIGH", "[HIGH]"


def _print_risk_report(predictions) -> None:
    """Print score and level per nutrient, plus advice for high-risk ones."""
    print("\n [NUTRIENT DEFICIENCY RISK ASSESSMENT]")
    print("-" * 80)

    for nutrient, risk_score in predictions.items():
        level, icon = _classify_risk(risk_score)

        nutrient_name = nutrient.replace("_", " ").title()
        print(f"\n {icon} {nutrient_name}:")
        print(f" Risk Score: {risk_score:.2%}")
        print(f" Risk Level: {level}")

        # Same threshold as the HIGH band, so every HIGH result gets advice
        # (previously a score of exactly 0.6 was HIGH but got none).
        if risk_score >= _HIGH_RISK_THRESHOLD:
            print(" Recommendations:")
            for rec in get_recommendations(nutrient):
                print(f" - {rec}")


def _print_genetic_insights(annotated_df) -> None:
    """Print details for the well-known rsIDs present in ``annotated_df``."""
    print("\n " + "=" * 80)
    print("🧬 GENETIC INSIGHTS")
    print("=" * 80)

    # Look for key variants
    key_variants = {
        "rs1801133": "MTHFR C677T - Affects folate metabolism",
        "rs429358": "APOE e4 - Increased Alzheimer's risk",
        "rs601338": "FUT2 - Affects vitamin B12 absorption",
        "rs2228570": "VDR FokI - Affects vitamin D receptor",
    }

    found_variants = annotated_df[annotated_df["rsid"].isin(list(key_variants))]

    if len(found_variants) == 0:
        print("\n No high-impact variants detected in this sample")
        return

    print("\n Key variants detected:")
    # Rows are already restricted to known rsIDs by the isin() filter above,
    # so the lookup into key_variants cannot miss.
    for _, var in found_variants.iterrows():
        rsid = var["rsid"]
        print(f"\n - {rsid} ({var['genotype']})")
        print(f" Gene: {var.get('gene_symbol', 'Unknown')}")
        print(f" Impact: {key_variants[rsid]}")
        print(f" Population frequency: {var.get('gnomad_af', 'Unknown')}")


def run_demo(vcf_path: Path):
    """Run complete Dirghayu pipeline demo.

    Parses the VCF file, annotates the variants, trains and saves the
    nutrient predictor, then prints a health prediction report followed by
    genetic insights. Returns early (after a warning) when the VCF yields no
    variants.

    Args:
        vcf_path: Path to the VCF file to analyse.
    """
    print("=" * 80)
    print("DIRGHAYU: India-First Longevity Genomics Platform")
    print("=" * 80)

    # Step 1: Parse VCF
    print("\n [1/4] Parsing VCF file...")
    print(f" Input: {vcf_path}")

    variants_df = parse_vcf_file(vcf_path)
    print(f" [OK] Found {len(variants_df)} variants")

    if len(variants_df) == 0:
        print(" [!] No variants found!")
        return

    print("\n Sample variants:")
    print(variants_df[["chrom", "pos", "rsid", "ref", "alt", "genotype"]].head())

    # Step 2: Annotate variants
    print("\n [2/4] Annotating variants with public databases...")
    print(" Sources: Ensembl VEP, gnomAD")
    print(" [!] This makes API calls - may take 30-60 seconds")

    annotator = VariantAnnotator()
    annotated_df = annotator.annotate_dataframe(variants_df)

    print("\n [OK] Annotation complete!")
    print("\n Annotated variants:")
    print(annotated_df[["rsid", "gene_symbol", "consequence", "gnomad_af"]].head())

    # Step 3: Train model (on synthetic data for demo)
    print("\n [3/4] Training nutrient deficiency predictor...")
    print(" [!] Using synthetic data for demonstration")

    predictor = NutrientPredictor()
    predictor.train(
        variants_df=annotated_df,
        labels_df=None,  # Would be real clinical data
        epochs=30,
    )

    # Save model. Create the target directory first so a fresh checkout does
    # not fail here; this is a no-op if the directory already exists.
    model_path = Path("models/nutrient_predictor.pth")
    model_path.parent.mkdir(parents=True, exist_ok=True)
    predictor.save(model_path)

    # Step 4: Generate predictions
    print("\n [4/4] Generating personalized health predictions...")

    predictions = predictor.predict(annotated_df)

    print("\n " + "=" * 80)
    print("HEALTH PREDICTION REPORT")
    print("=" * 80)

    # Display nutrient deficiency risks
    _print_risk_report(predictions)

    # Genetic insights from annotated variants
    _print_genetic_insights(annotated_df)

    print("\n " + "=" * 80)
    print("[OK] Demo complete!")
    print("=" * 80)
@@ -148,34 +147,34 @@ def run_demo(vcf_path: Path):
148147
def get_recommendations(nutrient: str) -> list:
    """Get dietary/lifestyle recommendations for nutrient deficiency risk.

    Args:
        nutrient: Nutrient identifier such as ``"vitamin_b12"``. Matching
            ignores case and surrounding whitespace and treats spaces like
            underscores, so a display name like ``"Vitamin B12"`` resolves
            to the same entry.

    Returns:
        A new list of recommendation strings. For an unrecognised nutrient
        the list holds the single entry ``"Consult healthcare provider"``.
    """
    recommendations = {
        "vitamin_b12": [
            "Consider B12 supplementation (methylcobalamin 1000 mcg/day)",
            "Increase fortified foods (cereals, plant milk)",
            "If vegetarian, consult about B12 injections",
            "Monitor serum B12 levels every 6 months",
        ],
        "vitamin_d": [
            "Vitamin D3 supplementation (2000 IU/day)",
            "15 minutes sun exposure daily (10 AM - 12 PM)",
            "Include fatty fish, egg yolks, fortified milk",
            "Check 25(OH)D levels quarterly",
        ],
        "iron": [
            "Iron-rich foods (lentils, spinach, fortified grains)",
            "Vitamin C with meals to enhance absorption",
            "Avoid tea/coffee with iron-rich meals",
            "Consider iron supplementation if confirmed deficient",
        ],
        "folate": [
            "Methylfolate supplementation (400-800 mcg/day)",
            "Leafy greens, legumes, fortified grains",
            "Ensure adequate B6 and B12 intake",
            "Monitor homocysteine levels",
        ],
    }

    # Normalise the key so callers may pass either the raw model output key
    # or a human-formatted name without silently hitting the fallback.
    key = nutrient.strip().lower().replace(" ", "_")

    return recommendations.get(key, ["Consult healthcare provider"])
180179
181180
@@ -186,14 +185,14 @@ def get_recommendations(nutrient: str) -> list:
186185 else :
187186 # Use sample VCF
188187 vcf_path = Path ("data/sample.vcf" )
189-
188+
190189 if not vcf_path .exists ():
191190 print (f"Error: VCF file not found: { vcf_path } " )
192191 print ("\n Usage:" )
193192 print (" python demo.py <path_to_vcf_file>" )
194193 print ("\n Or create sample data first:" )
195194 print (" python scripts/download_data.py" )
196195 sys .exit (1 )
197-
196+
198197 # Run demo
199198 run_demo (vcf_path )
0 commit comments