diff --git a/.gitignore b/.gitignore
index b1cb160..767acfe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -157,5 +157,5 @@ cython_debug/
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
 
diff --git a/flask_app/app.py b/flask_app/app.py
new file mode 100644
index 0000000..d64e92b
--- /dev/null
+++ b/flask_app/app.py
@@ -0,0 +1,71 @@
+import os,sys
+# Resolve the parent directories against this file (not the CWD) so imports work from any working directory
+sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
+sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'library_spectra_validation'))
+
+from flask import Flask
+from flask import request, render_template, redirect, url_for
+
+import pandas as pd
+import numpy as np
+from matplotlib.figure import Figure
+
+from library_spectra_validation.library_handler import LibraryHandler
+
+
+app = Flask(__name__)
+app.config.from_pyfile("config.py")
+
+@app.route('/')
+def index():
+    return render_template('index.html')  # landing page; the template extends base.html
+
+@app.route('/upload', methods=['GET','POST'])
+def upload():
+    """Save an upload into UPLOAD_FOLDER under its basename and preview it; otherwise show the form."""
+    file = request.files.get('file')
+    fname = os.path.basename(file.filename or '') if file else ''  # drop client-supplied directories
+    if request.method == 'POST' and fname not in ('', '.', '..'):
+        fpath = os.path.join(app.config['UPLOAD_FOLDER'], fname)
+        file.save(fpath)
+        df_spectra = pd.DataFrame({"spectrum": LibraryHandler(fpath).spectra}) #TODO 
+        return render_template("preview.html", data=df_spectra.to_html())
+    return render_template('upload_data.html')
+    
+@app.route('/preview', methods=['POST'])
+def preview():
+    """Render one sheet of the posted Excel file via the `data`/`name` variables preview.html reads."""
+    df = pd.read_excel(request.files['file'], sheet_name=request.form['sheet'])
+    return render_template('preview.html', data=df.to_html(), name=request.form['sheet'])
+
+# @app.route('/plot_spectrum', methods=['POST'])
+# def plot_spectrum():
+#     # TODO plotly??
+#     cmp_selector = request.form['compound_name']
+#     cmp_id = cmp_list.index(cmp_selector)
+#     cmp_smile = df_spectra.loc[cmp_id]["smiles"]
+
+#     plt_spectrum = spectra[cmp_id]
+
+#     fig, axs = plt.subplots(1, 2, figsize=(12.8, 4.2), gridspec_kw={'width_ratios': [2, 5]}, sharey=False)
+#     cmp_img = Chem.Draw.MolToImage(Chem.MolFromSmiles(cmp_smile), ax=axs[0])
+
+#     axs[0].grid(False)
+#     axs[0].tick_params(axis='both', bottom=False, labelbottom=False, left=False, labelleft=False)
+#     axs[0].set_title(cmp_smile)
+#     axs[0].imshow(cmp_img)
+#     axs[0].axis("off")
+
+#     plot_spectrum(plt_spectrum, axs[1])
+
+#     # Save the plot to a temporary file or convert it to a base64 string to embed in HTML
+#     # Example: plt.savefig('static/plot.png')
+#     # Pass the path or base64 string to the template
+#     return render_template('plot_spectrum.html', plot_path='static/plot.png')
+
+@app.route('/about')
+def about():
+    return render_template('about.html')  # static "About" page with a link to the source repository
+
+if __name__ == '__main__':
+    app.run(debug=True)  # NOTE(review): debug=True is for local development only - confirm it is never used in deployment
\ No newline at end of file
diff --git a/flask_app/config.py b/flask_app/config.py
new file mode 100644
index 0000000..96f9a31
--- /dev/null
+++ b/flask_app/config.py
@@ -0,0 +1 @@
+UPLOAD_FOLDER = 'resources'
\ No newline at end of file
diff --git a/flask_app/resources/Broken_records.txt b/flask_app/resources/Broken_records.txt
new file mode 100644
index 0000000..9df55e1
--- /dev/null
+++ b/flask_app/resources/Broken_records.txt
@@ -0,0 +1,19 @@
+I cleaned all 12 mass spectra in "test_case_correct.mgf" by:
+--> adding inchikey
+--> cleaning the inchi
+--> adding formula
+
+So all mass spectra in the "test_case_wrong.mgf" are missing formula and inchikey
+
+mass spectrum 1: no change
+mass spectrum 2: wrong adduct
+mass spectrum 3: wrong pepmass (precursor)
+mass spectrum 4: wrong smiles
+mass spectrum 5: missing adduct
+mass spectrum 6: no change (share the same compound name as mass spectrum 5, but different adduct)
+mass spectrum 7: missing adduct
+mass spectrum 8: missing adduct
+mass spectrum 9: no change
+mass spectrum 10: missing compound name and adduct
+mass spectrum 11: no change
+mass spectrum 12: wrong inchi
\ No newline at end of file
diff --git a/flask_app/resources/test_case_correct.mgf b/flask_app/resources/test_case_correct.mgf
new file mode 100644
index 0000000..0f48a7f
--- /dev/null
+++ b/flask_app/resources/test_case_correct.mgf
@@ -0,0 +1,725 @@
+BEGIN IONS
+ID=1
+PEPMASS=181.051
+CHARGE=-1
+MSLEVEL=2
+SOURCE_INSTRUMENT=ESI-Orbitrap
+FILENAME=MSV000084072/peak/negative/GNPS00001_E3_n.mzXML
+SEQ=*..*
+IONMODE=Negative
+ORGANISM=GNPS-MSMLS
+NAME=HYDROXYPHENYLLACTATE 
+ADDUCT=M-H
+FORMULA=C9H10O4
+PI=Dorrestein
+DATACOLLECTOR=Fernando Vargas
+SMILES=OC(CC1=CC=C(O)C=C1)C(O)=O
+INCHI="1S/C9H10O4/c10-7-3-1-6(2-4-7)5-8(11)9(12)13/h1-4,8,10-11H,5H2,(H,12,13)"
+INCHIKEY=JVGVDSSUAVXRDY-UHFFFAOYSA-N
+INCHIAUX=N/A
+PUBMED=na
+SUBMITUSER=mpanitchpakdi
+LIBRARYQUALITY=1
+SPECTRUMID=CCMSLIB00005463540
+SCANS=18
+57.993649	2301.143799
+58.669075	2842.239258
+68.020729	2971.482422
+70.564552	2513.018311
+72.878845	2689.933105
+72.975563	3853.803467
+72.991684	718360.75
+73.995079	12563.180664
+75.007271	3571.86499
+77.404732	2682.560059
+81.033073	4734.676758
+81.774086	2626.58374
+92.918541	5914.78418
+92.992386	76993.929688
+93.033134	41289.710938
+99.857903	2497.19165
+101.239021	2791.706299
+105.356773	3432.287598
+105.568924	3190.02417
+106.041046	4729.494629
+107.048927	59435.523438
+109.02813	5910.954102
+112.984322	45298.136719
+117.033318	5821.436035
+118.203209	2873.571289
+119.015236	4521.074707
+119.048965	1540851.625
+119.084404	5451.431152
+120.052269	105341.335938
+121.028297	18331.339844
+121.108994	2778.724121
+133.028061	3424.031738
+134.031036	5080.327637
+134.036118	83965.585938
+134.04097	5610.906738
+134.998993	3203.721924
+135.00325	4971.822266
+135.043961	2215514.25
+135.087067	6798.085938
+135.733582	2684.214844
+136.047302	150738.0625
+136.982407	174019.640625
+137.985413	4226.324707
+142.010696	3147.537842
+147.589081	4205.175781
+148.523163	5009.87207
+152.916489	5558.07959
+162.985809	8731.754883
+163.039062	3223728.5
+163.096542	9567.081055
+164.042374	236031.921875
+172.091812	3181.887695
+179.034134	8333.552734
+180.911652	4656.253906
+180.972595	24925.556641
+180.987289	4149.787598
+181.049759	1808479.75
+181.116394	6630.770508
+182.053116	139884.703125
+200.736801	3475.613037
+END IONS
+
+
+
+BEGIN IONS
+ID=2
+PEPMASS=123.08
+CHARGE=1
+MSLEVEL=2
+SOURCE_INSTRUMENT=ESI-Orbitrap
+FILENAME=MSV000084072/peak/positive/GNPS00001_C10_p.mzXML
+SEQ=*..*
+IONMODE=Positive
+ORGANISM=GNPS-MSMLS
+NAME=1-PHENYLETHANOL 
+ADDUCT=M+H
+FORMULA=C8H10O
+PI=Dorrestein
+DATACOLLECTOR=Fernando Vargas
+SMILES=CC(O)C1=CC=CC=C1
+INCHI="1S/C8H10O/c1-7(9)8-5-3-2-4-6-8/h2-7,9H,1H3"
+INCHIKEY=WAPNOHKVXSQRPX-UHFFFAOYSA-N
+INCHIAUX=N/A
+PUBMED=na
+SUBMITUSER=mpanitchpakdi
+LIBRARYQUALITY=1
+SPECTRUMID=CCMSLIB00005463542
+SCANS=9
+50.09074	1563.576538
+51.847359	1482.95874
+52.35796	1460.236206
+55.054832	4853.48291
+56.050049	1694.615234
+57.053383	2707.153564
+57.070385	3209.65625
+64.927635	2554.697266
+67.054626	24782.671875
+70.341408	1578.040405
+78.997322	2244.730469
+79.054504	6207.743164
+80.99279	2491.513672
+81.070122	27285.521484
+85.559135	2069.354004
+91.039093	37482.609375
+93.070007	3839.950195
+95.049271	6394.27832
+95.08564	33426.289062
+97.00779	14587.337891
+99.003365	33937.394531
+105.070114	4827.084473
+105.073883	4377.217285
+106.028793	2912.144043
+106.943672	3527.098633
+108.056923	3015.60376
+108.866722	1828.277222
+110.307365	1760.50647
+111.023346	5263.312012
+113.018959	24370.855469
+113.90287	1687.043091
+122.09655	4319.949707
+123.043922	4615.996094
+123.055367	13869.549805
+123.080566	637932.5
+123.099785	25931.980469
+123.116882	17160.033203
+123.964729	4258.138184
+124.084068	3809.735352
+125.038429	2065.710205
+END IONS
+
+
+BEGIN IONS
+ID=3
+PEPMASS=341.109
+CHARGE=-1
+MSLEVEL=2
+SOURCE_INSTRUMENT=ESI-Orbitrap
+FILENAME=MSV000084072/peak/negative/GNPS00001_G7_n.mzXML
+SEQ=*..*
+IONMODE=Negative
+ORGANISM=GNPS-MSMLS
+NAME=MELIBIOSE 
+ADDUCT=M-H
+FORMULA=C12H22O11
+PI=Dorrestein
+DATACOLLECTOR=Fernando Vargas
+SMILES=OC[C@H]1O[C@H](OC[C@H]2OC(O)[C@H](O)[C@@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@H]1O
+INCHI="1S/C12H22O11/c13-1-3-5(14)8(17)10(19)12(23-3)21-2-4-6(15)7(16)9(18)11(20)22-4/h3-20H,1-2H2/t3-,4-,5+,6-,7+,8+,9-,10-,11?,12+/m1/s1"
+INCHIKEY=DLRVVLDZNNYCBX-ZZFZYMBESA-N
+INCHIAUX=N/A
+PUBMED=na
+SUBMITUSER=mpanitchpakdi
+LIBRARYQUALITY=1
+SPECTRUMID=CCMSLIB00005463543
+SCANS=20
+50.540623	1971.973022
+53.1106	2001.842529
+59.012436	48294.171875
+66.468216	2272.426514
+67.171623	2172.375
+71.012428	33443.402344
+71.837288	2314.537598
+75.025131	2055.233398
+85.028244	3493.986084
+87.00737	2498.182617
+89.02301	46724.976562
+101.023033	29916.160156
+113.02298	15779.530273
+119.033714	7765.674316
+127.43145	2624.237549
+136.582092	2697.059326
+138.292267	2471.99292
+143.03392	2970.882812
+161.044678	5681.307617
+179.055206	12105.952148
+212.821136	2522.489258
+221.066132	20078.255859
+223.95575	2675.333252
+227.882217	2573.143311
+230.085648	2318.889404
+232.42157	2854.153076
+249.285477	2839.33252
+256.436523	2698.845215
+300.400299	2575.356934
+332.259033	2550.45874
+END IONS
+
+
+BEGIN IONS
+ID=4
+PEPMASS=179.056
+CHARGE=-1
+MSLEVEL=2
+SOURCE_INSTRUMENT=ESI-Orbitrap
+FILENAME=MSV000084072/peak/negative/GNPS00001_H2_n.mzXML
+SEQ=*..*
+IONMODE=Negative
+ORGANISM=GNPS-MSMLS
+NAME=PSICOSE 
+ADDUCT=M-H
+FORMULA=C6H12O6
+PI=Dorrestein
+DATACOLLECTOR=Fernando Vargas
+SMILES=C1C(C(C(C(O1)(CO)O)O)O)O
+INCHI="1S/C6H12O6/c7-2-6(11)5(10)4(9)3(8)1-12-6/h3-5,7-11H,1-2H2"
+INCHIKEY=LKDRXBCSQODPBY-JDJSBBGDSA-N
+INCHIAUX=N/A
+PUBMED=na
+SUBMITUSER=mpanitchpakdi
+LIBRARYQUALITY=1
+SPECTRUMID=CCMSLIB00005463544
+SCANS=23
+50.752808	1545.243286
+59.012474	19256.941406
+60.720825	2136.356934
+61.612026	1596.15271
+71.012405	15034.151367
+76.279961	1849.901611
+79.870834	1799.16272
+89.023018	10720.604492
+90.996696	14408.519531
+91.915749	1874.651733
+92.994606	2727.983643
+93.787796	1993.916138
+102.502869	2168.821777
+110.237366	1885.488647
+113.023308	2582.52124
+122.958046	7717.575684
+134.966919	4371.566406
+134.986786	32977.765625
+139.372147	2003.396973
+140.994339	2101.980225
+143.275223	2098.721191
+149.183472	2278.868652
+150.952957	36120.242188
+151.434525	1920.847046
+158.950302	7290.147461
+178.956345	3667.172363
+178.977158	5032.843262
+197.86763	2062.096924
+198.909378	1795.668457
+199.788757	2060.966553
+END IONS
+
+
+BEGIN IONS
+ID=5
+PEPMASS=172.098
+CHARGE=-1
+MSLEVEL=2
+SOURCE_INSTRUMENT=ESI-Orbitrap
+FILENAME=MSV000084072/peak/negative/GNPS00001_C1_n.mzXML
+SEQ=*..*
+IONMODE=Negative
+ORGANISM=GNPS-MSMLS
+NAME=N-ACETYLLEUCINE 
+ADDUCT=M-H
+FORMULA=C8H15NO3
+PI=Dorrestein
+DATACOLLECTOR=Fernando Vargas
+SMILES=CC(C)C[C@H](NC(C)=O)C(O)=O
+INCHI="1S/C8H15NO3/c1-5(2)4-7(8(11)12)9-6(3)10/h5,7H,4H2,1-3H3,(H,9,10)(H,11,12)/t7-/m0/s1"
+INCHIKEY=WXNXCEHXYPACJF-ZETCQYMHSA-N
+INCHIAUX=N/A
+PUBMED=na
+SUBMITUSER=mpanitchpakdi
+LIBRARYQUALITY=1
+SPECTRUMID=CCMSLIB00005463545
+SCANS=17
+50.494568	2305.545166
+51.01384	2171.879883
+51.815498	2572.902344
+51.995987	2339.180176
+52.061226	2207.783203
+52.57711	2512.306152
+55.847023	2212.564697
+58.028439	87037.296875
+60.502853	2177.543945
+60.521988	2473.28418
+63.7416	2167.294678
+66.966057	2456.80835
+69.75647	2647.145264
+73.483582	2592.871826
+82.064796	34505.539062
+83.57148	2632.96167
+84.080338	6847.935059
+85.034073	3291.934326
+85.425201	2819.0354
+93.145638	2523.094238
+98.150711	3326.976807
+100.950996	10970.250977
+104.952477	4165.535156
+110.096161	5159.470215
+111.080231	16679.011719
+116.945961	17610.503906
+128.106979	256260.953125
+128.876358	4115.287598
+128.946121	11519.360352
+129.110062	22094.078125
+130.046829	33868.449219
+130.086136	9670261.0
+130.128036	19900.978516
+131.083527	30412.302734
+131.089355	600183.375
+144.941299	4816.934082
+154.086502	5185.08252
+154.94632	40799.105469
+172.09697	1241461.125
+172.153442	3612.325439
+172.957062	7246.802246
+172.990631	5766.550781
+173.081223	5280.883789
+173.100342	119199.460938
+END IONS
+
+
+BEGIN IONS
+ID=6
+PEPMASS=367.185
+CHARGE=-1
+MSLEVEL=2
+SOURCE_INSTRUMENT=ESI-Orbitrap
+FILENAME=MSV000084072/peak/negative/GNPS00001_C1_n.mzXML
+SEQ=*..*
+IONMODE=Negative
+ORGANISM=GNPS-MSMLS
+NAME=N-ACETYLLEUCINE 
+ADDUCT=2M-2H+Na
+FORMULA=C8H15NO3
+PI=Dorrestein
+DATACOLLECTOR=Fernando Vargas
+SMILES=CC(C)C[C@H](NC(C)=O)C(O)=O
+INCHI="1S/C8H15NO3/c1-5(2)4-7(8(11)12)9-6(3)10/h5,7H,4H2,1-3H3,(H,9,10)(H,11,12)/t7-/m0/s1"
+INCHIKEY=WXNXCEHXYPACJF-ZETCQYMHSA-N
+INCHIAUX=N/A
+PUBMED=na
+SUBMITUSER=mpanitchpakdi
+LIBRARYQUALITY=1
+SPECTRUMID=CCMSLIB00005463546
+SCANS=29
+50.182331	1945.575806
+54.236893	2076.633789
+54.998264	2017.147461
+56.697639	2070.655029
+56.92223	2073.952148
+58.39571	2177.846924
+60.040657	2549.327148
+61.123554	2238.305908
+78.493736	2203.665283
+83.095627	2948.52832
+90.494804	2418.666016
+98.84182	2410.284912
+100.659897	2554.206543
+109.763535	2380.613525
+114.740555	2268.283447
+116.718567	2305.662109
+119.477913	2791.840332
+129.462494	2899.041748
+130.081238	3260.672363
+130.086151	61868.832031
+172.096985	143111.046875
+173.100342	4825.463379
+182.386368	2966.69873
+194.3871	2497.397705
+210.838821	3673.36377
+221.303009	2314.973633
+223.828033	2770.725098
+265.850708	2885.436035
+301.585999	3519.039551
+354.620514	2900.132812
+367.184906	7250.871582
+367.265472	2715.052979
+END IONS
+
+
+BEGIN IONS
+ID=7
+PEPMASS=150.077
+CHARGE=1
+MSLEVEL=2
+SOURCE_INSTRUMENT=ESI-Orbitrap
+FILENAME=MSV000084072/peak/positive/GNPS00001_E2_p.mzXML
+SEQ=*..*
+IONMODE=Positive
+ORGANISM=GNPS-MSMLS
+NAME=3-METHYLADENINE 
+ADDUCT=M+H
+FORMULA=C6H7N5
+PI=Dorrestein
+DATACOLLECTOR=Fernando Vargas
+SMILES=CN1C=NC(N)=C2N=CN=C12
+INCHI="1S/C6H7N5/c1-11-3-10-5(7)4-6(11)9-2-8-4/h2-3H,7H2,1H3"
+INCHIKEY=ZPBYVFQJHWLTFB-UHFFFAOYSA-N
+INCHIAUX=N/A
+PUBMED=na
+SUBMITUSER=mpanitchpakdi
+LIBRARYQUALITY=1
+SPECTRUMID=CCMSLIB00005463547
+SCANS=20
+50.786098	9135.300781
+54.021648	17471.267578
+55.029583	43251.820312
+56.674351	9691.061523
+57.045086	9206.999023
+59.332993	9622.615234
+66.418159	10749.587891
+66.546501	10004.551758
+67.029388	12454.616211
+67.425049	9276.546875
+69.045151	31944.523438
+72.456032	9194.083008
+76.090012	11107.212891
+78.258568	10212.219727
+82.040123	110676.773438
+84.959908	18314.753906
+92.024513	20427.816406
+96.055695	125424.664062
+100.490868	11362.44043
+105.962997	40719.503906
+106.040092	30840.582031
+108.042923	35402.824219
+108.055611	101371.835938
+109.050941	336142.5625
+111.252899	10821.838867
+123.066544	511276.15625
+133.050964	856015.75
+134.046265	37031.992188
+135.054138	72465.648438
+150.077545	54001820.0
+150.125961	92599.414062
+151.06189	21264.314453
+151.074524	58892.902344
+151.08078	216933.5625
+152.056854	30901.611328
+END IONS
+
+
+BEGIN IONS
+ID=8
+PEPMASS=221.057
+CHARGE=-1
+MSLEVEL=2
+SOURCE_INSTRUMENT=ESI-Orbitrap
+FILENAME=MSV000084072/peak/negative/GNPS00001_B7_n.mzXML
+SEQ=*..*
+IONMODE=Negative
+ORGANISM=GNPS-MSMLS
+NAME="2,6-DIHYDROXYPYRIDINE" 
+ADDUCT=2M-H
+FORMULA=C5H5NO2
+PI=Dorrestein
+DATACOLLECTOR=Fernando Vargas
+SMILES=C1=CC(=O)NC(=C1)O
+INCHI="1S/C5H5NO2/c7-4-2-1-3-5(8)6-4/h1-3H,(H2,6,7,8)"
+INCHIKEY=WLFXSECCHULRRO-UHFFFAOYSA-N
+INCHIAUX=N/A
+PUBMED=na
+SUBMITUSER=mpanitchpakdi
+LIBRARYQUALITY=1
+SPECTRUMID=CCMSLIB00005463548
+SCANS=27
+53.619167	1898.965576
+54.11095	2265.26123
+61.186977	1738.268433
+66.779572	1851.167114
+70.708031	1796.109985
+72.527184	1818.754028
+79.024498	1845.408569
+80.350136	2010.399048
+83.263359	1960.025024
+103.919197	2362.208008
+105.173363	2122.984619
+109.281166	2030.764893
+136.039307	33703.570312
+137.319504	2297.305908
+148.751129	2441.170898
+161.034668	2347.14917
+177.71376	2034.473755
+179.045242	38240.597656
+179.052521	2948.430664
+180.048599	2745.167236
+193.928101	2060.133057
+204.702805	2358.849609
+205.28389	2073.789062
+221.056381	51394.78125
+222.060211	3162.884521
+END IONS
+
+
+BEGIN IONS
+ID=9
+PEPMASS=365.105
+CHARGE=1
+MSLEVEL=2
+SOURCE_INSTRUMENT=ESI-Orbitrap
+FILENAME=MSV000084072/peak/positive/GNPS00001_G2_p.mzXML
+SEQ=*..*
+IONMODE=Positive
+ORGANISM=GNPS-MSMLS
+NAME=SUCROSE 
+ADDUCT=M+Na
+FORMULA=C12H22O11
+PI=Dorrestein
+DATACOLLECTOR=Fernando Vargas
+SMILES=OC[C@H]1O[C@@](CO)(O[C@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@@H](O)[C@@H]1O
+INCHI="1S/C12H22O11/c13-1-4-6(16)8(18)9(19)11(21-4)23-12(3-15)10(20)7(17)5(2-14)22-12/h4-11,13-20H,1-3H2/t4-,5-,6-,7-,8+,9-,10+,11-,12+/m1/s1"
+INCHIKEY=CZMRCDWAGMRECN-UGDNZRGBSA-N
+INCHIAUX=N/A
+PUBMED=na
+SUBMITUSER=mpanitchpakdi
+LIBRARYQUALITY=1
+SPECTRUMID=CCMSLIB00005463549
+SCANS=20
+51.110802	2751.992676
+52.042931	2427.323242
+54.204987	2326.957031
+55.620426	2482.91333
+56.299335	2671.567383
+58.450058	2513.786133
+63.139435	2968.675537
+66.149086	2727.473633
+66.624603	7029.318848
+66.626404	4924.129395
+89.847275	2804.193115
+97.383942	3441.288574
+103.737549	3060.2854
+107.869011	3100.476074
+114.239304	3123.896484
+116.606705	2991.578857
+139.445679	3365.54126
+143.453751	3923.98877
+152.061234	2890.845703
+177.644165	2889.41748
+184.255539	3648.265625
+185.042358	758432.1875
+192.030014	4091.378174
+200.209564	3764.209961
+202.973297	3485.999023
+203.052887	1701323.0
+204.05629	5566.01416
+213.650085	3444.569824
+218.029663	2887.408691
+221.062866	7352.574219
+290.206573	3159.827148
+351.157837	3244.633545
+365.106232	2048528.625
+366.109528	6856.312012
+END IONS
+
+
+BEGIN IONS
+ID=10
+PEPMASS=191.118
+CHARGE=1
+MSLEVEL=2
+SOURCE_INSTRUMENT=ESI-LC-ESI-QTOF
+FILENAME=Massbank_ESI_positive_8_1_2014_peaks.mgf
+SEQ=*..*
+IONMODE=Positive
+ORGANISM=MASSBANK
+NAME=Cytisine 
+ADDUCT=M+H
+FORMULA=C11H14N2O
+PI="Putative Massbank Match"
+DATACOLLECTOR=Massbank
+SMILES=C1[C@H]2CNC[C@@H]1C3=CC=CC(=O)N3C2
+INCHI=1S/C11H14N2O/c14-11-3-1-2-10-9-4-8(5-12-6-9)7-13(10)11/h1-3,8-9,12H,4-7H2/t8-,9+/m0/s1
+INCHIKEY=ANJTVLIZGCUXLD-DTWKUNHWSA-N
+INCHIAUX=N/A
+PUBMED=N/A
+SUBMITUSER=mwang87
+LIBRARYQUALITY=3
+SPECTRUMID=CCMSLIB00000204751
+SCANS=11
+120.0805	215.0
+130.0652	159.0
+133.0512	196.0
+134.0586	206.0
+146.0606	182.0
+148.0755	12133.0
+148.1124	232.0
+148.1288	161.0
+148.1582	128.0
+148.1883	267.0
+160.0762	146.0
+162.0892	375.0
+174.0923	136.0
+191.1175	7529.0
+191.1614	182.0
+191.2455	133.0
+END IONS
+
+
+BEGIN IONS
+ID=11
+PEPMASS=260.092
+CHARGE=1
+MSLEVEL=2
+SOURCE_INSTRUMENT=ESI-LC-ESI-QTOF
+FILENAME=Massbank_ESI_positive_8_1_2014_peaks.mgf
+SEQ=*..*
+IONMODE=Positive
+ORGANISM=MASSBANK
+NAME=Skimmianine 
+ADDUCT=M+H
+FORMULA=C14H13NO4
+PI="Putative Massbank Match"
+DATACOLLECTOR=Massbank
+SMILES=COC1=C(C2=C(C=C1)C(=C3C=COC3=N2)OC)OC
+INCHI=1S/C14H13NO4/c1-16-10-5-4-8-11(13(10)18-3)15-14-9(6-7-19-14)12(8)17-2/h4-7H,1-3H3
+INCHIKEY=SLSIBLKBHNKZTB-UHFFFAOYSA-N
+INCHIAUX=N/A
+PUBMED=N/A
+SUBMITUSER=mwang87
+LIBRARYQUALITY=3
+SPECTRUMID=CCMSLIB00000204756
+SCANS=16
+156.0445	78.0
+184.0367	144.0
+199.063	596.0
+199.1029	35.0
+212.037	49.0
+216.0646	458.0
+227.0571	2793.0
+227.1144	38.0
+227.1307	39.0
+227.1957	37.0
+230.0439	260.0
+244.0586	157.0
+245.0678	695.0
+245.1017	38.0
+260.0922	648.0
+END IONS
+
+
+BEGIN IONS
+ID=12
+PEPMASS=181.072
+CHARGE=1
+MSLEVEL=2
+SOURCE_INSTRUMENT=ESI-Orbitrap
+FILENAME=MSV000084479/peak/mzXML/GNPS00005_G8_p.mzXML
+SEQ=*..*
+IONMODE=Positive
+ORGANISM=GNPS-MSMLS
+NAME=THEOBROMINE 
+ADDUCT=M+H
+FORMULA=C7H8N4O2
+PI=Dorrestein
+DATACOLLECTOR=Kelly Weldon
+SMILES=CN1C=NC2=C1C(=O)NC(=O)N2C
+INCHI="1S/C7H8N4O2/c1-10-3-8-5-4(10)6(12)9-7(13)11(5)2/h3H,1-2H3,(H,9,12,13)"
+INCHIKEY=YAPQBXQYLJRXSA-UHFFFAOYSA-N
+INCHIAUX=N/A
+PUBMED=N/A
+SUBMITUSER=mpanitchpakdi
+LIBRARYQUALITY=1
+SPECTRUMID=CCMSLIB00005464198
+SCANS=26
+53.984585	2200.075928
+56.698093	1968.606812
+62.04755	2400.987793
+67.029778	19701.019531
+69.045395	23568.802734
+70.177971	2305.233643
+83.060989	12263.048828
+84.960213	10869.87793
+85.738388	2162.437988
+91.209183	2625.989014
+94.040466	2627.905273
+96.056137	23821.800781
+97.717781	2385.289551
+98.896057	2513.965576
+107.010551	2494.426758
+108.056038	60805.148438
+110.071846	102124.40625
+112.087212	5632.263672
+113.035294	4863.25293
+120.055634	2745.1521
+122.059227	20169.671875
+123.041679	102874.648438
+123.080559	6526.376953
+124.050507	6017.03125
+135.066483	85918.375
+137.082413	197951.296875
+138.066544	591843.4375
+139.069885	4696.746094
+139.075333	5365.431641
+139.111893	2735.634277
+149.023682	5598.092773
+156.077072	6471.537109
+158.954269	2866.068848
+159.013214	8620.223633
+163.062057	58183.132812
+181.072327	5051340.5
+182.069122	4534.439941
+182.076019	28589.470703
+184.980209	4541.842285
+190.653259	2998.242432
+203.411301	2598.710449
+END IONS
\ No newline at end of file
diff --git a/flask_app/services/plot_spectra.py b/flask_app/services/plot_spectra.py
new file mode 100644
index 0000000..617c52f
--- /dev/null
+++ b/flask_app/services/plot_spectra.py
@@ -0,0 +1,26 @@
+'''
+TODO Migrate here functionality to plot spectra
+'''
+
+from matchms.plotting.spectrum_plots import plot_spectra_mirror, plot_spectrum
+import streamlit as st
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+import pubchempy
+from rdkit import Chem
+from rdkit.Chem import Draw
+
+def plot_spectra(spectrum, cmp_smile=None):
+    """Return a figure with the compound structure (left) and its spectrum (right).
+
+    `cmp_smile` was previously an undefined name; it now defaults to the spectrum's "smiles" metadata.
+    """
+    cmp_smile = spectrum.get("smiles") if cmp_smile is None else cmp_smile
+    fig, axs = plt.subplots(1, 2, figsize=(12.8, 4.2), gridspec_kw={'width_ratios': [2, 5]}, sharey=False)
+    cmp_img = Chem.Draw.MolToImage(Chem.MolFromSmiles(cmp_smile), ax=axs[0])
+    axs[0].grid(False); axs[0].tick_params(axis='both', bottom=False, labelbottom=False, left=False, labelleft=False)
+    axs[0].set_title(cmp_smile); axs[0].imshow(cmp_img); axs[0].axis("off")
+    plot_spectrum(spectrum, ax=axs[1])  # keyword: the 2nd positional slot is presumably annotate_ions - verify
+    return fig
+    #TODO pass the plot to flask
\ No newline at end of file
diff --git a/flask_app/static/loading.gif b/flask_app/static/loading.gif
new file mode 100644
index 0000000..8ff5c37
Binary files /dev/null and b/flask_app/static/loading.gif differ
diff --git a/flask_app/static/styles.css b/flask_app/static/styles.css
new file mode 100644
index 0000000..a0adead
--- /dev/null
+++ b/flask_app/static/styles.css
@@ -0,0 +1,56 @@
+body {
+    margin: 0;
+    padding: 0;
+    font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
+    color: #444;
+  }
+  /*
+   * Formatting the header area
+   */
+  header {
+    background-color: #DFB887;
+    height: 35px;
+    width: 100%;
+    opacity: .9;
+    margin-left: 10px;
+    margin-bottom: 10px;
+  }
+  ul {
+    list-style-type: none;
+    margin: 10px; /* a unit is required: the unitless "10" was invalid and ignored by browsers */
+    padding: 0;
+    overflow: hidden;
+    background-color: #333;
+  }
+  
+  li {
+    float: left;
+  }
+  
+  li a {
+    display: block;
+    color: white;
+    text-align: center;
+    padding: 10px 10px;
+    text-decoration: none;
+  }
+  
+  /* Darken the link background to #111 (near black) on hover */
+  li a:hover {
+    background-color: #111;
+  }
+
+  div.content {
+    padding-left: 10px;
+  }
+
+  .button-style {
+    border: none;
+    color: black;
+    padding: 15px 32px;
+    text-align: center;
+    text-decoration: none;
+    display: inline-block;
+    font-size: 16px;
+    cursor: pointer;
+}
\ No newline at end of file
diff --git a/flask_app/templates/about.html b/flask_app/templates/about.html
new file mode 100644
index 0000000..c2837ae
--- /dev/null
+++ b/flask_app/templates/about.html
@@ -0,0 +1,7 @@
+{% extends 'base.html' %}
+
+{% block title %}About{% endblock %}
+
+{% block content %}
+<p>This is the creation and curation wizard for FAIR MS Libraries. You can find the source code <a href="https://github.com/mzmine/biohack23_p15">here</a>.</p>
+{% endblock %}
\ No newline at end of file
diff --git a/flask_app/templates/base.html b/flask_app/templates/base.html
new file mode 100644
index 0000000..9e9e11c
--- /dev/null
+++ b/flask_app/templates/base.html
@@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{% block title %}FAIR spectral db{% endblock %}</title>
+    <link rel= "stylesheet" type= "text/css" href= "{{ url_for('static',filename='styles.css') }}">
+    <script type="text/javascript" src="https://ajax.googleapis.com/ajax/libs/jquery/1.6.2/jquery.min.js"></script>
+</head>
+<body>
+    <nav>
+        <ul>
+            <li><a href="{{ url_for('index') }}">Home</a></li>
+            <li><a href="{{ url_for('upload') }}">Upload data</a></li>
+            <li><a href="https://github.com/mzmine/biohack23_p15/issues/new/choose">Get help or report bug</a></li>
+            <li><a href="{{ url_for('about') }}">About</a></li>
+        </ul>
+    </nav>
+    <div class="content">
+        {% block content %}
+        {% endblock %}
+    </div>
+</body>
+</html>
diff --git a/flask_app/templates/index.html b/flask_app/templates/index.html
new file mode 100644
index 0000000..dfd7105
--- /dev/null
+++ b/flask_app/templates/index.html
@@ -0,0 +1,8 @@
+{% extends 'base.html' %}
+
+{% block title %}Home{% endblock %}
+
+{% block content %}
+    <h1>Welcome to the FAIRification of mass spectral libraries</h1>
+    <p>This webservice facilitates easy and intuitive curation of your mass spectral data.</p>
+{% endblock %}
\ No newline at end of file
diff --git a/flask_app/templates/plot_spectrum.html b/flask_app/templates/plot_spectrum.html
new file mode 100644
index 0000000..50a18c5
--- /dev/null
+++ b/flask_app/templates/plot_spectrum.html
@@ -0,0 +1,11 @@
+<form action="/plot_spectrum" method="post">
+    <label for="compound_name">Select a compound name:</label>
+    <select id="compound_name" name="compound_name">
+        {% for cmp in cmp_list %}
+            <option value="{{ cmp }}">{{ cmp }}</option>
+        {% endfor %}
+    </select>
+    <input type="submit" value="Plot Spectrum">
+</form>
+
+<!-- <img src="{{ plot_path }}" alt="Spectrum Plot"> -->
\ No newline at end of file
diff --git a/flask_app/templates/preview.html b/flask_app/templates/preview.html
new file mode 100644
index 0000000..2b1480d
--- /dev/null
+++ b/flask_app/templates/preview.html
@@ -0,0 +1,6 @@
+{% extends "base.html" %}
+{% block content %}
+
+<h1>{{name}}</h1>
+{{data | safe}}
+{% endblock %}
\ No newline at end of file
diff --git a/flask_app/templates/upload_data.html b/flask_app/templates/upload_data.html
new file mode 100644
index 0000000..321204a
--- /dev/null
+++ b/flask_app/templates/upload_data.html
@@ -0,0 +1,20 @@
+{% extends 'base.html' %}
+
+{% block content %}
+    <div id="loading" style="display:none;margin:10px"><img src="../static/loading.gif" alt="" /></div>
+    <div id="content">
+        <form action = "" method = "POST" enctype = "multipart/form-data" style="margin: 10px;">
+            <input type = "file" name = "file" class="button-style" />
+            <input type = "submit" value="Submit" onclick="$('#loading').show();" class="button-style">
+        </form>
+    </div>
+
+    <script type="text/javascript">// <![CDATA[
+        function preloader(){
+            document.getElementById("loading").style.display = "none";
+            document.getElementById("content").style.display = "block";
+        }//preloader
+        window.onload = preloader;
+// ]]></script>
+
+{% endblock %}
\ No newline at end of file
diff --git a/library_spectra_validation/.gitignore b/library_spectra_validation/.gitignore
deleted file mode 100644
index cd25e0f..0000000
--- a/library_spectra_validation/.gitignore
+++ /dev/null
@@ -1,35 +0,0 @@
-*.py[cod]
-*.egg-info
-*.eggs
-.ipynb_checkpoints
-
-build
-dist
-.cache
-__pycache__
-
-htmlcov
-.coverage
-coverage.xml
-.pytest_cache
-pylint-report.txt
-xunit-result.xml
-.scannerwork/
-
-docs/_build
-docs/apidocs
-
-# ide
-.idea
-.eclipse
-.vscode
-
-# Mac
-.DS_Store
-config.py
-__pycache__/
-debug.log
-
-
-# conda build directory
-/_build
\ No newline at end of file
diff --git a/streamlit_app/requirements.txt b/requirements.txt
similarity index 100%
rename from streamlit_app/requirements.txt
rename to requirements.txt
diff --git a/streamlit_app/FAIR_MS_Library_Editor.py b/streamlit_app/FAIR_MS_Library_Editor.py
deleted file mode 100644
index 37e93eb..0000000
--- a/streamlit_app/FAIR_MS_Library_Editor.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import json
-import os
-import tempfile
-import streamlit as st
-
-st.set_page_config(
-    layout="wide", 
-    page_title="FAIR MS Library Curation Editor", 
-    #page_icon="assets/favicon.ico",
-    menu_items={
-        'Get Help': 'https://github.com/mzmine/biohack23_p15',
-        'Report a bug': "https://github.com/mzmine/biohack23_p15/issues/new/choose",
-        'About': "# This is the creation and curation wizard for FAIR MS Libraries."
-    }
-)
-
-from streamlit.runtime.scriptrunner.script_run_context import get_script_run_ctx
-ctx = get_script_run_ctx()
-if 'session_id' not in st.session_state:
-    print("Setting session ID:", ctx.session_id)
-    st.session_state.session_id = ctx.session_id
-else:
-    print("Retrieving session ID:", st.session_state.session_id)
-
-if 'submission_id' not in st.session_state:
-    submission_id = "FMSL-"+st.session_state.session_id
-    print(f"Setting submission id {submission_id}")
-    st.session_state.submission_id = submission_id
-
-submission_id = st.session_state.submission_id
-
-if 'cv_config' not in st.session_state:
-    # reading the data from the file
-    with open('config.json') as f:
-        data = f.read()
-    config = json.loads(data)
-    st.session_state.cv_config = config
-
-tmp_dir = tempfile.gettempdir()
-working_dir = os.path.join(tmp_dir, "fairmslib", submission_id)
-os.makedirs(working_dir, exist_ok=True)
-print("Working dir:", working_dir)
-st.session_state['working_dir'] = working_dir
-
-st.title("FAIR MS Library Curation Editor")
-st.markdown(f"Provisional submission ID: {submission_id}")
-
-# Using "with" notation
-with st.sidebar:
-    st.markdown("## Datasets")
-    if 'datasets' not in st.session_state or st.session_state['datasets'] == {}:
-        st.warning("Please upload a file to begin!")
-    if 'selected_sheets' not in st.session_state or st.session_state['selected_sheets'] == {}:
-        st.warning("Please select a dataset to begin!")
-    # with st.spinner("Loading..."):
-    #     time.sleep(5)
-    # st.success("Done!")
-    if 'datasets' in st.session_state and st.session_state['datasets'] != {}:
-        for key in st.session_state['selected_sheets']:
-            with st.expander(key):
-                datasets = st.session_state['datasets']
-                rowsMetricColumn, columnsMetricColumn = st.columns(2)
-                with rowsMetricColumn:
-                    st.metric('Rows', datasets[key].shape[0])
-                with columnsMetricColumn:
-                    st.metric('Columns', datasets[key].shape[1])
-                # if st.button("Edit", key=key):
-                #     selected_sheet = key
-                # if key in datasets_metadata:
-                #     st.write(datasets_metadata[key].keys())
-            # st.write(datasets[dataset_key])
-        # st.json(datasets)
diff --git a/streamlit_app/README.md b/streamlit_app/README.md
deleted file mode 100644
index 3cb118f..0000000
--- a/streamlit_app/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# Streamlit Application for FAIR MassSpectral Library Curation and Editing
-
-[GitHub repository](https://github.com/mzmine/biohack23_p15) - folder _streamlit_app_
-
-## Installation
-
-## Development
-
-Use python venv to use defined dependencies.
-
-    python -m venv venv
-    source venv/bin/activate
-
-to activate the virtual environment.
-
-You can then use the provided requirements.txt to populate the required dependencies in your virtual environment.
-
-    pip install -r requirements.txt
-
-## Running the application
-
-After you have activated the virtual environment and the packages listed in requirements.txt are installed, you can launch the streamlit application as follows:
-
-    streamlit run FAIR_MS_Library_Editor.py 
diff --git a/streamlit_app/__init__.py b/streamlit_app/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/streamlit_app/assets/.empty b/streamlit_app/assets/.empty
deleted file mode 100644
index e69de29..0000000
diff --git a/streamlit_app/config.json b/streamlit_app/config.json
deleted file mode 100644
index 9a3693d..0000000
--- a/streamlit_app/config.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
-    "ontologies":  {
-        "NCIT": "ncit",
-        "MS": "ms",
-        "MSIO": "msio",
-        "UO": "uo",
-        "NCBITaxon": "ncbitaxon",
-        "BTO": "bto",
-        "PRIDE": "pride",
-        "EFO": "efo"
-    },
-    "static_cv_terms": {
-        "MS:1003309": {
-            "term_obo_id": "MS:1003309",
-            "cv": "MS",
-            "name": "Goslin",
-            "value": null
-        }
-    }
-}
\ No newline at end of file
diff --git a/streamlit_app/models.py b/streamlit_app/models.py
deleted file mode 100644
index 8f9546d..0000000
--- a/streamlit_app/models.py
+++ /dev/null
@@ -1,612 +0,0 @@
-# generated by datamodel-codegen:
-#   filename:  <stdin>
-#   timestamp: 2023-05-30T11:14:55+00:00
-
-from __future__ import annotations
-
-from enum import Enum
-from typing import List, Optional
-
-from pydantic import AnyUrl, BaseModel, Field, conint, constr
-
-
-class CommentPrefix(Enum):
-    COM = 'COM'
-
-
-class Comment(BaseModel):
-    prefix: CommentPrefix
-    msg: str
-    line_number: Optional[int] = None
-
-
-class MTDPrefix(Enum):
-    MTD = 'MTD'
-
-
-class SMLPrefix(Enum):
-    SML = 'SML'
-
-
-class SMHeaderPrefix(Enum):
-    SMH = 'SMH'
-
-
-class SMFPrefix(Enum):
-    SMF = 'SMF'
-
-
-class SFHeaderPrefix(Enum):
-    SFH = 'SFH'
-
-
-class SMEPrefix(Enum):
-    SME = 'SME'
-
-
-class SEHeaderPrefix(Enum):
-    SEH = 'SEH'
-
-
-class Parameter(BaseModel):
-    id: conint(ge=1) = Field(..., readOnly=True)
-    cv_label: Optional[str] = ''
-    cv_accession: Optional[str] = ''
-    name: str
-    value: str
-
-
-class Instrument(BaseModel):
-    id: conint(ge=1) = Field(..., readOnly=True)
-    name: Optional[Parameter] = None
-    source: Optional[Parameter] = None
-    analyzer: Optional[List[Parameter]] = Field(
-        [], description="The instrument's mass analyzer, as defined by the parameter."
-    )
-    detector: Optional[Parameter] = None
-
-
-class SampleProcessing(BaseModel):
-    id: conint(ge=1) = Field(..., readOnly=True)
-    sampleProcessing: Optional[List[Parameter]] = Field(
-        [],
-        description='Parameters specifiying sample processing that was applied within one step.',
-    )
-
-
-class Software(BaseModel):
-    id: conint(ge=1) = Field(..., readOnly=True)
-    parameter: Optional[Parameter] = None
-    setting: Optional[List[str]] = Field(
-        [],
-        description='A software setting used. This field MAY occur multiple times for a\nsingle software. The value of this field is deliberately set as a\nString, since there currently do not exist cvParams for every\npossible setting.\n',
-    )
-
-
-class PublicationType(Enum):
-    doi = 'doi'
-    pubmed = 'pubmed'
-    uri = 'uri'
-
-
-class PublicationItem(BaseModel):
-    type: PublicationType = Field(..., description='The type qualifier of this publication item.')
-    accession: str = Field(
-        ..., description='The native accession id for this publication item.'
-    )
-
-
-class StringList(BaseModel):
-    __root__: List[str] = Field(..., description='A typed list of strings.')
-
-
-class Contact(BaseModel):
-    id: conint(ge=1) = Field(..., readOnly=True)
-    name: str = Field(..., description="The contact's name.", min_length=2)
-    affiliation: str = Field(..., description="The contact's affiliation.", min_length=2)
-    email: Optional[str] = Field(None, regex=r'^\w+([\.-]?\w+)*@\w+([\.-]?\w+)*(\.\w{2,3})+$', description="The contact's email address.") 
-    orcid: Optional[str] = Field(None, regex=r'^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]{1}$', description="The contact's ORCID identifier.")
-
-
-class Uri(BaseModel):
-    id: conint(ge=1) = Field(..., readOnly=True)
-    value: Optional[AnyUrl] = Field(
-        None, description='The URI pointing to the external resource.'
-    )
-
-
-class Sample(BaseModel):
-    id: conint(ge=1) = Field(..., readOnly=True)
-    name: Optional[str] = Field(None, description="The sample's name.")
-    custom: Optional[List[Parameter]] = Field(
-        [], description='Additional user or cv parameters.'
-    )
-    species: Optional[List[Parameter]] = Field(
-        [], description='Biological species information on the sample.'
-    )
-    tissue: Optional[List[Parameter]] = Field(
-        [], description='Biological tissue information on the sample.'
-    )
-    cell_type: Optional[List[Parameter]] = Field(
-        [], description='Biological cell type information on the sample.'
-    )
-    disease: Optional[List[Parameter]] = Field(
-        [], description='Disease information on the sample.'
-    )
-    description: Optional[str] = Field(
-        None, description='A free form description of the sample.'
-    )
-
-
-class MsRun(BaseModel):
-    id: conint(ge=1) = Field(..., readOnly=True)
-    name: Optional[str] = Field(None, description="The msRun's name.")
-    location: AnyUrl = Field(..., description="The msRun's location URI.")
-    instrument_ref: Optional[Instrument] = None
-    format: Optional[Parameter] = None
-    id_format: Optional[Parameter] = None
-    fragmentation_method: Optional[List[Parameter]] = Field(
-        [], description='The fragmentation methods applied during this msRun.'
-    )
-    scan_polarity: Optional[List[Parameter]] = Field(
-        [], description='The scan polarity/polarities used during this msRun.'
-    )
-    hash: Optional[str] = Field(
-        None, description="The file hash value of this msRun's data file."
-    )
-    hash_method: Optional[Parameter] = None
-
-
-class Assay(BaseModel):
-    id: conint(ge=1) = Field(..., readOnly=True)
-    name: str = Field(..., description='The assay name.')
-    custom: Optional[List[Parameter]] = Field(
-        [], description='Additional user or cv parameters.'
-    )
-    external_uri: Optional[AnyUrl] = Field(
-        None, description='An external URI to further information about this assay.'
-    )
-    sample_ref: Optional[Sample] = None
-    ms_run_ref: List[MsRun] = Field(
-        ..., description='The ms run(s) referenced by this assay.', min_items=1
-    )
-
-
-class CV(BaseModel):
-    id: conint(ge=1) = Field(..., readOnly=True)
-    label: str = Field(..., description='The abbreviated CV label.')
-    full_name: str = Field(..., description='The full name of this CV, for humans.')
-    version: str = Field(
-        ..., description='The CV version used when the file was generated.'
-    )
-    uri: AnyUrl = Field(..., description='A URI to the CV definition.')
-
-
-class Database(BaseModel):
-    id: conint(ge=1) = Field(..., readOnly=True)
-    param: Parameter
-    prefix: str = Field(..., description='The database prefix.')
-    version: str = Field(..., description='The database version.')
-    uri: AnyUrl = Field(..., description='The URI to the online database.')
-
-
-class ColumnParameterMapping(BaseModel):
-    column_name: str = Field(..., description='The fully qualified target column name.')
-    param: Parameter
-
-
-class OptColumnMapping(BaseModel):
-    identifier: str = Field(..., description='The fully qualified column name.')
-    param: Optional[Parameter] = None
-    value: Optional[str] = Field(
-        None, description='The value for this column in a particular row.'
-    )
-
-
-class Error(BaseModel):
-    code: int
-    message: str
-
-
-class Category(Enum):
-    format = 'format'
-    logical = 'logical'
-    cross_check = 'cross_check'
-
-
-class MessageType(Enum):
-    error = 'error'
-    warn = 'warn'
-    info = 'info'
-
-
-class ValidationMessage(BaseModel):
-    code: str
-    category: Category
-    message_type: Optional[MessageType] = 'info'
-    message: str
-    line_number: Optional[int] = None
-
-
-class SmallMoleculeSummary(BaseModel):
-    prefix: Optional[SMLPrefix] = Field(
-        'SML',
-        description='The small molecule table row prefix. SML MUST be used for rows of the small molecule table.',
-    )
-    header_prefix: Optional[SMHeaderPrefix] = Field(
-        'SMH',
-        description='The small molecule table header prefix. SMH MUST be used for the small molecule table header line (the column labels).',
-    )
-    sml_id: int = Field(
-        ..., description='A within file unique identifier for the small molecule.'
-    )
-    smf_id_refs: Optional[List[int]] = Field(
-        [],
-        description='References to all the features on which quantitation has been based (SMF elements) via referencing SMF_ID values. Multiple values SHOULD be provided as a “|” separated list. This MAY be null only if this is a Summary file.',
-    )
-    database_identifier: Optional[List[str]] = Field(
-        [],
-        description='A list of “|” separated possible identifiers for the small molecule; multiple values MUST only be provided to indicate ambiguity in the identification of the molecule and not to demonstrate different identifier types for the same molecule. Alternative identifiers for the same molecule MAY be provided as optional columns.\n\nThe database identifier must be preceded by the resource description (prefix) followed by a colon, as specified in the metadata section.    \n\nA null value MAY be provided if the identification is sufficiently ambiguous as to be meaningless for reporting or the small molecule has not been identified.\n',
-    )
-    chemical_formula: Optional[List[str]] = Field(
-        [],
-        description='A list of “|” separated potential chemical formulae of the reported compound. The number of values provided MUST match the number of entities reported under “database_identifier”, even if this leads to redundant reporting of information (i.e. if ambiguity can be resolved in the chemical formula), and the validation software will throw an error if the number of “|” symbols does not match. “null” values between bars are allowed.\n\nThis should be specified in Hill notation (EA Hill 1900), i.e. elements in the order C, H and then alphabetically all other elements. Counts of one may be omitted. Elements should be capitalized properly to avoid confusion (e.g., “CO” vs. “Co”). The chemical formula reported should refer to the neutral form.\n\nExample: N-acetylglucosamine would be encoded by the string “C8H15NO6”.\n',
-    )
-    smiles: Optional[List[str]] = Field(
-        [],
-        description='A list of “|” separated potential molecule structures in the simplified molecular-input line-entry system (SMILES) for the small molecule. The number of values provided MUST match the number of entities reported under “database_identifier”, and the validation software will throw an error if the number of “|” symbols does not match. “null” values between bars are allowed.',
-    )
-    inchi: Optional[List[str]] = Field(
-        [],
-        description='A list of “|” separated potential standard IUPAC International Chemical Identifier (InChI) of the given substance.\n\nThe number of values provided MUST match the number of entities reported under “database_identifier”, even if this leads to redundant information being reported (i.e. if ambiguity can be resolved in the InChi), and the validation software will throw an error if the number of “|” symbols does not match. “null” values between bars are allowed.\n',
-    )
-    chemical_name: Optional[List[str]] = Field(
-        [],
-        description='A list of “|” separated possible chemical/common names for the small molecule, or general description if a chemical name is unavailable. Multiple names are only to demonstrate ambiguity in the identification. The number of values provided MUST match the number of entities reported under “database_identifier”, and the validation software will throw an error if the number of “|” symbols does not match. “null” values between bars are allowed.\n',
-    )
-    uri: Optional[List[AnyUrl]] = Field(
-        [],
-        description='A URI pointing to the small molecule’s entry in a reference database (e.g., the small molecule’s HMDB or KEGG entry). The number of values provided MUST match the number of entities reported under “database_identifier”, and the validation software will throw an error if the number of “|” symbols does not match. “null” values between bars are allowed.',
-    )
-    theoretical_neutral_mass: Optional[List[float]] = Field(
-        [],
-        description='The small molecule’s precursor’s theoretical neutral mass.\n\nThe number of values provided MUST match the number of entities reported under “database_identifier”, and the validation software will throw an error if the number of “|” symbols does not match. “null” values (in general and between bars) are allowed for molecules that have not been identified only, or for molecules where the neutral mass cannot be calculated. In these cases, the SML entry SHOULD reference features in which exp_mass_to_charge values are captured.\n',
-    )
-    adduct_ions: Optional[List[str]] = Field(
-        [],
-        description='A “|” separated list of detected adducts for this this molecule, following the general style in the 2013 IUPAC recommendations on terms relating to MS e.g. [M+H]1+, [M+Na]1+, [M+NH4]1+, [M-H]1-, [M+Cl]1-, [M+H]1+. If the adduct classification is ambiguous with regards to identification evidence it MAY be null.\n',
-        regex='^\\[\\d*M([+-][\\w]*)\\]\\d*[+-]$',
-    )
-    reliability: Optional[str] = Field(
-        None,
-        description='The reliability of the given small molecule identification. This must be supplied by the resource and MUST be reported as an integer between 1-4:\n\n    identified metabolite (1)\n\n    putatively annotated compound (2)\n\n    putatively characterized compound class (3)\n\n    unknown compound (4)\n\nThese MAY be replaced using a suitable CV term in the metadata section e.g. to use MSI recommendation levels (see Section 6.2.57 for details).\n\nThe following CV terms are already available within the PSI MS CV. Future schemes may be implemented by extending the PSI MS CV with new terms and associated levels.\n\nThe MSI has recently discussed an extension of the original four level scheme into a five level scheme MS:1002896 (compound identification confidence level) with levels\n\n    isolated, pure compound, full stereochemistry (0)\n\n    reference standard match or full 2D structure (1)\n\n    unambiguous diagnostic evidence (literature, database) (2)\n\n    most likely structure, including isomers, substance class or substructure match (3)\n\n    unknown compound (4)\n\nFor high-resolution MS, the following term and its levels may be used: MS:1002955 (hr-ms compound identification confidence level) with levels\n\n    confirmed structure (1)\n\n    probable structure (2)\n\n        unambiguous ms library match (2a)\n\n        diagnostic evidence (2b)\n\n    tentative candidates (3)\n\n    unequivocal molecular formula (4)\n\n    exact mass (5)\n\nA String data type is set to allow for different systems to be specified in the metadata section.\n',
-    )
-    best_id_confidence_measure: Optional[Parameter] = None
-    best_id_confidence_value: Optional[float] = Field(
-        None,
-        description='The best confidence measure in identification (for this type of score) for the given small molecule across all assays. The type of score MUST be defined in the metadata section. If the small molecule was not identified by the specified search engine, “null” MUST be reported. If the confidence measure does not report a numerical confidence value, “null” SHOULD be reported.',
-    )
-    abundance_assay: Optional[List[float]] = Field(
-        [],
-        description='The small molecule’s abundance in every assay described in the metadata section MUST be reported. Null or zero values may be reported as appropriate. "null" SHOULD be used to report missing quantities, while zero SHOULD be used to indicate a present but not reliably quantifiable value (e.g. below a minimum noise threshold).',
-    )
-    abundance_study_variable: Optional[List[float]] = Field(
-        [],
-        description='The small molecule’s abundance in all the study variables described in the metadata section (study_variable[1-n]_average_function), calculated using the method as described in the Metadata section (default = arithmetic mean across assays). Null or zero values may be reported as appropriate. "null" SHOULD be used to report missing quantities, while zero SHOULD be used to indicate a present but not reliably quantifiable value (e.g. below a minimum noise threshold).',
-    )
-    abundance_variation_study_variable: Optional[List[float]] = Field(
-        [],
-        description='A measure of the variability of the study variable abundance measurement, calculated using the method as described in the metadata section (study_variable[1-n]_average_function), with a default = arithmethic co-efficient of variation of the small molecule’s abundance in the given study variable.',
-    )
-    opt: Optional[List[OptColumnMapping]] = Field(
-        [],
-        description='Additional columns can be added to the end of the small molecule table. These column headers MUST start with the prefix “opt_” followed by the {identifier} of the object they reference: assay, study variable, MS run or “global” (if the value relates to all replicates). Column names MUST only contain the following characters: ‘A’-‘Z’, ‘a’-‘z’, ‘0’-‘9’, ‘’, ‘-’, ‘[’, ‘]’, and ‘:’. CV parameter accessions MAY be used for optional columns following the format: opt{identifier}_cv_{accession}_\\{parameter name}. Spaces within the parameter’s name MUST be replaced by ‘_’.\n',
-    )
-    comment: Optional[List[Comment]] = []
-
-
-class SmallMoleculeFeature(BaseModel):
-    prefix: Optional[SMFPrefix] = Field(
-        'SMF',
-        description='The small molecule feature table row prefix. SMF MUST be used for rows of the small molecule feature table.',
-    )
-    header_prefix: Optional[SFHeaderPrefix] = Field(
-        'SFH',
-        description='The small molecule feature table header prefix. SFH MUST be used for the small molecule feature table header line (the column labels).',
-    )
-    smf_id: int = Field(
-        ...,
-        description='A within file unique identifier for the small molecule feature.',
-    )
-    sme_id_refs: Optional[List[int]] = Field(
-        [],
-        description='References to the identification evidence (SME elements) via referencing SME_ID values. Multiple values MAY be provided as a “|” separated list to indicate ambiguity in the identification or to indicate that different types of data supported the identifiction (see SME_ID_REF_ambiguity_code). For the case of a consensus approach where multiple adduct forms are used to infer the SML ID, different features should just reference the same SME_ID value(s).',
-    )
-    sme_id_ref_ambiguity_code: Optional[int] = Field(
-        None,
-        description='If multiple values are given under SME_ID_REFS, one of the following codes MUST be provided. 1=Ambiguous identification; 2=Only different evidence streams for the same molecule with no ambiguity; 3=Both ambiguous identification and multiple evidence streams. If there are no or one value under SME_ID_REFs, this MUST be reported as null.',
-    )
-    adduct_ion: Optional[constr(regex=r'^\[\d*M([+-][\w]*)\]\d*[+-]$')] = Field(
-        None,
-        description='The assumed classification of this molecule’s adduct ion after detection, following the general style in the 2013 IUPAC recommendations on terms relating to MS e.g. [M+H]1+, [M+Na]1+, [M+NH4]1+, [M-H]1-, [M+Cl]1-, [M+H]1+.',
-    )
-    isotopomer: Optional[Parameter] = None
-    exp_mass_to_charge: float = Field(
-        ...,
-        description='The experimental mass/charge value for the feature, by default assumed to be the mean across assays or a representative value. For approaches that report isotopomers as SMF rows, then the m/z of the isotopomer MUST be reported here.',
-    )
-    charge: int = Field(
-        ...,
-        description='The feature’s charge value using positive integers both for positive and negative polarity modes.',
-    )
-    retention_time_in_seconds: Optional[float] = Field(
-        None,
-        description='The apex of the feature on the retention time axis, in a Master or aggregate MS run. Retention time MUST be reported in seconds. Retention time values for individual MS runs (i.e. before alignment) MAY be reported as optional columns. Retention time SHOULD only be null in the case of direct infusion MS or other techniques where a retention time value is absent or unknown. Relative retention time or retention time index values MAY be reported as optional columns, and could be considered for inclusion in future versions of mzTab as appropriate.',
-    )
-    retention_time_in_seconds_start: Optional[float] = Field(
-        None,
-        description='The start time of the feature on the retention time axis, in a Master or aggregate MS run. Retention time MUST be reported in seconds. Retention time start and end SHOULD only be null in the case of direct infusion MS or other techniques where a retention time value is absent or unknown and MAY be reported in optional columns.',
-    )
-    retention_time_in_seconds_end: Optional[float] = Field(
-        None,
-        description='The end time of the feature on the retention time axis, in a Master or aggregate MS run. Retention time MUST be reported in seconds. Retention time start and end SHOULD only be null in the case of direct infusion MS or other techniques where a retention time value is absent or unknown and MAY be reported in optional columns..',
-    )
-    abundance_assay: Optional[List[float]] = Field(
-        [],
-        description='The feature’s abundance in every assay described in the metadata section MUST be reported. Null or zero values may be reported as appropriate.',
-    )
-    opt: Optional[List[OptColumnMapping]] = Field(
-        [],
-        description='Additional columns can be added to the end of the small molecule feature table. These column headers MUST start with the prefix “opt_” followed by the {identifier} of the object they reference: assay, study variable, MS run or “global” (if the value relates to all replicates). Column names MUST only contain the following characters: ‘A’-‘Z’, ‘a’-‘z’, ‘0’-‘9’, ‘’, ‘-’, ‘[’, ‘]’, and ‘:’. CV parameter accessions MAY be used for optional columns following the format: opt{identifier}_cv_{accession}_\\{parameter name}. Spaces within the parameter’s name MUST be replaced by ‘_’.\n',
-    )
-    comment: Optional[List[Comment]] = []
-
-
-class Publication(BaseModel):
-    id: Optional[conint(ge=1)] = None
-    publicationItems: List[PublicationItem] = Field(
-        ..., description='The publication item ids referenced by this publication.'
-    )
-
-
-class SpectraRef(BaseModel):
-    ms_run: MsRun
-    reference: str = Field(
-        ...,
-        description='The (vendor-dependendent) reference string to the actual mass spectrum.\n',
-    )
-
-
-class StudyVariable(BaseModel):
-    id: conint(ge=1)
-    name: str = Field(..., description='The study variable name.')
-    assay_refs: Optional[List[Assay]] = Field(
-        [], description='The assays referenced by this study variable.'
-    )
-    average_function: Optional[Parameter] = None
-    variation_function: Optional[Parameter] = None
-    description: Optional[str] = Field(
-        None, description='A free-form description of this study variable.'
-    )
-    factors: Optional[List[Parameter]] = Field(
-        [],
-        description='Parameters indicating which factors were used for the assays referenced by this study variable, and at which levels.',
-    )
-
-
-class Metadata(BaseModel):
-    prefix: MTDPrefix = Field(
-        ...,
-        description='The metadata section prefix. MUST always be MTD.\n',
-        example='MTD',
-    )
-    mzTab_version: constr(regex=r'^\d{1}\.\d{1}\.\d{1}-[A-Z]{1}$') = Field(
-        ...,
-        alias='mzTab-version',
-        description='The version of the mzTab file. The suffix MUST be "-M" for mzTab for metabolomics (mzTab-M).\n',
-        example='2.0.0-M',
-    )
-    mzTab_ID: str = Field(
-        ...,
-        alias='mzTab-ID',
-        description='The ID of the mzTab file, this could be supplied by the repository from which it is downloaded or a local identifier from the lab producing the file. It is not intended to be a globally unique ID but carry some locally useful meaning.\n',
-        example='MTBLS214',
-    )
-    title: Optional[str] = Field(
-        None,
-        description='The file’s human readable title.\n',
-        example='My first test experiment',
-    )
-    description: Optional[str] = Field(
-        None,
-        description='The file’s human readable description.\n',
-        example='An experiment investigating the effects of Il-6.',
-    )
-    contact: Optional[List[Contact]] = Field(
-        [],
-        description='The contact’s name, affiliation and e-mail. Several contacts can be given by indicating the number in the square brackets after "contact". A contact has to be supplied in the format [first name] [initials] [last name].',
-    )
-    publication: Optional[List[Publication]] = Field(
-        [],
-        description='A publication associated with this file. Several publications can be given by indicating the number in the square brackets after “publication”. PubMed ids must be prefixed by “pubmed:”, DOIs by “doi:”. Multiple identifiers MUST be separated by “|”.',
-    )
-    uri: Optional[List[Uri]] = Field(
-        [],
-        description='A URI pointing to the file’s source data (e.g., a MetaboLights records).',
-    )
-    external_study_uri: Optional[List[Uri]] = Field(
-        [],
-        description='A URI pointing to an external file with more details about the study design (e.g., an ISA-TAB file).',
-    )
-    instrument: Optional[List[Instrument]] = Field(
-        [],
-        description='The name, source, analyzer and detector of the instruments used in the experiment. Multiple instruments are numbered [1-n].',
-    )
-    quantification_method: Parameter
-    sample: Optional[List[Sample]] = Field(
-        [],
-        description='Specification of sample.\n(empty) name: A name for each sample to serve as a list of the samples that MUST be reported in the following tables. Samples MUST be reported if a statistical design is being captured (i.e. bio or tech replicates). If the type of replicates are not known, samples SHOULD NOT be reported. \nspecies: The respective species of the samples analysed. For more complex cases, such as metagenomics, optional columns and userParams should be used. \ntissue: The respective tissue(s) of the sample. \ncell_type: The respective cell type(s) of the sample. \ndisease: The respective disease(s) of the sample. \ndescription: A human readable description of the sample. \ncustom: Custom parameters describing the sample’s additional properties. Dates MUST be provided in ISO-8601 format.\n',
-    )
-    sample_processing: Optional[List[SampleProcessing]] = Field(
-        [],
-        description="A list of parameters describing a sample processing, preparation or handling step similar to a biological or analytical methods report. The order of the sample_processing items should reflect the order these processing steps were performed in. If multiple parameters are given for a step these MUST be separated by a “|”. If derivatization was performed, it MUST be reported here as a general step, e.g. 'silylation' and the actual derivatization agens MUST be specified in the Section 6.2.54 part.\n",
-    )
-    software: List[Software] = Field(
-        ...,
-        description='Software used to analyze the data and obtain the reported results. The parameter’s value SHOULD contain the software’s version. The order (numbering) should reflect the order in which the tools were used. A software setting used. This field MAY occur multiple times for a single software. The value of this field is deliberately set as a String, since there currently do not exist CV terms for every possible setting.',
-    )
-    derivatization_agent: Optional[List[Parameter]] = Field(
-        [],
-        description='A description of derivatization agents applied to small molecules, using userParams or CV terms where possible.',
-    )
-    ms_run: List[MsRun] = Field(
-        ...,
-        description='Specification of ms_run. \nlocation: Location of the external data file e.g. raw files on which analysis has been performed. If the actual location of the MS run is unknown, a “null” MUST be used as a place holder value, since the [1-n] cardinality is referenced elsewhere. If pre-fractionation has been performed, then [1-n] ms_runs SHOULD be created per assay. \ninstrument_ref: If different instruments are used in different runs, instrument_ref can be used to link a specific instrument to a specific run. \nformat: Parameter specifying the data format of the external MS data file. If ms_run[1-n]-format is present, ms_run[1-n]-id_format SHOULD also be present, following the parameters specified in Table 1. \nid_format: Parameter specifying the id format used in the external data file. If ms_run[1-n]-id_format is present, ms_run[1-n]-format SHOULD also be present.\nfragmentation_method: The type(s) of fragmentation used in a given ms run.\nscan_polarity: The polarity mode of a given run. Usually only one value SHOULD be given here except for the case of mixed polarity runs.\nhash: Hash value of the corresponding external MS data file defined in ms_run[1-n]-location. If ms_run[1-n]-hash is present, ms_run[1-n]-hash_method SHOULD also be present.\nhash_method: A parameter specifying the hash methods used to generate the String in ms_run[1-n]-hash. Specifics of the hash method used MAY follow the definitions of the mzML format. If ms_run[1-n]-hash is present, ms_run[1-n]-hash_method SHOULD also be present.\n',
-    )
-    assay: List[Assay] = Field(
-        ...,
-        description='Specification of assay.\n(empty) name: A name for each assay, to serve as a list of the assays that MUST be reported in the following tables. \ncustom: Additional custom parameters or values for a given assay. \nexternal_uri: An external reference uri to further information about the assay, for example via a reference to an object within an ISA-TAB file. \nsample_ref: An association from a given assay to the sample analysed. \nms_run_ref: An association from a given assay to the source MS run. All assays MUST reference exactly one ms_run unless a workflow with pre-fractionation is being encoded, in which case each assay MUST reference n ms_runs where n fractions have been collected. Multiple assays SHOULD reference the same ms_run to capture multiplexed experimental designs.\n',
-    )
-    study_variable: List[StudyVariable] = Field(
-        ...,
-        description='Specification of study_variable.\n(empty) name: A name for each study variable (experimental condition or factor), to serve as a list of the study variables that MUST be reported in the following tables. For software that does not capture study variables, a single study variable MUST be reported, linking to all assays. This single study variable MUST have the identifier “undefined“.\nassay_refs: Bar-separated references to the IDs of assays grouped in the study variable.\naverage_function: The function used to calculate the study variable quantification value and the operation used is not arithmetic mean (default) e.g. “geometric mean”, “median”. The 1-n refers to different study variables.\nvariation_function: The function used to calculate the study variable quantification variation value if it is reported and the operation used is not coefficient of variation (default) e.g. “standard error”.\ndescription: A textual description of the study variable.\nfactors: Additional parameters or factors, separated by bars, that are known about study variables allowing the capture of more complex, such as nested designs.\n',
-    )
-    custom: Optional[List[Parameter]] = Field(
-        [], description='Any additional parameters describing the analysis reported.'
-    )
-    cv: List[CV] = Field(
-        ...,
-        description='Specification of controlled vocabularies.\nlabel: A string describing the labels of the controlled vocabularies/ontologies used in the mzTab file as a short-hand e.g. "MS" for PSI-MS.\nfull_name: A string describing the full names of the controlled vocabularies/ontologies used in the mzTab file.\nversion: A string describing the version of the controlled vocabularies/ontologies used in the mzTab file.\nuri: A string containing the URIs of the controlled vocabularies/ontologies used in the mzTab file.\n',
-    )
-    small_molecule_quantification_unit: Parameter = Field(
-        ..., alias='small_molecule-quantification_unit'
-    )
-    small_molecule_feature_quantification_unit: Parameter = Field(
-        ..., alias='small_molecule_feature-quantification_unit'
-    )
-    small_molecule_identification_reliability: Optional[Parameter] = Field(
-        None, alias='small_molecule-identification_reliability'
-    )
-    database: List[Database] = Field(
-        ...,
-        description='Specification of databases.\n(empty): The description of databases used. For cases, where a known database has not been used for identification, a userParam SHOULD be inserted to describe any identification performed e.g. de novo. If no identification has been performed at all then "no database" should be inserted followed by null.\nprefix: The prefix used in the “identifier” column of data tables. For the “no database” case "null" must be used.\nversion: The database version is mandatory where identification has been performed. This may be a formal version number e.g. “1.4.1”, a date of access “2016-10-27” (ISO-8601 format) or “Unknown” if there is no suitable version that can be annotated.\nuri: The URI to the database. For the “no database” case, "null" must be reported.\n',
-    )
-    id_confidence_measure: List[Parameter] = Field(
-        ...,
-        description='The type of small molecule confidence measures or scores MUST be reported as a CV parameter [1-n]. The CV parameter definition should formally state whether the ordering is high to low or vice versa. The order of the scores SHOULD reflect their importance for the identification and be used to determine the identification’s rank.',
-    )
-    colunit_small_molecule: Optional[List[ColumnParameterMapping]] = Field(
-        [],
-        alias='colunit-small_molecule',
-        description='Defines the used unit for a column in the small molecule section. The format of the value has to be \\{column name}=\\{Parameter defining the unit}. This field MUST NOT be used to define a unit for quantification columns. The unit used for small molecule quantification values MUST be set in small_molecule-quantification_unit.',
-    )
-    colunit_small_molecule_feature: Optional[List[ColumnParameterMapping]] = Field(
-        [],
-        alias='colunit-small_molecule_feature',
-        description='Defines the used unit for a column in the small molecule feature section. The format of the value has to be \\{column name}=\\{Parameter defining the unit}. This field MUST NOT be used to define a unit for quantification columns. The unit used for small molecule quantification values MUST be set in small_molecule_feature-quantification_unit.',
-    )
-    colunit_small_molecule_evidence: Optional[List[ColumnParameterMapping]] = Field(
-        [],
-        alias='colunit-small_molecule_evidence',
-        description='Defines the used unit for a column in the small molecule evidence section. The format of the value has to be \\{column name}=\\{Parameter defining the unit}.',
-    )
-
-
-class SmallMoleculeEvidence(BaseModel):
-    prefix: Optional[SMEPrefix] = Field(
-        'SME',
-        description='The small molecule evidence table row prefix. SME MUST be used for rows of the small molecule evidence table.',
-    )
-    header_prefix: Optional[SEHeaderPrefix] = Field(
-        'SEH',
-        description='The small molecule evidence table header prefix. SEH MUST be used for the small molecule evidence table header line (the column labels).',
-    )
-    sme_id: int = Field(
-        ...,
-        description='A within file unique identifier for the small molecule evidence result.',
-    )
-    evidence_input_id: str = Field(
-        ...,
-        description='A within file unique identifier for the input data used to support this identification e.g. fragment spectrum, RT and m/z pair, isotope profile that was used for the identification process, to serve as a grouping mechanism, whereby multiple rows of results from the same input data share the same ID. The identifiers may be human readable but should not be assumed to be interpretable. For example, if fragmentation spectra have been searched then the ID may be the spectrum reference, or for accurate mass search, the ms_run[2]:458.75.',
-    )
-    database_identifier: str = Field(
-        ...,
-        description='The putative identification for the small molecule sourced from an external database, using the same prefix specified in database[1-n]-prefix.\n\nThis could include additionally a chemical class or an identifier to a spectral library entity, even if its actual identity is unknown.\n\nFor the “no database” case, "null" must be used. The unprefixed use of "null" is prohibited for any other case. If no putative identification can be reported for a particular database, it MUST be reported as the database prefix followed by null.\n',
-    )
-    chemical_formula: Optional[str] = Field(
-        None,
-        description='The chemical formula of the identified compound e.g. in a database, assumed to match the theoretical mass to charge (in some cases this will be the derivatized form, including adducts and protons).\n\nThis should be specified in Hill notation (EA Hill 1900), i.e. elements in the order C, H and then alphabetically all other elements. Counts of one may be omitted. Elements should be capitalized properly to avoid confusion (e.g., “CO” vs. “Co”). The chemical formula reported should refer to the neutral form. Charge state is reported by the charge field.\n\nExample N-acetylglucosamine would be encoded by the string “C8H15NO6”\n',
-    )
-    smiles: Optional[str] = Field(
-        None,
-        description='The potential molecule’s structure in the simplified molecular-input line-entry system (SMILES) for the small molecule.',
-    )
-    inchi: Optional[str] = Field(
-        None,
-        description='A standard IUPAC International Chemical Identifier (InChI) for the given substance.',
-    )
-    chemical_name: Optional[str] = Field(
-        None,
-        description='The small molecule’s chemical/common name, or general description if a chemical name is unavailable.',
-    )
-    uri: Optional[AnyUrl] = Field(
-        None,
-        description='A URI pointing to the small molecule’s entry in a database (e.g., the small molecule’s HMDB, Chebi or KEGG entry).',
-    )
-    derivatized_form: Optional[Parameter] = None
-    adduct_ion: Optional[constr(regex=r'^\[\d*M([-][\w]*)\]\d*[+-]$')] = Field(
-        None,
-        description='The assumed classification of this molecule’s adduct ion after detection, following the general style in the 2013 IUPAC recommendations on terms relating to MS e.g. [M+H]+, [M+Na]1+, [M+NH4]1+, [M-H]1-, [M+Cl]1-. If the adduct classification is ambiguous with regards to identification evidence it MAY be null.',
-    )
-    exp_mass_to_charge: float = Field(
-        ...,
-        description='The experimental mass/charge value for the precursor ion. If multiple adduct forms have been combined into a single identification event/search, then a single value e.g. for the protonated form SHOULD be reported here.',
-    )
-    charge: int = Field(
-        ...,
-        description='The small molecule evidence’s charge value using positive integers both for positive and negative polarity modes.',
-    )
-    theoretical_mass_to_charge: float = Field(
-        ...,
-        description='The theoretical mass/charge value for the small molecule or the database mass/charge value (for a spectral library match).',
-    )
-    spectra_ref: List[SpectraRef] = Field(
-        ...,
-        description='Reference to a spectrum in a spectrum file, for example a fragmentation spectrum has been used to support the identification. If a separate spectrum file has been used for fragmentation spectrum, this MUST be reported in the metadata section as additional ms_runs. The reference must be in the format ms_run[1-n]:{SPECTRA_REF} where SPECTRA_REF MUST follow the format defined in 5.2 (including references to chromatograms where these are used to inform identification). Multiple spectra MUST be referenced using a “|” delimited list for the (rare) cases in which search engines have combined or aggregated multiple spectra in advance of the search to make identifications.\n\nIf a fragmentation spectrum has not been used, the value should indicate the ms_run to which is identification is mapped e.g. “ms_run[1]”.\n',
-    )
-    identification_method: Parameter
-    ms_level: Parameter
-    id_confidence_measure: Optional[List[float]] = Field(
-        [],
-        description='Any statistical value or score for the identification. The metadata section reports the type of score used, as id_confidence_measure[1-n] of type Param.',
-    )
-    rank: conint(ge=1) = Field(
-        ...,
-        description='The rank of this identification from this approach as increasing integers from 1 (best ranked identification). Ties (equal score) are represented by using the same rank – defaults to 1 if there is no ranking system used.',
-    )
-    opt: Optional[List[OptColumnMapping]] = Field(
-        [],
-        description='Additional columns can be added to the end of the small molecule evidence table. These column headers MUST start with the prefix “opt_” followed by the {identifier} of the object they reference: assay, study variable, MS run or “global” (if the value relates to all replicates). Column names MUST only contain the following characters: ‘A’-‘Z’, ‘a’-‘z’, ‘0’-‘9’, ‘’, ‘-’, ‘[’, ‘]’, and ‘:’. CV parameter accessions MAY be used for optional columns following the format: opt{identifier}_cv_{accession}_\\{parameter name}. Spaces within the parameter’s name MUST be replaced by ‘_’.\n',
-    )
-    comment: Optional[List[Comment]] = []
-
-
-class MzTab(BaseModel):
-    metadata: Metadata
-    smallMoleculeSummary: List[SmallMoleculeSummary] = Field(
-        ...,
-        description='The small molecule section is table-based. The small molecule section MUST always come after the metadata section. All table columns MUST be Tab separated. There MUST NOT be any empty cells; missing values MUST be reported using “null” for columns where Is Nullable = “True”.\n\nEach row of the small molecule section is intended to report one final result to be communicated in terms of a molecule that has been quantified. In many cases, this may be the molecule of biological interest, although in some cases, the final result could be a derivatized form as appropriate – although it is desirable for the database identifier(s) to reference to the biological (non-derivatized) form. In general, different adduct forms would generally be reported in the Small Molecule Feature section.\n\nThe order of columns MUST follow the order specified below.\n\nAll columns are MANDATORY except for “opt_” columns.\n',
-        min_items=1,
-    )
-    smallMoleculeFeature: Optional[List[SmallMoleculeFeature]] = Field(
-        ...,
-        description='The small molecule feature section is table-based, representing individual MS regions (generally considered to be the elution profile for all isotopomers formed from a single charge state of a molecule), that have been measured/quantified. However, for approaches that quantify individual isotopomers e.g. stable isotope labelling/flux studies, then each SMF row SHOULD represent a single isotopomer.\n\nDifferent adducts or derivatives and different charge states of individual molecules should be reported as separate SMF rows.\n\nThe small molecule feature section MUST always come after the Small Molecule Table. All table columns MUST be Tab separated. There MUST NOT be any empty cells. Missing values MUST be reported using “null”.\n\nThe order of columns MUST follow the order specified below.\n\nAll columns are MANDATORY except for “opt_” columns.\n',
-    )
-    smallMoleculeEvidence: Optional[List[SmallMoleculeEvidence]] = Field(
-        ...,
-        description='The small molecule evidence section is table-based, representing evidence for identifications of small molecules/features, from database search or any other process used to give putative identifications to molecules. In a typical case, each row represents one result from a single search or intepretation of a piece of evidence e.g. a database search with a fragmentation spectrum. Multiple results from a given input data item (e.g. one fragment spectrum) SHOULD share the same value under evidence_input_id.\n\nThe small molecule evidence section MUST always come after the Small Molecule Feature Table. All table columns MUST be Tab separated. There MUST NOT be any empty cells. Missing values MUST be reported using “null”.\n\nThe order of columns MUST follow the order specified below.\n\nAll columns are MANDATORY except for “opt_” columns.\n',
-    )
-    comment: Optional[List[Comment]] = Field(
-        [],
-        description='Comment lines can be placed anywhere in an mzTab file. These lines must start with the three-letter code COM and are ignored by most parsers. Empty lines can also occur anywhere in an mzTab file and are ignored.\n',
-    )
diff --git a/streamlit_app/pages/1_File_Import.py b/streamlit_app/pages/1_File_Import.py
deleted file mode 100644
index 4f6229e..0000000
--- a/streamlit_app/pages/1_File_Import.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import streamlit as st
-import pandas as pd
-
-st.set_page_config(
-    layout="wide", 
-    page_title="File Import - FAIR MS Library Curation Editor", 
-    #page_icon="assets/favicon.ico",
-    menu_items={
-        'Get Help': 'https://github.com/mzmine/biohack23_p15',
-        'Report a bug': "https://github.com/mzmine/biohack23_p15/issues/new/choose",
-        'About': "# This is the creation and curation wizard for FAIR MS Libraries."
-    }
-)
-
-st.markdown("## File Import")
-st.markdown("Please select an Excel file to upload. The file should contain one or more sheets. Each sheet should contain sample columns, detailing factors of each individual sample (rows). Lipid identities are the column headers of the non-sample columns, quantities should be reported in the cells.")
-
-uploaded_file = st.file_uploader("Choose a file", )
-if uploaded_file is not None:
-    print(uploaded_file)
-    st.session_state['uploaded_file'] = uploaded_file
-
-if 'uploaded_file' in st.session_state and st.session_state['uploaded_file'] is not None:
-    print("Uploaded file:", st.session_state['uploaded_file'])
-    uploaded_file = st.session_state['uploaded_file']
-    with st.spinner('Loading data...'):
-        datasets = {}
-        if 'datasets' in st.session_state:
-            datasets = st.session_state['datasets']
-        else:
-            st.session_state['datasets'] = datasets
-        
-        xl = pd.ExcelFile(uploaded_file)
-        sheets = xl.sheet_names
-        for sheet in sheets:
-            if sheet not in datasets:
-                df = pd.read_excel(uploaded_file, sheet_name=sheet)
-                datasets[sheet] = df
-
-        st.markdown("## Preview Sheets")
-        sheet_selector = st.selectbox(
-                "Select a sheet",
-                sheets
-            )
-        if sheet_selector is not None and sheet_selector in datasets:
-            rowsMetricColumn, columnsMetricColumn = st.columns(2)
-            with rowsMetricColumn:
-                st.metric('Rows', datasets[sheet_selector].shape[0])
-            with columnsMetricColumn:
-                st.metric('Columns', datasets[sheet_selector].shape[1])
-            st.write(datasets[sheet_selector])
-
-        st.markdown("## Select Sheets as Datasets")
-        selected_sheets = st.multiselect(
-            'Each selected sheet will be converted to a dataset',
-            sheets,
-            sheets
-        )
-        st.session_state['datasets'] = datasets
-        st.session_state['selected_sheets'] = selected_sheets
-
-        if 'datasets' not in st.session_state:
-            st.session_state['datasets'] = []
diff --git a/streamlit_app/pages/4_Library_Export.py b/streamlit_app/pages/4_Library_Export.py
deleted file mode 100644
index f1e08c5..0000000
--- a/streamlit_app/pages/4_Library_Export.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import streamlit as st
-
-st.set_page_config(
-    layout="wide", 
-    page_title="Library Export - FAIR MS Library Curation Editor", 
-    #page_icon="assets/favicon.ico",
-    menu_items={
-        'Get Help': 'https://github.com/mzmine/biohack23_p15',
-        'Report a bug': "https://github.com/mzmine/biohack23_p15/issues/new/choose",
-        'About': "# This is the creation and curation wizard for FAIR MS Libraries."
-    }
-)
-
-st.markdown("## Conversion to MS Library Export Format")
-
-datasets = {}
-if 'datasets' in st.session_state:
-    datasets = st.session_state['datasets']
-
-metadata_parts = {}
-if 'metadata_parts' in st.session_state:
-    metadata_parts = st.session_state['metadata_parts']
-
-with st.form("conversion-settings", clear_on_submit=False):
-    if datasets == {}:
-        st.warning("Please upload a file to begin!")
-    
-    if metadata_parts == {}:
-        st.warning("Please enter metadata to begin!")
-    
-    submit_disabled = (metadata_parts == {} or datasets == {})
-    convert = st.form_submit_button("Create MS Library XYZ file", disabled=submit_disabled)
-    if convert:
-        st.info("Exporting to XYZ format...")