From 9986c66f8fca6429e5bc9f05c6c65b2f3ee5df17 Mon Sep 17 00:00:00 2001 From: Ashish Y G Date: Wed, 2 Oct 2024 20:30:19 +0530 Subject: [PATCH 1/3] output for one page in the terminal using pdfplumber library --- test.py | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 test.py diff --git a/test.py b/test.py new file mode 100644 index 00000000..8a4c43e3 --- /dev/null +++ b/test.py @@ -0,0 +1,4 @@ +import pdfplumber +with pdfplumber.open(r'data_preprocessor\data\acetone-acs-l.pdf') as pdf: + data =pdf.pages[0].extract_text() + print(data) \ No newline at end of file From 3bcf50f4d70397d3e349b2d900ba7dc67cc6774b Mon Sep 17 00:00:00 2001 From: Ashish Y G Date: Wed, 2 Oct 2024 20:55:23 +0530 Subject: [PATCH 2/3] computed the output in the form of textfile --- extracted_text.txt | 393 +++++++++++++++++++++++++++++++++++++++++++++ test.py | 20 ++- 2 files changed, 410 insertions(+), 3 deletions(-) create mode 100644 extracted_text.txt diff --git a/extracted_text.txt b/extracted_text.txt new file mode 100644 index 00000000..5466ffff --- /dev/null +++ b/extracted_text.txt @@ -0,0 +1,393 @@ +SAFETY DATA SHEET +Creation Date 28-Apr-2009 Revision Date 13-Oct-2023 Revision Number 9 +1. Identification +Product Name Acetone +Cat No. : A9-4; A9-20; A9-200; A11-1; A11-4; A11-20; A11-200; A11S-4; A13-20; +A13-200; A16F-1GAL; A16P-1GAL; A16P-4; A16S-4; A16S-20; A18-1; +A18-4; A18-20; A18-20LC; A18-200; A18-200LC; A18-500; A18CU1300; +A18FB-19; A18FB-50; A18FB-115; A18FB-200; A18P-4; A18POP-19; +A18POPB-50; A18RB-19; A18RB-50; A18RB-115; A18RB-200; +A18RS-28; A18RS-50; A18RS-115; A18RS-200; A18S-4; A18SK-4; +A18SS-19; A18SS-28; A18SS-50; A18SS-115; A18SS-200; A19-1; +A19-4; A19RS-115; A19RS-200; A40-4; A928-4; A929-1; A929-4; +A929-4LC; A929RS-19; A929RS-50; A929RS-200; A929SK-4; +A929SS-28; A929SS-50; A929SS-115; A929SS-200; A946-4; A946-4LC; +A946FB-200; A946RB-19; A946RB-50; A946RB-115; A946RB-200; +A949-1; A949-4; A949-4LC; A949CU-50; A949N-119; A949N-219; +A949POP-19; A949RS-28; A949RS-50; A949RS-115; A949SK-1; +A949SK-4; A949SS-19; A949SS-28; A949SS-50; A949SS-115; +A949SS-200; BP2403-1; BP2403-4; BP2403-20; BP2403-RS200; +BP2404-1; BP2404-4; BP2404-SK1; BP2404-SK4; HC300-1GAL; +S70091; 22050131; 22050295; XXA9ET200LI; NC2396838 +CAS No 67-64-1 +Synonyms 2-Propanone; Dimethyl ketone; (Certified ACS, HPLC, OPTIMA, Histological, +Spectranalyzed, NF/FCC/EP, Pesticide, Electronic, GC Resolv, SAFE-COTE) +Recommended Use Laboratory chemicals. +Uses advised against Food, drug, pesticide or biocidal product use. +Details of the supplier of the safety data sheet +Company +Fisher Scientific Company +One Reagent Lane +Fair Lawn, NJ 07410 +Tel: (201) 796-7100 +Emergency Telephone Number +CHEMTRECÒ, Inside the USA: 800-424-9300 +CHEMTRECÒ, Outside the USA: 001-703-527-3887 +2. Hazard(s) identification +Classification +______________________________________________________________________________________________ +Page 1 / 9 +Acetone Revision Date 13-Oct-2023 +______________________________________________________________________________________________ +This chemical is considered hazardous by the 2012 OSHA Hazard Communication Standard (29 CFR 1910.1200) +Flammable liquids Category 2 +Serious Eye Damage/Eye Irritation Category 2 +Specific target organ toxicity (single exposure) Category 3 +Target Organs - Central nervous system (CNS). +Specific target organ toxicity - (repeated exposure) Category 2 +Label Elements +Signal Word +Danger +Hazard Statements +Highly flammable liquid and vapor +Causes serious eye irritation +May cause drowsiness or dizziness +May cause damage to organs through prolonged or repeated exposure +Precautionary Statements +Prevention +Wash face, hands and any exposed skin thoroughly after handling +Do not breathe dust/fume/gas/mist/vapors/spray +Use only outdoors or in a well-ventilated area +Keep away from heat/sparks/open flames/hot surfaces. - No smoking +Keep container tightly closed +Ground/bond container and receiving equipment +Use explosion-proof electrical/ventilating/lighting equipment +Use only non-sparking tools +Take precautionary measures against static discharge +Wear protective gloves/protective clothing/eye protection/face protection +Keep cool +Response +Get medical attention/advice if you feel unwell +Inhalation +IF INHALED: Remove victim to fresh air and keep at rest in a position comfortable for breathing +Call a POISON CENTER or doctor/physician if you feel unwell +Skin +IF ON SKIN (or hair): Take off immediately all contaminated clothing. Rinse skin with water/shower +Eyes +IF IN EYES: Rinse cautiously with water for several minutes. Remove contact lenses, if present and easy to do. Continue rinsing +If eye irritation persists: Get medical advice/attention +Fire +In case of fire: Use CO2, dry chemical, or foam for extinction +Storage +Store in a well-ventilated place. Keep container tightly closed +Store locked up +Disposal +______________________________________________________________________________________________ +Page 2 / 9 +Acetone Revision Date 13-Oct-2023 +______________________________________________________________________________________________ +Dispose of contents/container to an approved waste disposal plant +Hazards not otherwise classified (HNOC) +Repeated exposure may cause skin dryness or cracking +3. Composition/Information on Ingredients +Component CAS No Weight % +Acetone 67-64-1 >95 +4. First-aid measures +General Advice If symptoms persist, call a physician. +Eye Contact Rinse immediately with plenty of water, also under the eyelids, for at least 15 minutes. Get +medical attention. +Skin Contact Wash off immediately with plenty of water for at least 15 minutes. If skin irritation persists, +call a physician. +Inhalation Remove to fresh air. If not breathing, give artificial respiration. Get medical attention if +symptoms occur. +Ingestion Clean mouth with water and drink afterwards plenty of water. +Most important symptoms and Difficulty in breathing. Symptoms of overexposure may be headache, dizziness, tiredness, +effects nausea and vomiting: May cause pulmonary edema +Notes to Physician Treat symptomatically +5. Fire-fighting measures +Suitable Extinguishing Media Water spray, carbon dioxide (CO2), dry chemical, alcohol-resistant foam. Water mist may +be used to cool closed containers. +Unsuitable Extinguishing Media Water may be ineffective +Flash Point -20 °C / -4 °F +Method - CC (closed cup) +Autoignition Temperature 465 °C / 869 °F +Explosion Limits +Upper 12.8 vol % +Lower 2.5 vol % +Oxidizing Properties Not oxidising +Sensitivity to Mechanical ImpactNo information available +Sensitivity to Static Discharge No information available +Specific Hazards Arising from the Chemical +Flammable. Risk of ignition. Containers may explode when heated. Vapors may form explosive mixtures with air. Vapors may +travel to source of ignition and flash back. +Hazardous Combustion Products +Carbon monoxide (CO). Carbon dioxide (CO2). Formaldehyde. Methanol. +Protective Equipment and Precautions for Firefighters +______________________________________________________________________________________________ +Page 3 / 9 +Acetone Revision Date 13-Oct-2023 +______________________________________________________________________________________________ +As in any fire, wear self-contained breathing apparatus pressure-demand, MSHA/NIOSH (approved or equivalent) and full +protective gear. +NFPA +Health Flammability Instability Physical hazards +2 3 0 N/A +6. Accidental release measures +Personal Precautions Use personal protective equipment as required. Ensure adequate ventilation. Remove all +sources of ignition. Take precautionary measures against static discharges. +Environmental Precautions Should not be released into the environment. +Methods for Containment and CleanSoak up with inert absorbent material. Keep in suitable, closed containers for disposal. +Up Remove all sources of ignition. Use spark-proof tools and explosion-proof equipment. +7. Handling and storage +Handling Do not get in eyes, on skin, or on clothing. Wear personal protective equipment/face +protection. Ensure adequate ventilation. Avoid ingestion and inhalation. Keep away from +open flames, hot surfaces and sources of ignition. Use only non-sparking tools. To avoid +ignition of vapors by static electricity discharge, all metal parts of the equipment must be +grounded. Take precautionary measures against static discharges. +Storage. Flammables area. Keep containers tightly closed in a dry, cool and well-ventilated place. +Keep away from heat, sparks and flame. Incompatible Materials. Strong oxidizing agents. +Strong reducing agents. Strong bases. Peroxides. Halogenated compounds. Alkali metals. +Amines. +8. Exposure controls / personal protection +Exposure Guidelines +Component ACGIH TLV OSHA PEL NIOSH Mexico OEL (TWA) +Acetone TWA: 250 ppm (Vacated) TWA: 750 ppm IDLH: 2500 ppm TWA: 500 ppm +STEL: 500 ppm (Vacated) TWA: 1800 mg/m3 TWA: 250 ppm STEL: 750 ppm +(Vacated) STEL: 2400 TWA: 590 mg/m3 +mg/m3 +(Vacated) STEL: 1000 ppm +TWA: 1000 ppm +TWA: 2400 mg/m3 +Legend +ACGIH - American Conference of Governmental Industrial Hygienists +OSHA - Occupational Safety and Health Administration +NIOSH: NIOSH - National Institute for Occupational Safety and Health +Engineering Measures Ensure adequate ventilation, especially in confined areas. Ensure that eyewash stations +and safety showers are close to the workstation location. Use explosion-proof +electrical/ventilating/lighting equipment. +Personal Protective Equipment +Eye/face Protection Wear appropriate protective eyeglasses or chemical safety goggles as described by +OSHA's eye and face protection regulations in 29 CFR 1910.133 or European Standard +EN166. +Skin and body protection Wear appropriate protective gloves and clothing to prevent skin exposure. +Respiratory Protection Follow the OSHA respirator regulations found in 29 CFR 1910.134 or European Standard +______________________________________________________________________________________________ +Page 4 / 9 +Acetone Revision Date 13-Oct-2023 +______________________________________________________________________________________________ +EN 149. Use a NIOSH/MSHA or European Standard EN 149 approved respirator if +exposure limits are exceeded or if irritation or other symptoms are experienced. +Recommended Filter type: low boiling organic solvent. Type AX. Brown. conforming to EN371. +Hygiene Measures Handle in accordance with good industrial hygiene and safety practice. +9. Physical and chemical properties +Physical State Liquid +Appearance Colorless +Odor sweet +Odor Threshold 19.8 ppm +pH 7 +Melting Point/Range -95 °C / -139 °F +Boiling Point/Range 56 °C / 132.8 °F +Flash Point -20 °C / -4 °F +Method - CC (closed cup) +Evaporation Rate 5.6 (Butyl Acetate = 1.0) +Flammability (solid,gas) Not applicable +Flammability or explosive limits +Upper 12.8 vol % +Lower 2.5 vol % +Vapor Pressure 247 mbar @ 20 °C +Vapor Density 2.0 +Specific Gravity 0.790 +Solubility Soluble in water +Partition coefficient; n-octanol/water No data available +Autoignition Temperature 465 °C / 869 °F +Decomposition Temperature > 4°C +Viscosity 0.32 mPa.s @ 20 °C +Molecular Formula C3 H6 O +Molecular Weight 58.08 +VOC Content(%) 100 +Refractive index 1.358 - 1.359 +10. Stability and reactivity +Reactive Hazard None known, based on information available +Stability Stable under normal conditions. +Conditions to Avoid Heat, flames and sparks. Incompatible products. Keep away from open flames, hot +surfaces and sources of ignition. +Incompatible Materials Strong oxidizing agents, Strong reducing agents, Strong bases, Peroxides, Halogenated +compounds, Alkali metals, Amines +Hazardous Decomposition ProductsCarbon monoxide (CO), Carbon dioxide (CO2), Formaldehyde, Methanol +Hazardous Polymerization Hazardous polymerization does not occur. +Hazardous Reactions None under normal processing. +11. Toxicological information +Acute Toxicity +Product Information +Component Information +Component LD50 Oral LD50 Dermal LC50 Inhalation +______________________________________________________________________________________________ +Page 5 / 9 +Acetone Revision Date 13-Oct-2023 +______________________________________________________________________________________________ +Acetone 5800 mg/kg ( Rat ) > 15800 mg/kg (rabbit) 76 mg/l, 4 h, (rat) +> 7400 mg/kg (rat) +Toxicologically Synergistic Carbon tetrachloride; Chloroform; Trichloroethylene; Bromodichloromethane; +Products Dibromochloromethane; N-nitrosodimethylamine; 1,1,2-Trichloroethane; Styrene; +Acetonitrile, 2,5-Hexanedione; Ethanol; 1,2-Dichlorobenzene +Delayed and immediate effects as well as chronic effects from short and long-term exposure +Irritation Irritating to eyes +Sensitization No information available +Carcinogenicity The table below indicates whether each agency has listed any ingredient as a carcinogen. +Component CAS No IARC NTP ACGIH OSHA Mexico +Acetone 67-64-1 Not listed Not listed Not listed Not listed Not listed +Mutagenic Effects No information available +Reproductive Effects No information available. +Developmental Effects No information available. +Teratogenicity No information available. +STOT - single exposure Central nervous system (CNS) +STOT - repeated exposure None known +Aspiration hazard No information available +Symptoms / effects,both acute and Symptoms of overexposure may be headache, dizziness, tiredness, nausea and vomiting: +delayed May cause pulmonary edema +Endocrine Disruptor Information No information available +Other Adverse Effects The toxicological properties have not been fully investigated. +12. Ecological information +Ecotoxicity +. +Component Freshwater Algae Freshwater Fish Microtox Water Flea +Acetone NOEC = 430 mg/l (algae; 96 Oncorhynchus mykiss: LC50 EC50 = 14500 mg/L/15 min EC50 = 8800 mg/L/48h +h) = 5540 mg/l 96h EC50 = 12700 mg/L/48h +Alburnus alburnus: LC50 = EC50 = 12600 mg/L/48h +11000 mg/l 96h +Leuciscus idus: LC50 = +11300 mg/L/48h +Salmo gairdneri: LC50 = +6100 mg/L/24h +Persistence and Degradability Persistence is unlikely based on information available. +Bioaccumulation/ Accumulation No information available. +Mobility Will likely be mobile in the environment due to its volatility. +Component log Pow +Acetone -0.24 +13. Disposal considerations +Waste Disposal Methods Chemical waste generators must determine whether a discarded chemical is classified as a +hazardous waste. Chemical waste generators must also consult local, regional, and +national hazardous waste regulations to ensure complete and accurate classification. +______________________________________________________________________________________________ +Page 6 / 9 +Acetone Revision Date 13-Oct-2023 +______________________________________________________________________________________________ +Component RCRA - U Series Wastes RCRA - P Series Wastes +Acetone - 67-64-1 U002 - +14. Transport information +DOT +UN-No UN1090 +Proper Shipping Name ACETONE +Hazard Class 3 +Packing Group II +TDG +UN-No UN1090 +Proper Shipping Name ACETONE +Hazard Class 3 +Packing Group II +IATA +UN-No UN1090 +Proper Shipping Name ACETONE +Hazard Class 3 +Packing Group II +IMDG/IMO +UN-No UN1090 +Proper Shipping Name ACETONE +Hazard Class 3 +Packing Group II +15. Regulatory information +United States of America Inventory +Component CAS No TSCA TSCA Inventory notification - TSCA - EPA Regulatory +Active-Inactive Flags +Acetone 67-64-1 X ACTIVE - +Legend: +TSCA US EPA (TSCA) - Toxic Substances Control Act, (40 CFR Part 710) +X - Listed +'-' - Not Listed +TSCA - Per 40 CFR 751, Regulation of Certain Chemical Not applicable +Substances & Mixtures, Under TSCA Section 6(h) (PBT) +TSCA 12(b) - Notices of Export Not applicable +International Inventories +Canada (DSL/NDSL), Europe (EINECS/ELINCS/NLP), Philippines (PICCS), Japan (ENCS), Japan (ISHL), Australia (AICS), China (IECSC), Korea +(KECL). +Component CAS No DSL NDSL EINECS PICCS ENCS ISHL AICS IECSC KECL +Acetone 67-64-1 X - 200-662-2 X X X X X KE-29367 +KECL - NIER number or KE number (http://ncis.nier.go.kr/en/main.do) +U.S. Federal Regulations +SARA 313 Not applicable +SARA 311/312 Hazard Categories See section 2 for more information +CWA (Clean Water Act) Not applicable +______________________________________________________________________________________________ +Page 7 / 9 +Acetone Revision Date 13-Oct-2023 +______________________________________________________________________________________________ +Clean Air Act Not applicable +OSHA - Occupational Safety and Not applicable +Health Administration +CERCLA This material, as supplied, contains one or more substances regulated as a hazardous +substance under the Comprehensive Environmental Response Compensation and Liability +Act (CERCLA) (40 CFR 302) +Component Hazardous Substances RQs CERCLA EHS RQs +Acetone 5000 lb - +California Proposition 65 This product does not contain any Proposition 65 chemicals. +U.S. State Right-to-Know +Regulations +Component Massachusetts New Jersey Pennsylvania Illinois Rhode Island +Acetone X X X - X +U.S. Department of Transportation +Reportable Quantity (RQ): Y +DOT Marine Pollutant N +DOT Severe Marine Pollutant N +U.S. Department of Homeland This product does not contain any DHS chemicals. +Security +Other International Regulations +Mexico - Grade Serious risk, Grade 3 +Authorisation/Restrictions according to EU REACH +Component CAS No REACH (1907/2006) - REACH (1907/2006) - REACH Regulation (EC +Annex XIV - Substances Annex XVII - Restrictions 1907/2006) article 59 - +Subject to Authorization on Certain Dangerous Candidate List of +Substances Substances of Very High +Concern (SVHC) +Acetone 67-64-1 - Use restricted. See item - +75. +(see link for restriction +details) +REACH links +https://echa.europa.eu/substances-restricted-under-reach +Safety, health and environmental regulations/legislation specific for the substance or mixture +Component CAS No OECD HPV Persistent Organic Ozone Depletion Restriction of +Pollutant Potential Hazardous +Substances (RoHS) +Acetone 67-64-1 Listed Not applicable Not applicable Not applicable +Contains component(s) that meet a 'definition' of per & poly fluoroalkyl substance (PFAS)? +Not applicable +______________________________________________________________________________________________ +Page 8 / 9 +Acetone Revision Date 13-Oct-2023 +______________________________________________________________________________________________ +Other International Regulations +Component CAS No Seveso III Directive Seveso III Directive Rotterdam Basel Convention +(2012/18/EC) - (2012/18/EC) - Convention (PIC) (Hazardous Waste) +Qualifying QuantitiesQualifying Quantities +for Major Accident for Safety Report +Notification Requirements +Acetone 67-64-1 Not applicable Not applicable Not applicable Annex I - Y42 +16. Other information +Prepared By Regulatory Affairs +Thermo Fisher Scientific +Email: EMSDS.RA@thermofisher.com +Creation Date 28-Apr-2009 +Revision Date 13-Oct-2023 +Print Date 13-Oct-2023 +Revision Summary This document has been updated to comply with the US OSHA HazCom 2012 Standard +replacing the current legislation under 29 CFR 1910.1200 to align with the Globally +Harmonized System of Classification and Labeling of Chemicals (GHS). +Disclaimer +The information provided in this Safety Data Sheet is correct to the best of our knowledge, information and belief at the +date of its publication. The information given is designed only as a guidance for safe handling, use, processing, storage, +transportation, disposal and release and is not to be considered a warranty or quality specification. The information +relates only to the specific material designated and may not be valid for such material used in combination with any other +materials or in any process, unless specified in the text +End of SDS +______________________________________________________________________________________________ +Page 9 / 9 \ No newline at end of file diff --git a/test.py b/test.py index 8a4c43e3..ae60efd2 100644 --- a/test.py +++ b/test.py @@ -1,4 +1,18 @@ import pdfplumber -with pdfplumber.open(r'data_preprocessor\data\acetone-acs-l.pdf') as pdf: - data =pdf.pages[0].extract_text() - print(data) \ No newline at end of file + +with pdfplumber.open(r'data\acetone-acs-l.pdf') as pdf: + + text1 = [] + + for page in pdf.pages: + + text = page.extract_text() + if text: + text1.append(text) + +text2 = "\n".join(text1) + +print(text2) + +with open("extracted_text.txt", "w") as text_file: + text_file.write(text2) \ No newline at end of file From fc3f02541e29f6b8e23fa4651e3d634bd20778be Mon Sep 17 00:00:00 2001 From: Ashish Y G Date: Fri, 4 Oct 2024 15:10:33 +0530 Subject: [PATCH 3/3] got json output but not in right format --- output.json | 444 ++++++++++++++++++++++++++++++++++++++++++++++++++++ test.py | 67 ++++++-- 2 files changed, 499 insertions(+), 12 deletions(-) create mode 100644 output.json diff --git a/output.json b/output.json new file mode 100644 index 00000000..93cf36e5 --- /dev/null +++ b/output.json @@ -0,0 +1,444 @@ +{ + "1. Identification": { + "0": "Product Name Chloroform, stabilized with ethanol", + "Cat No.": "C298-1; C298-1EA; C298-1LC; C298-4; C298-20; C298-200;", + "2": "C298-200LC; C298-500; C298FB-19; C298FB-50; C298FB-115;", + "3": "C298FB-200; C298RB-115; C298RB-200; C298RS-19; C298RS-28;", + "4": "C298RS-50; C298RS-115; C298RS-200; C298S-4; C298SK-4;", + "5": "C298SS-50; C298SS-115; C298SS-200", + "6": "CAS No 67-66-3", + "7": "Synonyms Formyl trichloride; Methane trichloride; Methenyl trichloride", + "8": "Recommended Use Laboratory chemicals.", + "9": "Uses advised against .", + "10": "Details of the supplier of the safety data sheet", + "11": "Company", + "12": "Fisher Scientific Company", + "13": "One Reagent Lane", + "14": "Fair Lawn, NJ 07410", + "Tel": "(201) 796-7100", + "Emergency Telephone Number CHEMTREC\u00d2, Inside the USA": "800-424-9300", + "CHEMTREC\u00d2, Outside the USA": "001-703-527-3887" + }, + "2. Hazard(s) identification": { + "0": "Classification", + "1": "This chemical is considered hazardous by the 2012 OSHA Hazard Communication Standard (29 CFR 1910.1200)", + "2": "Acute oral toxicity Category 4", + "3": "Acute Inhalation Toxicity - Vapors Category 3", + "4": "Skin Corrosion/Irritation Category 2", + "5": "Serious Eye Damage/Eye Irritation Category 2", + "6": "Carcinogenicity Category 2", + "7": "Reproductive Toxicity Category 2", + "8": "Specific target organ toxicity (single exposure) Category 3", + "9": "Target Organs - Respiratory system, Central nervous system (CNS).", + "10": "Specific target organ toxicity - (repeated exposure) Category 2", + "11": "Target Organs - Heart, Liver, Kidney, Blood.", + "12": "______________________________________________________________________________________________", + "13": "Page 1 / 10Chloroform, stabilized with ethanol Revision Date 24-Dec-2021", + "14": "______________________________________________________________________________________________", + "15": "Label Elements", + "16": "Signal Word", + "17": "Danger", + "18": "Hazard Statements", + "19": "Harmful if swallowed", + "20": "Causes skin irritation", + "21": "Causes serious eye irritation", + "22": "Toxic if inhaled", + "23": "May cause respiratory irritation", + "24": "May cause drowsiness or dizziness", + "25": "Suspected of causing cancer", + "26": "Suspected of damaging the unborn child", + "27": "May cause damage to organs through prolonged or repeated exposure", + "28": "Precautionary Statements", + "29": "Prevention", + "30": "Obtain special instructions before use", + "31": "Do not handle until all safety precautions have been read and understood", + "32": "Use personal protective equipment as required", + "33": "Wash face, hands and any exposed skin thoroughly after handling", + "34": "Do not eat, drink or smoke when using this product", + "35": "Use only outdoors or in a well-ventilated area", + "36": "Wear eye/face protection", + "37": "Do not breathe dust/fume/gas/mist/vapors/spray", + "38": "Response", + "IF exposed or concerned": "Get medical attention/advice", + "40": "Inhalation", + "IF INHALED": "Remove victim to fresh air and keep at rest in a position comfortable for breathing", + "42": "Skin", + "IF ON SKIN": "Wash with plenty of soap and water", + "If skin irritation occurs": "Get medical advice/attention", + "45": "Take off contaminated clothing and wash before reuse", + "46": "Eyes", + "IF IN EYES": "Rinse cautiously with water for several minutes. Remove contact lenses, if present and easy to do. Continue rinsing", + "If eye irritation persists": "Get medical advice/attention", + "49": "Ingestion", + "IF SWALLOWED": "Call a POISON CENTER or doctor/physician if you feel unwell", + "51": "Rinse mouth", + "52": "Storage", + "53": "Store locked up", + "54": "Store in a well-ventilated place. Keep container tightly closed", + "55": "Disposal", + "56": "Dispose of contents/container to an approved waste disposal plant", + "57": "Hazards not otherwise classified (HNOC)", + "WARNING. Cancer - https": "//www.p65warnings.ca.gov/." + }, + "3. Composition/Information on Ingredients": { + "0": "______________________________________________________________________________________________", + "1": "Page 2 / 10Chloroform, stabilized with ethanol Revision Date 24-Dec-2021", + "2": "______________________________________________________________________________________________", + "3": "Component CAS No Weight %", + "4": "Chloroform 67-66-3 >99", + "5": "Ethyl alcohol 64-17-5 <0.8" + }, + "4. First-aid measures": { + "0": "General Advice Show this safety data sheet to the doctor in attendance. Immediate medical attention is", + "1": "required.", + "2": "Eye Contact Rinse immediately with plenty of water, also under the eyelids, for at least 15 minutes. In", + "3": "the case of contact with eyes, rinse immediately with plenty of water and seek medical", + "4": "advice.", + "5": "Skin Contact Wash off immediately with plenty of water for at least 15 minutes. Immediate medical", + "6": "attention is required.", + "7": "Inhalation Remove to fresh air. If not breathing, give artificial respiration. Do not use mouth-to-mouth", + "8": "method if victim ingested or inhaled the substance; give artificial respiration with the aid of a", + "9": "pocket mask equipped with a one-way valve or other proper respiratory medical device.", + "10": "Immediate medical attention is required.", + "11": "Ingestion Do NOT induce vomiting. Call a physician or poison control center immediately.", + "12": "Most important symptoms and . Symptoms of overexposure are dizziness, headache, tiredness, nausea,", + "effects unconsciousness, cessation of breathing": "May cause decreases in blood pressure and", + "other cardiac effects": "Symptoms may be delayed", + "15": "Notes to Physician Treat symptomatically" + }, + "5. Fire-fighting measures": { + "0": "Suitable Extinguishing Media Substance is nonflammable; use agent most appropriate to extinguish surrounding fire.", + "1": "Unsuitable Extinguishing Media No information available", + "2": "Flash Point No information available", + "3": "Method - No information available", + "4": "Autoignition Temperature No information available", + "5": "Explosion Limits", + "6": "Upper No data available", + "7": "Lower No data available", + "8": "Sensitivity to Mechanical ImpactNo information available", + "9": "Sensitivity to Static Discharge No information available", + "10": "Specific Hazards Arising from the Chemical", + "11": "Non-combustible, substance itself does not burn but may decompose upon heating to produce corrosive and/or toxic fumes.", + "12": "Hazardous Combustion Products", + "13": "Carbon monoxide (CO). Carbon dioxide (CO2). Phosgene. Hydrogen chloride gas.", + "14": "Protective Equipment and Precautions for Firefighters", + "15": "As in any fire, wear self-contained breathing apparatus pressure-demand, MSHA/NIOSH (approved or equivalent) and full", + "16": "protective gear. Thermal decomposition can lead to release of irritating gases and vapors.", + "17": "NFPA", + "18": "Health Flammability Instability Physical hazards", + "19": "2 1 1 N/A", + "20": "______________________________________________________________________________________________", + "21": "Page 3 / 10Chloroform, stabilized with ethanol Revision Date 24-Dec-2021", + "22": "______________________________________________________________________________________________" + }, + "6. Accidental release measures": { + "0": "Personal Precautions Ensure adequate ventilation. Use personal protective equipment as required. Keep people", + "1": "away from and upwind of spill/leak. Evacuate personnel to safe areas.", + "2": "Environmental Precautions Should not be released into the environment.", + "3": "Methods for Containment and CleanSoak up with inert absorbent material. Keep in suitable, closed containers for disposal.", + "4": "Up" + }, + "7. Handling and storage": { + "0": "Handling Wear personal protective equipment/face protection. Do not get in eyes, on skin, or on", + "1": "clothing. Use only under a chemical fume hood. Do not breathe mist/vapors/spray. Do not", + "2": "ingest. If swallowed then seek immediate medical assistance.", + "3": "Storage. Keep containers tightly closed in a dry, cool and well-ventilated place. Protect from direct", + "4": "sunlight. Store under an inert atmosphere. Protect from moisture. Incompatible Materials.", + "5": "Strong oxidizing agents. Alkali metals. Aluminium. Acetone." + }, + "8. Exposure controls / personal protection": { + "0": "Exposure Guidelines", + "1": "Component ACGIH TLV OSHA PEL NIOSH IDLH Mexico OEL (TWA)", + "Chloroform TWA": "10 ppm (Vacated) TWA: 2 ppm IDLH: 500 ppm TWA: 10 ppm", + "(Vacated) TWA": "1900 mg/m3 TWA: 1000 ppm", + "Ceiling": "240 mg/m3 STEL: 225 mg/m3", + "Ethyl alcohol STEL": "1000 ppm (Vacated) TWA: 1000 ppm IDLH: 3300 ppm STEL: 1000 ppm", + "TWA": "1900 mg/m3", + "7": "Legend", + "8": "ACGIH - American Conference of Governmental Industrial Hygienists", + "9": "OSHA - Occupational Safety and Health Administration", + "NIOSH IDLH": "NIOSH - National Institute for Occupational Safety and Health", + "11": "Engineering Measures Use only under a chemical fume hood. Ensure adequate ventilation, especially in confined", + "12": "areas. Ensure that eyewash stations and safety showers are close to the workstation", + "13": "location.", + "14": "Personal Protective Equipment", + "15": "Eye/face Protection Wear appropriate protective eyeglasses or chemical safety goggles as described by", + "16": "OSHA's eye and face protection regulations in 29 CFR 1910.133 or European Standard", + "17": "EN166. Tight sealing safety goggles. Face protection shield.", + "18": "Skin and body protection Wear appropriate protective gloves and clothing to prevent skin exposure.", + "19": "Respiratory Protection Follow the OSHA respirator regulations found in 29 CFR 1910.134 or European Standard", + "20": "EN 149. Use a NIOSH/MSHA or European Standard EN 149 approved respirator if", + "21": "exposure limits are exceeded or if irritation or other symptoms are experienced.", + "22": "Hygiene Measures Handle in accordance with good industrial hygiene and safety practice." + }, + "9. Physical and chemical properties": { + "0": "Physical State Liquid", + "1": "______________________________________________________________________________________________", + "2": "Page 4 / 10Chloroform, stabilized with ethanol Revision Date 24-Dec-2021", + "3": "______________________________________________________________________________________________", + "4": "Appearance Colorless", + "5": "Odor aromatic Slight sweet", + "6": "Odor Threshold No information available", + "7": "pH No information available", + "8": "Melting Point/Range -63 \u00b0C / -81.4 \u00b0F", + "9": "Boiling Point/Range 61 \u00b0C / 141.8 142.7 \u00b0F", + "10": "Flash Point No information available", + "11": "Evaporation Rate 11.6 (Butyl Acetate = 1.0)", + "12": "Flammability (solid,gas) Not applicable", + "13": "Flammability or explosive limits", + "14": "Upper No data available", + "15": "Lower No data available", + "16": "Vapor Pressure 213 mbar @ 20 \u00b0C", + "17": "Vapor Density 4.12 (Air = 1.0)", + "18": "Specific Gravity 1.480", + "19": "Solubility Slightly soluble in water", + "20": "Partition coefficient; n-octanol/water No data available", + "21": "Autoignition Temperature No information available", + "22": "Decomposition Temperature No information available", + "23": "Viscosity 0.56 mPa.s @ 20 \u00b0C", + "24": "Molecular Formula C H Cl3", + "25": "Molecular Weight 119.38" + }, + "10. Stability and reactivity": { + "0": "Reactive Hazard None known, based on information available", + "1": "Stability Stable under normal conditions. UNSTABLE (REACTIVE) UPON DEPLETION OF", + "2": "INHIBITOR. Light sensitive.", + "3": "Conditions to Avoid Incompatible products. Heat, flames and sparks. Excess heat. Exposure to light. Protect", + "4": "from moisture.", + "5": "Incompatible Materials Strong oxidizing agents, Alkali metals, Aluminium, Acetone", + "6": "Hazardous Decomposition ProductsCarbon monoxide (CO), Carbon dioxide (CO2), Phosgene, Hydrogen chloride gas", + "7": "Hazardous Polymerization Hazardous polymerization does not occur.", + "8": "Hazardous Reactions None under normal processing." + }, + "11. Toxicological information": { + "0": "Acute Toxicity", + "1": "Product Information", + "2": "Component Information", + "3": "Component LD50 Oral LD50 Dermal LC50 Inhalation", + "4": "Chloroform LD50 = 908 mg/kg (rat) LD50 > 20 g/kg ( Rabbit ) LC50 = 10.5 mg/L ( Rat ) 4 h", + "5": "LD50 = 695 mg/kg ( Rat )", + "6": "LD50 = 450 mg/kg ( Rat )", + "7": "Ethyl alcohol LD50 = 10470 mg/kg Not listed LC50 = 117-125 mg/l (4h)", + "8": "OCED 401 (Rat) OECD 403 (rat)", + "9": "3450 mg/kg ( Mouse ) 20000 ppm/10H (rat)", + "10": "Toxicologically Synergistic No information available", + "11": "Products", + "12": "Delayed and immediate effects as well as chronic effects from short and long-term exposure", + "13": "Irritation Irritating to eyes and skin", + "14": "______________________________________________________________________________________________", + "15": "Page 5 / 10Chloroform, stabilized with ethanol Revision Date 24-Dec-2021", + "16": "______________________________________________________________________________________________", + "17": "Sensitization No information available", + "18": "Carcinogenicity The table below indicates whether each agency has listed any ingredient as a carcinogen.", + "19": "Limited evidence of a carcinogenic effect. Ethanol has been shown to be carcinogenic in", + "20": "long-term studies only when consumed and abused as an alcoholic beverage.", + "21": "Component CAS No IARC NTP ACGIH OSHA Mexico", + "22": "Chloroform 67-66-3 Group 2B Reasonably A3 X A3", + "23": "Anticipated", + "24": "Ethyl alcohol 64-17-5 Not listed Known A3 Not listed A3", + "25": "IARC (International Agency for Research on Cancer) IARC (International Agency for Research on Cancer)", + "26": "Group 1 - Carcinogenic to Humans", + "27": "Group 2A - Probably Carcinogenic to Humans", + "28": "Group 2B - Possibly Carcinogenic to Humans", + "NTP": "(National Toxicity Program) NTP: (National Toxicity Program)", + "30": "Known - Known Carcinogen", + "31": "Reasonably Anticipated - Reasonably Anticipated to be a Human", + "32": "Carcinogen", + "ACGIH": "(American Conference of Governmental Industrial Hygienists)", + "34": "Hygienists) A2 - Suspected Human Carcinogen", + "35": "A3 - Animal Carcinogen", + "36": "Mexico - Occupational Exposure Limits - Carcinogens Mexico - Occupational Exposure Limits - Carcinogens", + "37": "A1 - Confirmed Human Carcinogen", + "38": "A2 - Suspected Human Carcinogen", + "39": "A3 - Confirmed Animal Carcinogen", + "40": "A4 - Not Classifiable as a Human Carcinogen", + "41": "A5 - Not Suspected as a Human Carcinogen", + "42": "Mutagenic Effects No information available", + "43": "Reproductive Effects SUSPECT REPRODUCTIVE HAZARD - CONTAINS MATERIAL WHICH MAY INJURE", + "44": "UNBORN CHILD (CAUSE BIRTH DEFECTS) (BASED ON ANIMAL DATA).", + "45": "Developmental Effects No information available.", + "46": "Teratogenicity No information available.", + "47": "STOT - single exposure Respiratory system Central nervous system (CNS)", + "48": "STOT - repeated exposure Heart Liver Kidney Blood", + "49": "Aspiration hazard No information available", + "50": "Symptoms / effects,both acute and Symptoms of overexposure are dizziness, headache, tiredness, nausea, unconsciousness,", + "delayed cessation of breathing": "May cause decreases in blood pressure and other cardiac effects:", + "52": "Symptoms may be delayed", + "53": "Endocrine Disruptor Information No information available", + "54": "Other Adverse Effects Tumorigenic effects have been reported in experimental animals. See actual entry in", + "55": "RTECS for complete information." + }, + "12. Ecological information": { + "0": "Ecotoxicity", + "1": "Do not empty into drains. Harmful to aquatic organisms, may cause long-term adverse effects in the aquatic environment. The", + "2": "product contains following substances which are hazardous for the environment.", + "3": "Component Freshwater Algae Freshwater Fish Microtox Water Flea", + "Chloroform EC50 = 560 mg/L/48h LC50": "= 300 mg/L, 96h static Photobacterium EC50 = 28.9 mg/L/48h", + "(Poecilia reticulata) phosphoreum": "EC50 = 520", + "LC50": "= 71 mg/L, 96h mg/L/30min", + "7": "flow-through (Lepomis Photobacterium", + "macrochirus) phosphoreum": "EC50 = 670", + "9": "______________________________________________________________________________________________", + "10": "Page 6 / 10Chloroform, stabilized with ethanol Revision Date 24-Dec-2021", + "11": "______________________________________________________________________________________________", + "12": "flow-through (Oncorhynchus Photobacterium", + "mykiss) phosphoreum": "EC50 = 670", + "14": "flow-through (Pimephales", + "15": "promelas)", + "16": "Ethyl alcohol EC50 (72h) = 275 mg/l Fathead minnow Photobacterium EC50 = 9268 mg/L/48h", + "(Chlorella vulgaris) (Pimephales promelas) phosphoreum": "EC50 = 34634 EC50 = 10800 mg/L/24h", + "18": "LC50 = 14200 mg/l/96h mg/L/30 min", + "19": "Photobacterium", + "phosphoreum": "EC50 = 35470", + "21": "mg/L/5 min", + "22": "Persistence and Degradability Persistence is unlikely based on information available.", + "23": "Bioaccumulation/ Accumulation No information available.", + "24": "Mobility Will likely be mobile in the environment due to its volatility.", + "25": "Component log Pow", + "26": "Chloroform 2", + "27": "Ethyl alcohol -0.32" + }, + "13. Disposal considerations": { + "0": "Waste Disposal Methods Chemical waste generators must determine whether a discarded chemical is classified as a", + "1": "hazardous waste. Chemical waste generators must also consult local, regional, and", + "2": "national hazardous waste regulations to ensure complete and accurate classification.", + "3": "Component RCRA - U Series Wastes RCRA - P Series Wastes", + "4": "Chloroform - 67-66-3 U044 -" + }, + "14. Transport information": { + "0": "DOT", + "1": "UN-No UN1888", + "2": "Proper Shipping Name CHLOROFORM", + "3": "Hazard Class 6.1", + "4": "Packing Group III", + "5": "TDG", + "6": "UN-No UN1888", + "7": "Proper Shipping Name CHLOROFORM", + "8": "Hazard Class 6.1", + "9": "Packing Group III", + "10": "IATA", + "11": "UN-No UN1888", + "12": "Proper Shipping Name CHLOROFORM", + "13": "Hazard Class 6.1", + "14": "Packing Group III", + "15": "IMDG/IMO", + "16": "UN-No UN1888", + "17": "Proper Shipping Name CHLOROFORM", + "18": "Hazard Class 6.1", + "19": "Packing Group III" + }, + "15. Regulatory information": { + "0": "United States of America Inventory", + "1": "Component CAS No TSCA TSCA Inventory notification - TSCA - EPA Regulatory", + "2": "Active-Inactive Flags", + "3": "Chloroform 67-66-3 X ACTIVE -", + "4": "Ethyl alcohol 64-17-5 X ACTIVE -", + "5": "______________________________________________________________________________________________", + "6": "Page 7 / 10Chloroform, stabilized with ethanol Revision Date 24-Dec-2021", + "7": "______________________________________________________________________________________________", + "Legend": "", + "9": "TSCA US EPA (TSCA) - Toxic Substances Control Act, (40 CFR Part 710)", + "10": "X - Listed", + "11": "'-' - Not Listed", + "12": "TSCA 12(b) - Notices of Export Not applicable", + "13": "International Inventories", + "14": "Canada (DSL/NDSL), Europe (EINECS/ELINCS/NLP), Philippines (PICCS), Japan (ENCS), Japan (ISHL), Australia (AICS), China (IECSC), Korea", + "15": "(KECL).", + "16": "Component CAS No DSL NDSL EINECS PICCS ENCS ISHL AICS IECSC KECL", + "17": "Chloroform 67-66-3 X - 200-663-8 X X X X X X", + "18": "Ethyl alcohol 64-17-5 X - 200-578-6 X X X X X KE-13217", + "KECL - NIER number or KE number (http": "//ncis.nier.go.kr/en/main.do)", + "20": "U.S. Federal Regulations", + "21": "SARA 313", + "22": "Component CAS No Weight % SARA 313 - Threshold", + "23": "Values %", + "24": "Chloroform 67-66-3 >99 0.1", + "25": "SARA 311/312 Hazard Categories See section 2 for more information", + "26": "CWA (Clean Water Act)", + "27": "Component CWA - Hazardous CWA - Reportable CWA - Toxic Pollutants CWA - Priority Pollutants", + "28": "Substances Quantities", + "29": "Chloroform X 10 lb X X", + "30": "Clean Air Act", + "31": "Component HAPS Data Class 1 Ozone Depletors Class 2 Ozone Depletors", + "32": "Chloroform X -", + "33": "OSHA - Occupational Safety and Not applicable", + "34": "Health Administration", + "35": "CERCLA This material, as supplied, contains one or more substances regulated as a hazardous", + "36": "substance under the Comprehensive Environmental Response Compensation and Liability", + "37": "Act (CERCLA) (40 CFR 302)", + "38": "Component Hazardous Substances RQs CERCLA EHS RQs", + "39": "Chloroform 10 lb 1 lb 10 lb", + "40": "California Proposition 65 This product contains the following Proposition 65 chemicals.", + "41": "Component CAS No California Prop. 65 Prop 65 NSRL Category", + "42": "Chloroform 67-66-3 Carcinogen 20 \u00b5g/day Developmental", + "43": "Developmental 40 \u00b5g/day Carcinogen", + "44": "Ethyl alcohol 64-17-5 Development (alcoholic - Developmental", + "45": "beverages only) Carcinogen", + "46": "Carcinogen", + "47": "U.S. State Right-to-Know", + "48": "Regulations", + "49": "Component Massachusetts New Jersey Pennsylvania Illinois Rhode Island", + "50": "Chloroform X X X X X", + "51": "Ethyl alcohol X X X X X", + "52": "______________________________________________________________________________________________", + "53": "Page 8 / 10Chloroform, stabilized with ethanol Revision Date 24-Dec-2021", + "54": "______________________________________________________________________________________________", + "55": "U.S. Department of Transportation", + "Reportable Quantity (RQ)": "Y", + "57": "DOT Marine Pollutant N", + "58": "DOT Severe Marine Pollutant N", + "U.S. Department of Homeland This product contains the following DHS chemicals": "", + "60": "Security Legend - STQs = Screening Threshold Quantities, APA = A placarded amount", + "61": "Component DHS Chemical Facility Anti-Terrorism Standard", + "62": "Chloroform Release STQs - 20000lb", + "63": "Other International Regulations", + "64": "Mexico - Grade No information available", + "65": "Authorisation/Restrictions according to EU REACH", + "66": "Component REACH (1907/2006) - Annex XIV - REACH (1907/2006) - Annex XVII - REACH Regulation (EC", + "67": "Substances Subject to Restrictions on Certain Dangerous 1907/2006) article 59 - Candidate", + "68": "Authorization Substances List of Substances of Very High", + "69": "Concern (SVHC)", + "70": "Chloroform - Use restricted. See item 32. -", + "71": "(see", + "http": "//eur-lex.europa.eu/LexUriServ/L", + "exUriServ.do?uri=CELEX": "32006R190", + "7": "EN:NOT for restriction details)", + "https": "//echa.europa.eu/substances-restricted-under-reach", + "76": "Safety, health and environmental regulations/legislation specific for the substance or mixture", + "77": "Component CAS No OECD HPV Persistent Organic Ozone Depletion Restriction of", + "78": "Pollutant Potential Hazardous", + "79": "Substances (RoHS)", + "80": "Chloroform 67-66-3 Listed Not applicable Not applicable Not applicable", + "81": "Ethyl alcohol 64-17-5 Listed Not applicable Not applicable Not applicable", + "82": "Component CAS No Seveso III Directive Seveso III Directive Rotterdam Basel Convention", + "83": "(2012/18/EC) - (2012/18/EC) - Convention (PIC) (Hazardous Waste)", + "84": "Qualifying QuantitiesQualifying Quantities", + "85": "for Major Accident for Safety Report", + "86": "Notification Requirements", + "87": "Chloroform 67-66-3 Not applicable Not applicable Not applicable Annex I - Y45", + "88": "Ethyl alcohol 64-17-5 Not applicable Not applicable Not applicable Annex I - Y42" + }, + "16. Other information": { + "0": "Prepared By Regulatory Affairs", + "1": "Thermo Fisher Scientific", + "Email": "EMSDS.RA@thermofisher.com", + "3": "Creation Date 20-Jan-2010", + "4": "Revision Date 24-Dec-2021", + "5": "Print Date 24-Dec-2021", + "6": "Revision Summary This document has been updated to comply with the US OSHA HazCom 2012 Standard", + "7": "replacing the current legislation under 29 CFR 1910.1200 to align with the Globally", + "8": "Harmonized System of Classification and Labeling of Chemicals (GHS).", + "9": "Disclaimer", + "10": "The information provided in this Safety Data Sheet is correct to the best of our knowledge, information and belief at the", + "11": "date of its publication. The information given is designed only as a guidance for safe handling, use, processing, storage,", + "12": "transportation, disposal and release and is not to be considered a warranty or quality specification. The information", + "13": "relates only to the specific material designated and may not be valid for such material used in combination with any other", + "14": "______________________________________________________________________________________________", + "15": "Page 9 / 10Chloroform, stabilized with ethanol Revision Date 24-Dec-2021", + "16": "______________________________________________________________________________________________", + "17": "materials or in any process, unless specified in the text", + "18": "End of SDS", + "19": "______________________________________________________________________________________________", + "20": "Page 10 / 10" + } +} \ No newline at end of file diff --git a/test.py b/test.py index ae60efd2..18a8e30b 100644 --- a/test.py +++ b/test.py @@ -1,18 +1,61 @@ import pdfplumber +import re +import json -with pdfplumber.open(r'data\acetone-acs-l.pdf') as pdf: - - text1 = [] +def extract_text_from_pdf(file_path): + with pdfplumber.open(file_path) as pdf: + text = "" + for page in pdf.pages: + text += page.extract_text() + return text + +def parse_content_with_regex(text): + section_regex = re.compile(r"^\d+\.\s[A-Za-z()]+.*$") + key_value_regex = re.compile(r"^(.*?):\s*(.*)$") - for page in pdf.pages: - - text = page.extract_text() - if text: - text1.append(text) + structured_data = {} + current_section = None + current_subsection = None + section_data = {} + + for line in text.splitlines(): + line = line.strip() + + if section_regex.match(line): + if current_section: + structured_data[current_section] = section_data + current_section = line + section_data = {} + + elif key_value_regex.match(line): + key, value = key_value_regex.findall(line)[0] + section_data[key.strip()] = value.strip() + + elif current_subsection: + section_data[current_subsection] += " " + line.strip() + + else: + if current_section: + if line: + section_data[len(section_data)] = line.strip() + + if current_section: + structured_data[current_section] = section_data + + return structured_data + +def create_json_output(parsed_data): + return json.dumps(parsed_data, indent=4) + +pdf_path = r'data\chloroform-certified-acs-l.pdf' + +pdf_text = extract_text_from_pdf(pdf_path) + +parsed_content = parse_content_with_regex(pdf_text) -text2 = "\n".join(text1) +json_output = create_json_output(parsed_content) -print(text2) +print(json_output) -with open("extracted_text.txt", "w") as text_file: - text_file.write(text2) \ No newline at end of file +with open('output.json', 'w') as json_file: + json_file.write(json_output)