|
| 1 | +import os |
| 2 | +import requests |
| 3 | +from datetime import datetime, timedelta |
| 4 | +import subprocess |
| 5 | +from helper import parse_messages, write_to_file, create_folder |
| 6 | +from settings import GRIB2_FILES_PATH |
| 7 | + |
| 8 | +# move DATA_FOLDER to a settings file and import it here |
| 9 | + |
# Timestamp captured once, when this module is imported.
start_loading_time = datetime.now()
# Header text written to the AVAILABLE file before any per-file entries.
# NOTE(review): column spacing is reproduced as seen in the reviewed copy,
# which may have collapsed repeated spaces -- confirm against the real file.
available_template_header = """XXXXXX EMPTY LINES XXXXXXXXX
XXXXXX EMPTY LINES XXXXXXXX
YYYYMMDD HHMMSS name of the file(up to 80 characters)
"""
| 15 | + |
| 16 | + |
def parse_available_file(date, hour, file_name):
    """Append one entry for ``file_name`` to the AVAILABLE file.

    Args:
        date: Object providing ``strftime``; rendered as ``YYYYMMDD``.
        hour: Hour text placed directly in front of the literal ``0000``.
        file_name: File name recorded in the entry.
    """
    day_stamp = date.strftime('%Y%m%d')
    # NOTE(review): spacing between the columns is reproduced as seen in the
    # reviewed copy -- confirm it matches what the AVAILABLE reader expects.
    entry = f"{day_stamp} {hour}0000 {file_name} ON DISC\n"
    # 'a' is passed through to write_to_file as the mode argument.
    write_to_file('', 'AVAILABLE', entry, 'a')
| 20 | + |
| 21 | + |
# Inclusive (first, last) record-number ranges that are copied from the
# source GRIB2 file into the reduced file; every other record is dropped.
_KEPT_RECORD_RANGES = (
    (1, 6), (9, 14), (75, 76), (83, 84), (95, 97), (101, 102),
    (104, 107), (111, 112), (114, 117), (121, 122), (124, 127),
    (131, 132), (134, 137), (141, 142), (144, 147), (151, 156),
    (163, 167), (171, 177), (181, 186), (193, 198), (202, 213),
    (217, 224), (226, 240), (242, 256), (258, 272), (274, 288),
    (290, 304), (306, 320), (322, 336), (338, 347), (349, 352),
    (354, 368), (370, 379), (381, 384), (386, 395), (397, 400),
    (402, 411), (413, 416), (418, 432), (434, 443), (445, 448),
    (450, 459), (461, 464), (466, 480), (482, 491), (493, 496),
    (498, 507), (509, 512), (514, 523), (525, 529), (531, 540),
    (542, 544), (546, 558), (561, 565), (567, 568), (570, 571),
    (573, 574), (577, 578), (580, 593), (598, 607), (613, 676),
    (679, 683), (685, 696),
)


def compress_grib_record(file_path, new_file_path):
    """Copy the selected record ranges of a GRIB2 file into a new file.

    ``wgrib2`` is run once per range in ``_KEPT_RECORD_RANGES``; the records
    it emits are appended to ``new_file_path``.

    Args:
        file_path: Path of the source GRIB2 file.
        new_file_path: Path of the file the selected records are appended to.
    """
    import shlex

    # Quote both paths: the command goes through bash, so an unquoted path
    # containing spaces or shell metacharacters would break (or hijack) it.
    source = shlex.quote(os.fspath(file_path))
    target = shlex.quote(os.fspath(new_file_path))

    failed_ranges = []
    for start, end in _KEPT_RECORD_RANGES:
        # bash is required for the >(...) process substitution that appends
        # the emitted records to the target file.
        command = (
            f"wgrib2 {source} -for_n {start}:{end} "
            f"-grib >(cat >> {target}) > /dev/null 2>&1"
        )
        completed = subprocess.run(command, shell=True, executable="/bin/bash")
        if completed.returncode != 0:
            failed_ranges.append((start, end))

    if failed_ranges:
        # Do not claim success when wgrib2 failed or is not installed.
        # The second argument follows this file's convention for
        # "grib_error:" messages.
        parse_messages(
            f"grib_error: wgrib2 failed for {len(failed_ranges)} record "
            f"range(s) while compressing {file_path}", True)
    else:
        parse_messages(f"File {file_path} updated successfully.\n")
| 28 | + |
| 29 | + |
def prepare_file(current_date, hour, file_name, file_compressed=True):
    """Register a GRIB2 file in AVAILABLE, building its reduced copy if needed.

    For timestamps later than 2021-03-21 06:00 the entry uses the ``_.grib2``
    name; when that file is missing and ``file_compressed`` is false it is
    first produced with ``compress_grib_record``. Otherwise the original
    file name is registered.

    Args:
        current_date: Timestamp of the file, compared against the cut-off.
        hour: Hour text forwarded to ``parse_available_file``.
        file_name: Name of the downloaded file inside ``GRIB2_FILES_PATH``.
        file_compressed: When true, the compression step is skipped.
    """
    source_path = os.path.join(GRIB2_FILES_PATH, file_name)
    reduced_name = file_name.split(".")[0] + "_.grib2"
    reduced_path = os.path.join(GRIB2_FILES_PATH, reduced_name)

    # Files at or before the cut-off are listed under their original name.
    if current_date <= datetime(2021, 3, 21, 6):
        parse_available_file(current_date, hour, file_name)
        return

    needs_compression = not file_compressed
    if needs_compression and not os.path.isfile(reduced_path):
        compress_grib_record(source_path, reduced_path)

    parse_available_file(current_date, hour, reduced_name)
| 44 | + |
| 45 | + |
def _aligned_window(start, end):
    """Return the (first, last) timestamps bounding the 6-hourly files."""
    # Step back to the previous multiple-of-6 hour, or a full 6 hours when
    # ``start`` already sits on one.
    first = start - timedelta(hours=start.hour % 6 or 6)
    # Upper bound only: the loop steps from ``first`` in 6-hour increments,
    # so this value does not itself have to fall on a multiple of 6.
    last = end + timedelta(hours=end.hour % 6 + 6)
    return (
        first.replace(minute=0, second=0, microsecond=0),
        last.replace(minute=0, second=0, microsecond=0),
    )


def _fetch_grib_file(url, file_name, file_path):
    """Download ``url`` into ``file_path``; return True if it was written."""
    parse_messages(f"Trying to download the file {file_path} ")
    try:
        # (connect, read) timeouts so a stalled server cannot hang the run.
        response = requests.get(url, timeout=(10, 60))
    except requests.RequestException:
        parse_messages(
            f"grib_error: Problem with the connection to the URL {url}", True)
        return False

    if response.status_code != 200:
        parse_messages(
            f"grib_error: Failed to download file {file_name} from {url}", True)
        return False

    # The file is closed (and therefore flushed) before the caller hands it
    # to any further processing.
    with open(file_path, "wb") as grib_file:
        grib_file.write(response.content)
    parse_messages(f"File {file_path} downloaded successfully.\n")
    return True


def download_data(start, end):
    """Download 6-hourly GRIB2 files for a period and fill the AVAILABLE file.

    The period is widened with ``_aligned_window``; for every 6-hour step in
    it the file is either found on disk or downloaded, then registered in
    AVAILABLE (via ``prepare_file`` / ``parse_available_file``).

    Args:
        start: ``datetime`` marking the beginning of the requested period.
        end: ``datetime`` marking the end of the requested period.

    Invalid arguments are reported through ``parse_messages`` and nothing is
    downloaded.
    """
    # Measured from the start of this call, not from module import time.
    loading_started = datetime.now()
    parse_messages('Started loading grid data.')

    # isinstance also accepts datetime subclasses.
    if not isinstance(start, datetime):
        parse_messages("grib_error: Start Date is incorrect", True)
        return
    if not isinstance(end, datetime):
        parse_messages("grib_error: End Date is incorrect", True)
        return
    # NOTE(review): the check allows up to 61 days while the message says 60;
    # confirm which limit is intended.
    if (end - start).days > 61:
        parse_messages("grib_error: Difference between start and end dates should be less than 60 days", True)
        return

    # Files exist at hours that are multiples of 6.
    start_date, end_date = _aligned_window(start, end)

    write_to_file('', 'AVAILABLE', available_template_header)

    # URL example: https://data.rda.ucar.edu/ds083.2/grib2/2022/2022.04/fnl_20220419_12_00.grib2
    url_template = "https://data.rda.ucar.edu/ds083.2/grib2/{year}/{year}.{month}/fnl_{date}_{hour}_00.grib2"
    # Create a directory to store the downloaded files
    create_folder(GRIB2_FILES_PATH)

    # Timestamps after this moment are served from the reduced "_.grib2" copy.
    reduced_after = datetime(2021, 3, 21, 6)
    step = timedelta(hours=6)

    current_date = start_date
    while current_date <= end_date:
        hour = str(current_date.hour).zfill(2)
        day = current_date.strftime("%Y%m%d")
        url = url_template.format(
            year=current_date.year, month=current_date.strftime("%m"),
            date=day, hour=hour)

        file_name = f"fnl_{day}_{hour}_00.grib2"
        file_path = os.path.join(GRIB2_FILES_PATH, file_name)
        new_name = file_name.split(".")[0] + "_.grib2"
        new_path = os.path.join(GRIB2_FILES_PATH, new_name)

        if current_date > reduced_after and os.path.isfile(new_path):
            # Reduced copy already on disk: just register it.
            parse_messages(
                f"File {new_path} already exists. Skipping download.")
            parse_available_file(current_date, hour, new_name)
        elif os.path.isfile(file_path):
            parse_messages(
                f"File {file_path} already exists. Skipping download.")
            prepare_file(current_date, hour, file_name, False)
        elif _fetch_grib_file(url, file_name, file_path):
            prepare_file(current_date, hour, file_name, False)

        # Move to the next 6-hour step
        current_date += step

    parse_messages(
        f"Finished loading grid data and filling AVAILABLE file, it took {datetime.now()-loading_started}.\n")
0 commit comments