diff --git a/main.py b/main.py index 3d07bd9..ee23b72 100644 --- a/main.py +++ b/main.py @@ -24,26 +24,26 @@ def main(): print("šŸ“„ Extracting data from sources...") # TODO: Call the extraction functions - # airports = extract_airports() - # flights = extract_flights() + airports = extract_airports() + flights = extract_flights() # Uncomment the lines above once you've implemented the functions print("āš ļø Extraction functions not yet implemented") - return + # return # Step 2: Transform data print("\n=== TRANSFORMATION ===") print("šŸ”„ Cleaning and transforming data...") # TODO: Call the transformation functions - # clean_airports_data = clean_airports(airports) - # clean_flights_data = clean_flights(flights) - # final_airports, final_flights = combine_data(clean_airports_data, clean_flights_data) - + clean_airports_data = clean_airports(airports) + clean_flights_data = clean_flights(flights) + final_airports, final_flights = combine_data(clean_airports_data, clean_flights_data) + # return # Step 3: Load data print("\n=== LOADING ===") print("šŸ’¾ Loading data to database...") - + # return # TODO: Call the loading function # load_to_database(final_airports, final_flights) @@ -56,6 +56,6 @@ def main(): print("\nšŸŽ‰ ETL Pipeline completed!") print("=" * 50) - + return if __name__ == "__main__": main() diff --git a/src/extract_data.py b/src/extract_data.py index e50e07c..d12606d 100644 --- a/src/extract_data.py +++ b/src/extract_data.py @@ -24,14 +24,14 @@ def extract_airports(): # TODO: Read the airports.csv file using pandas # The file is located at: data/airports.csv # Hint: Use pd.read_csv() - + df=pd.read_csv('data/airports.csv') # For now, return an empty DataFrame - df = pd.DataFrame() + #df = pd.DataFrame() # TODO: Print how many airports were loaded # Example: print(f"Loaded {len(df)} airports") - - print("āš ļø Airport extraction not yet implemented") + print(f"Loaded {len(df)} airports") + #print("āš ļø Airport extraction not yet implemented") return df except Exception as e: @@ -63,26 +63,27 @@ def extract_flights(): # TODO: Make the API request using requests.get() # Hint: response = requests.get(url, params=params, timeout=10) - + reponse=requests.get(url,params=params,timeout=10) # TODO: Check if the response is successful # Hint: Check response.status_code == 200 - + check=reponse.status_code==200 # TODO: Get the JSON data from the response # Hint: data = response.json() - + data=reponse.json() # TODO: Extract the 'states' data from the JSON # The API returns: {"time": 123456789, "states": [[aircraft_data], [aircraft_data], ...]} # Hint: states = data['states'] if data['states'] else [] - + states = data['states'] if data['states'] else [] # TODO: Convert to DataFrame # Hint: df = pd.DataFrame(states) - + df=pd.DataFrame(states) + # TODO: Print how many flights were found # Example: print(f"Found {len(df)} active flights") - + print(f"Found {len(df)} active flights") # For now, return empty DataFrame - print("āš ļø Flight extraction not yet implemented") - return pd.DataFrame() + # print("āš ļø Flight extraction not yet implemented") + return df except requests.exceptions.RequestException as e: print(f"āŒ Network error fetching flight data: {e}") diff --git a/src/load_data.py b/src/load_data.py index 3cff5f0..11d269a 100644 --- a/src/load_data.py +++ b/src/load_data.py @@ -14,8 +14,8 @@ # Database connection configuration # TODO: Update these values with your actual database credentials DATABASE_CONFIG = { - 'username': 'your_username', - 'password': 'your_password', + 'username': 'postgres', + 'password': 'supaero2025', 'host': 'localhost', 'port': '5432', 'database': 'airlife_db' @@ -37,12 +37,12 @@ def load_to_database(airports_df, flights_df): # TODO: Create connection string using the function above # connection_string = get_connection_string() - + connection_string = get_connection_string() try: # TODO: Create SQLAlchemy engine # Hint: engine = create_engine(connection_string) - - print("āš ļø Database loading not yet implemented") + engine = create_engine(connection_string) + # print("āš ļø Database loading not yet implemented") return # TODO: Load airports data @@ -93,22 +93,30 @@ def verify_data(): # TODO: Count airports in database # Hint: airports_count = pd.read_sql("SELECT COUNT(*) as count FROM airports", engine) # print(f"šŸ“Š Airports in database: {airports_count.iloc[0]['count']}") - + airports_count = pd.read_sql("SELECT COUNT(*) as count FROM airports", engine) + print(f"šŸ“Š Airports in database: {airports_count.iloc[0]['count']}") # TODO: Count flights in database # Hint: flights_count = pd.read_sql("SELECT COUNT(*) as count FROM flights", engine) # print(f"šŸ“Š Flights in database: {flights_count.iloc[0]['count']}") - + flights_count = pd.read_sql("SELECT COUNT(*) as count FROM flights", engine) + print(f"šŸ“Š Flights in database: {flights_count.iloc[0]['count']}") # TODO: Show sample airport data # Hint: sample_airports = pd.read_sql("SELECT name, city, country FROM airports LIMIT 3", engine) # print("\nšŸ“‹ Sample airports:") # print(sample_airports.to_string(index=False)) - + sample_airports = pd.read_sql("SELECT name, city, country FROM airports LIMIT 3", engine) + print("\nšŸ“‹ Sample airports:") + print(sample_airports.to_string(index=False)) # TODO: Show sample flight data (if any exists) # Hint: Check if flights table has data first # sample_flights = pd.read_sql("SELECT callsign, origin_country, altitude FROM flights LIMIT 3", engine) # if not sample_flights.empty: # print("\nāœˆļø Sample flights:") - # print(sample_flights.to_string(index=False)) + print(sample_flights.to_string(index=False)) + sample_flights = pd.read_sql("SELECT callsign, origin_country, altitude FROM flights LIMIT 3", engine) + if not sample_flights.empty: + print("\nāœˆļø Sample flights:") + print(sample_flights.to_string(index=False)) except Exception as e: print(f"āŒ Error verifying data: {e}") diff --git a/src/transform_data.py b/src/transform_data.py index d99f5fd..80e8fcd 100644 --- a/src/transform_data.py +++ b/src/transform_data.py @@ -33,23 +33,24 @@ def clean_airports(airports_df): # TODO: Remove rows with missing latitude or longitude # Hint: Use .dropna(subset=['latitude', 'longitude']) # df = df.dropna(subset=['latitude', 'longitude']) - + df = df.dropna(subset=['latitude', 'longitude']) # TODO: Remove airports with invalid coordinates # Latitude should be between -90 and 90 # Longitude should be between -180 and 180 # Hint: df = df[(df['latitude'] >= -90) & (df['latitude'] <= 90)] + df = df[(df['latitude'] >= -90) & (df['latitude'] <= 90)] # Hint: df = df[(df['longitude'] >= -180) & (df['longitude'] <= 180)] - + df = df[(df['longitude'] >= -180) & (df['longitude'] <= 180)] # TODO: Handle missing IATA codes (replace empty strings or 'N' with None) # Hint: df['iata_code'] = df['iata_code'].replace(['', 'N', '\\N'], None) - + df['iata_code'] = df['iata_code'].replace(['', 'N', '\\N'], None) # TODO: Convert altitude to numeric (handle non-numeric values) # Hint: df['altitude'] = pd.to_numeric(df['altitude'], errors='coerce') - + df['altitude'] = pd.to_numeric(df['altitude'], errors='coerce') # TODO: Print how many airports remain after cleaning # print(f"After cleaning: {len(df)} airports remain") - - print("āš ļø Airport cleaning not yet implemented") + print(f"After cleaning: {len(df)} airports remain") + #print("āš ļø Airport cleaning not yet implemented") return df def clean_flights(flights_df): @@ -88,29 +89,30 @@ def clean_flights(flights_df): # Make a copy to avoid modifying the original df = flights_df.copy() - + df=df.iloc[:,0:12] # TODO: Assign column names to the DataFrame # Hint: df.columns = expected_columns - + df.columns = expected_columns # TODO: Remove flights with missing coordinates # Hint: df = df.dropna(subset=['longitude', 'latitude']) - + df = df.dropna(subset=['longitude', 'latitude']) # TODO: Convert altitude from meters to feet (multiply by 3.28084) # This makes it easier to understand for aviation # Hint: df['altitude'] = df['altitude'] * 3.28084 - + df['altitude'] = df['altitude'] * 3.28084 # TODO: Remove flights with invalid coordinates # Same coordinate bounds as airports # Hint: df = df[(df['latitude'] >= -90) & (df['latitude'] <= 90)] # Hint: df = df[(df['longitude'] >= -180) & (df['longitude'] <= 180)] - + df = df[(df['latitude'] >= -90) & (df['latitude'] <= 90)] + df = df[(df['longitude'] >= -180) & (df['longitude'] <= 180)] # TODO: Clean callsign (remove extra whitespace) # Hint: df['callsign'] = df['callsign'].str.strip() - + df['callsign'] = df['callsign'].str.strip() # TODO: Print how many flights remain after cleaning # print(f"After cleaning: {len(df)} flights remain") - - print("āš ļø Flight cleaning not yet implemented") + print(f"After cleaning: {len(df)} flights remain") + #print("āš ļø Flight cleaning not yet implemented") return df def combine_data(airports_df, flights_df): @@ -139,8 +141,8 @@ def combine_data(airports_df, flights_df): # TODO (Optional): If you want to try something more advanced, # you could find the nearest airport for each flight: # - # def find_nearest_airport(flight_lat, flight_lon, airports_df): - # # Calculate distances and return nearest airport + #def find_nearest_airport(flight_lat, flight_lon, airports_df): + # Calculate distances and return nearest airport # pass return airports_df, flights_df