Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,26 @@ def main():
print("📥 Extracting data from sources...")

# TODO: Call the extraction functions
# airports = extract_airports()
# flights = extract_flights()
airports = extract_airports()
flights = extract_flights()

# Uncomment the lines above once you've implemented the functions
print("⚠️ Extraction functions not yet implemented")
return
# return

# Step 2: Transform data
print("\n=== TRANSFORMATION ===")
print("🔄 Cleaning and transforming data...")

# TODO: Call the transformation functions
# clean_airports_data = clean_airports(airports)
# clean_flights_data = clean_flights(flights)
# final_airports, final_flights = combine_data(clean_airports_data, clean_flights_data)

clean_airports_data = clean_airports(airports)
clean_flights_data = clean_flights(flights)
final_airports, final_flights = combine_data(clean_airports_data, clean_flights_data)
# return
# Step 3: Load data
print("\n=== LOADING ===")
print("💾 Loading data to database...")

# return
# TODO: Call the loading function
# load_to_database(final_airports, final_flights)

Expand All @@ -56,6 +56,6 @@ def main():

print("\n🎉 ETL Pipeline completed!")
print("=" * 50)

return
if __name__ == "__main__":
main()
25 changes: 13 additions & 12 deletions src/extract_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ def extract_airports():
# TODO: Read the airports.csv file using pandas
# The file is located at: data/airports.csv
# Hint: Use pd.read_csv()

df=pd.read_csv('data/airports.csv')
# For now, return an empty DataFrame
df = pd.DataFrame()
#df = pd.DataFrame()

# TODO: Print how many airports were loaded
# Example: print(f"Loaded {len(df)} airports")

print("⚠️ Airport extraction not yet implemented")
print(f"Loaded {len(df)} airports")
#print("⚠️ Airport extraction not yet implemented")
return df

except Exception as e:
Expand Down Expand Up @@ -63,26 +63,27 @@ def extract_flights():

# TODO: Make the API request using requests.get()
# Hint: response = requests.get(url, params=params, timeout=10)

reponse=requests.get(url,params=params,timeout=10)
# TODO: Check if the response is successful
# Hint: Check response.status_code == 200

check=reponse.status_code==200
# TODO: Get the JSON data from the response
# Hint: data = response.json()

data=reponse.json()
# TODO: Extract the 'states' data from the JSON
# The API returns: {"time": 123456789, "states": [[aircraft_data], [aircraft_data], ...]}
# Hint: states = data['states'] if data['states'] else []

states = data['states'] if data['states'] else []
# TODO: Convert to DataFrame
# Hint: df = pd.DataFrame(states)

df=pd.DataFrame(states)

# TODO: Print how many flights were found
# Example: print(f"Found {len(df)} active flights")

print(f"Found {len(df)} active flights")
# For now, return empty DataFrame
print("⚠️ Flight extraction not yet implemented")
return pd.DataFrame()
# print("⚠️ Flight extraction not yet implemented")
return df

except requests.exceptions.RequestException as e:
print(f"❌ Network error fetching flight data: {e}")
Expand Down
26 changes: 17 additions & 9 deletions src/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
# Database connection configuration
# TODO: Update these values with your actual database credentials
DATABASE_CONFIG = {
'username': 'your_username',
'password': 'your_password',
'username': 'postgres',
'password': 'supaero2025',
'host': 'localhost',
'port': '5432',
'database': 'airlife_db'
Expand All @@ -37,12 +37,12 @@ def load_to_database(airports_df, flights_df):

# TODO: Create connection string using the function above
# connection_string = get_connection_string()

connection_string = get_connection_string()
try:
# TODO: Create SQLAlchemy engine
# Hint: engine = create_engine(connection_string)

print("⚠️ Database loading not yet implemented")
engine = create_engine(connection_string)
# print("⚠️ Database loading not yet implemented")
return

# TODO: Load airports data
Expand Down Expand Up @@ -93,22 +93,30 @@ def verify_data():
# TODO: Count airports in database
# Hint: airports_count = pd.read_sql("SELECT COUNT(*) as count FROM airports", engine)
# print(f"📊 Airports in database: {airports_count.iloc[0]['count']}")

airports_count = pd.read_sql("SELECT COUNT(*) as count FROM airports", engine)
print(f"📊 Airports in database: {airports_count.iloc[0]['count']}")
# TODO: Count flights in database
# Hint: flights_count = pd.read_sql("SELECT COUNT(*) as count FROM flights", engine)
# print(f"📊 Flights in database: {flights_count.iloc[0]['count']}")

flights_count = pd.read_sql("SELECT COUNT(*) as count FROM flights", engine)
print(f"📊 Flights in database: {flights_count.iloc[0]['count']}")
# TODO: Show sample airport data
# Hint: sample_airports = pd.read_sql("SELECT name, city, country FROM airports LIMIT 3", engine)
# print("\n📋 Sample airports:")
# print(sample_airports.to_string(index=False))

sample_airports = pd.read_sql("SELECT name, city, country FROM airports LIMIT 3", engine)
print("\n📋 Sample airports:")
print(sample_airports.to_string(index=False))
# TODO: Show sample flight data (if any exists)
# Hint: Check if flights table has data first
# sample_flights = pd.read_sql("SELECT callsign, origin_country, altitude FROM flights LIMIT 3", engine)
# if not sample_flights.empty:
# print("\n✈️ Sample flights:")
# print(sample_flights.to_string(index=False))
print(sample_flights.to_string(index=False))
sample_flights = pd.read_sql("SELECT callsign, origin_country, altitude FROM flights LIMIT 3", engine)
if not sample_flights.empty:
print("\n✈️ Sample flights:")
print(sample_flights.to_string(index=False))

except Exception as e:
print(f"❌ Error verifying data: {e}")
Expand Down
34 changes: 18 additions & 16 deletions src/transform_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,24 @@ def clean_airports(airports_df):
# TODO: Remove rows with missing latitude or longitude
# Hint: Use .dropna(subset=['latitude', 'longitude'])
# df = df.dropna(subset=['latitude', 'longitude'])

df = df.dropna(subset=['latitude', 'longitude'])
# TODO: Remove airports with invalid coordinates
# Latitude should be between -90 and 90
# Longitude should be between -180 and 180
# Hint: df = df[(df['latitude'] >= -90) & (df['latitude'] <= 90)]
df = df[(df['latitude'] >= -90) & (df['latitude'] <= 90)]
# Hint: df = df[(df['longitude'] >= -180) & (df['longitude'] <= 180)]

df = df[(df['longitude'] >= -180) & (df['longitude'] <= 180)]
# TODO: Handle missing IATA codes (replace empty strings or 'N' with None)
# Hint: df['iata_code'] = df['iata_code'].replace(['', 'N', '\\N'], None)

df['iata_code'] = df['iata_code'].replace(['', 'N', '\\N'], None)
# TODO: Convert altitude to numeric (handle non-numeric values)
# Hint: df['altitude'] = pd.to_numeric(df['altitude'], errors='coerce')

df['altitude'] = pd.to_numeric(df['altitude'], errors='coerce')
# TODO: Print how many airports remain after cleaning
# print(f"After cleaning: {len(df)} airports remain")

print("⚠️ Airport cleaning not yet implemented")
print(f"After cleaning: {len(df)} airports remain")
#print("⚠️ Airport cleaning not yet implemented")
return df

def clean_flights(flights_df):
Expand Down Expand Up @@ -88,29 +89,30 @@ def clean_flights(flights_df):

# Make a copy to avoid modifying the original
df = flights_df.copy()

df=df.iloc[:,0:12]
# TODO: Assign column names to the DataFrame
# Hint: df.columns = expected_columns

df.columns = expected_columns
# TODO: Remove flights with missing coordinates
# Hint: df = df.dropna(subset=['longitude', 'latitude'])

df = df.dropna(subset=['longitude', 'latitude'])
# TODO: Convert altitude from meters to feet (multiply by 3.28084)
# This makes it easier to understand for aviation
# Hint: df['altitude'] = df['altitude'] * 3.28084

df['altitude'] = df['altitude'] * 3.28084
# TODO: Remove flights with invalid coordinates
# Same coordinate bounds as airports
# Hint: df = df[(df['latitude'] >= -90) & (df['latitude'] <= 90)]
# Hint: df = df[(df['longitude'] >= -180) & (df['longitude'] <= 180)]

df = df[(df['latitude'] >= -90) & (df['latitude'] <= 90)]
df = df[(df['longitude'] >= -180) & (df['longitude'] <= 180)]
# TODO: Clean callsign (remove extra whitespace)
# Hint: df['callsign'] = df['callsign'].str.strip()

df['callsign'] = df['callsign'].str.strip()
# TODO: Print how many flights remain after cleaning
# print(f"After cleaning: {len(df)} flights remain")

print("⚠️ Flight cleaning not yet implemented")
print(f"After cleaning: {len(df)} flights remain")
#print("⚠️ Flight cleaning not yet implemented")
return df

def combine_data(airports_df, flights_df):
Expand Down Expand Up @@ -139,8 +141,8 @@ def combine_data(airports_df, flights_df):
# TODO (Optional): If you want to try something more advanced,
# you could find the nearest airport for each flight:
#
# def find_nearest_airport(flight_lat, flight_lon, airports_df):
# # Calculate distances and return nearest airport
#def find_nearest_airport(flight_lat, flight_lon, airports_df):
# Calculate distances and return nearest airport
# pass

return airports_df, flights_df
Expand Down