-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalysis.py
More file actions
98 lines (76 loc) · 3.58 KB
/
analysis.py
File metadata and controls
98 lines (76 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""
Statistical analysis module for tide gate and wildlife detection modeling.
This module provides hypothesis-driven statistical analyses to quantify how
environmental and operational factors influence wildlife detection outcomes.
It is designed to complement descriptive analyses by adding inferential
statistics and predictive modeling.
Analyses included:
- Gate impact analysis using binned gate opening angles and chi-square testing
- Temporal pattern analysis of hourly and seasonal detection rates
- Generalized Linear Model (GLM) for predicting detection probability based on
environmental covariates and time-of-day effects
The module assumes that the input DataFrame has already been merged and cleaned,
with consistent datetime handling and camera activity indicators.
"""
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
def analyze_gate_impact(df):
    """
    Analyze how the tide gate's opening angle relates to detection rates.

    Gate angles are binned into four categories, the mean of
    ``has_camera_data`` is computed per category, and a chi-square test of
    independence is run on the detection-by-category contingency table.
    The summary and the test p-value are also printed.

    Side effect: a ``gate_category`` column is added to (or overwritten on)
    ``df`` in place.

    Args:
        df: DataFrame with a ``has_camera_data`` column and, optionally, a
            ``Gate_Opening_MTR_Deg`` column.

    Returns:
        pd.DataFrame: Detection rate (fraction and percent) per gate category,
            or None when the gate opening column is missing.
        dict: Chi-square results with keys ``chi2``, ``p_value`` and ``dof``,
            or None when the gate column is missing or no row falls into any
            gate category.
    """
    if 'Gate_Opening_MTR_Deg' not in df.columns:
        print("Gate opening data not found. Skipping gate analysis.")
        return None, None

    # The top bin is open-ended so it matches its "(>60°)" label. A closed
    # upper edge of 90 would turn every reading above 90° into NaN and
    # silently drop it from both the summary and the chi-square test.
    df['gate_category'] = pd.cut(
        df['Gate_Opening_MTR_Deg'],
        bins=[-1, 5, 30, 60, float('inf')],
        labels=[
            'Closed (0-5°)',
            'Partially Open (5-30°)',
            'Open (30-60°)',
            'Wide Open (>60°)',
        ],
    )

    # observed=False is spelled out so every gate category appears in the
    # summary (empty ones get NaN) regardless of the pandas default.
    gate_summary = df.groupby('gate_category', observed=False).agg(
        Detection_Rate=('has_camera_data', 'mean')
    ).reset_index()
    gate_summary['Detection_Rate_Pct'] = gate_summary['Detection_Rate'] * 100

    print("--- Gate Impact Analysis ---")
    print(gate_summary)

    # Chi-square test of independence between detection and gate category.
    contingency_table = pd.crosstab(df['has_camera_data'], df['gate_category'])
    if contingency_table.size == 0:
        # chi2_contingency raises on an empty table; report and carry on.
        print("\nChi-square test skipped: no rows with a valid gate category.")
        return gate_summary, None

    chi2, p_value, dof, _expected = stats.chi2_contingency(contingency_table)
    test_results = {'chi2': chi2, 'p_value': p_value, 'dof': dof}
    print(f"\nChi-square test p-value: {p_value:.4f}")
    return gate_summary, test_results
def analyze_temporal_patterns(df):
    """
    Summarize detection rates by hour of day and by calendar month.

    Side effect: ``hour`` and ``month`` columns derived from ``DateTime`` are
    written onto ``df`` in place. The three hours with the highest mean
    ``has_camera_data`` are printed.

    Returns:
        pd.DataFrame: Mean ``has_camera_data`` per ``hour``.
        pd.DataFrame: Mean ``has_camera_data`` per ``month``.
    """
    timestamps = df['DateTime']
    df['hour'] = timestamps.dt.hour
    df['month'] = timestamps.dt.month

    # One mean-detection table per time unit, keyed by the grouping column.
    rate_tables = {
        unit: df.groupby(unit)['has_camera_data'].mean().reset_index()
        for unit in ('hour', 'month')
    }
    hourly_stats = rate_tables['hour']
    monthly_stats = rate_tables['month']

    busiest_hours = hourly_stats.nlargest(3, 'has_camera_data')
    print("\n--- Temporal Pattern Analysis ---")
    print("Peak Activity Hours (by detection rate):")
    print(busiest_hours)

    return hourly_stats, monthly_stats
def run_glm_analysis(df):
    """
    Fit a binomial GLM that predicts detection from environment and time of day.

    The response is ``has_camera_data``. Predictors are whichever of
    ``Air_Temp_C``, ``Tide_Level_In_m``, ``Gate_Opening_MTR_Deg`` and
    ``Wind_Speed_km_h`` exist in ``df``, plus an ``is_night`` indicator that
    is 1 when the hour of ``DateTime`` is before 6 or at/after 18. Only rows
    with no missing value in the response, the timestamp, or any used
    predictor are modeled. ``df`` itself is not modified.

    Args:
        df: DataFrame with ``has_camera_data`` and ``DateTime`` columns and
            at least one of the predictor columns.

    Returns:
        The fitted statsmodels GLM results object, or None when no predictor
        column is present, no complete rows remain, or fitting fails (the
        error is printed).
    """
    predictors = ['Air_Temp_C', 'Tide_Level_In_m', 'Gate_Opening_MTR_Deg', 'Wind_Speed_km_h']
    available_predictors = [p for p in predictors if p in df.columns]
    if not available_predictors:
        print("No predictors available for GLM.")
        return None

    # DateTime is part of the complete-case filter: a missing timestamp has
    # no hour, and would otherwise be silently coded as "not night".
    model_columns = ['has_camera_data', 'DateTime'] + available_predictors
    model_data = df[model_columns].dropna().copy()
    if model_data.empty:
        print("No complete rows available for GLM.")
        return None

    # NOTE(review): patsy treats a boolean response as categorical and expands
    # it into two indicator columns, which a Binomial GLM reads as
    # (success, failure) counts -- this appears to flip the modeled outcome.
    # Casting to 0/1 keeps "detection" as the modeled event.
    if pd.api.types.is_bool_dtype(model_data['has_camera_data']):
        model_data['has_camera_data'] = model_data['has_camera_data'].astype(int)

    # Derive is_night from the filtered rows themselves rather than from the
    # full frame, so nothing depends on index alignment (which raises when
    # the index has duplicate labels and rows were dropped).
    hour = model_data['DateTime'].dt.hour
    model_data['is_night'] = ((hour < 6) | (hour >= 18)).astype(int)

    formula = f"has_camera_data ~ {' + '.join(available_predictors)} + is_night"
    try:
        glm_model = smf.glm(formula=formula, data=model_data, family=sm.families.Binomial()).fit()
        print("\n--- GLM Results ---")
        print(glm_model.summary())
        return glm_model
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # rest of the analysis pipeline continue.
        print(f"Error fitting GLM: {e}")
        return None