-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
103 lines (81 loc) · 3.26 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import pytest
import pandas as pd
import numpy as np
import streamlit as st
import io
# Test: Verify if the app processes CSV file correctly
def test_csv_file_processing():
    """Round-trip a small DataFrame through CSV text and verify the parsed result.

    An in-memory ``io.StringIO`` buffer stands in for the uploaded file. The
    test checks the parsed shape, the column names, and two summary
    statistics of the reloaded data.
    """
    data = {
        "A": [1, 2, 3, 4, 5],
        "B": [10, 20, 30, 40, 50],
        "C": [100, 200, 300, 400, 500],
    }
    csv_data = pd.DataFrame(data).to_csv(index=False)

    # Simulate the upload with a text buffer. The buffer always exists here,
    # so the assertions below run unconditionally instead of hiding behind an
    # always-true ``is not None`` guard.
    uploaded_file = io.StringIO(csv_data)
    df_uploaded = pd.read_csv(uploaded_file)

    # The data must come back with the same dimensions and header.
    assert df_uploaded.shape == (5, 3)
    assert list(df_uploaded.columns) == ["A", "B", "C"]

    # Floating-point statistics are compared with a tolerance rather than
    # exact equality, so harmless last-bit differences cannot fail the test.
    assert np.isclose(df_uploaded["A"].mean(), 3.0)
    # Sample standard deviation (pandas default ddof=1) of B is sqrt(250).
    assert np.isclose(df_uploaded["B"].std(), 15.811388300841896)
# Test: Check if the visualizations are generated (simple checks)
def test_visualizations():
    """Check the data preparation that the visualizations depend on.

    No plot is rendered. The test verifies that the numeric columns are
    detected, that their correlation matrix has the expected shape and
    values, and that a single selected column yields the expected mean.
    """
    data = {
        "A": [1, 2, 3, 4, 5],
        "B": [10, 20, 30, 40, 50],
        "C": [100, 200, 300, 400, 500],
    }
    df = pd.DataFrame(data)

    # Assert the detected columns outright. A check guarded by
    # ``if len(...) > 0`` could never fail, and would silently pass if
    # numeric-column detection returned nothing.
    numerical_columns = df.select_dtypes(include=np.number).columns.tolist()
    assert numerical_columns == ["A", "B", "C"]

    # Correlation matrix over the numeric columns.
    corr = df[numerical_columns].corr()
    assert corr.shape == (3, 3)
    # B and C are exact multiples of A, so every coefficient should be 1
    # (up to floating-point rounding).
    assert np.allclose(corr.to_numpy(), 1.0)

    # "Choose column" path: the column exists and its statistic is correct.
    assert "A" in df.columns
    assert df["A"].mean() == 3.0
# Test: Check if column statistics are displayed correctly
def test_column_statistics():
    """Verify the mean, median and standard deviation of one selected column."""
    data = {
        "A": [1, 2, 3, 4, 5],
        "B": [10, 20, 30, 40, 50],
    }
    df = pd.DataFrame(data)
    column_name = "A"
    column = df[column_name]

    assert column.mean() == 3.0
    assert column.median() == 3.0
    # Sample standard deviation (pandas default ddof=1) of 1..5 is
    # sqrt(10 / 4) = sqrt(2.5). Comparing with a tolerance is stricter than
    # rounding to two decimals, which would accept any value in
    # roughly [1.575, 1.585).
    assert np.isclose(column.std(), np.sqrt(2.5))
# Test: Check if app handles empty CSV correctly
def test_empty_csv():
    """Verify how empty CSV input is handled by ``pd.read_csv``.

    Two cases are covered:

    1. A zero-byte file, which must raise ``pd.errors.EmptyDataError``.
    2. The CSV text produced by serialising an empty DataFrame. Reading it
       back may either raise ``EmptyDataError`` or yield an empty DataFrame;
       both outcomes are accepted, but a non-empty result is a failure.
    """
    # Case 1: zero-byte upload. This gives the test a deterministic
    # expectation, so it can no longer pass without checking anything.
    try:
        pd.read_csv(io.StringIO(""))
    except pd.errors.EmptyDataError:
        pass  # expected outcome
    else:
        raise AssertionError("expected EmptyDataError for a zero-byte CSV")

    # Case 2: round-trip of an empty DataFrame (no rows and no columns).
    empty_csv = pd.DataFrame().to_csv(index=False)
    try:
        # Only the read can raise EmptyDataError, so it is the only
        # statement inside the try body.
        df_uploaded = pd.read_csv(io.StringIO(empty_csv))
    except pd.errors.EmptyDataError:
        # The serialised text contained nothing to parse.
        return
    # If parsing succeeded, the resulting frame must hold no data.
    assert df_uploaded.empty