augment.py (new file, 41 additions)
import numpy as np
from sklearn.linear_model import LogisticRegression

# Augmentation parameters
n_values = [2, 3, 4]

# Generate a larger training dataset
x_large_train = np.arange(0, 1000)
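# Each integer is represented by its residues mod 2, 3, and 4, e.g.
# 7 -> [7 % 2, 7 % 3, 7 % 4] = [1, 1, 3]; since x % 3 is itself one of the
# features, the target is linearly separable by construction.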
X_large_augmented = np.hstack([x_large_train.reshape(-1, 1) % n for n in n_values])
y_large_mod_3 = x_large_train % 3

# Train the logistic regression model for x % 3 with more data
# (the lbfgs solver handles multiclass targets natively; the multi_class
# argument, deprecated in recent scikit-learn releases, is omitted)
logistic_large_mod_3 = LogisticRegression(max_iter=1000, solver='lbfgs')
logistic_large_mod_3.fit(X_large_augmented, y_large_mod_3)

# Generate test data far outside the training range
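# Because the inputs are reduced to residues mod 2, 3, and 4, these far-away
# points land in exactly the feature space the model was trained on.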
x_far_test = np.arange(10000, 10100)
X_far_test_augmented = np.hstack([x_far_test.reshape(-1, 1) % n for n in n_values])
y_far_test = x_far_test % 3

# Evaluate the logistic regression model for x % 3 on far away test data
probs_far_mod_3 = logistic_large_mod_3.predict_proba(X_far_test_augmented)
far_test_accuracy_mod_3 = logistic_large_mod_3.score(X_far_test_augmented, y_far_test)

# Calculate confidence levels for the far away test data
confidence_levels_far_mod_3 = []
for prob in probs_far_mod_3:
max_prob = max(prob)
if max_prob >= 0.8:
confidence_levels_far_mod_3.append("High")
elif max_prob >= 0.6:
confidence_levels_far_mod_3.append("Medium")
else:
confidence_levels_far_mod_3.append("Low")

# Display the test accuracy and the confidence levels for the first few instances
print("Test Accuracy for x % 3:", far_test_accuracy_mod_3)
print("Prediction Probabilities for first 5 instances:")
print(probs_far_mod_3[:5])
print("Confidence Levels for first 5 instances:")
print(confidence_levels_far_mod_3[:5])
augment2d.py (new file, 222 additions)
from typing import Any, Callable, List, Tuple

import matplotlib.pyplot as plt
import numpy as np
import numpy.typing as npt
from matplotlib import colors
from sklearn.linear_model import LogisticRegression

# Type aliases for readability
Array = npt.NDArray[np.int64]
FeatureFunction = Callable[[Array, Array, int], Array]
plt: Any = plt  # matplotlib's stubs are incomplete; treat the module as Any

# Augmentation parameters
modulo_n_values: List[int] = [2, 3, 4]

# Grid pattern generation
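# (x + y) % modulo assigns one color per anti-diagonal: modulo = 2 yields the
# classic two-color checkerboard, larger moduli yield repeating diagonal stripes.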


def generate_grid(size: int, modulo: int, offset: int = 0) -> Tuple[Array, Array, Array]:
    # offset shifts the coordinate range so a test grid can lie outside the
    # training range, mirroring the far-away test in augment.py
    x_: Array = np.arange(offset, offset + size)
    y_: Array = np.arange(offset, offset + size)
    x, y = np.meshgrid(x_, y_)  # type: ignore
    color_indices: Array = (x + y) % modulo  # type: ignore
    return x, y, color_indices  # type: ignore


# List of operations to be applied for augmentation and their corresponding names
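# The second element of each pair is a str.format template; {n} is substituted
# by generate_feature_functions when labelling the learned coefficients.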
operations: List[Tuple[FeatureFunction, str]] = [
(lambda x, y, n: x % n, "x % {n}"),
(lambda x, y, n: y % n, "y % {n}"),
(lambda x, y, n: (x + y) % n, "(x + y) % {n}"),
(lambda x, y, n: (x - y + n) % n, "(x - y + {n}) % {n}"),
(lambda x, y, n: (y - x + n) % n, "(y - x + {n}) % {n}")
] # type: ignore

# Feature augmentation
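# Each (operation, modulus) pair contributes one feature column, giving
# len(operations) * len(n_values) = 5 * 3 = 15 columns in total.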


def augment_features(X: Array, Y: Array, n_values: List[int]) -> Array:
X_flat = X.flatten()
Y_flat = Y.flatten()

features: List[Array] = []
for op, _ in operations:
for n in n_values:
features.append(op(X_flat, Y_flat, n))

return np.column_stack(features) # type: ignore

# Generate feature function names


def generate_feature_functions(n_values: List[int]) -> List[str]:
feature_functions: List[str] = []
for _, name_template in operations:
for n in n_values:
feature_functions.append(name_template.format(n=n))
return feature_functions

# Train logistic regression model


def train_model(X: Array, y: Array) -> LogisticRegression:
    # the lbfgs solver optimizes the multinomial objective for multiclass
    # targets; the deprecated multi_class argument is omitted
    model = LogisticRegression(max_iter=1000, solver='lbfgs')
model.fit(X, y) # type: ignore
return model

# Evaluate the model


def evaluate_model(model: LogisticRegression, X: Array, y: Array) -> Tuple[float, Array, Array, List[str]]:
probs = model.predict_proba(X) # type: ignore
accuracy = model.score(X, y) # type: ignore
predictions = model.predict(X) # type: ignore
confidence_levels: List[str] = []
for prob in probs: # type: ignore
max_prob = max(prob) # type: ignore
if max_prob >= 0.8:
confidence_levels.append("High")
elif max_prob >= 0.6:
confidence_levels.append("Medium")
else:
confidence_levels.append("Low")
return accuracy, probs, predictions, confidence_levels # type: ignore

# Print model coefficients


def print_model_coefficients(model_: LogisticRegression, feature_functions: List[str]) -> None:
    model: Any = model_  # sklearn attributes are untyped; cast for the type checker
    coefficients = model.coef_[0]  # coefficients for the first class only
    intercept = model.intercept_[0]
print("Learned function:")
for coef, func in zip(coefficients, feature_functions):
rounded_coef = round(coef, 2)
if rounded_coef != 0:
print(f"{rounded_coef} * {func}")
print("Intercept:", round(intercept, 2))

# Derive the most likely predicted function


def derive_predicted_function(model: LogisticRegression, feature_functions: List[str]) -> str:
coefficients = model.coef_[0] # type: ignore

# Identify the most significant term(s) (highest absolute value of coefficients)
significant_terms = sorted( # type: ignore
zip(coefficients, feature_functions), key=lambda x: abs(x[0]), reverse=True) # type: ignore

# Get the top significant term
top_term = significant_terms[0][1]

# Construct the function
predicted_function = f"f(x, y) => {top_term}"

return predicted_function

# Simplified predicted class function based on significant terms
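# (Hand-derived illustration only; main() relies on the trained model's
# predictions rather than this shortcut.)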


def predicted_class(x: int, y: int) -> int:
term1 = (x + y) % 4

# Considering the intercept and most significant terms
if term1 in [0, 2]:
return 0
else:
return 1


# Define the custom color scheme as a list of colors
color_scheme = [
'#000000', # black
'#0074D9', # blue
'#FF4136', # red
'#2ECC40', # green
'#FFDC00', # yellow
'#AAAAAA', # grey
    '#F012BE', # fuchsia
'#FF851B', # orange
'#870C25', # brown
'#7FDBFF', # teal
]

cmap = colors.ListedColormap(color_scheme)


# Visualization


def visualize_results(true_color_indices: Array, predicted_color_indices: Array, grid_size: int) -> None:
true_grid = true_color_indices.reshape(grid_size, grid_size)
predicted_grid = predicted_color_indices.reshape(grid_size, grid_size)

_fig, axs = plt.subplots(1, 2, figsize=(12, 5))

    # Plot the expected checkerboard pattern with the custom palette
    axs[0].imshow(true_grid, cmap=cmap, vmin=0, vmax=len(color_scheme) - 1)
    axs[0].set_title("Expected Checkerboard Pattern")
    axs[0].axis('off')

    # Plot the predicted checkerboard pattern on the same color scale
    axs[1].imshow(predicted_grid, cmap=cmap, vmin=0, vmax=len(color_scheme) - 1)
axs[1].set_title("Predicted Checkerboard Pattern")
axs[1].axis('off')

plt.tight_layout()
plt.show()

# Main function


def main() -> None:
# Generate training data (checkerboard pattern)
grid_size = 100
modulo = 4
X_train, Y_train, color_indices_train = generate_grid(grid_size, modulo)
X_augmented_train = augment_features(X_train, Y_train, modulo_n_values)
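    # X_augmented_train has shape (grid_size ** 2, 15): one row per grid cell,
    # one column per residue feature.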

# Train the logistic regression model
logistic_model = train_model(
X_augmented_train, color_indices_train.flatten())

# Generate feature functions
feature_functions = generate_feature_functions(modulo_n_values)

# Print the model coefficients
print_model_coefficients(logistic_model, feature_functions)

# Derive and print the most likely predicted function
predicted_function = derive_predicted_function(
logistic_model, feature_functions)
print("The most likely predicted function:")
print(predicted_function)

    # Generate test data (shifted range to test generalization, as in augment.py)
    test_grid_size = 100
    X_test, Y_test, color_indices_test = generate_grid(
        test_grid_size, modulo, offset=10000)
X_augmented_test = augment_features(X_test, Y_test, modulo_n_values)

# Evaluate the logistic regression model
test_accuracy, probs_test, predictions, confidence_levels_test = evaluate_model(
logistic_model, X_augmented_test, color_indices_test.flatten())

# Display the test accuracy and the confidence levels for the first few instances
print("Test Accuracy for checkerboard pattern:", test_accuracy)
print("Prediction Probabilities for first 5 instances:")
print(probs_test[:5])
print("Confidence Levels for first 5 instances:")
print(confidence_levels_test[:5])

# Visualize the results
visualize_results(color_indices_test.flatten(),
predictions, test_grid_size)


if __name__ == "__main__":
main()
fft.py (new file, 42 additions)
import numpy.typing as npt
import numpy as np

num_points = 1000

x: npt.NDArray[np.int_] = np.arange(0, num_points)

def dominant_frequency_likelihood(y: npt.NDArray[np.int_]) -> float:
# Perform Fourier Transform on the dataset
fft_values = np.fft.fft(y)

# Get the absolute values (magnitudes) of the FFT
magnitudes = np.abs(fft_values)

    # (np.fft.fftfreq could map each bin to its frequency, but only the
    # magnitudes are needed for this ratio test)

# Find the index of the maximum magnitude, excluding the zero-frequency component
dominant_frequency_index = np.argmax(magnitudes[1:]) + 1
dominant_magnitude = magnitudes[dominant_frequency_index]

# Calculate the average magnitude, excluding the zero-frequency component
average_magnitude = np.mean(magnitudes[1:])

# Calculate the ratio of the dominant magnitude to the average magnitude
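    # A periodic signal concentrates its energy in one bin, so this ratio is
    # much larger for periodic inputs than for broadband noise.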
likelihood = dominant_magnitude / average_magnitude

return likelihood

def test_mod3():
y = x % 3
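    # x % 3 is periodic with period 3, so the spectrum should peak sharply
    # near 1/3 cycles/sample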
likelihood = dominant_frequency_likelihood(y)
print(f"Likelihood of periodic patterns in mod3: {likelihood}")

def test_random():
y: npt.NDArray[np.int_] = np.random.randint(0, 100, size=num_points) # type: ignore
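    # for uniform noise the dominant bin is only modestly above the mean, so
    # the ratio should stay comparatively small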
likelihood = dominant_frequency_likelihood(y)
print(f"Likelihood of periodic patterns in random: {likelihood}")

# Run the tests
test_mod3()
test_random()