diff --git a/augment.py b/augment.py
new file mode 100644
index 0000000..37591a4
--- /dev/null
+++ b/augment.py
@@ -0,0 +1,41 @@
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+
+# Augmentation parameters
+n_values = [2, 3, 4]
+
+# Generate a larger training dataset
+x_large_train = np.arange(0, 1000)
+X_large_augmented = np.hstack([x_large_train.reshape(-1, 1) % n for n in n_values])
+y_large_mod_3 = x_large_train % 3
+
+# Train the logistic regression model for x % 3 with more data
+logistic_large_mod_3 = LogisticRegression(max_iter=1000, solver='lbfgs')  # multinomial is the lbfgs default
+logistic_large_mod_3.fit(X_large_augmented, y_large_mod_3)
+
+# Generate test data far outside the training range
+x_far_test = np.arange(10000, 10100)
+X_far_test_augmented = np.hstack([x_far_test.reshape(-1, 1) % n for n in n_values])
+y_far_test = x_far_test % 3
+
+# Evaluate the logistic regression model for x % 3 on far away test data
+probs_far_mod_3 = logistic_large_mod_3.predict_proba(X_far_test_augmented)
+far_test_accuracy_mod_3 = logistic_large_mod_3.score(X_far_test_augmented, y_far_test)
+
+# Calculate confidence levels for the far away test data
+confidence_levels_far_mod_3 = []
+for prob in probs_far_mod_3:
+    max_prob = max(prob)
+    if max_prob >= 0.8:
+        confidence_levels_far_mod_3.append("High")
+    elif max_prob >= 0.6:
+        confidence_levels_far_mod_3.append("Medium")
+    else:
+        confidence_levels_far_mod_3.append("Low")
+
+# Display the test accuracy and the confidence levels for the first few instances
+print("Test Accuracy for x % 3:", far_test_accuracy_mod_3)
+print("Prediction Probabilities for first 5 instances:")
+print(probs_far_mod_3[:5])
+print("Confidence Levels for first 5 instances:")
+print(confidence_levels_far_mod_3[:5])
diff --git a/augment2d.py b/augment2d.py
new file mode 100644
index 0000000..7da6859
--- /dev/null
+++ b/augment2d.py
@@ -0,0 +1,222 @@
+from matplotlib import colors
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+import matplotlib.pyplot as plt
+from typing import Any, List, Tuple, Callable
+
+# Type aliases for readability
+Array = np.ndarray[Any, np.dtype[np.int64]]
+FeatureFunction = Callable[[Array, Array, int], Array]
+plt: Any = plt  # loosen the type so pyplot calls don't need individual ignores
+
+# Augmentation parameters
+modulo_n_values: List[int] = [2, 3, 4]
+
+# Grid pattern generation
+
+
+def generate_grid(size: int, modulo: int) -> Tuple[Array, Array, Array]:
+    x_: Array = np.arange(size)
+    y_: Array = np.arange(size)
+    x, y = np.meshgrid(x_, y_)  # type: ignore
+    color_indices: Array = (x + y) % modulo  # type: ignore
+    return x, y, color_indices  # type: ignore
+
+
+# List of operations to be applied for augmentation and their corresponding names
+operations: List[Tuple[FeatureFunction, str]] = [
+    (lambda x, y, n: x % n, "x % {n}"),
+    (lambda x, y, n: y % n, "y % {n}"),
+    (lambda x, y, n: (x + y) % n, "(x + y) % {n}"),
+    (lambda x, y, n: (x - y + n) % n, "(x - y + {n}) % {n}"),
+    (lambda x, y, n: (y - x + n) % n, "(y - x + {n}) % {n}")
+]  # type: ignore
+
+# Feature augmentation
+
+
+def augment_features(X: Array, Y: Array, n_values: List[int]) -> Array:
+    X_flat = X.flatten()
+    Y_flat = Y.flatten()
+
+    features: List[Array] = []
+    for op, _ in operations:
+        for n in n_values:
+            features.append(op(X_flat, Y_flat, n))
+
+    return np.column_stack(features)  # type: ignore
+
+# Generate feature function names
+
+
+def generate_feature_functions(n_values: List[int]) -> List[str]:
+    feature_functions: List[str] = []
+    for _, name_template in operations:
+        for n in n_values:
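+            # For example, for n=3 the template "(x + y) % {n}" becomes
+            # "(x + y) % 3"; the loop order here must mirror the one in
+            # augment_features so the names line up with feature columns.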
+            feature_functions.append(name_template.format(n=n))
+    return feature_functions
+
+# Train logistic regression model
+
+
+def train_model(X: Array, y: Array) -> LogisticRegression:
+    model = LogisticRegression(
+        # multinomial behaviour is the default for the lbfgs solver
+        max_iter=1000,
+        solver='lbfgs'
+    )
+    model.fit(X, y)  # type: ignore
+    return model
+
+# Evaluate the model
+
+
+def evaluate_model(model: LogisticRegression, X: Array, y: Array) -> Tuple[float, Array, Array, List[str]]:
+    probs = model.predict_proba(X)  # type: ignore
+    accuracy = model.score(X, y)  # type: ignore
+    predictions = model.predict(X)  # type: ignore
+    confidence_levels: List[str] = []
+    for prob in probs:  # type: ignore
+        max_prob = max(prob)  # type: ignore
+        if max_prob >= 0.8:
+            confidence_levels.append("High")
+        elif max_prob >= 0.6:
+            confidence_levels.append("Medium")
+        else:
+            confidence_levels.append("Low")
+    return accuracy, probs, predictions, confidence_levels  # type: ignore
+
+# Print model coefficients
+
+
+def print_model_coefficients(model_: LogisticRegression, feature_functions: List[str]) -> None:
+    model: Any = model_
+    # coef_ has one row per class in the multinomial case; show the first
+    # class's weights as a readable summary of what was learned.
+    coefficients = model.coef_[0]
+    intercept = model.intercept_[0]
+    print("Learned function (class 0):")
+    for coef, func in zip(coefficients, feature_functions):
+        rounded_coef = round(coef, 2)
+        if rounded_coef != 0:
+            print(f"{rounded_coef} * {func}")
+    print("Intercept:", round(intercept, 2))
+
+# Derive the most likely predicted function
+
+
+def derive_predicted_function(model: LogisticRegression, feature_functions: List[str]) -> str:
+    coefficients = model.coef_[0]  # type: ignore
+
+    # Identify the most significant term(s) (highest absolute value of coefficients)
+    significant_terms = sorted(  # type: ignore
+        zip(coefficients, feature_functions), key=lambda x: abs(x[0]), reverse=True)  # type: ignore
+
+    # Get the top significant term
+    top_term = significant_terms[0][1]
+
+    # Construct the function
+    predicted_function = f"f(x, y) => {top_term}"
+
+    return predicted_function
+
+# Simplified predicted class function based on significant terms
+# (illustrative only; it collapses the four classes into two and is not used below)
+
+
+def predicted_class(x: int, y: int):
+    term1 = (x + y) % 4
+
+    # Considering the intercept and most significant terms
+    if term1 in [0, 2]:
+        return 0
+    else:
+        return 1
+
+
+# Define the custom color scheme as a list of colors
+color_scheme = [
+    '#000000',  # black
+    '#0074D9',  # blue
+    '#FF4136',  # red
+    '#2ECC40',  # green
+    '#FFDC00',  # yellow
+    '#AAAAAA',  # grey
+    '#F012BE',  # fuchsia
+    '#FF851B',  # orange
+    '#870C25',  # brown
+    '#7FDBFF',  # teal
+]
+
+cmap = colors.ListedColormap(color_scheme)
+
+
+# Visualization
+
+# Simplified Visualization
+
+def visualize_results(true_color_indices: Array, predicted_color_indices: Array, grid_size: int) -> None:
+    true_grid = true_color_indices.reshape(grid_size, grid_size)
+    predicted_grid = predicted_color_indices.reshape(grid_size, grid_size)
+
+    _fig, axs = plt.subplots(1, 2, figsize=(12, 5))
+
+    # Plot the expected checkerboard pattern with the custom colour map,
+    # pinning the normalization so class i always maps to color_scheme[i]
+    axs[0].imshow(true_grid, cmap=cmap, vmin=0, vmax=len(color_scheme) - 1)
+    axs[0].set_title("Expected Checkerboard Pattern")
+    axs[0].axis('off')
+
+    # Plot the predicted checkerboard pattern
+    axs[1].imshow(predicted_grid, cmap=cmap, vmin=0, vmax=len(color_scheme) - 1)
+    axs[1].set_title("Predicted Checkerboard Pattern")
+    axs[1].axis('off')
+
+    plt.tight_layout()
+    plt.show()
+
+# Main function
+
+
+def main() -> None:
+    # Generate training data (checkerboard pattern)
+    grid_size = 100
+    modulo = 4
+    X_train, Y_train, color_indices_train = generate_grid(grid_size, modulo)
+    X_augmented_train = augment_features(X_train, Y_train, modulo_n_values)
+
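+    # X_augmented_train has one column per (operation, n) pair:
+    # len(operations) * len(modulo_n_values) = 5 * 3 = 15 columns. The
+    # (x + y) % 4 column reproduces the checkerboard label exactly, so the
+    # model mainly needs to learn to read off that single feature.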
+    # Train the logistic regression model
+    logistic_model = train_model(
+        X_augmented_train, color_indices_train.flatten())
+
+    # Generate feature functions
+    feature_functions = generate_feature_functions(modulo_n_values)
+
+    # Print the model coefficients
+    print_model_coefficients(logistic_model, feature_functions)
+
+    # Derive and print the most likely predicted function
+    predicted_function = derive_predicted_function(
+        logistic_model, feature_functions)
+    print("The most likely predicted function:")
+    print(predicted_function)
+
+    # Generate test data (note: this reuses the training range, so it checks
+    # fit quality rather than out-of-range generalization)
+    test_grid_size = 100
+    X_test, Y_test, color_indices_test = generate_grid(test_grid_size, modulo)
+    X_augmented_test = augment_features(X_test, Y_test, modulo_n_values)
+
+    # Evaluate the logistic regression model
+    test_accuracy, probs_test, predictions, confidence_levels_test = evaluate_model(
+        logistic_model, X_augmented_test, color_indices_test.flatten())
+
+    # Display the test accuracy and the confidence levels for the first few instances
+    print("Test Accuracy for checkerboard pattern:", test_accuracy)
+    print("Prediction Probabilities for first 5 instances:")
+    print(probs_test[:5])
+    print("Confidence Levels for first 5 instances:")
+    print(confidence_levels_test[:5])
+
+    # Visualize the results
+    visualize_results(color_indices_test.flatten(),
+                      predictions, test_grid_size)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/fft.py b/fft.py
new file mode 100644
index 0000000..3801f9e
--- /dev/null
+++ b/fft.py
@@ -0,0 +1,42 @@
+import numpy.typing as npt
+import numpy as np
+
+num_points = 1000
+
+x: npt.NDArray[np.int_] = np.arange(0, num_points)
+
+def dominant_frequency_likelihood(y: npt.NDArray[np.int_]) -> float:
+    # Perform Fourier Transform on the dataset
+    fft_values = np.fft.fft(y)
+
+    # Get the absolute values (magnitudes) of the FFT
+    magnitudes = np.abs(fft_values)
+
+    # Get the frequencies corresponding to FFT values (unused below, kept for reference)
+    frequencies = np.fft.fftfreq(len(y), d=1)  # d=1 means sample spacing is 1
+
+    # Find the index of the maximum magnitude, excluding the zero-frequency component
+    dominant_frequency_index = np.argmax(magnitudes[1:]) + 1
+    dominant_magnitude = magnitudes[dominant_frequency_index]
+
+    # Calculate the average magnitude, excluding the zero-frequency component
+    average_magnitude = np.mean(magnitudes[1:])
+
+    # Ratio of the dominant magnitude to the average magnitude
+    # ("likelihood" here is an informal peak-to-mean score, not a statistical likelihood)
+    likelihood = dominant_magnitude / average_magnitude
+
+    return likelihood
+
+def test_mod3():
+    y = x % 3
+    likelihood = dominant_frequency_likelihood(y)
+    print(f"Likelihood of periodic patterns in mod3: {likelihood}")
+
+def test_random():
+    y: npt.NDArray[np.int_] = np.random.randint(0, 100, size=num_points)  # type: ignore
+    likelihood = dominant_frequency_likelihood(y)
+    print(f"Likelihood of periodic patterns in random: {likelihood}")
+
+# Run the tests
+test_mod3()
+test_random()
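+
+# Rough expectation (not verified here): the mod-3 signal concentrates its
+# energy near the 1/3-cycle frequency, so its peak-to-mean ratio should sit
+# far above the random baseline, which stays small for uniform noise.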