From b5ad233d539803da41ae41f98e7997f68394ec35 Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Fri, 27 Sep 2024 02:29:40 -0700 Subject: [PATCH 01/13] Reduce config --- configs/incremental.json | 40 +++++++++------------------------------- 1 file changed, 9 insertions(+), 31 deletions(-) diff --git a/configs/incremental.json b/configs/incremental.json index c9ffb19c..f09927ee 100644 --- a/configs/incremental.json +++ b/configs/incremental.json @@ -1,7 +1,7 @@ { "PARAMETERS_SETS": { "common": {"bench": {"n_runs": 10, "time_limit": 60}}, - "covariance data": { + "unlabeled dataset": { "data": [ { "source": "make_blobs", @@ -14,18 +14,7 @@ } ] }, - "basic_statistics data": { - "data": { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 10000, - "n_features": [16, 64] - }, - "split_kwargs": {"ignore": true} - } - }, - "linear_regression data": { + "labeled dataset": { "data": { "source": "make_regression", "split_kwargs": {"train_size": 0.2, "test_size": 0.8}, @@ -37,22 +26,11 @@ } } }, - "pca data": { - "data": { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 1000, - "n_features": [16, 64] - }, - "split_kwargs": {"ignore": true} - } - }, "covariance": { "algorithm": [ { "estimator": "IncrementalEmpiricalCovariance", - "library": "sklearnex.covariance", + "library": "sklearnex", "estimator_methods": {"training": "partial_fit"}, "num_batches": {"training": 2} } @@ -62,7 +40,7 @@ "algorithm": [ { "estimator": "IncrementalBasicStatistics", - "library": "sklearnex.basic_statistics", + "library": "sklearnex", "num_batches": {"training": 2} } ] @@ -71,7 +49,7 @@ "algorithm": [ { "estimator": "IncrementalLinearRegression", - "library": "sklearnex.linear_model", + "library": "sklearnex", "num_batches": {"training": 2} } ] @@ -80,17 +58,17 @@ "algorithm": [ { "estimator": "IncrementalPCA", - "library": "sklearnex.preview.decomposition", + "library": "sklearnex.preview", "num_batches": {"training": 2} } ] } }, "TEMPLATES": { - "covariance": {"SETS": ["common", "covariance", "covariance data"]}, + "covariance": {"SETS": ["common", "covariance", "unlabeled dataset"]}, "linear_regression": { - "SETS": ["common", "linear_regression", "linear_regression data"] + "SETS": ["common", "linear_regression", "labeled dataset"] }, - "pca": {"SETS": ["common", "pca", "pca data"]} + "pca": {"SETS": ["common", "pca", "unlabeled dataset"]} } } From fc4ad2b12ffefebdc3fe3f7103d24fc997cdad0f Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Fri, 27 Sep 2024 04:53:32 -0700 Subject: [PATCH 02/13] Add covariance module to incremental config --- configs/incremental.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/incremental.json b/configs/incremental.json index f09927ee..d36e2a16 100644 --- a/configs/incremental.json +++ b/configs/incremental.json @@ -30,7 +30,7 @@ "algorithm": [ { "estimator": "IncrementalEmpiricalCovariance", - "library": "sklearnex", + "library": "sklearnex.covariance", "estimator_methods": {"training": "partial_fit"}, "num_batches": {"training": 2} } From 040802dc7229b4713b5ccab4de4248505e762b65 Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Fri, 4 Oct 2024 02:49:02 -0700 Subject: [PATCH 03/13] Rename example config --- .../{incremental.json => sklearnex_incremental_example.json} | 0 test-configuration-linux.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename configs/{incremental.json => sklearnex_incremental_example.json} (100%) diff --git a/configs/incremental.json b/configs/sklearnex_incremental_example.json similarity index 100% rename from configs/incremental.json rename to configs/sklearnex_incremental_example.json diff --git a/test-configuration-linux.yml b/test-configuration-linux.yml index 722d1008..d8c1a64e 100644 --- a/test-configuration-linux.yml +++ b/test-configuration-linux.yml @@ -48,7 +48,7 @@ steps: - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate bench-env - python -m sklbench --report -l DEBUG --report -c configs/incremental.json + python -m sklbench --report -l DEBUG --report -c configs/sklearnex_incremental_example.json displayName: Incremental algorithms example run - script: | source /usr/share/miniconda/etc/profile.d/conda.sh From 69cc4c1754024b2817fe87b3a0d89a926b45658b Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Fri, 4 Oct 2024 03:54:18 -0700 Subject: [PATCH 04/13] Remove bs mentioning in config (need to be added later) --- configs/sklearnex_incremental_example.json | 9 --------- 1 file changed, 9 deletions(-) diff --git a/configs/sklearnex_incremental_example.json b/configs/sklearnex_incremental_example.json index d36e2a16..37b2c7fb 100644 --- a/configs/sklearnex_incremental_example.json +++ b/configs/sklearnex_incremental_example.json @@ -36,15 +36,6 @@ } ] }, - "basic_statistics": { - "algorithm": [ - { - "estimator": "IncrementalBasicStatistics", - "library": "sklearnex", - "num_batches": {"training": 2} - } - ] - }, "linear_regression": { "algorithm": [ { From f275062098635b049f2ff822c524c44f7b62422a Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Fri, 4 Oct 2024 08:36:17 -0700 Subject: [PATCH 05/13] Fix num_batches and batch_size reading from config --- configs/sklearnex_incremental_example.json | 6 +----- sklbench/benchmarks/sklearn_estimator.py | 8 ++++++-- sklbench/report/implementation.py | 2 ++ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/configs/sklearnex_incremental_example.json b/configs/sklearnex_incremental_example.json index 37b2c7fb..1fbbcafa 100644 --- a/configs/sklearnex_incremental_example.json +++ b/configs/sklearnex_incremental_example.json @@ -56,10 +56,6 @@ } }, "TEMPLATES": { - "covariance": {"SETS": ["common", "covariance", "unlabeled dataset"]}, - "linear_regression": { - "SETS": ["common", "linear_regression", "labeled dataset"] - }, - "pca": {"SETS": ["common", "pca", "unlabeled dataset"]} + "covariance": {"SETS": ["common", "covariance", "unlabeled dataset"]} } } diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index 3f8b1641..c4f94c47 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -425,8 +425,12 @@ def measure_sklearn_estimator( data_args = (x_test,) if method == "partial_fit": - num_batches = get_bench_case_value(bench_case, "data:num_batches") - batch_size = get_bench_case_value(bench_case, "data:batch_size") + num_batches = get_bench_case_value( + bench_case, f"algorithm:num_batches:{stage}" + ) + batch_size = get_bench_case_value( + bench_case, f"algorithm:batch_size:{stage}" + ) if batch_size is None: if num_batches is None: diff --git a/sklbench/report/implementation.py b/sklbench/report/implementation.py index df15b5eb..af0398dd 100644 --- a/sklbench/report/implementation.py +++ b/sklbench/report/implementation.py @@ -236,6 +236,7 @@ def get_result_tables_as_df( splitby_columns=["estimator", "method", "function"], compatibility_mode=False, ): + print(results["bench_cases"]) bench_cases = pd.DataFrame( [flatten_dict(bench_case) for bench_case in results["bench_cases"]] ) @@ -244,6 +245,7 @@ def get_result_tables_as_df( if compatibility_mode: bench_cases = transform_results_to_compatible(bench_cases) + print(bench_cases) for column in diffby_columns.copy(): if bench_cases[column].nunique() == 1: bench_cases.drop(columns=[column], inplace=True) From 5a9be80616e5dca5e50bd27145ce11c6316b4c2d Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Fri, 4 Oct 2024 08:41:09 -0700 Subject: [PATCH 06/13] Revert accidentally pushed changes --- configs/sklearnex_incremental_example.json | 6 +++++- sklbench/report/implementation.py | 2 -- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/configs/sklearnex_incremental_example.json b/configs/sklearnex_incremental_example.json index 1fbbcafa..37b2c7fb 100644 --- a/configs/sklearnex_incremental_example.json +++ b/configs/sklearnex_incremental_example.json @@ -56,6 +56,10 @@ } }, "TEMPLATES": { - "covariance": {"SETS": ["common", "covariance", "unlabeled dataset"]} + "covariance": {"SETS": ["common", "covariance", "unlabeled dataset"]}, + "linear_regression": { + "SETS": ["common", "linear_regression", "labeled dataset"] + }, + "pca": {"SETS": ["common", "pca", "unlabeled dataset"]} } } diff --git a/sklbench/report/implementation.py b/sklbench/report/implementation.py index af0398dd..df15b5eb 100644 --- a/sklbench/report/implementation.py +++ b/sklbench/report/implementation.py @@ -236,7 +236,6 @@ def get_result_tables_as_df( splitby_columns=["estimator", "method", "function"], compatibility_mode=False, ): - print(results["bench_cases"]) bench_cases = pd.DataFrame( [flatten_dict(bench_case) for bench_case in results["bench_cases"]] ) @@ -245,7 +244,6 @@ def get_result_tables_as_df( if compatibility_mode: bench_cases = transform_results_to_compatible(bench_cases) - print(bench_cases) for column in diffby_columns.copy(): if bench_cases[column].nunique() == 1: bench_cases.drop(columns=[column], inplace=True) From 1d48f3a1b35668def560fb05c4b200783102cfda Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 17 Mar 2025 22:30:21 -0700 Subject: [PATCH 07/13] remove batch_size logic from incremental benchmarking for num_batches --- configs/README.md | 1 + sklbench/benchmarks/sklearn_estimator.py | 57 ++++++++++-------------- sklbench/report/implementation.py | 1 + 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/configs/README.md b/configs/README.md index 8d3c5ac2..07c92dc1 100644 --- a/configs/README.md +++ b/configs/README.md @@ -117,6 +117,7 @@ Configs have the three highest parameter keys: |:---------------|:--------------|:--------|:------------| | `algorithm`:`estimator` | None | | Name of measured estimator. | | `algorithm`:`estimator_params` | Empty `dict` | | Parameters for estimator constructor. | +| `algorithm`:`training`:`num_batches` | 5 | | Number of batches to benchmark `partial_fit` function, using batches the size of number of samples specified (not samples divided by `num_batches`). For incremental estimators only. | | `algorithm`:`online_inference_mode` | False | | Enables online mode for inference methods of estimator (separate call for each sample). | | `algorithm`:`sklearn_context` | None | | Parameters for sklearn `config_context` used over estimator. | | `algorithm`:`sklearnex_context` | None | | Parameters for sklearnex `config_context` used over estimator. Updated by `sklearn_context` if set. | diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index c4f94c47..dd0ef1a5 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -334,27 +334,19 @@ def verify_patching(stream: io.StringIO, function_name) -> bool: return acceleration_lines > 0 and fallback_lines == 0 -def create_online_function( - estimator_instance, method_instance, data_args, num_batches, batch_size -): +def create_online_function(estimator_instance, method_instance, data_args, num_batches): if "y" in list(inspect.signature(method_instance).parameters): def ndarray_function(x, y): for i in range(num_batches): - method_instance( - x[i * batch_size : (i + 1) * batch_size], - y[i * batch_size : (i + 1) * batch_size], - ) + method_instance(x, y) if hasattr(estimator_instance, "_onedal_finalize_fit"): estimator_instance._onedal_finalize_fit() def dataframe_function(x, y): for i in range(num_batches): - method_instance( - x.iloc[i * batch_size : (i + 1) * batch_size], - y.iloc[i * batch_size : (i + 1) * batch_size], - ) + method_instance(x, y) if hasattr(estimator_instance, "_onedal_finalize_fit"): estimator_instance._onedal_finalize_fit() @@ -362,13 +354,13 @@ def dataframe_function(x, y): def ndarray_function(x): for i in range(num_batches): - method_instance(x[i * batch_size : (i + 1) * batch_size]) + method_instance(x) if hasattr(estimator_instance, "_onedal_finalize_fit"): estimator_instance._onedal_finalize_fit() def dataframe_function(x): for i in range(num_batches): - method_instance(x.iloc[i * batch_size : (i + 1) * batch_size]) + method_instance(x) if hasattr(estimator_instance, "_onedal_finalize_fit"): estimator_instance._onedal_finalize_fit() @@ -423,32 +415,20 @@ def measure_sklearn_estimator( data_args = (x_train,) else: data_args = (x_test,) + batch_size = get_bench_case_value( + bench_case, f"algorithm:batch_size:{stage}" + ) if method == "partial_fit": num_batches = get_bench_case_value( - bench_case, f"algorithm:num_batches:{stage}" - ) - batch_size = get_bench_case_value( - bench_case, f"algorithm:batch_size:{stage}" + bench_case, f"algorithm:num_batches:{stage}", 5 ) - if batch_size is None: - if num_batches is None: - num_batches = 5 - batch_size = ( - data_args[0].shape[0] + num_batches - 1 - ) // num_batches - if num_batches is None: - num_batches = ( - data_args[0].shape[0] + batch_size - 1 - ) // batch_size - method_instance = create_online_function( estimator_instance, method_instance, data_args, - num_batches, - batch_size, + num_batches ) # daal4py model builders enabling branch if enable_modelbuilders and stage == "inference": @@ -465,6 +445,10 @@ def measure_sklearn_estimator( metrics[method]["time std[ms]"], _, ) = measure_case(bench_case, method_instance, *data_args) + if batch_size is not None: + metrics[method]["throughput[samples/ms]"] = ( + (data_args[0].shape[0] // batch_size) * batch_size + ) / metrics[method]["time[ms]"] if ensure_sklearnex_patching: full_method_name = f"{estimator_class.__name__}.{method}" sklearnex_logging_stream.seek(0) @@ -561,9 +545,16 @@ def main(bench_case: BenchCase, filters: List[BenchCase]): for stage in estimator_methods.keys(): data_descs[stage].update( { - "batch_size": get_bench_case_value( - bench_case, f"algorithm:batch_size:{stage}" - ) + key: val + for key, val in { + "batch_size": get_bench_case_value( + bench_case, f"algorithm:batch_size:{stage}" + ), + "num_batches": get_bench_case_value( + bench_case, f"algorithm:num_batches:{stage}" + ) + }.items() + if val is not None } ) if "n_classes" in data_description: diff --git a/sklbench/report/implementation.py b/sklbench/report/implementation.py index 8e76479f..689396f1 100644 --- a/sklbench/report/implementation.py +++ b/sklbench/report/implementation.py @@ -94,6 +94,7 @@ "order", "n_classes", "n_clusters", + "num_batches", "batch_size", ] From 7aa42a3c92fe79364b5c5e6e940b7d329a58f8e1 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 24 Mar 2025 07:31:04 -0700 Subject: [PATCH 08/13] Support incremental benchmarking of datasets larger than memory --- configs/README.md | 2 +- configs/sklearnex_incremental_example.json | 65 ---------------------- sklbench/report/implementation.py | 5 +- test-configuration-linux.yml | 5 -- test-configuration-win.yml | 4 -- 5 files changed, 2 insertions(+), 79 deletions(-) delete mode 100644 configs/sklearnex_incremental_example.json diff --git a/configs/README.md b/configs/README.md index 07c92dc1..c1cbf959 100644 --- a/configs/README.md +++ b/configs/README.md @@ -117,7 +117,7 @@ Configs have the three highest parameter keys: |:---------------|:--------------|:--------|:------------| | `algorithm`:`estimator` | None | | Name of measured estimator. | | `algorithm`:`estimator_params` | Empty `dict` | | Parameters for estimator constructor. | -| `algorithm`:`training`:`num_batches` | 5 | | Number of batches to benchmark `partial_fit` function, using batches the size of number of samples specified (not samples divided by `num_batches`). For incremental estimators only. | +| `algorithm`:`num_batches`:`training` | 5 | | Number of batches to benchmark `partial_fit` function, using batches the size of number of samples specified (not samples divided by `num_batches`). For incremental estimators only. | | `algorithm`:`online_inference_mode` | False | | Enables online mode for inference methods of estimator (separate call for each sample). | | `algorithm`:`sklearn_context` | None | | Parameters for sklearn `config_context` used over estimator. | | `algorithm`:`sklearnex_context` | None | | Parameters for sklearnex `config_context` used over estimator. Updated by `sklearn_context` if set. | diff --git a/configs/sklearnex_incremental_example.json b/configs/sklearnex_incremental_example.json deleted file mode 100644 index 37b2c7fb..00000000 --- a/configs/sklearnex_incremental_example.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "PARAMETERS_SETS": { - "common": {"bench": {"n_runs": 10, "time_limit": 60}}, - "unlabeled dataset": { - "data": [ - { - "source": "make_blobs", - "generation_kwargs": { - "centers": 1, - "n_samples": 1000, - "n_features": [16, 64] - }, - "split_kwargs": {"ignore": true} - } - ] - }, - "labeled dataset": { - "data": { - "source": "make_regression", - "split_kwargs": {"train_size": 0.2, "test_size": 0.8}, - "generation_kwargs": { - "n_samples": 5000, - "n_features": [40, 100], - "n_informative": 5, - "noise": 2.0 - } - } - }, - "covariance": { - "algorithm": [ - { - "estimator": "IncrementalEmpiricalCovariance", - "library": "sklearnex.covariance", - "estimator_methods": {"training": "partial_fit"}, - "num_batches": {"training": 2} - } - ] - }, - "linear_regression": { - "algorithm": [ - { - "estimator": "IncrementalLinearRegression", - "library": "sklearnex", - "num_batches": {"training": 2} - } - ] - }, - "pca": { - "algorithm": [ - { - "estimator": "IncrementalPCA", - "library": "sklearnex.preview", - "num_batches": {"training": 2} - } - ] - } - }, - "TEMPLATES": { - "covariance": {"SETS": ["common", "covariance", "unlabeled dataset"]}, - "linear_regression": { - "SETS": ["common", "linear_regression", "labeled dataset"] - }, - "pca": {"SETS": ["common", "pca", "unlabeled dataset"]} - } -} diff --git a/sklbench/report/implementation.py b/sklbench/report/implementation.py index 689396f1..8fae9e43 100644 --- a/sklbench/report/implementation.py +++ b/sklbench/report/implementation.py @@ -260,10 +260,7 @@ def get_summary_from_df(df: pd.DataFrame, df_name: str) -> pd.DataFrame: # only relative improvements are included in summary currently if len(column) > 1 and column[1] == f"{metric_name} relative improvement": metric_columns.append(column) - if metric_columns: - summary = df[metric_columns].aggregate(geomean_wrapper, axis=0).to_frame().T - else: - summary = pd.DataFrame() + summary = df[metric_columns].aggregate(geomean_wrapper, axis=0).to_frame().T summary.index = pd.Index([df_name]) return summary diff --git a/test-configuration-linux.yml b/test-configuration-linux.yml index d8c1a64e..a37769ce 100644 --- a/test-configuration-linux.yml +++ b/test-configuration-linux.yml @@ -45,11 +45,6 @@ steps: conda activate bench-env python -m sklbench --report -l DEBUG --report -c configs/sklearn_example.json displayName: Sklearn example run - - script: | - source /usr/share/miniconda/etc/profile.d/conda.sh - conda activate bench-env - python -m sklbench --report -l DEBUG --report -c configs/sklearnex_incremental_example.json - displayName: Incremental algorithms example run - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate bench-env diff --git a/test-configuration-win.yml b/test-configuration-win.yml index f3ac1595..a1eddaeb 100644 --- a/test-configuration-win.yml +++ b/test-configuration-win.yml @@ -43,10 +43,6 @@ steps: call activate bench-env python -m sklbench --report -l DEBUG --report -c configs/sklearn_example.json displayName: Sklearn example run - - script: | - call activate bench-env - python -m sklbench --report -l DEBUG --report -c configs/incremental.json - displayName: Incremental algorithms example run - script: | call activate bench-env python -m sklbench --report -l DEBUG --report -c configs/xgboost_example.json From 6be47229cf88c34394bd88e555a84cca6fe9ff22 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 24 Mar 2025 10:04:14 -0700 Subject: [PATCH 09/13] black --- sklbench/benchmarks/sklearn_estimator.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index 34455455..bd9b3b51 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -415,10 +415,7 @@ def measure_sklearn_estimator( ) method_instance = create_online_function( - estimator_instance, - method_instance, - data_args, - num_batches + estimator_instance, method_instance, data_args, num_batches ) # daal4py model builders enabling branch if enable_modelbuilders and stage == "inference": @@ -554,7 +551,7 @@ def main(bench_case: BenchCase, filters: List[BenchCase]): ), "num_batches": get_bench_case_value( bench_case, f"algorithm:num_batches:{stage}" - ) + ), }.items() if val is not None } From e075184df2ded6d6178d80774b758bf4aae6e86d Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 25 Mar 2025 10:01:09 -0700 Subject: [PATCH 10/13] fix logreg strong --- configs/spmd/large_scale/logreg_strong.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/spmd/large_scale/logreg_strong.json b/configs/spmd/large_scale/logreg_strong.json index 219840ea..962d7da9 100644 --- a/configs/spmd/large_scale/logreg_strong.json +++ b/configs/spmd/large_scale/logreg_strong.json @@ -19,7 +19,7 @@ "logreg": { "SETS": [ "sklearnex spmd implementation", - "large scale strong 64 parameters", + "large scale strong <=64 parameters", "spmd logreg parameters", "synthetic data", "spmd logreg2 parameters" From 0e2cac6e78eb5e240fd6386646cf08a72293daf9 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 25 Mar 2025 22:27:23 -0700 Subject: [PATCH 11/13] align pca and knn bf16 configs --- configs/regular/bf16/knn.json | 2 +- configs/regular/bf16/pca.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/regular/bf16/knn.json b/configs/regular/bf16/knn.json index fabf6d6d..5f703bb9 100644 --- a/configs/regular/bf16/knn.json +++ b/configs/regular/bf16/knn.json @@ -22,7 +22,7 @@ "estimator_params": { "algorithm": "brute", "metric": "minkowski", "p": [1, 2] } }, "data": [ - { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } } + { "source": "make_classification", "split_kwargs": { "train_size": 50000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 51000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } } ] }, "synthetic regression data": { diff --git a/configs/regular/bf16/pca.json b/configs/regular/bf16/pca.json index e5113261..01d2a125 100644 --- a/configs/regular/bf16/pca.json +++ b/configs/regular/bf16/pca.json @@ -20,7 +20,7 @@ }, "synthetic data": { "data": [ - { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } } + { "source": "make_blobs", "generation_kwargs": { "n_samples": 3000000, "n_features": 10, "centers": 1 } } ] } }, From 4b2f1892feb3e10e61106ddb84a52d0111d7315b Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 25 Mar 2025 22:47:28 -0700 Subject: [PATCH 12/13] more knn alignment bf16 --- configs/regular/bf16/knn.json | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/configs/regular/bf16/knn.json b/configs/regular/bf16/knn.json index 5f703bb9..a1254a0a 100644 --- a/configs/regular/bf16/knn.json +++ b/configs/regular/bf16/knn.json @@ -4,7 +4,7 @@ "common knn parameters": { "algorithm": { "estimator_params": { - "n_neighbors": [10, 100], + "n_neighbors": 10, "weights": "uniform" } }, @@ -19,20 +19,11 @@ "synthetic classification data": { "algorithm": { "estimator": "KNeighborsClassifier", - "estimator_params": { "algorithm": "brute", "metric": "minkowski", "p": [1, 2] } + "estimator_params": { "algorithm": "brute", "metric": "minkowski", "p": 2 } }, "data": [ { "source": "make_classification", "split_kwargs": { "train_size": 50000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 51000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } } ] - }, - "synthetic regression data": { - "algorithm": { - "estimator": "KNeighborsRegressor", - "estimator_params": { "algorithm": "brute", "metric": "minkowski", "p": [1, 2] } - }, - "data": [ - { "source": "make_regression", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 100, "noise":1.5 } } - ] } }, "TEMPLATES": { @@ -43,14 +34,6 @@ "sklearn knn parameters", "synthetic classification data" ] - }, - "sklearn brute knn reg": { - "SETS": [ - "sklearn-ex[gpu] implementations", - "common knn parameters", - "sklearn knn parameters", - "synthetic regression data" - ] } } } From 9611fbc6e84e500eee3c7714a3fb665dd11cf697 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 25 Mar 2025 22:49:27 -0700 Subject: [PATCH 13/13] minor followup --- configs/regular/bf16/knn.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/regular/bf16/knn.json b/configs/regular/bf16/knn.json index a1254a0a..46193894 100644 --- a/configs/regular/bf16/knn.json +++ b/configs/regular/bf16/knn.json @@ -4,7 +4,7 @@ "common knn parameters": { "algorithm": { "estimator_params": { - "n_neighbors": 10, + "n_neighbors": 100, "weights": "uniform" } },