From bc03d91fd1f7cc50f4550dfa126df9d011de4f67 Mon Sep 17 00:00:00 2001 From: David Slater Date: Thu, 1 Dec 2022 18:58:20 +0000 Subject: [PATCH 1/9] librispeech_dev_test upload --- armory/datasets/README.md | 42 ++++++++++++------- armory/datasets/cached_datasets.json | 7 ++++ .../standard/librispeech_dev_test/__init__.py | 3 ++ .../librispeech_dev_test/checksums.tsv | 1 + .../librispeech_dev_test.py | 37 ++++++++++++++++ .../hubert_untargeted_snr_pgd.json | 11 +++-- 6 files changed, 81 insertions(+), 20 deletions(-) create mode 100644 armory/datasets/standard/librispeech_dev_test/__init__.py create mode 100644 armory/datasets/standard/librispeech_dev_test/checksums.tsv create mode 100644 armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py diff --git a/armory/datasets/README.md b/armory/datasets/README.md index 76c2d8ac2..27e4f9933 100644 --- a/armory/datasets/README.md +++ b/armory/datasets/README.md @@ -84,6 +84,19 @@ info, ds = load.load("digit") info, ds = load.from_directory("/armory/datasets/new_builds/digit/1.0.8") ``` +### Apache Beam Datasets + +Currently, `librispeech` and `librispeech_dev_clean` use apache beam to build. +Apache beam is not installed by default in the container due to older dependencies. +If building in the container, do: +``` +pip install apache-beam +``` + +When building, armory does not provide beam options by default. +This makes building VERY slow unless overrides are provided. +It is recommended that these are built directly using tfds on the command line. + ## Packaging and Uploading for Cache After a dataset has been successfully built and loaded (locally), it can be packaged and uploaded to the cache. @@ -91,43 +104,44 @@ After a dataset has been successfully built and loaded (locally), it can be pack First, it is recommended that you test the packaging and untarring process without upload/download. In python: -``` +```python from armory.datasets import package -package.package("my_dataset") # creates a tar.gz file -package.update("my_dataset") # adds the tar hash info to "cached_datasets.json" -package.verify("my_dataset") # uses the "cached_datasets.json" information to verify hash information on tar file -package.extract("my_dataset", overwrite=False) # This should raise an error, unless you first remove the built dataset; it will ask you to overwrite -package.extract("my_dataset", overwrite=True) # extracts the tar file into the data directory, overwriting the old one (if overwrite is false, this should raise an error) +my_dataset = "my_dataset" +package.package(my_dataset) # creates a tar.gz file +package.update(my_dataset) # adds the tar hash info to "cached_datasets.json" +package.verify(my_dataset) # uses the "cached_datasets.json" information to verify hash information on tar file +package.extract(my_dataset, overwrite=False) # This should raise an error, unless you first remove the built dataset; it will ask you to overwrite +package.extract(my_dataset, overwrite=True) # extracts the tar file into the data directory, overwriting the old one (if overwrite is false, this should raise an error) ``` If you can successfully load the dataset after extracting it here, this part is good. 
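+For example, a quick post-extraction check might look like the following (a minimal sketch; it assumes `my_dataset` is the name used above and that `load.load` returns the same `(info, ds)` pair shown earlier, with `info` being the standard TFDS `DatasetInfo`):
+```python
+from armory.datasets import load
+
+info, ds = load.load(my_dataset)  # should now read from the freshly extracted build
+print(info.name, list(info.splits))  # sanity-check the dataset name and available splits
+```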
Now, to upload to s3 (you will need `ARMORY_PRIVATE_S3_ID` and `ARMORY_PRIVATE_S3_KEY`): -``` +```python from armory.datasets import upload -upload.upload("my_dataset") # this will fail, as you need to explicitly force it to be public -upload.upload("my_dataset", public=True) +upload.upload(my_dataset) # this will fail, as you need to explicitly force it to be public +upload.upload(my_dataset, public=True) ``` Or, alternatively to packaging and uploading, you can use this convenience function: -``` -package.add_to_cache("my_dataset", public=True) +```python +package.add_to_cache(my_dataset, public=True) ``` To download, which will download it directly to the tar cache directory, do: ``` from armory.datasets import download -download.download("my_dataset", overwrite=True, verify=True) +download.download(my_dataset, overwrite=True, verify=True) ``` You can also download and extract with: ``` from armory.datasets import load -load.ensure_download_extract("my_dataset", verify=True) +load.ensure_download_extract(my_dataset, verify=True) ``` or just try to load it directly ``` -load.load("my_dataset") +load.load(my_dataset) ``` # Running / Testing with current armory scenario files diff --git a/armory/datasets/cached_datasets.json b/armory/datasets/cached_datasets.json index 373e18fcf..13f9c7ac0 100644 --- a/armory/datasets/cached_datasets.json +++ b/armory/datasets/cached_datasets.json @@ -13,6 +13,13 @@ "url": null, "version": "1.0.8" }, + "librispeech_dev_test": { + "sha256": "5c5c6cb53e458e2415bc4f242122155d51f32d7e78770176afe01acb584c4caa", + "size": 2332265306, + "subdir": "librispeech_dev_test/2.1.0", + "url": null, + "version": "2.1.0" + }, "mnist": { "sha256": "fdc3408e29580367145e95ac7cb1d51e807105b174314cd52c16d27a13b98979", "size": 16920751, diff --git a/armory/datasets/standard/librispeech_dev_test/__init__.py b/armory/datasets/standard/librispeech_dev_test/__init__.py new file mode 100644 index 000000000..d84f1d722 --- /dev/null +++ b/armory/datasets/standard/librispeech_dev_test/__init__.py @@ -0,0 +1,3 @@ +"""librispeech_dev_test dataset.""" + +from .librispeech_dev_test import LibrispeechDevTest diff --git a/armory/datasets/standard/librispeech_dev_test/checksums.tsv b/armory/datasets/standard/librispeech_dev_test/checksums.tsv new file mode 100644 index 000000000..edb48d2cf --- /dev/null +++ b/armory/datasets/standard/librispeech_dev_test/checksums.tsv @@ -0,0 +1 @@ +# NOTE: This file is empty due to subclassing the existing tfds librispeech builder: https://github.com/tensorflow/datasets/blob/master/tensorflow_datasets/audio/librispeech.py diff --git a/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py b/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py new file mode 100644 index 000000000..d3de81f73 --- /dev/null +++ b/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py @@ -0,0 +1,37 @@ +""" +Subset of librispeech containing just 'dev' and 'test' splits. + +checksums.tsv is empty as it uses the underlying librispeech class. + +NOTE: In order to build, this requires apache beam installed. 
+ In the container, do: `pip install apache-beam` + This is not installed by default due to older dependencies + +NOTE: when building, armory does not provide beam options by default + This makes building VERY slow unless overrides are provided + It is recommended that this is built directly using tfds on the command line +""" + +import tensorflow_datasets as tfds +from tensorflow_datasets.audio import librispeech + +_SUBSET = ( + "dev_clean", + "dev_other", + "test_clean", + "test_other", +) +_DL_URLS = {k: v for k, v in librispeech._DL_URLS.items() if k in _SUBSET} + + +class LibrispeechDevTest(librispeech.Librispeech): + """DatasetBuilder for subset of Librispeech""" + + def _split_generators(self, dl_manager): + extracted_dirs = dl_manager.download_and_extract(_DL_URLS) + self._populate_metadata(extracted_dirs) + splits = [ + tfds.core.SplitGenerator(name=k, gen_kwargs={"directory": v}) + for k, v in extracted_dirs.items() + ] + return splits diff --git a/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json index 25b1b5bc0..6852dd90d 100644 --- a/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json +++ b/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json @@ -20,12 +20,11 @@ "use_label": false }, "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" + "test": { + "batch_size": 1, + "name": "librispeech_dev_test", + "split": "test_clean" + } }, "defense": null, "metric": { From 8de5435d93339a8b7602a71ed01ff536d0f6a2ac Mon Sep 17 00:00:00 2001 From: David Slater Date: Fri, 2 Dec 2022 17:05:05 +0000 Subject: [PATCH 2/9] prep --- armory/datasets/preprocessing.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/armory/datasets/preprocessing.py b/armory/datasets/preprocessing.py index 84325e6ab..ee5b3c9a3 100644 --- a/armory/datasets/preprocessing.py +++ b/armory/datasets/preprocessing.py @@ -57,6 +57,24 @@ def carla_over_obj_det_dev(element, modality="rgb"): ) +@register +def librispeech(element, audio_kwargs=None): + # TODO: determine how to fix np.array([], dtype=object) output for text + # https://github.com/tensorflow/tensorflow/issues/34871 + # Our traditional behavior to decode to str once in numpy + # This can be done via: y.astype("U") + # Currently, this is handled by scenarios or metrics after dataset output + # NOTE: 16000 sampling rate + if audio_kwargs is None: + audio_kwargs = {} + text = element["text"] + speech = audio_to_canon(element["speech"], **audio_kwargs) + return (speech, text) + + +librispeech_dev_test = register(librispeech, "librispeech_dev_test") + + def image_to_canon(image, resize=None, target_dtype=tf.float32, input_type="uint8"): """ TFDS Image feature uses (height, width, channels) From c818b42bbd1acda8fcfd3c3ee2233096caaf7a37 Mon Sep 17 00:00:00 2001 From: David Slater Date: Fri, 2 Dec 2022 21:23:08 +0000 Subject: [PATCH 3/9] armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py --- .../standard/librispeech_dev_test/librispeech_dev_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py b/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py index d3de81f73..5fcb31e72 100644 --- a/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py +++ 
b/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py @@ -10,6 +10,10 @@ NOTE: when building, armory does not provide beam options by default This makes building VERY slow unless overrides are provided It is recommended that this is built directly using tfds on the command line + +Using DirectRunner with apache beam, can build with this: + tfds build /workspace/armory/datasets/standard/librispeech_dev_test --data_dir /armory/datasets/new_builds --force_checksums_validation --beam_pipeline_options="runner=DirectRunner,direct_num_workers=16,direct_running_mode=multi_processing" + See: https://beam.apache.org/releases/pydoc/2.43.0/_modules/apache_beam/options/pipeline_options.html#DirectOptions """ import tensorflow_datasets as tfds From ede60fe78fbb5a2d9ab571b657396401fad31185 Mon Sep 17 00:00:00 2001 From: David Slater Date: Mon, 5 Dec 2022 18:09:06 -0800 Subject: [PATCH 4/9] update librispeech configs --- armory/scenarios/audio_asr.py | 3 +- armory/scenarios/audio_classification.py | 3 +- .../asr_librispeech_entailment.json | 2 +- .../asr_librispeech_targeted.json | 2 +- ...ibrispeech_asr_imperceptible_defended.json | 92 ------------------ ...rispeech_asr_imperceptible_undefended.json | 81 ---------------- .../librispeech_asr_kenansville_defended.json | 75 -------------- ...ibrispeech_asr_kenansville_undefended.json | 64 ------------ .../librispeech_asr_pgd_defended.json | 86 ---------------- ..._asr_pgd_multipath_channel_undefended.json | 80 --------------- .../librispeech_asr_pgd_undefended.json | 75 -------------- .../librispeech_asr_snr_targeted.json | 81 ---------------- .../librispeech_asr_snr_undefended.json | 80 --------------- .../librispeech_baseline_sincnet.json | 65 ------------- .../librispeech_baseline_sincnet_snr_pgd.json | 69 ------------- ...librispeech_baseline_sincnet_targeted.json | 72 -------------- .../asr_librispeech/defended_entailment.json | 97 ------------------- .../defended_targeted_snr_pgd.json | 92 ------------------ .../asr_librispeech/untargeted_snr_pgd.json | 73 -------------- .../hubert_defended_untargeted.json} | 29 +++--- .../asr_librispeech/hubert_entailment.json} | 30 ++---- .../hubert_targeted_snr_pgd.json} | 29 +++--- .../hubert_untargeted_snr_pgd.json | 4 +- scenario_configs/speaker_id_librispeech.json | 1 - tools/pre-commit.sh | 5 + 25 files changed, 45 insertions(+), 1245 deletions(-) delete mode 100755 scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json delete mode 100755 scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json delete mode 100755 scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json delete mode 100755 scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json delete mode 100755 scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json delete mode 100755 scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json delete mode 100755 scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json delete mode 100644 scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json delete mode 100755 scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json delete mode 100644 scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json delete mode 100644 scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json delete mode 100644 
scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json delete mode 100644 scenario_configs/eval5/asr_librispeech/defended_entailment.json delete mode 100644 scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json delete mode 100644 scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json rename scenario_configs/{eval5/asr_librispeech/defended_untargeted_snr_pgd.json => eval6/asr_librispeech/hubert_defended_untargeted.json} (70%) rename scenario_configs/{eval5/asr_librispeech/entailment.json => eval6/asr_librispeech/hubert_entailment.json} (70%) rename scenario_configs/{eval5/asr_librispeech/targeted_snr_pgd.json => eval6/asr_librispeech/hubert_targeted_snr_pgd.json} (69%) delete mode 120000 scenario_configs/speaker_id_librispeech.json diff --git a/armory/scenarios/audio_asr.py b/armory/scenarios/audio_asr.py index 93c73aca8..22bb29b2c 100644 --- a/armory/scenarios/audio_asr.py +++ b/armory/scenarios/audio_asr.py @@ -110,5 +110,6 @@ def load_test_dataset(self, test_split_default="test_clean"): def _load_sample_exporter(self): return AudioExporter( self.export_dir, - self.test_dataset.context.sample_rate, + self.test_dataset.info.metadata["sample_rate"], # TODO: smarter way? + # self.test_dataset.info['speech'].sample_rate, # TODO: get in a smarter way ) diff --git a/armory/scenarios/audio_classification.py b/armory/scenarios/audio_classification.py index ef0aa1e90..7bcdf7545 100644 --- a/armory/scenarios/audio_classification.py +++ b/armory/scenarios/audio_classification.py @@ -16,5 +16,6 @@ def load_test_dataset(self): def _load_sample_exporter(self): return AudioExporter( self.export_dir, - self.test_dataset.context.sample_rate, + self.test_dataset.info.metadata["sample_rate"], # TODO: smarter way? 
+ # self.test_dataset.info['speech'].sample_rate, ) diff --git a/scenario_configs/asr_librispeech_entailment.json b/scenario_configs/asr_librispeech_entailment.json index 752937374..b3038f281 120000 --- a/scenario_configs/asr_librispeech_entailment.json +++ b/scenario_configs/asr_librispeech_entailment.json @@ -1 +1 @@ -eval5/asr_librispeech/entailment.json \ No newline at end of file +eval6/asr_librispeech/hubert_entailment.json \ No newline at end of file diff --git a/scenario_configs/asr_librispeech_targeted.json b/scenario_configs/asr_librispeech_targeted.json index 04b2e2ac6..37d7e1bd6 120000 --- a/scenario_configs/asr_librispeech_targeted.json +++ b/scenario_configs/asr_librispeech_targeted.json @@ -1 +1 @@ -eval5/asr_librispeech/untargeted_snr_pgd.json \ No newline at end of file +eval6/asr_librispeech/hubert_targeted_snr_pgd.json \ No newline at end of file diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json deleted file mode 100755 index cdecb16ce..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "decrease_factor_alpha": 0.5, - "decrease_factor_eps": 0.5, - "eps": 0.0075, - "global_max_length": 562480, - "increase_factor_alpha": 2.0, - "initial_rescale": 1.0, - "learning_rate_1": 0.0001, - "learning_rate_2": 1e-07, - "max_iter_1": 400, - "max_iter_2": 100, - "num_iter_decrease_alpha": 50 - }, - "module": "art.attacks.evasion.imperceptible_asr.imperceptible_asr_pytorch", - "name": "ImperceptibleASRPyTorch", - "targeted": true, - "targeted_labels": { - "scheme": "matched length", - "transcripts": [ - "REALLY SHORT TEST STRING", - "THE TEST STRING HAS A LENGTH EQUAL TO THE MEDIAN OF THE CLEAN TEST TRANSCRIPT LENGTHS", - "THIS IS AN EXCEEDINGLY LONG TEST STRING BUT NOT REALLY AS THE LONGEST STRING HAS OVER FIVE HUNDRED CHARACTERS IN ITS TRANSCRIPT AND INCLUDES A LIST OF PEOPLE AND SPEAKS OF A SENATOR FROM NEW JERSEY" - ] - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": { - "kwargs": { - "apply_fit": false, - "apply_predict": true, - "channels_first": false, - "sample_rate": 16000, - "verbose": false - }, - "module": "art.defences.preprocessor", - "name": "Mp3CompressionPyTorch", - "type": "Preprocessor" - }, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": 
null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json deleted file mode 100755 index 71b02f0dc..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "decrease_factor_alpha": 0.5, - "decrease_factor_eps": 0.5, - "eps": 0.0075, - "global_max_length": 562480, - "increase_factor_alpha": 2.0, - "initial_rescale": 1.0, - "learning_rate_1": 0.0001, - "learning_rate_2": 1e-07, - "max_iter_1": 400, - "max_iter_2": 100, - "num_iter_decrease_alpha": 50 - }, - "module": "art.attacks.evasion.imperceptible_asr.imperceptible_asr_pytorch", - "name": "ImperceptibleASRPyTorch", - "targeted": true, - "targeted_labels": { - "scheme": "matched length", - "transcripts": [ - "REALLY SHORT TEST STRING", - "THE TEST STRING HAS A LENGTH EQUAL TO THE MEDIAN OF THE CLEAN TEST TRANSCRIPT LENGTHS", - "THIS IS AN EXCEEDINGLY LONG TEST STRING BUT NOT REALLY AS THE LONGEST STRING HAS OVER FIVE HUNDRED CHARACTERS IN ITS TRANSCRIPT AND INCLUDES A LIST OF PEOPLE AND SPEAKS OF A SENATOR FROM NEW JERSEY" - ] - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json deleted file mode 100755 index c4d41fb71..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "partial_attack": false, - "snr_db": 20, - "targeted": false - }, - "module": "armory.art_experimental.attacks.kenansville_dft", - "name": "KenansvilleDFT", - "use_label": false - }, - "dataset": { - "batch_size": 8, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": { - "kwargs": { - "apply_fit": false, - "apply_predict": true, - "channels_first": false, - "sample_rate": 16000, - 
"verbose": false - }, - "module": "art.defences.preprocessor", - "name": "Mp3Compression", - "type": "Preprocessor" - }, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json deleted file mode 100755 index 1a8e25bed..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "partial_attack": false, - "snr_db": 20, - "targeted": false - }, - "module": "armory.art_experimental.attacks.kenansville_dft", - "name": "KenansvilleDFT", - "use_label": false - }, - "dataset": { - "batch_size": 8, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json deleted file mode 100755 index c54f8ef78..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 1.5, - "eps_step": 0.05, - "max_iter": 100, - "norm": 2, - "num_random_init": 0, - "random_eps": false, - "targeted": false, - "verbose": false - }, - "module": "art.attacks.evasion", - "name": "ProjectedGradientDescent", - "targeted": false, - "use_label": false 
- }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": { - "kwargs": { - "apply_fit": false, - "apply_predict": true, - "channels_first": false, - "sample_rate": 16000, - "verbose": false - }, - "module": "art.defences.preprocessor", - "name": "Mp3Compression", - "type": "Preprocessor" - }, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json deleted file mode 100755 index ac814e83a..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "audio_channel": { - "attenuation": 0.5, - "delay": 300, - "pytorch": true - }, - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 1.5, - "eps_step": 0.05, - "max_iter": 100, - "norm": 2, - "num_random_init": 0, - "random_eps": false, - "targeted": false, - "verbose": false - }, - "module": "art.attacks.evasion", - "name": "ProjectedGradientDescent", - "targeted": false, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json deleted file mode 100755 index 
94a7bef1c..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 1.5, - "eps_step": 0.05, - "max_iter": 100, - "norm": 2, - "num_random_init": 0, - "random_eps": false, - "targeted": false, - "verbose": false - }, - "module": "art.attacks.evasion", - "name": "ProjectedGradientDescent", - "targeted": false, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json deleted file mode 100644 index 263adccac..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 10, - "eps_step": 0.5, - "max_iter": 10, - "norm": "snr", - "num_random_init": 0, - "targeted": true - }, - "module": "armory.art_experimental.attacks.snr_pgd", - "name": "SNR_PGD_Numpy", - "targeted": true, - "targeted_labels": { - "kwargs": { - "import_from": "armory.attacks.librispeech_target_labels", - "transcripts": "matched_length" - }, - "module": "armory.utils.labels", - "name": "MatchedTranscriptLengthTargeter" - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "linf", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - 
"docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json deleted file mode 100755 index 9ed517ef0..000000000 --- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 10, - "eps_step": 0.5, - "max_iter": 10, - "norm": "snr", - "num_random_init": 0, - "targeted": true - }, - "module": "armory.art_experimental.attacks.snr_pgd", - "name": "SNR_PGD_Numpy", - "targeted": true, - "targeted_labels": { - "kwargs": { - "value": "TEST STRING" - }, - "module": "armory.utils.labels", - "name": "FixedStringTargeter" - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "linf", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json b/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json deleted file mode 100644 index 71d688d97..000000000 --- a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "_description": "Librispeech_dev_clean raw audio classification, contributed by MITRE Corporation", - "adhoc": null, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 0.2, - "eps_step": 0.1, - "minimal": false, - "num_random_init": 0, - "targeted": false - }, - "module": "art.attacks.evasion", - "name": "FastGradientMethod", - "use_label": false - }, - "dataset": { - "batch_size": 1, - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech_dev_clean" - }, - "defense": null, - "metric": { - "means": true, - "perturbation": "linf", - "record_metric_per_sample": false, - "task": [ - "categorical_accuracy" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "fit_batch_size": 16, - "nb_epochs": 20000 - }, - "model_kwargs": { - "predict_mode": "all" - }, - "module": "armory.baseline_models.pytorch.sincnet", - "name": "get_art_model", - "weights_file": "sincnet_librispeech_v1.pth", - 
"wrapper_kwargs": { - "clip_values": [ - -1.0, - 1.0 - ] - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_classification", - "name": "AudioClassificationTask" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch", - "external_github_repo": "hkakitani/SincNet", - "gpus": "all", - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json b/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json deleted file mode 100644 index 8ea65668d..000000000 --- a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "_description": "Librispeech_dev_clean raw audio classification, contributed by MITRE Corporation", - "adhoc": null, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 10, - "eps_step": 0.5, - "max_iter": 10, - "norm": "snr", - "num_random_init": 0, - "targeted": false - }, - "module": "armory.art_experimental.attacks.snr_pgd", - "name": "SNR_PGD", - "use_label": false - }, - "dataset": { - "batch_size": 1, - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech_dev_clean" - }, - "defense": null, - "metric": { - "means": true, - "perturbation": [ - "snr", - "snr_db" - ], - "record_metric_per_sample": true, - "task": [ - "categorical_accuracy" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "fit_batch_size": 16, - "nb_epochs": 20000 - }, - "model_kwargs": { - "predict_mode": "all" - }, - "module": "armory.baseline_models.pytorch.sincnet", - "name": "get_art_model", - "weights_file": "sincnet_librispeech_v1.pth", - "wrapper_kwargs": { - "clip_values": [ - -1.0, - 1.0 - ] - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_classification", - "name": "AudioClassificationTask" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch", - "external_github_repo": "hkakitani/SincNet", - "gpus": "all", - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json b/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json deleted file mode 100644 index 526353755..000000000 --- a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "_description": "Librispeech_dev_clean raw audio classification, contributed by MITRE Corporation", - "adhoc": null, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 0.2, - "eps_step": 0.1, - "minimal": false, - "num_random_init": 0, - "targeted": true - }, - "module": "art.attacks.evasion", - "name": "FastGradientMethod", - "targeted_labels": { - "kwargs": { - "num_classes": 40 - }, - "module": "armory.utils.labels", - "name": "RoundRobinTargeter" - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech_dev_clean" - }, - "defense": null, - "metric": { - "means": true, - "perturbation": "linf", - "record_metric_per_sample": false, - "task": [ - "categorical_accuracy" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "fit_batch_size": 16, - "nb_epochs": 20000 - }, - "model_kwargs": { - "predict_mode": "all" - }, - "module": "armory.baseline_models.pytorch.sincnet", - "name": 
"get_art_model", - "weights_file": "sincnet_librispeech_v1.pth", - "wrapper_kwargs": { - "clip_values": [ - -1.0, - 1.0 - ] - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_classification", - "name": "AudioClassificationTask" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch", - "external_github_repo": "hkakitani/SincNet", - "gpus": "all", - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval5/asr_librispeech/defended_entailment.json b/scenario_configs/eval5/asr_librispeech/defended_entailment.json deleted file mode 100644 index 5727d7654..000000000 --- a/scenario_configs/eval5/asr_librispeech/defended_entailment.json +++ /dev/null @@ -1,97 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 20, - "eps_step": 0.05, - "max_iter": 500, - "norm": "snr", - "num_random_init": 0, - "targeted": true - }, - "module": "armory.art_experimental.attacks.snr_pgd", - "name": "SNR_PGD_Numpy", - "targeted": true, - "targeted_labels": { - "kwargs": { - "dtype": "str", - "import_from": "armory.attacks.librispeech_target_labels", - "values": "entailment_100" - }, - "module": "armory.utils.labels", - "name": "ManualTargeter" - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": { - "kwargs": { - "apply_fit": false, - "apply_predict": true, - "channels_first": false, - "sample_rate": 16000, - "verbose": false - }, - "module": "art.defences.preprocessor", - "name": "Mp3Compression", - "type": "Preprocessor" - }, - "metric": { - "means": false, - "perturbation": "snr_db", - "record_metric_per_sample": true, - "task": [ - "entailment", - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": [ - "SeanNaren/deepspeech.pytorch@V3.0" - ], - "gpus": "all", - "local_repo_path": null, - "num_eval_batches": 100, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json b/scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json deleted file mode 100644 index c9ff3fdb2..000000000 --- a/scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 20, - "eps_step": 0.5, - "max_iter": 500, - "norm": "snr", - "num_random_init": 0, - "targeted": true - }, - "module": "armory.art_experimental.attacks.snr_pgd", - "name": "SNR_PGD_Numpy", - "targeted": true, - "targeted_labels": 
{ - "kwargs": { - "import_from": "armory.attacks.librispeech_target_labels", - "transcripts": "matched_length" - }, - "module": "armory.utils.labels", - "name": "MatchedTranscriptLengthTargeter" - }, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": { - "kwargs": { - "apply_fit": false, - "apply_predict": true, - "channels_first": false, - "sample_rate": 16000, - "verbose": false - }, - "module": "art.defences.preprocessor", - "name": "Mp3Compression", - "type": "Preprocessor" - }, - "metric": { - "means": false, - "perturbation": "linf", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json b/scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json deleted file mode 100644 index 58a8c1af8..000000000 --- a/scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", - "adhoc": { - "skip_adversarial": false - }, - "attack": { - "knowledge": "white", - "kwargs": { - "batch_size": 1, - "eps": 20, - "eps_step": 0.5, - "max_iter": 500, - "norm": "snr", - "num_random_init": 0, - "targeted": false - }, - "module": "armory.art_experimental.attacks.snr_pgd", - "name": "SNR_PGD_Numpy", - "targeted": false, - "use_label": false - }, - "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" - }, - "defense": null, - "metric": { - "means": false, - "perturbation": "linf", - "record_metric_per_sample": true, - "task": [ - "word_error_rate" - ] - }, - "model": { - "fit": false, - "fit_kwargs": { - "nb_epochs": 20000 - }, - "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", - "name": "get_art_model", - "predict_kwargs": { - "transcription_output": true - }, - "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } - }, - "scenario": { - "kwargs": {}, - "module": "armory.scenarios.audio_asr", - "name": "AutomaticSpeechRecognition" - }, - "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", - "gpus": "all", - "local_repo_path": null, - "output_dir": null, - "output_filename": null, - "use_gpu": false - } -} diff --git a/scenario_configs/eval5/asr_librispeech/defended_untargeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_defended_untargeted.json similarity index 70% rename from 
scenario_configs/eval5/asr_librispeech/defended_untargeted_snr_pgd.json rename to scenario_configs/eval6/asr_librispeech/hubert_defended_untargeted.json index 4c128b261..006318915 100644 --- a/scenario_configs/eval5/asr_librispeech/defended_untargeted_snr_pgd.json +++ b/scenario_configs/eval6/asr_librispeech/hubert_defended_untargeted.json @@ -1,5 +1,5 @@ { - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", + "_description": "Baseline HuBERT ASR on LibriSpeech", "adhoc": { "skip_adversarial": false }, @@ -20,12 +20,11 @@ "use_label": false }, "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" + "test": { + "batch_size": 1, + "name": "librispeech_dev_test", + "split": "test_clean" + } }, "defense": { "kwargs": { @@ -41,7 +40,7 @@ }, "metric": { "means": false, - "perturbation": "linf", + "perturbation": "snr_db", "record_metric_per_sample": true, "task": [ "word_error_rate" @@ -53,19 +52,13 @@ "nb_epochs": 20000 }, "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", + "module": "armory.baseline_models.pytorch.hubert_asr_large", "name": "get_art_model", "predict_kwargs": { "transcription_output": true }, "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } + "wrapper_kwargs": {} }, "scenario": { "kwargs": {}, @@ -73,8 +66,8 @@ "name": "AutomaticSpeechRecognition" }, "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", + "docker_image": "twosixarmory/pytorch", + "external_github_repo": null, "gpus": "all", "local_repo_path": null, "output_dir": null, diff --git a/scenario_configs/eval5/asr_librispeech/entailment.json b/scenario_configs/eval6/asr_librispeech/hubert_entailment.json similarity index 70% rename from scenario_configs/eval5/asr_librispeech/entailment.json rename to scenario_configs/eval6/asr_librispeech/hubert_entailment.json index 21f5ff3e1..1a3388db5 100644 --- a/scenario_configs/eval5/asr_librispeech/entailment.json +++ b/scenario_configs/eval6/asr_librispeech/hubert_entailment.json @@ -1,5 +1,5 @@ { - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", + "_description": "Baseline HuBERT ASR on LibriSpeech", "adhoc": { "skip_adversarial": false }, @@ -29,12 +29,11 @@ "use_label": false }, "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" + "test": { + "batch_size": 1, + "name": "librispeech_dev_test", + "split": "test_clean" + } }, "defense": null, "metric": { @@ -52,19 +51,13 @@ "nb_epochs": 20000 }, "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", + "module": "armory.baseline_models.pytorch.hubert_asr_large", "name": "get_art_model", "predict_kwargs": { "transcription_output": true }, "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } + "wrapper_kwargs": {} }, "scenario": { "kwargs": {}, @@ -72,13 +65,10 @@ "name": "AutomaticSpeechRecognition" }, "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": [ - "SeanNaren/deepspeech.pytorch@V3.0" - ], + "docker_image": "twosixarmory/pytorch", + "external_github_repo": null, "gpus": "all", 
"local_repo_path": null, - "num_eval_batches": 100, "output_dir": null, "output_filename": null, "use_gpu": false diff --git a/scenario_configs/eval5/asr_librispeech/targeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_targeted_snr_pgd.json similarity index 69% rename from scenario_configs/eval5/asr_librispeech/targeted_snr_pgd.json rename to scenario_configs/eval6/asr_librispeech/hubert_targeted_snr_pgd.json index f650a46eb..5f469cca6 100644 --- a/scenario_configs/eval5/asr_librispeech/targeted_snr_pgd.json +++ b/scenario_configs/eval6/asr_librispeech/hubert_targeted_snr_pgd.json @@ -1,5 +1,5 @@ { - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", + "_description": "Baseline HuBERT ASR on LibriSpeech", "adhoc": { "skip_adversarial": false }, @@ -28,17 +28,16 @@ "use_label": false }, "dataset": { - "batch_size": 1, - "eval_split": "test_clean", - "framework": "numpy", - "module": "armory.data.datasets", - "name": "librispeech", - "train_split": "train_clean100" + "test": { + "batch_size": 1, + "name": "librispeech_dev_test", + "split": "test_clean" + } }, "defense": null, "metric": { "means": false, - "perturbation": "linf", + "perturbation": "snr_db", "record_metric_per_sample": true, "task": [ "word_error_rate" @@ -50,19 +49,13 @@ "nb_epochs": 20000 }, "model_kwargs": {}, - "module": "armory.baseline_models.pytorch.deep_speech", + "module": "armory.baseline_models.pytorch.hubert_asr_large", "name": "get_art_model", "predict_kwargs": { "transcription_output": true }, "weights_file": null, - "wrapper_kwargs": { - "clip_values": [ - -1, - 1 - ], - "pretrained_model": "librispeech" - } + "wrapper_kwargs": {} }, "scenario": { "kwargs": {}, @@ -70,8 +63,8 @@ "name": "AutomaticSpeechRecognition" }, "sysconfig": { - "docker_image": "twosixarmory/pytorch-deepspeech", - "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0", + "docker_image": "twosixarmory/pytorch", + "external_github_repo": null, "gpus": "all", "local_repo_path": null, "output_dir": null, diff --git a/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json index 6852dd90d..3ce122237 100644 --- a/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json +++ b/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json @@ -1,5 +1,5 @@ { - "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation", + "_description": "Baseline HuBERT ASR on LibriSpeech", "adhoc": { "skip_adversarial": false }, @@ -29,7 +29,7 @@ "defense": null, "metric": { "means": false, - "perturbation": "linf", + "perturbation": "snr_db", "record_metric_per_sample": true, "task": [ "word_error_rate" diff --git a/scenario_configs/speaker_id_librispeech.json b/scenario_configs/speaker_id_librispeech.json deleted file mode 120000 index c9d0b713e..000000000 --- a/scenario_configs/speaker_id_librispeech.json +++ /dev/null @@ -1 +0,0 @@ -eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json \ No newline at end of file diff --git a/tools/pre-commit.sh b/tools/pre-commit.sh index 7852fb1de..b3f85a5cc 100755 --- a/tools/pre-commit.sh +++ b/tools/pre-commit.sh @@ -74,6 +74,11 @@ pushd $PROJECT_ROOT > /dev/null || exit 1 else echo "📄 Executing 'json.tool' formatter..." for TARGET_FILE in ${TARGET_FILES}; do + # Check if + if [ ! 
-f "${TARGET_FILE}" ]; then + echo "📄 Skipping ${TARGET_FILE} (deleted)" + continue + fi # Check if file is too large to be linted FILE_SIZE=`du -m ${TARGET_FILE} | cut -f1` if [ ${FILE_SIZE} -gt ${MAX_FILE_SIZE} ]; then From 65326eb703146d29203438a43eea4e599ebf0586 Mon Sep 17 00:00:00 2001 From: David Slater Date: Tue, 6 Dec 2022 08:09:42 -0800 Subject: [PATCH 5/9] finish comment --- tools/pre-commit.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/pre-commit.sh b/tools/pre-commit.sh index b3f85a5cc..9e85f0eb8 100755 --- a/tools/pre-commit.sh +++ b/tools/pre-commit.sh @@ -74,7 +74,7 @@ pushd $PROJECT_ROOT > /dev/null || exit 1 else echo "📄 Executing 'json.tool' formatter..." for TARGET_FILE in ${TARGET_FILES}; do - # Check if + # Check if file has been deleted if [ ! -f "${TARGET_FILE}" ]; then echo "📄 Skipping ${TARGET_FILE} (deleted)" continue From 3366deb262fec2aa19395500a2dcd22666c73165 Mon Sep 17 00:00:00 2001 From: David Slater Date: Tue, 6 Dec 2022 08:26:22 -0800 Subject: [PATCH 6/9] update pre-commit --- tools/pre-commit.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/pre-commit.sh b/tools/pre-commit.sh index 9e85f0eb8..eeccecf96 100755 --- a/tools/pre-commit.sh +++ b/tools/pre-commit.sh @@ -15,7 +15,7 @@ PROJECT_ROOT=`git rev-parse --show-toplevel` # $ ARMORY_CI_TEST=1 ./tools/pre-commit.sh ARMORY_CI_TEST="${ARMORY_COMMIT_HOOK_CI:-0}" -TRACKED_FILES="git --no-pager diff HEAD --name-only" +TRACKED_FILES="git --no-pager diff --diff-filter=d --name-only HEAD" if [ "${ARMORY_CI_TEST}" -ne 0 ]; then TRACKED_FILES="git --no-pager ls-files" fi @@ -74,11 +74,6 @@ pushd $PROJECT_ROOT > /dev/null || exit 1 else echo "📄 Executing 'json.tool' formatter..." for TARGET_FILE in ${TARGET_FILES}; do - # Check if file has been deleted - if [ ! 
-f "${TARGET_FILE}" ]; then - echo "📄 Skipping ${TARGET_FILE} (deleted)" - continue - fi # Check if file is too large to be linted FILE_SIZE=`du -m ${TARGET_FILE} | cut -f1` if [ ${FILE_SIZE} -gt ${MAX_FILE_SIZE} ]; then From b3d3e382ef23ea4bbc0504c4750c8b0375160c43 Mon Sep 17 00:00:00 2001 From: David Slater Date: Tue, 6 Dec 2022 08:33:11 -0800 Subject: [PATCH 7/9] update docs --- docs/datasets.md | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/docs/datasets.md b/docs/datasets.md index 681147a2a..6d60a1850 100644 --- a/docs/datasets.md +++ b/docs/datasets.md @@ -56,14 +56,10 @@ The carla_over_obj_det_train dataset has the same properties as the above mentio | Dataset | Description | x_shape | x_dtype | y_shape | y_dtype | sampling_rate | splits | |:----------: |:-----------: |:-------: |:--------: |:--------: |:-------: |:-------: |:------: | | [digit](https://github.com/Jakobovski/free-spoken-digit-dataset) | Audio dataset of spoken digits | (N, variable_length) | int64 | (N,) | int64 | 8 kHz | train, test | -| [librispeech](http://www.openslr.org/12/) | Librispeech dataset for automatic speech recognition | (N, variable_length) | float32 | (N,) | bytes | 16 kHz | dev_clean, dev_other, test_clean, train_clean100 | -| [librispeech-full](http://www.openslr.org/12/) | Full Librispeech dataset for automatic speech recognition | (N, variable_length) | float32 | (N,) | bytes | 16 kHz | dev_clean, dev_other, test_clean, train_clean100, train_clean360, train_other500 | -| [librispeech_dev_clean](http://www.openslr.org/12/) | Librispeech dev dataset for speaker identification | (N, variable_length) | float32 | (N,) | int64 | 16 kHz | train, validation, test | -| [librispeech_dev_clean_asr](http://www.openslr.org/12) | Librispeech dev dataset for automatic speech recognition | (N, variable_length) | float32 | (N,) | bytes | 16 kHz | train, validation, test | +| [librispeech](http://www.openslr.org/12/) | Librispeech dataset for automatic speech recognition (NOTE: not currently cached. Use TFDS builder.) | (N, variable_length) | float32 | (N,) | bytes | 16 kHz | dev_clean, dev_other, test_clean, test_other, train_clean100, train_clean360, train_other500 | +| [librispeech_dev_test](http://www.openslr.org/12/) | Librispeech with ontly dev and test splits | (N, variable_length) | float32 | (N,) | int64 | 16 kHz | dev_clean, dev_other, test_clean, test_other | | [speech_commands](https://www.tensorflow.org/datasets/catalog/speech_commands) | Speech commands dataset for audio poisoning | (N, variable_length) | float32 | (N,) | int64 | 16 kHz | train, validation, test | -NOTE: because the Librispeech dataset is over 300 GB with all splits, the ```librispeech_full``` dataset has -all splits, whereas the ```librispeech``` dataset does not have the train_clean360 or train_other500 splits.
 ### Video Datasets
 
@@ -101,9 +97,6 @@ Tensorflow Datasets [library](https://www.tensorflow.org/datasets/catalog/overvi
 | resisc_45 | train | First 5/7 of dataset | See armory/data/resisc45/resisc45_dataset_partition.py |
 | | validation | Next 1/7 of dataset | |
 | | test | Final 1/7 of dataset | |
-| librispeech_dev_clean | train | 1371 recordings from dev_clean dataset | Assign discrete clips so at least 50% of audio time |
-| | validation | 692 recordings from dev_clean dataset | is in train, at least 25% is in validation, |
-| | test | 640 recordings from dev_clean dataset | and the remainder are in test |
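The `librispeech_dev_test` entry documented above exposes four evaluation splits (dev_clean, dev_other, test_clean, test_other) with variable-length float32 audio and int64 labels. As a rough illustration only — assuming the dataset has already been built or cached locally, and assuming that importing the armory builder module is enough to register the name with TFDS — loading a single split might look like:

```python
import tensorflow_datasets as tfds

# Assumption: importing the builder module registers "librispeech_dev_test" with TFDS.
import armory.datasets.standard.librispeech_dev_test  # noqa: F401

# Pull one of the documented splits; with_info=True also returns the feature spec.
ds, info = tfds.load("librispeech_dev_test", split="dev_clean", with_info=True)
print(info.features)
for example in tfds.as_numpy(ds.take(1)):
    # Expect a variable-length 16 kHz waveform plus integer label fields.
    print({k: getattr(v, "shape", v) for k, v in example.items()})
```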
From 15f8fb1792636a305ffad96be72274b32484a851 Mon Sep 17 00:00:00 2001
From: David Slater
Date: Tue, 6 Dec 2022 08:36:58 -0800
Subject: [PATCH 8/9] remove old models

---
 armory/baseline_models/pytorch/deep_speech.py |  24 --
 armory/baseline_models/pytorch/sincnet.py     | 289 ------------------
 docs/baseline_models.md                       |   4 +-
 3 files changed, 1 insertion(+), 316 deletions(-)
 delete mode 100644 armory/baseline_models/pytorch/deep_speech.py
 delete mode 100644 armory/baseline_models/pytorch/sincnet.py

diff --git a/armory/baseline_models/pytorch/deep_speech.py b/armory/baseline_models/pytorch/deep_speech.py
deleted file mode 100644
index b3efd23d1..000000000
--- a/armory/baseline_models/pytorch/deep_speech.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""
-Automatic speech recognition model
-
-Model contributed by: MITRE Corporation
-"""
-
-from typing import Optional
-
-from art.estimators.speech_recognition import PyTorchDeepSpeech
-
-from armory.utils.external_repo import ExternalRepoImport
-
-# Test for external repo at import time to fail fast
-with ExternalRepoImport(
-    repo="SeanNaren/deepspeech.pytorch@V3.0",
-    experiment="librispeech_asr_snr_undefended.json",
-):
-    from deepspeech_pytorch.model import DeepSpeech  # noqa: F401
-
-
-def get_art_model(
-    model_kwargs: dict, wrapper_kwargs: dict, weights_path: Optional[str] = None
-) -> PyTorchDeepSpeech:
-    return PyTorchDeepSpeech(**wrapper_kwargs)
diff --git a/armory/baseline_models/pytorch/sincnet.py b/armory/baseline_models/pytorch/sincnet.py
deleted file mode 100644
index 37401045d..000000000
--- a/armory/baseline_models/pytorch/sincnet.py
+++ /dev/null
@@ -1,289 +0,0 @@
-"""
-CNN model for raw audio classification
-
-Model contributed by: MITRE Corporation
-Adapted from: https://github.com/mravanelli/SincNet
-"""
-from typing import Optional
-
-from art.estimators.classification import PyTorchClassifier
-import numpy as np
-import torch
-from torch import nn
-
-from armory.utils.external_repo import ExternalRepoImport
-
-with ExternalRepoImport(
-    repo="hkakitani/SincNet",
-    experiment="librispeech_baseline_sincnet.json",
-):
-    from SincNet import dnn_models
-
-# NOTE: Underlying dataset sample rate is 16 kHz. SincNet uses this SAMPLE_RATE to
-# determine internal filter high cutoff frequency.
-SAMPLE_RATE = 8000
-WINDOW_STEP_SIZE = 375
-WINDOW_LENGTH = int(SAMPLE_RATE * WINDOW_STEP_SIZE / 1000)
-
-DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-
-def numpy_random_preprocessing_fn(batch: np.ndarray):
-    """
-    Standardize, then normalize sound clips
-
-    Then generate a random cut of the input
-    """
-    processed_batch = []
-    for clip in batch:
-        # convert and normalize
-        signal = clip.astype(np.float32)
-        # Signal normalization
-        signal = signal / np.max(np.abs(signal))
-
-        # make a pseudorandom cut of size equal to WINDOW_LENGTH
-        # (from SincNet's create_batches_rnd)
-        signal_length = len(signal)
-        np.random.seed(signal_length)
-        signal_start = int(
-            np.random.randint(signal_length / WINDOW_LENGTH - 1)
-            * WINDOW_LENGTH
-            % signal_length
-        )
-        signal_stop = signal_start + WINDOW_LENGTH
-        signal = signal[signal_start:signal_stop]
-        processed_batch.append(signal)
-
-    return np.array(processed_batch)
-
-
-def numpy_all_preprocessing_fn(batch: np.ndarray):
-    """
-    Input is comprised of one or more clips, where each clip i
-    is given as an ndarray with shape (n_i,).
-    Preprocessing normalizes each clip and breaks each clip into an integer number
-    of non-overlapping segments of length WINDOW_LENGTH.
-    Output is a list of clips, each of shape (int(n_i/WINDOW_LENGTH), WINDOW_LENGTH)
-    """
-    if len(batch) != 1:
-        raise NotImplementedError(
-            "Requires ART variable length input capability for batch size != 1"
-        )
-    processed_batch = []
-    for clip in batch:
-        # convert and normalize
-        signal = clip.astype(np.float64)
-        signal = signal / np.max(np.abs(signal))
-
-        # break into a number of chunks of equal length
-        num_chunks = int(len(signal) / WINDOW_LENGTH)
-        signal = signal[: num_chunks * WINDOW_LENGTH]
-        signal = np.reshape(signal, (num_chunks, WINDOW_LENGTH), order="C")
-        processed_batch.append(signal)
-    # remove outer batch (of size 1)
-    processed_batch = processed_batch[0]
-    return np.array(processed_batch)
-
-
-def torch_random_preprocessing_fn(x):
-    """
-    Standardize, then normalize sound clips
-    """
-    if x.shape[0] != 1:
-        raise ValueError(f"Shape of batch x {x.shape[0]} != 1")
-    if x.dtype != torch.float32:
-        raise ValueError(f"dtype of batch x {x.dtype} != torch.float32")
-    if x.max() > 1.0:
-        raise ValueError(f"batch x max {x.max()} > 1.0")
-    if x.min() < -1.0:
-        raise ValueError(f"batch x min {x.min()} < -1.0")
-    x = x.squeeze(0)
-
-    # Signal normalization
-    x = x / x.abs().max()
-
-    # get pseudorandom chunk of fixed length (from SincNet's create_batches_rnd)
-    signal_length = len(x)
-    np.random.seed(signal_length)
-    start = int(
-        np.random.randint(signal_length / WINDOW_LENGTH - 1)
-        * WINDOW_LENGTH
-        % signal_length
-    )
-
-    x = x[start : start + WINDOW_LENGTH]
-
-    x = x.unsqueeze(0)
-    return x
-
-
-def torch_all_preprocessing_fn(x: torch.Tensor):
-    """
-    Input is comprised of one or more clips, where each clip i
-    is given as an ndarray with shape (n_i,).
-    Preprocessing normalizes each clip and breaks each clip into an integer number
-    of non-overlapping segments of length WINDOW_LENGTH.
-    Output is a list of clips, each of shape (int(n_i/WINDOW_LENGTH), WINDOW_LENGTH)
-    """
-    if x.shape[0] != 1:
-        raise NotImplementedError(
-            "Requires ART variable length input capability for batch size != 1"
-        )
-    if x.max() > 1.0:
-        raise ValueError(f"batch x max {x.max()} > 1.0")
-    if x.min() < -1.0:
-        raise ValueError(f"batch x min {x.min()} < -1.0")
-    if x.dtype != torch.float32:
-        raise ValueError(f"dtype of batch x {x.dtype} != torch.float32")
-    x = x.squeeze(0)
-
-    # Signal normalization
-    x = x / x.abs().max()
-
-    # break into a number of chunks of equal length
-    num_chunks = int(len(x) / WINDOW_LENGTH)
-    x = x[: num_chunks * WINDOW_LENGTH]
-    x = x.reshape((num_chunks, WINDOW_LENGTH))
-
-    return x
-
-
-def sincnet(weights_path: Optional[str] = None) -> dnn_models.SincWrapper:
-    """
-    Set configuration options and instantiates SincWrapper object
-    """
-    pretrained = weights_path is not None
-    if pretrained:
-        model_params = torch.load(weights_path, map_location=DEVICE)
-    else:
-        model_params = {}
-    CNN_params = model_params.get("CNN_model_par")
-    DNN1_params = model_params.get("DNN1_model_par")
-    DNN2_params = model_params.get("DNN2_model_par")
-
-    # from SincNet/cfg/SincNet_dev_LibriSpeech.cfg
-    cnn_N_filt = [80, 60, 60]
-    cnn_len_filt = [251, 5, 5]
-    cnn_max_pool_len = [3, 3, 3]
-    cnn_use_laynorm_inp = True
-    cnn_use_batchnorm_inp = False
-    cnn_use_laynorm = [True, True, True]
-    cnn_use_batchnorm = [False, False, False]
-    cnn_act = ["relu", "relu", "relu"]
-    cnn_drop = [0.0, 0.0, 0.0]
-
-    fc_lay = [2048, 2048, 2048]
-    fc_drop = [0.0, 0.0, 0.0]
-    fc_use_laynorm_inp = True
-    fc_use_batchnorm_inp = False
-    fc_use_batchnorm = [True, True, True]
-    fc_use_laynorm = [False, False, False]
-    fc_act = ["leaky_relu", "linear", "leaky_relu"]
-
-    class_lay = [40]
-    class_drop = [0.0, 0.0]
-    class_use_laynorm_inp = True
-    class_use_batchnorm_inp = False
-    class_use_batchnorm = [False]
-    class_use_laynorm = [False]
-    class_act = ["softmax"]
-
-    CNN_options = {
-        "input_dim": WINDOW_LENGTH,
-        "fs": SAMPLE_RATE,
-        "cnn_N_filt": cnn_N_filt,
-        "cnn_len_filt": cnn_len_filt,
-        "cnn_max_pool_len": cnn_max_pool_len,
-        "cnn_use_laynorm_inp": cnn_use_laynorm_inp,
-        "cnn_use_batchnorm_inp": cnn_use_batchnorm_inp,
-        "cnn_use_laynorm": cnn_use_laynorm,
-        "cnn_use_batchnorm": cnn_use_batchnorm,
-        "cnn_act": cnn_act,
-        "cnn_drop": cnn_drop,
-        "pretrained": pretrained,
-        "model_params": CNN_params,
-    }
-
-    DNN1_options = {
-        "fc_lay": fc_lay,
-        "fc_drop": fc_drop,
-        "fc_use_batchnorm": fc_use_batchnorm,
-        "fc_use_laynorm": fc_use_laynorm,
-        "fc_use_laynorm_inp": fc_use_laynorm_inp,
-        "fc_use_batchnorm_inp": fc_use_batchnorm_inp,
-        "fc_act": fc_act,
-        "pretrained": pretrained,
-        "model_params": DNN1_params,
-    }
-
-    DNN2_options = {
-        "input_dim": fc_lay[-1],
-        "fc_lay": class_lay,
-        "fc_drop": class_drop,
-        "fc_use_batchnorm": class_use_batchnorm,
-        "fc_use_laynorm": class_use_laynorm,
-        "fc_use_laynorm_inp": class_use_laynorm_inp,
-        "fc_use_batchnorm_inp": class_use_batchnorm_inp,
-        "fc_act": class_act,
-    }
-
-    sincNet = dnn_models.SincWrapper(DNN2_options, DNN1_options, CNN_options)
-
-    if pretrained:
-        sincNet.eval()
-        sincNet.load_state_dict(DNN2_params)
-
-    else:
-        sincNet.train()
-
-    return sincNet
-
-
-class SincNetWrapper(nn.Module):
-    MODES = {
-        "random": torch_random_preprocessing_fn,
-        "all": torch_all_preprocessing_fn,
-    }
-
-    def __init__(self, model_kwargs: dict, weights_path: Optional[str]) -> None:
-        super().__init__()
-        predict_mode = model_kwargs.pop("predict_mode", "all")
-        if predict_mode not in self.MODES:
-            raise ValueError(f"predict_mode {predict_mode} not in {tuple(self.MODES)}")
-        self.predict_mode = predict_mode
-
-        self.model = sincnet(weights_path=weights_path, **model_kwargs)
-        self.model.to(DEVICE)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if self.training:
-            # preprocessing should be done before model for arbitrary length input
-            return self.model(x)
-
-        x = self.MODES[self.predict_mode](x)
-        output = self.model(x)
-        if self.predict_mode == "all":
-            output = torch.mean(output, dim=0, keepdim=True)
-        return output
-
-
-preprocessing_fn = numpy_random_preprocessing_fn
-
-
-def get_art_model(
-    model_kwargs: dict, wrapper_kwargs: dict, weights_path: Optional[str] = None
-) -> PyTorchClassifier:
-    model = SincNetWrapper(model_kwargs, weights_path)
-    model.to(DEVICE)
-
-    wrapped_model = PyTorchClassifier(
-        model,
-        loss=torch.nn.NLLLoss(),
-        optimizer=torch.optim.RMSprop(
-            model.parameters(), lr=0.001, alpha=0.95, eps=1e-8
-        ),
-        input_shape=(None,),
-        nb_classes=40,
-        **wrapper_kwargs,
-    )
-    return wrapped_model
diff --git a/docs/baseline_models.md b/docs/baseline_models.md
index 4fde37b87..ede6900ca 100644
--- a/docs/baseline_models.md
+++ b/docs/baseline_models.md
@@ -37,8 +37,6 @@ The model files can be found in [armory/baseline_models/pytorch](../armory/basel
 | Model | S3 weight_files |
 |:----------: |:---------------------------------------------:|
 | Cifar10 CNN | |
-| DeepSpeech 2 | |
-| Sincnet CNN | `sincnet_librispeech_v1.pth` |
 | MARS | `mars_ucf101_v1.pth` , `mars_kinetics_v1.pth` |
 | ResNet50 CNN | `resnet50_imagenet_v1.pth` |
 | MNIST CNN | `undefended_mnist_5epochs.pth` |
@@ -59,4 +57,4 @@ The weights for this model are downloaded from the link listed below.
 
 ### Preprocessing Functions
 Preprocessing functions have been moved inside each model's forward pass. This is to allow each
-model to receive as input the canonicalized form of a dataset.
\ No newline at end of file
+model to receive as input the canonicalized form of a dataset.

From cb51df3b73796fe722564df91c07245562813e48 Mon Sep 17 00:00:00 2001
From: David Slater
Date: Tue, 6 Dec 2022 08:58:23 -0800
Subject: [PATCH 9/9] remove unneeded commented lines

---
 armory/datasets/preprocessing.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/armory/datasets/preprocessing.py b/armory/datasets/preprocessing.py
index ee5b3c9a3..271dfdea4 100644
--- a/armory/datasets/preprocessing.py
+++ b/armory/datasets/preprocessing.py
@@ -109,14 +109,6 @@ def audio_to_canon(audio, resample=None, target_dtype=tf.float32, input_type="in
     return audio
 
 
-# config = {
-#     "preprocessor": "mnist(max_frames=1)"
-#     "preprocessor_kwargs": {
-#         "max_frames": null,
-#     }
-# }
-
-
 def video_to_canon(
     video,
     resize=None,