diff --git a/armory/baseline_models/pytorch/deep_speech.py b/armory/baseline_models/pytorch/deep_speech.py
deleted file mode 100644
index b3efd23d1..000000000
--- a/armory/baseline_models/pytorch/deep_speech.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""
-Automatic speech recognition model
-
-Model contributed by: MITRE Corporation
-"""
-
-from typing import Optional
-
-from art.estimators.speech_recognition import PyTorchDeepSpeech
-
-from armory.utils.external_repo import ExternalRepoImport
-
-# Test for external repo at import time to fail fast
-with ExternalRepoImport(
-    repo="SeanNaren/deepspeech.pytorch@V3.0",
-    experiment="librispeech_asr_snr_undefended.json",
-):
-    from deepspeech_pytorch.model import DeepSpeech  # noqa: F401
-
-
-def get_art_model(
-    model_kwargs: dict, wrapper_kwargs: dict, weights_path: Optional[str] = None
-) -> PyTorchDeepSpeech:
-    return PyTorchDeepSpeech(**wrapper_kwargs)
diff --git a/armory/baseline_models/pytorch/sincnet.py b/armory/baseline_models/pytorch/sincnet.py
deleted file mode 100644
index 37401045d..000000000
--- a/armory/baseline_models/pytorch/sincnet.py
+++ /dev/null
@@ -1,289 +0,0 @@
-"""
-CNN model for raw audio classification
-
-Model contributed by: MITRE Corporation
-Adapted from: https://github.com/mravanelli/SincNet
-"""
-from typing import Optional
-
-from art.estimators.classification import PyTorchClassifier
-import numpy as np
-import torch
-from torch import nn
-
-from armory.utils.external_repo import ExternalRepoImport
-
-with ExternalRepoImport(
-    repo="hkakitani/SincNet",
-    experiment="librispeech_baseline_sincnet.json",
-):
-    from SincNet import dnn_models
-
-# NOTE: Underlying dataset sample rate is 16 kHz. SincNet uses this SAMPLE_RATE to
-# determine internal filter high cutoff frequency.
-SAMPLE_RATE = 8000
-WINDOW_STEP_SIZE = 375
-WINDOW_LENGTH = int(SAMPLE_RATE * WINDOW_STEP_SIZE / 1000)
-
-DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-
-def numpy_random_preprocessing_fn(batch: np.ndarray):
-    """
-    Standardize, then normalize sound clips
-
-    Then generate a random cut of the input
-    """
-    processed_batch = []
-    for clip in batch:
-        # convert and normalize
-        signal = clip.astype(np.float32)
-        # Signal normalization
-        signal = signal / np.max(np.abs(signal))
-
-        # make a pseudorandom cut of size equal to WINDOW_LENGTH
-        # (from SincNet's create_batches_rnd)
-        signal_length = len(signal)
-        np.random.seed(signal_length)
-        signal_start = int(
-            np.random.randint(signal_length / WINDOW_LENGTH - 1)
-            * WINDOW_LENGTH
-            % signal_length
-        )
-        signal_stop = signal_start + WINDOW_LENGTH
-        signal = signal[signal_start:signal_stop]
-        processed_batch.append(signal)
-
-    return np.array(processed_batch)
-
-
-def numpy_all_preprocessing_fn(batch: np.ndarray):
-    """
-    Input is comprised of one or more clips, where each clip i
-    is given as an ndarray with shape (n_i,).
-    Preprocessing normalizes each clip and breaks each clip into an integer number
-    of non-overlapping segments of length WINDOW_LENGTH.
-    Output is a list of clips, each of shape (int(n_i/WINDOW_LENGTH), WINDOW_LENGTH)
-    """
-    if len(batch) != 1:
-        raise NotImplementedError(
-            "Requires ART variable length input capability for batch size != 1"
-        )
-    processed_batch = []
-    for clip in batch:
-        # convert and normalize
-        signal = clip.astype(np.float64)
-        signal = signal / np.max(np.abs(signal))
-
-        # break into a number of chunks of equal length
-        num_chunks = int(len(signal) / WINDOW_LENGTH)
-        signal = signal[: num_chunks * WINDOW_LENGTH]
-        signal = np.reshape(signal, (num_chunks, WINDOW_LENGTH), order="C")
-        processed_batch.append(signal)
-    # remove outer batch (of size 1)
-    processed_batch = processed_batch[0]
-    return np.array(processed_batch)
-
-
-def torch_random_preprocessing_fn(x):
-    """
-    Standardize, then normalize sound clips
-    """
-    if x.shape[0] != 1:
-        raise ValueError(f"Shape of batch x {x.shape[0]} != 1")
-    if x.dtype != torch.float32:
-        raise ValueError(f"dtype of batch x {x.dtype} != torch.float32")
-    if x.max() > 1.0:
-        raise ValueError(f"batch x max {x.max()} > 1.0")
-    if x.min() < -1.0:
-        raise ValueError(f"batch x min {x.min()} < -1.0")
-    x = x.squeeze(0)
-
-    # Signal normalization
-    x = x / x.abs().max()
-
-    # get pseudorandom chunk of fixed length (from SincNet's create_batches_rnd)
-    signal_length = len(x)
-    np.random.seed(signal_length)
-    start = int(
-        np.random.randint(signal_length / WINDOW_LENGTH - 1)
-        * WINDOW_LENGTH
-        % signal_length
-    )
-
-    x = x[start : start + WINDOW_LENGTH]
-
-    x = x.unsqueeze(0)
-    return x
-
-
-def torch_all_preprocessing_fn(x: torch.Tensor):
-    """
-    Input is comprised of one or more clips, where each clip i
-    is given as an ndarray with shape (n_i,).
-    Preprocessing normalizes each clip and breaks each clip into an integer number
-    of non-overlapping segments of length WINDOW_LENGTH.
-    Output is a list of clips, each of shape (int(n_i/WINDOW_LENGTH), WINDOW_LENGTH)
-    """
-    if x.shape[0] != 1:
-        raise NotImplementedError(
-            "Requires ART variable length input capability for batch size != 1"
-        )
-    if x.max() > 1.0:
-        raise ValueError(f"batch x max {x.max()} > 1.0")
-    if x.min() < -1.0:
-        raise ValueError(f"batch x min {x.min()} < -1.0")
-    if x.dtype != torch.float32:
-        raise ValueError(f"dtype of batch x {x.dtype} != torch.float32")
-    x = x.squeeze(0)
-
-    # Signal normalization
-    x = x / x.abs().max()
-
-    # break into a number of chunks of equal length
-    num_chunks = int(len(x) / WINDOW_LENGTH)
-    x = x[: num_chunks * WINDOW_LENGTH]
-    x = x.reshape((num_chunks, WINDOW_LENGTH))
-
-    return x
-
-
-def sincnet(weights_path: Optional[str] = None) -> dnn_models.SincWrapper:
-    """
-    Set configuration options and instantiates SincWrapper object
-    """
-    pretrained = weights_path is not None
-    if pretrained:
-        model_params = torch.load(weights_path, map_location=DEVICE)
-    else:
-        model_params = {}
-    CNN_params = model_params.get("CNN_model_par")
-    DNN1_params = model_params.get("DNN1_model_par")
-    DNN2_params = model_params.get("DNN2_model_par")
-
-    # from SincNet/cfg/SincNet_dev_LibriSpeech.cfg
-    cnn_N_filt = [80, 60, 60]
-    cnn_len_filt = [251, 5, 5]
-    cnn_max_pool_len = [3, 3, 3]
-    cnn_use_laynorm_inp = True
-    cnn_use_batchnorm_inp = False
-    cnn_use_laynorm = [True, True, True]
-    cnn_use_batchnorm = [False, False, False]
-    cnn_act = ["relu", "relu", "relu"]
-    cnn_drop = [0.0, 0.0, 0.0]
-
-    fc_lay = [2048, 2048, 2048]
-    fc_drop = [0.0, 0.0, 0.0]
-    fc_use_laynorm_inp = True
-    fc_use_batchnorm_inp = False
-    fc_use_batchnorm = [True, True, True]
-    fc_use_laynorm = [False, False, False]
-    fc_act = ["leaky_relu", "linear", "leaky_relu"]
-
-    class_lay = [40]
-    class_drop = [0.0, 0.0]
-    class_use_laynorm_inp = True
-    class_use_batchnorm_inp = False
-    class_use_batchnorm = [False]
-    class_use_laynorm = [False]
-    class_act = ["softmax"]
-
-    CNN_options = {
-        "input_dim": WINDOW_LENGTH,
-        "fs": SAMPLE_RATE,
-        "cnn_N_filt": cnn_N_filt,
-        "cnn_len_filt": cnn_len_filt,
-        "cnn_max_pool_len": cnn_max_pool_len,
-        "cnn_use_laynorm_inp": cnn_use_laynorm_inp,
-        "cnn_use_batchnorm_inp": cnn_use_batchnorm_inp,
-        "cnn_use_laynorm": cnn_use_laynorm,
-        "cnn_use_batchnorm": cnn_use_batchnorm,
-        "cnn_act": cnn_act,
-        "cnn_drop": cnn_drop,
-        "pretrained": pretrained,
-        "model_params": CNN_params,
-    }
-
-    DNN1_options = {
-        "fc_lay": fc_lay,
-        "fc_drop": fc_drop,
-        "fc_use_batchnorm": fc_use_batchnorm,
-        "fc_use_laynorm": fc_use_laynorm,
-        "fc_use_laynorm_inp": fc_use_laynorm_inp,
-        "fc_use_batchnorm_inp": fc_use_batchnorm_inp,
-        "fc_act": fc_act,
-        "pretrained": pretrained,
-        "model_params": DNN1_params,
-    }
-
-    DNN2_options = {
-        "input_dim": fc_lay[-1],
-        "fc_lay": class_lay,
-        "fc_drop": class_drop,
-        "fc_use_batchnorm": class_use_batchnorm,
-        "fc_use_laynorm": class_use_laynorm,
-        "fc_use_laynorm_inp": class_use_laynorm_inp,
-        "fc_use_batchnorm_inp": class_use_batchnorm_inp,
-        "fc_act": class_act,
-    }
-
-    sincNet = dnn_models.SincWrapper(DNN2_options, DNN1_options, CNN_options)
-
-    if pretrained:
-        sincNet.eval()
-        sincNet.load_state_dict(DNN2_params)
-
-    else:
-        sincNet.train()
-
-    return sincNet
-
-
-class SincNetWrapper(nn.Module):
-    MODES = {
-        "random": torch_random_preprocessing_fn,
-        "all": torch_all_preprocessing_fn,
-    }
-
-    def __init__(self, model_kwargs: dict, weights_path: Optional[str]) -> None:
-        super().__init__()
-        predict_mode = model_kwargs.pop("predict_mode", "all")
-        if predict_mode not in self.MODES:
-            raise ValueError(f"predict_mode {predict_mode} not in {tuple(self.MODES)}")
-        self.predict_mode = predict_mode
-
-        self.model = sincnet(weights_path=weights_path, **model_kwargs)
-        self.model.to(DEVICE)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if self.training:
-            # preprocessing should be done before model for arbitrary length input
-            return self.model(x)
-
-        x = self.MODES[self.predict_mode](x)
-        output = self.model(x)
-        if self.predict_mode == "all":
-            output = torch.mean(output, dim=0, keepdim=True)
-        return output
-
-
-preprocessing_fn = numpy_random_preprocessing_fn
-
-
-def get_art_model(
-    model_kwargs: dict, wrapper_kwargs: dict, weights_path: Optional[str] = None
-) -> PyTorchClassifier:
-    model = SincNetWrapper(model_kwargs, weights_path)
-    model.to(DEVICE)
-
-    wrapped_model = PyTorchClassifier(
-        model,
-        loss=torch.nn.NLLLoss(),
-        optimizer=torch.optim.RMSprop(
-            model.parameters(), lr=0.001, alpha=0.95, eps=1e-8
-        ),
-        input_shape=(None,),
-        nb_classes=40,
-        **wrapper_kwargs,
-    )
-    return wrapped_model
diff --git a/armory/datasets/README.md b/armory/datasets/README.md
index 76c2d8ac2..27e4f9933 100644
--- a/armory/datasets/README.md
+++ b/armory/datasets/README.md
@@ -84,6 +84,19 @@ info, ds = load.load("digit")
 info, ds = load.from_directory("/armory/datasets/new_builds/digit/1.0.8")
 ```
 
+### Apache Beam Datasets
+
+Currently, `librispeech` and `librispeech_dev_clean` use Apache Beam to build.
+Apache Beam is not installed by default in the container due to its older dependencies.
+If building in the container, first run:
+```
+pip install apache-beam
+```
+
+When building, armory does not provide Beam pipeline options by default.
+This makes building VERY slow unless overrides are provided.
+It is recommended that these datasets be built directly using `tfds` on the command line.
+
 ## Packaging and Uploading for Cache
 
 After a dataset has been successfully built and loaded (locally), it can be packaged and uploaded to the cache.
@@ -91,43 +104,44 @@ After a dataset has been successfully built and loaded (locally), it can be pack
 First, it is recommended that you test the packaging and untarring process without upload/download.
 
 In python:
-```
+```python
 from armory.datasets import package
-package.package("my_dataset")  # creates a tar.gz file
-package.update("my_dataset")  # adds the tar hash info to "cached_datasets.json"
-package.verify("my_dataset")  # uses the "cached_datasets.json" information to verify hash information on tar file
-package.extract("my_dataset", overwrite=False)  # This should raise an error, unless you first remove the built dataset; it will ask you to overwrite
-package.extract("my_dataset", overwrite=True)  # extracts the tar file into the data directory, overwriting the old one (if overwrite is false, this should raise an error)
+my_dataset = "my_dataset"
+package.package(my_dataset)  # creates a tar.gz file
+package.update(my_dataset)  # adds the tar hash info to "cached_datasets.json"
+package.verify(my_dataset)  # uses the "cached_datasets.json" information to verify hash information on tar file
+package.extract(my_dataset, overwrite=False)  # This should raise an error, unless you first remove the built dataset; it will ask you to overwrite
+package.extract(my_dataset, overwrite=True)  # extracts the tar file into the data directory, overwriting the old one (if overwrite is false, this should raise an error)
 ```
 
 If you can successfully load the dataset after extracting it here, this part is good.
 
 Now, to upload to s3 (you will need `ARMORY_PRIVATE_S3_ID` and `ARMORY_PRIVATE_S3_KEY`):
-```
+```python
 from armory.datasets import upload
-upload.upload("my_dataset")  # this will fail, as you need to explicitly force it to be public
-upload.upload("my_dataset", public=True)
+upload.upload(my_dataset)  # this will fail, as you need to explicitly force it to be public
+upload.upload(my_dataset, public=True)
 ```
 
 Or, alternatively to packaging and uploading, you can use this convenience function:
-```
-package.add_to_cache("my_dataset", public=True)
+```python
+package.add_to_cache(my_dataset, public=True)
 ```
 
 To download, which will download it directly to the tar cache directory, do:
 ```
 from armory.datasets import download
-download.download("my_dataset", overwrite=True, verify=True)
+download.download(my_dataset, overwrite=True, verify=True)
 ```
 
 You can also download and extract with:
 ```
 from armory.datasets import load
-load.ensure_download_extract("my_dataset", verify=True)
+load.ensure_download_extract(my_dataset, verify=True)
 ```
 or just try to load it directly
 ```
-load.load("my_dataset")
+load.load(my_dataset)
 ```
 
 # Running / Testing with current armory scenario files
diff --git a/armory/datasets/cached_datasets.json b/armory/datasets/cached_datasets.json
index a3b6a2fd8..add4b96e4 100644
--- a/armory/datasets/cached_datasets.json
+++ b/armory/datasets/cached_datasets.json
@@ -13,6 +13,13 @@
         "url": null,
         "version": "1.0.8"
     },
+    "librispeech_dev_test": {
+        "sha256": "5c5c6cb53e458e2415bc4f242122155d51f32d7e78770176afe01acb584c4caa",
+        "size": 2332265306,
+        "subdir": "librispeech_dev_test/2.1.0",
+        "url": null,
+        "version": "2.1.0"
+    },
     "mnist": {
         "sha256": "fdc3408e29580367145e95ac7cb1d51e807105b174314cd52c16d27a13b98979",
         "size": 16920751,
diff --git a/armory/datasets/preprocessing.py b/armory/datasets/preprocessing.py
index 91e7c15b1..1ca0e4190 100644
--- a/armory/datasets/preprocessing.py
+++ b/armory/datasets/preprocessing.py
@@ -64,6 +64,24 @@ def xview(element):
     )
 
 
+@register
+def librispeech(element, audio_kwargs=None):
+    # TODO: determine how to fix np.array([<byte>], dtype=object) output for text
+    #    https://github.com/tensorflow/tensorflow/issues/34871
+    #    Our traditional behavior is to decode to str once in numpy
+    #    This can be done via: y.astype("U")
+    #    Currently, this is handled by scenarios or metrics after dataset output
+    # NOTE: 16000 sampling rate
+    if audio_kwargs is None:
+        audio_kwargs = {}
+    text = element["text"]
+    speech = audio_to_canon(element["speech"], **audio_kwargs)
+    return (speech, text)
+
+
+librispeech_dev_test = register(librispeech, "librispeech_dev_test")
+
+
 def image_to_canon(image, resize=None, target_dtype=tf.float32, input_type="uint8"):
     """
     TFDS Image feature uses (height, width, channels)
@@ -98,14 +116,6 @@ def audio_to_canon(audio, resample=None, target_dtype=tf.float32, input_type="in
     return audio
 
 
-# config = {
-#     "preprocessor": "mnist(max_frames=1)"
-#     "preprocessor_kwargs": {
-#         "max_frames": null,
-#     }
-# }
-
-
 def video_to_canon(
     video,
     resize=None,
diff --git a/armory/datasets/standard/librispeech_dev_test/__init__.py b/armory/datasets/standard/librispeech_dev_test/__init__.py
new file mode 100644
index 000000000..d84f1d722
--- /dev/null
+++ b/armory/datasets/standard/librispeech_dev_test/__init__.py
@@ -0,0 +1,3 @@
+"""librispeech_dev_test dataset."""
+
+from .librispeech_dev_test import LibrispeechDevTest
diff --git a/armory/datasets/standard/librispeech_dev_test/checksums.tsv b/armory/datasets/standard/librispeech_dev_test/checksums.tsv
new file mode 100644
index 000000000..edb48d2cf
--- /dev/null
+++ b/armory/datasets/standard/librispeech_dev_test/checksums.tsv
@@ -0,0 +1 @@
+# NOTE: This file is empty due to subclassing the existing tfds librispeech builder: https://github.com/tensorflow/datasets/blob/master/tensorflow_datasets/audio/librispeech.py
diff --git a/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py b/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py
new file mode 100644
index 000000000..5fcb31e72
--- /dev/null
+++ b/armory/datasets/standard/librispeech_dev_test/librispeech_dev_test.py
@@ -0,0 +1,41 @@
+"""
+Subset of librispeech containing just 'dev' and 'test' splits.
+
+checksums.tsv is empty as it uses the underlying librispeech class.
+
+NOTE: In order to build, this requires Apache Beam to be installed.
+    In the container, do: `pip install apache-beam`
+    This is not installed by default due to its older dependencies
+
+NOTE: When building, armory does not provide Beam pipeline options by default
+    This makes building VERY slow unless overrides are provided
+    It is recommended that this dataset be built directly using tfds on the command line
+
+Using DirectRunner with Apache Beam, this dataset can be built with:
+    tfds build /workspace/armory/datasets/standard/librispeech_dev_test --data_dir /armory/datasets/new_builds --force_checksums_validation --beam_pipeline_options="runner=DirectRunner,direct_num_workers=16,direct_running_mode=multi_processing"
+    See: https://beam.apache.org/releases/pydoc/2.43.0/_modules/apache_beam/options/pipeline_options.html#DirectOptions
+"""
+
+import tensorflow_datasets as tfds
+from tensorflow_datasets.audio import librispeech
+
+_SUBSET = (
+    "dev_clean",
+    "dev_other",
+    "test_clean",
+    "test_other",
+)
+_DL_URLS = {k: v for k, v in librispeech._DL_URLS.items() if k in _SUBSET}
+
+
+class LibrispeechDevTest(librispeech.Librispeech):
+    """DatasetBuilder for subset of Librispeech"""
+
+    def _split_generators(self, dl_manager):
+        extracted_dirs = dl_manager.download_and_extract(_DL_URLS)
+        self._populate_metadata(extracted_dirs)
+        splits = [
+            tfds.core.SplitGenerator(name=k, gen_kwargs={"directory": v})
+            for k, v in extracted_dirs.items()
+        ]
+        return splits
diff --git a/armory/scenarios/audio_asr.py b/armory/scenarios/audio_asr.py
index 93c73aca8..22bb29b2c 100644
--- a/armory/scenarios/audio_asr.py
+++ b/armory/scenarios/audio_asr.py
@@ -110,5 +110,6 @@ def load_test_dataset(self, test_split_default="test_clean"):
     def _load_sample_exporter(self):
         return AudioExporter(
             self.export_dir,
-            self.test_dataset.context.sample_rate,
+            self.test_dataset.info.metadata["sample_rate"],  # TODO: smarter way?
+            # self.test_dataset.info['speech'].sample_rate,  # TODO: get in a smarter way
         )
diff --git a/armory/scenarios/audio_classification.py b/armory/scenarios/audio_classification.py
index ef0aa1e90..7bcdf7545 100644
--- a/armory/scenarios/audio_classification.py
+++ b/armory/scenarios/audio_classification.py
@@ -16,5 +16,6 @@ def load_test_dataset(self):
     def _load_sample_exporter(self):
         return AudioExporter(
             self.export_dir,
-            self.test_dataset.context.sample_rate,
+            self.test_dataset.info.metadata["sample_rate"],  # TODO: smarter way?
+            # self.test_dataset.info['speech'].sample_rate,
         )
diff --git a/docs/baseline_models.md b/docs/baseline_models.md
index 4fde37b87..ede6900ca 100644
--- a/docs/baseline_models.md
+++ b/docs/baseline_models.md
@@ -37,8 +37,6 @@ The model files can be found in [armory/baseline_models/pytorch](../armory/basel
 | Model   |                S3 weight_files                | 
 |:----------: |:---------------------------------------------:| 
 | Cifar10 CNN |                                               |  
-| DeepSpeech 2 |                                               |
-| Sincnet CNN |         `sincnet_librispeech_v1.pth`          |
 | MARS | `mars_ucf101_v1.pth` , `mars_kinetics_v1.pth` |
 | ResNet50 CNN |          `resnet50_imagenet_v1.pth`           |
 | MNIST CNN |        `undefended_mnist_5epochs.pth`         |
@@ -59,4 +57,4 @@ The weights for this model are downloaded from the link listed below.
 
 ### Preprocessing Functions
 Preprocessing functions have been moved inside each model's forward pass. This is to allow each
-model to receive as input the canonicalized form of a dataset.
\ No newline at end of file
+model to receive as input the canonicalized form of a dataset.
diff --git a/docs/datasets.md b/docs/datasets.md
index 681147a2a..6d60a1850 100644
--- a/docs/datasets.md
+++ b/docs/datasets.md
@@ -56,14 +56,10 @@ The carla_over_obj_det_train dataset has the same properties as the above mentio
 | Dataset    | Description | x_shape | x_dtype  | y_shape  | y_dtype | sampling_rate | splits |
 |:----------: |:-----------: |:-------: |:--------: |:--------: |:-------: |:-------: |:------: |
 | [digit](https://github.com/Jakobovski/free-spoken-digit-dataset) | Audio dataset of spoken digits | (N, variable_length) | int64 | (N,) | int64 | 8 kHz | train, test |
-| [librispeech](http://www.openslr.org/12/) | Librispeech dataset for automatic speech recognition  | (N, variable_length)  | float32 | (N,)  | bytes | 16 kHz | dev_clean, dev_other, test_clean, train_clean100 |
-| [librispeech-full](http://www.openslr.org/12/) | Full Librispeech dataset for automatic speech recognition | (N, variable_length)  | float32 | (N,)  | bytes | 16 kHz | dev_clean, dev_other, test_clean, train_clean100, train_clean360, train_other500 |
-| [librispeech_dev_clean](http://www.openslr.org/12/) | Librispeech dev dataset for speaker identification  | (N, variable_length)  | float32 | (N,)  | int64 | 16 kHz | train, validation, test |
-| [librispeech_dev_clean_asr](http://www.openslr.org/12) | Librispeech dev dataset for automatic speech recognition | (N, variable_length) | float32 | (N,) | bytes | 16 kHz | train, validation, test |
+| [librispeech](http://www.openslr.org/12/) | Librispeech dataset for automatic speech recognition (NOTE: not currently cached. Use TFDS builder.) | (N, variable_length)  | float32 | (N,)  | bytes | 16 kHz | dev_clean, dev_other, test_clean, test_other, train_clean100, train_clean360, train_other500 |
+| [librispeech_dev_test](http://www.openslr.org/12/) | Librispeech with only dev and test splits | (N, variable_length)  | float32 | (N,)  | bytes | 16 kHz | dev_clean, dev_other, test_clean, test_other |
 | [speech_commands](https://www.tensorflow.org/datasets/catalog/speech_commands) | Speech commands dataset for audio poisoning | (N, variable_length) | float32 | (N,) | int64 | 16 kHz | train, validation, test |
 
-NOTE: because the Librispeech dataset is over 300 GB with all splits, the ```librispeech_full``` dataset has
-all splits, whereas the ```librispeech``` dataset does not have the train_clean360 or train_other500 splits.
 <br>
 
 ### Video Datasets
@@ -101,9 +97,6 @@ Tensorflow Datasets [library](https://www.tensorflow.org/datasets/catalog/overvi
 |       resisc_45       |    train   |         First 5/7 of dataset           | See armory/data/resisc45/resisc45_dataset_partition.py |
 |                       | validation |          Next 1/7 of dataset           |                                                        |
 |                       |    test    |         Final 1/7 of dataset           |                                                        |
-| librispeech_dev_clean |    train   | 1371 recordings from dev_clean dataset |   Assign discrete clips so at least 50% of audio time  |
-|                       | validation |  692 recordings from dev_clean dataset |       is in train, at least 25% is in validation,      |
-|                       |    test    |  640 recordings from dev_clean dataset |              and the remainder are in test             |
 
 
 <br>
diff --git a/scenario_configs/asr_librispeech_entailment.json b/scenario_configs/asr_librispeech_entailment.json
index 752937374..b3038f281 120000
--- a/scenario_configs/asr_librispeech_entailment.json
+++ b/scenario_configs/asr_librispeech_entailment.json
@@ -1 +1 @@
-eval5/asr_librispeech/entailment.json
\ No newline at end of file
+eval6/asr_librispeech/hubert_entailment.json
\ No newline at end of file
diff --git a/scenario_configs/asr_librispeech_targeted.json b/scenario_configs/asr_librispeech_targeted.json
index 04b2e2ac6..37d7e1bd6 120000
--- a/scenario_configs/asr_librispeech_targeted.json
+++ b/scenario_configs/asr_librispeech_targeted.json
@@ -1 +1 @@
-eval5/asr_librispeech/untargeted_snr_pgd.json
\ No newline at end of file
+eval6/asr_librispeech/hubert_targeted_snr_pgd.json
\ No newline at end of file
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json
deleted file mode 100755
index cdecb16ce..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_defended.json
+++ /dev/null
@@ -1,92 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "decrease_factor_alpha": 0.5,
-            "decrease_factor_eps": 0.5,
-            "eps": 0.0075,
-            "global_max_length": 562480,
-            "increase_factor_alpha": 2.0,
-            "initial_rescale": 1.0,
-            "learning_rate_1": 0.0001,
-            "learning_rate_2": 1e-07,
-            "max_iter_1": 400,
-            "max_iter_2": 100,
-            "num_iter_decrease_alpha": 50
-        },
-        "module": "art.attacks.evasion.imperceptible_asr.imperceptible_asr_pytorch",
-        "name": "ImperceptibleASRPyTorch",
-        "targeted": true,
-        "targeted_labels": {
-            "scheme": "matched length",
-            "transcripts": [
-                "REALLY SHORT TEST STRING",
-                "THE TEST STRING HAS A LENGTH EQUAL TO THE MEDIAN OF THE CLEAN TEST TRANSCRIPT LENGTHS",
-                "THIS IS AN EXCEEDINGLY LONG TEST STRING BUT NOT REALLY AS THE LONGEST STRING HAS OVER FIVE HUNDRED CHARACTERS IN ITS TRANSCRIPT AND INCLUDES A LIST OF PEOPLE AND SPEAKS OF A SENATOR FROM NEW JERSEY"
-            ]
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": {
-        "kwargs": {
-            "apply_fit": false,
-            "apply_predict": true,
-            "channels_first": false,
-            "sample_rate": 16000,
-            "verbose": false
-        },
-        "module": "art.defences.preprocessor",
-        "name": "Mp3CompressionPyTorch",
-        "type": "Preprocessor"
-    },
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json
deleted file mode 100755
index 71b02f0dc..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_imperceptible_undefended.json
+++ /dev/null
@@ -1,81 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "decrease_factor_alpha": 0.5,
-            "decrease_factor_eps": 0.5,
-            "eps": 0.0075,
-            "global_max_length": 562480,
-            "increase_factor_alpha": 2.0,
-            "initial_rescale": 1.0,
-            "learning_rate_1": 0.0001,
-            "learning_rate_2": 1e-07,
-            "max_iter_1": 400,
-            "max_iter_2": 100,
-            "num_iter_decrease_alpha": 50
-        },
-        "module": "art.attacks.evasion.imperceptible_asr.imperceptible_asr_pytorch",
-        "name": "ImperceptibleASRPyTorch",
-        "targeted": true,
-        "targeted_labels": {
-            "scheme": "matched length",
-            "transcripts": [
-                "REALLY SHORT TEST STRING",
-                "THE TEST STRING HAS A LENGTH EQUAL TO THE MEDIAN OF THE CLEAN TEST TRANSCRIPT LENGTHS",
-                "THIS IS AN EXCEEDINGLY LONG TEST STRING BUT NOT REALLY AS THE LONGEST STRING HAS OVER FIVE HUNDRED CHARACTERS IN ITS TRANSCRIPT AND INCLUDES A LIST OF PEOPLE AND SPEAKS OF A SENATOR FROM NEW JERSEY"
-            ]
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json
deleted file mode 100755
index c4d41fb71..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_defended.json
+++ /dev/null
@@ -1,75 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "partial_attack": false,
-            "snr_db": 20,
-            "targeted": false
-        },
-        "module": "armory.art_experimental.attacks.kenansville_dft",
-        "name": "KenansvilleDFT",
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 8,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": {
-        "kwargs": {
-            "apply_fit": false,
-            "apply_predict": true,
-            "channels_first": false,
-            "sample_rate": 16000,
-            "verbose": false
-        },
-        "module": "art.defences.preprocessor",
-        "name": "Mp3Compression",
-        "type": "Preprocessor"
-    },
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json
deleted file mode 100755
index 1a8e25bed..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_kenansville_undefended.json
+++ /dev/null
@@ -1,64 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "partial_attack": false,
-            "snr_db": 20,
-            "targeted": false
-        },
-        "module": "armory.art_experimental.attacks.kenansville_dft",
-        "name": "KenansvilleDFT",
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 8,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json
deleted file mode 100755
index c54f8ef78..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_defended.json
+++ /dev/null
@@ -1,86 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 1.5,
-            "eps_step": 0.05,
-            "max_iter": 100,
-            "norm": 2,
-            "num_random_init": 0,
-            "random_eps": false,
-            "targeted": false,
-            "verbose": false
-        },
-        "module": "art.attacks.evasion",
-        "name": "ProjectedGradientDescent",
-        "targeted": false,
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": {
-        "kwargs": {
-            "apply_fit": false,
-            "apply_predict": true,
-            "channels_first": false,
-            "sample_rate": 16000,
-            "verbose": false
-        },
-        "module": "art.defences.preprocessor",
-        "name": "Mp3Compression",
-        "type": "Preprocessor"
-    },
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json
deleted file mode 100755
index ac814e83a..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_multipath_channel_undefended.json
+++ /dev/null
@@ -1,80 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "audio_channel": {
-            "attenuation": 0.5,
-            "delay": 300,
-            "pytorch": true
-        },
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 1.5,
-            "eps_step": 0.05,
-            "max_iter": 100,
-            "norm": 2,
-            "num_random_init": 0,
-            "random_eps": false,
-            "targeted": false,
-            "verbose": false
-        },
-        "module": "art.attacks.evasion",
-        "name": "ProjectedGradientDescent",
-        "targeted": false,
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json
deleted file mode 100755
index 94a7bef1c..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_pgd_undefended.json
+++ /dev/null
@@ -1,75 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 1.5,
-            "eps_step": 0.05,
-            "max_iter": 100,
-            "norm": 2,
-            "num_random_init": 0,
-            "random_eps": false,
-            "targeted": false,
-            "verbose": false
-        },
-        "module": "art.attacks.evasion",
-        "name": "ProjectedGradientDescent",
-        "targeted": false,
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json
deleted file mode 100644
index 263adccac..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_targeted.json
+++ /dev/null
@@ -1,81 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 10,
-            "eps_step": 0.5,
-            "max_iter": 10,
-            "norm": "snr",
-            "num_random_init": 0,
-            "targeted": true
-        },
-        "module": "armory.art_experimental.attacks.snr_pgd",
-        "name": "SNR_PGD_Numpy",
-        "targeted": true,
-        "targeted_labels": {
-            "kwargs": {
-                "import_from": "armory.attacks.librispeech_target_labels",
-                "transcripts": "matched_length"
-            },
-            "module": "armory.utils.labels",
-            "name": "MatchedTranscriptLengthTargeter"
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "linf",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json b/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json
deleted file mode 100755
index 9ed517ef0..000000000
--- a/scenario_configs/eval1-4/asr_librispeech/librispeech_asr_snr_undefended.json
+++ /dev/null
@@ -1,80 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 10,
-            "eps_step": 0.5,
-            "max_iter": 10,
-            "norm": "snr",
-            "num_random_init": 0,
-            "targeted": true
-        },
-        "module": "armory.art_experimental.attacks.snr_pgd",
-        "name": "SNR_PGD_Numpy",
-        "targeted": true,
-        "targeted_labels": {
-            "kwargs": {
-                "value": "TEST STRING"
-            },
-            "module": "armory.utils.labels",
-            "name": "FixedStringTargeter"
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "linf",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json b/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json
deleted file mode 100644
index 71d688d97..000000000
--- a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet.json
+++ /dev/null
@@ -1,65 +0,0 @@
-{
-    "_description": "Librispeech_dev_clean raw audio classification, contributed by MITRE Corporation",
-    "adhoc": null,
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 0.2,
-            "eps_step": 0.1,
-            "minimal": false,
-            "num_random_init": 0,
-            "targeted": false
-        },
-        "module": "art.attacks.evasion",
-        "name": "FastGradientMethod",
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech_dev_clean"
-    },
-    "defense": null,
-    "metric": {
-        "means": true,
-        "perturbation": "linf",
-        "record_metric_per_sample": false,
-        "task": [
-            "categorical_accuracy"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "fit_batch_size": 16,
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {
-            "predict_mode": "all"
-        },
-        "module": "armory.baseline_models.pytorch.sincnet",
-        "name": "get_art_model",
-        "weights_file": "sincnet_librispeech_v1.pth",
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1.0,
-                1.0
-            ]
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_classification",
-        "name": "AudioClassificationTask"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch",
-        "external_github_repo": "hkakitani/SincNet",
-        "gpus": "all",
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json b/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json
deleted file mode 100644
index 8ea65668d..000000000
--- a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json
+++ /dev/null
@@ -1,69 +0,0 @@
-{
-    "_description": "Librispeech_dev_clean raw audio classification, contributed by MITRE Corporation",
-    "adhoc": null,
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 10,
-            "eps_step": 0.5,
-            "max_iter": 10,
-            "norm": "snr",
-            "num_random_init": 0,
-            "targeted": false
-        },
-        "module": "armory.art_experimental.attacks.snr_pgd",
-        "name": "SNR_PGD",
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech_dev_clean"
-    },
-    "defense": null,
-    "metric": {
-        "means": true,
-        "perturbation": [
-            "snr",
-            "snr_db"
-        ],
-        "record_metric_per_sample": true,
-        "task": [
-            "categorical_accuracy"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "fit_batch_size": 16,
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {
-            "predict_mode": "all"
-        },
-        "module": "armory.baseline_models.pytorch.sincnet",
-        "name": "get_art_model",
-        "weights_file": "sincnet_librispeech_v1.pth",
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1.0,
-                1.0
-            ]
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_classification",
-        "name": "AudioClassificationTask"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch",
-        "external_github_repo": "hkakitani/SincNet",
-        "gpus": "all",
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json b/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json
deleted file mode 100644
index 526353755..000000000
--- a/scenario_configs/eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_targeted.json
+++ /dev/null
@@ -1,72 +0,0 @@
-{
-    "_description": "Librispeech_dev_clean raw audio classification, contributed by MITRE Corporation",
-    "adhoc": null,
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 0.2,
-            "eps_step": 0.1,
-            "minimal": false,
-            "num_random_init": 0,
-            "targeted": true
-        },
-        "module": "art.attacks.evasion",
-        "name": "FastGradientMethod",
-        "targeted_labels": {
-            "kwargs": {
-                "num_classes": 40
-            },
-            "module": "armory.utils.labels",
-            "name": "RoundRobinTargeter"
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech_dev_clean"
-    },
-    "defense": null,
-    "metric": {
-        "means": true,
-        "perturbation": "linf",
-        "record_metric_per_sample": false,
-        "task": [
-            "categorical_accuracy"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "fit_batch_size": 16,
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {
-            "predict_mode": "all"
-        },
-        "module": "armory.baseline_models.pytorch.sincnet",
-        "name": "get_art_model",
-        "weights_file": "sincnet_librispeech_v1.pth",
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1.0,
-                1.0
-            ]
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_classification",
-        "name": "AudioClassificationTask"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch",
-        "external_github_repo": "hkakitani/SincNet",
-        "gpus": "all",
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval5/asr_librispeech/defended_entailment.json b/scenario_configs/eval5/asr_librispeech/defended_entailment.json
deleted file mode 100644
index 5727d7654..000000000
--- a/scenario_configs/eval5/asr_librispeech/defended_entailment.json
+++ /dev/null
@@ -1,97 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 20,
-            "eps_step": 0.05,
-            "max_iter": 500,
-            "norm": "snr",
-            "num_random_init": 0,
-            "targeted": true
-        },
-        "module": "armory.art_experimental.attacks.snr_pgd",
-        "name": "SNR_PGD_Numpy",
-        "targeted": true,
-        "targeted_labels": {
-            "kwargs": {
-                "dtype": "str",
-                "import_from": "armory.attacks.librispeech_target_labels",
-                "values": "entailment_100"
-            },
-            "module": "armory.utils.labels",
-            "name": "ManualTargeter"
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": {
-        "kwargs": {
-            "apply_fit": false,
-            "apply_predict": true,
-            "channels_first": false,
-            "sample_rate": 16000,
-            "verbose": false
-        },
-        "module": "art.defences.preprocessor",
-        "name": "Mp3Compression",
-        "type": "Preprocessor"
-    },
-    "metric": {
-        "means": false,
-        "perturbation": "snr_db",
-        "record_metric_per_sample": true,
-        "task": [
-            "entailment",
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": [
-            "SeanNaren/deepspeech.pytorch@V3.0"
-        ],
-        "gpus": "all",
-        "local_repo_path": null,
-        "num_eval_batches": 100,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json b/scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json
deleted file mode 100644
index c9ff3fdb2..000000000
--- a/scenario_configs/eval5/asr_librispeech/defended_targeted_snr_pgd.json
+++ /dev/null
@@ -1,92 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 20,
-            "eps_step": 0.5,
-            "max_iter": 500,
-            "norm": "snr",
-            "num_random_init": 0,
-            "targeted": true
-        },
-        "module": "armory.art_experimental.attacks.snr_pgd",
-        "name": "SNR_PGD_Numpy",
-        "targeted": true,
-        "targeted_labels": {
-            "kwargs": {
-                "import_from": "armory.attacks.librispeech_target_labels",
-                "transcripts": "matched_length"
-            },
-            "module": "armory.utils.labels",
-            "name": "MatchedTranscriptLengthTargeter"
-        },
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": {
-        "kwargs": {
-            "apply_fit": false,
-            "apply_predict": true,
-            "channels_first": false,
-            "sample_rate": 16000,
-            "verbose": false
-        },
-        "module": "art.defences.preprocessor",
-        "name": "Mp3Compression",
-        "type": "Preprocessor"
-    },
-    "metric": {
-        "means": false,
-        "perturbation": "linf",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json b/scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json
deleted file mode 100644
index 58a8c1af8..000000000
--- a/scenario_configs/eval5/asr_librispeech/untargeted_snr_pgd.json
+++ /dev/null
@@ -1,73 +0,0 @@
-{
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
-    "adhoc": {
-        "skip_adversarial": false
-    },
-    "attack": {
-        "knowledge": "white",
-        "kwargs": {
-            "batch_size": 1,
-            "eps": 20,
-            "eps_step": 0.5,
-            "max_iter": 500,
-            "norm": "snr",
-            "num_random_init": 0,
-            "targeted": false
-        },
-        "module": "armory.art_experimental.attacks.snr_pgd",
-        "name": "SNR_PGD_Numpy",
-        "targeted": false,
-        "use_label": false
-    },
-    "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
-    },
-    "defense": null,
-    "metric": {
-        "means": false,
-        "perturbation": "linf",
-        "record_metric_per_sample": true,
-        "task": [
-            "word_error_rate"
-        ]
-    },
-    "model": {
-        "fit": false,
-        "fit_kwargs": {
-            "nb_epochs": 20000
-        },
-        "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
-        "name": "get_art_model",
-        "predict_kwargs": {
-            "transcription_output": true
-        },
-        "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
-    },
-    "scenario": {
-        "kwargs": {},
-        "module": "armory.scenarios.audio_asr",
-        "name": "AutomaticSpeechRecognition"
-    },
-    "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
-        "gpus": "all",
-        "local_repo_path": null,
-        "output_dir": null,
-        "output_filename": null,
-        "use_gpu": false
-    }
-}
diff --git a/scenario_configs/eval5/asr_librispeech/defended_untargeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_defended_untargeted.json
similarity index 70%
rename from scenario_configs/eval5/asr_librispeech/defended_untargeted_snr_pgd.json
rename to scenario_configs/eval6/asr_librispeech/hubert_defended_untargeted.json
index 4c128b261..006318915 100644
--- a/scenario_configs/eval5/asr_librispeech/defended_untargeted_snr_pgd.json
+++ b/scenario_configs/eval6/asr_librispeech/hubert_defended_untargeted.json
@@ -1,5 +1,5 @@
 {
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
+    "_description": "Baseline HuBERT ASR on LibriSpeech",
     "adhoc": {
         "skip_adversarial": false
     },
@@ -20,12 +20,11 @@
         "use_label": false
     },
     "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
+        "test": {
+            "batch_size": 1,
+            "name": "librispeech_dev_test",
+            "split": "test_clean"
+        }
     },
     "defense": {
         "kwargs": {
@@ -41,7 +40,7 @@
     },
     "metric": {
         "means": false,
-        "perturbation": "linf",
+        "perturbation": "snr_db",
         "record_metric_per_sample": true,
         "task": [
             "word_error_rate"
@@ -53,19 +52,13 @@
             "nb_epochs": 20000
         },
         "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
+        "module": "armory.baseline_models.pytorch.hubert_asr_large",
         "name": "get_art_model",
         "predict_kwargs": {
             "transcription_output": true
         },
         "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
+        "wrapper_kwargs": {}
     },
     "scenario": {
         "kwargs": {},
@@ -73,8 +66,8 @@
         "name": "AutomaticSpeechRecognition"
     },
     "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
+        "docker_image": "twosixarmory/pytorch",
+        "external_github_repo": null,
         "gpus": "all",
         "local_repo_path": null,
         "output_dir": null,
diff --git a/scenario_configs/eval5/asr_librispeech/entailment.json b/scenario_configs/eval6/asr_librispeech/hubert_entailment.json
similarity index 70%
rename from scenario_configs/eval5/asr_librispeech/entailment.json
rename to scenario_configs/eval6/asr_librispeech/hubert_entailment.json
index 21f5ff3e1..1a3388db5 100644
--- a/scenario_configs/eval5/asr_librispeech/entailment.json
+++ b/scenario_configs/eval6/asr_librispeech/hubert_entailment.json
@@ -1,5 +1,5 @@
 {
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
+    "_description": "Baseline HuBERT ASR on LibriSpeech",
     "adhoc": {
         "skip_adversarial": false
     },
@@ -29,12 +29,11 @@
         "use_label": false
     },
     "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
+        "test": {
+            "batch_size": 1,
+            "name": "librispeech_dev_test",
+            "split": "test_clean"
+        }
     },
     "defense": null,
     "metric": {
@@ -52,19 +51,13 @@
             "nb_epochs": 20000
         },
         "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
+        "module": "armory.baseline_models.pytorch.hubert_asr_large",
         "name": "get_art_model",
         "predict_kwargs": {
             "transcription_output": true
         },
         "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
+        "wrapper_kwargs": {}
     },
     "scenario": {
         "kwargs": {},
@@ -72,13 +65,10 @@
         "name": "AutomaticSpeechRecognition"
     },
     "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": [
-            "SeanNaren/deepspeech.pytorch@V3.0"
-        ],
+        "docker_image": "twosixarmory/pytorch",
+        "external_github_repo": null,
         "gpus": "all",
         "local_repo_path": null,
-        "num_eval_batches": 100,
         "output_dir": null,
         "output_filename": null,
         "use_gpu": false
diff --git a/scenario_configs/eval5/asr_librispeech/targeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_targeted_snr_pgd.json
similarity index 69%
rename from scenario_configs/eval5/asr_librispeech/targeted_snr_pgd.json
rename to scenario_configs/eval6/asr_librispeech/hubert_targeted_snr_pgd.json
index f650a46eb..5f469cca6 100644
--- a/scenario_configs/eval5/asr_librispeech/targeted_snr_pgd.json
+++ b/scenario_configs/eval6/asr_librispeech/hubert_targeted_snr_pgd.json
@@ -1,5 +1,5 @@
 {
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
+    "_description": "Baseline HuBERT ASR on LibriSpeech",
     "adhoc": {
         "skip_adversarial": false
     },
@@ -28,17 +28,16 @@
         "use_label": false
     },
     "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
+        "test": {
+            "batch_size": 1,
+            "name": "librispeech_dev_test",
+            "split": "test_clean"
+        }
     },
     "defense": null,
     "metric": {
         "means": false,
-        "perturbation": "linf",
+        "perturbation": "snr_db",
         "record_metric_per_sample": true,
         "task": [
             "word_error_rate"
@@ -50,19 +49,13 @@
             "nb_epochs": 20000
         },
         "model_kwargs": {},
-        "module": "armory.baseline_models.pytorch.deep_speech",
+        "module": "armory.baseline_models.pytorch.hubert_asr_large",
         "name": "get_art_model",
         "predict_kwargs": {
             "transcription_output": true
         },
         "weights_file": null,
-        "wrapper_kwargs": {
-            "clip_values": [
-                -1,
-                1
-            ],
-            "pretrained_model": "librispeech"
-        }
+        "wrapper_kwargs": {}
     },
     "scenario": {
         "kwargs": {},
@@ -70,8 +63,8 @@
         "name": "AutomaticSpeechRecognition"
     },
     "sysconfig": {
-        "docker_image": "twosixarmory/pytorch-deepspeech",
-        "external_github_repo": "SeanNaren/deepspeech.pytorch@V3.0",
+        "docker_image": "twosixarmory/pytorch",
+        "external_github_repo": null,
         "gpus": "all",
         "local_repo_path": null,
         "output_dir": null,
diff --git a/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json b/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json
index 25b1b5bc0..3ce122237 100644
--- a/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json
+++ b/scenario_configs/eval6/asr_librispeech/hubert_untargeted_snr_pgd.json
@@ -1,5 +1,5 @@
 {
-    "_description": "Baseline DeepSpeech ASR on LibriSpeech, contributed by MITRE Corporation",
+    "_description": "Baseline HuBERT ASR on LibriSpeech",
     "adhoc": {
         "skip_adversarial": false
     },
@@ -20,17 +20,16 @@
         "use_label": false
     },
     "dataset": {
-        "batch_size": 1,
-        "eval_split": "test_clean",
-        "framework": "numpy",
-        "module": "armory.data.datasets",
-        "name": "librispeech",
-        "train_split": "train_clean100"
+        "test": {
+            "batch_size": 1,
+            "name": "librispeech_dev_test",
+            "split": "test_clean"
+        }
     },
     "defense": null,
     "metric": {
         "means": false,
-        "perturbation": "linf",
+        "perturbation": "snr_db",
         "record_metric_per_sample": true,
         "task": [
             "word_error_rate"
diff --git a/scenario_configs/speaker_id_librispeech.json b/scenario_configs/speaker_id_librispeech.json
deleted file mode 120000
index c9d0b713e..000000000
--- a/scenario_configs/speaker_id_librispeech.json
+++ /dev/null
@@ -1 +0,0 @@
-eval1-4/speaker_id_librispeech/librispeech_baseline_sincnet_snr_pgd.json
\ No newline at end of file