diff --git a/.dict-speechbrain.txt b/.dict-speechbrain.txt new file mode 100644 index 000000000..e69de29bb diff --git a/.flake8 b/.flake8 index 340733a2e..918967ffa 100644 --- a/.flake8 +++ b/.flake8 @@ -1,7 +1,7 @@ [flake8] -ignore = E203, E266, E501, W503 +ignore = E203, E266, E501, W503, DOC105, DOC106, DOC107, DOC203, DOC403, DOC404, DOC405, DOC501, DOC502 # line length is intentionally set to 80 here because black uses Bugbear # See https://github.com/psf/black/blob/master/README.md#line-length for more details max-line-length = 80 max-complexity = 18 -select = B,C,E,F,W,T4,B9 +select = B,C,E,F,W,T4,B9,DOC diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7eaa8a13a..af574f389 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,18 +14,26 @@ repos: args: [--maxkb=1024] - repo: https://github.com/psf/black - rev: 19.10b0 + rev: 24.3.0 hooks: - id: black types: [python] - additional_dependencies: ['click==8.0.4'] + additional_dependencies: ['click==8.1.7'] - repo: https://github.com/PyCQA/flake8 - rev: 3.7.9 + rev: 7.0.0 hooks: - id: flake8 types: [python] - repo: https://github.com/adrienverge/yamllint - rev: v1.23.0 + rev: v1.35.1 hooks: - id: yamllint + + - repo: https://github.com/codespell-project/codespell + rev: v2.2.4 + hooks: + - id: codespell + args: [--ignore-words=.dict-speechbrain.txt] + additional_dependencies: + - tomli diff --git a/benchmarks/CL_MASR/analyze_logs.py b/benchmarks/CL_MASR/analyze_logs.py index 78061c49a..9c4fe26db 100644 --- a/benchmarks/CL_MASR/analyze_logs.py +++ b/benchmarks/CL_MASR/analyze_logs.py @@ -64,13 +64,13 @@ def parse_train_log(train_log: "str") -> "Dict[str, ndarray]": Arguments --------- - train_log: + train_log: str The path to the train log. Returns ------- - The metrics, i.e. a dict that maps names of - the metrics to their corresponding values. + The metrics, i.e. a dict that maps names of + the metrics to their corresponding values. Examples -------- @@ -107,16 +107,16 @@ def compute_wer_matrix( Arguments --------- - wers: + wers: ndarray The word error rate for each locale. - num_base_locales: + num_base_locales: int The number of base locales. - num_new_locales: + num_new_locales: int The number of new locales. Returns ------- - The word error rate matrix. + The word error rate matrix. Raises ------ @@ -152,12 +152,12 @@ def compute_awer(wer_matrix: "ndarray") -> "ndarray": Arguments --------- - wer_matrix: + wer_matrix: ndarray The word error rate matrix. Returns ------- - The average word error rate. + The average word error rate. References ---------- @@ -185,12 +185,12 @@ def compute_bwt(wer_matrix: "ndarray") -> "ndarray": Arguments --------- - wer_matrix: + wer_matrix: ndarray The word error rate matrix. Returns ------- - The backward transfer. + The backward transfer. References ---------- @@ -220,14 +220,14 @@ def compute_im(wer_matrix: "ndarray", refs: "ndarray") -> "ndarray": Arguments --------- - wer_matrix: + wer_matrix: ndarray The word error rate matrix. - refs: + refs: ndarray The intransigence measure references (joint fine-tuning). Returns ------- - The intransigence measure. + The intransigence measure. References ---------- @@ -255,14 +255,14 @@ def compute_fwt(wer_matrix: "ndarray", refs: "ndarray") -> "ndarray": Arguments --------- - wer_matrix: + wer_matrix: ndarray The word error rate matrix. - refs: + refs: ndarray The forward transfer references (single task fine-tuning). Returns ------- - The forward transfer. + The forward transfer. 
Examples -------- @@ -289,31 +289,31 @@ def plot_wer( usetex: "bool" = False, hide_legend: "bool" = False, style_file_or_name: "str" = "classic", -) -> "None": +): """Plot word error rates extracted from a continual learning train log. Arguments --------- - wers: + wers: ndarray The word error rates (base + new locales). - output_image: + output_image: str The path to the output image. - base_locales: + base_locales: Sequence[str] The base locales. - new_locales: + new_locales: Sequence[str] The new locales. - xlabel: + xlabel: str The x-axis label. - figsize: + figsize: Tuple[float, float] The figure size. - title: + title: str The plot title. - usetex: + usetex: bool True to render text with LaTeX, False otherwise. - hide_legend: + hide_legend: bool True to hide the legend, False otherwise. - style_file_or_name: + style_file_or_name: str The path to a Matplotlib style file or the name of one of Matplotlib built-in styles (see https://matplotlib.org/stable/gallery/style_sheets/style_sheets_reference.html). @@ -435,7 +435,7 @@ def plot_wer( margin={"t": 60, "b": 60}, ) fig.write_html( - f"{output_image.rsplit('.', 1)[0]}.html", include_plotlyjs=True, + f"{output_image.rsplit('.', 1)[0]}.html", include_plotlyjs=True ) except ImportError: logging.warning( @@ -455,32 +455,32 @@ def plot_metric( usetex: "bool" = False, hide_legend: "bool" = False, style_file_or_name: "str" = "classic", -) -> "None": +): """Plot a continual learning metric. Arguments --------- - metric_csv_file: + metric_csv_file: str The path to the continual learning metric CSV file. - output_image: + output_image: str The path to the output image. - xlabel: + xlabel: str The x-axis label. - ylabel: + ylabel: str The y-axis label. - xticks: + xticks: List[str] The x-ticks. - figsize: + figsize: Tuple[float, float] The figure size. - title: + title: str The plot title. - opacity: + opacity: float The confidence interval opacity. - usetex: + usetex: bool True to render text with LaTeX, False otherwise. - hide_legend: + hide_legend: bool True to hide the legend, False otherwise. - style_file_or_name: + style_file_or_name: str The path to a Matplotlib style file or the name of one of Matplotlib built-in styles (see https://matplotlib.org/stable/gallery/style_sheets/style_sheets_reference.html). 
@@ -650,7 +650,7 @@ def hex_to_rgb(hex_color: "str") -> "Tuple": margin={"t": 60, "b": 60}, ) fig.write_html( - f"{output_image.rsplit('.', 1)[0]}.html", include_plotlyjs=True, + f"{output_image.rsplit('.', 1)[0]}.html", include_plotlyjs=True ) except ImportError: logging.warning( @@ -689,9 +689,7 @@ def hex_to_rgb(hex_color: "str") -> "Tuple": # fmt: on help="forward transfer references", ) - parser.add_argument( - "-f", "--format", default="png", help="image format", - ) + parser.add_argument("-f", "--format", default="png", help="image format") parser.add_argument( "-s", "--figsize", @@ -700,17 +698,15 @@ def hex_to_rgb(hex_color: "str") -> "Tuple": type=float, help="figure size", ) + parser.add_argument("-t", "--title", default=None, help="title") parser.add_argument( - "-t", "--title", default=None, help="title", - ) - parser.add_argument( - "-o", "--opacity", default=0.15, help="confidence interval opacity", + "-o", "--opacity", default=0.15, help="confidence interval opacity" ) parser.add_argument( - "--hide_legend", action="store_true", help="hide legend", + "--hide_legend", action="store_true", help="hide legend" ) parser.add_argument( - "-u", "--usetex", action="store_true", help="render text with LaTeX", + "-u", "--usetex", action="store_true", help="render text with LaTeX" ) parser.add_argument( "--order", @@ -831,7 +827,7 @@ def hex_to_rgb(hex_color: "str") -> "Tuple": avg_mean = np.mean(avg) # Assuming independence, sigma^2 = sum_1^n sigma_i^2 / n^2 avg_stddev = np.sqrt( - np.nansum(stddev ** 2) / (~np.isnan(stddev)).sum() ** 2 + np.nansum(stddev**2) / (~np.isnan(stddev)).sum() ** 2 ) csv_writer.writerow( [group_name] @@ -851,9 +847,9 @@ def hex_to_rgb(hex_color: "str") -> "Tuple": f"{name.lower().replace(' ', '_')}.{args.format}", ), xlabel=None, - ylabel=f"{name} (\%)" - if args.usetex - else f"{name} (%)", # noqa: W605 + ylabel=( + f"{name} (\\%)" if args.usetex else f"{name} (%)" + ), # noqa: W605 xticks=["base"] + [f"L{i}" for i in range(1, 1 + len(new_locales))], figsize=args.figsize, title=args.title, diff --git a/benchmarks/CL_MASR/common_voice_prepare.py b/benchmarks/CL_MASR/common_voice_prepare.py index e6f6fdb10..d58c8d8a5 100644 --- a/benchmarks/CL_MASR/common_voice_prepare.py +++ b/benchmarks/CL_MASR/common_voice_prepare.py @@ -59,17 +59,17 @@ def prepare_common_voice( locales: "Sequence[str]" = ("en",), data_folder: "str" = "data", max_durations: "Optional[Sequence[float]]" = None, -) -> "None": +): """Prepare data manifest CSV files for Common Voice dataset (see https://commonvoice.mozilla.org/en/datasets). Arguments --------- - locales: + locales: Sequence[str] The locales to use (e.g. "en", "it", etc.). - data_folder: + data_folder: str The path to the dataset folder. - max_durations: + max_durations: Sequence[float] The maximum total durations in seconds to sample from each locale for train, dev and test splits, respectively. Default to infinity. 
@@ -111,7 +111,7 @@ def prepare_common_voice( _LOGGER.info( "----------------------------------------------------------------------", ) - _LOGGER.info(f"Merging TSV files...") + _LOGGER.info("Merging TSV files...") for split, max_duration in zip(_SPLITS, max_durations): tsv_files = [ os.path.join(data_folder, locale, f"{split}_with_duration.tsv") @@ -126,7 +126,7 @@ def prepare_common_voice( _LOGGER.info( "----------------------------------------------------------------------", ) - _LOGGER.info(f"Creating data manifest CSV files...") + _LOGGER.info("Creating data manifest CSV files...") for split in _SPLITS: preprocess_tsv_file( os.path.join(data_folder, f"{split}_with_duration.tsv"), @@ -134,12 +134,12 @@ def prepare_common_voice( ) -def compute_clip_durations(locale_folder: "str") -> "None": +def compute_clip_durations(locale_folder: "str"): """Compute clip durations for a Common Voice dataset locale. Arguments --------- - locale_folder: + locale_folder: str The path to the dataset locale folder. Examples @@ -185,20 +185,20 @@ def merge_tsv_files( output_tsv_file: "str", max_duration: "Optional[float]" = None, shuffle: "bool" = False, -) -> "None": +): """Merge input TSV files into a single output TSV file. Arguments --------- - input_tsv_files: + input_tsv_files: Sequence[str] The paths to the input TSV files. - output_tsv_file: + output_tsv_file: str The path to the output TSV file. - max_duration: + max_duration: float The maximum total duration in seconds to sample from each TSV file. Default to infinity. - shuffle: + shuffle: bool True to shuffle the data, False otherwise. Used only if `max_duration` is less than infinity. @@ -271,23 +271,20 @@ def merge_tsv_files( # Adapted from: # https://github.com/speechbrain/speechbrain/blob/v0.5.13/recipes/CommonVoice/common_voice_prepare.py#L160 -def preprocess_tsv_file( - input_tsv_file: "str", output_csv_file: "str", -) -> "None": +def preprocess_tsv_file(input_tsv_file: "str", output_csv_file: "str"): """Apply minimal Common Voice preprocessing (e.g. rename columns, remove unused columns, remove commas, special characters and empty sentences etc.) to each row of an input TSV file. Arguments --------- - input_tsv_file: + input_tsv_file: str The path to the input TSV file. - output_csv_file: + output_csv_file: str The path to the output CSV file. 
Examples -------- >>> preprocess_tsv_file("data/test_with_duration.tsv", "data/test.csv") - """ # Header: client_id path sentence up_votes down_votes age gender accents locale segment duration _LOGGER.info(f"Reading input TSV file ({input_tsv_file})...") @@ -383,6 +380,4 @@ def preprocess_tsv_file( ) args = parser.parse_args() - prepare_common_voice( - args.locales, args.data_folder, args.max_durations, - ) + prepare_common_voice(args.locales, args.data_folder, args.max_durations) diff --git a/benchmarks/CL_MASR/wavlm/model.py b/benchmarks/CL_MASR/wavlm/model.py index f00217bbf..00829bcb2 100644 --- a/benchmarks/CL_MASR/wavlm/model.py +++ b/benchmarks/CL_MASR/wavlm/model.py @@ -86,7 +86,7 @@ def __init__( dest_unpack=_TOKENIZER_PATH, ) self.tokenizer = SentencePiece( - model_dir=_TOKENIZER_PATH, vocab_size=4887, model_type="char", + model_dir=_TOKENIZER_PATH, vocab_size=4887, model_type="char" ).sp vocab_size = self.tokenizer.vocab_size() encoder_kwargs = { @@ -103,7 +103,7 @@ def __init__( "bidirectional": bidirectional, } self.model = Model( - source, save_path, vocab_size, encoder_kwargs, decoder_kwargs, + source, save_path, vocab_size, encoder_kwargs, decoder_kwargs ) if freeze: self.model.requires_grad_(False) @@ -243,7 +243,7 @@ def __init__( ] ) self.out_proj = nn.Linear( - (2 if bidirectional else 1) * hidden_size, output_size, + (2 if bidirectional else 1) * hidden_size, output_size ) def forward(self, input, lengths=None): diff --git a/benchmarks/CL_MASR/wavlm/pretrain.py b/benchmarks/CL_MASR/wavlm/pretrain.py index b65be5573..2f6af8ca2 100644 --- a/benchmarks/CL_MASR/wavlm/pretrain.py +++ b/benchmarks/CL_MASR/wavlm/pretrain.py @@ -35,7 +35,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() logits = torch.utils.checkpoint.checkpoint( - self.modules.wavlm, wavs, wav_lens, + self.modules.wavlm, wavs, wav_lens ) else: logits = self.modules.wavlm(wavs, wav_lens) @@ -117,7 +117,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -166,7 +167,7 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, hparams["sample_rate"] )(sig) return resampled @@ -192,7 +193,8 @@ def text_pipeline(wrd): # 4. 
Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens", "target_wrd"], + datasets, + ["id", "sig", "tokens", "target_wrd"], ) return train_data, valid_data, test_data @@ -227,10 +229,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -243,7 +245,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -340,9 +342,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["locales"], f"wer_test.txt", - ) + test(hparams, run_opts, hparams["locales"], "wer_test.txt") def profile(hparams, run_opts): @@ -364,7 +364,7 @@ def __init__(self): super().__init__() self.wavlm = hparams["wavlm"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) @torch.no_grad() @@ -374,13 +374,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/wavlm/train_agem.py b/benchmarks/CL_MASR/wavlm/train_agem.py index 9d94875c7..ff677dbd5 100644 --- a/benchmarks/CL_MASR/wavlm/train_agem.py +++ b/benchmarks/CL_MASR/wavlm/train_agem.py @@ -39,7 +39,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() logits = torch.utils.checkpoint.checkpoint( - self.modules.wavlm, wavs, wav_lens, + self.modules.wavlm, wavs, wav_lens ) else: logits = self.modules.wavlm(wavs, wav_lens) @@ -107,7 +107,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -243,7 +244,8 @@ def fit_batch(self, batch): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. 
+ """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -292,7 +294,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -318,7 +321,8 @@ def text_pipeline(wrd): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens", "target_wrd"], + datasets, + ["id", "sig", "tokens", "target_wrd"], ) return train_data, valid_data, test_data @@ -353,10 +357,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -369,7 +373,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -430,9 +434,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -512,7 +514,7 @@ def train(hparams, run_opts): hparams["valid_dataloader_kwargs"].pop("ckpt_prefix", None) hparams["epoch_counter"].current = 0 replay_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) replay_data = replay_brain.make_dataloader( replay_data, @@ -557,7 +559,7 @@ def __init__(self): super().__init__() self.wavlm = hparams["wavlm"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) @torch.no_grad() @@ -567,13 +569,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/wavlm/train_der.py b/benchmarks/CL_MASR/wavlm/train_der.py index 987e94b8a..07ca9850f 100644 --- a/benchmarks/CL_MASR/wavlm/train_der.py +++ b/benchmarks/CL_MASR/wavlm/train_der.py @@ -38,7 +38,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() logits = torch.utils.checkpoint.checkpoint( - self.modules.wavlm, wavs, wav_lens, + self.modules.wavlm, wavs, wav_lens ) else: logits = self.modules.wavlm(wavs, wav_lens) @@ -65,7 +65,8 @@ def compute_objectives(self, predictions, batch, stage): # Compute distillation loss if stage == sb.Stage.TRAIN: selected_samples = 
random.sample( - self.hparams.replay_buffer, len(ids), + self.hparams.replay_buffer, + len(ids), ) tmp = [] @@ -104,7 +105,7 @@ def compute_objectives(self, predictions, batch, stage): if self.hparams.gradient_checkpointing: replay_wavs.requires_grad_() replay_logits = torch.utils.checkpoint.checkpoint( - self.modules.wavlm, replay_wavs, replay_wav_lens, + self.modules.wavlm, replay_wavs, replay_wav_lens ) else: replay_logits = self.modules.wavlm(replay_wavs, replay_wav_lens) @@ -156,7 +157,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -174,7 +176,8 @@ def _fit_train(self, train_set, epoch, enable): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -223,7 +226,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -249,7 +253,8 @@ def text_pipeline(wrd): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens", "target_wrd"], + datasets, + ["id", "sig", "tokens", "target_wrd"], ) return train_data, valid_data, test_data @@ -285,10 +290,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -301,7 +306,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -362,9 +367,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") replay_buffer = [] @@ -488,7 +491,7 @@ def __init__(self): super().__init__() self.wavlm = hparams["wavlm"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) @torch.no_grad() @@ -498,13 +501,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = 
torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/wavlm/train_er.py b/benchmarks/CL_MASR/wavlm/train_er.py index 6fe01ae43..f626fa2d3 100644 --- a/benchmarks/CL_MASR/wavlm/train_er.py +++ b/benchmarks/CL_MASR/wavlm/train_er.py @@ -38,7 +38,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() logits = torch.utils.checkpoint.checkpoint( - self.modules.wavlm, wavs, wav_lens, + self.modules.wavlm, wavs, wav_lens ) else: logits = self.modules.wavlm(wavs, wav_lens) @@ -104,7 +104,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -117,7 +118,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -166,7 +168,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -192,7 +195,8 @@ def text_pipeline(wrd): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens", "target_wrd"], + datasets, + ["id", "sig", "tokens", "target_wrd"], ) return train_data, valid_data, test_data @@ -228,10 +232,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -244,7 +248,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -305,9 +309,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -420,7 +422,7 @@ def __init__(self): super().__init__() self.wavlm = hparams["wavlm"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) @torch.no_grad() @@ -430,13 +432,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() 
model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/wavlm/train_ewc.py b/benchmarks/CL_MASR/wavlm/train_ewc.py index d02ad7c68..f47649b24 100644 --- a/benchmarks/CL_MASR/wavlm/train_ewc.py +++ b/benchmarks/CL_MASR/wavlm/train_ewc.py @@ -37,7 +37,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() logits = torch.utils.checkpoint.checkpoint( - self.modules.wavlm, wavs, wav_lens, + self.modules.wavlm, wavs, wav_lens ) else: logits = self.modules.wavlm(wavs, wav_lens) @@ -119,7 +119,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -178,6 +179,10 @@ def compute_ewc_params(hparams, run_opts, locales): locales : list[str] The locales to consider. + Returns + ------- + params + fisher """ tokenizer = hparams["wavlm"].tokenizer batch_size = hparams["train_dataloader_kwargs"].get("batch_size", 1) @@ -204,7 +209,7 @@ def compute_ewc_params(hparams, run_opts, locales): # Trainer initialization asr_brain = EWCParamsComputer( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -229,7 +234,8 @@ def compute_ewc_params(hparams, run_opts, locales): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -278,7 +284,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -304,7 +311,8 @@ def text_pipeline(wrd): # 4. 
Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens", "target_wrd"], + datasets, + ["id", "sig", "tokens", "target_wrd"], ) return train_data, valid_data, test_data @@ -339,10 +347,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -355,7 +363,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -416,9 +424,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -522,7 +528,7 @@ def __init__(self): super().__init__() self.wavlm = hparams["wavlm"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) @torch.no_grad() @@ -532,13 +538,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/wavlm/train_ft.py b/benchmarks/CL_MASR/wavlm/train_ft.py index 3f8f7aaf4..a61d25970 100644 --- a/benchmarks/CL_MASR/wavlm/train_ft.py +++ b/benchmarks/CL_MASR/wavlm/train_ft.py @@ -38,7 +38,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() logits = torch.utils.checkpoint.checkpoint( - self.modules.wavlm, wavs, wav_lens, + self.modules.wavlm, wavs, wav_lens ) else: logits = self.modules.wavlm(wavs, wav_lens) @@ -104,7 +104,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -117,7 +118,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. 
+ """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -166,7 +168,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -192,7 +195,8 @@ def text_pipeline(wrd): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens", "target_wrd"], + datasets, + ["id", "sig", "tokens", "target_wrd"], ) return train_data, valid_data, test_data @@ -227,10 +231,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -243,7 +247,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -304,9 +308,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -390,7 +392,7 @@ def __init__(self): super().__init__() self.wavlm = hparams["wavlm"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) @torch.no_grad() @@ -400,13 +402,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/wavlm/train_joint.py b/benchmarks/CL_MASR/wavlm/train_joint.py index 52d4b9c94..76e36b5a0 100644 --- a/benchmarks/CL_MASR/wavlm/train_joint.py +++ b/benchmarks/CL_MASR/wavlm/train_joint.py @@ -35,7 +35,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() logits = torch.utils.checkpoint.checkpoint( - self.modules.wavlm, wavs, wav_lens, + self.modules.wavlm, wavs, wav_lens ) else: logits = self.modules.wavlm(wavs, wav_lens) @@ -117,7 +117,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. 
+ """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -166,7 +167,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -192,7 +194,8 @@ def text_pipeline(wrd): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens", "target_wrd"], + datasets, + ["id", "sig", "tokens", "target_wrd"], ) return train_data, valid_data, test_data @@ -227,10 +230,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -243,7 +246,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -304,9 +307,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales # Multi-gpu (ddp) save data preparation @@ -358,7 +359,7 @@ def train(hparams, run_opts): hparams, run_opts, hparams["base_locales"] + hparams["new_locales"], - f"wer_test_after.txt", + "wer_test_after.txt", ) @@ -381,7 +382,7 @@ def __init__(self): super().__init__() self.wavlm = hparams["wavlm"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) @torch.no_grad() @@ -391,13 +392,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/wavlm/train_l2p.py b/benchmarks/CL_MASR/wavlm/train_l2p.py index 49114dcb2..505f29477 100644 --- a/benchmarks/CL_MASR/wavlm/train_l2p.py +++ b/benchmarks/CL_MASR/wavlm/train_l2p.py @@ -39,7 +39,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() enc_out = torch.utils.checkpoint.checkpoint( - self.modules.wavlm.model.encoder, wavs, wav_lens, + self.modules.wavlm.model.encoder, wavs, wav_lens ) enc_out = torch.utils.checkpoint.checkpoint( self.modules.prompt_pool, @@ -47,7 +47,7 @@ def compute_forward(self, batch, stage): self.hparams.forced_decoder_locale, ) logits = torch.utils.checkpoint.checkpoint( - self.modules.wavlm.model.decoder, enc_out, wav_lens, + self.modules.wavlm.model.decoder, enc_out, wav_lens ) else: enc_out = self.modules.wavlm.model.encoder(wavs, 
wav_lens) @@ -117,7 +117,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -183,7 +184,8 @@ def forward(self, input, locale=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -232,7 +234,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -258,7 +261,8 @@ def text_pipeline(wrd): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens", "target_wrd"], + datasets, + ["id", "sig", "tokens", "target_wrd"], ) return train_data, valid_data, test_data @@ -293,10 +297,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -312,7 +316,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -373,9 +377,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -483,7 +485,7 @@ def __init__(self): super().__init__() self.wavlm = hparams["wavlm"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) @torch.no_grad() @@ -493,13 +495,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/wavlm/train_lwf.py b/benchmarks/CL_MASR/wavlm/train_lwf.py index fd29e613d..40e72b72d 100644 --- a/benchmarks/CL_MASR/wavlm/train_lwf.py +++ b/benchmarks/CL_MASR/wavlm/train_lwf.py @@ -39,7 +39,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() logits = 
torch.utils.checkpoint.checkpoint( - self.modules.wavlm, wavs, wav_lens, + self.modules.wavlm, wavs, wav_lens ) else: logits = self.modules.wavlm(wavs, wav_lens) @@ -72,7 +72,8 @@ def compute_objectives(self, predictions, batch, stage): if stage == sb.Stage.TRAIN: # Probabilities modified by distillation temperature modified_probs = F.softmax( - logits.flatten(end_dim=-2) / self.hparams.lwf_T, dim=1, + logits.flatten(end_dim=-2) / self.hparams.lwf_T, + dim=1, ) # Target probabilities modified by distillation temperature @@ -135,7 +136,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -148,7 +150,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -197,7 +200,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -223,7 +227,8 @@ def text_pipeline(wrd): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens", "target_wrd"], + datasets, + ["id", "sig", "tokens", "target_wrd"], ) return train_data, valid_data, test_data @@ -258,10 +263,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -274,7 +279,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -334,9 +339,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -427,7 +430,7 @@ def __init__(self): super().__init__() self.wavlm = hparams["wavlm"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) @torch.no_grad() @@ -437,13 +440,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - 
time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/wavlm/train_mas.py b/benchmarks/CL_MASR/wavlm/train_mas.py index a5f97465e..421273b60 100644 --- a/benchmarks/CL_MASR/wavlm/train_mas.py +++ b/benchmarks/CL_MASR/wavlm/train_mas.py @@ -37,7 +37,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() logits = torch.utils.checkpoint.checkpoint( - self.modules.wavlm, wavs, wav_lens, + self.modules.wavlm, wavs, wav_lens ) else: logits = self.modules.wavlm(wavs, wav_lens) @@ -113,7 +113,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -182,6 +183,10 @@ def compute_mas_params(hparams, run_opts, locales): locales : list[str] The locales to consider. + Returns + ------- + params + importance """ tokenizer = hparams["wavlm"].tokenizer batch_size = hparams["train_dataloader_kwargs"].get("batch_size", 1) @@ -208,7 +213,7 @@ def compute_mas_params(hparams, run_opts, locales): # Trainer initialization asr_brain = MASParamsComputer( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -233,7 +238,8 @@ def compute_mas_params(hparams, run_opts, locales): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -282,7 +288,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -308,7 +315,8 @@ def text_pipeline(wrd): # 4. 
Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens", "target_wrd"], + datasets, + ["id", "sig", "tokens", "target_wrd"], ) return train_data, valid_data, test_data @@ -343,10 +351,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -359,7 +367,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -420,9 +428,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -528,7 +534,7 @@ def __init__(self): super().__init__() self.wavlm = hparams["wavlm"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) @torch.no_grad() @@ -538,13 +544,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/wavlm/train_pb.py b/benchmarks/CL_MASR/wavlm/train_pb.py index 052d70b73..2af3e895f 100644 --- a/benchmarks/CL_MASR/wavlm/train_pb.py +++ b/benchmarks/CL_MASR/wavlm/train_pb.py @@ -77,7 +77,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() logits = torch.utils.checkpoint.checkpoint( - self.modules.wavlm, wavs, wav_lens, + self.modules.wavlm, wavs, wav_lens ) else: logits = self.modules.wavlm(wavs, wav_lens) @@ -143,7 +143,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -174,7 +175,8 @@ def init_optimizers(self): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. 
+ """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -223,7 +225,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -249,7 +252,8 @@ def text_pipeline(wrd): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens", "target_wrd"], + datasets, + ["id", "sig", "tokens", "target_wrd"], ) return train_data, valid_data, test_data @@ -284,10 +288,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -319,7 +323,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -397,9 +401,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -517,7 +519,7 @@ def __init__(self): super().__init__() self.wavlm = hparams["wavlm"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) @torch.no_grad() @@ -527,13 +529,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/wavlm/train_pnn.py b/benchmarks/CL_MASR/wavlm/train_pnn.py index 3641fa54b..d1d16a189 100644 --- a/benchmarks/CL_MASR/wavlm/train_pnn.py +++ b/benchmarks/CL_MASR/wavlm/train_pnn.py @@ -41,7 +41,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() logits = torch.utils.checkpoint.checkpoint( - self.modules.wavlm, wavs, wav_lens, + self.modules.wavlm, wavs, wav_lens ) else: logits = self.modules.wavlm(wavs, wav_lens) @@ -107,7 +107,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -120,7 +121,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. 
- It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -169,7 +171,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -195,7 +198,8 @@ def text_pipeline(wrd): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens", "target_wrd"], + datasets, + ["id", "sig", "tokens", "target_wrd"], ) return train_data, valid_data, test_data @@ -230,10 +234,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -246,7 +250,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -308,9 +312,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -435,7 +437,7 @@ def __init__(self): super().__init__() self.wavlm = hparams["wavlm"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) @torch.no_grad() @@ -445,13 +447,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/whisper/model.py b/benchmarks/CL_MASR/whisper/model.py index 8c7dc26e6..a2df01cdb 100644 --- a/benchmarks/CL_MASR/whisper/model.py +++ b/benchmarks/CL_MASR/whisper/model.py @@ -28,6 +28,12 @@ class ProgressiveWhisperTokenizer(WhisperTokenizer): See the documentation of `transformers.models.whisper.tokenization_whisper.WhisperTokenizer`. + Arguments + --------- + *args : tuple + **kwargs : dict + Arguments forwarded to ``WhisperTokenizer`` + Examples -------- >>> model_hub = "openai/whisper-tiny" @@ -95,6 +101,13 @@ class ProgressiveWhisper(Whisper): See the documentation of `speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper`. 
+ Arguments + --------- + source : str + save_path : str + **kwargs : dict + Arguments forwarded to ``Whisper`` + Examples -------- >>> model_hub = "openai/whisper-tiny" @@ -103,16 +116,11 @@ class ProgressiveWhisper(Whisper): >>> inputs = torch.randn([2, 93680]) >>> tokens = torch.tensor([[1, 1]]) * model.model.config.decoder_start_token_id >>> outputs = model(inputs, tokens) - """ # override - def __init__( - self, source, save_path, **kwargs, - ): - super().__init__( - source, save_path, **kwargs, - ) + def __init__(self, source, save_path, **kwargs): + super().__init__(source, save_path, **kwargs) if self.tokenizer is not None: self.tokenizer = ProgressiveWhisperTokenizer.from_pretrained( source, @@ -277,7 +285,7 @@ def generate( if forced_decoder_locale is None: # Compute most likely language token IDs all_lang_tokens = [ - f"<|{l}|>" for l in self.tokenizer.supported_languages + f"<|{lang}|>" for lang in self.tokenizer.supported_languages ] all_lang_tokens_ids = self.tokenizer.convert_tokens_to_ids( all_lang_tokens @@ -333,7 +341,7 @@ def generate( hyps, scores = hyps[:, 0, :], scores[:, 0] else: hyps, scores = self._greedy_search( - audio_features, hyps, suppress_mask, max_gen_tokens, + audio_features, hyps, suppress_mask, max_gen_tokens ) if return_all: hyps, scores = hyps[:, None, :], scores[:, None] @@ -382,9 +390,7 @@ def _greedy_search( # B* alive_mask_unchanged = gen_token_ids != endoftext_id if not alive_mask_unchanged.all(): - alive_mask[ - alive_mask == True - ] = alive_mask_unchanged # noqa: E712 + alive_mask[alive_mask] = alive_mask_unchanged # noqa: E712 if not alive_mask.any(): break # B* x S x F @@ -566,9 +572,7 @@ def _beam_search( # B* alive_mask_unchanged = end_idxes < beam_size if not alive_mask_unchanged.all(): - alive_mask[ - alive_mask == True - ] = alive_mask_unchanged # noqa: E712 + alive_mask[alive_mask] = alive_mask_unchanged # noqa: E712 if not alive_mask.any(): break # N x B* x S x F diff --git a/benchmarks/CL_MASR/whisper/train_agem.py b/benchmarks/CL_MASR/whisper/train_agem.py index 70454a829..48dbb67aa 100644 --- a/benchmarks/CL_MASR/whisper/train_agem.py +++ b/benchmarks/CL_MASR/whisper/train_agem.py @@ -40,7 +40,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() enc_out, logits, _ = torch.utils.checkpoint.checkpoint( - self.modules.whisper, wavs, bos_tokens, + self.modules.whisper, wavs, bos_tokens ) else: enc_out, logits, _ = self.modules.whisper(wavs, bos_tokens) @@ -118,7 +118,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -248,7 +249,8 @@ def fit_batch(self, batch): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. 
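The _greedy_search and _beam_search hunks above replace alive_mask[alive_mask == True] with alive_mask[alive_mask]; comparing a boolean tensor against True was redundant. A tiny worked example of this bookkeeping, with made-up values:

import torch

# Three of the four hypotheses are still alive.
alive_mask = torch.tensor([True, False, True, True])
# Among those three, the second one just emitted <|endoftext|>.
alive_mask_unchanged = torch.tensor([True, False, True])

# Indexing the mask with itself touches only the positions that are still
# True, so finished hypotheses are switched off in place.
alive_mask[alive_mask] = alive_mask_unchanged
print(alive_mask)  # tensor([ True, False, False,  True])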
+ """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -297,7 +299,7 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, hparams["sample_rate"] )(sig) return resampled @@ -341,7 +343,8 @@ def text_pipeline(wrd, locale): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], + datasets, + ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], ) return train_data, valid_data, test_data @@ -376,10 +379,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -395,7 +398,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -451,9 +454,7 @@ def train(hparams, run_opts): """ # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -557,7 +558,7 @@ def train(hparams, run_opts): hparams["valid_dataloader_kwargs"].pop("ckpt_prefix", None) hparams["epoch_counter"].current = 0 replay_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) replay_data = replay_brain.make_dataloader( replay_data, @@ -579,7 +580,7 @@ def train(hparams, run_opts): hparams, run_opts, hparams["base_locales"] + hparams["new_locales"][: i + 1], - f"wer_test_after_{locale}.txt", + "wer_test_after_{locale}.txt", ) @@ -602,7 +603,7 @@ def __init__(self): super().__init__() self.whisper = hparams["whisper"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) self.bos_tokens = torch.ones( 1, @@ -618,13 +619,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/whisper/train_der.py b/benchmarks/CL_MASR/whisper/train_der.py index 86ab58048..9dc94239a 100644 --- a/benchmarks/CL_MASR/whisper/train_der.py +++ b/benchmarks/CL_MASR/whisper/train_der.py @@ -39,7 +39,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() enc_out, logits, _ = torch.utils.checkpoint.checkpoint( - 
self.modules.whisper, wavs, bos_tokens, + self.modules.whisper, wavs, bos_tokens ) else: enc_out, logits, _ = self.modules.whisper(wavs, bos_tokens) @@ -67,7 +67,7 @@ def compute_objectives(self, predictions, batch, stage): # Compute distillation loss if stage == sb.Stage.TRAIN: selected_samples = random.sample( - self.hparams.replay_buffer, len(ids), + self.hparams.replay_buffer, len(ids) ) tmp = [] @@ -118,7 +118,7 @@ def compute_objectives(self, predictions, batch, stage): if self.hparams.gradient_checkpointing: replay_wavs.requires_grad_() _, replay_logits, _ = torch.utils.checkpoint.checkpoint( - self.modules.whisper, replay_wavs, replay_bos_tokens, + self.modules.whisper, replay_wavs, replay_bos_tokens ) else: _, replay_logits, _ = self.modules.whisper( @@ -187,7 +187,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -205,7 +206,8 @@ def _fit_train(self, train_set, epoch, enable): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -254,7 +256,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -298,7 +301,8 @@ def text_pipeline(wrd, locale): # 4. 
Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], + datasets, + ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], ) return train_data, valid_data, test_data @@ -333,10 +337,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -352,7 +356,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -408,9 +412,7 @@ def train(hparams, run_opts): """ # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") replay_buffer = [] @@ -540,7 +542,7 @@ def train(hparams, run_opts): hparams, run_opts, hparams["base_locales"] + hparams["new_locales"][: i + 1], - f"wer_test_after_{locale}.txt", + "wer_test_after_{locale}.txt", ) @@ -563,7 +565,7 @@ def __init__(self): super().__init__() self.whisper = hparams["whisper"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) self.bos_tokens = torch.ones( 1, @@ -579,13 +581,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/whisper/train_er.py b/benchmarks/CL_MASR/whisper/train_er.py index 2783c1f85..5a2b2d250 100644 --- a/benchmarks/CL_MASR/whisper/train_er.py +++ b/benchmarks/CL_MASR/whisper/train_er.py @@ -39,7 +39,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() enc_out, logits, _ = torch.utils.checkpoint.checkpoint( - self.modules.whisper, wavs, bos_tokens, + self.modules.whisper, wavs, bos_tokens ) else: enc_out, logits, _ = self.modules.whisper(wavs, bos_tokens) @@ -115,7 +115,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -128,7 +129,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. 
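The recurring wer_computer hunk only changes how black wraps the lambda. What it implements: for zh-CN and ja, where words are not separated by spaces, the scripts score at the character level (CER) by constructing ErrorRateStats with split_tokens=True, and at the word level (WER) otherwise. A small sketch of that selection; the helper name and the sample utterance are made up:

import speechbrain as sb

def make_wer_computer(locale):
    if locale in ["zh-CN", "ja"]:
        # Character-level scoring (CER): split every token into characters.
        return lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats(
            split_tokens=True
        )
    # Word-level scoring (WER).
    return sb.utils.metric_stats.ErrorRateStats

cer_metric = make_wer_computer("ja")()
cer_metric.append(ids=["utt1"], predict=[["こんにちは"]], target=[["こんばんは"]])
print(cer_metric.summarize("error_rate"))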
+ """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -177,7 +179,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -221,7 +224,8 @@ def text_pipeline(wrd, locale): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], + datasets, + ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], ) return train_data, valid_data, test_data @@ -256,10 +260,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -275,7 +279,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -331,9 +335,7 @@ def train(hparams, run_opts): """ # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -447,7 +449,7 @@ def train(hparams, run_opts): hparams, run_opts, hparams["base_locales"] + hparams["new_locales"][: i + 1], - f"wer_test_after_{locale}.txt", + "wer_test_after_{locale}.txt", ) @@ -470,7 +472,7 @@ def __init__(self): super().__init__() self.whisper = hparams["whisper"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) self.bos_tokens = torch.ones( 1, @@ -486,13 +488,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/whisper/train_ewc.py b/benchmarks/CL_MASR/whisper/train_ewc.py index 44b1607c1..d6a3693e7 100644 --- a/benchmarks/CL_MASR/whisper/train_ewc.py +++ b/benchmarks/CL_MASR/whisper/train_ewc.py @@ -39,7 +39,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() enc_out, logits, _ = torch.utils.checkpoint.checkpoint( - self.modules.whisper, wavs, bos_tokens, + self.modules.whisper, wavs, bos_tokens ) else: enc_out, logits, _ = self.modules.whisper(wavs, bos_tokens) @@ -139,7 +139,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": 
stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -198,6 +199,10 @@ def compute_ewc_params(hparams, run_opts, locales): locales : list[str] The locales to consider. + Returns + ------- + params + fisher """ tokenizer = hparams["whisper"].tokenizer batch_size = hparams["train_dataloader_kwargs"].get("batch_size", 1) @@ -224,7 +229,7 @@ def compute_ewc_params(hparams, run_opts, locales): # Trainer initialization asr_brain = EWCParamsComputer( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -250,7 +255,8 @@ def compute_ewc_params(hparams, run_opts, locales): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -299,7 +305,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -343,7 +350,8 @@ def text_pipeline(wrd, locale): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], + datasets, + ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], ) return train_data, valid_data, test_data @@ -378,10 +386,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -397,7 +405,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -453,9 +461,7 @@ def train(hparams, run_opts): """ # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -565,7 +571,7 @@ def train(hparams, run_opts): hparams, run_opts, hparams["base_locales"] + hparams["new_locales"][: i + 1], - f"wer_test_after_{locale}.txt", + "wer_test_after_{locale}.txt", ) @@ -588,7 +594,7 @@ def __init__(self): super().__init__() self.whisper = hparams["whisper"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) self.bos_tokens = torch.ones( 1, @@ -604,13 +610,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, 
print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/whisper/train_ft.py b/benchmarks/CL_MASR/whisper/train_ft.py index cf404404d..4c599c9e3 100644 --- a/benchmarks/CL_MASR/whisper/train_ft.py +++ b/benchmarks/CL_MASR/whisper/train_ft.py @@ -38,7 +38,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() enc_out, logits, _ = torch.utils.checkpoint.checkpoint( - self.modules.whisper, wavs, bos_tokens, + self.modules.whisper, wavs, bos_tokens ) else: enc_out, logits, _ = self.modules.whisper(wavs, bos_tokens) @@ -114,7 +114,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -127,7 +128,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -176,7 +178,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -220,7 +223,8 @@ def text_pipeline(wrd, locale): # 4. 
Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], + datasets, + ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], ) return train_data, valid_data, test_data @@ -255,10 +259,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -274,7 +278,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -330,9 +334,7 @@ def train(hparams, run_opts): """ # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -417,7 +419,7 @@ def train(hparams, run_opts): hparams, run_opts, hparams["base_locales"] + hparams["new_locales"][: i + 1], - f"wer_test_after_{locale}.txt", + "wer_test_after_{locale}.txt", ) @@ -440,7 +442,7 @@ def __init__(self): super().__init__() self.whisper = hparams["whisper"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) self.bos_tokens = torch.ones( 1, @@ -456,13 +458,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/whisper/train_joint.py b/benchmarks/CL_MASR/whisper/train_joint.py index ea0cb2743..01cf63234 100644 --- a/benchmarks/CL_MASR/whisper/train_joint.py +++ b/benchmarks/CL_MASR/whisper/train_joint.py @@ -36,7 +36,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() enc_out, logits, _ = torch.utils.checkpoint.checkpoint( - self.modules.whisper, wavs, bos_tokens, + self.modules.whisper, wavs, bos_tokens ) else: enc_out, logits, _ = self.modules.whisper(wavs, bos_tokens) @@ -128,7 +128,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. 
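The compute_forward hunks only drop a trailing comma inside torch.utils.checkpoint.checkpoint, but the pattern is worth spelling out: checkpointing recomputes the wrapped forward pass during backward to save activation memory, and the input is marked with requires_grad_() first so the checkpointed segment has a gradient path. A generic sketch with a toy module standing in for self.modules.whisper / self.modules.wavlm:

import torch
import torch.utils.checkpoint

model = torch.nn.Sequential(
    torch.nn.Linear(80, 256), torch.nn.ReLU(), torch.nn.Linear(256, 32)
)
feats = torch.randn(4, 80)

gradient_checkpointing = True  # stands in for hparams["gradient_checkpointing"]
if gradient_checkpointing:
    # Without a grad-requiring input, checkpoint() has nothing to backpropagate
    # through and the recomputation trick is pointless.
    feats.requires_grad_()
    logits = torch.utils.checkpoint.checkpoint(model, feats)
else:
    logits = model(feats)

logits.sum().backward()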
+ """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -177,7 +178,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -221,7 +223,8 @@ def text_pipeline(wrd, locale): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], + datasets, + ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], ) return train_data, valid_data, test_data @@ -256,10 +259,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -275,7 +278,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -331,9 +334,7 @@ def train(hparams, run_opts): """ # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales # Multi-gpu (ddp) save data preparation @@ -413,7 +414,7 @@ def train(hparams, run_opts): hparams, run_opts, hparams["base_locales"] + hparams["new_locales"], - f"wer_test_after.txt", + "wer_test_after.txt", ) @@ -436,7 +437,7 @@ def __init__(self): super().__init__() self.whisper = hparams["whisper"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) self.bos_tokens = torch.ones( 1, @@ -452,13 +453,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/whisper/train_l2p.py b/benchmarks/CL_MASR/whisper/train_l2p.py index d2ce451d0..5fd20935c 100644 --- a/benchmarks/CL_MASR/whisper/train_l2p.py +++ b/benchmarks/CL_MASR/whisper/train_l2p.py @@ -40,7 +40,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() enc_out = torch.utils.checkpoint.checkpoint( - self.modules.whisper.forward_encoder, wavs, + self.modules.whisper.forward_encoder, wavs ) enc_out = torch.utils.checkpoint.checkpoint( self.modules.prompt_pool, @@ -133,7 +133,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + 
min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -199,7 +200,8 @@ def forward(self, input, locale=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -248,7 +250,7 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, hparams["sample_rate"] )(sig) return resampled @@ -292,7 +294,8 @@ def text_pipeline(wrd, locale): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], + datasets, + ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], ) return train_data, valid_data, test_data @@ -327,10 +330,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -346,7 +349,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -402,9 +405,7 @@ def train(hparams, run_opts): """ # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -473,7 +474,7 @@ def train(hparams, run_opts): run_opts, [locale], # hparams["base_locales"] + hparams["new_locales"][: i + 1], - f"wer_test_after_{locale}.txt", + "wer_test_after_{locale}.txt", ) # Copy previous lines (no forgetting by design) @@ -513,7 +514,7 @@ def __init__(self): super().__init__() self.whisper = hparams["whisper"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) self.bos_tokens = torch.ones( 1, @@ -529,13 +530,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/whisper/train_lwf.py b/benchmarks/CL_MASR/whisper/train_lwf.py index d69d4ab3a..b7c088e42 100644 --- a/benchmarks/CL_MASR/whisper/train_lwf.py +++ b/benchmarks/CL_MASR/whisper/train_lwf.py @@ -41,7 +41,7 @@ def compute_forward(self, batch, stage): if 
self.hparams.gradient_checkpointing: wavs.requires_grad_() enc_out, logits, _ = torch.utils.checkpoint.checkpoint( - self.modules.whisper, wavs, bos_tokens, + self.modules.whisper, wavs, bos_tokens ) else: enc_out, logits, _ = self.modules.whisper(wavs, bos_tokens) @@ -151,7 +151,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -164,7 +165,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -213,7 +215,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -257,7 +260,8 @@ def text_pipeline(wrd, locale): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], + datasets, + ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], ) return train_data, valid_data, test_data @@ -292,10 +296,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -311,7 +315,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -367,9 +371,7 @@ def train(hparams, run_opts): """ # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -466,7 +468,7 @@ def train(hparams, run_opts): hparams, run_opts, hparams["base_locales"] + hparams["new_locales"][: i + 1], - f"wer_test_after_{locale}.txt", + "wer_test_after_{locale}.txt", ) @@ -489,7 +491,7 @@ def __init__(self): super().__init__() self.whisper = hparams["whisper"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) self.bos_tokens = torch.ones( 1, @@ -505,13 +507,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - 
time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/whisper/train_mas.py b/benchmarks/CL_MASR/whisper/train_mas.py index 1b0a56dfd..8c8029bb4 100644 --- a/benchmarks/CL_MASR/whisper/train_mas.py +++ b/benchmarks/CL_MASR/whisper/train_mas.py @@ -38,7 +38,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() enc_out, logits, _ = torch.utils.checkpoint.checkpoint( - self.modules.whisper, wavs, bos_tokens, + self.modules.whisper, wavs, bos_tokens ) else: enc_out, logits, _ = self.modules.whisper(wavs, bos_tokens) @@ -132,7 +132,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -200,6 +201,10 @@ def compute_mas_params(hparams, run_opts, locales): locales : list[str] The locales to consider. + Returns + ------- + params + importance """ tokenizer = hparams["whisper"].tokenizer batch_size = hparams["train_dataloader_kwargs"].get("batch_size", 1) @@ -226,7 +231,7 @@ def compute_mas_params(hparams, run_opts, locales): # Trainer initialization asr_brain = MASParamsComputer( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -252,7 +257,8 @@ def compute_mas_params(hparams, run_opts, locales): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -301,7 +307,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -345,7 +352,8 @@ def text_pipeline(wrd, locale): # 4. 
Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], + datasets, + ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], ) return train_data, valid_data, test_data @@ -380,10 +388,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -399,7 +407,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -455,9 +463,7 @@ def train(hparams, run_opts): """ # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -569,7 +575,7 @@ def train(hparams, run_opts): hparams, run_opts, hparams["base_locales"] + hparams["new_locales"][: i + 1], - f"wer_test_after_{locale}.txt", + "wer_test_after_{locale}.txt", ) @@ -592,7 +598,7 @@ def __init__(self): super().__init__() self.whisper = hparams["whisper"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) self.bos_tokens = torch.ones( 1, @@ -608,13 +614,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/whisper/train_pb.py b/benchmarks/CL_MASR/whisper/train_pb.py index b032fb6cd..5006f577d 100644 --- a/benchmarks/CL_MASR/whisper/train_pb.py +++ b/benchmarks/CL_MASR/whisper/train_pb.py @@ -60,7 +60,10 @@ def compute_forward(self, batch, stage): self.hparams.forced_decoder_locale ) if decoder_mask is not None: - for (k, v,) in self.modules.whisper.model.decoder.layers[ + for ( + k, + v, + ) in self.modules.whisper.model.decoder.layers[ -2: ].named_parameters(): if k not in decoder_mask: @@ -76,7 +79,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() enc_out, logits, _ = torch.utils.checkpoint.checkpoint( - self.modules.whisper, wavs, bos_tokens, + self.modules.whisper, wavs, bos_tokens ) else: enc_out, logits, _ = self.modules.whisper(wavs, bos_tokens) @@ -152,7 +155,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -183,7 +187,8 @@ def init_optimizers(self): def dataio_prepare(hparams, tokenizer): 
"""This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -232,7 +237,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -276,7 +282,8 @@ def text_pipeline(wrd, locale): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], + datasets, + ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], ) return train_data, valid_data, test_data @@ -311,10 +318,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -348,7 +355,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -421,9 +428,7 @@ def train(hparams, run_opts): ) # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -524,7 +529,7 @@ def train(hparams, run_opts): run_opts, [locale], # hparams["base_locales"] + hparams["new_locales"][: i + 1], - f"wer_test_after_{locale}.txt", + "wer_test_after_{locale}.txt", ) # Copy previous lines (no forgetting by design) @@ -564,7 +569,7 @@ def __init__(self): super().__init__() self.whisper = hparams["whisper"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) self.bos_tokens = torch.ones( 1, @@ -580,13 +585,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result = { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/CL_MASR/whisper/train_pnn.py b/benchmarks/CL_MASR/whisper/train_pnn.py index c610935e8..630bacf5f 100644 --- a/benchmarks/CL_MASR/whisper/train_pnn.py +++ b/benchmarks/CL_MASR/whisper/train_pnn.py @@ -41,7 +41,7 @@ def compute_forward(self, batch, stage): if self.hparams.gradient_checkpointing: wavs.requires_grad_() enc_out, logits, _ = torch.utils.checkpoint.checkpoint( - self.modules.whisper, wavs, bos_tokens, + self.modules.whisper, wavs, 
bos_tokens ) else: enc_out, logits, _ = self.modules.whisper(wavs, bos_tokens) @@ -117,7 +117,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -130,7 +131,8 @@ def on_stage_end(self, stage, stage_loss, epoch=None): def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( csv_path=os.path.join(hparams["data_folder"], "train.csv"), replacements={"data_root": hparams["data_folder"]}, @@ -179,7 +181,8 @@ def audio_pipeline(mp3): info = torchaudio.info(mp3) sig = sb.dataio.dataio.read_audio(mp3) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -223,7 +226,8 @@ def text_pipeline(wrd, locale): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], + datasets, + ["id", "sig", "tokens_bos", "tokens_eos", "target_wrd"], ) return train_data, valid_data, test_data @@ -258,10 +262,10 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): if locale in ["zh-CN", "ja"]: # Use CER instead of WER (spaces are not used) - hparams[ - "wer_computer" - ] = lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( - split_tokens=True + hparams["wer_computer"] = ( + lambda *args, **kwargs: sb.utils.metric_stats.ErrorRateStats( + split_tokens=True + ) ) else: hparams["wer_computer"] = sb.utils.metric_stats.ErrorRateStats @@ -277,7 +281,7 @@ def test(hparams, run_opts, locales, wer_file="wer_test.txt"): # Trainer initialization asr_brain = ASR( - modules=hparams["modules"], hparams=hparams, run_opts=run_opts, + modules=hparams["modules"], hparams=hparams, run_opts=run_opts ) # We dynamically add the tokenizer to our brain class @@ -333,9 +337,7 @@ def train(hparams, run_opts): """ # Testing - test( - hparams, run_opts, hparams["base_locales"], f"wer_test_before.txt", - ) + test(hparams, run_opts, hparams["base_locales"], "wer_test_before.txt") # Train on new locales for i, locale in enumerate(hparams["new_locales"]): @@ -433,7 +435,7 @@ def train(hparams, run_opts): run_opts, [locale], # hparams["base_locales"] + hparams["new_locales"][: i + 1], - f"wer_test_after_{locale}.txt", + "wer_test_after_{locale}.txt", ) # Copy previous lines (no forgetting by design) @@ -473,7 +475,7 @@ def __init__(self): super().__init__() self.whisper = hparams["whisper"] self.wavs = torch.randn( - 1, hparams["sample_rate"], device=run_opts["device"], + 1, hparams["sample_rate"], device=run_opts["device"] ) self.bos_tokens = torch.ones( 1, @@ -489,13 +491,13 @@ def forward(self, _=None): model = Model().eval().to(run_opts["device"]) macs, params = ptflops.get_model_complexity_info( - model, (1,), as_strings=True, print_per_layer_stat=False, + model, (1,), as_strings=True, print_per_layer_stat=False ) time_start = time.time() model() torch.cuda.synchronize() time_stop = time.time() - time_start - max_mem = torch.cuda.max_memory_allocated("cuda") / 10 ** 9 + max_mem = torch.cuda.max_memory_allocated("cuda") / 10**9 result 
= { "MACs": macs, "memory": max_mem, diff --git a/benchmarks/MOABB/hparams/MotorImagery/BNCI2014001/EEGConformer.yaml b/benchmarks/MOABB/hparams/MotorImagery/BNCI2014001/EEGConformer.yaml index 0754cea3a..b5130ec19 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/BNCI2014001/EEGConformer.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/BNCI2014001/EEGConformer.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 3 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 22 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. diff --git a/benchmarks/MOABB/hparams/MotorImagery/BNCI2014001/EEGNet.yaml b/benchmarks/MOABB/hparams/MotorImagery/BNCI2014001/EEGNet.yaml index 7dca191ac..e3164d3c2 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/BNCI2014001/EEGNet.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/BNCI2014001/EEGNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 2 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 22 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. @@ -130,7 +130,7 @@ cnn_temporal_kernels: 61 # @orion_step1: --cnn_temporal_kernels~"uniform(4, 64,d cnn_temporal_kernelsize: 51 # @orion_step1: --cnn_temporal_kernelsize~"uniform(24, 62,discrete=True)" # depth multiplier for the spatial depthwise conv. layer cnn_spatial_depth_multiplier: 4 # @orion_step1: --cnn_spatial_depth_multiplier~"uniform(1, 4,discrete=True)" -cnn_spatial_max_norm: 1. # kernel max-norm constaint of the spatial depthwise conv. layer +cnn_spatial_max_norm: 1. # kernel max-norm constraint of the spatial depthwise conv. layer cnn_spatial_pool: 4 cnn_septemporal_depth_multiplier: 1 # depth multiplier for the separable temporal conv. layer cnn_septemporal_point_kernels_ratio_: 7 # @orion_step1: --cnn_septemporal_point_kernels_ratio_~"uniform(0, 8, discrete=True)" @@ -145,7 +145,7 @@ cnn_septemporal_kernelsize: !apply:round - !ref * / cnn_septemporal_pool: 7 # @orion_step1: --cnn_septemporal_pool~"uniform(1, 8,discrete=True)" cnn_pool_type: 'avg' -dense_max_norm: 0.25 # kernel max-norm constaint of the dense layer +dense_max_norm: 0.25 # kernel max-norm constraint of the dense layer dropout: 0.008464 # @orion_step1: --dropout~"uniform(0.0, 0.5)" activation_type: 'elu' diff --git a/benchmarks/MOABB/hparams/MotorImagery/BNCI2014001/ShallowConvNet.yaml b/benchmarks/MOABB/hparams/MotorImagery/BNCI2014001/ShallowConvNet.yaml index 30c513649..87111d727 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/BNCI2014001/ShallowConvNet.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/BNCI2014001/ShallowConvNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 2 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 22 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. 
# - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. diff --git a/benchmarks/MOABB/hparams/MotorImagery/BNCI2014004/EEGConformer.yaml b/benchmarks/MOABB/hparams/MotorImagery/BNCI2014004/EEGConformer.yaml index 8b230a0c2..97fd84690 100755 --- a/benchmarks/MOABB/hparams/MotorImagery/BNCI2014004/EEGConformer.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/BNCI2014004/EEGConformer.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 1 T: !apply:math.ceil - !ref * ( - ) C: 3 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. diff --git a/benchmarks/MOABB/hparams/MotorImagery/BNCI2014004/EEGNet.yaml b/benchmarks/MOABB/hparams/MotorImagery/BNCI2014004/EEGNet.yaml index e03a4d369..b2139f167 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/BNCI2014004/EEGNet.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/BNCI2014004/EEGNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 1 T: !apply:math.ceil - !ref * ( - ) C: 3 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. @@ -130,7 +130,7 @@ cnn_temporal_kernels: 30 # @orion_step1: --cnn_temporal_kernels~"uniform(4, 64,d cnn_temporal_kernelsize: 42 # @orion_step1: --cnn_temporal_kernelsize~"uniform(24, 62,discrete=True)" # depth multiplier for the spatial depthwise conv. layer cnn_spatial_depth_multiplier: 3 # @orion_step1: --cnn_spatial_depth_multiplier~"uniform(1, 4,discrete=True)" -cnn_spatial_max_norm: 1. # kernel max-norm constaint of the spatial depthwise conv. layer +cnn_spatial_max_norm: 1. # kernel max-norm constraint of the spatial depthwise conv. layer cnn_spatial_pool: 4 cnn_septemporal_depth_multiplier: 1 # depth multiplier for the separable temporal conv. layer cnn_septemporal_point_kernels_ratio_: 2 # @orion_step1: --cnn_septemporal_point_kernels_ratio_~"uniform(0, 8, discrete=True)" @@ -145,7 +145,7 @@ cnn_septemporal_kernelsize: !apply:round - !ref * / cnn_septemporal_pool: 5 # @orion_step1: --cnn_septemporal_pool~"uniform(1, 8,discrete=True)" cnn_pool_type: 'avg' -dense_max_norm: 0.25 # kernel max-norm constaint of the dense layer +dense_max_norm: 0.25 # kernel max-norm constraint of the dense layer dropout: 0.3609 # @orion_step1: --dropout~"uniform(0.0, 0.5)" activation_type: 'elu' diff --git a/benchmarks/MOABB/hparams/MotorImagery/BNCI2014004/ShallowConvNet.yaml b/benchmarks/MOABB/hparams/MotorImagery/BNCI2014004/ShallowConvNet.yaml index 5ae88aff6..6c197d01e 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/BNCI2014004/ShallowConvNet.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/BNCI2014004/ShallowConvNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 1 T: !apply:math.ceil - !ref * ( - ) C: 3 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. 
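As the comment says, avg_models averages the parameters of the last (or best) N saved checkpoints before testing. Conceptually this is an element-wise average of the saved state dicts; a minimal, library-agnostic sketch (file names are placeholders, and it assumes all entries are floating-point tensors):

import torch

def average_state_dicts(paths):
    """Element-wise average of several saved state_dicts."""
    avg = None
    for path in paths:
        state = torch.load(path, map_location="cpu")
        if avg is None:
            avg = {k: v.clone().float() for k, v in state.items()}
        else:
            for k, v in state.items():
                avg[k] += v.float()
    return {k: v / len(paths) for k, v in avg.items()}

# model.load_state_dict(average_state_dicts(["ckpt_1.pt", "ckpt_2.pt"]))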
# - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. diff --git a/benchmarks/MOABB/hparams/MotorImagery/BNCI2015001/EEGConformer.yaml b/benchmarks/MOABB/hparams/MotorImagery/BNCI2015001/EEGConformer.yaml index be4d61f64..0b409da89 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/BNCI2015001/EEGConformer.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/BNCI2015001/EEGConformer.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 3 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 13 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. diff --git a/benchmarks/MOABB/hparams/MotorImagery/BNCI2015001/EEGNet.yaml b/benchmarks/MOABB/hparams/MotorImagery/BNCI2015001/EEGNet.yaml index e2be109d5..8ee9d75df 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/BNCI2015001/EEGNet.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/BNCI2015001/EEGNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 3 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 13 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. @@ -130,7 +130,7 @@ cnn_temporal_kernels: 26 # @orion_step1: --cnn_temporal_kernels~"uniform(4, 64,d cnn_temporal_kernelsize: 54 # @orion_step1: --cnn_temporal_kernelsize~"uniform(24, 62,discrete=True)" # depth multiplier for the spatial depthwise conv. layer cnn_spatial_depth_multiplier: 3 # @orion_step1: --cnn_spatial_depth_multiplier~"uniform(1, 4,discrete=True)" -cnn_spatial_max_norm: 1. # kernel max-norm constaint of the spatial depthwise conv. layer +cnn_spatial_max_norm: 1. # kernel max-norm constraint of the spatial depthwise conv. layer cnn_spatial_pool: 4 cnn_septemporal_depth_multiplier: 1 # depth multiplier for the separable temporal conv. 
layer cnn_septemporal_point_kernels_ratio_: 5 # @orion_step1: --cnn_septemporal_point_kernels_ratio_~"uniform(0, 8, discrete=True)" @@ -145,7 +145,7 @@ cnn_septemporal_kernelsize: !apply:round - !ref * / cnn_septemporal_pool: 7 # @orion_step1: --cnn_septemporal_pool~"uniform(1, 8,discrete=True)" cnn_pool_type: 'avg' -dense_max_norm: 0.25 # kernel max-norm constaint of the dense layer +dense_max_norm: 0.25 # kernel max-norm constraint of the dense layer dropout: 0.2184 # @orion_step1: --dropout~"uniform(0.0, 0.5)" activation_type: 'elu' diff --git a/benchmarks/MOABB/hparams/MotorImagery/BNCI2015001/ShallowConvNet.yaml b/benchmarks/MOABB/hparams/MotorImagery/BNCI2015001/ShallowConvNet.yaml index 5e830758d..56b0acc99 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/BNCI2015001/ShallowConvNet.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/BNCI2015001/ShallowConvNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 2 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 13 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. diff --git a/benchmarks/MOABB/hparams/MotorImagery/Lee2019_MI/EEGConformer.yaml b/benchmarks/MOABB/hparams/MotorImagery/Lee2019_MI/EEGConformer.yaml index 01b6ce7e1..f53197959 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/Lee2019_MI/EEGConformer.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/Lee2019_MI/EEGConformer.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 5 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 62 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. diff --git a/benchmarks/MOABB/hparams/MotorImagery/Lee2019_MI/EEGNet.yaml b/benchmarks/MOABB/hparams/MotorImagery/Lee2019_MI/EEGNet.yaml index 839d6995f..64fd9ff3b 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/Lee2019_MI/EEGNet.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/Lee2019_MI/EEGNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 2 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 62 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. @@ -130,7 +130,7 @@ cnn_temporal_kernels: 41 # @orion_step1: --cnn_temporal_kernels~"uniform(4, 64,d cnn_temporal_kernelsize: 29 # @orion_step1: --cnn_temporal_kernelsize~"uniform(24, 62,discrete=True)" # depth multiplier for the spatial depthwise conv. layer cnn_spatial_depth_multiplier: 2 # @orion_step1: --cnn_spatial_depth_multiplier~"uniform(1, 4,discrete=True)" -cnn_spatial_max_norm: 1. # kernel max-norm constaint of the spatial depthwise conv. layer +cnn_spatial_max_norm: 1. 
# kernel max-norm constraint of the spatial depthwise conv. layer cnn_spatial_pool: 4 cnn_septemporal_depth_multiplier: 1 # depth multiplier for the separable temporal conv. layer cnn_septemporal_point_kernels_ratio_: 7 # @orion_step1: --cnn_septemporal_point_kernels_ratio_~"uniform(0, 8, discrete=True)" @@ -145,7 +145,7 @@ cnn_septemporal_kernelsize: !apply:round - !ref * / cnn_septemporal_pool: 8 # @orion_step1: --cnn_septemporal_pool~"uniform(1, 8,discrete=True)" cnn_pool_type: 'avg' -dense_max_norm: 0.25 # kernel max-norm constaint of the dense layer +dense_max_norm: 0.25 # kernel max-norm constraint of the dense layer dropout: 0.01204 # @orion_step1: --dropout~"uniform(0.0, 0.5)" activation_type: 'elu' diff --git a/benchmarks/MOABB/hparams/MotorImagery/Lee2019_MI/ShallowConvNet.yaml b/benchmarks/MOABB/hparams/MotorImagery/Lee2019_MI/ShallowConvNet.yaml index 8cf848256..354c47cfe 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/Lee2019_MI/ShallowConvNet.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/Lee2019_MI/ShallowConvNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 2 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 62 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. diff --git a/benchmarks/MOABB/hparams/MotorImagery/Zhou2016/EEGConformer.yaml b/benchmarks/MOABB/hparams/MotorImagery/Zhou2016/EEGConformer.yaml index f8a3a0620..90bd8f192 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/Zhou2016/EEGConformer.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/Zhou2016/EEGConformer.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 1 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 14 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. diff --git a/benchmarks/MOABB/hparams/MotorImagery/Zhou2016/EEGNet.yaml b/benchmarks/MOABB/hparams/MotorImagery/Zhou2016/EEGNet.yaml index bdae72c87..767b3d58b 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/Zhou2016/EEGNet.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/Zhou2016/EEGNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 2 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 14 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. @@ -130,7 +130,7 @@ cnn_temporal_kernels: 61 # @orion_step1: --cnn_temporal_kernels~"uniform(4, 64,d cnn_temporal_kernelsize: 58 # @orion_step1: --cnn_temporal_kernelsize~"uniform(24, 62,discrete=True)" # depth multiplier for the spatial depthwise conv. 
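The cnn_spatial_max_norm and dense_max_norm entries above are kernel max-norm constraints: after each update the corresponding weights are rescaled so that their L2 norm does not exceed the given bound (1.0 for the depthwise convolution, 0.25 for the dense layer). A small self-contained PyTorch sketch of how such a constraint is commonly enforced (not the exact code used by these models):

import torch


def apply_max_norm(weight: torch.Tensor, max_norm: float) -> None:
    # Rescale each output filter so that its L2 norm is at most `max_norm`.
    with torch.no_grad():
        norms = weight.norm(p=2, dim=tuple(range(1, weight.dim())), keepdim=True)
        weight.mul_((max_norm / (norms + 1e-8)).clamp(max=1.0))


conv = torch.nn.Conv2d(8, 16, kernel_size=3)
apply_max_norm(conv.weight, max_norm=1.0)  # e.g. cnn_spatial_max_norm: 1.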
layer cnn_spatial_depth_multiplier: 2 # @orion_step1: --cnn_spatial_depth_multiplier~"uniform(1, 4,discrete=True)" -cnn_spatial_max_norm: 1. # kernel max-norm constaint of the spatial depthwise conv. layer +cnn_spatial_max_norm: 1. # kernel max-norm constraint of the spatial depthwise conv. layer cnn_spatial_pool: 4 cnn_septemporal_depth_multiplier: 1 # depth multiplier for the separable temporal conv. layer cnn_septemporal_point_kernels_ratio_: 7 # @orion_step1: --cnn_septemporal_point_kernels_ratio_~"uniform(0, 8, discrete=True)" @@ -145,7 +145,7 @@ cnn_septemporal_kernelsize: !apply:round - !ref * / cnn_septemporal_pool: 3 # @orion_step1: --cnn_septemporal_pool~"uniform(1, 8,discrete=True)" cnn_pool_type: 'avg' -dense_max_norm: 0.25 # kernel max-norm constaint of the dense layer +dense_max_norm: 0.25 # kernel max-norm constraint of the dense layer dropout: 0.3694 # @orion_step1: --dropout~"uniform(0.0, 0.5)" activation_type: 'elu' diff --git a/benchmarks/MOABB/hparams/MotorImagery/Zhou2016/ShallowConvNet.yaml b/benchmarks/MOABB/hparams/MotorImagery/Zhou2016/ShallowConvNet.yaml index 3edce849f..b96b02205 100644 --- a/benchmarks/MOABB/hparams/MotorImagery/Zhou2016/ShallowConvNet.yaml +++ b/benchmarks/MOABB/hparams/MotorImagery/Zhou2016/ShallowConvNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 1 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 14 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. diff --git a/benchmarks/MOABB/hparams/P300/BNCI2014009/EEGNet.yaml b/benchmarks/MOABB/hparams/P300/BNCI2014009/EEGNet.yaml index fba784841..85befdd4c 100644 --- a/benchmarks/MOABB/hparams/P300/BNCI2014009/EEGNet.yaml +++ b/benchmarks/MOABB/hparams/P300/BNCI2014009/EEGNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 3 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 16 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. @@ -130,7 +130,7 @@ cnn_temporal_kernels: 58 # @orion_step1: --cnn_temporal_kernels~"uniform(4, 64,d cnn_temporal_kernelsize: 42 # @orion_step1: --cnn_temporal_kernelsize~"uniform(24, 62,discrete=True)" # depth multiplier for the spatial depthwise conv. layer cnn_spatial_depth_multiplier: 3 # @orion_step1: --cnn_spatial_depth_multiplier~"uniform(1, 4,discrete=True)" -cnn_spatial_max_norm: 1. # kernel max-norm constaint of the spatial depthwise conv. layer +cnn_spatial_max_norm: 1. # kernel max-norm constraint of the spatial depthwise conv. layer cnn_spatial_pool: 4 cnn_septemporal_depth_multiplier: 1 # depth multiplier for the separable temporal conv. 
layer cnn_septemporal_point_kernels_ratio_: 5 # @orion_step1: --cnn_septemporal_point_kernels_ratio_~"uniform(0, 8, discrete=True)" @@ -145,7 +145,7 @@ cnn_septemporal_kernelsize: !apply:round - !ref * / cnn_septemporal_pool: 4 # @orion_step1: --cnn_septemporal_pool~"uniform(1, 8,discrete=True)" cnn_pool_type: 'avg' -dense_max_norm: 0.25 # kernel max-norm constaint of the dense layer +dense_max_norm: 0.25 # kernel max-norm constraint of the dense layer dropout: 0.3903 # @orion_step1: --dropout~"uniform(0.0, 0.5)" activation_type: 'elu' diff --git a/benchmarks/MOABB/hparams/P300/EPFLP300/EEGNet.yaml b/benchmarks/MOABB/hparams/P300/EPFLP300/EEGNet.yaml index a5b36a5b1..cbb7c261c 100644 --- a/benchmarks/MOABB/hparams/P300/EPFLP300/EEGNet.yaml +++ b/benchmarks/MOABB/hparams/P300/EPFLP300/EEGNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 3 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 32 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. @@ -130,7 +130,7 @@ cnn_temporal_kernels: 61 # @orion_step1: --cnn_temporal_kernels~"uniform(4, 64,d cnn_temporal_kernelsize: 29 # @orion_step1: --cnn_temporal_kernelsize~"uniform(24, 62,discrete=True)" # depth multiplier for the spatial depthwise conv. layer cnn_spatial_depth_multiplier: 1 # @orion_step1: --cnn_spatial_depth_multiplier~"uniform(1, 4,discrete=True)" -cnn_spatial_max_norm: 1. # kernel max-norm constaint of the spatial depthwise conv. layer +cnn_spatial_max_norm: 1. # kernel max-norm constraint of the spatial depthwise conv. layer cnn_spatial_pool: 4 cnn_septemporal_depth_multiplier: 1 # depth multiplier for the separable temporal conv. layer cnn_septemporal_point_kernels_ratio_: 7 # @orion_step1: --cnn_septemporal_point_kernels_ratio_~"uniform(0, 8, discrete=True)" @@ -145,7 +145,7 @@ cnn_septemporal_kernelsize: !apply:round - !ref * / cnn_septemporal_pool: 4 # @orion_step1: --cnn_septemporal_pool~"uniform(1, 8,discrete=True)" cnn_pool_type: 'avg' -dense_max_norm: 0.25 # kernel max-norm constaint of the dense layer +dense_max_norm: 0.25 # kernel max-norm constraint of the dense layer dropout: 0.3831 # @orion_step1: --dropout~"uniform(0.0, 0.5)" activation_type: 'elu' diff --git a/benchmarks/MOABB/hparams/P300/bi2015a/EEGNet.yaml b/benchmarks/MOABB/hparams/P300/bi2015a/EEGNet.yaml index 5c2aa50d7..b572b5f69 100755 --- a/benchmarks/MOABB/hparams/P300/bi2015a/EEGNet.yaml +++ b/benchmarks/MOABB/hparams/P300/bi2015a/EEGNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 3 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 32 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. @@ -130,7 +130,7 @@ cnn_temporal_kernels: 39 # @orion_step1: --cnn_temporal_kernels~"uniform(4, 64,d cnn_temporal_kernelsize: 29 # @orion_step1: --cnn_temporal_kernelsize~"uniform(24, 62,discrete=True)" # depth multiplier for the spatial depthwise conv. 
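The T: !apply:math.ceil and cnn_septemporal_kernelsize: !apply:round entries in these files are HyperPyYAML expressions: !ref substitutes (and can do arithmetic on) other keys, and !apply calls the named Python function on the listed arguments. A tiny self-contained sketch of the mechanism, assuming load_hyperpyyaml accepts a YAML string; the key names below are purely illustrative and not the benchmark's actual hparams:

from hyperpyyaml import load_hyperpyyaml

# Illustrative keys only, not the ones used by the MOABB hparams files.
yaml_string = """
sample_rate: 250
tmin: 0.0
tmax: 4.0
n_samples: !apply:math.ceil
    - !ref <sample_rate> * (<tmax> - <tmin>)
"""
hparams = load_hyperpyyaml(yaml_string)
print(hparams["n_samples"])  # 1000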
layer cnn_spatial_depth_multiplier: 3 # @orion_step1: --cnn_spatial_depth_multiplier~"uniform(1, 4,discrete=True)" -cnn_spatial_max_norm: 1. # kernel max-norm constaint of the spatial depthwise conv. layer +cnn_spatial_max_norm: 1. # kernel max-norm constraint of the spatial depthwise conv. layer cnn_spatial_pool: 4 cnn_septemporal_depth_multiplier: 1 # depth multiplier for the separable temporal conv. layer cnn_septemporal_point_kernels_ratio_: 7 # @orion_step1: --cnn_septemporal_point_kernels_ratio_~"uniform(0, 8, discrete=True)" @@ -145,7 +145,7 @@ cnn_septemporal_kernelsize: !apply:round - !ref * / cnn_septemporal_pool: 4 # @orion_step1: --cnn_septemporal_pool~"uniform(1, 8,discrete=True)" cnn_pool_type: 'avg' -dense_max_norm: 0.25 # kernel max-norm constaint of the dense layer +dense_max_norm: 0.25 # kernel max-norm constraint of the dense layer dropout: 0.2011 # @orion_step1: --dropout~"uniform(0.0, 0.5)" activation_type: 'elu' diff --git a/benchmarks/MOABB/hparams/SSVEP/Lee2019_SSVEP/EEGNet.yaml b/benchmarks/MOABB/hparams/SSVEP/Lee2019_SSVEP/EEGNet.yaml index cc6724593..4af5731e4 100644 --- a/benchmarks/MOABB/hparams/SSVEP/Lee2019_SSVEP/EEGNet.yaml +++ b/benchmarks/MOABB/hparams/SSVEP/Lee2019_SSVEP/EEGNet.yaml @@ -31,7 +31,7 @@ n_steps_channel_selection: 5 # @orion_step1: --n_steps_channel_selection~"unifor T: !apply:math.ceil - !ref * ( - ) C: 62 -# We here specify how to perfom test: +# We here specify how to perform test: # - If test_with: 'last' we perform test with the latest model. # - if test_with: 'best, we perform test with the best model (according to the metric specified in test_key) # The variable avg_models can be used to average the parameters of the last (or best) N saved models before testing. @@ -129,7 +129,7 @@ cnn_temporal_kernels: 34 # @orion_step1: --cnn_temporal_kernels~"uniform(4, 64,d cnn_temporal_kernelsize: 31 # @orion_step1: --cnn_temporal_kernelsize~"uniform(24, 62,discrete=True)" # depth multiplier for the spatial depthwise conv. layer cnn_spatial_depth_multiplier: 3 # @orion_step1: --cnn_spatial_depth_multiplier~"uniform(1, 4,discrete=True)" -cnn_spatial_max_norm: 1. # kernel max-norm constaint of the spatial depthwise conv. layer +cnn_spatial_max_norm: 1. # kernel max-norm constraint of the spatial depthwise conv. layer cnn_spatial_pool: 4 cnn_septemporal_depth_multiplier: 1 # depth multiplier for the separable temporal conv. layer cnn_septemporal_point_kernels_ratio_: 7 # @orion_step1: --cnn_septemporal_point_kernels_ratio_~"uniform(0, 8, discrete=True)" @@ -144,7 +144,7 @@ cnn_septemporal_kernelsize: !apply:round - !ref * / cnn_septemporal_pool: 1 # @orion_step1: --cnn_septemporal_pool~"uniform(1, 8,discrete=True)" cnn_pool_type: 'avg' -dense_max_norm: 0.25 # kernel max-norm constaint of the dense layer +dense_max_norm: 0.25 # kernel max-norm constraint of the dense layer dropout: 0.2004 # @orion_step1: --dropout~"uniform(0.0, 0.5)" activation_type: 'elu' diff --git a/benchmarks/MOABB/models/BraindecodeNN.py b/benchmarks/MOABB/models/BraindecodeNN.py index 15a3bc5d8..33fd48716 100644 --- a/benchmarks/MOABB/models/BraindecodeNN.py +++ b/benchmarks/MOABB/models/BraindecodeNN.py @@ -27,6 +27,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: --------- x : torch.Tensor (batch, time, EEG channel, channel) Input to convolve. 4d tensors are expected. + + Returns + ------- + x : torch.Tensor + The processed outputs. 
""" # (batch, time_, EEG channel, channel) -> # (batch, EEG channel, time_, channel) x = torch.transpose(x, 1, 2) diff --git a/benchmarks/MOABB/models/EEGConformer.py b/benchmarks/MOABB/models/EEGConformer.py index a361db888..e721f17dc 100644 --- a/benchmarks/MOABB/models/EEGConformer.py +++ b/benchmarks/MOABB/models/EEGConformer.py @@ -9,6 +9,7 @@ Authors * Davide Borra, 2023 """ + import torch import speechbrain as sb @@ -103,13 +104,11 @@ def __init__( dense_input_size = self._num_flat_features(out) # DENSE MODULE self.dense_module = torch.nn.Sequential() - self.dense_module.add_module( - "flatten", torch.nn.Flatten(), - ) + self.dense_module.add_module("flatten", torch.nn.Flatten()) self.dense_module.add_module( "fc_out", sb.nnet.linear.Linear( - input_size=dense_input_size, n_neurons=dense_n_neurons, + input_size=dense_input_size, n_neurons=dense_n_neurons ), ) self.dense_module.add_module("act_out", torch.nn.LogSoftmax(dim=1)) @@ -121,6 +120,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: --------- x : torch.Tensor (batch, time, EEG channel, channel) Input to convolve. 4d tensors are expected. + + Returns + ------- + x : torch.Tensor + The transformed outputs. """ x = self.emb_module(x) # (batch, time_, EEG channel, channel) @@ -135,6 +139,11 @@ def _num_flat_features(self, x): --------- x : torch.Tensor Input feature map. + + Returns + ------- + num_features : int + Count of features in the input. """ size = x.size()[1:] # all dimensions except the batch dimension @@ -155,6 +164,8 @@ class PatchEmbedding(torch.nn.Module): Number of kernels in the 2d spatial convolution in the convolutional module. cnn_temporal_kernelsize: tuple Kernel size of the 2d temporal convolution in the convolutional module. + cnn_spatial_kernelsize: tuple + Kernel size of the 2d spatial convolution in the convolutional module. cnn_poolsize: tuple Pool size in the convolutional module. cnn_poolstride: tuple @@ -213,7 +224,7 @@ def __init__( swap=True, ), sb.nnet.normalization.BatchNorm2d( - input_size=cnn_spatial_kernels, momentum=0.01, affine=True, + input_size=cnn_spatial_kernels, momentum=0.01, affine=True ), activation, sb.nnet.pooling.Pooling2d( @@ -241,6 +252,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: --------- x : torch.Tensor (batch, time, EEG channel, channel) Input to convolve. 4d tensors are expected. + + Returns + ------- + x : torch.Tensor + The convolved outputs. 
""" x = self.shallownet( x @@ -271,17 +287,17 @@ def __init__(self, emb_size, num_heads, dropout): self.num_heads = num_heads self.keys = sb.nnet.linear.Linear( - input_size=emb_size, n_neurons=emb_size, bias=True, + input_size=emb_size, n_neurons=emb_size, bias=True ) self.queries = sb.nnet.linear.Linear( - input_size=emb_size, n_neurons=emb_size, bias=True, + input_size=emb_size, n_neurons=emb_size, bias=True ) self.values = sb.nnet.linear.Linear( - input_size=emb_size, n_neurons=emb_size, bias=True, + input_size=emb_size, n_neurons=emb_size, bias=True ) self.dropout = torch.nn.Dropout(dropout) self.projection = sb.nnet.linear.Linear( - input_size=emb_size, n_neurons=emb_size, bias=True, + input_size=emb_size, n_neurons=emb_size, bias=True ) def forward( diff --git a/benchmarks/MOABB/models/EEGNet.py b/benchmarks/MOABB/models/EEGNet.py index dcb62d47e..8b0b25f54 100644 --- a/benchmarks/MOABB/models/EEGNet.py +++ b/benchmarks/MOABB/models/EEGNet.py @@ -5,6 +5,7 @@ Authors * Davide Borra, 2021 """ + import torch import speechbrain as sb @@ -28,6 +29,8 @@ class EEGNet(torch.nn.Module): Pool size and stride after the 2d spatial depthwise convolution. cnn_septemporal_depth_multiplier: int Depth multiplier of the 2d temporal separable convolution. + cnn_septemporal_point_kernels : int + Size of point kernels. cnn_septemporal_kernelsize: tuple Kernel size of the 2d temporal separable convolution. cnn_septemporal_pool: tuple @@ -107,7 +110,7 @@ def __init__( self.conv_module.add_module( "bnorm_0", sb.nnet.normalization.BatchNorm2d( - input_size=cnn_temporal_kernels, momentum=0.01, affine=True, + input_size=cnn_temporal_kernels, momentum=0.01, affine=True ), ) # Spatial depthwise convolution @@ -130,7 +133,7 @@ def __init__( self.conv_module.add_module( "bnorm_1", sb.nnet.normalization.BatchNorm2d( - input_size=cnn_spatial_kernels, momentum=0.01, affine=True, + input_size=cnn_spatial_kernels, momentum=0.01, affine=True ), ) self.conv_module.add_module("act_1", activation) @@ -204,9 +207,7 @@ def __init__( dense_input_size = self._num_flat_features(out) # DENSE MODULE self.dense_module = torch.nn.Sequential() - self.dense_module.add_module( - "flatten", torch.nn.Flatten(), - ) + self.dense_module.add_module("flatten", torch.nn.Flatten()) self.dense_module.add_module( "fc_out", sb.nnet.linear.Linear( @@ -224,6 +225,11 @@ def _num_flat_features(self, x): --------- x : torch.Tensor Input feature map. + + Returns + ------- + num_features : int + Count of features in the input. """ size = x.size()[1:] # all dimensions except the batch dimension @@ -239,6 +245,11 @@ def forward(self, x): --------- x : torch.Tensor (batch, time, EEG channel, channel) Input to convolve. 4d tensors are expected. + + Returns + ------- + x : torch.Tensor + The convolved outputs. 
""" x = self.conv_module(x) x = self.dense_module(x) diff --git a/benchmarks/MOABB/models/ShallowConvNet.py b/benchmarks/MOABB/models/ShallowConvNet.py index c5aa4804a..2dfabe716 100644 --- a/benchmarks/MOABB/models/ShallowConvNet.py +++ b/benchmarks/MOABB/models/ShallowConvNet.py @@ -5,6 +5,7 @@ Authors * Davide Borra, 2021 """ + import torch import speechbrain as sb @@ -105,14 +106,12 @@ def __init__( self.conv_module.add_module( "bnorm_1", sb.nnet.normalization.BatchNorm2d( - input_size=cnn_spatial_kernels, momentum=0.1, affine=True, + input_size=cnn_spatial_kernels, momentum=0.1, affine=True ), ) # Square-pool-log-dropout # conv non-lin - self.conv_module.add_module( - "square_1", Square(), - ) + self.conv_module.add_module("square_1", Square()) self.conv_module.add_module( "pool_1", sb.nnet.pooling.Pooling2d( @@ -123,12 +122,8 @@ def __init__( ), ) # pool non-lin - self.conv_module.add_module( - "log_1", Log(), - ) - self.conv_module.add_module( - "dropout_1", torch.nn.Dropout(p=dropout), - ) + self.conv_module.add_module("log_1", Log()) + self.conv_module.add_module("dropout_1", torch.nn.Dropout(p=dropout)) # Shape of intermediate feature maps out = self.conv_module( torch.ones((1,) + tuple(input_shape[1:-1]) + (1,)) @@ -136,13 +131,11 @@ def __init__( dense_input_size = self._num_flat_features(out) # DENSE MODULE self.dense_module = torch.nn.Sequential() - self.dense_module.add_module( - "flatten", torch.nn.Flatten(), - ) + self.dense_module.add_module("flatten", torch.nn.Flatten()) self.dense_module.add_module( "fc_out", sb.nnet.linear.Linear( - input_size=dense_input_size, n_neurons=dense_n_neurons, + input_size=dense_input_size, n_neurons=dense_n_neurons ), ) self.dense_module.add_module("act_out", torch.nn.LogSoftmax(dim=1)) @@ -154,6 +147,11 @@ def _num_flat_features(self, x): --------- x : torch.Tensor Input feature map. + + Returns + ------- + num_features : int + Count of all features in input. """ size = x.size()[1:] # all dimensions except the batch dimension @@ -169,6 +167,11 @@ def forward(self, x): --------- x : torch.Tensor (batch, time, EEG channel, channel) Input to convolve. 4d tensors are expected. + + Returns + ------- + x : torch.Tensor + The convolved output. """ x = self.conv_module(x) x = self.dense_module(x) diff --git a/benchmarks/MOABB/run_hparam_optimization.sh b/benchmarks/MOABB/run_hparam_optimization.sh index fd0e209d1..9ab415275 100755 --- a/benchmarks/MOABB/run_hparam_optimization.sh +++ b/benchmarks/MOABB/run_hparam_optimization.sh @@ -92,7 +92,7 @@ print_argument_descriptions() { echo " --mne_dir mne_dir [Optional] MNE directory. Need it different from your home (see notes on MNE in README.md)" echo " --orion_db_address [Optional] Path of the database where orion will store hparams and performance" echo " --orion_db_type db_type [Optional] Type of the dataset that orion will use. Default: PickledDB" - echo " --exp_max_trials int [Optional] Maximum number of hparam trials for each oprimization step. Default:50" + echo " --exp_max_trials int [Optional] Maximum number of hparam trials for each optimization step. Default:50" echo " --store_all Bool [Optional] When set to True, the output folders of all hparam trials will be stored in randomly named folders. Default: False" echo " --compress_exp Bool [Optional] When set to True, this option compresses the output folders of all hyperparameter trials into a single tar.gz file. This is particularly useful when store_all is set to True, as it helps prevent the accumulation of a large number of files. 
Default: False" exit 1 @@ -297,7 +297,7 @@ echo "-------------------------------------" # This function will extract all the optimization flags added in the yaml file # The input is a text file (e.g, a yaml file) and a pattern (e.g, "@orion_step1:") -# The ouput are the detected flags (e.g., --dropout~"uniform(0.0, 0.5)"). +# The output are the detected flags (e.g., --dropout~"uniform(0.0, 0.5)"). get_flag() { local file_path="$1" local pattern="$2" diff --git a/benchmarks/MOABB/train.py b/benchmarks/MOABB/train.py index 78d3fa058..d87cef9e0 100644 --- a/benchmarks/MOABB/train.py +++ b/benchmarks/MOABB/train.py @@ -81,7 +81,7 @@ def compute_objectives(self, predictions, batch, stage): self.hparams.lr_annealing.on_batch_end(self.optimizer) return loss - def on_fit_start(self,): + def on_fit_start(self): """Gets called at the beginning of ``fit()``""" self.init_model(self.hparams.model) self.init_optimizers() @@ -185,12 +185,16 @@ def on_stage_end(self, stage, stage_loss, epoch=None): stats_meta={ "epoch loaded": self.hparams.epoch_counter.current }, - test_stats=self.last_eval_stats - if not getattr(self, "log_test_as_valid", False) - else None, - valid_stats=self.last_eval_stats - if getattr(self, "log_test_as_valid", False) - else None, + test_stats=( + self.last_eval_stats + if not getattr(self, "log_test_as_valid", False) + else None + ), + valid_stats=( + self.last_eval_stats + if getattr(self, "log_test_as_valid", False) + else None + ), ) # save the averaged checkpoint at the end of the evaluation stage # delete the rest of the intermediate checkpoints @@ -218,17 +222,15 @@ def on_evaluate_start(self, max_key=None, min_key=None): max_key=max_key, min_key=min_key ) ckpt = sb.utils.checkpoints.average_checkpoints( - ckpts, recoverable_name="model", + ckpts, recoverable_name="model" ) self.hparams.model.load_state_dict(ckpt, strict=True) self.hparams.model.eval() - def check_if_best( - self, last_eval_stats, best_eval_stats, keys, - ): + def check_if_best(self, last_eval_stats, best_eval_stats, keys): """Checks if the current model is the best according at least to - one of the monitored metrics. """ + one of the monitored metrics.""" is_best = False for key in keys: if key == "loss": diff --git a/benchmarks/MOABB/utils/aggregate_results.py b/benchmarks/MOABB/utils/aggregate_results.py index 11c81ebd2..4f48aedd3 100644 --- a/benchmarks/MOABB/utils/aggregate_results.py +++ b/benchmarks/MOABB/utils/aggregate_results.py @@ -37,7 +37,7 @@ def get_prototype(res_file, eval_metric): Metric of interest (e.g, acc or f1). Returns - --------- + ------- prototype: list List of the lines of the result file (with as placeholder). n_metrics: int @@ -69,7 +69,7 @@ def get_metrics(res_files, eval_metric): Metric of interest (e.g, acc or f1). Returns - --------- + ------- metrics: np.array Matrix (n_metrics, n_files) containing the metrics of interest. 
""" diff --git a/benchmarks/MOABB/utils/dataio_iterators.py b/benchmarks/MOABB/utils/dataio_iterators.py index 07379f198..3c81ed144 100644 --- a/benchmarks/MOABB/utils/dataio_iterators.py +++ b/benchmarks/MOABB/utils/dataio_iterators.py @@ -49,7 +49,7 @@ def get_dataloader(batch_size, xy_train, xy_valid, xy_test): inps = torch.Tensor( x_train.reshape( - (x_train.shape[0], x_train.shape[1], x_train.shape[2], 1,) + (x_train.shape[0], x_train.shape[1], x_train.shape[2], 1) ) ) tgts = torch.tensor(y_train, dtype=torch.long) @@ -60,7 +60,7 @@ def get_dataloader(batch_size, xy_train, xy_valid, xy_test): inps = torch.Tensor( x_valid.reshape( - (x_valid.shape[0], x_valid.shape[1], x_valid.shape[2], 1,) + (x_valid.shape[0], x_valid.shape[1], x_valid.shape[2], 1) ) ) tgts = torch.tensor(y_valid, dtype=torch.long) @@ -68,7 +68,14 @@ def get_dataloader(batch_size, xy_train, xy_valid, xy_test): valid_loader = DataLoader(ds, batch_size=batch_size, pin_memory=True) inps = torch.Tensor( - x_test.reshape((x_test.shape[0], x_test.shape[1], x_test.shape[2], 1,)) + x_test.reshape( + ( + x_test.shape[0], + x_test.shape[1], + x_test.shape[2], + 1, + ) + ) ) tgts = torch.tensor(y_test, dtype=torch.long) ds = TensorDataset(inps, tgts) @@ -206,15 +213,13 @@ def prepare( Flag to save the prepared dataset into a pkl file. n_steps_channel_selection: int Number of steps to perform when sampling a subset of channels from a seed channel, based on the adjacency matrix. - ... Returns - --------- + ------- tail_path: str String containing the relative path where results will be stored for the specified iterator, subject and session. datasets: dict Dictionary containing all sets (keys: 'train', 'test', 'valid'). - --------- """ interval = [tmin, tmax] @@ -259,7 +264,7 @@ def prepare( # obtaining indices for the current session idx = np.where(metadata.session == s)[0] # validation set definition (equal proportion btw classes) - (tmp_idx_train, tmp_idx_valid,) = get_idx_train_valid_classbalanced( + (tmp_idx_train, tmp_idx_valid) = get_idx_train_valid_classbalanced( idx, valid_ratio, y ) idx_train.extend(tmp_idx_train) @@ -427,15 +432,13 @@ def prepare( Flag to save the prepared dataset into a pkl file. n_steps_channel_selection: int Number of steps to perform when sampling a subset of channels from a seed channel, based on the adjacency matrix. - ... Returns - --------- + ------- tail_path: str String containing the relative path where results will be stored for the specified iterator, subject and session. datasets: dict Dictionary containing all sets (keys: 'train', 'test', 'valid'). 
- --------- """ interval = [tmin, tmax] if len(dataset.subject_list) < 2: diff --git a/benchmarks/MOABB/utils/parse_results.py b/benchmarks/MOABB/utils/parse_results.py index 5c22445b3..3c6525a62 100644 --- a/benchmarks/MOABB/utils/parse_results.py +++ b/benchmarks/MOABB/utils/parse_results.py @@ -32,7 +32,6 @@ def load_metrics(filepath: Path) -> dict: - """ Loads pickles and parses into a dictionary @@ -68,7 +67,7 @@ def visualize_results(paradigm: str, results: dict, vis_metrics: list) -> None: """ print("\n----", paradigm.name, "----") for key in results: - if type(results[key]) == dict: + if isinstance(results[key], dict): for m in vis_metrics: print( key, @@ -93,7 +92,6 @@ def parse_one_session_out( stat_metrics: list = ["loss", "f1", "acc"], metric_file: str = "test_metrics.pkl", ) -> dict: - """ Aggregates results obtain by helding back one session as test set and using the remaining ones to train the neural nets @@ -258,7 +256,7 @@ def aggregate_metrics( Arguments --------- - verbode: int + verbose: int metric_file: str stat_metrics: list diff --git a/benchmarks/MOABB/utils/prepare.py b/benchmarks/MOABB/utils/prepare.py index 9f6371be4..441ae966d 100644 --- a/benchmarks/MOABB/utils/prepare.py +++ b/benchmarks/MOABB/utils/prepare.py @@ -83,11 +83,11 @@ def get_output_dict( ) if verbose == 1: - for l in np.unique(labels): + for label in np.unique(labels): print( print( "Number of label {0} examples: {1}".format( - l, np.where(labels == l)[0].shape[0] + label, np.where(labels == label)[0].shape[0] ) ) ) @@ -119,7 +119,8 @@ def get_output_dict( def load_data(paradigm, dataset, idx): """This function returns EEG signals and the corresponding labels using MOABB methods - In addition metadata, channel names and the sampling rate are provided too.""" + In addition metadata, channel names and the sampling rate are provided too. + """ x, labels, metadata = paradigm.get_data(dataset, idx, True) ch_names = x.info.ch_names adjacency, _ = find_ch_adjacency(x.info, ch_type="eeg") diff --git a/benchmarks/MP3S/Buckeye/LSTM/buckeye_prepare.py b/benchmarks/MP3S/Buckeye/LSTM/buckeye_prepare.py index 4ea352128..799727ae8 100755 --- a/benchmarks/MP3S/Buckeye/LSTM/buckeye_prepare.py +++ b/benchmarks/MP3S/Buckeye/LSTM/buckeye_prepare.py @@ -183,10 +183,6 @@ def unzip_buckeye(buckeye_dir): --------- buckeye_dir : str Path to the folder containing the Buckeye zipped folders. - - Returns - ------- - None """ files = os.listdir(buckeye_dir) for zip_fil in files: @@ -220,10 +216,6 @@ def prepare_csv(buckeye_dir, save_folder, csv_file, prefixes): Name of the file that will be saved (corresponding to the split) prefixes: list List of prefixes of recordings defining the elements that go into this split - - Returns - ------- - None """ csv_lines = [ ["ID", "duration", "wav", "spk_id", "start_seg", "end_seg", "wrd"] diff --git a/benchmarks/MP3S/Buckeye/LSTM/train.py b/benchmarks/MP3S/Buckeye/LSTM/train.py index c48e6bc7e..14a60cbb8 100644 --- a/benchmarks/MP3S/Buckeye/LSTM/train.py +++ b/benchmarks/MP3S/Buckeye/LSTM/train.py @@ -108,7 +108,8 @@ def on_stage_end(self, stage, stage_loss, epoch): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -141,11 +142,13 @@ def init_optimizers(self): def dataio_prepare(hparams): """This function prepares the datasets to be used in the brain class. 
- It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ data_folder = hparams["data_folder"] train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["train_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["train_csv"], + replacements={"data_root": data_folder}, ) if hparams["sorting"] == "ascending": @@ -170,7 +173,8 @@ def dataio_prepare(hparams): ) valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["valid_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["valid_csv"], + replacements={"data_root": data_folder}, ) valid_data = valid_data.filtered_sorted(sort_key="duration") @@ -228,7 +232,8 @@ def text_pipeline(wrd): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "wrd", "char_list", "tokens"], + datasets, + ["id", "sig", "wrd", "char_list", "tokens"], ) return train_data, valid_data, test_datasets, label_encoder @@ -277,7 +282,7 @@ def text_pipeline(wrd): checkpointer=hparams["checkpointer"], ) - # We dynamicaly add the tokenizer to our brain class. + # We dynamically add the tokenizer to our brain class. # NB: This tokenizer corresponds to the one used for the LM!! asr_brain.tokenizer = label_encoder @@ -287,7 +292,8 @@ def text_pipeline(wrd): from speechbrain.decoders.ctc import CTCBeamSearcher test_searcher = CTCBeamSearcher( - **hparams["test_beam_search"], vocab_list=vocab_list, + **hparams["test_beam_search"], + vocab_list=vocab_list, ) # Training @@ -305,7 +311,7 @@ def text_pipeline(wrd): for k in test_datasets.keys(): # keys are test_clean, test_other etc asr_brain.hparams.test_wer_file = os.path.join( - hparams["output_wer_folder"], f"wer_{k}.txt" + hparams["output_wer_folder"], "wer_{k}.txt" ) asr_brain.evaluate( test_datasets[k], diff --git a/benchmarks/MP3S/Buckeye/contextnet/train.py b/benchmarks/MP3S/Buckeye/contextnet/train.py index 52b56930f..c8509f9d3 100644 --- a/benchmarks/MP3S/Buckeye/contextnet/train.py +++ b/benchmarks/MP3S/Buckeye/contextnet/train.py @@ -108,7 +108,8 @@ def on_stage_end(self, stage, stage_loss, epoch): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -141,11 +142,13 @@ def init_optimizers(self): def dataio_prepare(hparams): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ data_folder = hparams["data_folder"] train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["train_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["train_csv"], + replacements={"data_root": data_folder}, ) if hparams["sorting"] == "ascending": @@ -170,7 +173,8 @@ def dataio_prepare(hparams): ) valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["valid_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["valid_csv"], + replacements={"data_root": data_folder}, ) valid_data = valid_data.filtered_sorted(sort_key="duration") @@ -232,7 +236,8 @@ def text_pipeline(wrd): # 4. 
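The dataio_prepare functions being reformatted in these MP3S recipes all follow the same SpeechBrain pattern: build DynamicItemDatasets from CSV files, register user-defined processing functions, and declare which keys each batch should expose. A trimmed-down sketch of that pattern, with an illustrative CSV path and data root:

import speechbrain as sb


@sb.utils.data_pipeline.takes("wav")
@sb.utils.data_pipeline.provides("sig")
def audio_pipeline(wav):
    # Load the waveform referenced by the CSV "wav" column.
    return sb.dataio.dataio.read_audio(wav)


train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
    csv_path="train.csv",                         # illustrative path
    replacements={"data_root": "/path/to/data"},  # illustrative root
)
sb.dataio.dataset.add_dynamic_item([train_data], audio_pipeline)
sb.dataio.dataset.set_output_keys([train_data], ["id", "sig"])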
Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "wrd", "char_list", "tokens"], + datasets, + ["id", "sig", "wrd", "char_list", "tokens"], ) return train_data, valid_data, test_datasets, label_encoder @@ -281,7 +286,7 @@ def text_pipeline(wrd): checkpointer=hparams["checkpointer"], ) - # We dynamicaly add the tokenizer to our brain class. + # We dynamically add the tokenizer to our brain class. # NB: This tokenizer corresponds to the one used for the LM!! asr_brain.tokenizer = label_encoder @@ -291,7 +296,8 @@ def text_pipeline(wrd): from speechbrain.decoders.ctc import CTCBeamSearcher test_searcher = CTCBeamSearcher( - **hparams["test_beam_search"], vocab_list=vocab_list, + **hparams["test_beam_search"], + vocab_list=vocab_list, ) # Training @@ -309,7 +315,7 @@ def text_pipeline(wrd): for k in test_datasets.keys(): # keys are test_clean, test_other etc asr_brain.hparams.test_wer_file = os.path.join( - hparams["output_wer_folder"], f"wer_{k}.txt" + hparams["output_wer_folder"], "wer_{k}.txt" ) asr_brain.evaluate( test_datasets[k], diff --git a/benchmarks/MP3S/CommonVoice/LSTM/common_voice_prepare.py b/benchmarks/MP3S/CommonVoice/LSTM/common_voice_prepare.py index 5af472065..96ab4340f 100644 --- a/benchmarks/MP3S/CommonVoice/LSTM/common_voice_prepare.py +++ b/benchmarks/MP3S/CommonVoice/LSTM/common_voice_prepare.py @@ -37,6 +37,7 @@ def prepare_common_voice( """ Prepares the csv files for the Mozilla Common Voice dataset. Download: https://voice.mozilla.org/en/datasets + Arguments --------- data_folder : str @@ -57,6 +58,11 @@ def prepare_common_voice( Specify the language for text normalization. skip_prep: bool If True, skip data preparation. + + Returns + ------- + None + Example ------- >>> from recipes.CommonVoice.common_voice_prepare import prepare_common_voice @@ -101,7 +107,7 @@ def prepare_common_voice( if not os.path.exists(save_folder): os.makedirs(save_folder) - # Setting ouput files + # Setting output files save_csv_train = save_folder + "/train.csv" save_csv_dev = save_folder + "/dev.csv" save_csv_test = save_folder + "/test.csv" @@ -128,15 +134,23 @@ def prepare_common_voice( [save_csv_train, save_csv_dev, save_csv_test], ) for tsv_file, save_csv in file_pairs: - create_csv( - tsv_file, save_csv, data_folder, accented_letters, language, - ) + create_csv(tsv_file, save_csv, data_folder, accented_letters, language) def skip(save_csv_train, save_csv_dev, save_csv_test): """ Detects if the Common Voice data preparation has been already done. If the preparation has been done, we can skip it. + + Arguments + --------- + save_csv_train : str + The train csv file + save_csv_dev : str + The dev csv file + save_csv_test : str + The test csv file + Returns ------- bool @@ -233,18 +247,20 @@ def create_csv( ): """ Creates the csv file given a list of wav files. + Arguments --------- orig_tsv_file : str Path to the Common Voice tsv file (standard file). + csv_file : str + Path to the new CSV file. data_folder : str Path of the CommonVoice dataset. accented_letters : bool, optional Defines if accented letters will be kept as individual letters or transformed to the closest non-accented letters. 
- Returns - ------- - None + language : str + The language to use, default "en" """ # Check if the given files exists @@ -333,9 +349,7 @@ def language_specific_preprocess(language, words): ) # replace 0000SS0000 back to ß as its initial presence in the corpus elif language == "fr": # SM - words = re.sub( - "[^’'A-Za-z0-9À-ÖØ-öø-ÿЀ-ӿéæœâçèàûî]+", " ", words - ) + words = re.sub("[^’'A-Za-z0-9À-ÖØ-öø-ÿЀ-ӿéæœâçèàûî]+", " ", words) words = words.replace("’", "'") words = words.replace("é", "é") words = words.replace("æ", "ae") @@ -424,9 +438,12 @@ def check_commonvoice_folders(data_folder): """ Check if the data folder actually contains the Common Voice dataset. If not, raises an error. - Returns - ------- - None + + Arguments + --------- + data_folder : str + Path to the directory containing data. + Raises ------ FileNotFoundError diff --git a/benchmarks/MP3S/CommonVoice/LSTM/train.py b/benchmarks/MP3S/CommonVoice/LSTM/train.py index 2a35bf582..fdfa78222 100644 --- a/benchmarks/MP3S/CommonVoice/LSTM/train.py +++ b/benchmarks/MP3S/CommonVoice/LSTM/train.py @@ -107,7 +107,8 @@ def on_stage_end(self, stage, stage_loss, epoch): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -141,13 +142,15 @@ def init_optimizers(self): # Define custom data procedure def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ # 1. Define datasets data_folder = hparams["data_folder"] train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["train_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["train_csv"], + replacements={"data_root": data_folder}, ) if hparams["sorting"] == "ascending": @@ -177,13 +180,15 @@ def dataio_prepare(hparams, tokenizer): ) valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["valid_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["valid_csv"], + replacements={"data_root": data_folder}, ) # We also sort the validation data so it is faster to validate valid_data = valid_data.filtered_sorted(sort_key="duration") test_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["test_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["test_csv"], + replacements={"data_root": data_folder}, ) # We also sort the validation data so it is faster to validate @@ -198,7 +203,8 @@ def audio_pipeline(wav): info = torchaudio.info(wav) sig = sb.dataio.dataio.read_audio(wav) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -217,7 +223,8 @@ def text_pipeline(wrd): # 4. 
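The audio_pipeline touched in this CommonVoice recipe resamples every clip from its native rate to the rate expected by the SSL encoder at load time. The same step in isolation (the path and the 16 kHz target are placeholders; the recipe reads the target from hparams["sample_rate"]):

import torchaudio
import speechbrain as sb

wav = "path/to/clip.wav"  # placeholder
info = torchaudio.info(wav)
sig = sb.dataio.dataio.read_audio(wav)
resampled = torchaudio.transforms.Resample(
    info.sample_rate,
    16000,
)(sig)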
Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens"], + datasets, + ["id", "sig", "tokens"], ) return train_data, valid_data, test_data @@ -289,7 +296,8 @@ def text_pipeline(wrd): from speechbrain.decoders.ctc import CTCBeamSearcher test_searcher = CTCBeamSearcher( - **hparams["test_beam_search"], vocab_list=vocab_list, + **hparams["test_beam_search"], + vocab_list=vocab_list, ) # Training diff --git a/benchmarks/MP3S/CommonVoice/linear/train.py b/benchmarks/MP3S/CommonVoice/linear/train.py index abd87cdfa..69b9e9aea 100644 --- a/benchmarks/MP3S/CommonVoice/linear/train.py +++ b/benchmarks/MP3S/CommonVoice/linear/train.py @@ -106,7 +106,8 @@ def on_stage_end(self, stage, stage_loss, epoch): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -140,13 +141,15 @@ def init_optimizers(self): # Define custom data procedure def dataio_prepare(hparams, tokenizer): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ # 1. Define datasets data_folder = hparams["data_folder"] train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["train_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["train_csv"], + replacements={"data_root": data_folder}, ) if hparams["sorting"] == "ascending": @@ -176,13 +179,15 @@ def dataio_prepare(hparams, tokenizer): ) valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["valid_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["valid_csv"], + replacements={"data_root": data_folder}, ) # We also sort the validation data so it is faster to validate valid_data = valid_data.filtered_sorted(sort_key="duration") test_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["test_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["test_csv"], + replacements={"data_root": data_folder}, ) # We also sort the validation data so it is faster to validate @@ -197,7 +202,8 @@ def audio_pipeline(wav): info = torchaudio.info(wav) sig = sb.dataio.dataio.read_audio(wav) resampled = torchaudio.transforms.Resample( - info.sample_rate, hparams["sample_rate"], + info.sample_rate, + hparams["sample_rate"], )(sig) return resampled @@ -216,7 +222,8 @@ def text_pipeline(wrd): # 4. 
Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "tokens"], + datasets, + ["id", "sig", "tokens"], ) return train_data, valid_data, test_data @@ -288,7 +295,8 @@ def text_pipeline(wrd): from speechbrain.decoders.ctc import CTCBeamSearcher test_searcher = CTCBeamSearcher( - **hparams["test_beam_search"], vocab_list=vocab_list, + **hparams["test_beam_search"], + vocab_list=vocab_list, ) # Training diff --git a/benchmarks/MP3S/IEMOCAP/ecapa_tdnn/hparams/ssl.yaml b/benchmarks/MP3S/IEMOCAP/ecapa_tdnn/hparams/ssl.yaml index 1f74f7255..f563f1b66 100644 --- a/benchmarks/MP3S/IEMOCAP/ecapa_tdnn/hparams/ssl.yaml +++ b/benchmarks/MP3S/IEMOCAP/ecapa_tdnn/hparams/ssl.yaml @@ -21,7 +21,7 @@ train_log: !ref /train_log.txt # URL for the ssl encoder model, you can change to benchmark diffrenet models # Important: we use ssl encoder base and not the fine-tuned one with ASR task -# This allow you to have ~4% improvment +# This allow you to have ~4% improvement ssl_hub: facebook/wav2vec2-base ssl_folder: !ref /ssl_checkpoints diff --git a/benchmarks/MP3S/IEMOCAP/ecapa_tdnn/iemocap_prepare.py b/benchmarks/MP3S/IEMOCAP/ecapa_tdnn/iemocap_prepare.py index 48feed81f..2e82e66cf 100644 --- a/benchmarks/MP3S/IEMOCAP/ecapa_tdnn/iemocap_prepare.py +++ b/benchmarks/MP3S/IEMOCAP/ecapa_tdnn/iemocap_prepare.py @@ -45,12 +45,14 @@ def prepare_data( Path where the validation data specification file will be saved. save_json_test : str Path where the test data specification file will be saved. - split_ratio: list + split_ratio : list List composed of three integers that sets split ratios for train, - valid, and test sets, respecively. + valid, and test sets, respectively. For instance split_ratio=[80, 10, 10] will assign 80% of the sentences to training, 10% for validation, and 10% for test. - test_spk_id: int + different_speakers : bool + Whether to prevent the same speaker from appearing in different splits. + test_spk_id : int Id of speaker used for test set, 10 speakers in total. Here a leave-two-speaker strategy is used for the split, if one test_spk_id is selected for test, the other spk_id in the same @@ -60,6 +62,10 @@ def prepare_data( seed : int Seed for reproducibility + Returns + ------- + None + Example ------- >>> data_original = '/path/to/iemocap/IEMOCAP_full_release' @@ -139,6 +145,11 @@ def skip(*filenames): Detects if the data preparation has been already done. If the preparation has been done, we can skip it. + Arguments + --------- + *filenames : tuple + A list of file paths to check for existence. + Returns ------- bool @@ -168,7 +179,7 @@ def split_different_speakers(speaker_dict, test_spk_id): Session1 contains speaker 1&2, Session2 contains speaker 3&4, ... Returns - ------ + ------- dictionary containing train, valid, and test splits. """ data_split = {k: [] for k in ["train", "valid", "test"]} @@ -195,12 +206,12 @@ def split_sets(speaker_dict, split_ratio): same proportion of samples (e.g, spk01 should have 80% of samples in training, 10% validation, 10% test, the same for speaker2 etc.). This is the approach followed in some recipes such as the Voxceleb one. For - simplicity, we here simply split the full list without necessarly + simplicity, we here simply split the full list without necessarily respecting the split ratio within each class. 
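As the split_sets docstring above explains, the data are cut into train/valid/test by cumulative percentages rather than stratified per class. A simplified, self-contained version of that idea operating on a flat list (the real function works on a speaker dictionary):

def split_sets(items, split_ratio=(80, 10, 10)):
    # Cut `items` into train/valid/test according to cumulative percentages.
    n_train = int(len(items) * split_ratio[0] / 100)
    n_valid = int(len(items) * split_ratio[1] / 100)
    return {
        "train": items[:n_train],
        "valid": items[n_train:n_train + n_valid],
        "test": items[n_train + n_valid:],
    }


splits = split_sets(list(range(100)))
print({k: len(v) for k, v in splits.items()})  # {'train': 80, 'valid': 10, 'test': 10}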
Arguments --------- - speaker_dict : list + speaker_dict : dict a dictionary of speaker id and its corresponding audio information split_ratio: list List composed of three integers that sets split ratios for train, @@ -209,7 +220,7 @@ def split_sets(speaker_dict, split_ratio): to training, 10% for validation, and 10% for test. Returns - ------ + ------- dictionary containing train, valid, and test splits. """ @@ -242,6 +253,11 @@ def transform_data(path_loadSession): path_loadSession : str Path to the folder where the original IEMOCAP dataset is stored. + Returns + ------- + speaker_dict : dict + a dictionary of speaker id and its corresponding audio information + Example ------- >>> data_original = '/path/to/iemocap/IEMOCAP_full_release/Session' @@ -273,7 +289,7 @@ def load_utterInfo(inputFile): # [START_TIME - END_TIME] TURN_NAME EMOTION [V, A, D] # [V, A, D] means [Valence, Arousal, Dominance] pattern = re.compile( - "[\[]*[0-9]*[.][0-9]*[ -]*[0-9]*[.][0-9]*[\]][\t][a-z0-9_]*[\t][a-z]{3}[\t][\[][0-9]*[.][0-9]*[, ]+[0-9]*[.][0-9]*[, ]+[0-9]*[.][0-9]*[\]]", + r"[\[]*[0-9]*[.][0-9]*[ -]*[0-9]*[.][0-9]*[\]][\t][a-z0-9_]*[\t][a-z]{3}[\t][\[][0-9]*[.][0-9]*[, ]+[0-9]*[.][0-9]*[, ]+[0-9]*[.][0-9]*[\]]", re.IGNORECASE, ) # noqa with open(inputFile, "r") as myfile: diff --git a/benchmarks/MP3S/IEMOCAP/ecapa_tdnn/train.py b/benchmarks/MP3S/IEMOCAP/ecapa_tdnn/train.py index 5fcd32de2..e3078a7e2 100644 --- a/benchmarks/MP3S/IEMOCAP/ecapa_tdnn/train.py +++ b/benchmarks/MP3S/IEMOCAP/ecapa_tdnn/train.py @@ -161,7 +161,7 @@ def audio_pipeline(wav): sig = sb.dataio.dataio.read_audio(wav) return sig - # Initialization of the label encoder. The label encoder assignes to each + # Initialization of the label encoder. The label encoder assigns to each # of the observed label a unique index (e.g, 'spk01': 0, 'spk02': 1, ..) label_encoder = sb.dataio.encoder.CategoricalEncoder() diff --git a/benchmarks/MP3S/IEMOCAP/linear/hparams/ssl.yaml b/benchmarks/MP3S/IEMOCAP/linear/hparams/ssl.yaml index 83c5f5882..4e8550b66 100644 --- a/benchmarks/MP3S/IEMOCAP/linear/hparams/ssl.yaml +++ b/benchmarks/MP3S/IEMOCAP/linear/hparams/ssl.yaml @@ -21,7 +21,7 @@ train_log: !ref /train_log.txt # URL for the ssl encoder model, you can change to benchmark diffrenet models # Important: we use ssl encoder base and not the fine-tuned one with ASR task -# This allow you to have ~4% improvment +# This allow you to have ~4% improvement ssl_hub: facebook/wav2vec2-base ssl_folder: !ref /ssl_checkpoints diff --git a/benchmarks/MP3S/IEMOCAP/linear/train.py b/benchmarks/MP3S/IEMOCAP/linear/train.py index 99e5b64c6..b9e650c23 100644 --- a/benchmarks/MP3S/IEMOCAP/linear/train.py +++ b/benchmarks/MP3S/IEMOCAP/linear/train.py @@ -169,7 +169,7 @@ def audio_pipeline(wav): sig = sb.dataio.dataio.read_audio(wav) return sig - # Initialization of the label encoder. The label encoder assignes to each + # Initialization of the label encoder. The label encoder assigns to each # of the observed label a unique index (e.g, 'spk01': 0, 'spk02': 1, ..) label_encoder = sb.dataio.encoder.CategoricalEncoder() diff --git a/benchmarks/MP3S/LibriSpeech/LSTM/librispeech_prepare.py b/benchmarks/MP3S/LibriSpeech/LSTM/librispeech_prepare.py index dfb033d35..5cb3ec112 100755 --- a/benchmarks/MP3S/LibriSpeech/LSTM/librispeech_prepare.py +++ b/benchmarks/MP3S/LibriSpeech/LSTM/librispeech_prepare.py @@ -46,6 +46,8 @@ def prepare_librispeech( --------- data_folder : str Path to the folder where the original LibriSpeech dataset is stored. 
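The change to load_utterInfo above adds an r prefix to the regular expression so that sequences like \[ and \] reach the regex engine verbatim instead of being treated as (invalid) string escapes, which recent Python versions flag with a warning. A minimal illustration with a much simpler pattern (not the IEMOCAP one):

import re

# r"..." keeps backslashes literal; "\[" without it triggers an invalid-escape warning.
pattern = re.compile(r"\[[0-9]+\.[0-9]+ - [0-9]+\.[0-9]+\]")
print(bool(pattern.search("[12.34 - 56.78]")))  # True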
+ save_folder : str + The directory where to store the csv files. tr_splits : list List of train splits to prepare from ['test-others','train-clean-100', 'train-clean-360','train-other-500']. @@ -53,14 +55,12 @@ def prepare_librispeech( List of dev splits to prepare from ['dev-clean','dev-others']. te_splits : list List of test splits to prepare from ['test-clean','test-others']. - save_folder : str - The directory where to store the csv files. select_n_sentences : int Default : None If not None, only pick this many sentences. merge_lst : list List of librispeech splits (e.g, train-clean, train-clean-360,..) to - merge in a singe csv file. + merge in a single csv file. merge_name: str Name of the merged csv file. create_lexicon: bool @@ -69,6 +69,9 @@ def prepare_librispeech( skip_prep: bool If True, data preparation is skipped. + Returns + ------- + None Example ------- @@ -129,15 +132,13 @@ def prepare_librispeech( else: n_sentences = len(wav_lst) - create_csv( - save_folder, wav_lst, text_dict, split, n_sentences, - ) + create_csv(save_folder, wav_lst, text_dict, split, n_sentences) # Merging csv file if needed if merge_lst and merge_name is not None: merge_files = [split_libri + ".csv" for split_libri in merge_lst] merge_csvs( - data_folder=save_folder, csv_lst=merge_files, merged_csv=merge_name, + data_folder=save_folder, csv_lst=merge_files, merged_csv=merge_name ) # Create lexicon.csv and oov.csv @@ -155,15 +156,12 @@ def create_lexicon_and_oov_csv(all_texts, data_folder, save_folder): Arguments --------- - all_text : dict + all_texts : dict Dictionary containing text from the librispeech transcriptions data_folder : str Path to the folder where the original LibriSpeech dataset is stored. save_folder : str The directory where to store the csv files. - Returns - ------- - None """ # If the lexicon file does not exist, download it lexicon_url = "http://www.openslr.org/resources/11/librispeech-lexicon.txt" @@ -226,10 +224,6 @@ def split_lexicon(data_folder, split_ratio): List containing the training, validation, and test split ratio. Set it to [80, 10, 10] for having 80% of material for training, 10% for valid, and 10 for test. - - Returns - ------- - None """ # Reading lexicon.csv lexicon_csv_path = os.path.join(data_folder, "lexicon.csv") @@ -259,9 +253,7 @@ def split_lexicon(data_folder, split_ratio): f.writelines(test_lines) -def create_csv( - save_folder, wav_lst, text_dict, split, select_n_sentences, -): +def create_csv(save_folder, wav_lst, text_dict, split, select_n_sentences): """ Create the dataset csv file given a list of wav files. @@ -277,10 +269,6 @@ def create_csv( The name of the current data split. select_n_sentences : int, optional The number of sentences to select. - - Returns - ------- - None """ # Setting path for the csv file csv_file = os.path.join(save_folder, split + ".csv") @@ -407,9 +395,12 @@ def check_librispeech_folders(data_folder, splits): If it does not, an error is raised. - Returns - ------- - None + Arguments + --------- + data_folder : str + The path to the directory containing the data. + splits : list + The data portions to check. 
Raises ------ diff --git a/benchmarks/MP3S/LibriSpeech/LSTM/train.py b/benchmarks/MP3S/LibriSpeech/LSTM/train.py index cf259f10b..d9d815380 100644 --- a/benchmarks/MP3S/LibriSpeech/LSTM/train.py +++ b/benchmarks/MP3S/LibriSpeech/LSTM/train.py @@ -109,7 +109,8 @@ def on_stage_end(self, stage, stage_loss, epoch): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -142,11 +143,13 @@ def init_optimizers(self): def dataio_prepare(hparams): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions. + """ data_folder = hparams["data_folder"] train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["train_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["train_csv"], + replacements={"data_root": data_folder}, ) if hparams["sorting"] == "ascending": @@ -171,7 +174,8 @@ def dataio_prepare(hparams): ) valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["valid_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["valid_csv"], + replacements={"data_root": data_folder}, ) valid_data = valid_data.filtered_sorted(sort_key="duration") @@ -229,7 +233,8 @@ def text_pipeline(wrd): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "wrd", "char_list", "tokens"], + datasets, + ["id", "sig", "wrd", "char_list", "tokens"], ) return train_data, valid_data, test_datasets, label_encoder @@ -285,7 +290,7 @@ def text_pipeline(wrd): ) # Loading the SSL model - # We dynamicaly add the tokenizer to our brain class. + # We dynamically add the tokenizer to our brain class. asr_brain.tokenizer = label_encoder ind2lab = label_encoder.ind2lab @@ -294,7 +299,8 @@ def text_pipeline(wrd): from speechbrain.decoders.ctc import CTCBeamSearcher test_searcher = CTCBeamSearcher( - **hparams["test_beam_search"], vocab_list=vocab_list, + **hparams["test_beam_search"], + vocab_list=vocab_list, ) # Training @@ -312,7 +318,7 @@ def text_pipeline(wrd): for k in test_datasets.keys(): # keys are test_clean, test_other etc asr_brain.hparams.test_wer_file = os.path.join( - hparams["output_wer_folder"], f"wer_{k}.txt" + hparams["output_wer_folder"], f"wer_{k}.txt" ) asr_brain.evaluate( test_datasets[k], diff --git a/benchmarks/MP3S/LibriSpeech/contextnet/train.py b/benchmarks/MP3S/LibriSpeech/contextnet/train.py index 2c942cec0..facdd26f5 100644 --- a/benchmarks/MP3S/LibriSpeech/contextnet/train.py +++ b/benchmarks/MP3S/LibriSpeech/contextnet/train.py @@ -109,7 +109,8 @@ def on_stage_end(self, stage, stage_loss, epoch): valid_stats=stage_stats, ) self.checkpointer.save_and_keep_only( - meta={"WER": stage_stats["WER"]}, min_keys=["WER"], + meta={"WER": stage_stats["WER"]}, + min_keys=["WER"], ) elif stage == sb.Stage.TEST: self.hparams.train_logger.log_stats( @@ -142,11 +143,13 @@ def init_optimizers(self): def dataio_prepare(hparams): """This function prepares the datasets to be used in the brain class. - It also defines the data processing pipeline through user-defined functions.""" + It also defines the data processing pipeline through user-defined functions.
+ """ data_folder = hparams["data_folder"] train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["train_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["train_csv"], + replacements={"data_root": data_folder}, ) if hparams["sorting"] == "ascending": @@ -171,7 +174,8 @@ def dataio_prepare(hparams): ) valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["valid_csv"], replacements={"data_root": data_folder}, + csv_path=hparams["valid_csv"], + replacements={"data_root": data_folder}, ) valid_data = valid_data.filtered_sorted(sort_key="duration") @@ -229,7 +233,8 @@ def text_pipeline(wrd): # 4. Set output: sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "wrd", "char_list", "tokens"], + datasets, + ["id", "sig", "wrd", "char_list", "tokens"], ) return train_data, valid_data, test_datasets, label_encoder @@ -284,7 +289,7 @@ def text_pipeline(wrd): checkpointer=hparams["checkpointer"], ) - # We dynamicaly add the tokenizer to our brain class. + # We dynamically add the tokenizer to our brain class. asr_brain.tokenizer = label_encoder ind2lab = label_encoder.ind2lab @@ -293,7 +298,8 @@ def text_pipeline(wrd): from speechbrain.decoders.ctc import CTCBeamSearcher test_searcher = CTCBeamSearcher( - **hparams["test_beam_search"], vocab_list=vocab_list, + **hparams["test_beam_search"], + vocab_list=vocab_list, ) # Training @@ -311,7 +317,7 @@ def text_pipeline(wrd): for k in test_datasets.keys(): # keys are test_clean, test_other etc asr_brain.hparams.test_wer_file = os.path.join( - hparams["output_wer_folder"], f"wer_{k}.txt" + hparams["output_wer_folder"], "wer_{k}.txt" ) asr_brain.evaluate( test_datasets[k], diff --git a/benchmarks/MP3S/SLURP/LSTM_linear/train.py b/benchmarks/MP3S/SLURP/LSTM_linear/train.py index 370d83b19..e640a5dc7 100644 --- a/benchmarks/MP3S/SLURP/LSTM_linear/train.py +++ b/benchmarks/MP3S/SLURP/LSTM_linear/train.py @@ -164,7 +164,8 @@ def dataio_prep(hparams): data_folder = hparams["data_folder"] train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["csv_train"], replacements={"data_root": data_folder}, + csv_path=hparams["csv_train"], + replacements={"data_root": data_folder}, ) if hparams["sorting"] == "ascending": @@ -189,12 +190,14 @@ def dataio_prep(hparams): ) valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["csv_valid"], replacements={"data_root": data_folder}, + csv_path=hparams["csv_valid"], + replacements={"data_root": data_folder}, ) valid_data = valid_data.filtered_sorted(sort_key="duration") test_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["csv_test"], replacements={"data_root": data_folder}, + csv_path=hparams["csv_test"], + replacements={"data_root": data_folder}, ) test_data = test_data.filtered_sorted(sort_key="duration") @@ -209,7 +212,7 @@ def audio_pipeline(wav): sig = sb.dataio.dataio.read_audio(wav) return sig - # Initialization of the label encoder. The label encoder assignes to each + # Initialization of the label encoder. The label encoder assigns to each # of the observed label a unique index (e.g, 'spk01': 0, 'spk02': 1, ..) label_encoder = sb.dataio.encoder.CategoricalEncoder() @@ -228,7 +231,8 @@ def label_pipeline(semantics): # Define datasets. We also connect the dataset with the data processing # functions defined above. 
sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "scenario", "scenario_encoded"], + datasets, + ["id", "sig", "scenario", "scenario_encoded"], ) # Load or compute the label encoder (with multi-GPU DDP support) # Please, take a look into the lab_enc_file to see the label to index @@ -236,7 +240,9 @@ def label_pipeline(semantics): lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt") label_encoder.load_or_create( - path=lab_enc_file, from_didatasets=[datasets[0]], output_key="scenario", + path=lab_enc_file, + from_didatasets=[datasets[0]], + output_key="scenario", ) return {"train": datasets[0], "valid": datasets[1], "test": datasets[2]} diff --git a/benchmarks/MP3S/SLURP/linear/train.py b/benchmarks/MP3S/SLURP/linear/train.py index 8c8979bc9..b62c5c080 100644 --- a/benchmarks/MP3S/SLURP/linear/train.py +++ b/benchmarks/MP3S/SLURP/linear/train.py @@ -19,8 +19,7 @@ class IntentIdBrain(sb.Brain): def compute_forward(self, batch, stage): - """Computation pipeline based on a encoder + scenario classifier. - """ + """Computation pipeline based on a encoder + scenario classifier.""" batch = batch.to(self.device) wavs, lens = batch.sig feats = self.modules.weighted_ssl_model(wavs) @@ -32,8 +31,7 @@ def compute_forward(self, batch, stage): return outputs def compute_objectives(self, predictions, batch, stage): - """Computes the loss using speaker-id as label. - """ + """Computes the loss using speaker-id as label.""" scenario_id, _ = batch.scenario_encoded # to meet the input form of nll loss scenario_id = scenario_id.squeeze(1) @@ -161,7 +159,8 @@ def dataio_prep(hparams): data_folder = hparams["data_folder"] train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["csv_train"], replacements={"data_root": data_folder}, + csv_path=hparams["csv_train"], + replacements={"data_root": data_folder}, ) if hparams["sorting"] == "ascending": @@ -186,12 +185,14 @@ def dataio_prep(hparams): ) valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["csv_valid"], replacements={"data_root": data_folder}, + csv_path=hparams["csv_valid"], + replacements={"data_root": data_folder}, ) valid_data = valid_data.filtered_sorted(sort_key="duration") test_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=hparams["csv_test"], replacements={"data_root": data_folder}, + csv_path=hparams["csv_test"], + replacements={"data_root": data_folder}, ) test_data = test_data.filtered_sorted(sort_key="duration") @@ -206,7 +207,7 @@ def audio_pipeline(wav): sig = sb.dataio.dataio.read_audio(wav) return sig - # Initialization of the label encoder. The label encoder assignes to each + # Initialization of the label encoder. The label encoder assigns to each # of the observed label a unique index (e.g, 'spk01': 0, 'spk02': 1, ..) label_encoder = sb.dataio.encoder.CategoricalEncoder() @@ -225,7 +226,8 @@ def label_pipeline(semantics): # Define datasets. We also connect the dataset with the data processing # functions defined above. 
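The load_or_create call above is the usual CategoricalEncoder pattern: fit the label set once, save the mapping, and reload it on later runs. A minimal sketch fitted from a plain list of labels instead of a DynamicItemDataset (labels and file path are illustrative):

import speechbrain as sb

label_encoder = sb.dataio.encoder.CategoricalEncoder()
label_encoder.update_from_iterable(["alarm", "audio", "calendar"])
label_encoder.save("label_encoder.txt")  # illustrative path

index = label_encoder.encode_label("calendar")
print(index, label_encoder.ind2lab[index])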
sb.dataio.dataset.set_output_keys( - datasets, ["id", "sig", "scenario", "scenario_encoded"], + datasets, + ["id", "sig", "scenario", "scenario_encoded"], ) # Load or compute the label encoder (with multi-GPU DDP support) # Please, take a look into the lab_enc_file to see the label to index @@ -233,7 +235,9 @@ def label_pipeline(semantics): lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt") label_encoder.load_or_create( - path=lab_enc_file, from_didatasets=[datasets[0]], output_key="scenario", + path=lab_enc_file, + from_didatasets=[datasets[0]], + output_key="scenario", ) return {"train": datasets[0], "valid": datasets[1], "test": datasets[2]} diff --git a/benchmarks/MP3S/VoxCeleb1/Xvectors/train.py b/benchmarks/MP3S/VoxCeleb1/Xvectors/train.py index fabfe94bb..e8dd7963b 100755 --- a/benchmarks/MP3S/VoxCeleb1/Xvectors/train.py +++ b/benchmarks/MP3S/VoxCeleb1/Xvectors/train.py @@ -27,12 +27,16 @@ def compute_embedding(wavs, wav_lens): Arguments --------- - wavs : Torch.Tensor + wavs : torch.Tensor Tensor containing the speech waveform (batch, time). Make sure the sample rate is fs=16000 Hz. - wav_lens: Torch.Tensor + wav_lens : torch.Tensor Tensor containing the relative length for each sentence in the length (e.g., [0.8 0.6 1.0]) + + Returns + ------- + embeddings : torch.Tensor """ with torch.no_grad(): wavs, wav_lens = ( @@ -71,8 +75,7 @@ def compute_embedding_loop(data_loader): def get_verification_scores(veri_test): - """ Computes positive and negative scores given the verification split. - """ + """Computes positive and negative scores given the verification split.""" scores = [] positive_scores = [] negative_scores = [] @@ -157,7 +160,8 @@ def dataio_prep_verif(params): # Train data (used for normalization) train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=params["train_data"], replacements={"data_root": data_folder}, + csv_path=params["train_data"], + replacements={"data_root": data_folder}, ) train_data = train_data.filtered_sorted( sort_key="duration", select_n=params["n_train_snts"] @@ -165,13 +169,15 @@ def dataio_prep_verif(params): # Enrol data enrol_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=params["enrol_data"], replacements={"data_root": data_folder}, + csv_path=params["enrol_data"], + replacements={"data_root": data_folder}, ) enrol_data = enrol_data.filtered_sorted(sort_key="duration") # Test data test_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=params["test_data"], replacements={"data_root": data_folder}, + csv_path=params["test_data"], + replacements={"data_root": data_folder}, ) test_data = test_data.filtered_sorted(sort_key="duration") @@ -210,12 +216,10 @@ def audio_pipeline(wav, start, stop): class SpeakerBrain(sb.core.Brain): - """Class for speaker embedding training" - """ + """Class for speaker embedding training" """ def compute_forward(self, batch, stage): - """Computation pipeline based on a encoder + speaker classifier. - """ + """Computation pipeline based on a encoder + speaker classifier.""" batch = batch.to(self.device) wavs, lens = batch.sig feats = self.modules.weighted_ssl_model(wavs) @@ -225,8 +229,7 @@ def compute_forward(self, batch, stage): return outputs, lens def compute_objectives(self, predictions, batch, stage): - """Computes the loss using speaker-id as label. 
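The wav_lens convention documented above (relative lengths such as [0.8 0.6 1.0]) encodes how much of each padded waveform is real signal. A minimal sketch of how such values relate to a padded batch (lengths are illustrative):

import torch

true_lengths = torch.tensor([12800.0, 9600.0, 16000.0])  # samples per utterance
wav_lens = true_lengths / true_lengths.max()
print(wav_lens)  # tensor([0.8000, 0.6000, 1.0000])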
- """ + """Computes the loss using speaker-id as label.""" predictions, lens = predictions uttid = batch.id spkid, _ = batch.spk_id_encoded @@ -349,7 +352,9 @@ def label_pipeline(spk_id): # Load or compute the label encoder (with multi-GPU DDP support) lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt") label_encoder.load_or_create( - path=lab_enc_file, from_didatasets=[train_data], output_key="spk_id", + path=lab_enc_file, + from_didatasets=[train_data], + output_key="spk_id", ) # 4. Set output: @@ -374,7 +379,7 @@ def label_pipeline(spk_id): with open(hparams_file) as fin: hparams = load_hyperpyyaml(fin, overrides) - # Download verification list (to exlude verification sentences from train) + # Download verification list (to exclude verification sentences from train) veri_file_path = os.path.join( hparams["save_folder"], os.path.basename(hparams["verification_file"]) ) @@ -391,9 +396,9 @@ def label_pipeline(spk_id): split_ratio=[90, 10], seg_dur=hparams["sentence_len"], skip_prep=hparams["skip_prep"], - source=hparams["voxceleb_source"] - if "voxceleb_source" in hparams - else None, + source=( + hparams["voxceleb_source"] if "voxceleb_source" in hparams else None + ), ) # Loading wav2vec2.0 diff --git a/benchmarks/MP3S/VoxCeleb1/ecapa_tdnn/train.py b/benchmarks/MP3S/VoxCeleb1/ecapa_tdnn/train.py index 5cb2d63a3..82855999c 100644 --- a/benchmarks/MP3S/VoxCeleb1/ecapa_tdnn/train.py +++ b/benchmarks/MP3S/VoxCeleb1/ecapa_tdnn/train.py @@ -27,12 +27,16 @@ def compute_embedding(wavs, wav_lens): Arguments --------- - wavs : Torch.Tensor + wavs : torch.Tensor Tensor containing the speech waveform (batch, time). Make sure the sample rate is fs=16000 Hz. - wav_lens: Torch.Tensor + wav_lens: torch.Tensor Tensor containing the relative length for each sentence in the length (e.g., [0.8 0.6 1.0]) + + Returns + ------- + embeddings : torch.Tensor """ with torch.no_grad(): wavs, wav_lens = ( @@ -70,8 +74,7 @@ def compute_embedding_loop(data_loader): def get_verification_scores(veri_test): - """ Computes positive and negative scores given the verification split. - """ + """Computes positive and negative scores given the verification split.""" scores = [] positive_scores = [] negative_scores = [] @@ -156,7 +159,8 @@ def dataio_prep_verif(params): # Train data (used for normalization) train_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=params["train_data"], replacements={"data_root": data_folder}, + csv_path=params["train_data"], + replacements={"data_root": data_folder}, ) train_data = train_data.filtered_sorted( sort_key="duration", select_n=params["n_train_snts"] @@ -164,13 +168,15 @@ def dataio_prep_verif(params): # Enrol data enrol_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=params["enrol_data"], replacements={"data_root": data_folder}, + csv_path=params["enrol_data"], + replacements={"data_root": data_folder}, ) enrol_data = enrol_data.filtered_sorted(sort_key="duration") # Test data test_data = sb.dataio.dataset.DynamicItemDataset.from_csv( - csv_path=params["test_data"], replacements={"data_root": data_folder}, + csv_path=params["test_data"], + replacements={"data_root": data_folder}, ) test_data = test_data.filtered_sorted(sort_key="duration") @@ -209,12 +215,10 @@ def audio_pipeline(wav, start, stop): class SpeakerBrain(sb.core.Brain): - """Class for speaker embedding training" - """ + """Class for speaker embedding training" """ def compute_forward(self, batch, stage): - """Computation pipeline based on a encoder + speaker classifier. 
- """ + """Computation pipeline based on a encoder + speaker classifier.""" batch = batch.to(self.device) wavs, lens = batch.sig feats = self.modules.weighted_ssl_model(wavs) @@ -224,8 +228,7 @@ def compute_forward(self, batch, stage): return outputs, lens def compute_objectives(self, predictions, batch, stage): - """Computes the loss using speaker-id as label. - """ + """Computes the loss using speaker-id as label.""" predictions, lens = predictions uttid = batch.id spkid, _ = batch.spk_id_encoded @@ -348,7 +351,9 @@ def label_pipeline(spk_id): # Load or compute the label encoder (with multi-GPU DDP support) lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt") label_encoder.load_or_create( - path=lab_enc_file, from_didatasets=[train_data], output_key="spk_id", + path=lab_enc_file, + from_didatasets=[train_data], + output_key="spk_id", ) # 4. Set output: @@ -373,7 +378,7 @@ def label_pipeline(spk_id): with open(hparams_file) as fin: hparams = load_hyperpyyaml(fin, overrides) - # Download verification list (to exlude verification sentences from train) + # Download verification list (to exclude verification sentences from train) veri_file_path = os.path.join( hparams["save_folder"], os.path.basename(hparams["verification_file"]) ) @@ -390,9 +395,9 @@ def label_pipeline(spk_id): split_ratio=[90, 10], seg_dur=hparams["sentence_len"], skip_prep=hparams["skip_prep"], - source=hparams["voxceleb_source"] - if "voxceleb_source" in hparams - else None, + source=( + hparams["voxceleb_source"] if "voxceleb_source" in hparams else None + ), ) # Loading wav2vec2.0 diff --git a/benchmarks/MP3S/VoxCeleb1/ecapa_tdnn/voxceleb_prepare.py b/benchmarks/MP3S/VoxCeleb1/ecapa_tdnn/voxceleb_prepare.py index 1fef734f1..ea510fda2 100644 --- a/benchmarks/MP3S/VoxCeleb1/ecapa_tdnn/voxceleb_prepare.py +++ b/benchmarks/MP3S/VoxCeleb1/ecapa_tdnn/voxceleb_prepare.py @@ -64,7 +64,7 @@ def prepare_voxceleb( List of splits to prepare from ['train', 'dev'] split_ratio : list List if int for train and validation splits - seg_dur : int + seg_dur : float Segment duration of a chunk in seconds (e.g., 3.0 seconds). amp_th : float removes segments whose average amplitude is below the @@ -78,6 +78,10 @@ def prepare_voxceleb( skip_prep: Bool If True, skip preparation. + Returns + ------- + None + Example ------- >>> from recipes.VoxCeleb.voxceleb1_prepare import prepare_voxceleb @@ -103,7 +107,7 @@ def prepare_voxceleb( if not os.path.exists(save_folder): os.makedirs(save_folder) - # Setting ouput files + # Setting output files save_opt = os.path.join(save_folder, OPT_FILE) save_csv_train = os.path.join(save_folder, TRAIN_CSV) save_csv_dev = os.path.join(save_folder, DEV_CSV) @@ -164,6 +168,15 @@ def skip(splits, save_folder, conf): Detects if the voxceleb data_preparation has been already done. If the preparation has been done, we can skip it. + Arguments + --------- + splits : list + List of data sections to check. + save_folder : str + Folder containing generated files to check. + conf : dict + Configuration options to check against saved options. + Returns ------- bool @@ -203,9 +216,12 @@ def _check_voxceleb_folders(data_folders, splits): If it does not, raise an error. - Returns - ------- - None + Arguments + --------- + data_folders : str + Folders containing data files to check. + splits : list + List of data sections to check. 
Raises ------ @@ -325,6 +341,8 @@ def prepare_csv(seg_dur, wav_lst, csv_file, random_segment=False, amp_th=0): Arguments --------- + seg_dur : float + Segment duration of a chunk in seconds (e.g., 3.0 seconds). wav_lst : list The list of wav files of a given data split. csv_file : str @@ -334,10 +352,6 @@ def prepare_csv(seg_dur, wav_lst, csv_file, random_segment=False, amp_th=0): amp_th: float Threshold on the average amplitude on the chunk. If under this threshold, the chunk is discarded. - - Returns - ------- - None """ msg = '\t"Creating csv lists in %s..."' % (csv_file) @@ -423,14 +437,12 @@ def prepare_csv_enrol_test(data_folders, save_folder, verification_pairs_file): Arguments --------- - data_folder : str + data_folders : str Path of the data folders save_folder : str The directory where to store the csv files. - - Returns - ------- - None + verification_pairs_file : str + The path to the file of verification pairs. """ # msg = '\t"Creating csv lists in %s..."' % (csv_file) diff --git a/lint-requirements.txt b/lint-requirements.txt index 41d37cf72..1cfb73555 100644 --- a/lint-requirements.txt +++ b/lint-requirements.txt @@ -1,6 +1,7 @@ -black==19.10b0 -click==8.0.4 -flake8==3.7.9 -pycodestyle==2.5.0 -pytest==5.4.1 -yamllint==1.23.0 +black==24.3.0 +click==8.1.7 +flake8==7.0.0 +pycodestyle==2.11.0 +pydoclint==0.4.1 +pytest==7.4.0 +yamllint==1.35.1 diff --git a/tests/consistency/test_HF_repo.py b/tests/consistency/test_HF_repo.py index 2e5a65678..dc6c98011 100644 --- a/tests/consistency/test_HF_repo.py +++ b/tests/consistency/test_HF_repo.py @@ -4,13 +4,14 @@ * Mirco Ravanelli 2022 * Andreas Nautsch 2022 """ + import os import csv from speechbrain.utils.data_utils import download_file def run_HF_check( - recipe_folder="tests/recipes", field="HF_repo", output_folder="HF_repos", + recipe_folder="tests/recipes", field="HF_repo", output_folder="HF_repos" ): """Checks if the code reported in the readme files of the HF repository is runnable. Note: the tests run the code marked as python in the readme file. @@ -25,7 +26,7 @@ def run_HF_check( Where to download the HF readme files. Returns - --------- + ------- check: True True if all the code runs, False otherwise. """ @@ -56,7 +57,7 @@ def repo_list(recipe_folder="tests/recipes", field="HF_repo"): Field of the csv recipe file containing the links to HF repos. Returns - --------- + ------- HF_repos: list List of the detected HF repos. """ @@ -87,7 +88,7 @@ def check_repo(HF_repo): URL of the HF repository to check. Returns - --------- + ------- check: bool True if all the code runs, False otherwise. """ diff --git a/tests/consistency/test_docstrings.py b/tests/consistency/test_docstrings.py index c829d949c..1eb2e1763 100644 --- a/tests/consistency/test_docstrings.py +++ b/tests/consistency/test_docstrings.py @@ -3,6 +3,7 @@ Authors * Mirco Ravanelli 2022 """ + from tests.utils.check_docstrings import check_docstrings diff --git a/tests/consistency/test_recipe.py b/tests/consistency/test_recipe.py index 303ff71ae..f9a9d8aea 100644 --- a/tests/consistency/test_recipe.py +++ b/tests/consistency/test_recipe.py @@ -3,6 +3,7 @@ Authors * Mirco Ravanelli 2022 """ + import os import csv from speechbrain.utils.data_utils import get_all_files, get_list_from_csv @@ -42,12 +43,6 @@ def test_recipe_list( Field of the csv file where the debug flags are stated (for data flow testing). avoid_list: list List of files for which this check must be avoided. - - Returns - --------- - bool: - True if the test passes, False otherwise. 
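repo_list above walks the recipe csv files and collects the HF_repo field; at its core that is a csv.DictReader pass over each file, roughly as sketched below (the path is illustrative and the field format is assumed to be whitespace-separated URLs):

import csv

HF_repos = set()
with open("tests/recipes/LibriSpeech.csv", newline="") as f:  # illustrative path
    for row in csv.DictReader(f):
        entry = row.get("HF_repo", "")
        if entry:
            HF_repos.update(entry.split())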
- """ all_diffs_zero = True all_with_flags = True @@ -93,16 +88,11 @@ def test_recipe_files( """This test checks if the files listed in the recipe csv file exist. Arguments - ---------. + --------- recipe_folder: path Path of the folder containing csv recipe files. fields: list Fields of the csv recipe file to check. - - Returns - --------- - check: bool - True if the test passes, False otherwise. """ check = True # Loop over all recipe CSVs diff --git a/tests/consistency/test_yaml.py b/tests/consistency/test_yaml.py index 2ee2cba7f..625d6fd48 100644 --- a/tests/consistency/test_yaml.py +++ b/tests/consistency/test_yaml.py @@ -3,6 +3,7 @@ Authors * Mirco Ravanelli 2022 """ + import os import csv from tests.consistency.test_recipe import __skip_list @@ -17,7 +18,7 @@ def test_yaml_script_consistency(recipe_folder="tests/recipes"): --------- recipe_folder : path Path of the folder with csv files containing the training scripts with their coupled - yaml files (with colums called 'Hparam_file', 'Script_file', 'Data_prep_file') + yaml files (with columns called 'Hparam_file', 'Script_file', 'Data_prep_file') """ # Use this list to itemize special yaml for which we do not have to test diff --git a/tests/utils/README.md b/tests/utils/README.md index 88eaadde5..34f781c38 100644 --- a/tests/utils/README.md +++ b/tests/utils/README.md @@ -39,7 +39,7 @@ Depending on the testing need, `test.yaml` grows - some examples fnx: transcribe_batch # as above dataset: LibriSpeech # which dataset to use -> will create a tests/tmp/LibriSpeech folder recipe_yaml: recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec.yaml # the training recipe for dataloader etc - overrides: # what of the recipe_yaml needs to be overriden + overrides: # what of the recipe_yaml needs to be overridden output_folder: !ref tests/tmp/ # the output folder is at the tmp dataset (data prep & eval tasks only) dataio: | # which dataio_prepare to import; copy/paste from train_with_wav2vec.py — pay attention to the last line (their dataio_prepare needs to know how to prepare the recipe dataset) from recipes.LibriSpeech.librispeech_prepare import prepare_librispeech diff --git a/tests/utils/check_HF_repo.py b/tests/utils/check_HF_repo.py index 478171905..f8881dfac 100644 --- a/tests/utils/check_HF_repo.py +++ b/tests/utils/check_HF_repo.py @@ -4,6 +4,7 @@ * Mirco Ravanelli 2022 * Andreas Nautsch 2022, 2023 """ + import os import csv from speechbrain.utils.data_utils import download_file @@ -11,7 +12,9 @@ def run_HF_check( - recipe_folder="tests/recipes", field="HF_repo", output_folder="tests/tmp", + recipe_folder="tests/recipes", + field="HF_repo", + output_folder="tests/tmp", ): """Checks if the code reported in the readme files of the HF repository is runnable. Note: the tests run the code marked as python in the readme file. @@ -26,7 +29,7 @@ def run_HF_check( Where to download the HF readme files. Returns - --------- + ------- check: True True if all the code runs, False otherwise. """ @@ -57,7 +60,7 @@ def repo_list(recipe_folder="tests/recipes", field="HF_repo"): Field of the csv recipe file containing the links to HF repos. Returns - --------- + ------- HF_repos: list List of the detected HF repos. """ @@ -91,7 +94,7 @@ def check_repo(HF_repo): URL of the HF repository to check. Returns - --------- + ------- check: bool True if all the code runs, False otherwise. 
""" diff --git a/tests/utils/check_docstrings.py b/tests/utils/check_docstrings.py index 8dd6dd80d..cde093055 100644 --- a/tests/utils/check_docstrings.py +++ b/tests/utils/check_docstrings.py @@ -15,7 +15,7 @@ def extractName(s, search_class=False): --------- s: string Input string where to search for function or class names. - search_clas: bool + search_class: bool If True, searches for class names. Returns @@ -65,7 +65,7 @@ def check_docstrings( continue print("Checking %s..." % (libpath)) - # Support variable initalization + # Support variable initialization fun_name = libpath class_name = libpath check_line = True diff --git a/tests/utils/check_url.py b/tests/utils/check_url.py index feabd9ea0..a5fa1330b 100644 --- a/tests/utils/check_url.py +++ b/tests/utils/check_url.py @@ -4,6 +4,7 @@ Authors * Mirco Ravanelli 2022 """ + import os import re import time @@ -58,7 +59,7 @@ def get_all_urls(file_lst, avoid_urls): ------- urls: dict A dictionary where the keys are the detected URLs and the values - are the files where the URLs are found. + are the files where the URLs are found. """ all_urls = {} @@ -90,7 +91,7 @@ def get_all_urls(file_lst, avoid_urls): def check_url(url): - """Cheks if an URL is broken + """Checks if an URL is broken Arguments --------- @@ -131,6 +132,13 @@ def check_links( Used to avoid some file extensions. avoid_files: list Used to avoid testing some specific file. + avoid_urls: list + Used to avoid testing some urls. + + Returns + ------- + bool + True if the check is passed. """ check_test = True diff --git a/tests/utils/check_yaml.py b/tests/utils/check_yaml.py index e1b0aea94..3779e3bba 100644 --- a/tests/utils/check_yaml.py +++ b/tests/utils/check_yaml.py @@ -120,8 +120,8 @@ def detect_script_vars(script_file, var_lst): detected_var.append(var) continue - # Chek var types - # Chek var types + # Check var types + # Check var types for var_type in var_types: if var_type + var in line: if var not in detected_var: @@ -308,7 +308,7 @@ def check_module_vars( if avoid in module_var_script: module_var_script.remove(avoid) - # Check Module variavles + # Check Module variables unused_vars = list(set(module_var_script) - set(module_vars_hparams)) for unused_var in unused_vars: diff --git a/tests/utils/recipe_tests.py b/tests/utils/recipe_tests.py index 747ac6f1d..161346fd5 100644 --- a/tests/utils/recipe_tests.py +++ b/tests/utils/recipe_tests.py @@ -4,6 +4,7 @@ * Mirco Ravanelli 2022 * Andreas Nautsch 2022, 2023 """ + import os import re import csv @@ -34,18 +35,18 @@ def check_row_for_test(row, filters_fields, filters, test_field): Key of the input dictionary that contains the test flags. Returns - --------- + ------- test: bool True if the line must be tested, False otherwise. """ test = True for i, field in enumerate(filters_fields): field_values = filters[i] - if type(field_values) == str: + if isinstance(field_values, str): # ... AND ... filter if not (field_values == row[field]): test = False - elif type(field_values) == list: # type(field) == list + elif isinstance(field_values, list): # type(field) == list # ... AND (... OR ...) ... filter; at least one entry of the list matches test_flag = False for filt in field_values: @@ -101,7 +102,7 @@ def prepare_test( See above. Returns - --------- + ------- test_script: dict A Dictionary containing recipe IDs as keys and test_scripts as values. test_hparam: dict @@ -182,7 +183,7 @@ def check_files( The pattern used to extract the list of files to check from check_str. 
Returns - --------- + ------- check: bool True if all the files are found, False otherwise. """ @@ -225,7 +226,7 @@ def check_performance( The pattern used to extract the list of files to check from check_str. Returns - --------- + ------- check: bool True if all the files are found, False otherwise. """ @@ -255,7 +256,7 @@ def check_performance( with open(filename) as file: lines = file.readlines() - # Fitler the lines + # Filter the lines lines_filt = [] last_line = "" for line in lines: @@ -316,15 +317,17 @@ def extract_value(string, key): The key argument to extract. Returns - --------- + ------- value: float or str The value corresponding to the specified key. """ escaped_key = re.escape(key) # Create the regular expression pattern to match the argument and its corresponding value - pattern = r"(?P{})\s*:\s*(?P[-+]?\d*\.\d+([eE][-+]?\d+)?)".format( - escaped_key + pattern = ( + r"(?P{})\s*:\s*(?P[-+]?\d*\.\d+([eE][-+]?\d+)?)".format( + escaped_key + ) ) # Search for the pattern in the input string @@ -348,7 +351,7 @@ def check_threshold(threshold, value): Float corresponding to the value to test Returns - --------- + ------- bool True if the constraint is satisfied, False otherwise. """ @@ -391,7 +394,7 @@ def run_test_cmd(cmd, stdout_file, stderr_file): File where standard error is stored. Returns - --------- + ------- rc: bool The return code obtained after running the command. If 0, the test is run without errors. If >0 the execution failed. @@ -462,7 +465,7 @@ def run_recipe_tests( """ # Create the output folder (where the tests results will be saved) os.makedirs(output_folder, exist_ok=True) - print("Test ouputs will be put in %s" % (output_folder)) + print("Test outputs will be put in %s" % (output_folder)) # Read the csv recipe file and detect which tests we have to run ( @@ -623,8 +626,6 @@ def download_only_test( A dictionary containing recipe IDs as keys and the checks as values. run_opts: str Running options to append to each test. - run_tests_with_checks_only: str - Running options to append to each test. run_tests_with_checks_only: bool If True skips all tests that do not have performance check criteria defined. output_folder: path @@ -709,15 +710,15 @@ def load_yaml_test( See above. avoid_list: list List of hparam file not to check. - rir_folder: + rir_folder: str This overrides the rir_folder; rir_path, and openrir_folder usually specified in the hparam files. - data_folder: + data_folder: str This overrides the data_folder usually specified in the hparam files. - output_folder: + output_folder: str This overrides the output_folder usually specified in the hparam files. Returns - --------- + ------- check: True True if all the hparam files are loaded correctly, False otherwise. """ diff --git a/tests/utils/refactoring_checks.py b/tests/utils/refactoring_checks.py index c9f125cee..bcad68dcc 100644 --- a/tests/utils/refactoring_checks.py +++ b/tests/utils/refactoring_checks.py @@ -116,7 +116,8 @@ def get_model(repo, values, updates_dir=None, run_opts=None): if "foreign" in values.keys(): os.unlink(custom) os.symlink( - f'{updates_dir}/{repo}/{values["foreign"]}', custom, + f'{updates_dir}/{repo}/{values["foreign"]}', + custom, ) else: # re:testing on develop? 
=> simply unlink anything before and re:link from cached HF hub @@ -228,7 +229,10 @@ def gather_expected_results( updates_dir = init( new_interfaces_git, new_interfaces_branch, new_interfaces_local_dir ) - repos = map(os.path.basename, glob(f"{updates_dir}/{glob_filter}"),) + repos = map( + os.path.basename, + glob(f"{updates_dir}/{glob_filter}"), + ) for repo in repos: # skip if results are there if repo not in results.keys(): @@ -277,7 +281,10 @@ def gather_refactoring_results( updates_dir = init( new_interfaces_git, new_interfaces_branch, new_interfaces_local_dir ) - repos = map(os.path.basename, glob(f"{updates_dir}/{glob_filter}"),) + repos = map( + os.path.basename, + glob(f"{updates_dir}/{glob_filter}"), + ) for repo in repos: # skip if results are there if "after" not in results[repo].keys(): @@ -402,7 +409,8 @@ def test_performance( for metric, specs in reporting.items(): stats[k][metric] = specs["tracker"].summarize(specs["field"]) logger.log_stats( - stats_meta=stats_meta | {"set": k}, test_stats=stats[k], + stats_meta=stats_meta | {"set": k}, + test_stats=stats[k], ) return stats
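The stats_meta | {"set": k} expression above relies on the dict merge operator, which needs Python 3.9 or newer; a tiny sketch of the merge and an equivalent spelling for older interpreters (values are illustrative):

stats_meta = {"model": "wav2vec2-base"}  # illustrative
k = "test-clean"

merged = stats_meta | {"set": k}          # Python 3.9+
merged_legacy = {**stats_meta, "set": k}  # same result on older versions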