Merge branch 'r2.0.0rc0' of github.com:NVIDIA/NeMo into r2.0.0rc0
ericharper committed May 23, 2024
2 parents 38fcd5f + d8afaba commit 8b65e3e
Showing 20 changed files with 644 additions and 339 deletions.
@@ -31,7 +31,7 @@ export:
  decoder_type: llama # gptnext, gpt2, llama
  inference_tensor_parallel: 1 # Default using 1 TP for inference
  inference_pipeline_parallel: 1 # Default using 1 PP for inference
- dtype: 16 # Default precision data type
+ dtype: bf16 # Default precision data type

model_file: llama2-7b-fp16.nemo # Nemo file path
model_save: llama2-7b-fp8.qnemo # Path where the quantized model will be saved
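For reference, a minimal sketch of reading this export block with OmegaConf (the loader NeMo's YAML configs go through). The file name below is a placeholder; the only point is that dtype is now the string "bf16" rather than the integer 16.

from omegaconf import OmegaConf

# Placeholder file name; only the keys shown in the hunk above are assumed to exist.
cfg = OmegaConf.load("quantization.yaml")

# After this change dtype is a string precision name, not an integer,
# so downstream checks should compare against values like "bf16".
assert cfg.export.dtype == "bf16"
print(cfg.export.decoder_type, cfg.export.inference_tensor_parallel)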
23 changes: 18 additions & 5 deletions nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py
@@ -58,6 +58,9 @@ def _states_to_device(dec_state, device='cpu'):
    return dec_state


+_DECODER_LENGTHS_NONE_WARNING = "Passing in decoder_lengths=None for CTC decoding is likely to be an error, since it is unlikely that each element of your batch has exactly the same length. decoder_lengths will default to decoder_output.shape[0]."
+
+
class GreedyCTCInfer(Typing, ConfidenceMethodMixin):
    """A greedy CTC decoder.

@@ -148,7 +151,7 @@ def __init__(
    def forward(
        self,
        decoder_output: torch.Tensor,
-        decoder_lengths: torch.Tensor,
+        decoder_lengths: Optional[torch.Tensor],
    ):
        """Returns a list of hypotheses given an input batch of the encoder hidden embedding.
        Output token is generated auto-repressively.

@@ -167,6 +170,9 @@ def forward(
                mode=logging_mode.ONCE,
            )

+        if decoder_lengths is None:
+            logging.warning(_DECODER_LENGTHS_NONE_WARNING, mode=logging_mode.ONCE)
+
        with torch.inference_mode():
            hypotheses = []
            # Process each sequence independently

@@ -213,7 +219,7 @@ def forward(
        return (packed_result,)

    @torch.no_grad()
-    def _greedy_decode_logprobs(self, x: torch.Tensor, out_len: torch.Tensor):
+    def _greedy_decode_logprobs(self, x: torch.Tensor, out_len: Optional[torch.Tensor]):
        # x: [T, D]
        # out_len: [seq_len]

@@ -243,7 +249,7 @@ def _greedy_decode_logprobs(self, x: torch.Tensor, out_len: torch.Tensor):
        return hypothesis

    @torch.no_grad()
-    def _greedy_decode_labels(self, x: torch.Tensor, out_len: torch.Tensor):
+    def _greedy_decode_labels(self, x: torch.Tensor, out_len: Optional[torch.Tensor]):
        # x: [T]
        # out_len: [seq_len]

@@ -370,7 +376,7 @@ def __init__(
    def forward(
        self,
        decoder_output: torch.Tensor,
-        decoder_lengths: torch.Tensor,
+        decoder_lengths: Optional[torch.Tensor],
    ):
        """Returns a list of hypotheses given an input batch of the encoder hidden embedding.
        Output token is generated auto-repressively.

@@ -383,11 +389,18 @@ def forward(
        Returns:
            packed list containing batch number of sentences (Hypotheses).
        """
+
+        input_decoder_lengths = decoder_lengths
+
+        if decoder_lengths is None:
+            logging.warning(_DECODER_LENGTHS_NONE_WARNING, mode=logging_mode.ONCE)
+            decoder_lengths = torch.tensor([decoder_output.shape[1]], dtype=torch.long).expand(decoder_output.shape[0])
+
        if decoder_output.ndim == 2:
            hypotheses = self._greedy_decode_labels_batched(decoder_output, decoder_lengths)
        else:
            hypotheses = self._greedy_decode_logprobs_batched(decoder_output, decoder_lengths)
-        packed_result = pack_hypotheses(hypotheses, decoder_lengths)
+        packed_result = pack_hypotheses(hypotheses, input_decoder_lengths)
        return (packed_result,)

    @torch.no_grad()
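A minimal, standalone sketch of the new fallback in the batched decoder (shapes and values are illustrative, not the NeMo call path): when decoder_lengths is None, a one-time warning is logged and every element of the batch is assumed to span the full time axis of decoder_output, while the original None is what still reaches pack_hypotheses.

import torch

# decoder_output: [B, T, V] log-probs; decoder_lengths deliberately left unset.
decoder_output = torch.randn(4, 50, 29)
decoder_lengths = None

input_decoder_lengths = decoder_lengths  # preserved for pack_hypotheses, as in the diff
if decoder_lengths is None:
    # Default every sequence to the full time dimension, mirroring the hunk above.
    decoder_lengths = torch.tensor([decoder_output.shape[1]], dtype=torch.long).expand(
        decoder_output.shape[0]
    )

print(decoder_lengths)  # tensor([50, 50, 50, 50])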
12 changes: 7 additions & 5 deletions nemo/collections/nlp/modules/common/megatron/clip_grads.py
@@ -142,28 +142,30 @@ def clip_grad_norm_fp32(parameters, max_norm, norm_type=2, use_fsdp=False):
            grad_norm = torch.zeros(1, device='cuda', dtype=torch.float32).squeeze()
        # Since we will be summing across data parallel groups,
        # we need the pow(norm-type).
-        total_norm = grad_norm ** norm_type
+        total_norm = grad_norm**norm_type
        if use_fsdp:
            if len(sharded_grads_for_norm) > 0:
                sharded_grad_norm, _ = multi_tensor_applier(
                    amp_C.multi_tensor_l2norm, dummy_overflow_buf.fill_(0), [sharded_grads_for_norm], False
                )
            else:
                sharded_grad_norm = torch.zeros(1, device='cuda', dtype=torch.float32).squeeze()
-            total_sharded_norm = sharded_grad_norm ** norm_type
+            total_sharded_norm = sharded_grad_norm**norm_type
    else:
        for grad in grads_for_norm:
            grad_norm = torch.norm(grad, norm_type)
-            total_norm += grad_norm ** norm_type
+            total_norm += grad_norm**norm_type
        if use_fsdp:
            for grad in sharded_grads_for_norm:
                grad_norm = torch.norm(grad, norm_type)
-                total_sharded_norm += grad_norm ** norm_type
+                total_sharded_norm += grad_norm**norm_type

    if use_fsdp:
        # Sum norm of grad shards across data-parallel GPUs.
        torch.distributed.all_reduce(
-            total_sharded_norm, op=torch.distributed.ReduceOp.SUM, group=parallel_state.get_data_parallel_group(),
+            total_sharded_norm,
+            op=torch.distributed.ReduceOp.SUM,
+            group=parallel_state.get_data_parallel_group(with_context_parallel=True),
        )
        total_norm += total_sharded_norm.squeeze()
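For background, a self-contained sketch of the reduction pattern this hunk touches (illustrative only, not NeMo code): each rank accumulates sum(|g|^p) over its local gradient shards, the partial sums are summed across ranks, and the p-th root is taken once at the end. The change above simply routes that sum through the data-parallel group with context-parallel ranks included.

import torch

def local_grad_norm_pow(grads, norm_type=2.0):
    """Sum of |g|^p over the gradient shards owned by this rank."""
    total = torch.zeros((), dtype=torch.float32)
    for g in grads:
        total += torch.norm(g.float(), norm_type) ** norm_type
    return total

# Two hypothetical local shards on one rank.
shards = [torch.randn(10), torch.randn(5)]
partial = local_grad_norm_pow(shards)

# In the distributed setting the partial sums would be combined first, e.g.:
# torch.distributed.all_reduce(partial, op=torch.distributed.ReduceOp.SUM, group=dp_group)

total_norm = partial ** (1.0 / 2.0)  # p-th root taken once, after the reduction
print(total_norm)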
52 changes: 33 additions & 19 deletions nemo/collections/tts/g2p/models/t5.py
@@ -46,17 +46,23 @@ class T5G2PModel(G2PModel, Exportable):

    @property
    def input_types(self) -> Optional[Dict[str, NeuralType]]:
-        return {
-            "input_ids": NeuralType(('B', 'T'), TokenIndex()),
-            "attention_mask": NeuralType(('B', 'T'), MaskType(), optional=True),
-            "labels": NeuralType(('B', 'T'), LabelsType()),
-        }
+        if self._input_types is None:
+            return {
+                "input_ids": NeuralType(('B', 'T'), TokenIndex()),
+                "attention_mask": NeuralType(('B', 'T'), MaskType(), optional=True),
+                "labels": NeuralType(('B', 'T'), LabelsType()),
+            }
+        return self._input_types

    @property
    def output_types(self) -> Optional[Dict[str, NeuralType]]:
-        return {"loss": NeuralType((), LossType())}
+        if self._output_types is None:
+            return {"loss": NeuralType((), LossType())}
+        return self._output_types

    def __init__(self, cfg: DictConfig, trainer: Trainer = None):
+        self._input_types = None
+        self._output_types = None
        self.world_size = 1
        if trainer is not None:
            self.world_size = trainer.num_nodes * trainer.num_devices

@@ -91,7 +97,11 @@ def forward(self, input_ids, attention_mask, labels):
    # ===== Training Functions ===== #
    def training_step(self, batch, batch_idx):
        input_ids, attention_mask, labels = batch
-        train_loss = self.forward(input_ids=input_ids, attention_mask=attention_mask, labels=labels,)
+        train_loss = self.forward(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            labels=labels,
+        )

        self.log('train_loss', train_loss)
        return train_loss

@@ -126,7 +136,10 @@ def _setup_infer_dataloader(self, cfg) -> 'torch.utils.data.DataLoader':

    # Functions for inference
    @torch.no_grad()
-    def _infer(self, config: DictConfig,) -> List[int]:
+    def _infer(
+        self,
+        config: DictConfig,
+    ) -> List[int]:
        """
        Runs model inference.

@@ -161,7 +174,11 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0, split="val"):
        input_ids, attention_mask, labels = batch

        # Get loss from forward step
-        val_loss = self.forward(input_ids=input_ids, attention_mask=attention_mask, labels=labels,)
+        val_loss = self.forward(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            labels=labels,
+        )

        # Get preds from generate function and calculate PER
        labels_str = self._tokenizer.batch_decode(

@@ -287,15 +304,8 @@ def _prepare_for_export(self, **kwargs):
        }

    def _export_teardown(self):
-        self._input_types = self._output_types = None
-
-    @property
-    def input_types(self):
-        return self._input_types
-
-    @property
-    def output_types(self):
-        return self._output_types
+        self._input_types = None
+        self._output_types = None

    def input_example(self, max_batch=1, max_dim=44):
        """

@@ -307,7 +317,11 @@ def input_example(self, max_batch=1, max_dim=44):
        sentence = "Kupil sem si bicikel in mu zamenjal stol."
        input_ids = [sentence]
        input_encoding = self._tokenizer(
-            input_ids, padding='longest', max_length=self.max_source_len, truncation=True, return_tensors='pt',
+            input_ids,
+            padding='longest',
+            max_length=self.max_source_len,
+            truncation=True,
+            return_tensors='pt',
        )
        return (input_encoding.input_ids,)
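The net effect of the t5.py changes is a small lazy-override pattern: _prepare_for_export can install concrete _input_types/_output_types, the public properties fall back to the training-time definitions whenever no override is set, and _export_teardown clears the override. A stripped-down sketch of just that pattern (class and type names are placeholders, not the NeMo classes):

from typing import Dict, Optional

class LazyTypedModel:
    def __init__(self) -> None:
        # No override installed; the properties return their defaults.
        self._input_types: Optional[Dict[str, str]] = None
        self._output_types: Optional[Dict[str, str]] = None

    @property
    def input_types(self) -> Dict[str, str]:
        if self._input_types is None:
            return {"input_ids": "B x T tokens", "labels": "B x T labels"}
        return self._input_types

    def _prepare_for_export(self) -> None:
        # Export-time override, analogous to _prepare_for_export in the diff.
        self._input_types = {"input_ids": "B x T tokens"}

    def _export_teardown(self) -> None:
        # Restore the defaults, mirroring the new _export_teardown body.
        self._input_types = None
        self._output_types = None

model = LazyTypedModel()
print(model.input_types)   # training-time defaults
model._prepare_for_export()
print(model.input_types)   # export override
model._export_teardown()
print(model.input_types)   # defaults again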