Skip to content

Commit

Permalink
Fix some bugs (#12153)
Browse files Browse the repository at this point in the history
* update outdated figure-to-NumPy conversion code; switch reference-audio sampling from random.sample to random.choice

Signed-off-by: Jason <[email protected]>

* code cleanup

Signed-off-by: Jason <[email protected]>

---------

Signed-off-by: Jason <[email protected]>
  • Loading branch information
blisc authored Feb 15, 2025
1 parent c54a628 commit b0fd4ce
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 10 deletions.
8 changes: 4 additions & 4 deletions nemo/collections/tts/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,10 @@ def __init__(
self.phoneme_probability = getattr(self.text_tokenizer, "phoneme_probability", None)
else:
if text_tokenizer_pad_id is None:
raise ValueError(f"text_tokenizer_pad_id must be specified if text_tokenizer is not BaseTokenizer")
raise ValueError("text_tokenizer_pad_id must be specified if text_tokenizer is not BaseTokenizer")

if tokens is None:
raise ValueError(f"tokens must be specified if text_tokenizer is not BaseTokenizer")
raise ValueError("tokens must be specified if text_tokenizer is not BaseTokenizer")

self.text_tokenizer_pad_id = text_tokenizer_pad_id
self.cache_text = True if self.phoneme_probability is None else False
Expand Down Expand Up @@ -496,7 +496,7 @@ def add_reference_audio(self, **kwargs):
speaker_to_index_map[d["speaker_id"]].add(i)
# Random sample a reference audio from the same speaker
self.get_reference_for_sample = lambda sample: self.data[
random.sample(speaker_to_index_map[sample["speaker_id"]], 1)[0]
random.choice(speaker_to_index_map[tuple(sample["speaker_id"])])
]
elif reference_audio_type == "ground-truth":
# Use ground truth audio as reference audio
Expand Down Expand Up @@ -679,7 +679,7 @@ def __getitem__(self, index):
sample_pitch_mean = pitch_stats["pitch_mean"]
sample_pitch_std = pitch_stats["pitch_std"]
else:
raise ValueError(f"Missing statistics for pitch normalization.")
raise ValueError("Missing statistics for pitch normalization.")

pitch -= sample_pitch_mean
pitch[pitch == -sample_pitch_mean] = 0.0 # Zero out values that were previously zero
Expand Down
9 changes: 3 additions & 6 deletions nemo/collections/tts/parts/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,10 +632,8 @@ def plot_gate_outputs_to_numpy(gate_targets, gate_outputs):


def save_figure_to_numpy(fig):
# Convert the canvas of `fig` into a NumPy array and return it.
# NOTE: this span is a rendered diff hunk with indentation stripped; the old and
# new bodies appear back to back and are not meant to execute together.
# --- removed by this commit (old body): RGB bytes reshaped to (height, width, 3)
# save it to a numpy array.
data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
return data
# --- added by this commit (new body): array built from the renderer's RGBA buffer
# NOTE(review): buffer_rgba() presumably yields 4 channels rather than the 3 of
# the old reshape - confirm that callers accept the extra alpha channel.
img_array = np.array(fig.canvas.renderer.buffer_rgba())
return img_array


@rank_zero_only
Expand Down Expand Up @@ -802,8 +800,7 @@ def clip_grad_value_(parameters, clip_value, norm_type=2):


def convert_pad_shape(pad_shape):
# Reverse the outer sequence of `pad_shape`, then flatten its sub-sequences
# into a single flat list, which is returned.
# NOTE: this span is a rendered diff hunk with indentation stripped; the old and
# new bodies appear back to back and are not meant to execute together.
# --- removed by this commit (old body, went through a temporary named `l`)
l = pad_shape[::-1]
pad_shape = [item for sublist in l for item in sublist]
# --- added by this commit (new body, same flattening without the temporary)
pad_shape = [item for sublist in pad_shape[::-1] for item in sublist]
return pad_shape


Expand Down

0 comments on commit b0fd4ce

Please sign in to comment.