S35 - MSO Extraction Scripts
Table of contents
The extraction scripts are available in the hvo_sequence directory of the GrooveTransformer repository.
# -------------------------------------------------------------
# MSO::Multiband Synthesized Onsets
# -------------------------------------------------------------
def get_logf_stft(self, **kwargs):
    """Calculate the log-frequency STFT of the synthesized version of the .hvo score.

    The score is rendered to audio with ``self.synthesize``, peak-normalized,
    and passed to ``logf_stft`` together with the analysis settings below.

    Keyword Args:
        sf_path: soundfont path given to ``self.synthesize``
            (default ``"soundfonts/Standard_Drum_Kit.sf2"``).
        sr: sample rate given to ``self.synthesize`` and ``logf_stft``
            (default 44100).
        n_fft: forwarded to ``logf_stft`` (default 1024).
        win_length: forwarded to ``logf_stft`` (default 1024).
        hop_length: forwarded to ``logf_stft`` (default 512).
        n_bins_per_octave: forwarded to ``logf_stft`` (default 16).
        n_octaves: forwarded to ``logf_stft`` (default 9).
        f_min: forwarded to ``logf_stft`` (default 40).

    Returns:
        The ``(mX, f_bins)`` pair returned by ``logf_stft``.
    """
    sf_path = kwargs.get('sf_path', "soundfonts/Standard_Drum_Kit.sf2")
    sr = kwargs.get('sr', 44100)
    n_fft = kwargs.get('n_fft', 1024)
    win_length = kwargs.get('win_length', 1024)
    hop_length = kwargs.get('hop_length', 512)
    n_bins_per_octave = kwargs.get('n_bins_per_octave', 16)
    n_octaves = kwargs.get('n_octaves', 9)
    f_min = kwargs.get('f_min', 40)

    # audio
    y = self.synthesize(sr=sr, sf_path=sf_path)

    # Peak-normalize. A silent render has a peak of 0, and dividing by it would
    # fill the signal with NaN, so such a signal is left untouched. The division
    # is done out of place so the array returned by synthesize() is not mutated.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    mX, f_bins = logf_stft(y, n_fft, win_length, hop_length, n_bins_per_octave, n_octaves, f_min, sr)
    return mX, f_bins
def get_onset_strength_spec(self, **kwargs):
    """Calculate the onset strength spectrogram of the synthesized version of the .hvo score.

    The score is rendered to audio with ``self.synthesize``, peak-normalized,
    and passed to ``onset_strength_spec`` together with the analysis settings
    below.

    Keyword Args:
        sf_path: soundfont path given to ``self.synthesize``
            (default ``"soundfonts/Standard_Drum_Kit.sf2"``).
        sr: sample rate given to ``self.synthesize`` and
            ``onset_strength_spec`` (default 44100).
        n_fft: forwarded to ``onset_strength_spec`` (default 1024).
        win_length: forwarded to ``onset_strength_spec`` (default 1024).
        hop_length: forwarded to ``onset_strength_spec`` (default 512).
        n_bins_per_octave: forwarded to ``onset_strength_spec`` (default 16).
        n_octaves: forwarded to ``onset_strength_spec`` (default 9).
        f_min: forwarded to ``onset_strength_spec`` (default 40).
        mean_filter_size: forwarded to ``onset_strength_spec`` (default 22).

    Returns:
        The ``(spec, f_cq)`` pair returned by ``onset_strength_spec``.
    """
    sf_path = kwargs.get('sf_path', "soundfonts/Standard_Drum_Kit.sf2")
    sr = kwargs.get('sr', 44100)
    n_fft = kwargs.get('n_fft', 1024)
    win_length = kwargs.get('win_length', 1024)
    hop_length = kwargs.get('hop_length', 512)
    n_bins_per_octave = kwargs.get('n_bins_per_octave', 16)
    n_octaves = kwargs.get('n_octaves', 9)
    f_min = kwargs.get('f_min', 40)
    mean_filter_size = kwargs.get('mean_filter_size', 22)

    # audio
    y = self.synthesize(sr=sr, sf_path=sf_path)

    # Peak-normalize. A silent render has a peak of 0, and dividing by it would
    # fill the signal with NaN, so such a signal is left untouched. The division
    # is done out of place so the array returned by synthesize() is not mutated.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    # onset strength spectrogram
    spec, f_cq = onset_strength_spec(y, n_fft, win_length, hop_length, n_bins_per_octave, n_octaves, f_min, sr,
                                     mean_filter_size)
    return spec, f_cq
def mso(self, **kwargs):
    """Calculate the multi-band synthesized onsets (MSO).

    Steps: obtain the onset strength spectrogram from
    ``self.get_onset_strength_spec``, reduce it to frequency bands with
    ``reduce_f_bands_in_spec``, run ``detect_onset`` on the band strengths,
    map both onto the grid lines of this sequence with ``map_onsets_to_grid``,
    and join the two gridded arrays along axis 1.

    Keyword Args:
        sf_path, sr, n_fft, win_length, hop_length, n_bins_per_octave,
        n_octaves, f_min, mean_filter_size: forwarded to
            ``self.get_onset_strength_spec``; the defaults are listed in
            ``spec_defaults`` below. ``n_fft``, ``hop_length`` and ``sr`` are
            also given to ``map_onsets_to_grid``.
        c_freq: frequency list given to ``reduce_f_bands_in_spec``
            (default ``[55, 90, 138, 175, 350, 6000, 8500, 12500]``).

    Returns:
        The gridded strengths and gridded onsets concatenated along axis 1.
    """
    # Settings handed to get_onset_strength_spec, with their fallback values.
    spec_defaults = {
        'sf_path': "soundfonts/Standard_Drum_Kit.sf2",
        'sr': 44100,
        'n_fft': 1024,
        'win_length': 1024,
        'hop_length': 512,
        'n_bins_per_octave': 16,
        'n_octaves': 9,
        'f_min': 40,
        'mean_filter_size': 22,
    }
    spec_args = {key: kwargs.get(key, fallback) for key, fallback in spec_defaults.items()}
    band_freqs = kwargs.get('c_freq', [55, 90, 138, 175, 350, 6000, 8500, 12500])

    # onset strength spectrogram
    onset_spec, spec_freqs = self.get_onset_strength_spec(**spec_args)

    # multi-band onset strength, then onset detection on top of it
    band_strength = reduce_f_bands_in_spec(band_freqs, spec_freqs, onset_spec)
    band_onsets = detect_onset(band_strength)

    # map to grid
    grid_lines = np.array(self.__grid_maker.get_grid_lines(self.number_of_steps))
    strength_on_grid, onsets_on_grid = map_onsets_to_grid(
        grid_lines,
        band_strength,
        band_onsets,
        n_fft=spec_args['n_fft'],
        hop_length=spec_args['hop_length'],
        sr=spec_args['sr'],
    )

    # one single array: strengths first, detected onsets second
    return np.concatenate((strength_on_grid, onsets_on_grid), axis=1)