S35 - MSO Extraction Scripts


Table of contents


The extraction scripts are available in the hvo_sequence directory of the GrooveTransformer repository.

#   -------------------------------------------------------------
#   MSO::Multiband Synthesized Onsets
#   -------------------------------------------------------------
def get_logf_stft(self, **kwargs):
    """Compute the log-frequency STFT of the synthesized version of the .hvo score.

    The score is rendered to audio with ``self.synthesize``, peak-normalized,
    and passed to ``logf_stft`` together with the analysis settings below.

    Keyword Args:
        sf_path: soundfont path handed to ``self.synthesize``
            (default ``"soundfonts/Standard_Drum_Kit.sf2"``).
        sr: sampling rate used for synthesis and analysis (default 44100).
        n_fft: forwarded to ``logf_stft`` (default 1024).
        win_length: forwarded to ``logf_stft`` (default 1024).
        hop_length: forwarded to ``logf_stft`` (default 512).
        n_bins_per_octave: forwarded to ``logf_stft`` (default 16).
        n_octaves: forwarded to ``logf_stft`` (default 9).
        f_min: forwarded to ``logf_stft`` (default 40).

    Returns:
        The ``(mX, f_bins)`` pair returned by ``logf_stft`` -- presumably the
        log-frequency magnitude spectrogram and its bin frequencies
        (TODO confirm against ``logf_stft``).
    """
    sf_path = kwargs.get('sf_path', "soundfonts/Standard_Drum_Kit.sf2")
    sr = kwargs.get('sr', 44100)
    n_fft = kwargs.get('n_fft', 1024)
    win_length = kwargs.get('win_length', 1024)
    hop_length = kwargs.get('hop_length', 512)
    n_bins_per_octave = kwargs.get('n_bins_per_octave', 16)
    n_octaves = kwargs.get('n_octaves', 9)
    f_min = kwargs.get('f_min', 40)

    # audio
    y = self.synthesize(sr=sr, sf_path=sf_path)

    # Peak-normalize. An all-zero signal has a peak of 0, and dividing by it
    # would fill the audio with NaNs, so silence is left untouched.
    peak = np.max(np.abs(y))
    if peak != 0:
        y /= peak

    mX, f_bins = logf_stft(y, n_fft, win_length, hop_length, n_bins_per_octave, n_octaves, f_min, sr)

    return mX, f_bins

def get_onset_strength_spec(self, **kwargs):
    """Compute the onset strength spectrogram of the synthesized version of the .hvo score.

    The score is rendered to audio with ``self.synthesize``, peak-normalized,
    and passed to ``onset_strength_spec`` together with the analysis settings
    below.

    Keyword Args:
        sf_path: soundfont path handed to ``self.synthesize``
            (default ``"soundfonts/Standard_Drum_Kit.sf2"``).
        sr: sampling rate used for synthesis and analysis (default 44100).
        n_fft: forwarded to ``onset_strength_spec`` (default 1024).
        win_length: forwarded to ``onset_strength_spec`` (default 1024).
        hop_length: forwarded to ``onset_strength_spec`` (default 512).
        n_bins_per_octave: forwarded to ``onset_strength_spec`` (default 16).
        n_octaves: forwarded to ``onset_strength_spec`` (default 9).
        f_min: forwarded to ``onset_strength_spec`` (default 40).
        mean_filter_size: forwarded to ``onset_strength_spec`` (default 22).

    Returns:
        The ``(spec, f_cq)`` pair returned by ``onset_strength_spec`` --
        presumably the onset strength spectrogram and the frequencies of its
        bins (TODO confirm against ``onset_strength_spec``).
    """
    sf_path = kwargs.get('sf_path', "soundfonts/Standard_Drum_Kit.sf2")
    sr = kwargs.get('sr', 44100)
    n_fft = kwargs.get('n_fft', 1024)
    win_length = kwargs.get('win_length', 1024)
    hop_length = kwargs.get('hop_length', 512)
    n_bins_per_octave = kwargs.get('n_bins_per_octave', 16)
    n_octaves = kwargs.get('n_octaves', 9)
    f_min = kwargs.get('f_min', 40)
    mean_filter_size = kwargs.get('mean_filter_size', 22)

    # audio
    y = self.synthesize(sr=sr, sf_path=sf_path)

    # Peak-normalize. An all-zero signal has a peak of 0, and dividing by it
    # would fill the audio with NaNs, so silence is left untouched.
    peak = np.max(np.abs(y))
    if peak != 0:
        y /= peak

    # onset strength spectrogram
    spec, f_cq = onset_strength_spec(y, n_fft, win_length, hop_length, n_bins_per_octave, n_octaves, f_min, sr,
                                     mean_filter_size)

    return spec, f_cq

def mso(self, **kwargs):
    """Compute the Multi-band Synthesized Onsets (MSO) of the score.

    Steps: obtain the onset strength spectrogram from
    ``self.get_onset_strength_spec``, reduce it to the bands given by
    ``c_freq``, detect onsets per band, map both the strengths and the
    detections onto the grid lines of the score, and join the two grids
    side by side.

    Keyword Args:
        sf_path, sr, n_fft, win_length, hop_length, n_bins_per_octave,
        n_octaves, f_min, mean_filter_size: forwarded to
            ``self.get_onset_strength_spec``; defaults are
            ``"soundfonts/Standard_Drum_Kit.sf2"``, 44100, 1024, 1024, 512,
            16, 9, 40 and 22 respectively.
        c_freq: band frequencies handed to ``reduce_f_bands_in_spec``
            (default ``[55, 90, 138, 175, 350, 6000, 8500, 12500]``).

    Returns:
        The grid-mapped strengths and grid-mapped onsets concatenated
        along axis 1.
    """
    # Options shared with the spectrogram stage, kept in the order in which
    # they are passed on as keyword arguments.
    analysis = {
        'sf_path': kwargs.get('sf_path', "soundfonts/Standard_Drum_Kit.sf2"),
        'n_fft': kwargs.get('n_fft', 1024),
        'win_length': kwargs.get('win_length', 1024),
        'hop_length': kwargs.get('hop_length', 512),
        'n_bins_per_octave': kwargs.get('n_bins_per_octave', 16),
        'n_octaves': kwargs.get('n_octaves', 9),
        'f_min': kwargs.get('f_min', 40),
        'sr': kwargs.get('sr', 44100),
        'mean_filter_size': kwargs.get('mean_filter_size', 22),
    }
    band_freqs = kwargs.get('c_freq', [55, 90, 138, 175, 350, 6000, 8500, 12500])

    # onset strength spectrogram of the synthesized score
    onset_spec, bin_freqs = self.get_onset_strength_spec(**analysis)

    # per-band onset strength, then per-band onset detection
    band_strength = reduce_f_bands_in_spec(band_freqs, bin_freqs, onset_spec)
    band_onsets = detect_onset(band_strength)

    # align both representations with the grid lines of the score
    grid_lines = np.array(self.__grid_maker.get_grid_lines(self.number_of_steps))
    strength_on_grid, onsets_on_grid = map_onsets_to_grid(
        grid_lines,
        band_strength,
        band_onsets,
        n_fft=analysis['n_fft'],
        hop_length=analysis['hop_length'],
        sr=analysis['sr'],
    )

    # strengths first, detections second, joined along axis 1
    return np.concatenate((strength_on_grid, onsets_on_grid), axis=1)