Source code for compiam.melody.pitch_extraction.melodia
import os
import numpy as np
from compiam.utils.pitch import normalisation, resampling
from compiam.io import write_csv
from compiam.utils import get_logger, stereo_to_mono
# Module-level logger obtained from compiam's get_logger helper, keyed by this
# module's name; used below to warn about resampling of array inputs.
logger = get_logger(__name__)
[docs]
class Melodia:
    """Melodia predominant melody extraction.

    Wrapper around essentia's ``PredominantPitchMelodia`` algorithm. The
    configuration is stored on the instance at construction time and forwarded
    to essentia every time :meth:`extract` is called.
    """

    def __init__(
        self,
        bin_resolution=10,
        filter_iterations=3,
        frame_size=2048,
        guess_unvoiced=False,
        harmonic_weight=0.8,
        hop_size=128,
        magnitude_compression=1,
        magnitude_threshold=40,
        max_frequency=20000,
        min_duration=100,
        min_frequency=80,
        num_harmonics=20,
        peak_distribution_threshold=0.9,
        peak_frame_threshold=0.9,
        pitch_continuity=27.5625,
        reference_frequency=55,
        sample_rate=44100,
        time_continuity=100,
        voice_vibrato=False,
        voicing_tolerance=0.2,
    ):
        """Melodia predominant melody extraction init method.

        Every argument is stored unchanged as an instance attribute of the same
        name and is later passed to ``essentia.standard.PredominantPitchMelodia``
        by :meth:`extract`. ``sample_rate`` is additionally the rate audio is
        loaded/resampled to, and ``hop_size`` together with ``sample_rate``
        determines the output time-stamps.

        For a complete and detailed list of the parameters see the documentation on the
        following link: https://essentia.upf.edu/reference/std_PredominantPitchMelodia.html

        :raises ImportError: if essentia cannot be imported.
        """
        ### IMPORTING OPTIONAL DEPENDENCIES
        # essentia is bound as a module-level global so that extract() can
        # reach it without importing again.
        global estd
        try:
            import essentia.standard as estd
        except ImportError as err:
            # Only import failures are translated; chaining keeps the root cause
            # visible, and KeyboardInterrupt/SystemExit are no longer swallowed.
            raise ImportError(
                "In order to use this tool you need to have essentia installed. "
                "Install compIAM with essentia support: pip install 'compiam[essentia]'"
            ) from err
        ###

        self.bin_resolution = bin_resolution
        self.filter_iterations = filter_iterations
        self.frame_size = frame_size
        self.guess_unvoiced = guess_unvoiced
        self.harmonic_weight = harmonic_weight
        self.hop_size = hop_size
        self.magnitude_compression = magnitude_compression
        self.magnitude_threshold = magnitude_threshold
        self.max_frequency = max_frequency
        self.min_duration = min_duration
        self.min_frequency = min_frequency
        self.num_harmonics = num_harmonics
        self.peak_distribution_threshold = peak_distribution_threshold
        self.peak_frame_threshold = peak_frame_threshold
        self.pitch_continuity = pitch_continuity
        self.reference_frequency = reference_frequency
        self.sample_rate = sample_rate
        self.time_continuity = time_continuity
        self.voice_vibrato = voice_vibrato
        self.voicing_tolerance = voicing_tolerance

    def extract(self, input_data, input_sr=44100, out_step=None):
        """Extract the melody from a given file.

        :param input_data: path to audio file (``str`` or ``os.PathLike``) or numpy
            array like audio signal
        :param input_sr: sampling rate of the input array of data (if any). This variable is only
            relevant if the input is an array of data instead of a filepath.
        :param out_step: particular time-step duration (in seconds) if needed at output.
            Must be strictly positive when given.
        :returns: a 2-D array with one row per frame holding the time-stamp and the
            pitch value for that frame. When ``out_step`` is given, the result of
            passing that array through ``compiam.utils.pitch.resampling``.
        :raises ValueError: if ``out_step`` is not positive, or if ``input_data`` is
            neither a path nor a numpy array.
        :raises FileNotFoundError: if ``input_data`` is a path that does not exist.
        """
        # Fail fast: a zero step would otherwise surface as a ZeroDivisionError
        # only after the (expensive) extraction has already run.
        if out_step is not None and out_step <= 0:
            raise ValueError("out_step must be a positive duration in seconds")

        if isinstance(input_data, (str, os.PathLike)):
            # Normalise pathlib.Path and friends to the plain path essentia expects.
            audio_path = os.fspath(input_data)
            if not os.path.exists(audio_path):
                raise FileNotFoundError("Target audio not found.")
            audio = estd.EqloudLoader(
                filename=audio_path, sampleRate=self.sample_rate
            )()
        elif isinstance(input_data, np.ndarray):
            input_data = stereo_to_mono(input_data)
            logger.warning(
                f"Resampling... (input sampling rate is {input_sr}Hz, make sure this is correct)"
            )
            resample_audio = estd.Resample(
                inputSampleRate=input_sr, outputSampleRate=self.sample_rate
            )
            input_data = resample_audio(input_data)
            # Apply Eqloudness filter
            audio = estd.EqualLoudness(sampleRate=self.sample_rate)(input_data)
        else:
            raise ValueError("Input must be path to audio signal or an audio array")

        extractor = estd.PredominantPitchMelodia(
            binResolution=self.bin_resolution,
            filterIterations=self.filter_iterations,
            frameSize=self.frame_size,
            guessUnvoiced=self.guess_unvoiced,
            harmonicWeight=self.harmonic_weight,
            hopSize=self.hop_size,
            magnitudeCompression=self.magnitude_compression,
            magnitudeThreshold=self.magnitude_threshold,
            maxFrequency=self.max_frequency,
            minDuration=self.min_duration,
            minFrequency=self.min_frequency,
            numberHarmonics=self.num_harmonics,
            peakDistributionThreshold=self.peak_distribution_threshold,
            peakFrameThreshold=self.peak_frame_threshold,
            pitchContinuity=self.pitch_continuity,
            referenceFrequency=self.reference_frequency,
            sampleRate=self.sample_rate,
            timeContinuity=self.time_continuity,
            voiceVibrato=self.voice_vibrato,
            voicingTolerance=self.voicing_tolerance,
        )
        # The extractor's second output is not used here.
        pitch, _ = extractor(audio)

        # Frame i starts i * hop_size samples into the signal.
        timestamps = np.arange(len(pitch)) * float(self.hop_size) / self.sample_rate
        output = np.array([timestamps, pitch]).transpose()

        if out_step is not None:
            # Number of whole out_step intervals that fit in the audio duration.
            new_len = int((len(audio) / self.sample_rate) // out_step)
            return resampling(output, new_len)

        return output

    @staticmethod
    def normalise_pitch(pitch, tonic, bins_per_octave=120, max_value=4):
        """Normalize pitch given a tonic.

        Delegates to ``compiam.utils.pitch.normalisation``.

        :param pitch: a 2-D list with time-stamps and pitch values per timestamp.
        :param tonic: recording tonic to normalize the pitch to.
        :param bins_per_octave: number of frequency bins per octave.
        :param max_value: maximum value to clip the normalized pitch to.
        :returns: a 2-D list with time-stamps and normalized to a given tonic
            pitch values per timestamp.
        """
        return normalisation(
            pitch, tonic, bins_per_octave=bins_per_octave, max_value=max_value
        )

    @staticmethod
    def save_pitch(data, output_path):
        """Calling the write_csv function in compiam.io to write the output pitch curve in a file.

        :param data: the data to write
        :param output_path: the path where the data is going to be stored
        :returns: whatever ``compiam.io.write_csv`` returns.
        """
        return write_csv(data, output_path)