Source code for compiam.melody.pitch_extraction.melodia
import os
import numpy as np
from compiam.utils.pitch import normalisation, resampling
from compiam.io import write_csv
from compiam.utils import get_logger
logger = get_logger(__name__)
class Melodia:
    """Melodia predominant melody extraction.

    Wrapper around essentia's ``PredominantPitchMelodia`` algorithm. The
    constructor stores the algorithm parameters on the instance and
    :meth:`extract` forwards them to essentia when it is called.
    """

    def __init__(
        self,
        binResolution=10,
        filterIterations=3,
        frameSize=2048,
        guessUnvoiced=False,
        harmonicWeight=0.8,
        hopSize=128,
        magnitudeCompression=1,
        magnitudeThreshold=40,
        maxFrequency=20000,
        minDuration=100,
        minFrequency=80,
        numberHarmonics=20,
        peakDistributionThreshold=0.9,
        peakFrameThreshold=0.9,
        pitchContinuity=27.5625,
        referenceFrequency=55,
        sampleRate=44100,
        timeContinuity=100,
        voiceVibrato=False,
        voicingTolerance=0.2,
    ):
        """Melodia predominant melody extraction init method

        For a complete and detailed list of the parameters see the documentation on the
        following link: https://essentia.upf.edu/reference/std_PredominantPitchMelodia.html

        :raises ImportError: if essentia cannot be imported.
        """
        # essentia is an optional dependency: it is imported lazily here and
        # published as a module-level global so that extract() can use it.
        global estd
        try:
            import essentia.standard as estd
        except Exception as err:
            # Catch Exception (not a bare except) so KeyboardInterrupt and
            # SystemExit still propagate; keep the original cause chained.
            raise ImportError(
                "In order to use this tool you need to have essentia installed. "
                "Please install essentia using: pip install essentia"
            ) from err

        self.binResolution = binResolution
        self.filterIterations = filterIterations
        self.frameSize = frameSize
        self.guessUnvoiced = guessUnvoiced
        self.harmonicWeight = harmonicWeight
        self.hopSize = hopSize
        self.magnitudeCompression = magnitudeCompression
        self.magnitudeThreshold = magnitudeThreshold
        self.maxFrequency = maxFrequency
        self.minDuration = minDuration
        self.minFrequency = minFrequency
        self.numberHarmonics = numberHarmonics
        self.peakDistributionThreshold = peakDistributionThreshold
        self.peakFrameThreshold = peakFrameThreshold
        self.pitchContinuity = pitchContinuity
        self.referenceFrequency = referenceFrequency
        self.sampleRate = sampleRate
        self.timeContinuity = timeContinuity
        self.voiceVibrato = voiceVibrato
        self.voicingTolerance = voicingTolerance

    def extract(self, input_data, input_sr=44100, out_step=None):
        """Extract the melody from a given file.

        :param input_data: path to audio file or numpy array like audio signal
            (arrays must be mono, i.e. 1-D).
        :param input_sr: sampling rate of the input array of data (if any). This variable is only
            relevant if the input is an array of data instead of a filepath.
        :param out_step: particular time-step duration if needed at output
        :returns: a 2-D numpy array with one row per frame holding the time-stamp
            and the pitch value. If ``out_step`` is given, the result of
            ``resampling`` applied to that array is returned instead.
        :raises FileNotFoundError: if ``input_data`` is a path that does not exist.
        :raises ValueError: if ``input_data`` is neither a path nor a 1-D array.
        """
        if isinstance(input_data, str):
            if not os.path.exists(input_data):
                raise FileNotFoundError("Target audio not found.")
            # EqloudLoader loads the file at the requested rate and applies
            # the equal-loudness filter in one step.
            audio = estd.EqloudLoader(
                filename=input_data, sampleRate=self.sampleRate
            )()
        elif isinstance(input_data, np.ndarray):
            # Validate before touching essentia so the caller gets a clear
            # message instead of an opaque conversion error.
            if input_data.ndim != 1:
                raise ValueError(
                    "Input audio array must be mono (1-D), "
                    f"got an array of shape {input_data.shape}"
                )
            logger.warning(
                f"Resampling... (input sampling rate is {input_sr}Hz, make sure this is correct)"
            )
            # essentia's Python bindings operate on single-precision vectors.
            signal = np.ascontiguousarray(input_data, dtype=np.float32)
            # Configure the algorithm first, then call it on the signal
            # (essentia pattern: Algo(**params)(inputs)).
            resample_audio = estd.Resample(
                inputSampleRate=input_sr, outputSampleRate=self.sampleRate
            )
            signal = resample_audio(signal)
            audio = estd.EqualLoudness(sampleRate=self.sampleRate)(signal)
        else:
            raise ValueError("Input must be path to audio signal or an audio array")

        extractor = estd.PredominantPitchMelodia(
            binResolution=self.binResolution,
            filterIterations=self.filterIterations,
            frameSize=self.frameSize,
            guessUnvoiced=self.guessUnvoiced,
            harmonicWeight=self.harmonicWeight,
            hopSize=self.hopSize,
            magnitudeCompression=self.magnitudeCompression,
            magnitudeThreshold=self.magnitudeThreshold,
            maxFrequency=self.maxFrequency,
            minDuration=self.minDuration,
            minFrequency=self.minFrequency,
            numberHarmonics=self.numberHarmonics,
            peakDistributionThreshold=self.peakDistributionThreshold,
            peakFrameThreshold=self.peakFrameThreshold,
            pitchContinuity=self.pitchContinuity,
            referenceFrequency=self.referenceFrequency,
            sampleRate=self.sampleRate,
            timeContinuity=self.timeContinuity,
            voiceVibrato=self.voiceVibrato,
            voicingTolerance=self.voicingTolerance,
        )
        # The second output of the extractor is not used here.
        pitch, _ = extractor(audio)

        # One time-stamp per pitch frame: frame index * hop duration (seconds).
        time_stamps = np.arange(len(pitch)) * float(self.hopSize) / self.sampleRate
        output = np.array([time_stamps, pitch]).transpose()

        if out_step is not None:
            # Number of output frames = audio duration divided by the step.
            new_len = int((len(audio) / self.sampleRate) // out_step)
            return resampling(output, new_len)
        return output

    @staticmethod
    def normalise_pitch(pitch, tonic, bins_per_octave=120, max_value=4):
        """Normalize pitch given a tonic.

        Delegates to ``compiam.utils.pitch.normalisation``.

        :param pitch: a 2-D list with time-stamps and pitch values per timestamp.
        :param tonic: recording tonic to normalize the pitch to.
        :param bins_per_octave: number of frequency bins per octave.
        :param max_value: maximum value to clip the normalized pitch to.
        :returns: a 2-D list with time-stamps and normalized to a given tonic
            pitch values per timestamp.
        """
        return normalisation(
            pitch, tonic, bins_per_octave=bins_per_octave, max_value=max_value
        )

    @staticmethod
    def save_pitch(data, output_path):
        """Calling the write_csv function in compiam.io to write the output pitch curve in a file

        :param data: the data to write
        :param output_path: the path where the data is going to be stored
        :returns: the value returned by ``write_csv`` (documented as None)
        """
        return write_csv(data, output_path)