"use strict";
/**
* @license
* Copyright (C) 2006-2020 Music Technology Group - Universitat Pompeu Fabra
*
* This file is part of Essentia
*
* Essentia is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation (FSF), either version 3 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the Affero GNU General Public License
* version 3 along with this program. If not, see http://www.gnu.org/licenses/
*/
/**
 * Drives a generator function to completion, treating every yielded value as
 * something to wait on, and returns a promise for the generator's return value.
 * An already-installed `this.__awaiter` takes precedence over this fallback.
 * @param {*} thisArg `this` binding used when invoking the generator function.
 * @param {Array|undefined} _arguments arguments forwarded to the generator function (defaults to none).
 * @param {Function|undefined} P promise constructor to use; falls back to the global `Promise`.
 * @param {Function} generator generator function to run.
 * @returns {Object} instance of `P` settled with the generator's result or the first uncaught error.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Values that are already instances of P pass through; anything else is wrapped in a resolved P.
    function toPromise(candidate) {
        if (candidate instanceof P) {
            return candidate;
        }
        return new P(function (settle) { settle(candidate); });
    }
    if (!P) {
        P = Promise;
    }
    return new P(function (resolve, reject) {
        // Either finish with the generator's return value or wait on the yielded value.
        function advance(outcome) {
            if (outcome.done) {
                resolve(outcome.value);
                return;
            }
            toPromise(outcome.value).then(onFulfilled, onRejected);
        }
        // Resume the generator with the awaited value.
        function onFulfilled(value) {
            try {
                advance(generator.next(value));
            }
            catch (err) {
                reject(err);
            }
        }
        // Throw the rejection reason into the generator so it can handle it with try/catch.
        function onRejected(reason) {
            try {
                advance(generator.throw(reason));
            }
            catch (err) {
                reject(err);
            }
        }
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
// Generator emulation used by the down-levelled async methods in this file.
// `body` is invoked repeatedly with the state object `_` and returns an
// instruction array `[opcode, value]`; `step` interprets that instruction and
// produces iterator results. An already-installed `this.__generator` takes
// precedence over this fallback.
// NOTE(review): the shape matches the helper emitted by the TypeScript compiler —
// confirm against the build setup before hand-editing.
var __generator = (this && this.__generator) || function (thisArg, body) {
// `_` is the state handed to `body`: `label` is the resume point, `sent()` returns the last
// value passed in (or throws it when the pending op has bit 1 set), and `trys` / `ops` are
// stacks maintained by the opcode handling in `step`.
// `f` is the re-entrancy flag, `y` an inner iterator being delegated to, `t` scratch, `g` the returned iterator.
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
// The iterator exposes next/throw/return (ops 0/1/2) and, where Symbol exists, returns itself from Symbol.iterator.
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
function verb(n) { return function (v) { return step([n, v]); }; }
function step(op) {
// Guard against calling the iterator while it is already running.
if (f) throw new TypeError("Generator is already executing.");
while (_) try {
// While an inner iterator `y` is active, forward the op to its next/throw/return and
// hand back its result unless it reports `done`.
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
// The inner iterator finished: clear it and continue with its final value.
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
// 0/1: remember the op in `t` so `_.sent()` can return or throw its value.
case 0: case 1: t = op; break;
// 4: yield `op[1]` to the caller (call sites in this file annotate 4 as "yield").
case 4: _.label++; return { value: op[1], done: false };
// 5: start delegating to the iterator in `op[1]`.
case 5: _.label++; y = op[1]; op = [0]; continue;
// 7: resume the op saved on `_.ops` and drop the innermost `_.trys` entry.
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
// No active `trys` entry and the op is 6 or 2: terminate the loop by clearing `_`.
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
// 3: jump to label `op[1]` when there is no active entry or the target lies between t[0] and t[3].
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
// 6 raised before label t[1]: continue at t[1] with the op kept in `t` for `_.sent()`.
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
// Before label t[2]: continue at t[2] and save the op on `_.ops` (case 7 pops it again).
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
// Run the body from the current label; it returns the next instruction.
op = body.call(thisArg, _);
// An exception escaping the body (or the dispatch above) becomes op 6 carrying the error.
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
// Loop ended: rethrow when the final opcode has bit 1 or bit 4 set, otherwise report completion.
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
// Sets the `__esModule` flag on the CommonJS exports object
// (conventionally read by module-interop helpers — NOTE(review): confirm consumers rely on it).
exports.__esModule = true;
/**
* Class with methods for computing common feature input representations required
* for the inference of Essentia-Tensorflow.js pre-trained models using EssentiaWASM
* backend which is imported from `essentia-wasm*.js` builds.
* @class
* @example
* // Create `EssentiaTFInputExtractor` instance by passing EssentiaWASM import object and `extractorType` value.
* const extractor = new EssentiaTFInputExtractor(EssentiaWASM, "musicnn");
* // Compute feature for a given frame of audio signal
* let featureMusiCNN = extractor.compute(audioSignalFrame);
* // Change the feature extractor with a new setting for VGGish input
* extractor.extractorType = "vggish";
* let featureVGGish = extractor.compute(audioSignalFrame);
* // Delete and shutdown the extractor instance if you don't need it anymore.
* extractor.delete();
* extractor.shutdown();
*/
var EssentiaTFInputExtractor = /** @class */ (function () {
    /**
     * Settings for every supported `extractorType`: the required input frame size
     * (in samples), the name of the `EssentiaJS` method that computes the feature,
     * and the patch / mel-band sizes reported back to the caller.
     */
    var EXTRACTOR_CONFIGS = {
        musicnn: { frameSize: 512, algorithm: "TensorflowInputMusiCNN", patchSize: 187, melBandsSize: 96 },
        vggish: { frameSize: 400, algorithm: "TensorflowInputVGGish", patchSize: 96, melBandsSize: 64 },
        tempocnn: { frameSize: 1024, algorithm: "TensorflowInputTempoCNN", patchSize: 256, melBandsSize: 40 }
    };
    /**
     * Look up the settings for an extractor type.
     * @param {string} extractorType one of 'musicnn', 'vggish' or 'tempocnn'.
     * @returns {Object} the matching entry of `EXTRACTOR_CONFIGS`.
     * @throws {Error} if `extractorType` is not a supported type.
     */
    function getExtractorConfig(extractorType) {
        // own-property check so inherited keys such as "toString" are not accepted as types
        if (typeof extractorType === "string" && Object.prototype.hasOwnProperty.call(EXTRACTOR_CONFIGS, extractorType)) {
            return EXTRACTOR_CONFIGS[extractorType];
        }
        throw Error("Invalid 'extractorType' choice! Available types are ['musicnn', 'vggish', 'tempocnn']");
    }
    /**
     * Number of samples in a frame given either as an array-like (`length`) or as a
     * vector handle exposing `size()`.
     */
    function getFrameLength(frame) {
        if (frame && typeof frame.size === "function") {
            return frame.size();
        }
        return frame.length;
    }
    /**
     * Free a vector handle created by this class.
     * Assumes handles expose `delete()` like the frames object used in
     * `computeFrameWise`; anything else is ignored.
     */
    function releaseVector(vector) {
        if (vector && typeof vector["delete"] === "function") {
            vector["delete"]();
        }
    }
    /**
     * @constructs
     * @param {EssentiaWASM} EssentiaWASM Essentia WASM Emscripten global module object
     * @param {string} [extractorType='musicnn'] type of the desired extractor (eg. 'musicnn', 'vggish' or 'tempocnn').
     * @param {boolean} [isDebug=false] whether to enable EssentiaWASM internal debugger for logs.
     * @throws {Error} if `extractorType` is not a supported type.
     */
    function EssentiaTFInputExtractor(EssentiaWASM, extractorType, isDebug) {
        if (extractorType === void 0) { extractorType = "musicnn"; }
        if (isDebug === void 0) { isDebug = false; }
        /**
         * @property {EssentiaJS} this.essentia an instance of `EssentiaWASM.EssentiaJS`.
         * @property {EssentiaWASM} this.module the EssentiaWASM module passed to the constructor.
         * @property {number} this.frameSize input frame size required by the current `extractorType`.
         * @property {number} this.sampleRate target sample rate used by `downsampleAudioBuffer`.
         * @property {string} this.extractorType type of the chosen extractor (eg. 'musicnn', 'vggish' or 'tempocnn').
         */
        this.essentia = null;
        this.module = null;
        this.frameSize = 512;
        this.sampleRate = 16000;
        this.extractorType = extractorType;
        // validate before instantiating EssentiaJS so an invalid type allocates nothing
        this.frameSize = getExtractorConfig(extractorType).frameSize;
        this.essentia = new EssentiaWASM.EssentiaJS(isDebug);
        this.module = EssentiaWASM;
    }
    /**
     * Convert a typed JS Float32Array into VectorFloat type.
     * @method
     * @param {Float32Array} inputArray input Float32 typed array.
     * @returns {VectorFloat} returns converted VectorFloat array.
     * @memberof EssentiaTFInputExtractor
     */
    EssentiaTFInputExtractor.prototype.arrayToVector = function (inputArray) {
        return this.module.arrayToVector(inputArray);
    };
    /**
     * Convert an input VectorFloat array into typed JS Float32Array
     * @method
     * @param {VectorFloat} inputVector input VectorFloat array
     * @returns {Float32Array} returns converted JS typed array
     * @memberof EssentiaTFInputExtractor
     */
    EssentiaTFInputExtractor.prototype.vectorToArray = function (inputVector) {
        return this.module.vectorToArray(inputVector);
    };
    /**
     * Fetch an audio url or blob uri and decode it with the Web Audio API.
     * (NOTE: This doesn't work on Safari browser)
     * @async
     * @method
     * @param {string} audioURL web url or blob uri of an audio file
     * @param {AudioContext} webAudioCtx an instance of Web Audio API `AudioContext`
     * @returns {Promise<AudioBuffer>} decoded audio buffer as a promise; rejects if fetching or decoding fails.
     * @memberof EssentiaTFInputExtractor
     */
    EssentiaTFInputExtractor.prototype.getAudioBufferFromURL = function (audioURL, webAudioCtx) {
        // Calling `fetch` inside the executor turns a synchronous throw into a rejection,
        // so callers always get a promise back.
        return new Promise(function (resolve) { resolve(fetch(audioURL)); })
            .then(function (response) { return response.arrayBuffer(); })
            .then(function (arrayBuffer) { return webAudioCtx.decodeAudioData(arrayBuffer); });
    };
    /**
     * Convert an AudioBuffer object to a mono audio signal array. A mono buffer is
     * returned as-is; a stereo buffer is downmixed with the essentia `MonoMixer` algorithm.
     * @method
     * @param {AudioBuffer} buffer `AudioBuffer` object decoded from an audio file.
     * @returns {Float32Array} audio channel data. (downmixed to mono if its stereo signal).
     * @throws {Error} if the buffer has neither 1 nor 2 channels.
     * @memberof EssentiaTFInputExtractor
     */
    EssentiaTFInputExtractor.prototype.audioBufferToMonoSignal = function (buffer) {
        if (buffer.numberOfChannels === 1) {
            return buffer.getChannelData(0);
        }
        if (buffer.numberOfChannels === 2) {
            var left = this.arrayToVector(buffer.getChannelData(0));
            var right;
            try {
                right = this.arrayToVector(buffer.getChannelData(1));
                var mixed = this.essentia.MonoMixer(left, right);
                // NOTE(review): `compute` reads algorithm results through named outputs
                // (`spectrum.bands`), so MonoMixer presumably returns `{ audio }` as well;
                // a bare vector is still accepted. Confirm against the EssentiaJS bindings.
                var monoVector = (mixed && mixed.audio !== undefined) ? mixed.audio : mixed;
                return this.vectorToArray(monoVector);
            }
            finally {
                // the per-channel input vectors were created here and are not needed afterwards
                releaseVector(left);
                releaseVector(right);
            }
        }
        throw new Error('Unexpected number of channels found in audio buffer. Only accepts mono or stereo audio buffers.');
    };
    /**
     * Downsample an audio buffer to `this.sampleRate` using the Web Audio API.
     * NOTE: This method only works on web-browsers which support the Web Audio API.
     * @method
     * @param {AudioBuffer} sourceBuffer `AudioBuffer` object decoded from an audio file.
     * @returns {Promise<Float32Array>} promise for the resampled mono samples; rejects if
     * the promise returned by `startRendering` rejects.
     * @memberof EssentiaTFInputExtractor
     */
    EssentiaTFInputExtractor.prototype.downsampleAudioBuffer = function (sourceBuffer) {
        // adapted from https://github.com/julesyoungberg/soundboy/blob/main/worker/loadSoundFile.ts#L25
        var ctx = new OfflineAudioContext(1, sourceBuffer.duration * this.sampleRate, this.sampleRate);
        // create mono input buffer at the source sample rate
        var buffer = ctx.createBuffer(1, sourceBuffer.length, sourceBuffer.sampleRate);
        buffer.copyToChannel(this.audioBufferToMonoSignal(sourceBuffer), 0);
        // connect the buffer to the context
        var source = ctx.createBufferSource();
        source.buffer = buffer;
        source.connect(ctx.destination);
        // resolve when the source buffer has been rendered to a downsampled buffer
        return new Promise(function (resolve, reject) {
            ctx.oncomplete = function (e) {
                resolve(e.renderedBuffer.getChannelData(0));
            };
            source.start(0);
            var rendering = ctx.startRendering();
            // `startRendering` may return a promise; forward its failure so the caller's
            // promise does not stay pending forever.
            if (rendering && typeof rendering["catch"] === "function") {
                rendering["catch"](reject);
            }
        });
    };
    /**
     * Compute the feature of the current `extractorType` for a single audio frame.
     * `this.frameSize` is refreshed from `this.extractorType` on every call, so the
     * extractor type can be switched between calls.
     * @method
     * @param {Float32Array|VectorFloat} audioFrame a frame of audio signal, either as a Float32
     * typed JS array or as an already converted VectorFloat (which stays owned by the caller).
     * @returns {EssentiaTFInputExtractorOutput} object with `melSpectrum`, `frameSize` (always 1),
     * `patchSize` and `melBandsSize`.
     * @throws {Error} if `extractorType` is invalid or the frame size does not match the one
     * required by the selected `extractorType`.
     * @memberof EssentiaTFInputExtractor
     */
    EssentiaTFInputExtractor.prototype.compute = function (audioFrame) {
        var config = getExtractorConfig(this.extractorType);
        this.frameSize = config.frameSize;
        var isTypedArray = audioFrame instanceof Float32Array;
        // VectorFloat inputs report their size through `size()`, not `length`
        if (getFrameLength(audioFrame) !== config.frameSize) {
            throw new Error("The chosen `extractorType` only works with an audio signal frame size of " + config.frameSize);
        }
        var frame = isTypedArray ? this.arrayToVector(audioFrame) : audioFrame;
        try {
            var spectrum = this.essentia[config.algorithm](frame);
            return {
                melSpectrum: this.vectorToArray(spectrum.bands),
                frameSize: 1,
                patchSize: config.patchSize,
                melBandsSize: config.melBandsSize
            };
        }
        finally {
            // only free the vector created here; a caller-supplied vector is left untouched
            if (isTypedArray) {
                releaseVector(frame);
            }
        }
    };
    /**
     * Compute the feature of the current `extractorType` for a whole audio signal by
     * cutting it into frames and calling `compute` on each of them.
     * @method
     * @param {Float32Array} audioSignal decoded audio signal as Float32 typed JS array.
     * @param {number} [hopSize] hop size between consecutive frames; any falsy value
     * (including 0) falls back to the frame size, i.e. non-overlapping frames.
     * @returns {EssentiaTFInputExtractorOutput} object whose `melSpectrum` is the list of
     * per-frame mel spectra and whose `frameSize` is the number of frames (both empty / 0
     * when the signal yields no frame).
     * @throws {Error} if `extractorType` is invalid.
     * @memberof EssentiaTFInputExtractor
     */
    EssentiaTFInputExtractor.prototype.computeFrameWise = function (audioSignal, hopSize) {
        var config = getExtractorConfig(this.extractorType);
        this.frameSize = config.frameSize;
        var effectiveHopSize = hopSize ? hopSize : this.frameSize;
        // compute overlapping frames given frameSize, hopSize
        var frames = this.essentia.FrameGenerator(audioSignal, this.frameSize, effectiveHopSize);
        try {
            var frameCount = frames.size();
            var melSpectrogram = [];
            for (var i = 0; i < frameCount; i++) {
                melSpectrogram.push(this.compute(this.vectorToArray(frames.get(i))).melSpectrum);
            }
            return {
                melSpectrum: melSpectrogram,
                frameSize: frameCount,
                patchSize: config.patchSize,
                melBandsSize: config.melBandsSize
            };
        }
        finally {
            // release the frames even when a frame fails to compute
            frames["delete"]();
        }
    };
    /**
     * Delete the essentia instance held by this extractor by calling its `delete()`.
     * @method
     * @returns {undefined}
     * @memberof EssentiaTFInputExtractor
     */
    EssentiaTFInputExtractor.prototype["delete"] = function () {
        this.essentia["delete"]();
    };
    /**
     * Call `shutdown()` on the essentia instance held by this extractor.
     * NOTE: If you just want to free the pre-configured extractor, use `delete()` instead.
     * @method
     * @returns {undefined}
     * @memberof EssentiaTFInputExtractor
     */
    EssentiaTFInputExtractor.prototype.shutdown = function () {
        this.essentia.shutdown();
    };
    return EssentiaTFInputExtractor;
}());
// Expose the extractor class as a named property of the CommonJS exports object.
exports.EssentiaTFInputExtractor = EssentiaTFInputExtractor;