"use strict";
/**
* @license
* Copyright (C) 2006-2020 Music Technology Group - Universitat Pompeu Fabra
*
* This file is part of Essentia
*
* Essentia is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation (FSF), either version 3 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License
* version 3 along with this program. If not, see http://www.gnu.org/licenses/
*/
var __extends = (this && this.__extends) || (function () {
var extendStatics = function (d, b) {
extendStatics = Object.setPrototypeOf ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; };
return extendStatics(d, b);
};
return function (d, b) {
extendStatics(d, b);
function __() { this.constructor = d; }
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
};
})();
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
var __generator = (this && this.__generator) || function (thisArg, body) {
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
function verb(n) { return function (v) { return step([n, v]); }; }
function step(op) {
if (f) throw new TypeError("Generator is already executing.");
while (_) try {
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
case 0: case 1: t = op; break;
case 4: _.label++; return { value: op[1], done: false };
case 5: _.label++; y = op[1]; op = [0]; continue;
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
op = body.call(thisArg, _);
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
exports.__esModule = true;
/**
* Base class for loading a pre-trained Essentia-Tensorflow.js model for inference
* using TensorFlow.js.
* @class
*/
var EssentiaTensorflowJSModel = /** @class */ (function () {
function EssentiaTensorflowJSModel(tfjs, modelPath, verbose) {
if (verbose === void 0) { verbose = false; }
this.model = null; // TF.js GraphModel instance, set by `initialize`
this.audioSampleRate = 16000; // sample rate expected by the Essentia-TF.js models
this.tf = null; // handle to the injected `@tensorflow/tfjs*` module
this.isReady = false; // set to true once `initialize` has resolved
this.modelPath = ""; // URL or path of the `model.json` to load
this.IS_TRAIN = null; // see the constructor body below
this.randomTensorInput = null; // dummy tensor required by some converted models
this.minimumInputFrameSize = null; // minimum number of feature frames accepted by `predict`
this.tf = tfjs;
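// boolean tensor flagging inference (not training) mode; some converted models require it as an extra input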
this.IS_TRAIN = this.tf.tensor([0], [1], 'bool');
this.modelPath = modelPath;
this.isReady = !!this.model;
}
/**
* Loads and initialises the Essentia.js-TensorFlow.js model. Returns a promise
* that resolves once the model is ready for inference.
* @async
* @method
* @memberof EssentiaTensorflowJSModel
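* @example
* // A minimal usage sketch (the model URL is hypothetical):
* const model = new EssentiaTensorflowJSModel(tf, "./model.json");
* await model.initialize();
* // `model.isReady` is now true and `predict` can be called on the subclasses.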
*/
EssentiaTensorflowJSModel.prototype.initialize = function () {
return __awaiter(this, void 0, void 0, function () {
var _a;
return __generator(this, function (_b) {
switch (_b.label) {
case 0:
_a = this;
return [4 /*yield*/, this.tf.loadGraphModel(this.modelPath)];
case 1:
_a.model = _b.sent();
this.isReady = true;
return [2 /*return*/];
}
});
});
};
/**
* Converts an input 1D or 2D array into a 3D TF.js tensor, given its shape and the
* required patchSize. If `zeroPadding=true`, this method zero-pads the input feature
* to fill the last batch.
*
* @method
* @param {Float32Array|any[]} inputFeatureArray input feature as either a 1D or 2D array
* @param {any[]} inputShape 2D shape of the input feature array as [frameSize, melBandsSize].
* @param {number} patchSize patch size used to slice the input feature into batches
* @param {boolean} [zeroPadding=false] whether to zero-pad the last batch when it holds fewer frames than `patchSize`.
* @returns {tf.Tensor3D} the input feature as a 3D tensor of shape [batchSize, patchSize, melBandsSize].
* @memberof EssentiaTensorflowJSModel
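* @example
* // A minimal sketch with hypothetical sizes: a 2D mel-spectrogram of
* // 200 frames x 96 bands with patchSize=187. With `zeroPadding=true` this
* // yields ceil(200 / 187) = 2 batches, i.e. a [2, 187, 96] tensor whose
* // last 174 frames are zero-filled.
* const batched = this.arrayToTensorAsBatches(melSpectrum, [200, 96], 187, true);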
*/
EssentiaTensorflowJSModel.prototype.arrayToTensorAsBatches = function (inputfeatureArray, inputShape, patchSize, zeroPadding) {
if (zeroPadding === void 0) { zeroPadding = false; }
// convert the input array (flattened 1D or nested 2D) into a 2D tensor with the given shape
var featureTensor = this.tf.tensor(inputfeatureArray, inputShape, 'float32');
// create a tensor of zeros for zero-padding the output tensor if necessary
var zeroPadTensor;
// variable to store the dynamic batch size computed from given input array and patchSize
var batchSize;
if (!zeroPadding) {
this.assertMinimumFeatureInputSize({
melSpectrum: inputfeatureArray,
frameSize: inputShape[0],
melBandsSize: inputShape[1],
patchSize: patchSize
});
return featureTensor.as3D(1, patchSize, inputShape[1]);
}
// return the feature with batch size 1 if the number of frames equals patchSize
else if (inputShape[0] === patchSize) {
return featureTensor.as3D(1, patchSize, inputShape[1]);
}
// otherwise zero-pad the feature so it fills whole batches
else if (inputShape[0] > patchSize) {
if ((inputShape[0] % patchSize) != 0) {
batchSize = Math.floor(inputShape[0] / patchSize) + 1;
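// number of zero frames needed to fill the last batch; note `inputfeatureArray.length`
// is treated as the frame count, which assumes the feature is a 2D array (one sub-array per frame)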
zeroPadTensor = this.tf.zeros([
Math.floor(batchSize * patchSize - inputfeatureArray.length),
inputShape[1]
], 'float32');
featureTensor = featureTensor.concat(zeroPadTensor);
zeroPadTensor.dispose();
return featureTensor.as3D(batchSize, patchSize, inputShape[1]);
}
else {
batchSize = Math.floor(inputShape[0] / patchSize);
zeroPadTensor = this.tf.zeros([
Math.floor(batchSize * patchSize - inputfeatureArray.length),
inputShape[1]
], 'float32');
featureTensor = featureTensor.concat(zeroPadTensor);
zeroPadTensor.dispose();
return featureTensor.as3D(batchSize, patchSize, inputShape[1]);
}
}
else {
// fixed batchSize=1 when the input array has fewer frames than the given patchSize.
batchSize = 1;
zeroPadTensor = this.tf.zeros([patchSize - inputShape[0], inputShape[1]]);
featureTensor = featureTensor.concat(zeroPadTensor);
zeroPadTensor.dispose();
return featureTensor.as3D(batchSize, patchSize, inputShape[1]);
}
};
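/**
* Frees the memory held by the loaded TF.js model. Call this once the model
* instance is no longer needed.
* @method
* @memberof EssentiaTensorflowJSModel
*/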
EssentiaTensorflowJSModel.prototype.dispose = function () {
this.model.dispose();
};
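/**
* Checks the input feature frame count against the required patch size (the
* current check demands exactly one full patch) and throws a descriptive
* error on mismatch. Only used when `zeroPadding=false`.
* @method
* @memberof EssentiaTensorflowJSModel
*/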
EssentiaTensorflowJSModel.prototype.assertMinimumFeatureInputSize = function (inputFeature) {
this.minimumInputFrameSize = inputFeature.patchSize; // at least 1 full patch
if (inputFeature.melSpectrum.length != this.minimumInputFrameSize) {
// let minimumAudioDuration = this.minimumInputFrameSize / this.audioSampleRate; // <-- cannot provide accurate duration without model input hopSize
throw Error("When `padding=false` in `predict` method, the model expect audio feature for a minimum frame size of "
+ this.minimumInputFrameSize + ". Was given " + inputFeature.melSpectrum.length + " melband frames");
}
};
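/**
* Returns any auxiliary input tensors the loaded model expects besides the
* audio feature, based on the number of inputs the model declares.
* @method
* @memberof EssentiaTensorflowJSModel
*/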
EssentiaTensorflowJSModel.prototype.disambiguateExtraInputs = function () {
if (!this.isReady)
throw Error("No loaded tfjs model found! Make sure to call `initialize` method and resolve the promise before calling `predict` method.");
var inputsCount = this.model.executor.inputs.length;
if (inputsCount === 1) {
return [];
}
else if (inputsCount === 2) {
return [this.IS_TRAIN.clone()];
}
else if (inputsCount === 3) {
// The tensorflowjs-converter adds an extra input tensor, not connected to
// any other layer, for some VGGish models trained on AudioSet. The TF.js
// model nevertheless requires this insignificant tensor on the prediction
// call. This will be removed once it is sorted out in the conversion process.
if (!this.randomTensorInput)
this.randomTensorInput = this.tf.zeros([1, this.model.executor.inputs[0].shape[1]]);
return [this.randomTensorInput.clone(), this.IS_TRAIN.clone()];
}
else {
throw Error("Found unsupported number of input requirements for the model. Expects the following inputs -> " + this.model.executor.inputs);
}
};
return EssentiaTensorflowJSModel;
}());
exports.EssentiaTensorflowJSModel = EssentiaTensorflowJSModel;
/**
* Class with methods for computing inference of
* Essentia-Tensorflow.js MusiCNN-based pre-trained models.
* The `predict` method expects an input audio feature computed
* using `EssentiaTFInputExtractor`.
* @class
* @example
*
* // FEATURE EXTRACTION
* // Create `EssentiaTFInputExtractor` instance by passing
* // essentia-wasm import `EssentiaWASM` global object and `extractorType=musicnn`.
* const inputFeatureExtractor = new EssentiaTFInputExtractor(EssentiaWASM, "musicnn");
* // Compute feature for a given audio signal
* let inputMusiCNN = inputFeatureExtractor.computeFrameWise(audioSignal);
* // INFERENCE
* const modelURL = "./autotagging/msd/msd-musicnn-1/model.json"
* // Where `tf` is the global import object from the `@tensorflow/tfjs*` package.
* const musicnn = new TensorflowMusiCNN(tf, modelURL);
* // Promise for loading the model
* await musicnn.initialize();
* // Compute predictions for a given input feature.
* let predictions = await musicnn.predict(inputMusiCNN);
* @extends {EssentiaTensorflowJSModel}
*/
var TensorflowMusiCNN = /** @class */ (function (_super) {
__extends(TensorflowMusiCNN, _super);
function TensorflowMusiCNN(tfjs, model_url, verbose) {
if (verbose === void 0) { verbose = false; }
var _this = _super.call(this, tfjs, model_url) || this;
_this.minimumInputFrameSize = 3;
return _this;
}
/**
* Runs inference on the given audio feature and returns the activations of the model's output layer.
* @param {InputMusiCNN} inputFeature audio feature required by the MusiCNN model.
* @param {boolean} [zeroPadding=false] whether to zero-pad the input feature (see `arrayToTensorAsBatches`).
* @returns {array} activations of the output layer of the model
* @memberof TensorflowMusiCNN
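* @example
* // A brief sketch of consuming the output (model and feature as in the class
* // example above): `predictions` is a nested array with one row of
* // activations per input patch.
* let predictions = await musicnn.predict(inputMusiCNN, true);
* let firstPatchActivations = predictions[0];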
*/
TensorflowMusiCNN.prototype.predict = function (inputFeature, zeroPadding) {
if (zeroPadding === void 0) { zeroPadding = false; }
return __awaiter(this, void 0, void 0, function () {
var featureTensor, modelInputs, results, resultsArray;
return __generator(this, function (_a) {
switch (_a.label) {
case 0:
featureTensor = this.arrayToTensorAsBatches(inputFeature.melSpectrum, [inputFeature.frameSize, inputFeature.melBandsSize], inputFeature.patchSize, zeroPadding);
modelInputs = this.disambiguateExtraInputs();
// add the input feature tensor to the model inputs
modelInputs.push(featureTensor);
results = this.model.execute(modelInputs);
// free tensors
featureTensor.dispose();
return [4 /*yield*/, results.array()];
case 1:
resultsArray = _a.sent();
results.dispose();
return [2 /*return*/, resultsArray];
}
});
});
};
return TensorflowMusiCNN;
}(EssentiaTensorflowJSModel));
exports.TensorflowMusiCNN = TensorflowMusiCNN;
/**
* Class with methods for computing inference of
* Essentia-Tensorflow.js VGGish-based pre-trained models.
* The `predict` method expects an input audio feature computed
* using `EssentiaTFInputExtractor`.
* @class
* @example
* // FEATURE EXTRACTION
* // Create `EssentiaTFInputExtractor` instance by passing
* // essentia-wasm import `EssentiaWASM` global object and `extractorType=vggish`.
* const inputFeatureExtractor = new EssentiaTFInputExtractor(EssentiaWASM, "vggish");
* // Compute feature for a given audio signal array
* let inputVGGish = inputFeatureExtractor.computeFrameWise(audioSignal);
* // INFERENCE
* const modelURL = "./classifiers/danceability/danceability-vggish-audioset-1/model.json"
* // Where `tf` is the global import object from the `@tensorflow/tfjs*` package.
* const vggish = new TensorflowVGGish(tf, modelURL);
* // Promise for loading the model
* await vggish.initialize();
* // Compute predictions for a given input feature.
* let predictions = await vggish.predict(inputVGGish);
* @extends {EssentiaTensorflowJSModel}
*/
var TensorflowVGGish = /** @class */ (function (_super) {
__extends(TensorflowVGGish, _super);
function TensorflowVGGish(tfjs, model_url, verbose) {
if (verbose === void 0) { verbose = false; }
return _super.call(this, tfjs, model_url) || this;
}
/**
* Runs inference on the given audio feature and returns the activations of the model's output layer.
* @param {InputVGGish} inputFeature audio feature required by the VGGish model.
* @param {boolean} [zeroPadding=false] whether to zero-pad the input feature (see `arrayToTensorAsBatches`).
* @returns {array} activations of the output layer of the model
* @memberof TensorflowVGGish
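* @example
* // A brief sketch (model and feature as in the class example above): average
* // the per-patch activations into a single overall prediction.
* let predictions = await vggish.predict(inputVGGish, true);
* let averaged = predictions
*     .reduce((acc, row) => acc.map((val, i) => val + row[i]))
*     .map((sum) => sum / predictions.length);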
*/
TensorflowVGGish.prototype.predict = function (inputFeature, zeroPadding) {
if (zeroPadding === void 0) { zeroPadding = false; }
return __awaiter(this, void 0, void 0, function () {
var featureTensor, modelInputs, results, resultsArray;
return __generator(this, function (_a) {
switch (_a.label) {
case 0:
featureTensor = this.arrayToTensorAsBatches(inputFeature.melSpectrum, [inputFeature.frameSize, inputFeature.melBandsSize], inputFeature.patchSize, zeroPadding);
modelInputs = this.disambiguateExtraInputs();
// add the input feature tensor to the model inputs
modelInputs.push(featureTensor);
results = this.model.execute(modelInputs);
// free tensors
featureTensor.dispose();
return [4 /*yield*/, results.array()];
case 1:
resultsArray = _a.sent();
results.dispose();
return [2 /*return*/, resultsArray];
}
});
});
};
return TensorflowVGGish;
}(EssentiaTensorflowJSModel));
exports.TensorflowVGGish = TensorflowVGGish;