mirror of https://github.com/vladmandic/human
fixed memory leak
parent 96aa082c09
commit c106aa2a18
@@ -280,4 +280,4 @@ Library can also be used on mobile devices
 ## Todo

 - Improve detection of smaller faces
-- Fix memory leak in face detector
+- Verify age/gender models
@@ -7,13 +7,13 @@ const config = {
   face: {
     enabled: true,
     detector: { maxFaces: 10, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
-    mesh: { enabled: true },
-    iris: { enabled: true },
-    age: { enabled: true, skipFrames: 5 },
-    gender: { enabled: true },
+    mesh: { enabled: false },
+    iris: { enabled: false },
+    age: { enabled: false, skipFrames: 5 },
+    gender: { enabled: false },
   },
-  body: { enabled: true, maxDetections: 5, scoreThreshold: 0.75, nmsRadius: 20 },
-  hand: { enabled: true, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
+  body: { enabled: false, maxDetections: 5, scoreThreshold: 0.75, nmsRadius: 20 },
+  hand: { enabled: false, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
 };
 let settings;
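This demo config is the userConfig that detect() deep-merges over the library defaults (see the src/index.js hunk further down). A hypothetical minimal call, assuming a `human` handle to the library and a `video` element:

    // Hypothetical usage: fields given here override the defaults, everything
    // else (thresholds, model paths) falls through via mergeDeep in src/index.js.
    const result = await human.detect(video, config);
    console.log(`faces: ${result.face.length} bodies: ${result.body.length} hands: ${result.hand.length}`);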
@@ -181,10 +181,11 @@ async function runHumanDetect() {
     log.innerText = `
       TFJS Version: ${human.tf.version_core} Memory: ${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors
       GPU Memory: used ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes free ${Math.floor(1024 * 1024 * engine.backendInstance.numMBBeforeWarning).toLocaleString()} bytes
-      Result: Face: ${(JSON.stringify(result.face)).length.toLocaleString()} bytes Body: ${(JSON.stringify(result.body)).length.toLocaleString()} bytes Hand: ${(JSON.stringify(result.hand)).length.toLocaleString()} bytes
+      Result Object Size: Face: ${(JSON.stringify(result.face)).length.toLocaleString()} bytes Body: ${(JSON.stringify(result.body)).length.toLocaleString()} bytes Hand: ${(JSON.stringify(result.hand)).length.toLocaleString()} bytes
     `;
     // rinse & repeate
-    requestAnimationFrame(runHumanDetect);
+    // setTimeout(() => runHumanDetect(), 1000); // slow loop for debugging purposes
+    requestAnimationFrame(runHumanDetect); // immediate loop
   }
 }
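The telemetry above is what makes the leak visible: if the tensor count keeps climbing across otherwise identical frames, something is not being disposed. A self-contained sketch of the same check, assuming only @tensorflow/tfjs; the frameWork stub is hypothetical:

    const tf = require('@tensorflow/tfjs');

    // Hypothetical per-frame workload standing in for human.detect().
    function frameWork(leak) {
      const t = tf.zeros([100, 100]);
      if (!leak) t.dispose(); // without this, numTensors grows by 1 per frame
    }

    for (let i = 0; i < 3; i++) {
      frameWork(true);
      // tf.memory() is the programmatic form of the engine.state readout above
      const { numTensors, numBytes } = tf.memory();
      console.log(`frame ${i}: ${numTensors} tensors, ${numBytes} bytes`);
    }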
@@ -242,6 +243,10 @@ async function setupCanvas() {

 async function setupCamera() {
   const video = document.getElementById('video');
+  if (!navigator.mediaDevices) {
+    document.getElementById('log').innerText = 'Video not supported';
+    return;
+  }
   const stream = await navigator.mediaDevices.getUserMedia({
     audio: false,
     video: { facingMode: 'user', width: window.innerWidth, height: window.innerHeight },
File diff suppressed because one or more lines are too long
@@ -1,20 +0,0 @@
-const tf = require('@tensorflow/tfjs');
-
-exports.disposeBox = (box) => {
-  box.startEndTensor.dispose();
-  box.startPoint.dispose();
-  box.endPoint.dispose();
-};
-
-exports.createBox = (startEndTensor) => ({
-  startEndTensor,
-  startPoint: tf.slice(startEndTensor, [0, 0], [-1, 2]),
-  endPoint: tf.slice(startEndTensor, [0, 2], [-1, 2]),
-});
-
-exports.scaleBox = (box, factors) => {
-  const starts = tf.mul(box.startPoint, factors);
-  const ends = tf.mul(box.endPoint, factors);
-  const newCoordinates = tf.concat2d([starts, ends], 1);
-  return exports.createBox(newCoordinates);
-};
@@ -1,12 +0,0 @@
-const tf = require('@tensorflow/tfjs');
-const face = require('./face');
-
-async function load(config) {
-  const blazeface = await tf.loadGraphModel(config.detector.modelPath, { fromTFHub: config.detector.modelPath.includes('tfhub.dev') });
-  const model = new face.BlazeFaceModel(blazeface, config);
-  return model;
-}
-exports.load = load;
-const face_2 = require('./face');
-
-Object.defineProperty(exports, 'BlazeFaceModel', { enumerable: true, get() { return face_2.BlazeFaceModel; } });
@@ -1,10 +1,10 @@
 const tf = require('@tensorflow/tfjs');
-const bounding = require('./box');

 const ANCHORS_CONFIG = {
   strides: [8, 16],
   anchors: [2, 6],
 };

 const NUM_LANDMARKS = 6;
 function generateAnchors(width, height, outputSpec) {
   const anchors = [];
@@ -25,6 +25,26 @@ function generateAnchors(width, height, outputSpec) {
   }
   return anchors;
 }

+const disposeBox = (box) => {
+  box.startEndTensor.dispose();
+  box.startPoint.dispose();
+  box.endPoint.dispose();
+};
+
+const createBox = (startEndTensor) => ({
+  startEndTensor,
+  startPoint: tf.slice(startEndTensor, [0, 0], [-1, 2]),
+  endPoint: tf.slice(startEndTensor, [0, 2], [-1, 2]),
+});
+
+const scaleBox = (box, factors) => {
+  const starts = tf.mul(box.startPoint, factors);
+  const ends = tf.mul(box.endPoint, factors);
+  const newCoordinates = tf.concat2d([starts, ends], 1);
+  return createBox(newCoordinates);
+};
+
 function decodeBounds(boxOutputs, anchors, inputSize) {
   const boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
   const centers = tf.add(boxStarts, anchors);
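Each box built this way owns three tensors (the packed start/end tensor plus the two slices), and scaleBox allocates a fresh set, which is why the rest of the commit pairs every box with a disposeBox call. A minimal sketch of that lifecycle, with an illustrative coordinate tensor and the helpers inlined for a standalone demo:

    const tf = require('@tensorflow/tfjs');

    // Mirror of createBox above, inlined: one box = three live tensors.
    const startEndTensor = tf.tensor2d([[10, 20, 110, 220]]); // [x1, y1, x2, y2]
    const box = {
      startEndTensor,
      startPoint: tf.slice(startEndTensor, [0, 0], [-1, 2]),
      endPoint: tf.slice(startEndTensor, [0, 2], [-1, 2]),
    };
    console.log(tf.memory().numTensors); // 3

    // Mirror of disposeBox: release all three or they leak.
    box.startEndTensor.dispose();
    box.startPoint.dispose();
    box.endPoint.dispose();
    console.log(tf.memory().numTensors); // 0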
@@ -39,12 +59,14 @@ function decodeBounds(boxOutputs, anchors, inputSize) {
   const concatAxis = 1;
   return tf.concat2d([startNormalized, endNormalized], concatAxis);
 }

 function scaleBoxFromPrediction(face, scaleFactor) {
   return tf.tidy(() => {
     const box = face['box'] ? face['box'] : face;
-    return bounding.scaleBox(box, scaleFactor).startEndTensor.squeeze();
+    return scaleBox(box, scaleFactor).startEndTensor.squeeze();
   });
 }

 class BlazeFaceModel {
   constructor(model, config) {
     this.blazeFaceModel = model;
@@ -59,11 +81,10 @@ class BlazeFaceModel {
     this.scoreThreshold = config.detector.scoreThreshold;
   }

-  async getBoundingBoxes(inputImage, returnTensors, annotateBoxes = true) {
+  async getBoundingBoxes(inputImage) {
     const [detectedOutputs, boxes, scores] = tf.tidy(() => {
       const resizedImage = inputImage.resizeBilinear([this.width, this.height]);
       const normalizedImage = tf.mul(tf.sub(resizedImage.div(255), 0.5), 2);
-      // [1, 897, 17] 1 = batch, 897 = number of anchors
       const batchedPrediction = this.blazeFaceModel.predict(normalizedImage);
       const prediction = batchedPrediction.squeeze();
       const decodedBounds = decodeBounds(prediction, this.anchors, this.inputSize);
@@ -71,45 +92,23 @@ class BlazeFaceModel {
       const scoresOut = tf.sigmoid(logits).squeeze();
       return [prediction, decodedBounds, scoresOut];
     });

     const boxIndicesTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxFaces, this.iouThreshold, this.scoreThreshold);
     const boxIndices = await boxIndicesTensor.array();
     boxIndicesTensor.dispose();
     let boundingBoxes = boxIndices.map((boxIndex) => tf.slice(boxes, [boxIndex, 0], [1, -1]));
-    if (!returnTensors) {
     boundingBoxes = await Promise.all(boundingBoxes.map(async (boundingBox) => {
       const vals = await boundingBox.array();
       boundingBox.dispose();
       return vals;
     }));
-    }
-    const originalHeight = inputImage.shape[1];
-    const originalWidth = inputImage.shape[2];
-    let scaleFactor;
-    if (returnTensors) {
-      scaleFactor = tf.div([originalWidth, originalHeight], this.inputSize);
-    } else {
-      scaleFactor = [
-        originalWidth / this.inputSizeData[0],
-        originalHeight / this.inputSizeData[1],
-      ];
-    }
     const annotatedBoxes = [];
     for (let i = 0; i < boundingBoxes.length; i++) {
       const boundingBox = boundingBoxes[i];
       const annotatedBox = tf.tidy(() => {
-        const box = boundingBox instanceof tf.Tensor
-          ? bounding.createBox(boundingBox)
-          : bounding.createBox(tf.tensor2d(boundingBox));
-        if (!annotateBoxes) {
-          return box;
-        }
+        const box = createBox(boundingBox);
         const boxIndex = boxIndices[i];
-        let anchor;
-        if (returnTensors) {
-          anchor = this.anchors.slice([boxIndex, 0], [1, 2]);
-        } else {
-          anchor = this.anchorsData[boxIndex];
-        }
+        const anchor = this.anchorsData[boxIndex];
         const landmarks = tf.slice(detectedOutputs, [boxIndex, NUM_LANDMARKS - 1], [1, -1])
           .squeeze()
           .reshape([NUM_LANDMARKS, -1]);
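There is a memory-management angle to this signature cleanup: tf.tidy() only reclaims tensors allocated synchronously inside its callback, so everything that crosses the await on nonMaxSuppressionAsync has to be disposed by hand, exactly as getBoundingBoxes now does with boxIndicesTensor and each sliced box. A stripped-down sketch of the pattern, independent of this repo:

    const tf = require('@tensorflow/tfjs');

    async function pickBoxes(boxes, scores, maxBoxes = 5) {
      // tidy cannot wrap an await, so the async NMS result is released explicitly
      const indicesTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, maxBoxes, 0.3, 0.75);
      const indices = await indicesTensor.array(); // copy to plain JS first
      indicesTensor.dispose();                     // then free the tensor
      return indices;
    }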
@@ -123,7 +122,7 @@
       detectedOutputs.dispose();
       return {
         boxes: annotatedBoxes,
-        scaleFactor,
+        scaleFactor: [inputImage.shape[2] / this.inputSizeData[0], inputImage.shape[1] / this.inputSizeData[1]],
       };
     }
@@ -134,7 +133,7 @@ class BlazeFaceModel {
       }
       return input.toFloat().expandDims(0);
     });
-    const { boxes, scaleFactor } = await this.getBoundingBoxes(image, returnTensors, annotateBoxes);
+    const { boxes, scaleFactor } = await this.getBoundingBoxes(image);
     image.dispose();
     if (returnTensors) {
       return boxes.map((face) => {
@@ -176,7 +175,7 @@ class BlazeFaceModel {
         landmarks: scaledLandmarks,
         probability: probabilityData,
       };
-      bounding.disposeBox(face.box);
+      disposeBox(face.box);
       face.landmarks.dispose();
       face.probability.dispose();
     }
@@ -185,4 +184,13 @@ class BlazeFaceModel {
     }));
   }
 }

+async function load(config) {
+  const blazeface = await tf.loadGraphModel(config.detector.modelPath, { fromTFHub: config.detector.modelPath.includes('tfhub.dev') });
+  const model = new BlazeFaceModel(blazeface, config);
+  return model;
+}
+
+exports.load = load;
 exports.BlazeFaceModel = BlazeFaceModel;
+exports.disposeBox = disposeBox;
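With load() now living in face.js itself, consumers get the detector through a single factory call instead of a separate index module. A hypothetical usage; the modelPath URL is illustrative and the config shape follows the detector section of the demo config above:

    const blazeface = require('./blazeface');

    async function main() {
      const model = await blazeface.load({
        detector: {
          modelPath: 'https://tfhub.dev/mediapipe/tfjs-model/blazeface/1/default/1', // illustrative
          maxFaces: 10, iouThreshold: 0.3, scoreThreshold: 0.75,
        },
      });
      console.log(model instanceof blazeface.BlazeFaceModel); // true
    }

    main();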
@@ -1,81 +1,63 @@
 const tf = require('@tensorflow/tfjs');
-const blazeface = require('../blazeface');
+const blazeface = require('./blazeface');
 const keypoints = require('./keypoints');
 const pipe = require('./pipeline');
 const uv_coords = require('./uvcoords');
 const triangulation = require('./triangulation').default;

-exports.uv_coords = uv_coords;
-exports.triangulation = triangulation;
-
-async function loadDetectorModel(config) {
-  return blazeface.load(config);
-}
-async function loadMeshModel(modelUrl) {
-  return tf.loadGraphModel(modelUrl, { fromTFHub: modelUrl.includes('tfhub.dev') });
-}
-async function loadIrisModel(modelUrl) {
-  return tf.loadGraphModel(modelUrl, { fromTFHub: modelUrl.includes('tfhub.dev') });
-}
-
-async function load(config) {
-  const models = await Promise.all([
-    loadDetectorModel(config),
-    loadMeshModel(config.mesh.modelPath),
-    loadIrisModel(config.iris.modelPath),
-  ]);
-  // eslint-disable-next-line no-use-before-define
-  const faceMesh = new MediaPipeFaceMesh(models[0], models[1], models[2], config);
-  return faceMesh;
-}
-exports.load = load;
-
 class MediaPipeFaceMesh {
   constructor(blazeFace, blazeMeshModel, irisModel, config) {
     this.pipeline = new pipe.Pipeline(blazeFace, blazeMeshModel, irisModel, config);
-    this.config = config;
+    if (config) this.config = config;
   }

   async estimateFaces(input, config) {
     if (config) this.config = config;
     const image = tf.tidy(() => {
-      if (!(input instanceof tf.Tensor)) {
-        input = tf.browser.fromPixels(input);
-      }
+      if (!(input instanceof tf.Tensor)) input = tf.browser.fromPixels(input);
       return input.toFloat().expandDims(0);
     });
-    const results = [];
     const predictions = await this.pipeline.predict(image, this.config.iris.enabled, this.config.mesh.enabled);
-    image.dispose();
-    if (!predictions) return results;
-    for (const prediction of predictions) {
+    tf.dispose(image);
+    const results = [];
+    for (const prediction of (predictions || [])) {
       const confidence = prediction.confidence.arraySync();
       if (confidence >= this.config.detector.minConfidence) {
-        const result = {
-          confidence: confidence || 0,
-          box: prediction.box ? [prediction.box.startPoint[0], prediction.box.startPoint[1], prediction.box.endPoint[0] - prediction.box.startPoint[0], prediction.box.endPoint[1] - prediction.box.startPoint[1]] : 0,
-          mesh: prediction.coords ? prediction.coords.arraySync() : null,
-          image: prediction.image ? tf.clone(prediction.image) : null,
-          // mesh: prediction.coords.arraySync(),
-        };
+        const mesh = prediction.coords ? prediction.coords.arraySync() : null;
         const annotations = {};
-        if (result.mesh && result.mesh.length > 0) {
+        if (mesh && mesh.length > 0) {
           for (const key in keypoints.MESH_ANNOTATIONS) {
             if (this.config.iris.enabled || key.includes('Iris') === false) {
-              annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => result.mesh[index]);
+              annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => mesh[index]);
             }
           }
         }
-        result['annotations'] = annotations;
-        results.push(result);
+        results.push({
+          confidence: confidence || 0,
+          box: prediction.box ? [prediction.box.startPoint[0], prediction.box.startPoint[1], prediction.box.endPoint[0] - prediction.box.startPoint[0], prediction.box.endPoint[1] - prediction.box.startPoint[1]] : 0,
+          mesh,
+          annotations,
+          image: prediction.image ? tf.clone(prediction.image) : null,
+        });
       }
-      tf.dispose(prediction.confidence);
-      tf.dispose(prediction.image);
-      tf.dispose(prediction.coords);
-      tf.dispose(prediction);
+      prediction.confidence.dispose();
+      prediction.image.dispose();
     }
-    tf.dispose(predictions);
     return results;
   }
 }

+async function load(config) {
+  const models = await Promise.all([
+    blazeface.load(config),
+    tf.loadGraphModel(config.mesh.modelPath, { fromTFHub: config.mesh.modelPath.includes('tfhub.dev') }),
+    tf.loadGraphModel(config.iris.modelPath, { fromTFHub: config.iris.modelPath.includes('tfhub.dev') }),
+  ]);
+  const faceMesh = new MediaPipeFaceMesh(models[0], models[1], models[2], config);
+  return faceMesh;
+}
+
+exports.load = load;
 exports.MediaPipeFaceMesh = MediaPipeFaceMesh;
+exports.uv_coords = uv_coords;
+exports.triangulation = triangulation;
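The recurring move in this hunk is copy-then-dispose: pull values onto the CPU with arraySync(), build the result object from plain arrays, then release the tensor immediately, so nothing in the returned results keeps tensor memory alive. In isolation:

    const tf = require('@tensorflow/tfjs');

    const coords = tf.tensor2d([[1, 2], [3, 4]]);
    const mesh = coords.arraySync(); // plain nested JS array, safe to keep around
    coords.dispose();                // backing tensor memory released
    console.log(mesh[1][0]);         // 3: the copy outlives the tensor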
@@ -142,38 +142,31 @@ class Pipeline {

   async predict(input, predictIrises, predictMesh) {
     if (this.shouldUpdateRegionsOfInterest()) {
-      const returnTensors = false;
-      const annotateFace = true;
-      const { boxes, scaleFactor } = await this.boundingBoxDetector.getBoundingBoxes(input, returnTensors, annotateFace);
+      const { boxes, scaleFactor } = await this.boundingBoxDetector.getBoundingBoxes(input);
       if (boxes.length === 0) {
         this.regionsOfInterest = [];
         return null;
       }
       const scaledBoxes = boxes.map((prediction) => {
-        const predictionBoxCPU = {
+        const predictionBox = {
           startPoint: prediction.box.startPoint.squeeze().arraySync(),
           endPoint: prediction.box.endPoint.squeeze().arraySync(),
         };
-        const scaledBox = bounding.scaleBoxCoordinates(predictionBoxCPU, scaleFactor);
+        prediction.box.startPoint.dispose();
+        prediction.box.endPoint.dispose();
+        const scaledBox = bounding.scaleBoxCoordinates(predictionBox, scaleFactor);
         const enlargedBox = bounding.enlargeBox(scaledBox);
-        return {
-          ...enlargedBox,
-          landmarks: prediction.landmarks.arraySync(),
-        };
-      });
-      boxes.forEach((box) => {
-        if (box != null && box.startPoint != null) {
-          box.startEndTensor.dispose();
-          box.startPoint.dispose();
-          box.endPoint.dispose();
-        }
+        const landmarks = prediction.landmarks.arraySync();
+        prediction.landmarks.dispose();
+        prediction.probability.dispose();
+        return { ...enlargedBox, landmarks };
       });
       this.updateRegionsOfInterest(scaledBoxes);
       this.runsWithoutFaceDetector = 0;
     } else {
       this.runsWithoutFaceDetector++;
     }
-    return tf.tidy(() => this.regionsOfInterest.map((box, i) => {
+    const results = tf.tidy(() => this.regionsOfInterest.map((box, i) => {
       let angle = 0;
       // The facial bounding box landmarks could come either from blazeface (if we are using a fresh box), or from the mesh model (if we are reusing an old box).
       const boxLandmarksFromMeshModel = box.landmarks.length >= LANDMARKS_COUNT;
@@ -201,6 +194,7 @@
       const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = this.getEyeBox(rawCoords, face, RIGHT_EYE_BOUNDS[0], RIGHT_EYE_BOUNDS[1]);
       const eyePredictions = (this.irisModel.predict(tf.concat([leftEyeCrop, rightEyeCrop])));
       const eyePredictionsData = eyePredictions.dataSync();
+      eyePredictions.dispose();
       const leftEyeData = eyePredictionsData.slice(0, IRIS_NUM_COORDINATES * 3);
       const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = this.getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
       const rightEyeData = eyePredictionsData.slice(IRIS_NUM_COORDINATES * 3);
@@ -226,7 +220,6 @@
       const transformedCoords = tf.tensor2d(transformedCoordsData);
       this.regionsOfInterest[i] = { ...landmarksBox, landmarks: transformedCoords.arraySync() };
       const prediction = {
-        // coords: tf.tensor2d(rawCoords, [rawCoords.length, 3]),
         coords: transformedCoords,
         box: landmarksBox,
         confidence: flag.squeeze(),
@@ -236,13 +229,13 @@
       }
       const prediction = {
         coords: null,
-        // scaledCoords: null,
         box: landmarksBox,
         confidence: flag.squeeze(),
         image: face,
       };
       return prediction;
     }));
+    return results;
   }

   // Updates regions of interest if the intersection over union between the incoming and previous regions falls below a threshold.
src/index.js (23 changed lines)
@@ -33,28 +33,31 @@ function mergeDeep(...objects) {
 async function detect(input, userConfig) {
   const config = mergeDeep(defaults, userConfig);

+  // load models if enabled
+  if (config.body.enabled && !models.posenet) models.posenet = await posenet.load(config.body);
+  if (config.hand.enabled && !models.handpose) models.handpose = await handpose.load(config.hand);
+  if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
+  if (config.face.age.enabled) await ssrnet.loadAge(config);
+  if (config.face.gender.enabled) await ssrnet.loadGender(config);
+
+  tf.engine().startScope();

   // run posenet
   let poseRes = [];
-  if (config.body.enabled) {
-    if (!models.posenet) models.posenet = await posenet.load(config.body);
-    poseRes = await models.posenet.estimateMultiplePoses(input, config.body);
-  }
+  if (config.body.enabled) poseRes = await models.posenet.estimateMultiplePoses(input, config.body);

   // run handpose
   let handRes = [];
-  if (config.hand.enabled) {
-    if (!models.handpose) models.handpose = await handpose.load(config.hand);
-    handRes = await models.handpose.estimateHands(input, config.hand);
-  }
+  if (config.hand.enabled) handRes = await models.handpose.estimateHands(input, config.hand);

   // run facemesh, includes blazeface and iris
   const faceRes = [];
   if (config.face.enabled) {
-    if (!models.facemesh) models.facemesh = await facemesh.load(config.face);
     const faces = await models.facemesh.estimateFaces(input, config.face);
     for (const face of faces) {
       // run ssr-net age & gender, inherits face from blazeface
       const ssrdata = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
+      face.image.dispose();
       // iris: array[ bottom, left, top, right, center ]
       const iris = (face.annotations.leftEyeIris && face.annotations.rightEyeIris)
         ? Math.max(face.annotations.leftEyeIris[3][0] - face.annotations.leftEyeIris[1][0], face.annotations.rightEyeIris[3][0] - face.annotations.rightEyeIris[1][0])
@@ -71,6 +74,8 @@ async function detect(input, userConfig) {
     }
   }

+  tf.engine().endScope();
+
   // combine results
   return { face: faceRes, body: poseRes, hand: handRes };
 }
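startScope()/endScope() bracket the whole detect() call: any tensor allocated inside the scope and not explicitly kept is reclaimed at endScope(), a coarse safety net on top of the targeted dispose() calls above. A minimal sketch using only @tensorflow/tfjs:

    const tf = require('@tensorflow/tfjs');

    tf.engine().startScope();
    tf.zeros([256, 256]);                // deliberately never disposed
    console.log(tf.memory().numTensors); // 1 while the scope is open
    tf.engine().endScope();
    console.log(tf.memory().numTensors); // 0: the scope reclaimed the orphan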
@@ -15,14 +15,20 @@ async function getImage(image, size) {
   return tensor;
 }

+async function loadAge(config) {
+  if (!models.age) models.age = await tf.loadGraphModel(config.face.age.modelPath);
+}
+
+async function loadGender(config) {
+  if (!models.gender) models.gender = await tf.loadGraphModel(config.face.gender.modelPath);
+}
+
 async function predict(image, config) {
   frame += 1;
   if (frame >= config.face.age.skipFrames) {
     frame = 0;
     return last;
   }
-  if (!models.age && config.face.age.enabled) models.age = await tf.loadGraphModel(config.face.age.modelPath);
-  if (!models.gender && config.face.gender.enabled) models.gender = await tf.loadGraphModel(config.face.gender.modelPath);
   let enhance;
   if (image instanceof tf.Tensor) {
     const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
@@ -48,3 +54,5 @@ async function predict(image, config) {
 }

 exports.predict = predict;
+exports.loadAge = loadAge;
+exports.loadGender = loadGender;
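Splitting the loads out of predict() lets detect() warm both models up front (as the src/index.js hunk above now does) instead of paying for tf.loadGraphModel inside the per-frame path. A hypothetical warmup call, with the require path assumed:

    const ssrnet = require('./ssrnet'); // path assumed

    async function warmup(config) {
      // one-time loads; predict() afterwards only runs inference
      if (config.face.age.enabled) await ssrnet.loadAge(config);
      if (config.face.gender.enabled) await ssrnet.loadGender(config);
    }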