mirror of https://github.com/vladmandic/human
added blazeface back and front models
commit 86bb3f1c87 (parent f6059df802)
@@ -218,12 +218,13 @@ human.defaults = {
   face: {
     enabled: true, // controls if specified module is enabled (note: module is not loaded until it is required)
     detector: {
-      modelPath: '../models/blazeface/model.json', // path to specific pre-trained model
+      modelPath: '../models/blazeface/tfhub/model.json', // can be 'tfhub', 'front' or 'back'
+      inputSize: 128, // 128 for tfhub and front models, 256 for back
       maxFaces: 10, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
       skipFrames: 10, // how many frames to skip before re-running bounding box detection
-      minConfidence: 0.8, // threshold for discarding a prediction
+      minConfidence: 0.5, // threshold for discarding a prediction
       iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-      scoreThreshold: 0.75, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+      scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     },
     mesh: {
       enabled: true,
@@ -235,12 +236,12 @@ human.defaults = {
     },
     age: {
       enabled: true,
-      modelPath: '../models/ssrnet-imdb-age/model.json',
+      modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
       skipFrames: 10, // how many frames to skip before re-running bounding box detection
     },
     gender: {
       enabled: true,
-      modelPath: '../models/ssrnet-imdb-gender/model.json',
+      modelPath: '../models/ssrnet-gender/imdb/model.json', // can be 'imdb' or 'wiki'
     },
     emotion: {
       enabled: true,
@@ -254,15 +255,15 @@ human.defaults = {
     enabled: true,
     modelPath: '../models/posenet/model.json',
     maxDetections: 5, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
-    scoreThreshold: 0.75, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
   },
   hand: {
     enabled: true,
     skipFrames: 10, // how many frames to skip before re-running bounding box detection
-    minConfidence: 0.8, // threshold for discarding a prediction
+    minConfidence: 0.5, // threshold for discarding a prediction
     iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-    scoreThreshold: 0.75, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     detector: {
       anchors: '../models/handdetect/anchors.json',
       modelPath: '../models/handdetect/model.json',
@@ -361,14 +362,14 @@ For example, on a desktop with a low-end nVidia GTX1050 it can perform multiple
 Performance per module:
 
 - Enabled all: 10 FPS
-- Face Detect: 80 FPS
+- Face Detect: 80 FPS (standalone)
 - Face Geometry: 30 FPS (includes face detect)
 - Face Iris: 25 FPS (includes face detect and face geometry)
 - Age: 60 FPS (includes face detect)
 - Gender: 60 FPS (includes face detect)
 - Emotion: 60 FPS (includes face detect)
-- Hand: 40 FPS
-- Body: 50 FPS
+- Hand: 40 FPS (standalone)
+- Body: 50 FPS (standalone)
 
 Library can also be used on mobile devices
 
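Note: the README defaults above introduce the `tfhub`/`front`/`back` detector variants. The following is a minimal usage sketch, not part of this commit: the `back` model path, the ESM default export, and the override object shape are assumptions based on the defaults and demo code shown in this diff.

```js
// hypothetical sketch: switch the face detector to the 'back' model variant
import human from '@vladmandic/human'; // package name taken from package.json in this diff

const overrides = {
  face: {
    detector: {
      modelPath: '../models/blazeface/back/model.json', // assumed path for the 'back' variant
      inputSize: 256, // per the README comment: 256 for the back model
    },
  },
};

// human.detect(input, config) is the call used by the demo worker in this commit
const result = await human.detect(document.getElementById('video'), overrides);
console.log(result);
```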
@@ -13,8 +13,7 @@ onmessage = async (msg) => {
   config = msg.data.config;
   let result = {};
   try {
-    // result = await human.detect(image, config);
-    result = {};
+    result = await human.detect(image, config);
   } catch (err) {
     result.error = err.message;
     log('Worker thread error:', err.message);
@@ -60,7 +60,7 @@ async function drawFace(result, canvas) {
     const labelIris = face.iris ? `iris: ${face.iris}` : '';
     const labelEmotion = face.emotion && face.emotion[0] ? `emotion: ${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}` : '';
     ctx.fillStyle = ui.baseLabel;
-    ctx.fillText(`face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22, face.box[2]);
+    ctx.fillText(`${Math.trunc(100 * face.confidence)}% face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22);
     ctx.stroke();
     ctx.lineWidth = 1;
     if (face.mesh) {
@@ -238,7 +238,7 @@ function webWorker(input, image, canvas) {
     log('Creating worker thread');
     worker = new Worker('demo-esm-webworker.js', { type: 'module' });
    // after receiving message from webworker, parse&draw results and send new frame for processing
-    worker.addEventListener('message', async (msg) => drawResults(input, msg.data, canvas));
+    worker.addEventListener('message', (msg) => drawResults(input, msg.data, canvas));
   }
   // pass image data as arraybuffer to worker by reference to avoid copy
   worker.postMessage({ image: image.data.buffer, width: canvas.width, height: canvas.height, config }, [image.data.buffer]);
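Note: the demo hunks above hand each frame to a worker by transferring the underlying ArrayBuffer instead of copying it. Below is a self-contained sketch of that round trip, using hypothetical file and element names (`worker.js`, `#canvas`); it is not code from the commit.

```js
// main thread: post a frame to the worker without copying pixel data
const canvas = document.getElementById('canvas'); // hypothetical canvas element
const worker = new Worker('worker.js', { type: 'module' }); // hypothetical worker file
const image = canvas.getContext('2d').getImageData(0, 0, canvas.width, canvas.height);
worker.addEventListener('message', (msg) => console.log(msg.data)); // detection results come back here
// the transfer list hands ownership of the buffer to the worker (zero-copy), as in the demo above
worker.postMessage({ image: image.data.buffer, width: canvas.width, height: canvas.height }, [image.data.buffer]);

// worker.js (sketch): rebuild ImageData from the transferred buffer and reply
onmessage = async (msg) => {
  const pixels = new Uint8ClampedArray(msg.data.image);
  const frame = new ImageData(pixels, msg.data.width, msg.data.height);
  const result = await human.detect(frame, {}); // assumes `human` is imported in the worker module
  postMessage(result);
};
```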
@@ -23,12 +23,12 @@ var require_blazeface = __commonJS((exports2) => {
     anchors: [2, 6]
   };
   const NUM_LANDMARKS = 6;
-  function generateAnchors(width, height, outputSpec) {
+  function generateAnchors(anchorSize, outputSpec) {
     const anchors = [];
     for (let i = 0; i < outputSpec.strides.length; i++) {
       const stride = outputSpec.strides[i];
-      const gridRows = Math.floor((height + stride - 1) / stride);
-      const gridCols = Math.floor((width + stride - 1) / stride);
+      const gridRows = Math.floor((anchorSize + stride - 1) / stride);
+      const gridCols = Math.floor((anchorSize + stride - 1) / stride);
       const anchorsNum = outputSpec.anchors[i];
       for (let gridY = 0; gridY < gridRows; gridY++) {
         const anchorY = stride * (gridY + 0.5);
@@ -83,11 +83,11 @@ var require_blazeface = __commonJS((exports2) => {
       this.blazeFaceModel = model;
       this.width = config2.detector.inputSize;
       this.height = config2.detector.inputSize;
+      this.anchorSize = config2.detector.anchorSize;
       this.maxFaces = config2.detector.maxFaces;
-      this.anchorsData = generateAnchors(config2.detector.inputSize, config2.detector.inputSize, ANCHORS_CONFIG);
+      this.anchorsData = generateAnchors(config2.detector.anchorSize, ANCHORS_CONFIG);
       this.anchors = tf2.tensor2d(this.anchorsData);
-      this.inputSizeData = [config2.detector.inputSize, config2.detector.inputSize];
-      this.inputSize = tf2.tensor1d([config2.detector.inputSize, config2.detector.inputSize]);
+      this.inputSize = tf2.tensor1d([this.width, this.height]);
       this.iouThreshold = config2.detector.iouThreshold;
       this.scaleFaces = 0.8;
       this.scoreThreshold = config2.detector.scoreThreshold;
@@ -97,7 +97,16 @@ var require_blazeface = __commonJS((exports2) => {
       const resizedImage = inputImage.resizeBilinear([this.width, this.height]);
       const normalizedImage = tf2.mul(tf2.sub(resizedImage.div(255), 0.5), 2);
       const batchedPrediction = this.blazeFaceModel.predict(normalizedImage);
-      const prediction = batchedPrediction.squeeze();
+      let prediction;
+      if (Array.isArray(batchedPrediction)) {
+        const sorted = batchedPrediction.sort((a, b) => a.size - b.size);
+        const concat384 = tf2.concat([sorted[0], sorted[2]], 2);
+        const concat512 = tf2.concat([sorted[1], sorted[3]], 2);
+        const concat = tf2.concat([concat512, concat384], 1);
+        prediction = concat.squeeze(0);
+      } else {
+        prediction = batchedPrediction.squeeze();
+      }
       const decodedBounds = decodeBounds(prediction, this.anchors, this.inputSize);
       const logits = tf2.slice(prediction, [0, 0], [-1, 1]);
       const scoresOut = tf2.sigmoid(logits).squeeze();
@@ -130,7 +139,7 @@ var require_blazeface = __commonJS((exports2) => {
       detectedOutputs.dispose();
       return {
         boxes: annotatedBoxes,
-        scaleFactor: [inputImage.shape[2] / this.inputSizeData[0], inputImage.shape[1] / this.inputSizeData[1]]
+        scaleFactor: [inputImage.shape[2] / this.width, inputImage.shape[1] / this.height]
       };
     }
     async estimateFaces(input) {
@@ -5041,7 +5050,8 @@ var require_config = __commonJS((exports2) => {
     face: {
       enabled: true,
       detector: {
-        modelPath: "../models/blazeface/model.json",
+        modelPath: "../models/blazeface/tfhub/model.json",
+        anchorSize: 128,
         inputSize: 128,
         maxFaces: 10,
         skipFrames: 10,
@ -5112,7 +5122,7 @@ var require_config = __commonJS((exports2) => {
|
|||
var require_package = __commonJS((exports2, module2) => {
|
||||
module2.exports = {
|
||||
name: "@vladmandic/human",
|
||||
version: "0.3.1",
|
||||
version: "0.3.2",
|
||||
description: "human: 3D Face Detection, Iris Tracking and Age & Gender Prediction",
|
||||
sideEffects: false,
|
||||
main: "dist/human.cjs",
|
||||
|
@@ -5242,17 +5252,13 @@ async function detect(input, userConfig) {
     const perf = {};
     let timeStamp;
     timeStamp = performance.now();
-    let poseRes = [];
     tf.engine().startScope();
-    if (config.body.enabled)
-      poseRes = await models.posenet.estimatePoses(input, config.body);
+    const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
     tf.engine().endScope();
     perf.body = Math.trunc(performance.now() - timeStamp);
     timeStamp = performance.now();
-    let handRes = [];
     tf.engine().startScope();
-    if (config.hand.enabled)
-      handRes = await models.handpose.estimateHands(input, config.hand);
+    const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
     tf.engine().endScope();
     perf.hand = Math.trunc(performance.now() - timeStamp);
     const faceRes = [];
File diff suppressed because one or more lines are too long (repeated for several additional files)
@@ -2,8 +2,9 @@ export default {
   face: {
     enabled: true, // refers to detector, but since all other face modules rely on detector, it should be a global
     detector: {
-      modelPath: '../models/blazeface/model.json',
-      inputSize: 128, // fixed value
+      modelPath: '../models/blazeface/tfhub/model.json', // can be blazeface-front or blazeface-back
+      anchorSize: 128, // fixed regardless of model
+      inputSize: 128, // fixed value: 128 for front and tfhub and 256 for back
       maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
       skipFrames: 10, // how many frames to go without running the bounding box detector
       minConfidence: 0.5, // threshold for discarding a prediction
@@ -6,12 +6,12 @@ const ANCHORS_CONFIG = {
 };
 
 const NUM_LANDMARKS = 6;
-function generateAnchors(width, height, outputSpec) {
+function generateAnchors(anchorSize, outputSpec) {
   const anchors = [];
   for (let i = 0; i < outputSpec.strides.length; i++) {
     const stride = outputSpec.strides[i];
-    const gridRows = Math.floor((height + stride - 1) / stride);
-    const gridCols = Math.floor((width + stride - 1) / stride);
+    const gridRows = Math.floor((anchorSize + stride - 1) / stride);
+    const gridCols = Math.floor((anchorSize + stride - 1) / stride);
     const anchorsNum = outputSpec.anchors[i];
     for (let gridY = 0; gridY < gridRows; gridY++) {
       const anchorY = stride * (gridY + 0.5);
@@ -72,11 +72,11 @@ class BlazeFaceModel {
     this.blazeFaceModel = model;
     this.width = config.detector.inputSize;
     this.height = config.detector.inputSize;
+    this.anchorSize = config.detector.anchorSize;
     this.maxFaces = config.detector.maxFaces;
-    this.anchorsData = generateAnchors(config.detector.inputSize, config.detector.inputSize, ANCHORS_CONFIG);
+    this.anchorsData = generateAnchors(config.detector.anchorSize, ANCHORS_CONFIG);
     this.anchors = tf.tensor2d(this.anchorsData);
-    this.inputSizeData = [config.detector.inputSize, config.detector.inputSize];
-    this.inputSize = tf.tensor1d([config.detector.inputSize, config.detector.inputSize]);
+    this.inputSize = tf.tensor1d([this.width, this.height]);
     this.iouThreshold = config.detector.iouThreshold;
     this.scaleFaces = 0.8;
     this.scoreThreshold = config.detector.scoreThreshold;
@@ -87,11 +87,21 @@ class BlazeFaceModel {
       const resizedImage = inputImage.resizeBilinear([this.width, this.height]);
       const normalizedImage = tf.mul(tf.sub(resizedImage.div(255), 0.5), 2);
       const batchedPrediction = this.blazeFaceModel.predict(normalizedImage);
-      // todo: add handler for blazeface-front and blazeface-back
-      const prediction = batchedPrediction.squeeze();
+      let prediction;
+      // are we using tfhub or pinto converted model?
+      if (Array.isArray(batchedPrediction)) {
+        const sorted = batchedPrediction.sort((a, b) => a.size - b.size);
+        const concat384 = tf.concat([sorted[0], sorted[2]], 2); // dim: 384, 1 + 16
+        const concat512 = tf.concat([sorted[1], sorted[3]], 2); // dim: 512, 1 + 16
+        const concat = tf.concat([concat512, concat384], 1);
+        prediction = concat.squeeze(0);
+      } else {
+        prediction = batchedPrediction.squeeze(); // when using tfhub model
+      }
       const decodedBounds = decodeBounds(prediction, this.anchors, this.inputSize);
       const logits = tf.slice(prediction, [0, 0], [-1, 1]);
       const scoresOut = tf.sigmoid(logits).squeeze();
+      // console.log(prediction, decodedBounds, logits, scoresOut);
       return [prediction, decodedBounds, scoresOut];
     });
 
@@ -125,7 +135,7 @@ class BlazeFaceModel {
     detectedOutputs.dispose();
     return {
       boxes: annotatedBoxes,
-      scaleFactor: [inputImage.shape[2] / this.inputSizeData[0], inputImage.shape[1] / this.inputSizeData[1]],
+      scaleFactor: [inputImage.shape[2] / this.width, inputImage.shape[1] / this.height],
     };
   }
 
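Note: the `concat512`/`concat384` tensor names in the predict hunk above correspond to the anchor counts produced by `generateAnchors`. A quick check (not part of the commit), assuming the standard BlazeFace strides of `[8, 16]`, which are not visible in this diff, together with the `anchors: [2, 6]` value that is:

```js
// sketch: anchor count for anchorSize = 128 with strides [8, 16] and anchors [2, 6]
const anchorSize = 128;
const spec = { strides: [8, 16], anchors: [2, 6] }; // strides assumed, anchors from the diff
let total = 0;
for (let i = 0; i < spec.strides.length; i++) {
  const grid = Math.floor((anchorSize + spec.strides[i] - 1) / spec.strides[i]); // 16, then 8
  total += grid * grid * spec.anchors[i]; // 16 * 16 * 2 = 512, then 8 * 8 * 6 = 384
}
console.log(total); // 896 anchors total, matching the 512 + 384 concatenation above
```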
@@ -79,17 +79,15 @@ async function detect(input, userConfig) {
 
   // run posenet
   timeStamp = performance.now();
-  let poseRes = [];
   tf.engine().startScope();
-  if (config.body.enabled) poseRes = await models.posenet.estimatePoses(input, config.body);
+  const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
   tf.engine().endScope();
   perf.body = Math.trunc(performance.now() - timeStamp);
 
   // run handpose
   timeStamp = performance.now();
-  let handRes = [];
   tf.engine().startScope();
-  if (config.hand.enabled) handRes = await models.handpose.estimateHands(input, config.hand);
+  const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
   tf.engine().endScope();
   perf.hand = Math.trunc(performance.now() - timeStamp);
 