add face return tensor

pull/91/head
Vladimir Mandic 2021-03-11 22:04:44 -05:00
parent ca1bc638f8
commit 8b5e7cc2d5
23 changed files with 6386 additions and 443021 deletions

View File

@ -9,7 +9,14 @@ Repository: **<git+https://github.com/vladmandic/human.git>**
## Changelog
### **HEAD -> main** 2021/03/10 mandic00@live.com
### **HEAD -> main** 2021/03/11 mandic00@live.com
- wip on embedding
- simplify face box coordinate calculations
- annotated models and removed gender-ssrnet
- autodetect inputsizes
### **origin/main** 2021/03/10 mandic00@live.com
### **1.0.3** 2021/03/10 mandic00@live.com

View File

@ -4,7 +4,6 @@
- Automated testing
- Guard against corrupt input
- Improve face embedding
- Build Face embedding database
- Dynamic sample processing
- Explore EfficientPose
<https://github.com/daniegr/EfficientPose>

View File

@ -67,7 +67,7 @@ export default {
// (note: module is not loaded until it is required)
detector: {
modelPath: '../models/blazeface-back.json',
rotation: true, // use best-guess rotated face image or just box with rotation as-is
rotation: false, // use best-guess rotated face image or just box with rotation as-is
// false means higher performance, but incorrect mesh mapping if face angle is above 20 degrees
// this parameter is not valid in nodejs
maxFaces: 10, // maximum number of faces detected in the input
@ -85,7 +85,7 @@ export default {
scoreThreshold: 0.2, // threshold for deciding when to remove boxes based on score
// in non-maximum suppression,
// this is applied on detection objects only and before minConfidence
return: true, // return extracted face as tensor
return: false, // return extracted face as tensor
},
mesh: {
@ -121,7 +121,8 @@ export default {
},
embedding: {
enabled: false,
enabled: false, // to improve accuracy of face embedding extraction it is recommended
// to enable detector.rotation and mesh.enabled
modelPath: '../models/mobilefacenet.json',
},
},
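For reference, a minimal sketch of a user configuration that opts back into these features at runtime instead of editing the defaults (option names taken from the diff above; the modelPath value is illustrative):

const human = new Human({
  face: {
    enabled: true,
    detector: {
      rotation: true, // recommended together with mesh.enabled for embedding accuracy
      return: true,   // request the extracted face crop as a tensor in the result
    },
    mesh: { enabled: true },
    embedding: { enabled: true, modelPath: '../models/mobilefacenet.json' },
  },
});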

View File

@ -3,18 +3,20 @@ import Human from '../src/human';
import Menu from './menu.js';
import GLBench from './gl-bench.js';
// const userConfig = { backend: 'webgl' }; // add any user configuration overrides
const userConfig = { backend: 'webgl' }; // add any user configuration overrides
/*
const userConfig = {
backend: 'wasm',
async: false,
warmup: 'face',
videoOptimized: false,
face: { enabled: true, mesh: { enabled: true }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: true } },
face: { enabled: true, mesh: { enabled: true }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: false } },
hand: { enabled: false },
gesture: { enabled: false },
body: { enabled: false, modelPath: '../models/blazepose.json' },
};
*/
const human = new Human(userConfig);


View File

@ -20,12 +20,14 @@
body { margin: 0; background: black; color: white; overflow-x: hidden; scrollbar-width: none; }
body::-webkit-scrollbar { display: none; }
img { object-fit: contain; }
.face { width: 200px; height: 200px; }
.face { width: 150px; height: 150px; }
</style>
</head>
<body>
<br>Sample Images:
<div id="images"></div>
<br>Selected Face<br>
<canvas id="orig" style="width: 200px; height: 200px;"></canvas>
<br>Extracted Faces - click on a face to sort by similarity:<br>
<div id="faces"></div>
</body>

View File

@ -9,7 +9,7 @@ const userConfig = {
videoOptimized: false,
face: {
enabled: true,
detector: { rotation: true },
detector: { rotation: true, return: true },
mesh: { enabled: true },
embedding: { enabled: true, modelPath: '../models/mobilefacenet.json' },
iris: { enabled: false },
@ -22,9 +22,10 @@ const userConfig = {
body: { enabled: false },
};
const human = new Human(userConfig);
const samples = ['../assets/sample-me.jpg', '../assets/sample6.jpg', '../assets/sample1.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample3.jpg', '../assets/sample2.jpg',
'../private/me (1).jpg', '../private/me (2).jpg', '../private/me (3).jpg', '../private/me (4).jpg', '../private/me (5).jpg', '../private/me (6).jpg', '../private/me (7).jpg', '../private/me (8).jpg',
'../private/me (9).jpg', '../private/me (10).jpg', '../private/me (11).jpg', '../private/me (12).jpg', '../private/me (13).jpg'];
const samples = ['../assets/sample-me.jpg', '../assets/sample6.jpg', '../assets/sample1.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample3.jpg', '../assets/sample2.jpg'];
// const samples = ['../assets/sample-me.jpg', '../assets/sample6.jpg', '../assets/sample1.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample3.jpg', '../assets/sample2.jpg',
// '../private/me (1).jpg', '../private/me (2).jpg', '../private/me (3).jpg', '../private/me (4).jpg', '../private/me (5).jpg', '../private/me (6).jpg', '../private/me (7).jpg', '../private/me (8).jpg',
// '../private/me (9).jpg', '../private/me (10).jpg', '../private/me (11).jpg', '../private/me (12).jpg', '../private/me (13).jpg'];
const all = [];
function log(...msg) {
@ -36,6 +37,12 @@ function log(...msg) {
async function analyze(face) {
log('Face:', face);
const box = [[0.05, 0.15, 0.90, 0.85]]; // top, left, bottom, right
const crop = human.tf.image.cropAndResize(face.tensor.expandDims(0), box, [0], [200, 200]); // optionally do a tight box crop
const c = document.getElementById('orig');
human.tf.browser.toPixels(crop.squeeze(), c);
const canvases = document.getElementsByClassName('face');
for (const canvas of canvases) {
const res = human.simmilarity(face.embedding, all[canvas.tag.sample][canvas.tag.face].embedding);
@ -64,7 +71,7 @@ async function faces(index, res) {
canvas.height = 200;
canvas.className = 'face';
canvas.addEventListener('click', (evt) => {
log('Select:', evt.target.tag.sample, evt.target.tag.face);
log('Select:', 'Image:', evt.target.tag.sample, 'Face:', evt.target.tag.face);
analyze(all[evt.target.tag.sample][evt.target.tag.face]);
});
human.tf.browser.toPixels(res.face[i].tensor, canvas);
@ -73,7 +80,7 @@ async function faces(index, res) {
}
async function add(index) {
log('Add:', samples[index]);
log('Add image:', samples[index]);
return new Promise((resolve) => {
const img = new Image(100, 100);
img.onload = () => {
@ -89,6 +96,7 @@ async function add(index) {
async function main() {
await human.load();
for (const i in samples) await add(i);
log('Ready');
}
window.onload = main;
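For reference, a minimal sketch of the flow this demo implements, reduced to two images (to be run inside an async function; human.detect, human.simmilarity, and the face.tensor / face.embedding fields are the same APIs used above, imageA and imageB are assumed to be already-loaded images):

const res1 = await human.detect(imageA);
const res2 = await human.detect(imageB);
// each detected face carries an embedding vector when face.embedding is enabled
const score = human.simmilarity(res1.face[0].embedding, res2.face[0].embedding);
log('Similarity:', score); // 0..1, higher means more alike
// with face.detector.return enabled, the extracted face is available as a tensor
await human.tf.browser.toPixels(res1.face[0].tensor, document.getElementById('orig'));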

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

98936
dist/human.esm.js vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

98942
dist/human.js vendored

File diff suppressed because one or more lines are too long

4
dist/human.js.map vendored

File diff suppressed because one or more lines are too long

25026
dist/human.node-gpu.js vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

25026
dist/human.node.js vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

76304
dist/tfjs.esm.js vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -4,20 +4,36 @@ import * as bounding from './box';
import * as util from './util';
import * as coords from './coords';
const LANDMARKS_COUNT = 468;
const MESH_MOUTH_INDEX = 13;
const MESH_KEYPOINTS_LINE_OF_SYMMETRY_INDICES = [MESH_MOUTH_INDEX, coords.MESH_ANNOTATIONS['midwayBetweenEyes'][0]];
const BLAZEFACE_MOUTH_INDEX = 3;
const BLAZEFACE_NOSE_INDEX = 2;
const BLAZEFACE_KEYPOINTS_LINE_OF_SYMMETRY_INDICES = [BLAZEFACE_MOUTH_INDEX, BLAZEFACE_NOSE_INDEX];
const LEFT_EYE_OUTLINE = coords.MESH_ANNOTATIONS['leftEyeLower0'];
const LEFT_EYE_BOUNDS = [LEFT_EYE_OUTLINE[0], LEFT_EYE_OUTLINE[LEFT_EYE_OUTLINE.length - 1]];
const RIGHT_EYE_OUTLINE = coords.MESH_ANNOTATIONS['rightEyeLower0'];
const RIGHT_EYE_BOUNDS = [RIGHT_EYE_OUTLINE[0], RIGHT_EYE_OUTLINE[RIGHT_EYE_OUTLINE.length - 1]];
const IRIS_UPPER_CENTER_INDEX = 3;
const IRIS_LOWER_CENTER_INDEX = 4;
const IRIS_IRIS_INDEX = 71;
const IRIS_NUM_COORDINATES = 76;
const leftOutline = coords.MESH_ANNOTATIONS['leftEyeLower0'];
const rightOutline = coords.MESH_ANNOTATIONS['rightEyeLower0'];
const eyeLandmarks = {
leftBounds: [leftOutline[0], leftOutline[leftOutline.length - 1]],
rightBounds: [rightOutline[0], rightOutline[rightOutline.length - 1]],
};
const meshLandmarks = {
count: 468,
mouth: 13,
symmetryLine: [13, coords.MESH_ANNOTATIONS['midwayBetweenEyes'][0]],
};
const blazeFaceLandmarks = {
leftEye: 0,
rightEye: 1,
nose: 2,
mouth: 3,
leftEar: 4,
rightEar: 5,
symmetryLine: [3, 2],
};
const irisLandmarks = {
upperCenter: 3,
lowerCenter: 4,
index: 71,
numCoordinates: 76,
};
// Replace the raw coordinates returned by facemesh with refined iris model coordinates
// Update the z coordinate to be an average of the original and the new.
@ -83,8 +99,8 @@ export class Pipeline {
}
getLeftToRightEyeDepthDifference(rawCoords) {
const leftEyeZ = rawCoords[LEFT_EYE_BOUNDS[0]][2];
const rightEyeZ = rawCoords[RIGHT_EYE_BOUNDS[0]][2];
const leftEyeZ = rawCoords[eyeLandmarks.leftBounds[0]][2];
const rightEyeZ = rawCoords[eyeLandmarks.rightBounds[0]][2];
return leftEyeZ - rightEyeZ;
}
@ -106,7 +122,7 @@ export class Pipeline {
// Given a cropped image of an eye, returns the coordinates of the contours surrounding the eye and the iris.
getEyeCoords(eyeData, eyeBox, eyeBoxSize, flip = false) {
const eyeRawCoords: Array<any[]> = [];
for (let i = 0; i < IRIS_NUM_COORDINATES; i++) {
for (let i = 0; i < irisLandmarks.numCoordinates; i++) {
const x = eyeData[i * 3];
const y = eyeData[i * 3 + 1];
const z = eyeData[i * 3 + 2];
@ -115,13 +131,13 @@ export class Pipeline {
(y / this.irisSize) * eyeBoxSize[1] + eyeBox.startPoint[1], z,
]);
}
return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(IRIS_IRIS_INDEX) };
return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(irisLandmarks.index) };
}
// The z-coordinates returned for the iris are unreliable, so we take the z values from the surrounding keypoints.
getAdjustedIrisCoords(rawCoords, irisCoords, direction) {
const upperCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeUpper0`][IRIS_UPPER_CENTER_INDEX]][2];
const lowerCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeLower0`][IRIS_LOWER_CENTER_INDEX]][2];
const upperCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeUpper0`][irisLandmarks.upperCenter]][2];
const lowerCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeLower0`][irisLandmarks.lowerCenter]][2];
const averageZ = (upperCenterZ + lowerCenterZ) / 2;
// Iris indices: 0: center | 1: right | 2: above | 3: left | 4: below
return irisCoords.map((coord, i) => {
@ -187,7 +203,7 @@ export class Pipeline {
let rotationMatrix;
if (config.face.detector.rotation && config.face.mesh.enabled && tf.ENV.flags.IS_BROWSER) {
const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= LANDMARKS_COUNT) ? MESH_KEYPOINTS_LINE_OF_SYMMETRY_INDICES : BLAZEFACE_KEYPOINTS_LINE_OF_SYMMETRY_INDICES;
const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= meshLandmarks.count) ? meshLandmarks.symmetryLine : blazeFaceLandmarks.symmetryLine;
angle = util.computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
const faceCenter = bounding.getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
const faceCenterNormalized = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
@ -222,13 +238,13 @@ export class Pipeline {
let rawCoords = coordsReshaped.arraySync();
if (config.face.iris.enabled) {
const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = this.getEyeBox(rawCoords, face, LEFT_EYE_BOUNDS[0], LEFT_EYE_BOUNDS[1], true);
const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = this.getEyeBox(rawCoords, face, RIGHT_EYE_BOUNDS[0], RIGHT_EYE_BOUNDS[1]);
const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = this.getEyeBox(rawCoords, face, eyeLandmarks.leftBounds[0], eyeLandmarks.leftBounds[1], true);
const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = this.getEyeBox(rawCoords, face, eyeLandmarks.rightBounds[0], eyeLandmarks.rightBounds[1]);
const eyePredictions = this.irisModel.predict(tf.concat([leftEyeCrop, rightEyeCrop]));
const eyePredictionsData = eyePredictions.dataSync();
const leftEyeData = eyePredictionsData.slice(0, IRIS_NUM_COORDINATES * 3);
const leftEyeData = eyePredictionsData.slice(0, irisLandmarks.numCoordinates * 3);
const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = this.getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
const rightEyeData = eyePredictionsData.slice(IRIS_NUM_COORDINATES * 3);
const rightEyeData = eyePredictionsData.slice(irisLandmarks.numCoordinates * 3);
const { rawCoords: rightEyeRawCoords, iris: rightIrisRawCoords } = this.getEyeCoords(rightEyeData, rightEyeBox, rightEyeBoxSize);
const leftToRightEyeDepthDifference = this.getLeftToRightEyeDepthDifference(rawCoords);
if (Math.abs(leftToRightEyeDepthDifference) < 30) { // User is looking straight ahead.
@ -246,18 +262,33 @@ export class Pipeline {
rawCoords = rawCoords.concat(adjustedLeftIrisCoords).concat(adjustedRightIrisCoords);
}
// override box from detection with one calculated from mesh
const transformedCoordsData = this.transformRawCoords(rawCoords, box, angle, rotationMatrix);
const landmarksBox = bounding.enlargeBox(this.calculateLandmarksBoundingBox(transformedCoordsData), 1.5);
const squarifiedLandmarksBox = bounding.squarifyBox(landmarksBox);
box = bounding.enlargeBox(this.calculateLandmarksBoundingBox(transformedCoordsData), 1.5); // redefine box with mesh calculated one
const transformedCoords = tf.tensor2d(transformedCoordsData);
// do rotation one more time with mesh keypoints if we want to return a properly aligned face image
if (config.face.detector.rotation && config.face.mesh.enabled && config.face.detector.return && tf.ENV.flags.IS_BROWSER) {
const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= meshLandmarks.count) ? meshLandmarks.symmetryLine : blazeFaceLandmarks.symmetryLine;
angle = util.computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
const faceCenter = bounding.getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
const faceCenterNormalized = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
const rotatedImage = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
rotationMatrix = util.buildRotationMatrix(-angle, faceCenter);
face = bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotatedImage, [this.meshSize, this.meshSize]).div(255);
}
const prediction = {
coords: transformedCoords,
box: landmarksBox,
box,
faceConfidence,
boxConfidence: box.confidence,
image: face,
rawCoords,
};
// update stored cache values
const squarifiedLandmarksBox = bounding.squarifyBox(box);
this.storedBoxes[i] = { ...squarifiedLandmarksBox, landmarks: transformedCoordsData, confidence: box.confidence, faceConfidence };
return prediction;
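The second rotation pass reuses the same angle computation as the pre-mesh pass; the formula itself lives in util.ts and is not part of this diff, so the sketch below is an assumption of how that angle is derived and applied:

// assumed: angle between the mouth-to-forehead line and the vertical axis
function computeRotation(mouth, forehead) {
  return Math.PI / 2 - Math.atan2(-(forehead[1] - mouth[1]), forehead[0] - mouth[0]);
}
// rotate the whole input around the normalized face center, then re-crop the face
const angle = computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
const center = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]]; // x uses width, y uses height
const rotated = tf.image.rotateWithOffset(input, angle, 0, center); // browser backends only, not implemented in tfjs-node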

View File

@ -26,7 +26,7 @@ export function simmilarity(embedding1, embedding2, order = 2) {
.map((val, i) => (Math.abs(embedding1[i] - embedding2[i]) ** order)) // absolute difference raised to the given order
.reduce((sum, now) => (sum + now), 0) // sum all distances
** (1 / order); // take the order-th root to get the distance
const res = Math.trunc(1000 * (1 - (20 * distance))) / 1000;
const res = Math.max(Math.trunc(1000 * (1 - (50 * distance))) / 1000, 0);
return res;
}
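In plain terms, the updated score takes the Minkowski distance of the given order between the two embeddings and maps it linearly onto 0..1, now clamped at zero; a standalone sketch of the same math (the factor 50 means any distance of 0.02 or more scores 0):

function similarityScore(embedding1, embedding2, order = 2) {
  const distance = embedding1
    .map((val, i) => Math.abs(val - embedding2[i]) ** order) // per-element difference raised to the order
    .reduce((sum, now) => sum + now, 0)                      // sum all differences
    ** (1 / order);                                          // order-th root gives the Lp distance
  return Math.max(Math.trunc(1000 * (1 - 50 * distance)) / 1000, 0); // truncate to 3 decimals, clamp at 0
}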
@ -35,9 +35,10 @@ export async function predict(input, config) {
return new Promise(async (resolve) => {
const image = tf.tidy(() => {
const data = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false); // input is already normalized to 0..1
// const box = [[0.05, 0.10, 0.85, 0.90]]; // top, left, bottom, right
// const crop = tf.image.cropAndResize(data, box, [0], [model.inputs[0].shape[2], model.inputs[0].shape[1]]); // optionally do a tight box crop
const norm = data.sub(data.mean()); // trick to normalize around image mean value
const box = [[0.05, 0.15, 0.90, 0.85]]; // top, left, bottom, right
const crop = tf.image.cropAndResize(data, box, [0], [model.inputs[0].shape[2], model.inputs[0].shape[1]]); // optionally do a tight box crop
// const norm = crop.sub(crop.min()).sub(0.5); // trick to normalize around image mean value
const norm = crop.sub(0.5);
return norm;
});
let data: Array<[]> = [];
@ -49,7 +50,7 @@ export async function predict(input, config) {
const scale = res.div(l2);
return scale;
});
data = [...scaled.dataSync()]; // convert object array to standard array
data = scaled.dataSync(); // returns the embedding as a typed array
tf.dispose(scaled);
tf.dispose(res);
} else {
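Putting the hunks above together: the input is resized to the model's expected shape, center-cropped, shifted from the 0..1 range to roughly -0.5..0.5, and the resulting descriptor is L2-normalized before being returned; a condensed sketch (assuming model is the loaded mobilefacenet graph model and input is a normalized 4D image tensor):

const image = tf.tidy(() => {
  const data = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
  const box = [[0.05, 0.15, 0.90, 0.85]]; // tight crop: top, left, bottom, right
  const crop = tf.image.cropAndResize(data, box, [0], [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
  return crop.sub(0.5); // input is already 0..1, so this centers it around zero
});
const scaled = tf.tidy(() => {
  const res = model.predict(image);
  return res.div(res.norm('euclidean')); // scale to a unit-length embedding
});
const data = scaled.dataSync(); // typed array holding the embedding values
tf.dispose([image, scaled]);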

2
wiki

@ -1 +1 @@
Subproject commit fa7ac1f695547aa0fd25845e6cac7ed5ee0adcae
Subproject commit a6a1fb7149d8a25da4874ce469f66977d517420d