mirror of https://github.com/vladmandic/human

add face return tensor

parent ca1bc638f8
commit 8b5e7cc2d5
@@ -9,7 +9,14 @@ Repository: **<git+https://github.com/vladmandic/human.git>**
 
 ## Changelog
 
-### **HEAD -> main** 2021/03/10 mandic00@live.com
+### **HEAD -> main** 2021/03/11 mandic00@live.com
+
+- wip on embedding
+- simplify face box coordinate calculations
+- annotated models and removed gender-ssrnet
+- autodetect inputsizes
+
+### **origin/main** 2021/03/10 mandic00@live.com
 
 ### **1.0.3** 2021/03/10 mandic00@live.com
 
TODO.md
@@ -4,7 +4,6 @@
 - Automated testing
 - Guard against corrupt input
 - Improve face embedding
-- Build Face embedding database
 - Dynamic sample processing
 - Explore EfficientPose
   <https://github.com/daniegr/EfficientPose>
@@ -67,7 +67,7 @@ export default {
 // (note: module is not loaded until it is required)
 detector: {
 modelPath: '../models/blazeface-back.json',
-rotation: true, // use best-guess rotated face image or just box with rotation as-is
+rotation: false, // use best-guess rotated face image or just box with rotation as-is
 // false means higher performance, but incorrect mesh mapping if face angle is above 20 degrees
 // this parameter is not valid in nodejs
 maxFaces: 10, // maximum number of faces detected in the input
@@ -85,7 +85,7 @@ export default {
 scoreThreshold: 0.2, // threshold for deciding when to remove boxes based on score
 // in non-maximum suppression,
 // this is applied on detection objects only and before minConfidence
-return: true, // return extracted face as tensor
+return: false, // return extracted face as tensor
 },
 
 mesh: {
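This is the default flip for the option this commit adds: `face.detector.return` now ships disabled, so result objects only carry the extracted face tensor when a caller turns it on. A minimal sketch of opting in, using only the `Human` API already exercised in the demos below (`image` and `outCanvas` are placeholders, not part of the commit):

```js
// Sketch only, not part of this commit: enable the face tensor return per instance and render it.
// `image` is an existing input element/tensor and `outCanvas` an output <canvas>.
async function showFirstFace(image, outCanvas) {
  const human = new Human({ face: { detector: { return: true } } });
  const result = await human.detect(image);
  // face[n].tensor is only populated when detector.return is enabled
  if (result.face.length && result.face[0].tensor) await human.tf.browser.toPixels(result.face[0].tensor, outCanvas);
}
```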
@@ -121,7 +121,8 @@ export default {
 },
 
 embedding: {
-enabled: false,
+enabled: false, // to improve accuracy of face embedding extraction it is recommended
+// to enable detector.rotation and mesh.enabled
 modelPath: '../models/mobilefacenet.json',
 },
 },
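The new comment spells out a dependency: the embedding model runs on the extracted face crop, so it benefits from `detector.rotation` and `mesh.enabled` producing a well-aligned crop. A sketch of a config that follows that recommendation, mirroring the demo config changed later in this commit:

```js
// Sketch of a config following the recommendation above (same shape as the demo config in this commit)
const userConfig = {
  face: {
    enabled: true,
    detector: { rotation: true, return: true }, // rotation improves crop alignment; return exposes the crop
    mesh: { enabled: true },                    // mesh keypoints allow the second, finer rotation pass
    embedding: { enabled: true, modelPath: '../models/mobilefacenet.json' },
  },
};
const human = new Human(userConfig);
```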
@@ -3,18 +3,20 @@ import Human from '../src/human';
 import Menu from './menu.js';
 import GLBench from './gl-bench.js';
 
-// const userConfig = { backend: 'webgl' }; // add any user configuration overrides
+const userConfig = { backend: 'webgl' }; // add any user configuration overrides
 
+/*
 const userConfig = {
 backend: 'wasm',
 async: false,
 warmup: 'face',
 videoOptimized: false,
-face: { enabled: true, mesh: { enabled: true }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: true } },
+face: { enabled: true, mesh: { enabled: true }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: false } },
 hand: { enabled: false },
 gesture: { enabled: false },
 body: { enabled: false, modelPath: '../models/blazepose.json' },
 };
+*/
 
 const human = new Human(userConfig);
 
@@ -20,12 +20,14 @@
 body { margin: 0; background: black; color: white; overflow-x: hidden; scrollbar-width: none; }
 body::-webkit-scrollbar { display: none; }
 img { object-fit: contain; }
-.face { width: 200px; height: 200px; }
+.face { width: 150px; height: 150px; }
 </style>
 </head>
 <body>
 <br>Sample Images:
 <div id="images"></div>
+<br>Selected Face<br>
+<canvas id="orig" style="width: 200px; height: 200px;"></canvas>
 <br>Extracted Faces - click on a face to sort by simmilarity:<br>
 <div id="faces"></div>
 </body>
@@ -9,7 +9,7 @@ const userConfig = {
 videoOptimized: false,
 face: {
 enabled: true,
-detector: { rotation: true },
+detector: { rotation: true, return: true },
 mesh: { enabled: true },
 embedding: { enabled: true, modelPath: '../models/mobilefacenet.json' },
 iris: { enabled: false },
@@ -22,9 +22,10 @@ const userConfig = {
 body: { enabled: false },
 };
 const human = new Human(userConfig);
-const samples = ['../assets/sample-me.jpg', '../assets/sample6.jpg', '../assets/sample1.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample3.jpg', '../assets/sample2.jpg',
-'../private/me (1).jpg', '../private/me (2).jpg', '../private/me (3).jpg', '../private/me (4).jpg', '../private/me (5).jpg', '../private/me (6).jpg', '../private/me (7).jpg', '../private/me (8).jpg',
-'../private/me (9).jpg', '../private/me (10).jpg', '../private/me (11).jpg', '../private/me (12).jpg', '../private/me (13).jpg'];
+const samples = ['../assets/sample-me.jpg', '../assets/sample6.jpg', '../assets/sample1.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample3.jpg', '../assets/sample2.jpg'];
+// const samples = ['../assets/sample-me.jpg', '../assets/sample6.jpg', '../assets/sample1.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample3.jpg', '../assets/sample2.jpg',
+// '../private/me (1).jpg', '../private/me (2).jpg', '../private/me (3).jpg', '../private/me (4).jpg', '../private/me (5).jpg', '../private/me (6).jpg', '../private/me (7).jpg', '../private/me (8).jpg',
+// '../private/me (9).jpg', '../private/me (10).jpg', '../private/me (11).jpg', '../private/me (12).jpg', '../private/me (13).jpg'];
 const all = [];
 
 function log(...msg) {
@@ -36,6 +37,12 @@ function log(...msg) {
 
 async function analyze(face) {
 log('Face:', face);
+
+const box = [[0.05, 0.15, 0.90, 0.85]]; // top, left, bottom, right
+const crop = human.tf.image.cropAndResize(face.tensor.expandDims(0), box, [0], [200, 200]); // optionally do a tight box crop
+const c = document.getElementById('orig');
+human.tf.browser.toPixels(crop.squeeze(), c);
+
 const canvases = document.getElementsByClassName('face');
 for (const canvas of canvases) {
 const res = human.simmilarity(face.embedding, all[canvas.tag.sample][canvas.tag.face].embedding);
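The inserted lines render the selected face by cropping its tensor before drawing. As a reminder of the shapes involved, `cropAndResize` expects a 4D batch and normalized `[top, left, bottom, right]` boxes, hence the `expandDims(0)` / `squeeze()` pair; a commented sketch, assuming `face.tensor` is the `[height, width, 3]` crop returned when `detector.return` is enabled:

```js
// Sketch of the crop used above; variable names are illustrative
const batched = face.tensor.expandDims(0);       // cropAndResize needs [batch, height, width, channels]
const box = [[0.05, 0.15, 0.90, 0.85]];          // one box per crop: normalized top, left, bottom, right
const crop = human.tf.image.cropAndResize(batched, box, [0], [200, 200]); // [0] maps the box to batch image 0
human.tf.browser.toPixels(crop.squeeze(), document.getElementById('orig')); // back to [h, w, 3] for drawing
```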
@@ -64,7 +71,7 @@ async function faces(index, res) {
 canvas.height = 200;
 canvas.className = 'face';
 canvas.addEventListener('click', (evt) => {
-log('Select:', evt.target.tag.sample, evt.target.tag.face);
+log('Select:', 'Image:', evt.target.tag.sample, 'Face:', evt.target.tag.face);
 analyze(all[evt.target.tag.sample][evt.target.tag.face]);
 });
 human.tf.browser.toPixels(res.face[i].tensor, canvas);
@@ -73,7 +80,7 @@ async function faces(index, res) {
 }
 
 async function add(index) {
-log('Add:', samples[index]);
+log('Add image:', samples[index]);
 return new Promise((resolve) => {
 const img = new Image(100, 100);
 img.onload = () => {
@@ -89,6 +96,7 @@ async function add(index) {
 async function main() {
 await human.load();
 for (const i in samples) await add(i);
+log('Ready');
 }
 
 window.onload = main;
File diff suppressed because one or more lines are too long (14 files)
@@ -4,20 +4,36 @@ import * as bounding from './box';
 import * as util from './util';
 import * as coords from './coords';
 
-const LANDMARKS_COUNT = 468;
-const MESH_MOUTH_INDEX = 13;
-const MESH_KEYPOINTS_LINE_OF_SYMMETRY_INDICES = [MESH_MOUTH_INDEX, coords.MESH_ANNOTATIONS['midwayBetweenEyes'][0]];
-const BLAZEFACE_MOUTH_INDEX = 3;
-const BLAZEFACE_NOSE_INDEX = 2;
-const BLAZEFACE_KEYPOINTS_LINE_OF_SYMMETRY_INDICES = [BLAZEFACE_MOUTH_INDEX, BLAZEFACE_NOSE_INDEX];
-const LEFT_EYE_OUTLINE = coords.MESH_ANNOTATIONS['leftEyeLower0'];
-const LEFT_EYE_BOUNDS = [LEFT_EYE_OUTLINE[0], LEFT_EYE_OUTLINE[LEFT_EYE_OUTLINE.length - 1]];
-const RIGHT_EYE_OUTLINE = coords.MESH_ANNOTATIONS['rightEyeLower0'];
-const RIGHT_EYE_BOUNDS = [RIGHT_EYE_OUTLINE[0], RIGHT_EYE_OUTLINE[RIGHT_EYE_OUTLINE.length - 1]];
-const IRIS_UPPER_CENTER_INDEX = 3;
-const IRIS_LOWER_CENTER_INDEX = 4;
-const IRIS_IRIS_INDEX = 71;
-const IRIS_NUM_COORDINATES = 76;
+const leftOutline = coords.MESH_ANNOTATIONS['leftEyeLower0'];
+const rightOutline = coords.MESH_ANNOTATIONS['rightEyeLower0'];
+
+const eyeLandmarks = {
+leftBounds: [leftOutline[0], leftOutline[leftOutline.length - 1]],
+rightBounds: [rightOutline[0], rightOutline[rightOutline.length - 1]],
+};
+
+const meshLandmarks = {
+count: 468,
+mouth: 13,
+symmetryLine: [13, coords.MESH_ANNOTATIONS['midwayBetweenEyes'][0]],
+};
+
+const blazeFaceLandmarks = {
+leftEye: 0,
+rightEye: 1,
+nose: 2,
+mouth: 3,
+leftEar: 4,
+rightEar: 5,
+symmetryLine: [3, 2],
+};
+
+const irisLandmarks = {
+upperCenter: 3,
+lowerCenter: 4,
+index: 71,
+numCoordinates: 76,
+};
 
 // Replace the raw coordinates returned by facemesh with refined iris model coordinates
 // Update the z coordinate to be an average of the original and the new.
@@ -83,8 +99,8 @@ export class Pipeline {
 }
 
 getLeftToRightEyeDepthDifference(rawCoords) {
-const leftEyeZ = rawCoords[LEFT_EYE_BOUNDS[0]][2];
-const rightEyeZ = rawCoords[RIGHT_EYE_BOUNDS[0]][2];
+const leftEyeZ = rawCoords[eyeLandmarks.leftBounds[0]][2];
+const rightEyeZ = rawCoords[eyeLandmarks.rightBounds[0]][2];
 return leftEyeZ - rightEyeZ;
 }
 
@@ -106,7 +122,7 @@ export class Pipeline {
 // Given a cropped image of an eye, returns the coordinates of the contours surrounding the eye and the iris.
 getEyeCoords(eyeData, eyeBox, eyeBoxSize, flip = false) {
 const eyeRawCoords: Array<any[]> = [];
-for (let i = 0; i < IRIS_NUM_COORDINATES; i++) {
+for (let i = 0; i < irisLandmarks.numCoordinates; i++) {
 const x = eyeData[i * 3];
 const y = eyeData[i * 3 + 1];
 const z = eyeData[i * 3 + 2];
@@ -115,13 +131,13 @@ export class Pipeline {
 (y / this.irisSize) * eyeBoxSize[1] + eyeBox.startPoint[1], z,
 ]);
 }
-return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(IRIS_IRIS_INDEX) };
+return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(irisLandmarks.index) };
 }
 
 // The z-coordinates returned for the iris are unreliable, so we take the z values from the surrounding keypoints.
 getAdjustedIrisCoords(rawCoords, irisCoords, direction) {
-const upperCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeUpper0`][IRIS_UPPER_CENTER_INDEX]][2];
-const lowerCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeLower0`][IRIS_LOWER_CENTER_INDEX]][2];
+const upperCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeUpper0`][irisLandmarks.upperCenter]][2];
+const lowerCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeLower0`][irisLandmarks.lowerCenter]][2];
 const averageZ = (upperCenterZ + lowerCenterZ) / 2;
 // Iris indices: 0: center | 1: right | 2: above | 3: left | 4: below
 return irisCoords.map((coord, i) => {
@@ -187,7 +203,7 @@ export class Pipeline {
 let rotationMatrix;
 
 if (config.face.detector.rotation && config.face.mesh.enabled && tf.ENV.flags.IS_BROWSER) {
-const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= LANDMARKS_COUNT) ? MESH_KEYPOINTS_LINE_OF_SYMMETRY_INDICES : BLAZEFACE_KEYPOINTS_LINE_OF_SYMMETRY_INDICES;
+const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= meshLandmarks.count) ? meshLandmarks.symmetryLine : blazeFaceLandmarks.symmetryLine;
 angle = util.computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
 const faceCenter = bounding.getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
 const faceCenterNormalized = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
@@ -222,13 +238,13 @@ export class Pipeline {
 let rawCoords = coordsReshaped.arraySync();
 
 if (config.face.iris.enabled) {
-const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = this.getEyeBox(rawCoords, face, LEFT_EYE_BOUNDS[0], LEFT_EYE_BOUNDS[1], true);
-const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = this.getEyeBox(rawCoords, face, RIGHT_EYE_BOUNDS[0], RIGHT_EYE_BOUNDS[1]);
+const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = this.getEyeBox(rawCoords, face, eyeLandmarks.leftBounds[0], eyeLandmarks.leftBounds[1], true);
+const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = this.getEyeBox(rawCoords, face, eyeLandmarks.rightBounds[0], eyeLandmarks.rightBounds[1]);
 const eyePredictions = this.irisModel.predict(tf.concat([leftEyeCrop, rightEyeCrop]));
 const eyePredictionsData = eyePredictions.dataSync();
-const leftEyeData = eyePredictionsData.slice(0, IRIS_NUM_COORDINATES * 3);
+const leftEyeData = eyePredictionsData.slice(0, irisLandmarks.numCoordinates * 3);
 const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = this.getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
-const rightEyeData = eyePredictionsData.slice(IRIS_NUM_COORDINATES * 3);
+const rightEyeData = eyePredictionsData.slice(irisLandmarks.numCoordinates * 3);
 const { rawCoords: rightEyeRawCoords, iris: rightIrisRawCoords } = this.getEyeCoords(rightEyeData, rightEyeBox, rightEyeBoxSize);
 const leftToRightEyeDepthDifference = this.getLeftToRightEyeDepthDifference(rawCoords);
 if (Math.abs(leftToRightEyeDepthDifference) < 30) { // User is looking straight ahead.
@@ -246,18 +262,33 @@ export class Pipeline {
 rawCoords = rawCoords.concat(adjustedLeftIrisCoords).concat(adjustedRightIrisCoords);
 }
 
+// override box from detection with one calculated from mesh
 const transformedCoordsData = this.transformRawCoords(rawCoords, box, angle, rotationMatrix);
-const landmarksBox = bounding.enlargeBox(this.calculateLandmarksBoundingBox(transformedCoordsData), 1.5);
-const squarifiedLandmarksBox = bounding.squarifyBox(landmarksBox);
+box = bounding.enlargeBox(this.calculateLandmarksBoundingBox(transformedCoordsData), 1.5); // redefine box with mesh calculated one
+
 const transformedCoords = tf.tensor2d(transformedCoordsData);
 
+// do rotation one more time with mesh keypoints if we want to return perfect image
+if (config.face.detector.rotation && config.face.mesh.enabled && config.face.detector.return && tf.ENV.flags.IS_BROWSER) {
+const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= meshLandmarks.count) ? meshLandmarks.symmetryLine : blazeFaceLandmarks.symmetryLine;
+angle = util.computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
+const faceCenter = bounding.getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
+const faceCenterNormalized = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
+const rotatedImage = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
+rotationMatrix = util.buildRotationMatrix(-angle, faceCenter);
+face = bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotatedImage, [this.meshSize, this.meshSize]).div(255);
+}
+
 const prediction = {
 coords: transformedCoords,
-box: landmarksBox,
+box,
 faceConfidence,
 boxConfidence: box.confidence,
 image: face,
 rawCoords,
 };
 
+// updated stored cache values
+const squarifiedLandmarksBox = bounding.squarifyBox(box);
 this.storedBoxes[i] = { ...squarifiedLandmarksBox, landmarks: transformedCoordsData, confidence: box.confidence, faceConfidence };
 
 return prediction;
@@ -26,7 +26,7 @@ export function simmilarity(embedding1, embedding2, order = 2) {
 .map((val, i) => (Math.abs(embedding1[i] - embedding2[i]) ** order)) // distance squared
 .reduce((sum, now) => (sum + now), 0) // sum all distances
 ** (1 / order); // get root of
-const res = Math.trunc(1000 * (1 - (20 * distance))) / 1000;
+const res = Math.max(Math.trunc(1000 * (1 - (50 * distance))) / 1000, 0);
 return res;
 }
 
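Written out, the scoring works like this: compute a Minkowski distance of the given order (order 2 is Euclidean) between the two L2-normalized embeddings, then map it onto a 0..1 score. The commit raises the distance multiplier from 20 to 50, so the score falls off faster, and clamps at 0 so very different faces no longer produce negative values. A standalone sketch of the patched behavior:

```js
// Standalone sketch mirroring the patched simmilarity scoring
function score(embedding1, embedding2, order = 2) {
  const distance = embedding1
    .map((val, i) => Math.abs(val - embedding2[i]) ** order) // per-dimension difference
    .reduce((sum, now) => sum + now, 0)                      // summed
    ** (1 / order);                                          // order-th root (order 2 = euclidean)
  return Math.max(Math.trunc(1000 * (1 - (50 * distance))) / 1000, 0); // identical -> 1, clamped at 0
}
// example: a distance of 0.01 now scores 0.5, where the previous 20x multiplier gave 0.8
```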
@@ -35,9 +35,10 @@ export async function predict(input, config) {
 return new Promise(async (resolve) => {
 const image = tf.tidy(() => {
 const data = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false); // input is already normalized to 0..1
-// const box = [[0.05, 0.10, 0.85, 0.90]]; // top, left, bottom, right
-// const crop = tf.image.cropAndResize(data, box, [0], [model.inputs[0].shape[2], model.inputs[0].shape[1]]); // optionally do a tight box crop
-const norm = data.sub(data.mean()); // trick to normalize around image mean value
+const box = [[0.05, 0.15, 0.90, 0.85]]; // top, left, bottom, right
+const crop = tf.image.cropAndResize(data, box, [0], [model.inputs[0].shape[2], model.inputs[0].shape[1]]); // optionally do a tight box crop
+// const norm = crop.sub(crop.min()).sub(0.5); // trick to normalize around image mean value
+const norm = crop.sub(0.5);
 return norm;
 });
 let data: Array<[]> = [];
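Net effect of this hunk: instead of normalizing around the image mean, preprocessing now crops the already 0..1 input to a tight face box and shifts it by 0.5 to center values on zero before the embedding model runs. Read back as a whole, it looks like this (a sketch; `input`, `model`, and `tf` are the function's existing arguments and imports):

```js
// Sketch of the embedding preprocessing after this commit
const image = tf.tidy(() => {
  // resize to the model's expected input resolution; input values are already 0..1
  const data = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
  const box = [[0.05, 0.15, 0.90, 0.85]]; // normalized top, left, bottom, right
  const crop = tf.image.cropAndResize(data, box, [0], [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
  return crop.sub(0.5); // center 0..1 values around zero instead of subtracting the image mean
});
```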
@@ -49,7 +50,7 @@ export async function predict(input, config) {
 const scale = res.div(l2);
 return scale;
 });
-data = [...scaled.dataSync()]; // convert object array to standard array
+data = scaled.dataSync(); // convert object array to standard array
 tf.dispose(scaled);
 tf.dispose(res);
 } else {
wiki
@@ -1 +1 @@
-Subproject commit fa7ac1f695547aa0fd25845e6cac7ed5ee0adcae
+Subproject commit a6a1fb7149d8a25da4874ce469f66977d517420d