mirror of https://github.com/vladmandic/human

commit 17b079ab39: implemented face embedding
parent 2f3da6441d
@@ -66,6 +66,7 @@ export default {
      // such as front-facing camera and
      // 'back' is optimized for distant faces.
      inputSize: 256, // fixed value: 128 for front and 256 for 'back'
      rotation: false, // use best-guess rotated face image or just box with rotation as-is
      maxFaces: 10, // maximum number of faces detected in the input
      // should be set to the minimum number for performance
      skipFrames: 15, // how many frames to go without re-running the face bounding box detector
@@ -118,6 +119,12 @@ export default {
      skipFrames: 15, // how many frames to go without re-running the detector
      modelPath: '../models/emotion-large.json', // can be 'mini', 'large'
    },
+
+   embedding: {
+     enabled: false,
+     inputSize: 112, // fixed value
+     modelPath: '../models/mobilefacenet.json',
+   },
  },

  body: {
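For reference, a minimal sketch of enabling the new section from user code (field names as defined above; how human.detect merges a userConfig over these defaults is shown in the src/human.js changes below):

  const userConfig = {
    face: {
      embedding: { enabled: true }, // off by default, per the config above
    },
  };
  const result = await human.detect(input, userConfig);
  // when enabled, each detected face carries a 192-value embedding descriptor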
@@ -22,6 +22,7 @@ const ui = {
  useWorker: false,
  worker: 'demo/worker.js',
  samples: ['../assets/sample6.jpg', '../assets/sample1.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample3.jpg', '../assets/sample2.jpg'],
+ compare: '../assets/sample-me.jpg',
  drawBoxes: true,
  drawPoints: false,
  drawPolygons: true,
@@ -48,6 +49,7 @@ let menu;
let menuFX;
let worker;
let bench;
+let sample;
let lastDetectedResult = {};

// helper function: translates json to human readable string
@@ -72,6 +74,16 @@ const status = (msg) => {
  document.getElementById('status').innerText = msg;
};

+async function calcSimmilariry(faces) {
+  if (!faces || !faces[0] || (faces[0].embedding?.length !== 192)) return;
+  const current = faces[0].embedding;
+  const original = (sample && sample.face && sample.face[0] && sample.face[0].embedding) ? sample.face[0].embedding : null;
+  if (original && original.length === 192) {
+    const simmilarity = human.simmilarity(current, original);
+    document.getElementById('simmilarity').innerText = `simmilarity: ${Math.trunc(1000 * simmilarity) / 10}%`;
+  }
+}

// draws processed results and starts processing of a next frame
async function drawResults(input) {
  const result = lastDetectedResult;
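The function compares two 192-value vectors: the live face embedding and the one captured from the warmup sample. The actual metric lives in src/embedding/embedding.js, which is not part of this diff; a plausible minimal sketch, assuming a distance-based score normalized to 0..1:

  // hypothetical sketch only: the real simmilarity() implementation is not shown in this diff
  function sketchSimilarity(a, b) {
    if (!a || !b || a.length !== b.length) return 0;
    let distance = 0;
    for (let i = 0; i < a.length; i++) distance += Math.abs(a[i] - b[i]); // L1 distance
    return Math.max(0, 1 - distance / a.length); // invert and clamp to 0..1
  }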
@@ -79,7 +91,7 @@ async function drawResults(input) {

  // update fps data
  // const elapsed = performance.now() - timeStamp;
- ui.fps.push(1000 / result.performance.total);
+ if (result.performance && result.performance.total) ui.fps.push(1000 / result.performance.total);
  if (ui.fps.length > ui.maxFPSframes) ui.fps.shift();

  // enable for continuous performance monitoring
@@ -89,7 +101,7 @@ async function drawResults(input) {
  await menu.updateChart('FPS', ui.fps);

  // get updated canvas
- result.canvas = await human.image(input, userConfig);
+ if (ui.buffered || !result.canvas) result.canvas = await human.image(input, userConfig);

  // draw image from video
  const ctx = canvas.getContext('2d');
@@ -102,17 +114,20 @@ async function drawResults(input) {
  } else {
    ctx.drawImage(input, 0, 0, input.width, input.height, 0, 0, canvas.width, canvas.height);
  }

  // draw all results
  await draw.face(result.face, canvas, ui, human.facemesh.triangulation);
  await draw.body(result.body, canvas, ui);
  await draw.hand(result.hand, canvas, ui);
  await draw.gesture(result.gesture, canvas, ui);
+ await calcSimmilariry(result.face);

  // update log
  const engine = human.tf.engine();
  const gpu = engine.backendInstance ? `gpu: ${(engine.backendInstance.numBytesInGPU ? engine.backendInstance.numBytesInGPU : 0).toLocaleString()} bytes` : '';
  const memory = `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
  const processing = result.canvas ? `processing: ${result.canvas.width} x ${result.canvas.height}` : '';
- const avg = Math.trunc(10 * ui.fps.reduce((a, b) => a + b) / ui.fps.length) / 10;
+ const avg = Math.trunc(10 * ui.fps.reduce((a, b) => a + b, 0) / ui.fps.length) / 10;
  const warning = (ui.fps.length > 5) && (avg < 5) ? '<font color="lightcoral">warning: your performance is low: try switching to higher performance backend, lowering resolution or disabling some models</font>' : '';
  document.getElementById('log').innerHTML = `
    video: ${ui.camera.name} | facing: ${ui.camera.facing} | resolution: ${ui.camera.width} x ${ui.camera.height} ${processing}<br>
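The added initial value on reduce is not cosmetic: reducing an empty array without one throws, so the log line would crash before the first fps sample is collected.

  [].reduce((a, b) => a + b);    // TypeError: Reduce of empty array with no initial value
  [].reduce((a, b) => a + b, 0); // 0, so the expression no longer throws before samples exist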
@@ -277,7 +292,8 @@ async function processImage(input) {
  canvas.width = human.config.filter.width && human.config.filter.width > 0 ? human.config.filter.width : image.naturalWidth;
  canvas.height = human.config.filter.height && human.config.filter.height > 0 ? human.config.filter.height : image.naturalHeight;
  const result = await human.detect(image, userConfig);
- drawResults(image, result, canvas);
+ lastDetectedResult = result;
+ await drawResults(image);
  const thumb = document.createElement('canvas');
  thumb.className = 'thumbnail';
  thumb.width = window.innerWidth / (ui.columns + 0.1);
@@ -325,11 +341,12 @@ async function detectSampleImages() {
  log('Running detection of sample images');
  status('processing images');
  document.getElementById('samples-container').innerHTML = '';
- for (const sample of ui.samples) await processImage(sample);
+ for (const image of ui.samples) await processImage(image);
  status('');
}

function setupMenu() {
+ document.getElementById('compare-container').style.display = human.config.face.embedding.enabled ? 'block' : 'none';
  menu = new Menu(document.body, '', { top: '1rem', right: '1rem' });
  const btn = menu.addButton('start video', 'pause video', () => detectVideo());
  menu.addButton('process images', 'process images', () => detectSampleImages());
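The loop-variable rename is likely deliberate: sample is now a module-level binding holding the warmup result (see let sample; above), and for (const sample of ui.samples) would shadow it inside the loop body.

  let sample;                       // module level: warmup result, incl. reference embedding
  for (const image of ui.samples) { // renamed so the outer `sample` stays unambiguous
    await processImage(image);
  }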
@@ -449,7 +466,7 @@ async function main() {
  // this is not required, just pre-warms all models for faster initial inference
  if (ui.modelsWarmup) {
    status('initializing');
-   await human.warmup(userConfig);
+   sample = await human.warmup(userConfig, document.getElementById('sample-image'));
  }
  status('human: ready');
  document.getElementById('loader').style.display = 'none';
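With the extended signature, the warmup pass doubles as reference capture: the detection result for the supplied sample image is returned and kept in sample, which later feeds calcSimmilariry() during live processing.

  // sketch: reference embedding captured once at startup
  sample = await human.warmup(userConfig, document.getElementById('sample-image'));
  const reference = sample && sample.face && sample.face[0] && sample.face[0].embedding; // 192 values when embedding is enabled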
@@ -34,6 +34,7 @@
.video { display: none; }
.canvas { margin: 0 auto; }
.bench { position: absolute; right: 0; bottom: 0; }
+.compare-image { width: 10vw; position: absolute; top: 150px; left: 30px; box-shadow: 0 0 2px 2px black; background: black; }
.loader { width: 300px; height: 300px; border: 3px solid transparent; border-radius: 50%; border-top: 4px solid #f15e41; animation: spin 4s linear infinite; position: absolute; top: 30%; left: 50%; margin-left: -150px; z-index: 15; }
.loader::before, .loader::after { content: ""; position: absolute; top: 6px; bottom: 6px; left: 6px; right: 6px; border-radius: 50%; border: 4px solid transparent; }
.loader::before { border-top-color: #bad375; animation: 3s spin linear infinite; }
@@ -70,6 +71,10 @@
<canvas id="canvas" class="canvas"></canvas>
<video id="video" playsinline class="video"></video>
</div>
+<div id="compare-container" style="display: none" class="compare-image">
+  <img id="sample-image" style="width: 100%" src="../assets/sample-me.jpg">
+  <div id="simmilarity"></div>
+</div>
<div id="samples-container" class="samples-container"></div>
<canvas id="bench-canvas" class="bench"></canvas>
<div id="log" class="log"></div>
@@ -5,9 +5,6 @@ const models = {};
let last = { age: 0 };
let frame = Number.MAX_SAFE_INTEGER;
-
-// tuning values
-const zoom = [0, 0]; // 0..1 meaning 0%..100%

async function load(config) {
  if (!models.age) {
    models.age = await loadGraphModel(config.face.age.modelPath);
@@ -18,12 +15,15 @@ async function load(config) {
}

async function predict(image, config) {
  if (!models.age) return null;
  if ((frame < config.face.age.skipFrames) && last.age && (last.age > 0)) {
    frame += 1;
    return last;
  }
  frame = 0;
  return new Promise(async (resolve) => {
+   /*
+   const zoom = [0, 0]; // 0..1 meaning 0%..100%
    const box = [[
      (image.shape[1] * zoom[0]) / image.shape[1],
      (image.shape[2] * zoom[1]) / image.shape[2],
@@ -31,7 +31,8 @@ async function predict(image, config) {
      (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
    ]];
    const resize = tf.image.cropAndResize(image, box, [0], [config.face.age.inputSize, config.face.age.inputSize]);
    // const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
+   */
+   const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
    const enhance = tf.mul(resize, [255.0]);
    tf.dispose(resize);
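A note on the path that was commented out: tf.image.cropAndResize takes boxes as normalized [y1, x1, y2, x2] coordinates, so with zoom = [0, 0] the box spans the entire frame and a plain resizeBilinear yields the same pixels more cheaply. Sketch of the equivalence (size stands in for config.face.age.inputSize):

  const box = [[0, 0, 1, 1]]; // [y1, x1, y2, x2], normalized; zoom = [0, 0] degenerates to this
  const a = tf.image.cropAndResize(image, box, [0], [size, size]); // full-frame crop + resize
  const b = tf.image.resizeBilinear(image, [size, size], false);   // same result, no crop step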
@@ -7,7 +7,6 @@ let last = [];
let frame = Number.MAX_SAFE_INTEGER;

// tuning values
-const zoom = [0, 0]; // 0..1 meaning 0%..100%
const rgb = [0.2989, 0.5870, 0.1140]; // factors for red/green/blue colors when converting to grayscale
const scale = 1; // score multiplication factor
@@ -21,12 +20,15 @@ async function load(config) {
}

async function predict(image, config) {
  if (!models.emotion) return null;
  if ((frame < config.face.emotion.skipFrames) && (last.length > 0)) {
    frame += 1;
    return last;
  }
  frame = 0;
  return new Promise(async (resolve) => {
+   /*
+   const zoom = [0, 0]; // 0..1 meaning 0%..100%
    const box = [[
      (image.shape[1] * zoom[0]) / image.shape[1],
      (image.shape[2] * zoom[1]) / image.shape[2],
@@ -34,7 +36,8 @@ async function predict(image, config) {
      (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
    ]];
    const resize = tf.image.cropAndResize(image, box, [0], [config.face.emotion.inputSize, config.face.emotion.inputSize]);
    // const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
+   */
+   const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
    const [red, green, blue] = tf.split(resize, 3, 3);
    resize.dispose();
    // weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
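The rgb constants implement the standard luma weights (Y = 0.2989 R + 0.5870 G + 0.1140 B, per the linked rgb2gray reference). The diff truncates before the split channels are recombined; a minimal sketch of that step, assuming the red/green/blue tensors above:

  // recombine weighted channels into grayscale (sketch; the actual line is cut off in this diff)
  const grayscale = tf.addN([
    tf.mul(red, rgb[0]),   // 0.2989 * R
    tf.mul(green, rgb[1]), // 0.5870 * G
    tf.mul(blue, rgb[2]),  // 0.1140 * B
  ]);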
@@ -7,7 +7,6 @@ let frame = Number.MAX_SAFE_INTEGER;
let alternative = false;

// tuning values
-const zoom = [0, 0]; // 0..1 meaning 0%..100%
const rgb = [0.2989, 0.5870, 0.1140]; // factors for red/green/blue colors when converting to grayscale

async function load(config) {
@@ -21,12 +20,15 @@ async function load(config) {
}

async function predict(image, config) {
  if (!models.gender) return null;
  if ((frame < config.face.gender.skipFrames) && last.gender !== '') {
    frame += 1;
    return last;
  }
  frame = 0;
  return new Promise(async (resolve) => {
+   /*
+   const zoom = [0, 0]; // 0..1 meaning 0%..100%
    const box = [[
      (image.shape[1] * zoom[0]) / image.shape[1],
      (image.shape[2] * zoom[1]) / image.shape[2],
@@ -34,6 +36,8 @@ async function predict(image, config) {
      (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
    ]];
    const resize = tf.image.cropAndResize(image, box, [0], [config.face.gender.inputSize, config.face.gender.inputSize]);
+   */
+   const resize = tf.image.resizeBilinear(image, [config.face.gender.inputSize, config.face.gender.inputSize], false);
    let enhance;
    if (alternative) {
      enhance = tf.tidy(() => {
src/human.js (65 lines changed)
@@ -3,6 +3,7 @@ import * as facemesh from './face/facemesh.js';
import * as age from './age/age.js';
import * as gender from './gender/gender.js';
import * as emotion from './emotion/emotion.js';
+import * as embedding from './embedding/embedding.js';
import * as posenet from './body/posenet.js';
import * as handpose from './hand/handpose.js';
import * as gesture from './gesture.js';
@@ -108,6 +109,11 @@ class Human {
    return null;
  }

+ simmilarity(embedding1, embedding2) {
+   if (this.config.face.embedding.enabled) return embedding.simmilarity(embedding1, embedding2);
+   return 0;
+ }

  // preload models, not explicitly required as it's done automatically on first use
  async load(userConfig) {
    this.state = 'load';
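Usage sketch of the new method from application code; referenceEmbedding is a hypothetical vector stored earlier (for example from the warmup detection in the demo):

  const result = await human.detect(video, userConfig);
  const current = result.face[0] && result.face[0].embedding;
  if (current) {
    const score = human.simmilarity(current, referenceEmbedding); // returns 0 when embedding is disabled
    console.log(`match: ${Math.trunc(100 * score)}%`);
  }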
@@ -127,6 +133,7 @@ class Human {
    this.models.age,
    this.models.gender,
    this.models.emotion,
+   this.models.embedding,
    this.models.posenet,
    this.models.handpose,
  ] = await Promise.all([
@@ -134,6 +141,7 @@ class Human {
    this.models.age || ((this.config.face.enabled && this.config.face.age.enabled) ? age.load(this.config) : null),
    this.models.gender || ((this.config.face.enabled && this.config.face.gender.enabled) ? gender.load(this.config) : null),
    this.models.emotion || ((this.config.face.enabled && this.config.face.emotion.enabled) ? emotion.load(this.config) : null),
+   this.models.embedding || ((this.config.face.enabled && this.config.face.embedding.enabled) ? embedding.load(this.config) : null),
    this.models.posenet || (this.config.body.enabled ? posenet.load(this.config) : null),
    this.models.handpose || (this.config.hand.enabled ? handpose.load(this.config.hand) : null),
  ]);
@@ -142,6 +150,7 @@ class Human {
  if (this.config.face.enabled && this.config.face.age.enabled && !this.models.age) this.models.age = await age.load(this.config);
  if (this.config.face.enabled && this.config.face.gender.enabled && !this.models.gender) this.models.gender = await gender.load(this.config);
  if (this.config.face.enabled && this.config.face.emotion.enabled && !this.models.emotion) this.models.emotion = await emotion.load(this.config);
+ if (this.config.face.enabled && this.config.face.embedding.enabled && !this.models.embedding) this.models.embedding = await embedding.load(this.config);
  if (this.config.body.enabled && !this.models.posenet) this.models.posenet = await posenet.load(this.config);
  if (this.config.hand.enabled && !this.models.handpose) this.models.handpose = await handpose.load(this.config.hand);
}
@@ -199,6 +208,7 @@ class Human {
    let ageRes;
    let genderRes;
    let emotionRes;
+   let embeddingRes;
    const faceRes = [];
    this.state = 'run:face';
    timeStamp = now();
@@ -206,11 +216,13 @@ class Human {
    this.perf.face = Math.trunc(now() - timeStamp);
    for (const face of faces) {
      this.analyze('Get Face');

      // if something went wrong, skip the face
      if (!face.image || face.image.isDisposedInternal) {
        this.log('Face object is disposed:', face.image);
        continue;
      }

      // run age, inherits face from blazeface
      this.analyze('Start Age:');
      if (this.config.async) {
@@ -232,6 +244,7 @@ class Human {
      genderRes = this.config.face.gender.enabled ? await gender.predict(face.image, this.config) : {};
      this.perf.gender = Math.trunc(now() - timeStamp);
    }

    // run emotion, inherits face from blazeface
    this.analyze('Start Emotion:');
    if (this.config.async) {
@@ -244,9 +257,21 @@ class Human {
      }
      this.analyze('End Emotion:');

+     // run embedding, inherits face from blazeface
+     this.analyze('Start Embedding:');
+     if (this.config.async) {
+       embeddingRes = this.config.face.embedding.enabled ? embedding.predict(face.image, this.config) : {};
+     } else {
+       this.state = 'run:embedding';
+       timeStamp = now();
+       embeddingRes = this.config.face.embedding.enabled ? await embedding.predict(face.image, this.config) : {};
+       this.perf.embedding = Math.trunc(now() - timeStamp);
+     }
+     this.analyze('End Embedding:');

      // if async wait for results
      if (this.config.async) {
-       [ageRes, genderRes, emotionRes] = await Promise.all([ageRes, genderRes, emotionRes]);
+       [ageRes, genderRes, emotionRes, embeddingRes] = await Promise.all([ageRes, genderRes, emotionRes, embeddingRes]);
      }

      this.analyze('Finish Face:');
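The new block mirrors the other per-face models: in async mode the predict call is stored unawaited, so age, gender, emotion and embedding run concurrently for each face before their promises are settled together. The fan-out/fan-in shape, reduced to a sketch:

  // async mode: start all per-face models, then await them as a group
  ageRes = age.predict(face.image, config);             // promises, not results
  genderRes = gender.predict(face.image, config);
  emotionRes = emotion.predict(face.image, config);
  embeddingRes = embedding.predict(face.image, config);
  [ageRes, genderRes, emotionRes, embeddingRes] = await Promise.all([ageRes, genderRes, emotionRes, embeddingRes]);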
@@ -270,6 +295,7 @@ class Human {
      gender: genderRes.gender,
      genderConfidence: genderRes.confidence,
      emotion: emotionRes,
+     embedding: embeddingRes,
      iris: (irisSize !== 0) ? Math.trunc(irisSize) / 100 : 0,
    });
    this.analyze('End Face');
@@ -294,23 +320,23 @@ class Human {

  // main detect function
  async detect(input, userConfig = {}) {
-   this.state = 'config';
-   let timeStamp;
-
-   // update configuration
-   this.config = mergeDeep(this.config, userConfig);
-   if (!this.config.videoOptimized) this.config = mergeDeep(this.config, disableSkipFrames);
-
-   // sanity checks
-   this.state = 'check';
-   const error = this.sanity(input);
-   if (error) {
-     this.log(error, input);
-     return { error };
-   }
-
    // detection happens inside a promise
    return new Promise(async (resolve) => {
+     this.state = 'config';
+     let timeStamp;
+
+     // update configuration
+     this.config = mergeDeep(this.config, userConfig);
+     if (!this.config.videoOptimized) this.config = mergeDeep(this.config, disableSkipFrames);
+
+     // sanity checks
+     this.state = 'check';
+     const error = this.sanity(input);
+     if (error) {
+       this.log(error, input);
+       resolve({ error });
+     }
+
      let poseRes;
      let handRes;
      let faceRes;
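One nuance of moving the guard inside the promise: the old return { error } exited detect() outright, while resolve({ error }) by itself does not stop the executor, so the code after the guard still runs. A sketch of the stricter form, assuming an early exit is still intended:

  if (error) {
    this.log(error, input);
    resolve({ error });
    return; // resolve() alone does not end the executor function
  }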
@@ -391,10 +417,11 @@ class Human {
    });
  }

- async warmup(userConfig) {
-   const warmup = new ImageData(255, 255);
-   await this.detect(warmup, userConfig);
+ async warmup(userConfig, sample) {
+   if (!sample) sample = new ImageData(255, 255);
+   const warmup = await this.detect(sample, userConfig);
    this.log('warmed up');
+   return warmup;
  }
}