mirror of https://github.com/vladmandic/human
wip on embedding
parent 0ee6b8b00c
commit fb10a529ab
@@ -82,7 +82,7 @@ let original;
 async function calcSimmilariry(result) {
   document.getElementById('compare-container').style.display = human.config.face.embedding.enabled ? 'block' : 'none';
   if (!human.config.face.embedding.enabled) return;
-  if ((result?.face?.length > 0) && (result?.face[0].embedding?.length !== 192)) return;
+  if (!(result?.face?.length > 0) || (result?.face[0]?.embedding?.length !== 192)) return;
   if (!original) {
     original = result;
     document.getElementById('compare-canvas').getContext('2d').drawImage(original.canvas, 0, 0, 200, 200);

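The guard change above is the functional fix in this hunk: the old condition only returned early when a face was present but its embedding had the wrong length, so a result with no face at all fell through to the comparison code. A minimal TypeScript sketch of the two conditions, using a stripped-down result shape that is only an illustration of the real Human result object:

```ts
// Illustration only: a stripped-down result shape, not the full Human result object.
interface DemoResult { face?: { embedding?: number[] }[] }

// Old guard: bails out only when a face exists AND its embedding length is wrong,
// so a result with zero faces is not rejected.
const oldGuard = (r: DemoResult) => (r?.face?.length ?? 0) > 0 && r?.face?.[0]?.embedding?.length !== 192;

// New guard: bails out when there is no face OR the embedding length is wrong.
const newGuard = (r: DemoResult) => !((r?.face?.length ?? 0) > 0) || r?.face?.[0]?.embedding?.length !== 192;

console.log(oldGuard({ face: [] }), newGuard({ face: [] })); // false vs true: only the new guard rejects an empty result
```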
@@ -79438,35 +79438,39 @@ async function load5(config3) {
   }
   return model5;
 }
-function simmilarity(embedding1, embedding22) {
+function simmilarity(embedding1, embedding22, order = 2) {
   if (!embedding1 || !embedding22)
     return 0;
   if ((embedding1 == null ? void 0 : embedding1.length) === 0 || (embedding22 == null ? void 0 : embedding22.length) === 0)
     return 0;
   if ((embedding1 == null ? void 0 : embedding1.length) !== (embedding22 == null ? void 0 : embedding22.length))
     return 0;
-  const order = 2;
-  const distance = 10 * embedding1.map((val, i) => val - embedding22[i]).reduce((dist, diff) => dist + diff ** order, 0) ** (1 / order);
-  return Math.trunc(1e3 * (1 - distance)) / 1e3;
+  const distance = 50 * embedding1.map((val, i) => val - embedding22[i]).reduce((dist, diff) => dist + diff ** order, 0) ** (1 / order);
+  const res = Math.trunc(1e3 * (1 - (isNaN(distance) ? 1 : distance))) / 1e3;
+  console.log(distance, res);
+  return res;
 }
 async function predict4(image3, config3) {
   if (!model5)
     return null;
   return new Promise(async (resolve) => {
     const resize = image.resizeBilinear(image3, [model5.inputs[0].shape[2], model5.inputs[0].shape[1]], false);
+    const norm2 = resize.sub(0.5);
+    resize.dispose();
     let data2 = [];
     if (config3.face.embedding.enabled) {
       if (!config3.profile) {
-        const embeddingT = await model5.predict({img_inputs: resize});
-        data2 = [...embeddingT.dataSync()];
-        dispose(embeddingT);
+        const res = await model5.predict({img_inputs: norm2});
+        data2 = [...res.dataSync()];
+        dispose(res);
       } else {
-        const profileData = await profile(() => model5.predict({img_inputs: resize}));
+        const profileData = await profile(() => model5.predict({img_inputs: norm2}));
         data2 = [...profileData.result.dataSync()];
         profileData.result.dispose();
         run("emotion", profileData);
       }
     }
+    norm2.dispose();
     resolve(data2);
   });
 }

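The change to predict4 in the bundled output mirrors the TypeScript source further down: the resized input, which the source comments note is already in the 0..1 range, is shifted by 0.5 so the model receives values in -0.5..0.5, and the intermediate resize tensor is disposed immediately to avoid a leak. A minimal sketch of that normalization step, assuming `@tensorflow/tfjs` and a square 192x192 model input:

```ts
import * as tf from '@tensorflow/tfjs';

// Assumes the input tensor is already scaled to 0..1, as the source comments state.
function normalizeInput(image: tf.Tensor4D, height: number, width: number): tf.Tensor4D {
  const resize = tf.image.resizeBilinear(image, [height, width], false); // still 0..1
  const norm = resize.sub(0.5) as tf.Tensor4D;                           // shift to -0.5..0.5
  resize.dispose();                                                      // free the intermediate tensor
  return norm;                                                           // caller disposes after predict()
}

// Example: a random 1x256x256x3 "image" normalized for a 192x192 input.
const input = tf.randomUniform([1, 256, 256, 3]) as tf.Tensor4D;
const norm = normalizeInput(input, 192, 192);
console.log(norm.min().dataSync()[0], norm.max().dataSync()[0]); // roughly -0.5 and 0.5
tf.dispose([input, norm]);
```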
@@ -101412,17 +101416,17 @@ function status(msg) {
 }
 var original;
 async function calcSimmilariry(result) {
-  var _a, _b, _c, _d;
+  var _a, _b, _c, _d, _e;
   document.getElementById("compare-container").style.display = human.config.face.embedding.enabled ? "block" : "none";
   if (!human.config.face.embedding.enabled)
     return;
-  if (((_a = result == null ? void 0 : result.face) == null ? void 0 : _a.length) > 0 && ((_b = result == null ? void 0 : result.face[0].embedding) == null ? void 0 : _b.length) !== 192)
+  if (!(((_a = result == null ? void 0 : result.face) == null ? void 0 : _a.length) > 0) || ((_c = (_b = result == null ? void 0 : result.face[0]) == null ? void 0 : _b.embedding) == null ? void 0 : _c.length) !== 192)
     return;
   if (!original) {
     original = result;
     document.getElementById("compare-canvas").getContext("2d").drawImage(original.canvas, 0, 0, 200, 200);
   }
-  const simmilarity2 = human.simmilarity((_c = original == null ? void 0 : original.face[0]) == null ? void 0 : _c.embedding, (_d = result == null ? void 0 : result.face[0]) == null ? void 0 : _d.embedding);
+  const simmilarity2 = human.simmilarity((_d = original == null ? void 0 : original.face[0]) == null ? void 0 : _d.embedding, (_e = result == null ? void 0 : result.face[0]) == null ? void 0 : _e.embedding);
   document.getElementById("simmilarity").innerText = `simmilarity: ${Math.trunc(1e3 * simmilarity2) / 10}%`;
 }
 var lastDraw = performance.now();

File diff suppressed because one or more lines are too long
@@ -4350,35 +4350,39 @@ async function load5(config3) {
   }
   return model4;
 }
-function simmilarity(embedding1, embedding2) {
+function simmilarity(embedding1, embedding2, order = 2) {
   if (!embedding1 || !embedding2)
     return 0;
   if ((embedding1 == null ? void 0 : embedding1.length) === 0 || (embedding2 == null ? void 0 : embedding2.length) === 0)
     return 0;
   if ((embedding1 == null ? void 0 : embedding1.length) !== (embedding2 == null ? void 0 : embedding2.length))
     return 0;
-  const order = 2;
-  const distance = 10 * embedding1.map((val, i) => val - embedding2[i]).reduce((dist, diff) => dist + diff ** order, 0) ** (1 / order);
-  return Math.trunc(1e3 * (1 - distance)) / 1e3;
+  const distance = 50 * embedding1.map((val, i) => val - embedding2[i]).reduce((dist, diff) => dist + diff ** order, 0) ** (1 / order);
+  const res = Math.trunc(1e3 * (1 - (isNaN(distance) ? 1 : distance))) / 1e3;
+  console.log(distance, res);
+  return res;
 }
 async function predict4(image13, config3) {
   if (!model4)
     return null;
   return new Promise(async (resolve) => {
     const resize = tf8.image.resizeBilinear(image13, [model4.inputs[0].shape[2], model4.inputs[0].shape[1]], false);
+    const norm = resize.sub(0.5);
+    resize.dispose();
     let data3 = [];
     if (config3.face.embedding.enabled) {
       if (!config3.profile) {
-        const embeddingT = await model4.predict({img_inputs: resize});
-        data3 = [...embeddingT.dataSync()];
-        tf8.dispose(embeddingT);
+        const res = await model4.predict({img_inputs: norm});
+        data3 = [...res.dataSync()];
+        tf8.dispose(res);
       } else {
-        const profileData = await tf8.profile(() => model4.predict({img_inputs: resize}));
+        const profileData = await tf8.profile(() => model4.predict({img_inputs: norm}));
         data3 = [...profileData.result.dataSync()];
         profileData.result.dispose();
         run("emotion", profileData);
       }
     }
+    norm.dispose();
     resolve(data3);
   });
 }

File diff suppressed because one or more lines are too long
@@ -79438,35 +79438,39 @@ async function load5(config3) {
   }
   return model5;
 }
-function simmilarity(embedding1, embedding22) {
+function simmilarity(embedding1, embedding22, order = 2) {
   if (!embedding1 || !embedding22)
     return 0;
   if ((embedding1 == null ? void 0 : embedding1.length) === 0 || (embedding22 == null ? void 0 : embedding22.length) === 0)
     return 0;
   if ((embedding1 == null ? void 0 : embedding1.length) !== (embedding22 == null ? void 0 : embedding22.length))
     return 0;
-  const order = 2;
-  const distance = 10 * embedding1.map((val, i) => val - embedding22[i]).reduce((dist, diff) => dist + diff ** order, 0) ** (1 / order);
-  return Math.trunc(1e3 * (1 - distance)) / 1e3;
+  const distance = 50 * embedding1.map((val, i) => val - embedding22[i]).reduce((dist, diff) => dist + diff ** order, 0) ** (1 / order);
+  const res = Math.trunc(1e3 * (1 - (isNaN(distance) ? 1 : distance))) / 1e3;
+  console.log(distance, res);
+  return res;
 }
 async function predict4(image3, config3) {
   if (!model5)
     return null;
   return new Promise(async (resolve) => {
     const resize = image.resizeBilinear(image3, [model5.inputs[0].shape[2], model5.inputs[0].shape[1]], false);
+    const norm2 = resize.sub(0.5);
+    resize.dispose();
     let data2 = [];
     if (config3.face.embedding.enabled) {
       if (!config3.profile) {
-        const embeddingT = await model5.predict({img_inputs: resize});
-        data2 = [...embeddingT.dataSync()];
-        dispose(embeddingT);
+        const res = await model5.predict({img_inputs: norm2});
+        data2 = [...res.dataSync()];
+        dispose(res);
       } else {
-        const profileData = await profile(() => model5.predict({img_inputs: resize}));
+        const profileData = await profile(() => model5.predict({img_inputs: norm2}));
         data2 = [...profileData.result.dataSync()];
         profileData.result.dispose();
         run("emotion", profileData);
       }
     }
+    norm2.dispose();
     resolve(data2);
   });
 }

File diff suppressed because one or more lines are too long
@@ -79445,35 +79445,39 @@ return a / b;`;
   }
   return model5;
 }
-function simmilarity(embedding1, embedding22) {
+function simmilarity(embedding1, embedding22, order = 2) {
   if (!embedding1 || !embedding22)
     return 0;
   if ((embedding1 == null ? void 0 : embedding1.length) === 0 || (embedding22 == null ? void 0 : embedding22.length) === 0)
     return 0;
   if ((embedding1 == null ? void 0 : embedding1.length) !== (embedding22 == null ? void 0 : embedding22.length))
     return 0;
-  const order = 2;
-  const distance = 10 * embedding1.map((val, i) => val - embedding22[i]).reduce((dist, diff) => dist + diff ** order, 0) ** (1 / order);
-  return Math.trunc(1e3 * (1 - distance)) / 1e3;
+  const distance = 50 * embedding1.map((val, i) => val - embedding22[i]).reduce((dist, diff) => dist + diff ** order, 0) ** (1 / order);
+  const res = Math.trunc(1e3 * (1 - (isNaN(distance) ? 1 : distance))) / 1e3;
+  console.log(distance, res);
+  return res;
 }
 async function predict4(image3, config3) {
   if (!model5)
     return null;
   return new Promise(async (resolve) => {
     const resize = image.resizeBilinear(image3, [model5.inputs[0].shape[2], model5.inputs[0].shape[1]], false);
+    const norm2 = resize.sub(0.5);
+    resize.dispose();
     let data2 = [];
     if (config3.face.embedding.enabled) {
       if (!config3.profile) {
-        const embeddingT = await model5.predict({img_inputs: resize});
-        data2 = [...embeddingT.dataSync()];
-        dispose(embeddingT);
+        const res = await model5.predict({img_inputs: norm2});
+        data2 = [...res.dataSync()];
+        dispose(res);
       } else {
-        const profileData = await profile(() => model5.predict({img_inputs: resize}));
+        const profileData = await profile(() => model5.predict({img_inputs: norm2}));
         data2 = [...profileData.result.dataSync()];
         profileData.result.dispose();
         run("emotion", profileData);
       }
     }
+    norm2.dispose();
     resolve(data2);
   });
 }

File diff suppressed because one or more lines are too long
@@ -4330,35 +4330,39 @@ async function load5(config3) {
   }
   return model4;
 }
-function simmilarity(embedding1, embedding2) {
+function simmilarity(embedding1, embedding2, order = 2) {
   if (!embedding1 || !embedding2)
     return 0;
   if ((embedding1 == null ? void 0 : embedding1.length) === 0 || (embedding2 == null ? void 0 : embedding2.length) === 0)
     return 0;
   if ((embedding1 == null ? void 0 : embedding1.length) !== (embedding2 == null ? void 0 : embedding2.length))
     return 0;
-  const order = 2;
-  const distance = 10 * embedding1.map((val, i) => val - embedding2[i]).reduce((dist, diff) => dist + diff ** order, 0) ** (1 / order);
-  return Math.trunc(1e3 * (1 - distance)) / 1e3;
+  const distance = 50 * embedding1.map((val, i) => val - embedding2[i]).reduce((dist, diff) => dist + diff ** order, 0) ** (1 / order);
+  const res = Math.trunc(1e3 * (1 - (isNaN(distance) ? 1 : distance))) / 1e3;
+  console.log(distance, res);
+  return res;
 }
 async function predict4(image13, config3) {
   if (!model4)
     return null;
   return new Promise(async (resolve) => {
     const resize = tf8.image.resizeBilinear(image13, [model4.inputs[0].shape[2], model4.inputs[0].shape[1]], false);
+    const norm = resize.sub(0.5);
+    resize.dispose();
     let data2 = [];
     if (config3.face.embedding.enabled) {
       if (!config3.profile) {
-        const embeddingT = await model4.predict({img_inputs: resize});
-        data2 = [...embeddingT.dataSync()];
-        tf8.dispose(embeddingT);
+        const res = await model4.predict({img_inputs: norm});
+        data2 = [...res.dataSync()];
+        tf8.dispose(res);
       } else {
-        const profileData = await tf8.profile(() => model4.predict({img_inputs: resize}));
+        const profileData = await tf8.profile(() => model4.predict({img_inputs: norm}));
         data2 = [...profileData.result.dataSync()];
         profileData.result.dispose();
         run("emotion", profileData);
       }
     }
+    norm.dispose();
     resolve(data2);
   });
 }

File diff suppressed because one or more lines are too long
@@ -4330,35 +4330,39 @@ async function load5(config3) {
   }
   return model4;
 }
-function simmilarity(embedding1, embedding2) {
+function simmilarity(embedding1, embedding2, order = 2) {
   if (!embedding1 || !embedding2)
     return 0;
   if ((embedding1 == null ? void 0 : embedding1.length) === 0 || (embedding2 == null ? void 0 : embedding2.length) === 0)
     return 0;
   if ((embedding1 == null ? void 0 : embedding1.length) !== (embedding2 == null ? void 0 : embedding2.length))
     return 0;
-  const order = 2;
-  const distance = 10 * embedding1.map((val, i) => val - embedding2[i]).reduce((dist, diff) => dist + diff ** order, 0) ** (1 / order);
-  return Math.trunc(1e3 * (1 - distance)) / 1e3;
+  const distance = 50 * embedding1.map((val, i) => val - embedding2[i]).reduce((dist, diff) => dist + diff ** order, 0) ** (1 / order);
+  const res = Math.trunc(1e3 * (1 - (isNaN(distance) ? 1 : distance))) / 1e3;
+  console.log(distance, res);
+  return res;
 }
 async function predict4(image13, config3) {
   if (!model4)
     return null;
   return new Promise(async (resolve) => {
     const resize = tf8.image.resizeBilinear(image13, [model4.inputs[0].shape[2], model4.inputs[0].shape[1]], false);
+    const norm = resize.sub(0.5);
+    resize.dispose();
     let data2 = [];
     if (config3.face.embedding.enabled) {
       if (!config3.profile) {
-        const embeddingT = await model4.predict({img_inputs: resize});
-        data2 = [...embeddingT.dataSync()];
-        tf8.dispose(embeddingT);
+        const res = await model4.predict({img_inputs: norm});
+        data2 = [...res.dataSync()];
+        tf8.dispose(res);
       } else {
-        const profileData = await tf8.profile(() => model4.predict({img_inputs: resize}));
+        const profileData = await tf8.profile(() => model4.predict({img_inputs: norm}));
         data2 = [...profileData.result.dataSync()];
         profileData.result.dispose();
         run("emotion", profileData);
       }
     }
+    norm.dispose();
     resolve(data2);
   });
 }

File diff suppressed because one or more lines are too long
@@ -24,7 +24,9 @@ export function cutBoxFromImageAndResize(box, image, cropSize) {
   const h = image.shape[1];
   const w = image.shape[2];
   const boxes = [[
-    box.startPoint[1] / h, box.startPoint[0] / w, box.endPoint[1] / h,
+    box.startPoint[1] / h,
+    box.startPoint[0] / w,
+    box.endPoint[1] / h,
     box.endPoint[0] / w,
   ]];
   return tf.image.cropAndResize(image, boxes, [0], cropSize);

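The hunk above only reflows the box literal onto separate lines, but it documents the contract: `tf.image.cropAndResize` expects each box as normalized `[y1, x1, y2, x2]` fractions of image height and width, plus the batch index of the image the box belongs to. A small self-contained sketch, with a made-up pixel-space box (`startPoint`/`endPoint` as `[x, y]` pairs) purely for illustration:

```ts
import * as tf from '@tensorflow/tfjs';

// Made-up box in pixel coordinates: startPoint is the top-left corner, endPoint the bottom-right, both [x, y].
const box = { startPoint: [40, 20], endPoint: [200, 180] };

const image = tf.zeros([1, 240, 320, 3]) as tf.Tensor4D; // batch of one 240x320 RGB image
const h = image.shape[1];
const w = image.shape[2];

// cropAndResize wants normalized [y1, x1, y2, x2] per box, plus the batch index for each box.
const boxes = [[
  box.startPoint[1] / h,
  box.startPoint[0] / w,
  box.endPoint[1] / h,
  box.endPoint[0] / w,
]];
const crop = tf.image.cropAndResize(image, boxes, [0], [112, 112]);
console.log(crop.shape); // [1, 112, 112, 3]
```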
@@ -6,6 +6,13 @@ import * as profile from '../profile';
 // modified: https://github.com/sirius-ai/MobileFaceNet_TF/issues/46
 // download: https://github.com/sirius-ai/MobileFaceNet_TF/files/3551493/FaceMobileNet192_train_false.zip

+/* WiP
+
+- Should input box be tightly cropped?
+- What is the best input range? (adjust distance scale accordingly)
+- How to best normalize output
+*/
+
 let model;

 export async function load(config) {

@@ -16,36 +23,64 @@ export async function load(config) {
   return model;
 }

-export function simmilarity(embedding1, embedding2) {
+export function simmilarity(embedding1, embedding2, order = 2) {
   if (!embedding1 || !embedding2) return 0;
   if (embedding1?.length === 0 || embedding2?.length === 0) return 0;
   if (embedding1?.length !== embedding2?.length) return 0;
   // general minkowski distance
   // euclidean distance is limited case where order is 2
-  const order = 2;
-  const distance = 10.0 * ((embedding1.map((val, i) => (val - embedding2[i])).reduce((dist, diff) => dist + (diff ** order), 0) ** (1 / order)));
-  return (Math.trunc(1000 * (1 - distance)) / 1000);
+  const distance = 50.0 * ((embedding1.map((val, i) => (val - embedding2[i])).reduce((dist, diff) => dist + (diff ** order), 0) ** (1 / order)));
+  const res = (Math.trunc(1000 * (1 - (isNaN(distance) ? 1 : distance))) / 1000);
+  console.log(distance, res);
+  return res;
 }

 export async function predict(image, config) {
   if (!model) return null;
   return new Promise(async (resolve) => {
     const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false); // input is already normalized to 0..1
-    // const mean = resize.mean();
-    // const whiten = resize.sub(mean); // normalizes with mean value being at point 0
+    // optionally do a tight box crop
+    /*
+    const box = [[0, 0.2, 0.9, 0.8]]; // top, left, bottom, right
+    const resize = tf.image.cropAndResize(image, box, [0], [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
+    */
+    // debug visualize box
+    // const canvas = document.getElementById('compare-canvas');
+    // await tf.browser.toPixels(resize.squeeze(), canvas);
+    const norm = resize.sub(0.5);
+    // optionally normalizes with mean value being at point 0, better than fixed range -0.5..0.5
+    /*
+    const mean = resize.mean();
+    const norm = resize.sub(mean);
+    */
+    resize.dispose();
     let data: Array<[]> = [];
     if (config.face.embedding.enabled) {
       if (!config.profile) {
-        const embeddingT = await model.predict({ img_inputs: resize });
-        data = [...embeddingT.dataSync()]; // convert object array to standard array
-        tf.dispose(embeddingT);
+        const res = await model.predict({ img_inputs: norm });
+        /*
+        const scaled = tf.tidy(() => {
+          // run l2 normalization on output
+          const sqr = res.square();
+          const sum = sqr.sum();
+          const sqrt = sum.sqrt();
+          const l2 = res.div(sqrt);
+          // scale outputs
+          const range = l2.max().sub(l2.min());
+          const scale = l2.mul(2).div(range);
+          return scale;
+        });
+        */
+        data = [...res.dataSync()]; // convert object array to standard array
+        tf.dispose(res);
       } else {
-        const profileData = await tf.profile(() => model.predict({ img_inputs: resize }));
+        const profileData = await tf.profile(() => model.predict({ img_inputs: norm }));
         data = [...profileData.result.dataSync()];
         profileData.result.dispose();
         profile.run('emotion', profileData);
       }
     }
+    norm.dispose();
     resolve(data);
   });
 }

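The similarity function above computes a Minkowski distance between the two embedding vectors (Euclidean when `order` is 2), scales it, and flips it so that identical embeddings come out near 1. The jump from `10.0` to `50.0` and the `isNaN` fallback are part of the work-in-progress tuning flagged in the WiP comment block. A standalone sketch of the same idea, with the scale factor kept as an explicitly assumed constant:

```ts
// Minkowski distance between two embeddings; order = 2 is the Euclidean special case.
// SCALE = 50 matches the value being tried in this commit; it is a tuning constant, not a settled API contract.
const SCALE = 50.0;

function simmilarity(embedding1: number[], embedding2: number[], order = 2): number {
  if (!embedding1 || !embedding2) return 0;
  if (embedding1.length === 0 || embedding2.length === 0) return 0;
  if (embedding1.length !== embedding2.length) return 0;
  const distance = SCALE * (embedding1
    .map((val, i) => val - embedding2[i])
    .reduce((dist, diff) => dist + (diff ** order), 0) ** (1 / order));
  // A negative sum (possible with odd orders) makes the fractional root NaN; treat that as maximum distance.
  return Math.trunc(1000 * (1 - (Number.isNaN(distance) ? 1 : distance))) / 1000;
}

const a = [0.1, 0.2, 0.3];
console.log(simmilarity(a, a));                // 1: identical embeddings
console.log(simmilarity(a, [0.1, 0.2, 0.31])); // 0.5: small differences are amplified by the scale factor
```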
@@ -1,3 +1,3 @@
 export declare function load(config: any): Promise<any>;
-export declare function simmilarity(embedding1: any, embedding2: any): number;
+export declare function simmilarity(embedding1: any, embedding2: any, order?: number): number;
 export declare function predict(image: any, config: any): Promise<unknown>;

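The only declaration change is the optional `order` parameter on `simmilarity`, so existing two-argument callers keep compiling. A hypothetical usage sketch against the updated declaration (the import path and sample embeddings are illustrative):

```ts
// Hypothetical caller; in the demo the same function is reached as human.simmilarity(...).
import { simmilarity } from './embedding';

const embeddingA = new Array(192).fill(0.1);
const embeddingB = new Array(192).fill(0.12);

const euclidean = simmilarity(embeddingA, embeddingB);    // order defaults to 2
const manhattan = simmilarity(embeddingA, embeddingB, 1); // explicit Minkowski order 1
console.log({ euclidean, manhattan });
```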
wiki
@@ -1 +1 @@
-Subproject commit 8d403c2d9d0185ead9897e7177f460b25b95b3b6
+Subproject commit 69294f7a0a99bd996286f8f5bb655c7ea8bfc10d