tweaked default values

pull/280/head
Vladimir Mandic 2021-09-28 13:48:29 -04:00
parent bd5cc2b36b
commit 4b807b5f11
6 changed files with 54 additions and 40 deletions

@@ -9,8 +9,14 @@
## Changelog
- ### **HEAD -> main** 2021/09/27 mandic00@live.com
+ ### **HEAD -> main** 2021/09/28 mandic00@live.com
+ - enable handtrack as default model
+ - redesign face processing
+ ### **origin/main** 2021/09/27 mandic00@live.com
- refactoring
- define app specific types
- implement box caching for movenet
- autodetect number of bodies and hands

@@ -358,7 +358,7 @@ const config: Config = {
// this parameter is not valid in nodejs
maxDetected: 1, // maximum number of faces detected in the input
// should be set to the minimum number for performance
- skipFrames: 15, // how many max frames to go without re-running the face bounding box detector
+ skipFrames: 11, // how many max frames to go without re-running the face bounding box detector
// only used when cacheSensitivity is not zero
// e.g., if model is running at 25 FPS, we can re-use existing bounding
// box for updated face analysis as the head does not move fast
@@ -380,23 +380,23 @@ const config: Config = {
// can be either absolute path or relative to modelBasePath
},
- emotion: {
- enabled: true,
- minConfidence: 0.1, // threshold for discarding a prediction
- skipFrames: 12, // how many max frames to go without re-running the detector
- // only used when cacheSensitivity is not zero
- modelPath: 'emotion.json', // face emotion model, can be absolute path or relative to modelBasePath
- },
description: {
enabled: true, // to improve accuracy of face description extraction it is
// recommended to enable detector.rotation and mesh.enabled
modelPath: 'faceres.json', // face description model
// can be either absolute path or relative to modelBasePath
- skipFrames: 11, // how many max frames to go without re-running the detector
+ skipFrames: 13, // how many max frames to go without re-running the detector
// only used when cacheSensitivity is not zero
minConfidence: 0.1, // threshold for discarding a prediction
},
+ emotion: {
+ enabled: true,
+ minConfidence: 0.1, // threshold for discarding a prediction
+ skipFrames: 17, // how many max frames to go without re-running the detector
+ // only used when cacheSensitivity is not zero
+ modelPath: 'emotion.json', // face emotion model, can be absolute path or relative to modelBasePath
+ },
},
body: {
@@ -420,7 +420,7 @@ const config: Config = {
rotation: true, // use best-guess rotated hand image or just box with rotation as-is
// false means higher performance, but incorrect finger mapping if hand is inverted
// only valid for `handdetect` variation
- skipFrames: 18, // how many max frames to go without re-running the hand bounding box detector
+ skipFrames: 14, // how many max frames to go without re-running the hand bounding box detector
// only used when cacheSensitivity is not zero
// e.g., if model is running at 25 FPS, we can re-use existing bounding
// box for updated hand skeleton analysis as the hand
@@ -447,7 +447,7 @@ const config: Config = {
minConfidence: 0.2, // threshold for discarding a prediction
iouThreshold: 0.4, // amount of overlap between two detected objects before one object is removed
maxDetected: 10, // maximum number of objects detected in the input
- skipFrames: 19, // how many max frames to go without re-running the detector
+ skipFrames: 15, // how many max frames to go without re-running the detector
// only used when cacheSensitivity is not zero
},
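The tweaked skipFrames defaults are easier to read in wall-clock terms. A minimal sketch, using the new values from this diff and the steady 25 FPS figure the config comments themselves assume:

```ts
// How often each detector re-runs at 25 FPS with the new skipFrames defaults;
// between re-runs the cached result is reused (only when cacheSensitivity is not zero).
const fps = 25;
const defaults: Array<[string, number]> = [
  ['face detector', 11],
  ['face description', 13],
  ['emotion', 17],
  ['hand detector', 14],
  ['object detector', 15],
];
for (const [model, skipFrames] of defaults) {
  console.log(`${model}: re-runs at most every ~${(skipFrames / fps).toFixed(2)}s`);
}
```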

@@ -96,7 +96,7 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectRes
if (!input || !models[0]) return hands;
const t: Record<string, Tensor> = {};
const ratio = (input.shape[2] || 1) / (input.shape[1] || 1);
- const height = Math.min(Math.round((input.shape[1] || 0) / 8) * 8, 512); // use dynamic input size but cap at 1024
+ const height = Math.min(Math.round((input.shape[1] || 0) / 8) * 8, 512); // use dynamic input size but cap at 512
const width = Math.round(height * ratio / 8) * 8;
t.resize = tf.image.resizeBilinear(input, [height, width]); // todo: resize with padding
t.cast = tf.cast(t.resize, 'int32');
@@ -106,7 +106,7 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectRes
const classScores = tf.unstack(t.scores, 1);
let id = 0;
for (let i = 0; i < classScores.length; i++) {
- if (i !== 0 && i !== 1) continue;
+ if (i === 4) continue; // skip faces
t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, classScores[i], config.hand.maxDetected, config.hand.iouThreshold, config.hand.minConfidence);
const nms = await t.nms.data();
tf.dispose(t.nms);
@@ -151,16 +151,17 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
landmarks: {} as HandResult['landmarks'],
annotations: {} as HandResult['annotations'],
};
- if (!input || !models[1]) return hand; // something is wrong
- if (config.hand.landmarks) {
+ if (input && models[1] && config.hand.landmarks) {
const t: Record<string, Tensor> = {};
if (!h.yxBox) return hand;
t.crop = tf.image.cropAndResize(input, [h.yxBox], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
t.cast = tf.cast(t.crop, 'float32');
t.div = tf.div(t.cast, 255);
[t.score, t.keypoints] = models[1].execute(t.div) as Tensor[];
- const score = Math.round(100 * (await t.score.data())[0] / 100);
- if (score > (config.hand.minConfidence || 0)) {
+ // const score = Math.round(100 * (await t.score.data())[0] / 100);
+ const rawScore = (await t.score.data())[0];
+ const score = (100 - Math.trunc(100 / (1 + Math.exp(rawScore)))) / 100; // reverse sigmoid value
+ if (score >= (config.hand.minConfidence || 0)) {
hand.fingerScore = score;
t.reshaped = tf.reshape(t.keypoints, [-1, 3]);
const rawCoords = await t.reshaped.array() as Point[];
@@ -178,7 +179,8 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
}
- cache.tmpBoxes.push(h); // if finger detection is enabled, only update cache if fingers are detected
+ const ratioBoxFrame = Math.min(h.box[2] / (input.shape[2] || 1), h.box[3] / (input.shape[1] || 1));
+ if (ratioBoxFrame > 0.05) cache.tmpBoxes.push(h); // if finger detection is enabled, only update cache if fingers are detected and box is big enough
}
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
}
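The new score computation is a reverse-sigmoid conversion: the landmark model emits a raw logit, and since 1 / (1 + e^x) equals 1 - sigmoid(x), the expression approximates sigmoid(rawScore) snapped to two decimals, which puts the score on the same 0..1 scale that minConfidence expects. A standalone sanity check (not part of the diff):

```ts
const sigmoid = (x: number): number => 1 / (1 + Math.exp(-x));
for (const raw of [-4, -1, 0, 1, 4]) {
  // same expression as in detectFingers() above
  const converted = (100 - Math.trunc(100 / (1 + Math.exp(raw)))) / 100;
  console.log(raw, sigmoid(raw).toFixed(4), converted); // e.g. raw=1 -> 0.7311 vs 0.74
}
```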
@@ -190,16 +192,16 @@ export async function predict(input: Tensor, config: Config): Promise<HandResult
let hands: Array<HandResult> = [];
cache.tmpBoxes = []; // clear temp cache
if (!config.hand.landmarks) cache.fingerBoxes = cache.handBoxes; // if hand detection only reset finger boxes cache
+ if (!config.skipFrame) cache.fingerBoxes = [];
if ((skipped < (config.hand.skipFrames || 0)) && config.skipFrame) { // just run finger detection while reusing cached boxes
skipped++;
hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
} else { // calculate new boxes and run finger detection
skipped = 0;
hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
- if (hands.length !== config.hand.maxDetected) { // run hand detection only if we don't have enough hands in cache
+ if (hands.length !== config.hand.maxDetected) { // re-run with hand detection only if we don't have enough hands in cache
cache.handBoxes = await detectHands(input, config);
- const newHands = await Promise.all(cache.handBoxes.map((hand) => detectFingers(input, hand, config)));
- hands = hands.concat(newHands);
+ hands = await Promise.all(cache.handBoxes.map((hand) => detectFingers(input, hand, config)));
}
}
cache.fingerBoxes = [...cache.tmpBoxes]; // repopulate cache with validated hands
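A minimal standalone simulation of the cache policy predict() implements above; this is an illustrative rewrite, not the library's exported API, and it omits the detectHands() refill that runs when fewer than maxDetected hands validate:

```ts
const skipFramesLimit = 14;            // the new hand.skipFrames default
let skipped = Number.MAX_SAFE_INTEGER; // force full detection on the first frame
let detectorRuns = 0;

function frame(): void {
  if (skipped < skipFramesLimit) skipped++; // reuse cached finger boxes only
  else { skipped = 0; detectorRuns++; }     // cache expired: run the full hand detector
}

for (let i = 0; i < 100; i++) frame();
console.log(`hand detector ran ${detectorRuns} times over 100 frames`); // 7
```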

@@ -391,10 +391,10 @@ export async function hand(inCanvas: HTMLCanvasElement | OffscreenCanvas, result
if (localOptions.drawLabels) {
if (localOptions.shadowColor && localOptions.shadowColor !== '') {
ctx.fillStyle = localOptions.shadowColor;
- ctx.fillText(`${h.label}:${Math.trunc(100 * h.score)}%`, h.box[0] + 3, 1 + h.box[1] + localOptions.lineHeight, h.box[2]);
+ ctx.fillText(`hand:${Math.trunc(100 * h.score)}%`, h.box[0] + 3, 1 + h.box[1] + localOptions.lineHeight, h.box[2]); // can use h.label
}
ctx.fillStyle = localOptions.labelColor;
- ctx.fillText(`${h.label}:${Math.trunc(100 * h.score)}%`, h.box[0] + 2, 0 + h.box[1] + localOptions.lineHeight, h.box[2]);
+ ctx.fillText(`hand:${Math.trunc(100 * h.score)}%`, h.box[0] + 2, 0 + h.box[1] + localOptions.lineHeight, h.box[2]); // can use h.label
}
ctx.stroke();
}
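The paired fillText calls above draw each label twice, one pixel apart, to fake a drop shadow without touching the canvas shadow state. A minimal sketch of the same technique with a hypothetical helper:

```ts
function labelWithShadow(ctx: CanvasRenderingContext2D, text: string, x: number, y: number): void {
  ctx.fillStyle = 'black';          // shadow pass, offset by one pixel
  ctx.fillText(text, x + 1, y + 1);
  ctx.fillStyle = 'white';          // label pass drawn on top
  ctx.fillText(text, x, y);
}
```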

@@ -173,22 +173,26 @@ async function test(Human, inputConfig) {
await human.load();
const models = Object.keys(human.models).map((model) => ({ name: model, loaded: (human.models[model] !== null) }));
const loaded = models.filter((model) => model.loaded);
- if (models.length === 19 && loaded.length === 10) log('state', 'passed: models loaded', models);
- else log('error', 'failed: models loaded', models);
+ if (models.length === 20 && loaded.length === 10) log('state', 'passed: models loaded', models.length, loaded.length, models);
+ else log('error', 'failed: models loaded', models.length, loaded.length, models);
// increase defaults
config.face = { detector: { maxDetected: 20 } };
// test warmup sequences
await testInstance(human);
config.cacheSensitivity = 0;
config.warmup = 'none';
res = await testWarmup(human, 'default');
if (res.error !== 'null') log('error', 'failed: warmup none result mismatch');
else log('state', 'passed: warmup none result match');
config.warmup = 'face';
res = await testWarmup(human, 'default');
- if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 0 || res?.gesture?.length !== 3) log('error', 'failed: warmup face result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
+ if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 1 || res?.gesture?.length !== 6) log('error', 'failed: warmup face result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
else log('state', 'passed: warmup face result match');
config.warmup = 'body';
res = await testWarmup(human, 'default');
- if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 0 || res?.gesture?.length !== 3) log('error', 'failed: warmup body result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
+ if (!res || res?.face?.length !== 1 || res?.body?.length !== 0 || res?.hand?.length !== 1 || res?.gesture?.length !== 4) log('error', 'failed: warmup body result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
else log('state', 'passed: warmup body result match');
// test default config async
@@ -233,10 +237,10 @@ async function test(Human, inputConfig) {
const desc3 = res3 && res3.face && res3.face[0] && res3.face[0].embedding ? [...res3.face[0].embedding] : null;
if (!desc1 || !desc2 || !desc3 || desc1.length !== 1024 || desc2.length !== 1024 || desc3.length !== 1024) log('error', 'failed: face descriptor', desc1?.length, desc2?.length, desc3?.length);
else log('state', 'passed: face descriptor');
- res1 = Math.round(100 * human.similarity(desc1, desc2));
- res2 = Math.round(100 * human.similarity(desc1, desc3));
- res3 = Math.round(100 * human.similarity(desc2, desc3));
- if (res1 !== 51 || res2 !== 49 || res3 !== 53) log('error', 'failed: face similarity ', res1, res2, res3);
+ res1 = Math.round(10 * human.similarity(desc1, desc2));
+ res2 = Math.round(10 * human.similarity(desc1, desc3));
+ res3 = Math.round(10 * human.similarity(desc2, desc3));
+ if (res1 !== 5 || res2 !== 5 || res3 !== 5) log('error', 'failed: face similarity ', res1, res2, res3);
else log('state', 'passed: face similarity');
// test face matching
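The rescaled similarity assertions trade precision for stability: human.similarity() returns a value in the 0..1 range, and rounding 10x instead of 100x keeps the test from failing on small per-backend numeric drift. A sketch with a hypothetical similarity value near the old expected results:

```ts
const similarity = 0.51; // hypothetical human.similarity() result
console.log(Math.round(100 * similarity)); // 51 -> brittle: 0.49 vs 0.51 already fails the old check
console.log(Math.round(10 * similarity));  // 5  -> stable for anything in roughly 0.45..0.549
```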
@@ -266,17 +270,19 @@ async function test(Human, inputConfig) {
human.reset();
config.cacheSensitivity = 0;
config.face = { detector: { minConfidence: 0.0001, maxDetected: 1 } };
- config.body = { minConfidence: 0.0001, maxDetected: 1 };
- config.hand = { minConfidence: 0.0001, maxDetected: 3 };
+ config.body = { minConfidence: 0.0001 };
+ config.hand = { minConfidence: 0.0001 };
res = await testDetect(human, 'samples/in/ai-body.jpg', 'default');
- if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 3 || res?.gesture?.length !== 9) log('error', 'failed: sensitive result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
+ if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 2 || res?.gesture?.length !== 7) log('error', 'failed: sensitive result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
else log('state', 'passed: sensitive result match');
// test sensitive details face
const face = res && res.face ? res.face[0] : null;
- if (!face || face?.box?.length !== 4 || face?.mesh?.length !== 478 || face?.emotion?.length !== 4 || face?.embedding?.length !== 1024 || face?.rotation?.matrix?.length !== 9) {
- log('error', 'failed: sensitive face result mismatch', res?.face?.length, face?.box?.length, face?.mesh?.length, face?.emotion?.length, face?.embedding?.length, face?.rotation?.matrix?.length);
+ if (!face || face?.box?.length !== 4 || face?.mesh?.length !== 478 || face?.embedding?.length !== 1024 || face?.rotation?.matrix?.length !== 9) {
+ log('error', 'failed: sensitive face result mismatch', res?.face?.length, face?.box?.length, face?.mesh?.length, face?.embedding?.length, face?.rotation?.matrix?.length);
} else log('state', 'passed: sensitive face result match');
+ if (!face || face?.emotion?.length !== 4) log('error', 'failed: sensitive face emotion result mismatch', face?.emotion.length);
+ else log('state', 'passed: sensitive face emotion result match', face?.emotion.length);
// test sensitive details body
const body = res && res.body ? res.body[0] : null;
@@ -296,7 +302,7 @@ async function test(Human, inputConfig) {
res = await testDetect(human, 'samples/in/ai-body.jpg', 'default');
if (!res || res?.face?.length !== 1 || res?.face[0]?.gender || res?.face[0]?.age || res?.face[0]?.embedding) log('error', 'failed: detectors result face mismatch', res?.face);
else log('state', 'passed: detector result face match');
- if (!res || res?.hand?.length !== 2 || res?.hand[0]?.landmarks) log('error', 'failed: detectors result hand mismatch', res?.hand?.length);
+ if (!res || res?.hand?.length !== 1 || res?.hand[0]?.landmarks?.length > 0) log('error', 'failed: detectors result hand mismatch', res?.hand?.length);
else log('state', 'passed: detector result hand match');
// test posenet and movenet

@@ -10,8 +10,8 @@ Human.env.Canvas = Canvas; // requires monkey-patch as wasm does not have tf.bro
Human.env.Image = Image; // requires monkey-patch as wasm does not have tf.browser namespace
const config = {
- // modelBasePath: 'http://localhost:10030/models/',
modelBasePath: 'https://vladmandic.github.io/human/models/',
+ // modelBasePath: 'http://localhost:10030/models/',
backend: 'wasm',
wasmPath: 'node_modules/@tensorflow/tfjs-backend-wasm/dist/',
// wasmPath: 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-wasm@3.9.0/dist/',
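For context, a hedged usage sketch of the config above; the instantiation shape is assumed from the library's documented API and is not part of this diff:

```ts
async function main(): Promise<void> {
  const human = new Human(config); // Human class as imported at the top of this test file
  await human.load();              // fetches models from modelBasePath
  await human.warmup();            // primes the wasm backend before the first detect()
}
main();
```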