diff --git a/CHANGELOG.md b/CHANGELOG.md
index 23d6b9e9..84eb5bea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,8 +9,14 @@

 ## Changelog

-### **HEAD -> main** 2021/09/27 mandic00@live.com
+### **HEAD -> main** 2021/09/28 mandic00@live.com

+- enable handtrack as default model
+- redesign face processing
+
+### **origin/main** 2021/09/27 mandic00@live.com
+
+- refactoring
 - define app specific types
 - implement box caching for movenet
 - autodetect number of bodies and hands
diff --git a/src/config.ts b/src/config.ts
index 023cfb85..ae66be17 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -358,7 +358,7 @@ const config: Config = {
       // this parameter is not valid in nodejs
       maxDetected: 1, // maximum number of faces detected in the input
       // should be set to the minimum number for performance
-      skipFrames: 15, // how many max frames to go without re-running the face bounding box detector
+      skipFrames: 11, // how many max frames to go without re-running the face bounding box detector
       // only used when cacheSensitivity is not zero
       // e.g., if model is running at 25 FPS, we can re-use existing bounding
       // box for updated face analysis as the head does not move fast
@@ -380,23 +380,23 @@ const config: Config = {
       // can be either absolute path or relative to modelBasePath
     },

+    emotion: {
+      enabled: true,
+      minConfidence: 0.1, // threshold for discarding a prediction
+      skipFrames: 12, // how many max frames to go without re-running the detector
+      // only used when cacheSensitivity is not zero
+      modelPath: 'emotion.json', // face emotion model, can be absolute path or relative to modelBasePath
+    },
+
     description: {
       enabled: true, // to improve accuracy of face description extraction it is
       // recommended to enable detector.rotation and mesh.enabled
       modelPath: 'faceres.json', // face description model
       // can be either absolute path or relative to modelBasePath
-      skipFrames: 11, // how many max frames to go without re-running the detector
+      skipFrames: 13, // how many max frames to go without re-running the detector
       // only used when cacheSensitivity is not zero
       minConfidence: 0.1, // threshold for discarding a prediction
     },
-
-    emotion: {
-      enabled: true,
-      minConfidence: 0.1, // threshold for discarding a prediction
-      skipFrames: 17, // how max many frames to go without re-running the detector
-      // only used when cacheSensitivity is not zero
-      modelPath: 'emotion.json', // face emotion model, can be absolute path or relative to modelBasePath
-    },
   },

   body: {
@@ -420,7 +420,7 @@ const config: Config = {
     rotation: true, // use best-guess rotated hand image or just box with rotation as-is
     // false means higher performance, but incorrect finger mapping if hand is inverted
     // only valid for `handdetect` variation
-    skipFrames: 18, // how many max frames to go without re-running the hand bounding box detector
+    skipFrames: 14, // how many max frames to go without re-running the hand bounding box detector
     // only used when cacheSensitivity is not zero
     // e.g., if model is running at 25 FPS, we can re-use existing bounding
     // box for updated hand skeleton analysis as the hand
@@ -447,7 +447,7 @@ const config: Config = {
     minConfidence: 0.2, // threshold for discarding a prediction
     iouThreshold: 0.4, // amount of overlap between two detected objects before one object is removed
     maxDetected: 10, // maximum number of objects detected in the input
-    skipFrames: 19, // how many max frames to go without re-running the detector
+    skipFrames: 15, // how many max frames to go without re-running the detector
     // only used when cacheSensitivity is not zero
   },
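Note on the `skipFrames` retuning above: every one of these options drives the same frame-cache pattern, where a detector result is reused for up to N frames before a full re-run, and only when `cacheSensitivity` is non-zero. A minimal sketch of that gating, with hypothetical names (`runDetector`, `lastResult`) rather than the library's actual internals:

```ts
// Sketch of the skip-frame caching the config comments describe; names are
// illustrative, not the library's actual internals.
let skipped = 0;
let lastResult: unknown = null;
const runDetector = async (frame: unknown) => ({ frame }); // stand-in for a real model run

async function detect(frame: unknown, config: { skipFrames: number, cacheSensitivity: number }) {
  if (config.cacheSensitivity !== 0 && lastResult && skipped < config.skipFrames) {
    skipped++; // reuse cached result, e.g. a bounding box that barely moved between frames
    return lastResult;
  }
  skipped = 0; // cache expired: run the full detector again
  lastResult = await runDetector(frame);
  return lastResult;
}
```

The retuned values are also staggered across detectors (11, 12, 13, 14, 15 instead of 15, 17, 11, 18, 19), which plausibly spreads cache refreshes over different frames instead of piling several full model runs onto the same frame.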
diff --git a/src/hand/handtrack.ts b/src/hand/handtrack.ts
index 46eb750a..4bc7c6de 100644
--- a/src/hand/handtrack.ts
+++ b/src/hand/handtrack.ts
@@ -96,7 +96,7 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectResult[]> {
   const t: Record<string, Tensor> = {};
   const ratio = (input.shape[2] || 1) / (input.shape[1] || 1);
-  const height = Math.min(Math.round((input.shape[1] || 0) / 8) * 8, 512); // use dynamic input size but cap at 1024
+  const height = Math.min(Math.round((input.shape[1] || 0) / 8) * 8, 512); // use dynamic input size but cap at 512
   const width = Math.round(height * ratio / 8) * 8;
   t.resize = tf.image.resizeBilinear(input, [height, width]); // todo: resize with padding
   t.cast = tf.cast(t.resize, 'int32');
@@ -106,7 +106,7 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectResult[]> {
   const t: Record<string, Tensor> = {};
   if (!h.yxBox) return hand;
   t.crop = tf.image.cropAndResize(input, [h.yxBox], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
   t.cast = tf.cast(t.crop, 'float32');
   t.div = tf.div(t.cast, 255);
   [t.score, t.keypoints] = models[1].execute(t.div) as Tensor[];
-  const score = Math.round(100 * (await t.score.data())[0] / 100);
-  if (score > (config.hand.minConfidence || 0)) {
+  // const score = Math.round(100 * (await t.score.data())[0] / 100);
+  const rawScore = (await t.score.data())[0];
+  const score = (100 - Math.trunc(100 / (1 + Math.exp(rawScore)))) / 100; // reverse sigmoid value
+  if (score >= (config.hand.minConfidence || 0)) {
     hand.fingerScore = score;
     t.reshaped = tf.reshape(t.keypoints, [-1, 3]);
     const rawCoords = await t.reshaped.array() as Point[];
@@ -178,7 +179,8 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
     for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
       hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
     }
-    cache.tmpBoxes.push(h); // if finger detection is enabled, only update cache if fingers are detected
+    const ratioBoxFrame = Math.min(h.box[2] / (input.shape[2] || 1), h.box[3] / (input.shape[1] || 1));
+    if (ratioBoxFrame > 0.05) cache.tmpBoxes.push(h); // if finger detection is enabled, only update cache if fingers are detected and box is big enough
   }
   Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
 }
@@ -190,16 +192,16 @@ export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {
   let hands: Array<HandResult> = [];
   cache.tmpBoxes = []; // clear temp cache
   if (!config.hand.landmarks) cache.fingerBoxes = cache.handBoxes; // if hand detection only reset finger boxes cache
+  if (!config.skipFrame) cache.fingerBoxes = [];
   if ((skipped < (config.hand.skipFrames || 0)) && config.skipFrame) { // just run finger detection while reusing cached boxes
     skipped++;
     hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
   } else { // calculate new boxes and run finger detection
     skipped = 0;
     hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
-    if (hands.length !== config.hand.maxDetected) { // run hand detection only if we dont have enough hands in cache
+    if (hands.length !== config.hand.maxDetected) { // re-run with hand detection only if we don't have enough hands in cache
       cache.handBoxes = await detectHands(input, config);
-      const newHands = await Promise.all(cache.handBoxes.map((hand) => detectFingers(input, hand, config)));
-      hands = hands.concat(newHands);
+      hands = await Promise.all(cache.handBoxes.map((hand) => detectFingers(input, hand, config)));
     }
   }
   cache.fingerBoxes = [...cache.tmpBoxes]; // repopulate cache with validated hands
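The replaced score logic is the substantive fix in this file: the old line effectively rounded the raw model logit itself (`Math.round(100 * v / 100)` is just `Math.round(v)`), while the new expression maps the logit through a sigmoid. Since 1 / (1 + e^x) = 1 - sigmoid(x), the expression `(100 - Math.trunc(100 / (1 + Math.exp(x)))) / 100` is sigmoid(x) truncated upward to two decimals. A quick standalone check:

```ts
// verify the diff's "reverse sigmoid" expression against a plain sigmoid
const sigmoid = (x: number) => 1 / (1 + Math.exp(-x));
const scoreFromLogit = (x: number) => (100 - Math.trunc(100 / (1 + Math.exp(x)))) / 100;

for (const logit of [-2, 0, 0.5, 2]) {
  console.log(logit, sigmoid(logit).toFixed(4), scoreFromLogit(logit));
  // -2 -> 0.1192 vs 0.12, 0 -> 0.5000 vs 0.5, 0.5 -> 0.6225 vs 0.63, 2 -> 0.8808 vs 0.89
}
```

The comparison also changes from `>` to `>=`, so a score exactly at `minConfidence` now passes.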
diff --git a/src/util/draw.ts b/src/util/draw.ts
index acf2014a..cc0b9183 100644
--- a/src/util/draw.ts
+++ b/src/util/draw.ts
@@ -391,10 +391,10 @@ export async function hand(inCanvas: HTMLCanvasElement | OffscreenCanvas, result
       if (localOptions.drawLabels) {
         if (localOptions.shadowColor && localOptions.shadowColor !== '') {
           ctx.fillStyle = localOptions.shadowColor;
-          ctx.fillText(`${h.label}:${Math.trunc(100 * h.score)}%`, h.box[0] + 3, 1 + h.box[1] + localOptions.lineHeight, h.box[2]);
+          ctx.fillText(`hand:${Math.trunc(100 * h.score)}%`, h.box[0] + 3, 1 + h.box[1] + localOptions.lineHeight, h.box[2]); // can use h.label
         }
         ctx.fillStyle = localOptions.labelColor;
-        ctx.fillText(`${h.label}:${Math.trunc(100 * h.score)}%`, h.box[0] + 2, 0 + h.box[1] + localOptions.lineHeight, h.box[2]);
+        ctx.fillText(`hand:${Math.trunc(100 * h.score)}%`, h.box[0] + 2, 0 + h.box[1] + localOptions.lineHeight, h.box[2]); // can use h.label
       }
       ctx.stroke();
     }
diff --git a/test/test-main.js b/test/test-main.js
index 2209d766..8f579dab 100644
--- a/test/test-main.js
+++ b/test/test-main.js
@@ -173,22 +173,26 @@ async function test(Human, inputConfig) {
   await human.load();
   const models = Object.keys(human.models).map((model) => ({ name: model, loaded: (human.models[model] !== null) }));
   const loaded = models.filter((model) => model.loaded);
-  if (models.length === 19 && loaded.length === 10) log('state', 'passed: models loaded', models);
-  else log('error', 'failed: models loaded', models);
+  if (models.length === 20 && loaded.length === 10) log('state', 'passed: models loaded', models.length, loaded.length, models);
+  else log('error', 'failed: models loaded', models.length, loaded.length, models);
+
+  // increase defaults
+  config.face = { detector: { maxDetected: 20 } };

   // test warmup sequences
   await testInstance(human);
+  config.cacheSensitivity = 0;
   config.warmup = 'none';
   res = await testWarmup(human, 'default');
   if (res.error !== 'null') log('error', 'failed: warmup none result mismatch');
   else log('state', 'passed: warmup none result match');
   config.warmup = 'face';
   res = await testWarmup(human, 'default');
-  if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 0 || res?.gesture?.length !== 3) log('error', 'failed: warmup face result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
+  if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 1 || res?.gesture?.length !== 6) log('error', 'failed: warmup face result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
   else log('state', 'passed: warmup face result match');
   config.warmup = 'body';
   res = await testWarmup(human, 'default');
-  if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 0 || res?.gesture?.length !== 3) log('error', 'failed: warmup body result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
+  if (!res || res?.face?.length !== 1 || res?.body?.length !== 0 || res?.hand?.length !== 1 || res?.gesture?.length !== 4) log('error', 'failed: warmup body result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
   else log('state', 'passed: warmup body result match');

   // test default config async
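The warmup expectations change because handtrack is now the default hand model, so the bundled warmup images are expected to yield a hand (and with it, extra gestures). A hedged sketch of reproducing the new 'face' warmup counts, assuming the package default export and that `warmup()` resolves with a detection result, as these tests rely on:

```ts
import Human from '@vladmandic/human'; // assumed default export, as used by the test harness

async function checkWarmup() {
  const human = new Human({ warmup: 'face' });
  const res = await human.warmup();
  // with handtrack as the default model the warmup image now yields one hand and six gestures
  console.log(res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length); // per the test above: 1 1 1 6
}
checkWarmup();
```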
@@ -233,10 +237,10 @@ async function test(Human, inputConfig) {
   const desc3 = res3 && res3.face && res3.face[0] && res3.face[0].embedding ? [...res3.face[0].embedding] : null;
   if (!desc1 || !desc2 || !desc3 || desc1.length !== 1024 || desc2.length !== 1024 || desc3.length !== 1024) log('error', 'failed: face descriptor', desc1?.length, desc2?.length, desc3?.length);
   else log('state', 'passed: face descriptor');
-  res1 = Math.round(100 * human.similarity(desc1, desc2));
-  res2 = Math.round(100 * human.similarity(desc1, desc3));
-  res3 = Math.round(100 * human.similarity(desc2, desc3));
-  if (res1 !== 51 || res2 !== 49 || res3 !== 53) log('error', 'failed: face similarity ', res1, res2, res3);
+  res1 = Math.round(10 * human.similarity(desc1, desc2));
+  res2 = Math.round(10 * human.similarity(desc1, desc3));
+  res3 = Math.round(10 * human.similarity(desc2, desc3));
+  if (res1 !== 5 || res2 !== 5 || res3 !== 5) log('error', 'failed: face similarity ', res1, res2, res3);
   else log('state', 'passed: face similarity');

   // test face matching
@@ -266,17 +270,19 @@ async function test(Human, inputConfig) {
   human.reset();
   config.cacheSensitivity = 0;
   config.face = { detector: { minConfidence: 0.0001, maxDetected: 1 } };
-  config.body = { minConfidence: 0.0001, maxDetected: 1 };
-  config.hand = { minConfidence: 0.0001, maxDetected: 3 };
+  config.body = { minConfidence: 0.0001 };
+  config.hand = { minConfidence: 0.0001 };
   res = await testDetect(human, 'samples/in/ai-body.jpg', 'default');
-  if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 3 || res?.gesture?.length !== 9) log('error', 'failed: sensitive result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
+  if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 2 || res?.gesture?.length !== 7) log('error', 'failed: sensitive result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
   else log('state', 'passed: sensitive result match');

   // test sensitive details face
   const face = res && res.face ? res.face[0] : null;
-  if (!face || face?.box?.length !== 4 || face?.mesh?.length !== 478 || face?.emotion?.length !== 4 || face?.embedding?.length !== 1024 || face?.rotation?.matrix?.length !== 9) {
-    log('error', 'failed: sensitive face result mismatch', res?.face?.length, face?.box?.length, face?.mesh?.length, face?.emotion?.length, face?.embedding?.length, face?.rotation?.matrix?.length);
+  if (!face || face?.box?.length !== 4 || face?.mesh?.length !== 478 || face?.embedding?.length !== 1024 || face?.rotation?.matrix?.length !== 9) {
+    log('error', 'failed: sensitive face result mismatch', res?.face?.length, face?.box?.length, face?.mesh?.length, face?.embedding?.length, face?.rotation?.matrix?.length);
   } else log('state', 'passed: sensitive face result match');
+  if (!face || face?.emotion?.length !== 4) log('error', 'failed: sensitive face emotion result mismatch', face?.emotion.length);
+  else log('state', 'passed: sensitive face emotion result match', face?.emotion.length);

   // test sensitive details body
   const body = res && res.body ? res.body[0] : null;
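The similarity assertions above loosen deliberately: `Math.round(100 * s)` pins the score to an exact percent (51/49/53), while `Math.round(10 * s) === 5` accepts any score in [0.45, 0.55), so a retuned model presumably no longer breaks the test on a one-point shift. The arithmetic:

```ts
// old vs new bucketing of a similarity score
for (const s of [0.49, 0.51, 0.53]) {
  console.log(Math.round(100 * s), Math.round(10 * s)); // 49 5, 51 5, 53 5
}
// Math.round(10 * s) === 5 holds for any s in [0.45, 0.55)
```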
@@ -296,7 +302,7 @@ async function test(Human, inputConfig) {
   res = await testDetect(human, 'samples/in/ai-body.jpg', 'default');
   if (!res || res?.face?.length !== 1 || res?.face[0]?.gender || res?.face[0]?.age || res?.face[0]?.embedding) log('error', 'failed: detectors result face mismatch', res?.face);
   else log('state', 'passed: detector result face match');
-  if (!res || res?.hand?.length !== 2 || res?.hand[0]?.landmarks) log('error', 'failed: detectors result hand mismatch', res?.hand?.length);
+  if (!res || res?.hand?.length !== 1 || res?.hand[0]?.landmarks?.length > 0) log('error', 'failed: detectors result hand mismatch', res?.hand?.length);
   else log('state', 'passed: detector result hand match');

   // test posenet and movenet
diff --git a/test/test-node-wasm.js b/test/test-node-wasm.js
index 8969ce25..03564136 100644
--- a/test/test-node-wasm.js
+++ b/test/test-node-wasm.js
@@ -10,8 +10,8 @@ Human.env.Canvas = Canvas; // requires monkey-patch as wasm does not have tf.browser namespace
 Human.env.Image = Image; // requires monkey-patch as wasm does not have tf.browser namespace

 const config = {
-  // modelBasePath: 'http://localhost:10030/models/',
   modelBasePath: 'https://vladmandic.github.io/human/models/',
+  // modelBasePath: 'http://localhost:10030/models/',
   backend: 'wasm',
   wasmPath: 'node_modules/@tensorflow/tfjs-backend-wasm/dist/',
   // wasmPath: 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-wasm@3.9.0/dist/',
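For reference, a minimal standalone version of the wasm test setup above, using only values that appear in this diff; the Canvas/Image monkey-patch is needed because the wasm backend has no `tf.browser` namespace under nodejs:

```ts
import { Canvas, Image } from 'canvas';
import Human from '@vladmandic/human'; // assumed default export, as in the test files

Human.env.Canvas = Canvas; // same monkey-patch as test-node-wasm.js
Human.env.Image = Image;

const human = new Human({
  modelBasePath: 'https://vladmandic.github.io/human/models/',
  backend: 'wasm',
  wasmPath: 'node_modules/@tensorflow/tfjs-backend-wasm/dist/',
});
await human.load(); // loads wasm binaries and models; top-level await assumes an ESM module
```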