tweaked default values

pull/280/head
Vladimir Mandic 2021-09-28 13:48:29 -04:00
parent bd5cc2b36b
commit 4b807b5f11
6 changed files with 54 additions and 40 deletions

@@ -9,8 +9,14 @@
## Changelog
- ### **HEAD -> main** 2021/09/27 mandic00@live.com
+ ### **HEAD -> main** 2021/09/28 mandic00@live.com
+ - enable handtrack as default model
+ - redesign face processing
+ ### **origin/main** 2021/09/27 mandic00@live.com
- refactoring
- define app specific types
- implement box caching for movenet
- autodetect number of bodies and hands

@@ -358,7 +358,7 @@ const config: Config = {
// this parameter is not valid in nodejs
maxDetected: 1, // maximum number of faces detected in the input
// should be set to the minimum number for performance
- skipFrames: 15, // how many max frames to go without re-running the face bounding box detector
+ skipFrames: 11, // how many max frames to go without re-running the face bounding box detector
// only used when cacheSensitivity is not zero
// e.g., if model is running at 25 FPS, we can re-use existing bounding
// box for updated face analysis as the head does not move fast
@@ -380,23 +380,23 @@ const config: Config = {
// can be either absolute path or relative to modelBasePath
},
- emotion: {
- enabled: true,
- minConfidence: 0.1, // threshold for discarding a prediction
- skipFrames: 12, // how many max frames to go without re-running the detector
- // only used when cacheSensitivity is not zero
- modelPath: 'emotion.json', // face emotion model, can be absolute path or relative to modelBasePath
- },
description: {
enabled: true, // to improve accuracy of face description extraction it is
// recommended to enable detector.rotation and mesh.enabled
modelPath: 'faceres.json', // face description model
// can be either absolute path or relative to modelBasePath
- skipFrames: 11, // how many max frames to go without re-running the detector
+ skipFrames: 13, // how many max frames to go without re-running the detector
// only used when cacheSensitivity is not zero
minConfidence: 0.1, // threshold for discarding a prediction
},
+ emotion: {
+ enabled: true,
+ minConfidence: 0.1, // threshold for discarding a prediction
+ skipFrames: 17, // how many max frames to go without re-running the detector
+ // only used when cacheSensitivity is not zero
+ modelPath: 'emotion.json', // face emotion model, can be absolute path or relative to modelBasePath
+ },
},
body: {
@@ -420,7 +420,7 @@ const config: Config = {
rotation: true, // use best-guess rotated hand image or just box with rotation as-is
// false means higher performance, but incorrect finger mapping if hand is inverted
// only valid for `handdetect` variation
- skipFrames: 18, // how many max frames to go without re-running the hand bounding box detector
+ skipFrames: 14, // how many max frames to go without re-running the hand bounding box detector
// only used when cacheSensitivity is not zero
// e.g., if model is running at 25 FPS, we can re-use existing bounding
// box for updated hand skeleton analysis as the hand
@@ -447,7 +447,7 @@ const config: Config = {
minConfidence: 0.2, // threshold for discarding a prediction
iouThreshold: 0.4, // amount of overlap between two detected objects before one object is removed
maxDetected: 10, // maximum number of objects detected in the input
- skipFrames: 19, // how many max frames to go without re-running the detector
+ skipFrames: 15, // how many max frames to go without re-running the detector
// only used when cacheSensitivity is not zero
},
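The tweaked skipFrames defaults are easier to read in wall-clock terms. A minimal sketch, using the new values from this diff and the steady 25 FPS figure the config comments themselves assume:

```ts
// How often each detector re-runs at 25 FPS with the new skipFrames defaults;
// between re-runs the cached result is reused (only when cacheSensitivity is not zero).
const fps = 25;
const defaults: Array<[string, number]> = [
  ['face detector', 11],
  ['face description', 13],
  ['emotion', 17],
  ['hand detector', 14],
  ['object detector', 15],
];
for (const [model, skipFrames] of defaults) {
  console.log(`${model}: re-runs at most every ~${(skipFrames / fps).toFixed(2)}s`);
}
```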

@@ -96,7 +96,7 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectRes
if (!input || !models[0]) return hands;
const t: Record<string, Tensor> = {};
const ratio = (input.shape[2] || 1) / (input.shape[1] || 1);
- const height = Math.min(Math.round((input.shape[1] || 0) / 8) * 8, 512); // use dynamic input size but cap at 1024
+ const height = Math.min(Math.round((input.shape[1] || 0) / 8) * 8, 512); // use dynamic input size but cap at 512
const width = Math.round(height * ratio / 8) * 8;
t.resize = tf.image.resizeBilinear(input, [height, width]); // todo: resize with padding
t.cast = tf.cast(t.resize, 'int32');
@@ -106,7 +106,7 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectRes
const classScores = tf.unstack(t.scores, 1);
let id = 0;
for (let i = 0; i < classScores.length; i++) {
- if (i !== 0 && i !== 1) continue;
+ if (i === 4) continue; // skip faces
t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, classScores[i], config.hand.maxDetected, config.hand.iouThreshold, config.hand.minConfidence);
const nms = await t.nms.data();
tf.dispose(t.nms);
@@ -151,16 +151,17 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
landmarks: {} as HandResult['landmarks'],
annotations: {} as HandResult['annotations'],
};
- if (!input || !models[1]) return hand; // something is wrong
- if (config.hand.landmarks) {
+ if (input && models[1] && config.hand.landmarks) {
const t: Record<string, Tensor> = {};
if (!h.yxBox) return hand;
t.crop = tf.image.cropAndResize(input, [h.yxBox], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
t.cast = tf.cast(t.crop, 'float32');
t.div = tf.div(t.cast, 255);
[t.score, t.keypoints] = models[1].execute(t.div) as Tensor[];
- const score = Math.round(100 * (await t.score.data())[0] / 100);
- if (score > (config.hand.minConfidence || 0)) {
+ // const score = Math.round(100 * (await t.score.data())[0] / 100);
+ const rawScore = (await t.score.data())[0];
+ const score = (100 - Math.trunc(100 / (1 + Math.exp(rawScore)))) / 100; // reverse sigmoid value
+ if (score >= (config.hand.minConfidence || 0)) {
hand.fingerScore = score;
t.reshaped = tf.reshape(t.keypoints, [-1, 3]);
const rawCoords = await t.reshaped.array() as Point[];
@@ -178,7 +179,8 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
}
- cache.tmpBoxes.push(h); // if finger detection is enabled, only update cache if fingers are detected
+ const ratioBoxFrame = Math.min(h.box[2] / (input.shape[2] || 1), h.box[3] / (input.shape[1] || 1));
+ if (ratioBoxFrame > 0.05) cache.tmpBoxes.push(h); // if finger detection is enabled, only update cache if fingers are detected and box is big enough
}
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
}
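The new score computation is a reverse-sigmoid conversion: the landmark model emits a raw logit, and since 1 / (1 + e^x) equals 1 - sigmoid(x), the expression approximates sigmoid(rawScore) snapped to two decimals, which puts the score on the same 0..1 scale that minConfidence expects. A standalone sanity check (not part of the diff):

```ts
const sigmoid = (x: number): number => 1 / (1 + Math.exp(-x));
for (const raw of [-4, -1, 0, 1, 4]) {
  // same expression as in detectFingers() above
  const converted = (100 - Math.trunc(100 / (1 + Math.exp(raw)))) / 100;
  console.log(raw, sigmoid(raw).toFixed(4), converted); // e.g. raw=1 -> 0.7311 vs 0.74
}
```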
@@ -190,16 +192,16 @@ export async function predict(input: Tensor, config: Config): Promise<HandResult
let hands: Array<HandResult> = [];
cache.tmpBoxes = []; // clear temp cache
if (!config.hand.landmarks) cache.fingerBoxes = cache.handBoxes; // if hand detection only reset finger boxes cache
+ if (!config.skipFrame) cache.fingerBoxes = [];
if ((skipped < (config.hand.skipFrames || 0)) && config.skipFrame) { // just run finger detection while reusing cached boxes
skipped++;
hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
} else { // calculate new boxes and run finger detection
skipped = 0;
hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
- if (hands.length !== config.hand.maxDetected) { // run hand detection only if we don't have enough hands in cache
+ if (hands.length !== config.hand.maxDetected) { // re-run with hand detection only if we don't have enough hands in cache
cache.handBoxes = await detectHands(input, config);
- const newHands = await Promise.all(cache.handBoxes.map((hand) => detectFingers(input, hand, config)));
- hands = hands.concat(newHands);
+ hands = await Promise.all(cache.handBoxes.map((hand) => detectFingers(input, hand, config)));
}
}
cache.fingerBoxes = [...cache.tmpBoxes]; // repopulate cache with validated hands
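A minimal standalone simulation of the cache policy predict() implements above; this is an illustrative rewrite, not the library's exported API, and it omits the detectHands() refill that runs when fewer than maxDetected hands validate:

```ts
const skipFramesLimit = 14;            // the new hand.skipFrames default
let skipped = Number.MAX_SAFE_INTEGER; // force full detection on the first frame
let detectorRuns = 0;

function frame(): void {
  if (skipped < skipFramesLimit) skipped++; // reuse cached finger boxes only
  else { skipped = 0; detectorRuns++; }     // cache expired: run the full hand detector
}

for (let i = 0; i < 100; i++) frame();
console.log(`hand detector ran ${detectorRuns} times over 100 frames`); // 7
```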

@@ -391,10 +391,10 @@ export async function hand(inCanvas: HTMLCanvasElement | OffscreenCanvas, result
if (localOptions.drawLabels) {
if (localOptions.shadowColor && localOptions.shadowColor !== '') {
ctx.fillStyle = localOptions.shadowColor;
- ctx.fillText(`${h.label}:${Math.trunc(100 * h.score)}%`, h.box[0] + 3, 1 + h.box[1] + localOptions.lineHeight, h.box[2]);
+ ctx.fillText(`hand:${Math.trunc(100 * h.score)}%`, h.box[0] + 3, 1 + h.box[1] + localOptions.lineHeight, h.box[2]); // can use h.label
}
ctx.fillStyle = localOptions.labelColor;
- ctx.fillText(`${h.label}:${Math.trunc(100 * h.score)}%`, h.box[0] + 2, 0 + h.box[1] + localOptions.lineHeight, h.box[2]);
+ ctx.fillText(`hand:${Math.trunc(100 * h.score)}%`, h.box[0] + 2, 0 + h.box[1] + localOptions.lineHeight, h.box[2]); // can use h.label
}
ctx.stroke();
}
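The paired fillText calls above draw each label twice, one pixel apart, to fake a drop shadow without touching the canvas shadow state. A minimal sketch of the same technique with a hypothetical helper:

```ts
function labelWithShadow(ctx: CanvasRenderingContext2D, text: string, x: number, y: number): void {
  ctx.fillStyle = 'black';          // shadow pass, offset by one pixel
  ctx.fillText(text, x + 1, y + 1);
  ctx.fillStyle = 'white';          // label pass drawn on top
  ctx.fillText(text, x, y);
}
```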

@@ -173,22 +173,26 @@ async function test(Human, inputConfig) {
await human.load();
const models = Object.keys(human.models).map((model) => ({ name: model, loaded: (human.models[model] !== null) }));
const loaded = models.filter((model) => model.loaded);
- if (models.length === 19 && loaded.length === 10) log('state', 'passed: models loaded', models);
- else log('error', 'failed: models loaded', models);
+ if (models.length === 20 && loaded.length === 10) log('state', 'passed: models loaded', models.length, loaded.length, models);
+ else log('error', 'failed: models loaded', models.length, loaded.length, models);
// increase defaults
config.face = { detector: { maxDetected: 20 } };
// test warmup sequences
await testInstance(human);
config.cacheSensitivity = 0;
config.warmup = 'none';
res = await testWarmup(human, 'default');
if (res.error !== 'null') log('error', 'failed: warmup none result mismatch');
else log('state', 'passed: warmup none result match');
config.warmup = 'face';
res = await testWarmup(human, 'default');
- if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 0 || res?.gesture?.length !== 3) log('error', 'failed: warmup face result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
+ if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 1 || res?.gesture?.length !== 6) log('error', 'failed: warmup face result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
else log('state', 'passed: warmup face result match');
config.warmup = 'body';
res = await testWarmup(human, 'default');
- if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 0 || res?.gesture?.length !== 3) log('error', 'failed: warmup body result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
+ if (!res || res?.face?.length !== 1 || res?.body?.length !== 0 || res?.hand?.length !== 1 || res?.gesture?.length !== 4) log('error', 'failed: warmup body result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
else log('state', 'passed: warmup body result match');
// test default config async
@@ -233,10 +237,10 @@ async function test(Human, inputConfig) {
const desc3 = res3 && res3.face && res3.face[0] && res3.face[0].embedding ? [...res3.face[0].embedding] : null;
if (!desc1 || !desc2 || !desc3 || desc1.length !== 1024 || desc2.length !== 1024 || desc3.length !== 1024) log('error', 'failed: face descriptor', desc1?.length, desc2?.length, desc3?.length);
else log('state', 'passed: face descriptor');
- res1 = Math.round(100 * human.similarity(desc1, desc2));
- res2 = Math.round(100 * human.similarity(desc1, desc3));
- res3 = Math.round(100 * human.similarity(desc2, desc3));
- if (res1 !== 51 || res2 !== 49 || res3 !== 53) log('error', 'failed: face similarity ', res1, res2, res3);
+ res1 = Math.round(10 * human.similarity(desc1, desc2));
+ res2 = Math.round(10 * human.similarity(desc1, desc3));
+ res3 = Math.round(10 * human.similarity(desc2, desc3));
+ if (res1 !== 5 || res2 !== 5 || res3 !== 5) log('error', 'failed: face similarity ', res1, res2, res3);
else log('state', 'passed: face similarity');
// test face matching
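The rescaled similarity assertions trade precision for stability: human.similarity() returns a value in the 0..1 range, and rounding 10x instead of 100x keeps the test from failing on small per-backend numeric drift. A sketch with a hypothetical similarity value near the old expected results:

```ts
const similarity = 0.51; // hypothetical human.similarity() result
console.log(Math.round(100 * similarity)); // 51 -> brittle: 0.49 vs 0.51 already fails the old check
console.log(Math.round(10 * similarity));  // 5  -> stable for anything in roughly 0.45..0.549
```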
@@ -266,17 +270,19 @@ async function test(Human, inputConfig) {
human.reset();
config.cacheSensitivity = 0;
config.face = { detector: { minConfidence: 0.0001, maxDetected: 1 } };
- config.body = { minConfidence: 0.0001, maxDetected: 1 };
- config.hand = { minConfidence: 0.0001, maxDetected: 3 };
+ config.body = { minConfidence: 0.0001 };
+ config.hand = { minConfidence: 0.0001 };
res = await testDetect(human, 'samples/in/ai-body.jpg', 'default');
- if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 3 || res?.gesture?.length !== 9) log('error', 'failed: sensitive result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
+ if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 2 || res?.gesture?.length !== 7) log('error', 'failed: sensitive result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
else log('state', 'passed: sensitive result match');
// test sensitive details face
const face = res && res.face ? res.face[0] : null;
- if (!face || face?.box?.length !== 4 || face?.mesh?.length !== 478 || face?.emotion?.length !== 4 || face?.embedding?.length !== 1024 || face?.rotation?.matrix?.length !== 9) {
- log('error', 'failed: sensitive face result mismatch', res?.face?.length, face?.box?.length, face?.mesh?.length, face?.emotion?.length, face?.embedding?.length, face?.rotation?.matrix?.length);
+ if (!face || face?.box?.length !== 4 || face?.mesh?.length !== 478 || face?.embedding?.length !== 1024 || face?.rotation?.matrix?.length !== 9) {
+ log('error', 'failed: sensitive face result mismatch', res?.face?.length, face?.box?.length, face?.mesh?.length, face?.embedding?.length, face?.rotation?.matrix?.length);
} else log('state', 'passed: sensitive face result match');
+ if (!face || face?.emotion?.length !== 4) log('error', 'failed: sensitive face emotion result mismatch', face?.emotion.length);
+ else log('state', 'passed: sensitive face emotion result match', face?.emotion.length);
// test sensitive details body
const body = res && res.body ? res.body[0] : null;
@@ -296,7 +302,7 @@ async function test(Human, inputConfig) {
res = await testDetect(human, 'samples/in/ai-body.jpg', 'default');
if (!res || res?.face?.length !== 1 || res?.face[0]?.gender || res?.face[0]?.age || res?.face[0]?.embedding) log('error', 'failed: detectors result face mismatch', res?.face);
else log('state', 'passed: detector result face match');
- if (!res || res?.hand?.length !== 2 || res?.hand[0]?.landmarks) log('error', 'failed: detectors result hand mismatch', res?.hand?.length);
+ if (!res || res?.hand?.length !== 1 || res?.hand[0]?.landmarks?.length > 0) log('error', 'failed: detectors result hand mismatch', res?.hand?.length);
else log('state', 'passed: detector result hand match');
// test posenet and movenet

@@ -10,8 +10,8 @@ Human.env.Canvas = Canvas; // requires monkey-patch as wasm does not have tf.bro
Human.env.Image = Image; // requires monkey-patch as wasm does not have tf.browser namespace
const config = {
- // modelBasePath: 'http://localhost:10030/models/',
modelBasePath: 'https://vladmandic.github.io/human/models/',
+ // modelBasePath: 'http://localhost:10030/models/',
backend: 'wasm',
wasmPath: 'node_modules/@tensorflow/tfjs-backend-wasm/dist/',
// wasmPath: 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-wasm@3.9.0/dist/',
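For context, a hedged usage sketch of the config above; the instantiation shape is assumed from the library's documented API and is not part of this diff:

```ts
async function main(): Promise<void> {
  const human = new Human(config); // Human class as imported at the top of this test file
  await human.load();              // fetches models from modelBasePath
  await human.warmup();            // primes the wasm backend before the first detect()
}
main();
```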