From 272985bb25f6993fff951cb33510eb365359fb4b Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Tue, 23 Mar 2021 14:46:44 -0400 Subject: [PATCH] update nanodet and face rotation check --- CHANGELOG.md | 5 ++- package.json | 6 ++-- src/config.ts | 12 +++---- src/human.ts | 13 ++++--- src/nanodet/labels.ts | 82 ++++++++++++++++++++++++++++++++++++++++++ src/nanodet/nanodet.ts | 79 ++++++++++++++++++++-------------------- 6 files changed, 144 insertions(+), 53 deletions(-) create mode 100644 src/nanodet/labels.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index f149fb47..cae6eb1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # @vladmandic/human -Version: **1.2.2** +Version: **1.2.3** Description: **Human: AI-powered 3D Face Detection, Face Embedding & Recognition, Body Pose Tracking, Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction & Gesture Recognition** Author: **Vladimir Mandic ** @@ -9,6 +9,9 @@ Repository: **** ## Changelog +### **1.2.3** 2021/03/21 mandic00@live.com + + ### **1.2.2** 2021/03/21 mandic00@live.com - precise face rotation diff --git a/package.json b/package.json index 626d0b15..db6c2525 100644 --- a/package.json +++ b/package.json @@ -57,8 +57,8 @@ "@tensorflow/tfjs-node": "^3.3.0", "@tensorflow/tfjs-node-gpu": "^3.3.0", "@types/node": "^14.14.35", - "@typescript-eslint/eslint-plugin": "^4.18.0", - "@typescript-eslint/parser": "^4.18.0", + "@typescript-eslint/eslint-plugin": "^4.19.0", + "@typescript-eslint/parser": "^4.19.0", "@vladmandic/pilogger": "^0.2.15", "chokidar": "^3.5.1", "dayjs": "^1.10.4", @@ -73,7 +73,7 @@ "seedrandom": "^3.0.5", "simple-git": "^2.37.0", "tslib": "^2.1.0", - "typedoc": "^0.20.32", + "typedoc": "^0.20.33", "typescript": "^4.2.3" } } diff --git a/src/config.ts b/src/config.ts index 4f87c8ae..1a7053c4 100644 --- a/src/config.ts +++ b/src/config.ts @@ -228,14 +228,14 @@ const config: Config = { emotion: { enabled: true, minConfidence: 0.1, // threshold for discarding a prediction - skipFrames: 33, // how many frames to go without re-running the detector + skipFrames: 32, // how many frames to go without re-running the detector modelPath: '../models/emotion.json', }, age: { enabled: false, // obsolete, replaced by description module modelPath: '../models/age.json', - skipFrames: 31, // how many frames to go without re-running the detector + skipFrames: 33, // how many frames to go without re-running the detector // only used for video inputs }, @@ -243,7 +243,7 @@ const config: Config = { enabled: false, // obsolete, replaced by description module minConfidence: 0.1, // threshold for discarding a prediction modelPath: '../models/gender.json', - skipFrames: 32, // how many frames to go without re-running the detector + skipFrames: 34, // how many frames to go without re-running the detector // only used for video inputs }, @@ -296,11 +296,11 @@ const config: Config = { object: { enabled: false, modelPath: '../models/nanodet.json', - minConfidence: 0.15, // threshold for discarding a prediction - iouThreshold: 0.25, // threshold for deciding whether boxes overlap too much + minConfidence: 0.20, // threshold for discarding a prediction + iouThreshold: 0.40, // threshold for deciding whether boxes overlap too much // in non-maximum suppression maxResults: 10, // maximum number of objects detected in the input - skipFrames: 13, // how many frames to go without re-running the detector + skipFrames: 41, // how many frames to go without re-running the detector }, }; export { config as defaults }; diff --git a/src/human.ts b/src/human.ts index 7e0688b3..173a0cc4 100644 --- a/src/human.ts +++ b/src/human.ts @@ -345,6 +345,7 @@ export class Human { let handRes; let faceRes; let objectRes; + let current; // run face detection followed by all models that rely on face bounding box: face mesh, age, gender, emotion if (this.config.async) { @@ -354,7 +355,8 @@ export class Human { this.state = 'run:face'; timeStamp = now(); faceRes = this.config.face.enabled ? await faceall.detectFace(this, process.tensor) : []; - this.perf.face = Math.trunc(now() - timeStamp); + current = Math.trunc(now() - timeStamp); + if (current > 0) this.perf.face = current; } // run body: can be posenet or blazepose @@ -368,7 +370,8 @@ export class Human { timeStamp = now(); if (this.config.body.modelPath.includes('posenet')) bodyRes = this.config.body.enabled ? await this.models.posenet?.estimatePoses(process.tensor, this.config) : []; else bodyRes = this.config.body.enabled ? await blazepose.predict(process.tensor, this.config) : []; - this.perf.body = Math.trunc(now() - timeStamp); + current = Math.trunc(now() - timeStamp); + if (current > 0) this.perf.body = current; } this.analyze('End Body:'); @@ -381,7 +384,8 @@ export class Human { this.state = 'run:hand'; timeStamp = now(); handRes = this.config.hand.enabled ? await this.models.handpose?.estimateHands(process.tensor, this.config) : []; - this.perf.hand = Math.trunc(now() - timeStamp); + current = Math.trunc(now() - timeStamp); + if (current > 0) this.perf.hand = current; } this.analyze('End Hand:'); @@ -394,7 +398,8 @@ export class Human { this.state = 'run:object'; timeStamp = now(); objectRes = this.config.object.enabled ? await nanodet.predict(process.tensor, this.config) : []; - this.perf.object = Math.trunc(now() - timeStamp); + current = Math.trunc(now() - timeStamp); + if (current > 0) this.perf.object = current; } this.analyze('End Object:'); diff --git a/src/nanodet/labels.ts b/src/nanodet/labels.ts new file mode 100644 index 00000000..590d6db4 --- /dev/null +++ b/src/nanodet/labels.ts @@ -0,0 +1,82 @@ +export const labels = [ + { class: 1, label: 'person' }, + { class: 2, label: 'bicycle' }, + { class: 3, label: 'car' }, + { class: 4, label: 'motorcycle' }, + { class: 5, label: 'airplane' }, + { class: 6, label: 'bus' }, + { class: 7, label: 'train' }, + { class: 8, label: 'truck' }, + { class: 9, label: 'boat' }, + { class: 10, label: 'traffic light' }, + { class: 11, label: 'fire hydrant' }, + { class: 12, label: 'stop sign' }, + { class: 13, label: 'parking meter' }, + { class: 14, label: 'bench' }, + { class: 15, label: 'bird' }, + { class: 16, label: 'cat' }, + { class: 17, label: 'dog' }, + { class: 18, label: 'horse' }, + { class: 19, label: 'sheep' }, + { class: 20, label: 'cow' }, + { class: 21, label: 'elephant' }, + { class: 22, label: 'bear' }, + { class: 23, label: 'zebra' }, + { class: 24, label: 'giraffe' }, + { class: 25, label: 'backpack' }, + { class: 26, label: 'umbrella' }, + { class: 27, label: 'handbag' }, + { class: 28, label: 'tie' }, + { class: 29, label: 'suitcase' }, + { class: 30, label: 'frisbee' }, + { class: 31, label: 'skis' }, + { class: 32, label: 'snowboard' }, + { class: 33, label: 'sports ball' }, + { class: 34, label: 'kite' }, + { class: 35, label: 'baseball bat' }, + { class: 36, label: 'baseball glove' }, + { class: 37, label: 'skateboard' }, + { class: 38, label: 'surfboard' }, + { class: 39, label: 'tennis racket' }, + { class: 40, label: 'bottle' }, + { class: 41, label: 'wine glass' }, + { class: 42, label: 'cup' }, + { class: 43, label: 'fork' }, + { class: 44, label: 'knife' }, + { class: 45, label: 'spoon' }, + { class: 46, label: 'bowl' }, + { class: 47, label: 'banana' }, + { class: 48, label: 'apple' }, + { class: 49, label: 'sandwich' }, + { class: 50, label: 'orange' }, + { class: 51, label: 'broccoli' }, + { class: 52, label: 'carrot' }, + { class: 53, label: 'hot dog' }, + { class: 54, label: 'pizza' }, + { class: 55, label: 'donut' }, + { class: 56, label: 'cake' }, + { class: 57, label: 'chair' }, + { class: 58, label: 'couch' }, + { class: 59, label: 'potted plant' }, + { class: 60, label: 'bed' }, + { class: 61, label: 'dining table' }, + { class: 62, label: 'toilet' }, + { class: 63, label: 'tv' }, + { class: 64, label: 'laptop' }, + { class: 65, label: 'mouse' }, + { class: 66, label: 'remote' }, + { class: 67, label: 'keyboard' }, + { class: 68, label: 'cell phone' }, + { class: 69, label: 'microwave' }, + { class: 70, label: 'oven' }, + { class: 71, label: 'toaster' }, + { class: 72, label: 'sink' }, + { class: 73, label: 'refrigerator' }, + { class: 74, label: 'book' }, + { class: 75, label: 'clock' }, + { class: 76, label: 'vase' }, + { class: 77, label: 'scissors' }, + { class: 78, label: 'teddy bear' }, + { class: 79, label: 'hair drier' }, + { class: 80, label: 'toothbrush' }, +]; diff --git a/src/nanodet/nanodet.ts b/src/nanodet/nanodet.ts index e272318b..5ffffd52 100644 --- a/src/nanodet/nanodet.ts +++ b/src/nanodet/nanodet.ts @@ -1,14 +1,14 @@ import { log } from '../helpers'; import * as tf from '../../dist/tfjs.esm.js'; import * as profile from '../profile'; +import { labels } from './labels'; let model; let last: Array<{}> = []; let skipped = Number.MAX_SAFE_INTEGER; const scaleBox = 2.5; // increase box size -// eslint-disable-next-line max-len -const labels = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'vehicle', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'animal', 'animal', 'animal', 'animal', 'animal', 'animal', 'animal', 'bear', 'animal', 'animal', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'pastry', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']; +const activateScore = false; export async function load(config) { if (!model) { @@ -21,50 +21,51 @@ export async function load(config) { } async function process(res, inputSize, outputShape, config) { + let id = 0; let results: Array<{ score: number, strideSize: number, class: number, label: string, center: number[], centerRaw: number[], box: number[], boxRaw: number[] }> = []; for (const strideSize of [1, 2, 4]) { // try each stride size as it detects large/medium/small objects // find scores, boxes, classes tf.tidy(() => { // wrap in tidy to automatically deallocate temp tensors const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704 // find boxes and scores output depending on stride - // log.info('Variation:', strideSize, 'strides', baseSize, 'baseSize'); - const scores = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 80))?.squeeze(); - const features = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 32))?.squeeze(); - // log.state('Found features tensor:', features?.shape); - // log.state('Found scores tensor:', scores?.shape); - const scoreIdx = scores.argMax(1).dataSync(); // location of highest scores - const scoresMax = scores.max(1).dataSync(); // values of highest scores - const boxesMax = features.reshape([-1, 4, 8]); // reshape [32] to [4,8] where 8 is change of different features inside stride + const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 80))?.squeeze(); + const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < 80))?.squeeze(); + const boxesMax = featuresT.reshape([-1, 4, featuresT.shape[1] / 4]); // reshape [output] to [4, output / 4] where number is number of different features inside each stride const boxIdx = boxesMax.argMax(2).arraySync(); // what we need is indexes of features with highest scores, not values itself - for (let i = 0; i < scores.shape[0]; i++) { - if (scoreIdx[i] !== 0 && scoresMax[i] > config.object.minConfidence) { - const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1 - const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1 - const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize)); // just grab indexes of features with highest scores - let boxRaw = [ // results normalized to range 0..1 - cx - (scaleBox / strideSize * boxOffset[0]), - cy - (scaleBox / strideSize * boxOffset[1]), - cx + (scaleBox / strideSize * boxOffset[2]), - cy + (scaleBox / strideSize * boxOffset[3]), - ]; - boxRaw = boxRaw.map((a) => Math.max(0, Math.min(a, 1))); // fix out-of-bounds coords - const box = [ // results normalized to input image pixels - Math.max(0, (boxRaw[0] * outputShape[0])), - Math.max(0, (boxRaw[1] * outputShape[1])), - Math.min(1, (boxRaw[2] * outputShape[0]) - (boxRaw[0] * outputShape[0])), - Math.min(1, (boxRaw[3] * outputShape[1]) - (boxRaw[1] * outputShape[1])), - ]; - const result = { - score: scoresMax[i], - strideSize, - class: scoreIdx[i] + 1, - label: labels[scoreIdx[i]], - center: [Math.trunc(outputShape[0] * cx), Math.trunc(outputShape[1] * cy)], - centerRaw: [cx, cy], - box: box.map((a) => Math.trunc(a)), - boxRaw, - }; - results.push(result); + const scores = activateScore ? scoresT.exp(1).arraySync() : scoresT.arraySync(); // optionally use exponential scores or just as-is + for (let i = 0; i < scoresT.shape[0]; i++) { // total strides (x * y matrix) + for (let j = 0; j < scoresT.shape[1]; j++) { // one score for each class + const score = scores[i][j] - (activateScore ? 1 : 0); // get score for current position + if (score > config.object.minConfidence) { + const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1 + const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1 + const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize)); // just grab indexes of features with highest scores + let boxRaw = [ // results normalized to range 0..1 + cx - (scaleBox / strideSize * boxOffset[0]), + cy - (scaleBox / strideSize * boxOffset[1]), + cx + (scaleBox / strideSize * boxOffset[2]), + cy + (scaleBox / strideSize * boxOffset[3]), + ]; + boxRaw = boxRaw.map((a) => Math.max(0, Math.min(a, 1))); // fix out-of-bounds coords + const box = [ // results normalized to input image pixels + boxRaw[0] * outputShape[0], + boxRaw[1] * outputShape[1], + boxRaw[2] * outputShape[0], + boxRaw[3] * outputShape[1], + ]; + const result = { + id: id++, + strideSize, + score, + class: j + 1, + label: labels[j].label, + center: [Math.trunc(outputShape[0] * cx), Math.trunc(outputShape[1] * cy)], + centerRaw: [cx, cy], + box: box.map((a) => Math.trunc(a)), + boxRaw, + }; + results.push(result); + } } } });