update nanodet and face rotation check

pull/91/head
Vladimir Mandic 2021-03-23 14:46:44 -04:00
parent c0654a1efd
commit 0294fb52f7
20 changed files with 939 additions and 844 deletions

@@ -1,6 +1,6 @@
 # @vladmandic/human
-Version: **1.2.2**
+Version: **1.2.3**
 Description: **Human: AI-powered 3D Face Detection, Face Embedding & Recognition, Body Pose Tracking, Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction & Gesture Recognition**
 Author: **Vladimir Mandic <mandic00@live.com>**
@@ -9,6 +9,9 @@ Repository: **<git+https://github.com/vladmandic/human.git>**
 ## Changelog
+### **1.2.3** 2021/03/21 mandic00@live.com
 ### **1.2.2** 2021/03/21 mandic00@live.com
 - precise face rotation

File diffs suppressed because one or more lines are too long (regenerated build output), including:

396  dist/human.esm.js vendored
396  dist/human.js vendored
6    dist/human.js.map vendored
20   dist/human.node.js vendored

(other generated dist files were suppressed as well)

@@ -57,8 +57,8 @@
     "@tensorflow/tfjs-node": "^3.3.0",
     "@tensorflow/tfjs-node-gpu": "^3.3.0",
     "@types/node": "^14.14.35",
-    "@typescript-eslint/eslint-plugin": "^4.18.0",
-    "@typescript-eslint/parser": "^4.18.0",
+    "@typescript-eslint/eslint-plugin": "^4.19.0",
+    "@typescript-eslint/parser": "^4.19.0",
     "@vladmandic/pilogger": "^0.2.15",
     "chokidar": "^3.5.1",
     "dayjs": "^1.10.4",
@@ -73,7 +73,7 @@
     "seedrandom": "^3.0.5",
     "simple-git": "^2.37.0",
     "tslib": "^2.1.0",
-    "typedoc": "^0.20.32",
+    "typedoc": "^0.20.33",
     "typescript": "^4.2.3"
   }
 }

@@ -270,7 +270,7 @@ export class Pipeline {
     const transformedCoords = tf.tensor2d(transformedCoordsData);
     // do rotation one more time with mesh keypoints if we want to return perfect image
-    if (config.face.detector.rotation && config.face.mesh.enabled && tf.ENV.flags.IS_BROWSER) {
+    if (config.face.detector.rotation && config.face.mesh.enabled && (config.face.description.enabled || config.face.embedding.enabled) && tf.ENV.flags.IS_BROWSER) {
       const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= meshLandmarks.count) ? meshLandmarks.symmetryLine : blazeFaceLandmarks.symmetryLine;
       angle = util.computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
       const faceCenter = bounding.getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
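For context on the guarded path: the second rotation pass only pays off when the aligned face crop is actually consumed downstream (description or embedding), so the check now skips it otherwise. A minimal sketch of the landmark-based angle computation, assuming the convention of the upstream tfjs face-landmark utilities; the exact normalization in this repo's `util.computeRotation` may differ:

```ts
// Sketch: rotation angle from two face landmarks (mouth, forehead).
type Point = [number, number];

function computeRotation(point1: Point, point2: Point): number {
  // angle of the mouth-forehead symmetry line relative to vertical, in radians
  const radians = Math.PI / 2 - Math.atan2(-(point2[1] - point1[1]), point2[0] - point1[0]);
  // normalize to [-pi, pi)
  return radians - 2 * Math.PI * Math.floor((radians + Math.PI) / (2 * Math.PI));
}

// an upright face: mouth directly below forehead => angle ~ 0
console.log(computeRotation([100, 200], [100, 80])); // 0
```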

@@ -228,14 +228,14 @@ const config: Config = {
   emotion: {
     enabled: true,
     minConfidence: 0.1, // threshold for discarding a prediction
-    skipFrames: 33, // how many frames to go without re-running the detector
+    skipFrames: 32, // how many frames to go without re-running the detector
     modelPath: '../models/emotion.json',
   },
   age: {
     enabled: false, // obsolete, replaced by description module
     modelPath: '../models/age.json',
-    skipFrames: 31, // how many frames to go without re-running the detector
+    skipFrames: 33, // how many frames to go without re-running the detector
     // only used for video inputs
   },
@@ -243,7 +243,7 @@ const config: Config = {
     enabled: false, // obsolete, replaced by description module
     minConfidence: 0.1, // threshold for discarding a prediction
     modelPath: '../models/gender.json',
-    skipFrames: 32, // how many frames to go without re-running the detector
+    skipFrames: 34, // how many frames to go without re-running the detector
     // only used for video inputs
   },
@@ -296,11 +296,11 @@ const config: Config = {
   object: {
     enabled: false,
     modelPath: '../models/nanodet.json',
-    minConfidence: 0.15, // threshold for discarding a prediction
-    iouThreshold: 0.25, // threshold for deciding whether boxes overlap too much
+    minConfidence: 0.20, // threshold for discarding a prediction
+    iouThreshold: 0.40, // threshold for deciding whether boxes overlap too much
     // in non-maximum suppression
     maxResults: 10, // maximum number of objects detected in the input
-    skipFrames: 13, // how many frames to go without re-running the detector
+    skipFrames: 41, // how many frames to go without re-running the detector
   },
 };
 export { config as defaults };
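These `skipFrames` values drive the caching pattern shared by the per-model `predict` functions: for video input, a module returns its cached result until the skip window elapses, then re-runs for real. A minimal standalone sketch of that pattern (hypothetical helper; the in-repo versions also gate on `videoOptimized` and other per-module settings):

```ts
// Frame-skipping cache around an expensive model run.
let skipped = Number.MAX_SAFE_INTEGER; // force a real run on the first frame
let last: Array<unknown> = [];         // result cached from the last real run

async function predictWithSkip(runModel: () => Promise<Array<unknown>>, skipFrames: number) {
  if (skipped < skipFrames && last.length > 0) {
    skipped++;       // still inside the skip window: reuse cached result
    return last;
  }
  skipped = 0;       // window elapsed: run the model and refresh the cache
  last = await runModel();
  return last;
}
```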

@@ -345,6 +345,7 @@ export class Human {
     let handRes;
     let faceRes;
     let objectRes;
+    let current;
     // run face detection followed by all models that rely on face bounding box: face mesh, age, gender, emotion
     if (this.config.async) {
@@ -354,7 +355,8 @@
       this.state = 'run:face';
       timeStamp = now();
       faceRes = this.config.face.enabled ? await faceall.detectFace(this, process.tensor) : [];
-      this.perf.face = Math.trunc(now() - timeStamp);
+      current = Math.trunc(now() - timeStamp);
+      if (current > 0) this.perf.face = current;
     }
     // run body: can be posenet or blazepose
@@ -368,7 +370,8 @@
       timeStamp = now();
       if (this.config.body.modelPath.includes('posenet')) bodyRes = this.config.body.enabled ? await this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
       else bodyRes = this.config.body.enabled ? await blazepose.predict(process.tensor, this.config) : [];
-      this.perf.body = Math.trunc(now() - timeStamp);
+      current = Math.trunc(now() - timeStamp);
+      if (current > 0) this.perf.body = current;
     }
     this.analyze('End Body:');
@@ -381,7 +384,8 @@
       this.state = 'run:hand';
       timeStamp = now();
       handRes = this.config.hand.enabled ? await this.models.handpose?.estimateHands(process.tensor, this.config) : [];
-      this.perf.hand = Math.trunc(now() - timeStamp);
+      current = Math.trunc(now() - timeStamp);
+      if (current > 0) this.perf.hand = current;
     }
     this.analyze('End Hand:');
@@ -394,7 +398,8 @@
       this.state = 'run:object';
       timeStamp = now();
       objectRes = this.config.object.enabled ? await nanodet.predict(process.tensor, this.config) : [];
-      this.perf.object = Math.trunc(now() - timeStamp);
+      current = Math.trunc(now() - timeStamp);
+      if (current > 0) this.perf.object = current;
     }
     this.analyze('End Object:');
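The new `if (current > 0)` guard keeps `perf` entries meaningful, mostly under `config.async`: concurrently awaited models can measure 0 ms even though they did run, and a zero reading would clobber the last real timing. The pattern in isolation (hypothetical helper names, not from the repo):

```ts
// Guarded performance timer: only overwrite on a non-zero measurement.
const perf: Record<string, number> = {};

function recordTiming(key: string, startMs: number, nowMs: number) {
  const elapsed = Math.trunc(nowMs - startMs);
  if (elapsed > 0) perf[key] = elapsed; // 0 ms => nothing really ran now; keep previous value
}

recordTiming('face', 100, 134); // perf.face = 34
recordTiming('face', 200, 200); // 0 ms elapsed, perf.face stays 34
```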

src/nanodet/labels.ts Normal file (+82)

@@ -0,0 +1,82 @@
export const labels = [
{ class: 1, label: 'person' },
{ class: 2, label: 'bicycle' },
{ class: 3, label: 'car' },
{ class: 4, label: 'motorcycle' },
{ class: 5, label: 'airplane' },
{ class: 6, label: 'bus' },
{ class: 7, label: 'train' },
{ class: 8, label: 'truck' },
{ class: 9, label: 'boat' },
{ class: 10, label: 'traffic light' },
{ class: 11, label: 'fire hydrant' },
{ class: 12, label: 'stop sign' },
{ class: 13, label: 'parking meter' },
{ class: 14, label: 'bench' },
{ class: 15, label: 'bird' },
{ class: 16, label: 'cat' },
{ class: 17, label: 'dog' },
{ class: 18, label: 'horse' },
{ class: 19, label: 'sheep' },
{ class: 20, label: 'cow' },
{ class: 21, label: 'elephant' },
{ class: 22, label: 'bear' },
{ class: 23, label: 'zebra' },
{ class: 24, label: 'giraffe' },
{ class: 25, label: 'backpack' },
{ class: 26, label: 'umbrella' },
{ class: 27, label: 'handbag' },
{ class: 28, label: 'tie' },
{ class: 29, label: 'suitcase' },
{ class: 30, label: 'frisbee' },
{ class: 31, label: 'skis' },
{ class: 32, label: 'snowboard' },
{ class: 33, label: 'sports ball' },
{ class: 34, label: 'kite' },
{ class: 35, label: 'baseball bat' },
{ class: 36, label: 'baseball glove' },
{ class: 37, label: 'skateboard' },
{ class: 38, label: 'surfboard' },
{ class: 39, label: 'tennis racket' },
{ class: 40, label: 'bottle' },
{ class: 41, label: 'wine glass' },
{ class: 42, label: 'cup' },
{ class: 43, label: 'fork' },
{ class: 44, label: 'knife' },
{ class: 45, label: 'spoon' },
{ class: 46, label: 'bowl' },
{ class: 47, label: 'banana' },
{ class: 48, label: 'apple' },
{ class: 49, label: 'sandwich' },
{ class: 50, label: 'orange' },
{ class: 51, label: 'broccoli' },
{ class: 52, label: 'carrot' },
{ class: 53, label: 'hot dog' },
{ class: 54, label: 'pizza' },
{ class: 55, label: 'donut' },
{ class: 56, label: 'cake' },
{ class: 57, label: 'chair' },
{ class: 58, label: 'couch' },
{ class: 59, label: 'potted plant' },
{ class: 60, label: 'bed' },
{ class: 61, label: 'dining table' },
{ class: 62, label: 'toilet' },
{ class: 63, label: 'tv' },
{ class: 64, label: 'laptop' },
{ class: 65, label: 'mouse' },
{ class: 66, label: 'remote' },
{ class: 67, label: 'keyboard' },
{ class: 68, label: 'cell phone' },
{ class: 69, label: 'microwave' },
{ class: 70, label: 'oven' },
{ class: 71, label: 'toaster' },
{ class: 72, label: 'sink' },
{ class: 73, label: 'refrigerator' },
{ class: 74, label: 'book' },
{ class: 75, label: 'clock' },
{ class: 76, label: 'vase' },
{ class: 77, label: 'scissors' },
{ class: 78, label: 'teddy bear' },
{ class: 79, label: 'hair drier' },
{ class: 80, label: 'toothbrush' },
];
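The decode loop in `nanodet.ts` below indexes this table positionally (`labels[j].label`, with `class: j + 1`), so the `class` field here matches the 1-based COCO class ids. A quick lookup sketch:

```ts
import { labels } from './labels';

// position j in the per-cell score vector corresponds to labels[j];
// its 1-based COCO class id is j + 1
const j = 15;
console.log(labels[j]);  // { class: 16, label: 'cat' }

// or look up by class id when working with detection results
const byId = (id: number) => labels.find((l) => l.class === id)?.label;
console.log(byId(63));   // 'tv'
```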

@@ -1,14 +1,14 @@
 import { log } from '../helpers';
 import * as tf from '../../dist/tfjs.esm.js';
 import * as profile from '../profile';
+import { labels } from './labels';
 let model;
 let last: Array<{}> = [];
 let skipped = Number.MAX_SAFE_INTEGER;
 const scaleBox = 2.5; // increase box size
-// eslint-disable-next-line max-len
-const labels = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'vehicle', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'animal', 'animal', 'animal', 'animal', 'animal', 'animal', 'animal', 'bear', 'animal', 'animal', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'pastry', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'];
+const activateScore = false;
 export async function load(config) {
   if (!model) {
@@ -21,50 +21,51 @@ export async function load(config) {
 }
 async function process(res, inputSize, outputShape, config) {
+  let id = 0;
   let results: Array<{ score: number, strideSize: number, class: number, label: string, center: number[], centerRaw: number[], box: number[], boxRaw: number[] }> = [];
   for (const strideSize of [1, 2, 4]) { // try each stride size as it detects large/medium/small objects
     // find scores, boxes, classes
     tf.tidy(() => { // wrap in tidy to automatically deallocate temp tensors
       const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704
       // find boxes and scores output depending on stride
-      // log.info('Variation:', strideSize, 'strides', baseSize, 'baseSize');
-      const scores = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 80))?.squeeze();
-      const features = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 32))?.squeeze();
-      // log.state('Found features tensor:', features?.shape);
-      // log.state('Found scores tensor:', scores?.shape);
-      const scoreIdx = scores.argMax(1).dataSync(); // location of highest scores
-      const scoresMax = scores.max(1).dataSync(); // values of highest scores
-      const boxesMax = features.reshape([-1, 4, 8]); // reshape [32] to [4,8] where 8 is change of different features inside stride
+      const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 80))?.squeeze();
+      const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < 80))?.squeeze();
+      const boxesMax = featuresT.reshape([-1, 4, featuresT.shape[1] / 4]); // reshape [output] to [4, output / 4] where number is number of different features inside each stride
       const boxIdx = boxesMax.argMax(2).arraySync(); // what we need is indexes of features with highest scores, not values itself
-      for (let i = 0; i < scores.shape[0]; i++) {
-        if (scoreIdx[i] !== 0 && scoresMax[i] > config.object.minConfidence) {
-          const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1
-          const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1
-          const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize)); // just grab indexes of features with highest scores
-          let boxRaw = [ // results normalized to range 0..1
-            cx - (scaleBox / strideSize * boxOffset[0]),
-            cy - (scaleBox / strideSize * boxOffset[1]),
-            cx + (scaleBox / strideSize * boxOffset[2]),
-            cy + (scaleBox / strideSize * boxOffset[3]),
-          ];
-          boxRaw = boxRaw.map((a) => Math.max(0, Math.min(a, 1))); // fix out-of-bounds coords
-          const box = [ // results normalized to input image pixels
-            Math.max(0, (boxRaw[0] * outputShape[0])),
-            Math.max(0, (boxRaw[1] * outputShape[1])),
-            Math.min(1, (boxRaw[2] * outputShape[0]) - (boxRaw[0] * outputShape[0])),
-            Math.min(1, (boxRaw[3] * outputShape[1]) - (boxRaw[1] * outputShape[1])),
-          ];
-          const result = {
-            score: scoresMax[i],
-            strideSize,
-            class: scoreIdx[i] + 1,
-            label: labels[scoreIdx[i]],
-            center: [Math.trunc(outputShape[0] * cx), Math.trunc(outputShape[1] * cy)],
-            centerRaw: [cx, cy],
-            box: box.map((a) => Math.trunc(a)),
-            boxRaw,
-          };
-          results.push(result);
+      const scores = activateScore ? scoresT.exp(1).arraySync() : scoresT.arraySync(); // optionally use exponential scores or just as-is
+      for (let i = 0; i < scoresT.shape[0]; i++) { // total strides (x * y matrix)
+        for (let j = 0; j < scoresT.shape[1]; j++) { // one score for each class
+          const score = scores[i][j] - (activateScore ? 1 : 0); // get score for current position
+          if (score > config.object.minConfidence) {
+            const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1
+            const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1
+            const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize)); // just grab indexes of features with highest scores
+            let boxRaw = [ // results normalized to range 0..1
+              cx - (scaleBox / strideSize * boxOffset[0]),
+              cy - (scaleBox / strideSize * boxOffset[1]),
+              cx + (scaleBox / strideSize * boxOffset[2]),
+              cy + (scaleBox / strideSize * boxOffset[3]),
+            ];
+            boxRaw = boxRaw.map((a) => Math.max(0, Math.min(a, 1))); // fix out-of-bounds coords
+            const box = [ // results normalized to input image pixels
+              boxRaw[0] * outputShape[0],
+              boxRaw[1] * outputShape[1],
+              boxRaw[2] * outputShape[0],
+              boxRaw[3] * outputShape[1],
+            ];
+            const result = {
+              id: id++,
+              strideSize,
+              score,
+              class: j + 1,
+              label: labels[j].label,
+              center: [Math.trunc(outputShape[0] * cx), Math.trunc(outputShape[1] * cy)],
+              centerRaw: [cx, cy],
+              box: box.map((a) => Math.trunc(a)),
+              boxRaw,
+            };
+            results.push(result);
+          }
         }
       }
     });
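The center computation in the new loop treats the flat cell index `i` as a position in a `baseSize x baseSize` grid (13, 26 or 52 per stride) and returns the normalized center of that cell. Worked in isolation:

```ts
// Grid decode used above: flat cell index -> normalized [cx, cy] in range 0..1.
function cellCenter(i: number, baseSize: number): [number, number] {
  const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // column, shifted to cell center
  const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // row, shifted to cell center
  return [cx, cy];
}

// e.g. i = 20 on the 13x13 grid: column 7, row 1
console.log(cellCenter(20, 13)); // [7.5/13 ~ 0.577, 1.5/13 ~ 0.115]
```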

types/nanodet/labels.d.ts vendored Normal file (+4)

@@ -0,0 +1,4 @@
export declare const labels: {
    class: number;
    label: string;
}[];