implemented human.next global interpolation method

pull/280/head
Vladimir Mandic 2021-05-31 10:40:07 -04:00
parent 8d228ecff3
commit 5788a80a7a
11 changed files with 272 additions and 213 deletions

View File

@ -11,6 +11,7 @@ Repository: **<git+https://github.com/vladmandic/human.git>**
### **HEAD -> main** 2021/05/30 mandic00@live.com
- finished draw buffering and smoothing and enabled by default
- implemented service worker
- quantized centernet
- release candidate

View File

@ -155,7 +155,7 @@ Additionally, `HTMLVideoElement`, `HTMLMediaElement` can be a standard `<video>`
Live streaming examples:
- **HLS** (*HTTP Live Streaming*) using `hls.js`
- **DASH** (Dynamic Adaptive Streaming over HTTP) using `dash.js`
- **WebRTC** media track
- **WebRTC** media track using built-in support
<br>
@ -197,31 +197,58 @@ or using `async/await`:
```js
// create instance of human with simple configuration using default values
const config = { backend: 'webgl' };
const human = new Human(config);
const human = new Human(config); // create instance of Human
async function detectVideo() {
const inputVideo = document.getElementById('video-id');
const outputCanvas = document.getElementById('canvas-id');
const result = await human.detect(inputVideo);
human.draw.all(outputCanvas, result);
requestAnimationFrame(detectVideo);
const result = await human.detect(inputVideo); // run detection
human.draw.all(outputCanvas, result); // draw all results
requestAnimationFrame(detectVideo); // run loop
}
detectVideo();
detectVideo(); // start loop
```
or using interpolated results for smooth video processing by separating detection and drawing loops:
```js
const human = new Human(); // create instance of Human
const inputVideo = document.getElementById('video-id');
const outputCanvas = document.getElementById('canvas-id');
let result;
async function detectVideo() {
result = await human.detect(inputVideo); // run detection
requestAnimationFrame(detectVideo); // run detect loop
}
async function drawVideo() {
if (result) { // check if result is available
const interpolated = human.next(result); // calculate next interpolated frame
human.draw.all(outputCanvas, interpolated); // draw the frame
}
requestAnimationFrame(drawVideo); // run draw loop
}
detectVideo(); // start detection loop
drawVideo(); // start draw loop
```
And for even better results, you can run detection in a separate web worker thread while keeping interpolation and drawing on the main thread.
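A minimal sketch of that approach, assuming a hypothetical `human-worker.js` script that creates its own `Human` instance, rebuilds `ImageData` from the posted pixel buffer, runs `human.detect()` on it, and posts a serializable copy of the result back (the message format and worker bundling are application choices, not a fixed library API):
```js
const human = new Human(); // main-thread instance used only for interpolation and drawing
const inputVideo = document.getElementById('video-id');
const outputCanvas = document.getElementById('canvas-id');
const capture = document.createElement('canvas'); // staging canvas used to grab video frames
const worker = new Worker('human-worker.js'); // hypothetical worker script that runs detection
let result;
let busy = false;
worker.onmessage = (msg) => { result = msg.data; busy = false; }; // worker posts back its latest result

async function detectLoop() {
  if (!busy && inputVideo.readyState >= 2) { // only send a frame when the worker is idle
    busy = true;
    capture.width = inputVideo.videoWidth;
    capture.height = inputVideo.videoHeight;
    const ctx = capture.getContext('2d');
    ctx.drawImage(inputVideo, 0, 0);
    const pixels = ctx.getImageData(0, 0, capture.width, capture.height);
    worker.postMessage({ image: pixels.data.buffer, width: capture.width, height: capture.height }, [pixels.data.buffer]); // transfer pixel buffer to the worker
  }
  requestAnimationFrame(detectLoop); // keep feeding frames to the worker
}

async function drawLoop() {
  if (result) { // check if a detection result is available
    const interpolated = human.next(result); // calculate next interpolated frame
    human.draw.all(outputCanvas, interpolated); // draw the smoothed frame
  }
  requestAnimationFrame(drawLoop); // draw loop stays on the main thread
}

detectLoop();
drawLoop();
```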
<br><hr><br>
## Default models
Default models in Human library are:
- **Face Detection**: MediaPipe BlazeFace (Back version)
- **Face Detection**: MediaPipe BlazeFace - Back variation
- **Face Mesh**: MediaPipe FaceMesh
- **Face Description**: HSE FaceRes
- **Face Iris Analysis**: MediaPipe Iris
- **Face Description**: HSE FaceRes
- **Emotion Detection**: Oarriaga Emotion
- **Body Analysis**: PoseNet (AtomicBits version)
- **Body Analysis**: MoveNet - Lightning variation
Note that alternative models are provided and can be enabled via configuration.
For example, the `PoseNet` model can be switched to `BlazePose`, `EfficientPose` or `MoveNet` depending on the use case, as sketched below:
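A sketch of how that selection looks in configuration, using the body model file names that appear elsewhere in this repository (enable exactly one `modelPath`):
```js
const human = new Human({
  body: { enabled: true, modelPath: 'movenet-lightning.json' }, // current default
  // body: { enabled: true, modelPath: 'blazepose.json' },      // alternative model
  // body: { enabled: true, modelPath: 'posenet.json' },        // alternative model
});
```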

View File

@ -6,7 +6,6 @@ N/A
## Exploring Features
- Implement results interpolation at library level instead of inside draw functions
- Switch to TypeScript 4.3
- Unify score/confidence variables
@ -16,9 +15,10 @@ N/A
## In Progress
## Issues
N/A
## Known Issues
- CenterNet with WebGL: <https://github.com/tensorflow/tfjs/issues/5145>
- CenterNet with WASM: <https://github.com/tensorflow/tfjs/issues/5110>
- NanoDet with WASM: <https://github.com/tensorflow/tfjs/issues/4824>
- BlazeFace and HandPose rotation in NodeJS: <https://github.com/tensorflow/tfjs/issues/4066>

View File

@ -38,25 +38,29 @@ const userConfig = {
enabled: false,
flip: false,
},
face: { enabled: false,
face: { enabled: true,
detector: { return: true },
mesh: { enabled: true },
iris: { enabled: true },
description: { enabled: true },
emotion: { enabled: true },
iris: { enabled: false },
description: { enabled: false },
emotion: { enabled: false },
},
hand: { enabled: false },
// body: { enabled: true, modelPath: 'posenet.json' },
// body: { enabled: true, modelPath: 'blazepose.json' },
body: { enabled: false, modelPath: 'movenet-lightning.json' },
object: { enabled: true },
body: { enabled: false },
object: { enabled: false },
gesture: { enabled: true },
*/
};
const drawOptions = {
bufferedOutput: true, // makes draw functions interpolate results between each detection for smoother movement
bufferedFactor: 4, // speed of interpolation convergence where 1 means 100% immediately, 2 means 50% at each interpolation, etc.
drawBoxes: true,
drawGaze: true,
drawLabels: true,
drawPolygons: true,
drawPoints: false,
};
// ui options
@ -223,17 +227,18 @@ async function drawResults(input) {
ctx.drawImage(input, 0, 0, input.width, input.height, 0, 0, canvas.width, canvas.height);
}
// draw all results
human.draw.all(canvas, result, drawOptions);
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const person = result.persons; // invoke person getter
/* use individual functions
// draw all results using interpolated results
const interpolated = human.next(result);
human.draw.all(canvas, interpolated, drawOptions);
/* alternatively use individual functions
human.draw.face(canvas, result.face);
human.draw.body(canvas, result.body);
human.draw.hand(canvas, result.hand);
human.draw.object(canvas, result.object);
human.draw.gesture(canvas, result.gesture);
*/
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const person = result.persons; // explicitly invoke person getter
await calcSimmilariry(result);
// update log
@ -247,10 +252,9 @@ async function drawResults(input) {
document.getElementById('log').innerHTML = `
video: ${ui.camera.name} | facing: ${ui.camera.facing} | screen: ${window.innerWidth} x ${window.innerHeight} camera: ${ui.camera.width} x ${ui.camera.height} ${processing}<br>
backend: ${human.tf.getBackend()} | ${memory}<br>
performance: ${str(result.performance)}ms FPS process:${avgDetect} refresh:${avgDraw}<br>
performance: ${str(lastDetectedResult.performance)}ms FPS process:${avgDetect} refresh:${avgDraw}<br>
${warning}<br>
`;
ui.framesDraw++;
ui.lastFrame = performance.now();
// if buffered, immediate loop but limit frame rate, although it's going to run slower as JS is single-threaded

View File

@ -246,7 +246,7 @@ const config: Config = {
// this parameter is not valid in nodejs
maxDetected: 10, // maximum number of faces detected in the input
// should be set to the minimum number for performance
skipFrames: 21, // how many max frames to go without re-running the face bounding box detector
skipFrames: 15, // how many max frames to go without re-running the face bounding box detector
// only used when cacheSensitivity is not zero
// e.g., if model is running at 25 FPS, we can re-use existing bounding
// box for updated face analysis as the head probably hasn't moved much
@ -272,7 +272,7 @@ const config: Config = {
// recommended to enable detector.rotation and mesh.enabled
modelPath: 'faceres.json', // face description model
// can be either absolute path or relative to modelBasePath
skipFrames: 31, // how many max frames to go without re-running the detector
skipFrames: 16, // how many max frames to go without re-running the detector
// only used when cacheSensitivity is not zero
minConfidence: 0.1, // threshold for discarding a prediction
},
@ -280,7 +280,7 @@ const config: Config = {
emotion: {
enabled: true,
minConfidence: 0.1, // threshold for discarding a prediction
skipFrames: 32, // how many max frames to go without re-running the detector
skipFrames: 17, // how many max frames to go without re-running the detector
// only used when cacheSensitivity is not zero
modelPath: 'emotion.json', // face emotion model, can be absolute path or relative to modelBasePath
},
@ -298,9 +298,9 @@ const config: Config = {
hand: {
enabled: true,
rotation: false, // use best-guess rotated hand image or just box with rotation as-is
rotation: true, // use best-guess rotated hand image or just box with rotation as-is
// false means higher performance, but incorrect finger mapping if hand is inverted
skipFrames: 32, // how many max frames to go without re-running the hand bounding box detector
skipFrames: 18, // how many max frames to go without re-running the hand bounding box detector
// only used when cacheSensitivity is not zero
// e.g., if model is running at 25 FPS, we can re-use existing bounding
// box for updated hand skeleton analysis as the hand probably
@ -325,7 +325,7 @@ const config: Config = {
minConfidence: 0.2, // threshold for discarding a prediction
iouThreshold: 0.4, // amount of overlap between two detected objects before one object is removed
maxDetected: 10, // maximum number of objects detected in the input
skipFrames: 41, // how many max frames to go without re-running the detector
skipFrames: 19, // how many max frames to go without re-running the detector
// only used when cacheSensitivity is not zero
},
};

View File

@ -3,7 +3,7 @@
*/
import { TRI468 as triangulation } from '../blazeface/coords';
import { mergeDeep } from '../helpers';
import { mergeDeep, now } from '../helpers';
import type { Result, Face, Body, Hand, Item, Gesture, Person } from '../result';
/**
@ -25,7 +25,6 @@ import type { Result, Face, Body, Hand, Item, Gesture, Person } from '../result'
* -useDepth: use z-axis coordinate as color shade,
* -useCurves: draw polygons as curves or as lines,
* -bufferedOutput: experimental: allows calling draw methods multiple times for each detection and interpolates results between detections, thus achieving smoother animations
* -bufferedFactor: speed of interpolation convergence where 1 means 100% immediately, 2 means 50% at each interpolation, etc.
*/
export interface DrawOptions {
color: string,
@ -45,7 +44,6 @@ export interface DrawOptions {
useDepth: boolean,
useCurves: boolean,
bufferedOutput: boolean,
bufferedFactor: number,
}
export const options: DrawOptions = {
@ -65,12 +63,9 @@ export const options: DrawOptions = {
fillPolygons: <boolean>false,
useDepth: <boolean>true,
useCurves: <boolean>false,
bufferedFactor: <number>3,
bufferedOutput: <boolean>true,
};
let bufferedResult: Result = { face: [], body: [], hand: [], gesture: [], object: [], persons: [], performance: {}, timestamp: 0 };
const rad2deg = (theta) => Math.round((theta * 180) / Math.PI);
function point(ctx, x, y, z = 0, localOptions) {
@ -246,24 +241,23 @@ export async function face(inCanvas: HTMLCanvasElement, result: Array<Face>, dra
}
}
if (localOptions.drawGaze && f.rotation?.gaze?.strength && f.rotation?.gaze?.bearing) {
const leftGaze = [
f.annotations['leftEyeIris'][0][0] + (Math.cos(f.rotation.gaze.bearing) * f.rotation.gaze.strength * f.box[2]),
f.annotations['leftEyeIris'][0][1] - (Math.sin(f.rotation.gaze.bearing) * f.rotation.gaze.strength * f.box[3]),
];
ctx.beginPath();
ctx.moveTo(f.annotations['leftEyeIris'][0][0], f.annotations['leftEyeIris'][0][1]);
ctx.strokeStyle = 'pink';
ctx.beginPath();
const leftGaze = [
f.annotations['leftEyeIris'][0][0] + (Math.sin(f.rotation.gaze.bearing) * f.rotation.gaze.strength * f.box[3]),
f.annotations['leftEyeIris'][0][1] + (Math.cos(f.rotation.gaze.bearing) * f.rotation.gaze.strength * f.box[2]),
];
ctx.moveTo(f.annotations['leftEyeIris'][0][0], f.annotations['leftEyeIris'][0][1]);
ctx.lineTo(leftGaze[0], leftGaze[1]);
ctx.stroke();
const rightGaze = [
f.annotations['rightEyeIris'][0][0] + (Math.cos(f.rotation.gaze.bearing) * f.rotation.gaze.strength * f.box[2]),
f.annotations['rightEyeIris'][0][1] - (Math.sin(f.rotation.gaze.bearing) * f.rotation.gaze.strength * f.box[3]),
f.annotations['rightEyeIris'][0][0] + (Math.sin(f.rotation.gaze.bearing) * f.rotation.gaze.strength * f.box[3]),
f.annotations['rightEyeIris'][0][1] + (Math.cos(f.rotation.gaze.bearing) * f.rotation.gaze.strength * f.box[2]),
];
ctx.beginPath();
ctx.moveTo(f.annotations['rightEyeIris'][0][0], f.annotations['rightEyeIris'][0][1]);
ctx.strokeStyle = 'pink';
ctx.lineTo(rightGaze[0], rightGaze[1]);
ctx.stroke();
}
}
@ -507,110 +501,6 @@ export async function person(inCanvas: HTMLCanvasElement, result: Array<Person>,
}
}
function calcBuffered(newResult: Result, localOptions) {
// each record is only updated using deep clone when the number of detected records changes, otherwise it will converge by itself
// otherwise bufferedResult is a shallow clone of result plus updated local calculated values
// thus mixing by-reference and by-value assignments to minimize memory operations
// interpolate body results
if (!bufferedResult.body || (newResult.body.length !== bufferedResult.body.length)) {
bufferedResult.body = JSON.parse(JSON.stringify(newResult.body)); // deep clone once
} else {
for (let i = 0; i < newResult.body.length; i++) {
const box = newResult.body[i].box // update box
.map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].box[j] + b) / localOptions.bufferedFactor) as [number, number, number, number];
const boxRaw = newResult.body[i].boxRaw // update boxRaw
.map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].boxRaw[j] + b) / localOptions.bufferedFactor) as [number, number, number, number];
const keypoints = newResult.body[i].keypoints // update keypoints
.map((keypoint, j) => ({
score: keypoint.score,
part: keypoint.part,
position: {
x: bufferedResult.body[i].keypoints[j] ? ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].keypoints[j].position.x + keypoint.position.x) / localOptions.bufferedFactor : keypoint.position.x,
y: bufferedResult.body[i].keypoints[j] ? ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].keypoints[j].position.y + keypoint.position.y) / localOptions.bufferedFactor : keypoint.position.y,
},
}));
bufferedResult.body[i] = { ...newResult.body[i], box, boxRaw, keypoints }; // shallow clone plus updated values
}
}
// interpolate hand results
if (!bufferedResult.hand || (newResult.hand.length !== bufferedResult.hand.length)) {
bufferedResult.hand = JSON.parse(JSON.stringify(newResult.hand)); // deep clone once
} else {
for (let i = 0; i < newResult.hand.length; i++) {
const box = (newResult.hand[i].box// update box
.map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].box[j] + b) / localOptions.bufferedFactor)) as [number, number, number, number];
const boxRaw = (newResult.hand[i].boxRaw // update boxRaw
.map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].boxRaw[j] + b) / localOptions.bufferedFactor)) as [number, number, number, number];
const landmarks = newResult.hand[i].landmarks // update landmarks
.map((landmark, j) => landmark
.map((coord, k) => (((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].landmarks[j][k] + coord) / localOptions.bufferedFactor)) as [number, number, number]);
const keys = Object.keys(newResult.hand[i].annotations); // update annotations
const annotations = {};
for (const key of keys) {
annotations[key] = newResult.hand[i].annotations[key]
.map((val, j) => val.map((coord, k) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].annotations[key][j][k] + coord) / localOptions.bufferedFactor));
}
bufferedResult.hand[i] = { ...newResult.hand[i], box, boxRaw, landmarks, annotations }; // shallow clone plus updated values
}
}
// interpolate face results
if (!bufferedResult.face || (newResult.face.length !== bufferedResult.face.length)) {
bufferedResult.face = JSON.parse(JSON.stringify(newResult.face)); // deep clone once
} else {
for (let i = 0; i < newResult.face.length; i++) {
const box = (newResult.face[i].box // update box
.map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.face[i].box[j] + b) / localOptions.bufferedFactor)) as [number, number, number, number];
const boxRaw = (newResult.face[i].boxRaw // update boxRaw
.map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.face[i].boxRaw[j] + b) / localOptions.bufferedFactor)) as [number, number, number, number];
const matrix = newResult.face[i].rotation.matrix;
const angle = {
roll: ((localOptions.bufferedFactor - 1) * bufferedResult.face[i].rotation.angle.roll + newResult.face[i].rotation.angle.roll) / localOptions.bufferedFactor,
yaw: ((localOptions.bufferedFactor - 1) * bufferedResult.face[i].rotation.angle.yaw + newResult.face[i].rotation.angle.yaw) / localOptions.bufferedFactor,
pitch: ((localOptions.bufferedFactor - 1) * bufferedResult.face[i].rotation.angle.pitch + newResult.face[i].rotation.angle.pitch) / localOptions.bufferedFactor,
};
const gaze = {
bearing: ((localOptions.bufferedFactor - 1) * bufferedResult.face[i].rotation.gaze.bearing + newResult.face[i].rotation.gaze.bearing) / localOptions.bufferedFactor, // not correct due to wrap-around
/*
angle: Math.atan2( // average angle is calculated differently
Math.sin(bufferedResult.face[i].rotation.gaze.angle) + Math.sin(newResult.face[i].rotation.gaze.angle),
Math.cos(bufferedResult.face[i].rotation.gaze.angle) + Math.sin(newResult.face[i].rotation.gaze.angle),
),
*/
strength: ((localOptions.bufferedFactor - 1) * bufferedResult.face[i].rotation.gaze.strength + newResult.face[i].rotation.gaze.strength) / localOptions.bufferedFactor,
};
const rotation = { angle, matrix, gaze };
bufferedResult.face[i] = { ...newResult.face[i], rotation, box, boxRaw }; // shallow clone plus updated values
}
}
// interpolate object detection results
if (!bufferedResult.object || (newResult.object.length !== bufferedResult.object.length)) {
bufferedResult.object = JSON.parse(JSON.stringify(newResult.object)); // deep clone once
} else {
for (let i = 0; i < newResult.object.length; i++) {
const box = newResult.object[i].box // update box
.map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.object[i].box[j] + b) / localOptions.bufferedFactor);
const boxRaw = newResult.object[i].boxRaw // update boxRaw
.map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.object[i].boxRaw[j] + b) / localOptions.bufferedFactor);
bufferedResult.object[i] = { ...newResult.object[i], box, boxRaw }; // shallow clone plus updated values
}
}
// interpolate person results
const newPersons = newResult.persons; // trigger getter function
if (!bufferedResult.persons || (newPersons.length !== bufferedResult.persons.length)) {
bufferedResult.persons = JSON.parse(JSON.stringify(newPersons));
} else {
for (let i = 0; i < newPersons.length; i++) { // update person box, we don't update the rest as it's updated as reference anyhow
bufferedResult.persons[i].box = (newPersons[i].box
.map((box, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.persons[i].box[j] + box) / localOptions.bufferedFactor)) as [number, number, number, number];
}
}
}
export async function canvas(inCanvas: HTMLCanvasElement, outCanvas: HTMLCanvasElement) {
if (!inCanvas || !outCanvas) return;
if (!(inCanvas instanceof HTMLCanvasElement) || !(outCanvas instanceof HTMLCanvasElement)) return;
@ -619,17 +509,30 @@ export async function canvas(inCanvas: HTMLCanvasElement, outCanvas: HTMLCanvasE
}
export async function all(inCanvas: HTMLCanvasElement, result: Result, drawOptions?: DrawOptions) {
const timestamp = now();
const localOptions = mergeDeep(options, drawOptions);
if (!result || !inCanvas) return;
if (!(inCanvas instanceof HTMLCanvasElement)) return;
if (!bufferedResult) bufferedResult = result; // first pass
else if (localOptions.bufferedOutput) calcBuffered(result, localOptions); // do results interpolation
else bufferedResult = result; // or just use results as-is
face(inCanvas, bufferedResult.face, localOptions);
body(inCanvas, bufferedResult.body, localOptions);
hand(inCanvas, bufferedResult.hand, localOptions);
object(inCanvas, bufferedResult.object, localOptions);
// person(inCanvas, bufferedResult.persons, localOptions);
face(inCanvas, result.face, localOptions);
body(inCanvas, result.body, localOptions);
hand(inCanvas, result.hand, localOptions);
object(inCanvas, result.object, localOptions);
// person(inCanvas, result.persons, localOptions);
gesture(inCanvas, result.gesture, localOptions); // gestures do not have buffering
/*
if (!bufferedResult) bufferedResult = result; // first pass
else if (localOptions.bufferedOutput) calcBuffered(result); // do results interpolation
else bufferedResult = result; // or just use results as-is
const promises: Promise<void>[] = [];
promises.push(face(inCanvas, bufferedResult.face, localOptions));
promises.push(body(inCanvas, bufferedResult.body, localOptions));
promises.push(hand(inCanvas, bufferedResult.hand, localOptions));
promises.push(object(inCanvas, bufferedResult.object, localOptions));
// promises.push(person(inCanvas, bufferedResult.persons, localOptions));
promises.push(gesture(inCanvas, result.gesture, localOptions)); // gestures do not have buffering
// await Promise.all(promises);
*/
result.performance.draw = Math.trunc(now() - timestamp);
}

View File

@ -8,9 +8,10 @@ import * as facemesh from './blazeface/facemesh';
import * as emotion from './emotion/emotion';
import * as faceres from './faceres/faceres';
import { Face } from './result';
import { Tensor } from './tfjs/types';
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const rad2deg = (theta) => (theta * 180) / Math.PI;
const rad2deg = (theta) => Math.round((theta * 180) / Math.PI);
const calculateGaze = (mesh, box): { bearing: number, strength: number } => {
const radians = (pt1, pt2) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
@ -31,11 +32,11 @@ const calculateGaze = (mesh, box): { bearing: number, strength: number } => {
(eyeCenter[0] - irisCenter[0]) / eyeSize[0] - offsetIris[0],
eyeRatio * (irisCenter[1] - eyeCenter[1]) / eyeSize[1] - offsetIris[1],
];
let vectorLength = Math.sqrt((eyeDiff[0] ** 2) + (eyeDiff[1] ** 2)); // vector length is a diagonal between two differences
vectorLength = Math.min(vectorLength, box[2] / 2, box[3] / 2); // limit strength to half of box size
const vectorAngle = radians([0, 0], eyeDiff); // using eyeDiff instead eyeCenter/irisCenter combo due to manual adjustments
let strength = Math.sqrt((eyeDiff[0] ** 2) + (eyeDiff[1] ** 2)); // vector length is a diagonal between two differences
strength = Math.min(strength, box[2] / 2, box[3] / 2); // limit strength to half of box size to avoid clipping due to low precision
const bearing = (radians([0, 0], eyeDiff) + (Math.PI / 2)) % Math.PI; // using eyeDiff instead of the eyeCenter/irisCenter combo due to manual adjustments, and rotate clockwise by 90 degrees
return { bearing: vectorAngle, strength: vectorLength };
return { bearing, strength };
};
const calculateFaceAngle = (face, imageSize): {
@ -137,7 +138,7 @@ const calculateFaceAngle = (face, imageSize): {
return { angle, matrix, gaze };
};
export const detectFace = async (parent, input): Promise<Face[]> => {
export const detectFace = async (parent /* instance of human */, input: Tensor): Promise<Face[]> => {
// run facemesh, includes blazeface and iris
// eslint-disable-next-line no-async-promise-executor
let timeStamp;
@ -150,7 +151,8 @@ export const detectFace = async (parent, input): Promise<Face[]> => {
parent.state = 'run:face';
timeStamp = now();
const faces = await facemesh.predict(input, parent.config);
parent.perf.face = Math.trunc(now() - timeStamp);
parent.performance.face = Math.trunc(now() - timeStamp);
if (!input.shape || input.shape.length !== 4) return [];
if (!faces) return [];
// for (const face of faces) {
for (let i = 0; i < faces.length; i++) {
@ -172,7 +174,7 @@ export const detectFace = async (parent, input): Promise<Face[]> => {
parent.state = 'run:emotion';
timeStamp = now();
emotionRes = parent.config.face.emotion.enabled ? await emotion.predict(faces[i].image, parent.config, i, faces.length) : {};
parent.perf.emotion = Math.trunc(now() - timeStamp);
parent.performance.emotion = Math.trunc(now() - timeStamp);
}
parent.analyze('End Emotion:');
@ -184,7 +186,7 @@ export const detectFace = async (parent, input): Promise<Face[]> => {
parent.state = 'run:description';
timeStamp = now();
descRes = parent.config.face.description.enabled ? await faceres.predict(faces[i].image, parent.config, i, faces.length) : [];
parent.perf.embedding = Math.trunc(now() - timeStamp);
parent.performance.embedding = Math.trunc(now() - timeStamp);
}
parent.analyze('End Description:');
@ -226,10 +228,10 @@ export const detectFace = async (parent, input): Promise<Face[]> => {
}
parent.analyze('End FaceMesh:');
if (parent.config.async) {
if (parent.perf.face) delete parent.perf.face;
if (parent.perf.age) delete parent.perf.age;
if (parent.perf.gender) delete parent.perf.gender;
if (parent.perf.emotion) delete parent.perf.emotion;
if (parent.performance.face) delete parent.performance.face;
if (parent.performance.age) delete parent.performance.age;
if (parent.performance.gender) delete parent.performance.gender;
if (parent.performance.emotion) delete parent.performance.emotion;
}
return faceRes;
};

View File

@ -109,7 +109,7 @@ export class HandPipeline {
const angle = config.hand.rotation ? util.computeRotation(currentBox.palmLandmarks[palmLandmarksPalmBase], currentBox.palmLandmarks[palmLandmarksMiddleFingerBase]) : 0;
const palmCenter = box.getBoxCenter(currentBox);
const palmCenterNormalized = [palmCenter[0] / image.shape[2], palmCenter[1] / image.shape[1]];
const rotatedImage = config.hand.rotation ? tf.image.rotateWithOffset(image, angle, 0, palmCenterNormalized) : image.clone();
const rotatedImage = config.hand.rotation && tf.ENV.flags.IS_BROWSER ? tf.image.rotateWithOffset(image, angle, 0, palmCenterNormalized) : image.clone();
const rotationMatrix = util.buildRotationMatrix(-angle, palmCenter);
const newBox = useFreshBox ? this.getBoxForPalmLandmarks(currentBox.palmLandmarks, rotationMatrix) : currentBox;
const croppedInput = box.cutBoxFromImageAndResize(newBox, rotatedImage, [this.inputSize, this.inputSize]);

View File

@ -23,6 +23,7 @@ import * as gesture from './gesture/gesture';
import * as image from './image/image';
import * as draw from './draw/draw';
import * as persons from './persons';
import * as interpolate from './interpolate';
import * as sample from './sample';
import * as app from '../package.json';
import { Tensor } from './tfjs/types';
@ -60,7 +61,7 @@ type Model = unknown;
* @param userConfig: {@link Config}
*/
export class Human {
/** Current version of Human library in semver format */
/** Current version of Human library in *semver* format */
version: string;
/** Current configuration
* - Details: {@link Config}
@ -72,6 +73,7 @@ export class Human {
result: Result;
/** Current state of Human library
* - Can be polled to determine operations that are currently executed
* - Progresses through: 'config', 'check', 'backend', 'load', 'run:<model>', 'idle'
*/
state: string;
/** @internal: Instance of current image being processed */
@ -105,7 +107,6 @@ export class Human {
efficientpose: Model | null,
movenet: Model | null,
handpose: [Model, Model] | null,
iris: Model | null,
age: Model | null,
gender: Model | null,
emotion: Model | null,
@ -124,14 +125,14 @@ export class Human {
centernet: typeof centernet;
faceres: typeof faceres;
};
/** Face triangulation array of 468 points, used for triangle references between points */
/** Reference face triangulation array of 468 points, used for triangle references between points */
faceTriangulation: typeof facemesh.triangulation;
/** UV map of 468 values, used for 3D mapping of the face mesh */
/** Reference UV map of 468 values, used for 3D mapping of the face mesh */
faceUVMap: typeof facemesh.uvmap;
/** Platform and agent information detected by Human */
sysinfo: { platform: string, agent: string };
/** Performance object that contains values for all recently performed operations */
perf: Record<string, unknown>; // perf members are dynamically defined as needed
performance: Record<string, unknown>; // perf members are dynamically defined as needed
#numTensors: number;
#analyzeMemoryLeaks: boolean;
#checkSanity: boolean;
@ -145,18 +146,18 @@ export class Human {
* Creates instance of Human library that is further used for all operations
* @param userConfig: {@link Config}
*/
constructor(userConfig: Config | Record<string, unknown> = {}) {
constructor(userConfig?: Config | Record<string, unknown>) {
this.config = mergeDeep(defaults, userConfig || {});
this.tf = tf;
this.draw = draw;
this.version = app.version;
this.config = mergeDeep(defaults, userConfig);
this.state = 'idle';
this.#numTensors = 0;
this.#analyzeMemoryLeaks = false;
this.#checkSanity = false;
this.#firstRun = true;
this.#lastCacheDiff = 0;
this.perf = {};
this.performance = { backend: 0, load: 0, image: 0, frames: 0, cached: 0, changed: 0, total: 0, draw: 0 };
// object that contains all initialized models
this.models = {
face: null,
@ -165,7 +166,6 @@ export class Human {
efficientpose: null,
movenet: null,
handpose: null,
iris: null,
age: null,
gender: null,
emotion: null,
@ -253,9 +253,9 @@ export class Human {
/** Load method preloads all configured models on-demand
* - Not explicitly required as any required model is loaded implicitly on its first run
* @param userConfig: {@link Config}
* @param userConfig?: {@link Config}
*/
async load(userConfig: Config | Record<string, unknown> = {}) {
async load(userConfig?: Config | Record<string, unknown>) {
this.state = 'load';
const timeStamp = now();
if (userConfig) this.config = mergeDeep(this.config, userConfig);
@ -315,7 +315,7 @@ export class Human {
}
const current = Math.trunc(now() - timeStamp);
if (current > (this.perf.load as number || 0)) this.perf.load = current;
if (current > (this.performance.load as number || 0)) this.performance.load = current;
}
// check if backend needs initialization if it changed
@ -366,9 +366,9 @@ export class Human {
if (this.tf.getBackend() === 'webgl' || this.tf.getBackend() === 'humangl') {
this.tf.ENV.set('CHECK_COMPUTATION_FOR_ERRORS', false);
this.tf.ENV.set('WEBGL_CPU_FORWARD', true);
tf.ENV.set('WEBGL_FORCE_F16_TEXTURES', true);
this.tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', true);
if (typeof this.config['deallocate'] !== 'undefined') {
if (!this.config.object.enabled) this.tf.ENV.set('WEBGL_FORCE_F16_TEXTURES', true); // safe to use 16bit precision
if (typeof this.config['deallocate'] !== 'undefined' && this.config['deallocate']) { // hidden param
log('changing webgl: WEBGL_DELETE_TEXTURE_THRESHOLD:', true);
this.tf.ENV.set('WEBGL_DELETE_TEXTURE_THRESHOLD', 0);
}
@ -376,10 +376,18 @@ export class Human {
if (this.config.debug) log(`gl version:${gl.getParameter(gl.VERSION)} renderer:${gl.getParameter(gl.RENDERER)}`);
}
await this.tf.ready();
this.perf.backend = Math.trunc(now() - timeStamp);
this.performance.backend = Math.trunc(now() - timeStamp);
}
}
/**
* Runs interpolation using the last known result and returns a smoothed result
* Interpolation is based on time since the last known result, so it can be called independently
* @param result?: use specific result set to run interpolation on
* @returns result: {@link Result}
*/
next = (result?: Result) => interpolate.calc(result || this.result) as Result;
// check if input changed sufficiently to trigger new detections
/** @hidden */
#skipFrame = async (input) => {
@ -414,10 +422,10 @@ export class Human {
* - Run inference for all configured models
* - Process and return result: {@link Result}
* @param input: Input
* @param userConfig: Config
* @param userConfig?: Config
* @returns result: Result
*/
async detect(input: Input, userConfig: Config | Record<string, unknown> = {}): Promise<Result | Error> {
async detect(input: Input, userConfig?: Config | Record<string, unknown>): Promise<Result | Error> {
// detection happens inside a promise
return new Promise(async (resolve) => {
this.state = 'config';
@ -466,18 +474,18 @@ export class Human {
resolve({ error: 'could not convert input to tensor' });
return;
}
this.perf.image = Math.trunc(now() - timeStamp);
this.performance.image = Math.trunc(now() - timeStamp);
this.analyze('Get Image:');
timeStamp = now();
// @ts-ignore hidden dynamic property that is not part of definitions
this.config.skipFrame = await this.#skipFrame(process.tensor);
if (!this.perf.frames) this.perf.frames = 0;
if (!this.perf.cached) this.perf.cached = 0;
(this.perf.frames as number)++;
if (!this.performance.frames) this.performance.frames = 0;
if (!this.performance.cached) this.performance.cached = 0;
(this.performance.frames as number)++;
// @ts-ignore hidden dynamic property that is not part of definitions
if (this.config.skipFrame) this.perf.cached++;
this.perf.changed = Math.trunc(now() - timeStamp);
if (this.config.skipFrame) this.performance.cached++;
this.performance.changed = Math.trunc(now() - timeStamp);
this.analyze('Check Changed:');
// prepare where to store model results
@ -491,13 +499,13 @@ export class Human {
// run face detection followed by all models that rely on face bounding box: face mesh, age, gender, emotion
if (this.config.async) {
faceRes = this.config.face.enabled ? face.detectFace(this, process.tensor) : [];
if (this.perf.face) delete this.perf.face;
if (this.performance.face) delete this.performance.face;
} else {
this.state = 'run:face';
timeStamp = now();
faceRes = this.config.face.enabled ? await face.detectFace(this, process.tensor) : [];
elapsedTime = Math.trunc(now() - timeStamp);
if (elapsedTime > 0) this.perf.face = elapsedTime;
if (elapsedTime > 0) this.performance.face = elapsedTime;
}
// run body: can be posenet, blazepose, efficientpose, movenet
@ -507,7 +515,7 @@ export class Human {
else if (this.config.body.modelPath.includes('blazepose')) bodyRes = this.config.body.enabled ? blazepose.predict(process.tensor, this.config) : [];
else if (this.config.body.modelPath.includes('efficientpose')) bodyRes = this.config.body.enabled ? efficientpose.predict(process.tensor, this.config) : [];
else if (this.config.body.modelPath.includes('movenet')) bodyRes = this.config.body.enabled ? movenet.predict(process.tensor, this.config) : [];
if (this.perf.body) delete this.perf.body;
if (this.performance.body) delete this.performance.body;
} else {
this.state = 'run:body';
timeStamp = now();
@ -516,7 +524,7 @@ export class Human {
else if (this.config.body.modelPath.includes('efficientpose')) bodyRes = this.config.body.enabled ? await efficientpose.predict(process.tensor, this.config) : [];
else if (this.config.body.modelPath.includes('movenet')) bodyRes = this.config.body.enabled ? await movenet.predict(process.tensor, this.config) : [];
elapsedTime = Math.trunc(now() - timeStamp);
if (elapsedTime > 0) this.perf.body = elapsedTime;
if (elapsedTime > 0) this.performance.body = elapsedTime;
}
this.analyze('End Body:');
@ -524,13 +532,13 @@ export class Human {
this.analyze('Start Hand:');
if (this.config.async) {
handRes = this.config.hand.enabled ? handpose.predict(process.tensor, this.config) : [];
if (this.perf.hand) delete this.perf.hand;
if (this.performance.hand) delete this.performance.hand;
} else {
this.state = 'run:hand';
timeStamp = now();
handRes = this.config.hand.enabled ? await handpose.predict(process.tensor, this.config) : [];
elapsedTime = Math.trunc(now() - timeStamp);
if (elapsedTime > 0) this.perf.hand = elapsedTime;
if (elapsedTime > 0) this.performance.hand = elapsedTime;
}
this.analyze('End Hand:');
@ -539,14 +547,14 @@ export class Human {
if (this.config.async) {
if (this.config.object.modelPath.includes('nanodet')) objectRes = this.config.object.enabled ? nanodet.predict(process.tensor, this.config) : [];
else if (this.config.object.modelPath.includes('centernet')) objectRes = this.config.object.enabled ? centernet.predict(process.tensor, this.config) : [];
if (this.perf.object) delete this.perf.object;
if (this.performance.object) delete this.performance.object;
} else {
this.state = 'run:object';
timeStamp = now();
if (this.config.object.modelPath.includes('nanodet')) objectRes = this.config.object.enabled ? await nanodet.predict(process.tensor, this.config) : [];
else if (this.config.object.modelPath.includes('centernet')) objectRes = this.config.object.enabled ? await centernet.predict(process.tensor, this.config) : [];
elapsedTime = Math.trunc(now() - timeStamp);
if (elapsedTime > 0) this.perf.object = elapsedTime;
if (elapsedTime > 0) this.performance.object = elapsedTime;
}
this.analyze('End Object:');
@ -558,11 +566,11 @@ export class Human {
if (this.config.gesture.enabled) {
timeStamp = now();
gestureRes = [...gesture.face(faceRes), ...gesture.body(bodyRes), ...gesture.hand(handRes), ...gesture.iris(faceRes)];
if (!this.config.async) this.perf.gesture = Math.trunc(now() - timeStamp);
else if (this.perf.gesture) delete this.perf.gesture;
if (!this.config.async) this.performance.gesture = Math.trunc(now() - timeStamp);
else if (this.performance.gesture) delete this.performance.gesture;
}
this.perf.total = Math.trunc(now() - timeStart);
this.performance.total = Math.trunc(now() - timeStart);
this.state = 'idle';
this.result = {
face: faceRes,
@ -570,7 +578,7 @@ export class Human {
hand: handRes,
gesture: gestureRes,
object: objectRes,
performance: this.perf,
performance: this.performance,
canvas: process.canvas,
timestamp: Date.now(),
get persons() { return persons.join(faceRes, bodyRes, handRes, gestureRes, process?.tensor?.shape); },
@ -666,9 +674,9 @@ export class Human {
/** Warmup method pre-initializes all models for faster inference
* - can take significant time on startup
* - only used for `webgl` and `humangl` backends
* @param userConfig: Config
* @param userConfig?: Config
*/
async warmup(userConfig: Config | Record<string, unknown> = {}): Promise<Result | { error }> {
async warmup(userConfig?: Config | Record<string, unknown>): Promise<Result | { error }> {
const t0 = now();
if (userConfig) this.config = mergeDeep(this.config, userConfig);
if (!this.config.warmup || this.config.warmup === 'none') return { error: 'null' };

src/interpolate.ts (new file, 114 lines)
View File

@ -0,0 +1,114 @@
/**
* Module that interpolates results for smoother animations
*/
import type { Result, Face, Body, Hand, Item, Gesture, Person } from './result';
const bufferedResult: Result = { face: [], body: [], hand: [], gesture: [], object: [], persons: [], performance: {}, timestamp: 0 };
export function calc(newResult: Result): Result {
// each record is only updated using deep clone when the number of detected records changes, otherwise it will converge by itself
// otherwise bufferedResult is a shallow clone of result plus updated local calculated values
// thus mixing by-reference and by-value assignments to minimize memory operations
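// note: the interpolation factor below is derived from the time elapsed since the last detection result, so convergence toward the new result speeds up as that result ages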
const bufferedFactor = 1000 / (Date.now() - newResult.timestamp) / 4;
// interpolate body results
if (!bufferedResult.body || (newResult.body.length !== bufferedResult.body.length)) {
bufferedResult.body = JSON.parse(JSON.stringify(newResult.body as Body[])); // deep clone once
} else {
for (let i = 0; i < newResult.body.length; i++) {
const box = newResult.body[i].box // update box
.map((b, j) => ((bufferedFactor - 1) * bufferedResult.body[i].box[j] + b) / bufferedFactor) as [number, number, number, number];
const boxRaw = newResult.body[i].boxRaw // update boxRaw
.map((b, j) => ((bufferedFactor - 1) * bufferedResult.body[i].boxRaw[j] + b) / bufferedFactor) as [number, number, number, number];
const keypoints = newResult.body[i].keypoints // update keypoints
.map((keypoint, j) => ({
score: keypoint.score,
part: keypoint.part,
position: {
x: bufferedResult.body[i].keypoints[j] ? ((bufferedFactor - 1) * bufferedResult.body[i].keypoints[j].position.x + keypoint.position.x) / bufferedFactor : keypoint.position.x,
y: bufferedResult.body[i].keypoints[j] ? ((bufferedFactor - 1) * bufferedResult.body[i].keypoints[j].position.y + keypoint.position.y) / bufferedFactor : keypoint.position.y,
},
}));
bufferedResult.body[i] = { ...newResult.body[i], box, boxRaw, keypoints }; // shallow clone plus updated values
}
}
// interpolate hand results
if (!bufferedResult.hand || (newResult.hand.length !== bufferedResult.hand.length)) {
bufferedResult.hand = JSON.parse(JSON.stringify(newResult.hand as Hand[])); // deep clone once
} else {
for (let i = 0; i < newResult.hand.length; i++) {
const box = (newResult.hand[i].box// update box
.map((b, j) => ((bufferedFactor - 1) * bufferedResult.hand[i].box[j] + b) / bufferedFactor)) as [number, number, number, number];
const boxRaw = (newResult.hand[i].boxRaw // update boxRaw
.map((b, j) => ((bufferedFactor - 1) * bufferedResult.hand[i].boxRaw[j] + b) / bufferedFactor)) as [number, number, number, number];
const landmarks = newResult.hand[i].landmarks // update landmarks
.map((landmark, j) => landmark
.map((coord, k) => (((bufferedFactor - 1) * bufferedResult.hand[i].landmarks[j][k] + coord) / bufferedFactor)) as [number, number, number]);
const keys = Object.keys(newResult.hand[i].annotations); // update annotations
const annotations = {};
for (const key of keys) {
annotations[key] = newResult.hand[i].annotations[key]
.map((val, j) => val.map((coord, k) => ((bufferedFactor - 1) * bufferedResult.hand[i].annotations[key][j][k] + coord) / bufferedFactor));
}
bufferedResult.hand[i] = { ...newResult.hand[i], box, boxRaw, landmarks, annotations }; // shallow clone plus updated values
}
}
// interpolate face results
if (!bufferedResult.face || (newResult.face.length !== bufferedResult.face.length)) {
bufferedResult.face = JSON.parse(JSON.stringify(newResult.face as Face[])); // deep clone once
} else {
for (let i = 0; i < newResult.face.length; i++) {
const box = (newResult.face[i].box // update box
.map((b, j) => ((bufferedFactor - 1) * bufferedResult.face[i].box[j] + b) / bufferedFactor)) as [number, number, number, number];
const boxRaw = (newResult.face[i].boxRaw // update boxRaw
.map((b, j) => ((bufferedFactor - 1) * bufferedResult.face[i].boxRaw[j] + b) / bufferedFactor)) as [number, number, number, number];
const matrix = newResult.face[i].rotation.matrix;
const angle = {
roll: ((bufferedFactor - 1) * bufferedResult.face[i].rotation.angle.roll + newResult.face[i].rotation.angle.roll) / bufferedFactor,
yaw: ((bufferedFactor - 1) * bufferedResult.face[i].rotation.angle.yaw + newResult.face[i].rotation.angle.yaw) / bufferedFactor,
pitch: ((bufferedFactor - 1) * bufferedResult.face[i].rotation.angle.pitch + newResult.face[i].rotation.angle.pitch) / bufferedFactor,
};
const gaze = {
// not fully correct due to projection on a circle; also causes wrap-around draw on a jump from negative to positive
bearing: ((bufferedFactor - 1) * bufferedResult.face[i].rotation.gaze.bearing + newResult.face[i].rotation.gaze.bearing) / bufferedFactor,
strength: ((bufferedFactor - 1) * bufferedResult.face[i].rotation.gaze.strength + newResult.face[i].rotation.gaze.strength) / bufferedFactor,
};
const rotation = { angle, matrix, gaze };
bufferedResult.face[i] = { ...newResult.face[i], rotation, box, boxRaw }; // shallow clone plus updated values
}
}
// interpolate object detection results
if (!bufferedResult.object || (newResult.object.length !== bufferedResult.object.length)) {
bufferedResult.object = JSON.parse(JSON.stringify(newResult.object as Item[])); // deep clone once
} else {
for (let i = 0; i < newResult.object.length; i++) {
const box = newResult.object[i].box // update box
.map((b, j) => ((bufferedFactor - 1) * bufferedResult.object[i].box[j] + b) / bufferedFactor);
const boxRaw = newResult.object[i].boxRaw // update boxRaw
.map((b, j) => ((bufferedFactor - 1) * bufferedResult.object[i].boxRaw[j] + b) / bufferedFactor);
bufferedResult.object[i] = { ...newResult.object[i], box, boxRaw }; // shallow clone plus updated values
}
}
// interpolate person results
const newPersons = newResult.persons; // trigger getter function
if (!bufferedResult.persons || (newPersons.length !== bufferedResult.persons.length)) {
bufferedResult.persons = JSON.parse(JSON.stringify(newPersons as Person[]));
} else {
for (let i = 0; i < newPersons.length; i++) { // update person box, we don't update the rest as it's updated as reference anyhow
bufferedResult.persons[i].box = (newPersons[i].box
.map((box, j) => ((bufferedFactor - 1) * bufferedResult.persons[i].box[j] + box) / bufferedFactor)) as [number, number, number, number];
}
}
// just copy latest gestures without interpolation
bufferedResult.gesture = newResult.gesture as Gesture[];
bufferedResult.performance = newResult.performance;
return bufferedResult;
}

View File

@ -176,7 +176,7 @@ export interface Result {
/** {@link Object}: detection & analysis results */
object: Array<Item>
/** global performance object with timing values for each operation */
readonly performance: Record<string, unknown>,
performance: Record<string, unknown>,
/** optional processed canvas that can be used to draw input on screen */
readonly canvas?: OffscreenCanvas | HTMLCanvasElement,
/** timestamp of detection representing the milliseconds elapsed since the UNIX epoch */