diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0d0ef3ef..773f6ea4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,11 +9,20 @@
## Changelog
+### **HEAD -> main** 2021/10/10 mandic00@live.com
+
+
+### **origin/main** 2021/10/08 mandic00@live.com
+
+- demo default config cleanup
+- improve gaze and face angle visualizations in draw
+
+### **release 2.3.1** 2021/10/06 mandic00@live.com
+
+
### **2.3.1** 2021/10/06 mandic00@live.com
-
-### **origin/main** 2021/10/06 mandic00@live.com
-
+- workaround for chrome offscreencanvas bug
- fix backend conflict in webworker
- add blazepose v2 and add annotations to body results
- fix backend order initialization
diff --git a/demo/index.js b/demo/index.js
index 3fa1c4d7..3030b63b 100644
--- a/demo/index.js
+++ b/demo/index.js
@@ -31,15 +31,6 @@ import jsonView from './helpers/jsonview.js';
let human;
let userConfig = {
- cacheSensitivity: 0,
- hand: { enabled: true },
- body: { enabled: false },
- face: { enabled: false },
- /*
- hand: { enabled: false, maxDetected: 1, skipFrames: 0 },
- body: { enabled: false },
- face: { enabled: false },
- */
/*
warmup: 'none',
backend: 'humangl',
@@ -118,6 +109,7 @@ const ui = {
lastFrame: 0, // time of last frame processing
viewportSet: false, // internal, has custom viewport been set
background: null, // holds instance of segmentation background image
+ transferCanvas: null, // canvas used to transfer data to and from worker
// webrtc
useWebRTC: false, // use webrtc as camera source instead of local webcam
@@ -318,7 +310,7 @@ async function drawResults(input) {
const fps = avgDetect > 0 ? `FPS process:${avgDetect} refresh:${avgDraw}` : '';
const backend = result.backend || human.tf.getBackend();
const gpu = engine.backendInstance ? `gpu: ${(engine.backendInstance.numBytesInGPU ? engine.backendInstance.numBytesInGPU : 0).toLocaleString()} bytes` : '';
- const memory = result.tensors || `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
+ const memory = result.tensors ? `tensors: ${result.tensors.toLocaleString()} in worker` : `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
document.getElementById('log').innerHTML = `
video: ${ui.camera.name} | facing: ${ui.camera.facing} | screen: ${window.innerWidth} x ${window.innerHeight} camera: ${ui.camera.width} x ${ui.camera.height} ${processing}
backend: ${backend} | ${memory}
@@ -469,13 +461,17 @@ function webWorker(input, image, canvas, timestamp) {
if (document.getElementById('gl-bench')) document.getElementById('gl-bench').style.display = ui.bench ? 'block' : 'none';
lastDetectedResult = msg.data.result;
- if (msg.data.image) {
- lastDetectedResult.canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(msg.data.width, msg.data.height) : document.createElement('canvas');
- lastDetectedResult.canvas.width = msg.data.width;
- lastDetectedResult.canvas.height = msg.data.height;
+      if (msg.data.image) { // we don't really need the canvas since we draw directly from the video
+ /*
+ if (!lastDetectedResult.canvas || lastDetectedResult.canvas.width !== msg.data.width || lastDetectedResult.canvas.height !== msg.data.height) {
+ lastDetectedResult.canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(msg.data.width, msg.data.height) : document.createElement('canvas');
+ lastDetectedResult.canvas.width = msg.data.width;
+ lastDetectedResult.canvas.height = msg.data.height;
+ }
const ctx = lastDetectedResult.canvas.getContext('2d');
const imageData = new ImageData(new Uint8ClampedArray(msg.data.image), msg.data.width, msg.data.height);
ctx.putImageData(imageData, 0, 0);
+ */
}
ui.framesDetect++;
@@ -508,10 +504,12 @@ function runHumanDetect(input, canvas, timestamp) {
if (ui.hintsThread) clearInterval(ui.hintsThread);
if (ui.useWorker && human.env.offscreen) {
// get image data from video as we cannot send html objects to webworker
- const offscreen = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(canvas.width, canvas.height) : document.createElement('canvas');
- offscreen.width = canvas.width;
- offscreen.height = canvas.height;
- const ctx = offscreen.getContext('2d');
+      if (!ui.transferCanvas || ui.transferCanvas.width !== canvas.width || ui.transferCanvas.height !== canvas.height) {
+ ui.transferCanvas = document.createElement('canvas');
+ ui.transferCanvas.width = canvas.width;
+ ui.transferCanvas.height = canvas.height;
+ }
+ const ctx = ui.transferCanvas.getContext('2d');
ctx.drawImage(input, 0, 0, canvas.width, canvas.height);
const data = ctx.getImageData(0, 0, canvas.width, canvas.height);
// perform detection in worker
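
For context on the change above: instead of allocating a fresh OffscreenCanvas on every frame (which triggered the Chrome OffscreenCanvas issue noted in the changelog), the demo now reuses a single transfer canvas and only resizes it when the source dimensions change. A minimal sketch of that pattern, assuming a DOM environment and a `worker` created elsewhere; the helper name `grabFrame` is illustrative, not part of the demo:

```ts
// sketch only: reuse one scratch canvas across frames to extract pixels for the worker
let transferCanvas: HTMLCanvasElement | null = null;

function grabFrame(video: HTMLVideoElement, width: number, height: number): ImageData {
  // (re)create the scratch canvas only when the target size actually changes
  if (!transferCanvas || transferCanvas.width !== width || transferCanvas.height !== height) {
    transferCanvas = document.createElement('canvas');
    transferCanvas.width = width;
    transferCanvas.height = height;
  }
  const ctx = transferCanvas.getContext('2d') as CanvasRenderingContext2D;
  ctx.drawImage(video, 0, 0, width, height);
  return ctx.getImageData(0, 0, width, height);
}

// usage: post the raw pixel buffer to the worker as a transferable to avoid copying it
// const data = grabFrame(video, canvas.width, canvas.height);
// worker.postMessage({ image: data.data.buffer, width: data.width, height: data.height }, [data.data.buffer]);
```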
diff --git a/package.json b/package.json
index 4ce730c8..c895a833 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@vladmandic/human",
- "version": "2.3.1",
+ "version": "2.3.2",
"description": "Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition",
"sideEffects": false,
"main": "dist/human.node.js",
@@ -74,7 +74,6 @@
"canvas": "^2.8.0",
"dayjs": "^1.10.7",
"esbuild": "^0.13.4",
- "eslint": "^7.32.0",
"eslint-config-airbnb-base": "^14.2.1",
"eslint-plugin-import": "^2.24.2",
"eslint-plugin-json": "^3.1.0",
@@ -86,5 +85,8 @@
"tslib": "^2.3.1",
"typedoc": "0.22.5",
"typescript": "4.4.3"
+ },
+ "dependencies": {
+ "eslint": "7.32.0"
}
}
diff --git a/src/config.ts b/src/config.ts
index f1dd6d75..66a61e46 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -420,12 +420,12 @@ const config: Config = {
rotation: true, // use best-guess rotated hand image or just box with rotation as-is
// false means higher performance, but incorrect finger mapping if hand is inverted
// only valid for `handdetect` variation
- skipFrames: 1, // how many max frames to go without re-running the hand bounding box detector
+ skipFrames: 2, // how many max frames to go without re-running the hand bounding box detector
// only used when cacheSensitivity is not zero
    // e.g., if the model is running at 25 FPS, we can re-use the existing bounding
    // box for updated hand skeleton analysis as the hand
    // hasn't moved much in that short time (2 * 1/25 = 0.08 sec)
- minConfidence: 0.55, // threshold for discarding a prediction
+ minConfidence: 0.50, // threshold for discarding a prediction
    iouThreshold: 0.2, // amount of overlap between two detected objects before one object is removed
maxDetected: -1, // maximum number of hands detected in the input
// should be set to the minimum number for performance
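
The `skipFrames` comment above reasons in terms of how long a cached bounding box can be reused before the detector must rerun. A tiny illustrative helper for that arithmetic (not part of the library):

```ts
// illustrative only: upper bound on how long a cached hand box is reused before the detector reruns
function maxReuseSeconds(skipFrames: number, fps: number): number {
  return skipFrames / fps;
}

console.log(maxReuseSeconds(2, 25)); // 0.08 sec with the new default skipFrames: 2 at 25 FPS
```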
diff --git a/src/face/facemesh.ts b/src/face/facemesh.ts
index f9810c1d..9cbe4a47 100644
--- a/src/face/facemesh.ts
+++ b/src/face/facemesh.ts
@@ -83,7 +83,7 @@ export async function predict(input: Tensor, config: Config): Promise<FaceResult[]> {
      face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
- for (const key of Object.keys(coords.blazeFaceLandmarks)) face.annotations[key] = [face.mesh[coords.blazeFaceLandmarks[key]]]; // add annotations
+ for (const key of Object.keys(coords.blazeFaceLandmarks)) face.annotations[key] = [face.mesh[coords.blazeFaceLandmarks[key] as number]]; // add annotations
} else if (!model) { // mesh enabled, but not loaded
if (config.debug) log('face mesh detection requested, but model is not loaded');
} else { // mesh enabled
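
The `as number` cast above is needed because `blazeFaceLandmarks` gains an explicit mixed `Record` type in `facemeshcoords.ts` (next file), so a keyed lookup is no longer guaranteed to be a plain index. A stripped-down sketch of the annotation lookup, with simplified types assumed for illustration:

```ts
// sketch: map named detector landmarks onto mesh points by index lookup
type Point = [number, number, number];

const blazeFaceLandmarks: Record<string, number | number[]> = {
  leftEye: 0, rightEye: 1, nose: 2, symmetryLine: [3, 2], // subset shown for illustration
};

function annotate(mesh: Point[]): Record<string, Point[]> {
  const annotations: Record<string, Point[]> = {};
  for (const key of Object.keys(blazeFaceLandmarks)) {
    const idx = blazeFaceLandmarks[key];
    // the library narrows with `as number`; here we guard explicitly to keep the sketch type-safe
    if (typeof idx === 'number') annotations[key] = [mesh[idx]];
  }
  return annotations;
}
```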
diff --git a/src/face/facemeshcoords.ts b/src/face/facemeshcoords.ts
index c06dd6f1..8ed03b92 100644
--- a/src/face/facemeshcoords.ts
+++ b/src/face/facemeshcoords.ts
@@ -3,7 +3,7 @@
* See `facemesh.ts` for entry point
*/
-export const meshAnnotations = {
+export const meshAnnotations: Record<string, number[]> = {
silhouette: [
10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
@@ -42,13 +42,13 @@ export const meshAnnotations = {
leftCheek: [425],
};
-export const meshLandmarks = {
+export const meshLandmarks: Record<string, number | number[]> = {
count: 468,
mouth: 13,
symmetryLine: [13, meshAnnotations['midwayBetweenEyes'][0]],
};
-export const blazeFaceLandmarks = {
+export const blazeFaceLandmarks: Record<string, number | number[]> = {
leftEye: 0,
rightEye: 1,
nose: 2,
@@ -58,7 +58,7 @@ export const blazeFaceLandmarks = {
symmetryLine: [3, 2],
};
-export const MESH_TO_IRIS_INDICES_MAP = [ // A mapping from facemesh model keypoints to iris model keypoints.
+export const MESH_TO_IRIS_INDICES_MAP: Array<{ key: string, indices: number[] }> = [ // A mapping from facemesh model keypoints to iris model keypoints.
{ key: 'EyeUpper0', indices: [9, 10, 11, 12, 13, 14, 15] },
{ key: 'EyeUpper1', indices: [25, 26, 27, 28, 29, 30, 31] },
{ key: 'EyeUpper2', indices: [41, 42, 43, 44, 45, 46, 47] },
@@ -70,7 +70,7 @@ export const MESH_TO_IRIS_INDICES_MAP = [ // A mapping from facemesh model keypo
// { key: 'EyebrowLower', indices: [48, 49, 50, 51, 52, 53] },
];
-export const UV468 = [
+export const UV468: [number, number][] = [
[0.499976992607117, 0.652534008026123],
[0.500025987625122, 0.547487020492554],
[0.499974012374878, 0.602371990680695],
@@ -541,7 +541,7 @@ export const UV468 = [
[0.723330020904541, 0.363372981548309],
];
-export const TRI468 = [
+export const TRI468: Array<number> = [
127, 34, 139, 11, 0, 37, 232, 231, 120, 72, 37, 39, 128, 121, 47, 232, 121, 128, 104, 69, 67, 175, 171, 148, 157, 154, 155, 118, 50, 101, 73, 39, 40, 9,
151, 108, 48, 115, 131, 194, 204, 211, 74, 40, 185, 80, 42, 183, 40, 92, 186, 230, 229, 118, 202, 212, 214, 83, 18, 17, 76, 61, 146, 160, 29, 30, 56,
157, 173, 106, 204, 194, 135, 214, 192, 203, 165, 98, 21, 71, 68, 51, 45, 4, 144, 24, 23, 77, 146, 91, 205, 50, 187, 201, 200, 18, 91, 106, 182, 90, 91,
@@ -627,7 +627,7 @@ export const TRI468 = [
259, 443, 259, 260, 444, 260, 467, 445, 309, 459, 250, 305, 289, 290, 305, 290, 460, 401, 376, 435, 309, 250, 392, 376, 411, 433, 453, 341, 464, 357,
453, 465, 343, 357, 412, 437, 343, 399, 344, 360, 440, 420, 437, 456, 360, 420, 363, 361, 401, 288, 265, 372, 353, 390, 339, 249, 339, 448, 255];
-export const TRI68 = [0, 1, 36, 0, 36, 17, 1, 2, 41, 1, 41, 36, 2, 3, 31, 2, 31, 41, 3, 4, 48, 3, 48, 31, 4, 5, 48, 5, 6, 48, 6, 7, 59, 6, 59, 48, 7, 8, 58, 7, 58, 59,
+export const TRI68: Array<number> = [0, 1, 36, 0, 36, 17, 1, 2, 41, 1, 41, 36, 2, 3, 31, 2, 31, 41, 3, 4, 48, 3, 48, 31, 4, 5, 48, 5, 6, 48, 6, 7, 59, 6, 59, 48, 7, 8, 58, 7, 58, 59,
8, 9, 56, 8, 56, 57, 8, 57, 58, 9, 10, 55, 9, 55, 56, 10, 11, 54, 10, 54, 55, 11, 12, 54, 12, 13, 54, 13, 14, 35, 13, 35, 54, 14, 15, 46, 14, 46, 35, 15, 16,
45, 15, 45, 46, 16, 26, 45, 17, 36, 18, 18, 37, 19, 18, 36, 37, 19, 38, 20, 19, 37, 38, 20, 39, 21, 20, 38, 39, 21, 39, 27, 22, 42, 23, 22, 27, 42, 23, 43, 24,
23, 42, 43, 24, 44, 25, 24, 43, 44, 25, 45, 26, 25, 44, 45, 27, 39, 28, 27, 28, 42, 28, 39, 29, 28, 29, 42, 29, 31, 30, 29, 30, 35, 29, 40, 31, 29, 35, 47, 29,
@@ -636,7 +636,7 @@ export const TRI68 = [0, 1, 36, 0, 36, 17, 1, 2, 41, 1, 41, 36, 2, 3, 31, 2, 31,
48, 59, 60, 49, 61, 50, 49, 60, 61, 50, 62, 51, 50, 61, 62, 51, 62, 52, 52, 63, 53, 52, 62, 63, 53, 64, 54, 53, 63, 64, 54, 64, 55, 55, 65, 56, 55, 64, 65, 56,
66, 57, 56, 65, 66, 57, 66, 58, 58, 67, 59, 58, 66, 67, 59, 67, 60, 60, 67, 61, 61, 66, 62, 61, 67, 66, 62, 66, 63, 63, 65, 64, 63, 66, 65, 21, 27, 22];
-export const TRI33 = [
+export const TRI33: Array<number> = [
/* eyes */ 0, 8, 7, 7, 8, 1, 2, 10, 9, 9, 10, 3,
/* brows */ 17, 0, 18, 18, 0, 7, 18, 7, 19, 19, 7, 1, 19, 1, 11, 19, 11, 20, 21, 3, 22, 21, 9, 3, 20, 9, 21, 20, 2, 9, 20, 11, 2,
/* 4head */ 23, 17, 18, 25, 21, 22, 24, 19, 20, 24, 18, 19, 24, 20, 21, 24, 23, 18, 24, 21, 25,
@@ -647,9 +647,9 @@ export const TRI33 = [
/* cont */ 26, 30, 5, 27, 6, 31, 0, 28, 26, 3, 27, 29, 17, 28, 0, 3, 29, 22, 23, 28, 17, 22, 29, 25, 28, 30, 26, 27, 31, 29,
];
-export const TRI7 = [0, 4, 1, 2, 4, 3, 4, 5, 6];
+export const TRI7: Array<number> = [0, 4, 1, 2, 4, 3, 4, 5, 6];
-export const VTX68 = [
+export const VTX68: Array<number> = [
/* cont */ 127, 234, 132, 58, 172, 150, 149, 148, 152, 377, 378, 379, 397, 288, 361, 454, 356,
/* brows */ 70, 63, 105, 66, 107, 336, 296, 334, 293, 300,
/* nose */ 168, 6, 195, 4, 98, 97, 2, 326, 327,
@@ -658,9 +658,9 @@ export const VTX68 = [
/* mouth */ 78, 81, 13, 311, 308, 402, 14, 178,
];
-export const VTX33 = [33, 133, 362, 263, 1, 62, 308, 159, 145, 386, 374, 6, 102, 331, 2, 13, 14, 70, 105, 107, 336, 334, 300, 54, 10, 284, 50, 280, 234, 454, 58, 288, 152];
+export const VTX33: Array<number> = [33, 133, 362, 263, 1, 62, 308, 159, 145, 386, 374, 6, 102, 331, 2, 13, 14, 70, 105, 107, 336, 334, 300, 54, 10, 284, 50, 280, 234, 454, 58, 288, 152];
-export const VTX7 = [33, 133, 362, 263, 1, 78, 308];
+export const VTX7: Array<number> = [33, 133, 362, 263, 1, 78, 308];
export const UV68 = VTX68.map((x) => UV468[x]);
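
The last line of this file builds the 68-point UV table by indexing the dense 468-point UV table with the vertex list in `VTX68`. A tiny self-contained illustration of that index-mapping pattern, using synthetic stand-in data rather than the real tables:

```ts
// sketch: derive a reduced UV table by indexing a dense table with a vertex-index list
const denseUV: [number, number][] = [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.7, 0.8]]; // stand-in for UV468
const vertexSubset: number[] = [2, 0, 3];                                             // stand-in for VTX68

const reducedUV: [number, number][] = vertexSubset.map((v) => denseUV[v]);
console.log(reducedUV); // [[0.5, 0.6], [0.1, 0.2], [0.7, 0.8]]
```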
diff --git a/src/face/facemeshutil.ts b/src/face/facemeshutil.ts
index aa5cdc68..626930a1 100644
--- a/src/face/facemeshutil.ts
+++ b/src/face/facemeshutil.ts
@@ -152,8 +152,8 @@ export function transformRawCoords(rawCoords, box, angle, rotationMatrix, inputS
}
export function correctFaceRotation(box, input, inputSize) {
- const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= coords.meshLandmarks.count) ? coords.meshLandmarks.symmetryLine : coords.blazeFaceLandmarks.symmetryLine;
- const angle: number = computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
+ const symmetryLine = (box.landmarks.length >= coords.meshLandmarks.count) ? coords.meshLandmarks.symmetryLine : coords.blazeFaceLandmarks.symmetryLine;
+ const angle: number = computeRotation(box.landmarks[symmetryLine[0]], box.landmarks[symmetryLine[1]]);
const faceCenter: Point = getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
const faceCenterNormalized: Point = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
const rotated = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
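
The rewrite above picks the two symmetry-line indices explicitly (the new `Record` type of `blazeFaceLandmarks` makes tuple destructuring awkward) and then computes the rotation angle between those two landmarks. A hedged sketch of what an angle computation of this kind typically looks like; the library's actual `computeRotation` may normalize differently:

```ts
type Point = [number, number] | [number, number, number];

// keep the angle within (-pi, pi]
function normalizeRadians(angle: number): number {
  return angle - 2 * Math.PI * Math.floor((angle + Math.PI) / (2 * Math.PI));
}

// assumed form: rotation needed to make the line from point1 (mouth) to point2 (forehead) vertical
function computeRotation(point1: Point, point2: Point): number {
  return normalizeRadians(Math.PI / 2 - Math.atan2(-(point2[1] - point1[1]), point2[0] - point1[0]));
}

// usage with a symmetry line expressed as [mouthIndex, foreheadIndex]:
// const angle = computeRotation(landmarks[symmetryLine[0]], landmarks[symmetryLine[1]]);
```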
diff --git a/src/hand/handtrack.ts b/src/hand/handtrack.ts
index c1e1b717..efd0be35 100644
--- a/src/hand/handtrack.ts
+++ b/src/hand/handtrack.ts
@@ -22,10 +22,11 @@ const modelOutputNodes = ['StatefulPartitionedCall/Postprocessor/Slice', 'Statef
const inputSize = [[0, 0], [0, 0]];
const classes = ['hand', 'fist', 'pinch', 'point', 'face', 'tip', 'pinchtip'];
+const faceIndex = 4;
const boxExpandFact = 1.6;
const maxDetectorResolution = 512;
-const detectorExpandFact = 1.2;
+const detectorExpandFact = 1.4;
let skipped = 0;
let outputSize: [number, number] = [0, 0];
@@ -104,10 +105,11 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectResult[]> {
    const classScores: Array<Tensor> = tf.unstack(t.scores, 1); // unstack scores based on classes
+ tf.dispose(classScores[faceIndex]);
+ classScores.splice(faceIndex, 1); // remove faces
t.filtered = tf.stack(classScores, 1); // restack
- tf.dispose(...classScores);
+ tf.dispose(classScores);
t.max = tf.max(t.filtered, 1); // max overall score
t.argmax = tf.argMax(t.filtered, 1); // class index of max overall score
let id = 0;
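
The change above removes the `face` class from the detector's score tensor before computing the per-anchor max, so detected faces no longer compete with hand classes. A minimal, self-contained sketch of the unstack/splice/restack pattern on synthetic data; shapes and values are illustrative only:

```ts
import * as tf from '@tensorflow/tfjs';

const faceIndex = 4;                         // position of the unwanted class along the class axis
const scores = tf.randomUniform([3, 7]);     // synthetic [anchors, classes] score tensor

const classScores = tf.unstack(scores, 1);   // one [anchors] tensor per class
tf.dispose(classScores[faceIndex]);          // release the dropped class tensor
classScores.splice(faceIndex, 1);            // remove it from the array
const filtered = tf.stack(classScores, 1);   // restack to [anchors, classes - 1]
tf.dispose(classScores);                     // tf.dispose accepts an array of tensors

const best = tf.max(filtered, 1);            // best remaining score per anchor
const bestClass = tf.argMax(filtered, 1);    // index of that class
```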
@@ -117,12 +119,13 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config): Promise<HandResult> {
      const coordsRaw: Point[] = coordsData.map((kpt) => [kpt[0] / inputSize[1][1], kpt[1] / inputSize[1][0], (kpt[2] || 0)]);
const coordsNorm: Point[] = coordsRaw.map((kpt) => [kpt[0] * h.boxRaw[2], kpt[1] * h.boxRaw[3], (kpt[2] || 0)]);
- console.log(outputSize, h.box);
hand.keypoints = (coordsNorm).map((kpt) => [
- outputSize[0] * kpt[0] + h.box[0],
- outputSize[1] * kpt[1] + h.box[1],
+ outputSize[0] * (kpt[0] + h.boxRaw[0]),
+ outputSize[1] * (kpt[1] + h.boxRaw[1]),
(kpt[2] || 0),
]);
// hand.box = box.scale(h.box, 1 / detectorExpandFact); // scale box down for visual appeal
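
The keypoint fix above changes the composition order: the normalized crop coordinate is first offset by the normalized box origin (`boxRaw[0]`, `boxRaw[1]`) and only then scaled to output pixels, rather than adding a pixel-space box origin to an already-scaled value. A small worked sketch under assumed names (`boxRaw` is a normalized `[x, y, width, height]`, `outputSize` is `[width, height]` in pixels):

```ts
type Point = [number, number, number];

function toFrameCoords(coordsNorm: Point[], boxRaw: [number, number, number, number], outputSize: [number, number]): Point[] {
  return coordsNorm.map((kpt) => [
    outputSize[0] * (kpt[0] + boxRaw[0]), // shift by the normalized box origin, then scale to pixels
    outputSize[1] * (kpt[1] + boxRaw[1]),
    kpt[2] || 0,
  ]);
}

// e.g. a keypoint at 0.05 of frame width inside a box starting at x = 0.5 on a 640x480 frame:
// x = 640 * (0.05 + 0.5) = 352px, y = 480 * (0.1 + 0.25) = 168px
console.log(toFrameCoords([[0.05, 0.1, 0]], [0.5, 0.25, 0.2, 0.3], [640, 480]));
```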
@@ -179,13 +181,11 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
return hand;
}
-let n = 0;
export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {
- n++;
/** handtrack caching
* 1. if skipFrame returned cached
- * 2. if any cached results but although not sure if its enough we continute anyhow for 10x skipframes
- * 3. eventually rerun detector to generated new cached boxes and reset skipped
+   * 2. if there are any cached results, although we are not sure they are enough, we continue using them for up to 5x skipFrames
+   * 3. if not a skip frame, or eventually, rerun the detector to generate new cached boxes and reset skipped
* 4. generate cached boxes based on detected keypoints
*/
if (!models[0] || !models[1] || !models[0]?.inputs[0].shape || !models[1]?.inputs[0].shape) return []; // something is wrong with the model
@@ -193,34 +193,14 @@ export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {
- console.log(n, 'DETECT', { skipped, hands: cache.hands.length, boxes: cache.boxes.length });
- // this is logically consistent but insufficiently efficient
- /*
- skipped = 0;
- if (cache.boxes.length >= (config.hand.maxDetected || 0)) {
- cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config))); // if we have enough cached boxes run detection using cache
- } else {
- cache.hands = []; // reset hands
- }
-
- if (cache.hands.length !== config.hand.maxDetected) { // did not find enough hands based on cached boxes so run detection on full frame
- cache.boxes = await detectHands(input, config);
+  if (config.skipFrame && skipped < 5 * (config.hand.skipFrames || 0) && cache.hands.length > 0) { // we have some cached results, although not sure if they are enough, so we continue using them a bit longer
cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
- }
- */
-
- if (config.skipFrame && skipped <= 10 * (config.hand.skipFrames || 0) && cache.hands.length > 0) { // we have some cached results but although not sure if its enough we continute anyhow for 10x skipframes
- cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
- console.log(n, 'HANDS', { hands: cache.hands.length });
} else {
cache.boxes = await detectHands(input, config);
- console.log(n, 'BOXES', { hands: cache.boxes.length });
cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
- console.log(n, 'HANDS', { hands: cache.hands.length });
skipped = 0;
}
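
Putting the two hunks above together, the caching steps listed in the doc comment reduce to a fairly small control flow. A hedged sketch of the resulting `predict()` caching logic, with names and helpers simplified; it is not the library's exact code:

```ts
type Box = unknown;
type Hand = unknown;

let skipped = Number.MAX_SAFE_INTEGER;
const cache: { boxes: Box[]; hands: Hand[] } = { boxes: [], hands: [] };

async function predictSketch(
  skipFrame: boolean,
  skipFrames: number,
  detectHands: () => Promise<Box[]>,
  detectFingers: (box: Box) => Promise<Hand>,
): Promise<Hand[]> {
  skipped++;
  // 1. inside the skip window: return cached hands untouched
  if (skipFrame && skipped <= skipFrames && cache.hands.length > 0) return cache.hands;
  if (skipFrame && skipped < 5 * skipFrames && cache.hands.length > 0) {
    // 2. cached boxes may still be good enough: rerun only the finger model on them
    cache.hands = await Promise.all(cache.boxes.map((b) => detectFingers(b)));
  } else {
    // 3. otherwise rerun the box detector, then the finger model, and reset the skip counter
    cache.boxes = await detectHands();
    cache.hands = await Promise.all(cache.boxes.map((b) => detectFingers(b)));
    skipped = 0;
  }
  // 4. (in the library) cached boxes are then regenerated from the detected keypoints
  return cache.hands;
}
```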
@@ -236,7 +216,6 @@ export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {