diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0d0ef3ef..773f6ea4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,11 +9,20 @@
 ## Changelog
 
+### **HEAD -> main** 2021/10/10 mandic00@live.com
+
+
+### **origin/main** 2021/10/08 mandic00@live.com
+
+- demo default config cleanup
+- improve gaze and face angle visualizations in draw
+
+### **release 2.3.1** 2021/10/06 mandic00@live.com
+
+
 ### **2.3.1** 2021/10/06 mandic00@live.com
 
-
-### **origin/main** 2021/10/06 mandic00@live.com
-
+- workaround for chrome offscreencanvas bug
 - fix backend conflict in webworker
 - add blazepose v2 and add annotations to body results
 - fix backend order initialization
diff --git a/demo/index.js b/demo/index.js
index 3fa1c4d7..3030b63b 100644
--- a/demo/index.js
+++ b/demo/index.js
@@ -31,15 +31,6 @@ import jsonView from './helpers/jsonview.js';
 
 let human;
 let userConfig = {
-  cacheSensitivity: 0,
-  hand: { enabled: true },
-  body: { enabled: false },
-  face: { enabled: false },
-  /*
-  hand: { enabled: false, maxDetected: 1, skipFrames: 0 },
-  body: { enabled: false },
-  face: { enabled: false },
-  */
   /*
   warmup: 'none',
   backend: 'humangl',
@@ -118,6 +109,7 @@ const ui = {
  lastFrame: 0, // time of last frame processing
  viewportSet: false, // internal, has custom viewport been set
  background: null, // holds instance of segmentation background image
+ transferCanvas: null, // canvas used to transfer data to and from worker
 
  // webrtc
  useWebRTC: false, // use webrtc as camera source instead of local webcam
@@ -318,7 +310,7 @@ async function drawResults(input) {
   const fps = avgDetect > 0 ? `FPS process:${avgDetect} refresh:${avgDraw}` : '';
   const backend = result.backend || human.tf.getBackend();
   const gpu = engine.backendInstance ? `gpu: ${(engine.backendInstance.numBytesInGPU ? engine.backendInstance.numBytesInGPU : 0).toLocaleString()} bytes` : '';
-  const memory = result.tensors || `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
+  const memory = result.tensors ? `tensors: ${result.tensors.toLocaleString()} in worker` : `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
   document.getElementById('log').innerHTML = `
     video: ${ui.camera.name} | facing: ${ui.camera.facing} | screen: ${window.innerWidth} x ${window.innerHeight} camera: ${ui.camera.width} x ${ui.camera.height} ${processing}
     backend: ${backend} | ${memory}
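The `transferCanvas` field introduced above is what the `runHumanDetect` hunk further down reuses to pull `ImageData` from the video element for the web worker, instead of allocating a fresh `OffscreenCanvas` on every frame (see the "workaround for chrome offscreencanvas bug" changelog entry). A minimal TypeScript sketch of that reuse pattern, using a hypothetical `getFrameData` helper that is not part of this commit:

    // illustrative sketch only: reuse one canvas to extract per-frame pixels for the worker
    let transferCanvas: HTMLCanvasElement | null = null; // created lazily, reused across frames

    function getFrameData(video: HTMLVideoElement, width: number, height: number): ImageData {
      if (!transferCanvas || transferCanvas.width !== width || transferCanvas.height !== height) {
        transferCanvas = document.createElement('canvas'); // allocate only when the size changes
        transferCanvas.width = width;
        transferCanvas.height = height;
      }
      const ctx = transferCanvas.getContext('2d') as CanvasRenderingContext2D;
      ctx.drawImage(video, 0, 0, width, height); // copy the current video frame
      return ctx.getImageData(0, 0, width, height); // ImageData can be posted to a worker; its .data.buffer can be listed as a transferable
    }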
@@ -469,13 +461,17 @@ function webWorker(input, image, canvas, timestamp) {
     if (document.getElementById('gl-bench')) document.getElementById('gl-bench').style.display = ui.bench ? 'block' : 'none';
     lastDetectedResult = msg.data.result;
 
-    if (msg.data.image) {
-      lastDetectedResult.canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(msg.data.width, msg.data.height) : document.createElement('canvas');
-      lastDetectedResult.canvas.width = msg.data.width;
-      lastDetectedResult.canvas.height = msg.data.height;
+    if (msg.data.image) { // we don't really need canvas since we draw from video
+      /*
+      if (!lastDetectedResult.canvas || lastDetectedResult.canvas.width !== msg.data.width || lastDetectedResult.canvas.height !== msg.data.height) {
+        lastDetectedResult.canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(msg.data.width, msg.data.height) : document.createElement('canvas');
+        lastDetectedResult.canvas.width = msg.data.width;
+        lastDetectedResult.canvas.height = msg.data.height;
+      }
       const ctx = lastDetectedResult.canvas.getContext('2d');
       const imageData = new ImageData(new Uint8ClampedArray(msg.data.image), msg.data.width, msg.data.height);
       ctx.putImageData(imageData, 0, 0);
+      */
     }
     ui.framesDetect++;
@@ -508,10 +504,12 @@ function runHumanDetect(input, canvas, timestamp) {
   if (ui.hintsThread) clearInterval(ui.hintsThread);
   if (ui.useWorker && human.env.offscreen) {
     // get image data from video as we cannot send html objects to webworker
-    const offscreen = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(canvas.width, canvas.height) : document.createElement('canvas');
-    offscreen.width = canvas.width;
-    offscreen.height = canvas.height;
-    const ctx = offscreen.getContext('2d');
+    if (!ui.transferCanvas || ui.transferCanvas.width !== canvas.width || ui.transferCanvas.height !== canvas.height) {
+      ui.transferCanvas = document.createElement('canvas');
+      ui.transferCanvas.width = canvas.width;
+      ui.transferCanvas.height = canvas.height;
+    }
+    const ctx = ui.transferCanvas.getContext('2d');
     ctx.drawImage(input, 0, 0, canvas.width, canvas.height);
     const data = ctx.getImageData(0, 0, canvas.width, canvas.height);
     // perform detection in worker
diff --git a/package.json b/package.json
index 4ce730c8..c895a833 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@vladmandic/human",
-  "version": "2.3.1",
+  "version": "2.3.2",
   "description": "Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition",
   "sideEffects": false,
   "main": "dist/human.node.js",
@@ -74,7 +74,6 @@
     "canvas": "^2.8.0",
     "dayjs": "^1.10.7",
     "esbuild": "^0.13.4",
-    "eslint": "^7.32.0",
     "eslint-config-airbnb-base": "^14.2.1",
     "eslint-plugin-import": "^2.24.2",
     "eslint-plugin-json": "^3.1.0",
@@ -86,5 +85,8 @@
     "tslib": "^2.3.1",
     "typedoc": "0.22.5",
     "typescript": "4.4.3"
+  },
+  "dependencies": {
+    "eslint": "7.32.0"
   }
 }
diff --git a/src/config.ts b/src/config.ts
index f1dd6d75..66a61e46 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -420,12 +420,12 @@ const config: Config = {
     rotation: true,        // use best-guess rotated hand image or just box with rotation as-is
                            // false means higher performance, but incorrect finger mapping if hand is inverted
                            // only valid for `handdetect` variation
-    skipFrames: 1,         // how many max frames to go without re-running the hand bounding box detector
+    skipFrames: 2,         // how many max frames to go without re-running the hand bounding box detector
                            // only used when cacheSensitivity is not zero
                            // e.g., if model is running st 25 FPS, we can re-use existing bounding
                            // box for updated hand skeleton analysis as the hand
                            // hasn't moved much in short time (10 * 1/25 = 0.25 sec)
-    minConfidence: 0.55,   // threshold for discarding a prediction
+    minConfidence: 0.50,   // threshold for discarding a prediction
     iouThreshold: 0.2,     // ammount of overlap between two detected objects before one object is removed
     maxDetected: -1,       // maximum number of hands detected in the input
                            // should be set to the minimum number for performance
diff --git a/src/face/facemesh.ts b/src/face/facemesh.ts
index f9810c1d..9cbe4a47 100644
--- a/src/face/facemesh.ts
+++ b/src/face/facemesh.ts
@@ -83,7 +83,7 @@ export async function predict(input: Tensor, config: Config): Promise
       face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
-      for (const key of Object.keys(coords.blazeFaceLandmarks)) face.annotations[key] = [face.mesh[coords.blazeFaceLandmarks[key]]]; // add annotations
+      for (const key of Object.keys(coords.blazeFaceLandmarks)) face.annotations[key] = [face.mesh[coords.blazeFaceLandmarks[key] as number]]; // add annotations
     } else if (!model) { // mesh enabled, but not loaded
       if (config.debug) log('face mesh detection requested, but model is not loaded');
     } else { // mesh enabled
diff --git a/src/face/facemeshcoords.ts b/src/face/facemeshcoords.ts
index c06dd6f1..8ed03b92 100644
--- a/src/face/facemeshcoords.ts
+++ b/src/face/facemeshcoords.ts
@@ -3,7 +3,7 @@
  * See `facemesh.ts` for entry point
  */
 
-export const meshAnnotations = {
+export const meshAnnotations: Record<string, number[]> = {
   silhouette: [
     10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288, 397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
@@ -42,13 +42,13 @@
   leftCheek: [425],
 };
 
-export const meshLandmarks = {
+export const meshLandmarks: Record<string, number | number[]> = {
   count: 468,
   mouth: 13,
   symmetryLine: [13, meshAnnotations['midwayBetweenEyes'][0]],
 };
 
-export const blazeFaceLandmarks = {
+export const blazeFaceLandmarks: Record<string, number | number[]> = {
   leftEye: 0,
   rightEye: 1,
   nose: 2,
@@ -58,7 +58,7 @@
   symmetryLine: [3, 2],
 };
 
-export const MESH_TO_IRIS_INDICES_MAP = [ // A mapping from facemesh model keypoints to iris model keypoints.
+export const MESH_TO_IRIS_INDICES_MAP: Array<{ key: string, indices: number[] }> = [ // A mapping from facemesh model keypoints to iris model keypoints.
   { key: 'EyeUpper0', indices: [9, 10, 11, 12, 13, 14, 15] },
   { key: 'EyeUpper1', indices: [25, 26, 27, 28, 29, 30, 31] },
   { key: 'EyeUpper2', indices: [41, 42, 43, 44, 45, 46, 47] },
@@ -70,7 +70,7 @@
   // { key: 'EyebrowLower', indices: [48, 49, 50, 51, 52, 53] },
 ];
 
-export const UV468 = [
+export const UV468: [number, number][] = [
   [0.499976992607117, 0.652534008026123],
   [0.500025987625122, 0.547487020492554],
   [0.499974012374878, 0.602371990680695],
@@ -541,7 +541,7 @@
   [0.723330020904541, 0.363372981548309],
 ];
 
-export const TRI468 = [
+export const TRI468: Array<number> = [
   127, 34, 139, 11, 0, 37, 232, 231, 120, 72, 37, 39, 128, 121, 47, 232, 121, 128, 104, 69, 67, 175, 171, 148, 157, 154, 155, 118, 50, 101, 73, 39, 40, 9, 151, 108, 48, 115, 131, 194, 204, 211, 74, 40, 185, 80, 42, 183, 40, 92, 186, 230, 229, 118, 202, 212, 214, 83, 18, 17, 76, 61, 146, 160, 29, 30, 56, 157, 173, 106, 204, 194, 135, 214, 192, 203, 165, 98, 21, 71, 68, 51, 45, 4, 144, 24, 23, 77, 146, 91, 205, 50, 187, 201, 200, 18, 91, 106, 182, 90, 91,
@@ -627,7 +627,7 @@
   259, 443, 259, 260, 444, 260, 467, 445, 309, 459, 250, 305, 289, 290, 305, 290, 460, 401, 376, 435, 309, 250, 392, 376, 411, 433, 453, 341, 464, 357, 453, 465, 343, 357, 412, 437, 343, 399, 344, 360, 440, 420, 437, 456, 360, 420, 363, 361, 401, 288, 265, 372, 353, 390, 339, 249, 339, 448, 255];
 
-export const TRI68 = [0, 1, 36, 0, 36, 17, 1, 2, 41, 1, 41, 36, 2, 3, 31, 2, 31, 41, 3, 4, 48, 3, 48, 31, 4, 5, 48, 5, 6, 48, 6, 7, 59, 6, 59, 48, 7, 8, 58, 7, 58, 59,
+export const TRI68: Array<number> = [0, 1, 36, 0, 36, 17, 1, 2, 41, 1, 41, 36, 2, 3, 31, 2, 31, 41, 3, 4, 48, 3, 48, 31, 4, 5, 48, 5, 6, 48, 6, 7, 59, 6, 59, 48, 7, 8, 58, 7, 58, 59,
   8, 9, 56, 8, 56, 57, 8, 57, 58, 9, 10, 55, 9, 55, 56, 10, 11, 54, 10, 54, 55, 11, 12, 54, 12, 13, 54, 13, 14, 35, 13, 35, 54, 14, 15, 46, 14, 46, 35, 15, 16, 45, 15, 45, 46, 16, 26, 45, 17, 36, 18, 18, 37, 19, 18, 36, 37, 19, 38, 20, 19, 37, 38, 20, 39, 21, 20, 38, 39, 21, 39, 27, 22, 42, 23, 22, 27, 42, 23, 43, 24, 23, 42, 43, 24, 44, 25, 24, 43, 44, 25, 45, 26, 25, 44, 45, 27, 39, 28, 27, 28, 42, 28, 39, 29, 28, 29, 42, 29, 31, 30, 29, 30, 35, 29, 40, 31, 29, 35, 47, 29,
@@ -636,7 +636,7 @@ export const TRI68 = [0, 1, 36, 0, 36, 17, 1, 2, 41, 1, 41, 36, 2, 3, 31, 2, 31,
   48, 59, 60, 49, 61, 50, 49, 60, 61, 50, 62, 51, 50, 61, 62, 51, 62, 52, 52, 63, 53, 52, 62, 63, 53, 64, 54, 53, 63, 64, 54, 64, 55, 55, 65, 56, 55, 64, 65, 56, 66, 57, 56, 65, 66, 57, 66, 58, 58, 67, 59, 58, 66, 67, 59, 67, 60, 60, 67, 61, 61, 66, 62, 61, 67, 66, 62, 66, 63, 63, 65, 64, 63, 66, 65, 21, 27, 22];
 
-export const TRI33 = [
+export const TRI33: Array<number> = [
   /* eyes */ 0, 8, 7, 7, 8, 1, 2, 10, 9, 9, 10, 3,
   /* brows */ 17, 0, 18, 18, 0, 7, 18, 7, 19, 19, 7, 1, 19, 1, 11, 19, 11, 20, 21, 3, 22, 21, 9, 3, 20, 9, 21, 20, 2, 9, 20, 11, 2,
   /* 4head */ 23, 17, 18, 25, 21, 22, 24, 19, 20, 24, 18, 19, 24, 20, 21, 24, 23, 18, 24, 21, 25,
@@ -647,9 +647,9 @@
   /* cont */ 26, 30, 5, 27, 6, 31, 0, 28, 26, 3, 27, 29, 17, 28, 0, 3, 29, 22, 23, 28, 17, 22, 29, 25, 28, 30, 26, 27, 31, 29,
 ];
 
-export const TRI7 = [0, 4, 1, 2, 4, 3, 4, 5, 6];
+export const TRI7: Array<number> = [0, 4, 1, 2, 4, 3, 4, 5, 6];
 
-export const VTX68 = [
+export const VTX68: Array<number> = [
   /* cont */ 127, 234, 132, 58, 172, 150, 149, 148, 152, 377, 378, 379, 397, 288, 361, 454, 356,
   /* brows */ 70, 63, 105, 66, 107, 336, 296, 334, 293, 300,
   /* nose */ 168, 6, 195, 4, 98, 97, 2, 326, 327,
@@ -658,9 +658,9 @@ export const VTX68 = [
   /* mouth */ 78, 81, 13, 311, 308, 402, 14, 178,
 ];
 
-export const VTX33 = [33, 133, 362, 263, 1, 62, 308, 159, 145, 386, 374, 6, 102, 331, 2, 13, 14, 70, 105, 107, 336, 334, 300, 54, 10, 284, 50, 280, 234, 454, 58, 288, 152];
+export const VTX33: Array<number> = [33, 133, 362, 263, 1, 62, 308, 159, 145, 386, 374, 6, 102, 331, 2, 13, 14, 70, 105, 107, 336, 334, 300, 54, 10, 284, 50, 280, 234, 454, 58, 288, 152];
 
-export const VTX7 = [33, 133, 362, 263, 1, 78, 308];
+export const VTX7: Array<number> = [33, 133, 362, 263, 1, 78, 308];
 
 export const UV68 = VTX68.map((x) => UV468[x]);
diff --git a/src/face/facemeshutil.ts b/src/face/facemeshutil.ts
index aa5cdc68..626930a1 100644
--- a/src/face/facemeshutil.ts
+++ b/src/face/facemeshutil.ts
@@ -152,8 +152,8 @@ export function transformRawCoords(rawCoords, box, angle, rotationMatrix, inputS
 }
 
 export function correctFaceRotation(box, input, inputSize) {
-  const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= coords.meshLandmarks.count) ? coords.meshLandmarks.symmetryLine : coords.blazeFaceLandmarks.symmetryLine;
-  const angle: number = computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
+  const symmetryLine = (box.landmarks.length >= coords.meshLandmarks.count) ? coords.meshLandmarks.symmetryLine : coords.blazeFaceLandmarks.symmetryLine;
+  const angle: number = computeRotation(box.landmarks[symmetryLine[0]], box.landmarks[symmetryLine[1]]);
   const faceCenter: Point = getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
   const faceCenterNormalized: Point = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
   const rotated = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
diff --git a/src/hand/handtrack.ts b/src/hand/handtrack.ts
index c1e1b717..efd0be35 100644
--- a/src/hand/handtrack.ts
+++ b/src/hand/handtrack.ts
@@ -22,10 +22,11 @@ const modelOutputNodes = ['StatefulPartitionedCall/Postprocessor/Slice', 'Statef
 const inputSize = [[0, 0], [0, 0]];
 
 const classes = ['hand', 'fist', 'pinch', 'point', 'face', 'tip', 'pinchtip'];
+const faceIndex = 4;
 
 const boxExpandFact = 1.6;
 const maxDetectorResolution = 512;
-const detectorExpandFact = 1.2;
+const detectorExpandFact = 1.4;
 
 let skipped = 0;
 let outputSize: [number, number] = [0, 0];
@@ -104,10 +105,11 @@ async function detectHands(input: Tensor, config: Config): Promise
     const classScores = tf.unstack(t.scores, 1); // unstack scores based on classes
+    tf.dispose(classScores[faceIndex]);
+    classScores.splice(faceIndex, 1); // remove faces
     t.filtered = tf.stack(classScores, 1); // restack
-    tf.dispose(...classScores);
+    tf.dispose(classScores);
     t.max = tf.max(t.filtered, 1); // max overall score
     t.argmax = tf.argMax(t.filtered, 1); // class index of max overall score
     let id = 0;
@@ -117,12 +119,13 @@ async function detectHands(input: Tensor, config: Config): Promise
     [kpt[0] / inputSize[1][1], kpt[1] / inputSize[1][0], (kpt[2] || 0)]);
     const coordsNorm: Point[] = coordsRaw.map((kpt) => [kpt[0] * h.boxRaw[2], kpt[1] * h.boxRaw[3], (kpt[2] || 0)]);
-    console.log(outputSize, h.box);
     hand.keypoints = (coordsNorm).map((kpt) => [
-      outputSize[0] * kpt[0] + h.box[0],
-      outputSize[1] * kpt[1] + h.box[1],
+      outputSize[0] * (kpt[0] + h.boxRaw[0]),
+      outputSize[1] * (kpt[1] + h.boxRaw[1]),
       (kpt[2] || 0),
     ]);
     // hand.box = box.scale(h.box, 1 / detectorExpandFact); // scale box down for visual appeal
@@ -179,13 +181,11 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
   return hand;
 }
 
-let n = 0;
 export async function predict(input: Tensor, config: Config): Promise {
-  n++;
   /** handtrack caching
    * 1. if skipFrame returned cached
-   * 2. if any cached results but although not sure if its enough we continute anyhow for 10x skipframes
-   * 3. eventually rerun detector to generated new cached boxes and reset skipped
+   * 2. if there are any cached results, although not sure if they are enough, continue anyhow for up to 5x skipFrames
+   * 3. if not skipFrame, or eventually, rerun detector to generate new cached boxes and reset skipped
    * 4. generate cached boxes based on detected keypoints
    */
   if (!models[0] || !models[1] || !models[0]?.inputs[0].shape || !models[1]?.inputs[0].shape) return []; // something is wrong with the model
@@ -193,34 +193,14 @@ export async function predict(input: Tensor, config: Config): Promise
-  console.log(n, 'DETECT', { skipped, hands: cache.hands.length, boxes: cache.boxes.length });
-  // this is logically consistent but insufficiently efficient
-  /*
-  skipped = 0;
-  if (cache.boxes.length >= (config.hand.maxDetected || 0)) {
-    cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config))); // if we have enough cached boxes run detection using cache
-  } else {
-    cache.hands = []; // reset hands
-  }
-
-  if (cache.hands.length !== config.hand.maxDetected) { // did not find enough hands based on cached boxes so run detection on full frame
-    cache.boxes = await detectHands(input, config);
+  if (config.skipFrame && skipped < 5 * (config.hand.skipFrames || 0) && cache.hands.length > 0) { // we have some cached results; although not sure if they are enough, continue anyhow for a bit longer
     cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
-  }
-  */
-
-  if (config.skipFrame && skipped <= 10 * (config.hand.skipFrames || 0) && cache.hands.length > 0) { // we have some cached results but although not sure if its enough we continute anyhow for 10x skipframes
-    cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
-    console.log(n, 'HANDS', { hands: cache.hands.length });
   } else {
     cache.boxes = await detectHands(input, config);
-    console.log(n, 'BOXES', { hands: cache.boxes.length });
     cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
-    console.log(n, 'HANDS', { hands: cache.hands.length });
     skipped = 0;
   }
@@ -236,7 +216,6 @@ export async function predict(input: Tensor, config: Config): Promise
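The handtrack changes above drop the commented-out experiment and the 10x skipFrames heuristic in favor of a simpler rule: while the cache is still fresh (skipFrame enabled and fewer than 5x hand.skipFrames frames skipped), only re-run the finger model on the cached boxes; once the cache is stale or empty, re-run the bounding-box detector, then the finger model, and reset the skip counter. A condensed TypeScript sketch of that control flow, with simplified types and hypothetical stand-ins for the real detectHands/detectFingers model calls (step 4, regenerating cached boxes from detected keypoints, happens later in the real function and is outside this excerpt):

    // Illustrative sketch of the caching flow shown in the predict() hunk above; not the actual module code.
    type HandBox = { box: number[] };
    type HandResult = { keypoints: number[][] };

    const cache: { boxes: HandBox[], hands: HandResult[] } = { boxes: [], hands: [] };
    let skipped = 0; // the empty cache forces a detector run on the first frame

    async function predictSketch(
      input: ImageData,
      config: { skipFrame: boolean, hand: { skipFrames: number } },
      detectHands: (input: ImageData) => Promise<HandBox[]>, // stand-in for the bounding-box detector model
      detectFingers: (input: ImageData, box: HandBox) => Promise<HandResult>, // stand-in for the skeleton model
    ): Promise<HandResult[]> {
      skipped++;
      if (config.skipFrame && skipped < 5 * config.hand.skipFrames && cache.hands.length > 0) {
        // cached boxes are still considered fresh: only refresh the finger skeleton for each cached box
        cache.hands = await Promise.all(cache.boxes.map((b) => detectFingers(input, b)));
      } else {
        // cache is stale or empty: re-run the box detector, then fingers, and reset the skip counter
        cache.boxes = await detectHands(input);
        cache.hands = await Promise.all(cache.boxes.map((b) => detectFingers(input, b)));
        skipped = 0;
      }
      return cache.hands;
    }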