From f5205bafce0067661243a9de71f5c1bb1a8634ea Mon Sep 17 00:00:00 2001
From: Vladimir Mandic <mandic00@live.com>
Date: Sun, 30 May 2021 12:03:34 -0400
Subject: [PATCH] release candidate

---
 CHANGELOG.md            |  5 ++-
 README.md               | 12 ++++--
 TODO.md                 | 14 +++++--
 demo/index-worker.js    |  1 +
 demo/index.js           | 89 ++++++++++++++++++-----------------------
 demo/node.js            | 21 ++++++++--
 package.json            |  6 +--
 src/draw/draw.ts        | 52 +++++++++++++++---------
 src/object/centernet.ts | 16 ++++----
 wiki                    |  2 +-
 10 files changed, 123 insertions(+), 95 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 504ef673..c725042a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # @vladmandic/human
 
-Version: **1.9.4**
+Version: **2.0.0**
 Description: **Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition**
 
 Author: **Vladimir Mandic <mandic00@live.com>**
@@ -9,8 +9,9 @@ Repository: **<https://github.com/vladmandic/human>**
 
 ## Changelog
 
-### **HEAD -> main** 2021/05/29 mandic00@live.com
+### **HEAD -> main** 2021/05/30 mandic00@live.com
 
+- quantize handdetect model
 - added experimental movenet-lightning and removed blazepose from default dist
 - added experimental face.rotation.gaze
 - fix and optimize for mobile platform

diff --git a/README.md b/README.md
index a76dbaa3..a369c4ac 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,7 @@ Check out [**Live Demo**](https://vladmandic.github.io/human/demo/index.html) fo
 - [**Platform Support**](https://github.com/vladmandic/human/wiki/Platforms)
 - [**List of Models & Credits**](https://github.com/vladmandic/human/wiki/Models)
 - [**Security & Privacy Policy**](https://github.com/vladmandic/human/blob/main/SECURITY.md)
+- [**License & Usage Restrictions**](https://github.com/vladmandic/human/blob/main/LICENSE)
@@ -77,8 +78,8 @@ Check out [**Live Demo**](https://vladmandic.github.io/human/demo/index.html) fo
 
 ## Options
 
-As presented in the demo application...
-> [demo/index.html](demo/index.html)
+All options as presented in the demo application...
+> [demo/index.html](demo/index.html)
 
 ![Options visible in demo](assets/screenshot-menu.png)
 
@@ -88,7 +89,7 @@ As presented in the demo application...
 <br>
-**Training image:** +**Validation image:** > [demo/index.html](demo/index.html?image=%22../assets/human-sample-upper.jpg%22) ![Example Training Image](assets/screenshot-sample.png) @@ -104,7 +105,10 @@ As presented in the demo application... ![Example Using WebCam](assets/screenshot-webcam.jpg) **Face Similarity Matching:** -> [demo/facematch.html](demo/facematch.html) +Extracts all faces from provided input images, +sorts them by similarity to selected face +and optionally matches detected face with database of known people to guess their names +> [demo/facematch.html](demo/facematch.html) ![Face Matching](assets/screenshot-facematch.jpg) diff --git a/TODO.md b/TODO.md index 5cd873f8..dc44c252 100644 --- a/TODO.md +++ b/TODO.md @@ -7,7 +7,7 @@ N/A ## Exploring Features - Implement demo as installable PWA with model caching -- Implement results interpolation on library level +- Implement results interpolation on library level instead inside draw functions - Switch to TypeScript 4.3 ## Explore Models @@ -16,6 +16,14 @@ N/A ## In Progress -- Face interpolation -- Gaze interpolation +- Face rotation interpolation +- Object detection interpolation - Unify score/confidence variables + +## Issues + +- CenterNet WebGL: +- CenterNet WASM: +- NanoDet WASM: +- BlazeFace and HandPose rotation in NodeJS: +- TypeDoc with TS 4.3: diff --git a/demo/index-worker.js b/demo/index-worker.js index b1d5a434..88e17230 100644 --- a/demo/index-worker.js +++ b/demo/index-worker.js @@ -13,6 +13,7 @@ function log(...msg) { onmessage = async (msg) => { if (busy) return; busy = true; + // received from index.js using: // worker.postMessage({ image: image.data.buffer, width: canvas.width, height: canvas.height, config }, [image.data.buffer]); const image = new ImageData(new Uint8ClampedArray(msg.data.image), msg.data.width, msg.data.height); let result = {}; diff --git a/demo/index.js b/demo/index.js index 23ede44b..b62e7521 100644 --- a/demo/index.js +++ b/demo/index.js @@ -1,13 +1,26 @@ -// @ts-nocheck // typescript checks disabled as this is pure javascript - /** * Human demo for browsers * - * Main demo app that exposes all Human functionality + * @description Main demo app that exposes all Human functionality + * + * @params Optional URL parameters: + * image=: perform detection on specific image and finish + * worker=: use WebWorkers + * backend=: use specific TF backend for operations + * preload=: pre-load all configured models + * warmup=: warmup all configured models + * + * @example + * + * @configuration + * userConfig={}: contains all model configuration used by human + * drawOptions={}: contains all draw variables used by human.draw + * ui={}: contains all variables exposed in the UI */ +// @ts-nocheck // typescript checks disabled as this is pure javascript + import Human from '../dist/human.esm.js'; // equivalent of @vladmandic/human -// import Human from '../dist/human.esm-nobundle.js'; // this requires that tf is loaded manually and bundled before human can be used import Menu from './helpers/menu.js'; import GLBench from './helpers/gl-bench.js'; import webRTC from './helpers/webrtc.js'; @@ -15,8 +28,7 @@ import webRTC from './helpers/webrtc.js'; let human; const userConfig = { - warmup: 'full', - /* + warmup: 'none', backend: 'webgl', async: false, cacheSensitivity: 0, @@ -34,10 +46,9 @@ const userConfig = { hand: { enabled: false }, // body: { enabled: true, modelPath: 'posenet.json' }, // body: { enabled: true, modelPath: 'blazepose.json' }, - body: { enabled: true, modelPath: 
-  object: { enabled: false },
+  body: { enabled: false, modelPath: 'movenet-lightning.json' },
+  object: { enabled: true },
   gesture: { enabled: true },
-  */
 };
 
 const drawOptions = {
@@ -53,7 +64,7 @@ const ui = {
   facing: true, // camera facing front or back
   baseBackground: 'rgba(50, 50, 50, 1)', // 'grey'
   columns: 2, // when processing sample images create this many columns
-  useWorker: false, // use web workers for processing
+  useWorker: true, // use web workers for processing
   worker: 'index-worker.js',
   maxFPSframes: 10, // keep fps history for how many frames
   modelsPreload: true, // preload human models on startup
@@ -84,6 +95,7 @@
 
   // sample images
   compare: '../assets/sample-me.jpg', // base image for face compare
+  /*
   samples: [
     '../assets/sample6.jpg',
    '../assets/sample1.jpg',
@@ -92,45 +104,10 @@
     '../assets/sample4.jpg',
     '../assets/sample5.jpg',
     '../assets/sample3.jpg',
     '../assets/sample2.jpg',
   ],
-  /*
-  ui.samples = [
-    '../private/daz3d/daz3d-brianna.jpg',
-    '../private/daz3d/daz3d-chiyo.jpg',
-    '../private/daz3d/daz3d-cody.jpg',
-    '../private/daz3d/daz3d-drew-01.jpg',
-    '../private/daz3d/daz3d-drew-02.jpg',
-    '../private/daz3d/daz3d-ella-01.jpg',
-    '../private/daz3d/daz3d-ella-02.jpg',
-    '../private/daz3d/daz3d-_emotions01.jpg',
-    '../private/daz3d/daz3d-_emotions02.jpg',
-    '../private/daz3d/daz3d-_emotions03.jpg',
-    '../private/daz3d/daz3d-_emotions04.jpg',
-    '../private/daz3d/daz3d-_emotions05.jpg',
-    '../private/daz3d/daz3d-gillian.jpg',
-    '../private/daz3d/daz3d-ginnifer.jpg',
-    '../private/daz3d/daz3d-hye-01.jpg',
-    '../private/daz3d/daz3d-hye-02.jpg',
-    '../private/daz3d/daz3d-kaia.jpg',
-    '../private/daz3d/daz3d-karen.jpg',
-    '../private/daz3d/daz3d-kiaria-01.jpg',
-    '../private/daz3d/daz3d-kiaria-02.jpg',
-    '../private/daz3d/daz3d-lilah-01.jpg',
-    '../private/daz3d/daz3d-lilah-02.jpg',
-    '../private/daz3d/daz3d-lilah-03.jpg',
-    '../private/daz3d/daz3d-lila.jpg',
-    '../private/daz3d/daz3d-lindsey.jpg',
-    '../private/daz3d/daz3d-megah.jpg',
-    '../private/daz3d/daz3d-selina-01.jpg',
-    '../private/daz3d/daz3d-selina-02.jpg',
-    '../private/daz3d/daz3d-snow.jpg',
-    '../private/daz3d/daz3d-sunshine.jpg',
-    '../private/daz3d/daz3d-taia.jpg',
-    '../private/daz3d/daz3d-tuesday-01.jpg',
-    '../private/daz3d/daz3d-tuesday-02.jpg',
-    '../private/daz3d/daz3d-tuesday-03.jpg',
-    '../private/daz3d/daz3d-zoe.jpg',
-  ];
   */
+  samples: [
+    '../private/daz3d/daz3d-kiaria-02.jpg',
+  ],
 };
 
 // global variables
@@ -267,9 +244,9 @@ async function drawResults(input) {
   // if buffered, immediate loop but limit frame rate although it's going to run slower as JS is singlethreaded
   if (ui.buffered) {
     ui.drawThread = requestAnimationFrame(() => drawResults(input, canvas));
-  } else if (!ui.buffered && ui.drawThread) {
+  } else {
     log('stopping buffered refresh');
-    cancelAnimationFrame(ui.drawThread);
+    if (ui.drawThread) cancelAnimationFrame(ui.drawThread);
     ui.drawThread = null;
   }
 }
@@ -435,7 +412,7 @@ function runHumanDetect(input, canvas, timestamp) {
     offscreen.width = canvas.width;
     offscreen.height = canvas.height;
     const ctx = offscreen.getContext('2d');
-    ctx.drawImage(input, 0, 0, input.width, input.height, 0, 0, canvas.width, canvas.height);
+    ctx.drawImage(input, 0, 0, canvas.width, canvas.height);
     const data = ctx.getImageData(0, 0, canvas.width, canvas.height);
     // perform detection in worker
     webWorker(input, data, canvas, userConfig, timestamp);
@@ -522,6 +499,7 @@ async function detectVideo() {
 
 // just initialize everything and call main function
 async function detectSampleImages() {
+  document.getElementById('play').style.display = 'none';
   document.getElementById('canvas').style.display = 'none';
   document.getElementById('samples-container').style.display = 'block';
   log('running detection of sample images');
@@ -530,6 +508,9 @@ async function detectSampleImages() {
   for (const m of Object.values(menu)) m.hide();
   for (const image of ui.samples) await processImage(image);
   status();
+  document.getElementById('play').style.display = 'none';
+  document.getElementById('loader').style.display = 'none';
+  if (ui.detectThread) cancelAnimationFrame(ui.detectThread);
 }
 
 function setupMenu() {
@@ -692,6 +673,12 @@ async function main() {
 
   document.documentElement.style.setProperty('--icon-size', ui.iconSize);
 
+  // sanity check for webworker compatibility
+  if (typeof Worker === 'undefined' || typeof OffscreenCanvas === 'undefined') {
+    ui.useWorker = false;
+    log('workers are disabled due to missing browser functionality');
+  }
+
   // parse url search params
   const params = new URLSearchParams(location.search);
   log('url options:', params.toString());

diff --git a/demo/node.js b/demo/node.js
index eaf195c3..61aaf9e3 100644
--- a/demo/node.js
+++ b/demo/node.js
@@ -4,6 +4,7 @@
 
 const log = require('@vladmandic/pilogger');
 const fs = require('fs');
+const path = require('path');
 const process = require('process');
 const fetch = require('node-fetch').default;
@@ -142,6 +143,7 @@ async function detect(input) {
     log.data('  Object: N/A');
   }
 
+  fs.writeFileSync('result.json', JSON.stringify(result, null, 2));
   // print data to console
   if (result) {
     log.data('Persons:');
@@ -182,13 +184,26 @@ async function main() {
   log.header();
   log.info('Current folder:', process.env.PWD);
   await init();
+  const f = process.argv[2];
   if (process.argv.length !== 3) {
-    log.warn('Parameters: missing');
+    log.warn('Parameters: missing');
     await test();
-  } else if (!fs.existsSync(process.argv[2]) && !process.argv[2].startsWith('http')) {
+  } else if (!fs.existsSync(f) && !f.startsWith('http')) {
     log.error(`File not found: ${process.argv[2]}`);
   } else {
-    await detect(process.argv[2]);
+    if (fs.existsSync(f)) {
+      const stat = fs.statSync(f);
+      if (stat.isDirectory()) {
+        const dir = fs.readdirSync(f);
+        for (const file of dir) {
+          await detect(path.join(f, file));
+        }
+      } else {
+        await detect(f);
+      }
+    } else {
+      await detect(f);
+    }
   }
 }

diff --git a/package.json b/package.json
index 9eac6b5b..aae38dbe 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@vladmandic/human",
-  "version": "1.9.4",
+  "version": "2.0.0",
   "description": "Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition",
   "sideEffects": false,
   "main": "dist/human.node.js",
@@ -68,10 +68,10 @@
     "canvas": "^2.8.0",
     "chokidar": "^3.5.1",
     "dayjs": "^1.10.5",
-    "esbuild": "^0.12.4",
+    "esbuild": "^0.12.5",
     "eslint": "^7.27.0",
     "eslint-config-airbnb-base": "^14.2.1",
-    "eslint-plugin-import": "^2.23.3",
+    "eslint-plugin-import": "^2.23.4",
     "eslint-plugin-json": "^3.0.0",
     "eslint-plugin-node": "^11.1.0",
     "eslint-plugin-promise": "^5.1.0",

diff --git a/src/draw/draw.ts b/src/draw/draw.ts
index 02e31da6..70d61da3 100644
--- a/src/draw/draw.ts
+++ b/src/draw/draw.ts
@@ -503,17 +503,18 @@ export async function person(inCanvas: HTMLCanvasElement, result: Array,
 }
 
 function calcBuffered(newResult, localOptions) {
-  // if (newResult.timestamp !== bufferedResult?.timestamp) bufferedResult = JSON.parse(JSON.stringify(newResult)); // no need to force update
-  // each record is only updated using deep copy when number of detected record changes, otherwise it will converge by itself
+  // each record is only updated using deep clone when the number of detected records changes, otherwise it will converge by itself
+  // otherwise bufferedResult is a shallow clone of result plus updated locally calculated values
+  // thus mixing by-reference and by-value assignments to minimize memory operations
 
   // interpolate body results
-  if (!bufferedResult.body || (newResult.body.length !== bufferedResult.body.length)) bufferedResult.body = JSON.parse(JSON.stringify(newResult.body));
-  for (let i = 0; i < newResult.body.length; i++) { // update body: box, boxRaw, keypoints
-    bufferedResult.body[i].box = newResult.body[i].box
-      .map((box, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].box[j] + box) / localOptions.bufferedFactor) as [number, number, number, number];
-    bufferedResult.body[i].boxRaw = newResult.body[i].boxRaw
-      .map((box, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].boxRaw[j] + box) / localOptions.bufferedFactor) as [number, number, number, number];
-    bufferedResult.body[i].keypoints = newResult.body[i].keypoints
+  if (!bufferedResult.body || (newResult.body.length !== bufferedResult.body.length)) bufferedResult.body = JSON.parse(JSON.stringify(newResult.body)); // deep clone once
+  for (let i = 0; i < newResult.body.length; i++) {
+    const box = newResult.body[i].box // update box
+      .map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].box[j] + b) / localOptions.bufferedFactor) as [number, number, number, number];
+    const boxRaw = newResult.body[i].boxRaw // update boxRaw
+      .map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].boxRaw[j] + b) / localOptions.bufferedFactor) as [number, number, number, number];
+    const keypoints = newResult.body[i].keypoints // update keypoints
       .map((keypoint, j) => ({
         score: keypoint.score,
         part: keypoint.part,
@@ -522,24 +523,37 @@ function calcBuffered(newResult, localOptions) {
         position: {
           x: bufferedResult.body[i].keypoints[j] ? ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].keypoints[j].position.x + keypoint.position.x) / localOptions.bufferedFactor : keypoint.position.x,
          y: bufferedResult.body[i].keypoints[j] ? ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].keypoints[j].position.y + keypoint.position.y) / localOptions.bufferedFactor : keypoint.position.y,
         },
       }));
+    bufferedResult.body[i] = { ...newResult.body[i], box, boxRaw, keypoints }; // shallow clone plus updated values
   }
 
   // interpolate hand results
-  if (!bufferedResult.hand || (newResult.hand.length !== bufferedResult.hand.length)) bufferedResult.hand = JSON.parse(JSON.stringify(newResult.hand));
-  for (let i = 0; i < newResult.hand.length; i++) { // update body: box, boxRaw, landmarks, annotations
-    bufferedResult.hand[i].box = newResult.hand[i].box
-      .map((box, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].box[j] + box) / localOptions.bufferedFactor);
-    bufferedResult.hand[i].boxRaw = newResult.hand[i].boxRaw
-      .map((box, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].boxRaw[j] + box) / localOptions.bufferedFactor);
-    bufferedResult.hand[i].landmarks = newResult.hand[i].landmarks
+  if (!bufferedResult.hand || (newResult.hand.length !== bufferedResult.hand.length)) bufferedResult.hand = JSON.parse(JSON.stringify(newResult.hand)); // deep clone once
+  for (let i = 0; i < newResult.hand.length; i++) {
+    const box = newResult.hand[i].box // update box
+      .map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].box[j] + b) / localOptions.bufferedFactor);
+    const boxRaw = newResult.hand[i].boxRaw // update boxRaw
+      .map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].boxRaw[j] + b) / localOptions.bufferedFactor);
+    const landmarks = newResult.hand[i].landmarks // update landmarks
       .map((landmark, j) => landmark
         .map((coord, k) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].landmarks[j][k] + coord) / localOptions.bufferedFactor));
-    const keys = Object.keys(newResult.hand[i].annotations);
+    const keys = Object.keys(newResult.hand[i].annotations); // update annotations
+    const annotations = [];
     for (const key of keys) {
-      bufferedResult.hand[i].annotations[key] = newResult.hand[i].annotations[key]
+      annotations[key] = newResult.hand[i].annotations[key]
         .map((val, j) => val
           .map((coord, k) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].annotations[key][j][k] + coord) / localOptions.bufferedFactor));
     }
+    bufferedResult.hand[i] = { ...newResult.hand[i], box, boxRaw, landmarks, annotations }; // shallow clone plus updated values
+  }
+
+  // interpolate face results
+  if (!bufferedResult.face || (newResult.face.length !== bufferedResult.face.length)) bufferedResult.face = JSON.parse(JSON.stringify(newResult.face)); // deep clone once
+  for (let i = 0; i < newResult.face.length; i++) {
+    const box = newResult.face[i].box // update box
+      .map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.face[i].box[j] + b) / localOptions.bufferedFactor);
+    const boxRaw = newResult.face[i].boxRaw // update boxRaw
+      .map((b, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.face[i].boxRaw[j] + b) / localOptions.bufferedFactor);
+    bufferedResult.face[i] = { ...newResult.face[i], box, boxRaw }; // shallow clone plus updated values
   }
 
   // interpolate person results
@@ -569,7 +583,7 @@ export async function all(inCanvas: HTMLCanvasElement, result: Result, drawOptio
   if (!(inCanvas instanceof HTMLCanvasElement)) return;
   if (localOptions.bufferedOutput) calcBuffered(result, localOptions); // do results interpolation
   else bufferedResult = result; // just use results as-is
-  face(inCanvas, result.face, localOptions); // face does have buffering
+  face(inCanvas, bufferedResult.face, localOptions); // face does have buffering
   body(inCanvas, bufferedResult.body, localOptions); // use interpolated results if available
   hand(inCanvas, bufferedResult.hand, localOptions); // use interpolated results if available
   // person(inCanvas, bufferedResult.persons, localOptions); // use interpolated results if available

diff --git a/src/object/centernet.ts b/src/object/centernet.ts
index cd5b03a0..bcd6bd89 100644
--- a/src/object/centernet.ts
+++ b/src/object/centernet.ts
@@ -24,18 +24,18 @@ export async function load(config) {
 }
 
 async function process(res, inputSize, outputShape, config) {
+  if (!res) return [];
   const results: Array = [];
   const detections = res.arraySync();
   const squeezeT = tf.squeeze(res);
   res.dispose();
   const arr = tf.split(squeezeT, 6, 1); // x1, y1, x2, y2, score, class
   squeezeT.dispose();
-  const stackT = tf.stack([arr[1], arr[0], arr[3], arr[2]], 1); // tf.nms expects y, x
+  const stackT = tf.stack([arr[1], arr[0], arr[3], arr[2]], 1); // reorder dims as tf.nms expects y, x
   const boxesT = stackT.squeeze();
   const scoresT = arr[4].squeeze();
   const classesT = arr[5].squeeze();
   arr.forEach((t) => t.dispose());
-  // @ts-ignore boxesT type is not correctly inferred
   const nmsT = await tf.image.nonMaxSuppressionAsync(boxesT, scoresT, config.object.maxDetected, config.object.iouThreshold, config.object.minConfidence);
   boxesT.dispose();
   scoresT.dispose();
@@ -44,7 +44,7 @@ async function process(res, inputSize, outputShape, config) {
   nmsT.dispose();
   let i = 0;
   for (const id of nms) {
-    const score = detections[0][id][4];
+    const score = Math.trunc(100 * detections[0][id][4]) / 100;
     const classVal = detections[0][id][5];
     const label = labels[classVal].label;
     const boxRaw = [
@@ -64,18 +64,16 @@ async function process(res, inputSize, outputShape, config) {
   return results;
 }
 
-export async function predict(image, config): Promise {
+export async function predict(input, config): Promise {
   if ((skipped < config.object.skipFrames) && config.skipFrame && (last.length > 0)) {
     skipped++;
     return last;
   }
   skipped = 0;
   return new Promise(async (resolve) => {
-    const outputSize = [image.shape[2], image.shape[1]];
-    const resize = tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false);
-
-    let objectT;
-    if (config.object.enabled) objectT = model.execute(resize, 'tower_0/detections');
+    const outputSize = [input.shape[2], input.shape[1]];
+    const resize = tf.image.resizeBilinear(input, [model.inputSize, model.inputSize]);
+    const objectT = config.object.enabled ? model.execute(resize, ['tower_0/detections']) : null;
     resize.dispose();
 
     const obj = await process(objectT, model.inputSize, outputSize, config);

diff --git a/wiki b/wiki
index 317a8fc7..78e6de45 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 317a8fc76cd933cc38f59948ffade324fc8f1df2
+Subproject commit 78e6de4516ab49f47a906ec7778073b2dbbfed3f
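
The comment added to demo/index-worker.js documents the zero-copy handoff between the demo and its worker: index.js transfers the ArrayBuffer behind the canvas ImageData instead of copying it, and the worker rebuilds the ImageData on its side. A hedged TypeScript sketch of both halves of that exchange, assuming the demo's element ids and worker path (the config payload from the real call is omitted for brevity):

// main thread (as in demo/index.js)
const worker = new Worker('index-worker.js');
const canvas = document.getElementById('canvas') as HTMLCanvasElement;
const ctx = canvas.getContext('2d');
if (ctx) {
  const image = ctx.getImageData(0, 0, canvas.width, canvas.height);
  // listing the buffer as a transferable moves ownership to the worker instead of copying
  worker.postMessage({ image: image.data.buffer, width: canvas.width, height: canvas.height }, [image.data.buffer]);
}

// worker thread (as in demo/index-worker.js)
onmessage = async (msg: MessageEvent) => {
  const image = new ImageData(new Uint8ClampedArray(msg.data.image), msg.data.width, msg.data.height);
  // run detection on the reconstructed ImageData, then postMessage results back
};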
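The interpolation added to src/draw/draw.ts applies one smoothing rule everywhere: each buffered value converges toward the newest detection at a rate set by bufferedFactor (a higher factor means smoother output with more lag). A minimal TypeScript sketch of that rule; the helper name lerpBox is illustrative and not part of the library API:

type Box = [number, number, number, number];

// exponential moving average used for box, boxRaw, keypoints, landmarks and annotations:
// next = ((factor - 1) * previous + current) / factor
function lerpBox(previous: Box, current: Box, bufferedFactor: number): Box {
  return current.map((v, i) => ((bufferedFactor - 1) * previous[i] + v) / bufferedFactor) as Box;
}

// example: with bufferedFactor = 3, a box edge previously at 100 and newly detected at 130
// moves to (2 * 100 + 130) / 3 = 110, i.e. one third of the way per processed frame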
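In src/object/centernet.ts, process() splits the raw model output into x1, y1, x2, y2, score and class columns, then restacks the coordinates because tf.image.nonMaxSuppressionAsync expects boxes ordered as [y1, x1, y2, x2]. A standalone sketch of that step under the same assumption that res is a [1, n, 6] detections tensor; this is a simplified illustration, not the module's full implementation:

import * as tf from '@tensorflow/tfjs';

async function nmsIndices(res: tf.Tensor, maxDetected: number, iouThreshold: number, minConfidence: number): Promise<number[]> {
  const squeezeT = tf.squeeze(res);                                       // [n, 6]
  const arr = tf.split(squeezeT, 6, 1);                                   // x1, y1, x2, y2, score, class
  const boxesT = tf.stack([arr[1], arr[0], arr[3], arr[2]], 1).squeeze(); // reorder to [y1, x1, y2, x2]
  const scoresT = arr[4].squeeze();
  const nmsT = await tf.image.nonMaxSuppressionAsync(boxesT as tf.Tensor2D, scoresT as tf.Tensor1D, maxDetected, iouThreshold, minConfidence);
  const indices = Array.from(nmsT.dataSync());                            // indices of detections that survive nms
  tf.dispose([squeezeT, ...arr, boxesT, scoresT, nmsT]);                  // release all intermediate tensors
  return indices;
}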