From e2bd9dbc345298f1de2c961aef774d7a25d87953 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 11 Oct 2020 19:22:43 -0400
Subject: [PATCH] initial public commit

---
 .eslintrc.json           |  54 +++++
 .gitignore               |   1 +
 README.md                | 173 ++++++++++++++
 demo/index.html          |  25 +++
 demo/index.js            | 120 ++++++++++
 package.json             |  51 +++++
 src/config.js            |  58 +++++
 src/facemesh/uvcoords.js | 470 +++++++++++++++++++++++++++++++++++++++
 src/handpose/box.js      |  65 ++++++
 src/handpose/hand.js     | 107 +++++++++
 src/handpose/index.js    |  93 ++++++++
 src/image.js             | 127 +++++++++++
 src/index.js             |  81 +++++++
 13 files changed, 1425 insertions(+)
 create mode 100644 .eslintrc.json
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 demo/index.html
 create mode 100644 demo/index.js
 create mode 100644 package.json
 create mode 100644 src/config.js
 create mode 100644 src/facemesh/uvcoords.js
 create mode 100644 src/handpose/box.js
 create mode 100644 src/handpose/hand.js
 create mode 100644 src/handpose/index.js
 create mode 100644 src/image.js
 create mode 100644 src/index.js

diff --git a/.eslintrc.json b/.eslintrc.json
new file mode 100644
index 00000000..e8937215
--- /dev/null
+++ b/.eslintrc.json
@@ -0,0 +1,54 @@
+{
+  "globals": {},
+  "env": {
+    "browser": true,
+    "commonjs": true,
+    "es6": true,
+    "node": true,
+    "jquery": true,
+    "es2020": true
+  },
+  "parserOptions": { "ecmaVersion": 2020 },
+  "plugins": [ ],
+  "extends": [
+    "eslint:recommended",
+    "plugin:import/errors",
+    "plugin:import/warnings",
+    "plugin:node/recommended",
+    "plugin:promise/recommended",
+    "plugin:json/recommended-with-comments",
+    "airbnb-base"
+  ],
+  "ignorePatterns": [ "dist", "assets", "media", "models", "node_modules" ],
+  "rules": {
+    "max-len": [1, 275, 3],
+    "camelcase": "off",
+    "guard-for-in": "off",
+    "prefer-template":"off",
+    "import/extensions": "off",
+    "func-names": "off",
+    "no-await-in-loop": "off",
+    "no-bitwise": "off",
+    "no-case-declarations":"off",
+    "no-continue": "off",
+    "no-loop-func": "off",
+    "no-mixed-operators": "off",
+    "no-param-reassign":"off",
+    "no-plusplus": "off",
+    "dot-notation": "off",
+    "no-restricted-globals": "off",
+    "no-restricted-syntax": "off",
+    "no-underscore-dangle": "off",
+    "newline-per-chained-call": "off",
+    "node/no-unsupported-features/es-syntax": "off",
+    "node/shebang": "off",
+    "object-curly-newline": "off",
+    "prefer-destructuring": "off",
+    "promise/always-return": "off",
+    "promise/catch-or-return": "off",
+    "promise/no-nesting": "off",
+    "import/no-absolute-path": "off",
+    "no-regex-spaces": "off",
+    "radix": "off"
+  }
+}
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..3c3629e6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+node_modules
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..d514a2ad
--- /dev/null
+++ b/README.md
@@ -0,0 +1,173 @@
+# Human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking and Age & Gender Prediction
+
+URL: <https://github.com/vladmandic/human>
+
+*Suggestions are welcome!*
+
+## Credits
+
+This is an amalgamation of multiple existing models:
+
+- Face Detection: [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
+- Facial Spatial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
+- Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
+- Hand Detection & Skeleton: [**MediaPipe HandPose**](https://drive.google.com/file/d/1sv4sSb9BSNVZhLzxXJ0jBv9DqD-4jnAz/view)
+- Body Pose Detection: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
+- Age & Gender Prediction: [**SSR-Net**](https://github.com/shamangary/SSR-Net)
+
+## Install
+
+```shell
+npm install @vladmandic/human
+```
+
+All pre-trained models are included in folder `/models` (25MB total)
+
+## Demo
+
+Demo is included in `/demo`
+
+## Requirements
+
+`Human` library is based on [TensorFlow/JS (TFJS)](https://js.tensorflow.org), but does not package it to allow for independent version management - import `tfjs` before importing `Human`
+
+## Usage
+
+`Human` library does not require special initialization.
+All configuration is done in a single JSON object and model weights are loaded dynamically upon their first use - and only then: `Human` will not load weights for modules that are not needed according to the configuration.
+
+There is only *ONE* method you need:
+
+```js
+import * as tf from '@tensorflow/tfjs';
+import human from '@vladmandic/human';
+
+// 'image': can be any type of image object: HTMLImage, HTMLVideo, HTMLMedia, Canvas, Tensor4D
+// 'options': optional parameter used to override any options present in default configuration
+const results = await human.detect(image, options?)
+```
+
+Additionally, `Human` library exposes two helper objects:
+
+```js
+human.defaults // default configuration object
+human.models // dynamically maintained object of any loaded models
+```
+
+## Configuration
+
+Below is the output of the `human.defaults` object.
+Any property can be overridden by passing a user configuration object to `human.detect()`.
+Note that the user object and the default configuration are merged using deep-merge, so you do not need to redefine the entire configuration; a minimal override sketch follows the parameter list below.
+
+```js
+human.defaults = {
+  face: {
+    enabled: true,
+    detector: {
+      modelPath: '/models/human/blazeface/model.json',
+      maxFaces: 10,
+      skipFrames: 5,
+      minConfidence: 0.8,
+      iouThreshold: 0.3,
+      scoreThreshold: 0.75,
+    },
+    mesh: {
+      enabled: true,
+      modelPath: '/models/human/facemesh/model.json',
+    },
+    iris: {
+      enabled: true,
+      modelPath: '/models/human/iris/model.json',
+    },
+    age: {
+      enabled: true,
+      modelPath: '/models/human/ssrnet-imdb-age/model.json',
+      skipFrames: 5,
+    },
+    gender: {
+      enabled: true,
+      modelPath: '/models/human/ssrnet-imdb-gender/model.json',
+    },
+  },
+  body: {
+    enabled: true,
+    modelPath: '/models/human/posenet/model.json',
+    maxDetections: 5,
+    scoreThreshold: 0.75,
+    nmsRadius: 20,
+  },
+  hand: {
+    enabled: true,
+    skipFrames: 5,
+    minConfidence: 0.8,
+    iouThreshold: 0.3,
+    scoreThreshold: 0.75,
+    detector: {
+      anchors: '/models/human/handdetect/anchors.json',
+      modelPath: '/models/human/handdetect/model.json',
+    },
+    skeleton: {
+      modelPath: '/models/human/handskeleton/model.json',
+    },
+  },
+};
+```
+
+Where:
+- `enabled`: controls if the specified module is enabled (note: a module is not loaded until it is required)
+- `modelPath`: path to the specific pre-trained model weights
+- `maxFaces`, `maxDetections`: how many faces or people to analyze; limiting the number in busy scenes results in higher performance
+- `skipFrames`: how many frames to skip before re-running bounding box detection (e.g., a face does not move fast within a video, so it is ok to reuse the previously detected position and only run face geometry analysis)
+- `minConfidence`: threshold for discarding a prediction
+- `iouThreshold`: threshold for deciding whether boxes overlap too much in non-maximum suppression
+- `scoreThreshold`: threshold for deciding when to remove boxes based on score in non-maximum suppression
+- `nmsRadius`: radius for deciding whether points are too close in non-maximum suppression
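+
+For example, a minimal override sketch (assuming the defaults above): run face analysis only, limited to a single face - all omitted properties keep their default values thanks to the deep-merge:
+
+```js
+// hypothetical user options; only the properties being changed need to be specified
+const options = {
+  face: { detector: { maxFaces: 1 } },
+  body: { enabled: false },
+  hand: { enabled: false },
+};
+const results = await human.detect(image, options);
+```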
+
+## Outputs
+
+Result of `human.detect()` is a single object that includes data for all enabled modules and all detected objects:
+
+```js
+result = {
+  face:            // <array of detected faces>
+  [
+    {
+      confidence:  // <number>
+      box:         // <array [x, y, width, height]>
+      mesh:        // <array of 3D points [x, y, z]> (468 base points & 10 iris points)
+      annotations: // <object with named landmark arrays> (32 base annotated landmarks & 2 iris annotations)
+      iris:        // <number> (relative distance of iris to camera, multiply by focal length to get actual distance)
+      age:         // <number> (estimated age)
+      gender:      // <string> (male or female)
+    }
+  ],
+  body:            // <array of detected bodies>
+  [
+    {
+      score:       // <number>
+      keypoints:   // <array of { score, part, position }> (17 annotated landmarks)
+    }
+  ],
+  hand:            // <array of detected hands>
+  [
+    {
+      confidence:  // <number>
+      box:         // <array [x, y, width, height]>
+      landmarks:   // <array of 3D points [x, y, z]> (21 points)
+      annotations: // <object with named landmark arrays> (5 annotated landmarks)
+    }
+  ]
+}
+```
+
+## Performance
+
+Of course, performance will vary depending on your hardware, but also on the number of enabled modules as well as their parameters.
+For example, on a low-end Nvidia GTX 1050 it can perform face detection at 50+ FPS, but drops below 5 FPS if all modules are enabled.
+
+## Todo
+
+- Improve detection of smaller faces, add BlazeFace back model
+- Create demo and host it on GitHub Pages
+- Implement draw helper functions
+- Sample Images
+- Rename human to human
diff --git a/demo/index.html b/demo/index.html
new file mode 100644
index 00000000..b8481df3
--- /dev/null
+++ b/demo/index.html
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+
+
+ + +
+
+
+
+
+
+
+

diff --git a/demo/index.js b/demo/index.js
new file mode 100644
index 00000000..30c35746
--- /dev/null
+++ b/demo/index.js
@@ -0,0 +1,120 @@
+/* global tf, ScatterGL, dat */
+
+import human from '../dist/human.esm.js';
+
+const state = {
+  backend: 'webgl',
+  triangulateMesh: true,
+  renderPointcloud: true,
+  stop: false,
+  videoSize: 700,
+};
+// values mirror the face.detector defaults in src/config.js so dat.gui has properties to bind to
+const options = {
+  maxFaces: 10,
+  detectionConfidence: 0.8,
+  iouThreshold: 0.3,
+  scoreThreshold: 0.75,
+};
+
+let ctx;
+let videoWidth;
+let videoHeight;
+let video;
+let canvas;
+let scatterGLHasInitialized = false;
+let scatterGL;
+
+async function renderPrediction() {
+  const predictions = await human.detect(video);
+  ctx.drawImage(video, 0, 0, videoWidth, videoHeight, 0, 0, canvas.width, canvas.height);
+  const div = document.getElementById('faces');
+  div.innerHTML = '';
+  for (const prediction of predictions) {
+    div.appendChild(prediction.canvas);
+    ctx.beginPath();
+    ctx.rect(prediction.box[0], prediction.box[1], prediction.box[2], prediction.box[3]);
+    ctx.font = 'small-caps 1rem "Segoe UI"';
+    ctx.fillText(`${prediction.gender} ${prediction.age}`, prediction.box[0] + 2, prediction.box[1] + 16, prediction.box[2]);
+    ctx.stroke();
+    if (state.triangulateMesh) {
+      for (let i = 0; i < human.triangulation.length / 3; i++) {
+        const points = [human.triangulation[i * 3], human.triangulation[i * 3 + 1], human.triangulation[i * 3 + 2]].map((index) => prediction.mesh[index]);
+        const region = new Path2D();
+        region.moveTo(points[0][0], points[0][1]);
+        for (let j = 1; j < points.length; j++) region.lineTo(points[j][0], points[j][1]);
+        region.closePath();
+        ctx.stroke(region);
+      }
+    } else {
+      for (let i = 0; i < prediction.mesh.length; i++) {
+        const x = prediction.mesh[i][0];
+        const y = prediction.mesh[i][1];
+        ctx.beginPath();
+        ctx.arc(x, y, 1 /* radius */, 0, 2 * Math.PI);
+        ctx.fill();
+      }
+    }
+    if (state.renderPointcloud && scatterGL != null) {
+      const pointsData = predictions.map((pred) => pred.mesh.map((point) => ([-point[0], -point[1], -point[2]])));
+      let flattenedPointsData = [];
+      for (let i = 0; i < pointsData.length; i++) {
+        flattenedPointsData = flattenedPointsData.concat(pointsData[i]);
+      }
+      const dataset = new ScatterGL.Dataset(flattenedPointsData);
+      if (!scatterGLHasInitialized) scatterGL.render(dataset);
+      else scatterGL.updateDataset(dataset);
+      scatterGLHasInitialized = true;
+    }
+  }
+  if (!state.stop) requestAnimationFrame(renderPrediction);
+}
+
+function setupDatGui() {
+  const gui = new dat.GUI();
+  gui.add(state, 'stop').onChange(() => { renderPrediction(); });
+  gui.add(state, 'backend', ['webgl', 'cpu']).onChange((backend) => { tf.setBackend(backend); });
+  gui.add(options, 'maxFaces', 1, 100, 1).onChange(() => { human.load(options); });
+  gui.add(options, 'detectionConfidence', 0, 1, 0.05).onChange(() => { human.load(options); });
+  gui.add(options, 'iouThreshold', 0, 1, 0.05).onChange(() => { human.load(options); });
+  gui.add(options, 'scoreThreshold', 0, 1, 0.05).onChange(() => { human.load(options); });
+  gui.add(state, 'triangulateMesh');
+  gui.add(state, 'renderPointcloud').onChange((render) => { document.querySelector('#scatter-gl-container').style.display = render ? 
'inline-block' : 'none'; }); +} + +async function setupCamera() { + video = document.getElementById('video'); + const stream = await navigator.mediaDevices.getUserMedia({ + audio: false, + video: { facingMode: 'user', width: state.videoSize, height: state.videoSize }, + }); + video.srcObject = stream; + return new Promise((resolve) => { + video.onloadedmetadata = () => resolve(video); + }); +} + +async function main() { + await tf.setBackend(state.backend); + setupDatGui(); + await setupCamera(); + video.play(); + videoWidth = video.videoWidth; + videoHeight = video.videoHeight; + video.width = videoWidth; + video.height = videoHeight; + canvas = document.getElementById('output'); + canvas.width = videoWidth; + canvas.height = videoHeight; + const canvasContainer = document.querySelector('.canvas-wrapper'); + canvasContainer.style = `width: ${videoWidth}px; height: ${videoHeight}px`; + ctx = canvas.getContext('2d'); + // ctx.translate(canvas.width, 0); + // ctx.scale(-1, 1); + ctx.fillStyle = '#32EEDB'; + ctx.strokeStyle = '#32EEDB'; + ctx.lineWidth = 0.5; + human.load(options); + renderPrediction(); + if (state.renderPointcloud) { + document.querySelector('#scatter-gl-container').style = `width: ${state.videoSize}px; height: ${state.videoSize}px;`; + scatterGL = new ScatterGL(document.querySelector('#scatter-gl-container'), { rotateOnStart: false, selectEnabled: false }); + } +} + +main(); diff --git a/package.json b/package.json new file mode 100644 index 00000000..b5e5f1be --- /dev/null +++ b/package.json @@ -0,0 +1,51 @@ +{ + "name": "@vladmandic/human", + "version": "0.1.3", + "description": "human: 3D Face Detection, Iris Tracking and Age & Gender Prediction", + "sideEffects": false, + "main": "src/index.js", + "module": "dist/human.esm.js", + "browser": "dist/human.js", + "author": "Vladimir Mandic ", + "bugs": { + "url": "https://github.com/vladmandic/human/issues" + }, + "homepage": "https://github.com/vladmandic/human#readme", + "license": "MIT", + "engines": { + "node": ">=14.0.0" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/vladmandic/human.git" + }, + "dependencies": { + "@tensorflow/tfjs": "^2.6.0" + }, + "devDependencies": { + "esbuild": "^0.7.13", + "eslint": "^7.10.0", + "eslint-config-airbnb-base": "^14.2.0", + "eslint-plugin-import": "^2.22.1", + "eslint-plugin-json": "^2.1.2", + "eslint-plugin-node": "^11.1.0", + "eslint-plugin-promise": "^4.2.1", + "rimraf": "^3.0.2" + }, + "scripts": { + "build": "rimraf dist/ && npm run build-esm && npm run build-iife", + "build-esm": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --external:@tensorflow --outfile=dist/human.esm.js src/index.js", + "build-iife": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=iife --minify --global-name=human --outfile=dist/human.js src/index.js" + }, + "keywords": [ + "face detection", + "detection", + "recognition", + "blazeface", + "facemesh", + "ssrnet", + "tensorflow", + "tensorflowjs", + "tfjs" + ] +} diff --git a/src/config.js b/src/config.js new file mode 100644 index 00000000..5833f72f --- /dev/null +++ b/src/config.js @@ -0,0 +1,58 @@ +export default { + face: { + enabled: true, // refers to detector, but since all other face modules rely on detector, it should be a global + detector: { + modelPath: '/models/blazeface/model.json', + inputSize: 128, // fixed value + maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance + skipFrames: 5, // how many 
frames to go without running the bounding box detector, only relevant if maxFaces > 1 + minConfidence: 0.8, // threshold for discarding a prediction + iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression, must be between [0, 1] + scoreThreshold: 0.75, // threshold for deciding when to remove boxes based on score in non-maximum suppression + }, + mesh: { + enabled: true, + modelPath: '/models/facemesh/model.json', + inputSize: 192, // fixed value + }, + iris: { + enabled: true, + modelPath: '/models/iris/model.json', + inputSize: 192, // fixed value + }, + age: { + enabled: true, + modelPath: '/models/ssrnet-age/imdb/model.json', + inputSize: 64, // fixed value + skipFrames: 5, + }, + gender: { + enabled: true, + modelPath: '/models/ssrnet-gender/imdb/model.json', + }, + }, + body: { + enabled: true, + modelPath: '/models/posenet/model.json', + inputResolution: 257, // fixed value + outputStride: 16, // fixed value + maxDetections: 5, + scoreThreshold: 0.75, + nmsRadius: 20, + }, + hand: { + enabled: true, + inputSize: 256, // fixed value + skipFrames: 5, + minConfidence: 0.8, + iouThreshold: 0.3, + scoreThreshold: 0.75, + detector: { + anchors: '/models/handdetect/anchors.json', + modelPath: '/models/handdetect/model.json', + }, + skeleton: { + modelPath: '/models/handskeleton/model.json', + }, + }, +}; diff --git a/src/facemesh/uvcoords.js b/src/facemesh/uvcoords.js new file mode 100644 index 00000000..0032bf2d --- /dev/null +++ b/src/facemesh/uvcoords.js @@ -0,0 +1,470 @@ +exports.UV_COORDS = [ + [0.499976992607117, 0.652534008026123], + [0.500025987625122, 0.547487020492554], + [0.499974012374878, 0.602371990680695], + [0.482113003730774, 0.471979022026062], + [0.500150978565216, 0.527155995368958], + [0.499909996986389, 0.498252987861633], + [0.499523013830185, 0.40106201171875], + [0.289712011814117, 0.380764007568359], + [0.499954998493195, 0.312398016452789], + [0.499987006187439, 0.269918978214264], + [0.500023007392883, 0.107050001621246], + [0.500023007392883, 0.666234016418457], + [0.5000159740448, 0.679224014282227], + [0.500023007392883, 0.692348003387451], + [0.499976992607117, 0.695277988910675], + [0.499976992607117, 0.70593398809433], + [0.499976992607117, 0.719385027885437], + [0.499976992607117, 0.737019002437592], + [0.499967992305756, 0.781370997428894], + [0.499816000461578, 0.562981009483337], + [0.473773002624512, 0.573909997940063], + [0.104906998574734, 0.254140973091125], + [0.365929991006851, 0.409575998783112], + [0.338757991790771, 0.41302502155304], + [0.311120003461838, 0.409460008144379], + [0.274657994508743, 0.389131009578705], + [0.393361985683441, 0.403706014156342], + [0.345234006643295, 0.344011008739471], + [0.370094001293182, 0.346076011657715], + [0.319321990013123, 0.347265005111694], + [0.297903001308441, 0.353591024875641], + [0.24779200553894, 0.410809993743896], + [0.396889001131058, 0.842755019664764], + [0.280097991228104, 0.375599980354309], + [0.106310002505779, 0.399955987930298], + [0.2099249958992, 0.391353011131287], + [0.355807989835739, 0.534406006336212], + [0.471751004457474, 0.65040397644043], + [0.474155008792877, 0.680191993713379], + [0.439785003662109, 0.657229006290436], + [0.414617002010345, 0.66654098033905], + [0.450374007225037, 0.680860996246338], + [0.428770989179611, 0.682690978050232], + [0.374971002340317, 0.727805018424988], + [0.486716985702515, 0.547628998756409], + [0.485300987958908, 0.527395009994507], + [0.257764995098114, 0.314490020275116], + 
[0.401223003864288, 0.455172002315521], + [0.429818987846375, 0.548614978790283], + [0.421351999044418, 0.533740997314453], + [0.276895999908447, 0.532056987285614], + [0.483370006084442, 0.499586999416351], + [0.33721199631691, 0.282882988452911], + [0.296391993761063, 0.293242990970612], + [0.169294998049736, 0.193813979625702], + [0.447580009698868, 0.302609980106354], + [0.392390012741089, 0.353887975215912], + [0.354490011930466, 0.696784019470215], + [0.067304998636246, 0.730105042457581], + [0.442739009857178, 0.572826027870178], + [0.457098007202148, 0.584792017936707], + [0.381974011659622, 0.694710969924927], + [0.392388999462128, 0.694203019142151], + [0.277076005935669, 0.271932005882263], + [0.422551989555359, 0.563233017921448], + [0.385919004678726, 0.281364023685455], + [0.383103013038635, 0.255840003490448], + [0.331431001424789, 0.119714021682739], + [0.229923993349075, 0.232002973556519], + [0.364500999450684, 0.189113974571228], + [0.229622006416321, 0.299540996551514], + [0.173287004232407, 0.278747975826263], + [0.472878992557526, 0.666198015213013], + [0.446828007698059, 0.668527007102966], + [0.422762006521225, 0.673889994621277], + [0.445307999849319, 0.580065965652466], + [0.388103008270264, 0.693961024284363], + [0.403039008378983, 0.706539988517761], + [0.403629004955292, 0.693953037261963], + [0.460041999816895, 0.557139039039612], + [0.431158006191254, 0.692366003990173], + [0.452181994915009, 0.692366003990173], + [0.475387006998062, 0.692366003990173], + [0.465828001499176, 0.779190003871918], + [0.472328990697861, 0.736225962638855], + [0.473087012767792, 0.717857003211975], + [0.473122000694275, 0.704625964164734], + [0.473033010959625, 0.695277988910675], + [0.427942007780075, 0.695277988910675], + [0.426479011774063, 0.703539967536926], + [0.423162013292313, 0.711845993995667], + [0.4183090031147, 0.720062971115112], + [0.390094995498657, 0.639572978019714], + [0.013953999616206, 0.560034036636353], + [0.499913990497589, 0.58014702796936], + [0.413199990987778, 0.69539999961853], + [0.409626007080078, 0.701822996139526], + [0.468080013990402, 0.601534962654114], + [0.422728985548019, 0.585985004901886], + [0.463079988956451, 0.593783974647522], + [0.37211999297142, 0.47341400384903], + [0.334562003612518, 0.496073007583618], + [0.411671012639999, 0.546965003013611], + [0.242175996303558, 0.14767599105835], + [0.290776997804642, 0.201445996761322], + [0.327338010072708, 0.256527006626129], + [0.399509996175766, 0.748921036720276], + [0.441727995872498, 0.261676013469696], + [0.429764986038208, 0.187834024429321], + [0.412198007106781, 0.108901023864746], + [0.288955003023148, 0.398952007293701], + [0.218936994671822, 0.435410976409912], + [0.41278201341629, 0.398970007896423], + [0.257135003805161, 0.355440020561218], + [0.427684992551804, 0.437960982322693], + [0.448339998722076, 0.536936044692993], + [0.178560003638268, 0.45755398273468], + [0.247308000922203, 0.457193970680237], + [0.286267012357712, 0.467674970626831], + [0.332827985286713, 0.460712015628815], + [0.368755996227264, 0.447206974029541], + [0.398963987827301, 0.432654976844788], + [0.476410001516342, 0.405806005001068], + [0.189241006970406, 0.523923993110657], + [0.228962004184723, 0.348950982093811], + [0.490725994110107, 0.562400996685028], + [0.404670000076294, 0.485132992267609], + [0.019469000399113, 0.401564002037048], + [0.426243007183075, 0.420431017875671], + [0.396993011236191, 0.548797011375427], + [0.266469985246658, 0.376977026462555], + [0.439121007919312, 
0.51895797252655], + [0.032313998788595, 0.644356966018677], + [0.419054001569748, 0.387154996395111], + [0.462783008813858, 0.505746960639954], + [0.238978996872902, 0.779744982719421], + [0.198220998048782, 0.831938028335571], + [0.107550002634525, 0.540755033493042], + [0.183610007166862, 0.740257024765015], + [0.134409993886948, 0.333683013916016], + [0.385764002799988, 0.883153975009918], + [0.490967005491257, 0.579378008842468], + [0.382384985685349, 0.508572995662689], + [0.174399003386497, 0.397670984268188], + [0.318785011768341, 0.39623498916626], + [0.343364000320435, 0.400596976280212], + [0.396100014448166, 0.710216999053955], + [0.187885001301765, 0.588537991046906], + [0.430987000465393, 0.944064974784851], + [0.318993002176285, 0.898285031318665], + [0.266247987747192, 0.869701027870178], + [0.500023007392883, 0.190576016902924], + [0.499976992607117, 0.954452991485596], + [0.366169989109039, 0.398822009563446], + [0.393207013607025, 0.39553701877594], + [0.410373002290726, 0.391080021858215], + [0.194993004202843, 0.342101991176605], + [0.388664990663528, 0.362284004688263], + [0.365961998701096, 0.355970978736877], + [0.343364000320435, 0.355356991291046], + [0.318785011768341, 0.35834002494812], + [0.301414996385574, 0.363156020641327], + [0.058132998645306, 0.319076001644135], + [0.301414996385574, 0.387449026107788], + [0.499987989664078, 0.618434011936188], + [0.415838003158569, 0.624195992946625], + [0.445681989192963, 0.566076993942261], + [0.465844005346298, 0.620640993118286], + [0.49992299079895, 0.351523995399475], + [0.288718998432159, 0.819945991039276], + [0.335278987884521, 0.852819979190826], + [0.440512001514435, 0.902418971061707], + [0.128294005990028, 0.791940987110138], + [0.408771991729736, 0.373893976211548], + [0.455606997013092, 0.451801002025604], + [0.499877005815506, 0.908990025520325], + [0.375436991453171, 0.924192011356354], + [0.11421000212431, 0.615022003650665], + [0.448662012815475, 0.695277988910675], + [0.4480200111866, 0.704632043838501], + [0.447111994028091, 0.715808033943176], + [0.444831997156143, 0.730794012546539], + [0.430011987686157, 0.766808986663818], + [0.406787008047104, 0.685672998428345], + [0.400738000869751, 0.681069016456604], + [0.392399996519089, 0.677703022956848], + [0.367855995893478, 0.663918972015381], + [0.247923001646996, 0.601333022117615], + [0.452769994735718, 0.420849978923798], + [0.43639200925827, 0.359887003898621], + [0.416164010763168, 0.368713974952698], + [0.413385987281799, 0.692366003990173], + [0.228018000721931, 0.683571994304657], + [0.468268007040024, 0.352671027183533], + [0.411361992359161, 0.804327011108398], + [0.499989002943039, 0.469825029373169], + [0.479153990745544, 0.442654013633728], + [0.499974012374878, 0.439637005329132], + [0.432112008333206, 0.493588984012604], + [0.499886006116867, 0.866917014122009], + [0.49991300702095, 0.821729004383087], + [0.456548988819122, 0.819200992584229], + [0.344549000263214, 0.745438992977142], + [0.37890899181366, 0.574010014533997], + [0.374292999505997, 0.780184984207153], + [0.319687992334366, 0.570737957954407], + [0.357154995203018, 0.604269981384277], + [0.295284003019333, 0.621580958366394], + [0.447750002145767, 0.862477004528046], + [0.410986006259918, 0.508723020553589], + [0.31395098567009, 0.775308012962341], + [0.354128003120422, 0.812552988529205], + [0.324548006057739, 0.703992962837219], + [0.189096003770828, 0.646299958229065], + [0.279776990413666, 0.71465802192688], + [0.1338230073452, 0.682700991630554], + [0.336768001317978, 
0.644733011722565], + [0.429883986711502, 0.466521978378296], + [0.455527991056442, 0.548622965812683], + [0.437114000320435, 0.558896005153656], + [0.467287987470627, 0.529924988746643], + [0.414712011814117, 0.335219979286194], + [0.37704598903656, 0.322777986526489], + [0.344107985496521, 0.320150971412659], + [0.312875986099243, 0.32233202457428], + [0.283526003360748, 0.333190023899078], + [0.241245999932289, 0.382785975933075], + [0.102986000478268, 0.468762993812561], + [0.267612010240555, 0.424560010433197], + [0.297879010438919, 0.433175981044769], + [0.333433985710144, 0.433878004550934], + [0.366427004337311, 0.426115989685059], + [0.396012008190155, 0.416696012020111], + [0.420121014118195, 0.41022801399231], + [0.007561000064015, 0.480777025222778], + [0.432949006557465, 0.569517970085144], + [0.458638995885849, 0.479089021682739], + [0.473466008901596, 0.545744001865387], + [0.476087987422943, 0.563830018043518], + [0.468472003936768, 0.555056989192963], + [0.433990985155106, 0.582361996173859], + [0.483518004417419, 0.562983989715576], + [0.482482999563217, 0.57784903049469], + [0.42645001411438, 0.389798998832703], + [0.438998997211456, 0.39649498462677], + [0.450067013502121, 0.400434017181396], + [0.289712011814117, 0.368252992630005], + [0.276670008897781, 0.363372981548309], + [0.517862021923065, 0.471948027610779], + [0.710287988185883, 0.380764007568359], + [0.526226997375488, 0.573909997940063], + [0.895093023777008, 0.254140973091125], + [0.634069979190826, 0.409575998783112], + [0.661242008209229, 0.41302502155304], + [0.688880026340485, 0.409460008144379], + [0.725341975688934, 0.389131009578705], + [0.606630027294159, 0.40370500087738], + [0.654766023159027, 0.344011008739471], + [0.629905998706818, 0.346076011657715], + [0.680678009986877, 0.347265005111694], + [0.702096998691559, 0.353591024875641], + [0.75221198797226, 0.410804986953735], + [0.602918028831482, 0.842862963676453], + [0.719901978969574, 0.375599980354309], + [0.893692970275879, 0.399959981441498], + [0.790081977844238, 0.391354024410248], + [0.643998026847839, 0.534487962722778], + [0.528249025344849, 0.65040397644043], + [0.525849997997284, 0.680191040039062], + [0.560214996337891, 0.657229006290436], + [0.585384011268616, 0.66654098033905], + [0.549625992774963, 0.680860996246338], + [0.57122802734375, 0.682691991329193], + [0.624852001667023, 0.72809898853302], + [0.513050019741058, 0.547281980514526], + [0.51509702205658, 0.527251958847046], + [0.742246985435486, 0.314507007598877], + [0.598631024360657, 0.454979002475739], + [0.570338010787964, 0.548575043678284], + [0.578631997108459, 0.533622980117798], + [0.723087012767792, 0.532054007053375], + [0.516445994377136, 0.499638974666595], + [0.662801027297974, 0.282917976379395], + [0.70362401008606, 0.293271005153656], + [0.830704987049103, 0.193813979625702], + [0.552385985851288, 0.302568018436432], + [0.607609987258911, 0.353887975215912], + [0.645429015159607, 0.696707010269165], + [0.932694971561432, 0.730105042457581], + [0.557260990142822, 0.572826027870178], + [0.542901992797852, 0.584792017936707], + [0.6180260181427, 0.694710969924927], + [0.607590973377228, 0.694203019142151], + [0.722943007946014, 0.271963000297546], + [0.577413976192474, 0.563166975975037], + [0.614082992076874, 0.281386971473694], + [0.616907000541687, 0.255886018276215], + [0.668509006500244, 0.119913995265961], + [0.770092010498047, 0.232020974159241], + [0.635536015033722, 0.189248979091644], + [0.77039098739624, 0.299556016921997], + [0.826722025871277, 
0.278755009174347], + [0.527121007442474, 0.666198015213013], + [0.553171992301941, 0.668527007102966], + [0.577238023281097, 0.673889994621277], + [0.554691970348358, 0.580065965652466], + [0.611896991729736, 0.693961024284363], + [0.59696102142334, 0.706539988517761], + [0.596370995044708, 0.693953037261963], + [0.539958000183105, 0.557139039039612], + [0.568841993808746, 0.692366003990173], + [0.547818005084991, 0.692366003990173], + [0.52461302280426, 0.692366003990173], + [0.534089982509613, 0.779141008853912], + [0.527670979499817, 0.736225962638855], + [0.526912987232208, 0.717857003211975], + [0.526877999305725, 0.704625964164734], + [0.526966989040375, 0.695277988910675], + [0.572058022022247, 0.695277988910675], + [0.573521018028259, 0.703539967536926], + [0.57683801651001, 0.711845993995667], + [0.581691026687622, 0.720062971115112], + [0.609944999217987, 0.639909982681274], + [0.986046016216278, 0.560034036636353], + [0.5867999792099, 0.69539999961853], + [0.590372025966644, 0.701822996139526], + [0.531915009021759, 0.601536989212036], + [0.577268004417419, 0.585934996604919], + [0.536915004253387, 0.593786001205444], + [0.627542972564697, 0.473352015018463], + [0.665585994720459, 0.495950996875763], + [0.588353991508484, 0.546862006187439], + [0.757824003696442, 0.14767599105835], + [0.709249973297119, 0.201507985591888], + [0.672684013843536, 0.256581008434296], + [0.600408971309662, 0.74900496006012], + [0.55826598405838, 0.261672019958496], + [0.570303976535797, 0.187870979309082], + [0.588165998458862, 0.109044015407562], + [0.711045026779175, 0.398952007293701], + [0.781069993972778, 0.435405015945435], + [0.587247014045715, 0.398931980133057], + [0.742869973182678, 0.355445981025696], + [0.572156012058258, 0.437651991844177], + [0.55186802148819, 0.536570012569427], + [0.821442008018494, 0.457556009292603], + [0.752701997756958, 0.457181990146637], + [0.71375697851181, 0.467626988887787], + [0.66711300611496, 0.460672974586487], + [0.631101012229919, 0.447153985500336], + [0.6008620262146, 0.432473003864288], + [0.523481011390686, 0.405627012252808], + [0.810747981071472, 0.523926019668579], + [0.771045982837677, 0.348959028720856], + [0.509127020835876, 0.562718033790588], + [0.595292985439301, 0.485023975372314], + [0.980530977249146, 0.401564002037048], + [0.573499977588654, 0.420000016689301], + [0.602994978427887, 0.548687994480133], + [0.733529984951019, 0.376977026462555], + [0.560611009597778, 0.519016981124878], + [0.967685997486115, 0.644356966018677], + [0.580985009670258, 0.387160003185272], + [0.537728011608124, 0.505385041236877], + [0.760966002941132, 0.779752969741821], + [0.801778972148895, 0.831938028335571], + [0.892440974712372, 0.54076099395752], + [0.816350996494293, 0.740260004997253], + [0.865594983100891, 0.333687007427216], + [0.614073991775513, 0.883246004581451], + [0.508952975273132, 0.579437971115112], + [0.617941975593567, 0.508316040039062], + [0.825608015060425, 0.397674977779388], + [0.681214988231659, 0.39623498916626], + [0.656635999679565, 0.400596976280212], + [0.603900015354156, 0.710216999053955], + [0.81208598613739, 0.588539004325867], + [0.56801301240921, 0.944564998149872], + [0.681007981300354, 0.898285031318665], + [0.733752012252808, 0.869701027870178], + [0.633830010890961, 0.398822009563446], + [0.606792986392975, 0.39553701877594], + [0.589659988880157, 0.391062021255493], + [0.805015981197357, 0.342108011245728], + [0.611334979534149, 0.362284004688263], + [0.634037971496582, 0.355970978736877], + [0.656635999679565, 
0.355356991291046], + [0.681214988231659, 0.35834002494812], + [0.698584973812103, 0.363156020641327], + [0.941866993904114, 0.319076001644135], + [0.698584973812103, 0.387449026107788], + [0.584177017211914, 0.624107003211975], + [0.554318010807037, 0.566076993942261], + [0.534153997898102, 0.62064003944397], + [0.711217999458313, 0.819975018501282], + [0.664629995822906, 0.852871000766754], + [0.559099972248077, 0.902631998062134], + [0.871706008911133, 0.791940987110138], + [0.591234028339386, 0.373893976211548], + [0.544341027736664, 0.451583981513977], + [0.624562978744507, 0.924192011356354], + [0.88577002286911, 0.615028977394104], + [0.551338016986847, 0.695277988910675], + [0.551980018615723, 0.704632043838501], + [0.552887976169586, 0.715808033943176], + [0.555167973041534, 0.730794012546539], + [0.569944024085999, 0.767035007476807], + [0.593203008174896, 0.685675978660583], + [0.599261999130249, 0.681069016456604], + [0.607599973678589, 0.677703022956848], + [0.631937980651855, 0.663500010967255], + [0.752032995223999, 0.601315021514893], + [0.547226011753082, 0.420395016670227], + [0.563543975353241, 0.359827995300293], + [0.583841025829315, 0.368713974952698], + [0.586614012718201, 0.692366003990173], + [0.771915018558502, 0.683578014373779], + [0.531597018241882, 0.352482974529266], + [0.588370978832245, 0.804440975189209], + [0.52079701423645, 0.442565023899078], + [0.567984998226166, 0.493479013442993], + [0.543282985687256, 0.819254994392395], + [0.655317008495331, 0.745514988899231], + [0.621008992195129, 0.574018001556396], + [0.625559985637665, 0.78031200170517], + [0.680198013782501, 0.570719003677368], + [0.64276397228241, 0.604337990283966], + [0.704662978649139, 0.621529996395111], + [0.552012026309967, 0.862591981887817], + [0.589071989059448, 0.508637011051178], + [0.685944974422455, 0.775357007980347], + [0.645735025405884, 0.812640011310577], + [0.675342977046967, 0.703978002071381], + [0.810858011245728, 0.646304965019226], + [0.72012197971344, 0.714666962623596], + [0.866151988506317, 0.682704985141754], + [0.663187026977539, 0.644596993923187], + [0.570082008838654, 0.466325998306274], + [0.544561982154846, 0.548375964164734], + [0.562758982181549, 0.558784961700439], + [0.531987011432648, 0.530140042304993], + [0.585271000862122, 0.335177004337311], + [0.622952997684479, 0.32277899980545], + [0.655896008014679, 0.320163011550903], + [0.687132000923157, 0.322345972061157], + [0.716481983661652, 0.333200991153717], + [0.758756995201111, 0.382786989212036], + [0.897013008594513, 0.468769013881683], + [0.732392013072968, 0.424547016620636], + [0.70211398601532, 0.433162987232208], + [0.66652500629425, 0.433866024017334], + [0.633504986763, 0.426087975502014], + [0.603875994682312, 0.416586995124817], + [0.579657971858978, 0.409945011138916], + [0.992439985275269, 0.480777025222778], + [0.567192018032074, 0.569419980049133], + [0.54136598110199, 0.478899002075195], + [0.526564002037048, 0.546118021011353], + [0.523913025856018, 0.563830018043518], + [0.531529009342194, 0.555056989192963], + [0.566035985946655, 0.582329034805298], + [0.51631098985672, 0.563053965568542], + [0.5174720287323, 0.577877044677734], + [0.573594987392426, 0.389806985855103], + [0.560697972774506, 0.395331978797913], + [0.549755990505219, 0.399751007556915], + [0.710287988185883, 0.368252992630005], + [0.723330020904541, 0.363372981548309], +]; diff --git a/src/handpose/box.js b/src/handpose/box.js new file mode 100644 index 00000000..3450ca7b --- /dev/null +++ b/src/handpose/box.js @@ 
-0,0 +1,65 @@ +const tf = require('@tensorflow/tfjs'); + +function getBoxSize(box) { + return [ + Math.abs(box.endPoint[0] - box.startPoint[0]), + Math.abs(box.endPoint[1] - box.startPoint[1]), + ]; +} +exports.getBoxSize = getBoxSize; +function getBoxCenter(box) { + return [ + box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2, + box.startPoint[1] + (box.endPoint[1] - box.startPoint[1]) / 2, + ]; +} +exports.getBoxCenter = getBoxCenter; +function cutBoxFromImageAndResize(box, image, cropSize) { + const h = image.shape[1]; + const w = image.shape[2]; + const boxes = [[ + box.startPoint[1] / h, box.startPoint[0] / w, box.endPoint[1] / h, + box.endPoint[0] / w, + ]]; + return tf.image.cropAndResize(image, boxes, [0], cropSize); +} +exports.cutBoxFromImageAndResize = cutBoxFromImageAndResize; +function scaleBoxCoordinates(box, factor) { + const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]]; + const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]]; + const palmLandmarks = box.palmLandmarks.map((coord) => { + const scaledCoord = [coord[0] * factor[0], coord[1] * factor[1]]; + return scaledCoord; + }); + return { startPoint, endPoint, palmLandmarks }; +} +exports.scaleBoxCoordinates = scaleBoxCoordinates; +function enlargeBox(box, factor = 1.5) { + const center = getBoxCenter(box); + const size = getBoxSize(box); + const newHalfSize = [factor * size[0] / 2, factor * size[1] / 2]; + const startPoint = [center[0] - newHalfSize[0], center[1] - newHalfSize[1]]; + const endPoint = [center[0] + newHalfSize[0], center[1] + newHalfSize[1]]; + return { startPoint, endPoint, palmLandmarks: box.palmLandmarks }; +} +exports.enlargeBox = enlargeBox; +function squarifyBox(box) { + const centers = getBoxCenter(box); + const size = getBoxSize(box); + const maxEdge = Math.max(...size); + const halfSize = maxEdge / 2; + const startPoint = [centers[0] - halfSize, centers[1] - halfSize]; + const endPoint = [centers[0] + halfSize, centers[1] + halfSize]; + return { startPoint, endPoint, palmLandmarks: box.palmLandmarks }; +} +exports.squarifyBox = squarifyBox; +function shiftBox(box, shiftFactor) { + const boxSize = [ + box.endPoint[0] - box.startPoint[0], box.endPoint[1] - box.startPoint[1], + ]; + const shiftVector = [boxSize[0] * shiftFactor[0], boxSize[1] * shiftFactor[1]]; + const startPoint = [box.startPoint[0] + shiftVector[0], box.startPoint[1] + shiftVector[1]]; + const endPoint = [box.endPoint[0] + shiftVector[0], box.endPoint[1] + shiftVector[1]]; + return { startPoint, endPoint, palmLandmarks: box.palmLandmarks }; +} +exports.shiftBox = shiftBox; diff --git a/src/handpose/hand.js b/src/handpose/hand.js new file mode 100644 index 00000000..8194bf55 --- /dev/null +++ b/src/handpose/hand.js @@ -0,0 +1,107 @@ +const tf = require('@tensorflow/tfjs'); +const bounding = require('./box'); + +class HandDetector { + constructor(model, width, height, anchors, iouThreshold, scoreThreshold) { + this.model = model; + this.width = width; + this.height = height; + this.iouThreshold = iouThreshold; + this.scoreThreshold = scoreThreshold; + this.anchors = anchors.map((anchor) => [anchor.x_center, anchor.y_center]); + this.anchorsTensor = tf.tensor2d(this.anchors); + this.inputSizeTensor = tf.tensor1d([width, height]); + this.doubleInputSizeTensor = tf.tensor1d([width * 2, height * 2]); + } + + normalizeBoxes(boxes) { + return tf.tidy(() => { + const boxOffsets = tf.slice(boxes, [0, 0], [-1, 2]); + const boxSizes = tf.slice(boxes, [0, 2], [-1, 2]); + const 
boxCenterPoints = tf.add(tf.div(boxOffsets, this.inputSizeTensor), this.anchorsTensor); + const halfBoxSizes = tf.div(boxSizes, this.doubleInputSizeTensor); + const startPoints = tf.mul(tf.sub(boxCenterPoints, halfBoxSizes), this.inputSizeTensor); + const endPoints = tf.mul(tf.add(boxCenterPoints, halfBoxSizes), this.inputSizeTensor); + return tf.concat2d([startPoints, endPoints], 1); + }); + } + + normalizeLandmarks(rawPalmLandmarks, index) { + return tf.tidy(() => { + const landmarks = tf.add(tf.div(rawPalmLandmarks.reshape([-1, 7, 2]), this.inputSizeTensor), this.anchors[index]); + return tf.mul(landmarks, this.inputSizeTensor); + }); + } + + async getBoundingBoxes(input) { + const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2)); + let batchedPrediction; + if (tf.getBackend() === 'webgl') { + // Currently tfjs-core does not pack depthwiseConv because it fails for + // very large inputs (https://github.com/tensorflow/tfjs/issues/1652). + // TODO(annxingyuan): call tf.enablePackedDepthwiseConv when available + // (https://github.com/tensorflow/tfjs/issues/2821) + const savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV'); + tf.env().set('WEBGL_PACK_DEPTHWISECONV', true); + // The model returns a tensor with the following shape: + // [1 (batch), 2944 (anchor points), 19 (data for each anchor)] + batchedPrediction = this.model.predict(normalizedInput); + tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag); + } else { + batchedPrediction = this.model.predict(normalizedInput); + } + const prediction = batchedPrediction.squeeze(); + // Regression score for each anchor point. + const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze()); + // Bounding box for each anchor point. + const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]); + const boxes = this.normalizeBoxes(rawBoxes); + const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, 1, this.iouThreshold, this.scoreThreshold); + const boxesWithHands = await boxesWithHandsTensor.array(); + const toDispose = [ + normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction, + boxes, rawBoxes, scores, + ]; + if (boxesWithHands.length === 0) { + toDispose.forEach((tensor) => tensor.dispose()); + return null; + } + const boxIndex = boxesWithHands[0]; + const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]); + const rawPalmLandmarks = tf.slice(prediction, [boxIndex, 5], [1, 14]); + const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([ + -1, 2, + ])); + toDispose.push(rawPalmLandmarks); + toDispose.forEach((tensor) => tensor.dispose()); + return { boxes: matchingBox, palmLandmarks }; + } + + /** + * Returns a Box identifying the bounding box of a hand within the image. + * Returns null if there is no hand in the image. + * + * @param input The image to classify. + */ + async estimateHandBounds(input) { + const inputHeight = input.shape[1]; + const inputWidth = input.shape[2]; + const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255)); + const prediction = await this.getBoundingBoxes(image); + if (prediction === null) { + image.dispose(); + return null; + } + // Calling arraySync on both boxes and palmLandmarks because the tensors are + // very small so it's not worth calling await array(). 
+    const boundingBoxes = prediction.boxes.arraySync();
+    const startPoint = boundingBoxes[0].slice(0, 2);
+    const endPoint = boundingBoxes[0].slice(2, 4);
+    const palmLandmarks = prediction.palmLandmarks.arraySync();
+    image.dispose();
+    prediction.boxes.dispose();
+    prediction.palmLandmarks.dispose();
+    return bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]);
+  }
+}
+exports.HandDetector = HandDetector;
diff --git a/src/handpose/index.js b/src/handpose/index.js
new file mode 100644
index 00000000..0be8a58c
--- /dev/null
+++ b/src/handpose/index.js
@@ -0,0 +1,93 @@
+const tf = require('@tensorflow/tfjs');
+const hand = require('./hand');
+const keypoints = require('./keypoints');
+const pipe = require('./pipeline');
+
+// Load the bounding box detector model.
+async function loadHandDetectorModel(url) {
+  return tf.loadGraphModel(url, { fromTFHub: url.includes('tfhub.dev') });
+}
+
+// Load the mesh detector model.
+async function loadHandPoseModel(url) {
+  return tf.loadGraphModel(url, { fromTFHub: url.includes('tfhub.dev') });
+}
+
+// In single shot detector pipelines, the output space is discretized into a set
+// of bounding boxes, each of which is assigned a score during prediction. The
+// anchors define the coordinates of these boxes.
+async function loadAnchors(url) {
+  return tf.util
+    .fetch(url)
+    .then((d) => d.json());
+}
+
+/**
+ * Load handpose.
+ *
+ * @param config A configuration object (the `hand` section of src/config.js) with the following properties:
+ * - `detector.anchors` URL of the JSON file with the SSD anchor coordinates.
+ * - `detector.modelPath` URL of the palm detector model.
+ * - `skeleton.modelPath` URL of the hand skeleton model.
+ * - `inputSize` Dimension the input image is resized to before detection.
+ * - `skipFrames` How many frames to go without re-running the bounding box
+ * detector. Set to a lower value if you want a safety net in case the mesh
+ * detector produces consistently flawed predictions.
+ * - `minConfidence` Threshold for discarding a prediction. Defaults to 0.8.
+ * - `iouThreshold` A float representing the threshold for deciding whether
+ * boxes overlap too much in non-maximum suppression. Must be between [0, 1].
+ * Defaults to 0.3.
+ * - `scoreThreshold` A threshold for deciding when to remove boxes based
+ * on score in non-maximum suppression. Defaults to 0.75.
+ */
+async function load(config) {
+  const [ANCHORS, handDetectorModel, handPoseModel] = await Promise.all([
+    loadAnchors(config.detector.anchors),
+    loadHandDetectorModel(config.detector.modelPath),
+    loadHandPoseModel(config.skeleton.modelPath),
+  ]);
+  const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold);
+  const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence);
+  // eslint-disable-next-line no-use-before-define
+  const handpose = new HandPose(pipeline);
+  return handpose;
+}
+exports.load = load;
+
+class HandPose {
+  constructor(pipeline) {
+    this.pipeline = pipeline;
+  }
+
+  static getAnnotations() {
+    return keypoints.MESH_ANNOTATIONS;
+  }
+
+  /**
+   * Finds hands in the input image.
+   *
+   * @param input The image to classify. Can be a tensor, DOM element image,
+   * video, or canvas.
+   * @param config Pipeline configuration with `skipFrames` and `minConfidence`
+   * (the `hand` section of src/config.js).
+ */ + async estimateHands(input, config) { + const image = tf.tidy(() => { + if (!(input instanceof tf.Tensor)) { + input = tf.browser.fromPixels(input); + } + return input.toFloat().expandDims(0); + }); + const prediction = await this.pipeline.estimateHand(image, config); + image.dispose(); + if (!prediction) return []; + const annotations = {}; + for (const key of Object.keys(keypoints.MESH_ANNOTATIONS)) { + annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]); + } + return [{ + confidence: prediction.confidence || 0, + box: prediction.box ? [prediction.box.topLeft[0], prediction.box.topLeft[1], prediction.box.bottomRight[0] - prediction.box.topLeft[0], prediction.box.bottomRight[1] - prediction.box.topLeft[1]] : 0, + landmarks: prediction.landmarks, + annotations, + }]; + } +} +exports.HandPose = HandPose; diff --git a/src/image.js b/src/image.js new file mode 100644 index 00000000..e0a485c4 --- /dev/null +++ b/src/image.js @@ -0,0 +1,127 @@ +const defaultFont = 'small-caps 1rem "Segoe UI"'; + +function clear(canvas) { + if (canvas) canvas.getContext('2d').clearRect(0, 0, canvas.width, canvas.height); +} + +function crop(image, x, y, width, height, { color = 'white', title = null, font = null }) { + const canvas = new OffscreenCanvas(width, height); + const ctx = canvas.getContext('2d'); + ctx.drawImage(image, x, y, width, height, 0, 0, canvas.width, canvas.height); + ctx.fillStyle = color; + ctx.font = font || defaultFont; + if (title) ctx.fillText(title, 2, 16, canvas.width - 4); + return canvas; +} + +function point({ canvas = null, x = 0, y = 0, color = 'white', radius = 2, title = null, font = null }) { + if (!canvas) return; + const ctx = canvas.getContext('2d'); + ctx.fillStyle = color; + ctx.beginPath(); + ctx.arc(x, y, radius, 0, 2 * Math.PI); + ctx.fill(); + ctx.font = font || defaultFont; + if (title) ctx.fillText(title, x + 10, y + 4); +} + +function rect({ canvas = null, x = 0, y = 0, width = 0, height = 0, radius = 8, lineWidth = 2, color = 'white', title = null, font = null }) { + if (!canvas) return; + const ctx = canvas.getContext('2d'); + ctx.lineWidth = lineWidth; + ctx.beginPath(); + ctx.moveTo(x + radius, y); + ctx.lineTo(x + width - radius, y); + ctx.quadraticCurveTo(x + width, y, x + width, y + radius); + ctx.lineTo(x + width, y + height - radius); + ctx.quadraticCurveTo(x + width, y + height, x + width - radius, y + height); + ctx.lineTo(x + radius, y + height); + ctx.quadraticCurveTo(x, y + height, x, y + height - radius); + ctx.lineTo(x, y + radius); + ctx.quadraticCurveTo(x, y, x + radius, y); + ctx.closePath(); + ctx.strokeStyle = color; + ctx.stroke(); + ctx.lineWidth = 1; + ctx.fillStyle = color; + ctx.font = font || defaultFont; + if (title) ctx.fillText(title, x + 4, y + 16); +} + +function line({ points = [], canvas = null, lineWidth = 2, color = 'white', title = null, font = null }) { + if (!canvas) return; + if (points.length < 2) return; + const ctx = canvas.getContext('2d'); + ctx.lineWidth = lineWidth; + ctx.beginPath(); + ctx.moveTo(points[0][0], points[0][1]); + for (const pt of points) ctx.lineTo(pt[0], pt[1]); + ctx.strokeStyle = color; + ctx.fillStyle = color; + ctx.stroke(); + ctx.lineWidth = 1; + ctx.font = font || defaultFont; + if (title) ctx.fillText(title, points[0][0] + 4, points[0][1] + 16); +} + +function spline({ points = [], canvas = null, tension = 0.5, lineWidth = 2, color = 'white', title = null, font = null }) { + if (!canvas) return; + if (points.length < 2) return; + const va = (arr, 
i, j) => [arr[2 * j] - arr[2 * i], arr[2 * j + 1] - arr[2 * i + 1]];
+  const distance = (arr, i, j) => Math.sqrt(((arr[2 * i] - arr[2 * j]) ** 2) + ((arr[2 * i + 1] - arr[2 * j + 1]) ** 2));
+  // eslint-disable-next-line no-unused-vars
+  function ctlpts(x1, y1, x2, y2, x3, y3) {
+    // eslint-disable-next-line prefer-rest-params
+    const v = va(arguments, 0, 2);
+    // eslint-disable-next-line prefer-rest-params
+    const d01 = distance(arguments, 0, 1);
+    // eslint-disable-next-line prefer-rest-params
+    const d12 = distance(arguments, 1, 2);
+    const d012 = d01 + d12;
+    return [
+      x2 - v[0] * tension * d01 / d012, y2 - v[1] * tension * d01 / d012,
+      x2 + v[0] * tension * d12 / d012, y2 + v[1] * tension * d12 / d012,
+    ];
+  }
+  const pts = [];
+  for (const pt of points) {
+    pts.push(pt[0]);
+    pts.push(pt[1]);
+  }
+  let cps = [];
+  // one set of control points per interior point, i.e. (points.length - 2) sets
+  for (let i = 0; i < (pts.length / 2) - 2; i += 1) {
+    cps = cps.concat(ctlpts(pts[2 * i + 0], pts[2 * i + 1], pts[2 * i + 2], pts[2 * i + 3], pts[2 * i + 4], pts[2 * i + 5]));
+  }
+  const ctx = canvas.getContext('2d');
+  ctx.lineWidth = lineWidth;
+  ctx.strokeStyle = color;
+  ctx.fillStyle = color;
+  if (points.length === 2) {
+    ctx.beginPath();
+    ctx.moveTo(pts[0], pts[1]);
+    ctx.lineTo(pts[2], pts[3]);
+  } else {
+    ctx.beginPath();
+    ctx.moveTo(pts[0], pts[1]);
+    // first segment is a quadratic
+    ctx.quadraticCurveTo(cps[0], cps[1], pts[2], pts[3]);
+    // for all middle points, connect with bezier
+    let i;
+    for (i = 2; i < ((pts.length / 2) - 1); i += 1) {
+      ctx.bezierCurveTo(cps[(2 * (i - 1) - 1) * 2], cps[(2 * (i - 1) - 1) * 2 + 1], cps[(2 * (i - 1)) * 2], cps[(2 * (i - 1)) * 2 + 1], pts[i * 2], pts[i * 2 + 1]);
+    }
+    // last segment is a quadratic
+    ctx.quadraticCurveTo(cps[(2 * (i - 1) - 1) * 2], cps[(2 * (i - 1) - 1) * 2 + 1], pts[i * 2], pts[i * 2 + 1]);
+  }
+  ctx.stroke();
+  ctx.lineWidth = 1;
+  ctx.font = font || defaultFont;
+  if (title) ctx.fillText(title, points[0][0] + 4, points[0][1] + 16);
+}
+
+exports.crop = crop;
+exports.rect = rect;
+exports.point = point;
+exports.line = line;
+exports.spline = spline;
+exports.clear = clear;
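These helpers are exported but not yet called anywhere in the library (see the README todo about draw helper functions). A minimal usage sketch, assuming a hypothetical `<canvas id="output">` element on the page and the module loaded as `image`:

```js
const image = require('./image.js');

const canvas = document.getElementById('output'); // hypothetical target canvas
image.clear(canvas);
image.rect({ canvas, x: 10, y: 10, width: 120, height: 80, color: 'red', title: 'face' }); // rounded bounding box with label
image.point({ canvas, x: 70, y: 50, color: 'lime', radius: 3 }); // single landmark
image.spline({ points: [[10, 120], [50, 90], [90, 130], [130, 100]], canvas, tension: 0.5, color: 'cyan' }); // smoothed curve through landmarks
```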
diff --git a/src/index.js b/src/index.js
new file mode 100644
index 00000000..05b7e5ba
--- /dev/null
+++ b/src/index.js
@@ -0,0 +1,81 @@
+const facemesh = require('./facemesh/index.js');
+const ssrnet = require('./ssrnet/index.js');
+const posenet = require('./posenet/index.js');
+const handpose = require('./handpose/index.js');
+// const image = require('./image.js');
+// const triangulation = require('./triangulation.js').default;
+const defaults = require('./config.js').default;
+
+const models = {
+  facemesh: null,
+  blazeface: null,
+  ssrnet: null,
+  iris: null,
+};
+
+function mergeDeep(...objects) {
+  const isObject = (obj) => obj && typeof obj === 'object';
+  return objects.reduce((prev, obj) => {
+    Object.keys(obj).forEach((key) => {
+      const pVal = prev[key];
+      const oVal = obj[key];
+      if (Array.isArray(pVal) && Array.isArray(oVal)) {
+        prev[key] = pVal.concat(...oVal);
+      } else if (isObject(pVal) && isObject(oVal)) {
+        prev[key] = mergeDeep(pVal, oVal);
+      } else {
+        prev[key] = oVal;
+      }
+    });
+    return prev;
+  }, {});
+}
+
+async function detect(input, userConfig) {
+  // guard against a missing user configuration since the options parameter is optional
+  const config = mergeDeep(defaults, userConfig || {});
+
+  // run posenet
+  let poseRes = [];
+  if (config.body.enabled) {
+    if (!models.posenet) models.posenet = await posenet.load(config.body);
+    poseRes = await models.posenet.estimateMultiplePoses(input, config.body);
+  }
+
+  // run handpose
+  let handRes = [];
+  if (config.hand.enabled) {
+    if (!models.handpose) models.handpose = await handpose.load(config.hand);
+    handRes = await models.handpose.estimateHands(input, config.hand);
+  }
+
+  // run facemesh, includes blazeface and iris
+  const faceRes = [];
+  if (config.face.enabled) {
+    if (!models.facemesh) models.facemesh = await facemesh.load(config.face);
+    const faces = await models.facemesh.estimateFaces(input, config.face);
+    for (const face of faces) {
+      // run ssr-net age & gender, inherits face from blazeface
+      const ssrdata = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
+      // iris: array[ bottom, left, top, right, center ]
+      const iris = (face.annotations.leftEyeIris && face.annotations.rightEyeIris)
+        ? Math.max(face.annotations.leftEyeIris[3][0] - face.annotations.leftEyeIris[1][0], face.annotations.rightEyeIris[3][0] - face.annotations.rightEyeIris[1][0])
+        : 0;
+      faceRes.push({
+        confidence: face.confidence,
+        box: face.box,
+        mesh: face.mesh,
+        annotations: face.annotations,
+        age: ssrdata.age,
+        gender: ssrdata.gender,
+        iris: (iris !== 0) ? Math.trunc(100 * 11.7 / iris) / 100 : 0,
+      });
+    }
+  }
+
+  // combine results
+  return { face: faceRes, body: poseRes, hand: handRes };
+}
+
+exports.detect = detect;
+exports.defaults = defaults;
+exports.models = models;
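For reference, a minimal sketch of consuming the combined result object returned by `detect()` above (assuming a browser context with the library bundled as `human`, an `image` input, and the field layout shown in the README's Outputs section):

```js
const result = await human.detect(image);
// each section is an empty array when its module is disabled
for (const face of result.face) {
  console.log(`face: ${face.gender} age ${face.age} confidence ${face.confidence}`);
}
for (const body of result.body) {
  console.log(`body: score ${body.score} with ${body.keypoints.length} keypoints`);
}
for (const hand of result.hand) {
  console.log(`hand: confidence ${hand.confidence} box ${hand.box}`);
}
```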