initial public commit

pull/280/head
Vladimir Mandic 2020-10-11 19:22:43 -04:00
parent 3117d60d65
commit c018daee34
14 changed files with 1447 additions and 0 deletions

.eslintrc.json (new file, 54 lines)

@@ -0,0 +1,54 @@
{
"globals": {},
"env": {
"browser": true,
"commonjs": true,
"es6": true,
"node": true,
"jquery": true,
"es2020": true
},
"parserOptions": { "ecmaVersion": 2020 },
"plugins": [ ],
"extends": [
"eslint:recommended",
"plugin:import/errors",
"plugin:import/warnings",
"plugin:node/recommended",
"plugin:promise/recommended",
"plugin:json/recommended-with-comments",
"airbnb-base"
],
"ignorePatterns": [ "dist", "assets", "media", "models", "node_modules" ],
"rules": {
"max-len": [1, 275, 3],
"camelcase": "off",
"guard-for-in": "off",
"prefer-template":"off",
"import/extensions": "off",
"func-names": "off",
"no-await-in-loop": "off",
"no-bitwise": "off",
"no-case-declarations":"off",
"no-continue": "off",
"no-loop-func": "off",
"no-mixed-operators": "off",
"no-param-reassign":"off",
"no-plusplus": "off",
"dot-notation": "off",
"no-restricted-globals": "off",
"no-restricted-syntax": "off",
"no-underscore-dangle": "off",
"newline-per-chained-call": "off",
"node/no-unsupported-features/es-syntax": "off",
"node/shebang": "off",
"object-curly-newline": "off",
"prefer-destructuring": "off",
"promise/always-return": "off",
"promise/catch-or-return": "off",
"promise/no-nesting": "off",
"import/no-absolute-path": "off",
"no-regex-spaces": "off",
"radix": "off"
}
}

.gitignore (vendored, new file, 1 line)

@@ -0,0 +1 @@
node_modules

README.md (new file, 173 lines)

@@ -0,0 +1,173 @@
# Human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking and Age & Gender Prediction
URL: <https://github.com/vladmandic/human>
*Suggestions are welcome!*
## Credits
This is an amalgamation of multiple existing models:
- Face Detection: [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
- Facial Spatial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
- Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
- Hand Detection & Skeleton: [**MediaPipe HandPose**](https://drive.google.com/file/d/1sv4sSb9BSNVZhLzxXJ0jBv9DqD-4jnAz/view)
- Body Pose Detection: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
- Age & Gender Prediction: [**SSR-Net**](https://github.com/shamangary/SSR-Net)
## Install
```shell
npm install @vladmandic/human
```
All pre-trained models are included in the `/models` folder (25MB total)
## Demo
Demo is included in `/demo`
## Requirements
`Human` library is based on [TensorFlow/JS (TFJS)](https://js.tensorflow.org), but does not package it, to allow for independent version management - import `tfjs` before importing `Human`
## Usage
`Human` library does not require special initialization.
All configuration is done in a single JSON object, and all model weights are loaded dynamically upon first use (and only then; `Human` will not load weights that it does not need for the current configuration).
There is only *ONE* method you need:
```js
import * as tf from '@tensorflow/tfjs';
import human from '@vladmandic/human';
// 'image': any image object: HTMLImageElement, HTMLVideoElement, HTMLMediaElement, Canvas or a 4D Tensor
// 'options': optional parameter used to override any options present in the default configuration
const results = await human.detect(image, options);
```
Additionally, `Human` library exposes two objects:
```js
human.defaults // default configuration object
human.models // dynamically maintained object of any loaded models
```
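For example, after a detection you can inspect which models have been dynamically loaded so far (a minimal sketch; `image` stands for any supported input):
```js
const result = await human.detect(image);
// keep only the entries that have actually been loaded
const loaded = Object.keys(human.models).filter((name) => human.models[name]);
console.log('loaded models:', loaded);
```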
## Configuration
Below is the output of the `human.defaults` object
Any property can be overridden by passing a user object during `human.detect()`
Note that the user object and the default configuration are merged using deep-merge, so you do not need to redefine the entire configuration; see the example after the parameter list below
```js
human.defaults = {
face: {
enabled: true,
detector: {
modelPath: '/models/human/blazeface/model.json',
maxFaces: 10,
skipFrames: 5,
minConfidence: 0.8,
iouThreshold: 0.3,
scoreThreshold: 0.75,
},
mesh: {
enabled: true,
modelPath: '/models/human/facemesh/model.json',
},
iris: {
enabled: true,
modelPath: '/models/human/iris/model.json',
},
age: {
enabled: true,
modelPath: '/models/human/ssrnet-imdb-age/model.json',
skipFrames: 5,
},
gender: {
enabled: true,
modelPath: '/models/human/ssrnet-imdb-gender/model.json',
},
},
body: {
enabled: true,
modelPath: '/models/human/posenet/model.json',
maxDetections: 5,
scoreThreshold: 0.75,
nmsRadius: 20,
},
hand: {
enabled: true,
skipFrames: 5,
minConfidence: 0.8,
iouThreshold: 0.3,
scoreThreshold: 0.75,
detector: {
anchors: '/models/human/handdetect/anchors.json',
modelPath: '/models/human/handdetect/model.json',
},
skeleton: {
modelPath: '/models/human/handskeleton/model.json',
},
},
};
```
Where:
- `enabled`: controls if the specified module is enabled (note: a module is not loaded until it is required)
- `modelPath`: path to the specific pre-trained model weights
- `maxFaces`, `maxDetections`: how many faces or people to analyze; limiting the number in busy scenes results in higher performance
- `skipFrames`: how many frames to skip before re-running bounding box detection (e.g., a face does not move much within a video, so it is fine to reuse the previously detected position and only run face geometry analysis)
- `minConfidence`: threshold for discarding a prediction
- `iouThreshold`: threshold for deciding whether boxes overlap too much in non-maximum suppression
- `scoreThreshold`: threshold for deciding when to remove boxes based on score in non-maximum suppression
- `nmsRadius`: radius for deciding whether points are too close in non-maximum suppression
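For example, to disable age and gender prediction while keeping every other default, pass only the properties you want to change (a minimal sketch that relies on the deep-merge behavior described above; `image` stands for any supported input):
```js
const results = await human.detect(image, {
  face: {
    age: { enabled: false },
    gender: { enabled: false },
  },
});
```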
## Outputs
Result of `human.detect()` is a single object that includes data for all enabled modules and all detected objects:
```js
result = {
face: // <array of detected objects>
[
{
confidence: // <number>
box: // <array [x, y, width, height]>
mesh: // <array of points [x, y, z]> (468 base points & 10 iris points)
annotations: // <list of object { landmark: array of points }> (32 base annotated landmarks & 2 iris annotations)
iris: // <number> (relative distance of iris to camera; multiply by focal length to get actual distance)
age: // <number> (estimated age)
gender: // <string> (male or female)
}
],
body: // <array of detected objects>
[
{
score: // <number>,
keypoints: // <array of landmarks [ score, landmark, position [x, y] ]> (17 annotated landmarks)
}
],
hand: // <array of detected objects>
[
{
confidence: // <number>,
box: // <array [x, y, width, height]>,
landmarks: // <array of points [x, y, z]> (21 points)
annotations: // <array of landmarks [ landmark: <array of points> ]> (5 annotated landmarks)
}
]
}
```
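A minimal sketch of consuming this result, using only the properties documented above (`image` stands for any supported input):
```js
const result = await human.detect(image);
// each module contributes its own array of detections
for (const face of result.face) console.log(`face: ${face.gender} age: ${face.age} confidence: ${face.confidence}`);
for (const body of result.body) console.log(`body score: ${body.score}`);
for (const hand of result.hand) console.log(`hand confidence: ${hand.confidence}`);
```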
## Performance
Of course, performance will vary depending on your hardware, but also on the number of enabled modules and their parameters.
For example, on a low-end NVIDIA GTX 1050 it can perform face detection at 50+ FPS, but drops below 5 FPS with all modules enabled.
## Todo
- Improve detection of smaller faces, add BlazeFace back model
- Create a demo and host it on GitHub Pages
- Implement draw helper functions
- Sample Images
- Rename human to human

demo/index.html (new file, 25 lines)

@@ -0,0 +1,25 @@
<head>
<script src="https://cdn.jsdelivr.net/npm/three@0.106.2/build/three.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/scatter-gl@0.0.1/lib/scatter-gl.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/tensorflow/2.6.0/tf.es2017.min.js"></script>
<style>
.canvas-wrapper { display: inline-block; vertical-align: top; }
#scatter-gl-container { display: inline-block; vertical-align: top; border: solid 1px black; position: relative; }
#scatter-gl-container canvas { transform: translate3d(-50%, -50%, 0); left: 50%; top: 50%; position: absolute; }
</style>
</head>
<body>
<div id="main">
<div class="container">
<div class="canvas-wrapper">
<canvas id="output"></canvas>
<video id="video" playsinline style="visibility: hidden; width: auto; height: auto">
</video>
</div>
<div id="scatter-gl-container"></div>
<div id="faces"></div>
</div>
</div>
</body>
<script src="https://cdnjs.cloudflare.com/ajax/libs/dat-gui/0.7.6/dat.gui.min.js"></script>
<script type="module" src="./index.js"></script>

demo/index.js (new file, 120 lines)

@@ -0,0 +1,120 @@
/* global tf, ScatterGL, dat */
import human from '../dist/human.esm.js';
const state = {
backend: 'webgl',
triangulateMesh: true,
renderPointcloud: true,
stop: false,
videoSize: 700,
};
const options = {
// dat.GUI can only bind properties that already exist on the object,
// so seed the tunable values with the library defaults
maxFaces: 10,
detectionConfidence: 0.8,
iouThreshold: 0.3,
scoreThreshold: 0.75,
};
let ctx;
let videoWidth;
let videoHeight;
let video;
let canvas;
let scatterGLHasInitialized = false;
let scatterGL;
async function renderPrediction() {
const predictions = await human.detect(video);
ctx.drawImage(video, 0, 0, videoWidth, videoHeight, 0, 0, canvas.width, canvas.height);
const div = document.getElementById('faces');
div.innerHTML = '';
for (const prediction of predictions.face) {
div.appendChild(prediction.canvas);
ctx.beginPath();
ctx.rect(prediction.box[0], prediction.box[1], prediction.box[2], prediction.box[3]);
ctx.font = 'small-caps 1rem "Segoe UI"';
ctx.fillText(`${prediction.gender} ${prediction.age}`, prediction.box[0] + 2, prediction.box[1] + 16, prediction.box[2]);
ctx.stroke();
if (state.triangulateMesh) {
for (let i = 0; i < human.triangulation.length / 3; i++) {
const points = [human.triangulation[i * 3], human.triangulation[i * 3 + 1], human.triangulation[i * 3 + 2]].map((index) => prediction.mesh[index]);
const region = new Path2D();
region.moveTo(points[0][0], points[0][1]);
for (let j = 1; j < points.length; j++) region.lineTo(points[j][0], points[j][1]);
region.closePath();
ctx.stroke(region);
}
} else {
for (let i = 0; i < prediction.mesh.length; i++) {
const x = prediction.mesh[i][0];
const y = prediction.mesh[i][1];
ctx.beginPath();
ctx.arc(x, y, 1 /* radius */, 0, 2 * Math.PI);
ctx.fill();
}
}
if (state.renderPointcloud && scatterGL != null) {
const pointsData = predictions.face.map((pred) => pred.mesh.map((point) => ([-point[0], -point[1], -point[2]])));
let flattenedPointsData = [];
for (let i = 0; i < pointsData.length; i++) {
flattenedPointsData = flattenedPointsData.concat(pointsData[i]);
}
const dataset = new ScatterGL.Dataset(flattenedPointsData);
if (!scatterGLHasInitialized) scatterGL.render(dataset);
else scatterGL.updateDataset(dataset);
scatterGLHasInitialized = true;
}
}
if (!state.stop) requestAnimationFrame(renderPrediction);
}
function setupDatGui() {
const gui = new dat.GUI();
gui.add(state, 'stop').onChange(() => { renderPrediction(); });
gui.add(state, 'backend', ['webgl', 'cpu']).onChange((backend) => { tf.setBackend(backend); });
gui.add(options, 'maxFaces', 1, 100, 1).onChange(() => { human.load(options); });
gui.add(options, 'detectionConfidence', 0, 1, 0.05).onChange(() => { human.load(options); });
gui.add(options, 'iouThreshold', 0, 1, 0.05).onChange(() => { human.load(options); });
gui.add(options, 'scoreThreshold', 0, 1, 0.05).onChange(() => { human.load(options); });
gui.add(state, 'triangulateMesh');
gui.add(state, 'renderPointcloud').onChange((render) => { document.querySelector('#scatter-gl-container').style.display = render ? 'inline-block' : 'none'; });
}
async function setupCamera() {
video = document.getElementById('video');
const stream = await navigator.mediaDevices.getUserMedia({
audio: false,
video: { facingMode: 'user', width: state.videoSize, height: state.videoSize },
});
video.srcObject = stream;
return new Promise((resolve) => {
video.onloadedmetadata = () => resolve(video);
});
}
async function main() {
await tf.setBackend(state.backend);
setupDatGui();
await setupCamera();
video.play();
videoWidth = video.videoWidth;
videoHeight = video.videoHeight;
video.width = videoWidth;
video.height = videoHeight;
canvas = document.getElementById('output');
canvas.width = videoWidth;
canvas.height = videoHeight;
const canvasContainer = document.querySelector('.canvas-wrapper');
canvasContainer.style = `width: ${videoWidth}px; height: ${videoHeight}px`;
ctx = canvas.getContext('2d');
// ctx.translate(canvas.width, 0);
// ctx.scale(-1, 1);
ctx.fillStyle = '#32EEDB';
ctx.strokeStyle = '#32EEDB';
ctx.lineWidth = 0.5;
human.load(options);
renderPrediction();
if (state.renderPointcloud) {
document.querySelector('#scatter-gl-container').style = `width: ${state.videoSize}px; height: ${state.videoSize}px;`;
scatterGL = new ScatterGL(document.querySelector('#scatter-gl-container'), { rotateOnStart: false, selectEnabled: false });
}
}
main();

package.json (new file, 51 lines)

@@ -0,0 +1,51 @@
{
"name": "@vladmandic/human",
"version": "0.1.3",
"description": "human: 3D Face Detection, Iris Tracking and Age & Gender Prediction",
"sideEffects": false,
"main": "src/index.js",
"module": "dist/human.esm.js",
"browser": "dist/human.js",
"author": "Vladimir Mandic <mandic00@live.com>",
"bugs": {
"url": "https://github.com/vladmandic/human/issues"
},
"homepage": "https://github.com/vladmandic/human#readme",
"license": "MIT",
"engines": {
"node": ">=14.0.0"
},
"repository": {
"type": "git",
"url": "git+https://github.com/vladmandic/human.git"
},
"dependencies": {
"@tensorflow/tfjs": "^2.6.0"
},
"devDependencies": {
"esbuild": "^0.7.13",
"eslint": "^7.10.0",
"eslint-config-airbnb-base": "^14.2.0",
"eslint-plugin-import": "^2.22.1",
"eslint-plugin-json": "^2.1.2",
"eslint-plugin-node": "^11.1.0",
"eslint-plugin-promise": "^4.2.1",
"rimraf": "^3.0.2"
},
"scripts": {
"build": "rimraf dist/ && npm run build-esm && npm run build-iife",
"build-esm": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --external:@tensorflow --outfile=dist/human.esm.js src/index.js",
"build-iife": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=iife --minify --global-name=human --outfile=dist/human.js src/index.js"
},
"keywords": [
"face detection",
"detection",
"recognition",
"blazeface",
"facemesh",
"ssrnet",
"tensorflow",
"tensorflowjs",
"tfjs"
]
}

src/config.js (new file, 58 lines)

@@ -0,0 +1,58 @@
export default {
face: {
enabled: true, // refers to detector, but since all other face modules rely on detector, it should be a global
detector: {
modelPath: '/models/blazeface/model.json',
inputSize: 128, // fixed value
maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
skipFrames: 5, // how many frames to go without running the bounding box detector, only relevant if maxFaces > 1
minConfidence: 0.8, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression, must be between [0, 1]
scoreThreshold: 0.75, // threshold for deciding when to remove boxes based on score in non-maximum suppression
},
mesh: {
enabled: true,
modelPath: '/models/facemesh/model.json',
inputSize: 192, // fixed value
},
iris: {
enabled: true,
modelPath: '/models/iris/model.json',
inputSize: 192, // fixed value
},
age: {
enabled: true,
modelPath: '/models/ssrnet-age/imdb/model.json',
inputSize: 64, // fixed value
skipFrames: 5,
},
gender: {
enabled: true,
modelPath: '/models/ssrnet-gender/imdb/model.json',
},
},
body: {
enabled: true,
modelPath: '/models/posenet/model.json',
inputResolution: 257, // fixed value
outputStride: 16, // fixed value
maxDetections: 5,
scoreThreshold: 0.75,
nmsRadius: 20,
},
hand: {
enabled: true,
inputSize: 256, // fixed value
skipFrames: 5,
minConfidence: 0.8,
iouThreshold: 0.3,
scoreThreshold: 0.75,
detector: {
anchors: '/models/handdetect/anchors.json',
modelPath: '/models/handdetect/model.json',
},
skeleton: {
modelPath: '/models/handskeleton/model.json',
},
},
};

src/facemesh/uvcoords.js (new file, 470 lines)

@@ -0,0 +1,470 @@
exports.UV_COORDS = [
[0.499976992607117, 0.652534008026123],
[0.500025987625122, 0.547487020492554],
[0.499974012374878, 0.602371990680695],
[0.482113003730774, 0.471979022026062],
[0.500150978565216, 0.527155995368958],
[0.499909996986389, 0.498252987861633],
[0.499523013830185, 0.40106201171875],
[0.289712011814117, 0.380764007568359],
[0.499954998493195, 0.312398016452789],
[0.499987006187439, 0.269918978214264],
[0.500023007392883, 0.107050001621246],
[0.500023007392883, 0.666234016418457],
[0.5000159740448, 0.679224014282227],
[0.500023007392883, 0.692348003387451],
[0.499976992607117, 0.695277988910675],
[0.499976992607117, 0.70593398809433],
[0.499976992607117, 0.719385027885437],
[0.499976992607117, 0.737019002437592],
[0.499967992305756, 0.781370997428894],
[0.499816000461578, 0.562981009483337],
[0.473773002624512, 0.573909997940063],
[0.104906998574734, 0.254140973091125],
[0.365929991006851, 0.409575998783112],
[0.338757991790771, 0.41302502155304],
[0.311120003461838, 0.409460008144379],
[0.274657994508743, 0.389131009578705],
[0.393361985683441, 0.403706014156342],
[0.345234006643295, 0.344011008739471],
[0.370094001293182, 0.346076011657715],
[0.319321990013123, 0.347265005111694],
[0.297903001308441, 0.353591024875641],
[0.24779200553894, 0.410809993743896],
[0.396889001131058, 0.842755019664764],
[0.280097991228104, 0.375599980354309],
[0.106310002505779, 0.399955987930298],
[0.2099249958992, 0.391353011131287],
[0.355807989835739, 0.534406006336212],
[0.471751004457474, 0.65040397644043],
[0.474155008792877, 0.680191993713379],
[0.439785003662109, 0.657229006290436],
[0.414617002010345, 0.66654098033905],
[0.450374007225037, 0.680860996246338],
[0.428770989179611, 0.682690978050232],
[0.374971002340317, 0.727805018424988],
[0.486716985702515, 0.547628998756409],
[0.485300987958908, 0.527395009994507],
[0.257764995098114, 0.314490020275116],
[0.401223003864288, 0.455172002315521],
[0.429818987846375, 0.548614978790283],
[0.421351999044418, 0.533740997314453],
[0.276895999908447, 0.532056987285614],
[0.483370006084442, 0.499586999416351],
[0.33721199631691, 0.282882988452911],
[0.296391993761063, 0.293242990970612],
[0.169294998049736, 0.193813979625702],
[0.447580009698868, 0.302609980106354],
[0.392390012741089, 0.353887975215912],
[0.354490011930466, 0.696784019470215],
[0.067304998636246, 0.730105042457581],
[0.442739009857178, 0.572826027870178],
[0.457098007202148, 0.584792017936707],
[0.381974011659622, 0.694710969924927],
[0.392388999462128, 0.694203019142151],
[0.277076005935669, 0.271932005882263],
[0.422551989555359, 0.563233017921448],
[0.385919004678726, 0.281364023685455],
[0.383103013038635, 0.255840003490448],
[0.331431001424789, 0.119714021682739],
[0.229923993349075, 0.232002973556519],
[0.364500999450684, 0.189113974571228],
[0.229622006416321, 0.299540996551514],
[0.173287004232407, 0.278747975826263],
[0.472878992557526, 0.666198015213013],
[0.446828007698059, 0.668527007102966],
[0.422762006521225, 0.673889994621277],
[0.445307999849319, 0.580065965652466],
[0.388103008270264, 0.693961024284363],
[0.403039008378983, 0.706539988517761],
[0.403629004955292, 0.693953037261963],
[0.460041999816895, 0.557139039039612],
[0.431158006191254, 0.692366003990173],
[0.452181994915009, 0.692366003990173],
[0.475387006998062, 0.692366003990173],
[0.465828001499176, 0.779190003871918],
[0.472328990697861, 0.736225962638855],
[0.473087012767792, 0.717857003211975],
[0.473122000694275, 0.704625964164734],
[0.473033010959625, 0.695277988910675],
[0.427942007780075, 0.695277988910675],
[0.426479011774063, 0.703539967536926],
[0.423162013292313, 0.711845993995667],
[0.4183090031147, 0.720062971115112],
[0.390094995498657, 0.639572978019714],
[0.013953999616206, 0.560034036636353],
[0.499913990497589, 0.58014702796936],
[0.413199990987778, 0.69539999961853],
[0.409626007080078, 0.701822996139526],
[0.468080013990402, 0.601534962654114],
[0.422728985548019, 0.585985004901886],
[0.463079988956451, 0.593783974647522],
[0.37211999297142, 0.47341400384903],
[0.334562003612518, 0.496073007583618],
[0.411671012639999, 0.546965003013611],
[0.242175996303558, 0.14767599105835],
[0.290776997804642, 0.201445996761322],
[0.327338010072708, 0.256527006626129],
[0.399509996175766, 0.748921036720276],
[0.441727995872498, 0.261676013469696],
[0.429764986038208, 0.187834024429321],
[0.412198007106781, 0.108901023864746],
[0.288955003023148, 0.398952007293701],
[0.218936994671822, 0.435410976409912],
[0.41278201341629, 0.398970007896423],
[0.257135003805161, 0.355440020561218],
[0.427684992551804, 0.437960982322693],
[0.448339998722076, 0.536936044692993],
[0.178560003638268, 0.45755398273468],
[0.247308000922203, 0.457193970680237],
[0.286267012357712, 0.467674970626831],
[0.332827985286713, 0.460712015628815],
[0.368755996227264, 0.447206974029541],
[0.398963987827301, 0.432654976844788],
[0.476410001516342, 0.405806005001068],
[0.189241006970406, 0.523923993110657],
[0.228962004184723, 0.348950982093811],
[0.490725994110107, 0.562400996685028],
[0.404670000076294, 0.485132992267609],
[0.019469000399113, 0.401564002037048],
[0.426243007183075, 0.420431017875671],
[0.396993011236191, 0.548797011375427],
[0.266469985246658, 0.376977026462555],
[0.439121007919312, 0.51895797252655],
[0.032313998788595, 0.644356966018677],
[0.419054001569748, 0.387154996395111],
[0.462783008813858, 0.505746960639954],
[0.238978996872902, 0.779744982719421],
[0.198220998048782, 0.831938028335571],
[0.107550002634525, 0.540755033493042],
[0.183610007166862, 0.740257024765015],
[0.134409993886948, 0.333683013916016],
[0.385764002799988, 0.883153975009918],
[0.490967005491257, 0.579378008842468],
[0.382384985685349, 0.508572995662689],
[0.174399003386497, 0.397670984268188],
[0.318785011768341, 0.39623498916626],
[0.343364000320435, 0.400596976280212],
[0.396100014448166, 0.710216999053955],
[0.187885001301765, 0.588537991046906],
[0.430987000465393, 0.944064974784851],
[0.318993002176285, 0.898285031318665],
[0.266247987747192, 0.869701027870178],
[0.500023007392883, 0.190576016902924],
[0.499976992607117, 0.954452991485596],
[0.366169989109039, 0.398822009563446],
[0.393207013607025, 0.39553701877594],
[0.410373002290726, 0.391080021858215],
[0.194993004202843, 0.342101991176605],
[0.388664990663528, 0.362284004688263],
[0.365961998701096, 0.355970978736877],
[0.343364000320435, 0.355356991291046],
[0.318785011768341, 0.35834002494812],
[0.301414996385574, 0.363156020641327],
[0.058132998645306, 0.319076001644135],
[0.301414996385574, 0.387449026107788],
[0.499987989664078, 0.618434011936188],
[0.415838003158569, 0.624195992946625],
[0.445681989192963, 0.566076993942261],
[0.465844005346298, 0.620640993118286],
[0.49992299079895, 0.351523995399475],
[0.288718998432159, 0.819945991039276],
[0.335278987884521, 0.852819979190826],
[0.440512001514435, 0.902418971061707],
[0.128294005990028, 0.791940987110138],
[0.408771991729736, 0.373893976211548],
[0.455606997013092, 0.451801002025604],
[0.499877005815506, 0.908990025520325],
[0.375436991453171, 0.924192011356354],
[0.11421000212431, 0.615022003650665],
[0.448662012815475, 0.695277988910675],
[0.4480200111866, 0.704632043838501],
[0.447111994028091, 0.715808033943176],
[0.444831997156143, 0.730794012546539],
[0.430011987686157, 0.766808986663818],
[0.406787008047104, 0.685672998428345],
[0.400738000869751, 0.681069016456604],
[0.392399996519089, 0.677703022956848],
[0.367855995893478, 0.663918972015381],
[0.247923001646996, 0.601333022117615],
[0.452769994735718, 0.420849978923798],
[0.43639200925827, 0.359887003898621],
[0.416164010763168, 0.368713974952698],
[0.413385987281799, 0.692366003990173],
[0.228018000721931, 0.683571994304657],
[0.468268007040024, 0.352671027183533],
[0.411361992359161, 0.804327011108398],
[0.499989002943039, 0.469825029373169],
[0.479153990745544, 0.442654013633728],
[0.499974012374878, 0.439637005329132],
[0.432112008333206, 0.493588984012604],
[0.499886006116867, 0.866917014122009],
[0.49991300702095, 0.821729004383087],
[0.456548988819122, 0.819200992584229],
[0.344549000263214, 0.745438992977142],
[0.37890899181366, 0.574010014533997],
[0.374292999505997, 0.780184984207153],
[0.319687992334366, 0.570737957954407],
[0.357154995203018, 0.604269981384277],
[0.295284003019333, 0.621580958366394],
[0.447750002145767, 0.862477004528046],
[0.410986006259918, 0.508723020553589],
[0.31395098567009, 0.775308012962341],
[0.354128003120422, 0.812552988529205],
[0.324548006057739, 0.703992962837219],
[0.189096003770828, 0.646299958229065],
[0.279776990413666, 0.71465802192688],
[0.1338230073452, 0.682700991630554],
[0.336768001317978, 0.644733011722565],
[0.429883986711502, 0.466521978378296],
[0.455527991056442, 0.548622965812683],
[0.437114000320435, 0.558896005153656],
[0.467287987470627, 0.529924988746643],
[0.414712011814117, 0.335219979286194],
[0.37704598903656, 0.322777986526489],
[0.344107985496521, 0.320150971412659],
[0.312875986099243, 0.32233202457428],
[0.283526003360748, 0.333190023899078],
[0.241245999932289, 0.382785975933075],
[0.102986000478268, 0.468762993812561],
[0.267612010240555, 0.424560010433197],
[0.297879010438919, 0.433175981044769],
[0.333433985710144, 0.433878004550934],
[0.366427004337311, 0.426115989685059],
[0.396012008190155, 0.416696012020111],
[0.420121014118195, 0.41022801399231],
[0.007561000064015, 0.480777025222778],
[0.432949006557465, 0.569517970085144],
[0.458638995885849, 0.479089021682739],
[0.473466008901596, 0.545744001865387],
[0.476087987422943, 0.563830018043518],
[0.468472003936768, 0.555056989192963],
[0.433990985155106, 0.582361996173859],
[0.483518004417419, 0.562983989715576],
[0.482482999563217, 0.57784903049469],
[0.42645001411438, 0.389798998832703],
[0.438998997211456, 0.39649498462677],
[0.450067013502121, 0.400434017181396],
[0.289712011814117, 0.368252992630005],
[0.276670008897781, 0.363372981548309],
[0.517862021923065, 0.471948027610779],
[0.710287988185883, 0.380764007568359],
[0.526226997375488, 0.573909997940063],
[0.895093023777008, 0.254140973091125],
[0.634069979190826, 0.409575998783112],
[0.661242008209229, 0.41302502155304],
[0.688880026340485, 0.409460008144379],
[0.725341975688934, 0.389131009578705],
[0.606630027294159, 0.40370500087738],
[0.654766023159027, 0.344011008739471],
[0.629905998706818, 0.346076011657715],
[0.680678009986877, 0.347265005111694],
[0.702096998691559, 0.353591024875641],
[0.75221198797226, 0.410804986953735],
[0.602918028831482, 0.842862963676453],
[0.719901978969574, 0.375599980354309],
[0.893692970275879, 0.399959981441498],
[0.790081977844238, 0.391354024410248],
[0.643998026847839, 0.534487962722778],
[0.528249025344849, 0.65040397644043],
[0.525849997997284, 0.680191040039062],
[0.560214996337891, 0.657229006290436],
[0.585384011268616, 0.66654098033905],
[0.549625992774963, 0.680860996246338],
[0.57122802734375, 0.682691991329193],
[0.624852001667023, 0.72809898853302],
[0.513050019741058, 0.547281980514526],
[0.51509702205658, 0.527251958847046],
[0.742246985435486, 0.314507007598877],
[0.598631024360657, 0.454979002475739],
[0.570338010787964, 0.548575043678284],
[0.578631997108459, 0.533622980117798],
[0.723087012767792, 0.532054007053375],
[0.516445994377136, 0.499638974666595],
[0.662801027297974, 0.282917976379395],
[0.70362401008606, 0.293271005153656],
[0.830704987049103, 0.193813979625702],
[0.552385985851288, 0.302568018436432],
[0.607609987258911, 0.353887975215912],
[0.645429015159607, 0.696707010269165],
[0.932694971561432, 0.730105042457581],
[0.557260990142822, 0.572826027870178],
[0.542901992797852, 0.584792017936707],
[0.6180260181427, 0.694710969924927],
[0.607590973377228, 0.694203019142151],
[0.722943007946014, 0.271963000297546],
[0.577413976192474, 0.563166975975037],
[0.614082992076874, 0.281386971473694],
[0.616907000541687, 0.255886018276215],
[0.668509006500244, 0.119913995265961],
[0.770092010498047, 0.232020974159241],
[0.635536015033722, 0.189248979091644],
[0.77039098739624, 0.299556016921997],
[0.826722025871277, 0.278755009174347],
[0.527121007442474, 0.666198015213013],
[0.553171992301941, 0.668527007102966],
[0.577238023281097, 0.673889994621277],
[0.554691970348358, 0.580065965652466],
[0.611896991729736, 0.693961024284363],
[0.59696102142334, 0.706539988517761],
[0.596370995044708, 0.693953037261963],
[0.539958000183105, 0.557139039039612],
[0.568841993808746, 0.692366003990173],
[0.547818005084991, 0.692366003990173],
[0.52461302280426, 0.692366003990173],
[0.534089982509613, 0.779141008853912],
[0.527670979499817, 0.736225962638855],
[0.526912987232208, 0.717857003211975],
[0.526877999305725, 0.704625964164734],
[0.526966989040375, 0.695277988910675],
[0.572058022022247, 0.695277988910675],
[0.573521018028259, 0.703539967536926],
[0.57683801651001, 0.711845993995667],
[0.581691026687622, 0.720062971115112],
[0.609944999217987, 0.639909982681274],
[0.986046016216278, 0.560034036636353],
[0.5867999792099, 0.69539999961853],
[0.590372025966644, 0.701822996139526],
[0.531915009021759, 0.601536989212036],
[0.577268004417419, 0.585934996604919],
[0.536915004253387, 0.593786001205444],
[0.627542972564697, 0.473352015018463],
[0.665585994720459, 0.495950996875763],
[0.588353991508484, 0.546862006187439],
[0.757824003696442, 0.14767599105835],
[0.709249973297119, 0.201507985591888],
[0.672684013843536, 0.256581008434296],
[0.600408971309662, 0.74900496006012],
[0.55826598405838, 0.261672019958496],
[0.570303976535797, 0.187870979309082],
[0.588165998458862, 0.109044015407562],
[0.711045026779175, 0.398952007293701],
[0.781069993972778, 0.435405015945435],
[0.587247014045715, 0.398931980133057],
[0.742869973182678, 0.355445981025696],
[0.572156012058258, 0.437651991844177],
[0.55186802148819, 0.536570012569427],
[0.821442008018494, 0.457556009292603],
[0.752701997756958, 0.457181990146637],
[0.71375697851181, 0.467626988887787],
[0.66711300611496, 0.460672974586487],
[0.631101012229919, 0.447153985500336],
[0.6008620262146, 0.432473003864288],
[0.523481011390686, 0.405627012252808],
[0.810747981071472, 0.523926019668579],
[0.771045982837677, 0.348959028720856],
[0.509127020835876, 0.562718033790588],
[0.595292985439301, 0.485023975372314],
[0.980530977249146, 0.401564002037048],
[0.573499977588654, 0.420000016689301],
[0.602994978427887, 0.548687994480133],
[0.733529984951019, 0.376977026462555],
[0.560611009597778, 0.519016981124878],
[0.967685997486115, 0.644356966018677],
[0.580985009670258, 0.387160003185272],
[0.537728011608124, 0.505385041236877],
[0.760966002941132, 0.779752969741821],
[0.801778972148895, 0.831938028335571],
[0.892440974712372, 0.54076099395752],
[0.816350996494293, 0.740260004997253],
[0.865594983100891, 0.333687007427216],
[0.614073991775513, 0.883246004581451],
[0.508952975273132, 0.579437971115112],
[0.617941975593567, 0.508316040039062],
[0.825608015060425, 0.397674977779388],
[0.681214988231659, 0.39623498916626],
[0.656635999679565, 0.400596976280212],
[0.603900015354156, 0.710216999053955],
[0.81208598613739, 0.588539004325867],
[0.56801301240921, 0.944564998149872],
[0.681007981300354, 0.898285031318665],
[0.733752012252808, 0.869701027870178],
[0.633830010890961, 0.398822009563446],
[0.606792986392975, 0.39553701877594],
[0.589659988880157, 0.391062021255493],
[0.805015981197357, 0.342108011245728],
[0.611334979534149, 0.362284004688263],
[0.634037971496582, 0.355970978736877],
[0.656635999679565, 0.355356991291046],
[0.681214988231659, 0.35834002494812],
[0.698584973812103, 0.363156020641327],
[0.941866993904114, 0.319076001644135],
[0.698584973812103, 0.387449026107788],
[0.584177017211914, 0.624107003211975],
[0.554318010807037, 0.566076993942261],
[0.534153997898102, 0.62064003944397],
[0.711217999458313, 0.819975018501282],
[0.664629995822906, 0.852871000766754],
[0.559099972248077, 0.902631998062134],
[0.871706008911133, 0.791940987110138],
[0.591234028339386, 0.373893976211548],
[0.544341027736664, 0.451583981513977],
[0.624562978744507, 0.924192011356354],
[0.88577002286911, 0.615028977394104],
[0.551338016986847, 0.695277988910675],
[0.551980018615723, 0.704632043838501],
[0.552887976169586, 0.715808033943176],
[0.555167973041534, 0.730794012546539],
[0.569944024085999, 0.767035007476807],
[0.593203008174896, 0.685675978660583],
[0.599261999130249, 0.681069016456604],
[0.607599973678589, 0.677703022956848],
[0.631937980651855, 0.663500010967255],
[0.752032995223999, 0.601315021514893],
[0.547226011753082, 0.420395016670227],
[0.563543975353241, 0.359827995300293],
[0.583841025829315, 0.368713974952698],
[0.586614012718201, 0.692366003990173],
[0.771915018558502, 0.683578014373779],
[0.531597018241882, 0.352482974529266],
[0.588370978832245, 0.804440975189209],
[0.52079701423645, 0.442565023899078],
[0.567984998226166, 0.493479013442993],
[0.543282985687256, 0.819254994392395],
[0.655317008495331, 0.745514988899231],
[0.621008992195129, 0.574018001556396],
[0.625559985637665, 0.78031200170517],
[0.680198013782501, 0.570719003677368],
[0.64276397228241, 0.604337990283966],
[0.704662978649139, 0.621529996395111],
[0.552012026309967, 0.862591981887817],
[0.589071989059448, 0.508637011051178],
[0.685944974422455, 0.775357007980347],
[0.645735025405884, 0.812640011310577],
[0.675342977046967, 0.703978002071381],
[0.810858011245728, 0.646304965019226],
[0.72012197971344, 0.714666962623596],
[0.866151988506317, 0.682704985141754],
[0.663187026977539, 0.644596993923187],
[0.570082008838654, 0.466325998306274],
[0.544561982154846, 0.548375964164734],
[0.562758982181549, 0.558784961700439],
[0.531987011432648, 0.530140042304993],
[0.585271000862122, 0.335177004337311],
[0.622952997684479, 0.32277899980545],
[0.655896008014679, 0.320163011550903],
[0.687132000923157, 0.322345972061157],
[0.716481983661652, 0.333200991153717],
[0.758756995201111, 0.382786989212036],
[0.897013008594513, 0.468769013881683],
[0.732392013072968, 0.424547016620636],
[0.70211398601532, 0.433162987232208],
[0.66652500629425, 0.433866024017334],
[0.633504986763, 0.426087975502014],
[0.603875994682312, 0.416586995124817],
[0.579657971858978, 0.409945011138916],
[0.992439985275269, 0.480777025222778],
[0.567192018032074, 0.569419980049133],
[0.54136598110199, 0.478899002075195],
[0.526564002037048, 0.546118021011353],
[0.523913025856018, 0.563830018043518],
[0.531529009342194, 0.555056989192963],
[0.566035985946655, 0.582329034805298],
[0.51631098985672, 0.563053965568542],
[0.5174720287323, 0.577877044677734],
[0.573594987392426, 0.389806985855103],
[0.560697972774506, 0.395331978797913],
[0.549755990505219, 0.399751007556915],
[0.710287988185883, 0.368252992630005],
[0.723330020904541, 0.363372981548309],
];

src/handpose/box.js (new file, 65 lines)

@@ -0,0 +1,65 @@
const tf = require('@tensorflow/tfjs');
function getBoxSize(box) {
return [
Math.abs(box.endPoint[0] - box.startPoint[0]),
Math.abs(box.endPoint[1] - box.startPoint[1]),
];
}
exports.getBoxSize = getBoxSize;
function getBoxCenter(box) {
return [
box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2,
box.startPoint[1] + (box.endPoint[1] - box.startPoint[1]) / 2,
];
}
exports.getBoxCenter = getBoxCenter;
function cutBoxFromImageAndResize(box, image, cropSize) {
const h = image.shape[1];
const w = image.shape[2];
const boxes = [[
box.startPoint[1] / h, box.startPoint[0] / w, box.endPoint[1] / h,
box.endPoint[0] / w,
]];
return tf.image.cropAndResize(image, boxes, [0], cropSize);
}
exports.cutBoxFromImageAndResize = cutBoxFromImageAndResize;
function scaleBoxCoordinates(box, factor) {
const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
const palmLandmarks = box.palmLandmarks.map((coord) => {
const scaledCoord = [coord[0] * factor[0], coord[1] * factor[1]];
return scaledCoord;
});
return { startPoint, endPoint, palmLandmarks };
}
exports.scaleBoxCoordinates = scaleBoxCoordinates;
function enlargeBox(box, factor = 1.5) {
const center = getBoxCenter(box);
const size = getBoxSize(box);
const newHalfSize = [factor * size[0] / 2, factor * size[1] / 2];
const startPoint = [center[0] - newHalfSize[0], center[1] - newHalfSize[1]];
const endPoint = [center[0] + newHalfSize[0], center[1] + newHalfSize[1]];
return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
}
exports.enlargeBox = enlargeBox;
function squarifyBox(box) {
const centers = getBoxCenter(box);
const size = getBoxSize(box);
const maxEdge = Math.max(...size);
const halfSize = maxEdge / 2;
const startPoint = [centers[0] - halfSize, centers[1] - halfSize];
const endPoint = [centers[0] + halfSize, centers[1] + halfSize];
return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
}
exports.squarifyBox = squarifyBox;
function shiftBox(box, shiftFactor) {
const boxSize = [
box.endPoint[0] - box.startPoint[0], box.endPoint[1] - box.startPoint[1],
];
const shiftVector = [boxSize[0] * shiftFactor[0], boxSize[1] * shiftFactor[1]];
const startPoint = [box.startPoint[0] + shiftVector[0], box.startPoint[1] + shiftVector[1]];
const endPoint = [box.endPoint[0] + shiftVector[0], box.endPoint[1] + shiftVector[1]];
return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
}
exports.shiftBox = shiftBox;

src/handpose/hand.js (new file, 107 lines)

@@ -0,0 +1,107 @@
const tf = require('@tensorflow/tfjs');
const bounding = require('./box');
class HandDetector {
constructor(model, width, height, anchors, iouThreshold, scoreThreshold) {
this.model = model;
this.width = width;
this.height = height;
this.iouThreshold = iouThreshold;
this.scoreThreshold = scoreThreshold;
this.anchors = anchors.map((anchor) => [anchor.x_center, anchor.y_center]);
this.anchorsTensor = tf.tensor2d(this.anchors);
this.inputSizeTensor = tf.tensor1d([width, height]);
this.doubleInputSizeTensor = tf.tensor1d([width * 2, height * 2]);
}
normalizeBoxes(boxes) {
return tf.tidy(() => {
const boxOffsets = tf.slice(boxes, [0, 0], [-1, 2]);
const boxSizes = tf.slice(boxes, [0, 2], [-1, 2]);
const boxCenterPoints = tf.add(tf.div(boxOffsets, this.inputSizeTensor), this.anchorsTensor);
const halfBoxSizes = tf.div(boxSizes, this.doubleInputSizeTensor);
const startPoints = tf.mul(tf.sub(boxCenterPoints, halfBoxSizes), this.inputSizeTensor);
const endPoints = tf.mul(tf.add(boxCenterPoints, halfBoxSizes), this.inputSizeTensor);
return tf.concat2d([startPoints, endPoints], 1);
});
}
normalizeLandmarks(rawPalmLandmarks, index) {
return tf.tidy(() => {
const landmarks = tf.add(tf.div(rawPalmLandmarks.reshape([-1, 7, 2]), this.inputSizeTensor), this.anchors[index]);
return tf.mul(landmarks, this.inputSizeTensor);
});
}
async getBoundingBoxes(input) {
const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
let batchedPrediction;
if (tf.getBackend() === 'webgl') {
// Currently tfjs-core does not pack depthwiseConv because it fails for
// very large inputs (https://github.com/tensorflow/tfjs/issues/1652).
// TODO(annxingyuan): call tf.enablePackedDepthwiseConv when available
// (https://github.com/tensorflow/tfjs/issues/2821)
const savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
// The model returns a tensor with the following shape:
// [1 (batch), 2944 (anchor points), 19 (data for each anchor)]
batchedPrediction = this.model.predict(normalizedInput);
tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
} else {
batchedPrediction = this.model.predict(normalizedInput);
}
const prediction = batchedPrediction.squeeze();
// Regression score for each anchor point.
const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
// Bounding box for each anchor point.
const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
const boxes = this.normalizeBoxes(rawBoxes);
const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, 1, this.iouThreshold, this.scoreThreshold);
const boxesWithHands = await boxesWithHandsTensor.array();
const toDispose = [
normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction,
boxes, rawBoxes, scores,
];
if (boxesWithHands.length === 0) {
toDispose.forEach((tensor) => tensor.dispose());
return null;
}
const boxIndex = boxesWithHands[0];
const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
const rawPalmLandmarks = tf.slice(prediction, [boxIndex, 5], [1, 14]);
const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([
-1, 2,
]));
toDispose.push(rawPalmLandmarks);
toDispose.forEach((tensor) => tensor.dispose());
return { boxes: matchingBox, palmLandmarks };
}
/**
* Returns a Box identifying the bounding box of a hand within the image.
* Returns null if there is no hand in the image.
*
* @param input The image to classify.
*/
async estimateHandBounds(input) {
const inputHeight = input.shape[1];
const inputWidth = input.shape[2];
const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
const prediction = await this.getBoundingBoxes(image);
if (prediction === null) {
image.dispose();
return null;
}
// Calling arraySync on both boxes and palmLandmarks because the tensors are
// very small so it's not worth calling await array().
const boundingBoxes = prediction.boxes.arraySync();
const startPoint = boundingBoxes[0].slice(0, 2);
const endPoint = boundingBoxes[0].slice(2, 4);
const palmLandmarks = prediction.palmLandmarks.arraySync();
image.dispose();
prediction.boxes.dispose();
prediction.palmLandmarks.dispose();
return bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]);
}
}
exports.HandDetector = HandDetector;

src/handpose/index.js (new file, 93 lines)

@@ -0,0 +1,93 @@
const tf = require('@tensorflow/tfjs');
const hand = require('./hand');
const keypoints = require('./keypoints');
const pipe = require('./pipeline');
// Load the bounding box detector model.
async function loadHandDetectorModel(url) {
return tf.loadGraphModel(url, { fromTFHub: url.includes('tfhub.dev') });
}
// Load the mesh detector model.
async function loadHandPoseModel(url) {
return tf.loadGraphModel(url, { fromTFHub: url.includes('tfhub.dev') });
}
// In single shot detector pipelines, the output space is discretized into a set
// of bounding boxes, each of which is assigned a score during prediction. The
// anchors define the coordinates of these boxes.
async function loadAnchors(url) {
return tf.util
.fetch(url)
.then((d) => d.json());
}
/**
* Load handpose.
*
* @param config A configuration object with the following properties:
* - `skipFrames` How many frames to go without running the bounding
* box detector. Set to a lower value if you want a safety
* net in case the mesh detector produces consistently flawed predictions.
* - `minConfidence` Threshold for discarding a prediction. Defaults to
* 0.8.
* - `iouThreshold` A float representing the threshold for deciding whether
* boxes overlap too much in non-maximum suppression. Must be between [0, 1].
* Defaults to 0.3.
* - `scoreThreshold` A threshold for deciding when to remove boxes based
* on score in non-maximum suppression. Defaults to 0.75.
*/
async function load(config) {
const [ANCHORS, handDetectorModel, handPoseModel] = await Promise.all([
loadAnchors(config.detector.anchors),
loadHandDetectorModel(config.detector.modelPath),
loadHandPoseModel(config.skeleton.modelPath),
]);
const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold);
const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence);
// eslint-disable-next-line no-use-before-define
const handpose = new HandPose(pipeline);
return handpose;
}
exports.load = load;
class HandPose {
constructor(pipeline) {
this.pipeline = pipeline;
}
static getAnnotations() {
return keypoints.MESH_ANNOTATIONS;
}
/**
* Finds hands in the input image.
*
* @param input The image to classify. Can be a tensor, DOM element image,
* video, or canvas.
* @param config Pipeline configuration object (see `load` above).
*/
async estimateHands(input, config) {
const image = tf.tidy(() => {
if (!(input instanceof tf.Tensor)) {
input = tf.browser.fromPixels(input);
}
return input.toFloat().expandDims(0);
});
const prediction = await this.pipeline.estimateHand(image, config);
image.dispose();
if (!prediction) return [];
const annotations = {};
for (const key of Object.keys(keypoints.MESH_ANNOTATIONS)) {
annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
}
return [{
confidence: prediction.confidence || 0,
box: prediction.box ? [prediction.box.topLeft[0], prediction.box.topLeft[1], prediction.box.bottomRight[0] - prediction.box.topLeft[0], prediction.box.bottomRight[1] - prediction.box.topLeft[1]] : 0,
landmarks: prediction.landmarks,
annotations,
}];
}
}
exports.HandPose = HandPose;

src/image.js (new file, 127 lines)

@@ -0,0 +1,127 @@
const defaultFont = 'small-caps 1rem "Segoe UI"';
function clear(canvas) {
if (canvas) canvas.getContext('2d').clearRect(0, 0, canvas.width, canvas.height);
}
function crop(image, x, y, width, height, { color = 'white', title = null, font = null }) {
const canvas = new OffscreenCanvas(width, height);
const ctx = canvas.getContext('2d');
ctx.drawImage(image, x, y, width, height, 0, 0, canvas.width, canvas.height);
ctx.fillStyle = color;
ctx.font = font || defaultFont;
if (title) ctx.fillText(title, 2, 16, canvas.width - 4);
return canvas;
}
function point({ canvas = null, x = 0, y = 0, color = 'white', radius = 2, title = null, font = null }) {
if (!canvas) return;
const ctx = canvas.getContext('2d');
ctx.fillStyle = color;
ctx.beginPath();
ctx.arc(x, y, radius, 0, 2 * Math.PI);
ctx.fill();
ctx.font = font || defaultFont;
if (title) ctx.fillText(title, x + 10, y + 4);
}
function rect({ canvas = null, x = 0, y = 0, width = 0, height = 0, radius = 8, lineWidth = 2, color = 'white', title = null, font = null }) {
if (!canvas) return;
const ctx = canvas.getContext('2d');
ctx.lineWidth = lineWidth;
ctx.beginPath();
ctx.moveTo(x + radius, y);
ctx.lineTo(x + width - radius, y);
ctx.quadraticCurveTo(x + width, y, x + width, y + radius);
ctx.lineTo(x + width, y + height - radius);
ctx.quadraticCurveTo(x + width, y + height, x + width - radius, y + height);
ctx.lineTo(x + radius, y + height);
ctx.quadraticCurveTo(x, y + height, x, y + height - radius);
ctx.lineTo(x, y + radius);
ctx.quadraticCurveTo(x, y, x + radius, y);
ctx.closePath();
ctx.strokeStyle = color;
ctx.stroke();
ctx.lineWidth = 1;
ctx.fillStyle = color;
ctx.font = font || defaultFont;
if (title) ctx.fillText(title, x + 4, y + 16);
}
function line({ points = [], canvas = null, lineWidth = 2, color = 'white', title = null, font = null }) {
if (!canvas) return;
if (points.length < 2) return;
const ctx = canvas.getContext('2d');
ctx.lineWidth = lineWidth;
ctx.beginPath();
ctx.moveTo(points[0][0], points[0][1]);
for (const pt of points) ctx.lineTo(pt[0], pt[1]);
ctx.strokeStyle = color;
ctx.fillStyle = color;
ctx.stroke();
ctx.lineWidth = 1;
ctx.font = font || defaultFont;
if (title) ctx.fillText(title, points[0][0] + 4, points[0][1] + 16);
}
function spline({ points = [], canvas = null, tension = 0.5, lineWidth = 2, color = 'white', title = null, font = null }) {
if (!canvas) return;
if (points.length < 2) return;
const va = (arr, i, j) => [arr[2 * j] - arr[2 * i], arr[2 * j + 1] - arr[2 * i + 1]];
const distance = (arr, i, j) => Math.sqrt(((arr[2 * i] - arr[2 * j]) ** 2) + ((arr[2 * i + 1] - arr[2 * j + 1]) ** 2));
// eslint-disable-next-line no-unused-vars
function ctlpts(x1, y1, x2, y2, x3, y3) {
// eslint-disable-next-line prefer-rest-params
const v = va(arguments, 0, 2);
// eslint-disable-next-line prefer-rest-params
const d01 = distance(arguments, 0, 1);
// eslint-disable-next-line prefer-rest-params
const d12 = distance(arguments, 1, 2);
const d012 = d01 + d12;
return [
x2 - v[0] * tension * d01 / d012, y2 - v[1] * tension * d01 / d012,
x2 + v[0] * tension * d12 / d012, y2 + v[1] * tension * d12 / d012,
];
}
const pts = [];
for (const pt of points) {
pts.push(pt[0]);
pts.push(pt[1]);
}
let cps = [];
for (let i = 0; i < pts.length - 2; i += 1) {
cps = cps.concat(ctlpts(pts[2 * i + 0], pts[2 * i + 1], pts[2 * i + 2], pts[2 * i + 3], pts[2 * i + 4], pts[2 * i + 5]));
}
const ctx = canvas.getContext('2d');
ctx.lineWidth = lineWidth;
ctx.strokeStyle = color;
ctx.fillStyle = color;
if (points.length === 2) {
ctx.beginPath();
ctx.moveTo(pts[0], pts[1]);
ctx.lineTo(pts[2], pts[3]);
} else {
ctx.beginPath();
ctx.moveTo(pts[0], pts[1]);
// first segment is a quadratic
ctx.quadraticCurveTo(cps[0], cps[1], pts[2], pts[3]);
// for all middle points, connect with bezier
let i;
for (i = 2; i < ((pts.length / 2) - 1); i += 1) {
ctx.bezierCurveTo(cps[(2 * (i - 1) - 1) * 2], cps[(2 * (i - 1) - 1) * 2 + 1], cps[(2 * (i - 1)) * 2], cps[(2 * (i - 1)) * 2 + 1], pts[i * 2], pts[i * 2 + 1]);
}
// last segment is a quadratic
ctx.quadraticCurveTo(cps[(2 * (i - 1) - 1) * 2], cps[(2 * (i - 1) - 1) * 2 + 1], pts[i * 2], pts[i * 2 + 1]);
}
ctx.stroke();
ctx.lineWidth = 1;
ctx.font = font || defaultFont;
if (title) ctx.fillText(title, points[0][0] + 4, points[0][1] + 16);
}
exports.crop = crop;
exports.rect = rect;
exports.point = point;
exports.line = line;
exports.spline = spline;
exports.clear = clear;

src/index.js (new file, 81 lines)

@@ -0,0 +1,81 @@
const facemesh = require('./facemesh/index.js');
const ssrnet = require('./ssrnet/index.js');
const posenet = require('./posenet/index.js');
const handpose = require('./handpose/index.js');
// const image = require('./image.js');
// const triangulation = require('./triangulation.js').default;
const defaults = require('./config.js').default;
const models = {
facemesh: null,
blazeface: null,
ssrnet: null,
iris: null,
};
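// recursively merge the user configuration into the defaults so that
// callers only need to specify the properties they want to override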
function mergeDeep(...objects) {
const isObject = (obj) => obj && typeof obj === 'object';
return objects.reduce((prev, obj) => {
Object.keys(obj).forEach((key) => {
const pVal = prev[key];
const oVal = obj[key];
if (Array.isArray(pVal) && Array.isArray(oVal)) {
prev[key] = pVal.concat(...oVal);
} else if (isObject(pVal) && isObject(oVal)) {
prev[key] = mergeDeep(pVal, oVal);
} else {
prev[key] = oVal;
}
});
return prev;
}, {});
}
async function detect(input, userConfig) {
const config = mergeDeep(defaults, userConfig);
// run posenet
let poseRes = [];
if (config.body.enabled) {
if (!models.posenet) models.posenet = await posenet.load(config.body);
poseRes = await models.posenet.estimateMultiplePoses(input, config.body);
}
// run handpose
let handRes = [];
if (config.hand.enabled) {
if (!models.handpose) models.handpose = await handpose.load(config.hand);
handRes = await models.handpose.estimateHands(input, config.hand);
}
// run facemesh, includes blazeface and iris
const faceRes = [];
if (config.face.enabled) {
if (!models.facemesh) models.facemesh = await facemesh.load(config.face);
const faces = await models.facemesh.estimateFaces(input, config.face);
for (const face of faces) {
// run ssr-net age & gender, inherits face from blazeface
const ssrdata = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
// iris: array[ bottom, left, top, right, center ]
const iris = (face.annotations.leftEyeIris && face.annotations.rightEyeIris)
? Math.max(face.annotations.leftEyeIris[3][0] - face.annotations.leftEyeIris[1][0], face.annotations.rightEyeIris[3][0] - face.annotations.rightEyeIris[1][0])
: 0;
faceRes.push({
confidence: face.confidence,
box: face.box,
mesh: face.mesh,
annotations: face.annotations,
age: ssrdata.age,
gender: ssrdata.gender,
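// assumption: 11.7 approximates the average human iris diameter in mm,
// used here to derive relative distance from the measured iris size in pixels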
iris: (iris !== 0) ? Math.trunc(100 * 11.7 / iris) / 100 : 0,
});
}
}
// combine results
return { face: faceRes, body: poseRes, hand: handRes };
}
exports.detect = detect;
exports.defaults = defaults;
exports.models = models;

src/posenet/index.js (new file, 22 lines)

@@ -0,0 +1,22 @@
const modelMobileNet = require('./modelMobileNet');
const modelPoseNet = require('./modelPoseNet');
const decodeMultiple = require('./decodeMultiple');
const decodeSingle = require('./decodeSingle');
const keypoints = require('./keypoints');
const util = require('./util');
exports.load = modelPoseNet.load;
exports.PoseNet = modelPoseNet.PoseNet;
exports.MobileNet = modelMobileNet.MobileNet;
exports.decodeMultiplePoses = decodeMultiple.decodeMultiplePoses;
exports.decodeSinglePose = decodeSingle.decodeSinglePose;
exports.partChannels = keypoints.partChannels;
exports.partIds = keypoints.partIds;
exports.partNames = keypoints.partNames;
exports.poseChain = keypoints.poseChain;
exports.getAdjacentKeyPoints = util.getAdjacentKeyPoints;
exports.getBoundingBox = util.getBoundingBox;
exports.getBoundingBoxPoints = util.getBoundingBoxPoints;
exports.scaleAndFlipPoses = util.scaleAndFlipPoses;
exports.scalePose = util.scalePose;