mirror of https://github.com/vladmandic/human
implemented simple gesture recognition
parent 3cec6710d4
commit d94aa0362c

README.md (18 changes)

@@ -1,6 +1,6 @@
# Human Library

## 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking, Age & Gender Prediction & Emotion Prediction
## 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking, Age & Gender Prediction, Emotion Prediction & Gesture Recognition

- [**Documentation**](https://github.com/vladmandic/human#readme)
- [**Code Repository**](https://github.com/vladmandic/human)

@@ -361,6 +361,11 @@ config = {
      modelPath: '../models/handskeleton/model.json',
    },
  },
  gesture: {
    enabled: true, // enable simple gesture recognition
    // takes processed data and based on geometry detects simple gestures
    // easily expandable via code, see `src/gesture.js`
  },
};
```
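To make the new option concrete, here is a minimal usage sketch, assuming `human` is an already-created instance of the library (as in the demo later in this commit) and `video` is a playing video element; the gesture strings in the comments are examples of values produced by `src/gesture.js`, not guaranteed output.

```js
// minimal sketch, not part of the commit: run detection with gesture recognition enabled
// assumes an initialized `human` instance and a playing <video> element named `video`
async function logGestures() {
  const result = await human.detect(video, { gesture: { enabled: true } });
  // result.gesture contains arrays of plain-text descriptions per category
  for (const g of result.gesture.face) console.log('face:', g); // e.g. 'facing left'
  for (const g of result.gesture.body) console.log('body:', g); // e.g. 'raise right hand'
  for (const g of result.gesture.hand) console.log('hand:', g); // e.g. 'indexfinger forward thumb up'
}
```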

@@ -408,10 +413,17 @@ result = {
      emotion, // <string> 'angry', 'discust', 'fear', 'happy', 'sad', 'surpise', 'neutral'
    }
  ],
  gesture: // object containing parsed gestures
  {
    face, // <array of string>
    body, // <array of string>
    hand, // <array of string>
  }
  performance = { // performance data of last execution for each module, measured in milliseconds
    backend, // time to initialize tf backend, valid only during backend startup
    load, // time to load models, valid only during model load
    image, // time for image processing
    gesture, // gesture analysis time
    body, // model time
    hand, // model time
    face, // model time
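Since the gesture strings are produced by the rules in `src/gesture.js` (added later in this commit), a populated `gesture` object could look roughly like the following; the exact values depend entirely on the detected pose and are shown here only as an illustration.

```js
// illustrative sample only: possible contents of result.gesture
const exampleGesture = {
  face: ['facing left'],
  body: ['raise right hand', 'leaning right'],
  hand: ['indexfinger forward thumb up'],
};
```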

@@ -484,6 +496,7 @@ For example, it can perform multiple face detections at 60+ FPS, but drops to ~1

- Enabled all: 15 FPS
- Image filters: 80 FPS (standalone)
- Gesture: 80 FPS (standalone)
- Face Detect: 80 FPS (standalone)
- Face Geometry: 30 FPS (includes face detect)
- Face Iris: 30 FPS (includes face detect and face geometry)

@@ -495,8 +508,9 @@ For example, it can perform multiple face detections at 60+ FPS, but drops to ~1

### Performance per module on a **smartphone** with Snapdragon 855 on a FullHD input:

- Enabled all: 3 FPS
- Enabled all: 5 FPS
- Image filters: 30 FPS (standalone)
- Gesture: 30 FPS (standalone)
- Face Detect: 20 FPS (standalone)
- Face Geometry: 10 FPS (includes face detect)
- Face Iris: 5 FPS (includes face detect and face geometry)

config.js

@@ -41,6 +41,9 @@ export default {
    polaroid: false, // image polaroid camera effect
    pixelate: 0, // range: 0 (no pixelate) to N (number of pixels to pixelate)
  },
  gesture: {
    enabled: true, // enable simple gesture recognition
  },
  face: {
    enabled: true, // controls if specified module is enabled
    // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
@@ -25,6 +25,8 @@ const ui = {
  useDepth: true,
  console: true,
  maxFrames: 10,
  modelsPreload: true,
  modelsWarmup: true,
};

// configuration overrides

@@ -62,6 +64,7 @@ const config = {
  },
  body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
  hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
  gesture: { enabled: true },
};

// global variables
@@ -123,15 +126,17 @@ function drawResults(input, result, canvas) {
  draw.face(result.face, canvas, ui, human.facemesh.triangulation);
  draw.body(result.body, canvas, ui);
  draw.hand(result.hand, canvas, ui);
  draw.gesture(result.gesture, canvas, ui);
  // update log
  const engine = human.tf.engine();
  const gpu = engine.backendInstance ? `gpu: ${(engine.backendInstance.numBytesInGPU ? engine.backendInstance.numBytesInGPU : 0).toLocaleString()} bytes` : '';
  const memory = `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
  const processing = result.canvas ? `processing: ${result.canvas.width} x ${result.canvas.height}` : '';
  const avg = Math.trunc(10 * fps.reduce((a, b) => a + b) / fps.length) / 10;
  document.getElementById('log').innerText = `
    video: ${camera.name} | facing: ${camera.facing} | resolution: ${camera.width} x ${camera.height} ${processing}
    backend: ${human.tf.getBackend()} | ${memory} | object size: ${(str(result)).length.toLocaleString()} bytes
    performance: ${str(result.performance)}
    performance: ${str(result.performance)} FPS:${avg}
  `;
}
@@ -159,7 +164,13 @@ async function setupCamera() {
  try {
    stream = await navigator.mediaDevices.getUserMedia({
      audio: false,
      video: { facingMode: (ui.facing ? 'user' : 'environment'), width: window.innerWidth, height: window.innerHeight, resizeMode: 'none' },
      video: {
        facingMode: (ui.facing ? 'user' : 'environment'),
        width: window.innerWidth,
        height: window.innerHeight,
        resizeMode: 'none',
        contrast: 75,
      },
    });
  } catch (err) {
    output.innerText += '\nCamera permission denied';
@@ -267,7 +278,7 @@ async function detectVideo() {
  document.getElementById('canvas').style.display = 'block';
  const video = document.getElementById('video');
  const canvas = document.getElementById('canvas');
  ui.baseFont = ui.baseFontProto.replace(/{size}/, '1.2rem');
  ui.baseFont = ui.baseFontProto.replace(/{size}/, '1.3rem');
  ui.baseLineHeight = ui.baseLineHeightProto;
  if ((video.srcObject !== null) && !video.paused) {
    document.getElementById('play').style.display = 'block';
@@ -286,7 +297,7 @@ async function detectVideo() {
async function detectSampleImages() {
  document.getElementById('play').style.display = 'none';
  config.videoOptimized = false;
  ui.baseFont = ui.baseFontProto.replace(/{size}/, `${1.2 * ui.columns}rem`);
  ui.baseFont = ui.baseFontProto.replace(/{size}/, `${1.3 * ui.columns}rem`);
  ui.baseLineHeight = ui.baseLineHeightProto * ui.columns;
  document.getElementById('canvas').style.display = 'none';
  document.getElementById('samples-container').style.display = 'block';
@@ -318,6 +329,7 @@ function setupMenu() {
  menu.addBool('Face Emotion', config.face.emotion, 'enabled');
  menu.addBool('Body Pose', config.body, 'enabled');
  menu.addBool('Hand Pose', config.hand, 'enabled');
  menu.addBool('Gesture Analysis', config.gesture, 'enabled');

  menu.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
  menu.addLabel('Model Parameters');
@@ -383,11 +395,15 @@ async function main() {
  setupMenu();
  document.getElementById('log').innerText = `Human: version ${human.version} TensorFlow/JS: version ${human.tf.version_core}`;
  // this is not required, just pre-warms the library
  status('loading');
  await human.load();
  status('initializing');
  const warmup = new ImageData(50, 50);
  await human.detect(warmup);
  if (ui.modelsPreload) {
    status('loading');
    await human.load();
  }
  if (ui.modelsWarmup) {
    status('initializing');
    const warmup = new ImageData(50, 50);
    await human.detect(warmup);
  }
  status('human: ready');
  document.getElementById('loader').style.display = 'none';
  document.getElementById('play').style.display = 'block';
@@ -27,7 +27,7 @@
.status { position: absolute; width: 100vw; top: 100px; text-align: center; font-size: 4rem; font-weight: 100; text-shadow: 2px 2px darkslategrey; }
.thumbnail { margin: 8px; box-shadow: 0 0 4px 4px dimgrey; }
.thumbnail:hover { box-shadow: 0 0 8px 8px dimgrey; filter: grayscale(1); }
.log { position: fixed; bottom: 0; }
.log { position: fixed; bottom: 0; margin: 0.4rem; }
.samples-container { display: flex; flex-wrap: wrap; }
.video { display: none; }
.canvas { margin: 0 auto; width: 100%; }
package.json

@@ -1,7 +1,7 @@
{
  "name": "@vladmandic/human",
  "version": "0.6.6",
  "description": "human: 3D Face Detection, Iris Tracking and Age & Gender Prediction",
  "description": "human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking, Age & Gender Prediction, Emotion Prediction & Gesture Recognition",
  "sideEffects": false,
  "main": "dist/human.node.js",
  "module": "dist/human.esm.js",
src/gesture.js (new file)

@@ -0,0 +1,45 @@
exports.body = (res) => {
  if (!res) return [];
  const gestures = [];
  for (const pose of res) {
    // raising hands
    const leftWrist = pose.keypoints.find((a) => (a.part === 'leftWrist'));
    const rightWrist = pose.keypoints.find((a) => (a.part === 'rightWrist'));
    const nose = pose.keypoints.find((a) => (a.part === 'nose'));
    if (nose && leftWrist && rightWrist && (leftWrist.position.y < nose.position.y) && (rightWrist.position.y < nose.position.y)) gestures.push('i give up');
    else if (nose && leftWrist && (leftWrist.position.y < nose.position.y)) gestures.push('raise left hand');
    else if (nose && rightWrist && (rightWrist.position.y < nose.position.y)) gestures.push('raise right hand');

    // leaning
    const leftShoulder = pose.keypoints.find((a) => (a.part === 'leftShoulder'));
    const rightShoulder = pose.keypoints.find((a) => (a.part === 'rightShoulder'));
    if (leftShoulder && rightShoulder) gestures.push(`leaning ${(leftShoulder.position.y > rightShoulder.position.y) ? 'left' : 'right'}`);
  }
  return gestures;
};

exports.face = (res) => {
  if (!res) return [];
  const gestures = [];
  for (const face of res) {
    if (face.annotations['rightCheek'] && face.annotations['leftCheek'] && (face.annotations['rightCheek'].length > 0) && (face.annotations['leftCheek'].length > 0)) {
      gestures.push(`facing ${((face.annotations['rightCheek'][0][2] > 0) || (face.annotations['leftCheek'][0][2] < 0)) ? 'right' : 'left'}`);
    }
  }
  return gestures;
};

exports.hand = (res) => {
  if (!res) return [];
  const gestures = [];
  for (const hand of res) {
    const fingers = [];
    for (const [finger, pos] of Object.entries(hand['annotations'])) {
      if (finger !== 'palmBase') fingers.push({ name: finger.toLowerCase(), position: pos[0] }); // get tip of each finger
    }
    const closest = fingers.reduce((best, a) => (best.position[2] < a.position[2] ? best : a)); // finger with the smallest z, treated as pointing forward
    const highest = fingers.reduce((best, a) => (best.position[1] < a.position[1] ? best : a)); // finger with the smallest y, treated as pointing up
    gestures.push(`${closest.name} forward ${highest.name} up`);
  }
  return gestures;
};
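Because the README describes the module as easily expandable, here is a hedged sketch of what one additional body rule could look like, written in the same style as `exports.body` above. The rule, the helper name, and the 50-pixel threshold are illustrative and not part of the commit; the `leftHip`/`rightHip` part names follow standard PoseNet keypoint naming.

```js
// illustrative only: an additional rule in the style of exports.body above
// pushes 'hands on hips' when both wrists sit near hip height; the 50px threshold is an arbitrary example
function handsOnHips(pose, gestures) {
  const find = (part) => pose.keypoints.find((a) => (a.part === part));
  const leftWrist = find('leftWrist');
  const rightWrist = find('rightWrist');
  const leftHip = find('leftHip');
  const rightHip = find('rightHip');
  if (leftWrist && rightWrist && leftHip && rightHip
    && (Math.abs(leftWrist.position.y - leftHip.position.y) < 50)
    && (Math.abs(rightWrist.position.y - rightHip.position.y) < 50)) {
    gestures.push('hands on hips');
  }
}
```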

@@ -22,8 +22,8 @@ const util = require('./util');
const UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD = 0.8;
const PALM_BOX_SHIFT_VECTOR = [0, -0.4];
const PALM_BOX_ENLARGE_FACTOR = 3;
const HAND_BOX_SHIFT_VECTOR = [0, -0.1];
const HAND_BOX_ENLARGE_FACTOR = 1.65;
const HAND_BOX_SHIFT_VECTOR = [0, -0.1]; // move detected hand box by x,y to ease landmark detection
const HAND_BOX_ENLARGE_FACTOR = 1.65; // increased from model default 1.65;
const PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2];
const PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0;
const PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2;
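For context on the constants above, a shift vector and enlarge factor are typically applied to a detected bounding box roughly as sketched below; the helper name and the `startPoint`/`endPoint` box shape are assumptions for illustration and are not the repository's actual box utilities.

```js
// rough sketch (not the repository's box helpers): shift a box by a size-relative
// vector and enlarge it around its shifted center, as constants like the above are typically used
function shiftAndEnlargeBox(box, shiftVector, enlargeFactor) {
  const width = box.endPoint[0] - box.startPoint[0];
  const height = box.endPoint[1] - box.startPoint[1];
  const shift = [shiftVector[0] * width, shiftVector[1] * height];
  const center = [
    (box.startPoint[0] + box.endPoint[0]) / 2 + shift[0],
    (box.startPoint[1] + box.endPoint[1]) / 2 + shift[1],
  ];
  const halfSize = [(width * enlargeFactor) / 2, (height * enlargeFactor) / 2];
  return {
    startPoint: [center[0] - halfSize[0], center[1] - halfSize[1]],
    endPoint: [center[0] + halfSize[0], center[1] + halfSize[1]],
  };
}
```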

src/human.js (115 changes)

@@ -4,7 +4,8 @@ const ssrnet = require('./ssrnet/ssrnet.js');
const emotion = require('./emotion/emotion.js');
const posenet = require('./posenet/posenet.js');
const handpose = require('./handpose/handpose.js');
const fxImage = require('./imagefx.js');
const gesture = require('./gesture.js');
const image = require('./image.js');
const profile = require('./profile.js');
const defaults = require('../config.js').default;
const app = require('../package.json');

@@ -52,9 +53,6 @@ class Human {
    this.analyzeMemoryLeaks = false;
    this.checkSanity = false;
    this.firstRun = true;
    // internal temp canvases
    this.inCanvas = null;
    this.outCanvas = null;
    // object that contains all initialized models
    this.models = {
      facemesh: null,
@@ -94,6 +92,7 @@ class Human {
    if (leaked !== 0) this.log(...msg, leaked);
  }

  // quick sanity check on inputs
  sanity(input) {
    if (!this.checkSanity) return null;
    if (!input) return 'input is not defined';

@@ -108,10 +107,12 @@ class Human {
    return null;
  }

  // preload models, not explicitly required as it's done automatically on first use
  async load(userConfig) {
    if (userConfig) this.config = mergeDeep(defaults, userConfig);

    if (this.firstRun) {
      this.checkBackend(true);
      this.log(`version: ${this.version} TensorFlow/JS version: ${tf.version_core}`);
      this.log('configuration:', this.config);
      this.log('flags:', tf.ENV.flags);
@@ -144,8 +145,9 @@ class Human {
    }
  }

  async checkBackend() {
    if (tf.getBackend() !== this.config.backend) {
  // check if backend needs initialization if it changed
  async checkBackend(force) {
    if (force || (tf.getBackend() !== this.config.backend)) {
      this.state = 'backend';
      /* force backend reload
      if (this.config.backend in tf.engine().registry) {

@@ -156,7 +158,7 @@ class Human {
        this.log('Backend not registered:', this.config.backend);
      }
      */
      this.log('Setting backend:', this.config.backend);
      this.log('setting backend:', this.config.backend);
      await tf.setBackend(this.config.backend);
      tf.enableProdMode();
      /* debug mode is really too much

@@ -167,84 +169,20 @@ class Human {
        this.log('Changing WebGL: WEBGL_DELETE_TEXTURE_THRESHOLD:', this.config.deallocate);
        tf.ENV.set('WEBGL_DELETE_TEXTURE_THRESHOLD', this.config.deallocate ? 0 : -1);
      }
      tf.ENV.set('WEBGL_CPU_FORWARD', true);
      // tf.ENV.set('WEBGL_CPU_FORWARD', true);
      // tf.ENV.set('WEBGL_FORCE_F16_TEXTURES', true);
      // tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', true);
      await tf.ready();
    }
  }

  tfImage(input) {
    let tensor;
    if (input instanceof tf.Tensor) {
      tensor = tf.clone(input);
    } else {
      const originalWidth = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
      const originalHeight = input.naturalHeight || input.videoHeight || input.height || (input.shape && (input.shape[2] > 0));
      let targetWidth = originalWidth;
      let targetHeight = originalHeight;
      if (this.config.filter.width > 0) targetWidth = this.config.filter.width;
      else if (this.config.filter.height > 0) targetWidth = originalWidth * (this.config.filter.height / originalHeight);
      if (this.config.filter.height > 0) targetHeight = this.config.filter.height;
      else if (this.config.filter.width > 0) targetHeight = originalHeight * (this.config.filter.width / originalWidth);
      if (!this.inCanvas || (this.inCanvas.width !== targetWidth) || (this.inCanvas.height !== targetHeight)) {
        this.inCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
        if (this.inCanvas.width !== targetWidth) this.inCanvas.width = targetWidth;
        if (this.inCanvas.height !== targetHeight) this.inCanvas.height = targetHeight;
      }
      const ctx = this.inCanvas.getContext('2d');
      if (input instanceof ImageData) ctx.putImageData(input, 0, 0);
      else ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, this.inCanvas.width, this.inCanvas.height);
      if (this.config.filter.enabled) {
        if (!this.fx || !this.outCanvas || (this.inCanvas.width !== this.outCanvas.width) || (this.inCanvas.height !== this.outCanvas.height)) {
          this.outCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(this.inCanvas.width, this.inCanvas.height) : document.createElement('canvas');
          if (this.outCanvas.width !== this.inCanvas.width) this.outCanvas.width = this.inCanvas.width;
          if (this.outCanvas.height !== this.inCanvas.height) this.outCanvas.height = this.inCanvas.height;
          this.fx = (tf.ENV.flags.IS_BROWSER && (typeof document !== 'undefined')) ? new fxImage.Canvas({ canvas: this.outCanvas }) : null;
        }
        this.fx.reset();
        this.fx.addFilter('brightness', this.config.filter.brightness); // must have at least one filter enabled
        if (this.config.filter.contrast !== 0) this.fx.addFilter('contrast', this.config.filter.contrast);
        if (this.config.filter.sharpness !== 0) this.fx.addFilter('sharpen', this.config.filter.sharpness);
        if (this.config.filter.blur !== 0) this.fx.addFilter('blur', this.config.filter.blur);
        if (this.config.filter.saturation !== 0) this.fx.addFilter('saturation', this.config.filter.saturation);
        if (this.config.filter.hue !== 0) this.fx.addFilter('hue', this.config.filter.hue);
        if (this.config.filter.negative) this.fx.addFilter('negative');
        if (this.config.filter.sepia) this.fx.addFilter('sepia');
        if (this.config.filter.vintage) this.fx.addFilter('brownie');
        if (this.config.filter.sepia) this.fx.addFilter('sepia');
        if (this.config.filter.kodachrome) this.fx.addFilter('kodachrome');
        if (this.config.filter.technicolor) this.fx.addFilter('technicolor');
        if (this.config.filter.polaroid) this.fx.addFilter('polaroid');
        if (this.config.filter.pixelate !== 0) this.fx.addFilter('pixelate', this.config.filter.pixelate);
        this.fx.apply(this.inCanvas);
      }
      if (!this.outCanvas) this.outCanvas = this.inCanvas;
      let pixels;
      if ((this.config.backend === 'webgl') || (this.outCanvas instanceof ImageData)) {
        // tf kernel-optimized method to get image data; if input is already ImageData, just use it
        pixels = tf.browser.fromPixels(this.outCanvas);
      } else {
        // cpu and wasm kernels do not implement an efficient fromPixels method and the canvas cannot be used as-is, so copy through one more temporary canvas
        const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
        tempCanvas.width = targetWidth;
        tempCanvas.height = targetHeight;
        const tempCtx = tempCanvas.getContext('2d');
        tempCtx.drawImage(this.outCanvas, 0, 0);
        const data = tempCtx.getImageData(0, 0, targetWidth, targetHeight);
        pixels = tf.browser.fromPixels(data);
      }
      const casted = pixels.toFloat();
      tensor = casted.expandDims(0);
      pixels.dispose();
      casted.dispose();
    }
    return { tensor, canvas: this.config.filter.return ? this.outCanvas : null };
  }

  // main detect function
  async detect(input, userConfig = {}) {
    this.state = 'config';
    const perf = {};
    let timeStamp;

    // update configuration
    this.config = mergeDeep(defaults, userConfig);
    if (!this.config.videoOptimized) this.config = mergeDeep(this.config, override);
@@ -256,6 +194,7 @@ class Human {
      return { error };
    }

    // detection happens inside a promise
    // eslint-disable-next-line no-async-promise-executor
    return new Promise(async (resolve) => {
      let poseRes;

@@ -281,9 +220,8 @@ class Human {
      this.analyze('Start Detect:');

      timeStamp = now();
      const image = this.tfImage(input);
      const process = image.process(input, this.config);
      perf.image = Math.trunc(now() - timeStamp);
      const imageTensor = image.tensor;

      // run facemesh, includes blazeface and iris
      const faceRes = [];
@@ -291,7 +229,7 @@ class Human {
        this.state = 'run:face';
        timeStamp = now();
        this.analyze('Start FaceMesh:');
        const faces = await this.models.facemesh.estimateFaces(imageTensor, this.config.face);
        const faces = await this.models.facemesh.estimateFaces(process.tensor, this.config.face);
        perf.face = Math.trunc(now() - timeStamp);
        for (const face of faces) {
          // if something went wrong, skip the face

@@ -334,38 +272,45 @@ class Human {

      // run posenet
      if (this.config.async) {
        poseRes = this.config.body.enabled ? this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
        poseRes = this.config.body.enabled ? this.models.posenet.estimatePoses(process.tensor, this.config.body) : [];
      } else {
        this.state = 'run:body';
        timeStamp = now();
        this.analyze('Start PoseNet');
        poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
        poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(process.tensor, this.config.body) : [];
        this.analyze('End PoseNet:');
        perf.body = Math.trunc(now() - timeStamp);
      }

      // run handpose
      if (this.config.async) {
        handRes = this.config.hand.enabled ? this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
        handRes = this.config.hand.enabled ? this.models.handpose.estimateHands(process.tensor, this.config.hand) : [];
      } else {
        this.state = 'run:hand';
        timeStamp = now();
        this.analyze('Start HandPose:');
        handRes = this.config.hand.enabled ? await this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
        handRes = this.config.hand.enabled ? await this.models.handpose.estimateHands(process.tensor, this.config.hand) : [];
        this.analyze('End HandPose:');
        perf.hand = Math.trunc(now() - timeStamp);
      }

      if (this.config.async) [poseRes, handRes] = await Promise.all([poseRes, handRes]);

      imageTensor.dispose();
      process.tensor.dispose();
      this.state = 'idle';

      if (this.config.scoped) tf.engine().endScope();
      this.analyze('End Scope:');

      let gestureRes = [];
      if (this.config.gesture.enabled) {
        timeStamp = now();
        gestureRes = { body: gesture.body(poseRes), hand: gesture.hand(handRes), face: gesture.face(faceRes) };
        perf.gesture = Math.trunc(now() - timeStamp);
      }

      perf.total = Math.trunc(now() - timeStart);
      resolve({ face: faceRes, body: poseRes, hand: handRes, performance: perf, canvas: image.canvas });
      resolve({ face: faceRes, body: poseRes, hand: handRes, gesture: gestureRes, performance: perf, canvas: process.canvas });
    });
  }
}
src/image.js (new file)

@@ -0,0 +1,79 @@
const tf = require('@tensorflow/tfjs');
const fxImage = require('./imagefx.js');

// internal temp canvases
let inCanvas = null;
let outCanvas = null;

// process input image and return tensor
// input can be tensor, imagedata, htmlimageelement, htmlvideoelement
// input is resized and run through imagefx filter
function process(input, config) {
  let tensor;
  if (input instanceof tf.Tensor) {
    tensor = tf.clone(input);
  } else {
    const originalWidth = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
    const originalHeight = input.naturalHeight || input.videoHeight || input.height || (input.shape && (input.shape[2] > 0));
    let targetWidth = originalWidth;
    let targetHeight = originalHeight;
    if (config.filter.width > 0) targetWidth = config.filter.width;
    else if (config.filter.height > 0) targetWidth = originalWidth * (config.filter.height / originalHeight);
    if (config.filter.height > 0) targetHeight = config.filter.height;
    else if (config.filter.width > 0) targetHeight = originalHeight * (config.filter.width / originalWidth);
    if (!inCanvas || (inCanvas.width !== targetWidth) || (inCanvas.height !== targetHeight)) {
      inCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
      if (inCanvas.width !== targetWidth) inCanvas.width = targetWidth;
      if (inCanvas.height !== targetHeight) inCanvas.height = targetHeight;
    }
    const ctx = inCanvas.getContext('2d');
    if (input instanceof ImageData) ctx.putImageData(input, 0, 0);
    else ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas.width, inCanvas.height);
    if (config.filter.enabled) {
      if (!this.fx || !outCanvas || (inCanvas.width !== outCanvas.width) || (inCanvas.height !== outCanvas.height)) {
        outCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(inCanvas.width, inCanvas.height) : document.createElement('canvas');
        if (outCanvas.width !== inCanvas.width) outCanvas.width = inCanvas.width;
        if (outCanvas.height !== inCanvas.height) outCanvas.height = inCanvas.height;
        this.fx = (tf.ENV.flags.IS_BROWSER && (typeof document !== 'undefined')) ? new fxImage.Canvas({ canvas: outCanvas }) : null;
      }
      this.fx.reset();
      this.fx.addFilter('brightness', config.filter.brightness); // must have at least one filter enabled
      if (config.filter.contrast !== 0) this.fx.addFilter('contrast', config.filter.contrast);
      if (config.filter.sharpness !== 0) this.fx.addFilter('sharpen', config.filter.sharpness);
      if (config.filter.blur !== 0) this.fx.addFilter('blur', config.filter.blur);
      if (config.filter.saturation !== 0) this.fx.addFilter('saturation', config.filter.saturation);
      if (config.filter.hue !== 0) this.fx.addFilter('hue', config.filter.hue);
      if (config.filter.negative) this.fx.addFilter('negative');
      if (config.filter.sepia) this.fx.addFilter('sepia');
      if (config.filter.vintage) this.fx.addFilter('brownie');
      if (config.filter.sepia) this.fx.addFilter('sepia');
      if (config.filter.kodachrome) this.fx.addFilter('kodachrome');
      if (config.filter.technicolor) this.fx.addFilter('technicolor');
      if (config.filter.polaroid) this.fx.addFilter('polaroid');
      if (config.filter.pixelate !== 0) this.fx.addFilter('pixelate', config.filter.pixelate);
      this.fx.apply(inCanvas);
    }
    if (!outCanvas) outCanvas = inCanvas;
    let pixels;
    if ((config.backend === 'webgl') || (outCanvas instanceof ImageData)) {
      // tf kernel-optimized method to get image data; if input is already ImageData, just use it
      pixels = tf.browser.fromPixels(outCanvas);
    } else {
      // cpu and wasm kernels do not implement an efficient fromPixels method and the canvas cannot be used as-is, so copy through one more temporary canvas
      const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
      tempCanvas.width = targetWidth;
      tempCanvas.height = targetHeight;
      const tempCtx = tempCanvas.getContext('2d');
      tempCtx.drawImage(outCanvas, 0, 0);
      const data = tempCtx.getImageData(0, 0, targetWidth, targetHeight);
      pixels = tf.browser.fromPixels(data);
    }
    const casted = pixels.toFloat();
    tensor = casted.expandDims(0);
    pixels.dispose();
    casted.dispose();
  }
  return { tensor, canvas: config.filter.return ? outCanvas : null };
}

exports.process = process;
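For orientation, the `detect()` hunk in `src/human.js` above consumes this new module roughly as follows; this is a paraphrased sketch of that call site, not additional code from the commit.

```js
// sketch of how src/human.js uses the new module (see the detect() hunk above)
const image = require('./image.js');

async function detectSketch(input, config) {
  const processed = image.process(input, config); // returns { tensor, canvas }
  // ... run facemesh / posenet / handpose against processed.tensor ...
  processed.tensor.dispose(); // caller owns and disposes the tensor
  return { canvas: processed.canvas };
}
```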