mirror of https://github.com/vladmandic/human
major performance improvements for all models
parent
18ec5f211f
commit
d2bf2aeade
|
@ -4,6 +4,9 @@
|
|||
export default {
|
||||
backend: 'webgl', // select tfjs backend to use
|
||||
console: true, // enable debugging output to console
|
||||
async: false, // execute enabled models in parallel
|
||||
// this disables per-model performance data but slightly increases performance
|
||||
// cannot be used if profiling is enabled
|
||||
profile: false, // enable tfjs profiling
|
||||
// this has significant performance impact, only enable for debugging purposes
|
||||
// currently only implemented for age,gender,emotion models
|
||||
|
|
|
@ -90,14 +90,18 @@ const log = (...msg) => {
|
|||
|
||||
// draws processed results and starts processing of a next frame
|
||||
function drawResults(input, result, canvas) {
|
||||
// update fps
|
||||
// update fps data
|
||||
fps.push(1000 / (performance.now() - timeStamp));
|
||||
if (fps.length > ui.maxFrames) fps.shift();
|
||||
menu.updateChart('FPS', fps);
|
||||
|
||||
// enable for continuous performance monitoring
|
||||
// console.log(result.performance);
|
||||
|
||||
// eslint-disable-next-line no-use-before-define
|
||||
requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop
|
||||
requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop before we even draw results
|
||||
|
||||
// draw fps chart
|
||||
menu.updateChart('FPS', fps);
|
||||
// draw image from video
|
||||
const ctx = canvas.getContext('2d');
|
||||
ctx.fillStyle = ui.baseBackground;
|
||||
|
|
|
@ -37,11 +37,11 @@ async function predict(image, config) {
|
|||
let data;
|
||||
if (!config.profile) {
|
||||
const emotionT = await models.emotion.predict(grayscale);
|
||||
data = await emotionT.data();
|
||||
data = emotionT.dataSync();
|
||||
tf.dispose(emotionT);
|
||||
} else {
|
||||
const profileData = await tf.profile(() => models.emotion.predict(grayscale));
|
||||
data = await profileData.result.data();
|
||||
data = profileData.result.dataSync();
|
||||
profileData.result.dispose();
|
||||
profile.run('emotion', profileData);
|
||||
}
|
||||
|
|
|
@ -30,10 +30,7 @@ exports.cutBoxFromImageAndResize = cutBoxFromImageAndResize;
|
|||
function scaleBoxCoordinates(box, factor) {
|
||||
const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
|
||||
const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
|
||||
const palmLandmarks = box.palmLandmarks.map((coord) => {
|
||||
const scaledCoord = [coord[0] * factor[0], coord[1] * factor[1]];
|
||||
return scaledCoord;
|
||||
});
|
||||
const palmLandmarks = box.palmLandmarks.map((coord) => [coord[0] * factor[0], coord[1] * factor[1]]);
|
||||
return { startPoint, endPoint, palmLandmarks };
|
||||
}
|
||||
exports.scaleBoxCoordinates = scaleBoxCoordinates;
|
||||
|
|
|
@ -40,8 +40,7 @@ class HandDetector {
|
|||
const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
|
||||
const boxes = this.normalizeBoxes(rawBoxes);
|
||||
const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
|
||||
const boxesWithHands = await boxesWithHandsTensor.array();
|
||||
const toDispose = [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
|
||||
const boxesWithHands = boxesWithHandsTensor.arraySync();
|
||||
const detectedHands = tf.tidy(() => {
|
||||
const detectedBoxes = [];
|
||||
for (const i in boxesWithHands) {
|
||||
|
@ -53,7 +52,7 @@ class HandDetector {
|
|||
}
|
||||
return detectedBoxes;
|
||||
});
|
||||
toDispose.forEach((tensor) => tensor.dispose());
|
||||
[batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores].forEach((tensor) => tensor.dispose());
|
||||
return detectedHands;
|
||||
}
|
||||
|
||||
|
@ -64,28 +63,24 @@ class HandDetector {
|
|||
* @param input The image to classify.
|
||||
*/
|
||||
async estimateHandBounds(input, config) {
|
||||
// const inputHeight = input.shape[2];
|
||||
// const inputWidth = input.shape[1];
|
||||
this.iouThreshold = config.iouThreshold;
|
||||
this.scoreThreshold = config.scoreThreshold;
|
||||
this.maxHands = config.maxHands;
|
||||
const resized = input.resizeBilinear([this.width, this.height]);
|
||||
const divided = resized.div(255);
|
||||
const normalized = divided.sub(0.5);
|
||||
const image = normalized.mul(2.0);
|
||||
const divided = resized.mul([1 / 127.5]);
|
||||
const image = divided.sub(0.5);
|
||||
resized.dispose();
|
||||
divided.dispose();
|
||||
normalized.dispose();
|
||||
const predictions = await this.getBoundingBoxes(image);
|
||||
image.dispose();
|
||||
if (!predictions || (predictions.length === 0)) return null;
|
||||
const hands = [];
|
||||
for (const i in predictions) {
|
||||
const prediction = predictions[i];
|
||||
const boundingBoxes = await prediction.boxes.array();
|
||||
const startPoint = boundingBoxes[0].slice(0, 2);
|
||||
const endPoint = boundingBoxes[0].slice(2, 4);
|
||||
const palmLandmarks = await prediction.palmLandmarks.array();
|
||||
const boundingBoxes = prediction.boxes.dataSync();
|
||||
const startPoint = [boundingBoxes[0], boundingBoxes[1]];
|
||||
const endPoint = [boundingBoxes[2], boundingBoxes[3]];
|
||||
const palmLandmarks = prediction.palmLandmarks.arraySync();
|
||||
prediction.boxes.dispose();
|
||||
prediction.palmLandmarks.dispose();
|
||||
hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [input.shape[2] / this.width, input.shape[1] / this.height]));
|
||||
|
|
140
src/human.js
140
src/human.js
|
@ -61,10 +61,13 @@ class Human {
|
|||
this.version = app.version;
|
||||
this.defaults = defaults;
|
||||
this.config = defaults;
|
||||
this.fx = (tf.ENV.flags.IS_BROWSER && (typeof document !== 'undefined')) ? new fxImage.Canvas() : null;
|
||||
this.fx = null;
|
||||
this.state = 'idle';
|
||||
this.numTensors = 0;
|
||||
this.analyzeMemoryLeaks = false;
|
||||
// internal temp canvases
|
||||
this.inCanvas = null;
|
||||
this.outCanvas = null;
|
||||
// object that contains all initialized models
|
||||
this.models = {
|
||||
facemesh: null,
|
||||
|
@ -160,56 +163,62 @@ class Human {
|
|||
}
|
||||
|
||||
tfImage(input) {
|
||||
// let imageData;
|
||||
let filtered;
|
||||
const originalWidth = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
|
||||
const originalHeight = input.naturalHeight || input.videoHeight || input.height || (input.shape && (input.shape[2] > 0));
|
||||
let targetWidth = originalWidth;
|
||||
let targetHeight = originalHeight;
|
||||
if (this.fx && this.config.filter.enabled && !(input instanceof tf.Tensor)) {
|
||||
if (this.config.filter.width > 0) targetWidth = this.config.filter.width;
|
||||
else if (this.config.filter.height > 0) targetWidth = originalWidth * (this.config.filter.height / originalHeight);
|
||||
if (this.config.filter.height > 0) targetHeight = this.config.filter.height;
|
||||
else if (this.config.filter.width > 0) targetHeight = originalHeight * (this.config.filter.width / originalWidth);
|
||||
const offscreenCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
|
||||
if (offscreenCanvas.width !== targetWidth) offscreenCanvas.width = targetWidth;
|
||||
if (offscreenCanvas.height !== targetHeight) offscreenCanvas.height = targetHeight;
|
||||
const ctx = offscreenCanvas.getContext('2d');
|
||||
if (input instanceof ImageData) ctx.putImageData(input, 0, 0);
|
||||
else ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, offscreenCanvas.width, offscreenCanvas.height);
|
||||
this.fx.reset();
|
||||
this.fx.addFilter('brightness', this.config.filter.brightness); // must have at least one filter enabled
|
||||
if (this.config.filter.contrast !== 0) this.fx.addFilter('contrast', this.config.filter.contrast);
|
||||
if (this.config.filter.sharpness !== 0) this.fx.addFilter('sharpen', this.config.filter.sharpness);
|
||||
if (this.config.filter.blur !== 0) this.fx.addFilter('blur', this.config.filter.blur);
|
||||
if (this.config.filter.saturation !== 0) this.fx.addFilter('saturation', this.config.filter.saturation);
|
||||
if (this.config.filter.hue !== 0) this.fx.addFilter('hue', this.config.filter.hue);
|
||||
if (this.config.filter.negative) this.fx.addFilter('negative');
|
||||
if (this.config.filter.sepia) this.fx.addFilter('sepia');
|
||||
if (this.config.filter.vintage) this.fx.addFilter('brownie');
|
||||
if (this.config.filter.sepia) this.fx.addFilter('sepia');
|
||||
if (this.config.filter.kodachrome) this.fx.addFilter('kodachrome');
|
||||
if (this.config.filter.technicolor) this.fx.addFilter('technicolor');
|
||||
if (this.config.filter.polaroid) this.fx.addFilter('polaroid');
|
||||
if (this.config.filter.pixelate !== 0) this.fx.addFilter('pixelate', this.config.filter.pixelate);
|
||||
filtered = this.fx.apply(offscreenCanvas);
|
||||
}
|
||||
let tensor;
|
||||
if (input instanceof tf.Tensor) {
|
||||
tensor = tf.clone(input);
|
||||
} else {
|
||||
const canvas = filtered || input;
|
||||
const originalWidth = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
|
||||
const originalHeight = input.naturalHeight || input.videoHeight || input.height || (input.shape && (input.shape[2] > 0));
|
||||
let targetWidth = originalWidth;
|
||||
let targetHeight = originalHeight;
|
||||
if (this.config.filter.width > 0) targetWidth = this.config.filter.width;
|
||||
else if (this.config.filter.height > 0) targetWidth = originalWidth * (this.config.filter.height / originalHeight);
|
||||
if (this.config.filter.height > 0) targetHeight = this.config.filter.height;
|
||||
else if (this.config.filter.width > 0) targetHeight = originalHeight * (this.config.filter.width / originalWidth);
|
||||
if (!this.inCanvas || (this.inCanvas.width !== originalWidth) || (this.inCanvas.height !== originalHeight)) {
|
||||
this.inCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
|
||||
if (this.inCanvas.width !== targetWidth) this.inCanvas.width = targetWidth;
|
||||
if (this.inCanvas.height !== targetHeight) this.inCanvas.height = targetHeight;
|
||||
}
|
||||
const ctx = this.inCanvas.getContext('2d');
|
||||
if (input instanceof ImageData) ctx.putImageData(input, 0, 0);
|
||||
else ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, this.inCanvas.width, this.inCanvas.height);
|
||||
if (this.config.filter.enabled) {
|
||||
if (!this.outCanvas || (this.inCanvas.width !== this.outCanvas.width) || (this.inCanvas.height !== this.outCanvas.height)) {
|
||||
this.outCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(this.inCanvas.width, this.inCanvas.height) : document.createElement('canvas');
|
||||
if (this.outCanvas.width !== this.inCanvas.width) this.outCanvas.width = this.inCanvas.width;
|
||||
if (this.outCanvas.height !== this.inCanvas.height) this.outCanvas.height = this.inCanvas.height;
|
||||
}
|
||||
if (!this.fx) this.fx = (tf.ENV.flags.IS_BROWSER && (typeof document !== 'undefined')) ? new fxImage.Canvas({ canvas: this.outCanvas }) : null;
|
||||
this.fx.reset();
|
||||
this.fx.addFilter('brightness', this.config.filter.brightness); // must have at least one filter enabled
|
||||
if (this.config.filter.contrast !== 0) this.fx.addFilter('contrast', this.config.filter.contrast);
|
||||
if (this.config.filter.sharpness !== 0) this.fx.addFilter('sharpen', this.config.filter.sharpness);
|
||||
if (this.config.filter.blur !== 0) this.fx.addFilter('blur', this.config.filter.blur);
|
||||
if (this.config.filter.saturation !== 0) this.fx.addFilter('saturation', this.config.filter.saturation);
|
||||
if (this.config.filter.hue !== 0) this.fx.addFilter('hue', this.config.filter.hue);
|
||||
if (this.config.filter.negative) this.fx.addFilter('negative');
|
||||
if (this.config.filter.sepia) this.fx.addFilter('sepia');
|
||||
if (this.config.filter.vintage) this.fx.addFilter('brownie');
|
||||
if (this.config.filter.sepia) this.fx.addFilter('sepia');
|
||||
if (this.config.filter.kodachrome) this.fx.addFilter('kodachrome');
|
||||
if (this.config.filter.technicolor) this.fx.addFilter('technicolor');
|
||||
if (this.config.filter.polaroid) this.fx.addFilter('polaroid');
|
||||
if (this.config.filter.pixelate !== 0) this.fx.addFilter('pixelate', this.config.filter.pixelate);
|
||||
this.fx.apply(this.inCanvas);
|
||||
}
|
||||
if (!this.outCanvas) this.outCanvas = this.inCanvas;
|
||||
let pixels;
|
||||
if ((this.config.backend === 'webgl') || (canvas instanceof ImageData)) {
|
||||
if ((this.config.backend === 'webgl') || (this.outCanvas instanceof ImageData)) {
|
||||
// tf kernel-optimized method to get imagedata, also if input is imagedata, just use it
|
||||
pixels = tf.browser.fromPixels(canvas);
|
||||
pixels = tf.browser.fromPixels(this.outCanvas);
|
||||
} else {
|
||||
// cpu and wasm kernels do not implement an efficient fromPixels method, nor can we use the canvas as-is, so we draw it onto one more temporary canvas
|
||||
const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
|
||||
tempCanvas.width = targetWidth;
|
||||
tempCanvas.height = targetHeight;
|
||||
const tempCtx = tempCanvas.getContext('2d');
|
||||
tempCtx.drawImage(canvas, 0, 0);
|
||||
tempCtx.drawImage(this.outCanvas, 0, 0);
|
||||
const data = tempCtx.getImageData(0, 0, targetWidth, targetHeight);
|
||||
pixels = tf.browser.fromPixels(data);
|
||||
}
|
||||
|
@ -218,7 +227,7 @@ class Human {
|
|||
pixels.dispose();
|
||||
casted.dispose();
|
||||
}
|
||||
return { tensor, canvas: this.config.filter.return ? filtered : null };
|
||||
return { tensor, canvas: this.config.filter.return ? this.outCanvas : null };
|
||||
}
|
||||
|
||||
async detect(input, userConfig = {}) {
|
||||
|
@ -239,6 +248,11 @@ class Human {
|
|||
|
||||
// eslint-disable-next-line no-async-promise-executor
|
||||
return new Promise(async (resolve) => {
|
||||
let poseRes;
|
||||
let handRes;
|
||||
let ssrRes;
|
||||
let emotionRes;
|
||||
|
||||
const timeStart = now();
|
||||
|
||||
// configure backend
|
||||
|
@ -270,20 +284,30 @@ class Human {
|
|||
const imageTensor = image.tensor;
|
||||
|
||||
// run posenet
|
||||
this.state = 'run:body';
|
||||
timeStamp = now();
|
||||
this.analyze('Start PoseNet');
|
||||
const poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
|
||||
this.analyze('End PoseNet:');
|
||||
perf.body = Math.trunc(now() - timeStamp);
|
||||
if (this.config.async) {
|
||||
poseRes = this.config.body.enabled ? this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
|
||||
} else {
|
||||
this.state = 'run:body';
|
||||
timeStamp = now();
|
||||
this.analyze('Start PoseNet');
|
||||
poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
|
||||
this.analyze('End PoseNet:');
|
||||
perf.body = Math.trunc(now() - timeStamp);
|
||||
}
|
||||
|
||||
// run handpose
|
||||
this.state = 'run:hand';
|
||||
timeStamp = now();
|
||||
this.analyze('Start HandPose:');
|
||||
const handRes = this.config.hand.enabled ? await this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
|
||||
this.analyze('End HandPose:');
|
||||
perf.hand = Math.trunc(now() - timeStamp);
|
||||
if (this.config.async) {
|
||||
handRes = this.config.hand.enabled ? this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
|
||||
} else {
|
||||
this.state = 'run:hand';
|
||||
timeStamp = now();
|
||||
this.analyze('Start HandPose:');
|
||||
handRes = this.config.hand.enabled ? await this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
|
||||
this.analyze('End HandPose:');
|
||||
perf.hand = Math.trunc(now() - timeStamp);
|
||||
}
|
||||
|
||||
if (this.config.async) [poseRes, handRes] = await Promise.all([poseRes, handRes]);
|
||||
|
||||
// run facemesh, includes blazeface and iris
|
||||
const faceRes = [];
|
||||
|
@ -302,12 +326,12 @@ class Human {
|
|||
// run ssr-net age & gender, inherits face from blazeface
|
||||
this.state = 'run:agegender';
|
||||
timeStamp = now();
|
||||
const ssrData = (this.config.face.age.enabled || this.config.face.gender.enabled) ? await ssrnet.predict(face.image, this.config) : {};
|
||||
ssrRes = (this.config.face.age.enabled || this.config.face.gender.enabled) ? await ssrnet.predict(face.image, this.config) : {};
|
||||
perf.agegender = Math.trunc(now() - timeStamp);
|
||||
// run emotion, inherits face from blazeface
|
||||
this.state = 'run:emotion';
|
||||
timeStamp = now();
|
||||
const emotionData = this.config.face.emotion.enabled ? await emotion.predict(face.image, this.config) : {};
|
||||
emotionRes = this.config.face.emotion.enabled ? await emotion.predict(face.image, this.config) : {};
|
||||
perf.emotion = Math.trunc(now() - timeStamp);
|
||||
|
||||
// don't need face anymore
|
||||
|
@ -322,10 +346,10 @@ class Human {
|
|||
box: face.box,
|
||||
mesh: face.mesh,
|
||||
annotations: face.annotations,
|
||||
age: ssrData.age,
|
||||
gender: ssrData.gender,
|
||||
agConfidence: ssrData.confidence,
|
||||
emotion: emotionData,
|
||||
age: ssrRes.age,
|
||||
gender: ssrRes.gender,
|
||||
agConfidence: ssrRes.confidence,
|
||||
emotion: emotionRes,
|
||||
iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
|
||||
});
|
||||
this.analyze('End FaceMesh:');
|
||||
|
|
Loading…
Reference in New Issue