mirror of https://github.com/vladmandic/human
pure tensor pipeline without image converts
parent
d44ff5dbb2
commit
5884c8cfe4
|
@ -268,7 +268,6 @@ config = {
|
|||
inputSize: 64, // fixed value
|
||||
minConfidence: 0.5, // threshold for discarding a prediction
|
||||
skipFrames: 10, // how many frames to go without re-running the detector, only used for video inputs
|
||||
useGrayscale: true, // convert image to grayscale before prediction or use highest channel
|
||||
modelPath: '../models/emotion/model.json',
|
||||
},
|
||||
},
|
||||
|
|
|
@ -51,7 +51,6 @@ export default {
|
|||
inputSize: 64, // fixed value
|
||||
minConfidence: 0.5, // threshold for discarding a prediction
|
||||
skipFrames: 10, // how many frames to go without re-running the detector
|
||||
useGrayscale: true, // convert image to grayscale before prediction or use highest channel
|
||||
modelPath: '../models/emotion/model.json',
|
||||
},
|
||||
},
|
||||
|
|
|
@ -6,16 +6,6 @@ let last = [];
|
|||
let frame = 0;
|
||||
const multiplier = 1.5;
|
||||
|
||||
function getImage(image, size) {
|
||||
const tensor = tf.tidy(() => {
|
||||
const buffer = tf.browser.fromPixels(image, 1);
|
||||
const resize = tf.image.resizeBilinear(buffer, [size, size]);
|
||||
const expand = tf.cast(tf.expandDims(resize, 0), 'float32');
|
||||
return expand;
|
||||
});
|
||||
return tensor;
|
||||
}
|
||||
|
||||
async function load(config) {
|
||||
if (!models.emotion) models.emotion = await tf.loadGraphModel(config.face.emotion.modelPath);
|
||||
return models.emotion;
|
||||
|
@ -27,25 +17,23 @@ async function predict(image, config) {
|
|||
return last;
|
||||
}
|
||||
frame = 0;
|
||||
const enhance = tf.tidy(() => {
|
||||
if (image instanceof tf.Tensor) {
|
||||
const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
|
||||
const [r, g, b] = tf.split(resize, 3, 3);
|
||||
if (config.face.emotion.useGrayscale) {
|
||||
// weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
|
||||
const r1 = tf.mul(r, [0.2989]);
|
||||
const g1 = tf.mul(g, [0.5870]);
|
||||
const b1 = tf.mul(b, [0.1140]);
|
||||
const grayscale = tf.addN([r1, g1, b1]);
|
||||
return grayscale;
|
||||
}
|
||||
return g;
|
||||
}
|
||||
return getImage(image, config.face.emotion.inputSize);
|
||||
});
|
||||
const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
|
||||
const [red, green, blue] = tf.split(resize, 3, 3);
|
||||
resize.dispose();
|
||||
// weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
|
||||
const redNorm = tf.mul(red, [0.2989]);
|
||||
const greenNorm = tf.mul(green, [0.5870]);
|
||||
const blueNorm = tf.mul(blue, [0.1140]);
|
||||
red.dispose();
|
||||
green.dispose();
|
||||
blue.dispose();
|
||||
const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
|
||||
redNorm.dispose();
|
||||
greenNorm.dispose();
|
||||
blueNorm.dispose();
|
||||
const obj = [];
|
||||
if (config.face.emotion.enabled) {
|
||||
const emotionT = await models.emotion.predict(enhance);
|
||||
const emotionT = await models.emotion.predict(grayscale);
|
||||
const data = await emotionT.data();
|
||||
for (let i = 0; i < data.length; i++) {
|
||||
if (multiplier * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] });
|
||||
|
@ -53,7 +41,7 @@ async function predict(image, config) {
|
|||
obj.sort((a, b) => b.score - a.score);
|
||||
tf.dispose(emotionT);
|
||||
}
|
||||
tf.dispose(enhance);
|
||||
tf.dispose(grayscale);
|
||||
last = obj;
|
||||
return obj;
|
||||
}
|
||||
|
|
|
@ -32,9 +32,9 @@ class HandDetector {
|
|||
}
|
||||
|
||||
async getBoundingBoxes(input) {
|
||||
const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
|
||||
const batchedPrediction = this.model.predict(normalizedInput);
|
||||
const batchedPrediction = this.model.predict(input);
|
||||
const prediction = batchedPrediction.squeeze();
|
||||
console.log(prediction);
|
||||
// Regression score for each anchor point.
|
||||
const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
|
||||
// Bounding box for each anchor point.
|
||||
|
@ -42,11 +42,7 @@ class HandDetector {
|
|||
const boxes = this.normalizeBoxes(rawBoxes);
|
||||
const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
|
||||
const boxesWithHands = await boxesWithHandsTensor.array();
|
||||
const toDispose = [normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
|
||||
// if (boxesWithHands.length === 0) {
|
||||
// toDispose.forEach((tensor) => tensor.dispose());
|
||||
// return null;
|
||||
// }
|
||||
const toDispose = [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
|
||||
const detectedHands = tf.tidy(() => {
|
||||
const detectedBoxes = [];
|
||||
for (const i in boxesWithHands) {
|
||||
|
@ -69,12 +65,18 @@ class HandDetector {
|
|||
* @param input The image to classify.
|
||||
*/
|
||||
async estimateHandBounds(input, config) {
|
||||
const inputHeight = input.shape[1];
|
||||
const inputWidth = input.shape[2];
|
||||
// const inputHeight = input.shape[2];
|
||||
// const inputWidth = input.shape[1];
|
||||
this.iouThreshold = config.iouThreshold;
|
||||
this.scoreThreshold = config.scoreThreshold;
|
||||
this.maxHands = config.maxHands;
|
||||
const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
|
||||
const resized = input.resizeBilinear([this.width, this.height]);
|
||||
const divided = resized.div(255);
|
||||
const normalized = divided.sub(0.5);
|
||||
const image = normalized.mul(2.0);
|
||||
resized.dispose();
|
||||
divided.dispose();
|
||||
normalized.dispose();
|
||||
const predictions = await this.getBoundingBoxes(image);
|
||||
image.dispose();
|
||||
if (!predictions || (predictions.length === 0)) return null;
|
||||
|
@ -87,7 +89,7 @@ class HandDetector {
|
|||
const palmLandmarks = await prediction.palmLandmarks.array();
|
||||
prediction.boxes.dispose();
|
||||
prediction.palmLandmarks.dispose();
|
||||
hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]));
|
||||
hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [input.shape[2] / this.width, input.shape[1] / this.height]));
|
||||
}
|
||||
return hands;
|
||||
}
|
||||
|
|
|
@ -12,14 +12,7 @@ class HandPose {
|
|||
this.skipFrames = config.skipFrames;
|
||||
this.detectionConfidence = config.minConfidence;
|
||||
this.maxHands = config.maxHands;
|
||||
const image = tf.tidy(() => {
|
||||
if (!(input instanceof tf.Tensor)) {
|
||||
input = tf.browser.fromPixels(input);
|
||||
}
|
||||
return input.toFloat().expandDims(0);
|
||||
});
|
||||
const predictions = await this.pipeline.estimateHands(image, config);
|
||||
image.dispose();
|
||||
const predictions = await this.pipeline.estimateHands(input, config);
|
||||
const hands = [];
|
||||
if (!predictions) return hands;
|
||||
for (const prediction of predictions) {
|
||||
|
|
29
src/human.js
29
src/human.js
|
@ -71,7 +71,9 @@ function mergeDeep(...objects) {
|
|||
|
||||
function sanity(input) {
|
||||
if (!input) return 'input is not defined';
|
||||
if (tf.ENV.flags.IS_BROWSER && (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement)) {
|
||||
if (!(input instanceof tf.Tensor)
|
||||
|| (tf.ENV.flags.IS_BROWSER
|
||||
&& (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement))) {
|
||||
const width = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
|
||||
if (!width || (width === 0)) return 'input is empty';
|
||||
}
|
||||
|
@ -99,6 +101,20 @@ async function load(userConfig) {
|
|||
if (config.face.enabled && config.face.emotion.enabled && !models.emotion) models.emotion = await emotion.load(config);
|
||||
}
|
||||
|
||||
function tfImage(input) {
|
||||
let image;
|
||||
if (input instanceof tf.Tensor) {
|
||||
image = tf.clone(input);
|
||||
} else {
|
||||
const pixels = tf.browser.fromPixels(input);
|
||||
const casted = pixels.toFloat();
|
||||
image = casted.expandDims(0);
|
||||
pixels.dispose();
|
||||
casted.dispose();
|
||||
}
|
||||
return image;
|
||||
}
|
||||
|
||||
async function detect(input, userConfig = {}) {
|
||||
state = 'config';
|
||||
const perf = {};
|
||||
|
@ -151,11 +167,13 @@ async function detect(input, userConfig = {}) {
|
|||
|
||||
analyze('Start Detect:');
|
||||
|
||||
const imageTensor = tfImage(input);
|
||||
|
||||
// run posenet
|
||||
state = 'run:body';
|
||||
timeStamp = now();
|
||||
analyze('Start PoseNet');
|
||||
const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
|
||||
const poseRes = config.body.enabled ? await models.posenet.estimatePoses(imageTensor, config.body) : [];
|
||||
analyze('End PoseNet:');
|
||||
perf.body = Math.trunc(now() - timeStamp);
|
||||
|
||||
|
@ -163,7 +181,7 @@ async function detect(input, userConfig = {}) {
|
|||
state = 'run:hand';
|
||||
timeStamp = now();
|
||||
analyze('Start HandPose:');
|
||||
const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
|
||||
const handRes = config.hand.enabled ? await models.handpose.estimateHands(imageTensor, config.hand) : [];
|
||||
analyze('End HandPose:');
|
||||
perf.hand = Math.trunc(now() - timeStamp);
|
||||
|
||||
|
@ -173,7 +191,7 @@ async function detect(input, userConfig = {}) {
|
|||
state = 'run:face';
|
||||
timeStamp = now();
|
||||
analyze('Start FaceMesh:');
|
||||
const faces = await models.facemesh.estimateFaces(input, config.face);
|
||||
const faces = await models.facemesh.estimateFaces(imageTensor, config.face);
|
||||
perf.face = Math.trunc(now() - timeStamp);
|
||||
for (const face of faces) {
|
||||
// if something went wrong, skip the face
|
||||
|
@ -210,10 +228,11 @@ async function detect(input, userConfig = {}) {
|
|||
emotion: emotionData,
|
||||
iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
|
||||
});
|
||||
analyze('End FaceMesh:');
|
||||
}
|
||||
analyze('End FaceMesh:');
|
||||
}
|
||||
|
||||
imageTensor.dispose();
|
||||
state = 'idle';
|
||||
|
||||
if (config.scoped) tf.engine().endScope();
|
||||
|
|
Loading…
Reference in New Issue