mirror of https://github.com/vladmandic/human

commit 5884c8cfe4: pure tensor pipeline without image converts
parent: d44ff5dbb2
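The change, in short: detect() in src/human.js now converts the input to a tensor exactly once (via the new tfImage() helper added below) and hands that same tensor to the body, hand, and face models, instead of each model calling tf.browser.fromPixels on the raw image. A minimal sketch of the resulting flow, using only names that appear in this diff:

  const imageTensor = tfImage(input);   // one conversion: DOM element/ImageData -> batched float tensor; tensors are cloned
  const poseRes = await models.posenet.estimatePoses(imageTensor, config.body);
  const handRes = await models.handpose.estimateHands(imageTensor, config.hand);
  const faces = await models.facemesh.estimateFaces(imageTensor, config.face);
  imageTensor.dispose();                // the shared tensor is released once all models have run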
@@ -268,7 +268,6 @@ config = {
       inputSize: 64, // fixed value
       minConfidence: 0.5, // threshold for discarding a prediction
       skipFrames: 10, // how many frames to go without re-running the detector, only used for video inputs
-      useGrayscale: true, // convert image to grayscale before prediction or use highest channel
       modelPath: '../models/emotion/model.json',
     },
   },
@@ -51,7 +51,6 @@ export default {
     inputSize: 64, // fixed value
     minConfidence: 0.5, // threshold for discarding a prediction
     skipFrames: 10, // how many frames to go without re-running the detector
-    useGrayscale: true, // convert image to grayscale before prediction or use highest channel
     modelPath: '../models/emotion/model.json',
   },
 },
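Both hunks above drop the same option: useGrayscale disappears from the default and the exported configuration because the emotion model now always converts its input to grayscale (see the predict() rewrite below). For reference, the emotion block after this change would look roughly like the following sketch (the enabled key is assumed from its use in predict() and is not shown in these hunks):

  emotion: {
    enabled: true,            // assumed default, not part of this diff
    inputSize: 64,            // fixed value
    minConfidence: 0.5,       // threshold for discarding a prediction
    skipFrames: 10,           // frames to skip between detector runs (video inputs)
    modelPath: '../models/emotion/model.json',
  },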
@@ -6,16 +6,6 @@ let last = [];
 let frame = 0;
 const multiplier = 1.5;

-function getImage(image, size) {
-  const tensor = tf.tidy(() => {
-    const buffer = tf.browser.fromPixels(image, 1);
-    const resize = tf.image.resizeBilinear(buffer, [size, size]);
-    const expand = tf.cast(tf.expandDims(resize, 0), 'float32');
-    return expand;
-  });
-  return tensor;
-}
-
 async function load(config) {
   if (!models.emotion) models.emotion = await tf.loadGraphModel(config.face.emotion.modelPath);
   return models.emotion;
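The removed getImage() was the emotion module's private escape hatch for non-tensor inputs (tf.browser.fromPixels + resize + expandDims). With the pure tensor pipeline, predict() can assume it always receives an already-batched float tensor, so the helper goes away. A hedged sketch of what a caller outside the library would now do before invoking the emotion model (assuming predict() is exported; the element name is illustrative):

  // browser-side preparation, done once upstream rather than inside the emotion module
  const pixels = tf.browser.fromPixels(imageElement);   // imageElement is a hypothetical <img>/<canvas>/<video>
  const tensor = tf.expandDims(tf.cast(pixels, 'float32'), 0);
  pixels.dispose();
  const result = await predict(tensor, config);         // config must contain the face.emotion.* keys shown above
  tensor.dispose();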
@@ -27,25 +17,23 @@ async function predict(image, config) {
     return last;
   }
   frame = 0;
-  const enhance = tf.tidy(() => {
-    if (image instanceof tf.Tensor) {
-      const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
-      const [r, g, b] = tf.split(resize, 3, 3);
-      if (config.face.emotion.useGrayscale) {
-        // weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
-        const r1 = tf.mul(r, [0.2989]);
-        const g1 = tf.mul(g, [0.5870]);
-        const b1 = tf.mul(b, [0.1140]);
-        const grayscale = tf.addN([r1, g1, b1]);
-        return grayscale;
-      }
-      return g;
-    }
-    return getImage(image, config.face.emotion.inputSize);
-  });
+  const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
+  const [red, green, blue] = tf.split(resize, 3, 3);
+  resize.dispose();
+  // weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
+  const redNorm = tf.mul(red, [0.2989]);
+  const greenNorm = tf.mul(green, [0.5870]);
+  const blueNorm = tf.mul(blue, [0.1140]);
+  red.dispose();
+  green.dispose();
+  blue.dispose();
+  const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
+  redNorm.dispose();
+  greenNorm.dispose();
+  blueNorm.dispose();
   const obj = [];
   if (config.face.emotion.enabled) {
-    const emotionT = await models.emotion.predict(enhance);
+    const emotionT = await models.emotion.predict(grayscale);
     const data = await emotionT.data();
     for (let i = 0; i < data.length; i++) {
       if (multiplier * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] });
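The rewritten block applies the standard rgb2gray luma weights referenced in the comment, then explicitly disposes every intermediate tensor since the code no longer runs inside tf.tidy(). Per pixel the conversion is simply:

  // gray = 0.2989 * R + 0.5870 * G + 0.1140 * B
  // e.g. a pure green pixel (R=0, G=255, B=0) becomes 0.5870 * 255 ≈ 149.7
  // and a mid gray pixel (128, 128, 128) stays at (0.2989 + 0.5870 + 0.1140) * 128 ≈ 128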
@@ -53,7 +41,7 @@ async function predict(image, config) {
     obj.sort((a, b) => b.score - a.score);
     tf.dispose(emotionT);
   }
-  tf.dispose(enhance);
+  tf.dispose(grayscale);
   last = obj;
   return obj;
 }
@@ -32,9 +32,9 @@ class HandDetector {
   }

   async getBoundingBoxes(input) {
-    const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
-    const batchedPrediction = this.model.predict(normalizedInput);
+    const batchedPrediction = this.model.predict(input);
     const prediction = batchedPrediction.squeeze();
+    console.log(prediction);
     // Regression score for each anchor point.
     const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
     // Bounding box for each anchor point.
@@ -42,11 +42,7 @@ class HandDetector {
     const boxes = this.normalizeBoxes(rawBoxes);
     const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
     const boxesWithHands = await boxesWithHandsTensor.array();
-    const toDispose = [normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
-    // if (boxesWithHands.length === 0) {
-    //   toDispose.forEach((tensor) => tensor.dispose());
-    //   return null;
-    // }
+    const toDispose = [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
     const detectedHands = tf.tidy(() => {
       const detectedBoxes = [];
       for (const i in boxesWithHands) {
@@ -69,12 +65,18 @@ class HandDetector {
    * @param input The image to classify.
    */
   async estimateHandBounds(input, config) {
-    const inputHeight = input.shape[1];
-    const inputWidth = input.shape[2];
+    // const inputHeight = input.shape[2];
+    // const inputWidth = input.shape[1];
     this.iouThreshold = config.iouThreshold;
     this.scoreThreshold = config.scoreThreshold;
     this.maxHands = config.maxHands;
-    const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
+    const resized = input.resizeBilinear([this.width, this.height]);
+    const divided = resized.div(255);
+    const normalized = divided.sub(0.5);
+    const image = normalized.mul(2.0);
+    resized.dispose();
+    divided.dispose();
+    normalized.dispose();
     const predictions = await this.getBoundingBoxes(image);
     image.dispose();
     if (!predictions || (predictions.length === 0)) return null;
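Unrolling tf.tidy() into named steps makes the normalization explicit and lets each intermediate be disposed as soon as it is consumed; together with the (x - 0.5) * 2 step removed from getBoundingBoxes() above, the chain maps 8-bit pixel values into the [-1, 1] range the model was already receiving before this change. Worked through for a few values:

  // pixel   0:   0 / 255 = 0.000  -> -0.500 -> * 2 = -1.000
  // pixel 128: 128 / 255 ≈ 0.502  -> +0.002 -> * 2 ≈ +0.004
  // pixel 255: 255 / 255 = 1.000  -> +0.500 -> * 2 = +1.000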
@@ -87,7 +89,7 @@ class HandDetector {
       const palmLandmarks = await prediction.palmLandmarks.array();
       prediction.boxes.dispose();
       prediction.palmLandmarks.dispose();
-      hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]));
+      hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [input.shape[2] / this.width, input.shape[1] / this.height]));
     }
     return hands;
   }
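Reading the scale factors straight off input.shape works because the input is now always a batched image tensor in NHWC layout (which is what tfImage() below produces), so shape[1] is the height and shape[2] is the width; the removed inputHeight/inputWidth locals carried exactly those values. For example, with a hypothetical 256x256 detector input:

  // for an input tensor of shape [1, 720, 1280, 3]:
  // scale = [input.shape[2] / this.width, input.shape[1] / this.height] = [1280 / 256, 720 / 256] = [5, 2.8125]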
@@ -12,14 +12,7 @@ class HandPose {
     this.skipFrames = config.skipFrames;
     this.detectionConfidence = config.minConfidence;
     this.maxHands = config.maxHands;
-    const image = tf.tidy(() => {
-      if (!(input instanceof tf.Tensor)) {
-        input = tf.browser.fromPixels(input);
-      }
-      return input.toFloat().expandDims(0);
-    });
-    const predictions = await this.pipeline.estimateHands(image, config);
-    image.dispose();
+    const predictions = await this.pipeline.estimateHands(input, config);
     const hands = [];
     if (!predictions) return hands;
     for (const prediction of predictions) {
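With the per-call fromPixels/expandDims block gone, HandPose.estimateHands() now assumes the caller already supplies a batched float tensor; in this commit that caller is detect() in src/human.js, which passes the shared imageTensor. A minimal hedged usage sketch (the handpose instance and video names are illustrative):

  const imageTensor = tfImage(video);   // video is a hypothetical HTMLVideoElement
  const hands = await handpose.estimateHands(imageTensor, config.hand);
  imageTensor.dispose();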
src/human.js
@@ -71,7 +71,9 @@ function mergeDeep(...objects) {

 function sanity(input) {
   if (!input) return 'input is not defined';
-  if (tf.ENV.flags.IS_BROWSER && (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement)) {
+  if (!(input instanceof tf.Tensor)
+    || (tf.ENV.flags.IS_BROWSER
+    && (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement))) {
     const width = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
     if (!width || (width === 0)) return 'input is empty';
   }
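The widened sanity() guard means tensor inputs skip the width probe entirely, while DOM-style inputs (and anything that is not a tensor at all) still get checked. Roughly:

  // tf.Tensor input                         -> condition is false, width check skipped
  // HTMLVideoElement / ImageData (browser)  -> condition is true, width check runs
  // any other non-tensor value              -> condition is true, returns 'input is empty' when no usable width is found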
@@ -99,6 +101,20 @@ async function load(userConfig) {
   if (config.face.enabled && config.face.emotion.enabled && !models.emotion) models.emotion = await emotion.load(config);
 }

+function tfImage(input) {
+  let image;
+  if (input instanceof tf.Tensor) {
+    image = tf.clone(input);
+  } else {
+    const pixels = tf.browser.fromPixels(input);
+    const casted = pixels.toFloat();
+    image = casted.expandDims(0);
+    pixels.dispose();
+    casted.dispose();
+  }
+  return image;
+}
+
 async function detect(input, userConfig = {}) {
   state = 'config';
   const perf = {};
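tfImage() is the single conversion point the commit message refers to: DOM/ImageData inputs go through fromPixels, get cast to float32 and batched into shape [1, height, width, 3], while tensor inputs are cloned as-is (so they are expected to arrive already batched). The caller owns the returned tensor and must dispose it, which detect() does at the end of the pipeline. A small hedged usage sketch:

  const fromElement = tfImage(videoElement);   // videoElement is a hypothetical <video>; result shape [1, h, w, 3]
  const fromTensor = tfImage(existingTensor);  // clone of an already-prepared tensor; the original stays untouched
  // ...run models...
  fromElement.dispose();
  fromTensor.dispose();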
@@ -151,11 +167,13 @@ async function detect(input, userConfig = {}) {

   analyze('Start Detect:');

+  const imageTensor = tfImage(input);
+
   // run posenet
   state = 'run:body';
   timeStamp = now();
   analyze('Start PoseNet');
-  const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
+  const poseRes = config.body.enabled ? await models.posenet.estimatePoses(imageTensor, config.body) : [];
   analyze('End PoseNet:');
   perf.body = Math.trunc(now() - timeStamp);
@@ -163,7 +181,7 @@ async function detect(input, userConfig = {}) {
   state = 'run:hand';
   timeStamp = now();
   analyze('Start HandPose:');
-  const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
+  const handRes = config.hand.enabled ? await models.handpose.estimateHands(imageTensor, config.hand) : [];
   analyze('End HandPose:');
   perf.hand = Math.trunc(now() - timeStamp);
@@ -173,7 +191,7 @@ async function detect(input, userConfig = {}) {
   state = 'run:face';
   timeStamp = now();
   analyze('Start FaceMesh:');
-  const faces = await models.facemesh.estimateFaces(input, config.face);
+  const faces = await models.facemesh.estimateFaces(imageTensor, config.face);
   perf.face = Math.trunc(now() - timeStamp);
   for (const face of faces) {
     // is something went wrong, skip the face
@@ -210,10 +228,11 @@ async function detect(input, userConfig = {}) {
         emotion: emotionData,
         iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
       });
+      analyze('End FaceMesh:');
     }
-    analyze('End FaceMesh:');
   }

+  imageTensor.dispose();
   state = 'idle';

   if (config.scoped) tf.engine().endScope();
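Since the shared tensor is created outside any tf.tidy() scope, the explicit imageTensor.dispose() above is what keeps repeated detect() calls from leaking GPU memory. One rough way to verify this from calling code (names are illustrative, not part of the library API):

  const before = tf.memory().numTensors;
  await human.detect(video);                     // human and video are assumed names for illustration
  console.log(tf.memory().numTensors - before);  // should stay close to zero if nothing is retained between calls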