pure tensor pipeline without image converts

pull/293/head
Vladimir Mandic 2020-10-18 09:21:53 -04:00
parent d44ff5dbb2
commit 5884c8cfe4
6 changed files with 54 additions and 54 deletions

View File

@@ -268,7 +268,6 @@ config = {
inputSize: 64, // fixed value
minConfidence: 0.5, // threshold for discarding a prediction
skipFrames: 10, // how many frames to go without re-running the detector, only used for video inputs
useGrayscale: true, // convert image to grayscale before prediction or use highest channel
modelPath: '../models/emotion/model.json',
},
},

View File

@@ -51,7 +51,6 @@ export default {
inputSize: 64, // fixed value
minConfidence: 0.5, // threshold for discarding a prediction
skipFrames: 10, // how many frames to go without re-running the detector
useGrayscale: true, // convert image to grayscale before prediction or use highest channel
modelPath: '../models/emotion/model.json',
},
},
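With the useGrayscale flag removed from both the documentation and the default config, the grayscale conversion is no longer optional: the emotion detector always applies it. For reference, a sketch of the emotion config section as it stands after this commit (the enabled default is an assumption; it does not appear in the hunks above):

emotion: {
  enabled: true, // assumed default; not shown in this diff
  inputSize: 64, // fixed value
  minConfidence: 0.5, // threshold for discarding a prediction
  skipFrames: 10, // how many frames to go without re-running the detector
  modelPath: '../models/emotion/model.json',
},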

View File

@@ -6,16 +6,6 @@ let last = [];
let frame = 0;
const multiplier = 1.5;
function getImage(image, size) {
const tensor = tf.tidy(() => {
const buffer = tf.browser.fromPixels(image, 1);
const resize = tf.image.resizeBilinear(buffer, [size, size]);
const expand = tf.cast(tf.expandDims(resize, 0), 'float32');
return expand;
});
return tensor;
}
async function load(config) {
if (!models.emotion) models.emotion = await tf.loadGraphModel(config.face.emotion.modelPath);
return models.emotion;
@@ -27,25 +17,23 @@ async function predict(image, config) {
return last;
}
frame = 0;
const enhance = tf.tidy(() => {
if (image instanceof tf.Tensor) {
const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
const [r, g, b] = tf.split(resize, 3, 3);
if (config.face.emotion.useGrayscale) {
// weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
const r1 = tf.mul(r, [0.2989]);
const g1 = tf.mul(g, [0.5870]);
const b1 = tf.mul(b, [0.1140]);
const grayscale = tf.addN([r1, g1, b1]);
return grayscale;
}
return g;
}
return getImage(image, config.face.emotion.inputSize);
});
const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
const [red, green, blue] = tf.split(resize, 3, 3);
resize.dispose();
// weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
const redNorm = tf.mul(red, [0.2989]);
const greenNorm = tf.mul(green, [0.5870]);
const blueNorm = tf.mul(blue, [0.1140]);
red.dispose();
green.dispose();
blue.dispose();
const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
redNorm.dispose();
greenNorm.dispose();
blueNorm.dispose();
const obj = [];
if (config.face.emotion.enabled) {
const emotionT = await models.emotion.predict(enhance);
const emotionT = await models.emotion.predict(grayscale);
const data = await emotionT.data();
for (let i = 0; i < data.length; i++) {
if (multiplier * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] });
@@ -53,7 +41,7 @@ async function predict(image, config) {
obj.sort((a, b) => b.score - a.score);
tf.dispose(emotionT);
}
tf.dispose(enhance);
tf.dispose(grayscale);
last = obj;
return obj;
}
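The inlined conversion above is the standard luma weighting from the rgb2gray reference: gray = 0.2989 * R + 0.5870 * G + 0.1140 * B. A minimal self-contained sketch of the same step, assuming tf is @tensorflow/tfjs and a [1, height, width, 3] float32 input; rgbToGrayscale is a hypothetical helper, not part of this codebase:

const tf = require('@tensorflow/tfjs');

// Weighted rgb-to-grayscale on a [1, height, width, 3] float32 tensor;
// tf.tidy releases the per-channel intermediates automatically.
function rgbToGrayscale(rgb) {
  return tf.tidy(() => {
    const [red, green, blue] = tf.split(rgb, 3, 3); // split along the channel axis
    return tf.addN([red.mul(0.2989), green.mul(0.5870), blue.mul(0.1140)]); // [1, height, width, 1]
  });
}

Wrapped in tf.tidy, this is equivalent to the dispose() chain in the diff, just with less manual bookkeeping.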

View File

@@ -32,9 +32,9 @@ class HandDetector {
}
async getBoundingBoxes(input) {
const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
const batchedPrediction = this.model.predict(normalizedInput);
const batchedPrediction = this.model.predict(input);
const prediction = batchedPrediction.squeeze();
// console.log(prediction); // debug output, not for production
// Regression score for each anchor point.
const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
// Bounding box for each anchor point.
@@ -42,11 +42,7 @@ class HandDetector {
const boxes = this.normalizeBoxes(rawBoxes);
const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
const boxesWithHands = await boxesWithHandsTensor.array();
const toDispose = [normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
// if (boxesWithHands.length === 0) {
// toDispose.forEach((tensor) => tensor.dispose());
// return null;
// }
const toDispose = [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
const detectedHands = tf.tidy(() => {
const detectedBoxes = [];
for (const i in boxesWithHands) {
@@ -69,12 +65,18 @@ class HandDetector {
* @param input The image to classify.
*/
async estimateHandBounds(input, config) {
const inputHeight = input.shape[1];
const inputWidth = input.shape[2];
// const inputHeight = input.shape[1];
// const inputWidth = input.shape[2];
this.iouThreshold = config.iouThreshold;
this.scoreThreshold = config.scoreThreshold;
this.maxHands = config.maxHands;
const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
const resized = input.resizeBilinear([this.width, this.height]);
const divided = resized.div(255);
const normalized = divided.sub(0.5);
const image = normalized.mul(2.0);
resized.dispose();
divided.dispose();
normalized.dispose();
const predictions = await this.getBoundingBoxes(image);
image.dispose();
if (!predictions || (predictions.length === 0)) return null;
@@ -87,7 +89,7 @@ class HandDetector {
const palmLandmarks = await prediction.palmLandmarks.array();
prediction.boxes.dispose();
prediction.palmLandmarks.dispose();
hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]));
hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [input.shape[2] / this.width, input.shape[1] / this.height]));
}
return hands;
}
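The four explicitly disposed steps added above implement x -> (x / 255 - 0.5) * 2, mapping pixel values from [0, 255] to the [-1, 1] range the palm detector expects; the normalization moved here from getBoundingBoxes, which now runs predict on its input directly. A hedged one-scope equivalent, assuming input is a [1, height, width, 3] tensor and this.width / this.height as in the class above:

// Same normalization with intermediates cleaned up by tf.tidy instead of
// explicit dispose() calls: resize, scale to [0, 1], then shift to [-1, 1].
const image = tf.tidy(() => input
  .resizeBilinear([this.width, this.height])
  .div(255)
  .sub(0.5)
  .mul(2.0));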

View File

@@ -12,14 +12,7 @@ class HandPose {
this.skipFrames = config.skipFrames;
this.detectionConfidence = config.minConfidence;
this.maxHands = config.maxHands;
const image = tf.tidy(() => {
if (!(input instanceof tf.Tensor)) {
input = tf.browser.fromPixels(input);
}
return input.toFloat().expandDims(0);
});
const predictions = await this.pipeline.estimateHands(image, config);
image.dispose();
const predictions = await this.pipeline.estimateHands(input, config);
const hands = [];
if (!predictions) return hands;
for (const prediction of predictions) {
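With the fromPixels branch gone, estimateHands now expects a prepared tensor rather than a DOM element. A sketch of the new calling convention; video and the handpose instance here are illustrative assumptions:

// Conversion happens once, upstream: build a [1, height, width, 3] float
// tensor, pass it through the pipeline, and dispose it when done.
const image = tf.browser.fromPixels(video).toFloat().expandDims(0);
const hands = await handpose.estimateHands(image, config.hand);
image.dispose();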

View File

@@ -71,7 +71,9 @@ function mergeDeep(...objects) {
function sanity(input) {
if (!input) return 'input is not defined';
if (tf.ENV.flags.IS_BROWSER && (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement)) {
if (!(input instanceof tf.Tensor)
|| (tf.ENV.flags.IS_BROWSER
&& (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement))) {
const width = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
if (!width || (width === 0)) return 'input is empty';
}
@@ -99,6 +101,20 @@ async function load(userConfig) {
if (config.face.enabled && config.face.emotion.enabled && !models.emotion) models.emotion = await emotion.load(config);
}
function tfImage(input) {
let image;
if (input instanceof tf.Tensor) {
image = tf.clone(input);
} else {
const pixels = tf.browser.fromPixels(input);
const casted = pixels.toFloat();
image = casted.expandDims(0);
pixels.dispose();
casted.dispose();
}
return image;
}
async function detect(input, userConfig = {}) {
state = 'config';
const perf = {};
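tfImage is the single conversion point the commit title refers to: DOM sources go through fromPixels, toFloat and expandDims once, while tensor inputs are cloned so that detect can dispose its working copy without destroying the caller's tensor. A sketch of why the clone matters; frame is a hypothetical caller-owned tensor:

// detect() disposes the tensor it gets from tfImage; cloning means the
// caller's own tensor stays valid across repeated detect() calls.
const frame = tf.browser.fromPixels(video).toFloat().expandDims(0);
const first = await detect(frame, config);  // works on a clone of frame
const second = await detect(frame, config); // frame is still alive here
frame.dispose();                            // caller releases its own tensor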
@@ -151,11 +167,13 @@ async function detect(input, userConfig = {}) {
analyze('Start Detect:');
const imageTensor = tfImage(input);
// run posenet
state = 'run:body';
timeStamp = now();
analyze('Start PoseNet');
const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
const poseRes = config.body.enabled ? await models.posenet.estimatePoses(imageTensor, config.body) : [];
analyze('End PoseNet:');
perf.body = Math.trunc(now() - timeStamp);
@@ -163,7 +181,7 @@ async function detect(input, userConfig = {}) {
state = 'run:hand';
timeStamp = now();
analyze('Start HandPose:');
const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
const handRes = config.hand.enabled ? await models.handpose.estimateHands(imageTensor, config.hand) : [];
analyze('End HandPose:');
perf.hand = Math.trunc(now() - timeStamp);
@@ -173,7 +191,7 @@ async function detect(input, userConfig = {}) {
state = 'run:face';
timeStamp = now();
analyze('Start FaceMesh:');
const faces = await models.facemesh.estimateFaces(input, config.face);
const faces = await models.facemesh.estimateFaces(imageTensor, config.face);
perf.face = Math.trunc(now() - timeStamp);
for (const face of faces) {
// if something went wrong, skip the face
@@ -210,10 +228,11 @@ async function detect(input, userConfig = {}) {
emotion: emotionData,
iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
});
analyze('End FaceMesh:');
}
analyze('End FaceMesh:');
}
imageTensor.dispose();
state = 'idle';
if (config.scoped) tf.engine().endScope();
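Because the commit trades several tf.tidy scopes for explicit dispose() calls, a cheap way to verify the accounting is tf.memory(), a standard tfjs call; this check is illustrative, not part of the commit:

// Tensor count should return to its baseline after one full detect() pass
// if every intermediate really is disposed.
const before = tf.memory().numTensors;
await detect(video, config);
const leaked = tf.memory().numTensors - before;
if (leaked > 0) console.warn(`detect() left ${leaked} tensors undisposed`);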