pure tensor pipeline without image converts

pull/293/head
Vladimir Mandic 2020-10-18 09:21:53 -04:00
parent d44ff5dbb2
commit 5884c8cfe4
6 changed files with 54 additions and 54 deletions

View File

@ -268,7 +268,6 @@ config = {
inputSize: 64, // fixed value inputSize: 64, // fixed value
minConfidence: 0.5, // threshold for discarding a prediction minConfidence: 0.5, // threshold for discarding a prediction
skipFrames: 10, // how many frames to go without re-running the detector, only used for video inputs skipFrames: 10, // how many frames to go without re-running the detector, only used for video inputs
useGrayscale: true, // convert image to grayscale before prediction or use highest channel
modelPath: '../models/emotion/model.json', modelPath: '../models/emotion/model.json',
}, },
}, },

View File

@ -51,7 +51,6 @@ export default {
inputSize: 64, // fixed value inputSize: 64, // fixed value
minConfidence: 0.5, // threshold for discarding a prediction minConfidence: 0.5, // threshold for discarding a prediction
skipFrames: 10, // how many frames to go without re-running the detector skipFrames: 10, // how many frames to go without re-running the detector
useGrayscale: true, // convert image to grayscale before prediction or use highest channel
modelPath: '../models/emotion/model.json', modelPath: '../models/emotion/model.json',
}, },
}, },

View File

@ -6,16 +6,6 @@ let last = [];
let frame = 0; let frame = 0;
const multiplier = 1.5; const multiplier = 1.5;
/**
 * Convert a pixel source into a float32 tensor resized to a square.
 *
 * The source is read with `tf.browser.fromPixels(image, 1)`, resized
 * bilinearly to `[size, size]`, given a leading axis with `expandDims(…, 0)`
 * and finally cast to `'float32'`. All work happens inside `tf.tidy`, so only
 * the returned tensor is handed to the caller, who is responsible for
 * disposing it.
 *
 * @param {*} image - Pixel source accepted by `tf.browser.fromPixels`.
 * @param {number} size - Edge length used for both output dimensions.
 * @returns {*} The tensor produced by the final `tf.cast` call.
 */
function getImage(image, size) {
  return tf.tidy(() => {
    const pixels = tf.browser.fromPixels(image, 1);
    const scaled = tf.image.resizeBilinear(pixels, [size, size]);
    const batched = tf.expandDims(scaled, 0);
    return tf.cast(batched, 'float32');
  });
}
async function load(config) { async function load(config) {
if (!models.emotion) models.emotion = await tf.loadGraphModel(config.face.emotion.modelPath); if (!models.emotion) models.emotion = await tf.loadGraphModel(config.face.emotion.modelPath);
return models.emotion; return models.emotion;
@ -27,25 +17,23 @@ async function predict(image, config) {
return last; return last;
} }
frame = 0; frame = 0;
const enhance = tf.tidy(() => { const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
if (image instanceof tf.Tensor) { const [red, green, blue] = tf.split(resize, 3, 3);
const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false); resize.dispose();
const [r, g, b] = tf.split(resize, 3, 3); // weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
if (config.face.emotion.useGrayscale) { const redNorm = tf.mul(red, [0.2989]);
// weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html const greenNorm = tf.mul(green, [0.5870]);
const r1 = tf.mul(r, [0.2989]); const blueNorm = tf.mul(blue, [0.1140]);
const g1 = tf.mul(g, [0.5870]); red.dispose();
const b1 = tf.mul(b, [0.1140]); green.dispose();
const grayscale = tf.addN([r1, g1, b1]); blue.dispose();
return grayscale; const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
} redNorm.dispose();
return g; greenNorm.dispose();
} blueNorm.dispose();
return getImage(image, config.face.emotion.inputSize);
});
const obj = []; const obj = [];
if (config.face.emotion.enabled) { if (config.face.emotion.enabled) {
const emotionT = await models.emotion.predict(enhance); const emotionT = await models.emotion.predict(grayscale);
const data = await emotionT.data(); const data = await emotionT.data();
for (let i = 0; i < data.length; i++) { for (let i = 0; i < data.length; i++) {
if (multiplier * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] }); if (multiplier * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] });
@ -53,7 +41,7 @@ async function predict(image, config) {
obj.sort((a, b) => b.score - a.score); obj.sort((a, b) => b.score - a.score);
tf.dispose(emotionT); tf.dispose(emotionT);
} }
tf.dispose(enhance); tf.dispose(grayscale);
last = obj; last = obj;
return obj; return obj;
} }

View File

@ -32,9 +32,9 @@ class HandDetector {
} }
async getBoundingBoxes(input) { async getBoundingBoxes(input) {
const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2)); const batchedPrediction = this.model.predict(input);
const batchedPrediction = this.model.predict(normalizedInput);
const prediction = batchedPrediction.squeeze(); const prediction = batchedPrediction.squeeze();
console.log(prediction);
// Regression score for each anchor point. // Regression score for each anchor point.
const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze()); const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
// Bounding box for each anchor point. // Bounding box for each anchor point.
@ -42,11 +42,7 @@ class HandDetector {
const boxes = this.normalizeBoxes(rawBoxes); const boxes = this.normalizeBoxes(rawBoxes);
const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold); const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
const boxesWithHands = await boxesWithHandsTensor.array(); const boxesWithHands = await boxesWithHandsTensor.array();
const toDispose = [normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores]; const toDispose = [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
// if (boxesWithHands.length === 0) {
// toDispose.forEach((tensor) => tensor.dispose());
// return null;
// }
const detectedHands = tf.tidy(() => { const detectedHands = tf.tidy(() => {
const detectedBoxes = []; const detectedBoxes = [];
for (const i in boxesWithHands) { for (const i in boxesWithHands) {
@ -69,12 +65,18 @@ class HandDetector {
* @param input The image to classify. * @param input The image to classify.
*/ */
async estimateHandBounds(input, config) { async estimateHandBounds(input, config) {
const inputHeight = input.shape[1]; // const inputHeight = input.shape[2];
const inputWidth = input.shape[2]; // const inputWidth = input.shape[1];
this.iouThreshold = config.iouThreshold; this.iouThreshold = config.iouThreshold;
this.scoreThreshold = config.scoreThreshold; this.scoreThreshold = config.scoreThreshold;
this.maxHands = config.maxHands; this.maxHands = config.maxHands;
const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255)); const resized = input.resizeBilinear([this.width, this.height]);
const divided = resized.div(255);
const normalized = divided.sub(0.5);
const image = normalized.mul(2.0);
resized.dispose();
divided.dispose();
normalized.dispose();
const predictions = await this.getBoundingBoxes(image); const predictions = await this.getBoundingBoxes(image);
image.dispose(); image.dispose();
if (!predictions || (predictions.length === 0)) return null; if (!predictions || (predictions.length === 0)) return null;
@ -87,7 +89,7 @@ class HandDetector {
const palmLandmarks = await prediction.palmLandmarks.array(); const palmLandmarks = await prediction.palmLandmarks.array();
prediction.boxes.dispose(); prediction.boxes.dispose();
prediction.palmLandmarks.dispose(); prediction.palmLandmarks.dispose();
hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height])); hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [input.shape[2] / this.width, input.shape[1] / this.height]));
} }
return hands; return hands;
} }

View File

@ -12,14 +12,7 @@ class HandPose {
this.skipFrames = config.skipFrames; this.skipFrames = config.skipFrames;
this.detectionConfidence = config.minConfidence; this.detectionConfidence = config.minConfidence;
this.maxHands = config.maxHands; this.maxHands = config.maxHands;
const image = tf.tidy(() => { const predictions = await this.pipeline.estimateHands(input, config);
if (!(input instanceof tf.Tensor)) {
input = tf.browser.fromPixels(input);
}
return input.toFloat().expandDims(0);
});
const predictions = await this.pipeline.estimateHands(image, config);
image.dispose();
const hands = []; const hands = [];
if (!predictions) return hands; if (!predictions) return hands;
for (const prediction of predictions) { for (const prediction of predictions) {

View File

@ -71,7 +71,9 @@ function mergeDeep(...objects) {
function sanity(input) { function sanity(input) {
if (!input) return 'input is not defined'; if (!input) return 'input is not defined';
if (tf.ENV.flags.IS_BROWSER && (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement)) { if (!(input instanceof tf.Tensor)
|| (tf.ENV.flags.IS_BROWSER
&& (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement))) {
const width = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0)); const width = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
if (!width || (width === 0)) return 'input is empty'; if (!width || (width === 0)) return 'input is empty';
} }
@ -99,6 +101,20 @@ async function load(userConfig) {
if (config.face.enabled && config.face.emotion.enabled && !models.emotion) models.emotion = await emotion.load(config); if (config.face.enabled && config.face.emotion.enabled && !models.emotion) models.emotion = await emotion.load(config);
} }
/**
 * Produce a tensor from `input` that the caller owns and may dispose.
 *
 * - A `tf.Tensor` input is cloned, so disposing the result leaves the
 *   caller-supplied tensor untouched.
 * - Any other input is read with `tf.browser.fromPixels`, converted with
 *   `toFloat()` and given a leading axis with `expandDims(0)`; the two
 *   intermediate tensors are disposed before returning.
 *
 * @param {*} input - A `tf.Tensor`, or a pixel source accepted by
 *   `tf.browser.fromPixels`.
 * @returns {*} A new tensor; the caller is responsible for disposing it.
 */
function tfImage(input) {
  if (input instanceof tf.Tensor) return tf.clone(input);

  const rgb = tf.browser.fromPixels(input);
  const asFloat = rgb.toFloat();
  const batched = asFloat.expandDims(0);
  // release the intermediates explicitly; only `batched` leaves this function
  rgb.dispose();
  asFloat.dispose();
  return batched;
}
async function detect(input, userConfig = {}) { async function detect(input, userConfig = {}) {
state = 'config'; state = 'config';
const perf = {}; const perf = {};
@ -151,11 +167,13 @@ async function detect(input, userConfig = {}) {
analyze('Start Detect:'); analyze('Start Detect:');
const imageTensor = tfImage(input);
// run posenet // run posenet
state = 'run:body'; state = 'run:body';
timeStamp = now(); timeStamp = now();
analyze('Start PoseNet'); analyze('Start PoseNet');
const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : []; const poseRes = config.body.enabled ? await models.posenet.estimatePoses(imageTensor, config.body) : [];
analyze('End PoseNet:'); analyze('End PoseNet:');
perf.body = Math.trunc(now() - timeStamp); perf.body = Math.trunc(now() - timeStamp);
@ -163,7 +181,7 @@ async function detect(input, userConfig = {}) {
state = 'run:hand'; state = 'run:hand';
timeStamp = now(); timeStamp = now();
analyze('Start HandPose:'); analyze('Start HandPose:');
const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : []; const handRes = config.hand.enabled ? await models.handpose.estimateHands(imageTensor, config.hand) : [];
analyze('End HandPose:'); analyze('End HandPose:');
perf.hand = Math.trunc(now() - timeStamp); perf.hand = Math.trunc(now() - timeStamp);
@ -173,7 +191,7 @@ async function detect(input, userConfig = {}) {
state = 'run:face'; state = 'run:face';
timeStamp = now(); timeStamp = now();
analyze('Start FaceMesh:'); analyze('Start FaceMesh:');
const faces = await models.facemesh.estimateFaces(input, config.face); const faces = await models.facemesh.estimateFaces(imageTensor, config.face);
perf.face = Math.trunc(now() - timeStamp); perf.face = Math.trunc(now() - timeStamp);
for (const face of faces) { for (const face of faces) {
// if something went wrong, skip the face // if something went wrong, skip the face
@ -210,10 +228,11 @@ async function detect(input, userConfig = {}) {
emotion: emotionData, emotion: emotionData,
iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0, iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
}); });
analyze('End FaceMesh:');
} }
analyze('End FaceMesh:');
} }
imageTensor.dispose();
state = 'idle'; state = 'idle';
if (config.scoped) tf.engine().endScope(); if (config.scoped) tf.engine().endScope();