fixed memory leak

pull/50/head
Vladimir Mandic 2020-10-12 22:01:35 -04:00
parent 96aa082c09
commit c106aa2a18
13 changed files with 563 additions and 594 deletions
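
Note on the pattern: every fix in this commit follows the standard TensorFlow.js memory-management discipline, keeping intermediate tensor math inside tf.tidy() and explicitly calling dispose() on tensors that have to outlive it. A minimal sketch of that pattern, not taken from the commit itself (detectOnce and its arguments are illustrative names):

const tf = require('@tensorflow/tfjs');
// illustrative helper showing the dispose pattern applied throughout this commit
async function detectOnce(model, input) {
  // tf.tidy() disposes intermediates; the returned tensor survives and is released below
  const normalized = tf.tidy(() => tf.mul(tf.sub(input.div(255), 0.5), 2));
  const prediction = model.predict(normalized);
  const data = await prediction.data(); // copy the values to the CPU
  normalized.dispose(); // tensors that escaped tidy must be released by hand
  prediction.dispose();
  return data;
}
// tf.memory().numTensors should stay flat across calls when nothing leaks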

View File

@@ -280,4 +280,4 @@ Library can also be used on mobile devices
## Todo
- Improve detection of smaller faces
- Fix memory leak in face detector
- Verify age/gender models

View File

@@ -7,13 +7,13 @@ const config = {
face: {
enabled: true,
detector: { maxFaces: 10, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
mesh: { enabled: true },
iris: { enabled: true },
age: { enabled: true, skipFrames: 5 },
gender: { enabled: true },
mesh: { enabled: false },
iris: { enabled: false },
age: { enabled: false, skipFrames: 5 },
gender: { enabled: false },
},
body: { enabled: true, maxDetections: 5, scoreThreshold: 0.75, nmsRadius: 20 },
hand: { enabled: true, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
body: { enabled: false, maxDetections: 5, scoreThreshold: 0.75, nmsRadius: 20 },
hand: { enabled: false, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
};
let settings;
@@ -181,10 +181,11 @@ async function runHumanDetect() {
log.innerText = `
TFJS Version: ${human.tf.version_core} Memory: ${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors
GPU Memory: used ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes free ${Math.floor(1024 * 1024 * engine.backendInstance.numMBBeforeWarning).toLocaleString()} bytes
Result: Face: ${(JSON.stringify(result.face)).length.toLocaleString()} bytes Body: ${(JSON.stringify(result.body)).length.toLocaleString()} bytes Hand: ${(JSON.stringify(result.hand)).length.toLocaleString()} bytes
Result Object Size: Face: ${(JSON.stringify(result.face)).length.toLocaleString()} bytes Body: ${(JSON.stringify(result.body)).length.toLocaleString()} bytes Hand: ${(JSON.stringify(result.hand)).length.toLocaleString()} bytes
`;
// rinse & repeate
requestAnimationFrame(runHumanDetect);
// setTimeout(() => runHumanDetect(), 1000); // slow loop for debugging purposes
requestAnimationFrame(runHumanDetect); // immediate loop
}
}
@@ -242,6 +243,10 @@ async function setupCanvas() {
async function setupCamera() {
const video = document.getElementById('video');
if (!navigator.mediaDevices) {
document.getElementById('log').innerText = 'Video not supported';
return;
}
const stream = await navigator.mediaDevices.getUserMedia({
audio: false,
video: { facingMode: 'user', width: window.innerWidth, height: window.innerHeight },
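
With mesh, iris, age, gender, body and hand disabled in the config above, the demo loop reduces to roughly the sketch below (assuming the demo's human, video and log globals and the human.detect(video, config) entry point; the memory log mirrors the one in the diff):

async function runLoop() {
  const result = await human.detect(video, config);
  // tensor count should stay constant between frames once the leak is fixed
  log.innerText = `tensors: ${human.tf.memory().numTensors} faces: ${result.face.length}`;
  requestAnimationFrame(runLoop); // or setTimeout(runLoop, 1000) for a slow debugging loop
}
runLoop();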

430
dist/human.esm.js vendored

File diff suppressed because one or more lines are too long


430
dist/human.js vendored

File diff suppressed because one or more lines are too long

6
dist/human.js.map vendored

File diff suppressed because one or more lines are too long

View File

@@ -1,20 +0,0 @@
const tf = require('@tensorflow/tfjs');
exports.disposeBox = (box) => {
box.startEndTensor.dispose();
box.startPoint.dispose();
box.endPoint.dispose();
};
exports.createBox = (startEndTensor) => ({
startEndTensor,
startPoint: tf.slice(startEndTensor, [0, 0], [-1, 2]),
endPoint: tf.slice(startEndTensor, [0, 2], [-1, 2]),
});
exports.scaleBox = (box, factors) => {
const starts = tf.mul(box.startPoint, factors);
const ends = tf.mul(box.endPoint, factors);
const newCoordinates = tf.concat2d([starts, ends], 1);
return exports.createBox(newCoordinates);
};

View File

@@ -1,12 +0,0 @@
const tf = require('@tensorflow/tfjs');
const face = require('./face');
async function load(config) {
const blazeface = await tf.loadGraphModel(config.detector.modelPath, { fromTFHub: config.detector.modelPath.includes('tfhub.dev') });
const model = new face.BlazeFaceModel(blazeface, config);
return model;
}
exports.load = load;
const face_2 = require('./face');
Object.defineProperty(exports, 'BlazeFaceModel', { enumerable: true, get() { return face_2.BlazeFaceModel; } });

View File

@@ -1,10 +1,10 @@
const tf = require('@tensorflow/tfjs');
const bounding = require('./box');
const ANCHORS_CONFIG = {
strides: [8, 16],
anchors: [2, 6],
};
const NUM_LANDMARKS = 6;
function generateAnchors(width, height, outputSpec) {
const anchors = [];
@@ -25,6 +25,26 @@ function generateAnchors(width, height, outputSpec) {
}
return anchors;
}
const disposeBox = (box) => {
box.startEndTensor.dispose();
box.startPoint.dispose();
box.endPoint.dispose();
};
const createBox = (startEndTensor) => ({
startEndTensor,
startPoint: tf.slice(startEndTensor, [0, 0], [-1, 2]),
endPoint: tf.slice(startEndTensor, [0, 2], [-1, 2]),
});
const scaleBox = (box, factors) => {
const starts = tf.mul(box.startPoint, factors);
const ends = tf.mul(box.endPoint, factors);
const newCoordinates = tf.concat2d([starts, ends], 1);
return createBox(newCoordinates);
};
function decodeBounds(boxOutputs, anchors, inputSize) {
const boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
const centers = tf.add(boxStarts, anchors);
@@ -39,12 +59,14 @@ function decodeBounds(boxOutputs, anchors, inputSize) {
const concatAxis = 1;
return tf.concat2d([startNormalized, endNormalized], concatAxis);
}
function scaleBoxFromPrediction(face, scaleFactor) {
return tf.tidy(() => {
const box = face['box'] ? face['box'] : face;
return bounding.scaleBox(box, scaleFactor).startEndTensor.squeeze();
return scaleBox(box, scaleFactor).startEndTensor.squeeze();
});
}
class BlazeFaceModel {
constructor(model, config) {
this.blazeFaceModel = model;
@@ -59,11 +81,10 @@ class BlazeFaceModel {
this.scoreThreshold = config.detector.scoreThreshold;
}
async getBoundingBoxes(inputImage, returnTensors, annotateBoxes = true) {
async getBoundingBoxes(inputImage) {
const [detectedOutputs, boxes, scores] = tf.tidy(() => {
const resizedImage = inputImage.resizeBilinear([this.width, this.height]);
const normalizedImage = tf.mul(tf.sub(resizedImage.div(255), 0.5), 2);
// [1, 897, 17] 1 = batch, 897 = number of anchors
const batchedPrediction = this.blazeFaceModel.predict(normalizedImage);
const prediction = batchedPrediction.squeeze();
const decodedBounds = decodeBounds(prediction, this.anchors, this.inputSize);
@@ -71,45 +92,23 @@ class BlazeFaceModel {
const scoresOut = tf.sigmoid(logits).squeeze();
return [prediction, decodedBounds, scoresOut];
});
const boxIndicesTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxFaces, this.iouThreshold, this.scoreThreshold);
const boxIndices = await boxIndicesTensor.array();
boxIndicesTensor.dispose();
let boundingBoxes = boxIndices.map((boxIndex) => tf.slice(boxes, [boxIndex, 0], [1, -1]));
if (!returnTensors) {
boundingBoxes = await Promise.all(boundingBoxes.map(async (boundingBox) => {
const vals = await boundingBox.array();
boundingBox.dispose();
return vals;
}));
}
const originalHeight = inputImage.shape[1];
const originalWidth = inputImage.shape[2];
let scaleFactor;
if (returnTensors) {
scaleFactor = tf.div([originalWidth, originalHeight], this.inputSize);
} else {
scaleFactor = [
originalWidth / this.inputSizeData[0],
originalHeight / this.inputSizeData[1],
];
}
boundingBoxes = await Promise.all(boundingBoxes.map(async (boundingBox) => {
const vals = await boundingBox.array();
boundingBox.dispose();
return vals;
}));
const annotatedBoxes = [];
for (let i = 0; i < boundingBoxes.length; i++) {
const boundingBox = boundingBoxes[i];
const annotatedBox = tf.tidy(() => {
const box = boundingBox instanceof tf.Tensor
? bounding.createBox(boundingBox)
: bounding.createBox(tf.tensor2d(boundingBox));
if (!annotateBoxes) {
return box;
}
const box = createBox(boundingBox);
const boxIndex = boxIndices[i];
let anchor;
if (returnTensors) {
anchor = this.anchors.slice([boxIndex, 0], [1, 2]);
} else {
anchor = this.anchorsData[boxIndex];
}
const anchor = this.anchorsData[boxIndex];
const landmarks = tf.slice(detectedOutputs, [boxIndex, NUM_LANDMARKS - 1], [1, -1])
.squeeze()
.reshape([NUM_LANDMARKS, -1]);
@@ -123,7 +122,7 @@ class BlazeFaceModel {
detectedOutputs.dispose();
return {
boxes: annotatedBoxes,
scaleFactor,
scaleFactor: [inputImage.shape[2] / this.inputSizeData[0], inputImage.shape[1] / this.inputSizeData[1]],
};
}
@@ -134,7 +133,7 @@ class BlazeFaceModel {
}
return input.toFloat().expandDims(0);
});
const { boxes, scaleFactor } = await this.getBoundingBoxes(image, returnTensors, annotateBoxes);
const { boxes, scaleFactor } = await this.getBoundingBoxes(image);
image.dispose();
if (returnTensors) {
return boxes.map((face) => {
@@ -176,7 +175,7 @@ class BlazeFaceModel {
landmarks: scaledLandmarks,
probability: probabilityData,
};
bounding.disposeBox(face.box);
disposeBox(face.box);
face.landmarks.dispose();
face.probability.dispose();
}
@@ -185,4 +184,13 @@ class BlazeFaceModel {
}));
}
}
async function load(config) {
const blazeface = await tf.loadGraphModel(config.detector.modelPath, { fromTFHub: config.detector.modelPath.includes('tfhub.dev') });
const model = new BlazeFaceModel(blazeface, config);
return model;
}
exports.load = load;
exports.BlazeFaceModel = BlazeFaceModel;
exports.disposeBox = disposeBox;
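
A hedged usage sketch of the reworked module: load() and getBoundingBoxes() now take only the config and the input tensor, and the returned boxes carry tensors the caller is expected to release (config and inputTensor below stand in for the caller's values):

const blazeface = require('./blazeface');
const model = await blazeface.load(config); // config.detector.modelPath points at the BlazeFace graph model
const { boxes, scaleFactor } = await model.getBoundingBoxes(inputTensor);
for (const face of boxes) {
  // box, landmarks and probability are tensors; release them once their values are read
  blazeface.disposeBox(face.box);
  face.landmarks.dispose();
  face.probability.dispose();
}
// scaleFactor is already a plain [x, y] array of numbers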

View File

@@ -1,81 +1,63 @@
const tf = require('@tensorflow/tfjs');
const blazeface = require('../blazeface');
const blazeface = require('./blazeface');
const keypoints = require('./keypoints');
const pipe = require('./pipeline');
const uv_coords = require('./uvcoords');
const triangulation = require('./triangulation').default;
exports.uv_coords = uv_coords;
exports.triangulation = triangulation;
async function loadDetectorModel(config) {
return blazeface.load(config);
}
async function loadMeshModel(modelUrl) {
return tf.loadGraphModel(modelUrl, { fromTFHub: modelUrl.includes('tfhub.dev') });
}
async function loadIrisModel(modelUrl) {
return tf.loadGraphModel(modelUrl, { fromTFHub: modelUrl.includes('tfhub.dev') });
}
async function load(config) {
const models = await Promise.all([
loadDetectorModel(config),
loadMeshModel(config.mesh.modelPath),
loadIrisModel(config.iris.modelPath),
]);
// eslint-disable-next-line no-use-before-define
const faceMesh = new MediaPipeFaceMesh(models[0], models[1], models[2], config);
return faceMesh;
}
exports.load = load;
class MediaPipeFaceMesh {
constructor(blazeFace, blazeMeshModel, irisModel, config) {
this.pipeline = new pipe.Pipeline(blazeFace, blazeMeshModel, irisModel, config);
this.config = config;
if (config) this.config = config;
}
async estimateFaces(input, config) {
if (config) this.config = config;
const image = tf.tidy(() => {
if (!(input instanceof tf.Tensor)) {
input = tf.browser.fromPixels(input);
}
if (!(input instanceof tf.Tensor)) input = tf.browser.fromPixels(input);
return input.toFloat().expandDims(0);
});
const results = [];
const predictions = await this.pipeline.predict(image, this.config.iris.enabled, this.config.mesh.enabled);
image.dispose();
if (!predictions) return results;
for (const prediction of predictions) {
tf.dispose(image);
const results = [];
for (const prediction of (predictions || [])) {
const confidence = prediction.confidence.arraySync();
if (confidence >= this.config.detector.minConfidence) {
const result = {
confidence: confidence || 0,
box: prediction.box ? [prediction.box.startPoint[0], prediction.box.startPoint[1], prediction.box.endPoint[0] - prediction.box.startPoint[0], prediction.box.endPoint[1] - prediction.box.startPoint[1]] : 0,
mesh: prediction.coords ? prediction.coords.arraySync() : null,
image: prediction.image ? tf.clone(prediction.image) : null,
// mesh: prediction.coords.arraySync(),
};
const mesh = prediction.coords ? prediction.coords.arraySync() : null;
const annotations = {};
if (result.mesh && result.mesh.length > 0) {
if (mesh && mesh.length > 0) {
for (const key in keypoints.MESH_ANNOTATIONS) {
if (this.config.iris.enabled || key.includes('Iris') === false) {
annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => result.mesh[index]);
annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => mesh[index]);
}
}
}
result['annotations'] = annotations;
results.push(result);
results.push({
confidence: confidence || 0,
box: prediction.box ? [prediction.box.startPoint[0], prediction.box.startPoint[1], prediction.box.endPoint[0] - prediction.box.startPoint[0], prediction.box.endPoint[1] - prediction.box.startPoint[1]] : 0,
mesh,
annotations,
image: prediction.image ? tf.clone(prediction.image) : null,
});
}
tf.dispose(prediction.confidence);
tf.dispose(prediction.image);
tf.dispose(prediction.coords);
tf.dispose(prediction);
prediction.confidence.dispose();
prediction.image.dispose();
}
tf.dispose(predictions);
return results;
}
}
async function load(config) {
const models = await Promise.all([
blazeface.load(config),
tf.loadGraphModel(config.mesh.modelPath, { fromTFHub: config.mesh.modelPath.includes('tfhub.dev') }),
tf.loadGraphModel(config.iris.modelPath, { fromTFHub: config.iris.modelPath.includes('tfhub.dev') }),
]);
const faceMesh = new MediaPipeFaceMesh(models[0], models[1], models[2], config);
return faceMesh;
}
exports.load = load;
exports.MediaPipeFaceMesh = MediaPipeFaceMesh;
exports.uv_coords = uv_coords;
exports.triangulation = triangulation;
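
A hedged sketch of how the consolidated facemesh module is consumed (it mirrors the exports above; videoTensor stands in for the caller's input):

const facemesh = require('./facemesh');
const faceMesh = await facemesh.load(config); // loads the blazeface, mesh and iris graph models together
const faces = await faceMesh.estimateFaces(videoTensor, config);
for (const face of faces) {
  // each result is plain data: { confidence, box, mesh, annotations, image }
  if (face.image) face.image.dispose(); // the cloned face crop is the only tensor the caller owns
}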

View File

@@ -142,38 +142,31 @@ class Pipeline {
async predict(input, predictIrises, predictMesh) {
if (this.shouldUpdateRegionsOfInterest()) {
const returnTensors = false;
const annotateFace = true;
const { boxes, scaleFactor } = await this.boundingBoxDetector.getBoundingBoxes(input, returnTensors, annotateFace);
const { boxes, scaleFactor } = await this.boundingBoxDetector.getBoundingBoxes(input);
if (boxes.length === 0) {
this.regionsOfInterest = [];
return null;
}
const scaledBoxes = boxes.map((prediction) => {
const predictionBoxCPU = {
const predictionBox = {
startPoint: prediction.box.startPoint.squeeze().arraySync(),
endPoint: prediction.box.endPoint.squeeze().arraySync(),
};
const scaledBox = bounding.scaleBoxCoordinates(predictionBoxCPU, scaleFactor);
prediction.box.startPoint.dispose();
prediction.box.endPoint.dispose();
const scaledBox = bounding.scaleBoxCoordinates(predictionBox, scaleFactor);
const enlargedBox = bounding.enlargeBox(scaledBox);
return {
...enlargedBox,
landmarks: prediction.landmarks.arraySync(),
};
});
boxes.forEach((box) => {
if (box != null && box.startPoint != null) {
box.startEndTensor.dispose();
box.startPoint.dispose();
box.endPoint.dispose();
}
const landmarks = prediction.landmarks.arraySync();
prediction.landmarks.dispose();
prediction.probability.dispose();
return { ...enlargedBox, landmarks };
});
this.updateRegionsOfInterest(scaledBoxes);
this.runsWithoutFaceDetector = 0;
} else {
this.runsWithoutFaceDetector++;
}
return tf.tidy(() => this.regionsOfInterest.map((box, i) => {
const results = tf.tidy(() => this.regionsOfInterest.map((box, i) => {
let angle = 0;
// The facial bounding box landmarks could come either from blazeface (if we are using a fresh box), or from the mesh model (if we are reusing an old box).
const boxLandmarksFromMeshModel = box.landmarks.length >= LANDMARKS_COUNT;
@@ -201,6 +194,7 @@ class Pipeline {
const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = this.getEyeBox(rawCoords, face, RIGHT_EYE_BOUNDS[0], RIGHT_EYE_BOUNDS[1]);
const eyePredictions = (this.irisModel.predict(tf.concat([leftEyeCrop, rightEyeCrop])));
const eyePredictionsData = eyePredictions.dataSync();
eyePredictions.dispose();
const leftEyeData = eyePredictionsData.slice(0, IRIS_NUM_COORDINATES * 3);
const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = this.getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
const rightEyeData = eyePredictionsData.slice(IRIS_NUM_COORDINATES * 3);
@@ -226,7 +220,6 @@ class Pipeline {
const transformedCoords = tf.tensor2d(transformedCoordsData);
this.regionsOfInterest[i] = { ...landmarksBox, landmarks: transformedCoords.arraySync() };
const prediction = {
// coords: tf.tensor2d(rawCoords, [rawCoords.length, 3]),
coords: transformedCoords,
box: landmarksBox,
confidence: flag.squeeze(),
@@ -236,13 +229,13 @@ class Pipeline {
}
const prediction = {
coords: null,
// scaledCoords: null,
box: landmarksBox,
confidence: flag.squeeze(),
image: face,
};
return prediction;
}));
return results;
}
// Updates regions of interest if the intersection over union between the incoming and previous regions falls below a threshold.
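
One note on the manual dispose() calls above: tf.tidy() only accepts synchronous callbacks, so tensors whose values are read around an await (the BlazeFace box and landmark tensors here) have to be released by hand after arraySync()/dataSync(). A generic sketch of that constraint, with illustrative names rather than code from this file:

// tf.tidy(async () => { ... });               // not supported: tidy callbacks must be synchronous
const landmarksTensor = prediction.landmarks;  // hypothetical tensor handed back by a detector
const landmarks = landmarksTensor.arraySync(); // copy the values to the CPU
landmarksTensor.dispose();                     // then release the backing memory
await nextStage(landmarks);                    // safe: no live tensors cross the await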

View File

@@ -33,28 +33,31 @@ function mergeDeep(...objects) {
async function detect(input, userConfig) {
const config = mergeDeep(defaults, userConfig);
// load models if enabled
if (config.body.enabled && !models.posenet) models.posenet = await posenet.load(config.body);
if (config.hand.enabled && !models.handpose) models.handpose = await handpose.load(config.hand);
if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
if (config.face.age.enabled) await ssrnet.loadAge(config);
if (config.face.gender.enabled) await ssrnet.loadGender(config);
tf.engine().startScope();
// run posenet
let poseRes = [];
if (config.body.enabled) {
if (!models.posenet) models.posenet = await posenet.load(config.body);
poseRes = await models.posenet.estimateMultiplePoses(input, config.body);
}
if (config.body.enabled) poseRes = await models.posenet.estimateMultiplePoses(input, config.body);
// run handpose
let handRes = [];
if (config.hand.enabled) {
if (!models.handpose) models.handpose = await handpose.load(config.hand);
handRes = await models.handpose.estimateHands(input, config.hand);
}
if (config.hand.enabled) handRes = await models.handpose.estimateHands(input, config.hand);
// run facemesh, includes blazeface and iris
const faceRes = [];
if (config.face.enabled) {
if (!models.facemesh) models.facemesh = await facemesh.load(config.face);
const faces = await models.facemesh.estimateFaces(input, config.face);
for (const face of faces) {
// run ssr-net age & gender, inherits face from blazeface
const ssrdata = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
face.image.dispose();
// iris: array[ bottom, left, top, right, center ]
const iris = (face.annotations.leftEyeIris && face.annotations.rightEyeIris)
? Math.max(face.annotations.leftEyeIris[3][0] - face.annotations.leftEyeIris[1][0], face.annotations.rightEyeIris[3][0] - face.annotations.rightEyeIris[1][0])
@@ -71,6 +74,8 @@ async function detect(input, userConfig) {
}
}
tf.engine().endScope();
// combine results
return { face: faceRes, body: poseRes, hand: handRes };
}
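
The startScope()/endScope() pair wrapped around detect() above tells the engine to track every tensor allocated during the call and to release anything that was not explicitly kept, which is what plugs leaks from intermediate tensors the individual models forget to dispose. A small sketch of the same idea using the standard tf.engine() API (model and input are illustrative):

tf.engine().startScope();                       // begin tracking allocations
const resized = tf.image.resizeBilinear(input, [256, 256]);
const scores = model.predict(resized);
const data = await scores.data();               // copy out only the values we need
tf.engine().endScope();                         // disposes resized, scores and any other scoped tensors
// `data` is a plain typed array, so it survives the scope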

View File

@@ -15,14 +15,20 @@ async function getImage(image, size) {
return tensor;
}
async function loadAge(config) {
if (!models.age) models.age = await tf.loadGraphModel(config.face.age.modelPath);
}
async function loadGender(config) {
if (!models.gender) models.gender = await tf.loadGraphModel(config.face.gender.modelPath);
}
async function predict(image, config) {
frame += 1;
if (frame >= config.face.age.skipFrames) {
frame = 0;
return last;
}
if (!models.age && config.face.age.enabled) models.age = await tf.loadGraphModel(config.face.age.modelPath);
if (!models.gender && config.face.gender.enabled) models.gender = await tf.loadGraphModel(config.face.gender.modelPath);
let enhance;
if (image instanceof tf.Tensor) {
const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
@@ -48,3 +54,5 @@ async function predict(image, config) {
}
exports.predict = predict;
exports.loadAge = loadAge;
exports.loadGender = loadGender;
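
With the loaders split out above, callers can warm up the age and gender models once and let predict() reuse them, short-circuiting on skipped frames. A usage sketch based on these exports (the require path and faceImageTensor are illustrative, and the exact shape of the returned object is not shown in this diff):

const ssrnet = require('./ssrnet');
await ssrnet.loadAge(config);
await ssrnet.loadGender(config);
const ssrdata = await ssrnet.predict(faceImageTensor, config); // may return the previous result, depending on the skipFrames counter above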