autodetect inputSizes

pull/91/head
Vladimir Mandic 2021-03-11 10:26:14 -05:00
parent e2cf948425
commit 3ceb3df73e
46 changed files with 443286 additions and 7050 deletions


@ -67,7 +67,6 @@ export default {
// (note: module is not loaded until it is required)
detector: {
modelPath: '../models/blazeface-back.json',
inputSize: 256, // fixed value
rotation: true, // use best-guess rotated face image or just box with rotation as-is
// false means higher performance, but incorrect mesh mapping if face angle is above 20 degrees
// this parameter is not valid in nodejs
@ -91,19 +90,16 @@ export default {
mesh: {
enabled: true,
modelPath: '../models/facemesh.json',
inputSize: 192, // fixed value
},
iris: {
enabled: true,
modelPath: '../models/iris.json',
inputSize: 64, // fixed value
},
age: {
enabled: true,
modelPath: '../models/age-ssrnet-imdb.json',
inputSize: 64, // fixed value
modelPath: '../models/age.json',
skipFrames: 31, // how many frames to go without re-running the detector
// only used for video inputs
},
@ -112,14 +108,12 @@ export default {
enabled: true,
minConfidence: 0.1, // threshold for discarding a prediction
modelPath: '../models/gender.json', // can be 'gender' or 'gender-ssrnet-imdb'
inputSize: 64, // fixed value
skipFrames: 32, // how many frames to go without re-running the detector
// only used for video inputs
},
emotion: {
enabled: true,
inputSize: 64, // fixed value
minConfidence: 0.1, // threshold for discarding a prediction
skipFrames: 33, // how many frames to go without re-running the detector
modelPath: '../models/emotion.json',
@ -127,7 +121,6 @@ export default {
embedding: {
enabled: false,
inputSize: 112, // fixed value
modelPath: '../models/mobilefacenet.json',
},
},
@ -135,7 +128,6 @@ export default {
body: {
enabled: true,
modelPath: '../models/posenet.json', // can be 'posenet' or 'blazepose'
inputSize: 257, // fixed value, 257 for posenet and 256 for blazepose
maxDetections: 10, // maximum number of people detected in the input
// should be set to the minimum number for performance
// only valid for posenet as blazepose only detects single pose
@ -144,14 +136,12 @@ export default {
// only valid for posenet as blazepose only detects single pose
nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
// only valid for posenet as blazepose only detects single pose
modelType: 'posenet-mobilenet', // can be 'posenet-mobilenet', 'posenet-resnet', 'blazepose'
},
hand: {
enabled: true,
rotation: false, // use best-guess rotated hand image or just box with rotation as-is
// false means higher performance, but incorrect finger mapping if hand is inverted
inputSize: 256, // fixed value
skipFrames: 12, // how many frames to go without re-running the hand bounding box detector
// only used for video inputs
// e.g., if model is running at 25 FPS, we can re-use existing bounding
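
For context on the removals above: every hardcoded inputSize entry is dropped from the configuration because each module now reads the expected resolution from the loaded model itself. A minimal standalone sketch of that pattern (illustration only, not code from this commit; assumes a tfjs graph model with an NHWC input signature):

import * as tf from '@tensorflow/tfjs';

async function autodetectInputSize(modelPath: string): Promise<{ width: number, height: number }> {
  const model = await tf.loadGraphModel(modelPath);
  // graph-model inputs are NHWC: [batch, height, width, channels]; -1 would mean a dynamic dimension
  const shape = model.inputs[0].shape as number[];
  return { width: shape[2], height: shape[1] };
}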


@ -3,20 +3,18 @@ import Human from '../src/human';
import Menu from './menu.js';
import GLBench from './gl-bench.js';
const userConfig = { backend: 'webgl' }; // add any user configuration overrides
// const userConfig = { backend: 'webgl' }; // add any user configuration overrides
/*
const userConfig = {
backend: 'wasm',
backend: 'webgl',
async: false,
warmup: 'none',
warmup: 'face',
videoOptimized: false,
face: { enabled: true, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: false } },
face: { enabled: true, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: true } },
hand: { enabled: false },
gesture: { enabled: false },
body: { enabled: false, modelType: 'blazepose', modelPath: '../models/blazepose.json' },
body: { enabled: false, modelPath: '../models/blazepose.json' },
};
*/
const human = new Human(userConfig);
@ -40,7 +38,7 @@ const ui = {
detectFPS: [], // internal, holds fps values for detection performance
drawFPS: [], // internal, holds fps values for draw performance
buffered: false, // experimental, should output be buffered between frames
drawWarmup: false, // debug only, should warmup image processing be displayed on startup
drawWarmup: true, // debug only, should warmup image processing be displayed on startup
drawThread: null, // internal, perform draw operations in a separate thread
detectThread: null, // internal, perform detect operations in a separate thread
framesDraw: 0, // internal, statistics on frames drawn
@ -104,9 +102,6 @@ async function drawResults(input) {
if (ui.drawFPS.length > ui.maxFPSframes) ui.drawFPS.shift();
lastDraw = performance.now();
// enable for continuous performance monitoring
// console.log(result.performance);
// draw fps chart
await menu.process.updateChart('FPS', ui.detectFPS);


@ -18,12 +18,12 @@ const myConfig = {
detector: { modelPath: 'file://models/blazeface-back.json', enabled: true },
mesh: { modelPath: 'file://models/facemesh.json', enabled: true },
iris: { modelPath: 'file://models/iris.json', enabled: true },
age: { modelPath: 'file://models/age-ssrnet-imdb.json', enabled: true },
age: { modelPath: 'file://models/age.json', enabled: true },
gender: { modelPath: 'file://models/gender.json', enabled: true },
emotion: { modelPath: 'file://models/emotion.json', enabled: true },
},
// body: { modelPath: 'file://models/blazepose.json', modelType: 'blazepose', inputSize: 256, enabled: true },
body: { modelPath: 'file://models/posenet.json', modelType: 'posenet', inputSize: 257, enabled: true },
// body: { modelPath: 'file://models/blazepose.json', modelType: 'blazepose', enabled: true },
body: { modelPath: 'file://models/posenet.json', modelType: 'posenet', enabled: true },
hand: {
enabled: true,
detector: { modelPath: 'file://models/handdetect.json' },

(Diffs suppressed for regenerated vendored files: dist/human.esm.js, dist/human.js, dist/human.js.map, dist/human.node-gpu.js, dist/human.node.js, dist/tfjs.esm.js, plus other generated bundles, source maps, and one binary file not shown.)


@ -1,12 +1,12 @@
{
"format": "graph-model",
"generatedBy": "2.0.0-dev20190603",
"convertedBy": "TensorFlow.js Converter v1.1.2",
"convertedBy": "https://github.com/vladmandic",
"modelTopology":
{
"node":
[
{"name":"sub_2","op":"Placeholder","attr":{"dtype":{"type":"DT_FLOAT"},"shape":{"shape":{"dim":[{"size":"1"},{"size":"-1"},{"size":"-1"},{"size":"3"}]}}}},
{"name":"sub_2","op":"Placeholder","attr":{"dtype":{"type":"DT_FLOAT"},"shape":{"shape":{"dim":[{"size":"1"},{"size":"257"},{"size":"257"},{"size":"3"}]}}}},
{"name":"MobilenetV1/offset_2/Conv2D_bias","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"34"}]}}},"dtype":{"type":"DT_FLOAT"}}},
{"name":"MobilenetV1/offset_2/weights","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"1"},{"size":"1"},{"size":"1024"},{"size":"34"}]}}},"dtype":{"type":"DT_FLOAT"}}},
{"name":"MobilenetV1/Conv2d_13_pointwise/Conv2D_bias","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"1024"}]}}},"dtype":{"type":"DT_FLOAT"}}},

package-lock.json generated

@ -33,7 +33,7 @@
"eslint-plugin-node": "^11.1.0",
"eslint-plugin-promise": "^4.3.1",
"rimraf": "^3.0.2",
"simple-git": "^2.36.1",
"simple-git": "^2.36.2",
"tslib": "^2.1.0",
"typescript": "^4.2.3"
},
@ -3298,9 +3298,9 @@
"dev": true
},
"node_modules/simple-git": {
"version": "2.36.1",
"resolved": "https://registry.npmjs.org/simple-git/-/simple-git-2.36.1.tgz",
"integrity": "sha512-bN18Ea/4IJgqgbZyE9VpVEUkAu9vyP0VWP7acP0CRC1p/N80GGJ0HhIVeFJsm8TdJLBowiJpdLesQuAZ5TFSKw==",
"version": "2.36.2",
"resolved": "https://registry.npmjs.org/simple-git/-/simple-git-2.36.2.tgz",
"integrity": "sha512-orBEf65GfSiQMsYedbJXSiRNnIRvhbeE5rrxZuEimCpWxDZOav0KLy2IEiPi1YJCF+zaC2quiJF8A4TsxI9/tw==",
"dev": true,
"dependencies": {
"@kwsites/file-exists": "^1.1.1",
@ -3880,9 +3880,9 @@
}
},
"node_modules/yargs-parser": {
"version": "20.2.6",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.6.tgz",
"integrity": "sha512-AP1+fQIWSM/sMiET8fyayjx/J+JmTPt2Mr0FkrgqB4todtfa53sOsrSAcIrJRD5XS20bKUwaDIuMkWKCEiQLKA==",
"version": "20.2.7",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.7.tgz",
"integrity": "sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==",
"dev": true,
"engines": {
"node": ">=10"
@ -6422,9 +6422,9 @@
"dev": true
},
"simple-git": {
"version": "2.36.1",
"resolved": "https://registry.npmjs.org/simple-git/-/simple-git-2.36.1.tgz",
"integrity": "sha512-bN18Ea/4IJgqgbZyE9VpVEUkAu9vyP0VWP7acP0CRC1p/N80GGJ0HhIVeFJsm8TdJLBowiJpdLesQuAZ5TFSKw==",
"version": "2.36.2",
"resolved": "https://registry.npmjs.org/simple-git/-/simple-git-2.36.2.tgz",
"integrity": "sha512-orBEf65GfSiQMsYedbJXSiRNnIRvhbeE5rrxZuEimCpWxDZOav0KLy2IEiPi1YJCF+zaC2quiJF8A4TsxI9/tw==",
"dev": true,
"requires": {
"@kwsites/file-exists": "^1.1.1",
@ -6925,9 +6925,9 @@
}
},
"yargs-parser": {
"version": "20.2.6",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.6.tgz",
"integrity": "sha512-AP1+fQIWSM/sMiET8fyayjx/J+JmTPt2Mr0FkrgqB4todtfa53sOsrSAcIrJRD5XS20bKUwaDIuMkWKCEiQLKA==",
"version": "20.2.7",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.7.tgz",
"integrity": "sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==",
"dev": true
}
}


@ -68,7 +68,7 @@
"eslint-plugin-node": "^11.1.0",
"eslint-plugin-promise": "^4.3.1",
"rimraf": "^3.0.2",
"simple-git": "^2.36.1",
"simple-git": "^2.36.2",
"tslib": "^2.1.0",
"typescript": "^4.2.3"
}


@ -23,17 +23,7 @@ export async function predict(image, config) {
if (config.videoOptimized) skipped = 0;
else skipped = Number.MAX_SAFE_INTEGER;
return new Promise(async (resolve) => {
/*
const zoom = [0, 0]; // 0..1 meaning 0%..100%
const box = [[
(image.shape[1] * zoom[0]) / image.shape[1],
(image.shape[2] * zoom[1]) / image.shape[2],
(image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
(image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
]];
const resize = tf.image.cropAndResize(image, box, [0], [config.face.age.inputSize, config.face.age.inputSize]);
*/
const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
const enhance = tf.mul(resize, [255.0]);
tf.dispose(resize);
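
One detail on the new resize call: tf.image.resizeBilinear takes the target size as [newHeight, newWidth], while in an NHWC signature shape[1] is height and shape[2] is width; the face models here use square inputs, so the two orderings coincide. Spelled out (illustrative only):

const [, modelHeight, modelWidth] = model.inputs[0].shape; // NHWC: [batch, height, width, channels]
const resized = tf.image.resizeBilinear(image, [modelHeight, modelWidth], false); // equivalent for square inputs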


@ -57,15 +57,15 @@ export class BlazeFaceModel {
height: number;
anchorsData: any;
anchors: any;
inputSize: number;
inputSize: any;
config: any;
scaleFaces: number;
constructor(model, config) {
this.blazeFaceModel = model;
this.width = config.face.detector.inputSize;
this.height = config.face.detector.inputSize;
this.anchorsData = generateAnchors(config.face.detector.inputSize);
this.width = model.inputs[0].shape[2];
this.height = model.inputs[0].shape[1];
this.anchorsData = generateAnchors(model.inputs[0].shape[1]);
this.anchors = tf.tensor2d(this.anchorsData);
this.inputSize = tf.tensor1d([this.width, this.height]);
this.config = config;


@ -9,7 +9,7 @@ export class MediaPipeFaceMesh {
config: any;
constructor(blazeFace, blazeMeshModel, irisModel, config) {
this.facePipeline = new facepipeline.Pipeline(blazeFace, blazeMeshModel, irisModel, config);
this.facePipeline = new facepipeline.Pipeline(blazeFace, blazeMeshModel, irisModel);
this.config = config;
}


@ -43,22 +43,22 @@ export class Pipeline {
boundingBoxDetector: any;
meshDetector: any;
irisModel: any;
meshWidth: number;
meshHeight: number;
boxSize: number;
meshSize: number;
irisSize: number;
irisEnlarge: number;
skipped: number;
detectedFaces: number;
constructor(boundingBoxDetector, meshDetector, irisModel, config) {
constructor(boundingBoxDetector, meshDetector, irisModel) {
// An array of facial bounding boxes.
this.storedBoxes = [];
this.boundingBoxDetector = boundingBoxDetector;
this.meshDetector = meshDetector;
this.irisModel = irisModel;
this.meshWidth = config.face.mesh.inputSize;
this.meshHeight = config.face.mesh.inputSize;
this.irisSize = config.face.iris.inputSize;
this.boxSize = boundingBoxDetector?.blazeFaceModel?.inputs[0].shape[2] || 0;
this.meshSize = meshDetector?.inputs[0].shape[2] || boundingBoxDetector?.blazeFaceModel?.inputs[0].shape[2];
this.irisSize = irisModel?.inputs[0].shape[1] || 0;
this.irisEnlarge = 2.3;
this.skipped = 0;
this.detectedFaces = 0;
@ -66,10 +66,10 @@ export class Pipeline {
transformRawCoords(rawCoords, box, angle, rotationMatrix) {
const boxSize = bounding.getBoxSize({ startPoint: box.startPoint, endPoint: box.endPoint });
const scaleFactor = [boxSize[0] / this.meshWidth, boxSize[1] / this.meshHeight];
const scaleFactor = [boxSize[0] / this.meshSize, boxSize[1] / this.boxSize];
const coordsScaled = rawCoords.map((coord) => ([
scaleFactor[0] * (coord[0] - this.meshWidth / 2),
scaleFactor[1] * (coord[1] - this.meshHeight / 2), coord[2],
scaleFactor[0] * (coord[0] - this.boxSize / 2),
scaleFactor[1] * (coord[1] - this.boxSize / 2), coord[2],
]));
const coordsRotationMatrix = (angle !== 0) ? util.buildRotationMatrix(angle, [0, 0]) : util.IDENTITY_MATRIX;
const coordsRotated = (angle !== 0) ? coordsScaled.map((coord) => ([...util.rotatePoint(coord, coordsRotationMatrix), coord[2]])) : coordsScaled;
@ -93,9 +93,9 @@ export class Pipeline {
const box = bounding.squarifyBox(bounding.enlargeBox(this.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), this.irisEnlarge));
const boxSize = bounding.getBoxSize(box);
let crop = tf.image.cropAndResize(face, [[
box.startPoint[1] / this.meshHeight,
box.startPoint[0] / this.meshWidth, box.endPoint[1] / this.meshHeight,
box.endPoint[0] / this.meshWidth,
box.startPoint[1] / this.meshSize,
box.startPoint[0] / this.meshSize, box.endPoint[1] / this.meshSize,
box.endPoint[0] / this.meshSize,
]], [0], [this.irisSize, this.irisSize]);
if (flip && tf.ENV.flags.IS_BROWSER) {
crop = tf.image.flipLeftRight(crop); // flipLeftRight is not defined for tfjs-node
@ -192,11 +192,11 @@ export class Pipeline {
const faceCenterNormalized = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
const rotatedImage = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
rotationMatrix = util.buildRotationMatrix(-angle, faceCenter);
face = bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotatedImage, [this.meshHeight, this.meshWidth]).div(255);
face = bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotatedImage, [this.meshSize, this.meshSize]).div(255);
} else {
rotationMatrix = util.IDENTITY_MATRIX;
const cloned = input.clone();
face = bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, cloned, [this.meshHeight, this.meshWidth]).div(255);
face = bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, cloned, [this.boxSize, this.boxSize]).div(255);
}
// if we're not going to produce mesh, don't spend time with further processing
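
Context for the getEyeBox crop in this file: tf.image.cropAndResize expects box coordinates normalized to 0..1 in [y1, x1, y2, x2] order, which is why the pixel coordinates are divided by the mesh input size before cropping. A minimal standalone sketch (names are illustrative, not the library's):

import * as tf from '@tensorflow/tfjs';

// crop one normalized region out of a [1, H, W, 3] tensor and resize it to size x size
function cropNormalized(image: tf.Tensor4D, box: [number, number, number, number], size: number) {
  return tf.image.cropAndResize(image, [box], [0], [size, size]); // box = [y1, x1, y2, x2], all in 0..1
}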


@ -8,7 +8,6 @@ let model;
export async function load(config) {
if (!model) {
model = await tf.loadGraphModel(config.body.modelPath);
// blazepose inputSize is 256x256px, but we can find that out dynamically
model.width = parseInt(model.signature.inputs['input_1:0'].tensorShape.dim[2].size);
model.height = parseInt(model.signature.inputs['input_1:0'].tensorShape.dim[1].size);
if (config.debug) log(`load model: ${config.body.modelPath.match(/\/(.*)\./)[1]}`);
@ -20,7 +19,7 @@ export async function predict(image, config) {
if (!model) return null;
if (!config.body.enabled) return null;
const imgSize = { width: image.shape[2], height: image.shape[1] };
const resize = tf.image.resizeBilinear(image, [model.width || config.body.inputSize, model.height || config.body.inputSize], false);
const resize = tf.image.resizeBilinear(image, [model.width, model.height], false);
const normalize = tf.div(resize, [255.0]);
resize.dispose();
let points;
@ -30,7 +29,6 @@ export async function predict(image, config) {
// const segmentation = segmentationT.arraySync(); // array 128 x 128
// segmentationT.dispose();
points = resT.find((t) => (t.size === 195 || t.size === 155)).dataSync(); // order of output tensors may change between models, full has 195 and upper has 155 items
// console.log(resT, points, segmentation);
resT.forEach((t) => t.dispose());
} else {
const profileData = await tf.profile(() => model.predict(normalize));
@ -55,6 +53,5 @@ export async function predict(image, config) {
presence: (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 4])))) / 100, // reverse sigmoid value
});
}
// console.log('POINTS', imgSize, pts.length, pts);
return [{ keypoints }];
}


@ -2,8 +2,9 @@ import { log } from '../log';
import * as tf from '../../dist/tfjs.esm.js';
import * as profile from '../profile';
// based on https://github.com/sirius-ai/MobileFaceNet_TF
// model converted from https://github.com/sirius-ai/MobileFaceNet_TF/files/3551493/FaceMobileNet192_train_false.zip
// original: https://github.com/sirius-ai/MobileFaceNet_TF
// modified: https://github.com/sirius-ai/MobileFaceNet_TF/issues/46
// download: https://github.com/sirius-ai/MobileFaceNet_TF/files/3551493/FaceMobileNet192_train_false.zip
let model;
@ -29,7 +30,7 @@ export function simmilarity(embedding1, embedding2) {
export async function predict(image, config) {
if (!model) return null;
return new Promise(async (resolve) => {
const resize = tf.image.resizeBilinear(image, [config.face.embedding.inputSize, config.face.embedding.inputSize], false);
const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
// const normalize = tf.tidy(() => resize.div(127.5).sub(0.5)); // this is -0.5...0.5 ???
let data: Array<[]> = [];
if (config.face.embedding.enabled) {
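
Since this module produces face embeddings that are later compared, a generic similarity sketch may help readers; this is plain cosine similarity and not necessarily the exact metric the simmilarity export above implements:

function cosineSimilarity(a: number[], b: number[]): number {
  const dot = a.reduce((sum, v, i) => sum + v * b[i], 0);
  const magA = Math.sqrt(a.reduce((sum, v) => sum + v * v, 0));
  const magB = Math.sqrt(b.reduce((sum, v) => sum + v * v, 0));
  return dot / ((magA * magB) || 1); // guard against zero-magnitude vectors
}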


@ -27,17 +27,7 @@ export async function predict(image, config) {
if (config.videoOptimized) skipped = 0;
else skipped = Number.MAX_SAFE_INTEGER;
return new Promise(async (resolve) => {
/*
const zoom = [0, 0]; // 0..1 meaning 0%..100%
const box = [[
(image.shape[1] * zoom[0]) / image.shape[1],
(image.shape[2] * zoom[1]) / image.shape[2],
(image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
(image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
]];
const resize = tf.image.cropAndResize(image, box, [0], [config.face.emotion.inputSize, config.face.emotion.inputSize]);
*/
const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
const [red, green, blue] = tf.split(resize, 3, 3);
resize.dispose();
// weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
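
The rgb2gray link above refers to the standard BT.601 luma weights; a minimal sketch of that weighted conversion, assuming red, green and blue are the single-channel tensors produced by the split (the generic formula, not necessarily the exact constants used in this file):

const grayscale = tf.tidy(() => red.mul(0.2989).add(green.mul(0.5870)).add(blue.mul(0.1140)));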


@ -28,7 +28,7 @@ export async function predict(image, config) {
if (config.videoOptimized) skipped = 0;
else skipped = Number.MAX_SAFE_INTEGER;
return new Promise(async (resolve) => {
const resize = tf.image.resizeBilinear(image, [config.face.gender.inputSize, config.face.gender.inputSize], false);
const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
let enhance;
if (alternative) {
enhance = tf.tidy(() => {


@ -5,6 +5,7 @@ export class HandDetector {
model: any;
anchors: any;
anchorsTensor: any;
inputSize: number;
inputSizeTensor: any;
doubleInputSizeTensor: any;
@ -12,6 +13,7 @@ export class HandDetector {
this.model = model;
this.anchors = anchorsAnnotated.map((anchor) => [anchor.x_center, anchor.y_center]);
this.anchorsTensor = tf.tensor2d(this.anchors);
this.inputSize = inputSize;
this.inputSizeTensor = tf.tensor1d([inputSize, inputSize]);
this.doubleInputSizeTensor = tf.tensor1d([inputSize * 2, inputSize * 2]);
}
@ -67,7 +69,7 @@ export class HandDetector {
async estimateHandBounds(input, config) {
const inputHeight = input.shape[1];
const inputWidth = input.shape[2];
const image = tf.tidy(() => input.resizeBilinear([config.hand.inputSize, config.hand.inputSize]).div(127.5).sub(1));
const image = tf.tidy(() => input.resizeBilinear([this.inputSize, this.inputSize]).div(127.5).sub(1));
const predictions = await this.getBoxes(image, config);
image.dispose();
const hands: Array<{}> = [];
@ -79,7 +81,7 @@ export class HandDetector {
const palmLandmarks = prediction.palmLandmarks.arraySync();
prediction.box.dispose();
prediction.palmLandmarks.dispose();
hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / config.hand.inputSize, inputHeight / config.hand.inputSize]));
hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / this.inputSize, inputHeight / this.inputSize]));
}
return hands;
}


@ -54,8 +54,8 @@ export async function load(config) {
config.hand.enabled ? tf.loadGraphModel(config.hand.detector.modelPath, { fromTFHub: config.hand.detector.modelPath.includes('tfhub.dev') }) : null,
config.hand.landmarks ? tf.loadGraphModel(config.hand.skeleton.modelPath, { fromTFHub: config.hand.skeleton.modelPath.includes('tfhub.dev') }) : null,
]);
const handDetector = new handdetector.HandDetector(handDetectorModel, config.hand.inputSize, anchors.anchors);
const handPipeline = new handpipeline.HandPipeline(handDetector, handPoseModel, config.hand.inputSize);
const handDetector = new handdetector.HandDetector(handDetectorModel, handDetectorModel?.inputs[0].shape[2], anchors.anchors);
const handPipeline = new handpipeline.HandPipeline(handDetector, handPoseModel, handPoseModel?.inputs[0].shape[2]);
const handPose = new HandPose(handPipeline);
if (config.hand.enabled && config.debug) log(`load model: ${config.hand.detector.modelPath.match(/\/(.*)\./)[1]}`);
if (config.hand.landmarks && config.debug) log(`load model: ${config.hand.skeleton.modelPath.match(/\/(.*)\./)[1]}`);


@ -109,7 +109,7 @@ class Human {
age,
gender,
emotion,
body: this.config.body.modelType.startsWith('posenet') ? posenet : blazepose,
body: this.config.body.modelPath.includes('posenet') ? posenet : blazepose,
hand: handpose,
};
// include platform info
@ -186,8 +186,8 @@ class Human {
this.models.emotion || ((this.config.face.enabled && this.config.face.emotion.enabled) ? emotion.load(this.config) : null),
this.models.embedding || ((this.config.face.enabled && this.config.face.embedding.enabled) ? embedding.load(this.config) : null),
this.models.handpose || (this.config.hand.enabled ? handpose.load(this.config) : null),
this.models.posenet || (this.config.body.enabled && this.config.body.modelType.startsWith('posenet') ? posenet.load(this.config) : null),
this.models.posenet || (this.config.body.enabled && this.config.body.modelType.startsWith('blazepose') ? blazepose.load(this.config) : null),
this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('posenet') ? posenet.load(this.config) : null),
this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('blazepose') ? blazepose.load(this.config) : null),
]);
} else {
if (this.config.face.enabled && !this.models.face) this.models.face = await facemesh.load(this.config);
@ -196,8 +196,8 @@ class Human {
if (this.config.face.enabled && this.config.face.emotion.enabled && !this.models.emotion) this.models.emotion = await emotion.load(this.config);
if (this.config.face.enabled && this.config.face.embedding.enabled && !this.models.embedding) this.models.embedding = await embedding.load(this.config);
if (this.config.hand.enabled && !this.models.handpose) this.models.handpose = await handpose.load(this.config);
if (this.config.body.enabled && !this.models.posenet && this.config.body.modelType.startsWith('posenet')) this.models.posenet = await posenet.load(this.config);
if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelType.startsWith('blazepose')) this.models.blazepose = await blazepose.load(this.config);
if (this.config.body.enabled && !this.models.posenet && this.config.body.modelPath.includes('posenet')) this.models.posenet = await posenet.load(this.config);
if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelPath.includes('blazepose')) this.models.blazepose = await blazepose.load(this.config);
}
if (this.#firstRun) {
@ -477,13 +477,13 @@ class Human {
// run body: can be posenet or blazepose
this.#analyze('Start Body:');
if (this.config.async) {
if (this.config.body.modelType.startsWith('posenet')) bodyRes = this.config.body.enabled ? this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
if (this.config.body.modelPath.includes('posenet')) bodyRes = this.config.body.enabled ? this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
else bodyRes = this.config.body.enabled ? blazepose.predict(process.tensor, this.config) : [];
if (this.#perf.body) delete this.#perf.body;
} else {
this.state = 'run:body';
timeStamp = now();
if (this.config.body.modelType.startsWith('posenet')) bodyRes = this.config.body.enabled ? await this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
if (this.config.body.modelPath.includes('posenet')) bodyRes = this.config.body.enabled ? await this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
else bodyRes = this.config.body.enabled ? await blazepose.predict(process.tensor, this.config) : [];
this.#perf.body = Math.trunc(now() - timeStamp);
}
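
With this change the body model is selected from modelPath alone; the old modelType switch no longer drives these checks. A hedged usage sketch reusing the demo's model paths from earlier in this commit (the canvas input is an assumption for illustration):

import Human from '../src/human';

async function runBody(input: HTMLCanvasElement) {
  // any modelPath containing 'posenet' selects PoseNet; 'blazepose' selects BlazePose
  const human = new Human({ body: { enabled: true, modelPath: '../models/blazepose.json' } });
  return human.detect(input);
}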


@ -20,12 +20,12 @@ function getInstanceScore(existingPoses, squaredNmsRadius, instanceKeypoints) {
return notOverlappedKeypointScores / instanceKeypoints.length;
}
export function decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, config) {
export function decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, nmsRadius, maxDetections, scoreThreshold) {
const poses: Array<{ keypoints: any, score: number }> = [];
const queue = buildParts.buildPartWithScoreQueue(config.body.scoreThreshold, kLocalMaximumRadius, scoresBuffer);
const squaredNmsRadius = config.body.nmsRadius ^ 2;
const queue = buildParts.buildPartWithScoreQueue(scoreThreshold, kLocalMaximumRadius, scoresBuffer);
const squaredNmsRadius = nmsRadius ^ 2;
// Generate at most maxDetections object instances per image in decreasing root part score order.
while (poses.length < config.body.maxDetections && !queue.empty()) {
while (poses.length < maxDetections && !queue.empty()) {
// The top element in the queue is the next root candidate.
const root = queue.dequeue();
// Part-based non-maximum suppression: We reject a root candidate if it is within a disk of `nmsRadius` pixels from the corresponding part of a previously detected instance.
@ -34,7 +34,7 @@ export function decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFw
// Else start a new detection instance at the position of the root.
const keypoints = decodePose.decodePose(root, scoresBuffer, offsetsBuffer, defaultOutputStride, displacementsFwdBuffer, displacementsBwdBuffer);
const score = getInstanceScore(poses, squaredNmsRadius, keypoints);
if (score > config.body.scoreThreshold) poses.push({ keypoints, score });
if (score > scoreThreshold) poses.push({ keypoints, score });
}
return poses;
}


@ -74,7 +74,7 @@ export function decodePose(root, scores, offsets, outputStride, displacementsFwd
return instanceKeypoints;
}
export async function decodeSinglePose(heatmapScores, offsets, config) {
export async function decodeSinglePose(heatmapScores, offsets, minScore) {
let totalScore = 0.0;
const heatmapValues = decoders.argmax2d(heatmapScores);
const allTensorBuffers = await Promise.all([heatmapScores.buffer(), offsets.buffer(), heatmapValues.buffer()]);
@ -95,7 +95,7 @@ export async function decodeSinglePose(heatmapScores, offsets, config) {
score,
};
});
const filteredKeypoints = instanceKeypoints.filter((kpt) => kpt.score > config.body.scoreThreshold);
const filteredKeypoints = instanceKeypoints.filter((kpt) => kpt.score > minScore);
heatmapValues.dispose();
offsetPoints.dispose();
return { keypoints: filteredKeypoints, score: totalScore / instanceKeypoints.length };


@ -1,30 +1,23 @@
import * as tf from '../../dist/tfjs.esm.js';
const imageNetMean = [-123.15, -115.90, -103.06];
function nameOutputResultsMobileNet(results) {
const [offsets, heatmap, displacementFwd, displacementBwd] = results;
return { offsets, heatmap, displacementFwd, displacementBwd };
}
function nameOutputResultsResNet(results) {
const [displacementFwd, displacementBwd, offsets, heatmap] = results;
return { offsets, heatmap, displacementFwd, displacementBwd };
}
export class BaseModel {
model: any;
constructor(model) {
this.model = model;
}
predict(input, config) {
predict(input) {
return tf.tidy(() => {
const asFloat = (config.body.modelType === 'posenet-resnet') ? input.toFloat().add(imageNetMean) : input.toFloat().div(127.5).sub(1.0);
const asFloat = input.toFloat().div(127.5).sub(1.0);
const asBatch = asFloat.expandDims(0);
const results = this.model.predict(asBatch);
const results3d = results.map((y) => y.squeeze([0]));
const namedResults = (config.body.modelType === 'posenet-resnet') ? nameOutputResultsResNet(results3d) : nameOutputResultsMobileNet(results3d);
const namedResults = nameOutputResultsMobileNet(results3d);
return {
heatmapScores: namedResults.heatmap.sigmoid(),
offsets: namedResults.offsets,


@ -5,43 +5,42 @@ import * as decodeMultiple from './decodeMultiple';
import * as decodePose from './decodePose';
import * as util from './util';
async function estimateMultiple(input, res, config) {
async function estimateMultiple(input, res, config, inputSize) {
return new Promise(async (resolve) => {
const height = input.shape[1];
const width = input.shape[2];
const allTensorBuffers = await util.toTensorBuffers3D([res.heatmapScores, res.offsets, res.displacementFwd, res.displacementBwd]);
const scoresBuffer = allTensorBuffers[0];
const offsetsBuffer = allTensorBuffers[1];
const displacementsFwdBuffer = allTensorBuffers[2];
const displacementsBwdBuffer = allTensorBuffers[3];
const poses = await decodeMultiple.decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, config);
const scaled = util.scaleAndFlipPoses(poses, [height, width], [config.body.inputSize, config.body.inputSize]);
const poses = await decodeMultiple.decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, config.body.nmsRadius, config.body.maxDetections, config.body.scoreThreshold);
const scaled = util.scaleAndFlipPoses(poses, [input.shape[1], input.shape[2]], [inputSize, inputSize]);
resolve(scaled);
});
}
async function estimateSingle(input, res, config) {
async function estimateSingle(input, res, config, inputSize) {
return new Promise(async (resolve) => {
const height = input.shape[1];
const width = input.shape[2];
const pose = await decodePose.decodeSinglePose(res.heatmapScores, res.offsets, config);
const poses = [pose];
const scaled = util.scaleAndFlipPoses(poses, [height, width], [config.body.inputSize, config.body.inputSize]);
const pose = await decodePose.decodeSinglePose(res.heatmapScores, res.offsets, config.body.scoreThreshold);
const scaled = util.scaleAndFlipPoses([pose], [input.shape[1], input.shape[2]], [inputSize, inputSize]);
resolve(scaled);
});
}
export class PoseNet {
baseModel: any;
inputSize: number
constructor(model) {
this.baseModel = model;
this.inputSize = model.model.inputs[0].shape[1];
}
async estimatePoses(input, config) {
const resized = util.resizeTo(input, [config.body.inputSize, config.body.inputSize]);
const resized = util.resizeTo(input, [this.inputSize, this.inputSize]);
const res = this.baseModel.predict(resized, config);
const poses = (config.body.maxDetections < 2) ? await estimateSingle(input, res, config) : await estimateMultiple(input, res, config);
const poses = (config.body.maxDetections < 2)
? await estimateSingle(input, res, config, this.inputSize)
: await estimateMultiple(input, res, config, this.inputSize);
res.heatmapScores.dispose();
res.offsets.dispose();
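
The inputSize captured in the constructor above is also what util.scaleAndFlipPoses uses to map keypoints from model space back to the original frame. A hypothetical standalone version of that rescale arithmetic (helper name and shapes are mine, not the library's):

function rescaleKeypoint(point: { x: number, y: number }, frame: { width: number, height: number }, inputSize: number) {
  return { x: (point.x * frame.width) / inputSize, y: (point.y * frame.height) / inputSize };
}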


@ -5,7 +5,7 @@ export declare class BlazeFaceModel {
height: number;
anchorsData: any;
anchors: any;
inputSize: number;
inputSize: any;
config: any;
scaleFaces: number;
constructor(model: any, config: any);


@ -3,13 +3,13 @@ export declare class Pipeline {
boundingBoxDetector: any;
meshDetector: any;
irisModel: any;
meshWidth: number;
meshHeight: number;
boxSize: number;
meshSize: number;
irisSize: number;
irisEnlarge: number;
skipped: number;
detectedFaces: number;
constructor(boundingBoxDetector: any, meshDetector: any, irisModel: any, config: any);
constructor(boundingBoxDetector: any, meshDetector: any, irisModel: any);
transformRawCoords(rawCoords: any, box: any, angle: any, rotationMatrix: any): any;
getLeftToRightEyeDepthDifference(rawCoords: any): number;
getEyeBox(rawCoords: any, face: any, eyeInnerCornerIndex: any, eyeOuterCornerIndex: any, flip?: boolean): {


@ -2,6 +2,7 @@ export declare class HandDetector {
model: any;
anchors: any;
anchorsTensor: any;
inputSize: number;
inputSizeTensor: any;
doubleInputSizeTensor: any;
constructor(model: any, inputSize: any, anchorsAnnotated: any);


@ -1,4 +1,4 @@
export declare function decodeMultiplePoses(scoresBuffer: any, offsetsBuffer: any, displacementsFwdBuffer: any, displacementsBwdBuffer: any, config: any): {
export declare function decodeMultiplePoses(scoresBuffer: any, offsetsBuffer: any, displacementsFwdBuffer: any, displacementsBwdBuffer: any, nmsRadius: any, maxDetections: any, scoreThreshold: any): {
keypoints: any;
score: number;
}[];


@ -1,5 +1,5 @@
export declare function decodePose(root: any, scores: any, offsets: any, outputStride: any, displacementsFwd: any, displacementsBwd: any): any[];
export declare function decodeSinglePose(heatmapScores: any, offsets: any, config: any): Promise<{
export declare function decodeSinglePose(heatmapScores: any, offsets: any, minScore: any): Promise<{
keypoints: {
position: {
y: any;


@ -1,6 +1,6 @@
export declare class BaseModel {
model: any;
constructor(model: any);
predict(input: any, config: any): any;
predict(input: any): any;
dispose(): void;
}


@ -1,5 +1,6 @@
export declare class PoseNet {
baseModel: any;
inputSize: number;
constructor(model: any);
estimatePoses(input: any, config: any): Promise<unknown>;
dispose(): void;