module parametrization and performance monitoring

pull/50/head
Vladimir Mandic 2020-10-14 13:23:02 -04:00
parent ded889484b
commit e1a514d7ca
23 changed files with 557 additions and 523 deletions

@@ -294,6 +294,18 @@ result = {
 }
 ```
+Additionally, the `result` object includes internal performance data: total time spent and time per module (measured in ms):
+```js
+result.performance = {
+body,
+hand,
+face,
+agegender,
+total,
+}
+```
 <hr>
 ## Build
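
To surface these timings, a caller can simply iterate over the returned object. A minimal sketch, assuming `human.detect()` resolves with the `result` object documented above:

```js
// log per-module detection timings (ms) reported by human.detect()
// `input` is any supported media element or tensor
const result = await human.detect(input);
for (const [stage, ms] of Object.entries(result.performance)) {
  console.log(`${stage}: ${ms} ms`);
}
```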
@@ -321,7 +333,18 @@ Development dependencies are [eslint](https://github.com/eslint) used for code l
 Performance will vary depending on your hardware, but also on the resolution of the input video/image, the enabled modules, and their parameters
-For example, on a desktop with a low-end nVidia GTX1050 it can perform multiple face detections at 50+ FPS, but drops to 5-10 FPS on a medium complex images if all modules are enabled
+For example, on a desktop with a low-end nVidia GTX1050 it can perform multiple face detections at 60+ FPS, but drops to 10 FPS on medium-complexity images if all modules are enabled
+
+Performance per module:
+
+- Enabled all: 10 FPS
+- Face Detect: 80 FPS
+- Face Geometry: 30 FPS (includes face detect)
+- Face Iris: 25 FPS (includes face detect and face geometry)
+- Age: 60 FPS (includes face detect)
+- Gender: 60 FPS (includes face detect)
+- Hand: 40 FPS
+- Body: 50 FPS
+
 Library can also be used on mobile devices
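
Since every module adds per-frame cost, the quickest FPS lever is disabling modules you do not need. A hedged sketch, assuming the config object passed to `human.detect()` is merged over the library defaults:

```js
// face detection plus mesh only: skip body, hand, iris, age and gender for higher FPS
const result = await human.detect(input, {
  body: { enabled: false },
  hand: { enabled: false },
  face: {
    enabled: true,
    mesh: { enabled: true },
    iris: { enabled: false },
    age: { enabled: false },
    gender: { enabled: false },
  },
});
```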

@@ -203,6 +203,7 @@ async function runHumanDetect(input, canvas) {
 TFJS Version: ${human.tf.version_core} Memory: ${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors
 GPU Memory: used ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes free ${Math.floor(1024 * 1024 * engine.backendInstance.numMBBeforeWarning).toLocaleString()} bytes
 Result Object Size: Face: ${(JSON.stringify(result.face)).length.toLocaleString()} bytes Body: ${(JSON.stringify(result.body)).length.toLocaleString()} bytes Hand: ${(JSON.stringify(result.hand)).length.toLocaleString()} bytes
+Performance: ${JSON.stringify(result.performance)}
 `;
 // rinse & repeate
 // if (input.readyState) setTimeout(() => runHumanDetect(), 1000); // slow loop for debugging purposes

dist/human.esm.js (vendored)

@@ -65534,6 +65534,17 @@ var require_keypoints = __commonJS((exports) => {
 rightCheek: [205],
 leftCheek: [425]
 };
+exports.MESH_TO_IRIS_INDICES_MAP = [
+{key: "EyeUpper0", indices: [9, 10, 11, 12, 13, 14, 15]},
+{key: "EyeUpper1", indices: [25, 26, 27, 28, 29, 30, 31]},
+{key: "EyeUpper2", indices: [41, 42, 43, 44, 45, 46, 47]},
+{key: "EyeLower0", indices: [0, 1, 2, 3, 4, 5, 6, 7, 8]},
+{key: "EyeLower1", indices: [16, 17, 18, 19, 20, 21, 22, 23, 24]},
+{key: "EyeLower2", indices: [32, 33, 34, 35, 36, 37, 38, 39, 40]},
+{key: "EyeLower3", indices: [54, 55, 56, 57, 58, 59, 60, 61, 62]},
+{key: "EyebrowUpper", indices: [63, 64, 65, 66, 67, 68, 69, 70]},
+{key: "EyebrowLower", indices: [48, 49, 50, 51, 52, 53]}
+];
 });
 // src/facemesh/box.js
@@ -65696,22 +65707,9 @@ var require_pipeline = __commonJS((exports) => {
 const IRIS_LOWER_CENTER_INDEX = 4;
 const IRIS_IRIS_INDEX = 71;
 const IRIS_NUM_COORDINATES = 76;
-const ENLARGE_EYE_RATIO = 2.3;
-const IRIS_MODEL_INPUT_SIZE = 64;
-const MESH_TO_IRIS_INDICES_MAP = [
-{key: "EyeUpper0", indices: [9, 10, 11, 12, 13, 14, 15]},
-{key: "EyeUpper1", indices: [25, 26, 27, 28, 29, 30, 31]},
-{key: "EyeUpper2", indices: [41, 42, 43, 44, 45, 46, 47]},
-{key: "EyeLower0", indices: [0, 1, 2, 3, 4, 5, 6, 7, 8]},
-{key: "EyeLower1", indices: [16, 17, 18, 19, 20, 21, 22, 23, 24]},
-{key: "EyeLower2", indices: [32, 33, 34, 35, 36, 37, 38, 39, 40]},
-{key: "EyeLower3", indices: [54, 55, 56, 57, 58, 59, 60, 61, 62]},
-{key: "EyebrowUpper", indices: [63, 64, 65, 66, 67, 68, 69, 70]},
-{key: "EyebrowLower", indices: [48, 49, 50, 51, 52, 53]}
-];
 function replaceRawCoordinates(rawCoords, newCoords, prefix, keys) {
-for (let i = 0; i < MESH_TO_IRIS_INDICES_MAP.length; i++) {
-const {key, indices} = MESH_TO_IRIS_INDICES_MAP[i];
+for (let i = 0; i < keypoints.MESH_TO_IRIS_INDICES_MAP.length; i++) {
+const {key, indices} = keypoints.MESH_TO_IRIS_INDICES_MAP[i];
 const originalIndices = keypoints.MESH_ANNOTATIONS[`${prefix}${key}`];
 const shouldReplaceAllKeys = keys == null;
 if (shouldReplaceAllKeys || keys.includes(key)) {
@@ -65735,8 +65733,8 @@ var require_pipeline = __commonJS((exports) => {
 this.irisModel = irisModel;
 this.meshWidth = config.mesh.inputSize;
 this.meshHeight = config.mesh.inputSize;
-this.skipFrames = config.detector.skipFrames;
-this.maxFaces = config.detector.maxFaces;
+this.irisSize = config.iris.inputSize;
+this.irisEnlarge = config.iris.enlargeFactor;
 }
 transformRawCoords(rawCoords, box, angle, rotationMatrix) {
 const boxSize = bounding.getBoxSize({startPoint: box.startPoint, endPoint: box.endPoint});
@@ -65766,14 +65764,14 @@ var require_pipeline = __commonJS((exports) => {
 return leftEyeZ - rightEyeZ;
 }
 getEyeBox(rawCoords, face, eyeInnerCornerIndex, eyeOuterCornerIndex, flip = false) {
-const box = bounding.squarifyBox(bounding.enlargeBox(this.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), ENLARGE_EYE_RATIO));
+const box = bounding.squarifyBox(bounding.enlargeBox(this.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), this.irisEnlarge));
 const boxSize = bounding.getBoxSize(box);
 let crop = tf.image.cropAndResize(face, [[
 box.startPoint[1] / this.meshHeight,
 box.startPoint[0] / this.meshWidth,
 box.endPoint[1] / this.meshHeight,
 box.endPoint[0] / this.meshWidth
-]], [0], [IRIS_MODEL_INPUT_SIZE, IRIS_MODEL_INPUT_SIZE]);
+]], [0], [this.irisSize, this.irisSize]);
 if (flip) {
 crop = tf.image.flipLeftRight(crop);
 }
@@ -65786,8 +65784,8 @@ var require_pipeline = __commonJS((exports) => {
 const y = eyeData[i * 3 + 1];
 const z = eyeData[i * 3 + 2];
 eyeRawCoords.push([
-(flip ? 1 - x / IRIS_MODEL_INPUT_SIZE : x / IRIS_MODEL_INPUT_SIZE) * eyeBoxSize[0] + eyeBox.startPoint[0],
-y / IRIS_MODEL_INPUT_SIZE * eyeBoxSize[1] + eyeBox.startPoint[1],
+(flip ? 1 - x / this.irisSize : x / this.irisSize) * eyeBoxSize[0] + eyeBox.startPoint[0],
+y / this.irisSize * eyeBoxSize[1] + eyeBox.startPoint[1],
 z
 ]);
 }
@@ -65807,7 +65805,9 @@ var require_pipeline = __commonJS((exports) => {
 return [coord[0], coord[1], z];
 });
 }
-async predict(input, predictIrises, predictMesh) {
+async predict(input, config) {
+this.skipFrames = config.detector.skipFrames;
+this.maxFaces = config.detector.maxFaces;
 if (this.shouldUpdateRegionsOfInterest()) {
 const {boxes, scaleFactor} = await this.boundingBoxDetector.getBoundingBoxes(input);
 if (boxes.length === 0) {
@@ -65854,7 +65854,7 @@ var require_pipeline = __commonJS((exports) => {
 const [, flag, coords] = this.meshDetector.predict(face);
 const coordsReshaped = tf.reshape(coords, [-1, 3]);
 let rawCoords = coordsReshaped.arraySync();
-if (predictIrises) {
+if (config.iris.enabled) {
 const {box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop} = this.getEyeBox(rawCoords, face, LEFT_EYE_BOUNDS[0], LEFT_EYE_BOUNDS[1], true);
 const {box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop} = this.getEyeBox(rawCoords, face, RIGHT_EYE_BOUNDS[0], RIGHT_EYE_BOUNDS[1]);
 const eyePredictions = this.irisModel.predict(tf.concat([leftEyeCrop, rightEyeCrop]));
@@ -65880,7 +65880,7 @@ var require_pipeline = __commonJS((exports) => {
 const transformedCoordsData = this.transformRawCoords(rawCoords, box, angle, rotationMatrix);
 tf.dispose(rawCoords);
 const landmarksBox = bounding.enlargeBox(this.calculateLandmarksBoundingBox(transformedCoordsData));
-if (predictMesh) {
+if (config.mesh.enabled) {
 const transformedCoords = tf.tensor2d(transformedCoordsData);
 this.regionsOfInterest[i] = {...landmarksBox, landmarks: transformedCoords.arraySync()};
 const prediction2 = {
@@ -69098,7 +69098,7 @@ var require_facemesh = __commonJS((exports) => {
 input = tf.browser.fromPixels(input);
 return input.toFloat().expandDims(0);
 });
-const predictions = await this.pipeline.predict(image, this.config.iris.enabled, this.config.mesh.enabled);
+const predictions = await this.pipeline.predict(image, config);
 tf.dispose(image);
 const results = [];
 for (const prediction of predictions || []) {
@@ -69182,12 +69182,14 @@ var require_ssrnet = __commonJS((exports) => {
 const obj = {};
 if (config.face.age.enabled) {
 const ageT = await models.age.predict(enhance);
-obj.age = Math.trunc(10 * ageT.dataSync()[0]) / 10;
+const data = await ageT.data();
+obj.age = Math.trunc(10 * data[0]) / 10;
 tf.dispose(ageT);
 }
 if (config.face.gender.enabled) {
 const genderT = await models.gender.predict(enhance);
-obj.gender = Math.trunc(100 * genderT.dataSync()[0]) < 50 ? "female" : "male";
+const data = await genderT.data();
+obj.gender = Math.trunc(100 * data[0]) < 50 ? "female" : "male";
 tf.dispose(genderT);
 }
 tf.dispose(enhance);
@@ -69718,15 +69720,13 @@ var require_modelPoseNet = __commonJS((exports) => {
 const decodeMultiple = require_decodeMultiple();
 const util = require_util3();
 class PoseNet {
-constructor(net, inputResolution) {
+constructor(net) {
 this.baseModel = net;
-this.inputResolution = inputResolution;
 }
 async estimatePoses(input, config) {
-const outputStride = this.baseModel.outputStride;
-const inputResolution = this.inputResolution;
+const outputStride = config.outputStride;
 const [height, width] = util.getInputTensorDimensions(input);
-const {resized, padding} = util.padAndResizeTo(input, [inputResolution, inputResolution]);
+const {resized, padding} = util.padAndResizeTo(input, [config.inputResolution, config.inputResolution]);
 const {heatmapScores, offsets, displacementFwd, displacementBwd} = this.baseModel.predict(resized);
 const allTensorBuffers = await util.toTensorBuffers3D([heatmapScores, offsets, displacementFwd, displacementBwd]);
 const scoresBuffer = allTensorBuffers[0];
@@ -69734,7 +69734,7 @@ var require_modelPoseNet = __commonJS((exports) => {
 const displacementsFwdBuffer = allTensorBuffers[2];
 const displacementsBwdBuffer = allTensorBuffers[3];
 const poses = await decodeMultiple.decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, outputStride, config.maxDetections, config.scoreThreshold, config.nmsRadius);
-const resultPoses = util.scaleAndFlipPoses(poses, [height, width], [inputResolution, inputResolution], padding);
+const resultPoses = util.scaleAndFlipPoses(poses, [height, width], [config.inputResolution, config.inputResolution], padding);
 heatmapScores.dispose();
 offsets.dispose();
 displacementFwd.dispose();
@@ -69748,10 +69748,9 @@ var require_modelPoseNet = __commonJS((exports) => {
 }
 exports.PoseNet = PoseNet;
 async function loadMobileNet(config) {
-const outputStride = config.outputStride;
 const graphModel = await tf.loadGraphModel(config.modelPath);
-const mobilenet = new modelMobileNet.MobileNet(graphModel, outputStride);
-return new PoseNet(mobilenet, config.inputResolution);
+const mobilenet = new modelMobileNet.MobileNet(graphModel, config.outputStride);
+return new PoseNet(mobilenet);
 }
 async function load(config) {
 return loadMobileNet(config);
@@ -69857,17 +69856,14 @@ var require_handdetector = __commonJS((exports) => {
 const tf = require_tf_node();
 const bounding = require_box2();
 class HandDetector {
-constructor(model, width, height, anchors, iouThreshold, scoreThreshold, maxHands) {
+constructor(model, anchors, config) {
 this.model = model;
-this.width = width;
-this.height = height;
-this.iouThreshold = iouThreshold;
-this.scoreThreshold = scoreThreshold;
-this.maxHands = maxHands;
+this.width = config.inputSize;
+this.height = config.inputSize;
 this.anchors = anchors.map((anchor) => [anchor.x_center, anchor.y_center]);
 this.anchorsTensor = tf.tensor2d(this.anchors);
-this.inputSizeTensor = tf.tensor1d([width, height]);
-this.doubleInputSizeTensor = tf.tensor1d([width * 2, height * 2]);
+this.inputSizeTensor = tf.tensor1d([config.inputSize, config.inputSize]);
+this.doubleInputSizeTensor = tf.tensor1d([config.inputSize * 2, config.inputSize * 2]);
 }
 normalizeBoxes(boxes) {
 return tf.tidy(() => {
@@ -69921,9 +69917,12 @@ var require_handdetector = __commonJS((exports) => {
 });
 return detectedHands;
 }
-async estimateHandBounds(input) {
+async estimateHandBounds(input, config) {
 const inputHeight = input.shape[1];
 const inputWidth = input.shape[2];
+this.iouThreshold = config.iouThreshold;
+this.scoreThreshold = config.scoreThreshold;
+this.maxHands = config.maxHands;
 const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
 const predictions = await this.getBoundingBoxes(image);
 image.dispose();
@@ -70037,24 +70036,20 @@ var require_pipeline2 = __commonJS((exports) => {
 const util = require_util4();
 const UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD = 0.8;
 const PALM_BOX_SHIFT_VECTOR = [0, -0.4];
-const PALM_BOX_ENLARGE_FACTOR = 3;
 const HAND_BOX_SHIFT_VECTOR = [0, -0.1];
 const HAND_BOX_ENLARGE_FACTOR = 1.65;
 const PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2];
 const PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0;
 const PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2;
 class HandPipeline {
-constructor(boundingBoxDetector, meshDetector, meshWidth, meshHeight, maxContinuousChecks, detectionConfidence, maxHands) {
+constructor(boundingBoxDetector, meshDetector, config) {
 this.regionsOfInterest = [];
 this.runsWithoutHandDetector = 0;
 this.boundingBoxDetector = boundingBoxDetector;
 this.meshDetector = meshDetector;
-this.maxContinuousChecks = maxContinuousChecks;
-this.detectionConfidence = detectionConfidence;
-this.maxHands = maxHands;
-this.meshWidth = meshWidth;
-this.meshHeight = meshHeight;
-this.maxHandsNumber = 1;
+this.meshWidth = config.inputSize;
+this.meshHeight = config.inputSize;
+this.enlargeFactor = config.enlargeFactor;
 }
 getBoxForPalmLandmarks(palmLandmarks, rotationMatrix) {
 const rotatedPalmLandmarks = palmLandmarks.map((coord) => {
@@ -70062,7 +70057,7 @@ var require_pipeline2 = __commonJS((exports) => {
 return util.rotatePoint(homogeneousCoordinate, rotationMatrix);
 });
 const boxAroundPalm = this.calculateLandmarksBoundingBox(rotatedPalmLandmarks);
-return bounding.enlargeBox(bounding.squarifyBox(bounding.shiftBox(boxAroundPalm, PALM_BOX_SHIFT_VECTOR)), PALM_BOX_ENLARGE_FACTOR);
+return bounding.enlargeBox(bounding.squarifyBox(bounding.shiftBox(boxAroundPalm, PALM_BOX_SHIFT_VECTOR)), this.enlargeFactor);
 }
 getBoxForHandLandmarks(landmarks) {
 const boundingBox = this.calculateLandmarksBoundingBox(landmarks);
@@ -70099,10 +70094,13 @@ var require_pipeline2 = __commonJS((exports) => {
 coord[2]
 ]);
 }
-async estimateHand(image, config) {
+async estimateHands(image, config) {
+this.maxContinuousChecks = config.skipFrames;
+this.detectionConfidence = config.minConfidence;
+this.maxHands = config.maxHands;
 const useFreshBox = this.shouldUpdateRegionsOfInterest();
 if (useFreshBox === true) {
-const boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image);
+const boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config);
 this.regionsOfInterest = [];
 for (const i in boundingBoxPredictions) {
 this.updateRegionsOfInterest(boundingBoxPredictions[i], true, i);
@@ -70188,7 +70186,7 @@ var require_pipeline2 = __commonJS((exports) => {
 }
 }
 shouldUpdateRegionsOfInterest() {
-return this.regionsOfInterest === 0 || this.runsWithoutHandDetector >= this.maxContinuousChecks;
+return !this.regionsOfInterest || this.regionsOfInterest.length === 0 || this.runsWithoutHandDetector >= this.maxContinuousChecks;
 }
 }
 exports.HandPipeline = HandPipeline;
@@ -70200,44 +70198,21 @@ var require_handpose = __commonJS((exports) => {
 const hand = require_handdetector();
 const keypoints = require_keypoints3();
 const pipe = require_pipeline2();
-async function loadHandDetectorModel(url) {
-return tf.loadGraphModel(url, {fromTFHub: url.includes("tfhub.dev")});
-}
-async function loadHandPoseModel(url) {
-return tf.loadGraphModel(url, {fromTFHub: url.includes("tfhub.dev")});
-}
-async function loadAnchors(url) {
-if (tf.env().features.IS_NODE) {
-const fs = require("fs");
-const data = await fs.readFileSync(url.replace("file://", ""));
-return JSON.parse(data);
-}
-return tf.util.fetch(url).then((d) => d.json());
-}
-async function load(config) {
-const [ANCHORS, handDetectorModel, handPoseModel] = await Promise.all([
-loadAnchors(config.detector.anchors),
-loadHandDetectorModel(config.detector.modelPath),
-loadHandPoseModel(config.skeleton.modelPath)
-]);
-const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold, config.maxHands);
-const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence, config.maxHands);
-const handpose = new HandPose(pipeline);
-return handpose;
-}
-exports.load = load;
 class HandPose {
 constructor(pipeline) {
 this.pipeline = pipeline;
 }
 async estimateHands(input, config) {
+this.maxContinuousChecks = config.skipFrames;
+this.detectionConfidence = config.minConfidence;
+this.maxHands = config.maxHands;
 const image = tf.tidy(() => {
 if (!(input instanceof tf.Tensor)) {
 input = tf.browser.fromPixels(input);
 }
 return input.toFloat().expandDims(0);
 });
-const predictions = await this.pipeline.estimateHand(image, config);
+const predictions = await this.pipeline.estimateHands(image, config);
 image.dispose();
 const hands = [];
 if (!predictions)
@@ -70260,6 +70235,26 @@ var require_handpose = __commonJS((exports) => {
 }
 }
 exports.HandPose = HandPose;
+async function loadAnchors(url) {
+if (tf.env().features.IS_NODE) {
+const fs = require("fs");
+const data = await fs.readFileSync(url.replace("file://", ""));
+return JSON.parse(data);
+}
+return tf.util.fetch(url).then((d) => d.json());
+}
+async function load(config) {
+const [anchors, handDetectorModel, handPoseModel] = await Promise.all([
+loadAnchors(config.detector.anchors),
+tf.loadGraphModel(config.detector.modelPath, {fromTFHub: config.detector.modelPath.includes("tfhub.dev")}),
+tf.loadGraphModel(config.skeleton.modelPath, {fromTFHub: config.skeleton.modelPath.includes("tfhub.dev")})
+]);
+const detector = new hand.HandDetector(handDetectorModel, anchors, config);
+const pipeline = new pipe.HandPipeline(detector, handPoseModel, config);
+const handpose = new HandPose(pipeline);
+return handpose;
+}
+exports.load = load;
 });
 // src/config.js
@@ -70277,7 +70272,7 @@ var require_config = __commonJS((exports) => {
 skipFrames: 10,
 minConfidence: 0.5,
 iouThreshold: 0.3,
-scoreThreshold: 0.5
+scoreThreshold: 0.7
 },
 mesh: {
 enabled: true,
@@ -70287,7 +70282,8 @@ var require_config = __commonJS((exports) => {
 iris: {
 enabled: true,
 modelPath: "../models/iris/model.json",
-inputSize: 192
+enlargeFactor: 2.3,
+inputSize: 64
 },
 age: {
 enabled: true,
@@ -70306,7 +70302,7 @@ var require_config = __commonJS((exports) => {
 inputResolution: 257,
 outputStride: 16,
 maxDetections: 5,
-scoreThreshold: 0.5,
+scoreThreshold: 0.7,
 nmsRadius: 20
 },
 hand: {
@@ -70315,7 +70311,8 @@ var require_config = __commonJS((exports) => {
 skipFrames: 10,
 minConfidence: 0.5,
 iouThreshold: 0.3,
-scoreThreshold: 0.5,
+scoreThreshold: 0.7,
+enlargeFactor: 1.65,
 maxHands: 2,
 detector: {
 anchors: "../models/handdetect/anchors.json",
@@ -70378,17 +70375,27 @@ var require_src = __commonJS((exports) => {
 savedWebglPackDepthwiseConvFlag = tf.env().get("WEBGL_PACK_DEPTHWISECONV");
 tf.env().set("WEBGL_PACK_DEPTHWISECONV", true);
 }
+const perf = {};
+let timeStamp;
+timeStamp = performance.now();
 let poseRes = [];
 if (config.body.enabled)
 poseRes = await models.posenet.estimatePoses(input, config.body);
+perf.body = Math.trunc(performance.now() - timeStamp);
+timeStamp = performance.now();
 let handRes = [];
 if (config.hand.enabled)
 handRes = await models.handpose.estimateHands(input, config.hand);
+perf.hand = Math.trunc(performance.now() - timeStamp);
 const faceRes = [];
 if (config.face.enabled) {
+timeStamp = performance.now();
 const faces = await models.facemesh.estimateFaces(input, config.face);
+perf.face = Math.trunc(performance.now() - timeStamp);
 for (const face of faces) {
+timeStamp = performance.now();
 const ssrdata = config.face.age.enabled || config.face.gender.enabled ? await ssrnet.predict(face.image, config) : {};
+perf.agegender = Math.trunc(performance.now() - timeStamp);
 face.image.dispose();
 const iris = face.annotations.leftEyeIris && face.annotations.rightEyeIris ? Math.max(face.annotations.leftEyeIris[3][0] - face.annotations.leftEyeIris[1][0], face.annotations.rightEyeIris[3][0] - face.annotations.rightEyeIris[1][0]) : 0;
 faceRes.push({
@@ -70404,7 +70411,9 @@ var require_src = __commonJS((exports) => {
 }
 tf.env().set("WEBGL_PACK_DEPTHWISECONV", savedWebglPackDepthwiseConvFlag);
 tf.engine().endScope();
-resolve({face: faceRes, body: poseRes, hand: handRes});
+perf.total = Object.values(perf).reduce((a, b) => a + b);
+console.log("total", perf.total);
+resolve({face: faceRes, body: poseRes, hand: handRes, performance: perf});
 });
 }
 exports.detect = detect;

dist/human.js (vendored): diff suppressed
dist/human.js.map (vendored): diff suppressed
dist/human.node.js (vendored)

@@ -272,6 +272,17 @@ var require_keypoints = __commonJS((exports2) => {
 rightCheek: [205],
 leftCheek: [425]
 };
+exports2.MESH_TO_IRIS_INDICES_MAP = [
+{key: "EyeUpper0", indices: [9, 10, 11, 12, 13, 14, 15]},
+{key: "EyeUpper1", indices: [25, 26, 27, 28, 29, 30, 31]},
+{key: "EyeUpper2", indices: [41, 42, 43, 44, 45, 46, 47]},
+{key: "EyeLower0", indices: [0, 1, 2, 3, 4, 5, 6, 7, 8]},
+{key: "EyeLower1", indices: [16, 17, 18, 19, 20, 21, 22, 23, 24]},
+{key: "EyeLower2", indices: [32, 33, 34, 35, 36, 37, 38, 39, 40]},
+{key: "EyeLower3", indices: [54, 55, 56, 57, 58, 59, 60, 61, 62]},
+{key: "EyebrowUpper", indices: [63, 64, 65, 66, 67, 68, 69, 70]},
+{key: "EyebrowLower", indices: [48, 49, 50, 51, 52, 53]}
+];
 });
 // src/facemesh/box.js
@@ -434,22 +445,9 @@ var require_pipeline = __commonJS((exports2) => {
 const IRIS_LOWER_CENTER_INDEX = 4;
 const IRIS_IRIS_INDEX = 71;
 const IRIS_NUM_COORDINATES = 76;
-const ENLARGE_EYE_RATIO = 2.3;
-const IRIS_MODEL_INPUT_SIZE = 64;
-const MESH_TO_IRIS_INDICES_MAP = [
-{key: "EyeUpper0", indices: [9, 10, 11, 12, 13, 14, 15]},
-{key: "EyeUpper1", indices: [25, 26, 27, 28, 29, 30, 31]},
-{key: "EyeUpper2", indices: [41, 42, 43, 44, 45, 46, 47]},
-{key: "EyeLower0", indices: [0, 1, 2, 3, 4, 5, 6, 7, 8]},
-{key: "EyeLower1", indices: [16, 17, 18, 19, 20, 21, 22, 23, 24]},
-{key: "EyeLower2", indices: [32, 33, 34, 35, 36, 37, 38, 39, 40]},
-{key: "EyeLower3", indices: [54, 55, 56, 57, 58, 59, 60, 61, 62]},
-{key: "EyebrowUpper", indices: [63, 64, 65, 66, 67, 68, 69, 70]},
-{key: "EyebrowLower", indices: [48, 49, 50, 51, 52, 53]}
-];
 function replaceRawCoordinates(rawCoords, newCoords, prefix, keys) {
-for (let i = 0; i < MESH_TO_IRIS_INDICES_MAP.length; i++) {
-const {key, indices} = MESH_TO_IRIS_INDICES_MAP[i];
+for (let i = 0; i < keypoints.MESH_TO_IRIS_INDICES_MAP.length; i++) {
+const {key, indices} = keypoints.MESH_TO_IRIS_INDICES_MAP[i];
 const originalIndices = keypoints.MESH_ANNOTATIONS[`${prefix}${key}`];
 const shouldReplaceAllKeys = keys == null;
 if (shouldReplaceAllKeys || keys.includes(key)) {
@@ -473,8 +471,8 @@ var require_pipeline = __commonJS((exports2) => {
 this.irisModel = irisModel;
 this.meshWidth = config.mesh.inputSize;
 this.meshHeight = config.mesh.inputSize;
-this.skipFrames = config.detector.skipFrames;
-this.maxFaces = config.detector.maxFaces;
+this.irisSize = config.iris.inputSize;
+this.irisEnlarge = config.iris.enlargeFactor;
 }
 transformRawCoords(rawCoords, box, angle, rotationMatrix) {
 const boxSize = bounding.getBoxSize({startPoint: box.startPoint, endPoint: box.endPoint});
@@ -504,14 +502,14 @@ var require_pipeline = __commonJS((exports2) => {
 return leftEyeZ - rightEyeZ;
 }
 getEyeBox(rawCoords, face, eyeInnerCornerIndex, eyeOuterCornerIndex, flip = false) {
-const box = bounding.squarifyBox(bounding.enlargeBox(this.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), ENLARGE_EYE_RATIO));
+const box = bounding.squarifyBox(bounding.enlargeBox(this.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), this.irisEnlarge));
 const boxSize = bounding.getBoxSize(box);
 let crop = tf2.image.cropAndResize(face, [[
 box.startPoint[1] / this.meshHeight,
 box.startPoint[0] / this.meshWidth,
 box.endPoint[1] / this.meshHeight,
 box.endPoint[0] / this.meshWidth
-]], [0], [IRIS_MODEL_INPUT_SIZE, IRIS_MODEL_INPUT_SIZE]);
+]], [0], [this.irisSize, this.irisSize]);
 if (flip) {
 crop = tf2.image.flipLeftRight(crop);
 }
@@ -524,8 +522,8 @@ var require_pipeline = __commonJS((exports2) => {
 const y = eyeData[i * 3 + 1];
 const z = eyeData[i * 3 + 2];
 eyeRawCoords.push([
-(flip ? 1 - x / IRIS_MODEL_INPUT_SIZE : x / IRIS_MODEL_INPUT_SIZE) * eyeBoxSize[0] + eyeBox.startPoint[0],
-y / IRIS_MODEL_INPUT_SIZE * eyeBoxSize[1] + eyeBox.startPoint[1],
+(flip ? 1 - x / this.irisSize : x / this.irisSize) * eyeBoxSize[0] + eyeBox.startPoint[0],
+y / this.irisSize * eyeBoxSize[1] + eyeBox.startPoint[1],
 z
 ]);
 }
@@ -545,7 +543,9 @@ var require_pipeline = __commonJS((exports2) => {
 return [coord[0], coord[1], z];
 });
 }
-async predict(input, predictIrises, predictMesh) {
+async predict(input, config) {
+this.skipFrames = config.detector.skipFrames;
+this.maxFaces = config.detector.maxFaces;
 if (this.shouldUpdateRegionsOfInterest()) {
 const {boxes, scaleFactor} = await this.boundingBoxDetector.getBoundingBoxes(input);
 if (boxes.length === 0) {
@@ -592,7 +592,7 @@ var require_pipeline = __commonJS((exports2) => {
 const [, flag, coords] = this.meshDetector.predict(face);
 const coordsReshaped = tf2.reshape(coords, [-1, 3]);
 let rawCoords = coordsReshaped.arraySync();
-if (predictIrises) {
+if (config.iris.enabled) {
 const {box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop} = this.getEyeBox(rawCoords, face, LEFT_EYE_BOUNDS[0], LEFT_EYE_BOUNDS[1], true);
 const {box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop} = this.getEyeBox(rawCoords, face, RIGHT_EYE_BOUNDS[0], RIGHT_EYE_BOUNDS[1]);
 const eyePredictions = this.irisModel.predict(tf2.concat([leftEyeCrop, rightEyeCrop]));
@@ -618,7 +618,7 @@ var require_pipeline = __commonJS((exports2) => {
 const transformedCoordsData = this.transformRawCoords(rawCoords, box, angle, rotationMatrix);
 tf2.dispose(rawCoords);
 const landmarksBox = bounding.enlargeBox(this.calculateLandmarksBoundingBox(transformedCoordsData));
-if (predictMesh) {
+if (config.mesh.enabled) {
 const transformedCoords = tf2.tensor2d(transformedCoordsData);
 this.regionsOfInterest[i] = {...landmarksBox, landmarks: transformedCoords.arraySync()};
 const prediction2 = {
@@ -3836,7 +3836,7 @@ var require_facemesh = __commonJS((exports2) => {
 input = tf2.browser.fromPixels(input);
 return input.toFloat().expandDims(0);
 });
-const predictions = await this.pipeline.predict(image, this.config.iris.enabled, this.config.mesh.enabled);
+const predictions = await this.pipeline.predict(image, config);
 tf2.dispose(image);
 const results = [];
 for (const prediction of predictions || []) {
@@ -3920,12 +3920,14 @@ var require_ssrnet = __commonJS((exports2) => {
 const obj = {};
 if (config.face.age.enabled) {
 const ageT = await models2.age.predict(enhance);
-obj.age = Math.trunc(10 * ageT.dataSync()[0]) / 10;
+const data = await ageT.data();
+obj.age = Math.trunc(10 * data[0]) / 10;
 tf2.dispose(ageT);
 }
 if (config.face.gender.enabled) {
 const genderT = await models2.gender.predict(enhance);
-obj.gender = Math.trunc(100 * genderT.dataSync()[0]) < 50 ? "female" : "male";
+const data = await genderT.data();
+obj.gender = Math.trunc(100 * data[0]) < 50 ? "female" : "male";
 tf2.dispose(genderT);
 }
 tf2.dispose(enhance);
@@ -4456,15 +4458,13 @@ var require_modelPoseNet = __commonJS((exports2) => {
 const decodeMultiple = require_decodeMultiple();
 const util = require_util2();
 class PoseNet {
-constructor(net, inputResolution) {
+constructor(net) {
 this.baseModel = net;
-this.inputResolution = inputResolution;
 }
 async estimatePoses(input, config) {
-const outputStride = this.baseModel.outputStride;
-const inputResolution = this.inputResolution;
+const outputStride = config.outputStride;
 const [height, width] = util.getInputTensorDimensions(input);
-const {resized, padding} = util.padAndResizeTo(input, [inputResolution, inputResolution]);
+const {resized, padding} = util.padAndResizeTo(input, [config.inputResolution, config.inputResolution]);
 const {heatmapScores, offsets, displacementFwd, displacementBwd} = this.baseModel.predict(resized);
 const allTensorBuffers = await util.toTensorBuffers3D([heatmapScores, offsets, displacementFwd, displacementBwd]);
 const scoresBuffer = allTensorBuffers[0];
@@ -4472,7 +4472,7 @@ var require_modelPoseNet = __commonJS((exports2) => {
 const displacementsFwdBuffer = allTensorBuffers[2];
 const displacementsBwdBuffer = allTensorBuffers[3];
 const poses = await decodeMultiple.decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, outputStride, config.maxDetections, config.scoreThreshold, config.nmsRadius);
-const resultPoses = util.scaleAndFlipPoses(poses, [height, width], [inputResolution, inputResolution], padding);
+const resultPoses = util.scaleAndFlipPoses(poses, [height, width], [config.inputResolution, config.inputResolution], padding);
 heatmapScores.dispose();
 offsets.dispose();
 displacementFwd.dispose();
@@ -4486,10 +4486,9 @@ var require_modelPoseNet = __commonJS((exports2) => {
 }
 exports2.PoseNet = PoseNet;
 async function loadMobileNet(config) {
-const outputStride = config.outputStride;
 const graphModel = await tf2.loadGraphModel(config.modelPath);
-const mobilenet = new modelMobileNet.MobileNet(graphModel, outputStride);
-return new PoseNet(mobilenet, config.inputResolution);
+const mobilenet = new modelMobileNet.MobileNet(graphModel, config.outputStride);
+return new PoseNet(mobilenet);
 }
 async function load(config) {
 return loadMobileNet(config);
@@ -4595,17 +4594,14 @@ var require_handdetector = __commonJS((exports2) => {
 const tf2 = require("@tensorflow/tfjs");
 const bounding = require_box2();
 class HandDetector {
-constructor(model, width, height, anchors, iouThreshold, scoreThreshold, maxHands) {
+constructor(model, anchors, config) {
 this.model = model;
-this.width = width;
-this.height = height;
-this.iouThreshold = iouThreshold;
-this.scoreThreshold = scoreThreshold;
-this.maxHands = maxHands;
+this.width = config.inputSize;
+this.height = config.inputSize;
 this.anchors = anchors.map((anchor) => [anchor.x_center, anchor.y_center]);
 this.anchorsTensor = tf2.tensor2d(this.anchors);
-this.inputSizeTensor = tf2.tensor1d([width, height]);
-this.doubleInputSizeTensor = tf2.tensor1d([width * 2, height * 2]);
+this.inputSizeTensor = tf2.tensor1d([config.inputSize, config.inputSize]);
+this.doubleInputSizeTensor = tf2.tensor1d([config.inputSize * 2, config.inputSize * 2]);
 }
 normalizeBoxes(boxes) {
 return tf2.tidy(() => {
@@ -4659,9 +4655,12 @@ var require_handdetector = __commonJS((exports2) => {
 });
 return detectedHands;
 }
-async estimateHandBounds(input) {
+async estimateHandBounds(input, config) {
 const inputHeight = input.shape[1];
 const inputWidth = input.shape[2];
+this.iouThreshold = config.iouThreshold;
+this.scoreThreshold = config.scoreThreshold;
+this.maxHands = config.maxHands;
 const image = tf2.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
 const predictions = await this.getBoundingBoxes(image);
 image.dispose();
@@ -4775,24 +4774,20 @@ var require_pipeline2 = __commonJS((exports2) => {
 const util = require_util3();
 const UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD = 0.8;
 const PALM_BOX_SHIFT_VECTOR = [0, -0.4];
-const PALM_BOX_ENLARGE_FACTOR = 3;
 const HAND_BOX_SHIFT_VECTOR = [0, -0.1];
 const HAND_BOX_ENLARGE_FACTOR = 1.65;
 const PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2];
 const PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0;
 const PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2;
 class HandPipeline {
-constructor(boundingBoxDetector, meshDetector, meshWidth, meshHeight, maxContinuousChecks, detectionConfidence, maxHands) {
+constructor(boundingBoxDetector, meshDetector, config) {
 this.regionsOfInterest = [];
 this.runsWithoutHandDetector = 0;
 this.boundingBoxDetector = boundingBoxDetector;
 this.meshDetector = meshDetector;
-this.maxContinuousChecks = maxContinuousChecks;
-this.detectionConfidence = detectionConfidence;
-this.maxHands = maxHands;
-this.meshWidth = meshWidth;
-this.meshHeight = meshHeight;
-this.maxHandsNumber = 1;
+this.meshWidth = config.inputSize;
+this.meshHeight = config.inputSize;
+this.enlargeFactor = config.enlargeFactor;
 }
 getBoxForPalmLandmarks(palmLandmarks, rotationMatrix) {
 const rotatedPalmLandmarks = palmLandmarks.map((coord) => {
@@ -4800,7 +4795,7 @@ var require_pipeline2 = __commonJS((exports2) => {
 return util.rotatePoint(homogeneousCoordinate, rotationMatrix);
 });
 const boxAroundPalm = this.calculateLandmarksBoundingBox(rotatedPalmLandmarks);
-return bounding.enlargeBox(bounding.squarifyBox(bounding.shiftBox(boxAroundPalm, PALM_BOX_SHIFT_VECTOR)), PALM_BOX_ENLARGE_FACTOR);
+return bounding.enlargeBox(bounding.squarifyBox(bounding.shiftBox(boxAroundPalm, PALM_BOX_SHIFT_VECTOR)), this.enlargeFactor);
 }
 getBoxForHandLandmarks(landmarks) {
 const boundingBox = this.calculateLandmarksBoundingBox(landmarks);
@@ -4837,10 +4832,13 @@ var require_pipeline2 = __commonJS((exports2) => {
 coord[2]
 ]);
 }
-async estimateHand(image, config) {
+async estimateHands(image, config) {
+this.maxContinuousChecks = config.skipFrames;
+this.detectionConfidence = config.minConfidence;
+this.maxHands = config.maxHands;
 const useFreshBox = this.shouldUpdateRegionsOfInterest();
 if (useFreshBox === true) {
-const boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image);
+const boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config);
 this.regionsOfInterest = [];
 for (const i in boundingBoxPredictions) {
 this.updateRegionsOfInterest(boundingBoxPredictions[i], true, i);
@@ -4926,7 +4924,7 @@ var require_pipeline2 = __commonJS((exports2) => {
 }
 }
 shouldUpdateRegionsOfInterest() {
-return this.regionsOfInterest === 0 || this.runsWithoutHandDetector >= this.maxContinuousChecks;
+return !this.regionsOfInterest || this.regionsOfInterest.length === 0 || this.runsWithoutHandDetector >= this.maxContinuousChecks;
 }
 }
 exports2.HandPipeline = HandPipeline;
@@ -4938,44 +4936,21 @@ var require_handpose = __commonJS((exports2) => {
 const hand = require_handdetector();
 const keypoints = require_keypoints3();
 const pipe = require_pipeline2();
-async function loadHandDetectorModel(url) {
-return tf2.loadGraphModel(url, {fromTFHub: url.includes("tfhub.dev")});
-}
-async function loadHandPoseModel(url) {
-return tf2.loadGraphModel(url, {fromTFHub: url.includes("tfhub.dev")});
-}
-async function loadAnchors(url) {
-if (tf2.env().features.IS_NODE) {
-const fs = require("fs");
-const data = await fs.readFileSync(url.replace("file://", ""));
-return JSON.parse(data);
-}
-return tf2.util.fetch(url).then((d) => d.json());
-}
-async function load(config) {
-const [ANCHORS, handDetectorModel, handPoseModel] = await Promise.all([
-loadAnchors(config.detector.anchors),
-loadHandDetectorModel(config.detector.modelPath),
-loadHandPoseModel(config.skeleton.modelPath)
-]);
-const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold, config.maxHands);
-const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence, config.maxHands);
-const handpose2 = new HandPose(pipeline);
-return handpose2;
-}
-exports2.load = load;
 class HandPose {
 constructor(pipeline) {
 this.pipeline = pipeline;
 }
 async estimateHands(input, config) {
+this.maxContinuousChecks = config.skipFrames;
+this.detectionConfidence = config.minConfidence;
+this.maxHands = config.maxHands;
 const image = tf2.tidy(() => {
 if (!(input instanceof tf2.Tensor)) {
 input = tf2.browser.fromPixels(input);
 }
 return input.toFloat().expandDims(0);
 });
-const predictions = await this.pipeline.estimateHand(image, config);
+const predictions = await this.pipeline.estimateHands(image, config);
 image.dispose();
 const hands = [];
 if (!predictions)
@@ -4998,6 +4973,26 @@ var require_handpose = __commonJS((exports2) => {
 }
 }
 exports2.HandPose = HandPose;
+async function loadAnchors(url) {
+if (tf2.env().features.IS_NODE) {
+const fs = require("fs");
+const data = await fs.readFileSync(url.replace("file://", ""));
+return JSON.parse(data);
+}
+return tf2.util.fetch(url).then((d) => d.json());
+}
+async function load(config) {
+const [anchors, handDetectorModel, handPoseModel] = await Promise.all([
+loadAnchors(config.detector.anchors),
+tf2.loadGraphModel(config.detector.modelPath, {fromTFHub: config.detector.modelPath.includes("tfhub.dev")}),
+tf2.loadGraphModel(config.skeleton.modelPath, {fromTFHub: config.skeleton.modelPath.includes("tfhub.dev")})
+]);
+const detector = new hand.HandDetector(handDetectorModel, anchors, config);
+const pipeline = new pipe.HandPipeline(detector, handPoseModel, config);
+const handpose2 = new HandPose(pipeline);
+return handpose2;
+}
+exports2.load = load;
 });
 // src/config.js
@@ -5015,7 +5010,7 @@ var require_config = __commonJS((exports2) => {
 skipFrames: 10,
 minConfidence: 0.5,
 iouThreshold: 0.3,
-scoreThreshold: 0.5
+scoreThreshold: 0.7
 },
 mesh: {
 enabled: true,
@@ -5025,7 +5020,8 @@ var require_config = __commonJS((exports2) => {
 iris: {
 enabled: true,
 modelPath: "../models/iris/model.json",
-inputSize: 192
+enlargeFactor: 2.3,
+inputSize: 64
 },
 age: {
 enabled: true,
@@ -5044,7 +5040,7 @@ var require_config = __commonJS((exports2) => {
 inputResolution: 257,
 outputStride: 16,
 maxDetections: 5,
-scoreThreshold: 0.5,
+scoreThreshold: 0.7,
 nmsRadius: 20
 },
 hand: {
@@ -5053,7 +5049,8 @@ var require_config = __commonJS((exports2) => {
 skipFrames: 10,
 minConfidence: 0.5,
 iouThreshold: 0.3,
-scoreThreshold: 0.5,
+scoreThreshold: 0.7,
+enlargeFactor: 1.65,
 maxHands: 2,
 detector: {
 anchors: "../models/handdetect/anchors.json",
@@ -5115,17 +5112,27 @@ async function detect(input, userConfig) {
 savedWebglPackDepthwiseConvFlag = tf.env().get("WEBGL_PACK_DEPTHWISECONV");
 tf.env().set("WEBGL_PACK_DEPTHWISECONV", true);
 }
+const perf = {};
+let timeStamp;
+timeStamp = performance.now();
 let poseRes = [];
 if (config.body.enabled)
 poseRes = await models.posenet.estimatePoses(input, config.body);
+perf.body = Math.trunc(performance.now() - timeStamp);
+timeStamp = performance.now();
 let handRes = [];
 if (config.hand.enabled)
 handRes = await models.handpose.estimateHands(input, config.hand);
+perf.hand = Math.trunc(performance.now() - timeStamp);
 const faceRes = [];
 if (config.face.enabled) {
+timeStamp = performance.now();
 const faces = await models.facemesh.estimateFaces(input, config.face);
+perf.face = Math.trunc(performance.now() - timeStamp);
 for (const face of faces) {
+timeStamp = performance.now();
 const ssrdata = config.face.age.enabled || config.face.gender.enabled ? await ssrnet.predict(face.image, config) : {};
+perf.agegender = Math.trunc(performance.now() - timeStamp);
 face.image.dispose();
 const iris = face.annotations.leftEyeIris && face.annotations.rightEyeIris ? Math.max(face.annotations.leftEyeIris[3][0] - face.annotations.leftEyeIris[1][0], face.annotations.rightEyeIris[3][0] - face.annotations.rightEyeIris[1][0]) : 0;
 faceRes.push({
@@ -5141,7 +5148,9 @@ async function detect(input, userConfig) {
 }
 tf.env().set("WEBGL_PACK_DEPTHWISECONV", savedWebglPackDepthwiseConvFlag);
 tf.engine().endScope();
-resolve({face: faceRes, body: poseRes, hand: handRes});
+perf.total = Object.values(perf).reduce((a, b) => a + b);
+console.log("total", perf.total);
+resolve({face: faceRes, body: poseRes, hand: handRes, performance: perf});
 });
 }
 exports.detect = detect;


@@ -18,7 +18,8 @@ export default {
 iris: {
 enabled: true,
 modelPath: '../models/iris/model.json',
-inputSize: 192, // fixed value
+enlargeFactor: 2.3, // empiric tuning
+inputSize: 64, // fixed value
 },
 age: {
 enabled: true,
@@ -47,6 +48,7 @@ export default {
 minConfidence: 0.5,
 iouThreshold: 0.3,
 scoreThreshold: 0.7,
+enlargeFactor: 1.65, // empiric tuning
 maxHands: 2,
 detector: {
 anchors: '../models/handdetect/anchors.json',
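
The two new `enlargeFactor` values replace what used to be hard-coded constants; the face and hand pipelines now read them from config on every call, so they can be tuned at runtime. A sketch, assuming the config passed to `human.detect()` is merged over these defaults (the override values here are purely illustrative):

```js
// override the empiric tuning factors without rebuilding the library
const result = await human.detect(input, {
  face: { iris: { enlargeFactor: 2.5 } }, // eye-crop enlargement fed to the iris model
  hand: { enlargeFactor: 1.8 },           // palm-box enlargement used by the hand pipeline
});
```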

@@ -17,7 +17,7 @@ class MediaPipeFaceMesh {
 if (!(input instanceof tf.Tensor)) input = tf.browser.fromPixels(input);
 return input.toFloat().expandDims(0);
 });
-const predictions = await this.pipeline.predict(image, this.config.iris.enabled, this.config.mesh.enabled);
+const predictions = await this.pipeline.predict(image, config);
 tf.dispose(image);
 const results = [];
 for (const prediction of (predictions || [])) {

View File

@@ -36,3 +36,14 @@ exports.MESH_ANNOTATIONS = {
   rightCheek: [205],
   leftCheek: [425],
 };
+exports.MESH_TO_IRIS_INDICES_MAP = [ // A mapping from facemesh model keypoints to iris model keypoints.
+  { key: 'EyeUpper0', indices: [9, 10, 11, 12, 13, 14, 15] },
+  { key: 'EyeUpper1', indices: [25, 26, 27, 28, 29, 30, 31] },
+  { key: 'EyeUpper2', indices: [41, 42, 43, 44, 45, 46, 47] },
+  { key: 'EyeLower0', indices: [0, 1, 2, 3, 4, 5, 6, 7, 8] },
+  { key: 'EyeLower1', indices: [16, 17, 18, 19, 20, 21, 22, 23, 24] },
+  { key: 'EyeLower2', indices: [32, 33, 34, 35, 36, 37, 38, 39, 40] },
+  { key: 'EyeLower3', indices: [54, 55, 56, 57, 58, 59, 60, 61, 62] },
+  { key: 'EyebrowUpper', indices: [63, 64, 65, 66, 67, 68, 69, 70] },
+  { key: 'EyebrowLower', indices: [48, 49, 50, 51, 52, 53] },
+];
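Moving the map into `keypoints` makes the facemesh-to-iris correspondence importable by the pipeline (used below). Each entry resolves to a same-length list of mesh indices once prefixed with the eye side:

```js
const keypoints = require('./keypoints');
// 'EyeUpper0': iris-model points [9..15] map onto mesh annotation 'leftEyeUpper0'
const { key, indices } = keypoints.MESH_TO_IRIS_INDICES_MAP[0];
const meshIndices = keypoints.MESH_ANNOTATIONS[`left${key}`];
console.log(indices.length === meshIndices.length); // true - replacement is 1:1
```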

View File

@@ -19,23 +19,11 @@ const IRIS_UPPER_CENTER_INDEX = 3;
 const IRIS_LOWER_CENTER_INDEX = 4;
 const IRIS_IRIS_INDEX = 71;
 const IRIS_NUM_COORDINATES = 76;
-const ENLARGE_EYE_RATIO = 2.3; // Factor by which to enlarge the box around the eye landmarks so the input region matches the expectations of the iris model.
-const IRIS_MODEL_INPUT_SIZE = 64;
-const MESH_TO_IRIS_INDICES_MAP = [ // A mapping from facemesh model keypoints to iris model keypoints.
-  { key: 'EyeUpper0', indices: [9, 10, 11, 12, 13, 14, 15] },
-  { key: 'EyeUpper1', indices: [25, 26, 27, 28, 29, 30, 31] },
-  { key: 'EyeUpper2', indices: [41, 42, 43, 44, 45, 46, 47] },
-  { key: 'EyeLower0', indices: [0, 1, 2, 3, 4, 5, 6, 7, 8] },
-  { key: 'EyeLower1', indices: [16, 17, 18, 19, 20, 21, 22, 23, 24] },
-  { key: 'EyeLower2', indices: [32, 33, 34, 35, 36, 37, 38, 39, 40] },
-  { key: 'EyeLower3', indices: [54, 55, 56, 57, 58, 59, 60, 61, 62] },
-  { key: 'EyebrowUpper', indices: [63, 64, 65, 66, 67, 68, 69, 70] },
-  { key: 'EyebrowLower', indices: [48, 49, 50, 51, 52, 53] },
-];
 // Replace the raw coordinates returned by facemesh with refined iris model coordinates. Update the z coordinate to be an average of the original and the new. This produces the best visual effect.
 function replaceRawCoordinates(rawCoords, newCoords, prefix, keys) {
-  for (let i = 0; i < MESH_TO_IRIS_INDICES_MAP.length; i++) {
-    const { key, indices } = MESH_TO_IRIS_INDICES_MAP[i];
+  for (let i = 0; i < keypoints.MESH_TO_IRIS_INDICES_MAP.length; i++) {
+    const { key, indices } = keypoints.MESH_TO_IRIS_INDICES_MAP[i];
     const originalIndices = keypoints.MESH_ANNOTATIONS[`${prefix}${key}`];
     const shouldReplaceAllKeys = keys == null;
     if (shouldReplaceAllKeys || keys.includes(key)) {
@@ -60,8 +48,8 @@ class Pipeline {
     this.irisModel = irisModel;
     this.meshWidth = config.mesh.inputSize;
     this.meshHeight = config.mesh.inputSize;
-    this.skipFrames = config.detector.skipFrames;
-    this.maxFaces = config.detector.maxFaces;
+    this.irisSize = config.iris.inputSize;
+    this.irisEnlarge = config.iris.enlargeFactor;
   }
   transformRawCoords(rawCoords, box, angle, rotationMatrix) {
@@ -93,13 +81,13 @@ class Pipeline {
   // Returns a box describing a cropped region around the eye fit for passing to the iris model.
   getEyeBox(rawCoords, face, eyeInnerCornerIndex, eyeOuterCornerIndex, flip = false) {
-    const box = bounding.squarifyBox(bounding.enlargeBox(this.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), ENLARGE_EYE_RATIO));
+    const box = bounding.squarifyBox(bounding.enlargeBox(this.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), this.irisEnlarge));
     const boxSize = bounding.getBoxSize(box);
     let crop = tf.image.cropAndResize(face, [[
       box.startPoint[1] / this.meshHeight,
       box.startPoint[0] / this.meshWidth,
       box.endPoint[1] / this.meshHeight,
       box.endPoint[0] / this.meshWidth,
-    ]], [0], [IRIS_MODEL_INPUT_SIZE, IRIS_MODEL_INPUT_SIZE]);
+    ]], [0], [this.irisSize, this.irisSize]);
     if (flip) {
       crop = tf.image.flipLeftRight(crop);
     }
@@ -115,9 +103,9 @@ class Pipeline {
       const z = eyeData[i * 3 + 2];
       eyeRawCoords.push([
         (flip
-          ? (1 - (x / IRIS_MODEL_INPUT_SIZE))
-          : (x / IRIS_MODEL_INPUT_SIZE)) * eyeBoxSize[0] + eyeBox.startPoint[0],
-        (y / IRIS_MODEL_INPUT_SIZE) * eyeBoxSize[1] + eyeBox.startPoint[1], z,
+          ? (1 - (x / this.irisSize))
+          : (x / this.irisSize)) * eyeBoxSize[0] + eyeBox.startPoint[0],
+        (y / this.irisSize) * eyeBoxSize[1] + eyeBox.startPoint[1], z,
       ]);
     }
     return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(IRIS_IRIS_INDEX) };
@@ -140,7 +128,9 @@ class Pipeline {
     });
   }
-  async predict(input, predictIrises, predictMesh) {
+  async predict(input, config) {
+    this.skipFrames = config.detector.skipFrames;
+    this.maxFaces = config.detector.maxFaces;
     if (this.shouldUpdateRegionsOfInterest()) {
       const { boxes, scaleFactor } = await this.boundingBoxDetector.getBoundingBoxes(input);
       if (boxes.length === 0) {
@@ -189,7 +179,7 @@ class Pipeline {
       const [, flag, coords] = this.meshDetector.predict(face);
       const coordsReshaped = tf.reshape(coords, [-1, 3]);
       let rawCoords = coordsReshaped.arraySync();
-      if (predictIrises) {
+      if (config.iris.enabled) {
         const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = this.getEyeBox(rawCoords, face, LEFT_EYE_BOUNDS[0], LEFT_EYE_BOUNDS[1], true);
         const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = this.getEyeBox(rawCoords, face, RIGHT_EYE_BOUNDS[0], RIGHT_EYE_BOUNDS[1]);
         const eyePredictions = (this.irisModel.predict(tf.concat([leftEyeCrop, rightEyeCrop])));
@@ -216,7 +206,7 @@ class Pipeline {
       const transformedCoordsData = this.transformRawCoords(rawCoords, box, angle, rotationMatrix);
       tf.dispose(rawCoords);
       const landmarksBox = bounding.enlargeBox(this.calculateLandmarksBoundingBox(transformedCoordsData));
-      if (predictMesh) {
+      if (config.mesh.enabled) {
         const transformedCoords = tf.tensor2d(transformedCoordsData);
         this.regionsOfInterest[i] = { ...landmarksBox, landmarks: transformedCoords.arraySync() };
         const prediction = {
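The net effect: the mesh pipeline no longer freezes tuning values at construction time, it reads them from the config passed on every call. A sketch of the new call shape (values illustrative, matching the config.js defaults where shown above):

```js
// flags and sizes now travel with the config object
const predictions = await pipeline.predict(image, {
  detector: { skipFrames: 15, maxFaces: 10 },
  mesh: { enabled: true, inputSize: 192 },
  iris: { enabled: true, inputSize: 64, enlargeFactor: 2.3 },
});
```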

View File

@@ -7,6 +7,7 @@ function getBoxSize(box) {
   ];
 }
 exports.getBoxSize = getBoxSize;
+
 function getBoxCenter(box) {
   return [
     box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2,
@@ -14,6 +15,7 @@ function getBoxCenter(box) {
   ];
 }
 exports.getBoxCenter = getBoxCenter;
+
 function cutBoxFromImageAndResize(box, image, cropSize) {
   const h = image.shape[1];
   const w = image.shape[2];
@@ -24,6 +26,7 @@ function cutBoxFromImageAndResize(box, image, cropSize) {
   return tf.image.cropAndResize(image, boxes, [0], cropSize);
 }
 exports.cutBoxFromImageAndResize = cutBoxFromImageAndResize;
+
 function scaleBoxCoordinates(box, factor) {
   const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
   const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
@@ -34,6 +37,7 @@ function scaleBoxCoordinates(box, factor) {
   return { startPoint, endPoint, palmLandmarks };
 }
 exports.scaleBoxCoordinates = scaleBoxCoordinates;
+
 function enlargeBox(box, factor = 1.5) {
   const center = getBoxCenter(box);
   const size = getBoxSize(box);
@@ -43,6 +47,7 @@ function enlargeBox(box, factor = 1.5) {
   return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
 }
 exports.enlargeBox = enlargeBox;
+
 function squarifyBox(box) {
   const centers = getBoxCenter(box);
   const size = getBoxSize(box);
@@ -53,6 +58,7 @@ function squarifyBox(box) {
   return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
 }
 exports.squarifyBox = squarifyBox;
+
 function shiftBox(box, shiftFactor) {
   const boxSize = [
     box.endPoint[0] - box.startPoint[0], box.endPoint[1] - box.startPoint[1],
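These helpers compose; a quick worked example of the squarify-then-enlarge pattern used by both pipelines, assuming `squarifyBox` grows the shorter edge to match the longer one around the same center:

```js
const box = { startPoint: [10, 20], endPoint: [50, 40], palmLandmarks: [] };
getBoxCenter(box);               // [30, 30]
getBoxSize(box);                 // [40, 20]
const square = squarifyBox(box); // startPoint [10, 10], endPoint [50, 50]
enlargeBox(square);              // default factor 1.5 -> startPoint [0, 0], endPoint [60, 60]
```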

View File

@@ -2,17 +2,14 @@ const tf = require('@tensorflow/tfjs');
 const bounding = require('./box');
 class HandDetector {
-  constructor(model, width, height, anchors, iouThreshold, scoreThreshold, maxHands) {
+  constructor(model, anchors, config) {
     this.model = model;
-    this.width = width;
-    this.height = height;
-    this.iouThreshold = iouThreshold;
-    this.scoreThreshold = scoreThreshold;
-    this.maxHands = maxHands;
+    this.width = config.inputSize;
+    this.height = config.inputSize;
     this.anchors = anchors.map((anchor) => [anchor.x_center, anchor.y_center]);
     this.anchorsTensor = tf.tensor2d(this.anchors);
-    this.inputSizeTensor = tf.tensor1d([width, height]);
-    this.doubleInputSizeTensor = tf.tensor1d([width * 2, height * 2]);
+    this.inputSizeTensor = tf.tensor1d([config.inputSize, config.inputSize]);
+    this.doubleInputSizeTensor = tf.tensor1d([config.inputSize * 2, config.inputSize * 2]);
   }
   normalizeBoxes(boxes) {
@@ -73,9 +70,12 @@ class HandDetector {
    *
    * @param input The image to classify.
    */
-  async estimateHandBounds(input) {
+  async estimateHandBounds(input, config) {
     const inputHeight = input.shape[1];
     const inputWidth = input.shape[2];
+    this.iouThreshold = config.iouThreshold;
+    this.scoreThreshold = config.scoreThreshold;
+    this.maxHands = config.maxHands;
     const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
     const predictions = await this.getBoundingBoxes(image);
     image.dispose();
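With thresholds applied at call time, one detector instance can be retuned frame to frame. A sketch of the new shape (inputSize value illustrative):

```js
const detector = new HandDetector(handDetectorModel, anchors, { inputSize: 256 });
// NMS thresholds and hand count now come from the per-call config
const bounds = await detector.estimateHandBounds(imageTensor, {
  iouThreshold: 0.3, scoreThreshold: 0.7, maxHands: 2,
});
```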

View File

@@ -3,71 +3,22 @@ const hand = require('./handdetector');
 const keypoints = require('./keypoints');
 const pipe = require('./pipeline');
-// Load the bounding box detector model.
-async function loadHandDetectorModel(url) {
-  return tf.loadGraphModel(url, { fromTFHub: url.includes('tfhub.dev') });
-}
-// Load the mesh detector model.
-async function loadHandPoseModel(url) {
-  return tf.loadGraphModel(url, { fromTFHub: url.includes('tfhub.dev') });
-}
-// In single shot detector pipelines, the output space is discretized into a set
-// of bounding boxes, each of which is assigned a score during prediction. The
-// anchors define the coordinates of these boxes.
-async function loadAnchors(url) {
-  if (tf.env().features.IS_NODE) {
-    // eslint-disable-next-line global-require
-    const fs = require('fs');
-    const data = await fs.readFileSync(url.replace('file://', ''));
-    return JSON.parse(data);
-  }
-  return tf.util.fetch(url).then((d) => d.json());
-}
-/**
- * Load handpose.
- *
- * @param config A configuration object with the following properties:
- * - `maxContinuousChecks` How many frames to go without running the bounding
- * box detector. Defaults to infinity. Set to a lower value if you want a safety
- * net in case the mesh detector produces consistently flawed predictions.
- * - `detectionConfidence` Threshold for discarding a prediction. Defaults to
- * 0.8.
- * - `iouThreshold` A float representing the threshold for deciding whether
- * boxes overlap too much in non-maximum suppression. Must be between [0, 1].
- * Defaults to 0.3.
- * - `scoreThreshold` A threshold for deciding when to remove boxes based
- * on score in non-maximum suppression. Defaults to 0.75.
- */
-async function load(config) {
-  const [ANCHORS, handDetectorModel, handPoseModel] = await Promise.all([
-    loadAnchors(config.detector.anchors),
-    loadHandDetectorModel(config.detector.modelPath),
-    loadHandPoseModel(config.skeleton.modelPath),
-  ]);
-  const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold, config.maxHands);
-  const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence, config.maxHands);
-  // eslint-disable-next-line no-use-before-define
-  const handpose = new HandPose(pipeline);
-  return handpose;
-}
-exports.load = load;
 class HandPose {
   constructor(pipeline) {
     this.pipeline = pipeline;
   }
   async estimateHands(input, config) {
+    this.maxContinuousChecks = config.skipFrames;
+    this.detectionConfidence = config.minConfidence;
+    this.maxHands = config.maxHands;
     const image = tf.tidy(() => {
       if (!(input instanceof tf.Tensor)) {
         input = tf.browser.fromPixels(input);
       }
       return input.toFloat().expandDims(0);
     });
-    const predictions = await this.pipeline.estimateHand(image, config);
+    const predictions = await this.pipeline.estimateHands(image, config);
     image.dispose();
     const hands = [];
     if (!predictions) return hands;
@@ -88,3 +39,26 @@ class HandPose {
   }
 }
 exports.HandPose = HandPose;
+async function loadAnchors(url) {
+  if (tf.env().features.IS_NODE) {
+    // eslint-disable-next-line global-require
+    const fs = require('fs');
+    const data = await fs.readFileSync(url.replace('file://', ''));
+    return JSON.parse(data);
+  }
+  return tf.util.fetch(url).then((d) => d.json());
+}
+async function load(config) {
+  const [anchors, handDetectorModel, handPoseModel] = await Promise.all([
+    loadAnchors(config.detector.anchors),
+    tf.loadGraphModel(config.detector.modelPath, { fromTFHub: config.detector.modelPath.includes('tfhub.dev') }),
+    tf.loadGraphModel(config.skeleton.modelPath, { fromTFHub: config.skeleton.modelPath.includes('tfhub.dev') }),
+  ]);
+  const detector = new hand.HandDetector(handDetectorModel, anchors, config);
+  const pipeline = new pipe.HandPipeline(detector, handPoseModel, config);
+  const handpose = new HandPose(pipeline);
+  return handpose;
+}
+exports.load = load;
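Loading stays a one-time cost while all tuning rides on the config passed to each call, e.g.:

```js
// load anchors + both graph models in parallel, then detect per frame
const handpose = await load(config.hand);
const hands = await handpose.estimateHands(video, config.hand);
```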

View File

@@ -4,7 +4,6 @@ const util = require('./util');
 const UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD = 0.8;
 const PALM_BOX_SHIFT_VECTOR = [0, -0.4];
-const PALM_BOX_ENLARGE_FACTOR = 3;
 const HAND_BOX_SHIFT_VECTOR = [0, -0.1];
 const HAND_BOX_ENLARGE_FACTOR = 1.65;
 const PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2];
@@ -13,18 +12,14 @@ const PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2;
 // The Pipeline coordinates between the bounding box and skeleton models.
 class HandPipeline {
-  constructor(boundingBoxDetector, meshDetector, meshWidth, meshHeight, maxContinuousChecks, detectionConfidence, maxHands) {
-    // An array of hand bounding boxes.
+  constructor(boundingBoxDetector, meshDetector, config) {
     this.regionsOfInterest = [];
     this.runsWithoutHandDetector = 0;
     this.boundingBoxDetector = boundingBoxDetector;
     this.meshDetector = meshDetector;
-    this.maxContinuousChecks = maxContinuousChecks;
-    this.detectionConfidence = detectionConfidence;
-    this.maxHands = maxHands;
-    this.meshWidth = meshWidth;
-    this.meshHeight = meshHeight;
-    this.maxHandsNumber = 1; // TODO(annxingyuan): Add multi-hand support.
+    this.meshWidth = config.inputSize;
+    this.meshHeight = config.inputSize;
+    this.enlargeFactor = config.enlargeFactor;
   }
   // Get the bounding box surrounding the hand, given palm landmarks.
@@ -36,7 +31,7 @@ class HandPipeline {
     const boxAroundPalm = this.calculateLandmarksBoundingBox(rotatedPalmLandmarks);
     // boxAroundPalm only surrounds the palm - therefore we shift it
     // upwards so it will capture fingers once enlarged + squarified.
-    return bounding.enlargeBox(bounding.squarifyBox(bounding.shiftBox(boxAroundPalm, PALM_BOX_SHIFT_VECTOR)), PALM_BOX_ENLARGE_FACTOR);
+    return bounding.enlargeBox(bounding.squarifyBox(bounding.shiftBox(boxAroundPalm, PALM_BOX_SHIFT_VECTOR)), this.enlargeFactor);
   }
   // Get the bounding box surrounding the hand, given all hand landmarks.
@@ -80,10 +75,13 @@ class HandPipeline {
     ]);
   }
-  async estimateHand(image, config) {
+  async estimateHands(image, config) {
+    this.maxContinuousChecks = config.skipFrames;
+    this.detectionConfidence = config.minConfidence;
+    this.maxHands = config.maxHands;
     const useFreshBox = this.shouldUpdateRegionsOfInterest();
     if (useFreshBox === true) {
-      const boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image);
+      const boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config);
       this.regionsOfInterest = [];
       for (const i in boundingBoxPredictions) {
         this.updateRegionsOfInterest(boundingBoxPredictions[i], true /* force update */, i);
@@ -174,7 +172,7 @@ class HandPipeline {
   }
   shouldUpdateRegionsOfInterest() {
-    return (this.regionsOfInterest === 0) || (this.runsWithoutHandDetector >= this.maxContinuousChecks);
+    return !this.regionsOfInterest || (this.regionsOfInterest.length === 0) || (this.runsWithoutHandDetector >= this.maxContinuousChecks);
  }
 }
 exports.HandPipeline = HandPipeline;
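The last hunk is a genuine bug fix: `regionsOfInterest` is an array, so the old strict comparison against `0` could never be true and a fresh hand detection was only ever triggered by the skip-frame counter. A quick illustration:

```js
console.log([] === 0);        // false - the old guard never fired on an empty ROI list
console.log([].length === 0); // true  - the new guard forces a fresh detection
```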

View File

@@ -2,11 +2,13 @@ function normalizeRadians(angle) {
   return angle - 2 * Math.PI * Math.floor((angle + Math.PI) / (2 * Math.PI));
 }
 exports.normalizeRadians = normalizeRadians;
+
 function computeRotation(point1, point2) {
   const radians = Math.PI / 2 - Math.atan2(-(point2[1] - point1[1]), point2[0] - point1[0]);
   return normalizeRadians(radians);
 }
 exports.computeRotation = computeRotation;
+
 const buildTranslationMatrix = (x, y) => ([[1, 0, x], [0, 1, y], [0, 0, 1]]);
 function dot(v1, v2) {
   let product = 0;
@@ -16,6 +18,7 @@ function dot(v1, v2) {
   return product;
 }
 exports.dot = dot;
+
 function getColumnFrom2DArr(arr, columnIndex) {
   const column = [];
   for (let i = 0; i < arr.length; i++) {
@@ -24,6 +27,7 @@ function getColumnFrom2DArr(arr, columnIndex) {
   return column;
 }
 exports.getColumnFrom2DArr = getColumnFrom2DArr;
+
 function multiplyTransformMatrices(mat1, mat2) {
   const product = [];
   const size = mat1.length;
@@ -45,6 +49,7 @@ function buildRotationMatrix(rotation, center) {
   return multiplyTransformMatrices(translationTimesRotation, negativeTranslationMatrix);
 }
 exports.buildRotationMatrix = buildRotationMatrix;
+
 function invertTransformMatrix(matrix) {
   const rotationComponent = [[matrix[0][0], matrix[1][0]], [matrix[0][1], matrix[1][1]]];
   const translationComponent = [matrix[0][2], matrix[1][2]];
@@ -59,6 +64,7 @@ function invertTransformMatrix(matrix) {
   ];
 }
 exports.invertTransformMatrix = invertTransformMatrix;
+
 function rotatePoint(homogeneousCoordinate, rotationMatrix) {
   return [
     dot(homogeneousCoordinate, rotationMatrix[0]),
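A worked check of `computeRotation`, since the sign conventions are easy to get wrong (screen y grows downward, so a palm pointing straight up yields zero rotation):

```js
computeRotation([0, 10], [0, 0]); // 0 - point2 directly above point1
computeRotation([0, 0], [10, 0]); // Math.PI / 2 - point2 directly to the right
```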

View File

@@ -50,21 +50,32 @@ async function detect(input, userConfig) {
       tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
     }
+    const perf = {};
+    let timeStamp;
     // run posenet
+    timeStamp = performance.now();
     let poseRes = [];
     if (config.body.enabled) poseRes = await models.posenet.estimatePoses(input, config.body);
+    perf.body = Math.trunc(performance.now() - timeStamp);
     // run handpose
+    timeStamp = performance.now();
     let handRes = [];
     if (config.hand.enabled) handRes = await models.handpose.estimateHands(input, config.hand);
+    perf.hand = Math.trunc(performance.now() - timeStamp);
     // run facemesh, includes blazeface and iris
     const faceRes = [];
     if (config.face.enabled) {
+      timeStamp = performance.now();
       const faces = await models.facemesh.estimateFaces(input, config.face);
+      perf.face = Math.trunc(performance.now() - timeStamp);
       for (const face of faces) {
         // run ssr-net age & gender, inherits face from blazeface
+        timeStamp = performance.now();
         const ssrdata = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
+        perf.agegender = Math.trunc(performance.now() - timeStamp);
         face.image.dispose();
         // iris: array[ bottom, left, top, right, center ]
         const iris = (face.annotations.leftEyeIris && face.annotations.rightEyeIris)
@@ -86,7 +97,9 @@ async function detect(input, userConfig) {
     }
     tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
     tf.engine().endScope();
     // combine results
-    resolve({ face: faceRes, body: poseRes, hand: handRes });
+    perf.total = Object.values(perf).reduce((a, b) => a + b);
+    console.log('total', perf.total);
+    resolve({ face: faceRes, body: poseRes, hand: handRes, performance: perf });
   });
 }
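One subtlety in the total: `perf` holds only module timings at the moment of the reduce, so the sum cannot include itself. For example:

```js
// perf.total sums whichever modules actually ran
Object.values({ body: 20, hand: 25, face: 12, agegender: 5 }).reduce((a, b) => a + b); // 62
```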

View File

@@ -1,13 +1,5 @@
 const tf = require('@tensorflow/tfjs');
-/**
- * PoseNet supports using various convolution neural network models
- * (e.g. ResNet and MobileNetV1) as its underlying base model.
- * The following BaseModel interface defines a unified interface for
- * creating such PoseNet base models. Currently both MobileNet (in
- * ./mobilenet.ts) and ResNet (in ./resnet.ts) implements the BaseModel
- * interface. New base models that conform to the BaseModel interface can be
- * added to PoseNet.
- */
 class BaseModel {
   constructor(model, outputStride) {
     this.model = model;

View File

@@ -4,9 +4,8 @@ const decodeMultiple = require('./decodeMultiple');
 const util = require('./util');
 class PoseNet {
-  constructor(net, inputResolution) {
+  constructor(net) {
     this.baseModel = net;
-    this.inputResolution = inputResolution;
   }
   /**
@@ -30,10 +29,10 @@ class PoseNet {
    * in the same scale as the original image
    */
   async estimatePoses(input, config) {
-    const outputStride = this.baseModel.outputStride;
-    const inputResolution = this.inputResolution;
+    const outputStride = config.outputStride;
+    // const inputResolution = config.inputResolution;
     const [height, width] = util.getInputTensorDimensions(input);
-    const { resized, padding } = util.padAndResizeTo(input, [inputResolution, inputResolution]);
+    const { resized, padding } = util.padAndResizeTo(input, [config.inputResolution, config.inputResolution]);
     const { heatmapScores, offsets, displacementFwd, displacementBwd } = this.baseModel.predict(resized);
     const allTensorBuffers = await util.toTensorBuffers3D([heatmapScores, offsets, displacementFwd, displacementBwd]);
     const scoresBuffer = allTensorBuffers[0];
@@ -41,7 +40,7 @@ class PoseNet {
     const displacementsFwdBuffer = allTensorBuffers[2];
     const displacementsBwdBuffer = allTensorBuffers[3];
     const poses = await decodeMultiple.decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, outputStride, config.maxDetections, config.scoreThreshold, config.nmsRadius);
-    const resultPoses = util.scaleAndFlipPoses(poses, [height, width], [inputResolution, inputResolution], padding);
+    const resultPoses = util.scaleAndFlipPoses(poses, [height, width], [config.inputResolution, config.inputResolution], padding);
     heatmapScores.dispose();
     offsets.dispose();
     displacementFwd.dispose();
@@ -56,10 +55,9 @@ class PoseNet {
 }
 exports.PoseNet = PoseNet;
 async function loadMobileNet(config) {
-  const outputStride = config.outputStride;
   const graphModel = await tf.loadGraphModel(config.modelPath);
-  const mobilenet = new modelMobileNet.MobileNet(graphModel, outputStride);
-  return new PoseNet(mobilenet, config.inputResolution);
+  const mobilenet = new modelMobileNet.MobileNet(graphModel, config.outputStride);
+  return new PoseNet(mobilenet);
 }
 /**
  * Loads the PoseNet model instance from a checkpoint, with the MobileNet architecture. The model to be loaded is configurable using the
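Same pattern as the hand module: the model object slims down and the call site supplies stride and resolution. A usage sketch (values illustrative):

```js
const posenet = await loadMobileNet({ modelPath: '../models/posenet/model.json', outputStride: 16 });
const poses = await posenet.estimatePoses(input, {
  inputResolution: 257, outputStride: 16,
  maxDetections: 5, scoreThreshold: 0.5, nmsRadius: 20,
});
```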

View File

@@ -40,12 +40,14 @@ async function predict(image, config) {
   const obj = {};
   if (config.face.age.enabled) {
     const ageT = await models.age.predict(enhance);
-    obj.age = Math.trunc(10 * ageT.dataSync()[0]) / 10;
+    const data = await ageT.data();
+    obj.age = Math.trunc(10 * data[0]) / 10;
     tf.dispose(ageT);
   }
   if (config.face.gender.enabled) {
     const genderT = await models.gender.predict(enhance);
-    obj.gender = Math.trunc(100 * genderT.dataSync()[0]) < 50 ? 'female' : 'male';
+    const data = await genderT.data();
+    obj.gender = Math.trunc(100 * data[0]) < 50 ? 'female' : 'male';
     tf.dispose(genderT);
   }
   tf.dispose(enhance);
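The switch from `dataSync()` to `data()` matters on the WebGL backend: the synchronous read stalls the main thread until the GPU download completes, while the promise-based read lets other work proceed. Both return the same values:

```js
const t = tf.tensor1d([0.42]);
const a = t.dataSync()[0];     // blocking GPU-to-CPU read
const b = (await t.data())[0]; // non-blocking read, identical result
console.log(a === b);          // true
t.dispose();
```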