implement box caching for movenet

pull/193/head
Vladimir Mandic 2021-09-27 08:53:41 -04:00
parent 479c8a3d74
commit d7f4bcfef9
14 changed files with 5352 additions and 916 deletions

View File

@ -105,7 +105,7 @@ const ui = {
lastFrame: 0, // time of last frame processing lastFrame: 0, // time of last frame processing
viewportSet: false, // internal, has custom viewport been set viewportSet: false, // internal, has custom viewport been set
background: null, // holds instance of segmentation background image background: null, // holds instance of segmentation background image
exceptionHandler: true, // should capture all unhandled exceptions exceptionHandler: false, // should capture all unhandled exceptions
// webrtc // webrtc
useWebRTC: false, // use webrtc as camera source instead of local webcam useWebRTC: false, // use webrtc as camera source instead of local webcam

View File

@ -99,6 +99,31 @@ function mergeDeep(...objects) {
return prev; return prev;
}, {}); }, {});
} }
/**
 * Build a square box centred on a set of keypoints and enlarged by a scale factor.
 *
 * Each keypoint's first entry is treated as x and its second as y; any further
 * entries are ignored.
 *
 * @param {number[][]} keypoints3 - points to enclose
 * @param {number} boxScaleFact2 - multiplier applied to the largest centre-to-edge distance
 * @param {number[]} outputSize2 - divisors for the x axis ([0]) and the y axis ([1]) used to build boxRaw
 * @returns {{box: number[], boxRaw: number[], yxBox: number[]}}
 *   box: [x, y, size, size] truncated to integers;
 *   boxRaw: box with x/width divided by outputSize2[0] and y/height divided by outputSize2[1];
 *   yxBox: [y, x, y + height, x + width] taken from boxRaw
 */
function scaleBox(keypoints3, boxScaleFact2, outputSize2) {
  // With no points Math.max()/Math.min() yield -Infinity/Infinity and every
  // derived value turns into NaN, so report an all-zero box instead.
  if (keypoints3.length === 0) {
    return { box: [0, 0, 0, 0], boxRaw: [0, 0, 0, 0], yxBox: [0, 0, 0, 0] };
  }
  const xs = keypoints3.map((pt) => pt[0]);
  const ys = keypoints3.map((pt) => pt[1]);
  const maxX = Math.max(...xs);
  const minX = Math.min(...xs);
  const maxY = Math.max(...ys);
  const minY = Math.min(...ys);
  const center = [(maxX + minX) / 2, (maxY + minY) / 2];
  // Half-size of the square: the largest distance from the centre to any edge, scaled.
  const diff = Math.max(center[0] - minX, center[1] - minY, -center[0] + maxX, -center[1] + maxY) * boxScaleFact2;
  const box6 = [
    Math.trunc(center[0] - diff),
    Math.trunc(center[1] - diff),
    Math.trunc(2 * diff),
    Math.trunc(2 * diff)
  ];
  const boxRaw3 = [
    box6[0] / outputSize2[0],
    box6[1] / outputSize2[1],
    box6[2] / outputSize2[0],
    box6[3] / outputSize2[1]
  ];
  // Same box re-ordered as [y, x, y + height, x + width].
  const yxBox = [
    boxRaw3[1],
    boxRaw3[0],
    boxRaw3[3] + boxRaw3[1],
    boxRaw3[2] + boxRaw3[0]
  ];
  return { box: box6, boxRaw: boxRaw3, yxBox };
}
// src/config.ts // src/config.ts
var config = { var config = {
@ -355,13 +380,13 @@ function rotatePoint(homogeneousCoordinate, rotationMatrix) {
dot(homogeneousCoordinate, rotationMatrix[1]) dot(homogeneousCoordinate, rotationMatrix[1])
]; ];
} }
function generateAnchors(inputSize3) { function generateAnchors(inputSize4) {
const spec = { strides: [inputSize3 / 16, inputSize3 / 8], anchors: [2, 6] }; const spec = { strides: [inputSize4 / 16, inputSize4 / 8], anchors: [2, 6] };
const anchors3 = []; const anchors3 = [];
for (let i = 0; i < spec.strides.length; i++) { for (let i = 0; i < spec.strides.length; i++) {
const stride = spec.strides[i]; const stride = spec.strides[i];
const gridRows = Math.floor((inputSize3 + stride - 1) / stride); const gridRows = Math.floor((inputSize4 + stride - 1) / stride);
const gridCols = Math.floor((inputSize3 + stride - 1) / stride); const gridCols = Math.floor((inputSize4 + stride - 1) / stride);
const anchorsNum = spec.anchors[i]; const anchorsNum = spec.anchors[i];
for (let gridY = 0; gridY < gridRows; gridY++) { for (let gridY = 0; gridY < gridRows; gridY++) {
const anchorY = stride * (gridY + 0.5); const anchorY = stride * (gridY + 0.5);
@ -378,17 +403,17 @@ function generateAnchors(inputSize3) {
// src/blazeface/blazeface.ts // src/blazeface/blazeface.ts
var keypointsCount = 6; var keypointsCount = 6;
// decodeBounds: turns raw box regression output into [start, end] box coordinates.
// Side-by-side diff rendering: every line carries the previous text followed by the
// updated text; the only difference between the two is the rename inputSize3 -> inputSize4.
//
// Steps, as written below:
//  - columns 1-2 of boxOutputs (slice begin [0, 1], size [-1, 2]) are added to anchors3 to form box centres
//  - columns 3-4 of boxOutputs (slice begin [0, 3], size [-1, 2]) are the box sizes
//  - centres and sizes are divided by the input size, start/end are computed as centre -/+ half size,
//    and both are multiplied by the input size again
//  - start and end are concatenated along axis 1 and returned as a single 2D tensor
// NOTE(review): the intermediate tensors are not disposed in this function; presumably the
// caller runs it inside a tidy scope - confirm against the call site.
function decodeBounds(boxOutputs, anchors3, inputSize3) { function decodeBounds(boxOutputs, anchors3, inputSize4) {
const boxStarts = tfjs_esm_exports.slice(boxOutputs, [0, 1], [-1, 2]); const boxStarts = tfjs_esm_exports.slice(boxOutputs, [0, 1], [-1, 2]);
const centers = tfjs_esm_exports.add(boxStarts, anchors3); const centers = tfjs_esm_exports.add(boxStarts, anchors3);
const boxSizes = tfjs_esm_exports.slice(boxOutputs, [0, 3], [-1, 2]); const boxSizes = tfjs_esm_exports.slice(boxOutputs, [0, 3], [-1, 2]);
const boxSizesNormalized = tfjs_esm_exports.div(boxSizes, inputSize3); const boxSizesNormalized = tfjs_esm_exports.div(boxSizes, inputSize4);
const centersNormalized = tfjs_esm_exports.div(centers, inputSize3); const centersNormalized = tfjs_esm_exports.div(centers, inputSize4);
const halfBoxSize = tfjs_esm_exports.div(boxSizesNormalized, 2); const halfBoxSize = tfjs_esm_exports.div(boxSizesNormalized, 2);
const starts = tfjs_esm_exports.sub(centersNormalized, halfBoxSize); const starts = tfjs_esm_exports.sub(centersNormalized, halfBoxSize);
const ends = tfjs_esm_exports.add(centersNormalized, halfBoxSize); const ends = tfjs_esm_exports.add(centersNormalized, halfBoxSize);
const startNormalized = tfjs_esm_exports.mul(starts, inputSize3); const startNormalized = tfjs_esm_exports.mul(starts, inputSize4);
const endNormalized = tfjs_esm_exports.mul(ends, inputSize3); const endNormalized = tfjs_esm_exports.mul(ends, inputSize4);
const concatAxis = 1; const concatAxis = 1;
return tfjs_esm_exports.concat2d([startNormalized, endNormalized], concatAxis); return tfjs_esm_exports.concat2d([startNormalized, endNormalized], concatAxis);
} }
@ -9590,17 +9615,9 @@ var boxScaleFact = 1.5;
var models2 = [null, null]; var models2 = [null, null];
var modelOutputNodes = ["StatefulPartitionedCall/Postprocessor/Slice", "StatefulPartitionedCall/Postprocessor/ExpandDims_1"]; var modelOutputNodes = ["StatefulPartitionedCall/Postprocessor/Slice", "StatefulPartitionedCall/Postprocessor/ExpandDims_1"];
var inputSize = [[0, 0], [0, 0]]; var inputSize = [[0, 0], [0, 0]];
var classes = [ var classes = ["hand", "fist", "pinch", "point", "face", "tip", "pinchtip"];
"hand",
"fist",
"pinch",
"point",
"face",
"tip",
"pinchtip"
];
var skipped3 = 0; var skipped3 = 0;
var outputSize; var outputSize = [0, 0];
var cache = { var cache = {
handBoxes: [], handBoxes: [],
fingerBoxes: [], fingerBoxes: [],
@ -9698,30 +9715,6 @@ async function detectHands(input, config3) {
hands.length = config3.hand.maxDetected || 1; hands.length = config3.hand.maxDetected || 1;
return hands; return hands;
} }
/**
 * Recompute the box fields of a hand record from its keypoints, in place.
 *
 * Reads the module-level `boxScaleFact` and `outputSize`.
 *
 * @param {object} h - record that receives `box`, `boxRaw` and `yxBox`
 * @param {number[][]} keypoints3 - points; entry 0 is treated as x and entry 1 as y
 * @returns {void}
 */
function updateBoxes(h, keypoints3) {
  const xs = keypoints3.map((pt) => pt[0]);
  const ys = keypoints3.map((pt) => pt[1]);
  const left = Math.min(...xs);
  const right = Math.max(...xs);
  const top = Math.min(...ys);
  const bottom = Math.max(...ys);
  const midX = (left + right) / 2;
  const midY = (top + bottom) / 2;
  // Largest centre-to-edge distance, enlarged by the module-level scale factor.
  const half = Math.max(midX - left, midY - top, -midX + right, -midY + bottom) * boxScaleFact;
  const side = Math.trunc(2 * half);
  h.box = [Math.trunc(midX - half), Math.trunc(midY - half), side, side];
  // Even indices are divided by outputSize[0], odd indices by outputSize[1].
  h.boxRaw = h.box.map((value, idx) => value / outputSize[idx % 2]);
  const [rawX, rawY, rawW, rawH] = h.boxRaw;
  h.yxBox = [rawY, rawX, rawH + rawY, rawW + rawX];
}
async function detectFingers(input, h, config3) { async function detectFingers(input, h, config3) {
const hand3 = { const hand3 = {
id: h.id, id: h.id,
@ -9755,7 +9748,10 @@ async function detectFingers(input, h, config3) {
h.box[3] * coord[1] / inputSize[1][1] + h.box[1], h.box[3] * coord[1] / inputSize[1][1] + h.box[1],
(h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2] (h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2]
]); ]);
updateBoxes(h, hand3.keypoints); const updatedBox = scaleBox(hand3.keypoints, boxScaleFact, outputSize);
h.box = updatedBox.box;
h.boxRaw = updatedBox.boxRaw;
h.yxBox = updatedBox.yxBox;
hand3.box = h.box; hand3.box = h.box;
hand3.landmarks = analyze(hand3.keypoints); hand3.landmarks = analyze(hand3.keypoints);
for (const key of Object.keys(fingerMap)) { for (const key of Object.keys(fingerMap)) {
@ -10035,11 +10031,13 @@ async function predict8(image24, config3) {
// src/movenet/movenet.ts // src/movenet/movenet.ts
var model6; var model6;
var keypoints2 = []; var inputSize2 = 0;
var cachedBoxes = [];
var box5 = [0, 0, 0, 0]; var box5 = [0, 0, 0, 0];
var boxRaw2 = [0, 0, 0, 0]; var boxRaw2 = [0, 0, 0, 0];
var score2 = 0; var score2 = 0;
var skipped5 = Number.MAX_SAFE_INTEGER; var skipped5 = Number.MAX_SAFE_INTEGER;
var keypoints2 = [];
var bodyParts2 = ["nose", "leftEye", "rightEye", "leftEar", "rightEar", "leftShoulder", "rightShoulder", "leftElbow", "rightElbow", "leftWrist", "rightWrist", "leftHip", "rightHip", "leftKnee", "rightKnee", "leftAnkle", "rightAnkle"]; var bodyParts2 = ["nose", "leftEye", "rightEye", "leftEar", "rightEar", "leftShoulder", "rightShoulder", "leftElbow", "rightElbow", "leftWrist", "rightWrist", "leftHip", "rightHip", "leftKnee", "rightKnee", "leftAnkle", "rightAnkle"];
async function load9(config3) { async function load9(config3) {
if (env.initial) if (env.initial)
@ -10053,24 +10051,28 @@ async function load9(config3) {
log("load model:", model6["modelUrl"]); log("load model:", model6["modelUrl"]);
} else if (config3.debug) } else if (config3.debug)
log("cached model:", model6["modelUrl"]); log("cached model:", model6["modelUrl"]);
inputSize2 = model6.inputs[0].shape ? model6.inputs[0].shape[2] : 0;
if (inputSize2 === -1)
inputSize2 = 256;
return model6; return model6;
} }
async function parseSinglePose(res, config3, image24) { async function parseSinglePose(res, config3, image24, inputBox) {
keypoints2.length = 0;
const kpt3 = res[0][0]; const kpt3 = res[0][0];
keypoints2.length = 0;
for (let id = 0; id < kpt3.length; id++) { for (let id = 0; id < kpt3.length; id++) {
score2 = kpt3[id][2]; score2 = kpt3[id][2];
if (score2 > config3.body.minConfidence) { if (score2 > config3.body.minConfidence) {
const positionRaw = [
(inputBox[3] - inputBox[1]) * kpt3[id][1] + inputBox[1],
(inputBox[2] - inputBox[0]) * kpt3[id][0] + inputBox[0]
];
keypoints2.push({ keypoints2.push({
score: Math.round(100 * score2) / 100, score: Math.round(100 * score2) / 100,
part: bodyParts2[id], part: bodyParts2[id],
positionRaw: [ positionRaw,
kpt3[id][1],
kpt3[id][0]
],
position: [ position: [
Math.round((image24.shape[2] || 0) * kpt3[id][1]), Math.round((image24.shape[2] || 0) * positionRaw[0]),
Math.round((image24.shape[1] || 0) * kpt3[id][0]) Math.round((image24.shape[1] || 0) * positionRaw[1])
] ]
}); });
} }
@ -10092,12 +10094,12 @@ async function parseSinglePose(res, config3, image24) {
Math.max(...xRaw) - Math.min(...xRaw), Math.max(...xRaw) - Math.min(...xRaw),
Math.max(...yRaw) - Math.min(...yRaw) Math.max(...yRaw) - Math.min(...yRaw)
]; ];
const persons2 = []; const bodies = [];
persons2.push({ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 }); bodies.push({ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 });
return persons2; return bodies;
} }
async function parseMultiPose(res, config3, image24) { async function parseMultiPose(res, config3, image24, inputBox) {
const persons2 = []; const bodies = [];
for (let id = 0; id < res[0].length; id++) { for (let id = 0; id < res[0].length; id++) {
const kpt3 = res[0][id]; const kpt3 = res[0][id];
score2 = Math.round(100 * kpt3[51 + 4]) / 100; score2 = Math.round(100 * kpt3[51 + 4]) / 100;
@ -10107,16 +10109,20 @@ async function parseMultiPose(res, config3, image24) {
for (let i = 0; i < 17; i++) { for (let i = 0; i < 17; i++) {
const partScore = Math.round(100 * kpt3[3 * i + 2]) / 100; const partScore = Math.round(100 * kpt3[3 * i + 2]) / 100;
if (partScore > config3.body.minConfidence) { if (partScore > config3.body.minConfidence) {
const positionRaw = [
(inputBox[3] - inputBox[1]) * kpt3[3 * i + 1] + inputBox[1],
(inputBox[2] - inputBox[0]) * kpt3[3 * i + 0] + inputBox[0]
];
keypoints2.push({ keypoints2.push({
part: bodyParts2[i], part: bodyParts2[i],
score: partScore, score: partScore,
positionRaw: [kpt3[3 * i + 1], kpt3[3 * i + 0]], positionRaw,
position: [Math.trunc(kpt3[3 * i + 1] * (image24.shape[2] || 0)), Math.trunc(kpt3[3 * i + 0] * (image24.shape[1] || 0))] position: [Math.trunc(positionRaw[0] * (image24.shape[2] || 0)), Math.trunc(positionRaw[0] * (image24.shape[1] || 0))]
}); });
} }
} }
boxRaw2 = [kpt3[51 + 1], kpt3[51 + 0], kpt3[51 + 3] - kpt3[51 + 1], kpt3[51 + 2] - kpt3[51 + 0]]; boxRaw2 = [kpt3[51 + 1], kpt3[51 + 0], kpt3[51 + 3] - kpt3[51 + 1], kpt3[51 + 2] - kpt3[51 + 0]];
persons2.push({ bodies.push({
id, id,
score: score2, score: score2,
boxRaw: boxRaw2, boxRaw: boxRaw2,
@ -10129,39 +10135,47 @@ async function parseMultiPose(res, config3, image24) {
keypoints: [...keypoints2] keypoints: [...keypoints2]
}); });
} }
return persons2; return bodies;
} }
// predict9: MoveNet body detection entry point.
// Side-by-side diff rendering: each line carries the previous implementation's text
// (left) followed by the updated implementation's text (right); lines with a single
// statement belong to only one of the two versions.
//
// Updated version (right-hand text):
//  - returns [] straight away when model6 or the shape of its first input is missing
//  - otherwise returns a Promise that resolves to the `bodies` array
//  - clears cachedBoxes when config3.skipFrame is falsy, then increments skipped5
//  - for every cached box: cropAndResize the input to inputSize2 x inputSize2, cast to int32,
//    run the model, and parse the result with parseSinglePose when res.shape[2] === 17,
//    otherwise with parseMultiPose, passing the cached box; results are concatenated
//  - when bodies.length differs from config3.body.maxDetected and skipped5 exceeds
//    config3.body.skipFrames (default 0): resize the whole input, run the same pipeline with
//    the box [0, 0, 1, 1], replace `bodies`, clear cachedBoxes and reset skipped5
//  - when config3.skipFrame is set: rebuild cachedBoxes from the yxBox returned by
//    scaleBox(positions, 1.5, [input.shape[2], input.shape[1]]) for every body that has
//    more than 10 keypoints
//  - tensors stored on `t` are released with dispose after each model run
//
// Previous version (left-hand text):
//  - while skipped5 < config3.body.skipFrames, config3.skipFrame is set and keypoints2 is
//    non-empty, returns the module-level cached body without running the model
//  - otherwise resizes the whole image inside tidy, runs the model only when
//    config3.body.enabled, and dispatches on resT.shape[2] (17 -> single pose, 56 -> multi pose)
//  - NOTE(review): after `if (!resT) resolve([]);` execution continues to `resT.array()`,
//    which looks like it would throw when resT is undefined - confirm.
// NOTE(review): both versions pass an async executor to `new Promise`; a rejection inside it
// would presumably not reach the returned promise - confirm.
async function predict9(image24, config3) { async function predict9(input, config3) {
if (skipped5 < (config3.body.skipFrames || 0) && config3.skipFrame && Object.keys(keypoints2).length > 0) { if (!model6 || !(model6 == null ? void 0 : model6.inputs[0].shape))
skipped5++; return [];
return [{ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 }];
}
skipped5 = 0;
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const tensor3 = tfjs_esm_exports.tidy(() => { const t = {};
if (!(model6 == null ? void 0 : model6.inputs[0].shape)) let bodies = [];
return null; if (!config3.skipFrame)
let inputSize3 = model6.inputs[0].shape[2]; cachedBoxes.length = 0;
if (inputSize3 === -1) skipped5++;
inputSize3 = 256; for (let i = 0; i < cachedBoxes.length; i++) {
const resize = tfjs_esm_exports.image.resizeBilinear(image24, [inputSize3, inputSize3], false); t.crop = tfjs_esm_exports.image.cropAndResize(input, [cachedBoxes[i]], [0], [inputSize2, inputSize2], "bilinear");
const cast5 = tfjs_esm_exports.cast(resize, "int32"); t.cast = tfjs_esm_exports.cast(t.crop, "int32");
return cast5; t.res = await (model6 == null ? void 0 : model6.predict(t.cast));
}); const res = await t.res.array();
let resT; const newBodies = t.res.shape[2] === 17 ? await parseSinglePose(res, config3, input, cachedBoxes[i]) : await parseMultiPose(res, config3, input, cachedBoxes[i]);
if (config3.body.enabled) bodies = bodies.concat(newBodies);
resT = await (model6 == null ? void 0 : model6.predict(tensor3)); Object.keys(t).forEach((tensor3) => tfjs_esm_exports.dispose(t[tensor3]));
tfjs_esm_exports.dispose(tensor3); }
if (!resT) if (bodies.length !== config3.body.maxDetected && skipped5 > (config3.body.skipFrames || 0)) {
resolve([]); t.resized = tfjs_esm_exports.image.resizeBilinear(input, [inputSize2, inputSize2], false);
const res = await resT.array(); t.cast = tfjs_esm_exports.cast(t.resized, "int32");
let body4; t.res = await (model6 == null ? void 0 : model6.predict(t.cast));
if (resT.shape[2] === 17) const res = await t.res.array();
body4 = await parseSinglePose(res, config3, image24); bodies = t.res.shape[2] === 17 ? await parseSinglePose(res, config3, input, [0, 0, 1, 1]) : await parseMultiPose(res, config3, input, [0, 0, 1, 1]);
else if (resT.shape[2] === 56) Object.keys(t).forEach((tensor3) => tfjs_esm_exports.dispose(t[tensor3]));
body4 = await parseMultiPose(res, config3, image24); cachedBoxes.length = 0;
tfjs_esm_exports.dispose(resT); skipped5 = 0;
resolve(body4); }
if (config3.skipFrame) {
cachedBoxes.length = 0;
for (let i = 0; i < bodies.length; i++) {
if (bodies[i].keypoints.length > 10) {
const kpts = bodies[i].keypoints.map((kpt3) => kpt3.position);
const newBox = scaleBox(kpts, 1.5, [input.shape[2], input.shape[1]]);
cachedBoxes.push([...newBox.yxBox]);
}
}
}
resolve(bodies);
}); });
} }
@ -10253,7 +10267,7 @@ var labels = [
var model7; var model7;
var last3 = []; var last3 = [];
var skipped6 = Number.MAX_SAFE_INTEGER; var skipped6 = Number.MAX_SAFE_INTEGER;
var scaleBox = 2.5; var scaleBox2 = 2.5;
async function load10(config3) { async function load10(config3) {
if (!model7 || env.initial) { if (!model7 || env.initial) {
model7 = await tfjs_esm_exports.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || "")); model7 = await tfjs_esm_exports.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || ""));
@ -10269,7 +10283,7 @@ async function load10(config3) {
log("cached model:", model7.modelUrl); log("cached model:", model7.modelUrl);
return model7; return model7;
} }
async function process3(res, inputSize3, outputShape, config3) { async function process3(res, inputSize4, outputShape, config3) {
let id = 0; let id = 0;
let results = []; let results = [];
for (const strideSize of [1, 2, 4]) { for (const strideSize of [1, 2, 4]) {
@ -10287,14 +10301,14 @@ async function process3(res, inputSize3, outputShape, config3) {
if (score3 > config3.object.minConfidence && j !== 61) { if (score3 > config3.object.minConfidence && j !== 61) {
const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize;
const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize;
const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize3)); const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize4));
const [x, y] = [ const [x, y] = [
cx - scaleBox / strideSize * boxOffset[0], cx - scaleBox2 / strideSize * boxOffset[0],
cy - scaleBox / strideSize * boxOffset[1] cy - scaleBox2 / strideSize * boxOffset[1]
]; ];
const [w, h] = [ const [w, h] = [
cx + scaleBox / strideSize * boxOffset[2] - x, cx + scaleBox2 / strideSize * boxOffset[2] - x,
cy + scaleBox / strideSize * boxOffset[3] - y cy + scaleBox2 / strideSize * boxOffset[3] - y
]; ];
let boxRaw3 = [x, y, w, h]; let boxRaw3 = [x, y, w, h];
boxRaw3 = boxRaw3.map((a) => Math.max(0, Math.min(a, 1))); boxRaw3 = boxRaw3.map((a) => Math.max(0, Math.min(a, 1)));
@ -10357,7 +10371,7 @@ async function predict10(image24, config3) {
// src/object/centernet.ts // src/object/centernet.ts
var model8; var model8;
var inputSize2 = 0; var inputSize3 = 0;
var last4 = []; var last4 = [];
var skipped7 = Number.MAX_SAFE_INTEGER; var skipped7 = Number.MAX_SAFE_INTEGER;
async function load11(config3) { async function load11(config3) {
@ -10367,7 +10381,7 @@ async function load11(config3) {
fakeOps(["floormod"], config3); fakeOps(["floormod"], config3);
model8 = await tfjs_esm_exports.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || "")); model8 = await tfjs_esm_exports.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || ""));
const inputs = Object.values(model8.modelSignature["inputs"]); const inputs = Object.values(model8.modelSignature["inputs"]);
inputSize2 = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0; inputSize3 = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
if (!model8 || !model8["modelUrl"]) if (!model8 || !model8["modelUrl"])
log("load model failed:", config3.object.modelPath); log("load model failed:", config3.object.modelPath);
else if (config3.debug) else if (config3.debug)
@ -10403,14 +10417,14 @@ async function process4(res, outputShape, config3) {
const classVal = detections[0][id][5]; const classVal = detections[0][id][5];
const label = labels[classVal].label; const label = labels[classVal].label;
const [x, y] = [ const [x, y] = [
detections[0][id][0] / inputSize2, detections[0][id][0] / inputSize3,
detections[0][id][1] / inputSize2 detections[0][id][1] / inputSize3
]; ];
const boxRaw3 = [ const boxRaw3 = [
x, x,
y, y,
detections[0][id][2] / inputSize2 - x, detections[0][id][2] / inputSize3 - x,
detections[0][id][3] / inputSize2 - y detections[0][id][3] / inputSize3 - y
]; ];
const box6 = [ const box6 = [
Math.trunc(boxRaw3[0] * outputShape[0]), Math.trunc(boxRaw3[0] * outputShape[0]),
@ -10432,7 +10446,7 @@ async function predict11(input, config3) {
return last4; return last4;
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const outputSize2 = [input.shape[2], input.shape[1]]; const outputSize2 = [input.shape[2], input.shape[1]];
const resize = tfjs_esm_exports.image.resizeBilinear(input, [inputSize2, inputSize2]); const resize = tfjs_esm_exports.image.resizeBilinear(input, [inputSize3, inputSize3]);
const objectT = config3.object.enabled ? model8 == null ? void 0 : model8.execute(resize, ["tower_0/detections"]) : null; const objectT = config3.object.enabled ? model8 == null ? void 0 : model8.execute(resize, ["tower_0/detections"]) : null;
tfjs_esm_exports.dispose(resize); tfjs_esm_exports.dispose(resize);
const obj = await process4(objectT, outputSize2, config3); const obj = await process4(objectT, outputSize2, config3);
@ -12656,7 +12670,7 @@ var Human = class {
faceRes = await faceRes; faceRes = await faceRes;
this.analyze("Start Body:"); this.analyze("Start Body:");
this.state = "detect:body"; this.state = "detect:body";
const bodyConfig = this.config.body.maxDetected === -1 ? mergeDeep(this.config, { body: { maxDetected: 1 * faceRes.length } }) : this.config; const bodyConfig = this.config.body.maxDetected === -1 ? mergeDeep(this.config, { body: { maxDetected: this.config.face.enabled ? 1 * faceRes.length : 1 } }) : this.config;
if (this.config.async) { if (this.config.async) {
if ((_a = this.config.body.modelPath) == null ? void 0 : _a.includes("posenet")) if ((_a = this.config.body.modelPath) == null ? void 0 : _a.includes("posenet"))
bodyRes = this.config.body.enabled ? predict4(img.tensor, bodyConfig) : []; bodyRes = this.config.body.enabled ? predict4(img.tensor, bodyConfig) : [];
@ -12685,7 +12699,7 @@ var Human = class {
this.analyze("End Body:"); this.analyze("End Body:");
this.analyze("Start Hand:"); this.analyze("Start Hand:");
this.state = "detect:hand"; this.state = "detect:hand";
const handConfig = this.config.hand.maxDetected === -1 ? mergeDeep(this.config, { hand: { maxDetected: 2 * faceRes.length } }) : this.config; const handConfig = this.config.hand.maxDetected === -1 ? mergeDeep(this.config, { hand: { maxDetected: this.config.face.enabled ? 2 * faceRes.length : 1 } }) : this.config;
if (this.config.async) { if (this.config.async) {
if ((_j = (_i = this.config.hand.detector) == null ? void 0 : _i.modelPath) == null ? void 0 : _j.includes("handdetect")) if ((_j = (_i = this.config.hand.detector) == null ? void 0 : _i.modelPath) == null ? void 0 : _j.includes("handdetect"))
handRes = this.config.hand.enabled ? predict5(img.tensor, handConfig) : []; handRes = this.config.hand.enabled ? predict5(img.tensor, handConfig) : [];

File diff suppressed because one or more lines are too long

252
dist/human.esm.js vendored
View File

@ -95,6 +95,31 @@ function mergeDeep(...objects) {
return prev; return prev;
}, {}); }, {});
} }
/**
 * Build a square box centred on a set of keypoints and enlarged by a scale factor.
 *
 * Each keypoint's first entry is treated as x and its second as y; any further
 * entries are ignored.
 *
 * @param {number[][]} keypoints3 - points to enclose
 * @param {number} boxScaleFact2 - multiplier applied to the largest centre-to-edge distance
 * @param {number[]} outputSize2 - divisors for the x axis ([0]) and the y axis ([1]) used to build boxRaw
 * @returns {{box: number[], boxRaw: number[], yxBox: number[]}}
 *   box: [x, y, size, size] truncated to integers;
 *   boxRaw: box with x/width divided by outputSize2[0] and y/height divided by outputSize2[1];
 *   yxBox: [y, x, y + height, x + width] taken from boxRaw
 */
function scaleBox(keypoints3, boxScaleFact2, outputSize2) {
  // With no points Math.max()/Math.min() yield -Infinity/Infinity and every
  // derived value turns into NaN, so report an all-zero box instead.
  if (keypoints3.length === 0) {
    return { box: [0, 0, 0, 0], boxRaw: [0, 0, 0, 0], yxBox: [0, 0, 0, 0] };
  }
  const xs = keypoints3.map((pt) => pt[0]);
  const ys = keypoints3.map((pt) => pt[1]);
  const maxX = Math.max(...xs);
  const minX = Math.min(...xs);
  const maxY = Math.max(...ys);
  const minY = Math.min(...ys);
  const center = [(maxX + minX) / 2, (maxY + minY) / 2];
  // Half-size of the square: the largest distance from the centre to any edge, scaled.
  const diff = Math.max(center[0] - minX, center[1] - minY, -center[0] + maxX, -center[1] + maxY) * boxScaleFact2;
  const box6 = [
    Math.trunc(center[0] - diff),
    Math.trunc(center[1] - diff),
    Math.trunc(2 * diff),
    Math.trunc(2 * diff)
  ];
  const boxRaw3 = [
    box6[0] / outputSize2[0],
    box6[1] / outputSize2[1],
    box6[2] / outputSize2[0],
    box6[3] / outputSize2[1]
  ];
  // Same box re-ordered as [y, x, y + height, x + width].
  const yxBox = [
    boxRaw3[1],
    boxRaw3[0],
    boxRaw3[3] + boxRaw3[1],
    boxRaw3[2] + boxRaw3[0]
  ];
  return { box: box6, boxRaw: boxRaw3, yxBox };
}
// src/config.ts // src/config.ts
var config = { var config = {
@ -41186,17 +41211,17 @@ function fftBatch(input2, inverse, cpuBackend) {
return result; return result;
} }
function fftImpl(input2, inverse, cpuBackend) { function fftImpl(input2, inverse, cpuBackend) {
const inputSize3 = util_exports.sizeFromShape(input2.shape); const inputSize4 = util_exports.sizeFromShape(input2.shape);
const inputVals = cpuBackend.data.get(input2.dataId); const inputVals = cpuBackend.data.get(input2.dataId);
const realVals = cpuBackend.data.get(inputVals.complexTensorInfos.real.dataId).values; const realVals = cpuBackend.data.get(inputVals.complexTensorInfos.real.dataId).values;
const imagVals = cpuBackend.data.get(inputVals.complexTensorInfos.imag.dataId).values; const imagVals = cpuBackend.data.get(inputVals.complexTensorInfos.imag.dataId).values;
if (isExponentOf2(inputSize3)) { if (isExponentOf2(inputSize4)) {
const result = fftRadix2(realVals, imagVals, inputSize3, inverse, cpuBackend); const result = fftRadix2(realVals, imagVals, inputSize4, inverse, cpuBackend);
const resultShape = [input2.shape[0], input2.shape[1]]; const resultShape = [input2.shape[0], input2.shape[1]];
if (inverse) { if (inverse) {
const realInfo = cpuBackend.makeTensorInfo(resultShape, "float32", result.real); const realInfo = cpuBackend.makeTensorInfo(resultShape, "float32", result.real);
const imagInfo = cpuBackend.makeTensorInfo(resultShape, "float32", result.imag); const imagInfo = cpuBackend.makeTensorInfo(resultShape, "float32", result.imag);
const sizeInfo = cpuBackend.makeTensorInfo([], "float32", util_exports.createScalarValue(inputSize3, "float32")); const sizeInfo = cpuBackend.makeTensorInfo([], "float32", util_exports.createScalarValue(inputSize4, "float32"));
const sizeInfoCopy = identity2({ inputs: { x: sizeInfo }, backend: cpuBackend }); const sizeInfoCopy = identity2({ inputs: { x: sizeInfo }, backend: cpuBackend });
const divRealInfo = realDivConfig.kernelFunc({ inputs: { a: realInfo, b: sizeInfo }, backend: cpuBackend }); const divRealInfo = realDivConfig.kernelFunc({ inputs: { a: realInfo, b: sizeInfo }, backend: cpuBackend });
const divImagInfo = realDivConfig.kernelFunc({ inputs: { a: imagInfo, b: sizeInfoCopy }, backend: cpuBackend }); const divImagInfo = realDivConfig.kernelFunc({ inputs: { a: imagInfo, b: sizeInfoCopy }, backend: cpuBackend });
@ -41213,7 +41238,7 @@ function fftImpl(input2, inverse, cpuBackend) {
return result; return result;
} else { } else {
const data = backend_util_exports.mergeRealAndImagArrays(realVals, imagVals); const data = backend_util_exports.mergeRealAndImagArrays(realVals, imagVals);
const rawOutput = fourierTransformByMatmul(data, inputSize3, inverse); const rawOutput = fourierTransformByMatmul(data, inputSize4, inverse);
return backend_util_exports.splitRealAndImagArrays(rawOutput); return backend_util_exports.splitRealAndImagArrays(rawOutput);
} }
} }
@ -41335,9 +41360,9 @@ function fourierTransformByMatmul(data, size, inverse) {
function fft2(args) { function fft2(args) {
const { inputs, backend: backend22 } = args; const { inputs, backend: backend22 } = args;
const { input: input2 } = inputs; const { input: input2 } = inputs;
const inputSize3 = util_exports.sizeFromShape(input2.shape); const inputSize4 = util_exports.sizeFromShape(input2.shape);
const innerDimensionSize = input2.shape[input2.shape.length - 1]; const innerDimensionSize = input2.shape[input2.shape.length - 1];
const batch = inputSize3 / innerDimensionSize; const batch = inputSize4 / innerDimensionSize;
const input2D = reshape3({ const input2D = reshape3({
inputs: { x: input2 }, inputs: { x: input2 },
backend: backend22, backend: backend22,
@ -41536,9 +41561,9 @@ var gatherV2Config = {
function ifft2(args) { function ifft2(args) {
const { inputs, backend: backend22 } = args; const { inputs, backend: backend22 } = args;
const { input: input2 } = inputs; const { input: input2 } = inputs;
const inputSize3 = util_exports.sizeFromShape(input2.shape); const inputSize4 = util_exports.sizeFromShape(input2.shape);
const innerDimensionSize = input2.shape[input2.shape.length - 1]; const innerDimensionSize = input2.shape[input2.shape.length - 1];
const batch = inputSize3 / innerDimensionSize; const batch = inputSize4 / innerDimensionSize;
const input2D = reshape3({ const input2D = reshape3({
inputs: { x: input2 }, inputs: { x: input2 },
backend: backend22, backend: backend22,
@ -53040,9 +53065,9 @@ var FFTProgram = class {
}; };
function fftImpl2(x, inverse, backend22) { function fftImpl2(x, inverse, backend22) {
const xData = backend22.texData.get(x.dataId); const xData = backend22.texData.get(x.dataId);
const inputSize3 = util_exports.sizeFromShape(x.shape); const inputSize4 = util_exports.sizeFromShape(x.shape);
const innerDimensionSize = x.shape[x.shape.length - 1]; const innerDimensionSize = x.shape[x.shape.length - 1];
const batch = inputSize3 / innerDimensionSize; const batch = inputSize4 / innerDimensionSize;
const input2D = reshape4({ inputs: { x }, backend: backend22, attrs: { shape: [batch, innerDimensionSize] } }); const input2D = reshape4({ inputs: { x }, backend: backend22, attrs: { shape: [batch, innerDimensionSize] } });
const xShape = input2D.shape; const xShape = input2D.shape;
const realProgram = new FFTProgram("real", xShape, inverse); const realProgram = new FFTProgram("real", xShape, inverse);
@ -60423,13 +60448,13 @@ function rotatePoint(homogeneousCoordinate, rotationMatrix) {
dot4(homogeneousCoordinate, rotationMatrix[1]) dot4(homogeneousCoordinate, rotationMatrix[1])
]; ];
} }
function generateAnchors(inputSize3) { function generateAnchors(inputSize4) {
const spec = { strides: [inputSize3 / 16, inputSize3 / 8], anchors: [2, 6] }; const spec = { strides: [inputSize4 / 16, inputSize4 / 8], anchors: [2, 6] };
const anchors3 = []; const anchors3 = [];
for (let i = 0; i < spec.strides.length; i++) { for (let i = 0; i < spec.strides.length; i++) {
const stride = spec.strides[i]; const stride = spec.strides[i];
const gridRows = Math.floor((inputSize3 + stride - 1) / stride); const gridRows = Math.floor((inputSize4 + stride - 1) / stride);
const gridCols = Math.floor((inputSize3 + stride - 1) / stride); const gridCols = Math.floor((inputSize4 + stride - 1) / stride);
const anchorsNum = spec.anchors[i]; const anchorsNum = spec.anchors[i];
for (let gridY = 0; gridY < gridRows; gridY++) { for (let gridY = 0; gridY < gridRows; gridY++) {
const anchorY = stride * (gridY + 0.5); const anchorY = stride * (gridY + 0.5);
@ -60446,17 +60471,17 @@ function generateAnchors(inputSize3) {
// src/blazeface/blazeface.ts // src/blazeface/blazeface.ts
var keypointsCount = 6; var keypointsCount = 6;
// decodeBounds: turns raw box regression output into [start, end] box coordinates.
// Side-by-side diff rendering: every line carries the previous text followed by the
// updated text; the only difference between the two is the rename inputSize3 -> inputSize4.
//
// Steps, as written below:
//  - columns 1-2 of boxOutputs (slice begin [0, 1], size [-1, 2]) are added to anchors3 to form box centres
//  - columns 3-4 of boxOutputs (slice begin [0, 3], size [-1, 2]) are the box sizes
//  - centres and sizes are divided by the input size, start/end are computed as centre -/+ half size,
//    and both are multiplied by the input size again
//  - start and end are concatenated along axis 1 and returned as a single 2D tensor
// NOTE(review): the intermediate tensors are not disposed in this function; presumably the
// caller runs it inside a tidy scope - confirm against the call site.
function decodeBounds(boxOutputs, anchors3, inputSize3) { function decodeBounds(boxOutputs, anchors3, inputSize4) {
const boxStarts = slice(boxOutputs, [0, 1], [-1, 2]); const boxStarts = slice(boxOutputs, [0, 1], [-1, 2]);
const centers = add2(boxStarts, anchors3); const centers = add2(boxStarts, anchors3);
const boxSizes = slice(boxOutputs, [0, 3], [-1, 2]); const boxSizes = slice(boxOutputs, [0, 3], [-1, 2]);
const boxSizesNormalized = div(boxSizes, inputSize3); const boxSizesNormalized = div(boxSizes, inputSize4);
const centersNormalized = div(centers, inputSize3); const centersNormalized = div(centers, inputSize4);
const halfBoxSize = div(boxSizesNormalized, 2); const halfBoxSize = div(boxSizesNormalized, 2);
const starts = sub(centersNormalized, halfBoxSize); const starts = sub(centersNormalized, halfBoxSize);
const ends = add2(centersNormalized, halfBoxSize); const ends = add2(centersNormalized, halfBoxSize);
const startNormalized = mul(starts, inputSize3); const startNormalized = mul(starts, inputSize4);
const endNormalized = mul(ends, inputSize3); const endNormalized = mul(ends, inputSize4);
const concatAxis = 1; const concatAxis = 1;
return concat2d([startNormalized, endNormalized], concatAxis); return concat2d([startNormalized, endNormalized], concatAxis);
} }
@ -69658,17 +69683,9 @@ var boxScaleFact = 1.5;
var models2 = [null, null]; var models2 = [null, null];
var modelOutputNodes = ["StatefulPartitionedCall/Postprocessor/Slice", "StatefulPartitionedCall/Postprocessor/ExpandDims_1"]; var modelOutputNodes = ["StatefulPartitionedCall/Postprocessor/Slice", "StatefulPartitionedCall/Postprocessor/ExpandDims_1"];
var inputSize = [[0, 0], [0, 0]]; var inputSize = [[0, 0], [0, 0]];
var classes = [ var classes = ["hand", "fist", "pinch", "point", "face", "tip", "pinchtip"];
"hand",
"fist",
"pinch",
"point",
"face",
"tip",
"pinchtip"
];
var skipped3 = 0; var skipped3 = 0;
var outputSize; var outputSize = [0, 0];
var cache = { var cache = {
handBoxes: [], handBoxes: [],
fingerBoxes: [], fingerBoxes: [],
@ -69766,30 +69783,6 @@ async function detectHands(input2, config3) {
hands.length = config3.hand.maxDetected || 1; hands.length = config3.hand.maxDetected || 1;
return hands; return hands;
} }
/**
 * Recompute the box fields of a hand record from its keypoints, in place.
 *
 * Reads the module-level `boxScaleFact` and `outputSize`.
 *
 * @param {object} h - record that receives `box`, `boxRaw` and `yxBox`
 * @param {number[][]} keypoints3 - points; entry 0 is treated as x and entry 1 as y
 * @returns {void}
 */
function updateBoxes(h, keypoints3) {
  const xs = keypoints3.map((pt) => pt[0]);
  const ys = keypoints3.map((pt) => pt[1]);
  const left = Math.min(...xs);
  const right = Math.max(...xs);
  const top = Math.min(...ys);
  const bottom = Math.max(...ys);
  const midX = (left + right) / 2;
  const midY = (top + bottom) / 2;
  // Largest centre-to-edge distance, enlarged by the module-level scale factor.
  const half = Math.max(midX - left, midY - top, -midX + right, -midY + bottom) * boxScaleFact;
  const side = Math.trunc(2 * half);
  h.box = [Math.trunc(midX - half), Math.trunc(midY - half), side, side];
  // Even indices are divided by outputSize[0], odd indices by outputSize[1].
  h.boxRaw = h.box.map((value, idx) => value / outputSize[idx % 2]);
  const [rawX, rawY, rawW, rawH] = h.boxRaw;
  h.yxBox = [rawY, rawX, rawH + rawY, rawW + rawX];
}
async function detectFingers(input2, h, config3) { async function detectFingers(input2, h, config3) {
const hand3 = { const hand3 = {
id: h.id, id: h.id,
@ -69823,7 +69816,10 @@ async function detectFingers(input2, h, config3) {
h.box[3] * coord[1] / inputSize[1][1] + h.box[1], h.box[3] * coord[1] / inputSize[1][1] + h.box[1],
(h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2] (h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2]
]); ]);
updateBoxes(h, hand3.keypoints); const updatedBox = scaleBox(hand3.keypoints, boxScaleFact, outputSize);
h.box = updatedBox.box;
h.boxRaw = updatedBox.boxRaw;
h.yxBox = updatedBox.yxBox;
hand3.box = h.box; hand3.box = h.box;
hand3.landmarks = analyze(hand3.keypoints); hand3.landmarks = analyze(hand3.keypoints);
for (const key of Object.keys(fingerMap)) { for (const key of Object.keys(fingerMap)) {
@ -70103,11 +70099,13 @@ async function predict8(image7, config3) {
// src/movenet/movenet.ts // src/movenet/movenet.ts
var model7; var model7;
var keypoints2 = []; var inputSize2 = 0;
var cachedBoxes = [];
var box5 = [0, 0, 0, 0]; var box5 = [0, 0, 0, 0];
var boxRaw2 = [0, 0, 0, 0]; var boxRaw2 = [0, 0, 0, 0];
var score2 = 0; var score2 = 0;
var skipped5 = Number.MAX_SAFE_INTEGER; var skipped5 = Number.MAX_SAFE_INTEGER;
var keypoints2 = [];
var bodyParts2 = ["nose", "leftEye", "rightEye", "leftEar", "rightEar", "leftShoulder", "rightShoulder", "leftElbow", "rightElbow", "leftWrist", "rightWrist", "leftHip", "rightHip", "leftKnee", "rightKnee", "leftAnkle", "rightAnkle"]; var bodyParts2 = ["nose", "leftEye", "rightEye", "leftEar", "rightEar", "leftShoulder", "rightShoulder", "leftElbow", "rightElbow", "leftWrist", "rightWrist", "leftHip", "rightHip", "leftKnee", "rightKnee", "leftAnkle", "rightAnkle"];
async function load9(config3) { async function load9(config3) {
if (env2.initial) if (env2.initial)
@ -70121,24 +70119,28 @@ async function load9(config3) {
log("load model:", model7["modelUrl"]); log("load model:", model7["modelUrl"]);
} else if (config3.debug) } else if (config3.debug)
log("cached model:", model7["modelUrl"]); log("cached model:", model7["modelUrl"]);
inputSize2 = model7.inputs[0].shape ? model7.inputs[0].shape[2] : 0;
if (inputSize2 === -1)
inputSize2 = 256;
return model7; return model7;
} }
async function parseSinglePose(res, config3, image7) { async function parseSinglePose(res, config3, image7, inputBox) {
keypoints2.length = 0;
const kpt3 = res[0][0]; const kpt3 = res[0][0];
keypoints2.length = 0;
for (let id = 0; id < kpt3.length; id++) { for (let id = 0; id < kpt3.length; id++) {
score2 = kpt3[id][2]; score2 = kpt3[id][2];
if (score2 > config3.body.minConfidence) { if (score2 > config3.body.minConfidence) {
const positionRaw = [
(inputBox[3] - inputBox[1]) * kpt3[id][1] + inputBox[1],
(inputBox[2] - inputBox[0]) * kpt3[id][0] + inputBox[0]
];
keypoints2.push({ keypoints2.push({
score: Math.round(100 * score2) / 100, score: Math.round(100 * score2) / 100,
part: bodyParts2[id], part: bodyParts2[id],
positionRaw: [ positionRaw,
kpt3[id][1],
kpt3[id][0]
],
position: [ position: [
Math.round((image7.shape[2] || 0) * kpt3[id][1]), Math.round((image7.shape[2] || 0) * positionRaw[0]),
Math.round((image7.shape[1] || 0) * kpt3[id][0]) Math.round((image7.shape[1] || 0) * positionRaw[1])
] ]
}); });
} }
@ -70160,12 +70162,12 @@ async function parseSinglePose(res, config3, image7) {
Math.max(...xRaw) - Math.min(...xRaw), Math.max(...xRaw) - Math.min(...xRaw),
Math.max(...yRaw) - Math.min(...yRaw) Math.max(...yRaw) - Math.min(...yRaw)
]; ];
const persons2 = []; const bodies = [];
persons2.push({ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 }); bodies.push({ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 });
return persons2; return bodies;
} }
async function parseMultiPose(res, config3, image7) { async function parseMultiPose(res, config3, image7, inputBox) {
const persons2 = []; const bodies = [];
for (let id = 0; id < res[0].length; id++) { for (let id = 0; id < res[0].length; id++) {
const kpt3 = res[0][id]; const kpt3 = res[0][id];
score2 = Math.round(100 * kpt3[51 + 4]) / 100; score2 = Math.round(100 * kpt3[51 + 4]) / 100;
@ -70175,16 +70177,20 @@ async function parseMultiPose(res, config3, image7) {
for (let i = 0; i < 17; i++) { for (let i = 0; i < 17; i++) {
const partScore = Math.round(100 * kpt3[3 * i + 2]) / 100; const partScore = Math.round(100 * kpt3[3 * i + 2]) / 100;
if (partScore > config3.body.minConfidence) { if (partScore > config3.body.minConfidence) {
const positionRaw = [
(inputBox[3] - inputBox[1]) * kpt3[3 * i + 1] + inputBox[1],
(inputBox[2] - inputBox[0]) * kpt3[3 * i + 0] + inputBox[0]
];
keypoints2.push({ keypoints2.push({
part: bodyParts2[i], part: bodyParts2[i],
score: partScore, score: partScore,
positionRaw: [kpt3[3 * i + 1], kpt3[3 * i + 0]], positionRaw,
position: [Math.trunc(kpt3[3 * i + 1] * (image7.shape[2] || 0)), Math.trunc(kpt3[3 * i + 0] * (image7.shape[1] || 0))] position: [Math.trunc(positionRaw[0] * (image7.shape[2] || 0)), Math.trunc(positionRaw[0] * (image7.shape[1] || 0))]
}); });
} }
} }
boxRaw2 = [kpt3[51 + 1], kpt3[51 + 0], kpt3[51 + 3] - kpt3[51 + 1], kpt3[51 + 2] - kpt3[51 + 0]]; boxRaw2 = [kpt3[51 + 1], kpt3[51 + 0], kpt3[51 + 3] - kpt3[51 + 1], kpt3[51 + 2] - kpt3[51 + 0]];
persons2.push({ bodies.push({
id, id,
score: score2, score: score2,
boxRaw: boxRaw2, boxRaw: boxRaw2,
@ -70197,39 +70203,47 @@ async function parseMultiPose(res, config3, image7) {
keypoints: [...keypoints2] keypoints: [...keypoints2]
}); });
} }
return persons2; return bodies;
} }
async function predict9(image7, config3) { async function predict9(input2, config3) {
if (skipped5 < (config3.body.skipFrames || 0) && config3.skipFrame && Object.keys(keypoints2).length > 0) { if (!model7 || !(model7 == null ? void 0 : model7.inputs[0].shape))
skipped5++; return [];
return [{ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 }];
}
skipped5 = 0;
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const tensor2 = tidy(() => { const t = {};
if (!(model7 == null ? void 0 : model7.inputs[0].shape)) let bodies = [];
return null; if (!config3.skipFrame)
let inputSize3 = model7.inputs[0].shape[2]; cachedBoxes.length = 0;
if (inputSize3 === -1) skipped5++;
inputSize3 = 256; for (let i = 0; i < cachedBoxes.length; i++) {
const resize = image.resizeBilinear(image7, [inputSize3, inputSize3], false); t.crop = image.cropAndResize(input2, [cachedBoxes[i]], [0], [inputSize2, inputSize2], "bilinear");
const cast6 = cast(resize, "int32"); t.cast = cast(t.crop, "int32");
return cast6; t.res = await (model7 == null ? void 0 : model7.predict(t.cast));
}); const res = await t.res.array();
let resT; const newBodies = t.res.shape[2] === 17 ? await parseSinglePose(res, config3, input2, cachedBoxes[i]) : await parseMultiPose(res, config3, input2, cachedBoxes[i]);
if (config3.body.enabled) bodies = bodies.concat(newBodies);
resT = await (model7 == null ? void 0 : model7.predict(tensor2)); Object.keys(t).forEach((tensor2) => dispose(t[tensor2]));
dispose(tensor2); }
if (!resT) if (bodies.length !== config3.body.maxDetected && skipped5 > (config3.body.skipFrames || 0)) {
resolve([]); t.resized = image.resizeBilinear(input2, [inputSize2, inputSize2], false);
const res = await resT.array(); t.cast = cast(t.resized, "int32");
let body4; t.res = await (model7 == null ? void 0 : model7.predict(t.cast));
if (resT.shape[2] === 17) const res = await t.res.array();
body4 = await parseSinglePose(res, config3, image7); bodies = t.res.shape[2] === 17 ? await parseSinglePose(res, config3, input2, [0, 0, 1, 1]) : await parseMultiPose(res, config3, input2, [0, 0, 1, 1]);
else if (resT.shape[2] === 56) Object.keys(t).forEach((tensor2) => dispose(t[tensor2]));
body4 = await parseMultiPose(res, config3, image7); cachedBoxes.length = 0;
dispose(resT); skipped5 = 0;
resolve(body4); }
if (config3.skipFrame) {
cachedBoxes.length = 0;
for (let i = 0; i < bodies.length; i++) {
if (bodies[i].keypoints.length > 10) {
const kpts = bodies[i].keypoints.map((kpt3) => kpt3.position);
const newBox = scaleBox(kpts, 1.5, [input2.shape[2], input2.shape[1]]);
cachedBoxes.push([...newBox.yxBox]);
}
}
}
resolve(bodies);
}); });
} }
@ -70321,7 +70335,7 @@ var labels = [
var model8; var model8;
var last3 = []; var last3 = [];
var skipped6 = Number.MAX_SAFE_INTEGER; var skipped6 = Number.MAX_SAFE_INTEGER;
var scaleBox = 2.5; var scaleBox2 = 2.5;
async function load10(config3) { async function load10(config3) {
if (!model8 || env2.initial) { if (!model8 || env2.initial) {
model8 = await loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || "")); model8 = await loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || ""));
@ -70337,7 +70351,7 @@ async function load10(config3) {
log("cached model:", model8.modelUrl); log("cached model:", model8.modelUrl);
return model8; return model8;
} }
async function process3(res, inputSize3, outputShape, config3) { async function process3(res, inputSize4, outputShape, config3) {
let id = 0; let id = 0;
let results = []; let results = [];
for (const strideSize of [1, 2, 4]) { for (const strideSize of [1, 2, 4]) {
@ -70355,14 +70369,14 @@ async function process3(res, inputSize3, outputShape, config3) {
if (score3 > config3.object.minConfidence && j !== 61) { if (score3 > config3.object.minConfidence && j !== 61) {
const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize;
const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize;
const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize3)); const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize4));
const [x, y] = [ const [x, y] = [
cx - scaleBox / strideSize * boxOffset[0], cx - scaleBox2 / strideSize * boxOffset[0],
cy - scaleBox / strideSize * boxOffset[1] cy - scaleBox2 / strideSize * boxOffset[1]
]; ];
const [w, h] = [ const [w, h] = [
cx + scaleBox / strideSize * boxOffset[2] - x, cx + scaleBox2 / strideSize * boxOffset[2] - x,
cy + scaleBox / strideSize * boxOffset[3] - y cy + scaleBox2 / strideSize * boxOffset[3] - y
]; ];
let boxRaw3 = [x, y, w, h]; let boxRaw3 = [x, y, w, h];
boxRaw3 = boxRaw3.map((a) => Math.max(0, Math.min(a, 1))); boxRaw3 = boxRaw3.map((a) => Math.max(0, Math.min(a, 1)));
@ -70425,7 +70439,7 @@ async function predict10(image7, config3) {
// src/object/centernet.ts // src/object/centernet.ts
var model9; var model9;
var inputSize2 = 0; var inputSize3 = 0;
var last4 = []; var last4 = [];
var skipped7 = Number.MAX_SAFE_INTEGER; var skipped7 = Number.MAX_SAFE_INTEGER;
async function load11(config3) { async function load11(config3) {
@ -70435,7 +70449,7 @@ async function load11(config3) {
fakeOps(["floormod"], config3); fakeOps(["floormod"], config3);
model9 = await loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || "")); model9 = await loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || ""));
const inputs = Object.values(model9.modelSignature["inputs"]); const inputs = Object.values(model9.modelSignature["inputs"]);
inputSize2 = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0; inputSize3 = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
if (!model9 || !model9["modelUrl"]) if (!model9 || !model9["modelUrl"])
log("load model failed:", config3.object.modelPath); log("load model failed:", config3.object.modelPath);
else if (config3.debug) else if (config3.debug)
@ -70471,14 +70485,14 @@ async function process4(res, outputShape, config3) {
const classVal = detections[0][id][5]; const classVal = detections[0][id][5];
const label = labels[classVal].label; const label = labels[classVal].label;
const [x, y] = [ const [x, y] = [
detections[0][id][0] / inputSize2, detections[0][id][0] / inputSize3,
detections[0][id][1] / inputSize2 detections[0][id][1] / inputSize3
]; ];
const boxRaw3 = [ const boxRaw3 = [
x, x,
y, y,
detections[0][id][2] / inputSize2 - x, detections[0][id][2] / inputSize3 - x,
detections[0][id][3] / inputSize2 - y detections[0][id][3] / inputSize3 - y
]; ];
const box6 = [ const box6 = [
Math.trunc(boxRaw3[0] * outputShape[0]), Math.trunc(boxRaw3[0] * outputShape[0]),
@ -70500,7 +70514,7 @@ async function predict11(input2, config3) {
return last4; return last4;
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const outputSize2 = [input2.shape[2], input2.shape[1]]; const outputSize2 = [input2.shape[2], input2.shape[1]];
const resize = image.resizeBilinear(input2, [inputSize2, inputSize2]); const resize = image.resizeBilinear(input2, [inputSize3, inputSize3]);
const objectT = config3.object.enabled ? model9 == null ? void 0 : model9.execute(resize, ["tower_0/detections"]) : null; const objectT = config3.object.enabled ? model9 == null ? void 0 : model9.execute(resize, ["tower_0/detections"]) : null;
dispose(resize); dispose(resize);
const obj = await process4(objectT, outputSize2, config3); const obj = await process4(objectT, outputSize2, config3);
@ -72724,7 +72738,7 @@ var Human = class {
faceRes = await faceRes; faceRes = await faceRes;
this.analyze("Start Body:"); this.analyze("Start Body:");
this.state = "detect:body"; this.state = "detect:body";
const bodyConfig = this.config.body.maxDetected === -1 ? mergeDeep(this.config, { body: { maxDetected: 1 * faceRes.length } }) : this.config; const bodyConfig = this.config.body.maxDetected === -1 ? mergeDeep(this.config, { body: { maxDetected: this.config.face.enabled ? 1 * faceRes.length : 1 } }) : this.config;
if (this.config.async) { if (this.config.async) {
if ((_a = this.config.body.modelPath) == null ? void 0 : _a.includes("posenet")) if ((_a = this.config.body.modelPath) == null ? void 0 : _a.includes("posenet"))
bodyRes = this.config.body.enabled ? predict4(img.tensor, bodyConfig) : []; bodyRes = this.config.body.enabled ? predict4(img.tensor, bodyConfig) : [];
@ -72753,7 +72767,7 @@ var Human = class {
this.analyze("End Body:"); this.analyze("End Body:");
this.analyze("Start Hand:"); this.analyze("Start Hand:");
this.state = "detect:hand"; this.state = "detect:hand";
const handConfig = this.config.hand.maxDetected === -1 ? mergeDeep(this.config, { hand: { maxDetected: 2 * faceRes.length } }) : this.config; const handConfig = this.config.hand.maxDetected === -1 ? mergeDeep(this.config, { hand: { maxDetected: this.config.face.enabled ? 2 * faceRes.length : 1 } }) : this.config;
if (this.config.async) { if (this.config.async) {
if ((_j = (_i = this.config.hand.detector) == null ? void 0 : _i.modelPath) == null ? void 0 : _j.includes("handdetect")) if ((_j = (_i = this.config.hand.detector) == null ? void 0 : _i.modelPath) == null ? void 0 : _j.includes("handdetect"))
handRes = this.config.hand.enabled ? predict5(img.tensor, handConfig) : []; handRes = this.config.hand.enabled ? predict5(img.tensor, handConfig) : [];

File diff suppressed because one or more lines are too long

548
dist/human.js vendored

File diff suppressed because one or more lines are too long

230
dist/human.node-gpu.js vendored
View File

@ -142,6 +142,31 @@ function mergeDeep(...objects) {
return prev; return prev;
}, {}); }, {});
} }
function scaleBox(keypoints3, boxScaleFact2, outputSize2) {
const coords3 = [keypoints3.map((pt) => pt[0]), keypoints3.map((pt) => pt[1])];
const maxmin = [Math.max(...coords3[0]), Math.min(...coords3[0]), Math.max(...coords3[1]), Math.min(...coords3[1])];
const center = [(maxmin[0] + maxmin[1]) / 2, (maxmin[2] + maxmin[3]) / 2];
const diff = Math.max(center[0] - maxmin[1], center[1] - maxmin[3], -center[0] + maxmin[0], -center[1] + maxmin[2]) * boxScaleFact2;
const box6 = [
Math.trunc(center[0] - diff),
Math.trunc(center[1] - diff),
Math.trunc(2 * diff),
Math.trunc(2 * diff)
];
const boxRaw3 = [
box6[0] / outputSize2[0],
box6[1] / outputSize2[1],
box6[2] / outputSize2[0],
box6[3] / outputSize2[1]
];
const yxBox = [
boxRaw3[1],
boxRaw3[0],
boxRaw3[3] + boxRaw3[1],
boxRaw3[2] + boxRaw3[0]
];
return { box: box6, boxRaw: boxRaw3, yxBox };
}
// src/config.ts // src/config.ts
var config = { var config = {
@ -378,13 +403,13 @@ function rotatePoint(homogeneousCoordinate, rotationMatrix) {
dot(homogeneousCoordinate, rotationMatrix[1]) dot(homogeneousCoordinate, rotationMatrix[1])
]; ];
} }
function generateAnchors(inputSize3) { function generateAnchors(inputSize4) {
const spec = { strides: [inputSize3 / 16, inputSize3 / 8], anchors: [2, 6] }; const spec = { strides: [inputSize4 / 16, inputSize4 / 8], anchors: [2, 6] };
const anchors3 = []; const anchors3 = [];
for (let i = 0; i < spec.strides.length; i++) { for (let i = 0; i < spec.strides.length; i++) {
const stride = spec.strides[i]; const stride = spec.strides[i];
const gridRows = Math.floor((inputSize3 + stride - 1) / stride); const gridRows = Math.floor((inputSize4 + stride - 1) / stride);
const gridCols = Math.floor((inputSize3 + stride - 1) / stride); const gridCols = Math.floor((inputSize4 + stride - 1) / stride);
const anchorsNum = spec.anchors[i]; const anchorsNum = spec.anchors[i];
for (let gridY = 0; gridY < gridRows; gridY++) { for (let gridY = 0; gridY < gridRows; gridY++) {
const anchorY = stride * (gridY + 0.5); const anchorY = stride * (gridY + 0.5);
@ -401,17 +426,17 @@ function generateAnchors(inputSize3) {
// src/blazeface/blazeface.ts // src/blazeface/blazeface.ts
var keypointsCount = 6; var keypointsCount = 6;
function decodeBounds(boxOutputs, anchors3, inputSize3) { function decodeBounds(boxOutputs, anchors3, inputSize4) {
const boxStarts = tf2.slice(boxOutputs, [0, 1], [-1, 2]); const boxStarts = tf2.slice(boxOutputs, [0, 1], [-1, 2]);
const centers = tf2.add(boxStarts, anchors3); const centers = tf2.add(boxStarts, anchors3);
const boxSizes = tf2.slice(boxOutputs, [0, 3], [-1, 2]); const boxSizes = tf2.slice(boxOutputs, [0, 3], [-1, 2]);
const boxSizesNormalized = tf2.div(boxSizes, inputSize3); const boxSizesNormalized = tf2.div(boxSizes, inputSize4);
const centersNormalized = tf2.div(centers, inputSize3); const centersNormalized = tf2.div(centers, inputSize4);
const halfBoxSize = tf2.div(boxSizesNormalized, 2); const halfBoxSize = tf2.div(boxSizesNormalized, 2);
const starts = tf2.sub(centersNormalized, halfBoxSize); const starts = tf2.sub(centersNormalized, halfBoxSize);
const ends = tf2.add(centersNormalized, halfBoxSize); const ends = tf2.add(centersNormalized, halfBoxSize);
const startNormalized = tf2.mul(starts, inputSize3); const startNormalized = tf2.mul(starts, inputSize4);
const endNormalized = tf2.mul(ends, inputSize3); const endNormalized = tf2.mul(ends, inputSize4);
const concatAxis = 1; const concatAxis = 1;
return tf2.concat2d([startNormalized, endNormalized], concatAxis); return tf2.concat2d([startNormalized, endNormalized], concatAxis);
} }
@ -9642,17 +9667,9 @@ var boxScaleFact = 1.5;
var models2 = [null, null]; var models2 = [null, null];
var modelOutputNodes = ["StatefulPartitionedCall/Postprocessor/Slice", "StatefulPartitionedCall/Postprocessor/ExpandDims_1"]; var modelOutputNodes = ["StatefulPartitionedCall/Postprocessor/Slice", "StatefulPartitionedCall/Postprocessor/ExpandDims_1"];
var inputSize = [[0, 0], [0, 0]]; var inputSize = [[0, 0], [0, 0]];
var classes = [ var classes = ["hand", "fist", "pinch", "point", "face", "tip", "pinchtip"];
"hand",
"fist",
"pinch",
"point",
"face",
"tip",
"pinchtip"
];
var skipped3 = 0; var skipped3 = 0;
var outputSize; var outputSize = [0, 0];
var cache = { var cache = {
handBoxes: [], handBoxes: [],
fingerBoxes: [], fingerBoxes: [],
@ -9750,30 +9767,6 @@ async function detectHands(input, config3) {
hands.length = config3.hand.maxDetected || 1; hands.length = config3.hand.maxDetected || 1;
return hands; return hands;
} }
function updateBoxes(h, keypoints3) {
const finger = [keypoints3.map((pt) => pt[0]), keypoints3.map((pt) => pt[1])];
const minmax = [Math.min(...finger[0]), Math.max(...finger[0]), Math.min(...finger[1]), Math.max(...finger[1])];
const center = [(minmax[0] + minmax[1]) / 2, (minmax[2] + minmax[3]) / 2];
const diff = Math.max(center[0] - minmax[0], center[1] - minmax[2], -center[0] + minmax[1], -center[1] + minmax[3]) * boxScaleFact;
h.box = [
Math.trunc(center[0] - diff),
Math.trunc(center[1] - diff),
Math.trunc(2 * diff),
Math.trunc(2 * diff)
];
h.boxRaw = [
h.box[0] / outputSize[0],
h.box[1] / outputSize[1],
h.box[2] / outputSize[0],
h.box[3] / outputSize[1]
];
h.yxBox = [
h.boxRaw[1],
h.boxRaw[0],
h.boxRaw[3] + h.boxRaw[1],
h.boxRaw[2] + h.boxRaw[0]
];
}
async function detectFingers(input, h, config3) { async function detectFingers(input, h, config3) {
const hand3 = { const hand3 = {
id: h.id, id: h.id,
@ -9807,7 +9800,10 @@ async function detectFingers(input, h, config3) {
h.box[3] * coord[1] / inputSize[1][1] + h.box[1], h.box[3] * coord[1] / inputSize[1][1] + h.box[1],
(h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2] (h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2]
]); ]);
updateBoxes(h, hand3.keypoints); const updatedBox = scaleBox(hand3.keypoints, boxScaleFact, outputSize);
h.box = updatedBox.box;
h.boxRaw = updatedBox.boxRaw;
h.yxBox = updatedBox.yxBox;
hand3.box = h.box; hand3.box = h.box;
hand3.landmarks = analyze(hand3.keypoints); hand3.landmarks = analyze(hand3.keypoints);
for (const key of Object.keys(fingerMap)) { for (const key of Object.keys(fingerMap)) {
@ -10092,11 +10088,13 @@ async function predict8(image24, config3) {
// src/movenet/movenet.ts // src/movenet/movenet.ts
var tf19 = __toModule(require_tfjs_esm()); var tf19 = __toModule(require_tfjs_esm());
var model6; var model6;
var keypoints2 = []; var inputSize2 = 0;
var cachedBoxes = [];
var box5 = [0, 0, 0, 0]; var box5 = [0, 0, 0, 0];
var boxRaw2 = [0, 0, 0, 0]; var boxRaw2 = [0, 0, 0, 0];
var score2 = 0; var score2 = 0;
var skipped5 = Number.MAX_SAFE_INTEGER; var skipped5 = Number.MAX_SAFE_INTEGER;
var keypoints2 = [];
var bodyParts2 = ["nose", "leftEye", "rightEye", "leftEar", "rightEar", "leftShoulder", "rightShoulder", "leftElbow", "rightElbow", "leftWrist", "rightWrist", "leftHip", "rightHip", "leftKnee", "rightKnee", "leftAnkle", "rightAnkle"]; var bodyParts2 = ["nose", "leftEye", "rightEye", "leftEar", "rightEar", "leftShoulder", "rightShoulder", "leftElbow", "rightElbow", "leftWrist", "rightWrist", "leftHip", "rightHip", "leftKnee", "rightKnee", "leftAnkle", "rightAnkle"];
async function load9(config3) { async function load9(config3) {
if (env.initial) if (env.initial)
@ -10110,24 +10108,28 @@ async function load9(config3) {
log("load model:", model6["modelUrl"]); log("load model:", model6["modelUrl"]);
} else if (config3.debug) } else if (config3.debug)
log("cached model:", model6["modelUrl"]); log("cached model:", model6["modelUrl"]);
inputSize2 = model6.inputs[0].shape ? model6.inputs[0].shape[2] : 0;
if (inputSize2 === -1)
inputSize2 = 256;
return model6; return model6;
} }
async function parseSinglePose(res, config3, image24) { async function parseSinglePose(res, config3, image24, inputBox) {
keypoints2.length = 0;
const kpt3 = res[0][0]; const kpt3 = res[0][0];
keypoints2.length = 0;
for (let id = 0; id < kpt3.length; id++) { for (let id = 0; id < kpt3.length; id++) {
score2 = kpt3[id][2]; score2 = kpt3[id][2];
if (score2 > config3.body.minConfidence) { if (score2 > config3.body.minConfidence) {
const positionRaw = [
(inputBox[3] - inputBox[1]) * kpt3[id][1] + inputBox[1],
(inputBox[2] - inputBox[0]) * kpt3[id][0] + inputBox[0]
];
keypoints2.push({ keypoints2.push({
score: Math.round(100 * score2) / 100, score: Math.round(100 * score2) / 100,
part: bodyParts2[id], part: bodyParts2[id],
positionRaw: [ positionRaw,
kpt3[id][1],
kpt3[id][0]
],
position: [ position: [
Math.round((image24.shape[2] || 0) * kpt3[id][1]), Math.round((image24.shape[2] || 0) * positionRaw[0]),
Math.round((image24.shape[1] || 0) * kpt3[id][0]) Math.round((image24.shape[1] || 0) * positionRaw[1])
] ]
}); });
} }
@ -10149,12 +10151,12 @@ async function parseSinglePose(res, config3, image24) {
Math.max(...xRaw) - Math.min(...xRaw), Math.max(...xRaw) - Math.min(...xRaw),
Math.max(...yRaw) - Math.min(...yRaw) Math.max(...yRaw) - Math.min(...yRaw)
]; ];
const persons2 = []; const bodies = [];
persons2.push({ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 }); bodies.push({ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 });
return persons2; return bodies;
} }
async function parseMultiPose(res, config3, image24) { async function parseMultiPose(res, config3, image24, inputBox) {
const persons2 = []; const bodies = [];
for (let id = 0; id < res[0].length; id++) { for (let id = 0; id < res[0].length; id++) {
const kpt3 = res[0][id]; const kpt3 = res[0][id];
score2 = Math.round(100 * kpt3[51 + 4]) / 100; score2 = Math.round(100 * kpt3[51 + 4]) / 100;
@ -10164,16 +10166,20 @@ async function parseMultiPose(res, config3, image24) {
for (let i = 0; i < 17; i++) { for (let i = 0; i < 17; i++) {
const partScore = Math.round(100 * kpt3[3 * i + 2]) / 100; const partScore = Math.round(100 * kpt3[3 * i + 2]) / 100;
if (partScore > config3.body.minConfidence) { if (partScore > config3.body.minConfidence) {
const positionRaw = [
(inputBox[3] - inputBox[1]) * kpt3[3 * i + 1] + inputBox[1],
(inputBox[2] - inputBox[0]) * kpt3[3 * i + 0] + inputBox[0]
];
keypoints2.push({ keypoints2.push({
part: bodyParts2[i], part: bodyParts2[i],
score: partScore, score: partScore,
positionRaw: [kpt3[3 * i + 1], kpt3[3 * i + 0]], positionRaw,
position: [Math.trunc(kpt3[3 * i + 1] * (image24.shape[2] || 0)), Math.trunc(kpt3[3 * i + 0] * (image24.shape[1] || 0))] position: [Math.trunc(positionRaw[0] * (image24.shape[2] || 0)), Math.trunc(positionRaw[0] * (image24.shape[1] || 0))]
}); });
} }
} }
boxRaw2 = [kpt3[51 + 1], kpt3[51 + 0], kpt3[51 + 3] - kpt3[51 + 1], kpt3[51 + 2] - kpt3[51 + 0]]; boxRaw2 = [kpt3[51 + 1], kpt3[51 + 0], kpt3[51 + 3] - kpt3[51 + 1], kpt3[51 + 2] - kpt3[51 + 0]];
persons2.push({ bodies.push({
id, id,
score: score2, score: score2,
boxRaw: boxRaw2, boxRaw: boxRaw2,
@ -10186,39 +10192,47 @@ async function parseMultiPose(res, config3, image24) {
keypoints: [...keypoints2] keypoints: [...keypoints2]
}); });
} }
return persons2; return bodies;
} }
async function predict9(image24, config3) { async function predict9(input, config3) {
if (skipped5 < (config3.body.skipFrames || 0) && config3.skipFrame && Object.keys(keypoints2).length > 0) { if (!model6 || !(model6 == null ? void 0 : model6.inputs[0].shape))
skipped5++; return [];
return [{ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 }];
}
skipped5 = 0;
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const tensor3 = tf19.tidy(() => { const t = {};
if (!(model6 == null ? void 0 : model6.inputs[0].shape)) let bodies = [];
return null; if (!config3.skipFrame)
let inputSize3 = model6.inputs[0].shape[2]; cachedBoxes.length = 0;
if (inputSize3 === -1) skipped5++;
inputSize3 = 256; for (let i = 0; i < cachedBoxes.length; i++) {
const resize = tf19.image.resizeBilinear(image24, [inputSize3, inputSize3], false); t.crop = tf19.image.cropAndResize(input, [cachedBoxes[i]], [0], [inputSize2, inputSize2], "bilinear");
const cast5 = tf19.cast(resize, "int32"); t.cast = tf19.cast(t.crop, "int32");
return cast5; t.res = await (model6 == null ? void 0 : model6.predict(t.cast));
}); const res = await t.res.array();
let resT; const newBodies = t.res.shape[2] === 17 ? await parseSinglePose(res, config3, input, cachedBoxes[i]) : await parseMultiPose(res, config3, input, cachedBoxes[i]);
if (config3.body.enabled) bodies = bodies.concat(newBodies);
resT = await (model6 == null ? void 0 : model6.predict(tensor3)); Object.keys(t).forEach((tensor3) => tf19.dispose(t[tensor3]));
tf19.dispose(tensor3); }
if (!resT) if (bodies.length !== config3.body.maxDetected && skipped5 > (config3.body.skipFrames || 0)) {
resolve([]); t.resized = tf19.image.resizeBilinear(input, [inputSize2, inputSize2], false);
const res = await resT.array(); t.cast = tf19.cast(t.resized, "int32");
let body4; t.res = await (model6 == null ? void 0 : model6.predict(t.cast));
if (resT.shape[2] === 17) const res = await t.res.array();
body4 = await parseSinglePose(res, config3, image24); bodies = t.res.shape[2] === 17 ? await parseSinglePose(res, config3, input, [0, 0, 1, 1]) : await parseMultiPose(res, config3, input, [0, 0, 1, 1]);
else if (resT.shape[2] === 56) Object.keys(t).forEach((tensor3) => tf19.dispose(t[tensor3]));
body4 = await parseMultiPose(res, config3, image24); cachedBoxes.length = 0;
tf19.dispose(resT); skipped5 = 0;
resolve(body4); }
if (config3.skipFrame) {
cachedBoxes.length = 0;
for (let i = 0; i < bodies.length; i++) {
if (bodies[i].keypoints.length > 10) {
const kpts = bodies[i].keypoints.map((kpt3) => kpt3.position);
const newBox = scaleBox(kpts, 1.5, [input.shape[2], input.shape[1]]);
cachedBoxes.push([...newBox.yxBox]);
}
}
}
resolve(bodies);
}); });
} }
@ -10313,7 +10327,7 @@ var labels = [
var model7; var model7;
var last3 = []; var last3 = [];
var skipped6 = Number.MAX_SAFE_INTEGER; var skipped6 = Number.MAX_SAFE_INTEGER;
var scaleBox = 2.5; var scaleBox2 = 2.5;
async function load10(config3) { async function load10(config3) {
if (!model7 || env.initial) { if (!model7 || env.initial) {
model7 = await tf20.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || "")); model7 = await tf20.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || ""));
@ -10329,7 +10343,7 @@ async function load10(config3) {
log("cached model:", model7.modelUrl); log("cached model:", model7.modelUrl);
return model7; return model7;
} }
async function process3(res, inputSize3, outputShape, config3) { async function process3(res, inputSize4, outputShape, config3) {
let id = 0; let id = 0;
let results = []; let results = [];
for (const strideSize of [1, 2, 4]) { for (const strideSize of [1, 2, 4]) {
@ -10347,14 +10361,14 @@ async function process3(res, inputSize3, outputShape, config3) {
if (score3 > config3.object.minConfidence && j !== 61) { if (score3 > config3.object.minConfidence && j !== 61) {
const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize;
const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize;
const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize3)); const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize4));
const [x, y] = [ const [x, y] = [
cx - scaleBox / strideSize * boxOffset[0], cx - scaleBox2 / strideSize * boxOffset[0],
cy - scaleBox / strideSize * boxOffset[1] cy - scaleBox2 / strideSize * boxOffset[1]
]; ];
const [w, h] = [ const [w, h] = [
cx + scaleBox / strideSize * boxOffset[2] - x, cx + scaleBox2 / strideSize * boxOffset[2] - x,
cy + scaleBox / strideSize * boxOffset[3] - y cy + scaleBox2 / strideSize * boxOffset[3] - y
]; ];
let boxRaw3 = [x, y, w, h]; let boxRaw3 = [x, y, w, h];
boxRaw3 = boxRaw3.map((a) => Math.max(0, Math.min(a, 1))); boxRaw3 = boxRaw3.map((a) => Math.max(0, Math.min(a, 1)));
@ -10418,7 +10432,7 @@ async function predict10(image24, config3) {
// src/object/centernet.ts // src/object/centernet.ts
var tf21 = __toModule(require_tfjs_esm()); var tf21 = __toModule(require_tfjs_esm());
var model8; var model8;
var inputSize2 = 0; var inputSize3 = 0;
var last4 = []; var last4 = [];
var skipped7 = Number.MAX_SAFE_INTEGER; var skipped7 = Number.MAX_SAFE_INTEGER;
async function load11(config3) { async function load11(config3) {
@ -10428,7 +10442,7 @@ async function load11(config3) {
fakeOps(["floormod"], config3); fakeOps(["floormod"], config3);
model8 = await tf21.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || "")); model8 = await tf21.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || ""));
const inputs = Object.values(model8.modelSignature["inputs"]); const inputs = Object.values(model8.modelSignature["inputs"]);
inputSize2 = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0; inputSize3 = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
if (!model8 || !model8["modelUrl"]) if (!model8 || !model8["modelUrl"])
log("load model failed:", config3.object.modelPath); log("load model failed:", config3.object.modelPath);
else if (config3.debug) else if (config3.debug)
@ -10464,14 +10478,14 @@ async function process4(res, outputShape, config3) {
const classVal = detections[0][id][5]; const classVal = detections[0][id][5];
const label = labels[classVal].label; const label = labels[classVal].label;
const [x, y] = [ const [x, y] = [
detections[0][id][0] / inputSize2, detections[0][id][0] / inputSize3,
detections[0][id][1] / inputSize2 detections[0][id][1] / inputSize3
]; ];
const boxRaw3 = [ const boxRaw3 = [
x, x,
y, y,
detections[0][id][2] / inputSize2 - x, detections[0][id][2] / inputSize3 - x,
detections[0][id][3] / inputSize2 - y detections[0][id][3] / inputSize3 - y
]; ];
const box6 = [ const box6 = [
Math.trunc(boxRaw3[0] * outputShape[0]), Math.trunc(boxRaw3[0] * outputShape[0]),
@ -10493,7 +10507,7 @@ async function predict11(input, config3) {
return last4; return last4;
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const outputSize2 = [input.shape[2], input.shape[1]]; const outputSize2 = [input.shape[2], input.shape[1]];
const resize = tf21.image.resizeBilinear(input, [inputSize2, inputSize2]); const resize = tf21.image.resizeBilinear(input, [inputSize3, inputSize3]);
const objectT = config3.object.enabled ? model8 == null ? void 0 : model8.execute(resize, ["tower_0/detections"]) : null; const objectT = config3.object.enabled ? model8 == null ? void 0 : model8.execute(resize, ["tower_0/detections"]) : null;
tf21.dispose(resize); tf21.dispose(resize);
const obj = await process4(objectT, outputSize2, config3); const obj = await process4(objectT, outputSize2, config3);
@ -12721,7 +12735,7 @@ var Human = class {
faceRes = await faceRes; faceRes = await faceRes;
this.analyze("Start Body:"); this.analyze("Start Body:");
this.state = "detect:body"; this.state = "detect:body";
const bodyConfig = this.config.body.maxDetected === -1 ? mergeDeep(this.config, { body: { maxDetected: 1 * faceRes.length } }) : this.config; const bodyConfig = this.config.body.maxDetected === -1 ? mergeDeep(this.config, { body: { maxDetected: this.config.face.enabled ? 1 * faceRes.length : 1 } }) : this.config;
if (this.config.async) { if (this.config.async) {
if ((_a = this.config.body.modelPath) == null ? void 0 : _a.includes("posenet")) if ((_a = this.config.body.modelPath) == null ? void 0 : _a.includes("posenet"))
bodyRes = this.config.body.enabled ? predict4(img.tensor, bodyConfig) : []; bodyRes = this.config.body.enabled ? predict4(img.tensor, bodyConfig) : [];
@ -12750,7 +12764,7 @@ var Human = class {
this.analyze("End Body:"); this.analyze("End Body:");
this.analyze("Start Hand:"); this.analyze("Start Hand:");
this.state = "detect:hand"; this.state = "detect:hand";
const handConfig = this.config.hand.maxDetected === -1 ? mergeDeep(this.config, { hand: { maxDetected: 2 * faceRes.length } }) : this.config; const handConfig = this.config.hand.maxDetected === -1 ? mergeDeep(this.config, { hand: { maxDetected: this.config.face.enabled ? 2 * faceRes.length : 1 } }) : this.config;
if (this.config.async) { if (this.config.async) {
if ((_j = (_i = this.config.hand.detector) == null ? void 0 : _i.modelPath) == null ? void 0 : _j.includes("handdetect")) if ((_j = (_i = this.config.hand.detector) == null ? void 0 : _i.modelPath) == null ? void 0 : _j.includes("handdetect"))
handRes = this.config.hand.enabled ? predict5(img.tensor, handConfig) : []; handRes = this.config.hand.enabled ? predict5(img.tensor, handConfig) : [];

View File

@ -143,6 +143,31 @@ function mergeDeep(...objects) {
return prev; return prev;
}, {}); }, {});
} }
// Builds a square box around a set of points and enlarges it by a scale factor.
//   keypoints3:    array of points; only index 0 and index 1 of each point are read
//   boxScaleFact2: multiplier applied to the half-size of the enclosing square
//   outputSize2:   two-element array; boxRaw divides index-0 values (offset, size) by
//                  outputSize2[0] and index-1 values by outputSize2[1]
// Returns { box, boxRaw, yxBox }:
//   box    - [start0, start1, size, size], truncated to integers, in the units of the points
//   boxRaw - box divided element-wise by outputSize2 as described above
//   yxBox  - boxRaw reordered as [start1, start0, start1 + size1, start0 + size0]
// An empty point list yields all-zero boxes instead of NaN values.
function scaleBox(keypoints3, boxScaleFact2, outputSize2) {
  // Math.max()/Math.min() of nothing are -Infinity/Infinity, which would turn every
  // derived value into NaN, so return a well-defined empty result instead
  if (keypoints3.length === 0) {
    return { box: [0, 0, 0, 0], boxRaw: [0, 0, 0, 0], yxBox: [0, 0, 0, 0] };
  }
  // single pass over the points; avoids building temporary arrays and spreading them
  // into Math.max/Math.min, which is limited by the engine's maximum argument count
  let min0 = Infinity;
  let max0 = -Infinity;
  let min1 = Infinity;
  let max1 = -Infinity;
  for (const pt of keypoints3) {
    min0 = Math.min(min0, pt[0]);
    max0 = Math.max(max0, pt[0]);
    min1 = Math.min(min1, pt[1]);
    max1 = Math.max(max1, pt[1]);
  }
  const center = [(max0 + min0) / 2, (max1 + min1) / 2];
  // largest distance from the center to any edge, so the resulting box is square
  const diff = Math.max(center[0] - min0, center[1] - min1, -center[0] + max0, -center[1] + max1) * boxScaleFact2;
  const box6 = [
    Math.trunc(center[0] - diff),
    Math.trunc(center[1] - diff),
    Math.trunc(2 * diff),
    Math.trunc(2 * diff)
  ];
  const boxRaw3 = [
    box6[0] / outputSize2[0],
    box6[1] / outputSize2[1],
    box6[2] / outputSize2[0],
    box6[3] / outputSize2[1]
  ];
  const yxBox = [
    boxRaw3[1],
    boxRaw3[0],
    boxRaw3[3] + boxRaw3[1],
    boxRaw3[2] + boxRaw3[0]
  ];
  return { box: box6, boxRaw: boxRaw3, yxBox };
}
// src/config.ts // src/config.ts
var config = { var config = {
@ -379,13 +404,13 @@ function rotatePoint(homogeneousCoordinate, rotationMatrix) {
dot(homogeneousCoordinate, rotationMatrix[1]) dot(homogeneousCoordinate, rotationMatrix[1])
]; ];
} }
function generateAnchors(inputSize3) { function generateAnchors(inputSize4) {
const spec = { strides: [inputSize3 / 16, inputSize3 / 8], anchors: [2, 6] }; const spec = { strides: [inputSize4 / 16, inputSize4 / 8], anchors: [2, 6] };
const anchors3 = []; const anchors3 = [];
for (let i = 0; i < spec.strides.length; i++) { for (let i = 0; i < spec.strides.length; i++) {
const stride = spec.strides[i]; const stride = spec.strides[i];
const gridRows = Math.floor((inputSize3 + stride - 1) / stride); const gridRows = Math.floor((inputSize4 + stride - 1) / stride);
const gridCols = Math.floor((inputSize3 + stride - 1) / stride); const gridCols = Math.floor((inputSize4 + stride - 1) / stride);
const anchorsNum = spec.anchors[i]; const anchorsNum = spec.anchors[i];
for (let gridY = 0; gridY < gridRows; gridY++) { for (let gridY = 0; gridY < gridRows; gridY++) {
const anchorY = stride * (gridY + 0.5); const anchorY = stride * (gridY + 0.5);
@ -402,17 +427,17 @@ function generateAnchors(inputSize3) {
// src/blazeface/blazeface.ts // src/blazeface/blazeface.ts
var keypointsCount = 6; var keypointsCount = 6;
function decodeBounds(boxOutputs, anchors3, inputSize3) { function decodeBounds(boxOutputs, anchors3, inputSize4) {
const boxStarts = tf2.slice(boxOutputs, [0, 1], [-1, 2]); const boxStarts = tf2.slice(boxOutputs, [0, 1], [-1, 2]);
const centers = tf2.add(boxStarts, anchors3); const centers = tf2.add(boxStarts, anchors3);
const boxSizes = tf2.slice(boxOutputs, [0, 3], [-1, 2]); const boxSizes = tf2.slice(boxOutputs, [0, 3], [-1, 2]);
const boxSizesNormalized = tf2.div(boxSizes, inputSize3); const boxSizesNormalized = tf2.div(boxSizes, inputSize4);
const centersNormalized = tf2.div(centers, inputSize3); const centersNormalized = tf2.div(centers, inputSize4);
const halfBoxSize = tf2.div(boxSizesNormalized, 2); const halfBoxSize = tf2.div(boxSizesNormalized, 2);
const starts = tf2.sub(centersNormalized, halfBoxSize); const starts = tf2.sub(centersNormalized, halfBoxSize);
const ends = tf2.add(centersNormalized, halfBoxSize); const ends = tf2.add(centersNormalized, halfBoxSize);
const startNormalized = tf2.mul(starts, inputSize3); const startNormalized = tf2.mul(starts, inputSize4);
const endNormalized = tf2.mul(ends, inputSize3); const endNormalized = tf2.mul(ends, inputSize4);
const concatAxis = 1; const concatAxis = 1;
return tf2.concat2d([startNormalized, endNormalized], concatAxis); return tf2.concat2d([startNormalized, endNormalized], concatAxis);
} }
@ -9643,17 +9668,9 @@ var boxScaleFact = 1.5;
var models2 = [null, null]; var models2 = [null, null];
var modelOutputNodes = ["StatefulPartitionedCall/Postprocessor/Slice", "StatefulPartitionedCall/Postprocessor/ExpandDims_1"]; var modelOutputNodes = ["StatefulPartitionedCall/Postprocessor/Slice", "StatefulPartitionedCall/Postprocessor/ExpandDims_1"];
var inputSize = [[0, 0], [0, 0]]; var inputSize = [[0, 0], [0, 0]];
var classes = [ var classes = ["hand", "fist", "pinch", "point", "face", "tip", "pinchtip"];
"hand",
"fist",
"pinch",
"point",
"face",
"tip",
"pinchtip"
];
var skipped3 = 0; var skipped3 = 0;
var outputSize; var outputSize = [0, 0];
var cache = { var cache = {
handBoxes: [], handBoxes: [],
fingerBoxes: [], fingerBoxes: [],
@ -9751,30 +9768,6 @@ async function detectHands(input, config3) {
hands.length = config3.hand.maxDetected || 1; hands.length = config3.hand.maxDetected || 1;
return hands; return hands;
} }
// Recomputes h.box, h.boxRaw and h.yxBox in place from a list of keypoints.
// The box is the square enclosing all points (index 0 / index 1 of each point),
// enlarged by the module-level boxScaleFact; boxRaw divides it by the module-level
// outputSize; yxBox is boxRaw reordered as [start1, start0, start1 + size1, start0 + size0].
function updateBoxes(h, keypoints3) {
  const first = keypoints3.map((pt) => pt[0]);
  const second = keypoints3.map((pt) => pt[1]);
  const lowFirst = Math.min(...first);
  const highFirst = Math.max(...first);
  const lowSecond = Math.min(...second);
  const highSecond = Math.max(...second);
  const midFirst = (lowFirst + highFirst) / 2;
  const midSecond = (lowSecond + highSecond) / 2;
  // half-size of the square: the largest center-to-edge distance, then scaled
  const half = Math.max(
    midFirst - lowFirst,
    midSecond - lowSecond,
    -midFirst + highFirst,
    -midSecond + highSecond
  ) * boxScaleFact;
  const side = Math.trunc(2 * half);
  h.box = [Math.trunc(midFirst - half), Math.trunc(midSecond - half), side, side];
  const [start0, start1, size0, size1] = h.box;
  h.boxRaw = [
    start0 / outputSize[0],
    start1 / outputSize[1],
    size0 / outputSize[0],
    size1 / outputSize[1]
  ];
  const [raw0, raw1, rawSize0, rawSize1] = h.boxRaw;
  h.yxBox = [raw1, raw0, rawSize1 + raw1, rawSize0 + raw0];
}
async function detectFingers(input, h, config3) { async function detectFingers(input, h, config3) {
const hand3 = { const hand3 = {
id: h.id, id: h.id,
@ -9808,7 +9801,10 @@ async function detectFingers(input, h, config3) {
h.box[3] * coord[1] / inputSize[1][1] + h.box[1], h.box[3] * coord[1] / inputSize[1][1] + h.box[1],
(h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2] (h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2]
]); ]);
updateBoxes(h, hand3.keypoints); const updatedBox = scaleBox(hand3.keypoints, boxScaleFact, outputSize);
h.box = updatedBox.box;
h.boxRaw = updatedBox.boxRaw;
h.yxBox = updatedBox.yxBox;
hand3.box = h.box; hand3.box = h.box;
hand3.landmarks = analyze(hand3.keypoints); hand3.landmarks = analyze(hand3.keypoints);
for (const key of Object.keys(fingerMap)) { for (const key of Object.keys(fingerMap)) {
@ -10093,11 +10089,13 @@ async function predict8(image24, config3) {
// src/movenet/movenet.ts // src/movenet/movenet.ts
var tf19 = __toModule(require_tfjs_esm()); var tf19 = __toModule(require_tfjs_esm());
var model6; var model6;
var keypoints2 = []; var inputSize2 = 0;
var cachedBoxes = [];
var box5 = [0, 0, 0, 0]; var box5 = [0, 0, 0, 0];
var boxRaw2 = [0, 0, 0, 0]; var boxRaw2 = [0, 0, 0, 0];
var score2 = 0; var score2 = 0;
var skipped5 = Number.MAX_SAFE_INTEGER; var skipped5 = Number.MAX_SAFE_INTEGER;
var keypoints2 = [];
var bodyParts2 = ["nose", "leftEye", "rightEye", "leftEar", "rightEar", "leftShoulder", "rightShoulder", "leftElbow", "rightElbow", "leftWrist", "rightWrist", "leftHip", "rightHip", "leftKnee", "rightKnee", "leftAnkle", "rightAnkle"]; var bodyParts2 = ["nose", "leftEye", "rightEye", "leftEar", "rightEar", "leftShoulder", "rightShoulder", "leftElbow", "rightElbow", "leftWrist", "rightWrist", "leftHip", "rightHip", "leftKnee", "rightKnee", "leftAnkle", "rightAnkle"];
async function load9(config3) { async function load9(config3) {
if (env.initial) if (env.initial)
@ -10111,24 +10109,28 @@ async function load9(config3) {
log("load model:", model6["modelUrl"]); log("load model:", model6["modelUrl"]);
} else if (config3.debug) } else if (config3.debug)
log("cached model:", model6["modelUrl"]); log("cached model:", model6["modelUrl"]);
inputSize2 = model6.inputs[0].shape ? model6.inputs[0].shape[2] : 0;
if (inputSize2 === -1)
inputSize2 = 256;
return model6; return model6;
} }
async function parseSinglePose(res, config3, image24) { async function parseSinglePose(res, config3, image24, inputBox) {
keypoints2.length = 0;
const kpt3 = res[0][0]; const kpt3 = res[0][0];
keypoints2.length = 0;
for (let id = 0; id < kpt3.length; id++) { for (let id = 0; id < kpt3.length; id++) {
score2 = kpt3[id][2]; score2 = kpt3[id][2];
if (score2 > config3.body.minConfidence) { if (score2 > config3.body.minConfidence) {
const positionRaw = [
(inputBox[3] - inputBox[1]) * kpt3[id][1] + inputBox[1],
(inputBox[2] - inputBox[0]) * kpt3[id][0] + inputBox[0]
];
keypoints2.push({ keypoints2.push({
score: Math.round(100 * score2) / 100, score: Math.round(100 * score2) / 100,
part: bodyParts2[id], part: bodyParts2[id],
positionRaw: [ positionRaw,
kpt3[id][1],
kpt3[id][0]
],
position: [ position: [
Math.round((image24.shape[2] || 0) * kpt3[id][1]), Math.round((image24.shape[2] || 0) * positionRaw[0]),
Math.round((image24.shape[1] || 0) * kpt3[id][0]) Math.round((image24.shape[1] || 0) * positionRaw[1])
] ]
}); });
} }
@ -10150,12 +10152,12 @@ async function parseSinglePose(res, config3, image24) {
Math.max(...xRaw) - Math.min(...xRaw), Math.max(...xRaw) - Math.min(...xRaw),
Math.max(...yRaw) - Math.min(...yRaw) Math.max(...yRaw) - Math.min(...yRaw)
]; ];
const persons2 = []; const bodies = [];
persons2.push({ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 }); bodies.push({ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 });
return persons2; return bodies;
} }
async function parseMultiPose(res, config3, image24) { async function parseMultiPose(res, config3, image24, inputBox) {
const persons2 = []; const bodies = [];
for (let id = 0; id < res[0].length; id++) { for (let id = 0; id < res[0].length; id++) {
const kpt3 = res[0][id]; const kpt3 = res[0][id];
score2 = Math.round(100 * kpt3[51 + 4]) / 100; score2 = Math.round(100 * kpt3[51 + 4]) / 100;
@ -10165,16 +10167,20 @@ async function parseMultiPose(res, config3, image24) {
for (let i = 0; i < 17; i++) { for (let i = 0; i < 17; i++) {
const partScore = Math.round(100 * kpt3[3 * i + 2]) / 100; const partScore = Math.round(100 * kpt3[3 * i + 2]) / 100;
if (partScore > config3.body.minConfidence) { if (partScore > config3.body.minConfidence) {
const positionRaw = [
(inputBox[3] - inputBox[1]) * kpt3[3 * i + 1] + inputBox[1],
(inputBox[2] - inputBox[0]) * kpt3[3 * i + 0] + inputBox[0]
];
keypoints2.push({ keypoints2.push({
part: bodyParts2[i], part: bodyParts2[i],
score: partScore, score: partScore,
positionRaw: [kpt3[3 * i + 1], kpt3[3 * i + 0]], positionRaw,
position: [Math.trunc(kpt3[3 * i + 1] * (image24.shape[2] || 0)), Math.trunc(kpt3[3 * i + 0] * (image24.shape[1] || 0))] position: [Math.trunc(positionRaw[0] * (image24.shape[2] || 0)), Math.trunc(positionRaw[0] * (image24.shape[1] || 0))]
}); });
} }
} }
boxRaw2 = [kpt3[51 + 1], kpt3[51 + 0], kpt3[51 + 3] - kpt3[51 + 1], kpt3[51 + 2] - kpt3[51 + 0]]; boxRaw2 = [kpt3[51 + 1], kpt3[51 + 0], kpt3[51 + 3] - kpt3[51 + 1], kpt3[51 + 2] - kpt3[51 + 0]];
persons2.push({ bodies.push({
id, id,
score: score2, score: score2,
boxRaw: boxRaw2, boxRaw: boxRaw2,
@ -10187,39 +10193,47 @@ async function parseMultiPose(res, config3, image24) {
keypoints: [...keypoints2] keypoints: [...keypoints2]
}); });
} }
return persons2; return bodies;
} }
async function predict9(image24, config3) { async function predict9(input, config3) {
if (skipped5 < (config3.body.skipFrames || 0) && config3.skipFrame && Object.keys(keypoints2).length > 0) { if (!model6 || !(model6 == null ? void 0 : model6.inputs[0].shape))
skipped5++; return [];
return [{ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 }];
}
skipped5 = 0;
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const tensor3 = tf19.tidy(() => { const t = {};
if (!(model6 == null ? void 0 : model6.inputs[0].shape)) let bodies = [];
return null; if (!config3.skipFrame)
let inputSize3 = model6.inputs[0].shape[2]; cachedBoxes.length = 0;
if (inputSize3 === -1) skipped5++;
inputSize3 = 256; for (let i = 0; i < cachedBoxes.length; i++) {
const resize = tf19.image.resizeBilinear(image24, [inputSize3, inputSize3], false); t.crop = tf19.image.cropAndResize(input, [cachedBoxes[i]], [0], [inputSize2, inputSize2], "bilinear");
const cast5 = tf19.cast(resize, "int32"); t.cast = tf19.cast(t.crop, "int32");
return cast5; t.res = await (model6 == null ? void 0 : model6.predict(t.cast));
}); const res = await t.res.array();
let resT; const newBodies = t.res.shape[2] === 17 ? await parseSinglePose(res, config3, input, cachedBoxes[i]) : await parseMultiPose(res, config3, input, cachedBoxes[i]);
if (config3.body.enabled) bodies = bodies.concat(newBodies);
resT = await (model6 == null ? void 0 : model6.predict(tensor3)); Object.keys(t).forEach((tensor3) => tf19.dispose(t[tensor3]));
tf19.dispose(tensor3); }
if (!resT) if (bodies.length !== config3.body.maxDetected && skipped5 > (config3.body.skipFrames || 0)) {
resolve([]); t.resized = tf19.image.resizeBilinear(input, [inputSize2, inputSize2], false);
const res = await resT.array(); t.cast = tf19.cast(t.resized, "int32");
let body4; t.res = await (model6 == null ? void 0 : model6.predict(t.cast));
if (resT.shape[2] === 17) const res = await t.res.array();
body4 = await parseSinglePose(res, config3, image24); bodies = t.res.shape[2] === 17 ? await parseSinglePose(res, config3, input, [0, 0, 1, 1]) : await parseMultiPose(res, config3, input, [0, 0, 1, 1]);
else if (resT.shape[2] === 56) Object.keys(t).forEach((tensor3) => tf19.dispose(t[tensor3]));
body4 = await parseMultiPose(res, config3, image24); cachedBoxes.length = 0;
tf19.dispose(resT); skipped5 = 0;
resolve(body4); }
if (config3.skipFrame) {
cachedBoxes.length = 0;
for (let i = 0; i < bodies.length; i++) {
if (bodies[i].keypoints.length > 10) {
const kpts = bodies[i].keypoints.map((kpt3) => kpt3.position);
const newBox = scaleBox(kpts, 1.5, [input.shape[2], input.shape[1]]);
cachedBoxes.push([...newBox.yxBox]);
}
}
}
resolve(bodies);
}); });
} }
@ -10314,7 +10328,7 @@ var labels = [
var model7; var model7;
var last3 = []; var last3 = [];
var skipped6 = Number.MAX_SAFE_INTEGER; var skipped6 = Number.MAX_SAFE_INTEGER;
var scaleBox = 2.5; var scaleBox2 = 2.5;
async function load10(config3) { async function load10(config3) {
if (!model7 || env.initial) { if (!model7 || env.initial) {
model7 = await tf20.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || "")); model7 = await tf20.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || ""));
@ -10330,7 +10344,7 @@ async function load10(config3) {
log("cached model:", model7.modelUrl); log("cached model:", model7.modelUrl);
return model7; return model7;
} }
async function process3(res, inputSize3, outputShape, config3) { async function process3(res, inputSize4, outputShape, config3) {
let id = 0; let id = 0;
let results = []; let results = [];
for (const strideSize of [1, 2, 4]) { for (const strideSize of [1, 2, 4]) {
@ -10348,14 +10362,14 @@ async function process3(res, inputSize3, outputShape, config3) {
if (score3 > config3.object.minConfidence && j !== 61) { if (score3 > config3.object.minConfidence && j !== 61) {
const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize;
const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize;
const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize3)); const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize4));
const [x, y] = [ const [x, y] = [
cx - scaleBox / strideSize * boxOffset[0], cx - scaleBox2 / strideSize * boxOffset[0],
cy - scaleBox / strideSize * boxOffset[1] cy - scaleBox2 / strideSize * boxOffset[1]
]; ];
const [w, h] = [ const [w, h] = [
cx + scaleBox / strideSize * boxOffset[2] - x, cx + scaleBox2 / strideSize * boxOffset[2] - x,
cy + scaleBox / strideSize * boxOffset[3] - y cy + scaleBox2 / strideSize * boxOffset[3] - y
]; ];
let boxRaw3 = [x, y, w, h]; let boxRaw3 = [x, y, w, h];
boxRaw3 = boxRaw3.map((a) => Math.max(0, Math.min(a, 1))); boxRaw3 = boxRaw3.map((a) => Math.max(0, Math.min(a, 1)));
@ -10419,7 +10433,7 @@ async function predict10(image24, config3) {
// src/object/centernet.ts // src/object/centernet.ts
var tf21 = __toModule(require_tfjs_esm()); var tf21 = __toModule(require_tfjs_esm());
var model8; var model8;
var inputSize2 = 0; var inputSize3 = 0;
var last4 = []; var last4 = [];
var skipped7 = Number.MAX_SAFE_INTEGER; var skipped7 = Number.MAX_SAFE_INTEGER;
async function load11(config3) { async function load11(config3) {
@ -10429,7 +10443,7 @@ async function load11(config3) {
fakeOps(["floormod"], config3); fakeOps(["floormod"], config3);
model8 = await tf21.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || "")); model8 = await tf21.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || ""));
const inputs = Object.values(model8.modelSignature["inputs"]); const inputs = Object.values(model8.modelSignature["inputs"]);
inputSize2 = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0; inputSize3 = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
if (!model8 || !model8["modelUrl"]) if (!model8 || !model8["modelUrl"])
log("load model failed:", config3.object.modelPath); log("load model failed:", config3.object.modelPath);
else if (config3.debug) else if (config3.debug)
@ -10465,14 +10479,14 @@ async function process4(res, outputShape, config3) {
const classVal = detections[0][id][5]; const classVal = detections[0][id][5];
const label = labels[classVal].label; const label = labels[classVal].label;
const [x, y] = [ const [x, y] = [
detections[0][id][0] / inputSize2, detections[0][id][0] / inputSize3,
detections[0][id][1] / inputSize2 detections[0][id][1] / inputSize3
]; ];
const boxRaw3 = [ const boxRaw3 = [
x, x,
y, y,
detections[0][id][2] / inputSize2 - x, detections[0][id][2] / inputSize3 - x,
detections[0][id][3] / inputSize2 - y detections[0][id][3] / inputSize3 - y
]; ];
const box6 = [ const box6 = [
Math.trunc(boxRaw3[0] * outputShape[0]), Math.trunc(boxRaw3[0] * outputShape[0]),
@ -10494,7 +10508,7 @@ async function predict11(input, config3) {
return last4; return last4;
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const outputSize2 = [input.shape[2], input.shape[1]]; const outputSize2 = [input.shape[2], input.shape[1]];
const resize = tf21.image.resizeBilinear(input, [inputSize2, inputSize2]); const resize = tf21.image.resizeBilinear(input, [inputSize3, inputSize3]);
const objectT = config3.object.enabled ? model8 == null ? void 0 : model8.execute(resize, ["tower_0/detections"]) : null; const objectT = config3.object.enabled ? model8 == null ? void 0 : model8.execute(resize, ["tower_0/detections"]) : null;
tf21.dispose(resize); tf21.dispose(resize);
const obj = await process4(objectT, outputSize2, config3); const obj = await process4(objectT, outputSize2, config3);
@ -12722,7 +12736,7 @@ var Human = class {
faceRes = await faceRes; faceRes = await faceRes;
this.analyze("Start Body:"); this.analyze("Start Body:");
this.state = "detect:body"; this.state = "detect:body";
const bodyConfig = this.config.body.maxDetected === -1 ? mergeDeep(this.config, { body: { maxDetected: 1 * faceRes.length } }) : this.config; const bodyConfig = this.config.body.maxDetected === -1 ? mergeDeep(this.config, { body: { maxDetected: this.config.face.enabled ? 1 * faceRes.length : 1 } }) : this.config;
if (this.config.async) { if (this.config.async) {
if ((_a = this.config.body.modelPath) == null ? void 0 : _a.includes("posenet")) if ((_a = this.config.body.modelPath) == null ? void 0 : _a.includes("posenet"))
bodyRes = this.config.body.enabled ? predict4(img.tensor, bodyConfig) : []; bodyRes = this.config.body.enabled ? predict4(img.tensor, bodyConfig) : [];
@ -12751,7 +12765,7 @@ var Human = class {
this.analyze("End Body:"); this.analyze("End Body:");
this.analyze("Start Hand:"); this.analyze("Start Hand:");
this.state = "detect:hand"; this.state = "detect:hand";
const handConfig = this.config.hand.maxDetected === -1 ? mergeDeep(this.config, { hand: { maxDetected: 2 * faceRes.length } }) : this.config; const handConfig = this.config.hand.maxDetected === -1 ? mergeDeep(this.config, { hand: { maxDetected: this.config.face.enabled ? 2 * faceRes.length : 1 } }) : this.config;
if (this.config.async) { if (this.config.async) {
if ((_j = (_i = this.config.hand.detector) == null ? void 0 : _i.modelPath) == null ? void 0 : _j.includes("handdetect")) if ((_j = (_i = this.config.hand.detector) == null ? void 0 : _i.modelPath) == null ? void 0 : _j.includes("handdetect"))
handRes = this.config.hand.enabled ? predict5(img.tensor, handConfig) : []; handRes = this.config.hand.enabled ? predict5(img.tensor, handConfig) : [];

230
dist/human.node.js vendored
View File

@ -142,6 +142,31 @@ function mergeDeep(...objects) {
return prev; return prev;
}, {}); }, {});
} }
// Builds a square box around a set of points and enlarges it by a scale factor.
//   keypoints3:    array of points; only index 0 and index 1 of each point are read
//   boxScaleFact2: multiplier applied to the half-size of the enclosing square
//   outputSize2:   two-element array; boxRaw divides index-0 values (offset, size) by
//                  outputSize2[0] and index-1 values by outputSize2[1]
// Returns { box, boxRaw, yxBox }:
//   box    - [start0, start1, size, size], truncated to integers, in the units of the points
//   boxRaw - box divided element-wise by outputSize2 as described above
//   yxBox  - boxRaw reordered as [start1, start0, start1 + size1, start0 + size0]
// An empty point list yields all-zero boxes instead of NaN values.
function scaleBox(keypoints3, boxScaleFact2, outputSize2) {
  // Math.max()/Math.min() of nothing are -Infinity/Infinity, which would turn every
  // derived value into NaN, so return a well-defined empty result instead
  if (keypoints3.length === 0) {
    return { box: [0, 0, 0, 0], boxRaw: [0, 0, 0, 0], yxBox: [0, 0, 0, 0] };
  }
  // single pass over the points; avoids building temporary arrays and spreading them
  // into Math.max/Math.min, which is limited by the engine's maximum argument count
  let min0 = Infinity;
  let max0 = -Infinity;
  let min1 = Infinity;
  let max1 = -Infinity;
  for (const pt of keypoints3) {
    min0 = Math.min(min0, pt[0]);
    max0 = Math.max(max0, pt[0]);
    min1 = Math.min(min1, pt[1]);
    max1 = Math.max(max1, pt[1]);
  }
  const center = [(max0 + min0) / 2, (max1 + min1) / 2];
  // largest distance from the center to any edge, so the resulting box is square
  const diff = Math.max(center[0] - min0, center[1] - min1, -center[0] + max0, -center[1] + max1) * boxScaleFact2;
  const box6 = [
    Math.trunc(center[0] - diff),
    Math.trunc(center[1] - diff),
    Math.trunc(2 * diff),
    Math.trunc(2 * diff)
  ];
  const boxRaw3 = [
    box6[0] / outputSize2[0],
    box6[1] / outputSize2[1],
    box6[2] / outputSize2[0],
    box6[3] / outputSize2[1]
  ];
  const yxBox = [
    boxRaw3[1],
    boxRaw3[0],
    boxRaw3[3] + boxRaw3[1],
    boxRaw3[2] + boxRaw3[0]
  ];
  return { box: box6, boxRaw: boxRaw3, yxBox };
}
// src/config.ts // src/config.ts
var config = { var config = {
@ -378,13 +403,13 @@ function rotatePoint(homogeneousCoordinate, rotationMatrix) {
dot(homogeneousCoordinate, rotationMatrix[1]) dot(homogeneousCoordinate, rotationMatrix[1])
]; ];
} }
function generateAnchors(inputSize3) { function generateAnchors(inputSize4) {
const spec = { strides: [inputSize3 / 16, inputSize3 / 8], anchors: [2, 6] }; const spec = { strides: [inputSize4 / 16, inputSize4 / 8], anchors: [2, 6] };
const anchors3 = []; const anchors3 = [];
for (let i = 0; i < spec.strides.length; i++) { for (let i = 0; i < spec.strides.length; i++) {
const stride = spec.strides[i]; const stride = spec.strides[i];
const gridRows = Math.floor((inputSize3 + stride - 1) / stride); const gridRows = Math.floor((inputSize4 + stride - 1) / stride);
const gridCols = Math.floor((inputSize3 + stride - 1) / stride); const gridCols = Math.floor((inputSize4 + stride - 1) / stride);
const anchorsNum = spec.anchors[i]; const anchorsNum = spec.anchors[i];
for (let gridY = 0; gridY < gridRows; gridY++) { for (let gridY = 0; gridY < gridRows; gridY++) {
const anchorY = stride * (gridY + 0.5); const anchorY = stride * (gridY + 0.5);
@ -401,17 +426,17 @@ function generateAnchors(inputSize3) {
// src/blazeface/blazeface.ts // src/blazeface/blazeface.ts
var keypointsCount = 6; var keypointsCount = 6;
function decodeBounds(boxOutputs, anchors3, inputSize3) { function decodeBounds(boxOutputs, anchors3, inputSize4) {
const boxStarts = tf2.slice(boxOutputs, [0, 1], [-1, 2]); const boxStarts = tf2.slice(boxOutputs, [0, 1], [-1, 2]);
const centers = tf2.add(boxStarts, anchors3); const centers = tf2.add(boxStarts, anchors3);
const boxSizes = tf2.slice(boxOutputs, [0, 3], [-1, 2]); const boxSizes = tf2.slice(boxOutputs, [0, 3], [-1, 2]);
const boxSizesNormalized = tf2.div(boxSizes, inputSize3); const boxSizesNormalized = tf2.div(boxSizes, inputSize4);
const centersNormalized = tf2.div(centers, inputSize3); const centersNormalized = tf2.div(centers, inputSize4);
const halfBoxSize = tf2.div(boxSizesNormalized, 2); const halfBoxSize = tf2.div(boxSizesNormalized, 2);
const starts = tf2.sub(centersNormalized, halfBoxSize); const starts = tf2.sub(centersNormalized, halfBoxSize);
const ends = tf2.add(centersNormalized, halfBoxSize); const ends = tf2.add(centersNormalized, halfBoxSize);
const startNormalized = tf2.mul(starts, inputSize3); const startNormalized = tf2.mul(starts, inputSize4);
const endNormalized = tf2.mul(ends, inputSize3); const endNormalized = tf2.mul(ends, inputSize4);
const concatAxis = 1; const concatAxis = 1;
return tf2.concat2d([startNormalized, endNormalized], concatAxis); return tf2.concat2d([startNormalized, endNormalized], concatAxis);
} }
@ -9642,17 +9667,9 @@ var boxScaleFact = 1.5;
var models2 = [null, null]; var models2 = [null, null];
var modelOutputNodes = ["StatefulPartitionedCall/Postprocessor/Slice", "StatefulPartitionedCall/Postprocessor/ExpandDims_1"]; var modelOutputNodes = ["StatefulPartitionedCall/Postprocessor/Slice", "StatefulPartitionedCall/Postprocessor/ExpandDims_1"];
var inputSize = [[0, 0], [0, 0]]; var inputSize = [[0, 0], [0, 0]];
var classes = [ var classes = ["hand", "fist", "pinch", "point", "face", "tip", "pinchtip"];
"hand",
"fist",
"pinch",
"point",
"face",
"tip",
"pinchtip"
];
var skipped3 = 0; var skipped3 = 0;
var outputSize; var outputSize = [0, 0];
var cache = { var cache = {
handBoxes: [], handBoxes: [],
fingerBoxes: [], fingerBoxes: [],
@ -9750,30 +9767,6 @@ async function detectHands(input, config3) {
hands.length = config3.hand.maxDetected || 1; hands.length = config3.hand.maxDetected || 1;
return hands; return hands;
} }
// Recomputes h.box, h.boxRaw and h.yxBox in place from a list of keypoints.
// The box is the square enclosing all points (index 0 / index 1 of each point),
// enlarged by the module-level boxScaleFact; boxRaw divides it by the module-level
// outputSize; yxBox is boxRaw reordered as [start1, start0, start1 + size1, start0 + size0].
function updateBoxes(h, keypoints3) {
  const first = keypoints3.map((pt) => pt[0]);
  const second = keypoints3.map((pt) => pt[1]);
  const lowFirst = Math.min(...first);
  const highFirst = Math.max(...first);
  const lowSecond = Math.min(...second);
  const highSecond = Math.max(...second);
  const midFirst = (lowFirst + highFirst) / 2;
  const midSecond = (lowSecond + highSecond) / 2;
  // half-size of the square: the largest center-to-edge distance, then scaled
  const half = Math.max(
    midFirst - lowFirst,
    midSecond - lowSecond,
    -midFirst + highFirst,
    -midSecond + highSecond
  ) * boxScaleFact;
  const side = Math.trunc(2 * half);
  h.box = [Math.trunc(midFirst - half), Math.trunc(midSecond - half), side, side];
  const [start0, start1, size0, size1] = h.box;
  h.boxRaw = [
    start0 / outputSize[0],
    start1 / outputSize[1],
    size0 / outputSize[0],
    size1 / outputSize[1]
  ];
  const [raw0, raw1, rawSize0, rawSize1] = h.boxRaw;
  h.yxBox = [raw1, raw0, rawSize1 + raw1, rawSize0 + raw0];
}
async function detectFingers(input, h, config3) { async function detectFingers(input, h, config3) {
const hand3 = { const hand3 = {
id: h.id, id: h.id,
@ -9807,7 +9800,10 @@ async function detectFingers(input, h, config3) {
h.box[3] * coord[1] / inputSize[1][1] + h.box[1], h.box[3] * coord[1] / inputSize[1][1] + h.box[1],
(h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2] (h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2]
]); ]);
updateBoxes(h, hand3.keypoints); const updatedBox = scaleBox(hand3.keypoints, boxScaleFact, outputSize);
h.box = updatedBox.box;
h.boxRaw = updatedBox.boxRaw;
h.yxBox = updatedBox.yxBox;
hand3.box = h.box; hand3.box = h.box;
hand3.landmarks = analyze(hand3.keypoints); hand3.landmarks = analyze(hand3.keypoints);
for (const key of Object.keys(fingerMap)) { for (const key of Object.keys(fingerMap)) {
@ -10092,11 +10088,13 @@ async function predict8(image24, config3) {
// src/movenet/movenet.ts // src/movenet/movenet.ts
var tf19 = __toModule(require_tfjs_esm()); var tf19 = __toModule(require_tfjs_esm());
var model6; var model6;
var keypoints2 = []; var inputSize2 = 0;
var cachedBoxes = [];
var box5 = [0, 0, 0, 0]; var box5 = [0, 0, 0, 0];
var boxRaw2 = [0, 0, 0, 0]; var boxRaw2 = [0, 0, 0, 0];
var score2 = 0; var score2 = 0;
var skipped5 = Number.MAX_SAFE_INTEGER; var skipped5 = Number.MAX_SAFE_INTEGER;
var keypoints2 = [];
var bodyParts2 = ["nose", "leftEye", "rightEye", "leftEar", "rightEar", "leftShoulder", "rightShoulder", "leftElbow", "rightElbow", "leftWrist", "rightWrist", "leftHip", "rightHip", "leftKnee", "rightKnee", "leftAnkle", "rightAnkle"]; var bodyParts2 = ["nose", "leftEye", "rightEye", "leftEar", "rightEar", "leftShoulder", "rightShoulder", "leftElbow", "rightElbow", "leftWrist", "rightWrist", "leftHip", "rightHip", "leftKnee", "rightKnee", "leftAnkle", "rightAnkle"];
async function load9(config3) { async function load9(config3) {
if (env.initial) if (env.initial)
@ -10110,24 +10108,28 @@ async function load9(config3) {
log("load model:", model6["modelUrl"]); log("load model:", model6["modelUrl"]);
} else if (config3.debug) } else if (config3.debug)
log("cached model:", model6["modelUrl"]); log("cached model:", model6["modelUrl"]);
inputSize2 = model6.inputs[0].shape ? model6.inputs[0].shape[2] : 0;
if (inputSize2 === -1)
inputSize2 = 256;
return model6; return model6;
} }
async function parseSinglePose(res, config3, image24) { async function parseSinglePose(res, config3, image24, inputBox) {
keypoints2.length = 0;
const kpt3 = res[0][0]; const kpt3 = res[0][0];
keypoints2.length = 0;
for (let id = 0; id < kpt3.length; id++) { for (let id = 0; id < kpt3.length; id++) {
score2 = kpt3[id][2]; score2 = kpt3[id][2];
if (score2 > config3.body.minConfidence) { if (score2 > config3.body.minConfidence) {
const positionRaw = [
(inputBox[3] - inputBox[1]) * kpt3[id][1] + inputBox[1],
(inputBox[2] - inputBox[0]) * kpt3[id][0] + inputBox[0]
];
keypoints2.push({ keypoints2.push({
score: Math.round(100 * score2) / 100, score: Math.round(100 * score2) / 100,
part: bodyParts2[id], part: bodyParts2[id],
positionRaw: [ positionRaw,
kpt3[id][1],
kpt3[id][0]
],
position: [ position: [
Math.round((image24.shape[2] || 0) * kpt3[id][1]), Math.round((image24.shape[2] || 0) * positionRaw[0]),
Math.round((image24.shape[1] || 0) * kpt3[id][0]) Math.round((image24.shape[1] || 0) * positionRaw[1])
] ]
}); });
} }
@ -10149,12 +10151,12 @@ async function parseSinglePose(res, config3, image24) {
Math.max(...xRaw) - Math.min(...xRaw), Math.max(...xRaw) - Math.min(...xRaw),
Math.max(...yRaw) - Math.min(...yRaw) Math.max(...yRaw) - Math.min(...yRaw)
]; ];
const persons2 = []; const bodies = [];
persons2.push({ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 }); bodies.push({ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 });
return persons2; return bodies;
} }
async function parseMultiPose(res, config3, image24) { async function parseMultiPose(res, config3, image24, inputBox) {
const persons2 = []; const bodies = [];
for (let id = 0; id < res[0].length; id++) { for (let id = 0; id < res[0].length; id++) {
const kpt3 = res[0][id]; const kpt3 = res[0][id];
score2 = Math.round(100 * kpt3[51 + 4]) / 100; score2 = Math.round(100 * kpt3[51 + 4]) / 100;
@ -10164,16 +10166,20 @@ async function parseMultiPose(res, config3, image24) {
for (let i = 0; i < 17; i++) { for (let i = 0; i < 17; i++) {
const partScore = Math.round(100 * kpt3[3 * i + 2]) / 100; const partScore = Math.round(100 * kpt3[3 * i + 2]) / 100;
if (partScore > config3.body.minConfidence) { if (partScore > config3.body.minConfidence) {
const positionRaw = [
(inputBox[3] - inputBox[1]) * kpt3[3 * i + 1] + inputBox[1],
(inputBox[2] - inputBox[0]) * kpt3[3 * i + 0] + inputBox[0]
];
keypoints2.push({ keypoints2.push({
part: bodyParts2[i], part: bodyParts2[i],
score: partScore, score: partScore,
positionRaw: [kpt3[3 * i + 1], kpt3[3 * i + 0]], positionRaw,
position: [Math.trunc(kpt3[3 * i + 1] * (image24.shape[2] || 0)), Math.trunc(kpt3[3 * i + 0] * (image24.shape[1] || 0))] position: [Math.trunc(positionRaw[0] * (image24.shape[2] || 0)), Math.trunc(positionRaw[0] * (image24.shape[1] || 0))]
}); });
} }
} }
boxRaw2 = [kpt3[51 + 1], kpt3[51 + 0], kpt3[51 + 3] - kpt3[51 + 1], kpt3[51 + 2] - kpt3[51 + 0]]; boxRaw2 = [kpt3[51 + 1], kpt3[51 + 0], kpt3[51 + 3] - kpt3[51 + 1], kpt3[51 + 2] - kpt3[51 + 0]];
persons2.push({ bodies.push({
id, id,
score: score2, score: score2,
boxRaw: boxRaw2, boxRaw: boxRaw2,
@ -10186,39 +10192,47 @@ async function parseMultiPose(res, config3, image24) {
keypoints: [...keypoints2] keypoints: [...keypoints2]
}); });
} }
return persons2; return bodies;
} }
async function predict9(image24, config3) { async function predict9(input, config3) {
if (skipped5 < (config3.body.skipFrames || 0) && config3.skipFrame && Object.keys(keypoints2).length > 0) { if (!model6 || !(model6 == null ? void 0 : model6.inputs[0].shape))
skipped5++; return [];
return [{ id: 0, score: score2, box: box5, boxRaw: boxRaw2, keypoints: keypoints2 }];
}
skipped5 = 0;
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const tensor3 = tf19.tidy(() => { const t = {};
if (!(model6 == null ? void 0 : model6.inputs[0].shape)) let bodies = [];
return null; if (!config3.skipFrame)
let inputSize3 = model6.inputs[0].shape[2]; cachedBoxes.length = 0;
if (inputSize3 === -1) skipped5++;
inputSize3 = 256; for (let i = 0; i < cachedBoxes.length; i++) {
const resize = tf19.image.resizeBilinear(image24, [inputSize3, inputSize3], false); t.crop = tf19.image.cropAndResize(input, [cachedBoxes[i]], [0], [inputSize2, inputSize2], "bilinear");
const cast5 = tf19.cast(resize, "int32"); t.cast = tf19.cast(t.crop, "int32");
return cast5; t.res = await (model6 == null ? void 0 : model6.predict(t.cast));
}); const res = await t.res.array();
let resT; const newBodies = t.res.shape[2] === 17 ? await parseSinglePose(res, config3, input, cachedBoxes[i]) : await parseMultiPose(res, config3, input, cachedBoxes[i]);
if (config3.body.enabled) bodies = bodies.concat(newBodies);
resT = await (model6 == null ? void 0 : model6.predict(tensor3)); Object.keys(t).forEach((tensor3) => tf19.dispose(t[tensor3]));
tf19.dispose(tensor3); }
if (!resT) if (bodies.length !== config3.body.maxDetected && skipped5 > (config3.body.skipFrames || 0)) {
resolve([]); t.resized = tf19.image.resizeBilinear(input, [inputSize2, inputSize2], false);
const res = await resT.array(); t.cast = tf19.cast(t.resized, "int32");
let body4; t.res = await (model6 == null ? void 0 : model6.predict(t.cast));
if (resT.shape[2] === 17) const res = await t.res.array();
body4 = await parseSinglePose(res, config3, image24); bodies = t.res.shape[2] === 17 ? await parseSinglePose(res, config3, input, [0, 0, 1, 1]) : await parseMultiPose(res, config3, input, [0, 0, 1, 1]);
else if (resT.shape[2] === 56) Object.keys(t).forEach((tensor3) => tf19.dispose(t[tensor3]));
body4 = await parseMultiPose(res, config3, image24); cachedBoxes.length = 0;
tf19.dispose(resT); skipped5 = 0;
resolve(body4); }
if (config3.skipFrame) {
cachedBoxes.length = 0;
for (let i = 0; i < bodies.length; i++) {
if (bodies[i].keypoints.length > 10) {
const kpts = bodies[i].keypoints.map((kpt3) => kpt3.position);
const newBox = scaleBox(kpts, 1.5, [input.shape[2], input.shape[1]]);
cachedBoxes.push([...newBox.yxBox]);
}
}
}
resolve(bodies);
}); });
} }
@ -10313,7 +10327,7 @@ var labels = [
var model7; var model7;
var last3 = []; var last3 = [];
var skipped6 = Number.MAX_SAFE_INTEGER; var skipped6 = Number.MAX_SAFE_INTEGER;
var scaleBox = 2.5; var scaleBox2 = 2.5;
async function load10(config3) { async function load10(config3) {
if (!model7 || env.initial) { if (!model7 || env.initial) {
model7 = await tf20.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || "")); model7 = await tf20.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || ""));
@ -10329,7 +10343,7 @@ async function load10(config3) {
log("cached model:", model7.modelUrl); log("cached model:", model7.modelUrl);
return model7; return model7;
} }
async function process3(res, inputSize3, outputShape, config3) { async function process3(res, inputSize4, outputShape, config3) {
let id = 0; let id = 0;
let results = []; let results = [];
for (const strideSize of [1, 2, 4]) { for (const strideSize of [1, 2, 4]) {
@ -10347,14 +10361,14 @@ async function process3(res, inputSize3, outputShape, config3) {
if (score3 > config3.object.minConfidence && j !== 61) { if (score3 > config3.object.minConfidence && j !== 61) {
const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize;
const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize;
const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize3)); const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize4));
const [x, y] = [ const [x, y] = [
cx - scaleBox / strideSize * boxOffset[0], cx - scaleBox2 / strideSize * boxOffset[0],
cy - scaleBox / strideSize * boxOffset[1] cy - scaleBox2 / strideSize * boxOffset[1]
]; ];
const [w, h] = [ const [w, h] = [
cx + scaleBox / strideSize * boxOffset[2] - x, cx + scaleBox2 / strideSize * boxOffset[2] - x,
cy + scaleBox / strideSize * boxOffset[3] - y cy + scaleBox2 / strideSize * boxOffset[3] - y
]; ];
let boxRaw3 = [x, y, w, h]; let boxRaw3 = [x, y, w, h];
boxRaw3 = boxRaw3.map((a) => Math.max(0, Math.min(a, 1))); boxRaw3 = boxRaw3.map((a) => Math.max(0, Math.min(a, 1)));
@ -10418,7 +10432,7 @@ async function predict10(image24, config3) {
// src/object/centernet.ts // src/object/centernet.ts
var tf21 = __toModule(require_tfjs_esm()); var tf21 = __toModule(require_tfjs_esm());
var model8; var model8;
var inputSize2 = 0; var inputSize3 = 0;
var last4 = []; var last4 = [];
var skipped7 = Number.MAX_SAFE_INTEGER; var skipped7 = Number.MAX_SAFE_INTEGER;
async function load11(config3) { async function load11(config3) {
@ -10428,7 +10442,7 @@ async function load11(config3) {
fakeOps(["floormod"], config3); fakeOps(["floormod"], config3);
model8 = await tf21.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || "")); model8 = await tf21.loadGraphModel(join(config3.modelBasePath, config3.object.modelPath || ""));
const inputs = Object.values(model8.modelSignature["inputs"]); const inputs = Object.values(model8.modelSignature["inputs"]);
inputSize2 = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0; inputSize3 = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
if (!model8 || !model8["modelUrl"]) if (!model8 || !model8["modelUrl"])
log("load model failed:", config3.object.modelPath); log("load model failed:", config3.object.modelPath);
else if (config3.debug) else if (config3.debug)
@ -10464,14 +10478,14 @@ async function process4(res, outputShape, config3) {
const classVal = detections[0][id][5]; const classVal = detections[0][id][5];
const label = labels[classVal].label; const label = labels[classVal].label;
const [x, y] = [ const [x, y] = [
detections[0][id][0] / inputSize2, detections[0][id][0] / inputSize3,
detections[0][id][1] / inputSize2 detections[0][id][1] / inputSize3
]; ];
const boxRaw3 = [ const boxRaw3 = [
x, x,
y, y,
detections[0][id][2] / inputSize2 - x, detections[0][id][2] / inputSize3 - x,
detections[0][id][3] / inputSize2 - y detections[0][id][3] / inputSize3 - y
]; ];
const box6 = [ const box6 = [
Math.trunc(boxRaw3[0] * outputShape[0]), Math.trunc(boxRaw3[0] * outputShape[0]),
@ -10493,7 +10507,7 @@ async function predict11(input, config3) {
return last4; return last4;
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const outputSize2 = [input.shape[2], input.shape[1]]; const outputSize2 = [input.shape[2], input.shape[1]];
const resize = tf21.image.resizeBilinear(input, [inputSize2, inputSize2]); const resize = tf21.image.resizeBilinear(input, [inputSize3, inputSize3]);
const objectT = config3.object.enabled ? model8 == null ? void 0 : model8.execute(resize, ["tower_0/detections"]) : null; const objectT = config3.object.enabled ? model8 == null ? void 0 : model8.execute(resize, ["tower_0/detections"]) : null;
tf21.dispose(resize); tf21.dispose(resize);
const obj = await process4(objectT, outputSize2, config3); const obj = await process4(objectT, outputSize2, config3);
@ -12721,7 +12735,7 @@ var Human = class {
faceRes = await faceRes; faceRes = await faceRes;
this.analyze("Start Body:"); this.analyze("Start Body:");
this.state = "detect:body"; this.state = "detect:body";
const bodyConfig = this.config.body.maxDetected === -1 ? mergeDeep(this.config, { body: { maxDetected: 1 * faceRes.length } }) : this.config; const bodyConfig = this.config.body.maxDetected === -1 ? mergeDeep(this.config, { body: { maxDetected: this.config.face.enabled ? 1 * faceRes.length : 1 } }) : this.config;
if (this.config.async) { if (this.config.async) {
if ((_a = this.config.body.modelPath) == null ? void 0 : _a.includes("posenet")) if ((_a = this.config.body.modelPath) == null ? void 0 : _a.includes("posenet"))
bodyRes = this.config.body.enabled ? predict4(img.tensor, bodyConfig) : []; bodyRes = this.config.body.enabled ? predict4(img.tensor, bodyConfig) : [];
@ -12750,7 +12764,7 @@ var Human = class {
this.analyze("End Body:"); this.analyze("End Body:");
this.analyze("Start Hand:"); this.analyze("Start Hand:");
this.state = "detect:hand"; this.state = "detect:hand";
const handConfig = this.config.hand.maxDetected === -1 ? mergeDeep(this.config, { hand: { maxDetected: 2 * faceRes.length } }) : this.config; const handConfig = this.config.hand.maxDetected === -1 ? mergeDeep(this.config, { hand: { maxDetected: this.config.face.enabled ? 2 * faceRes.length : 1 } }) : this.config;
if (this.config.async) { if (this.config.async) {
if ((_j = (_i = this.config.hand.detector) == null ? void 0 : _i.modelPath) == null ? void 0 : _j.includes("handdetect")) if ((_j = (_i = this.config.hand.detector) == null ? void 0 : _i.modelPath) == null ? void 0 : _j.includes("handdetect"))
handRes = this.config.hand.enabled ? predict5(img.tensor, handConfig) : []; handRes = this.config.hand.enabled ? predict5(img.tensor, handConfig) : [];

View File

@ -6,7 +6,7 @@
* - Hand Tracking: [**HandTracking**](https://github.com/victordibia/handtracking) * - Hand Tracking: [**HandTracking**](https://github.com/victordibia/handtracking)
*/ */
import { log, join } from '../util'; import { log, join, scaleBox } from '../util';
import * as tf from '../../dist/tfjs.esm.js'; import * as tf from '../../dist/tfjs.esm.js';
import type { HandResult } from '../result'; import type { HandResult } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types'; import type { GraphModel, Tensor } from '../tfjs/types';
@ -21,18 +21,10 @@ const modelOutputNodes = ['StatefulPartitionedCall/Postprocessor/Slice', 'Statef
const inputSize = [[0, 0], [0, 0]]; const inputSize = [[0, 0], [0, 0]];
const classes = [ const classes = ['hand', 'fist', 'pinch', 'point', 'face', 'tip', 'pinchtip'];
'hand',
'fist',
'pinch',
'point',
'face',
'tip',
'pinchtip',
];
let skipped = 0; let skipped = 0;
let outputSize; let outputSize: [number, number] = [0, 0];
type HandDetectResult = { type HandDetectResult = {
id: number, id: number,
@ -145,31 +137,6 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectRes
return hands; return hands;
} }
// Replaces the boxes stored on `h` with a squared, scaled box calculated around the given keypoints.
// Writes three properties on `h`:
//   box    - [x, y, width, height], truncated to integers
//   boxRaw - box divided by the module-level outputSize ([0] for x/width, [1] for y/height)
//   yxBox  - [y, x, y + height, x + width] built from boxRaw
function updateBoxes(h, keypoints) {
  const xs = keypoints.map((pt) => pt[0]); // x coordinate of every keypoint
  const ys = keypoints.map((pt) => pt[1]); // y coordinate of every keypoint
  const [minX, maxX] = [Math.min(...xs), Math.max(...xs)];
  const [minY, maxY] = [Math.min(...ys), Math.max(...ys)];
  const centerX = (minX + maxX) / 2;
  const centerY = (minY + maxY) / 2;
  // largest distance from center in any direction, enlarged by the module-level scale factor
  const halfSide = Math.max(centerX - minX, centerY - minY, maxX - centerX, maxY - centerY) * boxScaleFact;
  const side = Math.trunc(2 * halfSide); // same value is used for width and height, so the box is square
  h.box = [Math.trunc(centerX - halfSide), Math.trunc(centerY - halfSide), side, side] as [number, number, number, number];
  const [left, top, width, height] = h.box;
  h.boxRaw = [left / outputSize[0], top / outputSize[1], width / outputSize[0], height / outputSize[1]] as [number, number, number, number];
  const [rawX, rawY, rawWidth, rawHeight] = h.boxRaw;
  h.yxBox = [rawY, rawX, rawHeight + rawY, rawWidth + rawX] as [number, number, number, number];
}
async function detectFingers(input: Tensor, h: HandDetectResult, config: Config): Promise<HandResult> { async function detectFingers(input: Tensor, h: HandDetectResult, config: Config): Promise<HandResult> {
const hand: HandResult = { const hand: HandResult = {
id: h.id, id: h.id,
@ -201,7 +168,10 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
(h.box[3] * coord[1] / inputSize[1][1]) + h.box[1], (h.box[3] * coord[1] / inputSize[1][1]) + h.box[1],
(h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2], (h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2],
]); ]);
updateBoxes(h, hand.keypoints); // replace detected box with box calculated around keypoints const updatedBox = scaleBox(hand.keypoints, boxScaleFact, outputSize); // replace detected box with box calculated around keypoints
h.box = updatedBox.box;
h.boxRaw = updatedBox.boxRaw;
h.yxBox = updatedBox.yxBox;
hand.box = h.box; hand.box = h.box;
hand.landmarks = fingerPose.analyze(hand.keypoints) as HandResult['landmarks']; // calculate finger landmarks hand.landmarks = fingerPose.analyze(hand.keypoints) as HandResult['landmarks']; // calculate finger landmarks
for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
@ -222,16 +192,13 @@ export async function predict(input: Tensor, config: Config): Promise<HandResult
if ((skipped < (config.hand.skipFrames || 0)) && config.skipFrame) { // just run finger detection while reusing cached boxes if ((skipped < (config.hand.skipFrames || 0)) && config.skipFrame) { // just run finger detection while reusing cached boxes
skipped++; skipped++;
hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
// console.log('SKIP', skipped, hands.length, cache.handBoxes.length, cache.fingerBoxes.length, cache.tmpBoxes.length);
} else { // calculate new boxes and run finger detection } else { // calculate new boxes and run finger detection
skipped = 0; skipped = 0;
hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
// console.log('CACHE', skipped, hands.length, cache.handBoxes.length, cache.fingerBoxes.length, cache.tmpBoxes.length);
if (hands.length !== config.hand.maxDetected) { // run hand detection only if we dont have enough hands in cache if (hands.length !== config.hand.maxDetected) { // run hand detection only if we dont have enough hands in cache
cache.handBoxes = await detectHands(input, config); cache.handBoxes = await detectHands(input, config);
const newHands = await Promise.all(cache.handBoxes.map((hand) => detectFingers(input, hand, config))); const newHands = await Promise.all(cache.handBoxes.map((hand) => detectFingers(input, hand, config)));
hands = hands.concat(newHands); hands = hands.concat(newHands);
// console.log('DETECT', skipped, hands.length, cache.handBoxes.length, cache.fingerBoxes.length, cache.tmpBoxes.length);
} }
} }
cache.fingerBoxes = [...cache.tmpBoxes]; // repopulate cache with validated hands cache.fingerBoxes = [...cache.tmpBoxes]; // repopulate cache with validated hands

View File

@ -458,7 +458,7 @@ export class Human {
// run body: can be posenet, blazepose, efficientpose, movenet // run body: can be posenet, blazepose, efficientpose, movenet
this.analyze('Start Body:'); this.analyze('Start Body:');
this.state = 'detect:body'; this.state = 'detect:body';
const bodyConfig = this.config.body.maxDetected === -1 ? mergeDeep(this.config, { body: { maxDetected: 1 * (faceRes as FaceResult[]).length } }) : this.config; // autodetect number of bodies const bodyConfig = this.config.body.maxDetected === -1 ? mergeDeep(this.config, { body: { maxDetected: this.config.face.enabled ? 1 * (faceRes as FaceResult[]).length : 1 } }) : this.config; // autodetect number of bodies
if (this.config.async) { if (this.config.async) {
if (this.config.body.modelPath?.includes('posenet')) bodyRes = this.config.body.enabled ? posenet.predict(img.tensor, bodyConfig) : []; if (this.config.body.modelPath?.includes('posenet')) bodyRes = this.config.body.enabled ? posenet.predict(img.tensor, bodyConfig) : [];
else if (this.config.body.modelPath?.includes('blazepose')) bodyRes = this.config.body.enabled ? blazepose.predict(img.tensor, bodyConfig) : []; else if (this.config.body.modelPath?.includes('blazepose')) bodyRes = this.config.body.enabled ? blazepose.predict(img.tensor, bodyConfig) : [];
@ -479,7 +479,7 @@ export class Human {
// run handpose // run handpose
this.analyze('Start Hand:'); this.analyze('Start Hand:');
this.state = 'detect:hand'; this.state = 'detect:hand';
const handConfig = this.config.hand.maxDetected === -1 ? mergeDeep(this.config, { hand: { maxDetected: 2 * (faceRes as FaceResult[]).length } }) : this.config; // autodetect number of hands const handConfig = this.config.hand.maxDetected === -1 ? mergeDeep(this.config, { hand: { maxDetected: this.config.face.enabled ? 2 * (faceRes as FaceResult[]).length : 1 } }) : this.config; // autodetect number of hands
if (this.config.async) { if (this.config.async) {
if (this.config.hand.detector?.modelPath?.includes('handdetect')) handRes = this.config.hand.enabled ? handpose.predict(img.tensor, handConfig) : []; if (this.config.hand.detector?.modelPath?.includes('handdetect')) handRes = this.config.hand.enabled ? handpose.predict(img.tensor, handConfig) : [];
else if (this.config.hand.detector?.modelPath?.includes('handtrack')) handRes = this.config.hand.enabled ? handtrack.predict(img.tensor, handConfig) : []; else if (this.config.hand.detector?.modelPath?.includes('handtrack')) handRes = this.config.hand.enabled ? handtrack.predict(img.tensor, handConfig) : [];

View File

@ -4,7 +4,7 @@
* Based on: [**MoveNet**](https://blog.tensorflow.org/2021/05/next-generation-pose-detection-with-movenet-and-tensorflowjs.html) * Based on: [**MoveNet**](https://blog.tensorflow.org/2021/05/next-generation-pose-detection-with-movenet-and-tensorflowjs.html)
*/ */
import { log, join } from '../util'; import { log, join, scaleBox } from '../util';
import * as tf from '../../dist/tfjs.esm.js'; import * as tf from '../../dist/tfjs.esm.js';
import type { BodyResult } from '../result'; import type { BodyResult } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types'; import type { GraphModel, Tensor } from '../tfjs/types';
@ -13,15 +13,17 @@ import { fakeOps } from '../tfjs/backend';
import { env } from '../env'; import { env } from '../env';
let model: GraphModel | null; let model: GraphModel | null;
let inputSize = 0;
const cachedBoxes: Array<[number, number, number, number]> = [];
type Keypoints = { score: number, part: string, position: [number, number], positionRaw: [number, number] }; type Keypoints = { score: number, part: string, position: [number, number], positionRaw: [number, number] };
const keypoints: Array<Keypoints> = []; type Body = { id: number, score: number, box: [number, number, number, number], boxRaw: [number, number, number, number], keypoints: Array<Keypoints> }
type Person = { id: number, score: number, box: [number, number, number, number], boxRaw: [number, number, number, number], keypoints: Array<Keypoints> }
let box: [number, number, number, number] = [0, 0, 0, 0]; let box: [number, number, number, number] = [0, 0, 0, 0];
let boxRaw: [number, number, number, number] = [0, 0, 0, 0]; let boxRaw: [number, number, number, number] = [0, 0, 0, 0];
let score = 0; let score = 0;
let skipped = Number.MAX_SAFE_INTEGER; let skipped = Number.MAX_SAFE_INTEGER;
const keypoints: Array<Keypoints> = [];
const bodyParts = ['nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder', 'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist', 'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle']; const bodyParts = ['nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder', 'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist', 'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle'];
@ -33,25 +35,28 @@ export async function load(config: Config): Promise<GraphModel> {
if (!model || !model['modelUrl']) log('load model failed:', config.body.modelPath); if (!model || !model['modelUrl']) log('load model failed:', config.body.modelPath);
else if (config.debug) log('load model:', model['modelUrl']); else if (config.debug) log('load model:', model['modelUrl']);
} else if (config.debug) log('cached model:', model['modelUrl']); } else if (config.debug) log('cached model:', model['modelUrl']);
inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
if (inputSize === -1) inputSize = 256;
return model; return model;
} }
async function parseSinglePose(res, config, image) { async function parseSinglePose(res, config, image, inputBox) {
keypoints.length = 0;
const kpt = res[0][0]; const kpt = res[0][0];
keypoints.length = 0;
for (let id = 0; id < kpt.length; id++) { for (let id = 0; id < kpt.length; id++) {
score = kpt[id][2]; score = kpt[id][2];
if (score > config.body.minConfidence) { if (score > config.body.minConfidence) {
const positionRaw: [number, number] = [
(inputBox[3] - inputBox[1]) * kpt[id][1] + inputBox[1],
(inputBox[2] - inputBox[0]) * kpt[id][0] + inputBox[0],
];
keypoints.push({ keypoints.push({
score: Math.round(100 * score) / 100, score: Math.round(100 * score) / 100,
part: bodyParts[id], part: bodyParts[id],
positionRaw: [ // normalized to 0..1 positionRaw,
kpt[id][1],
kpt[id][0],
],
position: [ // normalized to input image size position: [ // normalized to input image size
Math.round((image.shape[2] || 0) * kpt[id][1]), Math.round((image.shape[2] || 0) * positionRaw[0]),
Math.round((image.shape[1] || 0) * kpt[id][0]), Math.round((image.shape[1] || 0) * positionRaw[1]),
], ],
}); });
} }
@ -73,13 +78,13 @@ async function parseSinglePose(res, config, image) {
Math.max(...xRaw) - Math.min(...xRaw), Math.max(...xRaw) - Math.min(...xRaw),
Math.max(...yRaw) - Math.min(...yRaw), Math.max(...yRaw) - Math.min(...yRaw),
]; ];
const persons: Array<Person> = []; const bodies: Array<Body> = [];
persons.push({ id: 0, score, box, boxRaw, keypoints }); bodies.push({ id: 0, score, box, boxRaw, keypoints });
return persons; return bodies;
} }
async function parseMultiPose(res, config, image) { async function parseMultiPose(res, config, image, inputBox) {
const persons: Array<Person> = []; const bodies: Array<Body> = [];
for (let id = 0; id < res[0].length; id++) { for (let id = 0; id < res[0].length; id++) {
const kpt = res[0][id]; const kpt = res[0][id];
score = Math.round(100 * kpt[51 + 4]) / 100; score = Math.round(100 * kpt[51 + 4]) / 100;
@ -89,16 +94,20 @@ async function parseMultiPose(res, config, image) {
for (let i = 0; i < 17; i++) { for (let i = 0; i < 17; i++) {
const partScore = Math.round(100 * kpt[3 * i + 2]) / 100; const partScore = Math.round(100 * kpt[3 * i + 2]) / 100;
if (partScore > config.body.minConfidence) { if (partScore > config.body.minConfidence) {
const positionRaw: [number, number] = [
(inputBox[3] - inputBox[1]) * kpt[3 * i + 1] + inputBox[1],
(inputBox[2] - inputBox[0]) * kpt[3 * i + 0] + inputBox[0],
];
keypoints.push({ keypoints.push({
part: bodyParts[i], part: bodyParts[i],
score: partScore, score: partScore,
positionRaw: [kpt[3 * i + 1], kpt[3 * i + 0]], positionRaw,
position: [Math.trunc(kpt[3 * i + 1] * (image.shape[2] || 0)), Math.trunc(kpt[3 * i + 0] * (image.shape[1] || 0))], position: [Math.trunc(positionRaw[0] * (image.shape[2] || 0)), Math.trunc(positionRaw[0] * (image.shape[1] || 0))],
}); });
} }
} }
boxRaw = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]]; boxRaw = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]];
persons.push({ bodies.push({
id, id,
score, score,
boxRaw, boxRaw,
@ -111,36 +120,50 @@ async function parseMultiPose(res, config, image) {
keypoints: [...keypoints], keypoints: [...keypoints],
}); });
} }
return persons; return bodies;
} }
export async function predict(image: Tensor, config: Config): Promise<BodyResult[]> { export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
if ((skipped < (config.body.skipFrames || 0)) && config.skipFrame && Object.keys(keypoints).length > 0) { if (!model || !model?.inputs[0].shape) return [];
skipped++;
return [{ id: 0, score, box, boxRaw, keypoints }];
}
skipped = 0;
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const tensor = tf.tidy(() => { const t: Record<string, Tensor> = {};
if (!model?.inputs[0].shape) return null;
let inputSize = model.inputs[0].shape[2];
if (inputSize === -1) inputSize = 256;
const resize = tf.image.resizeBilinear(image, [inputSize, inputSize], false);
const cast = tf.cast(resize, 'int32');
return cast;
});
let resT; let bodies: Array<Body> = [];
if (config.body.enabled) resT = await model?.predict(tensor);
tf.dispose(tensor);
if (!resT) resolve([]); if (!config.skipFrame) cachedBoxes.length = 0; // allowed to use cache or not
const res = await resT.array(); skipped++;
let body;
if (resT.shape[2] === 17) body = await parseSinglePose(res, config, image);
else if (resT.shape[2] === 56) body = await parseMultiPose(res, config, image);
tf.dispose(resT);
resolve(body); for (let i = 0; i < cachedBoxes.length; i++) { // run detection based on cached boxes
t.crop = tf.image.cropAndResize(input, [cachedBoxes[i]], [0], [inputSize, inputSize], 'bilinear');
t.cast = tf.cast(t.crop, 'int32');
t.res = await model?.predict(t.cast) as Tensor;
const res = await t.res.array();
const newBodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, cachedBoxes[i]) : await parseMultiPose(res, config, input, cachedBoxes[i]);
bodies = bodies.concat(newBodies);
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
}
if ((bodies.length !== config.body.maxDetected) && (skipped > (config.body.skipFrames || 0))) { // run detection on full frame
t.resized = tf.image.resizeBilinear(input, [inputSize, inputSize], false);
t.cast = tf.cast(t.resized, 'int32');
t.res = await model?.predict(t.cast) as Tensor;
const res = await t.res.array();
bodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, [0, 0, 1, 1]) : await parseMultiPose(res, config, input, [0, 0, 1, 1]);
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
cachedBoxes.length = 0; // reset cache
skipped = 0;
}
if (config.skipFrame) { // create box cache based on last detections
cachedBoxes.length = 0;
for (let i = 0; i < bodies.length; i++) {
if (bodies[i].keypoints.length > 10) { // only update cache if we detected sufficient number of keypoints
const kpts = bodies[i].keypoints.map((kpt) => kpt.position);
const newBox = scaleBox(kpts, 1.5, [input.shape[2], input.shape[1]]);
cachedBoxes.push([...newBox.yxBox]);
}
}
}
resolve(bodies);
}); });
} }

View File

@ -69,3 +69,30 @@ export async function wait(time) {
const waiting = new Promise((resolve) => setTimeout(() => resolve(true), time)); const waiting = new Promise((resolve) => setTimeout(() => resolve(true), time));
await waiting; await waiting;
} }
/**
 * Helper function: find box around keypoints, square it and scale it.
 *
 * @param keypoints list of points; only index 0 (x) and index 1 (y) of each point are read
 * @param boxScaleFact multiplier applied to the largest center-to-edge distance of the keypoints
 * @param outputSize two-element array; box x/width are divided by [0] and box y/height by [1]
 * @returns object with three four-element arrays:
 *   box    - [x, y, width, height], truncated to integers, width equals height
 *   boxRaw - box divided by outputSize
 *   yxBox  - [y, x, y + height, x + width] built from boxRaw
 *   All three are [0, 0, 0, 0] when keypoints is empty.
 */
export function scaleBox(keypoints, boxScaleFact, outputSize) {
  if (keypoints.length === 0) {
    // Math.max() / Math.min() without arguments return -Infinity / +Infinity,
    // which would turn every value below into NaN, so return an empty box instead
    return {
      box: [0, 0, 0, 0] as [number, number, number, number],
      boxRaw: [0, 0, 0, 0] as [number, number, number, number],
      yxBox: [0, 0, 0, 0] as [number, number, number, number],
    };
  }
  const xs = keypoints.map((pt) => pt[0]); // all x coords
  const ys = keypoints.map((pt) => pt[1]); // all y coords
  const maxX = Math.max(...xs);
  const minX = Math.min(...xs);
  const maxY = Math.max(...ys);
  const minY = Math.min(...ys);
  const centerX = (maxX + minX) / 2; // center of the area covered by all keypoints
  const centerY = (maxY + minY) / 2;
  // largest distance from center in any direction, scaled; used for both axes so the box is square
  const diff = Math.max(centerX - minX, centerY - minY, maxX - centerX, maxY - centerY) * boxScaleFact;
  const box = [
    Math.trunc(centerX - diff),
    Math.trunc(centerY - diff),
    Math.trunc(2 * diff),
    Math.trunc(2 * diff),
  ] as [number, number, number, number];
  const boxRaw = [ // work backwards from the truncated box
    box[0] / outputSize[0],
    box[1] / outputSize[1],
    box[2] / outputSize[0],
    box[3] / outputSize[1],
  ] as [number, number, number, number];
  const yxBox = [ // same box as two corners with y listed before x
    boxRaw[1],
    boxRaw[0],
    boxRaw[3] + boxRaw[1],
    boxRaw[2] + boxRaw[0],
  ] as [number, number, number, number];
  return { box, boxRaw, yxBox };
}

File diff suppressed because it is too large Load Diff