mirror of https://github.com/vladmandic/human
major precision improvements to movenet and handtrack
parent 6efedef077
commit 3925d6d426

15 CHANGELOG.md
@@ -9,11 +9,20 @@

## Changelog

### **HEAD -> main** 2021/10/10 mandic00@live.com

### **origin/main** 2021/10/08 mandic00@live.com

- demo default config cleanup
- improve gaze and face angle visualizations in draw

### **release 2.3.1** 2021/10/06 mandic00@live.com

### **2.3.1** 2021/10/06 mandic00@live.com

### **origin/main** 2021/10/06 mandic00@live.com

- workaround for chrome offscreencanvas bug
- fix backend conflict in webworker
- add blazepose v2 and add annotations to body results
- fix backend order initialization

@@ -31,15 +31,6 @@ import jsonView from './helpers/jsonview.js';
let human;

let userConfig = {
- cacheSensitivity: 0,
- hand: { enabled: true },
- body: { enabled: false },
- face: { enabled: false },
- /*
- hand: { enabled: false, maxDetected: 1, skipFrames: 0 },
- body: { enabled: false },
- face: { enabled: false },
- */
/*
warmup: 'none',
backend: 'humangl',

@@ -118,6 +109,7 @@ const ui = {
lastFrame: 0, // time of last frame processing
viewportSet: false, // internal, has custom viewport been set
background: null, // holds instance of segmentation background image
+ transferCanvas: null, // canvas used to transfer data to and from worker

// webrtc
useWebRTC: false, // use webrtc as camera source instead of local webcam

@@ -318,7 +310,7 @@ async function drawResults(input) {
const fps = avgDetect > 0 ? `FPS process:${avgDetect} refresh:${avgDraw}` : '';
const backend = result.backend || human.tf.getBackend();
const gpu = engine.backendInstance ? `gpu: ${(engine.backendInstance.numBytesInGPU ? engine.backendInstance.numBytesInGPU : 0).toLocaleString()} bytes` : '';
- const memory = result.tensors || `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
+ const memory = result.tensors ? `tensors: ${result.tensors.toLocaleString()} in worker` : `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
document.getElementById('log').innerHTML = `
video: ${ui.camera.name} | facing: ${ui.camera.facing} | screen: ${window.innerWidth} x ${window.innerHeight} camera: ${ui.camera.width} x ${ui.camera.height} ${processing}<br>
backend: ${backend} | ${memory}<br>

@@ -469,13 +461,17 @@ function webWorker(input, image, canvas, timestamp) {
if (document.getElementById('gl-bench')) document.getElementById('gl-bench').style.display = ui.bench ? 'block' : 'none';
lastDetectedResult = msg.data.result;

- if (msg.data.image) {
- lastDetectedResult.canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(msg.data.width, msg.data.height) : document.createElement('canvas');
- lastDetectedResult.canvas.width = msg.data.width;
- lastDetectedResult.canvas.height = msg.data.height;
+ if (msg.data.image) { // we don't really need the canvas since we draw from video
+ /*
+ if (!lastDetectedResult.canvas || lastDetectedResult.canvas.width !== msg.data.width || lastDetectedResult.canvas.height !== msg.data.height) {
+ lastDetectedResult.canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(msg.data.width, msg.data.height) : document.createElement('canvas');
+ lastDetectedResult.canvas.width = msg.data.width;
+ lastDetectedResult.canvas.height = msg.data.height;
+ }
const ctx = lastDetectedResult.canvas.getContext('2d');
const imageData = new ImageData(new Uint8ClampedArray(msg.data.image), msg.data.width, msg.data.height);
ctx.putImageData(imageData, 0, 0);
+ */
}

ui.framesDetect++;

@@ -508,10 +504,12 @@ function runHumanDetect(input, canvas, timestamp) {
if (ui.hintsThread) clearInterval(ui.hintsThread);
if (ui.useWorker && human.env.offscreen) {
// get image data from video as we cannot send html objects to webworker
- const offscreen = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(canvas.width, canvas.height) : document.createElement('canvas');
- offscreen.width = canvas.width;
- offscreen.height = canvas.height;
- const ctx = offscreen.getContext('2d');
+ if (!ui.transferCanvas || ui.transferCanvas.width !== canvas.width || ui.transferCanvas.height !== canvas.height) {
+ ui.transferCanvas = document.createElement('canvas');
+ ui.transferCanvas.width = canvas.width;
+ ui.transferCanvas.height = canvas.height;
+ }
+ const ctx = ui.transferCanvas.getContext('2d');
ctx.drawImage(input, 0, 0, canvas.width, canvas.height);
const data = ctx.getImageData(0, 0, canvas.width, canvas.height);
// perform detection in worker

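The rewritten block reuses one transfer canvas across frames instead of allocating a new OffscreenCanvas for every frame, so the only per-frame work is drawImage plus getImageData. A minimal sketch of the same pattern, assuming a `<video>` source and a hypothetical `worker` (names are illustrative, not the demo's exact API):

```ts
// Sketch: reuse a single canvas to turn video frames into transferable ImageData for a worker.
let transferCanvas: HTMLCanvasElement | null = null;

function sendFrame(video: HTMLVideoElement, worker: Worker): void {
  if (!transferCanvas || transferCanvas.width !== video.videoWidth || transferCanvas.height !== video.videoHeight) {
    transferCanvas = document.createElement('canvas'); // recreate only when the source size changes
    transferCanvas.width = video.videoWidth;
    transferCanvas.height = video.videoHeight;
  }
  const ctx = transferCanvas.getContext('2d') as CanvasRenderingContext2D;
  ctx.drawImage(video, 0, 0, transferCanvas.width, transferCanvas.height);
  const data = ctx.getImageData(0, 0, transferCanvas.width, transferCanvas.height);
  // transfer the pixel buffer instead of copying it; the main thread loses access to it afterwards
  worker.postMessage({ image: data.data.buffer, width: data.width, height: data.height }, [data.data.buffer]);
}
```
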
@@ -1,6 +1,6 @@
{
"name": "@vladmandic/human",
- "version": "2.3.1",
+ "version": "2.3.2",
"description": "Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition",
"sideEffects": false,
"main": "dist/human.node.js",

@@ -74,7 +74,6 @@
"canvas": "^2.8.0",
"dayjs": "^1.10.7",
"esbuild": "^0.13.4",
- "eslint": "^7.32.0",
"eslint-config-airbnb-base": "^14.2.1",
"eslint-plugin-import": "^2.24.2",
"eslint-plugin-json": "^3.1.0",

@@ -86,5 +85,8 @@
"tslib": "^2.3.1",
"typedoc": "0.22.5",
"typescript": "4.4.3"
},
+ "dependencies": {
+ "eslint": "7.32.0"
+ }
}

@@ -420,12 +420,12 @@ const config: Config = {
rotation: true, // use best-guess rotated hand image or just box with rotation as-is
// false means higher performance, but incorrect finger mapping if hand is inverted
// only valid for `handdetect` variation
- skipFrames: 1, // how many max frames to go without re-running the hand bounding box detector
+ skipFrames: 2, // how many max frames to go without re-running the hand bounding box detector
// only used when cacheSensitivity is not zero
// e.g., if model is running at 25 FPS, we can re-use existing bounding
// box for updated hand skeleton analysis as the hand
// hasn't moved much in a short time (10 * 1/25 = 0.4 sec)
- minConfidence: 0.55, // threshold for discarding a prediction
+ minConfidence: 0.50, // threshold for discarding a prediction
iouThreshold: 0.2, // amount of overlap between two detected objects before one object is removed
maxDetected: -1, // maximum number of hands detected in the input
// should be set to the minimum number for performance

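For context, these options are passed when constructing a Human instance; the option names above come straight from the diff, while the surrounding values below are illustrative only. A minimal sketch, assuming the package's default export (as the test files in this commit use it):

```ts
import Human from '@vladmandic/human';

const human = new Human({
  modelBasePath: 'https://vladmandic.github.io/human/models/',
  hand: {
    enabled: true,
    maxDetected: 1,      // keep this at the minimum needed for best performance
    skipFrames: 2,       // reuse the cached bounding box for up to 2 frames
    minConfidence: 0.50, // discard hand predictions scoring below this threshold
    iouThreshold: 0.2,   // overlap above which one of two detections is dropped
  },
});

async function run(video: HTMLVideoElement) {
  const result = await human.detect(video); // video, image, canvas and tensor inputs are accepted
  console.log(result.hand);
}
```
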
@@ -83,7 +83,7 @@ export async function predict(input: Tensor, config: Config): Promise<FaceResult
((box.startPoint[1] + box.endPoint[1])) / 2 + ((box.endPoint[1] + box.startPoint[1]) * pt[1] / blazeface.size()),
]);
face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
- for (const key of Object.keys(coords.blazeFaceLandmarks)) face.annotations[key] = [face.mesh[coords.blazeFaceLandmarks[key]]]; // add annotations
+ for (const key of Object.keys(coords.blazeFaceLandmarks)) face.annotations[key] = [face.mesh[coords.blazeFaceLandmarks[key] as number]]; // add annotations
} else if (!model) { // mesh enabled, but not loaded
if (config.debug) log('face mesh detection requested, but model is not loaded');
} else { // mesh enabled

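The `meshRaw` line in this hunk keeps a resolution-independent copy of the mesh: x and y are divided by the input tensor's width and height (NHWC shape indices 2 and 1), and z by the model input size. A small sketch of that normalization (function name is illustrative):

```ts
type Point = [number, number, number];

// Normalize mesh keypoints: x,y relative to the input image, z relative to the model input size.
function normalizeMesh(mesh: Point[], inputShape: number[], modelInputSize: number): Point[] {
  const [, height, width] = inputShape; // NHWC layout: [1, height, width, 3]
  return mesh.map((pt) => [pt[0] / width, pt[1] / height, (pt[2] || 0) / modelInputSize]);
}
```
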
@@ -3,7 +3,7 @@
* See `facemesh.ts` for entry point
*/

- export const meshAnnotations = {
+ export const meshAnnotations: Record<string, number[]> = {
silhouette: [
10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,

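The only change in this and the following hunks is adding explicit types to the exported lookup tables. Typing them as `Record<...>` (or as typed arrays) lets callers index them with a computed string key under strict TypeScript settings, which is what `facemesh.ts` does above with `coords.blazeFaceLandmarks[key]`. A small illustrative snippet (not from the repo):

```ts
// Without an index signature, table[key] with a runtime string key is an implicit-any error under strict mode.
const annotations: Record<string, number[]> = { leftCheek: [425], rightCheek: [205] };

function pickPoints(mesh: [number, number, number][], key: string): [number, number, number][] {
  return (annotations[key] || []).map((idx) => mesh[idx]); // computed-key access is now well typed
}
```
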
@@ -42,13 +42,13 @@ export const meshAnnotations = {
leftCheek: [425],
};

- export const meshLandmarks = {
+ export const meshLandmarks: Record<string, number | number[]> = {
count: 468,
mouth: 13,
symmetryLine: [13, meshAnnotations['midwayBetweenEyes'][0]],
};

- export const blazeFaceLandmarks = {
+ export const blazeFaceLandmarks: Record<string, number | number[]> = {
leftEye: 0,
rightEye: 1,
nose: 2,

@@ -58,7 +58,7 @@ export const blazeFaceLandmarks = {
symmetryLine: [3, 2],
};

- export const MESH_TO_IRIS_INDICES_MAP = [ // A mapping from facemesh model keypoints to iris model keypoints.
+ export const MESH_TO_IRIS_INDICES_MAP: Array<{ key: string, indices: number[] }> = [ // A mapping from facemesh model keypoints to iris model keypoints.
{ key: 'EyeUpper0', indices: [9, 10, 11, 12, 13, 14, 15] },
{ key: 'EyeUpper1', indices: [25, 26, 27, 28, 29, 30, 31] },
{ key: 'EyeUpper2', indices: [41, 42, 43, 44, 45, 46, 47] },

@@ -70,7 +70,7 @@ export const MESH_TO_IRIS_INDICES_MAP = [ // A mapping from facemesh model keypo
// { key: 'EyebrowLower', indices: [48, 49, 50, 51, 52, 53] },
];

- export const UV468 = [
+ export const UV468: [number, number][] = [
[0.499976992607117, 0.652534008026123],
[0.500025987625122, 0.547487020492554],
[0.499974012374878, 0.602371990680695],

@@ -541,7 +541,7 @@ export const UV468 = [
[0.723330020904541, 0.363372981548309],
];

- export const TRI468 = [
+ export const TRI468: Array<number> = [
127, 34, 139, 11, 0, 37, 232, 231, 120, 72, 37, 39, 128, 121, 47, 232, 121, 128, 104, 69, 67, 175, 171, 148, 157, 154, 155, 118, 50, 101, 73, 39, 40, 9,
151, 108, 48, 115, 131, 194, 204, 211, 74, 40, 185, 80, 42, 183, 40, 92, 186, 230, 229, 118, 202, 212, 214, 83, 18, 17, 76, 61, 146, 160, 29, 30, 56,
157, 173, 106, 204, 194, 135, 214, 192, 203, 165, 98, 21, 71, 68, 51, 45, 4, 144, 24, 23, 77, 146, 91, 205, 50, 187, 201, 200, 18, 91, 106, 182, 90, 91,

@@ -627,7 +627,7 @@ export const TRI468 = [
259, 443, 259, 260, 444, 260, 467, 445, 309, 459, 250, 305, 289, 290, 305, 290, 460, 401, 376, 435, 309, 250, 392, 376, 411, 433, 453, 341, 464, 357,
453, 465, 343, 357, 412, 437, 343, 399, 344, 360, 440, 420, 437, 456, 360, 420, 363, 361, 401, 288, 265, 372, 353, 390, 339, 249, 339, 448, 255];

- export const TRI68 = [0, 1, 36, 0, 36, 17, 1, 2, 41, 1, 41, 36, 2, 3, 31, 2, 31, 41, 3, 4, 48, 3, 48, 31, 4, 5, 48, 5, 6, 48, 6, 7, 59, 6, 59, 48, 7, 8, 58, 7, 58, 59,
+ export const TRI68: Array<number> = [0, 1, 36, 0, 36, 17, 1, 2, 41, 1, 41, 36, 2, 3, 31, 2, 31, 41, 3, 4, 48, 3, 48, 31, 4, 5, 48, 5, 6, 48, 6, 7, 59, 6, 59, 48, 7, 8, 58, 7, 58, 59,
8, 9, 56, 8, 56, 57, 8, 57, 58, 9, 10, 55, 9, 55, 56, 10, 11, 54, 10, 54, 55, 11, 12, 54, 12, 13, 54, 13, 14, 35, 13, 35, 54, 14, 15, 46, 14, 46, 35, 15, 16,
45, 15, 45, 46, 16, 26, 45, 17, 36, 18, 18, 37, 19, 18, 36, 37, 19, 38, 20, 19, 37, 38, 20, 39, 21, 20, 38, 39, 21, 39, 27, 22, 42, 23, 22, 27, 42, 23, 43, 24,
23, 42, 43, 24, 44, 25, 24, 43, 44, 25, 45, 26, 25, 44, 45, 27, 39, 28, 27, 28, 42, 28, 39, 29, 28, 29, 42, 29, 31, 30, 29, 30, 35, 29, 40, 31, 29, 35, 47, 29,

@@ -636,7 +636,7 @@ export const TRI68 = [0, 1, 36, 0, 36, 17, 1, 2, 41, 1, 41, 36, 2, 3, 31, 2, 31,
48, 59, 60, 49, 61, 50, 49, 60, 61, 50, 62, 51, 50, 61, 62, 51, 62, 52, 52, 63, 53, 52, 62, 63, 53, 64, 54, 53, 63, 64, 54, 64, 55, 55, 65, 56, 55, 64, 65, 56,
66, 57, 56, 65, 66, 57, 66, 58, 58, 67, 59, 58, 66, 67, 59, 67, 60, 60, 67, 61, 61, 66, 62, 61, 67, 66, 62, 66, 63, 63, 65, 64, 63, 66, 65, 21, 27, 22];

- export const TRI33 = [
+ export const TRI33: Array<number> = [
/* eyes */ 0, 8, 7, 7, 8, 1, 2, 10, 9, 9, 10, 3,
/* brows */ 17, 0, 18, 18, 0, 7, 18, 7, 19, 19, 7, 1, 19, 1, 11, 19, 11, 20, 21, 3, 22, 21, 9, 3, 20, 9, 21, 20, 2, 9, 20, 11, 2,
/* 4head */ 23, 17, 18, 25, 21, 22, 24, 19, 20, 24, 18, 19, 24, 20, 21, 24, 23, 18, 24, 21, 25,

@@ -647,9 +647,9 @@ export const TRI33 = [
/* cont */ 26, 30, 5, 27, 6, 31, 0, 28, 26, 3, 27, 29, 17, 28, 0, 3, 29, 22, 23, 28, 17, 22, 29, 25, 28, 30, 26, 27, 31, 29,
];

- export const TRI7 = [0, 4, 1, 2, 4, 3, 4, 5, 6];
+ export const TRI7: Array<number> = [0, 4, 1, 2, 4, 3, 4, 5, 6];

- export const VTX68 = [
+ export const VTX68: Array<number> = [
/* cont */ 127, 234, 132, 58, 172, 150, 149, 148, 152, 377, 378, 379, 397, 288, 361, 454, 356,
/* brows */ 70, 63, 105, 66, 107, 336, 296, 334, 293, 300,
/* nose */ 168, 6, 195, 4, 98, 97, 2, 326, 327,

@@ -658,9 +658,9 @@ export const VTX68 = [
/* mouth */ 78, 81, 13, 311, 308, 402, 14, 178,
];

- export const VTX33 = [33, 133, 362, 263, 1, 62, 308, 159, 145, 386, 374, 6, 102, 331, 2, 13, 14, 70, 105, 107, 336, 334, 300, 54, 10, 284, 50, 280, 234, 454, 58, 288, 152];
+ export const VTX33: Array<number> = [33, 133, 362, 263, 1, 62, 308, 159, 145, 386, 374, 6, 102, 331, 2, 13, 14, 70, 105, 107, 336, 334, 300, 54, 10, 284, 50, 280, 234, 454, 58, 288, 152];

- export const VTX7 = [33, 133, 362, 263, 1, 78, 308];
+ export const VTX7: Array<number> = [33, 133, 362, 263, 1, 78, 308];

export const UV68 = VTX68.map((x) => UV468[x]);

@@ -152,8 +152,8 @@ export function transformRawCoords(rawCoords, box, angle, rotationMatrix, inputS
}

export function correctFaceRotation(box, input, inputSize) {
- const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= coords.meshLandmarks.count) ? coords.meshLandmarks.symmetryLine : coords.blazeFaceLandmarks.symmetryLine;
- const angle: number = computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
+ const symmetryLine = (box.landmarks.length >= coords.meshLandmarks.count) ? coords.meshLandmarks.symmetryLine : coords.blazeFaceLandmarks.symmetryLine;
+ const angle: number = computeRotation(box.landmarks[symmetryLine[0]], box.landmarks[symmetryLine[1]]);
const faceCenter: Point = getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
const faceCenterNormalized: Point = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
const rotated = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node

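The rewrite indexes the symmetry-line pair directly instead of destructuring it into mouth/forehead names, which sidesteps the typing problem when the pair comes from a `Record<string, number | number[]>`. The roll angle itself is just the angle of the line between those two landmarks; a minimal sketch of one way to compute it (an assumed formulation, not the repo's `computeRotation`):

```ts
type Point = [number, number, number?];

// Roll angle of the mouth-to-forehead line: 0 when the face is upright,
// positive when the forehead leans to the right of the mouth (image y axis points down).
function rollAngle(mouth: Point, forehead: Point): number {
  const dx = forehead[0] - mouth[0];
  const dy = forehead[1] - mouth[1];
  return Math.atan2(dx, -dy); // radians
}
```
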
@@ -22,10 +22,11 @@ const modelOutputNodes = ['StatefulPartitionedCall/Postprocessor/Slice', 'Statef
const inputSize = [[0, 0], [0, 0]];

const classes = ['hand', 'fist', 'pinch', 'point', 'face', 'tip', 'pinchtip'];
+ const faceIndex = 4;

const boxExpandFact = 1.6;
const maxDetectorResolution = 512;
- const detectorExpandFact = 1.2;
+ const detectorExpandFact = 1.4;

let skipped = 0;
let outputSize: [number, number] = [0, 0];

@@ -104,10 +105,11 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectRes
[t.rawScores, t.rawBoxes] = await models[0].executeAsync(t.cast, modelOutputNodes) as Tensor[];
t.boxes = tf.squeeze(t.rawBoxes, [0, 2]);
t.scores = tf.squeeze(t.rawScores, [0]);
- const classScores = tf.unstack(t.scores, 1); // unstack scores based on classes
- classScores.splice(4, 1); // remove faces
+ const classScores: Array<Tensor> = tf.unstack(t.scores, 1); // unstack scores based on classes
+ tf.dispose(classScores[faceIndex]);
+ classScores.splice(faceIndex, 1); // remove faces
t.filtered = tf.stack(classScores, 1); // restack
- tf.dispose(...classScores);
+ tf.dispose(classScores);
t.max = tf.max(t.filtered, 1); // max overall score
t.argmax = tf.argMax(t.filtered, 1); // class index of max overall score
let id = 0;

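The new lines type the unstacked per-class tensors, dispose of the dropped face-class tensor explicitly, and pass the remaining array to `tf.dispose` directly rather than spreading it. A self-contained sketch of the same filtering pattern (shapes and names are illustrative):

```ts
import * as tf from '@tensorflow/tfjs';

// scores: [numBoxes, numClasses]; drop one class column, then compute per-box best score and class.
function dropClass(scores: tf.Tensor2D, dropIndex: number) {
  const perClass: tf.Tensor[] = tf.unstack(scores, 1); // one [numBoxes] tensor per class
  tf.dispose(perClass[dropIndex]);                     // release the column we are about to drop
  perClass.splice(dropIndex, 1);
  const filtered = tf.stack(perClass, 1);              // [numBoxes, numClasses - 1]
  tf.dispose(perClass);                                // tf.dispose accepts an array of tensors
  const max = tf.max(filtered, 1);                     // best remaining score per box
  const argmax = tf.argMax(filtered, 1);               // index of the winning class per box
  return { filtered, max, argmax };
}
```
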
@@ -117,12 +119,13 @@
const classNum = await t.argmax.data();
for (const nmsIndex of Array.from(nms)) { // generates results for each class
const boxSlice = tf.slice(t.boxes, nmsIndex, 1);
- const boxData = await boxSlice.data();
+ const boxYX = await boxSlice.data();
tf.dispose(boxSlice);
- const boxSquareSize = Math.max(boxData[3] - boxData[1], boxData[2] - boxData[0]);
- const boxRaw: Box = box.scale([boxData[1], boxData[0], boxSquareSize, boxSquareSize], detectorExpandFact); // for raw box we use squared and expanded box
+ // const boxSquareSize = Math.max(boxData[3] - boxData[1], boxData[2] - boxData[0]);
+ const boxData: Box = [boxYX[1], boxYX[0], boxYX[3] - boxYX[1], boxYX[2] - boxYX[0]]; // yx box reshaped to standard box
+ const boxRaw: Box = box.scale(boxData, detectorExpandFact);
const boxCrop: Box = box.crop(boxRaw); // crop box is based on raw box
- const boxFull: Box = [Math.trunc(boxData[1] * outputSize[0]), Math.trunc(boxData[0] * outputSize[1]), Math.trunc((boxData[3] - boxData[1]) * outputSize[0]), Math.trunc((boxData[2] - boxData[0]) * outputSize[1])]; // for box we keep original scaled values
+ const boxFull: Box = [Math.trunc(boxData[0] * outputSize[0]), Math.trunc(boxData[1] * outputSize[1]), Math.trunc(boxData[2] * outputSize[0]), Math.trunc(boxData[3] * outputSize[1])];
const score = scores[nmsIndex];
const label = classes[classNum[nmsIndex]];
const hand: HandDetectResult = { id: id++, score, box: boxFull, boxRaw, boxCrop, label };

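The detector emits normalized boxes in [y1, x1, y2, x2] order; the rewritten lines reshape that into a standard [x, y, width, height] box once and reuse it for both the raw and the pixel-space box. A standalone sketch of that conversion (the module's `box.scale`/`box.crop` helpers are not reproduced here):

```ts
type Box = [number, number, number, number]; // [x, y, width, height]

// Reshape a detector box from normalized [y1, x1, y2, x2] into normalized [x, y, width, height].
function yxToBox(yx: Float32Array | number[]): Box {
  return [yx[1], yx[0], yx[3] - yx[1], yx[2] - yx[0]];
}

// Scale a normalized box into pixel coordinates of the output image (width, height).
function toPixels(b: Box, outputSize: [number, number]): Box {
  return [
    Math.trunc(b[0] * outputSize[0]),
    Math.trunc(b[1] * outputSize[1]),
    Math.trunc(b[2] * outputSize[0]),
    Math.trunc(b[3] * outputSize[1]),
  ];
}
```
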
@@ -161,10 +164,9 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
const coordsData: Point[] = await t.reshaped.array() as Point[];
const coordsRaw: Point[] = coordsData.map((kpt) => [kpt[0] / inputSize[1][1], kpt[1] / inputSize[1][0], (kpt[2] || 0)]);
const coordsNorm: Point[] = coordsRaw.map((kpt) => [kpt[0] * h.boxRaw[2], kpt[1] * h.boxRaw[3], (kpt[2] || 0)]);
- console.log(outputSize, h.box);
hand.keypoints = (coordsNorm).map((kpt) => [
- outputSize[0] * kpt[0] + h.box[0],
- outputSize[1] * kpt[1] + h.box[1],
+ outputSize[0] * (kpt[0] + h.boxRaw[0]),
+ outputSize[1] * (kpt[1] + h.boxRaw[1]),
(kpt[2] || 0),
]);
// hand.box = box.scale(h.box, 1 / detectorExpandFact); // scale box down for visual appeal

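This looks like the handtrack part of the precision improvement in the commit message: a keypoint normalized to the crop is now offset by the raw (normalized) box origin before being scaled to output pixels, instead of being scaled first and shifted by the pixel-space box. A sketch of the corrected mapping (names are illustrative):

```ts
type Point = [number, number, number];
type Box = [number, number, number, number]; // normalized [x, y, width, height]

// kpt is normalized to the expanded crop; boxRaw is that crop in image-normalized units.
function cropToImage(kpt: Point, boxRaw: Box, outputSize: [number, number]): Point {
  return [
    outputSize[0] * (kpt[0] * boxRaw[2] + boxRaw[0]), // into crop space, offset by crop origin, then to pixels
    outputSize[1] * (kpt[1] * boxRaw[3] + boxRaw[1]),
    kpt[2] || 0,
  ];
}
```
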
@@ -179,13 +181,11 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
return hand;
}

- let n = 0;
export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {
- n++;
/** handtrack caching
* 1. if skipFrame, return cached results
- * 2. if there are cached results, even if not sure they are enough, continue anyhow for 10x skipFrames
- * 3. eventually rerun detector to generate new cached boxes and reset skipped
+ * 2. if there are cached results, even if not sure they are enough, continue anyhow for 5x skipFrames
+ * 3. if not skipFrame, or eventually, rerun detector to generate new cached boxes and reset skipped
* 4. generate cached boxes based on detected keypoints
*/
if (!models[0] || !models[1] || !models[0]?.inputs[0].shape || !models[1]?.inputs[0].shape) return []; // something is wrong with the model

@@ -193,34 +193,14 @@ export async function predict(input: Tensor, config: Config): Promise<HandResult

skipped++; // increment skip frames
if (config.skipFrame && (skipped <= (config.hand.skipFrames || 0))) {
- console.log(n, 'SKIP', { results: cache.hands.length });
return cache.hands; // return cached results without running anything
}
return new Promise(async (resolve) => {
- console.log(n, 'DETECT', { skipped, hands: cache.hands.length, boxes: cache.boxes.length });
- // this is logically consistent but insufficiently efficient
- /*
- skipped = 0;
- if (cache.boxes.length >= (config.hand.maxDetected || 0)) {
- cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config))); // if we have enough cached boxes run detection using cache
- } else {
- cache.hands = []; // reset hands
- }

- if (cache.hands.length !== config.hand.maxDetected) { // did not find enough hands based on cached boxes so run detection on full frame
- cache.boxes = await detectHands(input, config);
- if (config.skipFrame && skipped < 5 * (config.hand.skipFrames || 0) && cache.hands.length > 0) { // we have some cached results, not sure if they are enough, so we continue a bit longer
- cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
- }
- */

if (config.skipFrame && skipped <= 10 * (config.hand.skipFrames || 0) && cache.hands.length > 0) { // we have some cached results, not sure if they are enough, so we continue for up to 10x skipFrames
cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
- console.log(n, 'HANDS', { hands: cache.hands.length });
} else {
cache.boxes = await detectHands(input, config);
- console.log(n, 'BOXES', { hands: cache.boxes.length });
cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
- console.log(n, 'HANDS', { hands: cache.hands.length });
skipped = 0;
}

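After the cleanup, the predict body keeps a three-tier caching strategy: return cached results while within skipFrames, rerun only the finger model on cached boxes for up to 10x skipFrames, and otherwise rerun the full detector and reset the counter. A condensed, self-contained sketch of that control flow; the module's real state and model calls are stood in by declarations here:

```ts
type Box = [number, number, number, number];
type DetectBox = { box: Box };
type Hand = { keypoints: number[][] };

let skipped = 0;
const cache: { boxes: DetectBox[], hands: Hand[] } = { boxes: [], hands: [] };

// stand-ins for the module's detector and finger/landmark models
declare function detectHands(input: unknown, config: unknown): Promise<DetectBox[]>;
declare function detectFingers(input: unknown, box: DetectBox, config: unknown): Promise<Hand>;

async function predictSketch(input: unknown, config: { skipFrame: boolean, hand: { skipFrames: number } }): Promise<Hand[]> {
  skipped++;
  if (config.skipFrame && skipped <= config.hand.skipFrames) return cache.hands; // tier 1: reuse results as-is
  if (config.skipFrame && skipped <= 10 * config.hand.skipFrames && cache.hands.length > 0) {
    // tier 2: cached boxes are stale but usable, rerun only the finger model on them
    cache.hands = await Promise.all(cache.boxes.map((b) => detectFingers(input, b, config)));
  } else {
    // tier 3: rerun the full detector, then the finger model, and reset the counter
    cache.boxes = await detectHands(input, config);
    cache.hands = await Promise.all(cache.boxes.map((b) => detectFingers(input, b, config)));
    skipped = 0;
  }
  return cache.hands;
}
```
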
@@ -236,7 +216,6 @@ export async function predict(input: Tensor, config: Config): Promise<HandResult
cache.boxes.push({ ...oldCache[i], box: boxScale, boxRaw: boxScaleRaw, boxCrop });
}
}
- console.log(n, 'CACHED', { hands: cache.boxes.length });
}
resolve(cache.hands);
});

@@ -15,6 +15,7 @@ const maxSize = 2048;
// internal temp canvases
let inCanvas: HTMLCanvasElement | OffscreenCanvas | null = null; // use global variable to avoid recreating canvas on each frame
let outCanvas: HTMLCanvasElement | OffscreenCanvas | null = null; // use global variable to avoid recreating canvas on each frame
+ let tmpCanvas: HTMLCanvasElement | OffscreenCanvas | null = null; // use global variable to avoid recreating canvas on each frame
// @ts-ignore // imagefx is js module that should be converted to a class
let fx: fxImage.GLImageFilter | null; // instance of imagefx

@@ -72,9 +73,13 @@ export function process(input: Input, config: Config, getTensor: boolean = true)
}
if (input instanceof tf.Tensor) {
// if input is tensor, use as-is
- if ((input)['isDisposedInternal']) throw new Error('input tensor is disposed');
- else if (!input.shape || input.shape.length !== 4 || input.shape[0] !== 1 || input.shape[3] !== 3) throw new Error(`input tensor shape must be [1, height, width, 3] and instead was ${input.shape}`);
- else return { tensor: tf.clone(input), canvas: (config.filter.return ? outCanvas : null) };
+ if ((input)['isDisposedInternal']) {
+ throw new Error('input tensor is disposed');
+ } else if (!(input as Tensor).shape || (input as Tensor).shape.length !== 4 || (input as Tensor).shape[0] !== 1 || (input as Tensor).shape[3] !== 3) {
+ throw new Error(`input tensor shape must be [1, height, width, 3] and instead was ${input['shape']}`);
+ } else {
+ return { tensor: tf.clone(input), canvas: (config.filter.return ? outCanvas : null) };
+ }
} else {
// check if resizing will be needed
if (typeof input['readyState'] !== 'undefined' && input['readyState'] <= 2) {

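The restructured branch makes the accepted tensor contract explicit: one RGB image in NHWC layout, i.e. shape [1, height, width, 3], and not already disposed. A minimal sketch of the same check, assuming tfjs:

```ts
import * as tf from '@tensorflow/tfjs';

// Accept only a [1, height, width, 3] tensor and hand back a clone the caller owns.
function validateInput(input: tf.Tensor): tf.Tensor {
  if (input.isDisposed) throw new Error('input tensor is disposed');
  if (input.shape.length !== 4 || input.shape[0] !== 1 || input.shape[3] !== 3) {
    throw new Error(`input tensor shape must be [1, height, width, 3] and instead was ${input.shape}`);
  }
  return tf.clone(input); // original tensor stays untouched
}
```
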
@@ -114,10 +119,10 @@ export function process(input: Input, config: Config, getTensor: boolean = true)
if (config.filter.flip && typeof inCtx.translate !== 'undefined') {
inCtx.translate(originalWidth, 0);
inCtx.scale(-1, 1);
- inCtx.drawImage(input as CanvasImageSource, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height);
+ inCtx.drawImage(input as OffscreenCanvas, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height);
inCtx.setTransform(1, 0, 0, 1, 0, 0); // resets transforms to defaults
} else {
- inCtx.drawImage(input as CanvasImageSource, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height);
+ inCtx.drawImage(input as OffscreenCanvas, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height);
}
}

@@ -160,23 +165,24 @@ export function process(input: Input, config: Config, getTensor: boolean = true)
pixels = tf.browser ? tf.browser.fromPixels(input) : null;
} else {
depth = input['data'].length / input['height'] / input['width'];
- // const arr = Uint8Array.from(input['data']);
- const arr = new Uint8Array(input['data']['buffer']);
+ const arr = Uint8Array.from(input['data']);
+ // const arr = new Uint8Array(input['data']['buffer']);
pixels = tf.tensor(arr, [input['height'], input['width'], depth], 'float32');
}
} else {
+ if (!tmpCanvas || (outCanvas.width !== tmpCanvas.width) || (outCanvas?.height !== tmpCanvas?.height)) tmpCanvas = canvas(outCanvas.width, outCanvas.height); // init output canvas
if (tf.browser && env.browser) {
if (config.backend === 'webgl' || config.backend === 'humangl' || config.backend === 'webgpu') {
pixels = tf.browser.fromPixels(outCanvas); // safe to reuse since both backend and context are gl based
} else {
- const tempCanvas = copy(outCanvas); // cannot use output canvas as it already has gl context so we need one more temporary canvas
- pixels = tf.browser.fromPixels(tempCanvas);
+ tmpCanvas = copy(outCanvas); // cannot use output canvas as it already has gl context so we need one more temporary canvas
+ pixels = tf.browser.fromPixels(tmpCanvas);
}
} else {
const tempCanvas = copy(outCanvas); // cannot use output canvas as it already has gl context so we need one more temporary canvas
const tempCtx = tempCanvas.getContext('2d') as CanvasRenderingContext2D;
const tempData = tempCtx.getImageData(0, 0, targetWidth, targetHeight);
- depth = input['data'].length / targetWidth / targetHeight;
+ depth = tempData.data.length / targetWidth / targetHeight;
const arr = new Uint8Array(tempData.data.buffer);
pixels = tf.tensor(arr, [targetWidth, targetHeight, depth]);
}

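When `tf.browser.fromPixels` cannot be used on the processed canvas (non-browser or non-GL situations, as handled above), the pixels have to be read through a 2D context and wrapped into a tensor manually. A minimal sketch of that path, assuming tfjs and a canvas that exposes a 2D context:

```ts
import * as tf from '@tensorflow/tfjs';

// Read pixels from a 2D canvas and build an RGBA tensor of shape [height, width, 4].
function canvasToTensor(source: HTMLCanvasElement): tf.Tensor3D {
  const ctx = source.getContext('2d') as CanvasRenderingContext2D;
  const imageData = ctx.getImageData(0, 0, source.width, source.height);
  const depth = imageData.data.length / source.width / source.height; // 4 for RGBA
  const arr = new Uint8Array(imageData.data.buffer); // zero-copy view over the pixel buffer
  return tf.tensor3d(arr, [source.height, source.width, depth], 'int32');
}
```
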
@@ -192,7 +192,7 @@ async function test(Human, inputConfig) {
else log('state', 'passed: warmup face result match');
config.warmup = 'body';
res = await testWarmup(human, 'default');
- if (!res || res?.face?.length !== 1 || res?.body?.length !== 0 || res?.hand?.length !== 1 || res?.gesture?.length !== 4) log('error', 'failed: warmup body result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
+ if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 1 || res?.gesture?.length !== 5) log('error', 'failed: warmup body result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
else log('state', 'passed: warmup body result match');

// test default config async

@@ -3,6 +3,7 @@ const Human = require('../dist/human.node-gpu.js').default;
const test = require('./test-main.js').test;

const config = {
+ cacheSensitivity: 0,
modelBasePath: 'file://models/',
backend: 'tensorflow',
debug: false,

@@ -10,6 +10,7 @@ Human.env.Canvas = Canvas; // requires monkey-patch as wasm does not have tf.bro
Human.env.Image = Image; // requires monkey-patch as wasm does not have tf.browser namespace

const config = {
+ cacheSensitivity: 0,
modelBasePath: 'https://vladmandic.github.io/human/models/',
// modelBasePath: 'http://localhost:10030/models/',
backend: 'wasm',

@@ -3,6 +3,7 @@ const Human = require('../dist/human.node.js').default;
const test = require('./test-main.js').test;

const config = {
+ cacheSensitivity: 0,
modelBasePath: 'file://models/',
backend: 'tensorflow',
debug: false,