mirror of https://github.com/vladmandic/human

image processing fixes

parent 110f4999a4
commit 924a0b24f0
@@ -38,6 +38,7 @@
     "@typescript-eslint/no-shadow": "error",
     "@typescript-eslint/no-var-requires": "off",
     "@typescript-eslint/triple-slash-reference": "off",
+    "@typescript-eslint/no-inferrable-types": "off",
     "camelcase": "off",
     "dot-notation": "off",
     "func-names": "off",
@@ -31,6 +31,15 @@ import jsonView from './helpers/jsonview.js';
 let human;

 let userConfig = {
+  cacheSensitivity: 0,
+  hand: { enabled: true },
+  body: { enabled: false },
+  face: { enabled: false },
+  /*
+  hand: { enabled: false, maxDetected: 1, skipFrames: 0 },
+  body: { enabled: false },
+  face: { enabled: false },
+  */
   /*
   warmup: 'none',
   backend: 'humangl',
@@ -259,7 +268,7 @@ async function drawResults(input) {
     }
     // result.canvas = seg.alpha;
   } else if (!result.canvas || ui.buffered) { // refresh with input if using buffered output or if missing canvas
-    const image = await human.image(input);
+    const image = await human.image(input, false);
     result.canvas = image.canvas;
     human.tf.dispose(image.tensor);
   }
@@ -302,17 +311,17 @@ async function drawResults(input) {

   // update log
   const engine = human.tf.engine();
-  const gpu = engine.backendInstance ? `gpu: ${(engine.backendInstance.numBytesInGPU ? engine.backendInstance.numBytesInGPU : 0).toLocaleString()} bytes` : '';
-  const memory = `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
   const processing = result.canvas ? `processing: ${result.canvas.width} x ${result.canvas.height}` : '';
   const avgDetect = ui.detectFPS.length > 0 ? Math.trunc(10 * ui.detectFPS.reduce((a, b) => a + b, 0) / ui.detectFPS.length) / 10 : 0;
   const avgDraw = ui.drawFPS.length > 0 ? Math.trunc(10 * ui.drawFPS.reduce((a, b) => a + b, 0) / ui.drawFPS.length) / 10 : 0;
   const warning = (ui.detectFPS.length > 5) && (avgDetect < 2) ? '<font color="lightcoral">warning: your performance is low: try switching to higher performance backend, lowering resolution or disabling some models</font>' : '';
   const fps = avgDetect > 0 ? `FPS process:${avgDetect} refresh:${avgDraw}` : '';
-  const backend = engine.state.numTensors > 0 ? `${human.tf.getBackend()} | ${memory}` : `${result.backend} | tensors: ${result.tensors} in worker`;
+  const backend = result.backend || human.tf.getBackend();
+  const gpu = engine.backendInstance ? `gpu: ${(engine.backendInstance.numBytesInGPU ? engine.backendInstance.numBytesInGPU : 0).toLocaleString()} bytes` : '';
+  const memory = result.tensors || `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
   document.getElementById('log').innerHTML = `
     video: ${ui.camera.name} | facing: ${ui.camera.facing} | screen: ${window.innerWidth} x ${window.innerHeight} camera: ${ui.camera.width} x ${ui.camera.height} ${processing}<br>
-    backend: ${backend}<br>
+    backend: ${backend} | ${memory}<br>
     performance: ${str(interpolated.performance)}ms ${fps}<br>
     ${warning}<br>
   `;
@@ -142,6 +142,11 @@ async function detectParts(input: Tensor, config: Config, outputSize: [number, n
 }

 export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
+  /** blazepose caching
+   * not fully implemented
+   * 1. if skipFrame returned cached
+   * 2. run detection based on squared full frame
+   */
   const outputSize: [number, number] = [input.shape[2] || 0, input.shape[1] || 0];
   if ((skipped < (config.body.skipFrames || 0)) && config.skipFrame) {
     skipped++;
@@ -7,17 +7,19 @@
 import { log, join } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import * as coords from './efficientposecoords';
-import type { BodyKeypoint, BodyResult, Box, Point } from '../result';
+import type { BodyResult, Point } from '../result';
 import type { GraphModel, Tensor } from '../tfjs/types';
 import type { Config } from '../config';
 import { env } from '../util/env';

 let model: GraphModel | null;

-const keypoints: Array<BodyKeypoint> = [];
-let box: Box = [0, 0, 0, 0];
-let boxRaw: Box = [0, 0, 0, 0];
-let score = 0;
+const cache: BodyResult = { id: 0, keypoints: [], box: [0, 0, 0, 0], boxRaw: [0, 0, 0, 0], score: 0, annotations: {} };
+// const keypoints: Array<BodyKeypoint> = [];
+// let box: Box = [0, 0, 0, 0];
+// let boxRaw: Box = [0, 0, 0, 0];
+// let score = 0;
 let skipped = Number.MAX_SAFE_INTEGER;

 export async function load(config: Config): Promise<GraphModel> {
@@ -48,9 +50,14 @@ function max2d(inputs, minScore) {
 }

 export async function predict(image: Tensor, config: Config): Promise<BodyResult[]> {
-  if ((skipped < (config.body?.skipFrames || 0)) && config.skipFrame && Object.keys(keypoints).length > 0) {
+  /** blazepose caching
+   * not fully implemented
+   * 1. if skipFrame returned cached
+   * 2. run detection based on squared full frame
+   */
+  if ((skipped < (config.body?.skipFrames || 0)) && config.skipFrame && Object.keys(cache.keypoints).length > 0) {
     skipped++;
-    return [{ id: 0, score, box, boxRaw, keypoints, annotations: {} }];
+    return [cache];
   }
   skipped = 0;
   return new Promise(async (resolve) => {
@@ -67,7 +74,7 @@ export async function predict(image: Tensor, config: Config): Promise<BodyResult[]> {
   tf.dispose(tensor);

   if (resT) {
-    keypoints.length = 0;
+    cache.keypoints.length = 0;
     const squeeze = resT.squeeze();
     tf.dispose(resT);
     // body parts are basically just a stack of 2d tensors
@@ -77,8 +84,8 @@ export async function predict(image: Tensor, config: Config): Promise<BodyResult[]> {
     for (let id = 0; id < stack.length; id++) {
       // actual processing to get coordinates and score
       const [x, y, partScore] = max2d(stack[id], config.body.minConfidence);
-      if (score > (config.body?.minConfidence || 0)) {
-        keypoints.push({
+      if (partScore > (config.body?.minConfidence || 0)) {
+        cache.keypoints.push({
           score: Math.round(100 * partScore) / 100,
           part: coords.kpt[id],
           positionRaw: [ // normalized to 0..1
@@ -94,33 +101,32 @@ export async function predict(image: Tensor, config: Config): Promise<BodyResult[]> {
      }
      stack.forEach((s) => tf.dispose(s));
    }
-   score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
-   const x = keypoints.map((a) => a.position[0]);
-   const y = keypoints.map((a) => a.position[1]);
-   box = [
+   cache.score = cache.keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
+   const x = cache.keypoints.map((a) => a.position[0]);
+   const y = cache.keypoints.map((a) => a.position[1]);
+   cache.box = [
      Math.min(...x),
      Math.min(...y),
      Math.max(...x) - Math.min(...x),
      Math.max(...y) - Math.min(...y),
    ];
-   const xRaw = keypoints.map((a) => a.positionRaw[0]);
-   const yRaw = keypoints.map((a) => a.positionRaw[1]);
-   boxRaw = [
+   const xRaw = cache.keypoints.map((a) => a.positionRaw[0]);
+   const yRaw = cache.keypoints.map((a) => a.positionRaw[1]);
+   cache.boxRaw = [
      Math.min(...xRaw),
      Math.min(...yRaw),
      Math.max(...xRaw) - Math.min(...xRaw),
      Math.max(...yRaw) - Math.min(...yRaw),
    ];
-   const annotations: Record<string, Point[][]> = {};
    for (const [name, indexes] of Object.entries(coords.connected)) {
      const pt: Array<Point[]> = [];
      for (let i = 0; i < indexes.length - 1; i++) {
-       const pt0 = keypoints.find((kpt) => kpt.part === indexes[i]);
-       const pt1 = keypoints.find((kpt) => kpt.part === indexes[i + 1]);
+       const pt0 = cache.keypoints.find((kpt) => kpt.part === indexes[i]);
+       const pt1 = cache.keypoints.find((kpt) => kpt.part === indexes[i + 1]);
        if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]);
      }
-     annotations[name] = pt;
+     cache.annotations[name] = pt;
    }
-   resolve([{ id: 0, score, box, boxRaw, keypoints, annotations }]);
+   resolve([cache]);
   });
 }
@@ -42,6 +42,20 @@ export async function load(config: Config): Promise<GraphModel> {
   return model;
 }

+function fixSides() { // model sometimes mixes up left vs right keypoints so we fix them
+  for (const pair of coords.pairs) {
+    let left = keypoints.find((kp) => kp.part === pair[0]);
+    let right = keypoints.find((kp) => kp.part === pair[1]);
+    if (left && right) {
+      if (left.position[0] > right.position[0]) {
+        const tmp = left;
+        left = right;
+        right = tmp;
+      }
+    }
+  }
+}
+
 async function parseSinglePose(res, config, image, inputBox) {
   const kpt = res[0][0];
   keypoints.length = 0;
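note: as committed, fixSides() swaps only its two local bindings, so the keypoints array is never actually modified. a hedged sketch of an in-place variant (hypothetical, not part of this commit) would swap the part labels instead, which is the ordering the committed check tests for:

    // hypothetical in-place variant: relabel the pair so the keypoint tagged as
    // the left-side part really is the one with the smaller x coordinate;
    // the committed code swaps local variables only, leaving the array untouched
    type KP = { part: string; position: [number, number] };
    function fixSidesInPlace(keypoints: KP[], pairs: string[][]) {
      for (const pair of pairs) {
        const left = keypoints.find((kp) => kp.part === pair[0]);
        const right = keypoints.find((kp) => kp.part === pair[1]);
        if (left && right && left.position[0] > right.position[0]) {
          [left.part, right.part] = [right.part, left.part]; // swap labels, not objects
        }
      }
    }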
@@ -64,6 +78,7 @@ async function parseSinglePose(res, config, image, inputBox) {
       });
     }
   }
+  fixSides();
   score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
   const bodies: Array<BodyResult> = [];
   const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]);
@@ -103,6 +118,7 @@ async function parseMultiPose(res, config, image, inputBox) {
       });
     }
   }
+  fixSides();
   const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]);
   // movenet-multipose has built-in box details
   // const boxRaw: Box = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]];
@@ -126,6 +142,13 @@ async function parseMultiPose(res, config, image, inputBox) {
 }

 export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
+  /** movenet caching
+   * 1. if skipFrame returned cached
+   * 2. if enough cached boxes run using cached boxes
+   * 3. if not enough detected bodies rerun using full frame
+   * 4. regenerate cached boxes based on current keypoints
+   */
+
   if (!model || !model?.inputs[0].shape) return []; // something is wrong with the model
   if (!config.skipFrame) cache.boxes.length = 0; // allowed to use cache or not
   skipped++; // increment skip frames
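note: the four steps in the comment above describe a skip-frame cache. a minimal self-contained sketch of that pattern, with generic names (this is an outline of the idea, not the verbatim implementation in this file):

    // generic skip-frame cache: reuse the last results for up to skipFrames calls
    type Detect<T> = (frame: unknown) => Promise<T[]>;
    function withCache<T>(detect: Detect<T>, skipFrames: number): Detect<T> {
      let cached: T[] = [];
      let skipped = Number.MAX_SAFE_INTEGER;
      return async (frame) => {
        if (skipped < skipFrames && cached.length > 0) { skipped++; return cached; } // 1. return cached
        cached = await detect(frame); // 2/3. rerun detection when the cache runs out
        skipped = 0; // 4. reset the counter so the fresh results are reused next
        return cached;
      };
    }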
@@ -153,7 +176,6 @@ export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
       t.res = await model?.predict(t.cast) as Tensor;
       const res = await t.res.array();
       cache.bodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, [0, 0, 1, 1]) : await parseMultiPose(res, config, input, [0, 0, 1, 1]);
-      // cache.bodies = cache.bodies.map((body) => ({ ...body, box: box.scale(body.box, 0.5) }));
       Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
     }
     cache.boxes.length = 0; // reset cache
@@ -18,6 +18,17 @@ export const kpt: Array<string> = [
   'rightAnkle',
 ];

+export const pairs: Array<string[]> = [
+  ['leftEye', 'rightEye'],
+  ['leftEar', 'rightEar'],
+  ['leftShoulder', 'rightShoulder'],
+  ['leftElbow', 'rightElbow'],
+  ['leftWrist', 'rightWrist'],
+  ['leftHip', 'rightHip'],
+  ['leftKnee', 'rightKnee'],
+  ['leftAnkle', 'rightAnkle'],
+];
+
 export const connected: Record<string, string[]> = {
   leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'],
   rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'],
@@ -156,6 +156,9 @@ export function decode(offsets, scores, displacementsFwd, displacementsBwd, maxD
 }

 export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
+  /** posenet is mostly obsolete
+   * caching is not implemented
+   */
   const res = tf.tidy(() => {
     if (!model.inputs[0].shape) return [];
     const resized = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
@@ -47,14 +47,14 @@ export const body = (res): GestureResult[] => {
     const leftWrist = res[i].keypoints.find((a) => (a.part === 'leftWrist'));
     const rightWrist = res[i].keypoints.find((a) => (a.part === 'rightWrist'));
     const nose = res[i].keypoints.find((a) => (a.part === 'nose'));
-    if (nose && leftWrist && rightWrist && (leftWrist.position.y < nose.position.y) && (rightWrist.position.y < nose.position.y)) gestures.push({ body: i, gesture: 'i give up' });
-    else if (nose && leftWrist && (leftWrist.position.y < nose.position.y)) gestures.push({ body: i, gesture: 'raise left hand' });
-    else if (nose && rightWrist && (rightWrist.position.y < nose.position.y)) gestures.push({ body: i, gesture: 'raise right hand' });
+    if (nose && leftWrist && rightWrist && (leftWrist.position[1] < nose.position[1]) && (rightWrist.position[1] < nose.position[1])) gestures.push({ body: i, gesture: 'i give up' });
+    else if (nose && leftWrist && (leftWrist.position[1] < nose.position[1])) gestures.push({ body: i, gesture: 'raise left hand' });
+    else if (nose && rightWrist && (rightWrist.position[1] < nose.position[1])) gestures.push({ body: i, gesture: 'raise right hand' });

     // leaning
     const leftShoulder = res[i].keypoints.find((a) => (a.part === 'leftShoulder'));
     const rightShoulder = res[i].keypoints.find((a) => (a.part === 'rightShoulder'));
-    if (leftShoulder && rightShoulder) gestures.push({ body: i, gesture: `leaning ${(leftShoulder.position.y > rightShoulder.position.y) ? 'left' : 'right'}` });
+    if (leftShoulder && rightShoulder) gestures.push({ body: i, gesture: `leaning ${(leftShoulder.position[1] > rightShoulder.position[1]) ? 'left' : 'right'}` });
   }
   return gestures;
 };
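note: keypoint positions here are tuples rather than {x, y} objects, so position.y reads undefined and the comparisons above could never be true; indexing restores the intended checks. assuming the library's Point type:

    // positions are tuples, so .y silently yields undefined
    type Point = [number, number, number?]; // [x, y, z?] - assumed shape
    const nose: Point = [320, 120];
    const y = nose[1]; // correct; (nose as any).y would be undefined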
@@ -23,7 +23,9 @@ const inputSize = [[0, 0], [0, 0]];

 const classes = ['hand', 'fist', 'pinch', 'point', 'face', 'tip', 'pinchtip'];

-const boxExpandFact = 1.6; // increase to 160%
+const boxExpandFact = 1.6;
+const maxDetectorResolution = 512;
+const detectorExpandFact = 1.2;

 let skipped = 0;
 let outputSize: [number, number] = [0, 0];
@@ -95,7 +97,7 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectResult[]> {
   if (!input || !models[0]) return hands;
   const t: Record<string, Tensor> = {};
   const ratio = (input.shape[2] || 1) / (input.shape[1] || 1);
-  const height = Math.min(Math.round((input.shape[1] || 0) / 8) * 8, 512); // use dynamic input size but cap at 512
+  const height = Math.min(Math.round((input.shape[1] || 0) / 8) * 8, maxDetectorResolution); // use dynamic input size but cap at 512
   const width = Math.round(height * ratio / 8) * 8;
   t.resize = tf.image.resizeBilinear(input, [height, width]); // todo: resize with padding
   t.cast = tf.cast(t.resize, 'int32');
@@ -117,12 +119,13 @@
       const boxSlice = tf.slice(t.boxes, nmsIndex, 1);
       const boxData = await boxSlice.data();
       tf.dispose(boxSlice);
-      const boxInput: Box = [boxData[1], boxData[0], boxData[3] - boxData[1], boxData[2] - boxData[0]];
-      const boxRaw: Box = box.scale(boxInput, 1.2); // handtrack model returns tight box so we expand it a bit
-      const boxFull: Box = [Math.trunc(boxRaw[0] * outputSize[0]), Math.trunc(boxRaw[1] * outputSize[1]), Math.trunc(boxRaw[2] * outputSize[0]), Math.trunc(boxRaw[3] * outputSize[1])];
+      const boxSquareSize = Math.max(boxData[3] - boxData[1], boxData[2] - boxData[0]);
+      const boxRaw: Box = box.scale([boxData[1], boxData[0], boxSquareSize, boxSquareSize], detectorExpandFact); // for raw box we use squared and expanded box
+      const boxCrop: Box = box.crop(boxRaw); // crop box is based on raw box
+      const boxFull: Box = [Math.trunc(boxData[1] * outputSize[0]), Math.trunc(boxData[0] * outputSize[1]), Math.trunc((boxData[3] - boxData[1]) * outputSize[0]), Math.trunc((boxData[2] - boxData[0]) * outputSize[1])]; // for box we keep original scaled values
       const score = scores[nmsIndex];
       const label = classes[classNum[nmsIndex]];
-      const hand: HandDetectResult = { id: id++, score, box: boxFull, boxRaw, boxCrop: box.crop(boxRaw), label };
+      const hand: HandDetectResult = { id: id++, score, box: boxFull, boxRaw, boxCrop, label };
       hands.push(hand);
     }
     Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
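note: a worked example of the new box derivation, with an assumed detector output boxData = [0.2, 0.3, 0.6, 0.5] in normalized [y1, x1, y2, x2] order (the ordering implied by the old boxInput line):

    // illustrative values, not from the commit
    const boxData = [0.2, 0.3, 0.6, 0.5]; // [y1, x1, y2, x2], normalized 0..1
    const boxSquareSize = Math.max(boxData[3] - boxData[1], boxData[2] - boxData[0]); // max(0.2, 0.4) = 0.4
    // boxRaw: a 0.4 x 0.4 square at [x1, y1] = [0.3, 0.2], grown by detectorExpandFact (1.2) around its center
    // boxFull: the original tight rectangle converted to output pixels, kept for display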
@@ -146,7 +149,7 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
   };
   if (input && models[1] && config.hand.landmarks && h.score > (config.hand.minConfidence || 0)) {
     const t: Record<string, Tensor> = {};
-    t.crop = tf.image.cropAndResize(input, [box.crop(h.boxRaw)], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
+    t.crop = tf.image.cropAndResize(input, [h.boxCrop], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
     t.cast = tf.cast(t.crop, 'float32');
     t.div = tf.div(t.cast, 255);
     [t.score, t.keypoints] = models[1].execute(t.div) as Tensor[];
@@ -155,12 +158,17 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
     if (score >= (config.hand.minConfidence || 0)) {
       hand.fingerScore = score;
       t.reshaped = tf.reshape(t.keypoints, [-1, 3]);
-      const rawCoords = await t.reshaped.array() as Point[];
-      hand.keypoints = (rawCoords as Point[]).map((kpt) => [
-        outputSize[0] * ((h.boxCrop[3] - h.boxCrop[1]) * kpt[0] / inputSize[1][0] + h.boxCrop[1]),
-        outputSize[1] * ((h.boxCrop[2] - h.boxCrop[0]) * kpt[1] / inputSize[1][1] + h.boxCrop[0]),
-        (h.boxCrop[3] + h.boxCrop[3] / 2 * (kpt[2] || 0)),
+      const coordsData: Point[] = await t.reshaped.array() as Point[];
+      const coordsRaw: Point[] = coordsData.map((kpt) => [kpt[0] / inputSize[1][1], kpt[1] / inputSize[1][0], (kpt[2] || 0)]);
+      const coordsNorm: Point[] = coordsRaw.map((kpt) => [kpt[0] * h.boxRaw[2], kpt[1] * h.boxRaw[3], (kpt[2] || 0)]);
+      console.log(outputSize, h.box);
+      hand.keypoints = (coordsNorm).map((kpt) => [
+        outputSize[0] * kpt[0] + h.box[0],
+        outputSize[1] * kpt[1] + h.box[1],
+        (kpt[2] || 0),
       ]);
+      // hand.box = box.scale(h.box, 1 / detectorExpandFact); // scale box down for visual appeal
+      // hand.boxRaw = box.scale(h.boxRaw, 1 / detectorExpandFact); // scale box down for visual appeal
       hand.landmarks = fingerPose.analyze(hand.keypoints) as HandResult['landmarks']; // calculate finger landmarks
       for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
         hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
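note: the rewritten mapping goes crop space, then box-relative, then output pixels. tracing one keypoint with illustrative sizes (a 224x224 landmark crop and a 640x480 frame are assumptions, not values from the commit); the console.log(outputSize, h.box) line above reads like debug instrumentation that shipped with the commit:

    const crop = [224, 224];                               // landmark model input, [h, w] (assumed)
    const boxRaw = [0.25, 0.25, 0.5, 0.5];                 // normalized square detector box
    const boxPx = [160, 120, 320, 240];                    // same box in output pixels
    const out = [640, 480];                                // output frame, [w, h] (assumed)
    const kpt = [112, 112];                                // raw model output in crop pixels
    const raw = [kpt[0] / crop[1], kpt[1] / crop[0]];      // step 1 -> [0.5, 0.5], normalized within crop
    const norm = [raw[0] * boxRaw[2], raw[1] * boxRaw[3]]; // step 2 -> [0.25, 0.25], scaled by box size
    const px = [out[0] * norm[0] + boxPx[0], out[1] * norm[1] + boxPx[1]]; // step 3 -> [320, 240]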
@@ -171,15 +179,27 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
   return hand;
 }

+let n = 0;
 export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {
+  n++;
+  /** handtrack caching
+   * 1. if skipFrame returned cached
+   * 2. if any cached results but although not sure if its enough we continute anyhow for 10x skipframes
+   * 3. eventually rerun detector to generated new cached boxes and reset skipped
+   * 4. generate cached boxes based on detected keypoints
+   */
   if (!models[0] || !models[1] || !models[0]?.inputs[0].shape || !models[1]?.inputs[0].shape) return []; // something is wrong with the model
   outputSize = [input.shape[2] || 0, input.shape[1] || 0];

   skipped++; // increment skip frames
   if (config.skipFrame && (skipped <= (config.hand.skipFrames || 0))) {
+    console.log(n, 'SKIP', { results: cache.hands.length });
     return cache.hands; // return cached results without running anything
   }
   return new Promise(async (resolve) => {
+    console.log(n, 'DETECT', { skipped, hands: cache.hands.length, boxes: cache.boxes.length });
+    // this is logically consistent but insufficiently efficient
+    /*
    skipped = 0;
    if (cache.boxes.length >= (config.hand.maxDetected || 0)) {
      cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config))); // if we have enough cached boxes run detection using cache
@@ -191,17 +211,32 @@ export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {
      cache.boxes = await detectHands(input, config);
      cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
    }
+    */
+
+    if (config.skipFrame && skipped <= 10 * (config.hand.skipFrames || 0) && cache.hands.length > 0) { // we have some cached results but although not sure if its enough we continute anyhow for 10x skipframes
+      cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
+      console.log(n, 'HANDS', { hands: cache.hands.length });
+    } else {
+      cache.boxes = await detectHands(input, config);
+      console.log(n, 'BOXES', { hands: cache.boxes.length });
+      cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
+      console.log(n, 'HANDS', { hands: cache.hands.length });
+      skipped = 0;
+    }

    const oldCache = [...cache.boxes];
    cache.boxes.length = 0; // reset cache
-    for (let i = 0; i < cache.hands.length; i++) {
-      const boxKpt = box.square(cache.hands[i].keypoints, outputSize);
-      if (boxKpt.box[2] / (input.shape[2] || 1) > 0.05 && boxKpt.box[3] / (input.shape[1] || 1) > 0.05 && cache.hands[i].fingerScore && cache.hands[i].fingerScore > (config.hand.minConfidence || 0)) {
-        const boxScale = box.scale(boxKpt.box, boxExpandFact);
-        const boxScaleRaw = box.scale(boxKpt.boxRaw, boxExpandFact);
-        const boxCrop = box.crop(boxScaleRaw);
-        cache.boxes.push({ ...oldCache[i], box: boxScale, boxRaw: boxScaleRaw, boxCrop });
+    if (config.cacheSensitivity > 0) {
+      for (let i = 0; i < cache.hands.length; i++) {
+        const boxKpt = box.square(cache.hands[i].keypoints, outputSize);
+        if (boxKpt.box[2] / (input.shape[2] || 1) > 0.05 && boxKpt.box[3] / (input.shape[1] || 1) > 0.05 && cache.hands[i].fingerScore && cache.hands[i].fingerScore > (config.hand.minConfidence || 0)) {
+          const boxScale = box.scale(boxKpt.box, boxExpandFact);
+          const boxScaleRaw = box.scale(boxKpt.boxRaw, boxExpandFact);
+          const boxCrop = box.crop(boxScaleRaw);
+          cache.boxes.push({ ...oldCache[i], box: boxScale, boxRaw: boxScaleRaw, boxCrop });
+        }
      }
+      console.log(n, 'CACHED', { hands: cache.boxes.length });
    }
    resolve(cache.hands);
  });
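note: the numbered console.log calls (SKIP/DETECT/BOXES/HANDS/CACHED) read like debug instrumentation left in the commit. the branch that decides whether to reuse cached boxes instead of rerunning the detector reduces to this predicate (sketch, extracted from the condition above):

    // when true, only the finger model reruns on the cached boxes;
    // otherwise the detector reruns and the skip counter resets
    const useCachedBoxes = (skipFrame: boolean, skipped: number, skipFrames: number, cachedHands: number): boolean =>
      skipFrame && (skipped <= 10 * skipFrames) && (cachedHands > 0);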
@@ -265,8 +265,8 @@ export class Human {
    * @param input: {@link Input}
    * @returns { tensor, canvas }
    */
-  image(input: Input) {
-    return image.process(input, this.config);
+  image(input: Input, getTensor: boolean = true) {
+    return image.process(input, this.config, getTensor);
   }

   /** Segmentation method takes any input and returns processed canvas with body segmentation
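note: the new getTensor flag defaults to true, so existing callers keep the old { tensor, canvas } behavior; passing false (as the demo change above does) skips tensor creation when only the processed canvas is needed for drawing. a usage sketch, assuming an initialized Human instance and an input frame:

    const full = human.image(input);        // { tensor, canvas } as before
    const view = human.image(input, false); // { tensor: null, canvas } - nothing to dispose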
@@ -13,8 +13,8 @@ export type Input = Tensor | ImageData | ImageBitmap | HTMLImageElement | HTMLMe

 const maxSize = 2048;
 // internal temp canvases
-let inCanvas;
-let outCanvas;
+let inCanvas: HTMLCanvasElement | OffscreenCanvas | null = null; // use global variable to avoid recreating canvas on each frame
+let outCanvas: HTMLCanvasElement | OffscreenCanvas | null = null; // use global variable to avoid recreating canvas on each frame
 // @ts-ignore // imagefx is js module that should be converted to a class
 let fx: fxImage.GLImageFilter | null; // instance of imagefx

@@ -38,11 +38,17 @@ export function canvas(width, height): HTMLCanvasElement | OffscreenCanvas {
   return c;
 }

+export function copy(input: HTMLCanvasElement | OffscreenCanvas, output?: HTMLCanvasElement | OffscreenCanvas) {
+  const outputCanvas = output || canvas(input.width, input.height);
+  const ctx = outputCanvas.getContext('2d') as CanvasRenderingContext2D;
+  ctx.drawImage(input, 0, 0);
+  return outputCanvas;
+}
+
 // process input image and return tensor
 // input can be tensor, imagedata, htmlimageelement, htmlvideoelement
 // input is resized and run through imagefx filter
-export function process(input: Input, config: Config): { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null } {
-  let tensor;
+export function process(input: Input, config: Config, getTensor: boolean = true): { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null } {
   if (!input) {
     // throw new Error('input is missing');
     if (config.debug) log('input is missing');
@@ -66,9 +72,9 @@ export function process(input: Input, config: Config): { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null } {
   }
   if (input instanceof tf.Tensor) {
     // if input is tensor, use as-is
-    if ((input as Tensor)['isDisposedInternal']) throw new Error('input tensor is disposed');
-    if ((input as Tensor).shape && (input as Tensor).shape.length === 4 && (input as unknown as Tensor).shape[0] === 1 && (input as unknown as Tensor).shape[3] === 3) tensor = tf.clone(input);
-    else throw new Error(`input tensor shape must be [1, height, width, 3] and instead was ${(input as Tensor).shape}`);
+    if ((input)['isDisposedInternal']) throw new Error('input tensor is disposed');
+    else if (!input.shape || input.shape.length !== 4 || input.shape[0] !== 1 || input.shape[3] !== 3) throw new Error(`input tensor shape must be [1, height, width, 3] and instead was ${input.shape}`);
+    else return { tensor: tf.clone(input), canvas: (config.filter.return ? outCanvas : null) };
   } else {
     // check if resizing will be needed
     if (typeof input['readyState'] !== 'undefined' && input['readyState'] <= 2) {
@@ -101,28 +107,26 @@ export function process(input: Input, config: Config): { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null } {
   if (!inCanvas || (inCanvas?.width !== targetWidth) || (inCanvas?.height !== targetHeight)) inCanvas = canvas(targetWidth, targetHeight);

   // draw input to our canvas
-  const ctx = inCanvas.getContext('2d');
+  const inCtx = inCanvas.getContext('2d') as CanvasRenderingContext2D;
   if ((typeof ImageData !== 'undefined') && (input instanceof ImageData)) {
-    ctx.putImageData(input, 0, 0);
+    inCtx.putImageData(input, 0, 0);
   } else {
-    if (config.filter.flip && typeof ctx.translate !== 'undefined') {
-      ctx.translate(originalWidth, 0);
-      ctx.scale(-1, 1);
-      ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height);
-      ctx.setTransform(1, 0, 0, 1, 0, 0); // resets transforms to defaults
+    if (config.filter.flip && typeof inCtx.translate !== 'undefined') {
+      inCtx.translate(originalWidth, 0);
+      inCtx.scale(-1, 1);
+      inCtx.drawImage(input as CanvasImageSource, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height);
+      inCtx.setTransform(1, 0, 0, 1, 0, 0); // resets transforms to defaults
     } else {
-      ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height);
+      inCtx.drawImage(input as CanvasImageSource, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height);
     }
   }
-  // imagefx transforms using gl
+
+  if (!outCanvas || (inCanvas.width !== outCanvas.width) || (inCanvas?.height !== outCanvas?.height)) outCanvas = canvas(inCanvas.width, inCanvas.height); // init output canvas
+
+  // imagefx transforms using gl from input canvas to output canvas
   if (config.filter.enabled && env.webgl.supported) {
-    if (!fx || !outCanvas || (inCanvas.width !== outCanvas.width) || (inCanvas?.height !== outCanvas?.height)) {
-      outCanvas = canvas(inCanvas?.width, inCanvas?.height);
-      if (outCanvas?.width !== inCanvas?.width) outCanvas.width = inCanvas?.width;
-      if (outCanvas?.height !== inCanvas?.height) outCanvas.height = inCanvas?.height;
-      // log('created FX filter');
-      fx = env.browser ? new fxImage.GLImageFilter({ canvas: outCanvas }) : null; // && (typeof document !== 'undefined')
-    }
+    if (!fx) fx = env.browser ? new fxImage.GLImageFilter({ canvas: outCanvas }) : null; // && (typeof document !== 'undefined')
+    env.filter = !!fx;
     if (!fx) return { tensor: null, canvas: inCanvas };
     fx.reset();
     fx.addFilter('brightness', config.filter.brightness); // must have at least one filter enabled
@@ -140,118 +144,105 @@ export function process(input: Input, config: Config): { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null } {
     if (config.filter.polaroid) fx.addFilter('polaroid');
     if (config.filter.pixelate !== 0) fx.addFilter('pixelate', config.filter.pixelate);
     fx.apply(inCanvas);
-    // read pixel data
-    /*
-    const gl = outCanvas.getContext('webgl');
-    if (gl) {
-      const glBuffer = new Uint8Array(outCanvas.width * outCanvas.height * 4);
-      const pixBuffer = new Uint8Array(outCanvas.width * outCanvas.height * 3);
-      gl.readPixels(0, 0, outCanvas.width, outCanvas.height, gl.RGBA, gl.UNSIGNED_BYTE, glBuffer);
-      // gl returns rbga while we only need rgb, so discarding alpha channel
-      // gl returns starting point as lower left, so need to invert vertical
-      let i = 0;
-      for (let y = outCanvas.height - 1; y >= 0; y--) {
-        for (let x = 0; x < outCanvas.width; x++) {
-          const index = (x + y * outCanvas.width) * 4;
-          pixBuffer[i++] = glBuffer[index + 0];
-          pixBuffer[i++] = glBuffer[index + 1];
-          pixBuffer[i++] = glBuffer[index + 2];
-        }
-      }
-      outCanvas.data = pixBuffer;
-      const shape = [outCanvas.height, outCanvas.width, 3];
-      const pixels = tf.tensor3d(outCanvas.data, shape, 'float32');
-      tensor = tf.expandDims(pixels, 0);
-      tf.dispose(pixels);
-    }
-    */
   } else {
-    outCanvas = inCanvas;
+    copy(inCanvas, outCanvas); // if no filters applied, output canvas is input canvas
     if (fx) fx = null;
+    env.filter = !!fx;
   }
-  // create tensor from image if tensor is not already defined
-  if (!tensor) {
-    let pixels;
-    if (outCanvas.data) { // if we have data, just convert to tensor
-      const shape = [outCanvas.height, outCanvas.width, 3];
-      pixels = tf.tensor3d(outCanvas.data, shape, 'float32');
-    } else if ((typeof ImageData !== 'undefined') && (outCanvas instanceof ImageData)) { // if input is imagedata, just use it
-      pixels = tf.browser ? tf.browser.fromPixels(outCanvas) : null;
-    } else if (config.backend === 'webgl' || config.backend === 'humangl') { // tf kernel-optimized method to get imagedata
-      // we cant use canvas as-is as it already has a context, so we do a silly one more canvas
-      const tempCanvas = canvas(targetWidth, targetHeight);
-      tempCanvas.width = targetWidth;
-      tempCanvas.height = targetHeight;
-      const tempCtx = tempCanvas.getContext('2d');
-      tempCtx?.drawImage(outCanvas, 0, 0);
-      try {
-        pixels = (tf.browser && env.browser) ? tf.browser.fromPixels(tempCanvas) : null;
-      } catch (err) {
-        throw new Error('browser webgl error');
-      }
-    } else { // cpu and wasm kernel does not implement efficient fromPixels method
-      // we cant use canvas as-is as it already has a context, so we do a silly one more canvas and do fromPixels on ImageData instead
-      const tempCanvas = canvas(targetWidth, targetHeight);
-      if (!tempCanvas) return { tensor: null, canvas: inCanvas };
-      tempCanvas.width = targetWidth;
-      tempCanvas.height = targetHeight;
-      const tempCtx = tempCanvas.getContext('2d');
-      if (!tempCtx) return { tensor: null, canvas: inCanvas };
-      tempCtx.drawImage(outCanvas, 0, 0);
-      const data = tempCtx.getImageData(0, 0, targetWidth, targetHeight);
-      if (tf.browser && env.browser) {
-        pixels = tf.browser.fromPixels(data);
-      } else {
-        pixels = tf.tidy(() => {
-          const imageData = tf.tensor(Array.from(data.data), [targetWidth, targetHeight, 4]);
-          const channels = tf.split(imageData, 4, 2); // split rgba to channels
-          const rgb = tf.stack([channels[0], channels[1], channels[2]], 2); // stack channels back to rgb and ignore alpha
-          const expand = tf.reshape(rgb, [imageData.shape[0], imageData.shape[1], 3]); // move extra dim from the end of tensor and use it as batch number instead
-          return expand;
-        });
-      }
-    }
-    if (pixels) {
-      const casted = tf.cast(pixels, 'float32');
-      tensor = tf.expandDims(casted, 0);
-      tf.dispose(pixels);
-      tf.dispose(casted);
+
+  if (!getTensor) return { tensor: null, canvas: outCanvas }; // just canvas was requested
+
+  // create tensor from image unless input was a tensor already
+  let pixels;
+  let depth = 3;
+  if ((typeof ImageData !== 'undefined' && input instanceof ImageData) || (input['data'] && input['width'] && input['height'])) { // if input is imagedata, just use it
+    if (env.browser && tf.browser) {
+      pixels = tf.browser ? tf.browser.fromPixels(input) : null;
     } else {
-      tensor = tf.zeros([1, targetWidth, targetHeight, 3]);
-      throw new Error('cannot create tensor from input');
+      depth = input['data'].length / input['height'] / input['width'];
+      // const arr = Uint8Array.from(input['data']);
+      const arr = new Uint8Array(input['data']['buffer']);
+      pixels = tf.tensor(arr, [input['height'], input['width'], depth], 'float32');
+    }
+  } else {
+    if (tf.browser && env.browser) {
+      if (config.backend === 'webgl' || config.backend === 'humangl' || config.backend === 'webgpu') {
+        pixels = tf.browser.fromPixels(outCanvas); // safe to reuse since both backend and context are gl based
+      } else {
+        const tempCanvas = copy(outCanvas); // cannot use output canvas as it already has gl context so we do a silly one more canvas
+        pixels = tf.browser.fromPixels(tempCanvas);
+      }
+    } else {
+      const tempCanvas = copy(outCanvas); // cannot use output canvas as it already has gl context so we do a silly one more canvas
+      const tempCtx = tempCanvas.getContext('2d') as CanvasRenderingContext2D;
+      const tempData = tempCtx.getImageData(0, 0, targetWidth, targetHeight);
+      depth = input['data'].length / targetWidth / targetHeight;
+      const arr = new Uint8Array(tempData.data.buffer);
+      pixels = tf.tensor(arr, [targetWidth, targetHeight, depth]);
     }
   }
-  return { tensor, canvas: (config.filter.return ? outCanvas : null) };
+  if (depth === 4) { // rgba to rgb
+    const rgb = tf.slice3d(pixels, [0, 0, 0], [-1, -1, 3]); // strip alpha channel
+    tf.dispose(pixels);
+    pixels = rgb;
+    /*
+    const channels = tf.split(pixels, 4, 2); // split rgba to channels
+    tf.dispose(pixels);
+    const rgb = tf.stack([channels[0], channels[1], channels[2]], 2); // stack channels back to rgb and ignore alpha
+    pixels = tf.reshape(rgb, [rgb.shape[0], rgb.shape[1], 3]); // move extra dim from the end of tensor and use it as batch number instead
+    tf.dispose([rgb, ...channels]);
+    */
+  }
+  if (!pixels) throw new Error('cannot create tensor from input');
+  const casted = tf.cast(pixels, 'float32');
+  const tensor = tf.expandDims(casted, 0);
+  tf.dispose([pixels, casted]);
+  return { tensor, canvas: (config.filter.return ? outCanvas : null) };
 }

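note: the depth === 4 branch strips the alpha channel with a single slice rather than the split/stack path kept in the comment. a standalone equivalent, assuming a stock tfjs import rather than the bundled dist module:

    import * as tf from '@tensorflow/tfjs';
    const rgba = tf.zeros([480, 640, 4]) as tf.Tensor3D;   // h x w x rgba
    const rgb = tf.slice3d(rgba, [0, 0, 0], [-1, -1, 3]);  // keep the first 3 channels
    tf.dispose(rgba);                                      // release the intermediate tensor
    console.log(rgb.shape);                                // [480, 640, 3]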
 let lastInputSum = 0;
 let lastCacheDiff = 1;
+let benchmarked = 0;
+
+const checksum = async (input: Tensor): Promise<number> => { // use tf sum or js based sum loop depending on which is faster
+  const resizeFact = 48;
+  const reduced: Tensor = tf.image.resizeBilinear(input, [Math.trunc((input.shape[1] || 1) / resizeFact), Math.trunc((input.shape[2] || 1) / resizeFact)]);
+  const tfSum = async (): Promise<number> => {
+    const sumT = tf.sum(reduced);
+    const sum0 = await sumT.data();
+    tf.dispose(sumT);
+    return sum0[0];
+  };
+  const jsSum = async (): Promise<number> => {
+    const reducedData = await reduced.data(); // raw image rgb array
+    let sum0 = 0;
+    for (let i = 0; i < reducedData.length / 3; i++) sum0 += reducedData[3 * i + 2]; // look only at green value of each pixel
+    return sum0;
+  };
+  if (benchmarked === 0) {
+    const t0 = performance.now();
+    await jsSum();
+    const t1 = performance.now();
+    await tfSum();
+    const t2 = performance.now();
+    benchmarked = t1 - t0 < t2 - t1 ? 1 : 2;
+  }
+  const res = benchmarked === 1 ? await jsSum() : await tfSum();
+  tf.dispose(reduced);
+  return res;
+};
+
 export async function skip(config, input: Tensor) {
   if (config.cacheSensitivity === 0) return false;
-  const resizeFact = 32;
-  if (!input.shape[1] || !input.shape[2]) return false;
-  const reduced: Tensor = tf.image.resizeBilinear(input, [Math.trunc(input.shape[1] / resizeFact), Math.trunc(input.shape[2] / resizeFact)]);
-
-  // use tensor sum
-  /*
-  const sumT = this.tf.sum(reduced);
-  const sum = await sumT.data()[0] as number;
-  sumT.dispose();
-  */
-  // use js loop sum, faster than uploading tensor to gpu calculating and downloading back
-  const reducedData = await reduced.data(); // raw image rgb array
-  tf.dispose(reduced);
-  let sum = 0;
-  for (let i = 0; i < reducedData.length / 3; i++) sum += reducedData[3 * i + 2]; // look only at green value of each pixel
-
+  const sum = await checksum(input);
   const diff = 100 * (Math.max(sum, lastInputSum) / Math.min(sum, lastInputSum) - 1);
   lastInputSum = sum;
   // if previous frame was skipped, skip this frame if changed more than cacheSensitivity
   // if previous frame was not skipped, then look for cacheSensitivity or difference larger than one in previous frame to avoid resetting cache in subsequent frames unnecessarily
-  const skipFrame = diff < Math.max(config.cacheSensitivity, lastCacheDiff);
+  let skipFrame = diff < Math.max(config.cacheSensitivity, lastCacheDiff);
   // if difference is above 10x threshold, don't use last value to force reset cache for significant change of scenes or images
   lastCacheDiff = diff > 10 * config.cacheSensitivity ? 0 : diff;
-  // console.log('skipFrame', skipFrame, this.config.cacheSensitivity, diff);
+  skipFrame = skipFrame && (lastCacheDiff > 0); // if no cached diff value then force no skip
   return skipFrame;
 }
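note: a self-contained rerun of the skip() decision above with illustrative numbers, showing both the skip path and the scene-cut reset:

    const cacheSensitivity = 0.7;
    let lastSum = 1000;
    let lastDiff = 1;
    const decide = (sum: number): boolean => {
      const diff = 100 * (Math.max(sum, lastSum) / Math.min(sum, lastSum) - 1);
      lastSum = sum;
      let skip = diff < Math.max(cacheSensitivity, lastDiff);
      lastDiff = diff > 10 * cacheSensitivity ? 0 : diff; // big change resets the cached diff
      skip = skip && lastDiff > 0;                        // no cached diff -> force a full run
      return skip;
    };
    console.log(decide(1004)); // true  - 0.4% change, frame can be skipped
    console.log(decide(2000)); // false - ~99% change, cache reset, full detection runs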
@@ -21,8 +21,13 @@ export function square(keypoints: Array<Point>, outputSize: [number, number] = [
 }

 export function scale(box: Box, scaleFact: number) {
-  const dist = [box[2] * (scaleFact - 1), box[3] * (scaleFact - 1)];
-  const newBox: Box = [box[0] - dist[0] / 2, box[1] - dist[1] / 2, box[2] + dist[0], box[3] + dist[0]];
+  const dist = [box[2] * scaleFact, box[3] * scaleFact];
+  const newBox: Box = [
+    box[0] - (dist[0] - box[2]) / 2,
+    box[1] - (dist[1] - box[3]) / 2,
+    dist[0],
+    dist[1],
+  ];
   return newBox;
 }

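note: the rewrite computes the scaled size directly and keeps the box center fixed; the old version added dist[0] (the width delta) to the height, which skewed non-square boxes. worked example reproducing the new math:

    type Box = [number, number, number, number]; // [x, y, width, height]
    const scale = (box: Box, f: number): Box => {
      const dist = [box[2] * f, box[3] * f];
      return [box[0] - (dist[0] - box[2]) / 2, box[1] - (dist[1] - box[3]) / 2, dist[0], dist[1]];
    };
    console.log(scale([10, 20, 100, 200], 1.6)); // [-20, -40, 160, 320] - same center, 160% size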
@@ -1,39 +1,57 @@
 import * as tf from '../../dist/tfjs.esm.js';
 import * as image from '../image/image';
-import { mergeDeep, log } from './util';
+import { mergeDeep } from './util';

 export type Env = {
+  /** Running in Browser */
   browser: undefined | boolean,
+  /** Running in NodeJS */
   node: undefined | boolean,
+  /** Running in WebWorker thread */
   worker: undefined | boolean,
+  /** Detected platform */
   platform: undefined | string,
+  /** Detected agent */
   agent: undefined | string,
+  /** List of supported backends */
   backends: string[],
+  /** Has any work been performed so far */
   initial: boolean,
+  /** Are image filters supported? */
+  filter: undefined | boolean,
+  /** TFJS instance details */
   tfjs: {
     version: undefined | string,
   },
+  /** Is offscreenCanvas supported? */
   offscreen: undefined | boolean,
+  /** WASM detected capabilities */
   wasm: {
     supported: undefined | boolean,
     backend: undefined | boolean,
     simd: undefined | boolean,
     multithread: undefined | boolean,
   },
+  /** WebGL detected capabilities */
   webgl: {
     supported: undefined | boolean,
     backend: undefined | boolean,
     version: undefined | string,
     renderer: undefined | string,
   },
+  /** WebGPU detected capabilities */
   webgpu: {
     supported: undefined | boolean,
     backend: undefined | boolean,
     adapter: undefined | string,
   },
+  /** List of supported kernels for current backend */
   kernels: string[],
+  /** MonkeyPatch for Canvas */
   Canvas: undefined,
+  /** MonkeyPatch for Image */
   Image: undefined,
+  /** MonkeyPatch for ImageData */
   ImageData: undefined,
 }

@@ -47,6 +65,7 @@ export let env: Env = {
   initial: true,
   backends: [],
   offscreen: undefined,
+  filter: undefined,
   tfjs: {
     version: undefined,
   },
@@ -144,12 +163,14 @@ export async function get() {
     env.agent = env.agent.replace(/  /g, ' ');

     // chrome offscreencanvas gpu memory leak
+    /*
     const isChrome = env.agent.match(/Chrome\/.[0-9]/g);
     const verChrome = isChrome && isChrome[0] ? isChrome[0].split('/')[1] : 0;
     if (verChrome > 0 && verChrome > 92 && verChrome < 96) {
       log('disabling offscreenCanvas due to browser error:', isChrome ? isChrome[0] : 'unknown');
       env.offscreen = false;
     }
+    */
   }
 } else if (typeof process !== 'undefined') {
   env.platform = `${process.platform} ${process.arch}`;