diff --git a/.eslintrc.json b/.eslintrc.json
index b1558e10..1df170fb 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -38,6 +38,7 @@
     "@typescript-eslint/no-shadow": "error",
     "@typescript-eslint/no-var-requires": "off",
     "@typescript-eslint/triple-slash-reference": "off",
+    "@typescript-eslint/no-inferrable-types": "off",
     "camelcase": "off",
     "dot-notation": "off",
     "func-names": "off",
diff --git a/demo/index.js b/demo/index.js
index cfdb14f1..3fa1c4d7 100644
--- a/demo/index.js
+++ b/demo/index.js
@@ -31,6 +31,15 @@ import jsonView from './helpers/jsonview.js';
 
 let human;
 let userConfig = {
+  cacheSensitivity: 0,
+  hand: { enabled: true },
+  body: { enabled: false },
+  face: { enabled: false },
+  /*
+  hand: { enabled: false, maxDetected: 1, skipFrames: 0 },
+  body: { enabled: false },
+  face: { enabled: false },
+  */
   /*
   warmup: 'none',
   backend: 'humangl',
@@ -259,7 +268,7 @@ async function drawResults(input) {
     }
     // result.canvas = seg.alpha;
   } else if (!result.canvas || ui.buffered) { // refresh with input if using buffered output or if missing canvas
-    const image = await human.image(input);
+    const image = await human.image(input, false);
     result.canvas = image.canvas;
     human.tf.dispose(image.tensor);
   }
@@ -302,17 +311,17 @@ async function drawResults(input) {
 
   // update log
   const engine = human.tf.engine();
-  const gpu = engine.backendInstance ? `gpu: ${(engine.backendInstance.numBytesInGPU ? engine.backendInstance.numBytesInGPU : 0).toLocaleString()} bytes` : '';
-  const memory = `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
   const processing = result.canvas ? `processing: ${result.canvas.width} x ${result.canvas.height}` : '';
   const avgDetect = ui.detectFPS.length > 0 ? Math.trunc(10 * ui.detectFPS.reduce((a, b) => a + b, 0) / ui.detectFPS.length) / 10 : 0;
   const avgDraw = ui.drawFPS.length > 0 ? Math.trunc(10 * ui.drawFPS.reduce((a, b) => a + b, 0) / ui.drawFPS.length) / 10 : 0;
   const warning = (ui.detectFPS.length > 5) && (avgDetect < 2) ? 'warning: your performance is low: try switching to higher performance backend, lowering resolution or disabling some models' : '';
   const fps = avgDetect > 0 ? `FPS process:${avgDetect} refresh:${avgDraw}` : '';
-  const backend = engine.state.numTensors > 0 ? `${human.tf.getBackend()} | ${memory}` : `${result.backend} | tensors: ${result.tensors} in worker`;
+  const backend = result.backend || human.tf.getBackend();
+  const gpu = engine.backendInstance ? `gpu: ${(engine.backendInstance.numBytesInGPU ? engine.backendInstance.numBytesInGPU : 0).toLocaleString()} bytes` : '';
+  const memory = result.tensors || `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
   document.getElementById('log').innerHTML = `
     video: ${ui.camera.name} | facing: ${ui.camera.facing} | screen: ${window.innerWidth} x ${window.innerHeight}
     camera: ${ui.camera.width} x ${ui.camera.height} ${processing}
-    backend: ${backend}
+    backend: ${backend} | ${memory}
     performance: ${str(interpolated.performance)}ms ${fps}
     ${warning}
   `;
diff --git a/src/body/blazepose.ts b/src/body/blazepose.ts
index 1d1a6a89..ad542351 100644
--- a/src/body/blazepose.ts
+++ b/src/body/blazepose.ts
@@ -142,6 +142,11 @@ async function detectParts(input: Tensor, config: Config, outputSize: [number, number]) {
 }
 
 export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
+  /** blazepose caching
+   * not fully implemented
+   * 1. if skipFrame, return cached results
+   * 2. otherwise run detection based on the squared full frame
+   */
   const outputSize: [number, number] = [input.shape[2] || 0, input.shape[1] || 0];
   if ((skipped < (config.body.skipFrames || 0)) && config.skipFrame) {
     skipped++;
diff --git a/src/body/efficientpose.ts b/src/body/efficientpose.ts
index bb76cdd9..df8bafcc 100644
--- a/src/body/efficientpose.ts
+++ b/src/body/efficientpose.ts
@@ -7,17 +7,19 @@ import { log, join } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import * as coords from './efficientposecoords';
-import type { BodyKeypoint, BodyResult, Box, Point } from '../result';
+import type { BodyResult, Point } from '../result';
 import type { GraphModel, Tensor } from '../tfjs/types';
 import type { Config } from '../config';
 import { env } from '../util/env';
 
 let model: GraphModel | null;
-const keypoints: Array<BodyKeypoint> = [];
-let box: Box = [0, 0, 0, 0];
-let boxRaw: Box = [0, 0, 0, 0];
-let score = 0;
+const cache: BodyResult = { id: 0, keypoints: [], box: [0, 0, 0, 0], boxRaw: [0, 0, 0, 0], score: 0, annotations: {} };
+
+// const keypoints: Array<BodyKeypoint> = [];
+// let box: Box = [0, 0, 0, 0];
+// let boxRaw: Box = [0, 0, 0, 0];
+// let score = 0;
 
 let skipped = Number.MAX_SAFE_INTEGER;
 
 export async function load(config: Config): Promise<GraphModel> {
@@ -48,9 +50,14 @@ function max2d(inputs, minScore) {
 }
 
 export async function predict(image: Tensor, config: Config): Promise<BodyResult[]> {
-  if ((skipped < (config.body?.skipFrames || 0)) && config.skipFrame && Object.keys(keypoints).length > 0) {
+  /** efficientpose caching
+   * not fully implemented
+   * 1. if skipFrame, return cached results
+   * 2. otherwise run detection based on the squared full frame
+   */
+  if ((skipped < (config.body?.skipFrames || 0)) && config.skipFrame && Object.keys(cache.keypoints).length > 0) {
     skipped++;
-    return [{ id: 0, score, box, boxRaw, keypoints, annotations: {} }];
+    return [cache];
   }
   skipped = 0;
   return new Promise(async (resolve) => {
@@ -67,7 +74,7 @@ export async function predict(image: Tensor, config: Config): Promise<BodyResult[]> {
-        if (partScore > (config.body?.minConfidence || 0)) {
-          keypoints.push({
+        if (partScore > (config.body?.minConfidence || 0)) {
+          cache.keypoints.push({
             score: Math.round(100 * partScore) / 100,
             part: coords.kpt[id],
             positionRaw: [ // normalized to 0..1
@@ -94,33 +101,32 @@ export async function predict(image: Tensor, config: Config): Promise<BodyResult[]> {
       stack.forEach((s) => tf.dispose(s));
     }
-    score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
-    const x = keypoints.map((a) => a.position[0]);
-    const y = keypoints.map((a) => a.position[1]);
-    box = [
+    cache.score = cache.keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
+    const x = cache.keypoints.map((a) => a.position[0]);
+    const y = cache.keypoints.map((a) => a.position[1]);
+    cache.box = [
       Math.min(...x),
       Math.min(...y),
       Math.max(...x) - Math.min(...x),
       Math.max(...y) - Math.min(...y),
     ];
-    const xRaw = keypoints.map((a) => a.positionRaw[0]);
-    const yRaw = keypoints.map((a) => a.positionRaw[1]);
-    boxRaw = [
+    const xRaw = cache.keypoints.map((a) => a.positionRaw[0]);
+    const yRaw = cache.keypoints.map((a) => a.positionRaw[1]);
+    cache.boxRaw = [
       Math.min(...xRaw),
       Math.min(...yRaw),
       Math.max(...xRaw) - Math.min(...xRaw),
       Math.max(...yRaw) - Math.min(...yRaw),
     ];
-    const annotations: Record<string, Point[][]> = {};
     for (const [name, indexes] of Object.entries(coords.connected)) {
       const pt: Array<Point[]> = [];
       for (let i = 0; i < indexes.length - 1; i++) {
-        const pt0 = keypoints.find((kpt) => kpt.part === indexes[i]);
-        const pt1 = keypoints.find((kpt) => kpt.part === indexes[i + 1]);
+        const pt0 = cache.keypoints.find((kpt) => kpt.part === indexes[i]);
+        const pt1 = cache.keypoints.find((kpt) => kpt.part === indexes[i + 1]);
         if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]);
       }
-      annotations[name] = pt;
+      cache.annotations[name] = pt;
     }
-    resolve([{ id: 0, score, box, boxRaw, keypoints, annotations }]);
+    resolve([cache]);
   });
 }
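The body models above share one frame-skip pattern: while `config.skipFrame` is set and the skip budget is not exhausted, reuse the last result instead of running inference. A distilled sketch of that pattern under illustrative names (`predictCached`, `run` are not Human's API):

```ts
type BodyResult = { id: number, score: number, keypoints: unknown[] };
type DetectorConfig = { skipFrames: number, skipFrame: boolean };

let skipped = Number.MAX_SAFE_INTEGER; // force a real detection on the very first frame
let cached: BodyResult[] = [];

async function predictCached(frame: ImageData, config: DetectorConfig, run: (f: ImageData) => Promise<BodyResult[]>): Promise<BodyResult[]> {
  if (config.skipFrame && skipped < config.skipFrames && cached.length > 0) {
    skipped++; // cheap path: reuse last results while the scene is considered static
    return cached;
  }
  skipped = 0;
  cached = await run(frame); // expensive path: full model inference
  return cached;
}
```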
diff --git a/src/body/movenet.ts b/src/body/movenet.ts
index 86a96411..3b06077c 100644
--- a/src/body/movenet.ts
+++ b/src/body/movenet.ts
@@ -42,6 +42,20 @@ export async function load(config: Config): Promise<GraphModel> {
   return model;
 }
 
+function fixSides() { // model sometimes mixes up left vs right keypoints so we fix them here
+  for (const pair of coords.pairs) {
+    const left = keypoints.find((kp) => kp.part === pair[0]);
+    const right = keypoints.find((kp) => kp.part === pair[1]);
+    if (left && right) {
+      if (left.position[0] > right.position[0]) { // swap part labels in place so the fix persists in the keypoints array
+        const tmp = left.part;
+        left.part = right.part;
+        right.part = tmp;
+      }
+    }
+  }
+}
+
 async function parseSinglePose(res, config, image, inputBox) {
   const kpt = res[0][0];
   keypoints.length = 0;
@@ -64,6 +78,7 @@ async function parseSinglePose(res, config, image, inputBox) {
       });
     }
   }
+  fixSides();
   score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
   const bodies: Array<BodyResult> = [];
   const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]);
@@ -103,6 +118,7 @@ async function parseMultiPose(res, config, image, inputBox) {
       });
     }
   }
+  fixSides();
   const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]);
   // movenet-multipose has built-in box details
   // const boxRaw: Box = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]];
@@ -126,6 +142,13 @@ async function parseMultiPose(res, config, image, inputBox) {
 }
 
 export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
+  /** movenet caching
+   * 1. if skipFrame, return cached results
+   * 2. if there are enough cached boxes, run detection using the cached boxes
+   * 3. if not enough bodies were detected, rerun detection using the full frame
+   * 4. regenerate cached boxes based on current keypoints
+   */
+
   if (!model || !model?.inputs[0].shape) return []; // something is wrong with the model
   if (!config.skipFrame) cache.boxes.length = 0; // allowed to use cache or not
   skipped++; // increment skip frames
@@ -153,7 +176,6 @@ export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
-      cache.bodies = cache.bodies.map((body) => ({ ...body, box: box.scale(body.box, 0.5) }));
       Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
     }
     cache.boxes.length = 0; // reset cache
diff --git a/src/body/movenetcoords.ts b/src/body/movenetcoords.ts
index 8a8fc0a9..ac49985a 100644
--- a/src/body/movenetcoords.ts
+++ b/src/body/movenetcoords.ts
@@ -18,6 +18,17 @@ export const kpt: Array<string> = [
   'rightAnkle',
 ];
 
+export const pairs: Array<string[]> = [
+  ['leftEye', 'rightEye'],
+  ['leftEar', 'rightEar'],
+  ['leftShoulder', 'rightShoulder'],
+  ['leftElbow', 'rightElbow'],
+  ['leftWrist', 'rightWrist'],
+  ['leftHip', 'rightHip'],
+  ['leftKnee', 'rightKnee'],
+  ['leftAnkle', 'rightAnkle'],
+];
+
 export const connected: Record<string, string[]> = {
   leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'],
   rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'],
diff --git a/src/body/posenet.ts b/src/body/posenet.ts
index 6ab241c9..e7bd4703 100644
--- a/src/body/posenet.ts
+++ b/src/body/posenet.ts
@@ -156,6 +156,9 @@ export function decode(offsets, scores, displacementsFwd, displacementsBwd, maxDetected, minConfidence) {
 }
 
 export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
+  /** posenet is mostly obsolete
+   * caching is not implemented
+   */
   const res = tf.tidy(() => {
     if (!model.inputs[0].shape) return [];
     const resized = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
diff --git a/src/gesture/gesture.ts b/src/gesture/gesture.ts
index a5a1a586..34123c36 100644
--- a/src/gesture/gesture.ts
+++ b/src/gesture/gesture.ts
@@ -47,14 +47,14 @@ export const body = (res): GestureResult[] => {
     const leftWrist = res[i].keypoints.find((a) => (a.part === 'leftWrist'));
     const rightWrist = res[i].keypoints.find((a) => (a.part === 'rightWrist'));
     const nose = res[i].keypoints.find((a) => (a.part === 'nose'));
-    if (nose && leftWrist && rightWrist && (leftWrist.position.y < nose.position.y) && (rightWrist.position.y < nose.position.y)) gestures.push({ body: i, gesture: 'i give up' });
-    else if (nose && leftWrist && (leftWrist.position.y < nose.position.y)) gestures.push({ body: i, gesture: 'raise left hand' });
-    else if (nose && rightWrist && (rightWrist.position.y < nose.position.y)) gestures.push({ body: i, gesture: 'raise right hand' });
+    if (nose && leftWrist && rightWrist && (leftWrist.position[1] < nose.position[1]) && (rightWrist.position[1] < nose.position[1])) gestures.push({ body: i, gesture: 'i give up' });
+    else if (nose && leftWrist && (leftWrist.position[1] < nose.position[1])) gestures.push({ body: i, gesture: 'raise left hand' });
+    else if (nose && rightWrist && (rightWrist.position[1] < nose.position[1])) gestures.push({ body: i, gesture: 'raise right hand' });
 
     // leaning
     const leftShoulder = res[i].keypoints.find((a) => (a.part === 'leftShoulder'));
     const rightShoulder = res[i].keypoints.find((a) => (a.part === 'rightShoulder'));
-    if (leftShoulder && rightShoulder) gestures.push({ body: i, gesture: `leaning ${(leftShoulder.position.y > rightShoulder.position.y) ? 'left' : 'right'}` });
+    if (leftShoulder && rightShoulder) gestures.push({ body: i, gesture: `leaning ${(leftShoulder.position[1] > rightShoulder.position[1]) ? 'left' : 'right'}` });
   }
   return gestures;
 };
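The gesture changes above follow from keypoint positions being tuples rather than `{x, y}` objects, so vertical comparisons index `[1]`. A minimal sketch, assuming Human's `Point` tuple shape `[x, y, z?]` (`above` is an illustrative helper):

```ts
type Point = [number, number, number?];

const above = (a: Point, b: Point): boolean => a[1] < b[1]; // smaller y means higher up in image coordinates

const nose: Point = [200, 80];
const wrist: Point = [240, 60];
if (above(wrist, nose)) console.log('raise hand'); // mirrors the 'raise left/right hand' checks above
```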
diff --git a/src/hand/handtrack.ts b/src/hand/handtrack.ts
index 75158426..c1e1b717 100644
--- a/src/hand/handtrack.ts
+++ b/src/hand/handtrack.ts
@@ -23,7 +23,9 @@ const inputSize = [[0, 0], [0, 0]];
 
 const classes = ['hand', 'fist', 'pinch', 'point', 'face', 'tip', 'pinchtip'];
 
-const boxExpandFact = 1.6; // increase to 160%
+const boxExpandFact = 1.6;
+const maxDetectorResolution = 512;
+const detectorExpandFact = 1.2;
 
 let skipped = 0;
 let outputSize: [number, number] = [0, 0];
@@ -95,7 +97,7 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectResult[]> {
   const t: Record<string, Tensor> = {};
   const ratio = (input.shape[2] || 1) / (input.shape[1] || 1);
-  const height = Math.min(Math.round((input.shape[1] || 0) / 8) * 8, 512); // use dynamic input size but cap at 512
+  const height = Math.min(Math.round((input.shape[1] || 0) / 8) * 8, maxDetectorResolution); // use dynamic input size but cap at 512
   const width = Math.round(height * ratio / 8) * 8;
   t.resize = tf.image.resizeBilinear(input, [height, width]); // todo: resize with padding
   t.cast = tf.cast(t.resize, 'int32');
@@ -117,12 +119,13 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectResult[]> {
   Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
@@ -146,7 +149,7 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config): Promise<HandResult> {
   };
   if (input && models[1] && config.hand.landmarks && h.score > (config.hand.minConfidence || 0)) {
     const t: Record<string, Tensor> = {};
-    t.crop = tf.image.cropAndResize(input, [box.crop(h.boxRaw)], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
+    t.crop = tf.image.cropAndResize(input, [h.boxCrop], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
     t.cast = tf.cast(t.crop, 'float32');
     t.div = tf.div(t.cast, 255);
     [t.score, t.keypoints] = models[1].execute(t.div) as Tensor[];
@@ -155,12 +158,17 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config): Promise<HandResult> {
     if (score >= (config.hand.minConfidence || 0)) {
       hand.fingerScore = score;
       t.reshaped = tf.reshape(t.keypoints, [-1, 3]);
-      const rawCoords = await t.reshaped.array() as Point[];
-      hand.keypoints = (rawCoords as Point[]).map((kpt) => [
-        outputSize[0] * ((h.boxCrop[3] - h.boxCrop[1]) * kpt[0] / inputSize[1][0] + h.boxCrop[1]),
-        outputSize[1] * ((h.boxCrop[2] - h.boxCrop[0]) * kpt[1] / inputSize[1][1] + h.boxCrop[0]),
-        (h.boxCrop[3] + h.boxCrop[3] / 2 * (kpt[2] || 0)),
+      const coordsData: Point[] = await t.reshaped.array() as Point[];
+      const coordsRaw: Point[] = coordsData.map((kpt) => [kpt[0] / inputSize[1][1], kpt[1] / inputSize[1][0], (kpt[2] || 0)]);
+      const coordsNorm: Point[] = coordsRaw.map((kpt) => [kpt[0] * h.boxRaw[2], kpt[1] * h.boxRaw[3], (kpt[2] || 0)]);
+      hand.keypoints = (coordsNorm).map((kpt) => [
+        outputSize[0] * kpt[0] + h.box[0],
+        outputSize[1] * kpt[1] + h.box[1],
+        (kpt[2] || 0),
       ]);
+      // hand.box = box.scale(h.box, 1 / detectorExpandFact); // scale box down for visual appeal
+      // hand.boxRaw = box.scale(h.boxRaw, 1 / detectorExpandFact); // scale box down for visual appeal
       hand.landmarks = fingerPose.analyze(hand.keypoints) as HandResult['landmarks']; // calculate finger landmarks
       for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
         hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
@@ -171,15 +179,27 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config): Promise<HandResult> {
   return hand;
 }
 
+let n = 0;
 export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {
+  n++;
+  /** handtrack caching
+   * 1. if skipFrame, return cached results
+   * 2. if there are some cached results, continue using them for up to 10x skipFrames even when we are not sure they are sufficient
+   * 3. eventually rerun the detector to generate new cached boxes and reset skipped
+   * 4. generate cached boxes based on detected keypoints
+   */
   if (!models[0] || !models[1] || !models[0]?.inputs[0].shape || !models[1]?.inputs[0].shape) return []; // something is wrong with the model
   outputSize = [input.shape[2] || 0, input.shape[1] || 0];
   skipped++; // increment skip frames
   if (config.skipFrame && (skipped <= (config.hand.skipFrames || 0))) {
+    console.log(n, 'SKIP', { results: cache.hands.length });
     return cache.hands; // return cached results without running anything
   }
   return new Promise(async (resolve) => {
+    console.log(n, 'DETECT', { skipped, hands: cache.hands.length, boxes: cache.boxes.length });
+    // this approach is logically correct but not efficient enough
+    /*
     skipped = 0;
     if (cache.boxes.length >= (config.hand.maxDetected || 0)) {
       cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config))); // if we have enough cached boxes run detection using cache
@@ -191,17 +211,32 @@ export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {
       cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
     }
+    */
+
+    if (config.skipFrame && skipped <= 10 * (config.hand.skipFrames || 0) && cache.hands.length > 0) { // we have some cached results; although we are not sure they are sufficient, continue using them for up to 10x skipFrames
+      cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
+      console.log(n, 'HANDS', { hands: cache.hands.length });
+    } else {
+      cache.boxes = await detectHands(input, config);
+      console.log(n, 'BOXES', { hands: cache.boxes.length });
+      cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
+      console.log(n, 'HANDS', { hands: cache.hands.length });
+      skipped = 0;
+    }
 
     const oldCache = [...cache.boxes];
     cache.boxes.length = 0; // reset cache
-    for (let i = 0; i < cache.hands.length; i++) {
-      const boxKpt = box.square(cache.hands[i].keypoints, outputSize);
-      if (boxKpt.box[2] / (input.shape[2] || 1) > 0.05 && boxKpt.box[3] / (input.shape[1] || 1) > 0.05 && cache.hands[i].fingerScore && cache.hands[i].fingerScore > (config.hand.minConfidence || 0)) {
-        const boxScale = box.scale(boxKpt.box, boxExpandFact);
-        const boxScaleRaw = box.scale(boxKpt.boxRaw, boxExpandFact);
-        const boxCrop = box.crop(boxScaleRaw);
-        cache.boxes.push({ ...oldCache[i], box: boxScale, boxRaw: boxScaleRaw, boxCrop });
+    if (config.cacheSensitivity > 0) {
+      for (let i = 0; i < cache.hands.length; i++) {
+        const boxKpt = box.square(cache.hands[i].keypoints, outputSize);
+        if (boxKpt.box[2] / (input.shape[2] || 1) > 0.05 && boxKpt.box[3] / (input.shape[1] || 1) > 0.05 && cache.hands[i].fingerScore && cache.hands[i].fingerScore > (config.hand.minConfidence || 0)) {
+          const boxScale = box.scale(boxKpt.box, boxExpandFact);
+          const boxScaleRaw = box.scale(boxKpt.boxRaw, boxExpandFact);
+          const boxCrop = box.crop(boxScaleRaw);
+          cache.boxes.push({ ...oldCache[i], box: boxScale, boxRaw: boxScaleRaw, boxCrop });
+        }
       }
+      console.log(n, 'CACHED', { hands: cache.boxes.length });
     }
     resolve(cache.hands);
   });
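The rewritten keypoint mapping in `detectFingers` works in two steps: normalize model output to the crop (0..1), then scale by the detection box and offset by its origin. A sketch under the assumption that `boxRaw` is `[x, y, width, height]` normalized to 0..1 while `origin` is in pixels (`cropToImage` is a hypothetical helper, not part of the library):

```ts
type Point = [number, number, number];

function cropToImage(kpts: Point[], modelSize: [number, number], boxRaw: [number, number, number, number], outputSize: [number, number], origin: [number, number]): Point[] {
  return kpts.map((kpt) => {
    const raw: Point = [kpt[0] / modelSize[1], kpt[1] / modelSize[0], kpt[2] || 0]; // 0..1 within the crop
    const norm: Point = [raw[0] * boxRaw[2], raw[1] * boxRaw[3], raw[2]]; // 0..1 within the full frame
    return [outputSize[0] * norm[0] + origin[0], outputSize[1] * norm[1] + origin[1], norm[2]]; // pixels in the full frame
  });
}
```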
diff --git a/src/human.ts b/src/human.ts
index 382416f0..f9ad0dfc 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -265,8 +265,8 @@ export class Human {
    * @param input: {@link Input}
    * @returns { tensor, canvas }
    */
-  image(input: Input) {
-    return image.process(input, this.config);
+  image(input: Input, getTensor: boolean = true) {
+    return image.process(input, this.config, getTensor);
   }
 
   /** Segmentation method takes any input and returns processed canvas with body segmentation
diff --git a/src/image/image.ts b/src/image/image.ts
index f0301481..3b58165c 100644
--- a/src/image/image.ts
+++ b/src/image/image.ts
@@ -13,8 +13,8 @@ export type Input = Tensor | ImageData | ImageBitmap | HTMLImageElement | HTMLMediaElement | HTMLVideoElement | HTMLCanvasElement | OffscreenCanvas;
 
 const maxSize = 2048;
 // internal temp canvases
-let inCanvas;
-let outCanvas;
+let inCanvas: HTMLCanvasElement | OffscreenCanvas | null = null; // use global variable to avoid recreating canvas on each frame
+let outCanvas: HTMLCanvasElement | OffscreenCanvas | null = null; // use global variable to avoid recreating canvas on each frame
 // @ts-ignore // imagefx is js module that should be converted to a class
 let fx: fxImage.GLImageFilter | null; // instance of imagefx
 
@@ -38,11 +38,17 @@ export function canvas(width, height): HTMLCanvasElement | OffscreenCanvas {
   return c;
 }
 
+export function copy(input: HTMLCanvasElement | OffscreenCanvas, output?: HTMLCanvasElement | OffscreenCanvas) {
+  const outputCanvas = output || canvas(input.width, input.height);
+  const ctx = outputCanvas.getContext('2d') as CanvasRenderingContext2D;
+  ctx.drawImage(input, 0, 0);
+  return outputCanvas;
+}
+
 // process input image and return tensor
 // input can be tensor, imagedata, htmlimageelement, htmlvideoelement
 // input is resized and run through imagefx filter
-export function process(input: Input, config: Config): { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null } {
-  let tensor;
+export function process(input: Input, config: Config, getTensor: boolean = true): { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null } {
   if (!input) {
     // throw new Error('input is missing');
     if (config.debug) log('input is missing');
@@ -66,9 +72,9 @@ export function process(input: Input, config: Config): { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null } {
   }
   if (input instanceof tf.Tensor) {
     // if input is tensor, use as-is
-    if ((input as Tensor)['isDisposedInternal']) throw new Error('input tensor is disposed');
-    if ((input as Tensor).shape && (input as Tensor).shape.length === 4 && (input as unknown as Tensor).shape[0] === 1 && (input as unknown as Tensor).shape[3] === 3) tensor = tf.clone(input);
-    else throw new Error(`input tensor shape must be [1, height, width, 3] and instead was ${(input as Tensor).shape}`);
+    if ((input)['isDisposedInternal']) throw new Error('input tensor is disposed');
+    else if (!input.shape || input.shape.length !== 4 || input.shape[0] !== 1 || input.shape[3] !== 3) throw new Error(`input tensor shape must be [1, height, width, 3] and instead was ${input.shape}`);
+    else return { tensor: tf.clone(input), canvas: (config.filter.return ? outCanvas : null) };
   } else {
     // check if resizing will be needed
     if (typeof input['readyState'] !== 'undefined' && input['readyState'] <= 2) {
@@ -101,28 +107,26 @@ export function process(input: Input, config: Config): { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null } {
 
   if (!inCanvas || (inCanvas?.width !== targetWidth) || (inCanvas?.height !== targetHeight)) inCanvas = canvas(targetWidth, targetHeight);
 
   // draw input to our canvas
-  const ctx = inCanvas.getContext('2d');
+  const inCtx = inCanvas.getContext('2d') as CanvasRenderingContext2D;
   if ((typeof ImageData !== 'undefined') && (input instanceof ImageData)) {
-    ctx.putImageData(input, 0, 0);
+    inCtx.putImageData(input, 0, 0);
   } else {
-    if (config.filter.flip && typeof ctx.translate !== 'undefined') {
-      ctx.translate(originalWidth, 0);
-      ctx.scale(-1, 1);
-      ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height);
-      ctx.setTransform(1, 0, 0, 1, 0, 0); // resets transforms to defaults
+    if (config.filter.flip && typeof inCtx.translate !== 'undefined') {
+      inCtx.translate(originalWidth, 0);
+      inCtx.scale(-1, 1);
+      inCtx.drawImage(input as CanvasImageSource, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height);
+      inCtx.setTransform(1, 0, 0, 1, 0, 0); // resets transforms to defaults
     } else {
-      ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height);
+      inCtx.drawImage(input as CanvasImageSource, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height);
     }
   }
-  // imagefx transforms using gl
+
+  if (!outCanvas || (inCanvas.width !== outCanvas.width) || (inCanvas?.height !== outCanvas?.height)) outCanvas = canvas(inCanvas.width, inCanvas.height); // init output canvas
+
+  // imagefx transforms using gl from input canvas to output canvas
   if (config.filter.enabled && env.webgl.supported) {
-    if (!fx || !outCanvas || (inCanvas.width !== outCanvas.width) || (inCanvas?.height !== outCanvas?.height)) {
-      outCanvas = canvas(inCanvas?.width, inCanvas?.height);
-      if (outCanvas?.width !== inCanvas?.width) outCanvas.width = inCanvas?.width;
-      if (outCanvas?.height !== inCanvas?.height) outCanvas.height = inCanvas?.height;
-      // log('created FX filter');
-      fx = env.browser ? new fxImage.GLImageFilter({ canvas: outCanvas }) : null; // && (typeof document !== 'undefined')
-    }
+    if (!fx) fx = env.browser ? new fxImage.GLImageFilter({ canvas: outCanvas }) : null; // && (typeof document !== 'undefined')
+    env.filter = !!fx;
     if (!fx) return { tensor: null, canvas: inCanvas };
     fx.reset();
     fx.addFilter('brightness', config.filter.brightness); // must have at least one filter enabled
@@ -140,118 +144,105 @@ export function process(input: Input, config: Config): { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null } {
     if (config.filter.polaroid) fx.addFilter('polaroid');
     if (config.filter.pixelate !== 0) fx.addFilter('pixelate', config.filter.pixelate);
     fx.apply(inCanvas);
-    // read pixel data
-    /*
-    const gl = outCanvas.getContext('webgl');
-    if (gl) {
-      const glBuffer = new Uint8Array(outCanvas.width * outCanvas.height * 4);
-      const pixBuffer = new Uint8Array(outCanvas.width * outCanvas.height * 3);
-      gl.readPixels(0, 0, outCanvas.width, outCanvas.height, gl.RGBA, gl.UNSIGNED_BYTE, glBuffer);
-      // gl returns rbga while we only need rgb, so discarding alpha channel
-      // gl returns starting point as lower left, so need to invert vertical
-      let i = 0;
-      for (let y = outCanvas.height - 1; y >= 0; y--) {
-        for (let x = 0; x < outCanvas.width; x++) {
-          const index = (x + y * outCanvas.width) * 4;
-          pixBuffer[i++] = glBuffer[index + 0];
-          pixBuffer[i++] = glBuffer[index + 1];
-          pixBuffer[i++] = glBuffer[index + 2];
-        }
-      }
-      outCanvas.data = pixBuffer;
-      const shape = [outCanvas.height, outCanvas.width, 3];
-      const pixels = tf.tensor3d(outCanvas.data, shape, 'float32');
-      tensor = tf.expandDims(pixels, 0);
-      tf.dispose(pixels);
-    }
-    */
   } else {
-    outCanvas = inCanvas;
+    copy(inCanvas, outCanvas); // if no filters applied, output canvas is a copy of the input canvas
     if (fx) fx = null;
+    env.filter = !!fx;
   }
-  // create tensor from image if tensor is not already defined
-  if (!tensor) {
-    let pixels;
-    if (outCanvas.data) { // if we have data, just convert to tensor
-      const shape = [outCanvas.height, outCanvas.width, 3];
-      pixels = tf.tensor3d(outCanvas.data, shape, 'float32');
-    } else if ((typeof ImageData !== 'undefined') && (outCanvas instanceof ImageData)) { // if input is imagedata, just use it
-      pixels = tf.browser ? tf.browser.fromPixels(outCanvas) : null;
-    } else if (config.backend === 'webgl' || config.backend === 'humangl') { // tf kernel-optimized method to get imagedata
-      // we cant use canvas as-is as it already has a context, so we do a silly one more canvas
-      const tempCanvas = canvas(targetWidth, targetHeight);
-      tempCanvas.width = targetWidth;
-      tempCanvas.height = targetHeight;
-      const tempCtx = tempCanvas.getContext('2d');
-      tempCtx?.drawImage(outCanvas, 0, 0);
-      try {
-        pixels = (tf.browser && env.browser) ? tf.browser.fromPixels(tempCanvas) : null;
-      } catch (err) {
-        throw new Error('browser webgl error');
-      }
-    } else { // cpu and wasm kernel does not implement efficient fromPixels method
-      // we cant use canvas as-is as it already has a context, so we do a silly one more canvas and do fromPixels on ImageData instead
-      const tempCanvas = canvas(targetWidth, targetHeight);
-      if (!tempCanvas) return { tensor: null, canvas: inCanvas };
-      tempCanvas.width = targetWidth;
-      tempCanvas.height = targetHeight;
-      const tempCtx = tempCanvas.getContext('2d');
-      if (!tempCtx) return { tensor: null, canvas: inCanvas };
-      tempCtx.drawImage(outCanvas, 0, 0);
-      const data = tempCtx.getImageData(0, 0, targetWidth, targetHeight);
-      if (tf.browser && env.browser) {
-        pixels = tf.browser.fromPixels(data);
-      } else {
-        pixels = tf.tidy(() => {
-          const imageData = tf.tensor(Array.from(data.data), [targetWidth, targetHeight, 4]);
-          const channels = tf.split(imageData, 4, 2); // split rgba to channels
-          const rgb = tf.stack([channels[0], channels[1], channels[2]], 2); // stack channels back to rgb and ignore alpha
-          const expand = tf.reshape(rgb, [imageData.shape[0], imageData.shape[1], 3]); // move extra dim from the end of tensor and use it as batch number instead
-          return expand;
-        });
-      }
-    }
-    if (pixels) {
-      const casted = tf.cast(pixels, 'float32');
-      tensor = tf.expandDims(casted, 0);
-      tf.dispose(pixels);
-      tf.dispose(casted);
-    } else {
-      tensor = tf.zeros([1, targetWidth, targetHeight, 3]);
-      throw new Error('cannot create tensor from input');
-    }
-  }
-  return { tensor, canvas: (config.filter.return ? outCanvas : null) };
+
+  if (!getTensor) return { tensor: null, canvas: outCanvas }; // just the canvas was requested
+
+  // create tensor from image unless input was a tensor already
+  let pixels;
+  let depth = 3;
+  if ((typeof ImageData !== 'undefined' && input instanceof ImageData) || (input['data'] && input['width'] && input['height'])) { // if input is imagedata, just use it
+    if (env.browser && tf.browser) {
+      pixels = tf.browser ? tf.browser.fromPixels(input) : null;
+    } else {
+      depth = input['data'].length / input['height'] / input['width'];
+      // const arr = Uint8Array.from(input['data']);
+      const arr = new Uint8Array(input['data']['buffer']);
+      pixels = tf.tensor(arr, [input['height'], input['width'], depth], 'float32');
+    }
+  } else {
+    if (tf.browser && env.browser) {
+      if (config.backend === 'webgl' || config.backend === 'humangl' || config.backend === 'webgpu') {
+        pixels = tf.browser.fromPixels(outCanvas); // safe to reuse since both backend and context are gl based
+      } else {
+        const tempCanvas = copy(outCanvas); // cannot use output canvas as it already has a gl context so we do a silly one more canvas
+        pixels = tf.browser.fromPixels(tempCanvas);
+      }
+    } else {
+      const tempCanvas = copy(outCanvas); // cannot use output canvas as it already has a gl context so we do a silly one more canvas
+      const tempCtx = tempCanvas.getContext('2d') as CanvasRenderingContext2D;
+      const tempData = tempCtx.getImageData(0, 0, targetWidth, targetHeight);
+      depth = input['data'].length / targetWidth / targetHeight;
+      const arr = new Uint8Array(tempData.data.buffer);
+      pixels = tf.tensor(arr, [targetWidth, targetHeight, depth]);
+    }
+  }
+  if (depth === 4) { // rgba to rgb
+    const rgb = tf.slice3d(pixels, [0, 0, 0], [-1, -1, 3]); // strip alpha channel
+    tf.dispose(pixels);
+    pixels = rgb;
+    /*
+    const channels = tf.split(pixels, 4, 2); // split rgba to channels
+    tf.dispose(pixels);
+    const rgb = tf.stack([channels[0], channels[1], channels[2]], 2); // stack channels back to rgb and ignore alpha
+    pixels = tf.reshape(rgb, [rgb.shape[0], rgb.shape[1], 3]); // move extra dim from the end of tensor and use it as batch number instead
+    tf.dispose([rgb, ...channels]);
+    */
+  }
+  if (!pixels) throw new Error('cannot create tensor from input');
+  const casted = tf.cast(pixels, 'float32');
+  const tensor = tf.expandDims(casted, 0);
+  tf.dispose([pixels, casted]);
+  return { tensor, canvas: (config.filter.return ? outCanvas : null) };
 }
 
 let lastInputSum = 0;
 let lastCacheDiff = 1;
+let benchmarked = 0;
+
+const checksum = async (input: Tensor): Promise<number> => { // use tf sum or js based sum loop depending on which is faster
+  const resizeFact = 48;
+  const reduced: Tensor = tf.image.resizeBilinear(input, [Math.trunc((input.shape[1] || 1) / resizeFact), Math.trunc((input.shape[2] || 1) / resizeFact)]);
+  const tfSum = async (): Promise<number> => {
+    const sumT = tf.sum(reduced);
+    const sum0 = await sumT.data();
+    tf.dispose(sumT);
+    return sum0[0];
+  };
+  const jsSum = async (): Promise<number> => {
+    const reducedData = await reduced.data(); // raw image rgb array
+    let sum0 = 0;
+    for (let i = 0; i < reducedData.length / 3; i++) sum0 += reducedData[3 * i + 2]; // look only at green value of each pixel
+    return sum0;
+  };
+  if (benchmarked === 0) {
+    const t0 = performance.now();
+    await jsSum();
+    const t1 = performance.now();
+    await tfSum();
+    const t2 = performance.now();
+    benchmarked = t1 - t0 < t2 - t1 ? 1 : 2;
+  }
+  const res = benchmarked === 1 ? await jsSum() : await tfSum();
+  tf.dispose(reduced);
+  return res;
+};
+
 export async function skip(config, input: Tensor) {
   if (config.cacheSensitivity === 0) return false;
-  const resizeFact = 32;
-  if (!input.shape[1] || !input.shape[2]) return false;
-  const reduced: Tensor = tf.image.resizeBilinear(input, [Math.trunc(input.shape[1] / resizeFact), Math.trunc(input.shape[2] / resizeFact)]);
-
-  // use tensor sum
-  /*
-  const sumT = this.tf.sum(reduced);
-  const sum = await sumT.data()[0] as number;
-  sumT.dispose();
-  */
-  // use js loop sum, faster than uploading tensor to gpu calculating and downloading back
-  const reducedData = await reduced.data(); // raw image rgb array
-  tf.dispose(reduced);
-  let sum = 0;
-  for (let i = 0; i < reducedData.length / 3; i++) sum += reducedData[3 * i + 2]; // look only at green value of each pixel
-
+  const sum = await checksum(input);
   const diff = 100 * (Math.max(sum, lastInputSum) / Math.min(sum, lastInputSum) - 1);
   lastInputSum = sum;
   // if previous frame was skipped, skip this frame if changed more than cacheSensitivity
   // if previous frame was not skipped, then look for cacheSensitivity or difference larger than one in previous frame to avoid resetting cache in subsequent frames unnecessarily
-  const skipFrame = diff < Math.max(config.cacheSensitivity, lastCacheDiff);
+  let skipFrame = diff < Math.max(config.cacheSensitivity, lastCacheDiff);
   // if difference is above 10x threshold, don't use last value to force reset cache for significant change of scenes or images
   lastCacheDiff = diff > 10 * config.cacheSensitivity ? 0 : diff;
-  // console.log('skipFrame', skipFrame, this.config.cacheSensitivity, diff);
+  skipFrame = skipFrame && (lastCacheDiff > 0); // if there is no cached diff value then force no skip
   return skipFrame;
 }
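The new `checksum` helper chooses between two equivalent implementations (a JS loop and `tf.sum`) by timing each once on first use and remembering the winner. The same one-shot benchmark pattern, distilled (`fastest` is an illustrative name, not Human's API):

```ts
let choice = 0; // 0 = not benchmarked yet, 1 = first impl, 2 = second impl

async function fastest<T>(implA: () => Promise<T>, implB: () => Promise<T>): Promise<T> {
  if (choice === 0) {
    const t0 = performance.now();
    await implA();
    const t1 = performance.now();
    await implB();
    const t2 = performance.now();
    choice = (t1 - t0) < (t2 - t1) ? 1 : 2; // keep whichever ran faster on this device/backend
  }
  return choice === 1 ? implA() : implB();
}
```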
diff --git a/src/util/box.ts b/src/util/box.ts
index ed081e2f..c0bcdb3c 100644
--- a/src/util/box.ts
+++ b/src/util/box.ts
@@ -21,8 +21,13 @@ export function square(keypoints: Array<Point>, outputSize: [number, number] = [1, 1]) {
 }
 
 export function scale(box: Box, scaleFact: number) {
-  const dist = [box[2] * (scaleFact - 1), box[3] * (scaleFact - 1)];
-  const newBox: Box = [box[0] - dist[0] / 2, box[1] - dist[1] / 2, box[2] + dist[0], box[3] + dist[0]];
+  const dist = [box[2] * scaleFact, box[3] * scaleFact];
+  const newBox: Box = [
+    box[0] - (dist[0] - box[2]) / 2,
+    box[1] - (dist[1] - box[3]) / 2,
+    dist[0],
+    dist[1],
+  ];
   return newBox;
 }
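The rewritten `box.scale` treats `scaleFact` as an absolute size multiplier and keeps the box centered; the previous version also applied the width delta to the height (`dist[0]` twice). A worked example, assuming `Box = [x, y, width, height]`:

```ts
type Box = [number, number, number, number];

function scale(box: Box, scaleFact: number): Box {
  const dist = [box[2] * scaleFact, box[3] * scaleFact]; // new width and height
  return [box[0] - (dist[0] - box[2]) / 2, box[1] - (dist[1] - box[3]) / 2, dist[0], dist[1]];
}

const b: Box = [100, 100, 50, 80];
console.log(scale(b, 1.6)); // [85, 76, 80, 128]: the center stays at (125, 140)
```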
diff --git a/src/util/env.ts b/src/util/env.ts
index 3ff063a3..88d3319a 100644
--- a/src/util/env.ts
+++ b/src/util/env.ts
@@ -1,39 +1,57 @@
 import * as tf from '../../dist/tfjs.esm.js';
 import * as image from '../image/image';
-import { mergeDeep, log } from './util';
+import { mergeDeep } from './util';
 
 export type Env = {
+  /** Running in Browser */
   browser: undefined | boolean,
+  /** Running in NodeJS */
   node: undefined | boolean,
+  /** Running in WebWorker thread */
   worker: undefined | boolean,
+  /** Detected platform */
   platform: undefined | string,
+  /** Detected agent */
   agent: undefined | string,
+  /** List of supported backends */
   backends: string[],
+  /** Has any work been performed so far */
   initial: boolean,
+  /** Are image filters supported? */
+  filter: undefined | boolean,
+  /** TFJS instance details */
   tfjs: {
     version: undefined | string,
   },
+  /** Is offscreenCanvas supported? */
   offscreen: undefined | boolean,
+  /** WASM detected capabilities */
   wasm: {
     supported: undefined | boolean,
     backend: undefined | boolean,
     simd: undefined | boolean,
    multithread: undefined | boolean,
   },
+  /** WebGL detected capabilities */
   webgl: {
     supported: undefined | boolean,
     backend: undefined | boolean,
     version: undefined | string,
     renderer: undefined | string,
   },
+  /** WebGPU detected capabilities */
   webgpu: {
     supported: undefined | boolean,
     backend: undefined | boolean,
     adapter: undefined | string,
   },
+  /** List of supported kernels for current backend */
   kernels: string[],
+  /** MonkeyPatch for Canvas */
   Canvas: undefined,
+  /** MonkeyPatch for Image */
   Image: undefined,
+  /** MonkeyPatch for ImageData */
   ImageData: undefined,
 }
 
@@ -47,6 +65,7 @@ export let env: Env = {
   initial: true,
   backends: [],
   offscreen: undefined,
+  filter: undefined,
   tfjs: {
     version: undefined,
   },
@@ -144,12 +163,14 @@ export async function get() {
     env.agent = env.agent.replace(/ /g, ' ');
 
     // chrome offscreencanvas gpu memory leak
+    /*
     const isChrome = env.agent.match(/Chrome\/.[0-9]/g);
     const verChrome = isChrome && isChrome[0] ? isChrome[0].split('/')[1] : 0;
     if (verChrome > 0 && verChrome > 92 && verChrome < 96) {
       log('disabling offscreenCanvas due to browser error:', isChrome ? isChrome[0] : 'unknown');
       env.offscreen = false;
     }
+    */
   }
 } else if (typeof process !== 'undefined') {
   env.platform = `${process.platform} ${process.arch}`;
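For reference, the caching flow that `cacheSensitivity` controls (see `skip` in `src/image/image.ts` above): compare a cheap frame checksum against the previous frame and skip downstream models when the relative change is small. A distilled sketch with the same thresholds:

```ts
let lastSum = 0;
let lastDiff = 1;

function shouldSkip(sum: number, cacheSensitivity: number): boolean {
  if (cacheSensitivity === 0) return false; // caching disabled, as in the demo config above
  const diff = 100 * (Math.max(sum, lastSum) / Math.min(sum, lastSum) - 1); // percent change between frames
  lastSum = sum;
  let skip = diff < Math.max(cacheSensitivity, lastDiff);
  lastDiff = diff > 10 * cacheSensitivity ? 0 : diff; // a large scene change forces a cache reset on the next frame
  skip = skip && lastDiff > 0; // no stored diff yet, so do not skip
  return skip;
}
```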