From cb461be4867c700ec2ba87cc8d1c5dc684ff5a91 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 6 Nov 2021 10:21:51 -0400 Subject: [PATCH] new frame change detection algorithm --- .build.json | 2 +- CHANGELOG.md | 5 +-- TODO.md | 31 +++++++------ demo/facematch/facematch.js | 34 ++++++++------- demo/index.js | 1 + demo/typescript/index.js | 7 +-- demo/typescript/index.ts | 6 +-- package.json | 2 +- src/config.ts | 8 +++- src/face/facemesh.ts | 8 +++- src/face/faceres.ts | 6 +-- src/gesture/gesture.ts | 4 +- src/human.ts | 4 +- src/image/enhance.ts | 16 ++++--- src/image/image.ts | 75 ++++++++++++++++++++------------ src/segmentation/segmentation.ts | 4 +- src/tfjs/backend.ts | 6 ++- src/tfjs/humangl.ts | 12 +++-- test/test-main.js | 4 +- tfjs/tf-browser.ts | 2 +- wiki | 2 +- 21 files changed, 142 insertions(+), 97 deletions(-) diff --git a/.build.json b/.build.json index 344ce9b0..927f3c8d 100644 --- a/.build.json +++ b/.build.json @@ -126,7 +126,7 @@ "format": "iife", "input": "src/human.ts", "output": "dist/human.js", - "minify": true, + "minify": false, "globalName": "Human", "external": ["fs", "os", "buffer", "util"] }, diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f7594d1..69888f41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,11 +11,10 @@ ### **HEAD -> main** 2021/11/05 mandic00@live.com +- add histogram equalization - implement wasm missing ops - performance and memory optimizations - -### **origin/main** 2021/11/04 mandic00@live.com - +- fix react compatibility issues - improve box rescaling for all modules - improve precision using wasm backend - refactor predict with execute diff --git a/TODO.md b/TODO.md index 07003f19..f1624a7c 100644 --- a/TODO.md +++ b/TODO.md @@ -41,17 +41,20 @@ MoveNet MultiPose model does not work with WASM backend due to missing F32 broad ### Pending release -- Supports all modules on all backends - via custom implementation of missing kernel ops -- Performance and precision improvements - **face** and **hand** modules -- Use custom built TFJS for bundled version - reduced bundle size and built-in support for all backends - `nobundle` and `node` versions link to standard `@tensorflow` packages -- Add optional input histogram equalization - auto-level input for optimal brightness/contrast via `config.filter.equalization` -- Fix **ReactJS** compatibility -- Better precision using **WASM** - Previous issues due to math low-precision in WASM implementation -- Full **TS** type definitions for all modules and imports -- Focus on simplified demo +- Supports all modules on all backends + via custom implementation of missing kernel ops +- New frame change detection algorithm used for cache determination + based on temporal input difference +- New optional input histogram equalization + auto-level input for optimal brightness/contrast via `config.filter.equalization` +- Performance and precision improvements + **face**, **hand** and **gestures** modules +- Use custom built TFJS for bundled version + reduced bundle size and built-in support for all backends + `nobundle` and `node` versions link to standard `@tensorflow` packages +- Fix **ReactJS** compatibility +- Better precision using **WASM** + Previous issues due to math low-precision in WASM implementation +- Full **TS** type definitions for all modules and imports +- Focus on simplified demo + diff --git a/demo/facematch/facematch.js b/demo/facematch/facematch.js index 3620297d..09cdc089 100644 --- a/demo/facematch/facematch.js +++ b/demo/facematch/facematch.js @@ -9,25 +9,28 @@ import Human 
from '../../dist/human.esm.js'; const userConfig = { - backend: 'wasm', - async: false, + backend: 'humangl', + async: true, warmup: 'none', cacheSensitivity: 0, debug: true, modelBasePath: '../../models/', + deallocate: true, + filter: { + enabled: true, + equalization: true, + }, face: { enabled: true, detector: { rotation: true, return: true, maxDetected: 50 }, mesh: { enabled: true }, - embedding: { enabled: false }, - iris: { enabled: true }, + iris: { enabled: false }, emotion: { enabled: true }, description: { enabled: true }, }, hand: { enabled: false }, - gesture: { enabled: true }, + gesture: { enabled: false }, body: { enabled: false }, - filter: { enabled: true }, segmentation: { enabled: false }, }; @@ -73,9 +76,7 @@ async function SelectFaceCanvas(face) { const squeeze = human.tf.squeeze(enhanced); const normalize = human.tf.div(squeeze, 255); await human.tf.browser.toPixels(normalize, c); - human.tf.dispose(enhanced); - human.tf.dispose(squeeze); - human.tf.dispose(normalize); + human.tf.dispose([enhanced, squeeze, normalize]); const ctx = c.getContext('2d'); ctx.font = 'small-caps 0.4rem "Lato"'; ctx.fillStyle = 'rgba(255, 255, 255, 1)'; @@ -134,7 +135,7 @@ async function SelectFaceCanvas(face) { title('Selected Face'); } -async function AddFaceCanvas(index, res, fileName) { +function AddFaceCanvas(index, res, fileName) { all[index] = res.face; let ok = false; for (const i in res.face) { @@ -161,7 +162,7 @@ async function AddFaceCanvas(index, res, fileName) { }); // if we actually got face image tensor, draw canvas with that face if (res.face[i].tensor) { - await human.tf.browser.toPixels(res.face[i].tensor, canvas); + human.tf.browser.toPixels(res.face[i].tensor, canvas); document.getElementById('faces').appendChild(canvas); const ctx = canvas.getContext('2d'); if (!ctx) return false; @@ -169,7 +170,7 @@ async function AddFaceCanvas(index, res, fileName) { ctx.fillStyle = 'rgba(255, 255, 255, 1)'; ctx.fillText(`${res.face[i].age}y ${(100 * (res.face[i].genderScore || 0)).toFixed(1)}% ${res.face[i].gender}`, 4, canvas.height - 6); const arr = db.map((rec) => rec.embedding); - const result = await human.match(res.face[i].embedding, arr); + const result = human.match(res.face[i].embedding, arr); ctx.font = 'small-caps 1rem "Lato"'; if (result.similarity && res.similarity > minScore) ctx.fillText(`${(100 * result.similarity).toFixed(1)}% ${db[result.index].name}`, 4, canvas.height - 30); } @@ -184,7 +185,7 @@ async function AddImageElement(index, image, length) { const img = new Image(128, 128); img.onload = () => { // must wait until image is loaded human.detect(img, userConfig).then(async (res) => { - const ok = await AddFaceCanvas(index, res, image); // then wait until image is analyzed + const ok = AddFaceCanvas(index, res, image); // then wait until image is analyzed // log('Add image:', index + 1, image, 'faces:', res.face.length); if (ok) document.getElementById('images').appendChild(img); // and finally we can add it resolve(true); @@ -199,7 +200,7 @@ async function AddImageElement(index, image, length) { }); } -async function createFaceMatchDB() { +function createFaceMatchDB() { log('Creating Faces DB...'); for (const image of all) { for (const face of image) db.push({ name: 'unknown', source: face.fileName, embedding: face.embedding }); @@ -246,6 +247,9 @@ async function main() { // images = ['/samples/in/solvay1927.jpg']; // download and analyze all images + // const promises = []; + // for (let i = 0; i < images.length; i++) promises.push(AddImageElement(i, 
images[i], images.length)); + // await Promise.all(promises); for (let i = 0; i < images.length; i++) await AddImageElement(i, images[i], images.length); // print stats @@ -254,7 +258,7 @@ async function main() { log(human.tf.engine().memory()); // if we didn't download db, generate it from current faces - if (!db || db.length === 0) await createFaceMatchDB(); + if (!db || db.length === 0) createFaceMatchDB(); title(''); log('Ready'); diff --git a/demo/index.js b/demo/index.js index 068054c9..bb21bff9 100644 --- a/demo/index.js +++ b/demo/index.js @@ -712,6 +712,7 @@ function setupMenu() { menu.image = new Menu(document.body, '', { top, left: x[1] }); menu.image.addBool('enabled', userConfig.filter, 'enabled', (val) => userConfig.filter.enabled = val); + menu.image.addBool('histogram equalization', userConfig.filter, 'equalization', (val) => userConfig.filter.equalization = val); ui.menuWidth = menu.image.addRange('image width', userConfig.filter, 'width', 0, 3840, 10, (val) => userConfig.filter.width = parseInt(val)); ui.menuHeight = menu.image.addRange('image height', userConfig.filter, 'height', 0, 2160, 10, (val) => userConfig.filter.height = parseInt(val)); menu.image.addHTML('
'); diff --git a/demo/typescript/index.js b/demo/typescript/index.js index c62960f0..062894b3 100644 --- a/demo/typescript/index.js +++ b/demo/typescript/index.js @@ -7,7 +7,8 @@ // demo/typescript/index.ts import Human from "../../dist/human.esm.js"; var humanConfig = { - modelBasePath: "../../models" + modelBasePath: "../../models", + filter: { equalization: false } }; var human = new Human(humanConfig); human.env["perfadd"] = false; @@ -79,8 +80,8 @@ async function drawLoop() { setTimeout(drawLoop, 30); } async function main() { - log("human version:", human.version, "tfjs version:", human.tf.version_core); - log("platform:", human.env.platform, "agent:", human.env.agent); + log("human version:", human.version, "| tfjs version:", human.tf.version_core); + log("platform:", human.env.platform, "| agent:", human.env.agent); status("loading..."); await human.load(); log("backend:", human.tf.getBackend(), "| available:", human.env.backends); diff --git a/demo/typescript/index.ts b/demo/typescript/index.ts index 44c732bc..c85d3495 100644 --- a/demo/typescript/index.ts +++ b/demo/typescript/index.ts @@ -13,7 +13,7 @@ import Human from '../../dist/human.esm.js'; // equivalent of @vladmandic/human const humanConfig = { // user configuration for human, used to fine-tune behavior modelBasePath: '../../models', - filter: { equalization: true }, + filter: { equalization: false }, // backend: 'webgpu', // async: true, // face: { enabled: false, detector: { rotation: true }, iris: { enabled: false }, description: { enabled: false }, emotion: { enabled: false } }, @@ -99,8 +99,8 @@ async function drawLoop() { // main screen refresh loop } async function main() { // main entry point - log('human version:', human.version, 'tfjs version:', human.tf.version_core); - log('platform:', human.env.platform, 'agent:', human.env.agent); + log('human version:', human.version, '| tfjs version:', human.tf.version_core); + log('platform:', human.env.platform, '| agent:', human.env.agent); status('loading...'); await human.load(); // preload all models log('backend:', human.tf.getBackend(), '| available:', human.env.backends); diff --git a/package.json b/package.json index 6c619094..3fa69a9a 100644 --- a/package.json +++ b/package.json @@ -74,7 +74,7 @@ "canvas": "^2.8.0", "dayjs": "^1.10.7", "esbuild": "^0.13.12", - "eslint": "8.1.0", + "eslint": "8.2.0", "eslint-config-airbnb-base": "^14.2.1", "eslint-plugin-html": "^6.2.0", "eslint-plugin-import": "^2.25.2", diff --git a/src/config.ts b/src/config.ts index e89c9601..2486348a 100644 --- a/src/config.ts +++ b/src/config.ts @@ -122,7 +122,9 @@ export interface SegmentationConfig extends GenericConfig { export interface FilterConfig { /** @property are image filters enabled? 
*/ enabled: boolean, - /** @property perform image histogram equalization */ + /** @property perform image histogram equalization + * - equalization is performed on input as a whole and detected face before its passed for further analysis + */ equalization: boolean, /** resize input width * - if both width and height are set to 0, there is no resizing @@ -229,6 +231,9 @@ export interface Config { */ cacheSensitivity: number; + /** Perform immediate garbage collection on deallocated tensors instead of caching them */ + deallocate: boolean; + /** Internal Variable */ skipAllowed: boolean; @@ -264,6 +269,7 @@ const config: Config = { warmup: 'full', cacheSensitivity: 0.70, skipAllowed: false, + deallocate: false, filter: { enabled: true, equalization: false, diff --git a/src/face/facemesh.ts b/src/face/facemesh.ts index 84c62073..ff59794f 100644 --- a/src/face/facemesh.ts +++ b/src/face/facemesh.ts @@ -13,10 +13,11 @@ import * as blazeface from './blazeface'; import * as util from './facemeshutil'; import * as coords from './facemeshcoords'; import * as iris from './iris'; +import { histogramEqualization } from '../image/enhance'; +import { env } from '../util/env'; import type { GraphModel, Tensor } from '../tfjs/types'; import type { FaceResult, Point } from '../result'; import type { Config } from '../config'; -import { env } from '../util/env'; type BoxCache = { startPoint: Point, endPoint: Point, landmarks: Array, confidence: number }; let boxCache: Array = []; @@ -73,6 +74,11 @@ export async function predict(input: Tensor, config: Config): Promise { // leaning const leftShoulder = res[i].keypoints.find((a) => (a.part === 'leftShoulder')); const rightShoulder = res[i].keypoints.find((a) => (a.part === 'rightShoulder')); - if (leftShoulder && rightShoulder) gestures.push({ body: i, gesture: `leaning ${(leftShoulder.position[1] > rightShoulder.position[1]) ? 'left' : 'right'}` }); + if (leftShoulder && rightShoulder && Math.abs(leftShoulder.positionRaw[1] - rightShoulder.positionRaw[1]) > 0.1) { + gestures.push({ body: i, gesture: `leaning ${(leftShoulder.position[1] > rightShoulder.position[1]) ? 'left' : 'right'}` }); + } } return gestures; }; diff --git a/src/human.ts b/src/human.ts index 69665146..72a168ca 100644 --- a/src/human.ts +++ b/src/human.ts @@ -405,7 +405,7 @@ export class Human { timeStamp = now(); this.state = 'image'; - const img = image.process(input, this.config) as { canvas: HTMLCanvasElement | OffscreenCanvas, tensor: Tensor }; + const img = await image.process(input, this.config) as { canvas: HTMLCanvasElement | OffscreenCanvas, tensor: Tensor }; this.process = img; this.performance.inputProcess = this.env.perfadd ? (this.performance.inputProcess || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp); this.analyze('Get Image:'); @@ -423,7 +423,7 @@ export class Human { if (!this.performance.cachedFrames) this.performance.cachedFrames = 0; (this.performance.totalFrames as number)++; if (this.config.skipAllowed) this.performance.cachedFrames++; - this.performance.inputCheck = this.env.perfadd ? (this.performance.inputCheck || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp); + this.performance.cacheCheck = this.env.perfadd ? 
(this.performance.cacheCheck || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
     this.analyze('Check Changed:');
 
     // prepare where to store model results
diff --git a/src/image/enhance.ts b/src/image/enhance.ts
index 8fb55782..4df16f54 100644
--- a/src/image/enhance.ts
+++ b/src/image/enhance.ts
@@ -5,16 +5,20 @@
 import * as tf from '../../dist/tfjs.esm.js';
 import type { Tensor } from '../exports';
 
-export function histogramEqualization(input: Tensor): Tensor {
-  const channels = tf.split(input, 3, 2);
+export async function histogramEqualization(inputImage: Tensor): Promise<Tensor> {
+  // const maxValue = 254; // using 255 results in values slightly larger than 1 due to math rounding errors
+  const squeeze = inputImage.shape.length === 4 ? tf.squeeze(inputImage) : inputImage;
+  const channels = tf.split(squeeze, 3, 2);
   const min: Tensor[] = [tf.min(channels[0]), tf.min(channels[1]), tf.min(channels[2])];
   const max: Tensor[] = [tf.max(channels[0]), tf.max(channels[1]), tf.max(channels[2])];
+  const absMax = await Promise.all(max.map((channel) => channel.data()));
+  const maxValue = 0.99 * Math.max(absMax[0][0], absMax[1][0], absMax[2][0]);
   const sub = [tf.sub(channels[0], min[0]), tf.sub(channels[1], min[1]), tf.sub(channels[2], min[2])];
   const range = [tf.sub(max[0], min[0]), tf.sub(max[1], min[1]), tf.sub(max[2], min[2])];
-  const fact = [tf.div(255, range[0]), tf.div(255, range[1]), tf.div(255, range[2])];
+  const fact = [tf.div(maxValue, range[0]), tf.div(maxValue, range[1]), tf.div(maxValue, range[2])];
   const enh = [tf.mul(sub[0], fact[0]), tf.mul(sub[1], fact[1]), tf.mul(sub[2], fact[2])];
   const rgb = tf.stack([enh[0], enh[1], enh[2]], 2);
-  const reshape = tf.reshape(rgb, [1, input.shape[0], input.shape[1], 3]);
-  tf.dispose([...channels, ...min, ...max, ...sub, ...range, ...fact, ...enh, rgb]);
-  return reshape;
+  const reshape = tf.reshape(rgb, [1, squeeze.shape[0], squeeze.shape[1], 3]);
+  tf.dispose([...channels, ...min, ...max, ...sub, ...range, ...fact, ...enh, rgb, squeeze]);
+  return reshape; // output shape is [1, height, width, 3]
 }
diff --git a/src/image/image.ts b/src/image/image.ts
index ad8f53b6..0278df9d 100644
--- a/src/image/image.ts
+++ b/src/image/image.ts
@@ -6,7 +6,7 @@ import * as tf from '../../dist/tfjs.esm.js';
 import * as fxImage from './imagefx';
 import type { Input, AnyCanvas, Tensor, Config } from '../exports';
 import { env } from '../util/env';
-import { log, now } from '../util/util';
+import { log } from '../util/util';
 import * as enhance from './enhance';
 
 const maxSize = 2048;
@@ -17,6 +17,13 @@ let tmpCanvas: AnyCanvas | null = null; // use global variable to avoid recreati
 // @ts-ignore // imagefx is js module that should be converted to a class
 let fx: fxImage.GLImageFilter | null; // instance of imagefx
 
+const last: { inputSum: number, cacheDiff: number, sumMethod: number, inputTensor: undefined | Tensor } = {
+  inputSum: 0,
+  cacheDiff: 1,
+  sumMethod: 0,
+  inputTensor: undefined,
+};
+
 export function canvas(width, height): AnyCanvas {
   let c;
   if (env.browser) { // browser defines canvas object
@@ -48,7 +55,7 @@ export function copy(input: AnyCanvas, output?: AnyCanvas) {
 // process input image and return tensor
 // input can be tensor, imagedata, htmlimageelement, htmlvideoelement
 // input is resized and run through imagefx filter
-export function process(input: Input, config: Config, getTensor: boolean = true): { tensor: Tensor | null, canvas: AnyCanvas | null } {
+export async function process(input: Input, config: Config,
getTensor: boolean = true): Promise<{ tensor: Tensor | null, canvas: AnyCanvas | null }> { if (!input) { // throw new Error('input is missing'); if (config.debug) log('input is missing'); @@ -108,7 +115,7 @@ export function process(input: Input, config: Config, getTensor: boolean = true) if ((config.filter.height || 0) > 0) targetHeight = config.filter.height; else if ((config.filter.width || 0) > 0) targetHeight = originalHeight * ((config.filter.width || 0) / originalWidth); if (!targetWidth || !targetHeight) throw new Error('input cannot determine dimension'); - if (!inCanvas || (inCanvas.width !== targetWidth) || (inCanvas.height !== targetHeight)) inCanvas = canvas(targetWidth, targetHeight); + if (!inCanvas || (inCanvas?.width !== targetWidth) || (inCanvas?.height !== targetHeight)) inCanvas = canvas(targetWidth, targetHeight); // draw input to our canvas const inCtx = inCanvas.getContext('2d') as CanvasRenderingContext2D; @@ -118,14 +125,14 @@ export function process(input: Input, config: Config, getTensor: boolean = true) if (config.filter.flip && typeof inCtx.translate !== 'undefined') { inCtx.translate(originalWidth, 0); inCtx.scale(-1, 1); - inCtx.drawImage(input as AnyCanvas, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas.width, inCanvas.height); + inCtx.drawImage(input as AnyCanvas, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height); inCtx.setTransform(1, 0, 0, 1, 0, 0); // resets transforms to defaults } else { - inCtx.drawImage(input as AnyCanvas, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas.width, inCanvas.height); + inCtx.drawImage(input as AnyCanvas, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas?.width, inCanvas?.height); } } - if (!outCanvas || (inCanvas.width !== outCanvas.width) || (inCanvas.height !== outCanvas.height)) outCanvas = canvas(inCanvas.width, inCanvas.height); // init output canvas + if (!outCanvas || (inCanvas.width !== outCanvas.width) || (inCanvas?.height !== outCanvas?.height)) outCanvas = canvas(inCanvas.width, inCanvas.height); // init output canvas // imagefx transforms using gl from input canvas to output canvas if (config.filter.enabled && env.webgl.supported) { @@ -192,26 +199,16 @@ export function process(input: Input, config: Config, getTensor: boolean = true) const rgb = tf.slice3d(pixels, [0, 0, 0], [-1, -1, 3]); // strip alpha channel tf.dispose(pixels); pixels = rgb; - /* - const channels = tf.split(pixels, 4, 2); // split rgba to channels - tf.dispose(pixels); - const rgb = tf.stack([channels[0], channels[1], channels[2]], 2); // stack channels back to rgb and ignore alpha - pixels = tf.reshape(rgb, [rgb.shape[0], rgb.shape[1], 3]); // move extra dim from the end of tensor and use it as batch number instead - tf.dispose([rgb, ...channels]); - */ } if (!pixels) throw new Error('cannot create tensor from input'); const casted = tf.cast(pixels, 'float32'); - const tensor = config.filter.equalization ? enhance.histogramEqualization(casted) : tf.expandDims(casted, 0); + const tensor = config.filter.equalization ? await enhance.histogramEqualization(casted) : tf.expandDims(casted, 0); tf.dispose([pixels, casted]); return { tensor, canvas: (config.filter.return ? 
outCanvas : null) };
   }
 }
 
-let lastInputSum = 0;
-let lastCacheDiff = 1;
-let benchmarked = 0;
-
+/*
 const checksum = async (input: Tensor): Promise<number> => { // use tf sum or js based sum loop depending on which is faster
   const resizeFact = 48;
   const reduced: Tensor = tf.image.resizeBilinear(input, [Math.trunc((input.shape[1] || 1) / resizeFact), Math.trunc((input.shape[2] || 1) / resizeFact)]);
@@ -227,29 +224,51 @@ const checksum = async (input: Tensor): Promise<number> => { // use tf sum or js
     for (let i = 0; i < reducedData.length / 3; i++) sum0 += reducedData[3 * i + 2]; // look only at green value of each pixel
     return sum0;
   };
-  if (benchmarked === 0) {
+  if (last.sumMethod === 0) {
     const t0 = now();
     await jsSum();
     const t1 = now();
     await tfSum();
     const t2 = now();
-    benchmarked = t1 - t0 < t2 - t1 ? 1 : 2;
+    last.sumMethod = t1 - t0 < t2 - t1 ? 1 : 2;
   }
-  const res = benchmarked === 1 ? await jsSum() : await tfSum();
+  const res = last.sumMethod === 1 ? await jsSum() : await tfSum();
   tf.dispose(reduced);
   return res;
 };
+*/
 
 export async function skip(config, input: Tensor) {
-  if (config.cacheSensitivity === 0) return false;
-  const sum = await checksum(input);
-  const diff = 100 * (Math.max(sum, lastInputSum) / Math.min(sum, lastInputSum) - 1);
-  lastInputSum = sum;
+  let skipFrame = false;
+  if (config.cacheSensitivity === 0) return skipFrame;
+
+  /*
+  const checkSum = await checksum(input);
+  const diff = 100 * (Math.max(checkSum, last.inputSum) / Math.min(checkSum, last.inputSum) - 1);
+  last.inputSum = checkSum;
   // if previous frame was skipped, skip this frame if changed more than cacheSensitivity
   // if previous frame was not skipped, then look for cacheSensitivity or difference larger than one in previous frame to avoid resetting cache in subsequent frames unnecessarily
-  let skipFrame = diff < Math.max(config.cacheSensitivity, lastCacheDiff);
+  let skipFrame = diff < Math.max(config.cacheSensitivity, last.cacheDiff);
   // if difference is above 10x threshold, don't use last value to force reset cache for significant change of scenes or images
-  lastCacheDiff = diff > 10 * config.cacheSensitivity ?
0 : diff; + skipFrame = skipFrame && (last.cacheDiff > 0); // if no cached diff value then force no skip + */ + + if (!last.inputTensor) { + last.inputTensor = tf.clone(input); + } else if (last.inputTensor.shape[1] !== input.shape[1] || last.inputTensor.shape[2] !== input.shape[2]) { // input resolution changed + tf.dispose(last.inputTensor); + last.inputTensor = tf.clone(input); + } else { + const t: Record = {}; + t.diff = tf.sub(input, last.inputTensor); + t.squared = tf.mul(t.diff, t.diff); + t.sum = tf.sum(t.squared); + const diffSum = await t.sum.data(); + const diffRelative = diffSum[0] / (input.shape[1] || 1) / (input.shape[2] || 1) / 255 / 3; // squared difference relative to input resolution and averaged per channel + tf.dispose([last.inputTensor, t.diff, t.squared, t.sum]); + last.inputTensor = tf.clone(input); + skipFrame = diffRelative <= config.cacheSensitivity; + } return skipFrame; } diff --git a/src/segmentation/segmentation.ts b/src/segmentation/segmentation.ts index 861258cf..d552a2cc 100644 --- a/src/segmentation/segmentation.ts +++ b/src/segmentation/segmentation.ts @@ -31,7 +31,7 @@ export async function process(input: Input, background: Input | undefined, confi if (busy) return { data: [], canvas: null, alpha: null }; busy = true; if (!model) await load(config); - const inputImage = image.process(input, config); + const inputImage = await image.process(input, config); const width = inputImage.canvas?.width || 0; const height = inputImage.canvas?.height || 0; if (!inputImage.tensor) return { data: [], canvas: null, alpha: null }; @@ -85,7 +85,7 @@ export async function process(input: Input, background: Input | undefined, confi let mergedCanvas: HTMLCanvasElement | OffscreenCanvas | null = null; if (background && compositeCanvas) { // draw background with segmentation as overlay if background is present mergedCanvas = image.canvas(width, height); - const bgImage = image.process(background, config); + const bgImage = await image.process(background, config); tf.dispose(bgImage.tensor); const ctxMerge = mergedCanvas.getContext('2d') as CanvasRenderingContext2D; ctxMerge.drawImage(bgImage.canvas as HTMLCanvasElement, 0, 0, mergedCanvas.width, mergedCanvas.height); diff --git a/src/tfjs/backend.ts b/src/tfjs/backend.ts index e2ff8fee..1f71d44f 100644 --- a/src/tfjs/backend.ts +++ b/src/tfjs/backend.ts @@ -13,6 +13,7 @@ function registerCustomOps() { kernelFunc: (op) => tf.tidy(() => tf.sub(op.inputs.a, tf.mul(tf.div(op.inputs.a, op.inputs.b), op.inputs.b))), }; tf.registerKernel(kernelMod); + env.kernels.push('mod'); } if (!env.kernels.includes('floormod')) { const kernelMod = { @@ -21,8 +22,8 @@ function registerCustomOps() { kernelFunc: (op) => tf.tidy(() => tf.floorDiv(op.inputs.a / op.inputs.b) * op.inputs.b + tf.mod(op.inputs.a, op.inputs.b)), }; tf.registerKernel(kernelMod); + env.kernels.push('floormod'); } - env.updateBackend(); } export async function check(instance, force = false) { @@ -123,8 +124,9 @@ export async function check(instance, force = false) { instance.performance.initBackend = Math.trunc(now() - timeStamp); instance.config.backend = tf.getBackend(); - env.updateBackend(); // update env on backend init + await env.updateBackend(); // update env on backend init registerCustomOps(); + // await env.updateBackend(); // update env on backend init } return true; } diff --git a/src/tfjs/humangl.ts b/src/tfjs/humangl.ts index 5e24db73..907330c0 100644 --- a/src/tfjs/humangl.ts +++ b/src/tfjs/humangl.ts @@ -68,13 +68,11 @@ export async function 
register(instance): Promise<void> {
       log('possible browser memory leak using webgl or conflict with multiple backend registrations');
       instance.emit('error');
       throw new Error('browser webgl error');
-      /*
-      log('resetting humangl backend');
-      env.initial = true;
-      models.reset(instance);
-      await tf.removeBackend(config.name);
-      await register(instance); // re-register
-      */
+      // log('resetting humangl backend');
+      // env.initial = true;
+      // models.reset(instance);
+      // await tf.removeBackend(config.name);
+      // await register(instance); // re-register
     });
     config.canvas.addEventListener('webglcontextrestored', (e) => {
       log('error: humangl context restored:', e);
diff --git a/test/test-main.js b/test/test-main.js
index 146cdb4b..a9d3cbfb 100644
--- a/test/test-main.js
+++ b/test/test-main.js
@@ -192,7 +192,7 @@ async function test(Human, inputConfig) {
   else log('state', 'passed: warmup face result match');
   config.warmup = 'body';
   res = await testWarmup(human, 'default');
-  if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 1 || res?.gesture?.length !== 6) log('error', 'failed: warmup body result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
+  if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 1 || res?.gesture?.length !== 5) log('error', 'failed: warmup body result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
   else log('state', 'passed: warmup body result match');
   log('state', 'details:', {
     face: { boxScore: res.face[0].boxScore, faceScore: res.face[0].faceScore, age: res.face[0].age, gender: res.face[0].gender, genderScore: res.face[0].genderScore },
@@ -278,7 +278,7 @@ async function test(Human, inputConfig) {
   config.body = { minConfidence: 0.0001 };
   config.hand = { minConfidence: 0.0001 };
   res = await testDetect(human, 'samples/in/ai-body.jpg', 'default');
-  if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 2 || res?.gesture?.length !== 8) log('error', 'failed: sensitive result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
+  if (!res || res?.face?.length !== 1 || res?.body?.length !== 1 || res?.hand?.length !== 2 || res?.gesture?.length !== 7) log('error', 'failed: sensitive result mismatch', res?.face?.length, res?.body?.length, res?.hand?.length, res?.gesture?.length);
   else log('state', 'passed: sensitive result match');
 
   // test sensitive details face
diff --git a/tfjs/tf-browser.ts b/tfjs/tf-browser.ts
index da8fc890..5a069b9c 100644
--- a/tfjs/tf-browser.ts
+++ b/tfjs/tf-browser.ts
@@ -5,7 +5,7 @@
 // export all from build bundle
 export * from '@tensorflow/tfjs/dist/index.js';
-// export * from '@tensorflow/tfjs-backend-webgl/dist/index.js';
+export * from '@tensorflow/tfjs-backend-webgl/dist/index.js';
 // export * from '@tensorflow/tfjs-backend-wasm/dist/index.js';
 
 // add webgpu to bundle, experimental
diff --git a/wiki b/wiki
index 0deb501c..e5a6342e 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 0deb501cf47e1783e8ca4426b7bf4697196f09e2
+Subproject commit e5a6342e4e2dd5d79b73cafada222ef4b1d1621a
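
The new frame change detection in src/image/image.ts replaces the checksum/benchmark approach (now commented out above) with a direct temporal difference: the current input tensor is compared against a cached clone of the previous one, and the squared per-pixel difference, normalized by resolution and per-channel range, is tested against config.cacheSensitivity. A minimal standalone sketch of the same idea, assuming the public @tensorflow/tfjs package instead of the bundled dist/tfjs.esm.js; the names frameChanged and lastInput are illustrative and not part of the Human API:

import * as tf from '@tensorflow/tfjs';

let lastInput: tf.Tensor | undefined; // previous frame kept for comparison

// returns true when the frame differs enough from the previous one to invalidate cached results
export async function frameChanged(input: tf.Tensor4D, cacheSensitivity = 0.70): Promise<boolean> {
  if (cacheSensitivity === 0) return true; // caching disabled: treat every frame as new
  if (!lastInput || lastInput.shape[1] !== input.shape[1] || lastInput.shape[2] !== input.shape[2]) {
    if (lastInput) tf.dispose(lastInput); // first frame or input resolution changed
    lastInput = tf.clone(input);
    return true;
  }
  const diff = tf.sub(input, lastInput);
  const squared = tf.mul(diff, diff);
  const sum = tf.sum(squared);
  const [diffSum] = await sum.data();
  // squared difference relative to input resolution, averaged per channel over the 0..255 range
  const relative = diffSum / (input.shape[1] * input.shape[2]) / 255 / 3;
  tf.dispose([lastInput, diff, squared, sum]);
  lastInput = tf.clone(input);
  return relative > cacheSensitivity;
}

skip() in the patch returns the inverse condition (diffRelative <= cacheSensitivity), so a frame that barely changed lets downstream modules reuse their cached results.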
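
The config.filter.equalization flag added in src/config.ts is disabled by default; when enabled, the input as a whole and the detected face crop are auto-leveled for brightness/contrast before further analysis. A minimal usage sketch against the published @vladmandic/human package; the model path and the video element id are placeholders:

import Human from '@vladmandic/human';

const human = new Human({
  modelBasePath: '../models', // placeholder: point this to wherever the models are hosted
  filter: { enabled: true, equalization: true }, // enable histogram equalization pre-processing
  cacheSensitivity: 0.70, // threshold used by the new frame change detection
});

async function run() {
  const video = document.getElementById('video') as HTMLVideoElement; // assumed <video> element on the page
  await human.load(); // preload all models
  const result = await human.detect(video);
  console.log('faces:', result.face.length);
}

run();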
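
registerCustomOps() in src/tfjs/backend.ts now pushes each custom kernel name into env.kernels right after registering it, so a repeated call cannot register the same op twice. The registration mechanism itself is plain TFJS; a general sketch of registering a Mod kernel on a backend that lacks it follows. The floorDiv-based composition is a simplification for illustration and is not a copy of the patch's exact kernelFunc:

import * as tf from '@tensorflow/tfjs';

// register a 'Mod' kernel for a backend that does not implement it natively,
// composed from ops the backend already supports
function registerModKernel(backendName: string): void {
  tf.registerKernel({
    kernelName: 'Mod',
    backendName,
    kernelFunc: ({ inputs }) => tf.tidy(() => {
      const a = inputs.a as tf.Tensor; // kernel inputs arrive as TensorInfo
      const b = inputs.b as tf.Tensor;
      return tf.sub(a, tf.mul(tf.floorDiv(a, b), b)); // a mod b = a - floor(a / b) * b
    }),
  });
}

// example: fill the gap on whatever backend is currently active
registerModKernel(tf.getBackend());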