diff --git a/README.md b/README.md index 4c67864f..82565dd5 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,9 @@ JavaScript module using TensorFlow/JS Machine Learning library
-Check out [**Live Demo**](https://vladmandic.github.io/human/demo/index.html) app for processing of live WebCam video or static images +*Check out [**Simple Live Demo**](https://vladmandic.github.io/human/demo/typescript/index.html) fully annotated app as a good starting point ([html](https://github.com/vladmandic/human/blob/main/demo/typescript/index.html))([code](https://github.com/vladmandic/human/blob/main/demo/typescript/index.ts))* + +*Check out [**Main Live Demo**](https://vladmandic.github.io/human/demo/index.html) app for advanced processing of webcam, video stream or static images with all possible tunable options* - To start video detection, simply press *Play* - To process images, simply drag & drop in your Browser window @@ -38,6 +40,7 @@ Check out [**Live Demo**](https://vladmandic.github.io/human/demo/index.html) ap - [**List of all Demo applications**](https://github.com/vladmandic/human/wiki/Demos) - [*Live:* **Main Application**](https://vladmandic.github.io/human/demo/index.html) +- [*Live:* **Simple Application**](https://vladmandic.github.io/human/demo/typescript/index.html) - [*Live:* **Face Extraction, Description, Identification and Matching**](https://vladmandic.github.io/human/demo/facematch/index.html) - [*Live:* **Face Extraction and 3D Rendering**](https://vladmandic.github.io/human/demo/face3d/index.html) - [*Live:* **Multithreaded Detection Showcasing Maximum Performance**](https://vladmandic.github.io/human/demo/multithread/index.html) diff --git a/TODO.md b/TODO.md index bdb68061..b0ccdfd3 100644 --- a/TODO.md +++ b/TODO.md @@ -18,10 +18,6 @@ ## Known Issues -### Type Definitions -- `tfjs.esm.d.ts` missing namespace `OptimizerConstructors` -- exports from `match` are marked as private - #### WebGPU Experimental support only until support is officially added in Chromium @@ -43,15 +39,4 @@ MoveNet MultiPose model does not work with WASM backend due to missing F32 broad - Backend WASM missing F32 broadcat 
implementation -### Object Detection - -Object detection using CenterNet or NanoDet models is not working when using WASM backend due to missing kernel ops in TFJS - -- Backend WASM missing kernel op `Mod` - -- Backend WASM missing kernel op `SparseToDense` - -


- -> const mod = (a, b) => tf.sub(a, tf.mul(tf.div(a, tf.scalar(b, 'int32')), tf.scalar(b, 'int32'))); // modulus op implemented in tf diff --git a/demo/typescript/index.js b/demo/typescript/index.js index 937fd57c..c62960f0 100644 --- a/demo/typescript/index.js +++ b/demo/typescript/index.js @@ -11,8 +11,8 @@ var humanConfig = { }; var human = new Human(humanConfig); human.env["perfadd"] = false; -human.draw.options.font = 'small-caps 24px "Lato"'; -human.draw.options.lineHeight = 24; +human.draw.options.font = 'small-caps 18px "Lato"'; +human.draw.options.lineHeight = 20; var dom = { video: document.getElementById("video"), canvas: document.getElementById("canvas"), diff --git a/demo/typescript/index.ts b/demo/typescript/index.ts index b80b2da9..1fc397ac 100644 --- a/demo/typescript/index.ts +++ b/demo/typescript/index.ts @@ -13,20 +13,20 @@ import Human from '../../dist/human.esm.js'; // equivalent of @vladmandic/human const humanConfig = { // user configuration for human, used to fine-tune behavior modelBasePath: '../../models', - // backend: 'humangl', + // backend: 'webgpu', // async: true, // face: { enabled: false, detector: { rotation: true }, iris: { enabled: false }, description: { enabled: false }, emotion: { enabled: false } }, // body: { enabled: false }, // hand: { enabled: false }, - // object: { enabled: false }, + // object: { enabled: true }, // gesture: { enabled: true }, }; const human = new Human(humanConfig); // create instance of human with overrides from user configuration human.env['perfadd'] = false; // is performance data showing instant or total values -human.draw.options.font = 'small-caps 24px "Lato"'; // set font used to draw labels when using draw methods -human.draw.options.lineHeight = 24; +human.draw.options.font = 'small-caps 18px "Lato"'; // set font used to draw labels when using draw methods +human.draw.options.lineHeight = 20; const dom = { // grab instances of dom objects so we dont have to look them up later video: 
document.getElementById('video') as HTMLVideoElement, diff --git a/src/body/efficientpose.ts b/src/body/efficientpose.ts index e1bbfcab..2fba724e 100644 --- a/src/body/efficientpose.ts +++ b/src/body/efficientpose.ts @@ -36,12 +36,11 @@ export async function load(config: Config): Promise { function max2d(inputs, minScore) { const [width, height] = inputs.shape; return tf.tidy(() => { - const mod = (a, b) => tf.sub(a, tf.mul(tf.div(a, tf.scalar(b, 'int32')), tf.scalar(b, 'int32'))); // modulus op implemented in tf const reshaped = tf.reshape(inputs, [height * width]); // combine all data const newScore = tf.max(reshaped, 0).dataSync()[0]; // get highest score // inside tf.tidy if (newScore > minScore) { // skip coordinate calculation is score is too low const coordinates = tf.argMax(reshaped, 0); - const x = mod(coordinates, width).dataSync()[0]; // inside tf.tidy + const x = tf.mod(coordinates, width).dataSync()[0]; // inside tf.tidy const y = tf.div(coordinates, tf.scalar(width, 'int32')).dataSync()[0]; // inside tf.tidy return [x, y, newScore]; } diff --git a/src/object/centernet.ts b/src/object/centernet.ts index 600de2b8..1f36ec31 100644 --- a/src/object/centernet.ts +++ b/src/object/centernet.ts @@ -11,7 +11,6 @@ import type { ObjectResult, Box } from '../result'; import type { GraphModel, Tensor } from '../tfjs/types'; import type { Config } from '../config'; import { env } from '../util/env'; -import { fakeOps } from '../tfjs/backend'; let model: GraphModel | null; let inputSize = 0; @@ -22,7 +21,7 @@ let skipped = Number.MAX_SAFE_INTEGER; export async function load(config: Config): Promise { if (env.initial) model = null; if (!model) { - fakeOps(['floormod'], config); + // fakeOps(['floormod'], config); model = await tf.loadGraphModel(join(config.modelBasePath, config.object.modelPath || '')) as unknown as GraphModel; const inputs = Object.values(model.modelSignature['inputs']); inputSize = Array.isArray(inputs) ? 
parseInt(inputs[0].tensorShape.dim[2].size) : 0; @@ -86,7 +85,6 @@ export async function predict(input: Tensor, config: Config): Promise { const outputSize = [input.shape[2], input.shape[1]]; const resize = tf.image.resizeBilinear(input, [inputSize, inputSize]); diff --git a/src/tfjs/backend.ts b/src/tfjs/backend.ts index 83dbed4c..e2ff8fee 100644 --- a/src/tfjs/backend.ts +++ b/src/tfjs/backend.ts @@ -5,6 +5,26 @@ import { env } from '../util/env'; import * as humangl from './humangl'; import * as tf from '../../dist/tfjs.esm.js'; +function registerCustomOps() { + if (!env.kernels.includes('mod')) { + const kernelMod = { + kernelName: 'Mod', + backendName: tf.getBackend(), + kernelFunc: (op) => tf.tidy(() => tf.sub(op.inputs.a, tf.mul(tf.div(op.inputs.a, op.inputs.b), op.inputs.b))), + }; + tf.registerKernel(kernelMod); + } + if (!env.kernels.includes('floormod')) { + const kernelMod = { + kernelName: 'FloorMod', + backendName: tf.getBackend(), + kernelFunc: (op) => tf.tidy(() => tf.sub(op.inputs.a, tf.mul(tf.floorDiv(op.inputs.a, op.inputs.b), op.inputs.b))), + }; + tf.registerKernel(kernelMod); + } + env.updateBackend(); +} + export async function check(instance, force = false) { instance.state = 'backend'; if (force || env.initial || (instance.config.backend && (instance.config.backend.length > 0) && (tf.getBackend() !== instance.config.backend))) { @@ -99,10 +119,12 @@ export async function check(instance, force = false) { // wait for ready tf.enableProdMode(); await tf.ready(); + instance.performance.initBackend = Math.trunc(now() - timeStamp); instance.config.backend = tf.getBackend(); env.updateBackend(); // update env on backend init + registerCustomOps(); } return true; }