diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5424fb62..7f41f6ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,8 +9,9 @@ Repository: ****
 
 ## Changelog
 
-### **HEAD -> main** 2021/03/16 mandic00@live.com
+### **HEAD -> main** 2021/03/17 mandic00@live.com
+- cleanup
 
 ### **1.1.7** 2021/03/16 mandic00@live.com
 
diff --git a/config.ts b/config.ts
index 968996d7..0a3e5331 100644
--- a/config.ts
+++ b/config.ts
@@ -166,4 +166,14 @@ export default {
       modelPath: '../models/handskeleton.json',
     },
   },
+
+  object: {
+    enabled: false,
+    modelPath: '../models/nanodet.json',
+    minConfidence: 0.15, // threshold for discarding a prediction
+    iouThreshold: 0.25, // threshold for deciding whether boxes overlap too much
+                        // in non-maximum suppression
+    maxResults: 10, // maximum number of objects detected in the input
+    skipFrames: 13, // how many frames to go without re-running the detector
+  },
 };
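The new `object` config block above is disabled by default and can be toggled per instance. A minimal usage sketch; the import path and values are illustrative, following the demo code:

```ts
// Hypothetical setup showing the new object-detection config in use;
// the import path follows the demos and may differ in your build.
import Human from '../dist/human.esm.js';

const human = new Human({
  object: {
    enabled: true,        // disabled by default, as in config.ts above
    minConfidence: 0.15,  // discard predictions scoring below this
    iouThreshold: 0.25,   // overlap threshold used during non-maximum suppression
    maxResults: 10,       // cap on returned objects
    skipFrames: 13,       // frames to reuse cached results when videoOptimized is set
  },
});
```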
diff --git a/demo/browser.js b/demo/browser.js
index 30670100..75822e80 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -9,12 +9,14 @@ const userConfig = { backend: 'webgl' }; // add any user configuration overrides
 const userConfig = {
   backend: 'wasm',
   async: false,
-  warmup: 'face',
-  videoOptimized: false,
+  warmup: 'full',
+  videoOptimized: true,
+  filter: { enabled: true },
   face: { enabled: true, mesh: { enabled: true }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: false } },
   hand: { enabled: false },
   gesture: { enabled: false },
   body: { enabled: false, modelPath: '../models/blazepose.json' },
+  object: { enabled: false },
 };
 */
 
@@ -141,6 +143,7 @@ async function drawResults(input) {
   human.draw.face(canvas, result.face);
   human.draw.body(canvas, result.body);
   human.draw.hand(canvas, result.hand);
+  human.draw.object(canvas, result.object);
   human.draw.gesture(canvas, result.gesture);
   await calcSimmilariry(result);
 
@@ -515,6 +518,8 @@ function setupMenu() {
   menu.models.addHTML('<hr style="min-width: 200px">');
   menu.models.addBool('gestures', human.config.gesture, 'enabled', (val) => human.config.gesture.enabled = val);
   menu.models.addHTML('<hr style="min-width: 200px">');
+  menu.models.addBool('object detection', human.config.object, 'enabled', (val) => human.config.object.enabled = val);
+  menu.models.addHTML('<hr style="min-width: 200px">');
   menu.models.addBool('face compare', human.config.face.embedding, 'enabled', (val) => {
     human.config.face.embedding.enabled = val;
     original = null;
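Since the demo's suggested overrides run with `videoOptimized: true`, the object module only re-runs the detector every `object.skipFrames` frames and otherwise returns its cached results. A sketch of the per-frame loop this enables, reusing the `human` instance from the sketch above:

```ts
// Per-frame loop sketch: detect() is cheap on frames where nanodet
// serves cached results, so drawing can run at display rate.
async function loop(video: HTMLVideoElement, canvas: HTMLCanvasElement) {
  const result = await human.detect(video);
  human.draw.object(canvas, result.object); // draw method added by this change
  requestAnimationFrame(() => loop(video, canvas));
}
```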
diff --git a/src/draw.ts b/src/draw.ts
index dd114eba..9ee6bd75 100644
--- a/src/draw.ts
+++ b/src/draw.ts
@@ -149,7 +149,7 @@ export async function face(inCanvas, result) {
       ctx.fillText(labels[i], x + 4, y + 15);
     }
     ctx.lineWidth = 1;
-    if (f.mesh) {
+    if (f.mesh && f.mesh.length > 0) {
       if (drawOptions.drawPoints) {
         for (const pt of f.mesh) point(ctx, pt[0], pt[1], pt[2]);
         // for (const pt of f.meshRaw) point(ctx, pt[0] * inCanvas.offsetWidth, pt[1] * inCanvas.offsetHeight, pt[2]);
@@ -306,12 +306,14 @@ export async function hand(inCanvas, result) {
       ctx.strokeStyle = drawOptions.color;
       ctx.fillStyle = drawOptions.color;
       rect(ctx, h.box[0], h.box[1], h.box[2], h.box[3]);
-      if (drawOptions.shadowColor && drawOptions.shadowColor !== '') {
-        ctx.fillStyle = drawOptions.shadowColor;
-        ctx.fillText('hand', h.box[0] + 3, 1 + h.box[1] + drawOptions.lineHeight, h.box[2]);
+      if (drawOptions.drawLabels) {
+        if (drawOptions.shadowColor && drawOptions.shadowColor !== '') {
+          ctx.fillStyle = drawOptions.shadowColor;
+          ctx.fillText('hand', h.box[0] + 3, 1 + h.box[1] + drawOptions.lineHeight, h.box[2]);
+        }
+        ctx.fillStyle = drawOptions.labelColor;
+        ctx.fillText('hand', h.box[0] + 2, 0 + h.box[1] + drawOptions.lineHeight, h.box[2]);
       }
-      ctx.fillStyle = drawOptions.labelColor;
-      ctx.fillText('hand', h.box[0] + 2, 0 + h.box[1] + drawOptions.lineHeight, h.box[2]);
       ctx.stroke();
     }
     if (drawOptions.drawPoints) {
@@ -344,6 +346,32 @@ export async function hand(inCanvas, result) {
   }
 }
 
+export async function object(inCanvas, result) {
+  if (!result || !inCanvas) return;
+  if (!(inCanvas instanceof HTMLCanvasElement)) return;
+  const ctx = inCanvas.getContext('2d');
+  if (!ctx) return;
+  ctx.lineJoin = 'round';
+  ctx.font = drawOptions.font;
+  for (const h of result) {
+    if (drawOptions.drawBoxes) {
+      ctx.strokeStyle = drawOptions.color;
+      ctx.fillStyle = drawOptions.color;
+      rect(ctx, h.box[0], h.box[1], h.box[2] - h.box[0], h.box[3] - h.box[1]);
+      if (drawOptions.drawLabels) {
+        const label = `${Math.round(100 * h.score)}% ${h.label}`;
+        if (drawOptions.shadowColor && drawOptions.shadowColor !== '') {
+          ctx.fillStyle = drawOptions.shadowColor;
+          ctx.fillText(label, h.box[0] + 3, 1 + h.box[1] + drawOptions.lineHeight, h.box[2]);
+        }
+        ctx.fillStyle = drawOptions.labelColor;
+        ctx.fillText(label, h.box[0] + 2, 0 + h.box[1] + drawOptions.lineHeight, h.box[2]);
+      }
+      ctx.stroke();
+    }
+  }
+}
+
 export async function canvas(inCanvas, outCanvas) {
   if (!inCanvas || !outCanvas) return;
   if (!(inCanvas instanceof HTMLCanvasElement) || !(outCanvas instanceof HTMLCanvasElement)) return;
@@ -358,4 +386,5 @@ export async function all(inCanvas, result) {
   body(inCanvas, result.body);
   hand(inCanvas, result.hand);
   gesture(inCanvas, result.gesture);
+  object(inCanvas, result.object);
 }
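Note the box math in the new `object()` above: object boxes arrive as `[x0, y0, x1, y1]` corners, so width and height are derived as `box[2] - box[0]` and `box[3] - box[1]`, unlike `hand()`, which receives boxes already in `[x, y, width, height]` form. A sketch with hypothetical data illustrating the expected shape:

```ts
// Hypothetical detection used only to show the corner-format box layout
// that draw.object() expects; all values are made up.
const objects = [{
  score: 0.87, strideSize: 1, class: 1, label: 'person',
  center: [320, 240], centerRaw: [0.5, 0.5],
  box: [100, 50, 540, 430],         // [x0, y0, x1, y1] corners in pixels
  boxRaw: [0.16, 0.10, 0.84, 0.90], // same corners, normalized 0..1
}];
await human.draw.object(canvas, objects);
```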
diff --git a/src/human.ts b/src/human.ts
index e6004bb3..226f259a 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -10,6 +10,7 @@ import * as embedding from './embedding/embedding';
 import * as posenet from './posenet/posenet';
 import * as handpose from './handpose/handpose';
 import * as blazepose from './blazepose/blazepose';
+import * as nanodet from './nanodet/nanodet';
 import * as gesture from './gesture/gesture';
 import * as image from './image';
 import * as profile from './profile';
@@ -35,12 +36,12 @@ export type Result = {
     mesh: Array<[Number, Number, Number]>
     meshRaw: Array<[Number, Number, Number]>
     boxRaw: [Number, Number, Number, Number],
-    annotations: any,
+    annotations: Array<{ part: String, points: Array<[Number, Number, Number]>[] }>,
     age: Number,
     gender: String,
     genderConfidence: Number,
-    emotion: String,
-    embedding: any,
+    emotion: Array<{ score: Number, emotion: String }>,
+    embedding: Array<Number>,
     iris: Number,
     angle: { roll: Number | null, yaw: Number | null, pitch: Number | null },
   }>,
@@ -52,16 +53,17 @@ export type Result = {
     presence: Number }>,
   hand: Array<{
     confidence: Number,
-    box: any,
-    landmarks: any,
-    annotations: any,
+    box: [Number, Number, Number, Number],
+    landmarks: Array<[Number, Number, Number]>,
+    annotations: Array<{ part: String, points: Array<[Number, Number, Number]>[] }>,
   }>,
   gesture: Array<{
     part: String,
     gesture: String,
   }>,
+  object: Array<{ score: Number, strideSize: Number, class: Number, label: String, center: Number[], centerRaw: Number[], box: Number[], boxRaw: Number[] }>,
   performance: { any },
-  canvas: OffscreenCanvas | HTMLCanvasElement,
+  canvas: OffscreenCanvas | HTMLCanvasElement | null,
 }
 
 export type { default as Config } from '../config';
@@ -100,6 +102,7 @@ export class Human {
     gender: Model | null,
     emotion: Model | null,
     embedding: Model | null,
+    nanodet: Model | null,
   };
   classes: {
     facemesh: typeof facemesh;
@@ -108,6 +111,7 @@ export class Human {
     emotion: typeof emotion;
     body: typeof posenet | typeof blazepose;
     hand: typeof handpose;
+    nanodet: typeof nanodet;
   };
   sysinfo: { platform: String, agent: String };
   #package: any;
@@ -141,6 +145,7 @@ export class Human {
       gender: null,
      emotion: null,
       embedding: null,
+      nanodet: null,
     };
     // export access to image processing
     // @ts-ignore
@@ -153,6 +158,7 @@ export class Human {
       emotion,
       body: this.config.body.modelPath.includes('posenet') ? posenet : blazepose,
       hand: handpose,
+      nanodet,
     };
     // include platform info
     this.sysinfo = sysinfo.info();
@@ -231,6 +237,7 @@ export class Human {
         this.models.handpose,
         this.models.posenet,
         this.models.blazepose,
+        this.models.nanodet,
       ] = await Promise.all([
         this.models.face || (this.config.face.enabled ? facemesh.load(this.config) : null),
         this.models.age || ((this.config.face.enabled && this.config.face.age.enabled) ? age.load(this.config) : null),
@@ -240,6 +247,7 @@ export class Human {
         this.models.handpose || (this.config.hand.enabled ? handpose.load(this.config) : null),
         this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('posenet') ? posenet.load(this.config) : null),
         this.models.blazepose || (this.config.body.enabled && this.config.body.modelPath.includes('blazepose') ? blazepose.load(this.config) : null),
+        this.models.nanodet || (this.config.object.enabled ? nanodet.load(this.config) : null),
       ]);
     } else {
       if (this.config.face.enabled && !this.models.face) this.models.face = await facemesh.load(this.config);
@@ -250,6 +258,7 @@ export class Human {
       if (this.config.hand.enabled && !this.models.handpose) this.models.handpose = await handpose.load(this.config);
       if (this.config.body.enabled && !this.models.posenet && this.config.body.modelPath.includes('posenet')) this.models.posenet = await posenet.load(this.config);
       if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelPath.includes('blazepose')) this.models.blazepose = await blazepose.load(this.config);
+      if (this.config.object.enabled && !this.models.nanodet) this.models.nanodet = await nanodet.load(this.config);
     }
 
     if (this.#firstRun) {
@@ -512,6 +521,7 @@ export class Human {
     let bodyRes;
     let handRes;
     let faceRes;
+    let objectRes;
 
     // run face detection followed by all models that rely on face bounding box: face mesh, age, gender, emotion
     if (this.config.async) {
@@ -552,9 +562,22 @@ export class Human {
     }
     this.#analyze('End Hand:');
 
+    // run nanodet
+    this.#analyze('Start Object:');
+    if (this.config.async) {
+      objectRes = this.config.object.enabled ? nanodet.predict(process.tensor, this.config) : [];
+      if (this.#perf.object) delete this.#perf.object;
+    } else {
+      this.state = 'run:object';
+      timeStamp = now();
+      objectRes = this.config.object.enabled ? await nanodet.predict(process.tensor, this.config) : [];
+      this.#perf.object = Math.trunc(now() - timeStamp);
+    }
+    this.#analyze('End Object:');
+
     // if async wait for results
     if (this.config.async) {
-      [faceRes, bodyRes, handRes] = await Promise.all([faceRes, bodyRes, handRes]);
+      [faceRes, bodyRes, handRes, objectRes] = await Promise.all([faceRes, bodyRes, handRes, objectRes]);
     }
 
     process.tensor.dispose();
@@ -572,7 +595,7 @@ export class Human {
 
       this.#perf.total = Math.trunc(now() - timeStart);
       this.state = 'idle';
-      resolve({ face: faceRes, body: bodyRes, hand: handRes, gesture: gestureRes, performance: this.#perf, canvas: process.canvas });
+      resolve({ face: faceRes, body: bodyRes, hand: handRes, gesture: gestureRes, object: objectRes, performance: this.#perf, canvas: process.canvas });
     });
   }
 
@@ -644,13 +667,13 @@ export class Human {
   async warmup(userConfig: Object = {}): Promise<Result> {
     const t0 = now();
     if (userConfig) this.config = mergeDeep(this.config, userConfig);
-    const video = this.config.videoOptimized;
+    const save = this.config.videoOptimized;
     this.config.videoOptimized = false;
     let res;
     if (typeof createImageBitmap === 'function') res = await this.#warmupBitmap();
     else if (typeof Image !== 'undefined') res = await this.#warmupCanvas();
     else res = await this.#warmupNode();
-    this.config.videoOptimized = video;
+    this.config.videoOptimized = save;
     const t1 = now();
     if (this.config.debug) log('Warmup', this.config.warmup, Math.round(t1 - t0), 'ms', res);
     return res;
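With the pipeline wired up, `detect()` now resolves with an `object` array matching the extended `Result` type; in async mode the nanodet promise is simply gathered in the final `Promise.all`. A minimal consumption sketch:

```ts
// Reading the new object results; field names follow the Result type above.
const result = await human.detect(image);
for (const obj of result.object) {
  console.log(obj.label, obj.score, obj.box); // e.g. 'person' 0.87 [100, 50, 540, 430]
}
```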
diff --git a/src/nanodet/nanodet.ts b/src/nanodet/nanodet.ts
new file mode 100644
index 00000000..562519e6
--- /dev/null
+++ b/src/nanodet/nanodet.ts
@@ -0,0 +1,124 @@
+import { log } from '../log';
+import * as tf from '../../dist/tfjs.esm.js';
+import * as profile from '../profile';
+
+let model;
+let last: Array<{}> = [];
+let skipped = Number.MAX_SAFE_INTEGER;
+
+const scaleBox = 2.5; // increase box size
+// eslint-disable-next-line max-len
+const labels = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'vehicle', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'animal', 'animal', 'animal', 'animal', 'animal', 'animal', 'animal', 'bear', 'animal', 'animal', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'pastry', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'];
+
+export async function load(config) {
+  if (!model) {
+    model = await tf.loadGraphModel(config.object.modelPath);
+    // @ts-ignore
+    model.inputSize = parseInt(Object.values(model.modelSignature['inputs'])[0].tensorShape.dim[2].size);
+    if (config.debug) log(`load model: ${config.object.modelPath.match(/\/(.*)\./)[1]}`);
+  }
+  return model;
+}
+
+async function process(res, inputSize, outputShape, config) {
+  let results: Array<{ score: Number, strideSize: Number, class: Number, label: String, center: Number[], centerRaw: Number[], box: Number[], boxRaw: Number[] }> = [];
+  for (const strideSize of [1, 2, 4]) { // try each stride size as it detects large/medium/small objects
+    // find scores, boxes, classes
+    tf.tidy(() => { // wrap in tidy to automatically deallocate temp tensors
+      const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704
+      // find boxes and scores output depending on stride
+      // log.info('Variation:', strideSize, 'strides', baseSize, 'baseSize');
+      const scores = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 80))?.squeeze();
+      const features = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 32))?.squeeze();
+      // log.state('Found features tensor:', features?.shape);
+      // log.state('Found scores tensor:', scores?.shape);
+      const scoreIdx = scores.argMax(1).dataSync(); // location of highest scores
+      const scoresMax = scores.max(1).dataSync(); // values of highest scores
+      const boxesMax = features.reshape([-1, 4, 8]); // reshape [32] to [4,8] where each of the 4 box edges has 8 offset bins within the stride
+      const boxIdx = boxesMax.argMax(2).arraySync(); // what we need are the indexes of the features with the highest scores, not the values themselves
+      for (let i = 0; i < scores.shape[0]; i++) {
+        if (scoreIdx[i] !== 0 && scoresMax[i] > config.object.minConfidence) {
+          const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1
+          const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1
+          const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize)); // scale best-scoring offset indexes to normalized units
+          let boxRaw = [ // results normalized to range 0..1
+            cx - (scaleBox / strideSize * boxOffset[0]),
+            cy - (scaleBox / strideSize * boxOffset[1]),
+            cx + (scaleBox / strideSize * boxOffset[2]),
+            cy + (scaleBox / strideSize * boxOffset[3]),
+          ];
+          boxRaw = boxRaw.map((a) => Math.max(0, Math.min(a, 1))); // fix out-of-bounds coords
+          const box = [ // results normalized to input image pixels
+            boxRaw[0] * outputShape[0],
+            boxRaw[1] * outputShape[1],
+            boxRaw[2] * outputShape[0],
+            boxRaw[3] * outputShape[1],
+          ];
+          const result = {
+            score: scoresMax[i],
+            strideSize,
+            class: scoreIdx[i] + 1,
+            label: labels[scoreIdx[i]],
+            center: [Math.trunc(outputShape[0] * cx), Math.trunc(outputShape[1] * cy)],
+            centerRaw: [cx, cy],
+            box: box.map((a) => Math.trunc(a)),
+            boxRaw,
+          };
+          results.push(result);
+        }
+      }
+    });
+  }
+  // deallocate tensors
+  res.forEach((t) => tf.dispose(t));
+
+  // normally nms is run on raw results, but since boxes have to be calculated here anyway, we skip calculation of
+  // unnecessary boxes and run nms only on good candidates (it effectively performs just the IoU analysis, as scores are already filtered)
+  const nmsBoxes = results.map((a) => a.boxRaw);
+  const nmsScores = results.map((a) => a.score);
+  const nms = await tf.image.nonMaxSuppressionAsync(nmsBoxes, nmsScores, config.object.maxResults, config.object.iouThreshold, config.object.minConfidence);
+  const nmsIdx = nms.dataSync();
+  tf.dispose(nms);
+
+  // filter & sort results
+  results = results
+    .filter((a, idx) => nmsIdx.includes(idx))
+    // @ts-ignore
+    .sort((a, b) => (b.score - a.score));
+
+  return results;
+}
+
+export async function predict(image, config) {
+  if (!model) return null;
+  // console.log(skipped, config.object.skipFrames, config.videoOptimized, ((skipped < config.object.skipFrames) && config.videoOptimized && (last.length > 0)));
+  if ((skipped < config.object.skipFrames) && config.videoOptimized && (last.length > 0)) {
+    skipped++;
+    return last;
+  }
+  if (config.videoOptimized) skipped = 0;
+  else skipped = Number.MAX_SAFE_INTEGER;
+  return new Promise(async (resolve) => {
+    const outputSize = [image.shape[2], image.shape[1]];
+    const resize = tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false);
+    const norm = resize.div(255);
+    resize.dispose();
+    const transpose = norm.transpose([0, 3, 1, 2]);
+    norm.dispose();
+
+    let objectT;
+    if (!config.profile) {
+      if (config.object.enabled) objectT = await model.predict(transpose);
+    } else {
+      const profileObject = config.object.enabled ? await tf.profile(() => model.predict(transpose)) : {};
+      objectT = profileObject.result.clone();
+      profileObject.result.dispose();
+      profile.run('object', profileObject);
+    }
+    transpose.dispose();
+
+    const obj = await process(objectT, model.inputSize, outputSize, config);
+    last = obj;
+    resolve(obj);
+  });
+}
diff --git a/wiki b/wiki
index c4a3b6f1..5c012ed4 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit c4a3b6f1f99cb1723ebd32be702e52d44276169d
+Subproject commit 5c012ed4cccd0efd6cad88a5b7346d1a29176954
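For reference, a worked example of the grid-cell center decoding used in `process()` in nanodet.ts above, with an arbitrarily chosen cell index:

```ts
// strideSize 1 => baseSize 13, i.e. a 13x13 grid of 169 cells;
// cell i maps to a normalized center inside its grid cell.
const baseSize = 13;
const i = 40;                                            // arbitrary cell index
const cx = (0.5 + (i % baseSize)) / baseSize;            // (0.5 + 1) / 13 ≈ 0.115
const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize;  // (0.5 + 3) / 13 ≈ 0.269
```

The box edges are then offset from this center by the best-scoring distance bin per side, scaled by `scaleBox / strideSize`, which is why larger strides (coarser grids) produce proportionally larger boxes.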