From ae9d6caabc345fb94902bf4649943d8e298a59d2 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 27 Mar 2021 10:25:31 -0400 Subject: [PATCH] implement nanodet --- demo/browser.js | 10 ++++++++-- package.json | 4 ++-- src/faceres/faceres.ts | 7 +++---- src/human.ts | 4 +++- src/nanodet/nanodet.ts | 39 +++++++++++++++++++++------------------ src/profile.ts | 24 ++++++++++++++++-------- 6 files changed, 53 insertions(+), 35 deletions(-) diff --git a/demo/browser.js b/demo/browser.js index 934ffe7d..9f442b06 100644 --- a/demo/browser.js +++ b/demo/browser.js @@ -8,13 +8,19 @@ import GLBench from './gl-bench.js'; const userConfig = { backend: 'webgl', async: false, + profile: false, warmup: 'full', videoOptimized: true, filter: { enabled: true }, - face: { enabled: false, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: false } }, + face: { enabled: true, + mesh: { enabled: true }, + iris: { enabled: true }, + description: { enabled: true }, + emotion: { enabled: true }, + }, hand: { enabled: false }, gesture: { enabled: false }, - body: { enabled: true, modelPath: '../models/efficientpose.json' }, + body: { enabled: false, modelPath: '../models/blazepose.json' }, object: { enabled: false }, }; diff --git a/package.json b/package.json index 0e483721..15978773 100644 --- a/package.json +++ b/package.json @@ -56,13 +56,13 @@ "@tensorflow/tfjs-layers": "^3.3.0", "@tensorflow/tfjs-node": "^3.3.0", "@tensorflow/tfjs-node-gpu": "^3.3.0", - "@types/node": "^14.14.36", + "@types/node": "^14.14.37", "@typescript-eslint/eslint-plugin": "^4.19.0", "@typescript-eslint/parser": "^4.19.0", "@vladmandic/pilogger": "^0.2.15", "chokidar": "^3.5.1", "dayjs": "^1.10.4", - "esbuild": "^0.10.1", + "esbuild": "^0.10.2", "eslint": "^7.23.0", "eslint-config-airbnb-base": "^14.2.1", "eslint-plugin-import": "^2.22.1", diff --git a/src/faceres/faceres.ts b/src/faceres/faceres.ts index 4b6c83a6..a5700fda 100644 --- a/src/faceres/faceres.ts +++ b/src/faceres/faceres.ts @@ -110,10 +110,9 @@ export async function predict(image, config) { if (!config.profile) { if (config.face.description.enabled) resT = await model.predict(enhanced); } else { - const profileAge = config.face.description.enabled ? await tf.profile(() => model.predict(enhanced)) : {}; - resT = profileAge.result.clone(); - profileAge.result.dispose(); - profile.run('age', profileAge); + const profileDesc = config.face.description.enabled ? await tf.profile(() => model.predict(enhanced)) : {}; + resT = profileDesc.result; + profile.run('faceres', profileDesc); } tf.dispose(enhanced); diff --git a/src/human.ts b/src/human.ts index 9a914b20..de5c3b10 100644 --- a/src/human.ts +++ b/src/human.ts @@ -292,8 +292,10 @@ export class Human { } this.tf.enableProdMode(); /* debug mode is really too mcuh - tf.enableDebugMode(); + this.tf.enableDebugMode(); */ + this.tf.ENV.set('CHECK_COMPUTATION_FOR_ERRORS', false); + this.tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', true); if (this.tf.getBackend() === 'webgl') { if (this.config.deallocate) { log('changing webgl: WEBGL_DELETE_TEXTURE_THRESHOLD:', this.config.deallocate); diff --git a/src/nanodet/nanodet.ts b/src/nanodet/nanodet.ts index 5ffffd52..5529bd4e 100644 --- a/src/nanodet/nanodet.ts +++ b/src/nanodet/nanodet.ts @@ -8,7 +8,6 @@ let last: Array<{}> = []; let skipped = Number.MAX_SAFE_INTEGER; const scaleBox = 2.5; // increase box size -const activateScore = false; export async function load(config) { if (!model) { @@ -28,24 +27,27 @@ async function process(res, inputSize, outputShape, config) { tf.tidy(() => { // wrap in tidy to automatically deallocate temp tensors const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704 // find boxes and scores output depending on stride - const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 80))?.squeeze(); - const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < 80))?.squeeze(); + const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === labels.length))?.squeeze(); + const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < labels.length))?.squeeze(); const boxesMax = featuresT.reshape([-1, 4, featuresT.shape[1] / 4]); // reshape [output] to [4, output / 4] where number is number of different features inside each stride const boxIdx = boxesMax.argMax(2).arraySync(); // what we need is indexes of features with highest scores, not values itself - const scores = activateScore ? scoresT.exp(1).arraySync() : scoresT.arraySync(); // optionally use exponential scores or just as-is + const scores = scoresT.arraySync(); // optionally use exponential scores or just as-is for (let i = 0; i < scoresT.shape[0]; i++) { // total strides (x * y matrix) for (let j = 0; j < scoresT.shape[1]; j++) { // one score for each class - const score = scores[i][j] - (activateScore ? 1 : 0); // get score for current position - if (score > config.object.minConfidence) { + const score = scores[i][j]; // get score for current position + if (score > config.object.minConfidence && j !== 61) { const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1 const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1 const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize)); // just grab indexes of features with highest scores - let boxRaw = [ // results normalized to range 0..1 + const [x, y] = [ cx - (scaleBox / strideSize * boxOffset[0]), cy - (scaleBox / strideSize * boxOffset[1]), - cx + (scaleBox / strideSize * boxOffset[2]), - cy + (scaleBox / strideSize * boxOffset[3]), ]; + const [w, h] = [ + cx + (scaleBox / strideSize * boxOffset[2]) - x, + cy + (scaleBox / strideSize * boxOffset[3]) - y, + ]; + let boxRaw = [x, y, w, h]; // results normalized to range 0..1 boxRaw = boxRaw.map((a) => Math.max(0, Math.min(a, 1))); // fix out-of-bounds coords const box = [ // results normalized to input image pixels boxRaw[0] * outputShape[0], @@ -77,14 +79,16 @@ async function process(res, inputSize, outputShape, config) { // unnecessary boxes and run nms only on good candidates (basically it just does IOU analysis as scores are already filtered) const nmsBoxes = results.map((a) => a.boxRaw); const nmsScores = results.map((a) => a.score); - const nms = await tf.image.nonMaxSuppressionAsync(nmsBoxes, nmsScores, config.object.maxResults, config.object.iouThreshold, config.object.minConfidence); - const nmsIdx = nms.dataSync(); - tf.dispose(nms); + let nmsIdx: any[] = []; + if (nmsBoxes && nmsBoxes.length > 0) { + const nms = await tf.image.nonMaxSuppressionAsync(nmsBoxes, nmsScores, config.object.maxResults, config.object.iouThreshold, config.object.minConfidence); + nmsIdx = nms.dataSync(); + tf.dispose(nms); + } // filter & sort results results = results .filter((a, idx) => nmsIdx.includes(idx)) - // @ts-ignore .sort((a, b) => (b.score - a.score)); return results; @@ -103,17 +107,16 @@ export async function predict(image, config) { const outputSize = [image.shape[2], image.shape[1]]; const resize = tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false); const norm = resize.div(255); - resize.dispose(); const transpose = norm.transpose([0, 3, 1, 2]); norm.dispose(); + resize.dispose(); let objectT; if (!config.profile) { - if (config.object.enabled) objectT = await model.predict(transpose); + if (config.object.enabled) objectT = await model.executeAsync(transpose); } else { - const profileObject = config.object.enabled ? await tf.profile(() => model.predict(transpose)) : {}; - objectT = profileObject.result.clone(); - profileObject.result.dispose(); + const profileObject = config.object.enabled ? await tf.profile(() => model.executeAsync(transpose)) : {}; + objectT = profileObject.result; profile.run('object', profileObject); } transpose.dispose(); diff --git a/src/profile.ts b/src/profile.ts index 43080f30..cd9101fa 100644 --- a/src/profile.ts +++ b/src/profile.ts @@ -2,23 +2,31 @@ import { log } from './helpers'; export const data = {}; -export function run(name: string, raw: any): void { - if (!raw || !raw.kernels) return; +export function run(modelName: string, profileData: any): void { + if (!profileData || !profileData.kernels) return; const maxResults = 5; - const time = raw.kernels + const time = profileData.kernels .filter((a) => a.kernelTimeMs > 0) .reduce((a, b) => a += b.kernelTimeMs, 0); - const slowest = raw.kernels + const slowest = profileData.kernels .map((a, i) => { a.id = i; return a; }) .filter((a) => a.kernelTimeMs > 0) .sort((a, b) => b.kernelTimeMs - a.kernelTimeMs); - const largest = raw.kernels + const largest = profileData.kernels .map((a, i) => { a.id = i; return a; }) .filter((a) => a.totalBytesSnapshot > 0) .sort((a, b) => b.totalBytesSnapshot - a.totalBytesSnapshot); if (slowest.length > maxResults) slowest.length = maxResults; if (largest.length > maxResults) largest.length = maxResults; - const res = { newBytes: raw.newBytes, newTensors: raw.newTensors, peakBytes: raw.peakBytes, numKernelOps: raw.kernels.length, timeKernelOps: time, slowestKernelOps: slowest, largestKernelOps: largest }; - data[name] = res; - log('Human profiler', name, res); + data[modelName] = { + model: modelName, + newBytes: profileData.newBytes, + newTensors: profileData.newTensors, + peakBytes: profileData.peakBytes, + numKernelOps: profileData.kernels.length, + timeKernelOps: time, + slowestKernelOps: slowest, + largestKernelOps: largest, + }; + log('profiler', modelName, data[modelName]); }