implement nanodet

pull/293/head
Vladimir Mandic 2021-03-27 10:25:31 -04:00
parent 1dd860d112
commit ae9d6caabc
6 changed files with 53 additions and 35 deletions

View File

@ -8,13 +8,19 @@ import GLBench from './gl-bench.js';
const userConfig = {
backend: 'webgl',
async: false,
profile: false,
warmup: 'full',
videoOptimized: true,
filter: { enabled: true },
face: { enabled: false, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: false } },
face: { enabled: true,
mesh: { enabled: true },
iris: { enabled: true },
description: { enabled: true },
emotion: { enabled: true },
},
hand: { enabled: false },
gesture: { enabled: false },
body: { enabled: true, modelPath: '../models/efficientpose.json' },
body: { enabled: false, modelPath: '../models/blazepose.json' },
object: { enabled: false },
};
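
For reference, a minimal sketch (not part of this commit) of how a userConfig like the one above is typically passed to the Human constructor and used in a detection loop; the import path and the videoElement name are illustrative assumptions:

import Human from '../dist/human.esm.js'; // illustrative path

const human = new Human(userConfig); // overrides are merged with library defaults

async function detectLoop(videoElement) {
  const result = await human.detect(videoElement); // only the enabled modules (face, mesh, iris, description, emotion) run
  console.log(result);
  requestAnimationFrame(() => detectLoop(videoElement));
}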

View File

@ -56,13 +56,13 @@
"@tensorflow/tfjs-layers": "^3.3.0",
"@tensorflow/tfjs-node": "^3.3.0",
"@tensorflow/tfjs-node-gpu": "^3.3.0",
"@types/node": "^14.14.36",
"@types/node": "^14.14.37",
"@typescript-eslint/eslint-plugin": "^4.19.0",
"@typescript-eslint/parser": "^4.19.0",
"@vladmandic/pilogger": "^0.2.15",
"chokidar": "^3.5.1",
"dayjs": "^1.10.4",
"esbuild": "^0.10.1",
"esbuild": "^0.10.2",
"eslint": "^7.23.0",
"eslint-config-airbnb-base": "^14.2.1",
"eslint-plugin-import": "^2.22.1",

View File

@ -110,10 +110,9 @@ export async function predict(image, config) {
if (!config.profile) {
if (config.face.description.enabled) resT = await model.predict(enhanced);
} else {
const profileAge = config.face.description.enabled ? await tf.profile(() => model.predict(enhanced)) : {};
resT = profileAge.result.clone();
profileAge.result.dispose();
profile.run('age', profileAge);
const profileDesc = config.face.description.enabled ? await tf.profile(() => model.predict(enhanced)) : {};
resT = profileDesc.result;
profile.run('faceres', profileDesc);
}
tf.dispose(enhanced);
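
A condensed sketch (illustrative, not part of the diff) of the profiling pattern after this change; tf and profile are the module's existing imports, and the result tensor returned by tf.profile is now reused directly instead of being cloned and disposed:

async function profileFaceRes(model, enhanced) { // model and enhanced come from predict() above
  const info = await tf.profile(() => model.predict(enhanced));
  // info contains { newBytes, newTensors, peakBytes, kernels: [{ name, kernelTimeMs, totalBytesSnapshot, ... }], result }
  profile.run('faceres', info); // summarized by the profiler (last file in this commit)
  return info.result;           // the prediction tensor itself, no clone/dispose round-trip
}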

View File

@ -292,8 +292,10 @@ export class Human {
}
this.tf.enableProdMode();
/* debug mode is really too much
tf.enableDebugMode();
this.tf.enableDebugMode();
*/
this.tf.ENV.set('CHECK_COMPUTATION_FOR_ERRORS', false);
this.tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', true);
if (this.tf.getBackend() === 'webgl') {
if (this.config.deallocate) {
log('changing webgl: WEBGL_DELETE_TEXTURE_THRESHOLD:', this.config.deallocate);

View File

@ -8,7 +8,6 @@ let last: Array<{}> = [];
let skipped = Number.MAX_SAFE_INTEGER;
const scaleBox = 2.5; // increase box size
const activateScore = false;
export async function load(config) {
if (!model) {
@ -28,24 +27,27 @@ async function process(res, inputSize, outputShape, config) {
tf.tidy(() => { // wrap in tidy to automatically deallocate temp tensors
const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704
// find boxes and scores output depending on stride
const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 80))?.squeeze();
const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < 80))?.squeeze();
const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === labels.length))?.squeeze();
const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < labels.length))?.squeeze();
const boxesMax = featuresT.reshape([-1, 4, featuresT.shape[1] / 4]); // reshape [output] to [4, output / 4] where the second dimension is the number of different features inside each stride
const boxIdx = boxesMax.argMax(2).arraySync(); // what we need are the indexes of the features with the highest scores, not the values themselves
const scores = activateScore ? scoresT.exp(1).arraySync() : scoresT.arraySync(); // optionally use exponential scores or just as-is
const scores = scoresT.arraySync(); // use raw scores as-is (exponential activation removed)
for (let i = 0; i < scoresT.shape[0]; i++) { // total strides (x * y matrix)
for (let j = 0; j < scoresT.shape[1]; j++) { // one score for each class
const score = scores[i][j] - (activateScore ? 1 : 0); // get score for current position
if (score > config.object.minConfidence) {
const score = scores[i][j]; // get score for current position
if (score > config.object.minConfidence && j !== 61) {
const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1
const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1
const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize)); // just grab indexes of features with highest scores
let boxRaw = [ // results normalized to range 0..1
const [x, y] = [
cx - (scaleBox / strideSize * boxOffset[0]),
cy - (scaleBox / strideSize * boxOffset[1]),
cx + (scaleBox / strideSize * boxOffset[2]),
cy + (scaleBox / strideSize * boxOffset[3]),
];
const [w, h] = [
cx + (scaleBox / strideSize * boxOffset[2]) - x,
cy + (scaleBox / strideSize * boxOffset[3]) - y,
];
let boxRaw = [x, y, w, h]; // results normalized to range 0..1
boxRaw = boxRaw.map((a) => Math.max(0, Math.min(a, 1))); // fix out-of-bounds coords
const box = [ // results normalized to input image pixels
boxRaw[0] * outputShape[0],
@ -77,14 +79,16 @@ async function process(res, inputSize, outputShape, config) {
// unnecessary boxes and run nms only on good candidates (effectively just IoU analysis, since scores are already filtered)
const nmsBoxes = results.map((a) => a.boxRaw);
const nmsScores = results.map((a) => a.score);
const nms = await tf.image.nonMaxSuppressionAsync(nmsBoxes, nmsScores, config.object.maxResults, config.object.iouThreshold, config.object.minConfidence);
const nmsIdx = nms.dataSync();
tf.dispose(nms);
let nmsIdx: any[] = [];
if (nmsBoxes && nmsBoxes.length > 0) {
const nms = await tf.image.nonMaxSuppressionAsync(nmsBoxes, nmsScores, config.object.maxResults, config.object.iouThreshold, config.object.minConfidence);
nmsIdx = nms.dataSync();
tf.dispose(nms);
}
// filter & sort results
results = results
.filter((a, idx) => nmsIdx.includes(idx))
// @ts-ignore
.sort((a, b) => (b.score - a.score));
return results;
@ -103,17 +107,16 @@ export async function predict(image, config) {
const outputSize = [image.shape[2], image.shape[1]];
const resize = tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false);
const norm = resize.div(255);
resize.dispose();
const transpose = norm.transpose([0, 3, 1, 2]);
norm.dispose();
resize.dispose();
let objectT;
if (!config.profile) {
if (config.object.enabled) objectT = await model.predict(transpose);
if (config.object.enabled) objectT = await model.executeAsync(transpose);
} else {
const profileObject = config.object.enabled ? await tf.profile(() => model.predict(transpose)) : {};
objectT = profileObject.result.clone();
profileObject.result.dispose();
const profileObject = config.object.enabled ? await tf.profile(() => model.executeAsync(transpose)) : {};
objectT = profileObject.result;
profile.run('object', profileObject);
}
transpose.dispose();
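
A worked sketch of the box decode introduced above (every concrete number here is an assumption for illustration, not taken from the model): each grid-cell index maps to a normalized center, and the four selected offset features expand it into a top-left corner plus width and height:

const inputSize = 416;                 // assumed nanodet input resolution
const strideSize = 1;                  // first stride, giving a 13x13 grid
const baseSize = strideSize * 13;
const scaleBox = 2.5;
const i = 97;                          // example cell index within the 13x13 grid
const boxIdx = [3, 2, 4, 5];           // example argmax feature indexes for this cell
const boxOffset = boxIdx.map((a) => a * (baseSize / strideSize / inputSize));

const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize;    // normalized center x
const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize;    // normalized center y
const x = cx - (scaleBox / strideSize) * boxOffset[0];     // top-left x
const y = cy - (scaleBox / strideSize) * boxOffset[1];     // top-left y
const w = cx + (scaleBox / strideSize) * boxOffset[2] - x; // width
const h = cy + (scaleBox / strideSize) * boxOffset[3] - y; // height
const boxRaw = [x, y, w, h].map((a) => Math.max(0, Math.min(a, 1))); // clamped to 0..1 before scaling to pixels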

View File

@ -2,23 +2,31 @@ import { log } from './helpers';
export const data = {};
export function run(name: string, raw: any): void {
if (!raw || !raw.kernels) return;
export function run(modelName: string, profileData: any): void {
if (!profileData || !profileData.kernels) return;
const maxResults = 5;
const time = raw.kernels
const time = profileData.kernels
.filter((a) => a.kernelTimeMs > 0)
.reduce((a, b) => a += b.kernelTimeMs, 0);
const slowest = raw.kernels
const slowest = profileData.kernels
.map((a, i) => { a.id = i; return a; })
.filter((a) => a.kernelTimeMs > 0)
.sort((a, b) => b.kernelTimeMs - a.kernelTimeMs);
const largest = raw.kernels
const largest = profileData.kernels
.map((a, i) => { a.id = i; return a; })
.filter((a) => a.totalBytesSnapshot > 0)
.sort((a, b) => b.totalBytesSnapshot - a.totalBytesSnapshot);
if (slowest.length > maxResults) slowest.length = maxResults;
if (largest.length > maxResults) largest.length = maxResults;
const res = { newBytes: raw.newBytes, newTensors: raw.newTensors, peakBytes: raw.peakBytes, numKernelOps: raw.kernels.length, timeKernelOps: time, slowestKernelOps: slowest, largestKernelOps: largest };
data[name] = res;
log('Human profiler', name, res);
data[modelName] = {
model: modelName,
newBytes: profileData.newBytes,
newTensors: profileData.newTensors,
peakBytes: profileData.peakBytes,
numKernelOps: profileData.kernels.length,
timeKernelOps: time,
slowestKernelOps: slowest,
largestKernelOps: largest,
};
log('profiler', modelName, data[modelName]);
}
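
An illustrative usage sketch for the reworked profiler record (someModel and someInput are placeholders): the object returned by tf.profile is passed straight through, and the summary is keyed by model name in the exported data object:

import * as tf from '@tensorflow/tfjs';
import * as profile from './profile';

async function profileExample(someModel, someInput) { // placeholder model and input
  const info = await tf.profile(() => someModel.predict(someInput));
  profile.run('object', info);
  console.log(profile.data); // { object: { model: 'object', newBytes, newTensors, peakBytes, numKernelOps, timeKernelOps, slowestKernelOps, largestKernelOps } }
}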