human/src/object/nanodet.ts

134 lines
6.1 KiB
TypeScript
Raw Normal View History

2021-05-25 14:58:20 +02:00
/**
* NanoDet object detection model implementation
*
* Based on: [**MB3-CenterNet**](https://github.com/610265158/mobilenetv3_centernet)
2021-05-25 14:58:20 +02:00
*/
2022-01-17 17:03:21 +01:00
import { log, now } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
2022-01-16 15:49:55 +01:00
import { loadModel } from '../tfjs/load';
2021-11-17 02:16:49 +01:00
import { constants } from '../tfjs/constants';
2021-03-23 19:46:44 +01:00
import { labels } from './labels';
2021-12-15 15:26:32 +01:00
import type { ObjectResult, ObjectType, Box } from '../result';
2021-09-13 19:28:35 +02:00
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
2021-09-27 19:58:13 +02:00
import { env } from '../util/env';
2021-12-28 17:39:54 +01:00
// Cached graph model instance; reloaded when env.initial is set (see load())
let model: GraphModel;
// Detection results from the most recent full inference pass, reused while frames are skipped
let last: ObjectResult[] = [];
// Timestamp (ms) of the last inference, used for time-based frame skipping
let lastTime = 0;
// Frames skipped since the last inference; starts at max to force a first full run
let skipped = Number.MAX_SAFE_INTEGER;
// Square model input resolution, derived from the model signature at load time
let inputSize = 0;
const scaleBox = 2.5; // increase box size
export async function load(config: Config): Promise<GraphModel> {
2021-09-17 17:23:00 +02:00
if (!model || env.initial) {
2022-01-17 17:03:21 +01:00
model = await loadModel(config.object.modelPath);
2022-08-30 16:28:33 +02:00
const inputs = model?.['executor'] ? Object.values(model.modelSignature['inputs']) : undefined;
inputSize = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 416;
2021-12-28 17:39:54 +01:00
} else if (config.debug) log('cached model:', model['modelUrl']);
return model;
}
2021-12-28 17:39:54 +01:00
/**
 * Decodes raw NanoDet model outputs into detection results
 *
 * Iterates over the three stride levels (detecting large/medium/small objects),
 * filters candidates by confidence, reconstructs boxes from per-stride feature
 * offsets, then runs non-max suppression on the surviving candidates only.
 *
 * @param res raw model output tensors (per-stride score and feature maps)
 * @param outputShape original image size as [width, height] in pixels
 * @param config global configuration providing `config.object` thresholds
 * @returns detections sorted by descending score
 */
async function process(res: Tensor[], outputShape: [number, number], config: Config) {
  let id = 0;
  let results: ObjectResult[] = [];
  const size = inputSize;
  for (const strideSize of [1, 2, 4]) { // try each stride size as it detects large/medium/small objects
    // find scores, boxes, classes
    const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704
    // find boxes and scores output depending on stride
    // outputs are matched by shape: the tensor whose second dim equals labels.length holds scores,
    // the one with fewer channels holds box features
    const scoresT = tf.squeeze(res.find((a: Tensor) => (a.shape[1] === (baseSize ** 2) && (a.shape[2] || 0) === labels.length)));
    const scores = await scoresT.array(); // optionally use exponential scores or just as-is
    const featuresT = tf.squeeze(res.find((a: Tensor) => (a.shape[1] === (baseSize ** 2) && (a.shape[2] || 0) < labels.length)));
    const boxesMaxT = featuresT.reshape([-1, 4, featuresT.shape[1] / 4]); // reshape [output] to [4, output / 4] where number is number of different features inside each stride
    const boxIdxT = boxesMaxT.argMax(2); // what we need is indexes of features with highest scores, not values itself
    const boxIdx = await boxIdxT.array(); // what we need is indexes of features with highest scores, not values itself
    for (let i = 0; i < scoresT.shape[0]; i++) { // total strides (x * y matrix)
      for (let j = 0; j < scoresT.shape[1]; j++) { // one score for each class
        const score = scores[i][j]; // get score for current position
        // NOTE(review): class index 61 is explicitly skipped — presumably a known-noisy label; confirm intent
        if (score > (config.object.minConfidence || 0) && j !== 61) {
          const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1
          const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1
          const boxOffset = boxIdx[i].map((a: number) => a * (baseSize / strideSize / (size))); // just grab indexes of features with highest scores
          // top-left corner from center minus scaled offsets [0],[1]
          const [x, y] = [
            cx - (scaleBox / strideSize * boxOffset[0]),
            cy - (scaleBox / strideSize * boxOffset[1]),
          ];
          // width/height from center plus scaled offsets [2],[3], relative to the corner
          const [w, h] = [
            cx + (scaleBox / strideSize * boxOffset[2]) - x,
            cy + (scaleBox / strideSize * boxOffset[3]) - y,
          ];
          let boxRaw: Box = [x, y, w, h]; // results normalized to range 0..1
          boxRaw = boxRaw.map((a) => Math.max(0, Math.min(a, 1))) as Box; // fix out-of-bounds coords
          const box = [ // results normalized to input image pixels
            boxRaw[0] * outputShape[0],
            boxRaw[1] * outputShape[1],
            boxRaw[2] * outputShape[0],
            boxRaw[3] * outputShape[1],
          ];
          const result = {
            id: id++,
            // strideSize,
            score: Math.round(100 * score) / 100,
            class: j + 1,
            label: labels[j].label as ObjectType,
            // center: [Math.trunc(outputShape[0] * cx), Math.trunc(outputShape[1] * cy)],
            // centerRaw: [cx, cy],
            box: box.map((a) => Math.trunc(a)) as Box,
            boxRaw,
          };
          results.push(result);
        }
      }
    }
    tf.dispose([scoresT, featuresT, boxesMaxT, boxIdxT]); // release per-stride intermediates
  }
  // normally nms is run on raw results, but since boxes need to be calculated this way we skip calulcation of
  // unnecessary boxes and run nms only on good candidates (basically it just does IOU analysis as scores are already filtered)
  const nmsBoxes = results.map((a) => [a.boxRaw[1], a.boxRaw[0], a.boxRaw[3], a.boxRaw[2]]); // switches coordinates from x,y to y,x as expected by tf.nms
  const nmsScores = results.map((a) => a.score);
  let nmsIdx: number[] = [];
  if (nmsBoxes && nmsBoxes.length > 0) {
    const nms = await tf.image.nonMaxSuppressionAsync(nmsBoxes, nmsScores, config.object.maxDetected, config.object.iouThreshold, config.object.minConfidence);
    // NOTE(review): nms.data() yields a TypedArray, assigned here to number[] — works for includes(), but the type is loose
    nmsIdx = await nms.data();
    tf.dispose(nms);
  }
  // filter & sort results
  results = results
    .filter((_val, idx) => nmsIdx.includes(idx))
    .sort((a, b) => (b.score - a.score));
  return results;
}
2021-09-12 05:54:35 +02:00
export async function predict(image: Tensor, config: Config): Promise<ObjectResult[]> {
2022-08-30 16:28:33 +02:00
if (!model?.['executor']) return [];
2021-10-23 15:38:52 +02:00
const skipTime = (config.object.skipTime || 0) > (now() - lastTime);
const skipFrame = skipped < (config.object.skipFrames || 0);
if (config.skipAllowed && skipTime && skipFrame && (last.length > 0)) {
skipped++;
return last;
}
skipped = 0;
2021-09-12 19:17:33 +02:00
if (!env.kernels.includes('mod') || !env.kernels.includes('sparsetodense')) return last;
return new Promise(async (resolve) => {
2021-12-28 17:39:54 +01:00
const outputSize = [image.shape[2] || 0, image.shape[1] || 0];
2022-08-08 21:09:26 +02:00
const resizeT = tf.image.resizeBilinear(image, [inputSize, inputSize], false);
const normT = tf.div(resizeT, constants.tf255);
const transposeT = tf.transpose(normT, [0, 3, 1, 2]);
let objectT;
2022-08-08 21:09:26 +02:00
if (config.object.enabled) objectT = model.execute(transposeT);
2021-10-22 22:09:52 +02:00
lastTime = now();
2021-12-28 17:39:54 +01:00
const obj = await process(objectT as Tensor[], outputSize as [number, number], config);
last = obj;
2022-08-08 21:09:26 +02:00
tf.dispose([resizeT, normT, transposeT, ...objectT]);
resolve(obj);
});
}