human/src/nanodet/nanodet.ts

import { log } from '../helpers';
import * as tf from '../../dist/tfjs.esm.js';
import * as profile from '../profile';
import { labels } from './labels';

// graph model instance; stays undefined until load() assigns it
let model;
// results of the most recent detection run, handed back again while frames are being skipped
let last: {}[] = [];
// number of frames answered from `last` since the previous real run;
// starts at the maximum so the very first predict() call cannot hit the cache
let skipped = Number.MAX_SAFE_INTEGER;

// multiplier applied to the decoded box offsets to enlarge the resulting boxes
const scaleBox = 2.5;
// when true, scores are passed through exp() and shifted by -1 before thresholding
const activateScore = false;

/**
 * Loads the object detection graph model on first use and caches it in module state.
 * Subsequent calls return the already loaded instance without touching the network.
 *
 * @param config - configuration object; reads `config.object.modelPath` and `config.debug`
 * @returns the loaded graph model, extended with an `inputSize` property
 */
export async function load(config) {
  if (!model) {
    model = await tf.loadGraphModel(config.object.modelPath);
    // input resolution is taken from dimension index 2 of the first input listed in the model signature
    // @ts-ignore
    model.inputSize = parseInt(Object.values(model.modelSignature['inputs'])[0].tensorShape.dim[2].size, 10);
    if (config.debug) {
      // the regex only matches paths containing both a slash and a dot;
      // fall back to the full path so that debug logging can never abort the load
      const match = config.object.modelPath.match(/\/(.*)\./);
      log(`load model: ${match ? match[1] : config.object.modelPath}`);
    }
  }
  return model;
}

/**
 * Decodes raw model outputs into a list of detected objects.
 *
 * For each stride (grid sizes 13, 26 and 52) the matching score and box-feature tensors are
 * located by shape, every grid cell / class pair above `config.object.minConfidence` is turned
 * into a candidate box, and non-max suppression is then run over the candidates only.
 * All tensors in `res` are disposed before returning.
 *
 * @param res - list of model output tensors; each is matched by `shape[1]` (cells) and `shape[2]` (80 scores or fewer-than-80 box features)
 * @param inputSize - model input resolution, used to normalize box offsets
 * @param outputShape - `[width, height]` used to scale normalized coordinates to pixels
 * @param config - configuration object; reads `config.object.minConfidence`, `maxResults` and `iouThreshold`
 * @returns candidates that survived non-max suppression, sorted by descending score
 */
async function process(res, inputSize, outputShape, config) {
  let id = 0;
  const results: Array<{ id: number, score: number, strideSize: number, class: number, label: string, center: number[], centerRaw: number[], box: number[], boxRaw: number[] }> = [];
  for (const strideSize of [1, 2, 4]) { // try each stride size as it detects large/medium/small objects
    // find scores, boxes, classes
    tf.tidy(() => { // wrap in tidy to automatically deallocate temp tensors
      const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704
      // find boxes and scores output depending on stride
      const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 80))?.squeeze();
      const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < 80))?.squeeze();
      // the lookups above are optional: when the model has no output for this stride, skip it instead of throwing
      if (!scoresT || !featuresT) return;
      const boxesMax = featuresT.reshape([-1, 4, featuresT.shape[1] / 4]); // reshape [output] to [4, output / 4] where number is number of different features inside each stride
      const boxIdx = boxesMax.argMax(2).arraySync(); // what we need is indexes of features with highest scores, not values itself
      const scores = activateScore ? scoresT.exp(1).arraySync() : scoresT.arraySync(); // optionally use exponential scores or just as-is
      // both factors depend only on the stride, so compute them once per stride
      const offsetScale = baseSize / strideSize / inputSize;
      const boxScale = scaleBox / strideSize;
      for (let i = 0; i < scoresT.shape[0]; i++) { // total strides (x * y matrix)
        for (let j = 0; j < scoresT.shape[1]; j++) { // one score for each class
          const score = scores[i][j] - (activateScore ? 1 : 0); // get score for current position
          if (score > config.object.minConfidence) {
            const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1
            const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1
            const boxOffset = boxIdx[i].map((a) => a * offsetScale); // just grab indexes of features with highest scores
            let boxRaw = [ // results normalized to range 0..1
              cx - (boxScale * boxOffset[0]),
              cy - (boxScale * boxOffset[1]),
              cx + (boxScale * boxOffset[2]),
              cy + (boxScale * boxOffset[3]),
            ];
            boxRaw = boxRaw.map((a) => Math.max(0, Math.min(a, 1))); // fix out-of-bounds coords
            const box = [ // results normalized to input image pixels
              boxRaw[0] * outputShape[0],
              boxRaw[1] * outputShape[1],
              boxRaw[2] * outputShape[0],
              boxRaw[3] * outputShape[1],
            ];
            const result = {
              id: id++,
              strideSize,
              score,
              class: j + 1,
              label: labels[j].label,
              center: [Math.trunc(outputShape[0] * cx), Math.trunc(outputShape[1] * cy)],
              centerRaw: [cx, cy],
              box: box.map((a) => Math.trunc(a)),
              boxRaw,
            };
            results.push(result);
          }
        }
      }
    });
  }
  // deallocate tensors
  res.forEach((t) => tf.dispose(t));

  // nothing passed the confidence threshold: nms cannot be run on an empty list
  // because the boxes argument would no longer be a 2d [n, 4] tensor
  if (results.length === 0) return results;

  // normally nms is run on raw results, but since boxes need to be calculated this way we skip calculation of
  // unnecessary boxes and run nms only on good candidates (basically it just does IOU analysis as scores are already filtered)
  const nmsBoxes = results.map((a) => a.boxRaw);
  const nmsScores = results.map((a) => a.score);
  const nms = await tf.image.nonMaxSuppressionAsync(nmsBoxes, nmsScores, config.object.maxResults, config.object.iouThreshold, config.object.minConfidence);
  const keep = new Set<number>(nms.dataSync()); // set lookup instead of a linear scan per candidate
  tf.dispose(nms);

  // filter & sort results
  return results
    .filter((_, idx) => keep.has(idx))
    .sort((a, b) => (b.score - a.score));
}

/**
 * Runs object detection on an image tensor.
 *
 * Returns `null` when no model has been loaded. When `config.videoOptimized` is set and fewer
 * than `config.object.skipFrames` frames were served since the last real run, the cached
 * results of that run are returned instead of running the model again.
 *
 * @param image - input image tensor; `shape[1]` is used as height and `shape[2]` as width
 * @param config - configuration object; reads `videoOptimized`, `profile` and `object.{enabled,skipFrames}`
 * @returns detected objects as produced by `process()`, an empty list when object detection is
 *          disabled, or `null` when the model is not loaded
 */
export async function predict(image, config) {
  if (!model) return null;
  // serve cached results while still inside the frame-skip window
  if ((skipped < config.object.skipFrames) && config.videoOptimized && (last.length > 0)) {
    skipped++;
    return last;
  }
  skipped = config.videoOptimized ? 0 : Number.MAX_SAFE_INTEGER;
  // with detection disabled there is no model output to decode, so return no detections instead of failing later
  if (!config.object.enabled) return [];

  const outputSize = [image.shape[2], image.shape[1]];
  // resize to the model resolution, scale values by 1/255 and reorder axes with [0, 3, 1, 2];
  // tidy releases the intermediate tensors and keeps only the returned one
  const transpose = tf.tidy(() => tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false)
    .div(255)
    .transpose([0, 3, 1, 2]));

  let objectT;
  try {
    if (!config.profile) {
      objectT = await model.predict(transpose);
    } else {
      const profileObject = await tf.profile(() => model.predict(transpose));
      // process() treats the model output as a list of tensors and disposes them itself,
      // so the profiled result is passed on as-is (a list has no clone() method)
      objectT = profileObject.result;
      profile.run('object', profileObject);
    }
  } finally {
    // release the input tensor even when inference throws
    transpose.dispose();
  }

  // plain async flow: any failure rejects the returned promise instead of leaving it pending
  const obj = await process(objectT, model.inputSize, outputSize, config);
  last = obj;
  return obj;
}
refactor face classes 2021-03-21 12:49:55 +01:00			`import { log } from '../helpers';`
add experimental nanodet object detection 2021-03-17 16:32:37 +01:00			`import * as tf from '../../dist/tfjs.esm.js';`
			`import * as profile from '../profile';`
update nanodet and face rotation check 2021-03-23 19:46:44 +01:00			`import { labels } from './labels';`
add experimental nanodet object detection 2021-03-17 16:32:37 +01:00
			`let model;`
			`let last: Array<{}> = [];`
			`let skipped = Number.MAX_SAFE_INTEGER;`

			`const scaleBox = 2.5; // increase box size`
update nanodet and face rotation check 2021-03-23 19:46:44 +01:00			`const activateScore = false;`
add experimental nanodet object detection 2021-03-17 16:32:37 +01:00
			`export async function load(config) {`
			`if (!model) {`
			`model = await tf.loadGraphModel(config.object.modelPath);`
			`// @ts-ignore`
			`model.inputSize = parseInt(Object.values(model.modelSignature['inputs'])[0].tensorShape.dim[2].size);`
			if (config.debug) log(`load model: ${config.object.modelPath.match(/\/(.*)\./)[1]}`);
			`}`
			`return model;`
			`}`

			`async function process(res, inputSize, outputShape, config) {`
update nanodet and face rotation check 2021-03-23 19:46:44 +01:00			`let id = 0;`
enforce types 2021-03-18 01:16:40 +01:00			`let results: Array<{ score: number, strideSize: number, class: number, label: string, center: number[], centerRaw: number[], box: number[], boxRaw: number[] }> = [];`
add experimental nanodet object detection 2021-03-17 16:32:37 +01:00			`for (const strideSize of [1, 2, 4]) { // try each stride size as it detects large/medium/small objects`
			`// find scores, boxes, classes`
			`tf.tidy(() => { // wrap in tidy to automatically deallocate temp tensors`
			`const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704`
			`// find boxes and scores output depending on stride`
update nanodet and face rotation check 2021-03-23 19:46:44 +01:00			`const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 80))?.squeeze();`
			`const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < 80))?.squeeze();`
			`const boxesMax = featuresT.reshape([-1, 4, featuresT.shape[1] / 4]); // reshape [output] to [4, output / 4] where number is number of different features inside each stride`
add experimental nanodet object detection 2021-03-17 16:32:37 +01:00			`const boxIdx = boxesMax.argMax(2).arraySync(); // what we need is indexes of features with highest scores, not values itself`
update nanodet and face rotation check 2021-03-23 19:46:44 +01:00			`const scores = activateScore ? scoresT.exp(1).arraySync() : scoresT.arraySync(); // optionally use exponential scores or just as-is`
			`for (let i = 0; i < scoresT.shape[0]; i++) { // total strides (x * y matrix)`
			`for (let j = 0; j < scoresT.shape[1]; j++) { // one score for each class`
			`const score = scores[i][j] - (activateScore ? 1 : 0); // get score for current position`
			`if (score > config.object.minConfidence) {`
			`const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1`
			`const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1`
			`const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize)); // just grab indexes of features with highest scores`
			`let boxRaw = [ // results normalized to range 0..1`
			`cx - (scaleBox / strideSize * boxOffset[0]),`
			`cy - (scaleBox / strideSize * boxOffset[1]),`
			`cx + (scaleBox / strideSize * boxOffset[2]),`
			`cy + (scaleBox / strideSize * boxOffset[3]),`
			`];`
			`boxRaw = boxRaw.map((a) => Math.max(0, Math.min(a, 1))); // fix out-of-bounds coords`
			`const box = [ // results normalized to input image pixels`
			`boxRaw[0] * outputShape[0],`
			`boxRaw[1] * outputShape[1],`
			`boxRaw[2] * outputShape[0],`
			`boxRaw[3] * outputShape[1],`
			`];`
			`const result = {`
			`id: id++,`
			`strideSize,`
			`score,`
			`class: j + 1,`
			`label: labels[j].label,`
			`center: [Math.trunc(outputShape[0] * cx), Math.trunc(outputShape[1] * cy)],`
			`centerRaw: [cx, cy],`
			`box: box.map((a) => Math.trunc(a)),`
			`boxRaw,`
			`};`
			`results.push(result);`
			`}`
add experimental nanodet object detection 2021-03-17 16:32:37 +01:00			`}`
			`}`
			`});`
			`}`
			`// deallocate tensors`
			`res.forEach((t) => tf.dispose(t));`

			`// normally nms is run on raw results, but since boxes need to be calculated this way we skip calulcation of`
			`// unnecessary boxes and run nms only on good candidates (basically it just does IOU analysis as scores are already filtered)`
			`const nmsBoxes = results.map((a) => a.boxRaw);`
			`const nmsScores = results.map((a) => a.score);`
			`const nms = await tf.image.nonMaxSuppressionAsync(nmsBoxes, nmsScores, config.object.maxResults, config.object.iouThreshold, config.object.minConfidence);`
			`const nmsIdx = nms.dataSync();`
			`tf.dispose(nms);`

			`// filter & sort results`
			`results = results`
			`.filter((a, idx) => nmsIdx.includes(idx))`
			`// @ts-ignore`
			`.sort((a, b) => (b.score - a.score));`

			`return results;`
			`}`

			`export async function predict(image, config) {`
			`if (!model) return null;`
			`// console.log(skipped, config.object.skipFrames, config.videoOptimized, ((skipped < config.object.skipFrames) && config.videoOptimized && (last.length > 0)));`
			`if ((skipped < config.object.skipFrames) && config.videoOptimized && (last.length > 0)) {`
			`skipped++;`
			`return last;`
			`}`
			`if (config.videoOptimized) skipped = 0;`
			`else skipped = Number.MAX_SAFE_INTEGER;`
			`return new Promise(async (resolve) => {`
			`const outputSize = [image.shape[2], image.shape[1]];`
			`const resize = tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false);`
			`const norm = resize.div(255);`
			`resize.dispose();`
			`const transpose = norm.transpose([0, 3, 1, 2]);`
			`norm.dispose();`

			`let objectT;`
			`if (!config.profile) {`
			`if (config.object.enabled) objectT = await model.predict(transpose);`
			`} else {`
			`const profileObject = config.object.enabled ? await tf.profile(() => model.predict(transpose)) : {};`
			`objectT = profileObject.result.clone();`
			`profileObject.result.dispose();`
			`profile.run('object', profileObject);`
			`}`
			`transpose.dispose();`

			`const obj = await process(objectT, model.inputSize, outputSize, config);`
			`last = obj;`
			`resolve(obj);`
			`});`
			`}`