mirror of https://github.com/vladmandic/human
122 lines
5.6 KiB
TypeScript

import { log, join } from '../helpers';
import * as tf from '../../dist/tfjs.esm.js';
import { labels } from './labels';

let model;
let last: Array<{}> = []; // cached results from the last processed frame
let skipped = Number.MAX_SAFE_INTEGER;

const scaleBox = 2.5; // increase box size

export async function load(config) {
  if (!model) {
    model = await tf.loadGraphModel(join(config.modelBasePath, config.object.modelPath));
    const inputs = Object.values(model.modelSignature['inputs']);
    model.inputSize = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : null; // read square input resolution from the model signature
    if (!model.inputSize) throw new Error(`Human: Cannot determine model inputSize: ${config.object.modelPath}`);
    if (!model || !model.modelUrl) log('load model failed:', config.object.modelPath);
    else if (config.debug) log('load model:', model.modelUrl);
  } else if (config.debug) log('cached model:', model.modelUrl);
  return model;
}
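// Usage sketch for load(): it joins config.modelBasePath and config.object.modelPath into
// the model URL and attaches the detected input resolution as model.inputSize.
// The paths and resolution below are illustrative assumptions, not guaranteed defaults:
//   const model = await load({ modelBasePath: '../models', debug: false, object: { modelPath: 'nanodet.json' } });
//   // model.inputSize -> e.g. 416 for a 416x416 model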

async function process(res, inputSize, outputShape, config) {
  let id = 0;
  let results: Array<{ score: number, strideSize: number, class: number, label: string, center: number[], centerRaw: number[], box: number[], boxRaw: number[] }> = [];
  for (const strideSize of [1, 2, 4]) { // try each stride size as it detects large/medium/small objects
    // find scores, boxes, classes
    tf.tidy(() => { // wrap in tidy to automatically deallocate temp tensors
      const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704
      // find boxes and scores output depending on stride
      const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === labels.length))?.squeeze();
      const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < labels.length))?.squeeze();
      const boxesMax = featuresT.reshape([-1, 4, featuresT.shape[1] / 4]); // reshape [output] to [4, output / 4] where output / 4 is the number of box features per stride
      const boxIdx = boxesMax.argMax(2).arraySync(); // what we need are the indexes of the features with the highest scores, not the values themselves
      const scores = scoresT.arraySync(); // optionally use exponential scores or just as-is
      for (let i = 0; i < scoresT.shape[0]; i++) { // total strides (x * y matrix)
        for (let j = 0; j < scoresT.shape[1]; j++) { // one score for each class
          const score = scores[i][j]; // get score for current position
          if (score > config.object.minConfidence && j !== 61) {
            const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1
            const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1
            const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize)); // rescale indexes of features with the highest scores into box offsets
            const [x, y] = [
              cx - (scaleBox / strideSize * boxOffset[0]),
              cy - (scaleBox / strideSize * boxOffset[1]),
            ];
            const [w, h] = [
              cx + (scaleBox / strideSize * boxOffset[2]) - x,
              cy + (scaleBox / strideSize * boxOffset[3]) - y,
            ];
            let boxRaw = [x, y, w, h]; // results normalized to range 0..1
            boxRaw = boxRaw.map((a) => Math.max(0, Math.min(a, 1))); // fix out-of-bounds coords
            const box = [ // results normalized to input image pixels
              boxRaw[0] * outputShape[0],
              boxRaw[1] * outputShape[1],
              boxRaw[2] * outputShape[0],
              boxRaw[3] * outputShape[1],
            ];
            const result = {
              id: id++,
              strideSize,
              score: Math.round(100 * score) / 100,
              class: j + 1,
              label: labels[j].label,
              center: [Math.trunc(outputShape[0] * cx), Math.trunc(outputShape[1] * cy)],
              centerRaw: [cx, cy],
              box: box.map((a) => Math.trunc(a)),
              boxRaw,
            };
            results.push(result);
          }
        }
      }
    });
  }
  // deallocate tensors
  res.forEach((t) => tf.dispose(t));

  // normally nms is run on raw results, but since boxes need to be calculated this way we skip calculation of
  // unnecessary boxes and run nms only on good candidates (basically it just does iou analysis as scores are already filtered)
  const nmsBoxes = results.map((a) => [a.boxRaw[1], a.boxRaw[0], a.boxRaw[3], a.boxRaw[2]]); // switch coordinates from x,y to y,x as expected by tf.nms
  const nmsScores = results.map((a) => a.score);
  let nmsIdx: any[] = [];
  if (nmsBoxes && nmsBoxes.length > 0) {
    const nms = await tf.image.nonMaxSuppressionAsync(nmsBoxes, nmsScores, config.object.maxDetected, config.object.iouThreshold, config.object.minConfidence);
    nmsIdx = nms.dataSync();
    tf.dispose(nms);
  }

  // filter & sort results
  results = results
    .filter((a, idx) => nmsIdx.includes(idx))
    .sort((a, b) => (b.score - a.score));

  return results;
}
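// Worked example of the box decode above (illustrative numbers, assuming a 416x416 model):
// for strideSize = 2 the grid is baseSize = 26, so cell i = 300 sits at column 14, row 11:
//   cx = (0.5 + 14) / 26 ≈ 0.558, cy = (0.5 + 11) / 26 ≈ 0.442
// the per-feature scale is baseSize / strideSize / inputSize = 26 / 2 / 416 = 0.03125,
// so with hypothetical argmax indexes boxIdx[i] = [3, 2, 4, 5]:
//   boxOffset ≈ [0.094, 0.063, 0.125, 0.156] and scaleBox / strideSize = 1.25
//   x ≈ 0.558 - 1.25 * 0.094 ≈ 0.440, y ≈ 0.442 - 1.25 * 0.063 ≈ 0.364
//   w ≈ 0.558 + 1.25 * 0.125 - 0.440 ≈ 0.274, h ≈ 0.442 + 1.25 * 0.156 - 0.364 ≈ 0.273
// giving boxRaw ≈ [0.440, 0.364, 0.274, 0.273] before clamping and scaling to pixels.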

export async function predict(image, config) {
  if (!model) return null;
  if ((skipped < config.object.skipFrames) && config.skipFrame && (last.length > 0)) {
    skipped++;
    return last; // return cached results while skipping frames
  }
  skipped = 0;
  return new Promise(async (resolve) => {
    const outputSize = [image.shape[2], image.shape[1]];
    const resize = tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false);
    const norm = resize.div(255); // normalize pixel values to range 0..1
    const transpose = norm.transpose([0, 3, 1, 2]); // model expects channels-first (nchw) input
    norm.dispose();
    resize.dispose();

    let objectT;
    if (config.object.enabled) objectT = await model.predict(transpose);
    transpose.dispose();

    const obj = await process(objectT, model.inputSize, outputSize, config);
    last = obj;
    resolve(obj);
  });
}
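
// Minimal end-to-end usage sketch: shows how load() and predict() fit together for a single
// input tensor shaped [1, height, width, 3]. The config field values are assumptions for
// illustration only; actual defaults come from the library configuration.
async function exampleUsage(imageTensor) {
  const config = {
    modelBasePath: '../models', // hypothetical base path
    debug: true,
    skipFrame: false,
    object: { enabled: true, modelPath: 'nanodet.json', minConfidence: 0.20, iouThreshold: 0.40, maxDetected: 10, skipFrames: 15 },
  };
  await load(config); // loads and caches the graph model on first call
  const objects = await predict(imageTensor, config); // array of { id, score, class, label, box, boxRaw, center, centerRaw }
  return objects;
}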