mirror of https://github.com/vladmandic/human
implement nanodet
parent 1dd860d112
commit ae9d6caabc
@@ -8,13 +8,19 @@ import GLBench from './gl-bench.js';

const userConfig = {
  backend: 'webgl',
  async: false,
  profile: false,
  warmup: 'full',
  videoOptimized: true,
  filter: { enabled: true },
  face: { enabled: false, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: false } },
  face: { enabled: true,
    mesh: { enabled: true },
    iris: { enabled: true },
    description: { enabled: true },
    emotion: { enabled: true },
  },
  hand: { enabled: false },
  gesture: { enabled: false },
  body: { enabled: true, modelPath: '../models/efficientpose.json' },
  body: { enabled: false, modelPath: '../models/blazepose.json' },
  object: { enabled: false },
};
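For context, a demo configured this way would typically construct Human with the override object and run detection in a frame loop. The snippet below is a minimal sketch, assuming the published package name @vladmandic/human and a playing <video id="video"> element; both are assumptions, not shown in this commit.

import Human from '@vladmandic/human';

const human = new Human(userConfig); // overrides are merged on top of the library defaults

async function detectLoop() {
  const video = document.getElementById('video') as HTMLVideoElement;
  const result = await human.detect(video); // runs only the modules enabled above
  console.log(result.performance, result.object); // nanodet detections are reported under result.object
  requestAnimationFrame(detectLoop); // schedule the next frame
}

detectLoop();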
@@ -56,13 +56,13 @@

    "@tensorflow/tfjs-layers": "^3.3.0",
    "@tensorflow/tfjs-node": "^3.3.0",
    "@tensorflow/tfjs-node-gpu": "^3.3.0",
    "@types/node": "^14.14.36",
    "@types/node": "^14.14.37",
    "@typescript-eslint/eslint-plugin": "^4.19.0",
    "@typescript-eslint/parser": "^4.19.0",
    "@vladmandic/pilogger": "^0.2.15",
    "chokidar": "^3.5.1",
    "dayjs": "^1.10.4",
    "esbuild": "^0.10.1",
    "esbuild": "^0.10.2",
    "eslint": "^7.23.0",
    "eslint-config-airbnb-base": "^14.2.1",
    "eslint-plugin-import": "^2.22.1",
@@ -110,10 +110,9 @@ export async function predict(image, config) {

  if (!config.profile) {
    if (config.face.description.enabled) resT = await model.predict(enhanced);
  } else {
    const profileAge = config.face.description.enabled ? await tf.profile(() => model.predict(enhanced)) : {};
    resT = profileAge.result.clone();
    profileAge.result.dispose();
    profile.run('age', profileAge);
    const profileDesc = config.face.description.enabled ? await tf.profile(() => model.predict(enhanced)) : {};
    resT = profileDesc.result;
    profile.run('faceres', profileDesc);
  }
  tf.dispose(enhanced);
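The updated branch keeps profileDesc.result directly instead of cloning and disposing it. A minimal sketch of that pattern, assuming tfjs is imported as tf, the model is already loaded, and profile refers to the helper module refactored further below (import path assumed):

import * as tf from '@tensorflow/tfjs';
import * as profile from './profile'; // import path assumed

async function runFaceRes(model: tf.LayersModel, enhanced: tf.Tensor, doProfile: boolean) {
  let resT: tf.Tensor;
  if (!doProfile) {
    resT = model.predict(enhanced) as tf.Tensor; // plain inference path
  } else {
    const profiled = await tf.profile(() => model.predict(enhanced) as tf.Tensor); // captures kernels, memory stats and the result
    resT = profiled.result as tf.Tensor; // keep the prediction tensor as-is; no clone/dispose round-trip
    profile.run('faceres', profiled); // aggregate and log the kernel statistics
  }
  tf.dispose(enhanced); // the enhanced input is no longer needed
  return resT;
}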
@@ -292,8 +292,10 @@ export class Human {

    }
    this.tf.enableProdMode();
    /* debug mode is really too much
    tf.enableDebugMode();
    this.tf.enableDebugMode();
    */
    this.tf.ENV.set('CHECK_COMPUTATION_FOR_ERRORS', false);
    this.tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', true);
    if (this.tf.getBackend() === 'webgl') {
      if (this.config.deallocate) {
        log('changing webgl: WEBGL_DELETE_TEXTURE_THRESHOLD:', this.config.deallocate);
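As a standalone illustration of the flags set above, the sketch below configures tfjs for production inference using the tf.env() accessor; the deallocate handling and the threshold value of 0 are assumptions inferred from the truncated log line, not spelled out in this hunk:

import * as tf from '@tensorflow/tfjs';

export async function configureTf(deallocate = false) {
  tf.enableProdMode(); // drop tensor sanity checks and debug bookkeeping
  // tf.enableDebugMode(); // far too verbose outside of troubleshooting
  tf.env().set('CHECK_COMPUTATION_FOR_ERRORS', false); // skip NaN checks after every op
  tf.env().set('WEBGL_PACK_DEPTHWISECONV', true); // pack depthwise convolutions into fewer textures
  if (tf.getBackend() === 'webgl' && deallocate) {
    tf.env().set('WEBGL_DELETE_TEXTURE_THRESHOLD', 0); // assumption: 0 releases webgl textures as soon as tensors are disposed
  }
  await tf.ready();
}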
@@ -8,7 +8,6 @@ let last: Array<{}> = [];

let skipped = Number.MAX_SAFE_INTEGER;

const scaleBox = 2.5; // increase box size
const activateScore = false;

export async function load(config) {
  if (!model) {
@@ -28,24 +27,27 @@ async function process(res, inputSize, outputShape, config) {

  tf.tidy(() => { // wrap in tidy to automatically deallocate temp tensors
    const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704
    // find boxes and scores output depending on stride
    const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === 80))?.squeeze();
    const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < 80))?.squeeze();
    const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === labels.length))?.squeeze();
    const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < labels.length))?.squeeze();
    const boxesMax = featuresT.reshape([-1, 4, featuresT.shape[1] / 4]); // reshape [output] to [4, output / 4] where number is number of different features inside each stride
    const boxIdx = boxesMax.argMax(2).arraySync(); // what we need is indexes of features with highest scores, not values itself
    const scores = activateScore ? scoresT.exp(1).arraySync() : scoresT.arraySync(); // optionally use exponential scores or just as-is
    const scores = scoresT.arraySync(); // optionally use exponential scores or just as-is
    for (let i = 0; i < scoresT.shape[0]; i++) { // total strides (x * y matrix)
      for (let j = 0; j < scoresT.shape[1]; j++) { // one score for each class
        const score = scores[i][j] - (activateScore ? 1 : 0); // get score for current position
        if (score > config.object.minConfidence) {
        const score = scores[i][j]; // get score for current position
        if (score > config.object.minConfidence && j !== 61) {
          const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1
          const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1
          const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize)); // just grab indexes of features with highest scores
          let boxRaw = [ // results normalized to range 0..1
          const [x, y] = [
            cx - (scaleBox / strideSize * boxOffset[0]),
            cy - (scaleBox / strideSize * boxOffset[1]),
            cx + (scaleBox / strideSize * boxOffset[2]),
            cy + (scaleBox / strideSize * boxOffset[3]),
          ];
          const [w, h] = [
            cx + (scaleBox / strideSize * boxOffset[2]) - x,
            cy + (scaleBox / strideSize * boxOffset[3]) - y,
          ];
          let boxRaw = [x, y, w, h]; // results normalized to range 0..1
          boxRaw = boxRaw.map((a) => Math.max(0, Math.min(a, 1))); // fix out-of-bounds coords
          const box = [ // results normalized to input image pixels
            boxRaw[0] * outputShape[0],
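To make the decode arithmetic above concrete, here is a self-contained sketch that replays the center/offset math for one grid cell. The numbers (inputSize 416, strideSize 1, hence a 13x13 grid) and the boxIdx values are illustrative assumptions standing in for the per-cell argMax output:

// Decode one cell of a 13x13 stride using the same formulas as above.
const inputSize = 416;       // assumed nanodet input resolution
const strideSize = 1;        // first stride => baseSize 13
const baseSize = strideSize * 13;
const scaleBox = 2.5;        // same constant as in the module

const i = 40;                // flattened cell index: row 3, column 1
const boxIdx = [1, 1, 1, 1]; // hypothetical argMax indexes for the four box features

const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // ≈ 0.115, cell center x in 0..1
const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // ≈ 0.269, cell center y in 0..1
const boxOffset = boxIdx.map((a) => a * (baseSize / strideSize / inputSize)); // each ≈ 0.03125

const x = cx - (scaleBox / strideSize * boxOffset[0]); // ≈ 0.037
const y = cy - (scaleBox / strideSize * boxOffset[1]); // ≈ 0.191
const w = cx + (scaleBox / strideSize * boxOffset[2]) - x; // ≈ 0.156
const h = cy + (scaleBox / strideSize * boxOffset[3]) - y; // ≈ 0.156

const boxRaw = [x, y, w, h].map((a) => Math.max(0, Math.min(a, 1))); // clamp to 0..1
console.log(boxRaw); // normalized box, ready to be scaled to output-image pixels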
@@ -77,14 +79,16 @@ async function process(res, inputSize, outputShape, config) {

  // unnecessary boxes and run nms only on good candidates (basically it just does IOU analysis as scores are already filtered)
  const nmsBoxes = results.map((a) => a.boxRaw);
  const nmsScores = results.map((a) => a.score);
  const nms = await tf.image.nonMaxSuppressionAsync(nmsBoxes, nmsScores, config.object.maxResults, config.object.iouThreshold, config.object.minConfidence);
  const nmsIdx = nms.dataSync();
  tf.dispose(nms);
  let nmsIdx: any[] = [];
  if (nmsBoxes && nmsBoxes.length > 0) {
    const nms = await tf.image.nonMaxSuppressionAsync(nmsBoxes, nmsScores, config.object.maxResults, config.object.iouThreshold, config.object.minConfidence);
    nmsIdx = nms.dataSync();
    tf.dispose(nms);
  }

  // filter & sort results
  results = results
    .filter((a, idx) => nmsIdx.includes(idx))
    // @ts-ignore
    .sort((a, b) => (b.score - a.score));

  return results;
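The added guard matters because the candidate list is legitimately empty for frames with no score above minConfidence. A minimal standalone sketch of the same pattern, assuming tfjs as tf and candidates shaped like the module's results:

import * as tf from '@tensorflow/tfjs';

type Candidate = { boxRaw: [number, number, number, number], score: number, label?: string };

async function nmsFilter(results: Candidate[], maxResults: number, iouThreshold: number, minConfidence: number) {
  const nmsBoxes = results.map((a) => a.boxRaw);
  const nmsScores = results.map((a) => a.score);
  let nmsIdx: number[] = [];
  if (nmsBoxes.length > 0) { // skip nms entirely when nothing passed the confidence filter
    const nms = await tf.image.nonMaxSuppressionAsync(nmsBoxes, nmsScores, maxResults, iouThreshold, minConfidence);
    nmsIdx = Array.from(await nms.data()); // indexes of the boxes that survive suppression
    tf.dispose(nms);
  }
  return results
    .filter((a, idx) => nmsIdx.includes(idx)) // keep only surviving candidates
    .sort((a, b) => b.score - a.score);       // highest confidence first
}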
@@ -103,17 +107,16 @@ export async function predict(image, config) {

  const outputSize = [image.shape[2], image.shape[1]];
  const resize = tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false);
  const norm = resize.div(255);
  resize.dispose();
  const transpose = norm.transpose([0, 3, 1, 2]);
  norm.dispose();
  resize.dispose();

  let objectT;
  if (!config.profile) {
    if (config.object.enabled) objectT = await model.predict(transpose);
    if (config.object.enabled) objectT = await model.executeAsync(transpose);
  } else {
    const profileObject = config.object.enabled ? await tf.profile(() => model.predict(transpose)) : {};
    objectT = profileObject.result.clone();
    profileObject.result.dispose();
    const profileObject = config.object.enabled ? await tf.profile(() => model.executeAsync(transpose)) : {};
    objectT = profileObject.result;
    profile.run('object', profileObject);
  }
  transpose.dispose();
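The reordered dispose calls follow the resize, normalize, transpose preprocessing chain. As a sketch of the same chain, the helper below wraps it in tf.tidy instead of manual disposal; tidy cannot wrap the asynchronous executeAsync call itself, which is presumably why the module releases the model input by hand:

import * as tf from '@tensorflow/tfjs';

function preprocess(image: tf.Tensor4D, modelInputSize: number): tf.Tensor4D {
  return tf.tidy(() => {
    const resize = tf.image.resizeBilinear(image, [modelInputSize, modelInputSize], false); // scale to the model resolution
    const norm = resize.div(255); // bring pixel values into the 0..1 range
    return norm.transpose([0, 3, 1, 2]) as tf.Tensor4D; // NHWC -> NCHW, the layout the nanodet graph expects
  });
}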
@@ -2,23 +2,31 @@ import { log } from './helpers';

export const data = {};

export function run(name: string, raw: any): void {
  if (!raw || !raw.kernels) return;
export function run(modelName: string, profileData: any): void {
  if (!profileData || !profileData.kernels) return;
  const maxResults = 5;
  const time = raw.kernels
  const time = profileData.kernels
    .filter((a) => a.kernelTimeMs > 0)
    .reduce((a, b) => a += b.kernelTimeMs, 0);
  const slowest = raw.kernels
  const slowest = profileData.kernels
    .map((a, i) => { a.id = i; return a; })
    .filter((a) => a.kernelTimeMs > 0)
    .sort((a, b) => b.kernelTimeMs - a.kernelTimeMs);
  const largest = raw.kernels
  const largest = profileData.kernels
    .map((a, i) => { a.id = i; return a; })
    .filter((a) => a.totalBytesSnapshot > 0)
    .sort((a, b) => b.totalBytesSnapshot - a.totalBytesSnapshot);
  if (slowest.length > maxResults) slowest.length = maxResults;
  if (largest.length > maxResults) largest.length = maxResults;
  const res = { newBytes: raw.newBytes, newTensors: raw.newTensors, peakBytes: raw.peakBytes, numKernelOps: raw.kernels.length, timeKernelOps: time, slowestKernelOps: slowest, largestKernelOps: largest };
  data[name] = res;
  log('Human profiler', name, res);
  data[modelName] = {
    model: modelName,
    newBytes: profileData.newBytes,
    newTensors: profileData.newTensors,
    peakBytes: profileData.peakBytes,
    numKernelOps: profileData.kernels.length,
    timeKernelOps: time,
    slowestKernelOps: slowest,
    largestKernelOps: largest,
  };
  log('profiler', modelName, data[modelName]);
}
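To show how the refactored output is consumed, a short sketch reading back the stored statistics after a run with config.profile enabled; the 'faceres' key matches the name passed to run() in the faceres hunk above, and the import path and kernel field names (from tfjs profile info) are assumptions:

import * as profile from './profile'; // path assumed

const stats = (profile.data as Record<string, any>)['faceres']; // one entry per profiled model
if (stats) {
  console.log(`model ${stats.model}: ${stats.numKernelOps} kernel ops, ${stats.timeKernelOps.toFixed(2)} ms total`);
  for (const k of stats.slowestKernelOps) console.log('slow kernel:', k.name, k.kernelTimeMs); // up to 5 slowest ops
  console.log('peak memory (bytes):', stats.peakBytes);
}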