From e41664dd188cbda3f79021fcff4fed9183b2dd8c Mon Sep 17 00:00:00 2001
From: Vladimir Mandic <mandic00@live.com>
Date: Tue, 28 Dec 2021 11:39:54 -0500
Subject: [PATCH] update

---
 CHANGELOG.md            |  9 ++--
 src/config.ts           | 15 ++++---
 src/face/angles.ts      | 28 +++++++------
 src/face/blazeface.ts   |  2 +-
 src/face/face.ts        | 93 ++++++++++++++++++++++++-----------
 src/face/faceres.ts     |  9 ++--
 src/gear/gear.ts        |  4 +-
 src/hand/handtrack.ts   |  2 +-
 src/object/centernet.ts |  8 ++--
 src/object/nanodet.ts   | 29 ++++++-------
 10 files changed, 116 insertions(+), 83 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6f28f0ca..c2635e70 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,11 +9,14 @@
 
 ## Changelog
 
+### **HEAD -> main** 2021/12/28 mandic00@live.com
+
+- fix samples
+- fix(src): typo
+- change on how face box is calculated
+
 ### **2.5.7** 2021/12/27 mandic00@live.com
 
-
-### **origin/main** 2021/12/22 mandic00@live.com
-
 - fix posenet
 
 ### **release: 2.5.6** 2021/12/15 mandic00@live.com

diff --git a/src/config.ts b/src/config.ts
index 435bc715..79aae0d5 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -5,17 +5,21 @@ export interface GenericConfig {
   /** is module enabled? */
   enabled: boolean,
-  /** path to model json file */
+  /** path to model json file (relative to `modelBasePath`) */
   modelPath: string,
-  /** how many max frames to go without re-running model if cached results are acceptable */
+  /** maximum number of frames to go without re-running model if cached results are acceptable
+   * for two-phase models such as face and hand, caching applies to bounding box detection only */
   skipFrames: number,
-  /** how many max milliseconds to go without re-running model if cached results are acceptable */
+  /** maximum number of milliseconds to go without re-running model if cached results are acceptable
+   * for two-phase models such as face and hand, caching applies to bounding box detection only */
   skipTime: number,
 }
 
 /** Detector part of face configuration */
 export interface FaceDetectorConfig extends GenericConfig {
-  /** is face rotation correction performed after detecting face? */
+  /** is face rotation correction performed after detecting face?
+   * used to correctly analyze faces under high angles
+   */
   rotation: boolean,
   /** maximum number of detected faces */
   maxDetected: number,
@@ -25,7 +29,8 @@ export interface FaceDetectorConfig extends GenericConfig {
   iouThreshold: number,
   /** should child models perform on masked image of a face */
   mask: boolean,
-  /** should face detection return face tensor to be used in some other extenrnal model? */
+  /** should face detection return the processed and cropped face tensor that can be used with an external model for additional processing?
+   * if enabled it must be manually deallocated to avoid a memory leak */
   return: boolean,
 }
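Review note: a minimal sketch of how the documented detector options combine in practice, assuming the package's public `Human` entry point; the input element, budgets, and threshold values are illustrative only.

```ts
import { Human } from '@vladmandic/human';

const human = new Human({
  face: {
    enabled: true,
    detector: {
      rotation: true,   // correct high-angle faces before child models run
      maxDetected: 1,
      skipFrames: 21,   // reuse cached bounding boxes for up to 21 frames...
      skipTime: 2500,   // ...or up to 2500ms, whichever budget expires first
      return: true,     // also return the processed and cropped face tensor
    },
  },
});

async function detectOnce(input: HTMLVideoElement) {
  const result = await human.detect(input);
  const tensor = result.face[0]?.tensor;
  if (tensor) human.tf.dispose(tensor); // caller owns the returned tensor: dispose it to avoid the leak the doc comment warns about
}
```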
diff --git a/src/face/angles.ts b/src/face/angles.ts
index 333c23a7..ba8a103e 100644
--- a/src/face/angles.ts
+++ b/src/face/angles.ts
@@ -1,11 +1,15 @@
-const calculateGaze = (face): { bearing: number, strength: number } => {
-  const radians = (pt1, pt2) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
+import type { Point, FaceResult } from '../result';
+
+type Vector = [number, number, number];
+
+const calculateGaze = (face: FaceResult): { bearing: number, strength: number } => {
+  const radians = (pt1: Point, pt2: Point) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
   if (!face.annotations['rightEyeIris'] || !face.annotations['leftEyeIris']) return { bearing: 0, strength: 0 };
   const offsetIris = [0, -0.1]; // iris center may not align with average of eye extremes
   const eyeRatio = 1; // factor to normalize changes x vs y
-  const left = face.mesh[33][2] > face.mesh[263][2]; // pick left or right eye depending which one is closer bazed on outsize point z axis
+  const left = (face.mesh[33][2] || 0) > (face.mesh[263][2] || 0); // pick left or right eye depending on which one is closer, based on outside point z axis
   const irisCenter = left ? face.mesh[473] : face.mesh[468];
   const eyeCenter = left // eye center is average of extreme points on x axis for both x and y, ignoring y extreme points as eyelids naturally open/close more when gazing up/down so relative point is less precise
     ? [(face.mesh[133][0] + face.mesh[33][0]) / 2, (face.mesh[133][1] + face.mesh[33][1]) / 2]
@@ -13,7 +17,7 @@ const calculateGaze = (face): { bearing: number, strength: number } => {
   const eyeSize = left // eye size is difference between extreme points for both x and y, used to normalize & squarify eye dimensions
     ? [face.mesh[133][0] - face.mesh[33][0], face.mesh[23][1] - face.mesh[27][1]]
     : [face.mesh[263][0] - face.mesh[362][0], face.mesh[253][1] - face.mesh[257][1]];
-  const eyeDiff = [ // x distance between extreme point and center point normalized with eye size
+  const eyeDiff: Point = [ // x distance between extreme point and center point normalized with eye size
     (eyeCenter[0] - irisCenter[0]) / eyeSize[0] - offsetIris[0],
     eyeRatio * (irisCenter[1] - eyeCenter[1]) / eyeSize[1] - offsetIris[1],
   ];
@@ -23,33 +27,33 @@ const calculateGaze = (face): { bearing: number, strength: number } => {
   return { bearing, strength };
 };
 
-export const calculateFaceAngle = (face, imageSize): {
+export const calculateFaceAngle = (face: FaceResult, imageSize: [number, number]): {
   angle: { pitch: number, yaw: number, roll: number },
   matrix: [number, number, number, number, number, number, number, number, number],
   gaze: { bearing: number, strength: number },
 } => {
   // const degrees = (theta) => Math.abs(((theta * 180) / Math.PI) % 360);
-  const normalize = (v) => { // normalize vector
+  const normalize = (v: Vector): Vector => { // normalize vector
     const length = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
     v[0] /= length;
     v[1] /= length;
     v[2] /= length;
     return v;
   };
-  const subVectors = (a, b) => { // vector subtraction (a - b)
+  const subVectors = (a: Vector, b: Vector): Vector => { // vector subtraction (a - b)
    const x = a[0] - b[0];
     const y = a[1] - b[1];
     const z = a[2] - b[2];
     return [x, y, z];
   };
-  const crossVectors = (a, b) => { // vector cross product (a x b)
+  const crossVectors = (a: Vector, b: Vector): Vector => { // vector cross product (a x b)
     const x = a[1] * b[2] - a[2] * b[1];
     const y = a[2] * b[0] - a[0] * b[2];
     const z = a[0] * b[1] - a[1] * b[0];
     return [x, y, z];
   };
   // 3x3 rotation matrix to Euler angles based on https://www.geometrictools.com/Documentation/EulerAngles.pdf
-  const rotationMatrixToEulerAngle = (r) => {
+  const rotationMatrixToEulerAngle = (r: number[]): { pitch: number, yaw: number, roll: number } => {
     // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
     const [r00, _r01, _r02, r10, r11, r12, r20, r21, r22] = r;
     let thetaX: number;
@@ -93,10 +97,10 @@ export const calculateFaceAngle = (face, imageSize): {
   const size = Math.max(face.boxRaw[2] * imageSize[0], face.boxRaw[3] * imageSize[1]) / 1.5;
   // top, bottom, left, right
-  const pts = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [pt[0] * imageSize[0] / size, pt[1] * imageSize[1] / size, pt[2]]); // make the xyz coordinates proportional, independent of the image/box size
+  const pts: Point[] = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [pt[0] * imageSize[0] / size, pt[1] * imageSize[1] / size, pt[2]] as Point); // make the xyz coordinates proportional, independent of the image/box size
 
-  const y_axis = normalize(subVectors(pts[1], pts[0]));
-  let x_axis = normalize(subVectors(pts[3], pts[2]));
+  const y_axis = normalize(subVectors(pts[1] as Vector, pts[0] as Vector));
+  let x_axis = normalize(subVectors(pts[3] as Vector, pts[2] as Vector));
   const z_axis = normalize(crossVectors(x_axis, y_axis));
   // adjust x_axis to make sure that all axes are perpendicular to each other
   x_axis = crossVectors(y_axis, z_axis);

diff --git a/src/face/blazeface.ts b/src/face/blazeface.ts
index f53cf25e..718d7606 100644
--- a/src/face/blazeface.ts
+++ b/src/face/blazeface.ts
@@ -36,7 +36,7 @@ export async function load(config: Config): Promise<GraphModel> {
   return model;
 }
 
-function decodeBounds(boxOutputs) {
+function decodeBounds(boxOutputs: Tensor) {
   const t: Record<string, Tensor> = {};
   t.boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
   t.centers = tf.add(t.boxStarts, anchors);
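Review note: the now-typed `calculateFaceAngle()` result is what lands in `FaceResult.rotation` (see `res.rotation = rotation` in the face.ts diff that follows), so downstream code can consume it like this. A hedged sketch assuming `human` and `input` from the earlier snippet:

```ts
const toDegrees = (theta: number) => (theta * 180) / Math.PI; // all angles above are in radians

const result = await human.detect(input);
for (const face of result.face) {
  if (!face.rotation) continue; // present only when face mesh landmarks were produced
  const { angle, gaze } = face.rotation;
  console.log('pitch/yaw/roll deg:', toDegrees(angle.pitch), toDegrees(angle.yaw), toDegrees(angle.roll));
  console.log('gaze bearing deg:', toDegrees(gaze.bearing), 'strength:', gaze.strength); // strength is a relative magnitude normalized against eye size
}
```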
diff --git a/src/face/face.ts b/src/face/face.ts
index e4dc69e1..618d8268 100644
--- a/src/face/face.ts
+++ b/src/face/face.ts
@@ -16,26 +16,28 @@ import * as gear from '../gear/gear';
 import * as ssrnetAge from '../gear/ssrnet-age';
 import * as ssrnetGender from '../gear/ssrnet-gender';
 import * as mobilefacenet from './mobilefacenet';
-import type { FaceResult } from '../result';
+import type { FaceResult, Emotion, Gender, Race } from '../result';
 import type { Tensor } from '../tfjs/types';
 import type { Human } from '../human';
 import { calculateFaceAngle } from './angles';
 
+type DescRes = { age: number, gender: Gender, genderScore: number, descriptor: number[], race?: { score: number, race: Race }[] };
+
 export const detectFace = async (instance: Human /* instance of human */, input: Tensor): Promise<FaceResult[]> => {
   // run facemesh, includes blazeface and iris
   // eslint-disable-next-line no-async-promise-executor
-  let timeStamp;
-  let ageRes;
-  let gearRes;
-  let genderRes;
-  let emotionRes;
-  let mobilefacenetRes;
-  let antispoofRes;
-  let livenessRes;
-  let descRes;
+  let timeStamp: number = now();
+  let ageRes: { age: number } | Promise<{ age: number }> | null;
+  let gearRes: gear.GearType | Promise<gear.GearType> | null;
+  let genderRes: { gender: string, genderScore: number } | Promise<{ gender: string, genderScore: number }> | null;
+  let emotionRes: { score: number, emotion: Emotion }[] | Promise<{ score: number, emotion: Emotion }[]>;
+  let mobilefacenetRes: number[] | Promise<number[]> | null;
+  let antispoofRes: number | Promise<number> | null;
+  let livenessRes: number | Promise<number> | null;
+  let descRes: DescRes | Promise<DescRes> | null;
+  const faceRes: Array<FaceResult> = [];
   instance.state = 'run:face';
-  timeStamp = now();
 
   const faces = await facemesh.predict(input, instance.config);
   instance.performance.face = env.perfadd ? (instance.performance.face || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
@@ -65,11 +67,11 @@ export const detectFace = async (instance: Human /* instance of human */, input:
     // run emotion, inherits face from blazeface
     instance.analyze('Start Emotion:');
     if (instance.config.async) {
-      emotionRes = instance.config.face.emotion?.enabled ? emotion.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
+      emotionRes = instance.config.face.emotion?.enabled ? emotion.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : [];
     } else {
       instance.state = 'run:emotion';
       timeStamp = now();
-      emotionRes = instance.config.face.emotion?.enabled ? await emotion.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
+      emotionRes = instance.config.face.emotion?.enabled ? await emotion.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : [];
       instance.performance.emotion = env.perfadd ? (instance.performance.emotion || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
     }
     instance.analyze('End Emotion:');
@@ -77,11 +79,11 @@ export const detectFace = async (instance: Human /* instance of human */, input:
     // run antispoof, inherits face from blazeface
     instance.analyze('Start AntiSpoof:');
     if (instance.config.async) {
-      antispoofRes = instance.config.face.antispoof?.enabled ? antispoof.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
+      antispoofRes = instance.config.face.antispoof?.enabled ? antispoof.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : 0;
     } else {
       instance.state = 'run:antispoof';
       timeStamp = now();
-      antispoofRes = instance.config.face.antispoof?.enabled ? await antispoof.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
+      antispoofRes = instance.config.face.antispoof?.enabled ? await antispoof.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : 0;
       instance.performance.antispoof = env.perfadd ? (instance.performance.antispoof || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
     }
     instance.analyze('End AntiSpoof:');
@@ -89,11 +91,11 @@ export const detectFace = async (instance: Human /* instance of human */, input:
     // run liveness, inherits face from blazeface
     instance.analyze('Start Liveness:');
     if (instance.config.async) {
-      livenessRes = instance.config.face.liveness?.enabled ? liveness.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
+      livenessRes = instance.config.face.liveness?.enabled ? liveness.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : 0;
     } else {
       instance.state = 'run:liveness';
       timeStamp = now();
-      livenessRes = instance.config.face.liveness?.enabled ? await liveness.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
+      livenessRes = instance.config.face.liveness?.enabled ? await liveness.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : 0;
       instance.performance.liveness = env.perfadd ? (instance.performance.antispoof || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
     }
     instance.analyze('End Liveness:');
@@ -101,11 +103,11 @@ export const detectFace = async (instance: Human /* instance of human */, input:
     // run gear, inherits face from blazeface
     instance.analyze('Start GEAR:');
     if (instance.config.async) {
-      gearRes = instance.config.face['gear']?.enabled ? gear.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
+      gearRes = instance.config.face['gear']?.enabled ? gear.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
     } else {
       instance.state = 'run:gear';
       timeStamp = now();
-      gearRes = instance.config.face['gear']?.enabled ? await gear.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
+      gearRes = instance.config.face['gear']?.enabled ? await gear.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
       instance.performance.gear = Math.trunc(now() - timeStamp);
     }
     instance.analyze('End GEAR:');
@@ -113,13 +115,13 @@ export const detectFace = async (instance: Human /* instance of human */, input:
     // run gear, inherits face from blazeface
     instance.analyze('Start SSRNet:');
     if (instance.config.async) {
-      ageRes = instance.config.face['ssrnet']?.enabled ? ssrnetAge.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
-      genderRes = instance.config.face['ssrnet']?.enabled ? ssrnetGender.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
+      ageRes = instance.config.face['ssrnet']?.enabled ? ssrnetAge.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
+      genderRes = instance.config.face['ssrnet']?.enabled ? ssrnetGender.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
     } else {
       instance.state = 'run:ssrnet';
       timeStamp = now();
-      ageRes = instance.config.face['ssrnet']?.enabled ? await ssrnetAge.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
-      genderRes = instance.config.face['ssrnet']?.enabled ? await ssrnetGender.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
+      ageRes = instance.config.face['ssrnet']?.enabled ? await ssrnetAge.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
+      genderRes = instance.config.face['ssrnet']?.enabled ? await ssrnetGender.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
       instance.performance.ssrnet = Math.trunc(now() - timeStamp);
     }
     instance.analyze('End SSRNet:');
@@ -127,11 +129,11 @@ export const detectFace = async (instance: Human /* instance of human */, input:
     // run gear, inherits face from blazeface
     instance.analyze('Start MobileFaceNet:');
     if (instance.config.async) {
-      mobilefacenetRes = instance.config.face['mobilefacenet']?.enabled ? mobilefacenet.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
+      mobilefacenetRes = instance.config.face['mobilefacenet']?.enabled ? mobilefacenet.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
     } else {
       instance.state = 'run:mobilefacenet';
       timeStamp = now();
-      mobilefacenetRes = instance.config.face['mobilefacenet']?.enabled ? await mobilefacenet.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
+      mobilefacenetRes = instance.config.face['mobilefacenet']?.enabled ? await mobilefacenet.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
       instance.performance.mobilefacenet = Math.trunc(now() - timeStamp);
     }
     instance.analyze('End MobileFaceNet:');
@@ -154,11 +156,26 @@ export const detectFace = async (instance: Human /* instance of human */, input:
     }
     instance.analyze('Finish Face:');
 
-    // override age/gender if alternative models are used
-    if (instance.config.face['ssrnet']?.enabled && ageRes && genderRes) descRes = { age: ageRes.age, gender: genderRes.gender, genderScore: genderRes.genderScore };
-    if (instance.config.face['gear']?.enabled && gearRes) descRes = { age: gearRes.age, gender: gearRes.gender, genderScore: gearRes.genderScore, race: gearRes.race };
-    // override descriptor if embedding model is used
-    if (instance.config.face['mobilefacenet']?.enabled && mobilefacenetRes) descRes.descriptor = mobilefacenetRes;
+    if (instance.config.face['ssrnet']?.enabled && ageRes && genderRes) { // override age/gender if ssrnet model is used
+      descRes = {
+        ...(descRes as DescRes),
+        age: (ageRes as { age: number}).age,
+        gender: (genderRes as { gender: Gender, genderScore: number }).gender,
+        genderScore: (genderRes as { gender: Gender, genderScore: number }).genderScore,
+      };
+    }
+    if (instance.config.face['gear']?.enabled && gearRes) { // override age/gender/race if gear model is used
+      descRes = {
+        ...(descRes as DescRes),
+        age: (gearRes as gear.GearType).age,
+        gender: (gearRes as gear.GearType).gender,
+        genderScore: (gearRes as gear.GearType).genderScore,
+        race: (gearRes as gear.GearType).race,
+      };
+    }
+    if (instance.config.face['mobilefacenet']?.enabled && mobilefacenetRes) { // override descriptor if embedding model is used
+      (descRes as DescRes).descriptor = mobilefacenetRes as number[];
+    }
 
     // calculate iris distance
     // iris: array[ center, left, top, right, bottom]
@@ -183,14 +200,14 @@ export const detectFace = async (instance: Human /* instance of human */, input:
       ...faces[i],
       id: i,
     };
-    if (descRes?.age) res.age = descRes.age;
-    if (descRes?.gender) res.gender = descRes.gender;
-    if (descRes?.genderScore) res.genderScore = descRes?.genderScore;
-    if (descRes?.descriptor) res.embedding = descRes?.descriptor;
-    if (descRes?.race) res.race = descRes?.race;
-    if (emotionRes) res.emotion = emotionRes;
-    if (antispoofRes) res.real = antispoofRes;
-    if (livenessRes) res.live = livenessRes;
+    if ((descRes as DescRes)?.age) res.age = (descRes as DescRes).age as number;
+    if ((descRes as DescRes)?.gender) res.gender = (descRes as DescRes).gender as Gender;
+    if ((descRes as DescRes)?.genderScore) res.genderScore = (descRes as DescRes)?.genderScore as number;
+    if ((descRes as DescRes)?.descriptor) res.embedding = (descRes as DescRes)?.descriptor as Array<number>;
+    if ((descRes as DescRes)?.race) res.race = (descRes as DescRes)?.race as { score: number, race: Race }[];
+    if (emotionRes) res.emotion = emotionRes as Array<{ score: number, emotion: Emotion }>;
+    if (antispoofRes) res.real = antispoofRes as number;
+    if (livenessRes) res.live = livenessRes as number;
     if (irisSize && irisSize !== 0) res.iris = Math.trunc(500 / irisSize / 11.7) / 100;
     if (rotation) res.rotation = rotation;
    if (tensor) res.tensor = tensor;
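Review note: every per-module block above repeats one shape, differing only in the typed neutral fallback (`[]`, `0`, or `null`) used when the module is disabled. A distilled sketch of that shape, with a hypothetical `runModule` helper that is not part of this patch; import paths are assumed from their use elsewhere in this repo:

```ts
import { now } from '../util/util'; // assumed helper path
import { env } from '../util/env';
import type { Human } from '../human';

// deliberately not an async function: in async mode the unresolved promise is returned
// and collected later (e.g. via Promise.all); in sync mode the caller awaits immediately,
// making the runs sequential and individually timed
function runModule<T>(instance: Human, name: string, run: () => Promise<T>): Promise<T> {
  if (instance.config.async) return run(); // keep the promise, resolve later
  instance.state = `run:${name}`;
  const timeStamp = now();
  return run().then((res) => {
    instance.performance[name] = env.perfadd ? (instance.performance[name] || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
    return res;
  });
}
```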
diff --git a/src/face/faceres.ts b/src/face/faceres.ts
index 2bf655b2..fccf81a4 100644
--- a/src/face/faceres.ts
+++ b/src/face/faceres.ts
@@ -13,11 +13,14 @@ import * as tf from '../../dist/tfjs.esm.js';
 import { constants } from '../tfjs/constants';
 import type { Tensor, GraphModel } from '../tfjs/types';
 import type { Config } from '../config';
+import type { Gender, Race } from '../result';
+
+export type FaceRes = { age: number, gender: Gender, genderScore: number, descriptor: number[], race?: { score: number, race: Race }[] };
 
 let model: GraphModel | null;
 const last: Array<{
   age: number,
-  gender: string,
+  gender: Gender,
   genderScore: number,
   descriptor: number[],
 }> = [];
@@ -63,7 +66,7 @@ export function enhance(input): Tensor {
   */
 }
 
-export async function predict(image: Tensor, config: Config, idx, count): Promise<{ age: number, gender: string, genderScore: number, descriptor: number[] }> {
+export async function predict(image: Tensor, config: Config, idx: number, count: number): Promise<FaceRes> {
   if (!model) return { age: 0, gender: 'unknown', genderScore: 0, descriptor: [] };
   const skipFrame = skipped < (config.face.description?.skipFrames || 0);
   const skipTime = (config.face.description?.skipTime || 0) > (now() - lastTime);
@@ -75,7 +78,7 @@ export async function predict(image: Tensor, config: Config, idx, count): Promis
   return new Promise(async (resolve) => {
     const obj = {
       age: 0,
-      gender: 'unknown',
+      gender: 'unknown' as Gender,
       genderScore: 0,
       descriptor: [],
     };
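Review note: the `skipFrame`/`skipTime` guard visible in the context above is the caching contract the new `skipFrames`/`skipTime` docs describe. A standalone sketch of that guard, with hypothetical `cache` and `runModel` names:

```ts
import { now } from '../util/util'; // assumed helper path, matching its use above
import type { Tensor } from '../tfjs/types';
import type { Config } from '../config';
import type { FaceRes } from './faceres';

let skipped = Number.MAX_SAFE_INTEGER; // forces a fresh run on the very first call
let lastTime = 0;
const cache: FaceRes[] = [];

// hypothetical runner standing in for the actual model execution
declare function runModel(image: Tensor, config: Config, idx: number, count: number): Promise<FaceRes>;

export async function cachedPredict(image: Tensor, config: Config, idx: number, count: number): Promise<FaceRes> {
  const skipFrame = skipped < (config.face.description?.skipFrames || 0);
  const skipTime = (config.face.description?.skipTime || 0) > (now() - lastTime);
  if (skipFrame && skipTime && cache[idx]) { // both budgets still open: serve the cached result
    skipped++;
    return cache[idx];
  }
  skipped = 0; // a budget expired: re-run the model and reset both counters
  lastTime = now();
  cache[idx] = await runModel(image, config, idx, count);
  return cache[idx];
}
```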
diff --git a/src/gear/gear.ts b/src/gear/gear.ts
index 1d7e5199..8bc36f29 100644
--- a/src/gear/gear.ts
+++ b/src/gear/gear.ts
@@ -11,7 +11,7 @@ import type { Config } from '../config';
 import type { GraphModel, Tensor } from '../tfjs/types';
 import { env } from '../util/env';
 
-type GearType = { age: number, gender: Gender, genderScore: number, race: Array<{ score: number, race: Race }> }
+export type GearType = { age: number, gender: Gender, genderScore: number, race: Array<{ score: number, race: Race }> }
 let model: GraphModel | null;
 const last: Array<GearType> = [];
 const raceNames = ['white', 'black', 'asian', 'indian', 'other'];
@@ -32,7 +32,7 @@ export async function load(config: Config) {
 }
 
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
-export async function predict(image: Tensor, config: Config, idx, count): Promise<GearType> {
+export async function predict(image: Tensor, config: Config, idx: number, count: number): Promise<GearType> {
   if (!model) return { age: 0, gender: 'unknown', genderScore: 0, race: [] };
   const skipFrame = skipped < (config.face['gear']?.skipFrames || 0);
   const skipTime = (config.face['gear']?.skipTime || 0) > (now() - lastTime);

diff --git a/src/hand/handtrack.ts b/src/hand/handtrack.ts
index 2b4e6dff..5cbf8872 100644
--- a/src/hand/handtrack.ts
+++ b/src/hand/handtrack.ts
@@ -172,7 +172,7 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
     ]);
     hand.landmarks = fingerPose.analyze(hand.keypoints) as HandResult['landmarks']; // calculate finger gestures
     for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
-      hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
+      hand.annotations[key] = fingerMap[key].map((index: number) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
     }
   }
   Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));

diff --git a/src/object/centernet.ts b/src/object/centernet.ts
index 7f8af7f3..c0858bd8 100644
--- a/src/object/centernet.ts
+++ b/src/object/centernet.ts
@@ -31,11 +31,11 @@ export async function load(config: Config): Promise<GraphModel> {
   return model;
 }
 
-async function process(res: Tensor | null, outputShape, config: Config) {
+async function process(res: Tensor | null, outputShape: [number, number], config: Config) {
   if (!res) return [];
   const t: Record<string, Tensor> = {};
   const results: Array<ObjectResult> = [];
-  const detections = await res.array();
+  const detections = await res.array() as number[][][];
   t.squeeze = tf.squeeze(res);
   const arr = tf.split(t.squeeze, 6, 1) as Tensor[]; // x1, y1, x2, y2, score, class
   t.stack = tf.stack([arr[1], arr[0], arr[3], arr[2]], 1); // reorder dims as tf.nms expects y, x
@@ -43,7 +43,7 @@ async function process(res: Tensor | null, outputShape, config: Config) {
   t.scores = tf.squeeze(arr[4]);
   t.classes = tf.squeeze(arr[5]);
   tf.dispose([res, ...arr]);
-  t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, t.scores, config.object.maxDetected, config.object.iouThreshold, config.object.minConfidence);
+  t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, t.scores, config.object.maxDetected, config.object.iouThreshold, (config.object.minConfidence || 0));
   const nms = await t.nms.data();
   let i = 0;
   for (const id of Array.from(nms)) {
@@ -81,7 +81,7 @@ export async function predict(input: Tensor, config: Config): Promise<ObjectResult[]> {
   return new Promise(async (resolve) => {
-    const outputSize = [input.shape[2], input.shape[1]];
+    const outputSize = [input.shape[2] || 0, input.shape[1] || 0] as [number, number];
     const resize = tf.image.resizeBilinear(input, [inputSize, inputSize]);
     const objectT = config.object.enabled ? model?.execute(resize, ['tower_0/detections']) as Tensor : null;
     lastTime = now();
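Review note: the guarded `minConfidence` now feeds `tf.image.nonMaxSuppressionAsync` directly. A self-contained toy sketch of that call pattern, using the public tfjs package instead of this repo's bundled `dist/tfjs.esm.js`; all values are illustrative:

```ts
import * as tf from '@tensorflow/tfjs';

async function nmsExample(): Promise<number[]> {
  const boxes = tf.tensor2d([[0.10, 0.10, 0.80, 0.80], [0.12, 0.10, 0.82, 0.80]]); // [y1, x1, y2, x2], the order t.stack produces above
  const scores = tf.tensor1d([0.9, 0.6]);
  const nms = await tf.image.nonMaxSuppressionAsync(boxes, scores, 10 /* maxDetected */, 0.4 /* iouThreshold */, 0.2 /* minConfidence */);
  const keep = Array.from(await nms.data()); // indices of surviving boxes; the heavily overlapping second box is suppressed -> [0]
  tf.dispose([boxes, scores, nms]);
  return keep;
}
```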
diff --git a/src/object/nanodet.ts b/src/object/nanodet.ts
index 3add848e..6882b5c9 100644
--- a/src/object/nanodet.ts
+++ b/src/object/nanodet.ts
@@ -13,25 +13,26 @@ import type { GraphModel, Tensor } from '../tfjs/types';
 import type { Config } from '../config';
 import { env } from '../util/env';
 
-let model;
+let model: GraphModel;
 let last: Array<ObjectResult> = [];
 let lastTime = 0;
 let skipped = Number.MAX_SAFE_INTEGER;
+let inputSize = 0;
 const scaleBox = 2.5; // increase box size
 
 export async function load(config: Config): Promise<GraphModel> {
   if (!model || env.initial) {
-    model = await tf.loadGraphModel(join(config.modelBasePath, config.object.modelPath || ''));
+    model = await tf.loadGraphModel(join(config.modelBasePath, config.object.modelPath || '')) as unknown as GraphModel;
     const inputs = Object.values(model.modelSignature['inputs']);
-    model.inputSize = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : null;
-    if (!model || !model.modelUrl) log('load model failed:', config.object.modelPath);
-    else if (config.debug) log('load model:', model.modelUrl);
-  } else if (config.debug) log('cached model:', model.modelUrl);
+    inputSize = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
+    if (!model || !model['modelUrl']) log('load model failed:', config.object.modelPath);
+    else if (config.debug) log('load model:', model['modelUrl']);
+  } else if (config.debug) log('cached model:', model['modelUrl']);
   return model;
 }
 
-async function process(res, inputSize, outputShape, config) {
+async function process(res: Tensor[], outputShape: [number, number], config: Config) {
   let id = 0;
   let results: Array<ObjectResult> = [];
   for (const strideSize of [1, 2, 4]) { // try each stride size as it detects large/medium/small objects
@@ -39,18 +40,18 @@
     tf.tidy(async () => { // wrap in tidy to automatically deallocate temp tensors
       const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704
       // find boxes and scores output depending on stride
-      const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === labels.length))?.squeeze();
-      const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < labels.length))?.squeeze();
+      const scoresT = tf.squeeze(res.find((a: Tensor) => (a.shape[1] === (baseSize ** 2) && (a.shape[2] || 0) === labels.length)));
+      const featuresT = tf.squeeze(res.find((a: Tensor) => (a.shape[1] === (baseSize ** 2) && (a.shape[2] || 0) < labels.length)));
       const boxesMax = featuresT.reshape([-1, 4, featuresT.shape[1] / 4]); // reshape [output] to [4, output / 4] where number is number of different features inside each stride
       const boxIdx = await boxesMax.argMax(2).array(); // what we need is indexes of features with highest scores, not values itself
       const scores = await scoresT.array(); // optionally use exponential scores or just as-is
       for (let i = 0; i < scoresT.shape[0]; i++) { // total strides (x * y matrix)
         for (let j = 0; j < scoresT.shape[1]; j++) { // one score for each class
           const score = scores[i][j]; // get score for current position
-          if (score > config.object.minConfidence && j !== 61) {
+          if (score > (config.object.minConfidence || 0) && j !== 61) {
             const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1
             const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1
-            const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize)); // just grab indexes of features with highest scores
+            const boxOffset = boxIdx[i].map((a: number) => a * (baseSize / strideSize / inputSize)); // just grab indexes of features with highest scores
             const [x, y] = [
               cx - (scaleBox / strideSize * boxOffset[0]),
               cy - (scaleBox / strideSize * boxOffset[1]),
             ];
@@ -116,8 +117,8 @@ export async function predict(image: Tensor, config: Config): Promise<ObjectResult[]> {
-    const outputSize = [image.shape[2], image.shape[1]];
-    const resize = tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false);
+    const outputSize = [image.shape[2] || 0, image.shape[1] || 0];
+    const resize = tf.image.resizeBilinear(image, [inputSize, inputSize], false);
     const norm = tf.div(resize, constants.tf255);
     const transpose = norm.transpose([0, 3, 1, 2]);
     tf.dispose(norm);
@@ -128,7 +129,7 @@ export async function predict(image: Tensor, config: Config): Promise
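Review note: with `inputSize` now module-level, the per-stride decoding above is easier to verify in isolation. A hedged sketch of just the grid math, using a hypothetical `cellCenter` helper:

```ts
// for stride s the output grid is (13*s) x (13*s) cells flattened into one dimension;
// flat cell index i maps back to a box center normalized to range 0..1
function cellCenter(i: number, strideSize: number): [number, number] {
  const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704 cells
  const cx = (0.5 + (i % baseSize)) / baseSize; // column -> normalized center.x
  const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // row -> normalized center.y
  return [cx, cy];
}

console.log(cellCenter(0, 1)); // [0.0385, 0.0385], center of the top-left cell of the 13x13 grid
```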