pull/280/head
Vladimir Mandic 2021-12-28 11:39:54 -05:00
parent 36b657b901
commit 04a5e76816
11 changed files with 118 additions and 84 deletions

View File

@ -9,11 +9,14 @@
## Changelog
### **HEAD -> main** 2021/12/28 mandic00@live.com
- fix samples
- fix(src): typo
- change on how face box is calculated
### **2.5.7** 2021/12/27 mandic00@live.com
### **origin/main** 2021/12/22 mandic00@live.com
- fix posenet
### **release: 2.5.6** 2021/12/15 mandic00@live.com

View File

@ -5,17 +5,21 @@
export interface GenericConfig {
/** is module enabled? */
enabled: boolean,
/** path to model json file */
/** path to model json file (relative to `modelBasePath`) */
modelPath: string,
/** how many max frames to go without re-running model if cached results are acceptable */
/** how many max frames to go without re-running model if cached results are acceptable
* for two-phase models such as face and hand, caching applies to bounding box detection only */
skipFrames: number,
/** how many max milliseconds to go without re-running model if cached results are acceptable */
/** how many max milliseconds to go without re-running model if cached results are acceptable
* for two-phase models such as face and hand, caching applies to bounding box detection only */
skipTime: number,
}
/** Detector part of face configuration */
export interface FaceDetectorConfig extends GenericConfig {
/** is face rotation correction performed after detecting face? */
/** is face rotation correction performed after detecting face?
* used to correctly analyze faces at high angles
*/
rotation: boolean,
/** maximum number of detected faces */
maxDetected: number,
@ -25,7 +29,8 @@ export interface FaceDetectorConfig extends GenericConfig {
iouThreshold: number,
/** should child models perform on masked image of a face */
mask: boolean,
/** should face detection return face tensor to be used in some other external model? */
/** should face detection return the processed and cropped face tensor that can be used with an external model for additional processing?
* if enabled, the returned tensor must be manually deallocated to avoid a memory leak */
return: boolean,
}
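For orientation, a minimal usage sketch (not part of this commit) showing how the options documented above fit together; it assumes the `@vladmandic/human` package entry point and that the instance exposes the bundled `tf` namespace for tensor disposal.

```ts
import { Human } from '@vladmandic/human';

const human = new Human({
  face: {
    detector: {
      rotation: true,   // correct for faces at high angles
      maxDetected: 1,
      skipFrames: 15,   // reuse cached bounding boxes for up to 15 frames...
      skipTime: 2000,   // ...or up to 2000 ms, whichever budget runs out first
      return: true,     // expose the cropped face tensor for external processing
    },
  },
});

async function analyze(image: HTMLImageElement) {
  const result = await human.detect(image);
  for (const face of result.face) {
    // ...feed face.tensor into an external model here...
    if (face.tensor) human.tf.dispose(face.tensor); // caller must free the returned tensor to avoid a leak
  }
}
```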

View File

@ -1,11 +1,15 @@
const calculateGaze = (face): { bearing: number, strength: number } => {
const radians = (pt1, pt2) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
import type { Point, FaceResult } from '../result';
type Vector = [number, number, number];
const calculateGaze = (face: FaceResult): { bearing: number, strength: number } => {
const radians = (pt1: Point, pt2: Point) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
if (!face.annotations['rightEyeIris'] || !face.annotations['leftEyeIris']) return { bearing: 0, strength: 0 };
const offsetIris = [0, -0.1]; // iris center may not align with average of eye extremes
const eyeRatio = 1; // factor to normalize changes x vs y
const left = face.mesh[33][2] > face.mesh[263][2]; // pick left or right eye depending on which one is closer, based on the outside point's z axis
const left = (face.mesh[33][2] || 0) > (face.mesh[263][2] || 0); // pick left or right eye depending on which one is closer, based on the outside point's z axis
const irisCenter = left ? face.mesh[473] : face.mesh[468];
const eyeCenter = left // eye center is average of extreme points on x axis for both x and y, ignoring y extreme points as eyelids naturally open/close more when gazing up/down so relative point is less precise
? [(face.mesh[133][0] + face.mesh[33][0]) / 2, (face.mesh[133][1] + face.mesh[33][1]) / 2]
@ -13,7 +17,7 @@ const calculateGaze = (face): { bearing: number, strength: number } => {
const eyeSize = left // eye size is difference between extreme points for both x and y, used to normalize & squarify eye dimensions
? [face.mesh[133][0] - face.mesh[33][0], face.mesh[23][1] - face.mesh[27][1]]
: [face.mesh[263][0] - face.mesh[362][0], face.mesh[253][1] - face.mesh[257][1]];
const eyeDiff = [ // x distance between extreme point and center point normalized with eye size
const eyeDiff: Point = [ // x distance between extreme point and center point normalized with eye size
(eyeCenter[0] - irisCenter[0]) / eyeSize[0] - offsetIris[0],
eyeRatio * (irisCenter[1] - eyeCenter[1]) / eyeSize[1] - offsetIris[1],
];
@ -23,33 +27,33 @@ const calculateGaze = (face): { bearing: number, strength: number } => {
return { bearing, strength };
};
export const calculateFaceAngle = (face, imageSize): {
export const calculateFaceAngle = (face: FaceResult, imageSize: [number, number]): {
angle: { pitch: number, yaw: number, roll: number },
matrix: [number, number, number, number, number, number, number, number, number],
gaze: { bearing: number, strength: number },
} => {
// const degrees = (theta) => Math.abs(((theta * 180) / Math.PI) % 360);
const normalize = (v) => { // normalize vector
const normalize = (v: Vector): Vector => { // normalize vector
const length = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
v[0] /= length;
v[1] /= length;
v[2] /= length;
return v;
};
const subVectors = (a, b) => { // vector subtraction (a - b)
const subVectors = (a: Vector, b: Vector): Vector => { // vector subtraction (a - b)
const x = a[0] - b[0];
const y = a[1] - b[1];
const z = a[2] - b[2];
return [x, y, z];
};
const crossVectors = (a, b) => { // vector cross product (a x b)
const crossVectors = (a: Vector, b: Vector): Vector => { // vector cross product (a x b)
const x = a[1] * b[2] - a[2] * b[1];
const y = a[2] * b[0] - a[0] * b[2];
const z = a[0] * b[1] - a[1] * b[0];
return [x, y, z];
};
// 3x3 rotation matrix to Euler angles based on https://www.geometrictools.com/Documentation/EulerAngles.pdf
const rotationMatrixToEulerAngle = (r) => {
const rotationMatrixToEulerAngle = (r: number[]): { pitch: number, yaw: number, roll: number } => {
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const [r00, _r01, _r02, r10, r11, r12, r20, r21, r22] = r;
let thetaX: number;
@ -93,10 +97,10 @@ export const calculateFaceAngle = (face, imageSize): {
const size = Math.max(face.boxRaw[2] * imageSize[0], face.boxRaw[3] * imageSize[1]) / 1.5;
// top, bottom, left, right
const pts = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [pt[0] * imageSize[0] / size, pt[1] * imageSize[1] / size, pt[2]]); // make the xyz coordinates proportional, independent of the image/box size
const pts: Point[] = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [pt[0] * imageSize[0] / size, pt[1] * imageSize[1] / size, pt[2]] as Point); // make the xyz coordinates proportional, independent of the image/box size
const y_axis = normalize(subVectors(pts[1], pts[0]));
let x_axis = normalize(subVectors(pts[3], pts[2]));
const y_axis = normalize(subVectors(pts[1] as Vector, pts[0] as Vector));
let x_axis = normalize(subVectors(pts[3] as Vector, pts[2] as Vector));
const z_axis = normalize(crossVectors(x_axis, y_axis));
// adjust x_axis to make sure that all axes are perpendicular to each other
x_axis = crossVectors(y_axis, z_axis);
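As a consumer-side sketch (assumption: the output of `calculateFaceAngle()` is surfaced as `FaceResult.rotation`, which is how the face pipeline below assigns it), reading the angles and gaze from a detection result could look like this:

```ts
import { Human } from '@vladmandic/human';

const human = new Human();

async function logPose(image: HTMLImageElement) {
  const result = await human.detect(image);
  const face = result.face[0];
  if (!face?.rotation) return;
  const { pitch, yaw, roll } = face.rotation.angle; // Euler angles in radians
  const { bearing, strength } = face.rotation.gaze; // gaze bearing in radians plus relative strength
  console.log({ pitch, yaw, roll, bearing, strength });
}
```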

View File

@ -36,7 +36,7 @@ export async function load(config: Config): Promise<GraphModel> {
return model;
}
function decodeBounds(boxOutputs) {
function decodeBounds(boxOutputs: Tensor) {
const t: Record<string, Tensor> = {};
t.boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
t.centers = tf.add(t.boxStarts, anchors);
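As a side note, a tiny standalone sketch (illustrative values, plain `@tensorflow/tfjs` import assumed) of what the `tf.slice` call above selects:

```ts
import * as tf from '@tensorflow/tfjs';

// begin [0, 1] with size [-1, 2] keeps every row but only columns 1..2,
// i.e. the per-anchor box-start coordinates that decodeBounds() then offsets by the anchors
const boxOutputs = tf.tensor2d([
  [9, 1, 2, 3, 4],
  [9, 5, 6, 7, 8],
]);
const boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
boxStarts.print(); // [[1, 2], [5, 6]]
tf.dispose([boxOutputs, boxStarts]);
```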

View File

@ -16,26 +16,28 @@ import * as gear from '../gear/gear';
import * as ssrnetAge from '../gear/ssrnet-age';
import * as ssrnetGender from '../gear/ssrnet-gender';
import * as mobilefacenet from './mobilefacenet';
import type { FaceResult } from '../result';
import type { FaceResult, Emotion, Gender, Race } from '../result';
import type { Tensor } from '../tfjs/types';
import type { Human } from '../human';
import { calculateFaceAngle } from './angles';
type DescRes = { age: number, gender: Gender, genderScore: number, descriptor: number[], race?: { score: number, race: Race }[] };
export const detectFace = async (instance: Human /* instance of human */, input: Tensor): Promise<FaceResult[]> => {
// run facemesh, includes blazeface and iris
// eslint-disable-next-line no-async-promise-executor
let timeStamp;
let ageRes;
let gearRes;
let genderRes;
let emotionRes;
let mobilefacenetRes;
let antispoofRes;
let livenessRes;
let descRes;
let timeStamp: number = now();
let ageRes: { age: number } | Promise<{ age: number }> | null;
let gearRes: gear.GearType | Promise<gear.GearType> | null;
let genderRes: { gender: string, genderScore: number } | Promise<{ gender: string, genderScore: number }> | null;
let emotionRes: { score: number, emotion: Emotion }[] | Promise<{ score: number, emotion: Emotion }[]>;
let mobilefacenetRes: number[] | Promise<number[]> | null;
let antispoofRes: number | Promise<number> | null;
let livenessRes: number | Promise<number> | null;
let descRes: DescRes | Promise<DescRes> | null;
const faceRes: Array<FaceResult> = [];
instance.state = 'run:face';
timeStamp = now();
const faces = await facemesh.predict(input, instance.config);
instance.performance.face = env.perfadd ? (instance.performance.face || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
@ -65,11 +67,11 @@ export const detectFace = async (instance: Human /* instance of human */, input:
// run emotion, inherits face from blazeface
instance.analyze('Start Emotion:');
if (instance.config.async) {
emotionRes = instance.config.face.emotion?.enabled ? emotion.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
emotionRes = instance.config.face.emotion?.enabled ? emotion.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : [];
} else {
instance.state = 'run:emotion';
timeStamp = now();
emotionRes = instance.config.face.emotion?.enabled ? await emotion.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
emotionRes = instance.config.face.emotion?.enabled ? await emotion.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : [];
instance.performance.emotion = env.perfadd ? (instance.performance.emotion || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
}
instance.analyze('End Emotion:');
@ -77,11 +79,11 @@ export const detectFace = async (instance: Human /* instance of human */, input:
// run antispoof, inherits face from blazeface
instance.analyze('Start AntiSpoof:');
if (instance.config.async) {
antispoofRes = instance.config.face.antispoof?.enabled ? antispoof.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
antispoofRes = instance.config.face.antispoof?.enabled ? antispoof.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : 0;
} else {
instance.state = 'run:antispoof';
timeStamp = now();
antispoofRes = instance.config.face.antispoof?.enabled ? await antispoof.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
antispoofRes = instance.config.face.antispoof?.enabled ? await antispoof.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : 0;
instance.performance.antispoof = env.perfadd ? (instance.performance.antispoof || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
}
instance.analyze('End AntiSpoof:');
@ -89,11 +91,11 @@ export const detectFace = async (instance: Human /* instance of human */, input:
// run liveness, inherits face from blazeface
instance.analyze('Start Liveness:');
if (instance.config.async) {
livenessRes = instance.config.face.liveness?.enabled ? liveness.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
livenessRes = instance.config.face.liveness?.enabled ? liveness.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : 0;
} else {
instance.state = 'run:liveness';
timeStamp = now();
livenessRes = instance.config.face.liveness?.enabled ? await liveness.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
livenessRes = instance.config.face.liveness?.enabled ? await liveness.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : 0;
instance.performance.liveness = env.perfadd ? (instance.performance.liveness || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
}
instance.analyze('End Liveness:');
@ -101,11 +103,11 @@ export const detectFace = async (instance: Human /* instance of human */, input:
// run gear, inherits face from blazeface
instance.analyze('Start GEAR:');
if (instance.config.async) {
gearRes = instance.config.face['gear']?.enabled ? gear.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
gearRes = instance.config.face['gear']?.enabled ? gear.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
} else {
instance.state = 'run:gear';
timeStamp = now();
gearRes = instance.config.face['gear']?.enabled ? await gear.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
gearRes = instance.config.face['gear']?.enabled ? await gear.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
instance.performance.gear = Math.trunc(now() - timeStamp);
}
instance.analyze('End GEAR:');
@ -113,13 +115,13 @@ export const detectFace = async (instance: Human /* instance of human */, input:
// run ssrnet, inherits face from blazeface
instance.analyze('Start SSRNet:');
if (instance.config.async) {
ageRes = instance.config.face['ssrnet']?.enabled ? ssrnetAge.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
genderRes = instance.config.face['ssrnet']?.enabled ? ssrnetGender.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
ageRes = instance.config.face['ssrnet']?.enabled ? ssrnetAge.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
genderRes = instance.config.face['ssrnet']?.enabled ? ssrnetGender.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
} else {
instance.state = 'run:ssrnet';
timeStamp = now();
ageRes = instance.config.face['ssrnet']?.enabled ? await ssrnetAge.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
genderRes = instance.config.face['ssrnet']?.enabled ? await ssrnetGender.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
ageRes = instance.config.face['ssrnet']?.enabled ? await ssrnetAge.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
genderRes = instance.config.face['ssrnet']?.enabled ? await ssrnetGender.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
instance.performance.ssrnet = Math.trunc(now() - timeStamp);
}
instance.analyze('End SSRNet:');
@ -127,11 +129,11 @@ export const detectFace = async (instance: Human /* instance of human */, input:
// run mobilefacenet, inherits face from blazeface
instance.analyze('Start MobileFaceNet:');
if (instance.config.async) {
mobilefacenetRes = instance.config.face['mobilefacenet']?.enabled ? mobilefacenet.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
mobilefacenetRes = instance.config.face['mobilefacenet']?.enabled ? mobilefacenet.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
} else {
instance.state = 'run:mobilefacenet';
timeStamp = now();
mobilefacenetRes = instance.config.face['mobilefacenet']?.enabled ? await mobilefacenet.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : {};
mobilefacenetRes = instance.config.face['mobilefacenet']?.enabled ? await mobilefacenet.predict(faces[i].tensor || tf.tensor([]), instance.config, i, faces.length) : null;
instance.performance.mobilefacenet = Math.trunc(now() - timeStamp);
}
instance.analyze('End MobileFaceNet:');
@ -154,11 +156,26 @@ export const detectFace = async (instance: Human /* instance of human */, input:
}
instance.analyze('Finish Face:');
// override age/gender if alternative models are used
if (instance.config.face['ssrnet']?.enabled && ageRes && genderRes) descRes = { age: ageRes.age, gender: genderRes.gender, genderScore: genderRes.genderScore };
if (instance.config.face['gear']?.enabled && gearRes) descRes = { age: gearRes.age, gender: gearRes.gender, genderScore: gearRes.genderScore, race: gearRes.race };
// override descriptor if embedding model is used
if (instance.config.face['mobilefacenet']?.enabled && mobilefacenetRes) descRes.descriptor = mobilefacenetRes;
if (instance.config.face['ssrnet']?.enabled && ageRes && genderRes) { // override age/gender if ssrnet model is used
descRes = {
...(descRes as DescRes),
age: (ageRes as { age: number}).age,
gender: (genderRes as { gender: Gender, genderScore: number }).gender,
genderScore: (genderRes as { gender: Gender, genderScore: number }).genderScore,
};
}
if (instance.config.face['gear']?.enabled && gearRes) { // override age/gender/race if gear model is used
descRes = {
...(descRes as DescRes),
age: (gearRes as gear.GearType).age,
gender: (gearRes as gear.GearType).gender,
genderScore: (gearRes as gear.GearType).genderScore,
race: (gearRes as gear.GearType).race,
};
}
if (instance.config.face['mobilefacenet']?.enabled && mobilefacenetRes) { // override descriptor if embedding model is used
(descRes as DescRes).descriptor = mobilefacenetRes as number[];
}
// calculate iris distance
// iris: array[ center, left, top, right, bottom]
@ -183,14 +200,14 @@ export const detectFace = async (instance: Human /* instance of human */, input:
...faces[i],
id: i,
};
if (descRes?.age) res.age = descRes.age;
if (descRes?.gender) res.gender = descRes.gender;
if (descRes?.genderScore) res.genderScore = descRes?.genderScore;
if (descRes?.descriptor) res.embedding = descRes?.descriptor;
if (descRes?.race) res.race = descRes?.race;
if (emotionRes) res.emotion = emotionRes;
if (antispoofRes) res.real = antispoofRes;
if (livenessRes) res.live = livenessRes;
if ((descRes as DescRes)?.age) res.age = (descRes as DescRes).age as number;
if ((descRes as DescRes)?.gender) res.gender = (descRes as DescRes).gender as Gender;
if ((descRes as DescRes)?.genderScore) res.genderScore = (descRes as DescRes)?.genderScore as number;
if ((descRes as DescRes)?.descriptor) res.embedding = (descRes as DescRes)?.descriptor as Array<number>;
if ((descRes as DescRes)?.race) res.race = (descRes as DescRes)?.race as { score: number, race: Race }[];
if (emotionRes) res.emotion = emotionRes as Array<{ score: number, emotion: Emotion }>;
if (antispoofRes) res.real = antispoofRes as number;
if (livenessRes) res.live = livenessRes as number;
if (irisSize && irisSize !== 0) res.iris = Math.trunc(500 / irisSize / 11.7) / 100;
if (rotation) res.rotation = rotation;
if (tensor) res.tensor = tensor;
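A short consumer-side sketch (not part of this commit) of how the fields assigned above surface on each `FaceResult`; the config keys are the ones queried in this file, everything else is an assumption:

```ts
import { Human } from '@vladmandic/human';

const human = new Human({
  face: {
    emotion: { enabled: true },
    antispoof: { enabled: true },
    liveness: { enabled: true },
  },
});

async function describeFaces(image: HTMLImageElement) {
  const result = await human.detect(image);
  for (const face of result.face) {
    console.log({
      age: face.age,
      gender: face.gender,
      genderScore: face.genderScore,
      emotion: face.emotion,          // [{ score, emotion }]
      real: face.real,                // antispoof score
      live: face.live,                // liveness score
      iris: face.iris,                // approximate distance derived from iris size
      embedding: face.embedding?.length,
    });
  }
}
```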

View File

@ -13,11 +13,14 @@ import * as tf from '../../dist/tfjs.esm.js';
import { constants } from '../tfjs/constants';
import type { Tensor, GraphModel } from '../tfjs/types';
import type { Config } from '../config';
import type { Gender, Race } from '../result';
export type FaceRes = { age: number, gender: Gender, genderScore: number, descriptor: number[], race?: { score: number, race: Race }[] };
let model: GraphModel | null;
const last: Array<{
age: number,
gender: string,
gender: Gender,
genderScore: number,
descriptor: number[],
}> = [];
@ -63,7 +66,7 @@ export function enhance(input): Tensor {
*/
}
export async function predict(image: Tensor, config: Config, idx, count): Promise<{ age: number, gender: string, genderScore: number, descriptor: number[] }> {
export async function predict(image: Tensor, config: Config, idx: number, count: number): Promise<FaceRes> {
if (!model) return { age: 0, gender: 'unknown', genderScore: 0, descriptor: [] };
const skipFrame = skipped < (config.face.description?.skipFrames || 0);
const skipTime = (config.face.description?.skipTime || 0) > (now() - lastTime);
@ -75,7 +78,7 @@ export async function predict(image: Tensor, config: Config, idx, count): Promis
return new Promise(async (resolve) => {
const obj = {
age: <number>0,
gender: <string>'unknown',
gender: <Gender>'unknown',
genderScore: <number>0,
descriptor: <number[]>[],
};
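The descriptor produced here is a plain `number[]` embedding; independent of whatever matching helpers the library provides, a common way to compare two descriptors is cosine similarity. A library-agnostic sketch:

```ts
// cosine similarity between two face descriptors: 1 means identical direction, ~0 means unrelated
function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length === 0 || a.length !== b.length) return 0;
  let dot = 0;
  let magA = 0;
  let magB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    magA += a[i] * a[i];
    magB += b[i] * b[i];
  }
  return magA && magB ? dot / (Math.sqrt(magA) * Math.sqrt(magB)) : 0;
}
```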

View File

@ -11,7 +11,7 @@ import type { Config } from '../config';
import type { GraphModel, Tensor } from '../tfjs/types';
import { env } from '../util/env';
type GearType = { age: number, gender: Gender, genderScore: number, race: Array<{ score: number, race: Race }> }
export type GearType = { age: number, gender: Gender, genderScore: number, race: Array<{ score: number, race: Race }> }
let model: GraphModel | null;
const last: Array<GearType> = [];
const raceNames = ['white', 'black', 'asian', 'indian', 'other'];
@ -32,7 +32,7 @@ export async function load(config: Config) {
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export async function predict(image: Tensor, config: Config, idx, count): Promise<GearType> {
export async function predict(image: Tensor, config: Config, idx: number, count: number): Promise<GearType> {
if (!model) return { age: 0, gender: 'unknown', genderScore: 0, race: [] };
const skipFrame = skipped < (config.face['gear']?.skipFrames || 0);
const skipTime = (config.face['gear']?.skipTime || 0) > (now() - lastTime);
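The two guards above implement the generic caching pattern behind `skipFrames`/`skipTime`; a stripped-down sketch of that pattern, mirroring the module's own `skipped`/`lastTime` state (state handling here is illustrative):

```ts
let skipped = Number.MAX_SAFE_INTEGER; // frames elapsed since the model last ran
let lastTime = 0;                      // timestamp of the last real run
let cached: unknown | null = null;     // last real result

function shouldReuse(skipFrames: number, skipTime: number, now: number): boolean {
  const withinFrames = skipped < skipFrames;      // frame budget not exhausted
  const withinTime = skipTime > (now - lastTime); // time budget not exhausted
  return withinFrames && withinTime && cached !== null;
}
// callers increment `skipped` every frame and reset it (plus lastTime/cached) after a fresh run
```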

View File

@ -172,7 +172,7 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
]);
hand.landmarks = fingerPose.analyze(hand.keypoints) as HandResult['landmarks']; // calculate finger gestures
for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
hand.annotations[key] = fingerMap[key].map((index: number) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
}
}
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
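A small sketch (not part of this commit) of consuming the per-finger annotations built above; entries may be `null` when landmarks are unavailable, exactly as the mapping produces them:

```ts
import { Human } from '@vladmandic/human';

const human = new Human({ hand: { enabled: true } });

async function logFingers(image: HTMLImageElement) {
  const result = await human.detect(image);
  for (const hand of result.hand) {
    for (const [finger, points] of Object.entries(hand.annotations)) {
      const valid = (points as Array<unknown>).filter((p) => p !== null).length; // skip null placeholders
      console.log(finger, `${valid} keypoints`);
    }
  }
}
```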

View File

@ -31,11 +31,11 @@ export async function load(config: Config): Promise<GraphModel> {
return model;
}
async function process(res: Tensor | null, outputShape, config: Config) {
async function process(res: Tensor | null, outputShape: [number, number], config: Config) {
if (!res) return [];
const t: Record<string, Tensor> = {};
const results: Array<ObjectResult> = [];
const detections = await res.array();
const detections = await res.array() as number[][][];
t.squeeze = tf.squeeze(res);
const arr = tf.split(t.squeeze, 6, 1) as Tensor[]; // x1, y1, x2, y2, score, class
t.stack = tf.stack([arr[1], arr[0], arr[3], arr[2]], 1); // reorder dims as tf.nms expects y, x
@ -43,7 +43,7 @@ async function process(res: Tensor | null, outputShape, config: Config) {
t.scores = tf.squeeze(arr[4]);
t.classes = tf.squeeze(arr[5]);
tf.dispose([res, ...arr]);
t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, t.scores, config.object.maxDetected, config.object.iouThreshold, config.object.minConfidence);
t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, t.scores, config.object.maxDetected, config.object.iouThreshold, (config.object.minConfidence || 0));
const nms = await t.nms.data();
let i = 0;
for (const id of Array.from(nms)) {
@ -81,7 +81,7 @@ export async function predict(input: Tensor, config: Config): Promise<ObjectResu
}
skipped = 0;
return new Promise(async (resolve) => {
const outputSize = [input.shape[2], input.shape[1]];
const outputSize = [input.shape[2] || 0, input.shape[1] || 0] as [number, number];
const resize = tf.image.resizeBilinear(input, [inputSize, inputSize]);
const objectT = config.object.enabled ? model?.execute(resize, ['tower_0/detections']) as Tensor : null;
lastTime = now();
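For reference, a self-contained sketch of the `nonMaxSuppressionAsync` call used above (the final argument is the score threshold that now falls back to 0 when `minConfidence` is unset); values are illustrative and the plain `@tensorflow/tfjs` import is an assumption:

```ts
import * as tf from '@tensorflow/tfjs';

async function nmsExample() {
  const boxes = tf.tensor2d([[0, 0, 1, 1], [0.1, 0.1, 1, 1]]); // [y1, x1, y2, x2], hence the dim reordering above
  const scores = tf.tensor1d([0.9, 0.6]);
  const keep = await tf.image.nonMaxSuppressionAsync(boxes, scores, 10, 0.5, 0); // maxDetected, iouThreshold, scoreThreshold
  console.log(await keep.data()); // -> [0]: the lower-scoring overlapping box is suppressed
  tf.dispose([boxes, scores, keep]);
}
```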

View File

@ -13,25 +13,26 @@ import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';
let model;
let model: GraphModel;
let last: Array<ObjectResult> = [];
let lastTime = 0;
let skipped = Number.MAX_SAFE_INTEGER;
let inputSize = 0;
const scaleBox = 2.5; // increase box size
export async function load(config: Config): Promise<GraphModel> {
if (!model || env.initial) {
model = await tf.loadGraphModel(join(config.modelBasePath, config.object.modelPath || ''));
model = await tf.loadGraphModel(join(config.modelBasePath, config.object.modelPath || '')) as unknown as GraphModel;
const inputs = Object.values(model.modelSignature['inputs']);
model.inputSize = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : null;
if (!model || !model.modelUrl) log('load model failed:', config.object.modelPath);
else if (config.debug) log('load model:', model.modelUrl);
} else if (config.debug) log('cached model:', model.modelUrl);
inputSize = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
if (!model || !model['modelUrl']) log('load model failed:', config.object.modelPath);
else if (config.debug) log('load model:', model['modelUrl']);
} else if (config.debug) log('cached model:', model['modelUrl']);
return model;
}
async function process(res, inputSize, outputShape, config) {
async function process(res: Tensor[], outputShape: [number, number], config: Config) {
let id = 0;
let results: Array<ObjectResult> = [];
for (const strideSize of [1, 2, 4]) { // try each stride size as it detects large/medium/small objects
@ -39,18 +40,18 @@ async function process(res, inputSize, outputShape, config) {
tf.tidy(async () => { // wrap in tidy to automatically deallocate temp tensors
const baseSize = strideSize * 13; // 13x13=169, 26x26=676, 52x52=2704
// find boxes and scores output depending on stride
const scoresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] === labels.length))?.squeeze();
const featuresT = res.find((a) => (a.shape[1] === (baseSize ** 2) && a.shape[2] < labels.length))?.squeeze();
const scoresT = tf.squeeze(res.find((a: Tensor) => (a.shape[1] === (baseSize ** 2) && (a.shape[2] || 0) === labels.length)));
const featuresT = tf.squeeze(res.find((a: Tensor) => (a.shape[1] === (baseSize ** 2) && (a.shape[2] || 0) < labels.length)));
const boxesMax = featuresT.reshape([-1, 4, featuresT.shape[1] / 4]); // reshape [output] to [4, output / 4], where the last dim is the number of different features inside each stride
const boxIdx = await boxesMax.argMax(2).array(); // what we need is indexes of features with highest scores, not values itself
const scores = await scoresT.array(); // optionally use exponential scores or just as-is
for (let i = 0; i < scoresT.shape[0]; i++) { // total strides (x * y matrix)
for (let j = 0; j < scoresT.shape[1]; j++) { // one score for each class
const score = scores[i][j]; // get score for current position
if (score > config.object.minConfidence && j !== 61) {
if (score > (config.object.minConfidence || 0) && j !== 61) {
const cx = (0.5 + Math.trunc(i % baseSize)) / baseSize; // center.x normalized to range 0..1
const cy = (0.5 + Math.trunc(i / baseSize)) / baseSize; // center.y normalized to range 0..1
const boxOffset = boxIdx[i].map((a) => a * (baseSize / strideSize / inputSize)); // just grab indexes of features with highest scores
const boxOffset = boxIdx[i].map((a: number) => a * (baseSize / strideSize / inputSize)); // just grab indexes of features with highest scores
const [x, y] = [
cx - (scaleBox / strideSize * boxOffset[0]),
cy - (scaleBox / strideSize * boxOffset[1]),
@ -116,8 +117,8 @@ export async function predict(image: Tensor, config: Config): Promise<ObjectResu
skipped = 0;
if (!env.kernels.includes('mod') || !env.kernels.includes('sparsetodense')) return last;
return new Promise(async (resolve) => {
const outputSize = [image.shape[2], image.shape[1]];
const resize = tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false);
const outputSize = [image.shape[2] || 0, image.shape[1] || 0];
const resize = tf.image.resizeBilinear(image, [inputSize, inputSize], false);
const norm = tf.div(resize, constants.tf255);
const transpose = norm.transpose([0, 3, 1, 2]);
tf.dispose(norm);
@ -128,7 +129,7 @@ export async function predict(image: Tensor, config: Config): Promise<ObjectResu
lastTime = now();
tf.dispose(transpose);
const obj = await process(objectT, model.inputSize, outputSize, config);
const obj = await process(objectT as Tensor[], outputSize as [number, number], config);
last = obj;
resolve(obj);
});

View File

@ -66,7 +66,8 @@ export async function process(input: Input, background: Input | undefined, confi
}
const alphaCanvas = image.canvas(width, height);
await tf.browser.toPixels(t.data, alphaCanvas);
// @ts-ignore browser is not defined in tfjs-node
if (tf.browser) await tf.browser.toPixels(t.data, alphaCanvas);
const alphaCtx = alphaCanvas.getContext('2d') as CanvasRenderingContext2D;
if (config.segmentation.blur && config.segmentation.blur > 0) alphaCtx.filter = `blur(${config.segmentation.blur}px)`; // use css filter for blurring, could be done manually with a gaussian blur instead
const alphaData = alphaCtx.getImageData(0, 0, width, height);