diff --git a/CHANGELOG.md b/CHANGELOG.md index ce8f9a06..db925577 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # @vladmandic/human -Version: **1.1.8** +Version: **1.1.9** Description: **Human: AI-powered 3D Face Detection, Face Embedding & Recognition, Body Pose Tracking, Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction & Gesture Recognition** Author: **Vladimir Mandic ** @@ -11,6 +11,10 @@ Repository: **** ### **HEAD -> main** 2021/03/17 mandic00@live.com + +### **1.1.9** 2021/03/17 mandic00@live.com + +- fix box clamping and raw output - hierarchical readme notes ### **1.1.8** 2021/03/17 mandic00@live.com diff --git a/package.json b/package.json index e6da2a8f..a1ecfb71 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,7 @@ "main": "dist/human.node.js", "module": "dist/human.esm.js", "browser": "dist/human.esm.js", - "types": "types/src/human.d.ts", + "types": "types/human.d.ts", "author": "Vladimir Mandic ", "bugs": { "url": "https://github.com/vladmandic/human/issues" diff --git a/config.ts b/src/config.ts similarity index 81% rename from config.ts rename to src/config.ts index 0a3e5331..cc276ef7 100644 --- a/config.ts +++ b/src/config.ts @@ -1,7 +1,121 @@ /* eslint-disable indent */ /* eslint-disable no-multi-spaces */ -export default { +/** + * Configuration interface definition for **Human** library + * + * Contains all configurable parameters + */ +export interface Config { + backend: String, + wasmPath: String, + debug: Boolean, + async: Boolean, + profile: Boolean, + deallocate: Boolean, + scoped: Boolean, + videoOptimized: Boolean, + warmup: String, + filter: { + enabled: Boolean, + width: Number, + height: Number, + return: Boolean, + brightness: Number, + contrast: Number, + sharpness: Number, + blur: Number + saturation: Number, + hue: Number, + negative: Boolean, + sepia: Boolean, + vintage: Boolean, + kodachrome: Boolean, + technicolor: Boolean, + polaroid: Boolean, + pixelate: Number, + }, + gesture: { 
+ enabled: Boolean, + }, + face: { + enabled: Boolean, + detector: { + modelPath: String, + rotation: Boolean, + maxFaces: Number, + skipFrames: Number, + skipInitial: Boolean, + minConfidence: Number, + iouThreshold: Number, + scoreThreshold: Number, + return: Boolean, + }, + mesh: { + enabled: Boolean, + modelPath: String, + }, + iris: { + enabled: Boolean, + modelPath: String, + }, + age: { + enabled: Boolean, + modelPath: String, + skipFrames: Number, + }, + gender: { + enabled: Boolean, + minConfidence: Number, + modelPath: String, + skipFrames: Number, + }, + emotion: { + enabled: Boolean, + minConfidence: Number, + skipFrames: Number, + modelPath: String, + }, + embedding: { + enabled: Boolean, + modelPath: String, + }, + }, + body: { + enabled: Boolean, + modelPath: String, + maxDetections: Number, + scoreThreshold: Number, + nmsRadius: Number, + }, + hand: { + enabled: Boolean, + rotation: Boolean, + skipFrames: Number, + skipInitial: Boolean, + minConfidence: Number, + iouThreshold: Number, + scoreThreshold: Number, + maxHands: Number, + landmarks: Boolean, + detector: { + modelPath: String, + }, + skeleton: { + modelPath: String, + }, + }, + object: { + enabled: Boolean, + modelPath: String, + minConfidence: Number, + iouThreshold: Number, + maxResults: Number, + skipFrames: Number, + }, +} + +const config: Config = { backend: 'webgl', // select tfjs backend to use // can be 'webgl', 'wasm', 'cpu', or 'humangl' which is a custom version of webgl // leave as empty string to continue using default backend @@ -177,3 +291,4 @@ export default { skipFrames: 13, // how many frames to go without re-running the detector }, }; +export { config as defaults }; diff --git a/src/draw.ts b/src/draw/draw.ts similarity index 83% rename from src/draw.ts rename to src/draw/draw.ts index ee6f1862..35e292cd 100644 --- a/src/draw.ts +++ b/src/draw/draw.ts @@ -1,5 +1,5 @@ -import config from '../config'; -import { TRI468 as triangulation } from './blazeface/coords'; +import { 
defaults } from '../config'; +import { TRI468 as triangulation } from '../blazeface/coords'; export const drawOptions = { color: 'rgba(173, 216, 230, 0.3)', // 'lightblue' with light alpha channel @@ -86,7 +86,7 @@ function curves(ctx, points: number[] = []) { } } -export async function gesture(inCanvas, result) { +export async function gesture(inCanvas: HTMLCanvasElement, result: Array) { if (!result || !inCanvas) return; if (!(inCanvas instanceof HTMLCanvasElement)) return; const ctx = inCanvas.getContext('2d'); @@ -112,7 +112,7 @@ export async function gesture(inCanvas, result) { } } -export async function face(inCanvas, result) { +export async function face(inCanvas: HTMLCanvasElement, result: Array) { if (!result || !inCanvas) return; if (!(inCanvas instanceof HTMLCanvasElement)) return; const ctx = inCanvas.getContext('2d'); @@ -196,7 +196,7 @@ export async function face(inCanvas, result) { } const lastDrawnPose:any[] = []; -export async function body(inCanvas, result) { +export async function body(inCanvas: HTMLCanvasElement, result: Array) { if (!result || !inCanvas) return; if (!(inCanvas instanceof HTMLCanvasElement)) return; const ctx = inCanvas.getContext('2d'); @@ -232,70 +232,70 @@ export async function body(inCanvas, result) { // torso points.length = 0; part = result[i].keypoints.find((a) => a.part === 'leftShoulder'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'rightShoulder'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'rightHip'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, 
part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'leftHip'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'leftShoulder'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); if (points.length === 5) lines(ctx, points); // only draw if we have complete torso // leg left points.length = 0; part = result[i].keypoints.find((a) => a.part === 'leftHip'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'leftKnee'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'leftAnkle'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'leftHeel'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'leftFoot'); - if (part && part.score > config.body.scoreThreshold) 
points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); curves(ctx, points); // leg right points.length = 0; part = result[i].keypoints.find((a) => a.part === 'rightHip'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'rightKnee'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'rightAnkle'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'rightHeel'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'rightFoot'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); curves(ctx, points); // arm left points.length = 0; part = result[i].keypoints.find((a) => a.part === 'leftShoulder'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'leftElbow'); - if (part && part.score > 
config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'leftWrist'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'leftPalm'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); curves(ctx, points); // arm right points.length = 0; part = result[i].keypoints.find((a) => a.part === 'rightShoulder'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'rightElbow'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'rightWrist'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); part = result[i].keypoints.find((a) => a.part === 'rightPalm'); - if (part && part.score > config.body.scoreThreshold) points.push([part.position.x, part.position.y]); + if (part && part.score > defaults.body.scoreThreshold) points.push([part.position.x, part.position.y]); curves(ctx, points); // draw all } } } -export async function hand(inCanvas, result) { +export async function 
hand(inCanvas: HTMLCanvasElement, result: Array) { if (!result || !inCanvas) return; if (!(inCanvas instanceof HTMLCanvasElement)) return; const ctx = inCanvas.getContext('2d'); @@ -348,7 +348,7 @@ export async function hand(inCanvas, result) { } } -export async function object(inCanvas, result) { +export async function object(inCanvas: HTMLCanvasElement, result: Array) { if (!result || !inCanvas) return; if (!(inCanvas instanceof HTMLCanvasElement)) return; const ctx = inCanvas.getContext('2d'); @@ -375,14 +375,14 @@ export async function object(inCanvas, result) { } } -export async function canvas(inCanvas, outCanvas) { +export async function canvas(inCanvas: HTMLCanvasElement, outCanvas: HTMLCanvasElement) { if (!inCanvas || !outCanvas) return; if (!(inCanvas instanceof HTMLCanvasElement) || !(outCanvas instanceof HTMLCanvasElement)) return; const outCtx = inCanvas.getContext('2d'); outCtx?.drawImage(inCanvas, 0, 0); } -export async function all(inCanvas, result) { +export async function all(inCanvas: HTMLCanvasElement, result:any) { if (!result || !inCanvas) return; if (!(inCanvas instanceof HTMLCanvasElement)) return; face(inCanvas, result.face); diff --git a/src/human.ts b/src/human.ts index 8b8d7297..9c2a99e6 100644 --- a/src/human.ts +++ b/src/human.ts @@ -12,12 +12,25 @@ import * as handpose from './handpose/handpose'; import * as blazepose from './blazepose/blazepose'; import * as nanodet from './nanodet/nanodet'; import * as gesture from './gesture/gesture'; -import * as image from './image'; +import * as image from './image/image'; +import * as draw from './draw/draw'; import * as profile from './profile'; -import * as config from '../config'; +import { Config, defaults } from './config'; +import { Result } from './result'; import * as sample from './sample'; import * as app from '../package.json'; -import * as draw from './draw'; + +type Tensor = {}; +type Model = {}; + +export type { Config } from './config'; +export type { Result } from './result'; + 
+/** Defines all possible input types for **Human** detection */ +export type Input = Tensor | ImageData | ImageBitmap | HTMLVideoElement | HTMLCanvasElement | OffscreenCanvas; +/** Error message */ +export type Error = { error: String }; +export type TensorFlow = typeof tf; // helper function: gets elapsed time on both browser and nodejs const now = () => { @@ -25,50 +38,6 @@ const now = () => { return parseInt((Number(process.hrtime.bigint()) / 1000 / 1000).toString()); }; -type Tensor = {}; -type Model = {}; -export type Result = { - face: Array<{ - confidence: Number, - boxConfidence: Number, - faceConfidence: Number, - box: [Number, Number, Number, Number], - mesh: Array<[Number, Number, Number]> - meshRaw: Array<[Number, Number, Number]> - boxRaw: [Number, Number, Number, Number], - annotations: Array<{ part: String, points: Array<[Number, Number, Number]>[] }>, - age: Number, - gender: String, - genderConfidence: Number, - emotion: Array<{ score: Number, emotion: String }>, - embedding: Array, - iris: Number, - angle: { roll: Number, yaw: Number, pitch: Number }, - }>, - body: Array<{ - id: Number, - part: String, - position: { x: Number, y: Number, z: Number }, - score: Number, - presence: Number }>, - hand: Array<{ - confidence: Number, - box: [Number, Number, Number, Number], - boxRaw: [Number, Number, Number, Number], - landmarks: Array<[Number, Number, Number]>, - annotations: Array<{ part: String, points: Array<[Number, Number, Number]>[] }>, - }>, - gesture: Array<{ - part: String, - gesture: String, - }>, - object: Array<{ score: Number, strideSize: Number, class: Number, label: String, center: Number[], centerRaw: Number[], box: Number[], boxRaw: Number[] }>, - performance: { any }, - canvas: OffscreenCanvas | HTMLCanvasElement, -} - -export type { default as Config } from '../config'; - // helper function: perform deep merge of multiple objects so it allows full inheriance with overrides function mergeDeep(...objects) { const isObject = (obj) => 
obj && typeof obj === 'object'; @@ -83,15 +52,31 @@ function mergeDeep(...objects) { return prev; }, {}); } - +/** + * Main Class for `Human` library + * + * All methods and properties are available only as members of Human class + * + * Configuration object definition: @link Config + * Results object definition: @link Result + * Possible inputs: @link Input + */ export class Human { version: String; - config: typeof config.default; + config: Config; state: String; image: { tensor: Tensor, canvas: OffscreenCanvas | HTMLCanvasElement }; // classes - tf: typeof tf; - draw: { drawOptions?: typeof draw.drawOptions, gesture: Function, face: Function, body: Function, hand: Function, canvas: Function, all: Function }; + tf: TensorFlow; + draw: { + drawOptions?: typeof draw.drawOptions, + gesture: typeof draw.gesture, + face: typeof draw.face, + body: typeof draw.body, + hand: typeof draw.hand, + canvas: typeof draw.canvas, + all: typeof draw.all, + }; // models models: { face: facemesh.MediaPipeFaceMesh | null, @@ -123,12 +108,12 @@ export class Human { #firstRun: Boolean; // definition end - constructor(userConfig = {}) { + constructor(userConfig: Config | Object = {}) { this.tf = tf; this.draw = draw; this.#package = app; this.version = app.version; - this.config = mergeDeep(config.default, userConfig); + this.config = mergeDeep(defaults, userConfig); this.state = 'idle'; this.#numTensors = 0; this.#analyzeMemoryLeaks = false; @@ -150,7 +135,7 @@ export class Human { }; // export access to image processing // @ts-ignore - this.image = (input: Tensor | ImageData | HTMLCanvasElement | HTMLVideoElement | OffscreenCanvas) => image.process(input, this.config); + this.image = (input: Input) => image.process(input, this.config); // export raw access to underlying models this.classes = { facemesh, @@ -211,7 +196,7 @@ export class Human { } // preload models, not explicitly required as it's done automatically on first use - async load(userConfig: Object = {}) { + async 
load(userConfig: Config | Object = {}) { this.state = 'load'; const timeStamp = now(); if (userConfig) this.config = mergeDeep(this.config, userConfig); @@ -480,7 +465,7 @@ export class Human { } // main detect function - async detect(input: Tensor | ImageData | HTMLCanvasElement | HTMLVideoElement | OffscreenCanvas, userConfig: Object = {}): Promise { + async detect(input: Input, userConfig: Config | Object = {}): Promise { // detection happens inside a promise return new Promise(async (resolve) => { this.state = 'config'; @@ -675,7 +660,7 @@ export class Human { return res; } - async warmup(userConfig: Object = {}): Promise { + async warmup(userConfig: Config | Object = {}): Promise { const t0 = now(); if (userConfig) this.config = mergeDeep(this.config, userConfig); const save = this.config.videoOptimized; @@ -691,4 +676,7 @@ export class Human { } } +/** + * Class Human is also available as default export + */ export { Human as default }; diff --git a/src/image.ts b/src/image/image.ts similarity index 98% rename from src/image.ts rename to src/image/image.ts index ccd48182..288dae14 100644 --- a/src/image.ts +++ b/src/image/image.ts @@ -1,7 +1,7 @@ // @ts-nocheck -import { log } from './log'; -import * as tf from '../dist/tfjs.esm.js'; +import { log } from '../log'; +import * as tf from '../../dist/tfjs.esm.js'; import * as fxImage from './imagefx'; const maxSize = 2048; diff --git a/src/imagefx.js b/src/image/imagefx.js similarity index 100% rename from src/imagefx.js rename to src/image/imagefx.js diff --git a/src/result.ts b/src/result.ts new file mode 100644 index 00000000..7c626b15 --- /dev/null +++ b/src/result.ts @@ -0,0 +1,112 @@ +/** + * Result interface definition for **Human** library + * + * Contains all possible detection results + */ +export interface Result { + /** Face results + * Combined results of face detector, face mesh, age, gender, emotion, embedding, iris models + * Some values may be null if specific model is not enabled + * + * Array 
of individual results with one object per detected face + * Each result has: + * - overall detection confidence value + * - box detection confidence value + * - mesh detection confidence value + * - box as array of [x, y, width, height], normalized to image resolution + * - boxRaw as array of [x, y, width, height], normalized to range 0..1 + * - mesh as array of [x, y, z] points of face mesh, normalized to image resolution + * - meshRaw as array of [x, y, z] points of face mesh, normalized to range 0..1 + * - annotations as array of annotated face mesh points + * - age as value + * - gender as value + * - genderConfidence as value + * - emotion as array of possible emotions with their individual scores + * - iris as distance value + * - angle as object with values for roll, yaw and pitch angles + */ + face: Array<{ + confidence: Number, + boxConfidence: Number, + faceConfidence: Number, + box: [Number, Number, Number, Number], + boxRaw: [Number, Number, Number, Number], + mesh: Array<[Number, Number, Number]> + meshRaw: Array<[Number, Number, Number]> + annotations: Array<{ part: String, points: Array<[Number, Number, Number]>[] }>, + age: Number, + gender: String, + genderConfidence: Number, + emotion: Array<{ score: Number, emotion: String }>, + embedding: Array, + iris: Number, + angle: { roll: Number, yaw: Number, pitch: Number }, + }>, + /** Body results + * + * Array of individual results with one object per detected body + * Each result has: + * - body id number + * - body part name + * - part position with x,y,z coordinates + * - body part score value + * - body part presence value + */ + body: Array<{ + id: Number, + part: String, + position: { x: Number, y: Number, z: Number }, + score: Number, + presence: Number }>, + /** Hand results + * + * Array of individual results with one object per detected hand + * Each result has: + * - confidence as value + * - box as array of [x, y, width, height], normalized to image resolution + * - boxRaw as array of [x, 
y, width, height], normalized to range 0..1 + * - landmarks as array of [x, y, z] points of hand, normalized to image resolution + * - annotations as array of annotated hand landmark points + */ + hand: Array<{ + confidence: Number, + box: [Number, Number, Number, Number], + boxRaw: [Number, Number, Number, Number], + landmarks: Array<[Number, Number, Number]>, + annotations: Array<{ part: String, points: Array<[Number, Number, Number]>[] }>, + }>, + /** Gesture results + * + * Array of individual results with one object per detected gesture + * Each result has: + * - part where gesture was detected + * - gesture detected + */ + gesture: Array<{ + part: String, + gesture: String, + }>, + /** Object results + * + * Array of individual results with one object per detected object + * Each result has: + * - score as value + * - label as detected class name + * - center as array of [x, y], normalized to image resolution + * - centerRaw as array of [x, y], normalized to range 0..1 + * - box as array of [x, y, width, height], normalized to image resolution + * - boxRaw as array of [x, y, width, height], normalized to range 0..1 + */ + object: Array<{ + score: Number, + strideSize: Number, + class: Number, + label: String, + center: Number[], + centerRaw: Number[], + box: Number[], + boxRaw: Number[], + }>, + performance: { any }, + canvas: OffscreenCanvas | HTMLCanvasElement, +} diff --git a/wiki b/wiki index 91af84e9..936e36ce 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit 91af84e9543762c4f31be41dda15fb2a5549d8a6 +Subproject commit 936e36ce0fd331030b48db735836381f2f22134a