/**
 * Human main module
 * @default Human Library
 * @summary
 * @author
 * @copyright
 * @license MIT
 */

// module imports
import { log, now, mergeDeep, validate } from './util/util';
import { defaults } from './config';
import { env, Env } from './util/env';
import * as tf from '../dist/tfjs.esm.js';
import * as app from '../package.json';
import * as backend from './tfjs/backend';
import * as blazepose from './body/blazepose';
import * as centernet from './object/centernet';
import * as draw from './util/draw';
import * as efficientpose from './body/efficientpose';
import * as face from './face/face';
import * as facemesh from './face/facemesh';
import * as faceres from './face/faceres';
import * as gesture from './gesture/gesture';
import * as handpose from './hand/handpose';
import * as handtrack from './hand/handtrack';
import * as humangl from './tfjs/humangl';
import * as image from './image/image';
import * as interpolate from './util/interpolate';
import * as match from './face/match';
import * as models from './models';
import * as movenet from './body/movenet';
import * as nanodet from './object/nanodet';
import * as persons from './util/persons';
import * as posenet from './body/posenet';
import * as segmentation from './segmentation/segmentation';
import * as warmups from './warmup';

// type definitions
import type { Input, Tensor, DrawOptions, Config, Result, FaceResult, HandResult, BodyResult, ObjectResult, GestureResult, PersonResult, AnyCanvas } from './exports';

// type exports
export * from './exports';

/** **Human** library main class
 *
 * All methods and properties are available only as members of Human class
 *
 * - Configuration object definition: {@link Config}
 * - Results object definition: {@link Result}
 * - Possible inputs: {@link Input}
 *
 * @param userConfig - {@link Config}
 * @returns instance of {@link Human}
 */
export class Human {
  /** Current version of Human library in *semver* format */
  version: string;

  /** Current configuration
   * - Defaults: [config](https://github.com/vladmandic/human/blob/main/src/config.ts#L262)
   */
  config: Config;

  /** Last known result of detect run
   * - Can be accessed anytime after initial detection
   */
  result: Result;

  /** Current state of Human library
   * - Can be polled to determine operations that are currently executed
   * - Values assigned by this class: 'idle', 'load', 'detect', 'config', 'check', 'image',
   *   'detect:face', 'detect:body', 'detect:hand', 'detect:object', 'detect:await', 'detect:gesture'
   * - NOTE(review): helpers that receive the instance (e.g. backend check) may set further values
   *   such as 'backend' - confirm against those modules
   */
  state: string;

  /** Currently processed image tensor and canvas */
  process: { tensor: Tensor | null, canvas: AnyCanvas | null };

  /** Instance of TensorFlow/JS used by Human
   * - Can be embedded or externally provided
   * [TFJS API]: {@link https://js.tensorflow.org/api/latest/}
   */
  tf;

  /** Object containing environment information used for diagnostics */
  env: Env;

  /** Draw helper classes that can draw detected objects on canvas using specified draw
   * - canvas: draws input to canvas
   * - options: are global settings for all draw operations, can be overridden for each draw method {@link DrawOptions}
   * - face, body, hand, gesture, object, person: draws detected results as overlays on canvas
   */
  draw: { canvas: typeof draw.canvas, face: typeof draw.face, body: typeof draw.body, hand: typeof draw.hand, gesture: typeof draw.gesture, object: typeof draw.object, person: typeof draw.person, all: typeof draw.all, options: DrawOptions };

  /** Currently loaded models
   * @internal
   * {@link Models}
   */
  models: models.Models;

  /** Container for events dispatched by Human
   * Events emitted by this class:
   * - `create`: triggered when Human object is instantiated
   * - `load`: triggered when the number of loaded models changed during `load()`
   * - `image`: triggered when input image is processed
   * - `detect`: triggered when detection is complete
   * - `error`: triggered on some errors
   *
   * NOTE(review): a `warmup` event was documented previously; it is not emitted in this file and is
   * presumably dispatched by the warmup module - confirm
   *
   * `undefined` when the runtime does not provide `EventTarget`
   */
  events: EventTarget | undefined;

  /** Reference face triangulation array of 468 points, used for triangle references between points */
  faceTriangulation: number[];

  /** Reference UV map of 468 values, used for 3D mapping of the face mesh */
  faceUVMap: [number, number][];

  /** Performance object that contains values for all recently performed operations */
  performance: Record<string, number>; // perf members are dynamically defined as needed

  #numTensors: number;
  #analyzeMemoryLeaks: boolean;
  #checkSanity: boolean;

  /** WebGL debug info */
  gl: Record<string, unknown>;
  // definition end

  /** Constructor for **Human** library that is further used for all operations
   * @param userConfig - user configuration object {@link Config}
   */
  constructor(userConfig?: Partial<Config>) {
    this.env = env;

    // environment-dependent defaults are written into the shared `defaults` object before it is cloned
    defaults.wasmPath = tf.version['tfjs-core'].includes('-') // custom build or official build
      ? 'https://vladmandic.github.io/tfjs/dist/'
      : `https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-wasm@${tf.version_core}/dist/`;
    defaults.modelBasePath = env.browser ? '../models/' : 'file://models/';
    defaults.backend = env.browser ? 'humangl' : 'tensorflow';

    this.version = app.version; // expose version property on instance of class
    Object.defineProperty(this, 'version', { value: app.version }); // re-define the same value through a property descriptor

    // start from a deep copy of defaults so that instance config never aliases the shared object
    this.config = JSON.parse(JSON.stringify(defaults));
    Object.seal(this.config);
    if (userConfig) this.config = mergeDeep(this.config, userConfig);

    this.tf = tf;
    this.state = 'idle';
    this.#numTensors = 0;
    this.#analyzeMemoryLeaks = false;
    this.#checkSanity = false;
    this.performance = {};
    this.events = (typeof EventTarget !== 'undefined') ? new EventTarget() : undefined;

    // object that contains all initialized models
    this.models = new models.Models();

    // reexport draw methods
    this.draw = {
      options: draw.options as DrawOptions,
      canvas: (input: AnyCanvas | HTMLImageElement | HTMLVideoElement, output: AnyCanvas) => draw.canvas(input, output),
      face: (output: AnyCanvas, result: FaceResult[], options?: Partial<DrawOptions>) => draw.face(output, result, options),
      body: (output: AnyCanvas, result: BodyResult[], options?: Partial<DrawOptions>) => draw.body(output, result, options),
      hand: (output: AnyCanvas, result: HandResult[], options?: Partial<DrawOptions>) => draw.hand(output, result, options),
      gesture: (output: AnyCanvas, result: GestureResult[], options?: Partial<DrawOptions>) => draw.gesture(output, result, options),
      object: (output: AnyCanvas, result: ObjectResult[], options?: Partial<DrawOptions>) => draw.object(output, result, options),
      person: (output: AnyCanvas, result: PersonResult[], options?: Partial<DrawOptions>) => draw.person(output, result, options),
      all: (output: AnyCanvas, result: Result, options?: Partial<DrawOptions>) => draw.all(output, result, options),
    };
    this.result = { face: [], body: [], hand: [], gesture: [], object: [], performance: {}, timestamp: 0, persons: [], error: null };

    // export access to image processing
    // @ts-ignore eslint-typescript cannot correctly infer type in anonymous function
    this.process = { tensor: null, canvas: null };

    // export raw access to underlying models
    this.faceTriangulation = facemesh.triangulation;
    this.faceUVMap = facemesh.uvmap;

    // set gl info
    this.gl = humangl.config;

    // include platform info
    this.emit('create');
  }

  /** Internal function to measure tensor leaks
   * - No-op unless leak analysis is enabled
   * - Logs `msg` followed by the change in tensor count since the previous call, when that change is non-zero
   */
  analyze = (...msg: string[]) => {
    if (!this.#analyzeMemoryLeaks) return;
    const currentTensors = this.tf.engine().state.numTensors;
    const previousTensors = this.#numTensors;
    this.#numTensors = currentTensors;
    const leaked = currentTensors - previousTensors;
    if (leaked !== 0) log(...msg, leaked);
  };

  /** Internal function for quick sanity check on inputs
   * @returns `null` when checks are disabled or input passes, otherwise a description of the problem
   * @hidden
   */
  #sanity = (input: Input): null | string => {
    if (!this.#checkSanity) return null;
    if (!input) return 'input is not defined';
    if (this.env.node && !(input instanceof tf.Tensor)) return 'input must be a tensor';
    try {
      this.tf.getBackend(); // a throw here is reported as a missing backend
    } catch {
      return 'backend not loaded';
    }
    return null;
  };

  /** Reset configuration to default values
   * - Currently selected backend is preserved
   */
  reset(): void {
    const currentBackend = this.config.backend; // save backend;
    this.config = JSON.parse(JSON.stringify(defaults));
    this.config.backend = currentBackend;
  }

  /** Validate current configuration schema
   * @param userConfig - configuration to validate against defaults; current configuration is used when omitted
   */
  validate(userConfig?: Partial<Config>) {
    return validate(defaults, userConfig || this.config);
  }

  /** Exports face matching methods {@link match#similarity} */
  public similarity = match.similarity;

  /** Exports face matching methods {@link match#distance} */
  public distance = match.distance;

  /** Exports face matching methods {@link match#match} */
  public match = match.match;

  /** Utility wrapper for performance.now() */
  now(): number {
    return now();
  }

  /** Process input and return canvas and tensor
   *
   * @param input - any input {@link Input}
   * @param getTensor - should image processing also return tensor or just canvas
   * Returns object with `tensor` and `canvas`
   */
  image(input: Input, getTensor: boolean = true) {
    return image.process(input, this.config, getTensor);
  }

  /** Segmentation method takes any input and returns processed canvas with body segmentation
   * - Segmentation is not triggered as part of detect process
   * @param input - {@link Input}
   * @param background - {@link Input}
   * - Optional parameter background is used to fill the background with specific input
   * Returns:
   * - `data` as raw data array with per-pixel segmentation values
   * - `canvas` as canvas which is input image filtered with segmentation data and optionally merged with background image. canvas alpha values are set to segmentation values for easy merging
   * - `alpha` as grayscale canvas that represents segmentation alpha values
   */
  async segmentation(input: Input, background?: Input): Promise<{ data: number[] | Tensor, canvas: AnyCanvas | null, alpha: AnyCanvas | null }> {
    return segmentation.process(input, background, this.config);
  }

  /** Enhance method performs additional enhancements to face image previously detected for further processing
   *
   * @param input - Tensor as provided in human.result.face[n].tensor
   * @returns Tensor
   */
  // eslint-disable-next-line class-methods-use-this
  enhance(input: Tensor): Tensor | null {
    return faceres.enhance(input);
  }

  /** Compare two input tensors for pixel similarity
   * - use `human.image` to process any valid input and get a tensor that can be used for compare
   * - when passing manually generated tensors:
   *  - both input tensors must be in format [1, height, width, 3]
   *  - if resolution of tensors does not match, second tensor will be resized to match resolution of the first tensor
   * - return value is pixel similarity score normalized by input resolution and rgb channels
   */
  compare(firstImageTensor: Tensor, secondImageTensor: Tensor): Promise<number> {
    return image.compare(this.config, firstImageTensor, secondImageTensor);
  }

  /** Explicit backend initialization
   * - Normally done implicitly during initial load phase
   * - Call to explicitly register and initialize TFJS backend without any other operations
   * - Use when changing backend during runtime
   */
  async init(): Promise<void> {
    await backend.check(this, true);
    await this.tf.ready();
  }

  /** Load method preloads all configured models on-demand
   * - Not explicitly required as any required model is loaded implicitly on its first run
   *
   * @param userConfig - {@link Config}
   */
  async load(userConfig?: Partial<Config>): Promise<void> {
    this.state = 'load';
    const timeStamp = now();
    const count = Object.values(this.models).filter((model) => model).length; // models present before this call
    if (userConfig) this.config = mergeDeep(this.config, userConfig) as Config;

    if (this.env.initial) { // print version info on first run and check for correct backend setup
      if (this.config.debug) log(`version: ${this.version}`);
      if (this.config.debug) log(`tfjs version: ${this.tf.version['tfjs-core']}`);
      if (!await backend.check(this)) log('error: backend check failed');
      await tf.ready();
      if (this.env.browser) {
        if (this.config.debug) log('configuration:', this.config);
        if (this.config.debug) log('environment:', this.env);
        if (this.config.debug) log('tf flags:', this.tf.ENV['flags']);
      }
    }

    await models.load(this); // actually loads models
    if (this.env.initial && this.config.debug) log('tf engine state:', this.tf.engine().state.numBytes, 'bytes', this.tf.engine().state.numTensors, 'tensors'); // print memory stats on first run
    this.env.initial = false;

    const loaded = Object.values(this.models).filter((model) => model).length;
    if (loaded !== count) { // number of loaded models changed
      await models.validate(this); // validate kernel ops used by model against current backend
      this.emit('load');
    }

    // only record load time when it exceeds the previously recorded value
    const current = Math.trunc(now() - timeStamp);
    if (current > (this.performance.loadModels as number || 0)) this.performance.loadModels = this.env.perfadd ? (this.performance.loadModels || 0) + current : current;
  }

  /** Emit event
   * - No-op when event dispatching is not available (no `EventTarget` in the runtime)
   */
  emit = (event: string) => {
    if (this.events?.dispatchEvent) this.events.dispatchEvent(new Event(event));
  };

  /** Runs interpolation using last known result and returns smoothened result
   * Interpolation is based on time since last known result so can be called independently
   *
   * @param result - {@link Result} optional use specific result set to run interpolation on
   * @returns result - {@link Result}
   */
  next(result: Result = this.result): Result {
    return interpolate.calc(result, this.config) as Result;
  }

  /** Warmup method pre-initializes all configured models for faster inference
   * - can take significant time on startup
   * - only used for `webgl` and `humangl` backends
   * - elapsed time is stored in `performance.warmup`
   * @param userConfig - {@link Config}
   * @returns result - {@link Result}
   */
  async warmup(userConfig?: Partial<Config>) {
    const t0 = now();
    const res = await warmups.warmup(this, userConfig);
    const t1 = now();
    this.performance.warmup = Math.trunc(t1 - t0);
    return res;
  }

  /** Run detect with tensorflow profiling
   * - result object will contain total execution time information for up to top-20 kernels
   * - actual detection object can be accessed via `human.result`
   */
  async profile(input: Input, userConfig?: Partial<Config>): Promise<Record<string, number>> {
    const profile = await this.tf.profile(() => this.detect(input, userConfig));

    // sum kernel time values per kernel
    const kernels: Record<string, number> = {};
    for (const kernel of profile.kernels) {
      if (kernels[kernel.name]) kernels[kernel.name] += kernel.kernelTimeMs;
      else kernels[kernel.name] = kernel.kernelTimeMs;
    }

    // convert to array, sort by time descending and keep at most 20 entries
    // slice() is used instead of assigning `length = 20`, which would pad a shorter array with
    // empty slots and make the loop below throw when reading `name` of `undefined`
    const kernelArr: Array<{ name: string, ms: number }> = Object.entries(kernels)
      .map(([name, ms]) => ({ name, ms }))
      .sort((a, b) => b.ms - a.ms)
      .slice(0, 20);

    const res: Record<string, number> = {};
    for (const kernel of kernelArr) res[kernel.name] = kernel.ms; // create perf objects
    return res;
  }

  /** Main detection method
   * - Analyze configuration: {@link Config}
   * - Pre-process input: {@link Input}
   * - Run inference for all configured models
   * - Process and return result: {@link Result}
   *
   * On input errors the promise resolves (does not reject) with an empty result whose `error` field is set
   *
   * @param input - {@link Input}
   * @param userConfig - {@link Config}
   * @returns result - {@link Result}
   */
  async detect(input: Input, userConfig?: Partial<Config>): Promise<Result> {
    // detection happens inside a promise
    // NOTE(review): an exception thrown inside this async executor never settles the returned promise
    this.state = 'detect';
    return new Promise(async (resolve) => {
      this.state = 'config';
      let timeStamp;

      // update configuration
      this.config = mergeDeep(this.config, userConfig) as Config;

      // sanity checks
      this.state = 'check';
      const error = this.#sanity(input);
      if (error) {
        log(error, input);
        this.emit('error');
        resolve({ face: [], body: [], hand: [], gesture: [], object: [], performance: this.performance, timestamp: now(), persons: [], error });
        return; // stop here: input was rejected, do not load models or run inference on it
      }

      const timeStart = now();

      // configure backend if needed
      await backend.check(this);

      // load models if enabled
      await this.load();

      timeStamp = now();
      this.state = 'image';
      const img = await image.process(input, this.config) as { canvas: AnyCanvas, tensor: Tensor };
      this.process = img;
      this.performance.inputProcess = this.env.perfadd ? (this.performance.inputProcess || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
      this.analyze('Get Image:');

      if (!img.tensor) {
        if (this.config.debug) log('could not convert input to tensor');
        this.emit('error');
        resolve({ face: [], body: [], hand: [], gesture: [], object: [], performance: this.performance, timestamp: now(), persons: [], error: 'could not convert input to tensor' });
        return;
      }
      this.emit('image');

      // check whether cached results may be reused for this frame and update frame counters
      timeStamp = now();
      this.config.skipAllowed = await image.skip(this.config, img.tensor);
      if (!this.performance.totalFrames) this.performance.totalFrames = 0;
      if (!this.performance.cachedFrames) this.performance.cachedFrames = 0;
      (this.performance.totalFrames as number)++;
      if (this.config.skipAllowed) this.performance.cachedFrames++;
      this.performance.cacheCheck = this.env.perfadd ? (this.performance.cacheCheck || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
      this.analyze('Check Changed:');

      // prepare where to store model results
      // keep them with weak typing as it can be promise or not
      let faceRes: FaceResult[] | Promise<FaceResult[]> | never[] = [];
      let bodyRes: BodyResult[] | Promise<BodyResult[]> | never[] = [];
      let handRes: HandResult[] | Promise<HandResult[]> | never[] = [];
      let objectRes: ObjectResult[] | Promise<ObjectResult[]> | never[] = [];

      // run face detection followed by all models that rely on face bounding box: face mesh, age, gender, emotion
      // in async mode models are started without awaiting and per-model timings are removed
      this.state = 'detect:face';
      if (this.config.async) {
        faceRes = this.config.face.enabled ? face.detectFace(this, img.tensor) : [];
        if (this.performance.face) delete this.performance.face;
      } else {
        timeStamp = now();
        faceRes = this.config.face.enabled ? await face.detectFace(this, img.tensor) : [];
        this.performance.face = this.env.perfadd ? (this.performance.face || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
      }

      if (this.config.async && (this.config.body.maxDetected === -1 || this.config.hand.maxDetected === -1)) faceRes = await faceRes; // need face result for auto-detect number of hands or bodies

      // run body: can be posenet, blazepose, efficientpose, movenet
      this.analyze('Start Body:');
      this.state = 'detect:body';
      const bodyConfig = this.config.body.maxDetected === -1
        ? mergeDeep(this.config, { body: { maxDetected: this.config.face.enabled ? 1 * (faceRes as FaceResult[]).length : 1 } })
        : this.config; // autodetect number of bodies
      if (this.config.async) {
        if (this.config.body.modelPath?.includes('posenet')) bodyRes = this.config.body.enabled ? posenet.predict(img.tensor, bodyConfig) : [];
        else if (this.config.body.modelPath?.includes('blazepose')) bodyRes = this.config.body.enabled ? blazepose.predict(img.tensor, bodyConfig) : [];
        else if (this.config.body.modelPath?.includes('efficientpose')) bodyRes = this.config.body.enabled ? efficientpose.predict(img.tensor, bodyConfig) : [];
        else if (this.config.body.modelPath?.includes('movenet')) bodyRes = this.config.body.enabled ? movenet.predict(img.tensor, bodyConfig) : [];
        if (this.performance.body) delete this.performance.body;
      } else {
        timeStamp = now();
        if (this.config.body.modelPath?.includes('posenet')) bodyRes = this.config.body.enabled ? await posenet.predict(img.tensor, bodyConfig) : [];
        else if (this.config.body.modelPath?.includes('blazepose')) bodyRes = this.config.body.enabled ? await blazepose.predict(img.tensor, bodyConfig) : [];
        else if (this.config.body.modelPath?.includes('efficientpose')) bodyRes = this.config.body.enabled ? await efficientpose.predict(img.tensor, bodyConfig) : [];
        else if (this.config.body.modelPath?.includes('movenet')) bodyRes = this.config.body.enabled ? await movenet.predict(img.tensor, bodyConfig) : [];
        this.performance.body = this.env.perfadd ? (this.performance.body || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
      }
      this.analyze('End Body:');

      // run handpose
      this.analyze('Start Hand:');
      this.state = 'detect:hand';
      const handConfig = this.config.hand.maxDetected === -1
        ? mergeDeep(this.config, { hand: { maxDetected: this.config.face.enabled ? 2 * (faceRes as FaceResult[]).length : 1 } })
        : this.config; // autodetect number of hands
      if (this.config.async) {
        if (this.config.hand.detector?.modelPath?.includes('handdetect')) handRes = this.config.hand.enabled ? handpose.predict(img.tensor, handConfig) : [];
        else if (this.config.hand.detector?.modelPath?.includes('handtrack')) handRes = this.config.hand.enabled ? handtrack.predict(img.tensor, handConfig) : [];
        if (this.performance.hand) delete this.performance.hand;
      } else {
        timeStamp = now();
        if (this.config.hand.detector?.modelPath?.includes('handdetect')) handRes = this.config.hand.enabled ? await handpose.predict(img.tensor, handConfig) : [];
        else if (this.config.hand.detector?.modelPath?.includes('handtrack')) handRes = this.config.hand.enabled ? await handtrack.predict(img.tensor, handConfig) : [];
        this.performance.hand = this.env.perfadd ? (this.performance.hand || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
      }
      this.analyze('End Hand:');

      // run object detection
      this.analyze('Start Object:');
      this.state = 'detect:object';
      if (this.config.async) {
        if (this.config.object.modelPath?.includes('nanodet')) objectRes = this.config.object.enabled ? nanodet.predict(img.tensor, this.config) : [];
        else if (this.config.object.modelPath?.includes('centernet')) objectRes = this.config.object.enabled ? centernet.predict(img.tensor, this.config) : [];
        if (this.performance.object) delete this.performance.object;
      } else {
        timeStamp = now();
        if (this.config.object.modelPath?.includes('nanodet')) objectRes = this.config.object.enabled ? await nanodet.predict(img.tensor, this.config) : [];
        else if (this.config.object.modelPath?.includes('centernet')) objectRes = this.config.object.enabled ? await centernet.predict(img.tensor, this.config) : [];
        this.performance.object = this.env.perfadd ? (this.performance.object || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
      }
      this.analyze('End Object:');

      // if async wait for results
      this.state = 'detect:await';
      if (this.config.async) [faceRes, bodyRes, handRes, objectRes] = await Promise.all([faceRes, bodyRes, handRes, objectRes]);

      // run gesture analysis last
      this.state = 'detect:gesture';
      let gestureRes: GestureResult[] = [];
      if (this.config.gesture.enabled) {
        timeStamp = now();
        gestureRes = [...gesture.face(faceRes as FaceResult[]), ...gesture.body(bodyRes as BodyResult[]), ...gesture.hand(handRes as HandResult[]), ...gesture.iris(faceRes as FaceResult[])];
        if (!this.config.async) this.performance.gesture = this.env.perfadd ? (this.performance.gesture || 0) + Math.trunc(now() - timeStamp) : Math.trunc(now() - timeStamp);
        else if (this.performance.gesture) delete this.performance.gesture;
      }

      this.performance.total = this.env.perfadd ? (this.performance.total || 0) + Math.trunc(now() - timeStart) : Math.trunc(now() - timeStart);
      const shape = this.process?.tensor?.shape || [];
      this.result = {
        face: faceRes as FaceResult[],
        body: bodyRes as BodyResult[],
        hand: handRes as HandResult[],
        gesture: gestureRes,
        object: objectRes as ObjectResult[],
        performance: this.performance,
        canvas: this.process.canvas,
        timestamp: Date.now(),
        error: null,
        // persons are joined lazily, each time the property is read
        get persons() { return persons.join(faceRes as FaceResult[], bodyRes as BodyResult[], handRes as HandResult[], gestureRes, shape); },
      };

      // finally dispose input tensor
      tf.dispose(img.tensor);

      // log('Result:', result);
      this.emit('detect');
      this.state = 'idle';
      resolve(this.result);
    });
  }
}

/** Class Human as default export */
/* eslint no-restricted-exports: ["off", { "restrictedNamedExports": ["default"] }] */
export { Human as default };