diff --git a/config.js b/config.js
index f5572fcd..2600ef03 100644
--- a/config.js
+++ b/config.js
@@ -66,6 +66,7 @@ export default {
       // such as front-facing camera and
       // 'back' is optimized for distant faces.
       inputSize: 256, // fixed value: 128 for front and 256 for 'back'
+      rotation: false, // use best-guess rotated face image or just box with rotation as-is
       maxFaces: 10, // maximum number of faces detected in the input
       // should be set to the minimum number for performance
       skipFrames: 15, // how many frames to go without re-running the face bounding box detector
@@ -118,6 +119,12 @@
       skipFrames: 15, // how many frames to go without re-running the detector
       modelPath: '../models/emotion-large.json', // can be 'mini', 'large'
     },
+
+    embedding: {
+      enabled: false,
+      inputSize: 112, // fixed value
+      modelPath: '../models/mobilefacenet.json',
+    },
   },

   body: {
diff --git a/demo/browser.js b/demo/browser.js
index 2c452421..5b821f8d 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -22,6 +22,7 @@ const ui = {
   useWorker: false,
   worker: 'demo/worker.js',
   samples: ['../assets/sample6.jpg', '../assets/sample1.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample3.jpg', '../assets/sample2.jpg'],
+  compare: '../assets/sample-me.jpg',
   drawBoxes: true,
   drawPoints: false,
   drawPolygons: true,
@@ -48,6 +49,7 @@
 let menu;
 let menuFX;
 let worker;
 let bench;
+let sample;
 let lastDetectedResult = {};

 // helper function: translates json to human-readable string
@@ -72,6 +74,16 @@
 const status = (msg) => {
   document.getElementById('status').innerText = msg;
 };

+async function calcSimmilarity(faces) {
+  if (!faces || !faces[0] || (faces[0].embedding?.length !== 192)) return;
+  const current = faces[0].embedding;
+  const original = (sample && sample.face && sample.face[0] && sample.face[0].embedding) ? sample.face[0].embedding : null;
+  if (original && original.length === 192) {
+    const simmilarity = human.simmilarity(current, original);
+    document.getElementById('simmilarity').innerText = `simmilarity: ${Math.trunc(1000 * simmilarity) / 10}%`;
+  }
+}
+
 // draws processed results and starts processing of the next frame
 async function drawResults(input) {
   const result = lastDetectedResult;
@@ -79,7 +91,7 @@
   // update fps data
   // const elapsed = performance.now() - timeStamp;
-  ui.fps.push(1000 / result.performance.total);
+  if (result.performance && result.performance.total) ui.fps.push(1000 / result.performance.total);
   if (ui.fps.length > ui.maxFPSframes) ui.fps.shift();

   // enable for continuous performance monitoring
@@ -89,7 +101,7 @@
   await menu.updateChart('FPS', ui.fps);

   // get updated canvas
-  result.canvas = await human.image(input, userConfig);
+  if (ui.buffered || !result.canvas) result.canvas = await human.image(input, userConfig);

   // draw image from video
   const ctx = canvas.getContext('2d');
@@ -102,17 +114,20 @@
   } else {
     ctx.drawImage(input, 0, 0, input.width, input.height, 0, 0, canvas.width, canvas.height);
   }
+
   // draw all results
   await draw.face(result.face, canvas, ui, human.facemesh.triangulation);
   await draw.body(result.body, canvas, ui);
   await draw.hand(result.hand, canvas, ui);
   await draw.gesture(result.gesture, canvas, ui);
+  await calcSimmilarity(result.face);
+
   // update log
   const engine = human.tf.engine();
   const gpu = engine.backendInstance ? `gpu: ${(engine.backendInstance.numBytesInGPU ? engine.backendInstance.numBytesInGPU : 0).toLocaleString()} bytes` : '';
   const memory = `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
   const processing = result.canvas ? `processing: ${result.canvas.width} x ${result.canvas.height}` : '';
-  const avg = Math.trunc(10 * ui.fps.reduce((a, b) => a + b) / ui.fps.length) / 10;
+  const avg = Math.trunc(10 * ui.fps.reduce((a, b) => a + b, 0) / ui.fps.length) / 10;
   const warning = (ui.fps.length > 5) && (avg < 5) ? 'warning: your performance is low: try switching to higher performance backend, lowering resolution or disabling some models' : '';
   document.getElementById('log').innerHTML = `
     video: ${ui.camera.name} | facing: ${ui.camera.facing} | resolution: ${ui.camera.width} x ${ui.camera.height} ${processing}
@@ -277,7 +292,8 @@ async function processImage(input) {
     canvas.width = human.config.filter.width && human.config.filter.width > 0 ? human.config.filter.width : image.naturalWidth;
     canvas.height = human.config.filter.height && human.config.filter.height > 0 ? human.config.filter.height : image.naturalHeight;
     const result = await human.detect(image, userConfig);
-    drawResults(image, result, canvas);
+    lastDetectedResult = result;
+    await drawResults(image);
     const thumb = document.createElement('canvas');
     thumb.className = 'thumbnail';
     thumb.width = window.innerWidth / (ui.columns + 0.1);
@@ -325,11 +341,12 @@ async function detectSampleImages() {
   log('Running detection of sample images');
   status('processing images');
   document.getElementById('samples-container').innerHTML = '';
-  for (const sample of ui.samples) await processImage(sample);
+  for (const image of ui.samples) await processImage(image);
   status('');
 }

 function setupMenu() {
+  document.getElementById('compare-container').style.display = human.config.face.embedding.enabled ? 'block' : 'none';
   menu = new Menu(document.body, '', { top: '1rem', right: '1rem' });
   const btn = menu.addButton('start video', 'pause video', () => detectVideo());
   menu.addButton('process images', 'process images', () => detectSampleImages());
@@ -449,7 +466,7 @@ async function main() {
   // this is not required, just pre-warms all models for faster initial inference
   if (ui.modelsWarmup) {
     status('initializing');
-    await human.warmup(userConfig);
+    sample = await human.warmup(userConfig, document.getElementById('sample-image'));
   }
   status('human: ready');
   document.getElementById('loader').style.display = 'none';
diff --git a/demo/index.html b/demo/index.html
index 5cff8d82..633de390 100644
--- a/demo/index.html
+++ b/demo/index.html
@@ -34,6 +34,7 @@
     .video { display: none; }
     .canvas { margin: 0 auto; }
     .bench { position: absolute; right: 0; bottom: 0; }
+    .compare-image { width: 10vw; position: absolute; top: 150px; left: 30px; box-shadow: 0 0 2px 2px black; background: black; }
     .loader { width: 300px; height: 300px; border: 3px solid transparent; border-radius: 50%; border-top: 4px solid #f15e41; animation: spin 4s linear infinite; position: absolute; top: 30%; left: 50%; margin-left: -150px; z-index: 15; }
     .loader::before, .loader::after { content: ""; position: absolute; top: 6px; bottom: 6px; left: 6px; right: 6px; border-radius: 50%; border: 4px solid transparent; }
     .loader::before { border-top-color: #bad375; animation: 3s spin linear infinite; }
@@ -70,6 +71,10 @@
+    <div id="compare-container" class="compare-image">
+      <img id="sample-image" width="100%">
+      <div id="simmilarity"></div>
+    </div>
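Note: the new src/embedding/embedding.js module imported by src/human.js below is not included in this diff. What follows is a minimal sketch of the shape such a module could plausibly take, inferred only from how it is called elsewhere in the patch: load() reads config.face.embedding.modelPath, predict() yields a 192-element descriptor (the demo checks embedding?.length !== 192), and simmilarity() maps two descriptors to a 0..1 score. The input normalization and distance metric here are assumptions, not the actual implementation.

// src/embedding/embedding.js -- hypothetical sketch, not part of this diff
import { loadGraphModel } from '@tensorflow/tfjs-converter';
import * as tf from '@tensorflow/tfjs-core';

const models = {};

async function load(config) {
  // cache the graph model, mirroring the age/gender/emotion modules
  if (!models.embedding) {
    models.embedding = await loadGraphModel(config.face.embedding.modelPath);
  }
  return models.embedding;
}

function simmilarity(embedding1, embedding2) {
  if (!embedding1 || !embedding2 || (embedding1.length !== embedding2.length)) return 0;
  // assumption: euclidean distance between descriptors, inverted and clamped to a 0..1 score
  const distance = Math.sqrt(embedding1.reduce((sum, val, i) => sum + ((val - embedding2[i]) ** 2), 0));
  return Math.max(0, 1 - distance);
}

async function predict(image, config) {
  if (!models.embedding) return null;
  // resize the face crop to the fixed input size (112 per config.js above)
  const resize = tf.image.resizeBilinear(image, [config.face.embedding.inputSize, config.face.embedding.inputSize], false);
  const res = models.embedding.predict(resize); // descriptor tensor, flattened to a plain array
  const embedding = [...res.dataSync()];
  tf.dispose([resize, res]);
  return embedding;
}

export { load, predict, simmilarity };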
diff --git a/src/age/age.js b/src/age/age.js
index 2e82c741..ce8d4277 100644
--- a/src/age/age.js
+++ b/src/age/age.js
@@ -5,9 +5,6 @@
 const models = {};
 let last = { age: 0 };
 let frame = Number.MAX_SAFE_INTEGER;
-// tuning values
-const zoom = [0, 0]; // 0..1 meaning 0%..100%
-
 async function load(config) {
   if (!models.age) {
     models.age = await loadGraphModel(config.face.age.modelPath);
@@ -18,12 +15,15 @@
 }

 async function predict(image, config) {
+  if (!models.age) return null;
   if ((frame < config.face.age.skipFrames) && last.age && (last.age > 0)) {
     frame += 1;
     return last;
   }
   frame = 0;
   return new Promise(async (resolve) => {
+    /*
+    const zoom = [0, 0]; // 0..1 meaning 0%..100%
     const box = [[
       (image.shape[1] * zoom[0]) / image.shape[1],
       (image.shape[2] * zoom[1]) / image.shape[2],
@@ -31,7 +31,8 @@
       (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
     ]];
     const resize = tf.image.cropAndResize(image, box, [0], [config.face.age.inputSize, config.face.age.inputSize]);
-    // const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
+    */
+    const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
     const enhance = tf.mul(resize, [255.0]);
     tf.dispose(resize);
diff --git a/src/emotion/emotion.js b/src/emotion/emotion.js
index f1c60c7a..add41718 100644
--- a/src/emotion/emotion.js
+++ b/src/emotion/emotion.js
@@ -7,7 +7,6 @@
 let last = [];
 let frame = Number.MAX_SAFE_INTEGER;

 // tuning values
-const zoom = [0, 0]; // 0..1 meaning 0%..100%
 const rgb = [0.2989, 0.5870, 0.1140]; // factors for red/green/blue colors when converting to grayscale
 const scale = 1; // score multiplication factor

@@ -21,12 +20,15 @@
 }

 async function predict(image, config) {
+  if (!models.emotion) return null;
   if ((frame < config.face.emotion.skipFrames) && (last.length > 0)) {
     frame += 1;
     return last;
   }
   frame = 0;
   return new Promise(async (resolve) => {
+    /*
+    const zoom = [0, 0]; // 0..1 meaning 0%..100%
     const box = [[
       (image.shape[1] * zoom[0]) / image.shape[1],
       (image.shape[2] * zoom[1]) / image.shape[2],
@@ -34,7 +36,8 @@
       (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
     ]];
     const resize = tf.image.cropAndResize(image, box, [0], [config.face.emotion.inputSize, config.face.emotion.inputSize]);
-    // const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
+    */
+    const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
     const [red, green, blue] = tf.split(resize, 3, 3);
     resize.dispose();
     // weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
diff --git a/src/gender/gender.js b/src/gender/gender.js
index 7afc7f30..aeef72ca 100644
--- a/src/gender/gender.js
+++ b/src/gender/gender.js
@@ -7,7 +7,6 @@
 let frame = Number.MAX_SAFE_INTEGER;
 let alternative = false;

 // tuning values
-const zoom = [0, 0]; // 0..1 meaning 0%..100%
 const rgb = [0.2989, 0.5870, 0.1140]; // factors for red/green/blue colors when converting to grayscale

 async function load(config) {
@@ -21,12 +20,15 @@
 }

 async function predict(image, config) {
+  if (!models.gender) return null;
   if ((frame < config.face.gender.skipFrames) && last.gender !== '') {
     frame += 1;
     return last;
   }
   frame = 0;
   return new Promise(async (resolve) => {
+    /*
+    const zoom = [0, 0]; // 0..1 meaning 0%..100%
     const box = [[
       (image.shape[1] * zoom[0]) / image.shape[1],
       (image.shape[2] * zoom[1]) / image.shape[2],
@@ -34,6 +36,8 @@
       (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
     ]];
     const resize = tf.image.cropAndResize(image, box, [0], [config.face.gender.inputSize, config.face.gender.inputSize]);
+    */
+    const resize = tf.image.resizeBilinear(image, [config.face.gender.inputSize, config.face.gender.inputSize], false);
     let enhance;
     if (alternative) {
       enhance = tf.tidy(() => {
diff --git a/src/human.js b/src/human.js
index 578d1a2a..390c3d02 100644
--- a/src/human.js
+++ b/src/human.js
@@ -3,6 +3,7 @@
 import * as facemesh from './face/facemesh.js';
 import * as age from './age/age.js';
 import * as gender from './gender/gender.js';
 import * as emotion from './emotion/emotion.js';
+import * as embedding from './embedding/embedding.js';
 import * as posenet from './body/posenet.js';
 import * as handpose from './hand/handpose.js';
 import * as gesture from './gesture.js';
@@ -108,6 +109,11 @@
     return null;
   }

+  simmilarity(embedding1, embedding2) {
+    if (this.config.face.embedding.enabled) return embedding.simmilarity(embedding1, embedding2);
+    return 0;
+  }
+
   // preload models, not explicitly required as it's done automatically on first use
   async load(userConfig) {
     this.state = 'load';
@@ -127,6 +133,7 @@
       this.models.age,
       this.models.gender,
       this.models.emotion,
+      this.models.embedding,
       this.models.posenet,
       this.models.handpose,
     ] = await Promise.all([
@@ -134,6 +141,7 @@
       this.models.age || ((this.config.face.enabled && this.config.face.age.enabled) ? age.load(this.config) : null),
       this.models.gender || ((this.config.face.enabled && this.config.face.gender.enabled) ? gender.load(this.config) : null),
       this.models.emotion || ((this.config.face.enabled && this.config.face.emotion.enabled) ? emotion.load(this.config) : null),
+      this.models.embedding || ((this.config.face.enabled && this.config.face.embedding.enabled) ? embedding.load(this.config) : null),
       this.models.posenet || (this.config.body.enabled ? posenet.load(this.config) : null),
       this.models.handpose || (this.config.hand.enabled ? handpose.load(this.config.hand) : null),
     ]);
@@ -142,6 +150,7 @@
       if (this.config.face.enabled && this.config.face.age.enabled && !this.models.age) this.models.age = await age.load(this.config);
       if (this.config.face.enabled && this.config.face.gender.enabled && !this.models.gender) this.models.gender = await gender.load(this.config);
       if (this.config.face.enabled && this.config.face.emotion.enabled && !this.models.emotion) this.models.emotion = await emotion.load(this.config);
+      if (this.config.face.enabled && this.config.face.embedding.enabled && !this.models.embedding) this.models.embedding = await embedding.load(this.config);
       if (this.config.body.enabled && !this.models.posenet) this.models.posenet = await posenet.load(this.config);
       if (this.config.hand.enabled && !this.models.handpose) this.models.handpose = await handpose.load(this.config.hand);
     }
@@ -199,6 +208,7 @@
     let ageRes;
     let genderRes;
     let emotionRes;
+    let embeddingRes;
     const faceRes = [];
     this.state = 'run:face';
     timeStamp = now();
@@ -206,11 +216,13 @@
     this.perf.face = Math.trunc(now() - timeStamp);
     for (const face of faces) {
       this.analyze('Get Face');
+
       // if something went wrong, skip the face
       if (!face.image || face.image.isDisposedInternal) {
         this.log('Face object is disposed:', face.image);
         continue;
       }
+
       // run age, inherits face from blazeface
       this.analyze('Start Age:');
       if (this.config.async) {
@@ -232,6 +244,7 @@
         genderRes = this.config.face.gender.enabled ? await gender.predict(face.image, this.config) : {};
         this.perf.gender = Math.trunc(now() - timeStamp);
       }
+
       // run emotion, inherits face from blazeface
       this.analyze('Start Emotion:');
       if (this.config.async) {
@@ -244,9 +257,21 @@
       }
       this.analyze('End Emotion:');

+      // run embedding, inherits face from blazeface
+      this.analyze('Start Embedding:');
+      if (this.config.async) {
+        embeddingRes = this.config.face.embedding.enabled ? embedding.predict(face.image, this.config) : {};
+      } else {
+        this.state = 'run:embedding';
+        timeStamp = now();
+        embeddingRes = this.config.face.embedding.enabled ? await embedding.predict(face.image, this.config) : {};
+        this.perf.embedding = Math.trunc(now() - timeStamp);
+      }
+      this.analyze('End Embedding:');
+
       // if async wait for results
       if (this.config.async) {
-        [ageRes, genderRes, emotionRes] = await Promise.all([ageRes, genderRes, emotionRes]);
+        [ageRes, genderRes, emotionRes, embeddingRes] = await Promise.all([ageRes, genderRes, emotionRes, embeddingRes]);
       }
       this.analyze('Finish Face:');
@@ -270,6 +295,7 @@
         gender: genderRes.gender,
         genderConfidence: genderRes.confidence,
         emotion: emotionRes,
+        embedding: embeddingRes,
         iris: (irisSize !== 0) ? Math.trunc(irisSize) / 100 : 0,
       });
       this.analyze('End Face');
@@ -294,23 +320,24 @@
   // main detect function
   async detect(input, userConfig = {}) {
-    this.state = 'config';
-    let timeStamp;
-
-    // update configuration
-    this.config = mergeDeep(this.config, userConfig);
-    if (!this.config.videoOptimized) this.config = mergeDeep(this.config, disableSkipFrames);
-
-    // sanity checks
-    this.state = 'check';
-    const error = this.sanity(input);
-    if (error) {
-      this.log(error, input);
-      return { error };
-    }
-
     // detection happens inside a promise
     return new Promise(async (resolve) => {
+      this.state = 'config';
+      let timeStamp;
+
+      // update configuration
+      this.config = mergeDeep(this.config, userConfig);
+      if (!this.config.videoOptimized) this.config = mergeDeep(this.config, disableSkipFrames);
+
+      // sanity checks
+      this.state = 'check';
+      const error = this.sanity(input);
+      if (error) {
+        this.log(error, input);
+        resolve({ error });
+        return;
+      }
+
       let poseRes;
       let handRes;
       let faceRes;
@@ -391,10 +417,11 @@
     });
   }

-  async warmup(userConfig) {
-    const warmup = new ImageData(255, 255);
-    await this.detect(warmup, userConfig);
+  async warmup(userConfig, sample) {
+    if (!sample) sample = new ImageData(255, 255);
+    const warmup = await this.detect(sample, userConfig);
     this.log('warmed up');
+    return warmup;
   }
 }
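Taken together, these changes let warmup() against a known reference image return a full detection result (the demo stores it as the comparison sample) and let any later detection be scored against it. A short usage sketch follows: the import form and input variables are illustrative assumptions, while human.detect, human.simmilarity, config.face.embedding, and the 192-element face embedding field all come from this diff.

// hypothetical usage of the embedding support added in this diff
import Human from '@vladmandic/human'; // import form assumed; use however Human is exposed in your build

const human = new Human();
const config = {
  face: {
    enabled: true,
    embedding: { enabled: true }, // modelPath defaults to '../models/mobilefacenet.json' per config.js above
  },
};

// compare the first face found in each of two inputs (image, video frame, canvas, ...)
async function compareFaces(inputA, inputB) {
  const resultA = await human.detect(inputA, config);
  const resultB = await human.detect(inputB, config);
  const embeddingA = resultA.face && resultA.face[0] && resultA.face[0].embedding;
  const embeddingB = resultB.face && resultB.face[0] && resultB.face[0].embedding;
  if (!embeddingA || !embeddingB) return 0; // no face detected or embedding disabled
  return human.simmilarity(embeddingA, embeddingB); // 0..1 score, shown as a percentage by the demo
}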