add rvm segmentation model

pull/356/head
Vladimir Mandic 2022-10-02 15:09:00 -04:00
parent 48df1b13f0
commit 4fddd86f3f
27 changed files with 4585 additions and 1426 deletions

22
TODO.md
View File

@ -20,25 +20,33 @@ N/A
<hr><br>
## Known Issues
## Known Issues & Limitations
### Face with Attention
`FaceMesh-Attention` is not supported when using `WASM` backend due to missing kernel op in **TFJS**
No issues with default model `FaceMesh`
### Object Detection
`NanoDet` model is not supported when using `WASM` backend due to missing kernel op in **TFJS**
No issues with default model `MB3-CenterNet`
### WebGPU
Experimental support only, until WebGPU support is officially added in Chromium
Enable via <chrome://flags/#enable-unsafe-webgpu>
### Firefox
Running in **web workers** requires `OffscreenCanvas` which is still disabled by default in **Firefox**
Enable via `about:config` -> `gfx.offscreencanvas.enabled`
[Details](https://developer.mozilla.org/en-US/docs/Web/API/OffscreenCanvas#browser_compatibility)
### Safari
No support for running in **web workers** as Safari still does not support `OffscreenCanvas`
[Details](https://developer.mozilla.org/en-US/docs/Web/API/OffscreenCanvas#browser_compatibility)
<hr><br>
@ -49,7 +57,13 @@ Enable via `about:config` -> `gfx.offscreencanvas.enabled`
- New method [`human.video()`](https://vladmandic.github.io/human/typedoc/classes/Human.html#video)
Runs continuous detection of an input **video**
instead of processing each frame manually using `human.detect()`
- New simple demo [*Live*](https://vladmandic.github.io/human/demo/video/index.html) | [*Code*](https://github.com/vladmandic/human/blob/main/demo/video/index.html)
- New demo for **webcam** and **video** methods [*Live*](https://vladmandic.github.io/human/demo/video/index.html) | [*Code*](https://github.com/vladmandic/human/blob/main/demo/video/index.html)
*Full HTML and JavaScript code in less than a screen*
- Redesigned [`human.segmentation`](https://vladmandic.github.io/human/typedoc/classes/Human.html#segmentation)
*Breaking changes; see the usage sketch below*
- New model `rvm` for high-quality body segmentation in real-time
*Not part of default deployment, download from [human-models](https://github.com/vladmandic/human-models/tree/main/models)*
- New demo for **segmentation** methods [*Live*](https://vladmandic.github.io/human/demo/segmentation/index.html) | [*Code*](https://github.com/vladmandic/human/blob/main/demo/segmentation/index.html)
*Full HTML and JavaScript code in less than a screen*
- New advanced demo using **BabylonJS and VRM** [*Live*](https://vladmandic.github.io/human-bjs-vrm) | [*Code*](https://github.com/vladmandic/human-bjs-vrm)
- Update **TypeDoc** generation [*Link*](https://vladmandic.github.io/human/typedoc)
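
A minimal usage sketch of the redesigned `human.segmentation` API, which now returns an RGBA tensor instead of the previous `{ data, canvas, alpha }` object; it follows the pattern of the new segmentation demo in this commit, and the import path and element variables are illustrative:

```js
import * as H from '@vladmandic/human';

const human = new H.Human({
  // rvm is not part of the default deployment, so models are loaded from human-models
  modelBasePath: 'https://vladmandic.github.io/human-models/models/',
  segmentation: { enabled: true, modelPath: 'rvm.json', ratio: 0.5, mode: 'default' },
});

async function drawSegmentation(input, outputCanvas) {
  const rgba = await human.segmentation(input, human.config); // returns an rgba tensor or null
  if (!rgba) return; // segmentation disabled or model failed to load
  await human.tf.browser.toPixels(rgba, outputCanvas); // draw tensor directly to a canvas
  human.tf.dispose(rgba); // caller is responsible for disposing the returned tensor
}
```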

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -110,10 +110,6 @@
<canvas id="compare-canvas" width="200" height="200"></canvas>
<div id="similarity"></div>
</div>
<div id="segmentation-container" class="compare-image">
<canvas id="segmentation-mask" width="256" height="256" style="width: 256px; height: 256px;"></canvas>
<canvas id="segmentation-canvas" width="256" height="256" style="width: 256px; height: 256px;"></canvas>
</div>
<div id="samples-container" class="samples-container"></div>
<div id="hint" class="hint"></div>
<div id="log" class="log"></div>

View File

@ -111,7 +111,6 @@ const ui = {
results: false, // show results tree
lastFrame: 0, // time of last frame processing
viewportSet: false, // internal, has custom viewport been set
background: null, // holds instance of segmentation background image
transferCanvas: null, // canvas used to transfer data to and from worker
// webrtc
@ -263,21 +262,7 @@ async function drawResults(input) {
// draw fps chart
await menu.process.updateChart('FPS', ui.detectFPS);
document.getElementById('segmentation-container').style.display = userConfig.segmentation.enabled ? 'block' : 'none';
if (userConfig.segmentation.enabled && ui.buffered) { // refresh segmentation if using buffered output
const seg = await human.segmentation(input, ui.background);
if (seg.alpha) {
const canvasSegMask = document.getElementById('segmentation-mask');
const ctxSegMask = canvasSegMask.getContext('2d');
ctxSegMask.clearRect(0, 0, canvasSegMask.width, canvasSegMask.height); // need to clear as seg.alpha is alpha based canvas so it adds
ctxSegMask.drawImage(seg.alpha, 0, 0, seg.alpha.width, seg.alpha.height, 0, 0, canvasSegMask.width, canvasSegMask.height);
const canvasSegCanvas = document.getElementById('segmentation-canvas');
const ctxSegCanvas = canvasSegCanvas.getContext('2d');
ctxSegCanvas.clearRect(0, 0, canvasSegCanvas.width, canvasSegCanvas.height); // need to clear as seg.alpha is alpha based canvas so it adds
ctxSegCanvas.drawImage(seg.canvas, 0, 0, seg.alpha.width, seg.alpha.height, 0, 0, canvasSegCanvas.width, canvasSegCanvas.height);
}
// result.canvas = seg.alpha;
} else if (!result.canvas || ui.buffered) { // refresh with input if using buffered output or if missing canvas
if (!result.canvas || ui.buffered) { // refresh with input if using buffered output or if missing canvas
const image = await human.image(input, false);
result.canvas = image.canvas;
human.tf.dispose(image.tensor);
@ -743,7 +728,6 @@ function setupMenu() {
menu.image.addBool('technicolor', userConfig.filter, 'technicolor', (val) => userConfig.filter.technicolor = val);
menu.image.addBool('polaroid', userConfig.filter, 'polaroid', (val) => userConfig.filter.polaroid = val);
menu.image.addHTML('<input type="file" id="file-input" class="input-file"></input> &nbsp input');
menu.image.addHTML('<input type="file" id="file-background" class="input-file"></input> &nbsp background');
menu.process = new Menu(document.body, '', { top, left: x[2] });
menu.process.addList('backend', ['cpu', 'webgl', 'wasm', 'humangl'], userConfig.backend, (val) => userConfig.backend = val);
@ -791,8 +775,6 @@ function setupMenu() {
menu.models.addHTML('<hr style="border-style: inset; border-color: dimgray">');
menu.models.addBool('gestures', userConfig.gesture, 'enabled', (val) => userConfig.gesture.enabled = val);
menu.models.addHTML('<hr style="border-style: inset; border-color: dimgray">');
menu.models.addBool('body segmentation', userConfig.segmentation, 'enabled', (val) => userConfig.segmentation.enabled = val);
menu.models.addHTML('<hr style="border-style: inset; border-color: dimgray">');
menu.models.addBool('object detection', userConfig.object, 'enabled', (val) => userConfig.object.enabled = val);
menu.models.addHTML('<hr style="border-style: inset; border-color: dimgray">');
menu.models.addBool('face compare', compare, 'enabled', (val) => {
@ -860,42 +842,12 @@ async function processDataURL(f, action) {
if (e.target.result.startsWith('data:video')) await processVideo(e.target.result, f.name);
document.getElementById('canvas').style.display = 'none';
}
if (action === 'background') {
const image = new Image();
image.onerror = async () => status('image loading error');
image.onload = async () => {
ui.background = image;
if (document.getElementById('canvas').style.display === 'block') { // replace canvas used for video
const canvas = document.getElementById('canvas');
const ctx = canvas.getContext('2d');
const seg = await human.segmentation(canvas, ui.background, userConfig);
if (seg.canvas) ctx.drawImage(seg.canvas, 0, 0);
} else {
const canvases = document.getElementById('samples-container').children; // replace loaded images
for (const canvas of canvases) {
const ctx = canvas.getContext('2d');
const seg = await human.segmentation(canvas, ui.background, userConfig);
if (seg.canvas) ctx.drawImage(seg.canvas, 0, 0);
}
}
};
image.src = e.target.result;
}
resolve(true);
};
reader.readAsDataURL(f);
});
}
async function runSegmentation() {
document.getElementById('file-background').onchange = async (evt) => {
userConfig.segmentation.enabled = true;
evt.preventDefault();
if (evt.target.files.length < 2) ui.columns = 1;
for (const f of evt.target.files) await processDataURL(f, 'background');
};
}
async function dragAndDrop() {
document.body.addEventListener('dragenter', (evt) => evt.preventDefault());
document.body.addEventListener('dragleave', (evt) => evt.preventDefault());
@ -1071,9 +1023,6 @@ async function main() {
// init drag & drop
await dragAndDrop();
// init segmentation
await runSegmentation();
if (params.has('image')) {
try {
const image = JSON.parse(params.get('image'));

View File

@ -54,9 +54,6 @@ async function main() {
// run detection
const result = await human.detect(imageData);
// run segmentation
// const seg = await human.segmentation(inputCanvas);
// log.data('Segmentation:', { data: seg.data.length, alpha: typeof seg.alpha, canvas: typeof seg.canvas });
// print results summary
const persons = result.persons; // invoke persons getter, only used to print summary on console

View File

@ -0,0 +1,61 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Human Demo</title>
<meta name="viewport" content="width=device-width, shrink-to-fit=yes">
<meta name="mobile-web-app-capable" content="yes">
<meta name="application-name" content="Human Demo">
<meta name="keywords" content="Human Demo">
<meta name="description" content="Human Demo; Author: Vladimir Mandic <mandic00@live.com>">
<link rel="manifest" href="../manifest.webmanifest">
<link rel="shortcut icon" href="../favicon.ico" type="image/x-icon">
<link rel="icon" sizes="256x256" href="../assets/icons/dash-256.png">
<link rel="apple-touch-icon" href="../assets/icons/dash-256.png">
<link rel="apple-touch-startup-image" href="../assets/icons/dash-256.png">
<style>
@font-face { font-family: 'CenturyGothic'; font-display: swap; font-style: normal; font-weight: 400; src: local('CenturyGothic'), url('../assets/century-gothic.ttf') format('truetype'); }
html { font-size: 18px; }
body { font-size: 1rem; font-family: "CenturyGothic", "Segoe UI", sans-serif; font-variant: small-caps; width: -webkit-fill-available; height: 100%; background: black; color: white; overflow: hidden; margin: 0; }
select { font-size: 1rem; font-family: "CenturyGothic", "Segoe UI", sans-serif; font-variant: small-caps; background: gray; color: white; border: none; }
</style>
<script src="index.js" type="module"></script>
</head>
<body>
<noscript><h1>javascript is required</h1></noscript>
<nav>
<div id="nav" class="nav"></div>
</nav>
<header>
<div id="header" class="header" style="position: fixed; top: 0; right: 0; padding: 4px; margin: 16px; background: rgba(0, 0, 0, 0.5); z-index: 10; line-height: 2rem;">
<label for="mode">mode</label>
<select id="mode" name="mode">
<option value="default">remove background</option>
<option value="alpha">draw alpha channel</option>
<option value="foreground">full foreground</option>
<option value="state">recurrent state</option>
</select><br>
<label for="composite">composite</label>
<select id="composite" name="composite"></select><br>
<label for="ratio">downsample ratio</label>
<input type="range" name="ratio" id="ratio" min="0.1" max="1" value="0.5" step="0.05">
<div id="fps" style="margin-top: 8px"></div>
</div>
</header>
<main>
<div id="main" class="main">
<video id="webcam" style="position: fixed; top: 0; left: 0; width: 50vw; height: 50vh"></video>
<video id="video" style="position: fixed; top: 0; right: 0; width: 50vw; height: 50vh" controls></video>
<canvas id="output" style="position: fixed; bottom: 0; left: 0; width: 50vw; height: 50vh"></canvas>
<canvas id="merge" style="position: fixed; bottom: 0; right: 0; width: 50vw; height: 50vh"></canvas>
</div>
</main>
<footer>
<div id="footer" class="footer"></div>
</footer>
<aside>
<div id="aside" class="aside"></div>
</aside>
</body>
</html>

View File

@ -0,0 +1,98 @@
/**
* Human demo for browsers
* @default Human Library
* @summary <https://github.com/vladmandic/human>
* @author <https://github.com/vladmandic>
* @copyright <https://github.com/vladmandic>
* @license MIT
*/
import * as H from '../../dist/human.esm.js'; // equivalent of @vladmandic/Human
const humanConfig = { // user configuration for human, used to fine-tune behavior
// backend: 'wasm',
// wasmPath: 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-wasm@3.20.0/dist/',
modelBasePath: 'https://vladmandic.github.io/human-models/models/',
filter: { enabled: true, equalization: false, flip: false },
face: { enabled: false },
body: { enabled: false },
hand: { enabled: false },
object: { enabled: false },
gesture: { enabled: false },
segmentation: {
enabled: true,
modelPath: 'rvm.json', // can use rvm, selfie or meet
ratio: 0.5,
mode: 'default',
},
};
const human = new H.Human(humanConfig); // create instance of human with overrides from user configuration
const log = (...msg) => console.log(...msg); // eslint-disable-line no-console
async function main() {
// gather dom elements
const dom = {
video: document.getElementById('video'),
webcam: document.getElementById('webcam'),
output: document.getElementById('output'),
merge: document.getElementById('merge'),
mode: document.getElementById('mode'),
composite: document.getElementById('composite'),
ratio: document.getElementById('ratio'),
fps: document.getElementById('fps'),
};
// set defaults
dom.fps.innerText = 'initializing';
dom.ratio.valueAsNumber = human.config.segmentation.ratio;
dom.video.src = '../assets/rijeka.mp4';
dom.composite.innerHTML = ['source-atop', 'color', 'color-burn', 'color-dodge', 'copy', 'darken', 'destination-atop', 'destination-in', 'destination-out', 'destination-over', 'difference', 'exclusion', 'hard-light', 'hue', 'lighten', 'lighter', 'luminosity', 'multiply', 'overlay', 'saturation', 'screen', 'soft-light', 'source-in', 'source-out', 'source-over', 'xor'].map((gco) => `<option value="${gco}">${gco}</option>`).join(''); // eslint-disable-line max-len
const ctxMerge = dom.merge.getContext('2d');
log('human version:', human.version, '| tfjs version:', human.tf.version['tfjs-core']);
log('platform:', human.env.platform, '| agent:', human.env.agent);
await human.load(); // preload all models
log('backend:', human.tf.getBackend(), '| available:', human.env.backends);
log('models stats:', human.getModelStats());
log('models loaded:', Object.values(human.models).filter((model) => model !== null).length);
await human.warmup(); // warmup function to initialize backend for future faster detection
const numTensors = human.tf.engine().state.numTensors;
// initialize webcam
dom.webcam.onplay = () => { // start processing on video play
log('start processing');
dom.output.width = human.webcam.width;
dom.output.height = human.webcam.height;
dom.merge.width = human.webcam.width;
dom.merge.height = human.webcam.height;
loop(); // eslint-disable-line no-use-before-define
};
await human.webcam.start({ element: dom.webcam, crop: true, width: 960, height: 720 }); // use human webcam helper methods and associate webcam stream with a dom element
if (!human.webcam.track) dom.fps.innerText = 'webcam error';
// processing loop
async function loop() {
if (!human.webcam.element || human.webcam.paused) return; // check if webcam is valid and playing
human.config.segmentation.mode = dom.mode.value; // get segmentation mode from ui
human.config.segmentation.ratio = dom.ratio.valueAsNumber; // get segmentation downsample ratio from ui
const t0 = Date.now();
const rgba = await human.segmentation(human.webcam.element, human.config); // run model and process results
const t1 = Date.now();
if (!rgba) {
dom.fps.innerText = 'error';
return;
}
dom.fps.innerText = `fps: ${Math.round(10000 / (t1 - t0)) / 10}`; // mark performance
human.tf.browser.toPixels(rgba, dom.output); // draw raw output
human.tf.dispose(rgba); // dispose tensors
ctxMerge.globalCompositeOperation = 'source-over';
ctxMerge.drawImage(dom.video, 0, 0); // draw original video to first stacked canvas
ctxMerge.globalCompositeOperation = dom.composite.value;
ctxMerge.drawImage(dom.output, 0, 0); // draw processed output to second stacked canvas
if (numTensors !== human.tf.engine().state.numTensors) log({ leak: human.tf.engine().state.numTensors - numTensors }); // check for memory leaks
requestAnimationFrame(loop);
}
}
window.onload = main;

View File

@ -4,6 +4,96 @@
author: <https://github.com/vladmandic>'
*/
import*as i from"../../dist/human.esm.js";var m={modelBasePath:"../../models",filter:{enabled:!0,equalization:!1,flip:!1},face:{enabled:!0,detector:{rotation:!1},mesh:{enabled:!0},attention:{enabled:!1},iris:{enabled:!0},description:{enabled:!0},emotion:{enabled:!0}},body:{enabled:!0},hand:{enabled:!0},object:{enabled:!1},segmentation:{enabled:!1},gesture:{enabled:!0}},e=new i.Human(m);e.env.perfadd=!1;e.draw.options.font='small-caps 18px "Lato"';e.draw.options.lineHeight=20;var a={video:document.getElementById("video"),canvas:document.getElementById("canvas"),log:document.getElementById("log"),fps:document.getElementById("status"),perf:document.getElementById("performance")},n={detect:0,draw:0,tensors:0,start:0},s={detectFPS:0,drawFPS:0,frames:0,averageMs:0},o=(...t)=>{a.log.innerText+=t.join(" ")+`
`,console.log(...t)},d=t=>a.fps.innerText=t,f=t=>a.perf.innerText="tensors:"+e.tf.memory().numTensors.toString()+" | performance: "+JSON.stringify(t).replace(/"|{|}/g,"").replace(/,/g," | ");async function l(){if(!a.video.paused){n.start===0&&(n.start=e.now()),await e.detect(a.video);let t=e.tf.memory().numTensors;t-n.tensors!==0&&o("allocated tensors:",t-n.tensors),n.tensors=t,s.detectFPS=Math.round(1e3*1e3/(e.now()-n.detect))/1e3,s.frames++,s.averageMs=Math.round(1e3*(e.now()-n.start)/s.frames)/1e3,s.frames%100===0&&!a.video.paused&&o("performance",{...s,tensors:n.tensors})}n.detect=e.now(),requestAnimationFrame(l)}async function c(){if(!a.video.paused){let r=e.next(e.result);e.config.filter.flip?e.draw.canvas(r.canvas,a.canvas):e.draw.canvas(a.video,a.canvas),await e.draw.all(a.canvas,r),f(r.performance)}let t=e.now();s.drawFPS=Math.round(1e3*1e3/(t-n.draw))/1e3,n.draw=t,d(a.video.paused?"paused":`fps: ${s.detectFPS.toFixed(1).padStart(5," ")} detect | ${s.drawFPS.toFixed(1).padStart(5," ")} draw`),setTimeout(c,30)}async function u(){await e.webcam.start({element:a.video,crop:!0}),a.canvas.width=e.webcam.width,a.canvas.height=e.webcam.height,a.canvas.onclick=async()=>{e.webcam.paused?await e.webcam.play():e.webcam.pause()}}async function w(){o("human version:",e.version,"| tfjs version:",e.tf.version["tfjs-core"]),o("platform:",e.env.platform,"| agent:",e.env.agent),d("loading..."),await e.load(),o("backend:",e.tf.getBackend(),"| available:",e.env.backends),o("models stats:",e.getModelStats()),o("models loaded:",Object.values(e.models).filter(t=>t!==null).length),d("initializing..."),await e.warmup(),await u(),await l(),await c()}window.onload=w;
// demo/typescript/index.ts
import * as H from "../../dist/human.esm.js";
var humanConfig = {
modelBasePath: "../../models",
filter: { enabled: true, equalization: false, flip: false },
face: { enabled: true, detector: { rotation: false }, mesh: { enabled: true }, attention: { enabled: false }, iris: { enabled: true }, description: { enabled: true }, emotion: { enabled: true } },
body: { enabled: true },
hand: { enabled: true },
object: { enabled: false },
segmentation: { enabled: false },
gesture: { enabled: true }
};
var human = new H.Human(humanConfig);
human.env.perfadd = false;
human.draw.options.font = 'small-caps 18px "Lato"';
human.draw.options.lineHeight = 20;
var dom = {
video: document.getElementById("video"),
canvas: document.getElementById("canvas"),
log: document.getElementById("log"),
fps: document.getElementById("status"),
perf: document.getElementById("performance")
};
var timestamp = { detect: 0, draw: 0, tensors: 0, start: 0 };
var fps = { detectFPS: 0, drawFPS: 0, frames: 0, averageMs: 0 };
var log = (...msg) => {
dom.log.innerText += msg.join(" ") + "\n";
console.log(...msg);
};
var status = (msg) => dom.fps.innerText = msg;
var perf = (msg) => dom.perf.innerText = "tensors:" + human.tf.memory().numTensors.toString() + " | performance: " + JSON.stringify(msg).replace(/"|{|}/g, "").replace(/,/g, " | ");
async function detectionLoop() {
if (!dom.video.paused) {
if (timestamp.start === 0)
timestamp.start = human.now();
await human.detect(dom.video);
const tensors = human.tf.memory().numTensors;
if (tensors - timestamp.tensors !== 0)
log("allocated tensors:", tensors - timestamp.tensors);
timestamp.tensors = tensors;
fps.detectFPS = Math.round(1e3 * 1e3 / (human.now() - timestamp.detect)) / 1e3;
fps.frames++;
fps.averageMs = Math.round(1e3 * (human.now() - timestamp.start) / fps.frames) / 1e3;
if (fps.frames % 100 === 0 && !dom.video.paused)
log("performance", { ...fps, tensors: timestamp.tensors });
}
timestamp.detect = human.now();
requestAnimationFrame(detectionLoop);
}
async function drawLoop() {
if (!dom.video.paused) {
const interpolated = human.next(human.result);
if (human.config.filter.flip)
human.draw.canvas(interpolated.canvas, dom.canvas);
else
human.draw.canvas(dom.video, dom.canvas);
await human.draw.all(dom.canvas, interpolated);
perf(interpolated.performance);
}
const now = human.now();
fps.drawFPS = Math.round(1e3 * 1e3 / (now - timestamp.draw)) / 1e3;
timestamp.draw = now;
status(dom.video.paused ? "paused" : `fps: ${fps.detectFPS.toFixed(1).padStart(5, " ")} detect | ${fps.drawFPS.toFixed(1).padStart(5, " ")} draw`);
setTimeout(drawLoop, 30);
}
async function webCam() {
await human.webcam.start({ element: dom.video, crop: true });
dom.canvas.width = human.webcam.width;
dom.canvas.height = human.webcam.height;
dom.canvas.onclick = async () => {
if (human.webcam.paused)
await human.webcam.play();
else
human.webcam.pause();
};
}
async function main() {
log("human version:", human.version, "| tfjs version:", human.tf.version["tfjs-core"]);
log("platform:", human.env.platform, "| agent:", human.env.agent);
status("loading...");
await human.load();
log("backend:", human.tf.getBackend(), "| available:", human.env.backends);
log("models stats:", human.getModelStats());
log("models loaded:", Object.values(human.models).filter((model) => model !== null).length);
status("initializing...");
await human.warmup();
await webCam();
await detectionLoop();
await drawLoop();
}
window.onload = main;
//# sourceMappingURL=index.js.map

File diff suppressed because one or more lines are too long

View File

@ -3,308 +3,3 @@
For details see Wiki:
- [**List of Models & Credits**](https://github.com/vladmandic/human/wiki/Models)
## Model signatures:
```js
INFO: graph model: /home/vlado/dev/human/models/iris.json
INFO: created on: 2020-10-12T18:46:47.060Z
INFO: metadata: { generatedBy: 'https://github.com/google/mediapipe', convertedBy: 'https://github.com/vladmandic', version: undefined }
INFO: model inputs based on signature
{ name: 'input_1:0', dtype: 'DT_FLOAT', shape: [ -1, 64, 64, 3 ] }
INFO: model outputs based on signature
{ id: 0, name: 'Identity:0', dytpe: 'DT_FLOAT', shape: [ -1, 1, 1, 228 ] }
INFO: tensors: 191
DATA: weights: {
files: [ 'iris.bin' ],
size: { disk: 2599092, memory: 2599092 },
count: { total: 191, float32: 189, int32: 2 },
quantized: { none: 191 },
values: { total: 649773, float32: 649764, int32: 9 }
}
DATA: kernel ops: {
graph: [ 'Const', 'Placeholder', 'Identity' ],
convolution: [ '_FusedConv2D', 'DepthwiseConv2dNative', 'MaxPool' ],
arithmetic: [ 'AddV2' ],
basic_math: [ 'Prelu' ],
transformation: [ 'Pad' ],
slice_join: [ 'ConcatV2' ]
}
INFO: graph model: /home/vlado/dev/human/models/facemesh.json
INFO: created on: 2020-10-12T18:46:46.944Z
INFO: metadata: { generatedBy: 'https://github.com/google/mediapipe', convertedBy: 'https://github.com/vladmandic', version: undefined }
INFO: model inputs based on signature
{ name: 'input_1:0', dtype: 'DT_FLOAT', shape: [ 1, 192, 192, 3 ] }
INFO: model outputs based on signature
{ id: 0, name: 'Identity_1:0', dytpe: 'DT_FLOAT', shape: [ 1, 266 ] }
{ id: 1, name: 'Identity_2:0', dytpe: 'DT_FLOAT', shape: [ 1, 1 ] }
{ id: 2, name: 'Identity:0', dytpe: 'DT_FLOAT', shape: [ 1, 1404 ] }
INFO: tensors: 118
DATA: weights: {
files: [ 'facemesh.bin' ],
size: { disk: 2955780, memory: 2955780 },
count: { total: 118, float32: 114, int32: 4 },
quantized: { none: 118 },
values: { total: 738945, float32: 738919, int32: 26 }
}
DATA: kernel ops: {
graph: [ 'Placeholder', 'Const', 'NoOp', 'Identity' ],
convolution: [ '_FusedConv2D', 'DepthwiseConv2dNative', 'MaxPool' ],
arithmetic: [ 'AddV2' ],
basic_math: [ 'Prelu', 'Sigmoid' ],
transformation: [ 'Pad', 'Reshape' ]
}
INFO: graph model: /home/vlado/dev/human/models/emotion.json
INFO: created on: 2020-11-05T20:11:29.740Z
INFO: metadata: { generatedBy: 'https://github.com/oarriaga/face_classification', convertedBy: 'https://github.com/vladmandic', version: undefined }
INFO: model inputs based on signature
{ name: 'input_1:0', dtype: 'DT_FLOAT', shape: [ -1, 64, 64, 1 ] }
INFO: model outputs based on signature
{ id: 0, name: 'Identity:0', dytpe: 'DT_FLOAT', shape: [ -1, 7 ] }
INFO: tensors: 23
DATA: weights: {
files: [ 'emotion.bin' ],
size: { disk: 820516, memory: 820516 },
count: { total: 23, float32: 22, int32: 1 },
quantized: { none: 23 },
values: { total: 205129, float32: 205127, int32: 2 }
}
DATA: kernel ops: {
graph: [ 'Const', 'Placeholder', 'Identity' ],
convolution: [ '_FusedConv2D', 'DepthwiseConv2dNative', 'MaxPool' ],
arithmetic: [ 'AddV2' ],
basic_math: [ 'Relu' ],
reduction: [ 'Mean' ],
normalization: [ 'Softmax' ]
}
INFO: graph model: /home/vlado/dev/human/models/faceres.json
INFO: created on: 2021-03-21T14:12:59.863Z
INFO: metadata: { generatedBy: 'https://github.com/HSE-asavchenko/HSE_FaceRec_tf', convertedBy: 'https://github.com/vladmandic', version: undefined }
INFO: model inputs based on signature
{ name: 'input_1', dtype: 'DT_FLOAT', shape: [ -1, 224, 224, 3 ] }
INFO: model outputs based on signature
{ id: 0, name: 'gender_pred/Sigmoid:0', dytpe: 'DT_FLOAT', shape: [ 1, 1 ] }
{ id: 1, name: 'global_pooling/Mean', dytpe: 'DT_FLOAT', shape: [ 1, 1024 ] }
{ id: 2, name: 'age_pred/Softmax:0', dytpe: 'DT_FLOAT', shape: [ 1, 100 ] }
INFO: tensors: 128
DATA: weights: {
files: [ 'faceres.bin' ],
size: { disk: 6978814, memory: 13957620 },
count: { total: 128, float32: 127, int32: 1 },
quantized: { float16: 127, none: 1 },
values: { total: 3489405, float32: 3489403, int32: 2 }
}
DATA: kernel ops: {
graph: [ 'Const', 'Placeholder' ],
convolution: [ 'Conv2D', 'DepthwiseConv2dNative' ],
arithmetic: [ 'Add', 'Minimum', 'Maximum', 'Mul' ],
basic_math: [ 'Relu', 'Sigmoid' ],
reduction: [ 'Mean' ],
matrices: [ '_FusedMatMul' ],
normalization: [ 'Softmax' ]
}
INFO: graph model: /home/vlado/dev/human/models/blazeface.json
INFO: created on: 2020-10-15T19:57:26.419Z
INFO: metadata: { generatedBy: 'https://github.com/google/mediapipe', convertedBy: 'https://github.com/vladmandic', version: undefined }
INFO: model inputs based on signature
{ name: 'input:0', dtype: 'DT_FLOAT', shape: [ 1, 256, 256, 3 ] }
INFO: model outputs based on signature
{ id: 0, name: 'Identity_3:0', dytpe: 'DT_FLOAT', shape: [ 1, 384, 16 ] }
{ id: 1, name: 'Identity:0', dytpe: 'DT_FLOAT', shape: [ 1, 512, 1 ] }
{ id: 2, name: 'Identity_1:0', dytpe: 'DT_FLOAT', shape: [ 1, 384, 1 ] }
{ id: 3, name: 'Identity_2:0', dytpe: 'DT_FLOAT', shape: [ 1, 512, 16 ] }
INFO: tensors: 112
DATA: weights: {
files: [ 'blazeface.bin' ],
size: { disk: 538928, memory: 538928 },
count: { total: 112, float32: 106, int32: 6 },
quantized: { none: 112 },
values: { total: 134732, float32: 134704, int32: 28 }
}
DATA: kernel ops: {
graph: [ 'Const', 'Placeholder', 'Identity' ],
convolution: [ '_FusedConv2D', 'DepthwiseConv2dNative', 'MaxPool' ],
arithmetic: [ 'AddV2' ],
basic_math: [ 'Relu' ],
transformation: [ 'Pad', 'Reshape' ]
}
INFO: graph model: /home/vlado/dev/human/models/mb3-centernet.json
INFO: created on: 2021-05-19T11:50:13.013Z
INFO: metadata: { generatedBy: 'https://github.com/610265158/mobilenetv3_centernet', convertedBy: 'https://github.com/vladmandic', version: undefined }
INFO: model inputs based on signature
{ name: 'tower_0/images', dtype: 'DT_FLOAT', shape: [ 1, 512, 512, 3 ] }
INFO: model outputs based on signature
{ id: 0, name: 'tower_0/wh', dytpe: 'DT_FLOAT', shape: [ 1, 128, 128, 4 ] }
{ id: 1, name: 'tower_0/keypoints', dytpe: 'DT_FLOAT', shape: [ 1, 128, 128, 80 ] }
{ id: 2, name: 'tower_0/detections', dytpe: 'DT_FLOAT', shape: [ 1, 100, 6 ] }
INFO: tensors: 267
DATA: weights: {
files: [ 'mb3-centernet.bin' ],
size: { disk: 4030290, memory: 8060260 },
count: { total: 267, float32: 227, int32: 40 },
quantized: { float16: 227, none: 40 },
values: { total: 2015065, float32: 2014985, int32: 80 }
}
DATA: kernel ops: {
graph: [ 'Const', 'Placeholder', 'Identity' ],
convolution: [ '_FusedConv2D', 'FusedDepthwiseConv2dNative', 'DepthwiseConv2dNative', 'Conv2D', 'MaxPool' ],
arithmetic: [ 'Mul', 'Add', 'FloorDiv', 'FloorMod', 'Sub' ],
basic_math: [ 'Relu6', 'Relu', 'Sigmoid' ],
reduction: [ 'Mean' ],
image: [ 'ResizeBilinear' ],
slice_join: [ 'ConcatV2', 'GatherV2', 'StridedSlice' ],
transformation: [ 'Reshape', 'Cast', 'ExpandDims' ],
logical: [ 'Equal' ],
evaluation: [ 'TopKV2' ]
}
INFO: graph model: /home/vlado/dev/human/models/movenet-lightning.json
INFO: created on: 2021-05-29T12:26:32.994Z
INFO: metadata: { generatedBy: 'https://tfhub.dev/google/movenet/singlepose/lightning/4', convertedBy: 'https://github.com/vladmandic', version: undefined }
INFO: model inputs based on signature
{ name: 'input:0', dtype: 'DT_INT32', shape: [ 1, 192, 192, 3 ] }
INFO: model outputs based on signature
{ id: 0, name: 'Identity:0', dytpe: 'DT_FLOAT', shape: [ 1, 1, 17, 3 ] }
INFO: tensors: 180
DATA: weights: {
files: [ 'movenet-lightning.bin' ],
size: { disk: 4650216, memory: 9300008 },
count: { total: 180, int32: 31, float32: 149 },
quantized: { none: 31, float16: 149 },
values: { total: 2325002, int32: 106, float32: 2324896 }
}
DATA: kernel ops: {
graph: [ 'Const', 'Placeholder', 'Identity' ],
transformation: [ 'Cast', 'ExpandDims', 'Squeeze', 'Reshape' ],
slice_join: [ 'Unpack', 'Pack', 'GatherNd', 'ConcatV2' ],
arithmetic: [ 'Sub', 'Mul', 'AddV2', 'FloorDiv', 'SquaredDifference', 'RealDiv' ],
convolution: [ '_FusedConv2D', 'FusedDepthwiseConv2dNative', 'DepthwiseConv2dNative' ],
image: [ 'ResizeBilinear' ],
basic_math: [ 'Sigmoid', 'Sqrt' ],
reduction: [ 'ArgMax' ]
}
INFO: graph model: /home/vlado/dev/human/models/selfie.json
INFO: created on: 2021-06-04T13:46:56.904Z
INFO: metadata: { generatedBy: 'https://github.com/PINTO0309/PINTO_model_zoo/tree/main/109_Selfie_Segmentation', convertedBy: 'https://github.com/vladmandic', version: '561.undefined' }
INFO: model inputs based on signature
{ name: 'input_1:0', dtype: 'DT_FLOAT', shape: [ 1, 256, 256, 3 ] }
INFO: model outputs based on signature
{ id: 0, name: 'activation_10:0', dytpe: 'DT_FLOAT', shape: [ 1, 256, 256, 1 ] }
INFO: tensors: 136
DATA: weights: {
files: [ 'selfie.bin' ],
size: { disk: 212886, memory: 425732 },
count: { total: 136, int32: 4, float32: 132 },
quantized: { none: 4, float16: 132 },
values: { total: 106433, int32: 10, float32: 106423 }
}
DATA: kernel ops: {
graph: [ 'Const', 'Placeholder' ],
convolution: [ 'Conv2D', 'DepthwiseConv2dNative', 'AvgPool', 'Conv2DBackpropInput' ],
arithmetic: [ 'Add', 'Mul', 'AddV2', 'AddN' ],
basic_math: [ 'Relu6', 'Relu', 'Sigmoid' ],
image: [ 'ResizeBilinear' ]
}
INFO: graph model: /home/vlado/dev/human/models/handtrack.json
INFO: created on: 2021-09-21T12:09:47.583Z
INFO: metadata: { generatedBy: 'https://github.com/victordibia/handtracking', convertedBy: 'https://github.com/vladmandic', version: '561.undefined' }
INFO: model inputs based on signature
{ name: 'input_tensor:0', dtype: 'DT_UINT8', shape: [ 1, 320, 320, 3 ] }
INFO: model outputs based on signature
{ id: 0, name: 'Identity_2:0', dytpe: 'DT_FLOAT', shape: [ 1, 100 ] }
{ id: 1, name: 'Identity_4:0', dytpe: 'DT_FLOAT', shape: [ 1, 100 ] }
{ id: 2, name: 'Identity_6:0', dytpe: 'DT_FLOAT', shape: [ 1, 12804, 4 ] }
{ id: 3, name: 'Identity_1:0', dytpe: 'DT_FLOAT', shape: [ 1, 100, 4 ] }
{ id: 4, name: 'Identity_3:0', dytpe: 'DT_FLOAT', shape: [ 1, 100, 8 ] }
{ id: 5, name: 'Identity_5:0', dytpe: 'DT_FLOAT', shape: [ 1 ] }
{ id: 6, name: 'Identity:0', dytpe: 'DT_FLOAT', shape: [ 1, 100 ] }
{ id: 7, name: 'Identity_7:0', dytpe: 'DT_FLOAT', shape: [ 1, 12804, 8 ] }
INFO: tensors: 619
DATA: weights: {
files: [ 'handtrack.bin' ],
size: { disk: 2964837, memory: 11846016 },
count: { total: 619, int32: 347, float32: 272 },
quantized: { none: 347, uint8: 272 },
values: { total: 2961504, int32: 1111, float32: 2960393 }
}
DATA: kernel ops: {
graph: [ 'Const', 'Placeholder', 'Identity', 'Shape', 'NoOp' ],
control: [ 'TensorListReserve', 'Enter', 'TensorListFromTensor', 'Merge', 'LoopCond', 'Switch', 'Exit', 'TensorListStack', 'NextIteration', 'TensorListSetItem', 'TensorListGetItem' ],
logical: [ 'Less', 'LogicalAnd', 'Select', 'Greater', 'GreaterEqual' ],
convolution: [ '_FusedConv2D', 'FusedDepthwiseConv2dNative', 'DepthwiseConv2dNative' ],
arithmetic: [ 'AddV2', 'Mul', 'Sub', 'Minimum', 'Maximum' ],
transformation: [ 'Cast', 'ExpandDims', 'Squeeze', 'Reshape', 'Pad' ],
slice_join: [ 'Unpack', 'StridedSlice', 'Pack', 'ConcatV2', 'Slice', 'GatherV2', 'Split' ],
image: [ 'ResizeBilinear' ],
basic_math: [ 'Reciprocal', 'Sigmoid', 'Exp' ],
matrices: [ 'Transpose' ],
dynamic: [ 'NonMaxSuppressionV5', 'Where' ],
creation: [ 'Fill', 'Range' ],
evaluation: [ 'TopKV2' ],
reduction: [ 'Sum' ]
}
INFO: graph model: /home/vlado/dev/human/models/antispoof.json
INFO: created on: 2021-10-13T14:20:27.100Z
INFO: metadata: { generatedBy: 'https://www.kaggle.com/anku420/fake-face-detection', convertedBy: 'https://github.com/vladmandic', version: '716.undefined' }
INFO: model inputs based on signature
{ name: 'conv2d_input', dtype: 'DT_FLOAT', shape: [ -1, 128, 128, 3 ] }
INFO: model outputs based on signature
{ id: 0, name: 'activation_4', dytpe: 'DT_FLOAT', shape: [ -1, 1 ] }
INFO: tensors: 11
DATA: weights: {
files: [ 'antispoof.bin' ],
size: { disk: 853098, memory: 1706188 },
count: { total: 11, float32: 10, int32: 1 },
quantized: { float16: 10, none: 1 },
values: { total: 426547, float32: 426545, int32: 2 }
}
DATA: kernel ops: { graph: [ 'Const', 'Placeholder', 'Identity' ], convolution: [ '_FusedConv2D', 'MaxPool' ], basic_math: [ 'Relu', 'Sigmoid' ], transformation: [ 'Reshape' ], matrices: [ '_FusedMatMul' ] }
INFO: graph model: /home/vlado/dev/human/models/handlandmark-full.json
INFO: created on: 2021-10-31T12:27:49.343Z
INFO: metadata: { generatedBy: 'https://github.com/google/mediapipe', convertedBy: 'https://github.com/vladmandic', version: '808.undefined' }
INFO: model inputs based on signature
{ name: 'input_1', dtype: 'DT_FLOAT', shape: [ 1, 224, 224, 3 ] }
INFO: model outputs based on signature
{ id: 0, name: 'Identity_3:0', dytpe: 'DT_FLOAT', shape: [ 1, 63 ] }
{ id: 1, name: 'Identity:0', dytpe: 'DT_FLOAT', shape: [ 1, 63 ] }
{ id: 2, name: 'Identity_1:0', dytpe: 'DT_FLOAT', shape: [ 1, 1 ] }
{ id: 3, name: 'Identity_2:0', dytpe: 'DT_FLOAT', shape: [ 1, 1 ] }
INFO: tensors: 103
DATA: weights: {
files: [ 'handlandmark-full.bin' ],
size: { disk: 5431368, memory: 10862728 },
count: { total: 103, float32: 102, int32: 1 },
quantized: { float16: 102, none: 1 },
values: { total: 2715682, float32: 2715680, int32: 2 }
}
DATA: kernel ops: {
graph: [ 'Const', 'Placeholder', 'Identity' ],
convolution: [ 'Conv2D', 'DepthwiseConv2dNative' ],
arithmetic: [ 'AddV2', 'AddN' ],
basic_math: [ 'Relu6', 'Sigmoid' ],
reduction: [ 'Mean' ],
matrices: [ '_FusedMatMul' ]
}
INFO: graph model: /home/vlado/dev/human/models/liveness.json
INFO: created on: 2021-11-09T12:39:11.760Z
INFO: metadata: { generatedBy: 'https://github.com/leokwu/livenessnet', convertedBy: 'https://github.com/vladmandic', version: '808.undefined' }
INFO: model inputs based on signature
{ name: 'conv2d_1_input', dtype: 'DT_FLOAT', shape: [ -1, 32, 32, 3 ] }
INFO: model outputs based on signature
{ id: 0, name: 'activation_6', dytpe: 'DT_FLOAT', shape: [ -1, 2 ] }
INFO: tensors: 23
DATA: weights: {
files: [ 'liveness.bin' ],
size: { disk: 592976, memory: 592976 },
count: { total: 23, float32: 22, int32: 1 },
quantized: { none: 23 },
values: { total: 148244, float32: 148242, int32: 2 }
}
DATA: kernel ops: {
graph: [ 'Const', 'Placeholder', 'Identity' ],
convolution: [ '_FusedConv2D', 'MaxPool' ],
arithmetic: [ 'Mul', 'Add', 'AddV2' ],
transformation: [ 'Reshape' ],
matrices: [ '_FusedMatMul' ],
normalization: [ 'Softmax' ]
}
```

View File

@ -1,5 +1,14 @@
/* eslint-disable no-multi-spaces */
/** Possible TensorFlow backends */
export type BackendEnum = '' | 'cpu' | 'wasm' | 'webgl' | 'humangl' | 'tensorflow' | 'webgpu';
/** Possible values for `human.warmup` */
export type WarmupEnum = '' | 'none' | 'face' | 'full' | 'body';
/** Possible segmentation model behavior */
export type SegmentationEnum = 'default' | 'alpha' | 'foreground' | 'state'
/** Generic config type inherited by all module types */
export interface GenericConfig {
/** is module enabled? */
@ -144,8 +153,10 @@ export interface ObjectConfig extends GenericConfig {
* remove background or replace it with user-provided background
*/
export interface SegmentationConfig extends GenericConfig {
/** blur segmentation output by <number> pixels for more realistic image */
blur: number,
/** downsample ratio, adjust to reflect approximately how much of input is taken by body */
ratio: number,
/** possible rvm segmentation mode */
mode: SegmentationEnum,
}
/** Run input through image filters before inference
@ -208,12 +219,6 @@ export interface GestureConfig {
/** is gesture detection enabled? */
enabled: boolean,
}
/** Possible TensorFlow backends */
export type BackendEnum = '' | 'cpu' | 'wasm' | 'webgl' | 'humangl' | 'tensorflow' | 'webgpu';
/** Possible values for `human.warmup` */
export type WarmupEnum = '' | 'none' | 'face' | 'full' | 'body';
/**
* Configuration interface definition for **Human** library
* Contains all configurable parameters
@ -450,8 +455,9 @@ const config: Config = {
},
segmentation: {
enabled: false,
modelPath: 'selfie.json',
blur: 8,
modelPath: 'rvm.json',
ratio: 0.5,
mode: 'default',
},
};
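
As a sketch of how these new options behave at runtime (mirroring the segmentation demo added in this commit): `mode` and `ratio` can be adjusted between frames, and the rvm module resets its recurrent state whenever `ratio` changes. `webcamElement` here is a placeholder for any valid input:

```js
// inside an async frame loop, with an existing `human` instance and segmentation enabled
human.config.segmentation.mode = 'alpha';  // one of 'default' | 'alpha' | 'foreground' | 'state'
human.config.segmentation.ratio = 0.25;    // lower ratio downsamples more; changing it reinitializes rvm recurrent state
const rgba = await human.segmentation(webcamElement, human.config); // rgba tensor or null
if (rgba) human.tf.dispose(rgba); // dispose once the tensor has been consumed
```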

View File

@ -16,9 +16,9 @@ import { setModelLoadOptions } from './tfjs/load';
import * as tf from '../dist/tfjs.esm.js';
import * as app from '../package.json';
import * as backend from './tfjs/backend';
import * as draw from './draw/draw';
import * as blazepose from './body/blazepose';
import * as centernet from './object/centernet';
import * as draw from './draw/draw';
import * as efficientpose from './body/efficientpose';
import * as face from './face/face';
import * as facemesh from './face/facemesh';
@ -29,13 +29,15 @@ import * as handtrack from './hand/handtrack';
import * as humangl from './tfjs/humangl';
import * as image from './image/image';
import * as interpolate from './util/interpolate';
import * as meet from './segmentation/meet';
import * as match from './face/match';
import * as models from './models';
import * as movenet from './body/movenet';
import * as nanodet from './object/nanodet';
import * as persons from './util/persons';
import * as posenet from './body/posenet';
import * as segmentation from './segmentation/segmentation';
import * as rvm from './segmentation/rvm';
import * as selfie from './segmentation/selfie';
import * as warmups from './warmup';
// type definitions
@ -251,18 +253,23 @@ export class Human {
return image.process(input, this.config, getTensor);
}
/** Segmentation method takes any input and returns processed canvas with body segmentation
* - Segmentation is not triggered as part of detect process
/** Segmentation method takes any input and returns RGBA tensor
* Note: Segmentation is not triggered as part of detect process
*
* @param input - {@link Input}
* @param background - {@link Input}
* - Optional parameter background is used to fill the background with specific input
* Returns:
* - `data` as raw data array with per-pixel segmentation values
- `canvas` as canvas which is input image filtered with segmentation data and optionally merged with background image. canvas alpha values are set to segmentation values for easy merging
* - `alpha` as grayscale canvas that represents segmentation alpha values
* Returns tensor which contains image data in RGBA format
*/
async segmentation(input: Input, background?: Input): Promise<{ data: number[] | Tensor, canvas: AnyCanvas | null, alpha: AnyCanvas | null }> {
return segmentation.process(input, background, this.config);
async segmentation(input: Input, userConfig?: Partial<Config>): Promise<Tensor | null> {
if (userConfig) this.config = mergeDeep(this.config, userConfig) as Config;
if (!this.config.segmentation.enabled) return null;
const processed = await image.process(input, this.config);
if (!processed.tensor) return null;
let tensor: Tensor | null = null;
if (this.config.segmentation.modelPath?.includes('rvm')) tensor = await rvm.predict(processed.tensor, this.config);
if (this.config.segmentation.modelPath?.includes('meet')) tensor = await meet.predict(processed.tensor, this.config);
if (this.config.segmentation.modelPath?.includes('selfie')) tensor = await selfie.predict(processed.tensor, this.config);
tf.dispose(processed.tensor);
return tensor;
}
/** Enhance method performs additional enhancements to face image previously detected for further processing

View File

@ -4,27 +4,29 @@
import { env } from './util/env';
import { log } from './util/util';
import * as gear from './gear/gear';
import * as ssrnetAge from './gear/ssrnet-age';
import * as ssrnetGender from './gear/ssrnet-gender';
import * as antispoof from './face/antispoof';
import * as blazeface from './face/blazeface';
import * as blazepose from './body/blazepose';
import * as centernet from './object/centernet';
import * as efficientpose from './body/efficientpose';
import * as emotion from './gear/emotion';
import * as mobilefacenet from './face/mobilefacenet';
import * as insightface from './face/insightface';
import * as facemesh from './face/facemesh';
import * as faceres from './face/faceres';
import * as gear from './gear/gear';
import * as handpose from './hand/handpose';
import * as handtrack from './hand/handtrack';
import * as insightface from './face/insightface';
import * as iris from './face/iris';
import * as liveness from './face/liveness';
import * as meet from './segmentation/meet';
import * as mobilefacenet from './face/mobilefacenet';
import * as movenet from './body/movenet';
import * as nanodet from './object/nanodet';
import * as posenet from './body/posenet';
import * as segmentation from './segmentation/segmentation';
import * as rvm from './segmentation/rvm';
import * as selfie from './segmentation/selfie';
import * as ssrnetAge from './gear/ssrnet-age';
import * as ssrnetGender from './gear/ssrnet-gender';
import { modelStats, ModelInfo } from './tfjs/load';
import type { GraphModel } from './tfjs/types';
import type { Human } from './human';
@ -54,17 +56,18 @@ export class Models {
handskeleton: null | GraphModel | Promise<GraphModel> = null;
handtrack: null | GraphModel | Promise<GraphModel> = null;
liveness: null | GraphModel | Promise<GraphModel> = null;
meet: null | GraphModel | Promise<GraphModel> = null;
movenet: null | GraphModel | Promise<GraphModel> = null;
nanodet: null | GraphModel | Promise<GraphModel> = null;
posenet: null | GraphModel | Promise<GraphModel> = null;
segmentation: null | GraphModel | Promise<GraphModel> = null;
selfie: null | GraphModel | Promise<GraphModel> = null;
rvm: null | GraphModel | Promise<GraphModel> = null;
antispoof: null | GraphModel | Promise<GraphModel> = null;
}
/** structure that holds global stats for currently loaded models */
export interface ModelStats {
numLoadedModels: number,
numEnabledModels: undefined,
numDefinedModels: number,
percentageLoaded: number,
totalSizeFromManifest: number,
@ -90,7 +93,6 @@ export const getModelStats = (currentInstance: Human): ModelStats => {
const percentageLoaded = totalSizeLoading > 0 ? totalSizeWeights / totalSizeLoading : 0;
return {
numLoadedModels: Object.values(modelStats).length,
numEnabledModels: undefined,
numDefinedModels: Object.keys(instance.models).length,
percentageLoaded,
totalSizeFromManifest,
@ -141,7 +143,9 @@ export async function load(currentInstance: Human): Promise<void> {
if (instance.config.hand.enabled && instance.config.hand.landmarks && !instance.models.handskeleton && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handskeleton = handtrack.loadSkeleton(instance.config);
if (instance.config.object.enabled && !instance.models.centernet && instance.config.object.modelPath?.includes('centernet')) instance.models.centernet = centernet.load(instance.config);
if (instance.config.object.enabled && !instance.models.nanodet && instance.config.object.modelPath?.includes('nanodet')) instance.models.nanodet = nanodet.load(instance.config);
if (instance.config.segmentation.enabled && !instance.models.segmentation) instance.models.segmentation = segmentation.load(instance.config);
if (instance.config.segmentation.enabled && !instance.models.selfie && instance.config.segmentation.modelPath?.includes('selfie')) instance.models.selfie = selfie.load(instance.config);
if (instance.config.segmentation.enabled && !instance.models.meet && instance.config.segmentation.modelPath?.includes('meet')) instance.models.meet = meet.load(instance.config);
if (instance.config.segmentation.enabled && !instance.models.rvm && instance.config.segmentation.modelPath?.includes('rvm')) instance.models.rvm = rvm.load(instance.config);
// models are loaded in parallel asynchronously so lets wait until they are actually loaded
for await (const model of Object.keys(instance.models)) {
@ -159,7 +163,7 @@ export function validateModel(currentInstance: Human | null, model: GraphModel |
if (!instance) log('instance not registred');
if (!instance?.config?.validateModels) return null;
const simpleOps = ['const', 'placeholder', 'noop', 'pad', 'squeeze', 'add', 'sub', 'mul', 'div'];
const ignoreOps = ['biasadd', 'fusedbatchnormv3', 'matmul'];
const ignoreOps = ['biasadd', 'fusedbatchnormv3', 'matmul', 'switch', 'shape', 'merge', 'split', 'broadcastto'];
const ops: string[] = [];
const missing: string[] = [];
interface Op { name: string, category: string, op: string }

54
src/segmentation/meet.ts Normal file
View File

@ -0,0 +1,54 @@
/**
* Image segmentation for body detection model
*
* Based on:
* - [**MediaPipe Meet**](https://drive.google.com/file/d/1lnP1bRi9CSqQQXUHa13159vLELYDgDu0/preview)
*/
import { log } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import { loadModel } from '../tfjs/load';
import { constants } from '../tfjs/constants';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';
let model: GraphModel;
export async function load(config: Config): Promise<GraphModel> {
if (!model || env.initial) model = await loadModel(config.segmentation.modelPath);
else if (config.debug) log('cached model:', model['modelUrl']);
return model;
}
export async function predict(input: Tensor, config: Config): Promise<Tensor | null> {
if (!model) model = await load(config);
if (!model?.['executor'] || !model?.inputs?.[0].shape) return null; // something is wrong with the model
const t: Record<string, Tensor> = {};
t.resize = tf.image.resizeBilinear(input, [model.inputs[0].shape ? model.inputs[0].shape[1] : 0, model.inputs[0].shape ? model.inputs[0].shape[2] : 0], false);
t.norm = tf.div(t.resize, constants.tf255);
t.res = model.execute(t.norm) as Tensor;
t.squeeze = tf.squeeze(t.res, 0);
// t.softmax = tf.softmax(t.squeeze); // model meet has two channels for fg and bg
[t.bgRaw, t.fgRaw] = tf.unstack(t.squeeze, 2);
// t.bg = tf.softmax(t.bgRaw); // we can ignore bg channel
t.fg = tf.softmax(t.fgRaw);
t.mul = tf.mul(t.fg, constants.tf255);
t.expand = tf.expandDims(t.mul, 2);
t.output = tf.image.resizeBilinear(t.expand, [input.shape[1], input.shape[2]]);
let rgba: Tensor;
switch (config.segmentation.mode || 'default') {
case 'default':
t.input = tf.squeeze(input);
t.concat = tf.concat([t.input, t.output], -1);
rgba = tf.cast(t.concat, 'int32'); // combined original with alpha
break;
case 'alpha':
rgba = tf.cast(t.output, 'int32'); // just get alpha value from model
break;
default:
rgba = tf.tensor(0);
}
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
return rgba;
}

104
src/segmentation/rvm.ts Normal file
View File

@ -0,0 +1,104 @@
/**
* Image segmentation for body detection model
*
* Based on:
* - [**Robust Video Matting**](https://github.com/PeterL1n/RobustVideoMatting)
*/
import { log } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import { loadModel } from '../tfjs/load';
import { constants } from '../tfjs/constants';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';
let model: GraphModel;
// internal state variables
const outputNodes = ['fgr', 'pha', 'r1o', 'r2o', 'r3o', 'r4o'];
const t: Record<string, Tensor> = {}; // contains input tensor and recurrent states
let ratio = 0;
function init(config: Config) {
tf.dispose([t.r1i, t.r2i, t.r3i, t.r4i, t.downsample_ratio]);
t.r1i = tf.tensor(0.0);
t.r2i = tf.tensor(0.0);
t.r3i = tf.tensor(0.0);
t.r4i = tf.tensor(0.0);
ratio = config.segmentation.ratio || 0.5;
t.downsample_ratio = tf.tensor(ratio); // initialize downsample ratio
}
export async function load(config: Config): Promise<GraphModel> {
if (!model || env.initial) model = await loadModel(config.segmentation.modelPath);
else if (config.debug) log('cached model:', model['modelUrl']);
init(config);
return model;
}
const normalize = (r: Tensor) => tf.tidy(() => {
const squeeze = tf.squeeze(r, ([0]));
const mul = tf.mul(squeeze, constants.tf255);
const cast = tf.cast(mul, 'int32');
return cast as Tensor;
});
function getRGBA(fgr: Tensor | null, pha: Tensor | null): Tensor { // gets rgba // either fgr or pha must be present
const rgb = fgr
? normalize(fgr) // normalize and use value
: tf.fill([pha!.shape[1] || 0, pha!.shape[2] || 0, 3], 255, 'int32'); // eslint-disable-line @typescript-eslint/no-non-null-assertion
const a = pha
? normalize(pha) // normalize and use value
: tf.fill([fgr!.shape[1] || 0, fgr!.shape[2] || 0, 1], 255, 'int32'); // eslint-disable-line @typescript-eslint/no-non-null-assertion
const rgba = tf.concat([rgb, a], -1);
tf.dispose([rgb, a]);
return rgba;
}
function getState(state: Tensor): Tensor { // gets internal recurrent states
return tf.tidy(() => {
const r: Record<string, Tensor | Tensor[]> = {};
r.unstack = tf.unstack(state, -1);
r.concat = tf.concat(r.unstack, 1);
r.split = tf.split(r.concat, 4, 1);
r.stack = tf.concat(r.split, 2);
r.squeeze = tf.squeeze(r.stack, [0]);
r.expand = tf.expandDims(r.squeeze, -1);
r.add = tf.add(r.expand, 1);
r.mul = tf.mul(r.add, 127.5);
r.cast = tf.cast(r.mul, 'int32');
r.tile = tf.tile(r.cast, [1, 1, 3]) as Tensor;
r.alpha = tf.fill([r.tile.shape[0] || 0, r.tile.shape[1] || 0, 1], 255, 'int32');
return tf.concat([r.tile, r.alpha], -1) as Tensor;
});
}
export async function predict(input: Tensor, config: Config): Promise<Tensor | null> {
if (!model) model = await load(config);
if (!model?.['executor']) return null;
// const expand = tf.expandDims(input, 0);
t.src = tf.div(input, 255);
if (ratio !== config.segmentation.ratio) init(config); // reinitialize recurrent states if requested downsample ratio changed
const [fgr, pha, r1o, r2o, r3o, r4o] = await model.executeAsync(t, outputNodes) as Tensor[]; // execute model
let rgba: Tensor;
switch (config.segmentation.mode || 'default') {
case 'default':
rgba = getRGBA(fgr, pha);
break;
case 'alpha':
rgba = getRGBA(null, pha);
break;
case 'foreground':
rgba = getRGBA(fgr, null);
break;
case 'state':
rgba = getState(r1o); // can view any internal recurrent state r1o, r2o, r3o, r4o
break;
default:
rgba = tf.tensor(0);
}
tf.dispose([t.src, fgr, pha, t.r1i, t.r2i, t.r3i, t.r4i]);
[t.r1i, t.r2i, t.r3i, t.r4i] = [r1o, r2o, r3o, r4o]; // update recurrent states
return rgba;
}

View File

@ -0,0 +1,49 @@
/**
* Image segmentation for body detection model
*
* Based on:
* - [**MediaPipe Selfie**](https://drive.google.com/file/d/1dCfozqknMa068vVsO2j_1FgZkW_e3VWv/preview)
*/
import { log } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import { loadModel } from '../tfjs/load';
import { constants } from '../tfjs/constants';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';
let model: GraphModel;
export async function load(config: Config): Promise<GraphModel> {
if (!model || env.initial) model = await loadModel(config.segmentation.modelPath);
else if (config.debug) log('cached model:', model['modelUrl']);
return model;
}
export async function predict(input: Tensor, config: Config): Promise<Tensor | null> {
if (!model) model = await load(config);
if (!model?.['executor'] || !model?.inputs?.[0].shape) return null; // something is wrong with the model
const t: Record<string, Tensor> = {};
t.resize = tf.image.resizeBilinear(input, [model.inputs[0].shape ? model.inputs[0].shape[1] : 0, model.inputs[0].shape ? model.inputs[0].shape[2] : 0], false);
t.norm = tf.div(t.resize, constants.tf255);
t.res = model.execute(t.norm) as Tensor;
t.squeeze = tf.squeeze(t.res, 0); // meet.shape:[1,256,256,1], selfie.shape:[1,144,256,2]
t.alpha = tf.image.resizeBilinear(t.squeeze, [input.shape[1], input.shape[2]]); // model selfie has a single channel that we can use directly
t.mul = tf.mul(t.alpha, constants.tf255);
let rgba: Tensor;
switch (config.segmentation.mode || 'default') {
case 'default':
t.input = tf.squeeze(input);
t.concat = tf.concat([t.input, t.mul], -1);
rgba = tf.cast(t.concat, 'int32'); // combined original with alpha
break;
case 'alpha':
rgba = tf.cast(t.mul, 'int32'); // just get alpha value from model
break;
default:
rgba = tf.tensor(0);
}
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
return rgba;
}

View File

@ -3,7 +3,7 @@ import * as tf from '../../dist/tfjs.esm.js';
import type { GraphModel } from './types';
import type { Config } from '../config';
import * as modelsDefs from '../../models/models.json';
import { validateModel } from '../models';
// import { validateModel } from '../models';
const options = {
cacheModels: true,
@ -86,6 +86,6 @@ export async function loadModel(modelPath: string | undefined): Promise<GraphMod
log('error saving model:', modelUrl, err);
}
}
validateModel(null, model, `${modelPath || ''}`);
// validateModel(null, model, `${modelPath || ''}`);
return model;
}

View File

@ -133,7 +133,7 @@ export async function runCompile(instance: Human) {
if (Array.isArray(res)) res.forEach((t) => tf.dispose(t));
else tf.dispose(res);
} catch {
log('compile fail model:', modelName);
if (instance.config.debug) log('compile fail model:', modelName);
}
tf.dispose(tensor);
}

File diff suppressed because it is too large Load Diff

View File

@ -21,7 +21,7 @@ const config = {
hand: { enabled: true },
body: { enabled: true },
object: { enabled: true },
segmentation: { enabled: true },
segmentation: { enabled: false },
filter: { enabled: false },
};

View File

@ -10,7 +10,7 @@ H.env.Image = Image; // requires monkey-patch as wasm does not have tf.browser n
const config = {
cacheSensitivity: 0,
modelBasePath: 'https://vladmandic.github.io/human/models/',
modelBasePath: 'https://vladmandic.github.io/human-models/models/',
backend: 'wasm',
// wasmPath: 'node_modules/@tensorflow/tfjs-backend-wasm/dist/',
wasmPath: `https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-wasm@${tf.version_core}/dist/`,
@ -30,7 +30,7 @@ const config = {
hand: { enabled: true, rotation: false },
body: { enabled: true },
object: { enabled: true },
segmentation: { enabled: true },
segmentation: { enabled: false },
filter: { enabled: false },
};

View File

@ -22,7 +22,7 @@ const config = {
hand: { enabled: true },
body: { enabled: true },
object: { enabled: true },
segmentation: { enabled: true },
segmentation: { enabled: false },
filter: { enabled: false },
};

View File

@ -294,7 +294,7 @@ async function test(Human, inputConfig) {
await human.load();
const models = Object.keys(human.models).map((model) => ({ name: model, loaded: (human.models[model] !== null), url: human.models[model] ? human.models[model]['modelUrl'] : null }));
const loaded = models.filter((model) => model.loaded);
if (models.length === 23 && loaded.length === 12) log('state', 'passed: models loaded', models.length, loaded.length, models);
if (models.length === 25 && loaded.length === 11) log('state', 'passed: models loaded', models.length, loaded.length, models);
else log('error', 'failed: models loaded', models.length, loaded.length, models);
log('info', 'memory:', { memory: human.tf.memory() });
log('info', 'state:', { state: human.tf.engine().state });
@ -539,14 +539,18 @@ async function test(Human, inputConfig) {
const ctx = inputCanvas.getContext('2d');
ctx.drawImage(inputImage, 0, 0); // draw input image onto canvas
res = await human.detect(inputCanvas);
if (!res || res?.face?.length !== 1) log('error', 'failed: monkey patch');
if (res?.face?.length !== 1) log('error', 'failed: monkey patch');
else log('state', 'passed: monkey patch');
// test segmentation
res = await human.segmentation(inputCanvas, inputCanvas);
if (!res || !res.data || !res.canvas) log('error', 'failed: segmentation');
else log('state', 'passed: segmentation', [res.data.length]);
human.env.Canvas = undefined;
config.segmentation = { enabled: true, modelPath: 'https://vladmandic.github.io/human-models/models/rvm.json' };
res = await human.segmentation(inputCanvas, config);
if (res?.shape?.length !== 3) log('error', 'failed: segmentation');
else log('state', 'passed: segmentation', [res.size]);
human.tf.dispose(res);
config.segmentation = { enabled: false };
human.env.Canvas = null; // disable canvas monkey-patch
// check if all instances reported same
const tensors1 = human.tf.engine().state.numTensors;

File diff suppressed because it is too large Load Diff

View File

@ -52,7 +52,7 @@
"tabSize": 2
},
"exclude": ["node_modules/", "types/", "dist/**/*.js"],
"include": ["src", "tfjs/*.ts", "types/human.d.ts", "test/**/*.ts", "demo/**/*.ts"],
"include": ["src", "tfjs/*.ts", "types/human.d.ts", "test/**/*.ts", "demo/**/*.ts", "demo/segmentation/index.js", "demo/index.js"],
"typedocOptions": {
"externalPattern": ["node_modules/", "tfjs/"]
}

2
wiki

@ -1 +1 @@
Subproject commit 7ea124ad02f27fa74241e5bfc6f34ceab1062de5
Subproject commit b6432fc419f6ee9aaf36e94b6be21930edb50bc0