diff --git a/CHANGELOG.md b/CHANGELOG.md
index 42e3fcac..8d2f0fdd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,10 @@

 ## Changelog

-### **HEAD -> main** 2021/09/20 mandic00@live.com
+### **HEAD -> main** 2021/09/21 mandic00@live.com
+
+
+### **origin/main** 2021/09/20 mandic00@live.com

 - support for dynamic backend switching
 - initial automated browser tests
diff --git a/demo/index.html b/demo/index.html
index 597b460a..eccba554 100644
--- a/demo/index.html
+++ b/demo/index.html
@@ -35,7 +35,7 @@
     .video { display: none; }
     .canvas { margin: 0 auto; }
     .bench { position: absolute; right: 0; bottom: 0; }
-    .compare-image { width: 200px; position: absolute; top: 150px; left: 30px; box-shadow: 0 0 2px 2px black; background: black; display: none; }
+    .compare-image { width: 256px; position: absolute; top: 150px; left: 30px; box-shadow: 0 0 2px 2px black; background: black; display: none; }
     .loader { width: 300px; height: 300px; border: 3px solid transparent; border-radius: 50%; border-top: 4px solid #f15e41; animation: spin 4s linear infinite; position: absolute; bottom: 15%; left: 50%; margin-left: -150px; z-index: 15; }
     .loader::before, .loader::after { content: ""; position: absolute; top: 6px; bottom: 6px; left: 6px; right: 6px; border-radius: 50%; border: 4px solid transparent; }
     .loader::before { border-top-color: #bad375; animation: 3s spin linear infinite; }
@@ -107,9 +107,13 @@
-      <canvas id="compare-canvas" width="200" height="200"></canvas>
+      <canvas id="compare-canvas" width="256" height="256"></canvas>
+      <div id="segmentation-container" class="compare-image">
+        <canvas id="segmentation-mask" width="256" height="256"></canvas>
+        <canvas id="segmentation-canvas" width="256" height="256"></canvas>
+      </div>
diff --git a/demo/index.js b/demo/index.js
index 33f28115..fa56ba26 100644
--- a/demo/index.js
+++ b/demo/index.js
@@ -51,8 +51,8 @@ let userConfig = {
   },
   object: { enabled: false },
   gesture: { enabled: true },
-  // hand: { enabled: true, landmarks: false, maxDetected: 3, minConfidence: 0.1 },
-  hand: { enabled: true, maxDetected: 3, minConfidence: 0.3, detector: { modelPath: 'handtrack.json' } },
+  hand: { enabled: false },
+  // hand: { enabled: true, maxDetected: 1, minConfidence: 0.5, detector: { modelPath: 'handtrack.json' } },
   body: { enabled: false },
   // body: { enabled: true, modelPath: 'movenet-multipose.json' },
   // body: { enabled: true, modelPath: 'posenet.json' },
@@ -241,8 +241,20 @@ async function drawResults(input) {
   // draw fps chart
   await menu.process.updateChart('FPS', ui.detectFPS);

+  document.getElementById('segmentation-container').style.display = userConfig.segmentation.enabled ? 'block' : 'none';
   if (userConfig.segmentation.enabled && ui.buffered) { // refresh segmentation if using buffered output
-    result.canvas = await human.segmentation(input, ui.background, userConfig);
+    const seg = await human.segmentation(input, ui.background);
+    if (seg.alpha) {
+      let c = document.getElementById('segmentation-mask');
+      let ctx = c.getContext('2d');
+      ctx.clearRect(0, 0, c.width, c.height); // need to clear as seg.alpha is an alpha-based canvas, so drawing adds to it
+      ctx.drawImage(seg.alpha, 0, 0, seg.alpha.width, seg.alpha.height, 0, 0, c.width, c.height);
+      c = document.getElementById('segmentation-canvas');
+      ctx = c.getContext('2d');
+      ctx.clearRect(0, 0, c.width, c.height); // need to clear as seg.canvas is an alpha-based canvas, so drawing adds to it
+      ctx.drawImage(seg.canvas, 0, 0, seg.alpha.width, seg.alpha.height, 0, 0, c.width, c.height);
+    }
+    // result.canvas = seg.alpha;
   } else if (!result.canvas || ui.buffered) { // refresh with input if using buffered output or if missing canvas
     const image = await human.image(input);
     result.canvas = image.canvas;
@@ -825,14 +837,14 @@ async function processDataURL(f, action) {
     if (document.getElementById('canvas').style.display === 'block') { // replace canvas used for video
       const canvas = document.getElementById('canvas');
       const ctx = canvas.getContext('2d');
-      const overlaid = await human.segmentation(canvas, ui.background, userConfig);
-      if (overlaid) ctx.drawImage(overlaid, 0, 0);
+      const seg = await human.segmentation(canvas, ui.background, userConfig);
+      if (seg.canvas) ctx.drawImage(seg.canvas, 0, 0);
     } else {
       const canvases = document.getElementById('samples-container').children; // replace loaded images
       for (const canvas of canvases) {
         const ctx = canvas.getContext('2d');
-        const overlaid = await human.segmentation(canvas, ui.background, userConfig);
-        if (overlaid) ctx.drawImage(overlaid, 0, 0);
+        const seg = await human.segmentation(canvas, ui.background, userConfig);
+        if (seg.canvas) ctx.drawImage(seg.canvas, 0, 0);
       }
     }
   };
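For reference, the demo's new buffered-refresh path reduces to a few lines. A minimal sketch, assuming the `{ data, canvas, alpha }` return shape introduced in this change and the element ids from the markup above; the import path and config values are illustrative:

import { Human } from '@vladmandic/human';

const human = new Human({ segmentation: { enabled: true } });

async function refreshSegmentation(input: HTMLVideoElement, background?: HTMLImageElement) {
  const seg = await human.segmentation(input, background); // { data, canvas, alpha }
  if (!seg.alpha || !seg.canvas) return;
  const targets: Array<[string, HTMLCanvasElement | OffscreenCanvas]> = [['segmentation-mask', seg.alpha], ['segmentation-canvas', seg.canvas]];
  for (const [id, source] of targets) {
    const target = document.getElementById(id) as HTMLCanvasElement;
    const ctx = target.getContext('2d') as CanvasRenderingContext2D;
    ctx.clearRect(0, 0, target.width, target.height); // alpha-based output accumulates, so clear before each draw
    ctx.drawImage(source, 0, 0, source.width, source.height, 0, 0, target.width, target.height);
  }
}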
diff --git a/src/config.ts b/src/config.ts
index 2db40193..b0c7694f 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -118,10 +118,12 @@ export interface ObjectConfig {
  *
  * - enabled: true/false
  * - modelPath: segmentation model, can be absolute path or relative to modelBasePath
+ * - blur: blur segmentation output by n pixels for more realistic image
  */
 export interface SegmentationConfig {
   enabled: boolean,
   modelPath: string,
+  blur: number,
 }

 /** Run input through image filters before inference
@@ -399,6 +401,7 @@ const config: Config = {
     // remove background or replace it with user-provided background
     modelPath: 'selfie.json', // experimental: segmentation model, can be absolute path or relative to modelBasePath; can be 'selfie' or 'meet'
+    blur: 8, // blur segmentation output by n pixels for more realistic image
   },
 };
 export { config as defaults };
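The new `blur` option only controls the radius of the css `blur(px)` filter applied to the segmentation mask before compositing (see segmentation.ts further below). A hypothetical configuration, with illustrative values and model base path:

import { Human } from '@vladmandic/human';

const human = new Human({
  modelBasePath: '../models', // illustrative; wherever selfie.json / meet.json are hosted
  segmentation: {
    enabled: true,
    modelPath: 'selfie.json', // or 'meet.json' for the two-channel meet model
    blur: 8, // css blur radius in px applied to the segmentation mask; 0 disables blurring
  },
});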
diff --git a/src/handtrack/handtrack.ts b/src/handtrack/handtrack.ts
index a46b8f31..cf82da5d 100644
--- a/src/handtrack/handtrack.ts
+++ b/src/handtrack/handtrack.ts
@@ -12,7 +12,7 @@ import * as fingerPose from '../fingerpose/fingerpose';

 const models: [GraphModel | null, GraphModel | null] = [null, null];
 const modelOutputNodes = ['StatefulPartitionedCall/Postprocessor/Slice', 'StatefulPartitionedCall/Postprocessor/ExpandDims_1'];
-const inputSize = [0, 0];
+const inputSize = [[0, 0], [0, 0]];

 const classes = [
   'hand',
@@ -36,7 +36,15 @@ type HandDetectResult = {
   yxBox: [number, number, number, number],
 }

-let boxes: Array<HandDetectResult> = [];
+const cache: {
+  handBoxes: Array<HandDetectResult>,
+  fingerBoxes: Array<HandDetectResult>,
+  tmpBoxes: Array<HandDetectResult>,
+} = {
+  handBoxes: [],
+  fingerBoxes: [],
+  tmpBoxes: [],
+};

 const fingerMap = {
   thumb: [1, 2, 3, 4],
@@ -55,14 +63,16 @@ export async function load(config: Config): Promise<[GraphModel, GraphModel]> {
   if (!models[0]) {
     models[0] = await tf.loadGraphModel(join(config.modelBasePath, config.hand.detector?.modelPath || '')) as unknown as GraphModel;
     const inputs = Object.values(models[0].modelSignature['inputs']);
-    inputSize[0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
+    inputSize[0][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
+    inputSize[0][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
     if (!models[0] || !models[0]['modelUrl']) log('load model failed:', config.object.modelPath);
     else if (config.debug) log('load model:', models[0]['modelUrl']);
   } else if (config.debug) log('cached model:', models[0]['modelUrl']);
   if (!models[1]) {
     models[1] = await tf.loadGraphModel(join(config.modelBasePath, config.hand.skeleton?.modelPath || '')) as unknown as GraphModel;
     const inputs = Object.values(models[1].modelSignature['inputs']);
-    inputSize[1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
+    inputSize[1][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
+    inputSize[1][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
     if (!models[1] || !models[1]['modelUrl']) log('load model failed:', config.object.modelPath);
     else if (config.debug) log('load model:', models[1]['modelUrl']);
   } else if (config.debug) log('cached model:', models[1]['modelUrl']);
@@ -73,7 +83,10 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectResult[]> {
   const t: Record<string, Tensor> = {};
-  t.resize = tf.image.resizeBilinear(input, [240, 320]); // todo: resize with padding
+  const ratio = (input.shape[2] || 1) / (input.shape[1] || 1);
+  const height = Math.min(Math.round((input.shape[1] || 0) / 8) * 8, 512); // use dynamic input size but cap at 512
+  const width = Math.round(height * ratio / 8) * 8;
+  t.resize = tf.image.resizeBilinear(input, [height, width]); // todo: resize with padding
   t.cast = tf.cast(t.resize, 'int32');
   [t.rawScores, t.rawBoxes] = await models[0].executeAsync(t.cast, modelOutputNodes) as Tensor[];
   t.boxes = tf.squeeze(t.rawBoxes, [0, 2]);
@@ -100,40 +113,36 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectResult[]> {
   Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
+  hands.sort((a, b) => b.score - a.score);
+  if (hands.length > (config.hand.maxDetected || 1)) hands.length = (config.hand.maxDetected || 1);
   return hands;
 }

-/*
-const scaleFact = 1.2;
-
+const boxScaleFact = 1.5; // hand finger model prefers slightly larger box
 function updateBoxes(h, keypoints) {
-  const fingerX = keypoints.map((pt) => pt[0]);
-  const fingerY = keypoints.map((pt) => pt[1]);
-  const minX = Math.min(...fingerX);
-  const maxX = Math.max(...fingerX);
-  const minY = Math.min(...fingerY);
-  const maxY = Math.max(...fingerY);
+  const finger = [keypoints.map((pt) => pt[0]), keypoints.map((pt) => pt[1])]; // all fingers coords
+  const minmax = [Math.min(...finger[0]), Math.max(...finger[0]), Math.min(...finger[1]), Math.max(...finger[1])]; // find min and max coordinates for x and y of all fingers
+  const center = [(minmax[0] + minmax[1]) / 2, (minmax[2] + minmax[3]) / 2]; // find center x and y coord of all fingers
+  const diff = Math.max(center[0] - minmax[0], center[1] - minmax[2], -center[0] + minmax[1], -center[1] + minmax[3]) * boxScaleFact; // largest distance from center in any direction
   h.box = [
-    Math.trunc(minX / scaleFact),
-    Math.trunc(minY / scaleFact),
-    Math.trunc(scaleFact * maxX - minX),
-    Math.trunc(scaleFact * maxY - minY),
+    Math.trunc(center[0] - diff),
+    Math.trunc(center[1] - diff),
+    Math.trunc(2 * diff),
+    Math.trunc(2 * diff),
   ] as [number, number, number, number];
-  h.bowRaw = [
-    h.box / outputSize[0],
-    h.box / outputSize[1],
-    h.box / outputSize[0],
-    h.box / outputSize[1],
+  h.boxRaw = [ // work backwards
+    h.box[0] / outputSize[0],
+    h.box[1] / outputSize[1],
+    h.box[2] / outputSize[0],
+    h.box[3] / outputSize[1],
   ] as [number, number, number, number];
-  h.yxBox = [
+  h.yxBox = [ // work backwards
     h.boxRaw[1],
     h.boxRaw[0],
     h.boxRaw[3] + h.boxRaw[1],
     h.boxRaw[2] + h.boxRaw[0],
   ] as [number, number, number, number];
-  return h;
 }
-*/
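The rewritten updateBoxes() replaces the old min/max scaling with a square box centered on the keypoint cloud, whose half-size is the largest distance from the center in any direction times boxScaleFact. A standalone worked example of the same formula, using hypothetical keypoints:

const boxScaleFact = 1.5; // same constant as above
const keypoints: Array<[number, number]> = [[110, 200], [150, 260], [90, 240]]; // hypothetical finger coords
const xs = keypoints.map((pt) => pt[0]); // [110, 150, 90]
const ys = keypoints.map((pt) => pt[1]); // [200, 260, 240]
const center = [(Math.min(...xs) + Math.max(...xs)) / 2, (Math.min(...ys) + Math.max(...ys)) / 2]; // [120, 230]
const diff = Math.max(
  center[0] - Math.min(...xs), Math.max(...xs) - center[0], // x distances from center: 30, 30
  center[1] - Math.min(...ys), Math.max(...ys) - center[1], // y distances from center: 30, 30
) * boxScaleFact; // 30 * 1.5 = 45
const box = [center[0] - diff, center[1] - diff, 2 * diff, 2 * diff]; // [75, 185, 90, 90]: always a square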
@@ -148,60 +157,64 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config): Promise<HandResult> {
   const hand: HandResult = {
     landmarks: {} as HandResult['landmarks'],
     annotations: {} as HandResult['annotations'],
   };
-  if (!input || !models[1] || !config.hand.landmarks) return hand;
-  const t: Record<string, Tensor> = {};
-  t.crop = tf.image.cropAndResize(input, [h.yxBox], [0], [inputSize[1], inputSize[1]], 'bilinear');
-  t.cast = tf.cast(t.crop, 'float32');
-  t.div = tf.div(t.cast, 255);
-  [t.score, t.keypoints] = models[1].execute(t.div) as Tensor[];
-  const score = Math.round(100 * (await t.score.data())[0]) / 100;
-  if (score > (config.hand.minConfidence || 0)) {
-    hand.fingerScore = score;
-    t.reshaped = tf.reshape(t.keypoints, [-1, 3]);
-    const rawCoords = await t.reshaped.array() as number[][];
-    hand.keypoints = (rawCoords as number[][]).map((coord) => [
-      (h.box[2] * coord[0] / inputSize[1]) + h.box[0],
-      (h.box[3] * coord[1] / inputSize[1]) + h.box[1],
-      (h.box[2] + h.box[3]) / 2 / inputSize[1] * coord[2],
-    ]);
-    // h = updateBoxes(h, hand.keypoints); // replace detected box with box calculated around keypoints
-    hand.landmarks = fingerPose.analyze(hand.keypoints) as HandResult['landmarks']; // calculate finger landmarks
-    for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
-      hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
+  if (!input || !models[1]) return hand; // something is wrong
+  if (config.hand.landmarks) {
+    const t: Record<string, Tensor> = {};
+    if (!h.yxBox) return hand;
+    t.crop = tf.image.cropAndResize(input, [h.yxBox], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
+    t.cast = tf.cast(t.crop, 'float32');
+    t.div = tf.div(t.cast, 255);
+    [t.score, t.keypoints] = models[1].execute(t.div) as Tensor[];
+    const score = Math.round(100 * (await t.score.data())[0]) / 100;
+    if (score > (config.hand.minConfidence || 0)) {
+      hand.fingerScore = score;
+      t.reshaped = tf.reshape(t.keypoints, [-1, 3]);
+      const rawCoords = await t.reshaped.array() as number[][];
+      hand.keypoints = (rawCoords as number[][]).map((coord) => [
+        (h.box[2] * coord[0] / inputSize[1][0]) + h.box[0],
+        (h.box[3] * coord[1] / inputSize[1][1]) + h.box[1],
+        (h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2],
+      ]);
+      updateBoxes(h, hand.keypoints); // replace detected box with box calculated around keypoints
+      hand.box = h.box;
+      hand.landmarks = fingerPose.analyze(hand.keypoints) as HandResult['landmarks']; // calculate finger landmarks
+      for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
+        hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
+      }
+      cache.tmpBoxes.push(h); // if finger detection is enabled, only update cache if fingers are detected
     }
+    Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
   }
-  Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
   return hand;
 }

-let last = 0;
 export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {
   outputSize = [input.shape[2] || 0, input.shape[1] || 0];
-  if ((skipped < (config.object.skipFrames || 0)) && config.skipFrame) {
-    // use cached boxes
+  let hands: Array<HandResult> = [];
+  cache.tmpBoxes = []; // clear temp cache
+  if (!config.hand.landmarks) cache.fingerBoxes = cache.handBoxes; // if running hand detection only, reset finger boxes cache
+  if ((skipped < (config.hand.skipFrames || 0)) && config.skipFrame) { // just run finger detection while reusing cached boxes
     skipped++;
-    const hands: HandResult[] = await Promise.all(boxes.map((hand) => detectFingers(input, hand, config)));
-    const withFingers = hands.filter((hand) => hand.fingerScore > 0).length;
-    if (withFingers === last) return hands;
+    hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
+    // console.log('SKIP', skipped, hands.length, cache.handBoxes.length, cache.fingerBoxes.length, cache.tmpBoxes.length);
+  } else { // calculate new boxes and run finger detection
+    skipped = 0;
+    hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
+    // console.log('CACHE', skipped, hands.length, cache.handBoxes.length, cache.fingerBoxes.length, cache.tmpBoxes.length);
+    if (hands.length !== config.hand.maxDetected) { // run hand detection only if we do not have enough hands in cache
+      cache.handBoxes = await detectHands(input, config);
+      const newHands = await Promise.all(cache.handBoxes.map((hand) => detectFingers(input, hand, config)));
+      hands = hands.concat(newHands);
+      // console.log('DETECT', skipped, hands.length, cache.handBoxes.length, cache.fingerBoxes.length, cache.tmpBoxes.length);
+    }
   }
-  // calculate new boxes
-  skipped = 0;
-  boxes = await detectHands(input, config);
-  const hands: HandResult[] = await Promise.all(boxes.map((hand) => detectFingers(input, hand, config)));
-  const withFingers = hands.filter((hand) => hand.fingerScore > 0).length;
-  last = withFingers;
-  // console.log('NEW', withFingers, hands.length, boxes.length);
-  return hands;
+  cache.fingerBoxes = [...cache.tmpBoxes]; // repopulate cache with validated hands
+  return hands as HandResult[];
 }

-/* TODO
-- smart resize
-- updateboxes is drifting
+/*
+- Live Site:
+- TFJS Port:
+- Original:
+- Writeup:
 */
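The predict() rewrite replaces the single `boxes` array and `last` counter with a three-way cache: handBoxes (last detector output), fingerBoxes (boxes validated by the finger model), and tmpBoxes (boxes validated in the current frame). A minimal sketch of that control flow, with the model calls stubbed out and types simplified; names are illustrative:

type Box = { score: number };
const cache = { handBoxes: [] as Box[], fingerBoxes: [] as Box[], tmpBoxes: [] as Box[] };
let skipped = 0;

async function predictSketch(detect: () => Promise<Box[]>, fingers: (b: Box) => Promise<Box>, maxDetected: number, skipFrames: number): Promise<Box[]> {
  cache.tmpBoxes = []; // fingers() pushes every box that yields a valid hand into tmpBoxes
  let hands = await Promise.all(cache.fingerBoxes.map(fingers)); // always refine cached boxes first
  if (skipped < skipFrames) {
    skipped++; // cheap path: reuse cached boxes only
  } else {
    skipped = 0;
    if (hands.length !== maxDetected) { // re-run the detector only when hands are missing
      cache.handBoxes = await detect();
      hands = hands.concat(await Promise.all(cache.handBoxes.map(fingers)));
    }
  }
  cache.fingerBoxes = [...cache.tmpBoxes]; // only validated boxes survive to the next frame
  return hands;
}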
diff --git a/src/human.ts b/src/human.ts
index e2ebd5b7..4935c234 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -47,7 +47,7 @@ export type Input = Tensor | ImageData | ImageBitmap | HTMLImageElement | HTMLMediaElement | HTMLVideoElement | HTMLCanvasElement | OffscreenCanvas;
  *
  * - `create`: triggered when Human object is instantiated
  * - `load`: triggered when models are loaded (explicitly or on-demand)
- * - `image`: triggered when input image is this.processed
+ * - `image`: triggered when input image is processed
  * - `result`: triggered when detection is complete
  * - `warmup`: triggered when warmup is complete
  */
@@ -111,7 +111,7 @@ export class Human {
   * - face: draw detected faces
   * - body: draw detected people and body parts
   * - hand: draw detected hands and hand parts
-  * - canvas: draw this.processed canvas which is a this.processed copy of the input
+  * - canvas: draw processed canvas which is a processed copy of the input
   * - all: meta-function that performs: canvas, face, body, hand
   */
  draw: { canvas, face, body, hand, gesture, object, person, all, options: DrawOptions };
@@ -142,7 +142,7 @@ export class Human {
   * Possible events:
   * - `create`: triggered when Human object is instantiated
   * - `load`: triggered when models are loaded (explicitly or on-demand)
-  * - `image`: triggered when input image is this.processed
+  * - `image`: triggered when input image is processed
   * - `result`: triggered when detection is complete
   * - `warmup`: triggered when warmup is complete
   * - `error`: triggered on some errors
@@ -217,7 +217,7 @@ export class Human {
      all: (output: HTMLCanvasElement | OffscreenCanvas, result: Result, options?: Partial<DrawOptions>) => draw.all(output, result, options),
    };
    this.result = { face: [], body: [], hand: [], gesture: [], object: [], performance: {}, timestamp: 0, persons: [] };
-   // export access to image this.processing
+   // export access to image processing
    // @ts-ignore eslint-typescript cannot correctly infer type in anonymous function
    this.process = { tensor: null, canvas: null };
    // export raw access to underlying models
@@ -284,16 +284,21 @@ export class Human {
     return faceres.similarity(embedding1, embedding2);
   }

-  /** Segmentation method takes any input and returns this.processed canvas with body segmentation
+  /** Segmentation method takes any input and returns processed canvas with body segmentation
    * - Optional parameter background is used to fill the background with specific input
-   * - Segmentation is not triggered as part of detect this.process
+   * - Segmentation is not triggered as part of detect process
+   *
+   * Returns:
+   * - `data` as raw data array with per-pixel segmentation values
+   * - `alpha` as grayscale canvas that represents segmentation alpha values
+   * - `canvas` as canvas which is input image filtered with segmentation data and optionally merged with background image; canvas alpha values are set to segmentation values for easy merging
    *
    * @param input: {@link Input}
    * @param background?: {@link Input}
-   * @returns Canvas
+   * @returns { data, canvas, alpha }
    */
-  async segmentation(input: Input, background?: Input) {
-    return input ? segmentation.process(input, background, this.config) : null;
+  async segmentation(input: Input, background?: Input): Promise<{ data: Uint8ClampedArray | null, canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
+    return segmentation.process(input, background, this.config);
   }

   /** Enhance method performs additional enhancements to face image previously detected for further processing
@@ -394,7 +399,7 @@ export class Human {
   /** Main detection method
    * - Analyze configuration: {@link Config}
-   * - Pre-this.process input: {@link Input}
+   * - Pre-process input: {@link Input}
    * - Run inference for all configured models
    * - Process and return result: {@link Result}
    *
@@ -431,26 +436,24 @@ export class Human {
       timeStamp = now();
       this.state = 'image';
-      let img = image.process(input, this.config);
+      const img = image.process(input, this.config) as { canvas: HTMLCanvasElement | OffscreenCanvas, tensor: Tensor };
       this.process = img;
       this.performance.image = Math.trunc(now() - timeStamp);
       this.analyze('Get Image:');

-      // run segmentation prethis.processing
-      if (this.config.segmentation.enabled && this.process && img.tensor && img.canvas) {
+      // segmentation is only run explicitly via human.segmentation() which calls segmentation.process()
+      /*
+      if (this.config.segmentation.enabled && process && img.tensor && img.canvas) {
         this.analyze('Start Segmentation:');
         this.state = 'detect:segmentation';
         timeStamp = now();
-        await segmentation.predict(img);
+        const seg = await segmentation.predict(img, this.config);
+        img = { canvas: seg.canvas, tensor: seg.tensor };
         elapsedTime = Math.trunc(now() - timeStamp);
         if (elapsedTime > 0) this.performance.segmentation = elapsedTime;
-        if (img.canvas) {
-          // replace input
-          tf.dispose(img.tensor);
-          img = image.process(img.canvas, this.config);
-        }
         this.analyze('End Segmentation:');
       }
+      */

       if (!img.tensor) {
         if (this.config.debug) log('could not convert input to tensor');
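Callers migrating from the old canvas-or-null return value now destructure the result instead. A hypothetical browser-side caller of the new signature; element and variable names are illustrative:

import { Human } from '@vladmandic/human';

const human = new Human({ segmentation: { enabled: true, modelPath: 'selfie.json' } });

async function replaceBackground(video: HTMLVideoElement, background: HTMLImageElement, output: HTMLCanvasElement) {
  const seg = await human.segmentation(video, background); // { data, canvas, alpha }
  if (!seg.canvas) return; // module busy or model failed to load
  output.getContext('2d')?.drawImage(seg.canvas, 0, 0, output.width, output.height);
  // seg.alpha holds the grayscale mask and seg.data the raw per-pixel values for custom post-processing
}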
diff --git a/src/segmentation/segmentation.ts b/src/segmentation/segmentation.ts
index abea1537..b0bdd5ae 100644
--- a/src/segmentation/segmentation.ts
+++ b/src/segmentation/segmentation.ts
@@ -23,11 +23,11 @@ export async function load(config: Config): Promise<GraphModel> {
   return model;
 }

-export async function predict(input: { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null }): Promise<Uint8ClampedArray | null> {
-  const width = input.tensor?.shape[1] || 0;
-  const height = input.tensor?.shape[2] || 0;
-  if (!input.tensor) return null;
-  if (!model || !model.inputs[0].shape) return null;
+export async function predict(input: { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null }, config: Config)
+: Promise<{ data: Uint8ClampedArray | null, canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
+  const width = input.tensor?.shape[2] || 0;
+  const height = input.tensor?.shape[1] || 0;
+  if (!input.tensor || !model || !model.inputs[0].shape) return { data: null, canvas: null, alpha: null };
   const resizeInput = tf.image.resizeBilinear(input.tensor, [model.inputs[0].shape[1], model.inputs[0].shape[2]], false);
   const norm = tf.div(resizeInput, 255);
   const res = model.predict(norm) as Tensor;
@@ -38,7 +38,7 @@ export async function predict(input: { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null }, config: Config)
   const squeeze = tf.squeeze(res, 0);
   tf.dispose(res);
-  let resizeOutput;
+  let dataT;
   if (squeeze.shape[2] === 2) {
     // model meet has two channels for fg and bg
     const softmax = squeeze.softmax();
@@ -52,82 +52,66 @@ export async function predict(input: { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null }, config: Config)
     const crop = tf.image.cropAndResize(pad, [[0, 0, 0.5, 0.5]], [0], [width, height]);
     // otherwise run softmax after unstack and use standard resize
     // resizeOutput = tf.image.resizeBilinear(expand, [input.tensor?.shape[1], input.tensor?.shape[2]]);
-    resizeOutput = tf.squeeze(crop, 0);
+    dataT = tf.squeeze(crop, 0);
     tf.dispose(crop);
     tf.dispose(expand);
     tf.dispose(pad);
   } else {
     // model selfie has a single channel that we can use directly
-    resizeOutput = tf.image.resizeBilinear(squeeze, [width, height]);
+    dataT = tf.image.resizeBilinear(squeeze, [height, width]);
   }
   tf.dispose(squeeze);

+  const data = await dataT.data();
   if (env.node) {
-    const data = await resizeOutput.data();
-    tf.dispose(resizeOutput);
-    return data; // we're running in nodejs so return alpha array as-is
+    tf.dispose(dataT);
+    return { data, canvas: null, alpha: null }; // running in nodejs so return alpha array as-is
   }

-  const overlay = image.canvas(width, height);
-  if (tf.browser) await tf.browser.toPixels(resizeOutput, overlay);
-  tf.dispose(resizeOutput);
-
-  // get alpha channel data
   const alphaCanvas = image.canvas(width, height);
-  const ctxAlpha = alphaCanvas.getContext('2d') as CanvasRenderingContext2D;
-  ctxAlpha.filter = 'blur(8px';
-  await ctxAlpha.drawImage(overlay, 0, 0);
-  const alpha = ctxAlpha.getImageData(0, 0, width, height).data;
+  await tf.browser.toPixels(dataT, alphaCanvas);
+  tf.dispose(dataT);
+  const alphaCtx = alphaCanvas.getContext('2d') as CanvasRenderingContext2D;
+  if (config.segmentation.blur && config.segmentation.blur > 0) alphaCtx.filter = `blur(${config.segmentation.blur}px)`; // use css filter for blurring, can be done with gaussian blur manually instead
+  const alphaData = alphaCtx.getImageData(0, 0, width, height);

-  // get original canvas merged with overlay
-  const original = image.canvas(width, height);
-  const ctx = original.getContext('2d') as CanvasRenderingContext2D;
-  if (input.canvas) await ctx.drawImage(input.canvas, 0, 0);
-  // https://developer.mozilla.org/en-US/docs/Web/API/CanvasRenderingContext2D/globalCompositeOperation // best options are: darken, color-burn, multiply
-  ctx.globalCompositeOperation = 'darken';
-  ctx.filter = 'blur(8px)'; // use css filter for bluring, can be done with gaussian blur manually instead
-  await ctx.drawImage(overlay, 0, 0);
-  ctx.globalCompositeOperation = 'source-over'; // reset
-  ctx.filter = 'none'; // reset
+  // original canvas where only alpha shows
+  const compositeCanvas = image.canvas(width, height);
+  const compositeCtx = compositeCanvas.getContext('2d') as CanvasRenderingContext2D;
+  if (input.canvas) compositeCtx.drawImage(input.canvas, 0, 0);
+  compositeCtx.globalCompositeOperation = 'darken'; // https://developer.mozilla.org/en-US/docs/Web/API/CanvasRenderingContext2D/globalCompositeOperation // best options are: darken, color-burn, multiply
+  if (config.segmentation.blur && config.segmentation.blur > 0) compositeCtx.filter = `blur(${config.segmentation.blur}px)`; // use css filter for blurring, can be done with gaussian blur manually instead
+  compositeCtx.drawImage(alphaCanvas, 0, 0);
+  compositeCtx.globalCompositeOperation = 'source-over'; // reset composite operation
+  compositeCtx.filter = 'none'; // reset css filter
+  const compositeData = compositeCtx.getImageData(0, 0, width, height);
+  for (let i = 0; i < width * height; i++) compositeData.data[4 * i + 3] = alphaData.data[4 * i + 0]; // copy original alpha value to new composite canvas
+  compositeCtx.putImageData(compositeData, 0, 0);

-  input.canvas = original;
-
-  return alpha;
+  return { data, canvas: compositeCanvas, alpha: alphaCanvas };
 }

-export async function process(input: Input, background: Input | undefined, config: Config): Promise<HTMLCanvasElement | OffscreenCanvas | null> {
-  if (busy) return null;
+export async function process(input: Input, background: Input | undefined, config: Config)
+: Promise<{ data: Uint8ClampedArray | null, canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
+  if (busy) return { data: null, canvas: null, alpha: null };
   busy = true;
   if (!model) await load(config);
-  const img = image.process(input, config);
-  const tmp = image.process(background, config);
-  if (!img.canvas || !tmp.canvas) {
-    if (config.debug) log('segmentation cannot process input or background');
-    return null;
-  }
-  const alpha = await predict(img);
-  tf.dispose(img.tensor);
-  if (background && alpha) {
-    const bg = tmp.canvas as HTMLCanvasElement;
-    tf.dispose(tmp.tensor);
-    const fg = img.canvas as HTMLCanvasElement;
-    const fgData = fg.getContext('2d')?.getImageData(0, 0, fg.width, fg.height).data as Uint8ClampedArray;
-    const c = image.canvas(fg.width, fg.height);
-    const ctx = c.getContext('2d') as CanvasRenderingContext2D;
-    ctx.globalCompositeOperation = 'copy'; // reset
-    ctx.drawImage(bg, 0, 0, c.width, c.height);
-    const cData = ctx.getImageData(0, 0, c.width, c.height) as ImageData;
-    for (let i = 0; i < c.width * c.height; i++) { // this should be done with globalCompositeOperation instead of looping through image data
-      cData.data[4 * i + 0] = ((255 - alpha[4 * i + 0]) / 255.0 * cData.data[4 * i + 0]) + (alpha[4 * i + 0] / 255.0 * fgData[4 * i + 0]);
-      cData.data[4 * i + 1] = ((255 - alpha[4 * i + 1]) / 255.0 * cData.data[4 * i + 1]) + (alpha[4 * i + 1] / 255.0 * fgData[4 * i + 1]);
-      cData.data[4 * i + 2] = ((255 - alpha[4 * i + 2]) / 255.0 * cData.data[4 * i + 2]) + (alpha[4 * i + 2] / 255.0 * fgData[4 * i + 2]);
-      cData.data[4 * i + 3] = ((255 - alpha[4 * i + 3]) / 255.0 * cData.data[4 * i + 3]) + (alpha[4 * i + 3] / 255.0 * fgData[4 * i + 3]);
-    }
-    ctx.putImageData(cData, 0, 0);
-    img.canvas = c;
-  }
+  const inputImage = image.process(input, config);
+  const segmentation = await predict(inputImage, config);
+  tf.dispose(inputImage.tensor);
+
+  let mergedCanvas: HTMLCanvasElement | OffscreenCanvas | null = null;
+  if (background && segmentation.canvas) { // draw background with segmentation as overlay if background is present
+    mergedCanvas = image.canvas(inputImage.canvas?.width || 0, inputImage.canvas?.height || 0);
+    const bgImage = image.process(background, config);
+    tf.dispose(bgImage.tensor);
+    const ctxMerge = mergedCanvas.getContext('2d') as CanvasRenderingContext2D;
+    // ctxMerge.globalCompositeOperation = 'source-over';
+    ctxMerge.drawImage(bgImage.canvas as HTMLCanvasElement, 0, 0, mergedCanvas.width, mergedCanvas.height);
+    // ctxMerge.globalCompositeOperation = 'source-atop';
+    ctxMerge.drawImage(segmentation.canvas as HTMLCanvasElement, 0, 0);
+    // ctxMerge.globalCompositeOperation = 'source-over';
+  }
   busy = false;
-  return img.canvas;
+  return { data: segmentation.data, canvas: mergedCanvas || segmentation.canvas, alpha: segmentation.alpha };
 }
diff --git a/wiki b/wiki
index b24eafa2..d293f4a2 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit b24eafa265bda331788e0d36cf5c854a494e33d6
+Subproject commit d293f4a20b640e6bc8485dc0f8a2c2147ce33073
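In nodejs the browser compositing path is skipped and only `data` is returned, so any masking must be done by hand. A hedged sketch: it assumes one segmentation value per pixel and leaves the foreground threshold as a parameter, since the value range (0..1 float vs 0..255) depends on the model output:

function toBinaryMask(data: ArrayLike<number>, threshold: number): Uint8Array {
  const mask = new Uint8Array(data.length); // one byte per pixel: 255 = person, 0 = background
  for (let i = 0; i < data.length; i++) mask[i] = data[i] > threshold ? 255 : 0;
  return mask;
}

// usage, per the api above (input and threshold values are illustrative):
// const seg = await human.segmentation(inputTensor);
// const mask = seg.data ? toBinaryMask(seg.data, 0.5) : null;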