diff --git a/config.js b/config.js
index 9ac1d3b9..f7d87a4a 100644
--- a/config.js
+++ b/config.js
@@ -56,7 +56,7 @@ export default {
     skipFrames: 10, // how many frames to go without re-running the face bounding box detector, only used for video inputs
                     // if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
                     // as face probably hasn't moved much in short time (10 * 1/25 = 0.4 sec)
-    minConfidence: 0.5, // threshold for discarding a prediction
+    minConfidence: 0.3, // threshold for discarding a prediction
     iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
     scoreThreshold: 0.5, // threshold for deciding when to remove boxes based on score in non-maximum suppression
   },
@@ -74,21 +74,21 @@ export default {
     age: {
       enabled: true,
       modelPath: '../models/ssrnet-age-imdb.json', // can be 'imdb' or 'wiki'
-      // which determines training set for model
+                                                   // which determines training set for model
       inputSize: 64, // fixed value
       skipFrames: 10, // how many frames to go without re-running the detector, only used for video inputs
     },
     gender: {
       enabled: true,
-      minConfidence: 0.8, // threshold for discarding a prediction
+      minConfidence: 0.3, // threshold for discarding a prediction
       modelPath: '../models/ssrnet-gender-imdb.json',
     },
     emotion: {
       enabled: true,
-      inputSize: 64, // fixed value
-      minConfidence: 0.5, // threshold for discarding a prediction
+      inputSize: 64, // fixed value, 64 for 'mini' and 'large', 48 for 'cnn'
+      minConfidence: 0.3, // threshold for discarding a prediction
       skipFrames: 10, // how many frames to go without re-running the detector
-      modelPath: '../models/emotion.json',
+      modelPath: '../models/emotion-large.json', // can be 'mini', 'large' or 'cnn'
     },
   },
   body: {
@@ -106,7 +106,7 @@ export default {
     skipFrames: 10, // how many frames to go without re-running the hand bounding box detector, only used for video inputs
                     // if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
                     // as the hand probably hasn't moved much in short time (10 * 1/25 = 0.4 sec)
-    minConfidence: 0.5, // threshold for discarding a prediction
+    minConfidence: 0.3, // threshold for discarding a prediction
     iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
     scoreThreshold: 0.5, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     enlargeFactor: 1.65, // empirical tuning as skeleton prediction prefers hand box with some whitespace
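The skipFrames comments above describe a cache-and-reuse pattern shared by the face, emotion and hand sections. A minimal sketch of that idea, with a hypothetical `cachedDetect` helper and `detector` callback (neither is part of this PR):

```js
// Sketch of the skipFrames idea from the config comments: run the expensive
// detector only every `skipFrames` frames and reuse the cached result otherwise.
let cached = null;
let skipped = Number.MAX_SAFE_INTEGER;

async function cachedDetect(input, detector, skipFrames = 10) {
  if (cached && skipped < skipFrames) {
    skipped++;
    return cached; // at 25 FPS, reusing for 10 frames covers 10 * 1/25 = 0.4 sec
  }
  skipped = 0;
  cached = await detector(input); // full re-detection
  return cached;
}
```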
diff --git a/demo/browser.js b/demo/browser.js
index bb7cd8dd..3d9636bb 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -10,7 +10,7 @@ const ui = {
   baseBackground: 'rgba(50, 50, 50, 1)', // 'grey'
   baseLabel: 'rgba(173, 216, 230, 0.9)', // 'lightblue' with dark alpha channel
   baseFontProto: 'small-caps {size} "Segoe UI"',
-  baseLineWidth: 16,
+  baseLineWidth: 12,
   baseLineHeightProto: 2,
   columns: 2,
   busy: false,
@@ -55,15 +55,15 @@ const config = {
   videoOptimized: true,
   face: {
     enabled: true,
-    detector: { maxFaces: 10, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.5 },
+    detector: { maxFaces: 10, skipFrames: 10, minConfidence: 0.3, iouThreshold: 0.3, scoreThreshold: 0.5 },
     mesh: { enabled: true },
     iris: { enabled: true },
     age: { enabled: true, skipFrames: 10 },
     gender: { enabled: true },
-    emotion: { enabled: true, minConfidence: 0.5, useGrayscale: true },
+    emotion: { enabled: true, minConfidence: 0.3, useGrayscale: true },
   },
   body: { enabled: true, maxDetections: 10, scoreThreshold: 0.5, nmsRadius: 20 },
-  hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.5 },
+  hand: { enabled: true, skipFrames: 10, minConfidence: 0.3, iouThreshold: 0.3, scoreThreshold: 0.5 },
   gesture: { enabled: true },
 };
@@ -149,7 +149,7 @@ async function setupCamera() {
   const output = document.getElementById('log');
   const live = video.srcObject ? ((video.srcObject.getVideoTracks()[0].readyState === 'live') && (video.readyState > 2) && (!video.paused)) : false;
   let msg = '';
-  status('starting camera');
+  status('setting up camera');
   // setup webcam. note that navigator.mediaDevices requires that page is accessed via https
   if (!navigator.mediaDevices) {
     msg = 'camera access not supported';
@@ -179,9 +179,7 @@ async function setupCamera() {
   else return null;
   const track = stream.getVideoTracks()[0];
   const settings = track.getSettings();
-  log('camera constraints:', constraints, 'window:', { width: window.innerWidth, height: window.innerHeight });
-  log('camera settings:', settings);
-  log('camera track:', track);
+  log('camera constraints:', constraints, 'window:', { width: window.innerWidth, height: window.innerHeight }, 'settings:', settings, 'track:', track);
   camera = { name: track.label, width: settings.width, height: settings.height, facing: settings.facingMode === 'user' ? 'front' : 'back' };
   return new Promise((resolve) => {
     video.onloadeddata = async () => {
@@ -193,6 +191,7 @@ async function setupCamera() {
       ui.busy = false;
       // do once more because onresize events can be delayed or skipped
       // if (video.width > window.innerWidth) await setupCamera();
+      status('');
      resolve(video);
     };
   });
 }
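setupCamera above and runHumanDetect below both evaluate the same three readiness conditions inline; factored out for clarity as a hypothetical `isLive` helper (not part of this PR):

```js
// The "is the webcam actually delivering frames" predicate this PR evaluates
// inline in both setupCamera() and runHumanDetect(), as a standalone helper.
function isLive(video) {
  return Boolean(video.srcObject)
    && (video.srcObject.getVideoTracks()[0].readyState === 'live') // track producing data
    && (video.readyState > 2) // HAVE_FUTURE_DATA or better
    && !video.paused;
}
```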
@@ -222,32 +221,29 @@ function webWorker(input, image, canvas) {
 // main processing function when input is webcam, can use direct invocation or web worker
 function runHumanDetect(input, canvas) {
   timeStamp = performance.now();
-  // perform detect if live video or not video at all
-  if (input.srcObject) {
-    // if video not ready, just redo
-    const live = (input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState > 2) && (!input.paused);
-    if (!live) {
-      if (!input.paused) {
-        log(`video not ready: state: ${input.srcObject.getVideoTracks()[0].readyState} stream state: ${input.readyState}`);
-        setTimeout(() => runHumanDetect(input, canvas), 500);
-      }
-      return;
-    }
-    if (ui.useWorker) {
-      // get image data from video as we cannot send html objects to webworker
-      const offscreen = new OffscreenCanvas(canvas.width, canvas.height);
-      const ctx = offscreen.getContext('2d');
-      ctx.drawImage(input, 0, 0, input.width, input.height, 0, 0, canvas.width, canvas.height);
-      const data = ctx.getImageData(0, 0, canvas.width, canvas.height);
-      // perform detection in worker
-      webWorker(input, data, canvas);
-    } else {
-      human.detect(input, config).then((result) => {
-        if (result.error) log(result.error);
-        else drawResults(input, result, canvas);
-        if (config.profile) log('profile data:', human.profile());
-      });
-    }
+  // if live video
+  const live = input.srcObject && (input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState > 2) && (!input.paused);
+  if (!live) {
+    // if we want to continue and camera not ready, retry in 0.5sec, else just give up
+    if ((input.srcObject?.getVideoTracks()[0].readyState === 'live') && (input.readyState <= 2)) setTimeout(() => runHumanDetect(input, canvas), 500);
+    else log(`camera not ready: track state: ${input.srcObject?.getVideoTracks()[0].readyState} stream state: ${input.readyState}`);
+    return;
+  }
+  status('');
+  if (ui.useWorker) {
+    // get image data from video as we cannot send html objects to webworker
+    const offscreen = new OffscreenCanvas(canvas.width, canvas.height);
+    const ctx = offscreen.getContext('2d');
+    ctx.drawImage(input, 0, 0, input.width, input.height, 0, 0, canvas.width, canvas.height);
+    const data = ctx.getImageData(0, 0, canvas.width, canvas.height);
+    // perform detection in worker
+    webWorker(input, data, canvas);
+  } else {
+    human.detect(input, config).then((result) => {
+      if (result.error) log(result.error);
+      else drawResults(input, result, canvas);
+      if (config.profile) log('profile data:', human.profile());
+    });
   }
 }
@@ -286,7 +282,8 @@ async function detectVideo() {
   document.getElementById('canvas').style.display = 'block';
   const video = document.getElementById('video');
   const canvas = document.getElementById('canvas');
-  ui.baseFont = ui.baseFontProto.replace(/{size}/, '1.3rem');
+  const size = 12 + Math.trunc(window.innerWidth / 400);
+  ui.baseFont = ui.baseFontProto.replace(/{size}/, `${size}px`);
   ui.baseLineHeight = ui.baseLineHeightProto;
   if ((video.srcObject !== null) && !video.paused) {
     document.getElementById('play').style.display = 'block';
@@ -305,7 +302,8 @@ async function detectSampleImages() {
   document.getElementById('play').style.display = 'none';
   config.videoOptimized = false;
-  ui.baseFont = ui.baseFontProto.replace(/{size}/, `${1.3 * ui.columns}rem`);
+  const size = Math.trunc(ui.columns * 25600 / window.innerWidth);
+  ui.baseFont = ui.baseFontProto.replace(/{size}/, `${size}px`);
   ui.baseLineHeight = ui.baseLineHeightProto * ui.columns;
   document.getElementById('canvas').style.display = 'none';
   document.getElementById('samples-container').style.display = 'block';
diff --git a/demo/index.html b/demo/index.html
index 7e608089..e1a3b8e7 100644
--- a/demo/index.html
+++ b/demo/index.html
@@ -35,7 +35,7 @@
   .log { position: fixed; bottom: 0; margin: 0.4rem; font-size: 0.9rem; }
   .samples-container { display: flex; flex-wrap: wrap; }
   .video { display: none; }
-  .canvas { margin: 0 auto; height: 100%; }
+  .canvas { margin: 0 auto; height: 100vh; }
   .loader { width: 300px; height: 300px; border: 3px solid transparent; border-radius: 50%; border-top: 4px solid #f15e41; animation: spin 4s linear infinite; position: absolute; top: 30%; left: 50%; margin-left: -150px; z-index: 15; }
   .loader::before, .loader::after { content: ""; position: absolute; top: 6px; bottom: 6px; left: 6px; right: 6px; border-radius: 50%; border: 4px solid transparent; }
   .loader::before { border-top-color: #bad375; animation: 3s spin linear infinite; }
diff --git a/models/emotion-large.bin b/models/emotion-large.bin
new file mode 100644
index 00000000..94746d64
Binary files /dev/null and b/models/emotion-large.bin differ
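The new font-size formulas in detectVideo and detectSampleImages scale with viewport width instead of the old fixed rem values; evaluating them at a few widths shows the effect (assumes ui.columns = 2 as configured above):

```js
// Sanity check of the viewport-derived font sizes introduced above.
for (const width of [800, 1600, 3840]) {
  const videoFont = 12 + Math.trunc(width / 400);     // detectVideo: 14, 16, 21 px
  const sampleFont = Math.trunc((2 * 25600) / width); // detectSampleImages: 64, 32, 13 px
  console.log({ width, videoFont, sampleFont });
}
```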
{"name":"unknown_26","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"3"},{"size":"3"},{"size":"128"},{"size":"1"}]}}},"dtype":{"type":"DT_FLOAT"}}}, + {"name":"unknown_32","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"3"},{"size":"3"},{"size":"256"},{"size":"1"}]}}},"dtype":{"type":"DT_FLOAT"}}}, + {"name":"unknown_9","op":"Const","attr":{"dtype":{"type":"DT_FLOAT"},"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"3"},{"size":"3"},{"size":"64"},{"size":"1"}]}}}}}, + {"name":"unknown_15","op":"Const","attr":{"dtype":{"type":"DT_FLOAT"},"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"3"},{"size":"3"},{"size":"128"},{"size":"1"}]}}}}}, + {"name":"unknown_43","op":"Const","attr":{"dtype":{"type":"DT_FLOAT"},"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"3"},{"size":"3"},{"size":"256"},{"size":"7"}]}}}}}, + {"name":"unknown_44","op":"Const","attr":{"dtype":{"type":"DT_FLOAT"},"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"7"}]}}}}}, + {"name":"StatefulPartitionedCall/model_1/global_average_pooling2d_1/Mean/reduction_indices","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_INT32","tensorShape":{"dim":[{"size":"2"}]}}},"dtype":{"type":"DT_INT32"}}}, + {"name":"input_1","op":"Placeholder","attr":{"shape":{"shape":{"dim":[{"size":"-1"},{"size":"64"},{"size":"64"},{"size":"1"}]}},"dtype":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/conv2d_1/Conv2D_weights","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"3"},{"size":"3"},{"size":"1"},{"size":"32"}]}}},"dtype":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/conv2d_1/Conv2D_bn_offset","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"32"}]}}},"dtype":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/conv2d_2/Conv2D_weights","op":"Const","attr":{"dtype":{"type":"DT_FLOAT"},"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"3"},{"size":"3"},{"size":"32"},{"size":"64"}]}}}}}, + {"name":"StatefulPartitionedCall/model_1/separable_conv2d_4/separable_conv2d_weights","op":"Const","attr":{"dtype":{"type":"DT_FLOAT"},"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"1"},{"size":"1"},{"size":"256"},{"size":"256"}]}}}}}, + {"name":"StatefulPartitionedCall/model_1/conv2d_2/Conv2D_bn_offset","op":"Const","attr":{"dtype":{"type":"DT_FLOAT"},"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"64"}]}}}}}, + {"name":"StatefulPartitionedCall/model_1/conv2d_3/Conv2D_weights","op":"Const","attr":{"dtype":{"type":"DT_FLOAT"},"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"1"},{"size":"1"},{"size":"64"},{"size":"128"}]}}}}}, + {"name":"StatefulPartitionedCall/model_1/separable_conv2d_4/separable_conv2d_bn_offset","op":"Const","attr":{"dtype":{"type":"DT_FLOAT"},"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"256"}]}}}}}, + {"name":"StatefulPartitionedCall/model_1/conv2d_3/Conv2D_bn_offset","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"128"}]}}},"dtype":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/separable_conv2d_1/separable_conv2d_weights","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"1"},{"size":"1"},{"size":"64"},{"size":"128"}]}}},"dtype":{"type":"DT_FLOAT"}}}, + 
{"name":"StatefulPartitionedCall/model_1/separable_conv2d_1/separable_conv2d_bn_offset","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"128"}]}}},"dtype":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/separable_conv2d_2/separable_conv2d_weights","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"1"},{"size":"1"},{"size":"128"},{"size":"128"}]}}},"dtype":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/separable_conv2d_2/separable_conv2d_bn_offset","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"128"}]}}},"dtype":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/conv2d_4/Conv2D_weights","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"1"},{"size":"1"},{"size":"128"},{"size":"256"}]}}},"dtype":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/conv2d_4/Conv2D_bn_offset","op":"Const","attr":{"dtype":{"type":"DT_FLOAT"},"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"256"}]}}}}}, + {"name":"StatefulPartitionedCall/model_1/separable_conv2d_3/separable_conv2d_weights","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"1"},{"size":"1"},{"size":"128"},{"size":"256"}]}}},"dtype":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/separable_conv2d_3/separable_conv2d_bn_offset","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"256"}]}}},"dtype":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/block1_conv1_act/Relu","op":"_FusedConv2D","input":["input_1","StatefulPartitionedCall/model_1/conv2d_1/Conv2D_weights","StatefulPartitionedCall/model_1/conv2d_1/Conv2D_bn_offset"],"device":"/device:CPU:0","attr":{"fused_ops":{"list":{"s":["Qmlhc0FkZA==","UmVsdQ=="]}},"T":{"type":"DT_FLOAT"},"data_format":{"s":"TkhXQw=="},"use_cudnn_on_gpu":{"b":true},"num_args":{"i":"1"},"padding":{"s":"VkFMSUQ="},"dilations":{"list":{"i":["1","1","1","1"]}},"strides":{"list":{"i":["1","2","2","1"]}},"explicit_paddings":{"list":{}},"epsilon":{"f":0}}}, + {"name":"StatefulPartitionedCall/model_1/block1_conv2_act/Relu","op":"_FusedConv2D","input":["StatefulPartitionedCall/model_1/block1_conv1_act/Relu","StatefulPartitionedCall/model_1/conv2d_2/Conv2D_weights","StatefulPartitionedCall/model_1/conv2d_2/Conv2D_bn_offset"],"device":"/device:CPU:0","attr":{"dilations":{"list":{"i":["1","1","1","1"]}},"epsilon":{"f":0},"padding":{"s":"VkFMSUQ="},"fused_ops":{"list":{"s":["Qmlhc0FkZA==","UmVsdQ=="]}},"strides":{"list":{"i":["1","1","1","1"]}},"T":{"type":"DT_FLOAT"},"num_args":{"i":"1"},"explicit_paddings":{"list":{}},"use_cudnn_on_gpu":{"b":true},"data_format":{"s":"TkhXQw=="}}}, + {"name":"StatefulPartitionedCall/model_1/separable_conv2d_1/separable_conv2d/depthwise","op":"DepthwiseConv2dNative","input":["StatefulPartitionedCall/model_1/block1_conv2_act/Relu","unknown_9"],"attr":{"dilations":{"list":{"i":["1","1","1","1"]}},"explicit_paddings":{"list":{}},"T":{"type":"DT_FLOAT"},"data_format":{"s":"TkhXQw=="},"strides":{"list":{"i":["1","1","1","1"]}},"padding":{"s":"U0FNRQ=="}}}, + 
{"name":"StatefulPartitionedCall/model_1/batch_normalization_1/FusedBatchNormV3","op":"_FusedConv2D","input":["StatefulPartitionedCall/model_1/block1_conv2_act/Relu","StatefulPartitionedCall/model_1/conv2d_3/Conv2D_weights","StatefulPartitionedCall/model_1/conv2d_3/Conv2D_bn_offset"],"device":"/device:CPU:0","attr":{"explicit_paddings":{"list":{}},"dilations":{"list":{"i":["1","1","1","1"]}},"use_cudnn_on_gpu":{"b":true},"num_args":{"i":"1"},"epsilon":{"f":0},"data_format":{"s":"TkhXQw=="},"fused_ops":{"list":{"s":["Qmlhc0FkZA=="]}},"strides":{"list":{"i":["1","2","2","1"]}},"padding":{"s":"U0FNRQ=="},"T":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/block2_sepconv2_act/Relu","op":"_FusedConv2D","input":["StatefulPartitionedCall/model_1/separable_conv2d_1/separable_conv2d/depthwise","StatefulPartitionedCall/model_1/separable_conv2d_1/separable_conv2d_weights","StatefulPartitionedCall/model_1/separable_conv2d_1/separable_conv2d_bn_offset"],"device":"/device:CPU:0","attr":{"padding":{"s":"VkFMSUQ="},"dilations":{"list":{"i":["1","1","1","1"]}},"T":{"type":"DT_FLOAT"},"strides":{"list":{"i":["1","1","1","1"]}},"use_cudnn_on_gpu":{"b":true},"num_args":{"i":"1"},"explicit_paddings":{"list":{}},"fused_ops":{"list":{"s":["Qmlhc0FkZA==","UmVsdQ=="]}},"epsilon":{"f":0},"data_format":{"s":"TkhXQw=="}}}, + {"name":"StatefulPartitionedCall/model_1/separable_conv2d_2/separable_conv2d/depthwise","op":"DepthwiseConv2dNative","input":["StatefulPartitionedCall/model_1/block2_sepconv2_act/Relu","unknown_15"],"attr":{"strides":{"list":{"i":["1","1","1","1"]}},"explicit_paddings":{"list":{}},"T":{"type":"DT_FLOAT"},"dilations":{"list":{"i":["1","1","1","1"]}},"padding":{"s":"U0FNRQ=="},"data_format":{"s":"TkhXQw=="}}}, + {"name":"StatefulPartitionedCall/model_1/block2_sepconv2_bn/FusedBatchNormV3","op":"_FusedConv2D","input":["StatefulPartitionedCall/model_1/separable_conv2d_2/separable_conv2d/depthwise","StatefulPartitionedCall/model_1/separable_conv2d_2/separable_conv2d_weights","StatefulPartitionedCall/model_1/separable_conv2d_2/separable_conv2d_bn_offset"],"device":"/device:CPU:0","attr":{"data_format":{"s":"TkhXQw=="},"padding":{"s":"VkFMSUQ="},"explicit_paddings":{"list":{}},"fused_ops":{"list":{"s":["Qmlhc0FkZA=="]}},"T":{"type":"DT_FLOAT"},"epsilon":{"f":0},"use_cudnn_on_gpu":{"b":true},"num_args":{"i":"1"},"strides":{"list":{"i":["1","1","1","1"]}},"dilations":{"list":{"i":["1","1","1","1"]}}}}, + {"name":"StatefulPartitionedCall/model_1/max_pooling2d_1/MaxPool","op":"MaxPool","input":["StatefulPartitionedCall/model_1/block2_sepconv2_bn/FusedBatchNormV3"],"attr":{"strides":{"list":{"i":["1","2","2","1"]}},"data_format":{"s":"TkhXQw=="},"padding":{"s":"U0FNRQ=="},"T":{"type":"DT_FLOAT"},"ksize":{"list":{"i":["1","3","3","1"]}}}}, + {"name":"StatefulPartitionedCall/model_1/add_1/add","op":"AddV2","input":["StatefulPartitionedCall/model_1/max_pooling2d_1/MaxPool","StatefulPartitionedCall/model_1/batch_normalization_1/FusedBatchNormV3"],"attr":{"T":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/block3_sepconv1_act/Relu","op":"Relu","input":["StatefulPartitionedCall/model_1/add_1/add"],"attr":{"T":{"type":"DT_FLOAT"}}}, + 
{"name":"StatefulPartitionedCall/model_1/batch_normalization_2/FusedBatchNormV3","op":"_FusedConv2D","input":["StatefulPartitionedCall/model_1/add_1/add","StatefulPartitionedCall/model_1/conv2d_4/Conv2D_weights","StatefulPartitionedCall/model_1/conv2d_4/Conv2D_bn_offset"],"device":"/device:CPU:0","attr":{"epsilon":{"f":0},"data_format":{"s":"TkhXQw=="},"fused_ops":{"list":{"s":["Qmlhc0FkZA=="]}},"T":{"type":"DT_FLOAT"},"num_args":{"i":"1"},"strides":{"list":{"i":["1","2","2","1"]}},"use_cudnn_on_gpu":{"b":true},"explicit_paddings":{"list":{}},"dilations":{"list":{"i":["1","1","1","1"]}},"padding":{"s":"U0FNRQ=="}}}, + {"name":"StatefulPartitionedCall/model_1/separable_conv2d_3/separable_conv2d/depthwise","op":"DepthwiseConv2dNative","input":["StatefulPartitionedCall/model_1/block3_sepconv1_act/Relu","unknown_26"],"attr":{"padding":{"s":"U0FNRQ=="},"T":{"type":"DT_FLOAT"},"dilations":{"list":{"i":["1","1","1","1"]}},"data_format":{"s":"TkhXQw=="},"explicit_paddings":{"list":{}},"strides":{"list":{"i":["1","1","1","1"]}}}}, + {"name":"StatefulPartitionedCall/model_1/block3_sepconv2_act/Relu","op":"_FusedConv2D","input":["StatefulPartitionedCall/model_1/separable_conv2d_3/separable_conv2d/depthwise","StatefulPartitionedCall/model_1/separable_conv2d_3/separable_conv2d_weights","StatefulPartitionedCall/model_1/separable_conv2d_3/separable_conv2d_bn_offset"],"device":"/device:CPU:0","attr":{"data_format":{"s":"TkhXQw=="},"fused_ops":{"list":{"s":["Qmlhc0FkZA==","UmVsdQ=="]}},"T":{"type":"DT_FLOAT"},"explicit_paddings":{"list":{}},"use_cudnn_on_gpu":{"b":true},"num_args":{"i":"1"},"dilations":{"list":{"i":["1","1","1","1"]}},"padding":{"s":"VkFMSUQ="},"strides":{"list":{"i":["1","1","1","1"]}},"epsilon":{"f":0}}}, + {"name":"StatefulPartitionedCall/model_1/separable_conv2d_4/separable_conv2d/depthwise","op":"DepthwiseConv2dNative","input":["StatefulPartitionedCall/model_1/block3_sepconv2_act/Relu","unknown_32"],"attr":{"explicit_paddings":{"list":{}},"padding":{"s":"U0FNRQ=="},"strides":{"list":{"i":["1","1","1","1"]}},"T":{"type":"DT_FLOAT"},"dilations":{"list":{"i":["1","1","1","1"]}},"data_format":{"s":"TkhXQw=="}}}, + {"name":"StatefulPartitionedCall/model_1/block3_sepconv2_bn/FusedBatchNormV3","op":"_FusedConv2D","input":["StatefulPartitionedCall/model_1/separable_conv2d_4/separable_conv2d/depthwise","StatefulPartitionedCall/model_1/separable_conv2d_4/separable_conv2d_weights","StatefulPartitionedCall/model_1/separable_conv2d_4/separable_conv2d_bn_offset"],"device":"/device:CPU:0","attr":{"num_args":{"i":"1"},"strides":{"list":{"i":["1","1","1","1"]}},"fused_ops":{"list":{"s":["Qmlhc0FkZA=="]}},"data_format":{"s":"TkhXQw=="},"epsilon":{"f":0},"explicit_paddings":{"list":{}},"padding":{"s":"VkFMSUQ="},"use_cudnn_on_gpu":{"b":true},"T":{"type":"DT_FLOAT"},"dilations":{"list":{"i":["1","1","1","1"]}}}}, + {"name":"StatefulPartitionedCall/model_1/max_pooling2d_2/MaxPool","op":"MaxPool","input":["StatefulPartitionedCall/model_1/block3_sepconv2_bn/FusedBatchNormV3"],"attr":{"strides":{"list":{"i":["1","2","2","1"]}},"ksize":{"list":{"i":["1","3","3","1"]}},"padding":{"s":"U0FNRQ=="},"data_format":{"s":"TkhXQw=="},"T":{"type":"DT_FLOAT"}}}, + {"name":"StatefulPartitionedCall/model_1/add_2/add","op":"AddV2","input":["StatefulPartitionedCall/model_1/max_pooling2d_2/MaxPool","StatefulPartitionedCall/model_1/batch_normalization_2/FusedBatchNormV3"],"attr":{"T":{"type":"DT_FLOAT"}}}, + 
{"name":"StatefulPartitionedCall/model_1/conv2d_5/BiasAdd","op":"_FusedConv2D","input":["StatefulPartitionedCall/model_1/add_2/add","unknown_43","unknown_44"],"device":"/device:CPU:0","attr":{"explicit_paddings":{"list":{}},"use_cudnn_on_gpu":{"b":true},"epsilon":{"f":0},"padding":{"s":"U0FNRQ=="},"num_args":{"i":"1"},"dilations":{"list":{"i":["1","1","1","1"]}},"fused_ops":{"list":{"s":["Qmlhc0FkZA=="]}},"strides":{"list":{"i":["1","1","1","1"]}},"T":{"type":"DT_FLOAT"},"data_format":{"s":"TkhXQw=="}}}, + {"name":"StatefulPartitionedCall/model_1/global_average_pooling2d_1/Mean","op":"Mean","input":["StatefulPartitionedCall/model_1/conv2d_5/BiasAdd","StatefulPartitionedCall/model_1/global_average_pooling2d_1/Mean/reduction_indices"],"attr":{"Tidx":{"type":"DT_INT32"},"T":{"type":"DT_FLOAT"},"keep_dims":{"b":false}}}, + {"name":"StatefulPartitionedCall/model_1/predictions/Softmax","op":"Softmax","input":["StatefulPartitionedCall/model_1/global_average_pooling2d_1/Mean"],"attr":{"T":{"type":"DT_FLOAT"}}}, + {"name":"Identity","op":"Identity","input":["StatefulPartitionedCall/model_1/predictions/Softmax"],"attr":{"T":{"type":"DT_FLOAT"}}} + ], + "library": {}, + "versions": + { + "producer": 440 + } + }, + "weightsManifest": + [ + { + "paths": ["emotion-large.bin"], + "weights": [{"name":"unknown_26","shape":[3,3,128,1],"dtype":"float32"},{"name":"unknown_32","shape":[3,3,256,1],"dtype":"float32"},{"name":"unknown_9","shape":[3,3,64,1],"dtype":"float32"},{"name":"unknown_15","shape":[3,3,128,1],"dtype":"float32"},{"name":"unknown_43","shape":[3,3,256,7],"dtype":"float32"},{"name":"unknown_44","shape":[7],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/global_average_pooling2d_1/Mean/reduction_indices","shape":[2],"dtype":"int32"},{"name":"StatefulPartitionedCall/model_1/conv2d_1/Conv2D_weights","shape":[3,3,1,32],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_1/Conv2D_bn_offset","shape":[32],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_2/Conv2D_weights","shape":[3,3,32,64],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_4/separable_conv2d_weights","shape":[1,1,256,256],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_2/Conv2D_bn_offset","shape":[64],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_3/Conv2D_weights","shape":[1,1,64,128],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_4/separable_conv2d_bn_offset","shape":[256],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_3/Conv2D_bn_offset","shape":[128],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_1/separable_conv2d_weights","shape":[1,1,64,128],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_1/separable_conv2d_bn_offset","shape":[128],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_2/separable_conv2d_weights","shape":[1,1,128,128],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_2/separable_conv2d_bn_offset","shape":[128],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_4/Conv2D_weights","shape":[1,1,128,256],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_4/Conv2D_bn_offset","shape":[256],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_3/separable_conv2d_weights","shape":[1,1,128,256],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_3/separable_conv2d_bn_offset","sha
pe":[256],"dtype":"float32"}] + } + ] +} \ No newline at end of file diff --git a/models/emotion.bin b/models/emotion-mini.bin similarity index 100% rename from models/emotion.bin rename to models/emotion-mini.bin diff --git a/models/emotion.json b/models/emotion-mini.json similarity index 99% rename from models/emotion.json rename to models/emotion-mini.json index bf5deca1..70d3bbe1 100644 --- a/models/emotion.json +++ b/models/emotion-mini.json @@ -98,7 +98,7 @@ "weightsManifest": [ { - "paths": ["emotion.bin"], + "paths": ["emotion-mini.bin"], "weights": [{"name":"unknown_60","shape":[3,3,64,1],"dtype":"float32"},{"name":"unknown_66","shape":[3,3,128,1],"dtype":"float32"},{"name":"unknown_43","shape":[3,3,32,1],"dtype":"float32"},{"name":"unknown_49","shape":[3,3,64,1],"dtype":"float32"},{"name":"unknown_26","shape":[3,3,16,1],"dtype":"float32"},{"name":"unknown_32","shape":[3,3,32,1],"dtype":"float32"},{"name":"unknown_9","shape":[3,3,8,1],"dtype":"float32"},{"name":"unknown_15","shape":[3,3,16,1],"dtype":"float32"},{"name":"unknown_77","shape":[3,3,128,7],"dtype":"float32"},{"name":"unknown_78","shape":[7],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/global_average_pooling2d_1/Mean/reduction_indices","shape":[2],"dtype":"int32"},{"name":"StatefulPartitionedCall/model_1/conv2d_1/Conv2D_weights","shape":[3,3,1,8],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_6/Conv2D_weights","shape":[1,1,64,128],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_1/Conv2D_bn_offset","shape":[8],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_2/Conv2D_weights","shape":[3,3,8,8],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_6/Conv2D_bn_offset","shape":[128],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_2/Conv2D_bn_offset","shape":[8],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_3/Conv2D_weights","shape":[1,1,8,16],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_7/separable_conv2d_weights","shape":[1,1,64,128],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_3/Conv2D_bn_offset","shape":[16],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_1/separable_conv2d_weights","shape":[1,1,8,16],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_1/separable_conv2d_bn_offset","shape":[16],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_7/separable_conv2d_bn_offset","shape":[128],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_2/separable_conv2d_weights","shape":[1,1,16,16],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_2/separable_conv2d_bn_offset","shape":[16],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_4/Conv2D_weights","shape":[1,1,16,32],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/conv2d_4/Conv2D_bn_offset","shape":[32],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_3/separable_conv2d_weights","shape":[1,1,16,32],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_3/separable_conv2d_bn_offset","shape":[32],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_8/separable_conv2d_weights","shape":[1,1,128,128],"dtype":"float32"},{"name":"StatefulPartitionedCall/model_1/separable_conv2d_4/separable_conv2d_weights","shape":[1,1,32,32],"dtype":"float32"},{"name":"StatefulPartitionedCall/
diff --git a/src/emotion/emotion.js b/src/emotion/emotion.js
index 716d9db7..342aee15 100644
--- a/src/emotion/emotion.js
+++ b/src/emotion/emotion.js
@@ -5,7 +5,11 @@ const annotations = ['angry', 'discust', 'fear', 'happy', 'sad', 'surpise', 'neu
 const models = {};
 let last = [];
 let frame = Number.MAX_SAFE_INTEGER;
-const multiplier = 1.5;
+
+// tuning values
+const zoom = [0, 0]; // 0..1 meaning 0%..100%
+const rgb = [0.2989, 0.5870, 0.1140]; // factors for red/green/blue colors when converting to grayscale
+const scale = 1; // score multiplication factor
 
 async function load(config) {
   if (!models.emotion) models.emotion = await tf.loadGraphModel(config.face.emotion.modelPath);
@@ -18,17 +22,25 @@ async function predict(image, config) {
     return last;
   }
   frame = 0;
-  const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
+  const box = [[
+    (image.shape[1] * zoom[0]) / image.shape[1],
+    (image.shape[2] * zoom[1]) / image.shape[2],
+    (image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
+    (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
+  ]];
+  const resize = tf.image.cropAndResize(image, box, [0], [config.face.emotion.inputSize, config.face.emotion.inputSize]);
+  // const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
   const [red, green, blue] = tf.split(resize, 3, 3);
   resize.dispose();
   // weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
-  const redNorm = tf.mul(red, [0.2989]);
-  const greenNorm = tf.mul(green, [0.5870]);
-  const blueNorm = tf.mul(blue, [0.1140]);
+  const redNorm = tf.mul(red, rgb[0]);
+  const greenNorm = tf.mul(green, rgb[1]);
+  const blueNorm = tf.mul(blue, rgb[2]);
   red.dispose();
   green.dispose();
   blue.dispose();
   const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
+  const normalize = tf.tidy(() => grayscale.sub(0.5).mul(2));
   redNorm.dispose();
   greenNorm.dispose();
   blueNorm.dispose();
@@ -36,7 +48,7 @@ async function predict(image, config) {
   if (config.face.emotion.enabled) {
     let data;
     if (!config.profile) {
-      const emotionT = await models.emotion.predict(grayscale);
+      const emotionT = await models.emotion.predict(normalize);
       data = emotionT.dataSync();
       tf.dispose(emotionT);
     } else {
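The new normalize step rescales the grayscale tensor from the [0..1] range produced by the weighted RGB merge to the [-1..1] range the model expects; the same math on plain numbers:

```js
// The sub(0.5).mul(2) normalization above, applied to plain numbers:
// pixels in [0..1] map linearly onto [-1..1].
const pixels = [0, 0.25, 0.5, 1];
console.log(pixels.map((v) => (v - 0.5) * 2)); // [-1, -0.5, 0, 1]
```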
@@ -46,7 +58,7 @@ async function predict(image, config) {
       profile.run('emotion', profileData);
     }
     for (let i = 0; i < data.length; i++) {
-      if (multiplier * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] });
+      if (scale * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * scale * data[i]) / 100), emotion: annotations[i] });
     }
     obj.sort((a, b) => b.score - a.score);
   }
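Since the image.shape factors in the crop box above cancel out, the box reduces to normalized [y1, x1, y2, x2] coordinates built directly from zoom, and the default zoom of [0, 0] selects the full frame, matching the replaced resizeBilinear behavior:

```js
// The predict() crop box with the shape terms cancelled: cropAndResize takes
// normalized [y1, x1, y2, x2], so zoom trims a margin off each edge.
function cropBox(zoom) {
  return [zoom[0], zoom[1], 1 - zoom[0], 1 - zoom[1]];
}
console.log(cropBox([0, 0]));     // [0, 0, 1, 1] -> full frame, same as plain resize
console.log(cropBox([0.1, 0.2])); // [0.1, 0.2, 0.9, 0.8] -> center crop
```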