2021-09-21 22:48:16 +02:00
/**
 * Hand Detection and Segmentation
 */
import { log , join } from '../helpers' ;
import * as tf from '../../dist/tfjs.esm.js' ;
import type { HandResult } from '../result' ;
import type { GraphModel , Tensor } from '../tfjs/types' ;
import type { Config } from '../config' ;
import { env } from '../env' ;
import * as fingerPose from '../fingerpose/fingerpose' ;
2021-09-23 20:09:41 +02:00
import { fakeOps } from '../tfjs/backend' ;
2021-09-21 22:48:16 +02:00
/** Loaded models: slot 0 is filled by `loadDetect`, slot 1 by `loadSkeleton`; `null` until loaded. */
const models: [GraphModel | null, GraphModel | null] = [null, null];

/** Graph output nodes requested from the detector model; results are consumed as [scores, boxes]. */
const modelOutputNodes = [
  'StatefulPartitionedCall/Postprocessor/Slice',
  'StatefulPartitionedCall/Postprocessor/ExpandDims_1',
];
2021-09-23 20:09:41 +02:00
2021-09-22 21:16:14 +02:00
/** Input dimensions read from each model signature: row 0 for the detector, row 1 for the skeleton model. */
const inputSize: number[][] = [
  [0, 0],
  [0, 0],
];
2021-09-21 22:48:16 +02:00
/** Class labels indexed by detector class id; `detectHands` only evaluates ids 0 and 1. */
const classes = [
  'hand',
  'fist',
  'pinch',
  'point',
  'face',
  'tip',
  'pinchtip',
];

/** Number of consecutive frames for which hand detection has been skipped. */
let skipped = 0;

/**
 * Size used to convert between normalized and absolute box coordinates.
 * Set by `predict` from the input tensor shape as [shape[2], shape[1]].
 * Typed and initialised so it is never `undefined` (or implicitly `any`) before the first `predict` call.
 */
let outputSize: [number, number] = [0, 0];

/** Intermediate result describing a single detected hand box. */
type HandDetectResult = {
  id: number,
  score: number,
  box: [number, number, number, number],
  boxRaw: [number, number, number, number],
  label: string,
  yxBox: [number, number, number, number],
}
2021-09-22 21:16:14 +02:00
/**
 * Box caches shared between frames:
 * - handBoxes: boxes returned by the most recent hand detection run
 * - fingerBoxes: boxes reused as the starting point for the next frame
 * - tmpBoxes: boxes collected while processing the current frame
 */
const cache: {
  handBoxes: HandDetectResult[];
  fingerBoxes: HandDetectResult[];
  tmpBoxes: HandDetectResult[];
} = { handBoxes: [], fingerBoxes: [], tmpBoxes: [] };
2021-09-21 22:48:16 +02:00
/** Keypoint indices that make up each annotated part of the hand. */
const fingerMap = {
  thumb: [1, 2, 3, 4],
  index: [5, 6, 7, 8],
  middle: [9, 10, 11, 12],
  ring: [13, 14, 15, 16],
  pinky: [17, 18, 19, 20],
  palm: [0],
};
2021-09-23 20:09:41 +02:00
/**
 * Load the hand detector model into slot 0 of the module-level model cache.
 *
 * When `env.initial` is set the cached instance is dropped first so the model is loaded again.
 * On a fresh load the input dimensions found in the model signature are stored in `inputSize[0]`.
 *
 * @param config library configuration; reads `modelBasePath`, `hand.detector.modelPath` and `debug`
 * @returns the cached or newly loaded detector model
 */
export async function loadDetect(config: Config): Promise<GraphModel> {
  if (env.initial) models[0] = null;
  if (!models[0]) {
    // handtrack model has some kernel ops defined in model but those are never referenced and non-existent in tfjs
    // ideally need to prune the model itself
    fakeOps(['tensorlistreserve', 'enter', 'tensorlistfromtensor', 'merge', 'loopcond', 'switch', 'exit', 'tensorliststack', 'nextiteration', 'tensorlistsetitem', 'tensorlistgetitem', 'reciprocal', 'shape', 'split', 'where'], config);
    models[0] = await tf.loadGraphModel(join(config.modelBasePath, config.hand.detector?.modelPath || '')) as unknown as GraphModel;
    const inputs = Object.values(models[0].modelSignature['inputs']);
    inputSize[0][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
    inputSize[0][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
    // report the path of the model that was actually requested (the hand detector, not the object model)
    if (!models[0] || !models[0]['modelUrl']) log('load model failed:', config.hand.detector?.modelPath);
    else if (config.debug) log('load model:', models[0]['modelUrl']);
  } else if (config.debug) log('cached model:', models[0]['modelUrl']);
  return models[0];
}
/**
 * Load the hand skeleton (finger keypoint) model into slot 1 of the module-level model cache.
 *
 * When `env.initial` is set the cached instance is dropped first so the model is loaded again.
 * On a fresh load the input dimensions found in the model signature are stored in `inputSize[1]`.
 *
 * @param config library configuration; reads `modelBasePath`, `hand.skeleton.modelPath` and `debug`
 * @returns the cached or newly loaded skeleton model
 */
export async function loadSkeleton(config: Config): Promise<GraphModel> {
  if (env.initial) models[1] = null;
  if (!models[1]) {
    models[1] = await tf.loadGraphModel(join(config.modelBasePath, config.hand.skeleton?.modelPath || '')) as unknown as GraphModel;
    const inputs = Object.values(models[1].modelSignature['inputs']);
    inputSize[1][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
    inputSize[1][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
    // report the path of the model that was actually requested (the hand skeleton, not the object model)
    if (!models[1] || !models[1]['modelUrl']) log('load model failed:', config.hand.skeleton?.modelPath);
    else if (config.debug) log('load model:', models[1]['modelUrl']);
  } else if (config.debug) log('cached model:', models[1]['modelUrl']);
  return models[1];
}
/**
 * Ensure both hand models are available, loading whichever one is still missing.
 * The detector is loaded before the skeleton model.
 *
 * @param config library configuration forwarded to the individual loaders
 * @returns the module-level model pair [detector, skeleton]
 */
export async function load(config: Config): Promise<[GraphModel | null, GraphModel | null]> {
  if (!models[0]) {
    await loadDetect(config);
  }
  if (!models[1]) {
    await loadSkeleton(config);
  }
  return models;
}
/**
 * Run the hand detector on the input and return the detected hand boxes.
 *
 * The input is resized to a height that is a multiple of 8 and at most 512, with the width
 * derived from the input aspect ratio. Non-max suppression is applied separately to the
 * scores of class 0 and class 1, and the combined results are sorted by descending score
 * and truncated to `config.hand.maxDetected` (or 1 when that is not set).
 *
 * @param input image tensor; `shape[1]` and `shape[2]` are used as its height and width
 * @param config library configuration; reads `hand.maxDetected`, `hand.iouThreshold` and `hand.minConfidence`
 * @returns detected boxes, or an empty list when the input or the detector model is missing
 */
async function detectHands(input: Tensor, config: Config): Promise<HandDetectResult[]> {
  const detected: HandDetectResult[] = [];
  if (!input || !models[0]) return detected;
  const t: Record<string, Tensor> = {};
  const srcHeight = input.shape[1];
  const srcWidth = input.shape[2];
  const ratio = (srcWidth || 1) / (srcHeight || 1);
  const height = Math.min(Math.round((srcHeight || 0) / 8) * 8, 512); // use dynamic input size but cap at 512
  const width = Math.round(height * ratio / 8) * 8;
  t.resize = tf.image.resizeBilinear(input, [height, width]); // todo: resize with padding
  t.cast = tf.cast(t.resize, 'int32');
  [t.rawScores, t.rawBoxes] = await models[0].executeAsync(t.cast, modelOutputNodes) as Tensor[];
  t.boxes = tf.squeeze(t.rawBoxes, [0, 2]);
  t.scores = tf.squeeze(t.rawScores, [0]);
  const perClassScores = tf.unstack(t.scores, 1);
  const evaluatedClasses = Math.min(perClassScores.length, 2); // only class ids 0 and 1 are evaluated
  let nextId = 0;
  for (let classId = 0; classId < evaluatedClasses; classId++) {
    t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, perClassScores[classId], config.hand.maxDetected, config.hand.iouThreshold, config.hand.minConfidence);
    const selected = Array.from(await t.nms.data());
    tf.dispose(t.nms);
    for (const index of selected) { // generates results for each class
      const boxTensor = tf.slice(t.boxes, index, 1);
      const yxBox = await boxTensor.data();
      tf.dispose(boxTensor);
      const boxRaw: [number, number, number, number] = [
        yxBox[1],
        yxBox[0],
        yxBox[3] - yxBox[1],
        yxBox[2] - yxBox[0],
      ];
      const box: [number, number, number, number] = [
        Math.trunc(boxRaw[0] * outputSize[0]),
        Math.trunc(boxRaw[1] * outputSize[1]),
        Math.trunc(boxRaw[2] * outputSize[0]),
        Math.trunc(boxRaw[3] * outputSize[1]),
      ];
      const scoreTensor = tf.slice(perClassScores[classId], index, 1);
      const score = (await scoreTensor.data())[0];
      tf.dispose(scoreTensor);
      detected.push({ id: nextId++, score, box, boxRaw, label: classes[classId], yxBox });
    }
  }
  for (const tensor of perClassScores) tf.dispose(tensor);
  for (const name of Object.keys(t)) tf.dispose(t[name]);
  detected.sort((a, b) => b.score - a.score);
  const limit = config.hand.maxDetected || 1;
  if (detected.length > limit) detected.length = limit;
  return detected;
}
2021-09-22 21:16:14 +02:00
/** Enlargement factor applied to the keypoint-derived box: the hand finger model prefers a slightly larger box. */
const boxScaleFact = 1.5;
2021-09-21 22:48:16 +02:00
/**
 * Replace the boxes stored on `h` with a square box centered on the given keypoints.
 *
 * The half-size of the square is the largest distance from the keypoint center to the
 * keypoint extremes, multiplied by `boxScaleFact`. `h.box` holds the truncated absolute
 * values, `h.boxRaw` the same values divided by `outputSize`, and `h.yxBox` is derived
 * from `h.boxRaw` as [y, x, y + height, x + width].
 *
 * @param h detection record updated in place (`box`, `boxRaw`, `yxBox`)
 * @param keypoints list of points; only the first two entries of each point are used
 */
function updateBoxes(h, keypoints) {
  const xs = keypoints.map((pt) => pt[0]);
  const ys = keypoints.map((pt) => pt[1]);
  const minX = Math.min(...xs);
  const maxX = Math.max(...xs);
  const minY = Math.min(...ys);
  const maxY = Math.max(...ys);
  const centerX = (minX + maxX) / 2;
  const centerY = (minY + maxY) / 2;
  // largest distance from center in any direction, enlarged by the scale factor
  const diff = Math.max(centerX - minX, centerY - minY, -centerX + maxX, -centerY + maxY) * boxScaleFact;
  const side = Math.trunc(2 * diff);
  h.box = [Math.trunc(centerX - diff), Math.trunc(centerY - diff), side, side] as [number, number, number, number];
  // work backwards from the absolute box to the normalized box
  const [outW, outH] = [outputSize[0], outputSize[1]];
  h.boxRaw = [h.box[0] / outW, h.box[1] / outH, h.box[2] / outW, h.box[3] / outH] as [number, number, number, number];
  // work backwards from the normalized box to the corner-based box
  h.yxBox = [
    h.boxRaw[1],
    h.boxRaw[0],
    h.boxRaw[3] + h.boxRaw[1],
    h.boxRaw[2] + h.boxRaw[0],
  ] as [number, number, number, number];
}
/**
 * Build a hand result for a detected box and, when `config.hand.landmarks` is enabled,
 * run the skeleton model on the box crop to fill in keypoints, landmarks and annotations.
 *
 * When the finger score exceeds `config.hand.minConfidence`, the box on `h` is replaced with
 * one calculated around the keypoints and `h` is pushed to `cache.tmpBoxes`.
 *
 * @param input image tensor the crop is taken from
 * @param h detected hand box; may be updated in place
 * @param config library configuration; reads `hand.landmarks` and `hand.minConfidence`
 * @returns hand result; keypoints stay empty when landmarks are disabled, the model is missing, or the score is too low
 */
async function detectFingers(input: Tensor, h: HandDetectResult, config: Config): Promise<HandResult> {
  const hand: HandResult = {
    id: h.id,
    score: Math.round(100 * h.score) / 100,
    boxScore: Math.round(100 * h.score) / 100,
    fingerScore: 0,
    box: h.box,
    boxRaw: h.boxRaw,
    label: h.label,
    keypoints: [],
    landmarks: {} as HandResult['landmarks'],
    annotations: {} as HandResult['annotations'],
  };
  if (!input || !models[1]) return hand; // something is wrong
  if (config.hand.landmarks) {
    const t: Record<string, Tensor> = {};
    if (!h.yxBox) return hand;
    t.crop = tf.image.cropAndResize(input, [h.yxBox], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
    t.cast = tf.cast(t.crop, 'float32');
    t.div = tf.div(t.cast, 255);
    [t.score, t.keypoints] = models[1].execute(t.div) as Tensor[];
    const rawScore = (await t.score.data())[0];
    // round to two decimals, same as the box score above; rounding the whole expression would collapse the score to 0 or 1
    const score = Math.round(100 * rawScore) / 100;
    if (score > (config.hand.minConfidence || 0)) {
      hand.fingerScore = score;
      t.reshaped = tf.reshape(t.keypoints, [-1, 3]);
      const rawCoords = await t.reshaped.array() as number[][]; // one row of three values per keypoint
      // rescale model-space coordinates by the box size and offset them by the box origin
      hand.keypoints = rawCoords.map((coord) => [
        (h.box[2] * coord[0] / inputSize[1][0]) + h.box[0],
        (h.box[3] * coord[1] / inputSize[1][1]) + h.box[1],
        (h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2],
      ]);
      updateBoxes(h, hand.keypoints); // replace detected box with box calculated around keypoints
      hand.box = h.box;
      hand.landmarks = fingerPose.analyze(hand.keypoints) as HandResult['landmarks']; // calculate finger landmarks
      for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
        hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
      }
      cache.tmpBoxes.push(h); // if finger detection is enabled, only update cache if fingers are detected
    }
    Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
  }
  return hand;
}
/**
 * Detect hands in the input, reusing boxes cached from the previous frame where possible.
 *
 * Finger detection always runs first on the cached finger boxes. Full hand detection runs
 * only when the frame is not being skipped and the number of hands obtained from the cache
 * differs from `config.hand.maxDetected`; its results are appended to the cached ones.
 * Afterwards the finger box cache is replaced with the boxes validated during this call.
 *
 * @param input image tensor; `shape[2]` and `shape[1]` define the output size used for box scaling
 * @param config library configuration; reads `hand.landmarks`, `hand.skipFrames`, `hand.maxDetected` and `skipFrame`
 * @returns hand results for this frame
 */
export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {
  outputSize = [input.shape[2] || 0, input.shape[1] || 0];
  cache.tmpBoxes = []; // clear temp cache
  if (!config.hand.landmarks) cache.fingerBoxes = cache.handBoxes; // if hand detection only reset finger boxes cache
  // within the skip-frame budget only the cached boxes are used
  const reuseOnly = (skipped < (config.hand.skipFrames || 0)) && config.skipFrame;
  skipped = reuseOnly ? skipped + 1 : 0;
  let hands: Array<HandResult> = await Promise.all(cache.fingerBoxes.map((cached) => detectFingers(input, cached, config))); // run from finger box cache
  if (!reuseOnly && hands.length !== config.hand.maxDetected) { // run hand detection only if we dont have enough hands in cache
    cache.handBoxes = await detectHands(input, config);
    const detected = await Promise.all(cache.handBoxes.map((box) => detectFingers(input, box, config)));
    hands = hands.concat(detected);
  }
  cache.fingerBoxes = [...cache.tmpBoxes]; // repopulate cache with validated hands
  return hands;
}
/*
- Live Site: <https://victordibia.com/handtrack.js/#/>
- TFJS Port: <https://github.com/victordibia/handtrack.js/>
- Original: <https://github.com/victordibia/handtracking>
- Writeup: <https://medium.com/@victor.dibia/how-to-build-a-real-time-hand-detector-using-neural-networks-ssd-on-tensorflow-d6bac0e4b2ce>
- Convert:
  tensorflowjs_converter --input_format=tf_frozen_model --output_format=tfjs_graph_model \
    --output_node_names='num_detections,detection_boxes,detection_scores,detection_classes' --saved_model_tags=serve --quantize_uint8=* \
    --strip_debug_ops=* --weight_shard_size_bytes=10000000000 --control_flow_v2=true frozen_inference_graph.pb graph

webmodel/efficientdet512d0/base/model.json
webmodel/centernet512fpn/base/model.json
https://github.com/victordibia/handtrack.js/commit/70d5d9c98e69688414cddaad044bd8730bc982d1#diff-c40e819be4ec1dc29f26913f5cdeb05202261b3a1725ab259cb235ea0f0fc5d6

git rev-list HEAD -- webmodel/*
9ba7220fb31e9168aa248500cc70800566f4c719
70d5d9c98e69688414cddaad044bd8730bc982d1

git checkout 9ba7220fb31e9168aa248500cc70800566f4c719^ -- webmodel
git checkout 70d5d9c98e69688414cddaad044bd8730bc982d1^ -- webmodel
*/