human/src/handpose/handdetector.ts

92 lines
4.0 KiB
TypeScript
Raw Normal View History

2020-11-18 14:26:28 +01:00
import * as tf from '../../dist/tfjs.esm.js';
2020-11-10 02:13:38 +01:00
import * as box from './box';
2021-04-25 22:56:10 +02:00
import * as anchors from './anchors';
import { Tensor, GraphModel } from '../tfjs/types';
2020-10-12 01:22:43 +02:00
2021-02-08 17:39:09 +01:00
/**
 * Runs the hand detection model and converts its raw output into
 * bounding boxes and palm landmarks.
 *
 * Tensors created in the constructor are kept for the lifetime of the
 * instance; every intermediate tensor created by the methods below is
 * disposed before the method returns.
 */
export class HandDetector {
  /** Detection model; `predict` is invoked on it in `getBoxes`. */
  model: GraphModel;
  /** Anchor coordinates as `[x, y]` pairs, built from the imported anchors table. */
  anchors: number[][];
  /** `anchors` as a 2D tensor, used when normalizing all boxes at once. */
  anchorsTensor: Tensor;
  /** Taken from `model.inputs[0].shape[2]`; the input is resized to `inputSize x inputSize`. */
  inputSize: number;
  /** `[inputSize, inputSize]` as a 1D tensor. */
  inputSizeTensor: Tensor;
  /** `[2 * inputSize, 2 * inputSize]` as a 1D tensor. */
  doubleInputSizeTensor: Tensor;

  /**
   * @param model detection model whose first input defines the expected input size
   */
  constructor(model) {
    this.model = model;
    this.anchors = anchors.anchors.map((anchor) => [anchor.x, anchor.y]);
    this.anchorsTensor = tf.tensor2d(this.anchors);
    // @ts-ignore model is not undefined here
    this.inputSize = this.model?.inputs[0].shape[2];
    this.inputSizeTensor = tf.tensor1d([this.inputSize, this.inputSize]);
    this.doubleInputSizeTensor = tf.tensor1d([this.inputSize * 2, this.inputSize * 2]);
  }

  /**
   * Converts raw box predictions into `[startX, startY, endX, endY]` rows.
   *
   * Columns 0-1 of `boxes` are treated as center offsets relative to the
   * matching anchor and columns 2-3 as box sizes.
   *
   * @param boxes 2D tensor with four columns per row
   * @returns new 2D tensor with start and end points concatenated along axis 1; caller must dispose it
   */
  normalizeBoxes(boxes) {
    return tf.tidy(() => {
      const boxOffsets = tf.slice(boxes, [0, 0], [-1, 2]);
      const boxSizes = tf.slice(boxes, [0, 2], [-1, 2]);
      const boxCenterPoints = tf.add(tf.div(boxOffsets, this.inputSizeTensor), this.anchorsTensor);
      const halfBoxSizes = tf.div(boxSizes, this.doubleInputSizeTensor);
      const startPoints = tf.mul(tf.sub(boxCenterPoints, halfBoxSizes), this.inputSizeTensor);
      const endPoints = tf.mul(tf.add(boxCenterPoints, halfBoxSizes), this.inputSizeTensor);
      return tf.concat2d([startPoints, endPoints], 1);
    });
  }

  /**
   * Converts the raw palm landmarks of one prediction into coordinates
   * offset by that prediction's anchor.
   *
   * @param rawPalmLandmarks tensor reshaped here to `[-1, 7, 2]`
   * @param index row index of the prediction, used to look up its anchor
   * @returns new tensor of shape `[-1, 7, 2]`; caller must dispose it
   */
  normalizeLandmarks(rawPalmLandmarks, index) {
    return tf.tidy(() => {
      const landmarks = tf.add(tf.div(tf.reshape(rawPalmLandmarks, [-1, 7, 2]), this.inputSizeTensor), this.anchors[index]);
      return tf.mul(landmarks, this.inputSizeTensor);
    });
  }

  /**
   * Runs the model on `input` and returns the detections that survive
   * non-max suppression and the confidence threshold.
   *
   * Per prediction row, column 0 is the score logit, columns 1-4 the raw
   * box and columns 5-18 the raw palm landmarks.
   *
   * @param input preprocessed image tensor passed straight to `model.predict`
   * @param config reads `config.hand.maxDetected`, `config.hand.iouThreshold` and `config.hand.minConfidence`
   * @returns detections whose `box` and `palmLandmarks` tensors are owned by the caller
   */
  async getBoxes(input, config) {
    const batched = this.model.predict(input) as Tensor;
    const predictions = tf.squeeze(batched);
    tf.dispose(batched);
    const scoresT = tf.tidy(() => tf.squeeze(tf.sigmoid(tf.slice(predictions, [0, 0], [-1, 1]))));
    const rawBoxes = tf.slice(predictions, [0, 1], [-1, 4]);
    const boxes = this.normalizeBoxes(rawBoxes);
    tf.dispose(rawBoxes);
    const hands: Array<{ box: Tensor, palmLandmarks: Tensor, confidence: number }> = [];
    let filteredT;
    try {
      // async downloads: this method is already async, so do not block on the backend
      const scores = await scoresT.data();
      filteredT = await tf.image.nonMaxSuppressionAsync(boxes, scores, config.hand.maxDetected, config.hand.iouThreshold, config.hand.minConfidence);
      const filtered = await filteredT.array();
      for (const index of filtered) {
        if (scores[index] >= config.hand.minConfidence) {
          const matchingBox = tf.slice(boxes, [index, 0], [1, -1]);
          const rawPalmLandmarks = tf.slice(predictions, [index, 5], [1, 14]);
          const palmLandmarks = tf.tidy(() => tf.reshape(this.normalizeLandmarks(rawPalmLandmarks, index), [-1, 2]));
          tf.dispose(rawPalmLandmarks);
          hands.push({ box: matchingBox, palmLandmarks, confidence: scores[index] });
        }
      }
    } finally {
      // release intermediates even when a download or the suppression step rejects
      if (filteredT) tf.dispose(filteredT);
      tf.dispose(scoresT);
      tf.dispose(predictions);
      tf.dispose(boxes);
    }
    return hands;
  }

  /**
   * Detects hands in `input` and returns their bounds scaled back to the
   * input's own resolution.
   *
   * @param input image tensor; `shape[1]` is read as height and `shape[2]` as width
   * @param config forwarded to `getBoxes`
   * @returns one entry per detected hand; empty when nothing is detected
   */
  async estimateHandBounds(input, config): Promise<{ startPoint: number[]; endPoint: number[]; palmLandmarks: number[]; confidence: number }[]> {
    const inputHeight = input.shape[1];
    const inputWidth = input.shape[2];
    // resize to the model input size, then apply x / 127.5 - 1
    const image = tf.tidy(() => tf.sub(tf.div(tf.image.resizeBilinear(input, [this.inputSize, this.inputSize]), 127.5), 1));
    let predictions;
    try {
      predictions = await this.getBoxes(image, config);
    } finally {
      // the resized image is only needed for detection; release it even on failure
      tf.dispose(image);
    }
    const hands: Array<{ startPoint: number[]; endPoint: number[]; palmLandmarks: number[]; confidence: number }> = [];
    if (!predictions || predictions.length === 0) return hands;
    for (const prediction of predictions) {
      const boxes = await prediction.box.data();
      const startPoint = boxes.slice(0, 2);
      const endPoint = boxes.slice(2, 4);
      const palmLandmarks = await prediction.palmLandmarks.array();
      tf.dispose(prediction.box);
      tf.dispose(prediction.palmLandmarks);
      hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / this.inputSize, inputHeight / this.inputSize]));
    }
    return hands;
  }
}