face-api/src/ssdMobilenetv1/SsdMobilenetv1.ts

124 lines
3.6 KiB
TypeScript
Raw Normal View History

2020-12-23 18:58:47 +01:00
import * as tf from '../../dist/tfjs.esm';
2020-08-26 00:24:48 +02:00
2020-12-19 17:46:41 +01:00
import { Rect } from '../classes/index';
2020-08-26 00:24:48 +02:00
import { FaceDetection } from '../classes/FaceDetection';
2020-12-19 17:46:41 +01:00
import { NetInput, TNetInput, toNetInput } from '../dom/index';
2020-08-26 00:24:48 +02:00
import { NeuralNetwork } from '../NeuralNetwork';
import { extractParams } from './extractParams';
2021-01-12 16:14:33 +01:00
import { extractParamsFromWeightMap } from './extractParamsFromWeightMap';
2020-08-26 00:24:48 +02:00
import { mobileNetV1 } from './mobileNetV1';
import { nonMaxSuppression } from './nonMaxSuppression';
import { outputLayer } from './outputLayer';
import { predictionLayer } from './predictionLayer';
import { ISsdMobilenetv1Options, SsdMobilenetv1Options } from './SsdMobilenetv1Options';
import { NetParams } from './types';
/**
 * Face detector network registered under the name 'SsdMobilenetv1'.
 *
 * The forward pass chains `mobileNetV1` -> `predictionLayer` -> `outputLayer`;
 * `locateFaces` post-processes the first batch entry of that output into
 * `FaceDetection` results.
 */
export class SsdMobilenetv1 extends NeuralNetwork<NetParams> {
  constructor() {
    super('SsdMobilenetv1');
  }

  /**
   * Runs the network on an already prepared `NetInput`.
   *
   * @param input - The prepared network input.
   * @returns Whatever `outputLayer` produces; `locateFaces` reads its
   *   per-batch `boxes` and `scores` tensor arrays. The caller owns the
   *   returned tensors and must dispose them.
   * @throws Error if the model parameters have not been loaded yet.
   */
  public forwardInput(input: NetInput) {
    const { params } = this;

    if (!params) {
      throw new Error('SsdMobilenetv1 - load model before inference');
    }

    // tf.tidy releases every intermediate tensor created inside the callback;
    // only the returned tensors survive.
    return tf.tidy(() => {
      // NOTE(review): presumably (512, false) means "input size 512, no
      // centering" - confirm against NetInput.toBatchTensor.
      const batchTensor = tf.cast(input.toBatchTensor(512, false), 'float32');

      // x * (~2 / 255) - 1: maps a [0, 255] value range onto [-1, 1].
      const x = tf.sub(tf.mul(batchTensor, tf.scalar(0.007843137718737125)), tf.scalar(1)) as tf.Tensor4D;
      const features = mobileNetV1(x, params.mobilenetv1);

      const {
        boxPredictions,
        classPredictions,
      } = predictionLayer(features.out, features.conv11, params.prediction_layer);

      return outputLayer(boxPredictions, classPredictions, params.output_layer);
    });
  }

  /**
   * Converts an arbitrary supported input via `toNetInput` and runs
   * `forwardInput` on it.
   *
   * @param input - Any input accepted by `toNetInput`.
   * @returns The result of `forwardInput`.
   */
  public async forward(input: TNetInput) {
    return this.forwardInput(await toNetInput(input));
  }

  /**
   * Detects faces in the first batch entry of the given input.
   *
   * @param input - Any input accepted by `toNetInput`.
   * @param options - Detection options; `maxResults` and `minConfidence` are
   *   read from the resulting `SsdMobilenetv1Options`.
   * @returns One `FaceDetection` per index returned by `nonMaxSuppression`.
   */
  public async locateFaces(
    input: TNetInput,
    options: ISsdMobilenetv1Options = {},
  ): Promise<FaceDetection[]> {
    const { maxResults, minConfidence } = new SsdMobilenetv1Options(options);

    const netInput = await toNetInput(input);

    const {
      boxes: _boxes,
      scores: _scores,
    } = this.forwardInput(netInput);

    // Only the first batch entry is evaluated; release the others right away.
    const boxes = _boxes[0];
    const scores = _scores[0];
    for (let i = 1; i < _boxes.length; i++) {
      _boxes[i].dispose();
      _scores[i].dispose();
    }

    // try/finally guarantees the remaining tensors are released even when
    // reading the data or the suppression step throws.
    try {
      const scoresData = Array.from(await scores.data());

      const iouThreshold = 0.5;
      const indices = nonMaxSuppression(
        boxes,
        scoresData as number[],
        maxResults,
        iouThreshold,
        minConfidence,
      );

      // Scale factors between the network input size and the reshaped
      // dimensions of the first batch entry.
      const reshapedDims = netInput.getReshapedInputDimensions(0);
      const inputSize = netInput.inputSize as number;
      const padX = inputSize / reshapedDims.width;
      const padY = inputSize / reshapedDims.height;

      // Asynchronous read so the download does not block inside an async method.
      const boxesData = await boxes.array();

      const results = indices
        .map((idx) => {
          // Each box row is read as [top, left, bottom, right]; values are
          // clamped to [0, 1] before being scaled.
          const [top, bottom] = [
            Math.max(0, boxesData[idx][0]),
            Math.min(1.0, boxesData[idx][2]),
          ].map((val) => val * padY);
          const [left, right] = [
            Math.max(0, boxesData[idx][1]),
            Math.min(1.0, boxesData[idx][3]),
          ].map((val) => val * padX);

          return new FaceDetection(
            scoresData[idx] as number,
            new Rect(
              left,
              top,
              right - left,
              bottom - top,
            ),
            {
              height: netInput.getInputHeight(0),
              width: netInput.getInputWidth(0),
            },
          );
        });

      return results;
    } finally {
      boxes.dispose();
      scores.dispose();
    }
  }

  /** @returns The default model name, 'ssd_mobilenetv1_model'. */
  protected getDefaultModelName(): string {
    return 'ssd_mobilenetv1_model';
  }

  /**
   * Delegates to the module-level `extractParamsFromWeightMap`.
   *
   * @param weightMap - Named tensors to extract the parameters from.
   */
  protected extractParamsFromWeightMap(weightMap: tf.NamedTensorMap) {
    return extractParamsFromWeightMap(weightMap);
  }

  /**
   * Delegates to the module-level `extractParams`.
   *
   * @param weights - Flat weight array to extract the parameters from.
   */
  protected extractParams(weights: Float32Array) {
    return extractParams(weights);
  }
}