Related links:
- TensorFlow 官网
- Real-time Human Pose Estimation in the Browser with TensorFlow.js
- Github-posenet
- Demo
- Move Mirror: An AI Experiment with Pose Estimation in the Browser using TensorFlow.js
Loading the pre-trained PoseNet model
- MobileNet: smaller, faster, less accurate;
- ResNet: larger, slower, more accurate;
- For a detailed description of the parameters, see the Github-posenet repo and the Demo linked above;
To host the model files locally, you can proceed as follows:
- Open the PoseNet Demo page;
- Adjust the parameters until you get the result you want;
- In the browser's developer tools, find the corresponding network resources and save them locally;
- Put the json and bin files in the same place (the same local folder);
- For example, with ResNet50, outputStride: 32, quantBytes: 2, the files are model-stride32.json and group1-shard1of23.bin through group1-shard23of23.bin;
- Different parameters produce different json and bin files, so set the parameters you want before downloading;
posenet.load({
  // Alternative: the ResNet50 configuration downloaded above.
  // architecture: 'ResNet50',
  // inputResolution: 250,
  // outputStride: 32,
  // quantBytes: 2,
  // modelUrl: 'model/resnet50/model-stride32.json'
  architecture: 'MobileNetV1',
  inputResolution: 500,
  outputStride: 16,
  multiplier: 1,
  quantBytes: 4,
  // modelUrl points at the locally saved json file; the bin shards must sit next to it.
  modelUrl: 'model/mobilenetv1/model-stride16.json'
}).then(net => {
  // Estimate a single pose from the given image element.
  let pose = net.estimateSinglePose(image, { flipHorizontal: false });
  return pose;
}).then(pose => {
  // Use the detected pose (keypoints, score) here.
});
Pose matching
See the Move Mirror article linked at the top; it explains the main matching techniques quite clearly. A brief summary:
- The images to be matched need to be cropped and scaled first. The reason is that even if two people are in essentially the same pose, if they appear at different positions in the image (for example, one near the left edge and one near the right edge), the coordinates in their keypoints will still differ a lot. The recommended approach is to crop the person out using minX, maxX, minY, maxY of the boundingBox computed by posenet, draw the crop into the center of a fixed-size image, and then recompute the keypoint coordinates relative to that new image (the cropping snippet below does this);
- Apply L2 normalization. One thing I have not figured out here: when doing the Cosine Similarity matching, the similarity function already includes an l2norm step, so does the L2 normalization mentioned in the article simply refer to that l2norm inside similarity, or is an extra step required? (A sketch of an explicit normalization step follows right after this list;)
- Cosine-distance matching: both the captured pose and the reference pose have to be converted into length-34 arrays of coordinate pairs (see the cosine-distance snippet below);
- Weighted matching: both the captured pose and the reference pose have to be converted into length-52 arrays, where indices 0-33 are the coordinate pairs, 34-50 are the confidence scores, and 51 is the sum of the confidences (see the last snippet below);
- For the matching result, the smaller the value, the closer the two poses are;
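Regarding the L2-normalization question above: cosine similarity is scale-invariant, so an explicit normalization step should not change the result if the similarity helper already divides by the vector norms. A minimal sketch of doing it explicitly anyway, applied to the length-34 coordinate vectors built in the snippets below (l2Normalize is a hypothetical helper, not part of posenet or the similarity library):

// Hypothetical helper: scale a pose vector (e.g. the length-34 coordinate array)
// to unit L2 norm before comparing it with another pose.
function l2Normalize(vector) {
  const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
  return norm > 0 ? vector.map(v => v / norm) : vector.slice();
}

// Usage sketch: normalize both vectors before computing the similarity.
// let cosineSimilarity = similarity(l2Normalize(curData), l2Normalize(matData));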
// Crop the person out of the source image using the bounding box of the detected
// keypoints, draw the crop centered into a fixed-size (480x480) image, and recompute
// the keypoint coordinates relative to the new image.
// `ctx` is assumed to be the 2D context of a 480x480 canvas, `image` the source image.
const boundingBox = posenet.getBoundingBox(pose.keypoints);
const bx = boundingBox.minX;
const by = boundingBox.minY;
const bw = boundingBox.maxX - boundingBox.minX;
const bh = boundingBox.maxY - boundingBox.minY;
const w = 480, h = 480;
let nx, ny, nw, nh, s;
// Scale so the larger side of the crop fits the target size while keeping the aspect ratio.
if (bw / bh > w / h) {
  s = w / bw;
} else {
  s = h / bh;
}
nw = bw * s;
nh = bh * s;
nx = (w - nw) * 0.5;
ny = (h - nh) * 0.5;
ctx.rect(0, 0, w, h);
ctx.fillStyle = 'grey';
ctx.fill();
ctx.drawImage(image, bx, by, bw, bh, nx, ny, nw, nh);
// Map every keypoint into the coordinate system of the new image.
let newKeypoints = [];
for (let i = 0; i < pose.keypoints.length; i++) {
  newKeypoints[i] = {
    score: pose.keypoints[i].score,
    part: pose.keypoints[i].part,
    position: {
      x: nx + (pose.keypoints[i].position.x - bx) * s,
      y: ny + (pose.keypoints[i].position.y - by) * s
    }
  };
}
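For reference, the conversion used in the Move Mirror article (and in the next snippet) turns the cosine similarity of the two length-34 coordinate vectors into a distance:

D(F_{xy}, G_{xy}) = \sqrt{2\,\bigl(1 - \mathrm{cosineSimilarity}(F_{xy}, G_{xy})\bigr)}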
// Cosine-distance matching: flatten both poses into length-34 vectors
// (x, y of each of the 17 keypoints), compute the cosine similarity,
// and convert it into a distance (smaller = more similar).
// `matKeypoints` is the reference pose, `similarity` the cosine-similarity helper.
let curData = [], matData = [];
let idx = 0, ord = 0;
for (let j = 0; j < 34; j++) {
  curData[j] = (ord > 0) ? newKeypoints[idx].position.y : newKeypoints[idx].position.x;
  matData[j] = (ord > 0) ? matKeypoints[idx].position.y : matKeypoints[idx].position.x;
  ord++;
  if (ord > 1) {
    ord = 0;
    idx++;
  }
}
let cosineSimilarity = similarity(curData, matData);
let distance = 2 * (1 - cosineSimilarity);
console.log(Math.sqrt(distance));
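The weighted-matching distance, which the next snippet implements, weights each coordinate difference by the confidence of the corresponding keypoint in the captured pose and divides by the confidence sum:

D(F, G) = \frac{1}{\sum_{k=1}^{17} F_{c_k}} \sum_{j=1}^{34} F_{c_{\lceil j/2 \rceil}} \,\bigl| F_{xy_j} - G_{xy_j} \bigr|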
// Weighted matching: build length-52 vectors (34 coordinates, 17 confidence scores,
// 1 confidence sum) for both poses, then compute the weighted distance.
// `formatKeypoints` and the stored reference pose `MATCH_KEYPOINTS` are assumed
// to be defined elsewhere.
let curKeypoints = [], matKeypoints = [];
let curData = [], matData = [];
let curSum = 0, matSum = 0;
let idx = 0, ord = 0;
curKeypoints = formatKeypoints(newKeypoints);
matKeypoints = formatKeypoints(MATCH_KEYPOINTS);
for (let j = 0; j < 52; j++) {
  if (j < 34) {
    // Indices 0-33: x/y coordinates of the 17 keypoints.
    curData[j] = ord > 0 ? curKeypoints[idx].position.y : curKeypoints[idx].position.x;
    matData[j] = ord > 0 ? matKeypoints[idx].position.y : matKeypoints[idx].position.x;
    ord++;
    if (ord > 1) {
      ord = 0;
      idx++;
    }
  } else if (j >= 34 && j < 51) {
    // Indices 34-50: confidence score of each keypoint.
    if (j === 34) {
      idx = 0;
    }
    curData[j] = curKeypoints[idx].score;
    matData[j] = matKeypoints[idx].score;
    curSum += curKeypoints[idx].score;
    matSum += matKeypoints[idx].score;
    idx++;
  } else {
    // Index 51: sum of the confidence scores.
    curData[j] = curSum;
    matData[j] = matSum;
  }
}
// Weighted distance: each coordinate difference is weighted by the confidence
// of the corresponding keypoint in the captured pose.
let vector1PoseXY = curData.slice(0, 34);
let vector1Confidences = curData.slice(34, 51);
let vector1ConfidenceSum = curData.slice(51, 52);
let vector2PoseXY = matData.slice(0, 34);
let summation1 = 1 / vector1ConfidenceSum[0];
let summation2 = 0;
for (let k = 0; k < vector1PoseXY.length; k++) {
  let tempConf = Math.floor(k / 2);
  let tempSum = vector1Confidences[tempConf] * Math.abs(vector1PoseXY[k] - vector2PoseXY[k]);
  summation2 = summation2 + tempSum;
}
console.log(summation1 * summation2);
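As noted above, the smaller the resulting value, the closer the match. A minimal sketch of picking the best match among several stored reference poses, assuming a hypothetical weightedDistance(a, b) wrapper around the computation above and a hypothetical referencePoses array of { name, keypoints } entries (neither exists in the code above):

// Hypothetical: find the stored pose closest to the captured one.
let best = null;
for (const ref of referencePoses) {
  const d = weightedDistance(newKeypoints, ref.keypoints);
  if (best === null || d < best.distance) {
    best = { name: ref.name, distance: d };
  }
}
console.log(best);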