数据集转换
数据集转换的意义在于将原本的 txt
点云文件转换为更方便运算的npy
点云文件,同时,将原本的xyzrgb
这 6 个维度转换为xyzrgbc
,最后一个c维度代表该点云所属的类别。
# Convert every annotated room into a single .npy file (e.g. Area_1_hallway_1.npy).
# NOTE(review): splitting on '/' relies on the tail of each path using forward
# slashes (the example paths mix '\\' and '/' but do end in '/'); confirm on Windows.
for anno_path in anno_paths:
    print(anno_path)
    try:
        elements = anno_path.split('/')
        # Output name is <area>_<room>.npy, e.g. Area_1_hallway_1.npy
        out_filename = elements[-3] + '_' + elements[-2] + '.npy'
        collect_point_label(anno_path, os.path.join(output_folder, out_filename), 'numpy')
    except Exception as e:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit
        # and hid the real cause; report which room failed and why.
        print(anno_path, 'ERROR!!', e)
其中,anno_paths的值如下:
['D:\\chat\\programs\\point2\\pooint2torch\\data\\s3dis\\Stanford3dDataset_v1.2_Aligned_Version\\Area_1/conferenceRoom_1/Annotations',
'D:\\chat\\programs\\point2\\pooint2torch\\data\\s3dis\\Stanford3dDataset_v1.2_Aligned_Version\\Area_1/conferenceRoom_2/Annotations',
'D:\\chat\\programs\\point2\\pooint2torch\\data\\s3dis\\Stanford3dDataset_v1.2_Aligned_Version\\Area_1/copyRoom_1/Annotations',
'D:\\chat\\programs\\point2\\pooint2torch\\data\\s3dis\\Stanford3dDataset_v1.2_Aligned_Version\\Area_1/hallway_1/Annotations', ]
随后,进入collect_point_label方法,首先根据每个点云目标的名称来获得其类别,并将其转换为对应的类别id。
# Read every per-object annotation file in the room and append an (N x 7)
# array of [x, y, z, r, g, b, class_id] rows to points_list.
for f in glob.glob(os.path.join(anno_path, '*.txt')):
    cls = os.path.basename(f).split('_')[0]
    print(f)
    # Some rooms contain a misspelled 'staris' object; any name outside the
    # known class list is folded into 'clutter'.
    if cls not in g_classes:
        cls = 'clutter'
    points = np.loadtxt(f)
    # One label column, broadcast to every point of this object.
    labels = np.ones((points.shape[0], 1)) * g_class2label[cls]
    points_list.append(np.concatenate([points, labels], 1))  # Nx7
以第一个Annotations
中的内容为例:
读取的第一个点云内容如下,其类别是beam
labels = np.ones((points.shape[0],1)) * g_class2label[cls]
生成对应的类别编号
最后将其拼接在一起即可:
最终该点云集下得到转换后的点云列表:
将List转换为numpy类型:
# Stack the per-object (N_i x 7) arrays into one (N x 7) array for the room.
data_label = np.concatenate(points_list, 0)
点云集减去最小值:
# Shift the cloud so its minimum xyz corner sits at the origin.
xyz_min = np.amin(data_label, axis=0)[0:3]
data_label[:, 0:3] -= xyz_min
最终将这些点云保存为npy
类型
# Persist the normalized (N x 7) cloud as a .npy file.
np.save(out_filename, data_label)
最终得到转换后的数据集如下:
模型训练
参数设置
点云类型,当我们更换数据集时也要修改
# The 13 S3DIS semantic classes; presumably the index in this list is the
# class id (TODO confirm against g_class2label). Edit when changing datasets.
classes = ['ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', 'table', 'chair', 'sofa', 'bookcase','board', 'clutter']
模型训练这块主要是参数配置:其中比较重要的是模型(model)、batch_size、epoch、log_dir(保存路径)、test_area(测试集)。
# Command-line configuration for training. The most important knobs are
# --model, --batch_size, --epoch, --log_dir and --test_area.
parser = argparse.ArgumentParser('Model')
parser.add_argument('--model', type=str, default='pointnet2_sem_seg_msg', help='model name [default: pointnet_sem_seg]')
parser.add_argument('--batch_size', type=int, default=16, help='Batch Size during training [default: 16]')
parser.add_argument('--epoch', default=1, type=int, help='Epoch to run [default: 32]')
parser.add_argument('--learning_rate', default=0.001, type=float, help='Initial learning rate [default: 0.001]')
parser.add_argument('--gpu', type=str, default='0', help='GPU to use [default: GPU 0]')
parser.add_argument('--optimizer', type=str, default='Adam', help='Adam or SGD [default: Adam]')
parser.add_argument('--log_dir', type=str, default="pointnet2_sem_seg_msg", help='Log path [default: None]')
parser.add_argument('--decay_rate', type=float, default=1e-4, help='weight decay [default: 1e-4]')
parser.add_argument('--npoint', type=int, default=4096, help='Point Number [default: 4096]')
parser.add_argument('--step_size', type=int, default=10, help='Decay step for lr decay [default: every 10 epochs]')
parser.add_argument('--lr_decay', type=float, default=0.7, help='Decay rate for lr decay [default: 0.7]')
parser.add_argument('--test_area', type=int, default=5, help='Which area to use for test, option: 1-6 [default: 5]')
此外,还需要注意的是,当我们换了数据集后,需要修改这个路径和类别数目
# Root folder of the converted .npy scenes; change when switching datasets.
root = 'data/stanford_indoor3d/'
# Number of semantic classes (13 for S3DIS); keep in sync with the dataset.
NUM_CLASSES = 13
数据集加载
加载训练集:
# Build the S3DIS training set, sampling NUM_POINT points per block
# (block_size=1.0, presumably meters -- confirm in S3DISDataset);
# args.test_area (Area 5 by default) is held out for evaluation.
TRAIN_DATASET = S3DISDataset(split='train', data_root=root, num_point=NUM_POINT, test_area=args.test_area, block_size=1.0, sample_rate=1.0, transform=None)
训练集加载完成后信息如下,其中 room_idx 代表所属房间的id。
room_labels是一个列表,共有204个场所,每个list成员代表一个场所,其内为numpy类型,代表每个场所内不同类型的点云:
使用pytorch框架加载训练集:
# Wrap the dataset in a shuffled DataLoader. The worker_init_fn re-seeds numpy
# per worker; NOTE(review): with num_workers=0 no worker processes are spawned,
# so that lambda is presumably never invoked -- verify if reproducibility matters.
trainDataLoader = torch.utils.data.DataLoader(TRAIN_DATASET, batch_size=BATCH_SIZE, shuffle=True, num_workers=0,pin_memory=True, drop_last=True,worker_init_fn=lambda x: np.random.seed(x + int(time.time())))
数据集各类别权重:
PointNet++结构与损失函数
加载模型与损失函数,这里我们可以看到其损失函数使用的是nll_loss,其是一个分类损失:
# Classification loss: weighted negative log-likelihood over per-point
# log-probabilities (this line is from get_loss.forward, shown in full below).
total_loss = F.nll_loss(pred, target, weight=weight)
# Dynamically import the model module named by --model (e.g. pointnet2_sem_seg_msg).
MODEL = importlib.import_module(args.model)
# Snapshot the model source into the experiment dir for reproducibility,
# then instantiate the network on the GPU (statements fused by extraction).
shutil.copy('models/%s.py' % args.model, str(experiment_dir))
shutil.copy('models/pointnet2_utils.py', str(experiment_dir))classifier = MODEL.get_model(NUM_CLASSES).cuda()
# The loss module is defined alongside the model (see get_loss below).
criterion = MODEL.get_loss().cuda()
模型结构定义如下:
import torch.nn as nn
import torch.nn.functional as F
from models.pointnet2_utils import PointNetSetAbstractionMsg,PointNetFeaturePropagation


class get_model(nn.Module):
    """PointNet++ (multi-scale grouping) semantic-segmentation network.

    Input: (B, 9, N) point clouds -- the first 3 channels are xyz, used
    for grouping; all 9 channels serve as initial point features.
    Output: (B, N, num_classes) per-point log-probabilities, plus the
    deepest set-abstraction features (l4_points).
    """

    def __init__(self, num_classes):
        super(get_model, self).__init__()
        # Encoder: four MSG set-abstraction layers, progressively
        # downsampling to 1024 -> 256 -> 64 -> 16 centroids.
        self.sa1 = PointNetSetAbstractionMsg(1024, [0.05, 0.1], [16, 32], 9, [[16, 16, 32], [32, 32, 64]])
        self.sa2 = PointNetSetAbstractionMsg(256, [0.1, 0.2], [16, 32], 32+64, [[64, 64, 128], [64, 96, 128]])
        self.sa3 = PointNetSetAbstractionMsg(64, [0.2, 0.4], [16, 32], 128+128, [[128, 196, 256], [128, 196, 256]])
        self.sa4 = PointNetSetAbstractionMsg(16, [0.4, 0.8], [16, 32], 256+256, [[256, 256, 512], [256, 384, 512]])
        # Decoder: feature propagation back up to full point resolution.
        self.fp4 = PointNetFeaturePropagation(512+512+256+256, [256, 256])
        self.fp3 = PointNetFeaturePropagation(128+128+256, [256, 256])
        self.fp2 = PointNetFeaturePropagation(32+64+256, [256, 128])
        self.fp1 = PointNetFeaturePropagation(128, [128, 128, 128])
        # Per-point classification head.
        self.conv1 = nn.Conv1d(128, 128, 1)
        self.bn1 = nn.BatchNorm1d(128)
        self.drop1 = nn.Dropout(0.5)
        self.conv2 = nn.Conv1d(128, num_classes, 1)

    def forward(self, xyz):
        # All 9 input channels are the level-0 features; the spatial xyz
        # slice is what the grouping layers operate on.
        l0_points = xyz
        l0_xyz = xyz[:, :3, :]
        # Hierarchical encoding.
        l1_xyz, l1_points = self.sa1(l0_xyz, l0_points)
        l2_xyz, l2_points = self.sa2(l1_xyz, l1_points)
        l3_xyz, l3_points = self.sa3(l2_xyz, l2_points)
        l4_xyz, l4_points = self.sa4(l3_xyz, l3_points)
        # Propagate features back through the levels (skip connections).
        l3_points = self.fp4(l3_xyz, l4_xyz, l3_points, l4_points)
        l2_points = self.fp3(l2_xyz, l3_xyz, l2_points, l3_points)
        l1_points = self.fp2(l1_xyz, l2_xyz, l1_points, l2_points)
        l0_points = self.fp1(l0_xyz, l1_xyz, None, l1_points)
        # Head: conv -> BN -> ReLU -> dropout -> conv -> log_softmax.
        x = self.drop1(F.relu(self.bn1(self.conv1(l0_points))))
        x = self.conv2(x)
        x = F.log_softmax(x, dim=1)
        # (B, num_classes, N) -> (B, N, num_classes) for the loss.
        x = x.permute(0, 2, 1)
        return x, l4_points


class get_loss(nn.Module):
    """Weighted NLL loss over per-point log-probabilities."""

    def __init__(self):
        super(get_loss, self).__init__()

    def forward(self, pred, target, trans_feat, weight):
        # trans_feat is accepted for interface compatibility but ignored.
        total_loss = F.nll_loss(pred, target, weight=weight)
        return total_loss


if __name__ == '__main__':
    import torch
    model = get_model(13)
    xyz = torch.rand(6, 9, 2048)
    (model(xyz))
这里博主在开始时具有困惑,为何传入的值xyz的格式为(6,9,2048)
呢,不应该是(point_num,7)
吗?事实上,6代表的是batch-size,9为点云信息维度,2048是点云数量。
加载最优模型
加载最优模型,保证是在最优模型基础上进行训练:
# Resume training from the best saved checkpoint when one exists.
# NOTE(review): the excerpt lost its opening `try:` (an `except:` appeared
# with no matching try); restored here so the snippet is valid Python.
try:
    checkpoint = torch.load(str(experiment_dir) + '/checkpoints/best_model.pth')
    start_epoch = checkpoint['epoch']
    classifier.load_state_dict(checkpoint['model_state_dict'])
    log_string('Use pretrain model')
except Exception:
    # No checkpoint (or it failed to load): initialize weights, start at epoch 0.
    # Narrowed from a bare `except:`, which also caught KeyboardInterrupt/SystemExit.
    log_string('No existing model, starting training from scratch...')
    start_epoch = 0
    classifier = classifier.apply(weights_init)
设置优化器
优化器设置,默认即可
# Optimizer selection: Adam (the default) or plain SGD with momentum.
if args.optimizer == 'Adam':
    optimizer = torch.optim.Adam(
        classifier.parameters(),
        lr=args.learning_rate,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=args.decay_rate,
    )
else:
    optimizer = torch.optim.SGD(classifier.parameters(), lr=args.learning_rate, momentum=0.9)
训练开始
开始迭代训练,下面的几个参数用于记录正确分类数量,总损失等,同时将模型开启训练
# One pass over the training set per epoch: reset the running counters and
# switch the network to training mode (enables dropout / BN statistics updates).
for epoch in range(start_epoch, args.epoch):
    total_correct = 0
    total_seen = 0
    loss_sum = 0
    classifier = classifier.train()
下面的代码是训练的核心部分,即完成加载数据集,将数据送入模型,计算损失,反向传播等功能,其中我们着重看一下数据在模型中是如何变化的。
# Core training loop: load a batch, augment, forward, compute loss,
# backpropagate, and accumulate accuracy/loss statistics.
for i, (points, target) in tqdm(enumerate(trainDataLoader), total=len(trainDataLoader), smoothing=0.9):
    optimizer.zero_grad()  # clear gradients from the previous step
    points = points.data.numpy()  # to numpy for the augmentation helper
    # Random rotation around the z axis on the xyz channels only.
    points[:, :, :3] = provider.rotate_point_cloud_z(points[:, :, :3])
    points = torch.Tensor(points)
    points, target = points.float().cuda(), target.long().cuda()
    # (B, N, C) -> (B, C, N): the channel-first layout the model expects.
    points = points.transpose(2, 1)
    seg_pred, trans_feat = classifier(points)
    # Flatten predictions and labels to per-point rows for the loss.
    seg_pred = seg_pred.contiguous().view(-1, NUM_CLASSES)
    batch_label = target.view(-1, 1)[:, 0].cpu().data.numpy()
    target = target.view(-1, 1)[:, 0]
    loss = criterion(seg_pred, target, trans_feat, weights)
    loss.backward()
    optimizer.step()
    # Hard predictions for the running accuracy counters.
    pred_choice = seg_pred.cpu().data.max(1)[1].numpy()
    correct = np.sum(pred_choice == batch_label)
    total_correct += correct
    total_seen += (BATCH_SIZE * NUM_POINT)
    loss_sum += loss
首先,加载的点云(points
)与真值(targets
)如下:
其中,对于points,16是batch-size,4096是点云数量(我们在前一篇博客中说过,为了使输出的值统一,我们的输入值的数量也要统一,这里设置一个batch中输入的点云数量为4096),9则是其点云维度。
点云数据类型转换:
# To numpy so the augmentation helper can rotate the xyz channels around z.
points = points.data.numpy()
points[:, :, :3] = provider.rotate_point_cloud_z(points[:, :, :3])
# Back to torch tensors and onto the GPU.
points = torch.Tensor(points)
points, target = points.float().cuda(), target.long().cuda()
# (B, N, C) -> (B, C, N): channel-first layout expected by the model.
points = points.transpose(2, 1)
此时,points的格式如下:
将点云数据送入模型,得到的值如下:
# Forward pass: per-point class scores plus the deepest set-abstraction features.
seg_pred, trans_feat = classifier(points)
将预测值转换为(点云数量,类别)的形式
# Flatten (B, N, NUM_CLASSES) -> (B*N, NUM_CLASSES) for the loss.
seg_pred = seg_pred.contiguous().view(-1, NUM_CLASSES)
损失计算
# Flatten the per-point labels to match the flattened predictions; keep a
# numpy copy for the accuracy bookkeeping.
batch_label = target.view(-1, 1)[:, 0].cpu().data.numpy()
target = target.view(-1, 1)[:, 0]
# trans_feat is passed through but not used by the loss (see get_loss).
loss = criterion(seg_pred, target, trans_feat, weights)
这里,虽然传入的值中含有 trans_feat
,但该值并不参与损失计算。
class get_loss(nn.Module):
    """Semantic-segmentation loss: weighted negative log-likelihood.

    `pred` is expected to hold log-probabilities (log_softmax output);
    `trans_feat` is accepted for interface compatibility but ignored.
    """

    def __init__(self):
        super(get_loss, self).__init__()

    def forward(self, pred, target, trans_feat, weight):
        # Weighted NLL over the per-point log-probabilities.
        return F.nll_loss(pred, target, weight=weight)
传入的值如下:
随后进行反向传播等操作即可
# Backpropagate and apply the parameter update.
loss.backward()
# (two statements fused by extraction) step the optimizer, then take the
# argmax over classes as the hard per-point prediction.
optimizer.step()pred_choice = seg_pred.cpu().data.max(1)[1].numpy()
# Accumulate running accuracy and loss statistics for this epoch.
correct = np.sum(pred_choice == batch_label)
total_correct += correct
total_seen += (BATCH_SIZE * NUM_POINT)
loss_sum += loss
模型测试
在完成模型训练后,我们紧接着可以使用训练好的模型进行测试,点云分割所使用的评价指标是mIOU
在测试时,需要指定的参数如下:
# NOTE(review): this is the body of the test script's argument-parsing
# function -- its `def` line is not shown in this excerpt, and the trailing
# `return parser.parse_args()` only makes sense inside a function.
parser = argparse.ArgumentParser('Model')parser.add_argument('--batch_size', type=int, default=16, help='batch size in testing [default: 32]')parser.add_argument('--gpu', type=str, default='0', help='specify gpu device')parser.add_argument('--num_point', type=int, default=4096, help='point number [default: 4096]')parser.add_argument('--log_dir', type=str,default="pointnet2_sem_seg_msg", help='experiment root')parser.add_argument('--visual', action='store_true', default=False, help='visualize result [default: False]')parser.add_argument('--test_area', type=int, default=5, help='area for testing, option: 1-6 [default: 5]')parser.add_argument('--num_votes', type=int, default=3, help='aggregate segmentation scores with voting [default: 5]')return parser.parse_args()
这里我们使用的测试集是Area5,其内有67个场所
# Initialize the per-class counters (seen / correct / IoU denominator) and
# start looping over every whole-scene sample of the test set.
# NOTE(review): statements are fused onto one line by extraction; the loop
# body itself is shown further down in the article.
num_batches = len(TEST_DATASET_WHOLE_SCENE)total_seen_class = [0 for _ in range(NUM_CLASSES)]total_correct_class = [0 for _ in range(NUM_CLASSES)]total_iou_deno_class = [0 for _ in range(NUM_CLASSES)]log_string('---- EVALUATION WHOLE SCENE----')for batch_idx in range(num_batches):# start the per-scene evaluation loop
数据维度转换
这块主要是将测试集的点云进行维度转换,变为(16,9,4096)格式,挺绕的,这里就不详细介绍了。
获取第一个场所的点云数据:
# Per-point data of scene `batch_idx` (whole room, not block-sampled);
# columns 0-2 are xyz, used below when writing the visualization files.
whole_scene_data = TEST_DATASET_WHOLE_SCENE.scene_points_list[batch_idx]
模型推理
由于测试的时候并不是像训练那样随机采样block,而是需要把整个场景全部输入网络,所以用到了S3DISDataLoader.py中定义的ScannetDatasetWholeScene()来制作数据。具体来说是将一个房间按给定步长网格化,然后有重叠的移动block进行点的采样,和训练的时候一样,block中的点如果不足4096,就重复采样一些点。这样在每个block内部一般都会有数个小的batch,将每个batch输入网络进行预测得到相应的预测分数进行保存,最后计算IOU,并将每个点类别信息和语义标签的颜色信息进行关联,然后一同写入文件。
最终,经过一系列转换,得到输入模型的数据维度依旧为(16,9,4096)
# Forward pass on one batch of blocks; the model's auxiliary output is unused here.
seg_pred, _ = classifier(torch_data)
# Argmax over the class dimension (dim 2) gives the hard label per point.
batch_pred_label = seg_pred.contiguous().cpu().data.max(2)[1].numpy()
点云输出结果转换为对应的类别
组合点云和类别,在先前为方便运算,将场景中的点云进行了切分,每个batch含有4096个,在完成对4096个点的分类后,将其组合起来,即恢复为原来的场景。
# Accumulate this batch's predictions into the per-point vote table; only the
# first real_batch_size rows are used (the remainder is presumably padding --
# TODO confirm against the batching code).
vote_label_pool = add_vote(vote_label_pool, batch_point_index[0:real_batch_size, ...],batch_pred_label[0:real_batch_size, ...],batch_smpw[0:real_batch_size, ...])
def add_vote(vote_label_pool, point_idx, pred_label, weight):
    """Accumulate per-point class votes from one batch of predictions.

    Args:
        vote_label_pool: (num_points, num_classes) running vote counts,
            updated in place.
        point_idx: (B, N) index of each sampled point within the whole scene.
        pred_label: (B, N) predicted class id per sampled point.
        weight: (B, N) sample weight; points whose weight is 0 or infinite
            are skipped (same rule as the original element-wise check).

    Returns:
        The same vote_label_pool array, with one vote added per valid point.
    """
    # Vectorized replacement for the original O(B*N) Python double loop:
    # select points with non-zero, finite weight.
    valid = (weight != 0) & ~np.isinf(weight)
    rows = point_idx[valid].astype(int)
    cols = pred_label[valid].astype(int)
    # np.add.at performs an unbuffered in-place add, so repeated
    # (row, col) pairs each contribute a vote -- matching the loop.
    np.add.at(vote_label_pool, (rows, cols), 1)
    return vote_label_pool
将点云取最值作为类别
# Each point's final label is the class with the most accumulated votes.
pred_label = np.argmax(vote_label_pool, 1)
可以看到,第一个场景中每个点的类别已经分配好了,其点云的数量与原本的点云数量一致
在这里就已经完成了对点云的分类,将其与xyz组合后,根据类别设置对应的颜色,也就完成了点云分割,这部分即推理过程。
mIOU计算
# Per-scene evaluation: accumulate per-class seen/correct/IoU-denominator
# counts, compute this scene's mean IoU over classes actually present
# (arr != 0), save the predicted labels to a .txt file, and optionally write
# colored prediction/ground-truth point clouds for visualization.
# NOTE(review): statements are fused onto one line by extraction (the `with`
# block, loops and `if args.visual` nesting are ambiguous here), so the code
# is left byte-identical; see the original test_semseg script for layout.
for l in range(NUM_CLASSES):total_seen_class_tmp[l] += np.sum((whole_scene_label == l))total_correct_class_tmp[l] += np.sum((pred_label == l) & (whole_scene_label == l))total_iou_deno_class_tmp[l] += np.sum(((pred_label == l) | (whole_scene_label == l)))total_seen_class[l] += total_seen_class_tmp[l]total_correct_class[l] += total_correct_class_tmp[l]total_iou_deno_class[l] += total_iou_deno_class_tmp[l]iou_map = np.array(total_correct_class_tmp) / (np.array(total_iou_deno_class_tmp, dtype=float) + 1e-6)print(iou_map)arr = np.array(total_seen_class_tmp)tmp_iou = np.mean(iou_map[arr != 0])log_string('Mean IoU of %s: %.4f' % (scene_id[batch_idx], tmp_iou))print('----------------------------')filename = os.path.join(visual_dir, scene_id[batch_idx] + '.txt')with open(filename, 'w') as pl_save:for i in pred_label:pl_save.write(str(int(i)) + '\n')pl_save.close()for i in range(whole_scene_label.shape[0]):color = g_label2color[pred_label[i]]color_gt = g_label2color[whole_scene_label[i]]if args.visual:fout.write('v %f %f %f %d %d %d\n' % (whole_scene_data[i, 0], whole_scene_data[i, 1], whole_scene_data[i, 2], color[0], color[1],color[2]))fout_gt.write('v %f %f %f %d %d %d\n' % (whole_scene_data[i, 0], whole_scene_data[i, 1], whole_scene_data[i, 2], color_gt[0],color_gt[1], color_gt[2]))if args.visual:fout.close()fout_gt.close()
至此,我们完成了PointNet++模型的流程梳理。