使用 Caffe 深度学习框架训练语义分割模型时,需要先对数据集进行处理. 本文介绍语义分割数据集的准备过程.

<h2>Dataset 数据集下载</h2>

# augmented PASCAL VOC
# Download the benchmark archive into $DATASETS, unpack it, and rename the
# benchmark_RELEASE directory to VOC_aug.
# NOTE(review): assumes $DATASETS is already set to an existing directory — confirm.
cd $DATASETS
wget http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz # 1.3 GB
tar -zxvf benchmark.tgz
mv benchmark_RELEASE VOC_aug

# original PASCAL VOC 2012
# Download and unpack the trainval archive, keep only VOCdevkit/VOC2012
# (renamed to VOC2012_orig) and delete the rest of VOCdevkit.
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar # 2 GB
tar -xvf VOCtrainval_11-May-2012.tar
mv VOCdevkit/VOC2012 VOC2012_orig && rm -r VOCdevkit

<h2>Data conversions 数据转换</h2>

  • augmented PASCAL VOC 数据集的 ground truth labels 是以 Matlab data files的格式存在的,需要进行转换:
    • Step1 定义 mat2png 脚本;
    • Step2 转换 mat 成 png.
#!/usr/bin/env python
# Martin Kersner, m.kersner@gmail.com
# 2016/03/17

from __future__ import print_function
import os
import sys
import glob
import scipy.io
from PIL import Image as PILImage

# Mat to png conversion for http://www.cs.berkeley.edu/~bharath2/codes/SBD/download.html
# 'GTcls' key is for class segmentation
# 'GTinst' key is for instance segmentation
def mat2png_hariharan(mat_file, key='GTcls'):
    """Load a Matlab annotation file and return its segmentation array.

    `key` names the top-level struct to read ('GTcls' for class
    segmentation, 'GTinst' for instance segmentation); the value returned
    is that struct's ``Segmentation`` field.
    """
    load_options = dict(mat_dtype=True, squeeze_me=True, struct_as_record=False)
    annotation = scipy.io.loadmat(mat_file, **load_options)
    ground_truth = annotation[key]
    return ground_truth.Segmentation

def main():
    """Convert every ``*.mat`` file in INPUT_PATH into a PNG in OUTPUT_PATH.

    Both paths come from the command line via ``process_arguments``. If
    either one is not an existing directory, an error followed by the usage
    text is printed through ``help``.
    """
    input_path, output_path = process_arguments(sys.argv)

    if os.path.isdir(input_path) and os.path.isdir(output_path):
        mat_files = glob.glob(os.path.join(input_path, '*.mat'))
        convert_mat2png(mat_files, output_path)
    else:
        # A real newline (not a literal 'n') separates the error from the
        # usage text that help() appends.
        help('Input or output path does not exist!\n')

def process_arguments(argv):
    """Extract (input_path, output_path) from the command-line arguments.

    `argv` is expected to hold exactly three items: the script name followed
    by the input and output paths. With any other count the usage text is
    shown through ``help``; should that call return, both paths are None.
    """
    if len(argv) != 3:
        help()
        return None, None

    return argv[1], argv[2]

def convert_mat2png(mat_files, output_path):
    """Convert each Matlab file in `mat_files` to a PNG inside `output_path`.

    Every file is read with ``mat2png_hariharan`` and written under the same
    base name with a ``png`` extension. An empty `mat_files` is reported
    through ``help``.
    """
    if not mat_files:
        # A real newline (not a literal 'n') separates the error from the
        # usage text that help() appends.
        help('Input directory does not contain any Matlab files!\n')

    for mat in mat_files:
        numpy_img = mat2png_hariharan(mat)
        pil_img = PILImage.fromarray(numpy_img)
        pil_img.save(os.path.join(output_path, modify_image_name(mat, 'png')))

# Extract name of image from given path, replace its extension with specified one
# and return new name only, not path.
def modify_image_name(path, ext):
    """Return the file name of `path` with its extension replaced by `ext`.

    Only the last extension is replaced, so a name that contains extra dots
    (``a.b.mat``) keeps them (``a.b.png``) instead of being cut at the first
    dot, which could make different inputs collide on the same output name.
    `ext` is given without the leading dot. The directory part is dropped.
    """
    stem, _old_ext = os.path.splitext(os.path.basename(path))
    return stem + '.' + ext

def help(msg=''):
    """Print `msg` followed by the usage text to stderr and exit.

    `msg` is an optional error description that is printed directly in front
    of the usage text, so it should end with a newline. The function never
    returns: it raises SystemExit with status 1, because it is only reached
    when the script was invoked incorrectly.
    """
    print(msg +
        'Usage: python mat2png.py INPUT_PATH OUTPUT_PATH\n'
        'INPUT_PATH denotes path containing Matlab files for conversion.\n'
        'OUTPUT_PATH denotes path where converted Png files ar going to be saved.'
        , file=sys.stderr)
    # sys.exit is always available, unlike the interactive helper exit()
    # that is injected by the site module.
    sys.exit(1)

# Run the conversion only when the file is executed as a script.
if __name__ == '__main__':
    main()
# Create the output directory that will receive the converted PNG labels.
cd $DATASETS/VOC_aug/dataset
mkdir cls_png
# NOTE(review): presumably $DATASETSDIR is the directory that holds mat2png.py;
# it is a different variable from $DATASETS used elsewhere — confirm.
cd $DATASETSDIR
# Convert the Matlab files found in cls/ and store the results in cls_png/.
./mat2png.py $DATASETS/VOC_aug/dataset/cls $DATASETS/VOC_aug/dataset/cls_png
  • Caffe的softmax loss函数只能接受一维的 ground truth labels. 但 original PASCAL VOC 2012中的 ground truth labels 是以RGB图像的形式保存的,因此需要降维:
    • Step1 定义转换python脚本:convert_labels.py
    • Step2 转换 ground truth labels 为 1D.
#!/usr/bin/env python    
#Martin Kersner, m.kersner@gmail.com   
#2016/01/25    

from __future__ import print_function
import os
import sys
import numpy as np
from skimage.io import imread, imsave

def pascal_palette():
    """Return the mapping from RGB colour tuples to class indices 0..20.

    The colours are listed in class-index order, so the position of a colour
    in the sequence below is the index it maps to.
    """
    colors_in_label_order = (
        (0, 0, 0),
        (128, 0, 0),
        (0, 128, 0),
        (128, 128, 0),
        (0, 0, 128),
        (128, 0, 128),
        (0, 128, 128),
        (128, 128, 128),
        (64, 0, 0),
        (192, 0, 0),
        (64, 128, 0),
        (192, 128, 0),
        (64, 0, 128),
        (192, 0, 128),
        (64, 128, 128),
        (192, 128, 128),
        (0, 64, 0),
        (128, 64, 0),
        (0, 192, 0),
        (128, 192, 0),
        (0, 64, 128),
    )
    return {rgb: label for label, rgb in enumerate(colors_in_label_order)}

def convert_from_color_segmentation(arr_3d):
    """Turn a colour-coded label image into a 2-D array of class indices.

    Each pixel whose three channel values equal a colour from
    ``pascal_palette`` receives that colour's index. Pixels that match no
    palette colour keep the initial value 0. The result is a uint8 array
    with the same height and width as `arr_3d`.
    """
    height, width = arr_3d.shape[0], arr_3d.shape[1]
    labels = np.zeros((height, width), dtype=np.uint8)

    for color, class_index in pascal_palette().items():
        # Shape the colour as (1, 1, 3) so it is compared against every pixel.
        target = np.array(color).reshape(1, 1, 3)
        matches = (arr_3d == target)
        labels[np.all(matches, axis=2)] = class_index

    return labels


def main():
    """Convert the colour label images named in LIST_FILE to index images.

    Reads PATH, LIST_FILE and NEW_PATH from the command line via
    ``process_arguments``, creates NEW_PATH when it is missing, and for each
    image name in LIST_FILE reads ``PATH/<name>.png``, converts it with
    ``convert_from_color_segmentation`` and saves it as
    ``NEW_PATH/<name>.png``. An image with fewer than three dimensions is
    reported on stderr and stops the script with exit status 1.
    """
    ext = '.png'
    path, txt_file, path_converted = process_arguments(sys.argv)

    # Create dir for converted labels
    if not os.path.isdir(path_converted):
        os.makedirs(path_converted)

    # Text mode: the names are joined with str paths below, which raises
    # TypeError on Python 3 when the lines are read as bytes.
    with open(txt_file, 'r') as f:
        for line in f:
            img_base_name = line.strip()
            if not img_base_name:
                # A blank line would otherwise produce the bogus path "<PATH>/.png".
                continue

            img_name = os.path.join(path, img_base_name) + ext
            img = imread(img_name)

            if len(img.shape) <= 2:
                print(img_name + " is not composed of three dimensions, therefore "
                      "shouldn't be processed by this script.\n"
                      "Exiting.", file=sys.stderr)
                # Non-zero status so calling shells can detect the failure.
                sys.exit(1)

            img = convert_from_color_segmentation(img)
            imsave(os.path.join(path_converted, img_base_name) + ext, img)

def process_arguments(argv):
    """Extract (path, list_file, new_path) from the command-line arguments.

    `argv` must hold exactly four items: the script name followed by the
    label directory, the list file and the output directory. With any other
    count the usage text is shown through ``help``.
    """
    wrong_arg_count = len(argv) != 4
    if wrong_arg_count:
        help()

    return argv[1], argv[2], argv[3]

def help():
    """Print the usage text of convert_labels.py to stderr and exit.

    The function never returns: it raises SystemExit with status 1, because
    it is only reached when the script was invoked with a wrong number of
    arguments.
    """
    print('Usage: python convert_labels.py PATH LIST_FILE NEW_PATH\n'
        'PATH points to directory with segmentation image labels.\n'
        'LIST_FILE denotes text file containing names of images in PATH.\n'
        'Names do not include extension of images.\n'
        'NEW_PATH points to directory where converted labels will be stored.'
        , file=sys.stderr)
    # sys.exit is always available, unlike the interactive helper exit()
    # that is injected by the site module.
    sys.exit(1)

# Run the conversion only when the file is executed as a script.
if __name__ == '__main__':
    main()
# Create the output directory for the converted labels.
cd $DATASETS/VOC2012_orig
mkdir SegmentationClass_1D
# NOTE(review): presumably $DATASETSDIR is the directory that holds
# convert_labels.py; it differs from $DATASETS used elsewhere — confirm.
cd $DATASETSDIR
# Arguments: label directory (PATH), list of image names (LIST_FILE),
# output directory (NEW_PATH).
./convert_labels.py $DATASETS/VOC2012_orig/SegmentationClass/ \
  $DATASETS/VOC2012_orig/ImageSets/Segmentation/trainval.txt \
  $DATASETS/VOC2012_orig/SegmentationClass_1D/
Last modification:October 9th, 2018 at 09:31 am