【训练过程】1) Create Training File（创建训练文件）

Put the folders of the VOC dataset（clean images，即原始的干净图像（VOC））and the collected old photos (e.g., Real_L_old and Real_RGB_old（Real_L_old 是只有灰度（亮度）的照片集，Real_RGB_old 是彩色照片集）) into one shared folder. Then run: cd Global/data/

马鹏森

16889人浏览 · 2021-06-18 20:04:08

马鹏森 · 2021-06-18 20:04:08 发布

1) Create Training File（创建训练文件）

cd Global/data/
python Create_Bigfile.py

Note: Remember to modify the code based on your own environment.

过程：

创建 3 个文件，分别命名为 VOC.bigfile、Real_L_old.bigfile、Real_RGB_old.bigfile，即每一个文件夹对应一个大文件。
向每个大文件中先写入总图像数，然后对每张图像依次写入：文件名长度 + 文件名（UTF-8 编码）+ 图像数据长度 + 图像数据（原始文件字节）。即大文件由“总图像数 + 文件名 + 图像数据”这三部分组成。

# 1) Create Training File（创建训练文件）

import os
import struct


# File-name suffixes recognised as images. is_image_file() compares them
# case-insensitively; the upper-case entries are kept so that any code reading
# this list directly still sees the same contents.
IMG_EXTENSIONS = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',]


def is_image_file(filename):
    """Return True if *filename* has one of the IMG_EXTENSIONS suffixes.

    Only the name is inspected; the file is never opened. The comparison
    ignores case, so mixed-case names such as ``photo.Jpg`` are accepted
    in addition to the all-lower and all-upper spellings listed in
    IMG_EXTENSIONS.
    """
    # str.endswith accepts a tuple, so one call tests every suffix.
    suffixes = tuple(extension.lower() for extension in IMG_EXTENSIONS)
    return filename.lower().endswith(suffixes)

def make_dataset(dir):
    """Return the paths of all image files found under *dir*, recursively.

    Directories are visited in sorted order of the os.walk() entries, and a
    file is kept when is_image_file() accepts its name. Each returned path is
    the walked directory joined with the file name.

    Raises AssertionError if *dir* is not an existing directory.
    """
    assert os.path.isdir(dir), '%s is not a valid directory' % dir

    collected = []
    for folder, _, entries in sorted(os.walk(dir)):
        collected.extend(
            os.path.join(folder, entry)
            for entry in entries
            if is_image_file(entry)
        )
    return collected


### Modify these lines in your own environment
# indir / out_dir are sub-paths (with a leading "/") that get appended to the
# directory two levels above the current working directory.
indir = "/data/temp_old"
# Each of these sub-folders of indir is packed into its own <name>.bigfile.
target_folders = ['VOC', 'Real_L_old', 'Real_RGB_old']
out_dir = "/data/out_temp_old"

_project_root = os.path.join(os.getcwd(), "../..")
indir = os.path.abspath(_project_root + indir)
out_dir = os.path.abspath(_project_root + out_dir)
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(out_dir, exist_ok=True)
###



# Running count of images written across all target folders.
total_num_image = 0
for target_folder in target_folders:
    # Input folder, e.g. <indir>/VOC, and its output archive <out_dir>/VOC.bigfile.
    source_dir = os.path.join(indir, target_folder)
    bigfile_path = os.path.join(out_dir, '%s.bigfile' % target_folder)

    # All image paths under this folder, in sorted order.
    image_paths = sorted(make_dataset(source_dir))

    with open(bigfile_path, 'wb') as bigfile:
        # Header: number of images stored in this bigfile.
        bigfile.write(struct.pack('i', len(image_paths)))

        for image_path in image_paths:
            # Record part 1: length-prefixed file name (UTF-8 bytes, no directory).
            name_bytes = os.path.basename(image_path).encode('utf-8')
            bigfile.write(struct.pack('i', len(name_bytes)))
            bigfile.write(name_bytes)

            # Record part 2: length-prefixed raw bytes of the image file.
            with open(image_path, 'rb') as image_file:
                payload = image_file.read()
            bigfile.write(struct.pack('i', len(payload)))
            bigfile.write(payload)

            total_num_image += 1
            print('write %s images done' % image_path)

print("The total number of images processed is:", total_num_image)

AI Agent技术社区

Agent 垂直技术社区，欢迎活跃、内容共建。

更多推荐