使用Python将YOLO数据集文件夹下的图片文件夹和标注文件夹拆分成固定文件数量的小数据集:
import os
import shutil
try:
    from natsort import natsorted
except ImportError:
    # natsort is a third-party package; when it is not installed, fall back to
    # a small stdlib natural sort so the script still runs.
    import re

    def natsorted(seq):
        """Return *seq* sorted with runs of ASCII digits compared as integers.

        Stand-in for ``natsort.natsorted`` that covers plain file names such
        as ``img2.jpg`` < ``img10.jpg``; it does not replicate natsort's
        other options.
        """
        def _natural_key(text):
            # re.split with a capture group alternates text / digit run, so
            # odd positions are always digit runs and keys stay comparable.
            return [
                int(token) if index % 2 else token
                for index, token in enumerate(re.split(r"([0-9]+)", text))
            ]

        return sorted(seq, key=_natural_key)


def split_yolo_files(source_dir: str = 'q:\\source_dir\\', group_size: int = 50) -> None:
    """Split images and annotations into grouped directories with nested structure.

    Images are listed from ``<source_dir>/Images``, ordered naturally, and
    copied in chunks of ``group_size`` into ``new_01/Images``,
    ``new_02/Images``, ... Each image's ``<stem>.xml`` file from
    ``<source_dir>/Annotations`` is copied into the matching
    ``new_NN/Annotations`` folder; a warning is printed when it is missing.
    The ``new_NN`` folders are relative paths, so they are created in the
    current working directory. Source files are copied, never moved.

    Failures are reported on stdout instead of being raised, so the function
    always returns ``None``.

    Args:
        source_dir: Source directory containing 'Images' and 'Annotations'
            folders.
        group_size: Number of image files per group (default: 50). Must be
            at least 1.
    """
    try:
        # A zero size divides by zero and a negative size yields no groups
        # while still reporting success, so reject both explicitly.
        if group_size < 1:
            raise ValueError(f"group_size must be at least 1, got {group_size}")

        # Validate source directory structure
        images_dir = os.path.join(source_dir, 'Images')
        annotations_dir = os.path.join(source_dir, 'Annotations')
        if not os.path.isdir(images_dir):
            raise FileNotFoundError(f"Images directory not found: {images_dir}")
        if not os.path.isdir(annotations_dir):
            raise FileNotFoundError(f"Annotations directory not found: {annotations_dir}")

        # Get and sort image files naturally
        image_files = natsorted(
            f for f in os.listdir(images_dir)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        )
        if not image_files:
            raise FileNotFoundError(f"No images found in {images_dir}")

        # Ceiling division: the last group may hold fewer than group_size files
        num_groups = (len(image_files) + group_size - 1) // group_size
        print(f"Found {len(image_files)} images, splitting into {num_groups} groups")

        # Process each group
        for group_num in range(1, num_groups + 1):
            group_prefix = f"{group_num:02d}"
            group_dir = f"new_{group_prefix}"
            group_images_dir = os.path.join(group_dir, 'Images')
            group_annots_dir = os.path.join(group_dir, 'Annotations')

            # Create nested output directories
            os.makedirs(group_images_dir, exist_ok=True)
            os.makedirs(group_annots_dir, exist_ok=True)

            # Slicing clamps at the end of the list, covering the short last group
            start_idx = (group_num - 1) * group_size
            group_images = image_files[start_idx:start_idx + group_size]

            print(f"\nProcessing group {group_prefix}:")
            print(f" Creating directory: {group_dir}")
            print(f" Copying {len(group_images)} images to {group_images_dir}")
            print(f" Copying annotations to {group_annots_dir}")

            # Copy images and corresponding annotations
            for img_file in group_images:
                shutil.copy2(
                    os.path.join(images_dir, img_file),
                    os.path.join(group_images_dir, img_file),
                )

                # The annotation shares the image's base name with an .xml suffix
                xml_file = f"{os.path.splitext(img_file)[0]}.xml"
                src_xml = os.path.join(annotations_dir, xml_file)
                if os.path.exists(src_xml):
                    shutil.copy2(src_xml, os.path.join(group_annots_dir, xml_file))
                else:
                    print(f" Warning: Missing annotation for {img_file}")

        print("\nOperation completed successfully")
        print(f"Created {num_groups} group directories with nested structure")
    except Exception as e:
        # Script-level reporting boundary: print a hint instead of a traceback.
        print(f"\nError occurred: {str(e)}")
        print("Please check:")
        print("- source_dir contains 'Images' and 'Annotations' folders (case sensitive)")
        print("- Image files have corresponding XML annotations")
        print("- You have write permissions and sufficient disk space")


if __name__ == "__main__":
    print("YOLO Dataset Splitter with Nested Structure")
    print("==========================================")
    split_yolo_files()
    print("\nNote: Original files remain unchanged (copies created in new directories)")
- 读取source_dir中的Images和Annotations子目录(需求为同级目录下的source_dir;代码中的默认值为 q:\source_dir\,可通过source_dir参数修改)
- 对图片文件进行自然排序(natsort)
- 在当前工作目录下创建以new_为前缀的文件夹(new_01, new_02等)
- 每个new_xx文件夹下包含Images和Annotations子目录
- 每50个图片文件保存到new_xx/Images子目录
- 对应的XML标注文件保存到new_xx/Annotations子目录
- 保持图片和标注文件的对应关系