2022/5/30 23:19:58
0 前言
1 数据增强的实现
1.1 贴背景
1.2 随机旋转
1.3 随机色调变换
1.4 随机透视变换
1.5 完整代码
2 总结
0 前言
那么什么是数据增强呢?数据增强(Data Augmentation)是指在不额外采集新数据的前提下,对已有样本做旋转、平移、色彩变换等处理来扩充训练集的方法。
数据增强可以分为两类:离线增强和在线增强。离线增强:直接对数据集进行处理,数据的数目会变成增强因子乘以原数据集的数目,这种方法常用于数据集很小的时候。在线增强:在获得一个 batch 的数据之后,对这个 batch 的数据进行增强,如旋转、平移、翻转等相应的变换。由于有些数据集不能接受线性级别的增长,这种方法常用于大的数据集;很多机器学习框架已经支持这种数据增强方式,并且可以使用 GPU 优化计算。
视频讲解地址:【深度学习】【数据增强】【目标检测】带或不带标注框的图片离线增强的实现(贴背景、随机旋转、随机色调变换、随机透视变换)(附源码)_哔哩哔哩 (゜-゜)つロ 干杯~-bilibili
1 数据增强的实现
1.1 贴背景
def add_background_randomly(image, background, box_list=None):
    """Paste a randomly resized ``image`` at a random spot on ``background``.

    The image is resized so that its longer side is roughly between 2/3 and
    4/5 of the background's shorter side, then pasted at a uniformly random
    position where it fits entirely inside the background.

    Args:
        image: Source image array; only ``image.shape[:2]`` (height, width)
            is inspected before it is passed to ``cv2.resize``.
        background: Background array. It is copied, so the caller's array is
            left untouched and can be reused for further samples.
        box_list: Optional list of ``(cls_type, rect)`` labels where
            ``rect = [x0, y0, x1, y1, x2, y2, x3, y3]`` holds the left-top,
            right-top, right-bottom and left-bottom corners in that order.
            The input rects are not modified.

    Returns:
        ``[composited_background, new_box_list]`` where ``new_box_list`` has
        the same ``(cls_type, rect)`` layout, expressed in background
        coordinates and clamped to ``[0, width]`` / ``[0, height]``.
    """
    box_list = [] if box_list is None else box_list

    img_height, img_width = image.shape[:2]
    bg_height, bg_width = background.shape[:2]

    # Target size for the image's longer side, relative to the background's
    # shorter side so the resized image always fits.
    bg_short_side = min(bg_height, bg_width)
    max_size = bg_short_side // 5 * 4
    # Integer division can make the 2/3 bound exceed the 4/5 bound on tiny
    # backgrounds (e.g. a short side of 9 gives 6 > 4); keep the range valid.
    min_size = min(bg_short_side // 3 * 2, max_size)
    new_size = random.randint(min_size, max_size)
    resize_multiple = round(new_size / max(img_height, img_width), 4)

    # Never let a dimension collapse to zero for very elongated images.
    new_width = max(int(img_width * resize_multiple), 1)
    new_height = max(int(img_height * resize_multiple), 1)
    image = cv2.resize(image, (new_width, new_height))
    img_height, img_width = image.shape[:2]

    # Pick the paste position (top-left corner of the image on the background).
    height_pos = random.randint(0, bg_height - img_height)
    width_pos = random.randint(0, bg_width - img_width)

    # Work on a copy so repeated calls with the same background do not
    # accumulate previously pasted images.
    background = background.copy()
    background[height_pos:height_pos + img_height,
               width_pos:width_pos + img_width] = image
    canvas_height, canvas_width = background.shape[:2]

    # Map every labelled corner: scale, shift by the paste offset, clamp.
    new_box_list = []
    for cls_type, rect in box_list:
        new_rect = []
        for x, y in zip(rect[0::2], rect[1::2]):
            x = int(x * resize_multiple) + width_pos
            y = int(y * resize_multiple) + height_pos
            new_rect.append(max(min(x, canvas_width), 0))
            new_rect.append(max(min(y, canvas_height), 0))
        new_box_list.append((cls_type, new_rect))

    return [background, new_box_list]
1.2 随机旋转
def rotate_image(image, label_box_list=None, angle=90, color=(0, 0, 0), img_scale=1.0):
    """Rotate ``image`` about its centre without cropping and move the labels.

    The output canvas is enlarged to the bounding box of the rotated (and
    scaled) image; uncovered pixels are filled with ``color``.

    Args:
        image: Image array; ``image.shape[:2]`` is read as (height, width).
        label_box_list: Optional list of ``(cls_type, box)`` with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Not modified.
        angle: Rotation angle in degrees, passed unchanged to
            ``cv2.getRotationMatrix2D`` (in OpenCV a positive angle rotates
            counter-clockwise with the origin at the top-left corner).
        color: Border fill colour handed to ``cv2.warpAffine``.
        img_scale: Isotropic scale factor applied together with the rotation.

    Returns:
        ``[image_new, box_new_list]``; box coordinates are integers clamped
        to ``[0, width_new]`` / ``[0, height_new]``.
    """
    label_box_list = [] if label_box_list is None else label_box_list

    height_ori, width_ori = image.shape[:2]
    x_center_ori, y_center_ori = (width_ori // 2, height_ori // 2)

    rotation_matrix = cv2.getRotationMatrix2D((x_center_ori, y_center_ori), angle, img_scale)
    # These already include img_scale, so the new canvas is sized for the
    # scaled image.
    cos = np.abs(rotation_matrix[0, 0])
    sin = np.abs(rotation_matrix[0, 1])

    # Bounding dimensions of the rotated image.
    width_new = int((height_ori * sin) + (width_ori * cos))
    height_new = int((height_ori * cos) + (width_ori * sin))

    # Shift the transform so the old centre lands on the new canvas centre.
    rotation_matrix[0, 2] += (width_new / 2) - x_center_ori
    rotation_matrix[1, 2] += (height_new / 2) - y_center_ori

    image_new = cv2.warpAffine(image, rotation_matrix, (width_new, height_new), borderValue=color)

    # Move the label corners with the same rotation, scale and centre shift
    # as the pixels; otherwise boxes drift away from the objects whenever
    # img_scale != 1 or the new canvas has an odd size.
    box_rot_list = cal_rotate_box(
        label_box_list,
        math.radians(angle),
        (x_center_ori, y_center_ori),
        (width_new / 2, height_new / 2),
        scale=img_scale,
    )

    box_new_list = []
    for cls_type, box_rot in box_rot_list:
        clamped = []
        for x, y in zip(box_rot[0::2], box_rot[1::2]):
            clamped.append(max(min(int(x), width_new), 0))
            clamped.append(max(min(int(y), height_new), 0))
        box_new_list.append((cls_type, clamped))

    return [image_new, box_new_list]


def cal_rotate_box(box_list, angle, ori_center, new_center, scale=1.0):
    """Rotate every corner of every labelled box.

    Args:
        box_list: List of ``(cls_type, box)`` with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``.
        angle: Rotation angle in radians.
        ori_center: ``(x, y)`` rotation centre in the source image.
        new_center: ``(x, y)`` position of that centre in the output image.
        scale: Scale factor applied around the centre (default ``1.0``).

    Returns:
        A new list of ``(cls_type, box)`` with float coordinates; the input
        boxes are left untouched.
    """
    box_list_new = []
    for cls_type, box in box_list:
        box_new = []
        for x, y in zip(box[0::2], box[1::2]):
            box_new.extend(
                cal_rotate_coordinate(x, y, angle, ori_center, new_center, scale)
            )
        box_list_new.append((cls_type, box_new))
    return box_list_new


def cal_rotate_coordinate(x_ori, y_ori, angle, ori_center, new_center, scale=1.0):
    """Rotate one image-space point about ``ori_center``.

    Args:
        x_ori: Point x coordinate in the source image.
        y_ori: Point y coordinate in the source image.
        angle: Rotation angle in radians.
        ori_center: ``(x, y)`` rotation centre in the source image.
        new_center: ``(x, y)`` position of that centre in the output image.
        scale: Scale factor applied around the centre (default ``1.0``).

    Returns:
        ``(x_new, y_new)`` as floats in output-image coordinates.
    """
    # Work in a y-up frame centred on the rotation centre, then flip back.
    x_0 = x_ori - ori_center[0]
    y_0 = ori_center[1] - y_ori
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    x_new = scale * (x_0 * cos_a - y_0 * sin_a) + new_center[0]
    y_new = new_center[1] - scale * (y_0 * cos_a + x_0 * sin_a)
    return (x_new, y_new)
1.3 随机色调变换
def hue_change(image):
    """Apply up to four independent random colour jitters to ``image``.

    In order, each step is applied with its own probability: brightness
    (0.8, strength 0.5), contrast (0.2, strength 0.2), saturation
    (0.2, strength 0.2) and hue (0.2, strength 0.2).

    Args:
        image: Input accepted by ``transforms.ColorJitter``
            (presumably a PIL image or tensor if ``transforms`` is
            torchvision's module - confirm against the caller).

    Returns:
        The jittered image, or the input unchanged if no step fired.
    """
    # (probability of applying, ColorJitter keyword arguments), in order.
    jitter_steps = (
        (0.8, {"brightness": 0.5}),
        (0.2, {"contrast": 0.2}),
        (0.2, {"saturation": 0.2}),
        (0.2, {"hue": 0.2}),
    )
    for probability, jitter_kwargs in jitter_steps:
        if np.random.rand() < probability:
            image = transforms.ColorJitter(**jitter_kwargs)(image)
    return image
1.4 随机透视变换
def perspective_tranform(image, perspective_rate=0.5, label_box_list=None):
    """Warp ``image`` with a random perspective transform and move the labels.

    The four image corners are mapped to random points, each drawn from its
    own corner region of a canvas ``(1 + perspective_rate)`` times the image
    size. The result is then cropped to the bounding box of the chosen
    corner coordinates.

    Args:
        image: Image array; ``image.shape[:2]`` is read as (height, width).
        perspective_rate: Strength of the distortion. Negative values make
            the random ranges empty and raise ``ValueError`` from
            ``random.randint``.
        label_box_list: Optional list of ``(cls_type, box)`` with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Not modified.

    Returns:
        ``[image_new, box_new_list]``; box coordinates are integers in the
        cropped image's frame, clamped to its width/height.
    """
    label_box_list = [] if label_box_list is None else label_box_list

    img_height, img_width = image.shape[:2]
    points_src = np.float32([
        [0, 0],
        [img_width - 1, 0],
        [img_width - 1, img_height - 1],
        [0, img_height - 1],
    ])

    max_width = int(img_width * (1.0 + perspective_rate))
    max_height = int(img_height * (1.0 + perspective_rate))
    min_width = int(img_width * (1.0 - perspective_rate))
    # Must mirror min_width: with "1.0 + rate" here delta_height is always 0
    # and the transform never distorts vertically.
    min_height = int(img_height * (1.0 - perspective_rate))
    delta_width = (max_width - min_width) // 2
    delta_height = (max_height - min_height) // 2

    # Destination corners: left-top, right-top, right-bottom, left-bottom.
    x0 = random.randint(0, delta_width)
    y0 = random.randint(0, delta_height)
    x1 = random.randint(delta_width + min_width, max_width)
    y1 = random.randint(0, delta_height)
    x2 = random.randint(delta_width + min_width, max_width)
    y2 = random.randint(delta_height + min_height, max_height)
    x3 = random.randint(0, delta_width)
    y3 = random.randint(delta_height + min_height, max_height)
    points_dst = np.float32([[x0, y0], [x1, y1], [x2, y2], [x3, y3]])

    M = cv2.getPerspectiveTransform(points_src, points_dst)
    image_res = cv2.warpPerspective(image, M, (max_width, max_height))

    # Crop to the bounding box of the destination corner coordinates.
    left, top = min(x0, x3), min(y0, y1)
    right, bottom = max(x1, x2), max(y2, y3)
    image_new = image_res[top:bottom, left:right]
    crop_height, crop_width = image_new.shape[:2]

    # Project every label corner with the same homography, then shift it
    # into the cropped frame and clamp.
    box_new_list = []
    for cls_type, box in label_box_list:
        new_box = []
        for x, y in zip(box[0::2], box[1::2]):
            denom = M[2][0] * x + M[2][1] * y + M[2][2]
            px = (M[0][0] * x + M[0][1] * y + M[0][2]) / denom
            py = (M[1][0] * x + M[1][1] * y + M[1][2]) / denom
            new_box.append(max(min(int(px) - left, crop_width), 0))
            new_box.append(max(min(int(py) - top, crop_height), 0))
        box_new_list.append((cls_type, new_box))

    return [image_new, box_new_list]
1.5 完整代码
"""Offline image augmentation helpers that keep quadrilateral labels in sync.

Provides background pasting, rotation, colour jitter and random perspective
warping. Labels are ``(cls_type, [x0, y0, x1, y1, x2, y2, x3, y3])`` tuples.
"""
import math
import os
import random
import shutil

import cv2
import numpy as np
import torchvision.transforms as transforms
from PIL import Image, ImageOps
from tqdm import tqdm


def add_background_randomly(image, background, box_list=None):
    """Paste a randomly resized ``image`` at a random spot on ``background``.

    The image is resized so that its longer side is roughly between 2/3 and
    4/5 of the background's shorter side, then pasted at a uniformly random
    position where it fits entirely inside the background.

    Args:
        image: Source image array; only ``image.shape[:2]`` (height, width)
            is inspected before it is passed to ``cv2.resize``.
        background: Background array. It is copied, so the caller's array is
            left untouched and can be reused for further samples.
        box_list: Optional list of ``(cls_type, rect)`` labels where
            ``rect = [x0, y0, x1, y1, x2, y2, x3, y3]`` holds the left-top,
            right-top, right-bottom and left-bottom corners in that order.
            The input rects are not modified.

    Returns:
        ``[composited_background, new_box_list]`` where ``new_box_list`` has
        the same ``(cls_type, rect)`` layout, expressed in background
        coordinates and clamped to ``[0, width]`` / ``[0, height]``.
    """
    box_list = [] if box_list is None else box_list

    img_height, img_width = image.shape[:2]
    bg_height, bg_width = background.shape[:2]

    # Target size for the image's longer side, relative to the background's
    # shorter side so the resized image always fits.
    bg_short_side = min(bg_height, bg_width)
    max_size = bg_short_side // 5 * 4
    # Integer division can make the 2/3 bound exceed the 4/5 bound on tiny
    # backgrounds (e.g. a short side of 9 gives 6 > 4); keep the range valid.
    min_size = min(bg_short_side // 3 * 2, max_size)
    new_size = random.randint(min_size, max_size)
    resize_multiple = round(new_size / max(img_height, img_width), 4)

    # Never let a dimension collapse to zero for very elongated images.
    new_width = max(int(img_width * resize_multiple), 1)
    new_height = max(int(img_height * resize_multiple), 1)
    image = cv2.resize(image, (new_width, new_height))
    img_height, img_width = image.shape[:2]

    # Pick the paste position (top-left corner of the image on the background).
    height_pos = random.randint(0, bg_height - img_height)
    width_pos = random.randint(0, bg_width - img_width)

    # Work on a copy so repeated calls with the same background do not
    # accumulate previously pasted images.
    background = background.copy()
    background[height_pos:height_pos + img_height,
               width_pos:width_pos + img_width] = image
    canvas_height, canvas_width = background.shape[:2]

    # Map every labelled corner: scale, shift by the paste offset, clamp.
    new_box_list = []
    for cls_type, rect in box_list:
        new_rect = []
        for x, y in zip(rect[0::2], rect[1::2]):
            x = int(x * resize_multiple) + width_pos
            y = int(y * resize_multiple) + height_pos
            new_rect.append(max(min(x, canvas_width), 0))
            new_rect.append(max(min(y, canvas_height), 0))
        new_box_list.append((cls_type, new_rect))

    return [background, new_box_list]


def rotate_image(image, label_box_list=None, angle=90, color=(0, 0, 0), img_scale=1.0):
    """Rotate ``image`` about its centre without cropping and move the labels.

    The output canvas is enlarged to the bounding box of the rotated (and
    scaled) image; uncovered pixels are filled with ``color``.

    Args:
        image: Image array; ``image.shape[:2]`` is read as (height, width).
        label_box_list: Optional list of ``(cls_type, box)`` with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Not modified.
        angle: Rotation angle in degrees, passed unchanged to
            ``cv2.getRotationMatrix2D`` (in OpenCV a positive angle rotates
            counter-clockwise with the origin at the top-left corner).
        color: Border fill colour handed to ``cv2.warpAffine``.
        img_scale: Isotropic scale factor applied together with the rotation.

    Returns:
        ``[image_new, box_new_list]``; box coordinates are integers clamped
        to ``[0, width_new]`` / ``[0, height_new]``.
    """
    label_box_list = [] if label_box_list is None else label_box_list

    height_ori, width_ori = image.shape[:2]
    x_center_ori, y_center_ori = (width_ori // 2, height_ori // 2)

    rotation_matrix = cv2.getRotationMatrix2D((x_center_ori, y_center_ori), angle, img_scale)
    # These already include img_scale, so the new canvas is sized for the
    # scaled image.
    cos = np.abs(rotation_matrix[0, 0])
    sin = np.abs(rotation_matrix[0, 1])

    # Bounding dimensions of the rotated image.
    width_new = int((height_ori * sin) + (width_ori * cos))
    height_new = int((height_ori * cos) + (width_ori * sin))

    # Shift the transform so the old centre lands on the new canvas centre.
    rotation_matrix[0, 2] += (width_new / 2) - x_center_ori
    rotation_matrix[1, 2] += (height_new / 2) - y_center_ori

    image_new = cv2.warpAffine(image, rotation_matrix, (width_new, height_new), borderValue=color)

    # Move the label corners with the same rotation, scale and centre shift
    # as the pixels; otherwise boxes drift away from the objects whenever
    # img_scale != 1 or the new canvas has an odd size.
    box_rot_list = cal_rotate_box(
        label_box_list,
        math.radians(angle),
        (x_center_ori, y_center_ori),
        (width_new / 2, height_new / 2),
        scale=img_scale,
    )

    box_new_list = []
    for cls_type, box_rot in box_rot_list:
        clamped = []
        for x, y in zip(box_rot[0::2], box_rot[1::2]):
            clamped.append(max(min(int(x), width_new), 0))
            clamped.append(max(min(int(y), height_new), 0))
        box_new_list.append((cls_type, clamped))

    return [image_new, box_new_list]


def cal_rotate_box(box_list, angle, ori_center, new_center, scale=1.0):
    """Rotate every corner of every labelled box.

    Args:
        box_list: List of ``(cls_type, box)`` with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``.
        angle: Rotation angle in radians.
        ori_center: ``(x, y)`` rotation centre in the source image.
        new_center: ``(x, y)`` position of that centre in the output image.
        scale: Scale factor applied around the centre (default ``1.0``).

    Returns:
        A new list of ``(cls_type, box)`` with float coordinates; the input
        boxes are left untouched.
    """
    box_list_new = []
    for cls_type, box in box_list:
        box_new = []
        for x, y in zip(box[0::2], box[1::2]):
            box_new.extend(
                cal_rotate_coordinate(x, y, angle, ori_center, new_center, scale)
            )
        box_list_new.append((cls_type, box_new))
    return box_list_new


def cal_rotate_coordinate(x_ori, y_ori, angle, ori_center, new_center, scale=1.0):
    """Rotate one image-space point about ``ori_center``.

    Args:
        x_ori: Point x coordinate in the source image.
        y_ori: Point y coordinate in the source image.
        angle: Rotation angle in radians.
        ori_center: ``(x, y)`` rotation centre in the source image.
        new_center: ``(x, y)`` position of that centre in the output image.
        scale: Scale factor applied around the centre (default ``1.0``).

    Returns:
        ``(x_new, y_new)`` as floats in output-image coordinates.
    """
    # Work in a y-up frame centred on the rotation centre, then flip back.
    x_0 = x_ori - ori_center[0]
    y_0 = ori_center[1] - y_ori
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    x_new = scale * (x_0 * cos_a - y_0 * sin_a) + new_center[0]
    y_new = new_center[1] - scale * (y_0 * cos_a + x_0 * sin_a)
    return (x_new, y_new)


def hue_change(image):
    """Apply up to four independent random colour jitters to ``image``.

    In order, each step is applied with its own probability: brightness
    (0.8, strength 0.5), contrast (0.2, strength 0.2), saturation
    (0.2, strength 0.2) and hue (0.2, strength 0.2).

    Args:
        image: Input accepted by ``torchvision.transforms.ColorJitter``
            (a PIL image or tensor per torchvision's documentation; note the
            other helpers in this module take arrays - confirm the caller
            converts).

    Returns:
        The jittered image, or the input unchanged if no step fired.
    """
    if np.random.rand() < 0.8:
        image = transforms.ColorJitter(brightness=0.5)(image)
    if np.random.rand() < 0.2:
        image = transforms.ColorJitter(contrast=0.2)(image)
    if np.random.rand() < 0.2:
        image = transforms.ColorJitter(saturation=0.2)(image)
    if np.random.rand() < 0.2:
        image = transforms.ColorJitter(hue=0.2)(image)
    return image


def perspective_tranform(image, perspective_rate=0.5, label_box_list=None):
    """Warp ``image`` with a random perspective transform and move the labels.

    The four image corners are mapped to random points, each drawn from its
    own corner region of a canvas ``(1 + perspective_rate)`` times the image
    size. The result is then cropped to the bounding box of the chosen
    corner coordinates.

    Args:
        image: Image array; ``image.shape[:2]`` is read as (height, width).
        perspective_rate: Strength of the distortion. Negative values make
            the random ranges empty and raise ``ValueError`` from
            ``random.randint``.
        label_box_list: Optional list of ``(cls_type, box)`` with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Not modified.

    Returns:
        ``[image_new, box_new_list]``; box coordinates are integers in the
        cropped image's frame, clamped to its width/height.
    """
    label_box_list = [] if label_box_list is None else label_box_list

    img_height, img_width = image.shape[:2]
    points_src = np.float32([
        [0, 0],
        [img_width - 1, 0],
        [img_width - 1, img_height - 1],
        [0, img_height - 1],
    ])

    max_width = int(img_width * (1.0 + perspective_rate))
    max_height = int(img_height * (1.0 + perspective_rate))
    min_width = int(img_width * (1.0 - perspective_rate))
    # Must mirror min_width: with "1.0 + rate" here delta_height is always 0
    # and the transform never distorts vertically.
    min_height = int(img_height * (1.0 - perspective_rate))
    delta_width = (max_width - min_width) // 2
    delta_height = (max_height - min_height) // 2

    # Destination corners: left-top, right-top, right-bottom, left-bottom.
    x0 = random.randint(0, delta_width)
    y0 = random.randint(0, delta_height)
    x1 = random.randint(delta_width + min_width, max_width)
    y1 = random.randint(0, delta_height)
    x2 = random.randint(delta_width + min_width, max_width)
    y2 = random.randint(delta_height + min_height, max_height)
    x3 = random.randint(0, delta_width)
    y3 = random.randint(delta_height + min_height, max_height)
    points_dst = np.float32([[x0, y0], [x1, y1], [x2, y2], [x3, y3]])

    M = cv2.getPerspectiveTransform(points_src, points_dst)
    image_res = cv2.warpPerspective(image, M, (max_width, max_height))

    # Crop to the bounding box of the destination corner coordinates.
    left, top = min(x0, x3), min(y0, y1)
    right, bottom = max(x1, x2), max(y2, y3)
    image_new = image_res[top:bottom, left:right]
    crop_height, crop_width = image_new.shape[:2]

    # Project every label corner with the same homography, then shift it
    # into the cropped frame and clamp.
    box_new_list = []
    for cls_type, box in label_box_list:
        new_box = []
        for x, y in zip(box[0::2], box[1::2]):
            denom = M[2][0] * x + M[2][1] * y + M[2][2]
            px = (M[0][0] * x + M[0][1] * y + M[0][2]) / denom
            py = (M[1][0] * x + M[1][1] * y + M[1][2]) / denom
            new_box.append(max(min(int(px) - left, crop_width), 0))
            new_box.append(max(min(int(py) - top, crop_height), 0))
        box_new_list.append((cls_type, new_box))

    return [image_new, box_new_list]


if __name__ == "__main__":
    # Draw one labelled quadrilateral on a result image and save it.
    # NOTE(review): test_path, file_name, rect and image_res are not defined
    # anywhere in this listing - the excerpt appears to omit the code that
    # loads an image and runs the augmentations; supply them before running.
    img_test_path = os.path.join(test_path, file_name)
    points = np.array(
        [[rect[0], rect[1]], [rect[2], rect[3]], [rect[4], rect[5]], [rect[6], rect[7]]],
        np.int32,
    )
    image_rect = cv2.polylines(image_res, pts=[points], isClosed=True, color=(0, 0, 255), thickness=3)
    cv2.imwrite(img_test_path, image_res)
2 总结
- 2024-12-27数据结构与算法面试题详解及练习
- 2024-12-27网络请求面试题详解与实战
- 2024-12-27数据结构和算法面试真题详解与实战教程
- 2024-12-27网络请求面试真题解析与实战教程
- 2024-12-27数据结构和算法大厂面试真题详解与实战指南
- 2024-12-27TS大厂面试真题解析与应对策略
- 2024-12-27TS大厂面试真题详解与解析
- 2024-12-27网站安全入门:如何识别和修复漏洞
- 2024-12-27SQL注入基础教程
- 2024-12-27初学者指南:理解和修复跨域漏洞