class GradualWarmupScheduler(_LRScheduler):
    """Linearly ramp the learning rate up to ``base_lr * multiplier``.

    For epochs ``0 .. total_epoch`` the learning rate of every param group is
    interpolated linearly between its base value and ``base_lr * multiplier``.
    Once ``last_epoch`` exceeds ``total_epoch`` the rate either stays at the
    target value or, if ``after_scheduler`` was given, is taken from that
    scheduler.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        multiplier: target learning rate = base lr * multiplier. Must be
            greater than 1.
        total_epoch: epoch at which the target learning rate is reached.
            ``0`` disables the ramp: the target rate is used from the start.
        after_scheduler: scheduler that takes over after ``total_epoch``.
            This class calls its ``get_lr()`` and ``step(epoch)``, so it has
            to be an epoch-driven scheduler (e.g. ``StepLR``). A metric-driven
            scheduler such as ``ReduceLROnPlateau`` does not fit that
            interface.

    Raises:
        ValueError: if ``multiplier`` is not greater than 1.
    """

    def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None):
        self.multiplier = multiplier
        if self.multiplier <= 1.:
            raise ValueError('multiplier should be greater than 1.')
        self.total_epoch = total_epoch
        self.after_scheduler = after_scheduler
        # Becomes True once control has been handed to ``after_scheduler``.
        self.finished = False
        # Keep this call last: the base constructor already queries get_lr(),
        # which reads the attributes assigned above.
        super().__init__(optimizer)

    def get_lr(self):
        """Return the learning rate of each param group for ``last_epoch``."""
        # ``total_epoch == 0`` means "no warmup"; treating it as already over
        # avoids dividing by zero in the interpolation below.
        warmup_over = self.total_epoch == 0 or self.last_epoch > self.total_epoch

        if not warmup_over:
            return [
                base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.)
                for base_lr in self.base_lrs
            ]

        target_lrs = [base_lr * self.multiplier for base_lr in self.base_lrs]
        if self.after_scheduler is None:
            return target_lrs

        if not self.finished:
            # Hand over exactly once: the follow-up scheduler continues from
            # the warmed-up rate rather than from the original base rate.
            self.after_scheduler.base_lrs = target_lrs
            self.finished = True
        return self.after_scheduler.get_lr()

    def step(self, epoch=None):
        """Advance one epoch, delegating to ``after_scheduler`` after warmup."""
        if self.finished and self.after_scheduler is not None:
            result = self.after_scheduler.step(epoch)
            # The base class' step() is bypassed here, so mirror the delegate's
            # last computed rates to keep this object's get_last_lr() current.
            if hasattr(self.after_scheduler, 'get_last_lr'):
                self._last_lr = self.after_scheduler.get_last_lr()
            return result
        return super().step(epoch)
Linear scaling learning rate
Linear scaling learning rate是在论文[3]中针对比较大的batch size而提出的一种方法。
def _smooth_label(self, target, length, smooth_factor):
    """Build label-smoothed one-hot targets.

    Args:
        target: batch of class labels, in the form
            [label1, label2, ..., label_batchsize]
        length: width of the one-hot encoding (number of classes)
        smooth_factor: amount of smoothing to apply

    Returns:
        the smoothed one-hot labels, moved to ``target``'s device
    """
    on_value = 1 - smooth_factor
    # Every class receives the same share of the smoothing mass.
    shared_mass = smooth_factor / length

    smoothed = self._one_hot(target, length, value=on_value)
    smoothed += shared_mass
    return smoothed.to(target.device)
Random image cropping and patching
Random image cropping and patching (RICAP)[7]方法随机裁剪四个图片的一部分,然后把它们拼接为一个图片,同时混合这四个图片的标签。
# Loss and accuracy as weighted sums over the four (weight, label) pairs
# held in W_ and c_ (presumably one pair per RICAP patch; both are defined
# outside this snippet).
patch_ids = range(4)
loss = sum(W_[k] * criterion(output, c_[k]) for k in patch_ids)
acc = sum(W_[k] * accuracy(output, c_[k])[0] for k in patch_ids)
class Cutout(object):
    """Randomly mask out one or more patches from an image.

    Args:
        n_holes (int): Number of patches to cut out of each image.
        length (int): The length (in pixels) of each square patch.
    """

    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        """Zero out ``n_holes`` square patches of ``img``.

        Patch centres are drawn uniformly over the image, so a patch that
        overlaps the border is clipped and ends up smaller than
        ``length x length``.

        Args:
            img (Tensor): Tensor image of size (C, H, W).

        Returns:
            Tensor: Image with n_holes of dimension length x length cut out
            of it. The input tensor is not modified.
        """
        h = img.size(1)
        w = img.size(2)

        mask = np.ones((h, w), np.float32)
        half = self.length // 2

        for _ in range(self.n_holes):
            y = np.random.randint(h)
            x = np.random.randint(w)

            # Clip the patch to the image bounds.
            y1, y2 = max(y - half, 0), min(y + half, h)
            x1, x2 = max(x - half, 0), min(x + half, w)
            mask[y1:y2, x1:x2] = 0.

        # new_tensor gives the mask img's dtype and device; the (H, W) mask
        # then broadcasts across the channel dimension.
        return img * img.new_tensor(mask)
Random erasing[6]其实和cutout非常类似,也是一种模拟物体遮挡情况的数据增强方法。区别在于,cutout是把图片中随机抽中的矩形区域的像素值置为0,相当于裁剪掉,random erasing是用随机数或者数据集中像素的平均值替换原来的像素值。而且,cutout每次裁剪掉的区域大小是固定的,Random erasing替换掉的区域大小是随机的。
Random erasing代码如下:
from __future__ import absolute_import from torchvision.transforms import * from PIL import Image import random import math import numpy as np import torch
class RandomErasing(object): ''' probability: The probability that the operation will be performed. sh: max erasing area r1: min aspect ratio mean: erasing value ''' def __init__(self, probability = 0.5, sl = 0.02, sh = 0.4, r1 = 0.3, mean=[0.4914, 0.4822, 0.4465]): self.probability = probability self.mean = mean self.sl = sl self.sh = sh self.r1 = r1 def __call__(self, img): if random.uniform(0, 1) > self.probability: return img
for attempt in range(100): area = img.size()[1] * img.size()[2]
target_area = random.uniform(self.sl, self.sh) * area aspect_ratio = random.uniform(self.r1, 1/self.r1)
h = int(round(math.sqrt(target_area * aspect_ratio))) w = int(round(math.sqrt(target_area / aspect_ratio)))