DFCIL Code Walkthrough
ICCV 2021 paper: Always Be Dreaming
Source code
0. Running the Experiments
Run the provided baseline scripts:
cd /home/yule/Data/CIL/DFCIL
conda activate DFCIL
sh experiments/cifar100-fivetask.sh
sh experiments/cifar100-tentask.sh
sh experiments/cifar100-twentytask.sh
Results comparison on CIFAR-100 (with a 2000-image coreset)
| Tasks | 5 | 10 | 20 |
| --- | --- | --- | --- |
| UB | 69.9 ± 0.2 | 69.9 ± 0.2 | 69.9 ± 0.2 |
| Naive Rehearsal | 34.0 ± 0.2 | 24.0 ± 1.0 | 14.9 ± 0.7 |
| LwF | 39.4 ± 0.3 | 27.4 ± 0.8 | 16.6 ± 0.4 |
| E2E | 47.4 ± 0.8 | 38.4 ± 1.3 | 32.7 ± 1.9 |
| BiC | 53.7 ± 0.4 | 45.9 ± 1.8 | 37.5 ± 3.2 |
| Ours (no coreset) | 43.9 ± 0.9 | 33.7 ± 1.2 | 20.0 ± 1.4 |
Command-line arguments (the resolved config for a run, followed by the launch commands for ABD and the LwF baseline)
DW: false
batch_size: 128
beta: 1.0
dataroot: data
dataset: CIFAR100
deep_inv_params:
- 0.001
- 50.0
- 0.001
- 1000.0
- 1
first_split_size: 5
gen_model_name: CIFAR_GEN
gen_model_type: generator
gpuid:
- 0
learner_name: AlwaysBeDreaming
learner_type: datafree
load_model_dir: null
log_dir: outputs/my_test/DFCIL-twentytask/CIFAR100
lr: 0.1
max_task: -1
memory: 0
model_name: resnet32
model_type: resnet
momentum: 0.9
mu: 0.1
optimizer: SGD
oracle_flag: false
other_split_size: 5
overwrite: 0
power_iters: 10000
rand_split: true
repeat: 1
schedule:
- 100
- 150
- 200
- 250
schedule_type: decay
temp: 2.0
train_aug: true
validation: false
weight_decay: 0.0002
workers: 8
# ABD (Always Be Dreaming), learner_type=datafree
python -u run_dfcil.py --dataset CIFAR100 --train_aug --rand_split --gpuid $GPUID --repeat $REPEAT \
--first_split_size $SPLIT --other_split_size $SPLIT --schedule $SCHEDULE --schedule_type decay --batch_size $BS \
--optimizer $OPT --lr $LR --momentum $MOM --weight_decay $WD \
--mu 1e-1 --memory 0 --model_name $MODELNAME --model_type resnet \
--learner_type datafree --learner_name AlwaysBeDreaming \
--gen_model_name CIFAR_GEN --gen_model_type generator \
--beta 1 --power_iters $PI --deep_inv_params 1e-3 5e1 1e-3 1e3 1 \
--overwrite $OVERWRITE --max_task $MAXTASK --log_dir ${OUTDIR}/abd
# LwF baseline, learner_type=kd
python -u run_dfcil.py --dataset CIFAR100 --train_aug --rand_split --gpuid $GPUID --repeat $REPEAT \
--first_split_size $SPLIT --other_split_size $SPLIT --schedule $SCHEDULE --schedule_type decay --batch_size $BS \
--optimizer $OPT --lr $LR --momentum $MOM --weight_decay $WD \
--mu 1 --memory 0 --model_name $MODELNAME --model_type resnet \
--learner_type kd --learner_name LWF \
--overwrite $OVERWRITE --max_task $MAXTASK --log_dir ${OUTDIR}/lwf
1. run_dfcil.py
The overall flow of the script:
- Parse and save the command-line arguments
- Load previous results, if any
- Train the model
- Report the results
1.1 整体训练流程
首先args.repeat 参数控制整个训练的次数,默认值=1
整个训练流程如下:
- 设定一个trainer,该定义文件位于
trainer.py ,并初始化第一次训练时的相关参数
trainer = Trainer(args, seed, metric_keys, save_keys)
- Train the model:
avg_metrics = trainer.train(avg_metrics)
- Evaluate the model:
avg_metrics = trainer.evaluate(avg_metrics)
- Save the results. A sketch of the outer repeat loop that drives these steps follows.
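As a rough sketch of the outer loop in run_dfcil.py (assuming, as is common, that the seed is derived from the repeat index; this is not copied from the repo):

# Hedged sketch of the outer repeat loop: each repeat runs with its own
# seed, and the metrics are accumulated across repeats.
for r in range(args.repeat):
    seed = r
    trainer = Trainer(args, seed, metric_keys, save_keys)
    avg_metrics = trainer.train(avg_metrics)
    avg_metrics = trainer.evaluate(avg_metrics)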
1.2 trainer
This part covers how the Trainer is initialized:
trainer = Trainer(args, seed, metric_keys, save_keys)
Initialization
class Trainer:
    def __init__(self, args, seed, metric_keys, save_keys):
        # store basic run parameters
        self.seed = seed
        self.metric_keys = metric_keys
        self.save_keys = save_keys
        self.log_dir = args.log_dir
        self.batch_size = args.batch_size
        self.workers = args.workers

        # select the dataset
        self.top_k = 1
        if args.dataset == 'CIFAR10':
            Dataset = dataloaders.iCIFAR10
            num_classes = 10
            self.dataset_size = [32, 32, 3]
        elif args.dataset == 'CIFAR100':
            Dataset = dataloaders.iCIFAR100
            num_classes = 100
            self.dataset_size = [32, 32, 3]
        else:
            raise ValueError('Dataset not implemented!')
The dataset's classes are then shuffled and split into tasks according to args.first_split_size and args.other_split_size (see the sketch after this list).
The key attributes are:
self.tasks
self.task_names
self.max_task = len(self.task_names)
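As a self-contained sketch of the splitting logic (the variable names are assumptions, not the repo's own): with first_split_size=5 and other_split_size=5 on CIFAR100, this produces 20 tasks of 5 classes each.

import random

num_classes, seed = 100, 0
first_split_size, other_split_size = 5, 5

# shuffle the class order (only applied when --rand_split is set)
class_order = list(range(num_classes))
random.seed(seed)
random.shuffle(class_order)

# cut the shuffled order into tasks of the configured sizes
tasks, p = [], 0
while p < num_classes:
    size = first_split_size if p == 0 else other_split_size
    tasks.append(class_order[p:p + size])
    p += size
task_names = [str(i + 1) for i in range(len(tasks))]
print(len(tasks))  # 20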
train_transform = dataloaders.utils.get_transform(
dataset=args.dataset, phase='train', aug=args.train_aug, dgr=self.dgr)
test_transform = dataloaders.utils.get_transform(
dataset=args.dataset, phase='test', aug=args.train_aug, dgr=self.dgr)
self.train_dataset = Dataset(args.dataroot, train=True, tasks=self.tasks,
download_flag=True, transform=train_transform,
seed=self.seed, validation=args.validation)
self.test_dataset = Dataset(args.dataroot, train=False, tasks=self.tasks,
download_flag=False, transform=test_transform,
seed=self.seed, validation=args.validation)
self.learner_type, self.learner_name = args.learner_type, args.learner_name
self.learner = learners.__dict__[self.learner_type].__dict__[
self.learner_name](self.learner_config)
1.3 Model Training
Training is driven by:
avg_metrics = trainer.train(avg_metrics)
trainer.train() loops over the tasks; each iteration involves the following steps:
task = self.tasks_logits[i]
self.train_dataset.load_dataset(i, train=True)
self.add_dim = len(task)
load_dataset is defined as follows, where t is the current task index (a short usage example follows the definition):
def load_dataset(self, t, train=True):
    if train:
        # training uses only the data of the current task t
        self.data, self.targets = self.archive[t]
    else:
        # testing uses the data of all tasks seen so far (0..t)
        self.data = np.concatenate(
            [self.archive[s][0] for s in range(t+1)], axis=0)
        self.targets = np.concatenate(
            [self.archive[s][1] for s in range(t+1)], axis=0)
    self.t = t
    print(np.unique(self.targets))
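A hypothetical usage example (assuming 5 classes per task): training sees only the current task, while testing accumulates all tasks seen so far.

train_dataset.load_dataset(2, train=True)    # prints the 5 class ids of task 2
test_dataset.load_dataset(2, train=False)    # prints the 15 class ids of tasks 0..2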
The learner's valid output dimension is then expanded:
self.learner.add_valid_output_dim(self.add_dim)
Its definition:
def add_valid_output_dim(self, dim=0):
self.log('Incremental class: Old valid output dimension:',
self.valid_out_dim)
self.valid_out_dim += dim
self.log('Incremental class: New Valid output dimension:',
self.valid_out_dim)
return self.valid_out_dim
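With 5 classes per task, this simply grows the number of valid logits as tasks arrive:

# valid_out_dim after each call (assuming 5 classes per task):
learner.add_valid_output_dim(5)   # task 0 -> 5
learner.add_valid_output_dim(5)   # task 1 -> 10
learner.add_valid_output_dim(5)   # task 2 -> 15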
Next, the coreset (if any) is appended and the dataloaders are built:
self.train_dataset.append_coreset(only=False)
train_loader = DataLoader(self.train_dataset, batch_size=self.batch_size,
shuffle=True, drop_last=True, num_workers=int(self.workers))
self.test_dataset.load_dataset(i, train=False)
test_loader = DataLoader(self.test_dataset, batch_size=self.batch_size,
shuffle=False, drop_last=False, num_workers=self.workers)
avg_train_time = self.learner.learn_batch(train_loader, self.train_dataset, model_save_dir, test_loader)
This calls datafree.DeepInversionGenBN.learn_batch; see 3.2 DeepInversionGenBN for details.
After a task finishes training, the current model is frozen and saved as the teacher for later tasks:
self.previous_teacher = Teacher(
    solver=copy.deepcopy(self.model),
    generator=self.generator,
    gen_opt=self.generator_optimizer,
    img_shape=(-1, train_dataset.nch, train_dataset.im_size, train_dataset.im_size),
    iters=self.power_iters,
    deep_inv_params=self.deep_inv_params,
    class_idx=np.arange(self.valid_out_dim),
    train=need_train,
    config=self.config)
Sampling is then performed; see 4. Teacher for details:
self.sample(self.previous_teacher, self.batch_size,self.device, return_scores=False)
teacher.sample(dim, device, return_scores=return_scores)
Finally, the model is saved and every task seen so far is evaluated:
self.learner.save_model(model_save_dir)
acc_table.append(self.task_eval(j))
1.4 Model Evaluation
Evaluation is driven by:
avg_metrics = trainer.evaluate(avg_metrics)
2. dataloader
2.1 Dataset
This part covers how the datasets are built. The code lives in dataloaders/; currently CIFAR10 and CIFAR100 are implemented.
An iDataset class is defined as the parent class of all datasets.
Key attributes set during initialization:
class iDataset(data.Dataset):
def __init__(self, root,
train=True, transform=None, download_flag=False,
tasks=None, seed=-1, validation=False, kfolds=5):
self.tasks = tasks
self.class_mapping = {}
self.data = np.asarray(self.data)
self.targets = np.asarray(self.targets)
self.archive.append((self.data[locs].copy(), self.targets[locs].copy()))
self.ic = False
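A self-contained sketch of how archive is plausibly built (inferred from load_dataset above, which indexes self.archive[t]; the dummy data below is an assumption):

import numpy as np

data = np.zeros((100, 32, 32, 3), dtype=np.uint8)   # dummy images
targets = np.repeat(np.arange(10), 10)              # dummy labels, 10 classes
tasks = [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]          # two 5-class tasks

# one (data, targets) slice per task, indexable as archive[t]
archive = []
for task in tasks:
    locs = np.isin(targets, task)                   # mask of this task's samples
    archive.append((data[locs].copy(), targets[locs].copy()))
print([a[0].shape[0] for a in archive])             # [50, 50]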
2.2 Loading the Dataset
The call pattern is:
Dataset = dataloaders.iCIFAR100
train_transform = dataloaders.utils.get_transform(
dataset=args.dataset, phase='train', aug=args.train_aug, dgr=self.dgr)
test_transform = dataloaders.utils.get_transform(
dataset=args.dataset, phase='test', aug=args.train_aug, dgr=self.dgr)
self.train_dataset = Dataset(args.dataroot, train=True, tasks=self.tasks,
download_flag=True, transform=train_transform,
seed=self.seed, validation=args.validation)
self.test_dataset = Dataset(args.dataroot, train=False, tasks=self.tasks,
download_flag=False, transform=test_transform,
seed=self.seed, validation=args.validation)
Data augmentation
dataloaders.utils.get_transform
The augmentations used are:
transform_list.extend([
transforms.ColorJitter(brightness=63/255, contrast=0.8),
transforms.RandomHorizontalFlip(p=0.5),
transforms.RandomCrop(crop_size, padding=4),
transforms.ToTensor(),
transforms.Normalize(dset_mean, dset_std),
])
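As a self-contained version of the above (the CIFAR-100 mean/std below are the commonly used statistics and are an assumption, not read from dataloaders/utils.py):

from torchvision import transforms

dset_mean = (0.5071, 0.4865, 0.4409)   # assumed CIFAR-100 statistics
dset_std = (0.2673, 0.2564, 0.2762)
crop_size = 32

train_transform = transforms.Compose([
    transforms.ColorJitter(brightness=63/255, contrast=0.8),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomCrop(crop_size, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(dset_mean, dset_std),
])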
Dataset initialization is described in 2.1 Dataset.
utils.py
Located at dataloaders/utils.py; handles dataset download, data augmentation, and related utilities.
loader.py
Located at dataloaders/loader.py; initializes the datasets.
3. datafree.py
This file holds the learners' initialization configs and training code; it is located at learners/datafree.py.
Taking the paper's AlwaysBeDreaming learner as the example, the configuration is assembled first:
self.learner_config = {'num_classes': num_classes,
'lr': args.lr,
'momentum': args.momentum,
'weight_decay': args.weight_decay,
'schedule': args.schedule,
'schedule_type': args.schedule_type,
'model_type': args.model_type,
'model_name': args.model_name,
'gen_model_type': args.gen_model_type,
'gen_model_name': args.gen_model_name,
'optimizer': args.optimizer,
'gpuid': args.gpuid,
'memory': args.memory,
'temp': args.temp,
'out_dim': num_classes,
'overwrite': args.overwrite == 1,
'beta': args.beta,
'mu': args.mu,
'DW': args.DW,
'batch_size': args.batch_size,
'power_iters': args.power_iters,
'deep_inv_params': args.deep_inv_params,
'tasks': self.tasks_logits,
'top_k': self.top_k,
}
self.learner_type, self.learner_name = args.learner_type, args.learner_name
Then the learner is instantiated:
self.learner = learners.__dict__[self.learner_type].__dict__[self.learner_name](self.learner_config)
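The __dict__ lookup is just dynamic attribute access; with the default arguments of the ABD run it resolves to the direct call:

# equivalent to the line above for learner_type='datafree',
# learner_name='AlwaysBeDreaming'
self.learner = learners.datafree.AlwaysBeDreaming(self.learner_config)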
3.1 AlwaysBeDreaming
Taking the paper's model as the example, its initialization is:
class AlwaysBeDreaming(DeepInversionGenBN):
def __init__(self, learner_config):
super(AlwaysBeDreaming, self).__init__(learner_config)
self.kl_loss = nn.KLDivLoss(reduction='batchmean').cuda()
DeepInversionGenBN is described in 3.2 DeepInversionGenBN.
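A note on nn.KLDivLoss, since its convention is easy to get wrong: the input must be log-probabilities and the target must be probabilities. A minimal illustration of temperature-scaled distillation with it (the tensors are dummies; this is not claimed to be the repo's exact loss):

import torch
import torch.nn as nn
import torch.nn.functional as F

kl_loss = nn.KLDivLoss(reduction='batchmean')
T = 2.0                               # matches the temp config entry above
student_logits = torch.randn(8, 10)   # dummy batch
teacher_logits = torch.randn(8, 10)

# input: log-probs of the student; target: probs of the teacher
loss = kl_loss(F.log_softmax(student_logits / T, dim=1),
               F.softmax(teacher_logits / T, dim=1)) * (T * T)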
3.2 DeepInversionGenBN
This is the parent class of AlwaysBeDreaming; it defines shared functionality for its subclasses.
Initialization
A few of the important attributes are shown here; see the source for the full initialization.
class DeepInversionGenBN(NormalNN):
def __init__(self, learner_config):
super(DeepInversionGenBN, self).__init__(learner_config)
self.inversion_replay = False
self.previous_teacher = None
self.kd_criterion = nn.MSELoss(reduction="none")
self.generator = self.create_generator()
self.generator_optimizer = Adam(params=self.generator.parameters(), lr=self.deep_inv_params[0])
if self.gpu:
self.cuda_gen()
NormalNN is described in 3.3 NormalNN.
Training function
def learn_batch(self, train_loader, train_dataset, model_save_dir, val_loader=None)
The main training steps are:
- Reset the optimizer and data weighting:
self.data_weighting(train_dataset)
- Validate accuracy on the validation set
This calls NormalNN.validation; see 3.3 NormalNN.
In short: (1) forward the model to get predictions; (2) compute accuracy against the ground truth.
if val_loader is not None:
self.validation(val_loader)
- Train the model (a sketch of the accuracy helper follows this list):
loss, loss_class, loss_kd, output = self.update_model(
    x_com, y_com, y_hat_com,
    dw_force=dw_cls, kd_index=np.arange(len(x), len(x_com)))
accumulate_acc(output[:self.batch_size], y_com[:self.batch_size],
               task, acc, topk=(self.top_k,))
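accumulate_acc maintains a running top-k accuracy across batches. A hedged sketch of the underlying top-k computation (the helper's exact structure is an assumption):

import torch

def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    _, pred = output.topk(maxk, dim=1)        # (B, maxk) predicted class ids
    correct = pred.eq(target.view(-1, 1))     # (B, maxk) hit mask
    return [correct[:, :k].any(dim=1).float().mean().item() * 100
            for k in topk]

output = torch.randn(4, 10)
target = torch.tensor([1, 2, 3, 4])
print(accuracy(output, target, topk=(1,)))    # e.g. [25.0]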
3.3 NormalNN
This is the base class of all learners; it inherits from nn.Module, and every learner builds on it.
Initialization
A few of the important attributes are shown here; see the source for the full initialization.
class NormalNN(nn.Module):
def __init__(self, learner_config):
super(NormalNN, self).__init__()
self.model = self.create_model()
self.memory_size = self.config['memory']
self.dw = self.config['DW']
if self.memory_size <= 0:
self.dw = False
self.criterion_fn = nn.CrossEntropyLoss(reduction="none")
self.init_optimizer()
The key step is creating the model:
self.model = self.create_model()
It resolves via dynamic lookup; the model constructors are defined in models/resnet.py (currently only resnet32):
def create_model(self):
cfg = self.config
model = models.__dict__[cfg['model_type']].__dict__[
cfg['model_name']](out_dim=self.out_dim)
return model
Validation function
def validation(self, dataloader, model=None, task_in=None, verbal=True)
The key lines:
output = model.forward(input)[:, :self.valid_out_dim]
acc = accumulate_acc(output, target, task,acc, topk=(self.top_k,))
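Slicing the logits to [:, :self.valid_out_dim] masks out classes that have not been seen yet, so they can never be predicted. A minimal, hedged sketch of the loop these two lines live in (the model and dataloader below are dummies; the loop structure is an assumption):

import torch
import torch.nn as nn

model = nn.Linear(32, 100)             # stand-in for resnet32
valid_out_dim = 10                     # e.g. after two 5-class tasks
dataloader = [(torch.randn(8, 32), torch.randint(0, 10, (8,)))] * 2

model.eval()
correct, total = 0, 0
with torch.no_grad():
    for input, target in dataloader:
        output = model(input)[:, :valid_out_dim]   # mask unseen classes
        correct += (output.argmax(dim=1) == target).sum().item()
        total += target.numel()
print(100.0 * correct / total)         # top-1 accuracy in percent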
4. Teacher
The code lives in learners/datafree_helper.py.
When a task finishes, the old model becomes the Teacher model with frozen parameters. It is constructed as follows:
self.previous_teacher = Teacher(
    solver=copy.deepcopy(self.model),
    generator=self.generator,
    gen_opt=self.generator_optimizer,
    img_shape=(-1, train_dataset.nch, train_dataset.im_size, train_dataset.im_size),
    iters=self.power_iters,
    deep_inv_params=self.deep_inv_params,
    class_idx=np.arange(self.valid_out_dim),
    train=need_train,
    config=self.config)
Initialization
A few key attributes are shown; see the source for the full details.
class Teacher(nn.Module):
    def __init__(self, solver, generator, gen_opt, img_shape, iters,
                 class_idx, deep_inv_params, train=True, config=None):
        self.solver = solver
        self.generator = generator
        self.img_shape = img_shape
        self.criterion = nn.CrossEntropyLoss()
        self.mse_loss = nn.MSELoss(reduction="none").cuda()
sample
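The notes break off here. Based only on the call sites shown earlier (teacher.sample(dim, device, return_scores=return_scores)), an assumption-level sketch of what sample plausibly does: synthesize a batch with the frozen generator, then pseudo-label it with the frozen solver. This is a reconstruction, not the repo's code:

import torch

def sample(self, size, device, return_scores=True):
    self.generator.eval()
    with torch.no_grad():
        x = self.generator.sample(size)                     # assumed generator API
        y_hat = self.solver.forward(x)[:, self.class_idx]   # old-class logits
        _, y = torch.max(y_hat, dim=1)                      # hard pseudo-labels
    return (x, y, y_hat) if return_scores else (x, y)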