Aapter-transformers库包的使用练习。 实验结果表明: (1)不使用Adapter 内存占用率:2323MB (2)实验3 内存占用率:1885MB (2)实验4 内存占用率:1911MB (2)实验5 内存占用率:1917MB (2)实验6 内存占用率:2107MB (2)实验7 内存占用率:1913MB (2)实验8 内存占用率:1973MB (2)实验9 内存占用率:2253MB
调用第三方库包
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch
import transformers
import random
from tqdm import tqdm
device = "cuda" if torch.cuda.is_available() else "cpu"
D:\Environment\Anconda\envs\py388\lib\site-packages\tqdm-4.63.0-py3.8.egg\tqdm\auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
from .autonotebook import tqdm as notebook_tqdm
torch.cuda.empty_cache()
import os
import datetime
def printbar():
nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print("\n"+"=========="*8 + "%s"%nowtime)
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
def read_imdb_split(split_dir):
texts = []
labels = []
spilt_data=pd.read_csv(split_dir,sep='\t'
)
spilt_data.columns=['text','label']
texts=list(spilt_data.text)
labels=list(spilt_data.label)
return texts, labels
path='D://Dataset//GLUE//SST//SST-2//'
train_texts, train_labels = read_imdb_split(path+'train.tsv')
test_texts, test_labels = read_imdb_split(path+'dev.tsv')
数据分析
df=pd.read_csv('D://Dataset//GLUE//SST//SST-2//dev.tsv',sep='\t')
df.head(5)
| sentence | label |
---|
0 | it 's a charming and often affecting journey . | 1 |
---|
1 | unflinchingly bleak and desperate | 0 |
---|
2 | allows us to hope that nolan is poised to emba... | 1 |
---|
3 | the acting , costumes , music , cinematography... | 1 |
---|
4 | it 's slow -- very , very slow . | 0 |
---|
df.shape
(872, 2)
df.columns=['text','label']
plt.hist(df['text'].apply(lambda x: min(len(x.split()), 1000)), bins=20)
plt.ylabel("Number of texts")
plt.xlabel("Word count")
print(f"average word count: {np.mean(df['text'].apply(lambda x: len(x.split())))}")
average word count: 19.548165137614678
数据处理
random_seed = 0
numpy_seed = 1
torch_seed = 2
cuda_seed = 3
random.seed(random_seed)
np.random.seed(numpy_seed)
torch.manual_seed(torch_seed)
torch.cuda.manual_seed_all(cuda_seed)
from sklearn.model_selection import train_test_split
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts,
train_labels, test_size=.4,random_state=random_seed)
model_path='D://Model//bert-base-cased//'
from transformers import BertForSequenceClassification,BertTokenizer
tokenizer = BertTokenizer.from_pretrained(model_path)
tokenizer
PreTrainedTokenizer(name_or_path='D://Model//bert-base-cased//', vocab_size=28996, model_max_len=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})
maxlen=20
train_encodings = tokenizer(train_texts[:128],
padding='max_length',
truncation=True,
max_length=maxlen,
return_tensors='pt')
val_encodings = tokenizer(val_texts[:64],
padding='max_length',
truncation=True,
max_length=maxlen,
return_tensors='pt')
test_encodings = tokenizer(test_texts[:32],
padding='max_length',
truncation=True,
max_length=maxlen,
return_tensors='pt')
import gc
gc.collect()
20
class IMDbDataset(torch.utils.data.Dataset):
def __init__(self, encodings, labels):
self.encodings = encodings
self.labels = labels
def __getitem__(self, idx):
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
item['labels'] = torch.tensor(self.labels[idx])
return item
def __len__(self):
return len(self.labels)
train_dataset = IMDbDataset(train_encodings, train_labels[:128])
val_dataset = IMDbDataset(val_encodings, val_labels[:64])
test_dataset = IMDbDataset(test_encodings, test_labels[:32])
from torch.utils.data import DataLoader
train_data_loader = DataLoader(
dataset = train_dataset,
batch_size=32,
shuffle=True
)
val_data_loader = DataLoader(
dataset = val_dataset,
batch_size=32,
shuffle=True
)
test_data_loader = DataLoader(
dataset = val_dataset,
batch_size=32,
shuffle=True
)
model = BertForSequenceClassification.from_pretrained(model_path)
Some weights of the model checkpoint at D://Model//bert-base-cased// were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at D://Model//bert-base-cased// and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
model
BertForSequenceClassification(
(shared_parameters): ModuleDict()
(bert): BertModel(
(shared_parameters): ModuleDict()
(invertible_adapters): ModuleDict()
(embeddings): BertEmbeddings(
(word_embeddings): Embedding(28996, 768, padding_idx=0)
(position_embeddings): Embedding(512, 768)
(token_type_embeddings): Embedding(2, 768)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(encoder): BertEncoder(
(layer): ModuleList(
(0): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(1): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(2): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(3): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(4): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(5): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(6): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(7): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(8): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(9): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(10): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(11): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
)
)
(pooler): BertPooler(
(dense): Linear(in_features=768, out_features=768, bias=True)
(activation): Tanh()
)
(prefix_tuning): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
(dropout): Dropout(p=0.1, inplace=False)
(classifier): Linear(in_features=768, out_features=2, bias=True)
)
import gc
gc.collect()
462
采用adapter的操作
实验2
adapter_name = model.load_adapter('./tmp/pre/', config='pfeiffer')
adapter_name
'sst-2'
model.set_active_adapters(adapter_name)
model
BertForSequenceClassification(
(shared_parameters): ModuleDict()
(bert): BertModel(
(shared_parameters): ModuleDict()
(invertible_adapters): ModuleDict()
(embeddings): BertEmbeddings(
(word_embeddings): Embedding(28996, 768, padding_idx=0)
(position_embeddings): Embedding(512, 768)
(token_type_embeddings): Embedding(2, 768)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(encoder): BertEncoder(
(layer): ModuleList(
(0): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict(
(sst-2): Adapter(
(non_linearity): Activation_Function_Class(
(f): ReLU()
)
(adapter_down): Sequential(
(0): Linear(in_features=768, out_features=48, bias=True)
(1): Activation_Function_Class(
(f): ReLU()
)
)
(adapter_up): Linear(in_features=48, out_features=768, bias=True)
)
)
(adapter_fusion_layer): ModuleDict()
)
)
(1): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict(
(sst-2): Adapter(
(non_linearity): Activation_Function_Class(
(f): ReLU()
)
(adapter_down): Sequential(
(0): Linear(in_features=768, out_features=48, bias=True)
(1): Activation_Function_Class(
(f): ReLU()
)
)
(adapter_up): Linear(in_features=48, out_features=768, bias=True)
)
)
(adapter_fusion_layer): ModuleDict()
)
)
(2): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict(
(sst-2): Adapter(
(non_linearity): Activation_Function_Class(
(f): ReLU()
)
(adapter_down): Sequential(
(0): Linear(in_features=768, out_features=48, bias=True)
(1): Activation_Function_Class(
(f): ReLU()
)
)
(adapter_up): Linear(in_features=48, out_features=768, bias=True)
)
)
(adapter_fusion_layer): ModuleDict()
)
)
(3): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict(
(sst-2): Adapter(
(non_linearity): Activation_Function_Class(
(f): ReLU()
)
(adapter_down): Sequential(
(0): Linear(in_features=768, out_features=48, bias=True)
(1): Activation_Function_Class(
(f): ReLU()
)
)
(adapter_up): Linear(in_features=48, out_features=768, bias=True)
)
)
(adapter_fusion_layer): ModuleDict()
)
)
(4): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict(
(sst-2): Adapter(
(non_linearity): Activation_Function_Class(
(f): ReLU()
)
(adapter_down): Sequential(
(0): Linear(in_features=768, out_features=48, bias=True)
(1): Activation_Function_Class(
(f): ReLU()
)
)
(adapter_up): Linear(in_features=48, out_features=768, bias=True)
)
)
(adapter_fusion_layer): ModuleDict()
)
)
(5): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict(
(sst-2): Adapter(
(non_linearity): Activation_Function_Class(
(f): ReLU()
)
(adapter_down): Sequential(
(0): Linear(in_features=768, out_features=48, bias=True)
(1): Activation_Function_Class(
(f): ReLU()
)
)
(adapter_up): Linear(in_features=48, out_features=768, bias=True)
)
)
(adapter_fusion_layer): ModuleDict()
)
)
(6): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict(
(sst-2): Adapter(
(non_linearity): Activation_Function_Class(
(f): ReLU()
)
(adapter_down): Sequential(
(0): Linear(in_features=768, out_features=48, bias=True)
(1): Activation_Function_Class(
(f): ReLU()
)
)
(adapter_up): Linear(in_features=48, out_features=768, bias=True)
)
)
(adapter_fusion_layer): ModuleDict()
)
)
(7): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict(
(sst-2): Adapter(
(non_linearity): Activation_Function_Class(
(f): ReLU()
)
(adapter_down): Sequential(
(0): Linear(in_features=768, out_features=48, bias=True)
(1): Activation_Function_Class(
(f): ReLU()
)
)
(adapter_up): Linear(in_features=48, out_features=768, bias=True)
)
)
(adapter_fusion_layer): ModuleDict()
)
)
(8): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict(
(sst-2): Adapter(
(non_linearity): Activation_Function_Class(
(f): ReLU()
)
(adapter_down): Sequential(
(0): Linear(in_features=768, out_features=48, bias=True)
(1): Activation_Function_Class(
(f): ReLU()
)
)
(adapter_up): Linear(in_features=48, out_features=768, bias=True)
)
)
(adapter_fusion_layer): ModuleDict()
)
)
(9): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict(
(sst-2): Adapter(
(non_linearity): Activation_Function_Class(
(f): ReLU()
)
(adapter_down): Sequential(
(0): Linear(in_features=768, out_features=48, bias=True)
(1): Activation_Function_Class(
(f): ReLU()
)
)
(adapter_up): Linear(in_features=48, out_features=768, bias=True)
)
)
(adapter_fusion_layer): ModuleDict()
)
)
(10): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict(
(sst-2): Adapter(
(non_linearity): Activation_Function_Class(
(f): ReLU()
)
(adapter_down): Sequential(
(0): Linear(in_features=768, out_features=48, bias=True)
(1): Activation_Function_Class(
(f): ReLU()
)
)
(adapter_up): Linear(in_features=48, out_features=768, bias=True)
)
)
(adapter_fusion_layer): ModuleDict()
)
)
(11): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(prefix_tuning): PrefixTuningShim(
(pool): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict()
(adapter_fusion_layer): ModuleDict()
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(adapters): ModuleDict(
(sst-2): Adapter(
(non_linearity): Activation_Function_Class(
(f): ReLU()
)
(adapter_down): Sequential(
(0): Linear(in_features=768, out_features=48, bias=True)
(1): Activation_Function_Class(
(f): ReLU()
)
)
(adapter_up): Linear(in_features=48, out_features=768, bias=True)
)
)
(adapter_fusion_layer): ModuleDict()
)
)
)
)
(pooler): BertPooler(
(dense): Linear(in_features=768, out_features=768, bias=True)
(activation): Tanh()
)
(prefix_tuning): PrefixTuningPool(
(prefix_tunings): ModuleDict()
)
)
(dropout): Dropout(p=0.1, inplace=False)
(classifier): Linear(in_features=768, out_features=2, bias=True)
)
实验3
model.add_adapter("ss")
model.train_adapter("ss")
model.set_active_adapters("ss")
实验4
from transformers.adapters import AdapterConfig
config = AdapterConfig(mh_adapter=True, output_adapter=True, reduction_factor=16, non_linearity="relu")
model.add_adapter("bottleneck_adapter", config=config)
model.train_adapter("bottleneck_adapter")
model.set_active_adapters("bottleneck_adapter")
model
实验5
from transformers.adapters import PfeifferInvConfig
config = PfeifferInvConfig()
model.add_adapter("lang_adapter", config=config)
model.train_adapter("lang_adapter")
model.set_active_adapters("lang_adapter")
实验6
from transformers.adapters import PrefixTuningConfig
config = PrefixTuningConfig(flat=False, prefix_length=30)
model.add_adapter("prefix_tuning", config=config)
model.train_adapter("prefix_tuning")
model.set_active_adapters("prefix_tuning")
实验7
from transformers.adapters import CompacterConfig
config = CompacterConfig()
model.add_adapter("dummy", config=config)
model.train_adapter("dummy")
model.set_active_adapters("dummy")
实验8
from transformers.adapters import AdapterConfig, ConfigUnion
config = ConfigUnion(
AdapterConfig(mh_adapter=True, output_adapter=False, reduction_factor=16, non_linearity="relu"),
AdapterConfig(mh_adapter=False, output_adapter=True, reduction_factor=2, non_linearity="relu"),
)
model.add_adapter("union_adapter", config=config)
model.train_adapter("union_adapter")
model.set_active_adapters("union_adapter")
实验9
from transformers.adapters import MAMConfig
config = MAMConfig()
model.add_adapter("mam_adapter", config=config)
model.train_adapter("mam_adapter")
model.set_active_adapters("mam_adapter")
实验10
from transformers.adapters import ConfigUnion, ParallelConfig, PrefixTuningConfig
config = ConfigUnion(
PrefixTuningConfig(bottleneck_size=800),
ParallelConfig(),
)
model.add_adapter("mam_adapter", config=config)
model.train_adapter("mam_adapter")
model.set_active_adapters("mam_adapter")
开始训练
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(),lr = 0.01)
metric_func = accuracy_score
metric_name = "accuracy"
def train_epoch(tmp_model,dataloader,tmp_op,epoch):
loss_function=torch.nn.CrossEntropyLoss()
tmp_model.train()
tmp_model.zero_grad()
total_loss = 0.0
total_metric=0.0
preds = []
labels = []
for step,batch in enumerate(dataloader,1):
tmp_op.zero_grad()
input_ids = batch['input_ids'].to(device)
attention_mask = batch['attention_mask'].to(device)
label = batch['labels'].to(device)
outputs = model(input_ids, attention_mask=attention_mask, labels=label)
loss = outputs[0]
logits=outputs[1]
total_loss += float(loss.item())
pred = torch.argmax(logits, dim=-1)
preds.extend(pred.cpu().tolist())
labels.extend(batch['labels'].cpu().tolist())
metric = accuracy_score(preds, labels)
total_metric += float(metric.item())
loss.requires_grad_(True)
tmp_op.step()
return total_loss/step, total_metric/step
def evaluate(tmp_model,dataloader, split, post_evaluate_hook=None):
preds = []
labels = []
tmp_model.eval()
val_total_loss = 0.0
val_total_metric=0.0
with torch.no_grad():
for step,batch in enumerate(dataloader,1):
input_ids = batch['input_ids'].to(device)
attention_mask = batch['attention_mask'].to(device)
label = batch['labels'].to(device)
outputs = tmp_model(input_ids, attention_mask=attention_mask, labels=label)
loss = outputs[0]
logits=outputs[1]
pred = torch.argmax(logits, dim=-1)
preds.extend(pred.cpu().tolist())
labels.extend(batch['labels'].cpu().tolist())
val_total_loss += loss.item()
metric = accuracy_score(preds, labels)
val_total_metric += float(metric.item())
tmp_model.train()
return val_total_loss/step,val_total_metric/step
model.device
device(type='cpu')
model=model.to(device)
model.device
device(type='cuda', index=0)
epochs = 10
dfhistory = pd.DataFrame(columns = ["epoch","loss",metric_name,"val_loss","val_"+metric_name])
print("Start Training...")
nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print("=========="*8 + "%s"%nowtime)
for epoch in range(1,epochs+1):
train_loss,train_metric= train_epoch(model,train_data_loader,optimizer,epoch)
val_loss,val_metric= evaluate(model,val_data_loader, "Valid")
info = (epoch, train_loss, train_metric, val_loss,val_metric)
dfhistory.loc[epoch-1] = info
print(("\nEPOCH = %d, loss = %.4f,"+ metric_name + \
" = %.4f, val_loss = %.4f, "+"val_"+ metric_name+" = %.4f")
%info)
nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print("\n"+"=========="*8 + "%s"%nowtime)
print('Finished Training...')
Start Training...
================================================================================2022-04-25 16:31:11
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
EPOCH = 1, loss = 0.6848,accuracy = 0.6237, val_loss = 0.6649, val_accuracy = 0.6484
================================================================================2022-04-25 16:31:12
EPOCH = 2, loss = 0.6789,accuracy = 0.5677, val_loss = 0.6649, val_accuracy = 0.5703
================================================================================2022-04-25 16:31:12
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
EPOCH = 3, loss = 0.6778,accuracy = 0.5755, val_loss = 0.6649, val_accuracy = 0.6484
================================================================================2022-04-25 16:31:13
EPOCH = 4, loss = 0.6801,accuracy = 0.5684, val_loss = 0.6649, val_accuracy = 0.6016
================================================================================2022-04-25 16:31:13
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
EPOCH = 5, loss = 0.6824,accuracy = 0.5931, val_loss = 0.6649, val_accuracy = 0.6953
================================================================================2022-04-25 16:31:13
EPOCH = 6, loss = 0.6765,accuracy = 0.5664, val_loss = 0.6649, val_accuracy = 0.6797
================================================================================2022-04-25 16:31:13
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
EPOCH = 7, loss = 0.6770,accuracy = 0.5358, val_loss = 0.6649, val_accuracy = 0.6016
================================================================================2022-04-25 16:31:13
EPOCH = 8, loss = 0.6820,accuracy = 0.5658, val_loss = 0.6649, val_accuracy = 0.6172
================================================================================2022-04-25 16:31:13
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
EPOCH = 9, loss = 0.6757,accuracy = 0.6055, val_loss = 0.6649, val_accuracy = 0.6016
================================================================================2022-04-25 16:31:13
EPOCH = 10, loss = 0.6750,accuracy = 0.6003, val_loss = 0.6649, val_accuracy = 0.5859
================================================================================2022-04-25 16:31:13
Finished Training...
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
C:\Users\22274\AppData\Local\Temp/ipykernel_7576/4205912457.py:7: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
dfhistory_path='history.csv'
dfhistory.to_csv(dfhistory_path)
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
import matplotlib.pyplot as plt
def plot_metric(dfhistory, metric):
train_metrics = dfhistory[metric]
val_metrics = dfhistory['val_'+metric]
epochs = range(1, len(train_metrics) + 1)
plt.plot(epochs, train_metrics, 'bo--')
plt.plot(epochs, val_metrics, 'ro-')
plt.title('Training and validation '+ metric)
plt.xlabel("Epochs")
plt.ylabel(metric)
plt.legend(["train_"+metric, 'val_'+metric])
plt.savefig(metric+"yuan.jpg")
plt.show()
plot_metric(dfhistory,"loss")
plot_metric(dfhistory,"accuracy")
|