Learning Rate Scheduler

In [1]:
!pip install torch torchvision
Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (1.1.0)
Requirement already satisfied: torchvision in /usr/local/lib/python3.6/dist-packages (0.3.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torch) (1.16.4)
Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from torchvision) (4.3.0)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from torchvision) (1.12.0)
Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow>=4.1.1->torchvision) (0.46)

1. Settings

1) Import required libraries

In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader

2) Set hyperparameters

In [0]:
batch_size = 256
learning_rate = 0.001
num_epoch = 10

2. Data

1) Download Data

In [0]:
mnist_train = dset.MNIST("./", train=True, transform=transforms.ToTensor(), target_transform=None, download=True)
mnist_test = dset.MNIST("./", train=False, transform=transforms.ToTensor(), target_transform=None, download=True)

2) Check Dataset

In [5]:
print(mnist_train[0][0].size(), len(mnist_train))
mnist_test[0][0].size(), len(mnist_test)
torch.Size([1, 28, 28]) 60000
Out[5]:
(torch.Size([1, 28, 28]), 10000)

3) Set DataLoader

In [0]:
train_loader = torch.utils.data.DataLoader(mnist_train,batch_size=batch_size, shuffle=True,num_workers=2,drop_last=True)
test_loader = torch.utils.data.DataLoader(mnist_test,batch_size=batch_size, shuffle=False,num_workers=2,drop_last=True)
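
As a quick sanity check (a minimal sketch, not part of the original notebook), you can pull one batch from train_loader and confirm that batch_size together with drop_last=True yields full batches of 256:

In [0]:
# Fetch a single batch to verify shapes before training.
image, label = next(iter(train_loader))
print(image.size())   # torch.Size([256, 1, 28, 28])
print(label.size())   # torch.Size([256])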

3. Model & Optimizer

1) CNN Model

In [0]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(1,16,3,padding=1),  # 28 x 28
            nn.ReLU(),
            nn.Conv2d(16,32,3,padding=1), # 28 x 28
            nn.ReLU(),
            nn.MaxPool2d(2,2),            # 14 x 14
            nn.Conv2d(32,64,3,padding=1), # 14 x 14
            nn.ReLU(),
            nn.MaxPool2d(2,2)             #  7 x 7
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(64*7*7,100),
            nn.ReLU(),
            nn.Linear(100,10)
        )        
        
    def forward(self,x):
        out = self.layer(x)
        out = out.view(x.size(0), -1)  # flatten; robust to any batch size
        out = self.fc_layer(out)
        return out
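
The spatial sizes noted in the comments can be double-checked with a dummy forward pass through the conv stack alone (a small sketch; the zero tensor and single-sample batch are only for shape tracing):

In [0]:
# Trace the conv layers with a dummy batch to confirm the 64 x 7 x 7 feature map
# that fc_layer expects (64*7*7 = 3136 inputs).
dummy = torch.zeros(1, 1, 28, 28)
print(CNN().layer(dummy).size())   # torch.Size([1, 64, 7, 7])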

2) Loss func & Optimizer

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model = CNN().to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Decays the learning rate by multiplying it by gamma every step_size steps.
#scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)

# Multiplies the learning rate by gamma at each specified milestone (10, 30, 80 in this example).
#scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[10,30,80], gamma=0.1)

# Multiplies the learning rate by gamma every epoch.
#scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

# https://pytorch.org/docs/stable/optim.html?highlight=lr_scheduler#torch.optim.lr_scheduler.ReduceLROnPlateau
# Reduces the learning rate when the monitored metric stops improving, e.g. accuracy, dice score, etc.
# This scheduler takes several arguments; see the documentation for what each one does.
# Here patience (how many epochs to wait while the metric fails to improve) is set to 1, and
# threshold=1 makes every epoch count as "no improvement", so the learning rate drops every
# other epoch, as the training log below shows.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, threshold=1, patience=1, mode='min')

# Reference: https://www.geeksforgeeks.org/python-dir-function/
print(dir(scheduler))
print(dir(optimizer))
cuda:0
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_cmp', '_init_is_better', '_reduce_lr', '_reset', 'best', 'cooldown', 'cooldown_counter', 'eps', 'factor', 'in_cooldown', 'is_better', 'last_epoch', 'load_state_dict', 'min_lrs', 'mode', 'mode_worse', 'num_bad_epochs', 'optimizer', 'patience', 'state_dict', 'step', 'threshold', 'threshold_mode', 'verbose']
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'add_param_group', 'defaults', 'load_state_dict', 'param_groups', 'state', 'state_dict', 'step', 'zero_grad']
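
To see how the three commented-out schedulers differ, the sketch below (detached from the model above; the dummy parameter exists only because an optimizer requires one) steps each scheduler on a throwaway SGD optimizer and records the learning rate per epoch:

In [0]:
# Compare LR trajectories of the step-based schedulers on a dummy optimizer.
for name, make in [
    ("StepLR",        lambda opt: lr_scheduler.StepLR(opt, step_size=3, gamma=0.1)),
    ("MultiStepLR",   lambda opt: lr_scheduler.MultiStepLR(opt, milestones=[2, 5], gamma=0.1)),
    ("ExponentialLR", lambda opt: lr_scheduler.ExponentialLR(opt, gamma=0.5)),
]:
    opt = torch.optim.SGD([torch.zeros(1, requires_grad=True)], lr=0.1)
    sched = make(opt)
    lrs = []
    for epoch in range(8):
        lrs.append(opt.param_groups[0]["lr"])
        opt.step()      # scheduler.step() comes after optimizer.step() since PyTorch 1.1
        sched.step()
    print(name, [round(lr, 6) for lr in lrs])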

4. Train

In [9]:
for i in range(num_epoch):
    # For schedulers other than ReduceLROnPlateau, use the call below. Since PyTorch 1.1
    # it should run after the epoch's optimizer steps, i.e., where scheduler.step(loss)
    # sits at the bottom of this loop.
    #scheduler.step()
    for j,[image,label] in enumerate(train_loader):
        x = image.to(device)
        y_= label.to(device)
        
        optimizer.zero_grad()
        output = model(x)
        loss = loss_func(output,y_)
        loss.backward()
        optimizer.step()
    
    # Applies only to ReduceLROnPlateau: this code lowers the learning rate when
    # the loss stops decreasing.
    scheduler.step(loss)
    
    if i % 10 == 0:
        print(loss)   
            
    #print("Epoch: {}, Learning Rate: {}".format(i,scheduler.get_lr()))  
    print("Epoch: {}, Learning Rate: {}".format(i,scheduler.optimizer.state_dict()['param_groups'][0]['lr']))
tensor(2.3034, device='cuda:0', grad_fn=<NllLossBackward>)
Epoch: 0, Learning Rate: 0.001
Epoch: 1, Learning Rate: 0.0001
Epoch: 2, Learning Rate: 0.0001
Epoch: 3, Learning Rate: 1e-05
Epoch: 4, Learning Rate: 1e-05
Epoch: 5, Learning Rate: 1.0000000000000002e-06
Epoch: 6, Learning Rate: 1.0000000000000002e-06
Epoch: 7, Learning Rate: 1.0000000000000002e-07
Epoch: 8, Learning Rate: 1.0000000000000002e-07
Epoch: 9, Learning Rate: 1.0000000000000004e-08
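
Note that the rate drops every other epoch, not every epoch: with threshold=1 and the default threshold_mode='rel', no loss value ever counts as an improvement, and patience=1 means the scheduler tolerates one bad epoch before acting. A minimal sketch with a synthetic loss sequence (assumed values, independent of the model) reproduces the same pattern:

In [0]:
# Feed a steadily-but-insufficiently improving loss to ReduceLROnPlateau
# and watch when it cuts the LR (default factor=0.1; cuts land at epochs 1, 3, 5).
opt = torch.optim.SGD([torch.zeros(1, requires_grad=True)], lr=0.001)
sched = lr_scheduler.ReduceLROnPlateau(opt, threshold=1, patience=1, mode='min')
for epoch, fake_loss in enumerate([2.3, 2.2, 2.1, 2.0, 1.9, 1.8]):
    sched.step(fake_loss)
    print(epoch, opt.param_groups[0]['lr'])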

5. Test

In [10]:
correct = 0
total = 0

with torch.no_grad():
  for image,label in test_loader:
      x = image.to(device)
      y_= label.to(device)

      output = model(x)
      _,output_index = torch.max(output,1)

      total += label.size(0)
      correct += (output_index == y_).sum().float()

  print("Accuracy of Test Data: {}".format(100*correct/total))
Accuracy of Test Data: 9.805688858032227

The accuracy is essentially chance level for 10 classes: with threshold=1 the plateau scheduler cut the learning rate from 1e-3 down to about 1e-8 within ten epochs, so plain SGD never had a chance to learn. Relaxing threshold and patience (or keeping the initial rate for longer) lets the model actually train before the rate collapses.