Learning Rate Scheduler

In [1]:
!pip install torch torchvision
Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (1.1.0)
Requirement already satisfied: torchvision in /usr/local/lib/python3.6/dist-packages (0.3.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torch) (1.16.4)
Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from torchvision) (4.3.0)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from torchvision) (1.12.0)
Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow>=4.1.1->torchvision) (0.46)

1. Settings

1) Import required libraries

In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader

2) Set hyperparameters

In [0]:
batch_size = 256
learning_rate = 0.001
num_epoch = 10

2. Data

1) Download Data

In [0]:
mnist_train = dset.MNIST("./", train=True, transform=transforms.ToTensor(), target_transform=None, download=True)
mnist_test = dset.MNIST("./", train=False, transform=transforms.ToTensor(), target_transform=None, download=True)

2) Check Dataset

In [5]:
print(mnist_train[0][0].size(), len(mnist_train))
mnist_test[0][0].size(), len(mnist_test)
torch.Size([1, 28, 28]) 60000
Out[5]:
(torch.Size([1, 28, 28]), 10000)

3) Set DataLoader

In [0]:
train_loader = torch.utils.data.DataLoader(mnist_train,batch_size=batch_size, shuffle=True,num_workers=2,drop_last=True)
test_loader = torch.utils.data.DataLoader(mnist_test,batch_size=batch_size, shuffle=False,num_workers=2,drop_last=True)
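
As a quick sanity check (a minimal sketch, not part of the original notebook), you can pull one batch from train_loader and confirm that batch_size together with drop_last=True yields full batches of 256:

In [0]:
# Fetch a single batch to verify shapes before training.
image, label = next(iter(train_loader))
print(image.size())   # torch.Size([256, 1, 28, 28])
print(label.size())   # torch.Size([256])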

3. Model & Optimizer

1) CNN Model

In [0]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(1,16,3,padding=1),  # 28 x 28
            nn.ReLU(),
            nn.Conv2d(16,32,3,padding=1), # 28 x 28
            nn.ReLU(),
            nn.MaxPool2d(2,2),            # 14 x 14
            nn.Conv2d(32,64,3,padding=1), # 14 x 14
            nn.ReLU(),
            nn.MaxPool2d(2,2)             #  7 x 7
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(64*7*7,100),
            nn.ReLU(),
            nn.Linear(100,10)
        )        
        
    def forward(self,x):
        out = self.layer(x)
        out = out.view(x.size(0), -1)  # flatten; robust to any batch size
        out = self.fc_layer(out)
        return out
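
The spatial sizes noted in the comments can be double-checked with a dummy forward pass through the conv stack alone (a small sketch; the zero tensor and single-sample batch are only for shape tracing):

In [0]:
# Trace the conv layers with a dummy batch to confirm the 64 x 7 x 7 feature map
# that fc_layer expects (64*7*7 = 3136 inputs).
dummy = torch.zeros(1, 1, 28, 28)
print(CNN().layer(dummy).size())   # torch.Size([1, 64, 7, 7])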

2) Loss func & Optimizer

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model = CNN().to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Decays the learning rate by multiplying it by gamma every step_size steps.
#scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)

# Multiplies the learning rate by gamma at each specified milestone (10, 30, 80 in this example).
#scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[10,30,80], gamma=0.1)

# Multiplies the learning rate by gamma every epoch.
#scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

# https://pytorch.org/docs/stable/optim.html?highlight=lr_scheduler#torch.optim.lr_scheduler.ReduceLROnPlateau
# Reduces the learning rate when the monitored metric stops improving, e.g. accuracy, dice score, etc.
# This scheduler takes several arguments; see the documentation for what each one does.
# Here patience (how many epochs to wait while the metric fails to improve) is set to 1, and
# threshold=1 makes every epoch count as "no improvement", so the learning rate drops every
# other epoch, as the training log below shows.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, threshold=1, patience=1, mode='min')

# Reference: https://www.geeksforgeeks.org/python-dir-function/
print(dir(scheduler))
print(dir(optimizer))
cuda:0
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_cmp', '_init_is_better', '_reduce_lr', '_reset', 'best', 'cooldown', 'cooldown_counter', 'eps', 'factor', 'in_cooldown', 'is_better', 'last_epoch', 'load_state_dict', 'min_lrs', 'mode', 'mode_worse', 'num_bad_epochs', 'optimizer', 'patience', 'state_dict', 'step', 'threshold', 'threshold_mode', 'verbose']
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'add_param_group', 'defaults', 'load_state_dict', 'param_groups', 'state', 'state_dict', 'step', 'zero_grad']
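
To see how the three commented-out schedulers differ, the sketch below (detached from the model above; the dummy parameter exists only because an optimizer requires one) steps each scheduler on a throwaway SGD optimizer and records the learning rate per epoch:

In [0]:
# Compare LR trajectories of the step-based schedulers on a dummy optimizer.
for name, make in [
    ("StepLR",        lambda opt: lr_scheduler.StepLR(opt, step_size=3, gamma=0.1)),
    ("MultiStepLR",   lambda opt: lr_scheduler.MultiStepLR(opt, milestones=[2, 5], gamma=0.1)),
    ("ExponentialLR", lambda opt: lr_scheduler.ExponentialLR(opt, gamma=0.5)),
]:
    opt = torch.optim.SGD([torch.zeros(1, requires_grad=True)], lr=0.1)
    sched = make(opt)
    lrs = []
    for epoch in range(8):
        lrs.append(opt.param_groups[0]["lr"])
        opt.step()      # scheduler.step() comes after optimizer.step() since PyTorch 1.1
        sched.step()
    print(name, [round(lr, 6) for lr in lrs])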

4. Train

In [9]:
for i in range(num_epoch):
    # For schedulers other than ReduceLROnPlateau, use the call below. Since PyTorch 1.1
    # it should run after the epoch's optimizer steps, i.e., where scheduler.step(loss)
    # sits at the bottom of this loop.
    #scheduler.step()
    for j,[image,label] in enumerate(train_loader):
        x = image.to(device)
        y_= label.to(device)
        
        optimizer.zero_grad()
        output = model(x)
        loss = loss_func(output,y_)
        loss.backward()
        optimizer.step()
    
    # Applies only to ReduceLROnPlateau: this code lowers the learning rate when
    # the loss stops decreasing.
    scheduler.step(loss)
    
    if i % 10 == 0:
        print(loss)   
            
    #print("Epoch: {}, Learning Rate: {}".format(i,scheduler.get_lr()))  
    print("Epoch: {}, Learning Rate: {}".format(i,scheduler.optimizer.state_dict()['param_groups'][0]['lr']))
tensor(2.3034, device='cuda:0', grad_fn=<NllLossBackward>)
Epoch: 0, Learning Rate: 0.001
Epoch: 1, Learning Rate: 0.0001
Epoch: 2, Learning Rate: 0.0001
Epoch: 3, Learning Rate: 1e-05
Epoch: 4, Learning Rate: 1e-05
Epoch: 5, Learning Rate: 1.0000000000000002e-06
Epoch: 6, Learning Rate: 1.0000000000000002e-06
Epoch: 7, Learning Rate: 1.0000000000000002e-07
Epoch: 8, Learning Rate: 1.0000000000000002e-07
Epoch: 9, Learning Rate: 1.0000000000000004e-08
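
Note that the rate drops every other epoch, not every epoch: with threshold=1 and the default threshold_mode='rel', no loss value ever counts as an improvement, and patience=1 means the scheduler tolerates one bad epoch before acting. A minimal sketch with a synthetic loss sequence (assumed values, independent of the model) reproduces the same pattern:

In [0]:
# Feed a steadily-but-insufficiently improving loss to ReduceLROnPlateau
# and watch when it cuts the LR (default factor=0.1; cuts land at epochs 1, 3, 5).
opt = torch.optim.SGD([torch.zeros(1, requires_grad=True)], lr=0.001)
sched = lr_scheduler.ReduceLROnPlateau(opt, threshold=1, patience=1, mode='min')
for epoch, fake_loss in enumerate([2.3, 2.2, 2.1, 2.0, 1.9, 1.8]):
    sched.step(fake_loss)
    print(epoch, opt.param_groups[0]['lr'])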

5. Test

In [10]:
correct = 0
total = 0

with torch.no_grad():
  for image,label in test_loader:
      x = image.to(device)
      y_= label.to(device)

      output = model(x)
      _,output_index = torch.max(output,1)

      total += label.size(0)
      correct += (output_index == y_).sum().float()

  print("Accuracy of Test Data: {}".format(100*correct/total))
Accuracy of Test Data: 9.805688858032227

The accuracy is essentially chance level for 10 classes: with threshold=1 the plateau scheduler cut the learning rate from 1e-3 down to about 1e-8 within ten epochs, so plain SGD never had a chance to learn. Relaxing threshold and patience (or keeping the initial rate for longer) lets the model actually train before the rate collapses.