In [0]:
!pip install torch torchvision
Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (1.1.0)
Requirement already satisfied: torchvision in /usr/local/lib/python3.6/dist-packages (0.3.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torch) (1.16.4)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from torchvision) (1.12.0)
Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from torchvision) (4.3.0)
Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow>=4.1.1->torchvision) (0.46)
In [0]:
# We will build a simple character-level RNN.

import torch 
import torch.nn as nn
import torch.optim as optim
import numpy as np
In [0]:
# ÇÏÀÌÆÛÆĶó¹ÌÅÍ ¼³Á¤

n_hidden = 35  # size of the RNN hidden state
lr = 0.01  # learning rate for the optimizer
epochs = 1000  # number of full passes over the training string
In [0]:
# The character set is restricted to lowercase English letters plus a few punctuation marks.
# alphabet(0-25), space(26), punctuation, then '0' = start token, '1' = end token

string = "hello pytorch. how long can a rnn cell remember? show me your limit!"
chars = "abcdefghijklmnopqrstuvwxyz ?!.,:;01"

# Turn the vocabulary string into a list of characters and record its
# length (the number of distinct symbols, i.e. the one-hot width).
char_list = list(chars)
n_letters = len(char_list)
In [0]:
# Rather than using characters directly, we convert them to one-hot vectors for computation.

#start = [0 0 0 ... 1 0]
#a =     [1 0 0 ... 0 0]
#b =     [0 1 0 ... 0 0]
#c =     [0 0 1 ... 0 0]
#...
#end =   [0 0 0 ... 0 1]
In [0]:
# Function that turns a string into a stack of one-hot vectors
# abc -> [[1 0 0 ... 0 0],
#         [0 1 0 ... 0 0],
#         [0 0 1 ... 0 0]]

def string_to_onehot(string):
    """Encode a string as a stack of one-hot row vectors.

    The result has shape ``(len(string) + 2, n_letters)``: a start-token
    row, one row per character, and an end-token row. Relies on the
    module-level ``char_list`` / ``n_letters`` vocabulary.
    """
    # Allocate the whole stack up front instead of np.vstack-ing one row
    # at a time — repeated vstack copies the entire array each iteration,
    # which is quadratic in the string length.
    onehot = np.zeros((len(string) + 2, n_letters), dtype=int)
    onehot[0, -2] = 1   # start token (the '0' slot of the vocabulary)
    onehot[-1, -1] = 1  # end token (the '1' slot of the vocabulary)
    for row, ch in enumerate(string, start=1):
        # Flip on the column matching the character's vocabulary index
        # (a:0, b:1, c:2, ...). Raises ValueError for unknown characters,
        # same as the original char_list.index lookup.
        onehot[row, char_list.index(ch)] = 1
    return onehot
In [0]:
# Function that turns a one-hot vector back into a character
# [1 0 0 ... 0 0] -> a
# https://pytorch.org/docs/stable/tensors.html?highlight=numpy#torch.Tensor.numpy

def onehot_to_word(onehot_1):
    """Decode a one-hot (or near-one-hot) tensor into its character."""
    # Move the tensor into a numpy array, then map the position of the
    # largest entry (the "hot" index) back into the vocabulary list.
    arr = onehot_1.numpy()
    hot_index = arr.argmax()
    return char_list[hot_index]
In [0]:
# RNN with 1 hidden layer

class RNN(nn.Module):
    """Minimal Elman-style RNN with a single hidden layer.

    At every time step the previous hidden state is concatenated with
    the current input; one linear layer (through a tanh) produces the
    next hidden state, another produces the output vector.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Both layers consume [input, hidden] concatenated along dim 1.
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.act_fn = nn.Tanh()

    def forward(self, input, hidden):
        """Run one time step; returns (output, new_hidden)."""
        combined = torch.cat((input, hidden), 1)
        # i2h updates the hidden state, i2o computes this step's output.
        new_hidden = self.act_fn(self.i2h(combined))
        output = self.i2o(combined)
        return output, new_hidden

    def init_hidden(self):
        """Zero hidden state for t = 0, before any input has been seen."""
        return torch.zeros(1, self.hidden_size)
    
# Instantiate the model: input and output width both equal the vocabulary size.
rnn = RNN(n_letters, n_hidden, n_letters)
In [0]:
# Set up the loss function and the optimizer.

# NOTE(review): MSE over one-hot targets works for this demo, but
# CrossEntropyLoss would be the conventional choice for classification.
loss_func = nn.MSELoss()
# Adam over all model parameters, using the lr set in the config cell.
optimizer = optim.Adam(rnn.parameters(), lr=lr)
In [0]:
# train

# Convert the training string to a stack of one-hot vectors and cast it
# to a float tensor, the dtype the linear layers expect.
one_hot = torch.from_numpy(string_to_onehot(string)).type_as(torch.FloatTensor())

for epoch in range(epochs):
    optimizer.zero_grad()
    # Start every pass over the string with a fresh hidden state.
    hidden = rnn.init_hidden()

    # Accumulate the loss over the whole sequence before backprop.
    total_loss = 0
    seq_len = one_hot.size(0)
    for pos in range(seq_len - 1):
        # Input is the character at `pos`, target is the one right after
        # it — e.g. for "pytorch": p->y, y->t, t->o, ...
        input_ = one_hot[pos:pos + 1, :]
        target = one_hot[pos + 1]
        output, hidden = rnn.forward(input_, hidden)

        loss = loss_func(output.view(-1), target.view(-1))
        total_loss += loss

    total_loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(total_loss)
tensor(2.8275, grad_fn=<AddBackward0>)
tensor(1.2019, grad_fn=<AddBackward0>)
tensor(0.7401, grad_fn=<AddBackward0>)
tensor(0.4701, grad_fn=<AddBackward0>)
tensor(0.3155, grad_fn=<AddBackward0>)
tensor(0.2274, grad_fn=<AddBackward0>)
tensor(0.1543, grad_fn=<AddBackward0>)
tensor(0.1213, grad_fn=<AddBackward0>)
tensor(0.0939, grad_fn=<AddBackward0>)
tensor(0.0763, grad_fn=<AddBackward0>)
tensor(0.0635, grad_fn=<AddBackward0>)
tensor(0.0544, grad_fn=<AddBackward0>)
tensor(0.0472, grad_fn=<AddBackward0>)
tensor(0.0414, grad_fn=<AddBackward0>)
tensor(0.0380, grad_fn=<AddBackward0>)
tensor(0.0327, grad_fn=<AddBackward0>)
tensor(0.0354, grad_fn=<AddBackward0>)
tensor(0.0278, grad_fn=<AddBackward0>)
tensor(0.0277, grad_fn=<AddBackward0>)
tensor(0.0244, grad_fn=<AddBackward0>)
tensor(0.0217, grad_fn=<AddBackward0>)
tensor(0.0202, grad_fn=<AddBackward0>)
tensor(0.0205, grad_fn=<AddBackward0>)
tensor(0.0200, grad_fn=<AddBackward0>)
tensor(0.0174, grad_fn=<AddBackward0>)
tensor(0.0162, grad_fn=<AddBackward0>)
tensor(0.0204, grad_fn=<AddBackward0>)
tensor(0.0162, grad_fn=<AddBackward0>)
tensor(0.0143, grad_fn=<AddBackward0>)
tensor(0.0135, grad_fn=<AddBackward0>)
tensor(0.0174, grad_fn=<AddBackward0>)
tensor(0.0142, grad_fn=<AddBackward0>)
tensor(0.0126, grad_fn=<AddBackward0>)
tensor(0.0116, grad_fn=<AddBackward0>)
tensor(0.0110, grad_fn=<AddBackward0>)
tensor(0.0155, grad_fn=<AddBackward0>)
tensor(0.0136, grad_fn=<AddBackward0>)
tensor(0.0110, grad_fn=<AddBackward0>)
tensor(0.0099, grad_fn=<AddBackward0>)
tensor(0.0092, grad_fn=<AddBackward0>)
tensor(0.0110, grad_fn=<AddBackward0>)
tensor(0.0111, grad_fn=<AddBackward0>)
tensor(0.0091, grad_fn=<AddBackward0>)
tensor(0.0083, grad_fn=<AddBackward0>)
tensor(0.0077, grad_fn=<AddBackward0>)
tensor(0.0074, grad_fn=<AddBackward0>)
tensor(0.0104, grad_fn=<AddBackward0>)
tensor(0.0153, grad_fn=<AddBackward0>)
tensor(0.0089, grad_fn=<AddBackward0>)
tensor(0.0072, grad_fn=<AddBackward0>)
tensor(0.0067, grad_fn=<AddBackward0>)
tensor(0.0063, grad_fn=<AddBackward0>)
tensor(0.0060, grad_fn=<AddBackward0>)
tensor(0.0058, grad_fn=<AddBackward0>)
tensor(0.0071, grad_fn=<AddBackward0>)
tensor(0.0083, grad_fn=<AddBackward0>)
tensor(0.0067, grad_fn=<AddBackward0>)
tensor(0.0054, grad_fn=<AddBackward0>)
tensor(0.0052, grad_fn=<AddBackward0>)
tensor(0.0050, grad_fn=<AddBackward0>)
tensor(0.0150, grad_fn=<AddBackward0>)
tensor(0.0064, grad_fn=<AddBackward0>)
tensor(0.0051, grad_fn=<AddBackward0>)
tensor(0.0046, grad_fn=<AddBackward0>)
tensor(0.0044, grad_fn=<AddBackward0>)
tensor(0.0136, grad_fn=<AddBackward0>)
tensor(0.0054, grad_fn=<AddBackward0>)
tensor(0.0046, grad_fn=<AddBackward0>)
tensor(0.0041, grad_fn=<AddBackward0>)
tensor(0.0038, grad_fn=<AddBackward0>)
tensor(0.0037, grad_fn=<AddBackward0>)
tensor(0.0228, grad_fn=<AddBackward0>)
tensor(0.0074, grad_fn=<AddBackward0>)
tensor(0.0049, grad_fn=<AddBackward0>)
tensor(0.0039, grad_fn=<AddBackward0>)
tensor(0.0035, grad_fn=<AddBackward0>)
tensor(0.0033, grad_fn=<AddBackward0>)
tensor(0.0032, grad_fn=<AddBackward0>)
tensor(0.0204, grad_fn=<AddBackward0>)
tensor(0.0057, grad_fn=<AddBackward0>)
tensor(0.0037, grad_fn=<AddBackward0>)
tensor(0.0033, grad_fn=<AddBackward0>)
tensor(0.0031, grad_fn=<AddBackward0>)
tensor(0.0029, grad_fn=<AddBackward0>)
tensor(0.0028, grad_fn=<AddBackward0>)
tensor(0.0028, grad_fn=<AddBackward0>)
tensor(0.0072, grad_fn=<AddBackward0>)
tensor(0.0047, grad_fn=<AddBackward0>)
tensor(0.0036, grad_fn=<AddBackward0>)
tensor(0.0029, grad_fn=<AddBackward0>)
tensor(0.0026, grad_fn=<AddBackward0>)
tensor(0.0025, grad_fn=<AddBackward0>)
tensor(0.0024, grad_fn=<AddBackward0>)
tensor(0.0023, grad_fn=<AddBackward0>)
tensor(0.0186, grad_fn=<AddBackward0>)
tensor(0.0050, grad_fn=<AddBackward0>)
tensor(0.0030, grad_fn=<AddBackward0>)
tensor(0.0026, grad_fn=<AddBackward0>)
tensor(0.0023, grad_fn=<AddBackward0>)
tensor(0.0022, grad_fn=<AddBackward0>)
In [0]:
# test
# The hidden state is initialized only once for the whole generation run.

# Build the start token: a one-hot row of vocabulary width.
start = torch.zeros(1, n_letters)
start[:, -2] = 1

with torch.no_grad():
    hidden = rnn.init_hidden()
    # The very first input fed to the network is the start token.
    input_ = start
    # Generated characters are appended to this string.
    output_string = ""

    # Ideally we would loop until the end token is produced, but it never
    # appears, so generation is capped at the training string's length.
    for _ in range(len(string)):
        output, hidden = rnn.forward(input_, hidden)
        # Decode the prediction into a character and append it.
        output_string += onehot_to_word(output.data)
        # This step's prediction becomes the next step's input.
        input_ = output

print(output_string)
hello pytorch ml longeomm rome om  omb ome om eomg omelo. eongnomelp

Once you have finished this file, we recommend going through the [reference] LSTM practice code first, then moving on to section 6.4.