!pip install torch torchvision
Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (1.1.0) Requirement already satisfied: torchvision in /usr/local/lib/python3.6/dist-packages (0.3.0) Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torch) (1.16.4) Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from torchvision) (1.12.0) Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from torchvision) (4.3.0) Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow>=4.1.1->torchvision) (0.46)
# We will build a simple character-level RNN.
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
# Hyperparameter settings.
n_hidden = 35
lr = 0.01
epochs = 1000
# The usable characters are limited to lowercase English letters
# plus a few punctuation marks.
# alphabet(0-25), space(26), ... , start(0), end(1)
string = "hello pytorch. how long can a rnn cell remember? show me your limit!"
chars = "abcdefghijklmnopqrstuvwxyz ?!.,:;01"
# Keep the characters as a list and remember how many distinct
# symbols there are (this is also the one-hot vector width).
char_list = list(chars)
n_letters = len(char_list)
# Instead of using the characters directly, we convert them to
# one-hot vectors and compute with those.
#Start = [0 0 0 ... 1 0]
#a = [1 0 0 ... 0 0]
#b = [0 1 0 ... 0 0]
#c = [0 0 1 ... 0 0]
#...
#end = [0 0 0 ... 0 1]
# Function that turns a string into a stack of one-hot vectors
# abc -> [[1 0 0 ... 0 0],
# [0 1 0 ... 0 0],
# [0 0 1 ... 0 0]]
def string_to_onehot(string):
    """Convert a string into a stack of one-hot vectors.

    Returns an int numpy array of shape ``(len(string) + 2, n_letters)``:
    a start-token row, one row per character of *string*, then an
    end-token row.  Relies on the module-level ``char_list`` and
    ``n_letters``; characters outside ``char_list`` raise ValueError.
    """
    # Start and end tokens live in the last two alphabet positions.
    start = np.zeros(shape=n_letters, dtype=int)
    end = np.zeros(shape=n_letters, dtype=int)
    start[-2] = 1
    end[-1] = 1
    # Collect the rows in a Python list and stack them once at the
    # end: the original called np.vstack per character, copying the
    # growing array each time (O(n^2) overall).
    rows = [start]
    for ch in string:
        # Position of the character in the alphabet (a:0, b:1, c:2, ...).
        idx = char_list.index(ch)
        # One-hot row: all zeros except the character's own index.
        row = np.zeros(shape=n_letters, dtype=int)
        row[idx] = 1
        rows.append(row)
    rows.append(end)
    return np.vstack(rows)
# Function that maps a one-hot (or score) vector back to a character.
# [1 0 0 ... 0 0] -> a
def onehot_to_word(onehot_1):
    """Return the character whose index holds the largest value.

    Accepts a 1-D or ``(1, n_letters)`` torch tensor; the flat argmax
    is used as an index into the module-level ``char_list``.
    """
    # torch tensors expose argmax directly — the original detoured
    # through numpy via the unbound-method call
    # ``torch.Tensor.numpy(onehot_1)``, which is unnecessary.
    return char_list[int(onehot_1.argmax())]
# Elman-style RNN with a single hidden layer.
class RNN(nn.Module):
    """One-step RNN cell: two linear maps over the concatenated
    [input, hidden] vector — one producing the next hidden state
    (through tanh), one producing the output scores."""

    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        # Both layers read the concatenated [input, hidden] vector.
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.act_fn = nn.Tanh()

    def forward(self, input, hidden):
        """Run one time step; returns ``(output, new_hidden)``."""
        # Glue the input and the previous hidden state together.
        joined = torch.cat((input, hidden), 1)
        # Hidden state is squashed through tanh; the output is a
        # plain linear read-out of the same concatenation.
        new_hidden = self.act_fn(self.i2h(joined))
        output = self.i2o(joined)
        return output, new_hidden

    def init_hidden(self):
        """Zero hidden state for t = 0 (batch size 1)."""
        return torch.zeros(1, self.hidden_size)
# Build the model: one-hot in, one-hot-sized scores out.
rnn = RNN(n_letters, n_hidden, n_letters)
# Loss function and optimizer.
loss_func = nn.MSELoss()
optimizer = optim.Adam(rnn.parameters(), lr=lr)
# train
# Encode the whole training string as a stack of one-hot vectors and
# convert it to a float tensor so it can be fed to the network.
one_hot = torch.from_numpy(string_to_onehot(string)).float()
for epoch in range(epochs):
    optimizer.zero_grad()
    # Fresh hidden state before every pass over the string.
    hidden = rnn.init_hidden()
    # Accumulate the loss over the whole sequence in one scalar.
    total_loss = 0
    seq_len = one_hot.size(0)
    for t in range(seq_len - 1):
        # Input is the character at position t ...
        input_ = one_hot[t].unsqueeze(0)
        # ... and the target is the character right after it
        # ("pytorch": p->y, y->t, t->o, ...).
        target = one_hot[t + 1]
        output, hidden = rnn(input_, hidden)
        total_loss = total_loss + loss_func(output.view(-1), target.view(-1))
    total_loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(total_loss)
tensor(2.8275, grad_fn=<AddBackward0>) tensor(1.2019, grad_fn=<AddBackward0>) tensor(0.7401, grad_fn=<AddBackward0>) tensor(0.4701, grad_fn=<AddBackward0>) tensor(0.3155, grad_fn=<AddBackward0>) tensor(0.2274, grad_fn=<AddBackward0>) tensor(0.1543, grad_fn=<AddBackward0>) tensor(0.1213, grad_fn=<AddBackward0>) tensor(0.0939, grad_fn=<AddBackward0>) tensor(0.0763, grad_fn=<AddBackward0>) tensor(0.0635, grad_fn=<AddBackward0>) tensor(0.0544, grad_fn=<AddBackward0>) tensor(0.0472, grad_fn=<AddBackward0>) tensor(0.0414, grad_fn=<AddBackward0>) tensor(0.0380, grad_fn=<AddBackward0>) tensor(0.0327, grad_fn=<AddBackward0>) tensor(0.0354, grad_fn=<AddBackward0>) tensor(0.0278, grad_fn=<AddBackward0>) tensor(0.0277, grad_fn=<AddBackward0>) tensor(0.0244, grad_fn=<AddBackward0>) tensor(0.0217, grad_fn=<AddBackward0>) tensor(0.0202, grad_fn=<AddBackward0>) tensor(0.0205, grad_fn=<AddBackward0>) tensor(0.0200, grad_fn=<AddBackward0>) tensor(0.0174, grad_fn=<AddBackward0>) tensor(0.0162, grad_fn=<AddBackward0>) tensor(0.0204, grad_fn=<AddBackward0>) tensor(0.0162, grad_fn=<AddBackward0>) tensor(0.0143, grad_fn=<AddBackward0>) tensor(0.0135, grad_fn=<AddBackward0>) tensor(0.0174, grad_fn=<AddBackward0>) tensor(0.0142, grad_fn=<AddBackward0>) tensor(0.0126, grad_fn=<AddBackward0>) tensor(0.0116, grad_fn=<AddBackward0>) tensor(0.0110, grad_fn=<AddBackward0>) tensor(0.0155, grad_fn=<AddBackward0>) tensor(0.0136, grad_fn=<AddBackward0>) tensor(0.0110, grad_fn=<AddBackward0>) tensor(0.0099, grad_fn=<AddBackward0>) tensor(0.0092, grad_fn=<AddBackward0>) tensor(0.0110, grad_fn=<AddBackward0>) tensor(0.0111, grad_fn=<AddBackward0>) tensor(0.0091, grad_fn=<AddBackward0>) tensor(0.0083, grad_fn=<AddBackward0>) tensor(0.0077, grad_fn=<AddBackward0>) tensor(0.0074, grad_fn=<AddBackward0>) tensor(0.0104, grad_fn=<AddBackward0>) tensor(0.0153, grad_fn=<AddBackward0>) tensor(0.0089, grad_fn=<AddBackward0>) tensor(0.0072, grad_fn=<AddBackward0>) tensor(0.0067, grad_fn=<AddBackward0>) 
tensor(0.0063, grad_fn=<AddBackward0>) tensor(0.0060, grad_fn=<AddBackward0>) tensor(0.0058, grad_fn=<AddBackward0>) tensor(0.0071, grad_fn=<AddBackward0>) tensor(0.0083, grad_fn=<AddBackward0>) tensor(0.0067, grad_fn=<AddBackward0>) tensor(0.0054, grad_fn=<AddBackward0>) tensor(0.0052, grad_fn=<AddBackward0>) tensor(0.0050, grad_fn=<AddBackward0>) tensor(0.0150, grad_fn=<AddBackward0>) tensor(0.0064, grad_fn=<AddBackward0>) tensor(0.0051, grad_fn=<AddBackward0>) tensor(0.0046, grad_fn=<AddBackward0>) tensor(0.0044, grad_fn=<AddBackward0>) tensor(0.0136, grad_fn=<AddBackward0>) tensor(0.0054, grad_fn=<AddBackward0>) tensor(0.0046, grad_fn=<AddBackward0>) tensor(0.0041, grad_fn=<AddBackward0>) tensor(0.0038, grad_fn=<AddBackward0>) tensor(0.0037, grad_fn=<AddBackward0>) tensor(0.0228, grad_fn=<AddBackward0>) tensor(0.0074, grad_fn=<AddBackward0>) tensor(0.0049, grad_fn=<AddBackward0>) tensor(0.0039, grad_fn=<AddBackward0>) tensor(0.0035, grad_fn=<AddBackward0>) tensor(0.0033, grad_fn=<AddBackward0>) tensor(0.0032, grad_fn=<AddBackward0>) tensor(0.0204, grad_fn=<AddBackward0>) tensor(0.0057, grad_fn=<AddBackward0>) tensor(0.0037, grad_fn=<AddBackward0>) tensor(0.0033, grad_fn=<AddBackward0>) tensor(0.0031, grad_fn=<AddBackward0>) tensor(0.0029, grad_fn=<AddBackward0>) tensor(0.0028, grad_fn=<AddBackward0>) tensor(0.0028, grad_fn=<AddBackward0>) tensor(0.0072, grad_fn=<AddBackward0>) tensor(0.0047, grad_fn=<AddBackward0>) tensor(0.0036, grad_fn=<AddBackward0>) tensor(0.0029, grad_fn=<AddBackward0>) tensor(0.0026, grad_fn=<AddBackward0>) tensor(0.0025, grad_fn=<AddBackward0>) tensor(0.0024, grad_fn=<AddBackward0>) tensor(0.0023, grad_fn=<AddBackward0>) tensor(0.0186, grad_fn=<AddBackward0>) tensor(0.0050, grad_fn=<AddBackward0>) tensor(0.0030, grad_fn=<AddBackward0>) tensor(0.0026, grad_fn=<AddBackward0>) tensor(0.0023, grad_fn=<AddBackward0>) tensor(0.0022, grad_fn=<AddBackward0>)
# test
# Build the start token; it is the very first input to the network.
start = torch.zeros(1, n_letters)
start[:, -2] = 1
with torch.no_grad():
    # The hidden state is initialised exactly once for the whole run.
    hidden = rnn.init_hidden()
    input_ = start
    # Generated characters get appended to this string.
    output_string = ""
    # Ideally we would loop until the end token appears, but it never
    # does, so we generate exactly len(string) characters instead.
    for _ in range(len(string)):
        output, hidden = rnn(input_, hidden)
        # Decode the network output back into a character.
        output_string += onehot_to_word(output.data)
        # This step's output becomes the next step's input.
        input_ = output
print(output_string)
hello pytorch ml longeomm rome om omb ome om eomg omelo. eongnomelp