# NumPy version: compute c = sum(x*y + z) and its gradients by hand.
# numpy does the math; datetime measures the elapsed time.
import numpy as np
from datetime import datetime

start = datetime.now()

# Fixed seed so the run is reproducible; x, y, z are random 3x4 arrays.
np.random.seed(0)
N, D = 3, 4
x = np.random.randn(N, D)
y = np.random.randn(N, D)
z = np.random.randn(N, D)

# Forward pass: a = x*y, b = a + z, c = sum(b).
a = x * y
b = a + z
c = np.sum(b)

# Backward pass, assuming an upstream gradient dc/dc = 1.
# (Backpropagation itself is covered in detail in chapter 4.)
grad_c = 1.0
grad_b = grad_c * np.ones((N, D))  # dc/db_ij = 1 for every element of the sum
grad_a = grad_b.copy()             # b = a + z  =>  db/da = 1
grad_z = grad_b.copy()             # b = a + z  =>  db/dz = 1
# a = x*y, so da/dx = y and da/dy = x.
# NOTE(review): the original had these two lines swapped (grad_x = grad_a*x,
# grad_y = grad_a*y), which just echoes the inputs instead of the gradients.
grad_x = grad_a * y
grad_y = grad_a * x

# Print each gradient and how long the computation took.
print(grad_x)
print(grad_y)
print(grad_z)
print(datetime.now() - start)
[[ 1.76405235 0.40015721 0.97873798 2.2408932 ] [ 1.86755799 -0.97727788 0.95008842 -0.15135721] [-0.10321885 0.4105985 0.14404357 1.45427351]] [[ 0.76103773 0.12167502 0.44386323 0.33367433] [ 1.49407907 -0.20515826 0.3130677 -0.85409574] [-2.55298982 0.6536186 0.8644362 -0.74216502]] [[1. 1. 1. 1.] [1. 1. 1. 1.] [1. 1. 1. 1.]] 0:00:00.002931
# Same computation with TensorFlow (1.x graph API).
import tensorflow as tf
import numpy as np
from datetime import datetime

start = datetime.now()

# TensorFlow 1.x first defines a computation graph and feeds values into it
# later; at this point only the empty graph is being defined.
# Define Graph on GPU
with tf.device('/gpu:0'):  # place these ops on GPU number 0
    # Empty placeholder nodes; the dtype of the incoming data is declared here.
    x = tf.placeholder(tf.float32)
    y = tf.placeholder(tf.float32)
    z = tf.placeholder(tf.float32)
    # The forward computation is also declared symbolically.
    a = x * y
    b = a + z
    c = tf.reduce_sum(b)
    # Gradients of c with respect to x, y, z, stored in grad_x/grad_y/grad_z.
    grad_x, grad_y, grad_z = tf.gradients(c, [x, y, z])

# The actual computation happens here; TensorFlow calls this a session.
with tf.Session() as sess:
    values = {
        x: np.random.randn(N, D),  # the concrete input values are generated here
        y: np.random.randn(N, D),
        z: np.random.randn(N, D)
    }
    # sess.run evaluates the graph; feed_dict supplies the placeholder values.
    out = sess.run([c, grad_x, grad_y, grad_z], feed_dict=values)
    c_val, grad_x_val, grad_y_val, grad_z_val = out

# Print the gradients and measure the elapsed time.
print(grad_x_val)
print(grad_y_val)
print(grad_z_val)
print(datetime.now() - start)
[[ 1.4882522 1.8958892 1.1787796 -0.17992483] [-1.0707526 1.0544517 -0.40317693 1.222445 ] [ 0.20827498 0.97663903 0.3563664 0.7065732 ]] [[ 1.1394007 -1.2348258 0.40234163 -0.6848101 ] [-0.87079716 -0.5788497 -0.31155252 0.05616534] [-1.1651498 0.9008265 0.46566245 -1.5362437 ]] [[1. 1. 1. 1.] [1. 1. 1. 1.] [1. 1. 1. 1.]] 0:00:00.055856
# Same computation with PyTorch.
import torch
from datetime import datetime

start = datetime.now()
N, D = 3, 4

# Use the GPU when one is available, otherwise fall back to the CPU.
# (The original hard-coded CUDA, which crashes on CPU-only machines.)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Randomly initialize x, y, z; requires_grad=True tells autograd to track them.
# https://pytorch.org/docs/stable/torch.html?highlight=randn#torch.randn
x = torch.randn(N, D, device=device, requires_grad=True)
y = torch.randn(N, D, device=device, requires_grad=True)
z = torch.randn(N, D, device=device, requires_grad=True)

# In PyTorch the graph is built eagerly: operations run as they are defined.
a = x * y
b = a + z
c = torch.sum(b)

# Backpropagate from the final scalar c. For a scalar output, backward()
# implicitly uses an upstream gradient of 1.0, so the deprecated
# torch.cuda.FloatTensor([1.0]) argument of the original is unnecessary.
# Compared to the NumPy version, this whole process is automatic.
c.backward()

# Print each gradient and the elapsed time.
print(x.grad)
print(y.grad)
print(z.grad)
print(datetime.now() - start)
tensor([[ 0.7840, -2.7837, 0.9255, 0.8922], [ 0.3718, -0.5948, 0.6418, -0.6967], [-0.2106, -0.6479, -0.7273, 0.5540]], device='cuda:0') tensor([[-1.1159, 0.4730, -2.2700, -1.4020], [ 0.2738, -1.5292, -1.6174, 0.8994], [ 0.0773, 1.6193, 0.9806, -0.8056]], device='cuda:0') tensor([[1., 1., 1., 1.], [1., 1., 1., 1.], [1., 1., 1., 1.]], device='cuda:0') 0:00:00.006936