Я обучил глубокую модель Q-Learning, используя Chanier:
class Q_Network (chainer.Chain):
def __init__(self, input_size, hidden_size, output_size):
super (Q_Network, self).__init__ (
fc1=L.Linear (input_size, hidden_size),
fc2=L.Linear (hidden_size, hidden_size),
fc3=L.Linear (hidden_size, output_size)
)
def __call__(self, x):
h = F.relu (self.fc1 (x))
h = F.relu (self.fc2 (h))
y = self.fc3 (h)
return y
def reset(self):
self.zerograds ()
def train_dqn(env):
Q = Q_Network (input_size=env.history_t + 1, hidden_size=100, output_size=3)
Q_ast = copy.deepcopy (Q)
optimizer = chainer.optimizers.Adam ()
optimizer.setup (Q)
epoch_num = 50
step_max = len (env.data) - 1
memory_size = 200
batch_size = 20
# epsilon = 1.0
epsilon = 0.9
epsilon_decrease = 1e-3
epsilon_min = 0.1
start_reduce_epsilon = 200
train_freq = 10
update_q_freq = 20
# gamma = 0.97
gamma = 0.9
show_log_freq = 5
memory = []
total_step = 0
total_rewards = []
total_losses = []
start = time.time ()
for epoch in range (epoch_num):
pobs = env.reset ()
step = 0
done = False
total_reward = 0
total_loss = 0
while not done and step < step_max:
# select act
pact = np.random.randint (3)
if np.random.rand () > epsilon:
pact = Q (np.array (pobs, dtype=np.float32).reshape (1, -1))
pact = np.argmax (pact.data)
# act
obs, reward, done = env.step (pact)
# add memory
memory.append ((pobs, pact, reward, obs, done))
if len (memory) > memory_size:
memory.pop (0)
# train or update q
if len (memory) == memory_size:
if total_step % train_freq == 0:
shuffled_memory = np.random.permutation (memory)
memory_idx = range (len (shuffled_memory))
for i in memory_idx[::batch_size]:
batch = np.array (shuffled_memory[i:i + batch_size])
b_pobs = np.array (batch[:, 0].tolist (), dtype=np.float32).reshape (batch_size, -1)
b_pact = np.array (batch[:, 1].tolist (), dtype=np.int32)
b_reward = np.array (batch[:, 2].tolist (), dtype=np.int32)
b_obs = np.array (batch[:, 3].tolist (), dtype=np.float32).reshape (batch_size, -1)
b_done = np.array (batch[:, 4].tolist (), dtype=np.bool)
q = Q (b_pobs)
maxq = np.max (Q_ast (b_obs).data, axis=1)
target = copy.deepcopy (q.data)
for j in range (batch_size):
target[j, b_pact[j]] = b_reward[j] + gamma * maxq[j] * (not b_done[j])
Q.reset ()
loss = F.mean_squared_error (q, target)
total_loss += loss.data
loss.backward ()
optimizer.update ()
if total_step % update_q_freq == 0:
Q_ast = copy.deepcopy (Q)
# epsilon
if epsilon > epsilon_min and total_step > start_reduce_epsilon:
epsilon -= epsilon_decrease
# next step
total_reward += reward
pobs = obs
step += 1
total_step += 1
total_rewards.append (total_reward)
total_losses.append (total_loss)
if (epoch + 1) % show_log_freq == 0:
log_reward = sum (total_rewards[((epoch + 1) - show_log_freq):]) / show_log_freq
log_loss = sum (total_losses[((epoch + 1) - show_log_freq):]) / show_log_freq
elapsed_time = time.time () - start
print ('\t'.join (map (str, [epoch + 1, epsilon, total_step, log_reward, log_loss, elapsed_time])))
start = time.time ()
return Q, total_losses, total_rewards
if __name__ == "__main__":
Q, total_losses, total_rewards = train_dqn (Environment1 (train))
serializers.save_npz(r'C:\Users\willi\Desktop\dqn\dqn.model', Q)
После сохранения модели я снова вызываю модель и загружаю в нее данные, чтобы она могла предсказать:
модель нагрузки:
model = Q_Network (input_size=91, hidden_size=100, output_size=3)
serializers.load_npz(r'C:\Users\willi\Desktop\dqn\dqn.model', model)
подать одну строку данных:
data = pd.read_csv (r'C:\Users\willi\Downloads\spyv.csv')
данные выглядят так:
open high low close volume datetime
0 236.250 239.01 236.22 238.205 2327395 30600
1 238.205 240.47 238.00 239.920 1506096 30660
2 239.955 240.30 238.85 239.700 1357531 30720
3 239.690 243.33 239.66 241.650 1265604 30780
4 241.570 242.13 240.20 240.490 896000 30840
Теперь предскажите:
x = data.iloc[1].to_numpy()
y = model(x)
Но ошибка говорит:
IndexError: tuple index out of range
Полная ошибка:
IndexError Traceback (most recent call last)
<ipython-input-7-b745008aa965> in <module>
64
65 x = data.iloc[1].to_numpy()
---> 66 y = Q(x)
67
68
~\ddqn.ipynb in __call__(self, x)
~\Anaconda3\lib\site-packages\chainer\link.py in __call__(self, *args, **kwargs)
285 # forward is implemented in the child classes
286 forward = self.forward # type: ignore
--> 287 out = forward(*args, **kwargs)
288
289 # Call forward_postprocess hook
~\Anaconda3\lib\site-packages\chainer\links\connection\linear.py in forward(self, x, n_batch_axes)
181 in_size = utils.size_of_shape(x.shape[n_batch_axes:])
182 self._initialize_params(in_size)
--> 183 return linear.linear(x, self.W, self.b, n_batch_axes=n_batch_axes)
~\Anaconda3\lib\site-packages\chainer\functions\connection\linear.py in linear(x, W, b, n_batch_axes)
306 args = x, W, b
307
--> 308 y, = LinearFunction().apply(args)
309 if n_batch_axes > 1:
310 y = y.reshape(batch_shape + (-1,))
~\Anaconda3\lib\site-packages\chainer\function_node.py in apply(self, inputs)
305
306 if configuration.config.type_check:
--> 307 self._check_data_type_forward(in_data)
308
309 self.check_layout_forward(input_vars)
~\Anaconda3\lib\site-packages\chainer\function_node.py in _check_data_type_forward(self, in_data)
444 try:
445 with type_check.light_mode:
--> 446 self.check_type_forward(in_type)
447 return
448 except type_check.InvalidType:
~\Anaconda3\lib\site-packages\chainer\functions\connection\linear.py in check_type_forward(self, in_types)
27 x_type.ndim == 2,
28 w_type.ndim == 2,
---> 29 x_type.shape[1] == w_type.shape[1],
30 )
31 if type_check.eval(n_in) == 3:
IndexError: tuple index out of range