Skip to content

Commit

Permalink
log prob calculated after data collected
Browse files Browse the repository at this point in the history
  • Loading branch information
seungeunrho committed Nov 20, 2019
1 parent 36d0f5b commit efd145e
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions REINFORCE.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ def put_data(self, item):

def train_net(self):
R = 0
- for r, log_prob in self.data[::-1]:
+ for r, prob in self.data[::-1]:
R = r + gamma * R
- loss = -log_prob * R
+ loss = -torch.log(prob) * R
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
Expand All @@ -49,7 +49,7 @@ def main():
m = Categorical(prob)
a = m.sample()
s_prime, r, done, info = env.step(a.item())
- pi.put_data((r,torch.log(prob[a])))
+ pi.put_data((r,prob[a]))

s = s_prime
score += r
Expand Down

0 comments on commit efd145e

Please sign in to comment.