-
Notifications
You must be signed in to change notification settings - Fork 8.7k
Description
Describe the bug
In `gym\utils\passive_env_checker.py`, the line `if not isinstance(terminated, (bool, np.bool8)):` fails with the error: module 'numpy' has no attribute 'bool8'.
Code example
import gym
import torch
from torch.distributions import Categorical
from torch import nn, optim
import torch.nn.functional as F
def compute_policy_loss(n, log_p):
    """Return the policy-gradient loss for one episode.

    The step at 0-based position ``k`` is weighted by ``n - k``, so earlier
    actions receive larger weights. The weights are standardized to zero
    mean and unit sample standard deviation before being applied.

    Args:
        n: Number of steps taken in the episode.
        log_p: Log-probabilities of the sampled actions, in step order. When
            its length differs from ``n``, pairing stops at the shorter one.

    Returns:
        The sum of ``-log_prob * weight`` over the paired steps, or the
        integer ``0`` when there is nothing to pair.
    """
    weights = torch.tensor([float(i) for i in range(n, 0, -1)])
    # The sample standard deviation of fewer than two values is NaN, which
    # would turn the loss (and every gradient derived from it) into NaN, so
    # a one-step episode keeps its raw weight instead of being standardized.
    if weights.numel() > 1:
        weights = (weights - weights.mean()) / weights.std()
    return sum(-pi * ri for pi, ri in zip(log_p, weights))
class CartPolePolicy(nn.Module):
    """Two-layer network mapping a 4-feature input to 2 action probabilities."""

    def __init__(self):
        # The constructor must be the dunder ``__init__``: a method named
        # plain ``init`` is never invoked on construction, so the layers
        # below would not exist and ``nn.Module`` would not be initialized.
        super().__init__()
        self.fc1 = nn.Linear(in_features=4, out_features=128)
        self.fc2 = nn.Linear(128, 2)
        self.drop = nn.Dropout(p=0.6)

    def forward(self, x):
        """Return per-row action probabilities for ``x``.

        Args:
            x: Tensor whose last dimension has 4 features; it needs a
                leading batch dimension because softmax is taken over dim 1.

        Returns:
            Tensor with 2 entries along dim 1 that sum to one.
        """
        x = self.fc1(x)
        x = self.drop(x)
        x = F.relu(x)
        x = self.fc2(x)
        return F.softmax(x, dim=1)
# Script entry point: train the policy on CartPole and save its weights.
# The guard compares the dunder ``__name__`` against ``'__main__'``; the
# bare names ``name`` / ``'main'`` would raise NameError at import time.
if __name__ == '__main__':
    env = gym.make("CartPole-v0")
    # Seed the environment and torch so runs are repeatable.
    env.reset(seed=543)
    torch.manual_seed(543)

    policy = CartPolePolicy()
    optimizer = optim.Adam(policy.parameters(), lr=0.01)

    max_episod = 10000  # upper bound on training episodes
    max_action = 10000  # upper bound on steps within one episode
    max_steps = 5000    # an episode longer than this ends training

    for episod in range(1, max_episod + 1):
        state, _ = env.reset()
        step = 0
        log_p = list()

        for step in range(1, max_action + 1):
            # Add a leading batch dimension: the policy applies softmax
            # over dim 1.
            state = torch.from_numpy(state).float().unsqueeze(0)
            probs = policy(state)
            m = Categorical(probs)
            action = m.sample()
            # NOTE(review): only the third element returned by env.step is
            # used as the stop flag; the fourth is discarded — confirm that
            # ignoring it is intended.
            state, _, done, _, _ = env.step(action.item())
            if done:
                break
            log_p.append(m.log_prob(action))

        if step > max_steps:
            print(f"完成! 最后一回合 {episod} 运行步数 {step}")
            break

        # The step that ends an episode is not recorded, so an episode that
        # ends on its very first step has no log-probabilities. The loss is
        # then the plain integer 0, which has no .backward(); skip the
        # update in that case.
        if log_p:
            optimizer.zero_grad()
            loss = compute_policy_loss(step, log_p)
            loss.backward()
            optimizer.step()

        if episod % 10 == 0:
            print(f"回合数 {episod} 累计运行步数 {step}")

    torch.save(policy.state_dict(), "cartpole_policy.pth")
System Info
Describe the characteristic of your environment:
- gym installed with `pip install gym`
- Windows 11
- Python 3.12.3
- I have checked that there is no similar issue in the repo (required)