1. 程式人生 > 其它 > 在機器學習和深度學習中建立屬於自己的資料集

在機器學習和深度學習中建立屬於自己的資料集

import gym  # Gym: provides the simulated environment used below
from policynet import PolicyGradient
import matplotlib.pyplot as plt
import time

# Running-reward level above which the training loop switches rendering on.
DISPLAY_REWARD_THRESHOLD = 1000
# Rendering starts disabled; the training loop sets this flag to True later.
RENDER = False

# Create the environment.
env = gym.make('CartPole-v0')  # cart-pole (inverted pendulum on a cart) model
env.seed(1)  # fixed seed for the environment
# Work with the underlying environment object instead of the wrapped one
# (presumably to drop wrapper-imposed limits such as a step cap -- TODO confirm).
env = env.unwrapped

# Show the action/observation spaces and the observation bounds.
print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)
 
# Build the policy-gradient agent, sized from the environment's action and
# observation spaces.
_agent_kwargs = {
    "n_actions": env.action_space.n,
    "n_features": env.observation_space.shape[0],
    "learning_rate": 0.02,
    "reward_decay": 0.99,
}
RL = PolicyGradient(**_agent_kwargs)
# Training phase
N_TRAIN_EPISODES = 85
# Exponentially smoothed episode return; stays None until the first episode
# finishes. An explicit sentinel replaces the former globals() lookup, which
# only worked at module level and kept stale values across re-runs.
running_reward = None
for i_episode in range(N_TRAIN_EPISODES):
    observation = env.reset()
    while True:
        if RENDER:
            env.render()
        # Choose an action with the agent to explore the environment.
        action = RL.choose_action(observation)

        observation_, reward, done, info = env.step(action)

        # Store the observation, action and reward of this step.
        RL.store_transition(observation, action, reward)
        if not done:
            # Advance the agent by one step and keep going.
            observation = observation_
            continue

        # Episode finished: update the smoothed return and report it.
        ep_rs_sum = sum(RL.ep_rs)
        if running_reward is None:
            running_reward = ep_rs_sum
        else:
            running_reward = running_reward * 0.99 + ep_rs_sum * 0.01
        if running_reward > DISPLAY_REWARD_THRESHOLD:
            RENDER = True
        print("episode:", i_episode, "rewards:", int(running_reward))
        # Learn once per episode.
        vt = RL.learn()
        if i_episode == 0:
            # Plot the values returned by the first learning step.
            plt.plot(vt)
            plt.xlabel('episode steps')
            plt.ylabel('normalized state-action value')
            plt.show()
        break
# Evaluation phase: run 10 rendered episodes using RL.greedy to pick actions,
# printing the step counter after every step and once more when the episode
# ends.
try:
    for i in range(10):
        observation = env.reset()
        count = 0
        while True:
            env.render()
            action = RL.greedy(observation)
            observation_, reward, done, info = env.step(action)
            if done:
                print(count)
                break
            observation = observation_
            count += 1
            print(count)
finally:
    # Release the render window / environment resources even if the run is
    # interrupted.
    env.close()

連線
222

33