9import multiprocessing
10import multiprocessing.connection
11
12import cv2
13import gym
import numpy as np


# This is a wrapper for an OpenAI Gym game environment. We do a few things here:
#
# 1. Apply the same action on four frames and keep the pixel-wise maximum of the last two frames
# 2. Convert observation frames to grayscale and scale them to (84, 84)
# 3. Stack the frames of the last four actions
# 4. Add episode information (total reward for the entire episode) for monitoring
# 5. Restrict an episode to a single life (the game has 5 lives; we reset after every single life)
#
# The observation is a tensor of size (4, 84, 84). It is four frames (images of the game screen)
# stacked on the first axis, i.e., each channel is a frame.
class Game:
    """
    Wrapper around the OpenAI Gym `BreakoutNoFrameskip-v4` environment.

    * Repeats every action for up to four frames and keeps the pixel-wise
      maximum of the last two frames.
    * Converts frames to grayscale and resizes them to (84, 84).
    * Stacks the frames of the last four actions into an array of shape
      (4, 84, 84); each channel is one frame.
    * Collects per-episode information (total reward and length).
    * Treats the loss of a single life as the end of an episode and resets.
    """

    def __init__(self, seed: int):
        """Create and seed the environment and allocate the frame buffers."""
        # create environment
        self.env = gym.make('BreakoutNoFrameskip-v4')
        self.env.seed(seed)

        # tensor for a stack of 4 frames
        self.obs_4 = np.zeros((4, 84, 84))

        # buffer to keep the maximum of last 2 frames
        self.obs_2_max = np.zeros((2, 84, 84))

        # keep track of the episode rewards
        self.rewards = []
        # and number of lives left
        self.lives = 0

    def step(self, action):
        """
        Execute `action` for up to 4 time steps.

        Returns a tuple `(observation, reward, done, episode_info)`:

        * `observation`: the stack of 4 frames; after a finished episode this
          is the stack produced by `reset()`
        * `reward`: total reward collected while the action was executed
        * `done`: whether the episode finished (environment done or a life lost)
        * `episode_info`: `{"reward": ..., "length": ...}` if the episode
          finished, otherwise `None`
        """
        reward = 0.
        done = None

        # run for 4 steps
        for i in range(4):
            # execute the action in the OpenAI Gym environment
            obs, r, done, _ = self.env.step(action)

            # only the last two frames contribute to the observation
            if i >= 2:
                self.obs_2_max[i % 2] = self._process_obs(obs)

            reward += r

            # a lost life ends the episode as well
            if self.env.unwrapped.ale.lives() < self.lives:
                done = True

            # never step an environment that has already finished; doing so is
            # undefined for Gym environments and could overwrite the `done` flag
            if done:
                break

        # maintain rewards for each step
        self.rewards.append(reward)

        if done:
            # if finished, set episode information and reset
            episode_info = {"reward": sum(self.rewards), "length": len(self.rewards)}
            self.reset()
        else:
            episode_info = None

            # get the max of last two frames
            latest = self.obs_2_max.max(axis=0)

            # push it to the stack of 4 frames (np.roll returns a new array)
            self.obs_4 = np.roll(self.obs_4, shift=-1, axis=0)
            self.obs_4[-1] = latest

        return self.obs_4, reward, done, episode_info

    def reset(self):
        """
        Reset the environment and the cached state.

        Fills all four slots of the frame stack with the processed initial
        frame, clears the reward history, records the current number of lives
        and returns the frame stack.
        """
        # reset OpenAI Gym environment
        obs = self.env.reset()

        # reset caches; the stack is filled in place with the first frame
        self.obs_4[:] = self._process_obs(obs)
        self.rewards = []

        self.lives = self.env.unwrapped.ale.lives()

        return self.obs_4

    @staticmethod
    def _process_obs(obs):
        """Convert an RGB frame to grayscale and resize it to 84x84."""
        gray = cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY)
        return cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)


def worker_process(remote: multiprocessing.connection.Connection, seed: int):
    """
    Create a `Game` and serve commands received over `remote`.

    Each message is a `(cmd, data)` tuple:

    * `"step"`: sends back `game.step(data)`
    * `"reset"`: sends back `game.reset()`
    * `"close"`: closes the connection and returns

    Raises `NotImplementedError` for any other command.
    """
    # create game
    game = Game(seed)

    # wait for instructions from the connection and execute them
    while True:
        cmd, data = remote.recv()
        if cmd == "step":
            remote.send(game.step(data))
        elif cmd == "reset":
            remote.send(game.reset())
        elif cmd == "close":
            remote.close()
            break
        else:
            raise NotImplementedError(f"unknown command: {cmd!r}")


# Creates a new worker and runs it in a separate process.
class Worker:
    """
    Handle to a game running in its own process.

    Opens a pipe, starts a process that executes `worker_process` with one end
    of the pipe and the given seed, and keeps the other end as `self.child`.
    """

    def __init__(self, seed):
        local_end, remote_end = multiprocessing.Pipe()

        # connection kept on this side of the pipe
        self.child = local_end

        # the process receives the opposite end of the pipe together with the seed
        self.process = multiprocessing.Process(
            target=worker_process,
            args=(remote_end, seed),
        )
        self.process.start()