1
1
using . PyCall
2
2
3
-
4
3
np = pyimport (" numpy" )
5
4
6
- export PettingzooEnv
5
+ export PettingZooEnv
6
+
7
7
8
8
"""
9
- PettingzooEnv (;kwargs...)
9
+ PettingZooEnv (;kwargs...)
10
10
11
- `PettingzooEnv ` is an interface of the python library pettingzoo for multi agent reinforcement learning environments. It can be used to test multi
11
+ `PettingZooEnv` is an interface to the Python library PettingZoo for multi-agent reinforcement learning environments. It can be used to test multi
12
12
agent reinforcement learning algorithms implemented in Julia ReinforcementLearning.
13
13
"""
14
- function PettingzooEnv (name:: String ; seed= 123 , args... )
14
+
15
+ function PettingZooEnv (name:: String ; seed= 123 , args... )
15
16
if ! PyCall. pyexists (" pettingzoo.$name " )
16
17
error (" Cannot import pettingzoo.$name " )
17
18
end
@@ -20,7 +21,7 @@ function PettingzooEnv(name::String; seed=123, args...)
20
21
pyenv. reset (seed= seed)
21
22
obs_space = space_transform (pyenv. observation_space (pyenv. agents[1 ]))
22
23
act_space = space_transform (pyenv. action_space (pyenv. agents[1 ]))
23
- env = PettingzooEnv {typeof(act_space),typeof(obs_space),typeof(pyenv)} (
24
+ env = PettingZooEnv {typeof(act_space),typeof(obs_space),typeof(pyenv)} (
24
25
pyenv,
25
26
obs_space,
26
27
act_space,
33
34
34
35
# basic function needed for simulation ========================================================================
35
36
36
"""
    RLBase.reset!(env::PettingZooEnv)

Reset the wrapped PettingZoo environment in place with the stored seed.

The result of `pyenv.reset` is written into the preallocated `env.state`
via `pycall!` so no fresh `PyObject` is allocated on every reset.
"""
function RLBase.reset!(env::PettingZooEnv)
    pycall!(env.state, env.pyenv.reset, PyObject, env.seed)
    # Bug fix: the turn tracker must restart with the first agent after a
    # reset, otherwise `current_player` carries state over from the
    # previous episode.
    env.current_player = 1
    nothing
end
"""
    RLBase.is_terminated(env::PettingZooEnv)

Return `true` when the current agent is done, i.e. when PettingZoo's
`last()` reports either termination or truncation.
"""
function RLBase.is_terminated(env::PettingZooEnv)
    # `last()` yields (observation, reward, terminated, truncated, info).
    last_step = pycall(env.pyenv.last, PyObject)
    _, _, terminated, truncated, _ = last_step
    return terminated || truncated
end
# # State / observation implementations ========================================================================
50
50
51
"""
    RLBase.state(env::PettingZooEnv, ::Observation{Any}, players::Tuple)

Collect the observation of every player in `players` and return them as a
`Dict` keyed by player.
"""
function RLBase.state(env::PettingZooEnv, ::Observation{Any}, players::Tuple)
    return Dict(player => state(env, player) for player in players)
end
# partial observability is default for pettingzoo
55
# Partial observability is the default in PettingZoo: each player only sees
# its own observation, fetched through the AEC `observe` API.
RLBase.state(env::PettingZooEnv, ::Observation{Any}, player) = env.pyenv.observe(player)
# # state space =========================================================================================================================================
61
61
62
# Joint state space of a group of players: one per-player state space each,
# wrapped in a `Space`.
function RLBase.state_space(env::PettingZooEnv, ::Observation{Any}, players)
    per_player = Dict(player => state_space(env, player) for player in players)
    return Space(per_player)
end
# partial observability
65
# Partial observability: a single player's state space is its PettingZoo
# observation space. Agent ids are Python strings, hence `String(player)`.
function RLBase.state_space(env::PettingZooEnv, ::Observation{Any}, player::Symbol)
    return space_transform(env.pyenv.observation_space(String(player)))
end
# for full observability. Be careful: action_space has also to be adjusted
68
- # RLBase.state_space(env::PettingzooEnv , ::Observation{Any}, player::String) = space_transform(env.pyenv.state_space)
68
+ # RLBase.state_space(env::PettingZooEnv , ::Observation{Any}, player::String) = space_transform(env.pyenv.state_space)
69
69
70
70
71
71
# # action space implementations ====================================================================================
72
72
73
"""
    RLBase.action_space(env::PettingZooEnv, players::Tuple{Vararg{Symbol}})

Joint action space of `players`: a `Space` over per-player action spaces.

Bug fix: the signature previously used `Tuple{Symbol}`, which only matches
1-element tuples; `Tuple{Vararg{Symbol}}` matches symbol tuples of any
length, so multi-player queries dispatch here as intended.
"""
RLBase.action_space(env::PettingZooEnv, players::Tuple{Vararg{Symbol}}) =
    Space(Dict(p => action_space(env, p) for p in players))
# A single player's action space; PettingZoo expects the agent id as a string.
function RLBase.action_space(env::PettingZooEnv, player::Symbol)
    return space_transform(env.pyenv.action_space(String(player)))
end
# Action space looked up by the player's 1-based position in the agent list.
function RLBase.action_space(env::PettingZooEnv, player::Integer)
    agent = env.pyenv.agents[player]
    return space_transform(env.pyenv.action_space(agent))
end
# Default-player view: return the action space cached at construction time.
RLBase.action_space(env::PettingZooEnv, ::DefaultPlayer) = env.action_space
# # action functions ========================================================================================================================
83
83
84
"""
    RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Int})

Step every agent once with its discrete action from `actions`.

Bug fix: `env.pyenv.agents` yields `String` agent names while `actions`
is keyed by `Symbol`, so the lookup must convert with `Symbol(p)` — the
previous `actions[p]` always raised a `KeyError`.
"""
function RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Int})
    @assert length(actions) == length(players(env))
    for p in env.pyenv.agents
        pycall(env.pyenv.step, PyObject, actions[Symbol(p)])
    end
end
"""
    RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Real})

Step every agent once with its scalar continuous action, converted to a
`float32` numpy array as PettingZoo expects.

Bug fix: `env.pyenv.agents` yields `String` agent names while `actions`
is keyed by `Symbol`, so the lookup must convert with `Symbol(p)` — the
previous `actions[p]` always raised a `KeyError`.
"""
function RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Real})
    @assert length(actions) == length(env.pyenv.agents)
    for p in env.pyenv.agents
        pycall(env.pyenv.step, PyObject, np.array(actions[Symbol(p)]; dtype=np.float32))
    end
end
"""
    RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Vector})

Step every agent once with its vector-valued action from `actions`.

Bug fix: the loop passed the agent name `p` to `act!` instead of that
agent's action. It now forwards `actions[Symbol(p)]` (agent names from
python are `String`s, the Dict is keyed by `Symbol`), which dispatches to
the `Vector` method that converts the action to a `float32` numpy array.
"""
function RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Vector})
    @assert length(actions) == length(env.pyenv.agents)
    for p in env.pyenv.agents
        RLBase.act!(env, actions[Symbol(p)])
    end
end
"""
    RLBase.act!(env::PettingZooEnv, actions::NamedTuple)

Step every agent once, reading each agent's action out of `actions` by its
player symbol and delegating to the single-action `act!` methods.
"""
function RLBase.act!(env::PettingZooEnv, actions::NamedTuple)
    @assert length(actions) == length(env.pyenv.agents)
    for p in players(env)
        RLBase.act!(env, actions[p])
    end
end
# PettingZoo expects continuous actions as float32 numpy arrays, so the
# Julia vector is converted before stepping.
function RLBase.act!(env::PettingZooEnv, action::Vector)
    converted = np.array(action; dtype=np.float32)
    pycall(env.pyenv.step, PyObject, converted)
end
# Fallback: any other action (e.g. a discrete integer) is handed to the
# python `step` unchanged; PyCall performs the conversion.
function RLBase.act!(env::PettingZooEnv, action)
    pycall(env.pyenv.step, PyObject, action)
end
# reward of player ======================================================================================================================
124
"""
    RLBase.reward(env::PettingZooEnv, player::Symbol)

Most recent reward of `player`, read from PettingZoo's `rewards` dict,
which is keyed by the agent's string name.
"""
RLBase.reward(env::PettingZooEnv, player::Symbol) = env.pyenv.rewards[String(player)]
# Multi agent part =========================================================================================================================================
130
128
131
129
132
# All agent names of the wrapped environment, converted to `Symbol`s so the
# Julia side consistently identifies players by symbol.
function RLBase.players(env::PettingZooEnv)
    return Symbol.(env.pyenv.agents)
end
# The agent whose turn it is, looked up via the stored 1-based turn index.
RLBase.current_player(env::PettingZooEnv) = Symbol(env.pyenv.agents[env.current_player])
# Advance the turn tracker to the next agent, wrapping back to the first
# agent after the last one.
function RLBase.next_player!(env::PettingZooEnv)
    n_agents = length(env.pyenv.agents)
    next_idx = env.current_player < n_agents ? env.current_player + 1 : 1
    env.current_player = next_idx
end
- function RLBase. NumAgentStyle (env:: PettingzooEnv )
140
+ function RLBase. NumAgentStyle (env:: PettingZooEnv )
141
141
n = length (env. pyenv. agents)
142
142
if n == 1
143
143
SingleAgent ()
@@ -146,9 +146,8 @@ function RLBase.NumAgentStyle(env::PettingzooEnv)
146
146
end
147
147
end
148
148
149
-
150
# Environment trait declarations =============================================

# Agents submit their actions within the same environment step.
RLBase.DynamicStyle(::PettingZooEnv) = SIMULTANEOUS
# Only the minimal (always-legal) action set is exposed.
RLBase.ActionStyle(::PettingZooEnv) = MINIMAL_ACTION_SET
# Each agent observes only its own partial view of the state.
RLBase.InformationStyle(::PettingZooEnv) = IMPERFECT_INFORMATION
# Stochastic transitions follow explicit distributions.
RLBase.ChanceStyle(::PettingZooEnv) = EXPLICIT_STOCHASTIC
0 commit comments