0
I created an interface with Tkinter to use as Environment for my studies with reinforcement learning. My goal is to run the class that creates Nvironment and with an external loop, send actions to train my learning model.
Problem I’m not handling well: The model training routine was within the same environmental class, so I had no problem at all, after I decided that the training routine will stay out of the class that started the problems. I tried to solve with Thread, but it generated a scope problem.
Code of the class creating the environment:
import os
import sys
import threading
import tkinter as tk
import numpy as np
import time
import copy
import cv2
from PIL import Image
class Maze(threading.Thread):
def __init__(self, config, mode):
threading.Thread.__init__(self)
self.root = None
self.frame = None
self.config = config
self.mode = mode
self.shape = self.config["environment"].shape
self.init = [0, 0, 0]
self.current = [0, 0, 0]
self.ids = []
self.lines = 0
self.columns = 0
self.canvas = None
self.colorAnimate = 'mediumOrchid1'
self.paletteInit = ('white', 'white', 'white', 'white')
self.paletteLocked = ('gray10', 'gray10', 'gray10', 'gray10',)
self.paletteUnlocked = ('white', 'white', 'white', 'white')
self.paletteCurrent = ('gray80', 'gray80', 'gray80', 'gray80')
self.palettePositiveReward = ('lime green', 'lime green', 'lime green', 'lime green')
self.paletteNegativeReward = ('firebrick1', 'firebrick1', 'firebrick1', 'firebrick1')
self.start()
def run(self):
self.root = tk.Tk()
self.frame = tk.Frame(self.root)
self.frame.pack()
# Draw canvas
self.draw_canvas()
# Start the Tk GUI.
self.root.mainloop()
def draw_canvas(self):
canvas_height = self.config["height"]
canvas_width = self.config["width"]
self.canvas = tk.Canvas(self.frame, width=canvas_width, height=canvas_height, background='gray75')
self.canvas.pack()
y = 0
for i in self.config["environment"]:
x = 0
self.columns = 0
for j in i:
if j[0] == ' ':
self.draw(x=x, y=y, color=self.paletteUnlocked)
elif j[0] == '+':
self.draw(x=x, y=y, color=self.palettePositiveReward)
elif j[0] == '-':
self.draw(x=x, y=y, color=self.paletteNegativeReward)
elif j[0] == 'I':
self.draw(x=x, y=y, color=self.paletteInit)
self.init[0] = self.lines
self.init[1] = self.columns
self.current = copy.deepcopy(self.init)
else:
self.draw(x=x, y=y, color=self.paletteLocked)
x += self.config["widthSquares"]*2
self.columns += 1
y += self.config["widthSquares"]*2
self.lines += 1
self.ids = self.ids.astype(int)
self.ids = self.ids.reshape(self.shape[0], self.shape[1], 4)
def draw(self, x, y, color):
c1, c2, c3, c4 = color
x1 = x / 2
y1 = y / 2
x2 = (self.config["widthSquares"] + x) / 2
y2 = (self.config["widthSquares"] + y) / 2
x3 = self.config["widthSquares"] + x / 2
y3 = y / 2
points = [x1, y1, x2, y2, x3, y3]
id0 = self.canvas.create_polygon(points, fill=c1)
x1 = self.config["widthSquares"] + x / 2
y1 = self.config["widthSquares"] + y / 2
x2 = (self.config["widthSquares"] + x) / 2
y2 = (self.config["widthSquares"] + y) / 2
x3 = self.config["widthSquares"] + x / 2
y3 = y / 2
points = [x1, y1, x2, y2, x3, y3]
id1 = self.canvas.create_polygon(points, fill=c2)
x1 = self.config["widthSquares"] + x / 2
y1 = self.config["widthSquares"] + y / 2
x2 = (self.config["widthSquares"] + x) / 2
y2 = (self.config["widthSquares"] + y) / 2
x3 = x / 2
y3 = self.config["widthSquares"] + y / 2
points = [x1, y1, x2, y2, x3, y3]
id2 = self.canvas.create_polygon(points, fill=c3)
x1 = x / 2
y1 = y / 2
x2 = (self.config["widthSquares"] + x) / 2
y2 = (self.config["widthSquares"] + y) / 2
x3 = x / 2
y3 = self.config["widthSquares"] + y / 2
points = [x1, y1, x2, y2, x3, y3]
id3 = self.canvas.create_polygon(points, fill=c4)
self.ids = np.append(self.ids, [id0, id1, id2, id3])
Here the code of the training routine:
import numpy as np
from environment.Maze import Maze
from agents.QTable import QTable
def train(config):
# Episodes to run
episodes = config["episodes"]
# Shape of array
shape = config["environment"].shape
# Length for Q-Table (All possible states for this environment)
state_dim = shape[0] * shape[1]
# Instance Q-Table
q_table = QTable(state_dim=state_dim, action_dim=4, gamma=0.999, alpha=0.8)
# Instance Game Maze
appMaze = Maze(config, mode='train-qtable')
# Initialize reward with zero
reward = 0
# Loop episodes
for episode in range(1, episodes + 1):
# Print current episode
print("\nEpisode: {}/{}".format(episode, episodes))
# Reset game
appMaze.reset()
# Done False, is init
done = False
# Get current state
state = appMaze.get_observable()
# Play
while not done:
# Get action by q_table or epsilon pseudo random
action = q_table.select_action(state)
# Execute action
next_state, reward, done = appMaze.step(action)
# Update Q-Table
q_table.update_q_table(state, next_state, action, reward)
# Set current state now
state = next_state
if reward > 0:
print("Winner")
else:
print("Looser")
# Finish
print("Finished")
if __name__ == "__main__":
# Config for environment
config = {
"height": 600, # Height for canvas
"width": 600, # Width for canvas
"widthSquares": 100, # Width and height for square
"episodes": 100000, # Run this number of episodes
"animate": False, # Animate action
"delayBetweenAction": 0, # Delay in seconds
"rewardPositive": 10,
"rewardNegative": -10,
"rewardEachStep": -0.01,
"rewardInvalidStep": -1,
"image_dim": (64, 64, 2),
"environment": np.array([
[[' '], [' '], [' '], [' '], [' '], [' ']],
[[' '], ['X'], ['X'], [' '], ['X'], ['X']],
[['I'], ['X'], [' '], [' '], [' '], [' ']],
[[' '], ['X'], ['X'], ['X'], ['X'], ['+']],
[[' '], [' '], [' '], [' '], [' '], [' ']],
[['-'], ['-'], ['-'], ['-'], ['-'], ['-']]
])
}
# Run train
train(config)
When you run this code, everything runs normal until you get to the line appMaze.reset()
.
In the Maze class there is the variable self.ids = []
, which is responsible for storing the ID s of the rectangles that were drawn in the canvas, only with this ID I can change the color of the rectangles fill and animate the direction chosen by the agent, but when it arrives in this reset line, this variable is empty, as well as its definition in the method __init__
, thereby generating the error:
IndexError: list index out of range
I know I missed something, but like all beginners, it’s not clear to me.
When running the code, it creates an interface like this:
Thanks in advance.
Hello Paul, thank you for your reply. Unfortunately she didn’t help me, because the Maze class, when I removed Tkinter, it works as expected. Looks like Tkinter’s thread is influencing this behavior I didn’t expect.
– Duque