0
I created an interface with Tkinter to use as an environment for my reinforcement learning studies. My goal is to instantiate the class that creates the environment and, from an external loop, send it actions to train my learning model.
The problem I am not handling well: the model training routine used to live inside the environment class itself, and I had no problems at all. The problems started after I decided to move the training routine out of that class. I tried to solve it with a Thread, but that generated a scope problem.
Code of the class creating the environment:
import os
import sys
import threading
import tkinter as tk
import numpy as np
import time
import copy
import cv2
from PIL import Image
class Maze(threading.Thread):
    """Tkinter maze window that runs its GUI in its own thread.

    The constructor starts the thread. ``run`` builds the Tk widgets, draws
    one square (made of four triangles) per maze cell and then enters the Tk
    main loop. The constructor does not return until the canvas has been
    drawn (or start-up has failed), so ``ids``, ``init`` and ``current`` are
    already populated when the caller receives the instance. Without that
    wait, a caller could use the object while ``ids`` was still the empty
    list assigned in ``__init__``.

    Args:
        config: settings dict. This class reads ``"environment"`` (an array
            whose ``.shape`` gives rows and columns and whose cells hold a
            one-character symbol at index 0), ``"height"``, ``"width"`` and
            ``"widthSquares"``.
        mode: stored as ``self.mode``; not otherwise used by the methods
            defined here.
    """

    def __init__(self, config, mode):
        threading.Thread.__init__(self)
        self.root   =  None
        self.frame  =  None
        self.config =  config
        self.mode   =  mode
        self.shape  =  self.config["environment"].shape
        # [line, column, 0] of the start cell and of the current cell.
        self.init    = [0, 0, 0]
        self.current = [0, 0, 0]
        # Canvas item ids; after draw_canvas() an int array (rows, cols, 4).
        self.ids     = []
        self.lines   = 0
        self.columns = 0
        self.canvas = None
        self.colorAnimate  = 'mediumOrchid1'
        # One fill colour per triangle of a square, in draw() order.
        self.paletteInit   = ('white', 'white', 'white', 'white')
        self.paletteLocked = ('gray10', 'gray10', 'gray10', 'gray10',)
        self.paletteUnlocked = ('white', 'white', 'white', 'white')
        self.paletteCurrent  = ('gray80', 'gray80', 'gray80', 'gray80')
        self.palettePositiveReward = ('lime green', 'lime green', 'lime green', 'lime green')
        self.paletteNegativeReward = ('firebrick1', 'firebrick1', 'firebrick1', 'firebrick1')
        # Set once the canvas is drawn and the event loop is running, or as
        # soon as GUI start-up fails, so waiting callers never hang.
        self.canvas_ready = threading.Event()
        self.start()
        # Block until the GUI thread has filled in ids / init / current.
        self.canvas_ready.wait()

    def run(self):
        """Build the widgets, draw the maze and run the Tk main loop."""
        try:
            self.root  = tk.Tk()
            self.frame = tk.Frame(self.root)
            self.frame.pack()
            # Draw canvas
            self.draw_canvas()
            # Signal readiness from inside the event loop, so other threads
            # only touch the canvas once Tk is processing events.
            self.root.after(0, self.canvas_ready.set)
        except BaseException:
            # Release the constructor even when the GUI could not be built.
            self.canvas_ready.set()
            raise
        # Start the Tk GUI.
        self.root.mainloop()

    def draw_canvas(self):
        """Create the canvas and draw every cell of ``config["environment"]``.

        Records the start cell (symbol ``'I'``) in ``init`` / ``current`` and
        leaves ``ids`` as an int array of shape ``(shape[0], shape[1], 4)``.
        After the call, ``lines`` holds the number of rows drawn and
        ``columns`` the number of cells in the last row.
        """
        side = self.config["widthSquares"]
        self.canvas = tk.Canvas(
            self.frame,
            width=self.config["width"],
            height=self.config["height"],
            background='gray75',
        )
        self.canvas.pack()
        # Symbol -> palette; any other symbol is drawn as a locked cell.
        palettes = {
            ' ': self.paletteUnlocked,
            '+': self.palettePositiveReward,
            '-': self.paletteNegativeReward,
            'I': self.paletteInit,
        }
        # Start clean so a repeated draw does not append to earlier ids or
        # continue counting rows from the previous pass.
        self.ids = []
        self.lines = 0
        y = 0
        for row in self.config["environment"]:
            x = 0
            self.columns = 0
            for cell in row:
                symbol = cell[0]
                self.draw(x=x, y=y, color=palettes.get(symbol, self.paletteLocked))
                if symbol == 'I':
                    self.init[0] = self.lines
                    self.init[1] = self.columns
                    self.current = copy.deepcopy(self.init)
                # draw() halves its offsets, so advance by twice the side.
                x += side * 2
                self.columns += 1
            y += side * 2
            self.lines += 1
        # np.asarray also covers the case where no cell was drawn and ids is
        # still a plain list, which has no astype().
        self.ids = np.asarray(self.ids).astype(int)
        self.ids = self.ids.reshape(self.shape[0], self.shape[1], 4)

    def draw(self, x, y, color):
        """Draw one square as four triangles that meet at its centre.

        Args:
            x: twice the horizontal pixel offset of the square's left edge.
            y: twice the vertical pixel offset of the square's top edge.
            color: four fill colours, for the top, right, bottom and left
                triangle in that order.

        The four canvas item ids are appended to ``self.ids`` in that order.
        """
        c1, c2, c3, c4 = color
        side = self.config["widthSquares"]
        left, top = x / 2, y / 2
        right, bottom = side + x / 2, side + y / 2
        cx, cy = (side + x) / 2, (side + y) / 2
        # Each entry: the flat [x1, y1, x2, y2, x3, y3] point list and fill.
        triangles = (
            ([left, top, cx, cy, right, top], c1),        # top
            ([right, bottom, cx, cy, right, top], c2),    # right
            ([right, bottom, cx, cy, left, bottom], c3),  # bottom
            ([left, top, cx, cy, left, bottom], c4),      # left
        )
        new_ids = [self.canvas.create_polygon(points, fill=fill) for points, fill in triangles]
        self.ids = np.append(self.ids, new_ids)
Here is the code of the training routine:
import numpy as np
from environment.Maze import Maze
from agents.QTable import QTable
def train(config):
    """Run the configured number of training episodes on the maze.

    Creates a ``QTable`` agent and a ``Maze`` environment, then for every
    episode resets the maze and keeps stepping until the environment reports
    the episode as done, updating the Q-table after each step.

    Args:
        config: settings dict. This function reads ``"episodes"`` and the
            ``.shape`` of ``"environment"``; the whole dict is passed on to
            ``Maze``.
    """
    total_episodes = config["episodes"]
    grid_shape = config["environment"].shape

    # One state per grid cell: rows * columns.
    n_states = grid_shape[0] * grid_shape[1]
    agent = QTable(state_dim=n_states, action_dim=4, gamma=0.999, alpha=0.8)
    maze = Maze(config, mode='train-qtable')

    # The reward of the last step is inspected after each episode ends.
    reward = 0

    for episode in range(1, total_episodes + 1):
        print("\nEpisode: {}/{}".format(episode, total_episodes))

        # Fresh episode: reset the maze and read the starting state.
        maze.reset()
        state = maze.get_observable()
        done = False

        while not done:
            # The agent picks the action (Q-table or pseudo-random choice).
            action = agent.select_action(state)
            next_state, reward, done = maze.step(action)
            agent.update_q_table(state, next_state, action, reward)
            state = next_state

        # A positive final reward counts as a win.
        print("Winner" if reward > 0 else "Looser")

    print("Finished")
if __name__ == "__main__":
    # Maze layout, one string per row and one character per cell:
    # ' ' free, 'X' wall, 'I' start, '+' positive reward, '-' negative reward.
    layout_rows = (
        "      ",
        " XX XX",
        "IX    ",
        " XXXX+",
        "      ",
        "------",
    )

    # Settings shared by the environment and the training loop.
    config = {
        # Canvas size
        "height": 600,
        "width": 600,
        # Side length of one square
        "widthSquares": 100,
        # Number of episodes to run
        "episodes": 100000,
        # Whether to animate each action
        "animate": False,
        # Delay between actions, in seconds
        "delayBetweenAction": 0,
        "rewardPositive": 10,
        "rewardNegative": -10,
        "rewardEachStep": -0.01,
        "rewardInvalidStep": -1,
        "image_dim": (64, 64, 2),
        # Each cell is wrapped in its own one-element list, giving an array
        # of shape (rows, columns, 1).
        "environment": np.array(
            [[[symbol] for symbol in row] for row in layout_rows]
        ),
    }

    # Start training with these settings.
    train(config)
When you run this code, everything runs normally until execution reaches the line appMaze.reset().
In the Maze class there is the variable self.ids = [], which is responsible for storing the IDs of the rectangles that were drawn on the canvas. Only with these IDs can I change the fill color of the rectangles and animate the direction chosen by the agent. However, when execution reaches the reset line, this variable is still empty, exactly as it was defined in the __init__ method, which generates the error:
IndexError: list index out of range
I know I missed something, but like all beginners, it’s not clear to me.
When running the code, it creates an interface like this:
Thanks in advance.
Hello Paul, thank you for your reply. Unfortunately it didn't help me, because when I remove Tkinter from the Maze class, it works as expected. It looks like Tkinter's thread is causing this behavior, which I didn't expect.
– Duque