Adaptive heuristic critic, solves a maze

ABV · July 18, 2011

For fun here is the AHC using the ‘best 4’ policy trying to solve a maze, after around 100 trials the agent should have it sorted. You may notice a bug with the obstacles but this does not seem to affect the overall results

#include <GUIConstantsEx.au3>
#include <Array.au3>
#include <WindowsConstants.au3>
#include <staticconstants.au3>

Opt("GUIOnEventMode", 1)
Local $iGridSize = 20
Local $NumberGrid [$iGridSize][$iGridSize]
Local $VisualGrid [$iGridSize][$iGridSize]
Local $VisualGridData [$iGridSize][$iGridSize]
Local $VOffset = 0
Local $HOffset = 0
Local $x
Local $y
Local $x_1 = -1
Local $y_1 = -1
Local $x_2 = -1
Local $y_2 = -1
Local $x_3 = -1
Local $y_3 = -1
Local $Trials
Local $Attempts
Local $rLamdba = 0.22
Local $rAlpha = 0.1
Local $bradio
Local $MazeLength = 184
Local $GoalX = 19
Local $GoalY = 4
Dim $Maze[$MazeLength][2]=  [[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,7],[1,8],[1,9],[1,10],[1,11],[1,12],[1,13],[1,14],[1,15],[1,16],[1,17],[1,18], _ 
                     [2,5],[2,7],[2,12],[2,18], _
                     [3,0],[3,1],[3,2],[3,3],[3,5],[3,7],[3,9],[3,10],[3,11],[3,12],[3,14],[3,15],[3,16],[3,18], _
                     [4,3],[4,5],[4,7],[4,9],[4,10],[4,12],[4,14],[4,16],[4,18], _
                     [5,1],[5,2],[5,3],[5,5],[5,7],[5,9],[5,12],[5,14],[5,16],[5,18],[5,19], _
                     [6,5],[6,7],[6,9],[6,10],[6,12],[6,14],[6,16], _
                     [7,1],[7,2],[7,3],[7,5],[7,7],[7,14],[7,16],[7,17],[7,18], _
                     [8,2],[8,3],[8,4],[8,5],[8,7],[8,8],[8,9],[8,10],[8,11],[8,12],[8,14],[8,18], _
                     [9,0],[9,11],[9,12],[9,14],[9,15],[9,16],[9,18], _
                     [10,0],[10,1],[10,2],[10,3],[10,4],[10,5],[10,6],[10,7],[10,8],[10,9],[10,16],[10,18], _
                     [11,11],[11,12],[11,13],[11,14],[11,16],[11,18], _
                     [12,1],[12,2],[12,3],[12,4],[12,5],[12,6],[12,7],[12,8],[12,9],[12,10],[12,11],[12,14],[12,16],[12,18], _
                     [13,9],[13,11],[13,12],[13,14],[13,16],[13,18], _
                     [14,0],[14,1],[14,2],[14,3],[14,5],[14,6],[14,7],[14,8],[14,9],[14,11],[14,14],[14,16],[14,18], _
                     [15,11],[15,13],[15,14],[15,16],[15,18], _
                     [16,1],[16,2],[16,3],[16,4],[16,5],[16,6],[16,7],[16,8],[16,9],[16,10],[16,11],[16,13],[16,14],[16,16],[16,18], _
                     [17,2],[17,4],[17,6],[17,8],[17,9],[17,11],[17,14],[17,16],[17,18], _
                     [18,0],[18,2],[18,4],[18,6],[18,9],[18,11],[18,12],[18,14],[18,16],[18,18], _
                     [19,0],[19,14],[19,18] ]

;_ArrayDisplay($Maze)
$GUI = GUICreate("Adaptive heuristic critic - Reinforcement learning", 525, 620)
GUISetOnEvent($GUI_EVENT_CLOSE, "SpecialEvents")
;Create Grid 
For $y  = 0 To ($iGridSize-1)
    For $x = 0 To ($iGridSize-1)
        $VisualGrid[$y][$x] = GUICtrlCreateInput("", 25+ $HOffset, 25 + $VOffset, 25, 25, BitOr($SS_CENTER,$SS_CENTERIMAGE));BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME))
        $VOffset += 24
    Next
    $HOffset += 24
    $VOffset = 0
    ;ConsoleWrite($top & @CRLF)
Next
;create labels and Inputs
GUICtrlSetFont(-1, 12, 800, 1, "Times New Roman")
GUICtrlCreateLabel("Number of Attempts", 300, 527, 175, 20)
$GUIAttempts = GUICtrlCreateInput("", 400, 525, 75, 20,BitOr($SS_CENTER,$SS_CENTERIMAGE))
GUICtrlCreateLabel("Number of Trials", 300, 552, 175, 20)
$GUITrials = GUICtrlCreateInput("", 400, 550, 75, 20,BitOr($SS_CENTER,$SS_CENTERIMAGE))
GUICtrlCreateLabel("G = Goal, A = Agent  O = Obstacle ", 20, 525, 250, 500)

;Create an "OK" button
$OK_Btn = GUICtrlCreateButton("Go", 400, 595, 75, 20)
GUICtrlSetOnEvent($OK_Btn, "OKPressed")
;$radio = GUICtrlCreateCheckbox("Policy Roulette (T) / Highest (F)", 300, 575, 180, 20)


;Set Font and Initalise to empty
For $y  = 0 To ($iGridSize-1)
    For $x = 0 To ($iGridSize-1)
            GUICtrlSetFont($VisualGrid[$y][$x], 12, 800, 1, "Times New Roman")
            GUICtrlSetData($VisualGrid[$y][$x],$VisualGridData[$y][$x]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME))
        $VOffset += 24
    Next
    $HOffset += 24
Next

;Initalise Number Grid 
For $y = 0 to ($iGridSize-1) Step 1
    For $x = 0 to ($iGridSize-1) Step 1
        $NumberGrid[$y][$x] = 0.1;random(0.1,1)
    Next
Next



GUISetState()

While (1)
    Sleep(10)
Wend

Func SpecialEvents()
    ;Destroy the GUI including the controls
    GUIDelete()
    ;Exit the script
    Exit
EndFunc 

Func OKPressed()
        $bradio = 4;GUICtrlRead($radio)
    while GUIGetMsg() <> -3
    
        Local $arXY[2]
    
        sleep(50)
        
        ;create Maze X,Y

        For $x = 0 to $MazeLength -1 Step 1
            CreateObstical($Maze[$x][0], $Maze[$x][1])
        Next


        ;Set Goal and reward Position
        $VisualGridData[$GoalX][$GoalY] = "G"
        GUICtrlSetData($VisualGrid[$GoalX][4],$VisualGridData[$GoalX][$GoalY]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME))
        $NumberGrid[$GoalX][$GoalY] = 500
        ;Create Some Obstacles
        
        
        ;Set Random Agnet Start Point that does not land on an obstacle
        do
            $x = Random(0,($iGridSize-1),1)
            $y = Random(0,($iGridSize-1),1)
        until ArrayTest($x,$y) = 0
        
        $VisualGridData[$y][$x] = "A"
        GUICtrlSetData($VisualGrid[$y][$x],$VisualGridData[$y][$x]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME))

        $Attempts = 0
        
        While (($x <> $GoalY) OR ($y <> $GoalX))
            sleep(60)
        
            $x_3 = $x_2
            $y_3 = $y_2
            $x_2 = $x_1
            $y_2 = $y_1
            $y_1 = $y
            $x_1 = $x

            ;Delete old agents position
            $VisualGridData[$y_1][$x_1] = ""
            GUICtrlSetData($VisualGrid[$y_1][$x_1],$VisualGridData[$y_1][$x_1]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME))
        
            ;Determine the next position using a roulette wheel or higher 4
            NewPosition($x,$y,$x_1,$y_1,$bradio)
        
            ;Pay back credit using Temporal Difference Algorithm
            TDLearning($x,$y,$x_1,$y_1,$x_2,$y_2)
        
            ;Draw agents new position
            $VisualGridData[$y][$x] = "A"
            GUICtrlSetData($VisualGrid[$y][$x],$VisualGridData[$y][$x]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME))
        
            $Attempts += 1
            GUICtrlSetData($GUIAttempts,$Attempts)

        WEnd    
        
        $VisualGridData[$y][$x] = ""
        GUICtrlSetData($VisualGrid[$y][$x],$VisualGridData[$y][$x]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME))
        $Trials += 1
        
;~      IF $Trials = 100 Then ;This is here for debug
;~          _arraydisplay($NumberGrid)
;~          $Trials = 0
;~      EndIf
        
        GUICtrlSetData($GUITrials,$Trials)
    WEnd
    
EndFunc


Func NewPosition(ByRef $x,ByRef $y, $x_1, $y_1, $bradio)
    
    Local $rRX[4]
    
    $rRX = Select4Value($x,$y)
    IF $bradio = 4 Then
        Best4($rRX, $x,$y,$x_1, $y_1)
    Else
        Roulette($rRX, $x,$y)
    EndIf
    
    
EndFunc
    
Func TDLearning($x,$y,$x_1,$y_1,$x_2,$y_2)
    
    IF $x_3 <> -1 Then
        $NumberGrid[$y_3][$x_3] = $NumberGrid[$y_3][$x_3]+((($NumberGrid[$y_2][$x_2] - $NumberGrid[$y_3][$x_3]) + (($NumberGrid[$y_1][$x_1] - $NumberGrid[$y_2][$x_2])*$rLamdba) + (($NumberGrid[$y][$x] - $NumberGrid[$y_1][$x_1])*($rLamdba*$rLamdba)))*$rAlpha)
    EndIf
    
    IF $x_2 <> -1 Then
        $NumberGrid[$y_2][$x_2] = $NumberGrid[$y_2][$x_2] + ((($NumberGrid[$y_1][$x_1] - $NumberGrid[$y_2][$x_2])*$rLamdba) + (($NumberGrid[$y][$x] - $NumberGrid[$y_1][$x_1])*$rLamdba)*$rAlpha)
    EndIf
    
    IF $x_1 <> -1 Then
        $NumberGrid[$y_1][$x_1] = $NumberGrid[$y_1][$x_1] + (($NumberGrid[$y][$x] - $NumberGrid[$y_1][$x_1])*$rAlpha)
    EndIf
    
EndFunc

Func Best4($rRX, ByRef $x, ByRef $y,$x_1, $y_1)
    
    Local $Best4MaxIndex
    
    $Best4MaxIndex = _ArrayMaxIndex($rRX)
        
    IF $x_1 = $x AND $y_1 = $y Then ;Board...
        $NumberGrid[$y][$x] = $NumberGrid[$y][$x] - ($NumberGrid[$y][$x]*0.05)
    EndIf
    
    If $Best4MaxIndex = 0 Then
        IF $x+1 > ($iGridSize-1) Then
            $x=$x
            $y=$y
        Else
            $x=$x+1
            $y=$y
        Endif
    ElseIf $Best4MaxIndex = 1 Then
        IF $x-1 < 0 Then
            $x=$x
            $y=$y
        Else
            $x=$x-1
            $y=$y
        Endif
    ElseIf $Best4MaxIndex = 2 Then
        IF $y+1 > ($iGridSize-1) Then
            $x=$x
            $y=$y
        Else
            $x=$x
            $y=$y+1
        Endif
    ElseIf $Best4MaxIndex = 3 Then
        IF $y-1 < 0 Then
            $x=$x
            $y=$y
        Else
            $x=$x
            $y=$y-1
        Endif
    Else
        $x=$x
        $y=$y
    EndIF
    
    
    
EndFunc

Func Roulette($rRX, ByRef $x, ByRef $y)
    
    Local $RouletteMax
    Local $RouletteSpin
    Local $Rxplus1 = $rRX[0]
    Local $Rxsub1  = $rRX[1]
    Local $Ryplus1 = $rRX[2]
    Local $Rysub1  = $rRX[3]

    $RouletteMax = $Rxplus1 + $Rxsub1 + $Ryplus1 + $Rysub1
    $RouletteSpin = Random(0,$RouletteMax)
    
    If $RouletteSpin >= 0 AND $RouletteSpin <= $Rxplus1 Then
        IF $x+1 > ($iGridSize-1) Then
            $x=$x
            $y=$y
        Else
            $x=$x+1
            $y=$y
        Endif
    ElseIf $RouletteSpin >= $Rxplus1 AND $RouletteSpin <= $Rxplus1+$Rxsub1 Then
        IF $x-1 < 0 Then
            $x=$x
            $y=$y
        Else
            $x=$x-1
            $y=$y
        Endif
    ElseIf $RouletteSpin >= $Rxplus1+$Rxsub1 AND $RouletteSpin <= $Rxplus1+$Rxsub1+$Ryplus1 Then
        IF $y+1 > ($iGridSize-1) Then
            $x=$x
            $y=$y
        Else
            $x=$x
            $y=$y+1
        Endif
    ElseIf $RouletteSpin >= $Rxplus1+$Rxsub1+$Ryplus1 AND $RouletteSpin <= $Rxplus1 + $Rxsub1 + $Ryplus1 + $Rysub1 Then
        IF $y-1 < 0 Then
            $x=$x
            $y=$y
        Else
            $x=$x
            $y=$y-1
        Endif
    Else
        $x=$x
        $y=$y
    EndIF
    
EndFunc

Func Select4Value(ByRef $x, ByRef $y)
    
    Local $rRX[4]

    IF $x+1 > ($iGridSize-1) Then
        $rRX[0] = 0
    Else
        ;$x=$x+1
        $rRX[0] = $NumberGrid[$y][$x+1]
    Endif
        
    IF $x-1 < 0 Then
        $rRX[1] = 0
    Else
        ;$x=$x-1
        $rRX[1] = $NumberGrid[$y][$x-1]
    Endif
    
    IF $y+1 > ($iGridSize-1) Then
        $rRX[2] = 0
    Else
        ;$y=$y+1
        $rRX[2] = $NumberGrid[$y+1][$x]
    Endif
    
    IF $y-1 < 0 Then
        $rRX[3] = 0
    Else
        ;$y=$y-1
        $rRX[3] = $NumberGrid[$y-1][$x]
    Endif
    
    ;_ArrayDisplay($rRX) ;debug
    Return $rRX
EndFunc

Func CreateObstical(ByRef $x, ByRef $y)
    $VisualGridData[$y][$x] = "O"
    $NumberGrid[$y][$x] = 0
    GUICtrlSetData($VisualGrid[$y][$x],$VisualGridData[$y][$x]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME))
EndFunc
    
Func ArrayTest(ByRef $x, ByRef $y)
    
    Local $unique = 0
    
    For $i = 0 to $MazeLength-1 Step 1
        IF ($Maze[$i][0] = $y) AND ($Maze[$i][1] = $x) Then
            $unique = 1
            ;msgbox(1,"h",$Maze[$i][0]&","& $y&","&$Maze[$i][1] &","& $x)
        EndIF
    Next
    Return $unique
EndFunc

jvanegmond · July 18, 2011

I think after about 1000-1500 it did the maze everytime perfectly.

Could you give some more background information about what exactly it is learning? At first I thought you were just rewarding going left/right, but that doesn't lead to these results.

ABV · July 19, 2011

I think after about 1000-1500 it did the maze everytime perfectly.
Could you give some more background information about what exactly it is learning? At first I thought you were just rewarding going left/right, but that doesn't lead to these results.

This link will explain it best

Reinforcement Learning: A Survey

jvanegmond · July 19, 2011

This link will explain it best
Reinforcement Learning: A Survey

Sweet. Thanks.

Sign In

Adaptive heuristic critic, solves a maze

Recommended Posts

ABV

jvanegmond

ABV

jvanegmond

Create an account or sign in to comment

Create an account

Sign in

Browse

AutoIt Resources

Release

Beta