ABV Posted July 18, 2011 Share Posted July 18, 2011 For fun here is the AHC using the ‘best 4’ policy trying to solve a maze, after around 100 trials the agent should have it sorted. You may notice a bug with the obstacles but this does not seem to affect the overall results expandcollapse popup#include <GUIConstantsEx.au3> #include <Array.au3> #include <WindowsConstants.au3> #include <staticconstants.au3> Opt("GUIOnEventMode", 1) Local $iGridSize = 20 Local $NumberGrid [$iGridSize][$iGridSize] Local $VisualGrid [$iGridSize][$iGridSize] Local $VisualGridData [$iGridSize][$iGridSize] Local $VOffset = 0 Local $HOffset = 0 Local $x Local $y Local $x_1 = -1 Local $y_1 = -1 Local $x_2 = -1 Local $y_2 = -1 Local $x_3 = -1 Local $y_3 = -1 Local $Trials Local $Attempts Local $rLamdba = 0.22 Local $rAlpha = 0.1 Local $bradio Local $MazeLength = 184 Local $GoalX = 19 Local $GoalY = 4 Dim $Maze[$MazeLength][2]= [[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,7],[1,8],[1,9],[1,10],[1,11],[1,12],[1,13],[1,14],[1,15],[1,16],[1,17],[1,18], _ [2,5],[2,7],[2,12],[2,18], _ [3,0],[3,1],[3,2],[3,3],[3,5],[3,7],[3,9],[3,10],[3,11],[3,12],[3,14],[3,15],[3,16],[3,18], _ [4,3],[4,5],[4,7],[4,9],[4,10],[4,12],[4,14],[4,16],[4,18], _ [5,1],[5,2],[5,3],[5,5],[5,7],[5,9],[5,12],[5,14],[5,16],[5,18],[5,19], _ [6,5],[6,7],[6,9],[6,10],[6,12],[6,14],[6,16], _ [7,1],[7,2],[7,3],[7,5],[7,7],[7,14],[7,16],[7,17],[7,18], _ [8,2],[8,3],[8,4],[8,5],[8,7],[8,8],[8,9],[8,10],[8,11],[8,12],[8,14],[8,18], _ [9,0],[9,11],[9,12],[9,14],[9,15],[9,16],[9,18], _ [10,0],[10,1],[10,2],[10,3],[10,4],[10,5],[10,6],[10,7],[10,8],[10,9],[10,16],[10,18], _ [11,11],[11,12],[11,13],[11,14],[11,16],[11,18], _ [12,1],[12,2],[12,3],[12,4],[12,5],[12,6],[12,7],[12,8],[12,9],[12,10],[12,11],[12,14],[12,16],[12,18], _ [13,9],[13,11],[13,12],[13,14],[13,16],[13,18], _ [14,0],[14,1],[14,2],[14,3],[14,5],[14,6],[14,7],[14,8],[14,9],[14,11],[14,14],[14,16],[14,18], _ [15,11],[15,13],[15,14],[15,16],[15,18], _ [16,1],[16,2],[16,3],[16,4],[16,5],[16,6],[16,7],[16,8],[16,9],[16,10],[16,11],[16,13],[16,14],[16,16],[16,18], _ [17,2],[17,4],[17,6],[17,8],[17,9],[17,11],[17,14],[17,16],[17,18], _ [18,0],[18,2],[18,4],[18,6],[18,9],[18,11],[18,12],[18,14],[18,16],[18,18], _ [19,0],[19,14],[19,18] ] ;_ArrayDisplay($Maze) $GUI = GUICreate("Adaptive heuristic critic - Reinforcement learning", 525, 620) GUISetOnEvent($GUI_EVENT_CLOSE, "SpecialEvents") ;Create Grid For $y = 0 To ($iGridSize-1) For $x = 0 To ($iGridSize-1) $VisualGrid[$y][$x] = GUICtrlCreateInput("", 25+ $HOffset, 25 + $VOffset, 25, 25, BitOr($SS_CENTER,$SS_CENTERIMAGE));BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME)) $VOffset += 24 Next $HOffset += 24 $VOffset = 0 ;ConsoleWrite($top & @CRLF) Next ;create labels and Inputs GUICtrlSetFont(-1, 12, 800, 1, "Times New Roman") GUICtrlCreateLabel("Number of Attempts", 300, 527, 175, 20) $GUIAttempts = GUICtrlCreateInput("", 400, 525, 75, 20,BitOr($SS_CENTER,$SS_CENTERIMAGE)) GUICtrlCreateLabel("Number of Trials", 300, 552, 175, 20) $GUITrials = GUICtrlCreateInput("", 400, 550, 75, 20,BitOr($SS_CENTER,$SS_CENTERIMAGE)) GUICtrlCreateLabel("G = Goal, A = Agent O = Obstacle ", 20, 525, 250, 500) ;Create an "OK" button $OK_Btn = GUICtrlCreateButton("Go", 400, 595, 75, 20) GUICtrlSetOnEvent($OK_Btn, "OKPressed") ;$radio = GUICtrlCreateCheckbox("Policy Roulette (T) / Highest (F)", 300, 575, 180, 20) ;Set Font and Initalise to empty For $y = 0 To ($iGridSize-1) For $x = 0 To ($iGridSize-1) GUICtrlSetFont($VisualGrid[$y][$x], 12, 800, 1, "Times New Roman") GUICtrlSetData($VisualGrid[$y][$x],$VisualGridData[$y][$x]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME)) $VOffset += 24 Next $HOffset += 24 Next ;Initalise Number Grid For $y = 0 to ($iGridSize-1) Step 1 For $x = 0 to ($iGridSize-1) Step 1 $NumberGrid[$y][$x] = 0.1;random(0.1,1) Next Next GUISetState() While (1) Sleep(10) Wend Func SpecialEvents() ;Destroy the GUI including the controls GUIDelete() ;Exit the script Exit EndFunc Func OKPressed() $bradio = 4;GUICtrlRead($radio) while GUIGetMsg() <> -3 Local $arXY[2] sleep(50) ;create Maze X,Y For $x = 0 to $MazeLength -1 Step 1 CreateObstical($Maze[$x][0], $Maze[$x][1]) Next ;Set Goal and reward Position $VisualGridData[$GoalX][$GoalY] = "G" GUICtrlSetData($VisualGrid[$GoalX][4],$VisualGridData[$GoalX][$GoalY]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME)) $NumberGrid[$GoalX][$GoalY] = 500 ;Create Some Obstacles ;Set Random Agnet Start Point that does not land on an obstacle do $x = Random(0,($iGridSize-1),1) $y = Random(0,($iGridSize-1),1) until ArrayTest($x,$y) = 0 $VisualGridData[$y][$x] = "A" GUICtrlSetData($VisualGrid[$y][$x],$VisualGridData[$y][$x]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME)) $Attempts = 0 While (($x <> $GoalY) OR ($y <> $GoalX)) sleep(60) $x_3 = $x_2 $y_3 = $y_2 $x_2 = $x_1 $y_2 = $y_1 $y_1 = $y $x_1 = $x ;Delete old agents position $VisualGridData[$y_1][$x_1] = "" GUICtrlSetData($VisualGrid[$y_1][$x_1],$VisualGridData[$y_1][$x_1]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME)) ;Determine the next position using a roulette wheel or higher 4 NewPosition($x,$y,$x_1,$y_1,$bradio) ;Pay back credit using Temporal Difference Algorithm TDLearning($x,$y,$x_1,$y_1,$x_2,$y_2) ;Draw agents new position $VisualGridData[$y][$x] = "A" GUICtrlSetData($VisualGrid[$y][$x],$VisualGridData[$y][$x]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME)) $Attempts += 1 GUICtrlSetData($GUIAttempts,$Attempts) WEnd $VisualGridData[$y][$x] = "" GUICtrlSetData($VisualGrid[$y][$x],$VisualGridData[$y][$x]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME)) $Trials += 1 ;~ IF $Trials = 100 Then ;This is here for debug ;~ _arraydisplay($NumberGrid) ;~ $Trials = 0 ;~ EndIf GUICtrlSetData($GUITrials,$Trials) WEnd EndFunc Func NewPosition(ByRef $x,ByRef $y, $x_1, $y_1, $bradio) Local $rRX[4] $rRX = Select4Value($x,$y) IF $bradio = 4 Then Best4($rRX, $x,$y,$x_1, $y_1) Else Roulette($rRX, $x,$y) EndIf EndFunc Func TDLearning($x,$y,$x_1,$y_1,$x_2,$y_2) IF $x_3 <> -1 Then $NumberGrid[$y_3][$x_3] = $NumberGrid[$y_3][$x_3]+((($NumberGrid[$y_2][$x_2] - $NumberGrid[$y_3][$x_3]) + (($NumberGrid[$y_1][$x_1] - $NumberGrid[$y_2][$x_2])*$rLamdba) + (($NumberGrid[$y][$x] - $NumberGrid[$y_1][$x_1])*($rLamdba*$rLamdba)))*$rAlpha) EndIf IF $x_2 <> -1 Then $NumberGrid[$y_2][$x_2] = $NumberGrid[$y_2][$x_2] + ((($NumberGrid[$y_1][$x_1] - $NumberGrid[$y_2][$x_2])*$rLamdba) + (($NumberGrid[$y][$x] - $NumberGrid[$y_1][$x_1])*$rLamdba)*$rAlpha) EndIf IF $x_1 <> -1 Then $NumberGrid[$y_1][$x_1] = $NumberGrid[$y_1][$x_1] + (($NumberGrid[$y][$x] - $NumberGrid[$y_1][$x_1])*$rAlpha) EndIf EndFunc Func Best4($rRX, ByRef $x, ByRef $y,$x_1, $y_1) Local $Best4MaxIndex $Best4MaxIndex = _ArrayMaxIndex($rRX) IF $x_1 = $x AND $y_1 = $y Then ;Board... $NumberGrid[$y][$x] = $NumberGrid[$y][$x] - ($NumberGrid[$y][$x]*0.05) EndIf If $Best4MaxIndex = 0 Then IF $x+1 > ($iGridSize-1) Then $x=$x $y=$y Else $x=$x+1 $y=$y Endif ElseIf $Best4MaxIndex = 1 Then IF $x-1 < 0 Then $x=$x $y=$y Else $x=$x-1 $y=$y Endif ElseIf $Best4MaxIndex = 2 Then IF $y+1 > ($iGridSize-1) Then $x=$x $y=$y Else $x=$x $y=$y+1 Endif ElseIf $Best4MaxIndex = 3 Then IF $y-1 < 0 Then $x=$x $y=$y Else $x=$x $y=$y-1 Endif Else $x=$x $y=$y EndIF EndFunc Func Roulette($rRX, ByRef $x, ByRef $y) Local $RouletteMax Local $RouletteSpin Local $Rxplus1 = $rRX[0] Local $Rxsub1 = $rRX[1] Local $Ryplus1 = $rRX[2] Local $Rysub1 = $rRX[3] $RouletteMax = $Rxplus1 + $Rxsub1 + $Ryplus1 + $Rysub1 $RouletteSpin = Random(0,$RouletteMax) If $RouletteSpin >= 0 AND $RouletteSpin <= $Rxplus1 Then IF $x+1 > ($iGridSize-1) Then $x=$x $y=$y Else $x=$x+1 $y=$y Endif ElseIf $RouletteSpin >= $Rxplus1 AND $RouletteSpin <= $Rxplus1+$Rxsub1 Then IF $x-1 < 0 Then $x=$x $y=$y Else $x=$x-1 $y=$y Endif ElseIf $RouletteSpin >= $Rxplus1+$Rxsub1 AND $RouletteSpin <= $Rxplus1+$Rxsub1+$Ryplus1 Then IF $y+1 > ($iGridSize-1) Then $x=$x $y=$y Else $x=$x $y=$y+1 Endif ElseIf $RouletteSpin >= $Rxplus1+$Rxsub1+$Ryplus1 AND $RouletteSpin <= $Rxplus1 + $Rxsub1 + $Ryplus1 + $Rysub1 Then IF $y-1 < 0 Then $x=$x $y=$y Else $x=$x $y=$y-1 Endif Else $x=$x $y=$y EndIF EndFunc Func Select4Value(ByRef $x, ByRef $y) Local $rRX[4] IF $x+1 > ($iGridSize-1) Then $rRX[0] = 0 Else ;$x=$x+1 $rRX[0] = $NumberGrid[$y][$x+1] Endif IF $x-1 < 0 Then $rRX[1] = 0 Else ;$x=$x-1 $rRX[1] = $NumberGrid[$y][$x-1] Endif IF $y+1 > ($iGridSize-1) Then $rRX[2] = 0 Else ;$y=$y+1 $rRX[2] = $NumberGrid[$y+1][$x] Endif IF $y-1 < 0 Then $rRX[3] = 0 Else ;$y=$y-1 $rRX[3] = $NumberGrid[$y-1][$x] Endif ;_ArrayDisplay($rRX) ;debug Return $rRX EndFunc Func CreateObstical(ByRef $x, ByRef $y) $VisualGridData[$y][$x] = "O" $NumberGrid[$y][$x] = 0 GUICtrlSetData($VisualGrid[$y][$x],$VisualGridData[$y][$x]);BitOr($SS_CENTER,$SS_CENTERIMAGE,$SS_BLACKFRAME)) EndFunc Func ArrayTest(ByRef $x, ByRef $y) Local $unique = 0 For $i = 0 to $MazeLength-1 Step 1 IF ($Maze[$i][0] = $y) AND ($Maze[$i][1] = $x) Then $unique = 1 ;msgbox(1,"h",$Maze[$i][0]&","& $y&","&$Maze[$i][1] &","& $x) EndIF Next Return $unique EndFunc Link to comment Share on other sites More sharing options...
jvanegmond Posted July 18, 2011 Share Posted July 18, 2011 I think after about 1000-1500 it did the maze everytime perfectly. Could you give some more background information about what exactly it is learning? At first I thought you were just rewarding going left/right, but that doesn't lead to these results. github.com/jvanegmond Link to comment Share on other sites More sharing options...
ABV Posted July 19, 2011 Author Share Posted July 19, 2011 I think after about 1000-1500 it did the maze everytime perfectly.Could you give some more background information about what exactly it is learning? At first I thought you were just rewarding going left/right, but that doesn't lead to these results.This link will explain it bestReinforcement Learning: A Survey Link to comment Share on other sites More sharing options...
jvanegmond Posted July 19, 2011 Share Posted July 19, 2011 This link will explain it bestReinforcement Learning: A SurveySweet. Thanks. github.com/jvanegmond Link to comment Share on other sites More sharing options...
Recommended Posts
Create an account or sign in to comment
You need to be a member in order to leave a comment
Create an account
Sign up for a new account in our community. It's easy!
Register a new accountSign in
Already have an account? Sign in here.
Sign In Now