enter reinforcement
This commit is contained in:
1
reinforcement/test_cases/CONFIG
Normal file
1
reinforcement/test_cases/CONFIG
Normal file
@ -0,0 +1 @@
|
||||
order: "q1 q2 q3 q4 q5 q6 q7"
|
410
reinforcement/test_cases/q1/1-tinygrid.solution
Normal file
410
reinforcement/test_cases/q1/1-tinygrid.solution
Normal file
@ -0,0 +1,410 @@
|
||||
values_k_0: """
|
||||
0.0000
|
||||
0.0000
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_1: """
|
||||
-10.0000
|
||||
0.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_2: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_3: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_4: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_5: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_6: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_7: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_8: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_9: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_100: """
|
||||
-10.0000
|
||||
5.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
2.5000
|
||||
illegal
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit
|
||||
south
|
||||
exit
|
||||
"""
|
||||
|
||||
actions: """
|
||||
north
|
||||
east
|
||||
exit
|
||||
south
|
||||
west
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q1/1-tinygrid.test
Normal file
22
reinforcement/test_cases/q1/1-tinygrid.test
Normal file
@ -0,0 +1,22 @@
|
||||
class: "ValueIterationTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.5"
|
||||
noise: "0.0"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
132
reinforcement/test_cases/q1/1-tinygrid.test_output
Normal file
132
reinforcement/test_cases/q1/1-tinygrid.test_output
Normal file
@ -0,0 +1,132 @@
|
||||
Values at iteration 0 are correct.
|
||||
Student/correct solution:
|
||||
values_k_0: """
|
||||
0.0000
|
||||
0.0000
|
||||
0.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action south are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action west are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action exit are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action east are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action north are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Values at iteration 1 are NOT correct.
|
||||
Student solution:
|
||||
values_k_1: """
|
||||
0.0000
|
||||
0.0000
|
||||
0.0000
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
values_k_1: """
|
||||
-10.0000
|
||||
0.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action south are NOT correct.
|
||||
Student solution:
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action west are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action exit are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_1_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action east are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action north are NOT correct.
|
||||
Student solution:
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
-5.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
410
reinforcement/test_cases/q1/2-tinygrid-noisy.solution
Normal file
410
reinforcement/test_cases/q1/2-tinygrid-noisy.solution
Normal file
@ -0,0 +1,410 @@
|
||||
values_k_0: """
|
||||
0.0000
|
||||
0.0000
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_1: """
|
||||
-10.0000
|
||||
0.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
-5.6250
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
5.6250
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_2: """
|
||||
-10.0000
|
||||
5.6250
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
-4.5703
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
3.1641
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
6.6797
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
3.1641
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_3: """
|
||||
-10.0000
|
||||
6.6797
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
-4.3726
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
3.7573
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
6.8774
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
3.7573
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_4: """
|
||||
-10.0000
|
||||
6.8774
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
-4.3355
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
3.8686
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
6.9145
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
3.8686
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_5: """
|
||||
-10.0000
|
||||
6.9145
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
-4.3285
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
3.8894
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
6.9215
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
3.8894
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_6: """
|
||||
-10.0000
|
||||
6.9215
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
-4.3272
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
3.8933
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
6.9228
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
3.8933
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_7: """
|
||||
-10.0000
|
||||
6.9228
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
-4.3270
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
3.8941
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
6.9230
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
3.8941
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_8: """
|
||||
-10.0000
|
||||
6.9230
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
-4.3269
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
3.8942
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
6.9231
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
3.8942
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_9: """
|
||||
-10.0000
|
||||
6.9231
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
-4.3269
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
3.8942
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
6.9231
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
3.8942
|
||||
illegal
|
||||
"""
|
||||
|
||||
values_k_100: """
|
||||
-10.0000
|
||||
6.9231
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-4.3269
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
3.8942
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
6.9231
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
3.8942
|
||||
illegal
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit
|
||||
south
|
||||
exit
|
||||
"""
|
||||
|
||||
actions: """
|
||||
north
|
||||
east
|
||||
exit
|
||||
south
|
||||
west
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q1/2-tinygrid-noisy.test
Normal file
22
reinforcement/test_cases/q1/2-tinygrid-noisy.test
Normal file
@ -0,0 +1,22 @@
|
||||
class: "ValueIterationTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.75"
|
||||
noise: "0.25"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
132
reinforcement/test_cases/q1/2-tinygrid-noisy.test_output
Normal file
132
reinforcement/test_cases/q1/2-tinygrid-noisy.test_output
Normal file
@ -0,0 +1,132 @@
|
||||
Values at iteration 0 are correct.
|
||||
Student/correct solution:
|
||||
values_k_0: """
|
||||
0.0000
|
||||
0.0000
|
||||
0.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action south are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action west are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action exit are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action east are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action north are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Values at iteration 1 are NOT correct.
|
||||
Student solution:
|
||||
values_k_1: """
|
||||
0.0000
|
||||
0.0000
|
||||
0.0000
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
values_k_1: """
|
||||
-10.0000
|
||||
0.0000
|
||||
10.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action south are NOT correct.
|
||||
Student solution:
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
5.6250
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action west are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action exit are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_1_action_exit: """
|
||||
-10.0000
|
||||
illegal
|
||||
10.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action east are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action north are NOT correct.
|
||||
Student solution:
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
-5.6250
|
||||
illegal
|
||||
"""
|
||||
|
||||
|
678
reinforcement/test_cases/q1/3-bridge.solution
Normal file
678
reinforcement/test_cases/q1/3-bridge.solution
Normal file
@ -0,0 +1,678 @@
|
||||
values_k_0: """
|
||||
__________ 0.0000 __________
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_1: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 0.0000 -100.0000
|
||||
-100.0000 0.0000 -100.0000
|
||||
-100.0000 0.0000 -100.0000
|
||||
-100.0000 0.0000 -100.0000
|
||||
-100.0000 0.0000 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.0750 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.4575 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.0750 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.4575 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_2: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -8.5000 -100.0000
|
||||
-100.0000 -8.5000 -100.0000
|
||||
-100.0000 -8.5000 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.0025 illegal
|
||||
illegal -15.0025 illegal
|
||||
illegal -15.0025 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4363 illegal
|
||||
illegal -76.8974 illegal
|
||||
illegal -77.2225 illegal
|
||||
illegal -77.1900 illegal
|
||||
illegal -76.8187 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.0025 illegal
|
||||
illegal -15.0025 illegal
|
||||
illegal -15.0025 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4363 illegal
|
||||
illegal -76.8974 illegal
|
||||
illegal -77.2225 illegal
|
||||
illegal -77.1900 illegal
|
||||
illegal -76.8187 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_3: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.0025 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -19.9769 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1737 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4663 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -19.9769 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1737 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4663 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_4: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_5: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_6: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_7: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_8: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_9: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values_k_100: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 -0.8500 -100.0000
|
||||
-100.0000 -9.1502 -100.0000
|
||||
-100.0000 -15.4999 -100.0000
|
||||
-100.0000 -14.4173 -100.0000
|
||||
-100.0000 -7.7350 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -9.1502 illegal
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -15.4999 illegal
|
||||
illegal -20.3575 illegal
|
||||
illegal -19.5292 illegal
|
||||
illegal -14.4173 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.4639 illegal
|
||||
illegal -77.1949 illegal
|
||||
illegal -77.5016 illegal
|
||||
illegal -77.4875 illegal
|
||||
illegal -77.0702 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
policy: """
|
||||
__________ exit __________
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit south exit
|
||||
exit south exit
|
||||
__________ exit __________
|
||||
"""
|
||||
|
||||
actions: """
|
||||
north
|
||||
east
|
||||
exit
|
||||
south
|
||||
west
|
||||
"""
|
||||
|
27
reinforcement/test_cases/q1/3-bridge.test
Normal file
27
reinforcement/test_cases/q1/3-bridge.test
Normal file
@ -0,0 +1,27 @@
|
||||
class: "ValueIterationTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
# 10 #
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 S -100
|
||||
# 1 #
|
||||
"""
|
||||
gridName: "bridgeGrid"
|
||||
discount: "0.85"
|
||||
noise: "0.1"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "500"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
216
reinforcement/test_cases/q1/3-bridge.test_output
Normal file
216
reinforcement/test_cases/q1/3-bridge.test_output
Normal file
@ -0,0 +1,216 @@
|
||||
Values at iteration 0 are correct.
|
||||
Student/correct solution:
|
||||
values_k_0: """
|
||||
__________ 0.0000 __________
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action south are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action west are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action exit are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action east are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action north are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
|
||||
Values at iteration 1 are NOT correct.
|
||||
Student solution:
|
||||
values_k_1: """
|
||||
__________ 0.0000 __________
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
values_k_1: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 0.0000 -100.0000
|
||||
-100.0000 0.0000 -100.0000
|
||||
-100.0000 0.0000 -100.0000
|
||||
-100.0000 0.0000 -100.0000
|
||||
-100.0000 0.0000 -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action south are NOT correct.
|
||||
Student solution:
|
||||
q_values_k_1_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
q_values_k_1_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -7.7350 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action west are NOT correct.
|
||||
Student solution:
|
||||
q_values_k_1_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
q_values_k_1_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -76.0750 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.4575 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action exit are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_1_action_exit: """
|
||||
__________ 10.0000 __________
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
-100.0000 illegal -100.0000
|
||||
__________ 1.0000 __________
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action east are NOT correct.
|
||||
Student solution:
|
||||
q_values_k_1_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
q_values_k_1_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -76.0750 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.5000 illegal
|
||||
illegal -76.4575 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action north are NOT correct.
|
||||
Student solution:
|
||||
q_values_k_1_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
q_values_k_1_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -0.8500 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
illegal -8.5000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
|
544
reinforcement/test_cases/q1/4-discountgrid.solution
Normal file
544
reinforcement/test_cases/q1/4-discountgrid.solution
Normal file
@ -0,0 +1,544 @@
|
||||
values_k_0: """
|
||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 __________ 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 __________ __________ 0.0000
|
||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
values_k_1: """
|
||||
-10.0000 0.0000 10.0000 0.0000 0.0000
|
||||
-10.0000 0.0000 __________ 0.0000 0.0000
|
||||
-10.0000 0.0000 1.0000 0.0000 0.0000
|
||||
-10.0000 0.0000 __________ __________ 0.0000
|
||||
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal 0.0000 illegal 0.9000 0.0000
|
||||
illegal -0.9000 __________ 0.0000 0.0000
|
||||
illegal -0.8100 illegal 0.0900 0.0000
|
||||
illegal -0.9000 __________ __________ 0.0000
|
||||
illegal -0.9000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal 7.2000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.7200 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal 0.0000 illegal 0.9000 0.0000
|
||||
illegal -0.9000 __________ 0.0000 0.0000
|
||||
illegal -0.8100 illegal 0.0900 0.0000
|
||||
illegal -0.9000 __________ __________ 0.0000
|
||||
illegal -0.9000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal -7.2000 illegal 7.2000 0.0000
|
||||
illegal -7.2000 __________ 0.0000 0.0000
|
||||
illegal -7.2000 illegal 0.7200 0.0000
|
||||
illegal -7.2000 __________ __________ 0.0000
|
||||
illegal -7.2000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
values_k_2: """
|
||||
-10.0000 7.2000 10.0000 7.2000 0.0000
|
||||
-10.0000 0.0000 __________ 0.0000 0.0000
|
||||
-10.0000 0.7200 1.0000 0.7200 0.0000
|
||||
-10.0000 0.0000 __________ __________ 0.0000
|
||||
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal 5.1840 illegal 6.0840 0.6480
|
||||
illegal 4.2840 __________ 5.1840 0.0000
|
||||
illegal -0.8100 illegal 0.0900 0.0648
|
||||
illegal -0.3816 __________ __________ 0.0000
|
||||
illegal -0.9000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal 7.8480 illegal 0.6480 0.0000
|
||||
illegal 0.7128 __________ 0.7128 0.0000
|
||||
illegal 0.7200 illegal 0.0648 0.0000
|
||||
illegal 0.0648 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal 0.0000 illegal 0.9000 0.6480
|
||||
illegal -0.3816 __________ 0.5184 0.0000
|
||||
illegal -0.8100 illegal 0.6084 0.0648
|
||||
illegal -0.9000 __________ __________ 0.0000
|
||||
illegal -0.9000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal -6.5520 illegal 7.8480 5.1840
|
||||
illegal -6.4872 __________ 0.7128 0.0000
|
||||
illegal -7.2000 illegal 0.7848 0.5184
|
||||
illegal -7.1352 __________ __________ 0.0000
|
||||
illegal -7.2000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
values_k_3: """
|
||||
-10.0000 7.8480 10.0000 7.8480 5.1840
|
||||
-10.0000 4.2840 __________ 5.1840 0.0000
|
||||
-10.0000 0.7200 1.0000 0.7848 0.5184
|
||||
-10.0000 0.0648 __________ __________ 0.0000
|
||||
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal 5.6506 illegal 7.0171 4.9054
|
||||
illegal 5.1361 __________ 6.1171 4.1990
|
||||
illegal 2.2745 illegal 3.8691 0.1173
|
||||
illegal -0.3758 __________ __________ 0.3732
|
||||
illegal -0.8533 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal 8.2919 illegal 4.9054 4.1990
|
||||
illegal 3.8556 __________ 0.7770 0.5132
|
||||
illegal 1.1114 illegal 0.9104 0.3732
|
||||
illegal 0.1115 __________ __________ 0.0467
|
||||
illegal 0.0058 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal 3.0845 illegal 5.0990 1.1729
|
||||
illegal 0.0040 __________ 1.0316 0.8398
|
||||
illegal -0.7633 illegal 0.7017 0.1173
|
||||
illegal -0.8942 __________ __________ 0.0000
|
||||
illegal -0.9000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal -6.1081 illegal 8.3729 6.1171
|
||||
illegal -6.4289 __________ 4.5094 4.2457
|
||||
illegal -6.8086 illegal 1.2572 0.5651
|
||||
illegal -7.1352 __________ __________ 0.0467
|
||||
illegal -7.1942 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
values_k_4: """
|
||||
-10.0000 8.2919 10.0000 8.3729 6.1171
|
||||
-10.0000 5.1361 __________ 6.1171 4.2457
|
||||
-10.0000 2.2745 1.0000 3.8691 0.5651
|
||||
-10.0000 0.1115 __________ __________ 0.3732
|
||||
-10.0000 0.0058 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal 5.9702 illegal 7.4790 5.7084
|
||||
illegal 5.5324 __________ 6.9611 5.3370
|
||||
illegal 2.8880 illegal 4.5452 3.4560
|
||||
illegal 0.7477 __________ __________ 0.4740
|
||||
illegal -0.8198 0.0005 0.0000 0.2687
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal 8.4085 illegal 5.7084 5.3370
|
||||
illegal 4.6490 __________ 4.1587 3.6583
|
||||
illegal 1.1923 illegal 1.3056 0.8225
|
||||
illegal 0.2855 __________ __________ 0.3196
|
||||
illegal 0.0106 0.0000 0.0000 0.0336
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal 3.6980 illegal 5.8549 4.3610
|
||||
illegal 1.1999 __________ 3.7184 1.3395
|
||||
illegal -0.7298 illegal 2.9266 0.6678
|
||||
illegal -0.8858 __________ __________ 0.0672
|
||||
illegal -0.8958 0.0005 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal -5.9915 illegal 8.5041 6.9611
|
||||
illegal -6.2490 __________ 5.5061 5.0057
|
||||
illegal -6.7277 illegal 1.6188 3.2015
|
||||
illegal -6.9948 __________ __________ 0.3196
|
||||
illegal -7.1894 0.0042 0.0000 0.0336
|
||||
"""
|
||||
|
||||
values_k_5: """
|
||||
-10.0000 8.4085 10.0000 8.5041 6.9611
|
||||
-10.0000 5.5324 __________ 6.9611 5.3370
|
||||
-10.0000 2.8880 1.0000 4.5452 3.4560
|
||||
-10.0000 0.7477 __________ __________ 0.4740
|
||||
-10.0000 0.0106 0.0042 0.0000 0.2687
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal 6.0541 illegal 7.6495 6.4039
|
||||
illegal 5.6521 __________ 7.2298 6.1188
|
||||
illegal 3.1733 illegal 5.4130 4.5627
|
||||
illegal 1.2467 __________ __________ 2.5736
|
||||
illegal -0.3613 0.0040 0.0246 0.3655
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal 8.4547 illegal 6.4039 6.1188
|
||||
illegal 5.0000 __________ 5.0171 4.7802
|
||||
illegal 1.2852 illegal 3.5239 3.0113
|
||||
illegal 0.7992 __________ __________ 0.6765
|
||||
illegal 0.0713 0.0008 0.1935 0.2603
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal 3.9833 illegal 6.5385 5.2345
|
||||
illegal 1.6773 __________ 4.3794 3.5951
|
||||
illegal -0.2717 illegal 3.6736 1.0614
|
||||
illegal -0.8251 __________ __________ 0.2788
|
||||
illegal -0.8920 0.0040 0.0246 0.2177
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal -5.9453 illegal 8.5919 7.2298
|
||||
illegal -6.1833 __________ 6.1864 5.9496
|
||||
illegal -6.6348 illegal 1.7556 3.7955
|
||||
illegal -6.9391 __________ __________ 0.6765
|
||||
illegal -7.1318 0.0084 0.0030 0.0668
|
||||
"""
|
||||
|
||||
values_k_6: """
|
||||
-10.0000 8.4547 10.0000 8.5919 7.2298
|
||||
-10.0000 5.6521 __________ 7.2298 6.1188
|
||||
-10.0000 3.1733 1.0000 5.4130 4.5627
|
||||
-10.0000 1.2467 __________ __________ 2.5736
|
||||
-10.0000 0.0713 0.0084 0.1935 0.3655
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal 6.0874 illegal 7.7368 6.6294
|
||||
illegal 5.6961 __________ 7.3875 6.4068
|
||||
illegal 3.2595 illegal 5.7061 5.3034
|
||||
illegal 1.4970 __________ __________ 3.7484
|
||||
illegal -0.0017 0.0298 0.1730 1.9033
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal 8.4696 illegal 6.6294 6.4068
|
||||
illegal 5.1160 __________ 5.6660 5.4669
|
||||
illegal 1.3409 illegal 4.4230 4.0675
|
||||
illegal 1.1896 __________ __________ 2.2966
|
||||
illegal 0.1246 0.1408 0.2980 0.5277
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal 4.0695 illegal 6.7561 5.8295
|
||||
illegal 1.8935 __________ 5.0988 4.4865
|
||||
illegal 0.0876 illegal 4.3980 2.7508
|
||||
illegal -0.7365 __________ __________ 0.7264
|
||||
illegal -0.8479 0.0298 0.1730 0.3135
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal -5.9304 illegal 8.6239 7.3875
|
||||
illegal -6.1535 __________ 6.4659 6.2668
|
||||
illegal -6.5791 illegal 1.8579 4.6797
|
||||
illegal -6.9080 __________ __________ 2.2966
|
||||
illegal -7.0814 0.0528 0.0408 0.4038
|
||||
"""
|
||||
|
||||
values_k_7: """
|
||||
-10.0000 8.4696 10.0000 8.6239 7.3875
|
||||
-10.0000 5.6961 __________ 7.3875 6.4068
|
||||
-10.0000 3.2595 1.0000 5.7061 5.3034
|
||||
-10.0000 1.4970 __________ __________ 3.7484
|
||||
-10.0000 0.1246 0.1408 0.2980 1.9033
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal 6.0981 illegal 7.7741 6.7600
|
||||
illegal 5.7108 __________ 7.4507 6.5605
|
||||
illegal 3.2912 illegal 5.8863 5.6038
|
||||
illegal 1.5816 __________ __________ 4.4932
|
||||
illegal 0.1905 0.1394 0.3985 2.8970
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal 8.4749 illegal 6.7600 6.5605
|
||||
illegal 5.1568 __________ 5.9026 5.7551
|
||||
illegal 1.3674 illegal 4.9969 4.7324
|
||||
illegal 1.3824 __________ __________ 3.3475
|
||||
illegal 0.2473 0.2399 1.4240 1.8790
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal 4.1012 illegal 6.8839 6.0539
|
||||
illegal 1.9595 __________ 5.3499 5.0599
|
||||
illegal 0.2678 illegal 4.6757 3.6897
|
||||
illegal -0.6755 __________ __________ 2.0451
|
||||
illegal -0.7976 0.1394 0.3985 1.5685
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal -5.9251 illegal 8.6410 7.4507
|
||||
illegal -6.1444 __________ 6.6087 6.4612
|
||||
illegal -6.5526 illegal 1.8984 5.0224
|
||||
illegal -6.8954 __________ __________ 3.3475
|
||||
illegal -7.0541 0.1151 0.1550 0.7232
|
||||
"""
|
||||
|
||||
values_k_8: """
|
||||
-10.0000 8.4749 10.0000 8.6410 7.4507
|
||||
-10.0000 5.7108 __________ 7.4507 6.5605
|
||||
-10.0000 3.2912 1.0000 5.8863 5.6038
|
||||
-10.0000 1.5816 __________ __________ 4.4932
|
||||
-10.0000 0.2473 0.2399 1.4240 2.8970
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal 6.1019 illegal 7.7921 6.8128
|
||||
illegal 5.7159 __________ 7.4826 6.6255
|
||||
illegal 3.3017 illegal 5.9589 5.7577
|
||||
illegal 1.6120 __________ __________ 4.8435
|
||||
illegal 0.2603 0.3231 1.3076 3.6240
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal 8.4767 illegal 6.8128 6.6255
|
||||
illegal 5.1707 __________ 6.0310 5.8985
|
||||
illegal 1.3763 illegal 5.2350 5.0295
|
||||
illegal 1.4572 __________ __________ 4.0001
|
||||
illegal 0.3373 1.0685 2.3421 2.7509
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal 4.1117 illegal 6.9351 6.1718
|
||||
illegal 1.9836 __________ 5.4992 5.2957
|
||||
illegal 0.3287 illegal 4.8325 4.2692
|
||||
illegal -0.5796 __________ __________ 2.8946
|
||||
illegal -0.7003 0.3231 1.3076 2.4747
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal -5.9233 illegal 8.6483 7.4826
|
||||
illegal -6.1411 __________ 6.6720 6.5394
|
||||
illegal -6.5437 illegal 1.9203 5.2330
|
||||
illegal -6.8815 __________ __________ 4.0001
|
||||
illegal -7.0354 0.2213 0.4290 1.6904
|
||||
"""
|
||||
|
||||
values_k_9: """
|
||||
-10.0000 8.4767 10.0000 8.6483 7.4826
|
||||
-10.0000 5.7159 __________ 7.4826 6.6255
|
||||
-10.0000 3.3017 1.0000 5.9589 5.7577
|
||||
-10.0000 1.6120 __________ __________ 4.8435
|
||||
-10.0000 0.3373 1.0685 2.3421 3.6240
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal 6.1032 illegal 7.8002 6.8392
|
||||
illegal 5.7177 __________ 7.4965 6.6572
|
||||
illegal 3.3055 illegal 5.9956 5.8249
|
||||
illegal 1.6223 __________ __________ 5.0174
|
||||
illegal 0.3568 1.0105 2.1087 4.0243
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal 8.4773 illegal 6.8392 6.6572
|
||||
illegal 5.1755 __________ 6.0850 5.9620
|
||||
illegal 1.3795 illegal 5.3553 5.1777
|
||||
illegal 1.4881 __________ __________ 4.3316
|
||||
illegal 0.9447 1.8787 3.0308 3.3713
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal 4.1155 illegal 6.9609 6.2222
|
||||
illegal 1.9917 __________ 5.5601 5.4153
|
||||
illegal 0.3506 illegal 4.8986 4.5418
|
||||
illegal -0.5121 __________ __________ 3.4811
|
||||
illegal -0.5610 1.0105 2.1087 3.1462
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal -5.9227 illegal 8.6518 7.4965
|
||||
illegal -6.1399 __________ 6.7021 6.5791
|
||||
illegal -6.5405 illegal 1.9297 5.3226
|
||||
illegal -6.8725 __________ __________ 4.3316
|
||||
illegal -7.0246 0.4352 1.1909 2.4484
|
||||
"""
|
||||
|
||||
values_k_100: """
|
||||
-10.0000 8.4777 10.0000 8.6547 7.5087
|
||||
-10.0000 5.7186 __________ 7.5087 6.6836
|
||||
-10.0000 3.3074 1.0000 6.0258 5.8841
|
||||
-10.0000 2.0045 __________ __________ 5.1665
|
||||
-10.0000 2.9289 3.4513 3.9306 4.4765
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal 6.1039 illegal 7.8072 6.8610
|
||||
illegal 5.7186 __________ 7.5087 6.6836
|
||||
illegal 3.3074 illegal 6.0258 5.8841
|
||||
illegal 1.6617 __________ __________ 5.1665
|
||||
illegal 0.8539 3.1023 3.5435 4.4765
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal 8.4777 illegal 6.8610 6.6836
|
||||
illegal 5.1780 __________ 6.1334 6.0175
|
||||
illegal 1.4151 illegal 5.4546 5.3030
|
||||
illegal 2.0045 __________ __________ 4.6523
|
||||
illegal 2.9289 3.4513 3.9306 4.0910
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal 4.1174 illegal 6.9820 6.2669
|
||||
illegal 1.9960 __________ 5.6159 5.5138
|
||||
illegal 0.6333 illegal 4.9582 4.7918
|
||||
illegal 1.3892 __________ __________ 4.1531
|
||||
illegal 1.5194 3.1023 3.5435 3.9797
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal -5.9223 illegal 8.6547 7.5087
|
||||
illegal -6.1393 __________ 6.7275 6.6116
|
||||
illegal -6.5049 illegal 1.9381 5.4051
|
||||
illegal -6.6387 __________ __________ 4.6523
|
||||
illegal -6.7560 2.7300 3.1924 3.6979
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit east exit west west
|
||||
exit north __________ north north
|
||||
exit north exit north north
|
||||
exit east __________ __________ north
|
||||
exit east east east north
|
||||
"""
|
||||
|
||||
actions: """
|
||||
north
|
||||
east
|
||||
exit
|
||||
south
|
||||
west
|
||||
"""
|
||||
|
24
reinforcement/test_cases/q1/4-discountgrid.test
Normal file
24
reinforcement/test_cases/q1/4-discountgrid.test
Normal file
@ -0,0 +1,24 @@
|
||||
class: "ValueIterationTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10 _ 10 _ _
|
||||
-10 _ # _ _
|
||||
-10 _ 1 _ _
|
||||
-10 _ # # _
|
||||
-10 S _ _ _
|
||||
"""
|
||||
discount: "0.9"
|
||||
noise: "0.2"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.2"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "3000"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
182
reinforcement/test_cases/q1/4-discountgrid.test_output
Normal file
182
reinforcement/test_cases/q1/4-discountgrid.test_output
Normal file
@ -0,0 +1,182 @@
|
||||
Values at iteration 0 are correct.
|
||||
Student/correct solution:
|
||||
values_k_0: """
|
||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 __________ 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 __________ __________ 0.0000
|
||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action south are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action west are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action exit are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action east are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 0 for action north are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Values at iteration 1 are NOT correct.
|
||||
Student solution:
|
||||
values_k_1: """
|
||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 __________ 0.0000 0.0000
|
||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||
0.0000 0.0000 __________ __________ 0.0000
|
||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
values_k_1: """
|
||||
-10.0000 0.0000 10.0000 0.0000 0.0000
|
||||
-10.0000 0.0000 __________ 0.0000 0.0000
|
||||
-10.0000 0.0000 1.0000 0.0000 0.0000
|
||||
-10.0000 0.0000 __________ __________ 0.0000
|
||||
-10.0000 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action south are NOT correct.
|
||||
Student solution:
|
||||
q_values_k_1_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
q_values_k_1_action_south: """
|
||||
illegal 0.0000 illegal 0.9000 0.0000
|
||||
illegal -0.9000 __________ 0.0000 0.0000
|
||||
illegal -0.8100 illegal 0.0900 0.0000
|
||||
illegal -0.9000 __________ __________ 0.0000
|
||||
illegal -0.9000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action west are NOT correct.
|
||||
Student solution:
|
||||
q_values_k_1_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
q_values_k_1_action_west: """
|
||||
illegal -7.2000 illegal 7.2000 0.0000
|
||||
illegal -7.2000 __________ 0.0000 0.0000
|
||||
illegal -7.2000 illegal 0.7200 0.0000
|
||||
illegal -7.2000 __________ __________ 0.0000
|
||||
illegal -7.2000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action exit are correct.
|
||||
Student/correct solution:
|
||||
q_values_k_1_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-10.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action east are NOT correct.
|
||||
Student solution:
|
||||
q_values_k_1_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
q_values_k_1_action_east: """
|
||||
illegal 7.2000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.7200 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Q-Values at iteration 1 for action north are NOT correct.
|
||||
Student solution:
|
||||
q_values_k_1_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
||||
Correct solution:
|
||||
q_values_k_1_action_north: """
|
||||
illegal 0.0000 illegal 0.9000 0.0000
|
||||
illegal -0.9000 __________ 0.0000 0.0000
|
||||
illegal -0.8100 illegal 0.0900 0.0000
|
||||
illegal -0.9000 __________ __________ 0.0000
|
||||
illegal -0.9000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
|
2
reinforcement/test_cases/q1/CONFIG
Normal file
2
reinforcement/test_cases/q1/CONFIG
Normal file
@ -0,0 +1,2 @@
|
||||
max_points: "5"
|
||||
class: "PassAllTestsQuestion"
|
2
reinforcement/test_cases/q2/1-question-2.1.solution
Normal file
2
reinforcement/test_cases/q2/1-question-2.1.solution
Normal file
@ -0,0 +1,2 @@
|
||||
# This is the solution file for test_cases/q2/1-question-2.1.test.
|
||||
# File intentionally blank.
|
31
reinforcement/test_cases/q2/1-question-2.1.test
Normal file
31
reinforcement/test_cases/q2/1-question-2.1.test
Normal file
@ -0,0 +1,31 @@
|
||||
class: "GridPolicyTest"
|
||||
|
||||
# Function in module in analysis that returns (discount, noise)
|
||||
parameterFn: "question2a"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
_ _ _ _ _
|
||||
_ # _ _ _
|
||||
_ # 1 # 10
|
||||
S _ _ _ _
|
||||
-10 -10 -10 -10 -10
|
||||
"""
|
||||
gridName: "discountGrid"
|
||||
|
||||
# Policy specification
|
||||
# _ policy choice not checked
|
||||
# N, E, S, W policy action must be north, east, south, west
|
||||
#
|
||||
policy: """
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
E E N _ _
|
||||
_ _ _ _ _
|
||||
"""
|
2
reinforcement/test_cases/q2/2-question-2.2.solution
Normal file
2
reinforcement/test_cases/q2/2-question-2.2.solution
Normal file
@ -0,0 +1,2 @@
|
||||
# This is the solution file for test_cases/q2/2-question-2.2.test.
|
||||
# File intentionally blank.
|
31
reinforcement/test_cases/q2/2-question-2.2.test
Normal file
31
reinforcement/test_cases/q2/2-question-2.2.test
Normal file
@ -0,0 +1,31 @@
|
||||
class: "GridPolicyTest"
|
||||
|
||||
# Function in module in analysis that returns (discount, noise)
|
||||
parameterFn: "question2b"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
_ _ _ _ _
|
||||
_ # _ _ _
|
||||
_ # 1 # 10
|
||||
S _ _ _ _
|
||||
-10 -10 -10 -10 -10
|
||||
"""
|
||||
gridName: "discountGrid"
|
||||
|
||||
# Policy specification
|
||||
# _ policy choice not checked
|
||||
# N, E, S, W policy action must be north, east, south, west
|
||||
#
|
||||
policy: """
|
||||
E E S _ _
|
||||
N _ S _ _
|
||||
N _ _ _ _
|
||||
N _ _ _ _
|
||||
_ _ _ _ _
|
||||
"""
|
2
reinforcement/test_cases/q2/3-question-2.3.solution
Normal file
2
reinforcement/test_cases/q2/3-question-2.3.solution
Normal file
@ -0,0 +1,2 @@
|
||||
# This is the solution file for test_cases/q2/3-question-2.3.test.
|
||||
# File intentionally blank.
|
31
reinforcement/test_cases/q2/3-question-2.3.test
Normal file
31
reinforcement/test_cases/q2/3-question-2.3.test
Normal file
@ -0,0 +1,31 @@
|
||||
class: "GridPolicyTest"
|
||||
|
||||
# Function in module in analysis that returns (discount, noise)
|
||||
parameterFn: "question2c"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
_ _ _ _ _
|
||||
_ # _ _ _
|
||||
_ # 1 # 10
|
||||
S _ _ _ _
|
||||
-10 -10 -10 -10 -10
|
||||
"""
|
||||
gridName: "discountGrid"
|
||||
|
||||
# Policy specification
|
||||
# _ policy choice not checked
|
||||
# N, E, S, W policy action must be north, east, south, west
|
||||
#
|
||||
policy: """
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
E E E E N
|
||||
_ _ _ _ _
|
||||
"""
|
2
reinforcement/test_cases/q2/4-question-2.4.solution
Normal file
2
reinforcement/test_cases/q2/4-question-2.4.solution
Normal file
@ -0,0 +1,2 @@
|
||||
# This is the solution file for test_cases/q2/4-question-2.4.test.
|
||||
# File intentionally blank.
|
36
reinforcement/test_cases/q2/4-question-2.4.test
Normal file
36
reinforcement/test_cases/q2/4-question-2.4.test
Normal file
@ -0,0 +1,36 @@
|
||||
class: "GridPolicyTest"
|
||||
|
||||
# Function in module in analysis that returns (discount, noise)
|
||||
parameterFn: "question2d"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
_ _ _ _ _
|
||||
_ # _ _ _
|
||||
_ # 1 # 10
|
||||
S _ _ _ _
|
||||
-10 -10 -10 -10 -10
|
||||
"""
|
||||
gridName: "discountGrid"
|
||||
|
||||
# Policy specification
|
||||
# _ policy choice not checked
|
||||
# N, E, S, W policy action must be north, east, south, west
|
||||
#
|
||||
policy: """
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
N _ _ _ _
|
||||
_ _ _ _ _
|
||||
"""
|
||||
|
||||
# State the most probable path must visit
|
||||
# (x,y) for a particular location; (0,0) is bottom left
|
||||
# TERMINAL_STATE for the terminal state
|
||||
pathVisits: "(4,2)"
|
2
reinforcement/test_cases/q2/5-question-2.5.solution
Normal file
2
reinforcement/test_cases/q2/5-question-2.5.solution
Normal file
@ -0,0 +1,2 @@
|
||||
# This is the solution file for test_cases/q2/5-question-2.5.test.
|
||||
# File intentionally blank.
|
36
reinforcement/test_cases/q2/5-question-2.5.test
Normal file
36
reinforcement/test_cases/q2/5-question-2.5.test
Normal file
@ -0,0 +1,36 @@
|
||||
class: "GridPolicyTest"
|
||||
|
||||
# Function in module in analysis that returns (discount, noise)
|
||||
parameterFn: "question2e"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
_ _ _ _ _
|
||||
_ # _ _ _
|
||||
_ # 1 # 10
|
||||
S _ _ _ _
|
||||
-10 -10 -10 -10 -10
|
||||
"""
|
||||
gridName: "discountGrid"
|
||||
|
||||
# Policy specification
|
||||
# _ policy choice not checked
|
||||
# N, E, S, W policy action must be north, east, south, west
|
||||
#
|
||||
policy: """
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
_ _ _ _ _
|
||||
"""
|
||||
|
||||
# State the most probable path must not visit
|
||||
# (x,y) for a particular location; (0,0) is bottom left
|
||||
# TERMINAL_STATE for the terminal state
|
||||
pathNotVisits: "TERMINAL_STATE"
|
2
reinforcement/test_cases/q2/CONFIG
Normal file
2
reinforcement/test_cases/q2/CONFIG
Normal file
@ -0,0 +1,2 @@
|
||||
max_points: "5"
|
||||
class: "NumberPassedQuestion"
|
342
reinforcement/test_cases/q3/1-tinygrid.solution
Normal file
342
reinforcement/test_cases/q3/1-tinygrid.solution
Normal file
@ -0,0 +1,342 @@
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.9000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.0951
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.6856
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-0.4534
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
0.4063
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-9.4767
|
||||
illegal
|
||||
9.8175
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
2.1267
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
0.3919
|
||||
illegal
|
||||
"""
|
||||
|
||||
values: """
|
||||
-9.4767
|
||||
2.1267
|
||||
9.8175
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit
|
||||
south
|
||||
exit
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q3/1-tinygrid.test
Normal file
22
reinforcement/test_cases/q3/1-tinygrid.test
Normal file
@ -0,0 +1,22 @@
|
||||
class: "QLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.5"
|
||||
noise: "0.0"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
70
reinforcement/test_cases/q3/1-tinygrid.test_output
Normal file
70
reinforcement/test_cases/q3/1-tinygrid.test_output
Normal file
@ -0,0 +1,70 @@
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
342
reinforcement/test_cases/q3/2-tinygrid-noisy.solution
Normal file
342
reinforcement/test_cases/q3/2-tinygrid-noisy.solution
Normal file
@ -0,0 +1,342 @@
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.9000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.0951
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.6856
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-0.6670
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
0.9499
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-9.4767
|
||||
illegal
|
||||
9.8175
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
3.2562
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
0.8236
|
||||
illegal
|
||||
"""
|
||||
|
||||
values: """
|
||||
-9.4767
|
||||
3.2562
|
||||
9.8175
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit
|
||||
south
|
||||
exit
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q3/2-tinygrid-noisy.test
Normal file
22
reinforcement/test_cases/q3/2-tinygrid-noisy.test
Normal file
@ -0,0 +1,22 @@
|
||||
class: "QLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.75"
|
||||
noise: "0.25"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
70
reinforcement/test_cases/q3/2-tinygrid-noisy.test_output
Normal file
70
reinforcement/test_cases/q3/2-tinygrid-noisy.test_output
Normal file
@ -0,0 +1,70 @@
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
570
reinforcement/test_cases/q3/3-bridge.solution
Normal file
570
reinforcement/test_cases/q3/3-bridge.solution
Normal file
@ -0,0 +1,570 @@
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.1000 __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -5.8648 illegal
|
||||
illegal -0.7995 illegal
|
||||
illegal -0.1671 illegal
|
||||
illegal -1.2642 illegal
|
||||
illegal -0.5871 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -17.0676 illegal
|
||||
illegal -26.5534 illegal
|
||||
illegal -3.6957 illegal
|
||||
illegal -43.5952 illegal
|
||||
illegal -31.6884 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_exit: """
|
||||
__________ 9.3539 __________
|
||||
-96.5663 illegal -96.9097
|
||||
-97.7472 illegal -94.1850
|
||||
-89.0581 illegal -96.9097
|
||||
-97.2187 illegal -87.8423
|
||||
-92.8210 illegal -97.2187
|
||||
__________ 0.9576 __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -6.8377 illegal
|
||||
illegal -6.7277 illegal
|
||||
illegal -3.4723 illegal
|
||||
illegal -8.4015 illegal
|
||||
illegal -5.5718 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -27.0626 illegal
|
||||
illegal -39.0610 illegal
|
||||
illegal -40.5887 illegal
|
||||
illegal -16.2839 illegal
|
||||
illegal -20.7770 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values: """
|
||||
__________ 9.3539 __________
|
||||
-96.5663 -5.8648 -96.9097
|
||||
-97.7472 -0.7995 -94.1850
|
||||
-89.0581 -0.1671 -96.9097
|
||||
-97.2187 -1.2642 -87.8423
|
||||
-92.8210 -0.5871 -97.2187
|
||||
__________ 0.9576 __________
|
||||
"""
|
||||
|
||||
policy: """
|
||||
__________ exit __________
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit north exit
|
||||
__________ exit __________
|
||||
"""
|
||||
|
27
reinforcement/test_cases/q3/3-bridge.test
Normal file
27
reinforcement/test_cases/q3/3-bridge.test
Normal file
@ -0,0 +1,27 @@
|
||||
class: "QLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
# 10 #
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 S -100
|
||||
# 1 #
|
||||
"""
|
||||
gridName: "bridgeGrid"
|
||||
discount: "0.85"
|
||||
noise: "0.1"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "500"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
110
reinforcement/test_cases/q3/3-bridge.test_output
Normal file
110
reinforcement/test_cases/q3/3-bridge.test_output
Normal file
@ -0,0 +1,110 @@
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
456
reinforcement/test_cases/q3/4-discountgrid.solution
Normal file
456
reinforcement/test_cases/q3/4-discountgrid.solution
Normal file
@ -0,0 +1,456 @@
|
||||
q_values_k_0_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
-1.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
-1.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.1000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
-1.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.1000 illegal illegal
|
||||
-1.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal -0.0900 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
-1.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.1000 illegal illegal
|
||||
-1.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_north: """
|
||||
illegal 4.3205 illegal 6.1517 3.8095
|
||||
illegal 4.4238 __________ 5.2284 3.5129
|
||||
illegal 1.0694 illegal 3.6867 2.0418
|
||||
illegal 0.3423 __________ __________ 1.0655
|
||||
illegal 0.0073 0.0079 0.0484 0.3768
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_east: """
|
||||
illegal 8.0584 illegal 3.7245 3.3947
|
||||
illegal 2.0499 __________ 3.2373 2.1742
|
||||
illegal 0.8687 illegal 1.7398 1.2671
|
||||
illegal 0.2927 __________ __________ 0.6669
|
||||
illegal 0.0239 0.0097 0.1611 0.2051
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-9.9999 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_south: """
|
||||
illegal -0.3521 illegal 3.6948 2.9139
|
||||
illegal -0.5605 __________ 2.1346 1.5674
|
||||
illegal 0.2093 illegal 1.5389 0.5521
|
||||
illegal -0.5505 __________ __________ 0.1006
|
||||
illegal -1.8501 0.0060 0.0514 0.1223
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_west: """
|
||||
illegal -6.2001 illegal 7.5146 4.9014
|
||||
illegal -5.4013 __________ 4.0484 3.4126
|
||||
illegal -8.0399 illegal 0.9653 1.6081
|
||||
illegal -7.4767 __________ __________ 0.3934
|
||||
illegal -6.3432 0.0179 0.0188 0.1028
|
||||
"""
|
||||
|
||||
values: """
|
||||
-10.0000 8.0584 10.0000 7.5146 4.9014
|
||||
-10.0000 4.4238 __________ 5.2284 3.5129
|
||||
-10.0000 1.0694 1.0000 3.6867 2.0418
|
||||
-10.0000 0.3423 __________ __________ 1.0655
|
||||
-9.9999 0.0239 0.0179 0.1611 0.3768
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit east exit west west
|
||||
exit north __________ north north
|
||||
exit north exit north north
|
||||
exit north __________ __________ north
|
||||
exit east west east north
|
||||
"""
|
||||
|
24
reinforcement/test_cases/q3/4-discountgrid.test
Normal file
24
reinforcement/test_cases/q3/4-discountgrid.test
Normal file
@ -0,0 +1,24 @@
|
||||
class: "QLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10 _ 10 _ _
|
||||
-10 _ # _ _
|
||||
-10 _ 1 _ _
|
||||
-10 _ # # _
|
||||
-10 S _ _ _
|
||||
"""
|
||||
discount: "0.9"
|
||||
noise: "0.2"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.2"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "3000"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
90
reinforcement/test_cases/q3/4-discountgrid.test_output
Normal file
90
reinforcement/test_cases/q3/4-discountgrid.test_output
Normal file
@ -0,0 +1,90 @@
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
2
reinforcement/test_cases/q3/CONFIG
Normal file
2
reinforcement/test_cases/q3/CONFIG
Normal file
@ -0,0 +1,2 @@
|
||||
max_points: "5"
|
||||
class: "PassAllTestsQuestion"
|
2
reinforcement/test_cases/q4/1-tinygrid.solution
Normal file
2
reinforcement/test_cases/q4/1-tinygrid.solution
Normal file
@ -0,0 +1,2 @@
|
||||
# This is the solution file for test_cases/q4/1-tinygrid.test.
|
||||
# File intentionally blank.
|
22
reinforcement/test_cases/q4/1-tinygrid.test
Normal file
22
reinforcement/test_cases/q4/1-tinygrid.test
Normal file
@ -0,0 +1,22 @@
|
||||
class: "EpsilonGreedyTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.5"
|
||||
noise: "0.0"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
2
reinforcement/test_cases/q4/2-tinygrid-noisy.solution
Normal file
2
reinforcement/test_cases/q4/2-tinygrid-noisy.solution
Normal file
@ -0,0 +1,2 @@
|
||||
# This is the solution file for test_cases/q4/2-tinygrid-noisy.test.
|
||||
# File intentionally blank.
|
22
reinforcement/test_cases/q4/2-tinygrid-noisy.test
Normal file
22
reinforcement/test_cases/q4/2-tinygrid-noisy.test
Normal file
@ -0,0 +1,22 @@
|
||||
class: "EpsilonGreedyTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.75"
|
||||
noise: "0.25"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
2
reinforcement/test_cases/q4/3-bridge.solution
Normal file
2
reinforcement/test_cases/q4/3-bridge.solution
Normal file
@ -0,0 +1,2 @@
|
||||
# This is the solution file for test_cases/q4/3-bridge.test.
|
||||
# File intentionally blank.
|
27
reinforcement/test_cases/q4/3-bridge.test
Normal file
27
reinforcement/test_cases/q4/3-bridge.test
Normal file
@ -0,0 +1,27 @@
|
||||
class: "EpsilonGreedyTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
# 10 #
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 S -100
|
||||
# 1 #
|
||||
"""
|
||||
gridName: "bridgeGrid"
|
||||
discount: "0.85"
|
||||
noise: "0.1"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "500"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
2
reinforcement/test_cases/q4/4-discountgrid.solution
Normal file
2
reinforcement/test_cases/q4/4-discountgrid.solution
Normal file
@ -0,0 +1,2 @@
|
||||
# This is the solution file for test_cases/q4/4-discountgrid.test.
|
||||
# File intentionally blank.
|
24
reinforcement/test_cases/q4/4-discountgrid.test
Normal file
24
reinforcement/test_cases/q4/4-discountgrid.test
Normal file
@ -0,0 +1,24 @@
|
||||
class: "EpsilonGreedyTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10 _ 10 _ _
|
||||
-10 _ # _ _
|
||||
-10 _ 1 _ _
|
||||
-10 _ # # _
|
||||
-10 S _ _ _
|
||||
"""
|
||||
discount: "0.9"
|
||||
noise: "0.2"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.2"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "3000"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
2
reinforcement/test_cases/q4/CONFIG
Normal file
2
reinforcement/test_cases/q4/CONFIG
Normal file
@ -0,0 +1,2 @@
|
||||
max_points: "2"
|
||||
class: "PassAllTestsQuestion"
|
2
reinforcement/test_cases/q5/CONFIG
Normal file
2
reinforcement/test_cases/q5/CONFIG
Normal file
@ -0,0 +1,2 @@
|
||||
max_points: "1"
|
||||
class: "PartialCreditQuestion"
|
2
reinforcement/test_cases/q5/grade-agent.solution
Normal file
2
reinforcement/test_cases/q5/grade-agent.solution
Normal file
@ -0,0 +1,2 @@
|
||||
# This is the solution file for test_cases/q5/grade-agent.test.
|
||||
# File intentionally blank.
|
6
reinforcement/test_cases/q5/grade-agent.test
Normal file
6
reinforcement/test_cases/q5/grade-agent.test
Normal file
@ -0,0 +1,6 @@
|
||||
class: "EvalAgentTest"
|
||||
|
||||
# 100 test games after 2000 training games
|
||||
pacmanParams: "-p PacmanQAgent -x 2000 -n 2100 -l smallGrid -q -f --fixRandomSeed"
|
||||
|
||||
winsThresholds: "70"
|
429
reinforcement/test_cases/q6/1-tinygrid.solution
Normal file
429
reinforcement/test_cases/q6/1-tinygrid.solution
Normal file
@ -0,0 +1,429 @@
|
||||
weights_k_0: """
|
||||
{((0, 0), 'exit'): 0,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_1: """
|
||||
{((0, 0), 'exit'): 1.0,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_2: """
|
||||
{((0, 0), 'exit'): 1.0,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_3: """
|
||||
{((0, 0), 'exit'): 1.9,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.9000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_4: """
|
||||
{((0, 0), 'exit'): 2.71,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_5: """
|
||||
{((0, 0), 'exit'): 2.71,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_6: """
|
||||
{((0, 0), 'exit'): 3.439,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_7: """
|
||||
{((0, 0), 'exit'): 3.439,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.17195000000000002,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_8: """
|
||||
{((0, 0), 'exit'): 4.0951,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.17195000000000002,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.0951
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_9: """
|
||||
{((0, 0), 'exit'): 4.68559,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.17195000000000002,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.6856
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_100: """
|
||||
{((0, 0), 'exit'): 9.817519963685992,
|
||||
((0, 1), 'east'): 0.40629236674335106,
|
||||
((0, 1), 'north'): -0.4534185789984799,
|
||||
((0, 1), 'south'): 2.126721095524319,
|
||||
((0, 1), 'west'): 0.39193283364906867,
|
||||
((0, 2), 'exit'): -9.476652366972639}
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-0.4534
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
0.4063
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-9.4767
|
||||
illegal
|
||||
9.8175
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
2.1267
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
0.3919
|
||||
illegal
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q6/1-tinygrid.test
Normal file
22
reinforcement/test_cases/q6/1-tinygrid.test
Normal file
@ -0,0 +1,22 @@
|
||||
class: "ApproximateQLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.5"
|
||||
noise: "0.0"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
71
reinforcement/test_cases/q6/1-tinygrid.test_output
Normal file
71
reinforcement/test_cases/q6/1-tinygrid.test_output
Normal file
@ -0,0 +1,71 @@
|
||||
==================== Iteration 0 ====================
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
429
reinforcement/test_cases/q6/2-tinygrid-noisy.solution
Normal file
429
reinforcement/test_cases/q6/2-tinygrid-noisy.solution
Normal file
@ -0,0 +1,429 @@
|
||||
weights_k_0: """
|
||||
{((0, 0), 'exit'): 0,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_1: """
|
||||
{((0, 0), 'exit'): 1.0,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_2: """
|
||||
{((0, 0), 'exit'): 1.0,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_3: """
|
||||
{((0, 0), 'exit'): 1.9,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.9000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_4: """
|
||||
{((0, 0), 'exit'): 2.71,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_5: """
|
||||
{((0, 0), 'exit'): 2.71,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_6: """
|
||||
{((0, 0), 'exit'): 3.439,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.0,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_7: """
|
||||
{((0, 0), 'exit'): 3.439,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.257925,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_8: """
|
||||
{((0, 0), 'exit'): 4.0951,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.257925,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.0951
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_9: """
|
||||
{((0, 0), 'exit'): 4.68559,
|
||||
((0, 1), 'east'): 0,
|
||||
((0, 1), 'north'): 0,
|
||||
((0, 1), 'south'): 0.257925,
|
||||
((0, 1), 'west'): 0,
|
||||
((0, 2), 'exit'): -1.0}
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.6856
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
weights_k_100: """
|
||||
{((0, 0), 'exit'): 9.817519963685992,
|
||||
((0, 1), 'east'): 0.9498968104823575,
|
||||
((0, 1), 'north'): -0.66699795412272,
|
||||
((0, 1), 'south'): 3.256207905310105,
|
||||
((0, 1), 'west'): 0.8236280735014627,
|
||||
((0, 2), 'exit'): -9.476652366972639}
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-0.6670
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
0.9499
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-9.4767
|
||||
illegal
|
||||
9.8175
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
3.2562
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
0.8236
|
||||
illegal
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q6/2-tinygrid-noisy.test
Normal file
22
reinforcement/test_cases/q6/2-tinygrid-noisy.test
Normal file
@ -0,0 +1,22 @@
|
||||
class: "ApproximateQLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.75"
|
||||
noise: "0.25"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
71
reinforcement/test_cases/q6/2-tinygrid-noisy.test_output
Normal file
71
reinforcement/test_cases/q6/2-tinygrid-noisy.test_output
Normal file
@ -0,0 +1,71 @@
|
||||
==================== Iteration 0 ====================
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
935
reinforcement/test_cases/q6/3-bridge.solution
Normal file
935
reinforcement/test_cases/q6/3-bridge.solution
Normal file
@ -0,0 +1,935 @@
|
||||
weights_k_0: """
|
||||
{((0, 1), 'exit'): 0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): 0,
|
||||
((0, 4), 'exit'): 0,
|
||||
((0, 5), 'exit'): 0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): 0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_1: """
|
||||
{((0, 1), 'exit'): 0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): 0,
|
||||
((0, 4), 'exit'): 0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): 0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_2: """
|
||||
{((0, 1), 'exit'): 0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): 0,
|
||||
((0, 4), 'exit'): 0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): 0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_3: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): 0,
|
||||
((0, 4), 'exit'): 0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): 0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_4: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): 0,
|
||||
((0, 4), 'exit'): -10.0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): 0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_5: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): 0,
|
||||
((0, 4), 'exit'): -10.0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): -10.0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_6: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): -10.0,
|
||||
((0, 4), 'exit'): -10.0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): -10.0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_7: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): 0,
|
||||
((0, 3), 'exit'): -10.0,
|
||||
((0, 4), 'exit'): -10.0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0.0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): -10.0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_8: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): -10.0,
|
||||
((0, 3), 'exit'): -10.0,
|
||||
((0, 4), 'exit'): -10.0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0.0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): -10.0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_9: """
|
||||
{((0, 1), 'exit'): -10.0,
|
||||
((0, 2), 'exit'): -10.0,
|
||||
((0, 3), 'exit'): -10.0,
|
||||
((0, 4), 'exit'): -10.0,
|
||||
((0, 5), 'exit'): -10.0,
|
||||
((1, 0), 'exit'): 0.1,
|
||||
((1, 1), 'east'): 0,
|
||||
((1, 1), 'north'): 0,
|
||||
((1, 1), 'south'): 0,
|
||||
((1, 1), 'west'): 0,
|
||||
((1, 2), 'east'): 0,
|
||||
((1, 2), 'north'): 0,
|
||||
((1, 2), 'south'): 0,
|
||||
((1, 2), 'west'): 0,
|
||||
((1, 3), 'east'): 0,
|
||||
((1, 3), 'north'): 0,
|
||||
((1, 3), 'south'): 0.0,
|
||||
((1, 3), 'west'): 0,
|
||||
((1, 4), 'east'): 0,
|
||||
((1, 4), 'north'): 0,
|
||||
((1, 4), 'south'): 0,
|
||||
((1, 4), 'west'): 0,
|
||||
((1, 5), 'east'): 0,
|
||||
((1, 5), 'north'): 0,
|
||||
((1, 5), 'south'): 0.0,
|
||||
((1, 5), 'west'): 0,
|
||||
((1, 6), 'exit'): 0,
|
||||
((2, 1), 'exit'): 0,
|
||||
((2, 2), 'exit'): 0,
|
||||
((2, 3), 'exit'): -10.0,
|
||||
((2, 4), 'exit'): 0,
|
||||
((2, 5), 'exit'): 0}
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.1000 __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
weights_k_500: """
|
||||
{((0, 1), 'exit'): -92.82102012308148,
|
||||
((0, 2), 'exit'): -97.21871610556306,
|
||||
((0, 3), 'exit'): -89.05810108684878,
|
||||
((0, 4), 'exit'): -97.74716004550608,
|
||||
((0, 5), 'exit'): -96.56631617970748,
|
||||
((1, 0), 'exit'): 0.9576088417247839,
|
||||
((1, 1), 'east'): -31.68839649871871,
|
||||
((1, 1), 'north'): -0.5871409700255297,
|
||||
((1, 1), 'south'): -5.571799344704395,
|
||||
((1, 1), 'west'): -20.777007017445538,
|
||||
((1, 2), 'east'): -43.595242197319,
|
||||
((1, 2), 'north'): -1.264202431807023,
|
||||
((1, 2), 'south'): -8.401530599975509,
|
||||
((1, 2), 'west'): -16.283916171605192,
|
||||
((1, 3), 'east'): -3.6956691,
|
||||
((1, 3), 'north'): -0.16712710492783758,
|
||||
((1, 3), 'south'): -3.4722840178579073,
|
||||
((1, 3), 'west'): -40.58867937480968,
|
||||
((1, 4), 'east'): -26.553386621338632,
|
||||
((1, 4), 'north'): -0.799493322153628,
|
||||
((1, 4), 'south'): -6.727671187497919,
|
||||
((1, 4), 'west'): -39.06095135014759,
|
||||
((1, 5), 'east'): -17.067638934181446,
|
||||
((1, 5), 'north'): -5.864753060887024,
|
||||
((1, 5), 'south'): -6.83769420759525,
|
||||
((1, 5), 'west'): -27.062643066307515,
|
||||
((1, 6), 'exit'): 9.353891811077332,
|
||||
((2, 1), 'exit'): -97.21871610556306,
|
||||
((2, 2), 'exit'): -87.84233454094309,
|
||||
((2, 3), 'exit'): -96.90968456173674,
|
||||
((2, 4), 'exit'): -94.185026299696,
|
||||
((2, 5), 'exit'): -96.90968456173674}
|
||||
"""
|
||||
|
||||
q_values_k_500_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -5.8648 illegal
|
||||
illegal -0.7995 illegal
|
||||
illegal -0.1671 illegal
|
||||
illegal -1.2642 illegal
|
||||
illegal -0.5871 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -17.0676 illegal
|
||||
illegal -26.5534 illegal
|
||||
illegal -3.6957 illegal
|
||||
illegal -43.5952 illegal
|
||||
illegal -31.6884 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_exit: """
|
||||
__________ 9.3539 __________
|
||||
-96.5663 illegal -96.9097
|
||||
-97.7472 illegal -94.1850
|
||||
-89.0581 illegal -96.9097
|
||||
-97.2187 illegal -87.8423
|
||||
-92.8210 illegal -97.2187
|
||||
__________ 0.9576 __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -6.8377 illegal
|
||||
illegal -6.7277 illegal
|
||||
illegal -3.4723 illegal
|
||||
illegal -8.4015 illegal
|
||||
illegal -5.5718 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -27.0626 illegal
|
||||
illegal -39.0610 illegal
|
||||
illegal -40.5887 illegal
|
||||
illegal -16.2839 illegal
|
||||
illegal -20.7770 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
27
reinforcement/test_cases/q6/3-bridge.test
Normal file
27
reinforcement/test_cases/q6/3-bridge.test
Normal file
@ -0,0 +1,27 @@
|
||||
class: "ApproximateQLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
# 10 #
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 S -100
|
||||
# 1 #
|
||||
"""
|
||||
gridName: "bridgeGrid"
|
||||
discount: "0.85"
|
||||
noise: "0.1"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "500"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
111
reinforcement/test_cases/q6/3-bridge.test_output
Normal file
111
reinforcement/test_cases/q6/3-bridge.test_output
Normal file
@ -0,0 +1,111 @@
|
||||
==================== Iteration 0 ====================
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
1210
reinforcement/test_cases/q6/4-discountgrid.solution
Normal file
1210
reinforcement/test_cases/q6/4-discountgrid.solution
Normal file
File diff suppressed because it is too large
Load Diff
24
reinforcement/test_cases/q6/4-discountgrid.test
Normal file
24
reinforcement/test_cases/q6/4-discountgrid.test
Normal file
@ -0,0 +1,24 @@
|
||||
class: "ApproximateQLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10 _ 10 _ _
|
||||
-10 _ # _ _
|
||||
-10 _ 1 _ _
|
||||
-10 _ # # _
|
||||
-10 S _ _ _
|
||||
"""
|
||||
discount: "0.9"
|
||||
noise: "0.2"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.2"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "3000"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
91
reinforcement/test_cases/q6/4-discountgrid.test_output
Normal file
91
reinforcement/test_cases/q6/4-discountgrid.test_output
Normal file
@ -0,0 +1,91 @@
|
||||
==================== Iteration 0 ====================
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
880
reinforcement/test_cases/q6/5-coord-extractor.solution
Normal file
880
reinforcement/test_cases/q6/5-coord-extractor.solution
Normal file
@ -0,0 +1,880 @@
|
||||
weights_k_0: """
|
||||
{'action=east': 0,
|
||||
'action=exit': 0,
|
||||
'action=north': 0,
|
||||
'action=south': 0,
|
||||
'action=west': 0,
|
||||
'x=0': 0,
|
||||
'x=1': 0,
|
||||
'x=2': 0,
|
||||
'x=3': 0,
|
||||
'x=4': 0,
|
||||
'y=0': 0,
|
||||
'y=1': 0,
|
||||
'y=2': 0,
|
||||
'y=3': 0,
|
||||
'y=4': 0,
|
||||
(0, 0): 0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): 0,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_0_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_1: """
|
||||
{'action=east': 0,
|
||||
'action=exit': 0,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0,
|
||||
'x=0': 0,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0,
|
||||
'x=3': 0,
|
||||
'x=4': 0,
|
||||
'y=0': 0,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0,
|
||||
'y=3': 0,
|
||||
'y=4': 0,
|
||||
(0, 0): 0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): 0,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_2: """
|
||||
{'action=east': 0,
|
||||
'action=exit': 0,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0,
|
||||
'x=0': 0,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0,
|
||||
'y=0': 0,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0,
|
||||
(0, 0): 0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): 0,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_3: """
|
||||
{'action=east': 0,
|
||||
'action=exit': -1.0,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0,
|
||||
'x=0': -1.0,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0,
|
||||
'y=0': -1.0,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): 0,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
-3.0000 illegal -1.0000 illegal illegal
|
||||
-3.0000 illegal __________ illegal illegal
|
||||
-3.0000 illegal -1.0000 illegal illegal
|
||||
-3.0000 illegal __________ __________ illegal
|
||||
-4.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_4: """
|
||||
{'action=east': 0.0,
|
||||
'action=exit': -1.0,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0,
|
||||
'x=0': -1.0,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0,
|
||||
'y=0': -1.0,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): 0,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
-3.0000 illegal -1.0000 illegal illegal
|
||||
-3.0000 illegal __________ illegal illegal
|
||||
-3.0000 illegal -1.0000 illegal illegal
|
||||
-3.0000 illegal __________ __________ illegal
|
||||
-4.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_5: """
|
||||
{'action=east': 0.0,
|
||||
'action=exit': -1.0,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0.0,
|
||||
'x=0': -1.0,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0.0,
|
||||
'y=0': -1.0,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0.0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): 0,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0.0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-3.0000 illegal -1.0000 illegal illegal
|
||||
-3.0000 illegal __________ illegal illegal
|
||||
-3.0000 illegal -1.0000 illegal illegal
|
||||
-3.0000 illegal __________ __________ illegal
|
||||
-4.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_6: """
|
||||
{'action=east': 0.0,
|
||||
'action=exit': -1.7000000000000002,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0.0,
|
||||
'x=0': -1.7000000000000002,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0.0,
|
||||
'y=0': -1.7000000000000002,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0.0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): -0.7000000000000001,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0.0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-5.1000 illegal -1.7000 illegal illegal
|
||||
-5.8000 illegal __________ illegal illegal
|
||||
-5.1000 illegal -1.7000 illegal illegal
|
||||
-5.1000 illegal __________ __________ illegal
|
||||
-6.1000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_7: """
|
||||
{'action=east': 0.0,
|
||||
'action=exit': -1.4300000000000002,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0.0,
|
||||
'x=0': -1.7000000000000002,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0.27,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0.0,
|
||||
'y=0': -1.7000000000000002,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0.27,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0.0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): 0,
|
||||
(0, 2): 0,
|
||||
(0, 3): -0.7000000000000001,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0.27,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0.0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-4.8300 illegal -0.8900 illegal illegal
|
||||
-5.5300 illegal __________ illegal illegal
|
||||
-4.8300 illegal -0.6200 illegal illegal
|
||||
-4.8300 illegal __________ __________ illegal
|
||||
-5.8300 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_8: """
|
||||
{'action=east': 0.0,
|
||||
'action=exit': -1.947,
|
||||
'action=north': 0,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0.0,
|
||||
'x=0': -2.217,
|
||||
'x=1': 0.0,
|
||||
'x=2': 0.27,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0.0,
|
||||
'y=0': -2.217,
|
||||
'y=1': 0.0,
|
||||
'y=2': 0.27,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0.0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): -0.517,
|
||||
(0, 2): 0,
|
||||
(0, 3): -0.7000000000000001,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): 0,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0.27,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0.0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-6.3810 illegal -1.4070 illegal illegal
|
||||
-7.0810 illegal __________ illegal illegal
|
||||
-6.3810 illegal -1.1370 illegal illegal
|
||||
-6.8980 illegal __________ __________ illegal
|
||||
-7.3810 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_9: """
|
||||
{'action=east': 0.0,
|
||||
'action=exit': -1.947,
|
||||
'action=north': -0.62082,
|
||||
'action=south': 0.0,
|
||||
'action=west': 0.0,
|
||||
'x=0': -2.217,
|
||||
'x=1': -0.62082,
|
||||
'x=2': 0.27,
|
||||
'x=3': 0.0,
|
||||
'x=4': 0.0,
|
||||
'y=0': -2.217,
|
||||
'y=1': -0.62082,
|
||||
'y=2': 0.27,
|
||||
'y=3': 0.0,
|
||||
'y=4': 0.0,
|
||||
(0, 0): -1.0,
|
||||
(0, 1): -0.517,
|
||||
(0, 2): 0,
|
||||
(0, 3): -0.7000000000000001,
|
||||
(0, 4): 0,
|
||||
(1, 0): 0.0,
|
||||
(1, 1): -0.62082,
|
||||
(1, 2): 0,
|
||||
(1, 3): 0,
|
||||
(1, 4): 0,
|
||||
(2, 0): 0,
|
||||
(2, 2): 0.27,
|
||||
(2, 4): 0,
|
||||
(3, 0): 0.0,
|
||||
(3, 2): 0,
|
||||
(3, 3): 0,
|
||||
(3, 4): 0,
|
||||
(4, 0): 0,
|
||||
(4, 1): 0.0,
|
||||
(4, 2): 0,
|
||||
(4, 3): 0,
|
||||
(4, 4): 0}
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal -1.8625 illegal -0.6208 -0.6208
|
||||
illegal -1.8625 __________ -0.6208 -0.6208
|
||||
illegal -1.8625 illegal -0.6208 -0.6208
|
||||
illegal -2.4833 __________ __________ -0.6208
|
||||
illegal -1.8625 -0.0808 -0.6208 -0.6208
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal -1.2416 illegal 0.0000 0.0000
|
||||
illegal -1.2416 __________ 0.0000 0.0000
|
||||
illegal -1.2416 illegal 0.0000 0.0000
|
||||
illegal -1.8625 __________ __________ 0.0000
|
||||
illegal -1.2416 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-6.3810 illegal -1.4070 illegal illegal
|
||||
-7.0810 illegal __________ illegal illegal
|
||||
-6.3810 illegal -1.1370 illegal illegal
|
||||
-6.8980 illegal __________ __________ illegal
|
||||
-7.3810 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal -1.2416 illegal 0.0000 0.0000
|
||||
illegal -1.2416 __________ 0.0000 0.0000
|
||||
illegal -1.2416 illegal 0.0000 0.0000
|
||||
illegal -1.8625 __________ __________ 0.0000
|
||||
illegal -1.2416 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal -1.2416 illegal 0.0000 0.0000
|
||||
illegal -1.2416 __________ 0.0000 0.0000
|
||||
illegal -1.2416 illegal 0.0000 0.0000
|
||||
illegal -1.8625 __________ __________ 0.0000
|
||||
illegal -1.2416 0.5400 0.0000 0.0000
|
||||
"""
|
||||
|
||||
weights_k_3000: """
|
||||
{'action=east': 6.719916513522846,
|
||||
'action=exit': -2.2444981376861555,
|
||||
'action=north': 4.568574519923728,
|
||||
'action=south': 3.761510351874819,
|
||||
'action=west': 1.2828606322891556,
|
||||
'x=0': -3.604063955849794,
|
||||
'x=1': 0.6731476152061693,
|
||||
'x=2': 4.000208353074704,
|
||||
'x=3': 5.988311380073477,
|
||||
'x=4': 7.0307604874198235,
|
||||
'y=0': -3.604063955849794,
|
||||
'y=1': 0.6731476152061693,
|
||||
'y=2': 4.000208353074704,
|
||||
'y=3': 5.988311380073477,
|
||||
'y=4': 7.0307604874198235,
|
||||
(0, 0): -0.7073688447583666,
|
||||
(0, 1): -0.7542862401704076,
|
||||
(0, 2): -0.7043014501203066,
|
||||
(0, 3): -0.7433344649617668,
|
||||
(0, 4): -0.6947729558389527,
|
||||
(1, 0): 2.364273811399719,
|
||||
(1, 1): -0.2695405704605499,
|
||||
(1, 2): -0.7105979212702271,
|
||||
(1, 3): -1.4866826750327933,
|
||||
(1, 4): 0.7756949705700219,
|
||||
(2, 0): 2.64064253491107,
|
||||
(2, 2): -3.7381118310263166,
|
||||
(2, 4): 5.097677649189953,
|
||||
(3, 0): 2.505262939441149,
|
||||
(3, 2): 0.27218788923837256,
|
||||
(3, 3): 2.2611084206093195,
|
||||
(3, 4): 0.9497521307846304,
|
||||
(4, 0): 1.7330586015291545,
|
||||
(4, 1): 0.980194046153168,
|
||||
(4, 2): 0.78786289128181,
|
||||
(4, 3): 1.493343270762865,
|
||||
(4, 4): 2.0363016776928333}
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_north: """
|
||||
illegal 6.6906 illegal 17.4949 20.6664
|
||||
illegal 4.4282 __________ 18.8063 20.1234
|
||||
illegal 5.2043 illegal 16.8174 19.4180
|
||||
illegal 5.6453 __________ __________ 19.6103
|
||||
illegal 8.2791 15.2096 19.0505 20.3632
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_east: """
|
||||
illegal 8.8419 illegal 19.6463 22.8177
|
||||
illegal 6.5795 __________ 20.9576 22.2748
|
||||
illegal 7.3556 illegal 18.9687 21.5693
|
||||
illegal 7.7967 __________ __________ 21.7616
|
||||
illegal 10.4305 17.3610 21.2018 22.5145
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_exit: """
|
||||
-10.1474 illegal 10.8536 illegal illegal
|
||||
-10.1960 illegal __________ illegal illegal
|
||||
-10.1569 illegal 2.0178 illegal illegal
|
||||
-10.2069 illegal __________ __________ illegal
|
||||
-10.1600 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_south: """
|
||||
illegal 5.8835 illegal 16.6879 19.8593
|
||||
illegal 3.6211 __________ 17.9992 19.3164
|
||||
illegal 4.3972 illegal 16.0103 18.6109
|
||||
illegal 4.8383 __________ __________ 18.8032
|
||||
illegal 7.4721 14.4026 18.2434 19.5561
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_west: """
|
||||
illegal 3.4049 illegal 14.2092 17.3807
|
||||
illegal 1.1425 __________ 15.5206 16.8377
|
||||
illegal 1.9186 illegal 13.5317 16.1322
|
||||
illegal 2.3596 __________ __________ 16.3246
|
||||
illegal 4.9934 11.9239 15.7647 17.0774
|
||||
"""
|
||||
|
25
reinforcement/test_cases/q6/5-coord-extractor.test
Normal file
25
reinforcement/test_cases/q6/5-coord-extractor.test
Normal file
@ -0,0 +1,25 @@
|
||||
class: "ApproximateQLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10 _ 10 _ _
|
||||
-10 _ # _ _
|
||||
-10 _ 1 _ _
|
||||
-10 _ # # _
|
||||
-10 S _ _ _
|
||||
"""
|
||||
discount: "0.9"
|
||||
noise: "0.2"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.2"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "3000"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
extractor: "CoordinateExtractor"
|
||||
|
91
reinforcement/test_cases/q6/5-coord-extractor.test_output
Normal file
91
reinforcement/test_cases/q6/5-coord-extractor.test_output
Normal file
@ -0,0 +1,91 @@
|
||||
==================== Iteration 0 ====================
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
2
reinforcement/test_cases/q6/CONFIG
Normal file
2
reinforcement/test_cases/q6/CONFIG
Normal file
@ -0,0 +1,2 @@
|
||||
max_points: "3"
|
||||
class: "PassAllTestsQuestion"
|
0
reinforcement/test_cases/q7/1-smallGrid.solution
Normal file
0
reinforcement/test_cases/q7/1-smallGrid.solution
Normal file
3
reinforcement/test_cases/q7/1-smallGrid.test
Normal file
3
reinforcement/test_cases/q7/1-smallGrid.test
Normal file
@ -0,0 +1,3 @@
|
||||
class: "DeepQLearningTest"
|
||||
|
||||
layout: "smallGrid"
|
2
reinforcement/test_cases/q7/CONFIG
Normal file
2
reinforcement/test_cases/q7/CONFIG
Normal file
@ -0,0 +1,2 @@
|
||||
max_points: "4"
|
||||
class: "PartialCreditQuestion"
|
Reference in New Issue
Block a user