enter reinforcement

This commit is contained in:
2024-07-06 01:30:00 +08:00
parent f105ba0150
commit e3f8181056
116 changed files with 19698 additions and 0 deletions

View File

@ -0,0 +1,342 @@
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
q_values_k_1_action_north: """
illegal
0.0000
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_1_action_south: """
illegal
0.0000
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
q_values_k_2_action_north: """
illegal
0.0000
illegal
"""
q_values_k_2_action_east: """
illegal
0.0000
illegal
"""
q_values_k_2_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_2_action_south: """
illegal
0.0000
illegal
"""
q_values_k_2_action_west: """
illegal
0.0000
illegal
"""
q_values_k_3_action_north: """
illegal
0.0000
illegal
"""
q_values_k_3_action_east: """
illegal
0.0000
illegal
"""
q_values_k_3_action_exit: """
0.0000
illegal
1.9000
"""
q_values_k_3_action_south: """
illegal
0.0000
illegal
"""
q_values_k_3_action_west: """
illegal
0.0000
illegal
"""
q_values_k_4_action_north: """
illegal
0.0000
illegal
"""
q_values_k_4_action_east: """
illegal
0.0000
illegal
"""
q_values_k_4_action_exit: """
0.0000
illegal
2.7100
"""
q_values_k_4_action_south: """
illegal
0.0000
illegal
"""
q_values_k_4_action_west: """
illegal
0.0000
illegal
"""
q_values_k_5_action_north: """
illegal
0.0000
illegal
"""
q_values_k_5_action_east: """
illegal
0.0000
illegal
"""
q_values_k_5_action_exit: """
-1.0000
illegal
2.7100
"""
q_values_k_5_action_south: """
illegal
0.0000
illegal
"""
q_values_k_5_action_west: """
illegal
0.0000
illegal
"""
q_values_k_6_action_north: """
illegal
0.0000
illegal
"""
q_values_k_6_action_east: """
illegal
0.0000
illegal
"""
q_values_k_6_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_6_action_south: """
illegal
0.0000
illegal
"""
q_values_k_6_action_west: """
illegal
0.0000
illegal
"""
q_values_k_7_action_north: """
illegal
0.0000
illegal
"""
q_values_k_7_action_east: """
illegal
0.0000
illegal
"""
q_values_k_7_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_7_action_south: """
illegal
0.1720
illegal
"""
q_values_k_7_action_west: """
illegal
0.0000
illegal
"""
q_values_k_8_action_north: """
illegal
0.0000
illegal
"""
q_values_k_8_action_east: """
illegal
0.0000
illegal
"""
q_values_k_8_action_exit: """
-1.0000
illegal
4.0951
"""
q_values_k_8_action_south: """
illegal
0.1720
illegal
"""
q_values_k_8_action_west: """
illegal
0.0000
illegal
"""
q_values_k_9_action_north: """
illegal
0.0000
illegal
"""
q_values_k_9_action_east: """
illegal
0.0000
illegal
"""
q_values_k_9_action_exit: """
-1.0000
illegal
4.6856
"""
q_values_k_9_action_south: """
illegal
0.1720
illegal
"""
q_values_k_9_action_west: """
illegal
0.0000
illegal
"""
q_values_k_100_action_north: """
illegal
-0.4534
illegal
"""
q_values_k_100_action_east: """
illegal
0.4063
illegal
"""
q_values_k_100_action_exit: """
-9.4767
illegal
9.8175
"""
q_values_k_100_action_south: """
illegal
2.1267
illegal
"""
q_values_k_100_action_west: """
illegal
0.3919
illegal
"""
values: """
-9.4767
2.1267
9.8175
"""
policy: """
exit
south
exit
"""

View File

@ -0,0 +1,22 @@
class: "QLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.5"
noise: "0.0"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"

View File

@ -0,0 +1,70 @@
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
q_values_k_0_action_south: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
q_values_k_0_action_west: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
q_values_k_0_action_exit: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
q_values_k_0_action_east: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
q_values_k_0_action_north: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""

View File

@ -0,0 +1,342 @@
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
q_values_k_1_action_north: """
illegal
0.0000
illegal
"""
q_values_k_1_action_east: """
illegal
0.0000
illegal
"""
q_values_k_1_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_1_action_south: """
illegal
0.0000
illegal
"""
q_values_k_1_action_west: """
illegal
0.0000
illegal
"""
q_values_k_2_action_north: """
illegal
0.0000
illegal
"""
q_values_k_2_action_east: """
illegal
0.0000
illegal
"""
q_values_k_2_action_exit: """
0.0000
illegal
1.0000
"""
q_values_k_2_action_south: """
illegal
0.0000
illegal
"""
q_values_k_2_action_west: """
illegal
0.0000
illegal
"""
q_values_k_3_action_north: """
illegal
0.0000
illegal
"""
q_values_k_3_action_east: """
illegal
0.0000
illegal
"""
q_values_k_3_action_exit: """
0.0000
illegal
1.9000
"""
q_values_k_3_action_south: """
illegal
0.0000
illegal
"""
q_values_k_3_action_west: """
illegal
0.0000
illegal
"""
q_values_k_4_action_north: """
illegal
0.0000
illegal
"""
q_values_k_4_action_east: """
illegal
0.0000
illegal
"""
q_values_k_4_action_exit: """
0.0000
illegal
2.7100
"""
q_values_k_4_action_south: """
illegal
0.0000
illegal
"""
q_values_k_4_action_west: """
illegal
0.0000
illegal
"""
q_values_k_5_action_north: """
illegal
0.0000
illegal
"""
q_values_k_5_action_east: """
illegal
0.0000
illegal
"""
q_values_k_5_action_exit: """
-1.0000
illegal
2.7100
"""
q_values_k_5_action_south: """
illegal
0.0000
illegal
"""
q_values_k_5_action_west: """
illegal
0.0000
illegal
"""
q_values_k_6_action_north: """
illegal
0.0000
illegal
"""
q_values_k_6_action_east: """
illegal
0.0000
illegal
"""
q_values_k_6_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_6_action_south: """
illegal
0.0000
illegal
"""
q_values_k_6_action_west: """
illegal
0.0000
illegal
"""
q_values_k_7_action_north: """
illegal
0.0000
illegal
"""
q_values_k_7_action_east: """
illegal
0.0000
illegal
"""
q_values_k_7_action_exit: """
-1.0000
illegal
3.4390
"""
q_values_k_7_action_south: """
illegal
0.2579
illegal
"""
q_values_k_7_action_west: """
illegal
0.0000
illegal
"""
q_values_k_8_action_north: """
illegal
0.0000
illegal
"""
q_values_k_8_action_east: """
illegal
0.0000
illegal
"""
q_values_k_8_action_exit: """
-1.0000
illegal
4.0951
"""
q_values_k_8_action_south: """
illegal
0.2579
illegal
"""
q_values_k_8_action_west: """
illegal
0.0000
illegal
"""
q_values_k_9_action_north: """
illegal
0.0000
illegal
"""
q_values_k_9_action_east: """
illegal
0.0000
illegal
"""
q_values_k_9_action_exit: """
-1.0000
illegal
4.6856
"""
q_values_k_9_action_south: """
illegal
0.2579
illegal
"""
q_values_k_9_action_west: """
illegal
0.0000
illegal
"""
q_values_k_100_action_north: """
illegal
-0.6670
illegal
"""
q_values_k_100_action_east: """
illegal
0.9499
illegal
"""
q_values_k_100_action_exit: """
-9.4767
illegal
9.8175
"""
q_values_k_100_action_south: """
illegal
3.2562
illegal
"""
q_values_k_100_action_west: """
illegal
0.8236
illegal
"""
values: """
-9.4767
3.2562
9.8175
"""
policy: """
exit
south
exit
"""

View File

@ -0,0 +1,22 @@
class: "QLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10
S
10
"""
discount: "0.75"
noise: "0.25"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"

View File

@ -0,0 +1,70 @@
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
q_values_k_0_action_south: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
q_values_k_0_action_west: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
q_values_k_0_action_exit: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
q_values_k_0_action_east: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
q_values_k_0_action_north: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""

View File

@ -0,0 +1,570 @@
q_values_k_0_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_exit: """
__________ 0.0000 __________
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_0_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_0_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_1_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_1_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_2_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_2_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_3_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_3_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_4_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_4_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
0.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_5_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_5_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_6_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_6_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
0.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_7_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_7_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.0000 __________
"""
q_values_k_8_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_8_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_exit: """
__________ 0.0000 __________
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
-10.0000 illegal -10.0000
-10.0000 illegal 0.0000
-10.0000 illegal 0.0000
__________ 0.1000 __________
"""
q_values_k_9_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_9_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
q_values_k_500_action_north: """
__________ illegal __________
illegal -5.8648 illegal
illegal -0.7995 illegal
illegal -0.1671 illegal
illegal -1.2642 illegal
illegal -0.5871 illegal
__________ illegal __________
"""
q_values_k_500_action_east: """
__________ illegal __________
illegal -17.0676 illegal
illegal -26.5534 illegal
illegal -3.6957 illegal
illegal -43.5952 illegal
illegal -31.6884 illegal
__________ illegal __________
"""
q_values_k_500_action_exit: """
__________ 9.3539 __________
-96.5663 illegal -96.9097
-97.7472 illegal -94.1850
-89.0581 illegal -96.9097
-97.2187 illegal -87.8423
-92.8210 illegal -97.2187
__________ 0.9576 __________
"""
q_values_k_500_action_south: """
__________ illegal __________
illegal -6.8377 illegal
illegal -6.7277 illegal
illegal -3.4723 illegal
illegal -8.4015 illegal
illegal -5.5718 illegal
__________ illegal __________
"""
q_values_k_500_action_west: """
__________ illegal __________
illegal -27.0626 illegal
illegal -39.0610 illegal
illegal -40.5887 illegal
illegal -16.2839 illegal
illegal -20.7770 illegal
__________ illegal __________
"""
values: """
__________ 9.3539 __________
-96.5663 -5.8648 -96.9097
-97.7472 -0.7995 -94.1850
-89.0581 -0.1671 -96.9097
-97.2187 -1.2642 -87.8423
-92.8210 -0.5871 -97.2187
__________ 0.9576 __________
"""
policy: """
__________ exit __________
exit north exit
exit north exit
exit north exit
exit north exit
exit north exit
__________ exit __________
"""

View File

@ -0,0 +1,27 @@
class: "QLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
# 10 #
-100 _ -100
-100 _ -100
-100 _ -100
-100 _ -100
-100 S -100
# 1 #
"""
gridName: "bridgeGrid"
discount: "0.85"
noise: "0.1"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "500"
valueIterations: "100"
iterations: "10000"

View File

@ -0,0 +1,110 @@
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
q_values_k_0_action_south: """
__________ illegal __________
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
__________ illegal __________
"""
Correct solution:
q_values_k_0_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
q_values_k_0_action_west: """
__________ illegal __________
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
__________ illegal __________
"""
Correct solution:
q_values_k_0_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
q_values_k_0_action_exit: """
__________ illegal __________
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
__________ illegal __________
"""
Correct solution:
q_values_k_0_action_exit: """
__________ 0.0000 __________
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
q_values_k_0_action_east: """
__________ illegal __________
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
__________ illegal __________
"""
Correct solution:
q_values_k_0_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
q_values_k_0_action_north: """
__________ illegal __________
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
__________ illegal __________
"""
Correct solution:
q_values_k_0_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""

View File

@ -0,0 +1,456 @@
q_values_k_0_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_0_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_0_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_1_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_1_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
q_values_k_2_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_2_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_3_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_4_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_4_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_5_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_5_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_6_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_6_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.1000 illegal illegal
0.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_7_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_7_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.1000 illegal illegal
-1.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_8_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_8_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal -0.0900 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_exit: """
0.0000 illegal 0.0000 illegal illegal
-1.0000 illegal __________ illegal illegal
0.0000 illegal 0.1000 illegal illegal
-1.0000 illegal __________ __________ illegal
-1.0000 illegal illegal illegal illegal
"""
q_values_k_9_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_9_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
q_values_k_3000_action_north: """
illegal 4.3205 illegal 6.1517 3.8095
illegal 4.4238 __________ 5.2284 3.5129
illegal 1.0694 illegal 3.6867 2.0418
illegal 0.3423 __________ __________ 1.0655
illegal 0.0073 0.0079 0.0484 0.3768
"""
q_values_k_3000_action_east: """
illegal 8.0584 illegal 3.7245 3.3947
illegal 2.0499 __________ 3.2373 2.1742
illegal 0.8687 illegal 1.7398 1.2671
illegal 0.2927 __________ __________ 0.6669
illegal 0.0239 0.0097 0.1611 0.2051
"""
q_values_k_3000_action_exit: """
-10.0000 illegal 10.0000 illegal illegal
-10.0000 illegal __________ illegal illegal
-10.0000 illegal 1.0000 illegal illegal
-10.0000 illegal __________ __________ illegal
-9.9999 illegal illegal illegal illegal
"""
q_values_k_3000_action_south: """
illegal -0.3521 illegal 3.6948 2.9139
illegal -0.5605 __________ 2.1346 1.5674
illegal 0.2093 illegal 1.5389 0.5521
illegal -0.5505 __________ __________ 0.1006
illegal -1.8501 0.0060 0.0514 0.1223
"""
q_values_k_3000_action_west: """
illegal -6.2001 illegal 7.5146 4.9014
illegal -5.4013 __________ 4.0484 3.4126
illegal -8.0399 illegal 0.9653 1.6081
illegal -7.4767 __________ __________ 0.3934
illegal -6.3432 0.0179 0.0188 0.1028
"""
values: """
-10.0000 8.0584 10.0000 7.5146 4.9014
-10.0000 4.4238 __________ 5.2284 3.5129
-10.0000 1.0694 1.0000 3.6867 2.0418
-10.0000 0.3423 __________ __________ 1.0655
-9.9999 0.0239 0.0179 0.1611 0.3768
"""
policy: """
exit east exit west west
exit north __________ north north
exit north exit north north
exit north __________ __________ north
exit east west east north
"""

View File

@ -0,0 +1,24 @@
class: "QLearningTest"
# GridWorld specification
# _ is empty space
# numbers are terminal states with that value
# # is a wall
# S is a start state
#
grid: """
-10 _ 10 _ _
-10 _ # _ _
-10 _ 1 _ _
-10 _ # # _
-10 S _ _ _
"""
discount: "0.9"
noise: "0.2"
livingReward: "0.0"
epsilon: "0.2"
learningRate: "0.1"
numExperiences: "3000"
valueIterations: "100"
iterations: "10000"

View File

@ -0,0 +1,90 @@
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
q_values_k_0_action_south: """
illegal illegal illegal illegal illegal
illegal illegal __________ illegal illegal
illegal illegal illegal illegal illegal
illegal illegal __________ __________ illegal
illegal illegal illegal illegal illegal
"""
Correct solution:
q_values_k_0_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
q_values_k_0_action_west: """
illegal illegal illegal illegal illegal
illegal illegal __________ illegal illegal
illegal illegal illegal illegal illegal
illegal illegal __________ __________ illegal
illegal illegal illegal illegal illegal
"""
Correct solution:
q_values_k_0_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
q_values_k_0_action_exit: """
illegal illegal illegal illegal illegal
illegal illegal __________ illegal illegal
illegal illegal illegal illegal illegal
illegal illegal __________ __________ illegal
illegal illegal illegal illegal illegal
"""
Correct solution:
q_values_k_0_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
q_values_k_0_action_east: """
illegal illegal illegal illegal illegal
illegal illegal __________ illegal illegal
illegal illegal illegal illegal illegal
illegal illegal __________ __________ illegal
illegal illegal illegal illegal illegal
"""
Correct solution:
q_values_k_0_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
q_values_k_0_action_north: """
illegal illegal illegal illegal illegal
illegal illegal __________ illegal illegal
illegal illegal illegal illegal illegal
illegal illegal __________ __________ illegal
illegal illegal illegal illegal illegal
"""
Correct solution:
q_values_k_0_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""

View File

@ -0,0 +1,2 @@
max_points: "5"
class: "PassAllTestsQuestion"