enter reinforcement
This commit is contained in:
342
reinforcement/test_cases/q3/1-tinygrid.solution
Normal file
342
reinforcement/test_cases/q3/1-tinygrid.solution
Normal file
@ -0,0 +1,342 @@
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.9000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.0951
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.6856
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
0.1720
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-0.4534
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
0.4063
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-9.4767
|
||||
illegal
|
||||
9.8175
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
2.1267
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
0.3919
|
||||
illegal
|
||||
"""
|
||||
|
||||
values: """
|
||||
-9.4767
|
||||
2.1267
|
||||
9.8175
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit
|
||||
south
|
||||
exit
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q3/1-tinygrid.test
Normal file
22
reinforcement/test_cases/q3/1-tinygrid.test
Normal file
@ -0,0 +1,22 @@
|
||||
class: "QLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.5"
|
||||
noise: "0.0"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
70
reinforcement/test_cases/q3/1-tinygrid.test_output
Normal file
70
reinforcement/test_cases/q3/1-tinygrid.test_output
Normal file
@ -0,0 +1,70 @@
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
342
reinforcement/test_cases/q3/2-tinygrid-noisy.solution
Normal file
342
reinforcement/test_cases/q3/2-tinygrid-noisy.solution
Normal file
@ -0,0 +1,342 @@
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
1.9000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
2.7100
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
3.4390
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.0951
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
-1.0000
|
||||
illegal
|
||||
4.6856
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal
|
||||
0.2579
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_north: """
|
||||
illegal
|
||||
-0.6670
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_east: """
|
||||
illegal
|
||||
0.9499
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_exit: """
|
||||
-9.4767
|
||||
illegal
|
||||
9.8175
|
||||
"""
|
||||
|
||||
q_values_k_100_action_south: """
|
||||
illegal
|
||||
3.2562
|
||||
illegal
|
||||
"""
|
||||
|
||||
q_values_k_100_action_west: """
|
||||
illegal
|
||||
0.8236
|
||||
illegal
|
||||
"""
|
||||
|
||||
values: """
|
||||
-9.4767
|
||||
3.2562
|
||||
9.8175
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit
|
||||
south
|
||||
exit
|
||||
"""
|
||||
|
22
reinforcement/test_cases/q3/2-tinygrid-noisy.test
Normal file
22
reinforcement/test_cases/q3/2-tinygrid-noisy.test
Normal file
@ -0,0 +1,22 @@
|
||||
class: "QLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10
|
||||
S
|
||||
10
|
||||
"""
|
||||
discount: "0.75"
|
||||
noise: "0.25"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "100"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
70
reinforcement/test_cases/q3/2-tinygrid-noisy.test_output
Normal file
70
reinforcement/test_cases/q3/2-tinygrid-noisy.test_output
Normal file
@ -0,0 +1,70 @@
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000
|
||||
illegal
|
||||
0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
illegal
|
||||
illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal
|
||||
0.0000
|
||||
illegal
|
||||
"""
|
||||
|
570
reinforcement/test_cases/q3/3-bridge.solution
Normal file
570
reinforcement/test_cases/q3/3-bridge.solution
Normal file
@ -0,0 +1,570 @@
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
0.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
0.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal -10.0000
|
||||
-10.0000 illegal 0.0000
|
||||
-10.0000 illegal 0.0000
|
||||
__________ 0.1000 __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_north: """
|
||||
__________ illegal __________
|
||||
illegal -5.8648 illegal
|
||||
illegal -0.7995 illegal
|
||||
illegal -0.1671 illegal
|
||||
illegal -1.2642 illegal
|
||||
illegal -0.5871 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_east: """
|
||||
__________ illegal __________
|
||||
illegal -17.0676 illegal
|
||||
illegal -26.5534 illegal
|
||||
illegal -3.6957 illegal
|
||||
illegal -43.5952 illegal
|
||||
illegal -31.6884 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_exit: """
|
||||
__________ 9.3539 __________
|
||||
-96.5663 illegal -96.9097
|
||||
-97.7472 illegal -94.1850
|
||||
-89.0581 illegal -96.9097
|
||||
-97.2187 illegal -87.8423
|
||||
-92.8210 illegal -97.2187
|
||||
__________ 0.9576 __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_south: """
|
||||
__________ illegal __________
|
||||
illegal -6.8377 illegal
|
||||
illegal -6.7277 illegal
|
||||
illegal -3.4723 illegal
|
||||
illegal -8.4015 illegal
|
||||
illegal -5.5718 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
q_values_k_500_action_west: """
|
||||
__________ illegal __________
|
||||
illegal -27.0626 illegal
|
||||
illegal -39.0610 illegal
|
||||
illegal -40.5887 illegal
|
||||
illegal -16.2839 illegal
|
||||
illegal -20.7770 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
values: """
|
||||
__________ 9.3539 __________
|
||||
-96.5663 -5.8648 -96.9097
|
||||
-97.7472 -0.7995 -94.1850
|
||||
-89.0581 -0.1671 -96.9097
|
||||
-97.2187 -1.2642 -87.8423
|
||||
-92.8210 -0.5871 -97.2187
|
||||
__________ 0.9576 __________
|
||||
"""
|
||||
|
||||
policy: """
|
||||
__________ exit __________
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit north exit
|
||||
exit north exit
|
||||
__________ exit __________
|
||||
"""
|
||||
|
27
reinforcement/test_cases/q3/3-bridge.test
Normal file
27
reinforcement/test_cases/q3/3-bridge.test
Normal file
@ -0,0 +1,27 @@
|
||||
class: "QLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
# 10 #
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 _ -100
|
||||
-100 S -100
|
||||
# 1 #
|
||||
"""
|
||||
gridName: "bridgeGrid"
|
||||
discount: "0.85"
|
||||
noise: "0.1"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.5"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "500"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
110
reinforcement/test_cases/q3/3-bridge.test_output
Normal file
110
reinforcement/test_cases/q3/3-bridge.test_output
Normal file
@ -0,0 +1,110 @@
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
__________ 0.0000 __________
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
0.0000 illegal 0.0000
|
||||
__________ 0.0000 __________
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
illegal illegal illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
__________ illegal __________
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
illegal 0.0000 illegal
|
||||
__________ illegal __________
|
||||
"""
|
||||
|
456
reinforcement/test_cases/q3/4-discountgrid.solution
Normal file
456
reinforcement/test_cases/q3/4-discountgrid.solution
Normal file
@ -0,0 +1,456 @@
|
||||
q_values_k_0_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_0_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_0_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_1_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_1_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_2_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_2_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_3_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_4_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_4_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_5_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_5_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
-1.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_6_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_6_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
-1.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.1000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_7_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_7_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
-1.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.1000 illegal illegal
|
||||
-1.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_8_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_8_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal -0.0900 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
-1.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.1000 illegal illegal
|
||||
-1.0000 illegal __________ __________ illegal
|
||||
-1.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_9_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_9_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_north: """
|
||||
illegal 4.3205 illegal 6.1517 3.8095
|
||||
illegal 4.4238 __________ 5.2284 3.5129
|
||||
illegal 1.0694 illegal 3.6867 2.0418
|
||||
illegal 0.3423 __________ __________ 1.0655
|
||||
illegal 0.0073 0.0079 0.0484 0.3768
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_east: """
|
||||
illegal 8.0584 illegal 3.7245 3.3947
|
||||
illegal 2.0499 __________ 3.2373 2.1742
|
||||
illegal 0.8687 illegal 1.7398 1.2671
|
||||
illegal 0.2927 __________ __________ 0.6669
|
||||
illegal 0.0239 0.0097 0.1611 0.2051
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_exit: """
|
||||
-10.0000 illegal 10.0000 illegal illegal
|
||||
-10.0000 illegal __________ illegal illegal
|
||||
-10.0000 illegal 1.0000 illegal illegal
|
||||
-10.0000 illegal __________ __________ illegal
|
||||
-9.9999 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_south: """
|
||||
illegal -0.3521 illegal 3.6948 2.9139
|
||||
illegal -0.5605 __________ 2.1346 1.5674
|
||||
illegal 0.2093 illegal 1.5389 0.5521
|
||||
illegal -0.5505 __________ __________ 0.1006
|
||||
illegal -1.8501 0.0060 0.0514 0.1223
|
||||
"""
|
||||
|
||||
q_values_k_3000_action_west: """
|
||||
illegal -6.2001 illegal 7.5146 4.9014
|
||||
illegal -5.4013 __________ 4.0484 3.4126
|
||||
illegal -8.0399 illegal 0.9653 1.6081
|
||||
illegal -7.4767 __________ __________ 0.3934
|
||||
illegal -6.3432 0.0179 0.0188 0.1028
|
||||
"""
|
||||
|
||||
values: """
|
||||
-10.0000 8.0584 10.0000 7.5146 4.9014
|
||||
-10.0000 4.4238 __________ 5.2284 3.5129
|
||||
-10.0000 1.0694 1.0000 3.6867 2.0418
|
||||
-10.0000 0.3423 __________ __________ 1.0655
|
||||
-9.9999 0.0239 0.0179 0.1611 0.3768
|
||||
"""
|
||||
|
||||
policy: """
|
||||
exit east exit west west
|
||||
exit north __________ north north
|
||||
exit north exit north north
|
||||
exit north __________ __________ north
|
||||
exit east west east north
|
||||
"""
|
||||
|
24
reinforcement/test_cases/q3/4-discountgrid.test
Normal file
24
reinforcement/test_cases/q3/4-discountgrid.test
Normal file
@ -0,0 +1,24 @@
|
||||
class: "QLearningTest"
|
||||
|
||||
# GridWorld specification
|
||||
# _ is empty space
|
||||
# numbers are terminal states with that value
|
||||
# # is a wall
|
||||
# S is a start state
|
||||
#
|
||||
grid: """
|
||||
-10 _ 10 _ _
|
||||
-10 _ # _ _
|
||||
-10 _ 1 _ _
|
||||
-10 _ # # _
|
||||
-10 S _ _ _
|
||||
"""
|
||||
discount: "0.9"
|
||||
noise: "0.2"
|
||||
livingReward: "0.0"
|
||||
epsilon: "0.2"
|
||||
learningRate: "0.1"
|
||||
numExperiences: "3000"
|
||||
valueIterations: "100"
|
||||
iterations: "10000"
|
||||
|
90
reinforcement/test_cases/q3/4-discountgrid.test_output
Normal file
90
reinforcement/test_cases/q3/4-discountgrid.test_output
Normal file
@ -0,0 +1,90 @@
|
||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_south: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_west: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
||||
q_values_k_0_action_exit: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_exit: """
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ illegal illegal
|
||||
0.0000 illegal 0.0000 illegal illegal
|
||||
0.0000 illegal __________ __________ illegal
|
||||
0.0000 illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_east: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ illegal illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
illegal illegal __________ __________ illegal
|
||||
illegal illegal illegal illegal illegal
|
||||
"""
|
||||
|
||||
Correct solution:
|
||||
q_values_k_0_action_north: """
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ 0.0000 0.0000
|
||||
illegal 0.0000 illegal 0.0000 0.0000
|
||||
illegal 0.0000 __________ __________ 0.0000
|
||||
illegal 0.0000 0.0000 0.0000 0.0000
|
||||
"""
|
||||
|
2
reinforcement/test_cases/q3/CONFIG
Normal file
2
reinforcement/test_cases/q3/CONFIG
Normal file
@ -0,0 +1,2 @@
|
||||
max_points: "5"
|
||||
class: "PassAllTestsQuestion"
|
Reference in New Issue
Block a user