rein q1
This commit is contained in:
@ -1,132 +0,0 @@
|
|||||||
Values at iteration 0 are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
values_k_0: """
|
|
||||||
0.0000
|
|
||||||
0.0000
|
|
||||||
0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action south are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action west are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action exit are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
-10.0000
|
|
||||||
illegal
|
|
||||||
10.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action east are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action north are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Values at iteration 1 are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
values_k_1: """
|
|
||||||
0.0000
|
|
||||||
0.0000
|
|
||||||
0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
values_k_1: """
|
|
||||||
-10.0000
|
|
||||||
0.0000
|
|
||||||
10.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action south are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
q_values_k_1_action_south: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_1_action_south: """
|
|
||||||
illegal
|
|
||||||
5.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action west are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_1_action_west: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action exit are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_1_action_exit: """
|
|
||||||
-10.0000
|
|
||||||
illegal
|
|
||||||
10.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action east are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_1_action_east: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action north are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
q_values_k_1_action_north: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_1_action_north: """
|
|
||||||
illegal
|
|
||||||
-5.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
@ -1,132 +0,0 @@
|
|||||||
Values at iteration 0 are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
values_k_0: """
|
|
||||||
0.0000
|
|
||||||
0.0000
|
|
||||||
0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action south are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action west are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action exit are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
-10.0000
|
|
||||||
illegal
|
|
||||||
10.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action east are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action north are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Values at iteration 1 are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
values_k_1: """
|
|
||||||
0.0000
|
|
||||||
0.0000
|
|
||||||
0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
values_k_1: """
|
|
||||||
-10.0000
|
|
||||||
0.0000
|
|
||||||
10.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action south are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
q_values_k_1_action_south: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_1_action_south: """
|
|
||||||
illegal
|
|
||||||
5.6250
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action west are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_1_action_west: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action exit are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_1_action_exit: """
|
|
||||||
-10.0000
|
|
||||||
illegal
|
|
||||||
10.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action east are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_1_action_east: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action north are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
q_values_k_1_action_north: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_1_action_north: """
|
|
||||||
illegal
|
|
||||||
-5.6250
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
@ -1,216 +0,0 @@
|
|||||||
Values at iteration 0 are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
values_k_0: """
|
|
||||||
__________ 0.0000 __________
|
|
||||||
0.0000 0.0000 0.0000
|
|
||||||
0.0000 0.0000 0.0000
|
|
||||||
0.0000 0.0000 0.0000
|
|
||||||
0.0000 0.0000 0.0000
|
|
||||||
0.0000 0.0000 0.0000
|
|
||||||
__________ 0.0000 __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action south are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action west are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action exit are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
__________ 10.0000 __________
|
|
||||||
-100.0000 illegal -100.0000
|
|
||||||
-100.0000 illegal -100.0000
|
|
||||||
-100.0000 illegal -100.0000
|
|
||||||
-100.0000 illegal -100.0000
|
|
||||||
-100.0000 illegal -100.0000
|
|
||||||
__________ 1.0000 __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action east are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action north are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Values at iteration 1 are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
values_k_1: """
|
|
||||||
__________ 0.0000 __________
|
|
||||||
0.0000 0.0000 0.0000
|
|
||||||
0.0000 0.0000 0.0000
|
|
||||||
0.0000 0.0000 0.0000
|
|
||||||
0.0000 0.0000 0.0000
|
|
||||||
0.0000 0.0000 0.0000
|
|
||||||
__________ 0.0000 __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
values_k_1: """
|
|
||||||
__________ 10.0000 __________
|
|
||||||
-100.0000 0.0000 -100.0000
|
|
||||||
-100.0000 0.0000 -100.0000
|
|
||||||
-100.0000 0.0000 -100.0000
|
|
||||||
-100.0000 0.0000 -100.0000
|
|
||||||
-100.0000 0.0000 -100.0000
|
|
||||||
__________ 1.0000 __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action south are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
q_values_k_1_action_south: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_1_action_south: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal -8.5000 illegal
|
|
||||||
illegal -8.5000 illegal
|
|
||||||
illegal -8.5000 illegal
|
|
||||||
illegal -8.5000 illegal
|
|
||||||
illegal -7.7350 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action west are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
q_values_k_1_action_west: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_1_action_west: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal -76.0750 illegal
|
|
||||||
illegal -76.5000 illegal
|
|
||||||
illegal -76.5000 illegal
|
|
||||||
illegal -76.5000 illegal
|
|
||||||
illegal -76.4575 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action exit are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_1_action_exit: """
|
|
||||||
__________ 10.0000 __________
|
|
||||||
-100.0000 illegal -100.0000
|
|
||||||
-100.0000 illegal -100.0000
|
|
||||||
-100.0000 illegal -100.0000
|
|
||||||
-100.0000 illegal -100.0000
|
|
||||||
-100.0000 illegal -100.0000
|
|
||||||
__________ 1.0000 __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action east are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
q_values_k_1_action_east: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_1_action_east: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal -76.0750 illegal
|
|
||||||
illegal -76.5000 illegal
|
|
||||||
illegal -76.5000 illegal
|
|
||||||
illegal -76.5000 illegal
|
|
||||||
illegal -76.4575 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action north are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
q_values_k_1_action_north: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_1_action_north: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal -0.8500 illegal
|
|
||||||
illegal -8.5000 illegal
|
|
||||||
illegal -8.5000 illegal
|
|
||||||
illegal -8.5000 illegal
|
|
||||||
illegal -8.5000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
@ -1,182 +0,0 @@
|
|||||||
Values at iteration 0 are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
values_k_0: """
|
|
||||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
|
||||||
0.0000 0.0000 __________ 0.0000 0.0000
|
|
||||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
|
||||||
0.0000 0.0000 __________ __________ 0.0000
|
|
||||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action south are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action west are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action exit are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
-10.0000 illegal 10.0000 illegal illegal
|
|
||||||
-10.0000 illegal __________ illegal illegal
|
|
||||||
-10.0000 illegal 1.0000 illegal illegal
|
|
||||||
-10.0000 illegal __________ __________ illegal
|
|
||||||
-10.0000 illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action east are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action north are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Values at iteration 1 are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
values_k_1: """
|
|
||||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
|
||||||
0.0000 0.0000 __________ 0.0000 0.0000
|
|
||||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
|
||||||
0.0000 0.0000 __________ __________ 0.0000
|
|
||||||
0.0000 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
values_k_1: """
|
|
||||||
-10.0000 0.0000 10.0000 0.0000 0.0000
|
|
||||||
-10.0000 0.0000 __________ 0.0000 0.0000
|
|
||||||
-10.0000 0.0000 1.0000 0.0000 0.0000
|
|
||||||
-10.0000 0.0000 __________ __________ 0.0000
|
|
||||||
-10.0000 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action south are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
q_values_k_1_action_south: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_1_action_south: """
|
|
||||||
illegal 0.0000 illegal 0.9000 0.0000
|
|
||||||
illegal -0.9000 __________ 0.0000 0.0000
|
|
||||||
illegal -0.8100 illegal 0.0900 0.0000
|
|
||||||
illegal -0.9000 __________ __________ 0.0000
|
|
||||||
illegal -0.9000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action west are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
q_values_k_1_action_west: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_1_action_west: """
|
|
||||||
illegal -7.2000 illegal 7.2000 0.0000
|
|
||||||
illegal -7.2000 __________ 0.0000 0.0000
|
|
||||||
illegal -7.2000 illegal 0.7200 0.0000
|
|
||||||
illegal -7.2000 __________ __________ 0.0000
|
|
||||||
illegal -7.2000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action exit are correct.
|
|
||||||
Student/correct solution:
|
|
||||||
q_values_k_1_action_exit: """
|
|
||||||
-10.0000 illegal 10.0000 illegal illegal
|
|
||||||
-10.0000 illegal __________ illegal illegal
|
|
||||||
-10.0000 illegal 1.0000 illegal illegal
|
|
||||||
-10.0000 illegal __________ __________ illegal
|
|
||||||
-10.0000 illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action east are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
q_values_k_1_action_east: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_1_action_east: """
|
|
||||||
illegal 7.2000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.7200 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Q-Values at iteration 1 for action north are NOT correct.
|
|
||||||
Student solution:
|
|
||||||
q_values_k_1_action_north: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_1_action_north: """
|
|
||||||
illegal 0.0000 illegal 0.9000 0.0000
|
|
||||||
illegal -0.9000 __________ 0.0000 0.0000
|
|
||||||
illegal -0.8100 illegal 0.0900 0.0000
|
|
||||||
illegal -0.9000 __________ __________ 0.0000
|
|
||||||
illegal -0.9000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
@ -65,6 +65,35 @@ class ValueIterationAgent(ValueEstimationAgent):
|
|||||||
value iteration, V_k+1(...) depends on V_k(...)'s.
|
value iteration, V_k+1(...) depends on V_k(...)'s.
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
"*** YOUR CODE HERE ***"
|
||||||
|
# Write value iteration code here
|
||||||
|
# Hints:
|
||||||
|
# - After each iteration, store the new values in self.values
|
||||||
|
# - When updating a value, use self.values[state] = <new value>
|
||||||
|
# - You will need to copy the state values into a separate dictionary
|
||||||
|
# to avoid changing values before computing the update.
|
||||||
|
# - The difference between the new value and the old value (|V_k+1(s) - V_k(s)|)
|
||||||
|
# should be less than self.epsilon for all states s
|
||||||
|
# - Make sure to use the discount factor self.discount
|
||||||
|
# - Make sure to use the Bellman equations to update the state values
|
||||||
|
# - The number of iterations is given by self.iterations
|
||||||
|
# - You may use the util.Counter() class
|
||||||
|
# - You may also use the self.mdp.getTransitionStatesAndProbs(state, action) method
|
||||||
|
# - You may also use the self.mdp.getReward(state, action, nextState) method
|
||||||
|
# - You may also use the self.mdp.getPossibleActions(state) method
|
||||||
|
# - You may also use the self.mdp.isTerminal(state) method
|
||||||
|
|
||||||
|
for i in range(self.iterations):
|
||||||
|
newValues = util.Counter()
|
||||||
|
for state in self.mdp.getStates():
|
||||||
|
if self.mdp.isTerminal(state):
|
||||||
|
newValues[state] = 0
|
||||||
|
else:
|
||||||
|
maxQValue = float("-inf")
|
||||||
|
for action in self.mdp.getPossibleActions(state):
|
||||||
|
qValue = self.computeQValueFromValues(state, action)
|
||||||
|
maxQValue = max(maxQValue, qValue)
|
||||||
|
newValues[state] = maxQValue
|
||||||
|
self.values = newValues
|
||||||
|
|
||||||
|
|
||||||
def getValue(self, state):
|
def getValue(self, state):
|
||||||
@ -79,6 +108,11 @@ class ValueIterationAgent(ValueEstimationAgent):
|
|||||||
value function stored in self.values.
|
value function stored in self.values.
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
"*** YOUR CODE HERE ***"
|
||||||
|
qValue = 0
|
||||||
|
for nextState, prob in self.mdp.getTransitionStatesAndProbs(state, action):
|
||||||
|
reward = self.mdp.getReward(state, action, nextState)
|
||||||
|
qValue += prob * (reward + self.discount * self.values[nextState])
|
||||||
|
return qValue
|
||||||
|
|
||||||
|
|
||||||
def computeActionFromValues(self, state):
|
def computeActionFromValues(self, state):
|
||||||
@ -91,6 +125,14 @@ class ValueIterationAgent(ValueEstimationAgent):
|
|||||||
terminal state, you should return None.
|
terminal state, you should return None.
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
"*** YOUR CODE HERE ***"
|
||||||
|
bestAction = None
|
||||||
|
bestQValue = float("-inf")
|
||||||
|
for action in self.mdp.getPossibleActions(state):
|
||||||
|
qValue = self.computeQValueFromValues(state, action)
|
||||||
|
if qValue > bestQValue:
|
||||||
|
bestQValue = qValue
|
||||||
|
bestAction = action
|
||||||
|
return bestAction
|
||||||
|
|
||||||
|
|
||||||
def getPolicy(self, state):
|
def getPolicy(self, state):
|
||||||
|
Reference in New Issue
Block a user