enter reinforcement

This commit is contained in:
2024-07-06 01:30:00 +08:00
parent f105ba0150
commit e3f8181056
116 changed files with 19698 additions and 0 deletions

View File

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q7/1-tinygrid.test.
# File intentionally blank.

View File

@ -0,0 +1,22 @@
class: "EpsilonGreedyTest"
# GridWorld specification
# '_' is an empty space
# a number is a terminal state with that value
# '#' is a wall
# 'S' is a start state
#
grid: """
-10
S
10
"""
discount: "0.5"
noise: "0.0"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"

View File

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q7/2-tinygrid-noisy.test.
# File intentionally blank.

View File

@ -0,0 +1,22 @@
class: "EpsilonGreedyTest"
# GridWorld specification
# '_' is an empty space
# a number is a terminal state with that value
# '#' is a wall
# 'S' is a start state
#
grid: """
-10
S
10
"""
discount: "0.75"
noise: "0.25"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "100"
valueIterations: "100"
iterations: "10000"

View File

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q7/3-bridge.test.
# File intentionally blank.

View File

@ -0,0 +1,27 @@
class: "EpsilonGreedyTest"
# GridWorld specification
# '_' is an empty space
# a number is a terminal state with that value
# '#' is a wall
# 'S' is a start state
#
grid: """
# 10 #
-100 _ -100
-100 _ -100
-100 _ -100
-100 _ -100
-100 S -100
# 1 #
"""
gridName: "bridgeGrid"
discount: "0.85"
noise: "0.1"
livingReward: "0.0"
epsilon: "0.5"
learningRate: "0.1"
numExperiences: "500"
valueIterations: "100"
iterations: "10000"

View File

@ -0,0 +1,2 @@
# This is the solution file for test_cases/q7/4-discountgrid.test.
# File intentionally blank.

View File

@ -0,0 +1,24 @@
class: "EpsilonGreedyTest"
# GridWorld specification
# '_' is an empty space
# a number is a terminal state with that value
# '#' is a wall
# 'S' is a start state
#
grid: """
-10 _ 10 _ _
-10 _ # _ _
-10 _ 1 _ _
-10 _ # # _
-10 S _ _ _
"""
discount: "0.9"
noise: "0.2"
livingReward: "0.0"
epsilon: "0.2"
learningRate: "0.1"
numExperiences: "3000"
valueIterations: "100"
iterations: "10000"

View File

@ -0,0 +1,2 @@
max_points: "2"
class: "PassAllTestsQuestion"