我正在尝试实现值迭代算法。我有一个网格
grid = [[0, 0, 0, +1], [0, "W", 0, -1], [0, 0, 0, 0]]
一个动作列表
actlist = {UP:1, DOWN:2, LEFT:3, RIGHT:4}
和一个奖励函数
reward = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
我写了一个函数T,它返回一个包含3个元组的元组。
def T(i,j,actions):if(i == 0 and j == 0): if(actions == UP): return (i,i,0.8),(i,i,0.1),(i,j+1,0.1) elif(actions == DOWN): return (i+1,j,0.8),(i,j,0.1),(i,j+1,0.1) elif(actions == LEFT): return (i,j,0.8),(i,j,0.1),(i+1,j,0.1) elif(actions == RIGHT): return (i,j+1,0.8),(i,i,0.1),(i+1,j,0.1)elif (i == 0 and j == 1): if(actions == UP): return (i,i,0.8),(i,j-1,0.1),(i,j+1,0.1) elif(actions == DOWN): return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1) elif(actions == LEFT): return (i,j-1,0.8),(i,j,0.1),(i,j,0.1) elif(actions == RIGHT): return (i,j+1,0.8),(i,j,0.1),(i,j,0.1)elif(i == 0 and j == 2): if(actions == UP): return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1) elif(actions == DOWN): return(i+1,j,0.8),(i,j-1,0.1),(i,j+1,0.1) elif(actions == LEFT): return (i,j-1,0.8),(i,j,0.1),(i+1,j,0.1) elif(actions == RIGHT): return (i,j+1,0.8),(i,j,0.1),(i+1,j,0.1)elif(i == 0 and j == 3): if(actions == UP): return (-1,-1,0.8),(-1,-1,0.1),(-1,-1,0.1) elif(actions == DOWN): return (-1,-1,0.8),(-1,-1,0.1),(-1,-1,0.1) elif(actions == LEFT): return (-1,-1,0.8),(-1,-1,0.1),(-1,-1,0.1) elif(actions == RIGHT): return (-1,-1,0.8),(-1,-1,0.1),(-1,-1,0.1)# 第二行elif (i == 1 and j == 0): if(actions == UP): return (i-1,j,0.8),(i,j,0.1),(i,j,0.1) elif(actions == DOWN): return (i+1,j,0.8),(i,j,0.1),(i,j,0.1) elif(actions == LEFT): return (i,j,0.8),(i-1,j,0.1),(i+1,j,0.1) elif(actions == RIGHT): return (i,j,0.8),(i-1,j,0.1),(i+1,j,0.1)elif(i == 1 and j ==1): if(actions == UP): return (i,j,0.8),(i,j,0.1),(i,j,0.1) elif(actions == DOWN): return (i,j,0.8),(i,j,0.1),(i,j,0.1) elif(actions == LEFT): return (i,j,0.8),(i,j,0.1),(i,j,0.1) elif(actions == RIGHT): return (i,j,0.8),(i,j,0.1),(i,j,0.1)elif (i == 1 and j == 2): if(actions == UP): return (i-1,j,0.8),(i,j,0.1),(i,j+1,0.1) elif(actions == DOWN): return (i+1,j,0.8),(i,j,0.1),(i,j+1,0.1) elif(actions == LEFT): return (i,j,0.8),(i-1,j,0.1),(i+1,j,0.1) elif(actions == RIGHT): return (i,j+1,0.8),(i-1,j,0.1),(i+1,j,0.1)elif(i == 1 and j == 3): if(actions == UP): return (-2,-2,0.8),(-2,-2,0.1),(-2,-2,0.1) elif(actions == DOWN): return (-2,-2,0.8),(-2,-2,0.1),(-2,-2,0.1) elif(actions == LEFT): return (-2,-2,0.8),(-2,-2,0.1),(-2,-2,0.1) elif(actions == RIGHT): return (-2,-2,0.8),(-2,-2,0.1),(-2,-2,0.1) # 第三行elif(i == 2 and j == 0): if(actions == UP): return (i-1,j,0.8),(i,j,0.1),(i,j+1,0.1) elif(actions == DOWN): return (i,j,0.8),(i,j,0.1),(i,j+1,1,0.1) elif(actions == LEFT): return (i,j,0.8),(i-1,j,0.1),(i,j,0.1) elif(actions == RIGHT): return (i,j+1,0.8),(i-1,j,0.1),(i,j,0.1)elif (i == 2 and j == 1): if(actions == UP): return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1) elif(actions == DOWN): return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1) elif(actions == LEFT): return (i,j-1,0.8),(i,j,0.1),(i,j,0.1) elif(actions == RIGHT): return (i,j+1,0.8),(i,j,0.1),(i,j,0.1)elif(i == 2 and j == 2): if(actions == UP): return (i-1,j,0.8),(i,j-1,0.1),(i,j+1,0.1) elif(actions == DOWN): return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1) elif(actions == LEFT): return (i,j-1,0.8),(i-1,j,0.1),(i,j,1) elif(actions == RIGHT): return (i,j+1,0.8),(i-1,j,0.1),(i,j,0.1)elif(i == 2 and j == 3): if(actions == UP): return (i-1,j,0.8),(i,j-1,0.1),(i,j,0.1) elif(actions == DOWN): return (i,j,0.8),(i,j-1,0.1),(i,j,0.1) elif(actions == LEFT): return (i,j-1,0.8),(i-1,j,0.1),(i,j,0.1) elif(actions == RIGHT): return (i,j,0.8),(i-1,j,0.1),(i,j,0.1)
这个函数在值迭代函数中被调用:
def value_iteration():U1 = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] while True: U=U1.copy() delta = 0 for i in range(len(grid)): for j in range(len(grid[i])): U1[i][j] = max(sum(p*(R(k,l)+gamma*U[k][l]) for (k,l,p) in T(i,j,a)) for a in actlist) print(i,j,U1[i][j]) delta = max(delta, abs(U1[i][j] - U[i][j])) if delta <= epsilon*(1 - gamma)/gamma: return U
问题是,前两个循环迭代运行正常,输出如下
0 00 10 20 31 01 11 2 1 3
但之后代码停止并报错
ValueError: too many values to unpack (expected 3)
回答:
查看** **中的元组,可能这就是原因
# 第三行elif(i == 2 and j == 0): if(actions == UP): return (i-1,j,0.8),(i,j,0.1),(i,j+1,0.1) elif(actions == DOWN): return (i,j,0.8),(i,j,0.1),**(i,j+1,1,0.1)** elif(actions == LEFT): return (i,j,0.8),(i-1,j,0.1),(i,j,0.1) elif(actions == RIGHT): return (i,j+1,0.8),(i-1,j,0.1),(i,j,0.1)elif (i == 2 and j == 1): if(actions == UP): return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1) elif(actions == DOWN): return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1) elif(actions == LEFT): return (i,j-1,0.8),(i,j,0.1),(i,j,0.1) elif(actions == RIGHT): return (i,j+1,0.8),(i,j,0.1),(i,j,0.1)