我需要为一个机器人代理找到一种算法来执行以下操作(抱歉,我不知道该如何称呼它):
-
机器人位于一个10×10的网格上,网格中有障碍物(每个方格要么是障碍物,要么可以通行)
-
机器人有一个碰撞传感器:当机器人撞到障碍物时会激活
-
网格上有胡萝卜,这些胡萝卜在持续生长。有快速生长的方格和慢速生长的方格
-
每一步,机器人可以:前进或向右或向左转90°或停留在原地
-
胡萝卜和障碍物的位置事先不知道
-
在机器人移动时(即使在收割后),胡萝卜继续生长
-
胡萝卜在大多数非障碍物方格中生长
-
机器人不知道方格是快速生长还是慢速生长
-
每个方格中可以有0到20个胡萝卜。在每个时间实例中,方格中胡萝卜数量增加的概率为p = 0.01(或对于快速生长的方格为p = 0.02)
-
你可以测量你收割的胡萝卜数量
目标是在2000步内获得最大数量的胡萝卜
有没有一种懒惰/简单的实现方法?
到目前为止,我有点迷失了,因为这不是一个迷宫求解问题。这是否可以被视为一种泛洪填充算法?有没有更简单的办法?
我并不一定在寻找“解决”这个问题的方法,而是希望找到一个简单的近似方法,如果可能的话
回答:
确实,要找到一个机器人实现,它在不知道食物来源位置和数量的情况下拥有完美的策略,这需要一些工作
任何给定的机器人策略在每次运行中可能都无法获得最大可能的收获。因此,问题在于,哪种策略在多次模拟运行中最成功
为了找到给定方格类型统计分布(P(fastFood),P(slowFood),P(obstacle))的合理策略,可以考虑以下想法:
让Bot(npatch)是一个寻找npatch个食物点的机器人。其策略是在寻找第二个食物点之前先吃完第一个食物点,以此类推。当它访问了npatch个食物源(或找不到更多食物点)时,它会返回到找到的第一个食物点并重新收割
现在,这类机器人(Bot(npatch))可以在统计上相关的多次模拟运行中相互竞争。最佳机器人将是比赛的赢家
这种方法可以被认为是受遗传算法启发,但没有混合任何基因,而是简单地迭代所有基因(1..npatch)。也许有人有想法如何将这个想法转变为完全的遗传算法。这可能涉及转变为Bot(npatch,searchStrategy),然后,应用遗传算法的多个基因
每当模拟参数发生变化时,显然需要重复比赛,因为根据世界中食物点的数量,可能值得或不值得去寻找另一个食物点,如果已经知道了一些食物点的话
下面的代码是用F#编写的,是那个问题的模拟器(如果我正确理解了所有要求的话…)。编写一个新的机器人就像编写一个函数一样简单,然后将该函数传递给模拟器
这可以被视为我为那些想尝试自己机器人的人准备的复活节彩蛋
我编写的两个机器人被称为“marvinRobot”,它做了Marvin会做的事情,以及“lazyRobot”,一个在找到的第一个食物源上扎营的机器人
type Square = | Empty | Obstacle | Food of float * (float -> float) // available * growth | Unknownlet rnd = new System.Random()let grow p a = let r = rnd.NextDouble() if r < p then a + 1.0 else alet slowGrowth a = grow 0.01 alet fastGrowth a = grow 0.02 alet eatPerTick = 1.0 let maxFoodPerSquare = 20.0let randomPick values = let count = List.length values let r = rnd.Next(0,count-1) values.Item(r)type World = Square[,]let randomSquare pobstacle pfood = let r = rnd.NextDouble() match r with | x1 when x1 < pobstacle -> Obstacle | x2 when x2 < (pobstacle + pfood) && x2 >= pobstacle -> Food(rnd.NextDouble() * maxFoodPerSquare, randomPick [slowGrowth; fastGrowth]) | _ -> Emptylet createRandomWorld n pobstacle pfood = Array2D.init n n (fun col row -> randomSquare pobstacle pfood)let createUnknownWorld n = Array2D.create n n Unknowntype Position = { Column : int; Row : int }type RoboState = { Memory : Square[,]; Pos : Position; Heading : Position }type RoboAction = | TurnRight | TurnLeft | MoveOne | Eat | Idletype RoboActor = World -> RoboState -> RoboActionlet right heading : Position = match heading with | { Column = 0; Row = 1 } -> { Column = -1; Row = 0 } | { Column = -1; Row = 0 } -> { Column = 0; Row = -1 } | { Column = 0; Row = -1 } -> { Column = 1; Row = 0 } | { Column = 1; Row = 0 } -> { Column = 0; Row = 1 } | _ -> failwith "Invalid heading!"let left heading : Position = match heading with | { Column = -1; Row = 0 } -> { Column = 0; Row = 1 } | { Column = 0; Row = -1 } -> { Column = -1; Row = 0 } | { Column = 1; Row = 0 } -> { Column = 0; Row = -1 } | { Column = 0; Row = 1 } -> { Column = 1; Row = 0 } | _ -> failwith "Invalid heading!"let checkAccess n position = let inRange v = v >= 0 && v < n (inRange position.Column) && (inRange position.Row)let tickWorld world = world |> Array2D.map (fun sq -> match sq with | Empty -> Empty | Obstacle -> Obstacle | Food(a,r) -> Food(min (r a) maxFoodPerSquare, r) | Unknown -> Unknown )let rec step robot world roboState i imax acc = if i < imax then let action = robot world roboState match action with | TurnRight -> let rs1 = { roboState with Heading = right roboState.Heading } let wrld1 = tickWorld world step robot wrld1 rs1 (i+1) imax acc | TurnLeft -> let rs1 = { roboState with Heading = left roboState.Heading } let wrld1 = tickWorld world step robot wrld1 rs1 (i+1) imax acc | MoveOne -> let rs1 = let c = { Column = roboState.Pos.Column + roboState.Heading.Column Row = roboState.Pos.Row + roboState.Heading.Row } if checkAccess (Array2D.length1 world) c then match world.[c.Column,c.Row] with | Obstacle -> roboState.Memory.[c.Column,c.Row] <- Obstacle roboState | _ -> { roboState with Pos = c } else roboState let wrld1 = tickWorld world step robot wrld1 rs1 (i+1) imax acc | Eat -> let eat,acc1 = match world.[roboState.Pos.Column,roboState.Pos.Row] with | Empty -> Empty,acc | Obstacle -> Obstacle,acc | Food(a,r) -> let eaten = if a >= eatPerTick then eatPerTick else 0.0 printfn "eating %f carrots" eaten Food(a - eaten, r),eaten + acc | Unknown -> Unknown,acc world.[roboState.Pos.Column,roboState.Pos.Row] <- eat let wrld1 = tickWorld world step robot wrld1 roboState (i+1) imax acc1 | Idle -> step robot (tickWorld world) roboState (i+1) imax acc else acclet initRoboState n = { Memory = createUnknownWorld n; Pos = { Column = 0; Row = 0;}; Heading = {Column = 1; Row = 0} }let simulate n pobstacle pfood imax robot = let w0 = createRandomWorld n pobstacle pfood let r0 = initRoboState n printfn "World: %A" w0 printfn "Initial Robo State: %A" r0 let result = step robot w0 r0 0 imax 0.0 printfn "Final Robo State: %A" r0 result// Not that Marvin would care, but the rule for this simulator is that the // bot may only inspect the square in the world at the current position.// This means, IT CANNOT SEE the neighboring squares.// This means, that if there is a obstacle next to current square, // it costs a simulation tick to find out, trying to bump against it.// Any access to other squares in world is considered cheating!// world is passed in spite of all said above to allow for alternate rules.let marvinRobot world roboState = Idle// Tries to find a square with food, then stays there, eating when there is something to eat.let lazyRobot (world : World) (roboState : RoboState) = let search() = let status action : RoboAction = match action with | TurnLeft -> printfn "%A TurnLeft at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading | TurnRight -> printfn "%ATurnRight at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading | MoveOne -> printfn "%A MoveOne at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading | Idle -> printfn "%A Idle at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading | Eat -> printfn "%A Eat at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading action let neighbors = [ roboState.Heading, MoveOne; (roboState.Heading |> right),TurnRight; (roboState.Heading |> left),TurnLeft; (roboState.Heading |> right |> right),TurnRight ] |> List.map (fun (p,a) -> (p.Column,p.Row),a) |> List.map (fun ((c,r),a) -> (roboState.Pos.Column + c,roboState.Pos.Row + r),a) |> List.filter (fun ((c,r),a) -> checkAccess (Array2D.length1 world){Position.Column = c; Row = r}) |> List.sortBy (fun ((c,r),a) -> match roboState.Memory.[c,r] with | Food(_,_) -> 0 | Unknown -> 1 | Empty -> 2 | Obstacle -> 3) |> List.map (fun ((c,r),a) -> { Column = c; Row = r},a) if neighbors.IsEmpty then failwith "It's a trap!" // can happen if bot is surrounded by obstacles, e.g. in a corner else let p,a = neighbors.Head status a roboState.Memory.[roboState.Pos.Column, roboState.Pos.Row] <- world.[roboState.Pos.Column,roboState.Pos.Row] match world.[roboState.Pos.Column,roboState.Pos.Row] with | Food(a,_) -> printfn "Found food at %A" roboState.Pos Eat | _ -> search()//simulate 10 0.1 0.05 2000 marvinRobotsimulate 10 0.1 0.1 2000 lazyRobot
最后一个提示:如果你用0.0食物点进行模拟,你的机器人应该访问地图上的所有方格。如果它未能做到这一点,那它肯定不是一个好的机器人;)