Adds concrete POMDP implementation

samagra14 · samagra14 · commit a27a59e7c78f · 2018-07-07T12:52:39.000+05:30
diff --git a/aima-core/src/main/java/aima/core/probability/mdp/search/POMDPValueIteration.java b/aima-core/src/main/java/aima/core/probability/mdp/search/POMDPValueIteration.java
@@ -10,10 +10,12 @@
 public class POMDPValueIteration<S,A extends Action,E> {
     public POMDP<S,A,E> pomdp;
     public double maxError;
+    public int depth;
 
-    public POMDPValueIteration(POMDP<S, A, E> pomdp, double maxError) {
+    public POMDPValueIteration(POMDP<S, A, E> pomdp, double maxError, int maxDepth) {
         this.pomdp = pomdp;
         this.maxError = maxError;
+        this.depth = maxDepth;
     }
 
     public HashMap<List<A>, List<Double>> pomdpValueIteration(POMDP pomdp, double maxError){
@@ -26,10 +28,12 @@ public HashMap<List<A>, List<Double>> pomdpValueIteration(POMDP pomdp, double ma
         }
         uDash = new HashMap<>();
         uDash.put(new ArrayList<>(),utilities);
-        while(maxDifference(u,uDash) < maxError*(1-pomdp.getDiscount())/pomdp.getDiscount()){
+        int i = 0;
+        while(maxDifference(u,uDash) < maxError*(1-pomdp.getDiscount())/pomdp.getDiscount() || (i<=this.depth)){
             u = new HashMap<>(uDash);
             uDash = increasePlanDepths(uDash);
             uDash = removeDominatedPlans(uDash);
+            i++;
         }
         return u;
     }
@@ -54,7 +58,7 @@ private HashMap<List<A>, List<Double>> increasePlanDepths(HashMap<List<A>,
                              this.pomdp.states()) {
                             tempUtility+=this.pomdp.sensorModel(observation,
                                     actualState)*uDash.get(plan).
-                                    get(((ArrayList)this.pomdp.states()).indexOf(actualState));
+                                    get((new ArrayList<>(this.pomdp.states())).indexOf(actualState));
                         }
                         planUtility = tempUtility*this.pomdp.transitionProbability(actualState,
                                 currentState,action);
@@ -70,10 +74,10 @@ private HashMap<List<A>, List<Double>> increasePlanDepths(HashMap<List<A>,
     }
 
     private HashMap<List<A>, List<Double>> removeDominatedPlans(HashMap<List<A>, List<Double>> uDash) {
-        return null;
+        return uDash;
     }
 
     private double maxDifference(HashMap<List<A>, List<Double>> u, HashMap<List<A>, List<Double>> uDash) {
-        return 0.0;
+        return 2;
     }
 }
diff --git a/aima-core/src/test/java/aima/test/core/unit/probability/POMDP.java b/aima-core/src/test/java/aima/test/core/unit/probability/POMDP.java
@@ -0,0 +1,104 @@
+package aima.test.core.unit.probability;
+
+import java.util.EnumSet;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Two-state POMDP fixture used by the value-iteration unit test.
+ *
+ * <p>The world has two states ({@link State#ZERO}, {@link State#ONE}) and two
+ * actions ({@link Action#GO}, {@link Action#STAY}); {@link #reward(Object)} is
+ * non-zero only for {@link State#ONE}.
+ *
+ * <p>NOTE(review): the class implements the raw {@code POMDP} interface, so states
+ * and observations are passed around as {@code Object}.
+ */
+public class POMDP implements aima.core.probability.mdp.POMDP {
+    /** Discount factor returned by {@link #getDiscount()}. */
+    double gamma = 1.0;
+    /** State returned by {@link #getInitialState()}. */
+    State initialState = State.ZERO;
+
+    /** @return the discount factor {@link #gamma} */
+    @Override
+    public double getDiscount() {
+        return gamma;
+    }
+
+    /**
+     * Sensor model for an observation given the true state.
+     *
+     * @return 0.9 when {@code observedState} equals {@code actualState}, 0.1 otherwise
+     */
+    @Override
+    public double sensorModel(Object observedState, Object actualState) {
+        return observedState.equals(actualState) ? 0.9 : 0.1;
+    }
+
+    /** @return a new set of every {@link Action}, iterated in declaration order */
+    @Override
+    public Set getAllActions() {
+        // EnumSet iterates in declaration order. A HashSet of enum constants is
+        // ordered by identity hash codes, which can change between JVM runs.
+        return EnumSet.allOf(Action.class);
+    }
+
+    /** @return a new set of every {@link State}, iterated in declaration order */
+    @Override
+    public Set states() {
+        // A stable order matters here: POMDPValueIteration looks up utilities by
+        // the index of a state inside a list copy of this set.
+        return EnumSet.allOf(State.class);
+    }
+
+    /** @return the configured {@link #initialState} */
+    @Override
+    public Object getInitialState() {
+        return this.initialState;
+    }
+
+    /** @return every action, regardless of the state passed in */
+    @Override
+    public Set actions(Object o) {
+        return this.getAllActions();
+    }
+
+    /**
+     * Transition model; it depends only on whether the two states are equal.
+     *
+     * @return for GO, 0.1 if {@code sDelta} equals {@code o} and 0.9 otherwise; for
+     *         STAY, 0.9 if they are equal and 0.1 otherwise; 0 for any other action
+     */
+    @Override
+    public double transitionProbability(Object sDelta, Object o, aima.core.agent.Action action) {
+        if (action.equals(Action.GO)) {
+            return sDelta.equals(o) ? 0.1 : 0.9;
+        }
+        if (action.equals(Action.STAY)) {
+            return sDelta.equals(o) ? 0.9 : 0.1;
+        }
+        return 0;
+    }
+
+    /** @return 0.0 for {@link State#ZERO}, 1.0 for anything else */
+    @Override
+    public double reward(Object o) {
+        return o.equals(State.ZERO) ? 0.0 : 1.0;
+    }
+
+    /** The two world states. */
+    public enum State {
+        ZERO, ONE
+    }
+
+    /** The two actions; neither is a no-op. */
+    public enum Action implements aima.core.agent.Action {
+        GO, STAY;
+
+        @Override
+        public boolean isNoOp() {
+            return false;
+        }
+    }
+}
diff --git a/aima-core/src/test/java/aima/test/core/unit/probability/POMDPValueIterationTest.java b/aima-core/src/test/java/aima/test/core/unit/probability/POMDPValueIterationTest.java
@@ -0,0 +1,39 @@
+package aima.test.core.unit.probability;
+
+import aima.core.probability.mdp.search.POMDPValueIteration;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+import java.util.Map;
+
+/** Smoke test for {@link POMDPValueIteration} on the two-state {@link POMDP} fixture. */
+public class POMDPValueIterationTest {
+    /**
+     * Runs value iteration with a depth limit of 2 and checks the shape of the
+     * returned plan table: one entry per two-step plan, one utility per state.
+     *
+     * <p>Output recorded from a run of the implementation:
+     * <pre>
+     * {[STAY, GO]=[1.71, 1.19],
+     * [GO, STAY]=[0.11000000000000001, 1.9900000000000002],
+     * [STAY, STAY]=[0.19, 2.71],
+     * [GO, GO]=[0.9900000000000001, 1.11]}
+     * </pre>
+     * The numbers are a recording rather than an independently derived expectation,
+     * so only the structure is asserted.
+     */
+    @SuppressWarnings({"rawtypes", "unchecked"}) // the POMDP fixture implements the raw interface
+    @Test
+    public void test() {
+        POMDP pomdp = new POMDP();
+        POMDPValueIteration algo = new POMDPValueIteration(pomdp, 0.1, 2);
+        Map<?, ?> plans = algo.pomdpValueIteration(pomdp, 0.1);
+
+        Assert.assertEquals("number of plans", 4, plans.size());
+        for (Map.Entry<?, ?> plan : plans.entrySet()) {
+            Assert.assertEquals("actions per plan", 2, ((List<?>) plan.getKey()).size());
+            Assert.assertEquals("utilities per plan", 2, ((List<?>) plan.getValue()).size());
+        }
+    }
+}

Original file line number	Diff line number	Diff line change
`@@ -10,10 +10,12 @@`
`10`	`10`	`public class POMDPValueIteration<S,A extends Action,E> {`
`11`	`11`	`public POMDP<S,A,E> pomdp;`
`12`	`12`	`public double maxError;`
	`13`	`+ public int depth;`
`13`	`14`
`14`		`- public POMDPValueIteration(POMDP<S, A, E> pomdp, double maxError) {`
	`15`	`+ public POMDPValueIteration(POMDP<S, A, E> pomdp, double maxError, int maxDepth) {`
`15`	`16`	`this.pomdp = pomdp;`
`16`	`17`	`this.maxError = maxError;`
	`18`	`+ this.depth = maxDepth;`
`17`	`19`	`}`
`18`	`20`
`19`	`21`	`public HashMap<List<A>, List<Double>> pomdpValueIteration(POMDP pomdp, double maxError){`
`@@ -26,10 +28,12 @@ public HashMap<List<A>, List<Double>> pomdpValueIteration(POMDP pomdp, double ma`
`26`	`28`	`}`
`27`	`29`	`uDash = new HashMap<>();`
`28`	`30`	`uDash.put(new ArrayList<>(),utilities);`
`29`		`- while(maxDifference(u,uDash) < maxError*(1-pomdp.getDiscount())/pomdp.getDiscount()){`
	`31`	`+ int i = 0;`
	`32`	`+ while(maxDifference(u,uDash) < maxError*(1-pomdp.getDiscount())/pomdp.getDiscount() \|\| (i<=this.depth)){`
`30`	`33`	`u = new HashMap<>(uDash);`
`31`	`34`	`uDash = increasePlanDepths(uDash);`
`32`	`35`	`uDash = removeDominatedPlans(uDash);`
	`36`	`+ i++;`
`33`	`37`	`}`
`34`	`38`	`return u;`
`35`	`39`	`}`
`@@ -54,7 +58,7 @@ private HashMap<List<A>, List<Double>> increasePlanDepths(HashMap<List<A>,`
`54`	`58`	`this.pomdp.states()) {`
`55`	`59`	`tempUtility+=this.pomdp.sensorModel(observation,`
`56`	`60`	`actualState)*uDash.get(plan).`
`57`		`- get(((ArrayList)this.pomdp.states()).indexOf(actualState));`
	`61`	`+ get((new ArrayList<>(this.pomdp.states())).indexOf(actualState));`
`58`	`62`	`}`
`59`	`63`	`planUtility = tempUtility*this.pomdp.transitionProbability(actualState,`
`60`	`64`	`currentState,action);`
`@@ -70,10 +74,10 @@ private HashMap<List<A>, List<Double>> increasePlanDepths(HashMap<List<A>,`
`70`	`74`	`}`
`71`	`75`
`72`	`76`	`private HashMap<List<A>, List<Double>> removeDominatedPlans(HashMap<List<A>, List<Double>> uDash) {`
`73`		`- return null;`
	`77`	`+ return uDash;`
`74`	`78`	`}`
`75`	`79`
`76`	`80`	`private double maxDifference(HashMap<List<A>, List<Double>> u, HashMap<List<A>, List<Double>> uDash) {`
`77`		`- return 0.0;`
	`81`	`+ return 2;`
`78`	`82`	`}`
`79`	`83`	`}`