
Commit f3e6bdb

Re-implemented RewardFunction.java to incorporate functional interfaces and lambda expressions
1 parent ecac39a commit f3e6bdb

File tree

4 files changed: +33 −38 lines changed


aima-core/src/main/java/aima/core/learning/reinforcement/agent/PassiveADPAgent.java

Lines changed: 11 additions & 13 deletions
@@ -18,7 +18,7 @@
 /**
  * Artificial Intelligence A Modern Approach (3rd Edition): page 834.<br>
  * <br>
- * 
+ *
  * <pre>
  * function PASSIVE-ADP-AGENT(percept) returns an action
  *   inputs: percept, a percept indicating the current state s' and reward signal r'
@@ -28,7 +28,7 @@
  *   N<sub>sa</sub>, a table of frequencies for state-action pairs, initially zero
  *   N<sub>s'|sa</sub>, a table of outcome frequencies given state-action pairs, initially zero
  *   s, a, the previous state and action, initially null
- * 
+ *
  *   if s' is new then U[s'] <- r'; R[s'] <- r'
  *   if s is not null then
  *     increment N<sub>sa</sub>[s,a] and N<sub>s'|sa</sub>[s',s,a]
@@ -38,19 +38,19 @@
  *   if s'.TERMINAL? then s,a <- null else s,a <- s',&pi;[s']
  *   return a
  * </pre>
- * 
+ *
  * Figure 21.2 A passive reinforcement learning agent based on adaptive dynamic
  * programming. The POLICY-EVALUATION function solves the fixed-policy Bellman
  * equations, as described on page 657.
- * 
+ *
  * @param <S>
  *            the state type.
  * @param <A>
  *            the action type.
- * 
+ *
  * @author Ciaran O'Reilly
  * @author Ravi Mohan
- * 
+ *
  */
 public class PassiveADPAgent<S, A extends Action> extends
         ReinforcementAgent<S, A> {
@@ -75,7 +75,7 @@ public class PassiveADPAgent<S, A extends Action> extends
 
     /**
      * Constructor.
-     * 
+     *
      * @param fixedPolicy
      *            &pi; a fixed policy.
      * @param states
@@ -91,6 +91,8 @@ public PassiveADPAgent(Map<S, A> fixedPolicy, Set<S> states,
             S initialState, ActionsFunction<S, A> actionsFunction,
             PolicyEvaluation<S, A> policyEvaluation) {
         this.pi.putAll(fixedPolicy);
+        RewardFunction<S> rewardfn = (s) -> R.get(s);
+
         this.mdp = new MDP<S, A>(states, initialState, actionsFunction,
                 new TransitionProbabilityFunction<S, A>() {
                     public double probability(S sDelta, S s, A a) {
@@ -99,17 +101,13 @@ public double probability(S sDelta, S s, A a) {
 
                         return null == p ? 0.0 : p.doubleValue();
                     }
-                }, new RewardFunction<S>() {
-                    public double reward(S s) {
-                        return R.get(s);
-                    }
-                });
+                }, rewardfn);
         this.policyEvaluation = policyEvaluation;
     }
 
     /**
      * Passive reinforcement learning based on adaptive dynamic programming.
-     * 
+     *
      * @param percept
      *            a percept indicating the current state s' and reward signal
      *            r'.
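With RewardFunction<S> now a functional interface (see the RewardFunction.java change below), the anonymous inner class previously passed to the MDP constructor collapses into a lambda that closes over the learned reward table R. A minimal, self-contained sketch of that pattern, assuming R is a Map<S, Double> as in this agent; the nested RewardFunction here is a simplified stand-in for the real interface in aima.core.probability.mdp:

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

public class LambdaRewardSketch {
    // Simplified stand-in for aima.core.probability.mdp.RewardFunction<S>,
    // which after this commit extends Function<S, Double>.
    interface RewardFunction<S> extends Function<S, Double> {}

    public static void main(String[] args) {
        // R plays the role of the agent's learned reward table.
        Map<String, Double> R = new HashMap<>();
        R.put("s1", -0.04);

        // The lambda closes over R, exactly as the anonymous class used to.
        RewardFunction<String> rewardfn = (s) -> R.get(s);

        System.out.println(rewardfn.apply("s1")); // prints -0.04
    }
}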

aima-core/src/main/java/aima/core/probability/example/MDPFactory.java

Lines changed: 8 additions & 13 deletions
@@ -16,7 +16,7 @@
 import aima.core.probability.mdp.impl.MDP;
 
 /**
- * 
+ *
  * @author Ciaran O'Reilly
  * @author Ravi Mohan
  */
@@ -25,7 +25,7 @@ public class MDPFactory {
     /**
      * Constructs an MDP that can be used to generate the utility values
      * detailed in Fig 17.3.
-     * 
+     *
      * @param cw
      *            the cell world from figure 17.1.
      * @return an MDP that can be used to generate the utility values detailed
@@ -43,7 +43,7 @@ public static MarkovDecisionProcess<Cell<Double>, CellWorldAction> createMDPForF
     /**
      * Returns the allowed actions from a specified cell within the cell world
      * described in Fig 17.1.
-     * 
+     *
      * @param cw
      *            the cell world from figure 17.1.
      * @return the set of actions allowed at a particular cell. This set will be
@@ -75,7 +75,7 @@ public Set<CellWorldAction> actions(Cell<Double> s) {
      * the 'intended' outcome occurs with probability 0.8, but with probability
      * 0.2 the agent moves at right angles to the intended direction. A
      * collision with a wall results in no movement.
-     * 
+     *
      * @param cw
      *            the cell world from figure 17.1.
      * @return the transition probability function as described in figure 17.1.
@@ -106,7 +106,7 @@ public double probability(Cell<Double> sDelta, Cell<Double> s,
 
                 return prob;
             }
-            
+
             private List<Cell<Double>> possibleOutcomes(Cell<Double> c,
                     CellWorldAction a) {
                 // There can be three possible outcomes for the planned action
@@ -124,17 +124,12 @@ private List<Cell<Double>> possibleOutcomes(Cell<Double> c,
     }
 
     /**
-     * 
+     *
      * @return the reward function which takes the content of the cell as being
      *         the reward value.
      */
     public static RewardFunction<Cell<Double>> createRewardFunctionForFigure17_1() {
-        RewardFunction<Cell<Double>> rf = new RewardFunction<Cell<Double>>() {
-            @Override
-            public double reward(Cell<Double> s) {
-                return s.getContent();
-            }
-        };
-        return rf;
+        RewardFunction<Cell<Double>> rewardfn = s -> s.getContent();
+        return rewardfn;
    }
 }
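The new one-liner could equally be written as a method reference, since getContent() already has the Cell<Double> -> Double shape the interface requires. A rough sketch, using a hypothetical Cell class as a stand-in for the project's aima.core.environment.cellworld.Cell<Double> and a simplified RewardFunction:

import java.util.function.Function;

public class MethodReferenceSketch {
    // Hypothetical stand-in for the project's Cell<Double>.
    static class Cell {
        private final Double content;
        Cell(Double content) { this.content = content; }
        Double getContent() { return content; }
    }

    // Simplified stand-in for RewardFunction<S>, now a functional interface.
    interface RewardFunction<S> extends Function<S, Double> {}

    public static void main(String[] args) {
        // Lambda form, as in the commit ...
        RewardFunction<Cell> asLambda = s -> s.getContent();
        // ... and the equivalent method-reference form.
        RewardFunction<Cell> asMethodRef = Cell::getContent;

        Cell c = new Cell(-0.04);
        System.out.println(asLambda.apply(c));    // -0.04
        System.out.println(asMethodRef.apply(c)); // -0.04
    }
}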
aima-core/src/main/java/aima/core/probability/mdp/RewardFunction.java

Lines changed: 11 additions & 9 deletions
@@ -1,21 +1,23 @@
 package aima.core.probability.mdp;
+import java.util.function.Function;
 
 /**
  * An interface for MDP reward functions.
- * 
- * @param <S>
- *            the state type.
+ *
+ * @param <S> The type used to represent states.
+ *
  * @author Ciaran O'Reilly
  * @author Ravi Mohan
  */
-public interface RewardFunction<S> {
-
+public interface RewardFunction<S> extends Function<S, Double> {
     /**
-     * Get the reward associated with being in state s.
-     * 
+     * Extends the built-in functional interface Function<T, R>, which has the
+     * single abstract method
+     *
+     * R apply(T)
+     *
     * @param s
     *            the state whose reward is sought.
     * @return the reward associated with being in state s.
     */
-    double reward(S s);
-}
+}
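Making RewardFunction<S> extend Function<S, Double> is what enables the lambda rewrites in the other three files: the inherited Double apply(S) becomes the interface's only abstract method, so any S -> Double lambda or method reference satisfies it, and Function's default methods such as andThen come along for free. A small sketch of both points, assuming the interface looks exactly as in the diff above:

import java.util.function.Function;

public class FunctionalRewardSketch {
    // The interface as it stands after this commit (javadoc elided).
    interface RewardFunction<S> extends Function<S, Double> {}

    public static void main(String[] args) {
        // Any S -> Double lambda now satisfies the interface directly.
        RewardFunction<Integer> livingCost = s -> -0.04;

        // Default methods inherited from Function come for free, e.g. andThen
        // to post-process every reward with a second function.
        Function<Integer, Double> halved = livingCost.andThen(r -> r * 0.5);

        System.out.println(livingCost.apply(1)); // -0.04
        System.out.println(halved.apply(1));     // -0.02
    }
}

One trade-off worth noting: the old double reward(S s) returned a primitive, whereas Double apply(S s) boxes every reward value; java.util.function.ToDoubleFunction<S> would have kept the primitive return, at the cost of losing Function's default methods.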

aima-core/src/main/java/aima/core/probability/mdp/impl/MDP.java

Lines changed: 3 additions & 3 deletions
@@ -10,12 +10,12 @@
 
 /**
  * Default implementation of the MarkovDecisionProcess<S, A> interface.
- * 
+ *
  * @param <S>
  *            the state type.
  * @param <A>
  *            the action type.
- * 
+ *
  * @author Ciaran O'Reilly
  * @author Ravi Mohan
  */
@@ -61,7 +61,7 @@ public double transitionProbability(S sDelta, S s, A a) {
 
     @Override
     public double reward(S s) {
-        return rewardFunction.reward(s);
+        return rewardFunction.apply(s);
     }
 
     // END-MarkovDecisionProcess
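Because apply(S) returns a boxed Double, the unboxing to double now happens at this delegation point rather than inside the reward function itself, so a state missing from the underlying reward table still surfaces as a NullPointerException, just one frame higher. A rough sketch of the delegation, using hypothetical names in place of the real MDP implementation class:

import java.util.function.Function;

public class RewardDelegationSketch {
    interface RewardFunction<S> extends Function<S, Double> {}

    // Hypothetical, slimmed-down stand-in for aima.core.probability.mdp.impl.MDP.
    static class SimpleMdp<S> {
        private final RewardFunction<S> rewardFunction;

        SimpleMdp(RewardFunction<S> rewardFunction) {
            this.rewardFunction = rewardFunction;
        }

        // As in the commit: delegate via apply(s); the Double result is
        // unboxed to double here, so a null reward throws a NullPointerException.
        public double reward(S s) {
            return rewardFunction.apply(s);
        }
    }

    public static void main(String[] args) {
        SimpleMdp<String> mdp = new SimpleMdp<String>(s -> "goal".equals(s) ? 1.0 : -0.04);
        System.out.println(mdp.reward("goal")); // 1.0
    }
}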
