
Commit f3e6bdb

Re-implemented RewardFunction.java to incorporate functional interfaces and lambda expressions
1 parent ecac39a commit f3e6bdb

File tree

4 files changed: +33 −38 lines changed


aima-core/src/main/java/aima/core/learning/reinforcement/agent/PassiveADPAgent.java

Lines changed: 11 additions & 13 deletions
@@ -18,7 +18,7 @@
 /**
  * Artificial Intelligence A Modern Approach (3rd Edition): page 834.<br>
  * <br>
- * 
+ *
  * <pre>
  * function PASSIVE-ADP-AGENT(percept) returns an action
  *   inputs: percept, a percept indicating the current state s' and reward signal r'
@@ -28,7 +28,7 @@
  *   N<sub>sa</sub>, a table of frequencies for state-action pairs, initially zero
  *   N<sub>s'|sa</sub>, a table of outcome frequencies given state-action pairs, initially zero
  *   s, a, the previous state and action, initially null
- * 
+ *
  *   if s' is new then U[s'] <- r'; R[s'] <- r'
  *   if s is not null then
  *     increment N<sub>sa</sub>[s,a] and N<sub>s'|sa</sub>[s',s,a]
@@ -38,19 +38,19 @@
  *   if s'.TERMINAL? then s,a <- null else s,a <- s',&pi;[s']
  *   return a
  * </pre>
- * 
+ *
  * Figure 21.2 A passive reinforcement learning agent based on adaptive dynamic
  * programming. The POLICY-EVALUATION function solves the fixed-policy Bellman
  * equations, as described on page 657.
- * 
+ *
  * @param <S>
  *            the state type.
  * @param <A>
  *            the action type.
- * 
+ *
  * @author Ciaran O'Reilly
  * @author Ravi Mohan
- * 
+ *
  */
 public class PassiveADPAgent<S, A extends Action> extends
         ReinforcementAgent<S, A> {
@@ -75,7 +75,7 @@ public class PassiveADPAgent<S, A extends Action> extends
 
     /**
      * Constructor.
-     * 
+     *
      * @param fixedPolicy
      *            &pi; a fixed policy.
      * @param states
@@ -91,6 +91,8 @@ public PassiveADPAgent(Map<S, A> fixedPolicy, Set<S> states,
             S initialState, ActionsFunction<S, A> actionsFunction,
             PolicyEvaluation<S, A> policyEvaluation) {
         this.pi.putAll(fixedPolicy);
+        RewardFunction<S> rewardfn = (s) -> R.get(s);
+
         this.mdp = new MDP<S, A>(states, initialState, actionsFunction,
                 new TransitionProbabilityFunction<S, A>() {
                     public double probability(S sDelta, S s, A a) {
@@ -99,17 +101,13 @@ public double probability(S sDelta, S s, A a) {
 
                         return null == p ? 0.0 : p.doubleValue();
                     }
-                }, new RewardFunction<S>() {
-                    public double reward(S s) {
-                        return R.get(s);
-                    }
-                });
+                }, rewardfn);
         this.policyEvaluation = policyEvaluation;
     }
 
     /**
      * Passive reinforcement learning based on adaptive dynamic programming.
-     * 
+     *
      * @param percept
      *            a percept indicating the current state s' and reward signal
      *            r'.
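With RewardFunction<S> now a functional interface (see the RewardFunction.java change below), the anonymous inner class previously passed to the MDP constructor collapses into a lambda that closes over the learned reward table R. A minimal, self-contained sketch of that pattern, assuming R is a Map<S, Double> as in this agent; the nested RewardFunction here is a simplified stand-in for the real interface in aima.core.probability.mdp:

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

public class LambdaRewardSketch {
    // Simplified stand-in for aima.core.probability.mdp.RewardFunction<S>,
    // which after this commit extends Function<S, Double>.
    interface RewardFunction<S> extends Function<S, Double> {}

    public static void main(String[] args) {
        // R plays the role of the agent's learned reward table.
        Map<String, Double> R = new HashMap<>();
        R.put("s1", -0.04);

        // The lambda closes over R, exactly as the anonymous class used to.
        RewardFunction<String> rewardfn = (s) -> R.get(s);

        System.out.println(rewardfn.apply("s1")); // prints -0.04
    }
}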

aima-core/src/main/java/aima/core/probability/example/MDPFactory.java

Lines changed: 8 additions & 13 deletions
@@ -16,7 +16,7 @@
 import aima.core.probability.mdp.impl.MDP;
 
 /**
- * 
+ *
  * @author Ciaran O'Reilly
  * @author Ravi Mohan
  */
@@ -25,7 +25,7 @@ public class MDPFactory {
     /**
      * Constructs an MDP that can be used to generate the utility values
      * detailed in Fig 17.3.
-     * 
+     *
      * @param cw
      *            the cell world from figure 17.1.
      * @return an MDP that can be used to generate the utility values detailed
@@ -43,7 +43,7 @@ public static MarkovDecisionProcess<Cell<Double>, CellWorldAction> createMDPForF
     /**
      * Returns the allowed actions from a specified cell within the cell world
      * described in Fig 17.1.
-     * 
+     *
      * @param cw
      *            the cell world from figure 17.1.
      * @return the set of actions allowed at a particular cell. This set will be
@@ -75,7 +75,7 @@ public Set<CellWorldAction> actions(Cell<Double> s) {
      * the 'intended' outcome occurs with probability 0.8, but with probability
      * 0.2 the agent moves at right angles to the intended direction. A
      * collision with a wall results in no movement.
-     * 
+     *
      * @param cw
      *            the cell world from figure 17.1.
      * @return the transition probability function as described in figure 17.1.
@@ -106,7 +106,7 @@ public double probability(Cell<Double> sDelta, Cell<Double> s,
 
                 return prob;
             }
-            
+
             private List<Cell<Double>> possibleOutcomes(Cell<Double> c,
                     CellWorldAction a) {
                 // There can be three possible outcomes for the planned action
@@ -124,17 +124,12 @@ private List<Cell<Double>> possibleOutcomes(Cell<Double> c,
     }
 
     /**
-     * 
+     *
      * @return the reward function which takes the content of the cell as being
      *         the reward value.
      */
     public static RewardFunction<Cell<Double>> createRewardFunctionForFigure17_1() {
-        RewardFunction<Cell<Double>> rf = new RewardFunction<Cell<Double>>() {
-            @Override
-            public double reward(Cell<Double> s) {
-                return s.getContent();
-            }
-        };
-        return rf;
+        RewardFunction<Cell<Double>> rewardfn = s -> s.getContent();
+        return rewardfn;
    }
 }
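The new one-liner could equally be written as a method reference, since getContent() already has the Cell<Double> -> Double shape the interface requires. A rough sketch, using a hypothetical Cell class as a stand-in for the project's aima.core.environment.cellworld.Cell<Double> and a simplified RewardFunction:

import java.util.function.Function;

public class MethodReferenceSketch {
    // Hypothetical stand-in for the project's Cell<Double>.
    static class Cell {
        private final Double content;
        Cell(Double content) { this.content = content; }
        Double getContent() { return content; }
    }

    // Simplified stand-in for RewardFunction<S>, now a functional interface.
    interface RewardFunction<S> extends Function<S, Double> {}

    public static void main(String[] args) {
        // Lambda form, as in the commit ...
        RewardFunction<Cell> asLambda = s -> s.getContent();
        // ... and the equivalent method-reference form.
        RewardFunction<Cell> asMethodRef = Cell::getContent;

        Cell c = new Cell(-0.04);
        System.out.println(asLambda.apply(c));    // -0.04
        System.out.println(asMethodRef.apply(c)); // -0.04
    }
}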
aima-core/src/main/java/aima/core/probability/mdp/RewardFunction.java

Lines changed: 11 additions & 9 deletions
@@ -1,21 +1,23 @@
 package aima.core.probability.mdp;
+import java.util.function.Function;
 
 /**
  * An interface for MDP reward functions.
- * 
- * @param <S>
- *            the state type.
+ *
+ * @param <S> The type used to represent states.
+ *
  * @author Ciaran O'Reilly
  * @author Ravi Mohan
  */
-public interface RewardFunction<S> {
-
+public interface RewardFunction<S> extends Function<S, Double> {
     /**
-     * Get the reward associated with being in state s.
-     * 
+     * Extends the built-in functional interface Function<T, R>, which has the
+     * single abstract method
+     *
+     * R apply(T)
+     *
     * @param s
     *            the state whose reward is sought.
     * @return the reward associated with being in state s.
     */
-    double reward(S s);
-}
+}
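Making RewardFunction<S> extend Function<S, Double> is what enables the lambda rewrites in the other three files: the inherited Double apply(S) becomes the interface's only abstract method, so any S -> Double lambda or method reference satisfies it, and Function's default methods such as andThen come along for free. A small sketch of both points, assuming the interface looks exactly as in the diff above:

import java.util.function.Function;

public class FunctionalRewardSketch {
    // The interface as it stands after this commit (javadoc elided).
    interface RewardFunction<S> extends Function<S, Double> {}

    public static void main(String[] args) {
        // Any S -> Double lambda now satisfies the interface directly.
        RewardFunction<Integer> livingCost = s -> -0.04;

        // Default methods inherited from Function come for free, e.g. andThen
        // to post-process every reward with a second function.
        Function<Integer, Double> halved = livingCost.andThen(r -> r * 0.5);

        System.out.println(livingCost.apply(1)); // -0.04
        System.out.println(halved.apply(1));     // -0.02
    }
}

One trade-off worth noting: the old double reward(S s) returned a primitive, whereas Double apply(S s) boxes every reward value; java.util.function.ToDoubleFunction<S> would have kept the primitive return, at the cost of losing Function's default methods.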

aima-core/src/main/java/aima/core/probability/mdp/impl/MDP.java

Lines changed: 3 additions & 3 deletions
@@ -10,12 +10,12 @@
 
 /**
  * Default implementation of the MarkovDecisionProcess<S, A> interface.
- * 
+ *
  * @param <S>
  *            the state type.
  * @param <A>
  *            the action type.
- * 
+ *
  * @author Ciaran O'Reilly
  * @author Ravi Mohan
  */
@@ -61,7 +61,7 @@ public double transitionProbability(S sDelta, S s, A a) {
 
     @Override
     public double reward(S s) {
-        return rewardFunction.reward(s);
+        return rewardFunction.apply(s);
     }
 
     // END-MarkovDecisionProcess
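Because apply(S) returns a boxed Double, the unboxing to double now happens at this delegation point rather than inside the reward function itself, so a state missing from the underlying reward table still surfaces as a NullPointerException, just one frame higher. A rough sketch of the delegation, using hypothetical names in place of the real MDP implementation class:

import java.util.function.Function;

public class RewardDelegationSketch {
    interface RewardFunction<S> extends Function<S, Double> {}

    // Hypothetical, slimmed-down stand-in for aima.core.probability.mdp.impl.MDP.
    static class SimpleMdp<S> {
        private final RewardFunction<S> rewardFunction;

        SimpleMdp(RewardFunction<S> rewardFunction) {
            this.rewardFunction = rewardFunction;
        }

        // As in the commit: delegate via apply(s); the Double result is
        // unboxed to double here, so a null reward throws a NullPointerException.
        public double reward(S s) {
            return rewardFunction.apply(s);
        }
    }

    public static void main(String[] args) {
        SimpleMdp<String> mdp = new SimpleMdp<String>(s -> "goal".equals(s) ? 1.0 : -0.04);
        System.out.println(mdp.reward("goal")); // 1.0
    }
}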
