Skip to content

Commit 263ed56

Browse files
committed
Started on QLearning (Still not working)
1 parent 12b432f commit 263ed56

File tree

10 files changed

+172
-62
lines changed

10 files changed

+172
-62
lines changed

Virus/Virus/Board.cs

Lines changed: 10 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ public void Display()
116116
/// <returns></returns>
117117
public virtual int MoveBrick(int brickToMoveX, int brickToMoveY, int moveToHereX, int moveToHereY)
118118
{
119-
if (board[brickToMoveX,brickToMoveY] != playerTurn)
119+
if (board[brickToMoveX, brickToMoveY] != playerTurn)
120120
{
121121
return -1;
122122
}
@@ -197,7 +197,7 @@ public List<Move> FindAvailableMoves(int playerNumber)
197197
{
198198
int x3 = bricks[i].Item2 - x2;
199199
int y3 = bricks[i].Item3 - y2;
200-
200+
201201
int result = TryMakeMove(bricks[i].Item1, bricks[i].Item2, bricks[i].Item3, x3, y3);
202202
if (result != -1)
203203
{
@@ -224,40 +224,12 @@ public List<Move> FindAvailableMoves(int playerNumber)
224224

225225
/// <summary>
/// Reports whether the player whose turn it currently is has no legal move.
/// </summary>
/// <returns>
/// <c>true</c> when <c>FindAvailableMoves(playerTurn)</c> yields an empty list;
/// <c>false</c> as soon as at least one move is available.
/// </returns>
private bool CantMove()
{
    // An empty move list is the only situation in which the player is stuck.
    return FindAvailableMoves(playerTurn).Count == 0;
}
262234

263235
/// <summary>
@@ -338,8 +310,6 @@ public int TryMakeMove(int playerNumber, int brickToMoveX, int brickToMoveY, int
338310

339311
int taken = 0;
340312

341-
//Jump move
342-
//Jump move
343313
if (jumping)
344314
{
345315
taken += AssumeCapturedPieces(moveToHereX, moveToHereY, player);
@@ -348,11 +318,8 @@ public int TryMakeMove(int playerNumber, int brickToMoveX, int brickToMoveY, int
348318
}
349319
else
350320
{
351-
//Normal move
352321
taken += AssumeCapturedPieces(moveToHereX, moveToHereY, player);
353-
354322
}
355-
356323
return taken;
357324
}
358325

@@ -377,8 +344,7 @@ public int MakeMove(int brickToMoveX, int brickToMoveY, int moveToHereX, int mov
377344
int player = board[brickToMoveX, brickToMoveY];
378345

379346
int taken = 0;
380-
381-
//Jump move
347+
382348
if (jumping)
383349
{
384350
taken += CapturePieces(moveToHereX, moveToHereY, player);
@@ -389,7 +355,6 @@ public int MakeMove(int brickToMoveX, int brickToMoveY, int moveToHereX, int mov
389355
}
390356
else
391357
{
392-
//Normal move
393358
taken += CapturePieces(moveToHereX, moveToHereY, player);
394359
taken += 1;
395360
board[moveToHereX, moveToHereY] = player;
@@ -660,5 +625,11 @@ public Board Copy()
660625
}
661626
return board;
662627
}
628+
/// <summary>
/// Produces the position that results from applying <paramref name="move"/> to a
/// copy of <paramref name="board"/>. The board passed in is not modified; all work
/// happens on the copy returned by <c>Copy()</c>.
/// </summary>
/// <param name="board">Position to start from.</param>
/// <param name="move">Move to apply to the copied position.</param>
/// <returns>The copied board after <c>MakeMove</c> has been applied to it.</returns>
public Board GetNewBoard(Board board, Move move)
{
    Board successor = board.Copy();

    // The value returned by MakeMove is not needed here, only the resulting position.
    successor.MakeMove(move.fromX, move.fromY, move.toX, move.toY);

    return successor;
}
663634
}
664635
}

Virus/Virus/Game.cs

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,35 +26,44 @@ private void PlayGame(int size)
2626
public void StartGame()
2727
{
2828
//VirusPlayer player1 = new NeuralNetworkComputer(board, 1, ActivationFunction.SigmoidDerivative, false, 2);
29-
MiniMaxComputer player1 = new MiniMaxComputer(board, 1, SQL.GetClient(), true, 3);
30-
VirusPlayer player2 = new SemiSmartComputer(board, 2);
29+
//VirusPlayer player1 = new MiniMaxComputer(board, 1, SQL.GetClient(), true, 3);
30+
VirusPlayer player1 = new QLearningComputer(board, 0.1, 0.1, 1);
31+
VirusPlayer player2 = new RandomComputer(board, 2);
3132
bool visual = false;
3233
int[] result = new int[2];
3334

34-
for (int i = 0; i < 5; i++)
35+
for (int i = 0; i < 10000; i++)
3536
{
36-
while (!board.IsDone())
37+
for (int j = 0; j < 3; j++)
3738
{
38-
//List<Persistance.EntityFramework.Node> res = SQL.GetClient().ReadAllNodes();
39-
player1.play();
40-
if (visual)
39+
while (!board.IsDone())
4140
{
42-
board.Display();
41+
player1.play();
42+
if (visual)
43+
{
44+
board.Display();
45+
}
46+
player2.play();
47+
if (visual)
48+
{
49+
board.Display();
50+
}
4351
}
44-
player2.play();
45-
if (visual)
52+
player1.AfterGame();
53+
player2.AfterGame();
54+
int[] result2 = board.GetScore();
55+
for (int b = 0; b < result2.Count(); b++)
4656
{
47-
board.Display();
57+
result[b] += result2[b];
4858
}
49-
Console.Read();
59+
60+
board.reset();
5061
}
51-
int[] result2 = board.GetScore();
52-
for (int b = 0; b < result2.Count(); b++)
53-
{
54-
result[b] += result2[b];
55-
}
56-
board.reset();
62+
Console.WriteLine("Game size " + gameSize + " Player 1 points: " + result[0]);
63+
Console.WriteLine("Game size " + gameSize + " Player 2 points: " + result[1]);
64+
result = new int[2];
5765
}
66+
5867

5968
Log.WriteLineToLog("Game size " + gameSize + " Player 1 points: " + result[0]);
6069
Log.WriteLineToLog("Game size " + gameSize + " Player 2 points: " + result[1]);

Virus/Virus/MiniMaxComputer.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,11 @@ public Tuple<Board, Move> PredictMiniMaxMove(Board tempboard)
360360
tempboard.MoveBrick(bestMove.fromX, bestMove.fromY, bestMove.toX, bestMove.toY);
361361
return new Tuple<Board, Move>(tempboard, bestMove);
362362
}
363+
364+
/// <summary>
/// Post-game hook. Deliberately empty: this player performs no work when a game ends.
/// </summary>
public void AfterGame()
{
    // Nothing to do.
}
367+
363368
private class Node : IRelationshipAllowingParticipantNode<Node>, IRelationshipAllowingSourceNode<Node>, IRelationshipAllowingTargetNode<Node>
364369
{
365370
public List<Node> children = new List<Node>();

Virus/Virus/NeuralNetworkComputer.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,5 +208,9 @@ private void Retrain(double[][] input, double[][] output)
208208
play();
209209
//end
210210
}
211+
212+
/// <summary>
/// Post-game hook. Deliberately empty: this player performs no work when a game ends.
/// </summary>
public void AfterGame()
{
    // Nothing to do.
}
211215
}
212216
}

Virus/Virus/Program.cs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,8 @@ class Program
1111
{
1212
static void Main(string[] args)
1313
{
14-
for (int i = 7; i < 12; i++)
15-
{
16-
Game game = new Game(i);
17-
game.StartGame();
18-
}
14+
Game game = new Game(4);
15+
game.StartGame();
1916
}
2017
}
2118
}

Virus/Virus/QLearningComputer.cs

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
7+
namespace Virus
8+
{
9+
/// <summary>
/// Table-driven learning player for the Virus game.
/// Two boardSize x boardSize tables are kept, both indexed by the destination square
/// of a move: <c>Rreward</c> (starts at 1000 per square, raised after a win and lowered
/// otherwise) and <c>Qreward</c> (starts at 0, raised every time the square was played).
/// </summary>
public class QLearningComputer : VirusPlayer
{
    private double learningRate;
    private double discountFactor;
    private int playerNumber;
    private Board board;
    private double[,] Qreward;
    private double[,] Rreward;
    private List<Move> movesMade;

    // One generator for the lifetime of the player; creating a new Random per call
    // can repeat sequences when the calls happen in quick succession.
    private readonly Random random = new Random();

    /// <summary>
    /// Creates a learning player bound to the given board.
    /// </summary>
    /// <param name="board">The game board this player reads and moves on.</param>
    /// <param name="learningRate">Weight given to the look-ahead value when scoring a move; expected to be between 0 and 1.</param>
    /// <param name="discountFactor">Importance of future rewards; expected to be between 0 and 1.</param>
    /// <param name="playerNumber">This player's 1-based number on the board.</param>
    public QLearningComputer(Board board, double learningRate, double discountFactor, int playerNumber)
    {
        this.board = board;
        this.learningRate = learningRate;
        this.discountFactor = discountFactor;
        this.playerNumber = playerNumber;
        Qreward = new double[board.boardSize, board.boardSize];
        Rreward = new double[board.boardSize, board.boardSize];
        movesMade = new List<Move>();

        // Every square starts with the same reward so no square is favoured up front.
        for (int x = 0; x < board.boardSize; x++)
        {
            for (int y = 0; y < board.boardSize; y++)
            {
                Rreward[x, y] = 1000;
            }
        }
    }

    /// <summary>
    /// Updates both tables from the finished game and forgets the recorded moves.
    /// Each destination square played this game gets +1 in <c>Rreward</c> after a win
    /// and -1 otherwise, and always +1 in <c>Qreward</c>.
    /// </summary>
    public void AfterGame()
    {
        int[] result = board.GetScore();
        int ownScore = result[playerNumber - 1];

        // The outcome does not change per move, so decide it once.
        // With two players this is "own score beats the opponent's"; a draw counts as a loss.
        // NOTE(review): assumes GetScore() returns at least two entries - confirm.
        bool won = ownScore > result[0] || ownScore > result[1];

        foreach (Move move in movesMade)
        {
            if (won)
            {
                Rreward[move.toX, move.toY]++;
            }
            else
            {
                Rreward[move.toX, move.toY]--;
            }
            Qreward[move.toX, move.toY]++;
        }
        movesMade.Clear();
    }

    /// <summary>
    /// Plays the available move with the highest score, where the score of a move is
    /// <c>Reward(move) + learningRate * Max(position after the move)</c>.
    /// Moves that share the highest score are chosen between at random.
    /// Does nothing when no move is available.
    /// </summary>
    public void play()
    {
        List<Move> movesAvailable = board.FindAvailableMoves(playerNumber);
        if (movesAvailable.Count == 0)
        {
            return;
        }

        // Start below every possible score so the first candidate is always accepted.
        double bestValue = double.NegativeInfinity;
        List<Move> bestMoves = new List<Move>();

        foreach (Move move in movesAvailable)
        {
            double value = Reward(move) + learningRate * Max(board.GetNewBoard(board, move));
            if (value > bestValue)
            {
                bestValue = value;
                bestMoves.Clear();
                bestMoves.Add(move);
            }
            else if (value == bestValue)
            {
                // Exact ties are common because the tables change in whole steps.
                bestMoves.Add(move);
            }
        }

        // Defensive fallback: if no score was comparable, choose among all moves.
        List<Move> candidates = bestMoves.Count > 0 ? bestMoves : movesAvailable;

        // Random.Next(max) excludes max, so passing Count covers every index.
        Move moveToTake = candidates[random.Next(candidates.Count)];

        board.MoveBrick(moveToTake.fromX, moveToTake.fromY, moveToTake.toX, moveToTake.toY);
        movesMade.Add(moveToTake);
    }

    /// <summary>
    /// Looks up the stored reward for a move, using the move's destination square,
    /// which is the same square <see cref="AfterGame"/> updates.
    /// </summary>
    private double Reward(Move move)
    {
        return Rreward[move.toX, move.toY];
    }

    /// <summary>
    /// Returns the discounted best Q value reachable by this player on the given board:
    /// <c>discountFactor</c> times the largest <c>Qreward</c> entry among the destination
    /// squares of the available moves, or 0 when there is no move or no positive entry.
    /// </summary>
    /// <param name="board">Position to inspect.</param>
    /// <returns>The discounted maximum, never below 0.</returns>
    public double Max(Board board)
    {
        double best = 0;
        foreach (Move move in board.FindAvailableMoves(playerNumber))
        {
            double candidate = Qreward[move.toX, move.toY];
            if (candidate > best)
            {
                best = candidate;
            }
        }

        // Discount once, after the maximum is known, so raw values are compared with raw values.
        return discountFactor * best;
    }
}
113+
}

Virus/Virus/RandomComputer.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ public RandomComputer(Board board, int playerNumber)
1717
this.playerNumber = playerNumber;
1818
}
1919

20+
/// <summary>
/// Post-game hook. Deliberately empty: this player performs no work when a game ends.
/// </summary>
public void AfterGame()
{
    // Nothing to do.
}
23+
2024
public void play()
2125
{
2226
List<Tuple<int, int, int, int>> result = FindAvailableMoves();

Virus/Virus/SemiSmartComputer.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ public SemiSmartComputer(Board board, int playerNumber)
1616
this.board = board;
1717
this.playerNumber = playerNumber;
1818
}
19+
20+
/// <summary>
/// Post-game hook. Deliberately empty: this player performs no work when a game ends.
/// </summary>
public void AfterGame()
{
    // Nothing to do.
}
23+
1924
public void play()
2025
{
2126
List<Move> result = board.FindAvailableMoves(playerNumber);

Virus/Virus/Virus.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@
101101
<Compile Include="Persistance\SQL.cs" />
102102
<Compile Include="Program.cs" />
103103
<Compile Include="Properties\AssemblyInfo.cs" />
104+
<Compile Include="QLearningComputer.cs" />
104105
<Compile Include="RandomComputer.cs" />
105106
<Compile Include="SemiSmartComputer.cs" />
106107
<Compile Include="VirusPlayer.cs" />

Virus/Virus/VirusPlayer.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@ namespace Virus
99
/// <summary>
/// Contract shared by every player type that can take part in a Virus game.
/// </summary>
public interface VirusPlayer
{
    /// <summary>
    /// Takes this player's turn.
    /// </summary>
    void play();

    /// <summary>
    /// Called after a game has been played to completion, giving the player a chance
    /// to do end-of-game work. Implementations may leave it empty.
    /// </summary>
    void AfterGame();
}
1314
}

0 commit comments

Comments
 (0)