Skip to content

Commit 371ad0d

Browse files
committed
added bypassTag and dirty/clean bypass mechanism
1 parent a63d23e commit 371ad0d

File tree

3 files changed

+268
-58
lines changed

3 files changed

+268
-58
lines changed

gem5/src/mem/DRAMCacheCtrl.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ class DRAMCacheCtrl(DRAMCtrl):
3838

3939
prediction_accuracy = Param.Unsigned(95, "Required prediction accuracy")
4040

41+
# bypass tag store - fully associative structure
42+
bypass_tag_enable = Param.Bool(True,"enable bypass tag store")
43+
bypass_tag_size = Param.Unsigned(64,"Number of entries in the bypass tag store")
44+
45+
dirty_clean_bypass_enable = Param.Bool(True,"Bypass based on dirty clean status of set")
46+
4147
# A single DDR3-1600 x64 channel (one command and address bus), with
4248
# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in
4349
# an 8x8 configuration.

gem5/src/mem/dramcache_ctrl.cc

Lines changed: 191 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
*/
77

88
#include "mem/dramcache_ctrl.hh"
9+
#include "src/gpu/gpgpu-sim/cuda_gpu.hh"
910
#include "debug/DRAMCache.hh"
1011
#include "debug/Drain.hh"
1112
#include "mem/ruby/system/RubyPort.hh"
@@ -16,9 +17,16 @@
1617
using namespace std;
1718
using namespace Data;
1819

20+
// these things are #defined in gpgpu-sim; since we include cuda_gpu for
21+
// CudaGPU::running flag, we need to undef these
22+
// #define WRITE 'W' -> gpgpu-sim/dram.h:39:15
23+
// #define READ 'R' -> gpgpu-sim/dram.h:39:14
24+
#undef WRITE
25+
#undef READ
1926

2027
map <int, DRAMCacheCtrl::predictorTable> DRAMCacheCtrl::predictor;
2128
int DRAMCacheCtrl::predAccuracy;
29+
bool DRAMCacheCtrl::switched_gpu_running;
2230

2331
DRAMCacheCtrl::DRAMCacheCtrl (const DRAMCacheCtrlParams* p) :
2432
DRAMCtrl (p), respondWriteEvent(this),
@@ -46,6 +54,13 @@ DRAMCacheCtrl::DRAMCacheCtrl (const DRAMCacheCtrlParams* p) :
4654

4755
DRAMCacheCtrl::predAccuracy = p->prediction_accuracy;
4856

57+
bypassTagEnable = p->bypass_tag_enable;
58+
dirtyCleanBypassEnable = p->dirty_clean_bypass_enable;
59+
if (bypassTagEnable)
60+
{
61+
bypassTag = new LRUTagStore(this, p->bypass_tag_size);
62+
}
63+
4964
rowsPerBank = (1024 * 1024 * deviceCapacity)
5065
/ (rowBufferSize * banksPerRank * ranksPerChannel);
5166

@@ -76,6 +91,8 @@ DRAMCacheCtrl::DRAMCacheCtrl (const DRAMCacheCtrlParams* p) :
7691
// we initialize a static seed for our randomPredictor
7792
randomPredictor.init(3594);
7893

94+
switched_gpu_running = false;
95+
7996
max_gpu_lines_sample_counter = 0;
8097
inform("DRAMCache per controller capacity %d MB\n", deviceCapacity);
8198
inform("DRAMCache scheduling policy %s\n", memSchedPolicy);
@@ -84,6 +101,9 @@ DRAMCacheCtrl::DRAMCacheCtrl (const DRAMCacheCtrlParams* p) :
84101
writeLowThreshold, writeHighThreshold);
85102
inform("DRAMCache address mapping %d, page mgmt %d", addrMapping, pageMgmt);
86103
inform("DRAMCache mshrs %d, writebuffers %d", p->mshrs , p->write_buffers);
104+
inform("DRAMCache Bypass tag enabled %d bypass tag size %d",
105+
bypassTagEnable, p->bypass_tag_size);
106+
inform("DRAMCache Dirty/Clean bypass enabled %d", dirtyCleanBypassEnable);
87107
}
88108

89109
void
@@ -376,6 +396,11 @@ DRAMCacheCtrl::doWriteBack(Addr evictAddr, int contextId)
376396

377397
allocateWriteBuffer(wbPkt,curTick()+1);
378398

399+
unsigned int cacheBlock = evictAddr/dramCache_block_size;
400+
unsigned int cacheSet = cacheBlock % dramCache_num_sets;
401+
set[cacheSet].valid = false;
402+
set[cacheSet].dirty = false;
403+
379404
}
380405

381406
DRAMCtrl::DRAMPacket*
@@ -996,24 +1021,6 @@ DRAMCacheCtrl::processRespondEvent ()
9961021

9971022
// put the data into dramcache
9981023
// fill the dramcache and update tags
999-
unsigned int cacheBlock = dram_pkt->pkt->getAddr()/dramCache_block_size;
1000-
unsigned int cacheSet = cacheBlock % dramCache_num_sets;
1001-
unsigned int cacheTag = cacheBlock / dramCache_num_sets;
1002-
1003-
Addr evictAddr = regenerateBlkAddr(cacheSet, set[cacheSet].tag);
1004-
DPRINTF(DRAMCache, "%s Evicting addr %d in cacheSet %d dirty %d\n",
1005-
__func__, evictAddr ,cacheSet, set[cacheSet].dirty);
1006-
1007-
// this block needs to be evicted
1008-
if (set[cacheSet].dirty)
1009-
doWriteBack(evictAddr, isGPUOwned[cacheSet]?31:0);
1010-
1011-
// change the tag directory
1012-
set[cacheSet].tag = cacheTag;
1013-
set[cacheSet].dirty = false;
1014-
set[cacheSet].valid = true;
1015-
isGPUOwned[cacheSet] =
1016-
dram_pkt->pkt->req->contextId() == 31 ? true:false;
10171024

10181025
// add to fillQueue since we encountered a miss
10191026
// create a packet and add to fillQueue
@@ -1215,7 +1222,7 @@ DRAMCacheCtrl::addToReadQueue(PacketPtr pkt, unsigned int pktCount)
12151222

12161223
addr = addr / dramCache_block_size;
12171224
addr = addr % dramCache_num_sets;
1218-
uint64_t cacheRow = floor(addr/15);
1225+
uint64_t cacheRow = floor(addr/15);
12191226
// ADARSH packet count is 2; we need to number our sets in multiplies of 2
12201227
addr = addr * pktCount;
12211228

@@ -1481,10 +1488,37 @@ DRAMCacheCtrl::addToFillQueue(PacketPtr pkt, unsigned int pktCount)
14811488

14821489
Addr addr = pkt->getAddr();
14831490

1491+
// update tags
1492+
unsigned int cacheBlock = pkt->getAddr()/dramCache_block_size;
1493+
unsigned int cacheSet = cacheBlock % dramCache_num_sets;
1494+
unsigned int cacheTag = cacheBlock / dramCache_num_sets;
1495+
1496+
Addr evictAddr = regenerateBlkAddr(cacheSet, set[cacheSet].tag);
1497+
DPRINTF(DRAMCache, "%s PAM Evicting addr %d in cacheSet %d dirty %d\n",
1498+
__func__, evictAddr ,cacheSet, set[cacheSet].dirty);
1499+
1500+
// this block needs to be evicted
1501+
if (set[cacheSet].dirty && set[cacheSet].valid)
1502+
doWriteBack(evictAddr, isGPUOwned[cacheSet]?31:0);
1503+
1504+
// change the tag directory
1505+
set[cacheSet].tag = cacheTag;
1506+
set[cacheSet].dirty = false;
1507+
set[cacheSet].valid = true;
1508+
isGPUOwned[cacheSet] =
1509+
pkt->req->contextId() == 31 ? true:false;
1510+
1511+
// remove from bypass tag when we fill into DRAMCache
1512+
if (bypassTagEnable)
1513+
{
1514+
bypassTag->removeFromBypassTag(pkt->getAddr());
1515+
bypassTag->insertIntoBypassTag(evictAddr);
1516+
}
1517+
14841518
// ADARSH calculating DRAM cache address here
14851519
addr = addr / dramCache_block_size;
14861520
addr = addr % dramCache_num_sets;
1487-
uint64_t cacheRow = floor(addr/15);
1521+
uint64_t cacheRow = floor(addr/15);
14881522
// ADARSH packet count is 2; we need to number our sets in multiplies of 2
14891523
addr = addr * pktCount;
14901524

@@ -1649,6 +1683,52 @@ DRAMCacheCtrl::recvTimingReq (PacketPtr pkt)
16491683
return true;
16501684
}
16511685

1686+
if (bypassTagEnable)
1687+
{
1688+
if (bypassTag->isHit(pkt->getAddr()))
1689+
{
1690+
if (pkt->isRead())
1691+
{
1692+
DPRINTF(DRAMCache,"Read bypass addr %d\n", pkt->getAddr());
1693+
bypassTag->num_read_hits++;
1694+
if (pkt->req->contextId()!=31)
1695+
bypassTag->num_cpu_read_hits++;
1696+
allocateMissBuffer(pkt, curTick()+PREDICTION_LATENCY, true);
1697+
}
1698+
else
1699+
{
1700+
DPRINTF(DRAMCache,"Write bypass addr %d\n", pkt->getAddr());
1701+
bypassTag->num_write_hits++;
1702+
if (pkt->req->contextId()!=31)
1703+
bypassTag->num_cpu_write_hits++;
1704+
access(pkt);
1705+
RequestPtr req = new Request(pkt->getAddr(),
1706+
dramCache_block_size, 0, Request::wbMasterId);
1707+
req->setContextId(pkt->req->contextId());
1708+
PacketPtr clone_pkt = new Packet(req, MemCmd::WriteReq);
1709+
clone_pkt->allocate();
1710+
allocateWriteBuffer(clone_pkt,curTick()+PREDICTION_LATENCY);
1711+
respond(pkt, frontendLatency);
1712+
}
1713+
return true;
1714+
}
1715+
else
1716+
{
1717+
if (pkt->isRead())
1718+
{
1719+
bypassTag->num_read_misses++;
1720+
if (pkt->req->contextId()!=31)
1721+
bypassTag->num_cpu_read_misses++;
1722+
}
1723+
else
1724+
{
1725+
bypassTag->num_write_misses++;
1726+
if (pkt->req->contextId()!=31)
1727+
bypassTag->num_cpu_write_misses++;
1728+
}
1729+
}
1730+
}
1731+
16521732
#ifdef PASS_PC
16531733
// perform prediction using cache address; lookup RubyPort::predictorTable
16541734
int cid = pkt->req->contextId();
@@ -1671,9 +1751,43 @@ DRAMCacheCtrl::recvTimingReq (PacketPtr pkt)
16711751
// UPDATE we modified this to 2 bursts including tag since we assume the
16721752
// tags also burst out using an odd burst size of slightly greater than 64B
16731753

1754+
// if GPU state changed
1755+
if(switched_gpu_running != CudaGPU::running)
1756+
{
1757+
if(CudaGPU::running && dirtyCleanBypassEnable)
1758+
{
1759+
inform("GPU started, dirty/clean bypass for CPU req started");
1760+
}
1761+
else
1762+
{
1763+
inform("GPU stopped, dirty/clean bypass for CPU req stopped");
1764+
}
1765+
}
1766+
1767+
// track GPU running state
1768+
switched_gpu_running = CudaGPU::running;
1769+
16741770
// check local buffers and do not accept if full
16751771
if (pkt->isRead ())
16761772
{
1773+
unsigned int cacheBlock = pkt->getAddr()/dramCache_block_size;
1774+
unsigned int cacheSet = cacheBlock % dramCache_num_sets;
1775+
unsigned int cacheTag = cacheBlock / dramCache_num_sets;
1776+
// CPU request and GPU is running and dirtyCleanBypass is enabled
1777+
// and the set is clean then bypass
1778+
if (pkt->req->contextId() != 31 && CudaGPU::running &&
1779+
dirtyCleanBypassEnable)
1780+
{
1781+
if (!set[cacheSet].dirty)
1782+
{
1783+
allocateMissBuffer(pkt, PREDICTION_LATENCY, true);
1784+
dramCache_dirty_clean_bypass++;
1785+
return true;
1786+
}
1787+
else if (cacheTag!=set[cacheSet].tag)
1788+
dramCache_dirty_clean_bypass_miss++;
1789+
}
1790+
16771791
assert(size != 0);
16781792
if (readQueueFull (DRAM_PKT_COUNT))
16791793
{
@@ -1832,26 +1946,6 @@ DRAMCacheCtrl::recvTimingResp (PacketPtr pkt)
18321946
if (wasFull && !mq->isFull())
18331947
clearBlocked ((BlockedCause) mq->index);
18341948

1835-
// update tags
1836-
unsigned int cacheBlock = pkt->getAddr()/dramCache_block_size;
1837-
unsigned int cacheSet = cacheBlock % dramCache_num_sets;
1838-
unsigned int cacheTag = cacheBlock / dramCache_num_sets;
1839-
1840-
Addr evictAddr = regenerateBlkAddr(cacheSet, set[cacheSet].tag);
1841-
DPRINTF(DRAMCache, "%s PAM Evicting addr %d in cacheSet %d dirty %d\n",
1842-
__func__, evictAddr ,cacheSet, set[cacheSet].dirty);
1843-
1844-
// this block needs to be evicted
1845-
if (set[cacheSet].dirty)
1846-
doWriteBack(evictAddr, isGPUOwned[cacheSet]?31:0);
1847-
1848-
// change the tag directory
1849-
set[cacheSet].tag = cacheTag;
1850-
set[cacheSet].dirty = false;
1851-
set[cacheSet].valid = true;
1852-
isGPUOwned[cacheSet] =
1853-
pkt->req->contextId() == 31 ? true:false;
1854-
18551949
// add to fillQueue since we encountered a miss
18561950
// create a packet and add to fillQueue
18571951
// we can delete the packet as we never derefence it
@@ -1896,10 +1990,6 @@ DRAMCacheCtrl::recvTimingResp (PacketPtr pkt)
18961990
if (!set[cacheSet].valid)
18971991
{
18981992
// cold miss - the set was not valid
1899-
set[cacheSet].tag = cacheTag;
1900-
set[cacheSet].dirty = false;
1901-
set[cacheSet].valid = true;
1902-
isGPUOwned[cacheSet] = pkt->req->contextId() == 31 ? true:false;
19031993

19041994
// add to fillQueue since we encountered a miss
19051995
// create a packet and add to fillQueue
@@ -1919,19 +2009,6 @@ DRAMCacheCtrl::recvTimingResp (PacketPtr pkt)
19192009
}
19202010
else if (set[cacheSet].tag != cacheTag)
19212011
{
1922-
Addr evictAddr = regenerateBlkAddr(cacheSet, set[cacheSet].tag);
1923-
DPRINTF(DRAMCache, "%s Evicting addr %d in cacheSet %d dirty %d\n",
1924-
__func__,evictAddr ,cacheSet, set[cacheSet].dirty);
1925-
// this block needs to be evicted
1926-
if (set[cacheSet].dirty)
1927-
doWriteBack(evictAddr, isGPUOwned[cacheSet]?31:0);
1928-
1929-
// change the tag directory
1930-
set[cacheSet].tag = cacheTag;
1931-
set[cacheSet].dirty = false;
1932-
set[cacheSet].valid = true;
1933-
isGPUOwned[cacheSet] = pkt->req->contextId() == 31 ? true:false;
1934-
19352012
// add to fillQueue since we encountered a miss
19362013
// create a packet and add to fillQueue
19372014
// we can delete the packet as we never derefence it
@@ -2256,11 +2333,60 @@ DRAMCacheCtrl::drain()
22562333
}
22572334
}
22582335

2336+
2337+
void
2338+
DRAMCacheCtrl::LRUTagStore::regStats(string name)
2339+
{
2340+
using namespace Stats;
2341+
2342+
num_read_hits
2343+
.name (name + ".bypasstag_read_hits")
2344+
.desc ("num of read hits in tag store");
2345+
2346+
num_read_misses
2347+
.name (name + ".bypasstag_read_misses")
2348+
.desc ("num of read misses in tag store");
2349+
2350+
num_cpu_read_hits
2351+
.name (name + ".bypasstag_cpu_read_hits")
2352+
.desc ("num of cpu read hits in tag store");
2353+
2354+
num_cpu_read_misses
2355+
.name (name + ".bypasstag_cpu_read_misses")
2356+
.desc ("num of cpu read misses in tag store");
2357+
2358+
num_cpu_write_hits
2359+
.name (name + ".bypasstag_cpu_write_hits")
2360+
.desc ("num of cpu write hits in tag store");
2361+
2362+
num_cpu_write_misses
2363+
.name (name + ".bypasstag_cpu_write_misses")
2364+
.desc ("num of cpu write misses in tag store");
2365+
2366+
hit_rate
2367+
.name (name + ".bypasstag_hit_rate")
2368+
.desc ("hit rate of the tag store");
2369+
2370+
hit_rate = (num_read_hits + num_write_hits) /
2371+
(num_read_hits + num_write_hits + num_read_misses + num_write_misses);
2372+
2373+
cpu_hit_rate
2374+
.name (name + ".bypasstag_cpu_hit_rate")
2375+
.desc ("cpu hit rate of the tag store");
2376+
2377+
cpu_hit_rate = (num_cpu_read_hits + num_cpu_write_hits) /
2378+
(num_cpu_read_hits + num_cpu_write_hits +
2379+
num_cpu_read_misses + num_cpu_write_misses);
2380+
2381+
}
2382+
22592383
void
22602384
DRAMCacheCtrl::regStats ()
22612385
{
22622386
using namespace Stats;
22632387
DRAMCtrl::regStats ();
2388+
if (bypassTagEnable)
2389+
bypassTag->regStats(name());
22642390

22652391
dramCache_read_hits
22662392
.name (name () + ".dramCache_read_hits")
@@ -2574,6 +2700,14 @@ DRAMCacheCtrl::regStats ()
25742700
gpuWrBusLat
25752701
.name(name() + ".gpuWrBusLat")
25762702
.desc("ticks spent in databus transfers for GPU Requests for writes");
2703+
2704+
dramCache_dirty_clean_bypass
2705+
.name(name() + ".dramCache_dirty_clean_bypass")
2706+
.desc("num times dirty clean mechanism did a bypass");
2707+
2708+
dramCache_dirty_clean_bypass_miss
2709+
.name(name() + ".dramCache_dirty_clean_bypass_miss")
2710+
.desc("num of times read miss to a dirty line");
25772711
}
25782712

25792713
BaseMasterPort &

0 commit comments

Comments
 (0)