 */

#include "mem/dramcache_ctrl.hh"
+ #include "src/gpu/gpgpu-sim/cuda_gpu.hh"
#include "debug/DRAMCache.hh"
#include "debug/Drain.hh"
#include "mem/ruby/system/RubyPort.hh"

using namespace std;
using namespace Data;

+ // these things are #defined in gpgpu-sim; since we include cuda_gpu for
+ // the CudaGPU::running flag, we need to undef these
+ // #define WRITE 'W' -> gpgpu-sim/dram.h:39:15
+ // #define READ 'R'  -> gpgpu-sim/dram.h:39:14
+ #undef WRITE
+ #undef READ

map<int, DRAMCacheCtrl::predictorTable> DRAMCacheCtrl::predictor;
int DRAMCacheCtrl::predAccuracy;
+ bool DRAMCacheCtrl::switched_gpu_running;

DRAMCacheCtrl::DRAMCacheCtrl(const DRAMCacheCtrlParams* p) :
    DRAMCtrl(p), respondWriteEvent(this),
@@ -46,6 +54,13 @@ DRAMCacheCtrl::DRAMCacheCtrl (const DRAMCacheCtrlParams* p) :

    DRAMCacheCtrl::predAccuracy = p->prediction_accuracy;

+    bypassTagEnable = p->bypass_tag_enable;
+    dirtyCleanBypassEnable = p->dirty_clean_bypass_enable;
+    if (bypassTagEnable)
+    {
+        bypassTag = new LRUTagStore(this, p->bypass_tag_size);
+    }
+
    rowsPerBank = (1024 * 1024 * deviceCapacity)
        / (rowBufferSize * banksPerRank * ranksPerChannel);

@@ -76,6 +91,8 @@ DRAMCacheCtrl::DRAMCacheCtrl (const DRAMCacheCtrlParams* p) :
    // we initialize a static seed for our randomPredictor
    randomPredictor.init(3594);

+    switched_gpu_running = false;
+
    max_gpu_lines_sample_counter = 0;
    inform("DRAMCache per controller capacity %d MB\n", deviceCapacity);
    inform("DRAMCache scheduling policy %s\n", memSchedPolicy);
@@ -84,6 +101,9 @@ DRAMCacheCtrl::DRAMCacheCtrl (const DRAMCacheCtrlParams* p) :
            writeLowThreshold, writeHighThreshold);
    inform("DRAMCache address mapping %d, page mgmt %d", addrMapping, pageMgmt);
    inform("DRAMCache mshrs %d, writebuffers %d", p->mshrs, p->write_buffers);
+    inform("DRAMCache Bypass tag enabled %d bypass tag size %d",
+            bypassTagEnable, p->bypass_tag_size);
+    inform("DRAMCache Dirty/Clean bypass enabled %d", dirtyCleanBypassEnable);
}

void
@@ -376,6 +396,11 @@ DRAMCacheCtrl::doWriteBack(Addr evictAddr, int contextId)

    allocateWriteBuffer(wbPkt, curTick()+1);

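+    // the block just queued for writeback no longer lives in the DRAMCache,
+    // so mark its set invalid and clean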
+    unsigned int cacheBlock = evictAddr/dramCache_block_size;
+    unsigned int cacheSet = cacheBlock % dramCache_num_sets;
+    set[cacheSet].valid = false;
+    set[cacheSet].dirty = false;
+
}

DRAMCtrl::DRAMPacket*
@@ -996,24 +1021,6 @@ DRAMCacheCtrl::processRespondEvent ()

    // put the data into dramcache
    // fill the dramcache and update tags
-    unsigned int cacheBlock = dram_pkt->pkt->getAddr()/dramCache_block_size;
-    unsigned int cacheSet = cacheBlock % dramCache_num_sets;
-    unsigned int cacheTag = cacheBlock / dramCache_num_sets;
-
-    Addr evictAddr = regenerateBlkAddr(cacheSet, set[cacheSet].tag);
-    DPRINTF(DRAMCache, "%s Evicting addr %d in cacheSet %d dirty %d\n",
-            __func__, evictAddr, cacheSet, set[cacheSet].dirty);
-
-    // this block needs to be evicted
-    if (set[cacheSet].dirty)
-        doWriteBack(evictAddr, isGPUOwned[cacheSet]?31:0);
-
-    // change the tag directory
-    set[cacheSet].tag = cacheTag;
-    set[cacheSet].dirty = false;
-    set[cacheSet].valid = true;
-    isGPUOwned[cacheSet] =
-        dram_pkt->pkt->req->contextId() == 31 ? true : false;

    // add to fillQueue since we encountered a miss
    // create a packet and add to fillQueue
@@ -1215,7 +1222,7 @@ DRAMCacheCtrl::addToReadQueue(PacketPtr pkt, unsigned int pktCount)

    addr = addr / dramCache_block_size;
    addr = addr % dramCache_num_sets;
-    uint64_t cacheRow = floor(addr/15);
+    uint64_t cacheRow = floor(addr/15);
    // ADARSH packet count is 2; we need to number our sets in multiples of 2
    addr = addr * pktCount;
@@ -1481,10 +1488,37 @@ DRAMCacheCtrl::addToFillQueue(PacketPtr pkt, unsigned int pktCount)

    Addr addr = pkt->getAddr();

+    // update tags
+    unsigned int cacheBlock = pkt->getAddr()/dramCache_block_size;
+    unsigned int cacheSet = cacheBlock % dramCache_num_sets;
+    unsigned int cacheTag = cacheBlock / dramCache_num_sets;
+
+    Addr evictAddr = regenerateBlkAddr(cacheSet, set[cacheSet].tag);
+    DPRINTF(DRAMCache, "%s PAM Evicting addr %d in cacheSet %d dirty %d\n",
+            __func__, evictAddr, cacheSet, set[cacheSet].dirty);
+
+    // this block needs to be evicted
+    if (set[cacheSet].dirty && set[cacheSet].valid)
+        doWriteBack(evictAddr, isGPUOwned[cacheSet]?31:0);
+
+    // change the tag directory
+    set[cacheSet].tag = cacheTag;
+    set[cacheSet].dirty = false;
+    set[cacheSet].valid = true;
+    isGPUOwned[cacheSet] =
+        pkt->req->contextId() == 31 ? true : false;
+
+    // remove from bypass tag when we fill into DRAMCache
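+    // and insert the evicted block so that later requests to it can bypass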
+    if (bypassTagEnable)
+    {
+        bypassTag->removeFromBypassTag(pkt->getAddr());
+        bypassTag->insertIntoBypassTag(evictAddr);
+    }
+
    // ADARSH calculating DRAM cache address here
    addr = addr / dramCache_block_size;
    addr = addr % dramCache_num_sets;
-    uint64_t cacheRow = floor(addr/15);
+    uint64_t cacheRow = floor(addr/15);
    // ADARSH packet count is 2; we need to number our sets in multiples of 2
    addr = addr * pktCount;
@@ -1649,6 +1683,52 @@ DRAMCacheCtrl::recvTimingReq (PacketPtr pkt)
        return true;
    }

+    if (bypassTagEnable)
+    {
+        if (bypassTag->isHit(pkt->getAddr()))
+        {
+            if (pkt->isRead())
+            {
+                DPRINTF(DRAMCache, "Read bypass addr %d\n", pkt->getAddr());
+                bypassTag->num_read_hits++;
+                if (pkt->req->contextId() != 31)
+                    bypassTag->num_cpu_read_hits++;
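+                // read bypass: the block is fetched from memory rather
+                // than looked up in the DRAMCache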
+                allocateMissBuffer(pkt, curTick()+PREDICTION_LATENCY, true);
+            }
+            else
+            {
+                DPRINTF(DRAMCache, "Write bypass addr %d\n", pkt->getAddr());
+                bypassTag->num_write_hits++;
+                if (pkt->req->contextId() != 31)
+                    bypassTag->num_cpu_write_hits++;
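+                // write bypass: respond to the requester and forward the
+                // write to memory using a cloned write packet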
+                access(pkt);
+                RequestPtr req = new Request(pkt->getAddr(),
+                        dramCache_block_size, 0, Request::wbMasterId);
+                req->setContextId(pkt->req->contextId());
+                PacketPtr clone_pkt = new Packet(req, MemCmd::WriteReq);
+                clone_pkt->allocate();
+                allocateWriteBuffer(clone_pkt, curTick()+PREDICTION_LATENCY);
+                respond(pkt, frontendLatency);
+            }
+            return true;
+        }
+        else
+        {
+            if (pkt->isRead())
+            {
+                bypassTag->num_read_misses++;
+                if (pkt->req->contextId() != 31)
+                    bypassTag->num_cpu_read_misses++;
+            }
+            else
+            {
+                bypassTag->num_write_misses++;
+                if (pkt->req->contextId() != 31)
+                    bypassTag->num_cpu_write_misses++;
+            }
+        }
+    }
+
#ifdef PASS_PC
    // perform prediction using cache address; lookup RubyPort::predictorTable
    int cid = pkt->req->contextId();
@@ -1671,9 +1751,43 @@ DRAMCacheCtrl::recvTimingReq (PacketPtr pkt)
    // UPDATE we modified this to 2 bursts including tag since we assume the
    // tags also burst out using an odd burst size of slightly greater than 64B

+    // if GPU state changed
+    if (switched_gpu_running != CudaGPU::running)
+    {
+        if (CudaGPU::running && dirtyCleanBypassEnable)
+        {
+            inform("GPU started, dirty/clean bypass for CPU req started");
+        }
+        else
+        {
+            inform("GPU stopped, dirty/clean bypass for CPU req stopped");
+        }
+    }
+
+    // track GPU running state
+    switched_gpu_running = CudaGPU::running;
+
    // check local buffers and do not accept if full
    if (pkt->isRead())
    {
+        unsigned int cacheBlock = pkt->getAddr()/dramCache_block_size;
+        unsigned int cacheSet = cacheBlock % dramCache_num_sets;
+        unsigned int cacheTag = cacheBlock / dramCache_num_sets;
+        // if this is a CPU request, the GPU is running, dirtyCleanBypass is
+        // enabled, and the set is clean, then bypass
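+        // a clean set can safely be served from memory, which holds the same
+        // data; only dirty sets have to be read from the DRAMCache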
+        if (pkt->req->contextId() != 31 && CudaGPU::running &&
+            dirtyCleanBypassEnable)
+        {
+            if (!set[cacheSet].dirty)
+            {
+                allocateMissBuffer(pkt, PREDICTION_LATENCY, true);
+                dramCache_dirty_clean_bypass++;
+                return true;
+            }
+            else if (cacheTag != set[cacheSet].tag)
+                dramCache_dirty_clean_bypass_miss++;
+        }
+
        assert(size != 0);
        if (readQueueFull(DRAM_PKT_COUNT))
        {
@@ -1832,26 +1946,6 @@ DRAMCacheCtrl::recvTimingResp (PacketPtr pkt)
    if (wasFull && !mq->isFull())
        clearBlocked((BlockedCause) mq->index);

-    // update tags
-    unsigned int cacheBlock = pkt->getAddr()/dramCache_block_size;
-    unsigned int cacheSet = cacheBlock % dramCache_num_sets;
-    unsigned int cacheTag = cacheBlock / dramCache_num_sets;
-
-    Addr evictAddr = regenerateBlkAddr(cacheSet, set[cacheSet].tag);
-    DPRINTF(DRAMCache, "%s PAM Evicting addr %d in cacheSet %d dirty %d\n",
-            __func__, evictAddr, cacheSet, set[cacheSet].dirty);
-
-    // this block needs to be evicted
-    if (set[cacheSet].dirty)
-        doWriteBack(evictAddr, isGPUOwned[cacheSet]?31:0);
-
-    // change the tag directory
-    set[cacheSet].tag = cacheTag;
-    set[cacheSet].dirty = false;
-    set[cacheSet].valid = true;
-    isGPUOwned[cacheSet] =
-        pkt->req->contextId() == 31 ? true : false;
-
    // add to fillQueue since we encountered a miss
    // create a packet and add to fillQueue
    // we can delete the packet as we never dereference it
@@ -1896,10 +1990,6 @@ DRAMCacheCtrl::recvTimingResp (PacketPtr pkt)
    if (!set[cacheSet].valid)
    {
        // cold miss - the set was not valid
-        set[cacheSet].tag = cacheTag;
-        set[cacheSet].dirty = false;
-        set[cacheSet].valid = true;
-        isGPUOwned[cacheSet] = pkt->req->contextId() == 31 ? true : false;

        // add to fillQueue since we encountered a miss
        // create a packet and add to fillQueue
@@ -1919,19 +2009,6 @@ DRAMCacheCtrl::recvTimingResp (PacketPtr pkt)
    }
    else if (set[cacheSet].tag != cacheTag)
    {
-        Addr evictAddr = regenerateBlkAddr(cacheSet, set[cacheSet].tag);
-        DPRINTF(DRAMCache, "%s Evicting addr %d in cacheSet %d dirty %d\n",
-                __func__, evictAddr, cacheSet, set[cacheSet].dirty);
-        // this block needs to be evicted
-        if (set[cacheSet].dirty)
-            doWriteBack(evictAddr, isGPUOwned[cacheSet]?31:0);
-
-        // change the tag directory
-        set[cacheSet].tag = cacheTag;
-        set[cacheSet].dirty = false;
-        set[cacheSet].valid = true;
-        isGPUOwned[cacheSet] = pkt->req->contextId() == 31 ? true : false;
-
        // add to fillQueue since we encountered a miss
        // create a packet and add to fillQueue
        // we can delete the packet as we never dereference it
@@ -2256,11 +2333,60 @@ DRAMCacheCtrl::drain()
    }
}

+
+void
+DRAMCacheCtrl::LRUTagStore::regStats(string name)
+{
+    using namespace Stats;
+
+    num_read_hits
+        .name(name + ".bypasstag_read_hits")
+        .desc("num of read hits in tag store");
+
+    num_read_misses
+        .name(name + ".bypasstag_read_misses")
+        .desc("num of read misses in tag store");
+
+    num_cpu_read_hits
+        .name(name + ".bypasstag_cpu_read_hits")
+        .desc("num of cpu read hits in tag store");
+
+    num_cpu_read_misses
+        .name(name + ".bypasstag_cpu_read_misses")
+        .desc("num of cpu read misses in tag store");
+
+    num_cpu_write_hits
+        .name(name + ".bypasstag_cpu_write_hits")
+        .desc("num of cpu write hits in tag store");
+
+    num_cpu_write_misses
+        .name(name + ".bypasstag_cpu_write_misses")
+        .desc("num of cpu write misses in tag store");
+
+    hit_rate
+        .name(name + ".bypasstag_hit_rate")
+        .desc("hit rate of the tag store");
+
+    hit_rate = (num_read_hits + num_write_hits) /
+        (num_read_hits + num_write_hits + num_read_misses + num_write_misses);
+
+    cpu_hit_rate
+        .name(name + ".bypasstag_cpu_hit_rate")
+        .desc("cpu hit rate of the tag store");
+
+    cpu_hit_rate = (num_cpu_read_hits + num_cpu_write_hits) /
+        (num_cpu_read_hits + num_cpu_write_hits +
+         num_cpu_read_misses + num_cpu_write_misses);
+
+}
+
void
DRAMCacheCtrl::regStats()
{
    using namespace Stats;
    DRAMCtrl::regStats();
+    if (bypassTagEnable)
+        bypassTag->regStats(name());

    dramCache_read_hits
        .name(name() + ".dramCache_read_hits")
@@ -2574,6 +2700,14 @@ DRAMCacheCtrl::regStats ()
    gpuWrBusLat
        .name(name() + ".gpuWrBusLat")
        .desc("ticks spent in databus transfers for GPU Requests for writes");
+
+    dramCache_dirty_clean_bypass
+        .name(name() + ".dramCache_dirty_clean_bypass")
+        .desc("num times dirty clean mechanism did a bypass");
+
+    dramCache_dirty_clean_bypass_miss
+        .name(name() + ".dramCache_dirty_clean_bypass_miss")
+        .desc("num of times read miss to a dirty line");
}

BaseMasterPort &