Skip to content

Commit c20558e

Browse files
committed
don't insert CPU lines into DRAMCache on return path when GPU is running
1 parent 0a58fea commit c20558e

File tree

2 files changed

+71
-57
lines changed

2 files changed

+71
-57
lines changed

gem5/src/mem/dramcache_ctrl.cc

Lines changed: 70 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,78 +1510,87 @@ DRAMCacheCtrl::addToFillQueue(PacketPtr pkt, unsigned int pktCount)
15101510
DPRINTF(DRAMCache, "%s PAM Evicting addr %d in cacheSet %d dirty %d\n",
15111511
__func__, evictAddr ,cacheSet, set[cacheSet].dirty);
15121512

1513-
// this block needs to be evicted
1514-
if (set[cacheSet].dirty && set[cacheSet].valid)
1513+
if (CudaGPU::running && (pkt->req->contextId()!=31) &&
1514+
(dirtyCleanBypassEnable||bloomFilterEnable))
15151515
{
1516-
if (bloomFilterEnable)
1517-
cbf->remove(evictAddr);
1518-
doWriteBack(evictAddr, isGPUOwned[cacheSet]?31:0);
1516+
// we are not inserting into DRAMCache as we are in the bypass phase
1517+
dramCache_cpu_not_inserted++;
15191518
}
1520-
1521-
bool wasGPU = isGPUOwned[cacheSet];
1522-
// change the tag directory
1523-
set[cacheSet].tag = cacheTag;
1524-
set[cacheSet].dirty = false;
1525-
set[cacheSet].valid = true;
1526-
isGPUOwned[cacheSet] =
1527-
pkt->req->contextId() == 31 ? true:false;
1528-
1529-
// remove from bypass tag when we fill into DRAMCache
1530-
// only CPU lines are allowed into the eviction bypasstag table
1531-
if (bypassTagEnable && !wasGPU)
1519+
else
15321520
{
1533-
bypassTag->removeFromBypassTag(pkt->getAddr());
1534-
bypassTag->insertIntoBypassTag(evictAddr);
1535-
}
1521+
// this block needs to be evicted
1522+
if (set[cacheSet].dirty && set[cacheSet].valid)
1523+
{
1524+
if (bloomFilterEnable)
1525+
cbf->remove(evictAddr);
1526+
doWriteBack(evictAddr, isGPUOwned[cacheSet]?31:0);
1527+
}
15361528

1537-
// ADARSH calculating DRAM cache address here
1538-
addr = addr / dramCache_block_size;
1539-
addr = addr % dramCache_num_sets;
1540-
uint64_t cacheRow = floor(addr/15);
1541-
// ADARSH packet count is 2; we need to number our sets in multiples of 2
1542-
addr = addr * pktCount;
1529+
bool wasGPU = isGPUOwned[cacheSet];
1530+
// change the tag directory
1531+
set[cacheSet].tag = cacheTag;
1532+
set[cacheSet].dirty = false;
1533+
set[cacheSet].valid = true;
1534+
isGPUOwned[cacheSet] =
1535+
pkt->req->contextId() == 31 ? true:false;
1536+
1537+
// remove from bypass tag when we fill into DRAMCache
1538+
// only CPU lines are allowed into the eviction bypasstag table
1539+
if (bypassTagEnable && !wasGPU)
1540+
{
1541+
bypassTag->removeFromBypassTag(pkt->getAddr());
1542+
bypassTag->insertIntoBypassTag(evictAddr);
1543+
}
15431544

1544-
// account for tags for each 15 sets (i.e each row)
1545-
addr += (cacheRow* 2);
1545+
// ADARSH calculating DRAM cache address here
1546+
addr = addr / dramCache_block_size;
1547+
addr = addr % dramCache_num_sets;
1548+
uint64_t cacheRow = floor(addr/15);
1549+
// ADARSH packet count is 2; we need to number our sets in multiples of 2
1550+
addr = addr * pktCount;
15461551

1547-
if (fillQueueFull(pktCount))
1548-
{
1549-
// fillQueue is full we just drop the requests since we don't want to
1550-
// add complications by doing a retry etc.
1551-
// no correctness issues - fillQueue is just to model DRAM latencies
1552-
warn("DRAMCache fillQueue full, dropping req addr %d", pkt->getAddr());
1553-
return;
1554-
}
1552+
// account for tags for each 15 sets (i.e each row)
1553+
addr += (cacheRow* 2);
15551554

1556-
for (int cnt = 0; cnt < pktCount; ++cnt) {
1557-
dramCache_fillBursts++;
1555+
if (fillQueueFull(pktCount))
1556+
{
1557+
// fillQueue is full we just drop the requests since we don't want to
1558+
// add complications by doing a retry etc.
1559+
// no correctness issues - fillQueue is just to model DRAM latencies
1560+
warn("DRAMCache fillQueue full, dropping req addr %d", pkt->getAddr());
1561+
return;
1562+
}
15581563

1559-
// see if we can merge with an existing item in the fill
1560-
// queue and keep track of whether we have merged or not
1561-
bool merged = isInFillQueue.find(make_pair(pkt->getAddr(),addr)) !=
1562-
isInFillQueue.end();
1564+
for (int cnt = 0; cnt < pktCount; ++cnt) {
1565+
dramCache_fillBursts++;
15631566

1564-
// if the item was not merged we need to enqueue it
1565-
if (!merged)
1566-
{
1567-
assert(pkt->req->hasContextId());
1568-
DRAMPacket* dram_pkt = decodeAddr(pkt, addr, burstSize, false);
1569-
dram_pkt->requestAddr = pkt->getAddr();
1570-
dram_pkt->isFill = true;
1567+
// see if we can merge with an existing item in the fill
1568+
// queue and keep track of whether we have merged or not
1569+
bool merged = isInFillQueue.find(make_pair(pkt->getAddr(),addr)) !=
1570+
isInFillQueue.end();
15711571

1572-
assert(fillQueue.size() < fillBufferSize);
1572+
// if the item was not merged we need to enqueue it
1573+
if (!merged)
1574+
{
1575+
assert(pkt->req->hasContextId());
1576+
DRAMPacket* dram_pkt = decodeAddr(pkt, addr, burstSize, false);
1577+
dram_pkt->requestAddr = pkt->getAddr();
1578+
dram_pkt->isFill = true;
15731579

1574-
DPRINTF(DRAMCache, "Adding to fill queue addr:%d\n", pkt->getAddr());
1580+
assert(fillQueue.size() < fillBufferSize);
15751581

1576-
fillQueue.push_back(dram_pkt);
1577-
isInFillQueue.insert(make_pair(pkt->getAddr(),addr));
1582+
DPRINTF(DRAMCache, "Adding to fill queue addr:%d\n", pkt->getAddr());
15781583

1579-
dramCache_avgFillQLen = fillQueue.size();
1584+
fillQueue.push_back(dram_pkt);
1585+
isInFillQueue.insert(make_pair(pkt->getAddr(),addr));
15801586

1581-
assert(fillQueue.size() == isInFillQueue.size());
1582-
}
1587+
dramCache_avgFillQLen = fillQueue.size();
15831588

1584-
addr = addr + 1;
1589+
assert(fillQueue.size() == isInFillQueue.size());
1590+
}
1591+
1592+
addr = addr + 1;
1593+
}
15851594
}
15861595

15871596
// If we are not already scheduled to get a request out of the
@@ -2813,6 +2822,10 @@ DRAMCacheCtrl::regStats ()
28132822
dramCache_bloom_filter_mispred_miss
28142823
.name(name() + ".dramCache_bloom_filter_mispred_miss")
28152824
.desc("num req mispred by bloom filter which were miss");
2825+
2826+
dramCache_cpu_not_inserted
2827+
.name(name() + ".dramCache_cpu_not_inserted")
2828+
.desc("num times cpu line was not inserted into cache");
28162829
}
28172830

28182831
BaseMasterPort &

gem5/src/mem/dramcache_ctrl.hh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ class DRAMCacheCtrl : public DRAMCtrl
290290
Stats::Scalar dramCache_bloom_filter_bypass;
291291
Stats::Scalar dramCache_bloom_filter_mispred_hit;
292292
Stats::Scalar dramCache_bloom_filter_mispred_miss;
293+
Stats::Scalar dramCache_cpu_not_inserted;
293294

294295
Random randomPredictor;
295296

0 commit comments

Comments
 (0)