@@ -1510,78 +1510,87 @@ DRAMCacheCtrl::addToFillQueue(PacketPtr pkt, unsigned int pktCount)
     DPRINTF(DRAMCache, "%s PAM Evicting addr %d in cacheSet %d dirty %d\n",
             __func__, evictAddr, cacheSet, set[cacheSet].dirty);
 
-    // this block needs to be evicted
-    if (set[cacheSet].dirty && set[cacheSet].valid)
+    if (CudaGPU::running && (pkt->req->contextId() != 31) &&
+            (dirtyCleanBypassEnable || bloomFilterEnable))
     {
-        if (bloomFilterEnable)
-            cbf->remove(evictAddr);
-        doWriteBack(evictAddr, isGPUOwned[cacheSet] ? 31 : 0);
+        // we are not inserting into DRAMCache as we are in the bypass phase
+        dramCache_cpu_not_inserted++;
     }
-
-    bool wasGPU = isGPUOwned[cacheSet];
-    // change the tag directory
-    set[cacheSet].tag = cacheTag;
-    set[cacheSet].dirty = false;
-    set[cacheSet].valid = true;
-    isGPUOwned[cacheSet] =
-            pkt->req->contextId() == 31 ? true : false;
-
-    // remove from bypass tag when we fill into DRAMCache
-    // only CPU lines are allowed into the eviction bypasstag table
-    if (bypassTagEnable && !wasGPU)
+    else
     {
-        bypassTag->removeFromBypassTag(pkt->getAddr());
-        bypassTag->insertIntoBypassTag(evictAddr);
-    }
+        // this block needs to be evicted
+        if (set[cacheSet].dirty && set[cacheSet].valid)
+        {
+            if (bloomFilterEnable)
+                cbf->remove(evictAddr);
+            doWriteBack(evictAddr, isGPUOwned[cacheSet] ? 31 : 0);
+        }
 
-    // ADARSH calcuating DRAM cache address here
-    addr = addr / dramCache_block_size;
-    addr = addr % dramCache_num_sets;
-    uint64_t cacheRow = floor(addr / 15);
-    // ADARSH packet count is 2; we need to number our sets in multiplies of 2
-    addr = addr * pktCount;
+        bool wasGPU = isGPUOwned[cacheSet];
+        // change the tag directory
+        set[cacheSet].tag = cacheTag;
+        set[cacheSet].dirty = false;
+        set[cacheSet].valid = true;
+        isGPUOwned[cacheSet] =
+                pkt->req->contextId() == 31 ? true : false;
+
+        // remove from bypass tag when we fill into DRAMCache
+        // only CPU lines are allowed into the eviction bypasstag table
+        if (bypassTagEnable && !wasGPU)
+        {
+            bypassTag->removeFromBypassTag(pkt->getAddr());
+            bypassTag->insertIntoBypassTag(evictAddr);
+        }
 
-    // account for tags for each 15 sets (i.e each row)
-    addr += (cacheRow * 2);
+        // ADARSH calcuating DRAM cache address here
+        addr = addr / dramCache_block_size;
+        addr = addr % dramCache_num_sets;
+        uint64_t cacheRow = floor(addr / 15);
+        // ADARSH packet count is 2; we need to number our sets in multiplies of 2
+        addr = addr * pktCount;
 
-    if (fillQueueFull(pktCount))
-    {
-        // fillQueue is full we just drop the requests since we don't want to
-        // add complications by doing a retry etc.
-        // no correctness issues - fillQueue is just to model DRAM latencies
-        warn("DRAMCache fillQueue full, dropping req addr %d", pkt->getAddr());
-        return;
-    }
+        // account for tags for each 15 sets (i.e each row)
+        addr += (cacheRow * 2);
 
-    for (int cnt = 0; cnt < pktCount; ++cnt) {
-        dramCache_fillBursts++;
+        if (fillQueueFull(pktCount))
+        {
+            // fillQueue is full we just drop the requests since we don't want to
+            // add complications by doing a retry etc.
+            // no correctness issues - fillQueue is just to model DRAM latencies
+            warn("DRAMCache fillQueue full, dropping req addr %d", pkt->getAddr());
+            return;
+        }
 
-        // see if we can merge with an existing item in the fill
-        // queue and keep track of whether we have merged or not
-        bool merged = isInFillQueue.find(make_pair(pkt->getAddr(), addr)) !=
-                isInFillQueue.end();
+        for (int cnt = 0; cnt < pktCount; ++cnt) {
+            dramCache_fillBursts++;
 
-        // if the item was not merged we need to enqueue it
-        if (!merged)
-        {
-            assert(pkt->req->hasContextId());
-            DRAMPacket* dram_pkt = decodeAddr(pkt, addr, burstSize, false);
-            dram_pkt->requestAddr = pkt->getAddr();
-            dram_pkt->isFill = true;
+            // see if we can merge with an existing item in the fill
+            // queue and keep track of whether we have merged or not
+            bool merged = isInFillQueue.find(make_pair(pkt->getAddr(), addr)) !=
+                    isInFillQueue.end();
 
-            assert(fillQueue.size() < fillBufferSize);
+            // if the item was not merged we need to enqueue it
+            if (!merged)
+            {
+                assert(pkt->req->hasContextId());
+                DRAMPacket* dram_pkt = decodeAddr(pkt, addr, burstSize, false);
+                dram_pkt->requestAddr = pkt->getAddr();
+                dram_pkt->isFill = true;
 
-            DPRINTF(DRAMCache, "Adding to fill queue addr:%d\n", pkt->getAddr());
+                assert(fillQueue.size() < fillBufferSize);
 
-            fillQueue.push_back(dram_pkt);
-            isInFillQueue.insert(make_pair(pkt->getAddr(), addr));
+                DPRINTF(DRAMCache, "Adding to fill queue addr:%d\n", pkt->getAddr());
 
-            dramCache_avgFillQLen = fillQueue.size();
+                fillQueue.push_back(dram_pkt);
+                isInFillQueue.insert(make_pair(pkt->getAddr(), addr));
 
-            assert(fillQueue.size() == isInFillQueue.size());
-        }
+                dramCache_avgFillQLen = fillQueue.size();
 
-        addr = addr + 1;
+                assert(fillQueue.size() == isInFillQueue.size());
+            }
+
+            addr = addr + 1;
+        }
     }
 
     // If we are not already scheduled to get a request out of the
@@ -2813,6 +2822,10 @@ DRAMCacheCtrl::regStats ()
     dramCache_bloom_filter_mispred_miss
         .name(name() + ".dramCache_bloom_filter_mispred_miss")
         .desc("num req mispred by bloom filter which were miss");
+
+    dramCache_cpu_not_inserted
+        .name(name() + ".dramCache_cpu_not_inserted")
+        .desc("num times cpu line was not inserted into cache");
 }
 
 BaseMasterPort &
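
Note: below is a minimal standalone sketch (not part of this commit) of the set-to-burst address arithmetic used in the addToFillQueue() hunk above, assuming the layout the code implies: a direct-mapped DRAM cache, 15 data sets per DRAM row, pktCount (2) data bursts per set, and 2 tag bursts reserved per row. The helper name fillBurstAddr and its parameters are illustrative, not taken from the source.

#include <cstdint>

// Hypothetical helper mirroring the arithmetic in addToFillQueue() under
// the assumptions stated above.
static uint64_t
fillBurstAddr(uint64_t byteAddr, uint64_t blockSize, uint64_t numSets,
              unsigned pktCount)
{
    uint64_t set      = (byteAddr / blockSize) % numSets; // direct-mapped set
    uint64_t cacheRow = set / 15;        // 15 data sets fit in one DRAM row
    uint64_t burst    = set * pktCount;  // pktCount data bursts per set
    burst += cacheRow * 2;               // skip the 2 tag bursts of each row
    return burst;    // first burst of the set; the fill loop then adds +1
                     // for each subsequent burst of the same block
}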