Predicate predictor calls added; LSQ logic changed to keep stores longer, so as to allow a greater chance of LSQ forwarding; TAGE-SC-L added; micro-op cache added; issue with the widths of different stages fixed; bug fix related to looking for the FMA unit before looking for floatALU, floatMul, floatVectorALU, or floatVectorMul; causalityTool now works with Java 11, Ubuntu 20.04, and pin-98332
This commit is contained in:
parent
b834fdbefe
commit
776bb9cbfe
|
@ -24,7 +24,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
using namespace std;
|
||||
#include <fcntl.h>
|
||||
|
||||
#ifndef _WIN32
|
||||
|
@ -688,7 +688,7 @@ VOID Instruction(INS ins, VOID *v) {
|
|||
|
||||
UINT32 memOperands = INS_MemoryOperandCount(ins);
|
||||
|
||||
if (INS_IsBranchOrCall(ins))//INS_IsIndirectBranchOrCall(ins))
|
||||
if (INS_IsControlFlow(ins))//INS_IsBranchOrCall(ins))//INS_IsIndirectBranchOrCall(ins))
|
||||
{
|
||||
INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR) BrnFun, IARG_THREAD_ID,
|
||||
IARG_BRANCH_TARGET_ADDR, IARG_BRANCH_TAKEN, IARG_INST_PTR,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
PIN_KIT ?=/home/rajshekar/softwares/pin-97554/
|
||||
PIN_KIT ?=/home/rajshekar/softwares/pin-98332/
|
||||
CXX=$(shell make PIN_ROOT=$(PIN_KIT) VAR=CXX -f pin_makefile print_var)
|
||||
LINKER=$(shell make PIN_ROOT=$(PIN_KIT) VAR=LINKER -f pin_makefile print_var)
|
||||
TOOL_CXXFLAGS=$(shell make PIN_ROOT=$(PIN_KIT) VAR=TOOL_CXXFLAGS -f pin_makefile print_var)
|
||||
|
@ -50,6 +50,8 @@ $(BINDIR)/causalityTool.$(LIB_EXTENSION): $(BINDIR)/causalityTool.$(OBJ_EXTENSIO
|
|||
|
||||
# Compiles the Pin tool together with the shared-memory helper and moves the two
# resulting object files into $(BINDIR).
#
# The output directories are created with "mkdir -p": plain mkdir exits with an
# error when the directory already exists, which would abort every rebuild that
# is not preceded by "make clean".
$(BINDIR)/causalityTool.$(OBJ_EXTENSION): causalityTool.cpp $(COMMDIR)/IPCBase.h $(COMMDIR)/shm/shmem.h $(COMMDIR)/filePacket/filePacket.h $(COMMDIR)/shm/shmem.cc
	$(CXX) $(TOOL_CXXFLAGS) $(COMM_INCLUDE) -c causalityTool.cpp ../../simulator/emulatorinterface/communication/shm/shmem.cc
	mkdir -p $(JNIBINDIR)
	mkdir -p $(BINDIR)
	mv causalityTool.$(OBJ_EXTENSION) $(BINDIR)/causalityTool.$(OBJ_EXTENSION)
	mv shmem.$(OBJ_EXTENSION) $(BINDIR)/shmem.$(OBJ_EXTENSION)
|
||||
|
||||
|
@ -58,16 +60,21 @@ $(BINDIR)/shmem.$(OBJ_EXTENSION): $(COMMDIR)/IPCBase.h $(COMMDIR)/shm/shmem.h $
|
|||
|
||||
|
||||
################################ JNI stuff comes here ############################################
|
||||
JNIPACKAGE = emulatorinterface.communication.shm.SharedMem
|
||||
JNINCLUDE =-I/usr/lib/jvm/java-8-openjdk-amd64/include/linux -I/usr/lib/jvm/java-8-openjdk-amd64/include
|
||||
#JNIPACKAGE = emulatorinterface.communication.shm.SharedMem #use this for java-8
|
||||
JNIPACKAGE = ../../simulator/emulatorinterface/communication/shm/SharedMem.java #use this for java-11
|
||||
#JNINCLUDE =-I/usr/lib/jvm/java-8-openjdk-amd64/include/linux -I/usr/lib/jvm/java-8-openjdk-amd64/include #use this for java-8
|
||||
JNINCLUDE =-I/usr/lib/jvm/java-11-openjdk-amd64/include/linux -I/usr/lib/jvm/java-11-openjdk-amd64/include #use this for java-11
|
||||
JNILinkingFlags = -shared -Wall $(POSITION_INDEPENDENCE)
|
||||
JAVAH = javah -jni
|
||||
#JAVAH = javah -jni #use this for java-8
|
||||
JAVAH = javac #use this for java-11
|
||||
|
||||
$(JNIBINDIR)/libshmlib.$(LIB_EXTENSION): $(JNIBINDIR)/SharedMem.h $(COMMDIR)/shm/JNIShm.c $(COMMDIR)/common.h
|
||||
$(shell $(JNICOMMAND))
|
||||
|
||||
$(JNIBINDIR)/SharedMem.h: $(TOPBINDIR)/emulatorinterface/communication/shm/SharedMem.class
|
||||
$(JAVAH) -classpath $(TOPBINDIR) -o $(JNIBINDIR)/SharedMem.h $(JNIPACKAGE)
|
||||
#$(JAVAH) -classpath $(TOPBINDIR) -o $(JNIBINDIR)/SharedMem.h $(JNIPACKAGE) #use this for java-8
|
||||
$(JAVAH) -classpath $(TOPBINDIR) -h $(JNIBINDIR) $(JNIPACKAGE) #use this line and the next for java-11
|
||||
mv $(JNIBINDIR)/emulatorinterface_communication_shm_SharedMem.h $(JNIBINDIR)/SharedMem.h
|
||||
|
||||
clean:
|
||||
rm -rf $(BINDIR)/* $(JNIBINDIR)/*
|
||||
rm -rf $(BINDIR) $(JNIBINDIR)
|
||||
|
|
|
@ -5,8 +5,9 @@ public class BranchPredictorConfig {
|
|||
public int BHRsize;
|
||||
public int saturating_bits;
|
||||
public BP predictorMode;
|
||||
public String TAGESCLLibDirectory;
|
||||
|
||||
public static enum BP {
|
||||
NoPredictor, PerfectPredictor, AlwaysTaken, AlwaysNotTaken, Tournament, Bimodal, GShare, GAg, GAp, PAg, PAp,TAGE
|
||||
NoPredictor, PerfectPredictor, AlwaysTaken, AlwaysNotTaken, Tournament, Bimodal, GShare, GAg, GAp, PAg, PAp,TAGE, TAGE_SC_L,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,7 +58,9 @@ public class CoreConfig
|
|||
public int STLBAccessPorts;
|
||||
public int STLBPortOccupancy;
|
||||
|
||||
public int NoOfMicroOpCacheEntries;
|
||||
public int DecodeWidth;
|
||||
public int RenameWidth;
|
||||
public int IssueWidth;
|
||||
public int RetireWidth;
|
||||
public int ROBSize;
|
||||
|
|
|
@ -481,6 +481,9 @@ public class XMLParser
|
|||
core.STLBPortOccupancy = Integer.parseInt(getImmediateString("PortOccupancy", sTLBElmnt));
|
||||
core.sTLBPower = getEnergyConfig(sTLBElmnt);
|
||||
|
||||
Element microOpCacheElmnt = (Element)(coreElmnt.getElementsByTagName("MicroOpCache")).item(0);
|
||||
core.NoOfMicroOpCacheEntries = Integer.parseInt(getImmediateString("NumberOfMicroOps", microOpCacheElmnt));
|
||||
|
||||
Element decodeElmnt = (Element)(coreElmnt.getElementsByTagName("Decode")).item(0);
|
||||
core.DecodeWidth = Integer.parseInt(getImmediateString("Width", decodeElmnt));
|
||||
core.decodePower = getEnergyConfig(decodeElmnt);
|
||||
|
@ -499,6 +502,7 @@ public class XMLParser
|
|||
core.resultsBroadcastBusPower = getEnergyConfig(resultsBroadcastBusElmnt);
|
||||
|
||||
Element renameElmnt = (Element)(coreElmnt.getElementsByTagName("Rename")).item(0);
|
||||
core.RenameWidth = Integer.parseInt(getImmediateString("Width", renameElmnt));
|
||||
|
||||
Element ratElmnt = (Element)(renameElmnt.getElementsByTagName("RAT")).item(0);
|
||||
core.intRATPower = getEnergyConfig((Element)ratElmnt.getElementsByTagName("Integer").item(0));
|
||||
|
@ -1066,9 +1070,14 @@ public class XMLParser
|
|||
{
|
||||
branchPredictor.predictorMode = BP.TAGE;
|
||||
}
|
||||
else if(tempStr.equalsIgnoreCase("TAGE-SC-L"))
|
||||
{
|
||||
branchPredictor.predictorMode = BP.TAGE_SC_L;
|
||||
}
|
||||
branchPredictor.PCBits = Integer.parseInt(getImmediateString("PCBits", predictorElmnt));
|
||||
branchPredictor.BHRsize = Integer.parseInt(getImmediateString("BHRsize", predictorElmnt));
|
||||
branchPredictor.saturating_bits = Integer.parseInt(getImmediateString("SaturatingBits", predictorElmnt));
|
||||
branchPredictor.TAGESCLLibDirectory = getImmediateString("TAGESCLLibDirectory", predictorElmnt);
|
||||
}
|
||||
|
||||
private static boolean setDirectoryCoherent(String immediateString) {
|
||||
|
|
|
@ -47,12 +47,12 @@ TDP = 15W
|
|||
<!-- If the "store packets in a file" option is set to true, this parameter indicates the basename for the trace files -->
|
||||
<!--One trace file is maintained for each core. The name of the trace file for core n is basename_n.gz-->
|
||||
<!--We do not allow overwriting of trace files. So if a tracefile with same name is pre-existing, kindly rename it-->
|
||||
<BasenameForTraceFiles>/home/rajshekar/tmp/gcc_trace</BasenameForTraceFiles>
|
||||
<BasenameForTraceFiles>/home/rajshekar/projects/tejas/tests/test1_trace</BasenameForTraceFiles>
|
||||
|
||||
<PinTool>/home/rajshekar/softwares/pin-97554/</PinTool>
|
||||
<PinTool>/home/rajshekar/softwares/pin-98332/</PinTool>
|
||||
<PinInstrumentor>/home/rajshekar/projects/tejas/workspace/Tejas/src/emulator/pin/obj-pin/causalityTool.so</PinInstrumentor>
|
||||
<QemuTool>TODO/home/prathmesh/workspace/qemu/x86_64-linux-user/qemu-x86_64 /home/prathmesh/tmp/testQemu.o</QemuTool>
|
||||
<ShmLibDirectory>/home/rajshekar/resources/tejas_configs/</ShmLibDirectory>
|
||||
<ShmLibDirectory>/home/rajshekar/projects/tejas/workspace/Tejas/src/emulator/pin/obj-comm/</ShmLibDirectory>
|
||||
<GetBenchmarkPIDScript>/home/rajshekar_resources/tejas_configs/getBenchmarkPID.sh</GetBenchmarkPIDScript>
|
||||
<KillEmulatorScript>/home/rajshekar/resources/tejas_configs/killAllDescendents.sh</KillEmulatorScript>
|
||||
</Emulator>
|
||||
|
@ -112,11 +112,12 @@ TDP = 15W
|
|||
<PipelineType>outOfOrder</PipelineType> <!--inOrder,outOfOrder(set issue width for multi-issue in-order)-->
|
||||
|
||||
<BranchPredictor>
|
||||
<Predictor_Mode>TAGE</Predictor_Mode> <!-- Legal Values are NoPredictor, PerfectPredictor, AlwaysTaken, AlwaysNotTaken, Tournament, Bimodal, GAg, GAp, GShare, PAg, PAp, TAGE -->
|
||||
<Predictor_Mode>TAGE</Predictor_Mode> <!-- Legal Values are NoPredictor, PerfectPredictor, AlwaysTaken, AlwaysNotTaken, Tournament, Bimodal, GAg, GAp, GShare, PAg, PAp, TAGE, TAGE-SC-L -->
|
||||
<PCBits>8</PCBits>
|
||||
<BHRsize>16</BHRsize>
|
||||
<BranchMispredPenalty>17</BranchMispredPenalty> <!--Branch misprediction penalty--><!-- https://www.7-cpu.com/cpu/Skylake.html -->
|
||||
<SaturatingBits>2</SaturatingBits>
|
||||
<TAGESCLLibDirectory>/home/rajshekar/projects/tejas/workspace/Tejas/src/simulator/pipeline/branchpredictor/TAGESCL/</TAGESCLLibDirectory>
|
||||
<LeakageEnergy>0.0178</LeakageEnergy>
|
||||
<DynamicEnergy>0.0962</DynamicEnergy>
|
||||
</BranchPredictor>
|
||||
|
@ -165,6 +166,10 @@ TDP = 15W
|
|||
<DynamicEnergy>0.06792852941</DynamicEnergy> <!-- TODO this number isn't right -->
|
||||
</STLB>
|
||||
|
||||
<MicroOpCache>
|
||||
<NumberOfMicroOps>2304</NumberOfMicroOps>
|
||||
</MicroOpCache>
|
||||
|
||||
<Decode> <!--Instruction decode-->
|
||||
<Width>6</Width>
|
||||
<LeakageEnergy>0.0598</LeakageEnergy>
|
||||
|
@ -172,6 +177,7 @@ TDP = 15W
|
|||
</Decode>
|
||||
|
||||
<Rename>
|
||||
<Width>6</Width>
|
||||
<RAT>
|
||||
<Integer>
|
||||
<LeakageEnergy>0.0045</LeakageEnergy>
|
||||
|
|
|
@ -207,6 +207,10 @@ public class Core extends SimulationElement{
|
|||
return coreConfig.DecodeWidth;
|
||||
}
|
||||
|
||||
public int getRenameWidth() {
|
||||
return coreConfig.RenameWidth;
|
||||
}
|
||||
|
||||
public int getVectorRegisterFileSize() {
|
||||
return coreConfig.VectorRegFileSize;
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import memorysystem.nuca.NucaCache;
|
|||
import memorysystem.nuca.NucaCache.NucaType;
|
||||
import net.NocInterface;
|
||||
import net.Router;
|
||||
import pipeline.outoforder.OutOrderExecutionEngine;
|
||||
import config.CoreConfig;
|
||||
import config.EmulatorConfig;
|
||||
import config.EnergyConfig;
|
||||
|
@ -33,6 +34,7 @@ import emulatorinterface.translator.qemuTranslationCache.TranslatedInstructionCa
|
|||
|
||||
import dram.MainMemoryDRAMController;
|
||||
import config.MainMemoryConfig;
|
||||
import config.PipelineType;
|
||||
|
||||
public class Statistics {
|
||||
|
||||
|
@ -199,12 +201,22 @@ public class Statistics {
|
|||
outputFileWriter.write("time taken\t=\t" + formatDouble((double)coreCyclesTaken[i]/GlobalClock.effectiveGlobalClockFrequency) + " microseconds\n");
|
||||
outputFileWriter.write("\n");
|
||||
|
||||
if(cores[i].getCoreConfig().pipelineType == PipelineType.outOfOrder)
|
||||
{
|
||||
outputFileWriter.write("number of micro-op cache accesses = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getMicroOpCache().numSearches + "\n");
|
||||
outputFileWriter.write("micro-op cache hit rate = " + formatDouble((double)((OutOrderExecutionEngine)cores[i].getExecEngine()).getMicroOpCache().numHits/(double)((OutOrderExecutionEngine)cores[i].getExecEngine()).getMicroOpCache().numSearches) + "\n");
|
||||
outputFileWriter.write("\n");
|
||||
}
|
||||
|
||||
outputFileWriter.write("number of branches\t=\t" + cores[i].getExecEngine().getNumberOfBranches() + "\n");
|
||||
outputFileWriter.write("number of mispredicted branches\t=\t" + cores[i].getExecEngine().getNumberOfMispredictedBranches() + "\n");
|
||||
outputFileWriter.write("branch predictor accuracy\t=\t" + formatDouble((double)((double)(1.0 - (double)cores[i].getExecEngine().getNumberOfMispredictedBranches()/(double)cores[i].getExecEngine().getNumberOfBranches())*100.0)) + " %\n");
|
||||
outputFileWriter.write("number of jumps\t=\t" + cores[i].getExecEngine().getNumberOfJumps() + "\n");
|
||||
outputFileWriter.write("number of mispredicted jump targets\t=\t" + cores[i].getExecEngine().getNumberOfMispredictedTargets() + "\n");
|
||||
outputFileWriter.write("target predictor accuracy\t=\t" + formatDouble((double)((double)(1.0 - (double)cores[i].getExecEngine().getNumberOfMispredictedTargets()/(double)cores[i].getExecEngine().getNumberOfJumps())*100.0)) + " %\n");
|
||||
outputFileWriter.write("number of predicate instructions\t=\t" + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().predicateCount + "\n");
|
||||
outputFileWriter.write("number of mispredicted predicate instructions\t=\t" + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().predicateMispredCount + "\n");
|
||||
outputFileWriter.write("predicate predictor accuracy\t=\t" + formatDouble((double)((double)(1.0 - (double)((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().predicateMispredCount/(double)((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().predicateCount)*100.0)) + " %\n");
|
||||
outputFileWriter.write("\n");
|
||||
|
||||
outputFileWriter.write("predictor type = " + coreConfig.branchPredictor.predictorMode + "\n");
|
||||
|
@ -213,6 +225,14 @@ public class Statistics {
|
|||
outputFileWriter.write("Saturating bits = " + coreConfig.branchPredictor.saturating_bits + "\n");
|
||||
outputFileWriter.write("\n");
|
||||
|
||||
outputFileWriter.write("\nIW Full stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall1Count());
|
||||
outputFileWriter.write("\nrename stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall2Count());
|
||||
outputFileWriter.write("\nLSQ Full stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall3Count());
|
||||
outputFileWriter.write("\nROB Full stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall4Count());
|
||||
outputFileWriter.write("\nMispred stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall5Count());
|
||||
outputFileWriter.write("\nSerialization instruction stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall6Count());
|
||||
outputFileWriter.write("\n");
|
||||
|
||||
}
|
||||
outputFileWriter.write("\n");
|
||||
}
|
||||
|
|
|
@ -301,6 +301,13 @@ public class LSQ extends SimulationElement
|
|||
}
|
||||
|
||||
public boolean isFull(boolean isLoad)
|
||||
{
|
||||
if(privIsFull(isLoad))
|
||||
freeOneEntry(isLoad);
|
||||
return privIsFull(isLoad);
|
||||
}
|
||||
|
||||
private boolean privIsFull(boolean isLoad)
|
||||
{
|
||||
if(isLoad)
|
||||
{
|
||||
|
@ -495,52 +502,10 @@ committed
|
|||
LSQEntry tmpEntry = lsqueue[i];
|
||||
|
||||
// if it is a store, send the request to the cache
|
||||
if(tmpEntry.getType() == LSQEntry.LSQEntryType.STORE)
|
||||
if(tmpEntry.getType() == LSQEntry.LSQEntryType.STORE
|
||||
|| tmpEntry.getType() == LSQEntryType.LOAD && tmpEntry.isForwarded())
|
||||
{
|
||||
if(tmpEntry.isValid() == false)
|
||||
{
|
||||
misc.Error.showErrorAndExit("store not ready to be committed");
|
||||
}
|
||||
|
||||
boolean requestIssued =
|
||||
containingMemSys.issueRequestToL1Cache(RequestType.Cache_Write,
|
||||
tmpEntry.getAddr());
|
||||
|
||||
if(requestIssued == false)
|
||||
{
|
||||
event.addEventTime(1);
|
||||
event.getEventQ().addEvent(event);
|
||||
break; //removals must be in-order : if u can't commit the operation at the head, u can't commit the ones that follow it
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if(head == tail)
|
||||
{
|
||||
head = tail = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
this.head = this.incrementQ(this.head);
|
||||
}
|
||||
this.curNumStoresInQ--;
|
||||
tmpEntry.setRemoved(true);
|
||||
}
|
||||
}
|
||||
|
||||
//If it is a LOAD which has received its value
|
||||
else if (tmpEntry.isForwarded())
|
||||
{
|
||||
if(head == tail)
|
||||
{
|
||||
head = tail = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
this.head = this.incrementQ(this.head);
|
||||
}
|
||||
this.curNumLoadsInQ--;
|
||||
tmpEntry.setRemoved(true);
|
||||
tmpEntry.setCanBeRemoved(true);
|
||||
}
|
||||
|
||||
//If it is a LOAD which has not yet received its value
|
||||
|
@ -559,6 +524,82 @@ committed
|
|||
//incrementNumAccesses(1);
|
||||
}
|
||||
|
||||
public void freeOneEntry(boolean isLoadToBeRemoved)
|
||||
{
|
||||
boolean removedEnough = false;
|
||||
|
||||
while(removedEnough == false)
|
||||
{
|
||||
LSQEntry tmpEntry = lsqueue[head];
|
||||
|
||||
if(tmpEntry.isCanBeRemoved() == false)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// if it is a store, send the request to the cache
|
||||
if(tmpEntry.getType() == LSQEntry.LSQEntryType.STORE)
|
||||
{
|
||||
if(tmpEntry.isValid() == false)
|
||||
{
|
||||
misc.Error.showErrorAndExit("store not ready to be committed");
|
||||
}
|
||||
|
||||
boolean requestIssued =
|
||||
containingMemSys.issueRequestToL1Cache(RequestType.Cache_Write,
|
||||
tmpEntry.getAddr());
|
||||
|
||||
if(requestIssued == false)
|
||||
{
|
||||
return; //removals must be in-order : if u can't commit the operation at the head, u can't commit the ones that follow it
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if(head == tail)
|
||||
{
|
||||
head = tail = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
this.head = this.incrementQ(this.head);
|
||||
}
|
||||
this.curNumStoresInQ--;
|
||||
tmpEntry.setRemoved(true);
|
||||
|
||||
if(isLoadToBeRemoved == false)
|
||||
removedEnough = true;
|
||||
}
|
||||
}
|
||||
|
||||
//If it is a LOAD which has received its value
|
||||
else if (tmpEntry.isForwarded())
|
||||
{
|
||||
if(head == tail)
|
||||
{
|
||||
head = tail = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
this.head = this.incrementQ(this.head);
|
||||
}
|
||||
this.curNumLoadsInQ--;
|
||||
tmpEntry.setRemoved(true);
|
||||
|
||||
if(isLoadToBeRemoved == true)
|
||||
removedEnough = true;
|
||||
}
|
||||
|
||||
//If it is a LOAD which has not yet received its value
|
||||
else
|
||||
{
|
||||
System.err.println("Error in LSQ " +this.containingMemSys.coreID+ " : ROB sent commit for a load which has not received its value");
|
||||
misc.Error.showErrorAndExit(tmpEntry.getIndexInQ() + " : load : " + tmpEntry.getAddr());
|
||||
}
|
||||
//incrementNumAccesses(1);
|
||||
}
|
||||
}
|
||||
|
||||
void incrementNumAccesses(int incrementBy)
|
||||
{
|
||||
numAccesses += incrementBy;
|
||||
|
|
|
@ -33,7 +33,7 @@ public class LSQEntry
|
|||
private boolean valid;
|
||||
private boolean issued;
|
||||
private boolean forwarded;//Whether the load has got its value or not
|
||||
|
||||
private boolean canBeRemoved;
|
||||
private boolean removed; //If the entry has been committed and removed from the LSQ
|
||||
|
||||
public enum LSQEntryType {LOAD, STORE};
|
||||
|
@ -45,6 +45,7 @@ public class LSQEntry
|
|||
valid = false;
|
||||
issued = false;
|
||||
forwarded = false;
|
||||
canBeRemoved = false;
|
||||
removed = true;
|
||||
}
|
||||
|
||||
|
@ -54,6 +55,7 @@ public class LSQEntry
|
|||
valid = false;
|
||||
issued = false;
|
||||
forwarded = false;
|
||||
canBeRemoved = false;
|
||||
removed = false;
|
||||
}
|
||||
|
||||
|
@ -108,6 +110,14 @@ public class LSQEntry
|
|||
this.forwarded = forwarded;
|
||||
}
|
||||
|
||||
public boolean isCanBeRemoved() {
|
||||
return canBeRemoved;
|
||||
}
|
||||
|
||||
public void setCanBeRemoved(boolean canBeRemoved) {
|
||||
this.canBeRemoved = canBeRemoved;
|
||||
}
|
||||
|
||||
protected boolean isRemoved() {
|
||||
return removed;
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ import pipeline.branchpredictor.PApPredictor;
|
|||
import pipeline.branchpredictor.PerfectPredictor;
|
||||
import pipeline.branchpredictor.TournamentPredictor;
|
||||
import pipeline.branchpredictor.TAGE;
|
||||
import pipeline.branchpredictor.TAGESCL.TAGESCL;
|
||||
import pipeline.branchpredictor.BTB;
|
||||
import generic.Core;
|
||||
import generic.GenericCircularQueue;
|
||||
|
@ -88,6 +89,8 @@ public abstract class ExecutionEngine {
|
|||
this.branchPredictor = new TAGE(this,
|
||||
coreConfig.branchPredictor.PCBits,
|
||||
coreConfig.branchPredictor.saturating_bits);
|
||||
else if(coreConfig.branchPredictor.predictorMode == BP.TAGE_SC_L)
|
||||
this.branchPredictor = new TAGESCL(this);
|
||||
|
||||
BTB = new BTB(coreConfig.branchPredictor.PCBits, coreConfig.branchPredictor.BHRsize);
|
||||
}
|
||||
|
|
|
@ -10,13 +10,13 @@ public class OpTypeToFUTypeMapping {
|
|||
public static FunctionalUnitType[] intALUFUs = {FunctionalUnitType.integerALU};
|
||||
public static FunctionalUnitType[] intMulFUs = {FunctionalUnitType.integerMul};
|
||||
public static FunctionalUnitType[] intDivFUs = {FunctionalUnitType.integerDiv};
|
||||
public static FunctionalUnitType[] floatALUFUs = {FunctionalUnitType.floatALU, FunctionalUnitType.FMA};
|
||||
public static FunctionalUnitType[] floatMulFUs = {FunctionalUnitType.floatMul, FunctionalUnitType.FMA};
|
||||
public static FunctionalUnitType[] floatALUFUs = {FunctionalUnitType.FMA, FunctionalUnitType.floatALU};
|
||||
public static FunctionalUnitType[] floatMulFUs = {FunctionalUnitType.FMA, FunctionalUnitType.floatMul};
|
||||
public static FunctionalUnitType[] floatDivFUs = {FunctionalUnitType.floatDiv};
|
||||
public static FunctionalUnitType[] intVectorALUFUs = {FunctionalUnitType.integerVectorALU};
|
||||
public static FunctionalUnitType[] intVectorMulFUs = {FunctionalUnitType.integerVectorMul};
|
||||
public static FunctionalUnitType[] floatVectorALUFUs = {FunctionalUnitType.floatVectorALU, FunctionalUnitType.FMA};
|
||||
public static FunctionalUnitType[] floatVectorMulFUs = {FunctionalUnitType.floatVectorMul, FunctionalUnitType.FMA};
|
||||
public static FunctionalUnitType[] floatVectorALUFUs = {FunctionalUnitType.FMA, FunctionalUnitType.floatVectorALU};
|
||||
public static FunctionalUnitType[] floatVectorMulFUs = {FunctionalUnitType.FMA, FunctionalUnitType.floatVectorMul};
|
||||
public static FunctionalUnitType[] FMAFUs = {FunctionalUnitType.FMA};
|
||||
public static FunctionalUnitType[] VectorFMAFUs = {FunctionalUnitType.FMA};
|
||||
public static FunctionalUnitType[] loadFUs = {FunctionalUnitType.load};
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
# Builds the JNI header and the native shared library (libnative.so) from
# TAGESCLInvoker.java and its native implementation file.
#
# JNI_JAVA_HOME selects the JDK whose JNI headers are used. It defaults to the
# Java 11 location; for Java 8 override it on the command line, e.g.
#   make JNI_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
JNI_JAVA_HOME ?= /usr/lib/jvm/java-11-openjdk-amd64

# Neither target produces a file of its own name; declaring them phony keeps
# make from treating a stray file called "all" or "clean" as up to date.
.PHONY: all clean

all:
	javac -h . TAGESCLInvoker.java
	gcc -fPIC -I$(JNI_JAVA_HOME)/include/ -I$(JNI_JAVA_HOME)/include/linux/ -shared -o libnative.so pipeline_branchpredictor_TAGESCL_TAGESCLInvoker.cc

clean:
	rm -f *.h
	rm -f *.class
	rm -f *.so
|
|
@ -0,0 +1,32 @@
|
|||
package pipeline.branchpredictor.TAGESCL;
|
||||
|
||||
import pipeline.ExecutionEngine;
|
||||
import pipeline.branchpredictor.BranchPredictor;
|
||||
|
||||
public class TAGESCL extends BranchPredictor {
|
||||
|
||||
TAGESCLInvoker ti;
|
||||
public TAGESCL(ExecutionEngine containingExecEngine)
|
||||
{
|
||||
super(containingExecEngine);
|
||||
ti = new TAGESCLInvoker(containingExecEngine.getContainingCore().getCoreConfig().branchPredictor.TAGESCLLibDirectory);
|
||||
}
|
||||
|
||||
public boolean predict(long address, boolean outcome)
|
||||
{
|
||||
return ti.invokerPredict(address);
|
||||
}
|
||||
|
||||
public void Train(long address, boolean outcome, boolean predict)
|
||||
{
|
||||
misc.Error.showErrorAndExit("use the other Train() function");
|
||||
//don't use this!!
|
||||
//use Train(long PC, int opType, boolean resolveDir, boolean predDir, long branchTarget)
|
||||
}
|
||||
|
||||
public void Train(long PC, int opType, boolean resolveDir, boolean predDir, long branchTarget)
|
||||
{
|
||||
ti.invokerTrain (PC, opType, resolveDir, predDir, branchTarget);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package pipeline.branchpredictor.TAGESCL;
|
||||
|
||||
/**
 * Java-side wrapper around three native methods whose implementations are
 * expected to come from the libnative.so library loaded in the constructor.
 */
public class TAGESCLInvoker {

	// Native entry points; the public invoker* methods below wrap predict and train.
	private native void initialize();
	private native boolean predict (long PC);
	private native void train (long PC, int opType, boolean resolveDir, boolean predDir, long branchTarget);

	/**
	 * Loads "libnative.so" from the given directory and then calls the native
	 * initialize() method.
	 *
	 * @param TAGESCLLibDirectory directory that contains libnative.so; the
	 *        file name is appended after a "/" separator
	 */
	public TAGESCLInvoker(String TAGESCLLibDirectory)
	{
		final String nativeLibraryPath = TAGESCLLibDirectory + "/libnative.so";
		System.load(nativeLibraryPath);
		this.initialize();
	}

	/**
	 * @param address value passed to the native predict method
	 * @return the value returned by the native predict method
	 */
	public boolean invokerPredict(long address)
	{
		final boolean nativePrediction = this.predict(address);
		return nativePrediction;
	}

	/**
	 * Passes all five arguments, unchanged and in the same order, to the
	 * native train method.
	 */
	public void invokerTrain(long PC, int opType, boolean resolveDir, boolean predDir, long branchTarget)
	{
		this.train(PC, opType, resolveDir, predDir, branchTarget);
	}

}
|
File diff suppressed because it is too large
Load Diff
|
@ -39,9 +39,7 @@ public class DecodeLogic extends SimulationElement {
|
|||
|
||||
public void performDecode()
|
||||
{
|
||||
if(containingExecutionEngine.isToStall5() == true /*pipeline stalled due to branch mis-prediction*/
|
||||
|| containingExecutionEngine.isToStall1() == true /*IW full*/
|
||||
|| containingExecutionEngine.isToStall2() == true /*rename stall*/)
|
||||
if(containingExecutionEngine.isToStall5() == true /*pipeline stalled due to branch mis-prediction*/)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package pipeline.outoforder;
|
||||
|
||||
import config.SimulationConfig;
|
||||
import config.SystemConfig;
|
||||
import emulatorinterface.translator.x86.registers.Registers;
|
||||
import main.ArchitecturalComponent;
|
||||
import main.CustomObjectPool;
|
||||
|
@ -11,6 +12,7 @@ import generic.Core;
|
|||
import generic.Event;
|
||||
import generic.EventQueue;
|
||||
import generic.GenericCircularQueue;
|
||||
import generic.GlobalClock;
|
||||
import generic.Instruction;
|
||||
import generic.OperationType;
|
||||
import generic.PortType;
|
||||
|
@ -25,12 +27,14 @@ public class FetchLogic extends SimulationElement {
|
|||
GenericCircularQueue<Instruction>[] inputToPipeline;
|
||||
int inputPipeToReadNext;
|
||||
ICacheBuffer iCacheBuffer;
|
||||
MicroOpCache microOpCache;
|
||||
GenericCircularQueue<Instruction> fetchBuffer;
|
||||
int fetchWidth;
|
||||
OperationType[] instructionsToBeDropped;
|
||||
boolean sleep;
|
||||
|
||||
long serialNo;
|
||||
long lastValidIPSeen = -1;
|
||||
|
||||
public FetchLogic(Core core, OutOrderExecutionEngine execEngine)
|
||||
{
|
||||
|
@ -38,6 +42,7 @@ public class FetchLogic extends SimulationElement {
|
|||
this.core = core;
|
||||
this.execEngine = execEngine;
|
||||
fetchBuffer = execEngine.getFetchBuffer();
|
||||
microOpCache = execEngine.getMicroOpCache();
|
||||
fetchWidth = core.getDecodeWidth();
|
||||
inputPipeToReadNext = 0;
|
||||
sleep = false;
|
||||
|
@ -77,13 +82,9 @@ public class FetchLogic extends SimulationElement {
|
|||
|
||||
Instruction newInstruction;
|
||||
|
||||
if(!execEngine.isToStall1() &&
|
||||
!execEngine.isToStall2() &&
|
||||
!execEngine.isToStall3() &&
|
||||
!execEngine.isToStall4() &&
|
||||
!execEngine.isToStall5() &&
|
||||
!execEngine.isToStall6())
|
||||
{
|
||||
if(execEngine.isToStall5())
|
||||
return;
|
||||
|
||||
//add instructions, for whom "fetch" from iCache has completed, to fetch buffer
|
||||
//decode stage reads from this buffer
|
||||
for(int i = 0; i < fetchWidth; i++)
|
||||
|
@ -97,6 +98,11 @@ public class FetchLogic extends SimulationElement {
|
|||
if(newInstruction != null)
|
||||
{
|
||||
fetchBuffer.enqueue(newInstruction);
|
||||
|
||||
if(SimulationConfig.debugMode)
|
||||
{
|
||||
System.out.println("fetched : " + GlobalClock.getCurrentTime()/core.getStepSize() + " : " + newInstruction);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -104,7 +110,6 @@ public class FetchLogic extends SimulationElement {
|
|||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//this loop reads from inputToPipeline and places the instruction in iCacheBuffer
|
||||
//fetch of the instruction is also issued to the iCache
|
||||
|
@ -191,10 +196,32 @@ public class FetchLogic extends SimulationElement {
|
|||
{
|
||||
// The first micro-operation of an instruction has a valid CISC IP. All the subsequent
|
||||
// micro-ops will have IP = -1 (meaning invalid). We must not forward these requests to the iCache.
|
||||
if(newInstruction.getCISCProgramCounter()!=-1)
|
||||
// If the micro-ops are available in the micro-op cache, we don't need to access the i-cache
|
||||
if(newInstruction.getCISCProgramCounter()!=-1 && newInstruction.getCISCProgramCounter() != lastValidIPSeen
|
||||
&& microOpCache.isPresentInCache(newInstruction.getCISCProgramCounter()) == false)
|
||||
{
|
||||
execEngine.getCoreMemorySystem().issueRequestToInstrCache(newInstruction.getCISCProgramCounter());
|
||||
}
|
||||
else
|
||||
{
|
||||
iCacheBuffer.fetchComplete[iCacheBuffer.tail] = true;
|
||||
if(newInstruction.getCISCProgramCounter()==-1 || newInstruction.getCISCProgramCounter() == lastValidIPSeen)
|
||||
microOpCache.isPresentInCache(lastValidIPSeen); //accessing micro-op cache just to get the micro-op cache LRU and counters right
|
||||
}
|
||||
|
||||
if(newInstruction.getCISCProgramCounter()!=-1)
|
||||
{
|
||||
lastValidIPSeen = newInstruction.getCISCProgramCounter();
|
||||
}
|
||||
else
|
||||
{
|
||||
newInstruction.setCISCProgramCounter(lastValidIPSeen);
|
||||
}
|
||||
|
||||
if(SimulationConfig.debugMode)
|
||||
{
|
||||
System.out.println("fetch_initiated : " + GlobalClock.getCurrentTime()/core.getStepSize() + " : " + newInstruction);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -235,7 +262,8 @@ public class FetchLogic extends SimulationElement {
|
|||
|
||||
public void processCompletionOfMemRequest(long address)
|
||||
{
|
||||
iCacheBuffer.updateFetchComplete(address);
|
||||
int numberOfMicroOpsUpdated = iCacheBuffer.updateFetchComplete(address);
|
||||
microOpCache.addToCache(address, numberOfMicroOpsUpdated);
|
||||
}
|
||||
|
||||
public GenericCircularQueue<Instruction>[] getInputToPipeline() {
|
||||
|
|
|
@ -78,21 +78,26 @@ public class ICacheBuffer {
|
|||
return toBeReturned;
|
||||
}
|
||||
|
||||
public void updateFetchComplete(long programCounter)
|
||||
public int updateFetchComplete(long programCounter)
|
||||
{
|
||||
int numberOfMicroOpsUpdated = 0;
|
||||
|
||||
if(head == -1)
|
||||
return;
|
||||
return numberOfMicroOpsUpdated;
|
||||
|
||||
for(int i = head; ; i = (i + 1)%size)
|
||||
{
|
||||
if(buffer[i] != null && buffer[i].getCISCProgramCounter() == programCounter)
|
||||
{
|
||||
fetchComplete[i] = true;
|
||||
numberOfMicroOpsUpdated++;
|
||||
}
|
||||
|
||||
if(i == tail)
|
||||
break;
|
||||
}
|
||||
|
||||
return numberOfMicroOpsUpdated;
|
||||
}
|
||||
|
||||
public boolean isFull()
|
||||
|
|
|
@ -16,7 +16,7 @@ public class IWPushLogic extends SimulationElement {
|
|||
OutOrderExecutionEngine execEngine;
|
||||
GenericCircularQueue<ReorderBufferEntry> renameBuffer;
|
||||
InstructionWindow IW;
|
||||
int decodeWidth;
|
||||
int renameWidth;
|
||||
|
||||
public IWPushLogic(Core core, OutOrderExecutionEngine execEngine)
|
||||
{
|
||||
|
@ -25,7 +25,7 @@ public class IWPushLogic extends SimulationElement {
|
|||
this.execEngine = execEngine;
|
||||
renameBuffer = execEngine.getRenameBuffer();
|
||||
IW = execEngine.getInstructionWindow();
|
||||
decodeWidth = core.getDecodeWidth();
|
||||
renameWidth = core.getRenameWidth();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -39,7 +39,7 @@ public class IWPushLogic extends SimulationElement {
|
|||
return;
|
||||
}
|
||||
|
||||
for(int i = 0; i < decodeWidth; i++)
|
||||
for(int i = 0; i < renameWidth; i++)
|
||||
{
|
||||
ReorderBufferEntry headROBEntry = renameBuffer.peek(0);
|
||||
if(headROBEntry != null)
|
||||
|
|
|
@ -0,0 +1,107 @@
|
|||
package pipeline.outoforder;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Vector;
|
||||
|
||||
import config.SimulationConfig;
|
||||
import generic.Event;
|
||||
import generic.EventQueue;
|
||||
import generic.GlobalClock;
|
||||
import generic.PortType;
|
||||
import generic.SimulationElement;
|
||||
|
||||
public class MicroOpCache extends SimulationElement {
|
||||
|
||||
int maxSize; //in terms of number of micro-ops
|
||||
int curSize;
|
||||
HashMap<Long, MicroOpCacheEntry> uopCache;
|
||||
|
||||
public long numAdditions;
|
||||
public long numSearches;
|
||||
public long numHits;
|
||||
|
||||
public MicroOpCache(int maxSize) {
|
||||
super(PortType.Unlimited, -1, -1, -1, -1);
|
||||
this.maxSize = maxSize;
|
||||
uopCache = new HashMap<Long, MicroOpCacheEntry>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleEvent(EventQueue eventQ, Event event) {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
public boolean isPresentInCache(long searchPC) //will be called for each micro-op (and not each CISC instruction)
|
||||
{
|
||||
numSearches++;
|
||||
|
||||
MicroOpCacheEntry entry = uopCache.get(searchPC);
|
||||
if(entry != null)
|
||||
{
|
||||
entry.timeLastUsed = GlobalClock.getCurrentTime();
|
||||
numHits++;
|
||||
if(SimulationConfig.debugMode)
|
||||
{
|
||||
System.out.println("hit in microp-cache : " + GlobalClock.getCurrentTime()/24 + " : " + Long.toHexString(searchPC));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public void addToCache(long newPC, int numberOfMicroOps)
|
||||
{
|
||||
if(uopCache.containsKey(newPC) == false)
|
||||
{
|
||||
//remove old entries to make place for the new one
|
||||
while(curSize + numberOfMicroOps > maxSize)
|
||||
{
|
||||
//find LRU PC
|
||||
long LRU_PC = -1;
|
||||
MicroOpCacheEntry LRUEntry = null;
|
||||
for(Map.Entry<Long, MicroOpCacheEntry> entry : uopCache.entrySet())
|
||||
{
|
||||
if(LRUEntry == null)
|
||||
{
|
||||
LRUEntry = entry.getValue();
|
||||
LRU_PC = entry.getKey();
|
||||
}
|
||||
else
|
||||
{
|
||||
if(entry.getValue().timeLastUsed < LRUEntry.timeLastUsed)
|
||||
{
|
||||
LRUEntry = entry.getValue();
|
||||
LRU_PC = entry.getKey();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//remove all micro-ops corresponding to LRU PC
|
||||
uopCache.remove(LRU_PC);
|
||||
curSize -= LRUEntry.numberOfMicroOps;
|
||||
}
|
||||
|
||||
//add new micro-ops
|
||||
MicroOpCacheEntry newEntry = new MicroOpCacheEntry();
|
||||
newEntry.numberOfMicroOps = numberOfMicroOps;
|
||||
newEntry.timeLastUsed = GlobalClock.getCurrentTime();
|
||||
uopCache.put(newPC, newEntry);
|
||||
curSize += numberOfMicroOps;
|
||||
|
||||
numAdditions += numberOfMicroOps;
|
||||
if(SimulationConfig.debugMode)
|
||||
{
|
||||
System.out.println("add to microp-cache : " + GlobalClock.getCurrentTime()/24 + " : " + Long.toHexString(newPC));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Bookkeeping record stored as the value type of MicroOpCache.uopCache,
 * one record per cached program counter.
 */
class MicroOpCacheEntry
|
||||
{
|
||||
// number of micro-ops this entry contributes to MicroOpCache.curSize
int numberOfMicroOps;
|
||||
// GlobalClock time of insertion or of the latest hit; the smallest value is evicted first
long timeLastUsed;
|
||||
}
|
|
@ -22,6 +22,7 @@ public class OutOrderExecutionEngine extends ExecutionEngine {
|
|||
//components of the execution engine
|
||||
private ICacheBuffer iCacheBuffer;
|
||||
private FetchLogic fetcher;
|
||||
private MicroOpCache microOpCache;
|
||||
private GenericCircularQueue<Instruction> fetchBuffer;
|
||||
private DecodeLogic decoder;
|
||||
private GenericCircularQueue<ReorderBufferEntry> decodeBuffer;
|
||||
|
@ -81,11 +82,12 @@ public class OutOrderExecutionEngine extends ExecutionEngine {
|
|||
vectorRegisterFile = new RegisterFile(core, core.getVectorRegisterFileSize());
|
||||
vectorRenameTable = new RenameTable(this, core.getNVectorArchitecturalRegisters(), core.getVectorRegisterFileSize(), vectorRegisterFile, core.getNo_of_input_pipes());
|
||||
|
||||
fetchBuffer = new GenericCircularQueue(Instruction.class, core.getDecodeWidth());
|
||||
fetchBuffer = new GenericCircularQueue(Instruction.class, (core.getDecodeWidth()>core.getRenameWidth()?core.getDecodeWidth():core.getRenameWidth()));
|
||||
microOpCache = new MicroOpCache(core.getCoreConfig().NoOfMicroOpCacheEntries);
|
||||
fetcher = new FetchLogic(core, this);
|
||||
decodeBuffer = new GenericCircularQueue(ReorderBufferEntry.class, core.getDecodeWidth());
|
||||
decodeBuffer = new GenericCircularQueue(ReorderBufferEntry.class, (core.getDecodeWidth()>core.getRenameWidth()?core.getDecodeWidth():core.getRenameWidth()));
|
||||
decoder = new DecodeLogic(core, this);
|
||||
renameBuffer = new GenericCircularQueue(ReorderBufferEntry.class, core.getDecodeWidth());
|
||||
renameBuffer = new GenericCircularQueue(ReorderBufferEntry.class, (core.getDecodeWidth()>core.getRenameWidth()?core.getDecodeWidth():core.getRenameWidth()));
|
||||
renamer = new RenameLogic(core, this);
|
||||
IWPusher = new IWPushLogic(core, this);
|
||||
selector = new SelectLogic(core, this);
|
||||
|
@ -202,6 +204,10 @@ public class OutOrderExecutionEngine extends ExecutionEngine {
|
|||
return renameBuffer;
|
||||
}
|
||||
|
||||
/** Returns the micro-op cache instance held in this engine's microOpCache field. */
public MicroOpCache getMicroOpCache() {
|
||||
return microOpCache;
|
||||
}
|
||||
|
||||
/** Returns the FetchLogic instance held in this engine's fetcher field. */
public FetchLogic getFetcher() {
|
||||
return fetcher;
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ public class RenameLogic extends SimulationElement {
|
|||
OutOrderExecutionEngine execEngine;
|
||||
GenericCircularQueue<ReorderBufferEntry> decodeBuffer;
|
||||
GenericCircularQueue<ReorderBufferEntry> renameBuffer;
|
||||
int decodeWidth;
|
||||
int renameWidth;
|
||||
|
||||
int threadID;
|
||||
Instruction instruction;
|
||||
|
@ -37,18 +37,17 @@ public class RenameLogic extends SimulationElement {
|
|||
this.execEngine = execEngine;
|
||||
decodeBuffer = execEngine.getDecodeBuffer();
|
||||
renameBuffer = execEngine.getRenameBuffer();
|
||||
decodeWidth = core.getDecodeWidth();
|
||||
renameWidth = core.getRenameWidth();
|
||||
}
|
||||
|
||||
public void performRename()
|
||||
{
|
||||
if(execEngine.isToStall5() == true /*pipeline stalled due to branch mis-prediction*/
|
||||
|| execEngine.isToStall1() == true /*IW full*/)
|
||||
if(execEngine.isToStall5() == true /*pipeline stalled due to branch mis-prediction*/)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
for(int i = 0; i < decodeWidth; i++)
|
||||
for(int i = 0; i < renameWidth; i++)
|
||||
{
|
||||
if(renameBuffer.isFull() == true)
|
||||
{
|
||||
|
|
|
@ -21,9 +21,11 @@ import java.io.OutputStreamWriter;
|
|||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
import main.CustomObjectPool;
|
||||
import pipeline.branchpredictor.TAGESCL.TAGESCL;
|
||||
import config.EmulatorConfig;
|
||||
import config.EnergyConfig;
|
||||
import config.SimulationConfig;
|
||||
import config.BranchPredictorConfig.BP;
|
||||
|
||||
public class ReorderBuffer extends SimulationElement{
|
||||
|
||||
|
@ -49,6 +51,8 @@ public class ReorderBuffer extends SimulationElement{
|
|||
int stall6Count;
|
||||
long branchCount;
|
||||
long mispredCount;
|
||||
public long predicateCount;
|
||||
public long predicateMispredCount;
|
||||
long jumpCount;
|
||||
long targetMispredCount;
|
||||
long lastValidIPSeen;
|
||||
|
@ -249,25 +253,44 @@ public class ReorderBuffer extends SimulationElement{
|
|||
if(firstOpType == OperationType.branch)
|
||||
{
|
||||
//perform prediction
|
||||
boolean prediction = execEngine.getBranchPredictor().predict(
|
||||
boolean prediction;
|
||||
prediction = execEngine.getBranchPredictor().predict(
|
||||
lastValidIPSeen,
|
||||
first.getInstruction().isBranchTaken());
|
||||
|
||||
if(prediction != first.getInstruction().isBranchTaken())
|
||||
{
|
||||
if(SimulationConfig.debugMode)
|
||||
{
|
||||
System.out.println("branch mispredicted : " + firstInstruction.getSerialNo());
|
||||
}
|
||||
|
||||
anyMispredictedBranch = true;
|
||||
mispredCount++;
|
||||
}
|
||||
this.execEngine.getBranchPredictor().incrementNumAccesses(1);
|
||||
|
||||
//train predictor
|
||||
if(core.getCoreConfig().branchPredictor.predictorMode != BP.TAGE_SC_L)
|
||||
{
|
||||
execEngine.getBranchPredictor().Train(
|
||||
lastValidIPSeen,
|
||||
firstInstruction.isBranchTaken(),
|
||||
prediction
|
||||
);
|
||||
this.execEngine.getBranchPredictor().incrementNumAccesses(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
((TAGESCL)execEngine.getBranchPredictor()).Train(
|
||||
lastValidIPSeen,
|
||||
9, //OPTYPE_JMP_DIRECT_COND,
|
||||
firstInstruction.isBranchTaken(),
|
||||
prediction,
|
||||
firstInstruction.getBranchTargetAddress()
|
||||
);
|
||||
}
|
||||
|
||||
this.execEngine.getBTB().GHRTrain(firstInstruction.isBranchTaken());
|
||||
this.execEngine.getBranchPredictor().incrementNumAccesses(2);
|
||||
|
||||
branchCount++;
|
||||
}
|
||||
|
@ -275,19 +298,75 @@ public class ReorderBuffer extends SimulationElement{
|
|||
//jump operation
|
||||
if(firstOpType == OperationType.jump)
|
||||
{
|
||||
long actualTarget = firstInstruction.getBranchTargetAddress();
|
||||
long actualTarget = first.getInstruction().getBranchTargetAddress();
|
||||
long predictedTarget = this.execEngine.getBTB().BTBPredict(lastValidIPSeen);
|
||||
|
||||
if(actualTarget != predictedTarget)
|
||||
{
|
||||
if(SimulationConfig.debugMode)
|
||||
{
|
||||
System.out.println("jump target mispredicted : " + firstInstruction.getSerialNo());
|
||||
}
|
||||
|
||||
anyMispredictedBranch = true;
|
||||
targetMispredCount++;
|
||||
}
|
||||
|
||||
this.execEngine.getBTB().BTBTrain(lastValidIPSeen, actualTarget);
|
||||
|
||||
if(core.getCoreConfig().branchPredictor.predictorMode == BP.TAGE_SC_L)
|
||||
{
|
||||
((TAGESCL)execEngine.getBranchPredictor()).Train(
|
||||
lastValidIPSeen,
|
||||
4, //OPTYPE_JMP_DIRECT_UNCOND,
|
||||
true,
|
||||
true,
|
||||
firstInstruction.getBranchTargetAddress()
|
||||
);
|
||||
}
|
||||
jumpCount++;
|
||||
}
|
||||
|
||||
//predicate prediction
|
||||
if(firstInstruction.isPredicate())
|
||||
{
|
||||
//perform prediction
|
||||
boolean prediction = execEngine.getBranchPredictor().predict(
|
||||
lastValidIPSeen,
|
||||
!first.getInstruction().isPredicateAndNotExecuted());
|
||||
if(prediction != !first.getInstruction().isPredicateAndNotExecuted())
|
||||
{
|
||||
if(SimulationConfig.debugMode)
|
||||
{
|
||||
System.out.println("predicate mispredicted : " + firstInstruction.getSerialNo());
|
||||
}
|
||||
|
||||
anyMispredictedBranch = true;
|
||||
predicateMispredCount++;
|
||||
}
|
||||
|
||||
//train predictor
|
||||
if(core.getCoreConfig().branchPredictor.predictorMode != BP.TAGE_SC_L)
|
||||
{
|
||||
execEngine.getBranchPredictor().Train(
|
||||
lastValidIPSeen,
|
||||
!firstInstruction.isPredicateAndNotExecuted(),
|
||||
prediction
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
((TAGESCL)execEngine.getBranchPredictor()).Train(
|
||||
lastValidIPSeen,
|
||||
9, //OPTYPE_JMP_DIRECT_COND,
|
||||
!firstInstruction.isPredicateAndNotExecuted(),
|
||||
prediction,
|
||||
firstInstruction.getBranchTargetAddress()
|
||||
);
|
||||
}
|
||||
predicateCount++;
|
||||
}
|
||||
|
||||
//Signal LSQ for committing the Instruction at the queue head
|
||||
if(firstOpType == OperationType.load || firstOpType == OperationType.store)
|
||||
{
|
||||
|
@ -648,6 +727,10 @@ public class ReorderBuffer extends SimulationElement{
|
|||
return stall5Count;
|
||||
}
|
||||
|
||||
/** Returns the current value of the stall6Count counter. */
public int getStall6Count() {
|
||||
return stall6Count;
|
||||
}
|
||||
|
||||
/**
 * Returns the branchCount counter, which the branch-handling code in this class
 * increments once per instruction of OperationType.branch it processes.
 */
public long getBranchCount() {
|
||||
return branchCount;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue