Predicate predictor calls added; LSQ logic changed to keep stores in the queue longer, giving LSQ forwarding a greater chance of succeeding; TAGE-SC-L branch predictor added; micro-op cache added; issue with the widths of the different pipeline stages fixed; bug fix: the FMA unit is now looked for before floatALU, floatMul, floatVectorALU and floatVectorMul; causalityTool now works with Java 11, Ubuntu 20.04 and pin-98332.

This commit is contained in:
Rajshekar K K 2023-06-06 18:10:28 +05:30
parent b834fdbefe
commit 776bb9cbfe
24 changed files with 2290 additions and 118 deletions

View File

@ -24,7 +24,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
using namespace std;
#include <fcntl.h> #include <fcntl.h>
#ifndef _WIN32 #ifndef _WIN32
@ -688,7 +688,7 @@ VOID Instruction(INS ins, VOID *v) {
UINT32 memOperands = INS_MemoryOperandCount(ins); UINT32 memOperands = INS_MemoryOperandCount(ins);
if (INS_IsBranchOrCall(ins))//INS_IsIndirectBranchOrCall(ins)) if (INS_IsControlFlow(ins))//INS_IsBranchOrCall(ins))//INS_IsIndirectBranchOrCall(ins))
{ {
INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR) BrnFun, IARG_THREAD_ID, INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR) BrnFun, IARG_THREAD_ID,
IARG_BRANCH_TARGET_ADDR, IARG_BRANCH_TAKEN, IARG_INST_PTR, IARG_BRANCH_TARGET_ADDR, IARG_BRANCH_TAKEN, IARG_INST_PTR,

View File

@ -1,4 +1,4 @@
PIN_KIT ?=/home/rajshekar/softwares/pin-97554/ PIN_KIT ?=/home/rajshekar/softwares/pin-98332/
CXX=$(shell make PIN_ROOT=$(PIN_KIT) VAR=CXX -f pin_makefile print_var) CXX=$(shell make PIN_ROOT=$(PIN_KIT) VAR=CXX -f pin_makefile print_var)
LINKER=$(shell make PIN_ROOT=$(PIN_KIT) VAR=LINKER -f pin_makefile print_var) LINKER=$(shell make PIN_ROOT=$(PIN_KIT) VAR=LINKER -f pin_makefile print_var)
TOOL_CXXFLAGS=$(shell make PIN_ROOT=$(PIN_KIT) VAR=TOOL_CXXFLAGS -f pin_makefile print_var) TOOL_CXXFLAGS=$(shell make PIN_ROOT=$(PIN_KIT) VAR=TOOL_CXXFLAGS -f pin_makefile print_var)
@ -50,6 +50,8 @@ $(BINDIR)/causalityTool.$(LIB_EXTENSION): $(BINDIR)/causalityTool.$(OBJ_EXTENSIO
$(BINDIR)/causalityTool.$(OBJ_EXTENSION): causalityTool.cpp $(COMMDIR)/IPCBase.h $(COMMDIR)/shm/shmem.h $(COMMDIR)/filePacket/filePacket.h $(COMMDIR)/shm/shmem.cc $(BINDIR)/causalityTool.$(OBJ_EXTENSION): causalityTool.cpp $(COMMDIR)/IPCBase.h $(COMMDIR)/shm/shmem.h $(COMMDIR)/filePacket/filePacket.h $(COMMDIR)/shm/shmem.cc
$(CXX) $(TOOL_CXXFLAGS) $(COMM_INCLUDE) -c causalityTool.cpp ../../simulator/emulatorinterface/communication/shm/shmem.cc $(CXX) $(TOOL_CXXFLAGS) $(COMM_INCLUDE) -c causalityTool.cpp ../../simulator/emulatorinterface/communication/shm/shmem.cc
# -p: do not fail when the output directories already exist (e.g. on a rebuild without a prior `make clean`)
mkdir -p $(JNIBINDIR)
mkdir -p $(BINDIR)
mv causalityTool.$(OBJ_EXTENSION) $(BINDIR)/causalityTool.$(OBJ_EXTENSION) mv causalityTool.$(OBJ_EXTENSION) $(BINDIR)/causalityTool.$(OBJ_EXTENSION)
mv shmem.$(OBJ_EXTENSION) $(BINDIR)/shmem.$(OBJ_EXTENSION) mv shmem.$(OBJ_EXTENSION) $(BINDIR)/shmem.$(OBJ_EXTENSION)
@ -58,16 +60,21 @@ $(BINDIR)/shmem.$(OBJ_EXTENSION): $(COMMDIR)/IPCBase.h $(COMMDIR)/shm/shmem.h $
################################ JNI stuff comes here ############################################ ################################ JNI stuff comes here ############################################
JNIPACKAGE = emulatorinterface.communication.shm.SharedMem #JNIPACKAGE = emulatorinterface.communication.shm.SharedMem #use this for java-8
JNINCLUDE =-I/usr/lib/jvm/java-8-openjdk-amd64/include/linux -I/usr/lib/jvm/java-8-openjdk-amd64/include JNIPACKAGE = ../../simulator/emulatorinterface/communication/shm/SharedMem.java #use this for java-11
#JNINCLUDE =-I/usr/lib/jvm/java-8-openjdk-amd64/include/linux -I/usr/lib/jvm/java-8-openjdk-amd64/include #use this for java-8
JNINCLUDE =-I/usr/lib/jvm/java-11-openjdk-amd64/include/linux -I/usr/lib/jvm/java-11-openjdk-amd64/include #use this for java-11
JNILinkingFlags = -shared -Wall $(POSITION_INDEPENDENCE) JNILinkingFlags = -shared -Wall $(POSITION_INDEPENDENCE)
JAVAH = javah -jni #JAVAH = javah -jni #use this for java-8
JAVAH = javac #use this for java-11
$(JNIBINDIR)/libshmlib.$(LIB_EXTENSION): $(JNIBINDIR)/SharedMem.h $(COMMDIR)/shm/JNIShm.c $(COMMDIR)/common.h $(JNIBINDIR)/libshmlib.$(LIB_EXTENSION): $(JNIBINDIR)/SharedMem.h $(COMMDIR)/shm/JNIShm.c $(COMMDIR)/common.h
$(shell $(JNICOMMAND)) $(shell $(JNICOMMAND))
$(JNIBINDIR)/SharedMem.h: $(TOPBINDIR)/emulatorinterface/communication/shm/SharedMem.class $(JNIBINDIR)/SharedMem.h: $(TOPBINDIR)/emulatorinterface/communication/shm/SharedMem.class
$(JAVAH) -classpath $(TOPBINDIR) -o $(JNIBINDIR)/SharedMem.h $(JNIPACKAGE) #$(JAVAH) -classpath $(TOPBINDIR) -o $(JNIBINDIR)/SharedMem.h $(JNIPACKAGE) #use this for java-8
$(JAVAH) -classpath $(TOPBINDIR) -h $(JNIBINDIR) $(JNIPACKAGE) #use this line and the next for java-11
mv $(JNIBINDIR)/emulatorinterface_communication_shm_SharedMem.h $(JNIBINDIR)/SharedMem.h
clean: clean:
rm -rf $(BINDIR)/* $(JNIBINDIR)/* rm -rf $(BINDIR) $(JNIBINDIR)

View File

@ -5,8 +5,9 @@ public class BranchPredictorConfig {
public int BHRsize; public int BHRsize;
public int saturating_bits; public int saturating_bits;
public BP predictorMode; public BP predictorMode;
public String TAGESCLLibDirectory;
public static enum BP { public static enum BP {
NoPredictor, PerfectPredictor, AlwaysTaken, AlwaysNotTaken, Tournament, Bimodal, GShare, GAg, GAp, PAg, PAp,TAGE NoPredictor, PerfectPredictor, AlwaysTaken, AlwaysNotTaken, Tournament, Bimodal, GShare, GAg, GAp, PAg, PAp,TAGE, TAGE_SC_L,
} }
} }

View File

@ -58,7 +58,9 @@ public class CoreConfig
public int STLBAccessPorts; public int STLBAccessPorts;
public int STLBPortOccupancy; public int STLBPortOccupancy;
public int NoOfMicroOpCacheEntries;
public int DecodeWidth; public int DecodeWidth;
public int RenameWidth;
public int IssueWidth; public int IssueWidth;
public int RetireWidth; public int RetireWidth;
public int ROBSize; public int ROBSize;

View File

@ -481,6 +481,9 @@ public class XMLParser
core.STLBPortOccupancy = Integer.parseInt(getImmediateString("PortOccupancy", sTLBElmnt)); core.STLBPortOccupancy = Integer.parseInt(getImmediateString("PortOccupancy", sTLBElmnt));
core.sTLBPower = getEnergyConfig(sTLBElmnt); core.sTLBPower = getEnergyConfig(sTLBElmnt);
Element microOpCacheElmnt = (Element)(coreElmnt.getElementsByTagName("MicroOpCache")).item(0);
core.NoOfMicroOpCacheEntries = Integer.parseInt(getImmediateString("NumberOfMicroOps", microOpCacheElmnt));
Element decodeElmnt = (Element)(coreElmnt.getElementsByTagName("Decode")).item(0); Element decodeElmnt = (Element)(coreElmnt.getElementsByTagName("Decode")).item(0);
core.DecodeWidth = Integer.parseInt(getImmediateString("Width", decodeElmnt)); core.DecodeWidth = Integer.parseInt(getImmediateString("Width", decodeElmnt));
core.decodePower = getEnergyConfig(decodeElmnt); core.decodePower = getEnergyConfig(decodeElmnt);
@ -499,6 +502,7 @@ public class XMLParser
core.resultsBroadcastBusPower = getEnergyConfig(resultsBroadcastBusElmnt); core.resultsBroadcastBusPower = getEnergyConfig(resultsBroadcastBusElmnt);
Element renameElmnt = (Element)(coreElmnt.getElementsByTagName("Rename")).item(0); Element renameElmnt = (Element)(coreElmnt.getElementsByTagName("Rename")).item(0);
core.RenameWidth = Integer.parseInt(getImmediateString("Width", renameElmnt));
Element ratElmnt = (Element)(renameElmnt.getElementsByTagName("RAT")).item(0); Element ratElmnt = (Element)(renameElmnt.getElementsByTagName("RAT")).item(0);
core.intRATPower = getEnergyConfig((Element)ratElmnt.getElementsByTagName("Integer").item(0)); core.intRATPower = getEnergyConfig((Element)ratElmnt.getElementsByTagName("Integer").item(0));
@ -1066,9 +1070,14 @@ public class XMLParser
{ {
branchPredictor.predictorMode = BP.TAGE; branchPredictor.predictorMode = BP.TAGE;
} }
else if(tempStr.equalsIgnoreCase("TAGE-SC-L"))
{
branchPredictor.predictorMode = BP.TAGE_SC_L;
}
branchPredictor.PCBits = Integer.parseInt(getImmediateString("PCBits", predictorElmnt)); branchPredictor.PCBits = Integer.parseInt(getImmediateString("PCBits", predictorElmnt));
branchPredictor.BHRsize = Integer.parseInt(getImmediateString("BHRsize", predictorElmnt)); branchPredictor.BHRsize = Integer.parseInt(getImmediateString("BHRsize", predictorElmnt));
branchPredictor.saturating_bits = Integer.parseInt(getImmediateString("SaturatingBits", predictorElmnt)); branchPredictor.saturating_bits = Integer.parseInt(getImmediateString("SaturatingBits", predictorElmnt));
branchPredictor.TAGESCLLibDirectory = getImmediateString("TAGESCLLibDirectory", predictorElmnt);
} }
private static boolean setDirectoryCoherent(String immediateString) { private static boolean setDirectoryCoherent(String immediateString) {

View File

@ -47,12 +47,12 @@ TDP = 15W
<!-- If store packets in a file option is set to true, this parameter indicates the basename for the trace files --> <!-- If store packets in a file option is set to true, this parameter indicates the basename for the trace files -->
<!--One trace file is maintained for each store. The name of trace file for core n is basename_n.gz--> <!--One trace file is maintained for each store. The name of trace file for core n is basename_n.gz-->
<!--We do not allow overwriting of trace files. So if a tracefile with same name is pre-existing, kindly rename it--> <!--We do not allow overwriting of trace files. So if a tracefile with same name is pre-existing, kindly rename it-->
<BasenameForTraceFiles>/home/rajshekar/tmp/gcc_trace</BasenameForTraceFiles> <BasenameForTraceFiles>/home/rajshekar/projects/tejas/tests/test1_trace</BasenameForTraceFiles>
<PinTool>/home/rajshekar/softwares/pin-97554/</PinTool> <PinTool>/home/rajshekar/softwares/pin-98332/</PinTool>
<PinInstrumentor>/home/rajshekar/projects/tejas/workspace/Tejas/src/emulator/pin/obj-pin/causalityTool.so</PinInstrumentor> <PinInstrumentor>/home/rajshekar/projects/tejas/workspace/Tejas/src/emulator/pin/obj-pin/causalityTool.so</PinInstrumentor>
<QemuTool>TODO/home/prathmesh/workspace/qemu/x86_64-linux-user/qemu-x86_64 /home/prathmesh/tmp/testQemu.o</QemuTool> <QemuTool>TODO/home/prathmesh/workspace/qemu/x86_64-linux-user/qemu-x86_64 /home/prathmesh/tmp/testQemu.o</QemuTool>
<ShmLibDirectory>/home/rajshekar/resources/tejas_configs/</ShmLibDirectory> <ShmLibDirectory>/home/rajshekar/projects/tejas/workspace/Tejas/src/emulator/pin/obj-comm/</ShmLibDirectory>
<GetBenchmarkPIDScript>/home/rajshekar_resources/tejas_configs/getBenchmarkPID.sh</GetBenchmarkPIDScript> <GetBenchmarkPIDScript>/home/rajshekar_resources/tejas_configs/getBenchmarkPID.sh</GetBenchmarkPIDScript>
<KillEmulatorScript>/home/rajshekar/resources/tejas_configs/killAllDescendents.sh</KillEmulatorScript> <KillEmulatorScript>/home/rajshekar/resources/tejas_configs/killAllDescendents.sh</KillEmulatorScript>
</Emulator> </Emulator>
@ -112,11 +112,12 @@ TDP = 15W
<PipelineType>outOfOrder</PipelineType> <!--inOrder,outOfOrder(set issue width for multi-issue in-order)--> <PipelineType>outOfOrder</PipelineType> <!--inOrder,outOfOrder(set issue width for multi-issue in-order)-->
<BranchPredictor> <BranchPredictor>
<Predictor_Mode>TAGE</Predictor_Mode> <!-- Legal Values are NoPredictor, PerfectPredictor, AlwaysTaken, AlwaysNotTaken, Tournament, Bimodal, GAg, GAp, GShare, PAg, PAp, TAGE --> <Predictor_Mode>TAGE</Predictor_Mode> <!-- Legal Values are NoPredictor, PerfectPredictor, AlwaysTaken, AlwaysNotTaken, Tournament, Bimodal, GAg, GAp, GShare, PAg, PAp, TAGE, TAGE-SC-L -->
<PCBits>8</PCBits> <PCBits>8</PCBits>
<BHRsize>16</BHRsize> <BHRsize>16</BHRsize>
<BranchMispredPenalty>17</BranchMispredPenalty> <!--Branch misprediction penalty--><!-- https://www.7-cpu.com/cpu/Skylake.html --> <BranchMispredPenalty>17</BranchMispredPenalty> <!--Branch misprediction penalty--><!-- https://www.7-cpu.com/cpu/Skylake.html -->
<SaturatingBits>2</SaturatingBits> <SaturatingBits>2</SaturatingBits>
<TAGESCLLibDirectory>/home/rajshekar/projects/tejas/workspace/Tejas/src/simulator/pipeline/branchpredictor/TAGESCL/</TAGESCLLibDirectory>
<LeakageEnergy>0.0178</LeakageEnergy> <LeakageEnergy>0.0178</LeakageEnergy>
<DynamicEnergy>0.0962</DynamicEnergy> <DynamicEnergy>0.0962</DynamicEnergy>
</BranchPredictor> </BranchPredictor>
@ -165,6 +166,10 @@ TDP = 15W
<DynamicEnergy>0.06792852941</DynamicEnergy> <!-- TODO this number isn't right --> <DynamicEnergy>0.06792852941</DynamicEnergy> <!-- TODO this number isn't right -->
</STLB> </STLB>
<MicroOpCache>
<NumberOfMicroOps>2304</NumberOfMicroOps>
</MicroOpCache>
<Decode> <!--Instruction decode--> <Decode> <!--Instruction decode-->
<Width>6</Width> <Width>6</Width>
<LeakageEnergy>0.0598</LeakageEnergy> <LeakageEnergy>0.0598</LeakageEnergy>
@ -172,6 +177,7 @@ TDP = 15W
</Decode> </Decode>
<Rename> <Rename>
<Width>6</Width>
<RAT> <RAT>
<Integer> <Integer>
<LeakageEnergy>0.0045</LeakageEnergy> <LeakageEnergy>0.0045</LeakageEnergy>

View File

@ -207,6 +207,10 @@ public class Core extends SimulationElement{
return coreConfig.DecodeWidth; return coreConfig.DecodeWidth;
} }
/**
 * Returns the rename-stage width configured for this core.
 *
 * @return the value of {@code coreConfig.RenameWidth}
 */
public int getRenameWidth() {
return coreConfig.RenameWidth;
}
public int getVectorRegisterFileSize() { public int getVectorRegisterFileSize() {
return coreConfig.VectorRegFileSize; return coreConfig.VectorRegFileSize;
} }

View File

@ -22,6 +22,7 @@ import memorysystem.nuca.NucaCache;
import memorysystem.nuca.NucaCache.NucaType; import memorysystem.nuca.NucaCache.NucaType;
import net.NocInterface; import net.NocInterface;
import net.Router; import net.Router;
import pipeline.outoforder.OutOrderExecutionEngine;
import config.CoreConfig; import config.CoreConfig;
import config.EmulatorConfig; import config.EmulatorConfig;
import config.EnergyConfig; import config.EnergyConfig;
@ -33,6 +34,7 @@ import emulatorinterface.translator.qemuTranslationCache.TranslatedInstructionCa
import dram.MainMemoryDRAMController; import dram.MainMemoryDRAMController;
import config.MainMemoryConfig; import config.MainMemoryConfig;
import config.PipelineType;
public class Statistics { public class Statistics {
@ -199,12 +201,22 @@ public class Statistics {
outputFileWriter.write("time taken\t=\t" + formatDouble((double)coreCyclesTaken[i]/GlobalClock.effectiveGlobalClockFrequency) + " microseconds\n"); outputFileWriter.write("time taken\t=\t" + formatDouble((double)coreCyclesTaken[i]/GlobalClock.effectiveGlobalClockFrequency) + " microseconds\n");
outputFileWriter.write("\n"); outputFileWriter.write("\n");
if(cores[i].getCoreConfig().pipelineType == PipelineType.outOfOrder)
{
outputFileWriter.write("number of micro-op cache accesses = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getMicroOpCache().numSearches + "\n");
outputFileWriter.write("micro-op cache hit rate = " + formatDouble((double)((OutOrderExecutionEngine)cores[i].getExecEngine()).getMicroOpCache().numHits/(double)((OutOrderExecutionEngine)cores[i].getExecEngine()).getMicroOpCache().numSearches) + "\n");
outputFileWriter.write("\n");
}
outputFileWriter.write("number of branches\t=\t" + cores[i].getExecEngine().getNumberOfBranches() + "\n"); outputFileWriter.write("number of branches\t=\t" + cores[i].getExecEngine().getNumberOfBranches() + "\n");
outputFileWriter.write("number of mispredicted branches\t=\t" + cores[i].getExecEngine().getNumberOfMispredictedBranches() + "\n"); outputFileWriter.write("number of mispredicted branches\t=\t" + cores[i].getExecEngine().getNumberOfMispredictedBranches() + "\n");
outputFileWriter.write("branch predictor accuracy\t=\t" + formatDouble((double)((double)(1.0 - (double)cores[i].getExecEngine().getNumberOfMispredictedBranches()/(double)cores[i].getExecEngine().getNumberOfBranches())*100.0)) + " %\n"); outputFileWriter.write("branch predictor accuracy\t=\t" + formatDouble((double)((double)(1.0 - (double)cores[i].getExecEngine().getNumberOfMispredictedBranches()/(double)cores[i].getExecEngine().getNumberOfBranches())*100.0)) + " %\n");
outputFileWriter.write("number of jumps\t=\t" + cores[i].getExecEngine().getNumberOfJumps() + "\n"); outputFileWriter.write("number of jumps\t=\t" + cores[i].getExecEngine().getNumberOfJumps() + "\n");
outputFileWriter.write("number of mispredicted jump targets\t=\t" + cores[i].getExecEngine().getNumberOfMispredictedTargets() + "\n"); outputFileWriter.write("number of mispredicted jump targets\t=\t" + cores[i].getExecEngine().getNumberOfMispredictedTargets() + "\n");
outputFileWriter.write("target predictor accuracy\t=\t" + formatDouble((double)((double)(1.0 - (double)cores[i].getExecEngine().getNumberOfMispredictedTargets()/(double)cores[i].getExecEngine().getNumberOfJumps())*100.0)) + " %\n"); outputFileWriter.write("target predictor accuracy\t=\t" + formatDouble((double)((double)(1.0 - (double)cores[i].getExecEngine().getNumberOfMispredictedTargets()/(double)cores[i].getExecEngine().getNumberOfJumps())*100.0)) + " %\n");
// The predicate counters are read from the reorder buffer of OutOrderExecutionEngine.
// Guard the cast (same check as used for the micro-op cache statistics) so that cores
// configured with another pipeline type do not fail with a ClassCastException here.
if(cores[i].getCoreConfig().pipelineType == PipelineType.outOfOrder)
{
	outputFileWriter.write("number of predicate instructions\t=\t" + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().predicateCount + "\n");
	outputFileWriter.write("number of mispredicted predicate instructions\t=\t" + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().predicateMispredCount + "\n");
	outputFileWriter.write("predicate predictor accuracy\t=\t" + formatDouble((double)((double)(1.0 - (double)((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().predicateMispredCount/(double)((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().predicateCount)*100.0)) + " %\n");
}
outputFileWriter.write("\n"); outputFileWriter.write("\n");
outputFileWriter.write("predictor type = " + coreConfig.branchPredictor.predictorMode + "\n"); outputFileWriter.write("predictor type = " + coreConfig.branchPredictor.predictorMode + "\n");
@ -213,6 +225,14 @@ public class Statistics {
outputFileWriter.write("Saturating bits = " + coreConfig.branchPredictor.saturating_bits + "\n"); outputFileWriter.write("Saturating bits = " + coreConfig.branchPredictor.saturating_bits + "\n");
outputFileWriter.write("\n"); outputFileWriter.write("\n");
// The stall counters are read from the reorder buffer of OutOrderExecutionEngine.
// Guard the cast so that cores configured with another pipeline type do not fail
// with a ClassCastException while the statistics are being written.
if(cores[i].getCoreConfig().pipelineType == PipelineType.outOfOrder)
{
	outputFileWriter.write("\nIW Full stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall1Count());
	outputFileWriter.write("\nrename stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall2Count());
	outputFileWriter.write("\nLSQ Full stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall3Count());
	outputFileWriter.write("\nROB Full stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall4Count());
	outputFileWriter.write("\nMispred stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall5Count());
	outputFileWriter.write("\nSerialization instruction stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall6Count());
}
outputFileWriter.write("\n");
} }
outputFileWriter.write("\n"); outputFileWriter.write("\n");
} }

View File

@ -301,6 +301,13 @@ public class LSQ extends SimulationElement
} }
public boolean isFull(boolean isLoad) public boolean isFull(boolean isLoad)
{
if(privIsFull(isLoad))
freeOneEntry(isLoad);
return privIsFull(isLoad);
}
private boolean privIsFull(boolean isLoad)
{ {
if(isLoad) if(isLoad)
{ {
@ -495,52 +502,10 @@ committed
LSQEntry tmpEntry = lsqueue[i]; LSQEntry tmpEntry = lsqueue[i];
// if it is a store, send the request to the cache // if it is a store, send the request to the cache
if(tmpEntry.getType() == LSQEntry.LSQEntryType.STORE) if(tmpEntry.getType() == LSQEntry.LSQEntryType.STORE
|| tmpEntry.getType() == LSQEntryType.LOAD && tmpEntry.isForwarded())
{ {
if(tmpEntry.isValid() == false) tmpEntry.setCanBeRemoved(true);
{
misc.Error.showErrorAndExit("store not ready to be committed");
}
boolean requestIssued =
containingMemSys.issueRequestToL1Cache(RequestType.Cache_Write,
tmpEntry.getAddr());
if(requestIssued == false)
{
event.addEventTime(1);
event.getEventQ().addEvent(event);
break; //removals must be in-order : if u can't commit the operation at the head, u can't commit the ones that follow it
}
else
{
if(head == tail)
{
head = tail = -1;
}
else
{
this.head = this.incrementQ(this.head);
}
this.curNumStoresInQ--;
tmpEntry.setRemoved(true);
}
}
//If it is a LOAD which has received its value
else if (tmpEntry.isForwarded())
{
if(head == tail)
{
head = tail = -1;
}
else
{
this.head = this.incrementQ(this.head);
}
this.curNumLoadsInQ--;
tmpEntry.setRemoved(true);
} }
//If it is a LOAD which has not yet received its value //If it is a LOAD which has not yet received its value
@ -559,6 +524,82 @@ committed
//incrementNumAccesses(1); //incrementNumAccesses(1);
} }
/**
 * Removes entries from the head of the queue, strictly in order, until one entry
 * of the requested kind has been removed or the head entry cannot be removed yet.
 *
 * A store at the head is removed only after its write request has been accepted by
 * the L1 cache; a load at the head is removed only if it has been forwarded its value.
 * Entries of the other kind that sit in front of the requested kind are removed on
 * the way, because removals must happen in queue order.
 *
 * @param isLoadToBeRemoved true to stop after a load has been removed,
 *                          false to stop after a store has been removed
 */
public void freeOneEntry(boolean isLoadToBeRemoved)
{
	boolean removedEnough = false;
	while(removedEnough == false)
	{
		// head == -1 marks an empty queue (it is set below when the last entry is
		// removed). Without this check an empty queue - either on entry or after the
		// loop has drained it - would index lsqueue[-1].
		if(head == -1)
		{
			return;
		}

		LSQEntry tmpEntry = lsqueue[head];

		// the head has not been marked as removable yet : nothing can be freed
		if(tmpEntry.isCanBeRemoved() == false)
		{
			return;
		}

		// if it is a store, send the request to the cache
		if(tmpEntry.getType() == LSQEntry.LSQEntryType.STORE)
		{
			if(tmpEntry.isValid() == false)
			{
				misc.Error.showErrorAndExit("store not ready to be committed");
			}
			boolean requestIssued =
					containingMemSys.issueRequestToL1Cache(RequestType.Cache_Write,
							tmpEntry.getAddr());

			if(requestIssued == false)
			{
				return; //removals must be in-order : if u can't commit the operation at the head, u can't commit the ones that follow it
			}
			else
			{
				if(head == tail)
				{
					// that was the last entry : the queue is now empty
					head = tail = -1;
				}
				else
				{
					this.head = this.incrementQ(this.head);
				}
				this.curNumStoresInQ--;
				tmpEntry.setRemoved(true);

				if(isLoadToBeRemoved == false)
					removedEnough = true;
			}
		}

		//If it is a LOAD which has received its value
		else if (tmpEntry.isForwarded())
		{
			if(head == tail)
			{
				// that was the last entry : the queue is now empty
				head = tail = -1;
			}
			else
			{
				this.head = this.incrementQ(this.head);
			}
			this.curNumLoadsInQ--;
			tmpEntry.setRemoved(true);

			if(isLoadToBeRemoved == true)
				removedEnough = true;
		}

		//If it is a LOAD which has not yet received its value
		else
		{
			System.err.println("Error in LSQ " +this.containingMemSys.coreID+ " : ROB sent commit for a load which has not received its value");
			misc.Error.showErrorAndExit(tmpEntry.getIndexInQ() + " : load : " + tmpEntry.getAddr());
		}

		//incrementNumAccesses(1);
	}
}
void incrementNumAccesses(int incrementBy) void incrementNumAccesses(int incrementBy)
{ {
numAccesses += incrementBy; numAccesses += incrementBy;

View File

@ -33,7 +33,7 @@ public class LSQEntry
private boolean valid; private boolean valid;
private boolean issued; private boolean issued;
private boolean forwarded;//Whether the load has got its value or not private boolean forwarded;//Whether the load has got its value or not
private boolean canBeRemoved;
private boolean removed; //If the entry has been committed and removed from the LSQ private boolean removed; //If the entry has been committed and removed from the LSQ
public enum LSQEntryType {LOAD, STORE}; public enum LSQEntryType {LOAD, STORE};
@ -45,6 +45,7 @@ public class LSQEntry
valid = false; valid = false;
issued = false; issued = false;
forwarded = false; forwarded = false;
canBeRemoved = false;
removed = true; removed = true;
} }
@ -54,6 +55,7 @@ public class LSQEntry
valid = false; valid = false;
issued = false; issued = false;
forwarded = false; forwarded = false;
canBeRemoved = false;
removed = false; removed = false;
} }
@ -108,6 +110,14 @@ public class LSQEntry
this.forwarded = forwarded; this.forwarded = forwarded;
} }
/**
 * Returns the current value of the {@code canBeRemoved} flag of this entry.
 *
 * @return true if the entry has been marked as removable, false otherwise
 */
public boolean isCanBeRemoved() {
return canBeRemoved;
}
/**
 * Sets the {@code canBeRemoved} flag of this entry.
 *
 * @param canBeRemoved the new value of the flag
 */
public void setCanBeRemoved(boolean canBeRemoved) {
this.canBeRemoved = canBeRemoved;
}
protected boolean isRemoved() { protected boolean isRemoved() {
return removed; return removed;
} }

View File

@ -20,6 +20,7 @@ import pipeline.branchpredictor.PApPredictor;
import pipeline.branchpredictor.PerfectPredictor; import pipeline.branchpredictor.PerfectPredictor;
import pipeline.branchpredictor.TournamentPredictor; import pipeline.branchpredictor.TournamentPredictor;
import pipeline.branchpredictor.TAGE; import pipeline.branchpredictor.TAGE;
import pipeline.branchpredictor.TAGESCL.TAGESCL;
import pipeline.branchpredictor.BTB; import pipeline.branchpredictor.BTB;
import generic.Core; import generic.Core;
import generic.GenericCircularQueue; import generic.GenericCircularQueue;
@ -88,6 +89,8 @@ public abstract class ExecutionEngine {
this.branchPredictor = new TAGE(this, this.branchPredictor = new TAGE(this,
coreConfig.branchPredictor.PCBits, coreConfig.branchPredictor.PCBits,
coreConfig.branchPredictor.saturating_bits); coreConfig.branchPredictor.saturating_bits);
else if(coreConfig.branchPredictor.predictorMode == BP.TAGE_SC_L)
this.branchPredictor = new TAGESCL(this);
BTB = new BTB(coreConfig.branchPredictor.PCBits, coreConfig.branchPredictor.BHRsize); BTB = new BTB(coreConfig.branchPredictor.PCBits, coreConfig.branchPredictor.BHRsize);
} }

View File

@ -10,13 +10,13 @@ public class OpTypeToFUTypeMapping {
public static FunctionalUnitType[] intALUFUs = {FunctionalUnitType.integerALU}; public static FunctionalUnitType[] intALUFUs = {FunctionalUnitType.integerALU};
public static FunctionalUnitType[] intMulFUs = {FunctionalUnitType.integerMul}; public static FunctionalUnitType[] intMulFUs = {FunctionalUnitType.integerMul};
public static FunctionalUnitType[] intDivFUs = {FunctionalUnitType.integerDiv}; public static FunctionalUnitType[] intDivFUs = {FunctionalUnitType.integerDiv};
public static FunctionalUnitType[] floatALUFUs = {FunctionalUnitType.floatALU, FunctionalUnitType.FMA}; public static FunctionalUnitType[] floatALUFUs = {FunctionalUnitType.FMA, FunctionalUnitType.floatALU};
public static FunctionalUnitType[] floatMulFUs = {FunctionalUnitType.floatMul, FunctionalUnitType.FMA}; public static FunctionalUnitType[] floatMulFUs = {FunctionalUnitType.FMA, FunctionalUnitType.floatMul};
public static FunctionalUnitType[] floatDivFUs = {FunctionalUnitType.floatDiv}; public static FunctionalUnitType[] floatDivFUs = {FunctionalUnitType.floatDiv};
public static FunctionalUnitType[] intVectorALUFUs = {FunctionalUnitType.integerVectorALU}; public static FunctionalUnitType[] intVectorALUFUs = {FunctionalUnitType.integerVectorALU};
public static FunctionalUnitType[] intVectorMulFUs = {FunctionalUnitType.integerVectorMul}; public static FunctionalUnitType[] intVectorMulFUs = {FunctionalUnitType.integerVectorMul};
public static FunctionalUnitType[] floatVectorALUFUs = {FunctionalUnitType.floatVectorALU, FunctionalUnitType.FMA}; public static FunctionalUnitType[] floatVectorALUFUs = {FunctionalUnitType.FMA, FunctionalUnitType.floatVectorALU};
public static FunctionalUnitType[] floatVectorMulFUs = {FunctionalUnitType.floatVectorMul, FunctionalUnitType.FMA}; public static FunctionalUnitType[] floatVectorMulFUs = {FunctionalUnitType.FMA, FunctionalUnitType.floatVectorMul};
public static FunctionalUnitType[] FMAFUs = {FunctionalUnitType.FMA}; public static FunctionalUnitType[] FMAFUs = {FunctionalUnitType.FMA};
public static FunctionalUnitType[] VectorFMAFUs = {FunctionalUnitType.FMA}; public static FunctionalUnitType[] VectorFMAFUs = {FunctionalUnitType.FMA};
public static FunctionalUnitType[] loadFUs = {FunctionalUnitType.load}; public static FunctionalUnitType[] loadFUs = {FunctionalUnitType.load};

View File

@ -0,0 +1,8 @@
# Builds the JNI header for TAGESCLInvoker and the native library (libnative.so)
# that TAGESCLInvoker loads at run time.
#
# JDK_HOME selects the JDK whose JNI headers are used. The default is java-11;
# for java-8 run:  make JDK_HOME=/usr/lib/jvm/java-8-openjdk-amd64
JDK_HOME ?= /usr/lib/jvm/java-11-openjdk-amd64

# 'all' and 'clean' are commands, not files
.PHONY: all clean

all:
	javac -h . TAGESCLInvoker.java
	# the source is C++ (.cc) : link with the C++ driver so that the C++ runtime library is linked in
	$(CXX) -fPIC -I$(JDK_HOME)/include/ -I$(JDK_HOME)/include/linux/ -shared -o libnative.so pipeline_branchpredictor_TAGESCL_TAGESCLInvoker.cc

# NOTE(review): '*.h' removes every header in this directory, not only the one
# generated by 'javac -h' - confirm that no hand-written headers live here.
clean:
	rm -f *.h
	rm -f *.class
	rm -f *.so

View File

@ -0,0 +1,32 @@
package pipeline.branchpredictor.TAGESCL;
import pipeline.ExecutionEngine;
import pipeline.branchpredictor.BranchPredictor;
/**
 * Branch predictor that delegates every prediction and training request to a
 * {@link TAGESCLInvoker}, which in turn calls into a native library.
 */
public class TAGESCL extends BranchPredictor {
// invoker through which all predict/train calls are forwarded
TAGESCLInvoker ti;
/**
 * Creates the predictor and its invoker.
 * The directory handed to the invoker is read from
 * {@code branchPredictor.TAGESCLLibDirectory} of the containing core's configuration.
 *
 * @param containingExecEngine execution engine this predictor belongs to
 */
public TAGESCL(ExecutionEngine containingExecEngine)
{
super(containingExecEngine);
ti = new TAGESCLInvoker(containingExecEngine.getContainingCore().getCoreConfig().branchPredictor.TAGESCLLibDirectory);
}
/**
 * Returns the prediction obtained from the invoker for the given address.
 *
 * @param address address passed to the invoker
 * @param outcome not used by this implementation
 * @return the value returned by {@code ti.invokerPredict(address)}
 */
public boolean predict(long address, boolean outcome)
{
return ti.invokerPredict(address);
}
/**
 * Not supported by this predictor: always reports an error through
 * {@code misc.Error.showErrorAndExit}. Use the five-argument Train() instead.
 */
public void Train(long address, boolean outcome, boolean predict)
{
misc.Error.showErrorAndExit("use the other Train() function");
//don't use this!!
//use Train(long PC, int opType, boolean resolveDir, boolean predDir, long branchTarget)
}
/**
 * Forwards a training request, with all arguments unchanged, to the invoker.
 *
 * @param PC           passed through to {@code invokerTrain}
 * @param opType       passed through to {@code invokerTrain}
 * @param resolveDir   passed through to {@code invokerTrain}
 * @param predDir      passed through to {@code invokerTrain}
 * @param branchTarget passed through to {@code invokerTrain}
 */
public void Train(long PC, int opType, boolean resolveDir, boolean predDir, long branchTarget)
{
ti.invokerTrain (PC, opType, resolveDir, predDir, branchTarget);
}
}

View File

@ -0,0 +1,25 @@
package pipeline.branchpredictor.TAGESCL;
/**
 * Thin JNI bridge to the native library "libnative.so".
 * The native methods are private; callers use invokerPredict() and invokerTrain().
 */
public class TAGESCLInvoker {

	// implemented in the native library loaded by the constructor
	private native void initialize();
	private native boolean predict (long PC);
	private native void train (long PC, int opType, boolean resolveDir, boolean predDir, long branchTarget);

	/**
	 * Loads "libnative.so" from the given directory and calls the native initialize().
	 *
	 * NOTE(review): initialize() runs on every construction; whether the native side
	 * keeps its state per instance or per process cannot be told from this class -
	 * confirm before creating more than one invoker.
	 *
	 * @param TAGESCLLibDirectory directory that contains libnative.so; may be relative
	 *                            to the working directory, with or without a trailing '/'
	 * @throws UnsatisfiedLinkError if the library cannot be loaded
	 */
	public TAGESCLInvoker(String TAGESCLLibDirectory)
	{
		// System.load() accepts only absolute path names, so resolve the configured
		// directory through java.io.File instead of concatenating strings.
		System.load(new java.io.File(TAGESCLLibDirectory, "libnative.so").getAbsolutePath());
		initialize();
	}

	/**
	 * Returns the result of the native predict() for the given address.
	 *
	 * @param address value passed to the native predict() as PC
	 * @return the value returned by the native predict()
	 */
	public boolean invokerPredict(long address)
	{
		return predict(address);
	}

	/**
	 * Forwards all arguments unchanged to the native train().
	 */
	public void invokerTrain(long PC, int opType, boolean resolveDir, boolean predDir, long branchTarget)
	{
		train (PC, opType, resolveDir, predDir, branchTarget);
	}
}

View File

@ -39,9 +39,7 @@ public class DecodeLogic extends SimulationElement {
public void performDecode() public void performDecode()
{ {
if(containingExecutionEngine.isToStall5() == true /*pipeline stalled due to branch mis-prediction*/ if(containingExecutionEngine.isToStall5() == true /*pipeline stalled due to branch mis-prediction*/)
|| containingExecutionEngine.isToStall1() == true /*IW full*/
|| containingExecutionEngine.isToStall2() == true /*rename stall*/)
{ {
return; return;
} }

View File

@ -1,6 +1,7 @@
package pipeline.outoforder; package pipeline.outoforder;
import config.SimulationConfig; import config.SimulationConfig;
import config.SystemConfig;
import emulatorinterface.translator.x86.registers.Registers; import emulatorinterface.translator.x86.registers.Registers;
import main.ArchitecturalComponent; import main.ArchitecturalComponent;
import main.CustomObjectPool; import main.CustomObjectPool;
@ -11,6 +12,7 @@ import generic.Core;
import generic.Event; import generic.Event;
import generic.EventQueue; import generic.EventQueue;
import generic.GenericCircularQueue; import generic.GenericCircularQueue;
import generic.GlobalClock;
import generic.Instruction; import generic.Instruction;
import generic.OperationType; import generic.OperationType;
import generic.PortType; import generic.PortType;
@ -25,12 +27,14 @@ public class FetchLogic extends SimulationElement {
GenericCircularQueue<Instruction>[] inputToPipeline; GenericCircularQueue<Instruction>[] inputToPipeline;
int inputPipeToReadNext; int inputPipeToReadNext;
ICacheBuffer iCacheBuffer; ICacheBuffer iCacheBuffer;
MicroOpCache microOpCache;
GenericCircularQueue<Instruction> fetchBuffer; GenericCircularQueue<Instruction> fetchBuffer;
int fetchWidth; int fetchWidth;
OperationType[] instructionsToBeDropped; OperationType[] instructionsToBeDropped;
boolean sleep; boolean sleep;
long serialNo; long serialNo;
long lastValidIPSeen = -1;
public FetchLogic(Core core, OutOrderExecutionEngine execEngine) public FetchLogic(Core core, OutOrderExecutionEngine execEngine)
{ {
@ -38,6 +42,7 @@ public class FetchLogic extends SimulationElement {
this.core = core; this.core = core;
this.execEngine = execEngine; this.execEngine = execEngine;
fetchBuffer = execEngine.getFetchBuffer(); fetchBuffer = execEngine.getFetchBuffer();
microOpCache = execEngine.getMicroOpCache();
fetchWidth = core.getDecodeWidth(); fetchWidth = core.getDecodeWidth();
inputPipeToReadNext = 0; inputPipeToReadNext = 0;
sleep = false; sleep = false;
@ -77,33 +82,33 @@ public class FetchLogic extends SimulationElement {
Instruction newInstruction; Instruction newInstruction;
if(!execEngine.isToStall1() && if(execEngine.isToStall5())
!execEngine.isToStall2() && return;
!execEngine.isToStall3() &&
!execEngine.isToStall4() &&
!execEngine.isToStall5() &&
!execEngine.isToStall6())
{
//add instructions, for whom "fetch" from iCache has completed, to fetch buffer
//decode stage reads from this buffer
for(int i = 0; i < fetchWidth; i++)
{
if(fetchBuffer.isFull() == true)
{
break;
}
newInstruction = iCacheBuffer.getNextInstruction(); //add instructions, for whom "fetch" from iCache has completed, to fetch buffer
if(newInstruction != null) //decode stage reads from this buffer
for(int i = 0; i < fetchWidth; i++)
{
if(fetchBuffer.isFull() == true)
{
break;
}
newInstruction = iCacheBuffer.getNextInstruction();
if(newInstruction != null)
{
fetchBuffer.enqueue(newInstruction);
if(SimulationConfig.debugMode)
{ {
fetchBuffer.enqueue(newInstruction); System.out.println("fetched : " + GlobalClock.getCurrentTime()/core.getStepSize() + " : " + newInstruction);
}
else
{
this.core.getExecEngine().incrementInstructionMemStall(1);
break;
} }
} }
else
{
this.core.getExecEngine().incrementInstructionMemStall(1);
break;
}
} }
//this loop reads from inputToPipeline and places the instruction in iCacheBuffer //this loop reads from inputToPipeline and places the instruction in iCacheBuffer
@ -191,10 +196,32 @@ public class FetchLogic extends SimulationElement {
{ {
// The first micro-operation of an instruction has a valid CISC IP. All the subsequent // The first micro-operation of an instruction has a valid CISC IP. All the subsequent
// micro-ops will have IP = -1(meaning invalid). We must not forward this requests to iCache. // micro-ops will have IP = -1(meaning invalid). We must not forward this requests to iCache.
if(newInstruction.getCISCProgramCounter()!=-1) // If the micro-ops are available in the micro-op cache, we don't need to access the i-cache
if(newInstruction.getCISCProgramCounter()!=-1 && newInstruction.getCISCProgramCounter() != lastValidIPSeen
&& microOpCache.isPresentInCache(newInstruction.getCISCProgramCounter()) == false)
{ {
execEngine.getCoreMemorySystem().issueRequestToInstrCache(newInstruction.getCISCProgramCounter()); execEngine.getCoreMemorySystem().issueRequestToInstrCache(newInstruction.getCISCProgramCounter());
} }
else
{
iCacheBuffer.fetchComplete[iCacheBuffer.tail] = true;
if(newInstruction.getCISCProgramCounter()==-1 || newInstruction.getCISCProgramCounter() == lastValidIPSeen)
microOpCache.isPresentInCache(lastValidIPSeen); //accessing micro-op cache just to get the micro-op cache LRU and counters right
}
if(newInstruction.getCISCProgramCounter()!=-1)
{
lastValidIPSeen = newInstruction.getCISCProgramCounter();
}
else
{
newInstruction.setCISCProgramCounter(lastValidIPSeen);
}
if(SimulationConfig.debugMode)
{
System.out.println("fetch_initiated : " + GlobalClock.getCurrentTime()/core.getStepSize() + " : " + newInstruction);
}
} }
} }
else else
@ -235,7 +262,8 @@ public class FetchLogic extends SimulationElement {
public void processCompletionOfMemRequest(long address) public void processCompletionOfMemRequest(long address)
{ {
iCacheBuffer.updateFetchComplete(address); int numberOfMicroOpsUpdated = iCacheBuffer.updateFetchComplete(address);
microOpCache.addToCache(address, numberOfMicroOpsUpdated);
} }
public GenericCircularQueue<Instruction>[] getInputToPipeline() { public GenericCircularQueue<Instruction>[] getInputToPipeline() {

View File

@ -78,21 +78,26 @@ public class ICacheBuffer {
return toBeReturned; return toBeReturned;
} }
public void updateFetchComplete(long programCounter) public int updateFetchComplete(long programCounter)
{ {
int numberOfMicroOpsUpdated = 0;
if(head == -1) if(head == -1)
return; return numberOfMicroOpsUpdated;
for(int i = head; ; i = (i + 1)%size) for(int i = head; ; i = (i + 1)%size)
{ {
if(buffer[i] != null && buffer[i].getCISCProgramCounter() == programCounter) if(buffer[i] != null && buffer[i].getCISCProgramCounter() == programCounter)
{ {
fetchComplete[i] = true; fetchComplete[i] = true;
numberOfMicroOpsUpdated++;
} }
if(i == tail) if(i == tail)
break; break;
} }
return numberOfMicroOpsUpdated;
} }
public boolean isFull() public boolean isFull()

View File

@ -16,7 +16,7 @@ public class IWPushLogic extends SimulationElement {
OutOrderExecutionEngine execEngine; OutOrderExecutionEngine execEngine;
GenericCircularQueue<ReorderBufferEntry> renameBuffer; GenericCircularQueue<ReorderBufferEntry> renameBuffer;
InstructionWindow IW; InstructionWindow IW;
int decodeWidth; int renameWidth;
public IWPushLogic(Core core, OutOrderExecutionEngine execEngine) public IWPushLogic(Core core, OutOrderExecutionEngine execEngine)
{ {
@ -25,7 +25,7 @@ public class IWPushLogic extends SimulationElement {
this.execEngine = execEngine; this.execEngine = execEngine;
renameBuffer = execEngine.getRenameBuffer(); renameBuffer = execEngine.getRenameBuffer();
IW = execEngine.getInstructionWindow(); IW = execEngine.getInstructionWindow();
decodeWidth = core.getDecodeWidth(); renameWidth = core.getRenameWidth();
} }
/* /*
@ -39,7 +39,7 @@ public class IWPushLogic extends SimulationElement {
return; return;
} }
for(int i = 0; i < decodeWidth; i++) for(int i = 0; i < renameWidth; i++)
{ {
ReorderBufferEntry headROBEntry = renameBuffer.peek(0); ReorderBufferEntry headROBEntry = renameBuffer.peek(0);
if(headROBEntry != null) if(headROBEntry != null)

View File

@ -0,0 +1,107 @@
package pipeline.outoforder;
import java.util.HashMap;
import java.util.Map;
import java.util.Vector;
import config.SimulationConfig;
import generic.Event;
import generic.EventQueue;
import generic.GlobalClock;
import generic.PortType;
import generic.SimulationElement;
/**
 * Capacity-limited cache of decoded micro-ops, keyed by the program counter of
 * the instruction they belong to.
 *
 * Capacity is counted in micro-ops, not in entries: every entry records how
 * many micro-ops it holds and the sum over all entries is kept in
 * {@code curSize}. When a new entry does not fit, entries are evicted in
 * least-recently-used order (smallest {@code timeLastUsed}) until it does.
 */
public class MicroOpCache extends SimulationElement {

	int maxSize; //in terms of number of micro-ops
	int curSize; //micro-ops currently held, summed over all entries
	HashMap<Long, MicroOpCacheEntry> uopCache;

	//statistics
	public long numAdditions; //micro-ops inserted so far
	public long numSearches;  //calls to isPresentInCache
	public long numHits;      //calls to isPresentInCache that found an entry

	/**
	 * @param maxSize capacity of the cache, in micro-ops
	 */
	public MicroOpCache(int maxSize) {
		super(PortType.Unlimited, -1, -1, -1, -1);
		this.maxSize = maxSize;
		uopCache = new HashMap<Long, MicroOpCacheEntry>();
	}

	@Override
	public void handleEvent(EventQueue eventQ, Event event) {
		// TODO Auto-generated method stub
	}

	/**
	 * Looks up a program counter in the cache. Every call counts as a search;
	 * a hit additionally refreshes the entry's last-used time and counts as a hit.
	 *
	 * @param searchPC program counter to look up
	 * @return true if an entry for searchPC is present
	 */
	public boolean isPresentInCache(long searchPC) //will be called for each micro-op (and not each CISC instruction)
	{
		numSearches++;
		MicroOpCacheEntry entry = uopCache.get(searchPC);
		if(entry == null)
		{
			return false;
		}

		entry.timeLastUsed = GlobalClock.getCurrentTime();
		numHits++;
		if(SimulationConfig.debugMode)
		{
			// NOTE(review): 24 is a hard-coded divisor, presumably the core step size - confirm
			System.out.println("hit in microp-cache : " + GlobalClock.getCurrentTime()/24 + " : " + Long.toHexString(searchPC));
		}
		return true;
	}

	/**
	 * Inserts an entry for newPC holding numberOfMicroOps micro-ops, evicting
	 * least-recently-used entries first if there is not enough room.
	 *
	 * Nothing happens if newPC is already cached. An entry that is larger than
	 * the whole cache is not inserted at all: no amount of eviction could make
	 * it fit, and attempting it used to end in a NullPointerException once the
	 * cache had been emptied.
	 *
	 * @param newPC program counter the micro-ops belong to
	 * @param numberOfMicroOps number of micro-ops the new entry accounts for
	 */
	public void addToCache(long newPC, int numberOfMicroOps)
	{
		if(uopCache.containsKey(newPC))
		{
			return;
		}

		//can never fit, even in an empty cache (e.g. cache configured with size 0)
		if(numberOfMicroOps > maxSize)
		{
			return;
		}

		//remove old entries to make place for the new one
		while(curSize + numberOfMicroOps > maxSize && uopCache.isEmpty() == false)
		{
			evictLeastRecentlyUsedEntry();
		}

		//add new micro-ops
		MicroOpCacheEntry newEntry = new MicroOpCacheEntry();
		newEntry.numberOfMicroOps = numberOfMicroOps;
		newEntry.timeLastUsed = GlobalClock.getCurrentTime();
		uopCache.put(newPC, newEntry);
		curSize += numberOfMicroOps;
		numAdditions += numberOfMicroOps;

		if(SimulationConfig.debugMode)
		{
			// NOTE(review): 24 is a hard-coded divisor, presumably the core step size - confirm
			System.out.println("add to microp-cache : " + GlobalClock.getCurrentTime()/24 + " : " + Long.toHexString(newPC));
		}
	}

	/**
	 * Removes the entry with the smallest timeLastUsed (the first one met in
	 * map iteration order wins ties) and releases its micro-ops from curSize.
	 * Does nothing if the cache holds no entries.
	 */
	private void evictLeastRecentlyUsedEntry()
	{
		Map.Entry<Long, MicroOpCacheEntry> victim = null;
		for(Map.Entry<Long, MicroOpCacheEntry> candidate : uopCache.entrySet())
		{
			if(victim == null || candidate.getValue().timeLastUsed < victim.getValue().timeLastUsed)
			{
				victim = candidate;
			}
		}

		if(victim == null)
		{
			return;
		}

		//read the size before removing: the map entry must not be used after removal
		int freedMicroOps = victim.getValue().numberOfMicroOps;
		//remove all micro-ops corresponding to LRU PC
		uopCache.remove(victim.getKey());
		curSize -= freedMicroOps;
	}
}
/**
 * Bookkeeping record for one program counter held in the micro-op cache.
 */
class MicroOpCacheEntry
{
	// time stamp written by MicroOpCache on insertion and on every hit;
	// the entry with the smallest value is the eviction victim
	long timeLastUsed;

	// number of micro-ops this entry contributes to the cache occupancy
	int numberOfMicroOps;
}

View File

@ -22,6 +22,7 @@ public class OutOrderExecutionEngine extends ExecutionEngine {
//components of the execution engine //components of the execution engine
private ICacheBuffer iCacheBuffer; private ICacheBuffer iCacheBuffer;
private FetchLogic fetcher; private FetchLogic fetcher;
private MicroOpCache microOpCache;
private GenericCircularQueue<Instruction> fetchBuffer; private GenericCircularQueue<Instruction> fetchBuffer;
private DecodeLogic decoder; private DecodeLogic decoder;
private GenericCircularQueue<ReorderBufferEntry> decodeBuffer; private GenericCircularQueue<ReorderBufferEntry> decodeBuffer;
@ -81,11 +82,12 @@ public class OutOrderExecutionEngine extends ExecutionEngine {
vectorRegisterFile = new RegisterFile(core, core.getVectorRegisterFileSize()); vectorRegisterFile = new RegisterFile(core, core.getVectorRegisterFileSize());
vectorRenameTable = new RenameTable(this, core.getNVectorArchitecturalRegisters(), core.getVectorRegisterFileSize(), vectorRegisterFile, core.getNo_of_input_pipes()); vectorRenameTable = new RenameTable(this, core.getNVectorArchitecturalRegisters(), core.getVectorRegisterFileSize(), vectorRegisterFile, core.getNo_of_input_pipes());
fetchBuffer = new GenericCircularQueue(Instruction.class, core.getDecodeWidth()); fetchBuffer = new GenericCircularQueue(Instruction.class, (core.getDecodeWidth()>core.getRenameWidth()?core.getDecodeWidth():core.getRenameWidth()));
microOpCache = new MicroOpCache(core.getCoreConfig().NoOfMicroOpCacheEntries);
fetcher = new FetchLogic(core, this); fetcher = new FetchLogic(core, this);
decodeBuffer = new GenericCircularQueue(ReorderBufferEntry.class, core.getDecodeWidth()); decodeBuffer = new GenericCircularQueue(ReorderBufferEntry.class, (core.getDecodeWidth()>core.getRenameWidth()?core.getDecodeWidth():core.getRenameWidth()));
decoder = new DecodeLogic(core, this); decoder = new DecodeLogic(core, this);
renameBuffer = new GenericCircularQueue(ReorderBufferEntry.class, core.getDecodeWidth()); renameBuffer = new GenericCircularQueue(ReorderBufferEntry.class, (core.getDecodeWidth()>core.getRenameWidth()?core.getDecodeWidth():core.getRenameWidth()));
renamer = new RenameLogic(core, this); renamer = new RenameLogic(core, this);
IWPusher = new IWPushLogic(core, this); IWPusher = new IWPushLogic(core, this);
selector = new SelectLogic(core, this); selector = new SelectLogic(core, this);
@ -202,6 +204,10 @@ public class OutOrderExecutionEngine extends ExecutionEngine {
return renameBuffer; return renameBuffer;
} }
public MicroOpCache getMicroOpCache() {
return microOpCache;
}
public FetchLogic getFetcher() { public FetchLogic getFetcher() {
return fetcher; return fetcher;
} }

View File

@ -23,7 +23,7 @@ public class RenameLogic extends SimulationElement {
OutOrderExecutionEngine execEngine; OutOrderExecutionEngine execEngine;
GenericCircularQueue<ReorderBufferEntry> decodeBuffer; GenericCircularQueue<ReorderBufferEntry> decodeBuffer;
GenericCircularQueue<ReorderBufferEntry> renameBuffer; GenericCircularQueue<ReorderBufferEntry> renameBuffer;
int decodeWidth; int renameWidth;
int threadID; int threadID;
Instruction instruction; Instruction instruction;
@ -37,18 +37,17 @@ public class RenameLogic extends SimulationElement {
this.execEngine = execEngine; this.execEngine = execEngine;
decodeBuffer = execEngine.getDecodeBuffer(); decodeBuffer = execEngine.getDecodeBuffer();
renameBuffer = execEngine.getRenameBuffer(); renameBuffer = execEngine.getRenameBuffer();
decodeWidth = core.getDecodeWidth(); renameWidth = core.getRenameWidth();
} }
public void performRename() public void performRename()
{ {
if(execEngine.isToStall5() == true /*pipeline stalled due to branch mis-prediction*/ if(execEngine.isToStall5() == true /*pipeline stalled due to branch mis-prediction*/)
|| execEngine.isToStall1() == true /*IW full*/)
{ {
return; return;
} }
for(int i = 0; i < decodeWidth; i++) for(int i = 0; i < renameWidth; i++)
{ {
if(renameBuffer.isFull() == true) if(renameBuffer.isFull() == true)
{ {

View File

@ -21,9 +21,11 @@ import java.io.OutputStreamWriter;
import java.util.zip.GZIPOutputStream; import java.util.zip.GZIPOutputStream;
import main.CustomObjectPool; import main.CustomObjectPool;
import pipeline.branchpredictor.TAGESCL.TAGESCL;
import config.EmulatorConfig; import config.EmulatorConfig;
import config.EnergyConfig; import config.EnergyConfig;
import config.SimulationConfig; import config.SimulationConfig;
import config.BranchPredictorConfig.BP;
public class ReorderBuffer extends SimulationElement{ public class ReorderBuffer extends SimulationElement{
@ -49,6 +51,8 @@ public class ReorderBuffer extends SimulationElement{
int stall6Count; int stall6Count;
long branchCount; long branchCount;
long mispredCount; long mispredCount;
public long predicateCount;
public long predicateMispredCount;
long jumpCount; long jumpCount;
long targetMispredCount; long targetMispredCount;
long lastValidIPSeen; long lastValidIPSeen;
@ -249,25 +253,44 @@ public class ReorderBuffer extends SimulationElement{
if(firstOpType == OperationType.branch) if(firstOpType == OperationType.branch)
{ {
//perform prediction //perform prediction
boolean prediction = execEngine.getBranchPredictor().predict( boolean prediction;
lastValidIPSeen, prediction = execEngine.getBranchPredictor().predict(
first.getInstruction().isBranchTaken()); lastValidIPSeen,
first.getInstruction().isBranchTaken());
if(prediction != first.getInstruction().isBranchTaken()) if(prediction != first.getInstruction().isBranchTaken())
{ {
if(SimulationConfig.debugMode)
{
System.out.println("branch mispredicted : " + firstInstruction.getSerialNo());
}
anyMispredictedBranch = true; anyMispredictedBranch = true;
mispredCount++; mispredCount++;
} }
this.execEngine.getBranchPredictor().incrementNumAccesses(1);
//train predictor //train predictor
execEngine.getBranchPredictor().Train( if(core.getCoreConfig().branchPredictor.predictorMode != BP.TAGE_SC_L)
lastValidIPSeen, {
firstInstruction.isBranchTaken(), execEngine.getBranchPredictor().Train(
prediction lastValidIPSeen,
); firstInstruction.isBranchTaken(),
this.execEngine.getBranchPredictor().incrementNumAccesses(1); prediction
);
}
else
{
((TAGESCL)execEngine.getBranchPredictor()).Train(
lastValidIPSeen,
9, //OPTYPE_JMP_DIRECT_COND,
firstInstruction.isBranchTaken(),
prediction,
firstInstruction.getBranchTargetAddress()
);
}
this.execEngine.getBTB().GHRTrain(firstInstruction.isBranchTaken()); this.execEngine.getBTB().GHRTrain(firstInstruction.isBranchTaken());
this.execEngine.getBranchPredictor().incrementNumAccesses(2);
branchCount++; branchCount++;
} }
@ -275,19 +298,75 @@ public class ReorderBuffer extends SimulationElement{
//jump operation //jump operation
if(firstOpType == OperationType.jump) if(firstOpType == OperationType.jump)
{ {
long actualTarget = firstInstruction.getBranchTargetAddress(); long actualTarget = first.getInstruction().getBranchTargetAddress();
long predictedTarget = this.execEngine.getBTB().BTBPredict(lastValidIPSeen); long predictedTarget = this.execEngine.getBTB().BTBPredict(lastValidIPSeen);
if(actualTarget != predictedTarget) if(actualTarget != predictedTarget)
{ {
if(SimulationConfig.debugMode)
{
System.out.println("jump target mispredicted : " + firstInstruction.getSerialNo());
}
anyMispredictedBranch = true; anyMispredictedBranch = true;
targetMispredCount++; targetMispredCount++;
} }
this.execEngine.getBTB().BTBTrain(lastValidIPSeen, actualTarget); this.execEngine.getBTB().BTBTrain(lastValidIPSeen, actualTarget);
if(core.getCoreConfig().branchPredictor.predictorMode == BP.TAGE_SC_L)
{
((TAGESCL)execEngine.getBranchPredictor()).Train(
lastValidIPSeen,
4, //OPTYPE_JMP_DIRECT_UNCOND,
true,
true,
firstInstruction.getBranchTargetAddress()
);
}
jumpCount++; jumpCount++;
} }
//predicate prediction
if(firstInstruction.isPredicate())
{
//perform prediction
boolean prediction = execEngine.getBranchPredictor().predict(
lastValidIPSeen,
!first.getInstruction().isPredicateAndNotExecuted());
if(prediction != !first.getInstruction().isPredicateAndNotExecuted())
{
if(SimulationConfig.debugMode)
{
System.out.println("predicate mispredicted : " + firstInstruction.getSerialNo());
}
anyMispredictedBranch = true;
predicateMispredCount++;
}
//train predictor
if(core.getCoreConfig().branchPredictor.predictorMode != BP.TAGE_SC_L)
{
execEngine.getBranchPredictor().Train(
lastValidIPSeen,
!firstInstruction.isPredicateAndNotExecuted(),
prediction
);
}
else
{
((TAGESCL)execEngine.getBranchPredictor()).Train(
lastValidIPSeen,
9, //OPTYPE_JMP_DIRECT_COND,
!firstInstruction.isPredicateAndNotExecuted(),
prediction,
firstInstruction.getBranchTargetAddress()
);
}
predicateCount++;
}
//Signal LSQ for committing the Instruction at the queue head //Signal LSQ for committing the Instruction at the queue head
if(firstOpType == OperationType.load || firstOpType == OperationType.store) if(firstOpType == OperationType.load || firstOpType == OperationType.store)
{ {
@ -648,6 +727,10 @@ public class ReorderBuffer extends SimulationElement{
return stall5Count; return stall5Count;
} }
public int getStall6Count() {
return stall6Count;
}
public long getBranchCount() { public long getBranchCount() {
return branchCount; return branchCount;
} }