predicate predictor calls added; LSQ logic was changed to keep stores longer so as to allow greater chance for LSQ forwarding; TAGE-SC-L added; micro op cache added; issue with widths of different stages fixed; bug fix related to looking for FMA unit before looking for floatALU/ floatMul/ floatVectorALU, floatVectorMul; causalityTool now works with java-11, ubuntu 20.04, pin-98332

This commit is contained in:
Rajshekar K K 2023-06-06 18:10:28 +05:30
parent b834fdbefe
commit 776bb9cbfe
24 changed files with 2290 additions and 118 deletions

View File

@ -24,7 +24,7 @@
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
using namespace std;
#include <fcntl.h>
#ifndef _WIN32
@ -688,7 +688,7 @@ VOID Instruction(INS ins, VOID *v) {
UINT32 memOperands = INS_MemoryOperandCount(ins);
if (INS_IsBranchOrCall(ins))//INS_IsIndirectBranchOrCall(ins))
if (INS_IsControlFlow(ins))//INS_IsBranchOrCall(ins))//INS_IsIndirectBranchOrCall(ins))
{
INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR) BrnFun, IARG_THREAD_ID,
IARG_BRANCH_TARGET_ADDR, IARG_BRANCH_TAKEN, IARG_INST_PTR,

View File

@ -1,4 +1,4 @@
PIN_KIT ?=/home/rajshekar/softwares/pin-97554/
PIN_KIT ?=/home/rajshekar/softwares/pin-98332/
CXX=$(shell make PIN_ROOT=$(PIN_KIT) VAR=CXX -f pin_makefile print_var)
LINKER=$(shell make PIN_ROOT=$(PIN_KIT) VAR=LINKER -f pin_makefile print_var)
TOOL_CXXFLAGS=$(shell make PIN_ROOT=$(PIN_KIT) VAR=TOOL_CXXFLAGS -f pin_makefile print_var)
@ -50,6 +50,8 @@ $(BINDIR)/causalityTool.$(LIB_EXTENSION): $(BINDIR)/causalityTool.$(OBJ_EXTENSIO
# Compiles the pin tool together with the shared-memory communication source,
# then moves the resulting object files into $(BINDIR).
# The output directories are created with "mkdir -p" so that an incremental
# rebuild (directories already present) does not abort the recipe.
$(BINDIR)/causalityTool.$(OBJ_EXTENSION): causalityTool.cpp $(COMMDIR)/IPCBase.h $(COMMDIR)/shm/shmem.h $(COMMDIR)/filePacket/filePacket.h $(COMMDIR)/shm/shmem.cc
	$(CXX) $(TOOL_CXXFLAGS) $(COMM_INCLUDE) -c causalityTool.cpp ../../simulator/emulatorinterface/communication/shm/shmem.cc
	mkdir -p $(JNIBINDIR)
	mkdir -p $(BINDIR)
	mv causalityTool.$(OBJ_EXTENSION) $(BINDIR)/causalityTool.$(OBJ_EXTENSION)
	mv shmem.$(OBJ_EXTENSION) $(BINDIR)/shmem.$(OBJ_EXTENSION)
@ -58,16 +60,21 @@ $(BINDIR)/shmem.$(OBJ_EXTENSION): $(COMMDIR)/IPCBase.h $(COMMDIR)/shm/shmem.h $
################################ JNI stuff comes here ############################################
JNIPACKAGE = emulatorinterface.communication.shm.SharedMem
JNINCLUDE =-I/usr/lib/jvm/java-8-openjdk-amd64/include/linux -I/usr/lib/jvm/java-8-openjdk-amd64/include
#JNIPACKAGE = emulatorinterface.communication.shm.SharedMem #use this for java-8
JNIPACKAGE = ../../simulator/emulatorinterface/communication/shm/SharedMem.java #use this for java-11
#JNINCLUDE =-I/usr/lib/jvm/java-8-openjdk-amd64/include/linux -I/usr/lib/jvm/java-8-openjdk-amd64/include #use this for java-8
JNINCLUDE =-I/usr/lib/jvm/java-11-openjdk-amd64/include/linux -I/usr/lib/jvm/java-11-openjdk-amd64/include #use this for java-11
JNILinkingFlags = -shared -Wall $(POSITION_INDEPENDENCE)
JAVAH = javah -jni
#JAVAH = javah -jni #use this for java-8
JAVAH = javac #use this for java-11
$(JNIBINDIR)/libshmlib.$(LIB_EXTENSION): $(JNIBINDIR)/SharedMem.h $(COMMDIR)/shm/JNIShm.c $(COMMDIR)/common.h
$(shell $(JNICOMMAND))
$(JNIBINDIR)/SharedMem.h: $(TOPBINDIR)/emulatorinterface/communication/shm/SharedMem.class
$(JAVAH) -classpath $(TOPBINDIR) -o $(JNIBINDIR)/SharedMem.h $(JNIPACKAGE)
#$(JAVAH) -classpath $(TOPBINDIR) -o $(JNIBINDIR)/SharedMem.h $(JNIPACKAGE) #use this for java-8
$(JAVAH) -classpath $(TOPBINDIR) -h $(JNIBINDIR) $(JNIPACKAGE) #use this line and the next for java-11
mv $(JNIBINDIR)/emulatorinterface_communication_shm_SharedMem.h $(JNIBINDIR)/SharedMem.h
clean:
rm -rf $(BINDIR)/* $(JNIBINDIR)/*
rm -rf $(BINDIR) $(JNIBINDIR)

View File

@ -5,8 +5,9 @@ public class BranchPredictorConfig {
public int BHRsize;
public int saturating_bits;
public BP predictorMode;
public String TAGESCLLibDirectory;
public static enum BP {
NoPredictor, PerfectPredictor, AlwaysTaken, AlwaysNotTaken, Tournament, Bimodal, GShare, GAg, GAp, PAg, PAp,TAGE
NoPredictor, PerfectPredictor, AlwaysTaken, AlwaysNotTaken, Tournament, Bimodal, GShare, GAg, GAp, PAg, PAp,TAGE, TAGE_SC_L,
}
}

View File

@ -58,7 +58,9 @@ public class CoreConfig
public int STLBAccessPorts;
public int STLBPortOccupancy;
public int NoOfMicroOpCacheEntries;
public int DecodeWidth;
public int RenameWidth;
public int IssueWidth;
public int RetireWidth;
public int ROBSize;

View File

@ -481,6 +481,9 @@ public class XMLParser
core.STLBPortOccupancy = Integer.parseInt(getImmediateString("PortOccupancy", sTLBElmnt));
core.sTLBPower = getEnergyConfig(sTLBElmnt);
Element microOpCacheElmnt = (Element)(coreElmnt.getElementsByTagName("MicroOpCache")).item(0);
core.NoOfMicroOpCacheEntries = Integer.parseInt(getImmediateString("NumberOfMicroOps", microOpCacheElmnt));
Element decodeElmnt = (Element)(coreElmnt.getElementsByTagName("Decode")).item(0);
core.DecodeWidth = Integer.parseInt(getImmediateString("Width", decodeElmnt));
core.decodePower = getEnergyConfig(decodeElmnt);
@ -499,6 +502,7 @@ public class XMLParser
core.resultsBroadcastBusPower = getEnergyConfig(resultsBroadcastBusElmnt);
Element renameElmnt = (Element)(coreElmnt.getElementsByTagName("Rename")).item(0);
core.RenameWidth = Integer.parseInt(getImmediateString("Width", renameElmnt));
Element ratElmnt = (Element)(renameElmnt.getElementsByTagName("RAT")).item(0);
core.intRATPower = getEnergyConfig((Element)ratElmnt.getElementsByTagName("Integer").item(0));
@ -1066,9 +1070,14 @@ public class XMLParser
{
branchPredictor.predictorMode = BP.TAGE;
}
else if(tempStr.equalsIgnoreCase("TAGE-SC-L"))
{
branchPredictor.predictorMode = BP.TAGE_SC_L;
}
branchPredictor.PCBits = Integer.parseInt(getImmediateString("PCBits", predictorElmnt));
branchPredictor.BHRsize = Integer.parseInt(getImmediateString("BHRsize", predictorElmnt));
branchPredictor.saturating_bits = Integer.parseInt(getImmediateString("SaturatingBits", predictorElmnt));
branchPredictor.TAGESCLLibDirectory = getImmediateString("TAGESCLLibDirectory", predictorElmnt);
}
private static boolean setDirectoryCoherent(String immediateString) {

View File

@ -47,12 +47,12 @@ TDP = 15W
<!-- If store packets in a file option is set to true, this parameter indicates the basename for the trace files -->
<!--One trace file is maintained for each store. The name of trace file for core n is basename_n.gz-->
<!--We do not allow overwriting of trace files. So if a tracefile with same name is pre-existing, kindly rename it-->
<BasenameForTraceFiles>/home/rajshekar/tmp/gcc_trace</BasenameForTraceFiles>
<BasenameForTraceFiles>/home/rajshekar/projects/tejas/tests/test1_trace</BasenameForTraceFiles>
<PinTool>/home/rajshekar/softwares/pin-97554/</PinTool>
<PinTool>/home/rajshekar/softwares/pin-98332/</PinTool>
<PinInstrumentor>/home/rajshekar/projects/tejas/workspace/Tejas/src/emulator/pin/obj-pin/causalityTool.so</PinInstrumentor>
<QemuTool>TODO/home/prathmesh/workspace/qemu/x86_64-linux-user/qemu-x86_64 /home/prathmesh/tmp/testQemu.o</QemuTool>
<ShmLibDirectory>/home/rajshekar/resources/tejas_configs/</ShmLibDirectory>
<ShmLibDirectory>/home/rajshekar/projects/tejas/workspace/Tejas/src/emulator/pin/obj-comm/</ShmLibDirectory>
<GetBenchmarkPIDScript>/home/rajshekar_resources/tejas_configs/getBenchmarkPID.sh</GetBenchmarkPIDScript>
<KillEmulatorScript>/home/rajshekar/resources/tejas_configs/killAllDescendents.sh</KillEmulatorScript>
</Emulator>
@ -112,11 +112,12 @@ TDP = 15W
<PipelineType>outOfOrder</PipelineType> <!--inOrder,outOfOrder(set issue width for multi-issue in-order)-->
<BranchPredictor>
<Predictor_Mode>TAGE</Predictor_Mode> <!-- Legal Values are NoPredictor, PerfectPredictor, AlwaysTaken, AlwaysNotTaken, Tournament, Bimodal, GAg, GAp, GShare, PAg, PAp, TAGE -->
<Predictor_Mode>TAGE</Predictor_Mode> <!-- Legal Values are NoPredictor, PerfectPredictor, AlwaysTaken, AlwaysNotTaken, Tournament, Bimodal, GAg, GAp, GShare, PAg, PAp, TAGE, TAGE-SC-L -->
<PCBits>8</PCBits>
<BHRsize>16</BHRsize>
<BranchMispredPenalty>17</BranchMispredPenalty> <!--Branch misprediction penalty--><!-- https://www.7-cpu.com/cpu/Skylake.html -->
<SaturatingBits>2</SaturatingBits>
<TAGESCLLibDirectory>/home/rajshekar/projects/tejas/workspace/Tejas/src/simulator/pipeline/branchpredictor/TAGESCL/</TAGESCLLibDirectory>
<LeakageEnergy>0.0178</LeakageEnergy>
<DynamicEnergy>0.0962</DynamicEnergy>
</BranchPredictor>
@ -165,6 +166,10 @@ TDP = 15W
<DynamicEnergy>0.06792852941</DynamicEnergy> <!-- TODO this number isn't right -->
</STLB>
<MicroOpCache>
<NumberOfMicroOps>2304</NumberOfMicroOps>
</MicroOpCache>
<Decode> <!--Instruction decode-->
<Width>6</Width>
<LeakageEnergy>0.0598</LeakageEnergy>
@ -172,6 +177,7 @@ TDP = 15W
</Decode>
<Rename>
<Width>6</Width>
<RAT>
<Integer>
<LeakageEnergy>0.0045</LeakageEnergy>

View File

@ -207,6 +207,10 @@ public class Core extends SimulationElement{
return coreConfig.DecodeWidth;
}
/**
 * Returns the rename-stage width of this core, as read from its
 * configuration (coreConfig.RenameWidth).
 */
public int getRenameWidth() {
return coreConfig.RenameWidth;
}
public int getVectorRegisterFileSize() {
return coreConfig.VectorRegFileSize;
}

View File

@ -22,6 +22,7 @@ import memorysystem.nuca.NucaCache;
import memorysystem.nuca.NucaCache.NucaType;
import net.NocInterface;
import net.Router;
import pipeline.outoforder.OutOrderExecutionEngine;
import config.CoreConfig;
import config.EmulatorConfig;
import config.EnergyConfig;
@ -33,6 +34,7 @@ import emulatorinterface.translator.qemuTranslationCache.TranslatedInstructionCa
import dram.MainMemoryDRAMController;
import config.MainMemoryConfig;
import config.PipelineType;
public class Statistics {
@ -199,12 +201,22 @@ public class Statistics {
outputFileWriter.write("time taken\t=\t" + formatDouble((double)coreCyclesTaken[i]/GlobalClock.effectiveGlobalClockFrequency) + " microseconds\n");
outputFileWriter.write("\n");
if(cores[i].getCoreConfig().pipelineType == PipelineType.outOfOrder)
{
outputFileWriter.write("number of micro-op cache accesses = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getMicroOpCache().numSearches + "\n");
outputFileWriter.write("micro-op cache hit rate = " + formatDouble((double)((OutOrderExecutionEngine)cores[i].getExecEngine()).getMicroOpCache().numHits/(double)((OutOrderExecutionEngine)cores[i].getExecEngine()).getMicroOpCache().numSearches) + "\n");
outputFileWriter.write("\n");
}
outputFileWriter.write("number of branches\t=\t" + cores[i].getExecEngine().getNumberOfBranches() + "\n");
outputFileWriter.write("number of mispredicted branches\t=\t" + cores[i].getExecEngine().getNumberOfMispredictedBranches() + "\n");
outputFileWriter.write("branch predictor accuracy\t=\t" + formatDouble((double)((double)(1.0 - (double)cores[i].getExecEngine().getNumberOfMispredictedBranches()/(double)cores[i].getExecEngine().getNumberOfBranches())*100.0)) + " %\n");
outputFileWriter.write("number of jumps\t=\t" + cores[i].getExecEngine().getNumberOfJumps() + "\n");
outputFileWriter.write("number of mispredicted jump targets\t=\t" + cores[i].getExecEngine().getNumberOfMispredictedTargets() + "\n");
outputFileWriter.write("target predictor accuracy\t=\t" + formatDouble((double)((double)(1.0 - (double)cores[i].getExecEngine().getNumberOfMispredictedTargets()/(double)cores[i].getExecEngine().getNumberOfJumps())*100.0)) + " %\n");
outputFileWriter.write("number of predicate instructions\t=\t" + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().predicateCount + "\n");
outputFileWriter.write("number of mispredicted predicate instructions\t=\t" + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().predicateMispredCount + "\n");
outputFileWriter.write("predicate predictor accuracy\t=\t" + formatDouble((double)((double)(1.0 - (double)((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().predicateMispredCount/(double)((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().predicateCount)*100.0)) + " %\n");
outputFileWriter.write("\n");
outputFileWriter.write("predictor type = " + coreConfig.branchPredictor.predictorMode + "\n");
@ -213,6 +225,14 @@ public class Statistics {
outputFileWriter.write("Saturating bits = " + coreConfig.branchPredictor.saturating_bits + "\n");
outputFileWriter.write("\n");
outputFileWriter.write("\nIW Full stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall1Count());
outputFileWriter.write("\nrename stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall2Count());
outputFileWriter.write("\nLSQ Full stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall3Count());
outputFileWriter.write("\nROB Full stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall4Count());
outputFileWriter.write("\nMispred stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall5Count());
outputFileWriter.write("\nSerialization instruction stall = " + ((OutOrderExecutionEngine)cores[i].getExecEngine()).getReorderBuffer().getStall6Count());
outputFileWriter.write("\n");
}
outputFileWriter.write("\n");
}

View File

@ -301,6 +301,13 @@ public class LSQ extends SimulationElement
}
/**
 * Reports whether the queue has no room for another entry of the given kind.
 * If the queue looks full, one attempt is made to free an entry of that kind
 * (see freeOneEntry) and the fullness check is repeated.
 *
 * @param isLoad true to ask about loads, false to ask about stores
 * @return true if the queue is still full after the attempt to free an entry
 */
public boolean isFull(boolean isLoad)
{
	boolean full = privIsFull(isLoad);
	if(full)
	{
		// try to make room, then look again
		freeOneEntry(isLoad);
		full = privIsFull(isLoad);
	}
	return full;
}
private boolean privIsFull(boolean isLoad)
{
if(isLoad)
{
@ -495,52 +502,10 @@ committed
LSQEntry tmpEntry = lsqueue[i];
// if it is a store, send the request to the cache
if(tmpEntry.getType() == LSQEntry.LSQEntryType.STORE)
if(tmpEntry.getType() == LSQEntry.LSQEntryType.STORE
|| tmpEntry.getType() == LSQEntryType.LOAD && tmpEntry.isForwarded())
{
if(tmpEntry.isValid() == false)
{
misc.Error.showErrorAndExit("store not ready to be committed");
}
boolean requestIssued =
containingMemSys.issueRequestToL1Cache(RequestType.Cache_Write,
tmpEntry.getAddr());
if(requestIssued == false)
{
event.addEventTime(1);
event.getEventQ().addEvent(event);
break; //removals must be in-order : if u can't commit the operation at the head, u can't commit the ones that follow it
}
else
{
if(head == tail)
{
head = tail = -1;
}
else
{
this.head = this.incrementQ(this.head);
}
this.curNumStoresInQ--;
tmpEntry.setRemoved(true);
}
}
//If it is a LOAD which has received its value
else if (tmpEntry.isForwarded())
{
if(head == tail)
{
head = tail = -1;
}
else
{
this.head = this.incrementQ(this.head);
}
this.curNumLoadsInQ--;
tmpEntry.setRemoved(true);
tmpEntry.setCanBeRemoved(true);
}
//If it is a LOAD which has not yet received its value
@ -559,6 +524,82 @@ committed
//incrementNumAccesses(1);
}
/**
 * Removes entries from the head of the queue, strictly in order, until one
 * entry of the requested kind has been removed.
 *
 * The method stops early (without error) when
 *  - the queue is empty,
 *  - the head entry has not been marked as removable, or
 *  - the head entry is a store whose write request the L1 cache did not accept.
 *
 * A removable store is written to the L1 cache before it leaves the queue.
 * A removable load must already have received its value; otherwise this is
 * reported as a fatal error.
 *
 * @param isLoadToBeRemoved true if the caller needs a load slot to be freed,
 *                          false if it needs a store slot
 */
public void freeOneEntry(boolean isLoadToBeRemoved)
{
	boolean removedEnough = false;

	while(removedEnough == false)
	{
		// head is set to -1 when the last entry is popped (see below);
		// nothing is left to free in that case
		if(head == -1)
		{
			return;
		}

		LSQEntry tmpEntry = lsqueue[head];

		if(tmpEntry.isCanBeRemoved() == false)
		{
			return;
		}

		// if it is a store, send the request to the cache
		if(tmpEntry.getType() == LSQEntry.LSQEntryType.STORE)
		{
			if(tmpEntry.isValid() == false)
			{
				misc.Error.showErrorAndExit("store not ready to be committed");
			}
			boolean requestIssued =
				containingMemSys.issueRequestToL1Cache(RequestType.Cache_Write,
						tmpEntry.getAddr());

			if(requestIssued == false)
			{
				return; //removals must be in-order : if u can't commit the operation at the head, u can't commit the ones that follow it
			}
			else
			{
				if(head == tail)
				{
					head = tail = -1;
				}
				else
				{
					this.head = this.incrementQ(this.head);
				}
				this.curNumStoresInQ--;
				tmpEntry.setRemoved(true);

				if(isLoadToBeRemoved == false)
					removedEnough = true;
			}
		}

		//If it is a LOAD which has received its value
		else if (tmpEntry.isForwarded())
		{
			if(head == tail)
			{
				head = tail = -1;
			}
			else
			{
				this.head = this.incrementQ(this.head);
			}
			this.curNumLoadsInQ--;
			tmpEntry.setRemoved(true);

			if(isLoadToBeRemoved == true)
				removedEnough = true;
		}

		//If it is a LOAD which has not yet received its value
		else
		{
			System.err.println("Error in LSQ " +this.containingMemSys.coreID+ " : ROB sent commit for a load which has not received its value");
			misc.Error.showErrorAndExit(tmpEntry.getIndexInQ() + " : load : " + tmpEntry.getAddr());
		}

		//incrementNumAccesses(1);
	}
}
void incrementNumAccesses(int incrementBy)
{
numAccesses += incrementBy;

View File

@ -33,7 +33,7 @@ public class LSQEntry
private boolean valid;
private boolean issued;
private boolean forwarded;//Whether the load has got its value or not
private boolean canBeRemoved;
private boolean removed; //If the entry has been committed and removed from the LSQ
public enum LSQEntryType {LOAD, STORE};
@ -45,6 +45,7 @@ public class LSQEntry
valid = false;
issued = false;
forwarded = false;
canBeRemoved = false;
removed = true;
}
@ -54,6 +55,7 @@ public class LSQEntry
valid = false;
issued = false;
forwarded = false;
canBeRemoved = false;
removed = false;
}
@ -108,6 +110,14 @@ public class LSQEntry
this.forwarded = forwarded;
}
/**
 * Returns the canBeRemoved flag of this entry. The flag is false for a
 * freshly constructed entry and is changed only through setCanBeRemoved().
 */
public boolean isCanBeRemoved() {
return canBeRemoved;
}
/**
 * Sets the canBeRemoved flag of this entry.
 *
 * @param canBeRemoved new value of the flag
 */
public void setCanBeRemoved(boolean canBeRemoved) {
this.canBeRemoved = canBeRemoved;
}
protected boolean isRemoved() {
return removed;
}

View File

@ -20,6 +20,7 @@ import pipeline.branchpredictor.PApPredictor;
import pipeline.branchpredictor.PerfectPredictor;
import pipeline.branchpredictor.TournamentPredictor;
import pipeline.branchpredictor.TAGE;
import pipeline.branchpredictor.TAGESCL.TAGESCL;
import pipeline.branchpredictor.BTB;
import generic.Core;
import generic.GenericCircularQueue;
@ -88,6 +89,8 @@ public abstract class ExecutionEngine {
this.branchPredictor = new TAGE(this,
coreConfig.branchPredictor.PCBits,
coreConfig.branchPredictor.saturating_bits);
else if(coreConfig.branchPredictor.predictorMode == BP.TAGE_SC_L)
this.branchPredictor = new TAGESCL(this);
BTB = new BTB(coreConfig.branchPredictor.PCBits, coreConfig.branchPredictor.BHRsize);
}

View File

@ -10,13 +10,13 @@ public class OpTypeToFUTypeMapping {
public static FunctionalUnitType[] intALUFUs = {FunctionalUnitType.integerALU};
public static FunctionalUnitType[] intMulFUs = {FunctionalUnitType.integerMul};
public static FunctionalUnitType[] intDivFUs = {FunctionalUnitType.integerDiv};
public static FunctionalUnitType[] floatALUFUs = {FunctionalUnitType.floatALU, FunctionalUnitType.FMA};
public static FunctionalUnitType[] floatMulFUs = {FunctionalUnitType.floatMul, FunctionalUnitType.FMA};
public static FunctionalUnitType[] floatALUFUs = {FunctionalUnitType.FMA, FunctionalUnitType.floatALU};
public static FunctionalUnitType[] floatMulFUs = {FunctionalUnitType.FMA, FunctionalUnitType.floatMul};
public static FunctionalUnitType[] floatDivFUs = {FunctionalUnitType.floatDiv};
public static FunctionalUnitType[] intVectorALUFUs = {FunctionalUnitType.integerVectorALU};
public static FunctionalUnitType[] intVectorMulFUs = {FunctionalUnitType.integerVectorMul};
public static FunctionalUnitType[] floatVectorALUFUs = {FunctionalUnitType.floatVectorALU, FunctionalUnitType.FMA};
public static FunctionalUnitType[] floatVectorMulFUs = {FunctionalUnitType.floatVectorMul, FunctionalUnitType.FMA};
public static FunctionalUnitType[] floatVectorALUFUs = {FunctionalUnitType.FMA, FunctionalUnitType.floatVectorALU};
public static FunctionalUnitType[] floatVectorMulFUs = {FunctionalUnitType.FMA, FunctionalUnitType.floatVectorMul};
public static FunctionalUnitType[] FMAFUs = {FunctionalUnitType.FMA};
public static FunctionalUnitType[] VectorFMAFUs = {FunctionalUnitType.FMA};
public static FunctionalUnitType[] loadFUs = {FunctionalUnitType.load};

View File

@ -0,0 +1,8 @@
# Builds libnative.so, the JNI library loaded by TAGESCLInvoker.
#
# JDK whose JNI headers are used. Defaults to the java-11 location; override on
# the command line if the JDK lives elsewhere, e.g.
#   make TAGESCL_JDK_HOME=/path/to/jdk
TAGESCL_JDK_HOME ?= /usr/lib/jvm/java-11-openjdk-amd64

# "all" and "clean" are commands, not files
.PHONY: all clean

# java-8 variant of the compile line, kept for reference:
#gcc -fPIC -I/usr/lib/jvm/java-8-openjdk-amd64/include/ -I/usr/lib/jvm/java-8-openjdk-amd64/include/linux/ -shared -o libnative.so pipeline_branchpredictor_TAGESCL_TAGESCLInvoker.cc
# NOTE(review): the source is a .cc file but is linked through gcc; confirm
# whether g++ is needed for the C++ runtime.
all:
	javac -h . TAGESCLInvoker.java
	gcc -fPIC -I$(TAGESCL_JDK_HOME)/include/ -I$(TAGESCL_JDK_HOME)/include/linux/ -shared -o libnative.so pipeline_branchpredictor_TAGESCL_TAGESCLInvoker.cc
clean:
	rm -f *.h
	rm -f *.class
	rm -f *.so

View File

@ -0,0 +1,32 @@
package pipeline.branchpredictor.TAGESCL;
import pipeline.ExecutionEngine;
import pipeline.branchpredictor.BranchPredictor;
/**
 * Branch predictor that delegates every prediction and training request to
 * the native TAGE-SC-L implementation reached through TAGESCLInvoker.
 */
public class TAGESCL extends BranchPredictor {

	TAGESCLInvoker ti;

	/**
	 * Creates the predictor and its native invoker. The directory holding the
	 * native library is read from the branch-predictor configuration of the
	 * core that contains the given execution engine.
	 */
	public TAGESCL(ExecutionEngine containingExecEngine)
	{
		super(containingExecEngine);
		String nativeLibDirectory =
				containingExecEngine.getContainingCore().getCoreConfig().branchPredictor.TAGESCLLibDirectory;
		ti = new TAGESCLInvoker(nativeLibDirectory);
	}

	/**
	 * Returns the direction predicted by the native predictor for the branch
	 * at the given address. The actual outcome is not used.
	 */
	public boolean predict(long address, boolean outcome)
	{
		boolean predictedDirection = ti.invokerPredict(address);
		return predictedDirection;
	}

	/**
	 * Not supported by this predictor: always reports an error.
	 * Use Train(long PC, int opType, boolean resolveDir, boolean predDir, long branchTarget) instead.
	 */
	public void Train(long address, boolean outcome, boolean predict)
	{
		misc.Error.showErrorAndExit("use the other Train() function");
	}

	/**
	 * Passes the resolved branch to the native predictor for training;
	 * all arguments are forwarded unchanged.
	 */
	public void Train(long PC, int opType, boolean resolveDir, boolean predDir, long branchTarget)
	{
		ti.invokerTrain(PC, opType, resolveDir, predDir, branchTarget);
	}
}

View File

@ -0,0 +1,25 @@
package pipeline.branchpredictor.TAGESCL;
/**
 * Thin JNI bridge to the native TAGE-SC-L predictor library (libnative.so).
 * The three native methods are implemented in that library; the public
 * methods simply forward to them.
 */
public class TAGESCLInvoker {

	private native void initialize();
	private native boolean predict (long PC);
	private native void train (long PC, int opType, boolean resolveDir, boolean predDir, long branchTarget);

	/**
	 * Loads libnative.so from the given directory and initializes the native
	 * predictor.
	 *
	 * @param TAGESCLLibDirectory directory containing libnative.so; may be
	 *        given with or without a trailing separator, and may be relative
	 *        to the current working directory
	 * @throws UnsatisfiedLinkError if the library cannot be loaded
	 */
	public TAGESCLInvoker(String TAGESCLLibDirectory)
	{
		// System.load() rejects anything that is not an absolute path, so the
		// configured directory is joined with the file name and made absolute.
		java.io.File nativeLibrary = new java.io.File(TAGESCLLibDirectory, "libnative.so");
		System.load(nativeLibrary.getAbsolutePath());
		initialize();
	}

	/** Returns the native predictor's prediction for the given address. */
	public boolean invokerPredict(long address)
	{
		return predict(address);
	}

	/** Forwards one resolved branch, unchanged, to the native predictor for training. */
	public void invokerTrain(long PC, int opType, boolean resolveDir, boolean predDir, long branchTarget)
	{
		train (PC, opType, resolveDir, predDir, branchTarget);
	}
}

View File

@ -39,9 +39,7 @@ public class DecodeLogic extends SimulationElement {
public void performDecode()
{
if(containingExecutionEngine.isToStall5() == true /*pipeline stalled due to branch mis-prediction*/
|| containingExecutionEngine.isToStall1() == true /*IW full*/
|| containingExecutionEngine.isToStall2() == true /*rename stall*/)
if(containingExecutionEngine.isToStall5() == true /*pipeline stalled due to branch mis-prediction*/)
{
return;
}

View File

@ -1,6 +1,7 @@
package pipeline.outoforder;
import config.SimulationConfig;
import config.SystemConfig;
import emulatorinterface.translator.x86.registers.Registers;
import main.ArchitecturalComponent;
import main.CustomObjectPool;
@ -11,6 +12,7 @@ import generic.Core;
import generic.Event;
import generic.EventQueue;
import generic.GenericCircularQueue;
import generic.GlobalClock;
import generic.Instruction;
import generic.OperationType;
import generic.PortType;
@ -25,12 +27,14 @@ public class FetchLogic extends SimulationElement {
GenericCircularQueue<Instruction>[] inputToPipeline;
int inputPipeToReadNext;
ICacheBuffer iCacheBuffer;
MicroOpCache microOpCache;
GenericCircularQueue<Instruction> fetchBuffer;
int fetchWidth;
OperationType[] instructionsToBeDropped;
boolean sleep;
long serialNo;
long lastValidIPSeen = -1;
public FetchLogic(Core core, OutOrderExecutionEngine execEngine)
{
@ -38,6 +42,7 @@ public class FetchLogic extends SimulationElement {
this.core = core;
this.execEngine = execEngine;
fetchBuffer = execEngine.getFetchBuffer();
microOpCache = execEngine.getMicroOpCache();
fetchWidth = core.getDecodeWidth();
inputPipeToReadNext = 0;
sleep = false;
@ -77,13 +82,9 @@ public class FetchLogic extends SimulationElement {
Instruction newInstruction;
if(!execEngine.isToStall1() &&
!execEngine.isToStall2() &&
!execEngine.isToStall3() &&
!execEngine.isToStall4() &&
!execEngine.isToStall5() &&
!execEngine.isToStall6())
{
if(execEngine.isToStall5())
return;
//add instructions, for whom "fetch" from iCache has completed, to fetch buffer
//decode stage reads from this buffer
for(int i = 0; i < fetchWidth; i++)
@ -97,6 +98,11 @@ public class FetchLogic extends SimulationElement {
if(newInstruction != null)
{
fetchBuffer.enqueue(newInstruction);
if(SimulationConfig.debugMode)
{
System.out.println("fetched : " + GlobalClock.getCurrentTime()/core.getStepSize() + " : " + newInstruction);
}
}
else
{
@ -104,7 +110,6 @@ public class FetchLogic extends SimulationElement {
break;
}
}
}
//this loop reads from inputToPipeline and places the instruction in iCacheBuffer
//fetch of the instruction is also issued to the iCache
@ -191,10 +196,32 @@ public class FetchLogic extends SimulationElement {
{
// The first micro-operation of an instruction has a valid CISC IP. All the subsequent
// micro-ops will have IP = -1(meaning invalid). We must not forward this requests to iCache.
if(newInstruction.getCISCProgramCounter()!=-1)
// If the micro-ops are available in the micro-op cache, we don't need to access the i-cache
if(newInstruction.getCISCProgramCounter()!=-1 && newInstruction.getCISCProgramCounter() != lastValidIPSeen
&& microOpCache.isPresentInCache(newInstruction.getCISCProgramCounter()) == false)
{
execEngine.getCoreMemorySystem().issueRequestToInstrCache(newInstruction.getCISCProgramCounter());
}
else
{
iCacheBuffer.fetchComplete[iCacheBuffer.tail] = true;
if(newInstruction.getCISCProgramCounter()==-1 || newInstruction.getCISCProgramCounter() == lastValidIPSeen)
microOpCache.isPresentInCache(lastValidIPSeen); //accessing micro-op cache just to get the micro-op cache LRU and counters right
}
if(newInstruction.getCISCProgramCounter()!=-1)
{
lastValidIPSeen = newInstruction.getCISCProgramCounter();
}
else
{
newInstruction.setCISCProgramCounter(lastValidIPSeen);
}
if(SimulationConfig.debugMode)
{
System.out.println("fetch_initiated : " + GlobalClock.getCurrentTime()/core.getStepSize() + " : " + newInstruction);
}
}
}
else
@ -235,7 +262,8 @@ public class FetchLogic extends SimulationElement {
public void processCompletionOfMemRequest(long address)
{
iCacheBuffer.updateFetchComplete(address);
int numberOfMicroOpsUpdated = iCacheBuffer.updateFetchComplete(address);
microOpCache.addToCache(address, numberOfMicroOpsUpdated);
}
public GenericCircularQueue<Instruction>[] getInputToPipeline() {

View File

@ -78,21 +78,26 @@ public class ICacheBuffer {
return toBeReturned;
}
public void updateFetchComplete(long programCounter)
public int updateFetchComplete(long programCounter)
{
int numberOfMicroOpsUpdated = 0;
if(head == -1)
return;
return numberOfMicroOpsUpdated;
for(int i = head; ; i = (i + 1)%size)
{
if(buffer[i] != null && buffer[i].getCISCProgramCounter() == programCounter)
{
fetchComplete[i] = true;
numberOfMicroOpsUpdated++;
}
if(i == tail)
break;
}
return numberOfMicroOpsUpdated;
}
public boolean isFull()

View File

@ -16,7 +16,7 @@ public class IWPushLogic extends SimulationElement {
OutOrderExecutionEngine execEngine;
GenericCircularQueue<ReorderBufferEntry> renameBuffer;
InstructionWindow IW;
int decodeWidth;
int renameWidth;
public IWPushLogic(Core core, OutOrderExecutionEngine execEngine)
{
@ -25,7 +25,7 @@ public class IWPushLogic extends SimulationElement {
this.execEngine = execEngine;
renameBuffer = execEngine.getRenameBuffer();
IW = execEngine.getInstructionWindow();
decodeWidth = core.getDecodeWidth();
renameWidth = core.getRenameWidth();
}
/*
@ -39,7 +39,7 @@ public class IWPushLogic extends SimulationElement {
return;
}
for(int i = 0; i < decodeWidth; i++)
for(int i = 0; i < renameWidth; i++)
{
ReorderBufferEntry headROBEntry = renameBuffer.peek(0);
if(headROBEntry != null)

View File

@ -0,0 +1,107 @@
package pipeline.outoforder;
import java.util.HashMap;
import java.util.Map;
import java.util.Vector;
import config.SimulationConfig;
import generic.Event;
import generic.EventQueue;
import generic.GlobalClock;
import generic.PortType;
import generic.SimulationElement;
/**
 * Micro-op cache keyed by instruction PC. Capacity is counted in micro-ops;
 * when a new instruction does not fit, whole instructions are evicted in
 * least-recently-used order until it does.
 */
public class MicroOpCache extends SimulationElement {

	int maxSize; //in terms of number of micro-ops
	int curSize; //micro-ops currently held, summed over all entries
	HashMap<Long, MicroOpCacheEntry> uopCache; //PC -> entry

	public long numAdditions; //total micro-ops inserted
	public long numSearches;  //calls to isPresentInCache()
	public long numHits;      //calls to isPresentInCache() that found the PC

	public MicroOpCache(int maxSize) {
		super(PortType.Unlimited, -1, -1, -1, -1);
		this.maxSize = maxSize;
		uopCache = new HashMap<Long, MicroOpCacheEntry>();
	}

	@Override
	public void handleEvent(EventQueue eventQ, Event event) {
		// TODO Auto-generated method stub
	}

	/**
	 * Looks up a PC, updating the search/hit counters and, on a hit, the
	 * entry's last-used time.
	 *
	 * @param searchPC PC to look for
	 * @return true if the PC is present in the cache
	 */
	public boolean isPresentInCache(long searchPC) //will be called for each micro-op (and not each CISC instruction)
	{
		numSearches++;
		MicroOpCacheEntry entry = uopCache.get(searchPC);
		if(entry == null)
		{
			return false;
		}

		entry.timeLastUsed = GlobalClock.getCurrentTime();
		numHits++;
		if(SimulationConfig.debugMode)
		{
			// NOTE(review): 24 is presumably the core step size - confirm
			System.out.println("hit in microp-cache : " + GlobalClock.getCurrentTime()/24 + " : " + Long.toHexString(searchPC));
		}
		return true;
	}

	/**
	 * Inserts the micro-ops of one instruction, evicting least-recently-used
	 * instructions as needed. Does nothing if the PC is already cached, or if
	 * the instruction has more micro-ops than the whole cache can hold.
	 *
	 * @param newPC            PC of the instruction
	 * @param numberOfMicroOps number of micro-ops the instruction occupies
	 */
	public void addToCache(long newPC, int numberOfMicroOps)
	{
		if(uopCache.containsKey(newPC))
		{
			return;
		}

		// An instruction larger than the whole cache can never fit. Without
		// this check the eviction loop would empty the cache and then fail on
		// a null LRU entry.
		if(numberOfMicroOps > maxSize)
		{
			return;
		}

		//remove old entries to make place for the new one
		while(curSize + numberOfMicroOps > maxSize)
		{
			Long lruPC = findLeastRecentlyUsedPC();
			if(lruPC == null)
			{
				// nothing left to evict
				break;
			}

			//remove all micro-ops corresponding to LRU PC
			MicroOpCacheEntry evicted = uopCache.remove(lruPC);
			curSize -= evicted.numberOfMicroOps;
		}

		//add new micro-ops
		MicroOpCacheEntry newEntry = new MicroOpCacheEntry();
		newEntry.numberOfMicroOps = numberOfMicroOps;
		newEntry.timeLastUsed = GlobalClock.getCurrentTime();
		uopCache.put(newPC, newEntry);
		curSize += numberOfMicroOps;
		numAdditions += numberOfMicroOps;

		if(SimulationConfig.debugMode)
		{
			// NOTE(review): 24 is presumably the core step size - confirm
			System.out.println("add to microp-cache : " + GlobalClock.getCurrentTime()/24 + " : " + Long.toHexString(newPC));
		}
	}

	/** Returns the PC whose entry has the smallest timeLastUsed, or null if the cache is empty. */
	private Long findLeastRecentlyUsedPC()
	{
		Long lruPC = null;
		MicroOpCacheEntry lruEntry = null;
		for(Map.Entry<Long, MicroOpCacheEntry> candidate : uopCache.entrySet())
		{
			// strict comparison: on a tie the entry met first is kept
			if(lruEntry == null || candidate.getValue().timeLastUsed < lruEntry.timeLastUsed)
			{
				lruEntry = candidate.getValue();
				lruPC = candidate.getKey();
			}
		}
		return lruPC;
	}
}
/**
 * Bookkeeping record stored in MicroOpCache for one cached PC.
 */
class MicroOpCacheEntry
{
// number of micro-ops this entry accounts for in the cache's current size
int numberOfMicroOps;
// GlobalClock time at which the entry was inserted or last hit; the entry
// with the smallest value is evicted first
long timeLastUsed;
}

View File

@ -22,6 +22,7 @@ public class OutOrderExecutionEngine extends ExecutionEngine {
//components of the execution engine
private ICacheBuffer iCacheBuffer;
private FetchLogic fetcher;
private MicroOpCache microOpCache;
private GenericCircularQueue<Instruction> fetchBuffer;
private DecodeLogic decoder;
private GenericCircularQueue<ReorderBufferEntry> decodeBuffer;
@ -81,11 +82,12 @@ public class OutOrderExecutionEngine extends ExecutionEngine {
vectorRegisterFile = new RegisterFile(core, core.getVectorRegisterFileSize());
vectorRenameTable = new RenameTable(this, core.getNVectorArchitecturalRegisters(), core.getVectorRegisterFileSize(), vectorRegisterFile, core.getNo_of_input_pipes());
fetchBuffer = new GenericCircularQueue(Instruction.class, core.getDecodeWidth());
fetchBuffer = new GenericCircularQueue(Instruction.class, (core.getDecodeWidth()>core.getRenameWidth()?core.getDecodeWidth():core.getRenameWidth()));
microOpCache = new MicroOpCache(core.getCoreConfig().NoOfMicroOpCacheEntries);
fetcher = new FetchLogic(core, this);
decodeBuffer = new GenericCircularQueue(ReorderBufferEntry.class, core.getDecodeWidth());
decodeBuffer = new GenericCircularQueue(ReorderBufferEntry.class, (core.getDecodeWidth()>core.getRenameWidth()?core.getDecodeWidth():core.getRenameWidth()));
decoder = new DecodeLogic(core, this);
renameBuffer = new GenericCircularQueue(ReorderBufferEntry.class, core.getDecodeWidth());
renameBuffer = new GenericCircularQueue(ReorderBufferEntry.class, (core.getDecodeWidth()>core.getRenameWidth()?core.getDecodeWidth():core.getRenameWidth()));
renamer = new RenameLogic(core, this);
IWPusher = new IWPushLogic(core, this);
selector = new SelectLogic(core, this);
@ -202,6 +204,10 @@ public class OutOrderExecutionEngine extends ExecutionEngine {
return renameBuffer;
}
/** Returns the micro-op cache owned by this execution engine. */
public MicroOpCache getMicroOpCache() {
return microOpCache;
}
public FetchLogic getFetcher() {
return fetcher;
}

View File

@ -23,7 +23,7 @@ public class RenameLogic extends SimulationElement {
OutOrderExecutionEngine execEngine;
GenericCircularQueue<ReorderBufferEntry> decodeBuffer;
GenericCircularQueue<ReorderBufferEntry> renameBuffer;
int decodeWidth;
int renameWidth;
int threadID;
Instruction instruction;
@ -37,18 +37,17 @@ public class RenameLogic extends SimulationElement {
this.execEngine = execEngine;
decodeBuffer = execEngine.getDecodeBuffer();
renameBuffer = execEngine.getRenameBuffer();
decodeWidth = core.getDecodeWidth();
renameWidth = core.getRenameWidth();
}
public void performRename()
{
if(execEngine.isToStall5() == true /*pipeline stalled due to branch mis-prediction*/
|| execEngine.isToStall1() == true /*IW full*/)
if(execEngine.isToStall5() == true /*pipeline stalled due to branch mis-prediction*/)
{
return;
}
for(int i = 0; i < decodeWidth; i++)
for(int i = 0; i < renameWidth; i++)
{
if(renameBuffer.isFull() == true)
{

View File

@ -21,9 +21,11 @@ import java.io.OutputStreamWriter;
import java.util.zip.GZIPOutputStream;
import main.CustomObjectPool;
import pipeline.branchpredictor.TAGESCL.TAGESCL;
import config.EmulatorConfig;
import config.EnergyConfig;
import config.SimulationConfig;
import config.BranchPredictorConfig.BP;
public class ReorderBuffer extends SimulationElement{
@ -49,6 +51,8 @@ public class ReorderBuffer extends SimulationElement{
int stall6Count;
long branchCount;
long mispredCount;
public long predicateCount;
public long predicateMispredCount;
long jumpCount;
long targetMispredCount;
long lastValidIPSeen;
@ -249,25 +253,44 @@ public class ReorderBuffer extends SimulationElement{
// Conditional-branch handling: obtain a direction prediction for lastValidIPSeen, compare it
// with the outcome recorded on the instruction, then train the predictor and the BTB.
// NOTE(review): this span comes from a rendered diff hunk without +/- markers, so superseded
// and replacement lines appear together (e.g. the two forms of the `prediction` declaration).
if(firstOpType == OperationType.branch)
{
//perform prediction
boolean prediction = execEngine.getBranchPredictor().predict(
boolean prediction;
prediction = execEngine.getBranchPredictor().predict(
lastValidIPSeen,
first.getInstruction().isBranchTaken());
// a prediction that differs from the recorded outcome sets anyMispredictedBranch
// and is counted in mispredCount
if(prediction != first.getInstruction().isBranchTaken())
{
if(SimulationConfig.debugMode)
{
System.out.println("branch mispredicted : " + firstInstruction.getSerialNo());
}
anyMispredictedBranch = true;
mispredCount++;
}
this.execEngine.getBranchPredictor().incrementNumAccesses(1);
//train predictor
// every predictor mode other than TAGE_SC_L goes through the three-argument Train
if(core.getCoreConfig().branchPredictor.predictorMode != BP.TAGE_SC_L)
{
execEngine.getBranchPredictor().Train(
lastValidIPSeen,
firstInstruction.isBranchTaken(),
prediction
);
this.execEngine.getBranchPredictor().incrementNumAccesses(1);
}
else
{
// TAGE_SC_L: downcast to reach the five-argument Train, which additionally takes
// an op-type code and the branch target address
((TAGESCL)execEngine.getBranchPredictor()).Train(
lastValidIPSeen,
9, //OPTYPE_JMP_DIRECT_COND,
firstInstruction.isBranchTaken(),
prediction,
firstInstruction.getBranchTargetAddress()
);
}
// pass the resolved direction to the BTB's GHRTrain
this.execEngine.getBTB().GHRTrain(firstInstruction.isBranchTaken());
this.execEngine.getBranchPredictor().incrementNumAccesses(2);
branchCount++;
}
@ -275,19 +298,75 @@ public class ReorderBuffer extends SimulationElement{
//jump operation
// Compares the BTB's predicted target for lastValidIPSeen with the instruction's actual
// target, records a mismatch as a target misprediction, then trains the BTB with the actual target.
// NOTE(review): rendered diff hunk without +/- markers; the two `actualTarget` declarations
// below are presumably the superseded and replacement forms of the same line.
if(firstOpType == OperationType.jump)
{
long actualTarget = firstInstruction.getBranchTargetAddress();
long actualTarget = first.getInstruction().getBranchTargetAddress();
long predictedTarget = this.execEngine.getBTB().BTBPredict(lastValidIPSeen);
// a wrong target sets the same anyMispredictedBranch flag used for direction
// mispredictions, but is counted separately in targetMispredCount
if(actualTarget != predictedTarget)
{
if(SimulationConfig.debugMode)
{
System.out.println("jump target mispredicted : " + firstInstruction.getSerialNo());
}
anyMispredictedBranch = true;
targetMispredCount++;
}
this.execEngine.getBTB().BTBTrain(lastValidIPSeen, actualTarget);
// only TAGE_SC_L is trained on jumps; both boolean arguments are passed as true
if(core.getCoreConfig().branchPredictor.predictorMode == BP.TAGE_SC_L)
{
((TAGESCL)execEngine.getBranchPredictor()).Train(
lastValidIPSeen,
4, //OPTYPE_JMP_DIRECT_UNCOND,
true,
true,
firstInstruction.getBranchTargetAddress()
);
}
jumpCount++;
}
//predicate prediction
// Runs the branch predictor on predicated instructions. The outcome being predicted is
// !isPredicateAndNotExecuted(), passed in the position where the branch path passes isBranchTaken().
// This is a separate `if`, not an else-branch of the jump check above, so it is evaluated
// regardless of whether that check matched.
if(firstInstruction.isPredicate())
{
//perform prediction
boolean prediction = execEngine.getBranchPredictor().predict(
lastValidIPSeen,
!first.getInstruction().isPredicateAndNotExecuted());
// a wrong predicate prediction sets the same anyMispredictedBranch flag as branches and
// jumps, but is counted in predicateMispredCount
if(prediction != !first.getInstruction().isPredicateAndNotExecuted())
{
if(SimulationConfig.debugMode)
{
System.out.println("predicate mispredicted : " + firstInstruction.getSerialNo());
}
anyMispredictedBranch = true;
predicateMispredCount++;
}
//train predictor
// every predictor mode other than TAGE_SC_L goes through the three-argument Train
if(core.getCoreConfig().branchPredictor.predictorMode != BP.TAGE_SC_L)
{
execEngine.getBranchPredictor().Train(
lastValidIPSeen,
!firstInstruction.isPredicateAndNotExecuted(),
prediction
);
}
else
{
// NOTE(review): the TAGE_SC_L call reuses op-type code 9 (annotated OPTYPE_JMP_DIRECT_COND)
// and passes getBranchTargetAddress() for a predicated instruction — confirm both are intended.
((TAGESCL)execEngine.getBranchPredictor()).Train(
lastValidIPSeen,
9, //OPTYPE_JMP_DIRECT_COND,
!firstInstruction.isPredicateAndNotExecuted(),
prediction,
firstInstruction.getBranchTargetAddress()
);
}
predicateCount++;
}
//Signal LSQ for committing the Instruction at the queue head
if(firstOpType == OperationType.load || firstOpType == OperationType.store)
{
@ -648,6 +727,10 @@ public class ReorderBuffer extends SimulationElement{
return stall5Count;
}
/**
 * Accessor for the {@code stall6Count} counter.
 *
 * @return the current value of {@code stall6Count}
 */
public int getStall6Count()
{
    return this.stall6Count;
}
/**
 * Accessor for the {@code branchCount} counter.
 *
 * @return the current value of {@code branchCount}
 */
public long getBranchCount()
{
    return this.branchCount;
}