support added for predicated instructions that are not executed

This commit is contained in:
Rajshekar K K 2021-01-20 11:50:21 +05:30
parent 98a305645d
commit fa8251d31d
11 changed files with 534 additions and 417 deletions

View File

@ -505,7 +505,7 @@ VOID BarrierInit(ADDRINT first_arg, ADDRINT val, UINT32 encode, THREADID tid) {
} }
} }
/*** This function is called on every instruction ***/ /*** This function is called on every instruction ***/
VOID printip(THREADID tid, VOID *ip, char *asmString) { VOID printip(THREADID tid, VOID *ip, char *asmString, bool predicateInsnNotExecuted) {
if(outOfROIPhase) if(outOfROIPhase)
{ {
@ -612,9 +612,18 @@ VOID printip(THREADID tid, VOID *ip, char *asmString) {
uint64_t nip = MASK & (uint64_t) ip; uint64_t nip = MASK & (uint64_t) ip;
if(traceMethod==SharedMemory) { if(traceMethod==SharedMemory) {
if(predicateInsnNotExecuted)
{
while (tst->analysisFn(tid, nip, PREDICATE_INSN_NOT_EXECUTED, 1) == -1) {
PIN_Yield();
}
}
else
{
while (tst->analysisFn(tid, nip, INSTRUCTION, 1) == -1) { while (tst->analysisFn(tid, nip, INSTRUCTION, 1) == -1) {
PIN_Yield(); PIN_Yield();
} }
}
} else if(traceMethod==File) { } else if(traceMethod==File) {
while (tst->analysisFnAssembly(tid, nip, ASSEMBLY, asmString) == -1) { while (tst->analysisFnAssembly(tid, nip, ASSEMBLY, asmString) == -1) {
PIN_Yield(); PIN_Yield();
@ -674,7 +683,7 @@ VOID Instruction(INS ins, VOID *v) {
asmChar = (char *)asmString->c_str(); asmChar = (char *)asmString->c_str();
} }
INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)printip, IARG_THREAD_ID, IARG_INST_PTR, IARG_PTR, asmChar, IARG_END); INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)printip, IARG_THREAD_ID, IARG_INST_PTR, IARG_PTR, asmChar, IARG_EXECUTING, IARG_END);
UINT32 memOperands = INS_MemoryOperandCount(ins); UINT32 memOperands = INS_MemoryOperandCount(ins);

View File

@ -31,5 +31,7 @@
#define PARENT_SPAWN 35 #define PARENT_SPAWN 35
#define CHILD_START 36 #define CHILD_START 36
#define PREDICATE_INSN_NOT_EXECUTED 37
const char* findType(int type); const char* findType(int type);

View File

@ -19,9 +19,11 @@
Contributors: Moksh Upadhyay, Abhishek Sagar, Prathmesh Kallurkar Contributors: Moksh Upadhyay, Abhishek Sagar, Prathmesh Kallurkar
***************************************************************************** *****************************************************************************
based on Intel® Core™ i7-7820X X-series Processor based on Intel® Core™ i5-7200U (Kabylake) Processor
TDP = 112W https://en.wikichip.org/wiki/intel/core_i5/i5-7200u
52.5 mm x 45 mm https://www.agner.org/optimize/instruction_tables.pdf
TDP = 15W
42 mm * 24 mm
/--> /-->
<Configuration> <Configuration>
<Emulator> <Emulator>
@ -45,13 +47,14 @@ TDP = 112W
<!-- If store packets in a file option is set to true, this parameter indicates the basename for the trace files --> <!-- If store packets in a file option is set to true, this parameter indicates the basename for the trace files -->
<!--One trace file is maintained for each store. The name of trace file for core n is basename_n.gz--> <!--One trace file is maintained for each store. The name of trace file for core n is basename_n.gz-->
<!--We do not allow overwriting of trace files. So if a tracefile with same name is pre-existing, kindly rename it--> <!--We do not allow overwriting of trace files. So if a tracefile with same name is pre-existing, kindly rename it-->
<BasenameForTraceFiles>/home/prathmesh/tejasupdate/Tejas-dram/test/helloworld_trace</BasenameForTraceFiles> <BasenameForTraceFiles>/home/rajshekar/tmp/gcc_trace</BasenameForTraceFiles>
<PinTool>/home/rajshekar/softwares/pin-97554/</PinTool> <PinTool>/home/rajshekar/softwares/pin-97554/</PinTool>
<PinInstrumentor>/home/rajshekar/projects/nvms/workspace/Tejas/src/emulator/pin/obj-pin/causalityTool.so</PinInstrumentor> <PinInstrumentor>/home/rajshekar/projects/tejas/workspace/Tejas/src/emulator/pin/obj-pin/causalityTool.so</PinInstrumentor>
<QemuTool>TODO/home/prathmesh/workspace/qemu/x86_64-linux-user/qemu-x86_64 /home/prathmesh/tmp/testQemu.o</QemuTool> <QemuTool>TODO/home/prathmesh/workspace/qemu/x86_64-linux-user/qemu-x86_64 /home/prathmesh/tmp/testQemu.o</QemuTool>
<ShmLibDirectory>/home/rajshekar/projects/nvms/workspace/Tejas/src/emulator/pin/obj-comm</ShmLibDirectory> <ShmLibDirectory>/home/rajshekar/resources/tejas_configs/</ShmLibDirectory>
<KillEmulatorScript>/home/rajshekar/projects/nvms/workspace/Tejas/src/simulator/main/killAllDescendents.sh</KillEmulatorScript> <GetBenchmarkPIDScript>/home/rajshekar_resources/tejas_configs/getBenchmarkPID.sh</GetBenchmarkPIDScript>
<KillEmulatorScript>/home/rajshekar/resources/tejas_configs/killAllDescendents.sh</KillEmulatorScript>
</Emulator> </Emulator>
<!--Simulation Parameters--> <!--Simulation Parameters-->
@ -71,23 +74,23 @@ TDP = 112W
<DetachMemSysInsn>false</DetachMemSysInsn> <DetachMemSysInsn>false</DetachMemSysInsn>
<PrintPowerStats>true</PrintPowerStats> <PrintPowerStats>true</PrintPowerStats>
<Broadcast>false</Broadcast> <Broadcast>false</Broadcast>
<pinpointsSim>true</pinpointsSim> <pinpointsSim>false</pinpointsSim>
<pinpointsFile>/home/rajshekar/benchmarks/cpu2006/PinPoints/perlbench.ref.t.sorted</pinpointsFile> <pinpointsFile>/mnt/srishtistr0/scratch/rajshekar/tejas/PinPoints_working_directory/soplex.test.Data/t.sorted</pinpointsFile>
<NumInsToIgnore>0</NumInsToIgnore> <!--Ignores these many profilable instructions from the start of the program--> <NumInsToIgnore>00000000</NumInsToIgnore> <!--Ignores these many profilable instructions from the start of the program-->
<subsetSim>true</subsetSim> <subsetSim>false</subsetSim>
<subsetSimSize>1000000000</subsetSimSize> <subsetSimSize>2000000</subsetSimSize>
<markerFunctions>false</markerFunctions> <markerFunctions>false</markerFunctions>
<startSimMarker>XXX_startInstrumentation</startSimMarker> <startSimMarker>add</startSimMarker>
<endSimMarker>XXX_endInstrumentation</endSimMarker> <endSimMarker>sub</endSimMarker>
<NumCores>8</NumCores> <NumCores>2</NumCores>
</Simulation> </Simulation>
<!--System Parameters--> <!--System Parameters-->
<System> <System>
<MainMemory> <MainMemory>
<MemControllerToUse>SIMPLE</MemControllerToUse> <!-- Set the value as DRAM to enable DRAM else use SIMPLE to disable DRAM --> <MemControllerToUse>SIMPLE</MemControllerToUse> <!-- Set the value as DRAM to enable DRAM else use SIMPLE to disable DRAM -->
<MainMemoryLatency>100</MainMemoryLatency> <!--The latency of main memory (in clock cycles)--> <MainMemoryLatency>132</MainMemoryLatency> <!--The latency of main memory (in clock cycles)-->
<MainMemoryFrequency>2100</MainMemoryFrequency> <!--Operating frequency of the main memory (in MHz)--> <MainMemoryFrequency>2400</MainMemoryFrequency> <!--Operating frequency of the main memory (in MHz)-->
<MainMemoryPortType>FCFS</MainMemoryPortType> <!--Type of access ports in the Main Memory (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)--> <MainMemoryPortType>FCFS</MainMemoryPortType> <!--Type of access ports in the Main Memory (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)-->
<MainMemoryAccessPorts>1</MainMemoryAccessPorts> <!--Number of access ports in the Main Memory--> <MainMemoryAccessPorts>1</MainMemoryAccessPorts> <!--Number of access ports in the Main Memory-->
<MainMemoryPortOccupancy>1</MainMemoryPortOccupancy> <!--The occupancy of the Main Memory ports (in clock cycles)--> <MainMemoryPortOccupancy>1</MainMemoryPortOccupancy> <!--The occupancy of the Main Memory ports (in clock cycles)-->
@ -104,8 +107,8 @@ TDP = 112W
<!--Core Parameters--> <!--Core Parameters-->
<Core> <Core>
<CoreNumber>0-7</CoreNumber> <CoreNumber>0-1</CoreNumber>
<CoreFrequency>4200</CoreFrequency> <!--Operating frequency of the core (in MHz)--> <CoreFrequency>2500</CoreFrequency> <!--Operating frequency of the core (in MHz)-->
<PipelineType>outOfOrder</PipelineType> <!--inOrder,outOfOrder(set issue width for multi-issue in-order)--> <PipelineType>outOfOrder</PipelineType> <!--inOrder,outOfOrder(set issue width for multi-issue in-order)-->
<BranchPredictor> <BranchPredictor>
@ -130,27 +133,38 @@ TDP = 112W
</LSQ> </LSQ>
<ITLB> <ITLB>
<Size>128</Size> <!--Maximum number of entries in the ITLB--> <Size>128</Size> <!--Maximum number of entries in the TLB-->
<Latency>1</Latency> <!--In clock cycles--> <Latency>1</Latency> <!--In clock cycles-->
<MissPenalty>10</MissPenalty> <!--In clock cycles--> <MissPenalty>-1</MissPenalty> <!--In clock cycles; -1 indicates there is another TLB level below-->
<PortType>UL</PortType> <!--Type of access ports in the ITLB (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)--> <PortType>UL</PortType> <!--Type of access ports in the TLB (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)-->
<AccessPorts>-1</AccessPorts> <!--Number of access ports in the ITLB--> <AccessPorts>-1</AccessPorts> <!--Number of access ports in the TLB-->
<PortOccupancy>-1</PortOccupancy> <!--The occupancy of the ITLB ports (in clock cycles)--> <PortOccupancy>-1</PortOccupancy> <!--The occupancy of the TLB ports (in clock cycles)-->
<LeakageEnergy>0.00546275</LeakageEnergy> <LeakageEnergy>0.00546275</LeakageEnergy>
<DynamicEnergy>0.06792852941</DynamicEnergy> <DynamicEnergy>0.06792852941</DynamicEnergy>
</ITLB> </ITLB>
<DTLB> <DTLB>
<Size>64</Size> <!--Maximum number of entries in the DTLB--> <Size>64</Size> <!--Maximum number of entries in the TLB-->
<Latency>1</Latency> <!--In clock cycles--> <Latency>1</Latency> <!--In clock cycles-->
<MissPenalty>10</MissPenalty> <!--In clock cycles--> <MissPenalty>-1</MissPenalty> <!--In clock cycles; -1 indicates there is another TLB level below-->
<PortType>UL</PortType> <!--Type of access ports in the ITLB (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)--> <PortType>UL</PortType> <!--Type of access ports in the TLB (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)-->
<AccessPorts>-1</AccessPorts> <!--Number of access ports in the ITLB--> <AccessPorts>-1</AccessPorts> <!--Number of access ports in the TLB-->
<PortOccupancy>-1</PortOccupancy> <!--The occupancy of the ITLB ports (in clock cycles)--> <PortOccupancy>-1</PortOccupancy> <!--The occupancy of the TLB ports (in clock cycles)-->
<LeakageEnergy>0.00546275</LeakageEnergy> <LeakageEnergy>0.00546275</LeakageEnergy>
<DynamicEnergy>0.06792852941</DynamicEnergy> <DynamicEnergy>0.06792852941</DynamicEnergy>
</DTLB> </DTLB>
<STLB> <!-- unified second level TLB -->
<Size>1536</Size> <!--Maximum number of entries in the TLB-->
<Latency>9</Latency> <!--In clock cycles-->
<MissPenalty>17</MissPenalty> <!--In clock cycles; -1 indicates there is another TLB level below-->
<PortType>UL</PortType> <!--Type of access ports in the TLB (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)-->
<AccessPorts>-1</AccessPorts> <!--Number of access ports in the TLB-->
<PortOccupancy>-1</PortOccupancy> <!--The occupancy of the TLB ports (in clock cycles)-->
<LeakageEnergy>0.00546275</LeakageEnergy> <!-- TODO this number isn't right -->
<DynamicEnergy>0.06792852941</DynamicEnergy> <!-- TODO this number isn't right -->
</STLB>
<Decode> <!--Instruction decode--> <Decode> <!--Instruction decode-->
<Width>6</Width> <Width>6</Width>
<LeakageEnergy>0.0598</LeakageEnergy> <LeakageEnergy>0.0598</LeakageEnergy>
@ -188,7 +202,7 @@ TDP = 112W
</InstructionWindow> </InstructionWindow>
<ROB> <ROB>
<RetireWidth>4</RetireWidth> <!--Instruction retire width--> <RetireWidth>6</RetireWidth> <!--Instruction retire width-->
<ROBSize>224</ROBSize> <!--Maximum number of entries in the ROB--> <ROBSize>224</ROBSize> <!--Maximum number of entries in the ROB-->
<LeakageEnergy>0.0058</LeakageEnergy> <LeakageEnergy>0.0058</LeakageEnergy>
<DynamicEnergy>0.0304</DynamicEnergy> <DynamicEnergy>0.0304</DynamicEnergy>
@ -197,17 +211,17 @@ TDP = 112W
<RegisterFile> <RegisterFile>
<Integer> <Integer>
<IntRegFileSize>180</IntRegFileSize> <!--Maximum number of entries in the Integer register file--> <IntRegFileSize>180</IntRegFileSize> <!--Maximum number of entries in the Integer register file-->
<IntArchRegNum>32</IntArchRegNum> <!--Number of Integer architectural registers--> <IntArchRegNum>64</IntArchRegNum> <!--Number of Integer architectural registers-->
<LeakageEnergy>0.0108</LeakageEnergy> <LeakageEnergy>0.0108</LeakageEnergy>
<DynamicEnergy>0.0572</DynamicEnergy> <DynamicEnergy>0.0572</DynamicEnergy>
</Integer> </Integer>
<Float> <Vector>
<FloatRegFileSize>144</FloatRegFileSize> <!--Maximum number of entries in the Floating point register file--> <VectorRegFileSize>168</VectorRegFileSize> <!--Maximum number of entries in the Vector register file-->
<FloatArchRegNum>32</FloatArchRegNum> <!--Number of Floating point architectural registers--> <VectorArchRegNum>64</VectorArchRegNum> <!--Number of Vector architectural registers-->
<LeakageEnergy>0.0075</LeakageEnergy> <LeakageEnergy>0.0075</LeakageEnergy>
<DynamicEnergy>0.0207</DynamicEnergy> <DynamicEnergy>0.0207</DynamicEnergy>
</Float> </Vector>
</RegisterFile> </RegisterFile>
@ -228,7 +242,7 @@ TDP = 112W
<IntMul> <IntMul>
<Num>1</Num> <Num>1</Num>
<Latency>3</Latency> <Latency>4</Latency>
<ReciprocalOfThroughput>1</ReciprocalOfThroughput> <ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<PortNumber>1</PortNumber> <PortNumber>1</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy> <LeakageEnergy>0.0271</LeakageEnergy>
@ -237,8 +251,8 @@ TDP = 112W
<IntDiv> <IntDiv>
<Num>1</Num> <Num>1</Num>
<Latency>21</Latency> <Latency>50</Latency>
<ReciprocalOfThroughput>12</ReciprocalOfThroughput> <ReciprocalOfThroughput>50</ReciprocalOfThroughput>
<PortNumber>0</PortNumber> <PortNumber>0</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy> <LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy> <DynamicEnergy>0.6514</DynamicEnergy>
@ -264,8 +278,8 @@ TDP = 112W
<FloatDiv> <FloatDiv>
<Num>1</Num> <Num>1</Num>
<Latency>24</Latency> <Latency>14</Latency>
<ReciprocalOfThroughput>12</ReciprocalOfThroughput> <ReciprocalOfThroughput>4</ReciprocalOfThroughput>
<PortNumber>0</PortNumber> <PortNumber>0</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy> <LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy> <DynamicEnergy>0.6514</DynamicEnergy>
@ -273,7 +287,7 @@ TDP = 112W
<IntVectorALU> <IntVectorALU>
<Num>3</Num> <Num>3</Num>
<Latency>3</Latency> <Latency>1</Latency>
<ReciprocalOfThroughput>1</ReciprocalOfThroughput> <ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<PortNumber>0</PortNumber> <PortNumber>0</PortNumber>
<PortNumber>1</PortNumber> <PortNumber>1</PortNumber>
@ -284,7 +298,7 @@ TDP = 112W
<IntVectorMul> <IntVectorMul>
<Num>2</Num> <Num>2</Num>
<Latency>3</Latency> <Latency>5</Latency>
<ReciprocalOfThroughput>1</ReciprocalOfThroughput> <ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<PortNumber>0</PortNumber> <PortNumber>0</PortNumber>
<PortNumber>1</PortNumber> <PortNumber>1</PortNumber>
@ -294,7 +308,7 @@ TDP = 112W
<FloatVectorALU> <FloatVectorALU>
<Num>0</Num> <Num>0</Num>
<Latency>3</Latency> <Latency>1</Latency>
<ReciprocalOfThroughput>1</ReciprocalOfThroughput> <ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<PortNumber>0</PortNumber> <PortNumber>0</PortNumber>
<PortNumber>1</PortNumber> <PortNumber>1</PortNumber>
@ -305,7 +319,7 @@ TDP = 112W
<FloatVectorMul> <FloatVectorMul>
<Num>0</Num> <Num>0</Num>
<Latency>3</Latency> <Latency>5</Latency>
<ReciprocalOfThroughput>1</ReciprocalOfThroughput> <ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<PortNumber>0</PortNumber> <PortNumber>0</PortNumber>
<PortNumber>1</PortNumber> <PortNumber>1</PortNumber>
@ -315,24 +329,59 @@ TDP = 112W
<FMA> <FMA>
<Num>2</Num> <Num>2</Num>
<FPALULatency>4</FPALULatency> <Latency>4</Latency>
<FPMulLatency>4</FPMulLatency> <ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<FPVectorALULatency>4</FPVectorALULatency>
<FPVectorMulLatency>4</FPVectorMulLatency>
<FMALatency>4</FMALatency>
<VectorFMALatency>4</VectorFMALatency>
<FPALUReciprocalOfThroughput>3</FPALUReciprocalOfThroughput>
<FPMulReciprocalOfThroughput>3</FPMulReciprocalOfThroughput>
<FPVectorALUReciprocalOfThroughput>3</FPVectorALUReciprocalOfThroughput>
<FPVectorMulReciprocalOfThroughput>3</FPVectorMulReciprocalOfThroughput>
<FMAReciprocalOfThroughput>3</FMAReciprocalOfThroughput>
<VectorFMAReciprocalOfThroughput>3</VectorFMAReciprocalOfThroughput>
<PortNumber>0</PortNumber> <PortNumber>0</PortNumber>
<PortNumber>1</PortNumber> <PortNumber>1</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy> <LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy> <DynamicEnergy>0.6514</DynamicEnergy>
</FMA> </FMA>
<AES>
<Num>1</Num>
<Latency>4</Latency>
<ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<PortNumber>0</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy>
</AES>
<VectorString>
<Num>1</Num>
<Latency>9</Latency>
<ReciprocalOfThroughput>5</ReciprocalOfThroughput>
<PortNumber>0</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy>
</VectorString>
<BitScan>
<Num>1</Num>
<Latency>3</Latency>
<ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<PortNumber>1</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy>
</BitScan>
<VectorShuffle>
<Num>1</Num>
<Latency>1</Latency>
<ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<PortNumber>5</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy>
</VectorShuffle>
<LEA>
<Num>1</Num>
<Latency>3</Latency>
<ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<PortNumber>5</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy>
</LEA>
<Branch> <Branch>
<Num>2</Num> <Num>2</Num>
<Latency>1</Latency> <Latency>1</Latency>
@ -343,16 +392,6 @@ TDP = 112W
<DynamicEnergy>0.6514</DynamicEnergy> <DynamicEnergy>0.6514</DynamicEnergy>
</Branch> </Branch>
<Load>
<Num>2</Num>
<Latency>1</Latency>
<ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<PortNumber>2</PortNumber>
<PortNumber>3</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy>
</Load>
<LoadAGU> <LoadAGU>
<Num>2</Num> <Num>2</Num>
<Latency>1</Latency> <Latency>1</Latency>
@ -363,14 +402,15 @@ TDP = 112W
<DynamicEnergy>0.6514</DynamicEnergy> <DynamicEnergy>0.6514</DynamicEnergy>
</LoadAGU> </LoadAGU>
<Store> <Load>
<Num>1</Num> <Num>2</Num>
<Latency>1</Latency> <Latency>1</Latency>
<ReciprocalOfThroughput>1</ReciprocalOfThroughput> <ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<PortNumber>4</PortNumber> <PortNumber>2</PortNumber>
<PortNumber>3</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy> <LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy> <DynamicEnergy>0.6514</DynamicEnergy>
</Store> </Load>
<StoreAGU> <StoreAGU>
<Num>1</Num> <Num>1</Num>
@ -381,50 +421,14 @@ TDP = 112W
<DynamicEnergy>0.6514</DynamicEnergy> <DynamicEnergy>0.6514</DynamicEnergy>
</StoreAGU> </StoreAGU>
<AES> <Store>
<Num>1</Num> <Num>1</Num>
<Latency>24</Latency> <Latency>1</Latency>
<ReciprocalOfThroughput>12</ReciprocalOfThroughput> <ReciprocalOfThroughput>1</ReciprocalOfThroughput>
<PortNumber>0</PortNumber> <PortNumber>4</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy> <LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy> <DynamicEnergy>0.6514</DynamicEnergy>
</AES> </Store>
<VectorString>
<Num>1</Num>
<Latency>24</Latency>
<ReciprocalOfThroughput>12</ReciprocalOfThroughput>
<PortNumber>0</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy>
</VectorString>
<BitScan>
<Num>1</Num>
<Latency>24</Latency>
<ReciprocalOfThroughput>12</ReciprocalOfThroughput>
<PortNumber>1</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy>
</BitScan>
<VectorShuffle>
<Num>1</Num>
<Latency>24</Latency>
<ReciprocalOfThroughput>12</ReciprocalOfThroughput>
<PortNumber>5</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy>
</VectorShuffle>
<LEA>
<Num>1</Num>
<Latency>24</Latency>
<ReciprocalOfThroughput>12</ReciprocalOfThroughput>
<PortNumber>5</PortNumber>
<LeakageEnergy>0.0271</LeakageEnergy>
<DynamicEnergy>0.6514</DynamicEnergy>
</LEA>
<ResultsBroadcastBus> <ResultsBroadcastBus>
<LeakageEnergy>0.0239</LeakageEnergy> <LeakageEnergy>0.0239</LeakageEnergy>
@ -460,24 +464,24 @@ TDP = 112W
</Core> </Core>
<SharedCaches> <SharedCaches>
<Cache name="L3" type="L3Cache_8M_16"/><!--Intel® Core™ i9-7960X X-series Processor actually has 11M L3. Reducing to 8 to keep it a power of 2 (required by Tejas)--> <Cache name="L3" type="L3Cache_4M_8"/><!--Intel® Core™ i5-7200U Processor actually has 3M L3 12-way. Changing to 4M 8-way to keep it a power of 2 (required by Tejas)-->
<Cache name="D1" type="Directory1"/> <Cache name="D1" type="Directory1"/>
</SharedCaches> </SharedCaches>
<Interconnect>BUS</Interconnect> <Interconnect>NOC</Interconnect>
<NOC> <NOC>
<NocConfigFile>/home/rajshekar/projects/nvms/configurations/config_1052_8core_skylake_NocConfig.txt</NocConfigFile> <NocConfigFile>/home/rajshekar/resources/tejas_configs/config_2core_kabylake_NocConfig.txt</NocConfigFile>
<NocSelScheme>STATIC</NocSelScheme> <NocSelScheme>STATIC</NocSelScheme>
<NocNumberOfBuffers>4</NocNumberOfBuffers> <NocNumberOfBuffers>4</NocNumberOfBuffers>
<NocPortType>FCFS</NocPortType> <NocPortType>FCFS</NocPortType>
<NocAccessPorts>4</NocAccessPorts> <NocAccessPorts>4</NocAccessPorts>
<NocPortOccupancy>1</NocPortOccupancy> <NocPortOccupancy>1</NocPortOccupancy>
<NocLatency>1</NocLatency> <NocLatency>1</NocLatency>
<NocOperatingFreq>2000</NocOperatingFreq> <NocOperatingFreq>2400</NocOperatingFreq>
<NocTopology>TORUS</NocTopology> <!--NOCTopology--> <NocTopology>TORUS</NocTopology> <!--NOCTopology-->
<NocRoutingAlgorithm>SIMPLE</NocRoutingAlgorithm> <NocRoutingAlgorithm>SIMPLE</NocRoutingAlgorithm>
<NocLatencyBetweenNOCElements>4</NocLatencyBetweenNOCElements> <NocLatencyBetweenNOCElements>2</NocLatencyBetweenNOCElements>
<NocRouterArbiter>RR_ARBITER</NocRouterArbiter> <NocRouterArbiter>RR_ARBITER</NocRouterArbiter>
<TechPoint>90</TechPoint> <TechPoint>90</TechPoint>
<NocConnection>ELECTRICAL</NocConnection> <NocConnection>ELECTRICAL</NocConnection>
@ -539,7 +543,8 @@ TDP = 112W
<!--Give all the library elements here--> <!--Give all the library elements here-->
<Library> <Library>
<UnifiedCache_32K_8> <UnifiedCache_32K_8>
<Frequency>4200</Frequency> <!-- private caches take frequency of containing core --> <AMAT>-1</AMAT>
<Frequency>4000</Frequency> <!-- private caches take frequency of containing core -->
<WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)--> <WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<BlockSize>64</BlockSize> <!--In bytes--> <BlockSize>64</BlockSize> <!--In bytes-->
<Associativity>8</Associativity> <Associativity>8</Associativity>
@ -552,9 +557,9 @@ TDP = 112W
<ReadWritePorts>1</ReadWritePorts> <ReadWritePorts>1</ReadWritePorts>
<PortReadOccupancy>1</PortReadOccupancy> <PortReadOccupancy>1</PortReadOccupancy>
<PortWriteOccupancy>1</PortWriteOccupancy> <PortWriteOccupancy>1</PortWriteOccupancy>
<Coherence>None</Coherence> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <Coherence>None</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>None</Prefetcher> <Prefetcher>None</Prefetcher>
<NumBuses>1</NumBuses> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <NumBuses>1</NumBuses> <!--Number of buses-->
<MSHRSize>16</MSHRSize> <MSHRSize>16</MSHRSize>
<BusOccupancy>0</BusOccupancy> <BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)--> <Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->
@ -567,7 +572,8 @@ TDP = 112W
</UnifiedCache_32K_8> </UnifiedCache_32K_8>
<ICache_32K_8> <ICache_32K_8>
<Frequency>4200</Frequency> <!-- private caches take frequency of containing core --> <AMAT>-1</AMAT>
<Frequency>2500</Frequency> <!-- private caches take frequency of containing core -->
<WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)--> <WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<BlockSize>64</BlockSize> <!--In bytes--> <BlockSize>64</BlockSize> <!--In bytes-->
<Associativity>8</Associativity> <Associativity>8</Associativity>
@ -580,9 +586,9 @@ TDP = 112W
<ReadWritePorts>4</ReadWritePorts> <ReadWritePorts>4</ReadWritePorts>
<PortReadOccupancy>1</PortReadOccupancy> <PortReadOccupancy>1</PortReadOccupancy>
<PortWriteOccupancy>1</PortWriteOccupancy> <PortWriteOccupancy>1</PortWriteOccupancy>
<Coherence>None</Coherence> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <Coherence>None</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>None</Prefetcher> <Prefetcher>Power4</Prefetcher>
<NumBuses>1</NumBuses> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <NumBuses>1</NumBuses> <!--Number of buses-->
<MSHRSize>16</MSHRSize> <MSHRSize>16</MSHRSize>
<BusOccupancy>0</BusOccupancy> <BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)--> <Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->
@ -595,22 +601,23 @@ TDP = 112W
</ICache_32K_8> </ICache_32K_8>
<L1Cache_32K_8> <L1Cache_32K_8>
<Frequency>4200</Frequency> <!-- private caches take frequency of containing core --> <AMAT>-1</AMAT>
<Frequency>2500</Frequency> <!-- private caches take frequency of containing core -->
<WriteMode>WT</WriteMode> <!--Write-back (WB) or write-through (WT)--> <WriteMode>WT</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<BlockSize>64</BlockSize> <!--In bytes--> <BlockSize>64</BlockSize> <!--In bytes-->
<Associativity>8</Associativity> <Associativity>8</Associativity>
<Size>32768</Size> <!--In Bytes--> <Size>32768</Size> <!--In Bytes-->
<ReadLatency>4</ReadLatency> <!--In clock cycles--> <ReadLatency>4</ReadLatency> <!--In clock cycles-->
<WriteLatency>4</WriteLatency> <!--In clock cycles--> <WriteLatency>4</WriteLatency> <!--In clock cycles-->
<PortType>UL</PortType> <!--Type of access ports in the Cache (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)--> <PortType>FCFS</PortType> <!--Type of access ports in the Cache (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)-->
<ReadPorts>0</ReadPorts> <ReadPorts>1</ReadPorts>
<WritePorts>0</WritePorts> <WritePorts>1</WritePorts>
<ReadWritePorts>2</ReadWritePorts> <ReadWritePorts>0</ReadWritePorts>
<PortReadOccupancy>1</PortReadOccupancy> <PortReadOccupancy>1</PortReadOccupancy>
<PortWriteOccupancy>1</PortWriteOccupancy> <PortWriteOccupancy>2</PortWriteOccupancy>
<Coherence>None</Coherence> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <Coherence>None</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>None</Prefetcher> <Prefetcher>Power4</Prefetcher>
<NumBuses>1</NumBuses> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <NumBuses>1</NumBuses> <!--Number of buses-->
<MSHRSize>16</MSHRSize> <MSHRSize>16</MSHRSize>
<BusOccupancy>0</BusOccupancy> <BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)--> <Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->
@ -623,6 +630,7 @@ TDP = 112W
</L1Cache_32K_8> </L1Cache_32K_8>
<L2Cache_256K_8> <L2Cache_256K_8>
<AMAT>-1</AMAT>
<WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)--> <WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<BlockSize>64</BlockSize> <!--In bytes--> <BlockSize>64</BlockSize> <!--In bytes-->
<Associativity>8</Associativity> <Associativity>8</Associativity>
@ -635,9 +643,9 @@ TDP = 112W
<ReadWritePorts>1</ReadWritePorts> <ReadWritePorts>1</ReadWritePorts>
<PortReadOccupancy>1</PortReadOccupancy> <PortReadOccupancy>1</PortReadOccupancy>
<PortWriteOccupancy>1</PortWriteOccupancy> <PortWriteOccupancy>1</PortWriteOccupancy>
<Coherence>None</Coherence> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <Coherence>None</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>None</Prefetcher> <Prefetcher>None</Prefetcher>
<NumBuses>1</NumBuses> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <NumBuses>1</NumBuses> <!--Number of buses-->
<MSHRSize>256</MSHRSize> <MSHRSize>256</MSHRSize>
<BusOccupancy>0</BusOccupancy> <BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)--> <Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->
@ -650,7 +658,8 @@ TDP = 112W
</L2Cache_256K_8> </L2Cache_256K_8>
<L2Cache_256K_4> <L2Cache_256K_4>
<Frequency>4200</Frequency> <!-- private caches take frequency of containing core --> <AMAT>-1</AMAT>
<Frequency>2500</Frequency> <!-- private caches take frequency of containing core -->
<WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)--> <WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<BlockSize>64</BlockSize> <!--In bytes--> <BlockSize>64</BlockSize> <!--In bytes-->
<Associativity>4</Associativity> <Associativity>4</Associativity>
@ -658,14 +667,14 @@ TDP = 112W
<ReadLatency>12</ReadLatency> <!--In clock cycles--> <ReadLatency>12</ReadLatency> <!--In clock cycles-->
<WriteLatency>12</WriteLatency> <!--In clock cycles--> <WriteLatency>12</WriteLatency> <!--In clock cycles-->
<PortType>FCFS</PortType> <!--Type of access ports in the Cache (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)--> <PortType>FCFS</PortType> <!--Type of access ports in the Cache (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)-->
<ReadPorts>0</ReadPorts> <ReadPorts>1</ReadPorts>
<WritePorts>0</WritePorts> <WritePorts>1</WritePorts>
<ReadWritePorts>1</ReadWritePorts> <ReadWritePorts>0</ReadWritePorts>
<PortReadOccupancy>1</PortReadOccupancy> <PortReadOccupancy>1</PortReadOccupancy>
<PortWriteOccupancy>1</PortWriteOccupancy> <PortWriteOccupancy>1</PortWriteOccupancy>
<Coherence>D1</Coherence> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <Coherence>None</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>Power4</Prefetcher> <Prefetcher>Power4</Prefetcher>
<NumBuses>1</NumBuses> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <NumBuses>1</NumBuses> <!--Number of buses-->
<MSHRSize>256</MSHRSize> <MSHRSize>256</MSHRSize>
<BusOccupancy>0</BusOccupancy> <BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)--> <Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->
@ -677,7 +686,35 @@ TDP = 112W
<CacheType>Unified</CacheType> <!--I : Instruction, D : Data, U : Unified--> <CacheType>Unified</CacheType> <!--I : Instruction, D : Data, U : Unified-->
</L2Cache_256K_4> </L2Cache_256K_4>
<L2Cache_1M_16>
<AMAT>-1</AMAT>
<WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<BlockSize>64</BlockSize> <!--In bytes-->
<Associativity>16</Associativity>
<Size>1048576</Size> <!--In Bytes-->
<Latency>12</Latency> <!--In clock cycles-->
<PortType>FCFS</PortType> <!--Type of access ports in the Cache (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)-->
<ReadPorts>1</ReadPorts>
<WritePorts>1</WritePorts>
<ReadWritePorts>0</ReadWritePorts>
<PortReadOccupancy>1</PortReadOccupancy>
<PortWriteOccupancy>1</PortWriteOccupancy>
<Coherence>D1</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>Power4</Prefetcher>
<NumBuses>1</NumBuses> <!--Number of buses-->
<MSHRSize>256</MSHRSize>
<BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->
<ONuca>NONE</ONuca> <!--ONUCA type (BCAST, TSI)-->
<NucaMapping>S</NucaMapping> <!--Valid for NUCA; S: Set-Associative A: Address-Mapped -->
<LeakageEnergy>0.1592</LeakageEnergy>
<ReadDynamicEnergy>0.43964264705</ReadDynamicEnergy>
<WriteDynamicEnergy>0.43964264705</WriteDynamicEnergy>
<CacheType>Unified</CacheType> <!--I : Instruction, D : Data, U : Unified-->
</L2Cache_1M_16>
<L3Cache_1M_8> <L3Cache_1M_8>
<AMAT>-1</AMAT>
<Frequency>2000</Frequency> <Frequency>2000</Frequency>
<WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)--> <WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<BlockSize>64</BlockSize> <!--In bytes--> <BlockSize>64</BlockSize> <!--In bytes-->
@ -691,9 +728,9 @@ TDP = 112W
<ReadWritePorts>1</ReadWritePorts> <ReadWritePorts>1</ReadWritePorts>
<PortReadOccupancy>1</PortReadOccupancy> <PortReadOccupancy>1</PortReadOccupancy>
<PortWriteOccupancy>1</PortWriteOccupancy> <PortWriteOccupancy>1</PortWriteOccupancy>
<Coherence>None</Coherence> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <Coherence>None</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>None</Prefetcher> <Prefetcher>None</Prefetcher>
<NumBuses>1</NumBuses> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <NumBuses>1</NumBuses> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<MSHRSize>8</MSHRSize> <MSHRSize>8</MSHRSize>
<BusOccupancy>0</BusOccupancy> <BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)--> <Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->
@ -705,23 +742,24 @@ TDP = 112W
<CacheType>Unified</CacheType> <!--I : Instruction, D : Data, U : Unified--> <CacheType>Unified</CacheType> <!--I : Instruction, D : Data, U : Unified-->
</L3Cache_1M_8> </L3Cache_1M_8>
<L3Cache_8M_16> <L3Cache_8M_16><!--Intel® Core™ i7-7820X X-series Processor actually has 11M L3. Reducing to 8 to keep it a power of 2 (required by Tejas)-->
<Frequency>2000</Frequency> <AMAT>-1</AMAT>
<Frequency>4000</Frequency>
<WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)--> <WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<BlockSize>64</BlockSize> <!--In bytes--> <BlockSize>64</BlockSize> <!--In bytes-->
<Associativity>16</Associativity> <Associativity>16</Associativity>
<Size>8388608</Size> <!--In Bytes--> <Size>8388608</Size> <!--In Bytes-->
<ReadLatency>60</ReadLatency> <!--In clock cycles--> <ReadLatency>77</ReadLatency> <!--In clock cycles-->
<WriteLatency>60</WriteLatency> <!--In clock cycles--> <WriteLatency>77</WriteLatency> <!--In clock cycles-->
<PortType>UL</PortType> <!--Type of access ports in the Cache (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)--> <PortType>UL</PortType> <!--Type of access ports in the Cache (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)-->
<ReadPorts>0</ReadPorts> <ReadPorts>1</ReadPorts>
<WritePorts>0</WritePorts> <WritePorts>1</WritePorts>
<ReadWritePorts>1</ReadWritePorts> <ReadWritePorts>1</ReadWritePorts>
<PortReadOccupancy>5</PortReadOccupancy> <PortReadOccupancy>2</PortReadOccupancy>
<PortWriteOccupancy>5</PortWriteOccupancy> <PortWriteOccupancy>2</PortWriteOccupancy>
<Coherence>None</Coherence> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <Coherence>None</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>Power4</Prefetcher> <Prefetcher>Power4</Prefetcher>
<NumBuses>1</NumBuses> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <NumBuses>1</NumBuses> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<MSHRSize>8</MSHRSize> <MSHRSize>8</MSHRSize>
<BusOccupancy>0</BusOccupancy> <BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)--> <Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->
@ -733,7 +771,37 @@ TDP = 112W
<CacheType>Unified</CacheType> <!--I : Instruction, D : Data, U : Unified--> <CacheType>Unified</CacheType> <!--I : Instruction, D : Data, U : Unified-->
</L3Cache_8M_16> </L3Cache_8M_16>
<L3Cache_4M_8><!--Intel® Core™ i5-7200U Processor actually has 3M 12-way L3. Changing to 4M 8-way to keep it a power of 2 (required by Tejas)-->
<AMAT>-1</AMAT>
<Frequency>2500</Frequency>
<WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<BlockSize>64</BlockSize> <!--In bytes-->
<Associativity>8</Associativity>
<Size>4194304</Size> <!--In Bytes-->
<ReadLatency>44</ReadLatency> <!--In clock cycles-->
<WriteLatency>44</WriteLatency> <!--In clock cycles-->
<PortType>UL</PortType> <!--Type of access ports in the Cache (UL : Unlimited; FCFS : First Come First Serve; PR : Priority port)-->
<ReadPorts>1</ReadPorts>
<WritePorts>1</WritePorts>
<ReadWritePorts>1</ReadWritePorts>
<PortReadOccupancy>2</PortReadOccupancy>
<PortWriteOccupancy>2</PortWriteOccupancy>
<Coherence>None</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>Power4</Prefetcher>
<NumBuses>1</NumBuses> <!--Number of buses-->
<MSHRSize>8</MSHRSize>
<BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->
<ONuca>NONE</ONuca> <!--ONUCA type (BCAST, TSI, NONE)-->
<NucaMapping>S</NucaMapping> <!--Valid for NUCA; S: Set-Associative A: Address-Mapped -->
<LeakageEnergy>0.1892</LeakageEnergy>
<ReadDynamicEnergy>0.60964264705</ReadDynamicEnergy>
<WriteDynamicEnergy>0.60964264705</WriteDynamicEnergy>
<CacheType>Unified</CacheType> <!--I : Instruction, D : Data, U : Unified-->
</L3Cache_4M_8>
<L3Cache_12M_16> <L3Cache_12M_16>
<AMAT>-1</AMAT>
<Frequency>2000</Frequency> <Frequency>2000</Frequency>
<WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)--> <WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<BlockSize>64</BlockSize> <!--In bytes--> <BlockSize>64</BlockSize> <!--In bytes-->
@ -747,9 +815,9 @@ TDP = 112W
<ReadWritePorts>1</ReadWritePorts> <ReadWritePorts>1</ReadWritePorts>
<PortReadOccupancy>1</PortReadOccupancy> <PortReadOccupancy>1</PortReadOccupancy>
<PortWriteOccupancy>1</PortWriteOccupancy> <PortWriteOccupancy>1</PortWriteOccupancy>
<Coherence>None</Coherence> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <Coherence>None</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>None</Prefetcher> <Prefetcher>None</Prefetcher>
<NumBuses>1</NumBuses> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <NumBuses>1</NumBuses> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<MSHRSize>8</MSHRSize> <MSHRSize>8</MSHRSize>
<BusOccupancy>0</BusOccupancy> <BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)--> <Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->
@ -762,6 +830,7 @@ TDP = 112W
</L3Cache_12M_16> </L3Cache_12M_16>
<L3Cache_22M_16> <L3Cache_22M_16>
<AMAT>-1</AMAT>
<Frequency>2000</Frequency> <Frequency>2000</Frequency>
<WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)--> <WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<BlockSize>64</BlockSize> <!--In bytes--> <BlockSize>64</BlockSize> <!--In bytes-->
@ -775,9 +844,9 @@ TDP = 112W
<ReadWritePorts>1</ReadWritePorts> <ReadWritePorts>1</ReadWritePorts>
<PortReadOccupancy>1</PortReadOccupancy> <PortReadOccupancy>1</PortReadOccupancy>
<PortWriteOccupancy>1</PortWriteOccupancy> <PortWriteOccupancy>1</PortWriteOccupancy>
<Coherence>None</Coherence> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <Coherence>None</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>None</Prefetcher> <Prefetcher>None</Prefetcher>
<NumBuses>1</NumBuses> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <NumBuses>1</NumBuses> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<MSHRSize>8</MSHRSize> <MSHRSize>8</MSHRSize>
<BusOccupancy>0</BusOccupancy> <BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)--> <Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->
@ -790,6 +859,7 @@ TDP = 112W
</L3Cache_22M_16> </L3Cache_22M_16>
<L3Cache_16M_16> <L3Cache_16M_16>
<AMAT>-1</AMAT>
<Frequency>2000</Frequency> <Frequency>2000</Frequency>
<WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)--> <WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<BlockSize>64</BlockSize> <!--In bytes--> <BlockSize>64</BlockSize> <!--In bytes-->
@ -803,9 +873,9 @@ TDP = 112W
<ReadWritePorts>1</ReadWritePorts> <ReadWritePorts>1</ReadWritePorts>
<PortReadOccupancy>5</PortReadOccupancy> <PortReadOccupancy>5</PortReadOccupancy>
<PortWriteOccupancy>5</PortWriteOccupancy> <PortWriteOccupancy>5</PortWriteOccupancy>
<Coherence>None</Coherence> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <Coherence>None</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>None</Prefetcher> <Prefetcher>None</Prefetcher>
<NumBuses>1</NumBuses> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <NumBuses>1</NumBuses> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<MSHRSize>8</MSHRSize> <MSHRSize>8</MSHRSize>
<BusOccupancy>0</BusOccupancy> <BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)--> <Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->
@ -818,7 +888,8 @@ TDP = 112W
</L3Cache_16M_16> </L3Cache_16M_16>
<Directory1> <Directory1>
<Frequency>2000</Frequency> <AMAT>-1</AMAT>
<Frequency>2400</Frequency>
<WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)--> <WriteMode>WB</WriteMode> <!--Write-back (WB) or write-through (WT)-->
<LastLevel>N</LastLevel> <!--Whether this is the last level in the hierarchy or not (Y for yes, N for no)--> <LastLevel>N</LastLevel> <!--Whether this is the last level in the hierarchy or not (Y for yes, N for no)-->
<BlockSize>64</BlockSize> <!--In bytes (this should be same as the block size of the Caches between those you want coherence)--> <BlockSize>64</BlockSize> <!--In bytes (this should be same as the block size of the Caches between those you want coherence)-->
@ -832,9 +903,9 @@ TDP = 112W
<ReadWritePorts>2</ReadWritePorts> <ReadWritePorts>2</ReadWritePorts>
<PortReadOccupancy>1</PortReadOccupancy> <PortReadOccupancy>1</PortReadOccupancy>
<PortWriteOccupancy>1</PortWriteOccupancy> <PortWriteOccupancy>1</PortWriteOccupancy>
<Coherence>N</Coherence> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <Coherence>N</Coherence> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<Prefetcher>None</Prefetcher> <Prefetcher>None</Prefetcher>
<NumBuses>1</NumBuses> <!--Coherence of upper level (N : None, S : Snoopy, D : Directory)--> <NumBuses>1</NumBuses> <!--Coherence of level (N : None, S : Snoopy, D : Directory)-->
<MSHRSize>16</MSHRSize> <MSHRSize>16</MSHRSize>
<BusOccupancy>0</BusOccupancy> <BusOccupancy>0</BusOccupancy>
<Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)--> <Nuca>NONE</Nuca> <!--NUCA type (S_NUCA, D_NUCA, NONE)-->

View File

@ -44,6 +44,8 @@ public class DynamicInstructionBuffer implements Encoding
private long ip; private long ip;
private boolean predicateInsnNotExecuted;
public DynamicInstructionBuffer() public DynamicInstructionBuffer()
{ {
memRead = new long[64]; memRead = new long[64];
@ -59,6 +61,8 @@ public class DynamicInstructionBuffer implements Encoding
branchAddress = -1; branchAddress = -1;
predicateInsnNotExecuted = false;
ip = arrayListPacket.get(0).ip; ip = arrayListPacket.get(0).ip;
int numAssemblyPackets = 0; int numAssemblyPackets = 0;
@ -101,6 +105,10 @@ public class DynamicInstructionBuffer implements Encoding
branchAddress = p.tgt; branchAddress = p.tgt;
break; break;
case (PREDICATE_INSN_NOT_EXECUTED):
predicateInsnNotExecuted = true;
break;
case (ASSEMBLY): case (ASSEMBLY):
numAssemblyPackets++; numAssemblyPackets++;
@ -190,4 +198,8 @@ public class DynamicInstructionBuffer implements Encoding
boolean readAuthenticBranch = branchInformationRead==false && branchAddress!=-1; boolean readAuthenticBranch = branchInformationRead==false && branchAddress!=-1;
return readAuthenticBranch; return readAuthenticBranch;
} }
/**
 * Reports the predicate-not-executed flag of this buffer. The flag is reset to
 * {@code false} before packets are examined and switched to {@code true} when a
 * {@code PREDICATE_INSN_NOT_EXECUTED} packet is encountered.
 *
 * @return the current value of the {@code predicateInsnNotExecuted} field
 */
public boolean isPredicateInsnNotExecuted() {
	return this.predicateInsnNotExecuted;
}
} }

View File

@ -204,6 +204,7 @@ public class RunnableThread implements Encoding, Runnable {
iNew.setDestinationOperandMemValue(Long.parseLong(splited[i+2])); iNew.setDestinationOperandMemValue(Long.parseLong(splited[i+2]));
iNew.setBranchTargetAddress(Long.parseLong(splited[i+3])); iNew.setBranchTargetAddress(Long.parseLong(splited[i+3]));
iNew.setBranchTaken(Boolean.parseBoolean(splited[i+4])); iNew.setBranchTaken(Boolean.parseBoolean(splited[i+4]));
iNew.setPredicateAndNotExecuted(Boolean.parseBoolean(splited[i+7]));
//iNew.setSerialNo(Long.parseLong(splited[i+6])); //iNew.setSerialNo(Long.parseLong(splited[i+6]));
} }
} }
@ -833,7 +834,7 @@ public class RunnableThread implements Encoding, Runnable {
} }
if (pnew.value!=INSTRUCTION && !(pnew.value>6 && pnew.value<26) && pnew.value!=Encoding.ASSEMBLY ) { if (pnew.value!=INSTRUCTION && !(pnew.value>6 && pnew.value<26) && pnew.value!=Encoding.ASSEMBLY && pnew.value!=Encoding.PREDICATE_INSN_NOT_EXECUTED ) {
// just append the packet to outstanding packetList for current instruction pointer // just append the packet to outstanding packetList for current instruction pointer
thread.packetList.add(pnew); thread.packetList.add(pnew);

View File

@ -42,4 +42,6 @@ public interface Encoding {
static final int PARENT_SPAWN = 35; static final int PARENT_SPAWN = 35;
static final int CHILD_START = 36; static final int CHILD_START = 36;
static final int PREDICATE_INSN_NOT_EXECUTED = 37;
} }

View File

@ -9,7 +9,7 @@ public class IntegerALU implements DynamicInstructionHandler
public int handle(int microOpIndex, public int handle(int microOpIndex,
Instruction microOp, DynamicInstructionBuffer dynamicInstructionBuffer) Instruction microOp, DynamicInstructionBuffer dynamicInstructionBuffer)
{ {
//nothing to be done in such cases microOp.setPredicateAndNotExecuted(dynamicInstructionBuffer.isPredicateInsnNotExecuted());
return ++microOpIndex; return ++microOpIndex;
} }
} }

View File

@ -44,7 +44,9 @@ public class ConditionalMove implements X86StaticInstructionHandler
(operand2.isIntegerRegisterOperand() || operand2.isImmediateOperand()) && (operand2.isIntegerRegisterOperand() || operand2.isImmediateOperand()) &&
(operand3==null)) (operand3==null))
{ {
instructionArrayList.appendInstruction(Instruction.getIntALUInstruction(operand2, Registers.getEFlagsRegister(), operand1)); Operand temp = Registers.getTempIntReg(tempRegisterNum);
instructionArrayList.appendInstruction(Instruction.getIntALUInstruction(Registers.getEFlagsRegister(), operand1, temp));
instructionArrayList.appendInstruction(Instruction.getIntALUInstruction(operand2, temp, operand1));
} }
//if operand1 = register and operand2 = memory - load //if operand1 = register and operand2 = memory - load
@ -52,8 +54,10 @@ public class ConditionalMove implements X86StaticInstructionHandler
operand2.isMemoryOperand() && operand2.isMemoryOperand() &&
operand3==null) operand3==null)
{ {
Operand temp = Registers.getTempIntReg(tempRegisterNum);
instructionArrayList.appendInstruction(Instruction.getIntALUInstruction(Registers.getEFlagsRegister(), operand1, temp));
Operand sourceOperand = OperandTranslator.processSourceMemoryOperand(operand2, instructionArrayList, tempRegisterNum, true); Operand sourceOperand = OperandTranslator.processSourceMemoryOperand(operand2, instructionArrayList, tempRegisterNum, true);
instructionArrayList.appendInstruction(Instruction.getIntALUInstruction(sourceOperand, Registers.getEFlagsRegister(), operand1)); instructionArrayList.appendInstruction(Instruction.getIntALUInstruction(sourceOperand, temp, operand1));
} }
// //if operand1 = memory and operand2 = memory - store // //if operand1 = memory and operand2 = memory - store

View File

@ -19,7 +19,7 @@ public class ConditionalSet implements X86StaticInstructionHandler
operand2==null && operand3==null) operand2==null && operand3==null)
{ {
instructionArrayList.appendInstruction(Instruction.getIntALUInstruction(Registers.getEFlagsRegister(), instructionArrayList.appendInstruction(Instruction.getIntALUInstruction(Registers.getEFlagsRegister(),
Operand.getImmediateOperand(), operand1)); operand1, operand1));
} }
else if(operand1.isMemoryOperand()) else if(operand1.isMemoryOperand())

View File

@ -44,11 +44,14 @@ public class Instruction implements Serializable
private long serialNo; private long serialNo;
private int threadID; private int threadID;
private boolean isPredicateAndNotExecuted;
public Instruction() public Instruction()
{ {
this.sourceOperand1 = null; this.sourceOperand1 = null;
this.sourceOperand2 = null; this.sourceOperand2 = null;
this.destinationOperand = null; this.destinationOperand = null;
isPredicateAndNotExecuted = false;
} }
public void clear() public void clear()
@ -57,6 +60,7 @@ public class Instruction implements Serializable
this.sourceOperand1 = null; this.sourceOperand1 = null;
this.sourceOperand2 = null; this.sourceOperand2 = null;
this.destinationOperand = null; this.destinationOperand = null;
isPredicateAndNotExecuted = false;
} }
public Instruction(OperationType type, Operand sourceOperand1, public Instruction(OperationType type, Operand sourceOperand1,
@ -66,6 +70,7 @@ public class Instruction implements Serializable
this.sourceOperand1 = sourceOperand1; this.sourceOperand1 = sourceOperand1;
this.sourceOperand2 = sourceOperand2; this.sourceOperand2 = sourceOperand2;
this.destinationOperand = destinationOperand; this.destinationOperand = destinationOperand;
isPredicateAndNotExecuted = false;
} }
private void set(OperationType type, Operand sourceOperand1, private void set(OperationType type, Operand sourceOperand1,
@ -75,6 +80,7 @@ public class Instruction implements Serializable
this.sourceOperand1 = sourceOperand1; this.sourceOperand1 = sourceOperand1;
this.sourceOperand2 = sourceOperand2; this.sourceOperand2 = sourceOperand2;
this.destinationOperand = destinationOperand; this.destinationOperand = destinationOperand;
isPredicateAndNotExecuted = false;
} }
// /* our clone constructor */ // /* our clone constructor */
@ -123,6 +129,8 @@ public class Instruction implements Serializable
this.serialNo = sourceInstruction.serialNo; this.serialNo = sourceInstruction.serialNo;
this.threadID = sourceInstruction.threadID; this.threadID = sourceInstruction.threadID;
this.isPredicateAndNotExecuted = sourceInstruction.isPredicateAndNotExecuted;
} }
public static Instruction getIntALUInstruction(Operand sourceOperand1, Operand sourceOperand2, Operand destinationOperand) public static Instruction getIntALUInstruction(Operand sourceOperand1, Operand sourceOperand2, Operand destinationOperand)
@ -492,6 +500,14 @@ public class Instruction implements Serializable
this.destinationOperandMemValue = destinationOperandMemValue; this.destinationOperandMemValue = destinationOperandMemValue;
} }
/**
 * Tells whether this instruction has been marked as a predicated instruction
 * that was not executed. Newly constructed, cleared and re-set instructions
 * start with the mark at {@code false}.
 *
 * @return the current value of the {@code isPredicateAndNotExecuted} field
 */
public boolean isPredicateAndNotExecuted() {
	return this.isPredicateAndNotExecuted;
}
/**
 * Marks (or unmarks) this instruction as a predicated instruction that was
 * not executed.
 *
 * @param isPredicateAndNotExecuted the new value stored in the field of the same name
 */
public void setPredicateAndNotExecuted(boolean isPredicateAndNotExecuted) {
this.isPredicateAndNotExecuted = isPredicateAndNotExecuted;
}
/** /**
* strInstruction method returns the instruction information in a string. * strInstruction method returns the instruction information in a string.
* @return String describing the instruction * @return String describing the instruction

View File

@ -448,7 +448,7 @@ public class ReorderBuffer extends SimulationElement{
else bw.write(" null null null"); else bw.write(" null null null");
bw.write(" "+tmp.getSourceOperand1MemValue()+" "+ bw.write(" "+tmp.getSourceOperand1MemValue()+" "+
tmp.getSourceOperand2MemValue()+" "+tmp.getDestinationOperandMemValue()+" "+ tmp.getSourceOperand2MemValue()+" "+tmp.getDestinationOperandMemValue()+" "+
tmp.getBranchTargetAddress()+" "+tmp.isBranchTaken()+" "+tmp.getThreadID()+" "+tmp.getSerialNo()+"\n"); tmp.getBranchTargetAddress()+" "+tmp.isBranchTaken()+" "+tmp.getThreadID()+" "+tmp.getSerialNo()+" "+tmp.isPredicateAndNotExecuted()+"\n");
} }
catch(Exception e) catch(Exception e)
{ {