/***************************************************************************** Tejas Simulator ------------------------------------------------------------------------------------------------------------ Copyright [2010] [Indian Institute of Technology, Delhi] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ------------------------------------------------------------------------------------------------------------ Contributors: Abhishek Sagar, Eldhose Peter, Prathmesh Kallurkar, Ritu Jha, Kunal Kishore, Apoorva Temurnikar, Bhumika Singh, Sakshi Goel *****************************************************************************/ #include #include #include "pin.H" #include #include #include #include #ifndef _WIN32 #include #include #include #define CRITICAL_SECTION pthread_mutex_t #include #include #include #endif #include #include #include // #include //#include "IPCBase.h" #define MaxThreads (10000) #include "shmem.h" //#include "filePacket.h" #include "encoding.h" #ifdef __MACH__ #include #include #endif OS_THREAD_ID father_id = INVALID_OS_THREAD_ID; #ifdef _LP64 #define MASK 0xffffffffffffffff #else #define MASK 0x00000000ffffffff #endif #ifdef _WIN32 #include #endif // Defining command line arguments KNOB KnobMap(KNOB_MODE_WRITEONCE, "pintool", "map", "1", "Maps"); KNOB KnobIgnore(KNOB_MODE_WRITEONCE, "pintool", "numIgn", "0", "Ignore these many profilable instructions"); KNOB KnobSimulate(KNOB_MODE_WRITEONCE, "pintool", "numSim", "0", "Simulate these many profilable instructions (-1 if no subset simulation is desired)"); KNOB KnobMaxNumActiveThreads(KNOB_MODE_WRITEONCE, "pintool", "maxNumActiveThreads", "0", "Number of maximum application threads"); KNOB KnobId(KNOB_MODE_WRITEONCE, "pintool", "id", "1", "shm id to generate key"); KNOB KnobPinPointsFile(KNOB_MODE_WRITEONCE, "pintool", "pinpointsFile", "nofile", "pinpoints file (pass numIgn = 0, numSim = -1)"); KNOB KnobStartMarker(KNOB_MODE_WRITEONCE, "pintool", "startMarker", "", "start marker function name"); KNOB KnobEndMarker(KNOB_MODE_WRITEONCE, "pintool", "endMarker", "", "end marker function name"); KNOB KnobTraceMethod(KNOB_MODE_WRITEONCE, "pintool", "traceMethod", "0", "Trace Method (sharedMemory,file). Compulsary argument"); KNOB KnobTraceFileName(KNOB_MODE_WRITEONCE, "pintool", "traceFileName", "0", "Basename for compressed trace files (_x.gz will be appended to filename where x is core number). Compulsary for file trace method."); enum TraceMethod{SharedMemory, File}; enum TraceMethod traceMethod; PIN_MUTEX mainLockForPintool; INT32 numThreads = 0; INT32 livethreads = 0; UINT64 checkSum = 0; IPC::Shm *tst; bool pumpingStatus[MaxThreads]; ADDRINT curSynchVar[MaxThreads]; static UINT64 numIns = 0; UINT64 numInsToIgnore = 0; INT64 numInsToSimulate = 0; std::string startMarker; std::string endMarker; BOOL instructionIgnorePhase = false; BOOL outOfROIPhase = false; UINT64 numCISC[MaxThreads]; UINT64 totalNumCISC; bool threadAlive[MaxThreads]; std::string pinpointsFilename; unsigned long * sliceArray; int numberOfSlices; int currentSlice; uint32_t *threadMapping; bool *isThreadActive; long *parentId; long *currentId; int MaxNumActiveThreads; #define PacketEpoch 50 uint32_t countPacket[MaxThreads]; /* #ifdef _WIN32 tejas_win::CRITICAL_SECTION *lockForWritingToCommunicationStream; #else CRITICAL_SECTION *lockForWritingToCommunicationStream; #endif void lockIAmWriting(int tid) { #ifdef _WIN32 tejas_win::EnterCriticalSection(&lockForWritingToCommunicationStream[tid]); #else pthread_mutex_lock(&lockForWritingToCommunicationStream[tid]); #endif } void unlockIAmWriting(int tid) { #ifdef _WIN32 tejas_win::LeaveCriticalSection(&lockForWritingToCommunicationStream[tid]); #else pthread_mutex_unlock(&lockForWritingToCommunicationStream[tid]); #endif }*/ void waitForThreadsAndTerminatePin() { // Iterate over all the threads // If each thread is in non-alive status, terminate PIN /* for (int tid = 0; tidunload(); exit(0); } // needs -lrt (real-time lib) // 1970-01-01 epoch UTC time, 1 nanosecond resolution // modified for mac by shikhar /* #ifndef _WIN32 // OS X does not have clock_gettime, use clock_get_time void current_utc_time(timespec *ts) { #ifdef __MACH__ clock_serv_t cclock; mach_timespec_t mts; host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); clock_get_time(cclock, &mts); mach_port_deallocate(mach_task_self(), cclock); ts->tv_sec = mts.tv_sec; ts->tv_nsec = mts.tv_nsec; #else clock_gettime(CLOCK_REALTIME, ts); #endif } #endif */ int time_ctr = 0; uint64_t ClockGetTime() { #ifdef _WIN32 tejas_win::SYSTEMTIME st; tejas_win::GetSystemTime(&st); return st.wMilliseconds * 1000000; #else //struct timespec ts; //current_utc_time(&ts); //return (uint64_t) ts.tv_sec * 1000000000LL + (uint64_t) ts.tv_nsec; return time_ctr++; #endif } // this compulsory is true if it is entering some function // so that even if halts we have a timer packet here. void sendTimerPacket(int tid, bool compulsory) { if ((countPacket[tid]++ % PacketEpoch)==0 || compulsory){ PIN_MutexLock(&mainLockForPintool); checkSum +=TIMER; PIN_MutexUnlock(&mainLockForPintool); countPacket[tid]=0; uint64_t time = ClockGetTime(); while (tst->analysisFn(tid, time, TIMER, 0) == -1) { PIN_Yield(); } } } bool isThreadBeyondNumberOfCores(int threadID) { int index; for(index=0; index < MaxNumActiveThreads; index++) { if(threadMapping[index] == (unsigned int)threadID) return false; if(isThreadActive[index] == false) return false; } return true; } int findThreadMapping(unsigned int id) { int index; for(index=0; index < MaxNumActiveThreads; index++) { if(threadMapping[index] == id) return index; } // cout<<"FATAL ERROR : ThreadMapping cannot resolve " << id << endl; return 1; // fflush(stdout); // exit(0); } #define cmp(a) (rtn_name->find(a) != string::npos) bool isActive(int tid) { return pumpingStatus[tid]; } void reActivate(int tid) { tid= findThreadMapping(tid); pumpingStatus[tid] = true; cout << "reAcivated " << tid << "\n"; curSynchVar[tid] = 0; } void deActivate(int tid, ADDRINT addr) { tid= findThreadMapping(tid); curSynchVar[tid] = addr; cout << "deAcivated " << tid << "\n"; pumpingStatus[tid] = false; } bool hasEntered(int tid, ADDRINT addr) { tid= findThreadMapping(tid); return (curSynchVar[tid] == addr); } int findParentSegment(long parent) { int index; for(index=0; index< MaxNumActiveThreads; index++) { if(currentId[index] == parent) return index; } cout<<"FATAL ERROR--- cannot find parent\n"; return 0; } VOID ThreadStart(THREADID threadid, CONTEXT *ctxt, INT32 flags, VOID *v) { cout << "threadstart() : tid = " << threadid << " " << isThreadBeyondNumberOfCores((int)threadid) << endl; if(isThreadBeyondNumberOfCores((int)threadid)) { return; } if(threadid != 45) { return; } PIN_MutexLock(&mainLockForPintool); numThreads++; livethreads++; int i; for(i=0;iMaxNumActiveThreads) { cout<<"Number of live threads till now = "<onThread_start(threadid); while (tst->analysisFn(threadid, parent, CHILD_START, PIN_GetParentTid()) == -1) { PIN_Yield(); } if(parent != -1){ while (tst->analysisFn(parent, threadid, PARENT_SPAWN, PIN_GetTid()) == -1) { PIN_Yield(); } } PIN_MutexUnlock(&mainLockForPintool); } VOID ThreadFini(THREADID tid, const CONTEXT *ctxt, INT32 flags, VOID *v) { PIN_MutexLock(&mainLockForPintool); tid= findThreadMapping(tid); printf("thread %d finished exec\n",tid); fflush(stdout); while (tst->onThread_finish(tid, (numCISC[tid])) == -1) { PIN_Yield(); } isThreadActive[tid] = false; cout << "wrote -1 for tid " << tid << "\n"; livethreads--; threadAlive[tid] = false; fflush(stdout); PIN_MutexUnlock(&mainLockForPintool); } //Pass a memory read record VOID RecordMemRead(THREADID tid, VOID * ip, VOID * addr) { if(tid != 45) return; tid= findThreadMapping(tid); if (!isActive(tid)) return; if(instructionIgnorePhase) return; if(outOfROIPhase) return; sendTimerPacket(tid,false); PIN_MutexLock(&mainLockForPintool); checkSum +=MEMREAD; PIN_MutexUnlock(&mainLockForPintool); uint64_t nip = MASK & (uint64_t) ip; uint64_t naddr = MASK & (uint64_t) addr; while (tst->analysisFn(tid, nip, MEMREAD, naddr) == -1) { PIN_Yield(); } } // Pass a memory write record VOID RecordMemWrite(THREADID tid, VOID * ip, VOID * addr) { if(tid != 45) return; tid= findThreadMapping(tid); if (!isActive(tid)) return; if(instructionIgnorePhase) return; if(outOfROIPhase) return; sendTimerPacket(tid,false); PIN_MutexLock(&mainLockForPintool); checkSum +=MEMWRITE; PIN_MutexUnlock(&mainLockForPintool); uint64_t nip = MASK & (uint64_t) ip; uint64_t naddr = MASK & (uint64_t) addr; while (tst->analysisFn(tid, nip, MEMWRITE, naddr) == -1) { PIN_Yield(); } } VOID BrnFun(THREADID tid, ADDRINT tadr, BOOL taken, VOID *ip) { if(tid != 45) return; tid= findThreadMapping(tid); if (!isActive(tid)) return; if(instructionIgnorePhase) return; if(outOfROIPhase) return; sendTimerPacket(tid,false); uint64_t nip = MASK & (uint64_t) ip; uint64_t ntadr = MASK & (uint64_t) tadr; if (taken) { PIN_MutexLock(&mainLockForPintool); checkSum +=TAKEN; PIN_MutexUnlock(&mainLockForPintool); while (tst->analysisFn(tid, nip, TAKEN, ntadr) == -1) { PIN_Yield(); } } else { PIN_MutexLock(&mainLockForPintool); checkSum +=NOTTAKEN; PIN_MutexUnlock(&mainLockForPintool); while (tst->analysisFn(tid, nip, NOTTAKEN, ntadr) == -1) { PIN_Yield(); } } } VOID RegValRead(THREADID tid,VOID * ip,REG* _reg) { if (instructionIgnorePhase) return; if(outOfROIPhase) return; checkSum+=6; uint64_t nip = MASK & (uint64_t)ip; uint64_t _nreg = MASK & (uint64_t)_reg; tid= findThreadMapping(tid); while (tst->analysisFn(tid,nip,6,_nreg)== -1) { PIN_Yield(); } } VOID RegValWrite(THREADID tid,VOID * ip,REG* _reg) { if (instructionIgnorePhase) return; if(outOfROIPhase) return; tid= findThreadMapping(tid); checkSum+=7; uint64_t nip = MASK & (uint64_t)ip; uint64_t _nreg = MASK & (uint64_t)_reg; while (tst->analysisFn(tid,nip,7,_nreg)== -1) { PIN_Yield(); } } VOID CountIns() { if (!instructionIgnorePhase) return; numIns++; if (numIns>numInsToIgnore) instructionIgnorePhase = false; //activate Now } VOID FunStartInstrumentation() { outOfROIPhase = false; numInsToSimulate = KnobSimulate; cout << "at function " << startMarker << " : beginning instrumentation" << endl; cout << "outOfROIPhase = " << outOfROIPhase<< endl; cout << "numSim = " << totalNumCISC << endl; fflush(stdout); } VOID FunEndInstrumentation() { outOfROIPhase = true; cout << "at function " << endMarker << " : stopping instrumentation" << endl; cout << "outOfROIPhase = " << outOfROIPhase<< endl; cout << "numSim = " << totalNumCISC << endl; fflush(stdout); } VOID FunEntry(ADDRINT first_arg, UINT32 encode, THREADID tid) { uint64_t time = ClockGetTime(); tid= findThreadMapping(tid); sendTimerPacket(tid,true); PIN_MutexLock(&mainLockForPintool); checkSum +=encode; PIN_MutexUnlock(&mainLockForPintool); uint64_t uarg = MASK & (uint64_t) first_arg; while (tst->analysisFn(tid, time, encode, uarg) == -1) { PIN_Yield(); } } VOID FunExit(ADDRINT first_arg, UINT32 encode, THREADID tid) { uint64_t time = ClockGetTime(); tid= findThreadMapping(tid); sendTimerPacket(tid,false); PIN_MutexLock(&mainLockForPintool); checkSum +=encode; PIN_MutexUnlock(&mainLockForPintool); uint64_t uarg = MASK & (uint64_t) first_arg; while (tst->analysisFn(tid, time, encode, uarg) == -1) { PIN_Yield(); } } /*** Called on the initialization of a barrier ***/ VOID BarrierInit(ADDRINT first_arg, ADDRINT val, UINT32 encode, THREADID tid) { PIN_MutexLock(&mainLockForPintool); checkSum +=encode; PIN_MutexUnlock(&mainLockForPintool); tid= threadMapping[tid]; uint64_t uarg = MASK & (uint64_t) first_arg; uint64_t value = MASK & (uint64_t) val; while (tst->analysisFn(tid, value, encode, uarg) == -1) { PIN_Yield(); } } /*** This function is called on every instruction ***/ VOID printip(THREADID tid, VOID *ip, char *asmString) { if(outOfROIPhase) { return; } tid= findThreadMapping(tid); PIN_MutexLock(&mainLockForPintool); // if(ignoreActive == false) { numCISC[tid]++; totalNumCISC++; // } if(pinpointsFilename.compare("nofile") == 0) { if(totalNumCISC >= numInsToIgnore) { if(numInsToSimulate < 0 || totalNumCISC < numInsToIgnore + numInsToSimulate) { instructionIgnorePhase = false; } else { instructionIgnorePhase = true; } } else { instructionIgnorePhase = true; } if(numInsToSimulate > 0) { bool subsetDone = false; // for(int i = 0; i < MaxNumActiveThreads; i++) // { // if(numCISC[i] >= (numInsToIgnore + numInsToSimulate)) // { // subsetDone = true; // break; // } // } if(numCISC[0] >= (numInsToIgnore + numInsToSimulate)) { subsetDone = true; } if(subsetDone) { // Now, we will write -2 packet in shared memory. // This will ensure that complete emulator (PIN) gets stopped. while (tst->onSubset_finish((int)tid, (numCISC[tid])) == -1) { PIN_Yield(); } cout<<"subset finish called by thread "<setSubsetsimComplete(true); // threadAlive[tid] = false; waitForThreadsAndTerminatePin(); } } } else { if(totalNumCISC >= sliceArray[currentSlice] * 3000000) { if(totalNumCISC <= (sliceArray[currentSlice] + 1) * 3000000) { instructionIgnorePhase = false; } else { instructionIgnorePhase = true; cout << "completed slice : " << currentSlice << "\t\ttotalNumCisc = " << totalNumCISC << "\n"; cout << totalNumCISC << "\t\t" << (sliceArray[numberOfSlices - 1] + 1) * 3000000 << "\t\t" < (sliceArray[numberOfSlices - 1] + 1) * 3000000) { for(int i = 0; i < MaxThreads; i++) { if(threadAlive[i] == true) { int tid_1 = i; cout << "attempting to write -1\n"; while (tst->onThread_finish(tid_1, (numCISC[tid_1])) == -1) { PIN_Yield(); } cout << "wrote -1 for tid " << tid_1 << "\n"; livethreads--; threadAlive[tid_1] = false; fflush(stdout); } } if(livethreads == 0) { cout << "subset simulation complete\n"; fflush(stdout); tst->unload(); exit(0); } ASSERT(livethreads != 0, "subset sim complete, but live threads not zero!!!\n"); } } if(instructionIgnorePhase==false) { // For every instruction, I am sending one Instruction packet to Tejas. // For rep instruction, this function is called for each iteration of rep. uint64_t nip = MASK & (uint64_t) ip; if(traceMethod==SharedMemory) { while (tst->analysisFn(tid, nip, INSTRUCTION, 1) == -1) { PIN_Yield(); } } else if(traceMethod==File) { while (tst->analysisFnAssembly(tid, nip, ASSEMBLY, asmString) == -1) { PIN_Yield(); } } } if(numCISC[tid] % 1000000 == 0 && numCISC[tid] > 0) { cout << "numCISC on thread " << tid <<" = "< 0) { cout <<"totalNumCISC = "<setSubsetsimComplete(true); // Now, we will write -2 packet in shared memory. // This will ensure that complete emulator (PIN) gets stopped. // FIXME : We are trying to write in the communication stream for thread 0 // Hopefully this function is called for the master thread i.e. thread 0 while (tst->onSubset_finish((int)0, (numCISC[0])) == -1) { PIN_Yield(); } cout<<"subset finish called by thread "<<0< -- ... int main(int argc, char * argv[]) { // Knobs get initialized only after initializing PIN //if (numInsToIgnore>0) instructionIgnorePhase = true; outOfROIPhase = true; /*UINT64 mask = KnobMap; if (sched_setaffinity(0, sizeof(mask), (cpu_set_t *)&mask) <0) { perror("sched_setaffinity"); }*/ PIN_InitSymbols(); // Initialize pin if (PIN_Init(argc, argv)) return Usage(); std::string traceMethodStr = KnobTraceMethod; if(strcmp(traceMethodStr.c_str(), "sharedMemory")==0) { traceMethod = SharedMemory; } else if(strcmp(traceMethodStr.c_str(), "file")==0) { traceMethod = File; } else { printf("Invalid trace method : %s !!\n", traceMethodStr.c_str()); exit(1); } MaxNumActiveThreads = KnobMaxNumActiveThreads; threadMapping = new uint32_t[MaxNumActiveThreads]; isThreadActive = new bool[MaxNumActiveThreads]; parentId = new long[MaxNumActiveThreads]; currentId = new long[MaxNumActiveThreads]; int index; for(index = 0; index < MaxNumActiveThreads; index++) { parentId[index] = -1; currentId[index] = -1; isThreadActive[index] = false; } numInsToIgnore = KnobIgnore; startMarker = KnobStartMarker; endMarker = KnobEndMarker; numInsToSimulate = KnobSimulate; pinpointsFilename = KnobPinPointsFile; UINT64 id = KnobId; if(startMarker.compare("") != 0) { outOfROIPhase = true; } else { outOfROIPhase = false; } cout << "numIgn = " << numInsToIgnore << endl; cout << "numSim = " << numInsToSimulate << endl; cout << "id received = " << id << endl; cout << "pinpoints file received = " << pinpointsFilename << endl; cout << "maxNumActiveThreads = " << MaxNumActiveThreads << "\n"; cout << "start marker = " << startMarker << endl; cout <<"end marker = " << endMarker << endl; cout <<"outOfROIPhase = " << outOfROIPhase << endl; /* #ifdef _WIN32 lockForWritingToCommunicationStream = new tejas_win::CRITICAL_SECTION[MaxNumActiveThreads]; #else lockForWritingToCommunicationStream = new pthread_mutex_t[MaxNumActiveThreads]; #endif*/ for (int i = 0; i