/*********************************************
    SelfModifyingCPUID.c
    Written by Keith Oxenrider
    koxenrider[at]sol[dash]biotech[dot]com
    January 25, 2005 (my boy is 3 months old today!)

    This code is hereby placed in the public domain.
    No warranty expressed or implied, use at your own risk!
*********************************************/

#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "SelfModifyingCPUID.h"

typedef long (* FunctionLongLong)(long, long);
typedef void (* FunctionVoid)(void);

#define MAX_INSTRUCTION_BYTES 8192 //just to have a large enough buffer

/*
 * printBits - write the low numBits bits of bits2print to fout as a string
 * of '0' and '1' characters, most significant of those bits first.
 *
 *   bits2print  value whose bits are printed (treated as unsigned)
 *   numBits     how many bit positions to print; <= 0 prints nothing
 *   fout        destination stream
 *
 * Bit positions at or beyond the width of unsigned int are printed as '0'.
 * The mask is tested with an integer shift rather than pow(), so no
 * floating-point rounding or out-of-range float-to-int conversion is involved.
 */
void printBits(int bits2print, int numBits, FILE *fout){
    const unsigned int value = (unsigned int)bits2print;
    const int width = (int)(sizeof value * 8); /* assumes 8-bit bytes */
    int i;

    for (i=numBits-1; i>-1; i--){
        /* shifting by >= width would be undefined, so guard the position first */
        if (i < width && ((value >> i) & 1u)){
            fputc('1', fout);
        }else{
            fputc('0', fout);
        }
    }
}

/*
 * GetBrandID - look brandID up in the BrandIDs table and return the string
 * at the matching index of BrandStr.  When no entry matches, the string at
 * index BrandIDCnt (one past the last searched entry) is returned.
 */
const char *GetBrandID(unsigned char brandID){
    int idx = 0;

    /* stop on the first match, or at BrandIDCnt when nothing matches */
    while (idx < BrandIDCnt && BrandIDs[idx] != brandID)
        idx++;

    return BrandStr[idx];
}

/*
 * ReportFeatureFlags - print one line per bit of the EDX and then the ECX
 * feature words, using the names in EDXFeatureFlags / ECXFeatureFlags.
 *
 * A set bit is printed as a tab-indented name.  A clear bit is printed as
 * "NOT SUPPORTED: <name>" unless its name starts with "Reserved", in which
 * case nothing is printed for it.
 *
 * Side effects: sets boolHasCLFLUSHInstr when EDX bit 19 is set and
 * boolIsHyperThreaded when EDX bit 28 is set (neither is ever cleared here).
 */
void ReportFeatureFlags(unsigned int EDX, unsigned int ECX){
    int bit;

    printf("\n\nThis processor has the following EDX feature flags:\n");
    for (bit = 0; bit < 32; bit++){
        const int isSet = ((EDX >> bit) & 1u) != 0;

        if (isSet)
            printf("\t%s\n", EDXFeatureFlags[bit]);
        else if (strncmp(EDXFeatureFlags[bit], "Reserved", 8) != 0)
            printf("NOT SUPPORTED: %s\n", EDXFeatureFlags[bit]);

        /* remember the two capabilities that main() consults later */
        if (isSet && bit == 19)
            boolHasCLFLUSHInstr = TRUE;
        if (isSet && bit == 28)
            boolIsHyperThreaded = TRUE;
    }


    printf("\n\nThis processor has the following ECX feature flags:\n");
    for (bit = 0; bit < 32; bit++){
        if ((ECX >> bit) & 1u)
            printf("\t%s\n", ECXFeatureFlags[bit]);
        else if (strncmp(ECXFeatureFlags[bit], "Reserved", 8) != 0)
            printf("NOT SUPPORTED: %s\n", ECXFeatureFlags[bit]);
    }
}


//adapted from K&R 2nd ed, p49
/*
 * getbits - return the n-bit field of x whose most significant bit is at
 * position p (bit 0 is the least significant), right-adjusted.
 *
 *   x  source value
 *   p  position of the field's top bit
 *   n  field width in bits
 *
 * Returns 0 when n <= 0 or when the field's lowest bit (p+1-n) falls outside
 * the word.  The mask is built from an unsigned all-ones value, and a field
 * as wide as the whole word is handled explicitly, so no shift is applied to
 * a negative value or by the full width of the type.
 */
unsigned int getbits(unsigned int x, int p, int n){
    const int width = (int)(sizeof x * 8); /* assumes 8-bit bytes */
    const int shift = p + 1 - n;
    unsigned int mask;

    if (n <= 0 || shift < 0 || shift >= width)
        return 0;

    mask = (n >= width) ? ~0u : ~(~0u << n);
    return (x >> shift) & mask;
}

/*
 * ReportProcessorSig - print the processor signature word EAX in binary and
 * hex, followed by each of its sub-fields in binary.
 *
 * Fields (top bit, width): Stepping (3,4), Model (7,4), Family (11,4),
 * Type (13,2), ExMod (19,4), ExFam (27,8).
 */
void ReportProcessorSig(unsigned int EAX){
    /* label to print, position of the field's top bit, field width in bits */
    static const struct {
        const char *label;
        int topBit;
        int width;
    } fields[] = {
        { "\tStepping: ",  3, 4 },
        { "\tModel: ",     7, 4 },
        { "\tFamily: ",   11, 4 },
        { "\tType: ",     13, 2 },
        { "\tExMod: ",    19, 4 },
        { "\tExFam: ",    27, 8 }
    };
    size_t idx;

    printf("\n\nProcessor Signature:");
    printf("(bin EAX: ");
    printBits(EAX, 32, stdout);
    printf(")");
    printf("(hex EAX: %08X)", EAX);
    printf("\n");

    for (idx = 0; idx < sizeof fields / sizeof fields[0]; idx++){
        const unsigned int value = getbits(EAX, fields[idx].topBit, fields[idx].width);

        fputs(fields[idx].label, stdout);
        printBits(value, fields[idx].width, stdout);
        printf("\n");
    }

}


/*
 * ReportCacheTLB - treat the first four bytes in memory of each of the four
 * register values as descriptor bytes and print the CacheTLB description
 * for each one.
 *
 *   EAX, EBX, ECX, EDX  register values, processed in that order
 *   skipByte            when true, the first byte in memory of EAX is not
 *                       reported (the original author notes it is the
 *                       least significant byte, a count, on the first call)
 *
 * Zero bytes are ignored.  A byte with no CacheTLB entry is reported as
 * "Unsupported value".
 */
void ReportCacheTLB(unsigned int EAX, unsigned int EBX, 
                    unsigned int ECX, unsigned int EDX,
                    BOOL skipByte){
    unsigned int regs[4];
    int regIdx, byteIdx, entry;

    regs[0] = EAX;
    regs[1] = EBX;
    regs[2] = ECX;
    regs[3] = EDX;

    printf("\n\nCache and TLB information:\n");
    for (regIdx = 0; regIdx < 4; regIdx++){
        /* walk the register's bytes in memory order */
        const unsigned char *bytes = (const unsigned char *)&regs[regIdx];

        for (byteIdx = 0; byteIdx < 4; byteIdx++){
            const unsigned char descriptor = bytes[byteIdx];

            if (skipByte && regIdx == 0 && byteIdx == 0)
                continue;
            if (descriptor == 0)
                continue;

            /* linear search; entry ends at CacheTLBCnt when nothing matches */
            entry = 0;
            while (entry < CacheTLBCnt && CacheTLB[entry].val != descriptor)
                entry++;

            if (entry == CacheTLBCnt)
                printf("Unsupported value for 0x%02X\n", descriptor);
            else
                printf("\t%s\n", CacheTLB[entry].descr);
        }
    }

}

/*******************************
 *
 *
 *  writeCPUIDInstructions
 *
 *
 *******************************/

/*
 * writeCPUIDInstructions - emit, starting at codePtr[memptr], the machine
 * code of a small 32-bit x86 routine that
 *   1. loads the four bytes at typPtr into EAX as an immediate (the CPUID
 *      request type),
 *   2. executes CPUID,
 *   3. stores the results to destArr[0..3] in the order EAX, EBX, EDX, ECX,
 *   4. returns to its caller.
 *
 *   codePtr  buffer receiving the code; 40 bytes are written from memptr on
 *            (no bounds checking is possible here; the caller must provide room)
 *   typPtr   four bytes copied verbatim as the immediate operand
 *   memptr   index in codePtr at which to start writing
 *   destArr  where the generated code will store the four registers; only
 *            the first four bytes of each pointer value are embedded, so the
 *            generated code is only meaningful with 32-bit addresses
 *
 * Returns the index just past the last byte written.
 *
 * The routine saves and restores EBX around CPUID: CPUID overwrites EBX,
 * and 32-bit x86 C calling conventions require a called function to
 * preserve it.
 */
unsigned int writeCPUIDInstructions(unsigned char *codePtr, unsigned char *typPtr,
                 int memptr, unsigned int *destArr){
    unsigned int i;
    unsigned char reg = 0xC3; // ModRM byte for "mov eax, ebx"; decremented per register

    codePtr[memptr++] = 0x55; // push ebp
    codePtr[memptr++] = 0x53; // push ebx (CPUID clobbers it; caller expects it preserved)
    codePtr[memptr++] = 0xB8; // mov eax, imm32 (which kind of CPUID request)
    memcpy(&codePtr[memptr], typPtr, 4); // the four immediate bytes
    memptr += 4;
    codePtr[memptr++] = 0x0F; // cpuid
    codePtr[memptr++] = 0xA2;

    //copy the register values to fixed memory locations so they can be
    //processed after the routine returns
    for (i=0; i<4; i++){
        unsigned int *slot = destArr + i; // address that receives this register

        codePtr[memptr++] = 0xA3; // mov [addr32], eax
        memcpy(&codePtr[memptr], &slot, 4); // first four bytes of the address (won't work on a 64 bit machine!)
        memptr += 4;
        codePtr[memptr++] = 0x8B; // mov eax, <next register>
        codePtr[memptr++] = reg--;// order: ebx (C3H), edx (C2H), ecx (C1H), then eax (C0H, a no-op)
    }
    codePtr[memptr++] = 0x5B; // pop ebx
    codePtr[memptr++] = 0x5D; // pop ebp
    codePtr[memptr++] = 0xC3; // ret (nothing is returned; data is already stored)

    return memptr;
}

/*******************************
 *
 *
 *  writeSUMInstructions
 *
 *
 *******************************/

/*
 * writeSUMInstructions - copy the 11-byte machine code of a 32-bit x86
 * routine that adds its two stack arguments into chrArr, starting at index
 * arrPtr, and return the index just past the last byte written.
 */
unsigned int writeSUMInstructions(unsigned char * chrArr, int arrPtr){
    static const unsigned char sumCode[] = {
        0x55,             // push ebp
        0x8B, 0xEC,       // mov ebp, esp
        0x8B, 0x45, 0x08, // mov eax,[ebp+8]
        0x03, 0x45, 0x0C, // add eax,[ebp+12]
        0x5D,             // pop ebp
        0xC3              // ret
    };

    memcpy(chrArr + arrPtr, sumCode, sizeof sumCode);

    return arrPtr + (int)sizeof sumCode;
}



/*******************************
 *
 *
 *  main
 *
 *
 *******************************/

/*
 * Program entry point.
 *
 * Allocates an instruction buffer, writes small machine-code routines into
 * it with writeSUMInstructions / writeCPUIDInstructions, calls the buffer
 * as a function, and prints what the routines produced:
 *   - (disabled by default) two generated "sum" routines,
 *   - the processor brand string (requests 0x80000002..0x80000004), when
 *     request 0x80000000 reports support for them,
 *   - the standard CPUID requests 0..3: vendor ID, brand ID, feature flags,
 *     processor signature, and cache/TLB descriptors.
 *
 * Results are stored in data[] four words per request, in the order
 * EAX, EBX, EDX, ECX.
 *
 * Returns 0 on completion; exits with status 1 if the buffer cannot be
 * allocated.
 */
int main(void){
    unsigned char *chrArr;
    unsigned int i, j, k, cnt, arg;
    unsigned int maxCPUIDCycles=0;
    FunctionVoid ExeCPUID;
    BOOL doSums = FALSE;
    BOOL doBrandID = TRUE;
    BOOL doCPUID = TRUE;
    BOOL showRegisters = FALSE;
    unsigned int data[4*MAXCPUIDTESTS];//for storing eax, ebx, edx, ecx from CPUID
    char *ptrTmp;


    //NOTE(review): this buffer comes from malloc() and is later called as
    //code. On systems that mark heap memory non-executable that call would
    //presumably fault; obtaining executable memory needs a platform-specific
    //API and is not attempted here.
    chrArr = (unsigned char*) malloc(MAX_INSTRUCTION_BYTES);
    if (!chrArr){
        fprintf(stderr, "Failed to allocate memory for instruction buffer!\n");
        exit(1);
    }

    if (doSums){//a couple of sums, just for the heck of it (and what the original code had)...
        FunctionLongLong ComputeSum1, ComputeSum2;
        unsigned int val1, val2;
        unsigned int retVal1, retVal2;

        //two copies of the same routine, back to back in the buffer
        ComputeSum1 = (FunctionLongLong) chrArr;
        cnt = 0;
        cnt = writeSUMInstructions(chrArr, cnt);

        ComputeSum2 = (FunctionLongLong) &chrArr[cnt];
        cnt = writeSUMInstructions(chrArr, cnt);

        val1 = 123456;
        val2 = 654321;
        retVal1 = ComputeSum1(val1, val2);
        printf("Using ComputSum1, result for adding %u and %u is %u\n", val1, val2, retVal1);

        val1 = 7890;
        val2 = 9876;
        retVal2 = ComputeSum2(val1, val2);
        printf("Using ComputSum2, result for adding %u and %u is %u\n", val1, val2, retVal2);

        val1 = 1999;
        val2 = 2003;
        retVal1 = ComputeSum1(val1, val2);
        printf("Reusing ComputSum1, result for adding %u and %u is %u\n", val1, val2, retVal1);

    }

    if (doBrandID){
        //test for "Processor Brand String Feature"
        ExeCPUID = (FunctionVoid) chrArr;
        arg = 0x80000000;
        cnt=0;
        cnt = writeCPUIDInstructions(chrArr, (unsigned char *)&arg, cnt, data);
        ExeCPUID();
        if (data[0] >= 0x80000004){
            //feature is supported
            //data[] holds each result as EAX, EBX, EDX, ECX, while the brand
            //string text runs EAX, EBX, ECX, EDX -- hence this read order
            static const int regOrder[4] = {0, 1, 3, 2};
            char buf[50];//3 requests * 16 bytes + terminator fits
            int bufptr=0;
            int dataBlockPtr=0;
            BOOL IsIntialSpace = TRUE;

            for (i=2;i<5;i++){//current documentation only supports 2, 3, and 4
                arg = 0x80000000+i;
                cnt=0;
                cnt = writeCPUIDInstructions(chrArr, (unsigned char *)&arg, cnt, &data[dataBlockPtr*4]);
                ExeCPUID();

                //append this request's 16 characters, dropping spaces until
                //the first non-space character of the whole string is seen
                ptrTmp = (char *)&data[dataBlockPtr*4];
                for (k=0; k<4; k++){
                    for (j=0; j<4; j++){
                        //test the very byte being copied, so an all-space
                        //register does not cause later text to be discarded
                        char ch = ptrTmp[(regOrder[k]*4)+j];
                        if (IsIntialSpace && ch == ' ') continue;
                        IsIntialSpace = FALSE;
                        buf[bufptr++] = ch;
                    }
                }
                dataBlockPtr++;
            }
            buf[bufptr++] = '\0';
            if (showRegisters){
                for (i=0; i<3; i++){
                    printf("\tEAX: %08X\n", data[(i*4)+0]);
                    printf("\tEBX: %08X\n", data[(i*4)+1]);
                    printf("\tEDX: %08X\n", data[(i*4)+2]);
                    printf("\tECX: %08X\n", data[(i*4)+3]);
                }
                //dump the 48 raw bytes, four per bracketed line
                ptrTmp = (char *)&data;
                printf("[");
                for (i=0; i<48; i++){
                    printf("%c", ptrTmp[i]);
                    if ((i+1)%4 == 0 && i < 47)
                        printf("]\n[");
                }
                printf("]\n");
            }
            printf("Processor Brand String Feature = '%s'\n", buf);
        }else{
            printf("'Processor Brand String Feature' is not supported\n");
        }
    }



    if (doCPUID){
        printf("\nNow we look at the output from the CPUID instruction...\n\n");
        i=0;
        ExeCPUID = (FunctionVoid) chrArr;
        do{
            //regenerate the routine with the current request number baked in
            cnt=0;
            cnt = writeCPUIDInstructions(chrArr, (unsigned char *)&i, cnt, data);

            ExeCPUID();
            if (showRegisters){
                printf("CPUID output when EAX = %08X\n", i);
                printf("\tEAX: %08X\n", data[0]);
                printf("\tEBX: %08X\n", data[1]);
                printf("\tEDX: %08X\n", data[2]);
                printf("\tECX: %08X\n", data[3]);
            }

            if (i==0){
                maxCPUIDCycles = data[0];
                printf("CPU Vendor ID: '");
                //data[1..3] are EBX, EDX, ECX: the vendor text in order
                for (j=1; j<4; j++){
                    ptrTmp = (char *)&data[j];
                    for (k=0; k<4; k++)
                        printf("%c", ptrTmp[k]);
                }
                printf("'\n\tNumber of CPUID EAX tests: %u\n", maxCPUIDCycles+1);
            }
            if (i==1){
                //read the EBX bytes as unsigned so values >= 0x80 are not
                //sign-extended when printed
                const unsigned char *ebxBytes = (const unsigned char *)&data[1];
                unsigned char brandID, COUNT, CHUNKS;

                brandID = ebxBytes[0];
                CHUNKS = ebxBytes[1];
                COUNT = ebxBytes[2];

                printf("The Brand ID (0x%02X) is %s\n", brandID, GetBrandID(brandID));
                ReportFeatureFlags(data[2], data[3]);
                ReportProcessorSig(data[0]);

                if (boolIsHyperThreaded){
                    printf("The number of logical processors (Hyperthreading): %d\n", COUNT);
                }
                if (boolHasCLFLUSHInstr){
                    printf("The CLFLUSH size: %d\n", CHUNKS);
                    printf("\tThe cache line size: %d\n", CHUNKS*8);
                }
            }
            if (i==2){
                ReportCacheTLB(data[0], data[1], data[2], data[3], TRUE);
                //this hasn't been tested 'cuz my machine doesn't support it
                //the first byte of data[0] is re-read every pass as the
                //repeat count; read it as unsigned so it cannot go negative
                ptrTmp = (char *)&data[0];
                for (k=1; k<(unsigned char)(*ptrTmp); k++){
                    cnt=0;
                    cnt = writeCPUIDInstructions(chrArr, (unsigned char *)&i, cnt, data);
                    ExeCPUID();
                    if (showRegisters){
                        printf("CPUID output when EAX = %08X\n", i);
                        printf("\tEAX: %08X\n", data[0]);
                        printf("\tEBX: %08X\n", data[1]);
                        printf("\tEDX: %08X\n", data[2]);
                        printf("\tECX: %08X\n", data[3]);
                    }
                    ReportCacheTLB(data[0], data[1], data[2], data[3], FALSE);
                }
            }
            i++;
            for (j=0; j<4; j++) data[j] = 0;
        }while (i<(maxCPUIDCycles+1) && i<4);//documentation only supports up to 4
    }
    free(chrArr);
    return 0;
}

