; From Max Maischein  -  RDTSC / RDMSR / Timing code

comment %

Since everybody wants to _know_ how fast a new optimization is compared to the
old one, I decided to write a small testbed (for small pieces of code only)
using the Pentium machine specific registers, especially register 10h, which
counts the machine ticks. For some reason, though, the first run of my program
takes substantially longer than the following runs....

The small include file follows.

-max

  A small timing testbed which reads the P5 time stamp counter (the value
  RDTSC returns) to get the exact number of ticks an instruction takes. A
  macro is used to implement the Pentium instruction RDMSR, which TASM 3.2
  and TASM 4.0 do not implement (afaik). Information about the
  instructions comes from the file OPCODES.LST by Potemkin's Hackers Group
  distributed with Ralf Brown's interrupt list.

  Insert your (sequence of) instruction(s) below.

      MSR 10h is Time Stamp Counter  (TSC) (Read/Write)

  The code runs in 16-bit real (V86) mode; the output code is ripped from
  a crude library which was designed for 32-bit flat protected mode, which
  explains the heavy use of extended registers.

comment ends %

; RDMSR [OpType]
;   Emits the opcode bytes of the Pentium RDMSR instruction (0Fh 32h), which
;   the assembler does not know as a mnemonic.
;
;   OpType omitted, or ECX : only the opcode is emitted; the caller has
;                            already put the register number into ECX.
;   OpType anything else   : ECX is saved, loaded with OpType for the read,
;                            and restored afterwards.
RDMSR           macro   OpType
ifb <OpType>
                db      0Fh, 032h       ; Bare RDMSR, ECX supplied by caller
else
  ifidni <OpType>, <ECX>
                db      0Fh, 032h       ; Operand already is ECX: bare RDMSR
  else
                push    ecx             ; Keep the caller's ECX
                mov     ecx, OpType     ; Select the register to read
                db      0Fh, 032h       ; Data bytes for RDMSR instruction
                pop     ecx             ; Give the caller's ECX back
  endif
endif
                endm

                .model small                    ; Small memory model
                .stack 500h                     ; Reserve 500h bytes of stack
                .code                           ; Everything up to the next segment directive is code
                .386                            ; Allow the 32-bit registers used below
                                                ; NOTE(review): presumably placed after .model on
                                                ; purpose so the segments stay 16-bit - confirm
                org    100h                     ; NOTE(review): 100h is the usual .COM origin; with
                                                ; .model small and STARTUPCODE this looks like an
                                                ; EXE build - confirm the ORG is really wanted

; main - times the instruction sequence between the two marker lines.
;
; Reads machine specific register 10h (the Time Stamp Counter) before and
; after the sequence under test, subtracts the two low dwords, removes the
; fixed cost of the harness itself and prints the result in decimal followed
; by CR/LF.
;
; Needs: OutputBuffer with room for the 8 bytes DumpEAXDec always stores
;        plus the terminating zero.
; Note:  only the low 32 bits of the counter are used; the difference is
;        still correct across a wrap of the low dword.
TSC_MSR         equ     10h                     ; MSR number of the Time Stamp Counter
HARNESS_TICKS   equ     317                     ; Empirical cost of the timing code
                                                ; itself (the old "magic value")

main proc near
                STARTUPCODE

                push    ds                      ; The library assumes DS=ES
                pop     es

                mov     ecx, TSC_MSR
                rdmsr                           ; EAX = low dword of the counter,
                                                ; EDX = high dword (not used)
                mov     [InitValue], eax        ; This instruction is already
                                                ; accounted for below ...
       ; ----------------------------------------------------------------
       ; Insert your instruction(s) here

                nop
                nop
                nop
                nop

       ; End of your code - DS must be preserved
       ; ----------------------------------------------------------------
                mov     ecx, TSC_MSR
                rdmsr                           ; Get CPU clocks
                sub     eax, [InitValue]        ; that passed
                sub     eax, HARNESS_TICKS      ; Adjust for the instructions above
                jnc     ReportTicks             ; No borrow: the result is valid
                xor     eax, eax                ; Sequence was cheaper than the assumed
                                                ; overhead: report 0 rather than a
                                                ; wrapped-around huge number
ReportTicks:
                mov     edi, offset OutputBuffer
                push    edi                     ; Remember the start of the text
                call    DumpEAXDec              ; Advances EDI past the digits
                mov     byte ptr [di], 0        ; Make ASCIIZ

                pop     edi                     ; Write the string
                call    WriteLN

                EXITCODE                        ; Get outta here
endp

; Pull in some utility routines from another module as includefile
; for simplicity
include         io.inc

                .data
InitValue       dd      0               ; Our start value (low dword of the counter)
; For the output of the time taken. Storage is reserved explicitly:
; DumpEAXDec always stores 8 bytes here, and main then writes the terminating
; zero up to 10 bytes in (EAX2DEC counts up to 10 digits for a 32-bit value).
OutputBuffer    db      16 dup (0)


ends
end main

-----------
io.inc

; Simple I/O routines

EAX2DEC proc near
; Converts the unsigned binary value in EAX into decimal ASCII digits packed
; into EDX:EAX.
;
; In:    EAX     = value to convert (treated as unsigned)
; Out:   EDX:EAX = the digits, stored in reverse order so that the first
;                  character to be written is in AL, the second in AH, ...
;        ECX     = number of digits produced (at least 1)
; Keeps: EBX, ESI, EDI
;
; Limit: EDX:EAX holds 8 characters only. Values above 99,999,999 need more
;        digits; their low-order digits are shifted out of ESI and lost while
;        ECX still counts every digit, so do not pass such values.
                        push    ebx
                        push    edi
                        push    esi                     ; ESI:EBX is the temporary storage
                        xor     ebx, ebx
                        xor     esi, esi

                        xor     ecx, ecx                ; Use ECX as counter

                        mov     edi, 10                 ; Use EDI as divisor
@NextDigit:
                        xor     edx, edx                ; Zero-extend EAX into EDX:EAX. DIV is
                                                        ; unsigned, so sign-extending (CDQ) makes
                                                        ; the quotient overflow for EAX >= 80000000h
                        div     edi                     ; EAX = value / 10, EDX = value mod 10

                        mov     bl, dl                  ; Get remainder
                        add     bl, '0'                 ; Convert to ASCII
                        inc     ecx                     ; Increment counter
                        test    eax, eax                ; Are we done ?
                        jz      DivisionDone

                        shld    esi, ebx, 8             ; Shift digits one place
                        shl     ebx, 8                  ; BL is free for the next digit
                        jmp     @NextDigit
DivisionDone:
                        mov     edx, esi                ; High four characters
                        mov     eax, ebx                ; Low four characters, first one in AL

                        pop     esi
                        pop     edi
                        pop     ebx
                        ret
endp

DumpEAxDec proc near
; Stores the decimal text of the value in EAX at [EDI] and moves EDI behind it.
;
; In:    EAX = value to print
;        EDI -> destination, at least 8 writable bytes
; Out:   EDI -> first byte after the digits
; Keeps: EAX, ECX, EDX
;
; All 8 bytes of the packed result are stored regardless of the digit count;
; only the first ECX of them are digits. No terminator is written here.
                        push    eax
                        push    ecx
                        push    edx
                        call    EAX2Dec                 ; EDX:EAX = characters, ECX = count
DecNextDigit:
                        mov     [edi+4], edx            ; Characters five to eight
                        mov     [edi], eax              ; Characters one to four
                        add     edi, ecx                ; Step over the digits only

                        pop     edx
                        pop     ecx
                        pop     eax
                        ret
endp

WriteASCIIZ     proc near
; Writes the ASCIIZ string pointed to by EDI to stdOut
;
; In:    EDI -> zero terminated string. The scan reads it through ES and
;               the DOS call through DS, so DS must equal ES.
; Keeps: all general registers (PUSHAD / POPAD)
;
; The terminating zero itself is not written. The result of the DOS call
; is discarded by POPAD, so write errors go unnoticed.
; (Original note: this routine thrashes the file-IO-buffer.)
;
; DosWrite is also entered from outside this procedure by code that has
; already executed PUSHAD and loaded EBX, ECX and EDX.
                        pushad
                        xor     eax, eax        ; AL=0
DOSOutputScan:
                        xor     ebx, ebx        ;
                        inc     ebx             ; EBX=1=stdOut
                        mov     edx, edi        ; EDX->string
                        mov     ecx, -1         ; ECX=counter for length of string
                        cld
                        repnz   scasb           ; Find end of string
                        not     ecx             ; ECX=bytes scanned, terminator included
                                                ; (NEG would give one more than that)
                        dec     ecx             ; CX=length of string
DosWrite:
                        mov     ah, 40h         ; Write to device
                        int     21h
                        popad
                        ret
endp

.data
CRLFData                db      13, 10          ; CR, LF - the two bytes WriteCRLF sends

.code
WriteLN         proc near
; Writes an ASCIIZ string and then goes to the next line ...
; Dirty fall through optimization: there is no RET in this procedure. When
; WriteASCIIZ returns, execution simply runs on into WriteCRLF, which must
; therefore stay directly behind this procedure; its RET returns to our caller.
;ENTRY: EDI-> ASCIIZ string to be written
                        call    WriteASCIIZ
endp
WriteCRLF       proc near
; Sends the two bytes at CRLFData (carriage return, line feed) to stdOut.
; The registers are set up here and the actual write is borrowed from the
; tail of WriteASCIIZ (label DosWrite), which also undoes the PUSHAD below
; and returns to our caller.
                        pushad
                        mov     edx, offset CRLFData    ; EDX->bytes to write
                        mov     ecx, 2                  ; Two of them
                        xor     ebx, ebx
                        inc     ebx                     ; EBX=1=stdOut
                        jmp     short DosWrite
endp
