AMD Typewriter x86 사용자 설명서

다운로드
페이지 256
Efficient 64-Bit Integer Arithmetic
87
22007E/0—November 1999
AMD Athlon™ Processor x86 Code Optimization 
Example 4 (Left shift):  
;shift operand in EDX:EAX left, shift count in ECX (count
; applied modulo 64)
; 64-bit left shift: EDX:EAX <<= (ECX mod 64).
; SHLD/SHL only honor the low 5 bits of CL, so they handle the count mod 32;
; bit 5 of the count is then tested separately to apply the remaining 32.
SHLD    EDX, EAX, CL      ;high half shifts left, refilled from top bits of EAX
SHL     EAX, CL           ;low half shifts left; together: EDX:EAX << (count mod 32)
TEST    ECX, 32           ;bit 5 of count set, i.e. need to shift by another 32?
JZ      $lshift_done      ;no, done
MOV     EDX, EAX          ;yes: left shift EDX:EAX by 32 bits, so the
XOR     EAX, EAX          ; low half moves to the high half and is then zeroed
$lshift_done:
Example 5 (Right shift):  
; 64-bit logical (zero-filling) right shift: EDX:EAX >>= (ECX mod 64).
; SHRD/SHR only honor the low 5 bits of CL, so they handle the count mod 32;
; bit 5 of the count is then tested separately to apply the remaining 32.
SHRD    EAX, EDX, CL      ;low half shifts right, refilled from low bits of EDX
SHR     EDX, CL           ;high half shifts right; together: EDX:EAX >> (count mod 32)
TEST    ECX, 32           ;bit 5 of count set, i.e. need to shift by another 32?
JZ      $rshift_done      ;no, done
MOV     EAX, EDX          ;yes: right shift EDX:EAX by 32 bits, so the
XOR     EDX, EDX          ; high half moves to the low half and is then zeroed
$rshift_done:
Example 6 (Multiplication):  
;_llmul computes the low-order half of the product of its
; arguments, two 64-bit integers
;
;INPUT:     [ESP+8]:[ESP+4]   multiplicand
;           [ESP+16]:[ESP+12] multiplier
;
;OUTPUT:    EDX:EAX      (multiplicand * multiplier) % 2^64
;
;DESTROYS:  EAX,ECX,EDX,EFlags
_llmul PROC
; Fast path: if both high dwords are zero, a single 32x32->64 MUL gives the
; full answer. Otherwise the two cross products are added into the high dword;
; only their low 32 bits can land inside the 64-bit result, so the two-operand
; IMUL (which keeps just the low half) is sufficient for them.
MOV     EDX, [ESP+8]      ;multiplicand_hi
MOV     ECX, [ESP+16]     ;multiplier_hi
OR      EDX, ECX          ;one operand >= 2^32? (ZF=0 if either high dword is nonzero)
MOV     EDX, [ESP+12]     ;multiplier_lo (MOV does not disturb the flags from OR)
MOV     EAX, [ESP+4]      ;multiplicand_lo
JNZ     $twomul           ;yes, need the cross products as well
MUL     EDX               ;EDX:EAX = multiplicand_lo * multiplier_lo
RET                       ;done, return to caller (arguments are left on the stack)
$twomul:
IMUL    EDX, [ESP+8]      ;p3_lo = multiplicand_hi*multiplier_lo
IMUL    ECX, EAX          ;p2_lo = multiplier_hi*multiplicand_lo
ADD     ECX, EDX          ; p2_lo + p3_lo
MUL     DWORD PTR [ESP+12] ;p1=multiplicand_lo*multiplier_lo (EDX:EAX)
ADD     EDX, ECX          ;p1+p2lo+p3_lo = result in EDX:EAX
RET                       ;done, return to caller (arguments are left on the stack)
_llmul ENDP