View
216
Download
0
Category
Preview:
Citation preview
1
Introduction to x86 Assembly, part II, or “What does my laptop actually do?”
Ymir Vigfusson
Some slides gracefully borrowed from 18-213@CMU
2
Review Example
int logical(int x, int y){ int t1 = x^y; int t2 = t1 >> 17; int mask = (1<<13) - 7; int rval = t2 & mask; return rval;}
int logical(int x, int y){ int t1 = x^y; int t2 = t1 >> 17; int mask = (1<<13) - 7; int rval = t2 & mask; return rval;}
logical:pushl %ebpmovl %esp,%ebp
movl 12(%ebp),%eaxxorl 8(%ebp),%eaxsarl $17,%eaxandl $8185,%eax
popl %ebpret
Body
SetUp
Finish
movl 12(%ebp),%eax # eax = yxorl 8(%ebp),%eax # eax = x^y (t1)sarl $17,%eax # eax = t1>>17 (t2)andl $8185,%eax # eax = t2 & mask (rval)
3
Review Example
int logical(int x, int y){ int t1 = x^y; int t2 = t1 >> 17; int mask = (1<<13) - 7; int rval = t2 & mask; return rval;}
int logical(int x, int y){ int t1 = x^y; int t2 = t1 >> 17; int mask = (1<<13) - 7; int rval = t2 & mask; return rval;}
logical:pushl %ebpmovl %esp,%ebp
movl 12(%ebp),%eaxxorl 8(%ebp),%eaxsarl $17,%eaxandl $8185,%eax
popl %ebpret
Body
SetUp
Finish
movl 12(%ebp),%eax # eax = yxorl 8(%ebp),%eax # eax = x^y (t1)sarl $17,%eax # eax = t1>>17 (t2)andl $8185,%eax # eax = t2 & mask (rval)
4
Review Example
int logical(int x, int y){ int t1 = x^y; int t2 = t1 >> 17; int mask = (1<<13) - 7; int rval = t2 & mask; return rval;}
int logical(int x, int y){ int t1 = x^y; int t2 = t1 >> 17; int mask = (1<<13) - 7; int rval = t2 & mask; return rval;}
logical:pushl %ebpmovl %esp,%ebp
movl 12(%ebp),%eaxxorl 8(%ebp),%eaxsarl $17,%eaxandl $8185,%eax
popl %ebpret
Body
SetUp
Finish
movl 12(%ebp),%eax # eax = yxorl 8(%ebp),%eax # eax = x^y (t1)sarl $17,%eax # eax = t1>>17 (t2)andl $8185,%eax # eax = t2 & mask (rval)
5
Review Example
int logical(int x, int y){ int t1 = x^y; int t2 = t1 >> 17; int mask = (1<<13) - 7; int rval = t2 & mask; return rval;}
int logical(int x, int y){ int t1 = x^y; int t2 = t1 >> 17; int mask = (1<<13) - 7; int rval = t2 & mask; return rval;}
logical:pushl %ebpmovl %esp,%ebp
movl 12(%ebp),%eaxxorl 8(%ebp),%eaxsarl $17,%eaxandl $8185,%eax
popl %ebpret
Body
SetUp
Finish
movl 12(%ebp),%eax # eax = yxorl 8(%ebp),%eax # eax = x^y (t1)sarl $17,%eax # eax = t1>>17 (t2)andl $8185,%eax # eax = t2 & mask (rval)
$2^{13} = 8192$, $2^{13} - 7 = 8185$
6
Control: Condition codes
7
Processor State (IA32, Partial)
Information about the currently executing program:
Temporary data ( %eax, … )
Location of runtime stack ( %ebp, %esp )
Location of current code control point ( %eip, … )
Status of recent tests ( CF, ZF, SF, OF )
%eip
General purposeregisters
Current stack top
Current stack frame
Instruction pointer
CF ZF SF OF Condition codes
%eax
%ecx
%edx
%ebx
%esi
%edi
%esp
%ebp
8
Condition Codes (Implicit Setting)
Single bit registers:
CF Carry Flag (for unsigned)    SF Sign Flag (for signed)
ZF Zero Flag                    OF Overflow Flag (for signed)
Implicitly set (think of it as a side effect) by arithmetic operations.
Example: addl/addq Src,Dest ↔ t = a+b
CF set if carry out from most significant bit (unsigned overflow)
ZF set if t == 0
SF set if t < 0 (as signed)
OF set if two’s-complement (signed) overflow: (a>0 && b>0 && t<0) || (a<0 && b<0 && t>=0)
Not set by lea instruction
9
Condition Codes (Explicit Setting: Compare)
Explicit setting by the compare instruction: cmpl Src2, Src1
cmpl b,a is like computing a-b without setting the destination
CF set if carry out from most significant bit (used for unsigned comparisons)
ZF set if a == b
SF set if (a-b) < 0 (as signed)
OF set if two’s-complement (signed) overflow: (a>0 && b<0 && (a-b)<0) || (a<0 && b>0 && (a-b)>0)
10
Condition Codes (Explicit Setting: Test)
Explicit setting by the test instruction: testl Src2, Src1
testl b,a is like computing a&b without setting the destination
Sets condition codes based on the value of Src1 & Src2
Useful to have one of the operands be a mask
ZF set when a&b == 0
SF set when a&b < 0
11
Reading Condition Codes SetX Instructions
Set single byte based on combinations of condition codes
SetX   Condition       Description
sete   ZF              Equal / Zero
setne  ~ZF             Not Equal / Not Zero
sets   SF              Negative
setns  ~SF             Nonnegative
setg   ~(SF^OF)&~ZF    Greater (Signed)
setge  ~(SF^OF)        Greater or Equal (Signed)
setl   (SF^OF)         Less (Signed)
setle  (SF^OF)|ZF      Less or Equal (Signed)
seta   ~CF&~ZF         Above (unsigned)
setb   CF              Below (unsigned)
12
movl 12(%ebp),%eax # eax = ycmpl %eax,8(%ebp) # Compare x : ysetg %al # al = x > ymovzbl %al,%eax # Zero rest of %eax
Reading Condition Codes (Cont.)
SetX Instructions: Set single byte based on combination of condition codes
One of 8 addressable byte registers
Does not alter remaining 3 bytes
Typically use movzbl to finish job
int gt (int x, int y){ return x > y;}
int gt (int x, int y){ return x > y;}
Body
%eax %ah %al
%ecx %ch %cl
%edx %dh %dl
%ebx %bh %bl
%esi
%edi
%esp
%ebp
13
Conditional branches and moves
14
Jumping jX Instructions
Jump to different part of code depending on condition codes
jX Condition Description
jmp 1 Unconditional
je ZF Equal / Zero
jne ~ZF Not Equal / Not Zero
js SF Negative
jns ~SF Nonnegative
jg ~(SF^OF)&~ZF Greater (Signed)
jge ~(SF^OF) Greater or Equal (Signed)
jl (SF^OF) Less (Signed)
jle (SF^OF)|ZF Less or Equal (Signed)
ja ~CF&~ZF Above (unsigned)
jb CF Below (unsigned)
15
Conditional Branch Example
int absdiff(int x, int y){ int result; if (x > y) { result = x-y; } else { result = y-x; } return result;}
int absdiff(int x, int y){ int result; if (x > y) { result = x-y; } else { result = y-x; } return result;}
absdiff:pushl %ebpmovl %esp, %ebpmovl 8(%ebp), %edxmovl 12(%ebp), %eaxcmpl %eax, %edxjle .L6subl %eax, %edxmovl %edx, %eaxjmp .L7
.L6:subl %edx, %eax
.L7:popl %ebpret
Body1
Setup
Finish
Body2b
Body2a
16
Conditional Branch Example (Cont.)int goto_ad(int x, int y){ int result; if (x <= y) goto Else; result = x-y; goto Exit;Else: result = y-x;Exit: return result;}
int goto_ad(int x, int y){ int result; if (x <= y) goto Else; result = x-y; goto Exit;Else: result = y-x;Exit: return result;}
C allows “goto” as a means of transferring control
Closer to machine-level programming style
Generally considered bad coding style
absdiff:pushl %ebpmovl %esp, %ebpmovl 8(%ebp), %edxmovl 12(%ebp), %eaxcmpl %eax, %edxjle .L6subl %eax, %edxmovl %edx, %eaxjmp .L7
.L6:subl %edx, %eax
.L7:popl %ebpret
Body1
Setup
Finish
Body2b
Body2a
17
GO TO statements considered harmful
18
Conditional Branch Example (Cont.)int goto_ad(int x, int y){ int result; if (x <= y) goto Else; result = x-y; goto Exit;Else: result = y-x;Exit: return result;}
int goto_ad(int x, int y){ int result; if (x <= y) goto Else; result = x-y; goto Exit;Else: result = y-x;Exit: return result;}
absdiff:pushl %ebpmovl %esp, %ebpmovl 8(%ebp), %edxmovl 12(%ebp), %eaxcmpl %eax, %edxjle .L6subl %eax, %edxmovl %edx, %eaxjmp .L7
.L6:subl %edx, %eax
.L7:popl %ebpret
Body1
Setup
Finish
Body2b
Body2a
19
Conditional Branch Example (Cont.)int goto_ad(int x, int y){ int result; if (x <= y) goto Else; result = x-y; goto Exit;Else: result = y-x;Exit: return result;}
int goto_ad(int x, int y){ int result; if (x <= y) goto Else; result = x-y; goto Exit;Else: result = y-x;Exit: return result;}
absdiff:pushl %ebpmovl %esp, %ebpmovl 8(%ebp), %edxmovl 12(%ebp), %eaxcmpl %eax, %edxjle .L6subl %eax, %edxmovl %edx, %eaxjmp .L7
.L6:subl %edx, %eax
.L7:popl %ebpret
Body1
Setup
Finish
Body2b
Body2a
20
Conditional Branch Example (Cont.)int goto_ad(int x, int y){ int result; if (x <= y) goto Else; result = x-y; goto Exit;Else: result = y-x;Exit: return result;}
int goto_ad(int x, int y){ int result; if (x <= y) goto Else; result = x-y; goto Exit;Else: result = y-x;Exit: return result;}
absdiff:pushl %ebpmovl %esp, %ebpmovl 8(%ebp), %edxmovl 12(%ebp), %eaxcmpl %eax, %edxjle .L6subl %eax, %edxmovl %edx, %eaxjmp .L7
.L6:subl %edx, %eax
.L7:popl %ebpret
Body1
Setup
Finish
Body2b
Body2a
21
Loops
22
C Codeint pcount_do(unsigned x) { int result = 0; do { result += x & 0x1; x >>= 1; } while (x); return result;}
int pcount_do(unsigned x) { int result = 0; do { result += x & 0x1; x >>= 1; } while (x); return result;}
Goto Versionint pcount_do(unsigned x){ int result = 0;loop: result += x & 0x1; x >>= 1; if (x) goto loop; return result;}
int pcount_do(unsigned x){ int result = 0;loop: result += x & 0x1; x >>= 1; if (x) goto loop; return result;}
“Do-While” Loop Example
Count number of 1’s in argument x (“popcount”)
Use conditional branch to either continue looping or to exit loop
23
Goto Version“Do-While” Loop Compilation
Registers: %edx = x, %ecx = result
movl $0, %ecx # result = 0.L2: # loop:
movl %edx, %eaxandl $1, %eax # t = x & 1addl %eax, %ecx # result += tshrl %edx # x >>= 1jne .L2 # If !0, goto loop
int pcount_do(unsigned x) { int result = 0;loop: result += x & 0x1; x >>= 1; if (x) goto loop; return result;}
int pcount_do(unsigned x) { int result = 0;loop: result += x & 0x1; x >>= 1; if (x) goto loop; return result;}
24
C Code
do Body while (Test);
do Body while (Test);
Goto Version
loop: Body if (Test) goto loop
loop: Body if (Test) goto loop
General “Do-While” Translation
Body:
Test returns an integer: = 0 is interpreted as false, ≠ 0 is interpreted as true
{ Statement1; Statement2; … Statementn;}
25
C Code Goto Version
“While” Loop Example
Is this code equivalent to the do-while version? Must jump out of loop if test fails
int pcount_while(unsigned x) { int result = 0; while (x) { result += x & 0x1; x >>= 1; } return result;}
int pcount_while(unsigned x) { int result = 0; while (x) { result += x & 0x1; x >>= 1; } return result;}
int pcount_do(unsigned x) { int result = 0; if (!x) goto done;loop: result += x & 0x1; x >>= 1; if (x) goto loop;done: return result;}
int pcount_do(unsigned x) { int result = 0; if (!x) goto done;loop: result += x & 0x1; x >>= 1; if (x) goto loop;done: return result;}
26
While version
while (Test) Bodywhile (Test) Body
Do-While Version
if (!Test) goto done; do Body while(Test);done:
if (!Test) goto done; do Body while(Test);done:
General “While” Translation
Goto Version
if (!Test) goto done;loop: Body if (Test) goto loop;done:
if (!Test) goto done;loop: Body if (Test) goto loop;done:
27
C Code
“For” Loop Example
Is this code equivalent to other versions?
#define WSIZE 8*sizeof(int)int pcount_for(unsigned x) { int i; int result = 0; for (i = 0; i < WSIZE; i++) { unsigned mask = 1 << i; result += (x & mask) != 0; } return result;}
#define WSIZE 8*sizeof(int)int pcount_for(unsigned x) { int i; int result = 0; for (i = 0; i < WSIZE; i++) { unsigned mask = 1 << i; result += (x & mask) != 0; } return result;}
28
“For” Loop While Loop
for (Init; Test; Update )
Body
For Version
Init;
while (Test ) {
Body
Update;
}
While Version
29
“For” Loop Form
for (Init; Test; Update )
Body
General Form
for (i = 0; i < WSIZE; i++) { unsigned mask = 1 << i; result += (x & mask) != 0; }
for (i = 0; i < WSIZE; i++) { unsigned mask = 1 << i; result += (x & mask) != 0; }
i = 0i = 0
i < WSIZEi < WSIZE
i++i++
{ unsigned mask = 1 << i; result += (x & mask) != 0;}
{ unsigned mask = 1 << i; result += (x & mask) != 0;}
Init
Test
Update
Body
30
“For” Loop … Goto
for (Init; Test; Update )
Body
For Version
Init;
while (Test ) {
Body
Update;
}
While Version
Init; if (!Test) goto done; do Body Update while(Test);done:
Init; if (!Test) goto done; do Body Update while(Test);done:
Init; if (!Test) goto done;loop: Body Update if (Test) goto loop;done:
Init; if (!Test) goto done;loop: Body Update if (Test) goto loop;done:
31
C Code
“For” Loop Conversion Example
Initial test can be optimized away
#define WSIZE 8*sizeof(int)int pcount_for(unsigned x) { int i; int result = 0; for (i = 0; i < WSIZE; i++) { unsigned mask = 1 << i; result += (x & mask) != 0; } return result;}
#define WSIZE 8*sizeof(int)int pcount_for(unsigned x) { int i; int result = 0; for (i = 0; i < WSIZE; i++) { unsigned mask = 1 << i; result += (x & mask) != 0; } return result;}
Goto Version
int pcount_for_gt(unsigned x) { int i; int result = 0; i = 0; if (!(i < WSIZE)) goto done; loop: { unsigned mask = 1 << i; result += (x & mask) != 0; } i++; if (i < WSIZE) goto loop; done: return result;}
int pcount_for_gt(unsigned x) { int i; int result = 0; i = 0; if (!(i < WSIZE)) goto done; loop: { unsigned mask = 1 << i; result += (x & mask) != 0; } i++; if (i < WSIZE) goto loop; done: return result;}
Init
!Test
Body
UpdateTest
32
So what about these arrays?
int a[16];
char *c;c = (char *)malloc(256);
How are arrays actually represented in assembly?
33
Basic Data Types Integral
Stored & operated on in general (integer) registers Signed vs. unsigned depends on instructions used
Intel        ASM  Bytes  C
byte         b    1      [unsigned] char
word         w    2      [unsigned] short
double word  l    4      [unsigned] int
quad word    q    8      [unsigned] long int (x86-64)
Floating Point Stored & operated on in floating point registers
Intel     ASM  Bytes     C
Single    s    4         float
Double    l    8         double
Extended  t    10/12/16  long double
34
Array Allocation Basic Principle
T A[L];
Array of data type T and length L
Contiguously allocated region of L * sizeof(T) bytes
char string[12];
x x + 12
int val[5];
x x + 4 x + 8 x + 12 x + 16 x + 20
double a[3];
x x + 8 x + 16 x + 24
char *p[3];
x x + 8 x + 16 x + 24
x x + 4 x + 8 x + 12
IA32
x86-64
35
Array Access Basic Principle
T A[L]; Array of data type T and length L Identifier A can be used as a pointer to array element 0: Type T*
Reference   Type?   Value?
val[4]      int     3
val         int *   x
val+1       int *   x + 4
&val[2]     int *   x + 8
val[5]      int     ??
*(val+1)    int     5
val + i     int *   x + 4i
int val[5]; 1 5 2 1 3
x x + 4 x + 8 x + 12 x + 16 x + 20
WATWAT
36
Array Example
Declaration “zip_dig cmu” equivalent to “int cmu[5]” Example arrays were allocated in successive 20 byte blocks
Not guaranteed to happen in general
#define ZLEN 5typedef int zip_dig[ZLEN];
zip_dig cmu = { 1, 5, 2, 1, 3 };zip_dig mit = { 0, 2, 1, 3, 9 };zip_dig ucb = { 9, 4, 7, 2, 0 };
zip_dig cmu; 1 5 2 1 3
16 20 24 28 32 36
zip_dig mit; 0 2 1 3 9
36 40 44 48 52 56
zip_dig ucb; 9 4 7 2 0
56 60 64 68 72 76
37
Array Access - Idea
Array start
4 element array of ints
%edx
%eaxOffset
38
Array Accessing Example
Register %edx contains starting address of array
Register %eax contains array index
Desired digit at 4*%eax + %edx
Use memory reference (%edx,%eax,4)
int get_digit (zip_dig z, int dig){ return z[dig];}
# %edx = z # %eax = dig
movl (%edx,%eax,4),%eax # z[dig]
IA32
zip_dig cmu; 1 5 2 1 3
16 20 24 28 32 36
39
# edx = zmovl $0, %eax # %eax = i
.L4: # loop:addl $1, (%edx,%eax,4) # z[i]++addl $1, %eax # i++cmpl $5, %eax # i:5jne .L4 # if !=, goto loop
Array Loop Example (IA32)
void zincr(zip_dig z) { int i; for (i = 0; i < ZLEN; i++) z[i]++;}
40
Pointer Loop Example (IA32)void zincr_p(zip_dig z) { int *zend = z+ZLEN; do { (*z)++; z++; } while (z != zend); }
void zincr_v(zip_dig z) { void *vz = z; int i = 0; do { (*((int *) (vz+i)))++; i += ISIZE; } while (i != ISIZE*ZLEN);}
# edx = z = vzmovl $0, %eax # i = 0
.L8: # loop:addl $1, (%edx,%eax) # Increment vz+iaddl $4, %eax # i += 4cmpl $20, %eax # Compare i:20jne .L8 # if !=, goto loop
41
How do we fit a 2D matrix into memory?
41
a b c
d e f
g h i
a b c
d e f
g h i
Row-major ordering
Q: How do we find cell (i,j)?WAT
4242
43
Nested Array Example
“zip_dig pgh[4]” equivalent to “int pgh[4][5]” Variable pgh: array of 4 elements, allocated contiguously Each element is an array of 5 int’s, allocated contiguously
Important: “Row-Major” ordering of all elements guaranteed
#define PCOUNT 4zip_dig pgh[PCOUNT] = {{1, 5, 2, 0, 6}, {1, 5, 2, 1, 3 }, {1, 5, 2, 1, 7 }, {1, 5, 2, 2, 1 }};
zip_dig pgh[4];
76 96 116 136 156
1 5 2 0 6 1 5 2 1 3 1 5 2 1 7 1 5 2 2 1
44
Multidimensional (Nested) Arrays Declaration
T A[R][C]; 2D array of data type T R rows, C columns Type T element requires K bytes
Array Size R * C * K bytes
Arrangement Row-Major Ordering
A[0][0] A[0][C-1]
A[R-1][0]
• • •
• • • A[R-1][C-1]
•••
•••
int A[R][C];
• • •A[0][0]
A[0]
[C-1]• • •
A[1][0]
A[1]
[C-1]• • •
A[R-1][0]
A[R-1][C-1]
• • •
4*R*C Bytes
a b c
d e f
g h i
45
• • •
Nested Array Row Access Row Vectors
A[i] is an array of C elements
Each element of type T requires K bytes
Starting address: A + i * (C * K)
• • •A[i][0]
A[i]
[C-1]
A[i]
• • •A
[R-1][0]
A[R-1][C-1]
A[R-1]
• • •
A
• • •A[0][0]
A[0]
[C-1]
A[0]
A+i*C*4 A+(R-1)*C*4
int A[R][C];
46
Nested Array Row Access Code
Row Vector
pgh[index] is array of 5 int’s
Starting address pgh+20*index
IA32 Code
Computes and returns address
Compute as pgh + 4*(index+4*index)
int *get_pgh_zip(int index){ return pgh[index];}
# %eax = indexleal (%eax,%eax,4),%eax # 5 * indexleal pgh(,%eax,4),%eax # pgh + (20 * index)
#define PCOUNT 4zip_dig pgh[PCOUNT] = {{1, 5, 2, 0, 6}, {1, 5, 2, 1, 3 }, {1, 5, 2, 1, 7 }, {1, 5, 2, 2, 1 }};
47
• • •
Nested Array Row Access Array Elements
A[i][j] is an element of type T, which requires K bytes
Address: A + i * (C * K) + j * K = A + (i * C + j) * K
• • • • • •A[i][j]
A[i]
• • •A
[R-1][0]
A[R-1][C-1]
A[R-1]
• • •
A
• • •A[0][0]
A[0]
[C-1]
A[0]
A+i*C*4 A+(R-1)*C*4
int A[R][C];
A+i*C*4+j*4
48
• • •
Nested Array Row Access Array Elements
A[i][j] is an element of type T, which requires K bytes
Address: A + i * (C * K) + j * K = A + (i * C + j) * K
• • • • • •A[i][j]
A[i]
• • •A
[R-1][0]
A[R-1][C-1]
A[R-1]
• • •
A
• • •A[0][0]
A[0]
[C-1]
A[0]
A+i*C*4 A+(R-1)*C*4
int A[R][C];
A+i*C*4+j*4
A[i][j] ==
A + (i*C + j)*K
49
Nested Array Element Access Code
Array Elements
pgh[index][dig] is int
Address: pgh + 20*index + 4*dig = pgh + 4*(5*index + dig)
IA32 Code
Computes address pgh + 4*((index+4*index)+dig)
int get_pgh_digit (int index, int dig){ return pgh[index][dig];}
movl 8(%ebp), %eax # indexleal (%eax,%eax,4), %eax # 5*indexaddl 12(%ebp), %eax # 5*index+digmovl pgh(,%eax,4), %eax # offset 4*(5*index+dig)
50
struct rec { int a[3]; int i; struct rec *n;};
Structure Allocation
Concept
Contiguously-allocated region of memory
Refer to members within structure by names
Members may be of different types
Memory Layout: a | i | n
0 12 16 20
51
struct rec { int a[3]; int i; struct rec *n;};
IA32 Assembly# %edx = val# %eax = rmovl %edx, 12(%eax) # Mem[r+12] = val
void set_i(struct rec *r, int val){ r->i = val;}
Structure Access
Accessing Structure Member
Pointer indicates first byte of structure
Access elements with offsets
a | i | n
0 12 16 20
r    r+12
52
movl 12(%ebp), %eax # Get idxsall $2, %eax # idx*4addl 8(%ebp), %eax # r+idx*4
int *get_ap (struct rec *r, int idx){ return &r->a[idx];}
Generating Pointer to Structure Member
Generating Pointer to Array Element
Offset of each structure member determined at compile time
Arguments: Mem[%ebp+8]: r, Mem[%ebp+12]: idx
r    r+idx*4
a | i | n
0 12 16 20
struct rec { int a[3]; int i; struct rec *n;};
Recommended