int sum(int a, int b)
{
int ans;
ans = a + b;
return ans;
}
Assignment typically uses the mov instruction.
Addition is the addl instruction.
The return value for a function is always expected to be
in register %eax.
So the assembler ret instruction doesn't have an operand. The return value is implicitly whatever is in register %eax.
In the following examples register %eax will be denoted by reg1
int sum(int a, int b) sum:
{
int ans; reg1 = b
ans = a + b; add a, reg1 // reg1 = reg1 + a
return ans; mov reg1, ans
} mov ans, reg1
return
sum: sum:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
reg1 = b movl 12(%ebp), %eax
add a, reg1 addl 8(%ebp), %eax
mov reg1, ans movl %eax, -4(%ebp)
mov ans, reg1 movl -4(%ebp), %eax
return leave
ret
At most one operand of mov may be a memory location; the other operand must be a register or an immediate value.
-
Moving the value in %eax to memory location for ans and then moving it back to a register (actually the same register), would be avoided with optimization level 2.
There is no direct IA32 assembler instruction for 'if' or 'while'.
The assembler instructions to use are of the following form:
compare ... // result of compare is stored somewhere
if ( result ) goto Label // conditional jumps
goto Label // unconditional jumps
Example: if (x >= y) goto L2
cmpl y,x
jge L2
Note the order of the operands of cmpl!!!
<max(int x, int y)>: int max(int x, int y)
{
int maxVal; int maxVal;
if (x >= y) goto L2 if ( x < y ) {
maxVal = y maxVal = y;
goto L4 } else {
L2: maxVal = x maxVal = x;
L4: return maxVal }
return maxVal;
}
max: <max(int x, int y)>:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
movl 8(%ebp), %eax reg1 = x
cmpl 12(%ebp), %eax [compare y, reg1]
jge .L2 if (reg1 >= y) goto L2
movl 12(%ebp), %eax reg1 = y
movl %eax, -4(%ebp) maxVal = reg1
jmp .L4 goto L4
.L2: L2:
movl 8(%ebp), %eax reg1 = x
movl %eax, -4(%ebp) maxVal = reg1
.L4: L4:
movl -4(%ebp), %eax reg1 = maxVal
leave
ret return
Notes:
At L2 the value of x is moved from memory to register %eax (even though it's already there).
Then register %eax value is moved to memory location maxVal. The register is used as a
temporary only because the movl instruction can't have both operands be in memory.
At L4, the value in maxVal is moved back to register %eax. This time register %eax is not used as
a temporary location, but because the return value of a function is always expected to be in register %eax.
Compiled with optimization level 2:
gcc -S -O2 if1.c
max:
pushl %ebp
movl %esp, %ebp
movl 8(%ebp), %edx reg2 = x
movl 12(%ebp), %eax reg1 = y
cmpl %edx, %eax [cmp reg2, reg1]
jge .L2 if (reg1 >= reg2) goto L2
movl %edx, %eax reg1 = reg2
.L2: L2:
popl %ebp
ret return
int sumFirst(int n)
{
int sum = 0;
while(n > 0) {
sum += n;
n--;
}
return sum;
}
There is no instruction that directly implements 'while' in IA32.
The instructions available are of the same form as those used for 'if' statements:
compare ...
if ( result ) goto Label // conditional jumps
goto Label // unconditional jumps
int sumFirst(int n) sumFirst:
{
int sum = 0; sum = 0
goto L2
while(n > 0) L3:
reg1 = n
sum += n; sum = sum + reg1
n--; n = n - 1
L2:
} if(n > 0) goto L3
return sum reg1 = sum
return
}
sumFirst: sumFirst:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
movl $0, -4(%ebp) sum = 0
jmp .L2 goto L2
.L3: L3:
movl 8(%ebp), %eax reg1 = n
addl %eax, -4(%ebp) sum = sum + reg1
subl $1, 8(%ebp) n = n - 1
.L2: L2:
cmpl $0, 8(%ebp) [compare 0, n]
jg .L3 if(n > 0) goto L3
movl -4(%ebp), %eax reg1 = sum
leave
ret return
If an operand is in memory, an instruction might want to know just
the location (i.e., in order to store some value there).
movl %eax, -4(%ebp) ; move contents of %eax to memory location -4(%ebp)
If the first operand of movl is a memory location, the instruction needs
the contents of that location: it moves those contents to the destination register.
movl -4(%ebp), %eax
An add instruction would need to know both the values stored
at the locations of its operands and the location of the
second operand:
; add contents of memory -4(%ebp) and the contents of register %eax
; store the result as the new contents of the memory location -4(%ebp).
addl %eax, -4(%ebp)
Note: In both cases these instructions need the
contents of the operands!
leal is the Load Effective Address instruction.
The first operand should be a memory location and the second
operand should be a register.
However, in contrast to movl and
addl, leal moves that memory address to the destination register.
leal -4(%ebp), %eax
; calculate the address -4(%ebp) and
; store that address in register %eax
Compare this with movl:
movl -4(%ebp), %eax
; calculate the address -4(%ebp) and
; store the contents of
; that address in register %eax
Consider the following loop that accesses the array elements
x[0], x[1], ... x[N-1]:
for(int i = 0; i < N; i++) {
x[i] = 10;
}
The code generated might look like this:
**** for(int i = 0; i < N; i++) {
movl $0, -8(%ebp)
jmp .L2
.L3:
**** x[i] = 10;
movl -8(%ebp), %edx ; -8(%ebp) is i
movl -16(%ebp), %eax ; -16(%ebp) is the beginning address of array x
movl $10, (%eax,%edx,4) ; address: %eax + 4 * %edx
addl $1, -8(%ebp) ; i++
.L2:
movl -8(%ebp), %eax
cmpl -12(%ebp), %eax
jl .L3 ; if (i < N) goto L3
The IA32 assembler operand for x[i] used the scaled
index form where x is an int array.
(%eax, %edx, 4)
%eax contains the beginning address of array x
%edx contains the value of i
4 multiplier
address: contentsOf(%eax) + 4 * contentsOf(%edx)
The index i can't just be added to the address of x to
get the address of x[i] since the size of each int is 4 bytes.
For example, the address of x[1] is not 0x8001,
but is 0x8004:
Element |
i |
Address |
x[0] |
0 |
0x8000 |
x[1] |
1 |
0x8004 |
x[2] |
2 |
0x8008 |
x[3] |
3 |
0x800C |
So i must be multiplied by this size 4 and then
added.
Using scaled index operand does the multiplication without
needing a separate imull instruction.
Indexed operand is like scaled index, but without the
multiplier:
(%ebx, %esi)
address: contentsOf(%ebx) + contentsOf(%esi)
and a positive or negative offset can also be used:
12(%ebx, %esi)
address: contentsOf(%ebx) + contentsOf(%esi) + 12
Assume the following register contents:
Register |
Contents |
%eax |
0x8000 |
%edx |
3 |
Each of the following IA32 assembler expressions indicates
a memory location.
Fill in the address for each one.
Operand |
Memory Address |
(%eax) |
|
4(%eax) |
|
-4(%eax) |
|
9(%eax, %edx) |
|
(%eax,%edx,1) |
|
(%eax,%edx,4) |
|
0x8000(,%edx,8) |
|
Given the following register and memory contents,
Register |
Contents |
%edx |
0x8000 |
%ebx |
2 |
Memory Address |
Contents |
0x8000 |
0x5 |
0x8004 |
0xA |
0x8008 |
0xF |
fill in the value stored in register %eax after execution of each instruction:
Instruction |
Value stored in %eax |
leal (%edx), %eax |
|
movl (%edx), %eax |
|
leal 4(%edx), %eax |
|
movl 4(%edx), %eax |
|
leal (%edx, %ebx, 4), %eax |
|
movl (%edx, %ebx, 4), %eax |
|
int swapElements(int a[], int i, int j)
{
int tmp;
tmp = a[i];
a[i] = a[j];
a[j] = tmp;
}
int swapElements(int a[], int i, int j)
{
int tmp;
tmp = a[i];
a[i] = a[j];
a[j] = tmp;
}
The compiler decides to generate code something like this with
4 assignments instead of 3:
int tmp1, tmp2;
tmp1 = a[i];
tmp2 = a[j];
a[i] = tmp2;
a[j] = tmp1;
swapElements:
pushl %ebp
movl %esp, %ebp
movl 8(%ebp), %edx ; address of array a in %edx
movl 12(%ebp), %ecx ; i in %ecx
movl 16(%ebp), %eax ; j in %eax
pushl %ebx
leal (%edx,%ecx,4), %ecx ; address of a[i] in %ecx
leal (%edx,%eax,4), %eax ; address of a[j] in %eax
movl (%ecx), %ebx ; value of a[i] in %ebx
movl (%eax), %edx ; value of a[j] in %edx
movl %edx, (%ecx) ; %edx stored in a[i]
movl %ebx, (%eax) ; %ebx stored in a[j]
popl %ebx
popl %ebp
ret ; return
-
The variable tmp is not allocated a memory
location. Instead registers %ebx and %edx are being used
as 2 temporaries - one each for a[i] and a[j].
The compiler likes this better since registers as
temporaries have faster access and at least one register is needed anyway in movl
instructions.
-
That is, the assembly code actually uses a different algorithm. It
uses two register temporaries instead of one
memory temporary.
-
Since two register temporaries are used, there are 4
movl instructions to do the swap instead of 3. This
is because the movl instruction can't have both
operands be memory locations.
(But 4 is better than using 2 movl instructions for each of the 3 assignments.)
Register %ebx, which is used as one of the temporaries, is a callee save register:
If the called function uses %ebx, it must save the current value before using the register and must restore the value
before the function returns.
Caller save: %eax, %ecx, %edx
Callee save: %ebx, %esi, %edi
int max(int a, int b);
int f(int x, int y)
{
int m;
m = max(x,y);
return m * m;
}
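To call max, the assembly code for function f must place the arguments y and x on the stack, execute the call instruction, and then use the return value that max leaves in %eax.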
int f(int x, int y)
{
int m;
m = max(x,y);
return m * m;
}
f:
pushl %ebp
movl %esp, %ebp
subl $8, %esp ; make the stack 8 bytes bigger
movl 12(%ebp), %eax ; parameter y is at 12(%ebp)
movl %eax, 4(%esp) ; move y to 4 bytes below stack top
movl 8(%ebp), %eax ; parameter x is at 8(%ebp)
movl %eax, (%esp) ; move x to the stack top
call max ; call max, the return value will be in %eax
leave ; restore the stack top to its old value
imull %eax, %eax ; R[%eax] = R[%eax] * R[%eax]
ret ; return value in %eax
Recall: A stack frame is a block of memory allocated on top of the call
stack for a function execution.
A stack frame contains:
-
Information necessary to restore the stack frame of the
calling function
-
Space for those function local variables that must be allocated an address
in memory.
-
Saved values of any of the "callee save" registers (%ebx,
%esi, %edi) that the function uses.
When the function returns, the function must execute code that
will
- restore the values of any "callee save" registers
- pop its own stack frame off the stack, and
- jump to the instruction in the calling function just after
the function call
Two registers, %ebp (frame pointer) and %esp
(stack pointer) contain the addresses of the bottom and the top,
respectively, of a function's stack frame when it is executing.
Memory, including stack, when main has called f, and f has set up
its stack frame.
low memory address |
Text (compiled, linked code) |
|
Global Data |
|
heap area |
(%esp) |
(top of f's stack frame)
f's stack frame
|
(%ebp) |
bottom of f's stack frame |
|
main's stack frame |
IA32 Instruction: call
The call instruction
call function_address
-
pushes the address of the next instruction (the return
address) onto the stack, and
-
sets the value of the %eip register to the function address specified by the call operand
The second action is what one means by "jumps" to the function address.
The simplest call is to a function with no
parameters.
int main()
{
int n;
n = f();
}
The call to f generates the following assembler code in main:
call _f
The symbol _f will be replaced by an address of the code
for function f.
Stack just before executing the IA32 call
instruction in main:
low memory address |
Text (compiled, linked code) |
|
Global Data |
|
heap area |
(%esp) |
(top of main's stack frame)
main's stack frame
|
(%ebp) |
bottom of main's stack frame |
The only change call makes to the stack is to push the
address just after the call instruction (the return
address) onto the stack.
low memory address |
Text (compiled, linked code) |
|
Global Data |
|
heap area |
(%esp) |
return
address in main |
|
main's stack frame |
(%ebp) |
bottom of main's stack frame |
Function f that is called by main must set up its
own stack frame:
-
save main's frame pointer on the stack
-
make the top of the stack be the bottom of f's stack frame
-
If memory space for local variables is needed, adjust the top
of f's stack frame
1. save main's frame pointer on the stack
pushl %ebp
2. make the top of the stack be the bottom of f's stack frame
movl %esp, %ebp
3. adjust top of f's stack frame if necessary. (Suppose for example,
memory space for 2 local int variables is needed)
subl $8, %esp
After function f executes
pushl %ebp
movl %esp, %ebp
subl $8, %esp
The stack frame for f contains
- space for two integers at -4(%ebp) and -8(%ebp)
- the saved %ebp value (frame
pointer for main):
low memory address |
Text (compiled, linked code) |
|
Global Data |
|
heap area |
(%esp) |
(top of f's stack frame)
f's stack frame
|
(%ebp) |
main's saved
%ebp value |
4(%ebp) |
return address in
main's code |
|
main's stack frame |
When f returns, it must
- deallocate all the space for all its
local variables and restore the stack frame for its
caller, main.
In short, it must pop the stack!
- reload the program counter register, %eip, with the address
of the instruction just after the call instruction in
main.
Local variables are deallocated by simply changing the stack
pointer register to point to the bottom of the current stack frame.
Data in local variable memory locations may not
actually be erased, but this data will no longer be accessible
on return to the caller and will be overwritten if
another function is called.
The ret instruction removes the top value of the stack
and stores it in register %eip. That's all!
So before executing the ret instruction, f must
adjust the stack pointer so that the
top of the stack contains the return address in main's
code.
This preparation can be done with two instructions:
movl %ebp, %esp ; make %esp point to the bottom of f's frame
popl %ebp ; reset frame pointer %ebp to point to bottom of main's frame
The popl instruction copies the top of the stack into
%ebp, but also increments the stack pointer %esp.
Now the return address is uncovered and is at the top of the
stack. So the ret instruction can do its work.
The assembler code to return from a function is:
movl %ebp, %esp
popl %ebp
ret
There is also a leave assembly instruction. It is
equivalent to the first two instructions. So you may also see
the code for returning from a function as:
leave
ret
int sum(int, int); f:
pushl %ebp
int f() movl %esp, %ebp
{ subl $24, %esp
int n = 5; movl $5, -12(%ebp)
int m = 10; movl $10, -8(%ebp)
int ans; movl -8(%ebp), %eax
movl %eax, 4(%esp) ; m on the stack 4 bytes below top
ans = sum(n, m); movl -12(%ebp), %eax
movl %eax, (%esp) ; n at top of the stack
return 0; call sum ; call sum(n,m)
} movl %eax, -4(%ebp)
movl $0, %eax
leave
ret
int sum(int x, int y) sum:
{ pushl %ebp
return x + y; movl %esp, %ebp
} movl 12(%ebp), %eax ; y in %eax
addl 8(%ebp), %eax ; add x to %eax
popl %ebp
ret ; return %eax