Oct11: 373

int sum(int a, int b)
{
  int ans;
  ans = a + b;
  return ans;
}

Assignment typically uses the mov instruction.

Addition is the addl instruction.

The return value for a function is always expected to be in register %eax.

So the assembler ret instruction doesn't have an operand. The return value is implicitly whatever is in register %eax.

In the following examples register %eax will be denoted by reg1

int sum(int a, int b)    sum: 			       
{			 						       
  int ans;               reg1 = b		       
  ans = a + b;           add a, reg1  // reg1 = reg1 + a
  return ans;            mov reg1, ans		       
}                        mov ans, reg1
                         return

 sum: 		     sum:					            
 		     	pushl	%ebp					
		     	movl	%esp, %ebp	
	       	     	subl	$16, %esp	
 reg1 = b	     	movl	12(%ebp), %eax		       
 add a, reg1   	     	addl	8(%ebp), %eax	
 mov reg1, ans	     	movl	%eax, -4(%ebp)		       
 mov ans, reg1        	movl	-4(%ebp), %eax		       
 return            	leave			          
		     	ret

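At most one operand of mov may be a memory location; the other operand must be a register or (as the source only) an immediate value. That is why a register is needed as an intermediary when copying from one memory location to another.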
Moving the value in %eax to memory location for ans and then moving it back to a register (actually the same register), would be avoided with optimization level 2.

There is no direct IA32 assembler instruction for 'if' or 'while'.

The assembler instructions to use are of the following form:

  compare ...               // result of compare is stored somewhere
  if ( result ) goto Label  // conditional jumps
  goto Label                // unconditional jumps

Example: if (x >= y) goto L2

  cmpl y,x
  jge L2

Note the order of the operands of cmpl!!!

<max(int x, int y)>:          int max(int x, int y) 
			      {		      
     int maxVal;	        int maxVal;	      
     if (x >= y) goto L2        if ( x < y ) {       
     maxVal = y		          maxVal = y;	      
     goto L4		        } else {	       
L2:  maxVal = x		          maxVal = x;	      
L4:  return maxVal	        }		       
			        return maxVal;      
			      }

max:				  <max(int x, int y)>:          
	pushl	%ebp		
	movl	%esp, %ebp	
	subl	$16, %esp	
	movl	8(%ebp), %eax	    reg1 = x
	cmpl	12(%ebp), %eax	    [compare y, reg1]
	jge	.L2		    if (reg1 >= y) goto L2
	movl	12(%ebp), %eax	    reg1 = y
	movl	%eax, -4(%ebp)	    maxVal = reg1
	jmp	.L4		    goto L4
.L2:				 L2:                           
	movl	8(%ebp), %eax	    reg1 = x               
	movl	%eax, -4(%ebp)	    maxVal = reg1
.L4:		                 L4:		
	movl	-4(%ebp), %eax	    reg1 = maxVal
	leave			  
	ret                         return

Notes:

At L2 memory location x is moved to register %eax (it's already there). Then register %eax value is moved to memory location maxVal. The register is used as a temporary only because the movl instruction can't have both operands be in memory.
At L4, the value in maxVal is moved back to register %eax. This time register %eax is not used as a temporary location, but because the return value of a function is always expected to be in register %eax.

Compiled with optimization level 2:

  gcc -S -O2 if1.c

max:				
	pushl	%ebp		
	movl	%esp, %ebp	
	movl	8(%ebp), %edx	  reg2 = x
	movl	12(%ebp), %eax	  reg1 = y
	cmpl	%edx, %eax	  [cmp reg2, reg1]
	jge	.L2		  if (reg1 >= reg2) goto L2
	movl	%edx, %eax	  reg1 = reg2
.L2:				L2:
	popl	%ebp		  
	ret                       return


int sumFirst(int n)
{
  int sum = 0;
  while(n > 0) {
    sum += n;
    n--;
  }
  return sum;
}

There is no instruction that directly implements 'while' in IA32.

The instructions available are the same form as used for 'if statements':

  compare ...
  if ( result ) goto Label  // conditional jumps
  goto Label                // unconditional jumps


int sumFirst(int n)       sumFirst:		 
{		       						 
  int sum = 0;            sum = 0		 
                          goto L2		 
  while(n > 0)         L3:			 
                          reg1 = n		 
    sum += n;             sum = sum + reg1
    n--;                  n = n - 1		 
                       L2:			 
  }                       if(n > 0) goto L3	 
  return sum              reg1 = sum		 
                          return                 
}

sumFirst:                             sumFirst:
	pushl	%ebp
	movl	%esp, %ebp            		 
	subl	$16, %esp          						 
	movl	$0, -4(%ebp)	      sum = 0		 
	jmp	.L2		      goto L2		 
.L3:				   L3:			 
	movl	8(%ebp), %eax	      reg1 = n		 
	addl	%eax, -4(%ebp)	      sum = sum + reg1
	subl	$1, 8(%ebp)	      n = n - 1		 
.L2:				   L2:			 
	cmpl	$0, 8(%ebp)	      [compare 0, n]
	jg	.L3		      if(n > 0) goto L3	 
	movl	-4(%ebp), %eax	      reg1 = sum
	leave
	ret                           return

If an operand is in memory, an instruction might want to know just the location (i.e., in order to store some value there).

 movl %eax, -4(%ebp)  ; move contents of %eax to memory location -4(%ebp)

If the first operand of movl is a memory location, movl needs the contents of that location: it copies the value stored there to the destination register.

 movl -4(%ebp), %eax

An add instruction would need to know both the values stored at the locations of its operands and the location of the second operand:


 ; add contents of memory -4(%ebp)  and the contents of  register %eax
 ; store the result as the new contents of the memory location -4(%ebp).      

 addl %eax, -4(%ebp)

Note: In both cases these instructions need the contents of the operands!

leal is the Load Effective Address instruction.

The first operand should be a memory location and the second operand should be a register.

However, in contrast to movl and addl, leal moves that memory address to the destination register.

 leal -4(%ebp), %eax

 ; calculate the address -4(%ebp) and
 ; store that address in register %eax

Compare this with movl:

 movl -4(%ebp), %eax

 ; calculate the address -4(%ebp) and
 ; store the contents of
 ; that address in register %eax

Consider the following loop that accesses the array elements x[0], x[1], ... x[N-1]:

 
  for(int i = 0; i < N; i++) {
    x[i] = 10;
  }

The code generated might look like this:


  ****   for(int i = 0; i < N; i++) {
               movl    $0, -8(%ebp)

               jmp     .L2
       .L3:
  ****     x[i] = 10;
               movl    -8(%ebp), %edx     ;   -8(%ebp) is i
               movl    -16(%ebp), %eax    ;   -16(%ebp) is the beginning address of array x
               movl    $10, (%eax,%edx,4) ;  address: %eax  + 4 * %edx

               addl    $1, -8(%ebp)       ; i++
       .L2:
               movl    -8(%ebp), %eax
               cmpl    -12(%ebp), %eax    
               jl      .L3                ; if (i < N) goto L3

The IA32 assembler operand for x[i] used the scaled index form where x is an int array.

      (%eax, %edx, 4)

      %eax contains the beginning address of array x
      %edx contains the value of i
      4 multiplier

      address: contentsOf(%eax) + 4 * contentsOf(%edx)

The index i can't just be added to the address of x to get the address of x[i] since the size of each int is 4 bytes.

For example, the address of x[1] is not 0x8001, but is 0x8004:

Element	i	Address
x[0]	0	0x8000
x[1]	1	0x8004
x[2]	2	0x8008
x[3]	3	0x800C

So i must be multiplied by this size 4 and then added.

Using scaled index operand does the multiplication without needing a separate imull instruction.

Indexed operand is like scaled index, but without the multiplier:

      (%ebx, %esi)

      address: contentsOf(%ebx) + contentsOf(%esi)

and a positive or negative offset can also be used:

      12(%ebx, %esi)

      address: contentsOf(%ebx) + contentsOf(%esi) + 12

Assume the following register contents:

Register	Contents
%eax	0x8000
%edx	3

Each of the following IA32 assembler expressions indicates a memory location. Fill in the address for each one.

Operand	Memory Address
(%eax)
4(%eax)
-4(%eax)
9(%eax, %edx)
(%eax,%edx,1)
(%eax,%edx,4)
0x8000(,%edx,8)

Given the following register and memory contents,

Register	Contents
%edx	0x8000
%ebx	2

Memory Address	Contents
0x8000	0x5
0x8004	0xA
0x8008	0xF

fill in the value stored in register %eax after execution of each instruction:

Instruction	Value stored in %eax
leal (%edx), %eax
movl (%edx), %eax
leal 4(%edx), %eax
movl 4(%edx), %eax
leal (%edx, %ebx, 4), %eax
movl (%edx, %ebx, 4), %eax

int swapElements(int a[], int i, int j)
{
  int tmp;
  tmp = a[i];
  a[i] = a[j];
  a[j] = tmp;
}


int swapElements(int a[], int i, int j)
{
  int tmp;
  tmp = a[i];
  a[i] = a[j];
  a[j] = tmp;
}

The compiler decides to generate code something like this with 4 assignments instead of 3:

      int tmp1, tmp2;
      tmp1 = a[i];
      tmp2 = a[j];
      a[i] = tmp2;
      a[j] = tmp1;


swapElements:
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%ebp), %edx          ; address of array a in %edx
	movl	12(%ebp), %ecx         ; i in %ecx  
	movl	16(%ebp), %eax         ; j in %eax	     
	pushl	%ebx			 							     
	leal	(%edx,%ecx,4), %ecx    ; address of a[i] in %ecx
	leal	(%edx,%eax,4), %eax    ; address of a[j] in %eax     
	movl	(%ecx), %ebx           ; value of a[i] in %ebx
	movl	(%eax), %edx           ; value of a[j] in %edx
	movl	%edx, (%ecx)           ; %edx stored in a[i]
	movl	%ebx, (%eax)           ; %ebx stored in a[j]
	popl	%ebx			 							     
	popl	%ebp			 							     
	ret                            ; return

The variable tmp is not allocated a memory location. Instead registers %ebx and %edx are being used as 2 temporaries - one for a[i] and one for a[j].

The compiler likes this better since registers as temporaries have faster access and at least one register is needed anyway in movl instructions.
That is, the assembly code actually uses a different algorithm. It uses two register temporaries instead of one memory temporary.
Since two register temporaries are used, there are 4 movl instructions to do the swap instead of 3. This is because the movl instruction can't have both operands be memory locations.

(But 4 is better than using 2 movl instructions for each of the 3 assignments.)
Register %ebx, which is used as one of the temporaries, is a callee save register:

If the called function uses %ebx, it must save the current value before using the register and must restore the value before the function returns.

Caller save: %eax, %ecx, %edx

Callee save: %ebx, %esi, %edi

int max(int a, int b);

int f(int x, int y)
{
  int m;

  m = max(x,y);

  return m * m;
}

To call max, assembly code for function f must

Push y, then x onto the stack
Execute the IA32 call instruction with operand max

int f(int x, int y)
{
  int m;

  m = max(x,y);

  return m * m;
}


f:
        pushl   %ebp
        movl    %esp, %ebp
        subl    $8, %esp          ; make the stack 8 bytes bigger
        movl    12(%ebp), %eax    ; parameter y is at 12(%ebp)
        movl    %eax, 4(%esp)     ; move y to 4 bytes below stack top
        movl    8(%ebp), %eax     ; parameter x is at 8(%ebp)
        movl    %eax, (%esp)      ; move x to the stack top
        call    max               ; call max, the return value will be in %eax
        leave                     ; restore the stack top to its old value
        imull   %eax, %eax        ; R[%eax] = R[%eax] * R[%eax]
        ret                       ; return value in %eax

Recall: A stack frame is a block of memory allocated on top of the call stack for a function execution.

A stack frame contains:

Information necessary to restore the stack frame of the calling function
Space for those local variables of the function that must be allocated an address in memory.
Saved values of any of the "callee save" registers (%ebx, %esi, %edi) that the function uses.

When the function returns, the function must execute code that will

restore the values of any "callee save" registers
pop its own stack frame off the stack, and
jump to the instruction in the calling function just after the function call

Two registers, %ebp (frame pointer) and %esp (stack pointer) contain the addresses of the bottom and the top, respectively, of a function's stack frame when it is executing.

Memory, including stack, when main has called f, and f has set up its stack frame.

low memory address	Text (compiled, linked code)
	Global Data
	heap area
(%esp)	(top of f's stack frame) f's stack frame
(%ebp)	bottom of f's stack frame
	main's stack frame

The call instruction

      call function_address

pushes the address of the next instruction (the return address) onto the stack, and
sets the value of the %eip register to the function address specified by the call operand

The second action is what one means by "jumps" to the function address.

The simplest call is to a function with no parameters.

int main()
{
   int n;
   n = f();
}

The call to f generates the following assembler code in main:

      call _f

The symbol _f will be replaced by an address of the code for function f.

Stack just before executing the IA32 call instruction in main:

low memory address	Text (compiled, linked code)
	Global Data
	heap area
(%esp)	(top of main's stack frame) main's stack frame
(%ebp)	bottom of main's stack frame

The only change call makes to the stack is to push the address just after the call instruction (the return address) onto the stack.

low memory address	Text (compiled, linked code)
	Global Data
	heap area
(%esp)	return address in main
	main's stack frame
(%ebp)	bottom of main's stack frame

Function f that is called by main must set up its own stack frame:

save main's frame pointer on the stack
make the top of the stack be the bottom of f's stack frame
If memory space for local variables is needed, adjust the top of f's stack frame

1. save main's frame pointer on the stack
 
    pushl %ebp

2. make the top of the stack be the bottom of f's stack frame

   movl %esp, %ebp

3. adjust top of f's stack frame if necessary. (Suppose for example,
   memory space for 2 local int variables is needed)

   subl $8, %esp

After function f executes

        pushl %ebp
        movl  %esp, %ebp
        subl  $8, %esp

The stack frame for f contains

space for two integers at -4(%ebp) and -8(%ebp)
the saved %ebp value (frame pointer for main):

low memory address	Text (compiled, linked code)
	Global Data
	heap area
(%esp)	(top of f's stack frame) f's stack frame
(%ebp)	main's saved %ebp value
4(%ebp)	return address in main's code
	main's stack frame

When f returns, it must

deallocate all the space for all its local variables and restore the stack frame for its caller, main.
In short, it must pop the stack!
reload the program counter register, %eip, with the address of the instruction just after the call instruction in main.

Local variables are deallocated by simply changing the stack pointer register to point to the bottom of the current stack frame.

Data in local variable memory locations may not actually be erased, but this data will no longer be accessible on return to the caller and will be overwritten if another function is called.

The ret instruction removes the top value of the stack and stores it in register %eip. That's all!

So before executing the ret instruction, f must adjust the stack pointer so that the top of the stack holds the return address in main's code.

This preparation can be done with two instructions:

      movl %ebp, %esp ; make %esp point to the bottom of f's frame
      popl %ebp       ; reset frame pointer %ebp to point to bottom of main's frame

The popl instruction copies the top of the stack into %ebp, but also increments the stack pointer %esp.

Now the return address is uncovered and is at the top of the stack. So the ret instruction can do its work.

The assembler code to return from a function is:

      movl %ebp, %esp
      popl %ebp
      ret

There is also a leave assembly instruction. It is equivalent to the first two instructions. So you may also see the code for returning from a function as:

      leave
      ret

int sum(int, int);                  f:                          
                                     pushl   %ebp               
int f()                              movl    %esp, %ebp 
{                                    subl    $24, %esp  
  int n = 5;                         movl    $5, -12(%ebp)      
  int m = 10;                        movl    $10, -8(%ebp)      
  int ans;                           movl    -8(%ebp), %eax     
                                     movl    %eax, 4(%esp)   ; m on the stack  4 bytes below top
  ans = sum(n, m);                   movl    -12(%ebp), %eax    
                                     movl    %eax, (%esp)    ; n at top of the stack
  return 0;                          call    sum             ; call sum(n,m)
}                                    movl    %eax, -4(%ebp)     
                                     movl    $0, %eax   
                                     leave                      
                                     ret

int sum(int x, int y)               sum:                               
{                                     pushl   %ebp             
  return x + y;                       movl    %esp, %ebp     
}                                     movl    12(%ebp), %eax ; y in %eax
                                      addl    8(%ebp), %eax  ; add  x to %eax
                                      popl    %ebp             
                                      ret                    ; return %eax

CSC373 Oct11

slide version

single file version

Contents

IA32 Assembler for: assignment statements (1)[1] [top]

IA32 Assembler for: assignment statements (1)[2] [top]

IA32 Assembler for: assignment statements (2)[3] [top]

IA32 Assembler for: if statements (0)[4] [top]

IA32 Assembler for: C if statements (1)[5] [top]

IA32 Assembler for: if statements (2)[6] [top]

IA32 Assembler for: if statements (3)[7] [top]

IA32 Assembler for: loop statements (1)[8] [top]

IA32 Assembler for: loop statements (2)[9] [top]

IA32 Assembler for: loop statements (3)[10] [top]

The IA32 movl and addl instructions [11] [top]

The IA32 leal instruction [12] [top]

Memory Operands for Array Elements [13] [top]

Scaled Index Operand [14] [top]

Indexed Operand [15] [top]

Practice: memory operands [16] [top]

Practice: movl and leal [17] [top]

IA32 Assembler for: accessing array elements (1)[18] [top]

IA32 Assembler for: accessing array elements (2)[19] [top]

IA32 Assembly for: accessing array elements (3)[20] [top]

IA32 Assembly for: Function Calls (1)[21] [top]

IA32 Assembly for: Function Calls (2)[22] [top]

IA32 Function Calls: Stack Frames [23] [top]

Basic Stack Frame Illustrated [24] [top]

IA32 Instruction: call[25] [top]

Example: call [26] [top]

Stack Just Before call Instruction [27] [top]

Stack Just After call Instruction [28] [top]

Stack Frame Setup Code [29] [top]

Stack After f's Stack Frame Setup Code [30] [top]

IA32 Functions: Return [31] [top]

IA32 ret Instruction [32] [top]

IA32 Function Return: Summary [33] [top]

Example: f calls sum [34] [top]

Example: sum [35] [top]

CSC373 Oct11

slide version

single file version

Contents

IA32 Assembler for: assignment statements (1)[1] [top]

IA32 Assembler for: assignment statements (1)[2] [top]

IA32 Assembler for: assignment statements (2)[3] [top]

IA32 Assembler for: if statements (0)[4] [top]

IA32 Assembler for: C if statements (1)[5] [top]

IA32 Assembler for: if statements (2)[6] [top]

IA32 Assembler for: if statements (3)[7] [top]

IA32 Assembler for: loop statements (1)[8] [top]

IA32 Assembler for: loop statements (2)[9] [top]

IA32 Assembler for: loop statements (3)[10] [top]

The IA32 movl and addl instructions[11] [top]

The IA32 leal instruction[12] [top]

Memory Operands for Array Elements[13] [top]

Scaled Index Operand[14] [top]

Indexed Operand[15] [top]

Practice: memory operands[16] [top]

Practice: movl and leal[17] [top]

IA32 Assembler for: accessing array elements (1)[18] [top]

IA32 Assembler for: accessing array elements (2)[19] [top]

IA32 Assembly for: accessing array elements (3)[20] [top]

IA32 Assembly for: Function Calls (1)[21] [top]

IA32 Assembly for: Function Calls (2)[22] [top]

IA32 Function Calls: Stack Frames[23] [top]

Basic Stack Frame Illustrated[24] [top]

IA32 Instruction: call[25] [top]

Example: call[26] [top]

Stack Just Before call Instruction[27] [top]

Stack Just After call Instruction[28] [top]