/*
 * encode.c - write target code for pcc3 compiler
 *
 * The target is the EM64T Architecture.
 *
 * This code also assumes the compiler will run on an EM64T machine.
 *
 * All operands are left on top of the run-time stack so no register
 * allocation is performed. Registers are only used as temporaries
 * while doing individual operations.
 */

#include "defs.h"
#include "types.h"
#include "tree.h"
#include "symtab.h"
#include "encode.h"
#include "globals.h"
#include "message.h"
#include "semUtils.h"
#include "tarch.h"

/*
 * Prototypes for file-local (static) encoders, declared up front so they
 * can be referenced before their definitions.
 */
static void encodeStructUnionAssign(BinopTree b);
static void encodeAssign(BinopTree b);
static void encodeFcall(FcallTree b);
static void encodeReturn(ReturnTree r);

/* CONVERT_OP encoder; dispatched from encodeUnop(). */
static void encodeConvert(UnopTree u);

/*
 *  Track size of the locals area for the current function. Local
 *  variables are allocated in the locals area from the low addresses
 *  back to the high addresses. That is, the first local variable is at
 *  the lowest address. This may appear backwards, as the variables
 *  assigned the smaller addresses will appear at the larger negative
 *  offsets in the generated code.
 *
 *  Set by encodeFunctionEntry() (the locals size rounded up to a
 *  multiple of 16) and read when encode() emits the address of an
 *  LDECL variable.
 */
static int sizeOfLocalsArea;

/*
 *  Track stack depth from entry point to the function currently being
 *  compiled. The issue is that the stack must be aligned on a 16-byte
 *  boundary when an fcall is executed. The stack is on a 16-byte boundary
 *  after the function entry is complete: fcall saves %rsp, then %rbp
 *  is saved. So the code generator must increment this variable every
 *  time there is a push and decrement every time there is a pop. When
 *  code is being generated for fcall, this variable is checked to see
 *  if a dummy quadword needs to be placed on the stack between the IA-32
 *  style arg block and the EM64T style arg block.
 *
 *  The value is a byte count (the emit_* helpers in this file adjust it
 *  by 8 per push/pop and by the given size for grow/shrink).
 *
 *  NOTE(review): "fcall saves %rsp" presumably refers to the return
 *  address pushed by the call instruction -- confirm.
 */
int encodeStackDepth;

/*
 *  genLabel: hand out the next unused label number.
 *
 *  Numbers start at 0 and grow by one on every call. The caller builds
 *  the full label name itself, e.g. by prepending L$ (giving L$5).
 */
unsigned int genLabel(void)
{
  static unsigned int labelCounter = 0;
  const unsigned int fresh = labelCounter;

  labelCounter = fresh + 1;
  return fresh;
}


/**
 * \brief Emits a push instruction that places the given constant on
 *        the stack and accounts for the 8 bytes in encodeStackDepth.
 *
 * The operand is printed with %d, so it is narrowed to int explicitly
 * to keep the argument type in step with the format specifier (passing
 * an unsigned long to %d is a format/argument mismatch). pushq only
 * accepts a 32-bit immediate, so callers must pass a value that fits.
 *
 * \param val constant to push (only the low 32 bits are emitted).
 */
void emit_push_const(unsigned long int val)
{
  encodeStackDepth += 8;
  emit("\tpushq\t$%d\t# depth %d", (int) val, encodeStackDepth);
}

/**
 * \brief Emits a pushq of the named register and records the extra
 *        8 bytes in encodeStackDepth.
 *
 * \param reg register name without the leading '%' (e.g. "rax").
 */
void emit_push_reg(const char *reg)
{
  const int depthAfterPush = encodeStackDepth + 8;

  encodeStackDepth = depthAfterPush;
  emit("\tpushq\t%%%s\t# depth %d", reg, depthAfterPush);
}

/**
 * \brief Emits a popq into the named register and removes the 8 bytes
 *        from encodeStackDepth.
 *
 * \param reg register name without the leading '%' (e.g. "rax").
 */
void emit_pop_reg(const char *reg)
{
  const int depthAfterPop = encodeStackDepth - 8;

  encodeStackDepth = depthAfterPop;
  emit("\tpopq\t%%%s\t#depth %d", reg, depthAfterPop);
}

/**
 * \brief Emits an instruction that allocates space on top of the stack
 *        (subtracts from %rsp) and adds it to encodeStackDepth.
 *
 * \param size number of bytes to allocate.
 */
void emit_stack_grow(unsigned int size)
{
  const int depthAfterGrow = encodeStackDepth + size;

  encodeStackDepth = depthAfterGrow;
  emit("\tsubq\t$%u,%%rsp\t#depth %d", size, depthAfterGrow);
}

/**
 * \brief Emits an instruction that releases space from the top of the
 *        stack (adds to %rsp) and removes it from encodeStackDepth.
 *
 * \param size number of bytes to release.
 */
void emit_stack_shrink(unsigned int size)
{
  const int depthAfterShrink = encodeStackDepth - size;

  encodeStackDepth = depthAfterShrink;
  emit("\taddq\t$%u,%%rsp\t#depth %d", size, depthAfterShrink);
}

/********************************************
  encode_init

  Emit the output file prologue: switch the assembler to the data
  section and reset the tracked stack depth to zero.
********************************************/
void encode_init(void)      /* beginning of program */
{
  emit("\t.data");          /* global variables will be first */

  encodeStackDepth = 0;
}


/************************************************************
  encode_global_var_decl

  Emit a .comm directive reserving computeSize(type) bytes
  under the identifier's name.
*************************************************************/

void encode_global_var_decl(ST_ID id, Type type)
{
  const int nbytes = computeSize( type );

  emit("\t.comm\t%s, %d", st_get_id_str(id), nbytes);
}


/************************************************************
  encode_static_var_decl

  Emit a .lcomm directive reserving computeSize(type) bytes
  under the identifier's name.
*************************************************************/

void encode_static_var_decl(ST_ID id, Type type)
{
  const int nbytes = computeSize( type );

  emit("\t.lcomm\t%s, %d", st_get_id_str(id), nbytes);
}

/* Generate storage for a local static variable.
 *
 * This differs from a global static because two functions may each
 * declare a static local with the same name:
 *   fun1() { static int a; }
 *   fun2() { static float a; }
 * so the symbol is munged to <name>$L<label>.
 *
 * The label is unsigned and is printed with %u so the symbol matches
 * the reference emitted for static locals in encode(), which formats
 * the same label with %u.
 *
 * id    - identifier of the variable
 * label - unique number appended to the symbol name
 * type  - type of the variable; determines the reserved size
 */
void encode_local_static( ST_ID id, unsigned int label, Type type )
{
    int size;

    size = computeSize( type );
    emit("\t.data");
    emit("\t.lcomm\t%s$L%u, %d", st_get_id_str(id), label, size );
    emit("\t.text");
}

/*
 * computeSize: size in bytes of the given Type on the target.
 *
 * Returns 0 for TYERROR. TYVOID, arrays without a dimension and
 * unsupported types are reported through bug(); 0 is the result if
 * bug() returns.
 */
int computeSize( Type type )
{
  int bytes = 0;   /* result for TYERROR and for every bug() path */

  switch(typeQuery(type)) {

  /* floating point types */
  case TYFLOAT:
    bytes = TARCH_SIZEOF_FLOAT;
    break;
  case TYDOUBLE:
    bytes = TARCH_SIZEOF_DOUBLE;
    break;
  case TYLONGDOUBLE:
    bytes = TARCH_SIZEOF_LDOUBLE;
    break;

  /* unsigned integer types */
  case TYUNSIGNEDCHAR:
    bytes = TARCH_SIZEOF_UCHAR;
    break;
  case TYUNSIGNEDSHORTINT:
    bytes = TARCH_SIZEOF_USHORT;
    break;
  case TYUNSIGNEDINT:
    bytes = TARCH_SIZEOF_UINT;
    break;
  case TYUNSIGNEDLONGINT:
    bytes = TARCH_SIZEOF_ULONG;
    break;

  /* signed integer types */
  case TYSIGNEDCHAR:
    bytes = TARCH_SIZEOF_SCHAR;
    break;
  case TYSIGNEDSHORTINT:
    bytes = TARCH_SIZEOF_SSHORT;
    break;
  case TYSIGNEDINT:
    bytes = TARCH_SIZEOF_SINT;
    break;
  case TYSIGNEDLONGINT:
    bytes = TARCH_SIZEOF_SLONG;
    break;

  /* every pointer has the size of an unsigned long */
  case TYPOINTER:
    bytes = TARCH_SIZEOF_ULONG;
    break;

  /* element size times the number of elements */
  case TYARRAY:
  {
    DimFlag dimflag;
    unsigned int dim;
    Type elementType = typeQueryArray(type, &dimflag, &dim);

    if ( dimflag != DIMENSION_PRESENT )
      bug("array with no dimension in computeSize()");
    else
      bytes = computeSize(elementType) * dim;
    break;
  }

  case TYSTRUCT:
    bytes = computeStructSize(type);
    break;

  case TYUNION:
    bytes = computeUnionSize(type);
    break;

  case TYERROR:
    break;

  case TYVOID:
    bug("TYVOID in computeSize()");
    break;

  default:
    bug("unsupported type in computeSize()\n");
    break;
  }

  return bytes;
}


/*
 * determineAlignment: alignment in bytes of the given Type.
 *
 * Returns 0 for TYERROR. TYVOID and unsupported types are reported
 * through bug(); 0 is the result if bug() returns.
 *
 * TODO: these values should be abstracted to constants in a tarch.h
 * file.
 *
 * NOTE(review): double is given an alignment of 4 here; confirm this is
 * intended for the EM64T target.
 */
int determineAlignment( Type type )
{
  int align = 0;   /* result for TYERROR and for every bug() path */

  switch( typeQuery(type) ) {

  case TYSIGNEDCHAR:
  case TYUNSIGNEDCHAR:
    align = 1;
    break;

  case TYSIGNEDSHORTINT:
  case TYUNSIGNEDSHORTINT:
    align = 2;
    break;

  case TYSIGNEDINT:
  case TYUNSIGNEDINT:
  case TYFLOAT:
  case TYDOUBLE:
    align = 4;
    break;

  case TYSIGNEDLONGINT:
  case TYUNSIGNEDLONGINT:
  case TYPOINTER:
    align = 8;
    break;

  case TYLONGDOUBLE:
  case TYSTRUCT:     /* worst case alignment */
  case TYUNION:      /* worst case alignment */
    align = 16;
    break;

  case TYARRAY:      /* same as the alignment of its element type */
  {
    DimFlag dimflag;
    unsigned int dim;
    Type elementType = typeQueryArray(type, &dimflag, &dim);

    align = determineAlignment(elementType);
    break;
  }

  case TYERROR:
    break;

  case TYVOID:
    bug("TYVOID in determineAlignment()");
    break;

  default:
    bug("unsupported type in determineAlignment()\n");
    break;
  }

  return align;
}

/* worstCaseAlignment: the largest alignment, in bytes, any type needs. */
int worstCaseAlignment(void)
{
  enum { WORST_CASE_ALIGNMENT = 16 };

  return WORST_CASE_ALIGNMENT;
}

/*
 *  Round a frame size up to the next multiple of 16. Values that are
 *  already a multiple of 16 are returned unchanged.
 */
static int roundEven16(int val)
{
  const int excess = val & 0xF;   /* distance past the last multiple */

  return excess ? val + (16 - excess) : val;
}

/**
 * \brief Returns the name of the register to use for the given integer
 *        argument number.
 *
 * \param num 1-based argument number; valid values are 1 through 6.
 *
 * \return the register name including the leading '%' (e.g. "%rdi").
 *         An out-of-range num (0 or greater than 6) is reported through
 *         bug(); NULL is returned if bug() returns, so the table is
 *         never indexed out of bounds.
 */
static const char *int_reg_arg(unsigned int num)
{
  static const char *const intRegs[] = {
    "%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9"
  };
  const unsigned int count = sizeof intRegs / sizeof intRegs[0];

  /* num is 1-based: 0 would wrap around when 1 is subtracted below */
  if (num == 0) {
    bug("int_reg_arg called with num == 0\n");
    return NULL; /*NOTREACHED*/
  }

  if (num > count) {
    bug("int_reg_arg called with num > 6\n");
    return NULL; /*NOTREACHED*/
  }

  return intRegs[num - 1];
}

/**
 * \brief Returns the name of the register to use for the given floating
 *        point argument number.
 *
 * \param num 1-based argument number; valid values are 1 through 8.
 *
 * \return the register name including the leading '%' (e.g. "%xmm0").
 *         An out-of-range num (0 or greater than 8) is reported through
 *         bug(); NULL is returned if bug() returns, so the table is
 *         never indexed out of bounds.
 */
static const char *float_reg_arg(unsigned int num)
{
  static const char *const floatRegs[] = {
    "%xmm0", "%xmm1", "%xmm2", "%xmm3",
    "%xmm4", "%xmm5", "%xmm6", "%xmm7"
  };
  const unsigned int count = sizeof floatRegs / sizeof floatRegs[0];

  /* num is 1-based: 0 would wrap around when 1 is subtracted below */
  if (num == 0) {
    bug("float_reg_arg called with num == 0\n");
    return NULL; /*NOTREACHED*/
  }

  /* the limit is 8 registers; the message now states that limit */
  if (num > count) {
    bug("float_reg_arg called with num > 8\n");
    return NULL; /*NOTREACHED*/
  }

  return floatRegs[num - 1];
}

/*
 * isDerefable: can a value of the given Type be dereferenced (loaded)
 * on the target architecture?
 *
 * TRUE for the floating point, integer and pointer types; FALSE for
 * struct, union, array, function, void and error types. An unknown
 * type tag is reported through bug() and yields FALSE.
 */
BOOLEAN isDerefable(Type t)
{
  BOOLEAN derefable = FALSE;

  switch(typeQuery(t)) {

  case TYSIGNEDCHAR:
  case TYUNSIGNEDCHAR:
  case TYSIGNEDSHORTINT:
  case TYUNSIGNEDSHORTINT:
  case TYSIGNEDINT:
  case TYUNSIGNEDINT:
  case TYSIGNEDLONGINT:
  case TYUNSIGNEDLONGINT:
  case TYFLOAT:
  case TYDOUBLE:
  case TYLONGDOUBLE:
  case TYPOINTER:
    derefable = TRUE;
    break;

  case TYSTRUCT:
  case TYUNION:
  case TYARRAY:
  case TYFUNCTION:
  case TYERROR:
  case TYVOID:
    break;

  default:
    bug("unknown type in isDerefable");
    break;
  }

  return derefable;
}


/*
 * determineIntelSuffix: map a Type to its single-character Intel
 * instruction suffix (ie b, w, l, s, etc).
 *
 * Types without a suffix (void, error, struct, union, array, function
 * and unknown tags) are reported through bug(); a blank is the result
 * if bug() returns.
 *
 * NOTE: Do not use this for MOV instructions since there is no routine
 *       to determine which register version to use (%rax, %eax, %ax, %al,
 *       etc.). -- eaburns
 */
char determineIntelSuffix(Type t)
{
  char suffix = ' ';   /* result for every bug() path */

  switch (typeQuery(t)) {

  /* floating point types */
  case TYFLOAT:
    suffix = TARCH_ISUFFIX_FLOAT;
    break;
  case TYDOUBLE:
    suffix = TARCH_ISUFFIX_DOUBLE;
    break;
  case TYLONGDOUBLE:
    suffix = TARCH_ISUFFIX_LDOUBLE;
    break;

  /* signed integer types */
  case TYSIGNEDCHAR:
    suffix = TARCH_ISUFFIX_SCHAR;
    break;
  case TYSIGNEDSHORTINT:
    suffix = TARCH_ISUFFIX_SSHORT;
    break;
  case TYSIGNEDINT:
    suffix = TARCH_ISUFFIX_SINT;
    break;
  case TYSIGNEDLONGINT:
    suffix = TARCH_ISUFFIX_SLONG;
    break;

  /* unsigned integer types */
  case TYUNSIGNEDCHAR:
    suffix = TARCH_ISUFFIX_UCHAR;
    break;
  case TYUNSIGNEDSHORTINT:
    suffix = TARCH_ISUFFIX_USHORT;
    break;
  case TYUNSIGNEDINT:
    suffix = TARCH_ISUFFIX_UINT;
    break;
  case TYUNSIGNEDLONGINT:
    suffix = TARCH_ISUFFIX_ULONG;
    break;

  /* pointers use the unsigned long suffix */
  case TYPOINTER:
    suffix = TARCH_ISUFFIX_ULONG;
    break;

  /* everything below has no suffix and is an internal error */
  case TYVOID:
    bug("void type seen in determineIntelSuffix()");
    break;
  case TYERROR:
    bug("error type seen in determineIntelSuffix()");
    break;
  case TYSTRUCT:
    bug("struct type seen in determineIntelSuffix()");
    break;
  case TYUNION:
    bug("union type seen in determineIntelSuffix()");
    break;
  case TYARRAY:
    bug("array type seen in determineIntelSuffix()");
    break;
  case TYFUNCTION:
    bug("function type seen in determineIntelSuffix()");
    break;
  default:
    bug("unknown type seen in determineIntelSuffix()");
    break;
  }

  return suffix;
}


/*
 *  encodeAssignDetail: make details of encoding ASSIGN generally
 *                     available. Used, for example, in compound
 *                     assignment operators. This only supports
 *                     the "basic" types (ie not struct/union).
 *
 *  type - Type of value to be assigned.
 *
 *  t - Tree being encoded.
 *
 *  The generated code expects the rhs value on top of the run-time
 *  stack with the lhs address directly beneath it. Both are removed,
 *  the value is stored through the address, and the assigned value is
 *  left on top of the stack as the result.
 *
 *  Struct, union, array, void, function, error and unknown types are
 *  reported through bugT().
 */
void encodeAssignDetail(Type type, Tree t)
{
  switch(typeQuery(type)) {

  case TYFLOAT:

    /* pop rhs */
    emit_pop_reg("rax");

    /* pop lhs */
    emit_pop_reg("rdx");

    /* actually do the assign (32-bit store of the float's bits) */
    emit("\tmovl\t%%eax, (%%rdx)");

    /* push result */
    emit_push_reg("rax");

    break;

  case TYDOUBLE:

    /* get rhs (64 bits) from top of stack */
    emit("\tfldl\t(%%rsp)");

    /* pop */
    emit_stack_shrink(8); 

    /* pop lhs */
    emit_pop_reg("rax");

    /* actually do the assign */
    emit("\tfstl\t(%%rax)");

    /* alloc spot (64 bits) for result */
    emit_stack_grow(8);

    /* put result on top of stack (and pop fp register stack) */
    emit("\tfstpl\t(%%rsp)");

    break;

  case TYLONGDOUBLE:

    /* get rhs (80 bits stored in 16 bytes) from top of stack */
    emit("\tfldt\t(%%rsp)");

    /* pop */
    emit_stack_shrink(16);

    /* pop lhs */
    emit_pop_reg("rax");

    /* actually do the assign (also pops the fp register stack) */
    emit("\tfstpt\t(%%rax)");

    /* re-load the result */
    emit("\tfldt\t(%%rax)");

    /* alloc spot (80 bits stored in 16 bytes) for result */
    emit_stack_grow(16);

    /* put result on top of stack (and pop fp register stack) */
    emit("\tfstpt\t(%%rsp)");

    break;

  case TYSIGNEDCHAR:
  case TYUNSIGNEDCHAR:
  case TYSIGNEDSHORTINT:
  case TYUNSIGNEDSHORTINT:
  case TYSIGNEDINT:
  case TYUNSIGNEDINT:
  case TYSIGNEDLONGINT:
  case TYUNSIGNEDLONGINT:
  case TYPOINTER:

    /* pop rhs */
    emit_pop_reg("rax");

    /* pop lhs */
    emit_pop_reg("rdx");

    /* do the assign; the store width follows the size of the type */
    switch(computeSize(type)) {
    case 1:
      emit("\tmovb\t%%al, (%%rdx)");
      break;
    case 2:
      emit("\tmovw\t%%ax, (%%rdx)");
      break;
    case 4:
      emit("\tmovl\t%%eax, (%%rdx)");
      break;
    case 8:
      emit("\tmovq\t%%rax, (%%rdx)");
      break;
    default:
      bugT(t, "unknown size in encodeAssignDetail = %d\n",
      computeSize(type));
  }

    /* push the result */
    emit_push_reg("rax");
    break;

  case TYSTRUCT:
  case TYUNION:
  case TYARRAY:
  case TYERROR:
  case TYVOID:
  case TYFUNCTION:
  default:
    bugT(t, "unknown type in encodeAssignDetail");
  }

  /* mark the tree as visited by the code generator */
  ((ExpTree) t)->reg = setReg();
}

/*
 *  encodeDerefDetail: make details of encoding DEREF generally
 *                     available. Used, for example, in compound
 *                     assignment operators.
 *
 *  type - Type of value to be loaded.
 *
 *  t - Tree being encoded.
 *
 *  The generated code pops the address to be dereferenced off the
 *  run-time stack, loads the value stored there and leaves the value
 *  on top of the stack. Integer values narrower than 64 bits are sign-
 *  or zero-extended to a full quadword according to their signedness.
 *
 *  Struct, union, array, error, void and function types cannot be
 *  loaded and are reported through bugT() as "unexpected"; any other
 *  tag is reported as "unknown".
 */
void encodeDerefDetail(Type type, Tree t)
{
  /* pop address to be deref-ed */
  emit_pop_reg("rax");

  switch(typeQuery(type)) {

  case TYFLOAT:

    /* load to fp register */
    emit("\tflds\t(%%rax)");

    /* alloc spot (64 bits) for result */
    emit_stack_grow(8);

    /* put value on top of stack (and pop fp register stack) */
    emit("\tfstps\t(%%rsp)");

    break;

  case TYDOUBLE:

    /* load to fp register */
    emit("\tfldl\t(%%rax)");

    /* alloc spot (64 bits) for result */
    emit_stack_grow(8);

    /* put value on top of stack (and pop fp register stack) */
    emit("\tfstpl\t(%%rsp)");

    break;

  case TYLONGDOUBLE:

    /* load to fp register */
    emit("\tfldt\t(%%rax)");

    /* alloc spot (80 bits stored in 16 bytes) for result */
    emit_stack_grow(16);

    /* put value on top of stack (and pop fp register stack) */
    emit("\tfstpt\t(%%rsp)");

    break;

  case TYSIGNEDSHORTINT:

    /* do the deref */
    emit("\tmovswq\t(%%rax), %%rax");

    /* push result onto stack */
    emit_push_reg("rax");
    break;

  case TYUNSIGNEDSHORTINT:

    /* do the deref */
    emit("\tmovzwq\t(%%rax), %%rax");

    /* push result onto stack */
    emit_push_reg("rax");
    break;

  case TYUNSIGNEDCHAR:

    /* do the deref */
    emit("\tmovzbq\t(%%rax), %%rax");

    /* push result onto stack */
    emit_push_reg("rax");
    break;

  case TYSIGNEDCHAR:

    /* do the deref */
    emit("\tmovsbq\t(%%rax), %%rax");

    /* push result onto stack */
    emit_push_reg("rax");
    break;

  case TYSIGNEDINT:

    /* do the deref */
    emit("\tmovslq\t(%%rax), %%rax");

    /* push result onto stack */
    emit_push_reg("rax");
    break;

  case TYUNSIGNEDINT:

    /* do the deref */

    /*
     * This is weak, there is no movzlq instruction, so all this garbage
     * emulates "movzlq (%rax), %rax"
     */
    emit("\tmovq\t%%rax, %%r11");
    emit("\txorq\t%%rax, %%rax");
    emit("\tmovl\t(%%r11), %%eax");

    /* push result onto stack */
    emit_push_reg("rax");

    break;

  case TYSIGNEDLONGINT:
  case TYUNSIGNEDLONGINT:
  case TYPOINTER:
    /* do the deref */
    emit("\tmovq\t(%%rax), %%rax");

    /* push result onto stack */
    emit_push_reg("rax");
    break;

  case TYSTRUCT:
  case TYUNION:
  case TYARRAY:
  case TYERROR:
  case TYVOID:
  case TYFUNCTION:
    bugT(t, "unexpected type in encodeDerefDetail");
    /* do not fall into the "unknown type" report below */
    break;

  default:
    bugT(t, "unknown type in encodeDerefDetail");
    break;
  }

  /* mark the tree as visited by the code generator */
  ((ExpTree) t)->reg = setReg();
}

/*
 *  encodeDup
 *
 *  A DUP node marks a subtree that has two parents; it sits on top of
 *  the subtree on the path that is evaluated first. Encoding it means
 *  duplicating the value currently on top of the run-time stack.
 *
 *  At present DUP is only used to implement compound assignments, so
 *  the duplicated value is always an address and this routine copies
 *  exactly one 8-byte value.
 */
static void encodeDup(UnopTree u)
{
  /* read the top of stack without popping it ... */
  emit("\tmovq\t(%%rsp), %%rax");

  /* ... and push a second copy */
  emit_push_reg("rax");

  /* mark the tree as visited by the code generator */
  u->expn.reg = setReg();
}

/*
 * encodeUnop: dispatch code generation for a unary operator node to
 *             the encoder for its operation.
 *
 * NO_OP generates nothing. SIZEOF is never expected here because it is
 * fully handled at compile time (during semantic analysis); it, UADD
 * and unknown operations are reported through bugT().
 */
static void encodeUnop(UnopTree u)
{
  switch (u->op) {

  case NO_OP:
    return;

  case DEREF_OP:
    encodeDerefDetail(u->expn.type, (Tree) u);
    return;

  case DUP_OP:
    encodeDup(u);
    return;

  /* increment / decrement */
  case INCRA_OP:
    encodeIncra(u);
    return;
  case DECRA_OP:
    encodeDecra(u);
    return;
  case INCRB_OP:
    encodeIncrb(u);
    return;
  case DECRB_OP:
    encodeDecrb(u);
    return;

  case USUB_OP:
    encodeUsub(u);
    return;

  case LNOT_OP:
    encodeLnot(u);
    return;

  case CONVERT_OP:
    encodeConvert(u);
    return;

  case ADDR_OF_OP:
    encodeAddrOf(u);
    return;

  case COMPLMT_OP:
    encodeComplmt(u);
    return;

  /* operations that must never reach the code generator */
  case SIZEOF_OP:
    bugT((Tree) u, "SIZEOF not expected in encodeUnary");
    return;

  case UADD_OP:
    bugT((Tree) u, "UADD seen in encodeUnary!");
    return;

  default:
    bugT((Tree) u, "unknown operation in encodeUnary");
  }
}

/*
 * encodeBinop: dispatch code generation for a binary operator node to
 *              the encoder for its operation.
 *
 * LOR, LAND and COMMA are encoded directly by encode() because they do
 * not use the default evaluation order; seeing one here is reported
 * through bugT(), as is any unknown operation.
 */
static void encodeBinop(BinopTree b)
{
  switch (b->op) {

  case INDEX_OP:
    encodeIndex(b);
    break;

  case LSHIFT_OP:
    encodeLshift(b);
    break;

  case RSHIFT_OP:
    encodeRshift(b);
    break;

  case ANDD_OP:
    encodeAndd(b);
    break;

  case XORR_OP:
    encodeXorr(b);
    break;

  case ORR_OP:
    encodeOrr(b);
    break;

  case MULT_ASSIGN_OP:
    encodeMultAssign(b);
    break;

  case DIV_ASSIGN_OP:
    encodeDivAssign(b);
    break;

  case MOD_ASSIGN_OP:
    encodeModAssign(b);
    break;

  case ADD_ASSIGN_OP:
    encodeAddAssign(b);
    break;

  case SUB_ASSIGN_OP:
    encodeSubAssign(b);
    break;

  case LEFT_ASSIGN_OP:
    encodeLeftAssign(b);
    break;

  case RIGHT_ASSIGN_OP:
    encodeRightAssign(b);
    break;

  case AND_ASSIGN_OP:
    encodeAndAssign(b);
    break;

  case XOR_ASSIGN_OP:
    encodeXorAssign(b);
    break;

  case OR_ASSIGN_OP:
    encodeOrAssign(b);
    break;

  case MULT_OP:
    encodeMult(b);
    break;

  case DIV_OP:
    encodeDiv(b);
    break;

  case MOD_OP:
    encodeMod(b);
    break;

  case ADD_OP:
    encodeAdd(b);
    break;

  case SUB_OP:
    encodeSub(b);
    break;

  case LT_OP:
    encodeLt(b);
    break;

  case GT_OP:
    encodeGt(b);
    break;

  case LTE_OP:
    encodeLte(b);
    break;

  case GTE_OP:
    encodeGte(b);
    break;

  case EQL_OP:
    encodeEql(b);
    break;

  case NEQL_OP:
    encodeNeql(b);
    break;

  case ASSIGN_OP:
    encodeAssign(b);
    break;

  case LOR_OP:
  case LAND_OP:
  case COMMA_OP:
    bugT((Tree) b, "unexpected LOR, LAND or COMMA seen in encodeBinary");
    /* do not fall into the "unknown operation" report below */
    break;

  default:
    bugT((Tree) b, "unknown operation in encodeBinary");
    break;
  }
}

/*
 *  encode: traverses an AST and generates code in a (basically)
 *          bottom-up fashion.
 *
 *  t - root of the (sub)tree to encode; NULL is accepted and ignored.
 *
 *  Expression nodes whose type is TYERROR, or whose reg field is
 *  already set (i.e. already visited by the code generator), are
 *  skipped. Leaf expressions (variables, constants, string literals)
 *  are encoded right here and leave one value on the run-time stack;
 *  encodeStackDepth is updated to match. Everything else is handed to
 *  the encoder for that kind of node.
 */

void encode( Tree t )
{
  UnopTree u;
  BinopTree b;
  TriopTree tr;
  FcallTree f;
  EseqTree e;
  PtrTree ptr;
  FieldRefTree fr;
  CastopTree c;
  VarTree v;
  IntConstTree i;
  LongConstTree long_const;
  FpConstTree fp;
  StringLitTree s;
  LabelTree la;
  CaseTree ca;
  DefaultTree d;
  SeqTree se;
  IfElseTree ie;
  SwitchTree sw;
  WhileTree w;
  DoWhileTree dw;
  ForTree fo;
  GotoTree g;
  ContinueTree co;
  BreakTree br;
  ReturnTree r;
  int labNum;

  if ( t == NULL )
  {
     return;
  }

  switch( t->tag ){

     case UNOP_TAG:
              u = ( UnopTree ) t;
              if (typeQuery(u->expn.type) == TYERROR) break;
              if (u->expn.reg) break;
              /* operand first, then the operator itself */
              encode( ( Tree ) u->left );
              encodeUnop(u);
              break;

     case BINOP_TAG:
              b = ( BinopTree ) t;

              if (typeQuery(b->expn.type) == TYERROR) break;
              if (b->expn.reg) break;

              /* logical operators cannot do default evaluation order */
              if (b->op == LAND_OP)
              {
                encodeLand(b);
              }
              else if (b->op == LOR_OP)
              {
                encodeLor(b);
              }
              /* convenient to override default evaluation for comma, too */
              else if (b->op == COMMA_OP)
              {
                encodeComma(b);
              }
              else
              {
                /* default order: left operand, right operand, operator */
                encode( ( Tree ) b->left );
                encode( ( Tree ) b->right );
                encodeBinop(b);
              }
              break;

     case TRIOP_TAG:
              tr = ( TriopTree ) t;
              if (typeQuery(tr->expn.type) == TYERROR) break;
              if (tr->expn.reg) break;
              if (tr->op == COND_OP)
              {
                encodeCond(tr);
              }
              else
              {
                bugT(t, "encode: unknown triop");
              }
              break;

     case FCALL_TAG:
              f = ( FcallTree ) t;
              if (typeQuery(f->expn.type) == TYERROR) break;
              if (f->expn.reg) break;
              encodeFcall(f);
              break;

     case ESEQ_TAG:
              e = ( EseqTree ) t;
              if (e->expn.reg) break;
              encode( ( Tree ) e->left );
              encode( ( Tree ) e->right );
              break;

     case PTR_TAG:
              ptr = ( PtrTree ) t;
              if (typeQuery(ptr->expn.type) == TYERROR) break;
              if (ptr->expn.reg) break;
              encode( ( Tree ) ptr->left );
              encodePtr( ptr );
              break;

     case FIELDREF_TAG:
              fr = ( FieldRefTree ) t;
              if (typeQuery(fr->expn.type) == TYERROR) break;
              if (fr->expn.reg) break;
              encode( ( Tree ) fr->left );
              encodeFieldRef( fr );
              break;

     case CASTOP_TAG:
              c = ( CastopTree ) t;
              if (typeQuery(c->expn.type) == TYERROR) break;
              if (c->expn.reg) break;
              encode( ( Tree ) c->left );
              encodeCast(c);
              break;

     case VAR_TAG:
              v = ( VarTree ) t;
              if (typeQuery(v->expn.type) == TYERROR) break;
              if (v->expn.reg) break;

              /* mark tree as visited */
              v->expn.reg = setReg();

              /* global variable, local extern, or function */
              if ((v->block == 0) || (v->class == EXTERN_SC) ||
                  (typeQuery(v->expn.type) == TYFUNCTION))
              {
                emit("\tpushq\t$%s", st_get_id_str(v->name));
              }
              /* local variable or parameter */
              else
              {
                /* for static locals use a munged name */
                if (v->class == STATIC_SC)
                {
                  emit("\tpushq\t$%s$L%u", st_get_id_str(v->name),
                    v->staticLabel);
                }
                /*
                 *  local/parameter is on the stack
                 *
                 *  Parameters are at positive offsets from the ebp
                 *  while locals are at negative offsets from the ebp.
                 *
                 *  Add 16 to the parameter offset in order to skip over
                 *  the old ebp and the old eip.
                 *
                 *  Locals are assigned spots starting at the bottom of
                 *  of the locals area and working back toward 0(%ebp).
                 */
                else
                {
                  switch (v->stdrTag) {

                  case PDECL:
                    emit("\tleaq\t%d(%%rbp), %%rax", v->offset + 16);
                    break;

                  case LDECL:
                    emit("\tleaq\t%d(%%rbp), %%rax",
                      -(sizeOfLocalsArea-(v->offset)));
                    break;

                  default:
                    bugT(t, "encode: unknown stdr tag for VAR");

                  }

                  /* do not use emit_push_reg() since the stack depth is
                   * kept track of already for VAR_TAGs */
                  emit("\tpushq\t%%rax");
                }
              }
              /* every path above pushed exactly one quadword (an address) */
              encodeStackDepth += 8;
              break;

     case INT_CONST_TAG:
     case UINT_CONST_TAG:

              i = (IntConstTree) t;
              if (i->expn.reg) break;

              /* mark tree as visited */
              i->expn.reg = setReg();

              emit("\tpushq\t$%d", i->value);
              encodeStackDepth += 8;
              break;

     case LONG_CONST_TAG:
     case ULONG_CONST_TAG:

              long_const = (LongConstTree) t;
              if (long_const->expn.reg) break;

              /* mark tree as visited */
              long_const->expn.reg = setReg();

              /* 
               *  EM64T does not support 64-bit immediate values
               *  and can only push quadword. So need to do two
               *  32-bit moves.
               */
              emit("\tsubq\t$8, %%rsp");
              /* low 32 bits at (%rsp), high 32 bits at 4(%rsp) */
              emit("\tmovl\t$%d, (%%rsp)", (int) long_const->value);
              emit("\tmovl\t$%d, 4(%%rsp)", (int) (long_const->value >> 32));
              encodeStackDepth += 8;
              break;

     case FP_CONST_TAG:
              fp = ( FpConstTree ) t;
              if (fp->expn.reg) break;

              /* mark tree as visited */
              fp->expn.reg = setReg();

              {
                int lab = genLabel();
                int size = computeSize(fp->expn.type);
                /* NOTE: this c shadows the CastopTree c declared above */
                char c = determineIntelSuffix(fp->expn.type);
                char *typeStr;

                /* pick the assembler directive for the constant's type */
                switch(typeQuery(fp->expn.type)) {

                case TYFLOAT:
                  typeStr = "float";
                  break;

                case TYDOUBLE:
                  typeStr = "double";
                  break;

                case TYLONGDOUBLE:
                  typeStr = "tfloat";
                  break;

                default:
                  /* NOTE(review): typeStr stays unset here; safe only if
                   * bugT() does not return -- confirm */
                  bugT(t, "encode: unexpected type in FP_CONST_TAG");
                }

                /* put constant value in memory in data section */
                emit("\t.data");
                emit("FPC%d:\t.%s %s", lab, typeStr, fp->value);
                emit("\t.text");

                /* load to fp register */
                emit("\tfld%c\tFPC%d", c, lab);

                /* alloc spot on top of stack (never less than one
                 * stack word) */
                emit_stack_grow(size < TARCH_SIZEOF_STACKWORD ?
                     TARCH_SIZEOF_STACKWORD : size);

                /* put value on top of stack (and pop fp register stack) */
                emit("\tfstp%c\t(%%rsp)", c);

              }
              break;

     case STRING_LIT_TAG:
              s = ( StringLitTree ) t;
              if (s->expn.reg) break;

              /* mark tree as visited */
              s->expn.reg = setReg();

              /* place the string in the data section under a fresh
               * label, then push the label's address */
              emit("\t.data");
              labNum = genLabel();
              emit("L$%d:\t.asciz\t%s", labNum, s->string);
              emit("\t.text");
              emit("\tpushq\t$L$%d", labNum);
              encodeStackDepth += 8;
              break;

     case LABEL_TAG:
              la = ( LabelTree ) t;
              encodeLabel(la);
              break;

     case CASE_TAG:
              ca = ( CaseTree ) t;
              encodeCase(ca);
              break;

     case DEFAULT_TAG:
              d = ( DefaultTree ) t;
              encodeDefault(d);
              break;

     case SEQ_TAG:
              se = ( SeqTree ) t;
              encode( se->left );
              freeExpressionStatementRegister( se->left );
              encode( se->right );
              freeExpressionStatementRegister( se->right );
              break;

     case IFELSE_TAG:
              ie = ( IfElseTree ) t;
              encodeIfElse(ie);
              break;

     case SWITCH_TAG:
              sw = ( SwitchTree ) t;
              encodeSwitch(sw);
              break;

     case WHILE_TAG:
              w = ( WhileTree ) t;
              encodeWhile(w);
              break;

     case DOWHILE_TAG:
              dw = ( DoWhileTree ) t;
              encodeDoWhile(dw);
              break;

     case FOR_TAG:
              fo = ( ForTree ) t;
              encodeFor( fo );
              break;

     case GOTO_TAG:
              g = ( GotoTree ) t;
              encodeGoto(g);
              break;

     case CONTINUE_TAG:
              co = ( ContinueTree ) t;
              encodeContinue(co);
              break;

     case BREAK_TAG:
              br = ( BreakTree ) t;
              encodeBreak(br);
              break;

     case RETURN_TAG:
              r = ( ReturnTree ) t;
              /* evaluate the returned expression (if any) first */
              encode( ( Tree ) r->expr );
              encodeReturn(r);
              break;

     case ERROR_TAG:
              break;

     default:
            bugT(t,  "Invalid tree tag in encode()" );

  }

  return;
}

/*
 * Computes the size of the IA-32 argument area on the stack.
 *
 * params - head of the parameter list; NULL yields 0.
 *
 * Every integer, pointer, float and double parameter occupies 8 bytes,
 * a long double occupies 16 bytes and a struct or union occupies
 * computeSize() bytes. A void parameter makes the whole result 0.
 *
 * Error, array, function and unknown parameter types are reported
 * through bug(); each report stands alone and contributes nothing to
 * the size.
 */
unsigned int paramListSize(ParamList params)
{
  unsigned int size;
  ParamList p;

  size = 0;

  if (params == NULL)
    return 0;

  for (p = params; p; p = p->next) {
    switch (typeQuery(p->type)) {
    case TYSIGNEDSHORTINT:
    case TYSIGNEDINT:
    case TYUNSIGNEDSHORTINT:
    case TYUNSIGNEDINT:
    case TYUNSIGNEDCHAR:
    case TYSIGNEDCHAR:
    case TYSIGNEDLONGINT:
    case TYUNSIGNEDLONGINT:
    case TYPOINTER:
      size += 8;
      break;
    case TYFLOAT:
    case TYDOUBLE:
      size += 8;
      break;
    case TYLONGDOUBLE:
      size += 16;
      break;
    case TYSTRUCT:
    case TYUNION:
      size +=  computeSize(p->type);
      break;
    case TYVOID:
      return 0;
    case TYERROR:
      bug("error type in paramListSize");
      break;   /* keep this report from cascading into the next ones */
    case TYARRAY:
      bug("array type in paramListSize");
      break;
    case TYFUNCTION:
      bug("function type in paramListSize");
      break;
    default:
      bug("unknown type in paramListSize");
      break;
    }
  }

  return size;
}

/*
 *  encodeFunctionEntry: generates code for function entry.
 *
 *  name          - identifier of the function; used for the entry label.
 *  localsSize    - bytes of local variables; rounded up to a multiple
 *                  of 16 and remembered in sizeOfLocalsArea.
 *  savedRegsSize - not used.
 *  argBuildSize  - size of this function's parameters, i.e. the size of
 *                  the IA-32 argument area on the stack. It is used to
 *                  allocate the space into which the parameters are
 *                  moved from registers and the EM64T frame.
 *  isStatic      - whether the function was declared with "static"
 *                  storage class; a .globl directive is emitted only
 *                  when it was not.
 */
void encodeFunctionEntry(ST_ID name, int localsSize, int savedRegsSize,
                         int argBuildSize, BOOLEAN isStatic)
{
  const int argAreaSize = roundEven16(argBuildSize);
  const int localsAreaSize = roundEven16(localsSize);

  /* quiet warnings */
  (void) savedRegsSize;

  /* stack depth at the beginning of a function is *always* zero */
  encodeStackDepth = 0;

  /* need to remember this in order to generate code for LDECLS later */
  sizeOfLocalsArea = localsAreaSize;

  emit("\t.text");
  if (!isStatic)
  {
    emit("\t.globl\t%s", st_get_id_str(name));
  }
  emit("%s:", st_get_id_str(name));

  /*
   * Allocate space on the stack for the IA-32-like parameter list and
   * a non-16-byte aligned pointer that is used for a fake return
   * value.
   *
   * An extra 16 bytes are allocated: 8 bytes on either side of the
   * IA-32-ish param list, for the alignment reasons shown below:
   *
   * | EM64T  param    |
   * | EM64T  param    | <-- 16 byte aligned
   * [ return value    ]
   * [ first 8 bytes   ] <-- 16 byte aligned
   * | IA-32-ish param |
   * | IA-32-ish param |
   * | IA-32-ish param |
   * | IA-32-ish param | <-- 16 byte aligned
   * [ second 8 bytes  ] <-- fake return value (not aligned)
   * [ base pointer    ] <-- 16 byte aligned (YAY!)
   */
  if (argAreaSize > 0)
    emit_stack_grow(argAreaSize + 16);

  /* the function is 16-byte aligned *after* pushing %rbp since the
   * return value makes it only 8-byte aligned... so this push is not
   * counted in encodeStackDepth */
  emit("\tpushq\t%%rbp");
  emit("\tmovq\t%%rsp, %%rbp");

  /* finally make room for the locals area */
  if (localsAreaSize != 0)
    emit_stack_grow(localsAreaSize);
}

/*
 *  encodeFunctionExit: generates code for function exit.
 *
 *  We don't use the savedRegsSize argument.
 *
 *  The isStatic parameter tracks whether the function was declared
 *  with "static" storage class.
 *
 */
void encodeFunctionExit(ST_ID name, int localsSize, int savedRegsSize,
                        int argBuildSize)
{
  int localsBytes;

  /* savedRegsSize is accepted for interface compatibility but never used */
  (void) savedRegsSize;

  localsBytes = roundEven16(localsSize);

  /* common exit label: "return" statements jump here */
  emit("%s$exit:", st_get_id_str(name));

  /* release the locals area allocated by encodeFunctionEntry */
  if (localsBytes != 0)
  {
    emit_stack_shrink(localsBytes);
  }

  /*
   * Mirror of the entry sequence: the %rbp push/pop pair is deliberately
   * not counted in encodeStackDepth.
   */
  emit("\tpopq\t%%rbp");

  /* release the IA-32-like parameter block and its 16 bytes of padding */
  if (argBuildSize > 0)
  {
    emit_stack_shrink(roundEven16(argBuildSize) + 16);
  }

  emit("\tret");
  emit("\t.data");

  /* every push must have been matched by a pop by now */
  if (encodeStackDepth == 0)
  {
    return;
  }

  bug("encodeStackDepth=%d not zero in encodeFunctionExit",
      encodeStackDepth);
}

/*
 *  encodeConvert
 *
 *  This routine needs to deal with the fact that we store integer types as
 *  either 32 bits or 64 bits on the stack.
 *
 *  Still the conversion possibilities are greatly restricted. The
 *  five possibilities are then 32-bit integer, 64-bit integer, 32-bit f.p.,
 *  64-bit f.p.  and 80-bit f.p.
 *
 *  I believe this code still basically uses the IA-32 approach to conversion
 *  when possible.
 *
 *  The complication is that, when converting from f.p. type to integer type,
 *  we need to manipulate the f.p. control word in order to set the rounding
 *  mode to "truncate" otherwise the "convert to integer" instruction will
 *  round, rather than truncate.
 *
 *  Also problem when going from unsigned integer types to f.p. types. Need
 *  to load "long long" into the f.p. register in order to make sure the
 *  sign bit is clear.
 *
 */
static void encodeConvert(UnopTree u)
{
  /*
   * Emits code that converts the value on top of the run-time stack from
   * the type of u->left to the type of u itself. Stack slots are at
   * least TARCH_SIZEOF_STACKWORD bytes, so a slot is re-sized only when
   * the source and destination slot sizes may differ (f.p. conversions).
   *
   * Known limitation: unsigned long -> f.p. still goes through fildq,
   * which interprets the quadword as signed.
   */
  TypeTag destType;
  TypeTag sourceType;
  int source_size;
  int dest_size;

  /* first some structural sanity checks */
  if (!u)
  {
    bug("null pointer passed to encodeConvert");
  }
  if (!u->left)
  {
    bug("null child pointer passed to encodeConvert");
  }

  /* mark tree as visited */
  u->expn.reg = setReg();

  destType = typeQuery(u->expn.type);

  /* if error, do nothing */
  if (destType == TYERROR) return;

  /* if destination type is void, just pop the stack */
  if (destType == TYVOID)
  {
    freeExpressionStatementRegister((Tree)(u->left));
    return;
  }

  /* weed out bug cases */
  switch (destType) {
  case TYFLOAT:
  case TYDOUBLE:
  case TYLONGDOUBLE:
  case TYSIGNEDLONGINT:
  case TYSIGNEDSHORTINT:
  case TYSIGNEDINT:
  case TYUNSIGNEDLONGINT:
  case TYUNSIGNEDSHORTINT:
  case TYUNSIGNEDINT:
  case TYUNSIGNEDCHAR:
  case TYSIGNEDCHAR:
    break;

  case TYPOINTER:
    /* nothing to be done for pointer conversions */
    return;
  case TYVOID:
    bugT((Tree) u, "void destination type mishandled in encodeConvert()");
    break;
  case TYERROR:
    bugT((Tree) u, "error type seen in encodeConvert()");
    break;
  case TYSTRUCT:
    bugT((Tree) u, "struct type seen in encodeConvert()");
    break;
  case TYUNION:
    bugT((Tree) u, "union type seen in encodeConvert()");
    break;
  case TYARRAY:
    bugT((Tree) u, "array type seen in encodeConvert()");
    break;
  case TYFUNCTION:
    bugT((Tree) u, "function type seen in encodeConvert()");
    break;
  default:
    bugT((Tree) u, "unknown type seen in encodeConvert()");
  }

  sourceType = typeQuery(u->left->type);

  /* slot sizes: never smaller than one stack word */
  source_size = computeSize(u->left->type);
  if (source_size < TARCH_SIZEOF_STACKWORD)
    source_size = TARCH_SIZEOF_STACKWORD;

  dest_size = computeSize(u->expn.type);
  if (dest_size < TARCH_SIZEOF_STACKWORD)
    dest_size = TARCH_SIZEOF_STACKWORD;

  switch (sourceType) {
  case TYFLOAT:
  case TYDOUBLE:
  case TYLONGDOUBLE:

    switch (destType) {

      case TYFLOAT:
      case TYDOUBLE:
      case TYLONGDOUBLE:

        /* f.p. -> f.p.: round-trip through the x87 stack to re-size */
        if (sourceType != destType)
        {
          /* load value to be converted into fp reg */
          emit("\tfld%c\t(%%rsp)", determineIntelSuffix(u->left->type));

          /* free top of stack */
          emit_stack_shrink(source_size);

          /* alloc space on top of stack for converted value */
          emit_stack_grow(dest_size);

          /* put value on top of stack (and pop fp register stack) */
          emit("\tfstp%c\t(%%rsp)", determineIntelSuffix(u->expn.type));
        }
        return;

      default:

        /* dest is some integer type */

        /* load value to be converted into fp reg */
        emit("\tfld%c\t(%%rsp)", determineIntelSuffix(u->left->type));

        /* free top of stack */
        emit_stack_shrink(source_size);

        /* set rounding mode to be "truncate" */
        /* TODO: this code may not be the same in EM64T -- eaburns */
        emit("\tsubq\t$4, %%rsp");     /* alloc two words on top of stack */
        emit("\tfnstcw\t(%%rsp)");     /* store fp control word in first word */
        emit("\tmovw\t(%%rsp), %%ax"); /* put control word in %ax */
        emit("\tmovb\t$12, %%ah");     /* set RC bits to "truncate" */
        emit("\tmovw\t%%ax, 2(%%rsp)");/* put modified cntrl word in 2nd slot */
        emit("\tfldcw\t2(%%rsp)");     /* load modified control word */
        emit("\tmovw\t(%%rsp), %%ax"); /* put old control word in %ax */
        emit("\taddq\t$4, %%rsp");     /* free the two words */

        /* alloc space for integer result on top of stack */
        emit_stack_grow(dest_size);
        /* convert value to signed long */
        emit("\tfistpq\t(%%rsp)");

        /* re-establish old fp control word (still held in %ax) */
        emit("\tsubq\t$2, %%rsp");      /* alloc one word on top of stack */
        emit("\tmovw\t%%ax, (%%rsp)");  /* put original cntrl word on stack */
        emit("\tfldcw\t(%%rsp)");       /* re-load original control word */
        emit("\taddq\t$2, %%rsp");      /* free word on top of stack */

        /* unsigned destination types need the upper bytes cleared */
        if (destType == TYUNSIGNEDINT) {
          emit("\tmovl\t$0, 4(%%rsp)");
        } else if (destType == TYUNSIGNEDSHORTINT) {
          emit("\tmovl\t$0, 4(%%rsp)");
          emit("\tmovw\t$0, 2(%%rsp)");
        } else if (destType == TYUNSIGNEDCHAR) {
          emit("\tmovl\t$0, 4(%%rsp)");
          emit("\tmovw\t$0, 2(%%rsp)");
          emit("\tmovb\t$0, 1(%%rsp)");
        }

        /* all done */
        return;
    }

  case TYSIGNEDSHORTINT:
  case TYSIGNEDINT:
  case TYUNSIGNEDSHORTINT:
  case TYUNSIGNEDINT:
  case TYUNSIGNEDCHAR:
  case TYSIGNEDCHAR:
    switch (destType) {

      case TYFLOAT:
      case TYDOUBLE:
      case TYLONGDOUBLE:

        /* load the value into a fp reg */
        if (sourceType == TYUNSIGNEDINT)
        {
          /*
           * fildl would read bit 31 as a sign bit, turning values
           * >= 2^31 negative. Zero the upper half of the slot and load
           * the whole quadword so the sign bit is guaranteed clear.
           */
          emit("\tmovl\t$0, 4(%%rsp)");
          emit("\tfildq\t(%%rsp)");
        }
        else
        {
          emit("\tfildl\t(%%rsp)");
        }

        /* free the top of stack */
        emit_stack_shrink(source_size);

        /* alloc space on top of stack for converted value */
        emit_stack_grow(dest_size);

        /* put value on top of stack (and pop fp register stack) */
        emit("\tfstp%c\t(%%rsp)", determineIntelSuffix(u->expn.type));

        /* all done */
        return;

      case TYSIGNEDLONGINT:
      case TYUNSIGNEDLONGINT:
        emit_pop_reg("rax");
        /* sign extend shorter values */
        if (sourceType == TYSIGNEDCHAR)
          emit("\tmovsbq\t%%al, %%rax");
        else if (sourceType == TYSIGNEDSHORTINT)
          emit("\tmovswq\t%%ax, %%rax");
        else if (sourceType == TYSIGNEDINT)
          emit("\tmovslq\t%%eax, %%rax");
        emit_push_reg("rax");
        return;
      case TYUNSIGNEDSHORTINT:
        /* clear upper 48 bits of the slot (movl, so bytes 4..7 all go) */
        emit("\tmovl\t$0, 4(%%rsp)");
        emit("\tmovw\t$0, 2(%%rsp)");
        return;
      case TYUNSIGNEDCHAR:
        /* clear upper 56 bits of the slot */
        emit("\tmovl\t$0, 4(%%rsp)");
        emit("\tmovw\t$0, 2(%%rsp)");
        emit("\tmovb\t$0, 1(%%rsp)");
        return;
      default:
        return;
    }
  case TYSIGNEDLONGINT:
  case TYUNSIGNEDLONGINT:
    switch (destType) {

      case TYFLOAT:
      case TYDOUBLE:
      case TYLONGDOUBLE:

        /* load the quadword into a fp reg (fildq treats it as signed) */
        emit("\tfildq\t(%%rsp)");

        /* free the top of stack */
        emit_stack_shrink(source_size);

        /* alloc space on top of stack for converted value */
        emit_stack_grow(dest_size);

        /* put value on top of stack (and pop fp register stack) */
        emit("\tfstp%c\t(%%rsp)", determineIntelSuffix(u->expn.type));

        /* all done */
        return;

      case TYUNSIGNEDINT:
        /* clear upper 32-bits of the quadword */
        emit("\tmovl\t$0, 4(%%rsp)");
        /* must not fall into the short case: that would wipe bits 16..31 */
        return;
      case TYUNSIGNEDSHORTINT:
        /* clear upper 48-bits of the quadword */
        emit("\tmovl\t$0, 4(%%rsp)");
        emit("\tmovw\t$0, 2(%%rsp)");
        return;
      case TYUNSIGNEDCHAR:
        /* clear upper 56-bits of the quadword */
        emit("\tmovl\t$0, 4(%%rsp)");
        emit("\tmovw\t$0, 2(%%rsp)");
        emit("\tmovb\t$0, 1(%%rsp)");
        return;
      default:
        return;
    }
  case TYPOINTER:
    /* nothing to be done but should never reach here */
    return;
  case TYVOID:
    bugT((Tree) u, "void source type seen in encodeConvert()");
    break;
  case TYERROR:
    bugT((Tree) u, "error type seen in encodeConvert()");
    break;
  case TYSTRUCT:
    bugT((Tree) u, "struct type seen in encodeConvert()");
    break;
  case TYUNION:
    bugT((Tree) u, "union type seen in encodeConvert()");
    break;
  case TYARRAY:
    bugT((Tree) u, "array type seen in encodeConvert()");
    break;
  case TYFUNCTION:
    bugT((Tree) u, "function type seen in encodeConvert()");
    break;
  default:
    bugT((Tree) u, "unknown type seen in encodeConvert()");
  }
}

/*
 *  Arrays and functions are always passed by their base address.
 *
 *  Also all parameters are at least 8 bytes in size. This is
 *  because we always keep the run-time stack aligned on 8 byte boundaries.
 */
int computeParamSize(Type t)
{
  TypeTag tag;
  int bytes;

  tag = typeQuery(t);

  /* arrays and functions travel as their base address */
  if (tag == TYARRAY || tag == TYFUNCTION)
  {
    return TARCH_SIZEOF_POINTER;
  }

  /* everything else: its own size, but never less than one stack word */
  bytes = computeSize(t);
  return (bytes < TARCH_SIZEOF_STACKWORD) ? TARCH_SIZEOF_STACKWORD : bytes;
}

/*
 *  Given address of struct/union on top of stack, replace address
 *  with a copy of the struct/union.
 */
static void copyStructUnionToTopOfStack(int size)
{
  /*
   * size: number of bytes to reserve and copy (callers pass the
   * computeSize() of the struct/union type).
   *
   * The emitted sequence pops the source address, reserves size bytes,
   * and calls memcpy with %rdi = new top of stack (destination),
   * %rsi = popped address (source) and %rdx = size.
   *
   * NOTE(review): nothing here re-aligns %rsp before the call; whether
   * the stack is 16-byte aligned at this point is not visible from this
   * routine -- confirm against the callers.
   */
  emit_pop_reg("rax");               /* source address of the struct/union */
  emit_stack_grow(size);             /* alloc space for struct/union */
  emit("\tmovq\t%%rsp, %%rdi");      /* destination: the space just allocated */
  emit("\tmovq\t$%d, %%rdx", size);  /* pass length of struct/union */
  emit("\tmovq\t%%rax, %%rsi");      /* pass base address */
  emit("\tcall\tmemcpy");            /* call memcpy */
}

/*
 *  Recursive function to traverse and encode FCALL parameter list in reverse
 *  order.
 *
 *  Need to track the amount of stack allocated in order to pop the
 *  arguments after the call. And to properly do stack alignment.
 *  Need to worry about alignment because stack must be on 16-byte
 *  boundary when fcall is issued.
 *
 *  The second parameter is the function return type. If the return type
 *  is a struct/union then the size of the first argument is 8: a pointer
 *  to where to put the return value.
 *
 *  Special care must be taken with struct args. Encoding arg will only
 *  produce the base address on top of the stack. I must pop the base
 *  address and copy the struct on to the top of stack.
 *
 */
static int encodeFcallHelper(ExpTree e, Type retType, BOOLEAN isRoot)
{
  ExpTree arg;
  int total;
  int argBytes;

  /*
   * For a sequence node the tail of the list is encoded first, so that
   * arguments end up on the stack in reverse order; the argument handled
   * at this level is then the node's left child. A non-sequence node is
   * itself the (last) argument.
   */
  if (((Tree) e)->tag == ESEQ_TAG) {
    total = encodeFcallHelper(((EseqTree) e)->right, retType, FALSE);
    arg = ((EseqTree) e)->left;
  } else {
    total = 0;
    arg = e;
  }

  /*
   * When the callee returns a struct/union, the first argument in the
   * list (seen only at the root) is a dummy inserted for the return
   * value: it is encoded as-is and accounted for as one pointer.
   */
  if (isRoot && isStructUnionType(retType)) {
    total += TARCH_SIZEOF_POINTER;
    encode((Tree) arg);
    return total;
  }

  /* ordinary argument: account for at least one stack word */
  argBytes = computeParamSize(arg->type);
  if (argBytes < TARCH_SIZEOF_STACKWORD) {
    argBytes = TARCH_SIZEOF_STACKWORD;
  }
  total += argBytes;

  encode((Tree) arg);

  /* encoding a struct/union only leaves its address; replace by a copy */
  if (isStructUnion(arg)) {
    copyStructUnionToTopOfStack(computeSize(arg->type));
  }

  return total;
}

/*
 * This routine is called once for each argument in order. It decides
 * if an argument will be passed in a register or on the stack. It
 * is passed the type of the argument, plus counts for how many
 * integer and floating point arguments have been used by prior
 * arguments. These counts are updated by this routine if a register
 * is used by the current argument.
 */
static int analyzeOneArg(Type t, int* intRegCnt, int* floatRegCnt)
{
    /*
     * Returns the number of bytes the argument of type t needs in the
     * stack-passed argument area (0 when it travels in registers) and
     * updates *intRegCnt / *floatRegCnt with the registers it claims.
     *
     * The register limits are TARCH_INT_REGCLASS_SIZE and
     * TARCH_SSE_REGCLASS_SIZE, the same limits moveOneArg() applies when
     * it actually places the argument, so both passes agree on which
     * arguments spill to memory.
     *
     * NOTE(review): moveOneArg() rounds its memory offset up to 16 before
     * long double and struct/union arguments; that padding is not counted
     * here.
     */
    switch (typeQuery(t))
    {
    case TYSIGNEDSHORTINT:
    case TYSIGNEDINT:
    case TYUNSIGNEDSHORTINT:
    case TYUNSIGNEDINT:
    case TYUNSIGNEDCHAR:
    case TYSIGNEDCHAR:
    case TYSIGNEDLONGINT:
    case TYUNSIGNEDLONGINT:
    case TYPOINTER:
      *intRegCnt += 1;
      if (*intRegCnt > TARCH_INT_REGCLASS_SIZE)
      {
        return 8;
      }
      else
      {
        return 0;
      }

    case TYFLOAT:
    case TYDOUBLE:
      *floatRegCnt += 1;
      if (*floatRegCnt > TARCH_SSE_REGCLASS_SIZE)
      {
        return 8;
      }
      else
      {
        return 0;
      }

    case TYLONGDOUBLE:
      /* always passed in memory */
      return 16;

    case TYSTRUCT:
    case TYUNION:
      /*
       * pjh: This is an approximation to what should be done as it
       *      simply passes small structs using only integer registers.
       *      This should handle the common cases, however.
       */
      {
        int size = computeSize(t);

        if (size % 8 != 0)
        {
          bug("struct size not an even multiple of 8");
        }

        if (size == 16)
        {
          /* both quadwords must fit in registers or struct is passed in mem */
          if (*intRegCnt + 2 > TARCH_INT_REGCLASS_SIZE)
          {
            return 16;
          }
          else
          {
            *intRegCnt += 2;
            return 0;
          }
        }
        else if (size == 8)
        {
          *intRegCnt += 1;
          if (*intRegCnt > TARCH_INT_REGCLASS_SIZE)
          {
            return 8;
          }
          else
          {
            return 0;
          }
        }
        else
        {
          /* anything else is passed entirely in memory */
          return size;
        }
      }

    case TYERROR:
      bug("error type in analyzeOneArg");
      break;

    case TYVOID:
      bug("void type in analyzeOneArg");
      break;

    case TYARRAY:
      bug("array type in analyzeOneArg");
      break;

    case TYFUNCTION:
      bug("function type in analyzeOneArg");
      break;

    default:
      bug("unknown type in analyzeOneArg");
    }
  return 0; /* not reached */
}

/*
 * Calculate how many bytes will be passed on the stack for an argument list.
 */
static int bytesPassedViaStack(ExpTree e, Type retType)
{
  /*
   * e:       argument list of the call (a single expression, or a chain
   *          of ESEQ nodes whose left children are the arguments).
   * retType: return type of the callee.
   *
   * Returns the sum of analyzeOneArg() over the real arguments.
   *
   * When the callee returns a struct/union, the first entry of the list
   * is the dummy argument for the return value. encodeFcallHelper() and
   * moveArguments() treat it as a dummy for every struct/union return
   * type, so it is skipped here in every such case as well; it only
   * claims an integer register when the value is returned through memory
   * (size > 16).
   */
  int ret;
  int intRegCnt;
  int floatRegCnt;
  int returnsStructUnion;
  ExpTree cur;

  ret = 0;

  intRegCnt = 0;
  floatRegCnt = 0;

  returnsStructUnion = isStructUnionType(retType) ? 1 : 0;

  /* first arg might be a pointer to support returning structs */
  if (returnsStructUnion && computeSize(retType) > 16) {
    intRegCnt += 1;
  }

  /* more than one argument */
  if (((Tree) e) -> tag == ESEQ_TAG) {

    cur = e;

    if (returnsStructUnion) {
      cur = ((EseqTree)cur) -> right; /* skip dummy argument for return val */
    }

    while (((Tree) cur) -> tag == ESEQ_TAG) {
      ret += analyzeOneArg(((EseqTree)cur)->left->type, &intRegCnt,
                           &floatRegCnt);
      /* trace line written into the generated assembly */
      emit("# aOA %d %d %d", intRegCnt, floatRegCnt, ret);
      cur = ((EseqTree)cur) -> right;
    }

    /* last element of the chain is the final argument itself */
    ret += analyzeOneArg(cur->type, &intRegCnt, &floatRegCnt);
    emit("# aOA %d %d %d", intRegCnt, floatRegCnt, ret);

  } else { /* only one argument */

    /* a lone argument of a struct/union-returning call is the dummy */
    if (!returnsStructUnion) {
      ret += analyzeOneArg(e->type, &intRegCnt, &floatRegCnt);
    }
  }

  return ret;
}

/*
 * This routine processes the argument list one more time in order
 * to generate code to place each argument in its proper place.
 *
 * Moves from the IA-32-ish calling convention to the EM64T convention.
 */
/*
 * Copy one quadword from the IA-32-style argument area to the EM64T
 * memory-argument area (via %r11) and advance both offsets by 8.
 */
static void moveOneArgQuadToMemory(int* ia32Offset, int* em64tOffset)
{
  emit("\tmovq\t%d(%%rsp), %%r11", *ia32Offset);
  emit("\tmovq\t%%r11, %d(%%rsp)", *em64tOffset);
  *ia32Offset += 8;
  *em64tOffset += 8;
}

/*
 * Load one quadword from the IA-32-style argument area into integer
 * argument register number regNumber and advance the IA-32 offset by 8.
 */
static void moveOneArgQuadToIntReg(int regNumber, int* ia32Offset)
{
  emit("\tmovq\t%d(%%rsp), %s", *ia32Offset, int_reg_arg(regNumber));
  *ia32Offset += 8;
}

/*
 * Place one argument of type t: either into the next free integer/SSE
 * argument register or into the EM64T memory-argument area. All four
 * counters/offsets are updated for the next argument.
 *
 * NOTE(review): the 16-byte rounding of *em64tOffset for struct/union
 * arguments happens even when the struct ends up in registers, and
 * pushOneArg() applies no such rounding -- confirm this is intended.
 */
static void moveOneArg(Type t, int* intRegCnt, int* floatRegCnt,
                       int* ia32Offset, int* em64tOffset)
{
  int remaining;

  /* trace line written into the generated assembly */
  emit("# moveoneArg called (%d, %d, %d, %d)", *intRegCnt, *floatRegCnt,
    *ia32Offset, *em64tOffset);

  switch (typeQuery(t))
  {
  case TYSIGNEDSHORTINT:
  case TYSIGNEDINT:
  case TYUNSIGNEDSHORTINT:
  case TYUNSIGNEDINT:
  case TYUNSIGNEDCHAR:
  case TYSIGNEDCHAR:
  case TYSIGNEDLONGINT:
  case TYUNSIGNEDLONGINT:
  case TYPOINTER:
    /* integer class: register while any are left, memory afterwards */
    *intRegCnt += 1;
    if (*intRegCnt <= TARCH_INT_REGCLASS_SIZE)
    {
      moveOneArgQuadToIntReg(*intRegCnt, ia32Offset);
    }
    else
    {
      moveOneArgQuadToMemory(ia32Offset, em64tOffset);
    }
    return;

  case TYFLOAT:
    /* single precision: SSE register via movss, else memory */
    *floatRegCnt += 1;
    if (*floatRegCnt <= TARCH_SSE_REGCLASS_SIZE)
    {
      emit("\tmovss\t%d(%%rsp), %s", *ia32Offset, float_reg_arg(*floatRegCnt));
      *ia32Offset += 8;
    }
    else
    {
      moveOneArgQuadToMemory(ia32Offset, em64tOffset);
    }
    return;

  case TYDOUBLE:
    /* double precision: SSE register via movsd, else memory */
    *floatRegCnt += 1;
    if (*floatRegCnt <= TARCH_SSE_REGCLASS_SIZE)
    {
      emit("\tmovsd\t%d(%%rsp), %s", *ia32Offset, float_reg_arg(*floatRegCnt));
      *ia32Offset += 8;
    }
    else
    {
      moveOneArgQuadToMemory(ia32Offset, em64tOffset);
    }
    return;

  case TYLONGDOUBLE:
    /* always in memory: round the EM64T offset up to 16, copy 2 quads */
    while (*em64tOffset % 16 != 0)
    {
      *em64tOffset += 1;
    }
    moveOneArgQuadToMemory(ia32Offset, em64tOffset);
    moveOneArgQuadToMemory(ia32Offset, em64tOffset);
    return;

  case TYSTRUCT:
  case TYUNION:
    /*
     * pjh: This is an approximation to what should be done as it
     *      simply passes small structs using only integer registers.
     *      This should handle the common cases, however.
     */

    /* round the EM64T offset up to a multiple of 16 first */
    while (*em64tOffset % 16 != 0)
    {
      *em64tOffset += 1;
    }

    remaining = computeSize(t);
    if (remaining % 8 != 0)
    {
      bug("struct/union size not even multiple of 8 in moveOneArg!\n");
    }

    if (remaining == 16)
    {
      /* both quadwords go to registers, or both go to memory */
      if (*intRegCnt + 2 > TARCH_INT_REGCLASS_SIZE)
      {
        moveOneArgQuadToMemory(ia32Offset, em64tOffset);
        moveOneArgQuadToMemory(ia32Offset, em64tOffset);
      }
      else
      {
        *intRegCnt += 1;
        moveOneArgQuadToIntReg(*intRegCnt, ia32Offset);
        *intRegCnt += 1;
        moveOneArgQuadToIntReg(*intRegCnt, ia32Offset);
      }
    }
    else if (remaining == 8)
    {
      /* handled exactly like a single integer argument */
      *intRegCnt += 1;
      if (*intRegCnt <= TARCH_INT_REGCLASS_SIZE)
      {
        moveOneArgQuadToIntReg(*intRegCnt, ia32Offset);
      }
      else
      {
        moveOneArgQuadToMemory(ia32Offset, em64tOffset);
      }
    }
    else
    {
      /* any other size: copy to memory one quadword at a time */
      for (; remaining > 0; remaining -= 8)
      {
        moveOneArgQuadToMemory(ia32Offset, em64tOffset);
      }
    }
    return;

  /* the cases below deliberately keep the original fall-through chain */
  case TYERROR:
    bug("error type in moveOneArg");

  case TYVOID:
    bug("void type in moveOneArg");

  case TYARRAY:
    bug("array type in moveOneArg");

  case TYFUNCTION:
    bug("function type in moveOneArg");

  default:
    bug("unknown type in moveOneArg");
  }
}

/*
 * Arguments have all been previously loaded on top of stack. Now
 * arguments are moved to registers or to a new area lower on the
 * stack. 
 */
static int moveArguments(ExpTree e, Type retType, int argsSpace,
                         int bytesOnStack, int alignmentIncrement)
{
  /*
   * e:                  argument list of the call (may be NULL).
   * retType:            return type of the callee.
   * argsSpace:          size of the IA-32-style area (used for tracing only).
   * bytesOnStack:       size of the EM64T memory-argument area.
   * alignmentIncrement: padding between the two areas.
   *
   * Returns the number of floating point arguments seen, which the
   * caller uses to set %rax before the call.
   */
  int intRegCnt;
  int floatRegCnt;
  int ia32Offset;
  int em64tOffset;

  intRegCnt = 0;
  floatRegCnt = 0;
  ia32Offset = bytesOnStack;
  em64tOffset = 0;

  /* trace line written into the generated assembly */
  emit("# moveArguments called (%d, %d)", argsSpace, bytesOnStack);

  /* must skip over the alignment padding when accessing IA-32 args */
  ia32Offset += alignmentIncrement;

  /*
   * For any struct/union return type the first IA-32 slot holds the
   * dummy return-value pointer pushed by encodeFcallHelper(). It is
   * handed to the callee in %rdi only when the value is returned through
   * memory (size > 16), but the slot must be stepped over in every case;
   * otherwise all real arguments would be read one slot too early.
   */
  if (isStructUnionType(retType)) {
    if (computeSize(retType) > 16) {
      emit("\tmovq\t%d(%%rsp), %%rdi", ia32Offset);
      intRegCnt += 1;
    }
    ia32Offset += 8;
  }

  if (e != NULL)
  {
    /* more than one argument */
    if (((Tree) e) -> tag == ESEQ_TAG) {
      ExpTree cur;
      cur = e;
      if (isStructUnionType(retType)) { /* skip dummy arg for return val */
        cur = ((EseqTree)cur) -> right;
      }
      while (((Tree) cur) -> tag == ESEQ_TAG) {
        moveOneArg(((EseqTree)cur)->left->type, &intRegCnt,
                             &floatRegCnt, &ia32Offset, &em64tOffset);
        cur = ((EseqTree)cur) -> right;
      }
      /* last element of the chain is the final argument itself */
      moveOneArg(cur->type, &intRegCnt, &floatRegCnt, &ia32Offset,
        &em64tOffset);

    } else { /* only one argument */
      if (!isStructUnionType(retType)) { /* skip dummy arg for return val */
        moveOneArg(e->type, &intRegCnt, &floatRegCnt, &ia32Offset,
                        &em64tOffset);
      }
    }
  }

  return floatRegCnt;
}

/*
 *  Encode a function call.
 *
 *  Just need to process the argument list in reverse order to get
 *  arguments evaluated on top of the stack. Then emit the call instruction.
 *  When function returns need to push the result on top of stack.
 */
static void encodeFcall(FcallTree b)
{
  /*
   * Steps:
   *   1. evaluate all arguments onto the stack (IA-32 style);
   *   2. evaluate the callee address and pop it into %rbx;
   *   3. reserve the EM64T memory-argument area plus alignment padding;
   *   4. move arguments to registers / the EM64T area;
   *   5. load %rax with the SSE register count and call through %rbx;
   *   6. release everything reserved in steps 1 and 3;
   *   7. push the return value, if any.
   */
  int argsSpace;
  int bytesOnStack;
  int numberOfSseRegisters;
  int alignmentIncrement;

  argsSpace = 0;
  bytesOnStack = 0;
  numberOfSseRegisters = 0;

  if (b->params != NULL) {

    /* Use recursive function to load arguments on stack in reverse order. */
    argsSpace = encodeFcallHelper(b->params,  b->expn.type, TRUE);

    /* Figure out how many bytes need to be passed on the stack. */
    bytesOnStack = bytesPassedViaStack(b->params, b->expn.type);
  }

  /* What are we calling? */
  encode((Tree) b->func);
  emit_pop_reg("rbx");

  /* note: yields 16 (not 0) when the total is already a multiple of 16 */
  alignmentIncrement = 16 -
    ((encodeStackDepth + bytesOnStack) % 16);

  /* allocate space on the stack for arguments to be placed on stack */
  if ((bytesOnStack + alignmentIncrement) > 0) {
    emit("\t#bytesOnStack=%d, alignmentIncrement=%d\n",
         bytesOnStack, alignmentIncrement);
    emit_stack_grow(bytesOnStack + alignmentIncrement);
  }

  /*
   * Go through the arguments again to move values to registers or to
   * new EM64T argument area lower on stack. This must be called even
   * if there are no arguments in order to get the stack aligned on
   * a 16-byte boundary.
   */
  numberOfSseRegisters = moveArguments(b->params,  b->expn.type,
                                       argsSpace, bytesOnStack,
                                       alignmentIncrement);

  /*
   * Do the call. %rax receives the number of SSE registers used, capped
   * at TARCH_SSE_REGCLASS_SIZE. The register name must be written "%%rax":
   * a bare "%r" is not a valid conversion in a printf-style format.
   */
  emit("\tmovq\t$%d, %%rax", numberOfSseRegisters > TARCH_SSE_REGCLASS_SIZE ?
                             TARCH_SSE_REGCLASS_SIZE : numberOfSseRegisters);
  emit("\tcall\t*%%rbx");

  /* release both argument areas and the padding in one step */
  if (argsSpace + bytesOnStack + alignmentIncrement != 0) {
    emit("\t#argsSpace=%d, bytesOnStack=%d, alignmentIncrement=%d\n",
         argsSpace, bytesOnStack, alignmentIncrement);
    emit_stack_shrink(argsSpace + bytesOnStack + alignmentIncrement);
  }

  /* Mark the tree as visited */
  b->expn.reg = setReg();

  /* If there is a return value, push it onto stack */
  switch (typeQuery(((ExpTree) b)-> type))
  {
    case TYVOID:        /* Do Nothing */
      break;

    case TYFLOAT:

      /* alloc space on top of stack for return value */
      emit_stack_grow(8);

      /* put value on top of stack */
      emit("\tmovss\t%%xmm0, (%%rsp)");

      break;

    case TYDOUBLE:

      /* alloc space on top of stack for return value */
      emit_stack_grow(8);

      /* put value on top of stack */
      emit("\tmovsd\t%%xmm0, (%%rsp)");

      break;

    case TYLONGDOUBLE:

      /* alloc space on top of stack for return value */
      emit_stack_grow(16);

      /* put value on top of stack (and pop fp register stack) */
      emit("\tfstpt\t(%%rsp)");

      break;

    case TYSIGNEDSHORTINT:
    case TYSIGNEDINT:
    case TYUNSIGNEDSHORTINT:
    case TYUNSIGNEDINT:
    case TYUNSIGNEDCHAR:
    case TYSIGNEDCHAR:
    case TYSIGNEDLONGINT:
    case TYUNSIGNEDLONGINT:
    case TYARRAY:
    case TYPOINTER:
      /* push return value (in %rax) onto stack */
      emit_push_reg("rax");
      break;

    case TYSTRUCT:
    case TYUNION:
    {
      int size = computeSize(((ExpTree) b)-> type);

      if (size % 8 != 0)
      {
        bug("struct/union size not even multiple of 8!");
      }

      /*
       * small struct is assumed to be in (%rax, %rdx) which is not always
       * right but close enough for now.
       */
      if (size <= 16)
      {
        /*
         *  Move first quadword to temp for return value, which is always
         *  first in the locals area.
         *
         *  NOTE(review): the temp is addressed as 8(%rsp); that this is
         *  the start of the locals area at this point is not evident
         *  from this routine -- confirm.
         */
        emit("\tmovq\t%%rax, 8(%%rsp)");

        /* move second quadword, if there is one, to temp */
        if (size > 8)
        {
          emit("\tmovq\t%%rdx, 16(%%rsp)");
        }

        /* now push address of temp onto the stack ("%%rax", see above) */
        emit("\tleaq\t8(%%rsp), %%rax");
        emit_push_reg("rax");
      }
      else
      {
        /* otherwise address of temp has been returned; just push it */
        emit_push_reg("rax");
      }
    }
    break;

    default:
      bug("unexpected return type");
  }

}

/**
 * \brief Moves a single argument from the em64t calling convention onto
 *        the stack in IA-32-style convention.
 */
static void pushOneArg(Type t, int *int_regs, int *float_regs,
                       int *ia32_offs, int *em64t_offs)
{
  /*
   * t:          type of the parameter being moved.
   * int_regs:   in/out count of integer argument registers consumed so far.
   * float_regs: in/out count of SSE argument registers consumed so far.
   * ia32_offs:  in/out %rbp-relative offset of the next IA-32-style slot.
   * em64t_offs: in/out %rbp-relative offset of the next EM64T memory
   *             argument.
   *
   * This is the callee-side mirror of moveOneArg(): the same register
   * limits decide whether a parameter arrives in a register or in memory.
   *
   * NOTE(review): moveOneArg() also rounds its memory offset up to 16
   * before struct/union arguments; no such rounding is done here for
   * structs -- confirm which side is intended.
   */
  int size;

  switch (typeQuery(t)) {
  case TYSIGNEDSHORTINT:
  case TYSIGNEDINT:
  case TYUNSIGNEDSHORTINT:
  case TYUNSIGNEDINT:
  case TYUNSIGNEDCHAR:
  case TYSIGNEDCHAR:
  case TYSIGNEDLONGINT:
  case TYUNSIGNEDLONGINT:
  case TYPOINTER:
    *int_regs += 1;
    if (*int_regs > TARCH_INT_REGCLASS_SIZE) {
      /* arrived in memory: copy via %r11 */
      emit("\tmovq\t%d(%%rbp), %%r11", *em64t_offs);
      emit("\tmovq\t%%r11, %d(%%rbp)", *ia32_offs);
      *em64t_offs += 8;
    } else {
      /* arrived in a register: store it to its slot */
      emit("\tmovq\t%s, %d(%%rbp)", int_reg_arg(*int_regs),
           *ia32_offs);
    }
    *ia32_offs += 8;
    break;
  case TYFLOAT:
  case TYDOUBLE:
    *float_regs += 1;
    if (*float_regs > TARCH_SSE_REGCLASS_SIZE) {
      emit("\tmovq\t%d(%%rbp), %%r11", *em64t_offs);
      emit("\tmovq\t%%r11, %d(%%rbp)", *ia32_offs);
      *em64t_offs += 8;
    } else {
      /* movss for float, movsd for double */
      emit("\tmovs%c\t%s, %d(%%rbp)",
           typeQuery(t) == TYFLOAT ? 's' : 'd',
           float_reg_arg(*float_regs),
           *ia32_offs);
    }
    *ia32_offs += 8;
    break;
  case TYLONGDOUBLE:
    /*
     * A long double memory argument sits on a 16-byte boundary:
     * moveOneArg() rounds its offset the same way on the caller side.
     * The EM64T area starts at a multiple of 16 from the 16-byte aligned
     * %rbp, so rounding the offset is sufficient.
     */
    while (*em64t_offs % 16) (*em64t_offs)++;
    emit("\tmovq\t%d(%%rbp), %%r11", *em64t_offs);
    emit("\tmovq\t%%r11, %d(%%rbp)", *ia32_offs);
    emit("\tmovq\t%d(%%rbp), %%r11", *em64t_offs + 8);
    emit("\tmovq\t%%r11, %d(%%rbp)", *ia32_offs + 8);
    *ia32_offs += 16;
    *em64t_offs += 16;
    break;
  case TYSTRUCT:
  case TYUNION:
    /*
     * pjh: This is an approximation to what should be done as it
     *      simply passes small structs using only integer registers.
     *      This should handle the common cases, however.
     */
    size = computeSize(t);
    if (size % 8 != 0)
    {
      bug("struct/union size not even multiple of 8 in pushOneArg!\n");
    }
    if (size == 16)
    {
      /* whole struct must fit in registers or it gets passed on stack */
      if (*int_regs + 2 > TARCH_INT_REGCLASS_SIZE)
      {
        emit("\tmovq\t%d(%%rbp), %%r11", *em64t_offs);
        emit("\tmovq\t%%r11, %d(%%rbp)", *ia32_offs);
        *ia32_offs += 8;
        *em64t_offs += 8;
        emit("\tmovq\t%d(%%rbp), %%r11", *em64t_offs);
        emit("\tmovq\t%%r11, %d(%%rbp)", *ia32_offs);
        *ia32_offs += 8;
        *em64t_offs += 8;
      }
      else
      {
        *int_regs += 1;
        emit("\tmovq\t%s, %d(%%rbp)", int_reg_arg(*int_regs), *ia32_offs);
        *ia32_offs += 8;
        *int_regs += 1;
        emit("\tmovq\t%s, %d(%%rbp)", int_reg_arg(*int_regs), *ia32_offs);
        *ia32_offs += 8;
      }
    }
    else if (size == 8)
    {
      /* handled exactly like a single integer parameter */
      *int_regs += 1;
      if (*int_regs > TARCH_INT_REGCLASS_SIZE)
      {
        emit("\tmovq\t%d(%%rbp), %%r11", *em64t_offs);
        emit("\tmovq\t%%r11, %d(%%rbp)", *ia32_offs);
        *em64t_offs += 8;
      }
      else
      {
        emit("\tmovq\t%s, %d(%%rbp)", int_reg_arg(*int_regs),
           *ia32_offs);
      }
      *ia32_offs += 8;
    }
    else
    {
      /* any other size arrives in memory: copy quadword by quadword */
      while (size > 0)
      {
        emit("\tmovq\t%d(%%rbp), %%r11", *em64t_offs);
        emit("\tmovq\t%%r11, %d(%%rbp)", *ia32_offs);
        *ia32_offs += 8;
        *em64t_offs += 8;
        size -= 8;
      }
    }
    break;

  case TYERROR:
    bug("error type in pushOneArg");
    break;

  case TYVOID:
    bug("void type in pushOneArg");
    break;

  case TYARRAY:
    bug("array type in pushOneArg");
    break;

  case TYFUNCTION:
    bug("function type in pushOneArg");
    break;

  default:
    bug("unknown type in pushOneArg");
  }
}

/*
 *  EM64T uses this to move function parameters to their correct place
 *  in the IA-32-ish parameter block that is allocated below the base
 *  pointer on the stack by encodeFunctionEntry().
 *
 */
void encodeMoveParametersToLocalSlots(ParamList list, Tree t)
{
  int paramBytes;
  int intRegsUsed;
  int sseRegsUsed;
  int ia32Cursor;
  int em64tCursor;
  ParamList cur;

  /* t is accepted for interface compatibility but never used */
  (void) t;

  /* nothing to move for an empty parameter list */
  paramBytes = paramListSize(list);
  if (paramBytes == 0)
  {
    return;
  }

  intRegsUsed = 0;
  sseRegsUsed = 0;

  /*
   * Offset (from %rbp) of the first EM64T memory argument: the rounded
   * parameter block plus 32 bytes covering the saved base pointer, the
   * fake return value slot, the re-alignment padding and the real return
   * address (see encodeFunctionEntry()).
   */
  em64tCursor = roundEven16(paramBytes) + 32;

  /* IA-32 slots start 16 bytes up: past the saved base pointer and the
   * fake return value slot. */
  ia32Cursor = 16;

  /* move every parameter, in declaration order, into its slot */
  cur = list;
  while (cur != NULL)
  {
    pushOneArg(cur->type, &intRegsUsed, &sseRegsUsed, &ia32Cursor,
               &em64tCursor);
    cur = cur->next;
  }
}

/*
 *  Assign one struct/union to another by emitting a call to memcpy.
 *
 *  On entry the run-time stack holds the destination address (lhs) with
 *  the source address (rhs) above it. Both are popped into the registers
 *  memcpy reads them from, and the source address is pushed back so that
 *  it is left on top of the stack as the result of the assignment.
 */
static void encodeStructUnionAssign(BinopTree b)
{
  const int nbytes = computeSize( b->expn.type );

  emit_pop_reg("rsi");                  /* src  <- rhs (was on top)     */
  emit_pop_reg("rdi");                  /* dest <- lhs                  */
  emit_push_reg("rsi");                 /* keep src as the expn result  */
  emit("\tmovq\t$%d, %%rdx", nbytes);   /* len goes in a register       */

  /* copy nbytes from src to dest */
  emit( "\tcall\tmemcpy" );

  /* mark the tree as visited by the code generator */
  b->expn.reg = setReg();
}

/*
 *  Emit code for an assignment. Struct and union assignments are routed
 *  to encodeStructUnionAssign(); every other type is handled by
 *  encodeAssignDetail().
 */
static void encodeAssign(BinopTree b)
{
  if (!isStructUnionType(b->expn.type))
  {
    encodeAssignDetail(b->expn.type, (Tree)b);
    return;
  }

  /* structs and unions need the special memcpy-based path */
  encodeStructUnionAssign(b);
}

/*
 *  Emit code for a return statement.
 *
 *  When the statement has an expression, its value is taken off the top
 *  of the run-time stack and placed where the selected case puts it
 *  (%rax, %xmm0, the x87 stack, or %rax/%rdx / a memcpy for structs and
 *  unions). In every case the code then jumps to the "<function>$exit"
 *  label of the function currently being compiled.
 */
static void encodeReturn(ReturnTree r)
{
  if ( r->expr != NULL )
  {
    switch (typeQuery(r->expr->type))
    {
    /* integer and pointer values come back in %rax */
    case TYSIGNEDCHAR:
    case TYUNSIGNEDCHAR:
    case TYSIGNEDSHORTINT:
    case TYUNSIGNEDSHORTINT:
    case TYSIGNEDINT:
    case TYUNSIGNEDINT:
    case TYSIGNEDLONGINT:
    case TYUNSIGNEDLONGINT:
    case TYPOINTER:
      emit_pop_reg("rax");
      break;

    /* float and double are loaded into %xmm0, then their slot is dropped */
    case TYFLOAT:
      emit("\tmovss\t(%%rsp),%%xmm0");
      emit_stack_shrink(8);
      break;

    case TYDOUBLE:
      emit("\tmovsd\t(%%rsp),%%xmm0");
      emit_stack_shrink(8);
      break;

    /* long double is loaded with fldt and occupies 16 bytes of stack */
    case TYLONGDOUBLE:
      emit("\tfldt\t(%%rsp)");
      emit_stack_shrink(16);
      break;

    case TYSTRUCT:
    case TYUNION:
      {
        /*
         * Small structs (at most 16 bytes) are returned in %rax and %rdx.
         * This is not exactly right, as some small structs should be
         * returned in f.p. registers, but it is good enough for now.
         * Larger ones are copied with memcpy to the address held at
         * 8(%rbp), and that address is returned in %rax.
         */
        int nbytes = computeSize(r->expr->type);

        if (nbytes % 8 != 0)
        {
          bug("struct/union size not even multiple of 8!");
        }

        /* the source address is on top of the stack in both variants */
        emit_pop_reg("rsi");

        if (nbytes > 16)
        {
          /* length of the copy */
          emit("\tmovq\t$%d, %%rdx", nbytes);

          /* destination address is read from 8(%rbp) */
          emit("\tmovq\t8(%%rbp), %%rdi");

          emit("\tcall\tmemcpy");

          /* hand the destination address back to the caller */
          emit("\tmovq\t8(%%rbp), %%rax");
        }
        else
        {
          /* first quadword */
          emit("\tmovq\t(%%rsi), %%rax");

          /* second quadword, only when the struct has one */
          if (nbytes > 8)
          {
            emit("\tmovq\t8(%%rsi), %%rdx");
          }
        }
      }
      break;

    case TYERROR:
      break; /* should never get here? */

    /* no break between the cases below: each bugT call is followed
     * directly by the next one */
    case TYVOID:
      bugT((Tree) r, "void type in encodeReturn");

    case TYARRAY:
      bugT((Tree) r, "array type in encodeReturn");

    case TYFUNCTION:
      bugT((Tree) r, "function type in encodeReturn");

    default:
      bugT((Tree) r, "unknown type in encodeReturn");
    }
  }

  /* with or without a value, leave through the function's exit label */
  emit( "\tjmp\t%s$exit", st_get_id_str( gCurrentFunctionName ) );
}

/*
 *  No registers are held for an expression statement, but the value the
 *  expression left on the run-time stack still has to be discarded.
 *
 *  Trees that are not expression trees are ignored. Long double values
 *  release 16 bytes; every other value-carrying type releases 8. Void
 *  and error types release nothing.
 */
void freeExpressionStatementRegister(Tree t)
{
  ExpTree e;

  if (!isExpTree(t))
  {
    return;
  }

  e = (ExpTree)t;

  switch (typeQuery(e->type))
  {
  case TYLONGDOUBLE:
    emit_stack_shrink(16);
    break;

  case TYSIGNEDCHAR:
  case TYUNSIGNEDCHAR:
  case TYSIGNEDSHORTINT:
  case TYUNSIGNEDSHORTINT:
  case TYSIGNEDINT:
  case TYUNSIGNEDINT:
  case TYSIGNEDLONGINT:
  case TYUNSIGNEDLONGINT:
  case TYFLOAT:
  case TYDOUBLE:
  case TYPOINTER:
  case TYSTRUCT:
  case TYUNION:
    emit_stack_shrink(8);
    break;

  case TYVOID:
  case TYERROR:
    /* nothing to discard */
    break;

  case TYFUNCTION:
  case TYARRAY:
    bugT(t, "unexpected type in freeExpressionStatementRegister");
    /* no break: the default case follows directly */

  default:
    bugT(t, "unknown type in freeExpressionStatementRegister");
  }
}

/*
 *  Need to adjust running offset as shift from params to locals?
 *
 *  On IA-32 I want to reset the offset to zero since params are positive
 *  offsets from %ebp and locals are negative offsets from %ebp.
 *
 *  currentOffset: the running offset so far; it is ignored.
 *
 *  Returns 0 for every input.
 */
int adjustFunctionOffset(int currentOffset)
{
  /* The parameter is intentionally unused. Casting to void quiets the
   * unused-parameter warning without a dead store, which compilers
   * diagnose separately. */
  (void) currentOffset;

  return 0;
}
/* vi: set ts=2 expandtab: */
