737 lines
32 KiB
C++
737 lines
32 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
|
*
|
|
* The contents of this file are subject to the Netscape Public
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code is mozilla.org code.
|
|
*
|
|
* The Initial Developer of the Original Code is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All
|
|
* Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*/
|
|
//
|
|
// x86Float.cpp - Floating-point code-generation for x86 processors
|
|
//
|
|
|
|
#include "x86Float.h"
|
|
#include "x86Emitter.h"
|
|
#include "FloatUtils.h"
|
|
|
|
// Note: In comments below, TOS = Top of FPU stack
|
|
|
|
//====================================================================================================
|
|
// Register classes for float and double operands
|
|
// FIXME - For now, all float and double virtual registers are allocated on the stack
|
|
// Register class used for 32-bit float virtual registers; currently aliased to
// the stack-slot class.
const VRClass vrcFloat = vrcStackSlot;
|
|
// Register class used for 64-bit double virtual registers; currently aliased to
// the stack-slot class as well.
const VRClass vrcDouble = vrcStackSlot; // FIXME - Stack slots are temporarily 8-bytes, so they can hold a double
|
|
|
|
|
|
//====================================================================================================
|
|
// Floating-point instruction classes
|
|
|
|
/*--------------------------------------------------------------------------------
|
|
Floating-Point instructions where one operand is a memory location and the other is
|
|
the top of stack, e.g. binary operations like add, and load/store/conversion instructions.
|
|
|
|
oiBaseOpcode
|
|
contains the opcode for the instruction
|
|
|
|
oiOpcodeInformation
|
|
6 set if opcode extension is used
|
|
[5 4 3] contain Regfield opcode extension (if bit 6 is set)
|
|
others set to 0
|
|
*/
|
|
|
|
// Utility macros for setting up the opcode table.

// Register extensions range from 0 to 7, so this special code indicates that
// there is no R/M byte.
#define NO_EXTENSION 8

// Expands to two comma-separated initializers: the base opcode byte, followed
// by the opcode-information byte.  When an extension is supplied, bit 6 is set
// and the extension is placed in bits [5 4 3]; with NO_EXTENSION the second
// value is 0.
//
// Both arguments are fully parenthesized so that an expression argument
// (e.g. `8 & 7`) is evaluated as a unit before the comparison and the shift.
#define FLOAT_INFO(first_opcode, reg_extension)                              \
    (first_opcode),                                                          \
    (((reg_extension) != NO_EXTENSION) * ((1 << 6) | ((reg_extension) << 3)))
|
|
|
|
// Opcode table for floating-point instructions with one memory operand and an
// implicit top-of-stack (TOS) operand.  Each entry is
//   { base opcode, opcode information (extension flag + reg-field bits), name }.
// NOTE(review): entries are presumably indexed by x86FloatMemoryType, so their
// order must not change -- confirm against x86Float.h.
x86OpcodeInfo InsnFloatMemory::opcodeTable[] =
{
  // Stores (TOS => memory)
  {FLOAT_INFO(0xD9, 3), "fstp32"},   // TOS => 32-bit float memory, then pop TOS
  {FLOAT_INFO(0xDD, 3), "fstp64"},   // TOS => 64-bit double memory, then pop TOS
  {FLOAT_INFO(0xD9, 2), "fst32"},    // TOS => 32-bit float memory (FPU stack is not popped)
  {FLOAT_INFO(0xDD, 2), "fst64"},    // TOS => 64-bit double memory (FPU stack is not popped)
  {FLOAT_INFO(0xDB, 3), "fistp32"},  // Round(TOS) => 32-bit int memory, then pop TOS
  {FLOAT_INFO(0xDF, 7), "fistp64"},  // Round(TOS) => 64-bit long memory, then pop TOS

  // Loads (memory => pushed on the FPU stack)
  {FLOAT_INFO(0xD9, 0), "fld32"},    // Push 32-bit float memory
  {FLOAT_INFO(0xDD, 0), "fld64"},    // Push 64-bit double memory
  {FLOAT_INFO(0xDB, 0), "fild32"},   // Convert 32-bit int memory to FP and push
  {FLOAT_INFO(0xDF, 5), "fild64"},   // Convert 64-bit long memory to FP and push

  // Arithmetic (result replaces TOS)
  {FLOAT_INFO(0xD8, 0), "fadd32"},   // TOS + 32-bit float memory
  {FLOAT_INFO(0xDC, 0), "fadd64"},   // TOS + 64-bit double memory
  {FLOAT_INFO(0xD8, 1), "fmul32"},   // TOS * 32-bit float memory
  {FLOAT_INFO(0xDC, 1), "fmul64"},   // TOS * 64-bit double memory
  {FLOAT_INFO(0xD8, 4), "fsub32"},   // TOS - 32-bit float memory
  {FLOAT_INFO(0xDC, 4), "fsub64"},   // TOS - 64-bit double memory
  {FLOAT_INFO(0xD8, 5), "fsubr32"},  // 32-bit float memory - TOS (reversed subtract)
  {FLOAT_INFO(0xDC, 5), "fsubr64"},  // 64-bit double memory - TOS (reversed subtract)
  {FLOAT_INFO(0xD8, 6), "fdiv32"},   // TOS / 32-bit float memory
  {FLOAT_INFO(0xDC, 6), "fdiv64"},   // TOS / 64-bit double memory

  // Comparisons (set FPU flags, then pop TOS)
  {FLOAT_INFO(0xD8, 3), "fcomp32"},  // Compare TOS to 32-bit float memory
  {FLOAT_INFO(0xDC, 3), "fcomp64"}   // Compare TOS to 64-bit double memory
};
|
|
|
|
void InsnFloatMemory::
|
|
formatToMemory(void* inStart, Uint32 inOffset, MdFormatter& inFormatter)
|
|
{
|
|
assert(opcodeInfo != NULL && iArgumentList != NULL);
|
|
Uint8 *opLocation = (Uint8*)inStart;
|
|
|
|
// Format the opcode to memory
|
|
*opLocation++ = opcodeInfo->oiBaseOpcode;
|
|
|
|
// Find the location of the argument list and format it to memory
|
|
iArgumentList->alFormatToMemory((void*)opLocation, inOffset, *this, inFormatter);
|
|
|
|
// If the opcode has an opcode extension then OR it into the proper place. ( the reg field of the modr/m byte.)
|
|
Uint8 regFieldExtension = kRegfield_Mask & opcodeInfo->oiOpcodeInformation;
|
|
*opLocation |= regFieldExtension;
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------------------------------------
|
|
Floating-Point instructions where source and destination are implicitly on the FPU stack
|
|
e.g. negation, comparison
|
|
|
|
oiBaseOpcode
|
|
contains the first byte of the opcode for the instruction
|
|
|
|
oiOpcodeInformation
|
|
contains the second byte of the opcode for the instruction
|
|
*/
|
|
|
|
// Opcode table for floating-point instructions whose operands are implicit.
// Each entry is { first opcode byte, second opcode byte, name }.
// NOTE(review): entry order presumably matches the enumeration used to select
// these instructions (e.g. fnstsw) -- confirm against x86Float.h.
x86OpcodeInfo InsnFloatReg::opcodeTable[] =
{
  {0xDF, 0xE9, "fucomip st, st(1)"},  // Compare top two FPU stack operands, set EFLAGS, pop FPU stack
  {0xD9, 0xE0, "fchs"},               // Negate the value on top of the FPU stack
  {0xDF, 0xE0, "fnstsw ax"}           // Copy FPU status register to AX
};
|
|
|
|
void InsnFloatReg::
|
|
formatToMemory(void* inStart, Uint32 /*inOffset*/, MdFormatter& /*inFormatter*/)
|
|
{
|
|
// Format the opcode to memory. There is no argument list.
|
|
Uint8* start = (Uint8*) inStart;
|
|
*start++ = opcodeInfo->oiBaseOpcode;
|
|
*start = opcodeInfo->oiOpcodeInformation;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// Instruction generation utilities
|
|
|
|
// Make output 0 of defInsn a stack-slot temporary and create a copy instruction
// (stack slot => register direct) that consumes it.  The caller is expected to
// define the copy instruction's result.
InsnDoubleOpDir& x86Emitter::
copyFromFloatToIntegerRegister(DataNode& inDataNode, InsnUseXDefineYFromPool& defInsn)
{
  VirtualRegister& slotTemp = defineTemporary(defInsn, 0, vrcStackSlot);

  InsnDoubleOpDir* const copy =
    new(mPool) InsnDoubleOpDir(&inDataNode, mPool, raCopyI, atRegAllocStackSlot, atRegDirect, 1, 1);
  useTemporaryVR(*copy, slotTemp, 0);

  return *copy;
}
|
|
|
|
// Make output 0 of defInsn a temporary (default register class) and create a
// copy instruction (register direct => stack slot) that consumes it.  The
// caller is expected to define the copy instruction's result.
InsnDoubleOpDir& x86Emitter::
copyFromIntegerRegisterToFloat(DataNode& inDataNode, InsnUseXDefineYFromPool& defInsn)
{
  VirtualRegister& regTemp = defineTemporary(defInsn, 0);

  InsnDoubleOpDir* const copy =
    new(mPool) InsnDoubleOpDir(&inDataNode, mPool, raCopyI, atRegDirect, atRegAllocStackSlot, 1, 1);
  useTemporaryVR(*copy, regTemp, 0);

  return *copy;
}
|
|
|
|
//====================================================================================================
|
|
// Floating-point binary operations, i.e. add, subtract, multiply, divide, modulus
|
|
|
|
void x86Emitter::
|
|
emit_BinaryFloat(Primitive& inPrimitive,
|
|
x86FloatMemoryType binary_op, x86FloatMemoryType load_op, x86FloatMemoryType store_op,
|
|
VRClass vrClass)
|
|
{
|
|
// Fetch first operand of binary op from memory and push it on the FPU stack
|
|
InsnFloatMemory &loadInsn = *new InsnFloatMemory(&inPrimitive, mPool, load_op, 1, 1);
|
|
useProducer(inPrimitive.nthInputVariable(0), loadInsn, 0, vrClass);
|
|
InstructionDefine& define1 = defineTemporaryOrder(loadInsn, 0);
|
|
|
|
// Fetch second operand and perform binary operation, result replaces top of FPU stack
|
|
InsnFloatMemory &binaryInsn = *new InsnFloatMemory(&inPrimitive, mPool, binary_op, 2, 1);
|
|
useProducer(inPrimitive.nthInputVariable(1), binaryInsn, 0, vrClass);
|
|
useTemporaryOrder(binaryInsn, define1, 1);
|
|
InstructionDefine& define2 = defineTemporaryOrder(binaryInsn, 0);
|
|
|
|
// Pop result of binary operation from FPU stack and store into memory
|
|
InsnFloatMemory &storeInsn = *new InsnFloatMemory(&inPrimitive, mPool, store_op, 1, 1);
|
|
useTemporaryOrder(storeInsn, define2, 0);
|
|
defineProducer(inPrimitive, storeInsn, 0, vrClass); // result
|
|
}
|
|
|
|
// Emit 32-bit float binary operation
|
|
void x86Emitter::
|
|
emit_BinaryFloat32(Primitive& inPrimitive, x86FloatMemoryType binary_op)
|
|
{
|
|
emit_BinaryFloat(inPrimitive, binary_op, fld32, fstp32, vrcFloat);
|
|
}
|
|
|
|
// Emit 64-bit float binary operation
|
|
void x86Emitter::
|
|
emit_BinaryFloat64(Primitive& inPrimitive, x86FloatMemoryType binary_op)
|
|
{
|
|
emit_BinaryFloat(inPrimitive, binary_op, fld64, fstp64, vrcDouble);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_FAdd_F(Primitive& inPrimitive) {
|
|
emit_BinaryFloat32(inPrimitive, fadd32);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_FAdd_D(Primitive& inPrimitive) {
|
|
emit_BinaryFloat64(inPrimitive, fadd64);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_FMul_F(Primitive& inPrimitive) {
|
|
emit_BinaryFloat32(inPrimitive, fmul32);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_FMul_D(Primitive& inPrimitive) {
|
|
emit_BinaryFloat64(inPrimitive, fmul64);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_FSub_F(Primitive& inPrimitive) {
|
|
emit_BinaryFloat32(inPrimitive, fsub32);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_FSub_D(Primitive& inPrimitive) {
|
|
emit_BinaryFloat64(inPrimitive, fsub64);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_FDiv_F(Primitive& inPrimitive) {
|
|
emit_BinaryFloat32(inPrimitive, fdiv32);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_FDiv_D(Primitive& inPrimitive) {
|
|
emit_BinaryFloat64(inPrimitive, fdiv64);
|
|
}
|
|
|
|
// FIXME - Modulus is wrapper around fmod function. Should be changed to inline code.
|
|
void x86Emitter::
|
|
emit_FRem_D(Primitive& inPrimitive)
|
|
{
|
|
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, (void (*)(void))&javaFMod);
|
|
}
|
|
|
|
// Wrapper around fmod() for 32-bit float operands instead of double operands
|
|
static Flt32 fmod32(Flt32 a, Flt32 b)
|
|
{
|
|
return (Flt32)javaFMod(a, b);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_FRem_F(Primitive& inPrimitive)
|
|
{
|
|
new(mPool) CallS_C(&inPrimitive, mPool, 2, true, *this, (void (*)(void))&fmod32);
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------------------------------------
|
|
// Conversions to/from floating-point
|
|
//
|
|
// All conversions are two steps:
|
|
// 1) Load input operand onto FPU stack from memory, with possible conversion to floating-point type
|
|
// 2) Simultaneously convert and store from top of FPU stack into memory location, with possible
|
|
// conversion to integer type.
|
|
|
|
void x86Emitter::
|
|
emit_FConv(Primitive& inPrimitive)
|
|
{
|
|
InsnFloatMemory *loadInsn;
|
|
|
|
// Fetch input operand from memory and push it on the FPU stack
|
|
switch (inPrimitive.nthInputVariable(0).getKind()) {
|
|
case vkFloat:
|
|
loadInsn = new InsnFloatMemory(&inPrimitive, mPool, fld32, 1, 1);
|
|
useProducer(inPrimitive.nthInputVariable(0), *loadInsn, 0, vrcFloat);
|
|
break;
|
|
|
|
case vkDouble:
|
|
loadInsn = new InsnFloatMemory(&inPrimitive, mPool, fld64, 1, 1);
|
|
useProducer(inPrimitive.nthInputVariable(0), *loadInsn, 0, vrcDouble);
|
|
break;
|
|
|
|
case vkInt:
|
|
{
|
|
InsnDoubleOpDir& copyInsn = *new(mPool) InsnDoubleOpDir(&inPrimitive, mPool, raCopyI, atRegDirect, atRegAllocStackSlot, 1, 1);
|
|
useProducer(inPrimitive.nthInputVariable(0), copyInsn, 0);
|
|
VirtualRegister& tmp = defineTemporary(copyInsn, 0, vrcStackSlot);
|
|
|
|
loadInsn = new InsnFloatMemory(&inPrimitive, mPool, fild32, 1, 1);
|
|
useTemporaryVR(*loadInsn, tmp, 0, vrcStackSlot);
|
|
}
|
|
break;
|
|
|
|
case vkLong:
|
|
{
|
|
InsnDoubleOpDir& copyInsnHi = *new(mPool) InsnDoubleOpDir(&inPrimitive, mPool, raCopyI, atRegDirect, atRegAllocStackSlotHi32, 1, 1);
|
|
useProducer(inPrimitive.nthInputVariable(0), copyInsnHi, 0, vrcInteger, vidHigh);
|
|
VirtualRegister& tmp64 = defineTemporary(copyInsnHi, 0, vrcStackSlot);
|
|
|
|
InsnDoubleOpDir& copyInsnLo = *new(mPool) InsnDoubleOpDir(&inPrimitive, mPool, raCopyI, atRegDirect, atRegAllocStackSlot, 2, 1);
|
|
useProducer(inPrimitive.nthInputVariable(0), copyInsnLo, 0, vrcInteger, vidLow);
|
|
useTemporaryVR(copyInsnLo, tmp64, 1);
|
|
InstructionDefine& orderStoreLoad = defineTemporaryOrder(copyInsnLo, 0);
|
|
|
|
loadInsn = new InsnFloatMemory(&inPrimitive, mPool, fild64, 2, 1);
|
|
useTemporaryVR(*loadInsn, tmp64, 0, vrcStackSlot);
|
|
useTemporaryOrder(*loadInsn, orderStoreLoad, 1);
|
|
}
|
|
break;
|
|
default:
|
|
// Absence of default case generates gcc warnings.
|
|
break;
|
|
}
|
|
|
|
InstructionDefine& order = defineTemporaryOrder(*loadInsn, 0);
|
|
|
|
// Store value from top of FPU stack into memory
|
|
ValueKind vk = inPrimitive.getKind();
|
|
switch (vk) {
|
|
case vkFloat:
|
|
{
|
|
// Pop result from FPU stack and store into memory as 32-bit float
|
|
InsnFloatMemory& storeInsn = *new InsnFloatMemory(&inPrimitive, mPool, fstp32, 1, 1);
|
|
useTemporaryOrder(storeInsn, order, 0);
|
|
defineProducer(inPrimitive, storeInsn, 0, vrcFloat);
|
|
}
|
|
break;
|
|
|
|
case vkDouble:
|
|
{
|
|
// Pop result from FPU stack and store into memory as 64-bit double
|
|
InsnFloatMemory& storeInsn = *new InsnFloatMemory(&inPrimitive, mPool, fstp64, 1, 1);
|
|
useTemporaryOrder(storeInsn, order, 0);
|
|
defineProducer(inPrimitive, storeInsn, 0, vrcDouble);
|
|
}
|
|
break;
|
|
|
|
case vkInt:
|
|
case vkLong:
|
|
{
|
|
/* Rounding is controlled by the RC flag in the FPU. Round-to-nearest is the desired rounding mode for
|
|
all floating-point instructions *except* conversions from floating-point types to integer types, in
|
|
which case round-towards-zero (truncation) is mandated. Rather than temporarily changing the RC flag
|
|
for all such conversions, we achieve the equivalent result by subtracting or adding 0.5 to the value
|
|
before rounding, i.e.
|
|
|
|
truncate(x) <==> round(x + sign(x) * 0.5)
|
|
|
|
FIXME - we still don't handle out-of-range inputs and NaNs per the Java spec.
|
|
|
|
*/
|
|
|
|
// Store the 32-bit representation of the floating-point input operand into memory so that we can extract its sign.
|
|
InsnFloatMemory& storeInsn1 = *new InsnFloatMemory(&inPrimitive, mPool, fst32, 1, 1);
|
|
useTemporaryOrder(storeInsn1, order, 0);
|
|
VirtualRegister& tmpVR1 = defineTemporary(storeInsn1, 0, vrcStackSlot);
|
|
|
|
// Extract the sign bit of the input operand
|
|
x86Instruction& andInsn = *new(mPool) x86Instruction(&inPrimitive, mPool, iaAndImm, 0x80000000, atRegAllocStackSlot, 1, 1);
|
|
useTemporaryVR(andInsn, tmpVR1, 0, vrcStackSlot);
|
|
redefineTemporary(andInsn, tmpVR1, 0, vrcStackSlot);
|
|
|
|
// Generate 0.5 * sign(input)
|
|
const float half = 0.5;
|
|
x86Instruction& orInsn = *new(mPool) x86Instruction(&inPrimitive, mPool, iaOrImm, *(Uint32*)&half, atRegAllocStackSlot, 1, 1);
|
|
useTemporaryVR(orInsn, tmpVR1, 0, vrcStackSlot);
|
|
redefineTemporary(orInsn, tmpVR1, 0, vrcStackSlot);
|
|
|
|
// Subtract 0.5 * sign(input) from input operand
|
|
InsnFloatMemory& subInsn = *new InsnFloatMemory(&inPrimitive, mPool, fsub32, 1, 1);
|
|
useTemporaryVR(subInsn, tmpVR1, 0, vrcStackSlot);
|
|
redefineTemporaryOrder(subInsn, order, 0);
|
|
|
|
if (vk == vkInt) {
|
|
// Pop result from FPU stack, convert to 32-bit integer, and store into memory
|
|
|
|
InsnFloatMemory& storeInsn = *new InsnFloatMemory(&inPrimitive, mPool, fistp32, 1, 1);
|
|
useTemporaryOrder(storeInsn, order, 0);
|
|
|
|
// All transfers from the FPU must go through memory, so make a copy from the memory location
|
|
// to the integer register destination.
|
|
InsnDoubleOpDir& copyInsn = copyFromFloatToIntegerRegister(inPrimitive, storeInsn);
|
|
defineProducer(inPrimitive, copyInsn, 0);
|
|
|
|
} else { // vkLong
|
|
// Pop result from FPU stack, convert to 64-bit integer, and store into memory
|
|
InsnFloatMemory& storeInsn = *new InsnFloatMemory(&inPrimitive, mPool, fistp64, 1, 1);
|
|
useTemporaryOrder(storeInsn, order, 0);
|
|
VirtualRegister& tmp64 = defineTemporary(storeInsn, 0, vrcStackSlot); // 64-bit integer stack slot
|
|
|
|
// Copy high 32-bits from memory to integer register
|
|
InsnDoubleOpDir& copyInsnHi = *new(mPool) InsnDoubleOpDir(&inPrimitive, mPool, raCopyI, atRegAllocStackSlotHi32, atRegDirect, 1, 1);
|
|
useTemporaryVR(copyInsnHi, tmp64, 0, vrcStackSlot);
|
|
defineProducer(inPrimitive, copyInsnHi, 0, vrcInteger, vidHigh);
|
|
|
|
// Copy low 32-bits from memory to integer register
|
|
InsnDoubleOpDir& copyInsnLo = *new(mPool) InsnDoubleOpDir(&inPrimitive, mPool, raCopyI, atRegAllocStackSlot, atRegDirect, 1, 1);
|
|
useTemporaryVR(copyInsnLo, tmp64, 0, vrcStackSlot);
|
|
defineProducer(inPrimitive, copyInsnLo, 0, vrcInteger, vidLow);
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
// Absence of default case generates gcc warnings.
|
|
assert(0);
|
|
break;
|
|
}
|
|
}
|
|
|
|
//====================================================================================================
|
|
// Floating-point function-call glue
|
|
|
|
// Obtain the 32-bit float return value of a function call
|
|
void x86Emitter::
|
|
emit_CallReturnF(InsnUseXDefineYFromPool& callInsn, DataNode& callPrimitive, DataNode& returnValProducer)
|
|
{
|
|
InstructionDefine& define = defineTemporaryOrder(callInsn, 1);
|
|
|
|
// Pop result from FPU stack and store into memory as 32-bit float
|
|
InsnFloatMemory& storeInsn = *new InsnFloatMemory(&callPrimitive, mPool, fstp32, 1, 1);
|
|
useTemporaryOrder(storeInsn, define, 0);
|
|
defineProducer(returnValProducer, storeInsn, 0, vrcFloat);
|
|
}
|
|
|
|
// Obtain the 64-bit double return value of a function call
|
|
void x86Emitter::
|
|
emit_CallReturnD(InsnUseXDefineYFromPool& callInsn, DataNode& callPrimitive, DataNode& returnValProducer)
|
|
{
|
|
InstructionDefine& define = defineTemporaryOrder(callInsn, 1);
|
|
|
|
// Pop result from FPU stack and store into memory as 64-bit double
|
|
InsnFloatMemory& storeInsn = *new InsnFloatMemory(&callPrimitive, mPool, fstp64, 1, 1);
|
|
useTemporaryOrder(storeInsn, define, 0);
|
|
defineProducer(returnValProducer, storeInsn, 0, vrcDouble);
|
|
}
|
|
|
|
// Retrieve a 32-bit float argument from the call stack
|
|
void x86Emitter::
|
|
emit_ArgF(PrimArg& arg, InstructionDefine& order, int curStackOffset)
|
|
{
|
|
InsnDoubleOpDir& loadParam = *new(mPool) InsnDoubleOpDir(&arg, mPool, raLoadI, curStackOffset, atStackOffset, atRegDirect, 1, 1);
|
|
useTemporaryOrder(loadParam, order, 0);
|
|
InsnDoubleOpDir& copyInsn = copyFromIntegerRegisterToFloat(arg, loadParam);
|
|
defineProducer(arg, copyInsn, 0, vrcFloat);
|
|
}
|
|
|
|
// Retrieve a 64-bit double argument from the call stack
|
|
void x86Emitter::
|
|
emit_ArgD(PrimArg& arg, InstructionDefine& order, int curStackOffset)
|
|
{
|
|
InsnFloatMemory& loadInsn = *new InsnFloatMemory(&arg, mPool, fld64, atStackOffset, curStackOffset, 1, 1);
|
|
useTemporaryOrder(loadInsn, order, 0);
|
|
redefineTemporaryOrder(loadInsn, order, 0);
|
|
|
|
InsnFloatMemory& copyInsn = *new InsnFloatMemory(&arg, mPool, fstp64, 1, 1);
|
|
useTemporaryOrder(copyInsn, order, 0);
|
|
defineProducer(arg, copyInsn, 0, vrcDouble);
|
|
}
|
|
|
|
// Push float function return value on top of FPU stack
|
|
void x86Emitter::
|
|
emit_Result_F(Primitive& inPrimitive)
|
|
{
|
|
InsnFloatMemory ©Insn = *new InsnFloatMemory(&inPrimitive, mPool, fld32, 1, 1);
|
|
InsnExternalUse& extInsn = *new(mPool) InsnExternalUse(&inPrimitive, mPool, 1);
|
|
|
|
useProducer(inPrimitive.nthInputVariable(0), copyInsn, 0, vrcFloat);
|
|
InstructionDefine& define = defineTemporaryOrder(copyInsn, 0);
|
|
useTemporaryOrder(extInsn, define, 0);
|
|
|
|
inPrimitive.setInstructionRoot(&extInsn);
|
|
}
|
|
|
|
// Push double function return value on top of FPU stack
|
|
void x86Emitter::
|
|
emit_Result_D(Primitive& inPrimitive)
|
|
{
|
|
InsnFloatMemory ©Insn = *new InsnFloatMemory(&inPrimitive, mPool, fld64, 1, 1);
|
|
InsnExternalUse& extInsn = *new(mPool) InsnExternalUse(&inPrimitive, mPool, 1);
|
|
|
|
useProducer(inPrimitive.nthInputVariable(0), copyInsn, 0, vrcDouble);
|
|
InstructionDefine& define = defineTemporaryOrder(copyInsn, 0);
|
|
useTemporaryOrder(extInsn, define, 0);
|
|
|
|
inPrimitive.setInstructionRoot(&extInsn);
|
|
}
|
|
|
|
//====================================================================================================
|
|
// Comparisons
|
|
|
|
// Matches pattern: poCatL_I(poFCmp_F(Vfloat, Vfloat))
|
|
/* Emits code that follows this pattern:
|
|
|
|
fld ; push second_arg on FPU stack
|
|
fcomp [ebp + xx] ; Load first_arg, set integer condition flags and pop all args
|
|
fnstsw ax ; Copy FPU status reg into AX
|
|
sahf ; Copy AX into EFLAGS status reg
|
|
seta al ; al = (first_arg > second_arg) ? 1 : 0;
|
|
setb bl ; bl = ((first_arg < second_arg) || (first_arg == NAN) || (second_arg == NAN)) ? 1 : 0
|
|
sub ebx, eax ; Result in lowest byte is -1, 0, or +1
|
|
movsx eax, bl ; Sign-extend low byte to 32-bits
|
|
|
|
(Some changes in operand usage will appear depending on the exact pattern of primitives being matched.)
|
|
*/
|
|
|
|
void x86Emitter::
|
|
emit_3wayCmpF(Primitive& inPrimitive, DataNode &first_operand, DataNode &second_operand,
|
|
bool negate_result, x86FloatMemoryType load_op, x86FloatMemoryType cmpOp, VRClass vrClass)
|
|
{
|
|
// Push first operand on FPU stack
|
|
InsnFloatMemory& loadInsn2 = *new InsnFloatMemory(&inPrimitive, mPool, load_op, 1, 1);
|
|
useProducer(first_operand, loadInsn2, 0, vrClass);
|
|
InstructionDefine& order = defineTemporaryOrder(loadInsn2, 0);
|
|
|
|
// Set FPU status flags
|
|
// FIXME - Should this define a condition-flag edge ? There really should be separate
|
|
// edge types for integer and FPU condition codes.
|
|
InsnFloatMemory& cmpInsn = *new InsnFloatMemory(&inPrimitive, mPool, cmpOp, 2, 1);
|
|
useProducer(second_operand, cmpInsn, 0, vrClass);
|
|
useTemporaryOrder(cmpInsn, order, 1);
|
|
redefineTemporaryOrder(cmpInsn, order, 0);
|
|
|
|
// Copy FPU status flags to AX register
|
|
InsnFloatReg& copyFromStatusFlagsInsn = *new InsnFloatReg(&inPrimitive, mPool, fnstsw, 1, 1);
|
|
useTemporaryOrder(copyFromStatusFlagsInsn, order, 0);
|
|
VirtualRegister& FPUstatus = defineTemporary(copyFromStatusFlagsInsn, 0);
|
|
FPUstatus.preColorRegister(x86GPRToColor[EAX]);
|
|
|
|
// sahf instruction (copy from AX into integer status flags register)
|
|
InsnNoArgs& sahfInsn = *new(mPool) InsnNoArgs(&inPrimitive, mPool, opSahf, 1, 1);
|
|
useTemporaryVR(sahfInsn, FPUstatus, 0);
|
|
sahfInsn.addDefine(0, udCond);
|
|
|
|
// setnbe instruction
|
|
InsnSet& setInsn1 = *new(mPool) InsnSet(&inPrimitive, mPool, ccJNBE, 1, 1);
|
|
setInsn1.addUse(0, udCond);
|
|
setInsn1.getInstructionUseBegin()[0].src = &sahfInsn; // condition edge
|
|
|
|
// setb instruction
|
|
InsnSet& setInsn2 = *new(mPool) InsnSet(&inPrimitive, mPool, ccJB, 1, 1);
|
|
setInsn2.addUse(0, udCond);
|
|
setInsn2.getInstructionUseBegin()[0].src = &sahfInsn; // condition edge
|
|
|
|
VirtualRegister* tmpVR1, *tmpVR2;
|
|
if (negate_result) {
|
|
tmpVR2 = &defineTemporary(setInsn1, 0); // (first_operand > second_operand) -> tmpVR2
|
|
tmpVR1 = &defineTemporary(setInsn2, 0); // ((first_operand < second_operand) ||
|
|
// (first_operand == NAN) ||
|
|
// (second_operand == NAN)) -> tmpVR1
|
|
} else {
|
|
tmpVR1 = &defineTemporary(setInsn1, 0); // (first_operand > second_operand) -> tmpVR1
|
|
tmpVR2 = &defineTemporary(setInsn2, 0); // ((first_operand < second_operand) ||
|
|
// (first_operand == NAN) ||
|
|
// (second_operand == NAN)) -> tmpVR2
|
|
}
|
|
|
|
// FIXME - We must store result of SET instruction in either AL, BL, CL, or DL, but there's no
|
|
// way to indicate this restriction to the register allocator so, for now, we hard-code the registers
|
|
tmpVR1->preColorRegister(x86GPRToColor[EAX]);
|
|
tmpVR2->preColorRegister(x86GPRToColor[EBX]);
|
|
|
|
// sub instruction
|
|
InsnDoubleOpDir& subInsn = *new(mPool) InsnDoubleOpDir(&inPrimitive, mPool, raSub);
|
|
useTemporaryVR(subInsn, *tmpVR1, 0);
|
|
useTemporaryVR(subInsn, *tmpVR2, 1);
|
|
redefineTemporary(subInsn, *tmpVR1, 0); // tmpVR1 - tmpVR2 -> tmpVR1
|
|
|
|
// Upper 24 bits are garbage. Sign-extend byte to 32-bits using movs instruction
|
|
InsnDoubleOp& extInsn = *new(mPool) InsnDoubleOp(&inPrimitive, mPool, opMovSxB, atRegDirect, atRegDirect, 1, 1);
|
|
useTemporaryVR(extInsn, *tmpVR1, 0);
|
|
defineProducer(inPrimitive, extInsn, 0); // exts(tmpVR1) -> result
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_3wayCmpF_G(Primitive& inPrimitive)
|
|
{
|
|
Primitive& cmpPrimitive = Primitive::cast(inPrimitive.nthInputVariable(0));
|
|
emit_3wayCmpF(inPrimitive, cmpPrimitive.nthInputVariable(0), cmpPrimitive.nthInputVariable(1),
|
|
false, fld32, fcomp32, vrcFloat);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_3wayCmpF_L(Primitive& inPrimitive)
|
|
{
|
|
Primitive& cmpPrimitive = Primitive::cast(inPrimitive.nthInputVariable(0));
|
|
emit_3wayCmpF(inPrimitive, cmpPrimitive.nthInputVariable(1), cmpPrimitive.nthInputVariable(0),
|
|
true, fld32, fcomp32, vrcFloat);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_3wayCmpD_G(Primitive& inPrimitive)
|
|
{
|
|
Primitive& cmpPrimitive = Primitive::cast(inPrimitive.nthInputVariable(0));
|
|
emit_3wayCmpF(inPrimitive, cmpPrimitive.nthInputVariable(0), cmpPrimitive.nthInputVariable(1),
|
|
false, fld64, fcomp64, vrcDouble);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_3wayCmpD_L(Primitive& inPrimitive)
|
|
{
|
|
Primitive& cmpPrimitive = Primitive::cast(inPrimitive.nthInputVariable(0));
|
|
emit_3wayCmpF(inPrimitive, cmpPrimitive.nthInputVariable(1), cmpPrimitive.nthInputVariable(0),
|
|
true, fld64, fcomp64, vrcDouble);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_3wayCmpCF_G(Primitive& inPrimitive)
|
|
{
|
|
Primitive& cmpPrimitive = Primitive::cast(inPrimitive.nthInputVariable(0));
|
|
emit_3wayCmpF(inPrimitive, cmpPrimitive.nthInputVariable(1), cmpPrimitive.nthInputVariable(0),
|
|
false, fld32, fcomp32, vrcFloat);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_3wayCmpCF_L(Primitive& inPrimitive)
|
|
{
|
|
Primitive& cmpPrimitive = Primitive::cast(inPrimitive.nthInputVariable(0));
|
|
emit_3wayCmpF(inPrimitive, cmpPrimitive.nthInputVariable(0), cmpPrimitive.nthInputVariable(1),
|
|
true, fld32, fcomp32, vrcFloat);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_3wayCmpCD_G(Primitive& inPrimitive)
|
|
{
|
|
Primitive& cmpPrimitive = Primitive::cast(inPrimitive.nthInputVariable(0));
|
|
emit_3wayCmpF(inPrimitive, cmpPrimitive.nthInputVariable(1), cmpPrimitive.nthInputVariable(0),
|
|
false, fld64, fcomp64, vrcDouble);
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_3wayCmpCD_L(Primitive& inPrimitive)
|
|
{
|
|
Primitive& cmpPrimitive = Primitive::cast(inPrimitive.nthInputVariable(0));
|
|
emit_3wayCmpF(inPrimitive, cmpPrimitive.nthInputVariable(0), cmpPrimitive.nthInputVariable(1),
|
|
true, fld64, fcomp64, vrcDouble);
|
|
}
|
|
|
|
//====================================================================================================
|
|
// Constants
|
|
|
|
// Generate 32-bit float constant
|
|
void x86Emitter::
|
|
emit_LoadConstant_F(Primitive& inPrimitive)
|
|
{
|
|
Uint32 constant = (*static_cast<const PrimConst *>(&inPrimitive)).value.i;
|
|
|
|
x86Instruction* newInsn;
|
|
|
|
if(constant == 0)
|
|
newInsn = new(mPool) x86Instruction(&inPrimitive, mPool, srmMoveImm0, 0, 0, 1);
|
|
else
|
|
newInsn = new(mPool) x86Instruction(&inPrimitive, mPool, ceMoveImm, constant, atRegDirect, 0, 1);
|
|
|
|
defineProducer(inPrimitive, copyFromIntegerRegisterToFloat(inPrimitive, *newInsn), 0, vrcFloat); // result
|
|
}
|
|
|
|
// Generate 64-bit double constant
|
|
// FIXME: Need to create an in-memory literal pool for storing double constants, rather than using immediate instructions
|
|
void x86Emitter::
|
|
emit_LoadConstant_D(Primitive& inPrimitive)
|
|
{
|
|
Flt64 constant = (*static_cast<const PrimConst *>(&inPrimitive)).value.d;
|
|
|
|
// Store 64-bit double constant in literal pool
|
|
// FIXME - We should have a literal pool for each method to store in-memory constants
|
|
// and which can be released when the method is discarded.
|
|
Flt64* literalPoolEntry = (Flt64*)malloc(sizeof(Flt64));
|
|
*literalPoolEntry = constant;
|
|
|
|
// Fetch from memory and temporarily push 64-bit double on the FPU stack
|
|
InsnFloatMemory &loadInsn = *new InsnFloatMemory(&inPrimitive, mPool, fld64, atAbsoluteAddress, (Uint32)literalPoolEntry, 0, 1);
|
|
InstructionDefine& order = defineTemporaryOrder(loadInsn, 0);
|
|
|
|
// Pop 64-bit double from FPU stack and store into double variable
|
|
InsnFloatMemory& storeInsn = *new InsnFloatMemory(&inPrimitive, mPool, fstp64, 1, 1);
|
|
useTemporaryOrder(storeInsn, order, 0);
|
|
defineProducer(inPrimitive, storeInsn, 0, vrcDouble); // result
|
|
}
|
|
|
|
//====================================================================================================
|
|
// Floating-point memory operations
|
|
|
|
void x86Emitter::
|
|
emit_Ld_F(Primitive& inPrimitive)
|
|
{
|
|
// Load 32-bit float into an integer register
|
|
InsnDoubleOpDir& loadInsn = *new(mPool) InsnDoubleOpDir(&inPrimitive, mPool, raLoadI, atRegisterIndirect, atRegDirect);
|
|
useProducer(inPrimitive.nthInputVariable(0), loadInsn, 1); // memory edge
|
|
useProducer(inPrimitive.nthInputVariable(1), loadInsn, 0); // address
|
|
|
|
// Store into 32-bit float variable
|
|
InsnDoubleOpDir& storeInsn = copyFromIntegerRegisterToFloat(inPrimitive, loadInsn);
|
|
defineProducer(inPrimitive, storeInsn, 0, vrcFloat); // result
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_Ld_D(Primitive& inPrimitive)
|
|
{
|
|
// Fetch from memory and temporarily push 64-bit double on the FPU stack
|
|
InsnFloatMemory &loadInsn = *new InsnFloatMemory(&inPrimitive, mPool, fld64, atRegisterIndirect, 2, 1);
|
|
useProducer(inPrimitive.nthInputVariable(0), loadInsn, 1); // memory edge
|
|
useProducer(inPrimitive.nthInputVariable(1), loadInsn, 0); // address
|
|
InstructionDefine& order = defineTemporaryOrder(loadInsn, 0);
|
|
|
|
// Pop 64-bit double from FPU stack
|
|
InsnFloatMemory& storeInsn = *new InsnFloatMemory(&inPrimitive, mPool, fstp64, 1, 1);
|
|
useTemporaryOrder(storeInsn, order, 0);
|
|
defineProducer(inPrimitive, storeInsn, 0, vrcDouble); // result
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_St_F(Primitive& inPrimitive)
|
|
{
|
|
// Load 32-bit float into an integer register
|
|
InsnDoubleOpDir& loadInsn = *new(mPool) InsnDoubleOpDir(&inPrimitive, mPool, raCopyI, atRegAllocStackSlot, atRegDirect, 1, 1);
|
|
useProducer(inPrimitive.nthInputVariable(2), loadInsn, 0); // data
|
|
VirtualRegister& floatVal = defineTemporary(loadInsn, 0); // intermediate output
|
|
|
|
// Store temporary integer register into indirect register destination
|
|
InsnDoubleOpDir& storeInsn = *new(mPool) InsnDoubleOpDir(&inPrimitive, mPool, raStoreI, atRegisterIndirect, atRegDirect, 3, 1);
|
|
useProducer(inPrimitive.nthInputVariable(1), storeInsn, 0); // address
|
|
useTemporaryVR(storeInsn, floatVal, 1); // data
|
|
useProducer(inPrimitive.nthInputVariable(0), storeInsn, 2); // memory edge in
|
|
defineProducer(inPrimitive, storeInsn, 0); // memory edge out
|
|
}
|
|
|
|
void x86Emitter::
|
|
emit_St_D(Primitive& inPrimitive)
|
|
{
|
|
// Temporarily push 64-bit double on the FPU stack
|
|
InsnFloatMemory& loadInsn = *new InsnFloatMemory(&inPrimitive, mPool, fld64, 1, 1);
|
|
useProducer(inPrimitive.nthInputVariable(2), loadInsn, 0, vrcDouble); // data
|
|
InstructionDefine& order = defineTemporaryOrder(loadInsn, 0);
|
|
|
|
// Pop result from FPU stack and store into memory as 64-bit double
|
|
InsnFloatMemory& storeInsn = *new InsnFloatMemory(&inPrimitive, mPool, fstp64, atRegisterIndirect, 3, 1);
|
|
useProducer(inPrimitive.nthInputVariable(1), storeInsn, 0); // address
|
|
useTemporaryOrder(storeInsn, order, 1);
|
|
useProducer(inPrimitive.nthInputVariable(0), storeInsn, 2); // memory edge in
|
|
defineProducer(inPrimitive, storeInsn, 0); // memory edge out
|
|
}
|